From 8fa1c7cd1fe9cdfc426a603e1f1eecd3f463c487 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 19 Oct 2023 10:02:42 -0700 Subject: drm/i915/mcr: Hold GT forcewake during steering operations The steering control and semaphore registers are inside an "always on" power domain with respect to RC6. However there are some issues if higher-level platform sleep states are entering/exiting at the same time these registers are accessed. Grabbing GT forcewake and holding it over the entire lock/steer/unlock cycle ensures that those sleep states have been fully exited before we access these registers. This is expected to become a formally documented/numbered workaround soon. Note that this patch alone isn't expected to have an immediately noticeable impact on MCR (mis)behavior; an upcoming pcode firmware update will also be necessary to provide the other half of this workaround. v2: - Move the forcewake inside the Xe_LPG-specific IP version check. This should only be necessary on platforms that have a steering semaphore. Fixes: 3100240bf846 ("drm/i915/mtl: Add hardware-level lock for steering") Cc: Radhakrishna Sripada Cc: Jonathan Cavitt Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada Reviewed-by: Jonathan Cavitt Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231019170241.2102037-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 326c2ed1d99b..34913912d8ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -376,9 +376,26 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) * driver threads, but also with hardware/firmware agents. A dedicated * locking register is used. */ - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) { + /* + * The steering control and semaphore registers are inside an + * "always on" power domain with respect to RC6. However there + * are some issues if higher-level platform sleep states are + * entering/exiting at the same time these registers are + * accessed. Grabbing GT forcewake and holding it over the + * entire lock/steer/unlock cycle ensures that those sleep + * states have been fully exited before we access these + * registers. This wakeref will be released in the unlock + * routine. + * + * This is expected to become a formally documented/numbered + * workaround soon. + */ + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT); + err = wait_for(intel_uncore_read_fw(gt->uncore, MTL_STEER_SEMAPHORE) == 0x1, 100); + } /* * Even on platforms with a hardware lock, we'll continue to grab @@ -415,8 +432,11 @@ void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags) { spin_unlock_irqrestore(>->mcr_lock, flags); - if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) { intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1); + + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_GT); + } } /** -- cgit v1.2.3 From 31f6a06f0c543b43a38fab10f39e5fc45ad62aa2 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 20 Oct 2023 08:24:41 -0700 Subject: drm/i915/pmu: Check if pmu is closed before stopping event When the driver unbinds, pmu is unregistered and i915->uabi_engines is set to RB_ROOT. 
Due to this, when i915 PMU tries to stop the engine events, it issues a warn_on because engine lookup fails. All perf hooks are taking care of this using a pmu->closed flag that is set when PMU unregisters. The stop event seems to have been left out. Check for pmu->closed in pmu_event_stop as well. Based on discussion here - https://patchwork.freedesktop.org/patch/492079/?series=105790&rev=2 v2: s/is/if/ in commit title v3: Add fixes tag and cc stable Cc: # v5.11+ Fixes: b00bccb3f0bb ("drm/i915/pmu: Handle PCI unbind") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231020152441.3764850-1-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 108b675088ba..f861863eb7c1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -831,9 +831,18 @@ static void i915_pmu_event_start(struct perf_event *event, int flags) static void i915_pmu_event_stop(struct perf_event *event, int flags) { + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = &i915->pmu; + + if (pmu->closed) + goto out; + if (flags & PERF_EF_UPDATE) i915_pmu_event_read(event); i915_pmu_disable(event); + +out: event->hw.state = PERF_HES_STOPPED; } -- cgit v1.2.3 From 0520b30b219053cd789909bca45b3c486ef3ee09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2023 22:10:04 +0200 Subject: drm/i915/mtl: avoid stringop-overflow warning The newly added memset() causes a warning for some reason I could not figure out: In file included from arch/x86/include/asm/string.h:3, from drivers/gpu/drm/i915/gt/intel_rc6.c:6: In function 'rc6_res_reg_init', inlined from 'intel_rc6_init' at drivers/gpu/drm/i915/gt/intel_rc6.c:610:2: arch/x86/include/asm/string_32.h:195:29: error: '__builtin_memset' writing 16 bytes into a region of size 0 overflows the destination [-Werror=stringop-overflow=] 195 | #define memset(s, c, count) __builtin_memset(s, c, count) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gt/intel_rc6.c:584:9: note: in expansion of macro 'memset' 584 | memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); | ^~~~~~ In function 'intel_rc6_init': Change it to an normal initializer and an added memcpy() that does not have this problem. Fixes: 4bb9ca7ee074 ("drm/i915/mtl: C6 residency and C state type for MTL SAMedia") Signed-off-by: Arnd Bergmann Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231016201012.1022812-1-arnd@kernel.org --- drivers/gpu/drm/i915/gt/intel_rc6.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 8b67abd720be..7090e4be29cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -581,19 +581,23 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) static void rc6_res_reg_init(struct intel_rc6 *rc6) { - memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); + i915_reg_t res_reg[INTEL_RC6_RES_MAX] = { + [0 ... 
INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG, + }; switch (rc6_to_gt(rc6)->type) { case GT_MEDIA: - rc6->res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; + res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; break; default: - rc6->res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; - rc6->res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; - rc6->res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; - rc6->res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; + res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; + res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; + res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; + res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; break; } + + memcpy(rc6->res_reg, res_reg, sizeof(res_reg)); } void intel_rc6_init(struct intel_rc6 *rc6) -- cgit v1.2.3 From ffc02c67bf8d4909bd9571fbd14104381fe36b21 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 23 Oct 2023 14:13:05 +0200 Subject: drm/i915/gt: Use proper priority enum instead of 0 I915_PRIORITY_NORMAL is 0 so use that instead for better readability. Cc: John Harrison Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231023121305.12560-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 9a527e1f5be6..1a8e2b7db013 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -188,7 +188,7 @@ static void heartbeat(struct work_struct *wrk) * low latency and no jitter] the chance to naturally * complete before being preempted. */ - attr.priority = 0; + attr.priority = I915_PRIORITY_NORMAL; if (rq->sched.attr.priority >= attr.priority) attr.priority = I915_PRIORITY_HEARTBEAT; if (rq->sched.attr.priority >= attr.priority) -- cgit v1.2.3 From 6ce33a8a45496d4eca27b45ab9b8c2436c657495 Mon Sep 17 00:00:00 2001 From: Soumya Negi Date: Wed, 25 Oct 2023 21:43:08 -0700 Subject: drm/i915/gt: Remove {} from if-else In accordance to Linux coding style(Documentation/process/4.Coding.rst), remove unneeded braces from if-else block as all arms of this block contain single statements. 
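For illustration, the coding-style rule being applied, shown with generic names rather than the driver code: braces are dropped only while every arm of the conditional remains a single statement.

	if (condition)
		do_this();
	else
		do_that();

	/* keep braces on all arms as soon as any arm needs more than one statement */
	if (condition) {
		do_this();
		do_this_too();
	} else {
		do_that();
	}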
Suggested-by: Andi Shyti Signed-off-by: Soumya Negi Acked-by: Karolina Stolarek Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231026044309.17213-1-soumya.negi97@gmail.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 1c93e84278a0..9f6f9e138532 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -226,16 +226,15 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) gen8_ggtt_invalidate(ggtt); list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { - if (intel_guc_tlb_invalidation_is_available(>->uc.guc)) { + if (intel_guc_tlb_invalidation_is_available(>->uc.guc)) guc_ggtt_ct_invalidate(gt); - } else if (GRAPHICS_VER(i915) >= 12) { + else if (GRAPHICS_VER(i915) >= 12) intel_uncore_write_fw(gt->uncore, GEN12_GUC_TLB_INV_CR, GEN12_GUC_TLB_INV_CR_INVALIDATE); - } else { + else intel_uncore_write_fw(gt->uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); - } } } -- cgit v1.2.3 From 81de3e296b10a13e5c9f13172825b0d8d9495c68 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 18 Oct 2023 11:38:15 +0200 Subject: drm/i915: Flush WC GGTT only on required platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gen8_ggtt_invalidate() is only needed for limited set of platforms where GGTT is mapped as WC. This was added as way to fix WC based GGTT in commit 0f9b91c754b7 ("drm/i915: flush system agent TLBs on SNB") and there are no reference in HW docs that forces us to use this on non-WC backed GGTT. This can also cause unwanted side-effects on XE_HP platforms where GFX_FLSH_CNTL_GEN6 is not valid anymore. v2: Add a func to detect wc ggtt detection (Ville) v3: Improve commit log and add reference commit (Daniel) Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement") Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Jonathan Cavitt Cc: John Harrison Cc: Andi Shyti Cc: Ville Syrjälä Cc: Daniel Vetter Cc: # v6.2+ Suggested-by: Matt Roper Signed-off-by: Nirmoy Das Reviewed-by: Matt Roper Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231018093815.1349-1-nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 9f6f9e138532..76b0ee2b906b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -195,6 +195,21 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) spin_unlock_irq(&uncore->lock); } +static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915) +{ + /* + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. 
+ */ + if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11) + return true; + + return false; +} + static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; @@ -202,8 +217,12 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) /* * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. + * + * Only perform this when GGTT is mapped as WC, see ggtt_probe_common(). */ - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + if (needs_wc_ggtt_mapping(ggtt->vm.i915)) + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, + GFX_FLSH_CNTL_EN); } static void guc_ggtt_ct_invalidate(struct intel_gt *gt) @@ -1139,17 +1158,11 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); - /* - * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. - */ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) - ggtt->gsm = ioremap(phys_addr, size); - else + if (needs_wc_ggtt_mapping(i915)) ggtt->gsm = ioremap_wc(phys_addr, size); + else + ggtt->gsm = ioremap(phys_addr, size); + if (!ggtt->gsm) { drm_err(&i915->drm, "Failed to map the ggtt page table\n"); return -ENOMEM; -- cgit v1.2.3 From 33f2af42a2019da4fecde30fe144a810b485762f Mon Sep 17 00:00:00 2001 From: Gilbert Adikankwu Date: Thu, 26 Oct 2023 11:56:23 +0100 Subject: drm/i915/gt: Remove unncessary {} from if-else Fix checkpatch.pl error: WARNING: braces {} are not necessary for any arm of this statement Signed-off-by: Gilbert Adikankwu Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231026105623.480172-1-gilbertadikankwu@gmail.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index f602895f6d0d..6a3246240e81 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -849,13 +849,12 @@ void intel_sseu_print_topology(struct drm_i915_private *i915, const struct sseu_dev_info *sseu, struct drm_printer *p) { - if (sseu->max_slices == 0) { + if (sseu->max_slices == 0) drm_printf(p, "Unavailable\n"); - } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) sseu_print_xehp_topology(sseu, p); - } else { + else sseu_print_hsw_topology(sseu, p); - } } void intel_sseu_print_ss_info(const char *type, -- cgit v1.2.3 From 36f27350ff745bd228ab04d7845dfbffc177a889 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 27 Oct 2023 10:28:22 -0700 Subject: i915/perf: Fix NULL deref bugs with drm_dbg() calls When i915 perf interface is not available dereferencing it will lead to NULL dereferences. As returning -ENOTSUPP is pretty clear return when perf interface is not available. 
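A minimal sketch of the failure mode, distilled from the hunks below: with perf->i915 NULL, the drm device pointer handed to drm_dbg() is computed from a NULL base, so the debug call itself is the crash and the message can never be printed.

	if (!perf->i915) {
		/* perf->i915 is NULL, so this drm_dbg() dereferences it */
		drm_dbg(&perf->i915->drm,
			"i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	/* fixed form: nothing useful can be logged here, just bail out */
	if (!perf->i915)
		return -ENOTSUPP;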
Fixes: 2fec539112e8 ("i915/perf: Replace DRM_DEBUG with driver specific drm_dbg call") Suggested-by: Tvrtko Ursulin Signed-off-by: Harshit Mogalapalli Reviewed-by: Tvrtko Ursulin Cc: # v6.0+ Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231027172822.2753059-1-harshit.m.mogalapalli@oracle.com [tursulin: added stable tag] --- drivers/gpu/drm/i915/i915_perf.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7178f298b3e6..f92c2a464ebe 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4227,11 +4227,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, u32 known_open_flags; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | @@ -4607,11 +4604,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_reg *regs; int err, id; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (!perf->metrics_kobj) { drm_dbg(&perf->i915->drm, @@ -4773,11 +4767,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_config *oa_config; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (i915_perf_stream_paranoid && !perfmon_capable()) { drm_dbg(&perf->i915->drm, -- cgit v1.2.3 From 874d6fe4a6962cc18bb0e62dfc23adbebd0abbe2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 23 Oct 2023 18:02:54 +0300 Subject: drm/i915/pmu: add pmu_to_i915() helper It's tedious to duplicate the container_of() everywhere. Add a helper. 
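As background, container_of() only does pointer arithmetic from a member back to its enclosing structure; a self-contained user-space sketch of the same idea, with made-up names:

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct outer {
		int id;
		int member;
	};

	int main(void)
	{
		struct outer o = { .id = 42, .member = 0 };
		int *p = &o.member;              /* only the member pointer is known */
		struct outer *back = container_of(p, struct outer, member);

		printf("%d\n", back->id);        /* prints 42 */
		return 0;
	}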
Signed-off-by: Jani Nikula Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231023150256.438331-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index f861863eb7c1..52e42d7f23ec 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -31,6 +31,11 @@ static cpumask_t i915_pmu_cpumask; static unsigned int i915_pmu_target_cpu = -1; +static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu) +{ + return container_of(pmu, struct drm_i915_private, pmu); +} + static u8 engine_config_sample(u64 config) { return config & I915_PMU_SAMPLE_MASK; @@ -141,7 +146,7 @@ static u32 frequency_enabled_mask(void) static bool pmu_needs_timer(struct i915_pmu *pmu) { - struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + struct drm_i915_private *i915 = pmu_to_i915(pmu); u32 enable; /* @@ -251,7 +256,7 @@ static u64 get_rc6(struct intel_gt *gt) static void init_rc6(struct i915_pmu *pmu) { - struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + struct drm_i915_private *i915 = pmu_to_i915(pmu); struct intel_gt *gt; unsigned int i; @@ -982,7 +987,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, static struct attribute ** create_event_attributes(struct i915_pmu *pmu) { - struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + struct drm_i915_private *i915 = pmu_to_i915(pmu); static const struct { unsigned int counter; const char *name; -- cgit v1.2.3 From cb476dd1b8b10a40f6ba6e230f0b408916365c1f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 23 Oct 2023 18:02:55 +0300 Subject: drm/i915/pmu: add event_to_pmu() helper It's tedious to duplicate the container_of() everywhere. Add a helper. Also do the logical steps of first getting from struct perf_event to struct i915_pmu, and then from struct i915_pmu to struct drm_i915_private if needed, instead of perf_event->i915->pmu. Not all places even need the i915 pointer. 
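The chain the two helpers encode, mirroring the hunks below: struct i915_pmu is embedded in struct drm_i915_private, and its base member is the struct pmu that perf stores in event->pmu, so two container_of() steps walk all the way back out.

	struct i915_pmu *pmu = container_of(event->pmu, struct i915_pmu, base);
	struct drm_i915_private *i915 = container_of(pmu, struct drm_i915_private, pmu);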
Signed-off-by: Jani Nikula Acked-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231023150256.438331-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 45 ++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 52e42d7f23ec..5c3c480886db 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -31,6 +31,11 @@ static cpumask_t i915_pmu_cpumask; static unsigned int i915_pmu_target_cpu = -1; +static struct i915_pmu *event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct i915_pmu, base); +} + static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu) { return container_of(pmu, struct drm_i915_private, pmu); @@ -510,8 +515,8 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) static void i915_pmu_event_destroy(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); drm_WARN_ON(&i915->drm, event->parent); @@ -577,8 +582,8 @@ config_status(struct drm_i915_private *i915, u64 config) static int engine_event_init(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); struct intel_engine_cs *engine; engine = intel_engine_lookup_user(i915, engine_event_class(event), @@ -591,9 +596,8 @@ static int engine_event_init(struct perf_event *event) static int i915_pmu_event_init(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); int ret; if (pmu->closed) @@ -633,9 +637,8 @@ static int i915_pmu_event_init(struct perf_event *event) static u64 __i915_pmu_event_read(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); u64 val = 0; if (is_engine_event(event)) { @@ -691,10 +694,8 @@ static u64 __i915_pmu_event_read(struct perf_event *event) static void i915_pmu_event_read(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); struct hw_perf_event *hwc = &event->hw; - struct i915_pmu *pmu = &i915->pmu; u64 prev, new; if (pmu->closed) { @@ -712,10 +713,9 @@ static void i915_pmu_event_read(struct perf_event *event) static void i915_pmu_enable(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); const unsigned int bit = event_bit(event); - struct i915_pmu *pmu = &i915->pmu; unsigned long flags; if (bit == -1) @@ -776,10 +776,9 @@ update: static void i915_pmu_disable(struct perf_event *event) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); + struct i915_pmu *pmu = event_to_pmu(event); + struct drm_i915_private *i915 = pmu_to_i915(pmu); const unsigned int bit = 
event_bit(event); - struct i915_pmu *pmu = &i915->pmu; unsigned long flags; if (bit == -1) @@ -823,9 +822,7 @@ static void i915_pmu_disable(struct perf_event *event) static void i915_pmu_event_start(struct perf_event *event, int flags) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = event_to_pmu(event); if (pmu->closed) return; @@ -853,9 +850,7 @@ out: static int i915_pmu_event_add(struct perf_event *event, int flags) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = event_to_pmu(event); if (pmu->closed) return -ENODEV; -- cgit v1.2.3 From 76310edddf11a5716f324785e9caad01a90e128a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 23 Oct 2023 18:02:56 +0300 Subject: drm/i915/pmu: rearrange hrtimer pointer chasing Do the logical step of first getting from struct hrtimer to struct i915_pmu, and then from struct i915_pmu to struct drm_i915_private, instead of hrtimer->i915->pmu. Signed-off-by: Jani Nikula Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231023150256.438331-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 5c3c480886db..878a27e1c8ef 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -478,9 +478,8 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) { - struct drm_i915_private *i915 = - container_of(hrtimer, struct drm_i915_private, pmu.timer); - struct i915_pmu *pmu = &i915->pmu; + struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer); + struct drm_i915_private *i915 = pmu_to_i915(pmu); unsigned int period_ns; struct intel_gt *gt; unsigned int i; -- cgit v1.2.3 From 8aa519f17512da50a2d850b60472de656e2b210a Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Wed, 25 Oct 2023 18:47:09 +0530 Subject: drm/i915/mtl: Add Wa_22016670082 Implemented workaround for XeLPM+ BSpec: 51762 Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20231025131709.3368517-1-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 192ac0e59afa..6ae7a4de83b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1674,6 +1674,9 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB); + /* Wa_22016670082 */ + wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); + debug_dump_steering(gt); } -- cgit v1.2.3 From 9bb66c179f50e61df20ba13c9b34ca17d00b05fb Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 26 Oct 2023 20:36:26 +0200 Subject: drm/i915: Reserve some kernel space per vm Reserve one page in each vm for kernel space to use for things such as workarounds. 
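For context, a later patch in this series ("Add WABB blit for Wa_16018031267 / Wa_16018063123") is the consumer of this reservation; the workaround batch points its zero-byte blit writes at the reserved VMA, roughly:

	*cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
	*cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));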
v2: use real memory, do not decrease vm.total v4: reserve only one page and explain flag v5: remove allocated object on ppgtt cleanup v6: decrease vm->total by reservation size Suggested-by: Chris Wilson Signed-off-by: Andrzej Hajda Reviewed-by: Jonathan Cavitt Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231026-wabb-v6-1-4aa7d55d0a8a@intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 43 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gtt.h | 4 ++++ 2 files changed, 47 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 9895e18df043..fa46d2308b0e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -5,6 +5,7 @@ #include +#include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" #include "gen8_ppgtt.h" @@ -222,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + if (vm->rsvd.obj) + i915_gem_object_put(vm->rsvd.obj); + if (intel_vgpu_active(vm->i915)) gen8_ppgtt_notify_vgt(ppgtt, false); @@ -950,6 +954,41 @@ err_pd: return ERR_PTR(err); } +static int gen8_init_rsvd(struct i915_address_space *vm) +{ + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + /* The memory will be used only by GPU. */ + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, + I915_BO_ALLOC_VOLATILE | + I915_BO_ALLOC_GPU_ONLY); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto unref; + } + + ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (ret) + goto unref; + + vm->rsvd.vma = i915_vma_make_unshrinkable(vma); + vm->rsvd.obj = obj; + vm->total -= vma->node.size; + return 0; +unref: + i915_gem_object_put(obj); + return ret; +} + /* * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -1031,6 +1070,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, if (intel_vgpu_active(gt->i915)) gen8_ppgtt_notify_vgt(ppgtt, true); + err = gen8_init_rsvd(&ppgtt->vm); + if (err) + goto err_put; + return ppgtt; err_put: diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index b471edac2699..028a5a988eea 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -249,6 +249,10 @@ struct i915_address_space { struct work_struct release_work; struct drm_mm mm; + struct { + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + } rsvd; struct intel_gt *gt; struct drm_i915_private *i915; struct device *dma; -- cgit v1.2.3 From 03fe4b87c6420fde29e3401f87fcdc271c960950 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 26 Oct 2023 20:36:27 +0200 Subject: drm/i915: Add WABB blit for Wa_16018031267 / Wa_16018063123 Apply WABB blit for Wa_16018031267 / Wa_16018063123. v3: drop unused enum definition v4: move selftest to separate patch, use wa only on BCS0. 
v5: fixed selftest caller to context_wabb Signed-off-by: Nirmoy Das Signed-off-by: Jonathan Cavitt Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231026-wabb-v6-2-4aa7d55d0a8a@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 3 + drivers/gpu/drm/i915/gt/intel_gt.h | 4 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 100 +++++++++++++++++++++++++++- drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- 4 files changed, 105 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index fdd4ddd3a978..b8618ee3e304 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -118,6 +118,9 @@ #define CCID_EXTENDED_STATE_RESTORE BIT(2) #define CCID_EXTENDED_STATE_SAVE BIT(3) #define RING_BB_PER_CTX_PTR(base) _MMIO((base) + 0x1c0) /* gen8+ */ +#define PER_CTX_BB_FORCE BIT(2) +#define PER_CTX_BB_VALID BIT(0) + #define RING_INDIRECT_CTX(base) _MMIO((base) + 0x1c4) /* gen8+ */ #define RING_INDIRECT_CTX_OFFSET(base) _MMIO((base) + 0x1c8) /* gen8+ */ #define ECOSKPD(base) _MMIO((base) + 0x1d0) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 970bedf6b78a..9ffdb05e231e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -82,6 +82,10 @@ struct drm_printer; ##__VA_ARGS__); \ } while (0) +#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ + IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \ + engine->class == COPY_ENGINE_CLASS && engine->instance == 0) + static inline bool gt_is_root(struct intel_gt *gt) { return !gt->info.id; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eaf66d903166..7c367ba8d9dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -828,6 +828,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) return 0; } +static void +lrc_setup_bb_per_ctx(u32 *regs, + const struct intel_engine_cs *engine, + u32 ctx_bb_ggtt_addr) +{ + GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); + regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = + ctx_bb_ggtt_addr | + PER_CTX_BB_FORCE | + PER_CTX_BB_VALID; +} + static void lrc_setup_indirect_ctx(u32 *regs, const struct intel_engine_cs *engine, @@ -1020,7 +1032,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce) return PAGE_SIZE * ce->wa_bb_page; } -static u32 *context_indirect_bb(const struct intel_context *ce) +/* + * per_ctx below determines which WABB section is used. + * When true, the function returns the location of the + * PER_CTX_BB. When false, the function returns the + * location of the INDIRECT_CTX. + */ +static u32 *context_wabb(const struct intel_context *ce, bool per_ctx) { void *ptr; @@ -1029,6 +1047,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce) ptr = ce->lrc_reg_state; ptr -= LRC_STATE_OFFSET; /* back to start of context image */ ptr += context_wa_bb_offset(ce); + ptr += per_ctx ? PAGE_SIZE : 0; return ptr; } @@ -1105,7 +1124,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (GRAPHICS_VER(engine->i915) >= 12) { ce->wa_bb_page = context_size / PAGE_SIZE; - context_size += PAGE_SIZE; + /* INDIRECT_CTX and PER_CTX_BB need separate pages. 
*/ + context_size += PAGE_SIZE * 2; } if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) { @@ -1407,12 +1427,85 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) return gen12_emit_aux_table_inv(ce->engine, cs); } +static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs) +{ + struct intel_gt *gt = ce->engine->gt; + int mocs = gt->mocs.uc_index << 1; + + /** + * Wa_16018031267 / Wa_16018063123 requires that SW forces the + * main copy engine arbitration into round robin mode. We + * additionally need to submit the following WABB blt command + * to produce 4 subblits with each subblit generating 0 byte + * write requests as WABB: + * + * XY_FASTCOLOR_BLT + * BG0 -> 5100000E + * BG1 -> 0000003F (Dest pitch) + * BG2 -> 00000000 (X1, Y1) = (0, 0) + * BG3 -> 00040001 (X2, Y2) = (1, 4) + * BG4 -> scratch + * BG5 -> scratch + * BG6-12 -> 00000000 + * BG13 -> 20004004 (Surf. Width= 2,Surf. Height = 5 ) + * BG14 -> 00000010 (Qpitch = 4) + * BG15 -> 00000000 + */ + *cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2); + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f; + *cs++ = 0; + *cs++ = 4 << 16 | 1; + *cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma)); + *cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma)); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0x20004004; + *cs++ = 0x10; + *cs++ = 0; + + return cs; +} + +static u32 * +xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs) +{ + /* Wa_16018031267, Wa_16018063123 */ + if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine)) + cs = xehp_emit_fastcolor_blt_wabb(ce, cs); + + return cs; +} + +static void +setup_per_ctx_bb(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 *(*emit)(const struct intel_context *, u32 *)) +{ + /* Place PER_CTX_BB on next page after INDIRECT_CTX */ + u32 * const start = context_wabb(ce, true); + u32 *cs; + + cs = emit(ce, start); + + /* PER_CTX_BB must manually terminate */ + *cs++ = MI_BATCH_BUFFER_END; + + GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); + lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine, + lrc_indirect_bb(ce) + PAGE_SIZE); +} + static void setup_indirect_ctx_bb(const struct intel_context *ce, const struct intel_engine_cs *engine, u32 *(*emit)(const struct intel_context *, u32 *)) { - u32 * const start = context_indirect_bb(ce); + u32 * const start = context_wabb(ce, false); u32 *cs; cs = emit(ce, start); @@ -1511,6 +1604,7 @@ u32 lrc_update_regs(const struct intel_context *ce, /* Mutually exclusive wrt to global indirect bb */ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); setup_indirect_ctx_bb(ce, engine, fn); + setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb); } return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 5f826b6dcf5d..823d38aa3934 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1596,7 +1596,7 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) static void indirect_ctx_bb_setup(struct intel_context *ce) { - u32 *cs = context_indirect_bb(ce); + u32 *cs = context_wabb(ce, false); cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; -- cgit v1.2.3 From 3a32ef21ed5497f30f2bc99074014496748533d3 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 26 Oct 2023 20:36:28 +0200 Subject: drm/i915/gt: add selftest to exercise WABB Test re-uses logic form indirect ctx BB selftest. 
Signed-off-by: Nirmoy Das Signed-off-by: Jonathan Cavitt Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231026-wabb-v6-3-4aa7d55d0a8a@intel.com --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 65 ++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 823d38aa3934..e17b8777d21d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1555,7 +1555,7 @@ static int live_lrc_isolation(void *arg) return err; } -static int indirect_ctx_submit_req(struct intel_context *ce) +static int wabb_ctx_submit_req(struct intel_context *ce) { struct i915_request *rq; int err = 0; @@ -1579,7 +1579,8 @@ static int indirect_ctx_submit_req(struct intel_context *ce) #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) static u32 * -emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +emit_wabb_ctx_canary(const struct intel_context *ce, + u32 *cs, bool per_ctx) { *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT | @@ -1587,26 +1588,43 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) *cs++ = i915_mmio_reg_offset(RING_START(0)); *cs++ = i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce) + - CTX_BB_CANARY_OFFSET; + CTX_BB_CANARY_OFFSET + + (per_ctx ? PAGE_SIZE : 0); *cs++ = 0; return cs; } +static u32 * +emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + return emit_wabb_ctx_canary(ce, cs, false); +} + +static u32 * +emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + return emit_wabb_ctx_canary(ce, cs, true); +} + static void -indirect_ctx_bb_setup(struct intel_context *ce) +wabb_ctx_setup(struct intel_context *ce, bool per_ctx) { - u32 *cs = context_wabb(ce, false); + u32 *cs = context_wabb(ce, per_ctx); cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; - setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); + if (per_ctx) + setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary); + else + setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); } -static bool check_ring_start(struct intel_context *ce) +static bool check_ring_start(struct intel_context *ce, bool per_ctx) { const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - - LRC_STATE_OFFSET + context_wa_bb_offset(ce); + LRC_STATE_OFFSET + context_wa_bb_offset(ce) + + (per_ctx ? PAGE_SIZE : 0); if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) return true; @@ -1618,21 +1636,21 @@ static bool check_ring_start(struct intel_context *ce) return false; } -static int indirect_ctx_bb_check(struct intel_context *ce) +static int wabb_ctx_check(struct intel_context *ce, bool per_ctx) { int err; - err = indirect_ctx_submit_req(ce); + err = wabb_ctx_submit_req(ce); if (err) return err; - if (!check_ring_start(ce)) + if (!check_ring_start(ce, per_ctx)) return -EINVAL; return 0; } -static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) +static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx) { struct intel_context *a, *b; int err; @@ -1667,14 +1685,14 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) * As ring start is restored apriori of starting the indirect ctx bb and * as it will be different for each context, it fits to this purpose. 
*/ - indirect_ctx_bb_setup(a); - indirect_ctx_bb_setup(b); + wabb_ctx_setup(a, per_ctx); + wabb_ctx_setup(b, per_ctx); - err = indirect_ctx_bb_check(a); + err = wabb_ctx_check(a, per_ctx); if (err) goto unpin_b; - err = indirect_ctx_bb_check(b); + err = wabb_ctx_check(b, per_ctx); unpin_b: intel_context_unpin(b); @@ -1688,7 +1706,7 @@ put_a: return err; } -static int live_lrc_indirect_ctx_bb(void *arg) +static int lrc_wabb_ctx(void *arg, bool per_ctx) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; @@ -1697,7 +1715,7 @@ static int live_lrc_indirect_ctx_bb(void *arg) for_each_engine(engine, gt, id) { intel_engine_pm_get(engine); - err = __live_lrc_indirect_ctx_bb(engine); + err = __lrc_wabb_ctx(engine, per_ctx); intel_engine_pm_put(engine); if (igt_flush_test(gt->i915)) @@ -1710,6 +1728,16 @@ static int live_lrc_indirect_ctx_bb(void *arg) return err; } +static int live_lrc_indirect_ctx_bb(void *arg) +{ + return lrc_wabb_ctx(arg, false); +} + +static int live_lrc_per_ctx_bb(void *arg) +{ + return lrc_wabb_ctx(arg, true); +} + static void garbage_reset(struct intel_engine_cs *engine, struct i915_request *rq) { @@ -1947,6 +1975,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_lrc_garbage), SUBTEST(live_pphwsp_runtime), SUBTEST(live_lrc_indirect_ctx_bb), + SUBTEST(live_lrc_per_ctx_bb), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) -- cgit v1.2.3 From 2fb771f3b840ff59e593dad9b6289276ea545698 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Thu, 26 Oct 2023 20:36:29 +0200 Subject: drm/i915: Set copy engine arbitration for Wa_16018031267 / Wa_16018063123 Set copy engine arbitration into round robin mode for part of Wa_16018031267 / Wa_16018063123 mitigation. Signed-off-by: Nirmoy Das Signed-off-by: Jonathan Cavitt Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231026-wabb-v6-4-4aa7d55d0a8a@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 3 +++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index b8618ee3e304..c0c8c12edea1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -124,6 +124,9 @@ #define RING_INDIRECT_CTX(base) _MMIO((base) + 0x1c4) /* gen8+ */ #define RING_INDIRECT_CTX_OFFSET(base) _MMIO((base) + 0x1c8) /* gen8+ */ #define ECOSKPD(base) _MMIO((base) + 0x1d0) +#define XEHP_BLITTER_SCHEDULING_MODE_MASK REG_GENMASK(12, 11) +#define XEHP_BLITTER_ROUND_ROBIN_MODE \ + REG_FIELD_PREP(XEHP_BLITTER_SCHEDULING_MODE_MASK, 1) #define ECO_CONSTANT_BUFFER_SR_DISABLE REG_BIT(4) #define ECO_GATING_CX_ONLY REG_BIT(3) #define GEN6_BLITTER_FBC_NOTIFY REG_BIT(3) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6ae7a4de83b0..12859b8d2092 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2785,6 +2785,11 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) RING_SEMA_WAIT_POLL(engine->mmio_base), 1); } + /* Wa_16018031267, Wa_16018063123 */ + if (NEEDS_FASTCOLOR_BLT_WABB(engine)) + wa_masked_field_set(wal, ECOSKPD(engine->mmio_base), + XEHP_BLITTER_SCHEDULING_MODE_MASK, + XEHP_BLITTER_ROUND_ROBIN_MODE); } static void -- cgit v1.2.3 From 43dea469e99b10ecc967a3576e50a5d416daf13c Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane 
Date: Sat, 28 Oct 2023 01:20:52 +0530 Subject: drm/i915/mtl: Add Wa_14019821291 This workaround is primarily implemented by the BIOS. However if the BIOS applies the workaround it will reserve a small piece of our DSM (which should be at the top, right below the WOPCM); we just need to keep that region reserved so that nothing else attempts to re-use it. v2: Declare regs in intel_gt_regs.h (Matt Roper) v3: Shift WA implementation before calculation of *base (Matt Roper) v4: - Change condition gscpmi base to be fall in DSM range.(Matt Roper) Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20231027195052.3676632-1-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ 2 files changed, 24 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 1a766d8e7cce..8c88075eeab2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -386,6 +386,27 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); + /* Wa_14019821291 */ + if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) { + /* + * This workaround is primarily implemented by the BIOS. We + * just need to figure out whether the BIOS has applied the + * workaround (meaning the programmed address falls within + * the DSM) and, if so, reserve that part of the DSM to + * prevent accidental reuse. The DSM location should be just + * below the WOPCM. + */ + u64 gscpsmi_base = intel_uncore_read64_2x32(uncore, + MTL_GSCPSMI_BASEADDR_LSB, + MTL_GSCPSMI_BASEADDR_MSB); + if (gscpsmi_base >= i915->dsm.stolen.start && + gscpsmi_base < i915->dsm.stolen.end) { + *base = gscpsmi_base; + *size = i915->dsm.stolen.end - gscpsmi_base; + return; + } + } + switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { case GEN8_STOLEN_RESERVED_1M: *size = 1024 * 1024; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index eecd0a87a647..9de41703fae5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -537,6 +537,9 @@ #define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESS REG_BIT(30) +#define MTL_GSCPSMI_BASEADDR_LSB _MMIO(0x880c) +#define MTL_GSCPSMI_BASEADDR_MSB _MMIO(0x8810) + #define HSW_IDICR _MMIO(0x9008) #define IDIHASHMSK(x) (((x) & 0x3f) << 16) -- cgit v1.2.3 From 2efb81e587961d5d863c2ad3156f96abde4d6a8f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 31 Oct 2023 14:45:00 +0200 Subject: drm/i915: make some error capture functions static Not needed outside of i915_gpu_error.c. 
Signed-off-by: Jani Nikula Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20231031124502.1772160-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++---- drivers/gpu/drm/i915/i915_gpu_error.h | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index b4e31e59c799..db7ac28c44e5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -520,7 +520,7 @@ __find_vma(struct i915_vma_coredump *vma, const char *name) return NULL; } -struct i915_vma_coredump * +static struct i915_vma_coredump * intel_gpu_error_find_batch(const struct intel_engine_coredump *ee) { return __find_vma(ee->vma, "batch"); @@ -609,9 +609,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) va_end(args); } -void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, - const struct intel_engine_cs *engine, - const struct i915_vma_coredump *vma) +static void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, + const struct intel_engine_cs *engine, + const struct i915_vma_coredump *vma) { char out[ASCII85_BUFSZ]; struct page *page; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 9f5971f5e980..c982b162b7ff 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -275,11 +275,6 @@ static inline void intel_klog_error_capture(struct intel_gt *gt, __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, - const struct intel_engine_cs *engine, - const struct i915_vma_coredump *vma); -struct i915_vma_coredump * -intel_gpu_error_find_batch(const struct intel_engine_coredump *ee); struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags); -- cgit v1.2.3 From 4fca51984371d930a5d9d5a8b0848b892dbfdecc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 31 Oct 2023 14:45:01 +0200 Subject: drm/i915: move gpu error debugfs to i915_gpu_error.c Hide gpu error specifics in i915_gpu_error.c. This is also cleaner wrt conditional compilation, as i915_gpu_error.c is only built with DRM_I915_CAPTURE_ERROR=y. 
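The build pattern relied on here, as seen in the i915_gpu_error.h hunk below: the real function only exists when the Kconfig option is enabled, while the header provides an empty static inline fallback so callers stay free of #ifdefs.

	#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
	void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
	#else
	static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
	{
	}
	#endif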
Signed-off-by: Jani Nikula Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20231031124502.1772160-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 108 +-------------------------------- drivers/gpu/drm/i915/i915_gpu_error.c | 111 +++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_gpu_error.h | 8 ++- 3 files changed, 119 insertions(+), 108 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e9b79c2c37d8..beffac46a5e2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -49,6 +49,7 @@ #include "i915_debugfs.h" #include "i915_debugfs_params.h" #include "i915_driver.h" +#include "i915_gpu_error.h" #include "i915_irq.h" #include "i915_reg.h" #include "i915_scheduler.h" @@ -297,107 +298,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data) return 0; } -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) -static ssize_t gpu_state_read(struct file *file, char __user *ubuf, - size_t count, loff_t *pos) -{ - struct i915_gpu_coredump *error; - ssize_t ret; - void *buf; - - error = file->private_data; - if (!error) - return 0; - - /* Bounce buffer required because of kernfs __user API convenience. */ - buf = kmalloc(count, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count); - if (ret <= 0) - goto out; - - if (!copy_to_user(ubuf, buf, ret)) - *pos += ret; - else - ret = -EFAULT; - -out: - kfree(buf); - return ret; -} - -static int gpu_state_release(struct inode *inode, struct file *file) -{ - i915_gpu_coredump_put(file->private_data); - return 0; -} - -static int i915_gpu_info_open(struct inode *inode, struct file *file) -{ - struct drm_i915_private *i915 = inode->i_private; - struct i915_gpu_coredump *gpu; - intel_wakeref_t wakeref; - - gpu = NULL; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE); - - if (IS_ERR(gpu)) - return PTR_ERR(gpu); - - file->private_data = gpu; - return 0; -} - -static const struct file_operations i915_gpu_info_fops = { - .owner = THIS_MODULE, - .open = i915_gpu_info_open, - .read = gpu_state_read, - .llseek = default_llseek, - .release = gpu_state_release, -}; - -static ssize_t -i915_error_state_write(struct file *filp, - const char __user *ubuf, - size_t cnt, - loff_t *ppos) -{ - struct i915_gpu_coredump *error = filp->private_data; - - if (!error) - return 0; - - drm_dbg(&error->i915->drm, "Resetting error state\n"); - i915_reset_error_state(error->i915); - - return cnt; -} - -static int i915_error_state_open(struct inode *inode, struct file *file) -{ - struct i915_gpu_coredump *error; - - error = i915_first_error_state(inode->i_private); - if (IS_ERR(error)) - return PTR_ERR(error); - - file->private_data = error; - return 0; -} - -static const struct file_operations i915_error_state_fops = { - .owner = THIS_MODULE, - .open = i915_error_state_open, - .read = gpu_state_read, - .write = i915_error_state_write, - .llseek = default_llseek, - .release = gpu_state_release, -}; -#endif - static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -837,10 +737,6 @@ static const struct i915_debugfs_files { {"i915_perf_noa_delay", &i915_perf_noa_delay_fops}, {"i915_wedged", &i915_wedged_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - 
{"i915_error_state", &i915_error_state_fops}, - {"i915_gpu_info", &i915_gpu_info_fops}, -#endif }; void i915_debugfs_register(struct drm_i915_private *dev_priv) @@ -863,4 +759,6 @@ void i915_debugfs_register(struct drm_i915_private *dev_priv) drm_debugfs_create_files(i915_debugfs_list, ARRAY_SIZE(i915_debugfs_list), minor->debugfs_root, minor); + + i915_gpu_error_debugfs_register(dev_priv); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index db7ac28c44e5..b4c8459deb7b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -2137,7 +2137,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du return error; } -struct i915_gpu_coredump * +static struct i915_gpu_coredump * i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags) { static DEFINE_MUTEX(capture_mutex); @@ -2375,3 +2375,112 @@ void intel_klog_error_capture(struct intel_gt *gt, drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err); } #endif + +static ssize_t gpu_state_read(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) +{ + struct i915_gpu_coredump *error; + ssize_t ret; + void *buf; + + error = file->private_data; + if (!error) + return 0; + + /* Bounce buffer required because of kernfs __user API convenience. */ + buf = kmalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = i915_gpu_coredump_copy_to_buffer(error, buf, *pos, count); + if (ret <= 0) + goto out; + + if (!copy_to_user(ubuf, buf, ret)) + *pos += ret; + else + ret = -EFAULT; + +out: + kfree(buf); + return ret; +} + +static int gpu_state_release(struct inode *inode, struct file *file) +{ + i915_gpu_coredump_put(file->private_data); + return 0; +} + +static int i915_gpu_info_open(struct inode *inode, struct file *file) +{ + struct drm_i915_private *i915 = inode->i_private; + struct i915_gpu_coredump *gpu; + intel_wakeref_t wakeref; + + gpu = NULL; + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE); + + if (IS_ERR(gpu)) + return PTR_ERR(gpu); + + file->private_data = gpu; + return 0; +} + +static const struct file_operations i915_gpu_info_fops = { + .owner = THIS_MODULE, + .open = i915_gpu_info_open, + .read = gpu_state_read, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +static ssize_t +i915_error_state_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct i915_gpu_coredump *error = filp->private_data; + + if (!error) + return 0; + + drm_dbg(&error->i915->drm, "Resetting error state\n"); + i915_reset_error_state(error->i915); + + return cnt; +} + +static int i915_error_state_open(struct inode *inode, struct file *file) +{ + struct i915_gpu_coredump *error; + + error = i915_first_error_state(inode->i_private); + if (IS_ERR(error)) + return PTR_ERR(error); + + file->private_data = error; + return 0; +} + +static const struct file_operations i915_error_state_fops = { + .owner = THIS_MODULE, + .open = i915_error_state_open, + .read = gpu_state_read, + .write = i915_error_state_write, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) +{ + struct drm_minor *minor = i915->drm.primary; + + debugfs_create_file("i915_error_state", 0644, minor->debugfs_root, i915, + &i915_error_state_fops); + debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, 
i915, + &i915_gpu_info_fops); +} diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index c982b162b7ff..a6f2a7518cf0 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -276,8 +276,6 @@ static inline void intel_klog_error_capture(struct intel_gt *gt, __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt, - intel_engine_mask_t engine_mask, u32 dump_flags); void i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags); @@ -329,6 +327,8 @@ struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915); void i915_reset_error_state(struct drm_i915_private *i915); void i915_disable_error_state(struct drm_i915_private *i915, int err); +void i915_gpu_error_debugfs_register(struct drm_i915_private *i915); + #else __printf(2, 3) @@ -411,6 +411,10 @@ static inline void i915_disable_error_state(struct drm_i915_private *i915, { } +static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) +{ +} + #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ #endif /* _I915_GPU_ERROR_H_ */ -- cgit v1.2.3 From d581841076bc5de3c0ae72fd6bd50c59ce9f1638 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 31 Oct 2023 14:45:02 +0200 Subject: drm/i915: move gpu error sysfs to i915_gpu_error.c Hide gpu error specifics in i915_gpu_error.c. This is also cleaner wrt conditional compilation, as i915_gpu_error.c is only built with DRM_I915_CAPTURE_ERROR=y. With this, we can also make i915_first_error_state() static. Signed-off-by: Jani Nikula Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20231031124502.1772160-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 75 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_gpu_error.h | 17 ++++---- drivers/gpu/drm/i915/i915_sysfs.c | 79 +---------------------------------- 3 files changed, 86 insertions(+), 85 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index b4c8459deb7b..f9e750217f18 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -57,6 +57,7 @@ #include "i915_memcpy.h" #include "i915_reg.h" #include "i915_scatterlist.h" +#include "i915_sysfs.h" #include "i915_utils.h" #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) @@ -2208,7 +2209,7 @@ void i915_capture_error_state(struct intel_gt *gt, i915_gpu_coredump_put(error); } -struct i915_gpu_coredump * +static struct i915_gpu_coredump * i915_first_error_state(struct drm_i915_private *i915) { struct i915_gpu_coredump *error; @@ -2484,3 +2485,75 @@ void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915, &i915_gpu_info_fops); } + +static ssize_t error_state_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + + struct device *kdev = kobj_to_dev(kobj); + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct i915_gpu_coredump *gpu; + ssize_t ret = 0; + + /* + * FIXME: Concurrent clients triggering resets and reading + clearing + * dumps can cause inconsistent sysfs reads when a user calls in with a + * non-zero offset to complete a prior partial read but the + * gpu_coredump has been cleared or 
replaced. + */ + + gpu = i915_first_error_state(i915); + if (IS_ERR(gpu)) { + ret = PTR_ERR(gpu); + } else if (gpu) { + ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count); + i915_gpu_coredump_put(gpu); + } else { + const char *str = "No error state collected\n"; + size_t len = strlen(str); + + if (off < len) { + ret = min_t(size_t, count, len - off); + memcpy(buf, str + off, ret); + } + } + + return ret; +} + +static ssize_t error_state_write(struct file *file, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + struct device *kdev = kobj_to_dev(kobj); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + + drm_dbg(&dev_priv->drm, "Resetting error state\n"); + i915_reset_error_state(dev_priv); + + return count; +} + +static const struct bin_attribute error_state_attr = { + .attr.name = "error", + .attr.mode = S_IRUSR | S_IWUSR, + .size = 0, + .read = error_state_read, + .write = error_state_write, +}; + +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) +{ + struct device *kdev = i915->drm.primary->kdev; + + if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr)) + drm_err(&i915->drm, "error_state sysfs setup failed\n"); +} + +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915) +{ + struct device *kdev = i915->drm.primary->kdev; + + sysfs_remove_bin_file(&kdev->kobj, &error_state_attr); +} diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index a6f2a7518cf0..68c964d6720a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -323,11 +323,12 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) kref_put(&gpu->ref, __i915_gpu_coredump_free); } -struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915); void i915_reset_error_state(struct drm_i915_private *i915); void i915_disable_error_state(struct drm_i915_private *i915, int err); void i915_gpu_error_debugfs_register(struct drm_i915_private *i915); +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915); +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915); #else @@ -396,12 +397,6 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) { } -static inline struct i915_gpu_coredump * -i915_first_error_state(struct drm_i915_private *i915) -{ - return ERR_PTR(-ENODEV); -} - static inline void i915_reset_error_state(struct drm_i915_private *i915) { } @@ -415,6 +410,14 @@ static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915 { } +static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) +{ +} + +static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915) +{ +} + #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ #endif /* _I915_GPU_ERROR_H_ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index e88bb4f04305..613decd47760 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -155,81 +155,6 @@ static const struct bin_attribute dpf_attrs_1 = { .private = (void *)1 }; -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -static ssize_t error_state_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, - loff_t off, size_t count) -{ - - struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct i915_gpu_coredump *gpu; - ssize_t ret = 0; - - /* - * FIXME: Concurrent clients 
triggering resets and reading + clearing - * dumps can cause inconsistent sysfs reads when a user calls in with a - * non-zero offset to complete a prior partial read but the - * gpu_coredump has been cleared or replaced. - */ - - gpu = i915_first_error_state(i915); - if (IS_ERR(gpu)) { - ret = PTR_ERR(gpu); - } else if (gpu) { - ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count); - i915_gpu_coredump_put(gpu); - } else { - const char *str = "No error state collected\n"; - size_t len = strlen(str); - - if (off < len) { - ret = min_t(size_t, count, len - off); - memcpy(buf, str + off, ret); - } - } - - return ret; -} - -static ssize_t error_state_write(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, - loff_t off, size_t count) -{ - struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - - drm_dbg(&dev_priv->drm, "Resetting error state\n"); - i915_reset_error_state(dev_priv); - - return count; -} - -static const struct bin_attribute error_state_attr = { - .attr.name = "error", - .attr.mode = S_IRUSR | S_IWUSR, - .size = 0, - .read = error_state_read, - .write = error_state_write, -}; - -static void i915_setup_error_capture(struct device *kdev) -{ - if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr)) - drm_err(&kdev_minor_to_i915(kdev)->drm, - "error_state sysfs setup failed\n"); -} - -static void i915_teardown_error_capture(struct device *kdev) -{ - sysfs_remove_bin_file(&kdev->kobj, &error_state_attr); -} -#else -static void i915_setup_error_capture(struct device *kdev) {} -static void i915_teardown_error_capture(struct device *kdev) {} -#endif - void i915_setup_sysfs(struct drm_i915_private *dev_priv) { struct device *kdev = dev_priv->drm.primary->kdev; @@ -255,7 +180,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv) drm_warn(&dev_priv->drm, "failed to register GT sysfs directory\n"); - i915_setup_error_capture(kdev); + i915_gpu_error_sysfs_setup(dev_priv); intel_engines_add_sysfs(dev_priv); } @@ -264,7 +189,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) { struct device *kdev = dev_priv->drm.primary->kdev; - i915_teardown_error_capture(kdev); + i915_gpu_error_sysfs_teardown(dev_priv); device_remove_bin_file(kdev, &dpf_attrs_1); device_remove_bin_file(kdev, &dpf_attrs); -- cgit v1.2.3 From 5fbae6874c92eec51cdcdcb68a4bafb535c066bf Mon Sep 17 00:00:00 2001 From: Dorcas AnonoLitunya Date: Fri, 27 Oct 2023 20:47:45 +0300 Subject: drm/i915/gt: Remove prohibited space after opening parenthesis Removes space after opening parenthesis. 
Fixes the checkpatch.pl error: ERROR: space prohibited after that opening parenthesis '(' Signed-off-by: Dorcas AnonoLitunya Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231027174745.4058-1-anonolitunya@gmail.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4a11219e560e..40687806d22a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -47,7 +47,7 @@ #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) #define MAX_MMIO_BASES 3 struct engine_info { -- cgit v1.2.3 From 27b086382c22efb7e0a16442f7bdc2e120108ef3 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 3 Nov 2023 11:09:22 +0000 Subject: drm/i915: Fix potential spectre vulnerability Fix smatch warning: drivers/gpu/drm/i915/gem/i915_gem_context.c:847 set_proto_ctx_sseu() warn: potential spectre issue 'pc->user_engines' [r] (local cap) Fixes: d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)") Cc: # v5.15+ Signed-off-by: Kunwu Chan Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231103110922.430122-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9a9ff84c90d7..e38f06a6e56e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -844,6 +844,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, if (idx >= pc->num_user_engines) return -EINVAL; + idx = array_index_nospec(idx, pc->num_user_engines); pe = &pc->user_engines[idx]; /* Only render engine supports RPCS configuration. */ -- cgit v1.2.3 From 34df0a031d8f3488fe72627b041a1f82437fa6ec Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Thu, 2 Nov 2023 10:58:31 -0700 Subject: drm/i915/gt: Temporarily disable CPU caching into DMA for MTL FIXME: It is suspected that some Address Translation Service (ATS) issue on IOMMU is causing CAT errors to occur on some MTL workloads. Applying a write barrier to the ppgtt set entry functions appeared to have no effect, so we must temporarily use I915_MAP_WC in the map_pt_dma class of functions on MTL until a proper ATS solution is found. 
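For illustration only, the effective map-type selection in the map_pt_dma() class of functions after this change is roughly the following (all identifiers are taken from the diff below; no new helper is being added):

	/* Start from the normal coherent mapping type for this GT/object. */
	type = intel_gt_coherent_map_type(vm->gt, obj, true);

	/* Temporary MTL-only override until the suspected ATS issue is root-caused. */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map_unlocked(obj, type);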
Signed-off-by: Jonathan Cavitt CC: Chris Wilson Reviewed-by: Radhakrishna Sripada Acked-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231102175831.872763-1-jonathan.cavitt@intel.com --- drivers/gpu/drm/i915/gt/intel_gtt.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 4fbed27ef0ec..21719563a602 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -95,6 +95,16 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) void *vaddr; type = intel_gt_coherent_map_type(vm->gt, obj, true); + /* + * FIXME: It is suspected that some Address Translation Service (ATS) + * issue on IOMMU is causing CAT errors to occur on some MTL workloads. + * Applying a write barrier to the ppgtt set entry functions appeared + * to have no effect, so we must temporarily use I915_MAP_WC here on + * MTL until a proper ATS solution is found. + */ + if (IS_METEORLAKE(vm->i915)) + type = I915_MAP_WC; + vaddr = i915_gem_object_pin_map_unlocked(obj, type); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -109,6 +119,16 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object void *vaddr; type = intel_gt_coherent_map_type(vm->gt, obj, true); + /* + * FIXME: It is suspected that some Address Translation Service (ATS) + * issue on IOMMU is causing CAT errors to occur on some MTL workloads. + * Applying a write barrier to the ppgtt set entry functions appeared + * to have no effect, so we must temporarily use I915_MAP_WC here on + * MTL until a proper ATS solution is found. + */ + if (IS_METEORLAKE(vm->i915)) + type = I915_MAP_WC; + vaddr = i915_gem_object_pin_map(obj, type); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); -- cgit v1.2.3 From 1d9e6bc97eabac150b775d91d9a656ba24e92014 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Mon, 6 Nov 2023 17:19:59 -0300 Subject: drm/i915/xelpmp: Add Wa_16021867713 This workaround applies to all steppings of Xe_LPM+. Implement the KMD part. v2: - Put the definition of VDBOX_CGCTL3F1C() in the correct sort order. 
(Matt) Reviewed-by: Matt Roper Signed-off-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20231106201959.156943-1-gustavo.sousa@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index c0c8c12edea1..a8eac59e3779 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -263,5 +263,7 @@ #define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18) #define ALNUNIT_CLKGATE_DIS REG_BIT(13) +#define VDBOX_CGCTL3F1C(base) _MMIO((base) + 0x3f1c) +#define MFXPIPE_CLKGATE_DIS REG_BIT(3) #endif /* __INTEL_ENGINE_REGS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 12859b8d2092..63205edfea50 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1662,9 +1662,23 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) debug_dump_steering(gt); } +static void +wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct intel_engine_cs *engine; + int id; + + for_each_engine(engine, gt, id) + if (engine->class == VIDEO_DECODE_CLASS) + wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base), + MFXPIPE_CLKGATE_DIS); +} + static void xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + wa_16021867713(gt, wal); + /* * Wa_14018778641 * Wa_18018781329 -- cgit v1.2.3 From bae9fca9684335478ff147413bd69c8d77b66cf9 Mon Sep 17 00:00:00 2001 From: Sam James Date: Tue, 7 Nov 2023 21:55:33 +0000 Subject: drm: i915: Adapt to -Walloc-size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 14 introduces a new -Walloc-size included in -Wextra which errors out like: ``` drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c: In function ‘eb_copy_relocations’: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1681:24: error: allocation of insufficient size ‘1’ for type ‘struct drm_i915_gem_relocation_entry’ with size ‘32’ [-Werror=alloc-size] 1681 | relocs = kvmalloc_array(size, 1, GFP_KERNEL); | ^ ``` So, just swap the number of members and size arguments to match the prototype, as we're initialising 1 element of size `size`. GCC then sees we're not doing anything wrong. 
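For clarity, a minimal before/after sketch; the prototype shown is the kvmalloc_array() declaration from <linux/slab.h>, and `size` is `nreloc * sizeof(*relocs)` as computed in the code below:

	void *kvmalloc_array(size_t n, size_t size, gfp_t flags);

	/* Before: "size" members of 1 byte each. GCC 14 warns because the
	 * element size (1) is smaller than the pointed-to
	 * struct drm_i915_gem_relocation_entry (32 bytes). */
	relocs = kvmalloc_array(size, 1, GFP_KERNEL);

	/* After: one member of "size" bytes - the same allocation, but the
	 * arguments now match the prototype's intent. */
	relocs = kvmalloc_array(1, size, GFP_KERNEL);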
Signed-off-by: Sam James Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231107215538.1891359-1-sam@gentoo.org --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 683fd8d3151c..45b9d9e34b8b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1678,7 +1678,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb) urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); size = nreloc * sizeof(*relocs); - relocs = kvmalloc_array(size, 1, GFP_KERNEL); + relocs = kvmalloc_array(1, size, GFP_KERNEL); if (!relocs) { err = -ENOMEM; goto err; -- cgit v1.2.3 From e4ae85e364fc652ea15d85b0f3a6da304c9b5ce7 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 7 Nov 2023 10:18:01 +0000 Subject: drm/i915: Add ability for tracking buffer objects per client In order to show per client memory usage lets add some infrastructure which enables tracking buffer objects owned by clients. We add a per client list protected by a new per client lock and to support delayed destruction (post client exit) we make tracked objects hold references to the owning client. Also, object memory region teardown is moved to the existing RCU free callback to allow safe dereference from the fdinfo RCU read section. Signed-off-by: Tvrtko Ursulin Reviewed-by: Aravind Iddamsetty Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 13 +++++++-- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 12 ++++++++ drivers/gpu/drm/i915/i915_drm_client.c | 36 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drm_client.h | 32 +++++++++++++++++++++ 4 files changed, 90 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index c26d87555825..25eeeb863209 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -106,6 +106,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->mm.link); +#ifdef CONFIG_PROC_FS + INIT_LIST_HEAD(&obj->client_link); +#endif + INIT_LIST_HEAD(&obj->lut_list); spin_lock_init(&obj->lut_lock); @@ -293,6 +297,10 @@ void __i915_gem_free_object_rcu(struct rcu_head *head) container_of(head, typeof(*obj), rcu); struct drm_i915_private *i915 = to_i915(obj->base.dev); + /* We need to keep this alive for RCU read access from fdinfo. */ + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); + i915_gem_object_free(obj); GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); @@ -389,9 +397,6 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj) if (obj->ops->release) obj->ops->release(obj); - if (obj->mm.n_placements > 1) - kfree(obj->mm.placements); - if (obj->shares_resv_from) i915_vm_resv_put(obj->shares_resv_from); @@ -442,6 +447,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); + i915_drm_client_remove_object(obj); + /* * Before we free the object, make sure any pure RCU-only * read-side critical sections are complete, e.g. 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2292404007c8..0c5cdab278b6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -302,6 +302,18 @@ struct drm_i915_gem_object { */ struct i915_address_space *shares_resv_from; +#ifdef CONFIG_PROC_FS + /** + * @client: @i915_drm_client which created the object + */ + struct i915_drm_client *client; + + /** + * @client_link: Link into @i915_drm_client.objects_list + */ + struct list_head client_link; +#endif + union { struct rcu_head rcu; struct llist_node freed; diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 2a44b3876cb5..2e5e69edc0f9 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void) kref_init(&client->kref); spin_lock_init(&client->ctx_lock); INIT_LIST_HEAD(&client->ctx_list); +#ifdef CONFIG_PROC_FS + spin_lock_init(&client->objects_lock); + INIT_LIST_HEAD(&client->objects_list); +#endif return client; } @@ -108,4 +112,36 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) show_client_class(p, i915, file_priv->client, i); } + +void i915_drm_client_add_object(struct i915_drm_client *client, + struct drm_i915_gem_object *obj) +{ + unsigned long flags; + + GEM_WARN_ON(obj->client); + GEM_WARN_ON(!list_empty(&obj->client_link)); + + spin_lock_irqsave(&client->objects_lock, flags); + obj->client = i915_drm_client_get(client); + list_add_tail_rcu(&obj->client_link, &client->objects_list); + spin_unlock_irqrestore(&client->objects_lock, flags); +} + +bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj) +{ + struct i915_drm_client *client = fetch_and_zero(&obj->client); + unsigned long flags; + + /* Object may not be associated with a client. */ + if (!client) + return false; + + spin_lock_irqsave(&client->objects_lock, flags); + list_del_rcu(&obj->client_link); + spin_unlock_irqrestore(&client->objects_lock, flags); + + i915_drm_client_put(client); + + return true; +} #endif diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index 67816c912bca..5f58fdf7dcb8 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -12,6 +12,9 @@ #include +#include "i915_file_private.h" +#include "gem/i915_gem_object_types.h" + #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE struct drm_file; @@ -25,6 +28,20 @@ struct i915_drm_client { spinlock_t ctx_lock; /* For add/remove from ctx_list. */ struct list_head ctx_list; /* List of contexts belonging to client. */ +#ifdef CONFIG_PROC_FS + /** + * @objects_lock: lock protecting @objects_list + */ + spinlock_t objects_lock; + + /** + * @objects_list: list of objects created by this client + * + * Protected by @objects_lock. + */ + struct list_head objects_list; +#endif + /** * @past_runtime: Accumulation of pphwsp runtimes from closed contexts. 
*/ @@ -49,4 +66,19 @@ struct i915_drm_client *i915_drm_client_alloc(void); void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file); +#ifdef CONFIG_PROC_FS +void i915_drm_client_add_object(struct i915_drm_client *client, + struct drm_i915_gem_object *obj); +bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj); +#else +static inline void i915_drm_client_add_object(struct i915_drm_client *client, + struct drm_i915_gem_object *obj) +{ +} + +static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj) +{ +} +#endif + #endif /* !__I915_DRM_CLIENT_H__ */ -- cgit v1.2.3 From ca02a0119f814b792484cba0c148fba292327ed6 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 7 Nov 2023 10:18:02 +0000 Subject: drm/i915: Record which client owns a VM To enable accounting of indirect client memory usage (such as page tables) in the following patch, lets start recording the creator of each PPGTT. Signed-off-by: Tvrtko Ursulin Reviewed-by: Aravind Iddamsetty Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 ++++++++--- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 3 +++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_gtt.h | 1 + 4 files changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index e38f06a6e56e..dcbfe32fd30c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -279,7 +279,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915, } static struct i915_gem_proto_context * -proto_context_create(struct drm_i915_private *i915, unsigned int flags) +proto_context_create(struct drm_i915_file_private *fpriv, + struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_proto_context *pc, *err; @@ -287,6 +288,7 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags) if (!pc) return ERR_PTR(-ENOMEM); + pc->fpriv = fpriv; pc->num_user_engines = -1; pc->user_engines = NULL; pc->user_flags = BIT(UCONTEXT_BANNABLE) | @@ -1622,6 +1624,7 @@ i915_gem_create_context(struct drm_i915_private *i915, err = PTR_ERR(ppgtt); goto err_ctx; } + ppgtt->vm.fpriv = pc->fpriv; vm = &ppgtt->vm; } if (vm) @@ -1741,7 +1744,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, /* 0 reserved for invalid/unassigned ppgtt */ xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); - pc = proto_context_create(i915, 0); + pc = proto_context_create(file_priv, i915, 0); if (IS_ERR(pc)) { err = PTR_ERR(pc); goto err; @@ -1823,6 +1826,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */ args->vm_id = id; + ppgtt->vm.fpriv = file_priv; return 0; err_put: @@ -2285,7 +2289,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ext_data.pc = proto_context_create(i915, args->flags); + ext_data.pc = proto_context_create(file->driver_priv, i915, + args->flags); if (IS_ERR(ext_data.pc)) return PTR_ERR(ext_data.pc); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index cb78214a7dcd..c573c067779f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -188,6 +188,9 @@ struct 
i915_gem_proto_engine { * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE. */ struct i915_gem_proto_context { + /** @fpriv: Client which creates the context */ + struct drm_i915_file_private *fpriv; + /** @vm: See &i915_gem_context.vm */ struct i915_address_space *vm; diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index e199d7dbb876..2b0327cc47c2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -83,7 +83,7 @@ live_context(struct drm_i915_private *i915, struct file *file) int err; u32 id; - pc = proto_context_create(i915, 0); + pc = proto_context_create(fpriv, i915, 0); if (IS_ERR(pc)) return ERR_CAST(pc); @@ -152,7 +152,7 @@ kernel_context(struct drm_i915_private *i915, struct i915_gem_context *ctx; struct i915_gem_proto_context *pc; - pc = proto_context_create(i915, 0); + pc = proto_context_create(NULL, i915, 0); if (IS_ERR(pc)) return ERR_CAST(pc); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 028a5a988eea..6b85222ee3ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -255,6 +255,7 @@ struct i915_address_space { } rsvd; struct intel_gt *gt; struct drm_i915_private *i915; + struct drm_i915_file_private *fpriv; struct device *dma; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ -- cgit v1.2.3 From 978e1a52ca1f0228eccc51ad5ed3a118bac1ad1c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 7 Nov 2023 10:18:03 +0000 Subject: drm/i915: Track page table backing store usage Account page table backing store against the owning client memory usage stats. Signed-off-by: Tvrtko Ursulin Reviewed-by: Aravind Iddamsetty Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-3-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gtt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 21719563a602..86f73fe558ca 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -63,6 +63,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) if (!IS_ERR(obj)) { obj->base.resv = i915_vm_resv_get(vm); obj->shares_resv_from = vm; + + if (vm->fpriv) + i915_drm_client_add_object(vm->fpriv->client, obj); } return obj; @@ -84,6 +87,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz) if (!IS_ERR(obj)) { obj->base.resv = i915_vm_resv_get(vm); obj->shares_resv_from = vm; + + if (vm->fpriv) + i915_drm_client_add_object(vm->fpriv->client, obj); } return obj; -- cgit v1.2.3 From dc1a2775070f0618b661500310b2ea8643592ed1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 7 Nov 2023 10:18:04 +0000 Subject: drm/i915: Account ring buffer and context state storage Account ring buffers and logical context space against the owning client memory usage stats. 
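In sketch form, the two backing objects that now get attributed to the owning client when a context first allocates its state are (code as in the diff below, with comments added for illustration):

	if (ce->state)		/* logical ring context state object */
		i915_drm_client_add_object(client, ce->state->obj);

	if (ce->ring != ce->engine->legacy.ring && ce->ring->vma)
		/* ring buffer, unless it is the engine's shared legacy ring */
		i915_drm_client_add_object(client, ce->ring->vma->obj);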
Signed-off-by: Tvrtko Ursulin Reviewed-by: Aravind Iddamsetty Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-4-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 14 ++++++++++++++ drivers/gpu/drm/i915/i915_drm_client.c | 10 ++++++++++ drivers/gpu/drm/i915/i915_drm_client.h | 9 +++++++++ 3 files changed, 33 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index a53b26178f0a..a2f1245741bb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -6,6 +6,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" +#include "i915_drm_client.h" #include "i915_drv.h" #include "i915_trace.h" @@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine) int intel_context_alloc_state(struct intel_context *ce) { + struct i915_gem_context *ctx; int err = 0; if (mutex_lock_interruptible(&ce->pin_mutex)) @@ -66,6 +68,18 @@ int intel_context_alloc_state(struct intel_context *ce) goto unlock; set_bit(CONTEXT_ALLOC_BIT, &ce->flags); + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + if (ctx) { + if (ctx->client) + i915_drm_client_add_context_objects(ctx->client, + ce); + i915_gem_context_put(ctx); + } } unlock: diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 2e5e69edc0f9..a61356012df8 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -144,4 +144,14 @@ bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj) return true; } + +void i915_drm_client_add_context_objects(struct i915_drm_client *client, + struct intel_context *ce) +{ + if (ce->state) + i915_drm_client_add_object(client, ce->state->obj); + + if (ce->ring != ce->engine->legacy.ring && ce->ring->vma) + i915_drm_client_add_object(client, ce->ring->vma->obj); +} #endif diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index 5f58fdf7dcb8..69cedfcd3d69 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -14,6 +14,7 @@ #include "i915_file_private.h" #include "gem/i915_gem_object_types.h" +#include "gt/intel_context_types.h" #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE @@ -70,6 +71,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file); void i915_drm_client_add_object(struct i915_drm_client *client, struct drm_i915_gem_object *obj); bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj); +void i915_drm_client_add_context_objects(struct i915_drm_client *client, + struct intel_context *ce); #else static inline void i915_drm_client_add_object(struct i915_drm_client *client, struct drm_i915_gem_object *obj) @@ -79,6 +82,12 @@ static inline void i915_drm_client_add_object(struct i915_drm_client *client, static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj) { } + +static inline void +i915_drm_client_add_context_objects(struct i915_drm_client *client, + struct intel_context *ce) +{ +} #endif #endif /* !__I915_DRM_CLIENT_H__ */ -- cgit v1.2.3 From 3b38d35157530c12c84fc02cccd469b9a0a00ae7 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 7 Nov 2023 10:18:05 +0000 Subject: drm/i915: Add stable memory region names At the moment memory region names are a bit too varied and too 
inconsistent to be used for ABI purposes, like for upcoming fdinfo memory stats. System memory can be either system or system-ttm. Local memory has the instance number appended, others do not. Not only inconsistent, but this kind of implementation detail is uninteresting for intended users of fdinfo memory stats. Add a stable name always formed as $type$instance. Could have chosen a different stable scheme, but I think any consistent and stable scheme should do just fine. Signed-off-by: Tvrtko Ursulin Reviewed-by: Aravind Iddamsetty Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-5-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_memory_region.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/intel_memory_region.h | 1 + 2 files changed, 20 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 3d1fdea9811d..60a03340bbd4 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -216,6 +216,22 @@ static int intel_memory_region_memtest(struct intel_memory_region *mem, return err; } +static const char *region_type_str(u16 type) +{ + switch (type) { + case INTEL_MEMORY_SYSTEM: + return "system"; + case INTEL_MEMORY_LOCAL: + return "local"; + case INTEL_MEMORY_STOLEN_LOCAL: + return "stolen-local"; + case INTEL_MEMORY_STOLEN_SYSTEM: + return "stolen-system"; + default: + return "unknown"; + } +} + struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, resource_size_t start, @@ -244,6 +260,9 @@ intel_memory_region_create(struct drm_i915_private *i915, mem->type = type; mem->instance = instance; + snprintf(mem->uabi_name, sizeof(mem->uabi_name), "%s%u", + region_type_str(type), instance); + mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 2953ed5c3248..9ba36454e51b 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -80,6 +80,7 @@ struct intel_memory_region { u16 instance; enum intel_region_id id; char name[16]; + char uabi_name[16]; bool private; /* not for userspace */ struct { -- cgit v1.2.3 From 968853033d8aa4dbb80fbafa6f5d9b6a0ea21272 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 7 Nov 2023 10:18:06 +0000 Subject: drm/i915: Implement fdinfo memory stats printing Use the newly added drm_print_memory_stats helper to show memory utilisation of our objects in drm/driver specific fdinfo output. To collect the stats we walk the per memory regions object lists and accumulate object size into the respective drm_memory_stats categories. v2: * Only account against the active region. * Use DMA_RESV_USAGE_BOOKKEEP when testing for active. (Tejas) v3: * Update commit text. (Aravind) * Update to use memory regions uabi names.
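For illustration, with the stable region names from the previous patch the per-client fdinfo section gains lines roughly of the following shape (key names as emitted by drm_print_memory_stats(); the region name and values here are made-up examples):

	drm-total-system0:	8192 KiB
	drm-shared-system0:	2048 KiB
	drm-active-system0:	4096 KiB
	drm-resident-system0:	8192 KiB
	drm-purgeable-system0:	0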
Signed-off-by: Tvrtko Ursulin Cc: Aravind Iddamsetty Cc: Rob Clark Cc: Andi Shyti Cc: Tejas Upadhyay Reviewed-by: Andi Shyti Reviewed-by: Aravind Iddamsetty Link: https://patchwork.freedesktop.org/patch/msgid/20231107101806.608990-6-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drm_client.c | 64 ++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index a61356012df8..7efffdaa508d 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -45,6 +45,68 @@ void __i915_drm_client_free(struct kref *kref) } #ifdef CONFIG_PROC_FS +static void +obj_meminfo(struct drm_i915_gem_object *obj, + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN]) +{ + const enum intel_region_id id = obj->mm.region ? + obj->mm.region->id : INTEL_REGION_SMEM; + const u64 sz = obj->base.size; + + if (obj->base.handle_count > 1) + stats[id].shared += sz; + else + stats[id].private += sz; + + if (i915_gem_object_has_pages(obj)) { + stats[id].resident += sz; + + if (!dma_resv_test_signaled(obj->base.resv, + DMA_RESV_USAGE_BOOKKEEP)) + stats[id].active += sz; + else if (i915_gem_object_is_shrinkable(obj) && + obj->mm.madv == I915_MADV_DONTNEED) + stats[id].purgeable += sz; + } +} + +static void show_meminfo(struct drm_printer *p, struct drm_file *file) +{ + struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {}; + struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_drm_client *client = fpriv->client; + struct drm_i915_private *i915 = fpriv->i915; + struct drm_i915_gem_object *obj; + struct intel_memory_region *mr; + struct list_head *pos; + unsigned int id; + + /* Public objects. */ + spin_lock(&file->table_lock); + idr_for_each_entry(&file->object_idr, obj, id) + obj_meminfo(obj, stats); + spin_unlock(&file->table_lock); + + /* Internal objects. */ + rcu_read_lock(); + list_for_each_rcu(pos, &client->objects_list) { + obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj), + client_link)); + if (!obj) + continue; + obj_meminfo(obj, stats); + i915_gem_object_put(obj); + } + rcu_read_unlock(); + + for_each_memory_region(mr, i915, id) + drm_print_memory_stats(p, + &stats[id], + DRM_GEM_OBJECT_RESIDENT | + DRM_GEM_OBJECT_PURGEABLE, + mr->uabi_name); +} + static const char * const uabi_class_names[] = { [I915_ENGINE_CLASS_RENDER] = "render", [I915_ENGINE_CLASS_COPY] = "copy", @@ -106,6 +168,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) * ****************************************************************** */ + show_meminfo(p, file); + if (GRAPHICS_VER(i915) < 8) return; -- cgit v1.2.3 From d7b4832cbeb85075293b1211a9c89fad4fdda1f1 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 9 Nov 2023 11:21:48 -0800 Subject: drm/i915: Read a shadowed mmio register for ggtt flush We read RENDER_HEAD as a part of the flush. If GT is in deeper sleep states, this could lead to read errors since we are not using a forcewake. Safer to read a shadowed register instead. 
Cc: John Harrison Cc: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar Reviewed-by: Radhakrishna Sripada Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20231109192148.475156-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ed32bf5b1546..ea814ea5f700 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) spin_lock_irqsave(&uncore->lock, flags); intel_uncore_posting_read_fw(uncore, - RING_HEAD(RENDER_RING_BASE)); + RING_TAIL(RENDER_RING_BASE)); spin_unlock_irqrestore(&uncore->lock, flags); } } -- cgit v1.2.3 From 57bdac8ee2998d6bba091326e16967b4e5f74ae8 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 15 Nov 2023 13:10:33 +0100 Subject: drm/i915/gt: add missing new-line to GT_TRACE Trace requires new-line at the end of message (in opposition to printk), otherwise trace dump becomes messy. Signed-off-by: Andrzej Hajda Acked-by: Janusz Krzysztofik Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231115-eols-v1-1-d47a2f52b807@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index f5899d503e23..471b7cdc10ba 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -167,7 +167,7 @@ static void gt_sanitize(struct intel_gt *gt, bool force) enum intel_engine_id id; intel_wakeref_t wakeref; - GT_TRACE(gt, "force:%s", str_yes_no(force)); + GT_TRACE(gt, "force:%s\n", str_yes_no(force)); /* Use a raw wakeref to avoid calling intel_display_power_get early */ wakeref = intel_runtime_pm_get(gt->uncore->rpm); -- cgit v1.2.3 From e899505533852bf1da133f2f4c9a9655ff77f7e5 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 15 Nov 2023 11:54:03 +0100 Subject: drm/i915: do not clean GT table on error path The only task of intel_gt_release_all is to zero gt table. Calling it on error path prevents intel_gt_driver_late_release_all (called from i915_driver_late_release) to cleanup GTs, causing leakage. After i915_driver_late_release GT array is not used anymore so it does not need cleaning at all. Sample leak report: BUG i915_request (...): Objects remaining in i915_request on __kmem_cache_shutdown() ... 
Object 0xffff888113420040 @offset=64 Allocated in __i915_request_create+0x75/0x610 [i915] age=18339 cpu=1 pid=1454 kmem_cache_alloc+0x25b/0x270 __i915_request_create+0x75/0x610 [i915] i915_request_create+0x109/0x290 [i915] __engines_record_defaults+0xca/0x440 [i915] intel_gt_init+0x275/0x430 [i915] i915_gem_init+0x135/0x2c0 [i915] i915_driver_probe+0x8d1/0xdc0 [i915] v2: removed whole intel_gt_release_all Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8489 Fixes: bec68cc9ea42 ("drm/i915: Prepare for multiple GTs") Signed-off-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231115-dont_clean_gt_on_error_path-v2-1-54250125470a@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 11 ----------- drivers/gpu/drm/i915/i915_driver.c | 4 +--- 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ea814ea5f700..a425db5ed3a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -982,8 +982,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915) err: i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret); - intel_gt_release_all(i915); - return ret; } @@ -1002,15 +1000,6 @@ int intel_gt_tiles_init(struct drm_i915_private *i915) return 0; } -void intel_gt_release_all(struct drm_i915_private *i915) -{ - struct intel_gt *gt; - unsigned int id; - - for_each_gt(gt, i915, id) - i915->gt[id] = NULL; -} - void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 8a0e2c745e1f..802de2c6decb 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -782,7 +782,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ret = i915_driver_mmio_probe(i915); if (ret < 0) - goto out_tiles_cleanup; + goto out_runtime_pm_put; ret = i915_driver_hw_probe(i915); if (ret < 0) @@ -842,8 +842,6 @@ out_cleanup_hw: i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); -out_tiles_cleanup: - intel_gt_release_all(i915); out_runtime_pm_put: enable_rpm_wakeref_asserts(&i915->runtime_pm); i915_driver_late_release(i915); -- cgit v1.2.3 From d3715a6471c8f0a90fb852c10a5a84948d6a1ff5 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 9 Nov 2023 15:54:36 -0800 Subject: drm/i915/huc: Stop printing about unsupported HuC on MTL On MTL, the HuC is only supported on the media GT, so our validation check on the module parameter detects an inconsistency on the root GT (the modparams asks to enable HuC, but the support is not there) and prints the following info message: [drm] GT0: Incompatible option enable_guc=3 - HuC is not supported! This can be confusing to the user and make them think that something is wrong when it isn't, so we need to silence it. Given that any platform that supports HuC also supports GuC, if a user tries to enable HuC on a platform that really doesn't support it they'll already see a message about GuC not being supported, so instead of just silencing the HuC message on newer platforms we can just get rid of it entirely. 
Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20231109235436.2349963-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 27f6561dd731..3872d309ed31 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -106,11 +106,6 @@ static void __confirm_options(struct intel_uc *uc) gt_info(gt, "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "GuC is not supported!"); - if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC && - !intel_uc_supports_huc(uc)) - gt_info(gt, "Incompatible option enable_guc=%d - %s\n", - i915->params.enable_guc, "HuC is not supported!"); - if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION && !intel_uc_supports_guc_submission(uc)) gt_info(gt, "Incompatible option enable_guc=%d - %s\n", -- cgit v1.2.3 From c8031019dc95e3ab7cc0b09f1894c5f52dc0c187 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Tue, 10 Oct 2023 07:35:06 -0700 Subject: drm/amdgpu: Implement a new 64bit sequence memory driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Developed a new driver which allocates a 64bit memory on each request in sequence order. At the moment, user queue fence memory is the main consumer of this seq64 driver. v2: Worked on review comments from Christian for the following modifications - Move driver name from "semaphore" to "seq64" - Remove unnecessary PT/PD mapping - Move enable_mes check into init/fini functions. v3: Worked on review comments from Christian - drop enable_mes check - use DECLARE_BITMAP for bit array - added kerneldoc for seq64 v4: Worked on review comments from Christian - Rename amdgpu_seq64_get name with amdgpu_seq64_alloc v5: Worked on review comments from Christian - Fix seq64 lockdep warning - move fpriv->seq64_va check into amdgpu_seq64_unmap() - make the function amdgpu_seq64_unmap() return as void. - reserve the buffers as not interruptible. 
v6: port to drm_exec (Alex) v7: disable for now (Arun) Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 247 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h | 49 ++++++ 6 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2afecc55090f..260e32ef7bae 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9d92ca157677..4d100f8d75bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -109,6 +109,7 @@ #include "amdgpu_mca.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" +#include "amdgpu_seq64.h" #define MAX_GPU_INSTANCE 64 @@ -468,6 +469,7 @@ struct amdgpu_fpriv { struct amdgpu_vm vm; struct amdgpu_bo_va *prt_va; struct amdgpu_bo_va *csa_va; + struct amdgpu_bo_va *seq64_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; @@ -986,6 +988,9 @@ struct amdgpu_device { /* GDS */ struct amdgpu_gds gds; + /* for userq and VM fences */ + struct amdgpu_seq64 seq64; + /* KFD */ struct amdgpu_kfd_dev kfd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7eeaf0aa7f81..151f8b950b72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2676,6 +2676,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) goto init_failed; } } + + r = amdgpu_seq64_init(adev); + if (r) { + DRM_ERROR("allocate seq64 failed %d\n", r); + goto init_failed; + } } } @@ -3138,6 +3144,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_wb_fini(adev); amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); + amdgpu_seq64_fini(adev); } r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 583cf03950cd..b5ebafd4a3ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1428,6 +1428,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, fpriv->csa_va = NULL; } + amdgpu_seq64_unmap(adev, fpriv); + pasid = fpriv->vm.pasid; pd = amdgpu_bo_ref(fpriv->vm.root.bo); if (!WARN_ON(amdgpu_bo_reserve(pd, true))) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c new file mode 100644 index 000000000000..f3de02193138 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, 
Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_seq64.h" + +#include + +/** + * DOC: amdgpu_seq64 + * + * amdgpu_seq64 allocates a 64bit memory on each request in sequence order. + * seq64 driver is required for user queue fence memory allocation, TLB + * counters and VM updates. It has maximum count of 32768 64 bit slots. + */ + +/** + * amdgpu_seq64_map - Map the seq64 memory to VM + * + * @adev: amdgpu_device pointer + * @vm: vm pointer + * @bo_va: bo_va pointer + * @seq64_addr: seq64 vaddr start address + * @size: seq64 pool size + * + * Map the seq64 memory to the given VM. + * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va, u64 seq64_addr, + uint32_t size) +{ + struct amdgpu_bo *bo; + struct drm_exec exec; + int r; + + bo = adev->seq64.sbo; + if (!bo) + return -EINVAL; + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } + + *bo_va = amdgpu_vm_bo_add(adev, vm, bo); + if (!*bo_va) { + r = -ENOMEM; + goto error; + } + + r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, size, + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); + if (r) { + DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); + amdgpu_vm_bo_del(adev, *bo_va); + goto error; + } + + r = amdgpu_vm_bo_update(adev, *bo_va, false); + if (r) { + DRM_ERROR("failed to do vm_bo_update on userq sem\n"); + amdgpu_vm_bo_del(adev, *bo_va); + goto error; + } + +error: + drm_exec_fini(&exec); + return r; +} + +/** + * amdgpu_seq64_unmap - Unmap the seq64 memory + * + * @adev: amdgpu_device pointer + * @fpriv: DRM file private + * + * Unmap the seq64 memory from the given VM. 
+ */ +void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) +{ + struct amdgpu_vm *vm; + struct amdgpu_bo *bo; + struct drm_exec exec; + int r; + + if (!fpriv->seq64_va) + return; + + bo = adev->seq64.sbo; + if (!bo) + return; + + vm = &fpriv->vm; + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } + + amdgpu_vm_bo_del(adev, fpriv->seq64_va); + + fpriv->seq64_va = NULL; + +error: + drm_exec_fini(&exec); +} + +/** + * amdgpu_seq64_alloc - Allocate a 64 bit memory + * + * @adev: amdgpu_device pointer + * @gpu_addr: allocated gpu VA start address + * @cpu_addr: allocated cpu VA start address + * + * Alloc a 64 bit memory from seq64 pool. + * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, + u64 **cpu_addr) +{ + unsigned long bit_pos; + u32 offset; + + bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + + if (bit_pos < adev->seq64.num_sem) { + __set_bit(bit_pos, adev->seq64.used); + offset = bit_pos << 6; /* convert to qw offset */ + } else { + return -EINVAL; + } + + *gpu_addr = offset + AMDGPU_SEQ64_VADDR_START; + *cpu_addr = offset + adev->seq64.cpu_base_addr; + + return 0; +} + +/** + * amdgpu_seq64_free - Free the given 64 bit memory + * + * @adev: amdgpu_device pointer + * @gpu_addr: gpu start address to be freed + * + * Free the given 64 bit memory from seq64 pool. + * + */ +void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr) +{ + u32 offset; + + offset = gpu_addr - AMDGPU_SEQ64_VADDR_START; + + offset >>= 6; + if (offset < adev->seq64.num_sem) + __clear_bit(offset, adev->seq64.used); +} + +/** + * amdgpu_seq64_fini - Cleanup seq64 driver + * + * @adev: amdgpu_device pointer + * + * Free the memory space allocated for seq64. + * + */ +void amdgpu_seq64_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->seq64.sbo, + NULL, + (void **)&adev->seq64.cpu_base_addr); +} + +/** + * amdgpu_seq64_init - Initialize seq64 driver + * + * @adev: amdgpu_device pointer + * + * Allocate the required memory space for seq64. + * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->seq64.sbo) + return 0; + + /* + * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS + * 64bit slots + */ + r = amdgpu_bo_create_kernel(adev, AMDGPU_SEQ64_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->seq64.sbo, NULL, + (void **)&adev->seq64.cpu_base_addr); + if (r) { + dev_warn(adev->dev, "(%d) create seq64 failed\n", r); + return r; + } + + memset(adev->seq64.cpu_base_addr, 0, AMDGPU_SEQ64_SIZE); + + adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS; + memset(&adev->seq64.used, 0, sizeof(adev->seq64.used)); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h new file mode 100644 index 000000000000..2196e72be508 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_SEQ64_H__ +#define __AMDGPU_SEQ64_H__ + +#define AMDGPU_SEQ64_SIZE (2ULL << 20) +#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8)) +#define AMDGPU_SEQ64_VADDR_OFFSET 0x50000 +#define AMDGPU_SEQ64_VADDR_START (AMDGPU_VA_RESERVED_SIZE + AMDGPU_SEQ64_VADDR_OFFSET) + +struct amdgpu_seq64 { + struct amdgpu_bo *sbo; + u32 num_sem; + u64 *cpu_base_addr; + DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS); +}; + +void amdgpu_seq64_fini(struct amdgpu_device *adev); +int amdgpu_seq64_init(struct amdgpu_device *adev); +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); +void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); +int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va, u64 seq64_addr, uint32_t size); +void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv); + +#endif + -- cgit v1.2.3 From e6ed364efae39455cb1d6b1895a1d31599608a2b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 16 Mar 2022 18:46:56 -0400 Subject: drm/amdgpu: update mappings not managed by KFD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When restoring after an eviction, use amdgpu_vm_handle_moved to update BO VA mappings in KFD VMs that are not managed through the KFD API. This should allow using the render node API to create more flexible memory mappings in KFD VMs. v2: rebase on drm_exec changes (Alex) Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 41fbc4fd0fac..b98c0969d3be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2825,12 +2825,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (ret) goto validate_map_fail; - ret = process_sync_pds_resv(process_info, &sync_obj); - if (ret) { - pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); - goto validate_map_fail; - } - /* Validate BOs and map them to GPUVM (update VM page tables). 
*/ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { @@ -2881,6 +2875,19 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (failed_size) pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + /* Update mappings not managed by KFD */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_device *adev = amdgpu_ttm_adev( + peer_vm->root.bo->tbo.bdev); + + ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket); + if (ret) { + pr_debug("Memory eviction: handle moved failed. Try again\n"); + goto validate_map_fail; + } + } + /* Update page directories */ ret = process_update_pds(process_info, &sync_obj); if (ret) { @@ -2888,6 +2895,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } + /* Sync with fences on all the page tables. They implicitly depend on any + * move fences from amdgpu_vm_handle_moved above. + */ + ret = process_sync_pds_resv(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); + goto validate_map_fail; + } + /* Wait for validate and PT updates to finish */ amdgpu_sync_wait(&sync_obj, false); -- cgit v1.2.3 From 94e2dae0a8bfd456abfd866f1eee8342f0858012 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 24 Feb 2023 18:22:32 -0500 Subject: drm/amdkfd: Move TLB flushing logic into amdgpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will make it possible for amdgpu GEM ioctls to flush TLBs on compute VMs. This removes VMID-based TLB flushing and always uses PASID-based flushing. This still works because it scans the VMID-PASID mapping registers to find the right VMID. It's only slightly less efficient. This is not a production use case. 
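
For illustration only (this mirrors the new kfd_flush_tlb() inline added in
kfd_priv.h below; it is not an extra hunk of this patch): after the move, the
KFD side simply forwards to the new amdgpu helper, which now owns the tlb_seq
race check and the per-XCC PASID-based flush:

  static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
                                   enum TLB_FLUSH_TYPE type)
  {
          struct amdgpu_device *adev = pdd->dev->adev;
          struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);

          /* amdgpu decides whether a flush is actually needed */
          amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
  }
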
Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 29 -------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 5 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 44 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 ++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 10 +++++-- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 31 --------------------- 6 files changed, 57 insertions(+), 67 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b8412202a1b0..6ab17330a6ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -710,35 +710,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, - uint16_t vmid) -{ - if (adev->family == AMDGPU_FAMILY_AI) { - int i; - - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); - } else { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); - } - - return 0; -} - -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, - enum TLB_FLUSH_TYPE flush_type, - uint32_t inst) -{ - bool all_hub = false; - - if (adev->family == AMDGPU_FAMILY_AI || - adev->family == AMDGPU_FAMILY_RV) - all_hub = true; - - return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); -} - bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) { return adev->have_atomics_support; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index dac983da961d..16794c2eea35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -162,11 +162,6 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev); -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, - uint16_t vmid); -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type, - uint32_t inst); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d1b8afd105c9..d59154ddaaed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1437,6 +1437,50 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, return 0; } +/** + * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * @flush_type: flush type + * + * Flush TLB if needed for a compute VM. + * + * Returns: + * 0 for success. + */ +int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint32_t flush_type, + uint32_t xcc_mask) +{ + uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); + bool all_hub = false; + int xcc = 0, r = 0; + + WARN_ON_ONCE(!vm->is_compute_context); + + /* + * It can be that we race and lose here, but that is extremely unlikely + * and the worst thing which could happen is that we flush the changes + * into the TLB once more which is harmless. 
+ */ + if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq) + return 0; + + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) + all_hub = true; + + for_each_inst(xcc, xcc_mask) { + r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type, + all_hub, xcc); + if (r) + break; + } + return r; +} + /** * amdgpu_vm_bo_add - add a bo to a specific vm * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 2cd86d2bf73f..b6cd565562ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -324,6 +324,7 @@ struct amdgpu_vm { /* Last finished delayed update */ atomic64_t tlb_seq; struct dma_fence *last_tlb_flush; + atomic64_t kfd_last_flushed_seq; /* How many times we had to re-generate the page tables */ uint64_t generation; @@ -445,6 +446,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket); +int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint32_t flush_type, + uint32_t xcc_mask); void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9cc32f577e38..a40f8cfc6aa5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -748,7 +748,6 @@ struct kfd_process_device { /* VM context for GPUVM allocations */ struct file *drm_file; void *drm_priv; - atomic64_t tlb_seq; /* GPUVM allocations storage */ struct idr alloc_idr; @@ -1462,7 +1461,14 @@ void kfd_signal_reset_event(struct kfd_node *dev); void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); +static inline void kfd_flush_tlb(struct kfd_process_device *pdd, + enum TLB_FLUSH_TYPE type) +{ + struct amdgpu_device *adev = pdd->dev->adev; + struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); + + amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask); +} static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7a33e06f5c90..c10d050e1a61 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1667,7 +1667,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, return ret; } pdd->drm_priv = drm_file->private_data; - atomic64_set(&pdd->tlb_seq, 0); ret = kfd_process_device_reserve_ib_mem(pdd); if (ret) @@ -2059,36 +2058,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process, KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); } -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) -{ - struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); - uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); - struct kfd_node *dev = pdd->dev; - uint32_t xcc_mask = dev->xcc_mask; - int xcc = 0; - - /* - * It can be that we race and lose here, but that is extremely unlikely - * and the worst thing which could happen is that we flush the changes - * into the TLB once more which is harmless. 
- */ - if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq) - return; - - if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { - /* Nothing to flush until a VMID is assigned, which - * only happens when the first queue is created. - */ - if (pdd->qpd.vmid) - amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev, - pdd->qpd.vmid); - } else { - for_each_inst(xcc, xcc_mask) - amdgpu_amdkfd_flush_gpu_tlb_pasid( - dev->adev, pdd->process->pasid, type, xcc); - } -} - /* assumes caller holds process lock. */ int kfd_process_drain_interrupts(struct kfd_process_device *pdd) { -- cgit v1.2.3 From fbbcb3f2b7c269c92218f315d22d6ab00524798a Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Thu, 2 Nov 2023 11:19:46 +0800 Subject: drm/amd/pm: Fix return value and drop redundant param Fix the return value and drop redundant parameter of get_asic_baco_capability function. Signed-off-by: Ma Jun Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 2 +- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 8 +++----- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 11 ++++------- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c | 7 +++---- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c | 9 ++++----- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c | 9 ++++----- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h | 2 +- drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 12 +++++------- 11 files changed, 28 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index cd3c40a86029..b14231f470dc 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -421,7 +421,7 @@ struct amd_pm_funcs { int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock); int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock); int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock); - int (*get_asic_baco_capability)(void *handle, bool *cap); + bool (*get_asic_baco_capability)(void *handle); int (*get_asic_baco_state)(void *handle, int *state); int (*set_asic_baco_state)(void *handle, int state); int (*get_ppfeature_status)(void *handle, char *buf); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 08cb79401410..1ae3b81548fa 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -185,8 +185,7 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; void *pp_handle = adev->powerplay.pp_handle; - bool baco_cap; - int ret = 0; + bool ret; if (!pp_funcs || !pp_funcs->get_asic_baco_capability) return false; @@ -204,12 +203,11 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) mutex_lock(&adev->pm.mutex); - ret = pp_funcs->get_asic_baco_capability(pp_handle, - &baco_cap); + ret = pp_funcs->get_asic_baco_capability(pp_handle); mutex_unlock(&adev->pm.mutex); - return ret ? 
false : baco_cap; + return ret; } int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 914c15387157..aed0e2cefbf9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1371,21 +1371,18 @@ static int pp_set_active_display_count(void *handle, uint32_t count) return phm_set_active_display_count(hwmgr, count); } -static int pp_get_asic_baco_capability(void *handle, bool *cap) +static bool pp_get_asic_baco_capability(void *handle) { struct pp_hwmgr *hwmgr = handle; - *cap = false; if (!hwmgr) - return -EINVAL; + return false; if (!(hwmgr->not_vf && amdgpu_dpm) || !hwmgr->hwmgr_func->get_asic_baco_capability) - return 0; + return false; - hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr, cap); - - return 0; + return hwmgr->hwmgr_func->get_asic_baco_capability(hwmgr); } static int pp_get_asic_baco_state(void *handle, int *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c index 044cda005aed..e8a9471c1898 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c @@ -33,21 +33,20 @@ #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" -int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg; - *cap = false; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) return 0; reg = RREG32(mmCC_BIF_BX_FUSESTRAP0); if (reg & CC_BIF_BX_FUSESTRAP0__STRAP_BIF_PX_CAPABLE_MASK) - *cap = true; + return true; - return 0; + return false; } int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h index be0d98abb536..73a773f4ce2e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern int smu7_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern bool smu7_baco_get_capability(struct pp_hwmgr *hwmgr); extern int smu7_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); extern int smu7_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c index de0a37f7c632..c66ef9741535 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c @@ -28,14 +28,13 @@ #include "vega10_inc.h" #include "smu9_baco.h" -int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg, data; - *cap = false; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) - return 0; + return false; WREG32(0x12074, 0xFFF0003B); data = RREG32(0x12075); @@ -44,10 +43,10 @@ int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) - *cap = true; + return true; } - return 0; + return false; } int smu9_baco_get_state(struct 
pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h index 84e90f801ac3..9ff7c2ea1b58 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern int smu9_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern bool smu9_baco_get_capability(struct pp_hwmgr *hwmgr); extern int smu9_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); #endif diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c index 994c0d374bfa..dad4c80aee58 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c @@ -36,23 +36,22 @@ static const struct soc15_baco_cmd_entry clean_baco_tbl[] = { {CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_7), 0, 0, 0, 0}, }; -int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap) +bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); uint32_t reg; - *cap = false; if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_BACO)) - return 0; + return false; if (((RREG32(0x17569) & 0x20000000) >> 29) == 0x1) { reg = RREG32_SOC15(NBIF, 0, mmRCC_BIF_STRAP0); if (reg & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) - *cap = true; + return true; } - return 0; + return false; } int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h index f06471e712dc..bdad9c915631 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h @@ -25,7 +25,7 @@ #include "hwmgr.h" #include "common_baco.h" -extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap); +extern bool vega20_baco_get_capability(struct pp_hwmgr *hwmgr); extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state); extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h index 81650727a5de..6f536159df4d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h @@ -351,7 +351,7 @@ struct pp_hwmgr_func { int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*set_hard_min_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*set_soft_max_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); - int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap); + bool (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr); int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state); int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 1ead323f1c78..37c2605f9b35 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3005,19 +3005,17 @@ static int smu_set_xgmi_pstate(void *handle, return ret; } 
-static int smu_get_baco_capability(void *handle, bool *cap) +static bool smu_get_baco_capability(void *handle) { struct smu_context *smu = handle; - *cap = false; - if (!smu->pm_enabled) - return 0; + return false; - if (smu->ppt_funcs && smu->ppt_funcs->baco_is_support) - *cap = smu->ppt_funcs->baco_is_support(smu); + if (!smu->ppt_funcs || !smu->ppt_funcs->baco_is_support) + return false; - return 0; + return smu->ppt_funcs->baco_is_support(smu); } static int smu_baco_set_state(void *handle, int state) -- cgit v1.2.3 From b5a52d2afe1b75f9d51461bb235ca40735e99fe7 Mon Sep 17 00:00:00 2001 From: Sam James Date: Sun, 5 Nov 2023 16:06:50 +0000 Subject: amdgpu: Adjust kmalloc_array calls for new -Walloc-size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 14 introduces a new -Walloc-size included in -Wextra which errors out on various files in drivers/gpu/drm/amd/amdgpu like: ``` amdgpu_amdkfd_gfx_v8.c:241:15: error: allocation of insufficient size ‘4’ for type ‘uint32_t[2]’ {aka ‘unsigned int[2]'} with size ‘8’ [-Werror=alloc-size] ``` This is because each HQD_N_REGS is actually a uint32_t[2]. Move the * 2 to the size argument so GCC sees we're allocating enough. Originally did 'sizeof(uint32_t) * 2' for the size but a friend suggested 'sizeof(**dump)' better communicates the intent. Link: https://lore.kernel.org/all/87wmuwo7i3.fsf@gentoo.org/ Signed-off-by: Sam James Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 625db444df1c..0ba15dcbe4e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -200,7 +200,7 @@ int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index f6598b9e4faa..a5c7259cf2a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -141,7 +141,7 @@ static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 6bf448ab3dff..ca4a6b82817f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -214,7 +214,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ 
-301,7 +301,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+4) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index cd06e4a6d1da..0f3e2944edd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -238,7 +238,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -324,7 +324,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+4+2+3+7) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 00fbc0f44c92..5a35a8ca8922 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -363,7 +363,7 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -460,7 +460,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; -- cgit v1.2.3 From d8a3813713c3843351123138c8b191142c266521 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Fri, 3 Nov 2023 21:20:13 +0530 Subject: drm/radeon: Fix warning using plain integer as NULL sparse static analysis tools generate a warning with this message "Using plain integer as NULL pointer". In this case the warning is shown because we are trying to initialize a pointer to NULL using the integer value 0.
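
As a standalone illustration of what sparse complains about (a hypothetical
struct, not taken from the radeon code): initializing a pointer member with
the integer 0 triggers the warning, while NULL does not.

  struct example_sentinel {
          const unsigned int *extent;
          unsigned int reg_count;
  };

  /* sparse: "Using plain integer as NULL pointer" for the first member */
  static const struct example_sentinel bad  = { 0, 0 };

  /* clean: the terminating entry names the null pointer explicitly */
  static const struct example_sentinel good = { NULL, 0 };
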
Signed-off-by: Abhinav Singh Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/clearstate_evergreen.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/clearstate_evergreen.h b/drivers/gpu/drm/radeon/clearstate_evergreen.h index 63a1ffbb3ced..3b645558f133 100644 --- a/drivers/gpu/drm/radeon/clearstate_evergreen.h +++ b/drivers/gpu/drm/radeon/clearstate_evergreen.h @@ -1049,7 +1049,7 @@ static const struct cs_extent_def SECT_CONTEXT_defs[] = {SECT_CONTEXT_def_5, 0x0000a29e, 5 }, {SECT_CONTEXT_def_6, 0x0000a2a5, 56 }, {SECT_CONTEXT_def_7, 0x0000a2de, 290 }, - { 0, 0, 0 } + { NULL, 0, 0 } }; static const u32 SECT_CLEAR_def_1[] = { @@ -1060,7 +1060,7 @@ static const u32 SECT_CLEAR_def_1[] = static const struct cs_extent_def SECT_CLEAR_defs[] = { {SECT_CLEAR_def_1, 0x0000ffc0, 3 }, - { 0, 0, 0 } + { NULL, 0, 0 } }; static const u32 SECT_CTRLCONST_def_1[] = { @@ -1070,11 +1070,11 @@ static const u32 SECT_CTRLCONST_def_1[] = static const struct cs_extent_def SECT_CTRLCONST_defs[] = { {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 }, - { 0, 0, 0 } + { NULL, 0, 0 } }; static const struct cs_section_def evergreen_cs_data[] = { { SECT_CONTEXT_defs, SECT_CONTEXT }, { SECT_CLEAR_defs, SECT_CLEAR }, { SECT_CTRLCONST_defs, SECT_CTRLCONST }, - { 0, SECT_NONE } + { NULL, SECT_NONE } }; -- cgit v1.2.3 From 857c838c782728318c581cb656fddd74faa89ad2 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 3 Nov 2023 09:48:06 +0800 Subject: drm/amd/pm: Move some functions to smu_v13_0.c as generic code Use generic functions and remove the duplicate code Signed-off-by: Ma Jun Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 38 ++++++++++++++++------ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 36 ++------------------ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 36 ++------------------ 3 files changed, 32 insertions(+), 78 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index cf1b84060bc3..b446f860baf6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2283,22 +2283,40 @@ int smu_v13_0_baco_set_state(struct smu_context *smu, int smu_v13_0_baco_enter(struct smu_context *smu) { - int ret = 0; - - ret = smu_v13_0_baco_set_state(smu, - SMU_BACO_STATE_ENTER); - if (ret) - return ret; + struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + int ret; - msleep(10); + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { + return smu_v13_0_baco_set_armd3_sequence(smu, + (smu_baco->maco_support && amdgpu_runtime_pm != 1) ? 
+ BACO_SEQ_BAMACO : BACO_SEQ_BACO); + } else { + ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_ENTER); + if (!ret) + usleep_range(10000, 11000); - return ret; + return ret; + } } int smu_v13_0_baco_exit(struct smu_context *smu) { - return smu_v13_0_baco_set_state(smu, - SMU_BACO_STATE_EXIT); + struct amdgpu_device *adev = smu->adev; + int ret; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { + /* Wait for PMFW handling for the Dstate change */ + usleep_range(10000, 11000); + ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + } else { + ret = smu_v13_0_baco_set_state(smu, SMU_BACO_STATE_EXIT); + } + + if (!ret) + adev->gfx.is_poweron = false; + + return ret; } int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 82c4e1f1c6f0..a4debd7ab8bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2558,38 +2558,6 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, NULL); } -static int smu_v13_0_0_baco_enter(struct smu_context *smu) -{ - struct smu_baco_context *smu_baco = &smu->smu_baco; - struct amdgpu_device *adev = smu->adev; - - if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) - return smu_v13_0_baco_set_armd3_sequence(smu, - (smu_baco->maco_support && amdgpu_runtime_pm != 1) ? - BACO_SEQ_BAMACO : BACO_SEQ_BACO); - else - return smu_v13_0_baco_enter(smu); -} - -static int smu_v13_0_0_baco_exit(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret; - - if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { - /* Wait for PMFW handling for the Dstate change */ - usleep_range(10000, 11000); - ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); - } else { - ret = smu_v13_0_baco_exit(smu); - } - - if (!ret) - adev->gfx.is_poweron = false; - - return ret; -} - static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -3037,8 +3005,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .baco_is_support = smu_v13_0_baco_is_support, .baco_get_state = smu_v13_0_baco_get_state, .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_0_baco_enter, - .baco_exit = smu_v13_0_0_baco_exit, + .baco_enter = smu_v13_0_baco_enter, + .baco_exit = smu_v13_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, .mode1_reset = smu_v13_0_0_mode1_reset, .mode2_reset = smu_v13_0_0_mode2_reset, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 81eafed76045..f5596f031d00 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2515,38 +2515,6 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, return ret; } -static int smu_v13_0_7_baco_enter(struct smu_context *smu) -{ - struct smu_baco_context *smu_baco = &smu->smu_baco; - struct amdgpu_device *adev = smu->adev; - - if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) - return smu_v13_0_baco_set_armd3_sequence(smu, - (smu_baco->maco_support && amdgpu_runtime_pm != 1) ? 
- BACO_SEQ_BAMACO : BACO_SEQ_BACO); - else - return smu_v13_0_baco_enter(smu); -} - -static int smu_v13_0_7_baco_exit(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret; - - if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { - /* Wait for PMFW handling for the Dstate change */ - usleep_range(10000, 11000); - ret = smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); - } else { - ret = smu_v13_0_baco_exit(smu); - } - - if (!ret) - adev->gfx.is_poweron = false; - - return ret; -} - static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -2628,8 +2596,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .baco_is_support = smu_v13_0_baco_is_support, .baco_get_state = smu_v13_0_baco_get_state, .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_7_baco_enter, - .baco_exit = smu_v13_0_7_baco_exit, + .baco_enter = smu_v13_0_baco_enter, + .baco_exit = smu_v13_0_baco_exit, .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported, .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_7_set_mp1_state, -- cgit v1.2.3 From 5ce8eccd53a357f91f2c2fe29918f9c65a1fe970 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 8 Nov 2023 16:00:30 +0800 Subject: drm/amd/pm: Make smu_v13_0_baco_set_armd3_sequence() static smu_v13_0_baco_set_armd3_sequence is not used by other files, so make it as static type. Signed-off-by: Ma Jun Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 3 --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 95cb919718ae..48f5926d8153 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -210,9 +210,6 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu); int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks); -int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, - enum smu_baco_seq baco_seq); - bool smu_v13_0_baco_is_support(struct smu_context *smu); enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index b446f860baf6..86fc7273d588 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2199,7 +2199,7 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu, return ret; } -int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, +static int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, enum smu_baco_seq baco_seq) { struct smu_baco_context *smu_baco = &smu->smu_baco; -- cgit v1.2.3 From 8a1de314d1890793bbf9e77542574ceda007564e Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 2 Aug 2023 07:49:14 +0530 Subject: drm/amdgpu: Refactor 'amdgpu_connector_dvi_detect' in amdgpu_connectors.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the below: WARNING: Prefer 'unsigned int' to bare use of 'unsigned' WARNING: Missing a blank line after declarations WARNING: Too many leading tabs - consider code refactoring + if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) { WARNING: Too many leading tabs - consider code 
refactoring + if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { Cc: Guchun Chen Cc: Christian König Cc: Alex Deucher Cc: "Pan, Xinhui" Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Acked-by: Guchun Chen Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 69 ++++++++++++++++---------- 1 file changed, 42 insertions(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 7473a42f7d45..96f63fd39b9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -103,7 +103,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct amdgpu_connector_atom_dig *dig_connector; int bpc = 8; - unsigned mode_clock, max_tmds_clock; + unsigned int mode_clock, max_tmds_clock; switch (connector->connector_type) { case DRM_MODE_CONNECTOR_DVII: @@ -255,6 +255,7 @@ struct edid *amdgpu_connector_edid(struct drm_connector *connector) return amdgpu_connector->edid; } else if (edid_blob) { struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL); + if (edid) amdgpu_connector->edid = edid; } @@ -581,6 +582,7 @@ static int amdgpu_connector_set_property(struct drm_connector *connector, amdgpu_encoder = to_amdgpu_encoder(connector->encoder); } else { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; + amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector)); } @@ -797,6 +799,7 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector, amdgpu_encoder = to_amdgpu_encoder(connector->encoder); else { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; + amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector)); } @@ -979,6 +982,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector) return false; } +static void amdgpu_connector_shared_ddc(enum drm_connector_status *status, + struct drm_connector *connector, + struct amdgpu_connector *amdgpu_connector) +{ + struct drm_connector *list_connector; + struct drm_connector_list_iter iter; + struct amdgpu_connector *list_amdgpu_connector; + struct drm_device *dev = connector->dev; + struct amdgpu_device *adev = drm_to_adev(dev); + + if (amdgpu_connector->shared_ddc && *status == connector_status_connected) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(list_connector, + &iter) { + if (connector == list_connector) + continue; + list_amdgpu_connector = to_amdgpu_connector(list_connector); + if (list_amdgpu_connector->shared_ddc && + list_amdgpu_connector->ddc_bus->rec.i2c_id == + amdgpu_connector->ddc_bus->rec.i2c_id) { + /* cases where both connectors are digital */ + if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) { + /* hpd is our only option in this case */ + if (!amdgpu_display_hpd_sense(adev, + amdgpu_connector->hpd.hpd)) { + amdgpu_connector_free_edid(connector); + *status = connector_status_disconnected; + } + } + } + } + drm_connector_list_iter_end(&iter); + } +} + /* * DVI is complicated * Do a DDC probe, if DDC probe passes, get the full EDID so @@ -1065,32 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) * DDC line. 
The latter is more complex because with DVI<->HDMI adapters * you don't really know what's connected to which port as both are digital. */ - if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) { - struct drm_connector *list_connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *list_amdgpu_connector; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(list_connector, - &iter) { - if (connector == list_connector) - continue; - list_amdgpu_connector = to_amdgpu_connector(list_connector); - if (list_amdgpu_connector->shared_ddc && - (list_amdgpu_connector->ddc_bus->rec.i2c_id == - amdgpu_connector->ddc_bus->rec.i2c_id)) { - /* cases where both connectors are digital */ - if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) { - /* hpd is our only option in this case */ - if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { - amdgpu_connector_free_edid(connector); - ret = connector_status_disconnected; - } - } - } - } - drm_connector_list_iter_end(&iter); - } + amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector); } } @@ -1192,6 +1205,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) static void amdgpu_connector_dvi_force(struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); + if (connector->force == DRM_FORCE_ON) amdgpu_connector->use_digital = false; if (connector->force == DRM_FORCE_ON_DIGITAL) @@ -1426,6 +1440,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) ret = connector_status_connected; else if (amdgpu_connector->dac_load_detect) { /* try load detection */ const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; + ret = encoder_funcs->detect(encoder, connector); } } -- cgit v1.2.3 From f4fac4163c2f99aada9cc60292f2ea377afe6c71 Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Wed, 8 Nov 2023 16:16:18 -0800 Subject: drm/amdkfd: Clear the VALU exception state in the trap handler The trap handler could be entered with pending VALU exceptions, so clear the exception state before issuing vector instructions. 
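
The functional part of the change is a single guarded instruction in the
gfx10 trap handler entry path, quoted here for readability since the bulk of
the diff below is the regenerated hex blob:

  #if ASIC_FAMILY <= CHIP_SIENNA_CICHLID
          // gfx10: clear any pending VALU exception state before the
          // handler issues its own VALU instructions
          v_clrexcp
  #endif
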
Reviewed-by: Jay Cornwall Acked-by: Alex Deucher Signed-off-by: Laurent Morichetti Tested-by: Lancelot Six Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 664 ++++++++++----------- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 6 + 2 files changed, 338 insertions(+), 332 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index d7cd5fa313ff..df75863393fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -2069,7 +2069,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { }; static const uint32_t cwsr_trap_gfx10_hex[] = { - 0xbf820001, 0xbf820220, + 0xbf820001, 0xbf820221, 0xb0804004, 0xb978f802, 0x8a78ff78, 0x00020006, 0xb97bf803, 0x876eff78, @@ -2118,391 +2118,391 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf900004, 0xbf8cc07f, 0x877aff7f, 0x04000000, 0x8f7a857a, 0x886d7a6d, - 0xbefa037e, 0x877bff7f, - 0x0000ffff, 0xbefe03c1, - 0xbeff03c1, 0xdc5f8000, - 0x007a0000, 0x7e000280, - 0xbefe037a, 0xbeff037b, - 0xb97b02dc, 0x8f7b997b, - 0xb97a3a05, 0x807a817a, - 0xbf0d997b, 0xbf850002, - 0x8f7a897a, 0xbf820001, - 0x8f7a8a7a, 0xb97b1e06, - 0x8f7b8a7b, 0x807a7b7a, + 0x7e008200, 0xbefa037e, 0x877bff7f, 0x0000ffff, - 0x807aff7a, 0x00000200, - 0x807a7e7a, 0x827b807b, - 0xd7610000, 0x00010870, - 0xd7610000, 0x00010a71, - 0xd7610000, 0x00010c72, - 0xd7610000, 0x00010e73, - 0xd7610000, 0x00011074, - 0xd7610000, 0x00011275, - 0xd7610000, 0x00011476, - 0xd7610000, 0x00011677, - 0xd7610000, 0x00011a79, - 0xd7610000, 0x00011c7e, - 0xd7610000, 0x00011e7f, - 0xbefe03ff, 0x00003fff, - 0xbeff0380, 0xdc5f8040, - 0x007a0000, 0xd760007a, - 0x00011d00, 0xd760007b, - 0x00011f00, 0xbefe037a, - 0xbeff037b, 0xbef4037e, - 0x8775ff7f, 0x0000ffff, - 0x8875ff75, 0x00040000, - 0xbef60380, 0xbef703ff, - 0x10807fac, 0xbef1037c, - 0xbef00380, 0xb97302dc, - 0x8f739973, 0xbefe03c1, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820002, - 0xbeff03c1, 0xbf820009, + 0xbefe03c1, 0xbeff03c1, + 0xdc5f8000, 0x007a0000, + 0x7e000280, 0xbefe037a, + 0xbeff037b, 0xb97b02dc, + 0x8f7b997b, 0xb97a3a05, + 0x807a817a, 0xbf0d997b, + 0xbf850002, 0x8f7a897a, + 0xbf820001, 0x8f7a8a7a, + 0xb97b1e06, 0x8f7b8a7b, + 0x807a7b7a, 0x877bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xd7610000, + 0x00010870, 0xd7610000, + 0x00010a71, 0xd7610000, + 0x00010c72, 0xd7610000, + 0x00010e73, 0xd7610000, + 0x00011074, 0xd7610000, + 0x00011275, 0xd7610000, + 0x00011476, 0xd7610000, + 0x00011677, 0xd7610000, + 0x00011a79, 0xd7610000, + 0x00011c7e, 0xd7610000, + 0x00011e7f, 0xbefe03ff, + 0x00003fff, 0xbeff0380, + 0xdc5f8040, 0x007a0000, + 0xd760007a, 0x00011d00, + 0xd760007b, 0x00011f00, + 0xbefe037a, 0xbeff037b, + 0xbef4037e, 0x8775ff7f, + 0x0000ffff, 0x8875ff75, + 0x00040000, 0xbef60380, + 0xbef703ff, 0x10807fac, + 0xbef1037c, 0xbef00380, + 0xb97302dc, 0x8f739973, + 0xbefe03c1, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0xbeff0380, + 0xbf820002, 0xbeff03c1, + 0xbf820009, 0xbef603ff, + 0x01000000, 0xe0704080, + 0x705d0100, 0xe0704100, + 0x705d0200, 0xe0704180, + 0x705d0300, 0xbf820008, 0xbef603ff, 0x01000000, - 0xe0704080, 0x705d0100, - 0xe0704100, 0x705d0200, - 0xe0704180, 0x705d0300, - 0xbf820008, 0xbef603ff, - 0x01000000, 0xe0704100, - 0x705d0100, 0xe0704200, - 0x705d0200, 0xe0704300, - 0x705d0300, 0xb9703a05, - 0x80708170, 0xbf0d9973, - 0xbf850002, 0x8f708970, - 0xbf820001, 0x8f708a70, - 
0xb97a1e06, 0x8f7a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0xbef603ff, - 0x01000000, 0x7e000280, - 0x7e020280, 0x7e040280, - 0xbefc0380, 0xd7610002, - 0x0000f871, 0x807c817c, - 0xd7610002, 0x0000f86c, - 0x807c817c, 0x8a7aff6d, - 0x80000000, 0xd7610002, - 0x0000f87a, 0x807c817c, - 0xd7610002, 0x0000f86e, - 0x807c817c, 0xd7610002, - 0x0000f86f, 0x807c817c, - 0xd7610002, 0x0000f878, - 0x807c817c, 0xb97af803, - 0xd7610002, 0x0000f87a, - 0x807c817c, 0xd7610002, - 0x0000f87b, 0x807c817c, - 0xb971f801, 0xd7610002, - 0x0000f871, 0x807c817c, - 0xb971f814, 0xd7610002, - 0x0000f871, 0x807c817c, - 0xb971f815, 0xd7610002, - 0x0000f871, 0x807c817c, - 0xbefe03ff, 0x0000ffff, - 0xbeff0380, 0xe0704000, - 0x705d0200, 0xbefe03c1, + 0xe0704100, 0x705d0100, + 0xe0704200, 0x705d0200, + 0xe0704300, 0x705d0300, 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, 0x8f7a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, 0xbef603ff, 0x01000000, - 0xbef90380, 0xbefc0380, - 0xbf800000, 0xbe802f00, - 0xbe822f02, 0xbe842f04, - 0xbe862f06, 0xbe882f08, - 0xbe8a2f0a, 0xbe8c2f0c, - 0xbe8e2f0e, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, + 0x7e000280, 0x7e020280, + 0x7e040280, 0xbefc0380, + 0xd7610002, 0x0000f871, + 0x807c817c, 0xd7610002, + 0x0000f86c, 0x807c817c, + 0x8a7aff6d, 0x80000000, + 0xd7610002, 0x0000f87a, + 0x807c817c, 0xd7610002, + 0x0000f86e, 0x807c817c, + 0xd7610002, 0x0000f86f, + 0x807c817c, 0xd7610002, + 0x0000f878, 0x807c817c, + 0xb97af803, 0xd7610002, + 0x0000f87a, 0x807c817c, + 0xd7610002, 0x0000f87b, + 0x807c817c, 0xb971f801, + 0xd7610002, 0x0000f871, + 0x807c817c, 0xb971f814, + 0xd7610002, 0x0000f871, + 0x807c817c, 0xb971f815, + 0xd7610002, 0x0000f871, + 0x807c817c, 0xbefe03ff, + 0x0000ffff, 0xbeff0380, + 0xe0704000, 0x705d0200, + 0xbefe03c1, 0xb9703a05, + 0x80708170, 0xbf0d9973, + 0xbf850002, 0x8f708970, + 0xbf820001, 0x8f708a70, + 0xb97a1e06, 0x8f7a8a7a, + 0x80707a70, 0xbef603ff, + 0x01000000, 0xbef90380, + 0xbefc0380, 0xbf800000, + 0xbe802f00, 0xbe822f02, + 0xbe842f04, 0xbe862f06, + 0xbe882f08, 0xbe8a2f0a, + 0xbe8c2f0c, 0xbe8e2f0e, + 0xd7610002, 0x0000f200, 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, 0x80798179, 0xd7610002, - 0x0000f20c, 0x80798179, - 0xd7610002, 0x0000f20d, + 0x0000f20b, 0x80798179, + 0xd7610002, 0x0000f20c, 0x80798179, 0xd7610002, - 0x0000f20e, 0x80798179, - 0xd7610002, 0x0000f20f, - 0x80798179, 0xbf06a079, - 0xbf840006, 0xe0704000, - 0x705d0200, 0x8070ff70, - 0x00000080, 0xbef90380, - 0x7e040280, 0x807c907c, - 0xbf0aff7c, 0x00000060, - 0xbf85ffbc, 0xbe802f00, - 0xbe822f02, 0xbe842f04, - 0xbe862f06, 0xbe882f08, - 0xbe8a2f0a, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, + 0x0000f20d, 0x80798179, + 0xd7610002, 0x0000f20e, 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, + 0x0000f20f, 0x80798179, + 0xbf06a079, 0xbf840006, + 0xe0704000, 0x705d0200, + 0x8070ff70, 0x00000080, + 0xbef90380, 
0x7e040280, + 0x807c907c, 0xbf0aff7c, + 0x00000060, 0xbf85ffbc, + 0xbe802f00, 0xbe822f02, + 0xbe842f04, 0xbe862f06, + 0xbe882f08, 0xbe8a2f0a, + 0xd7610002, 0x0000f200, 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xe0704000, - 0x705d0200, 0xbefe03c1, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb97b4306, - 0x877bc17b, 0xbf840044, - 0xbf8a0000, 0x877aff6d, - 0x80000000, 0xbf840040, - 0x8f7b867b, 0x8f7b827b, - 0xbef6037b, 0xb9703a05, - 0x80708170, 0xbf0d9973, - 0xbf850002, 0x8f708970, - 0xbf820001, 0x8f708a70, - 0xb97a1e06, 0x8f7a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0x8070ff70, - 0x00000080, 0xbef603ff, - 0x01000000, 0xd7650000, - 0x000100c1, 0xd7660000, - 0x000200c1, 0x16000084, - 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbefc0380, - 0xbf850012, 0xbe8303ff, - 0x00000080, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf8c0000, 0xe0704000, - 0x705d0100, 0x807c037c, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000080, - 0xbf0a7b7c, 0xbf85fff4, - 0xbf820011, 0xbe8303ff, - 0x00000100, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf8c0000, 0xe0704000, - 0x705d0100, 0x807c037c, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000100, - 0xbf0a7b7c, 0xbf85fff4, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xe0704000, 0x705d0200, 0xbefe03c1, 0x907c9973, 0x877c817c, 0xbf06817c, - 0xbf850004, 0xbef003ff, - 0x00000200, 0xbeff0380, - 0xbf820003, 0xbef003ff, - 0x00000400, 0xbeff03c1, - 0xb97b3a05, 0x807b817b, - 0x8f7b827b, 0x907c9973, + 0xbf850002, 0xbeff0380, + 0xbf820001, 0xbeff03c1, + 0xb97b4306, 0x877bc17b, + 0xbf840044, 0xbf8a0000, + 0x877aff6d, 0x80000000, + 0xbf840040, 0x8f7b867b, + 0x8f7b827b, 0xbef6037b, + 0xb9703a05, 0x80708170, + 0xbf0d9973, 0xbf850002, + 0x8f708970, 0xbf820001, + 0x8f708a70, 0xb97a1e06, + 0x8f7a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0x8070ff70, 0x00000080, + 0xbef603ff, 0x01000000, + 0xd7650000, 0x000100c1, + 0xd7660000, 0x000200c1, + 0x16000084, 0x907c9973, 0x877c817c, 0xbf06817c, - 0xbf850017, 0xbef603ff, - 0x01000000, 0xbefc0384, - 0xbf0a7b7c, 0xbf840037, - 0x7e008700, 0x7e028701, - 0x7e048702, 0x7e068703, - 0xe0704000, 0x705d0000, - 0xe0704080, 0x705d0100, - 0xe0704100, 0x705d0200, - 0xe0704180, 0x705d0300, - 0x807c847c, 0x8070ff70, - 0x00000200, 0xbf0a7b7c, - 0xbf85ffef, 0xbf820025, + 0xbefc0380, 0xbf850012, + 0xbe8303ff, 0x00000080, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf8c0000, + 0xe0704000, 0x705d0100, + 0x807c037c, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000080, 0xbf0a7b7c, + 0xbf85fff4, 0xbf820011, + 0xbe8303ff, 0x00000100, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf8c0000, + 0xe0704000, 0x705d0100, + 0x807c037c, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7c, + 0xbf85fff4, 0xbefe03c1, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850004, + 0xbef003ff, 0x00000200, + 0xbeff0380, 0xbf820003, + 0xbef003ff, 0x00000400, + 0xbeff03c1, 
0xb97b3a05, + 0x807b817b, 0x8f7b827b, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850017, 0xbef603ff, 0x01000000, 0xbefc0384, 0xbf0a7b7c, - 0xbf840011, 0x7e008700, + 0xbf840037, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0704000, - 0x705d0000, 0xe0704100, - 0x705d0100, 0xe0704200, - 0x705d0200, 0xe0704300, + 0x705d0000, 0xe0704080, + 0x705d0100, 0xe0704100, + 0x705d0200, 0xe0704180, 0x705d0300, 0x807c847c, - 0x8070ff70, 0x00000400, + 0x8070ff70, 0x00000200, 0xbf0a7b7c, 0xbf85ffef, - 0xb97b1e06, 0x877bc17b, - 0xbf84000c, 0x8f7b837b, - 0x807b7c7b, 0xbefe03c1, - 0xbeff0380, 0x7e008700, + 0xbf820025, 0xbef603ff, + 0x01000000, 0xbefc0384, + 0xbf0a7b7c, 0xbf840011, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, 0xe0704000, 0x705d0000, - 0x807c817c, 0x8070ff70, - 0x00000080, 0xbf0a7b7c, - 0xbf85fff8, 0xbf82013b, - 0xbef4037e, 0x8775ff7f, - 0x0000ffff, 0x8875ff75, - 0x00040000, 0xbef60380, - 0xbef703ff, 0x10807fac, - 0xb97202dc, 0x8f729972, - 0x876eff7f, 0x04000000, - 0xbf840034, 0xbefe03c1, - 0x907c9972, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb96f4306, - 0x876fc16f, 0xbf840029, - 0x8f6f866f, 0x8f6f826f, - 0xbef6036f, 0xb9783a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x8078ff78, - 0x00000080, 0xbef603ff, - 0x01000000, 0x907c9972, - 0x877c817c, 0xbf06817c, - 0xbefc0380, 0xbf850009, - 0xe0310000, 0x781d0000, - 0x807cff7c, 0x00000080, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fff8, - 0xbf820008, 0xe0310000, - 0x781d0000, 0x807cff7c, - 0x00000100, 0x8078ff78, - 0x00000100, 0xbf0a6f7c, - 0xbf85fff8, 0xbef80380, + 0xe0704100, 0x705d0100, + 0xe0704200, 0x705d0200, + 0xe0704300, 0x705d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xb97b1e06, + 0x877bc17b, 0xbf84000c, + 0x8f7b837b, 0x807b7c7b, + 0xbefe03c1, 0xbeff0380, + 0x7e008700, 0xe0704000, + 0x705d0000, 0x807c817c, + 0x8070ff70, 0x00000080, + 0xbf0a7b7c, 0xbf85fff8, + 0xbf82013b, 0xbef4037e, + 0x8775ff7f, 0x0000ffff, + 0x8875ff75, 0x00040000, + 0xbef60380, 0xbef703ff, + 0x10807fac, 0xb97202dc, + 0x8f729972, 0x876eff7f, + 0x04000000, 0xbf840034, 0xbefe03c1, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, - 0xb96f3a05, 0x806f816f, - 0x8f6f826f, 0x907c9972, - 0x877c817c, 0xbf06817c, - 0xbf850024, 0xbef603ff, - 0x01000000, 0xbeee0378, + 0xb96f4306, 0x876fc16f, + 0xbf840029, 0x8f6f866f, + 0x8f6f826f, 0xbef6036f, + 0xb9783a05, 0x80788178, + 0xbf0d9972, 0xbf850002, + 0x8f788978, 0xbf820001, + 0x8f788a78, 0xb96e1e06, + 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbefc0384, 0xbf0a6f7c, - 0xbf840050, 0xe0304000, - 0x785d0000, 0xe0304080, - 0x785d0100, 0xe0304100, - 0x785d0200, 0xe0304180, - 0x785d0300, 0xbf8c3f70, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807c847c, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85ffee, 0xe0304000, - 0x6e5d0000, 0xe0304080, - 0x6e5d0100, 0xe0304100, - 0x6e5d0200, 0xe0304180, - 0x6e5d0300, 0xbf8c3f70, - 0xbf820034, 0xbef603ff, - 0x01000000, 0xbeee0378, - 0x8078ff78, 0x00000400, - 0xbefc0384, 0xbf0a6f7c, - 0xbf840012, 0xe0304000, - 0x785d0000, 0xe0304100, - 0x785d0100, 0xe0304200, - 0x785d0200, 0xe0304300, - 0x785d0300, 0xbf8c3f70, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xb96f1e06, - 0x876fc16f, 0xbf84000e, - 0x8f6f836f, 0x806f7c6f, - 0xbefe03c1, 0xbeff0380, + 0x8078ff78, 0x00000080, + 0xbef603ff, 0x01000000, + 
0x907c9972, 0x877c817c, + 0xbf06817c, 0xbefc0380, + 0xbf850009, 0xe0310000, + 0x781d0000, 0x807cff7c, + 0x00000080, 0x8078ff78, + 0x00000080, 0xbf0a6f7c, + 0xbf85fff8, 0xbf820008, + 0xe0310000, 0x781d0000, + 0x807cff7c, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7c, 0xbf85fff8, + 0xbef80380, 0xbefe03c1, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0xbeff0380, 0xbf820001, + 0xbeff03c1, 0xb96f3a05, + 0x806f816f, 0x8f6f826f, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850024, + 0xbef603ff, 0x01000000, + 0xbeee0378, 0x8078ff78, + 0x00000200, 0xbefc0384, + 0xbf0a6f7c, 0xbf840050, 0xe0304000, 0x785d0000, + 0xe0304080, 0x785d0100, + 0xe0304100, 0x785d0200, + 0xe0304180, 0x785d0300, 0xbf8c3f70, 0x7e008500, - 0x807c817c, 0x8078ff78, - 0x00000080, 0xbf0a6f7c, - 0xbf85fff7, 0xbeff03c1, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807c847c, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85ffee, 0xe0304000, 0x6e5d0000, - 0xe0304100, 0x6e5d0100, - 0xe0304200, 0x6e5d0200, - 0xe0304300, 0x6e5d0300, - 0xbf8c3f70, 0xb9783a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef603ff, - 0x01000000, 0xbefc03ff, - 0x0000006c, 0x80f89078, - 0xf429003a, 0xf0000000, - 0xbf8cc07f, 0x80fc847c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0x80f8a078, - 0xf42d003a, 0xf0000000, - 0xbf8cc07f, 0x80fc887c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0xbe843104, - 0xbe863106, 0x80f8c078, - 0xf431003a, 0xf0000000, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe803100, - 0xbe823102, 0xbe843104, - 0xbe863106, 0xbe883108, - 0xbe8a310a, 0xbe8c310c, - 0xbe8e310e, 0xbf06807c, - 0xbf84fff0, 0xba80f801, - 0x00000000, 0xbf8a0000, + 0xe0304080, 0x6e5d0100, + 0xe0304100, 0x6e5d0200, + 0xe0304180, 0x6e5d0300, + 0xbf8c3f70, 0xbf820034, + 0xbef603ff, 0x01000000, + 0xbeee0378, 0x8078ff78, + 0x00000400, 0xbefc0384, + 0xbf0a6f7c, 0xbf840012, + 0xe0304000, 0x785d0000, + 0xe0304100, 0x785d0100, + 0xe0304200, 0x785d0200, + 0xe0304300, 0x785d0300, + 0xbf8c3f70, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xb96f1e06, 0x876fc16f, + 0xbf84000e, 0x8f6f836f, + 0x806f7c6f, 0xbefe03c1, + 0xbeff0380, 0xe0304000, + 0x785d0000, 0xbf8c3f70, + 0x7e008500, 0x807c817c, + 0x8078ff78, 0x00000080, + 0xbf0a6f7c, 0xbf85fff7, + 0xbeff03c1, 0xe0304000, + 0x6e5d0000, 0xe0304100, + 0x6e5d0100, 0xe0304200, + 0x6e5d0200, 0xe0304300, + 0x6e5d0300, 0xbf8c3f70, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, + 0x80f8ff78, 0x00000050, 0xbef603ff, 0x01000000, - 0xf4211bfa, 0xf0000000, - 0x80788478, 0xf4211b3a, + 0xbefc03ff, 0x0000006c, + 0x80f89078, 0xf429003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc847c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0x80f8a078, 0xf42d003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc887c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0xbe843104, 0xbe863106, + 0x80f8c078, 0xf431003a, + 0xf0000000, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe803100, 0xbe823102, + 0xbe843104, 0xbe863106, + 0xbe883108, 0xbe8a310a, + 0xbe8c310c, 0xbe8e310e, + 0xbf06807c, 0xbf84fff0, + 0xba80f801, 0x00000000, + 0xbf8a0000, 0xb9783a05, + 0x80788178, 0xbf0d9972, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb96e1e06, 0x8f6e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0xbef603ff, + 0x01000000, 0xf4211bfa, 0xf0000000, 0x80788478, - 0xf4211b7a, 0xf0000000, - 0x80788478, 0xf4211c3a, + 0xf4211b3a, 
0xf0000000, + 0x80788478, 0xf4211b7a, 0xf0000000, 0x80788478, - 0xf4211c7a, 0xf0000000, - 0x80788478, 0xf4211eba, + 0xf4211c3a, 0xf0000000, + 0x80788478, 0xf4211c7a, 0xf0000000, 0x80788478, - 0xf4211efa, 0xf0000000, - 0x80788478, 0xf4211e7a, + 0xf4211eba, 0xf0000000, + 0x80788478, 0xf4211efa, 0xf0000000, 0x80788478, - 0xf4211cfa, 0xf0000000, - 0x80788478, 0xf4211bba, + 0xf4211e7a, 0xf0000000, + 0x80788478, 0xf4211cfa, 0xf0000000, 0x80788478, - 0xbf8cc07f, 0xb9eef814, 0xf4211bba, 0xf0000000, 0x80788478, 0xbf8cc07f, - 0xb9eef815, 0xbefc036f, - 0xbefe0370, 0xbeff0371, - 0x876f7bff, 0x000003ff, - 0xb9ef4803, 0x876f7bff, - 0xfffff800, 0x906f8b6f, - 0xb9efa2c3, 0xb9f3f801, - 0xb96e3a05, 0x806e816e, - 0xbf0d9972, 0xbf850002, - 0x8f6e896e, 0xbf820001, - 0x8f6e8a6e, 0xb96f1e06, - 0x8f6f8a6f, 0x806e6f6e, - 0x806eff6e, 0x00000200, - 0x806e746e, 0x826f8075, - 0x876fff6f, 0x0000ffff, - 0xf4091c37, 0xfa000050, - 0xf4091d37, 0xfa000060, - 0xf4011e77, 0xfa000074, - 0xbf8cc07f, 0x876dff6d, - 0x0000ffff, 0x87fe7e7e, - 0x87ea6a6a, 0xb9faf802, - 0xbe80226c, 0xbf810000, + 0xb9eef814, 0xf4211bba, + 0xf0000000, 0x80788478, + 0xbf8cc07f, 0xb9eef815, + 0xbefc036f, 0xbefe0370, + 0xbeff0371, 0x876f7bff, + 0x000003ff, 0xb9ef4803, + 0x876f7bff, 0xfffff800, + 0x906f8b6f, 0xb9efa2c3, + 0xb9f3f801, 0xb96e3a05, + 0x806e816e, 0xbf0d9972, + 0xbf850002, 0x8f6e896e, + 0xbf820001, 0x8f6e8a6e, + 0xb96f1e06, 0x8f6f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x876fff6f, + 0x0000ffff, 0xf4091c37, + 0xfa000050, 0xf4091d37, + 0xfa000060, 0xf4011e77, + 0xfa000074, 0xbf8cc07f, + 0x876dff6d, 0x0000ffff, + 0x87fe7e7e, 0x87ea6a6a, + 0xb9faf802, 0xbe80226c, + 0xbf810000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx11_hex[] = { diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index fdab64624422..e0140df0b0ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -369,6 +369,12 @@ L_SLEEP: s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp #if NO_SQC_STORE +#if ASIC_FAMILY <= CHIP_SIENNA_CICHLID + // gfx10: If there was a VALU exception, the exception state must be + // cleared before executing the VALU instructions below. + v_clrexcp +#endif + // Trap temporaries must be saved via VGPR but all VGPRs are in use. // There is no ttmp space to hold the resource constant for VGPR save. // Save v0 by itself since it requires only two SGPRs. -- cgit v1.2.3 From efb91fea652a42fcc037d2a9ef4ecd1ffc5ff4b7 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 27 Oct 2023 15:59:48 -0400 Subject: drm/amd/display: Fix a debugfs null pointer error [WHY & HOW] Check whether get_subvp_en() callback exists before calling it. 
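
For readers unfamiliar with the pattern, the fix below is the standard guard for optional hooks in an ops/caps table: test the function pointer before dereferencing it, and fall back to a safe default when the hook is absent. What follows is only a minimal, self-contained sketch of that pattern; the struct and function names are illustrative stand-ins rather than the actual DC interfaces, and the "false" default simply mirrors the patch's choice of initializing subvp_in_use to false.

  #include <stdbool.h>
  #include <stdio.h>

  /* Hypothetical stand-in for a driver capability table with an optional hook. */
  struct caps_funcs {
          /* May legitimately be NULL on hardware without sub-viewport support. */
          bool (*get_subvp_en)(void *state);
  };

  /* Guarded call: never dereference an optional hook without checking it first. */
  static bool query_subvp_enabled(const struct caps_funcs *funcs, void *state)
  {
          if (funcs && funcs->get_subvp_en)
                  return funcs->get_subvp_en(state);

          return false; /* safe default when the hook is not implemented */
  }

  static bool fake_get_subvp_en(void *state)
  {
          (void)state;
          return true;
  }

  int main(void)
  {
          struct caps_funcs with_hook = { .get_subvp_en = fake_get_subvp_en };
          struct caps_funcs without_hook = { 0 };

          printf("with hook:    %d\n", query_subvp_enabled(&with_hook, NULL));
          printf("without hook: %d\n", query_subvp_enabled(&without_hook, NULL));
          return 0;
  }

The hunk below applies the same shape in capabilities_show(): both get_mall_en and get_subvp_en are now only invoked when the corresponding callback is present.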
Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Alex Hung Acked-by: Alex Hung Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 13a177d34376..45c972f2630d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3647,12 +3647,16 @@ static int capabilities_show(struct seq_file *m, void *unused) bool mall_supported = dc->caps.mall_size_total; bool subvp_supported = dc->caps.subvp_fw_processing_delay_us; unsigned int mall_in_use = false; - unsigned int subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state); + unsigned int subvp_in_use = false; + struct hubbub *hubbub = dc->res_pool->hubbub; if (hubbub->funcs->get_mall_en) hubbub->funcs->get_mall_en(hubbub, &mall_in_use); + if (dc->cap_funcs.get_subvp_en) + subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state); + seq_printf(m, "mall supported: %s, enabled: %s\n", mall_supported ? "yes" : "no", mall_in_use ? "yes" : "no"); seq_printf(m, "sub-viewport supported: %s, enabled: %s\n", -- cgit v1.2.3 From ef71bb4119c786f6f1d132b8863698874321798b Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 10 Nov 2023 13:14:41 +0800 Subject: drm/amdgpu: correct mca ipid die/socket/addr decode correct mca ipid die/socket/addr decode v2: squash in fix from Yang Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 0e5a77c3c2e2..f723a4190ee5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2392,8 +2392,8 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info) { - uint64_t ipid = entry->regs[MCA_REG_IDX_IPID]; - uint32_t insthi; + u64 ipid = entry->regs[MCA_REG_IDX_IPID]; + u32 instidhi, instid; /* NOTE: All MCA IPID register share the same format, * so the driver can share the MCMP1 register header file. @@ -2402,9 +2402,15 @@ static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_ info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); - insthi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi); - info->aid = ((insthi >> 2) & 0x03); - info->socket_id = insthi & 0x03; + /* + * Unfied DieID Format: SAASS. A:AID, S:Socket. 
+ * Unfied DieID[4] = InstanceId[0] + * Unfied DieID[0:3] = InstanceIdHi[0:3] + */ + instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi); + instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo); + info->aid = ((instidhi >> 2) & 0x03); + info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03); } static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, @@ -2578,6 +2584,7 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct uint32_t instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + instlo &= GENMASK(31, 1); switch (instlo) { case 0x36430400: /* SMNAID XCD 0 */ case 0x38430400: /* SMNAID XCD 1 */ @@ -2596,6 +2603,7 @@ static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amd uint32_t errcode, instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + instlo &= GENMASK(31, 1); if (instlo != 0x03b30400) return false; -- cgit v1.2.3 From fcfc6ceec3ebb725a0d6381a1120e7cd546e1df4 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Mon, 30 Oct 2023 13:29:51 -0400 Subject: drm/amd/display: Enable CM low mem power optimization [WHY & HOW] MPC MCM low mem power optimization still causes color distortion on first SCE enablement, only forces light sleep for it. DPP low memory power optimization still needs this bit to save power. Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Yihan Zhu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c | 13 ++++++++----- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 994b21ed272f..1a2adb354718 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -71,21 +71,24 @@ void mpc32_power_on_blnd_lut( { struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); +/* if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) { if (power_on) { REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0); REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5); } else if (!mpc->ctx->dc->debug.disable_mem_low_power) { - ASSERT(false); - /* TODO: change to mpc - * dpp_base->ctx->dc->optimized_required = true; - * dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; - */ + //TODO: change to mpc + dpp_base->ctx->dc->optimized_required = true; + dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; } } else { REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, MPCC_MCM_1DLUT_MEM_PWR_FORCE, power_on == true ? 0 : 1); } +*/ + + REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, + MPCC_MCM_1DLUT_MEM_PWR_FORCE, power_on == true ? 
0 : 1); } static enum dc_lut_mode mpc32_get_post1dlut_current(struct mpc *mpc, uint32_t mpcc_id) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index c7e011d26d41..7a3faf2b1f06 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -724,7 +724,7 @@ static const struct dc_debug_options debug_defaults_drv = { .i2c = true, .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled .dscl = true, - .cm = false, + .cm = true, .mpc = true, .optc = true, .vpg = true, -- cgit v1.2.3 From c41028a2a16303e5a59e11338d6ef5475945c79d Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 9 Nov 2023 16:42:32 -0500 Subject: drm/amd/display: add a debugfs interface for the DMUB trace mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For features that are implemented primarily in DMUB (e.g. PSR), it is useful to be able to trace them at a DMUB level from the kernel, especially when debugging issues. So, introduce a debugfs interface that is able to read and set the DMUB trace mask dynamically at runtime and document how to use it. Cc: Alex Deucher Cc: Mario Limonciello Reviewed-by: Aurabindo Pillai Acked-by: Christian König Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/display/dc-debug.rst | 41 +++++++++ .../gpu/amdgpu/display/trace-groups-table.csv | 29 ++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 101 +++++++++++++++++++++ drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 40 +++++++- 4 files changed, 209 insertions(+), 2 deletions(-) create mode 100644 Documentation/gpu/amdgpu/display/trace-groups-table.csv (limited to 'drivers/gpu') diff --git a/Documentation/gpu/amdgpu/display/dc-debug.rst b/Documentation/gpu/amdgpu/display/dc-debug.rst index 40c55a618918..817631b1dbf3 100644 --- a/Documentation/gpu/amdgpu/display/dc-debug.rst +++ b/Documentation/gpu/amdgpu/display/dc-debug.rst @@ -75,3 +75,44 @@ change in real-time by using something like:: When reporting a bug related to DC, consider attaching this log before and after you reproduce the bug. + +DMUB Firmware Debug +=================== + +Sometimes, dmesg logs aren't enough. This is especially true if a feature is +implemented primarily in DMUB firmware. In such cases, all we see in dmesg when +an issue arises is some generic timeout error. So, to get more relevant +information, we can trace DMUB commands by enabling the relevant bits in +`amdgpu_dm_dmub_trace_mask`. + +Currently, we support the tracing of the following groups: + +Trace Groups +------------ + +.. 
csv-table:: + :header-rows: 1 + :widths: 1, 1 + :file: ./trace-groups-table.csv + +**Note: Not all ASICs support all of the listed trace groups** + +So, to enable just PSR tracing you can use the following command:: + + # echo 0x8020 > /sys/kernel/debug/dri/0/amdgpu_dm_dmub_trace_mask + +Then, you need to enable logging trace events to the buffer, which you can do +using the following:: + + # echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en + +Lastly, after you are able to reproduce the issue you are trying to debug, +you can disable tracing and read the trace log by using the following:: + + # echo 0 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en + # cat /sys/kernel/debug/dri/0/amdgpu_dm_dmub_tracebuffer + +So, when reporting bugs related to features such as PSR and ABM, consider +enabling the relevant bits in the mask before reproducing the issue and +attach the log that you obtain from the trace buffer in any bug reports that you +create. diff --git a/Documentation/gpu/amdgpu/display/trace-groups-table.csv b/Documentation/gpu/amdgpu/display/trace-groups-table.csv new file mode 100644 index 000000000000..3f6a50d1d883 --- /dev/null +++ b/Documentation/gpu/amdgpu/display/trace-groups-table.csv @@ -0,0 +1,29 @@ +Name, Mask Value +INFO, 0x1 +IRQ SVC, 0x2 +VBIOS, 0x4 +REGISTER, 0x8 +PHY DBG, 0x10 +PSR, 0x20 +AUX, 0x40 +SMU, 0x80 +MALL, 0x100 +ABM, 0x200 +ALPM, 0x400 +TIMER, 0x800 +HW LOCK MGR, 0x1000 +INBOX1, 0x2000 +PHY SEQ, 0x4000 +PSR STATE, 0x8000 +ZSTATE, 0x10000 +TRANSMITTER CTL, 0x20000 +PANEL CNTL, 0x40000 +FAMS, 0x80000 +DPIA, 0x100000 +SUBVP, 0x200000 +INBOX0, 0x400000 +SDP, 0x4000000 +REPLAY, 0x8000000 +REPLAY RESIDENCY, 0x20000000 +CURSOR INFO, 0x80000000 +IPS, 0x100000000 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 45c972f2630d..98b41ec7288e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -2971,6 +2971,104 @@ static int allow_edp_hotplug_detection_set(void *data, u64 val) return 0; } +static int dmub_trace_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = data; + struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub; + enum dmub_gpint_command cmd; + enum dmub_status status; + u64 mask = 0xffff; + u8 shift = 0; + u32 res; + int i; + + if (!srv->fw_version) + return -EINVAL; + + for (i = 0; i < 4; i++) { + res = (val & mask) >> shift; + + switch (i) { + case 0: + cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0; + break; + case 1: + cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1; + break; + case 2: + cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2; + break; + case 3: + cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3; + break; + } + + status = dmub_srv_send_gpint_command(srv, cmd, res, 30); + + if (status == DMUB_STATUS_TIMEOUT) + return -ETIMEDOUT; + else if (status == DMUB_STATUS_INVALID) + return -EINVAL; + else if (status != DMUB_STATUS_OK) + return -EIO; + + usleep_range(100, 1000); + + mask <<= 16; + shift += 16; + } + + return 0; +} + +static int dmub_trace_mask_show(void *data, u64 *val) +{ + enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0; + struct amdgpu_device *adev = data; + struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub; + enum dmub_status status; + u8 shift = 0; + u64 raw = 0; + u64 res = 0; + int i = 0; + + if (!srv->fw_version) + return -EINVAL; + + while (i < 4) { + status = dmub_srv_send_gpint_command(srv, cmd, 0, 30); + + 
if (status == DMUB_STATUS_OK) { + status = dmub_srv_get_gpint_response(srv, (u32 *) &raw); + + if (status == DMUB_STATUS_INVALID) + return -EINVAL; + else if (status != DMUB_STATUS_OK) + return -EIO; + } else if (status == DMUB_STATUS_TIMEOUT) { + return -ETIMEDOUT; + } else if (status == DMUB_STATUS_INVALID) { + return -EINVAL; + } else { + return -EIO; + } + + usleep_range(100, 1000); + + cmd++; + res |= (raw << shift); + shift += 16; + i++; + } + + *val = res; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show, + dmub_trace_mask_set, "0x%llx\n"); + /* * Set dmcub trace event IRQ enable or disable. * Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en @@ -3884,6 +3982,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev) debugfs_create_file_unsafe("amdgpu_dm_force_timing_sync", 0644, root, adev, &force_timing_sync_ops); + debugfs_create_file_unsafe("amdgpu_dm_dmub_trace_mask", 0644, root, + adev, &dmub_trace_mask_fops); + debugfs_create_file_unsafe("amdgpu_dm_dmcub_trace_event_en", 0644, root, adev, &dmcub_trace_event_state_fops); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index ed4379c04715..aa6e6923afed 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -818,18 +818,54 @@ enum dmub_gpint_command { * RETURN: Lower 32-bit mask. */ DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK = 101, + /** - * DESC: Updates the trace buffer lower 32-bit mask. + * DESC: Updates the trace buffer mask bit0~bit15. * ARGS: The new mask * RETURN: Lower 32-bit mask. */ DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0 = 102, + /** - * DESC: Updates the trace buffer mask bi0~bit15. + * DESC: Updates the trace buffer mask bit16~bit31. * ARGS: The new mask * RETURN: Lower 32-bit mask. */ DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1 = 103, + + /** + * DESC: Updates the trace buffer mask bit32~bit47. + * ARGS: The new mask + * RETURN: Lower 32-bit mask. + */ + DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2 = 114, + + /** + * DESC: Updates the trace buffer mask bit48~bit63. + * ARGS: The new mask + * RETURN: Lower 32-bit mask. + */ + DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3 = 115, + + /** + * DESC: Read the trace buffer mask bi0~bit15. + */ + DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0 = 116, + + /** + * DESC: Read the trace buffer mask bit16~bit31. + */ + DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD1 = 117, + + /** + * DESC: Read the trace buffer mask bi32~bit47. + */ + DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD2 = 118, + + /** + * DESC: Updates the trace buffer mask bit32~bit63. + */ + DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119, }; /** -- cgit v1.2.3 From 8b8eed05a1c650c27e78bc47d07f7d6c9ba779e8 Mon Sep 17 00:00:00 2001 From: Mounika Adhuri Date: Fri, 6 Oct 2023 15:05:42 +0530 Subject: drm/amd/display: Refactor resource into component directory [WHY] Move all resource files to unique folder resource. [HOW] Created resource folder in dc, moved the dcnxx_resource.c and dcnxx_resource.h files into corresponding new folders inside the resource and made appropriate changes for compilation in Makefiles. 
Reviewed-by: Martin Leung Acked-by: Alex Hung Signed-off-by: Mounika Adhuri Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Makefile | 1 + .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +- drivers/gpu/drm/amd/display/dc/Makefile | 5 +- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 +- drivers/gpu/drm/amd/display/dc/dce100/Makefile | 46 - .../drm/amd/display/dc/dce100/dce100_resource.c | 1179 -------- .../drm/amd/display/dc/dce100/dce100_resource.h | 54 - drivers/gpu/drm/amd/display/dc/dce110/Makefile | 4 +- .../drm/amd/display/dc/dce110/dce110_resource.c | 1551 ----------- .../drm/amd/display/dc/dce110/dce110_resource.h | 54 - drivers/gpu/drm/amd/display/dc/dce112/Makefile | 3 +- .../drm/amd/display/dc/dce112/dce112_resource.c | 1431 ---------- .../drm/amd/display/dc/dce112/dce112_resource.h | 57 - drivers/gpu/drm/amd/display/dc/dce120/Makefile | 2 +- .../drm/amd/display/dc/dce120/dce120_resource.c | 1288 --------- .../drm/amd/display/dc/dce120/dce120_resource.h | 39 - drivers/gpu/drm/amd/display/dc/dce80/Makefile | 3 +- .../gpu/drm/amd/display/dc/dce80/dce80_resource.c | 1544 ----------- .../gpu/drm/amd/display/dc/dce80/dce80_resource.h | 47 - drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 1686 ------------ .../gpu/drm/amd/display/dc/dcn10/dcn10_resource.h | 56 - drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2789 ------------------- .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.h | 170 -- drivers/gpu/drm/amd/display/dc/dcn201/Makefile | 2 +- .../drm/amd/display/dc/dcn201/dcn201_resource.c | 1308 --------- .../drm/amd/display/dc/dcn201/dcn201_resource.h | 50 - drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 1745 ------------ .../gpu/drm/amd/display/dc/dcn21/dcn21_resource.h | 56 - drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 1 - .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 2611 ------------------ .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.h | 108 - drivers/gpu/drm/amd/display/dc/dcn301/Makefile | 2 +- .../drm/amd/display/dc/dcn301/dcn301_resource.c | 1728 ------------ .../drm/amd/display/dc/dcn301/dcn301_resource.h | 45 - drivers/gpu/drm/amd/display/dc/dcn302/Makefile | 2 +- .../drm/amd/display/dc/dcn302/dcn302_resource.c | 1518 ----------- .../drm/amd/display/dc/dcn302/dcn302_resource.h | 38 - drivers/gpu/drm/amd/display/dc/dcn303/Makefile | 2 +- .../drm/amd/display/dc/dcn303/dcn303_resource.c | 1448 ---------- .../drm/amd/display/dc/dcn303/dcn303_resource.h | 38 - drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2218 --------------- .../gpu/drm/amd/display/dc/dcn31/dcn31_resource.h | 97 - drivers/gpu/drm/amd/display/dc/dcn314/Makefile | 2 +- .../drm/amd/display/dc/dcn314/dcn314_resource.c | 2180 --------------- .../drm/amd/display/dc/dcn314/dcn314_resource.h | 50 - drivers/gpu/drm/amd/display/dc/dcn315/Makefile | 30 - .../drm/amd/display/dc/dcn315/dcn315_resource.c | 2151 --------------- .../drm/amd/display/dc/dcn315/dcn315_resource.h | 44 - drivers/gpu/drm/amd/display/dc/dcn316/Makefile | 30 - .../drm/amd/display/dc/dcn316/dcn316_resource.c | 2038 -------------- .../drm/amd/display/dc/dcn316/dcn316_resource.h | 44 - drivers/gpu/drm/amd/display/dc/dcn32/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2862 -------------------- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 
1263 --------- .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn321/Makefile | 2 +- .../drm/amd/display/dc/dcn321/dcn321_resource.c | 2065 -------------- .../drm/amd/display/dc/dcn321/dcn321_resource.h | 45 - drivers/gpu/drm/amd/display/dc/dcn35/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn35/dcn35_resource.c | 2141 --------------- .../gpu/drm/amd/display/dc/dcn35/dcn35_resource.h | 310 --- .../gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c | 2 +- drivers/gpu/drm/amd/display/dc/resource/Makefile | 199 ++ .../display/dc/resource/dce100/dce100_resource.c | 1179 ++++++++ .../display/dc/resource/dce100/dce100_resource.h | 54 + .../display/dc/resource/dce110/dce110_resource.c | 1551 +++++++++++ .../display/dc/resource/dce110/dce110_resource.h | 54 + .../display/dc/resource/dce112/dce112_resource.c | 1431 ++++++++++ .../display/dc/resource/dce112/dce112_resource.h | 57 + .../display/dc/resource/dce120/dce120_resource.c | 1288 +++++++++ .../display/dc/resource/dce120/dce120_resource.h | 39 + .../amd/display/dc/resource/dce80/CMakeLists.txt | 4 + .../amd/display/dc/resource/dce80/dce80_resource.c | 1544 +++++++++++ .../amd/display/dc/resource/dce80/dce80_resource.h | 47 + .../amd/display/dc/resource/dcn10/dcn10_resource.c | 1689 ++++++++++++ .../amd/display/dc/resource/dcn10/dcn10_resource.h | 56 + .../amd/display/dc/resource/dcn20/dcn20_resource.c | 2789 +++++++++++++++++++ .../amd/display/dc/resource/dcn20/dcn20_resource.h | 170 ++ .../display/dc/resource/dcn201/dcn201_resource.c | 1308 +++++++++ .../display/dc/resource/dcn201/dcn201_resource.h | 50 + .../amd/display/dc/resource/dcn21/dcn21_resource.c | 1745 ++++++++++++ .../amd/display/dc/resource/dcn21/dcn21_resource.h | 56 + .../amd/display/dc/resource/dcn30/dcn30_resource.c | 2611 ++++++++++++++++++ .../amd/display/dc/resource/dcn30/dcn30_resource.h | 108 + .../display/dc/resource/dcn301/dcn301_resource.c | 1728 ++++++++++++ .../display/dc/resource/dcn301/dcn301_resource.h | 45 + .../display/dc/resource/dcn302/dcn302_resource.c | 1518 +++++++++++ .../display/dc/resource/dcn302/dcn302_resource.h | 38 + .../display/dc/resource/dcn303/dcn303_resource.c | 1448 ++++++++++ .../display/dc/resource/dcn303/dcn303_resource.h | 38 + .../amd/display/dc/resource/dcn31/dcn31_resource.c | 2218 +++++++++++++++ .../amd/display/dc/resource/dcn31/dcn31_resource.h | 97 + .../display/dc/resource/dcn314/dcn314_resource.c | 2180 +++++++++++++++ .../display/dc/resource/dcn314/dcn314_resource.h | 50 + .../display/dc/resource/dcn315/dcn315_resource.c | 2151 +++++++++++++++ .../display/dc/resource/dcn315/dcn315_resource.h | 44 + .../display/dc/resource/dcn316/dcn316_resource.c | 2038 ++++++++++++++ .../display/dc/resource/dcn316/dcn316_resource.h | 44 + .../amd/display/dc/resource/dcn32/dcn32_resource.c | 2862 ++++++++++++++++++++ .../amd/display/dc/resource/dcn32/dcn32_resource.h | 1263 +++++++++ .../display/dc/resource/dcn321/dcn321_resource.c | 2065 ++++++++++++++ .../display/dc/resource/dcn321/dcn321_resource.h | 45 + .../amd/display/dc/resource/dcn35/dcn35_resource.c | 2141 +++++++++++++++ .../amd/display/dc/resource/dcn35/dcn35_resource.h | 310 +++ 108 files changed, 40376 insertions(+), 40281 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dce100/Makefile delete mode 100644 drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c delete mode 100644 
drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn315/Makefile delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn316/Makefile delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/Makefile create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c create mode 100644 
drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c create mode 100644 drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index af17ab8027df..71192fc81a20 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -30,6 +30,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color diff --git 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 11da0eebee6c..ec87e31c6fe8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -44,7 +44,7 @@ #include "amdgpu_dm_debugfs.h" #endif -#include "dc/dcn20/dcn20_resource.h" +#include "dc/resource/dcn20/dcn20_resource.h" #define PEAK_FACTOR_X1000 1006 diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 3a169b78e7e4..ab51a065cf0e 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -22,7 +22,7 @@ # # Makefile for Display Core (dc) component. -DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc +DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource ifdef CONFIG_DRM_AMD_DC_FP @@ -38,8 +38,6 @@ DC_LIBS += dcn302 DC_LIBS += dcn303 DC_LIBS += dcn31 DC_LIBS += dcn314 -DC_LIBS += dcn315 -DC_LIBS += dcn316 DC_LIBS += dcn32 DC_LIBS += dcn321 DC_LIBS += dcn35 @@ -51,7 +49,6 @@ DC_LIBS += dce120 DC_LIBS += dce112 DC_LIBS += dce110 -DC_LIBS += dce100 DC_LIBS += dce80 ifdef CONFIG_DRM_AMD_DC_SI diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a1f1d1003992..6159d819c5c5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -69,8 +69,8 @@ #include "dcn314/dcn314_resource.h" #include "dcn315/dcn315_resource.h" #include "dcn316/dcn316_resource.h" -#include "../dcn32/dcn32_resource.h" -#include "../dcn321/dcn321_resource.h" +#include "dcn32/dcn32_resource.h" +#include "dcn321/dcn321_resource.h" #include "dcn35/dcn35_resource.h" #define VISUAL_CONFIRM_BASE_DEFAULT 3 diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dce100/Makefile deleted file mode 100644 index 0d2f6bbf7558..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -# -# Copyright 2017 Advanced Micro Devices, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# -# Makefile for the 'controller' sub-component of DAL. -# It provides the control and status of HW CRTC block. 
- -CFLAGS_$(AMDDALPATH)/dc/dce100/dce100_resource.o = $(call cc-disable-warning, override-init) - -DCE100 = dce100_resource.o - -AMD_DAL_DCE100 = $(addprefix $(AMDDALPATH)/dc/dce100/,$(DCE100)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCE100) - - -############################################################################### -# DCE 10x -############################################################################### -ifdef 0#CONFIG_DRM_AMD_DC_DCE11_0 -TG_DCE100 = dce100_resource.o - -AMD_DAL_TG_DCE100 = $(addprefix \ - $(AMDDALPATH)/dc/dce100/,$(TG_DCE100)) - -AMD_DISPLAY_FILES += $(AMD_DAL_TG_DCE100) -endif - diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c deleted file mode 100644 index 53a5f4cb648c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ /dev/null @@ -1,1179 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dm_services.h" - -#include "link_encoder.h" -#include "stream_encoder.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dce110/dce110_timing_generator.h" -#include "irq/dce110/irq_service_dce110.h" -#include "dce/dce_link_encoder.h" -#include "dce/dce_stream_encoder.h" -#include "dce/dce_mem_input.h" -#include "dce/dce_ipp.h" -#include "dce/dce_transform.h" -#include "dce/dce_opp.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "dce100/dce100_hwseq.h" -#include "dce/dce_panel_cntl.h" - -#include "reg_helper.h" - -#include "dce/dce_10_0_d.h" -#include "dce/dce_10_0_sh_mask.h" - -#include "dce/dce_dmcu.h" -#include "dce/dce_aux.h" -#include "dce/dce_abm.h" -#include "dce/dce_i2c.h" - -#include "dce100_resource.h" - -#ifndef mmMC_HUB_RDREQ_DMIF_LIMIT -#include "gmc/gmc_8_2_d.h" -#include "gmc/gmc_8_2_sh_mask.h" -#endif - -#ifndef mmDP_DPHY_INTERNAL_CTRL - #define mmDP_DPHY_INTERNAL_CTRL 0x4aa7 - #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x4aa7 - #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x4ba7 - #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x4ca7 - #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x4da7 - #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x4ea7 - #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x4fa7 - #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x54a7 - #define mmDP7_DP_DPHY_INTERNAL_CTRL 0x56a7 - #define mmDP8_DP_DPHY_INTERNAL_CTRL 0x57a7 -#endif - -#ifndef mmBIOS_SCRATCH_2 - #define mmBIOS_SCRATCH_2 0x05CB - #define mmBIOS_SCRATCH_3 0x05CC - #define mmBIOS_SCRATCH_6 0x05CF -#endif - -#ifndef mmDP_DPHY_BS_SR_SWAP_CNTL - #define mmDP_DPHY_BS_SR_SWAP_CNTL 0x4ADC - #define mmDP0_DP_DPHY_BS_SR_SWAP_CNTL 0x4ADC - #define mmDP1_DP_DPHY_BS_SR_SWAP_CNTL 0x4BDC - #define mmDP2_DP_DPHY_BS_SR_SWAP_CNTL 0x4CDC - #define mmDP3_DP_DPHY_BS_SR_SWAP_CNTL 0x4DDC - #define mmDP4_DP_DPHY_BS_SR_SWAP_CNTL 0x4EDC - #define mmDP5_DP_DPHY_BS_SR_SWAP_CNTL 0x4FDC - #define mmDP6_DP_DPHY_BS_SR_SWAP_CNTL 0x54DC -#endif - -#ifndef mmDP_DPHY_FAST_TRAINING - #define mmDP_DPHY_FAST_TRAINING 0x4ABC - #define mmDP0_DP_DPHY_FAST_TRAINING 0x4ABC - #define mmDP1_DP_DPHY_FAST_TRAINING 0x4BBC - #define mmDP2_DP_DPHY_FAST_TRAINING 0x4CBC - #define mmDP3_DP_DPHY_FAST_TRAINING 0x4DBC - #define mmDP4_DP_DPHY_FAST_TRAINING 0x4EBC - #define mmDP5_DP_DPHY_FAST_TRAINING 0x4FBC - #define mmDP6_DP_DPHY_FAST_TRAINING 0x54BC -#endif - -static const struct dce110_timing_generator_offsets dce100_tg_offsets[] = { - { - .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP0_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP1_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP2_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP3_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP4_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP5_GRPH_CONTROL - mmGRPH_CONTROL), - } -}; - -/* set register offset */ -#define SR(reg_name)\ - .reg_name = mm ## reg_name - -/* set register offset with instance */ -#define SRI(reg_name, block, id)\ - .reg_name = mm ## block ## id ## _ ## reg_name - -#define ipp_regs(id)\ -[id] = {\ - IPP_DCE100_REG_LIST_DCE_BASE(id)\ -} - -static const struct dce_ipp_registers ipp_regs[] 
= { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2), - ipp_regs(3), - ipp_regs(4), - ipp_regs(5) -}; - -static const struct dce_ipp_shift ipp_shift = { - IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce_ipp_mask ipp_mask = { - IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -#define transform_regs(id)\ -[id] = {\ - XFM_COMMON_REG_LIST_DCE100(id)\ -} - -static const struct dce_transform_registers xfm_regs[] = { - transform_regs(0), - transform_regs(1), - transform_regs(2), - transform_regs(3), - transform_regs(4), - transform_regs(5) -}; - -static const struct dce_transform_shift xfm_shift = { - XFM_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_transform_mask xfm_mask = { - XFM_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -#define aux_regs(id)\ -[id] = {\ - AUX_REG_LIST(id)\ -} - -static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4), - hpd_regs(5) -}; - -#define link_regs(id)\ -[id] = {\ - LE_DCE100_REG_LIST(id)\ -} - -static const struct dce110_link_enc_registers link_enc_regs[] = { - link_regs(0), - link_regs(1), - link_regs(2), - link_regs(3), - link_regs(4), - link_regs(5), - link_regs(6), -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_COMMON_REG_LIST_DCE_BASE(id),\ - .AFMT_CNTL = 0,\ -} - -static const struct dce110_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4), - stream_enc_regs(5), - stream_enc_regs(6) -}; - -static const struct dce_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCE80_100(__SHIFT) -}; - -static const struct dce_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCE80_100(_MASK) -}; - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCE_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_DCE_100_REG_LIST(id),\ -} - -static const struct dce_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4), - opp_regs(5) -}; - -static const struct dce_opp_shift opp_shift = { - OPP_COMMON_MASK_SH_LIST_DCE_100(__SHIFT) -}; - -static const struct dce_opp_mask opp_mask = { - OPP_COMMON_MASK_SH_LIST_DCE_100(_MASK) -}; -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = 0 \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), - aux_engine_regs(5) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6), -}; - -static const struct dce_audio_shift audio_shift = { - AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define clk_src_regs(id)\ -[id] = {\ - 
CS_COMMON_REG_LIST_DCE_100_110(id),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0), - clk_src_regs(1), - clk_src_regs(2) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCE110(_MASK) -}; - -static const struct dce_abm_registers abm_regs = { - ABM_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCE110(_MASK) -}; - -#define DCFE_MEM_PWR_CTRL_REG_BASE 0x1b03 - -static const struct bios_registers bios_regs = { - .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, - .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 -}; - -static const struct resource_caps res_cap = { - .num_timing_generator = 6, - .num_audio = 6, - .num_stream_encoder = 6, - .num_pll = 3, - .num_ddc = 6, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCE_RGB, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = false, - .fp16 = true - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 1, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 1, - .fp16 = 1 - } -}; - -static const struct dc_debug_options debug_defaults = { - .enable_legacy_fast_update = true, -}; - -#define CTX ctx -#define REG(reg) mm ## reg - -#ifndef mmCC_DC_HDMI_STRAPS -#define mmCC_DC_HDMI_STRAPS 0x1918 -#define CC_DC_HDMI_STRAPS__HDMI_DISABLE_MASK 0x40 -#define CC_DC_HDMI_STRAPS__HDMI_DISABLE__SHIFT 0x6 -#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x700 -#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 -#endif - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - case TRANSMITTER_UNIPHY_B: - return 1; - case TRANSMITTER_UNIPHY_C: - return 2; - case TRANSMITTER_UNIPHY_D: - return 3; - case TRANSMITTER_UNIPHY_E: - return 4; - case TRANSMITTER_UNIPHY_F: - return 5; - case TRANSMITTER_UNIPHY_G: - return 6; - default: - ASSERT(0); - return 0; - } -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - REG_GET_2(CC_DC_HDMI_STRAPS, - HDMI_DISABLE, &straps->hdmi_disable, - AUDIO_STREAM_NUMBER, &straps->audio_stream_number); - - REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio); -} - -static struct audio *create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct timing_generator *dce100_timing_generator_create( - struct dc_context *ctx, - uint32_t instance, - const struct dce110_timing_generator_offsets *offsets) -{ - struct dce110_timing_generator *tg110 = - kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); - - if (!tg110) - return NULL; - - dce110_timing_generator_construct(tg110, ctx, instance, offsets); - return &tg110->base; -} - -static struct stream_encoder *dce100_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dce110_stream_encoder *enc110 = - kzalloc(sizeof(struct dce110_stream_encoder), 
GFP_KERNEL); - - if (!enc110) - return NULL; - - dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], &se_shift, &se_mask); - return &enc110->base; -} - -#define SRII(reg_name, block, id)\ - .reg_name[id] = mm ## block ## id ## _ ## reg_name - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCE10_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCE10_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCE10_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dce100_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = create_audio, - .create_stream_encoder = dce100_stream_encoder_create, - .create_hwseq = dce100_hwseq_create, -}; - -#define mi_inst_regs(id) { \ - MI_DCE8_REG_LIST(id), \ - .MC_HUB_RDREQ_DMIF_LIMIT = mmMC_HUB_RDREQ_DMIF_LIMIT \ -} -static const struct dce_mem_input_registers mi_regs[] = { - mi_inst_regs(0), - mi_inst_regs(1), - mi_inst_regs(2), - mi_inst_regs(3), - mi_inst_regs(4), - mi_inst_regs(5), -}; - -static const struct dce_mem_input_shift mi_shifts = { - MI_DCE8_MASK_SH_LIST(__SHIFT), - .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE__SHIFT -}; - -static const struct dce_mem_input_mask mi_masks = { - MI_DCE8_MASK_SH_LIST(_MASK), - .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE_MASK -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCE10_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE10_AUX_MASK_SH_LIST(_MASK) -}; - -static struct mem_input *dce100_mem_input_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), - GFP_KERNEL); - - if (!dce_mi) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); - dce_mi->wa.single_head_rdreq_dmif_limit = 2; - return &dce_mi->base; -} - -static void dce100_transform_destroy(struct transform **xfm) -{ - kfree(TO_DCE_TRANSFORM(*xfm)); - *xfm = NULL; -} - -static struct transform *dce100_transform_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_transform *transform = - kzalloc(sizeof(struct dce_transform), GFP_KERNEL); - - if (!transform) - return NULL; - - dce_transform_construct(transform, ctx, inst, - &xfm_regs[inst], &xfm_shift, &xfm_mask); - return &transform->base; -} - -static struct input_pixel_processor *dce100_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); - - if (!ipp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce_ipp_construct(ipp, ctx, inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 300000, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true -}; - -static struct link_encoder *dce100_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dce110_link_encoder *enc110 = - kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; - - if 
(!enc110) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dce110_link_encoder_construct(enc110, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); - return &enc110->base; -} - -static struct panel_cntl *dce100_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static struct output_pixel_processor *dce100_opp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce110_opp *opp = - kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); - - if (!opp) - return NULL; - - dce110_opp_construct(opp, - ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dce100_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), - i2c_inst_regs(6), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -static struct dce_i2c_hw *dce100_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dce100_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct clock_source *dce100_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dce110_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static void dce100_clock_source_destroy(struct clock_source **clk_src) -{ - kfree(TO_DCE110_CLK_SRC(*clk_src)); - *clk_src = NULL; -} - -static void dce100_resource_destruct(struct dce110_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.opps[i] != NULL) - dce110_opp_destroy(&pool->base.opps[i]); - - if (pool->base.transforms[i] != NULL) - dce100_transform_destroy(&pool->base.transforms[i]); - - if (pool->base.ipps[i] != NULL) - dce_ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.mis[i] != NULL) { - kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); - pool->base.mis[i] = NULL; - } - - if 
(pool->base.timing_generators[i] != NULL) { - kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) - kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) - dce100_clock_source_destroy(&pool->base.clock_sources[i]); - } - - if (pool->base.dp_clock_source != NULL) - dce100_clock_source_destroy(&pool->base.dp_clock_source); - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i] != NULL) - dce_aud_destroy(&pool->base.audios[i]); - } - - if (pool->base.abm != NULL) - dce_abm_destroy(&pool->base.abm); - - if (pool->base.dmcu != NULL) - dce_dmcu_destroy(&pool->base.dmcu); - - if (pool->base.irqs != NULL) - dal_irq_service_destroy(&pool->base.irqs); -} - -static enum dc_status build_mapped_resource( - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *stream) -{ - struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - dce110_resource_build_pipe_hw_param(pipe_ctx); - - resource_build_info_frame(pipe_ctx); - - return DC_OK; -} - -static bool dce100_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - int i; - bool at_least_one_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - at_least_one_pipe = true; - } - - if (at_least_one_pipe) { - /* TODO implement when needed but for now hardcode max value*/ - context->bw_ctx.bw.dce.dispclk_khz = 681000; - context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; - } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; - context->bw_ctx.bw.dce.yclk_khz = 0; - } - - return true; -} - -static bool dce100_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - } - - return true; -} - -static enum dc_status dce100_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce100_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - -enum dc_status dce100_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *dc_stream) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - result = resource_map_pool_resources(dc, new_ctx, dc_stream); - - if (result == DC_OK) - result = resource_map_clock_resources(dc, new_ctx, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, new_ctx, dc_stream); - - return result; -} - -static void dce100_destroy_resource_pool(struct resource_pool **pool) -{ - struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); - - dce100_resource_destruct(dce110_pool); - kfree(dce110_pool); - *pool 
= NULL; -} - -enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps) -{ - - if (plane_state->format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return DC_OK; - - return DC_FAIL_SURFACE_VALIDATE; -} - -struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_stream_state *stream) -{ - int i; - int j = -1; - struct dc_link *link = stream->link; - - for (i = 0; i < pool->stream_enc_count; i++) { - if (!res_ctx->is_stream_enc_acquired[i] && - pool->stream_enc[i]) { - /* Store first available for MST second display - * in daisy chain use case - */ - j = i; - if (pool->stream_enc[i]->id == - link->link_enc->preferred_engine) - return pool->stream_enc[i]; - } - } - - /* - * below can happen in cases when stream encoder is acquired: - * 1) for second MST display in chain, so preferred engine already - * acquired; - * 2) for another link, which preferred engine already acquired by any - * MST configuration. - * - * If signal is of DP type and preferred engine not found, return last available - * - * TODO - This is just a patch up and a generic solution is - * required for non DP connectors. - */ - - if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) - return pool->stream_enc[j]; - - return NULL; -} - -static const struct resource_funcs dce100_res_pool_funcs = { - .destroy = dce100_destroy_resource_pool, - .link_enc_create = dce100_link_encoder_create, - .panel_cntl_create = dce100_panel_cntl_create, - .validate_bandwidth = dce100_validate_bandwidth, - .validate_plane = dce100_validate_plane, - .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce100_validate_global, - .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link -}; - -static bool dce100_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dce110_resource_pool *pool) -{ - unsigned int i; - struct dc_context *ctx = dc->ctx; - struct dc_bios *bp; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap; - pool->base.funcs = &dce100_res_pool_funcs; - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - - bp = ctx->dc_bios; - - if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { - pool->base.dp_clock_source = - dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); - - pool->base.clock_sources[0] = - dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false); - pool->base.clock_sources[1] = - dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[2] = - dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); - pool->base.clk_src_count = 3; - - } else { - pool->base.dp_clock_source = - dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); - - pool->base.clock_sources[0] = - dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[1] = - dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); - pool->base.clk_src_count = 2; - } - - if (pool->base.dp_clock_source == NULL) { - dm_error("DC: failed to create dp clock source!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - 
BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - } - - pool->base.dmcu = dce_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - { - struct irq_service_init_data init_data; - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dce110_create(&init_data); - if (!pool->base.irqs) - goto res_create_fail; - } - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = res_cap.num_timing_generator; - pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 40; - dc->caps.i2c_speed_in_khz = 40; - dc->caps.max_cursor_size = 128; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dual_link_dvi = true; - dc->caps.disable_dp_clk_share = true; - dc->caps.extended_aux_timeout_support = false; - dc->debug = debug_defaults; - - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.timing_generators[i] = - dce100_timing_generator_create( - ctx, - i, - &dce100_tg_offsets[i]); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto res_create_fail; - } - - pool->base.mis[i] = dce100_mem_input_create(ctx, i); - if (pool->base.mis[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create memory input!\n"); - goto res_create_fail; - } - - pool->base.ipps[i] = dce100_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create input pixel processor!\n"); - goto res_create_fail; - } - - pool->base.transforms[i] = dce100_transform_create(ctx, i); - if (pool->base.transforms[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create transform!\n"); - goto res_create_fail; - } - - pool->base.opps[i] = dce100_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto res_create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dce100_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto res_create_fail; - } - pool->base.hw_i2cs[i] = dce100_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create i2c engine!!\n"); - goto res_create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto res_create_fail; - - /* Create hardware sequencer */ - dce100_hw_sequencer_construct(dc); - return true; - -res_create_fail: - dce100_resource_destruct(pool); - - return false; -} - -struct resource_pool *dce100_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc) -{ - struct dce110_resource_pool *pool = - kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); 
- - if (!pool) - return NULL; - - if (dce100_resource_construct(num_virtual_links, dc, pool)) - return &pool->base; - - kfree(pool); - BREAK_TO_DEBUGGER(); - return NULL; -} - diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h deleted file mode 100644 index fecab7c560f5..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * - */ -/* - * dce100_resource.h - * - * Created on: 2016-01-20 - * Author: qyang - */ - -#ifndef DCE100_RESOURCE_H_ -#define DCE100_RESOURCE_H_ - -struct dc; -struct resource_pool; -struct dc_validation_set; - -struct resource_pool *dce100_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc); - -enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps); - -enum dc_status dce100_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *dc_stream); - -struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_stream_state *stream); - -#endif /* DCE100_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile index 695a50ed5ad2..f0777d61c2cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile @@ -26,8 +26,8 @@ CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init) DCE110 = dce110_timing_generator.o \ -dce110_compressor.o dce110_resource.o \ -dce110_opp_regamma_v.o dce110_opp_csc_v.o dce110_timing_generator_v.o \ +dce110_compressor.o dce110_opp_regamma_v.o \ +dce110_opp_csc_v.o dce110_timing_generator_v.o \ dce110_mem_input_v.o dce110_opp_v.o dce110_transform_v.o AMD_DAL_DCE110 = $(addprefix $(AMDDALPATH)/dc/dce110/,$(DCE110)) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c deleted file mode 100644 index fe518fd27b08..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ /dev/null @@ -1,1551 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" - -#include "link_encoder.h" -#include "stream_encoder.h" - -#include "resource.h" -#include "dce110/dce110_resource.h" -#include "include/irq_service_interface.h" -#include "dce/dce_audio.h" -#include "dce110/dce110_timing_generator.h" -#include "irq/dce110/irq_service_dce110.h" -#include "dce110/dce110_timing_generator_v.h" -#include "dce/dce_link_encoder.h" -#include "dce/dce_stream_encoder.h" -#include "dce/dce_mem_input.h" -#include "dce110/dce110_mem_input_v.h" -#include "dce/dce_ipp.h" -#include "dce/dce_transform.h" -#include "dce110/dce110_transform_v.h" -#include "dce/dce_opp.h" -#include "dce110/dce110_opp_v.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dce/dce_aux.h" -#include "dce/dce_abm.h" -#include "dce/dce_dmcu.h" -#include "dce/dce_i2c.h" -#include "dce/dce_panel_cntl.h" - -#define DC_LOGGER \ - dc->ctx->logger - -#include "dce110/dce110_compressor.h" - -#include "reg_helper.h" - -#include "dce/dce_11_0_d.h" -#include "dce/dce_11_0_sh_mask.h" - -#ifndef mmMC_HUB_RDREQ_DMIF_LIMIT -#include "gmc/gmc_8_2_d.h" -#include "gmc/gmc_8_2_sh_mask.h" -#endif - -#ifndef mmDP_DPHY_INTERNAL_CTRL - #define mmDP_DPHY_INTERNAL_CTRL 0x4aa7 - #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x4aa7 - #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x4ba7 - #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x4ca7 - #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x4da7 - #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x4ea7 - #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x4fa7 - #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x54a7 - #define mmDP7_DP_DPHY_INTERNAL_CTRL 0x56a7 - #define mmDP8_DP_DPHY_INTERNAL_CTRL 0x57a7 -#endif - -#ifndef mmBIOS_SCRATCH_2 - #define mmBIOS_SCRATCH_2 0x05CB - #define mmBIOS_SCRATCH_3 0x05CC - #define mmBIOS_SCRATCH_6 0x05CF -#endif - -#ifndef mmDP_DPHY_BS_SR_SWAP_CNTL - #define mmDP_DPHY_BS_SR_SWAP_CNTL 0x4ADC - #define mmDP0_DP_DPHY_BS_SR_SWAP_CNTL 0x4ADC - #define mmDP1_DP_DPHY_BS_SR_SWAP_CNTL 0x4BDC - #define mmDP2_DP_DPHY_BS_SR_SWAP_CNTL 0x4CDC - #define mmDP3_DP_DPHY_BS_SR_SWAP_CNTL 0x4DDC - #define mmDP4_DP_DPHY_BS_SR_SWAP_CNTL 0x4EDC - #define mmDP5_DP_DPHY_BS_SR_SWAP_CNTL 0x4FDC - #define mmDP6_DP_DPHY_BS_SR_SWAP_CNTL 0x54DC -#endif - -#ifndef mmDP_DPHY_FAST_TRAINING - #define mmDP_DPHY_FAST_TRAINING 0x4ABC - #define mmDP0_DP_DPHY_FAST_TRAINING 0x4ABC - #define mmDP1_DP_DPHY_FAST_TRAINING 0x4BBC - #define mmDP2_DP_DPHY_FAST_TRAINING 
0x4CBC - #define mmDP3_DP_DPHY_FAST_TRAINING 0x4DBC - #define mmDP4_DP_DPHY_FAST_TRAINING 0x4EBC - #define mmDP5_DP_DPHY_FAST_TRAINING 0x4FBC - #define mmDP6_DP_DPHY_FAST_TRAINING 0x54BC -#endif - -#ifndef DPHY_RX_FAST_TRAINING_CAPABLE - #define DPHY_RX_FAST_TRAINING_CAPABLE 0x1 -#endif - -static const struct dce110_timing_generator_offsets dce110_tg_offsets[] = { - { - .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP0_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP1_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP2_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP3_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP4_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP5_GRPH_CONTROL - mmGRPH_CONTROL), - } -}; - -/* set register offset */ -#define SR(reg_name)\ - .reg_name = mm ## reg_name - -/* set register offset with instance */ -#define SRI(reg_name, block, id)\ - .reg_name = mm ## block ## id ## _ ## reg_name - -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCE110(_MASK) -}; - -static const struct dce_abm_registers abm_regs = { - ABM_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCE110(_MASK) -}; - -#define ipp_regs(id)\ -[id] = {\ - IPP_DCE110_REG_LIST_DCE_BASE(id)\ -} - -static const struct dce_ipp_registers ipp_regs[] = { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2) -}; - -static const struct dce_ipp_shift ipp_shift = { - IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce_ipp_mask ipp_mask = { - IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -#define transform_regs(id)\ -[id] = {\ - XFM_COMMON_REG_LIST_DCE110(id)\ -} - -static const struct dce_transform_registers xfm_regs[] = { - transform_regs(0), - transform_regs(1), - transform_regs(2) -}; - -static const struct dce_transform_shift xfm_shift = { - XFM_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_transform_mask xfm_mask = { - XFM_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -#define aux_regs(id)\ -[id] = {\ - AUX_REG_LIST(id)\ -} - -static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4), - hpd_regs(5) -}; - - -#define link_regs(id)\ -[id] = {\ - LE_DCE110_REG_LIST(id)\ -} - -static const struct dce110_link_enc_registers link_enc_regs[] = { - link_regs(0), - link_regs(1), - link_regs(2), - link_regs(3), - link_regs(4), - link_regs(5), - link_regs(6), -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_COMMON_REG_LIST(id),\ - .TMDS_CNTL = 0,\ -} - -static const struct dce110_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2) -}; - -static const struct 
dce_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCE_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCE_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE_AUX_MASK_SH_LIST(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_DCE_110_REG_LIST(id),\ -} - -static const struct dce_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4), - opp_regs(5) -}; - -static const struct dce_opp_shift opp_shift = { - OPP_COMMON_MASK_SH_LIST_DCE_110(__SHIFT) -}; - -static const struct dce_opp_mask opp_mask = { - OPP_COMMON_MASK_SH_LIST_DCE_110(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = 0 \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), - aux_engine_regs(5) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6), -}; - -static const struct dce_audio_shift audio_shift = { - AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -/* AG TBD Needs to be reduced back to 3 pipes once dce10 hw sequencer implemented. 
*/ - - -#define clk_src_regs(id)\ -[id] = {\ - CS_COMMON_REG_LIST_DCE_100_110(id),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0), - clk_src_regs(1), - clk_src_regs(2) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -static const struct bios_registers bios_regs = { - .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, - .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 -}; - -static const struct resource_caps carrizo_resource_cap = { - .num_timing_generator = 3, - .num_video_plane = 1, - .num_audio = 3, - .num_stream_encoder = 3, - .num_pll = 2, - .num_ddc = 3, -}; - -static const struct resource_caps stoney_resource_cap = { - .num_timing_generator = 2, - .num_video_plane = 1, - .num_audio = 3, - .num_stream_encoder = 3, - .num_pll = 2, - .num_ddc = 3, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCE_RGB, - .per_pixel_alpha = 1, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = false, - .fp16 = true - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 1, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 1, - .fp16 = 1 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults = { - .enable_legacy_fast_update = true, -}; - -static const struct dc_plane_cap underlay_plane_cap = { - .type = DC_PLANE_TYPE_DCE_UNDERLAY, - .per_pixel_alpha = 1, - - .pixel_format_support = { - .argb8888 = false, - .nv12 = true, - .fp16 = false - }, - - .max_upscale_factor = { - .argb8888 = 1, - .nv12 = 16000, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 1, - .nv12 = 250, - .fp16 = 1 - }, - 64, - 64 -}; - -#define CTX ctx -#define REG(reg) mm ## reg - -#ifndef mmCC_DC_HDMI_STRAPS -#define mmCC_DC_HDMI_STRAPS 0x4819 -#define CC_DC_HDMI_STRAPS__HDMI_DISABLE_MASK 0x40 -#define CC_DC_HDMI_STRAPS__HDMI_DISABLE__SHIFT 0x6 -#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x700 -#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 -#endif - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - case TRANSMITTER_UNIPHY_B: - return 1; - case TRANSMITTER_UNIPHY_C: - return 2; - case TRANSMITTER_UNIPHY_D: - return 3; - case TRANSMITTER_UNIPHY_E: - return 4; - case TRANSMITTER_UNIPHY_F: - return 5; - case TRANSMITTER_UNIPHY_G: - return 6; - default: - ASSERT(0); - return 0; - } -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - REG_GET_2(CC_DC_HDMI_STRAPS, - HDMI_DISABLE, &straps->hdmi_disable, - AUDIO_STREAM_NUMBER, &straps->audio_stream_number); - - REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio); -} - -static struct audio *create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct timing_generator *dce110_timing_generator_create( - struct dc_context *ctx, - uint32_t instance, - const struct dce110_timing_generator_offsets *offsets) -{ - struct dce110_timing_generator *tg110 = - kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); - - if (!tg110) - return NULL; - - dce110_timing_generator_construct(tg110, ctx, instance, offsets); - return &tg110->base; -} - -static struct stream_encoder *dce110_stream_encoder_create( - enum engine_id eng_id, - 
struct dc_context *ctx) -{ - struct dce110_stream_encoder *enc110 = - kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); - - if (!enc110) - return NULL; - - dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - return &enc110->base; -} - -#define SRII(reg_name, block, id)\ - .reg_name[id] = mm ## block ## id ## _ ## reg_name - -static const struct dce_hwseq_registers hwseq_stoney_reg = { - HWSEQ_ST_REG_LIST() -}; - -static const struct dce_hwseq_registers hwseq_cz_reg = { - HWSEQ_CZ_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCE11_MASK_SH_LIST(__SHIFT), -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCE11_MASK_SH_LIST(_MASK), -}; - -static struct dce_hwseq *dce110_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = ASIC_REV_IS_STONEY(ctx->asic_id.hw_internal_rev) ? - &hwseq_stoney_reg : &hwseq_cz_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - hws->wa.blnd_crtc_trigger = true; - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = create_audio, - .create_stream_encoder = dce110_stream_encoder_create, - .create_hwseq = dce110_hwseq_create, -}; - -#define mi_inst_regs(id) { \ - MI_DCE11_REG_LIST(id), \ - .MC_HUB_RDREQ_DMIF_LIMIT = mmMC_HUB_RDREQ_DMIF_LIMIT \ -} -static const struct dce_mem_input_registers mi_regs[] = { - mi_inst_regs(0), - mi_inst_regs(1), - mi_inst_regs(2), -}; - -static const struct dce_mem_input_shift mi_shifts = { - MI_DCE11_MASK_SH_LIST(__SHIFT), - .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE__SHIFT -}; - -static const struct dce_mem_input_mask mi_masks = { - MI_DCE11_MASK_SH_LIST(_MASK), - .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE_MASK -}; - - -static struct mem_input *dce110_mem_input_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), - GFP_KERNEL); - - if (!dce_mi) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); - dce_mi->wa.single_head_rdreq_dmif_limit = 3; - return &dce_mi->base; -} - -static void dce110_transform_destroy(struct transform **xfm) -{ - kfree(TO_DCE_TRANSFORM(*xfm)); - *xfm = NULL; -} - -static struct transform *dce110_transform_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_transform *transform = - kzalloc(sizeof(struct dce_transform), GFP_KERNEL); - - if (!transform) - return NULL; - - dce_transform_construct(transform, ctx, inst, - &xfm_regs[inst], &xfm_shift, &xfm_mask); - return &transform->base; -} - -static struct input_pixel_processor *dce110_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); - - if (!ipp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce_ipp_construct(ipp, ctx, inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 300000, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true -}; - -static struct link_encoder *dce110_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dce110_link_encoder *enc110 = - 
kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; - - if (!enc110) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dce110_link_encoder_construct(enc110, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); - return &enc110->base; -} - -static struct panel_cntl *dce110_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static struct output_pixel_processor *dce110_opp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce110_opp *opp = - kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); - - if (!opp) - return NULL; - - dce110_opp_construct(opp, - ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dce110_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), - i2c_inst_regs(6), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -static struct dce_i2c_hw *dce110_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dce100_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct clock_source *dce110_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dce110_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static void dce110_clock_source_destroy(struct clock_source **clk_src) -{ - struct dce110_clk_src *dce110_clk_src; - - if (!clk_src) - return; - - dce110_clk_src = TO_DCE110_CLK_SRC(*clk_src); - - kfree(dce110_clk_src->dp_ss_params); - kfree(dce110_clk_src->hdmi_ss_params); - kfree(dce110_clk_src->dvi_ss_params); - - kfree(dce110_clk_src); - *clk_src = NULL; -} - -static void dce110_resource_destruct(struct dce110_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.opps[i] != NULL) - dce110_opp_destroy(&pool->base.opps[i]); - - if 
(pool->base.transforms[i] != NULL) - dce110_transform_destroy(&pool->base.transforms[i]); - - if (pool->base.ipps[i] != NULL) - dce_ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.mis[i] != NULL) { - kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); - pool->base.mis[i] = NULL; - } - - if (pool->base.timing_generators[i] != NULL) { - kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) - kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dce110_clock_source_destroy(&pool->base.clock_sources[i]); - } - } - - if (pool->base.dp_clock_source != NULL) - dce110_clock_source_destroy(&pool->base.dp_clock_source); - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i] != NULL) { - dce_aud_destroy(&pool->base.audios[i]); - } - } - - if (pool->base.abm != NULL) - dce_abm_destroy(&pool->base.abm); - - if (pool->base.dmcu != NULL) - dce_dmcu_destroy(&pool->base.dmcu); - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } -} - - -static void get_pixel_clock_parameters( - const struct pipe_ctx *pipe_ctx, - struct pixel_clk_params *pixel_clk_params) -{ - const struct dc_stream_state *stream = pipe_ctx->stream; - - /*TODO: is this halved for YCbCr 420? 
in that case we might want to move - * the pixel clock normalization for hdmi up to here instead of doing it - * in pll_adjust_pix_clk - */ - pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; - pixel_clk_params->encoder_object_id = stream->link->link_enc->id; - pixel_clk_params->signal_type = pipe_ctx->stream->signal; - pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; - /* TODO: un-hardcode*/ - pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * - LINK_RATE_REF_FREQ_IN_KHZ; - pixel_clk_params->flags.ENABLE_SS = 0; - pixel_clk_params->color_depth = - stream->timing.display_color_depth; - pixel_clk_params->flags.DISPLAY_BLANKED = 1; - pixel_clk_params->flags.SUPPORT_YCBCR420 = (stream->timing.pixel_encoding == - PIXEL_ENCODING_YCBCR420); - pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { - pixel_clk_params->color_depth = COLOR_DEPTH_888; - } - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2; - } - if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clk_params->requested_pix_clk_100hz *= 2; - -} - -void dce110_resource_build_pipe_hw_param(struct pipe_ctx *pipe_ctx) -{ - get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); - pipe_ctx->clock_source->funcs->get_pix_clk_dividers( - pipe_ctx->clock_source, - &pipe_ctx->stream_res.pix_clk_params, - &pipe_ctx->pll_settings); - resource_build_bit_depth_reduction_params(pipe_ctx->stream, - &pipe_ctx->stream->bit_depth_params); - pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; -} - -static bool is_surface_pixel_format_supported(struct pipe_ctx *pipe_ctx, unsigned int underlay_idx) -{ - if (pipe_ctx->pipe_idx != underlay_idx) - return true; - if (!pipe_ctx->plane_state) - return false; - if (pipe_ctx->plane_state->format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - return true; -} - -static enum dc_status build_mapped_resource( - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *stream) -{ - struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - if (!is_surface_pixel_format_supported(pipe_ctx, - dc->res_pool->underlay_pipe_index)) - return DC_SURFACE_PIXEL_FORMAT_UNSUPPORTED; - - dce110_resource_build_pipe_hw_param(pipe_ctx); - - /* TODO: validate audio ASIC caps, encoder */ - - resource_build_info_frame(pipe_ctx); - - return DC_OK; -} - -static bool dce110_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool result = false; - - DC_LOG_BANDWIDTH_CALCS( - "%s: start", - __func__); - - if (bw_calcs( - dc->ctx, - dc->bw_dceip, - dc->bw_vbios, - context->res_ctx.pipe_ctx, - dc->res_pool->pipe_count, - &context->bw_ctx.bw.dce)) - result = true; - - if (!result) - DC_LOG_BANDWIDTH_VALIDATION("%s: %dx%d@%d Bandwidth validation failed!\n", - __func__, - context->streams[0]->timing.h_addressable, - context->streams[0]->timing.v_addressable, - context->streams[0]->timing.pix_clk_100hz / 10); - - if (memcmp(&dc->current_state->bw_ctx.bw.dce, - &context->bw_ctx.bw.dce, sizeof(context->bw_ctx.bw.dce))) { - - DC_LOG_BANDWIDTH_CALCS( - "%s: finish,\n" - "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" - "stutMark_b: %d stutMark_a: %d\n" - "nbpMark_b: %d nbpMark_a: %d 
urgentMark_b: %d urgentMark_a: %d\n" - "stutMark_b: %d stutMark_a: %d\n" - "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" - "stutMark_b: %d stutMark_a: %d stutter_mode_enable: %d\n" - "cstate: %d pstate: %d nbpstate: %d sync: %d dispclk: %d\n" - "sclk: %d sclk_sleep: %d yclk: %d blackout_recovery_time_us: %d\n" - , - __func__, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].b_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].a_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[0].b_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[0].a_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].b_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].a_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].b_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].a_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[1].b_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[1].a_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].b_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].a_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].b_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].a_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[2].b_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[2].a_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].b_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].a_mark, - context->bw_ctx.bw.dce.stutter_mode_enable, - context->bw_ctx.bw.dce.cpuc_state_change_enable, - context->bw_ctx.bw.dce.cpup_state_change_enable, - context->bw_ctx.bw.dce.nbp_state_change_enable, - context->bw_ctx.bw.dce.all_displays_in_sync, - context->bw_ctx.bw.dce.dispclk_khz, - context->bw_ctx.bw.dce.sclk_khz, - context->bw_ctx.bw.dce.sclk_deep_sleep_khz, - context->bw_ctx.bw.dce.yclk_khz, - context->bw_ctx.bw.dce.blackout_recovery_time_us); - } - return result; -} - -static enum dc_status dce110_validate_plane(const struct dc_plane_state *plane_state, - struct dc_caps *caps) -{ - if (((plane_state->dst_rect.width * 2) < plane_state->src_rect.width) || - ((plane_state->dst_rect.height * 2) < plane_state->src_rect.height)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - -static bool dce110_validate_surface_sets( - struct dc_state *context) -{ - int i, j; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 2) - return false; - - for (j = 0; j < context->stream_status[i].plane_count; j++) { - struct dc_plane_state *plane = - context->stream_status[i].plane_states[j]; - - /* underlay validation */ - if (plane->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - - if ((plane->src_rect.width > 1920 || - plane->src_rect.height > 1080)) - return false; - - /* we don't have the logic to support underlay - * only yet so block the use case where we get - * NV12 plane as top layer - */ - if (j == 0) - return false; - - /* irrespective of plane format, - * stream should be RGB encoded - */ - if (context->streams[i]->timing.pixel_encoding - != PIXEL_ENCODING_RGB) - return false; - - } - - } - } - - return true; -} - -static enum dc_status dce110_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce110_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - -static enum dc_status dce110_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *dc_stream) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - result = resource_map_pool_resources(dc, new_ctx, dc_stream); - - if (result == DC_OK) - 
result = resource_map_clock_resources(dc, new_ctx, dc_stream); - - - if (result == DC_OK) - result = build_mapped_resource(dc, new_ctx, dc_stream); - - return result; -} - -static struct pipe_ctx *dce110_acquire_underlay( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *opp_head_pipe) -{ - struct dc_stream_state *stream = opp_head_pipe->stream; - struct dc *dc = stream->ctx->dc; - struct dce_hwseq *hws = dc->hwseq; - struct resource_context *res_ctx = &new_ctx->res_ctx; - unsigned int underlay_idx = pool->underlay_pipe_index; - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[underlay_idx]; - - if (res_ctx->pipe_ctx[underlay_idx].stream) - return NULL; - - pipe_ctx->stream_res.tg = pool->timing_generators[underlay_idx]; - pipe_ctx->plane_res.mi = pool->mis[underlay_idx]; - /*pipe_ctx->plane_res.ipp = res_ctx->pool->ipps[underlay_idx];*/ - pipe_ctx->plane_res.xfm = pool->transforms[underlay_idx]; - pipe_ctx->stream_res.opp = pool->opps[underlay_idx]; - pipe_ctx->pipe_idx = underlay_idx; - - pipe_ctx->stream = stream; - - if (!dc->current_state->res_ctx.pipe_ctx[underlay_idx].stream) { - struct tg_color black_color = {0}; - struct dc_bios *dcb = dc->ctx->dc_bios; - - hws->funcs.enable_display_power_gating( - dc, - pipe_ctx->stream_res.tg->inst, - dcb, PIPE_GATING_CONTROL_DISABLE); - - /* - * This is for powering on underlay, so crtc does not - * need to be enabled - */ - - pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg, - &stream->timing, - 0, - 0, - 0, - 0, - pipe_ctx->stream->signal, - false); - - pipe_ctx->stream_res.tg->funcs->enable_advanced_request( - pipe_ctx->stream_res.tg, - true, - &stream->timing); - - pipe_ctx->plane_res.mi->funcs->allocate_mem_input(pipe_ctx->plane_res.mi, - stream->timing.h_total, - stream->timing.v_total, - stream->timing.pix_clk_100hz / 10, - new_ctx->stream_count); - - color_space_to_black_color(dc, - COLOR_SPACE_YCBCR601, &black_color); - pipe_ctx->stream_res.tg->funcs->set_blank_color( - pipe_ctx->stream_res.tg, - &black_color); - } - - return pipe_ctx; -} - -static void dce110_destroy_resource_pool(struct resource_pool **pool) -{ - struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); - - dce110_resource_destruct(dce110_pool); - kfree(dce110_pool); - *pool = NULL; -} - -struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_stream_state *stream) -{ - int i; - int j = -1; - struct dc_link *link = stream->link; - - for (i = 0; i < pool->stream_enc_count; i++) { - if (!res_ctx->is_stream_enc_acquired[i] && - pool->stream_enc[i]) { - /* Store first available for MST second display - * in daisy chain use case - */ - j = i; - if (pool->stream_enc[i]->id == - link->link_enc->preferred_engine) - return pool->stream_enc[i]; - } - } - - /* - * For CZ and later, we can allow DIG FE and BE to differ for all display types - */ - - if (j >= 0) - return pool->stream_enc[j]; - - return NULL; -} - - -static const struct resource_funcs dce110_res_pool_funcs = { - .destroy = dce110_destroy_resource_pool, - .link_enc_create = dce110_link_encoder_create, - .panel_cntl_create = dce110_panel_cntl_create, - .validate_bandwidth = dce110_validate_bandwidth, - .validate_plane = dce110_validate_plane, - .acquire_free_pipe_as_secondary_dpp_pipe = dce110_acquire_underlay, - .add_stream_to_ctx = dce110_add_stream_to_ctx, - .validate_global = dce110_validate_global, - 
.find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link -}; - -static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) -{ - struct dce110_timing_generator *dce110_tgv = kzalloc(sizeof(*dce110_tgv), - GFP_KERNEL); - struct dce_transform *dce110_xfmv = kzalloc(sizeof(*dce110_xfmv), - GFP_KERNEL); - struct dce_mem_input *dce110_miv = kzalloc(sizeof(*dce110_miv), - GFP_KERNEL); - struct dce110_opp *dce110_oppv = kzalloc(sizeof(*dce110_oppv), - GFP_KERNEL); - - if (!dce110_tgv || !dce110_xfmv || !dce110_miv || !dce110_oppv) { - kfree(dce110_tgv); - kfree(dce110_xfmv); - kfree(dce110_miv); - kfree(dce110_oppv); - return false; - } - - dce110_opp_v_construct(dce110_oppv, ctx); - - dce110_timing_generator_v_construct(dce110_tgv, ctx); - dce110_mem_input_v_construct(dce110_miv, ctx); - dce110_transform_v_construct(dce110_xfmv, ctx); - - pool->opps[pool->pipe_count] = &dce110_oppv->base; - pool->timing_generators[pool->pipe_count] = &dce110_tgv->base; - pool->mis[pool->pipe_count] = &dce110_miv->base; - pool->transforms[pool->pipe_count] = &dce110_xfmv->base; - pool->pipe_count++; - - /* update the public caps to indicate an underlay is available */ - ctx->dc->caps.max_slave_planes = 1; - ctx->dc->caps.max_slave_yuv_planes = 1; - ctx->dc->caps.max_slave_rgb_planes = 0; - - return true; -} - -static void bw_calcs_data_update_from_pplib(struct dc *dc) -{ - struct dm_pp_clock_levels clks = {0}; - - /*do system clock*/ - dm_pp_get_clock_levels_by_type( - dc->ctx, - DM_PP_CLOCK_TYPE_ENGINE_CLK, - &clks); - /* convert all the clock fro kHz to fix point mHz */ - dc->bw_vbios->high_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1], 1000); - dc->bw_vbios->mid1_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels/8], 1000); - dc->bw_vbios->mid2_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*2/8], 1000); - dc->bw_vbios->mid3_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*3/8], 1000); - dc->bw_vbios->mid4_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*4/8], 1000); - dc->bw_vbios->mid5_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*5/8], 1000); - dc->bw_vbios->mid6_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*6/8], 1000); - dc->bw_vbios->low_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[0], 1000); - dc->sclk_lvls = clks; - - /*do display clock*/ - dm_pp_get_clock_levels_by_type( - dc->ctx, - DM_PP_CLOCK_TYPE_DISPLAY_CLK, - &clks); - dc->bw_vbios->high_voltage_max_dispclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1], 1000); - dc->bw_vbios->mid_voltage_max_dispclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels>>1], 1000); - dc->bw_vbios->low_voltage_max_dispclk = bw_frc_to_fixed( - clks.clocks_in_khz[0], 1000); - - /*do memory clock*/ - dm_pp_get_clock_levels_by_type( - dc->ctx, - DM_PP_CLOCK_TYPE_MEMORY_CLK, - &clks); - - dc->bw_vbios->low_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); - dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER_CZ, - 1000); - dc->bw_vbios->high_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER_CZ, - 1000); -} - -static const struct resource_caps *dce110_resource_cap( - struct hw_asic_id *asic_id) -{ - if (ASIC_REV_IS_STONEY(asic_id->hw_internal_rev)) - return &stoney_resource_cap; - else - return &carrizo_resource_cap; -} - -static bool dce110_resource_construct( - uint8_t 
num_virtual_links, - struct dc *dc, - struct dce110_resource_pool *pool, - struct hw_asic_id asic_id) -{ - unsigned int i; - struct dc_context *ctx = dc->ctx; - struct dc_bios *bp; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = dce110_resource_cap(&ctx->asic_id); - pool->base.funcs = &dce110_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.underlay_pipe_index = pool->base.pipe_count; - pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 150; - dc->caps.i2c_speed_in_khz = 40; - dc->caps.i2c_speed_in_khz_hdcp = 40; - dc->caps.max_cursor_size = 128; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.is_apu = true; - dc->caps.extended_aux_timeout_support = false; - dc->debug = debug_defaults; - - /************************************************* - * Create resources * - *************************************************/ - - bp = ctx->dc_bios; - - if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { - pool->base.dp_clock_source = - dce110_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); - - pool->base.clock_sources[0] = - dce110_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[1] = - dce110_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, - &clk_src_regs[1], false); - - pool->base.clk_src_count = 2; - - /* TODO: find out if CZ support 3 PLLs */ - } - - if (pool->base.dp_clock_source == NULL) { - dm_error("DC: failed to create dp clock source!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - } - - pool->base.dmcu = dce_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - { - struct irq_service_init_data init_data; - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dce110_create(&init_data); - if (!pool->base.irqs) - goto res_create_fail; - } - - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.timing_generators[i] = dce110_timing_generator_create( - ctx, i, &dce110_tg_offsets[i]); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto res_create_fail; - } - - pool->base.mis[i] = dce110_mem_input_create(ctx, i); - if (pool->base.mis[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create memory input!\n"); - goto res_create_fail; - } - - pool->base.ipps[i] = dce110_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create input pixel processor!\n"); - goto res_create_fail; - } - - pool->base.transforms[i] = dce110_transform_create(ctx, i); - if (pool->base.transforms[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create transform!\n"); - goto res_create_fail; - } - - pool->base.opps[i] = 
dce110_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto res_create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dce110_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto res_create_fail; - } - pool->base.hw_i2cs[i] = dce110_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create i2c engine!!\n"); - goto res_create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - if (dc->config.fbc_support) - dc->fbc_compressor = dce110_compressor_create(ctx); - - if (!underlay_create(ctx, &pool->base)) - goto res_create_fail; - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto res_create_fail; - - /* Create hardware sequencer */ - dce110_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < pool->base.underlay_pipe_index; ++i) - dc->caps.planes[i] = plane_cap; - - dc->caps.planes[pool->base.underlay_pipe_index] = underlay_plane_cap; - - bw_calcs_init(dc->bw_dceip, dc->bw_vbios, dc->ctx->asic_id); - - bw_calcs_data_update_from_pplib(dc); - - return true; - -res_create_fail: - dce110_resource_destruct(pool); - return false; -} - -struct resource_pool *dce110_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc, - struct hw_asic_id asic_id) -{ - struct dce110_resource_pool *pool = - kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dce110_resource_construct(num_virtual_links, dc, pool, asic_id)) - return &pool->base; - - kfree(pool); - BREAK_TO_DEBUGGER(); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h deleted file mode 100644 index aa4531e0800e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -* Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DC_RESOURCE_DCE110_H__ -#define __DC_RESOURCE_DCE110_H__ - -#include "core_types.h" - -struct dc; -struct resource_pool; - -#define TO_DCE110_RES_POOL(pool)\ - container_of(pool, struct dce110_resource_pool, base) - -struct dce110_resource_pool { - struct resource_pool base; -}; - -void dce110_resource_build_pipe_hw_param(struct pipe_ctx *pipe_ctx); - -struct resource_pool *dce110_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc, - struct hw_asic_id asic_id); - -struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_stream_state *stream); - -#endif /* __DC_RESOURCE_DCE110_H__ */ - diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile index e846ef58cab3..7e92effec894 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile @@ -25,8 +25,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init) -DCE112 = dce112_compressor.o \ -dce112_resource.o +DCE112 = dce112_compressor.o AMD_DAL_DCE112 = $(addprefix $(AMDDALPATH)/dc/dce112/,$(DCE112)) diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c deleted file mode 100644 index d1edac46c9a0..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ /dev/null @@ -1,1431 +0,0 @@ -/* -* Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dm_services.h" - -#include "link_encoder.h" -#include "stream_encoder.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dce110/dce110_resource.h" -#include "dce110/dce110_timing_generator.h" - -#include "irq/dce110/irq_service_dce110.h" -#include "dce/dce_mem_input.h" -#include "dce/dce_transform.h" -#include "dce/dce_link_encoder.h" -#include "dce/dce_stream_encoder.h" -#include "dce/dce_audio.h" -#include "dce/dce_opp.h" -#include "dce/dce_ipp.h" -#include "dce/dce_clock_source.h" - -#include "dce/dce_hwseq.h" -#include "dce112/dce112_hwseq.h" -#include "dce/dce_abm.h" -#include "dce/dce_dmcu.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" -#include "dce/dce_panel_cntl.h" - -#include "reg_helper.h" - -#include "dce/dce_11_2_d.h" -#include "dce/dce_11_2_sh_mask.h" - -#include "dce100/dce100_resource.h" -#include "dce112_resource.h" - -#define DC_LOGGER \ - dc->ctx->logger - -#ifndef mmDP_DPHY_INTERNAL_CTRL - #define mmDP_DPHY_INTERNAL_CTRL 0x4aa7 - #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x4aa7 - #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x4ba7 - #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x4ca7 - #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x4da7 - #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x4ea7 - #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x4fa7 - #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x54a7 - #define mmDP7_DP_DPHY_INTERNAL_CTRL 0x56a7 - #define mmDP8_DP_DPHY_INTERNAL_CTRL 0x57a7 -#endif - -#ifndef mmBIOS_SCRATCH_2 - #define mmBIOS_SCRATCH_2 0x05CB - #define mmBIOS_SCRATCH_3 0x05CC - #define mmBIOS_SCRATCH_6 0x05CF -#endif - -#ifndef mmDP_DPHY_BS_SR_SWAP_CNTL - #define mmDP_DPHY_BS_SR_SWAP_CNTL 0x4ADC - #define mmDP0_DP_DPHY_BS_SR_SWAP_CNTL 0x4ADC - #define mmDP1_DP_DPHY_BS_SR_SWAP_CNTL 0x4BDC - #define mmDP2_DP_DPHY_BS_SR_SWAP_CNTL 0x4CDC - #define mmDP3_DP_DPHY_BS_SR_SWAP_CNTL 0x4DDC - #define mmDP4_DP_DPHY_BS_SR_SWAP_CNTL 0x4EDC - #define mmDP5_DP_DPHY_BS_SR_SWAP_CNTL 0x4FDC - #define mmDP6_DP_DPHY_BS_SR_SWAP_CNTL 0x54DC -#endif - -#ifndef mmDP_DPHY_FAST_TRAINING - #define mmDP_DPHY_FAST_TRAINING 0x4ABC - #define mmDP0_DP_DPHY_FAST_TRAINING 0x4ABC - #define mmDP1_DP_DPHY_FAST_TRAINING 0x4BBC - #define mmDP2_DP_DPHY_FAST_TRAINING 0x4CBC - #define mmDP3_DP_DPHY_FAST_TRAINING 0x4DBC - #define mmDP4_DP_DPHY_FAST_TRAINING 0x4EBC - #define mmDP5_DP_DPHY_FAST_TRAINING 0x4FBC - #define mmDP6_DP_DPHY_FAST_TRAINING 0x54BC -#endif - -enum dce112_clk_src_array_id { - DCE112_CLK_SRC_PLL0, - DCE112_CLK_SRC_PLL1, - DCE112_CLK_SRC_PLL2, - DCE112_CLK_SRC_PLL3, - DCE112_CLK_SRC_PLL4, - DCE112_CLK_SRC_PLL5, - - DCE112_CLK_SRC_TOTAL -}; - -static const struct dce110_timing_generator_offsets dce112_tg_offsets[] = { - { - .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP0_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP1_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP2_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP3_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP4_GRPH_CONTROL - mmGRPH_CONTROL), - }, - { - .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP5_GRPH_CONTROL - mmGRPH_CONTROL), - } -}; - -/* set register offset */ -#define SR(reg_name)\ - .reg_name = mm ## reg_name - -/* set register offset with instance */ -#define SRI(reg_name, block, id)\ - .reg_name = mm ## block ## id ## _ ## 
reg_name - -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCE110(_MASK) -}; - -static const struct dce_abm_registers abm_regs = { - ABM_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCE110(_MASK) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCE_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE_AUX_MASK_SH_LIST(_MASK) -}; - -#define ipp_regs(id)\ -[id] = {\ - IPP_DCE110_REG_LIST_DCE_BASE(id)\ -} - -static const struct dce_ipp_registers ipp_regs[] = { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2), - ipp_regs(3), - ipp_regs(4), - ipp_regs(5) -}; - -static const struct dce_ipp_shift ipp_shift = { - IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce_ipp_mask ipp_mask = { - IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -#define transform_regs(id)\ -[id] = {\ - XFM_COMMON_REG_LIST_DCE110(id)\ -} - -static const struct dce_transform_registers xfm_regs[] = { - transform_regs(0), - transform_regs(1), - transform_regs(2), - transform_regs(3), - transform_regs(4), - transform_regs(5) -}; - -static const struct dce_transform_shift xfm_shift = { - XFM_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_transform_mask xfm_mask = { - XFM_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -#define aux_regs(id)\ -[id] = {\ - AUX_REG_LIST(id)\ -} - -static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCE_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4), - hpd_regs(5) -}; - -#define link_regs(id)\ -[id] = {\ - LE_DCE110_REG_LIST(id)\ -} - -static const struct dce110_link_enc_registers link_enc_regs[] = { - link_regs(0), - link_regs(1), - link_regs(2), - link_regs(3), - link_regs(4), - link_regs(5), - link_regs(6), -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_COMMON_REG_LIST(id),\ - .TMDS_CNTL = 0,\ -} - -static const struct dce110_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4), - stream_enc_regs(5) -}; - -static const struct dce_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCE112(__SHIFT) -}; - -static const struct dce_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCE112(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_DCE_112_REG_LIST(id),\ -} - -static const struct dce_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4), - opp_regs(5) -}; - -static const struct dce_opp_shift opp_shift = { - OPP_COMMON_MASK_SH_LIST_DCE_112(__SHIFT) -}; - -static const struct dce_opp_mask opp_mask = { - 
OPP_COMMON_MASK_SH_LIST_DCE_112(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = 0 \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), - aux_engine_regs(5) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5) -}; - -static const struct dce_audio_shift audio_shift = { - AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define clk_src_regs(index, id)\ -[index] = {\ - CS_COMMON_REG_LIST_DCE_112(id),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E), - clk_src_regs(5, F) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCE_112(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCE_112(_MASK) -}; - -static const struct bios_registers bios_regs = { - .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, - .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 -}; - -static const struct resource_caps polaris_10_resource_cap = { - .num_timing_generator = 6, - .num_audio = 6, - .num_stream_encoder = 6, - .num_pll = 8, /* why 8? 6 combo PHY PLL + 2 regular PLLs? */ - .num_ddc = 6, -}; - -static const struct resource_caps polaris_11_resource_cap = { - .num_timing_generator = 5, - .num_audio = 5, - .num_stream_encoder = 5, - .num_pll = 8, /* why 8? 6 combo PHY PLL + 2 regular PLLs? 
*/ - .num_ddc = 5, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCE_RGB, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = false, - .fp16 = true - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 1, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 1, - .fp16 = 1 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults = { - .enable_legacy_fast_update = true, -}; - -#define CTX ctx -#define REG(reg) mm ## reg - -#ifndef mmCC_DC_HDMI_STRAPS -#define mmCC_DC_HDMI_STRAPS 0x4819 -#define CC_DC_HDMI_STRAPS__HDMI_DISABLE_MASK 0x40 -#define CC_DC_HDMI_STRAPS__HDMI_DISABLE__SHIFT 0x6 -#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x700 -#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 -#endif - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - case TRANSMITTER_UNIPHY_B: - return 1; - case TRANSMITTER_UNIPHY_C: - return 2; - case TRANSMITTER_UNIPHY_D: - return 3; - case TRANSMITTER_UNIPHY_E: - return 4; - case TRANSMITTER_UNIPHY_F: - return 5; - case TRANSMITTER_UNIPHY_G: - return 6; - default: - ASSERT(0); - return 0; - } -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - REG_GET_2(CC_DC_HDMI_STRAPS, - HDMI_DISABLE, &straps->hdmi_disable, - AUDIO_STREAM_NUMBER, &straps->audio_stream_number); - - REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio); -} - -static struct audio *create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - - -static struct timing_generator *dce112_timing_generator_create( - struct dc_context *ctx, - uint32_t instance, - const struct dce110_timing_generator_offsets *offsets) -{ - struct dce110_timing_generator *tg110 = - kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); - - if (!tg110) - return NULL; - - dce110_timing_generator_construct(tg110, ctx, instance, offsets); - return &tg110->base; -} - -static struct stream_encoder *dce112_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dce110_stream_encoder *enc110 = - kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); - - if (!enc110) - return NULL; - - dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - return &enc110->base; -} - -#define SRII(reg_name, block, id)\ - .reg_name[id] = mm ## block ## id ## _ ## reg_name - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCE112_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCE112_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCE112_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dce112_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = create_audio, - .create_stream_encoder = dce112_stream_encoder_create, - .create_hwseq = dce112_hwseq_create, -}; - -#define mi_inst_regs(id) { MI_DCE11_2_REG_LIST(id) } -static const struct dce_mem_input_registers mi_regs[] = { - 
mi_inst_regs(0), - mi_inst_regs(1), - mi_inst_regs(2), - mi_inst_regs(3), - mi_inst_regs(4), - mi_inst_regs(5), -}; - -static const struct dce_mem_input_shift mi_shifts = { - MI_DCE11_2_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_mem_input_mask mi_masks = { - MI_DCE11_2_MASK_SH_LIST(_MASK) -}; - -static struct mem_input *dce112_mem_input_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), - GFP_KERNEL); - - if (!dce_mi) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce112_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); - return &dce_mi->base; -} - -static void dce112_transform_destroy(struct transform **xfm) -{ - kfree(TO_DCE_TRANSFORM(*xfm)); - *xfm = NULL; -} - -static struct transform *dce112_transform_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_transform *transform = - kzalloc(sizeof(struct dce_transform), GFP_KERNEL); - - if (!transform) - return NULL; - - dce_transform_construct(transform, ctx, inst, - &xfm_regs[inst], &xfm_shift, &xfm_mask); - transform->lb_memory_size = 0x1404; /*5124*/ - return &transform->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = false, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dce112_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dce110_link_encoder *enc110 = - kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; - - if (!enc110) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dce110_link_encoder_construct(enc110, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); - return &enc110->base; -} - -static struct panel_cntl *dce112_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static struct input_pixel_processor *dce112_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); - - if (!ipp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce_ipp_construct(ipp, ctx, inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - -static struct output_pixel_processor *dce112_opp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce110_opp *opp = - kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); - - if (!opp) - return NULL; - - dce110_opp_construct(opp, - ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dce112_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * 
AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), - i2c_inst_regs(6), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -static struct dce_i2c_hw *dce112_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dce112_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct clock_source *dce112_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dce112_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static void dce112_clock_source_destroy(struct clock_source **clk_src) -{ - kfree(TO_DCE110_CLK_SRC(*clk_src)); - *clk_src = NULL; -} - -static void dce112_resource_destruct(struct dce110_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.opps[i] != NULL) - dce110_opp_destroy(&pool->base.opps[i]); - - if (pool->base.transforms[i] != NULL) - dce112_transform_destroy(&pool->base.transforms[i]); - - if (pool->base.ipps[i] != NULL) - dce_ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.mis[i] != NULL) { - kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); - pool->base.mis[i] = NULL; - } - - if (pool->base.timing_generators[i] != NULL) { - kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) - kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dce112_clock_source_destroy(&pool->base.clock_sources[i]); - } - } - - if (pool->base.dp_clock_source != NULL) - dce112_clock_source_destroy(&pool->base.dp_clock_source); - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i] != NULL) { - dce_aud_destroy(&pool->base.audios[i]); - } - } - - if (pool->base.abm != NULL) - dce_abm_destroy(&pool->base.abm); - - if (pool->base.dmcu != NULL) - dce_dmcu_destroy(&pool->base.dmcu); - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } -} - -static struct clock_source *find_matching_pll( - struct resource_context *res_ctx, - const struct 
resource_pool *pool, - const struct dc_stream_state *const stream) -{ - switch (stream->link->link_enc->transmitter) { - case TRANSMITTER_UNIPHY_A: - return pool->clock_sources[DCE112_CLK_SRC_PLL0]; - case TRANSMITTER_UNIPHY_B: - return pool->clock_sources[DCE112_CLK_SRC_PLL1]; - case TRANSMITTER_UNIPHY_C: - return pool->clock_sources[DCE112_CLK_SRC_PLL2]; - case TRANSMITTER_UNIPHY_D: - return pool->clock_sources[DCE112_CLK_SRC_PLL3]; - case TRANSMITTER_UNIPHY_E: - return pool->clock_sources[DCE112_CLK_SRC_PLL4]; - case TRANSMITTER_UNIPHY_F: - return pool->clock_sources[DCE112_CLK_SRC_PLL5]; - default: - return NULL; - } - - return NULL; -} - -static enum dc_status build_mapped_resource( - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *stream) -{ - struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - dce110_resource_build_pipe_hw_param(pipe_ctx); - - resource_build_info_frame(pipe_ctx); - - return DC_OK; -} - -bool dce112_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool result = false; - - DC_LOG_BANDWIDTH_CALCS( - "%s: start", - __func__); - - if (bw_calcs( - dc->ctx, - dc->bw_dceip, - dc->bw_vbios, - context->res_ctx.pipe_ctx, - dc->res_pool->pipe_count, - &context->bw_ctx.bw.dce)) - result = true; - - if (!result) - DC_LOG_BANDWIDTH_VALIDATION( - "%s: Bandwidth validation failed!", - __func__); - - if (memcmp(&dc->current_state->bw_ctx.bw.dce, - &context->bw_ctx.bw.dce, sizeof(context->bw_ctx.bw.dce))) { - - DC_LOG_BANDWIDTH_CALCS( - "%s: finish,\n" - "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" - "stutMark_b: %d stutMark_a: %d\n" - "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" - "stutMark_b: %d stutMark_a: %d\n" - "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" - "stutMark_b: %d stutMark_a: %d stutter_mode_enable: %d\n" - "cstate: %d pstate: %d nbpstate: %d sync: %d dispclk: %d\n" - "sclk: %d sclk_sleep: %d yclk: %d blackout_recovery_time_us: %d\n" - , - __func__, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].b_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].a_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[0].b_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[0].a_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].b_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].a_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].b_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].a_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[1].b_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[1].a_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].b_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].a_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].b_mark, - context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].a_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[2].b_mark, - context->bw_ctx.bw.dce.urgent_wm_ns[2].a_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].b_mark, - context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].a_mark, - context->bw_ctx.bw.dce.stutter_mode_enable, - context->bw_ctx.bw.dce.cpuc_state_change_enable, - context->bw_ctx.bw.dce.cpup_state_change_enable, - context->bw_ctx.bw.dce.nbp_state_change_enable, - context->bw_ctx.bw.dce.all_displays_in_sync, - context->bw_ctx.bw.dce.dispclk_khz, - context->bw_ctx.bw.dce.sclk_khz, - context->bw_ctx.bw.dce.sclk_deep_sleep_khz, - context->bw_ctx.bw.dce.yclk_khz, - 
context->bw_ctx.bw.dce.blackout_recovery_time_us); - } - return result; -} - -enum dc_status resource_map_phy_clock_resources( - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *stream) -{ - - /* acquire new resources */ - struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream( - &context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - if (dc_is_dp_signal(pipe_ctx->stream->signal) - || dc_is_virtual_signal(pipe_ctx->stream->signal)) - pipe_ctx->clock_source = - dc->res_pool->dp_clock_source; - else { - if (stream && stream->link && stream->link->link_enc) - pipe_ctx->clock_source = find_matching_pll( - &context->res_ctx, dc->res_pool, - stream); - } - - if (pipe_ctx->clock_source == NULL) - return DC_NO_CLOCK_SOURCE_RESOURCE; - - resource_reference_clock_source( - &context->res_ctx, - dc->res_pool, - pipe_ctx->clock_source); - - return DC_OK; -} - -static bool dce112_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - } - - return true; -} - -enum dc_status dce112_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *dc_stream) -{ - enum dc_status result; - - result = resource_map_pool_resources(dc, new_ctx, dc_stream); - - if (result == DC_OK) - result = resource_map_phy_clock_resources(dc, new_ctx, dc_stream); - - - if (result == DC_OK) - result = build_mapped_resource(dc, new_ctx, dc_stream); - - return result; -} - -static enum dc_status dce112_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce112_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - -static void dce112_destroy_resource_pool(struct resource_pool **pool) -{ - struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); - - dce112_resource_destruct(dce110_pool); - kfree(dce110_pool); - *pool = NULL; -} - -static const struct resource_funcs dce112_res_pool_funcs = { - .destroy = dce112_destroy_resource_pool, - .link_enc_create = dce112_link_encoder_create, - .panel_cntl_create = dce112_panel_cntl_create, - .validate_bandwidth = dce112_validate_bandwidth, - .validate_plane = dce100_validate_plane, - .add_stream_to_ctx = dce112_add_stream_to_ctx, - .validate_global = dce112_validate_global, - .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link -}; - -static void bw_calcs_data_update_from_pplib(struct dc *dc) -{ - struct dm_pp_clock_levels_with_latency eng_clks = {0}; - struct dm_pp_clock_levels_with_latency mem_clks = {0}; - struct dm_pp_wm_sets_with_clock_ranges clk_ranges = {0}; - struct dm_pp_clock_levels clks = {0}; - int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; - - if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) - memory_type_multiplier = MEMORY_TYPE_HBM; - - /*do system clock TODO PPLIB: after PPLIB implement, - * then remove old way - */ - if (!dm_pp_get_clock_levels_by_type_with_latency( - dc->ctx, - DM_PP_CLOCK_TYPE_ENGINE_CLK, - &eng_clks)) { - - /* This is only for temporary */ - dm_pp_get_clock_levels_by_type( - dc->ctx, - DM_PP_CLOCK_TYPE_ENGINE_CLK, - &clks); - /* convert all the clock fro kHz to fix point mHz */ - dc->bw_vbios->high_sclk = bw_frc_to_fixed( - 
clks.clocks_in_khz[clks.num_levels-1], 1000); - dc->bw_vbios->mid1_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels/8], 1000); - dc->bw_vbios->mid2_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*2/8], 1000); - dc->bw_vbios->mid3_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*3/8], 1000); - dc->bw_vbios->mid4_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*4/8], 1000); - dc->bw_vbios->mid5_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*5/8], 1000); - dc->bw_vbios->mid6_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels*6/8], 1000); - dc->bw_vbios->low_sclk = bw_frc_to_fixed( - clks.clocks_in_khz[0], 1000); - - /*do memory clock*/ - dm_pp_get_clock_levels_by_type( - dc->ctx, - DM_PP_CLOCK_TYPE_MEMORY_CLK, - &clks); - - dc->bw_vbios->low_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[0] * memory_type_multiplier, 1000); - dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier, - 1000); - dc->bw_vbios->high_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier, - 1000); - - return; - } - - /* convert all the clock fro kHz to fix point mHz TODO: wloop data */ - dc->bw_vbios->high_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels-1].clocks_in_khz, 1000); - dc->bw_vbios->mid1_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels/8].clocks_in_khz, 1000); - dc->bw_vbios->mid2_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*2/8].clocks_in_khz, 1000); - dc->bw_vbios->mid3_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz, 1000); - dc->bw_vbios->mid4_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*4/8].clocks_in_khz, 1000); - dc->bw_vbios->mid5_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*5/8].clocks_in_khz, 1000); - dc->bw_vbios->mid6_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*6/8].clocks_in_khz, 1000); - dc->bw_vbios->low_sclk = bw_frc_to_fixed( - eng_clks.data[0].clocks_in_khz, 1000); - - /*do memory clock*/ - dm_pp_get_clock_levels_by_type_with_latency( - dc->ctx, - DM_PP_CLOCK_TYPE_MEMORY_CLK, - &mem_clks); - - /* we don't need to call PPLIB for validation clock since they - * also give us the highest sclk and highest mclk (UMA clock). - * ALSO always convert UMA clock (from PPLIB) to YCLK (HW formula): - * YCLK = UMACLK*m_memoryTypeMultiplier - */ - dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); - dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, - 1000); - dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, - 1000); - - /* Now notify PPLib/SMU about which Watermarks sets they should select - * depending on DPM state they are in. 
And update BW MGR GFX Engine and - * Memory clock member variables for Watermarks calculations for each - * Watermark Set - */ - clk_ranges.num_wm_sets = 4; - clk_ranges.wm_clk_ranges[0].wm_set_id = WM_SET_A; - clk_ranges.wm_clk_ranges[0].wm_min_eng_clk_in_khz = - eng_clks.data[0].clocks_in_khz; - clk_ranges.wm_clk_ranges[0].wm_max_eng_clk_in_khz = - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz - 1; - clk_ranges.wm_clk_ranges[0].wm_min_mem_clk_in_khz = - mem_clks.data[0].clocks_in_khz; - clk_ranges.wm_clk_ranges[0].wm_max_mem_clk_in_khz = - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz - 1; - - clk_ranges.wm_clk_ranges[1].wm_set_id = WM_SET_B; - clk_ranges.wm_clk_ranges[1].wm_min_eng_clk_in_khz = - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz; - /* 5 GHz instead of data[7].clockInKHz to cover Overdrive */ - clk_ranges.wm_clk_ranges[1].wm_max_eng_clk_in_khz = 5000000; - clk_ranges.wm_clk_ranges[1].wm_min_mem_clk_in_khz = - mem_clks.data[0].clocks_in_khz; - clk_ranges.wm_clk_ranges[1].wm_max_mem_clk_in_khz = - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz - 1; - - clk_ranges.wm_clk_ranges[2].wm_set_id = WM_SET_C; - clk_ranges.wm_clk_ranges[2].wm_min_eng_clk_in_khz = - eng_clks.data[0].clocks_in_khz; - clk_ranges.wm_clk_ranges[2].wm_max_eng_clk_in_khz = - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz - 1; - clk_ranges.wm_clk_ranges[2].wm_min_mem_clk_in_khz = - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz; - /* 5 GHz instead of data[2].clockInKHz to cover Overdrive */ - clk_ranges.wm_clk_ranges[2].wm_max_mem_clk_in_khz = 5000000; - - clk_ranges.wm_clk_ranges[3].wm_set_id = WM_SET_D; - clk_ranges.wm_clk_ranges[3].wm_min_eng_clk_in_khz = - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz; - /* 5 GHz instead of data[7].clockInKHz to cover Overdrive */ - clk_ranges.wm_clk_ranges[3].wm_max_eng_clk_in_khz = 5000000; - clk_ranges.wm_clk_ranges[3].wm_min_mem_clk_in_khz = - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz; - /* 5 GHz instead of data[2].clockInKHz to cover Overdrive */ - clk_ranges.wm_clk_ranges[3].wm_max_mem_clk_in_khz = 5000000; - - /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ - dm_pp_notify_wm_clock_changes(dc->ctx, &clk_ranges); -} - -static const struct resource_caps *dce112_resource_cap( - struct hw_asic_id *asic_id) -{ - if (ASIC_REV_IS_POLARIS11_M(asic_id->hw_internal_rev) || - ASIC_REV_IS_POLARIS12_V(asic_id->hw_internal_rev)) - return &polaris_11_resource_cap; - else - return &polaris_10_resource_cap; -} - -static bool dce112_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dce110_resource_pool *pool) -{ - unsigned int i; - struct dc_context *ctx = dc->ctx; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = dce112_resource_cap(&ctx->asic_id); - pool->base.funcs = &dce112_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ - dc->caps.max_cursor_size = 128; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dual_link_dvi = true; - dc->caps.extended_aux_timeout_support = false; - dc->debug = debug_defaults; - - 
/************************************************* - * Create resources * - *************************************************/ - - pool->base.clock_sources[DCE112_CLK_SRC_PLL0] = - dce112_clock_source_create( - ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCE112_CLK_SRC_PLL1] = - dce112_clock_source_create( - ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCE112_CLK_SRC_PLL2] = - dce112_clock_source_create( - ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCE112_CLK_SRC_PLL3] = - dce112_clock_source_create( - ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCE112_CLK_SRC_PLL4] = - dce112_clock_source_create( - ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - pool->base.clock_sources[DCE112_CLK_SRC_PLL5] = - dce112_clock_source_create( - ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL5, - &clk_src_regs[5], false); - pool->base.clk_src_count = DCE112_CLK_SRC_TOTAL; - - pool->base.dp_clock_source = dce112_clock_source_create( - ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, &clk_src_regs[0], true); - - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - } - - pool->base.dmcu = dce_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - { - struct irq_service_init_data init_data; - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dce110_create(&init_data); - if (!pool->base.irqs) - goto res_create_fail; - } - - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.timing_generators[i] = - dce112_timing_generator_create( - ctx, - i, - &dce112_tg_offsets[i]); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto res_create_fail; - } - - pool->base.mis[i] = dce112_mem_input_create(ctx, i); - if (pool->base.mis[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create memory input!\n"); - goto res_create_fail; - } - - pool->base.ipps[i] = dce112_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create input pixel processor!\n"); - goto res_create_fail; - } - - pool->base.transforms[i] = dce112_transform_create(ctx, i); - if (pool->base.transforms[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create transform!\n"); - goto res_create_fail; - } - - pool->base.opps[i] = dce112_opp_create( - ctx, - i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create output pixel processor!\n"); - goto res_create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dce112_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto res_create_fail; - } - pool->base.hw_i2cs[i] = dce112_i2c_hw_create(ctx, i); - if 
(pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create i2c engine!!\n"); - goto res_create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto res_create_fail; - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - /* Create hardware sequencer */ - dce112_hw_sequencer_construct(dc); - - bw_calcs_init(dc->bw_dceip, dc->bw_vbios, dc->ctx->asic_id); - - bw_calcs_data_update_from_pplib(dc); - - return true; - -res_create_fail: - dce112_resource_destruct(pool); - return false; -} - -struct resource_pool *dce112_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc) -{ - struct dce110_resource_pool *pool = - kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dce112_resource_construct(num_virtual_links, dc, pool)) - return &pool->base; - - kfree(pool); - BREAK_TO_DEBUGGER(); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h deleted file mode 100644 index 1f57ebc6f9b4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -* Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DC_RESOURCE_DCE112_H__ -#define __DC_RESOURCE_DCE112_H__ - -#include "core_types.h" - -struct dc; -struct resource_pool; - -struct resource_pool *dce112_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc); - -enum dc_status dce112_validate_with_context( - struct dc *dc, - const struct dc_validation_set set[], - int set_count, - struct dc_state *context, - struct dc_state *old_context); - -bool dce112_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - bool fast_validate); - -enum dc_status dce112_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *dc_stream); - - -#endif /* __DC_RESOURCE_DCE112_H__ */ - diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile index 097cf407a15d..1e3ef68a452a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile @@ -26,7 +26,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init) -DCE120 = dce120_resource.o dce120_timing_generator.o \ +DCE120 = dce120_timing_generator.o AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120)) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c deleted file mode 100644 index 962de79be169..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ /dev/null @@ -1,1288 +0,0 @@ -/* -* Copyright 2012-15 Advanced Micro Devices, Inc.cls -* - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dm_services.h" - - -#include "stream_encoder.h" -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dce120_resource.h" - -#include "dce112/dce112_resource.h" - -#include "dce110/dce110_resource.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce120_timing_generator.h" -#include "irq/dce120/irq_service_dce120.h" -#include "dce/dce_opp.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_ipp.h" -#include "dce/dce_mem_input.h" -#include "dce/dce_panel_cntl.h" - -#include "dce110/dce110_hwseq.h" -#include "dce120/dce120_hwseq.h" -#include "dce/dce_transform.h" -#include "clk_mgr.h" -#include "dce/dce_audio.h" -#include "dce/dce_link_encoder.h" -#include "dce/dce_stream_encoder.h" -#include "dce/dce_hwseq.h" -#include "dce/dce_abm.h" -#include "dce/dce_dmcu.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" - -#include "dce/dce_12_0_offset.h" -#include "dce/dce_12_0_sh_mask.h" -#include "soc15_hw_ip.h" -#include "vega10_ip_offset.h" -#include "nbio/nbio_6_1_offset.h" -#include "mmhub/mmhub_1_0_offset.h" -#include "mmhub/mmhub_1_0_sh_mask.h" -#include "reg_helper.h" - -#include "dce100/dce100_resource.h" - -#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL - #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f - #define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x220f - #define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x230f - #define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x240f - #define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x250f - #define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x260f - #define mmDP5_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x270f - #define mmDP6_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 -#endif - -enum dce120_clk_src_array_id { - DCE120_CLK_SRC_PLL0, - DCE120_CLK_SRC_PLL1, - DCE120_CLK_SRC_PLL2, - DCE120_CLK_SRC_PLL3, - DCE120_CLK_SRC_PLL4, - DCE120_CLK_SRC_PLL5, - - DCE120_CLK_SRC_TOTAL -}; - -static const struct dce110_timing_generator_offsets dce120_tg_offsets[] = { - { - .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), - }, - { - .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), - }, - { - .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), - }, - { - .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), - }, - { - .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), - }, - { - .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), - } -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file */ - -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -#define NBIO_BASE_INNER(seg) \ - NBIF_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -/* compile time expand base address. 
*/ -#define BASE(seg) \ - BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* macros to expend register list macro defined in HW object header file - * end *********************/ - - -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCE110(_MASK) -}; - -static const struct dce_abm_registers abm_regs = { - ABM_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCE110(_MASK) -}; - -#define ipp_regs(id)\ -[id] = {\ - IPP_DCE110_REG_LIST_DCE_BASE(id)\ -} - -static const struct dce_ipp_registers ipp_regs[] = { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2), - ipp_regs(3), - ipp_regs(4), - ipp_regs(5) -}; - -static const struct dce_ipp_shift ipp_shift = { - IPP_DCE120_MASK_SH_LIST_SOC_BASE(__SHIFT) -}; - -static const struct dce_ipp_mask ipp_mask = { - IPP_DCE120_MASK_SH_LIST_SOC_BASE(_MASK) -}; - -#define transform_regs(id)\ -[id] = {\ - XFM_COMMON_REG_LIST_DCE110(id)\ -} - -static const struct dce_transform_registers xfm_regs[] = { - transform_regs(0), - transform_regs(1), - transform_regs(2), - transform_regs(3), - transform_regs(4), - transform_regs(5) -}; - -static const struct dce_transform_shift xfm_shift = { - XFM_COMMON_MASK_SH_LIST_SOC_BASE(__SHIFT) -}; - -static const struct dce_transform_mask xfm_mask = { - XFM_COMMON_MASK_SH_LIST_SOC_BASE(_MASK) -}; - -#define aux_regs(id)\ -[id] = {\ - AUX_REG_LIST(id)\ -} - -static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4), - hpd_regs(5) -}; - -#define link_regs(id)\ -[id] = {\ - LE_DCE120_REG_LIST(id), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ -} - -static const struct dce110_link_enc_registers link_enc_regs[] = { - link_regs(0), - link_regs(1), - link_regs(2), - link_regs(3), - link_regs(4), - link_regs(5), - link_regs(6), -}; - - -#define stream_enc_regs(id)\ -[id] = {\ - SE_COMMON_REG_LIST(id),\ - .TMDS_CNTL = 0,\ -} - -static const struct dce110_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4), - stream_enc_regs(5) -}; - -static const struct dce_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCE120(__SHIFT) -}; - -static const struct dce_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCE120(_MASK) -}; - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCE_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const 
struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCE12_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE12_AUX_MASK_SH_LIST(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_DCE_120_REG_LIST(id),\ -} - -static const struct dce_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4), - opp_regs(5) -}; - -static const struct dce_opp_shift opp_shift = { - OPP_COMMON_MASK_SH_LIST_DCE_120(__SHIFT) -}; - -static const struct dce_opp_mask opp_mask = { - OPP_COMMON_MASK_SH_LIST_DCE_120(_MASK) -}; - #define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = 0 \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), - aux_engine_regs(5) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6), -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - case TRANSMITTER_UNIPHY_B: - return 1; - case TRANSMITTER_UNIPHY_C: - return 2; - case TRANSMITTER_UNIPHY_D: - return 3; - case TRANSMITTER_UNIPHY_E: - return 4; - case TRANSMITTER_UNIPHY_F: - return 5; - case TRANSMITTER_UNIPHY_G: - return 6; - default: - ASSERT(0); - return 0; - } -} - -#define clk_src_regs(index, id)\ -[index] = {\ - CS_COMMON_REG_LIST_DCE_112(id),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E), - clk_src_regs(5, F) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCE_112(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCE_112(_MASK) -}; - -static struct output_pixel_processor *dce120_opp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce110_opp *opp = - kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); - - if (!opp) - return NULL; - - dce110_opp_construct(opp, - ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} -static struct dce_aux *dce120_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] 
= { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), - i2c_inst_regs(6), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -static struct dce_i2c_hw *dce120_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dce112_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static const struct bios_registers bios_regs = { - .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3 + NBIO_BASE(mmBIOS_SCRATCH_3_BASE_IDX), - .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 + NBIO_BASE(mmBIOS_SCRATCH_6_BASE_IDX) -}; - -static const struct resource_caps res_cap = { - .num_timing_generator = 6, - .num_audio = 7, - .num_stream_encoder = 6, - .num_pll = 6, - .num_ddc = 6, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCE_RGB, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = false, - .fp16 = true - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 1, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 1, - .fp16 = 1 - } -}; - -static const struct dc_debug_options debug_defaults = { - .disable_clock_gate = true, - .enable_legacy_fast_update = true, -}; - -static struct clock_source *dce120_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(*clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dce112_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static void dce120_clock_source_destroy(struct clock_source **clk_src) -{ - kfree(TO_DCE110_CLK_SRC(*clk_src)); - *clk_src = NULL; -} - - -static bool dce120_hw_sequencer_create(struct dc *dc) -{ - /* All registers used by dce11.2 match those in dce11 in offset and - * structure - */ - dce120_hw_sequencer_construct(dc); - - /*TODO Move to separate file and Override what is needed */ - - return true; -} - -static struct timing_generator *dce120_timing_generator_create( - struct dc_context *ctx, - uint32_t instance, - const struct dce110_timing_generator_offsets *offsets) -{ - struct dce110_timing_generator *tg110 = - kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); - - if (!tg110) - return NULL; - - dce120_timing_generator_construct(tg110, ctx, instance, offsets); - return &tg110->base; -} - -static void dce120_transform_destroy(struct transform **xfm) -{ - kfree(TO_DCE_TRANSFORM(*xfm)); - *xfm = NULL; -} - -static void dce120_resource_destruct(struct dce110_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.opps[i] != NULL) - dce110_opp_destroy(&pool->base.opps[i]); - - if (pool->base.transforms[i] != NULL) - dce120_transform_destroy(&pool->base.transforms[i]); - - if (pool->base.ipps[i] != NULL) - dce_ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.mis[i] != NULL) { - kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); - pool->base.mis[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - - if 
(pool->base.timing_generators[i] != NULL) { - kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) - kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) - dce120_clock_source_destroy( - &pool->base.clock_sources[i]); - } - - if (pool->base.dp_clock_source != NULL) - dce120_clock_source_destroy(&pool->base.dp_clock_source); - - if (pool->base.abm != NULL) - dce_abm_destroy(&pool->base.abm); - - if (pool->base.dmcu != NULL) - dce_dmcu_destroy(&pool->base.dmcu); -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - uint32_t reg_val = dm_read_reg_soc15(ctx, mmCC_DC_MISC_STRAPS, 0); - - straps->audio_stream_number = get_reg_field_value(reg_val, - CC_DC_MISC_STRAPS, - AUDIO_STREAM_NUMBER); - straps->hdmi_disable = get_reg_field_value(reg_val, - CC_DC_MISC_STRAPS, - HDMI_DISABLE); - - reg_val = dm_read_reg_soc15(ctx, mmDC_PINSTRAPS, 0); - straps->dc_pinstraps_audio = get_reg_field_value(reg_val, - DC_PINSTRAPS, - DC_PINSTRAPS_AUDIO); -} - -static struct audio *create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = false, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true, -}; - -static struct link_encoder *dce120_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dce110_link_encoder *enc110 = - kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; - - if (!enc110) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dce110_link_encoder_construct(enc110, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); - - return &enc110->base; -} - -static struct panel_cntl *dce120_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static struct input_pixel_processor *dce120_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); - - if (!ipp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce_ipp_construct(ipp, ctx, 
inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - -static struct stream_encoder *dce120_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dce110_stream_encoder *enc110 = - kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); - - if (!enc110) - return NULL; - - dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - return &enc110->base; -} - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCE120_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCE12_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCE12_MASK_SH_LIST(_MASK) -}; - -/* HWSEQ regs for VG20 */ -static const struct dce_hwseq_registers dce121_hwseq_reg = { - HWSEQ_VG20_REG_LIST() -}; - -static const struct dce_hwseq_shift dce121_hwseq_shift = { - HWSEQ_VG20_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask dce121_hwseq_mask = { - HWSEQ_VG20_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dce120_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} - -static struct dce_hwseq *dce121_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &dce121_hwseq_reg; - hws->shifts = &dce121_hwseq_shift; - hws->masks = &dce121_hwseq_mask; - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = create_audio, - .create_stream_encoder = dce120_stream_encoder_create, - .create_hwseq = dce120_hwseq_create, -}; - -static const struct resource_create_funcs dce121_res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = create_audio, - .create_stream_encoder = dce120_stream_encoder_create, - .create_hwseq = dce121_hwseq_create, -}; - - -#define mi_inst_regs(id) { MI_DCE12_REG_LIST(id) } -static const struct dce_mem_input_registers mi_regs[] = { - mi_inst_regs(0), - mi_inst_regs(1), - mi_inst_regs(2), - mi_inst_regs(3), - mi_inst_regs(4), - mi_inst_regs(5), -}; - -static const struct dce_mem_input_shift mi_shifts = { - MI_DCE12_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_mem_input_mask mi_masks = { - MI_DCE12_MASK_SH_LIST(_MASK) -}; - -static struct mem_input *dce120_mem_input_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), - GFP_KERNEL); - - if (!dce_mi) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce120_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); - return &dce_mi->base; -} - -static struct transform *dce120_transform_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_transform *transform = - kzalloc(sizeof(struct dce_transform), GFP_KERNEL); - - if (!transform) - return NULL; - - dce_transform_construct(transform, ctx, inst, - &xfm_regs[inst], &xfm_shift, &xfm_mask); - transform->lb_memory_size = 0x1404; /*5124*/ - return &transform->base; -} - -static void dce120_destroy_resource_pool(struct resource_pool **pool) -{ - struct 
dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); - - dce120_resource_destruct(dce110_pool); - kfree(dce110_pool); - *pool = NULL; -} - -static const struct resource_funcs dce120_res_pool_funcs = { - .destroy = dce120_destroy_resource_pool, - .link_enc_create = dce120_link_encoder_create, - .panel_cntl_create = dce120_panel_cntl_create, - .validate_bandwidth = dce112_validate_bandwidth, - .validate_plane = dce100_validate_plane, - .add_stream_to_ctx = dce112_add_stream_to_ctx, - .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link -}; - -static void bw_calcs_data_update_from_pplib(struct dc *dc) -{ - struct dm_pp_clock_levels_with_latency eng_clks = {0}; - struct dm_pp_clock_levels_with_latency mem_clks = {0}; - struct dm_pp_wm_sets_with_clock_ranges clk_ranges = {0}; - int i; - unsigned int clk; - unsigned int latency; - /*original logic in dal3*/ - int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; - - /*do system clock*/ - if (!dm_pp_get_clock_levels_by_type_with_latency( - dc->ctx, - DM_PP_CLOCK_TYPE_ENGINE_CLK, - &eng_clks) || eng_clks.num_levels == 0) { - - eng_clks.num_levels = 8; - clk = 300000; - - for (i = 0; i < eng_clks.num_levels; i++) { - eng_clks.data[i].clocks_in_khz = clk; - clk += 100000; - } - } - - /* convert all the clock fro kHz to fix point mHz TODO: wloop data */ - dc->bw_vbios->high_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels-1].clocks_in_khz, 1000); - dc->bw_vbios->mid1_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels/8].clocks_in_khz, 1000); - dc->bw_vbios->mid2_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*2/8].clocks_in_khz, 1000); - dc->bw_vbios->mid3_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz, 1000); - dc->bw_vbios->mid4_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*4/8].clocks_in_khz, 1000); - dc->bw_vbios->mid5_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*5/8].clocks_in_khz, 1000); - dc->bw_vbios->mid6_sclk = bw_frc_to_fixed( - eng_clks.data[eng_clks.num_levels*6/8].clocks_in_khz, 1000); - dc->bw_vbios->low_sclk = bw_frc_to_fixed( - eng_clks.data[0].clocks_in_khz, 1000); - - /*do memory clock*/ - if (!dm_pp_get_clock_levels_by_type_with_latency( - dc->ctx, - DM_PP_CLOCK_TYPE_MEMORY_CLK, - &mem_clks) || mem_clks.num_levels == 0) { - - mem_clks.num_levels = 3; - clk = 250000; - latency = 45; - - for (i = 0; i < eng_clks.num_levels; i++) { - mem_clks.data[i].clocks_in_khz = clk; - mem_clks.data[i].latency_in_us = latency; - clk += 500000; - latency -= 5; - } - - } - - /* we don't need to call PPLIB for validation clock since they - * also give us the highest sclk and highest mclk (UMA clock). - * ALSO always convert UMA clock (from PPLIB) to YCLK (HW formula): - * YCLK = UMACLK*m_memoryTypeMultiplier - */ - if (dc->bw_vbios->memory_type == bw_def_hbm) - memory_type_multiplier = MEMORY_TYPE_HBM; - - dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); - dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, - 1000); - dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, - 1000); - - /* Now notify PPLib/SMU about which Watermarks sets they should select - * depending on DPM state they are in. 
And update BW MGR GFX Engine and - * Memory clock member variables for Watermarks calculations for each - * Watermark Set - */ - clk_ranges.num_wm_sets = 4; - clk_ranges.wm_clk_ranges[0].wm_set_id = WM_SET_A; - clk_ranges.wm_clk_ranges[0].wm_min_eng_clk_in_khz = - eng_clks.data[0].clocks_in_khz; - clk_ranges.wm_clk_ranges[0].wm_max_eng_clk_in_khz = - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz - 1; - clk_ranges.wm_clk_ranges[0].wm_min_mem_clk_in_khz = - mem_clks.data[0].clocks_in_khz; - clk_ranges.wm_clk_ranges[0].wm_max_mem_clk_in_khz = - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz - 1; - - clk_ranges.wm_clk_ranges[1].wm_set_id = WM_SET_B; - clk_ranges.wm_clk_ranges[1].wm_min_eng_clk_in_khz = - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz; - /* 5 GHz instead of data[7].clockInKHz to cover Overdrive */ - clk_ranges.wm_clk_ranges[1].wm_max_eng_clk_in_khz = 5000000; - clk_ranges.wm_clk_ranges[1].wm_min_mem_clk_in_khz = - mem_clks.data[0].clocks_in_khz; - clk_ranges.wm_clk_ranges[1].wm_max_mem_clk_in_khz = - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz - 1; - - clk_ranges.wm_clk_ranges[2].wm_set_id = WM_SET_C; - clk_ranges.wm_clk_ranges[2].wm_min_eng_clk_in_khz = - eng_clks.data[0].clocks_in_khz; - clk_ranges.wm_clk_ranges[2].wm_max_eng_clk_in_khz = - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz - 1; - clk_ranges.wm_clk_ranges[2].wm_min_mem_clk_in_khz = - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz; - /* 5 GHz instead of data[2].clockInKHz to cover Overdrive */ - clk_ranges.wm_clk_ranges[2].wm_max_mem_clk_in_khz = 5000000; - - clk_ranges.wm_clk_ranges[3].wm_set_id = WM_SET_D; - clk_ranges.wm_clk_ranges[3].wm_min_eng_clk_in_khz = - eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz; - /* 5 GHz instead of data[7].clockInKHz to cover Overdrive */ - clk_ranges.wm_clk_ranges[3].wm_max_eng_clk_in_khz = 5000000; - clk_ranges.wm_clk_ranges[3].wm_min_mem_clk_in_khz = - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz; - /* 5 GHz instead of data[2].clockInKHz to cover Overdrive */ - clk_ranges.wm_clk_ranges[3].wm_max_mem_clk_in_khz = 5000000; - - /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ - dm_pp_notify_wm_clock_changes(dc->ctx, &clk_ranges); -} - -static uint32_t read_pipe_fuses(struct dc_context *ctx) -{ - uint32_t value = dm_read_reg_soc15(ctx, mmCC_DC_PIPE_DIS, 0); - /* VG20 support max 6 pipes */ - value = value & 0x3f; - return value; -} - -static bool dce120_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dce110_resource_pool *pool) -{ - unsigned int i; - int j; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data irq_init_data; - static const struct resource_create_funcs *res_funcs; - bool is_vg20 = ASICREV_IS_VEGA20_P(ctx->asic_id.hw_internal_rev); - uint32_t pipe_fuses; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap; - pool->base.funcs = &dce120_res_pool_funcs; - - /* TODO: Fill more data from GreenlandAsicCapability.cpp */ - pool->base.pipe_count = res_cap.num_timing_generator; - pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ - dc->caps.max_cursor_size = 128; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dual_link_dvi = true; - dc->caps.psp_setup_panel_mode = true; - dc->caps.extended_aux_timeout_support = 
false; - dc->debug = debug_defaults; - - /************************************************* - * Create resources * - *************************************************/ - - pool->base.clock_sources[DCE120_CLK_SRC_PLL0] = - dce120_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCE120_CLK_SRC_PLL1] = - dce120_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCE120_CLK_SRC_PLL2] = - dce120_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCE120_CLK_SRC_PLL3] = - dce120_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCE120_CLK_SRC_PLL4] = - dce120_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - pool->base.clock_sources[DCE120_CLK_SRC_PLL5] = - dce120_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL5, - &clk_src_regs[5], false); - pool->base.clk_src_count = DCE120_CLK_SRC_TOTAL; - - pool->base.dp_clock_source = - dce120_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto clk_src_create_fail; - } - } - - pool->base.dmcu = dce_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - - irq_init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dce120_create(&irq_init_data); - if (!pool->base.irqs) - goto irqs_create_fail; - - /* VG20: Pipe harvesting enabled, retrieve valid pipe fuses */ - if (is_vg20) - pipe_fuses = read_pipe_fuses(ctx); - - /* index to valid pipe resource */ - j = 0; - for (i = 0; i < pool->base.pipe_count; i++) { - if (is_vg20) { - if ((pipe_fuses & (1 << i)) != 0) { - dm_error("DC: skip invalid pipe %d!\n", i); - continue; - } - } - - pool->base.timing_generators[j] = - dce120_timing_generator_create( - ctx, - i, - &dce120_tg_offsets[i]); - if (pool->base.timing_generators[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto controller_create_fail; - } - - pool->base.mis[j] = dce120_mem_input_create(ctx, i); - - if (pool->base.mis[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create memory input!\n"); - goto controller_create_fail; - } - - pool->base.ipps[j] = dce120_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create input pixel processor!\n"); - goto controller_create_fail; - } - - pool->base.transforms[j] = dce120_transform_create(ctx, i); - if (pool->base.transforms[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create transform!\n"); - goto res_create_fail; - } - - pool->base.opps[j] = dce120_opp_create( - ctx, - i); - if (pool->base.opps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - } - - /* check next valid pipe */ - j++; - } - - for (i = 0; i < 
pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dce120_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto res_create_fail; - } - pool->base.hw_i2cs[i] = dce120_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create i2c engine!!\n"); - goto res_create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* valid pipe num */ - pool->base.pipe_count = j; - pool->base.timing_generator_count = j; - - if (is_vg20) - res_funcs = &dce121_res_create_funcs; - else - res_funcs = &res_create_funcs; - - if (!resource_construct(num_virtual_links, dc, &pool->base, res_funcs)) - goto res_create_fail; - - /* Create hardware sequencer */ - if (!dce120_hw_sequencer_create(dc)) - goto controller_create_fail; - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - bw_calcs_init(dc->bw_dceip, dc->bw_vbios, dc->ctx->asic_id); - - bw_calcs_data_update_from_pplib(dc); - - return true; - -irqs_create_fail: -controller_create_fail: -clk_src_create_fail: -res_create_fail: - - dce120_resource_destruct(pool); - - return false; -} - -struct resource_pool *dce120_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc) -{ - struct dce110_resource_pool *pool = - kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dce120_resource_construct(num_virtual_links, dc, pool)) - return &pool->base; - - kfree(pool); - BREAK_TO_DEBUGGER(); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h deleted file mode 100644 index 3d1f3cf012f4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -* Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DC_RESOURCE_DCE120_H__ -#define __DC_RESOURCE_DCE120_H__ - -#include "core_types.h" - -struct dc; -struct resource_pool; - -struct resource_pool *dce120_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc); - -#endif /* __DC_RESOURCE_DCE120_H__ */ - diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile index 93dd68c31275..7eefffbdc925 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile @@ -25,8 +25,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init) -DCE80 = dce80_timing_generator.o \ - dce80_resource.o +DCE80 = dce80_timing_generator.o AMD_DAL_DCE80 = $(addprefix $(AMDDALPATH)/dc/dce80/,$(DCE80)) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c deleted file mode 100644 index 35a2cce0c2b8..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ /dev/null @@ -1,1544 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dce/dce_8_0_d.h" -#include "dce/dce_8_0_sh_mask.h" - -#include "dm_services.h" - -#include "link_encoder.h" -#include "stream_encoder.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "irq/dce80/irq_service_dce80.h" -#include "dce110/dce110_timing_generator.h" -#include "dce110/dce110_resource.h" -#include "dce80/dce80_timing_generator.h" -#include "dce/dce_mem_input.h" -#include "dce/dce_link_encoder.h" -#include "dce/dce_stream_encoder.h" -#include "dce/dce_ipp.h" -#include "dce/dce_transform.h" -#include "dce/dce_opp.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "dce80/dce80_hwseq.h" -#include "dce100/dce100_resource.h" -#include "dce/dce_panel_cntl.h" - -#include "reg_helper.h" - -#include "dce/dce_dmcu.h" -#include "dce/dce_aux.h" -#include "dce/dce_abm.h" -#include "dce/dce_i2c.h" -/* TODO remove this include */ - -#ifndef mmMC_HUB_RDREQ_DMIF_LIMIT -#include "gmc/gmc_7_1_d.h" -#include "gmc/gmc_7_1_sh_mask.h" -#endif - -#include "dce80/dce80_resource.h" - -#ifndef mmDP_DPHY_INTERNAL_CTRL -#define mmDP_DPHY_INTERNAL_CTRL 0x1CDE -#define mmDP0_DP_DPHY_INTERNAL_CTRL 0x1CDE -#define mmDP1_DP_DPHY_INTERNAL_CTRL 0x1FDE -#define mmDP2_DP_DPHY_INTERNAL_CTRL 0x42DE -#define mmDP3_DP_DPHY_INTERNAL_CTRL 0x45DE -#define mmDP4_DP_DPHY_INTERNAL_CTRL 0x48DE -#define mmDP5_DP_DPHY_INTERNAL_CTRL 0x4BDE -#define mmDP6_DP_DPHY_INTERNAL_CTRL 0x4EDE -#endif - - -#ifndef mmBIOS_SCRATCH_2 - #define mmBIOS_SCRATCH_2 0x05CB - #define mmBIOS_SCRATCH_3 0x05CC - #define mmBIOS_SCRATCH_6 0x05CF -#endif - -#ifndef mmDP_DPHY_FAST_TRAINING - #define mmDP_DPHY_FAST_TRAINING 0x1CCE - #define mmDP0_DP_DPHY_FAST_TRAINING 0x1CCE - #define mmDP1_DP_DPHY_FAST_TRAINING 0x1FCE - #define mmDP2_DP_DPHY_FAST_TRAINING 0x42CE - #define mmDP3_DP_DPHY_FAST_TRAINING 0x45CE - #define mmDP4_DP_DPHY_FAST_TRAINING 0x48CE - #define mmDP5_DP_DPHY_FAST_TRAINING 0x4BCE - #define mmDP6_DP_DPHY_FAST_TRAINING 0x4ECE -#endif - - -#ifndef mmHPD_DC_HPD_CONTROL - #define mmHPD_DC_HPD_CONTROL 0x189A - #define mmHPD0_DC_HPD_CONTROL 0x189A - #define mmHPD1_DC_HPD_CONTROL 0x18A2 - #define mmHPD2_DC_HPD_CONTROL 0x18AA - #define mmHPD3_DC_HPD_CONTROL 0x18B2 - #define mmHPD4_DC_HPD_CONTROL 0x18BA - #define mmHPD5_DC_HPD_CONTROL 0x18C2 -#endif - -#define DCE11_DIG_FE_CNTL 0x4a00 -#define DCE11_DIG_BE_CNTL 0x4a47 -#define DCE11_DP_SEC 0x4ac3 - -static const struct dce110_timing_generator_offsets dce80_tg_offsets[] = { - { - .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmGRPH_CONTROL - mmGRPH_CONTROL), - .dmif = (mmDMIF_PG0_DPG_WATERMARK_MASK_CONTROL - - mmDPG_WATERMARK_MASK_CONTROL), - }, - { - .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP1_GRPH_CONTROL - mmGRPH_CONTROL), - .dmif = (mmDMIF_PG1_DPG_WATERMARK_MASK_CONTROL - - mmDPG_WATERMARK_MASK_CONTROL), - }, - { - .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP2_GRPH_CONTROL - mmGRPH_CONTROL), - .dmif = (mmDMIF_PG2_DPG_WATERMARK_MASK_CONTROL - - mmDPG_WATERMARK_MASK_CONTROL), - }, - { - .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP3_GRPH_CONTROL - mmGRPH_CONTROL), - .dmif = (mmDMIF_PG3_DPG_WATERMARK_MASK_CONTROL - - mmDPG_WATERMARK_MASK_CONTROL), - }, - { - .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = (mmDCP4_GRPH_CONTROL - mmGRPH_CONTROL), - .dmif = (mmDMIF_PG4_DPG_WATERMARK_MASK_CONTROL - - mmDPG_WATERMARK_MASK_CONTROL), - }, - { - .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC_CONTROL), - .dcp = 
(mmDCP5_GRPH_CONTROL - mmGRPH_CONTROL), - .dmif = (mmDMIF_PG5_DPG_WATERMARK_MASK_CONTROL - - mmDPG_WATERMARK_MASK_CONTROL), - } -}; - -/* set register offset */ -#define SR(reg_name)\ - .reg_name = mm ## reg_name - -/* set register offset with instance */ -#define SRI(reg_name, block, id)\ - .reg_name = mm ## block ## id ## _ ## reg_name - -#define ipp_regs(id)\ -[id] = {\ - IPP_COMMON_REG_LIST_DCE_BASE(id)\ -} - -static const struct dce_ipp_registers ipp_regs[] = { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2), - ipp_regs(3), - ipp_regs(4), - ipp_regs(5) -}; - -static const struct dce_ipp_shift ipp_shift = { - IPP_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce_ipp_mask ipp_mask = { - IPP_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -#define transform_regs(id)\ -[id] = {\ - XFM_COMMON_REG_LIST_DCE80(id)\ -} - -static const struct dce_transform_registers xfm_regs[] = { - transform_regs(0), - transform_regs(1), - transform_regs(2), - transform_regs(3), - transform_regs(4), - transform_regs(5) -}; - -static const struct dce_transform_shift xfm_shift = { - XFM_COMMON_MASK_SH_LIST_DCE80(__SHIFT) -}; - -static const struct dce_transform_mask xfm_mask = { - XFM_COMMON_MASK_SH_LIST_DCE80(_MASK) -}; - -#define aux_regs(id)\ -[id] = {\ - AUX_REG_LIST(id)\ -} - -static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4), - hpd_regs(5) -}; - -#define link_regs(id)\ -[id] = {\ - LE_DCE80_REG_LIST(id)\ -} - -static const struct dce110_link_enc_registers link_enc_regs[] = { - link_regs(0), - link_regs(1), - link_regs(2), - link_regs(3), - link_regs(4), - link_regs(5), - link_regs(6), -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_COMMON_REG_LIST_DCE_BASE(id),\ - .AFMT_CNTL = 0,\ -} - -static const struct dce110_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4), - stream_enc_regs(5), - stream_enc_regs(6) -}; - -static const struct dce_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCE80_100(__SHIFT) -}; - -static const struct dce_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCE80_100(_MASK) -}; - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCE_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_DCE_80_REG_LIST(id),\ -} - -static const struct dce_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4), - opp_regs(5) -}; - -static const struct dce_opp_shift opp_shift = { - OPP_COMMON_MASK_SH_LIST_DCE_80(__SHIFT) -}; - -static const struct dce_opp_mask opp_mask = { - OPP_COMMON_MASK_SH_LIST_DCE_80(_MASK) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCE10_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCE10_AUX_MASK_SH_LIST(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = 0 \ -} - -static const struct 
dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), - aux_engine_regs(5) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6), -}; - -static const struct dce_audio_shift audio_shift = { - AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define clk_src_regs(id)\ -[id] = {\ - CS_COMMON_REG_LIST_DCE_80(id),\ -} - - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0), - clk_src_regs(1), - clk_src_regs(2) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -static const struct bios_registers bios_regs = { - .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, - .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 -}; - -static const struct resource_caps res_cap = { - .num_timing_generator = 6, - .num_audio = 6, - .num_stream_encoder = 6, - .num_pll = 3, - .num_ddc = 6, -}; - -static const struct resource_caps res_cap_81 = { - .num_timing_generator = 4, - .num_audio = 7, - .num_stream_encoder = 7, - .num_pll = 3, - .num_ddc = 6, -}; - -static const struct resource_caps res_cap_83 = { - .num_timing_generator = 2, - .num_audio = 6, - .num_stream_encoder = 6, - .num_pll = 2, - .num_ddc = 2, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCE_RGB, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = false, - .fp16 = true - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 1, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 1, - .fp16 = 1 - } -}; - -static const struct dc_debug_options debug_defaults = { - .enable_legacy_fast_update = true, -}; - -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCE80_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCE80(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCE80(_MASK) -}; -static const struct dce_abm_registers abm_regs = { - ABM_DCE110_COMMON_REG_LIST() -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCE110(_MASK) -}; - -#define CTX ctx -#define REG(reg) mm ## reg - -#ifndef mmCC_DC_HDMI_STRAPS -#define mmCC_DC_HDMI_STRAPS 0x1918 -#define CC_DC_HDMI_STRAPS__HDMI_DISABLE_MASK 0x40 -#define CC_DC_HDMI_STRAPS__HDMI_DISABLE__SHIFT 0x6 -#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x700 -#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 -#endif - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - case TRANSMITTER_UNIPHY_B: - return 1; - case TRANSMITTER_UNIPHY_C: - return 2; - case TRANSMITTER_UNIPHY_D: - return 3; - case TRANSMITTER_UNIPHY_E: - return 4; - case TRANSMITTER_UNIPHY_F: - return 5; - case TRANSMITTER_UNIPHY_G: - return 6; - default: - ASSERT(0); - return 0; - } -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - REG_GET_2(CC_DC_HDMI_STRAPS, - HDMI_DISABLE, &straps->hdmi_disable, - 
AUDIO_STREAM_NUMBER, &straps->audio_stream_number); - - REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio); -} - -static struct audio *create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct timing_generator *dce80_timing_generator_create( - struct dc_context *ctx, - uint32_t instance, - const struct dce110_timing_generator_offsets *offsets) -{ - struct dce110_timing_generator *tg110 = - kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); - - if (!tg110) - return NULL; - - dce80_timing_generator_construct(tg110, ctx, instance, offsets); - return &tg110->base; -} - -static struct output_pixel_processor *dce80_opp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce110_opp *opp = - kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); - - if (!opp) - return NULL; - - dce110_opp_construct(opp, - ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dce80_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), - i2c_inst_regs(6), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) -}; - -static struct dce_i2c_hw *dce80_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dce_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} - -static struct dce_i2c_sw *dce80_i2c_sw_create( - struct dc_context *ctx) -{ - struct dce_i2c_sw *dce_i2c_sw = - kzalloc(sizeof(struct dce_i2c_sw), GFP_KERNEL); - - if (!dce_i2c_sw) - return NULL; - - dce_i2c_sw_construct(dce_i2c_sw, ctx); - - return dce_i2c_sw; -} -static struct stream_encoder *dce80_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dce110_stream_encoder *enc110 = - kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); - - if (!enc110) - return NULL; - - dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - return &enc110->base; -} - -#define SRII(reg_name, block, id)\ - .reg_name[id] = mm ## block ## id ## _ ## reg_name - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCE8_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCE8_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCE8_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dce80_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = 
&hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = create_audio, - .create_stream_encoder = dce80_stream_encoder_create, - .create_hwseq = dce80_hwseq_create, -}; - -#define mi_inst_regs(id) { \ - MI_DCE8_REG_LIST(id), \ - .MC_HUB_RDREQ_DMIF_LIMIT = mmMC_HUB_RDREQ_DMIF_LIMIT \ -} -static const struct dce_mem_input_registers mi_regs[] = { - mi_inst_regs(0), - mi_inst_regs(1), - mi_inst_regs(2), - mi_inst_regs(3), - mi_inst_regs(4), - mi_inst_regs(5), -}; - -static const struct dce_mem_input_shift mi_shifts = { - MI_DCE8_MASK_SH_LIST(__SHIFT), - .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE__SHIFT -}; - -static const struct dce_mem_input_mask mi_masks = { - MI_DCE8_MASK_SH_LIST(_MASK), - .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE_MASK -}; - -static struct mem_input *dce80_mem_input_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), - GFP_KERNEL); - - if (!dce_mi) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); - dce_mi->wa.single_head_rdreq_dmif_limit = 2; - return &dce_mi->base; -} - -static void dce80_transform_destroy(struct transform **xfm) -{ - kfree(TO_DCE_TRANSFORM(*xfm)); - *xfm = NULL; -} - -static struct transform *dce80_transform_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_transform *transform = - kzalloc(sizeof(struct dce_transform), GFP_KERNEL); - - if (!transform) - return NULL; - - dce_transform_construct(transform, ctx, inst, - &xfm_regs[inst], &xfm_shift, &xfm_mask); - transform->prescaler_on = false; - return &transform->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 297000, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true -}; - -static struct link_encoder *dce80_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dce110_link_encoder *enc110 = - kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); - int link_regs_id; - - if (!enc110) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dce110_link_encoder_construct(enc110, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); - return &enc110->base; -} - -static struct panel_cntl *dce80_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static struct clock_source *dce80_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dce110_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - 
-static void dce80_clock_source_destroy(struct clock_source **clk_src) -{ - kfree(TO_DCE110_CLK_SRC(*clk_src)); - *clk_src = NULL; -} - -static struct input_pixel_processor *dce80_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); - - if (!ipp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dce_ipp_construct(ipp, ctx, inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - -static void dce80_resource_destruct(struct dce110_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.opps[i] != NULL) - dce110_opp_destroy(&pool->base.opps[i]); - - if (pool->base.transforms[i] != NULL) - dce80_transform_destroy(&pool->base.transforms[i]); - - if (pool->base.ipps[i] != NULL) - dce_ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.mis[i] != NULL) { - kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); - pool->base.mis[i] = NULL; - } - - if (pool->base.timing_generators[i] != NULL) { - kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) - kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dce80_clock_source_destroy(&pool->base.clock_sources[i]); - } - } - - if (pool->base.abm != NULL) - dce_abm_destroy(&pool->base.abm); - - if (pool->base.dmcu != NULL) - dce_dmcu_destroy(&pool->base.dmcu); - - if (pool->base.dp_clock_source != NULL) - dce80_clock_source_destroy(&pool->base.dp_clock_source); - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i] != NULL) { - dce_aud_destroy(&pool->base.audios[i]); - } - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } -} - -static bool dce80_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - int i; - bool at_least_one_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - at_least_one_pipe = true; - } - - if (at_least_one_pipe) { - /* TODO implement when needed but for now hardcode max value*/ - context->bw_ctx.bw.dce.dispclk_khz = 681000; - context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; - } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; - context->bw_ctx.bw.dce.yclk_khz = 0; - } - - return true; -} - -static bool dce80_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - } - - return true; -} - -static enum dc_status dce80_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce80_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - -static void dce80_destroy_resource_pool(struct 
resource_pool **pool) -{ - struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); - - dce80_resource_destruct(dce110_pool); - kfree(dce110_pool); - *pool = NULL; -} - -static const struct resource_funcs dce80_res_pool_funcs = { - .destroy = dce80_destroy_resource_pool, - .link_enc_create = dce80_link_encoder_create, - .panel_cntl_create = dce80_panel_cntl_create, - .validate_bandwidth = dce80_validate_bandwidth, - .validate_plane = dce100_validate_plane, - .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce80_validate_global, - .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link -}; - -static bool dce80_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dce110_resource_pool *pool) -{ - unsigned int i; - struct dc_context *ctx = dc->ctx; - struct dc_bios *bp; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap; - pool->base.funcs = &dce80_res_pool_funcs; - - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = res_cap.num_timing_generator; - pool->base.timing_generator_count = res_cap.num_timing_generator; - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 40; - dc->caps.i2c_speed_in_khz_hdcp = 40; - dc->caps.max_cursor_size = 128; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dual_link_dvi = true; - dc->caps.extended_aux_timeout_support = false; - dc->debug = debug_defaults; - - /************************************************* - * Create resources * - *************************************************/ - - bp = ctx->dc_bios; - - if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { - pool->base.dp_clock_source = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); - - pool->base.clock_sources[0] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false); - pool->base.clock_sources[1] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[2] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); - pool->base.clk_src_count = 3; - - } else { - pool->base.dp_clock_source = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); - - pool->base.clock_sources[0] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[1] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); - pool->base.clk_src_count = 2; - } - - if (pool->base.dp_clock_source == NULL) { - dm_error("DC: failed to create dp clock source!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - } - - pool->base.dmcu = dce_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - { - struct 
irq_service_init_data init_data; - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dce80_create(&init_data); - if (!pool->base.irqs) - goto res_create_fail; - } - - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.timing_generators[i] = dce80_timing_generator_create( - ctx, i, &dce80_tg_offsets[i]); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto res_create_fail; - } - - pool->base.mis[i] = dce80_mem_input_create(ctx, i); - if (pool->base.mis[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create memory input!\n"); - goto res_create_fail; - } - - pool->base.ipps[i] = dce80_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create input pixel processor!\n"); - goto res_create_fail; - } - - pool->base.transforms[i] = dce80_transform_create(ctx, i); - if (pool->base.transforms[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create transform!\n"); - goto res_create_fail; - } - - pool->base.opps[i] = dce80_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create output pixel processor!\n"); - goto res_create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dce80_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto res_create_fail; - } - pool->base.hw_i2cs[i] = dce80_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create i2c engine!!\n"); - goto res_create_fail; - } - pool->base.sw_i2cs[i] = dce80_i2c_sw_create(ctx); - if (pool->base.sw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create sw i2c!!\n"); - goto res_create_fail; - } - } - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->caps.disable_dp_clk_share = true; - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto res_create_fail; - - /* Create hardware sequencer */ - dce80_hw_sequencer_construct(dc); - - return true; - -res_create_fail: - dce80_resource_destruct(pool); - return false; -} - -struct resource_pool *dce80_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc) -{ - struct dce110_resource_pool *pool = - kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dce80_construct(num_virtual_links, dc, pool)) - return &pool->base; - - kfree(pool); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static bool dce81_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dce110_resource_pool *pool) -{ - unsigned int i; - struct dc_context *ctx = dc->ctx; - struct dc_bios *bp; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_81; - pool->base.funcs = &dce80_res_pool_funcs; - - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = res_cap_81.num_timing_generator; - pool->base.timing_generator_count = res_cap_81.num_timing_generator; - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 40; - dc->caps.i2c_speed_in_khz_hdcp = 40; - dc->caps.max_cursor_size = 128; - dc->caps.min_horizontal_blanking_period 
= 80; - dc->caps.is_apu = true; - - /************************************************* - * Create resources * - *************************************************/ - - bp = ctx->dc_bios; - - if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { - pool->base.dp_clock_source = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); - - pool->base.clock_sources[0] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false); - pool->base.clock_sources[1] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[2] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); - pool->base.clk_src_count = 3; - - } else { - pool->base.dp_clock_source = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); - - pool->base.clock_sources[0] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[1] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); - pool->base.clk_src_count = 2; - } - - if (pool->base.dp_clock_source == NULL) { - dm_error("DC: failed to create dp clock source!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - } - - pool->base.dmcu = dce_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - { - struct irq_service_init_data init_data; - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dce80_create(&init_data); - if (!pool->base.irqs) - goto res_create_fail; - } - - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.timing_generators[i] = dce80_timing_generator_create( - ctx, i, &dce80_tg_offsets[i]); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto res_create_fail; - } - - pool->base.mis[i] = dce80_mem_input_create(ctx, i); - if (pool->base.mis[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create memory input!\n"); - goto res_create_fail; - } - - pool->base.ipps[i] = dce80_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create input pixel processor!\n"); - goto res_create_fail; - } - - pool->base.transforms[i] = dce80_transform_create(ctx, i); - if (pool->base.transforms[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create transform!\n"); - goto res_create_fail; - } - - pool->base.opps[i] = dce80_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create output pixel processor!\n"); - goto res_create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dce80_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto res_create_fail; - } - pool->base.hw_i2cs[i] = dce80_i2c_hw_create(ctx, i); - if 
(pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create i2c engine!!\n"); - goto res_create_fail; - } - pool->base.sw_i2cs[i] = dce80_i2c_sw_create(ctx); - if (pool->base.sw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create sw i2c!!\n"); - goto res_create_fail; - } - } - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->caps.disable_dp_clk_share = true; - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto res_create_fail; - - /* Create hardware sequencer */ - dce80_hw_sequencer_construct(dc); - - return true; - -res_create_fail: - dce80_resource_destruct(pool); - return false; -} - -struct resource_pool *dce81_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc) -{ - struct dce110_resource_pool *pool = - kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dce81_construct(num_virtual_links, dc, pool)) - return &pool->base; - - kfree(pool); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static bool dce83_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dce110_resource_pool *pool) -{ - unsigned int i; - struct dc_context *ctx = dc->ctx; - struct dc_bios *bp; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_83; - pool->base.funcs = &dce80_res_pool_funcs; - - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = res_cap_83.num_timing_generator; - pool->base.timing_generator_count = res_cap_83.num_timing_generator; - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 40; - dc->caps.i2c_speed_in_khz_hdcp = 40; - dc->caps.max_cursor_size = 128; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.is_apu = true; - dc->debug = debug_defaults; - - /************************************************* - * Create resources * - *************************************************/ - - bp = ctx->dc_bios; - - if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { - pool->base.dp_clock_source = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); - - pool->base.clock_sources[0] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], false); - pool->base.clock_sources[1] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false); - pool->base.clk_src_count = 2; - - } else { - pool->base.dp_clock_source = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], true); - - pool->base.clock_sources[0] = - dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false); - pool->base.clk_src_count = 1; - } - - if (pool->base.dp_clock_source == NULL) { - dm_error("DC: failed to create dp clock source!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - } - - pool->base.dmcu = dce_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - 
&abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto res_create_fail; - } - - { - struct irq_service_init_data init_data; - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dce80_create(&init_data); - if (!pool->base.irqs) - goto res_create_fail; - } - - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.timing_generators[i] = dce80_timing_generator_create( - ctx, i, &dce80_tg_offsets[i]); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto res_create_fail; - } - - pool->base.mis[i] = dce80_mem_input_create(ctx, i); - if (pool->base.mis[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create memory input!\n"); - goto res_create_fail; - } - - pool->base.ipps[i] = dce80_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create input pixel processor!\n"); - goto res_create_fail; - } - - pool->base.transforms[i] = dce80_transform_create(ctx, i); - if (pool->base.transforms[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create transform!\n"); - goto res_create_fail; - } - - pool->base.opps[i] = dce80_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create output pixel processor!\n"); - goto res_create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dce80_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto res_create_fail; - } - pool->base.hw_i2cs[i] = dce80_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create i2c engine!!\n"); - goto res_create_fail; - } - pool->base.sw_i2cs[i] = dce80_i2c_sw_create(ctx); - if (pool->base.sw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create sw i2c!!\n"); - goto res_create_fail; - } - } - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->caps.disable_dp_clk_share = true; - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto res_create_fail; - - /* Create hardware sequencer */ - dce80_hw_sequencer_construct(dc); - - return true; - -res_create_fail: - dce80_resource_destruct(pool); - return false; -} - -struct resource_pool *dce83_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc) -{ - struct dce110_resource_pool *pool = - kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dce83_construct(num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h deleted file mode 100644 index eff31ab83a39..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h +++ /dev/null @@ -1,47 +0,0 @@ -/* -* Copyright 2012-15 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_RESOURCE_DCE80_H__ -#define __DC_RESOURCE_DCE80_H__ - -#include "core_types.h" - -struct dc; -struct resource_pool; - -struct resource_pool *dce80_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc); - -struct resource_pool *dce81_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc); - -struct resource_pool *dce83_create_resource_pool( - uint8_t num_virtual_links, - struct dc *dc); - -#endif /* __DC_RESOURCE_DCE80_H__ */ - diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 2d2007c3e2b6..1eb7418ced3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -22,7 +22,7 @@ # # Makefile for DCN. -DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o \ +DCN10 = dcn10_init.o dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ dcn10_dpp.o dcn10_opp.o dcn10_optc.o \ dcn10_hubp.o dcn10_mpc.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c deleted file mode 100644 index b94c5c97eee7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ /dev/null @@ -1,1686 +0,0 @@ -/* -* Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "dc.h" - -#include "dcn10_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn10_resource.h" -#include "dcn10_ipp.h" -#include "dcn10_mpc.h" -#include "irq/dcn10/irq_service_dcn10.h" -#include "dcn10_dpp.h" -#include "dcn10_optc.h" -#include "dcn10/dcn10_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn10_opp.h" -#include "dcn10_link_encoder.h" -#include "dcn10_stream_encoder.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dce112/dce112_resource.h" -#include "dcn10_hubp.h" -#include "dcn10_hubbub.h" -#include "dce/dce_panel_cntl.h" - -#include "soc15_hw_ip.h" -#include "vega10_ip_offset.h" - -#include "dcn/dcn_1_0_offset.h" -#include "dcn/dcn_1_0_sh_mask.h" - -#include "nbio/nbio_7_0_offset.h" - -#include "mmhub/mmhub_9_1_offset.h" -#include "mmhub/mmhub_9_1_sh_mask.h" - -#include "reg_helper.h" -#include "dce/dce_abm.h" -#include "dce/dce_dmcu.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" - -#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL - #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f - #define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x220f - #define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x230f - #define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x240f - #define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x250f - #define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x260f - #define mmDP5_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x270f - #define mmDP6_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 -#endif - - -enum dcn10_clk_src_array_id { - DCN10_CLK_SRC_PLL0, - DCN10_CLK_SRC_PLL1, - DCN10_CLK_SRC_PLL2, - DCN10_CLK_SRC_PLL3, - DCN10_CLK_SRC_TOTAL, - DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3 -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file */ - -/* DCN */ -#define BASE_INNER(seg) \ - DCE_BASE__INST0_SEG ## seg - -#define BASE(seg) \ - BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## reg_name ## 0 ## _ ## block ## id ## _BASE_IDX) + \ - mm ## reg_name ## 0 ## _ ## block ## id - -/* set field/register/bitfield name */ -#define SFRB(field_name, reg_name, bitfield, post_fix)\ - .field_name = reg_name ## __ ## bitfield ## post_fix - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIF_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* macros to expend register list macro defined in HW object header file - * end 
*********************/ - - -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCN10_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCN10(_MASK) -}; - -static const struct dce_abm_registers abm_regs = { - ABM_DCN10_REG_LIST(0) -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN10(_MASK) -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN10(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define aux_regs(id)\ -[id] = {\ - AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3) -}; - -#define link_regs(id)\ -[id] = {\ - LE_DCN10_REG_LIST(id), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ -} - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0), - link_regs(1), - link_regs(2), - link_regs(3) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN10(_MASK) -}; - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCN_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCN10_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN10_AUX_MASK_SH_LIST(_MASK) -}; - -#define ipp_regs(id)\ -[id] = {\ - IPP_REG_LIST_DCN10(id),\ -} - -static const struct dcn10_ipp_registers ipp_regs[] = { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2), - ipp_regs(3), -}; - -static const struct dcn10_ipp_shift ipp_shift = { - IPP_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dcn10_ipp_mask ipp_mask = { - IPP_MASK_SH_LIST_DCN10(_MASK), -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN10(id),\ -} - -static const struct dcn10_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - 
opp_regs(3), -}; - -static const struct dcn10_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dcn10_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN10(_MASK), -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST(id), \ - .AUX_RESET_MASK = 0 \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), - aux_engine_regs(5) -}; - -#define tf_regs(id)\ -[id] = {\ - TF_REG_LIST_DCN10(id),\ -} - -static const struct dcn_dpp_registers tf_regs[] = { - tf_regs(0), - tf_regs(1), - tf_regs(2), - tf_regs(3), -}; - -static const struct dcn_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN10(__SHIFT), - TF_DEBUG_REG_LIST_SH_DCN10 - -}; - -static const struct dcn_dpp_mask tf_mask = { - TF_REG_LIST_SH_MASK_DCN10(_MASK), - TF_DEBUG_REG_LIST_MASK_DCN10 -}; - -static const struct dcn_mpc_registers mpc_regs = { - MPC_COMMON_REG_LIST_DCN1_0(0), - MPC_COMMON_REG_LIST_DCN1_0(1), - MPC_COMMON_REG_LIST_DCN1_0(2), - MPC_COMMON_REG_LIST_DCN1_0(3), - MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(0), - MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(1), - MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(2), - MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(3) -}; - -static const struct dcn_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT),\ - SFRB(CUR_VUPDATE_LOCK_SET, CUR0_VUPDATE_LOCK_SET0, CUR0_VUPDATE_LOCK_SET, __SHIFT) -}; - -static const struct dcn_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN1_0(_MASK),\ - SFRB(CUR_VUPDATE_LOCK_SET, CUR0_VUPDATE_LOCK_SET0, CUR0_VUPDATE_LOCK_SET, _MASK) -}; - -#define tg_regs(id)\ -[id] = {TG_COMMON_REG_LIST_DCN1_0(id)} - -static const struct dcn_optc_registers tg_regs[] = { - tg_regs(0), - tg_regs(1), - tg_regs(2), - tg_regs(3), -}; - -static const struct dcn_optc_shift tg_shift = { - TG_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT) -}; - -static const struct dcn_optc_mask tg_mask = { - TG_COMMON_MASK_SH_LIST_DCN1_0(_MASK) -}; - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN10(id)\ -} - -static const struct dcn_mi_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3), -}; - -static const struct dcn_mi_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dcn_mi_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN10(_MASK) -}; - -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN10(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN10(_MASK) -}; - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - default: - ASSERT(0); - return 0; - } -} - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN1_0(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - 
CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK) -}; - -static const struct resource_caps res_cap = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 4, - .num_stream_encoder = 4, - .num_pll = 4, - .num_ddc = 4, -}; - -static const struct resource_caps rv2_res_cap = { - .num_timing_generator = 3, - .num_opp = 3, - .num_video_plane = 3, - .num_audio = 3, - .num_stream_encoder = 3, - .num_pll = 3, - .num_ddc = 4, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 250, - .fp16 = 1 - } -}; - -static const struct dc_debug_options debug_defaults_drv = { - .sanity_checks = true, - .disable_dmcu = false, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - - /* raven smu dones't allow 0 disp clk, - * smu min disp clk limit is 50Mhz - * keep min disp clk 100Mhz avoid smu hang - */ - .min_disp_clk_khz = 100000, - - .disable_pplib_clock_request = false, - .disable_pplib_wm_range = false, - .pplib_wm_report_mode = WM_REPORT_DEFAULT, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = true, - .disable_dcc = DCC_ENABLE, - .voltage_align_fclk = true, - .disable_stereo_support = true, - .vsr_support = true, - .performance_trace = false, - .az_endpoint_mute_only = true, - .recovery_enabled = false, /*enable this by default after testing.*/ - .max_downscale_src_width = 3840, - .underflow_assert_delay_us = 0xFFFFFFFF, - .enable_legacy_fast_update = true, - .using_dml2 = false, -}; - -static const struct dc_debug_options debug_defaults_diags = { - .disable_dmcu = false, - .force_abm_enable = false, - .timing_trace = true, - .clock_trace = true, - .disable_stutter = true, - .disable_pplib_clock_request = true, - .disable_pplib_wm_range = true, - .underflow_assert_delay_us = 0xFFFFFFFF, -}; - -static void dcn10_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN10_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn10_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn10_dpp *dpp = - kzalloc(sizeof(struct dcn10_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - dpp1_construct(dpp, ctx, inst, - &tf_regs[inst], &tf_shift, &tf_mask); - return &dpp->base; -} - -static struct input_pixel_processor *dcn10_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn10_ipp *ipp = - kzalloc(sizeof(struct dcn10_ipp), GFP_KERNEL); - - if (!ipp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn10_ipp_construct(ipp, ctx, inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - - -static struct output_pixel_processor *dcn10_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn10_opp *opp = - kzalloc(sizeof(struct dcn10_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn10_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dcn10_aux_engine_create(struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, 
- ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), - i2c_inst_regs(6), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) -}; - -static struct dce_i2c_hw *dcn10_i2c_hw_create(struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn1_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct mpc *dcn10_mpc_create(struct dc_context *ctx) -{ - struct dcn10_mpc *mpc10 = kzalloc(sizeof(struct dcn10_mpc), - GFP_KERNEL); - - if (!mpc10) - return NULL; - - dcn10_mpc_construct(mpc10, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - 4); - - return &mpc10->base; -} - -static struct hubbub *dcn10_hubbub_create(struct dc_context *ctx) -{ - struct dcn10_hubbub *dcn10_hubbub = kzalloc(sizeof(struct dcn10_hubbub), - GFP_KERNEL); - - if (!dcn10_hubbub) - return NULL; - - hubbub1_construct(&dcn10_hubbub->base, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask); - - return &dcn10_hubbub->base; -} - -static struct timing_generator *dcn10_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &tg_regs[instance]; - tgn10->tg_shift = &tg_shift; - tgn10->tg_mask = &tg_mask; - - dcn10_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn10_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn10_link_encoder *enc10 = - kzalloc(sizeof(struct dcn10_link_encoder), GFP_KERNEL); - int link_regs_id; - - if (!enc10) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dcn10_link_encoder_construct(enc10, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc10->base; -} - -static struct panel_cntl *dcn10_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static struct clock_source *dcn10_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - 
kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dce112_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); -} - -static struct audio *create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct stream_encoder *dcn10_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1 = - kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - - if (!enc1) - return NULL; - - dcn10_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - return &enc1->base; -} - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN1_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN1_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN1_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dcn10_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - hws->wa.DEGVIDCN10_253 = true; - hws->wa.false_optc_underflow = true; - hws->wa.DEGVIDCN10_254 = true; - - if ((ctx->asic_id.chip_family == FAMILY_RV) && - ASICREV_IS_RAVEN2(ctx->asic_id.hw_internal_rev)) - switch (ctx->asic_id.pci_revision_id) { - case PRID_POLLOCK_94: - case PRID_POLLOCK_95: - case PRID_POLLOCK_E9: - case PRID_POLLOCK_EA: - case PRID_POLLOCK_EB: - hws->wa.wait_hubpret_read_start_during_mpo_transition = true; - break; - default: - hws->wa.wait_hubpret_read_start_during_mpo_transition = false; - break; - } - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = create_audio, - .create_stream_encoder = dcn10_stream_encoder_create, - .create_hwseq = dcn10_hwseq_create, -}; - -static void dcn10_clock_source_destroy(struct clock_source **clk_src) -{ - kfree(TO_DCE110_CLK_SRC(*clk_src)); - *clk_src = NULL; -} - -static struct pp_smu_funcs *dcn10_pp_smu_create(struct dc_context *ctx) -{ - struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); - - if (!pp_smu) - return pp_smu; - - dm_pp_get_funcs(ctx, pp_smu); - return pp_smu; -} - -static void dcn10_resource_destruct(struct dcn10_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN10_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - - if (pool->base.dpps[i] != NULL) - dcn10_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - 
pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN10_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn10_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn10_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - if (pool->base.abm != NULL) - dce_abm_destroy(&pool->base.abm); - - if (pool->base.dmcu != NULL) - dce_dmcu_destroy(&pool->base.dmcu); - - kfree(pool->base.pp_smu); -} - -static struct hubp *dcn10_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn10_hubp *hubp1 = - kzalloc(sizeof(struct dcn10_hubp), GFP_KERNEL); - - if (!hubp1) - return NULL; - - dcn10_hubp_construct(hubp1, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask); - return &hubp1->base; -} - -static void get_pixel_clock_parameters( - const struct pipe_ctx *pipe_ctx, - struct pixel_clk_params *pixel_clk_params) -{ - const struct dc_stream_state *stream = pipe_ctx->stream; - pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; - pixel_clk_params->encoder_object_id = stream->link->link_enc->id; - pixel_clk_params->signal_type = pipe_ctx->stream->signal; - pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; - /* TODO: un-hardcode*/ - pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * - LINK_RATE_REF_FREQ_IN_KHZ; - pixel_clk_params->flags.ENABLE_SS = 0; - pixel_clk_params->color_depth = - stream->timing.display_color_depth; - pixel_clk_params->flags.DISPLAY_BLANKED = 1; - pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; - - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) - pixel_clk_params->color_depth = COLOR_DEPTH_888; - - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) - pixel_clk_params->requested_pix_clk_100hz /= 2; - if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clk_params->requested_pix_clk_100hz *= 2; - -} - -static void build_clamping_params(struct dc_stream_state *stream) -{ - stream->clamping.clamping_level = CLAMPING_FULL_RANGE; - stream->clamping.c_depth = stream->timing.display_color_depth; - stream->clamping.pixel_encoding = stream->timing.pixel_encoding; -} - -static void build_pipe_hw_param(struct pipe_ctx *pipe_ctx) -{ - - get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); - - pipe_ctx->clock_source->funcs->get_pix_clk_dividers( - pipe_ctx->clock_source, - &pipe_ctx->stream_res.pix_clk_params, - &pipe_ctx->pll_settings); - - pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; - - resource_build_bit_depth_reduction_params(pipe_ctx->stream, - 
&pipe_ctx->stream->bit_depth_params); - build_clamping_params(pipe_ctx->stream); -} - -static enum dc_status build_mapped_resource( - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *stream) -{ - struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - build_pipe_hw_param(pipe_ctx); - return DC_OK; -} - -static enum dc_status dcn10_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *dc_stream) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - result = resource_map_pool_resources(dc, new_ctx, dc_stream); - - if (result == DC_OK) - result = resource_map_phy_clock_resources(dc, new_ctx, dc_stream); - - - if (result == DC_OK) - result = build_mapped_resource(dc, new_ctx, dc_stream); - - return result; -} - -static struct pipe_ctx *dcn10_acquire_free_pipe_for_layer( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *opp_head_pipe) -{ - struct resource_context *res_ctx = &new_ctx->res_ctx; - struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream); - struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe); - - if (!head_pipe) { - ASSERT(0); - return NULL; - } - - if (!idle_pipe) - return NULL; - - idle_pipe->stream = head_pipe->stream; - idle_pipe->stream_res.tg = head_pipe->stream_res.tg; - idle_pipe->stream_res.abm = head_pipe->stream_res.abm; - idle_pipe->stream_res.opp = head_pipe->stream_res.opp; - - idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; - - return idle_pipe; -} - -static bool dcn10_get_dcc_compression_cap(const struct dc *dc, - const struct dc_dcc_surface_param *input, - struct dc_surface_dcc_cap *output) -{ - return dc->res_pool->hubbub->funcs->get_dcc_compression_cap( - dc->res_pool->hubbub, - input, - output); -} - -static void dcn10_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn10_resource_pool *dcn10_pool = TO_DCN10_RES_POOL(*pool); - - dcn10_resource_destruct(dcn10_pool); - kfree(dcn10_pool); - *pool = NULL; -} - -static bool dcn10_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool voltage_supported; - - DC_FP_START(); - voltage_supported = dcn_validate_bandwidth(dc, context, fast_validate); - DC_FP_END(); - - return voltage_supported; -} - -static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps) -{ - if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN - && caps->max_video_width != 0 - && plane_state->src_rect.width > caps->max_video_width) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - -static enum dc_status dcn10_validate_global(struct dc *dc, struct dc_state *context) -{ - int i, j; - bool video_down_scaled = false; - bool video_large = false; - bool desktop_large = false; - bool dcc_disabled = false; - bool mpo_enabled = false; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 2) - return DC_FAIL_UNSUPPORTED_1; - - if (context->stream_status[i].plane_count > 1) - mpo_enabled = true; - - for (j = 0; j < 
context->stream_status[i].plane_count; j++) { - struct dc_plane_state *plane = - context->stream_status[i].plane_states[j]; - - - if (plane->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - - if (plane->src_rect.width > plane->dst_rect.width || - plane->src_rect.height > plane->dst_rect.height) - video_down_scaled = true; - - if (plane->src_rect.width >= 3840) - video_large = true; - - } else { - if (plane->src_rect.width >= 3840) - desktop_large = true; - if (!plane->dcc.enable) - dcc_disabled = true; - } - } - } - - /* Disable MPO in multi-display configurations. */ - if (context->stream_count > 1 && mpo_enabled) - return DC_FAIL_UNSUPPORTED_1; - - /* - * Workaround: On DCN10 there is UMC issue that causes underflow when - * playing 4k video on 4k desktop with video downscaled and single channel - * memory - */ - if (video_large && desktop_large && video_down_scaled && dcc_disabled && - dc->dcn_soc->number_of_channels == 1) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - -static enum dc_status dcn10_patch_unknown_plane_state(struct dc_plane_state *plane_state) -{ - enum surface_pixel_format surf_pix_format = plane_state->format; - unsigned int bpp = resource_pixel_format_to_bpp(surf_pix_format); - - enum swizzle_mode_values swizzle = DC_SW_LINEAR; - - if (bpp == 64) - swizzle = DC_SW_64KB_D; - else - swizzle = DC_SW_64KB_S; - - plane_state->tiling_info.gfx9.swizzle = swizzle; - return DC_OK; -} - -struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_stream_state *stream) -{ - int i; - int j = -1; - struct dc_link *link = stream->link; - - for (i = 0; i < pool->stream_enc_count; i++) { - if (!res_ctx->is_stream_enc_acquired[i] && - pool->stream_enc[i]) { - /* Store first available for MST second display - * in daisy chain use case - */ - j = i; - if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id == - link->link_enc->preferred_engine) - return pool->stream_enc[i]; - } - } - - /* - * For CZ and later, we can allow DIG FE and BE to differ for all display types - */ - - if (j >= 0) - return pool->stream_enc[j]; - - return NULL; -} - -static const struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn10_get_dcc_compression_cap -}; - -static const struct resource_funcs dcn10_res_pool_funcs = { - .destroy = dcn10_destroy_resource_pool, - .link_enc_create = dcn10_link_encoder_create, - .panel_cntl_create = dcn10_panel_cntl_create, - .validate_bandwidth = dcn10_validate_bandwidth, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn10_acquire_free_pipe_for_layer, - .validate_plane = dcn10_validate_plane, - .validate_global = dcn10_validate_global, - .add_stream_to_ctx = dcn10_add_stream_to_ctx, - .patch_unknown_plane_state = dcn10_patch_unknown_plane_state, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link -}; - -static uint32_t read_pipe_fuses(struct dc_context *ctx) -{ - uint32_t value = dm_read_reg_soc15(ctx, mmCC_DC_PIPE_DIS, 0); - /* RV1 support max 4 pipes */ - value = value & 0xf; - return value; -} - -static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks) -{ - int i; - - if (clks->num_levels == 0) - return false; - - for (i = 0; i < clks->num_levels; i++) - /* Ensure that the result is sane */ - if (clks->data[i].clocks_in_khz == 0) - return false; - - return true; -} - -static bool dcn10_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn10_resource_pool 
*pool) -{ - int i; - int j; - struct dc_context *ctx = dc->ctx; - uint32_t pipe_fuses = read_pipe_fuses(ctx); - struct dm_pp_clock_levels_with_voltage fclks = {0}, dcfclks = {0}; - int min_fclk_khz, min_dcfclk_khz, socclk_khz; - bool res; - - ctx->dc_bios->regs = &bios_regs; - - if (ctx->dce_version == DCN_VERSION_1_01) - pool->base.res_cap = &rv2_res_cap; - else - pool->base.res_cap = &res_cap; - pool->base.funcs = &dcn10_res_pool_funcs; - - /* - * TODO fill in from actual raven resource when we create - * more than virtual encoder - */ - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - - /* max pipe num for ASIC before check pipe fuses */ - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - - if (dc->ctx->dce_version == DCN_VERSION_1_01) - pool->base.pipe_count = 3; - dc->caps.max_video_width = 3840; - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 0; - dc->caps.is_apu = true; - dc->caps.post_blend_color_processing = false; - dc->caps.extended_aux_timeout_support = false; - - /* Raven DP PHY HBR2 eye diagram pattern is not stable. Use TP4 */ - dc->caps.force_dp_tps4_for_cp2520 = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 1; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 1; - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.dgam_rom_caps.pq = 0; - dc->caps.color.dpp.dgam_rom_caps.hlg = 0; - dc->caps.color.dpp.post_csc = 0; - dc->caps.color.dpp.gamma_corr = 0; - dc->caps.color.dpp.dgam_rom_for_yuv = 1; - - dc->caps.color.dpp.hw_3d_lut = 0; - dc->caps.color.dpp.ogam_ram = 1; // RGAM on DCN1 - dc->caps.color.dpp.ogam_rom_caps.srgb = 1; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 1; - - /* no post-blend color operations */ - dc->caps.color.mpc.gamut_remap = 0; - dc->caps.color.mpc.num_3dluts = 0; - dc->caps.color.mpc.shared_3d_lut = 0; - dc->caps.color.mpc.ogam_ram = 0; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 0; - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - else - dc->debug = debug_defaults_diags; - - /************************************************* - * Create resources * - *************************************************/ - - pool->base.clock_sources[DCN10_CLK_SRC_PLL0] = - dcn10_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN10_CLK_SRC_PLL1] = - dcn10_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN10_CLK_SRC_PLL2] = - dcn10_clock_source_create(ctx, ctx->dc_bios, - 
CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - - if (dc->ctx->dce_version == DCN_VERSION_1_0) { - pool->base.clock_sources[DCN10_CLK_SRC_PLL3] = - dcn10_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - } - - pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL; - - if (dc->ctx->dce_version == DCN_VERSION_1_01) - pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL; - - pool->base.dp_clock_source = - dcn10_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - /* todo: not reuse phy_pll registers */ - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto fail; - } - } - - pool->base.dmcu = dcn10_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto fail; - } - - dml_init_instance(&dc->dml, &dcn1_0_soc, &dcn1_0_ip, DML_PROJECT_RAVEN1); - memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); - memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); - - DC_FP_START(); - dcn10_resource_construct_fp(dc); - DC_FP_END(); - - if (!dc->config.is_vmin_only_asic) - if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev)) - switch (dc->ctx->asic_id.pci_revision_id) { - case PRID_DALI_DE: - case PRID_DALI_DF: - case PRID_DALI_E3: - case PRID_DALI_E4: - case PRID_POLLOCK_94: - case PRID_POLLOCK_95: - case PRID_POLLOCK_E9: - case PRID_POLLOCK_EA: - case PRID_POLLOCK_EB: - dc->config.is_vmin_only_asic = true; - break; - default: - break; - } - - pool->base.pp_smu = dcn10_pp_smu_create(ctx); - - /* - * Right now SMU/PPLIB and DAL all have the AZ D3 force PME notification * - * implemented. So AZ D3 should work.For issue 197007. * - */ - if (pool->base.pp_smu != NULL - && pool->base.pp_smu->rv_funcs.set_pme_wa_enable != NULL) - dc->debug.az_endpoint_mute_only = false; - - - if (!dc->debug.disable_pplib_clock_request) { - /* - * TODO: This is not the proper way to obtain - * fabric_and_dram_bandwidth, should be min(fclk, memclk). 
- */ - res = dm_pp_get_clock_levels_by_type_with_voltage( - ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks); - - DC_FP_START(); - - if (res) - res = verify_clock_values(&fclks); - - if (res) - dcn_bw_update_from_pplib_fclks(dc, &fclks); - else - BREAK_TO_DEBUGGER(); - - DC_FP_END(); - - res = dm_pp_get_clock_levels_by_type_with_voltage( - ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks); - - DC_FP_START(); - - if (res) - res = verify_clock_values(&dcfclks); - - if (res) - dcn_bw_update_from_pplib_dcfclks(dc, &dcfclks); - else - BREAK_TO_DEBUGGER(); - - DC_FP_END(); - } - - dcn_bw_sync_calcs_and_dml(dc); - if (!dc->debug.disable_pplib_wm_range) { - dc->res_pool = &pool->base; - DC_FP_START(); - dcn_get_soc_clks( - dc, &min_fclk_khz, &min_dcfclk_khz, &socclk_khz); - DC_FP_END(); - dcn_bw_notify_pplib_of_wm_ranges( - dc, min_fclk_khz, min_dcfclk_khz, socclk_khz); - } - - { - struct irq_service_init_data init_data; - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn10_create(&init_data); - if (!pool->base.irqs) - goto fail; - } - - /* index to valid pipe resource */ - j = 0; - /* mem input -> ipp -> dpp -> opp -> TG */ - for (i = 0; i < pool->base.pipe_count; i++) { - /* if pipe is disabled, skip instance of HW pipe, - * i.e, skip ASIC register instance - */ - if ((pipe_fuses & (1 << i)) != 0) - continue; - - pool->base.hubps[j] = dcn10_hubp_create(ctx, i); - if (pool->base.hubps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create memory input!\n"); - goto fail; - } - - pool->base.ipps[j] = dcn10_ipp_create(ctx, i); - if (pool->base.ipps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create input pixel processor!\n"); - goto fail; - } - - pool->base.dpps[j] = dcn10_dpp_create(ctx, i); - if (pool->base.dpps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpp!\n"); - goto fail; - } - - pool->base.opps[j] = dcn10_opp_create(ctx, i); - if (pool->base.opps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto fail; - } - - pool->base.timing_generators[j] = dcn10_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto fail; - } - /* check next valid pipe */ - j++; - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn10_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto fail; - } - pool->base.hw_i2cs[i] = dcn10_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* valid pipe num */ - pool->base.pipe_count = j; - pool->base.timing_generator_count = j; - - /* within dml lib, it is hard code to 4. 
If ASIC pipe is fused, - * the value may be changed - */ - dc->dml.ip.max_num_dpp = pool->base.pipe_count; - dc->dcn_ip->max_num_dpp = pool->base.pipe_count; - - pool->base.mpc = dcn10_mpc_create(ctx); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto fail; - } - - pool->base.hubbub = dcn10_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto fail; - } - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto fail; - - dcn10_hw_sequencer_construct(dc); - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - return true; - -fail: - - dcn10_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn10_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn10_resource_pool *pool = - kzalloc(sizeof(struct dcn10_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn10_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - kfree(pool); - BREAK_TO_DEBUGGER(); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h deleted file mode 100644 index bf8e33cd8147..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h +++ /dev/null @@ -1,56 +0,0 @@ -/* -* Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DC_RESOURCE_DCN10_H__ -#define __DC_RESOURCE_DCN10_H__ - -#include "core_types.h" -#include "dml/dcn10/dcn10_fpu.h" - -#define TO_DCN10_RES_POOL(pool)\ - container_of(pool, struct dcn10_resource_pool, base) - -struct dc; -struct resource_pool; -struct _vcs_dpi_display_pipe_params_st; - -extern struct _vcs_dpi_ip_params_st dcn1_0_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc; - -struct dcn10_resource_pool { - struct resource_pool base; -}; -struct resource_pool *dcn10_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_stream_state *stream); - - -#endif /* __DC_RESOURCE_DCN10_H__ */ - diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index d7dc9696a8c8..1cac1eca8111 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -2,7 +2,7 @@ # # Makefile for DCN. -DCN20 = dcn20_resource.o dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \ +DCN20 = dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \ dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \ dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c deleted file mode 100644 index 0a422fbb14bc..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ /dev/null @@ -1,2789 +0,0 @@ -/* -* Copyright 2016 Advanced Micro Devices, Inc. - * Copyright 2019 Raptor Engineering, LLC - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include - -#include "dm_services.h" -#include "dc.h" - -#include "dcn20_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn20/dcn20_resource.h" - -#include "dml/dcn20/dcn20_fpu.h" - -#include "dcn10/dcn10_hubp.h" -#include "dcn10/dcn10_ipp.h" -#include "dcn20_hubbub.h" -#include "dcn20_mpc.h" -#include "dcn20_hubp.h" -#include "irq/dcn20/irq_service_dcn20.h" -#include "dcn20_dpp.h" -#include "dcn20_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_resource.h" -#include "dcn20_opp.h" - -#include "dcn20_dsc.h" - -#include "dcn20_link_encoder.h" -#include "dcn20_stream_encoder.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dml/display_mode_vba.h" -#include "dcn20_dccg.h" -#include "dcn20_vmid.h" -#include "dce/dce_panel_cntl.h" - -#include "navi10_ip_offset.h" - -#include "dcn/dcn_2_0_0_offset.h" -#include "dcn/dcn_2_0_0_sh_mask.h" -#include "dpcs/dpcs_2_0_0_offset.h" -#include "dpcs/dpcs_2_0_0_sh_mask.h" - -#include "nbio/nbio_2_3_offset.h" - -#include "dcn20/dcn20_dwb.h" -#include "dcn20/dcn20_mmhubbub.h" - -#include "mmhub/mmhub_2_0_0_offset.h" -#include "mmhub/mmhub_2_0_0_sh_mask.h" - -#include "reg_helper.h" -#include "dce/dce_abm.h" -#include "dce/dce_dmcu.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" -#include "vm_helper.h" -#include "link_enc_cfg.h" - -#include "amdgpu_socbb.h" - -#include "link.h" -#define DC_LOGGER_INIT(logger) - -#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL - #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f - #define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x220f - #define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x230f - #define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x240f - #define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x250f - #define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x260f - #define mmDP5_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 - #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x270f - #define mmDP6_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 -#endif - - -enum dcn20_clk_src_array_id { - DCN20_CLK_SRC_PLL0, - DCN20_CLK_SRC_PLL1, - DCN20_CLK_SRC_PLL2, - DCN20_CLK_SRC_PLL3, - DCN20_CLK_SRC_PLL4, - DCN20_CLK_SRC_PLL5, - DCN20_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file */ - -/* DCN */ -#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRI2_DWB(reg_name, block, id)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name -#define SF_DWB(reg_name, field_name, post_fix)\ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## 
_ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - mm ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \ - mmMM ## reg_name - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN2_0(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E), - clk_src_regs(5, F) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCN10_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCN10(_MASK) -}; - -static const struct dce_abm_registers abm_regs = { - ABM_DCN20_REG_LIST() -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN20(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6), -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN2_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4), - stream_enc_regs(5), -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN20(_MASK) -}; - - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers 
link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4), - hpd_regs(5) -}; - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN10_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - DPCS_DCN2_REG_LIST(id), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ -} - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E), - link_regs(5, F) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT),\ - DPCS_DCN2_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK),\ - DPCS_DCN2_MASK_SH_LIST(_MASK) -}; - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCN_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -#define ipp_regs(id)\ -[id] = {\ - IPP_REG_LIST_DCN20(id),\ -} - -static const struct dcn10_ipp_registers ipp_regs[] = { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2), - ipp_regs(3), - ipp_regs(4), - ipp_regs(5), -}; - -static const struct dcn10_ipp_shift ipp_shift = { - IPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn10_ipp_mask ipp_mask = { - IPP_MASK_SH_LIST_DCN20(_MASK), -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN20(id),\ -} - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4), - opp_regs(5), -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), - aux_engine_regs(5) -}; - -#define tf_regs(id)\ -[id] = {\ - TF_REG_LIST_DCN20(id),\ - TF_REG_LIST_DCN20_COMMON_APPEND(id),\ -} - -static const struct dcn2_dpp_registers tf_regs[] = { - tf_regs(0), - tf_regs(1), - tf_regs(2), - tf_regs(3), - tf_regs(4), - tf_regs(5), -}; - -static const struct dcn2_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN20(__SHIFT), - TF_DEBUG_REG_LIST_SH_DCN20 -}; - -static const struct dcn2_dpp_mask tf_mask = { - TF_REG_LIST_SH_MASK_DCN20(_MASK), - TF_DEBUG_REG_LIST_MASK_DCN20 -}; - -#define dwbc_regs_dcn2(id)\ -[id] = {\ - DWBC_COMMON_REG_LIST_DCN2_0(id),\ - } - -static const struct dcn20_dwbc_registers dwbc20_regs[] = { - dwbc_regs_dcn2(0), -}; - -static const struct dcn20_dwbc_shift dwbc20_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dcn20_dwbc_mask dwbc20_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -#define mcif_wb_regs_dcn2(id)\ -[id] = {\ - MCIF_WB_COMMON_REG_LIST_DCN2_0(id),\ - } - -static const struct dcn20_mmhubbub_registers mcif_wb20_regs[] = { - mcif_wb_regs_dcn2(0), -}; - -static const struct dcn20_mmhubbub_shift mcif_wb20_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dcn20_mmhubbub_mask mcif_wb20_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -static const struct 
dcn20_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN2_0(0), - MPC_REG_LIST_DCN2_0(1), - MPC_REG_LIST_DCN2_0(2), - MPC_REG_LIST_DCN2_0(3), - MPC_REG_LIST_DCN2_0(4), - MPC_REG_LIST_DCN2_0(5), - MPC_OUT_MUX_REG_LIST_DCN2_0(0), - MPC_OUT_MUX_REG_LIST_DCN2_0(1), - MPC_OUT_MUX_REG_LIST_DCN2_0(2), - MPC_OUT_MUX_REG_LIST_DCN2_0(3), - MPC_OUT_MUX_REG_LIST_DCN2_0(4), - MPC_OUT_MUX_REG_LIST_DCN2_0(5), - MPC_DBG_REG_LIST_DCN2_0() -}; - -static const struct dcn20_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), - MPC_DEBUG_REG_LIST_SH_DCN20 -}; - -static const struct dcn20_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), - MPC_DEBUG_REG_LIST_MASK_DCN20 -}; - -#define tg_regs(id)\ -[id] = {TG_COMMON_REG_LIST_DCN2_0(id)} - - -static const struct dcn_optc_registers tg_regs[] = { - tg_regs(0), - tg_regs(1), - tg_regs(2), - tg_regs(3), - tg_regs(4), - tg_regs(5) -}; - -static const struct dcn_optc_shift tg_shift = { - TG_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dcn_optc_mask tg_mask = { - TG_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN20(id)\ -} - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3), - hubp_regs(4), - hubp_regs(5) -}; - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN20(_MASK) -}; - -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN20(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN20(_MASK) -}; - -#define vmid_regs(id)\ -[id] = {\ - DCN20_VMID_REG_LIST(id)\ -} - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - case TRANSMITTER_UNIPHY_F: - return 5; - break; - default: - ASSERT(0); - return 0; - } -} - -#define dsc_regsDCN20(id)\ -[id] = {\ - DSC_REG_LIST_DCN20(id)\ -} - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1), - dsc_regsDCN20(2), - dsc_regsDCN20(3), - dsc_regsDCN20(4), - dsc_regsDCN20(5) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN2() -}; - -static const struct dccg_shift dccg_shift 
= { - DCCG_MASK_SH_LIST_DCN2(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN2(_MASK) -}; - -static const struct resource_caps res_cap_nv10 = { - .num_timing_generator = 6, - .num_opp = 6, - .num_video_plane = 6, - .num_audio = 7, - .num_stream_encoder = 6, - .num_pll = 6, - .num_dwb = 1, - .num_ddc = 6, - .num_vmid = 16, - .num_dsc = 6, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 250, - .fp16 = 1 - }, - 16, - 16 -}; -static const struct resource_caps res_cap_nv14 = { - .num_timing_generator = 5, - .num_opp = 5, - .num_video_plane = 5, - .num_audio = 6, - .num_stream_encoder = 5, - .num_pll = 5, - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_dsc = 5, -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = false, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 5120,/*upto 5K*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .enable_legacy_fast_update = true, - .using_dml2 = false, -}; - -void dcn20_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN20_DPP(*dpp)); - *dpp = NULL; -} - -struct dpp *dcn20_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_dpp *dpp = - kzalloc(sizeof(struct dcn20_dpp), GFP_ATOMIC); - - if (!dpp) - return NULL; - - if (dpp2_construct(dpp, ctx, inst, - &tf_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -struct input_pixel_processor *dcn20_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn10_ipp *ipp = - kzalloc(sizeof(struct dcn10_ipp), GFP_ATOMIC); - - if (!ipp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_ipp_construct(ipp, ctx, inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - - -struct output_pixel_processor *dcn20_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_ATOMIC); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -struct dce_aux *dcn20_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), - i2c_inst_regs(6), -}; - -static const struct dce_i2c_shift i2c_shifts = { - 
I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) -}; - -struct dce_i2c_hw *dcn20_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -struct mpc *dcn20_mpc_create(struct dc_context *ctx) -{ - struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc), - GFP_ATOMIC); - - if (!mpc20) - return NULL; - - dcn20_mpc_construct(mpc20, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - 6); - - return &mpc20->base; -} - -struct hubbub *dcn20_hubbub_create(struct dc_context *ctx) -{ - int i; - struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), - GFP_ATOMIC); - - if (!hubbub) - return NULL; - - hubbub2_construct(hubbub, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask); - - for (i = 0; i < res_cap_nv10.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub->base; -} - -struct timing_generator *dcn20_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_ATOMIC); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &tg_regs[instance]; - tgn10->tg_shift = &tg_shift; - tgn10->tg_mask = &tg_mask; - - dcn20_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -struct link_encoder *dcn20_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - int link_regs_id; - - if (!enc20) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dcn20_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -static struct panel_cntl *dcn20_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static struct clock_source *dcn20_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_ATOMIC); - - if (!clk_src) - return NULL; - - if (dcn20_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = 
dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); -} - -static struct audio *dcn20_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -struct stream_encoder *dcn20_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1 = - kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - - if (!enc1) - return NULL; - - if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) { - if (eng_id >= ENGINE_ID_DIGD) - eng_id++; - } - - dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN2_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN2_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN2_MASK_SH_LIST(_MASK) -}; - -struct dce_hwseq *dcn20_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn20_create_audio, - .create_stream_encoder = dcn20_stream_encoder_create, - .create_hwseq = dcn20_hwseq_create, -}; - -static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu); - -void dcn20_clock_source_destroy(struct clock_source **clk_src) -{ - kfree(TO_DCE110_CLK_SRC(*clk_src)); - *clk_src = NULL; -} - - -struct display_stream_compressor *dcn20_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_ATOMIC); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -void dcn20_dsc_destroy(struct display_stream_compressor **dsc) -{ - kfree(container_of(*dsc, struct dcn20_dsc, base)); - *dsc = NULL; -} - - -static void dcn20_resource_destruct(struct dcn20_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn20_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < 
pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN20_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN20_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - - if (pool->base.abm != NULL) - dce_abm_destroy(&pool->base.abm); - - if (pool->base.dmcu != NULL) - dce_dmcu_destroy(&pool->base.dmcu); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); - - if (pool->base.pp_smu != NULL) - dcn20_pp_smu_destroy(&pool->base.pp_smu); - - if (pool->base.oem_device != NULL) { - struct dc *dc = pool->base.oem_device->ctx->dc; - - dc->link_srv->destroy_ddc_service(&pool->base.oem_device); - } -} - -struct hubp *dcn20_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_ATOMIC); - - if (!hubp2) - return NULL; - - if (hubp2_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static void get_pixel_clock_parameters( - struct pipe_ctx *pipe_ctx, - struct pixel_clk_params *pixel_clk_params) -{ - const struct dc_stream_state *stream = pipe_ctx->stream; - struct pipe_ctx *odm_pipe; - int opp_cnt = 1; - struct dc_link *link = stream->link; - struct link_encoder *link_enc = NULL; - struct dc *dc = pipe_ctx->stream->ctx->dc; - struct dce_hwseq *hws = dc->hwseq; - - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) - opp_cnt++; - - pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; - - link_enc = link_enc_cfg_get_link_enc(link); - if (link_enc) - pixel_clk_params->encoder_object_id = link_enc->id; - - pixel_clk_params->signal_type = pipe_ctx->stream->signal; - pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; - /* TODO: un-hardcode*/ - /* TODO - DP2.0 HW: calculate requested_sym_clk for UHBR rates */ - pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * - LINK_RATE_REF_FREQ_IN_KHZ; - pixel_clk_params->flags.ENABLE_SS = 0; - pixel_clk_params->color_depth = - stream->timing.display_color_depth; - pixel_clk_params->flags.DISPLAY_BLANKED = 1; - pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; - - if 
(stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) - pixel_clk_params->color_depth = COLOR_DEPTH_888; - - if (opp_cnt == 4) - pixel_clk_params->requested_pix_clk_100hz /= 4; - else if (optc2_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2) - pixel_clk_params->requested_pix_clk_100hz /= 2; - else if (hws->funcs.is_dp_dig_pixel_rate_div_policy) { - if (hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx)) - pixel_clk_params->requested_pix_clk_100hz /= 2; - } - - if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clk_params->requested_pix_clk_100hz *= 2; - -} - -static void build_clamping_params(struct dc_stream_state *stream) -{ - stream->clamping.clamping_level = CLAMPING_FULL_RANGE; - stream->clamping.c_depth = stream->timing.display_color_depth; - stream->clamping.pixel_encoding = stream->timing.pixel_encoding; -} - -static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) -{ - - get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); - - pipe_ctx->clock_source->funcs->get_pix_clk_dividers( - pipe_ctx->clock_source, - &pipe_ctx->stream_res.pix_clk_params, - &pipe_ctx->pll_settings); - - pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; - - resource_build_bit_depth_reduction_params(pipe_ctx->stream, - &pipe_ctx->stream->bit_depth_params); - build_clamping_params(pipe_ctx->stream); - - return DC_OK; -} - -enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream) -{ - enum dc_status status = DC_OK; - struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - - status = build_pipe_hw_param(pipe_ctx); - - return status; -} - - -void dcn20_acquire_dsc(const struct dc *dc, - struct resource_context *res_ctx, - struct display_stream_compressor **dsc, - int pipe_idx) -{ - int i; - const struct resource_pool *pool = dc->res_pool; - struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc; - - ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */ - *dsc = NULL; - - /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */ - if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { - *dsc = pool->dscs[pipe_idx]; - res_ctx->is_dsc_acquired[pipe_idx] = true; - return; - } - - /* Return old DSC to avoid the need for re-programming */ - if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) { - *dsc = dsc_old; - res_ctx->is_dsc_acquired[dsc_old->inst] = true; - return ; - } - - /* Find first free DSC */ - for (i = 0; i < pool->res_cap->num_dsc; i++) - if (!res_ctx->is_dsc_acquired[i]) { - *dsc = pool->dscs[i]; - res_ctx->is_dsc_acquired[i] = true; - break; - } -} - -void dcn20_release_dsc(struct resource_context *res_ctx, - const struct resource_pool *pool, - struct display_stream_compressor **dsc) -{ - int i; - - for (i = 0; i < pool->res_cap->num_dsc; i++) - if (pool->dscs[i] == *dsc) { - res_ctx->is_dsc_acquired[i] = false; - *dsc = NULL; - break; - } -} - - - -enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, - struct dc_state *dc_ctx, - struct dc_stream_state *dc_stream) -{ - enum dc_status result = DC_OK; - int i; - - /* Get a DSC if required and available */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &dc_ctx->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->top_pipe) - continue; - - if 
(pipe_ctx->stream != dc_stream) - continue; - - if (pipe_ctx->stream_res.dsc) - continue; - - dcn20_acquire_dsc(dc, &dc_ctx->res_ctx, &pipe_ctx->stream_res.dsc, i); - - /* The number of DSCs can be less than the number of pipes */ - if (!pipe_ctx->stream_res.dsc) { - result = DC_NO_DSC_RESOURCE; - } - - break; - } - - return result; -} - - -static enum dc_status remove_dsc_from_stream_resource(struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *dc_stream) -{ - struct pipe_ctx *pipe_ctx = NULL; - int i; - - for (i = 0; i < MAX_PIPES; i++) { - if (new_ctx->res_ctx.pipe_ctx[i].stream == dc_stream && !new_ctx->res_ctx.pipe_ctx[i].top_pipe) { - pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->stream_res.dsc) - dcn20_release_dsc(&new_ctx->res_ctx, dc->res_pool, &pipe_ctx->stream_res.dsc); - } - } - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - else - return DC_OK; -} - - -enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - result = resource_map_pool_resources(dc, new_ctx, dc_stream); - - if (result == DC_OK) - result = resource_map_phy_clock_resources(dc, new_ctx, dc_stream); - - /* Get a DSC if required and available */ - if (result == DC_OK && dc_stream->timing.flags.DSC) - result = dcn20_add_dsc_to_stream_resource(dc, new_ctx, dc_stream); - - if (result == DC_OK) - result = dcn20_build_mapped_resource(dc, new_ctx, dc_stream); - - return result; -} - - -enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) -{ - enum dc_status result = DC_OK; - - result = remove_dsc_from_stream_resource(dc, new_ctx, dc_stream); - - return result; -} - -/** - * dcn20_split_stream_for_odm - Check if stream can be splited for ODM - * - * @dc: DC object with resource pool info required for pipe split - * @res_ctx: Persistent state of resources - * @prev_odm_pipe: Reference to the previous ODM pipe - * @next_odm_pipe: Reference to the next ODM pipe - * - * This function takes a logically active pipe and a logically free pipe and - * halves all the scaling parameters that need to be halved while populating - * the free pipe with the required resources and configuring the next/previous - * ODM pipe pointers. - * - * Return: - * Return true if split stream for ODM is possible, otherwise, return false. 
- */ -bool dcn20_split_stream_for_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *prev_odm_pipe, - struct pipe_ctx *next_odm_pipe) -{ - int pipe_idx = next_odm_pipe->pipe_idx; - const struct resource_pool *pool = dc->res_pool; - - *next_odm_pipe = *prev_odm_pipe; - - next_odm_pipe->pipe_idx = pipe_idx; - next_odm_pipe->plane_res.mi = pool->mis[next_odm_pipe->pipe_idx]; - next_odm_pipe->plane_res.hubp = pool->hubps[next_odm_pipe->pipe_idx]; - next_odm_pipe->plane_res.ipp = pool->ipps[next_odm_pipe->pipe_idx]; - next_odm_pipe->plane_res.xfm = pool->transforms[next_odm_pipe->pipe_idx]; - next_odm_pipe->plane_res.dpp = pool->dpps[next_odm_pipe->pipe_idx]; - next_odm_pipe->plane_res.mpcc_inst = pool->dpps[next_odm_pipe->pipe_idx]->inst; - next_odm_pipe->stream_res.dsc = NULL; - if (prev_odm_pipe->next_odm_pipe && prev_odm_pipe->next_odm_pipe != next_odm_pipe) { - next_odm_pipe->next_odm_pipe = prev_odm_pipe->next_odm_pipe; - next_odm_pipe->next_odm_pipe->prev_odm_pipe = next_odm_pipe; - } - if (prev_odm_pipe->top_pipe && prev_odm_pipe->top_pipe->next_odm_pipe) { - prev_odm_pipe->top_pipe->next_odm_pipe->bottom_pipe = next_odm_pipe; - next_odm_pipe->top_pipe = prev_odm_pipe->top_pipe->next_odm_pipe; - } - if (prev_odm_pipe->bottom_pipe && prev_odm_pipe->bottom_pipe->next_odm_pipe) { - prev_odm_pipe->bottom_pipe->next_odm_pipe->top_pipe = next_odm_pipe; - next_odm_pipe->bottom_pipe = prev_odm_pipe->bottom_pipe->next_odm_pipe; - } - prev_odm_pipe->next_odm_pipe = next_odm_pipe; - next_odm_pipe->prev_odm_pipe = prev_odm_pipe; - - if (prev_odm_pipe->plane_state) { - struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data; - int new_width; - - /* HACTIVE halved for odm combine */ - sd->h_active /= 2; - /* Calculate new vp and recout for left pipe */ - /* Need at least 16 pixels width per side */ - if (sd->recout.x + 16 >= sd->h_active) - return false; - new_width = sd->h_active - sd->recout.x; - sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->recout.width - new_width)); - sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->recout.width - new_width)); - sd->recout.width = new_width; - - /* Calculate new vp and recout for right pipe */ - sd = &next_odm_pipe->plane_res.scl_data; - /* HACTIVE halved for odm combine */ - sd->h_active /= 2; - /* Need at least 16 pixels width per side */ - if (new_width <= 16) - return false; - new_width = sd->recout.width + sd->recout.x - sd->h_active; - sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->recout.width - new_width)); - sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->recout.width - new_width)); - sd->recout.width = new_width; - sd->viewport.x += dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->h_active - sd->recout.x)); - sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->h_active - sd->recout.x)); - sd->recout.x = 0; - } - if (!next_odm_pipe->top_pipe) - next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx]; - else - next_odm_pipe->stream_res.opp = next_odm_pipe->top_pipe->stream_res.opp; - if (next_odm_pipe->stream->timing.flags.DSC == 1 && !next_odm_pipe->top_pipe) { - dcn20_acquire_dsc(dc, res_ctx, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx); - ASSERT(next_odm_pipe->stream_res.dsc); - if (next_odm_pipe->stream_res.dsc == NULL) - return false; - } - - return true; -} - -void dcn20_split_stream_for_mpc( - struct resource_context *res_ctx, - 
const struct resource_pool *pool, - struct pipe_ctx *primary_pipe, - struct pipe_ctx *secondary_pipe) -{ - int pipe_idx = secondary_pipe->pipe_idx; - struct pipe_ctx *sec_bot_pipe = secondary_pipe->bottom_pipe; - - *secondary_pipe = *primary_pipe; - secondary_pipe->bottom_pipe = sec_bot_pipe; - - secondary_pipe->pipe_idx = pipe_idx; - secondary_pipe->plane_res.mi = pool->mis[secondary_pipe->pipe_idx]; - secondary_pipe->plane_res.hubp = pool->hubps[secondary_pipe->pipe_idx]; - secondary_pipe->plane_res.ipp = pool->ipps[secondary_pipe->pipe_idx]; - secondary_pipe->plane_res.xfm = pool->transforms[secondary_pipe->pipe_idx]; - secondary_pipe->plane_res.dpp = pool->dpps[secondary_pipe->pipe_idx]; - secondary_pipe->plane_res.mpcc_inst = pool->dpps[secondary_pipe->pipe_idx]->inst; - secondary_pipe->stream_res.dsc = NULL; - if (primary_pipe->bottom_pipe && primary_pipe->bottom_pipe != secondary_pipe) { - ASSERT(!secondary_pipe->bottom_pipe); - secondary_pipe->bottom_pipe = primary_pipe->bottom_pipe; - secondary_pipe->bottom_pipe->top_pipe = secondary_pipe; - } - primary_pipe->bottom_pipe = secondary_pipe; - secondary_pipe->top_pipe = primary_pipe; - - ASSERT(primary_pipe->plane_state); -} - -unsigned int dcn20_calc_max_scaled_time( - unsigned int time_per_pixel, - enum mmhubbub_wbif_mode mode, - unsigned int urgent_watermark) -{ - unsigned int time_per_byte = 0; - unsigned int total_y_free_entry = 0x200; /* two memory piece for luma */ - unsigned int total_c_free_entry = 0x140; /* two memory piece for chroma */ - unsigned int small_free_entry, max_free_entry; - unsigned int buf_lh_capability; - unsigned int max_scaled_time; - - if (mode == PACKED_444) /* packed mode */ - time_per_byte = time_per_pixel/4; - else if (mode == PLANAR_420_8BPC) - time_per_byte = time_per_pixel; - else if (mode == PLANAR_420_10BPC) /* p010 */ - time_per_byte = time_per_pixel * 819/1024; - - if (time_per_byte == 0) - time_per_byte = 1; - - small_free_entry = (total_y_free_entry > total_c_free_entry) ? total_c_free_entry : total_y_free_entry; - max_free_entry = (mode == PACKED_444) ? 
total_y_free_entry + total_c_free_entry : small_free_entry; - buf_lh_capability = max_free_entry*time_per_byte*32/16; /* there is 4bit fraction */ - max_scaled_time = buf_lh_capability - urgent_watermark; - return max_scaled_time; -} - -void dcn20_set_mcif_arb_params( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt) -{ - enum mmhubbub_wbif_mode wbif_mode; - struct mcif_arb_params *wb_arb_params; - int i, j, dwb_pipe; - - /* Writeback MCIF_WB arbitration parameters */ - dwb_pipe = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - for (j = 0; j < MAX_DWB_PIPES; j++) { - if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].wb_enabled == false) - continue; - - //wb_arb_params = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j].mcif_arb_params; - wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[dwb_pipe]; - - if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].dwb_params.out_format == dwb_scaler_mode_yuv420) { - if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC) - wbif_mode = PLANAR_420_8BPC; - else - wbif_mode = PLANAR_420_10BPC; - } else - wbif_mode = PACKED_444; - - DC_FP_START(); - dcn20_fpu_set_wb_arb_params(wb_arb_params, context, pipes, pipe_cnt, i); - DC_FP_END(); - - wb_arb_params->slice_lines = 32; - wb_arb_params->arbitration_slice = 2; - wb_arb_params->max_scaled_time = dcn20_calc_max_scaled_time(wb_arb_params->time_per_pixel, - wbif_mode, - wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */ - - dwb_pipe++; - - if (dwb_pipe >= MAX_DWB_PIPES) - return; - } - if (dwb_pipe >= MAX_DWB_PIPES) - return; - } -} - -bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx) -{ - int i; - - /* Validate DSC config, dsc count validation is already done */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; - struct dc_stream_state *stream = pipe_ctx->stream; - struct dsc_config dsc_cfg; - struct pipe_ctx *odm_pipe; - int opp_cnt = 1; - - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) - opp_cnt++; - - /* Only need to validate top pipe */ - if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC) - continue; - - dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left - + stream->timing.h_border_right) / opp_cnt; - dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top - + stream->timing.v_border_bottom; - dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; - dsc_cfg.color_depth = stream->timing.display_color_depth; - dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false; - dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; - dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; - - if (!pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg)) - return false; - } - return true; -} - -struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, - struct resource_context *res_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *primary_pipe) -{ - struct pipe_ctx *secondary_pipe = NULL; - - if (dc && primary_pipe) { - int j; - int preferred_pipe_idx = 0; - - /* first check the prev dc state: - * if this primary pipe has a bottom pipe in prev. 
state - * and if the bottom pipe is still available (which it should be), - * pick that pipe as secondary - * Same logic applies for ODM pipes - */ - if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { - preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx; - if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { - secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; - secondary_pipe->pipe_idx = preferred_pipe_idx; - } - } - if (secondary_pipe == NULL && - dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) { - preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx; - if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { - secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; - secondary_pipe->pipe_idx = preferred_pipe_idx; - } - } - - /* - * if this primary pipe does not have a bottom pipe in prev. state - * start backward and find a pipe that did not used to be a bottom pipe in - * prev. dc state. This way we make sure we keep the same assignment as - * last state and will not have to reprogram every pipe - */ - if (secondary_pipe == NULL) { - for (j = dc->res_pool->pipe_count - 1; j >= 0; j--) { - if (dc->current_state->res_ctx.pipe_ctx[j].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[j].prev_odm_pipe == NULL) { - preferred_pipe_idx = j; - - if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { - secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; - secondary_pipe->pipe_idx = preferred_pipe_idx; - break; - } - } - } - } - /* - * We should never hit this assert unless assignments are shuffled around - * if this happens we will prob. hit a vsync tdr - */ - ASSERT(secondary_pipe); - /* - * search backwards for the second pipe to keep pipe - * assignment more consistent - */ - if (secondary_pipe == NULL) { - for (j = dc->res_pool->pipe_count - 1; j >= 0; j--) { - preferred_pipe_idx = j; - - if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { - secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; - secondary_pipe->pipe_idx = preferred_pipe_idx; - break; - } - } - } - } - - return secondary_pipe; -} - -void dcn20_merge_pipes_for_validate( - struct dc *dc, - struct dc_state *context) -{ - int i; - - /* merge previously split odm pipes since mode support needs to make the decision */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *odm_pipe = pipe->next_odm_pipe; - - if (pipe->prev_odm_pipe) - continue; - - pipe->next_odm_pipe = NULL; - while (odm_pipe) { - struct pipe_ctx *next_odm_pipe = odm_pipe->next_odm_pipe; - - odm_pipe->plane_state = NULL; - odm_pipe->stream = NULL; - odm_pipe->top_pipe = NULL; - odm_pipe->bottom_pipe = NULL; - odm_pipe->prev_odm_pipe = NULL; - odm_pipe->next_odm_pipe = NULL; - if (odm_pipe->stream_res.dsc) - dcn20_release_dsc(&context->res_ctx, dc->res_pool, &odm_pipe->stream_res.dsc); - /* Clear plane_res and stream_res */ - memset(&odm_pipe->plane_res, 0, sizeof(odm_pipe->plane_res)); - memset(&odm_pipe->stream_res, 0, sizeof(odm_pipe->stream_res)); - odm_pipe = next_odm_pipe; - } - if (pipe->plane_state) - resource_build_scaling_params(pipe); - } - - /* merge previously mpc split pipes since mode support needs to make the decision */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe; 
- - if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) - continue; - - pipe->bottom_pipe = hsplit_pipe->bottom_pipe; - if (hsplit_pipe->bottom_pipe) - hsplit_pipe->bottom_pipe->top_pipe = pipe; - hsplit_pipe->plane_state = NULL; - hsplit_pipe->stream = NULL; - hsplit_pipe->top_pipe = NULL; - hsplit_pipe->bottom_pipe = NULL; - - /* Clear plane_res and stream_res */ - memset(&hsplit_pipe->plane_res, 0, sizeof(hsplit_pipe->plane_res)); - memset(&hsplit_pipe->stream_res, 0, sizeof(hsplit_pipe->stream_res)); - if (pipe->plane_state) - resource_build_scaling_params(pipe); - } -} - -int dcn20_validate_apply_pipe_split_flags( - struct dc *dc, - struct dc_state *context, - int vlevel, - int *split, - bool *merge) -{ - int i, pipe_idx, vlevel_split; - int plane_count = 0; - bool force_split = false; - bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID; - struct vba_vars_st *v = &context->bw_ctx.dml.vba; - int max_mpc_comb = v->maxMpcComb; - - if (context->stream_count > 1) { - if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP) - avoid_split = true; - } else if (dc->debug.force_single_disp_pipe_split) - force_split = true; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /** - * Workaround for avoiding pipe-split in cases where we'd split - * planes that are too small, resulting in splits that aren't - * valid for the scaler. - */ - if (pipe->plane_state && - (pipe->plane_state->dst_rect.width <= 16 || - pipe->plane_state->dst_rect.height <= 16 || - pipe->plane_state->src_rect.width <= 16 || - pipe->plane_state->src_rect.height <= 16)) - avoid_split = true; - - /* TODO: fix dc bugs and remove this split threshold thing */ - if (pipe->stream && !pipe->prev_odm_pipe && - (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) - ++plane_count; - } - if (plane_count > dc->res_pool->pipe_count / 2) - avoid_split = true; - - /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct dc_crtc_timing timing; - - if (!pipe->stream) - continue; - else { - timing = pipe->stream->timing; - if (timing.h_border_left + timing.h_border_right - + timing.v_border_top + timing.v_border_bottom > 0) { - avoid_split = true; - break; - } - } - } - - /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ - if (avoid_split) { - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) - if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 && - v->ModeSupport[vlevel][0]) - break; - /* Impossible to not split this pipe */ - if (vlevel > context->bw_ctx.dml.soc.num_states) - vlevel = vlevel_split; - else - max_mpc_comb = 0; - pipe_idx++; - } - v->maxMpcComb = max_mpc_comb; - } - - /* Split loop sets which pipe should be split based on dml outputs and dc flags */ - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - int pipe_plane = v->pipe_plane[pipe_idx]; - bool split4mpc = context->stream_count == 1 && plane_count == 1 - && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4; - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4) - 
split[i] = 4; - else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2) - split[i] = 2; - - if ((pipe->stream->view_format == - VIEW_3D_FORMAT_SIDE_BY_SIDE || - pipe->stream->view_format == - VIEW_3D_FORMAT_TOP_AND_BOTTOM) && - (pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_TOP_AND_BOTTOM || - pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_SIDE_BY_SIDE)) - split[i] = 2; - if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { - split[i] = 2; - v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1; - } - if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) { - split[i] = 4; - v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1; - } - /*420 format workaround*/ - if (pipe->stream->timing.h_addressable > 7680 && - pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - split[i] = 4; - } - v->ODMCombineEnabled[pipe_plane] = - v->ODMCombineEnablePerState[vlevel][pipe_plane]; - - if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) { - if (resource_get_mpc_slice_count(pipe) == 2) { - /*If need split for mpc but 2 way split already*/ - if (split[i] == 4) - split[i] = 2; /* 2 -> 4 MPC */ - else if (split[i] == 2) - split[i] = 0; /* 2 -> 2 MPC */ - else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) - merge[i] = true; /* 2 -> 1 MPC */ - } else if (resource_get_mpc_slice_count(pipe) == 4) { - /*If need split for mpc but 4 way split already*/ - if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe) - || !pipe->bottom_pipe)) { - merge[i] = true; /* 4 -> 2 MPC */ - } else if (split[i] == 0 && pipe->top_pipe && - pipe->top_pipe->plane_state == pipe->plane_state) - merge[i] = true; /* 4 -> 1 MPC */ - split[i] = 0; - } else if (resource_get_odm_slice_count(pipe) > 1) { - /* ODM -> MPC transition */ - if (pipe->prev_odm_pipe) { - split[i] = 0; - merge[i] = true; - } - } - } else { - if (resource_get_odm_slice_count(pipe) == 2) { - /*If need split for odm but 2 way split already*/ - if (split[i] == 4) - split[i] = 2; /* 2 -> 4 ODM */ - else if (split[i] == 2) - split[i] = 0; /* 2 -> 2 ODM */ - else if (pipe->prev_odm_pipe) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* exit ODM */ - } - } else if (resource_get_odm_slice_count(pipe) == 4) { - /*If need split for odm but 4 way split already*/ - if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) - || !pipe->next_odm_pipe)) { - merge[i] = true; /* 4 -> 2 ODM */ - } else if (split[i] == 0 && pipe->prev_odm_pipe) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* exit ODM */ - } - split[i] = 0; - } else if (resource_get_mpc_slice_count(pipe) > 1) { - /* MPC -> ODM transition */ - ASSERT(0); /* NOT expected yet */ - if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - split[i] = 0; - merge[i] = true; - } - } - } - - /* Adjust dppclk when split is forced, do not bother with dispclk */ - if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1) { - DC_FP_START(); - dcn20_fpu_adjust_dppclk(v, vlevel, max_mpc_comb, pipe_idx, false); - DC_FP_END(); - } - pipe_idx++; - } - - return vlevel; -} - -bool dcn20_fast_validate_bw( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *pipe_split_from, - int *vlevel_out, - bool fast_validate) -{ - bool out = false; - int split[MAX_PIPES] = { 0 }; - int pipe_cnt, i, pipe_idx, vlevel; - - 
ASSERT(pipes); - if (!pipes) - return false; - - dcn20_merge_pipes_for_validate(dc, context); - - DC_FP_START(); - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - DC_FP_END(); - - *pipe_cnt_out = pipe_cnt; - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (vlevel > context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); - - /*initialize pipe_just_split_from to invalid idx*/ - for (i = 0; i < MAX_PIPES; i++) - pipe_split_from[i] = -1; - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe; - - if (!pipe->stream || pipe_split_from[i] >= 0) - continue; - - pipe_idx++; - - if (!pipe->top_pipe && !pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { - hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); - ASSERT(hsplit_pipe); - if (!dcn20_split_stream_for_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe)) - goto validate_fail; - pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; - dcn20_build_mapped_resource(dc, context, pipe->stream); - } - - if (!pipe->plane_state) - continue; - /* Skip 2nd half of already split pipe */ - if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state) - continue; - - /* We do not support mpo + odm at the moment */ - if (hsplit_pipe && hsplit_pipe->plane_state != pipe->plane_state - && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) - goto validate_fail; - - if (split[i] == 2) { - if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) { - /* pipe not split previously needs split */ - hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) { - DC_FP_START(); - dcn20_fpu_adjust_dppclk(&context->bw_ctx.dml.vba, vlevel, context->bw_ctx.dml.vba.maxMpcComb, pipe_idx, true); - DC_FP_END(); - continue; - } - if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { - if (!dcn20_split_stream_for_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe)) - goto validate_fail; - dcn20_build_mapped_resource(dc, context, pipe->stream); - } else { - dcn20_split_stream_for_mpc( - &context->res_ctx, dc->res_pool, - pipe, hsplit_pipe); - resource_build_scaling_params(pipe); - resource_build_scaling_params(hsplit_pipe); - } - pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; - } - } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { - /* merge should already have been done */ - ASSERT(0); - } - } - /* Actual dsc count per stream dsc validation*/ - if (!dcn20_validate_dsc(dc, context)) { - context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] = - DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - *vlevel_out = vlevel; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) -{ - bool voltage_supported; - display_e2e_pipe_params_st *pipes; - - pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); - if (!pipes) - return false; - - DC_FP_START(); - voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate, pipes); - DC_FP_END(); - - kfree(pipes); - 
return voltage_supported; -} - -struct pipe_ctx *dcn20_acquire_free_pipe_for_layer( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *opp_head) -{ - struct resource_context *res_ctx = &new_ctx->res_ctx; - struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(res_ctx, opp_head->stream); - struct pipe_ctx *sec_dpp_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, otg_master); - - ASSERT(otg_master); - - if (!sec_dpp_pipe) - return NULL; - - sec_dpp_pipe->stream = opp_head->stream; - sec_dpp_pipe->stream_res.tg = opp_head->stream_res.tg; - sec_dpp_pipe->stream_res.opp = opp_head->stream_res.opp; - - sec_dpp_pipe->plane_res.hubp = pool->hubps[sec_dpp_pipe->pipe_idx]; - sec_dpp_pipe->plane_res.ipp = pool->ipps[sec_dpp_pipe->pipe_idx]; - sec_dpp_pipe->plane_res.dpp = pool->dpps[sec_dpp_pipe->pipe_idx]; - sec_dpp_pipe->plane_res.mpcc_inst = pool->dpps[sec_dpp_pipe->pipe_idx]->inst; - - return sec_dpp_pipe; -} - -bool dcn20_get_dcc_compression_cap(const struct dc *dc, - const struct dc_dcc_surface_param *input, - struct dc_surface_dcc_cap *output) -{ - return dc->res_pool->hubbub->funcs->get_dcc_compression_cap( - dc->res_pool->hubbub, - input, - output); -} - -static void dcn20_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn20_resource_pool *dcn20_pool = TO_DCN20_RES_POOL(*pool); - - dcn20_resource_destruct(dcn20_pool); - kfree(dcn20_pool); - *pool = NULL; -} - - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - - -enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state) -{ - enum surface_pixel_format surf_pix_format = plane_state->format; - unsigned int bpp = resource_pixel_format_to_bpp(surf_pix_format); - - plane_state->tiling_info.gfx9.swizzle = DC_SW_64KB_S; - if (bpp == 64) - plane_state->tiling_info.gfx9.swizzle = DC_SW_64KB_D; - - return DC_OK; -} - -void dcn20_release_pipe(struct dc_state *context, - struct pipe_ctx *pipe, - const struct resource_pool *pool) -{ - if (resource_is_pipe_type(pipe, OPP_HEAD) && pipe->stream_res.dsc) - dcn20_release_dsc(&context->res_ctx, pool, &pipe->stream_res.dsc); - memset(pipe, 0, sizeof(*pipe)); -} - -static const struct resource_funcs dcn20_res_pool_funcs = { - .destroy = dcn20_destroy_resource_pool, - .link_enc_create = dcn20_link_encoder_create, - .panel_cntl_create = dcn20_panel_cntl_create, - .validate_bandwidth = dcn20_validate_bandwidth, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn20_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .set_mcif_arb_params = dcn20_set_mcif_arb_params, - .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link -}; - -bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn20_dwbc *dwbc20 = kzalloc(sizeof(struct dcn20_dwbc), - GFP_KERNEL); - - if (!dwbc20) { - dm_error("DC: failed to create dwbc20!\n"); - return false; - } - dcn20_dwbc_construct(dwbc20, ctx, - 
&dwbc20_regs[i], - &dwbc20_shift, - &dwbc20_mask, - i); - pool->dwbc[i] = &dwbc20->base; - } - return true; -} - -bool dcn20_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - ASSERT(pipe_count > 0); - - for (i = 0; i < pipe_count; i++) { - struct dcn20_mmhubbub *mcif_wb20 = kzalloc(sizeof(struct dcn20_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb20) { - dm_error("DC: failed to create mcif_wb20!\n"); - return false; - } - - dcn20_mmhubbub_construct(mcif_wb20, ctx, - &mcif_wb20_regs[i], - &mcif_wb20_shift, - &mcif_wb20_mask, - i); - - pool->mcif_wb[i] = &mcif_wb20->base; - } - return true; -} - -static struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx) -{ - struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_ATOMIC); - - if (!pp_smu) - return pp_smu; - - dm_pp_get_funcs(ctx, pp_smu); - - if (pp_smu->ctx.ver != PP_SMU_VER_NV) - pp_smu = memset(pp_smu, 0, sizeof(struct pp_smu_funcs)); - - return pp_smu; -} - -static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu) -{ - if (pp_smu && *pp_smu) { - kfree(*pp_smu); - *pp_smu = NULL; - } -} - -static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( - uint32_t hw_internal_rev) -{ - if (ASICREV_IS_NAVI14_M(hw_internal_rev)) - return &dcn2_0_nv14_soc; - - if (ASICREV_IS_NAVI12_P(hw_internal_rev)) - return &dcn2_0_nv12_soc; - - return &dcn2_0_soc; -} - -static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params( - uint32_t hw_internal_rev) -{ - /* NV14 */ - if (ASICREV_IS_NAVI14_M(hw_internal_rev)) - return &dcn2_0_nv14_ip; - - /* NV12 and NV10 */ - return &dcn2_0_ip; -} - -static enum dml_project get_dml_project_version(uint32_t hw_internal_rev) -{ - return DML_PROJECT_NAVI10v2; -} - -static bool init_soc_bounding_box(struct dc *dc, - struct dcn20_resource_pool *pool) -{ - struct _vcs_dpi_soc_bounding_box_st *loaded_bb = - get_asic_rev_soc_bb(dc->ctx->asic_id.hw_internal_rev); - struct _vcs_dpi_ip_params_st *loaded_ip = - get_asic_rev_ip_params(dc->ctx->asic_id.hw_internal_rev); - - DC_LOGGER_INIT(dc->ctx->logger); - - if (pool->base.pp_smu) { - struct pp_smu_nv_clock_table max_clocks = {0}; - unsigned int uclk_states[8] = {0}; - unsigned int num_states = 0; - enum pp_smu_status status; - bool clock_limits_available = false; - bool uclk_states_available = false; - - if (pool->base.pp_smu->nv_funcs.get_uclk_dpm_states) { - status = (pool->base.pp_smu->nv_funcs.get_uclk_dpm_states) - (&pool->base.pp_smu->nv_funcs.pp_smu, uclk_states, &num_states); - - uclk_states_available = (status == PP_SMU_RESULT_OK); - } - - if (pool->base.pp_smu->nv_funcs.get_maximum_sustainable_clocks) { - status = (*pool->base.pp_smu->nv_funcs.get_maximum_sustainable_clocks) - (&pool->base.pp_smu->nv_funcs.pp_smu, &max_clocks); - /* SMU cannot set DCF clock to anything equal to or higher than SOC clock - */ - if (max_clocks.dcfClockInKhz >= max_clocks.socClockInKhz) - max_clocks.dcfClockInKhz = max_clocks.socClockInKhz - 1000; - clock_limits_available = (status == PP_SMU_RESULT_OK); - } - - if (clock_limits_available && uclk_states_available && num_states) { - DC_FP_START(); - dcn20_update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states); - DC_FP_END(); - } else if (clock_limits_available) { - DC_FP_START(); - dcn20_cap_soc_clocks(loaded_bb, max_clocks); - DC_FP_END(); - } - } - - loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; - loaded_ip->max_num_dpp = pool->base.pipe_count; - DC_FP_START(); - dcn20_patch_bounding_box(dc, 
loaded_bb); - DC_FP_END(); - return true; -} - -static bool dcn20_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn20_resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - struct ddc_service_init_data ddc_init_data = {0}; - struct _vcs_dpi_soc_bounding_box_st *loaded_bb = - get_asic_rev_soc_bb(ctx->asic_id.hw_internal_rev); - struct _vcs_dpi_ip_params_st *loaded_ip = - get_asic_rev_ip_params(ctx->asic_id.hw_internal_rev); - enum dml_project dml_project_version = - get_dml_project_version(ctx->asic_id.hw_internal_rev); - - ctx->dc_bios->regs = &bios_regs; - pool->base.funcs = &dcn20_res_pool_funcs; - - if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) { - pool->base.res_cap = &res_cap_nv14; - pool->base.pipe_count = 5; - pool->base.mpcc_count = 5; - } else { - pool->base.res_cap = &res_cap_nv10; - pool->base.pipe_count = 6; - pool->base.mpcc_count = 6; - } - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.extended_aux_timeout_support = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 1; - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.dgam_rom_caps.pq = 0; - dc->caps.color.dpp.dgam_rom_caps.hlg = 0; - dc->caps.color.dpp.post_csc = 0; - dc->caps.color.dpp.gamma_corr = 0; - dc->caps.color.dpp.dgam_rom_for_yuv = 1; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN2, only MPC ROM - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 0; - dc->caps.color.mpc.num_3dluts = 0; - dc->caps.color.mpc.shared_3d_lut = 0; - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->caps.dp_hdmi21_pcon_support = true; - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - //dcn2.0x - dc->work_arounds.dedcn20_305_wa = true; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - pool->base.clock_sources[DCN20_CLK_SRC_PLL0] = - dcn20_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - 
pool->base.clock_sources[DCN20_CLK_SRC_PLL1] = - dcn20_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL2] = - dcn20_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL3] = - dcn20_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL4] = - dcn20_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL5] = - dcn20_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL5, - &clk_src_regs[5], false); - pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL; - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn20_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - pool->base.dccg = dccg2_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - pool->base.dmcu = dcn20_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - if (pool->base.abm == NULL) { - dm_error("DC: failed to create abm!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - pool->base.pp_smu = dcn20_pp_smu_create(ctx); - - - if (!init_soc_bounding_box(dc, pool)) { - dm_error("DC: failed to initialize soc bounding box!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - dml_init_instance(&dc->dml, loaded_bb, loaded_ip, dml_project_version); - - if (!dc->debug.disable_pplib_wm_range) { - struct pp_smu_wm_range_sets ranges = {0}; - int i = 0; - - ranges.num_reader_wm_sets = 0; - - if (loaded_bb->num_states == 1) { - ranges.reader_wm_sets[0].wm_inst = i; - ranges.reader_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.reader_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.reader_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - - ranges.num_reader_wm_sets = 1; - } else if (loaded_bb->num_states > 1) { - for (i = 0; i < 4 && i < loaded_bb->num_states; i++) { - ranges.reader_wm_sets[i].wm_inst = i; - ranges.reader_wm_sets[i].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.reader_wm_sets[i].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - DC_FP_START(); - dcn20_fpu_set_wm_ranges(i, &ranges, loaded_bb); - DC_FP_END(); - - ranges.num_reader_wm_sets = i + 1; - } - - ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.reader_wm_sets[ranges.num_reader_wm_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - } - - ranges.num_writer_wm_sets = 1; - - ranges.writer_wm_sets[0].wm_inst = 0; - ranges.writer_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - 
ranges.writer_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - ranges.writer_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.writer_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - - /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ - if (pool->base.pp_smu->nv_funcs.set_wm_ranges) - pool->base.pp_smu->nv_funcs.set_wm_ranges(&pool->base.pp_smu->nv_funcs.pp_smu, &ranges); - } - - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn20_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* mem input -> ipp -> dpp -> opp -> TG */ - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.hubps[i] = dcn20_hubp_create(ctx, i); - if (pool->base.hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create memory input!\n"); - goto create_fail; - } - - pool->base.ipps[i] = dcn20_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create input pixel processor!\n"); - goto create_fail; - } - - pool->base.dpps[i] = dcn20_dpp_create(ctx, i); - if (pool->base.dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - } - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn20_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn20_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - pool->base.opps[i] = dcn20_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.timing_generators[i] = dcn20_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - } - - pool->base.timing_generator_count = i; - - pool->base.mpc = dcn20_mpc_create(ctx); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - pool->base.hubbub = dcn20_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn20_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - if (!dcn20_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - if (!dcn20_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - dcn20_hw_sequencer_construct(dc); - - // IF NV12, set PG function pointer to NULL. 
It's not that - // PG isn't supported for NV12, it's that we don't want to - // program the registers because that will cause more power - // to be consumed. We could have created dcn20_init_hw to get - // the same effect by checking ASIC rev, but there was a - // request at some point to not check ASIC rev on hw sequencer. - if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev)) { - dc->hwseq->funcs.enable_power_gating_plane = NULL; - dc->debug.disable_dpp_power_gate = true; - dc->debug.disable_hubp_power_gate = true; - } - - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { - ddc_init_data.ctx = dc->ctx; - ddc_init_data.link = NULL; - ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; - ddc_init_data.id.enum_id = 0; - ddc_init_data.id.type = OBJECT_TYPE_GENERIC; - pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); - } else { - pool->base.oem_device = NULL; - } - - return true; - -create_fail: - - dcn20_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn20_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn20_resource_pool *pool = - kzalloc(sizeof(struct dcn20_resource_pool), GFP_ATOMIC); - - if (!pool) - return NULL; - - if (dcn20_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h deleted file mode 100644 index 37ecaccc5d12..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ /dev/null @@ -1,170 +0,0 @@ -/* -* Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DC_RESOURCE_DCN20_H__ -#define __DC_RESOURCE_DCN20_H__ - -#include "core_types.h" -#include "dml/dcn20/dcn20_fpu.h" - -#define TO_DCN20_RES_POOL(pool)\ - container_of(pool, struct dcn20_resource_pool, base) - -struct dc; -struct resource_pool; -struct _vcs_dpi_display_pipe_params_st; - -extern struct _vcs_dpi_ip_params_st dcn2_0_ip; -extern struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc; -extern struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc; -extern struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc; - -struct dcn20_resource_pool { - struct resource_pool base; -}; -struct resource_pool *dcn20_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -struct link_encoder *dcn20_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data); - -unsigned int dcn20_calc_max_scaled_time( - unsigned int time_per_pixel, - enum mmhubbub_wbif_mode mode, - unsigned int urgent_watermark); - -struct pipe_ctx *dcn20_acquire_free_pipe_for_layer( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *opp_head_pipe); -void dcn20_release_pipe(struct dc_state *context, - struct pipe_ctx *pipe, - const struct resource_pool *pool); -struct stream_encoder *dcn20_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx); - -struct dce_hwseq *dcn20_hwseq_create( - struct dc_context *ctx); - -bool dcn20_get_dcc_compression_cap(const struct dc *dc, - const struct dc_dcc_surface_param *input, - struct dc_surface_dcc_cap *output); - -void dcn20_dpp_destroy(struct dpp **dpp); - -struct dpp *dcn20_dpp_create( - struct dc_context *ctx, - uint32_t inst); - -struct input_pixel_processor *dcn20_ipp_create( - struct dc_context *ctx, uint32_t inst); - -struct output_pixel_processor *dcn20_opp_create( - struct dc_context *ctx, uint32_t inst); - -struct dce_aux *dcn20_aux_engine_create( - struct dc_context *ctx, uint32_t inst); - -struct dce_i2c_hw *dcn20_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst); - -void dcn20_clock_source_destroy(struct clock_source **clk_src); - -struct display_stream_compressor *dcn20_dsc_create( - struct dc_context *ctx, uint32_t inst); -void dcn20_dsc_destroy(struct display_stream_compressor **dsc); - -struct hubp *dcn20_hubp_create( - struct dc_context *ctx, - uint32_t inst); -struct timing_generator *dcn20_timing_generator_create( - struct dc_context *ctx, - uint32_t instance); -struct mpc *dcn20_mpc_create(struct dc_context *ctx); -struct hubbub *dcn20_hubbub_create(struct dc_context *ctx); - -bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool); -bool dcn20_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool); - -void dcn20_set_mcif_arb_params( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt); -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); -void dcn20_merge_pipes_for_validate( - struct dc *dc, - struct dc_state *context); -int dcn20_validate_apply_pipe_split_flags( - struct dc *dc, - struct dc_state *context, - int vlevel, - int *split, - bool *merge); -void dcn20_release_dsc(struct resource_context *res_ctx, - const struct resource_pool *pool, - struct display_stream_compressor **dsc); -bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx); -void dcn20_split_stream_for_mpc( - struct resource_context 
*res_ctx, - const struct resource_pool *pool, - struct pipe_ctx *primary_pipe, - struct pipe_ctx *secondary_pipe); -bool dcn20_split_stream_for_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *prev_odm_pipe, - struct pipe_ctx *next_odm_pipe); -void dcn20_acquire_dsc(const struct dc *dc, - struct resource_context *res_ctx, - struct display_stream_compressor **dsc, - int pipe_idx); -struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, - struct resource_context *res_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *primary_pipe); -bool dcn20_fast_validate_bw( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *pipe_split_from, - int *vlevel_out, - bool fast_validate); - -enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); -enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); -enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream); -enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); -enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state); - -#endif /* __DC_RESOURCE_DCN20_H__ */ - diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile index 3a41a97b0729..c069a894db92 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # # Makefile for DCN. -DCN201 = dcn201_init.o dcn201_resource.o \ +DCN201 = dcn201_init.o \ dcn201_hubbub.o\ dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \ dcn201_dccg.o dcn201_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c deleted file mode 100644 index bca22d867696..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ /dev/null @@ -1,1308 +0,0 @@ -/* -* Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "dc.h" - -#include "dcn201_init.h" -#include "dml/dcn20/dcn20_fpu.h" -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn201_resource.h" - -#include "dcn20/dcn20_resource.h" - -#include "dcn10/dcn10_hubp.h" -#include "dcn10/dcn10_ipp.h" -#include "dcn201_mpc.h" -#include "dcn201_hubp.h" -#include "irq/dcn201/irq_service_dcn201.h" -#include "dcn201/dcn201_dpp.h" -#include "dcn201/dcn201_hubbub.h" -#include "dcn201_dccg.h" -#include "dcn201_optc.h" -#include "dcn201/dcn201_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn201_opp.h" -#include "dcn201/dcn201_link_encoder.h" -#include "dcn20/dcn20_stream_encoder.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" -#include "dcn201_hubbub.h" -#include "dcn10/dcn10_resource.h" - -#include "cyan_skillfish_ip_offset.h" - -#include "dcn/dcn_2_0_3_offset.h" -#include "dcn/dcn_2_0_3_sh_mask.h" -#include "dpcs/dpcs_2_0_3_offset.h" -#include "dpcs/dpcs_2_0_3_sh_mask.h" - -#include "mmhub/mmhub_2_0_0_offset.h" -#include "mmhub/mmhub_2_0_0_sh_mask.h" -#include "nbio/nbio_7_4_offset.h" - -#include "reg_helper.h" - -#define MIN_DISP_CLK_KHZ 100000 -#define MIN_DPP_CLK_KHZ 100000 - -static struct _vcs_dpi_ip_params_st dcn201_ip = { - .gpuvm_enable = 0, - .hostvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_max_page_table_levels = 4, - .hostvm_cached_page_table_levels = 0, - .pte_group_size_bytes = 2048, - .rob_buffer_size_kbytes = 168, - .det_buffer_size_kbytes = 164, - .dpte_buffer_size_in_pte_reqs_luma = 84, - .pde_proc_buffer_size_64k_reqs = 48, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .pte_chunk_size_kbytes = 2, - .meta_chunk_size_kbytes = 2, - .writeback_chunk_size_kbytes = 2, - .line_buffer_size_bits = 789504, - .is_line_buffer_bpp_fixed = 0, - .line_buffer_fixed_bpp = 0, - .dcc_supported = true, - .max_line_buffer_lines = 12, - .writeback_luma_buffer_size_kbytes = 12, - .writeback_chroma_buffer_size_kbytes = 8, - .writeback_chroma_line_buffer_width_pixels = 4, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_taps = 12, - .writeback_max_vscl_taps = 12, - .writeback_line_buffer_luma_buffer_size = 0, - .writeback_line_buffer_chroma_buffer_size = 9600, - .cursor_buffer_size = 8, - .cursor_chunk_size = 2, - .max_num_otg = 2, - .max_num_dpp = 4, - .max_num_wb = 0, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 8, - .max_vscl_ratio = 8, - .hscl_mults = 4, - .vscl_mults = 4, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dispclk_ramp_margin_percent = 1, - .underscan_factor = 1.10, - .min_vblank_lines = 30, - .dppclk_delay_subtotal = 77, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_scl = 50, - .dppclk_delay_cnvc_formatter = 8, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 87, - .dcfclk_cstate_latency = 10, - .max_inter_dcn_tile_repeaters = 8, - .number_of_cursors = 1, -}; - -static struct _vcs_dpi_soc_bounding_box_st dcn201_soc = { - .clock_limits = { - { - .state = 0, - .dscclk_mhz = 400.0, - .dcfclk_mhz = 1000.0, - .fabricclk_mhz = 200.0, - .dispclk_mhz = 300.0, - 
.dppclk_mhz = 300.0, - .phyclk_mhz = 810.0, - .socclk_mhz = 1254.0, - .dram_speed_mts = 2000.0, - }, - { - .state = 1, - .dscclk_mhz = 400.0, - .dcfclk_mhz = 1000.0, - .fabricclk_mhz = 250.0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .socclk_mhz = 1254.0, - .dram_speed_mts = 3600.0, - }, - { - .state = 2, - .dscclk_mhz = 400.0, - .dcfclk_mhz = 1000.0, - .fabricclk_mhz = 750.0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .socclk_mhz = 1254.0, - .dram_speed_mts = 6800.0, - }, - { - .state = 3, - .dscclk_mhz = 400.0, - .dcfclk_mhz = 1000.0, - .fabricclk_mhz = 250.0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .socclk_mhz = 1254.0, - .dram_speed_mts = 14000.0, - }, - { - .state = 4, - .dscclk_mhz = 400.0, - .dcfclk_mhz = 1000.0, - .fabricclk_mhz = 750.0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .socclk_mhz = 1254.0, - .dram_speed_mts = 14000.0, - } - }, - .num_states = 4, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .urgent_latency_us = 4.0, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 256, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 256, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 256, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 80.0, - .max_avg_sdp_bw_use_normal_percent = 80.0, - .max_avg_dram_bw_use_normal_percent = 69.0, - .writeback_latency_us = 12.0, - .ideal_dram_bw_after_urgent_percent = 80.0, - .max_request_size_bytes = 256, - .dram_channel_width_bytes = 2, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .dcn_downspread_percent = 0.3, - .downspread_percent = 0.3, - .dram_page_open_time_ns = 50.0, - .dram_rw_turnaround_time_ns = 17.5, - .dram_return_buffer_per_channel_bytes = 8192, - .round_trip_ping_latency_dcfclk_cycles = 128, - .urgent_out_of_order_return_per_channel_bytes = 256, - .channel_interleave_bytes = 256, - .num_banks = 8, - .num_chans = 16, - .vmm_page_size_bytes = 4096, - .dram_clock_change_latency_us = 250.0, - .writeback_dram_clock_change_latency_us = 23.0, - .return_bus_width_bytes = 64, - .dispclk_dppclk_vco_speed_mhz = 3000, - .use_urgent_burst_bw = 0, -}; - -enum dcn20_clk_src_array_id { - DCN20_CLK_SRC_PLL0, - DCN20_CLK_SRC_PLL1, - DCN20_CLK_SRC_TOTAL_DCN201 -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file */ - -/* DCN */ - -#undef BASE_INNER -#define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRI_IX(reg_name, block, id)\ - .reg_name = ix ## block ## id ## _ ## reg_name - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ 
## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - mm ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \ - mmMM ## reg_name - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN201(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN2_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN20(_MASK) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), -}; - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN_COMMON_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid) \ -} - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), -}; - -#define LINK_ENCODER_MASK_SH_LIST_DCN201(mask_sh)\ - LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh) - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN201(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN201(_MASK) -}; - -#define ipp_regs(id)\ -[id] = {\ - IPP_REG_LIST_DCN201(id),\ -} - -static const struct dcn10_ipp_registers ipp_regs[] = { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2), - ipp_regs(3), -}; - -static const struct dcn10_ipp_shift ipp_shift = { - 
IPP_MASK_SH_LIST_DCN201(__SHIFT) -}; - -static const struct dcn10_ipp_mask ipp_mask = { - IPP_MASK_SH_LIST_DCN201(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN201(id),\ -} - -static const struct dcn201_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), -}; - -static const struct dcn201_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN201(__SHIFT) -}; - -static const struct dcn201_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN201(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUX_RESET_MASK = 0 \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1) -}; - -#define tf_regs(id)\ -[id] = {\ - TF_REG_LIST_DCN201(id),\ -} - -static const struct dcn201_dpp_registers tf_regs[] = { - tf_regs(0), - tf_regs(1), - tf_regs(2), - tf_regs(3), -}; - -static const struct dcn201_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN201(__SHIFT) -}; - -static const struct dcn201_dpp_mask tf_mask = { - TF_REG_LIST_SH_MASK_DCN201(_MASK) -}; - -static const struct dcn201_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN201(0), - MPC_REG_LIST_DCN201(1), - MPC_REG_LIST_DCN201(2), - MPC_REG_LIST_DCN201(3), - MPC_REG_LIST_DCN201(4), - MPC_OUT_MUX_REG_LIST_DCN201(0), - MPC_OUT_MUX_REG_LIST_DCN201(1), -}; - -static const struct dcn201_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN201(__SHIFT) -}; - -static const struct dcn201_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN201(_MASK) -}; - -#define tg_regs_dcn201(id)\ -[id] = {TG_COMMON_REG_LIST_DCN201(id)} - -static const struct dcn_optc_registers tg_regs[] = { - tg_regs_dcn201(0), - tg_regs_dcn201(1) -}; - -static const struct dcn_optc_shift tg_shift = { - TG_COMMON_MASK_SH_LIST_DCN201(__SHIFT) -}; - -static const struct dcn_optc_mask tg_mask = { - TG_COMMON_MASK_SH_LIST_DCN201(_MASK) -}; - -#define hubp_regsDCN201(id)\ -[id] = {\ - HUBP_REG_LIST_DCN201(id)\ -} - -static const struct dcn201_hubp_registers hubp_regs[] = { - hubp_regsDCN201(0), - hubp_regsDCN201(1), - hubp_regsDCN201(2), - hubp_regsDCN201(3) -}; - -static const struct dcn201_hubp_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN201(__SHIFT) -}; - -static const struct dcn201_hubp_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN201(_MASK) -}; - -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN201(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN201(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN201(_MASK) -}; - - -static const struct dccg_registers dccg_regs = { - DCCG_COMMON_REG_LIST_DCN_BASE() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(_MASK) -}; - -static const struct resource_caps res_cap_dnc201 = { - .num_timing_generator = 2, - .num_opp = 2, - .num_video_plane = 4, - .num_audio = 2, - .num_stream_encoder = 2, - .num_pll = 2, - .num_ddc = 2, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = false, - .fp16 = true, - .p010 = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 1 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 250, - .fp16 = 250 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = 
true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .az_endpoint_mute_only = true, - .max_downscale_src_width = 3840, - .disable_pplib_wm_range = true, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .enable_tri_buf = false, - .enable_legacy_fast_update = true, - .using_dml2 = false, -}; - -static void dcn201_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN201_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn201_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn201_dpp *dpp = - kzalloc(sizeof(struct dcn201_dpp), GFP_ATOMIC); - - if (!dpp) - return NULL; - - if (dpp201_construct(dpp, ctx, inst, - &tf_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - kfree(dpp); - return NULL; -} - -static struct input_pixel_processor *dcn201_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn10_ipp *ipp = - kzalloc(sizeof(struct dcn10_ipp), GFP_ATOMIC); - - if (!ipp) { - return NULL; - } - - dcn20_ipp_construct(ipp, ctx, inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - - -static struct output_pixel_processor *dcn201_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn201_opp *opp = - kzalloc(sizeof(struct dcn201_opp), GFP_ATOMIC); - - if (!opp) { - return NULL; - } - - dcn201_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dcn201_aux_engine_create(struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) -}; - -static struct dce_i2c_hw *dcn201_i2c_hw_create(struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} - -static struct mpc *dcn201_mpc_create(struct dc_context *ctx, uint32_t num_mpcc) -{ - struct dcn201_mpc *mpc201 = kzalloc(sizeof(struct dcn201_mpc), - GFP_ATOMIC); - - if (!mpc201) - return NULL; - - dcn201_mpc_construct(mpc201, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc); - - return &mpc201->base; -} - -static struct hubbub *dcn201_hubbub_create(struct dc_context *ctx) -{ - struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), - GFP_ATOMIC); - - if (!hubbub) - return NULL; - - hubbub201_construct(hubbub, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask); - - return &hubbub->base; -} - -static struct timing_generator *dcn201_timing_generator_create( - struct dc_context *ctx, - uint32_t 
instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_ATOMIC); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &tg_regs[instance]; - tgn10->tg_shift = &tg_shift; - tgn10->tg_mask = &tg_mask; - - dcn201_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn201_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_ATOMIC); - struct dcn10_link_encoder *enc10 = &enc20->enc10; - - if (!enc20) - return NULL; - - dcn201_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc10->base; -} - -static struct clock_source *dcn201_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_ATOMIC); - - if (!clk_src) - return NULL; - - if (dce112_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - kfree(clk_src); - return NULL; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), - - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); -} - -static struct audio *dcn201_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct stream_encoder *dcn201_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1 = - kzalloc(sizeof(struct dcn10_stream_encoder), GFP_ATOMIC); - - if (!enc1) - return NULL; - - dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN201_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN201_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN201_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dcn201_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_ATOMIC); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn201_create_audio, - .create_stream_encoder = dcn201_stream_encoder_create, - .create_hwseq = dcn201_hwseq_create, -}; - -static void dcn201_clock_source_destroy(struct clock_source **clk_src) -{ - 
kfree(TO_DCE110_CLK_SRC(*clk_src)); - *clk_src = NULL; -} - -static void dcn201_resource_destruct(struct dcn201_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - - if (pool->base.mpc != NULL) { - kfree(TO_DCN201_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn201_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN10_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn201_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn201_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); -} - -static struct hubp *dcn201_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn201_hubp *hubp201 = - kzalloc(sizeof(struct dcn201_hubp), GFP_ATOMIC); - - if (!hubp201) - return NULL; - - if (dcn201_hubp_construct(hubp201, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp201->base; - - kfree(hubp201); - return NULL; -} - -static struct pipe_ctx *dcn201_acquire_free_pipe_for_layer( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *opp_head_pipe) -{ - struct resource_context *res_ctx = &new_ctx->res_ctx; - struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream); - struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe); - - if (!head_pipe) - ASSERT(0); - - if (!idle_pipe) - return NULL; - - idle_pipe->stream = head_pipe->stream; - idle_pipe->stream_res.tg = head_pipe->stream_res.tg; - idle_pipe->stream_res.opp = head_pipe->stream_res.opp; - - idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; - - return idle_pipe; -} - -static bool dcn201_get_dcc_compression_cap(const struct dc *dc, - const struct dc_dcc_surface_param *input, - struct dc_surface_dcc_cap *output) -{ - return dc->res_pool->hubbub->funcs->get_dcc_compression_cap( - dc->res_pool->hubbub, - input, - output); -} - -static void 
dcn201_populate_dml_writeback_from_context(struct dc *dc, - struct resource_context *res_ctx, - display_e2e_pipe_params_st *pipes) -{ - DC_FP_START(); - dcn201_populate_dml_writeback_from_context_fpu(dc, res_ctx, pipes); - DC_FP_END(); -} - -static void dcn201_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn201_resource_pool *dcn201_pool = TO_DCN201_RES_POOL(*pool); - - dcn201_resource_destruct(dcn201_pool); - kfree(dcn201_pool); - *pool = NULL; -} - -static void dcn201_link_init(struct dc_link *link) -{ - if (link->ctx->dc_bios->integrated_info) - link->dp_ss_off = !link->ctx->dc_bios->integrated_info->dp_ss_control; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn201_get_dcc_compression_cap, -}; - -static struct resource_funcs dcn201_res_pool_funcs = { - .link_init = dcn201_link_init, - .destroy = dcn201_destroy_resource_pool, - .link_enc_create = dcn201_link_encoder_create, - .panel_cntl_create = NULL, - .validate_bandwidth = dcn20_validate_bandwidth, - .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, - .add_stream_to_ctx = dcn20_add_stream_to_ctx, - .add_dsc_to_stream_resource = NULL, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn201_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .set_mcif_arb_params = dcn20_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link -}; - -static bool dcn201_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn201_resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dnc201; - pool->base.funcs = &dcn201_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - - pool->base.pipe_count = 4; - pool->base.mpcc_count = 5; - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.5 w/a applied by default*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.extended_aux_timeout_support = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 1; - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.dgam_rom_caps.pq = 0; - dc->caps.color.dpp.dgam_rom_caps.hlg = 0; - dc->caps.color.dpp.post_csc = 0; - dc->caps.color.dpp.gamma_corr = 0; - dc->caps.color.dpp.dgam_rom_for_yuv = 1; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN2 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 
0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 0; - dc->caps.color.mpc.num_3dluts = 0; - dc->caps.color.mpc.shared_3d_lut = 0; - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->debug = debug_defaults_drv; - - /*a0 only, remove later*/ - dc->work_arounds.no_connect_phy_config = true; - dc->work_arounds.dedcn20_305_wa = true; - /************************************************* - * Create resources * - *************************************************/ - - pool->base.clock_sources[DCN20_CLK_SRC_PLL0] = - dcn201_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL1] = - dcn201_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - - pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN201; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn201_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - goto create_fail; - } - } - - pool->base.dccg = dccg201_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - goto create_fail; - } - - dcn201_ip.max_num_otg = pool->base.res_cap->num_timing_generator; - dcn201_ip.max_num_dpp = pool->base.pipe_count; - dml_init_instance(&dc->dml, &dcn201_soc, &dcn201_ip, DML_PROJECT_DCN201); - { - struct irq_service_init_data init_data; - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn201_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - } - - /* mem input -> ipp -> dpp -> opp -> TG */ - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.hubps[i] = dcn201_hubp_create(ctx, i); - if (pool->base.hubps[i] == NULL) { - dm_error( - "DC: failed to create memory input!\n"); - goto create_fail; - } - - pool->base.ipps[i] = dcn201_ipp_create(ctx, i); - if (pool->base.ipps[i] == NULL) { - dm_error( - "DC: failed to create input pixel processor!\n"); - goto create_fail; - } - - pool->base.dpps[i] = dcn201_dpp_create(ctx, i); - if (pool->base.dpps[i] == NULL) { - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - pool->base.opps[i] = dcn201_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn201_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn201_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.timing_generators[i] = dcn201_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[i] == NULL) { - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } 
- } - - pool->base.timing_generator_count = i; - - pool->base.mpc = dcn201_mpc_create(ctx, pool->base.mpcc_count); - if (pool->base.mpc == NULL) { - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - pool->base.hubbub = dcn201_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - dcn201_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - return true; - -create_fail: - - dcn201_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn201_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn201_resource_pool *pool = - kzalloc(sizeof(struct dcn201_resource_pool), GFP_ATOMIC); - - if (!pool) - return NULL; - - if (dcn201_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h deleted file mode 100644 index e0467d17d4ae..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -* Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DC_RESOURCE_DCN201_H__ -#define __DC_RESOURCE_DCN201_H__ - -#include "core_types.h" - -#define RRDPCS_PHY_DP_TX_PSTATE_POWER_UP 0x00000000 -#define RRDPCS_PHY_DP_TX_PSTATE_HOLD 0x00000001 -#define RRDPCS_PHY_DP_TX_PSTATE_HOLD_OFF 0x00000002 -#define RRDPCS_PHY_DP_TX_PSTATE_POWER_DOWN 0x00000003 - -#define TO_DCN201_RES_POOL(pool)\ - container_of(pool, struct dcn201_resource_pool, base) - -struct dc; -struct resource_pool; -struct _vcs_dpi_display_pipe_params_st; - -struct dcn201_resource_pool { - struct resource_pool base; -}; -struct resource_pool *dcn201_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -#endif /* __DC_RESOURCE_DCN201_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index ce1be0afae4a..dd1eea7212f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -2,7 +2,7 @@ # # Makefile for DCN21. -DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \ +DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o \ dcn21_link_encoder.o dcn21_dccg.o AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c deleted file mode 100644 index 42277b280586..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ /dev/null @@ -1,1745 +0,0 @@ -/* -* Copyright 2018 Advanced Micro Devices, Inc. - * Copyright 2019 Raptor Engineering, LLC - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include - -#include "dm_services.h" -#include "dc.h" - -#include "dcn21_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn20/dcn20_resource.h" -#include "dcn21/dcn21_resource.h" - -#include "dml/dcn20/dcn20_fpu.h" - -#include "clk_mgr.h" -#include "dcn10/dcn10_hubp.h" -#include "dcn10/dcn10_ipp.h" -#include "dcn20/dcn20_hubbub.h" -#include "dcn20/dcn20_mpc.h" -#include "dcn20/dcn20_hubp.h" -#include "dcn21_hubp.h" -#include "irq/dcn21/irq_service_dcn21.h" -#include "dcn20/dcn20_dpp.h" -#include "dcn20/dcn20_optc.h" -#include "dcn21/dcn21_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn20/dcn20_opp.h" -#include "dcn20/dcn20_dsc.h" -#include "dcn21/dcn21_link_encoder.h" -#include "dcn20/dcn20_stream_encoder.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "virtual/virtual_stream_encoder.h" -#include "dml/display_mode_vba.h" -#include "dcn20/dcn20_dccg.h" -#include "dcn21/dcn21_dccg.h" -#include "dcn21_hubbub.h" -#include "dcn10/dcn10_resource.h" -#include "dce/dce_panel_cntl.h" - -#include "dcn20/dcn20_dwb.h" -#include "dcn20/dcn20_mmhubbub.h" -#include "dpcs/dpcs_2_1_0_offset.h" -#include "dpcs/dpcs_2_1_0_sh_mask.h" - -#include "renoir_ip_offset.h" -#include "dcn/dcn_2_1_0_offset.h" -#include "dcn/dcn_2_1_0_sh_mask.h" - -#include "nbio/nbio_7_0_offset.h" - -#include "mmhub/mmhub_2_0_0_offset.h" -#include "mmhub/mmhub_2_0_0_sh_mask.h" - -#include "reg_helper.h" -#include "dce/dce_abm.h" -#include "dce/dce_dmcu.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" -#include "dcn21_resource.h" -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" -#include "dce/dmub_psr.h" -#include "dce/dmub_abm.h" - -/* begin ********************* - * macros to expend register list macro defined in HW object header file */ - -/* DCN */ -#define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - mm ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIF0_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \ - mmMM ## reg_name - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN2_1(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, 
C), - clk_src_regs(3, D), - clk_src_regs(4, E), -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -static const struct dce_dmcu_registers dmcu_regs = { - DMCU_DCN20_REG_LIST() -}; - -static const struct dce_dmcu_shift dmcu_shift = { - DMCU_MASK_SH_LIST_DCN10(__SHIFT) -}; - -static const struct dce_dmcu_mask dmcu_mask = { - DMCU_MASK_SH_LIST_DCN10(_MASK) -}; - -static const struct dce_abm_registers abm_regs = { - ABM_DCN20_REG_LIST() -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN20(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_COMMON_REG_LIST_DCN_BASE() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN2_1(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN2_1(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN20(id),\ -} - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4), - opp_regs(5), -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define tg_regs(id)\ -[id] = {TG_COMMON_REG_LIST_DCN2_0(id)} - -static const struct dcn_optc_registers tg_regs[] = { - tg_regs(0), - tg_regs(1), - tg_regs(2), - tg_regs(3) -}; - -static const struct dcn_optc_shift tg_shift = { - TG_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dcn_optc_mask tg_mask = { - TG_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -static const struct dcn20_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN2_0(0), - MPC_REG_LIST_DCN2_0(1), - MPC_REG_LIST_DCN2_0(2), - MPC_REG_LIST_DCN2_0(3), - MPC_REG_LIST_DCN2_0(4), - MPC_REG_LIST_DCN2_0(5), - MPC_OUT_MUX_REG_LIST_DCN2_0(0), - MPC_OUT_MUX_REG_LIST_DCN2_0(1), - MPC_OUT_MUX_REG_LIST_DCN2_0(2), - MPC_OUT_MUX_REG_LIST_DCN2_0(3), - MPC_DBG_REG_LIST_DCN2_0() -}; - -static const struct dcn20_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), - MPC_DEBUG_REG_LIST_SH_DCN20 -}; - -static const struct dcn20_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), - MPC_DEBUG_REG_LIST_MASK_DCN20 -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN21(id)\ -} - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3) -}; - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN21(__SHIFT) -}; - -static const struct 
dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN21(_MASK) -}; - -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN21() -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN21(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN21(_MASK) -}; - - -#define vmid_regs(id)\ -[id] = {\ - DCN20_VMID_REG_LIST(id)\ -} - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -#define dsc_regsDCN20(id)\ -[id] = {\ - DSC_REG_LIST_DCN20(id)\ -} - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1), - dsc_regsDCN20(2), - dsc_regsDCN20(3), - dsc_regsDCN20(4), - dsc_regsDCN20(5) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -#define ipp_regs(id)\ -[id] = {\ - IPP_REG_LIST_DCN20(id),\ -} - -static const struct dcn10_ipp_registers ipp_regs[] = { - ipp_regs(0), - ipp_regs(1), - ipp_regs(2), - ipp_regs(3), -}; - -static const struct dcn10_ipp_shift ipp_shift = { - IPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn10_ipp_mask ipp_mask = { - IPP_MASK_SH_LIST_DCN20(_MASK), -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN20(id),\ -} - - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), -}; - -#define tf_regs(id)\ -[id] = {\ - TF_REG_LIST_DCN20(id),\ - TF_REG_LIST_DCN20_COMMON_APPEND(id),\ -} - -static const struct dcn2_dpp_registers tf_regs[] = { - tf_regs(0), - tf_regs(1), - tf_regs(2), - tf_regs(3), -}; - -static const struct dcn2_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN20(__SHIFT), - TF_DEBUG_REG_LIST_SH_DCN20 -}; - -static const struct dcn2_dpp_mask tf_mask = { - TF_REG_LIST_SH_MASK_DCN20(_MASK), - TF_DEBUG_REG_LIST_MASK_DCN20 -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN2_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4), -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN20(_MASK) -}; - -static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu); - -static struct input_pixel_processor *dcn21_ipp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn10_ipp *ipp = - kzalloc(sizeof(struct dcn10_ipp), GFP_KERNEL); - - if (!ipp) { - 
BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_ipp_construct(ipp, ctx, inst, - &ipp_regs[inst], &ipp_shift, &ipp_mask); - return &ipp->base; -} - -static struct dpp *dcn21_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_dpp *dpp = - kzalloc(sizeof(struct dcn20_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if (dpp2_construct(dpp, ctx, inst, - &tf_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -static struct dce_aux *dcn21_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} - -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) -}; - -static struct dce_i2c_hw *dcn21_i2c_hw_create(struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} - -static const struct resource_caps res_cap_rn = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 4, // 4 audio endpoints. 
4 audio streams - .num_stream_encoder = 5, - .num_pll = 5, // maybe 3 because the last two used for USB-c - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_dsc = 3, -}; - -#ifdef DIAGS_BUILD -static const struct resource_caps res_cap_rn_FPGA_4pipe = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 7, - .num_stream_encoder = 4, - .num_pll = 4, - .num_dwb = 1, - .num_ddc = 4, - .num_dsc = 0, -}; - -static const struct resource_caps res_cap_rn_FPGA_2pipe_dsc = { - .num_timing_generator = 2, - .num_opp = 2, - .num_video_plane = 2, - .num_audio = 7, - .num_stream_encoder = 2, - .num_pll = 4, - .num_dwb = 1, - .num_ddc = 4, - .num_dsc = 2, -}; -#endif - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 250, - .fp16 = 250 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = false, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = true, - .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 4096, - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = true, - .disable_48mhz_pwrdwn = false, - .usbc_combo_phy_reset_wa = true, - .dmub_command_table = true, - .use_max_lb = true, - .enable_legacy_fast_update = true, - .using_dml2 = false, -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - .disallow_replay = false, - }, - .ilr = { - .optimize_edp_link_rate = true, - }, -}; - -enum dcn20_clk_src_array_id { - DCN20_CLK_SRC_PLL0, - DCN20_CLK_SRC_PLL1, - DCN20_CLK_SRC_PLL2, - DCN20_CLK_SRC_PLL3, - DCN20_CLK_SRC_PLL4, - DCN20_CLK_SRC_TOTAL_DCN21 -}; - -static void dcn21_resource_destruct(struct dcn21_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn20_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - 
kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN20_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN20_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - if (pool->base.abm != NULL) { - if (pool->base.abm->ctx->dc->config.disable_dmcu) - dmub_abm_destroy(&pool->base.abm); - else - dce_abm_destroy(&pool->base.abm); - } - - if (pool->base.dmcu != NULL) - dce_dmcu_destroy(&pool->base.dmcu); - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); - - if (pool->base.pp_smu != NULL) - dcn21_pp_smu_destroy(&pool->base.pp_smu); -} - -bool dcn21_fast_validate_bw(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *pipe_split_from, - int *vlevel_out, - bool fast_validate) -{ - bool out = false; - int split[MAX_PIPES] = { 0 }; - int pipe_cnt, i, pipe_idx, vlevel; - - ASSERT(pipes); - if (!pipes) - return false; - - dcn20_merge_pipes_for_validate(dc, context); - - DC_FP_START(); - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - DC_FP_END(); - - *pipe_cnt_out = pipe_cnt; - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - /* - * DML favors voltage over p-state, but we're more interested in - * supporting p-state over voltage. We can't support p-state in - * prefetch mode > 0 so try capping the prefetch mode to start. - */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh_and_mclk_switch; - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (vlevel > context->bw_ctx.dml.soc.num_states) { - /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. - * - * We don't actually support prefetch mode 2, so require that we - * at least support prefetch mode 1. 
- */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh; - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - if (vlevel > context->bw_ctx.dml.soc.num_states) - goto validate_fail; - } - - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - if (!pipe->stream) - continue; - - /* We only support full screen mpo with ODM */ - if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled - && pipe->plane_state && mpo_pipe - && memcmp(&mpo_pipe->plane_state->clip_rect, - &pipe->stream->src, - sizeof(struct rect)) != 0) { - ASSERT(mpo_pipe->plane_state != pipe->plane_state); - goto validate_fail; - } - pipe_idx++; - } - - /*initialize pipe_just_split_from to invalid idx*/ - for (i = 0; i < MAX_PIPES; i++) - pipe_split_from[i] = -1; - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe; - - if (!pipe->stream || pipe_split_from[i] >= 0) - continue; - - pipe_idx++; - - if (!pipe->top_pipe && !pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { - hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); - ASSERT(hsplit_pipe); - if (!dcn20_split_stream_for_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe)) - goto validate_fail; - pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; - dcn20_build_mapped_resource(dc, context, pipe->stream); - } - - if (!pipe->plane_state) - continue; - /* Skip 2nd half of already split pipe */ - if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state) - continue; - - if (split[i] == 2) { - if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) { - /* pipe not split previously needs split */ - hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) { - DC_FP_START(); - dcn20_fpu_adjust_dppclk(&context->bw_ctx.dml.vba, vlevel, context->bw_ctx.dml.vba.maxMpcComb, pipe_idx, true); - DC_FP_END(); - continue; - } - if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { - if (!dcn20_split_stream_for_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe)) - goto validate_fail; - dcn20_build_mapped_resource(dc, context, pipe->stream); - } else { - dcn20_split_stream_for_mpc( - &context->res_ctx, dc->res_pool, - pipe, hsplit_pipe); - resource_build_scaling_params(pipe); - resource_build_scaling_params(hsplit_pipe); - } - pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; - } - } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { - /* merge should already have been done */ - ASSERT(0); - } - } - /* Actual dsc count per stream dsc validation*/ - if (!dcn20_validate_dsc(dc, context)) { - context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] = - DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - *vlevel_out = vlevel; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - -/* - * Some of the functions further below use the FPU, so we need to wrap this - * with DC_FP_START()/DC_FP_END(). 
Use the same approach as for - * dcn20_validate_bandwidth in dcn20_resource.c. - */ -static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) -{ - bool voltage_supported; - display_e2e_pipe_params_st *pipes; - - pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); - if (!pipes) - return false; - - DC_FP_START(); - voltage_supported = dcn21_validate_bandwidth_fp(dc, context, fast_validate, pipes); - DC_FP_END(); - - kfree(pipes); - return voltage_supported; -} - -static void dcn21_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn21_resource_pool *dcn21_pool = TO_DCN21_RES_POOL(*pool); - - dcn21_resource_destruct(dcn21_pool); - kfree(dcn21_pool); - *pool = NULL; -} - -static struct clock_source *dcn21_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn20_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static struct hubp *dcn21_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn21_hubp *hubp21 = - kzalloc(sizeof(struct dcn21_hubp), GFP_KERNEL); - - if (!hubp21) - return NULL; - - if (hubp21_construct(hubp21, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp21->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp21); - return NULL; -} - -static struct hubbub *dcn21_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub) - return NULL; - - hubbub21_construct(hubbub, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask); - - for (i = 0; i < res_cap_rn.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - hubbub->num_vmid = res_cap_rn.num_vmid; - - return &hubbub->base; -} - -static struct output_pixel_processor *dcn21_opp_create(struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct timing_generator *dcn21_timing_generator_create(struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &tg_regs[instance]; - tgn10->tg_shift = &tg_shift; - tgn10->tg_mask = &tg_mask; - - dcn20_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static struct mpc *dcn21_mpc_create(struct dc_context *ctx) -{ - struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc), - GFP_KERNEL); - - if (!mpc20) - return NULL; - - dcn20_mpc_construct(mpc20, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - 6); - - return &mpc20->base; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} 
- - -static struct display_stream_compressor *dcn21_dsc_create(struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -static struct pp_smu_funcs *dcn21_pp_smu_create(struct dc_context *ctx) -{ - struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); - - if (!pp_smu) - return pp_smu; - - dm_pp_get_funcs(ctx, pp_smu); - - if (pp_smu->ctx.ver != PP_SMU_VER_RN) - pp_smu = memset(pp_smu, 0, sizeof(struct pp_smu_funcs)); - - - return pp_smu; -} - -static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu) -{ - if (pp_smu && *pp_smu) { - kfree(*pp_smu); - *pp_smu = NULL; - } -} - -static struct audio *dcn21_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -static struct stream_encoder *dcn21_stream_encoder_create(enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1 = - kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - - if (!enc1) - return NULL; - - dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN21_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN21_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN21_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dcn21_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - hws->wa.DEGVIDCN21 = true; - hws->wa.disallow_self_refresh_during_multi_plane_transition = true; - } - return hws; -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn21_create_audio, - .create_stream_encoder = dcn21_stream_encoder_create, - .create_hwseq = dcn21_hwseq_create, -}; - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN2_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - DPCS_DCN21_REG_LIST(id), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ -} - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E), -}; - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCN_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct 
dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT),\ - DPCS_DCN21_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK),\ - DPCS_DCN21_MASK_SH_LIST(_MASK) -}; - -static int map_transmitter_id_to_phy_instance( - enum transmitter transmitter) -{ - switch (transmitter) { - case TRANSMITTER_UNIPHY_A: - return 0; - break; - case TRANSMITTER_UNIPHY_B: - return 1; - break; - case TRANSMITTER_UNIPHY_C: - return 2; - break; - case TRANSMITTER_UNIPHY_D: - return 3; - break; - case TRANSMITTER_UNIPHY_E: - return 4; - break; - default: - ASSERT(0); - return 0; - } -} - -static struct link_encoder *dcn21_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn21_link_encoder *enc21 = - kzalloc(sizeof(struct dcn21_link_encoder), GFP_KERNEL); - int link_regs_id; - - if (!enc21) - return NULL; - - link_regs_id = - map_transmitter_id_to_phy_instance(enc_init_data->transmitter); - - dcn21_link_encoder_construct(enc21, - enc_init_data, - &link_enc_feature, - &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc21->enc10.base; -} - -static struct panel_cntl *dcn21_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static void dcn21_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - *panel_config = panel_config_defaults; -} - -#define CTX ctx - -#define REG(reg_name) \ - (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) - -static uint32_t read_pipe_fuses(struct dc_context *ctx) -{ - uint32_t value = REG_READ(CC_DC_PIPE_DIS); - /* RV1 support max 4 pipes */ - value = value & 0xf; - return value; -} - -static enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *plane_state) -{ - if (plane_state->ctx->dc->debug.disable_dcc == DCC_ENABLE) { - plane_state->dcc.enable = 1; - /* align to our worst case block width */ - plane_state->dcc.meta_pitch = ((plane_state->src_rect.width + 1023) / 1024) * 1024; - } - - return dcn20_patch_unknown_plane_state(plane_state); -} - -static const struct resource_funcs dcn21_res_pool_funcs = { - .destroy = dcn21_destroy_resource_pool, - .link_enc_create = dcn21_link_encoder_create, - .panel_cntl_create = dcn21_panel_cntl_create, - .validate_bandwidth = dcn21_validate_bandwidth, - .populate_dml_pipes = dcn21_populate_dml_pipes_from_context, - .add_stream_to_ctx = dcn20_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .populate_dml_writeback_from_context = 
dcn20_populate_dml_writeback_from_context, - .patch_unknown_plane_state = dcn21_patch_unknown_plane_state, - .set_mcif_arb_params = dcn20_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .update_bw_bounding_box = dcn21_update_bw_bounding_box, - .get_panel_config_defaults = dcn21_get_panel_config_defaults, -}; - -static bool dcn21_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn21_resource_pool *pool) -{ - int i, j; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - uint32_t pipe_fuses = read_pipe_fuses(ctx); - uint32_t num_pipes; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_rn; -#ifdef DIAGS_BUILD - if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - //pool->base.res_cap = &res_cap_nv10_FPGA_2pipe_dsc; - pool->base.res_cap = &res_cap_rn_FPGA_4pipe; -#endif - - pool->base.funcs = &dcn21_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - - /* max pipe num for ASIC before check pipe fuses */ - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - - dc->caps.max_downscale_ratio = 200; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.is_apu = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 1; - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.dgam_rom_caps.pq = 0; - dc->caps.color.dpp.dgam_rom_caps.hlg = 0; - dc->caps.color.dpp.post_csc = 0; - dc->caps.color.dpp.gamma_corr = 0; - dc->caps.color.dpp.dgam_rom_for_yuv = 1; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN2 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 0; - dc->caps.color.mpc.num_3dluts = 0; - dc->caps.color.mpc.shared_3d_lut = 0; - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->caps.dp_hdmi21_pcon_support = true; - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - pool->base.clock_sources[DCN20_CLK_SRC_PLL0] 
= - dcn21_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL1] = - dcn21_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL2] = - dcn21_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL3] = - dcn21_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN20_CLK_SRC_PLL4] = - dcn21_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn21_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - pool->base.dccg = dccg21_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - if (!dc->config.disable_dmcu) { - pool->base.dmcu = dcn21_dmcu_create(ctx, - &dmcu_regs, - &dmcu_shift, - &dmcu_mask); - if (pool->base.dmcu == NULL) { - dm_error("DC: failed to create dmcu!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - dc->debug.dmub_command_table = false; - } - - if (dc->config.disable_dmcu) { - pool->base.psr = dmub_psr_create(ctx); - - if (pool->base.psr == NULL) { - dm_error("DC: failed to create psr obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - if (dc->config.disable_dmcu) - pool->base.abm = dmub_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - else - pool->base.abm = dce_abm_create(ctx, - &abm_regs, - &abm_shift, - &abm_mask); - - pool->base.pp_smu = dcn21_pp_smu_create(ctx); - - num_pipes = dcn2_1_ip.max_num_dpp; - - for (i = 0; i < dcn2_1_ip.max_num_dpp; i++) - if (pipe_fuses & 1 << i) - num_pipes--; - dcn2_1_ip.max_num_dpp = num_pipes; - dcn2_1_ip.max_num_otg = num_pipes; - - dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); - - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn21_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - j = 0; - /* mem input -> ipp -> dpp -> opp -> TG */ - for (i = 0; i < pool->base.pipe_count; i++) { - /* if pipe is disabled, skip instance of HW pipe, - * i.e, skip ASIC register instance - */ - if ((pipe_fuses & (1 << i)) != 0) - continue; - - pool->base.hubps[j] = dcn21_hubp_create(ctx, i); - if (pool->base.hubps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create memory input!\n"); - goto create_fail; - } - - pool->base.ipps[j] = dcn21_ipp_create(ctx, i); - if (pool->base.ipps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create input pixel processor!\n"); - goto create_fail; - } - - pool->base.dpps[j] = dcn21_dpp_create(ctx, i); - if (pool->base.dpps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - - pool->base.opps[j] = dcn21_opp_create(ctx, i); - if (pool->base.opps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - - 
pool->base.timing_generators[j] = dcn21_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - j++; - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn21_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn21_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - pool->base.timing_generator_count = j; - pool->base.pipe_count = j; - pool->base.mpcc_count = j; - - pool->base.mpc = dcn21_mpc_create(ctx); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - pool->base.hubbub = dcn21_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn21_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - if (!dcn20_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - if (!dcn20_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - dcn21_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - return true; - -create_fail: - - dcn21_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn21_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn21_resource_pool *pool = - kzalloc(sizeof(struct dcn21_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn21_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h deleted file mode 100644 index f7ecc002c2f7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2018 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef _DCN21_RESOURCE_H_ -#define _DCN21_RESOURCE_H_ - -#include "core_types.h" - -#define TO_DCN21_RES_POOL(pool)\ - container_of(pool, struct dcn21_resource_pool, base) - -struct dc; -struct resource_pool; -struct _vcs_dpi_display_pipe_params_st; - -extern struct _vcs_dpi_ip_params_st dcn2_1_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc; - -struct dcn21_resource_pool { - struct resource_pool base; -}; -struct resource_pool *dcn21_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); -bool dcn21_fast_validate_bw( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *pipe_split_from, - int *vlevel_out, - bool fast_validate); - -#endif /* _DCN21_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index af4d2065d2c1..9dcf06c0954d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -38,7 +38,6 @@ DCN30 := \ dcn30_dwb_cm.o \ dcn30_cm_common.o \ dcn30_mmhubbub.o \ - dcn30_resource.o \ dcn30_dio_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c deleted file mode 100644 index 7b259cb5f418..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ /dev/null @@ -1,2611 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - - -#include "dm_services.h" -#include "dc.h" - -#include "dcn30_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn20/dcn20_resource.h" - -#include "dcn30_resource.h" - -#include "dcn10/dcn10_ipp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn30/dcn30_mpc.h" -#include "dcn30/dcn30_hubp.h" -#include "irq/dcn30/irq_service_dcn30.h" -#include "dcn30/dcn30_dpp.h" -#include "dcn30/dcn30_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn30/dcn30_opp.h" -#include "dcn20/dcn20_dsc.h" -#include "dcn30/dcn30_vpg.h" -#include "dcn30/dcn30_afmt.h" -#include "dcn30/dcn30_dio_stream_encoder.h" -#include "dcn30/dcn30_dio_link_encoder.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "clk_mgr.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dml/display_mode_vba.h" -#include "dcn30/dcn30_dccg.h" -#include "dcn10/dcn10_resource.h" -#include "link.h" -#include "dce/dce_panel_cntl.h" - -#include "dcn30/dcn30_dwb.h" -#include "dcn30/dcn30_mmhubbub.h" - -#include "sienna_cichlid_ip_offset.h" -#include "dcn/dcn_3_0_0_offset.h" -#include "dcn/dcn_3_0_0_sh_mask.h" - -#include "nbio/nbio_7_4_offset.h" - -#include "dpcs/dpcs_3_0_0_offset.h" -#include "dpcs/dpcs_3_0_0_sh_mask.h" - -#include "mmhub/mmhub_2_0_0_offset.h" -#include "mmhub/mmhub_2_0_0_sh_mask.h" - -#include "reg_helper.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" - -#include "dml/dcn30/dcn30_fpu.h" -#include "dml/dcn30/display_mode_vba_30.h" -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" -#include "amdgpu_socbb.h" -#include "dc_dmub_srv.h" - -#define DC_LOGGER \ - dc->ctx->logger -#define DC_LOGGER_INIT(logger) - -enum dcn30_clk_src_array_id { - DCN30_CLK_SRC_PLL0, - DCN30_CLK_SRC_PLL1, - DCN30_CLK_SRC_PLL2, - DCN30_CLK_SRC_PLL3, - DCN30_CLK_SRC_PLL4, - DCN30_CLK_SRC_PLL5, - DCN30_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file - */ - -/* DCN */ -#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define SRII_DWB(reg_name, temp_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block 
## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - mm ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \ - mmMM ## reg_name - -/* CLOCK */ -#define CLK_BASE_INNER(seg) \ - CLK_BASE__INST0_SEG ## seg - -#define CLK_BASE(seg) \ - CLK_BASE_INNER(seg) - -#define CLK_SRI(reg_name, block, inst)\ - .reg_name = CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## _ ## inst ## _ ## reg_name - - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN2_0(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E), - clk_src_regs(5, F) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -#define abm_regs(id)\ -[id] = {\ - ABM_DCN30_REG_LIST(id)\ -} - -static const struct dce_abm_registers abm_regs[] = { - abm_regs(0), - abm_regs(1), - abm_regs(2), - abm_regs(3), - abm_regs(4), - abm_regs(5), -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN30(_MASK) -}; - - - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6) -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs(id)\ -[id] = {\ - VPG_DCN3_REG_LIST(id)\ -} - -static const struct dcn30_vpg_registers vpg_regs[] = { - vpg_regs(0), - vpg_regs(1), - vpg_regs(2), - vpg_regs(3), - vpg_regs(4), - vpg_regs(5), - vpg_regs(6), -}; - -static const struct dcn30_vpg_shift vpg_shift = { - DCN3_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_vpg_mask vpg_mask = { - DCN3_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs(id)\ -[id] = {\ - AFMT_DCN3_REG_LIST(id)\ -} - -static const struct dcn30_afmt_registers afmt_regs[] = { - afmt_regs(0), - afmt_regs(1), - afmt_regs(2), - afmt_regs(3), - afmt_regs(4), - afmt_regs(5), - afmt_regs(6), -}; - -static const struct dcn30_afmt_shift afmt_shift = { - DCN3_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_afmt_mask afmt_mask = { - DCN3_AFMT_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs(id)\ -[id] = {\ - 
SE_DCN3_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4), - stream_enc_regs(5) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4), - hpd_regs(5) -}; - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN3_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - DPCS_DCN2_REG_LIST(id), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ -} - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E), - link_regs(5, F) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT),\ - DPCS_DCN2_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK),\ - DPCS_DCN2_MASK_SH_LIST(_MASK) -}; - - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCN_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -#define dpp_regs(id)\ -[id] = {\ - DPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn3_dpp_registers dpp_regs[] = { - dpp_regs(0), - dpp_regs(1), - dpp_regs(2), - dpp_regs(3), - dpp_regs(4), - dpp_regs(5), -}; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4), - opp_regs(5) -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4), - aux_engine_regs(5) -}; - -#define dwbc_regs_dcn3(id)\ -[id] = {\ - DWBC_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_dwbc_registers dwbc30_regs[] = { - dwbc_regs_dcn3(0), -}; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - 
DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define mcif_wb_regs_dcn3(id)\ -[id] = {\ - MCIF_WB_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { - mcif_wb_regs_dcn3(0) -}; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define dsc_regsDCN20(id)\ -[id] = {\ - DSC_REG_LIST_DCN20(id)\ -} - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1), - dsc_regsDCN20(2), - dsc_regsDCN20(3), - dsc_regsDCN20(4), - dsc_regsDCN20(5) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static const struct dcn30_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN3_0(0), - MPC_REG_LIST_DCN3_0(1), - MPC_REG_LIST_DCN3_0(2), - MPC_REG_LIST_DCN3_0(3), - MPC_REG_LIST_DCN3_0(4), - MPC_REG_LIST_DCN3_0(5), - MPC_OUT_MUX_REG_LIST_DCN3_0(0), - MPC_OUT_MUX_REG_LIST_DCN3_0(1), - MPC_OUT_MUX_REG_LIST_DCN3_0(2), - MPC_OUT_MUX_REG_LIST_DCN3_0(3), - MPC_OUT_MUX_REG_LIST_DCN3_0(4), - MPC_OUT_MUX_REG_LIST_DCN3_0(5), - MPC_RMU_GLOBAL_REG_LIST_DCN3AG, - MPC_RMU_REG_LIST_DCN3AG(0), - MPC_RMU_REG_LIST_DCN3AG(1), - MPC_RMU_REG_LIST_DCN3AG(2), - MPC_DWB_MUX_REG_LIST_DCN3_0(0), -}; - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define optc_regs(id)\ -[id] = {OPTC_COMMON_REG_LIST_DCN3_0(id)} - - -static const struct dcn_optc_registers optc_regs[] = { - optc_regs(0), - optc_regs(1), - optc_regs(2), - optc_regs(3), - optc_regs(4), - optc_regs(5) -}; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN30(id)\ -} - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3), - hubp_regs(4), - hubp_regs(5) -}; - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN30(_MASK) -}; - -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN30(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN30(_MASK) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN30() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN3(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN3(_MASK) -}; - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN30_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN30_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN30_MASK_SH_LIST(_MASK) -}; -#define vmid_regs(id)\ -[id] = {\ - DCN20_VMID_REG_LIST(id)\ -} - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - 
vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct resource_caps res_cap_dcn3 = { - .num_timing_generator = 6, - .num_opp = 6, - .num_video_plane = 6, - .num_audio = 6, - .num_stream_encoder = 6, - .num_pll = 6, - .num_dwb = 1, - .num_ddc = 6, - .num_vmid = 16, - .num_mpc_3dlut = 3, - .num_dsc = 6, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - /* 6:1 downscaling ratio: 1000/6 = 166.666 */ - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 16, - 16 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = true, //No DMCU on DCN30 - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .use_max_lb = true, - .exit_idle_opt_for_cursor_updates = true, - .enable_legacy_fast_update = false, - .using_dml2 = false, -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - .disallow_replay = false, - }, -}; - -static void dcn30_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN20_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn30_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn3_dpp *dpp = - kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if (dpp3_construct(dpp, ctx, inst, - &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -static struct output_pixel_processor *dcn30_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dcn30_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} - -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), - i2c_inst_regs(6), -}; - -static const struct dce_i2c_shift i2c_shifts = { 
- I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct dce_i2c_hw *dcn30_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} - -static struct mpc *dcn30_mpc_create( - struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), - GFP_KERNEL); - - if (!mpc30) - return NULL; - - dcn30_mpc_construct(mpc30, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct hubbub *dcn30_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub3) - return NULL; - - hubbub3_construct(hubbub3, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask); - - - for (i = 0; i < res_cap_dcn3.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub3->base; -} - -static struct timing_generator *dcn30_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn30_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn30_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - - dcn30_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -static struct panel_cntl *dcn30_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn30_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - 
-static struct vpg *dcn30_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); - - if (!vpg3) - return NULL; - - vpg3_construct(vpg3, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg3->base; -} - -static struct afmt *dcn30_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); - - if (!afmt3) - return NULL; - - afmt3_construct(afmt3, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - return &afmt3->base; -} - -static struct stream_encoder *dcn30_stream_encoder_create(enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn30_vpg_create(ctx, vpg_inst); - afmt = dcn30_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - - dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static struct dce_hwseq *dcn30_hwseq_create(struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn30_create_audio, - .create_stream_encoder = dcn30_stream_encoder_create, - .create_hwseq = dcn30_hwseq_create, -}; - -static void dcn30_resource_destruct(struct dcn30_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn30_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; 
- } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); - - if (pool->base.oem_device != NULL) { - struct dc *dc = pool->base.oem_device->ctx->dc; - - dc->link_srv->destroy_ddc_service(&pool->base.oem_device); - } -} - -static struct hubp *dcn30_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - - if (hubp3_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static bool dcn30_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - - dcn30_dwbc_construct(dwbc30, ctx, - &dwbc30_regs[i], - &dwbc30_shift, - &dwbc30_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -static bool dcn30_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - - dcn30_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb30_regs[i], - &mcif_wb30_shift, - &mcif_wb30_mask, - i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn30_dsc_create( - struct dc_context *ctx, 
uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -enum dc_status dcn30_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) -{ - - return dcn20_add_stream_to_ctx(dc, new_ctx, dc_stream); -} - -static void dcn30_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn30_resource_pool *dcn30_pool = TO_DCN30_RES_POOL(*pool); - - dcn30_resource_destruct(dcn30_pool); - kfree(dcn30_pool); - *pool = NULL; -} - -static struct clock_source *dcn30_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn3_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -int dcn30_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - - DC_FP_START(); - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - DC_FP_END(); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - if (!res_ctx->pipe_ctx[i].stream) - continue; - - pipes[pipe_cnt++].pipe.scale_ratio_depth.lb_depth = - dm_lb_16; - } - - return pipe_cnt; -} - -void dcn30_populate_dml_writeback_from_context( - struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) -{ - DC_FP_START(); - dcn30_fpu_populate_dml_writeback_from_context(dc, res_ctx, pipes); - DC_FP_END(); -} - -unsigned int dcn30_calc_max_scaled_time( - unsigned int time_per_pixel, - enum mmhubbub_wbif_mode mode, - unsigned int urgent_watermark) -{ - unsigned int time_per_byte = 0; - unsigned int total_free_entry = 0xb40; - unsigned int buf_lh_capability; - unsigned int max_scaled_time; - - if (mode == PACKED_444) /* packed mode 32 bpp */ - time_per_byte = time_per_pixel/4; - else if (mode == PACKED_444_FP16) /* packed mode 64 bpp */ - time_per_byte = time_per_pixel/8; - - if (time_per_byte == 0) - time_per_byte = 1; - - buf_lh_capability = (total_free_entry*time_per_byte*32) >> 6; /* time_per_byte is in u6.6*/ - max_scaled_time = buf_lh_capability - urgent_watermark; - return max_scaled_time; -} - -void dcn30_set_mcif_arb_params( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt) -{ - enum mmhubbub_wbif_mode wbif_mode; - struct display_mode_lib *dml = &context->bw_ctx.dml; - struct mcif_arb_params *wb_arb_params; - int i, j, dwb_pipe; - - /* Writeback MCIF_WB arbitration parameters */ - dwb_pipe = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - for (j = 0; j < MAX_DWB_PIPES; j++) { - struct dc_writeback_info *writeback_info = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j]; - - if (writeback_info->wb_enabled == false) - continue; - - //wb_arb_params = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j].mcif_arb_params; - wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[dwb_pipe]; - - if 
(writeback_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || - writeback_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) - wbif_mode = PACKED_444_FP16; - else - wbif_mode = PACKED_444; - - DC_FP_START(); - dcn30_fpu_set_mcif_arb_params(wb_arb_params, dml, pipes, pipe_cnt, j); - DC_FP_END(); - wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */ - wb_arb_params->slice_lines = 32; - wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */ - wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel, - wbif_mode, - wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */ - - dwb_pipe++; - - if (dwb_pipe >= MAX_DWB_PIPES) - return; - } - if (dwb_pipe >= MAX_DWB_PIPES) - return; - } - -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -bool dcn30_acquire_post_bldn_3dlut( - struct resource_context *res_ctx, - const struct resource_pool *pool, - int mpcc_id, - struct dc_3dlut **lut, - struct dc_transfer_func **shaper) -{ - int i; - bool ret = false; - union dc_3dlut_state *state; - - ASSERT(*lut == NULL && *shaper == NULL); - *lut = NULL; - *shaper = NULL; - - for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { - if (!res_ctx->is_mpc_3dlut_acquired[i]) { - *lut = pool->mpc_lut[i]; - *shaper = pool->mpc_shaper[i]; - state = &pool->mpc_lut[i]->state; - res_ctx->is_mpc_3dlut_acquired[i] = true; - state->bits.rmu_idx_valid = 1; - state->bits.rmu_mux_num = i; - if (state->bits.rmu_mux_num == 0) - state->bits.mpc_rmu0_mux = mpcc_id; - else if (state->bits.rmu_mux_num == 1) - state->bits.mpc_rmu1_mux = mpcc_id; - else if (state->bits.rmu_mux_num == 2) - state->bits.mpc_rmu2_mux = mpcc_id; - ret = true; - break; - } - } - return ret; -} - -bool dcn30_release_post_bldn_3dlut( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_3dlut **lut, - struct dc_transfer_func **shaper) -{ - int i; - bool ret = false; - - for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { - if (pool->mpc_lut[i] == *lut && pool->mpc_shaper[i] == *shaper) { - res_ctx->is_mpc_3dlut_acquired[i] = false; - pool->mpc_lut[i]->state.raw = 0; - *lut = NULL; - *shaper = NULL; - ret = true; - break; - } - } - return ret; -} - -static bool is_soc_bounding_box_valid(struct dc *dc) -{ - uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; - - if (ASICREV_IS_SIENNA_CICHLID_P(hw_internal_rev)) - return true; - - return false; -} - -static bool init_soc_bounding_box(struct dc *dc, - struct dcn30_resource_pool *pool) -{ - struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_0_soc; - struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_0_ip; - - DC_LOGGER_INIT(dc->ctx->logger); - - if (!is_soc_bounding_box_valid(dc)) { - DC_LOG_ERROR("%s: not valid soc bounding box\n", __func__); - return false; - } - - loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; - loaded_ip->max_num_dpp = pool->base.pipe_count; - loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - dcn20_patch_bounding_box(dc, loaded_bb); - DC_FP_START(); - patch_dcn30_soc_bounding_box(dc, &dcn3_0_soc); - DC_FP_END(); - - return true; -} - -static bool dcn30_split_stream_for_mpc_or_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *pri_pipe, - struct pipe_ctx *sec_pipe, - bool odm) -{ - int pipe_idx = 
sec_pipe->pipe_idx; - const struct resource_pool *pool = dc->res_pool; - - *sec_pipe = *pri_pipe; - - sec_pipe->pipe_idx = pipe_idx; - sec_pipe->plane_res.mi = pool->mis[pipe_idx]; - sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; - sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; - sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; - sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; - sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; - sec_pipe->stream_res.dsc = NULL; - if (odm) { - if (pri_pipe->next_odm_pipe) { - ASSERT(pri_pipe->next_odm_pipe != sec_pipe); - sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; - sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; - } - if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { - pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; - } - if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { - pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; - } - pri_pipe->next_odm_pipe = sec_pipe; - sec_pipe->prev_odm_pipe = pri_pipe; - - if (!sec_pipe->top_pipe) - sec_pipe->stream_res.opp = pool->opps[pipe_idx]; - else - sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; - if (sec_pipe->stream->timing.flags.DSC == 1) { - dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); - ASSERT(sec_pipe->stream_res.dsc); - if (sec_pipe->stream_res.dsc == NULL) - return false; - } - } else { - if (pri_pipe->bottom_pipe) { - ASSERT(pri_pipe->bottom_pipe != sec_pipe); - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; - sec_pipe->bottom_pipe->top_pipe = sec_pipe; - } - pri_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe; - - ASSERT(pri_pipe->plane_state); - } - - return true; -} - -static struct pipe_ctx *dcn30_find_split_pipe( - struct dc *dc, - struct dc_state *context, - int old_index) -{ - struct pipe_ctx *pipe = NULL; - int i; - - if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[old_index]; - pipe->pipe_idx = old_index; - } - - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - } - - /* - * May need to fix pipes getting tossed from 1 opp to another on flip - * Add for debugging transient underflow during topology updates: - * ASSERT(pipe); - */ - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - - return pipe; -} - -noinline bool dcn30_internal_validate_bw( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *vlevel_out, - bool fast_validate, - bool allow_self_refresh_only) -{ - bool out = false; - bool repopulate_pipes = false; - int split[MAX_PIPES] = { 0 }; - bool merge[MAX_PIPES] = { false }; - bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - ASSERT(pipes); - if (!pipes) - return false; - - context->bw_ctx.dml.vba.maxMpcComb = 0; - context->bw_ctx.dml.vba.VoltageLevel = 0; - context->bw_ctx.dml.vba.DRAMClockChangeSupport[0][0] = 
dm_dram_clock_change_vactive; - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (!fast_validate || !allow_self_refresh_only) { - /* - * DML favors voltage over p-state, but we're more interested in - * supporting p-state over voltage. We can't support p-state in - * prefetch mode > 0 so try capping the prefetch mode to start. - */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh_and_mclk_switch; - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - /* This may adjust vlevel and maxMpcComb */ - if (vlevel < context->bw_ctx.dml.soc.num_states) - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - } - if (allow_self_refresh_only && - (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported)) { - /* - * If mode is unsupported or there's still no p-state support - * then fall back to favoring voltage. - * - * We don't actually support prefetch mode 2, so require that we - * at least support prefetch mode 1. - */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh; - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - if (vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, sizeof(split)); - memset(merge, 0, sizeof(merge)); - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - } - } - - dml_log_mode_support_params(&context->bw_ctx.dml); - - if (vlevel == context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - if (!dc->config.enable_windowed_mpo_odm) { - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; - - if (!pipe->stream) - continue; - - /* We only support full screen mpo with ODM */ - if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled - && pipe->plane_state && mpo_pipe - && memcmp(&mpo_pipe->plane_state->clip_rect, - &pipe->stream->src, - sizeof(struct rect)) != 0) { - ASSERT(mpo_pipe->plane_state != pipe->plane_state); - goto validate_fail; - } - pipe_idx++; - } - } - - /* merge pipes if necessary */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /*skip pipes that don't need merging*/ - if (!merge[i]) - continue; - - /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ - if (pipe->prev_odm_pipe) { - /*split off odm pipe*/ - pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; - if (pipe->next_odm_pipe) - pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; - - pipe->bottom_pipe = NULL; - pipe->next_odm_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - pipe->top_pipe = NULL; - pipe->prev_odm_pipe = NULL; - if (pipe->stream_res.dsc) - dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - struct pipe_ctx *top_pipe = pipe->top_pipe; 
- struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; - - top_pipe->bottom_pipe = bottom_pipe; - if (bottom_pipe) - bottom_pipe->top_pipe = top_pipe; - - pipe->top_pipe = NULL; - pipe->bottom_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else - ASSERT(0); /* Should never try to merge master pipe */ - - } - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = NULL; - bool odm; - int old_index = -1; - - if (!pipe->stream || newly_split[i]) - continue; - - pipe_idx++; - odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; - - if (!pipe->plane_state && !odm) - continue; - - if (split[i]) { - if (odm) { - if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - } else { - if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else if (old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - } - hsplit_pipe = dcn30_find_split_pipe(dc, context, old_index); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) - goto validate_fail; - - if (!dcn30_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe, odm)) - goto validate_fail; - - newly_split[hsplit_pipe->pipe_idx] = true; - repopulate_pipes = true; - } - if (split[i] == 4) { - struct pipe_ctx *pipe_4to1; - - if (odm && old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dcn30_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dcn30_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - - if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe - && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dcn30_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dcn30_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - hsplit_pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - } - if (odm) - dcn20_build_mapped_resource(dc, context, pipe->stream); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if 
(pipe->plane_state) { - if (!resource_build_scaling_params(pipe)) - goto validate_fail; - } - } - - /* Actual dsc count per stream dsc validation*/ - if (!dcn20_validate_dsc(dc, context)) { - vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - if (repopulate_pipes) - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - context->bw_ctx.dml.vba.VoltageLevel = vlevel; - *vlevel_out = vlevel; - *pipe_cnt_out = pipe_cnt; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - -static int get_refresh_rate(struct dc_state *context) -{ - int refresh_rate = 0; - int h_v_total = 0; - struct dc_crtc_timing *timing = NULL; - - if (context == NULL || context->streams[0] == NULL) - return 0; - - /* check if refresh rate at least 120hz */ - timing = &context->streams[0]->timing; - if (timing == NULL) - return 0; - - h_v_total = timing->h_total * timing->v_total; - if (h_v_total == 0) - return 0; - - refresh_rate = ((timing->pix_clk_100hz * 100) / (h_v_total)) + 1; - return refresh_rate; -} - -#define MAX_STRETCHED_V_BLANK 500 // in micro-seconds -/* - * Scaling factor for v_blank stretch calculations considering timing in - * micro-seconds and pixel clock in 100hz. - * Note: the parenthesis are necessary to ensure the correct order of - * operation where V_SCALE is used. - */ -#define V_SCALE (10000 / MAX_STRETCHED_V_BLANK) - -static int get_frame_rate_at_max_stretch_100hz(struct dc_state *context) -{ - struct dc_crtc_timing *timing = NULL; - uint32_t sec_per_100_lines; - uint32_t max_v_blank; - uint32_t curr_v_blank; - uint32_t v_stretch_max; - uint32_t stretched_frame_pix_cnt; - uint32_t scaled_stretched_frame_pix_cnt; - uint32_t scaled_refresh_rate; - - if (context == NULL || context->streams[0] == NULL) - return 0; - - /* check if refresh rate at least 120hz */ - timing = &context->streams[0]->timing; - if (timing == NULL) - return 0; - - sec_per_100_lines = timing->pix_clk_100hz / timing->h_total + 1; - max_v_blank = sec_per_100_lines / V_SCALE + 1; - curr_v_blank = timing->v_total - timing->v_addressable; - v_stretch_max = (max_v_blank > curr_v_blank) ? 
(max_v_blank - curr_v_blank) : (0); - stretched_frame_pix_cnt = (v_stretch_max + timing->v_total) * timing->h_total; - scaled_stretched_frame_pix_cnt = stretched_frame_pix_cnt / 10000; - scaled_refresh_rate = (timing->pix_clk_100hz) / scaled_stretched_frame_pix_cnt + 1; - - return scaled_refresh_rate; -} - -static bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(struct dc_state *context) -{ - int refresh_rate_max_stretch_100hz; - int min_refresh_100hz; - - if (context == NULL || context->streams[0] == NULL) - return false; - - refresh_rate_max_stretch_100hz = get_frame_rate_at_max_stretch_100hz(context); - min_refresh_100hz = context->streams[0]->timing.min_refresh_in_uhz / 10000; - - if (refresh_rate_max_stretch_100hz < min_refresh_100hz) - return false; - - return true; -} - -bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context) -{ - int refresh_rate = 0; - const int minimum_refreshrate_supported = 120; - - if (context == NULL || context->streams[0] == NULL) - return false; - - if (context->streams[0]->sink->edid_caps.panel_patch.disable_fams) - return false; - - if (dc->debug.disable_fams) - return false; - - if (!dc->caps.dmub_caps.mclk_sw) - return false; - - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down) - return false; - - /* more then 1 monitor connected */ - if (context->stream_count != 1) - return false; - - refresh_rate = get_refresh_rate(context); - if (refresh_rate < minimum_refreshrate_supported) - return false; - - if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(context)) - return false; - - if (!context->streams[0]->allow_freesync) - return false; - - if (context->streams[0]->vrr_active_variable && dc->debug.disable_fams_gaming) - return false; - - context->streams[0]->fpo_in_use = true; - - return true; -} - -/* - * set up FPO watermarks, pstate, dram latency - */ -void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context) -{ - ASSERT(dc != NULL && context != NULL); - if (dc == NULL || context == NULL) - return; - - /* Set wm_a.pstate so high natural MCLK switches are impossible: 4 seconds */ - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; -} - -void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - DC_FP_START(); - dcn30_fpu_update_soc_for_wm_a(dc, context); - DC_FP_END(); -} - -void dcn30_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - DC_FP_START(); - dcn30_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); - DC_FP_END(); -} - -bool dcn30_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out = false; - - BW_VAL_TRACE_SETUP(); - - int vlevel = 0; - int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); - DC_LOGGER_INIT(dc->ctx->logger); - - BW_VAL_TRACE_COUNT(); - - DC_FP_START(); - out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, true); - DC_FP_END(); - - if (pipe_cnt == 0) - goto validate_out; - - if (!out) - goto validate_fail; - - BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - - if (fast_validate) { - BW_VAL_TRACE_SKIP(fast); - goto validate_out; - } - - DC_FP_START(); - if (dc->res_pool->funcs->calculate_wm_and_dlg) - dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, 
pipe_cnt, vlevel); - DC_FP_END(); - - BW_VAL_TRACE_END_WATERMARKS(); - - goto validate_out; - -validate_fail: - DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", - dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); - - BW_VAL_TRACE_SKIP(fail); - out = false; - -validate_out: - kfree(pipes); - - BW_VAL_TRACE_FINISH(); - - return out; -} - -void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - unsigned int i, j; - unsigned int num_states = 0; - - unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; - unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; - - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200}; - unsigned int num_dcfclk_sta_targets = 4; - unsigned int num_uclk_states; - - struct dc_bounding_box_max_clk dcn30_bb_max_clk; - - memset(&dcn30_bb_max_clk, 0, sizeof(dcn30_bb_max_clk)); - - if (dc->ctx->dc_bios->vram_info.num_chans) - dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - - DC_FP_START(); - dcn30_fpu_update_dram_channel_width_bytes(dc); - DC_FP_END(); - - if (bw_params->clk_table.entries[0].memclk_mhz) { - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > dcn30_bb_max_clk.max_dcfclk_mhz) - dcn30_bb_max_clk.max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > dcn30_bb_max_clk.max_dispclk_mhz) - dcn30_bb_max_clk.max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > dcn30_bb_max_clk.max_dppclk_mhz) - dcn30_bb_max_clk.max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > dcn30_bb_max_clk.max_phyclk_mhz) - dcn30_bb_max_clk.max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - } - - DC_FP_START(); - dcn30_fpu_update_max_clk(&dcn30_bb_max_clk); - DC_FP_END(); - - if (dcn30_bb_max_clk.max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = dcn30_bb_max_clk.max_dcfclk_mhz; - num_dcfclk_sta_targets++; - } else if (dcn30_bb_max_clk.max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates - for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > dcn30_bb_max_clk.max_dcfclk_mhz) { - dcfclk_sta_targets[i] = dcn30_bb_max_clk.max_dcfclk_mhz; - break; - } - } - // Update size of array since we "removed" duplicates - num_dcfclk_sta_targets = i + 1; - } - - num_uclk_states = bw_params->clk_table.num_entries; - - // Calculate optimal dcfclk for each uclk - for (i = 0; i < num_uclk_states; i++) { - DC_FP_START(); - dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, - &optimal_dcfclk_for_uclk[i], NULL); - DC_FP_END(); - if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { - optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; - } - } - - // Calculate optimal uclk for each dcfclk sta target - for (i = 0; i < num_dcfclk_sta_targets; i++) { - for (j = 0; j < num_uclk_states; j++) { - if (dcfclk_sta_targets[i] < 
optimal_dcfclk_for_uclk[j]) { - optimal_uclk_for_dcfclk_sta_targets[i] = - bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - i = 0; - j = 0; - // create the final dcfclk and uclk table - while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } else { - j = num_uclk_states; - } - } - } - - while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } - - while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } - - dcn3_0_soc.num_states = num_states; - DC_FP_START(); - dcn30_fpu_update_bw_bounding_box(dc, bw_params, &dcn30_bb_max_clk, dcfclk_mhz, dram_speed_mts); - DC_FP_END(); - } -} - -static void dcn30_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - *panel_config = panel_config_defaults; -} - -static const struct resource_funcs dcn30_res_pool_funcs = { - .destroy = dcn30_destroy_resource_pool, - .link_enc_create = dcn30_link_encoder_create, - .panel_cntl_create = dcn30_panel_cntl_create, - .validate_bandwidth = dcn30_validate_bandwidth, - .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, - .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, - .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn30_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn30_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_panel_config_defaults = dcn30_get_panel_config_defaults, -}; - -#define CTX ctx - -#define REG(reg_name) \ - (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) - -static uint32_t read_pipe_fuses(struct dc_context *ctx) -{ - uint32_t value = REG_READ(CC_DC_PIPE_DIS); - /* Support for max 6 pipes */ - value = value & 0x3f; - return value; -} - -static bool dcn30_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn30_resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - struct ddc_service_init_data ddc_init_data = {0}; - uint32_t pipe_fuses = read_pipe_fuses(ctx); - uint32_t num_pipes = 0; - - if (!(pipe_fuses == 0 || pipe_fuses == 0x3e)) { - 
BREAK_TO_DEBUGGER(); - dm_error("DC: Unexpected fuse recipe for navi2x !\n"); - /* fault to single pipe */ - pipe_fuses = 0x3e; - } - - DC_FP_START(); - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dcn3; - - pool->base.funcs = &dcn30_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.mall_size_per_mem_channel = 8; - /* total size = mall per channel * num channels * 1024 * 1024 */ - dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; - dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN3 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //3 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->caps.dp_hdmi21_pcon_support = true; - dc->caps.max_v_total = (1 << 15) - 1; - - /* read VBIOS LTTPR caps */ - { - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - if (ctx->dc_bios->funcs->get_lttpr_interop) { - enum bp_result bp_query_result; - uint8_t is_vbios_interop_enabled = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, - &is_vbios_interop_enabled); - dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; - } - } - - if 
(dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN30_CLK_SRC_PLL0] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN30_CLK_SRC_PLL1] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN30_CLK_SRC_PLL2] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN30_CLK_SRC_PLL3] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN30_CLK_SRC_PLL4] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - pool->base.clock_sources[DCN30_CLK_SRC_PLL5] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL5, - &clk_src_regs[5], false); - - pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* DCCG */ - pool->base.dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* PP Lib and SMU interfaces */ - init_soc_bounding_box(dc, pool); - - num_pipes = dcn3_0_ip.max_num_dpp; - - for (i = 0; i < dcn3_0_ip.max_num_dpp; i++) - if (pipe_fuses & 1 << i) - num_pipes--; - - dcn3_0_ip.max_num_dpp = num_pipes; - dcn3_0_ip.max_num_otg = num_pipes; - - dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); - - /* IRQ */ - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn30_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn30_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.hubps[i] = dcn30_hubp_create(ctx, i); - if (pool->base.hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->base.dpps[i] = dcn30_dpp_create(ctx, i); - if (pool->base.dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - pool->base.opps[i] = dcn30_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.timing_generators[i] = dcn30_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: 
failed to create tg!\n"); - goto create_fail; - } - } - pool->base.timing_generator_count = i; - /* PSR */ - pool->base.psr = dmub_psr_create(ctx); - - if (pool->base.psr == NULL) { - dm_error("DC: failed to create PSR obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* ABM */ - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.multiple_abms[i] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - /* MPC and DSC */ - pool->base.mpc = dcn30_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn30_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn30_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn30_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn30_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn30_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* Audio, Stream Encoders including DIG and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane caps */ - dcn30_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { - ddc_init_data.ctx = dc->ctx; - ddc_init_data.link = NULL; - ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; - ddc_init_data.id.enum_id = 0; - ddc_init_data.id.type = OBJECT_TYPE_GENERIC; - pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); - } else { - pool->base.oem_device = NULL; - } - - DC_FP_END(); - - return true; - -create_fail: - - DC_FP_END(); - dcn30_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn30_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn30_resource_pool *pool = - kzalloc(sizeof(struct dcn30_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn30_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h deleted file mode 100644 index 8e6b8b7368fd..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. 
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef _DCN30_RESOURCE_H_
-#define _DCN30_RESOURCE_H_
-
-#include "core_types.h"
-
-#define TO_DCN30_RES_POOL(pool)\
- container_of(pool, struct dcn30_resource_pool, base)
-
-struct dc;
-struct resource_pool;
-struct _vcs_dpi_display_pipe_params_st;
-
-extern struct _vcs_dpi_ip_params_st dcn3_0_ip;
-extern struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc;
-
-struct dcn30_resource_pool {
- struct resource_pool base;
-};
-struct resource_pool *dcn30_create_resource_pool(
- const struct dc_init_data *init_data,
- struct dc *dc);
-
-void dcn30_set_mcif_arb_params(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt);
-
-unsigned int dcn30_calc_max_scaled_time(
- unsigned int time_per_pixel,
- enum mmhubbub_wbif_mode mode,
- unsigned int urgent_watermark);
-
-bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context,
- bool fast_validate);
-bool dcn30_internal_validate_bw(
- struct dc *dc,
- struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int *pipe_cnt_out,
- int *vlevel_out,
- bool fast_validate,
- bool allow_self_refresh_only);
-void dcn30_calculate_wm_and_dlg(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel);
-void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context);
-void dcn30_populate_dml_writeback_from_context(
- struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes);
-
-int dcn30_populate_dml_pipes_from_context(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- bool fast_validate);
-
-bool dcn30_acquire_post_bldn_3dlut(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- int mpcc_id,
- struct dc_3dlut **lut,
- struct dc_transfer_func **shaper);
-
-bool dcn30_release_post_bldn_3dlut(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct dc_3dlut **lut,
- struct dc_transfer_func **shaper);
-
-enum dc_status dcn30_add_stream_to_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *dc_stream);
-
-void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
-
-bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
-void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
-int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel);
-
-#endif /* _DCN30_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 30fbc5e06dca..cbf59d7e78c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -10,7 +10,7 @@
 #
 # Makefile for dcn30.
-DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \
+DCN301 = dcn301_init.o dcn301_dccg.o \
 dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o \
 dcn301_optc.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
deleted file mode 100644
index f3b75f283aa2..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ /dev/null
@@ -1,1728 +0,0 @@
-/*
- * Copyright 2019-2021 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-
-#include "dm_services.h"
-#include "dc.h"
-
-#include "dcn301_init.h"
-
-#include "resource.h"
-#include "include/irq_service_interface.h"
-#include "dcn30/dcn30_resource.h"
-#include "dcn301_resource.h"
-
-#include "dcn20/dcn20_resource.h"
-
-#include "dcn10/dcn10_ipp.h"
-#include "dcn301/dcn301_hubbub.h"
-#include "dcn30/dcn30_mpc.h"
-#include "dcn30/dcn30_hubp.h"
-#include "irq/dcn30/irq_service_dcn30.h"
-#include "dcn30/dcn30_dpp.h"
-#include "dcn301/dcn301_optc.h"
-#include "dcn20/dcn20_hwseq.h"
-#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hwseq.h"
-#include "dcn30/dcn30_opp.h"
-#include "dcn20/dcn20_dsc.h"
-#include "dcn30/dcn30_vpg.h"
-#include "dcn30/dcn30_afmt.h"
-#include "dce/dce_clock_source.h"
-#include "dce/dce_audio.h"
-#include "dce/dce_hwseq.h"
-#include "clk_mgr.h"
-#include "virtual/virtual_stream_encoder.h"
-#include "dce110/dce110_resource.h"
-#include "dml/display_mode_vba.h"
-#include "dcn301/dcn301_dccg.h"
-#include "dcn10/dcn10_resource.h"
-#include "dcn30/dcn30_dio_stream_encoder.h"
-#include "dcn301/dcn301_dio_link_encoder.h"
-#include "dcn301_panel_cntl.h"
-
-#include "vangogh_ip_offset.h"
-
-#include "dcn30/dcn30_dwb.h"
-#include "dcn30/dcn30_mmhubbub.h"
-
-#include "dcn/dcn_3_0_1_offset.h"
-#include "dcn/dcn_3_0_1_sh_mask.h"
-
-#include "nbio/nbio_7_2_0_offset.h"
-
-#include "dpcs/dpcs_3_0_0_offset.h"
-#include "dpcs/dpcs_3_0_0_sh_mask.h"
-
-#include "reg_helper.h"
-#include "dce/dmub_abm.h"
-#include "dce/dce_aux.h"
-#include "dce/dce_i2c.h"
-
-#include "dml/dcn30/dcn30_fpu.h"
-
-#include "dml/dcn30/display_mode_vba_30.h"
-#include "dml/dcn301/dcn301_fpu.h"
-#include "vm_helper.h"
-#include "dcn20/dcn20_vmid.h"
-#include "amdgpu_socbb.h"
-
-#define TO_DCN301_RES_POOL(pool)\
- container_of(pool, struct dcn301_resource_pool, base)
-
-#define DC_LOGGER \
- dc->ctx->logger
-#define DC_LOGGER_INIT(logger)
-
-enum dcn301_clk_src_array_id {
- DCN301_CLK_SRC_PLL0,
- DCN301_CLK_SRC_PLL1,
- DCN301_CLK_SRC_PLL2,
- DCN301_CLK_SRC_PLL3,
- DCN301_CLK_SRC_TOTAL
-};
-
-/* begin *********************
- * macros to expend register list macro defined in HW object header file
- */
-
-/* DCN */
-#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
-
-#define BASE(seg) BASE_INNER(seg)
-
-#define SR(reg_name)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-#define SRI(reg_name, block, id)\
- .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRI2(reg_name, block, id)\
- .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
- mm ## reg_name
-
-#define SRIR(var_name, reg_name, block, id)\
- .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRII(reg_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRII2(reg_name_pre, reg_name_post, id)\
- .reg_name_pre ## _ ## reg_name_post[id] = BASE(mm ## reg_name_pre \
- ## id ## _ ## reg_name_post ## _BASE_IDX) + \
- mm ## reg_name_pre ## id ## _ ## reg_name_post
-
-#define SRII_MPC_RMU(reg_name, block, id)\
- .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## reg_name
-
-#define SRII_DWB(reg_name, temp_name, block, id)\
- .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
- mm ## block ## id ## _ ## temp_name
-
-#define SF_DWB2(reg_name, block, id, field_name, post_fix) \
-
.field_name = reg_name ## __ ## field_name ## post_fix - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - mm ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX0_ ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(regMM ## reg_name ## _BASE_IDX) + \ - regMM ## reg_name - -/* CLOCK */ -#define CLK_BASE_INNER(seg) \ - CLK_BASE__INST0_SEG ## seg - -#define CLK_BASE(seg) \ - CLK_BASE_INNER(seg) - -#define CLK_SRI(reg_name, block, inst)\ - .reg_name = CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## _ ## inst ## _ ## reg_name - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN3_01(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -#define abm_regs(id)\ -[id] = {\ - ABM_DCN301_REG_LIST(id)\ -} - -static const struct dce_abm_registers abm_regs[] = { - abm_regs(0), - abm_regs(1), - abm_regs(2), - abm_regs(3), -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN30(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6) -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs(id)\ -[id] = {\ - VPG_DCN3_REG_LIST(id)\ -} - -static const struct dcn30_vpg_registers vpg_regs[] = { - vpg_regs(0), - vpg_regs(1), - vpg_regs(2), - vpg_regs(3), -}; - -static const struct dcn30_vpg_shift vpg_shift = { - DCN3_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_vpg_mask vpg_mask = { - DCN3_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs(id)\ -[id] = {\ - AFMT_DCN3_REG_LIST(id)\ -} - -static const struct dcn30_afmt_registers afmt_regs[] = { - afmt_regs(0), - afmt_regs(1), - afmt_regs(2), - afmt_regs(3), -}; - -static const struct dcn30_afmt_shift afmt_shift = { - DCN3_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_afmt_mask afmt_mask = { - DCN3_AFMT_MASK_SH_LIST(_MASK) -}; - 
-#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN3_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), -}; - - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN301_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - DPCS_DCN2_REG_LIST(id), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ -} - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), - link_regs(2, C), - link_regs(3, D), -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN301(__SHIFT),\ - DPCS_DCN2_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN301(_MASK),\ - DPCS_DCN2_MASK_SH_LIST(_MASK) -}; - -#define panel_cntl_regs(id)\ -[id] = {\ - DCN301_PANEL_CNTL_REG_LIST(id),\ -} - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - panel_cntl_regs(0), - panel_cntl_regs(1), -}; - -static const struct dcn301_panel_cntl_shift panel_cntl_shift = { - DCN301_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn301_panel_cntl_mask panel_cntl_mask = { - DCN301_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -#define dpp_regs(id)\ -[id] = {\ - DPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn3_dpp_registers dpp_regs[] = { - dpp_regs(0), - dpp_regs(1), - dpp_regs(2), - dpp_regs(3), -}; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), -}; - -#define dwbc_regs_dcn3(id)\ -[id] = {\ - DWBC_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_dwbc_registers dwbc30_regs[] = { - dwbc_regs_dcn3(0), -}; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define mcif_wb_regs_dcn3(id)\ -[id] = {\ - 
MCIF_WB_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { - mcif_wb_regs_dcn3(0) -}; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define dsc_regsDCN20(id)\ -[id] = {\ - DSC_REG_LIST_DCN20(id)\ -} - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1), - dsc_regsDCN20(2), -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static const struct dcn30_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN3_0(0), - MPC_REG_LIST_DCN3_0(1), - MPC_REG_LIST_DCN3_0(2), - MPC_REG_LIST_DCN3_0(3), - MPC_OUT_MUX_REG_LIST_DCN3_0(0), - MPC_OUT_MUX_REG_LIST_DCN3_0(1), - MPC_OUT_MUX_REG_LIST_DCN3_0(2), - MPC_OUT_MUX_REG_LIST_DCN3_0(3), - MPC_RMU_GLOBAL_REG_LIST_DCN3AG, - MPC_RMU_REG_LIST_DCN3AG(0), - MPC_RMU_REG_LIST_DCN3AG(1), - MPC_DWB_MUX_REG_LIST_DCN3_0(0), -}; - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define optc_regs(id)\ -[id] = {OPTC_COMMON_REG_LIST_DCN3_0(id)} - - -static const struct dcn_optc_registers optc_regs[] = { - optc_regs(0), - optc_regs(1), - optc_regs(2), - optc_regs(3), -}; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN30(id)\ -} - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3), -}; - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN30(_MASK) -}; - -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN301(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN301(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN301(_MASK) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN301() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN301(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN301(_MASK) -}; - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN301_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN301_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN301_MASK_SH_LIST(_MASK) -}; -#define vmid_regs(id)\ -[id] = {\ - DCN20_VMID_REG_LIST(id)\ -} - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static struct resource_caps 
res_cap_dcn301 = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 4, - .num_stream_encoder = 4, - .num_pll = 4, - .num_dwb = 1, - .num_ddc = 4, - .num_vmid = 16, - .num_mpc_3dlut = 2, - .num_dsc = 3, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - /* 6:1 downscaling ratio: 1000/6 = 166.666 */ - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_dpp_power_gate = false, - .disable_hubp_power_gate = false, - .disable_clock_gate = true, - .disable_pplib_clock_request = true, - .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable - .dmub_command_table = true, - .use_max_lb = false, - .exit_idle_opt_for_cursor_updates = true, - .using_dml2 = false, -}; - -static void dcn301_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN20_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn301_dpp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn3_dpp *dpp = - kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if (dpp3_construct(dpp, ctx, inst, - &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} -static struct output_pixel_processor *dcn301_opp_create(struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dcn301_aux_engine_create(struct dc_context *ctx, uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) -}; - -static struct dce_i2c_hw *dcn301_i2c_hw_create(struct dc_context *ctx, uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct mpc *dcn301_mpc_create( - 
struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), - GFP_KERNEL); - - if (!mpc30) - return NULL; - - dcn30_mpc_construct(mpc30, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub3) - return NULL; - - hubbub301_construct(hubbub3, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask); - - - for (i = 0; i < res_cap_dcn301.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - hubbub3->num_vmid = res_cap_dcn301.num_vmid; - - return &hubbub3->base; -} - -static struct timing_generator *dcn301_timing_generator_create( - struct dc_context *ctx, uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn301_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn301_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - - dcn301_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -static struct panel_cntl *dcn301_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dcn301_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dcn301_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dcn301_panel_cntl_construct(panel_cntl, - init_data, - &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, - &panel_cntl_mask); - - return &panel_cntl->base; -} - - -#define CTX ctx - -#define REG(reg_name) \ - (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) - -static uint32_t read_pipe_fuses(struct dc_context *ctx) -{ - uint32_t value = REG_READ(CC_DC_PIPE_DIS); - /* RV1 support max 4 pipes */ - value = value & 0xf; - return value; -} - - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn301_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct vpg *dcn301_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct 
dcn30_vpg), GFP_KERNEL); - - if (!vpg3) - return NULL; - - vpg3_construct(vpg3, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg3->base; -} - -static struct afmt *dcn301_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); - - if (!afmt3) - return NULL; - - afmt3_construct(afmt3, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - return &afmt3->base; -} - -static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn301_vpg_create(ctx, vpg_inst); - afmt = dcn301_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - - dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static struct dce_hwseq *dcn301_hwseq_create(struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn301_create_audio, - .create_stream_encoder = dcn301_stream_encoder_create, - .create_hwseq = dcn301_hwseq_create, -}; - -static void dcn301_destruct(struct dcn301_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn301_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; 
i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); -} - -static struct hubp *dcn301_hubp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - - if (hubp3_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static bool dcn301_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - - dcn30_dwbc_construct(dwbc30, ctx, - &dwbc30_regs[i], - &dwbc30_shift, - &dwbc30_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -static bool dcn301_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - - dcn30_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb30_regs[i], - &mcif_wb30_shift, - &mcif_wb30_mask, - i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn301_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - - -static void dcn301_destroy_resource_pool(struct resource_pool **pool) -{ - struct 
dcn301_resource_pool *dcn301_pool = TO_DCN301_RES_POOL(*pool); - - dcn301_destruct(dcn301_pool); - kfree(dcn301_pool); - *pool = NULL; -} - -static struct clock_source *dcn301_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn301_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - - -static bool is_soc_bounding_box_valid(struct dc *dc) -{ - uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; - - if (ASICREV_IS_VANGOGH(hw_internal_rev)) - return true; - - return false; -} - -static bool init_soc_bounding_box(struct dc *dc, - struct dcn301_resource_pool *pool) -{ - struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_01_soc; - struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_01_ip; - - DC_LOGGER_INIT(dc->ctx->logger); - - if (!is_soc_bounding_box_valid(dc)) { - DC_LOG_ERROR("%s: not valid soc bounding box\n", __func__); - return false; - } - - loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; - loaded_ip->max_num_dpp = pool->base.pipe_count; - DC_FP_START(); - dcn20_patch_bounding_box(dc, loaded_bb); - DC_FP_END(); - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - DC_FP_START(); - dcn301_fpu_init_soc_bounding_box(bb_info); - DC_FP_END(); - } - } - - return true; -} - - -static void set_wm_ranges( - struct pp_smu_funcs *pp_smu, - struct _vcs_dpi_soc_bounding_box_st *loaded_bb) -{ - struct pp_smu_wm_range_sets ranges = {0}; - int i; - - ranges.num_reader_wm_sets = 0; - - if (loaded_bb->num_states == 1) { - ranges.reader_wm_sets[0].wm_inst = 0; - ranges.reader_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.reader_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.reader_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - - ranges.num_reader_wm_sets = 1; - } else if (loaded_bb->num_states > 1) { - for (i = 0; i < 4 && i < loaded_bb->num_states; i++) { - ranges.reader_wm_sets[i].wm_inst = i; - ranges.reader_wm_sets[i].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.reader_wm_sets[i].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - DC_FP_START(); - dcn301_fpu_set_wm_ranges(i, &ranges, loaded_bb); - DC_FP_END(); - ranges.num_reader_wm_sets = i + 1; - } - - ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.reader_wm_sets[ranges.num_reader_wm_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - } - - ranges.num_writer_wm_sets = 1; - - ranges.writer_wm_sets[0].wm_inst = 0; - ranges.writer_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges.writer_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - ranges.writer_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - 
ranges.writer_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - - /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ - pp_smu->nv_funcs.set_wm_ranges(&pp_smu->nv_funcs.pp_smu, &ranges); -} - -static void dcn301_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - DC_FP_START(); - dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); - DC_FP_END(); -} - -static struct resource_funcs dcn301_res_pool_funcs = { - .destroy = dcn301_destroy_resource_pool, - .link_enc_create = dcn301_link_encoder_create, - .panel_cntl_create = dcn301_panel_cntl_create, - .validate_bandwidth = dcn30_validate_bandwidth, - .calculate_wm_and_dlg = dcn301_calculate_wm_and_dlg, - .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, - .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn30_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn301_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state -}; - -static bool dcn301_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn301_resource_pool *pool) -{ - int i, j; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - uint32_t pipe_fuses = read_pipe_fuses(ctx); - uint32_t num_pipes = 0; - - DC_LOGGER_INIT(dc->ctx->logger); - - ctx->dc_bios->regs = &bios_regs; - - if (dc->ctx->asic_id.chip_id == DEVICE_ID_VGH_1435) - res_cap_dcn301.num_pll = 2; - pool->base.res_cap = &res_cap_dcn301; - - pool->base.funcs = &dcn301_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a enabled by default*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.is_apu = true; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; 
- dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN301 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->caps.dp_hdmi21_pcon_support = true; - - /* read VBIOS LTTPR caps */ - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - if (ctx->dc_bios->funcs->get_lttpr_interop) { - enum bp_result bp_query_result; - uint8_t is_vbios_interop_enabled = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); - dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN301_CLK_SRC_PLL0] = - dcn301_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN301_CLK_SRC_PLL1] = - dcn301_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN301_CLK_SRC_PLL2] = - dcn301_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN301_CLK_SRC_PLL3] = - dcn301_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - - pool->base.clk_src_count = DCN301_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn301_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* DCCG */ - pool->base.dccg = dccg301_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - init_soc_bounding_box(dc, pool); - - if (!dc->debug.disable_pplib_wm_range && pool->base.pp_smu->nv_funcs.set_wm_ranges) - set_wm_ranges(pool->base.pp_smu, &dcn3_01_soc); - - num_pipes = dcn3_01_ip.max_num_dpp; - - for (i = 0; i < dcn3_01_ip.max_num_dpp; i++) - if (pipe_fuses & 1 << i) - num_pipes--; - dcn3_01_ip.max_num_dpp = 
num_pipes; - dcn3_01_ip.max_num_otg = num_pipes; - - - dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30); - - /* IRQ */ - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn30_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn301_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - j = 0; - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->base.pipe_count; i++) { - - /* if pipe is disabled, skip instance of HW pipe, - * i.e, skip ASIC register instance - */ - if ((pipe_fuses & (1 << i)) != 0) { - DC_LOG_DEBUG("%s: fusing pipe %d\n", __func__, i); - continue; - } - - pool->base.hubps[j] = dcn301_hubp_create(ctx, i); - if (pool->base.hubps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->base.dpps[j] = dcn301_dpp_create(ctx, i); - if (pool->base.dpps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - - pool->base.opps[j] = dcn301_opp_create(ctx, i); - if (pool->base.opps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - - pool->base.timing_generators[j] = dcn301_timing_generator_create(ctx, i); - if (pool->base.timing_generators[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - j++; - } - pool->base.timing_generator_count = j; - pool->base.pipe_count = j; - pool->base.mpcc_count = j; - - /* ABM (or ABMs for NV2x) */ - /* TODO: */ - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.multiple_abms[i] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* MPC and DSC */ - pool->base.mpc = dcn301_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn301_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn301_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn301_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn301_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn301_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane 
caps */ - dcn301_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - return true; - -create_fail: - - dcn301_destruct(pool); - - return false; -} - -struct resource_pool *dcn301_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn301_resource_pool *pool = - kzalloc(sizeof(struct dcn301_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn301_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h deleted file mode 100644 index ae8672680cdd..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef _DCN301_RESOURCE_H_ -#define _DCN301_RESOURCE_H_ - -#include "core_types.h" - -struct dc; -struct resource_pool; -struct _vcs_dpi_display_pipe_params_st; - -extern struct _vcs_dpi_ip_params_st dcn3_01_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_01_soc; - -struct dcn301_resource_pool { - struct resource_pool base; -}; -struct resource_pool *dcn301_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -#endif /* _DCN301_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index 95b66baf39e9..0fcd03569d74 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -5,7 +5,7 @@ # # Makefile for dcn302. -DCN3_02 = dcn302_init.o dcn302_resource.o +DCN3_02 = dcn302_init.o AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c deleted file mode 100644 index 63ac984a04f7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ /dev/null @@ -1,1518 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dcn302_init.h" -#include "dcn302_resource.h" -#include "dcn302_dccg.h" -#include "irq/dcn302/irq_service_dcn302.h" - -#include "dcn30/dcn30_dio_link_encoder.h" -#include "dcn30/dcn30_dio_stream_encoder.h" -#include "dcn30/dcn30_dwb.h" -#include "dcn30/dcn30_dpp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn30/dcn30_hubp.h" -#include "dcn30/dcn30_mmhubbub.h" -#include "dcn30/dcn30_mpc.h" -#include "dcn30/dcn30_opp.h" -#include "dcn30/dcn30_optc.h" -#include "dcn30/dcn30_resource.h" - -#include "dcn20/dcn20_dsc.h" -#include "dcn20/dcn20_resource.h" - -#include "dml/dcn30/dcn30_fpu.h" - -#include "dcn10/dcn10_resource.h" - -#include "link.h" -#include "dce/dce_abm.h" -#include "dce/dce_audio.h" -#include "dce/dce_aux.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_hwseq.h" -#include "dce/dce_i2c_hw.h" -#include "dce/dce_panel_cntl.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "clk_mgr.h" - -#include "hw_sequencer_private.h" -#include "reg_helper.h" -#include "resource.h" -#include "vm_helper.h" - -#include "dml/dcn302/dcn302_fpu.h" - -#include "dimgrey_cavefish_ip_offset.h" -#include "dcn/dcn_3_0_2_offset.h" -#include "dcn/dcn_3_0_2_sh_mask.h" -#include "dpcs/dpcs_3_0_0_offset.h" -#include "dpcs/dpcs_3_0_0_sh_mask.h" -#include "nbio/nbio_7_4_offset.h" -#include "amdgpu_socbb.h" - -#define DC_LOGGER \ - dc->ctx->logger -#define DC_LOGGER_INIT(logger) - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .use_max_lb = true, - .exit_idle_opt_for_cursor_updates = true, - .enable_legacy_fast_update = false, - .using_dml2 = false, -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - .disallow_replay = false, - }, -}; - -enum dcn302_clk_src_array_id { - DCN302_CLK_SRC_PLL0, - DCN302_CLK_SRC_PLL1, - 
DCN302_CLK_SRC_PLL2, - DCN302_CLK_SRC_PLL3, - DCN302_CLK_SRC_PLL4, - DCN302_CLK_SRC_TOTAL -}; - -static const struct resource_caps res_cap_dcn302 = { - .num_timing_generator = 5, - .num_opp = 5, - .num_video_plane = 5, - .num_audio = 5, - .num_stream_encoder = 5, - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_mpc_3dlut = 2, - .num_dsc = 5, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - /* 6:1 downscaling ratio: 1000/6 = 166.666 */ - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 16, - 16 -}; - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* DCN */ -#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name - -#define SF(reg_name, field_name, post_fix)\ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + mm ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - mm ## reg_name ## _ ## block ## id - -#define SRII_DWB(reg_name, temp_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN30(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN30(_MASK) -}; - -#define vmid_regs(id)\ - [id] = { DCN20_VMID_REG_LIST(id) } - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static struct hubbub *dcn302_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), GFP_KERNEL); - - if 
(!hubbub3) - return NULL; - - hubbub3_construct(hubbub3, ctx, &hubbub_reg, &hubbub_shift, &hubbub_mask); - - for (i = 0; i < res_cap_dcn302.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub3->base; -} - -#define vpg_regs(id)\ - [id] = { VPG_DCN3_REG_LIST(id) } - -static const struct dcn30_vpg_registers vpg_regs[] = { - vpg_regs(0), - vpg_regs(1), - vpg_regs(2), - vpg_regs(3), - vpg_regs(4), - vpg_regs(5) -}; - -static const struct dcn30_vpg_shift vpg_shift = { - DCN3_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_vpg_mask vpg_mask = { - DCN3_VPG_MASK_SH_LIST(_MASK) -}; - -static struct vpg *dcn302_vpg_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); - - if (!vpg3) - return NULL; - - vpg3_construct(vpg3, ctx, inst, &vpg_regs[inst], &vpg_shift, &vpg_mask); - - return &vpg3->base; -} - -#define afmt_regs(id)\ - [id] = { AFMT_DCN3_REG_LIST(id) } - -static const struct dcn30_afmt_registers afmt_regs[] = { - afmt_regs(0), - afmt_regs(1), - afmt_regs(2), - afmt_regs(3), - afmt_regs(4), - afmt_regs(5) -}; - -static const struct dcn30_afmt_shift afmt_shift = { - DCN3_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_afmt_mask afmt_mask = { - DCN3_AFMT_MASK_SH_LIST(_MASK) -}; - -static struct afmt *dcn302_afmt_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); - - if (!afmt3) - return NULL; - - afmt3_construct(afmt3, ctx, inst, &afmt_regs[inst], &afmt_shift, &afmt_mask); - - return &afmt3->base; -} - -#define audio_regs(id)\ - [id] = { AUD_COMMON_REG_LIST(id) } - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6) -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -static struct audio *dcn302_create_audio(struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, &audio_regs[inst], &audio_shift, &audio_mask); -} - -#define stream_enc_regs(id)\ - [id] = { SE_DCN3_REG_LIST(id) } - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct stream_encoder *dcn302_stream_encoder_create(enum engine_id eng_id, struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGE) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg 
= dcn302_vpg_create(ctx, vpg_inst); - afmt = dcn302_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - - dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -#define clk_src_regs(index, pllid)\ - [index] = { CS_COMMON_REG_LIST_DCN3_02(index, pllid) } - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -static struct clock_source *dcn302_clock_source_create(struct dc_context *ctx, struct dc_bios *bios, - enum clock_source_id id, const struct dce110_clk_src_regs *regs, bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn3_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN302_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN302_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN302_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dcn302_hwseq_create(struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} - -#define hubp_regs(id)\ - [id] = { HUBP_REG_LIST_DCN30(id) } - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3), - hubp_regs(4) -}; - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct hubp *dcn302_hubp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_hubp *hubp2 = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - - if (hubp3_construct(hubp2, ctx, inst, &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -#define dpp_regs(id)\ - [id] = { DPP_REG_LIST_DCN30(id) } - -static const struct dcn3_dpp_registers dpp_regs[] = { - dpp_regs(0), - dpp_regs(1), - dpp_regs(2), - dpp_regs(3), - dpp_regs(4) -}; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30(_MASK) -}; - -static struct dpp *dcn302_dpp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if (dpp3_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -#define opp_regs(id)\ - [id] = { OPP_REG_LIST_DCN30(id) } - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3), - opp_regs(4) 
-}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -static struct output_pixel_processor *dcn302_opp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -#define optc_regs(id)\ - [id] = { OPTC_COMMON_REG_LIST_DCN3_0(id) } - -static const struct dcn_optc_registers optc_regs[] = { - optc_regs(0), - optc_regs(1), - optc_regs(2), - optc_regs(3), - optc_regs(4) -}; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct timing_generator *dcn302_timing_generator_create(struct dc_context *ctx, uint32_t instance) -{ - struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn30_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct dcn30_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN3_0(0), - MPC_REG_LIST_DCN3_0(1), - MPC_REG_LIST_DCN3_0(2), - MPC_REG_LIST_DCN3_0(3), - MPC_REG_LIST_DCN3_0(4), - MPC_OUT_MUX_REG_LIST_DCN3_0(0), - MPC_OUT_MUX_REG_LIST_DCN3_0(1), - MPC_OUT_MUX_REG_LIST_DCN3_0(2), - MPC_OUT_MUX_REG_LIST_DCN3_0(3), - MPC_OUT_MUX_REG_LIST_DCN3_0(4), - MPC_RMU_GLOBAL_REG_LIST_DCN3AG, - MPC_RMU_REG_LIST_DCN3AG(0), - MPC_RMU_REG_LIST_DCN3AG(1), - MPC_RMU_REG_LIST_DCN3AG(2), - MPC_DWB_MUX_REG_LIST_DCN3_0(0), -}; - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct mpc *dcn302_mpc_create(struct dc_context *ctx, int num_mpcc, int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL); - - if (!mpc30) - return NULL; - - dcn30_mpc_construct(mpc30, ctx, &mpc_regs, &mpc_shift, &mpc_mask, num_mpcc, num_rmu); - - return &mpc30->base; -} - -#define dsc_regsDCN20(id)\ -[id] = { DSC_REG_LIST_DCN20(id) } - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1), - dsc_regsDCN20(2), - dsc_regsDCN20(3), - dsc_regsDCN20(4) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static struct display_stream_compressor *dcn302_dsc_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -#define dwbc_regs_dcn3(id)\ -[id] = { DWBC_COMMON_REG_LIST_DCN30(id) } - -static const struct dcn30_dwbc_registers dwbc30_regs[] = { - dwbc_regs_dcn3(0) -}; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static bool dcn302_dwbc_create(struct dc_context 
*ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - - dcn30_dwbc_construct(dwbc30, ctx, &dwbc30_regs[i], &dwbc30_shift, &dwbc30_mask, i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -#define mcif_wb_regs_dcn3(id)\ -[id] = { MCIF_WB_COMMON_REG_LIST_DCN30(id) } - -static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { - mcif_wb_regs_dcn3(0) -}; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static bool dcn302_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - - dcn30_mmhubbub_construct(mcif_wb30, ctx, &mcif_wb30_regs[i], &mcif_wb30_shift, &mcif_wb30_mask, i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static struct dce_aux *dcn302_aux_engine_create(struct dc_context *ctx, uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], &aux_mask, &aux_shift, ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} - -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5) -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) -}; - -static struct dce_i2c_hw *dcn302_i2c_hw_create(struct dc_context *ctx, uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -#define 
link_regs(id, phyid)\ - [id] = {\ - LE_DCN3_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - DPCS_DCN2_REG_LIST(id), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ - } - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT), - DPCS_DCN2_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK), - DPCS_DCN2_MASK_SH_LIST(_MASK) -}; - -#define aux_regs(id)\ - [id] = { DCN2_AUX_REG_LIST(id) } - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4) -}; - -#define hpd_regs(id)\ - [id] = { HPD_REG_LIST(id) } - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4) -}; - -static struct link_encoder *dcn302_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - - dcn30_link_encoder_construct(enc20, enc_init_data, &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], &le_shift, &le_mask); - - return &enc20->enc10.base; -} - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCN_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -static struct panel_cntl *dcn302_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, init_data, &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, &panel_cntl_mask); - - return &panel_cntl->base; -} - -static void read_dce_straps(struct dc_context *ctx, struct resource_straps *straps) -{ - generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn302_create_audio, - .create_stream_encoder = dcn302_stream_encoder_create, - .create_hwseq = dcn302_hwseq_create, -}; - -static bool is_soc_bounding_box_valid(struct dc *dc) -{ - uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; - - if (ASICREV_IS_DIMGREY_CAVEFISH_P(hw_internal_rev)) - return true; - - return false; -} - -static bool init_soc_bounding_box(struct dc *dc, struct resource_pool *pool) -{ - struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_02_soc; - struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_02_ip; - - DC_LOGGER_INIT(dc->ctx->logger); - - if (!is_soc_bounding_box_valid(dc)) { - DC_LOG_ERROR("%s: not valid soc bounding box\n", __func__); - return false; - } - - loaded_ip->max_num_otg = pool->pipe_count; - loaded_ip->max_num_dpp = pool->pipe_count; - loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - DC_FP_START(); - dcn20_patch_bounding_box(dc, loaded_bb); - DC_FP_END(); - - 
if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = { 0 }; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info( - dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - - DC_FP_START(); - dcn302_fpu_init_soc_bounding_box(bb_info); - DC_FP_END(); - } - } - - return true; -} - -static void dcn302_resource_destruct(struct resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->stream_enc_count; i++) { - if (pool->stream_enc[i] != NULL) { - if (pool->stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->stream_enc[i]->vpg)); - pool->stream_enc[i]->vpg = NULL; - } - if (pool->stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->stream_enc[i]->afmt)); - pool->stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->stream_enc[i])); - pool->stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->res_cap->num_dsc; i++) { - if (pool->dscs[i] != NULL) - dcn20_dsc_destroy(&pool->dscs[i]); - } - - if (pool->mpc != NULL) { - kfree(TO_DCN20_MPC(pool->mpc)); - pool->mpc = NULL; - } - - if (pool->hubbub != NULL) { - kfree(pool->hubbub); - pool->hubbub = NULL; - } - - for (i = 0; i < pool->pipe_count; i++) { - if (pool->dpps[i] != NULL) { - kfree(TO_DCN20_DPP(pool->dpps[i])); - pool->dpps[i] = NULL; - } - - if (pool->hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->hubps[i])); - pool->hubps[i] = NULL; - } - - if (pool->irqs != NULL) - dal_irq_service_destroy(&pool->irqs); - } - - for (i = 0; i < pool->res_cap->num_ddc; i++) { - if (pool->engines[i] != NULL) - dce110_engine_destroy(&pool->engines[i]); - if (pool->hw_i2cs[i] != NULL) { - kfree(pool->hw_i2cs[i]); - pool->hw_i2cs[i] = NULL; - } - if (pool->sw_i2cs[i] != NULL) { - kfree(pool->sw_i2cs[i]); - pool->sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->res_cap->num_opp; i++) { - if (pool->opps[i] != NULL) - pool->opps[i]->funcs->opp_destroy(&pool->opps[i]); - } - - for (i = 0; i < pool->res_cap->num_timing_generator; i++) { - if (pool->timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->timing_generators[i])); - pool->timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->res_cap->num_dwb; i++) { - if (pool->dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->dwbc[i])); - pool->dwbc[i] = NULL; - } - if (pool->mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->mcif_wb[i])); - pool->mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->audio_count; i++) { - if (pool->audios[i]) - dce_aud_destroy(&pool->audios[i]); - } - - for (i = 0; i < pool->clk_src_count; i++) { - if (pool->clock_sources[i] != NULL) - dcn20_clock_source_destroy(&pool->clock_sources[i]); - } - - if (pool->dp_clock_source != NULL) - dcn20_clock_source_destroy(&pool->dp_clock_source); - - for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { - if (pool->mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->mpc_lut[i]); - pool->mpc_lut[i] = NULL; - } - if (pool->mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->mpc_shaper[i]); - pool->mpc_shaper[i] = NULL; - } - } - - for (i = 0; i < pool->pipe_count; i++) { - if (pool->multiple_abms[i] != NULL) - dce_abm_destroy(&pool->multiple_abms[i]); - } - - if (pool->psr != NULL) - dmub_psr_destroy(&pool->psr); - - if (pool->dccg != NULL) - dcn_dccg_destroy(&pool->dccg); - - if (pool->oem_device != NULL) { - struct dc *dc = pool->oem_device->ctx->dc; - - dc->link_srv->destroy_ddc_service(&pool->oem_device); - } -} - -static void dcn302_destroy_resource_pool(struct resource_pool **pool) -{ - dcn302_resource_destruct(*pool); - kfree(*pool); - *pool = NULL; -} - 
-void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - DC_FP_START(); - dcn302_fpu_update_bw_bounding_box(dc, bw_params); - DC_FP_END(); -} - -static void dcn302_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - *panel_config = panel_config_defaults; -} - -static struct resource_funcs dcn302_res_pool_funcs = { - .destroy = dcn302_destroy_resource_pool, - .link_enc_create = dcn302_link_encoder_create, - .panel_cntl_create = dcn302_panel_cntl_create, - .validate_bandwidth = dcn30_validate_bandwidth, - .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, - .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, - .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn30_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn302_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_panel_config_defaults = dcn302_get_panel_config_defaults, -}; - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN3_02() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN3_02(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN3_02(_MASK) -}; - -#define abm_regs(id)\ - [id] = { ABM_DCN302_REG_LIST(id) } - -static const struct dce_abm_registers abm_regs[] = { - abm_regs(0), - abm_regs(1), - abm_regs(2), - abm_regs(3), - abm_regs(4) -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN30(_MASK) -}; - -static bool dcn302_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - struct ddc_service_init_data ddc_init_data = {0}; - - ctx->dc_bios->regs = &bios_regs; - - pool->res_cap = &res_cap_dcn302; - - pool->funcs = &dcn302_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->pipe_count = pool->res_cap->num_timing_generator; - pool->mpcc_count = pool->res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by derfault*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.mall_size_per_mem_channel = 4; - /* total size = mall per channel * num channels * 1024 * 1024 */ - dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * 
dc->ctx->dc_bios->vram_info.num_chans * 1048576; - dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.max_v_total = (1 << 15) - 1; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN3 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->res_cap->num_mpc_3dlut; //3 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->caps.dp_hdmi21_pcon_support = true; - - /* read VBIOS LTTPR caps */ - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - if (ctx->dc_bios->funcs->get_lttpr_interop) { - enum bp_result bp_query_result; - uint8_t is_vbios_interop_enabled = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, - &is_vbios_interop_enabled); - dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->clock_sources[DCN302_CLK_SRC_PLL0] = - dcn302_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->clock_sources[DCN302_CLK_SRC_PLL1] = - dcn302_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->clock_sources[DCN302_CLK_SRC_PLL2] = - dcn302_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->clock_sources[DCN302_CLK_SRC_PLL3] = - dcn302_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->clock_sources[DCN302_CLK_SRC_PLL4] = - dcn302_clock_source_create(ctx, ctx->dc_bios, - 
CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->clk_src_count = DCN302_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->dp_clock_source = - dcn302_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->clk_src_count; i++) { - if (pool->clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* DCCG */ - pool->dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* PP Lib and SMU interfaces */ - init_soc_bounding_box(dc, pool); - - /* DML */ - dml_init_instance(&dc->dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30); - - /* IRQ */ - init_data.ctx = dc->ctx; - pool->irqs = dal_irq_service_dcn302_create(&init_data); - if (!pool->irqs) - goto create_fail; - - /* HUBBUB */ - pool->hubbub = dcn302_hubbub_create(ctx); - if (pool->hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->pipe_count; i++) { - pool->hubps[i] = dcn302_hubp_create(ctx, i); - if (pool->hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->dpps[i] = dcn302_dpp_create(ctx, i); - if (pool->dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->res_cap->num_opp; i++) { - pool->opps[i] = dcn302_opp_create(ctx, i); - if (pool->opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->res_cap->num_timing_generator; i++) { - pool->timing_generators[i] = dcn302_timing_generator_create(ctx, i); - if (pool->timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - } - pool->timing_generator_count = i; - - /* PSR */ - pool->psr = dmub_psr_create(ctx); - if (pool->psr == NULL) { - dm_error("DC: failed to create psr!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* ABMs */ - for (i = 0; i < pool->res_cap->num_timing_generator; i++) { - pool->multiple_abms[i] = dmub_abm_create(ctx, &abm_regs[i], &abm_shift, &abm_mask); - if (pool->multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* MPC and DSC */ - pool->mpc = dcn302_mpc_create(ctx, pool->mpcc_count, pool->res_cap->num_mpc_3dlut); - if (pool->mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < pool->res_cap->num_dsc; i++) { - pool->dscs[i] = dcn302_dsc_create(ctx, i); - if (pool->dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn302_dwbc_create(ctx, pool)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn302_mmhubbub_create(ctx, pool)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->res_cap->num_ddc; i++) { - pool->engines[i] = dcn302_aux_engine_create(ctx, i); - if (pool->engines[i] == NULL) { - 
BREAK_TO_DEBUGGER(); - dm_error("DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->hw_i2cs[i] = dcn302_i2c_hw_create(ctx, i); - if (pool->hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->sw_i2cs[i] = NULL; - } - - /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, pool, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane caps */ - dcn302_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { - ddc_init_data.ctx = dc->ctx; - ddc_init_data.link = NULL; - ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; - ddc_init_data.id.enum_id = 0; - ddc_init_data.id.type = OBJECT_TYPE_GENERIC; - pool->oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); - } else { - pool->oem_device = NULL; - } - - return true; - -create_fail: - - dcn302_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn302_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc) -{ - struct resource_pool *pool = kzalloc(sizeof(struct resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn302_resource_construct(init_data->num_virtual_links, dc, pool)) - return pool; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h deleted file mode 100644 index 9f24e73b92b3..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef _DCN302_RESOURCE_H_ -#define _DCN302_RESOURCE_H_ - -#include "core_types.h" - -extern struct _vcs_dpi_ip_params_st dcn3_02_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_02_soc; - -struct resource_pool *dcn302_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc); - -void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); - -#endif /* _DCN302_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile index d7b3ad780e5d..a954e316aca2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile @@ -6,7 +6,7 @@ # # Makefile for dcn303. -DCN3_03 = dcn303_init.o dcn303_resource.o +DCN3_03 = dcn303_init.o AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c deleted file mode 100644 index 49cb7fde416a..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ /dev/null @@ -1,1448 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright (C) 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - */ - -#include "dcn303_init.h" -#include "dcn303_resource.h" -#include "dcn303_dccg.h" -#include "irq/dcn303/irq_service_dcn303.h" - -#include "dcn30/dcn30_dio_link_encoder.h" -#include "dcn30/dcn30_dio_stream_encoder.h" -#include "dcn30/dcn30_dpp.h" -#include "dcn30/dcn30_dwb.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn30/dcn30_hubp.h" -#include "dcn30/dcn30_mmhubbub.h" -#include "dcn30/dcn30_mpc.h" -#include "dcn30/dcn30_opp.h" -#include "dcn30/dcn30_optc.h" -#include "dcn30/dcn30_resource.h" - -#include "dcn20/dcn20_dsc.h" -#include "dcn20/dcn20_resource.h" - -#include "dml/dcn30/dcn30_fpu.h" - -#include "dcn10/dcn10_resource.h" - -#include "link.h" - -#include "dce/dce_abm.h" -#include "dce/dce_audio.h" -#include "dce/dce_aux.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_hwseq.h" -#include "dce/dce_i2c_hw.h" -#include "dce/dce_panel_cntl.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "clk_mgr.h" - -#include "hw_sequencer_private.h" -#include "reg_helper.h" -#include "resource.h" -#include "vm_helper.h" - -#include "sienna_cichlid_ip_offset.h" -#include "dcn/dcn_3_0_3_offset.h" -#include "dcn/dcn_3_0_3_sh_mask.h" -#include "dpcs/dpcs_3_0_3_offset.h" -#include "dpcs/dpcs_3_0_3_sh_mask.h" -#include "nbio/nbio_2_3_offset.h" - -#include "dml/dcn303/dcn303_fpu.h" - -#define DC_LOGGER \ - dc->ctx->logger -#define DC_LOGGER_INIT(logger) - - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .exit_idle_opt_for_cursor_updates = true, - .disable_idle_power_optimizations = false, - .using_dml2 = false, -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - .disallow_replay = false, - }, -}; - -enum dcn303_clk_src_array_id { - DCN303_CLK_SRC_PLL0, - DCN303_CLK_SRC_PLL1, - DCN303_CLK_SRC_TOTAL -}; - -static const struct resource_caps res_cap_dcn303 = { - .num_timing_generator = 2, - .num_opp = 2, - .num_video_plane = 2, - .num_audio = 2, - .num_stream_encoder = 2, - .num_dwb = 1, - .num_ddc = 2, - .num_vmid = 16, - .num_mpc_3dlut = 1, - .num_dsc = 2, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - .max_downscale_factor = { - .argb8888 = 600, - .nv12 = 600, - .fp16 = 600 - }, - 16, - 16 -}; - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* DCN */ -#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name - -#define SF(reg_name, field_name, 
post_fix)\ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + mm ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - mm ## reg_name ## _ ## block ## id - -#define SRII_DWB(reg_name, temp_name, block, id)\ - .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - mm ## block ## id ## _ ## reg_name - -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN30(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN30(_MASK) -}; - -#define vmid_regs(id)\ - [id] = { DCN20_VMID_REG_LIST(id) } - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static struct hubbub *dcn303_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), GFP_KERNEL); - - if (!hubbub3) - return NULL; - - hubbub3_construct(hubbub3, ctx, &hubbub_reg, &hubbub_shift, &hubbub_mask); - - for (i = 0; i < res_cap_dcn303.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub3->base; -} - -#define vpg_regs(id)\ - [id] = { VPG_DCN3_REG_LIST(id) } - -static const struct dcn30_vpg_registers vpg_regs[] = { - vpg_regs(0), - vpg_regs(1), - vpg_regs(2) -}; - -static const struct dcn30_vpg_shift vpg_shift = { - DCN3_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_vpg_mask vpg_mask = { - DCN3_VPG_MASK_SH_LIST(_MASK) -}; - -static struct vpg *dcn303_vpg_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); - - if (!vpg3) - return NULL; - - vpg3_construct(vpg3, ctx, inst, &vpg_regs[inst], &vpg_shift, &vpg_mask); - - return &vpg3->base; -} - -#define afmt_regs(id)\ - [id] = { AFMT_DCN3_REG_LIST(id) } - -static const struct dcn30_afmt_registers afmt_regs[] = { - afmt_regs(0), - afmt_regs(1), - afmt_regs(2) -}; - -static const struct dcn30_afmt_shift afmt_shift = { - DCN3_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct 
dcn30_afmt_mask afmt_mask = { - DCN3_AFMT_MASK_SH_LIST(_MASK) -}; - -static struct afmt *dcn303_afmt_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); - - if (!afmt3) - return NULL; - - afmt3_construct(afmt3, ctx, inst, &afmt_regs[inst], &afmt_shift, &afmt_mask); - - return &afmt3->base; -} - -#define audio_regs(id)\ - [id] = { AUD_COMMON_REG_LIST(id) } - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6) -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -static struct audio *dcn303_create_audio(struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, &audio_regs[inst], &audio_shift, &audio_mask); -} - -#define stream_enc_regs(id)\ - [id] = { SE_DCN3_REG_LIST(id) } - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id, struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGB) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn303_vpg_create(ctx, vpg_inst); - afmt = dcn303_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - - dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -#define clk_src_regs(index, pllid)\ - [index] = { CS_COMMON_REG_LIST_DCN3_03(index, pllid) } - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -static struct clock_source *dcn303_clock_source_create(struct dc_context *ctx, struct dc_bios *bios, - enum clock_source_id id, const struct dce110_clk_src_regs *regs, bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn3_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN303_REG_LIST() -}; - -static const struct dce_hwseq_shift hwseq_shift = { - 
HWSEQ_DCN303_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN303_MASK_SH_LIST(_MASK) -}; - -static struct dce_hwseq *dcn303_hwseq_create(struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} - -#define hubp_regs(id)\ - [id] = { HUBP_REG_LIST_DCN30(id) } - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1) -}; - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct hubp *dcn303_hubp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_hubp *hubp2 = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - - if (hubp3_construct(hubp2, ctx, inst, &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -#define dpp_regs(id)\ - [id] = { DPP_REG_LIST_DCN30(id) } - -static const struct dcn3_dpp_registers dpp_regs[] = { - dpp_regs(0), - dpp_regs(1) -}; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30(_MASK) -}; - -static struct dpp *dcn303_dpp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if (dpp3_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -#define opp_regs(id)\ - [id] = { OPP_REG_LIST_DCN30(id) } - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1) -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -static struct output_pixel_processor *dcn303_opp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -#define optc_regs(id)\ - [id] = { OPTC_COMMON_REG_LIST_DCN3_0(id) } - -static const struct dcn_optc_registers optc_regs[] = { - optc_regs(0), - optc_regs(1) -}; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct timing_generator *dcn303_timing_generator_create(struct dc_context *ctx, uint32_t instance) -{ - struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn30_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct dcn30_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN3_0(0), - MPC_REG_LIST_DCN3_0(1), - MPC_OUT_MUX_REG_LIST_DCN3_0(0), - MPC_OUT_MUX_REG_LIST_DCN3_0(1), - MPC_RMU_GLOBAL_REG_LIST_DCN3AG, - MPC_RMU_REG_LIST_DCN3AG(0), - MPC_DWB_MUX_REG_LIST_DCN3_0(0), -}; - -static const struct 
dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN303(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN303(_MASK) -}; - -static struct mpc *dcn303_mpc_create(struct dc_context *ctx, int num_mpcc, int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL); - - if (!mpc30) - return NULL; - - dcn30_mpc_construct(mpc30, ctx, &mpc_regs, &mpc_shift, &mpc_mask, num_mpcc, num_rmu); - - return &mpc30->base; -} - -#define dsc_regsDCN20(id)\ -[id] = { DSC_REG_LIST_DCN20(id) } - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static struct display_stream_compressor *dcn303_dsc_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -#define dwbc_regs_dcn3(id)\ -[id] = { DWBC_COMMON_REG_LIST_DCN30(id) } - -static const struct dcn30_dwbc_registers dwbc30_regs[] = { - dwbc_regs_dcn3(0) -}; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static bool dcn303_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - - dcn30_dwbc_construct(dwbc30, ctx, &dwbc30_regs[i], &dwbc30_shift, &dwbc30_mask, i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -#define mcif_wb_regs_dcn3(id)\ -[id] = { MCIF_WB_COMMON_REG_LIST_DCN30(id) } - -static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { - mcif_wb_regs_dcn3(0) -}; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static bool dcn303_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - - dcn30_mmhubbub_construct(mcif_wb30, ctx, &mcif_wb30_regs[i], &mcif_wb30_shift, &mcif_wb30_mask, i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1) -}; - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static struct dce_aux *dcn303_aux_engine_create(struct dc_context *ctx, uint32_t inst) -{ - struct 
aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], &aux_mask, &aux_shift, ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} - -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2) -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) -}; - -static struct dce_i2c_hw *dcn303_i2c_hw_create(struct dc_context *ctx, uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -#define link_regs(id, phyid)\ - [id] = {\ - LE_DCN3_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ - } - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT), - DPCS_DCN2_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK), - DPCS_DCN2_MASK_SH_LIST(_MASK) -}; - -#define aux_regs(id)\ - [id] = { DCN2_AUX_REG_LIST(id) } - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1) -}; - -#define hpd_regs(id)\ - [id] = { HPD_REG_LIST(id) } - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1) -}; - -static struct link_encoder *dcn303_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - - dcn30_link_encoder_construct(enc20, enc_init_data, &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], &le_shift, &le_mask); - - return &enc20->enc10.base; -} - -static const struct dce_panel_cntl_registers panel_cntl_regs[] = { - { DCN_PANEL_CNTL_REG_LIST() } -}; - -static const struct dce_panel_cntl_shift panel_cntl_shift = { - DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_panel_cntl_mask panel_cntl_mask = { - DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) -}; - -static struct panel_cntl *dcn303_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dce_panel_cntl *panel_cntl = kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dce_panel_cntl_construct(panel_cntl, init_data, &panel_cntl_regs[init_data->inst], - &panel_cntl_shift, &panel_cntl_mask); - - return &panel_cntl->base; -} - -static 
void read_dce_straps(struct dc_context *ctx, struct resource_straps *straps) -{ - generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); -} - -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn303_create_audio, - .create_stream_encoder = dcn303_stream_encoder_create, - .create_hwseq = dcn303_hwseq_create, -}; - -static bool is_soc_bounding_box_valid(struct dc *dc) -{ - uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; - - if (ASICREV_IS_BEIGE_GOBY_P(hw_internal_rev)) - return true; - - return false; -} - -static bool init_soc_bounding_box(struct dc *dc, struct resource_pool *pool) -{ - struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_03_soc; - struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_03_ip; - - DC_LOGGER_INIT(dc->ctx->logger); - - if (!is_soc_bounding_box_valid(dc)) { - DC_LOG_ERROR("%s: not valid soc bounding box/n", __func__); - return false; - } - - loaded_ip->max_num_otg = pool->pipe_count; - loaded_ip->max_num_dpp = pool->pipe_count; - loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - DC_FP_START(); - dcn20_patch_bounding_box(dc, loaded_bb); - DC_FP_END(); - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = { 0 }; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info( - dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - DC_FP_START(); - dcn303_fpu_init_soc_bounding_box(bb_info); - DC_FP_END(); - } - } - - return true; -} - -static void dcn303_resource_destruct(struct resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->stream_enc_count; i++) { - if (pool->stream_enc[i] != NULL) { - if (pool->stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->stream_enc[i]->vpg)); - pool->stream_enc[i]->vpg = NULL; - } - if (pool->stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->stream_enc[i]->afmt)); - pool->stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->stream_enc[i])); - pool->stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->res_cap->num_dsc; i++) { - if (pool->dscs[i] != NULL) - dcn20_dsc_destroy(&pool->dscs[i]); - } - - if (pool->mpc != NULL) { - kfree(TO_DCN20_MPC(pool->mpc)); - pool->mpc = NULL; - } - - if (pool->hubbub != NULL) { - kfree(pool->hubbub); - pool->hubbub = NULL; - } - - for (i = 0; i < pool->pipe_count; i++) { - if (pool->dpps[i] != NULL) { - kfree(TO_DCN20_DPP(pool->dpps[i])); - pool->dpps[i] = NULL; - } - - if (pool->hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->hubps[i])); - pool->hubps[i] = NULL; - } - - if (pool->irqs != NULL) - dal_irq_service_destroy(&pool->irqs); - } - - for (i = 0; i < pool->res_cap->num_ddc; i++) { - if (pool->engines[i] != NULL) - dce110_engine_destroy(&pool->engines[i]); - if (pool->hw_i2cs[i] != NULL) { - kfree(pool->hw_i2cs[i]); - pool->hw_i2cs[i] = NULL; - } - if (pool->sw_i2cs[i] != NULL) { - kfree(pool->sw_i2cs[i]); - pool->sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->res_cap->num_opp; i++) { - if (pool->opps[i] != NULL) - pool->opps[i]->funcs->opp_destroy(&pool->opps[i]); - } - - for (i = 0; i < pool->res_cap->num_timing_generator; i++) { - if (pool->timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->timing_generators[i])); - pool->timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->res_cap->num_dwb; i++) { - if (pool->dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->dwbc[i])); - pool->dwbc[i] = NULL; - } - if (pool->mcif_wb[i] != 
NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->mcif_wb[i])); - pool->mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->audio_count; i++) { - if (pool->audios[i]) - dce_aud_destroy(&pool->audios[i]); - } - - for (i = 0; i < pool->clk_src_count; i++) { - if (pool->clock_sources[i] != NULL) - dcn20_clock_source_destroy(&pool->clock_sources[i]); - } - - if (pool->dp_clock_source != NULL) - dcn20_clock_source_destroy(&pool->dp_clock_source); - - for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { - if (pool->mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->mpc_lut[i]); - pool->mpc_lut[i] = NULL; - } - if (pool->mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->mpc_shaper[i]); - pool->mpc_shaper[i] = NULL; - } - } - - for (i = 0; i < pool->pipe_count; i++) { - if (pool->multiple_abms[i] != NULL) - dce_abm_destroy(&pool->multiple_abms[i]); - } - - if (pool->psr != NULL) - dmub_psr_destroy(&pool->psr); - - if (pool->dccg != NULL) - dcn_dccg_destroy(&pool->dccg); - - if (pool->oem_device != NULL) { - struct dc *dc = pool->oem_device->ctx->dc; - - dc->link_srv->destroy_ddc_service(&pool->oem_device); - } -} - -static void dcn303_destroy_resource_pool(struct resource_pool **pool) -{ - dcn303_resource_destruct(*pool); - kfree(*pool); - *pool = NULL; -} - -static void dcn303_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - *panel_config = panel_config_defaults; -} - -void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - DC_FP_START(); - dcn303_fpu_update_bw_bounding_box(dc, bw_params); - DC_FP_END(); -} - -static struct resource_funcs dcn303_res_pool_funcs = { - .destroy = dcn303_destroy_resource_pool, - .link_enc_create = dcn303_link_encoder_create, - .panel_cntl_create = dcn303_panel_cntl_create, - .validate_bandwidth = dcn30_validate_bandwidth, - .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, - .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, - .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn30_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn303_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_panel_config_defaults = dcn303_get_panel_config_defaults, -}; - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN3_03() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN3_03(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN3_03(_MASK) -}; - -#define abm_regs(id)\ - [id] = { ABM_DCN302_REG_LIST(id) } - -static const struct dce_abm_registers abm_regs[] = { - abm_regs(0), - abm_regs(1) -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static 
const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN30(_MASK) -}; - -static bool dcn303_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - struct ddc_service_init_data ddc_init_data; - - ctx->dc_bios->regs = &bios_regs; - - pool->res_cap = &res_cap_dcn303; - - pool->funcs = &dcn303_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->pipe_count = pool->res_cap->num_timing_generator; - pool->mpcc_count = pool->res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by derfault*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.mall_size_per_mem_channel = 4; - /* total size = mall per channel * num channels * 1024 * 1024 */ - dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * - dc->ctx->dc_bios->vram_info.num_chans * - 1024 * 1024; - dc->caps.cursor_cache_size = - dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - dc->caps.max_slave_planes = 1; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.max_v_total = (1 << 15) - 1; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN3 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->res_cap->num_mpc_3dlut; //3 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->caps.dp_hdmi21_pcon_support = true; - - dc->config.dc_mode_clk_limit_support = true; - /* read VBIOS LTTPR caps */ - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - if (ctx->dc_bios->funcs->get_lttpr_interop) { - enum bp_result bp_query_result; - uint8_t is_vbios_interop_enabled = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); - 
dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->clock_sources[DCN303_CLK_SRC_PLL0] = - dcn303_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->clock_sources[DCN303_CLK_SRC_PLL1] = - dcn303_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - - pool->clk_src_count = DCN303_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->dp_clock_source = - dcn303_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->clk_src_count; i++) { - if (pool->clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* DCCG */ - pool->dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* PP Lib and SMU interfaces */ - init_soc_bounding_box(dc, pool); - - /* DML */ - dml_init_instance(&dc->dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30); - - /* IRQ */ - init_data.ctx = dc->ctx; - pool->irqs = dal_irq_service_dcn303_create(&init_data); - if (!pool->irqs) - goto create_fail; - - /* HUBBUB */ - pool->hubbub = dcn303_hubbub_create(ctx); - if (pool->hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->pipe_count; i++) { - pool->hubps[i] = dcn303_hubp_create(ctx, i); - if (pool->hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->dpps[i] = dcn303_dpp_create(ctx, i); - if (pool->dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->res_cap->num_opp; i++) { - pool->opps[i] = dcn303_opp_create(ctx, i); - if (pool->opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->res_cap->num_timing_generator; i++) { - pool->timing_generators[i] = dcn303_timing_generator_create(ctx, i); - if (pool->timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - } - pool->timing_generator_count = i; - - /* PSR */ - pool->psr = dmub_psr_create(ctx); - if (pool->psr == NULL) { - dm_error("DC: failed to create psr!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* ABM */ - for (i = 0; i < pool->res_cap->num_timing_generator; i++) { - pool->multiple_abms[i] = dmub_abm_create(ctx, &abm_regs[i], &abm_shift, &abm_mask); - if (pool->multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* MPC and DSC */ - pool->mpc = dcn303_mpc_create(ctx, pool->mpcc_count, pool->res_cap->num_mpc_3dlut); - if (pool->mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < 
pool->res_cap->num_dsc; i++) { - pool->dscs[i] = dcn303_dsc_create(ctx, i); - if (pool->dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn303_dwbc_create(ctx, pool)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn303_mmhubbub_create(ctx, pool)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->res_cap->num_ddc; i++) { - pool->engines[i] = dcn303_aux_engine_create(ctx, i); - if (pool->engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->hw_i2cs[i] = dcn303_i2c_hw_create(ctx, i); - if (pool->hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->sw_i2cs[i] = NULL; - } - - /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, pool, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane caps */ - dcn303_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { - ddc_init_data.ctx = dc->ctx; - ddc_init_data.link = NULL; - ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; - ddc_init_data.id.enum_id = 0; - ddc_init_data.id.type = OBJECT_TYPE_GENERIC; - pool->oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); - } else { - pool->oem_device = NULL; - } - - return true; - -create_fail: - - dcn303_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn303_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc) -{ - struct resource_pool *pool = kzalloc(sizeof(struct resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn303_resource_construct(init_data->num_virtual_links, dc, pool)) - return pool; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h deleted file mode 100644 index 37cf1525820b..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright (C) 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - */ - -#ifndef _DCN303_RESOURCE_H_ -#define _DCN303_RESOURCE_H_ - -#include "core_types.h" - -extern struct _vcs_dpi_ip_params_st dcn3_03_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc; - -struct resource_pool *dcn303_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc); - -void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); - -#endif /* _DCN303_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index 96e45c9efb46..212287008c0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -10,7 +10,7 @@ # # Makefile for dcn31. -DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \ +DCN31 = dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \ dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \ dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ dcn31_afmt.o dcn31_vpg.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c deleted file mode 100644 index 79416cfb22f0..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ /dev/null @@ -1,2218 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - - -#include "dm_services.h" -#include "dc.h" - -#include "dcn31/dcn31_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn31_resource.h" - -#include "dcn20/dcn20_resource.h" -#include "dcn30/dcn30_resource.h" - -#include "dml/dcn30/dcn30_fpu.h" - -#include "dcn10/dcn10_ipp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn31/dcn31_hubbub.h" -#include "dcn30/dcn30_mpc.h" -#include "dcn31/dcn31_hubp.h" -#include "irq/dcn31/irq_service_dcn31.h" -#include "dcn30/dcn30_dpp.h" -#include "dcn31/dcn31_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn30/dcn30_opp.h" -#include "dcn20/dcn20_dsc.h" -#include "dcn30/dcn30_vpg.h" -#include "dcn30/dcn30_afmt.h" -#include "dcn30/dcn30_dio_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_link_encoder.h" -#include "dcn31/dcn31_apg.h" -#include "dcn31/dcn31_dio_link_encoder.h" -#include "dcn31/dcn31_vpg.h" -#include "dcn31/dcn31_afmt.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "clk_mgr.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dml/display_mode_vba.h" -#include "dml/dcn31/dcn31_fpu.h" -#include "dcn31/dcn31_dccg.h" -#include "dcn10/dcn10_resource.h" -#include "dcn31_panel_cntl.h" - -#include "dcn30/dcn30_dwb.h" -#include "dcn30/dcn30_mmhubbub.h" - -// TODO: change include headers /amd/include/asic_reg after upstream -#include "yellow_carp_offset.h" -#include "dcn/dcn_3_1_2_offset.h" -#include "dcn/dcn_3_1_2_sh_mask.h" -#include "nbio/nbio_7_2_0_offset.h" -#include "dpcs/dpcs_4_2_0_offset.h" -#include "dpcs/dpcs_4_2_0_sh_mask.h" -#include "mmhub/mmhub_2_3_0_offset.h" -#include "mmhub/mmhub_2_3_0_sh_mask.h" - - -#define regDCHUBBUB_DEBUG_CTRL_0 0x04d6 -#define regDCHUBBUB_DEBUG_CTRL_0_BASE_IDX 2 -#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10 -#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L - -#include "reg_helper.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" -#include "dce/dmub_replay.h" - -#include "dml/dcn30/display_mode_vba_30.h" -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" - -#include "link_enc_cfg.h" - -#define DC_LOGGER \ - dc->ctx->logger -#define DC_LOGGER_INIT(logger) - -enum dcn31_clk_src_array_id { - DCN31_CLK_SRC_PLL0, - DCN31_CLK_SRC_PLL1, - DCN31_CLK_SRC_PLL2, - DCN31_CLK_SRC_PLL3, - DCN31_CLK_SRC_PLL4, - DCN30_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file - */ - -/* DCN */ -#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = 
BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_DWB(reg_name, temp_name, block, id)\ - .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - reg ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX1_ ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ - mm ## reg_name - -/* CLOCK */ -#define CLK_BASE_INNER(seg) \ - CLK_BASE__INST0_SEG ## seg - -#define CLK_BASE(seg) \ - CLK_BASE_INNER(seg) - -#define CLK_SRI(reg_name, block, inst)\ - .reg_name = CLK_BASE(reg ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## _ ## inst ## _ ## reg_name - - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN3_0(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E) -}; -/*pll_id being rempped in dmub, in driver it is logical instance*/ -static const struct dce110_clk_src_regs clk_src_regs_b0[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, F), - clk_src_regs(3, G), - clk_src_regs(4, E) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -#define abm_regs(id)\ -[id] = {\ - ABM_DCN302_REG_LIST(id)\ -} - -static const struct dce_abm_registers abm_regs[] = { - abm_regs(0), - abm_regs(1), - abm_regs(2), - abm_regs(3), -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN30(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6) -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs(id)\ -[id] = {\ - VPG_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_vpg_registers vpg_regs[] = { - vpg_regs(0), - 
vpg_regs(1), - vpg_regs(2), - vpg_regs(3), - vpg_regs(4), - vpg_regs(5), - vpg_regs(6), - vpg_regs(7), - vpg_regs(8), - vpg_regs(9), -}; - -static const struct dcn31_vpg_shift vpg_shift = { - DCN31_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_vpg_mask vpg_mask = { - DCN31_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs(id)\ -[id] = {\ - AFMT_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_afmt_registers afmt_regs[] = { - afmt_regs(0), - afmt_regs(1), - afmt_regs(2), - afmt_regs(3), - afmt_regs(4), - afmt_regs(5) -}; - -static const struct dcn31_afmt_shift afmt_shift = { - DCN31_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_afmt_mask afmt_mask = { - DCN31_AFMT_MASK_SH_LIST(_MASK) -}; - -#define apg_regs(id)\ -[id] = {\ - APG_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_apg_registers apg_regs[] = { - apg_regs(0), - apg_regs(1), - apg_regs(2), - apg_regs(3) -}; - -static const struct dcn31_apg_shift apg_shift = { - DCN31_APG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_apg_mask apg_mask = { - DCN31_APG_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN3_REG_LIST(id)\ -} - -/* Some encoders won't be initialized here - but they're logical, not physical. */ -static const struct dcn10_stream_enc_registers stream_enc_regs[ENGINE_ID_COUNT] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4) -}; - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN31_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - DPCS_DCN31_REG_LIST(id), \ -} - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ - DPCS_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ - DPCS_DCN31_MASK_SH_LIST(_MASK) -}; - -#define hpo_dp_stream_encoder_reg_list(id)\ -[id] = {\ - DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\ -} - -static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = { - hpo_dp_stream_encoder_reg_list(0), - hpo_dp_stream_encoder_reg_list(1), - hpo_dp_stream_encoder_reg_list(2), - hpo_dp_stream_encoder_reg_list(3), -}; - -static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) -}; - -#define hpo_dp_link_encoder_reg_list(id)\ -[id] = {\ - 
DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\ - DCN3_1_RDPCSTX_REG_LIST(0),\ - DCN3_1_RDPCSTX_REG_LIST(1),\ - DCN3_1_RDPCSTX_REG_LIST(2),\ - DCN3_1_RDPCSTX_REG_LIST(3),\ - DCN3_1_RDPCSTX_REG_LIST(4)\ -} - -static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = { - hpo_dp_link_encoder_reg_list(0), - hpo_dp_link_encoder_reg_list(1), -}; - -static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { - DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { - DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) -}; - -#define dpp_regs(id)\ -[id] = {\ - DPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn3_dpp_registers dpp_regs[] = { - dpp_regs(0), - dpp_regs(1), - dpp_regs(2), - dpp_regs(3) -}; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3) -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4) -}; - -#define dwbc_regs_dcn3(id)\ -[id] = {\ - DWBC_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_dwbc_registers dwbc30_regs[] = { - dwbc_regs_dcn3(0), -}; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define mcif_wb_regs_dcn3(id)\ -[id] = {\ - MCIF_WB_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { - mcif_wb_regs_dcn3(0) -}; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define dsc_regsDCN20(id)\ -[id] = {\ - DSC_REG_LIST_DCN20(id)\ -} - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1), - dsc_regsDCN20(2) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static const struct dcn30_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN3_0(0), - MPC_REG_LIST_DCN3_0(1), - MPC_REG_LIST_DCN3_0(2), - MPC_REG_LIST_DCN3_0(3), - MPC_OUT_MUX_REG_LIST_DCN3_0(0), - MPC_OUT_MUX_REG_LIST_DCN3_0(1), - MPC_OUT_MUX_REG_LIST_DCN3_0(2), - MPC_OUT_MUX_REG_LIST_DCN3_0(3), - MPC_RMU_GLOBAL_REG_LIST_DCN3AG, - MPC_RMU_REG_LIST_DCN3AG(0), - MPC_RMU_REG_LIST_DCN3AG(1), - //MPC_RMU_REG_LIST_DCN3AG(2), - MPC_DWB_MUX_REG_LIST_DCN3_0(0), -}; - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define optc_regs(id)\ -[id] = 
{OPTC_COMMON_REG_LIST_DCN3_1(id)} - -static const struct dcn_optc_registers optc_regs[] = { - optc_regs(0), - optc_regs(1), - optc_regs(2), - optc_regs(3) -}; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN3_1(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN3_1(_MASK) -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN30(id)\ -} - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3) -}; - - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN31(_MASK) -}; -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN31(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN31(_MASK) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN31() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN31(_MASK) -}; - - -#define SRII2(reg_name_pre, reg_name_post, id)\ - .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ - ## id ## _ ## reg_name_post ## _BASE_IDX) + \ - reg ## reg_name_pre ## id ## _ ## reg_name_post - - -#define HWSEQ_DCN31_REG_LIST()\ - SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ - SR(DIO_MEM_PWR_CTRL), \ - SR(ODM_MEM_PWR_CTRL3), \ - SR(DMU_MEM_PWR_CNTL), \ - SR(MMHUBBUB_MEM_PWR_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL2), \ - SR(DCFCLK_CNTL),\ - SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ - SRII(PIXEL_RATE_CNTL, OTG, 0), \ - SRII(PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PIXEL_RATE_CNTL, OTG, 3),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ - SR(MICROSECOND_TIME_BASE_DIV), \ - SR(MILLISECOND_TIME_BASE_DIV), \ - SR(DISPCLK_FREQ_CHANGE_CNTL), \ - SR(RBBMIF_TIMEOUT_DIS), \ - SR(RBBMIF_TIMEOUT_DIS_2), \ - SR(DCHUBBUB_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ - SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ - SR(MPC_CRC_CTRL), \ - SR(MPC_CRC_RESULT_GB), \ - SR(MPC_CRC_RESULT_C), \ - SR(MPC_CRC_RESULT_AR), \ - SR(DOMAIN0_PG_CONFIG), \ - SR(DOMAIN1_PG_CONFIG), \ - SR(DOMAIN2_PG_CONFIG), \ - SR(DOMAIN3_PG_CONFIG), \ - SR(DOMAIN16_PG_CONFIG), \ - SR(DOMAIN17_PG_CONFIG), \ - SR(DOMAIN18_PG_CONFIG), \ - SR(DOMAIN0_PG_STATUS), \ - SR(DOMAIN1_PG_STATUS), \ - SR(DOMAIN2_PG_STATUS), \ - SR(DOMAIN3_PG_STATUS), \ - SR(DOMAIN16_PG_STATUS), \ - SR(DOMAIN17_PG_STATUS), \ - SR(DOMAIN18_PG_STATUS), \ - SR(D1VGA_CONTROL), \ - SR(D2VGA_CONTROL), \ - SR(D3VGA_CONTROL), \ - SR(D4VGA_CONTROL), \ - SR(D5VGA_CONTROL), \ - SR(D6VGA_CONTROL), \ - SR(DC_IP_REQUEST_CNTL), \ - SR(AZALIA_AUDIO_DTO), \ - SR(AZALIA_CONTROLLER_CLOCK_GATING), \ - SR(HPO_TOP_HW_CONTROL) - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN31_REG_LIST() -}; - -#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ - HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ - HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ 
- HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ - HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ - HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ - HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ - HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \ - HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ - HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh) - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN31_MASK_SH_LIST(_MASK) -}; -#define vmid_regs(id)\ -[id] = {\ - DCN20_VMID_REG_LIST(id)\ -} - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct resource_caps res_cap_dcn31 = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 5, - .num_stream_encoder = 5, - .num_dig_link_enc = 5, - .num_hpo_dp_stream_encoder = 4, - .num_hpo_dp_link_encoder = 2, - .num_pll = 5, - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_mpc_3dlut = 2, - .num_dsc = 3, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - // 6:1 downscaling ratio: 1000/6 = 166.666 - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = true, 
- .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 4096,/*upto true 4K*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = true, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .pstate_enabled = true, - .use_max_lb = true, - .enable_mem_low_power = { - .bits = { - .vga = true, - .i2c = true, - .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled - .dscl = true, - .cm = true, - .mpc = true, - .optc = true, - .vpg = true, - .afmt = true, - } - }, - .disable_z10 = true, - .enable_legacy_fast_update = true, - .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ - .dml_hostvm_override = DML_HOSTVM_OVERRIDE_FALSE, - .using_dml2 = false, -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - .disallow_replay = false, - }, - .ilr = { - .optimize_edp_link_rate = true, - }, -}; - -static void dcn31_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN20_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn31_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn3_dpp *dpp = - kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if (dpp3_construct(dpp, ctx, inst, - &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -static struct output_pixel_processor *dcn31_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dcn31_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct dce_i2c_hw *dcn31_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct mpc *dcn31_mpc_create( - struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), - GFP_KERNEL); - - if (!mpc30) - return NULL; - - dcn30_mpc_construct(mpc30, 
ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub3) - return NULL; - - hubbub31_construct(hubbub3, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask, - dcn3_1_ip.det_buffer_size_kbytes, - dcn3_1_ip.pixel_chunk_size_kbytes, - dcn3_1_ip.config_return_buffer_size_in_kbytes); - - - for (i = 0; i < res_cap_dcn31.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub3->base; -} - -static struct timing_generator *dcn31_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn31_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn31_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - - dcn31_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -/* Create a minimal link encoder object not associated with a particular - * physical connector. 
- * resource_funcs.link_enc_create_minimal - */ -static struct link_encoder *dcn31_link_enc_create_minimal( - struct dc_context *ctx, enum engine_id eng_id) -{ - struct dcn20_link_encoder *enc20; - - if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) - return NULL; - - enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - if (!enc20) - return NULL; - - dcn31_link_encoder_construct_minimal( - enc20, - ctx, - &link_enc_feature, - &link_enc_regs[eng_id - ENGINE_ID_DIGA], - eng_id); - - return &enc20->enc10.base; -} - -static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dcn31_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dcn31_panel_cntl_construct(panel_cntl, init_data); - - return &panel_cntl->base; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn31_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct vpg *dcn31_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); - - if (!vpg31) - return NULL; - - vpg31_construct(vpg31, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg31->base; -} - -static struct afmt *dcn31_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); - - if (!afmt31) - return NULL; - - afmt31_construct(afmt31, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - // Light sleep by default, no need to power down here - - return &afmt31->base; -} - -static struct apg *dcn31_apg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); - - if (!apg31) - return NULL; - - apg31_construct(apg31, ctx, inst, - &apg_regs[inst], - &apg_shift, - &apg_mask); - - return &apg31->base; -} - -static struct stream_encoder *dcn31_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - afmt = dcn31_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - - dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; - struct vpg *vpg; - struct apg *apg; - uint32_t hpo_dp_inst; - uint32_t vpg_inst; - uint32_t apg_inst; - - ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); - hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; - - /* Mapping of VPG register blocks to 
HPO DP block instance: - * VPG[6] -> HPO_DP[0] - * VPG[7] -> HPO_DP[1] - * VPG[8] -> HPO_DP[2] - * VPG[9] -> HPO_DP[3] - */ - vpg_inst = hpo_dp_inst + 6; - - /* Mapping of APG register blocks to HPO DP block instance: - * APG[0] -> HPO_DP[0] - * APG[1] -> HPO_DP[1] - * APG[2] -> HPO_DP[2] - * APG[3] -> HPO_DP[3] - */ - apg_inst = hpo_dp_inst; - - /* allocate HPO stream encoder and create VPG sub-block */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - apg = dcn31_apg_create(ctx, apg_inst); - - if (!hpo_dp_enc31 || !vpg || !apg) { - kfree(hpo_dp_enc31); - kfree(vpg); - kfree(apg); - return NULL; - } - - dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, - hpo_dp_inst, eng_id, vpg, apg, - &hpo_dp_stream_enc_regs[hpo_dp_inst], - &hpo_dp_se_shift, &hpo_dp_se_mask); - - return &hpo_dp_enc31->base; -} - -static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( - uint8_t inst, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; - - /* allocate HPO link encoder */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); - - hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, - &hpo_dp_link_enc_regs[inst], - &hpo_dp_le_shift, &hpo_dp_le_mask); - - return &hpo_dp_enc31->base; -} - -static struct dce_hwseq *dcn31_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn31_create_audio, - .create_stream_encoder = dcn31_stream_encoder_create, - .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, - .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, - .create_hwseq = dcn31_hwseq_create, -}; - -static void dcn31_resource_destruct(struct dcn31_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { - if (pool->base.hpo_dp_stream_enc[i] != NULL) { - if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); - pool->base.hpo_dp_stream_enc[i]->vpg = NULL; - } - if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { - kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); - pool->base.hpo_dp_stream_enc[i]->apg = NULL; - } - kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); - pool->base.hpo_dp_stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { - if (pool->base.hpo_dp_link_enc[i] != NULL) { - kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); - pool->base.hpo_dp_link_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - 
dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn31_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - - if (pool->base.replay != NULL) - dmub_replay_destroy(&pool->base.replay); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); -} - -static struct hubp *dcn31_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - - if (hubp31_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - 
dm_error("DC: failed to create dwbc30!\n"); - return false; - } - - dcn30_dwbc_construct(dwbc30, ctx, - &dwbc30_regs[i], - &dwbc30_shift, - &dwbc30_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - - dcn30_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb30_regs[i], - &mcif_wb30_shift, - &mcif_wb30_mask, - i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn31_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -static void dcn31_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn31_resource_pool *dcn31_pool = TO_DCN31_RES_POOL(*pool); - - dcn31_resource_destruct(dcn31_pool); - kfree(dcn31_pool); - *pool = NULL; -} - -static struct clock_source *dcn31_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn3_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static bool is_dual_plane(enum surface_pixel_format format) -{ - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; -} - -int dcn31x_populate_dml_pipes_from_context(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - uint32_t pipe_cnt; - int i; - - dc_assert_fp_enabled(); - - pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - - for (i = 0; i < pipe_cnt; i++) { - pipes[i].pipe.src.gpuvm = 1; - if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE) { - //pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; - pipes[i].pipe.src.hostvm = dc->vm_pa_config.is_hvm_enabled; - } else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_FALSE) - pipes[i].pipe.src.hostvm = false; - else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_TRUE) - pipes[i].pipe.src.hostvm = true; - } - return pipe_cnt; -} - -int dcn31_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe; - bool upscaled = false; - - DC_FP_START(); - dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - DC_FP_END(); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - if (pipe->plane_state && - (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || - 
pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) - upscaled = true; - - /* - * Immediate flip can be set dynamically after enabling the plane. - * We need to require support for immediate flip or underflow can be - * intermittently experienced depending on peak b/w requirements. - */ - pipes[pipe_cnt].pipe.src.immediate_flip = true; - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.src.dcc_rate = 3; - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - DC_FP_START(); - dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - DC_FP_END(); - - - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - - pipe_cnt++; - } - context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE; - dc->config.enable_4to1MPC = false; - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { - if (is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { - dc->config.enable_4to1MPC = true; - } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { - /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - pipes[0].pipe.src.unbounded_req_mode = true; - } - } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count - && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; - } else if (context->stream_count >= 3 && upscaled) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - } - - return pipe_cnt; -} - -void dcn31_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - DC_FP_START(); - dcn31_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); - DC_FP_END(); -} - -void -dcn31_populate_dml_writeback_from_context(struct dc *dc, - struct resource_context *res_ctx, - display_e2e_pipe_params_st *pipes) -{ - DC_FP_START(); - dcn30_populate_dml_writeback_from_context(dc, res_ctx, pipes); - DC_FP_END(); -} - -void -dcn31_set_mcif_arb_params(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt) -{ - DC_FP_START(); - dcn30_set_mcif_arb_params(dc, context, pipes, pipe_cnt); - DC_FP_END(); -} - -bool dcn31_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out = false; - - BW_VAL_TRACE_SETUP(); - - int vlevel = 0; - int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); - DC_LOGGER_INIT(dc->ctx->logger); - - BW_VAL_TRACE_COUNT(); - - DC_FP_START(); - out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, true); - DC_FP_END(); - - // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg - if (pipe_cnt == 0) - fast_validate = false; - - if (!out) - goto validate_fail; - - BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - - if (fast_validate) { - BW_VAL_TRACE_SKIP(fast); - 
goto validate_out; - } - if (dc->res_pool->funcs->calculate_wm_and_dlg) - dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); - - BW_VAL_TRACE_END_WATERMARKS(); - - goto validate_out; - -validate_fail: - DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", - dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); - - BW_VAL_TRACE_SKIP(fail); - out = false; - -validate_out: - kfree(pipes); - - BW_VAL_TRACE_FINISH(); - - return out; -} - -static void dcn31_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - *panel_config = panel_config_defaults; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -static struct resource_funcs dcn31_res_pool_funcs = { - .destroy = dcn31_destroy_resource_pool, - .link_enc_create = dcn31_link_encoder_create, - .link_enc_create_minimal = dcn31_link_enc_create_minimal, - .link_encs_assign = link_enc_cfg_link_encs_assign, - .link_enc_unassign = link_enc_cfg_link_enc_unassign, - .panel_cntl_create = dcn31_panel_cntl_create, - .validate_bandwidth = dcn31_validate_bandwidth, - .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, - .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn31_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn31_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn31_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn31_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_panel_config_defaults = dcn31_get_panel_config_defaults, -}; - -static struct clock_source *dcn30_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn31_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - BREAK_TO_DEBUGGER(); - return NULL; -} - -static bool dcn31_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn31_resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dcn31; - - pool->base.funcs = &dcn31_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ - dc->caps.max_cursor_size = 256; - 
dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - if (dc->config.forceHBR2CP2520) - dc->caps.force_dp_tps4_for_cp2520 = false; - dc->caps.dp_hpo = true; - dc->caps.dp_hdmi21_pcon_support = true; - dc->caps.edp_dsc_support = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.is_apu = true; - dc->caps.zstate_support = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN301 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->config.use_old_fixed_vs_sequence = true; - - /* Use pipe context based otg sync logic */ - dc->config.use_pipe_ctx_sync_logic = true; - - /* read VBIOS LTTPR caps */ - { - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - /* interop bit is implicit */ - { - dc->caps.vbios_lttpr_aware = true; - } - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN31_CLK_SRC_PLL0] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL1] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - /*move phypllx_pixclk_resync to dmub next*/ - if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs_b0[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - 
&clk_src_regs_b0[3], false); - } else { - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - } - - pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* TODO: DCCG */ - pool->base.dccg = dccg31_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* TODO: IRQ */ - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn31_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn31_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.hubps[i] = dcn31_hubp_create(ctx, i); - if (pool->base.hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->base.dpps[i] = dcn31_dpp_create(ctx, i); - if (pool->base.dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - pool->base.opps[i] = dcn31_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.timing_generators[i] = dcn31_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - } - pool->base.timing_generator_count = i; - - /* PSR */ - pool->base.psr = dmub_psr_create(ctx); - if (pool->base.psr == NULL) { - dm_error("DC: failed to create psr obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* Replay */ - pool->base.replay = dmub_replay_create(ctx); - if (pool->base.replay == NULL) { - dm_error("DC: failed to create replay obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* ABM */ - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.multiple_abms[i] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* MPC and DSC */ - pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) 
{ - pool->base.dscs[i] = dcn31_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn31_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn31_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP && - dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 && - !dc->debug.dpia_debug.bits.disable_dpia) { - /* YELLOW CARP B0 has 4 DPIA's */ - pool->base.usb4_dpia_count = 4; - } - - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1) - pool->base.usb4_dpia_count = 4; - - /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane caps */ - dcn31_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp; - - return true; - -create_fail: - dcn31_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn31_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn31_resource_pool *pool = - kzalloc(sizeof(struct dcn31_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn31_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h deleted file mode 100644 index 901436591ed4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef _DCN31_RESOURCE_H_ -#define _DCN31_RESOURCE_H_ - -#include "core_types.h" - -#define TO_DCN31_RES_POOL(pool)\ - container_of(pool, struct dcn31_resource_pool, base) - -extern struct _vcs_dpi_ip_params_st dcn3_1_ip; - -struct dcn31_resource_pool { - struct resource_pool base; -}; - -bool dcn31_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate); -void dcn31_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel); -int dcn31_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate); -void -dcn31_populate_dml_writeback_from_context(struct dc *dc, - struct resource_context *res_ctx, - display_e2e_pipe_params_st *pipes); -void -dcn31_set_mcif_arb_params(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt); - -struct resource_pool *dcn31_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -/*temp: B0 specific before switch to dcn313 headers*/ -#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL -#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e -#define regPHYPLLF_PIXCLK_RESYNC_CNTL_BASE_IDX 1 -#define regPHYPLLG_PIXCLK_RESYNC_CNTL 0x005f -#define regPHYPLLG_PIXCLK_RESYNC_CNTL_BASE_IDX 1 - -//PHYPLLF_PIXCLK_RESYNC_CNTL -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE__SHIFT 0x8 -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE_MASK 0x00000100L -#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L - -//PHYPLLG_PIXCLK_RESYNC_CNTL -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE__SHIFT 0x8 -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE_MASK 0x00000100L -#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L -#endif -#endif /* _DCN31_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile index 72456debb99f..6ea47e00d62d 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile @@ -10,7 +10,7 @@ # # Makefile for dcn314. -DCN314 = dcn314_resource.o dcn314_init.o \ +DCN314 = dcn314_init.o \ dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c deleted file mode 100644 index 677361d74a4e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ /dev/null @@ -1,2180 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - - -#include "dm_services.h" -#include "dc.h" - -#include "dcn31/dcn31_init.h" -#include "dcn314/dcn314_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn314_resource.h" - -#include "dcn20/dcn20_resource.h" -#include "dcn30/dcn30_resource.h" -#include "dcn31/dcn31_resource.h" - -#include "dcn10/dcn10_ipp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn31/dcn31_hubbub.h" -#include "dcn30/dcn30_mpc.h" -#include "dcn31/dcn31_hubp.h" -#include "irq/dcn31/irq_service_dcn31.h" -#include "irq/dcn314/irq_service_dcn314.h" -#include "dcn30/dcn30_dpp.h" -#include "dcn314/dcn314_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn30/dcn30_opp.h" -#include "dcn20/dcn20_dsc.h" -#include "dcn30/dcn30_vpg.h" -#include "dcn30/dcn30_afmt.h" -#include "dcn31/dcn31_dio_link_encoder.h" -#include "dcn314/dcn314_dio_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_link_encoder.h" -#include "dcn31/dcn31_apg.h" -#include "dcn31/dcn31_vpg.h" -#include "dcn31/dcn31_afmt.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "clk_mgr.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dml/display_mode_vba.h" -#include "dml/dcn31/dcn31_fpu.h" -#include "dml/dcn314/dcn314_fpu.h" -#include "dcn314/dcn314_dccg.h" -#include "dcn10/dcn10_resource.h" -#include "dcn31/dcn31_panel_cntl.h" -#include "dcn314/dcn314_hwseq.h" - -#include "dcn30/dcn30_dwb.h" -#include "dcn30/dcn30_mmhubbub.h" - -#include "dcn/dcn_3_1_4_offset.h" -#include "dcn/dcn_3_1_4_sh_mask.h" -#include "dpcs/dpcs_3_1_4_offset.h" -#include "dpcs/dpcs_3_1_4_sh_mask.h" - -#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10 -#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L - -#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE__SHIFT 0x0 -#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE_MASK 0x0000000FL - -#include "reg_helper.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "dce/dmub_replay.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" -#include "dml/dcn314/display_mode_vba_314.h" -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" - -#include "link_enc_cfg.h" - -#define DCN_BASE__INST0_SEG1 0x000000C0 -#define DCN_BASE__INST0_SEG2 0x000034C0 -#define DCN_BASE__INST0_SEG3 0x00009000 - -#define NBIO_BASE__INST0_SEG1 0x00000014 - -#define MAX_INSTANCE 7 -#define MAX_SEGMENT 8 - -#define regBIF_BX2_BIOS_SCRATCH_2 0x003a -#define regBIF_BX2_BIOS_SCRATCH_2_BASE_IDX 1 -#define regBIF_BX2_BIOS_SCRATCH_3 0x003b -#define regBIF_BX2_BIOS_SCRATCH_3_BASE_IDX 1 -#define regBIF_BX2_BIOS_SCRATCH_6 0x003e -#define regBIF_BX2_BIOS_SCRATCH_6_BASE_IDX 1 - -#define DC_LOGGER \ - dc->ctx->logger -#define DC_LOGGER_INIT(logger) - -enum dcn31_clk_src_array_id { - DCN31_CLK_SRC_PLL0, - DCN31_CLK_SRC_PLL1, - DCN31_CLK_SRC_PLL2, - DCN31_CLK_SRC_PLL3, - DCN31_CLK_SRC_PLL4, - DCN30_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file - */ - -/* DCN */ -/* TODO awful hack. 
fixup dcn20_dwb.h */ -#undef BASE_INNER -#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_DWB(reg_name, temp_name, block, id)\ - .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - reg ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX2_ ## reg_name - -/* MMHUB */ -#define MMHUB_BASE_INNER(seg) \ - MMHUB_BASE__INST0_SEG ## seg - -#define MMHUB_BASE(seg) \ - MMHUB_BASE_INNER(seg) - -#define MMHUB_SR(reg_name)\ - .reg_name = MMHUB_BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -/* CLOCK */ -#define CLK_BASE_INNER(seg) \ - CLK_BASE__INST0_SEG ## seg - -#define CLK_BASE(seg) \ - CLK_BASE_INNER(seg) - -#define CLK_SRI(reg_name, block, inst)\ - .reg_name = CLK_BASE(reg ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## _ ## inst ## _ ## reg_name - - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN3_0(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN3_1_4(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN3_1_4(_MASK) -}; - -#define abm_regs(id)\ -[id] = {\ - ABM_DCN302_REG_LIST(id)\ -} - -static const struct dce_abm_registers abm_regs[] = { - abm_regs(0), - abm_regs(1), - abm_regs(2), - abm_regs(3), -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN30(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - 
audio_regs(6) -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs(id)\ -[id] = {\ - VPG_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_vpg_registers vpg_regs[] = { - vpg_regs(0), - vpg_regs(1), - vpg_regs(2), - vpg_regs(3), - vpg_regs(4), - vpg_regs(5), - vpg_regs(6), - vpg_regs(7), - vpg_regs(8), - vpg_regs(9), -}; - -static const struct dcn31_vpg_shift vpg_shift = { - DCN31_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_vpg_mask vpg_mask = { - DCN31_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs(id)\ -[id] = {\ - AFMT_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_afmt_registers afmt_regs[] = { - afmt_regs(0), - afmt_regs(1), - afmt_regs(2), - afmt_regs(3), - afmt_regs(4), - afmt_regs(5) -}; - -static const struct dcn31_afmt_shift afmt_shift = { - DCN31_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_afmt_mask afmt_mask = { - DCN31_AFMT_MASK_SH_LIST(_MASK) -}; - -#define apg_regs(id)\ -[id] = {\ - APG_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_apg_registers apg_regs[] = { - apg_regs(0), - apg_regs(1), - apg_regs(2), - apg_regs(3) -}; - -static const struct dcn31_apg_shift apg_shift = { - DCN31_APG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_apg_mask apg_mask = { - DCN31_APG_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN314_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN314(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN314(_MASK) -}; - - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4) -}; - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN31_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ -} - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), - DPCS_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), - DPCS_DCN31_MASK_SH_LIST(_MASK) -}; - -#define hpo_dp_stream_encoder_reg_list(id)\ -[id] = {\ - DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\ -} - -static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = { - 
hpo_dp_stream_encoder_reg_list(0), - hpo_dp_stream_encoder_reg_list(1), - hpo_dp_stream_encoder_reg_list(2), - hpo_dp_stream_encoder_reg_list(3) -}; - -static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) -}; - - -#define hpo_dp_link_encoder_reg_list(id)\ -[id] = {\ - DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\ - DCN3_1_RDPCSTX_REG_LIST(0),\ - DCN3_1_RDPCSTX_REG_LIST(1),\ - DCN3_1_RDPCSTX_REG_LIST(2),\ -} - -static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = { - hpo_dp_link_encoder_reg_list(0), - hpo_dp_link_encoder_reg_list(1), -}; - -static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { - DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { - DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) -}; - -#define dpp_regs(id)\ -[id] = {\ - DPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn3_dpp_registers dpp_regs[] = { - dpp_regs(0), - dpp_regs(1), - dpp_regs(2), - dpp_regs(3) -}; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3) -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4) -}; - -#define dwbc_regs_dcn3(id)\ -[id] = {\ - DWBC_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_dwbc_registers dwbc30_regs[] = { - dwbc_regs_dcn3(0), -}; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define mcif_wb_regs_dcn3(id)\ -[id] = {\ - MCIF_WB_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { - mcif_wb_regs_dcn3(0) -}; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define dsc_regsDCN314(id)\ -[id] = {\ - DSC_REG_LIST_DCN20(id)\ -} - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN314(0), - dsc_regsDCN314(1), - dsc_regsDCN314(2), - dsc_regsDCN314(3) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static const struct dcn30_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN3_0(0), - MPC_REG_LIST_DCN3_0(1), - MPC_REG_LIST_DCN3_0(2), - MPC_REG_LIST_DCN3_0(3), - MPC_OUT_MUX_REG_LIST_DCN3_0(0), - MPC_OUT_MUX_REG_LIST_DCN3_0(1), - MPC_OUT_MUX_REG_LIST_DCN3_0(2), - 
MPC_OUT_MUX_REG_LIST_DCN3_0(3), - MPC_RMU_GLOBAL_REG_LIST_DCN3AG, - MPC_RMU_REG_LIST_DCN3AG(0), - MPC_RMU_REG_LIST_DCN3AG(1), - //MPC_RMU_REG_LIST_DCN3AG(2), - MPC_DWB_MUX_REG_LIST_DCN3_0(0), -}; - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define optc_regs(id)\ -[id] = {OPTC_COMMON_REG_LIST_DCN3_14(id)} - -static const struct dcn_optc_registers optc_regs[] = { - optc_regs(0), - optc_regs(1), - optc_regs(2), - optc_regs(3) -}; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN3_14(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN3_14(_MASK) -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN30(id)\ -} - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3) -}; - - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN31(_MASK) -}; -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN31(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN31(_MASK) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN314() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN314(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN314(_MASK) -}; - - -#define SRII2(reg_name_pre, reg_name_post, id)\ - .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ - ## id ## _ ## reg_name_post ## _BASE_IDX) + \ - reg ## reg_name_pre ## id ## _ ## reg_name_post - - -#define HWSEQ_DCN31_REG_LIST()\ - SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ - SR(DIO_MEM_PWR_CTRL), \ - SR(ODM_MEM_PWR_CTRL3), \ - SR(DMU_MEM_PWR_CNTL), \ - SR(MMHUBBUB_MEM_PWR_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL2), \ - SR(DCFCLK_CNTL),\ - SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ - SRII(PIXEL_RATE_CNTL, OTG, 0), \ - SRII(PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PIXEL_RATE_CNTL, OTG, 3),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ - SR(MICROSECOND_TIME_BASE_DIV), \ - SR(MILLISECOND_TIME_BASE_DIV), \ - SR(DISPCLK_FREQ_CHANGE_CNTL), \ - SR(RBBMIF_TIMEOUT_DIS), \ - SR(RBBMIF_TIMEOUT_DIS_2), \ - SR(DCHUBBUB_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ - SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ - SR(MPC_CRC_CTRL), \ - SR(MPC_CRC_RESULT_GB), \ - SR(MPC_CRC_RESULT_C), \ - SR(MPC_CRC_RESULT_AR), \ - SR(DOMAIN0_PG_CONFIG), \ - SR(DOMAIN1_PG_CONFIG), \ - SR(DOMAIN2_PG_CONFIG), \ - SR(DOMAIN3_PG_CONFIG), \ - SR(DOMAIN16_PG_CONFIG), \ - SR(DOMAIN17_PG_CONFIG), \ - SR(DOMAIN18_PG_CONFIG), \ - SR(DOMAIN19_PG_CONFIG), \ - SR(DOMAIN0_PG_STATUS), \ - SR(DOMAIN1_PG_STATUS), \ - SR(DOMAIN2_PG_STATUS), \ - SR(DOMAIN3_PG_STATUS), \ - SR(DOMAIN16_PG_STATUS), \ - SR(DOMAIN17_PG_STATUS), \ - SR(DOMAIN18_PG_STATUS), \ - SR(DOMAIN19_PG_STATUS), \ - SR(D1VGA_CONTROL), \ - SR(D2VGA_CONTROL), \ - SR(D3VGA_CONTROL), \ - SR(D4VGA_CONTROL), \ - SR(D5VGA_CONTROL), \ - SR(D6VGA_CONTROL), \ - SR(DC_IP_REQUEST_CNTL), \ - 
SR(AZALIA_AUDIO_DTO), \ - SR(AZALIA_CONTROLLER_CLOCK_GATING), \ - SR(HPO_TOP_HW_CONTROL) - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN31_REG_LIST() -}; - -#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ - HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ - HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ - HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ - HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ - HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ - HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \ - HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ - HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh) - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN31_MASK_SH_LIST(_MASK) -}; -#define vmid_regs(id)\ -[id] = {\ - DCN20_VMID_REG_LIST(id)\ -} - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct resource_caps res_cap_dcn314 = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 5, - .num_stream_encoder = 5, - .num_dig_link_enc = 5, - .num_hpo_dp_stream_encoder = 4, - 
.num_hpo_dp_link_encoder = 2, - .num_pll = 5, - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_mpc_3dlut = 2, - .num_dsc = 4, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - // 6:1 downscaling ratio: 1000/6 = 166.666 - // 4:1 downscaling ratio for ARGB888 to prevent underflow during P010 playback: 1000/4 = 250 - .max_downscale_factor = { - .argb8888 = 250, - .nv12 = 167, - .fp16 = 167 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_z10 = false, - .enable_z9_disable_interface = true, - .minimum_z8_residency_time = 2000, - .psr_skip_crtc_disable = true, - .replay_skip_crtc_disabled = true, - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_dpp_power_gate = false, - .disable_hubp_power_gate = false, - .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 4096,/*upto true 4k*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = true, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .pstate_enabled = true, - .use_max_lb = true, - .enable_mem_low_power = { - .bits = { - .vga = true, - .i2c = true, - .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled - .dscl = true, - .cm = true, - .mpc = true, - .optc = true, - .vpg = true, - .afmt = true, - } - }, - - .root_clock_optimization = { - .bits = { - .dpp = true, - .dsc = true, - .hdmistream = true, - .hdmichar = true, - .dpstream = true, - .symclk32_se = false, - .symclk32_le = true, - .symclk_fe = true, - .physymclk = true, - .dpiasymclk = true, - } - }, - - .seamless_boot_odm_combine = true, - .using_dml2 = false, -}; - -static const struct dc_debug_options debug_defaults_diags = { - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = true, - .clock_trace = true, - .disable_dpp_power_gate = true, - .disable_hubp_power_gate = true, - .disable_clock_gate = true, - .disable_pplib_clock_request = true, - .disable_pplib_wm_range = true, - .disable_stutter = false, - .scl_reset_length10 = true, - .dwb_fi_phase = -1, // -1 = disable - .dmub_command_table = true, - .enable_tri_buf = true, - .use_max_lb = true -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - .disallow_replay = false, - }, - .ilr = { - .optimize_edp_link_rate = true, - }, -}; - -static void dcn31_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN20_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn31_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn3_dpp *dpp = - kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if (dpp3_construct(dpp, ctx, inst, - &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -static struct output_pixel_processor *dcn31_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) 
{ - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dcn31_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -/* ========================================================== */ - -/* - * DPIA index | Preferred Encoder | Host Router - * 0 | C | 0 - * 1 | First Available | 0 - * 2 | D | 1 - * 3 | First Available | 1 - */ -/* ========================================================== */ -static const enum engine_id dpia_to_preferred_enc_id_table[] = { - ENGINE_ID_DIGC, - ENGINE_ID_DIGC, - ENGINE_ID_DIGD, - ENGINE_ID_DIGD -}; - -static enum engine_id dcn314_get_preferred_eng_id_dpia(unsigned int dpia_index) -{ - return dpia_to_preferred_enc_id_table[dpia_index]; -} - -static struct dce_i2c_hw *dcn31_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct mpc *dcn31_mpc_create( - struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), - GFP_KERNEL); - - if (!mpc30) - return NULL; - - dcn30_mpc_construct(mpc30, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub3) - return NULL; - - hubbub31_construct(hubbub3, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask, - dcn3_14_ip.det_buffer_size_kbytes, - dcn3_14_ip.pixel_chunk_size_kbytes, - dcn3_14_ip.config_return_buffer_size_in_kbytes); - - - for (i = 0; i < res_cap_dcn314.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub3->base; -} - -static struct timing_generator *dcn31_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn314_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - 
.hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn31_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - - dcn31_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -/* Create a minimal link encoder object not associated with a particular - * physical connector. - * resource_funcs.link_enc_create_minimal - */ -static struct link_encoder *dcn31_link_enc_create_minimal( - struct dc_context *ctx, enum engine_id eng_id) -{ - struct dcn20_link_encoder *enc20; - - if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) - return NULL; - - enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - if (!enc20) - return NULL; - - dcn31_link_encoder_construct_minimal( - enc20, - ctx, - &link_enc_feature, - &link_enc_regs[eng_id - ENGINE_ID_DIGA], - eng_id); - - return &enc20->enc10.base; -} - -static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dcn31_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dcn31_panel_cntl_construct(panel_cntl, init_data); - - return &panel_cntl->base; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn31_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct vpg *dcn31_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); - - if (!vpg31) - return NULL; - - vpg31_construct(vpg31, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg31->base; -} - -static struct afmt *dcn31_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); - - if (!afmt31) - return NULL; - - afmt31_construct(afmt31, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - // Light sleep by default, no need to power down here - - return &afmt31->base; -} - -static struct apg *dcn31_apg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); - - if (!apg31) - return NULL; - - apg31_construct(apg31, ctx, inst, - &apg_regs[inst], - &apg_shift, - &apg_mask); - - return &apg31->base; -} - -static struct stream_encoder *dcn314_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id < ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = 
eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - afmt = dcn31_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - - dcn314_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; - struct vpg *vpg; - struct apg *apg; - uint32_t hpo_dp_inst; - uint32_t vpg_inst; - uint32_t apg_inst; - - ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); - hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; - - /* Mapping of VPG register blocks to HPO DP block instance: - * VPG[6] -> HPO_DP[0] - * VPG[7] -> HPO_DP[1] - * VPG[8] -> HPO_DP[2] - * VPG[9] -> HPO_DP[3] - */ - //Uses offset index 5-8, but actually maps to vpg_inst 6-9 - vpg_inst = hpo_dp_inst + 5; - - /* Mapping of APG register blocks to HPO DP block instance: - * APG[0] -> HPO_DP[0] - * APG[1] -> HPO_DP[1] - * APG[2] -> HPO_DP[2] - * APG[3] -> HPO_DP[3] - */ - apg_inst = hpo_dp_inst; - - /* allocate HPO stream encoder and create VPG sub-block */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - apg = dcn31_apg_create(ctx, apg_inst); - - if (!hpo_dp_enc31 || !vpg || !apg) { - kfree(hpo_dp_enc31); - kfree(vpg); - kfree(apg); - return NULL; - } - - dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, - hpo_dp_inst, eng_id, vpg, apg, - &hpo_dp_stream_enc_regs[hpo_dp_inst], - &hpo_dp_se_shift, &hpo_dp_se_mask); - - return &hpo_dp_enc31->base; -} - -static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( - uint8_t inst, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; - - /* allocate HPO link encoder */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); - - hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, - &hpo_dp_link_enc_regs[inst], - &hpo_dp_le_shift, &hpo_dp_le_mask); - - return &hpo_dp_enc31->base; -} - -static struct dce_hwseq *dcn314_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn31_create_audio, - .create_stream_encoder = dcn314_stream_encoder_create, - .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, - .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, - .create_hwseq = dcn314_hwseq_create, -}; - -static void dcn314_resource_destruct(struct dcn314_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - 
pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { - if (pool->base.hpo_dp_stream_enc[i] != NULL) { - if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); - pool->base.hpo_dp_stream_enc[i]->vpg = NULL; - } - if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { - kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); - pool->base.hpo_dp_stream_enc[i]->apg = NULL; - } - kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); - pool->base.hpo_dp_stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { - if (pool->base.hpo_dp_link_enc[i] != NULL) { - kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); - pool->base.hpo_dp_link_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn31_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) - dal_irq_service_destroy(&pool->base.irqs); - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if 
(pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - - if (pool->base.replay != NULL) - dmub_replay_destroy(&pool->base.replay); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); -} - -static struct hubp *dcn31_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - - if (hubp31_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - - dcn30_dwbc_construct(dwbc30, ctx, - &dwbc30_regs[i], - &dwbc30_shift, - &dwbc30_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - - dcn30_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb30_regs[i], - &mcif_wb30_shift, - &mcif_wb30_mask, - i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn314_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -static void dcn314_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn314_resource_pool *dcn314_pool = TO_DCN314_RES_POOL(*pool); - - dcn314_resource_destruct(dcn314_pool); - kfree(dcn314_pool); - *pool = NULL; -} - -static struct clock_source *dcn31_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn31_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - BREAK_TO_DEBUGGER(); - kfree(clk_src); - return NULL; -} - -static int dcn314_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int pipe_cnt; - - DC_FP_START(); - pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate); - DC_FP_END(); - - return pipe_cnt; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - DC_FP_START(); - dcn314_update_bw_bounding_box_fpu(dc, bw_params); - DC_FP_END(); -} - -static void dcn314_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - 
*panel_config = panel_config_defaults; -} - -static bool filter_modes_for_single_channel_workaround(struct dc *dc, - struct dc_state *context) -{ - // Filter 2K@240Hz+8K@24fps above combination timing if memory only has single dimm LPDDR - if (dc->clk_mgr->bw_params->vram_type == 34 && - dc->clk_mgr->bw_params->num_channels < 2 && - context->stream_count > 1) { - int total_phy_pix_clk = 0; - - for (int i = 0; i < context->stream_count; i++) - if (context->res_ctx.pipe_ctx[i].stream) - total_phy_pix_clk += context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; - - if (total_phy_pix_clk >= (1148928+826260)) //2K@240Hz+8K@24fps - return true; - } - return false; -} - -bool dcn314_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out = false; - - BW_VAL_TRACE_SETUP(); - - int vlevel = 0; - int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); - DC_LOGGER_INIT(dc->ctx->logger); - - BW_VAL_TRACE_COUNT(); - - if (filter_modes_for_single_channel_workaround(dc, context)) - goto validate_fail; - - DC_FP_START(); - // do not support self refresh only - out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, false); - DC_FP_END(); - - // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg - if (pipe_cnt == 0) - fast_validate = false; - - if (!out) - goto validate_fail; - - BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - - if (fast_validate) { - BW_VAL_TRACE_SKIP(fast); - goto validate_out; - } - if (dc->res_pool->funcs->calculate_wm_and_dlg) - dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); - - BW_VAL_TRACE_END_WATERMARKS(); - - goto validate_out; - -validate_fail: - DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", - dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); - - BW_VAL_TRACE_SKIP(fail); - out = false; - -validate_out: - kfree(pipes); - - BW_VAL_TRACE_FINISH(); - - return out; -} - -static struct resource_funcs dcn314_res_pool_funcs = { - .destroy = dcn314_destroy_resource_pool, - .link_enc_create = dcn31_link_encoder_create, - .link_enc_create_minimal = dcn31_link_enc_create_minimal, - .link_encs_assign = link_enc_cfg_link_encs_assign, - .link_enc_unassign = link_enc_cfg_link_enc_unassign, - .panel_cntl_create = dcn31_panel_cntl_create, - .validate_bandwidth = dcn314_validate_bandwidth, - .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, - .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn314_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn30_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn314_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_panel_config_defaults = dcn314_get_panel_config_defaults, - .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, 
-}; - -static struct clock_source *dcn30_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn31_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - BREAK_TO_DEBUGGER(); - kfree(clk_src); - return NULL; -} - -static bool dcn314_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn314_resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dcn314; - pool->base.funcs = &dcn314_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 400; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - if (dc->config.forceHBR2CP2520) - dc->caps.force_dp_tps4_for_cp2520 = false; - dc->caps.dp_hpo = true; - dc->caps.dp_hdmi21_pcon_support = true; - dc->caps.edp_dsc_support = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.is_apu = true; - dc->caps.seamless_odm = true; - - dc->caps.zstate_support = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN301 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->caps.max_disp_clock_khz_at_vmin = 650000; - - /* Use pipe context based otg sync logic */ - dc->config.use_pipe_ctx_sync_logic = true; - - /* read VBIOS LTTPR caps */ - { - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum 
bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - /* interop bit is implicit */ - { - dc->caps.vbios_lttpr_aware = true; - } - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - else - dc->debug = debug_defaults_diags; - - /* Disable pipe power gating */ - dc->debug.disable_dpp_power_gate = true; - dc->debug.disable_hubp_power_gate = true; - - /* Disable root clock optimization */ - dc->debug.root_clock_optimization.u32All = 0; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN31_CLK_SRC_PLL0] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL1] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = - dcn30_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - pool->base.dccg = dccg314_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn314_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn31_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.hubps[i] = dcn31_hubp_create(ctx, i); - if (pool->base.hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->base.dpps[i] = dcn31_dpp_create(ctx, i); - if (pool->base.dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - pool->base.opps[i] = dcn31_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.timing_generators[i] = dcn31_timing_generator_create( - ctx, i); - if 
(pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - } - pool->base.timing_generator_count = i; - - /* PSR */ - pool->base.psr = dmub_psr_create(ctx); - if (pool->base.psr == NULL) { - dm_error("DC: failed to create psr obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* Replay */ - pool->base.replay = dmub_replay_create(ctx); - if (pool->base.replay == NULL) { - dm_error("DC: failed to create replay obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* ABM */ - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.multiple_abms[i] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* MPC and DSC */ - pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn314_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn31_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn31_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* DCN314 has 4 DPIA */ - pool->base.usb4_dpia_count = 4; - - /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane caps */ - dcn314_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - dc->dcn_ip->max_num_dpp = dcn3_14_ip.max_num_dpp; - - return true; - -create_fail: - - dcn314_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn314_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn314_resource_pool *pool = - kzalloc(sizeof(struct dcn314_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn314_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h deleted file mode 100644 index 49ffe71018df..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 
2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef _DCN314_RESOURCE_H_ -#define _DCN314_RESOURCE_H_ - -#include "core_types.h" - -extern struct _vcs_dpi_ip_params_st dcn3_14_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc; - -#define TO_DCN314_RES_POOL(pool)\ - container_of(pool, struct dcn314_resource_pool, base) - -struct dcn314_resource_pool { - struct resource_pool base; -}; - -bool dcn314_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate); - -struct resource_pool *dcn314_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -#endif /* _DCN314_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile deleted file mode 100644 index 59381d24800b..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright © 2021 Advanced Micro Devices, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# Authors: AMD -# -# Makefile for dcn315. 
- -DCN315 = dcn315_resource.o - -AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN315) diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c deleted file mode 100644 index cb8024eee8e4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ /dev/null @@ -1,2151 +0,0 @@ -/* - * Copyright 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - - -#include "dm_services.h" -#include "dc.h" - -#include "dcn31/dcn31_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn315_resource.h" - -#include "dcn20/dcn20_resource.h" -#include "dcn30/dcn30_resource.h" -#include "dcn31/dcn31_resource.h" - -#include "dcn10/dcn10_ipp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn31/dcn31_hubbub.h" -#include "dcn30/dcn30_mpc.h" -#include "dcn31/dcn31_hubp.h" -#include "irq/dcn315/irq_service_dcn315.h" -#include "dcn30/dcn30_dpp.h" -#include "dcn31/dcn31_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn30/dcn30_opp.h" -#include "dcn20/dcn20_dsc.h" -#include "dcn30/dcn30_vpg.h" -#include "dcn30/dcn30_afmt.h" -#include "dcn30/dcn30_dio_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_link_encoder.h" -#include "dcn31/dcn31_apg.h" -#include "dcn31/dcn31_dio_link_encoder.h" -#include "dcn31/dcn31_vpg.h" -#include "dcn31/dcn31_afmt.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "clk_mgr.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dml/display_mode_vba.h" -#include "dml/dcn31/dcn31_fpu.h" -#include "dcn31/dcn31_dccg.h" -#include "dcn10/dcn10_resource.h" -#include "dcn31/dcn31_panel_cntl.h" - -#include "dcn30/dcn30_dwb.h" -#include "dcn30/dcn30_mmhubbub.h" - -#include "dcn/dcn_3_1_5_offset.h" -#include "dcn/dcn_3_1_5_sh_mask.h" -#include "dpcs/dpcs_4_2_2_offset.h" -#include "dpcs/dpcs_4_2_2_sh_mask.h" - -#define NBIO_BASE__INST0_SEG0 0x00000000 -#define NBIO_BASE__INST0_SEG1 0x00000014 -#define NBIO_BASE__INST0_SEG2 0x00000D20 -#define NBIO_BASE__INST0_SEG3 0x00010400 -#define NBIO_BASE__INST0_SEG4 0x0241B000 -#define NBIO_BASE__INST0_SEG5 0x04040000 - -#define DPCS_BASE__INST0_SEG0 0x00000012 -#define DPCS_BASE__INST0_SEG1 
0x000000C0 -#define DPCS_BASE__INST0_SEG2 0x000034C0 -#define DPCS_BASE__INST0_SEG3 0x00009000 -#define DPCS_BASE__INST0_SEG4 0x02403C00 -#define DPCS_BASE__INST0_SEG5 0 - -#define DCN_BASE__INST0_SEG0 0x00000012 -#define DCN_BASE__INST0_SEG1 0x000000C0 -#define DCN_BASE__INST0_SEG2 0x000034C0 -#define DCN_BASE__INST0_SEG3 0x00009000 -#define DCN_BASE__INST0_SEG4 0x02403C00 -#define DCN_BASE__INST0_SEG5 0 - -#define regBIF_BX_PF2_RSMU_INDEX 0x0000 -#define regBIF_BX_PF2_RSMU_INDEX_BASE_IDX 1 -#define regBIF_BX_PF2_RSMU_DATA 0x0001 -#define regBIF_BX_PF2_RSMU_DATA_BASE_IDX 1 -#define regBIF_BX2_BIOS_SCRATCH_6 0x003e -#define regBIF_BX2_BIOS_SCRATCH_6_BASE_IDX 1 -#define BIF_BX2_BIOS_SCRATCH_6__BIOS_SCRATCH_6__SHIFT 0x0 -#define BIF_BX2_BIOS_SCRATCH_6__BIOS_SCRATCH_6_MASK 0xFFFFFFFFL -#define regBIF_BX2_BIOS_SCRATCH_2 0x003a -#define regBIF_BX2_BIOS_SCRATCH_2_BASE_IDX 1 -#define BIF_BX2_BIOS_SCRATCH_2__BIOS_SCRATCH_2__SHIFT 0x0 -#define BIF_BX2_BIOS_SCRATCH_2__BIOS_SCRATCH_2_MASK 0xFFFFFFFFL -#define regBIF_BX2_BIOS_SCRATCH_3 0x003b -#define regBIF_BX2_BIOS_SCRATCH_3_BASE_IDX 1 -#define BIF_BX2_BIOS_SCRATCH_3__BIOS_SCRATCH_3__SHIFT 0x0 -#define BIF_BX2_BIOS_SCRATCH_3__BIOS_SCRATCH_3_MASK 0xFFFFFFFFL - -#define regDCHUBBUB_DEBUG_CTRL_0 0x04d6 -#define regDCHUBBUB_DEBUG_CTRL_0_BASE_IDX 2 -#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10 -#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L - -#include "reg_helper.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" - -#include "dml/dcn30/display_mode_vba_30.h" -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" - -#include "link_enc_cfg.h" - -#define DCN3_15_MAX_DET_SIZE 384 -#define DCN3_15_CRB_SEGMENT_SIZE_KB 64 -#define DCN3_15_MAX_DET_SEGS (DCN3_15_MAX_DET_SIZE / DCN3_15_CRB_SEGMENT_SIZE_KB) -/* Minimum 3 extra segments need to be in compbuf and claimable to guarantee seamless mpo transitions */ -#define MIN_RESERVED_DET_SEGS 3 - -enum dcn31_clk_src_array_id { - DCN31_CLK_SRC_PLL0, - DCN31_CLK_SRC_PLL1, - DCN31_CLK_SRC_PLL2, - DCN31_CLK_SRC_PLL3, - DCN31_CLK_SRC_PLL4, - DCN30_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file - */ - -/* DCN */ -#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_DWB(reg_name, temp_name, block, id)\ - .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## 
reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - reg ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX2_ ## reg_name - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN3_0(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -#define abm_regs(id)\ -[id] = {\ - ABM_DCN302_REG_LIST(id)\ -} - -static const struct dce_abm_registers abm_regs[] = { - abm_regs(0), - abm_regs(1), - abm_regs(2), - abm_regs(3), -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN30(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6) -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs(id)\ -[id] = {\ - VPG_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_vpg_registers vpg_regs[] = { - vpg_regs(0), - vpg_regs(1), - vpg_regs(2), - vpg_regs(3), - vpg_regs(4), - vpg_regs(5), - vpg_regs(6), - vpg_regs(7), - vpg_regs(8), - vpg_regs(9), -}; - -static const struct dcn31_vpg_shift vpg_shift = { - DCN31_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_vpg_mask vpg_mask = { - DCN31_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs(id)\ -[id] = {\ - AFMT_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_afmt_registers afmt_regs[] = { - afmt_regs(0), - afmt_regs(1), - afmt_regs(2), - afmt_regs(3), - afmt_regs(4), - afmt_regs(5) -}; - -static const struct dcn31_afmt_shift afmt_shift = { - DCN31_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_afmt_mask afmt_mask = { - DCN31_AFMT_MASK_SH_LIST(_MASK) -}; - -#define apg_regs(id)\ -[id] = {\ - APG_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_apg_registers apg_regs[] = { - apg_regs(0), - apg_regs(1), - apg_regs(2), - apg_regs(3) -}; - -static const struct dcn31_apg_shift apg_shift = { - DCN31_APG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_apg_mask apg_mask = { - DCN31_APG_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN3_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - 
stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4) -}; - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN31_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - DPCS_DCN31_REG_LIST(id), \ -} - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ - DPCS_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ - DPCS_DCN31_MASK_SH_LIST(_MASK) -}; - -#define hpo_dp_stream_encoder_reg_list(id)\ -[id] = {\ - DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\ -} - -static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = { - hpo_dp_stream_encoder_reg_list(0), - hpo_dp_stream_encoder_reg_list(1), - hpo_dp_stream_encoder_reg_list(2), - hpo_dp_stream_encoder_reg_list(3), -}; - -static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) -}; - - -#define hpo_dp_link_encoder_reg_list(id)\ -[id] = {\ - DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\ - DCN3_1_RDPCSTX_REG_LIST(0),\ - DCN3_1_RDPCSTX_REG_LIST(1),\ - DCN3_1_RDPCSTX_REG_LIST(2),\ - DCN3_1_RDPCSTX_REG_LIST(3),\ - DCN3_1_RDPCSTX_REG_LIST(4)\ -} - -static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = { - hpo_dp_link_encoder_reg_list(0), - hpo_dp_link_encoder_reg_list(1), -}; - -static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { - DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { - DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) -}; - -#define dpp_regs(id)\ -[id] = {\ - DPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn3_dpp_registers dpp_regs[] = { - dpp_regs(0), - dpp_regs(1), - dpp_regs(2), - dpp_regs(3) -}; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3) -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define 
aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4) -}; - -#define dwbc_regs_dcn3(id)\ -[id] = {\ - DWBC_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_dwbc_registers dwbc30_regs[] = { - dwbc_regs_dcn3(0), -}; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define mcif_wb_regs_dcn3(id)\ -[id] = {\ - MCIF_WB_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { - mcif_wb_regs_dcn3(0) -}; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define dsc_regsDCN20(id)\ -[id] = {\ - DSC_REG_LIST_DCN20(id)\ -} - -static const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1), - dsc_regsDCN20(2) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static const struct dcn30_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN3_0(0), - MPC_REG_LIST_DCN3_0(1), - MPC_REG_LIST_DCN3_0(2), - MPC_REG_LIST_DCN3_0(3), - MPC_OUT_MUX_REG_LIST_DCN3_0(0), - MPC_OUT_MUX_REG_LIST_DCN3_0(1), - MPC_OUT_MUX_REG_LIST_DCN3_0(2), - MPC_OUT_MUX_REG_LIST_DCN3_0(3), - MPC_DWB_MUX_REG_LIST_DCN3_0(0), -}; - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define optc_regs(id)\ -[id] = {OPTC_COMMON_REG_LIST_DCN3_1(id)} - -static const struct dcn_optc_registers optc_regs[] = { - optc_regs(0), - optc_regs(1), - optc_regs(2), - optc_regs(3) -}; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN3_1(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN3_1(_MASK) -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN30(id)\ -} - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3) -}; - - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN31(_MASK) -}; -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN31(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN31(_MASK) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN31() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN31(_MASK) -}; - - -#define SRII2(reg_name_pre, reg_name_post, id)\ - .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ - ## id ## _ ## reg_name_post ## _BASE_IDX) + \ - reg ## reg_name_pre ## id ## _ ## reg_name_post - - 
-#define HWSEQ_DCN31_REG_LIST()\ - SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ - SR(DIO_MEM_PWR_CTRL), \ - SR(ODM_MEM_PWR_CTRL3), \ - SR(DMU_MEM_PWR_CNTL), \ - SR(MMHUBBUB_MEM_PWR_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL2), \ - SR(DCFCLK_CNTL),\ - SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ - SRII(PIXEL_RATE_CNTL, OTG, 0), \ - SRII(PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PIXEL_RATE_CNTL, OTG, 3),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ - SR(MICROSECOND_TIME_BASE_DIV), \ - SR(MILLISECOND_TIME_BASE_DIV), \ - SR(DISPCLK_FREQ_CHANGE_CNTL), \ - SR(RBBMIF_TIMEOUT_DIS), \ - SR(RBBMIF_TIMEOUT_DIS_2), \ - SR(DCHUBBUB_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ - SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ - SR(MPC_CRC_CTRL), \ - SR(MPC_CRC_RESULT_GB), \ - SR(MPC_CRC_RESULT_C), \ - SR(MPC_CRC_RESULT_AR), \ - SR(DOMAIN0_PG_CONFIG), \ - SR(DOMAIN1_PG_CONFIG), \ - SR(DOMAIN2_PG_CONFIG), \ - SR(DOMAIN3_PG_CONFIG), \ - SR(DOMAIN16_PG_CONFIG), \ - SR(DOMAIN17_PG_CONFIG), \ - SR(DOMAIN18_PG_CONFIG), \ - SR(DOMAIN0_PG_STATUS), \ - SR(DOMAIN1_PG_STATUS), \ - SR(DOMAIN2_PG_STATUS), \ - SR(DOMAIN3_PG_STATUS), \ - SR(DOMAIN16_PG_STATUS), \ - SR(DOMAIN17_PG_STATUS), \ - SR(DOMAIN18_PG_STATUS), \ - SR(D1VGA_CONTROL), \ - SR(D2VGA_CONTROL), \ - SR(D3VGA_CONTROL), \ - SR(D4VGA_CONTROL), \ - SR(D5VGA_CONTROL), \ - SR(D6VGA_CONTROL), \ - SR(DC_IP_REQUEST_CNTL), \ - SR(AZALIA_AUDIO_DTO), \ - SR(AZALIA_CONTROLLER_CLOCK_GATING), \ - SR(HPO_TOP_HW_CONTROL) - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN31_REG_LIST() -}; - -#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ - HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ - HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ - HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ - HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ - 
HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ - HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \ - HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ - HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh) - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN31_MASK_SH_LIST(_MASK) -}; -#define vmid_regs(id)\ -[id] = {\ - DCN20_VMID_REG_LIST(id)\ -} - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct resource_caps res_cap_dcn31 = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 5, - .num_stream_encoder = 5, - .num_dig_link_enc = 5, - .num_hpo_dp_stream_encoder = 4, - .num_hpo_dp_link_encoder = 2, - .num_pll = 5, - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_mpc_3dlut = 2, - .num_dsc = 3, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - // 6:1 downscaling ratio: 1000/6 = 166.666 - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_z10 = true, /*hw not support it*/ - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 4096,/*upto true 4k*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .pstate_enabled = true, - .use_max_lb = true, - .enable_mem_low_power = { - .bits = { - .vga = true, - .i2c = true, - .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled - .dscl = true, - .cm = true, - .mpc = true, - .optc = true, - .vpg = true, - .afmt = true, - } - }, - .enable_legacy_fast_update = true, - .psr_power_use_phy_fsm = 0, - .using_dml2 = false, -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - .disallow_replay = false, - }, - .ilr = { - .optimize_edp_link_rate = true, - }, -}; - -static void dcn31_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN20_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn31_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn3_dpp *dpp = - kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if 
(dpp3_construct(dpp, ctx, inst, - &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -static struct output_pixel_processor *dcn31_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dcn31_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct dce_i2c_hw *dcn31_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct mpc *dcn31_mpc_create( - struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), - GFP_KERNEL); - - if (!mpc30) - return NULL; - - dcn30_mpc_construct(mpc30, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub3) - return NULL; - - hubbub31_construct(hubbub3, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask, - dcn3_15_ip.det_buffer_size_kbytes, - dcn3_15_ip.pixel_chunk_size_kbytes, - dcn3_15_ip.config_return_buffer_size_in_kbytes); - - - for (i = 0; i < res_cap_dcn31.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub3->base; -} - -static struct timing_generator *dcn31_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn31_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - 
.flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn31_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - - dcn31_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -/* Create a minimal link encoder object not associated with a particular - * physical connector. - * resource_funcs.link_enc_create_minimal - */ -static struct link_encoder *dcn31_link_enc_create_minimal( - struct dc_context *ctx, enum engine_id eng_id) -{ - struct dcn20_link_encoder *enc20; - - if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) - return NULL; - - enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - if (!enc20) - return NULL; - - dcn31_link_encoder_construct_minimal( - enc20, - ctx, - &link_enc_feature, - &link_enc_regs[eng_id - ENGINE_ID_DIGA], - eng_id); - - return &enc20->enc10.base; -} - -static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dcn31_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dcn31_panel_cntl_construct(panel_cntl, init_data); - - return &panel_cntl->base; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn31_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct vpg *dcn31_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); - - if (!vpg31) - return NULL; - - vpg31_construct(vpg31, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg31->base; -} - -static struct afmt *dcn31_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); - - if (!afmt31) - return NULL; - - afmt31_construct(afmt31, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - // Light sleep by default, no need to power down here - - return &afmt31->base; -} - -static struct apg *dcn31_apg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); - - if (!apg31) - return NULL; - - apg31_construct(apg31, ctx, inst, - &apg_regs[inst], - &apg_shift, - &apg_mask); - - return &apg31->base; -} - -static struct stream_encoder *dcn315_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /*PHYB is wired off in HW, allow front end to remapping, otherwise needs more changes*/ - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = 
dcn31_vpg_create(ctx, vpg_inst); - afmt = dcn31_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - - dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; - struct vpg *vpg; - struct apg *apg; - uint32_t hpo_dp_inst; - uint32_t vpg_inst; - uint32_t apg_inst; - - ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); - hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; - - /* Mapping of VPG register blocks to HPO DP block instance: - * VPG[6] -> HPO_DP[0] - * VPG[7] -> HPO_DP[1] - * VPG[8] -> HPO_DP[2] - * VPG[9] -> HPO_DP[3] - */ - vpg_inst = hpo_dp_inst + 6; - - /* Mapping of APG register blocks to HPO DP block instance: - * APG[0] -> HPO_DP[0] - * APG[1] -> HPO_DP[1] - * APG[2] -> HPO_DP[2] - * APG[3] -> HPO_DP[3] - */ - apg_inst = hpo_dp_inst; - - /* allocate HPO stream encoder and create VPG sub-block */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - apg = dcn31_apg_create(ctx, apg_inst); - - if (!hpo_dp_enc31 || !vpg || !apg) { - kfree(hpo_dp_enc31); - kfree(vpg); - kfree(apg); - return NULL; - } - - dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, - hpo_dp_inst, eng_id, vpg, apg, - &hpo_dp_stream_enc_regs[hpo_dp_inst], - &hpo_dp_se_shift, &hpo_dp_se_mask); - - return &hpo_dp_enc31->base; -} - -static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( - uint8_t inst, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; - - /* allocate HPO link encoder */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); - - hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, - &hpo_dp_link_enc_regs[inst], - &hpo_dp_le_shift, &hpo_dp_le_mask); - - return &hpo_dp_enc31->base; -} - -static struct dce_hwseq *dcn31_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn31_create_audio, - .create_stream_encoder = dcn315_stream_encoder_create, - .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, - .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, - .create_hwseq = dcn31_hwseq_create, -}; - -static void dcn315_resource_destruct(struct dcn315_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { - if (pool->base.hpo_dp_stream_enc[i] != NULL) { - if 
(pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); - pool->base.hpo_dp_stream_enc[i]->vpg = NULL; - } - if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { - kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); - pool->base.hpo_dp_stream_enc[i]->apg = NULL; - } - kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); - pool->base.hpo_dp_stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { - if (pool->base.hpo_dp_link_enc[i] != NULL) { - kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); - pool->base.hpo_dp_link_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn31_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - 
- if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); -} - -static struct hubp *dcn31_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - - if (hubp31_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - - dcn30_dwbc_construct(dwbc30, ctx, - &dwbc30_regs[i], - &dwbc30_shift, - &dwbc30_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - - dcn30_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb30_regs[i], - &mcif_wb30_shift, - &mcif_wb30_mask, - i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn31_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -static void dcn315_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn315_resource_pool *dcn31_pool = TO_DCN315_RES_POOL(*pool); - - dcn315_resource_destruct(dcn31_pool); - kfree(dcn31_pool); - *pool = NULL; -} - -static struct clock_source *dcn31_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn31_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static bool is_dual_plane(enum surface_pixel_format format) -{ - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; -} - -static int source_format_to_bpp (enum source_format_class SourcePixelFormat) -{ - if (SourcePixelFormat == dm_444_64) - return 8; - else if (SourcePixelFormat == dm_444_16) - return 2; - else if (SourcePixelFormat == dm_444_8) - return 1; - else if (SourcePixelFormat == dm_rgbe_alpha) - return 5; - else if (SourcePixelFormat == dm_420_8) - return 3; - else if (SourcePixelFormat == dm_420_12) - return 6; - else - return 4; -} - -static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context) -{ - int i; - struct resource_context *res_ctx = &context->res_ctx; - - /*Don't apply for single stream*/ - if (context->stream_count < 2) - return false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (!res_ctx->pipe_ctx[i].stream) - continue; - - /*Don't apply if 
scaling*/ - if (res_ctx->pipe_ctx[i].stream->src.width != res_ctx->pipe_ctx[i].stream->dst.width || - res_ctx->pipe_ctx[i].stream->src.height != res_ctx->pipe_ctx[i].stream->dst.height || - (res_ctx->pipe_ctx[i].plane_state && (res_ctx->pipe_ctx[i].plane_state->src_rect.width - != res_ctx->pipe_ctx[i].plane_state->dst_rect.width || - res_ctx->pipe_ctx[i].plane_state->src_rect.height - != res_ctx->pipe_ctx[i].plane_state->dst_rect.height))) - return false; - /*Don't apply if MPO to avoid transition issues*/ - if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state != res_ctx->pipe_ctx[i].plane_state) - return false; - } - return true; -} - -static int dcn315_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int i, pipe_cnt, crb_idx, crb_pipes; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe = NULL; - const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB; - int remaining_det_segs = max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB; - bool pixel_rate_crb = allow_pixel_rate_crb(dc, context); - - DC_FP_START(); - dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - DC_FP_END(); - - for (i = 0, pipe_cnt = 0, crb_pipes = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - /* - * Immediate flip can be set dynamically after enabling the plane. - * We need to require support for immediate flip or underflow can be - * intermittently experienced depending on peak b/w requirements. - */ - pipes[pipe_cnt].pipe.src.immediate_flip = true; - - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.src.dcc_rate = 3; - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - DC_FP_START(); - dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - if (pixel_rate_crb && !pipe->top_pipe && !pipe->prev_odm_pipe) { - int bpp = source_format_to_bpp(pipes[pipe_cnt].pipe.src.source_format); - /* Ceil to crb segment size */ - int approx_det_segs_required_for_pstate = dcn_get_approx_det_segs_required_for_pstate( - &context->bw_ctx.dml.soc, timing->pix_clk_100hz, bpp, DCN3_15_CRB_SEGMENT_SIZE_KB); - - if (approx_det_segs_required_for_pstate <= 2 * DCN3_15_MAX_DET_SEGS) { - bool split_required = approx_det_segs_required_for_pstate > DCN3_15_MAX_DET_SEGS; - split_required = split_required || timing->pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc); - split_required = split_required || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120); - - /* Minimum 2 segments to allow mpc/odm combine if its used later */ - if (approx_det_segs_required_for_pstate < 2) - approx_det_segs_required_for_pstate = 2; - if (split_required) - approx_det_segs_required_for_pstate += approx_det_segs_required_for_pstate % 2; - pipes[pipe_cnt].pipe.src.det_size_override = approx_det_segs_required_for_pstate; - remaining_det_segs -= approx_det_segs_required_for_pstate; - } else - remaining_det_segs = -1; - crb_pipes++; - } - DC_FP_END(); - - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: 
- pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - pipe_cnt++; - } - - /* Spread remaining unreserved crb evenly among all pipes*/ - if (pixel_rate_crb) { - for (i = 0, pipe_cnt = 0, crb_idx = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &res_ctx->pipe_ctx[i]; - if (!pipe->stream) - continue; - - /* Do not use asymetric crb if not enough for pstate support */ - if (remaining_det_segs < 0) { - pipes[pipe_cnt].pipe.src.det_size_override = 0; - pipe_cnt++; - continue; - } - - if (!pipe->top_pipe && !pipe->prev_odm_pipe) { - bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc) - || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120); - - if (remaining_det_segs > MIN_RESERVED_DET_SEGS) - pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes + - (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0); - if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) { - /* Clamp to 2 pipe split max det segments */ - remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS); - pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS; - } - if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) { - /* If we are splitting we must have an even number of segments */ - remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2; - pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2; - } - /* Convert segments into size for DML use */ - pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB; - - crb_idx++; - } - pipe_cnt++; - } - } - - if (pipe_cnt) - context->bw_ctx.dml.ip.det_buffer_size_kbytes = - (max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB / pipe_cnt) * DCN3_15_CRB_SEGMENT_SIZE_KB; - if (context->bw_ctx.dml.ip.det_buffer_size_kbytes > DCN3_15_MAX_DET_SIZE) - context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_15_MAX_DET_SIZE; - - dc->config.enable_4to1MPC = false; - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { - if (is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { - dc->config.enable_4to1MPC = true; - context->bw_ctx.dml.ip.det_buffer_size_kbytes = - (max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB / 4) * DCN3_15_CRB_SEGMENT_SIZE_KB; - } else if (!is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 5120 - && pipe->stream->timing.pix_clk_100hz < dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)) { - /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - pipes[0].pipe.src.unbounded_req_mode = true; - } - } - - return pipe_cnt; -} - -static void dcn315_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - *panel_config = panel_config_defaults; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -static struct resource_funcs dcn315_res_pool_funcs = { - .destroy = dcn315_destroy_resource_pool, - .link_enc_create = dcn31_link_encoder_create, - .link_enc_create_minimal = dcn31_link_enc_create_minimal, - .link_encs_assign = link_enc_cfg_link_encs_assign, - .link_enc_unassign = link_enc_cfg_link_enc_unassign, - .panel_cntl_create = dcn31_panel_cntl_create, - 
.validate_bandwidth = dcn31_validate_bandwidth, - .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, - .update_soc_for_wm_a = dcn315_update_soc_for_wm_a, - .populate_dml_pipes = dcn315_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn31_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn31_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn315_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_panel_config_defaults = dcn315_get_panel_config_defaults, -}; - -static bool dcn315_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn315_resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dcn31; - - pool->base.funcs = &dcn315_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - if (dc->config.forceHBR2CP2520) - dc->caps.force_dp_tps4_for_cp2520 = false; - dc->caps.dp_hpo = true; - dc->caps.dp_hdmi21_pcon_support = true; - dc->caps.edp_dsc_support = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.is_apu = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN301 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - 
dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - /* read VBIOS LTTPR caps */ - { - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - /* interop bit is implicit */ - { - dc->caps.vbios_lttpr_aware = true; - } - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN31_CLK_SRC_PLL0] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL1] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* TODO: DCCG */ - pool->base.dccg = dccg31_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* TODO: IRQ */ - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn315_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn31_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.hubps[i] = dcn31_hubp_create(ctx, i); - if (pool->base.hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->base.dpps[i] = dcn31_dpp_create(ctx, i); - if (pool->base.dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - pool->base.opps[i] = dcn31_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - 
pool->base.timing_generators[i] = dcn31_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - } - pool->base.timing_generator_count = i; - - /* PSR */ - pool->base.psr = dmub_psr_create(ctx); - if (pool->base.psr == NULL) { - dm_error("DC: failed to create psr obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* ABM */ - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.multiple_abms[i] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* MPC and DSC */ - pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn31_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn31_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn31_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane caps */ - dcn31_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - dc->dcn_ip->max_num_dpp = dcn3_15_ip.max_num_dpp; - - return true; - -create_fail: - - dcn315_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn315_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn315_resource_pool *pool = - kzalloc(sizeof(struct dcn315_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn315_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h deleted file mode 100644 index 22849eaa6f24..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2021 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef _DCN315_RESOURCE_H_ -#define _DCN315_RESOURCE_H_ - -#include "core_types.h" - -#define TO_DCN315_RES_POOL(pool)\ - container_of(pool, struct dcn315_resource_pool, base) - -extern struct _vcs_dpi_ip_params_st dcn3_15_ip; - -struct dcn315_resource_pool { - struct resource_pool base; -}; - -struct resource_pool *dcn315_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -#endif /* _DCN315_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile deleted file mode 100644 index 819d44a9439b..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright 2021 Advanced Micro Devices, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# Authors: AMD -# -# Makefile for dcn316. - -DCN316 = dcn316_resource.o - -AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN316) diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c deleted file mode 100644 index b9753d4606f8..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ /dev/null @@ -1,2038 +0,0 @@ -/* - * Copyright 2021 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - - -#include "dm_services.h" -#include "dc.h" - -#include "dcn31/dcn31_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn316_resource.h" - -#include "dcn20/dcn20_resource.h" -#include "dcn30/dcn30_resource.h" -#include "dcn31/dcn31_resource.h" - -#include "dcn10/dcn10_ipp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn31/dcn31_hubbub.h" -#include "dcn30/dcn30_mpc.h" -#include "dcn31/dcn31_hubp.h" -#include "irq/dcn31/irq_service_dcn31.h" -#include "dcn30/dcn30_dpp.h" -#include "dcn31/dcn31_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn30/dcn30_opp.h" -#include "dcn20/dcn20_dsc.h" -#include "dcn30/dcn30_vpg.h" -#include "dcn30/dcn30_afmt.h" -#include "dcn30/dcn30_dio_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_link_encoder.h" -#include "dcn31/dcn31_apg.h" -#include "dcn31/dcn31_dio_link_encoder.h" -#include "dcn31/dcn31_vpg.h" -#include "dcn31/dcn31_afmt.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "clk_mgr.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dml/display_mode_vba.h" -#include "dml/dcn31/dcn31_fpu.h" -#include "dcn31/dcn31_dccg.h" -#include "dcn10/dcn10_resource.h" -#include "dcn31/dcn31_panel_cntl.h" - -#include "dcn30/dcn30_dwb.h" -#include "dcn30/dcn30_mmhubbub.h" - -#include "dcn/dcn_3_1_6_offset.h" -#include "dcn/dcn_3_1_6_sh_mask.h" -#include "dpcs/dpcs_4_2_3_offset.h" -#include "dpcs/dpcs_4_2_3_sh_mask.h" - -#define regBIF_BX1_BIOS_SCRATCH_2 0x003a -#define regBIF_BX1_BIOS_SCRATCH_2_BASE_IDX 1 -#define regBIF_BX1_BIOS_SCRATCH_3 0x003b -#define regBIF_BX1_BIOS_SCRATCH_3_BASE_IDX 1 -#define regBIF_BX1_BIOS_SCRATCH_6 0x003e -#define regBIF_BX1_BIOS_SCRATCH_6_BASE_IDX 1 - -#define regDCHUBBUB_DEBUG_CTRL_0 0x04d6 -#define regDCHUBBUB_DEBUG_CTRL_0_BASE_IDX 2 -#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10 -#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L - -#define DCN_BASE__INST0_SEG0 0x00000012 -#define DCN_BASE__INST0_SEG1 0x000000C0 -#define DCN_BASE__INST0_SEG2 0x000034C0 -#define DCN_BASE__INST0_SEG3 0x00009000 -#define DCN_BASE__INST0_SEG4 0x02403C00 -#define DCN_BASE__INST0_SEG5 0 - -#define DPCS_BASE__INST0_SEG0 0x00000012 -#define DPCS_BASE__INST0_SEG1 0x000000C0 -#define 
DPCS_BASE__INST0_SEG2 0x000034C0 -#define DPCS_BASE__INST0_SEG3 0x00009000 -#define DPCS_BASE__INST0_SEG4 0x02403C00 -#define DPCS_BASE__INST0_SEG5 0 - -#define NBIO_BASE__INST0_SEG0 0x00000000 -#define NBIO_BASE__INST0_SEG1 0x00000014 -#define NBIO_BASE__INST0_SEG2 0x00000D20 -#define NBIO_BASE__INST0_SEG3 0x00010400 -#define NBIO_BASE__INST0_SEG4 0x0241B000 -#define NBIO_BASE__INST0_SEG5 0x04040000 - -#include "reg_helper.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" - -#include "dml/dcn30/display_mode_vba_30.h" -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" - -#include "link_enc_cfg.h" - -#define DCN3_16_MAX_DET_SIZE 384 -#define DCN3_16_MIN_COMPBUF_SIZE_KB 128 -#define DCN3_16_CRB_SEGMENT_SIZE_KB 64 - -enum dcn31_clk_src_array_id { - DCN31_CLK_SRC_PLL0, - DCN31_CLK_SRC_PLL1, - DCN31_CLK_SRC_PLL2, - DCN31_CLK_SRC_PLL3, - DCN31_CLK_SRC_PLL4, - DCN30_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file - */ - -/* DCN */ -#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRI(reg_name, block, id)\ - .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_DWB(reg_name, temp_name, block, id)\ - .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define DCCG_SRII(reg_name, block, id)\ - .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - reg ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) \ - NBIO_BASE__INST0_SEG ## seg - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - .reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX1_ ## reg_name - -static const struct bios_registers bios_regs = { - NBIO_SR(BIOS_SCRATCH_3), - NBIO_SR(BIOS_SCRATCH_6) -}; - -#define clk_src_regs(index, pllid)\ -[index] = {\ - CS_COMMON_REG_LIST_DCN3_0(index, pllid),\ -} - -static const struct dce110_clk_src_regs clk_src_regs[] = { - clk_src_regs(0, A), - clk_src_regs(1, B), - clk_src_regs(2, C), - clk_src_regs(3, D), - clk_src_regs(4, E) -}; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) -}; - -#define abm_regs(id)\ -[id] = {\ - ABM_DCN302_REG_LIST(id)\ -} - -static const struct 
dce_abm_registers abm_regs[] = { - abm_regs(0), - abm_regs(1), - abm_regs(2), - abm_regs(3), -}; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN30(_MASK) -}; - -#define audio_regs(id)\ -[id] = {\ - AUD_COMMON_REG_LIST(id)\ -} - -static const struct dce_audio_registers audio_regs[] = { - audio_regs(0), - audio_regs(1), - audio_regs(2), - audio_regs(3), - audio_regs(4), - audio_regs(5), - audio_regs(6) -}; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs(id)\ -[id] = {\ - VPG_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_vpg_registers vpg_regs[] = { - vpg_regs(0), - vpg_regs(1), - vpg_regs(2), - vpg_regs(3), - vpg_regs(4), - vpg_regs(5), - vpg_regs(6), - vpg_regs(7), - vpg_regs(8), - vpg_regs(9), -}; - -static const struct dcn31_vpg_shift vpg_shift = { - DCN31_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_vpg_mask vpg_mask = { - DCN31_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs(id)\ -[id] = {\ - AFMT_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_afmt_registers afmt_regs[] = { - afmt_regs(0), - afmt_regs(1), - afmt_regs(2), - afmt_regs(3), - afmt_regs(4), - afmt_regs(5) -}; - -static const struct dcn31_afmt_shift afmt_shift = { - DCN31_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_afmt_mask afmt_mask = { - DCN31_AFMT_MASK_SH_LIST(_MASK) -}; - - -#define apg_regs(id)\ -[id] = {\ - APG_DCN31_REG_LIST(id)\ -} - -static const struct dcn31_apg_registers apg_regs[] = { - apg_regs(0), - apg_regs(1), - apg_regs(2), - apg_regs(3) -}; - -static const struct dcn31_apg_shift apg_shift = { - DCN31_APG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_apg_mask apg_mask = { - DCN31_APG_MASK_SH_LIST(_MASK) -}; - - -#define stream_enc_regs(id)\ -[id] = {\ - SE_DCN3_REG_LIST(id)\ -} - -static const struct dcn10_stream_enc_registers stream_enc_regs[] = { - stream_enc_regs(0), - stream_enc_regs(1), - stream_enc_regs(2), - stream_enc_regs(3), - stream_enc_regs(4) -}; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - - -#define aux_regs(id)\ -[id] = {\ - DCN2_AUX_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { - aux_regs(0), - aux_regs(1), - aux_regs(2), - aux_regs(3), - aux_regs(4) -}; - -#define hpd_regs(id)\ -[id] = {\ - HPD_REG_LIST(id)\ -} - -static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { - hpd_regs(0), - hpd_regs(1), - hpd_regs(2), - hpd_regs(3), - hpd_regs(4) -}; - -#define link_regs(id, phyid)\ -[id] = {\ - LE_DCN31_REG_LIST(id), \ - UNIPHY_DCN2_REG_LIST(phyid), \ - DPCS_DCN31_REG_LIST(id), \ -} - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -static const struct dcn10_link_enc_registers link_enc_regs[] = { - link_regs(0, A), - link_regs(1, 
B), - link_regs(2, C), - link_regs(3, D), - link_regs(4, E) -}; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ - DPCS_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ - DPCS_DCN31_MASK_SH_LIST(_MASK) -}; - - - -#define hpo_dp_stream_encoder_reg_list(id)\ -[id] = {\ - DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\ -} - -static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = { - hpo_dp_stream_encoder_reg_list(0), - hpo_dp_stream_encoder_reg_list(1), - hpo_dp_stream_encoder_reg_list(2), - hpo_dp_stream_encoder_reg_list(3), -}; - -static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) -}; - - -#define hpo_dp_link_encoder_reg_list(id)\ -[id] = {\ - DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\ - DCN3_1_RDPCSTX_REG_LIST(0),\ - DCN3_1_RDPCSTX_REG_LIST(1),\ - DCN3_1_RDPCSTX_REG_LIST(2),\ - DCN3_1_RDPCSTX_REG_LIST(3),\ - DCN3_1_RDPCSTX_REG_LIST(4)\ -} - -static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = { - hpo_dp_link_encoder_reg_list(0), - hpo_dp_link_encoder_reg_list(1), -}; - -static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { - DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { - DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) -}; - - -#define dpp_regs(id)\ -[id] = {\ - DPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn3_dpp_registers dpp_regs[] = { - dpp_regs(0), - dpp_regs(1), - dpp_regs(2), - dpp_regs(3) -}; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30(_MASK) -}; - -#define opp_regs(id)\ -[id] = {\ - OPP_REG_LIST_DCN30(id),\ -} - -static const struct dcn20_opp_registers opp_regs[] = { - opp_regs(0), - opp_regs(1), - opp_regs(2), - opp_regs(3) -}; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define aux_engine_regs(id)\ -[id] = {\ - AUX_COMMON_REG_LIST0(id), \ - .AUXN_IMPCAL = 0, \ - .AUXP_IMPCAL = 0, \ - .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ -} - -static const struct dce110_aux_registers aux_engine_regs[] = { - aux_engine_regs(0), - aux_engine_regs(1), - aux_engine_regs(2), - aux_engine_regs(3), - aux_engine_regs(4) -}; - -#define dwbc_regs_dcn3(id)\ -[id] = {\ - DWBC_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_dwbc_registers dwbc30_regs[] = { - dwbc_regs_dcn3(0), -}; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define mcif_wb_regs_dcn3(id)\ -[id] = {\ - MCIF_WB_COMMON_REG_LIST_DCN30(id),\ -} - -static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { - mcif_wb_regs_dcn3(0) -}; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define dsc_regsDCN20(id)\ -[id] = {\ - DSC_REG_LIST_DCN20(id)\ -} - -static 
const struct dcn20_dsc_registers dsc_regs[] = { - dsc_regsDCN20(0), - dsc_regsDCN20(1), - dsc_regsDCN20(2) -}; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static const struct dcn30_mpc_registers mpc_regs = { - MPC_REG_LIST_DCN3_0(0), - MPC_REG_LIST_DCN3_0(1), - MPC_REG_LIST_DCN3_0(2), - MPC_REG_LIST_DCN3_0(3), - MPC_OUT_MUX_REG_LIST_DCN3_0(0), - MPC_OUT_MUX_REG_LIST_DCN3_0(1), - MPC_OUT_MUX_REG_LIST_DCN3_0(2), - MPC_OUT_MUX_REG_LIST_DCN3_0(3), - MPC_RMU_GLOBAL_REG_LIST_DCN3AG, - MPC_RMU_REG_LIST_DCN3AG(0), - MPC_RMU_REG_LIST_DCN3AG(1), - //MPC_RMU_REG_LIST_DCN3AG(2), - MPC_DWB_MUX_REG_LIST_DCN3_0(0), -}; - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define optc_regs(id)\ -[id] = {OPTC_COMMON_REG_LIST_DCN3_1(id)} - -static const struct dcn_optc_registers optc_regs[] = { - optc_regs(0), - optc_regs(1), - optc_regs(2), - optc_regs(3) -}; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN3_1(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN3_1(_MASK) -}; - -#define hubp_regs(id)\ -[id] = {\ - HUBP_REG_LIST_DCN30(id)\ -} - -static const struct dcn_hubp2_registers hubp_regs[] = { - hubp_regs(0), - hubp_regs(1), - hubp_regs(2), - hubp_regs(3) -}; - - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN31(_MASK) -}; -static const struct dcn_hubbub_registers hubbub_reg = { - HUBBUB_REG_LIST_DCN31(0) -}; - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN31(_MASK) -}; - -static const struct dccg_registers dccg_regs = { - DCCG_REG_LIST_DCN31() -}; - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN31(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN31(_MASK) -}; - - -#define SRII2(reg_name_pre, reg_name_post, id)\ - .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ - ## id ## _ ## reg_name_post ## _BASE_IDX) + \ - reg ## reg_name_pre ## id ## _ ## reg_name_post - - -#define HWSEQ_DCN31_REG_LIST()\ - SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ - SR(DIO_MEM_PWR_CTRL), \ - SR(ODM_MEM_PWR_CTRL3), \ - SR(DMU_MEM_PWR_CNTL), \ - SR(MMHUBBUB_MEM_PWR_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL2), \ - SR(DCFCLK_CNTL),\ - SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ - SRII(PIXEL_RATE_CNTL, OTG, 0), \ - SRII(PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PIXEL_RATE_CNTL, OTG, 3),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ - SR(MICROSECOND_TIME_BASE_DIV), \ - SR(MILLISECOND_TIME_BASE_DIV), \ - SR(DISPCLK_FREQ_CHANGE_CNTL), \ - SR(RBBMIF_TIMEOUT_DIS), \ - SR(RBBMIF_TIMEOUT_DIS_2), \ - SR(DCHUBBUB_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ - SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ - SR(MPC_CRC_CTRL), \ - SR(MPC_CRC_RESULT_GB), \ - SR(MPC_CRC_RESULT_C), \ - SR(MPC_CRC_RESULT_AR), \ - SR(DOMAIN0_PG_CONFIG), \ - SR(DOMAIN1_PG_CONFIG), \ - 
SR(DOMAIN2_PG_CONFIG), \ - SR(DOMAIN3_PG_CONFIG), \ - SR(DOMAIN16_PG_CONFIG), \ - SR(DOMAIN17_PG_CONFIG), \ - SR(DOMAIN18_PG_CONFIG), \ - SR(DOMAIN0_PG_STATUS), \ - SR(DOMAIN1_PG_STATUS), \ - SR(DOMAIN2_PG_STATUS), \ - SR(DOMAIN3_PG_STATUS), \ - SR(DOMAIN16_PG_STATUS), \ - SR(DOMAIN17_PG_STATUS), \ - SR(DOMAIN18_PG_STATUS), \ - SR(D1VGA_CONTROL), \ - SR(D2VGA_CONTROL), \ - SR(D3VGA_CONTROL), \ - SR(D4VGA_CONTROL), \ - SR(D5VGA_CONTROL), \ - SR(D6VGA_CONTROL), \ - SR(DC_IP_REQUEST_CNTL), \ - SR(AZALIA_AUDIO_DTO), \ - SR(AZALIA_CONTROLLER_CLOCK_GATING), \ - SR(HPO_TOP_HW_CONTROL) - -static const struct dce_hwseq_registers hwseq_reg = { - HWSEQ_DCN31_REG_LIST() -}; - -#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ - HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ - HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ - HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ - HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ - HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ - HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \ - HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ - HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh) - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN31_MASK_SH_LIST(_MASK) -}; -#define vmid_regs(id)\ -[id] = {\ - DCN20_VMID_REG_LIST(id)\ -} - -static const struct dcn_vmid_registers vmid_regs[] = { - vmid_regs(0), - vmid_regs(1), - vmid_regs(2), - vmid_regs(3), - vmid_regs(4), - vmid_regs(5), - vmid_regs(6), - vmid_regs(7), - vmid_regs(8), - vmid_regs(9), - vmid_regs(10), - vmid_regs(11), - vmid_regs(12), - vmid_regs(13), - vmid_regs(14), - vmid_regs(15) -}; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct 
dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct resource_caps res_cap_dcn31 = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 5, - .num_stream_encoder = 5, - .num_dig_link_enc = 5, - .num_hpo_dp_stream_encoder = 4, - .num_hpo_dp_link_encoder = 2, - .num_pll = 5, - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_mpc_3dlut = 2, - .num_dsc = 3, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - // 6:1 downscaling ratio: 1000/6 = 166.666 - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_z10 = true, /*hw not support it*/ - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 4096,/*upto true 4k*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .pstate_enabled = true, - .use_max_lb = true, - .enable_mem_low_power = { - .bits = { - .vga = true, - .i2c = true, - .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled - .dscl = true, - .cm = true, - .mpc = true, - .optc = true, - .vpg = true, - .afmt = true, - } - }, - .enable_legacy_fast_update = true, - .using_dml2 = false, -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - .disallow_replay = false, - }, - .ilr = { - .optimize_edp_link_rate = true, - }, -}; - -static void dcn31_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN20_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn31_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn3_dpp *dpp = - kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - if (!dpp) - return NULL; - - if (dpp3_construct(dpp, ctx, inst, - &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -static struct output_pixel_processor *dcn31_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dcn20_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp->base; -} - -static struct dce_aux *dcn31_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } - -static const struct dce_i2c_registers i2c_hw_regs[] = { - 
i2c_inst_regs(1), - i2c_inst_regs(2), - i2c_inst_regs(3), - i2c_inst_regs(4), - i2c_inst_regs(5), -}; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct dce_i2c_hw *dcn31_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct mpc *dcn31_mpc_create( - struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), - GFP_KERNEL); - - if (!mpc30) - return NULL; - - dcn30_mpc_construct(mpc30, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub3) - return NULL; - - hubbub31_construct(hubbub3, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask, - dcn3_16_ip.det_buffer_size_kbytes, - dcn3_16_ip.pixel_chunk_size_kbytes, - dcn3_16_ip.config_return_buffer_size_in_kbytes); - - - for (i = 0; i < res_cap_dcn31.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub3->base; -} - -static struct timing_generator *dcn31_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn31_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn31_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - - dcn31_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -/* Create a minimal link encoder object not associated with a particular - * physical connector. 
- * resource_funcs.link_enc_create_minimal - */ -static struct link_encoder *dcn31_link_enc_create_minimal( - struct dc_context *ctx, enum engine_id eng_id) -{ - struct dcn20_link_encoder *enc20; - - if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) - return NULL; - - enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - if (!enc20) - return NULL; - - dcn31_link_encoder_construct_minimal( - enc20, - ctx, - &link_enc_feature, - &link_enc_regs[eng_id - ENGINE_ID_DIGA], - eng_id); - - return &enc20->enc10.base; -} - -static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dcn31_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dcn31_panel_cntl_construct(panel_cntl, init_data); - - return &panel_cntl->base; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn31_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct vpg *dcn31_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); - - if (!vpg31) - return NULL; - - vpg31_construct(vpg31, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg31->base; -} - -static struct afmt *dcn31_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); - - if (!afmt31) - return NULL; - - afmt31_construct(afmt31, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - // Light sleep by default, no need to power down here - - return &afmt31->base; -} - - -static struct apg *dcn31_apg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); - - if (!apg31) - return NULL; - - apg31_construct(apg31, ctx, inst, - &apg_regs[inst], - &apg_shift, - &apg_mask); - - return &apg31->base; -} - - -static struct stream_encoder *dcn316_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - afmt = dcn31_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - - dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - - -static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; - struct vpg *vpg; - struct apg *apg; - uint32_t hpo_dp_inst; - uint32_t vpg_inst; - uint32_t apg_inst; - - ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); - hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; - - /* Mapping of VPG register 
blocks to HPO DP block instance: - * VPG[6] -> HPO_DP[0] - * VPG[7] -> HPO_DP[1] - * VPG[8] -> HPO_DP[2] - * VPG[9] -> HPO_DP[3] - */ - vpg_inst = hpo_dp_inst + 6; - - /* Mapping of APG register blocks to HPO DP block instance: - * APG[0] -> HPO_DP[0] - * APG[1] -> HPO_DP[1] - * APG[2] -> HPO_DP[2] - * APG[3] -> HPO_DP[3] - */ - apg_inst = hpo_dp_inst; - - /* allocate HPO stream encoder and create VPG sub-block */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - apg = dcn31_apg_create(ctx, apg_inst); - - if (!hpo_dp_enc31 || !vpg || !apg) { - kfree(hpo_dp_enc31); - kfree(vpg); - kfree(apg); - return NULL; - } - - dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, - hpo_dp_inst, eng_id, vpg, apg, - &hpo_dp_stream_enc_regs[hpo_dp_inst], - &hpo_dp_se_shift, &hpo_dp_se_mask); - - return &hpo_dp_enc31->base; -} - -static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( - uint8_t inst, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; - - /* allocate HPO link encoder */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); - - hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, - &hpo_dp_link_enc_regs[inst], - &hpo_dp_le_shift, &hpo_dp_le_mask); - - return &hpo_dp_enc31->base; -} - - -static struct dce_hwseq *dcn31_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn31_create_audio, - .create_stream_encoder = dcn316_stream_encoder_create, - .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, - .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, - .create_hwseq = dcn31_hwseq_create, -}; - -static void dcn316_resource_destruct(struct dcn316_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { - if (pool->base.hpo_dp_stream_enc[i] != NULL) { - if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); - pool->base.hpo_dp_stream_enc[i]->vpg = NULL; - } - if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { - kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); - pool->base.hpo_dp_stream_enc[i]->apg = NULL; - } - kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); - pool->base.hpo_dp_stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { - if (pool->base.hpo_dp_link_enc[i] != NULL) { - kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); - pool->base.hpo_dp_link_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - 
dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn31_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); -} - -static struct hubp *dcn31_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - - if (hubp31_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - - 
dcn30_dwbc_construct(dwbc30, ctx, - &dwbc30_regs[i], - &dwbc30_shift, - &dwbc30_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - - dcn30_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb30_regs[i], - &mcif_wb30_shift, - &mcif_wb30_mask, - i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn31_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - return &dsc->base; -} - -static void dcn316_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn316_resource_pool *dcn31_pool = TO_DCN316_RES_POOL(*pool); - - dcn316_resource_destruct(dcn31_pool); - kfree(dcn31_pool); - *pool = NULL; -} - -static struct clock_source *dcn31_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn31_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - - BREAK_TO_DEBUGGER(); - return NULL; -} - -static bool is_dual_plane(enum surface_pixel_format format) -{ - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; -} - -static int dcn316_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe; - const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_16_MIN_COMPBUF_SIZE_KB; - - DC_FP_START(); - dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - DC_FP_END(); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - /* - * Immediate flip can be set dynamically after enabling the plane. - * We need to require support for immediate flip or underflow can be - * intermittently experienced depending on peak b/w requirements. 
- */ - pipes[pipe_cnt].pipe.src.immediate_flip = true; - - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.src.dcc_rate = 3; - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - DC_FP_START(); - dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - DC_FP_END(); - - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - - pipe_cnt++; - } - - if (pipe_cnt) - context->bw_ctx.dml.ip.det_buffer_size_kbytes = - (max_usable_det / DCN3_16_CRB_SEGMENT_SIZE_KB / pipe_cnt) * DCN3_16_CRB_SEGMENT_SIZE_KB; - if (context->bw_ctx.dml.ip.det_buffer_size_kbytes > DCN3_16_MAX_DET_SIZE) - context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_16_MAX_DET_SIZE; - ASSERT(context->bw_ctx.dml.ip.det_buffer_size_kbytes >= DCN3_16_DEFAULT_DET_SIZE); - dc->config.enable_4to1MPC = false; - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { - if (is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { - dc->config.enable_4to1MPC = true; - context->bw_ctx.dml.ip.det_buffer_size_kbytes = - (max_usable_det / DCN3_16_CRB_SEGMENT_SIZE_KB / 4) * DCN3_16_CRB_SEGMENT_SIZE_KB; - } else if (!is_dual_plane(pipe->plane_state->format)) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - pipes[0].pipe.src.unbounded_req_mode = true; - } - } - - return pipe_cnt; -} - -static void dcn316_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - *panel_config = panel_config_defaults; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -static struct resource_funcs dcn316_res_pool_funcs = { - .destroy = dcn316_destroy_resource_pool, - .link_enc_create = dcn31_link_encoder_create, - .link_enc_create_minimal = dcn31_link_enc_create_minimal, - .link_encs_assign = link_enc_cfg_link_encs_assign, - .link_enc_unassign = link_enc_cfg_link_enc_unassign, - .panel_cntl_create = dcn31_panel_cntl_create, - .validate_bandwidth = dcn31_validate_bandwidth, - .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, - .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn316_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn31_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn31_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn316_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_panel_config_defaults = dcn316_get_panel_config_defaults, -}; - -static bool dcn316_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn316_resource_pool *pool) -{ - int i; - struct dc_context *ctx = 
dc->ctx; - struct irq_service_init_data init_data; - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dcn31; - - pool->base.funcs = &dcn316_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.5 w/a applied by default*/ - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - if (dc->config.forceHBR2CP2520) - dc->caps.force_dp_tps4_for_cp2520 = false; - dc->caps.dp_hpo = true; - dc->caps.dp_hdmi21_pcon_support = true; - dc->caps.edp_dsc_support = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.is_apu = true; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN301 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - /* read VBIOS LTTPR caps */ - { - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - /* interop bit is implicit */ - { - dc->caps.vbios_lttpr_aware = true; - } - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN31_CLK_SRC_PLL0] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL1] = - 
dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn31_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* TODO: DCCG */ - pool->base.dccg = dccg31_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* TODO: IRQ */ - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn31_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn31_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.hubps[i] = dcn31_hubp_create(ctx, i); - if (pool->base.hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->base.dpps[i] = dcn31_dpp_create(ctx, i); - if (pool->base.dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - pool->base.opps[i] = dcn31_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.timing_generators[i] = dcn31_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - } - pool->base.timing_generator_count = i; - - /* PSR */ - pool->base.psr = dmub_psr_create(ctx); - if (pool->base.psr == NULL) { - dm_error("DC: failed to create psr obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* ABM */ - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.multiple_abms[i] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* MPC and DSC */ - pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn31_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to 
create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn31_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn31_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane caps */ - dcn31_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - dc->dcn_ip->max_num_dpp = dcn3_16_ip.max_num_dpp; - - return true; - -create_fail: - - dcn316_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn316_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn316_resource_pool *pool = - kzalloc(sizeof(struct dcn316_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn316_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h deleted file mode 100644 index aba6d634131b..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef _DCN316_RESOURCE_H_ -#define _DCN316_RESOURCE_H_ - -#include "core_types.h" - -#define TO_DCN316_RES_POOL(pool)\ - container_of(pool, struct dcn316_resource_pool, base) - -extern struct _vcs_dpi_ip_params_st dcn3_16_ip; - -struct dcn316_resource_pool { - struct resource_pool base; -}; - -struct resource_pool *dcn316_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -#endif /* _DCN316_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile index 8bb251307247..3bb17dd01e4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile @@ -10,7 +10,7 @@ # # Makefile for dcn32. -DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \ +DCN32 = dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \ dcn32_dccg.o dcn32_optc.o dcn32_mmhubbub.o dcn32_hubp.o dcn32_dpp.o \ dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \ dcn32_resource_helpers.o dcn32_mpc.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c deleted file mode 100644 index 89b072447dba..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ /dev/null @@ -1,2862 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "dc.h" - -#include "dcn32_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn32_resource.h" - -#include "dcn20/dcn20_resource.h" -#include "dcn30/dcn30_resource.h" - -#include "dcn10/dcn10_ipp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn31/dcn31_hubbub.h" -#include "dcn32/dcn32_hubbub.h" -#include "dcn32/dcn32_mpc.h" -#include "dcn32_hubp.h" -#include "irq/dcn32/irq_service_dcn32.h" -#include "dcn32/dcn32_dpp.h" -#include "dcn32/dcn32_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn30/dcn30_opp.h" -#include "dcn20/dcn20_dsc.h" -#include "dcn30/dcn30_vpg.h" -#include "dcn30/dcn30_afmt.h" -#include "dcn30/dcn30_dio_stream_encoder.h" -#include "dcn32/dcn32_dio_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_link_encoder.h" -#include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "dcn31/dcn31_apg.h" -#include "dcn31/dcn31_dio_link_encoder.h" -#include "dcn32/dcn32_dio_link_encoder.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "clk_mgr.h" -#include "virtual/virtual_stream_encoder.h" -#include "dml/display_mode_vba.h" -#include "dcn32/dcn32_dccg.h" -#include "dcn10/dcn10_resource.h" -#include "link.h" -#include "dcn31/dcn31_panel_cntl.h" - -#include "dcn30/dcn30_dwb.h" -#include "dcn32/dcn32_mmhubbub.h" - -#include "dcn/dcn_3_2_0_offset.h" -#include "dcn/dcn_3_2_0_sh_mask.h" -#include "nbio/nbio_4_3_0_offset.h" - -#include "reg_helper.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" - -#include "dml/dcn30/display_mode_vba_30.h" -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" -#include "dml/dcn32/dcn32_fpu.h" - -#include "dml2/dml2_wrapper.h" - -#define DC_LOGGER_INIT(logger) - -enum dcn32_clk_src_array_id { - DCN32_CLK_SRC_PLL0, - DCN32_CLK_SRC_PLL1, - DCN32_CLK_SRC_PLL2, - DCN32_CLK_SRC_PLL3, - DCN32_CLK_SRC_PLL4, - DCN32_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file - */ - -/* DCN */ -#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name -#define SR_ARR(reg_name, id) \ - REG_STRUCT[id].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name - -#define SR_ARR_INIT(reg_name, id, value) \ - REG_STRUCT[id].reg_name = value - -#define SRI(reg_name, block, id)\ - REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI_ARR(reg_name, block, id)\ - REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SR_ARR_I2C(reg_name, id) \ - REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name - -#define SRI_ARR_I2C(reg_name, block, id)\ - REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI_ARR_ALPHABET(reg_name, block, index, id)\ - REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name -#define 
SRI2_ARR(reg_name, block, id)\ - REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_ARR_2(reg_name, block, id, inst)\ - REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_DWB(reg_name, temp_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define DCCG_SRII(reg_name, block, id)\ - REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - reg ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg] - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX0_ ## reg_name -#define NBIO_SR_ARR(reg_name, id)\ - REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX0_ ## reg_name - -#undef CTX -#define CTX ctx -#define REG(reg_name) \ - (ctx->dcn_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) - -static struct bios_registers bios_regs; - -#define bios_regs_init() \ - ( \ - NBIO_SR(BIOS_SCRATCH_3),\ - NBIO_SR(BIOS_SCRATCH_6)\ - ) - -#define clk_src_regs_init(index, pllid)\ - CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) - -static struct dce110_clk_src_regs clk_src_regs[5]; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN3_2(_MASK) -}; - -#define abm_regs_init(id)\ - ABM_DCN32_REG_LIST_RI(id) - -static struct dce_abm_registers abm_regs[4]; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN32(_MASK) -}; - -#define audio_regs_init(id)\ - AUD_COMMON_REG_LIST_RI(id) - -static struct dce_audio_registers audio_regs[5]; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs_init(id)\ - VPG_DCN3_REG_LIST_RI(id) - -static struct dcn30_vpg_registers vpg_regs[10]; - -static const struct dcn30_vpg_shift vpg_shift = { - DCN3_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_vpg_mask vpg_mask = 
{ - DCN3_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs_init(id)\ - AFMT_DCN3_REG_LIST_RI(id) - -static struct dcn30_afmt_registers afmt_regs[6]; - -static const struct dcn30_afmt_shift afmt_shift = { - DCN3_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_afmt_mask afmt_mask = { - DCN3_AFMT_MASK_SH_LIST(_MASK) -}; - -#define apg_regs_init(id)\ - APG_DCN31_REG_LIST_RI(id) - -static struct dcn31_apg_registers apg_regs[4]; - -static const struct dcn31_apg_shift apg_shift = { - DCN31_APG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_apg_mask apg_mask = { - DCN31_APG_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs_init(id)\ - SE_DCN32_REG_LIST_RI(id) - -static struct dcn10_stream_enc_registers stream_enc_regs[5]; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN32(_MASK) -}; - - -#define aux_regs_init(id)\ - DCN2_AUX_REG_LIST_RI(id) - -static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5]; - -#define hpd_regs_init(id)\ - HPD_REG_LIST_RI(id) - -static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5]; - -#define link_regs_init(id, phyid)\ - ( \ - LE_DCN31_REG_LIST_RI(id), \ - UNIPHY_DCN2_REG_LIST_RI(id, phyid)\ - ) - /*DPCS_DCN31_REG_LIST(id),*/ \ - -static struct dcn10_link_enc_registers link_enc_regs[5]; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ - //DPCS_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ - //DPCS_DCN31_MASK_SH_LIST(_MASK) -}; - -#define hpo_dp_stream_encoder_reg_init(id)\ - DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) - -static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4]; - -static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) -}; - - -#define hpo_dp_link_encoder_reg_init(id)\ - DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) - /*DCN3_1_RDPCSTX_REG_LIST(0),*/ - /*DCN3_1_RDPCSTX_REG_LIST(1),*/ - /*DCN3_1_RDPCSTX_REG_LIST(2),*/ - /*DCN3_1_RDPCSTX_REG_LIST(3),*/ - -static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[2]; - -static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { - DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { - DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) -}; - -#define dpp_regs_init(id)\ - DPP_REG_LIST_DCN30_COMMON_RI(id) - -static struct dcn3_dpp_registers dpp_regs[4]; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30_COMMON(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30_COMMON(_MASK) -}; - - -#define opp_regs_init(id)\ - OPP_REG_LIST_DCN30_RI(id) - -static struct dcn20_opp_registers opp_regs[4]; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define aux_engine_regs_init(id)\ - ( \ - AUX_COMMON_REG_LIST0_RI(id), \ - SR_ARR_INIT(AUXN_IMPCAL, id, 0), \ - SR_ARR_INIT(AUXP_IMPCAL, id, 0), \ - SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK), \ - SR_ARR_INIT(AUX_RESET_MASK, id, 
DP_AUX0_AUX_CONTROL__AUX_RESET_MASK)\ - ) - -static struct dce110_aux_registers aux_engine_regs[5]; - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -#define dwbc_regs_dcn3_init(id)\ - DWBC_COMMON_REG_LIST_DCN30_RI(id) - -static struct dcn30_dwbc_registers dwbc30_regs[1]; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define mcif_wb_regs_dcn3_init(id)\ - MCIF_WB_COMMON_REG_LIST_DCN32_RI(id) - -static struct dcn30_mmhubbub_registers mcif_wb30_regs[1]; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN32(_MASK) -}; - -#define dsc_regsDCN20_init(id)\ - DSC_REG_LIST_DCN20_RI(id) - -static struct dcn20_dsc_registers dsc_regs[4]; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static struct dcn30_mpc_registers mpc_regs; - -#define dcn_mpc_regs_init() \ - MPC_REG_LIST_DCN3_2_RI(0),\ - MPC_REG_LIST_DCN3_2_RI(1),\ - MPC_REG_LIST_DCN3_2_RI(2),\ - MPC_REG_LIST_DCN3_2_RI(3),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ - MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN32(_MASK) -}; - -#define optc_regs_init(id)\ - OPTC_COMMON_REG_LIST_DCN3_2_RI(id) - -static struct dcn_optc_registers optc_regs[4]; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN3_2(_MASK) -}; - -#define hubp_regs_init(id)\ - HUBP_REG_LIST_DCN32_RI(id) - -static struct dcn_hubp2_registers hubp_regs[4]; - - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN32(_MASK) -}; - -static struct dcn_hubbub_registers hubbub_reg; -#define hubbub_reg_init()\ - HUBBUB_REG_LIST_DCN32_RI(0) - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN32(_MASK) -}; - -static struct dccg_registers dccg_regs; - -#define dccg_regs_init()\ - DCCG_REG_LIST_DCN32_RI() - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN32(_MASK) -}; - - -#define SRII2(reg_name_pre, reg_name_post, id)\ - .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ - ## id ## _ ## reg_name_post ## _BASE_IDX) + \ - reg ## reg_name_pre ## id ## _ ## reg_name_post - - -#define HWSEQ_DCN32_REG_LIST()\ - SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DIO_MEM_PWR_CTRL), \ - SR(ODM_MEM_PWR_CTRL3), \ - SR(MMHUBBUB_MEM_PWR_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL2), \ - SR(DCFCLK_CNTL),\ - SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ - 
SRII(PIXEL_RATE_CNTL, OTG, 0), \ - SRII(PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PIXEL_RATE_CNTL, OTG, 3),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ - SR(MICROSECOND_TIME_BASE_DIV), \ - SR(MILLISECOND_TIME_BASE_DIV), \ - SR(DISPCLK_FREQ_CHANGE_CNTL), \ - SR(RBBMIF_TIMEOUT_DIS), \ - SR(RBBMIF_TIMEOUT_DIS_2), \ - SR(DCHUBBUB_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ - SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ - SR(MPC_CRC_CTRL), \ - SR(MPC_CRC_RESULT_GB), \ - SR(MPC_CRC_RESULT_C), \ - SR(MPC_CRC_RESULT_AR), \ - SR(DOMAIN0_PG_CONFIG), \ - SR(DOMAIN1_PG_CONFIG), \ - SR(DOMAIN2_PG_CONFIG), \ - SR(DOMAIN3_PG_CONFIG), \ - SR(DOMAIN16_PG_CONFIG), \ - SR(DOMAIN17_PG_CONFIG), \ - SR(DOMAIN18_PG_CONFIG), \ - SR(DOMAIN19_PG_CONFIG), \ - SR(DOMAIN0_PG_STATUS), \ - SR(DOMAIN1_PG_STATUS), \ - SR(DOMAIN2_PG_STATUS), \ - SR(DOMAIN3_PG_STATUS), \ - SR(DOMAIN16_PG_STATUS), \ - SR(DOMAIN17_PG_STATUS), \ - SR(DOMAIN18_PG_STATUS), \ - SR(DOMAIN19_PG_STATUS), \ - SR(D1VGA_CONTROL), \ - SR(D2VGA_CONTROL), \ - SR(D3VGA_CONTROL), \ - SR(D4VGA_CONTROL), \ - SR(D5VGA_CONTROL), \ - SR(D6VGA_CONTROL), \ - SR(DC_IP_REQUEST_CNTL), \ - SR(AZALIA_AUDIO_DTO), \ - SR(AZALIA_CONTROLLER_CLOCK_GATING) - -static struct dce_hwseq_registers hwseq_reg; - -#define hwseq_reg_init()\ - HWSEQ_DCN32_REG_LIST() - -#define HWSEQ_DCN32_MASK_SH_LIST(mask_sh)\ - HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ - HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ - HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ - HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ - HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, 
VGA_MEM_PWR_FORCE, mask_sh) - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN32_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN32_MASK_SH_LIST(_MASK) -}; -#define vmid_regs_init(id)\ - DCN20_VMID_REG_LIST_RI(id) - -static struct dcn_vmid_registers vmid_regs[16]; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct resource_caps res_cap_dcn32 = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 5, - .num_stream_encoder = 5, - .num_hpo_dp_stream_encoder = 4, - .num_hpo_dp_link_encoder = 2, - .num_pll = 5, - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_mpc_3dlut = 4, - .num_dsc = 4, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - // 6:1 downscaling ratio: 1000/6 = 166.666 - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_AVOID, // Due to CRB, no need to MPC split anymore - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .enable_mem_low_power = { - .bits = { - .vga = false, - .i2c = false, - .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled - .dscl = false, - .cm = false, - .mpc = false, - .optc = true, - } - }, - .use_max_lb = true, - .force_disable_subvp = false, - .exit_idle_opt_for_cursor_updates = true, - .using_dml2 = false, - .enable_single_display_2to1_odm_policy = true, - - /* Must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/ - .enable_double_buffered_dsc_pg_support = true, - .enable_dp_dig_pixel_rate_div_policy = 1, - .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback" - .alloc_extra_way_for_cursor = true, - .min_prefetch_in_strobe_ns = 60000, // 60us - .disable_unbounded_requesting = false, - .override_dispclk_programming = true, - .disable_fpo_optimizations = false, - .fpo_vactive_margin_us = 2000, // 2000us - .disable_fpo_vactive = false, - .disable_boot_optimizations = false, - .disable_subvp_high_refresh = false, - .disable_dp_plus_plus_wa = true, - .fpo_vactive_min_active_margin_us = 200, - .fpo_vactive_max_blank_us = 1000, - .enable_legacy_fast_update = false, -}; - -static struct dce_aux *dcn32_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT aux_engine_regs - aux_engine_regs_init(0), - aux_engine_regs_init(1), - aux_engine_regs_init(2), - aux_engine_regs_init(3), - 
aux_engine_regs_init(4); - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs_init(id)\ - I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) - -static struct dce_i2c_registers i2c_hw_regs[5]; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct dce_i2c_hw *dcn32_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT i2c_hw_regs - i2c_inst_regs_init(1), - i2c_inst_regs_init(2), - i2c_inst_regs_init(3), - i2c_inst_regs_init(4), - i2c_inst_regs_init(5); - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} - -static struct clock_source *dcn32_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn31_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static struct hubbub *dcn32_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub2 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub2) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT hubbub_reg - hubbub_reg_init(); - -#undef REG_STRUCT -#define REG_STRUCT vmid_regs - vmid_regs_init(0), - vmid_regs_init(1), - vmid_regs_init(2), - vmid_regs_init(3), - vmid_regs_init(4), - vmid_regs_init(5), - vmid_regs_init(6), - vmid_regs_init(7), - vmid_regs_init(8), - vmid_regs_init(9), - vmid_regs_init(10), - vmid_regs_init(11), - vmid_regs_init(12), - vmid_regs_init(13), - vmid_regs_init(14), - vmid_regs_init(15); - - hubbub32_construct(hubbub2, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask, - ctx->dc->dml.ip.det_buffer_size_kbytes, - ctx->dc->dml.ip.pixel_chunk_size_kbytes, - ctx->dc->dml.ip.config_return_buffer_size_in_kbytes); - - - for (i = 0; i < res_cap_dcn32.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub2->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub2->base; -} - -static struct hubp *dcn32_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT hubp_regs - hubp_regs_init(0), - hubp_regs_init(1), - hubp_regs_init(2), - hubp_regs_init(3); - - if (hubp32_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static void dcn32_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN30_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn32_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn3_dpp *dpp3 = - kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - - 
if (!dpp3) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT dpp_regs - dpp_regs_init(0), - dpp_regs_init(1), - dpp_regs_init(2), - dpp_regs_init(3); - - if (dpp32_construct(dpp3, ctx, inst, - &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp3->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp3); - return NULL; -} - -static struct mpc *dcn32_mpc_create( - struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), - GFP_KERNEL); - - if (!mpc30) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT mpc_regs - dcn_mpc_regs_init(); - - dcn32_mpc_construct(mpc30, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct output_pixel_processor *dcn32_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp2 = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp2) { - BREAK_TO_DEBUGGER(); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT opp_regs - opp_regs_init(0), - opp_regs_init(1), - opp_regs_init(2), - opp_regs_init(3); - - dcn20_opp_construct(opp2, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp2->base; -} - - -static struct timing_generator *dcn32_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT optc_regs - optc_regs_init(0), - optc_regs_init(1), - optc_regs_init(2), - optc_regs_init(3); - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn32_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn32_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT link_enc_aux_regs - aux_regs_init(0), - aux_regs_init(1), - aux_regs_init(2), - aux_regs_init(3), - aux_regs_init(4); - -#undef REG_STRUCT -#define REG_STRUCT link_enc_hpd_regs - hpd_regs_init(0), - hpd_regs_init(1), - hpd_regs_init(2), - hpd_regs_init(3), - hpd_regs_init(4); - -#undef REG_STRUCT -#define REG_STRUCT link_enc_regs - link_regs_init(0, A), - link_regs_init(1, B), - link_regs_init(2, C), - link_regs_init(3, D), - link_regs_init(4, E); - - dcn32_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -struct panel_cntl *dcn32_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dcn31_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dcn31_panel_cntl_construct(panel_cntl, init_data); - - return &panel_cntl->base; -} - -static void 
read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, ctx->dcn_reg_offsets[regDC_PINSTRAPS_BASE_IDX] + regDC_PINSTRAPS, - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn32_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - -#undef REG_STRUCT -#define REG_STRUCT audio_regs - audio_regs_init(0), - audio_regs_init(1), - audio_regs_init(2), - audio_regs_init(3), - audio_regs_init(4); - - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct vpg *dcn32_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); - - if (!vpg3) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT vpg_regs - vpg_regs_init(0), - vpg_regs_init(1), - vpg_regs_init(2), - vpg_regs_init(3), - vpg_regs_init(4), - vpg_regs_init(5), - vpg_regs_init(6), - vpg_regs_init(7), - vpg_regs_init(8), - vpg_regs_init(9); - - vpg3_construct(vpg3, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg3->base; -} - -static struct afmt *dcn32_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); - - if (!afmt3) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT afmt_regs - afmt_regs_init(0), - afmt_regs_init(1), - afmt_regs_init(2), - afmt_regs_init(3), - afmt_regs_init(4), - afmt_regs_init(5); - - afmt3_construct(afmt3, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - return &afmt3->base; -} - -static struct apg *dcn31_apg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); - - if (!apg31) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT apg_regs - apg_regs_init(0), - apg_regs_init(1), - apg_regs_init(2), - apg_regs_init(3); - - apg31_construct(apg31, ctx, inst, - &apg_regs[inst], - &apg_shift, - &apg_mask); - - return &apg31->base; -} - -static struct stream_encoder *dcn32_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn32_vpg_create(ctx, vpg_inst); - afmt = dcn32_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT stream_enc_regs - stream_enc_regs_init(0), - stream_enc_regs_init(1), - stream_enc_regs_init(2), - stream_enc_regs_init(3), - stream_enc_regs_init(4); - - dcn32_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static struct hpo_dp_stream_encoder *dcn32_hpo_dp_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; - struct vpg *vpg; - struct apg *apg; - uint32_t hpo_dp_inst; - uint32_t vpg_inst; - uint32_t apg_inst; - - ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); - hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; - - /* Mapping of VPG register blocks to HPO DP block instance: - * VPG[6] 
-> HPO_DP[0] - * VPG[7] -> HPO_DP[1] - * VPG[8] -> HPO_DP[2] - * VPG[9] -> HPO_DP[3] - */ - vpg_inst = hpo_dp_inst + 6; - - /* Mapping of APG register blocks to HPO DP block instance: - * APG[0] -> HPO_DP[0] - * APG[1] -> HPO_DP[1] - * APG[2] -> HPO_DP[2] - * APG[3] -> HPO_DP[3] - */ - apg_inst = hpo_dp_inst; - - /* allocate HPO stream encoder and create VPG sub-block */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); - vpg = dcn32_vpg_create(ctx, vpg_inst); - apg = dcn31_apg_create(ctx, apg_inst); - - if (!hpo_dp_enc31 || !vpg || !apg) { - kfree(hpo_dp_enc31); - kfree(vpg); - kfree(apg); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT hpo_dp_stream_enc_regs - hpo_dp_stream_encoder_reg_init(0), - hpo_dp_stream_encoder_reg_init(1), - hpo_dp_stream_encoder_reg_init(2), - hpo_dp_stream_encoder_reg_init(3); - - dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, - hpo_dp_inst, eng_id, vpg, apg, - &hpo_dp_stream_enc_regs[hpo_dp_inst], - &hpo_dp_se_shift, &hpo_dp_se_mask); - - return &hpo_dp_enc31->base; -} - -static struct hpo_dp_link_encoder *dcn32_hpo_dp_link_encoder_create( - uint8_t inst, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; - - /* allocate HPO link encoder */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); - -#undef REG_STRUCT -#define REG_STRUCT hpo_dp_link_enc_regs - hpo_dp_link_encoder_reg_init(0), - hpo_dp_link_encoder_reg_init(1); - - hpo_dp_link_encoder32_construct(hpo_dp_enc31, ctx, inst, - &hpo_dp_link_enc_regs[inst], - &hpo_dp_le_shift, &hpo_dp_le_mask); - - return &hpo_dp_enc31->base; -} - -static struct dce_hwseq *dcn32_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - -#undef REG_STRUCT -#define REG_STRUCT hwseq_reg - hwseq_reg_init(); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn32_create_audio, - .create_stream_encoder = dcn32_stream_encoder_create, - .create_hpo_dp_stream_encoder = dcn32_hpo_dp_stream_encoder_create, - .create_hpo_dp_link_encoder = dcn32_hpo_dp_link_encoder_create, - .create_hwseq = dcn32_hwseq_create, -}; - -static void dcn32_resource_destruct(struct dcn32_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { - if (pool->base.hpo_dp_stream_enc[i] != NULL) { - if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); - pool->base.hpo_dp_stream_enc[i]->vpg = NULL; - } - if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { - kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); - pool->base.hpo_dp_stream_enc[i]->apg = NULL; - } - kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); - 
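/*
 * Note on the teardown pattern used throughout dcn32_resource_destruct():
 * the pool arrays hold pointers to embedded base structures, so each
 * kfree() goes through a container macro (here
 * DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC) to recover the wrapper
 * struct that was actually allocated by the corresponding _create()
 * routine; the base pointer is then cleared so a repeated destruct pass
 * cannot double-free it.
 */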
pool->base.hpo_dp_stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { - if (pool->base.hpo_dp_link_enc[i] != NULL) { - kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); - pool->base.hpo_dp_link_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(TO_DCN20_HUBBUB(pool->base.hubbub)); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn32_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); - - if (pool->base.oem_device != NULL) { - struct dc *dc = pool->base.oem_device->ctx->dc; - - dc->link_srv->destroy_ddc_service(&pool->base.oem_device); - } -} - - -static bool dcn32_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t dwb_count = pool->res_cap->num_dwb; - - for (i = 0; i < dwb_count; i++) 
{ - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - -#undef REG_STRUCT -#define REG_STRUCT dwbc30_regs - dwbc_regs_dcn3_init(0); - - dcn30_dwbc_construct(dwbc30, ctx, - &dwbc30_regs[i], - &dwbc30_shift, - &dwbc30_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -static bool dcn32_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t dwb_count = pool->res_cap->num_dwb; - - for (i = 0; i < dwb_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - -#undef REG_STRUCT -#define REG_STRUCT mcif_wb30_regs - mcif_wb_regs_dcn3_init(0); - - dcn32_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb30_regs[i], - &mcif_wb30_shift, - &mcif_wb30_mask, - i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn32_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT dsc_regs - dsc_regsDCN20_init(0), - dsc_regsDCN20_init(1), - dsc_regsDCN20_init(2), - dsc_regsDCN20_init(3); - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - - dsc->max_image_width = 6016; - - return &dsc->base; -} - -static void dcn32_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn32_resource_pool *dcn32_pool = TO_DCN32_RES_POOL(*pool); - - dcn32_resource_destruct(dcn32_pool); - kfree(dcn32_pool); - *pool = NULL; -} - -bool dcn32_acquire_post_bldn_3dlut( - struct resource_context *res_ctx, - const struct resource_pool *pool, - int mpcc_id, - struct dc_3dlut **lut, - struct dc_transfer_func **shaper) -{ - bool ret = false; - - ASSERT(*lut == NULL && *shaper == NULL); - *lut = NULL; - *shaper = NULL; - - if (!res_ctx->is_mpc_3dlut_acquired[mpcc_id]) { - *lut = pool->mpc_lut[mpcc_id]; - *shaper = pool->mpc_shaper[mpcc_id]; - res_ctx->is_mpc_3dlut_acquired[mpcc_id] = true; - ret = true; - } - return ret; -} - -bool dcn32_release_post_bldn_3dlut( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_3dlut **lut, - struct dc_transfer_func **shaper) -{ - int i; - bool ret = false; - - for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { - if (pool->mpc_lut[i] == *lut && pool->mpc_shaper[i] == *shaper) { - res_ctx->is_mpc_3dlut_acquired[i] = false; - pool->mpc_lut[i]->state.raw = 0; - *lut = NULL; - *shaper = NULL; - ret = true; - break; - } - } - return ret; -} - -static void dcn32_enable_phantom_plane(struct dc *dc, - struct dc_state *context, - struct dc_stream_state *phantom_stream, - unsigned int dc_pipe_idx) -{ - struct dc_plane_state *phantom_plane = NULL; - struct dc_plane_state *prev_phantom_plane = NULL; - struct pipe_ctx *curr_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; - - while (curr_pipe) { - if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) - phantom_plane = prev_phantom_plane; - else - phantom_plane = dc_create_plane_state(dc); - - memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); - memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, - sizeof(phantom_plane->scaling_quality)); - 
memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); - memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); - memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); - memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, - sizeof(phantom_plane->plane_size)); - memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, - sizeof(phantom_plane->tiling_info)); - memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); - phantom_plane->format = curr_pipe->plane_state->format; - phantom_plane->rotation = curr_pipe->plane_state->rotation; - phantom_plane->visible = curr_pipe->plane_state->visible; - - /* Shadow pipe has small viewport. */ - phantom_plane->clip_rect.y = 0; - phantom_plane->clip_rect.height = phantom_stream->src.height; - - phantom_plane->is_phantom = true; - - dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); - - curr_pipe = curr_pipe->bottom_pipe; - prev_phantom_plane = phantom_plane; - } -} - -static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - unsigned int pipe_cnt, - unsigned int dc_pipe_idx) -{ - struct dc_stream_state *phantom_stream = NULL; - struct pipe_ctx *ref_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; - - phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; - ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; - ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; - - /* stream has limited viewport and small timing */ - memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); - memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); - memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); - DC_FP_START(); - dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx); - DC_FP_END(); - - dc_add_stream_to_ctx(dc, context, phantom_stream); - return phantom_stream; -} - -void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i; - struct dc_plane_state *phantom_plane = NULL; - struct dc_stream_state *phantom_stream = NULL; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (resource_is_pipe_type(pipe, OTG_MASTER) && - resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - phantom_plane = pipe->plane_state; - phantom_stream = pipe->stream; - - dc_plane_state_retain(phantom_plane); - dc_stream_retain(phantom_stream); - } - } -} - -// return true if removed piped from ctx, false otherwise -bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update) -{ - int i; - bool removed_pipe = false; - struct dc_plane_state *phantom_plane = NULL; - struct dc_stream_state *phantom_stream = NULL; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == 
SUBVP_PHANTOM) { - phantom_plane = pipe->plane_state; - phantom_stream = pipe->stream; - - dc_rem_all_planes_for_stream(dc, pipe->stream, context); - dc_remove_stream_from_ctx(dc, context, pipe->stream); - - /* Ref count is incremented on allocation and also when added to the context. - * Therefore we must call release for the the phantom plane and stream once - * they are removed from the ctx to finally decrement the refcount to 0 to free. - */ - dc_plane_state_release(phantom_plane); - dc_stream_release(phantom_stream); - - removed_pipe = true; - } - - /* For non-full updates, a shallow copy of the current state - * is created. In this case we don't want to erase the current - * state (there can be 2 HIRQL threads, one in flip, and one in - * checkMPO) that can cause a race condition. - * - * This is just a workaround, needs a proper fix. - */ - if (!fast_update) { - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } - - if (pipe->plane_state) { - pipe->plane_state->is_phantom = false; - } - } - } - return removed_pipe; -} - -/* TODO: Input to this function should indicate which pipe indexes (or streams) - * require a phantom pipe / stream - */ -void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - unsigned int pipe_cnt, - unsigned int index) -{ - struct dc_stream_state *phantom_stream = NULL; - unsigned int i; - - // The index of the DC pipe passed into this function is guarenteed to - // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't - // already have phantom pipe assigned, etc.) by previous checks. - phantom_stream = dcn32_enable_phantom_stream(dc, context, pipes, pipe_cnt, index); - dcn32_enable_phantom_plane(dc, context, phantom_stream, index); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - // Build scaling params for phantom pipes which were newly added. - // We determine which phantom pipes were added by comparing with - // the phantom stream. - if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - pipe->stream->use_dynamic_meta = false; - pipe->plane_state->flip_immediate = false; - if (!resource_build_scaling_params(pipe)) { - // Log / remove phantom pipes since failed to build scaling params - } - } - } -} - -static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_validate) -{ - bool out = false; - - BW_VAL_TRACE_SETUP(); - - int vlevel = 0; - int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); - struct mall_temp_config mall_temp_config; - - /* To handle Freesync properly, setting FreeSync DML parameters - * to its default state for the first stage of validation - */ - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; - context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; - - DC_LOGGER_INIT(dc->ctx->logger); - - /* For fast validation, there are situations where a shallow copy of - * of the dc->current_state is created for the validation. In this case - * we want to save and restore the mall config because we always - * teardown subvp at the beginning of validation (and don't attempt - * to add it back if it's fast validation). 
If we don't restore the - * subvp config in cases of fast validation + shallow copy of the - * dc->current_state, the dc->current_state will have a partially - * removed subvp state when we did not intend to remove it. - */ - if (fast_validate) { - memset(&mall_temp_config, 0, sizeof(mall_temp_config)); - dcn32_save_mall_state(dc, context, &mall_temp_config); - } - - BW_VAL_TRACE_COUNT(); - - DC_FP_START(); - out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); - DC_FP_END(); - - if (fast_validate) - dcn32_restore_mall_state(dc, context, &mall_temp_config); - - if (pipe_cnt == 0) - goto validate_out; - - if (!out) - goto validate_fail; - - BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - - if (fast_validate) { - BW_VAL_TRACE_SKIP(fast); - goto validate_out; - } - - dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); - - dcn32_override_min_req_memclk(dc, context); - - BW_VAL_TRACE_END_WATERMARKS(); - - goto validate_out; - -validate_fail: - DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", - dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); - - BW_VAL_TRACE_SKIP(fail); - out = false; - -validate_out: - kfree(pipes); - - BW_VAL_TRACE_FINISH(); - - return out; -} - -bool dcn32_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out = false; - - if (dc->debug.using_dml2) - out = dml2_validate(dc, context, fast_validate); - else - out = dml1_validate(dc, context, fast_validate); - return out; -} - -int dcn32_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe = NULL; - bool subvp_in_use = false; - struct dc_crtc_timing *timing; - - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - pipes[pipe_cnt].pipe.src.gpuvm = true; - DC_FP_START(); - dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt); - DC_FP_END(); - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; - pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19; - - /* Only populate DML input with subvp info for full updates. - * This is just a workaround -- needs a proper fix. 
- */ - if (!fast_validate) { - switch (pipe->stream->mall_stream_config.type) { - case SUBVP_MAIN: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; - subvp_in_use = true; - break; - case SUBVP_PHANTOM: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; - pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; - // Disallow unbounded req for SubVP according to DCHUB programming guide - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - break; - case SUBVP_NONE: - pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable; - pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; - break; - default: - break; - } - } - - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - - - pipe_cnt++; - } - - /* For DET allocation, we don't want to use DML policy (not optimal for utilizing all - * the DET available for each pipe). Use the DET override input to maintain our driver - * policy. - */ - dcn32_set_det_allocations(dc, context, pipes); - - // In general cases we want to keep the dram clock change requirement - // (prefer configs that support MCLK switch). Only override to false - // for SubVP - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || subvp_in_use) - context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false; - else - context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; - - return pipe_cnt; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = dcn32_subvp_in_use, -}; - -void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - DC_FP_START(); - dcn32_calculate_wm_and_dlg_fpu(dc, context, pipes, pipe_cnt, vlevel); - DC_FP_END(); -} - -static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - DC_FP_START(); - dcn32_update_bw_bounding_box_fpu(dc, bw_params); - DC_FP_END(); -} - -static struct resource_funcs dcn32_res_pool_funcs = { - .destroy = dcn32_destroy_resource_pool, - .link_enc_create = dcn32_link_encoder_create, - .link_enc_create_minimal = NULL, - .panel_cntl_create = dcn32_panel_cntl_create, - .validate_bandwidth = dcn32_validate_bandwidth, - .calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg, - .populate_dml_pipes = dcn32_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn32_acquire_free_pipe_as_secondary_dpp_pipe, - .acquire_free_pipe_as_secondary_opp_head = dcn32_acquire_free_pipe_as_secondary_opp_head, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn30_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn32_acquire_post_bldn_3dlut, - 
.release_post_bldn_3dlut = dcn32_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn32_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, - .add_phantom_pipes = dcn32_add_phantom_pipes, - .remove_phantom_pipes = dcn32_remove_phantom_pipes, - .retain_phantom_pipes = dcn32_retain_phantom_pipes, - .save_mall_state = dcn32_save_mall_state, - .restore_mall_state = dcn32_restore_mall_state, -}; - -static uint32_t read_pipe_fuses(struct dc_context *ctx) -{ - uint32_t value = REG_READ(CC_DC_PIPE_DIS); - /* DCN32 support max 4 pipes */ - value = value & 0xf; - return value; -} - - -static bool dcn32_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn32_resource_pool *pool) -{ - int i, j; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - struct ddc_service_init_data ddc_init_data = {0}; - uint32_t pipe_fuses = 0; - uint32_t num_pipes = 4; - -#undef REG_STRUCT -#define REG_STRUCT bios_regs - bios_regs_init(); - -#undef REG_STRUCT -#define REG_STRUCT clk_src_regs - clk_src_regs_init(0, A), - clk_src_regs_init(1, B), - clk_src_regs_init(2, C), - clk_src_regs_init(3, D), - clk_src_regs_init(4, E); - -#undef REG_STRUCT -#define REG_STRUCT abm_regs - abm_regs_init(0), - abm_regs_init(1), - abm_regs_init(2), - abm_regs_init(3); - -#undef REG_STRUCT -#define REG_STRUCT dccg_regs - dccg_regs_init(); - - DC_FP_START(); - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dcn32; - /* max number of pipes for ASIC before checking for pipe fuses */ - num_pipes = pool->base.res_cap->num_timing_generator; - pipe_fuses = read_pipe_fuses(ctx); - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) - if (pipe_fuses & 1 << i) - num_pipes--; - - if (pipe_fuses & 1) - ASSERT(0); //Unexpected - Pipe 0 should always be fully functional! - - if (pipe_fuses & CC_DC_PIPE_DIS__DC_FULL_DIS_MASK) - ASSERT(0); //Entire DCN is harvested! - - /* within dml lib, initial value is hard coded, if ASIC pipe is fused, the - * value will be changed, update max_num_dpp and max_num_otg for dml. 
- */ - dcn3_2_ip.max_num_dpp = num_pipes; - dcn3_2_ip.max_num_otg = num_pipes; - - pool->base.funcs = &dcn32_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.timing_generator_count = num_pipes; - pool->base.pipe_count = num_pipes; - pool->base.mpcc_count = num_pipes; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ - /* TODO: Bring max_cursor_size back to 256 after subvp cursor corruption is fixed*/ - dc->caps.max_cursor_size = 64; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.mall_size_per_mem_channel = 4; - dc->caps.mall_size_total = 0; - dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - - dc->caps.cache_line_size = 64; - dc->caps.cache_num_ways = 16; - - /* Calculate the available MALL space */ - dc->caps.max_cab_allocation_bytes = dcn32_calc_num_avail_chans_for_mall( - dc, dc->ctx->dc_bios->vram_info.num_chans) * - dc->caps.mall_size_per_mem_channel * 1024 * 1024; - dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; - - dc->caps.subvp_fw_processing_delay_us = 15; - dc->caps.subvp_drr_max_vblank_margin_us = 40; - dc->caps.subvp_prefetch_end_to_mall_start_us = 15; - dc->caps.subvp_swath_height_margin_lines = 16; - dc->caps.subvp_pstate_allow_width_us = 20; - dc->caps.subvp_vertical_int_margin_us = 30; - dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin - - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - if (dc->config.forceHBR2CP2520) - dc->caps.force_dp_tps4_for_cp2520 = false; - dc->caps.dp_hpo = true; - dc->caps.dp_hdmi21_pcon_support = true; - dc->caps.edp_dsc_support = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.seamless_odm = true; - dc->caps.max_v_total = (1 << 15) - 1; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1 - // no OGAM ROM on DCN2 and later ASICs - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //4, configurable to be before or after BLND in MPCC - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - 
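/*
 * Worked example of the MALL/CAB sizing set up above (illustrative numbers
 * only): on a GC_11_0_0 part reporting vram_info.num_chans == 48,
 * dcn32_calc_num_avail_chans_for_mall() returns 32 usable channels, so
 *   max_cab_allocation_bytes = 32 * mall_size_per_mem_channel (4 MiB)
 *                            = 128 MiB
 * and mall_size_total is set to the same value.
 */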
dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - /* Use pipe context based otg sync logic */ - dc->config.use_pipe_ctx_sync_logic = true; - - dc->config.dc_mode_clk_limit_support = true; - /* read VBIOS LTTPR caps */ - { - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - /* interop bit is implicit */ - { - dc->caps.vbios_lttpr_aware = true; - } - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN32_CLK_SRC_PLL0] = - dcn32_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN32_CLK_SRC_PLL1] = - dcn32_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN32_CLK_SRC_PLL2] = - dcn32_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN32_CLK_SRC_PLL3] = - dcn32_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN32_CLK_SRC_PLL4] = - dcn32_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->base.clk_src_count = DCN32_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn32_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* DCCG */ - pool->base.dccg = dccg32_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* DML */ - dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); - - /* IRQ Service */ - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn32_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn32_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs, TGs, ABMs */ - for (i = 0, j = 0; i < pool->base.res_cap->num_timing_generator; i++) { - - /* if pipe is disabled, skip instance of HW pipe, - * i.e, skip ASIC register instance - */ - if (pipe_fuses & 1 << i) - continue; - - /* HUBPs */ - pool->base.hubps[j] = dcn32_hubp_create(ctx, i); - if (pool->base.hubps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - /* DPPs */ - pool->base.dpps[j] = dcn32_dpp_create(ctx, i); - if (pool->base.dpps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - - /* OPPs */ - pool->base.opps[j] = dcn32_opp_create(ctx, i); - 
if (pool->base.opps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - - /* TGs */ - pool->base.timing_generators[j] = dcn32_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - - /* ABMs */ - pool->base.multiple_abms[j] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[j] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* index for resource pool arrays for next valid pipe */ - j++; - } - - /* PSR */ - pool->base.psr = dmub_psr_create(ctx); - if (pool->base.psr == NULL) { - dm_error("DC: failed to create psr obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* MPCCs */ - pool->base.mpc = dcn32_mpc_create(ctx, pool->base.res_cap->num_timing_generator, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - /* DSCs */ - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn32_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB */ - if (!dcn32_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - /* MMHUBBUB */ - if (!dcn32_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn32_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn32_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* Audio, HWSeq, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer init functions and Plane caps */ - dcn32_hw_sequencer_init_functions(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { - ddc_init_data.ctx = dc->ctx; - ddc_init_data.link = NULL; - ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; - ddc_init_data.id.enum_id = 0; - ddc_init_data.id.type = OBJECT_TYPE_GENERIC; - pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); - } else { - pool->base.oem_device = NULL; - } - - dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = false; - dc->dml2_options.use_native_soc_bb_construction = true; - dc->dml2_options.minimize_dispclk_using_odm = true; - - dc->dml2_options.callbacks.dc = dc; - dc->dml2_options.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = 
&dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; - dc->dml2_options.callbacks.acquire_secondary_pipe_for_mpc_odm = &dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy; - dc->dml2_options.callbacks.update_pipes_for_stream_with_slice_count = &resource_update_pipes_for_stream_with_slice_count; - dc->dml2_options.callbacks.update_pipes_for_plane_with_slice_count = &resource_update_pipes_for_plane_with_slice_count; - dc->dml2_options.callbacks.get_mpc_slice_index = &resource_get_mpc_slice_index; - dc->dml2_options.callbacks.get_odm_slice_index = &resource_get_odm_slice_index; - dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; - - dc->dml2_options.svp_pstate.callbacks.dc = dc; - dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context; - dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx; - dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state; - dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context; - dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx; - dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink; - dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release; - dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release; - dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; - - dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; - dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; - dc->dml2_options.svp_pstate.subvp_pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us; - dc->dml2_options.svp_pstate.subvp_swath_height_margin_lines = dc->caps.subvp_swath_height_margin_lines; - - dc->dml2_options.svp_pstate.force_disable_subvp = dc->debug.force_disable_subvp; - dc->dml2_options.svp_pstate.force_enable_subvp = dc->debug.force_subvp_mclk_switch; - - dc->dml2_options.mall_cfg.cache_line_size_bytes = dc->caps.cache_line_size; - dc->dml2_options.mall_cfg.cache_num_ways = dc->caps.cache_num_ways; - dc->dml2_options.mall_cfg.max_cab_allocation_bytes = dc->caps.max_cab_allocation_bytes; - dc->dml2_options.mall_cfg.mblk_height_4bpe_pixels = DCN3_2_MBLK_HEIGHT_4BPE; - dc->dml2_options.mall_cfg.mblk_height_8bpe_pixels = DCN3_2_MBLK_HEIGHT_8BPE; - dc->dml2_options.mall_cfg.mblk_size_bytes = DCN3_2_MALL_MBLK_SIZE_BYTES; - dc->dml2_options.mall_cfg.mblk_width_pixels = DCN3_2_MBLK_WIDTH; - - dc->dml2_options.max_segments_per_hubp = 18; - dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE; - dc->dml2_options.map_dc_pipes_with_callbacks = true; - - if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) - dc->config.sdpif_request_limit_words_per_umc = 16; - - DC_FP_END(); - - return true; - -create_fail: - - DC_FP_END(); - - dcn32_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn32_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn32_resource_pool *pool = - kzalloc(sizeof(struct dcn32_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn32_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return 
NULL; -} - -/* - * Find the most optimal free pipe from res_ctx, which could be used as a - * secondary dpp pipe for input opp head pipe. - * - * a free pipe - a pipe in input res_ctx not yet used for any streams or - * planes. - * secondary dpp pipe - a pipe gets inserted to a head OPP pipe's MPC blending - * tree. This is typical used for rendering MPO planes or additional offset - * areas in MPCC combine. - * - * Hardware Transition Minimization Algorithm for Finding a Secondary DPP Pipe - * ------------------------------------------------------------------------- - * - * PROBLEM: - * - * 1. There is a hardware limitation that a secondary DPP pipe cannot be - * transferred from one MPC blending tree to the other in a single frame. - * Otherwise it could cause glitches on the screen. - * - * For instance, we cannot transition from state 1 to state 2 in one frame. This - * is because PIPE1 is transferred from PIPE0's MPC blending tree over to - * PIPE2's MPC blending tree, which is not supported by hardware. - * To support this transition we need to first remove PIPE1 from PIPE0's MPC - * blending tree in one frame and then insert PIPE1 to PIPE2's MPC blending tree - * in the next frame. This is not optimal as it will delay the flip for two - * frames. - * - * State 1: - * PIPE0 -- secondary DPP pipe --> (PIPE1) - * PIPE2 -- secondary DPP pipe --> NONE - * - * State 2: - * PIPE0 -- secondary DPP pipe --> NONE - * PIPE2 -- secondary DPP pipe --> (PIPE1) - * - * 2. We want to in general minimize the unnecessary changes in pipe topology. - * If a pipe is already added in current blending tree and there are no changes - * to plane topology, we don't want to swap it with another free pipe - * unnecessarily in every update. Powering up and down a pipe would require a - * full update which delays the flip for 1 frame. If we use the original pipe - * we don't have to toggle its power. So we can flip faster. - */ -static int find_optimal_free_pipe_as_secondary_dpp_pipe( - const struct resource_context *cur_res_ctx, - struct resource_context *new_res_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *new_opp_head) -{ - const struct pipe_ctx *cur_opp_head; - int free_pipe_idx; - - cur_opp_head = &cur_res_ctx->pipe_ctx[new_opp_head->pipe_idx]; - free_pipe_idx = resource_find_free_pipe_used_in_cur_mpc_blending_tree( - cur_res_ctx, new_res_ctx, cur_opp_head); - - /* Up until here if we have not found a free secondary pipe, we will - * need to wait for at least one frame to complete the transition - * sequence. - */ - if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) - free_pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx( - cur_res_ctx, new_res_ctx, pool); - - /* Up until here if we have not found a free secondary pipe, we will - * need to wait for at least two frames to complete the transition - * sequence. It really doesn't matter which pipe we decide take from - * current enabled pipes. It won't save our frame time when we swap only - * one pipe or more pipes. 
- */ - if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) - free_pipe_idx = resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine( - cur_res_ctx, new_res_ctx, pool); - - if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) - free_pipe_idx = resource_find_any_free_pipe(new_res_ctx, pool); - - return free_pipe_idx; -} - -static struct pipe_ctx *find_idle_secondary_pipe_check_mpo( - struct resource_context *res_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *primary_pipe) -{ - int i; - struct pipe_ctx *secondary_pipe = NULL; - struct pipe_ctx *next_odm_mpo_pipe = NULL; - int primary_index, preferred_pipe_idx; - struct pipe_ctx *old_primary_pipe = NULL; - - /* - * Modified from find_idle_secondary_pipe - * With windowed MPO and ODM, we want to avoid the case where we want a - * free pipe for the left side but the free pipe is being used on the - * right side. - * Add check on current_state if the primary_pipe is the left side, - * to check the right side ( primary_pipe->next_odm_pipe ) to see if - * it is using a pipe for MPO ( primary_pipe->next_odm_pipe->bottom_pipe ) - * - If so, then don't use this pipe - * EXCEPTION - 3 plane ( 2 MPO plane ) case - * - in this case, the primary pipe has already gotten a free pipe for the - * MPO window in the left - * - when it tries to get a free pipe for the MPO window on the right, - * it will see that it is already assigned to the right side - * ( primary_pipe->next_odm_pipe ). But in this case, we want this - * free pipe, since it will be for the right side. So add an - * additional condition, that skipping the free pipe on the right only - * applies if the primary pipe has no bottom pipe currently assigned - */ - if (primary_pipe) { - primary_index = primary_pipe->pipe_idx; - old_primary_pipe = &primary_pipe->stream->ctx->dc->current_state->res_ctx.pipe_ctx[primary_index]; - if ((old_primary_pipe->next_odm_pipe) && (old_primary_pipe->next_odm_pipe->bottom_pipe) - && (!primary_pipe->bottom_pipe)) - next_odm_mpo_pipe = old_primary_pipe->next_odm_pipe->bottom_pipe; - - preferred_pipe_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx; - if ((res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) && - !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == preferred_pipe_idx)) { - secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; - secondary_pipe->pipe_idx = preferred_pipe_idx; - } - } - - /* - * search backwards for the second pipe to keep pipe - * assignment more consistent - */ - if (!secondary_pipe) - for (i = pool->pipe_count - 1; i >= 0; i--) { - if ((res_ctx->pipe_ctx[i].stream == NULL) && - !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == i)) { - secondary_pipe = &res_ctx->pipe_ctx[i]; - secondary_pipe->pipe_idx = i; - break; - } - } - - return secondary_pipe; -} - -static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( - struct dc_state *state, - const struct resource_pool *pool, - struct dc_stream_state *stream, - const struct pipe_ctx *head_pipe) -{ - struct resource_context *res_ctx = &state->res_ctx; - struct pipe_ctx *idle_pipe, *pipe; - struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; - int head_index; - - if (!head_pipe) - ASSERT(0); - - /* - * Modified from dcn20_acquire_idle_pipe_for_layer - * Check if head_pipe in old_context already has bottom_pipe allocated. - * - If so, check if that pipe is available in the current context. 
- * -- If so, reuse pipe from old_context - */ - head_index = head_pipe->pipe_idx; - pipe = &old_ctx->pipe_ctx[head_index]; - if (pipe->bottom_pipe && res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx].stream == NULL) { - idle_pipe = &res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx]; - idle_pipe->pipe_idx = pipe->bottom_pipe->pipe_idx; - } else { - idle_pipe = find_idle_secondary_pipe_check_mpo(res_ctx, pool, head_pipe); - if (!idle_pipe) - return NULL; - } - - idle_pipe->stream = head_pipe->stream; - idle_pipe->stream_res.tg = head_pipe->stream_res.tg; - idle_pipe->stream_res.opp = head_pipe->stream_res.opp; - - idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; - idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; - - return idle_pipe; -} - -static int find_optimal_free_pipe_as_secondary_opp_head( - const struct resource_context *cur_res_ctx, - struct resource_context *new_res_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *new_otg_master) -{ - const struct pipe_ctx *cur_otg_master; - int free_pipe_idx; - - cur_otg_master = &cur_res_ctx->pipe_ctx[new_otg_master->pipe_idx]; - free_pipe_idx = resource_find_free_pipe_used_as_sec_opp_head_by_cur_otg_master( - cur_res_ctx, new_res_ctx, cur_otg_master); - - /* Up until here if we have not found a free secondary pipe, we will - * need to wait for at least one frame to complete the transition - * sequence. - */ - if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) - free_pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx( - cur_res_ctx, new_res_ctx, pool); - - if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) - free_pipe_idx = resource_find_any_free_pipe(new_res_ctx, pool); - - return free_pipe_idx; -} - -struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *opp_head_pipe) -{ - - int free_pipe_idx; - struct pipe_ctx *free_pipe; - - if (!opp_head_pipe->stream->ctx->dc->config.enable_windowed_mpo_odm) - return dcn32_acquire_idle_pipe_for_head_pipe_in_layer( - new_ctx, pool, opp_head_pipe->stream, opp_head_pipe); - - free_pipe_idx = find_optimal_free_pipe_as_secondary_dpp_pipe( - &cur_ctx->res_ctx, &new_ctx->res_ctx, - pool, opp_head_pipe); - if (free_pipe_idx >= 0) { - free_pipe = &new_ctx->res_ctx.pipe_ctx[free_pipe_idx]; - free_pipe->pipe_idx = free_pipe_idx; - free_pipe->stream = opp_head_pipe->stream; - free_pipe->stream_res.tg = opp_head_pipe->stream_res.tg; - free_pipe->stream_res.opp = opp_head_pipe->stream_res.opp; - - free_pipe->plane_res.hubp = pool->hubps[free_pipe->pipe_idx]; - free_pipe->plane_res.ipp = pool->ipps[free_pipe->pipe_idx]; - free_pipe->plane_res.dpp = pool->dpps[free_pipe->pipe_idx]; - free_pipe->plane_res.mpcc_inst = - pool->dpps[free_pipe->pipe_idx]->inst; - } else { - ASSERT(opp_head_pipe); - free_pipe = NULL; - } - - return free_pipe; -} - -struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *otg_master) -{ - int free_pipe_idx = find_optimal_free_pipe_as_secondary_opp_head( - &cur_ctx->res_ctx, &new_ctx->res_ctx, - pool, otg_master); - struct pipe_ctx *free_pipe; - - if (free_pipe_idx >= 0) { - free_pipe = &new_ctx->res_ctx.pipe_ctx[free_pipe_idx]; - free_pipe->pipe_idx = free_pipe_idx; - 
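/*
 * The newly acquired secondary OPP head shares the OTG master's stream and
 * timing generator, but takes its own OPP and plane resources indexed by
 * free_pipe_idx; a DSC instance is acquired separately below only when the
 * stream actually has DSC enabled, and failure to acquire one zeroes and
 * releases the pipe again.
 */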
free_pipe->stream = otg_master->stream; - free_pipe->stream_res.tg = otg_master->stream_res.tg; - free_pipe->stream_res.dsc = NULL; - free_pipe->stream_res.opp = pool->opps[free_pipe_idx]; - free_pipe->plane_res.mi = pool->mis[free_pipe_idx]; - free_pipe->plane_res.hubp = pool->hubps[free_pipe_idx]; - free_pipe->plane_res.ipp = pool->ipps[free_pipe_idx]; - free_pipe->plane_res.xfm = pool->transforms[free_pipe_idx]; - free_pipe->plane_res.dpp = pool->dpps[free_pipe_idx]; - free_pipe->plane_res.mpcc_inst = pool->dpps[free_pipe_idx]->inst; - if (free_pipe->stream->timing.flags.DSC == 1) { - dcn20_acquire_dsc(free_pipe->stream->ctx->dc, - &new_ctx->res_ctx, - &free_pipe->stream_res.dsc, - free_pipe_idx); - ASSERT(free_pipe->stream_res.dsc); - if (free_pipe->stream_res.dsc == NULL) { - memset(free_pipe, 0, sizeof(*free_pipe)); - free_pipe = NULL; - } - } - } else { - ASSERT(otg_master); - free_pipe = NULL; - } - - return free_pipe; -} - -unsigned int dcn32_calc_num_avail_chans_for_mall(struct dc *dc, int num_chans) -{ - /* - * DCN32 and DCN321 SKUs may have different sizes for MALL - * but we may not be able to access all the MALL space. - * If the num_chans is power of 2, then we can access all - * of the available MALL space. Otherwise, we can only - * access: - * - * max_cab_size_in_bytes = total_cache_size_in_bytes * - * ((2^floor(log2(num_chans)))/num_chans) - * - * Calculating the MALL sizes for all available SKUs, we - * have come up with the follow simplified check. - * - we have max_chans which provides the max MALL size. - * Each chans supports 4MB of MALL so: - * - * total_cache_size_in_bytes = max_chans * 4 MB - * - * - we have avail_chans which shows the number of channels - * we can use if we can't access the entire MALL space. - * It is generally half of max_chans - * - so we use the following checks: - * - * if (num_chans == max_chans), return max_chans - * if (num_chans < max_chans), return avail_chans - * - * - exception is GC_11_0_0 where we can't access max_chans, - * so we define max_avail_chans as the maximum available - * MALL space - * - */ - int gc_11_0_0_max_chans = 48; - int gc_11_0_0_max_avail_chans = 32; - int gc_11_0_0_avail_chans = 16; - int gc_11_0_3_max_chans = 16; - int gc_11_0_3_avail_chans = 8; - int gc_11_0_2_max_chans = 8; - int gc_11_0_2_avail_chans = 4; - - if (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev)) { - return (num_chans == gc_11_0_0_max_chans) ? - gc_11_0_0_max_avail_chans : gc_11_0_0_avail_chans; - } else if (ASICREV_IS_GC_11_0_2(dc->ctx->asic_id.hw_internal_rev)) { - return (num_chans == gc_11_0_2_max_chans) ? - gc_11_0_2_max_chans : gc_11_0_2_avail_chans; - } else { // if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev)) { - return (num_chans == gc_11_0_3_max_chans) ? - gc_11_0_3_max_chans : gc_11_0_3_avail_chans; - } -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h deleted file mode 100644 index b931008114c9..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ /dev/null @@ -1,1263 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef _DCN32_RESOURCE_H_ -#define _DCN32_RESOURCE_H_ - -#include "core_types.h" - -#define DCN3_2_DEFAULT_DET_SIZE 256 -#define DCN3_2_MAX_DET_SIZE 1152 -#define DCN3_2_MIN_DET_SIZE 128 -#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 -#define DCN3_2_DET_SEG_SIZE 64 -#define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024 -#define DCN3_2_MBLK_WIDTH 128 -#define DCN3_2_MBLK_HEIGHT_4BPE 128 -#define DCN3_2_MBLK_HEIGHT_8BPE 64 -#define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq -#define SUBVP_HIGH_REFRESH_LIST_LEN 4 -#define DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ 1800 -#define DCN3_2_VMIN_DISPCLK_HZ 717000000 - -#define TO_DCN32_RES_POOL(pool)\ - container_of(pool, struct dcn32_resource_pool, base) - -extern struct _vcs_dpi_ip_params_st dcn3_2_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc; - -struct subvp_high_refresh_list { - int min_refresh; - int max_refresh; - struct resolution { - int width; - int height; - } res[SUBVP_HIGH_REFRESH_LIST_LEN]; -}; - -struct dcn32_resource_pool { - struct resource_pool base; -}; - -struct resource_pool *dcn32_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -struct panel_cntl *dcn32_panel_cntl_create( - const struct panel_cntl_init_data *init_data); - -bool dcn32_acquire_post_bldn_3dlut( - struct resource_context *res_ctx, - const struct resource_pool *pool, - int mpcc_id, - struct dc_3dlut **lut, - struct dc_transfer_func **shaper); - -bool dcn32_release_post_bldn_3dlut( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_3dlut **lut, - struct dc_transfer_func **shaper); - -bool dcn32_remove_phantom_pipes(struct dc *dc, - struct dc_state *context, bool fast_update); - -void dcn32_retain_phantom_pipes(struct dc *dc, - struct dc_state *context); - -void dcn32_add_phantom_pipes(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - unsigned int pipe_cnt, - unsigned int index); - -bool dcn32_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate); - -int dcn32_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate); - -void dcn32_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel); - -uint32_t dcn32_helper_mall_bytes_to_ways( - struct dc *dc, - 
uint32_t total_size_in_mall_bytes); - -uint32_t dcn32_helper_calculate_mall_bytes_for_cursor( - struct dc *dc, - struct pipe_ctx *pipe_ctx, - bool ignore_cursor_buf); - -uint32_t dcn32_helper_calculate_num_ways_for_subvp( - struct dc *dc, - struct dc_state *context); - -void dcn32_merge_pipes_for_subvp(struct dc *dc, - struct dc_state *context); - -bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, - struct dc_state *context); - -bool dcn32_subvp_in_use(struct dc *dc, - struct dc_state *context); - -bool dcn32_mpo_in_use(struct dc_state *context); - -bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context); -bool dcn32_is_center_timing(struct pipe_ctx *pipe); -bool dcn32_is_psr_capable(struct pipe_ctx *pipe); - -struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *opp_head_pipe); - -struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head( - const struct dc_state *cur_ctx, - struct dc_state *new_ctx, - const struct resource_pool *pool, - const struct pipe_ctx *otg_master); - -void dcn32_release_pipe(struct dc_state *context, - struct pipe_ctx *pipe, - const struct resource_pool *pool); - -void dcn32_determine_det_override(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes); - -void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes); - -void dcn32_save_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config); - -void dcn32_restore_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config); - -struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context); - -bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe); - -bool dcn32_allow_subvp_high_refresh_rate(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe); - -unsigned int dcn32_calc_num_avail_chans_for_mall(struct dc *dc, int num_chans); - -double dcn32_determine_max_vratio_prefetch(struct dc *dc, struct dc_state *context); - -bool dcn32_check_native_scaling_for_res(struct pipe_ctx *pipe, unsigned int width, unsigned int height); - -bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context); - -bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int vlevel); - -/* definitions for run time init of reg offsets */ - -/* CLK SRC */ -#define CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) \ - SRI_ARR_ALPHABET(PIXCLK_RESYNC_CNTL, PHYPLL, index, pllid), \ - SRII_ARR_2(PHASE, DP_DTO, 0, index), \ - SRII_ARR_2(PHASE, DP_DTO, 1, index), \ - SRII_ARR_2(PHASE, DP_DTO, 2, index), \ - SRII_ARR_2(PHASE, DP_DTO, 3, index), \ - SRII_ARR_2(MODULO, DP_DTO, 0, index), \ - SRII_ARR_2(MODULO, DP_DTO, 1, index), \ - SRII_ARR_2(MODULO, DP_DTO, 2, index), \ - SRII_ARR_2(MODULO, DP_DTO, 3, index), \ - SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 0, index), \ - SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 1, index), \ - SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 2, index), \ - SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 3, index) - -/* ABM */ -#define ABM_DCN32_REG_LIST_RI(id) \ - SRI_ARR(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \ - SRI_ARR(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \ - SRI_ARR(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \ - SRI_ARR(DC_ABM1_HG_MISC_CTRL, ABM, id), \ - SRI_ARR(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \ - SRI_ARR(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \ - 
SRI_ARR(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \ - SRI_ARR(BL1_PWM_USER_LEVEL, ABM, id), \ - SRI_ARR(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \ - SRI_ARR(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \ - SRI_ARR(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \ - SRI_ARR(DC_ABM1_ACE_THRES_12, ABM, id), NBIO_SR_ARR(BIOS_SCRATCH_2, id) - -/* Audio */ -#define AUD_COMMON_REG_LIST_RI(id) \ - SRI_ARR(AZALIA_F0_CODEC_ENDPOINT_INDEX, AZF0ENDPOINT, id), \ - SRI_ARR(AZALIA_F0_CODEC_ENDPOINT_DATA, AZF0ENDPOINT, id), \ - SR_ARR(AZALIA_F0_CODEC_FUNCTION_PARAMETER_STREAM_FORMATS, id), \ - SR_ARR(AZALIA_F0_CODEC_FUNCTION_PARAMETER_SUPPORTED_SIZE_RATES, id), \ - SR_ARR(AZALIA_F0_CODEC_FUNCTION_PARAMETER_POWER_STATES, id), \ - SR_ARR(DCCG_AUDIO_DTO_SOURCE, id), SR_ARR(DCCG_AUDIO_DTO0_MODULE, id), \ - SR_ARR(DCCG_AUDIO_DTO0_PHASE, id), SR_ARR(DCCG_AUDIO_DTO1_MODULE, id), \ - SR_ARR(DCCG_AUDIO_DTO1_PHASE, id) \ - -/* VPG */ - -#define VPG_DCN3_REG_LIST_RI(id) \ - SRI_ARR(VPG_GENERIC_STATUS, VPG, id), \ - SRI_ARR(VPG_GENERIC_PACKET_ACCESS_CTRL, VPG, id), \ - SRI_ARR(VPG_GENERIC_PACKET_DATA, VPG, id), \ - SRI_ARR(VPG_GSP_FRAME_UPDATE_CTRL, VPG, id), \ - SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id) - -/* AFMT */ -#define AFMT_DCN3_REG_LIST_RI(id) \ - SRI_ARR(AFMT_INFOFRAME_CONTROL0, AFMT, id), \ - SRI_ARR(AFMT_VBI_PACKET_CONTROL, AFMT, id), \ - SRI_ARR(AFMT_AUDIO_PACKET_CONTROL, AFMT, id), \ - SRI_ARR(AFMT_AUDIO_PACKET_CONTROL2, AFMT, id), \ - SRI_ARR(AFMT_AUDIO_SRC_CONTROL, AFMT, id), \ - SRI_ARR(AFMT_60958_0, AFMT, id), SRI_ARR(AFMT_60958_1, AFMT, id), \ - SRI_ARR(AFMT_60958_2, AFMT, id), SRI_ARR(AFMT_MEM_PWR, AFMT, id) - -/* APG */ -#define APG_DCN31_REG_LIST_RI(id) \ - SRI_ARR(APG_CONTROL, APG, id), SRI_ARR(APG_CONTROL2, APG, id), \ - SRI_ARR(APG_MEM_PWR, APG, id), SRI_ARR(APG_DBG_GEN_CONTROL, APG, id) - -/* Stream encoder */ -#define SE_DCN32_REG_LIST_RI(id) \ - SRI_ARR(AFMT_CNTL, DIG, id), SRI_ARR(DIG_FE_CNTL, DIG, id), \ - SRI_ARR(HDMI_CONTROL, DIG, id), SRI_ARR(HDMI_DB_CONTROL, DIG, id), \ - SRI_ARR(HDMI_GC, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \ - SRI_ARR(HDMI_INFOFRAME_CONTROL0, DIG, id), \ - SRI_ARR(HDMI_INFOFRAME_CONTROL1, DIG, id), \ - SRI_ARR(HDMI_VBI_PACKET_CONTROL, DIG, id), \ - SRI_ARR(HDMI_AUDIO_PACKET_CONTROL, DIG, id), \ - SRI_ARR(HDMI_ACR_PACKET_CONTROL, DIG, id), \ - SRI_ARR(HDMI_ACR_32_0, DIG, id), SRI_ARR(HDMI_ACR_32_1, DIG, id), \ - SRI_ARR(HDMI_ACR_44_0, DIG, id), SRI_ARR(HDMI_ACR_44_1, DIG, id), \ - SRI_ARR(HDMI_ACR_48_0, DIG, id), SRI_ARR(HDMI_ACR_48_1, DIG, id), \ - SRI_ARR(DP_DB_CNTL, DP, id), SRI_ARR(DP_MSA_MISC, DP, id), \ - SRI_ARR(DP_MSA_VBID_MISC, DP, id), SRI_ARR(DP_MSA_COLORIMETRY, DP, id), \ - SRI_ARR(DP_MSA_TIMING_PARAM1, DP, id), \ - SRI_ARR(DP_MSA_TIMING_PARAM2, DP, id), \ - SRI_ARR(DP_MSA_TIMING_PARAM3, DP, id), \ - SRI_ARR(DP_MSA_TIMING_PARAM4, DP, id), \ - SRI_ARR(DP_MSE_RATE_CNTL, DP, id), SRI_ARR(DP_MSE_RATE_UPDATE, DP, id), \ - SRI_ARR(DP_PIXEL_FORMAT, DP, id), SRI_ARR(DP_SEC_CNTL, DP, id), \ - SRI_ARR(DP_SEC_CNTL1, DP, id), 
SRI_ARR(DP_SEC_CNTL2, DP, id), \ - SRI_ARR(DP_SEC_CNTL5, DP, id), SRI_ARR(DP_SEC_CNTL6, DP, id), \ - SRI_ARR(DP_STEER_FIFO, DP, id), SRI_ARR(DP_VID_M, DP, id), \ - SRI_ARR(DP_VID_N, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ - SRI_ARR(DP_VID_TIMING, DP, id), SRI_ARR(DP_SEC_AUD_N, DP, id), \ - SRI_ARR(DP_SEC_TIMESTAMP, DP, id), SRI_ARR(DP_DSC_CNTL, DP, id), \ - SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ - SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ - SRI_ARR(DP_SEC_FRAMING4, DP, id), SRI_ARR(DP_GSP11_CNTL, DP, id), \ - SRI_ARR(DME_CONTROL, DME, id), \ - SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ - SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ - SRI_ARR(DIG_FE_CNTL, DIG, id), SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \ - SRI_ARR(DIG_FIFO_CTRL0, DIG, id) - -/* Aux regs */ - -#define AUX_REG_LIST_RI(id) \ - SRI_ARR(AUX_CONTROL, DP_AUX, id), SRI_ARR(AUX_DPHY_RX_CONTROL0, DP_AUX, id), \ - SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id) - -#define DCN2_AUX_REG_LIST_RI(id) \ - AUX_REG_LIST_RI(id), SRI_ARR(AUX_DPHY_TX_CONTROL, DP_AUX, id) - -/* HDP */ -#define HPD_REG_LIST_RI(id) SRI_ARR(DC_HPD_CONTROL, HPD, id) - -/* Link encoder */ -#define LE_DCN3_REG_LIST_RI(id) \ - SRI_ARR(DIG_BE_CNTL, DIG, id), SRI_ARR(DIG_BE_EN_CNTL, DIG, id), \ - SRI_ARR(TMDS_CTL_BITS, DIG, id), \ - SRI_ARR(TMDS_DCBALANCER_CONTROL, DIG, id), SRI_ARR(DP_CONFIG, DP, id), \ - SRI_ARR(DP_DPHY_CNTL, DP, id), SRI_ARR(DP_DPHY_PRBS_CNTL, DP, id), \ - SRI_ARR(DP_DPHY_SCRAM_CNTL, DP, id), SRI_ARR(DP_DPHY_SYM0, DP, id), \ - SRI_ARR(DP_DPHY_SYM1, DP, id), SRI_ARR(DP_DPHY_SYM2, DP, id), \ - SRI_ARR(DP_DPHY_TRAINING_PATTERN_SEL, DP, id), \ - SRI_ARR(DP_LINK_CNTL, DP, id), SRI_ARR(DP_LINK_FRAMING_CNTL, DP, id), \ - SRI_ARR(DP_MSE_SAT0, DP, id), SRI_ARR(DP_MSE_SAT1, DP, id), \ - SRI_ARR(DP_MSE_SAT2, DP, id), SRI_ARR(DP_MSE_SAT_UPDATE, DP, id), \ - SRI_ARR(DP_SEC_CNTL, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ - SRI_ARR(DP_DPHY_FAST_TRAINING, DP, id), SRI_ARR(DP_SEC_CNTL1, DP, id), \ - SRI_ARR(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \ - SRI_ARR(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id) - -#define LE_DCN31_REG_LIST_RI(id) \ - LE_DCN3_REG_LIST_RI(id), SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \ - SR_ARR(DIO_LINKA_CNTL, id), SR_ARR(DIO_LINKB_CNTL, id), \ - SR_ARR(DIO_LINKC_CNTL, id), SR_ARR(DIO_LINKD_CNTL, id), \ - SR_ARR(DIO_LINKE_CNTL, id), SR_ARR(DIO_LINKF_CNTL, id) - -#define UNIPHY_DCN2_REG_LIST_RI(id, phyid) \ - SRI_ARR_ALPHABET(CLOCK_ENABLE, SYMCLK, id, phyid), \ - SRI_ARR_ALPHABET(CHANNEL_XBAR_CNTL, UNIPHY, id, phyid) - -/* HPO DP stream encoder */ -#define DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) \ - SR_ARR(DP_STREAM_MAPPER_CONTROL0, id), \ - SR_ARR(DP_STREAM_MAPPER_CONTROL1, id), \ - SR_ARR(DP_STREAM_MAPPER_CONTROL2, id), \ - SR_ARR(DP_STREAM_MAPPER_CONTROL3, id), \ - SRI_ARR(DP_STREAM_ENC_CLOCK_CONTROL, DP_STREAM_ENC, id), \ - SRI_ARR(DP_STREAM_ENC_INPUT_MUX_CONTROL, DP_STREAM_ENC, id), \ - SRI_ARR(DP_STREAM_ENC_AUDIO_CONTROL, DP_STREAM_ENC, id), \ - SRI_ARR(DP_STREAM_ENC_CLOCK_RAMP_ADJUSTER_FIFO_STATUS_CONTROL0, DP_STREAM_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_PIXEL_FORMAT, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_PIXEL_FORMAT_DOUBLE_BUFFER_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA0, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA1, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA2, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA3, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA4, DP_SYM32_ENC, id), \ - 
SRI_ARR(DP_SYM32_ENC_VID_MSA5, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA6, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA7, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA8, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_MSA_DOUBLE_BUFFER_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_FIFO_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_STREAM_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_VBID_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_SDP_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL0, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL2, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL3, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL5, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL11, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_SDP_METADATA_PACKET_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_SDP_AUDIO_CONTROL0, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_VID_CRC_CONTROL, DP_SYM32_ENC, id), \ - SRI_ARR(DP_SYM32_ENC_HBLANK_CONTROL, DP_SYM32_ENC, id) - -/* HPO DP link encoder regs */ -#define DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) \ - SRI_ARR(DP_LINK_ENC_CLOCK_CONTROL, DP_LINK_ENC, id), \ - SRI_ARR(DP_DPHY_SYM32_CONTROL, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_STATUS, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CONFIG, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_PRBS_SEED0, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_PRBS_SEED1, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_PRBS_SEED2, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_PRBS_SEED3, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_SQ_PULSE, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM0, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM1, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM2, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM3, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM4, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM5, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM6, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM7, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM8, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM9, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM10, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_SAT_VC0, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_SAT_VC1, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_SAT_VC2, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_SAT_VC3, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL0, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL1, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL2, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL3, DP_DPHY_SYM32, id), \ - SRI_ARR(DP_DPHY_SYM32_SAT_UPDATE, DP_DPHY_SYM32, id) - -/* DPP */ -#define DPP_REG_LIST_DCN30_COMMON_RI(id) \ - SRI_ARR(CM_DEALPHA, CM, id), SRI_ARR(CM_MEM_PWR_STATUS, CM, id), \ - SRI_ARR(CM_BIAS_CR_R, CM, id), SRI_ARR(CM_BIAS_Y_G_CB_B, CM, id), \ - SRI_ARR(PRE_DEGAM, CNVC_CFG, id), SRI_ARR(CM_GAMCOR_CONTROL, CM, id), \ - SRI_ARR(CM_GAMCOR_LUT_CONTROL, CM, id), \ - SRI_ARR(CM_GAMCOR_LUT_INDEX, CM, id), \ - SRI_ARR(CM_GAMCOR_LUT_INDEX, CM, id), \ - SRI_ARR(CM_GAMCOR_LUT_DATA, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_R, CM, id), \ - 
SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_REGION_0_1, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_REGION_32_33, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_OFFSET_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_OFFSET_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_OFFSET_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_REGION_0_1, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_REGION_32_33, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_OFFSET_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_OFFSET_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_OFFSET_R, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_B, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_G, CM, id), \ - SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_R, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_CONTROL, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_C11_C12, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_C13_C14, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_C21_C22, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_C23_C24, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_C31_C32, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_C33_C34, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_B_C11_C12, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_B_C13_C14, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_B_C21_C22, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_B_C23_C24, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_B_C31_C32, CM, id), \ - SRI_ARR(CM_GAMUT_REMAP_B_C33_C34, CM, id), \ - SRI_ARR(DSCL_EXT_OVERSCAN_LEFT_RIGHT, DSCL, id), \ - SRI_ARR(DSCL_EXT_OVERSCAN_TOP_BOTTOM, DSCL, id), \ - SRI_ARR(OTG_H_BLANK, DSCL, id), SRI_ARR(OTG_V_BLANK, DSCL, id), \ - SRI_ARR(SCL_MODE, DSCL, id), SRI_ARR(LB_DATA_FORMAT, DSCL, id), \ - SRI_ARR(LB_MEMORY_CTRL, DSCL, id), SRI_ARR(DSCL_AUTOCAL, DSCL, id), \ - SRI_ARR(DSCL_CONTROL, DSCL, id), \ - SRI_ARR(SCL_TAP_CONTROL, DSCL, id), \ - SRI_ARR(SCL_COEF_RAM_TAP_SELECT, DSCL, id), \ - SRI_ARR(SCL_COEF_RAM_TAP_DATA, DSCL, id), \ - SRI_ARR(DSCL_2TAP_CONTROL, DSCL, id), SRI_ARR(MPC_SIZE, DSCL, id), \ - SRI_ARR(SCL_HORZ_FILTER_SCALE_RATIO, DSCL, id), \ - SRI_ARR(SCL_VERT_FILTER_SCALE_RATIO, DSCL, id), \ - SRI_ARR(SCL_HORZ_FILTER_SCALE_RATIO_C, DSCL, id), \ - SRI_ARR(SCL_VERT_FILTER_SCALE_RATIO_C, DSCL, id), \ - SRI_ARR(SCL_HORZ_FILTER_INIT, DSCL, id), \ - SRI_ARR(SCL_HORZ_FILTER_INIT_C, DSCL, id), \ - SRI_ARR(SCL_VERT_FILTER_INIT, DSCL, id), \ - SRI_ARR(SCL_VERT_FILTER_INIT_C, DSCL, id), \ - SRI_ARR(RECOUT_START, DSCL, id), SRI_ARR(RECOUT_SIZE, DSCL, id), \ - SRI_ARR(PRE_DEALPHA, CNVC_CFG, id), SRI_ARR(PRE_REALPHA, 
CNVC_CFG, id), \ - SRI_ARR(PRE_CSC_MODE, CNVC_CFG, id), \ - SRI_ARR(PRE_CSC_C11_C12, CNVC_CFG, id), \ - SRI_ARR(PRE_CSC_C33_C34, CNVC_CFG, id), \ - SRI_ARR(PRE_CSC_B_C11_C12, CNVC_CFG, id), \ - SRI_ARR(PRE_CSC_B_C33_C34, CNVC_CFG, id), \ - SRI_ARR(CM_POST_CSC_CONTROL, CM, id), \ - SRI_ARR(CM_POST_CSC_C11_C12, CM, id), \ - SRI_ARR(CM_POST_CSC_C33_C34, CM, id), \ - SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \ - SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \ - SRI_ARR(CM_MEM_PWR_CTRL, CM, id), SRI_ARR(CM_CONTROL, CM, id), \ - SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \ - SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ - SRI_ARR(CURSOR0_CONTROL, CNVC_CUR, id), \ - SRI_ARR(CURSOR0_COLOR0, CNVC_CUR, id), \ - SRI_ARR(CURSOR0_COLOR1, CNVC_CUR, id), \ - SRI_ARR(CURSOR0_FP_SCALE_BIAS, CNVC_CUR, id), \ - SRI_ARR(DPP_CONTROL, DPP_TOP, id), SRI_ARR(CM_HDR_MULT_COEF, CM, id), \ - SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ - SRI_ARR(ALPHA_2BIT_LUT, CNVC_CFG, id), \ - SRI_ARR(FCNV_FP_BIAS_R, CNVC_CFG, id), \ - SRI_ARR(FCNV_FP_BIAS_G, CNVC_CFG, id), \ - SRI_ARR(FCNV_FP_BIAS_B, CNVC_CFG, id), \ - SRI_ARR(FCNV_FP_SCALE_R, CNVC_CFG, id), \ - SRI_ARR(FCNV_FP_SCALE_G, CNVC_CFG, id), \ - SRI_ARR(FCNV_FP_SCALE_B, CNVC_CFG, id), \ - SRI_ARR(COLOR_KEYER_CONTROL, CNVC_CFG, id), \ - SRI_ARR(COLOR_KEYER_ALPHA, CNVC_CFG, id), \ - SRI_ARR(COLOR_KEYER_RED, CNVC_CFG, id), \ - SRI_ARR(COLOR_KEYER_GREEN, CNVC_CFG, id), \ - SRI_ARR(COLOR_KEYER_BLUE, CNVC_CFG, id), \ - SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ - SRI_ARR(OBUF_MEM_PWR_CTRL, DSCL, id), \ - SRI_ARR(DSCL_MEM_PWR_STATUS, DSCL, id), \ - SRI_ARR(DSCL_MEM_PWR_CTRL, DSCL, id) - -/* OPP */ -#define OPP_REG_LIST_DCN_RI(id) \ - SRI_ARR(FMT_BIT_DEPTH_CONTROL, FMT, id), SRI_ARR(FMT_CONTROL, FMT, id), \ - SRI_ARR(FMT_DITHER_RAND_R_SEED, FMT, id), \ - SRI_ARR(FMT_DITHER_RAND_G_SEED, FMT, id), \ - SRI_ARR(FMT_DITHER_RAND_B_SEED, FMT, id), \ - SRI_ARR(FMT_CLAMP_CNTL, FMT, id), \ - SRI_ARR(FMT_DYNAMIC_EXP_CNTL, FMT, id), \ - SRI_ARR(FMT_MAP420_MEMORY_CONTROL, FMT, id), \ - SRI_ARR(OPPBUF_CONTROL, OPPBUF, id), \ - SRI_ARR(OPPBUF_3D_PARAMETERS_0, OPPBUF, id), \ - SRI_ARR(OPPBUF_3D_PARAMETERS_1, OPPBUF, id), \ - SRI_ARR(OPP_PIPE_CONTROL, OPP_PIPE, id) \ - -#define OPP_REG_LIST_DCN10_RI(id) OPP_REG_LIST_DCN_RI(id) - -#define OPP_DPG_REG_LIST_RI(id) \ - SRI_ARR(DPG_CONTROL, DPG, id), SRI_ARR(DPG_DIMENSIONS, DPG, id), \ - SRI_ARR(DPG_OFFSET_SEGMENT, DPG, id), SRI_ARR(DPG_COLOUR_B_CB, DPG, id), \ - SRI_ARR(DPG_COLOUR_G_Y, DPG, id), SRI_ARR(DPG_COLOUR_R_CR, DPG, id), \ - SRI_ARR(DPG_RAMP_CONTROL, DPG, id), SRI_ARR(DPG_STATUS, DPG, id) - -#define OPP_REG_LIST_DCN30_RI(id) \ - OPP_REG_LIST_DCN10_RI(id), OPP_DPG_REG_LIST_RI(id), \ - SRI_ARR(FMT_422_CONTROL, FMT, id) - -/* Aux engine regs */ -#define AUX_COMMON_REG_LIST0_RI(id) \ - SRI_ARR(AUX_CONTROL, DP_AUX, id), SRI_ARR(AUX_ARB_CONTROL, DP_AUX, id), \ - SRI_ARR(AUX_SW_DATA, DP_AUX, id), SRI_ARR(AUX_SW_CONTROL, DP_AUX, id), \ - SRI_ARR(AUX_INTERRUPT_CONTROL, DP_AUX, id), \ - SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id), \ - SRI_ARR(AUX_SW_STATUS, DP_AUX, id) - -/* DWBC */ -#define DWBC_COMMON_REG_LIST_DCN30_RI(id) \ - SR_ARR(DWB_ENABLE_CLK_CTRL, id), SR_ARR(DWB_MEM_PWR_CTRL, id), \ - SR_ARR(FC_MODE_CTRL, id), SR_ARR(FC_FLOW_CTRL, id), \ - SR_ARR(FC_WINDOW_START, id), SR_ARR(FC_WINDOW_SIZE, id), \ - SR_ARR(FC_SOURCE_SIZE, id), SR_ARR(DWB_UPDATE_CTRL, id), \ - SR_ARR(DWB_CRC_CTRL, id), SR_ARR(DWB_CRC_MASK_R_G, id), \ - SR_ARR(DWB_CRC_MASK_B_A, id), SR_ARR(DWB_CRC_VAL_R_G, id), \ - SR_ARR(DWB_CRC_VAL_B_A, id), SR_ARR(DWB_OUT_CTRL, id), \ - 
SR_ARR(DWB_MMHUBBUB_BACKPRESSURE_CNT_EN, id), \ - SR_ARR(DWB_MMHUBBUB_BACKPRESSURE_CNT, id), \ - SR_ARR(DWB_HOST_READ_CONTROL, id), SR_ARR(DWB_SOFT_RESET, id), \ - SR_ARR(DWB_HDR_MULT_COEF, id), SR_ARR(DWB_GAMUT_REMAP_MODE, id), \ - SR_ARR(DWB_GAMUT_REMAP_COEF_FORMAT, id), \ - SR_ARR(DWB_GAMUT_REMAPA_C11_C12, id), \ - SR_ARR(DWB_GAMUT_REMAPA_C13_C14, id), \ - SR_ARR(DWB_GAMUT_REMAPA_C21_C22, id), \ - SR_ARR(DWB_GAMUT_REMAPA_C23_C24, id), \ - SR_ARR(DWB_GAMUT_REMAPA_C31_C32, id), \ - SR_ARR(DWB_GAMUT_REMAPA_C33_C34, id), \ - SR_ARR(DWB_GAMUT_REMAPB_C11_C12, id), \ - SR_ARR(DWB_GAMUT_REMAPB_C13_C14, id), \ - SR_ARR(DWB_GAMUT_REMAPB_C21_C22, id), \ - SR_ARR(DWB_GAMUT_REMAPB_C23_C24, id), \ - SR_ARR(DWB_GAMUT_REMAPB_C31_C32, id), \ - SR_ARR(DWB_GAMUT_REMAPB_C33_C34, id), SR_ARR(DWB_OGAM_CONTROL, id), \ - SR_ARR(DWB_OGAM_LUT_INDEX, id), SR_ARR(DWB_OGAM_LUT_DATA, id), \ - SR_ARR(DWB_OGAM_LUT_CONTROL, id), \ - SR_ARR(DWB_OGAM_RAMA_START_CNTL_B, id), \ - SR_ARR(DWB_OGAM_RAMA_START_CNTL_G, id), \ - SR_ARR(DWB_OGAM_RAMA_START_CNTL_R, id), \ - SR_ARR(DWB_OGAM_RAMA_START_BASE_CNTL_B, id), \ - SR_ARR(DWB_OGAM_RAMA_START_SLOPE_CNTL_B, id), \ - SR_ARR(DWB_OGAM_RAMA_START_BASE_CNTL_G, id), \ - SR_ARR(DWB_OGAM_RAMA_START_SLOPE_CNTL_G, id), \ - SR_ARR(DWB_OGAM_RAMA_START_BASE_CNTL_R, id), \ - SR_ARR(DWB_OGAM_RAMA_START_SLOPE_CNTL_R, id), \ - SR_ARR(DWB_OGAM_RAMA_END_CNTL1_B, id), \ - SR_ARR(DWB_OGAM_RAMA_END_CNTL2_B, id), \ - SR_ARR(DWB_OGAM_RAMA_END_CNTL1_G, id), \ - SR_ARR(DWB_OGAM_RAMA_END_CNTL2_G, id), \ - SR_ARR(DWB_OGAM_RAMA_END_CNTL1_R, id), \ - SR_ARR(DWB_OGAM_RAMA_END_CNTL2_R, id), \ - SR_ARR(DWB_OGAM_RAMA_OFFSET_B, id), SR_ARR(DWB_OGAM_RAMA_OFFSET_G, id), \ - SR_ARR(DWB_OGAM_RAMA_OFFSET_R, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_0_1, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_2_3, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_4_5, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_6_7, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_8_9, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_10_11, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_12_13, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_14_15, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_16_17, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_18_19, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_20_21, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_22_23, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_24_25, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_26_27, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_28_29, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_30_31, id), \ - SR_ARR(DWB_OGAM_RAMA_REGION_32_33, id), \ - SR_ARR(DWB_OGAM_RAMB_START_CNTL_B, id), \ - SR_ARR(DWB_OGAM_RAMB_START_CNTL_G, id), \ - SR_ARR(DWB_OGAM_RAMB_START_CNTL_R, id), \ - SR_ARR(DWB_OGAM_RAMB_START_BASE_CNTL_B, id), \ - SR_ARR(DWB_OGAM_RAMB_START_SLOPE_CNTL_B, id), \ - SR_ARR(DWB_OGAM_RAMB_START_BASE_CNTL_G, id), \ - SR_ARR(DWB_OGAM_RAMB_START_SLOPE_CNTL_G, id), \ - SR_ARR(DWB_OGAM_RAMB_START_BASE_CNTL_R, id), \ - SR_ARR(DWB_OGAM_RAMB_START_SLOPE_CNTL_R, id), \ - SR_ARR(DWB_OGAM_RAMB_END_CNTL1_B, id), \ - SR_ARR(DWB_OGAM_RAMB_END_CNTL2_B, id), \ - SR_ARR(DWB_OGAM_RAMB_END_CNTL1_G, id), \ - SR_ARR(DWB_OGAM_RAMB_END_CNTL2_G, id), \ - SR_ARR(DWB_OGAM_RAMB_END_CNTL1_R, id), \ - SR_ARR(DWB_OGAM_RAMB_END_CNTL2_R, id), \ - SR_ARR(DWB_OGAM_RAMB_OFFSET_B, id), SR_ARR(DWB_OGAM_RAMB_OFFSET_G, id), \ - SR_ARR(DWB_OGAM_RAMB_OFFSET_R, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_0_1, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_2_3, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_4_5, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_6_7, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_8_9, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_10_11, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_12_13, id), 
\ - SR_ARR(DWB_OGAM_RAMB_REGION_14_15, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_16_17, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_18_19, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_20_21, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_22_23, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_24_25, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_26_27, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_28_29, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_30_31, id), \ - SR_ARR(DWB_OGAM_RAMB_REGION_32_33, id) - -/* MCIF */ - -#define MCIF_WB_COMMON_REG_LIST_DCN32_RI(inst) \ - SRI2_ARR(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUFMGR_STATUS, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_PITCH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_1_STATUS, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_1_STATUS2, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_2_STATUS, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_2_STATUS2, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_3_STATUS, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_3_STATUS2, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_4_STATUS, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_4_STATUS2, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_SCLK_CHANGE, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_1_ADDR_Y, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_1_ADDR_C, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_2_ADDR_Y, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_2_ADDR_C, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_3_ADDR_Y, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_3_ADDR_C, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_4_ADDR_Y, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_4_ADDR_C, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, MMHUBBUB, inst), \ - SRI2_ARR(MCIF_WB_NB_PSTATE_CONTROL, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_WATERMARK, MMHUBBUB, inst), \ - SRI2_ARR(MCIF_WB_CLOCK_GATER_CONTROL, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_SELF_REFRESH_CONTROL, MCIF_WB, inst), \ - SRI2_ARR(MULTI_LEVEL_QOS_CTRL, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_SECURITY_LEVEL, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_LUMA_SIZE, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_CHROMA_SIZE, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_1_ADDR_Y_HIGH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_1_ADDR_C_HIGH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_2_ADDR_Y_HIGH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_2_ADDR_C_HIGH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_3_ADDR_Y_HIGH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_3_ADDR_C_HIGH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_4_ADDR_Y_HIGH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_4_ADDR_C_HIGH, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_1_RESOLUTION, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_2_RESOLUTION, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_3_RESOLUTION, MCIF_WB, inst), \ - SRI2_ARR(MCIF_WB_BUF_4_RESOLUTION, MCIF_WB, inst), \ - SRI2_ARR(MMHUBBUB_MEM_PWR_CNTL, MMHUBBUB, inst), \ - SRI2_ARR(MMHUBBUB_WARMUP_ADDR_REGION, MMHUBBUB, inst), \ - SRI2_ARR(MMHUBBUB_WARMUP_BASE_ADDR_HIGH, MMHUBBUB, inst), \ - SRI2_ARR(MMHUBBUB_WARMUP_BASE_ADDR_LOW, MMHUBBUB, inst), \ - SRI2_ARR(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB, inst) - -/* DSC */ - -#define DSC_REG_LIST_DCN20_RI(id) \ - SRI_ARR(DSC_TOP_CONTROL, DSC_TOP, id), \ - SRI_ARR(DSC_DEBUG_CONTROL, DSC_TOP, id), \ - SRI_ARR(DSCC_CONFIG0, DSCC, id), SRI_ARR(DSCC_CONFIG1, DSCC, id), \ - SRI_ARR(DSCC_STATUS, DSCC, id), \ - SRI_ARR(DSCC_INTERRUPT_CONTROL_STATUS, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG0, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG1, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG2, DSCC, id), \ - 
SRI_ARR(DSCC_PPS_CONFIG3, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG4, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG5, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG6, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG7, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG8, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG9, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG10, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG11, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG12, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG13, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG14, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG15, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG16, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG17, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG18, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG19, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG20, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG21, DSCC, id), \ - SRI_ARR(DSCC_PPS_CONFIG22, DSCC, id), \ - SRI_ARR(DSCC_MEM_POWER_CONTROL, DSCC, id), \ - SRI_ARR(DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC, id), \ - SRI_ARR(DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC, id), \ - SRI_ARR(DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC, id), \ - SRI_ARR(DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC, id), \ - SRI_ARR(DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC, id), \ - SRI_ARR(DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC, id), \ - SRI_ARR(DSCC_MAX_ABS_ERROR0, DSCC, id), \ - SRI_ARR(DSCC_MAX_ABS_ERROR1, DSCC, id), \ - SRI_ARR(DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL, DSCC, id), \ - SRI_ARR(DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \ - SRI_ARR(DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \ - SRI_ARR(DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ - SRI_ARR(DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL, DSCC, id), \ - SRI_ARR(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \ - SRI_ARR(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \ - SRI_ARR(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ - SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \ - SRI_ARR(DSCCIF_CONFIG1, DSCCIF, id), \ - SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) - -/* MPC */ - -#define MPC_DWB_MUX_REG_LIST_DCN3_0_RI(inst) \ - SRII_DWB(DWB_MUX, MUX, MPC_DWB, inst) - -#define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst) \ - SRII(MUX, MPC_OUT, inst), VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst) - -#define MPC_OUT_MUX_REG_LIST_DCN3_0_RI(inst) \ - MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst), SRII(CSC_MODE, MPC_OUT, inst), \ - SRII(CSC_C11_C12_A, MPC_OUT, inst), SRII(CSC_C33_C34_A, MPC_OUT, inst), \ - SRII(CSC_C11_C12_B, MPC_OUT, inst), SRII(CSC_C33_C34_B, MPC_OUT, inst), \ - SRII(DENORM_CONTROL, MPC_OUT, inst), \ - SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst), \ - SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst), SR(MPC_OUT_CSC_COEF_FORMAT) - -#define MPC_COMMON_REG_LIST_DCN1_0_RI(inst) \ - SRII(MPCC_TOP_SEL, MPCC, inst), SRII(MPCC_BOT_SEL, MPCC, inst), \ - SRII(MPCC_CONTROL, MPCC, inst), SRII(MPCC_STATUS, MPCC, inst), \ - SRII(MPCC_OPP_ID, MPCC, inst), SRII(MPCC_BG_G_Y, MPCC, inst), \ - SRII(MPCC_BG_R_CR, MPCC, inst), SRII(MPCC_BG_B_CB, MPCC, inst), \ - SRII(MPCC_SM_CONTROL, MPCC, inst), \ - SRII(MPCC_UPDATE_LOCK_SEL, MPCC, inst) - -#define MPC_REG_LIST_DCN3_0_RI(inst) \ - MPC_COMMON_REG_LIST_DCN1_0_RI(inst), SRII(MPCC_TOP_GAIN, MPCC, inst), \ - SRII(MPCC_BOT_GAIN_INSIDE, MPCC, inst), \ - SRII(MPCC_BOT_GAIN_OUTSIDE, MPCC, inst), \ - SRII(MPCC_MEM_PWR_CTRL, MPCC, inst), \ - SRII(MPCC_OGAM_LUT_INDEX, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_LUT_DATA, MPCC_OGAM, inst), \ - SRII(MPCC_GAMUT_REMAP_COEF_FORMAT, MPCC_OGAM, inst), \ - SRII(MPCC_GAMUT_REMAP_MODE, MPCC_OGAM, inst), \ - SRII(MPC_GAMUT_REMAP_C11_C12_A, MPCC_OGAM, inst), \ - SRII(MPC_GAMUT_REMAP_C33_C34_A, MPCC_OGAM, inst), 
\ - SRII(MPC_GAMUT_REMAP_C11_C12_B, MPCC_OGAM, inst), \ - SRII(MPC_GAMUT_REMAP_C33_C34_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_CNTL_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_CNTL_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_CNTL_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_SLOPE_CNTL_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_SLOPE_CNTL_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_SLOPE_CNTL_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_END_CNTL1_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_END_CNTL2_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_END_CNTL1_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_END_CNTL2_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_END_CNTL1_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_END_CNTL2_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_REGION_0_1, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_REGION_32_33, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_OFFSET_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_OFFSET_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_OFFSET_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_BASE_CNTL_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_BASE_CNTL_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMA_START_BASE_CNTL_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_CNTL_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_CNTL_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_CNTL_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_SLOPE_CNTL_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_SLOPE_CNTL_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_SLOPE_CNTL_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_END_CNTL1_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_END_CNTL2_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_END_CNTL1_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_END_CNTL2_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_END_CNTL1_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_END_CNTL2_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_REGION_0_1, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_REGION_32_33, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_OFFSET_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_OFFSET_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_OFFSET_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_B, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_G, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_R, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_CONTROL, MPCC_OGAM, inst), \ - SRII(MPCC_OGAM_LUT_CONTROL, MPCC_OGAM, inst) - -#define MPC_REG_LIST_DCN3_2_RI(inst) \ - MPC_REG_LIST_DCN3_0_RI(inst),\ - SRII(MPCC_MOVABLE_CM_LOCATION_CONTROL, MPCC, inst),\ - SRII(MPCC_MCM_SHAPER_CONTROL, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_OFFSET_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_OFFSET_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_OFFSET_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_SCALE_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_SCALE_G_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_LUT_INDEX, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_LUT_DATA, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_LUT_WRITE_EN_MASK, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_0_1, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_2_3, MPCC_MCM, inst),\ - 
SRII(MPCC_MCM_SHAPER_RAMA_REGION_4_5, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_6_7, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_8_9, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_10_11, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_12_13, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_14_15, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_16_17, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_18_19, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_20_21, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_22_23, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_24_25, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_26_27, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_28_29, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_30_31, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMA_REGION_32_33, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_0_1, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_2_3, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_4_5, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_6_7, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_8_9, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_10_11, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_12_13, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_14_15, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_16_17, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_18_19, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_20_21, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_22_23, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_24_25, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_26_27, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_28_29, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_30_31, MPCC_MCM, inst),\ - SRII(MPCC_MCM_SHAPER_RAMB_REGION_32_33, MPCC_MCM, inst),\ - SRII(MPCC_MCM_3DLUT_MODE, MPCC_MCM, inst), /*TODO: may need to add other 3DLUT regs*/\ - SRII(MPCC_MCM_3DLUT_INDEX, MPCC_MCM, inst),\ - SRII(MPCC_MCM_3DLUT_DATA, MPCC_MCM, inst),\ - SRII(MPCC_MCM_3DLUT_DATA_30BIT, MPCC_MCM, inst),\ - SRII(MPCC_MCM_3DLUT_READ_WRITE_CONTROL, MPCC_MCM, inst),\ - SRII(MPCC_MCM_3DLUT_OUT_NORM_FACTOR, MPCC_MCM, inst),\ - SRII(MPCC_MCM_3DLUT_OUT_OFFSET_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_3DLUT_OUT_OFFSET_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_3DLUT_OUT_OFFSET_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_CONTROL, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_LUT_INDEX, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_LUT_DATA, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_LUT_CONTROL, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_R, MPCC_MCM, inst),\ - 
SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_0_1, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_2_3, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_4_5, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_6_7, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_8_9, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_10_11, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_12_13, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_14_15, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_16_17, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_18_19, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_20_21, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_22_23, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_24_25, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_26_27, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_28_29, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_30_31, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMA_REGION_32_33, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_B, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_G, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_R, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_0_1, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_2_3, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_4_5, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_6_7, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_8_9, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_10_11, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_12_13, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_14_15, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_16_17, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_18_19, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_20_21, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_22_23, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_24_25, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_26_27, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_28_29, MPCC_MCM, inst),\ - SRII(MPCC_MCM_1DLUT_RAMB_REGION_30_31, MPCC_MCM, inst),\ - 
SRII(MPCC_MCM_1DLUT_RAMB_REGION_32_33, MPCC_MCM, inst),\ - SRII(MPCC_MCM_MEM_PWR_CTRL, MPCC_MCM, inst) -/* OPTC */ - -#define OPTC_COMMON_REG_LIST_DCN3_2_RI(inst) \ - SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst), \ - SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst), \ - SRI_ARR(OTG_VREADY_PARAM, OTG, inst), \ - SRI_ARR(OTG_MASTER_UPDATE_LOCK, OTG, inst), \ - SRI_ARR(OTG_GLOBAL_CONTROL0, OTG, inst), \ - SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), \ - SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), \ - SRI_ARR(OTG_GLOBAL_CONTROL4, OTG, inst), \ - SRI_ARR(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst), \ - SRI_ARR(OTG_H_TOTAL, OTG, inst), \ - SRI_ARR(OTG_H_BLANK_START_END, OTG, inst), \ - SRI_ARR(OTG_H_SYNC_A, OTG, inst), SRI_ARR(OTG_H_SYNC_A_CNTL, OTG, inst), \ - SRI_ARR(OTG_H_TIMING_CNTL, OTG, inst), SRI_ARR(OTG_V_TOTAL, OTG, inst), \ - SRI_ARR(OTG_V_BLANK_START_END, OTG, inst), \ - SRI_ARR(OTG_V_SYNC_A, OTG, inst), SRI_ARR(OTG_V_SYNC_A_CNTL, OTG, inst), \ - SRI_ARR(OTG_CONTROL, OTG, inst), SRI_ARR(OTG_STEREO_CONTROL, OTG, inst), \ - SRI_ARR(OTG_3D_STRUCTURE_CONTROL, OTG, inst), \ - SRI_ARR(OTG_STEREO_STATUS, OTG, inst), \ - SRI_ARR(OTG_V_TOTAL_MAX, OTG, inst), \ - SRI_ARR(OTG_V_TOTAL_MIN, OTG, inst), \ - SRI_ARR(OTG_V_TOTAL_CONTROL, OTG, inst), \ - SRI_ARR(OTG_TRIGA_CNTL, OTG, inst), \ - SRI_ARR(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst), \ - SRI_ARR(OTG_STATIC_SCREEN_CONTROL, OTG, inst), \ - SRI_ARR(OTG_STATUS_FRAME_COUNT, OTG, inst), \ - SRI_ARR(OTG_STATUS, OTG, inst), SRI_ARR(OTG_STATUS_POSITION, OTG, inst), \ - SRI_ARR(OTG_NOM_VERT_POSITION, OTG, inst), \ - SRI_ARR(OTG_M_CONST_DTO0, OTG, inst), \ - SRI_ARR(OTG_M_CONST_DTO1, OTG, inst), \ - SRI_ARR(OTG_CLOCK_CONTROL, OTG, inst), \ - SRI_ARR(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst), \ - SRI_ARR(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst), \ - SRI_ARR(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst), \ - SRI_ARR(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst), \ - SRI_ARR(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst), \ - SRI_ARR(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst), \ - SRI_ARR(OPTC_INPUT_CLOCK_CONTROL, ODM, inst), \ - SRI_ARR(OPTC_DATA_SOURCE_SELECT, ODM, inst), \ - SRI_ARR(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst), \ - SRI_ARR(CONTROL, VTG, inst), SRI_ARR(OTG_VERT_SYNC_CONTROL, OTG, inst), \ - SRI_ARR(OTG_GSL_CONTROL, OTG, inst), SRI_ARR(OTG_CRC_CNTL, OTG, inst), \ - SRI_ARR(OTG_CRC0_DATA_RG, OTG, inst), \ - SRI_ARR(OTG_CRC0_DATA_B, OTG, inst), \ - SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst), \ - SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst), \ - SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst), \ - SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst), \ - SR_ARR(GSL_SOURCE_SELECT, inst), \ - SRI_ARR(OTG_TRIGA_MANUAL_TRIG, OTG, inst), \ - SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), \ - SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), \ - SRI_ARR(OTG_GSL_WINDOW_X, OTG, inst), \ - SRI_ARR(OTG_GSL_WINDOW_Y, OTG, inst), \ - SRI_ARR(OTG_VUPDATE_KEEPOUT, OTG, inst), \ - SRI_ARR(OTG_DSC_START_POSITION, OTG, inst), \ - SRI_ARR(OTG_DRR_TRIGGER_WINDOW, OTG, inst), \ - SRI_ARR(OTG_DRR_V_TOTAL_CHANGE, OTG, inst), \ - SRI_ARR(OPTC_DATA_FORMAT_CONTROL, ODM, inst), \ - SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst), \ - SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ - SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst) - -/* HUBP */ - -#define HUBP_REG_LIST_DCN_VM_RI(id) \ - SRI_ARR(NOM_PARAMETERS_0, HUBPREQ, id), \ - SRI_ARR(NOM_PARAMETERS_1, HUBPREQ, id), \ - SRI_ARR(NOM_PARAMETERS_2, HUBPREQ, id), \ - SRI_ARR(NOM_PARAMETERS_3, HUBPREQ, id), \ - SRI_ARR(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id) 
-#define HUBP_REG_LIST_DCN_RI(id) \ - SRI_ARR(DCHUBP_CNTL, HUBP, id), SRI_ARR(HUBPREQ_DEBUG_DB, HUBP, id), \ - SRI_ARR(HUBPREQ_DEBUG, HUBP, id), SRI_ARR(DCSURF_ADDR_CONFIG, HUBP, id), \ - SRI_ARR(DCSURF_TILING_CONFIG, HUBP, id), \ - SRI_ARR(DCSURF_SURFACE_PITCH, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_PITCH_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_CONFIG, HUBP, id), \ - SRI_ARR(DCSURF_FLIP_CONTROL, HUBPREQ, id), \ - SRI_ARR(DCSURF_PRI_VIEWPORT_DIMENSION, HUBP, id), \ - SRI_ARR(DCSURF_PRI_VIEWPORT_START, HUBP, id), \ - SRI_ARR(DCSURF_SEC_VIEWPORT_DIMENSION, HUBP, id), \ - SRI_ARR(DCSURF_SEC_VIEWPORT_START, HUBP, id), \ - SRI_ARR(DCSURF_PRI_VIEWPORT_DIMENSION_C, HUBP, id), \ - SRI_ARR(DCSURF_PRI_VIEWPORT_START_C, HUBP, id), \ - SRI_ARR(DCSURF_SEC_VIEWPORT_DIMENSION_C, HUBP, id), \ - SRI_ARR(DCSURF_SEC_VIEWPORT_START_C, HUBP, id), \ - SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ - SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS, HUBPREQ, id), \ - SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ - SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS, HUBPREQ, id), \ - SRI_ARR(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ - SRI_ARR(DCSURF_PRIMARY_META_SURFACE_ADDRESS, HUBPREQ, id), \ - SRI_ARR(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ - SRI_ARR(DCSURF_SECONDARY_META_SURFACE_ADDRESS, HUBPREQ, id), \ - SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SECONDARY_META_SURFACE_ADDRESS_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_INUSE, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_INUSE_HIGH, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_INUSE_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_INUSE_HIGH_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_HIGH, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_CONTROL, HUBPREQ, id), \ - SRI_ARR(DCSURF_SURFACE_FLIP_INTERRUPT, HUBPREQ, id), \ - SRI_ARR(HUBPRET_CONTROL, HUBPRET, id), \ - SRI_ARR(HUBPRET_READ_LINE_STATUS, HUBPRET, id), \ - SRI_ARR(DCN_EXPANSION_MODE, HUBPREQ, id), \ - SRI_ARR(DCHUBP_REQ_SIZE_CONFIG, HUBP, id), \ - SRI_ARR(DCHUBP_REQ_SIZE_CONFIG_C, HUBP, id), \ - SRI_ARR(BLANK_OFFSET_0, HUBPREQ, id), \ - SRI_ARR(BLANK_OFFSET_1, HUBPREQ, id), \ - SRI_ARR(DST_DIMENSIONS, HUBPREQ, id), \ - SRI_ARR(DST_AFTER_SCALER, HUBPREQ, id), \ - SRI_ARR(VBLANK_PARAMETERS_0, HUBPREQ, id), \ - SRI_ARR(REF_FREQ_TO_PIX_FREQ, HUBPREQ, id), \ - SRI_ARR(VBLANK_PARAMETERS_1, HUBPREQ, id), \ - SRI_ARR(VBLANK_PARAMETERS_3, HUBPREQ, id), \ - SRI_ARR(NOM_PARAMETERS_4, HUBPREQ, id), \ - SRI_ARR(NOM_PARAMETERS_5, HUBPREQ, id), \ - SRI_ARR(PER_LINE_DELIVERY_PRE, HUBPREQ, id), \ - SRI_ARR(PER_LINE_DELIVERY, HUBPREQ, id), \ - SRI_ARR(VBLANK_PARAMETERS_2, HUBPREQ, id), \ - SRI_ARR(VBLANK_PARAMETERS_4, HUBPREQ, id), \ - SRI_ARR(NOM_PARAMETERS_6, HUBPREQ, id), \ - SRI_ARR(NOM_PARAMETERS_7, HUBPREQ, id), \ - SRI_ARR(DCN_TTU_QOS_WM, HUBPREQ, id), \ - SRI_ARR(DCN_GLOBAL_TTU_CNTL, HUBPREQ, id), \ - SRI_ARR(DCN_SURF0_TTU_CNTL0, HUBPREQ, id), \ - 
SRI_ARR(DCN_SURF0_TTU_CNTL1, HUBPREQ, id), \ - SRI_ARR(DCN_SURF1_TTU_CNTL0, HUBPREQ, id), \ - SRI_ARR(DCN_SURF1_TTU_CNTL1, HUBPREQ, id), \ - SRI_ARR(DCN_CUR0_TTU_CNTL0, HUBPREQ, id), \ - SRI_ARR(DCN_CUR0_TTU_CNTL1, HUBPREQ, id), \ - SRI_ARR(HUBP_CLK_CNTL, HUBP, id) -#define HUBP_REG_LIST_DCN2_COMMON_RI(id) \ - HUBP_REG_LIST_DCN_RI(id), HUBP_REG_LIST_DCN_VM_RI(id), \ - SRI_ARR(PREFETCH_SETTINGS, HUBPREQ, id), \ - SRI_ARR(PREFETCH_SETTINGS_C, HUBPREQ, id), \ - SRI_ARR(DCN_VM_SYSTEM_APERTURE_LOW_ADDR, HUBPREQ, id), \ - SRI_ARR(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, HUBPREQ, id), \ - SRI_ARR(CURSOR_SETTINGS, HUBPREQ, id), \ - SRI_ARR(CURSOR_SURFACE_ADDRESS_HIGH, CURSOR0_, id), \ - SRI_ARR(CURSOR_SURFACE_ADDRESS, CURSOR0_, id), \ - SRI_ARR(CURSOR_SIZE, CURSOR0_, id), \ - SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ - SRI_ARR(CURSOR_POSITION, CURSOR0_, id), \ - SRI_ARR(CURSOR_HOT_SPOT, CURSOR0_, id), \ - SRI_ARR(CURSOR_DST_OFFSET, CURSOR0_, id), \ - SRI_ARR(DMDATA_ADDRESS_HIGH, CURSOR0_, id), \ - SRI_ARR(DMDATA_ADDRESS_LOW, CURSOR0_, id), \ - SRI_ARR(DMDATA_CNTL, CURSOR0_, id), \ - SRI_ARR(DMDATA_SW_CNTL, CURSOR0_, id), \ - SRI_ARR(DMDATA_QOS_CNTL, CURSOR0_, id), \ - SRI_ARR(DMDATA_SW_DATA, CURSOR0_, id), \ - SRI_ARR(DMDATA_STATUS, CURSOR0_, id), \ - SRI_ARR(FLIP_PARAMETERS_0, HUBPREQ, id), \ - SRI_ARR(FLIP_PARAMETERS_1, HUBPREQ, id), \ - SRI_ARR(FLIP_PARAMETERS_2, HUBPREQ, id), \ - SRI_ARR(DCN_CUR1_TTU_CNTL0, HUBPREQ, id), \ - SRI_ARR(DCN_CUR1_TTU_CNTL1, HUBPREQ, id), \ - SRI_ARR(DCSURF_FLIP_CONTROL2, HUBPREQ, id), \ - SRI_ARR(VMID_SETTINGS_0, HUBPREQ, id) -#define HUBP_REG_LIST_DCN21_RI(id) \ - HUBP_REG_LIST_DCN2_COMMON_RI(id), SRI_ARR(FLIP_PARAMETERS_3, HUBPREQ, id), \ - SRI_ARR(FLIP_PARAMETERS_4, HUBPREQ, id), \ - SRI_ARR(FLIP_PARAMETERS_5, HUBPREQ, id), \ - SRI_ARR(FLIP_PARAMETERS_6, HUBPREQ, id), \ - SRI_ARR(VBLANK_PARAMETERS_5, HUBPREQ, id), \ - SRI_ARR(VBLANK_PARAMETERS_6, HUBPREQ, id) -#define HUBP_REG_LIST_DCN30_RI(id) \ - HUBP_REG_LIST_DCN21_RI(id), SRI_ARR(DCN_DMDATA_VM_CNTL, HUBPREQ, id) -#define HUBP_REG_LIST_DCN32_RI(id) \ - HUBP_REG_LIST_DCN30_RI(id), SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ - SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ - SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id) - -/* HUBBUB */ - -#define HUBBUB_REG_LIST_DCN32_RI(id) \ - SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A), \ - SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B), \ - SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C), \ - SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D), \ - SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \ - SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), SR(DCHUBBUB_ARB_SAT_LEVEL), \ - SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DCHUBBUB_SOFT_RESET), SR(DCHUBBUB_CRC_CTRL), \ - SR(DCN_VM_FB_LOCATION_BASE), SR(DCN_VM_FB_LOCATION_TOP), \ - SR(DCN_VM_FB_OFFSET), SR(DCN_VM_AGP_BOT), SR(DCN_VM_AGP_TOP), \ - SR(DCN_VM_AGP_BASE), HUBBUB_SR_WATERMARK_REG_LIST(), \ - SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A), SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B), \ - SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_C), SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D), \ - SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A), \ - SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B), \ - SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C), \ - SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D), \ - SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A), \ - SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B), \ - SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C), \ - SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D), SR(DCHUBBUB_DET0_CTRL), \ - SR(DCHUBBUB_DET1_CTRL), SR(DCHUBBUB_DET2_CTRL), SR(DCHUBBUB_DET3_CTRL), \ - SR(DCHUBBUB_COMPBUF_CTRL), SR(COMPBUF_RESERVED_SPACE), \ - 
SR(DCHUBBUB_DEBUG_CTRL_0), \ - SR(DCHUBBUB_ARB_USR_RETRAINING_CNTL), \ - SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A), \ - SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B), \ - SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C), \ - SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D), \ - SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A), \ - SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B), \ - SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C), \ - SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D), \ - SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A), \ - SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B), \ - SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C), \ - SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D), \ - SR(DCHUBBUB_ARB_MALL_CNTL), \ - SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \ - SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \ - SR(SDPIF_REQUEST_RATE_LIMIT) - -/* DCCG */ - -#define DCCG_REG_LIST_DCN32_RI() \ - SR(DPPCLK_DTO_CTRL), DCCG_SRII(DTO_PARAM, DPPCLK, 0), \ - DCCG_SRII(DTO_PARAM, DPPCLK, 1), DCCG_SRII(DTO_PARAM, DPPCLK, 2), \ - DCCG_SRII(DTO_PARAM, DPPCLK, 3), DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0), \ - SR(PHYASYMCLK_CLOCK_CNTL), SR(PHYBSYMCLK_CLOCK_CNTL), \ - SR(PHYCSYMCLK_CLOCK_CNTL), SR(PHYDSYMCLK_CLOCK_CNTL), \ - SR(PHYESYMCLK_CLOCK_CNTL), SR(DPSTREAMCLK_CNTL), SR(HDMISTREAMCLK_CNTL), \ - SR(SYMCLK32_SE_CNTL), SR(SYMCLK32_LE_CNTL), \ - DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0), DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1), \ - DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2), DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3), \ - DCCG_SRII(MODULO, DTBCLK_DTO, 0), DCCG_SRII(MODULO, DTBCLK_DTO, 1), \ - DCCG_SRII(MODULO, DTBCLK_DTO, 2), DCCG_SRII(MODULO, DTBCLK_DTO, 3), \ - DCCG_SRII(PHASE, DTBCLK_DTO, 0), DCCG_SRII(PHASE, DTBCLK_DTO, 1), \ - DCCG_SRII(PHASE, DTBCLK_DTO, 2), DCCG_SRII(PHASE, DTBCLK_DTO, 3), \ - SR(DCCG_AUDIO_DTBCLK_DTO_MODULO), SR(DCCG_AUDIO_DTBCLK_DTO_PHASE), \ - SR(OTG_PIXEL_RATE_DIV), SR(DTBCLK_P_CNTL), \ - SR(DCCG_AUDIO_DTO_SOURCE), SR(DENTIST_DISPCLK_CNTL) - -/* VMID */ -#define DCN20_VMID_REG_LIST_RI(id) \ - SRI_ARR(CNTL, DCN_VM_CONTEXT, id), \ - SRI_ARR(PAGE_TABLE_BASE_ADDR_HI32, DCN_VM_CONTEXT, id), \ - SRI_ARR(PAGE_TABLE_BASE_ADDR_LO32, DCN_VM_CONTEXT, id), \ - SRI_ARR(PAGE_TABLE_START_ADDR_HI32, DCN_VM_CONTEXT, id), \ - SRI_ARR(PAGE_TABLE_START_ADDR_LO32, DCN_VM_CONTEXT, id), \ - SRI_ARR(PAGE_TABLE_END_ADDR_HI32, DCN_VM_CONTEXT, id), \ - SRI_ARR(PAGE_TABLE_END_ADDR_LO32, DCN_VM_CONTEXT, id) - -/* I2C HW */ - -#define I2C_HW_ENGINE_COMMON_REG_LIST_RI(id) \ - SRI_ARR_I2C(SETUP, DC_I2C_DDC, id), SRI_ARR_I2C(SPEED, DC_I2C_DDC, id), \ - SRI_ARR_I2C(HW_STATUS, DC_I2C_DDC, id), \ - SR_ARR_I2C(DC_I2C_ARBITRATION, id), \ - SR_ARR_I2C(DC_I2C_CONTROL, id), SR_ARR_I2C(DC_I2C_SW_STATUS, id), \ - SR_ARR_I2C(DC_I2C_TRANSACTION0, id), SR_ARR_I2C(DC_I2C_TRANSACTION1, id),\ - SR_ARR_I2C(DC_I2C_TRANSACTION2, id), SR_ARR_I2C(DC_I2C_TRANSACTION3, id),\ - SR_ARR_I2C(DC_I2C_DATA, id), SR_ARR_I2C(MICROSECOND_TIME_BASE_DIV, id) - -#define I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) \ - I2C_HW_ENGINE_COMMON_REG_LIST_RI(id), SR_ARR_I2C(DIO_MEM_PWR_CTRL, id), \ - SR_ARR_I2C(DIO_MEM_PWR_STATUS, id) - -#endif /* _DCN32_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index bc5f0db23d0c..ef0a2b01734d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -24,7 +24,7 @@ */ // header file of functions being implemented -#include "dcn32_resource.h" 
+#include "dcn32/dcn32_resource.h" #include "dcn20/dcn20_resource.h" #include "dml/dcn32/display_mode_vba_util_32.h" #include "dml/dcn32/dcn32_fpu.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile index 0a199c83bb5b..c195c47f58b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile @@ -10,7 +10,7 @@ # # Makefile for dcn321. -DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o +DCN321 = dcn321_dio_link_encoder.o AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c deleted file mode 100644 index f7de3eca1225..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ /dev/null @@ -1,2065 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "dc.h" - -#include "dcn32/dcn32_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn32/dcn32_resource.h" -#include "dcn321_resource.h" - -#include "dcn20/dcn20_resource.h" -#include "dcn30/dcn30_resource.h" - -#include "dml/dcn321/dcn321_fpu.h" - -#include "dcn10/dcn10_ipp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn31/dcn31_hubbub.h" -#include "dcn32/dcn32_hubbub.h" -#include "dcn32/dcn32_mpc.h" -#include "dcn32/dcn32_hubp.h" -#include "irq/dcn32/irq_service_dcn32.h" -#include "dcn32/dcn32_dpp.h" -#include "dcn32/dcn32_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn30/dcn30_opp.h" -#include "dcn20/dcn20_dsc.h" -#include "dcn30/dcn30_vpg.h" -#include "dcn30/dcn30_afmt.h" -#include "dcn30/dcn30_dio_stream_encoder.h" -#include "dcn32/dcn32_dio_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_link_encoder.h" -#include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "dcn31/dcn31_apg.h" -#include "dcn31/dcn31_dio_link_encoder.h" -#include "dcn32/dcn32_dio_link_encoder.h" -#include "dcn321_dio_link_encoder.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "clk_mgr.h" -#include "virtual/virtual_stream_encoder.h" -#include "dml/display_mode_vba.h" -#include "dcn32/dcn32_dccg.h" -#include "dcn10/dcn10_resource.h" -#include "link.h" -#include "dcn31/dcn31_panel_cntl.h" - -#include "dcn30/dcn30_dwb.h" -#include "dcn32/dcn32_mmhubbub.h" - -#include "dcn/dcn_3_2_1_offset.h" -#include "dcn/dcn_3_2_1_sh_mask.h" -#include "nbio/nbio_4_3_0_offset.h" - -#include "reg_helper.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" - -#include "dml/dcn30/display_mode_vba_30.h" -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" - -#define DC_LOGGER_INIT(logger) - -enum dcn321_clk_src_array_id { - DCN321_CLK_SRC_PLL0, - DCN321_CLK_SRC_PLL1, - DCN321_CLK_SRC_PLL2, - DCN321_CLK_SRC_PLL3, - DCN321_CLK_SRC_PLL4, - DCN321_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file - */ - -/* DCN */ -#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name -#define SR_ARR(reg_name, id)\ - REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name -#define SR_ARR_INIT(reg_name, id, value)\ - REG_STRUCT[id].reg_name = value - -#define SRI(reg_name, block, id)\ - REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI_ARR(reg_name, block, id)\ - REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SR_ARR_I2C(reg_name, id) \ - REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name - -#define SRI_ARR_I2C(reg_name, block, id)\ - REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI_ARR_ALPHABET(reg_name, block, index, id)\ - REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = 
BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name -#define SRI2_ARR(reg_name, block, id)\ - REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_ARR_2(reg_name, block, id, inst)\ - REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_DWB(reg_name, temp_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## temp_name - -#define DCCG_SRII(reg_name, block, id)\ - REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define VUPDATE_SRII(reg_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - reg ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg] - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX0_ ## reg_name -#define NBIO_SR_ARR(reg_name, id)\ - REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX0_ ## reg_name - -#define CTX ctx -#define REG(reg_name) \ - (ctx->dcn_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) - -static struct bios_registers bios_regs; - -#define bios_regs_init() \ - ( \ - NBIO_SR(BIOS_SCRATCH_3),\ - NBIO_SR(BIOS_SCRATCH_6)\ - ) - -#define clk_src_regs_init(index, pllid)\ - CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) - -static struct dce110_clk_src_regs clk_src_regs[5]; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN3_2(_MASK) -}; - -#define abm_regs_init(id)\ - ABM_DCN32_REG_LIST_RI(id) - -static struct dce_abm_registers abm_regs[4]; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN32(_MASK) -}; - -#define audio_regs_init(id)\ - AUD_COMMON_REG_LIST_RI(id) - -static struct dce_audio_registers audio_regs[5]; - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs_init(id)\ - VPG_DCN3_REG_LIST_RI(id) - -static struct dcn30_vpg_registers vpg_regs[10]; - -static const struct dcn30_vpg_shift vpg_shift = { - DCN3_VPG_MASK_SH_LIST(__SHIFT) 
-}; - -static const struct dcn30_vpg_mask vpg_mask = { - DCN3_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs_init(id)\ - AFMT_DCN3_REG_LIST_RI(id) - -static struct dcn30_afmt_registers afmt_regs[6]; - -static const struct dcn30_afmt_shift afmt_shift = { - DCN3_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn30_afmt_mask afmt_mask = { - DCN3_AFMT_MASK_SH_LIST(_MASK) -}; - -#define apg_regs_init(id)\ - APG_DCN31_REG_LIST_RI(id) - -static struct dcn31_apg_registers apg_regs[4]; - -static const struct dcn31_apg_shift apg_shift = { - DCN31_APG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_apg_mask apg_mask = { - DCN31_APG_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs_init(id)\ - SE_DCN32_REG_LIST_RI(id) - -static struct dcn10_stream_enc_registers stream_enc_regs[5]; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN32(_MASK) -}; - - -#define aux_regs_init(id)\ - DCN2_AUX_REG_LIST_RI(id) - -static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5]; - -#define hpd_regs_init(id)\ - HPD_REG_LIST_RI(id) - -static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5]; - -#define link_regs_init(id, phyid)\ - ( \ - LE_DCN31_REG_LIST_RI(id), \ - UNIPHY_DCN2_REG_LIST_RI(id, phyid)\ - ) - /*DPCS_DCN31_REG_LIST(id),*/ \ - -static struct dcn10_link_enc_registers link_enc_regs[5]; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ -// DPCS_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ -// DPCS_DCN31_MASK_SH_LIST(_MASK) -}; - -#define hpo_dp_stream_encoder_reg_init(id)\ - DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) - -static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4]; - -static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) -}; - - -#define hpo_dp_link_encoder_reg_init(id)\ - DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) - /*DCN3_1_RDPCSTX_REG_LIST(0),*/ - /*DCN3_1_RDPCSTX_REG_LIST(1),*/ - /*DCN3_1_RDPCSTX_REG_LIST(2),*/ - /*DCN3_1_RDPCSTX_REG_LIST(3),*/ - -static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[2]; - -static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { - DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { - DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) -}; - -#define dpp_regs_init(id)\ - DPP_REG_LIST_DCN30_COMMON_RI(id) - -static struct dcn3_dpp_registers dpp_regs[4]; - -static const struct dcn3_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN30_COMMON(__SHIFT) -}; - -static const struct dcn3_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN30_COMMON(_MASK) -}; - - -#define opp_regs_init(id)\ - OPP_REG_LIST_DCN30_RI(id) - -static struct dcn20_opp_registers opp_regs[4]; - -static const struct dcn20_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN20(__SHIFT) -}; - -static const struct dcn20_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN20(_MASK) -}; - -#define aux_engine_regs_init(id) \ - ( \ - AUX_COMMON_REG_LIST0_RI(id), SR_ARR_INIT(AUXN_IMPCAL, id, 0), \ - SR_ARR_INIT(AUXP_IMPCAL, id, 0), \ - SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK), \ - 
SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK)\ - ) - -static struct dce110_aux_registers aux_engine_regs[5]; - -static const struct dce110_aux_registers_shift aux_shift = { - DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -#define dwbc_regs_dcn3_init(id)\ - DWBC_COMMON_REG_LIST_DCN30_RI(id) - -static struct dcn30_dwbc_registers dwbc30_regs[1]; - -static const struct dcn30_dwbc_shift dwbc30_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dcn30_dwbc_mask dwbc30_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -#define mcif_wb_regs_dcn3_init(id)\ - MCIF_WB_COMMON_REG_LIST_DCN32_RI(id) - -static struct dcn30_mmhubbub_registers mcif_wb30_regs[1]; - -static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN32(_MASK) -}; - -#define dsc_regsDCN20_init(id)\ - DSC_REG_LIST_DCN20_RI(id) - -static struct dcn20_dsc_registers dsc_regs[4]; - -static const struct dcn20_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) -}; - -static const struct dcn20_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN20(_MASK) -}; - -static struct dcn30_mpc_registers mpc_regs; -#define dcn_mpc_regs_init()\ - MPC_REG_LIST_DCN3_2_RI(0),\ - MPC_REG_LIST_DCN3_2_RI(1),\ - MPC_REG_LIST_DCN3_2_RI(2),\ - MPC_REG_LIST_DCN3_2_RI(3),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ - MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN32(_MASK) -}; - -#define optc_regs_init(id)\ - OPTC_COMMON_REG_LIST_DCN3_2_RI(id) - -static struct dcn_optc_registers optc_regs[4]; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN3_2(_MASK) -}; - -#define hubp_regs_init(id) \ - HUBP_REG_LIST_DCN32_RI(id) - -static struct dcn_hubp2_registers hubp_regs[4]; - -static const struct dcn_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN32(_MASK) -}; - -static struct dcn_hubbub_registers hubbub_reg; -#define hubbub_reg_init()\ - HUBBUB_REG_LIST_DCN32_RI(0) - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN32(_MASK) -}; - -static struct dccg_registers dccg_regs; - -#define dccg_regs_init()\ - DCCG_REG_LIST_DCN32_RI() - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN32(_MASK) -}; - - -#define SRII2(reg_name_pre, reg_name_post, id)\ - .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ - ## id ## _ ## reg_name_post ## _BASE_IDX) + \ - reg ## reg_name_pre ## id ## _ ## reg_name_post - - -#define HWSEQ_DCN32_REG_LIST()\ - SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DIO_MEM_PWR_CTRL), \ - SR(ODM_MEM_PWR_CTRL3), \ - SR(MMHUBBUB_MEM_PWR_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL2), \ - SR(DCFCLK_CNTL),\ - 
SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ - SRII(PIXEL_RATE_CNTL, OTG, 0), \ - SRII(PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PIXEL_RATE_CNTL, OTG, 3),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ - SR(MICROSECOND_TIME_BASE_DIV), \ - SR(MILLISECOND_TIME_BASE_DIV), \ - SR(DISPCLK_FREQ_CHANGE_CNTL), \ - SR(RBBMIF_TIMEOUT_DIS), \ - SR(RBBMIF_TIMEOUT_DIS_2), \ - SR(DCHUBBUB_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ - SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ - SR(MPC_CRC_CTRL), \ - SR(MPC_CRC_RESULT_GB), \ - SR(MPC_CRC_RESULT_C), \ - SR(MPC_CRC_RESULT_AR), \ - SR(DOMAIN0_PG_CONFIG), \ - SR(DOMAIN1_PG_CONFIG), \ - SR(DOMAIN2_PG_CONFIG), \ - SR(DOMAIN3_PG_CONFIG), \ - SR(DOMAIN16_PG_CONFIG), \ - SR(DOMAIN17_PG_CONFIG), \ - SR(DOMAIN18_PG_CONFIG), \ - SR(DOMAIN19_PG_CONFIG), \ - SR(DOMAIN0_PG_STATUS), \ - SR(DOMAIN1_PG_STATUS), \ - SR(DOMAIN2_PG_STATUS), \ - SR(DOMAIN3_PG_STATUS), \ - SR(DOMAIN16_PG_STATUS), \ - SR(DOMAIN17_PG_STATUS), \ - SR(DOMAIN18_PG_STATUS), \ - SR(DOMAIN19_PG_STATUS), \ - SR(D1VGA_CONTROL), \ - SR(D2VGA_CONTROL), \ - SR(D3VGA_CONTROL), \ - SR(D4VGA_CONTROL), \ - SR(D5VGA_CONTROL), \ - SR(D6VGA_CONTROL), \ - SR(DC_IP_REQUEST_CNTL), \ - SR(AZALIA_AUDIO_DTO), \ - SR(AZALIA_CONTROLLER_CLOCK_GATING) - -static struct dce_hwseq_registers hwseq_reg; - -#define hwseq_reg_init()\ - HWSEQ_DCN32_REG_LIST() - -#define HWSEQ_DCN32_MASK_SH_LIST(mask_sh)\ - HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ - HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ - HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ - HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ - 
HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh) - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN32_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN32_MASK_SH_LIST(_MASK) -}; -#define vmid_regs_init(id)\ - DCN20_VMID_REG_LIST_RI(id) - -static struct dcn_vmid_registers vmid_regs[16]; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct resource_caps res_cap_dcn321 = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 5, - .num_stream_encoder = 5, - .num_hpo_dp_stream_encoder = 4, - .num_hpo_dp_link_encoder = 2, - .num_pll = 5, - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_mpc_3dlut = 4, - .num_dsc = 4, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - // 6:1 downscaling ratio: 1000/6 = 166.666 - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_AVOID, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 7680,/*upto 8K*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .enable_mem_low_power = { - .bits = { - .vga = false, - .i2c = false, - .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled - .dscl = false, - .cm = false, - .mpc = false, - .optc = true, - } - }, - .use_max_lb = true, - .force_disable_subvp = false, - .exit_idle_opt_for_cursor_updates = true, - .enable_single_display_2to1_odm_policy = true, - - /*must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/ - .enable_double_buffered_dsc_pg_support = true, - .enable_dp_dig_pixel_rate_div_policy = 1, - .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback" - .alloc_extra_way_for_cursor = true, - .min_prefetch_in_strobe_ns = 60000, // 60us - .disable_unbounded_requesting = false, - .override_dispclk_programming = true, - .disable_fpo_optimizations = false, - .fpo_vactive_margin_us = 2000, // 2000us - .disable_fpo_vactive = false, - .disable_boot_optimizations = false, - .disable_subvp_high_refresh = false, - .fpo_vactive_min_active_margin_us = 200, - .fpo_vactive_max_blank_us = 1000, - .enable_legacy_fast_update = false, - .disable_dc_mode_overwrite = true, - .using_dml2 = false, -}; - -static struct dce_aux *dcn321_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT aux_engine_regs - aux_engine_regs_init(0), - aux_engine_regs_init(1), - aux_engine_regs_init(2), - aux_engine_regs_init(3), - 
aux_engine_regs_init(4); - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} -#define i2c_inst_regs_init(id)\ - I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) - -static struct dce_i2c_registers i2c_hw_regs[5]; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) -}; - -static struct dce_i2c_hw *dcn321_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT i2c_hw_regs - i2c_inst_regs_init(1), - i2c_inst_regs_init(2), - i2c_inst_regs_init(3), - i2c_inst_regs_init(4), - i2c_inst_regs_init(5); - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} - -static struct clock_source *dcn321_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn31_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - kfree(clk_src); - BREAK_TO_DEBUGGER(); - return NULL; -} - -static struct hubbub *dcn321_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub2 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub2) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT hubbub_reg - hubbub_reg_init(); - -#undef REG_STRUCT -#define REG_STRUCT vmid_regs - vmid_regs_init(0), - vmid_regs_init(1), - vmid_regs_init(2), - vmid_regs_init(3), - vmid_regs_init(4), - vmid_regs_init(5), - vmid_regs_init(6), - vmid_regs_init(7), - vmid_regs_init(8), - vmid_regs_init(9), - vmid_regs_init(10), - vmid_regs_init(11), - vmid_regs_init(12), - vmid_regs_init(13), - vmid_regs_init(14), - vmid_regs_init(15); - - hubbub32_construct(hubbub2, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask, - ctx->dc->dml.ip.det_buffer_size_kbytes, - ctx->dc->dml.ip.pixel_chunk_size_kbytes, - ctx->dc->dml.ip.config_return_buffer_size_in_kbytes); - - - for (i = 0; i < res_cap_dcn321.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub2->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub2->base; -} - -static struct hubp *dcn321_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT hubp_regs - hubp_regs_init(0), - hubp_regs_init(1), - hubp_regs_init(2), - hubp_regs_init(3); - - if (hubp32_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static void dcn321_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN30_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn321_dpp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn3_dpp *dpp3 = - kzalloc(sizeof(struct dcn3_dpp), 
GFP_KERNEL); - - if (!dpp3) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT dpp_regs - dpp_regs_init(0), - dpp_regs_init(1), - dpp_regs_init(2), - dpp_regs_init(3); - - if (dpp32_construct(dpp3, ctx, inst, - &dpp_regs[inst], &tf_shift, &tf_mask)) - return &dpp3->base; - - BREAK_TO_DEBUGGER(); - kfree(dpp3); - return NULL; -} - -static struct mpc *dcn321_mpc_create( - struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), - GFP_KERNEL); - - if (!mpc30) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT mpc_regs - dcn_mpc_regs_init(); - - dcn32_mpc_construct(mpc30, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct output_pixel_processor *dcn321_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp2 = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp2) { - BREAK_TO_DEBUGGER(); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT opp_regs - opp_regs_init(0), - opp_regs_init(1), - opp_regs_init(2), - opp_regs_init(3); - - dcn20_opp_construct(opp2, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - return &opp2->base; -} - - -static struct timing_generator *dcn321_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT optc_regs - optc_regs_init(0), - optc_regs_init(1), - optc_regs_init(2), - optc_regs_init(3); - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn32_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn321_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT link_enc_aux_regs - aux_regs_init(0), - aux_regs_init(1), - aux_regs_init(2), - aux_regs_init(3), - aux_regs_init(4); - -#undef REG_STRUCT -#define REG_STRUCT link_enc_hpd_regs - hpd_regs_init(0), - hpd_regs_init(1), - hpd_regs_init(2), - hpd_regs_init(3), - hpd_regs_init(4); - -#undef REG_STRUCT -#define REG_STRUCT link_enc_regs - link_regs_init(0, A), - link_regs_init(1, B), - link_regs_init(2, C), - link_regs_init(3, D), - link_regs_init(4, E); - - dcn321_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - &link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, ctx->dcn_reg_offsets[regDC_PINSTRAPS_BASE_IDX] + regDC_PINSTRAPS, - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn321_create_audio( - 
struct dc_context *ctx, unsigned int inst) -{ - -#undef REG_STRUCT -#define REG_STRUCT audio_regs - audio_regs_init(0), - audio_regs_init(1), - audio_regs_init(2), - audio_regs_init(3), - audio_regs_init(4); - - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct vpg *dcn321_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); - - if (!vpg3) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT vpg_regs - vpg_regs_init(0), - vpg_regs_init(1), - vpg_regs_init(2), - vpg_regs_init(3), - vpg_regs_init(4), - vpg_regs_init(5), - vpg_regs_init(6), - vpg_regs_init(7), - vpg_regs_init(8), - vpg_regs_init(9); - - vpg3_construct(vpg3, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg3->base; -} - -static struct afmt *dcn321_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); - - if (!afmt3) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT afmt_regs - afmt_regs_init(0), - afmt_regs_init(1), - afmt_regs_init(2), - afmt_regs_init(3), - afmt_regs_init(4), - afmt_regs_init(5); - - afmt3_construct(afmt3, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - return &afmt3->base; -} - -static struct apg *dcn321_apg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); - - if (!apg31) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT apg_regs - apg_regs_init(0), - apg_regs_init(1), - apg_regs_init(2), - apg_regs_init(3); - - apg31_construct(apg31, ctx, inst, - &apg_regs[inst], - &apg_shift, - &apg_mask); - - return &apg31->base; -} - -static struct stream_encoder *dcn321_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn321_vpg_create(ctx, vpg_inst); - afmt = dcn321_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT stream_enc_regs - stream_enc_regs_init(0), - stream_enc_regs_init(1), - stream_enc_regs_init(2), - stream_enc_regs_init(3), - stream_enc_regs_init(4); - - dcn32_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static struct hpo_dp_stream_encoder *dcn321_hpo_dp_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; - struct vpg *vpg; - struct apg *apg; - uint32_t hpo_dp_inst; - uint32_t vpg_inst; - uint32_t apg_inst; - - ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); - hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; - - /* Mapping of VPG register blocks to HPO DP block instance: - * VPG[6] -> HPO_DP[0] - * VPG[7] -> HPO_DP[1] - * VPG[8] -> HPO_DP[2] - * VPG[9] -> HPO_DP[3] - */ - vpg_inst = hpo_dp_inst + 6; - - /* Mapping of APG register blocks to HPO DP block instance: - * APG[0] -> HPO_DP[0] - * APG[1] -> HPO_DP[1] - * APG[2] -> HPO_DP[2] - * APG[3] -> HPO_DP[3] - 
*/ - apg_inst = hpo_dp_inst; - - /* allocate HPO stream encoder and create VPG sub-block */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); - vpg = dcn321_vpg_create(ctx, vpg_inst); - apg = dcn321_apg_create(ctx, apg_inst); - - if (!hpo_dp_enc31 || !vpg || !apg) { - kfree(hpo_dp_enc31); - kfree(vpg); - kfree(apg); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT hpo_dp_stream_enc_regs - hpo_dp_stream_encoder_reg_init(0), - hpo_dp_stream_encoder_reg_init(1), - hpo_dp_stream_encoder_reg_init(2), - hpo_dp_stream_encoder_reg_init(3); - - dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, - hpo_dp_inst, eng_id, vpg, apg, - &hpo_dp_stream_enc_regs[hpo_dp_inst], - &hpo_dp_se_shift, &hpo_dp_se_mask); - - return &hpo_dp_enc31->base; -} - -static struct hpo_dp_link_encoder *dcn321_hpo_dp_link_encoder_create( - uint8_t inst, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; - - /* allocate HPO link encoder */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); - -#undef REG_STRUCT -#define REG_STRUCT hpo_dp_link_enc_regs - hpo_dp_link_encoder_reg_init(0), - hpo_dp_link_encoder_reg_init(1); - - hpo_dp_link_encoder32_construct(hpo_dp_enc31, ctx, inst, - &hpo_dp_link_enc_regs[inst], - &hpo_dp_le_shift, &hpo_dp_le_mask); - - return &hpo_dp_enc31->base; -} - -static struct dce_hwseq *dcn321_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - -#undef REG_STRUCT -#define REG_STRUCT hwseq_reg - hwseq_reg_init(); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn321_create_audio, - .create_stream_encoder = dcn321_stream_encoder_create, - .create_hpo_dp_stream_encoder = dcn321_hpo_dp_stream_encoder_create, - .create_hpo_dp_link_encoder = dcn321_hpo_dp_link_encoder_create, - .create_hwseq = dcn321_hwseq_create, -}; - -static void dcn321_resource_destruct(struct dcn321_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { - if (pool->base.hpo_dp_stream_enc[i] != NULL) { - if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); - pool->base.hpo_dp_stream_enc[i]->vpg = NULL; - } - if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { - kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); - pool->base.hpo_dp_stream_enc[i]->apg = NULL; - } - kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); - pool->base.hpo_dp_stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { - if (pool->base.hpo_dp_link_enc[i] != NULL) { - kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); - pool->base.hpo_dp_link_enc[i] = NULL; - } 
- } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(TO_DCN20_HUBBUB(pool->base.hubbub)); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn321_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) - dal_irq_service_destroy(&pool->base.irqs); - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); - - if (pool->base.oem_device != NULL) { - struct dc *dc = pool->base.oem_device->ctx->dc; - - dc->link_srv->destroy_ddc_service(&pool->base.oem_device); - } -} - - -static bool dcn321_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t dwb_count = pool->res_cap->num_dwb; - - for (i = 0; i < dwb_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - -#undef REG_STRUCT -#define REG_STRUCT dwbc30_regs - dwbc_regs_dcn3_init(0); - - dcn30_dwbc_construct(dwbc30, ctx, - 
&dwbc30_regs[i], - &dwbc30_shift, - &dwbc30_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - } - return true; -} - -static bool dcn321_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t dwb_count = pool->res_cap->num_dwb; - - for (i = 0; i < dwb_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - -#undef REG_STRUCT -#define REG_STRUCT mcif_wb30_regs - mcif_wb_regs_dcn3_init(0); - - dcn32_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb30_regs[i], - &mcif_wb30_shift, - &mcif_wb30_mask, - i); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn321_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT dsc_regs - dsc_regsDCN20_init(0), - dsc_regsDCN20_init(1), - dsc_regsDCN20_init(2), - dsc_regsDCN20_init(3); - - dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - - dsc->max_image_width = 6016; - - return &dsc->base; -} - -static void dcn321_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn321_resource_pool *dcn321_pool = TO_DCN321_RES_POOL(*pool); - - dcn321_resource_destruct(dcn321_pool); - kfree(dcn321_pool); - *pool = NULL; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = dcn32_subvp_in_use, -}; - -static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - DC_FP_START(); - dcn321_update_bw_bounding_box_fpu(dc, bw_params); - DC_FP_END(); -} - -static struct resource_funcs dcn321_res_pool_funcs = { - .destroy = dcn321_destroy_resource_pool, - .link_enc_create = dcn321_link_encoder_create, - .link_enc_create_minimal = NULL, - .panel_cntl_create = dcn32_panel_cntl_create, - .validate_bandwidth = dcn32_validate_bandwidth, - .calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg, - .populate_dml_pipes = dcn32_populate_dml_pipes_from_context, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn32_acquire_free_pipe_as_secondary_dpp_pipe, - .acquire_free_pipe_as_secondary_opp_head = dcn32_acquire_free_pipe_as_secondary_opp_head, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn30_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn32_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn32_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn321_update_bw_bounding_box, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, - .add_phantom_pipes = dcn32_add_phantom_pipes, - .remove_phantom_pipes = dcn32_remove_phantom_pipes, - .retain_phantom_pipes = dcn32_retain_phantom_pipes, - .save_mall_state = dcn32_save_mall_state, - .restore_mall_state = dcn32_restore_mall_state, -}; - -static uint32_t read_pipe_fuses(struct dc_context *ctx) -{ - uint32_t value = REG_READ(CC_DC_PIPE_DIS); - /* DCN321 support max 4 pipes */ - value = 
value & 0xf; - return value; -} - - -static bool dcn321_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn321_resource_pool *pool) -{ - int i, j; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - struct ddc_service_init_data ddc_init_data = {0}; - uint32_t pipe_fuses = 0; - uint32_t num_pipes = 4; - -#undef REG_STRUCT -#define REG_STRUCT bios_regs - bios_regs_init(); - -#undef REG_STRUCT -#define REG_STRUCT clk_src_regs - clk_src_regs_init(0, A), - clk_src_regs_init(1, B), - clk_src_regs_init(2, C), - clk_src_regs_init(3, D), - clk_src_regs_init(4, E); - -#undef REG_STRUCT -#define REG_STRUCT abm_regs - abm_regs_init(0), - abm_regs_init(1), - abm_regs_init(2), - abm_regs_init(3); - -#undef REG_STRUCT -#define REG_STRUCT dccg_regs - dccg_regs_init(); - - - ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dcn321; - /* max number of pipes for ASIC before checking for pipe fuses */ - num_pipes = pool->base.res_cap->num_timing_generator; - pipe_fuses = read_pipe_fuses(ctx); - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) - if (pipe_fuses & 1 << i) - num_pipes--; - - if (pipe_fuses & 1) - ASSERT(0); //Unexpected - Pipe 0 should always be fully functional! - - if (pipe_fuses & CC_DC_PIPE_DIS__DC_FULL_DIS_MASK) - ASSERT(0); //Entire DCN is harvested! - - /* within dml lib, initial value is hard coded, if ASIC pipe is fused, the - * value will be changed, update max_num_dpp and max_num_otg for dml. - */ - dcn3_21_ip.max_num_dpp = num_pipes; - dcn3_21_ip.max_num_otg = num_pipes; - - pool->base.funcs = &dcn321_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.timing_generator_count = num_pipes; - pool->base.pipe_count = num_pipes; - pool->base.mpcc_count = num_pipes; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ - /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ - dc->caps.max_cursor_size = 64; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.mall_size_per_mem_channel = 4; - dc->caps.mall_size_total = 0; - dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - dc->caps.cache_line_size = 64; - dc->caps.cache_num_ways = 16; - - /* Calculate the available MALL space */ - dc->caps.max_cab_allocation_bytes = dcn32_calc_num_avail_chans_for_mall( - dc, dc->ctx->dc_bios->vram_info.num_chans) * - dc->caps.mall_size_per_mem_channel * 1024 * 1024; - dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; - - dc->caps.subvp_fw_processing_delay_us = 15; - dc->caps.subvp_drr_max_vblank_margin_us = 40; - dc->caps.subvp_prefetch_end_to_mall_start_us = 15; - dc->caps.subvp_swath_height_margin_lines = 16; - dc->caps.subvp_pstate_allow_width_us = 20; - dc->caps.subvp_vertical_int_margin_us = 30; - dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.dp_hpo = true; - dc->caps.dp_hdmi21_pcon_support = true; - dc->caps.edp_dsc_support = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - 
dc->caps.max_v_total = (1 << 15) - 1; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; - // no OGAM ROM on DCN2 and later ASICs - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //4, configurable to be before or after BLND in MPCC - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - dc->config.dc_mode_clk_limit_support = true; - /* read VBIOS LTTPR caps */ - { - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - /* interop bit is implicit */ - { - dc->caps.vbios_lttpr_aware = true; - } - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN321_CLK_SRC_PLL0] = - dcn321_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN321_CLK_SRC_PLL1] = - dcn321_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN321_CLK_SRC_PLL2] = - dcn321_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN321_CLK_SRC_PLL3] = - dcn321_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN321_CLK_SRC_PLL4] = - dcn321_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->base.clk_src_count = DCN321_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn321_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* DCCG */ - pool->base.dccg = dccg32_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if 
(pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* DML */ - dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); - - /* IRQ Service */ - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn32_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn321_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs, TGs, ABMs */ - for (i = 0, j = 0; i < pool->base.res_cap->num_timing_generator; i++) { - - /* if pipe is disabled, skip instance of HW pipe, - * i.e, skip ASIC register instance - */ - if (pipe_fuses & 1 << i) - continue; - - pool->base.hubps[j] = dcn321_hubp_create(ctx, i); - if (pool->base.hubps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->base.dpps[j] = dcn321_dpp_create(ctx, i); - if (pool->base.dpps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - - pool->base.opps[j] = dcn321_opp_create(ctx, i); - if (pool->base.opps[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - - pool->base.timing_generators[j] = dcn321_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[j] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - - pool->base.multiple_abms[j] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[j] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* index for resource pool arrays for next valid pipe */ - j++; - } - - /* PSR */ - pool->base.psr = dmub_psr_create(ctx); - if (pool->base.psr == NULL) { - dm_error("DC: failed to create psr obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* MPCCs */ - pool->base.mpc = dcn321_mpc_create(ctx, pool->base.res_cap->num_timing_generator, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - /* DSCs */ - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn321_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB */ - if (!dcn321_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - /* MMHUBBUB */ - if (!dcn321_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn321_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn321_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* Audio, HWSeq, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if 
(!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer init functions and Plane caps */ - dcn32_hw_sequencer_init_functions(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { - ddc_init_data.ctx = dc->ctx; - ddc_init_data.link = NULL; - ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; - ddc_init_data.id.enum_id = 0; - ddc_init_data.id.type = OBJECT_TYPE_GENERIC; - pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); - } else { - pool->base.oem_device = NULL; - } - - dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = false; - dc->dml2_options.use_native_soc_bb_construction = true; - dc->dml2_options.minimize_dispclk_using_odm = true; - - dc->dml2_options.callbacks.dc = dc; - dc->dml2_options.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; - dc->dml2_options.callbacks.acquire_secondary_pipe_for_mpc_odm = &dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy; - dc->dml2_options.callbacks.update_pipes_for_stream_with_slice_count = &resource_update_pipes_for_stream_with_slice_count; - dc->dml2_options.callbacks.update_pipes_for_plane_with_slice_count = &resource_update_pipes_for_plane_with_slice_count; - dc->dml2_options.callbacks.get_mpc_slice_index = &resource_get_mpc_slice_index; - dc->dml2_options.callbacks.get_odm_slice_index = &resource_get_odm_slice_index; - dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; - - dc->dml2_options.svp_pstate.callbacks.dc = dc; - dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context; - dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx; - dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state; - dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context; - dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx; - dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink; - dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release; - dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release; - dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; - - dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; - dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; - dc->dml2_options.svp_pstate.subvp_pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us; - dc->dml2_options.svp_pstate.subvp_swath_height_margin_lines = dc->caps.subvp_swath_height_margin_lines; - - dc->dml2_options.svp_pstate.force_disable_subvp = dc->debug.force_disable_subvp; - dc->dml2_options.svp_pstate.force_enable_subvp = dc->debug.force_subvp_mclk_switch; - - dc->dml2_options.mall_cfg.cache_line_size_bytes = dc->caps.cache_line_size; - dc->dml2_options.mall_cfg.cache_num_ways = dc->caps.cache_num_ways; - dc->dml2_options.mall_cfg.max_cab_allocation_bytes = 
dc->caps.max_cab_allocation_bytes; - dc->dml2_options.mall_cfg.mblk_height_4bpe_pixels = DCN3_2_MBLK_HEIGHT_4BPE; - dc->dml2_options.mall_cfg.mblk_height_8bpe_pixels = DCN3_2_MBLK_HEIGHT_8BPE; - dc->dml2_options.mall_cfg.mblk_size_bytes = DCN3_2_MALL_MBLK_SIZE_BYTES; - dc->dml2_options.mall_cfg.mblk_width_pixels = DCN3_2_MBLK_WIDTH; - - dc->dml2_options.max_segments_per_hubp = 18; - dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE; - - return true; - -create_fail: - - dcn321_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn321_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn321_resource_pool *pool = - kzalloc(sizeof(struct dcn321_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn321_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h deleted file mode 100644 index 82cbf009f2d3..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef _DCN321_RESOURCE_H_ -#define _DCN321_RESOURCE_H_ - -#include "core_types.h" - -#define TO_DCN321_RES_POOL(pool)\ - container_of(pool, struct dcn321_resource_pool, base) - -extern struct _vcs_dpi_ip_params_st dcn3_21_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc; - -struct dcn321_resource_pool { - struct resource_pool base; -}; - -struct resource_pool *dcn321_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -#endif /* _DCN321_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile index 20d0eef1a13b..719afb5a3b12 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile @@ -10,7 +10,7 @@ # # Makefile for DCN35. 
-DCN35 = dcn35_resource.o dcn35_init.o dcn35_dio_stream_encoder.o \ +DCN35 = dcn35_init.o dcn35_dio_stream_encoder.o \ dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \ dcn35_dsc.o dcn35_hubp.o dcn35_hubbub.o \ dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c deleted file mode 100644 index 7a3faf2b1f06..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ /dev/null @@ -1,2141 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "dc.h" - -#include "dcn31/dcn31_init.h" -#include "dcn35/dcn35_init.h" - -#include "resource.h" -#include "include/irq_service_interface.h" -#include "dcn35_resource.h" -#include "dml2/dml2_wrapper.h" - -#include "dcn20/dcn20_resource.h" -#include "dcn30/dcn30_resource.h" -#include "dcn31/dcn31_resource.h" -#include "dcn32/dcn32_resource.h" - -#include "dcn10/dcn10_ipp.h" -#include "dcn30/dcn30_hubbub.h" -#include "dcn31/dcn31_hubbub.h" -#include "dcn35/dcn35_hubbub.h" -#include "dcn32/dcn32_mpc.h" -#include "dcn35/dcn35_hubp.h" -#include "irq/dcn35/irq_service_dcn35.h" -#include "dcn35/dcn35_dpp.h" -#include "dcn35/dcn35_optc.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dce110/dce110_hwseq.h" -#include "dcn35/dcn35_opp.h" -#include "dcn35/dcn35_dsc.h" -#include "dcn30/dcn30_vpg.h" -#include "dcn30/dcn30_afmt.h" -#include "dcn31/dcn31_dio_link_encoder.h" -#include "dcn35/dcn35_dio_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_stream_encoder.h" -#include "dcn31/dcn31_hpo_dp_link_encoder.h" -#include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "link.h" -#include "dcn31/dcn31_apg.h" -#include "dcn32/dcn32_dio_link_encoder.h" -#include "dcn31/dcn31_vpg.h" -#include "dcn31/dcn31_afmt.h" -#include "dce/dce_clock_source.h" -#include "dce/dce_audio.h" -#include "dce/dce_hwseq.h" -#include "clk_mgr.h" -#include "virtual/virtual_stream_encoder.h" -#include "dce110/dce110_resource.h" -#include "dml/display_mode_vba.h" -#include "dcn35/dcn35_dccg.h" -#include "dcn35/dcn35_pg_cntl.h" -#include "dcn10/dcn10_resource.h" -#include "dcn31/dcn31_panel_cntl.h" -#include "dcn35/dcn35_hwseq.h" -#include "dcn35_dio_link_encoder.h" -#include "dml/dcn31/dcn31_fpu.h" /*todo*/ -#include 
"dml/dcn35/dcn35_fpu.h" -#include "dcn35/dcn35_dwb.h" -#include "dcn35/dcn35_mmhubbub.h" - -#include "dcn/dcn_3_5_0_offset.h" -#include "dcn/dcn_3_5_0_sh_mask.h" -#include "nbio/nbio_7_11_0_offset.h" -#include "mmhub/mmhub_3_3_0_offset.h" -#include "mmhub/mmhub_3_3_0_sh_mask.h" - -#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE__SHIFT 0x0 -#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE_MASK 0x0000000FL - -#include "reg_helper.h" -#include "dce/dmub_abm.h" -#include "dce/dmub_psr.h" -#include "dce/dce_aux.h" -#include "dce/dce_i2c.h" -#include "dml/dcn31/display_mode_vba_31.h" /*temp*/ -#include "vm_helper.h" -#include "dcn20/dcn20_vmid.h" - -#include "link_enc_cfg.h" -#define DC_LOGGER_INIT(logger) - -enum dcn35_clk_src_array_id { - DCN35_CLK_SRC_PLL0, - DCN35_CLK_SRC_PLL1, - DCN35_CLK_SRC_PLL2, - DCN35_CLK_SRC_PLL3, - DCN35_CLK_SRC_PLL4, - DCN35_CLK_SRC_TOTAL -}; - -/* begin ********************* - * macros to expend register list macro defined in HW object header file - */ - -/* DCN */ -/* TODO awful hack. fixup dcn20_dwb.h */ -#undef BASE_INNER -#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] - -#define BASE(seg) BASE_INNER(seg) - -#define SR(reg_name)\ - REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SR_ARR(reg_name, id) \ - REG_STRUCT[id].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name - -#define SR_ARR_INIT(reg_name, id, value) \ - REG_STRUCT[id].reg_name = value - -#define SRI(reg_name, block, id)\ - REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI_ARR(reg_name, block, id)\ - REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SR_ARR_I2C(reg_name, id) \ - REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name - -#define SRI_ARR_I2C(reg_name, block, id)\ - REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI_ARR_ALPHABET(reg_name, block, index, id)\ - REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRI2(reg_name, block, id)\ - .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRI2_ARR(reg_name, block, id)\ - REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ - reg ## reg_name - -#define SRIR(var_name, reg_name, block, id)\ - .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII(reg_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_ARR_2(reg_name, block, id, inst)\ - REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_MPC_RMU(reg_name, block, id)\ - .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define SRII_DWB(reg_name, temp_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## temp_name - -#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ - .field_name = reg_name ## __ ## field_name ## post_fix - -#define DCCG_SRII(reg_name, block, id)\ - REG_STRUCT.block ## _ ## reg_name[id] = 
BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ - reg ## block ## id ## _ ## reg_name - -#define VUPDATE_SRII(reg_name, block, id)\ - REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ - reg ## reg_name ## _ ## block ## id - -/* NBIO */ -#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg] - -#define NBIO_BASE(seg) \ - NBIO_BASE_INNER(seg) - -#define NBIO_SR(reg_name)\ - REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX2_ ## reg_name - -#define NBIO_SR_ARR(reg_name, id)\ - REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \ - regBIF_BX2_ ## reg_name - -#define bios_regs_init() \ - ( \ - NBIO_SR(BIOS_SCRATCH_3),\ - NBIO_SR(BIOS_SCRATCH_6)\ - ) - -static struct bios_registers bios_regs; - -#define clk_src_regs_init(index, pllid)\ - CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) - -static struct dce110_clk_src_regs clk_src_regs[5]; - -static const struct dce110_clk_src_shift cs_shift = { - CS_COMMON_MASK_SH_LIST_DCN3_1_4(__SHIFT) -}; - -static const struct dce110_clk_src_mask cs_mask = { - CS_COMMON_MASK_SH_LIST_DCN3_1_4(_MASK) -}; - -#define abm_regs_init(id)\ - ABM_DCN32_REG_LIST_RI(id) - -static struct dce_abm_registers abm_regs[4]; - -static const struct dce_abm_shift abm_shift = { - ABM_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct dce_abm_mask abm_mask = { - ABM_MASK_SH_LIST_DCN35(_MASK) -}; - -#define audio_regs_init(id)\ - AUD_COMMON_REG_LIST_RI(id) - -static struct dce_audio_registers audio_regs[7]; - - -#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ - SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ - AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) - -static const struct dce_audio_shift audio_shift = { - DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_audio_mask audio_mask = { - DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) -}; - -#define vpg_regs_init(id)\ - VPG_DCN31_REG_LIST_RI(id) - -static struct dcn31_vpg_registers vpg_regs[10]; - -static const struct dcn31_vpg_shift vpg_shift = { - DCN31_VPG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_vpg_mask vpg_mask = { - DCN31_VPG_MASK_SH_LIST(_MASK) -}; - -#define afmt_regs_init(id)\ - AFMT_DCN31_REG_LIST_RI(id) - -static struct dcn31_afmt_registers afmt_regs[6]; - -static const struct dcn31_afmt_shift afmt_shift = { - DCN31_AFMT_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_afmt_mask afmt_mask = { - DCN31_AFMT_MASK_SH_LIST(_MASK) -}; - -#define apg_regs_init(id)\ - APG_DCN31_REG_LIST_RI(id) - -static struct dcn31_apg_registers apg_regs[4]; - -static const struct dcn31_apg_shift apg_shift = { - DCN31_APG_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_apg_mask apg_mask = { - DCN31_APG_MASK_SH_LIST(_MASK) -}; - -#define stream_enc_regs_init(id)\ - SE_DCN35_REG_LIST_RI(id) - -static struct dcn10_stream_enc_registers stream_enc_regs[5]; - -static const struct dcn10_stream_encoder_shift se_shift = { - SE_COMMON_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct dcn10_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN35(_MASK) -}; - -#define aux_regs_init(id)\ - DCN2_AUX_REG_LIST_RI(id) - -static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5]; - -#define hpd_regs_init(id)\ - HPD_REG_LIST_RI(id) - -static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5]; - - -static const struct dce110_aux_registers_shift aux_shift = { - 
DCN_AUX_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce110_aux_registers_mask aux_mask = { - DCN_AUX_MASK_SH_LIST(_MASK) -}; - -#define link_regs_init(id, phyid)\ - ( \ - LE_DCN35_REG_LIST_RI(id), \ - UNIPHY_DCN2_REG_LIST_RI(id, phyid)\ - ) - -static struct dcn10_link_enc_registers link_enc_regs[5]; - -static const struct dcn10_link_enc_shift le_shift = { - LINK_ENCODER_MASK_SH_LIST_DCN35(__SHIFT), \ - //DPCS_DCN31_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn10_link_enc_mask le_mask = { - LINK_ENCODER_MASK_SH_LIST_DCN35(_MASK), \ - //DPCS_DCN31_MASK_SH_LIST(_MASK) -}; - -#define hpo_dp_stream_encoder_reg_init(id)\ - DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) - -static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4]; - -static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { - DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) -}; - -#define hpo_dp_link_encoder_reg_init(id)\ - DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) - /*DCN3_1_RDPCSTX_REG_LIST(0),*/ - /*DCN3_1_RDPCSTX_REG_LIST(1),*/ - /*DCN3_1_RDPCSTX_REG_LIST(2),*/ - /*DCN3_1_RDPCSTX_REG_LIST(3),*/ - -static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[2]; - -static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { - DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { - DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(_MASK) -}; - -#define dpp_regs_init(id)\ - DPP_REG_LIST_DCN35_RI(id) - -static struct dcn3_dpp_registers dpp_regs[4]; - -static const struct dcn35_dpp_shift tf_shift = { - DPP_REG_LIST_SH_MASK_DCN35(__SHIFT) -}; - -static const struct dcn35_dpp_mask tf_mask = { - DPP_REG_LIST_SH_MASK_DCN35(_MASK) -}; - -#define opp_regs_init(id)\ - OPP_REG_LIST_DCN35_RI(id) - -static struct dcn35_opp_registers opp_regs[4]; - -static const struct dcn35_opp_shift opp_shift = { - OPP_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct dcn35_opp_mask opp_mask = { - OPP_MASK_SH_LIST_DCN35(_MASK) -}; - -#define aux_engine_regs_init(id)\ - ( \ - AUX_COMMON_REG_LIST0_RI(id), \ - SR_ARR_INIT(AUXN_IMPCAL, id, 0), \ - SR_ARR_INIT(AUXP_IMPCAL, id, 0), \ - SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK) \ - ) - -static struct dce110_aux_registers aux_engine_regs[5]; - -#define dwbc_regs_dcn3_init(id)\ - DWBC_COMMON_REG_LIST_DCN30_RI(id) - -static struct dcn30_dwbc_registers dwbc35_regs[1]; - -static const struct dcn35_dwbc_shift dwbc35_shift = { - DWBC_COMMON_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct dcn35_dwbc_mask dwbc35_mask = { - DWBC_COMMON_MASK_SH_LIST_DCN35(_MASK) -}; - -#define mcif_wb_regs_dcn3_init(id)\ - MCIF_WB_COMMON_REG_LIST_DCN3_5_RI(id) - -static struct dcn35_mmhubbub_registers mcif_wb35_regs[1]; - -static const struct dcn35_mmhubbub_shift mcif_wb35_shift = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT) -}; - -static const struct dcn35_mmhubbub_mask mcif_wb35_mask = { - MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(_MASK) -}; - -#define dsc_regsDCN35_init(id)\ - DSC_REG_LIST_DCN20_RI(id) - -static struct dcn20_dsc_registers dsc_regs[4]; - -static const struct dcn35_dsc_shift dsc_shift = { - DSC_REG_LIST_SH_MASK_DCN35(__SHIFT) -}; - -static const struct dcn35_dsc_mask dsc_mask = { - DSC_REG_LIST_SH_MASK_DCN35(_MASK) -}; - -static struct dcn30_mpc_registers mpc_regs; - -#define dcn_mpc_regs_init() \ - MPC_REG_LIST_DCN3_2_RI(0),\ - 
MPC_REG_LIST_DCN3_2_RI(1),\ - MPC_REG_LIST_DCN3_2_RI(2),\ - MPC_REG_LIST_DCN3_2_RI(3),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ - MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ - MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) - -static const struct dcn30_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT) -}; - -static const struct dcn30_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN32(_MASK) -}; - -#define optc_regs_init(id)\ - OPTC_COMMON_REG_LIST_DCN3_5_RI(id) - -static struct dcn_optc_registers optc_regs[4]; - -static const struct dcn_optc_shift optc_shift = { - OPTC_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT) -}; - -static const struct dcn_optc_mask optc_mask = { - OPTC_COMMON_MASK_SH_LIST_DCN3_5(_MASK) -}; - -#define hubp_regs_init(id)\ - HUBP_REG_LIST_DCN30_RI(id) - -static struct dcn_hubp2_registers hubp_regs[4]; - - -static const struct dcn35_hubp2_shift hubp_shift = { - HUBP_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct dcn35_hubp2_mask hubp_mask = { - HUBP_MASK_SH_LIST_DCN35(_MASK) -}; - -static struct dcn_hubbub_registers hubbub_reg; - -#define hubbub_reg_init()\ - HUBBUB_REG_LIST_DCN35(0) - -static const struct dcn_hubbub_shift hubbub_shift = { - HUBBUB_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct dcn_hubbub_mask hubbub_mask = { - HUBBUB_MASK_SH_LIST_DCN35(_MASK) -}; - -static struct dccg_registers dccg_regs; - -#define dccg_regs_init()\ - DCCG_REG_LIST_DCN35() - -static const struct dccg_shift dccg_shift = { - DCCG_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct dccg_mask dccg_mask = { - DCCG_MASK_SH_LIST_DCN35(_MASK) -}; - -static struct pg_cntl_registers pg_cntl_regs; - -#define pg_cntl_dcn35_regs_init() \ - PG_CNTL_REG_LIST_DCN35() - -static const struct pg_cntl_shift pg_cntl_shift = { - PG_CNTL_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct pg_cntl_mask pg_cntl_mask = { - PG_CNTL_MASK_SH_LIST_DCN35(_MASK) -}; - -#define SRII2(reg_name_pre, reg_name_post, id)\ - .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ - ## id ## _ ## reg_name_post ## _BASE_IDX) + \ - reg ## reg_name_pre ## id ## _ ## reg_name_post - -static struct dce_hwseq_registers hwseq_reg; - -#define hwseq_reg_init()\ - HWSEQ_DCN35_REG_LIST() - -#define HWSEQ_DCN35_MASK_SH_LIST(mask_sh)\ - HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ - HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ - HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, 
DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ - HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ - HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN23_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ - HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ - HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ - HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ - HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ - HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ - HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, DISPCLK_R_DMU_GATE_DIS, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, DISPCLK_G_RBBMIF_GATE_DIS, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, RBBMIF_FGCG_REP_DIS, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, DPREFCLK_ALLOW_DS_CLKSTOP, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, DISPCLK_ALLOW_DS_CLKSTOP, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, DPPCLK_ALLOW_DS_CLKSTOP, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, DTBCLK_ALLOW_DS_CLKSTOP, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, DCFCLK_ALLOW_DS_CLKSTOP, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, DPIACLK_ALLOW_DS_CLKSTOP, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, LONO_FGCG_REP_DIS, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh),\ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, 
PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh) - -static const struct dce_hwseq_shift hwseq_shift = { - HWSEQ_DCN35_MASK_SH_LIST(__SHIFT) -}; - -static const struct dce_hwseq_mask hwseq_mask = { - HWSEQ_DCN35_MASK_SH_LIST(_MASK) -}; - -#define vmid_regs_init(id)\ - DCN20_VMID_REG_LIST_RI(id) - -static struct dcn_vmid_registers vmid_regs[16]; - -static const struct dcn20_vmid_shift vmid_shifts = { - DCN20_VMID_MASK_SH_LIST(__SHIFT) -}; - -static const struct dcn20_vmid_mask vmid_masks = { - DCN20_VMID_MASK_SH_LIST(_MASK) -}; - -static const struct resource_caps res_cap_dcn35 = { - .num_timing_generator = 4, - .num_opp = 4, - .num_video_plane = 4, - .num_audio = 5, - .num_stream_encoder = 5, - .num_dig_link_enc = 5, - .num_hpo_dp_stream_encoder = 4, - .num_hpo_dp_link_encoder = 2, - .num_pll = 4,/*1 c10 edp, 3xc20 combo PHY*/ - .num_dwb = 1, - .num_ddc = 5, - .num_vmid = 16, - .num_mpc_3dlut = 2, - .num_dsc = 4, -}; - -static const struct dc_plane_cap plane_cap = { - .type = DC_PLANE_TYPE_DCN_UNIVERSAL, - .per_pixel_alpha = true, - - .pixel_format_support = { - .argb8888 = true, - .nv12 = true, - .fp16 = true, - .p010 = true, - .ayuv = false, - }, - - .max_upscale_factor = { - .argb8888 = 16000, - .nv12 = 16000, - .fp16 = 16000 - }, - - // 6:1 downscaling ratio: 1000/6 = 166.666 - .max_downscale_factor = { - .argb8888 = 167, - .nv12 = 167, - .fp16 = 167 - }, - 64, - 64 -}; - -static const struct dc_debug_options debug_defaults_drv = { - .disable_dmcu = true, - .force_abm_enable = false, - .timing_trace = false, - .clock_trace = true, - .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_AVOID, - .force_single_disp_pipe_split = false, - .disable_dcc = DCC_ENABLE, - .disable_dpp_power_gate = true, - .disable_hubp_power_gate = true, - .disable_clock_gate = true, - .disable_dsc_power_gate = true, - .vsr_support = true, - .performance_trace = false, - .max_downscale_src_width = 4096,/*upto true 4k*/ - .disable_pplib_wm_range = false, - .scl_reset_length10 = true, - .sanity_checks = false, - .underflow_assert_delay_us = 0xFFFFFFFF, - .dwb_fi_phase = -1, // -1 = disable, - .dmub_command_table = true, - .pstate_enabled = true, - .use_max_lb = true, - .enable_mem_low_power = { - .bits = { - .vga = false, - .i2c = true, - .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled - .dscl = true, - .cm = true, - .mpc = true, - .optc = true, - .vpg = true, - .afmt = true, - } - }, - .root_clock_optimization = { - .bits = { - .dpp = true, - .dsc = true,/*dscclk and dsc pg*/ - .hdmistream = true, - .hdmichar = true, - .dpstream = true, - .symclk32_se = true, - .symclk32_le = true, - .symclk_fe = true, - .physymclk = true, - .dpiasymclk = true, - } - }, - .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT, - .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ - .using_dml2 = true, - .support_eDP1_5 = true, - .enable_hpo_pg_support = false, - .enable_legacy_fast_update = true, - .enable_single_display_2to1_odm_policy = false, - .disable_idle_power_optimizations = true, - .dmcub_emulation = false, - .disable_boot_optimizations = false, - .disable_unbounded_requesting = false, - .disable_mem_low_power = false, - //must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions - .enable_double_buffered_dsc_pg_support = true, - 
.enable_dp_dig_pixel_rate_div_policy = 1, - .disable_z10 = false, - .ignore_pg = true, - .psp_disabled_wa = true, - .ips2_eval_delay_us = 200, - .ips2_entry_delay_us = 400 -}; - -static const struct dc_panel_config panel_config_defaults = { - .psr = { - .disable_psr = false, - .disallow_psrsu = false, - }, - .ilr = { - .optimize_edp_link_rate = true, - }, -}; - -static void dcn35_dpp_destroy(struct dpp **dpp) -{ - kfree(TO_DCN20_DPP(*dpp)); - *dpp = NULL; -} - -static struct dpp *dcn35_dpp_create(struct dc_context *ctx, uint32_t inst) -{ - struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); - bool success = (dpp != NULL); - - if (!success) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT dpp_regs - dpp_regs_init(0), - dpp_regs_init(1), - dpp_regs_init(2), - dpp_regs_init(3); - - success = dpp35_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift, - &tf_mask); - if (success) { - dpp35_set_fgcg( - dpp, - ctx->dc->debug.enable_fine_grain_clock_gating.bits.dpp); - return &dpp->base; - } - - BREAK_TO_DEBUGGER(); - kfree(dpp); - return NULL; -} - -static struct output_pixel_processor *dcn35_opp_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_opp *opp = - kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); - - if (!opp) { - BREAK_TO_DEBUGGER(); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT opp_regs - opp_regs_init(0), - opp_regs_init(1), - opp_regs_init(2), - opp_regs_init(3); - - dcn35_opp_construct(opp, ctx, inst, - &opp_regs[inst], &opp_shift, &opp_mask); - - dcn35_opp_set_fgcg(opp, ctx->dc->debug.enable_fine_grain_clock_gating.bits.opp); - - return &opp->base; -} - -static struct dce_aux *dcn31_aux_engine_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct aux_engine_dce110 *aux_engine = - kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); - - if (!aux_engine) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT aux_engine_regs - aux_engine_regs_init(0), - aux_engine_regs_init(1), - aux_engine_regs_init(2), - aux_engine_regs_init(3), - aux_engine_regs_init(4); - - dce110_aux_engine_construct(aux_engine, ctx, inst, - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, - &aux_engine_regs[inst], - &aux_mask, - &aux_shift, - ctx->dc->caps.extended_aux_timeout_support); - - return &aux_engine->base; -} - -#define i2c_inst_regs_init(id)\ - I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) - -static struct dce_i2c_registers i2c_hw_regs[5]; - -static const struct dce_i2c_shift i2c_shifts = { - I2C_COMMON_MASK_SH_LIST_DCN35(__SHIFT) -}; - -static const struct dce_i2c_mask i2c_masks = { - I2C_COMMON_MASK_SH_LIST_DCN35(_MASK) -}; - -/* ========================================================== */ - -/* - * DPIA index | Preferred Encoder | Host Router - * 0 | C | 0 - * 1 | First Available | 0 - * 2 | D | 1 - * 3 | First Available | 1 - */ -/* ========================================================== */ -static const enum engine_id dpia_to_preferred_enc_id_table[] = { - ENGINE_ID_DIGC, - ENGINE_ID_DIGC, - ENGINE_ID_DIGD, - ENGINE_ID_DIGD -}; - -static enum engine_id dcn35_get_preferred_eng_id_dpia(unsigned int dpia_index) -{ - return dpia_to_preferred_enc_id_table[dpia_index]; -} - -static struct dce_i2c_hw *dcn31_i2c_hw_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dce_i2c_hw *dce_i2c_hw = - kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); - - if (!dce_i2c_hw) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT i2c_hw_regs - i2c_inst_regs_init(1), - i2c_inst_regs_init(2), - i2c_inst_regs_init(3), - 
i2c_inst_regs_init(4), - i2c_inst_regs_init(5); - - dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, - &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); - - return dce_i2c_hw; -} -static struct mpc *dcn35_mpc_create( - struct dc_context *ctx, - int num_mpcc, - int num_rmu) -{ - struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL); - - if (!mpc30) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT mpc_regs - dcn_mpc_regs_init(); - - dcn32_mpc_construct(mpc30, ctx, - &mpc_regs, - &mpc_shift, - &mpc_mask, - num_mpcc, - num_rmu); - - return &mpc30->base; -} - -static struct hubbub *dcn35_hubbub_create(struct dc_context *ctx) -{ - int i; - - struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), - GFP_KERNEL); - - if (!hubbub3) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT hubbub_reg - hubbub_reg_init(); - -#undef REG_STRUCT -#define REG_STRUCT vmid_regs - vmid_regs_init(0), - vmid_regs_init(1), - vmid_regs_init(2), - vmid_regs_init(3), - vmid_regs_init(4), - vmid_regs_init(5), - vmid_regs_init(6), - vmid_regs_init(7), - vmid_regs_init(8), - vmid_regs_init(9), - vmid_regs_init(10), - vmid_regs_init(11), - vmid_regs_init(12), - vmid_regs_init(13), - vmid_regs_init(14), - vmid_regs_init(15); - - hubbub35_construct(hubbub3, ctx, - &hubbub_reg, - &hubbub_shift, - &hubbub_mask, - 384,/*ctx->dc->dml.ip.det_buffer_size_kbytes,*/ - 8, /*ctx->dc->dml.ip.pixel_chunk_size_kbytes,*/ - 1792 /*ctx->dc->dml.ip.config_return_buffer_size_in_kbytes*/); - - - for (i = 0; i < res_cap_dcn35.num_vmid; i++) { - struct dcn20_vmid *vmid = &hubbub3->vmid[i]; - - vmid->ctx = ctx; - - vmid->regs = &vmid_regs[i]; - vmid->shifts = &vmid_shifts; - vmid->masks = &vmid_masks; - } - - return &hubbub3->base; -} - -static struct timing_generator *dcn35_timing_generator_create( - struct dc_context *ctx, - uint32_t instance) -{ - struct optc *tgn10 = - kzalloc(sizeof(struct optc), GFP_KERNEL); - - if (!tgn10) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT optc_regs - optc_regs_init(0), - optc_regs_init(1), - optc_regs_init(2), - optc_regs_init(3); - - tgn10->base.inst = instance; - tgn10->base.ctx = ctx; - - tgn10->tg_regs = &optc_regs[instance]; - tgn10->tg_shift = &optc_shift; - tgn10->tg_mask = &optc_mask; - - dcn35_timing_generator_init(tgn10); - - return &tgn10->base; -} - -static const struct encoder_feature_support link_enc_feature = { - .max_hdmi_deep_color = COLOR_DEPTH_121212, - .max_hdmi_pixel_clock = 600000, - .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = true, - .fec_supported = true, - .flags.bits.IS_HBR2_CAPABLE = true, - .flags.bits.IS_HBR3_CAPABLE = true, - .flags.bits.IS_TPS3_CAPABLE = true, - .flags.bits.IS_TPS4_CAPABLE = true -}; - -static struct link_encoder *dcn35_link_encoder_create( - struct dc_context *ctx, - const struct encoder_init_data *enc_init_data) -{ - struct dcn20_link_encoder *enc20 = - kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - - if (!enc20) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT link_enc_aux_regs - aux_regs_init(0), - aux_regs_init(1), - aux_regs_init(2), - aux_regs_init(3), - aux_regs_init(4); - -#undef REG_STRUCT -#define REG_STRUCT link_enc_hpd_regs - hpd_regs_init(0), - hpd_regs_init(1), - hpd_regs_init(2), - hpd_regs_init(3), - hpd_regs_init(4); - -#undef REG_STRUCT -#define REG_STRUCT link_enc_regs - link_regs_init(0, A), - link_regs_init(1, B), - link_regs_init(2, C), - link_regs_init(3, D), - link_regs_init(4, E); - - dcn35_link_encoder_construct(enc20, - enc_init_data, - &link_enc_feature, - 
&link_enc_regs[enc_init_data->transmitter], - &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source], - &le_shift, - &le_mask); - - return &enc20->enc10.base; -} - -/* Create a minimal link encoder object not associated with a particular - * physical connector. - * resource_funcs.link_enc_create_minimal - */ -static struct link_encoder *dcn31_link_enc_create_minimal( - struct dc_context *ctx, enum engine_id eng_id) -{ - struct dcn20_link_encoder *enc20; - - if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) - return NULL; - - enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); - if (!enc20) - return NULL; - - dcn31_link_encoder_construct_minimal( - enc20, - ctx, - &link_enc_feature, - &link_enc_regs[eng_id - ENGINE_ID_DIGA], - eng_id); - - return &enc20->enc10.base; -} - -static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) -{ - struct dcn31_panel_cntl *panel_cntl = - kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); - - if (!panel_cntl) - return NULL; - - dcn31_panel_cntl_construct(panel_cntl, init_data); - - return &panel_cntl->base; -} - -static void read_dce_straps( - struct dc_context *ctx, - struct resource_straps *straps) -{ - generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), - FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); - -} - -static struct audio *dcn31_create_audio( - struct dc_context *ctx, unsigned int inst) -{ - -#undef REG_STRUCT -#define REG_STRUCT audio_regs - audio_regs_init(0), - audio_regs_init(1), - audio_regs_init(2), - audio_regs_init(3), - audio_regs_init(4); - audio_regs_init(5); - audio_regs_init(6); - - return dce_audio_create(ctx, inst, - &audio_regs[inst], &audio_shift, &audio_mask); -} - -static struct vpg *dcn31_vpg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); - - if (!vpg31) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT vpg_regs - vpg_regs_init(0), - vpg_regs_init(1), - vpg_regs_init(2), - vpg_regs_init(3), - vpg_regs_init(4), - vpg_regs_init(5), - vpg_regs_init(6), - vpg_regs_init(7), - vpg_regs_init(8), - vpg_regs_init(9); - - vpg31_construct(vpg31, ctx, inst, - &vpg_regs[inst], - &vpg_shift, - &vpg_mask); - - return &vpg31->base; -} - -static struct afmt *dcn31_afmt_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); - - if (!afmt31) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT afmt_regs - afmt_regs_init(0), - afmt_regs_init(1), - afmt_regs_init(2), - afmt_regs_init(3), - afmt_regs_init(4), - afmt_regs_init(5); - - afmt31_construct(afmt31, ctx, inst, - &afmt_regs[inst], - &afmt_shift, - &afmt_mask); - - // Light sleep by default, no need to power down here - - return &afmt31->base; -} - -static struct apg *dcn31_apg_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); - - if (!apg31) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT apg_regs - apg_regs_init(0), - apg_regs_init(1), - apg_regs_init(2), - apg_regs_init(3); - - apg31_construct(apg31, ctx, inst, - &apg_regs[inst], - &apg_shift, - &apg_mask); - - return &apg31->base; -} - -static struct stream_encoder *dcn35_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn10_stream_encoder *enc1; - struct vpg *vpg; - struct afmt *afmt; - int 
vpg_inst; - int afmt_inst; - - /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { - vpg_inst = eng_id; - afmt_inst = eng_id; - } else - return NULL; - - enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - afmt = dcn31_afmt_create(ctx, afmt_inst); - - if (!enc1 || !vpg || !afmt) { - kfree(enc1); - kfree(vpg); - kfree(afmt); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT stream_enc_regs - stream_enc_regs_init(0), - stream_enc_regs_init(1), - stream_enc_regs_init(2), - stream_enc_regs_init(3), - stream_enc_regs_init(4); - - dcn35_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, - eng_id, vpg, afmt, - &stream_enc_regs[eng_id], - &se_shift, &se_mask); - - return &enc1->base; -} - -static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( - enum engine_id eng_id, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; - struct vpg *vpg; - struct apg *apg; - uint32_t hpo_dp_inst; - uint32_t vpg_inst; - uint32_t apg_inst; - - ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); - hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; - - /* Mapping of VPG register blocks to HPO DP block instance: - * VPG[6] -> HPO_DP[0] - * VPG[7] -> HPO_DP[1] - * VPG[8] -> HPO_DP[2] - * VPG[9] -> HPO_DP[3] - */ - vpg_inst = hpo_dp_inst + 6; - - /* Mapping of APG register blocks to HPO DP block instance: - * APG[0] -> HPO_DP[0] - * APG[1] -> HPO_DP[1] - * APG[2] -> HPO_DP[2] - * APG[3] -> HPO_DP[3] - */ - apg_inst = hpo_dp_inst; - - /* allocate HPO stream encoder and create VPG sub-block */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); - vpg = dcn31_vpg_create(ctx, vpg_inst); - apg = dcn31_apg_create(ctx, apg_inst); - - if (!hpo_dp_enc31 || !vpg || !apg) { - kfree(hpo_dp_enc31); - kfree(vpg); - kfree(apg); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT hpo_dp_stream_enc_regs - hpo_dp_stream_encoder_reg_init(0), - hpo_dp_stream_encoder_reg_init(1), - hpo_dp_stream_encoder_reg_init(2), - hpo_dp_stream_encoder_reg_init(3); - - dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, - hpo_dp_inst, eng_id, vpg, apg, - &hpo_dp_stream_enc_regs[hpo_dp_inst], - &hpo_dp_se_shift, &hpo_dp_se_mask); - - return &hpo_dp_enc31->base; -} - -static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( - uint8_t inst, - struct dc_context *ctx) -{ - struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; - - /* allocate HPO link encoder */ - hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); - -#undef REG_STRUCT -#define REG_STRUCT hpo_dp_link_enc_regs - hpo_dp_link_encoder_reg_init(0), - hpo_dp_link_encoder_reg_init(1); - - hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, - &hpo_dp_link_enc_regs[inst], - &hpo_dp_le_shift, &hpo_dp_le_mask); - - return &hpo_dp_enc31->base; -} - -static struct dce_hwseq *dcn35_hwseq_create( - struct dc_context *ctx) -{ - struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); - -#undef REG_STRUCT -#define REG_STRUCT hwseq_reg - hwseq_reg_init(); - - if (hws) { - hws->ctx = ctx; - hws->regs = &hwseq_reg; - hws->shifts = &hwseq_shift; - hws->masks = &hwseq_mask; - } - return hws; -} -static const struct resource_create_funcs res_create_funcs = { - .read_dce_straps = read_dce_straps, - .create_audio = dcn31_create_audio, - .create_stream_encoder = dcn35_stream_encoder_create, - .create_hpo_dp_stream_encoder = 
dcn31_hpo_dp_stream_encoder_create, - .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, - .create_hwseq = dcn35_hwseq_create, -}; - -static void dcn35_resource_destruct(struct dcn35_resource_pool *pool) -{ - unsigned int i; - - for (i = 0; i < pool->base.stream_enc_count; i++) { - if (pool->base.stream_enc[i] != NULL) { - if (pool->base.stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); - pool->base.stream_enc[i]->vpg = NULL; - } - if (pool->base.stream_enc[i]->afmt != NULL) { - kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); - pool->base.stream_enc[i]->afmt = NULL; - } - kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); - pool->base.stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { - if (pool->base.hpo_dp_stream_enc[i] != NULL) { - if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { - kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); - pool->base.hpo_dp_stream_enc[i]->vpg = NULL; - } - if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { - kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); - pool->base.hpo_dp_stream_enc[i]->apg = NULL; - } - kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); - pool->base.hpo_dp_stream_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { - if (pool->base.hpo_dp_link_enc[i] != NULL) { - kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); - pool->base.hpo_dp_link_enc[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - if (pool->base.dscs[i] != NULL) - dcn20_dsc_destroy(&pool->base.dscs[i]); - } - - if (pool->base.mpc != NULL) { - kfree(TO_DCN20_MPC(pool->base.mpc)); - pool->base.mpc = NULL; - } - if (pool->base.hubbub != NULL) { - kfree(pool->base.hubbub); - pool->base.hubbub = NULL; - } - for (i = 0; i < pool->base.pipe_count; i++) { - if (pool->base.dpps[i] != NULL) - dcn35_dpp_destroy(&pool->base.dpps[i]); - - if (pool->base.ipps[i] != NULL) - pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); - - if (pool->base.hubps[i] != NULL) { - kfree(TO_DCN20_HUBP(pool->base.hubps[i])); - pool->base.hubps[i] = NULL; - } - - if (pool->base.irqs != NULL) { - dal_irq_service_destroy(&pool->base.irqs); - } - } - - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - if (pool->base.engines[i] != NULL) - dce110_engine_destroy(&pool->base.engines[i]); - if (pool->base.hw_i2cs[i] != NULL) { - kfree(pool->base.hw_i2cs[i]); - pool->base.hw_i2cs[i] = NULL; - } - if (pool->base.sw_i2cs[i] != NULL) { - kfree(pool->base.sw_i2cs[i]); - pool->base.sw_i2cs[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - if (pool->base.opps[i] != NULL) - pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.timing_generators[i] != NULL) { - kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); - pool->base.timing_generators[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_dwb; i++) { - if (pool->base.dwbc[i] != NULL) { - kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); - pool->base.dwbc[i] = NULL; - } - if (pool->base.mcif_wb[i] != NULL) { - kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); - pool->base.mcif_wb[i] = NULL; - } - } - - for (i = 0; i < pool->base.audio_count; i++) { - if (pool->base.audios[i]) - dce_aud_destroy(&pool->base.audios[i]); - } - - for (i = 0; i < pool->base.clk_src_count; 
i++) { - if (pool->base.clock_sources[i] != NULL) { - dcn20_clock_source_destroy(&pool->base.clock_sources[i]); - pool->base.clock_sources[i] = NULL; - } - } - - for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { - if (pool->base.mpc_lut[i] != NULL) { - dc_3dlut_func_release(pool->base.mpc_lut[i]); - pool->base.mpc_lut[i] = NULL; - } - if (pool->base.mpc_shaper[i] != NULL) { - dc_transfer_func_release(pool->base.mpc_shaper[i]); - pool->base.mpc_shaper[i] = NULL; - } - } - - if (pool->base.dp_clock_source != NULL) { - dcn20_clock_source_destroy(&pool->base.dp_clock_source); - pool->base.dp_clock_source = NULL; - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - if (pool->base.multiple_abms[i] != NULL) - dce_abm_destroy(&pool->base.multiple_abms[i]); - } - - if (pool->base.psr != NULL) - dmub_psr_destroy(&pool->base.psr); - - if (pool->base.pg_cntl != NULL) - dcn_pg_cntl_destroy(&pool->base.pg_cntl); - - if (pool->base.dccg != NULL) - dcn_dccg_destroy(&pool->base.dccg); -} - -static struct hubp *dcn35_hubp_create( - struct dc_context *ctx, - uint32_t inst) -{ - struct dcn20_hubp *hubp2 = - kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); - - if (!hubp2) - return NULL; - -#undef REG_STRUCT -#define REG_STRUCT hubp_regs - hubp_regs_init(0), - hubp_regs_init(1), - hubp_regs_init(2), - hubp_regs_init(3); - - if (hubp35_construct(hubp2, ctx, inst, - &hubp_regs[inst], &hubp_shift, &hubp_mask)) - return &hubp2->base; - - BREAK_TO_DEBUGGER(); - kfree(hubp2); - return NULL; -} - -static void dcn35_dwbc_init(struct dcn30_dwbc *dwbc30, struct dc_context *ctx) -{ - dcn35_dwbc_set_fgcg( - dwbc30, ctx->dc->debug.enable_fine_grain_clock_gating.bits.dwb); -} - -static bool dcn35_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), - GFP_KERNEL); - - if (!dwbc30) { - dm_error("DC: failed to create dwbc30!\n"); - return false; - } - -#undef REG_STRUCT -#define REG_STRUCT dwbc35_regs - dwbc_regs_dcn3_init(0); - - dcn35_dwbc_construct(dwbc30, ctx, - &dwbc35_regs[i], - &dwbc35_shift, - &dwbc35_mask, - i); - - pool->dwbc[i] = &dwbc30->base; - - dcn35_dwbc_init(dwbc30, ctx); - } - return true; -} - -static void dcn35_mmhubbub_init(struct dcn30_mmhubbub *mcif_wb30, - struct dc_context *ctx) -{ - dcn35_mmhubbub_set_fgcg( - mcif_wb30, - ctx->dc->debug.enable_fine_grain_clock_gating.bits.mmhubbub); -} - -static bool dcn35_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) -{ - int i; - uint32_t pipe_count = pool->res_cap->num_dwb; - - for (i = 0; i < pipe_count; i++) { - struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), - GFP_KERNEL); - - if (!mcif_wb30) { - dm_error("DC: failed to create mcif_wb30!\n"); - return false; - } - -#undef REG_STRUCT -#define REG_STRUCT mcif_wb35_regs - mcif_wb_regs_dcn3_init(0); - - dcn35_mmhubbub_construct(mcif_wb30, ctx, - &mcif_wb35_regs[i], - &mcif_wb35_shift, - &mcif_wb35_mask, - i); - - dcn35_mmhubbub_init(mcif_wb30, ctx); - - pool->mcif_wb[i] = &mcif_wb30->base; - } - return true; -} - -static struct display_stream_compressor *dcn35_dsc_create( - struct dc_context *ctx, uint32_t inst) -{ - struct dcn20_dsc *dsc = - kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); - - if (!dsc) { - BREAK_TO_DEBUGGER(); - return NULL; - } - -#undef REG_STRUCT -#define REG_STRUCT dsc_regs - dsc_regsDCN35_init(0), - dsc_regsDCN35_init(1), - dsc_regsDCN35_init(2), - 
dsc_regsDCN35_init(3); - - dsc35_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); - dsc35_set_fgcg(dsc, - ctx->dc->debug.enable_fine_grain_clock_gating.bits.dsc); - return &dsc->base; -} - -static void dcn35_destroy_resource_pool(struct resource_pool **pool) -{ - struct dcn35_resource_pool *dcn35_pool = TO_DCN35_RES_POOL(*pool); - - dcn35_resource_destruct(dcn35_pool); - kfree(dcn35_pool); - *pool = NULL; -} - -static struct clock_source *dcn35_clock_source_create( - struct dc_context *ctx, - struct dc_bios *bios, - enum clock_source_id id, - const struct dce110_clk_src_regs *regs, - bool dp_clk_src) -{ - struct dce110_clk_src *clk_src = - kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); - - if (!clk_src) - return NULL; - - if (dcn31_clk_src_construct(clk_src, ctx, bios, id, - regs, &cs_shift, &cs_mask)) { - clk_src->base.dp_clk_src = dp_clk_src; - return &clk_src->base; - } - - BREAK_TO_DEBUGGER(); - return NULL; -} - -static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = dcn20_get_dcc_compression_cap -}; - -static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config) -{ - *panel_config = panel_config_defaults; -} - - -static bool dcn35_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out = false; - - out = dml2_validate(dc, context, fast_validate); - - return out; -} - - -static struct resource_funcs dcn35_res_pool_funcs = { - .destroy = dcn35_destroy_resource_pool, - .link_enc_create = dcn35_link_encoder_create, - .link_enc_create_minimal = dcn31_link_enc_create_minimal, - .link_encs_assign = link_enc_cfg_link_encs_assign, - .link_enc_unassign = link_enc_cfg_link_enc_unassign, - .panel_cntl_create = dcn31_panel_cntl_create, - .validate_bandwidth = dcn35_validate_bandwidth, - .calculate_wm_and_dlg = NULL, - .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, - .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, - .release_pipe = dcn20_release_pipe, - .add_stream_to_ctx = dcn30_add_stream_to_ctx, - .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, - .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, - .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, - .set_mcif_arb_params = dcn30_set_mcif_arb_params, - .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, - .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, - .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, - .update_bw_bounding_box = dcn35_update_bw_bounding_box_fpu, - .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .get_panel_config_defaults = dcn35_get_panel_config_defaults, - .get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia, -}; - -static bool dcn35_resource_construct( - uint8_t num_virtual_links, - struct dc *dc, - struct dcn35_resource_pool *pool) -{ - int i; - struct dc_context *ctx = dc->ctx; - struct irq_service_init_data init_data; - -#undef REG_STRUCT -#define REG_STRUCT bios_regs - bios_regs_init(); - -#undef REG_STRUCT -#define REG_STRUCT clk_src_regs - clk_src_regs_init(0, A), - clk_src_regs_init(1, B), - clk_src_regs_init(2, C), - clk_src_regs_init(3, D), - clk_src_regs_init(4, E); - -#undef REG_STRUCT -#define REG_STRUCT abm_regs - abm_regs_init(0), - abm_regs_init(1), - abm_regs_init(2), - abm_regs_init(3); - -#undef REG_STRUCT -#define REG_STRUCT dccg_regs - dccg_regs_init(); - - 
ctx->dc_bios->regs = &bios_regs; - - pool->base.res_cap = &res_cap_dcn35; - - pool->base.funcs = &dcn35_res_pool_funcs; - - /************************************************* - * Resource + asic cap harcoding * - *************************************************/ - pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; - pool->base.pipe_count = pool->base.res_cap->num_timing_generator; - pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; - dc->caps.max_downscale_ratio = 600; - dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 100; - dc->caps.max_cursor_size = 256; - dc->caps.min_horizontal_blanking_period = 80; - dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 2; - dc->caps.max_slave_yuv_planes = 2; - dc->caps.max_slave_rgb_planes = 2; - dc->caps.post_blend_color_processing = true; - dc->caps.force_dp_tps4_for_cp2520 = true; - if (dc->config.forceHBR2CP2520) - dc->caps.force_dp_tps4_for_cp2520 = false; - dc->caps.dp_hpo = true; - dc->caps.dp_hdmi21_pcon_support = true; - - dc->caps.edp_dsc_support = true; - dc->caps.extended_aux_timeout_support = true; - dc->caps.dmcub_support = true; - dc->caps.is_apu = true; - dc->caps.seamless_odm = true; - - dc->caps.zstate_support = true; - dc->caps.ips_support = true; - dc->caps.max_v_total = (1 << 15) - 1; - - /* Color pipeline capabilities */ - dc->caps.color.dpp.dcn_arch = 1; - dc->caps.color.dpp.input_lut_shared = 0; - dc->caps.color.dpp.icsc = 1; - dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr - dc->caps.color.dpp.dgam_rom_caps.srgb = 1; - dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; - dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; - dc->caps.color.dpp.dgam_rom_caps.pq = 1; - dc->caps.color.dpp.dgam_rom_caps.hlg = 1; - dc->caps.color.dpp.post_csc = 1; - dc->caps.color.dpp.gamma_corr = 1; - dc->caps.color.dpp.dgam_rom_for_yuv = 0; - - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1 - // no OGAM ROM on DCN301 - dc->caps.color.dpp.ogam_rom_caps.srgb = 0; - dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; - dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.dpp.ogam_rom_caps.pq = 0; - dc->caps.color.dpp.ogam_rom_caps.hlg = 0; - dc->caps.color.dpp.ocsc = 0; - - dc->caps.color.mpc.gamut_remap = 1; - dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 - dc->caps.color.mpc.ogam_ram = 1; - dc->caps.color.mpc.ogam_rom_caps.srgb = 0; - dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; - dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; - dc->caps.color.mpc.ogam_rom_caps.pq = 0; - dc->caps.color.mpc.ogam_rom_caps.hlg = 0; - dc->caps.color.mpc.ocsc = 1; - - /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order - * to provide some margin. - * It's expected for furture ASIC to have equal or higher value, in order to - * have determinstic power improvement from generate to genration. 
- * (i.e., we should not expect new ASIC generation with lower vmin rate) - */ - dc->caps.max_disp_clock_khz_at_vmin = 650000; - - /* Use pipe context based otg sync logic */ - dc->config.use_pipe_ctx_sync_logic = true; - dc->config.use_default_clock_table = false; - /* read VBIOS LTTPR caps */ - { - if (ctx->dc_bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); - dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - /* interop bit is implicit */ - { - dc->caps.vbios_lttpr_aware = true; - } - } - - if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) - dc->debug = debug_defaults_drv; - - // Init the vm_helper - if (dc->vm_helper) - vm_helper_init(dc->vm_helper, 16); - - /************************************************* - * Create resources * - *************************************************/ - - /* Clock Sources for Pixel Clock*/ - pool->base.clock_sources[DCN35_CLK_SRC_PLL0] = - dcn35_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL0, - &clk_src_regs[0], false); - pool->base.clock_sources[DCN35_CLK_SRC_PLL1] = - dcn35_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL1, - &clk_src_regs[1], false); - pool->base.clock_sources[DCN35_CLK_SRC_PLL2] = - dcn35_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL2, - &clk_src_regs[2], false); - pool->base.clock_sources[DCN35_CLK_SRC_PLL3] = - dcn35_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); - pool->base.clock_sources[DCN35_CLK_SRC_PLL4] = - dcn35_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL4, - &clk_src_regs[4], false); - - pool->base.clk_src_count = DCN35_CLK_SRC_TOTAL; - - /* todo: not reuse phy_pll registers */ - pool->base.dp_clock_source = - dcn35_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_ID_DP_DTO, - &clk_src_regs[0], true); - - for (i = 0; i < pool->base.clk_src_count; i++) { - if (pool->base.clock_sources[i] == NULL) { - dm_error("DC: failed to create clock sources!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - /*temp till dml2 fully work without dml1*/ - dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); - - /* TODO: DCCG */ - pool->base.dccg = dccg35_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); - if (pool->base.dccg == NULL) { - dm_error("DC: failed to create dccg!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - -#undef REG_STRUCT -#define REG_STRUCT pg_cntl_regs - pg_cntl_dcn35_regs_init(); - - pool->base.pg_cntl = pg_cntl35_create(ctx, &pg_cntl_regs, &pg_cntl_shift, &pg_cntl_mask); - if (pool->base.pg_cntl == NULL) { - dm_error("DC: failed to create power gate control!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* TODO: IRQ */ - init_data.ctx = dc->ctx; - pool->base.irqs = dal_irq_service_dcn35_create(&init_data); - if (!pool->base.irqs) - goto create_fail; - - /* HUBBUB */ - pool->base.hubbub = dcn35_hubbub_create(ctx); - if (pool->base.hubbub == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create hubbub!\n"); - goto create_fail; - } - - /* HUBPs, DPPs, OPPs and TGs */ - for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.hubps[i] = dcn35_hubp_create(ctx, i); - if (pool->base.hubps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create hubps!\n"); - goto create_fail; - } - - pool->base.dpps[i] = 
dcn35_dpp_create(ctx, i); - if (pool->base.dpps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create dpps!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_opp; i++) { - pool->base.opps[i] = dcn35_opp_create(ctx, i); - if (pool->base.opps[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC: failed to create output pixel processor!\n"); - goto create_fail; - } - } - - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.timing_generators[i] = dcn35_timing_generator_create( - ctx, i); - if (pool->base.timing_generators[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create tg!\n"); - goto create_fail; - } - } - pool->base.timing_generator_count = i; - - /* PSR */ - pool->base.psr = dmub_psr_create(ctx); - if (pool->base.psr == NULL) { - dm_error("DC: failed to create psr obj!\n"); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - - /* ABM */ - for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { - pool->base.multiple_abms[i] = dmub_abm_create(ctx, - &abm_regs[i], - &abm_shift, - &abm_mask); - if (pool->base.multiple_abms[i] == NULL) { - dm_error("DC: failed to create abm for pipe %d!\n", i); - BREAK_TO_DEBUGGER(); - goto create_fail; - } - } - - /* MPC and DSC */ - pool->base.mpc = dcn35_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); - if (pool->base.mpc == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mpc!\n"); - goto create_fail; - } - - for (i = 0; i < pool->base.res_cap->num_dsc; i++) { - pool->base.dscs[i] = dcn35_dsc_create(ctx, i); - if (pool->base.dscs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create display stream compressor %d!\n", i); - goto create_fail; - } - } - - /* DWB and MMHUBBUB */ - if (!dcn35_dwbc_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create dwbc!\n"); - goto create_fail; - } - - if (!dcn35_mmhubbub_create(ctx, &pool->base)) { - BREAK_TO_DEBUGGER(); - dm_error("DC: failed to create mcif_wb!\n"); - goto create_fail; - } - - /* AUX and I2C */ - for (i = 0; i < pool->base.res_cap->num_ddc; i++) { - pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); - if (pool->base.engines[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create aux engine!!\n"); - goto create_fail; - } - pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); - if (pool->base.hw_i2cs[i] == NULL) { - BREAK_TO_DEBUGGER(); - dm_error( - "DC:failed to create hw i2c!!\n"); - goto create_fail; - } - pool->base.sw_i2cs[i] = NULL; - } - - /* DCN3.5 has 6 DPIA */ - pool->base.usb4_dpia_count = 4; - if (dc->debug.dpia_debug.bits.disable_dpia) - pool->base.usb4_dpia_count = 0; - - /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ - if (!resource_construct(num_virtual_links, dc, &pool->base, - &res_create_funcs)) - goto create_fail; - - /* HW Sequencer and Plane caps */ - dcn35_hw_sequencer_construct(dc); - - dc->caps.max_planes = pool->base.pipe_count; - - for (i = 0; i < dc->caps.max_planes; ++i) - dc->caps.planes[i] = plane_cap; - - dc->cap_funcs = cap_funcs; - - dc->dcn_ip->max_num_dpp = pool->base.pipe_count; - - dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = true; - dc->dml2_options.use_native_soc_bb_construction = true; - if (dc->config.EnableMinDispClkODM) - dc->dml2_options.minimize_dispclk_using_odm = true; - dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm; - - 
dc->dml2_options.callbacks.dc = dc; - dc->dml2_options.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; - dc->dml2_options.callbacks.acquire_secondary_pipe_for_mpc_odm = &dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy; - dc->dml2_options.callbacks.update_pipes_for_stream_with_slice_count = &resource_update_pipes_for_stream_with_slice_count; - dc->dml2_options.callbacks.update_pipes_for_plane_with_slice_count = &resource_update_pipes_for_plane_with_slice_count; - dc->dml2_options.callbacks.get_mpc_slice_index = &resource_get_mpc_slice_index; - dc->dml2_options.callbacks.get_odm_slice_index = &resource_get_odm_slice_index; - dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; - dc->dml2_options.max_segments_per_hubp = 24; - - dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ - - if (dc->config.sdpif_request_limit_words_per_umc == 0) - dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/ - - return true; - -create_fail: - - dcn35_resource_destruct(pool); - - return false; -} - -struct resource_pool *dcn35_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc) -{ - struct dcn35_resource_pool *pool = - kzalloc(sizeof(struct dcn35_resource_pool), GFP_KERNEL); - - if (!pool) - return NULL; - - if (dcn35_resource_construct(init_data->num_virtual_links, dc, pool)) - return &pool->base; - - BREAK_TO_DEBUGGER(); - kfree(pool); - return NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h deleted file mode 100644 index 99aea102e3f7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.h +++ /dev/null @@ -1,310 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef _DCN35_RESOURCE_H_ -#define _DCN35_RESOURCE_H_ - -#include "core_types.h" - -#define DCN3_5_VMIN_DISPCLK_HZ 717000000 -#define TO_DCN35_RES_POOL(pool)\ - container_of(pool, struct dcn35_resource_pool, base) - -extern struct _vcs_dpi_ip_params_st dcn3_5_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc; - -struct dcn35_resource_pool { - struct resource_pool base; -}; - -struct resource_pool *dcn35_create_resource_pool( - const struct dc_init_data *init_data, - struct dc *dc); - -/* Defs for runtime init of registers */ - -#define OPP_REG_LIST_DCN20_RI(id) \ - OPP_REG_LIST_DCN10_RI(id), \ - OPP_DPG_REG_LIST_RI(id), \ - SRI_ARR(FMT_422_CONTROL, FMT, id), \ - SRI_ARR(OPPBUF_CONTROL1, OPPBUF, id) - -#define OPP_REG_LIST_DCN35_RI(id) \ - OPP_REG_LIST_DCN20_RI(id), \ - SRI2_ARR(OPP_TOP_CLK_CONTROL, OPP, id) - -#define VPG_DCN31_REG_LIST_RI(id) \ - SRI_ARR(VPG_GENERIC_STATUS, VPG, id), \ - SRI_ARR(VPG_GENERIC_PACKET_ACCESS_CTRL, VPG, id), \ - SRI_ARR(VPG_GENERIC_PACKET_DATA, VPG, id), \ - SRI_ARR(VPG_GSP_FRAME_UPDATE_CTRL, VPG, id), \ - SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id), \ - SRI_ARR(VPG_MEM_PWR, VPG, id) - -#define AFMT_DCN31_REG_LIST_RI(id) \ - SRI_ARR(AFMT_INFOFRAME_CONTROL0, AFMT, id), \ - SRI_ARR(AFMT_VBI_PACKET_CONTROL, AFMT, id), \ - SRI_ARR(AFMT_AUDIO_PACKET_CONTROL, AFMT, id), \ - SRI_ARR(AFMT_AUDIO_PACKET_CONTROL2, AFMT, id), \ - SRI_ARR(AFMT_AUDIO_SRC_CONTROL, AFMT, id), \ - SRI_ARR(AFMT_60958_0, AFMT, id), \ - SRI_ARR(AFMT_60958_1, AFMT, id), \ - SRI_ARR(AFMT_60958_2, AFMT, id), \ - SRI_ARR(AFMT_MEM_PWR, AFMT, id) - -/* Stream encoder */ -#define SE_DCN35_REG_LIST_RI(id) \ - SRI_ARR(AFMT_CNTL, DIG, id), \ - SRI_ARR(DIG_FE_CNTL, DIG, id), \ - SRI_ARR(HDMI_CONTROL, DIG, id), \ - SRI_ARR(HDMI_DB_CONTROL, DIG, id), \ - SRI_ARR(HDMI_GC, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \ - SRI_ARR(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \ - SRI_ARR(HDMI_INFOFRAME_CONTROL0, DIG, id), \ - SRI_ARR(HDMI_INFOFRAME_CONTROL1, DIG, id), \ - SRI_ARR(HDMI_VBI_PACKET_CONTROL, DIG, id), \ - SRI_ARR(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\ - SRI_ARR(HDMI_ACR_PACKET_CONTROL, DIG, id),\ - SRI_ARR(HDMI_ACR_32_0, DIG, id),\ - SRI_ARR(HDMI_ACR_32_1, DIG, id),\ - SRI_ARR(HDMI_ACR_44_0, DIG, id),\ - SRI_ARR(HDMI_ACR_44_1, DIG, id),\ - SRI_ARR(HDMI_ACR_48_0, DIG, id),\ - SRI_ARR(HDMI_ACR_48_1, DIG, id),\ - SRI_ARR(DP_DB_CNTL, DP, id), \ - SRI_ARR(DP_MSA_MISC, DP, id), \ - SRI_ARR(DP_MSA_VBID_MISC, DP, id), \ - SRI_ARR(DP_MSA_COLORIMETRY, DP, id), \ - SRI_ARR(DP_MSA_TIMING_PARAM1, DP, id), \ - SRI_ARR(DP_MSA_TIMING_PARAM2, DP, id), \ - SRI_ARR(DP_MSA_TIMING_PARAM3, DP, id), \ - SRI_ARR(DP_MSA_TIMING_PARAM4, DP, id), \ - SRI_ARR(DP_MSE_RATE_CNTL, DP, id), \ - SRI_ARR(DP_MSE_RATE_UPDATE, DP, id), \ - SRI_ARR(DP_PIXEL_FORMAT, DP, id), \ - SRI_ARR(DP_SEC_CNTL, DP, id), \ - SRI_ARR(DP_SEC_CNTL1, DP, id), \ - SRI_ARR(DP_SEC_CNTL2, DP, id), \ - SRI_ARR(DP_SEC_CNTL5, DP, id), \ - SRI_ARR(DP_SEC_CNTL6, DP, id), \ - SRI_ARR(DP_STEER_FIFO, DP, id), \ - SRI_ARR(DP_VID_M, DP, id), \ - 
SRI_ARR(DP_VID_N, DP, id), \ - SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ - SRI_ARR(DP_VID_TIMING, DP, id), \ - SRI_ARR(DP_SEC_AUD_N, DP, id), \ - SRI_ARR(DP_SEC_TIMESTAMP, DP, id), \ - SRI_ARR(DP_DSC_CNTL, DP, id), \ - SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ - SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ - SRI_ARR(DP_SEC_FRAMING4, DP, id), \ - SRI_ARR(DP_GSP11_CNTL, DP, id), \ - SRI_ARR(DME_CONTROL, DME, id),\ - SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ - SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ - SRI_ARR(DIG_FE_CNTL, DIG, id), \ - SRI_ARR(DIG_FE_EN_CNTL, DIG, id), \ - SRI_ARR(DIG_FE_CLK_CNTL, DIG, id), \ - SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \ - SRI_ARR(DIG_FIFO_CTRL0, DIG, id), \ - SRI_ARR(STREAM_MAPPER_CONTROL, DIG, id) - -#define LE_DCN35_REG_LIST_RI(id)\ - LE_DCN3_REG_LIST_RI(id),\ - SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \ - SR_ARR(DIO_LINKA_CNTL, id), \ - SR_ARR(DIO_LINKB_CNTL, id), \ - SR_ARR(DIO_LINKC_CNTL, id), \ - SR_ARR(DIO_LINKD_CNTL, id), \ - SR_ARR(DIO_LINKE_CNTL, id), \ - SR_ARR(DIO_LINKF_CNTL, id),\ - SRI_ARR(DIG_BE_CLK_CNTL, DIG, id),\ - SR_ARR(DIO_CLK_CNTL, id) - -#define MCIF_WB_COMMON_REG_LIST_DCN3_5_RI(inst) \ - MCIF_WB_COMMON_REG_LIST_DCN32_RI(inst), \ - SRI2_ARR(MMHUBBUB_CLOCK_CNTL, MMHUBBUB, inst) - -#define HWSEQ_DCN35_REG_LIST()\ - SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ - SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ - SR(DIO_MEM_PWR_CTRL), \ - SR(ODM_MEM_PWR_CTRL3), \ - SR(MMHUBBUB_MEM_PWR_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL), \ - SR(DCCG_GATE_DISABLE_CNTL2), \ - SR(DCCG_GATE_DISABLE_CNTL5), \ - SR(DCFCLK_CNTL),\ - SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ - SRII(PIXEL_RATE_CNTL, OTG, 0), \ - SRII(PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PIXEL_RATE_CNTL, OTG, 3),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ - SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ - SR(MICROSECOND_TIME_BASE_DIV), \ - SR(MILLISECOND_TIME_BASE_DIV), \ - SR(DISPCLK_FREQ_CHANGE_CNTL), \ - SR(RBBMIF_TIMEOUT_DIS), \ - SR(RBBMIF_TIMEOUT_DIS_2), \ - SR(DCHUBBUB_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_CTRL), \ - SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ - SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ - SR(MPC_CRC_CTRL), \ - SR(MPC_CRC_RESULT_GB), \ - SR(MPC_CRC_RESULT_C), \ - SR(MPC_CRC_RESULT_AR), \ - SR(DOMAIN0_PG_CONFIG), \ - SR(DOMAIN1_PG_CONFIG), \ - SR(DOMAIN2_PG_CONFIG), \ - SR(DOMAIN3_PG_CONFIG), \ - SR(DOMAIN16_PG_CONFIG), \ - SR(DOMAIN17_PG_CONFIG), \ - SR(DOMAIN18_PG_CONFIG), \ - SR(DOMAIN19_PG_CONFIG), \ - SR(DOMAIN0_PG_STATUS), \ - SR(DOMAIN1_PG_STATUS), \ - SR(DOMAIN2_PG_STATUS), \ - SR(DOMAIN3_PG_STATUS), \ - SR(DOMAIN16_PG_STATUS), \ - SR(DOMAIN17_PG_STATUS), \ - SR(DOMAIN18_PG_STATUS), \ - SR(DOMAIN19_PG_STATUS), \ - SR(DC_IP_REQUEST_CNTL), \ - SR(AZALIA_AUDIO_DTO), \ - SR(AZALIA_CONTROLLER_CLOCK_GATING), \ - SR(HPO_TOP_HW_CONTROL),\ - SR(DMU_CLK_CNTL) - -/* OPTC */ -#define OPTC_COMMON_REG_LIST_DCN3_5_RI(inst) \ - SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst),\ - SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst),\ - SRI_ARR(OTG_VREADY_PARAM, OTG, inst),\ - SRI_ARR(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ - SRI_ARR(OTG_GLOBAL_CONTROL0, OTG, inst),\ - SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI_ARR(OTG_GLOBAL_CONTROL4, OTG, inst),\ - SRI_ARR(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ - SRI_ARR(OTG_H_TOTAL, OTG, inst),\ - SRI_ARR(OTG_H_BLANK_START_END, OTG, inst),\ - SRI_ARR(OTG_H_SYNC_A, OTG, inst),\ - SRI_ARR(OTG_H_SYNC_A_CNTL, OTG, inst),\ - SRI_ARR(OTG_H_TIMING_CNTL, OTG, inst),\ - 
SRI_ARR(OTG_V_TOTAL, OTG, inst),\ - SRI_ARR(OTG_V_BLANK_START_END, OTG, inst),\ - SRI_ARR(OTG_V_SYNC_A, OTG, inst),\ - SRI_ARR(OTG_V_SYNC_A_CNTL, OTG, inst),\ - SRI_ARR(OTG_CONTROL, OTG, inst),\ - SRI_ARR(OTG_STEREO_CONTROL, OTG, inst),\ - SRI_ARR(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ - SRI_ARR(OTG_STEREO_STATUS, OTG, inst),\ - SRI_ARR(OTG_V_TOTAL_MAX, OTG, inst),\ - SRI_ARR(OTG_V_TOTAL_MIN, OTG, inst),\ - SRI_ARR(OTG_V_TOTAL_CONTROL, OTG, inst),\ - SRI_ARR(OTG_TRIGA_CNTL, OTG, inst),\ - SRI_ARR(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ - SRI_ARR(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ - SRI_ARR(OTG_STATUS_FRAME_COUNT, OTG, inst),\ - SRI_ARR(OTG_STATUS, OTG, inst),\ - SRI_ARR(OTG_STATUS_POSITION, OTG, inst),\ - SRI_ARR(OTG_NOM_VERT_POSITION, OTG, inst),\ - SRI_ARR(OTG_M_CONST_DTO0, OTG, inst),\ - SRI_ARR(OTG_M_CONST_DTO1, OTG, inst),\ - SRI_ARR(OTG_CLOCK_CONTROL, OTG, inst),\ - SRI_ARR(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ - SRI_ARR(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ - SRI_ARR(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ - SRI_ARR(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ - SRI_ARR(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ - SRI_ARR(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ - SRI_ARR(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ - SRI_ARR(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ - SRI_ARR(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ - SRI_ARR(CONTROL, VTG, inst),\ - SRI_ARR(OTG_VERT_SYNC_CONTROL, OTG, inst),\ - SRI_ARR(OTG_GSL_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC_CNTL, OTG, inst),\ - SRI_ARR(OTG_CRC0_DATA_RG, OTG, inst),\ - SRI_ARR(OTG_CRC0_DATA_B, OTG, inst),\ - SRI_ARR(OTG_CRC1_DATA_RG, OTG, inst),\ - SRI_ARR(OTG_CRC1_DATA_B, OTG, inst),\ - SRI_ARR(OTG_CRC2_DATA_RG, OTG, inst),\ - SRI_ARR(OTG_CRC2_DATA_B, OTG, inst),\ - SRI_ARR(OTG_CRC3_DATA_RG, OTG, inst),\ - SRI_ARR(OTG_CRC3_DATA_B, OTG, inst),\ - SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC1_WINDOWA_X_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC1_WINDOWA_Y_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC1_WINDOWB_X_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC1_WINDOWB_Y_CONTROL, OTG, inst),\ - SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG, inst),\ - SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG, inst),\ - SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG, inst),\ - SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG, inst),\ - SRI_ARR(OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG, inst),\ - SRI_ARR(OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG, inst),\ - SRI_ARR(OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG, inst),\ - SRI_ARR(OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG, inst),\ - SR_ARR(GSL_SOURCE_SELECT, inst),\ - SRI_ARR(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\ - SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI_ARR(OTG_GSL_WINDOW_X, OTG, inst),\ - SRI_ARR(OTG_GSL_WINDOW_Y, OTG, inst),\ - SRI_ARR(OTG_VUPDATE_KEEPOUT, OTG, inst),\ - SRI_ARR(OTG_DSC_START_POSITION, OTG, inst),\ - SRI_ARR(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ - SRI_ARR(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ - SRI_ARR(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ - SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst),\ - SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst),\ - SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst),\ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst),\ - SRI2_ARR(OPTC_CLOCK_CONTROL, OPTC, inst) - -/* DPP */ -#define DPP_REG_LIST_DCN35_RI(id)\ - DPP_REG_LIST_DCN30_COMMON_RI(id) - -#endif /* 
_DCN35_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index 50b0434354f8..0c4a8fe8e5ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -30,7 +30,7 @@ #include "dcn_calc_auto.h" #include "dal_asic_id.h" #include "resource.h" -#include "dcn10/dcn10_resource.h" +#include "resource/dcn10/dcn10_resource.h" #include "dcn10/dcn10_hubbub.h" #include "dml/dml1_display_rq_dlg_calc.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile new file mode 100644 index 000000000000..0a75ed8962a5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -0,0 +1,199 @@ + +# Copyright 2022 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# Makefile for the 'resource' sub-component of DAL. 
+# + + +############################################################################### +# DCE +############################################################################### + +RESOURCE_DCE100 = dce100_resource.o + +AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE100) + +############################################################################### + +RESOURCE_DCE110 = dce110_resource.o + +AMD_DAL_RESOURCE_DCE110 = $(addprefix $(AMDDALPATH)/dc/resource/dce110/,$(RESOURCE_DCE110)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE110) + +############################################################################### + +RESOURCE_DCE112 = dce112_resource.o + +AMD_DAL_RESOURCE_DCE112 = $(addprefix $(AMDDALPATH)/dc/resource/dce112/,$(RESOURCE_DCE112)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE112) + +############################################################################### + +RESOURCE_DCE120 = dce120_resource.o + +AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOURCE_DCE120)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120) + +############################################################################### + +RESOURCE_DCE80 = dce80_resource.o + +AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80) + +ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN +############################################################################### + +RESOURCE_DCN10 = dcn10_resource.o + +AMD_DAL_RESOURCE_DCN10 = $(addprefix $(AMDDALPATH)/dc/resource/dcn10/,$(RESOURCE_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN10) + +############################################################################### + +RESOURCE_DCN20 = dcn20_resource.o + +AMD_DAL_RESOURCE_DCN20 = $(addprefix $(AMDDALPATH)/dc/resource/dcn20/,$(RESOURCE_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN20) + +############################################################################### + +RESOURCE_DCN201 = dcn201_resource.o + +AMD_DAL_RESOURCE_DCN201 = $(addprefix $(AMDDALPATH)/dc/resource/dcn201/,$(RESOURCE_DCN201)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN201) + +############################################################################### + +RESOURCE_DCN21 = dcn21_resource.o + +AMD_DAL_RESOURCE_DCN21 = $(addprefix $(AMDDALPATH)/dc/resource/dcn21/,$(RESOURCE_DCN21)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN21) + +############################################################################### + +############################################################################### + +############################################################################### + +RESOURCE_DCN30 = dcn30_resource.o + +AMD_DAL_RESOURCE_DCN30 = $(addprefix $(AMDDALPATH)/dc/resource/dcn30/,$(RESOURCE_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN30) + +############################################################################### + +RESOURCE_DCN301 = dcn301_resource.o + +AMD_DAL_RESOURCE_DCN301 = $(addprefix $(AMDDALPATH)/dc/resource/dcn301/,$(RESOURCE_DCN301)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN301) + +############################################################################### + +RESOURCE_DCN302 = dcn302_resource.o + +AMD_DAL_RESOURCE_DCN302 = $(addprefix $(AMDDALPATH)/dc/resource/dcn302/,$(RESOURCE_DCN302)) + +AMD_DISPLAY_FILES += 
$(AMD_DAL_RESOURCE_DCN302) + +############################################################################### + +RESOURCE_DCN303 = dcn303_resource.o + +AMD_DAL_RESOURCE_DCN303 = $(addprefix $(AMDDALPATH)/dc/resource/dcn303/,$(RESOURCE_DCN303)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN303) + +############################################################################### + +RESOURCE_DCN31 = dcn31_resource.o + +AMD_DAL_RESOURCE_DCN31 = $(addprefix $(AMDDALPATH)/dc/resource/dcn31/,$(RESOURCE_DCN31)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN31) + +############################################################################### + +RESOURCE_DCN314 = dcn314_resource.o + +AMD_DAL_RESOURCE_DCN314 = $(addprefix $(AMDDALPATH)/dc/resource/dcn314/,$(RESOURCE_DCN314)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN314) + +############################################################################### + +RESOURCE_DCN315 = dcn315_resource.o + +AMD_DAL_RESOURCE_DCN315 = $(addprefix $(AMDDALPATH)/dc/resource/dcn315/,$(RESOURCE_DCN315)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN315) + +############################################################################### + +RESOURCE_DCN316 = dcn316_resource.o + +AMD_DAL_RESOURCE_DCN316 = $(addprefix $(AMDDALPATH)/dc/resource/dcn316/,$(RESOURCE_DCN316)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN316) + +############################################################################### + +RESOURCE_DCN32 = dcn32_resource.o + +AMD_DAL_RESOURCE_DCN32 = $(addprefix $(AMDDALPATH)/dc/resource/dcn32/,$(RESOURCE_DCN32)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN32) + +############################################################################### + +RESOURCE_DCN321 = dcn321_resource.o + +AMD_DAL_RESOURCE_DCN321 = $(addprefix $(AMDDALPATH)/dc/resource/dcn321/,$(RESOURCE_DCN321)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN321) + +############################################################################### + +RESOURCE_DCN35 = dcn35_resource.o + +AMD_DAL_RESOURCE_DCN35 = $(addprefix $(AMDDALPATH)/dc/resource/dcn35/,$(RESOURCE_DCN35)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN35) + +############################################################################### + +############################################################################### + +endif diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c new file mode 100644 index 000000000000..53a5f4cb648c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -0,0 +1,1179 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dm_services.h" + +#include "link_encoder.h" +#include "stream_encoder.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dce110/dce110_timing_generator.h" +#include "irq/dce110/irq_service_dce110.h" +#include "dce/dce_link_encoder.h" +#include "dce/dce_stream_encoder.h" +#include "dce/dce_mem_input.h" +#include "dce/dce_ipp.h" +#include "dce/dce_transform.h" +#include "dce/dce_opp.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "dce100/dce100_hwseq.h" +#include "dce/dce_panel_cntl.h" + +#include "reg_helper.h" + +#include "dce/dce_10_0_d.h" +#include "dce/dce_10_0_sh_mask.h" + +#include "dce/dce_dmcu.h" +#include "dce/dce_aux.h" +#include "dce/dce_abm.h" +#include "dce/dce_i2c.h" + +#include "dce100_resource.h" + +#ifndef mmMC_HUB_RDREQ_DMIF_LIMIT +#include "gmc/gmc_8_2_d.h" +#include "gmc/gmc_8_2_sh_mask.h" +#endif + +#ifndef mmDP_DPHY_INTERNAL_CTRL + #define mmDP_DPHY_INTERNAL_CTRL 0x4aa7 + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x4aa7 + #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x4ba7 + #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x4ca7 + #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x4da7 + #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x4ea7 + #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x4fa7 + #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x54a7 + #define mmDP7_DP_DPHY_INTERNAL_CTRL 0x56a7 + #define mmDP8_DP_DPHY_INTERNAL_CTRL 0x57a7 +#endif + +#ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_2 0x05CB + #define mmBIOS_SCRATCH_3 0x05CC + #define mmBIOS_SCRATCH_6 0x05CF +#endif + +#ifndef mmDP_DPHY_BS_SR_SWAP_CNTL + #define mmDP_DPHY_BS_SR_SWAP_CNTL 0x4ADC + #define mmDP0_DP_DPHY_BS_SR_SWAP_CNTL 0x4ADC + #define mmDP1_DP_DPHY_BS_SR_SWAP_CNTL 0x4BDC + #define mmDP2_DP_DPHY_BS_SR_SWAP_CNTL 0x4CDC + #define mmDP3_DP_DPHY_BS_SR_SWAP_CNTL 0x4DDC + #define mmDP4_DP_DPHY_BS_SR_SWAP_CNTL 0x4EDC + #define mmDP5_DP_DPHY_BS_SR_SWAP_CNTL 0x4FDC + #define mmDP6_DP_DPHY_BS_SR_SWAP_CNTL 0x54DC +#endif + +#ifndef mmDP_DPHY_FAST_TRAINING + #define mmDP_DPHY_FAST_TRAINING 0x4ABC + #define mmDP0_DP_DPHY_FAST_TRAINING 0x4ABC + #define mmDP1_DP_DPHY_FAST_TRAINING 0x4BBC + #define mmDP2_DP_DPHY_FAST_TRAINING 0x4CBC + #define mmDP3_DP_DPHY_FAST_TRAINING 0x4DBC + #define mmDP4_DP_DPHY_FAST_TRAINING 0x4EBC + #define mmDP5_DP_DPHY_FAST_TRAINING 0x4FBC + #define mmDP6_DP_DPHY_FAST_TRAINING 0x54BC +#endif + +static const struct dce110_timing_generator_offsets dce100_tg_offsets[] = { + { + .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP0_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP1_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP2_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP3_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP4_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP5_GRPH_CONTROL - mmGRPH_CONTROL), + } +}; + +/* set register offset */ +#define SR(reg_name)\ + 
.reg_name = mm ## reg_name + +/* set register offset with instance */ +#define SRI(reg_name, block, id)\ + .reg_name = mm ## block ## id ## _ ## reg_name + +#define ipp_regs(id)\ +[id] = {\ + IPP_DCE100_REG_LIST_DCE_BASE(id)\ +} + +static const struct dce_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2), + ipp_regs(3), + ipp_regs(4), + ipp_regs(5) +}; + +static const struct dce_ipp_shift ipp_shift = { + IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce_ipp_mask ipp_mask = { + IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +#define transform_regs(id)\ +[id] = {\ + XFM_COMMON_REG_LIST_DCE100(id)\ +} + +static const struct dce_transform_registers xfm_regs[] = { + transform_regs(0), + transform_regs(1), + transform_regs(2), + transform_regs(3), + transform_regs(4), + transform_regs(5) +}; + +static const struct dce_transform_shift xfm_shift = { + XFM_COMMON_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_transform_mask xfm_mask = { + XFM_COMMON_MASK_SH_LIST_DCE110(_MASK) +}; + +#define aux_regs(id)\ +[id] = {\ + AUX_REG_LIST(id)\ +} + +static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4), + aux_regs(5) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4), + hpd_regs(5) +}; + +#define link_regs(id)\ +[id] = {\ + LE_DCE100_REG_LIST(id)\ +} + +static const struct dce110_link_enc_registers link_enc_regs[] = { + link_regs(0), + link_regs(1), + link_regs(2), + link_regs(3), + link_regs(4), + link_regs(5), + link_regs(6), +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_COMMON_REG_LIST_DCE_BASE(id),\ + .AFMT_CNTL = 0,\ +} + +static const struct dce110_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4), + stream_enc_regs(5), + stream_enc_regs(6) +}; + +static const struct dce_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCE80_100(__SHIFT) +}; + +static const struct dce_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCE80_100(_MASK) +}; + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCE_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_DCE_100_REG_LIST(id),\ +} + +static const struct dce_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4), + opp_regs(5) +}; + +static const struct dce_opp_shift opp_shift = { + OPP_COMMON_MASK_SH_LIST_DCE_100(__SHIFT) +}; + +static const struct dce_opp_mask opp_mask = { + OPP_COMMON_MASK_SH_LIST_DCE_100(_MASK) +}; +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST(id), \ + .AUX_RESET_MASK = 0 \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), + aux_engine_regs(5) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + 
audio_regs(5), + audio_regs(6), +}; + +static const struct dce_audio_shift audio_shift = { + AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define clk_src_regs(id)\ +[id] = {\ + CS_COMMON_REG_LIST_DCE_100_110(id),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0), + clk_src_regs(1), + clk_src_regs(2) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +static const struct dce_dmcu_registers dmcu_regs = { + DMCU_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_dmcu_shift dmcu_shift = { + DMCU_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_dmcu_mask dmcu_mask = { + DMCU_MASK_SH_LIST_DCE110(_MASK) +}; + +static const struct dce_abm_registers abm_regs = { + ABM_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCE110(_MASK) +}; + +#define DCFE_MEM_PWR_CTRL_REG_BASE 0x1b03 + +static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, + .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 +}; + +static const struct resource_caps res_cap = { + .num_timing_generator = 6, + .num_audio = 6, + .num_stream_encoder = 6, + .num_pll = 3, + .num_ddc = 6, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + } +}; + +static const struct dc_debug_options debug_defaults = { + .enable_legacy_fast_update = true, +}; + +#define CTX ctx +#define REG(reg) mm ## reg + +#ifndef mmCC_DC_HDMI_STRAPS +#define mmCC_DC_HDMI_STRAPS 0x1918 +#define CC_DC_HDMI_STRAPS__HDMI_DISABLE_MASK 0x40 +#define CC_DC_HDMI_STRAPS__HDMI_DISABLE__SHIFT 0x6 +#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x700 +#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 +#endif + +static int map_transmitter_id_to_phy_instance( + enum transmitter transmitter) +{ + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + return 0; + case TRANSMITTER_UNIPHY_B: + return 1; + case TRANSMITTER_UNIPHY_C: + return 2; + case TRANSMITTER_UNIPHY_D: + return 3; + case TRANSMITTER_UNIPHY_E: + return 4; + case TRANSMITTER_UNIPHY_F: + return 5; + case TRANSMITTER_UNIPHY_G: + return 6; + default: + ASSERT(0); + return 0; + } +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + REG_GET_2(CC_DC_HDMI_STRAPS, + HDMI_DISABLE, &straps->hdmi_disable, + AUDIO_STREAM_NUMBER, &straps->audio_stream_number); + + REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio); +} + +static struct audio *create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct timing_generator *dce100_timing_generator_create( + struct dc_context *ctx, + uint32_t instance, + const struct dce110_timing_generator_offsets *offsets) +{ + struct dce110_timing_generator *tg110 = + kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); + + if (!tg110) + return NULL; + + 
dce110_timing_generator_construct(tg110, ctx, instance, offsets); + return &tg110->base; +} + +static struct stream_encoder *dce100_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dce110_stream_encoder *enc110 = + kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); + + if (!enc110) + return NULL; + + dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], &se_shift, &se_mask); + return &enc110->base; +} + +#define SRII(reg_name, block, id)\ + .reg_name[id] = mm ## block ## id ## _ ## reg_name + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCE10_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCE10_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCE10_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dce100_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = create_audio, + .create_stream_encoder = dce100_stream_encoder_create, + .create_hwseq = dce100_hwseq_create, +}; + +#define mi_inst_regs(id) { \ + MI_DCE8_REG_LIST(id), \ + .MC_HUB_RDREQ_DMIF_LIMIT = mmMC_HUB_RDREQ_DMIF_LIMIT \ +} +static const struct dce_mem_input_registers mi_regs[] = { + mi_inst_regs(0), + mi_inst_regs(1), + mi_inst_regs(2), + mi_inst_regs(3), + mi_inst_regs(4), + mi_inst_regs(5), +}; + +static const struct dce_mem_input_shift mi_shifts = { + MI_DCE8_MASK_SH_LIST(__SHIFT), + .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE__SHIFT +}; + +static const struct dce_mem_input_mask mi_masks = { + MI_DCE8_MASK_SH_LIST(_MASK), + .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE_MASK +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCE10_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCE10_AUX_MASK_SH_LIST(_MASK) +}; + +static struct mem_input *dce100_mem_input_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), + GFP_KERNEL); + + if (!dce_mi) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); + dce_mi->wa.single_head_rdreq_dmif_limit = 2; + return &dce_mi->base; +} + +static void dce100_transform_destroy(struct transform **xfm) +{ + kfree(TO_DCE_TRANSFORM(*xfm)); + *xfm = NULL; +} + +static struct transform *dce100_transform_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_transform *transform = + kzalloc(sizeof(struct dce_transform), GFP_KERNEL); + + if (!transform) + return NULL; + + dce_transform_construct(transform, ctx, inst, + &xfm_regs[inst], &xfm_shift, &xfm_mask); + return &transform->base; +} + +static struct input_pixel_processor *dce100_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); + + if (!ipp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce_ipp_construct(ipp, ctx, inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 300000, + .flags.bits.IS_HBR2_CAPABLE = true, + 
.flags.bits.IS_TPS3_CAPABLE = true +}; + +static struct link_encoder *dce100_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dce110_link_encoder *enc110 = + kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); + int link_regs_id; + + if (!enc110) + return NULL; + + link_regs_id = + map_transmitter_id_to_phy_instance(enc_init_data->transmitter); + + dce110_link_encoder_construct(enc110, + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source]); + return &enc110->base; +} + +static struct panel_cntl *dce100_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static struct output_pixel_processor *dce100_opp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce110_opp *opp = + kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); + + if (!opp) + return NULL; + + dce110_opp_construct(opp, + ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dce100_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), + i2c_inst_regs(6), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +static struct dce_i2c_hw *dce100_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dce100_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct clock_source *dce100_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dce110_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static void dce100_clock_source_destroy(struct clock_source **clk_src) +{ + kfree(TO_DCE110_CLK_SRC(*clk_src)); + *clk_src = NULL; +} + +static void dce100_resource_destruct(struct dce110_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.opps[i] != NULL) + dce110_opp_destroy(&pool->base.opps[i]); + + if 
(pool->base.transforms[i] != NULL) + dce100_transform_destroy(&pool->base.transforms[i]); + + if (pool->base.ipps[i] != NULL) + dce_ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.mis[i] != NULL) { + kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); + pool->base.mis[i] = NULL; + } + + if (pool->base.timing_generators[i] != NULL) { + kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) + kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) + dce100_clock_source_destroy(&pool->base.clock_sources[i]); + } + + if (pool->base.dp_clock_source != NULL) + dce100_clock_source_destroy(&pool->base.dp_clock_source); + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i] != NULL) + dce_aud_destroy(&pool->base.audios[i]); + } + + if (pool->base.abm != NULL) + dce_abm_destroy(&pool->base.abm); + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); + + if (pool->base.irqs != NULL) + dal_irq_service_destroy(&pool->base.irqs); +} + +static enum dc_status build_mapped_resource( + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *stream) +{ + struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + + dce110_resource_build_pipe_hw_param(pipe_ctx); + + resource_build_info_frame(pipe_ctx); + + return DC_OK; +} + +static bool dce100_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + int i; + bool at_least_one_pipe = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream) + at_least_one_pipe = true; + } + + if (at_least_one_pipe) { + /* TODO implement when needed but for now hardcode max value*/ + context->bw_ctx.bw.dce.dispclk_khz = 681000; + context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; + } else { + context->bw_ctx.bw.dce.dispclk_khz = 0; + context->bw_ctx.bw.dce.yclk_khz = 0; + } + + return true; +} + +static bool dce100_validate_surface_sets( + struct dc_state *context) +{ + int i; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count == 0) + continue; + + if (context->stream_status[i].plane_count > 1) + return false; + + if (context->stream_status[i].plane_states[0]->format + >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return false; + } + + return true; +} + +static enum dc_status dce100_validate_global( + struct dc *dc, + struct dc_state *context) +{ + if (!dce100_validate_surface_sets(context)) + return DC_FAIL_SURFACE_VALIDATE; + + return DC_OK; +} + +enum dc_status dce100_add_stream_to_ctx( + struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *dc_stream) +{ + enum dc_status result = DC_ERROR_UNEXPECTED; + + result = resource_map_pool_resources(dc, new_ctx, dc_stream); + + if (result == DC_OK) + result = resource_map_clock_resources(dc, new_ctx, dc_stream); + + if (result == DC_OK) + result = 
build_mapped_resource(dc, new_ctx, dc_stream); + + return result; +} + +static void dce100_destroy_resource_pool(struct resource_pool **pool) +{ + struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); + + dce100_resource_destruct(dce110_pool); + kfree(dce110_pool); + *pool = NULL; +} + +enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps) +{ + + if (plane_state->format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return DC_OK; + + return DC_FAIL_SURFACE_VALIDATE; +} + +struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * below can happen in cases when stream encoder is acquired: + * 1) for second MST display in chain, so preferred engine already + * acquired; + * 2) for another link, which preferred engine already acquired by any + * MST configuration. + * + * If signal is of DP type and preferred engine not found, return last available + * + * TODO - This is just a patch up and a generic solution is + * required for non DP connectors. + */ + + if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) + return pool->stream_enc[j]; + + return NULL; +} + +static const struct resource_funcs dce100_res_pool_funcs = { + .destroy = dce100_destroy_resource_pool, + .link_enc_create = dce100_link_encoder_create, + .panel_cntl_create = dce100_panel_cntl_create, + .validate_bandwidth = dce100_validate_bandwidth, + .validate_plane = dce100_validate_plane, + .add_stream_to_ctx = dce100_add_stream_to_ctx, + .validate_global = dce100_validate_global, + .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link +}; + +static bool dce100_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dce110_resource_pool *pool) +{ + unsigned int i; + struct dc_context *ctx = dc->ctx; + struct dc_bios *bp; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap; + pool->base.funcs = &dce100_res_pool_funcs; + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + + bp = ctx->dc_bios; + + if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { + pool->base.dp_clock_source = + dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + + pool->base.clock_sources[0] = + dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false); + pool->base.clock_sources[1] = + dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[2] = + dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 3; + + } else { + pool->base.dp_clock_source = + dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); + + pool->base.clock_sources[0] = + dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[1] = + dce100_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 2; + } + + if 
(pool->base.dp_clock_source == NULL) { + dm_error("DC: failed to create dp clock source!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + } + + pool->base.dmcu = dce_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + { + struct irq_service_init_data init_data; + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dce110_create(&init_data); + if (!pool->base.irqs) + goto res_create_fail; + } + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = res_cap.num_timing_generator; + pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 40; + dc->caps.i2c_speed_in_khz = 40; + dc->caps.max_cursor_size = 128; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dual_link_dvi = true; + dc->caps.disable_dp_clk_share = true; + dc->caps.extended_aux_timeout_support = false; + dc->debug = debug_defaults; + + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.timing_generators[i] = + dce100_timing_generator_create( + ctx, + i, + &dce100_tg_offsets[i]); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto res_create_fail; + } + + pool->base.mis[i] = dce100_mem_input_create(ctx, i); + if (pool->base.mis[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create memory input!\n"); + goto res_create_fail; + } + + pool->base.ipps[i] = dce100_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create input pixel processor!\n"); + goto res_create_fail; + } + + pool->base.transforms[i] = dce100_transform_create(ctx, i); + if (pool->base.transforms[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create transform!\n"); + goto res_create_fail; + } + + pool->base.opps[i] = dce100_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto res_create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dce100_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto res_create_fail; + } + pool->base.hw_i2cs[i] = dce100_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create i2c engine!!\n"); + goto res_create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto res_create_fail; + + /* Create hardware sequencer */ + 
dce100_hw_sequencer_construct(dc); + return true; + +res_create_fail: + dce100_resource_destruct(pool); + + return false; +} + +struct resource_pool *dce100_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc) +{ + struct dce110_resource_pool *pool = + kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dce100_resource_construct(num_virtual_links, dc, pool)) + return &pool->base; + + kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; +} + diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h new file mode 100644 index 000000000000..fecab7c560f5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h @@ -0,0 +1,54 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +/* + * dce100_resource.h + * + * Created on: 2016-01-20 + * Author: qyang + */ + +#ifndef DCE100_RESOURCE_H_ +#define DCE100_RESOURCE_H_ + +struct dc; +struct resource_pool; +struct dc_validation_set; + +struct resource_pool *dce100_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc); + +enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps); + +enum dc_status dce100_add_stream_to_ctx( + struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *dc_stream); + +struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + +#endif /* DCE100_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c new file mode 100644 index 000000000000..fe518fd27b08 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -0,0 +1,1551 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dm_services.h" + +#include "link_encoder.h" +#include "stream_encoder.h" + +#include "resource.h" +#include "dce110/dce110_resource.h" +#include "include/irq_service_interface.h" +#include "dce/dce_audio.h" +#include "dce110/dce110_timing_generator.h" +#include "irq/dce110/irq_service_dce110.h" +#include "dce110/dce110_timing_generator_v.h" +#include "dce/dce_link_encoder.h" +#include "dce/dce_stream_encoder.h" +#include "dce/dce_mem_input.h" +#include "dce110/dce110_mem_input_v.h" +#include "dce/dce_ipp.h" +#include "dce/dce_transform.h" +#include "dce110/dce110_transform_v.h" +#include "dce/dce_opp.h" +#include "dce110/dce110_opp_v.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dce/dce_aux.h" +#include "dce/dce_abm.h" +#include "dce/dce_dmcu.h" +#include "dce/dce_i2c.h" +#include "dce/dce_panel_cntl.h" + +#define DC_LOGGER \ + dc->ctx->logger + +#include "dce110/dce110_compressor.h" + +#include "reg_helper.h" + +#include "dce/dce_11_0_d.h" +#include "dce/dce_11_0_sh_mask.h" + +#ifndef mmMC_HUB_RDREQ_DMIF_LIMIT +#include "gmc/gmc_8_2_d.h" +#include "gmc/gmc_8_2_sh_mask.h" +#endif + +#ifndef mmDP_DPHY_INTERNAL_CTRL + #define mmDP_DPHY_INTERNAL_CTRL 0x4aa7 + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x4aa7 + #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x4ba7 + #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x4ca7 + #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x4da7 + #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x4ea7 + #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x4fa7 + #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x54a7 + #define mmDP7_DP_DPHY_INTERNAL_CTRL 0x56a7 + #define mmDP8_DP_DPHY_INTERNAL_CTRL 0x57a7 +#endif + +#ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_2 0x05CB + #define mmBIOS_SCRATCH_3 0x05CC + #define mmBIOS_SCRATCH_6 0x05CF +#endif + +#ifndef mmDP_DPHY_BS_SR_SWAP_CNTL + #define mmDP_DPHY_BS_SR_SWAP_CNTL 0x4ADC + #define mmDP0_DP_DPHY_BS_SR_SWAP_CNTL 0x4ADC + #define mmDP1_DP_DPHY_BS_SR_SWAP_CNTL 0x4BDC + #define mmDP2_DP_DPHY_BS_SR_SWAP_CNTL 0x4CDC + #define mmDP3_DP_DPHY_BS_SR_SWAP_CNTL 0x4DDC + #define mmDP4_DP_DPHY_BS_SR_SWAP_CNTL 0x4EDC + #define mmDP5_DP_DPHY_BS_SR_SWAP_CNTL 0x4FDC + #define mmDP6_DP_DPHY_BS_SR_SWAP_CNTL 0x54DC +#endif + +#ifndef mmDP_DPHY_FAST_TRAINING + #define mmDP_DPHY_FAST_TRAINING 0x4ABC + #define mmDP0_DP_DPHY_FAST_TRAINING 0x4ABC + #define mmDP1_DP_DPHY_FAST_TRAINING 0x4BBC + #define mmDP2_DP_DPHY_FAST_TRAINING 
0x4CBC + #define mmDP3_DP_DPHY_FAST_TRAINING 0x4DBC + #define mmDP4_DP_DPHY_FAST_TRAINING 0x4EBC + #define mmDP5_DP_DPHY_FAST_TRAINING 0x4FBC + #define mmDP6_DP_DPHY_FAST_TRAINING 0x54BC +#endif + +#ifndef DPHY_RX_FAST_TRAINING_CAPABLE + #define DPHY_RX_FAST_TRAINING_CAPABLE 0x1 +#endif + +static const struct dce110_timing_generator_offsets dce110_tg_offsets[] = { + { + .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP0_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP1_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP2_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP3_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP4_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP5_GRPH_CONTROL - mmGRPH_CONTROL), + } +}; + +/* set register offset */ +#define SR(reg_name)\ + .reg_name = mm ## reg_name + +/* set register offset with instance */ +#define SRI(reg_name, block, id)\ + .reg_name = mm ## block ## id ## _ ## reg_name + +static const struct dce_dmcu_registers dmcu_regs = { + DMCU_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_dmcu_shift dmcu_shift = { + DMCU_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_dmcu_mask dmcu_mask = { + DMCU_MASK_SH_LIST_DCE110(_MASK) +}; + +static const struct dce_abm_registers abm_regs = { + ABM_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCE110(_MASK) +}; + +#define ipp_regs(id)\ +[id] = {\ + IPP_DCE110_REG_LIST_DCE_BASE(id)\ +} + +static const struct dce_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2) +}; + +static const struct dce_ipp_shift ipp_shift = { + IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce_ipp_mask ipp_mask = { + IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +#define transform_regs(id)\ +[id] = {\ + XFM_COMMON_REG_LIST_DCE110(id)\ +} + +static const struct dce_transform_registers xfm_regs[] = { + transform_regs(0), + transform_regs(1), + transform_regs(2) +}; + +static const struct dce_transform_shift xfm_shift = { + XFM_COMMON_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_transform_mask xfm_mask = { + XFM_COMMON_MASK_SH_LIST_DCE110(_MASK) +}; + +#define aux_regs(id)\ +[id] = {\ + AUX_REG_LIST(id)\ +} + +static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4), + aux_regs(5) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4), + hpd_regs(5) +}; + + +#define link_regs(id)\ +[id] = {\ + LE_DCE110_REG_LIST(id)\ +} + +static const struct dce110_link_enc_registers link_enc_regs[] = { + link_regs(0), + link_regs(1), + link_regs(2), + link_regs(3), + link_regs(4), + link_regs(5), + link_regs(6), +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_COMMON_REG_LIST(id),\ + .TMDS_CNTL = 0,\ +} + +static const struct dce110_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2) +}; + +static const struct 
dce_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCE110(_MASK) +}; + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCE_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCE_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCE_AUX_MASK_SH_LIST(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_DCE_110_REG_LIST(id),\ +} + +static const struct dce_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4), + opp_regs(5) +}; + +static const struct dce_opp_shift opp_shift = { + OPP_COMMON_MASK_SH_LIST_DCE_110(__SHIFT) +}; + +static const struct dce_opp_mask opp_mask = { + OPP_COMMON_MASK_SH_LIST_DCE_110(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST(id), \ + .AUX_RESET_MASK = 0 \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), + aux_engine_regs(5) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6), +}; + +static const struct dce_audio_shift audio_shift = { + AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +/* AG TBD Needs to be reduced back to 3 pipes once dce10 hw sequencer implemented. 
*/ + + +#define clk_src_regs(id)\ +[id] = {\ + CS_COMMON_REG_LIST_DCE_100_110(id),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0), + clk_src_regs(1), + clk_src_regs(2) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, + .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 +}; + +static const struct resource_caps carrizo_resource_cap = { + .num_timing_generator = 3, + .num_video_plane = 1, + .num_audio = 3, + .num_stream_encoder = 3, + .num_pll = 2, + .num_ddc = 3, +}; + +static const struct resource_caps stoney_resource_cap = { + .num_timing_generator = 2, + .num_video_plane = 1, + .num_audio = 3, + .num_stream_encoder = 3, + .num_pll = 2, + .num_ddc = 3, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + .per_pixel_alpha = 1, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults = { + .enable_legacy_fast_update = true, +}; + +static const struct dc_plane_cap underlay_plane_cap = { + .type = DC_PLANE_TYPE_DCE_UNDERLAY, + .per_pixel_alpha = 1, + + .pixel_format_support = { + .argb8888 = false, + .nv12 = true, + .fp16 = false + }, + + .max_upscale_factor = { + .argb8888 = 1, + .nv12 = 16000, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 1, + .nv12 = 250, + .fp16 = 1 + }, + 64, + 64 +}; + +#define CTX ctx +#define REG(reg) mm ## reg + +#ifndef mmCC_DC_HDMI_STRAPS +#define mmCC_DC_HDMI_STRAPS 0x4819 +#define CC_DC_HDMI_STRAPS__HDMI_DISABLE_MASK 0x40 +#define CC_DC_HDMI_STRAPS__HDMI_DISABLE__SHIFT 0x6 +#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x700 +#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 +#endif + +static int map_transmitter_id_to_phy_instance( + enum transmitter transmitter) +{ + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + return 0; + case TRANSMITTER_UNIPHY_B: + return 1; + case TRANSMITTER_UNIPHY_C: + return 2; + case TRANSMITTER_UNIPHY_D: + return 3; + case TRANSMITTER_UNIPHY_E: + return 4; + case TRANSMITTER_UNIPHY_F: + return 5; + case TRANSMITTER_UNIPHY_G: + return 6; + default: + ASSERT(0); + return 0; + } +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + REG_GET_2(CC_DC_HDMI_STRAPS, + HDMI_DISABLE, &straps->hdmi_disable, + AUDIO_STREAM_NUMBER, &straps->audio_stream_number); + + REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio); +} + +static struct audio *create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct timing_generator *dce110_timing_generator_create( + struct dc_context *ctx, + uint32_t instance, + const struct dce110_timing_generator_offsets *offsets) +{ + struct dce110_timing_generator *tg110 = + kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); + + if (!tg110) + return NULL; + + dce110_timing_generator_construct(tg110, ctx, instance, offsets); + return &tg110->base; +} + +static struct stream_encoder *dce110_stream_encoder_create( + enum engine_id eng_id, + 
struct dc_context *ctx) +{ + struct dce110_stream_encoder *enc110 = + kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); + + if (!enc110) + return NULL; + + dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + return &enc110->base; +} + +#define SRII(reg_name, block, id)\ + .reg_name[id] = mm ## block ## id ## _ ## reg_name + +static const struct dce_hwseq_registers hwseq_stoney_reg = { + HWSEQ_ST_REG_LIST() +}; + +static const struct dce_hwseq_registers hwseq_cz_reg = { + HWSEQ_CZ_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCE11_MASK_SH_LIST(__SHIFT), +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCE11_MASK_SH_LIST(_MASK), +}; + +static struct dce_hwseq *dce110_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = ASIC_REV_IS_STONEY(ctx->asic_id.hw_internal_rev) ? + &hwseq_stoney_reg : &hwseq_cz_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + hws->wa.blnd_crtc_trigger = true; + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = create_audio, + .create_stream_encoder = dce110_stream_encoder_create, + .create_hwseq = dce110_hwseq_create, +}; + +#define mi_inst_regs(id) { \ + MI_DCE11_REG_LIST(id), \ + .MC_HUB_RDREQ_DMIF_LIMIT = mmMC_HUB_RDREQ_DMIF_LIMIT \ +} +static const struct dce_mem_input_registers mi_regs[] = { + mi_inst_regs(0), + mi_inst_regs(1), + mi_inst_regs(2), +}; + +static const struct dce_mem_input_shift mi_shifts = { + MI_DCE11_MASK_SH_LIST(__SHIFT), + .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE__SHIFT +}; + +static const struct dce_mem_input_mask mi_masks = { + MI_DCE11_MASK_SH_LIST(_MASK), + .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE_MASK +}; + + +static struct mem_input *dce110_mem_input_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), + GFP_KERNEL); + + if (!dce_mi) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); + dce_mi->wa.single_head_rdreq_dmif_limit = 3; + return &dce_mi->base; +} + +static void dce110_transform_destroy(struct transform **xfm) +{ + kfree(TO_DCE_TRANSFORM(*xfm)); + *xfm = NULL; +} + +static struct transform *dce110_transform_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_transform *transform = + kzalloc(sizeof(struct dce_transform), GFP_KERNEL); + + if (!transform) + return NULL; + + dce_transform_construct(transform, ctx, inst, + &xfm_regs[inst], &xfm_shift, &xfm_mask); + return &transform->base; +} + +static struct input_pixel_processor *dce110_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); + + if (!ipp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce_ipp_construct(ipp, ctx, inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 300000, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true +}; + +static struct link_encoder *dce110_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dce110_link_encoder *enc110 = + 
kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); + int link_regs_id; + + if (!enc110) + return NULL; + + link_regs_id = + map_transmitter_id_to_phy_instance(enc_init_data->transmitter); + + dce110_link_encoder_construct(enc110, + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source]); + return &enc110->base; +} + +static struct panel_cntl *dce110_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static struct output_pixel_processor *dce110_opp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce110_opp *opp = + kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); + + if (!opp) + return NULL; + + dce110_opp_construct(opp, + ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dce110_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), + i2c_inst_regs(6), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) +}; + +static struct dce_i2c_hw *dce110_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dce100_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct clock_source *dce110_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dce110_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static void dce110_clock_source_destroy(struct clock_source **clk_src) +{ + struct dce110_clk_src *dce110_clk_src; + + if (!clk_src) + return; + + dce110_clk_src = TO_DCE110_CLK_SRC(*clk_src); + + kfree(dce110_clk_src->dp_ss_params); + kfree(dce110_clk_src->hdmi_ss_params); + kfree(dce110_clk_src->dvi_ss_params); + + kfree(dce110_clk_src); + *clk_src = NULL; +} + +static void dce110_resource_destruct(struct dce110_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.opps[i] != NULL) + dce110_opp_destroy(&pool->base.opps[i]); + + if 
(pool->base.transforms[i] != NULL) + dce110_transform_destroy(&pool->base.transforms[i]); + + if (pool->base.ipps[i] != NULL) + dce_ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.mis[i] != NULL) { + kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); + pool->base.mis[i] = NULL; + } + + if (pool->base.timing_generators[i] != NULL) { + kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) + kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dce110_clock_source_destroy(&pool->base.clock_sources[i]); + } + } + + if (pool->base.dp_clock_source != NULL) + dce110_clock_source_destroy(&pool->base.dp_clock_source); + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i] != NULL) { + dce_aud_destroy(&pool->base.audios[i]); + } + } + + if (pool->base.abm != NULL) + dce_abm_destroy(&pool->base.abm); + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } +} + + +static void get_pixel_clock_parameters( + const struct pipe_ctx *pipe_ctx, + struct pixel_clk_params *pixel_clk_params) +{ + const struct dc_stream_state *stream = pipe_ctx->stream; + + /*TODO: is this halved for YCbCr 420? 
in that case we might want to move + * the pixel clock normalization for hdmi up to here instead of doing it + * in pll_adjust_pix_clk + */ + pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; + pixel_clk_params->encoder_object_id = stream->link->link_enc->id; + pixel_clk_params->signal_type = pipe_ctx->stream->signal; + pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; + /* TODO: un-hardcode*/ + pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * + LINK_RATE_REF_FREQ_IN_KHZ; + pixel_clk_params->flags.ENABLE_SS = 0; + pixel_clk_params->color_depth = + stream->timing.display_color_depth; + pixel_clk_params->flags.DISPLAY_BLANKED = 1; + pixel_clk_params->flags.SUPPORT_YCBCR420 = (stream->timing.pixel_encoding == + PIXEL_ENCODING_YCBCR420); + pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { + pixel_clk_params->color_depth = COLOR_DEPTH_888; + } + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2; + } + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + pixel_clk_params->requested_pix_clk_100hz *= 2; + +} + +void dce110_resource_build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +{ + get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); + pipe_ctx->clock_source->funcs->get_pix_clk_dividers( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + &pipe_ctx->pll_settings); + resource_build_bit_depth_reduction_params(pipe_ctx->stream, + &pipe_ctx->stream->bit_depth_params); + pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; +} + +static bool is_surface_pixel_format_supported(struct pipe_ctx *pipe_ctx, unsigned int underlay_idx) +{ + if (pipe_ctx->pipe_idx != underlay_idx) + return true; + if (!pipe_ctx->plane_state) + return false; + if (pipe_ctx->plane_state->format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return false; + return true; +} + +static enum dc_status build_mapped_resource( + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *stream) +{ + struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + + if (!is_surface_pixel_format_supported(pipe_ctx, + dc->res_pool->underlay_pipe_index)) + return DC_SURFACE_PIXEL_FORMAT_UNSUPPORTED; + + dce110_resource_build_pipe_hw_param(pipe_ctx); + + /* TODO: validate audio ASIC caps, encoder */ + + resource_build_info_frame(pipe_ctx); + + return DC_OK; +} + +static bool dce110_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool result = false; + + DC_LOG_BANDWIDTH_CALCS( + "%s: start", + __func__); + + if (bw_calcs( + dc->ctx, + dc->bw_dceip, + dc->bw_vbios, + context->res_ctx.pipe_ctx, + dc->res_pool->pipe_count, + &context->bw_ctx.bw.dce)) + result = true; + + if (!result) + DC_LOG_BANDWIDTH_VALIDATION("%s: %dx%d@%d Bandwidth validation failed!\n", + __func__, + context->streams[0]->timing.h_addressable, + context->streams[0]->timing.v_addressable, + context->streams[0]->timing.pix_clk_100hz / 10); + + if (memcmp(&dc->current_state->bw_ctx.bw.dce, + &context->bw_ctx.bw.dce, sizeof(context->bw_ctx.bw.dce))) { + + DC_LOG_BANDWIDTH_CALCS( + "%s: finish,\n" + "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" + "stutMark_b: %d stutMark_a: %d\n" + "nbpMark_b: %d nbpMark_a: %d 
urgentMark_b: %d urgentMark_a: %d\n" + "stutMark_b: %d stutMark_a: %d\n" + "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" + "stutMark_b: %d stutMark_a: %d stutter_mode_enable: %d\n" + "cstate: %d pstate: %d nbpstate: %d sync: %d dispclk: %d\n" + "sclk: %d sclk_sleep: %d yclk: %d blackout_recovery_time_us: %d\n" + , + __func__, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.stutter_mode_enable, + context->bw_ctx.bw.dce.cpuc_state_change_enable, + context->bw_ctx.bw.dce.cpup_state_change_enable, + context->bw_ctx.bw.dce.nbp_state_change_enable, + context->bw_ctx.bw.dce.all_displays_in_sync, + context->bw_ctx.bw.dce.dispclk_khz, + context->bw_ctx.bw.dce.sclk_khz, + context->bw_ctx.bw.dce.sclk_deep_sleep_khz, + context->bw_ctx.bw.dce.yclk_khz, + context->bw_ctx.bw.dce.blackout_recovery_time_us); + } + return result; +} + +static enum dc_status dce110_validate_plane(const struct dc_plane_state *plane_state, + struct dc_caps *caps) +{ + if (((plane_state->dst_rect.width * 2) < plane_state->src_rect.width) || + ((plane_state->dst_rect.height * 2) < plane_state->src_rect.height)) + return DC_FAIL_SURFACE_VALIDATE; + + return DC_OK; +} + +static bool dce110_validate_surface_sets( + struct dc_state *context) +{ + int i, j; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count == 0) + continue; + + if (context->stream_status[i].plane_count > 2) + return false; + + for (j = 0; j < context->stream_status[i].plane_count; j++) { + struct dc_plane_state *plane = + context->stream_status[i].plane_states[j]; + + /* underlay validation */ + if (plane->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + + if ((plane->src_rect.width > 1920 || + plane->src_rect.height > 1080)) + return false; + + /* we don't have the logic to support underlay + * only yet so block the use case where we get + * NV12 plane as top layer + */ + if (j == 0) + return false; + + /* irrespective of plane format, + * stream should be RGB encoded + */ + if (context->streams[i]->timing.pixel_encoding + != PIXEL_ENCODING_RGB) + return false; + + } + + } + } + + return true; +} + +static enum dc_status dce110_validate_global( + struct dc *dc, + struct dc_state *context) +{ + if (!dce110_validate_surface_sets(context)) + return DC_FAIL_SURFACE_VALIDATE; + + return DC_OK; +} + +static enum dc_status dce110_add_stream_to_ctx( + struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *dc_stream) +{ + enum dc_status result = DC_ERROR_UNEXPECTED; + + result = resource_map_pool_resources(dc, new_ctx, dc_stream); + + if (result == DC_OK) + 
result = resource_map_clock_resources(dc, new_ctx, dc_stream); + + + if (result == DC_OK) + result = build_mapped_resource(dc, new_ctx, dc_stream); + + return result; +} + +static struct pipe_ctx *dce110_acquire_underlay( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *opp_head_pipe) +{ + struct dc_stream_state *stream = opp_head_pipe->stream; + struct dc *dc = stream->ctx->dc; + struct dce_hwseq *hws = dc->hwseq; + struct resource_context *res_ctx = &new_ctx->res_ctx; + unsigned int underlay_idx = pool->underlay_pipe_index; + struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[underlay_idx]; + + if (res_ctx->pipe_ctx[underlay_idx].stream) + return NULL; + + pipe_ctx->stream_res.tg = pool->timing_generators[underlay_idx]; + pipe_ctx->plane_res.mi = pool->mis[underlay_idx]; + /*pipe_ctx->plane_res.ipp = res_ctx->pool->ipps[underlay_idx];*/ + pipe_ctx->plane_res.xfm = pool->transforms[underlay_idx]; + pipe_ctx->stream_res.opp = pool->opps[underlay_idx]; + pipe_ctx->pipe_idx = underlay_idx; + + pipe_ctx->stream = stream; + + if (!dc->current_state->res_ctx.pipe_ctx[underlay_idx].stream) { + struct tg_color black_color = {0}; + struct dc_bios *dcb = dc->ctx->dc_bios; + + hws->funcs.enable_display_power_gating( + dc, + pipe_ctx->stream_res.tg->inst, + dcb, PIPE_GATING_CONTROL_DISABLE); + + /* + * This is for powering on underlay, so crtc does not + * need to be enabled + */ + + pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg, + &stream->timing, + 0, + 0, + 0, + 0, + pipe_ctx->stream->signal, + false); + + pipe_ctx->stream_res.tg->funcs->enable_advanced_request( + pipe_ctx->stream_res.tg, + true, + &stream->timing); + + pipe_ctx->plane_res.mi->funcs->allocate_mem_input(pipe_ctx->plane_res.mi, + stream->timing.h_total, + stream->timing.v_total, + stream->timing.pix_clk_100hz / 10, + new_ctx->stream_count); + + color_space_to_black_color(dc, + COLOR_SPACE_YCBCR601, &black_color); + pipe_ctx->stream_res.tg->funcs->set_blank_color( + pipe_ctx->stream_res.tg, + &black_color); + } + + return pipe_ctx; +} + +static void dce110_destroy_resource_pool(struct resource_pool **pool) +{ + struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); + + dce110_resource_destruct(dce110_pool); + kfree(dce110_pool); + *pool = NULL; +} + +struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * For CZ and later, we can allow DIG FE and BE to differ for all display types + */ + + if (j >= 0) + return pool->stream_enc[j]; + + return NULL; +} + + +static const struct resource_funcs dce110_res_pool_funcs = { + .destroy = dce110_destroy_resource_pool, + .link_enc_create = dce110_link_encoder_create, + .panel_cntl_create = dce110_panel_cntl_create, + .validate_bandwidth = dce110_validate_bandwidth, + .validate_plane = dce110_validate_plane, + .acquire_free_pipe_as_secondary_dpp_pipe = dce110_acquire_underlay, + .add_stream_to_ctx = dce110_add_stream_to_ctx, + .validate_global = dce110_validate_global, + 
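The encoder search in dce110_find_first_free_match_stream_enc_for_link() above makes a two-tier pick: a free DIG whose engine id equals the link encoder's preferred_engine is returned immediately, otherwise the index of a free encoder remembered during the scan is used as the fallback, which is what the MST daisy-chain comment refers to. A stripped-down standalone mirror of that selection, with the pool/res_ctx bookkeeping replaced by plain arrays (hypothetical helper, not driver code):

/*
 * ids[]      - engine id of each stream encoder
 * acquired[] - true if that encoder is already in use
 * preferred  - the link encoder's preferred engine id
 * Returns an index into ids[], or -1 if nothing is free.
 */
static int pick_stream_enc(const int *ids, const bool *acquired,
                           int count, int preferred)
{
        int fallback = -1;
        int i;

        for (i = 0; i < count; i++) {
                if (acquired[i])
                        continue;
                fallback = i;                   /* remember a free encoder */
                if (ids[i] == preferred)
                        return i;               /* preferred engine is free */
        }
        return fallback;                        /* may be -1: caller maps that to NULL */
}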
.find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link +}; + +static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) +{ + struct dce110_timing_generator *dce110_tgv = kzalloc(sizeof(*dce110_tgv), + GFP_KERNEL); + struct dce_transform *dce110_xfmv = kzalloc(sizeof(*dce110_xfmv), + GFP_KERNEL); + struct dce_mem_input *dce110_miv = kzalloc(sizeof(*dce110_miv), + GFP_KERNEL); + struct dce110_opp *dce110_oppv = kzalloc(sizeof(*dce110_oppv), + GFP_KERNEL); + + if (!dce110_tgv || !dce110_xfmv || !dce110_miv || !dce110_oppv) { + kfree(dce110_tgv); + kfree(dce110_xfmv); + kfree(dce110_miv); + kfree(dce110_oppv); + return false; + } + + dce110_opp_v_construct(dce110_oppv, ctx); + + dce110_timing_generator_v_construct(dce110_tgv, ctx); + dce110_mem_input_v_construct(dce110_miv, ctx); + dce110_transform_v_construct(dce110_xfmv, ctx); + + pool->opps[pool->pipe_count] = &dce110_oppv->base; + pool->timing_generators[pool->pipe_count] = &dce110_tgv->base; + pool->mis[pool->pipe_count] = &dce110_miv->base; + pool->transforms[pool->pipe_count] = &dce110_xfmv->base; + pool->pipe_count++; + + /* update the public caps to indicate an underlay is available */ + ctx->dc->caps.max_slave_planes = 1; + ctx->dc->caps.max_slave_yuv_planes = 1; + ctx->dc->caps.max_slave_rgb_planes = 0; + + return true; +} + +static void bw_calcs_data_update_from_pplib(struct dc *dc) +{ + struct dm_pp_clock_levels clks = {0}; + + /*do system clock*/ + dm_pp_get_clock_levels_by_type( + dc->ctx, + DM_PP_CLOCK_TYPE_ENGINE_CLK, + &clks); + /* convert all the clock fro kHz to fix point mHz */ + dc->bw_vbios->high_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels-1], 1000); + dc->bw_vbios->mid1_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels/8], 1000); + dc->bw_vbios->mid2_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*2/8], 1000); + dc->bw_vbios->mid3_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*3/8], 1000); + dc->bw_vbios->mid4_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*4/8], 1000); + dc->bw_vbios->mid5_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*5/8], 1000); + dc->bw_vbios->mid6_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*6/8], 1000); + dc->bw_vbios->low_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[0], 1000); + dc->sclk_lvls = clks; + + /*do display clock*/ + dm_pp_get_clock_levels_by_type( + dc->ctx, + DM_PP_CLOCK_TYPE_DISPLAY_CLK, + &clks); + dc->bw_vbios->high_voltage_max_dispclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels-1], 1000); + dc->bw_vbios->mid_voltage_max_dispclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels>>1], 1000); + dc->bw_vbios->low_voltage_max_dispclk = bw_frc_to_fixed( + clks.clocks_in_khz[0], 1000); + + /*do memory clock*/ + dm_pp_get_clock_levels_by_type( + dc->ctx, + DM_PP_CLOCK_TYPE_MEMORY_CLK, + &clks); + + dc->bw_vbios->low_yclk = bw_frc_to_fixed( + clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); + dc->bw_vbios->mid_yclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER_CZ, + 1000); + dc->bw_vbios->high_yclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER_CZ, + 1000); +} + +static const struct resource_caps *dce110_resource_cap( + struct hw_asic_id *asic_id) +{ + if (ASIC_REV_IS_STONEY(asic_id->hw_internal_rev)) + return &stoney_resource_cap; + else + return &carrizo_resource_cap; +} + +static bool dce110_resource_construct( + uint8_t 
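bw_calcs_data_update_from_pplib() above converts the powerplay clock tables from kHz into the bandwidth code's fixed-point values (bw_frc_to_fixed(khz, 1000) is effectively the kHz value divided by 1000, i.e. MHz, as its in-code comment notes) and samples the sclk table at fixed fractions of the level count: low takes level 0, mid1 through mid6 take levels n/8 through 6n/8, and high takes the top level. A small worked example of that index arithmetic, assuming hypothetical 8-entry and 4-entry tables:

/* n = 8 levels: every mid slot lands on a distinct level.              */
/*   low = 0, mid1 = 1, mid2 = 2, ... mid6 = 6, high = 7                */
/* n = 4 levels: integer division makes several mid slots alias.        */
/*   mid1 = 4/8 = 0, mid2 = 8/8 = 1, mid3 = 12/8 = 1, mid4 = 16/8 = 2,  */
/*   mid5 = 20/8 = 2, mid6 = 24/8 = 3                                   */
static unsigned int mid_level(unsigned int n, unsigned int k)
{
        return n * k / 8;       /* same expression the driver uses, k = 1..6 */
}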
num_virtual_links, + struct dc *dc, + struct dce110_resource_pool *pool, + struct hw_asic_id asic_id) +{ + unsigned int i; + struct dc_context *ctx = dc->ctx; + struct dc_bios *bp; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = dce110_resource_cap(&ctx->asic_id); + pool->base.funcs = &dce110_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.underlay_pipe_index = pool->base.pipe_count; + pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 150; + dc->caps.i2c_speed_in_khz = 40; + dc->caps.i2c_speed_in_khz_hdcp = 40; + dc->caps.max_cursor_size = 128; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.is_apu = true; + dc->caps.extended_aux_timeout_support = false; + dc->debug = debug_defaults; + + /************************************************* + * Create resources * + *************************************************/ + + bp = ctx->dc_bios; + + if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { + pool->base.dp_clock_source = + dce110_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + + pool->base.clock_sources[0] = + dce110_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[1] = + dce110_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, + &clk_src_regs[1], false); + + pool->base.clk_src_count = 2; + + /* TODO: find out if CZ support 3 PLLs */ + } + + if (pool->base.dp_clock_source == NULL) { + dm_error("DC: failed to create dp clock source!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + } + + pool->base.dmcu = dce_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + { + struct irq_service_init_data init_data; + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dce110_create(&init_data); + if (!pool->base.irqs) + goto res_create_fail; + } + + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.timing_generators[i] = dce110_timing_generator_create( + ctx, i, &dce110_tg_offsets[i]); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto res_create_fail; + } + + pool->base.mis[i] = dce110_mem_input_create(ctx, i); + if (pool->base.mis[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create memory input!\n"); + goto res_create_fail; + } + + pool->base.ipps[i] = dce110_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create input pixel processor!\n"); + goto res_create_fail; + } + + pool->base.transforms[i] = dce110_transform_create(ctx, i); + if (pool->base.transforms[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create transform!\n"); + goto res_create_fail; + } + + pool->base.opps[i] = 
dce110_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto res_create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dce110_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto res_create_fail; + } + pool->base.hw_i2cs[i] = dce110_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create i2c engine!!\n"); + goto res_create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + if (dc->config.fbc_support) + dc->fbc_compressor = dce110_compressor_create(ctx); + + if (!underlay_create(ctx, &pool->base)) + goto res_create_fail; + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto res_create_fail; + + /* Create hardware sequencer */ + dce110_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < pool->base.underlay_pipe_index; ++i) + dc->caps.planes[i] = plane_cap; + + dc->caps.planes[pool->base.underlay_pipe_index] = underlay_plane_cap; + + bw_calcs_init(dc->bw_dceip, dc->bw_vbios, dc->ctx->asic_id); + + bw_calcs_data_update_from_pplib(dc); + + return true; + +res_create_fail: + dce110_resource_destruct(pool); + return false; +} + +struct resource_pool *dce110_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc, + struct hw_asic_id asic_id) +{ + struct dce110_resource_pool *pool = + kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dce110_resource_construct(num_virtual_links, dc, pool, asic_id)) + return &pool->base; + + kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h new file mode 100644 index 000000000000..aa4531e0800e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h @@ -0,0 +1,54 @@ +/* +* Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
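One easy-to-miss detail in dce110_resource_construct() above is that the underlay is modeled as one extra pipe appended after the regular ones: underlay_pipe_index is latched to pipe_count before underlay_create() increments it, so on Carrizo (3 timing generators) pipes 0-2 get plane_cap, pipe 3 gets underlay_plane_cap, and max_planes ends up as 4. A tiny sketch of just that indexing, with the Carrizo value hard-coded for illustration (fake_pool is hypothetical):

struct fake_pool {
        unsigned int pipe_count;
        unsigned int underlay_pipe_index;
};

static void sketch_pipe_indexing(struct fake_pool *pool)
{
        pool->pipe_count = 3;                           /* carrizo: pipes 0..2 */
        pool->underlay_pipe_index = pool->pipe_count;   /* underlay will be pipe 3 */

        /* underlay_create() appends the underlay objects at that index ... */
        pool->pipe_count++;                             /* ... so max_planes becomes 4 */
}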
+ * + * Authors: AMD + * + */ + +#ifndef __DC_RESOURCE_DCE110_H__ +#define __DC_RESOURCE_DCE110_H__ + +#include "core_types.h" + +struct dc; +struct resource_pool; + +#define TO_DCE110_RES_POOL(pool)\ + container_of(pool, struct dce110_resource_pool, base) + +struct dce110_resource_pool { + struct resource_pool base; +}; + +void dce110_resource_build_pipe_hw_param(struct pipe_ctx *pipe_ctx); + +struct resource_pool *dce110_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc, + struct hw_asic_id asic_id); + +struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + +#endif /* __DC_RESOURCE_DCE110_H__ */ + diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c new file mode 100644 index 000000000000..d1edac46c9a0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -0,0 +1,1431 @@ +/* +* Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" + +#include "link_encoder.h" +#include "stream_encoder.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dce110/dce110_resource.h" +#include "dce110/dce110_timing_generator.h" + +#include "irq/dce110/irq_service_dce110.h" +#include "dce/dce_mem_input.h" +#include "dce/dce_transform.h" +#include "dce/dce_link_encoder.h" +#include "dce/dce_stream_encoder.h" +#include "dce/dce_audio.h" +#include "dce/dce_opp.h" +#include "dce/dce_ipp.h" +#include "dce/dce_clock_source.h" + +#include "dce/dce_hwseq.h" +#include "dce112/dce112_hwseq.h" +#include "dce/dce_abm.h" +#include "dce/dce_dmcu.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" +#include "dce/dce_panel_cntl.h" + +#include "reg_helper.h" + +#include "dce/dce_11_2_d.h" +#include "dce/dce_11_2_sh_mask.h" + +#include "dce100/dce100_resource.h" +#include "dce112_resource.h" + +#define DC_LOGGER \ + dc->ctx->logger + +#ifndef mmDP_DPHY_INTERNAL_CTRL + #define mmDP_DPHY_INTERNAL_CTRL 0x4aa7 + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x4aa7 + #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x4ba7 + #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x4ca7 + #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x4da7 + #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x4ea7 + #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x4fa7 + #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x54a7 + #define mmDP7_DP_DPHY_INTERNAL_CTRL 0x56a7 + #define mmDP8_DP_DPHY_INTERNAL_CTRL 0x57a7 +#endif + +#ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_2 0x05CB + #define mmBIOS_SCRATCH_3 0x05CC + #define mmBIOS_SCRATCH_6 0x05CF +#endif + +#ifndef mmDP_DPHY_BS_SR_SWAP_CNTL + #define mmDP_DPHY_BS_SR_SWAP_CNTL 0x4ADC + #define mmDP0_DP_DPHY_BS_SR_SWAP_CNTL 0x4ADC + #define mmDP1_DP_DPHY_BS_SR_SWAP_CNTL 0x4BDC + #define mmDP2_DP_DPHY_BS_SR_SWAP_CNTL 0x4CDC + #define mmDP3_DP_DPHY_BS_SR_SWAP_CNTL 0x4DDC + #define mmDP4_DP_DPHY_BS_SR_SWAP_CNTL 0x4EDC + #define mmDP5_DP_DPHY_BS_SR_SWAP_CNTL 0x4FDC + #define mmDP6_DP_DPHY_BS_SR_SWAP_CNTL 0x54DC +#endif + +#ifndef mmDP_DPHY_FAST_TRAINING + #define mmDP_DPHY_FAST_TRAINING 0x4ABC + #define mmDP0_DP_DPHY_FAST_TRAINING 0x4ABC + #define mmDP1_DP_DPHY_FAST_TRAINING 0x4BBC + #define mmDP2_DP_DPHY_FAST_TRAINING 0x4CBC + #define mmDP3_DP_DPHY_FAST_TRAINING 0x4DBC + #define mmDP4_DP_DPHY_FAST_TRAINING 0x4EBC + #define mmDP5_DP_DPHY_FAST_TRAINING 0x4FBC + #define mmDP6_DP_DPHY_FAST_TRAINING 0x54BC +#endif + +enum dce112_clk_src_array_id { + DCE112_CLK_SRC_PLL0, + DCE112_CLK_SRC_PLL1, + DCE112_CLK_SRC_PLL2, + DCE112_CLK_SRC_PLL3, + DCE112_CLK_SRC_PLL4, + DCE112_CLK_SRC_PLL5, + + DCE112_CLK_SRC_TOTAL +}; + +static const struct dce110_timing_generator_offsets dce112_tg_offsets[] = { + { + .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP0_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP1_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP2_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP3_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP4_GRPH_CONTROL - mmGRPH_CONTROL), + }, + { + .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP5_GRPH_CONTROL - mmGRPH_CONTROL), + } +}; + +/* set register offset */ +#define SR(reg_name)\ + .reg_name = mm ## reg_name + +/* set register offset with instance */ +#define SRI(reg_name, block, id)\ + .reg_name = mm ## block ## id ## _ ## 
reg_name + +static const struct dce_dmcu_registers dmcu_regs = { + DMCU_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_dmcu_shift dmcu_shift = { + DMCU_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_dmcu_mask dmcu_mask = { + DMCU_MASK_SH_LIST_DCE110(_MASK) +}; + +static const struct dce_abm_registers abm_regs = { + ABM_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCE110(_MASK) +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCE_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCE_AUX_MASK_SH_LIST(_MASK) +}; + +#define ipp_regs(id)\ +[id] = {\ + IPP_DCE110_REG_LIST_DCE_BASE(id)\ +} + +static const struct dce_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2), + ipp_regs(3), + ipp_regs(4), + ipp_regs(5) +}; + +static const struct dce_ipp_shift ipp_shift = { + IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce_ipp_mask ipp_mask = { + IPP_DCE100_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +#define transform_regs(id)\ +[id] = {\ + XFM_COMMON_REG_LIST_DCE110(id)\ +} + +static const struct dce_transform_registers xfm_regs[] = { + transform_regs(0), + transform_regs(1), + transform_regs(2), + transform_regs(3), + transform_regs(4), + transform_regs(5) +}; + +static const struct dce_transform_shift xfm_shift = { + XFM_COMMON_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_transform_mask xfm_mask = { + XFM_COMMON_MASK_SH_LIST_DCE110(_MASK) +}; + +#define aux_regs(id)\ +[id] = {\ + AUX_REG_LIST(id)\ +} + +static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4), + aux_regs(5) +}; + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCE_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4), + hpd_regs(5) +}; + +#define link_regs(id)\ +[id] = {\ + LE_DCE110_REG_LIST(id)\ +} + +static const struct dce110_link_enc_registers link_enc_regs[] = { + link_regs(0), + link_regs(1), + link_regs(2), + link_regs(3), + link_regs(4), + link_regs(5), + link_regs(6), +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_COMMON_REG_LIST(id),\ + .TMDS_CNTL = 0,\ +} + +static const struct dce110_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4), + stream_enc_regs(5) +}; + +static const struct dce_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCE112(__SHIFT) +}; + +static const struct dce_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCE112(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_DCE_112_REG_LIST(id),\ +} + +static const struct dce_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4), + opp_regs(5) +}; + +static const struct dce_opp_shift opp_shift = { + OPP_COMMON_MASK_SH_LIST_DCE_112(__SHIFT) +}; + +static const struct dce_opp_mask opp_mask = { + 
OPP_COMMON_MASK_SH_LIST_DCE_112(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST(id), \ + .AUX_RESET_MASK = 0 \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), + aux_engine_regs(5) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5) +}; + +static const struct dce_audio_shift audio_shift = { + AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define clk_src_regs(index, id)\ +[index] = {\ + CS_COMMON_REG_LIST_DCE_112(id),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E), + clk_src_regs(5, F) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCE_112(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCE_112(_MASK) +}; + +static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, + .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 +}; + +static const struct resource_caps polaris_10_resource_cap = { + .num_timing_generator = 6, + .num_audio = 6, + .num_stream_encoder = 6, + .num_pll = 8, /* why 8? 6 combo PHY PLL + 2 regular PLLs? */ + .num_ddc = 6, +}; + +static const struct resource_caps polaris_11_resource_cap = { + .num_timing_generator = 5, + .num_audio = 5, + .num_stream_encoder = 5, + .num_pll = 8, /* why 8? 6 combo PHY PLL + 2 regular PLLs? 
*/ + .num_ddc = 5, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults = { + .enable_legacy_fast_update = true, +}; + +#define CTX ctx +#define REG(reg) mm ## reg + +#ifndef mmCC_DC_HDMI_STRAPS +#define mmCC_DC_HDMI_STRAPS 0x4819 +#define CC_DC_HDMI_STRAPS__HDMI_DISABLE_MASK 0x40 +#define CC_DC_HDMI_STRAPS__HDMI_DISABLE__SHIFT 0x6 +#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x700 +#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 +#endif + +static int map_transmitter_id_to_phy_instance( + enum transmitter transmitter) +{ + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + return 0; + case TRANSMITTER_UNIPHY_B: + return 1; + case TRANSMITTER_UNIPHY_C: + return 2; + case TRANSMITTER_UNIPHY_D: + return 3; + case TRANSMITTER_UNIPHY_E: + return 4; + case TRANSMITTER_UNIPHY_F: + return 5; + case TRANSMITTER_UNIPHY_G: + return 6; + default: + ASSERT(0); + return 0; + } +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + REG_GET_2(CC_DC_HDMI_STRAPS, + HDMI_DISABLE, &straps->hdmi_disable, + AUDIO_STREAM_NUMBER, &straps->audio_stream_number); + + REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio); +} + +static struct audio *create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + + +static struct timing_generator *dce112_timing_generator_create( + struct dc_context *ctx, + uint32_t instance, + const struct dce110_timing_generator_offsets *offsets) +{ + struct dce110_timing_generator *tg110 = + kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); + + if (!tg110) + return NULL; + + dce110_timing_generator_construct(tg110, ctx, instance, offsets); + return &tg110->base; +} + +static struct stream_encoder *dce112_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dce110_stream_encoder *enc110 = + kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); + + if (!enc110) + return NULL; + + dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + return &enc110->base; +} + +#define SRII(reg_name, block, id)\ + .reg_name[id] = mm ## block ## id ## _ ## reg_name + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCE112_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCE112_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCE112_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dce112_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = create_audio, + .create_stream_encoder = dce112_stream_encoder_create, + .create_hwseq = dce112_hwseq_create, +}; + +#define mi_inst_regs(id) { MI_DCE11_2_REG_LIST(id) } +static const struct dce_mem_input_registers mi_regs[] = { + 
mi_inst_regs(0), + mi_inst_regs(1), + mi_inst_regs(2), + mi_inst_regs(3), + mi_inst_regs(4), + mi_inst_regs(5), +}; + +static const struct dce_mem_input_shift mi_shifts = { + MI_DCE11_2_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_mem_input_mask mi_masks = { + MI_DCE11_2_MASK_SH_LIST(_MASK) +}; + +static struct mem_input *dce112_mem_input_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), + GFP_KERNEL); + + if (!dce_mi) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce112_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); + return &dce_mi->base; +} + +static void dce112_transform_destroy(struct transform **xfm) +{ + kfree(TO_DCE_TRANSFORM(*xfm)); + *xfm = NULL; +} + +static struct transform *dce112_transform_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_transform *transform = + kzalloc(sizeof(struct dce_transform), GFP_KERNEL); + + if (!transform) + return NULL; + + dce_transform_construct(transform, ctx, inst, + &xfm_regs[inst], &xfm_shift, &xfm_mask); + transform->lb_memory_size = 0x1404; /*5124*/ + return &transform->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = false, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dce112_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dce110_link_encoder *enc110 = + kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); + int link_regs_id; + + if (!enc110) + return NULL; + + link_regs_id = + map_transmitter_id_to_phy_instance(enc_init_data->transmitter); + + dce110_link_encoder_construct(enc110, + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source]); + return &enc110->base; +} + +static struct panel_cntl *dce112_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static struct input_pixel_processor *dce112_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); + + if (!ipp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce_ipp_construct(ipp, ctx, inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + +static struct output_pixel_processor *dce112_opp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce110_opp *opp = + kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); + + if (!opp) + return NULL; + + dce110_opp_construct(opp, + ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dce112_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * 
AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), + i2c_inst_regs(6), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) +}; + +static struct dce_i2c_hw *dce112_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dce112_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct clock_source *dce112_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dce112_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static void dce112_clock_source_destroy(struct clock_source **clk_src) +{ + kfree(TO_DCE110_CLK_SRC(*clk_src)); + *clk_src = NULL; +} + +static void dce112_resource_destruct(struct dce110_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.opps[i] != NULL) + dce110_opp_destroy(&pool->base.opps[i]); + + if (pool->base.transforms[i] != NULL) + dce112_transform_destroy(&pool->base.transforms[i]); + + if (pool->base.ipps[i] != NULL) + dce_ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.mis[i] != NULL) { + kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); + pool->base.mis[i] = NULL; + } + + if (pool->base.timing_generators[i] != NULL) { + kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) + kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dce112_clock_source_destroy(&pool->base.clock_sources[i]); + } + } + + if (pool->base.dp_clock_source != NULL) + dce112_clock_source_destroy(&pool->base.dp_clock_source); + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i] != NULL) { + dce_aud_destroy(&pool->base.audios[i]); + } + } + + if (pool->base.abm != NULL) + dce_abm_destroy(&pool->base.abm); + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } +} + +static struct clock_source *find_matching_pll( + struct resource_context *res_ctx, + const struct 
resource_pool *pool, + const struct dc_stream_state *const stream) +{ + switch (stream->link->link_enc->transmitter) { + case TRANSMITTER_UNIPHY_A: + return pool->clock_sources[DCE112_CLK_SRC_PLL0]; + case TRANSMITTER_UNIPHY_B: + return pool->clock_sources[DCE112_CLK_SRC_PLL1]; + case TRANSMITTER_UNIPHY_C: + return pool->clock_sources[DCE112_CLK_SRC_PLL2]; + case TRANSMITTER_UNIPHY_D: + return pool->clock_sources[DCE112_CLK_SRC_PLL3]; + case TRANSMITTER_UNIPHY_E: + return pool->clock_sources[DCE112_CLK_SRC_PLL4]; + case TRANSMITTER_UNIPHY_F: + return pool->clock_sources[DCE112_CLK_SRC_PLL5]; + default: + return NULL; + } + + return NULL; +} + +static enum dc_status build_mapped_resource( + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *stream) +{ + struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + + dce110_resource_build_pipe_hw_param(pipe_ctx); + + resource_build_info_frame(pipe_ctx); + + return DC_OK; +} + +bool dce112_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool result = false; + + DC_LOG_BANDWIDTH_CALCS( + "%s: start", + __func__); + + if (bw_calcs( + dc->ctx, + dc->bw_dceip, + dc->bw_vbios, + context->res_ctx.pipe_ctx, + dc->res_pool->pipe_count, + &context->bw_ctx.bw.dce)) + result = true; + + if (!result) + DC_LOG_BANDWIDTH_VALIDATION( + "%s: Bandwidth validation failed!", + __func__); + + if (memcmp(&dc->current_state->bw_ctx.bw.dce, + &context->bw_ctx.bw.dce, sizeof(context->bw_ctx.bw.dce))) { + + DC_LOG_BANDWIDTH_CALCS( + "%s: finish,\n" + "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" + "stutMark_b: %d stutMark_a: %d\n" + "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" + "stutMark_b: %d stutMark_a: %d\n" + "nbpMark_b: %d nbpMark_a: %d urgentMark_b: %d urgentMark_a: %d\n" + "stutMark_b: %d stutMark_a: %d stutter_mode_enable: %d\n" + "cstate: %d pstate: %d nbpstate: %d sync: %d dispclk: %d\n" + "sclk: %d sclk_sleep: %d yclk: %d blackout_recovery_time_us: %d\n" + , + __func__, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[0].a_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[1].a_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.nbp_state_change_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.urgent_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].b_mark, + context->bw_ctx.bw.dce.stutter_exit_wm_ns[2].a_mark, + context->bw_ctx.bw.dce.stutter_mode_enable, + context->bw_ctx.bw.dce.cpuc_state_change_enable, + context->bw_ctx.bw.dce.cpup_state_change_enable, + context->bw_ctx.bw.dce.nbp_state_change_enable, + context->bw_ctx.bw.dce.all_displays_in_sync, + context->bw_ctx.bw.dce.dispclk_khz, + context->bw_ctx.bw.dce.sclk_khz, + context->bw_ctx.bw.dce.sclk_deep_sleep_khz, + context->bw_ctx.bw.dce.yclk_khz, + 
context->bw_ctx.bw.dce.blackout_recovery_time_us); + } + return result; +} + +enum dc_status resource_map_phy_clock_resources( + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *stream) +{ + + /* acquire new resources */ + struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream( + &context->res_ctx, stream); + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + + if (dc_is_dp_signal(pipe_ctx->stream->signal) + || dc_is_virtual_signal(pipe_ctx->stream->signal)) + pipe_ctx->clock_source = + dc->res_pool->dp_clock_source; + else { + if (stream && stream->link && stream->link->link_enc) + pipe_ctx->clock_source = find_matching_pll( + &context->res_ctx, dc->res_pool, + stream); + } + + if (pipe_ctx->clock_source == NULL) + return DC_NO_CLOCK_SOURCE_RESOURCE; + + resource_reference_clock_source( + &context->res_ctx, + dc->res_pool, + pipe_ctx->clock_source); + + return DC_OK; +} + +static bool dce112_validate_surface_sets( + struct dc_state *context) +{ + int i; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count == 0) + continue; + + if (context->stream_status[i].plane_count > 1) + return false; + + if (context->stream_status[i].plane_states[0]->format + >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return false; + } + + return true; +} + +enum dc_status dce112_add_stream_to_ctx( + struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *dc_stream) +{ + enum dc_status result; + + result = resource_map_pool_resources(dc, new_ctx, dc_stream); + + if (result == DC_OK) + result = resource_map_phy_clock_resources(dc, new_ctx, dc_stream); + + + if (result == DC_OK) + result = build_mapped_resource(dc, new_ctx, dc_stream); + + return result; +} + +static enum dc_status dce112_validate_global( + struct dc *dc, + struct dc_state *context) +{ + if (!dce112_validate_surface_sets(context)) + return DC_FAIL_SURFACE_VALIDATE; + + return DC_OK; +} + +static void dce112_destroy_resource_pool(struct resource_pool **pool) +{ + struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); + + dce112_resource_destruct(dce110_pool); + kfree(dce110_pool); + *pool = NULL; +} + +static const struct resource_funcs dce112_res_pool_funcs = { + .destroy = dce112_destroy_resource_pool, + .link_enc_create = dce112_link_encoder_create, + .panel_cntl_create = dce112_panel_cntl_create, + .validate_bandwidth = dce112_validate_bandwidth, + .validate_plane = dce100_validate_plane, + .add_stream_to_ctx = dce112_add_stream_to_ctx, + .validate_global = dce112_validate_global, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link +}; + +static void bw_calcs_data_update_from_pplib(struct dc *dc) +{ + struct dm_pp_clock_levels_with_latency eng_clks = {0}; + struct dm_pp_clock_levels_with_latency mem_clks = {0}; + struct dm_pp_wm_sets_with_clock_ranges clk_ranges = {0}; + struct dm_pp_clock_levels clks = {0}; + int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; + + if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + memory_type_multiplier = MEMORY_TYPE_HBM; + + /*do system clock TODO PPLIB: after PPLIB implement, + * then remove old way + */ + if (!dm_pp_get_clock_levels_by_type_with_latency( + dc->ctx, + DM_PP_CLOCK_TYPE_ENGINE_CLK, + &eng_clks)) { + + /* This is only for temporary */ + dm_pp_get_clock_levels_by_type( + dc->ctx, + DM_PP_CLOCK_TYPE_ENGINE_CLK, + &clks); + /* convert all the clock fro kHz to fix point mHz */ + dc->bw_vbios->high_sclk = bw_frc_to_fixed( + 
clks.clocks_in_khz[clks.num_levels-1], 1000); + dc->bw_vbios->mid1_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels/8], 1000); + dc->bw_vbios->mid2_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*2/8], 1000); + dc->bw_vbios->mid3_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*3/8], 1000); + dc->bw_vbios->mid4_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*4/8], 1000); + dc->bw_vbios->mid5_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*5/8], 1000); + dc->bw_vbios->mid6_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels*6/8], 1000); + dc->bw_vbios->low_sclk = bw_frc_to_fixed( + clks.clocks_in_khz[0], 1000); + + /*do memory clock*/ + dm_pp_get_clock_levels_by_type( + dc->ctx, + DM_PP_CLOCK_TYPE_MEMORY_CLK, + &clks); + + dc->bw_vbios->low_yclk = bw_frc_to_fixed( + clks.clocks_in_khz[0] * memory_type_multiplier, 1000); + dc->bw_vbios->mid_yclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier, + 1000); + dc->bw_vbios->high_yclk = bw_frc_to_fixed( + clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier, + 1000); + + return; + } + + /* convert all the clock fro kHz to fix point mHz TODO: wloop data */ + dc->bw_vbios->high_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels-1].clocks_in_khz, 1000); + dc->bw_vbios->mid1_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels/8].clocks_in_khz, 1000); + dc->bw_vbios->mid2_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*2/8].clocks_in_khz, 1000); + dc->bw_vbios->mid3_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz, 1000); + dc->bw_vbios->mid4_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*4/8].clocks_in_khz, 1000); + dc->bw_vbios->mid5_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*5/8].clocks_in_khz, 1000); + dc->bw_vbios->mid6_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*6/8].clocks_in_khz, 1000); + dc->bw_vbios->low_sclk = bw_frc_to_fixed( + eng_clks.data[0].clocks_in_khz, 1000); + + /*do memory clock*/ + dm_pp_get_clock_levels_by_type_with_latency( + dc->ctx, + DM_PP_CLOCK_TYPE_MEMORY_CLK, + &mem_clks); + + /* we don't need to call PPLIB for validation clock since they + * also give us the highest sclk and highest mclk (UMA clock). + * ALSO always convert UMA clock (from PPLIB) to YCLK (HW formula): + * YCLK = UMACLK*m_memoryTypeMultiplier + */ + dc->bw_vbios->low_yclk = bw_frc_to_fixed( + mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); + dc->bw_vbios->mid_yclk = bw_frc_to_fixed( + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, + 1000); + dc->bw_vbios->high_yclk = bw_frc_to_fixed( + mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, + 1000); + + /* Now notify PPLib/SMU about which Watermarks sets they should select + * depending on DPM state they are in. 
And update BW MGR GFX Engine and + * Memory clock member variables for Watermarks calculations for each + * Watermark Set + */ + clk_ranges.num_wm_sets = 4; + clk_ranges.wm_clk_ranges[0].wm_set_id = WM_SET_A; + clk_ranges.wm_clk_ranges[0].wm_min_eng_clk_in_khz = + eng_clks.data[0].clocks_in_khz; + clk_ranges.wm_clk_ranges[0].wm_max_eng_clk_in_khz = + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz - 1; + clk_ranges.wm_clk_ranges[0].wm_min_mem_clk_in_khz = + mem_clks.data[0].clocks_in_khz; + clk_ranges.wm_clk_ranges[0].wm_max_mem_clk_in_khz = + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz - 1; + + clk_ranges.wm_clk_ranges[1].wm_set_id = WM_SET_B; + clk_ranges.wm_clk_ranges[1].wm_min_eng_clk_in_khz = + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz; + /* 5 GHz instead of data[7].clockInKHz to cover Overdrive */ + clk_ranges.wm_clk_ranges[1].wm_max_eng_clk_in_khz = 5000000; + clk_ranges.wm_clk_ranges[1].wm_min_mem_clk_in_khz = + mem_clks.data[0].clocks_in_khz; + clk_ranges.wm_clk_ranges[1].wm_max_mem_clk_in_khz = + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz - 1; + + clk_ranges.wm_clk_ranges[2].wm_set_id = WM_SET_C; + clk_ranges.wm_clk_ranges[2].wm_min_eng_clk_in_khz = + eng_clks.data[0].clocks_in_khz; + clk_ranges.wm_clk_ranges[2].wm_max_eng_clk_in_khz = + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz - 1; + clk_ranges.wm_clk_ranges[2].wm_min_mem_clk_in_khz = + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz; + /* 5 GHz instead of data[2].clockInKHz to cover Overdrive */ + clk_ranges.wm_clk_ranges[2].wm_max_mem_clk_in_khz = 5000000; + + clk_ranges.wm_clk_ranges[3].wm_set_id = WM_SET_D; + clk_ranges.wm_clk_ranges[3].wm_min_eng_clk_in_khz = + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz; + /* 5 GHz instead of data[7].clockInKHz to cover Overdrive */ + clk_ranges.wm_clk_ranges[3].wm_max_eng_clk_in_khz = 5000000; + clk_ranges.wm_clk_ranges[3].wm_min_mem_clk_in_khz = + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz; + /* 5 GHz instead of data[2].clockInKHz to cover Overdrive */ + clk_ranges.wm_clk_ranges[3].wm_max_mem_clk_in_khz = 5000000; + + /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ + dm_pp_notify_wm_clock_changes(dc->ctx, &clk_ranges); +} + +static const struct resource_caps *dce112_resource_cap( + struct hw_asic_id *asic_id) +{ + if (ASIC_REV_IS_POLARIS11_M(asic_id->hw_internal_rev) || + ASIC_REV_IS_POLARIS12_V(asic_id->hw_internal_rev)) + return &polaris_11_resource_cap; + else + return &polaris_10_resource_cap; +} + +static bool dce112_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dce110_resource_pool *pool) +{ + unsigned int i; + struct dc_context *ctx = dc->ctx; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = dce112_resource_cap(&ctx->asic_id); + pool->base.funcs = &dce112_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ + dc->caps.max_cursor_size = 128; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dual_link_dvi = true; + dc->caps.extended_aux_timeout_support = false; + dc->debug = debug_defaults; + + 
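+ /* Everything allocated below (clock sources, DMCU, ABM, IRQ service, per-pipe TG/MI/IPP/XFM/OPP, AUX and I2C engines) unwinds through the res_create_fail label, which calls dce112_resource_destruct(), on any allocation failure. */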
/************************************************* + * Create resources * + *************************************************/ + + pool->base.clock_sources[DCE112_CLK_SRC_PLL0] = + dce112_clock_source_create( + ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCE112_CLK_SRC_PLL1] = + dce112_clock_source_create( + ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCE112_CLK_SRC_PLL2] = + dce112_clock_source_create( + ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCE112_CLK_SRC_PLL3] = + dce112_clock_source_create( + ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCE112_CLK_SRC_PLL4] = + dce112_clock_source_create( + ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + pool->base.clock_sources[DCE112_CLK_SRC_PLL5] = + dce112_clock_source_create( + ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL5, + &clk_src_regs[5], false); + pool->base.clk_src_count = DCE112_CLK_SRC_TOTAL; + + pool->base.dp_clock_source = dce112_clock_source_create( + ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, &clk_src_regs[0], true); + + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + } + + pool->base.dmcu = dce_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + { + struct irq_service_init_data init_data; + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dce110_create(&init_data); + if (!pool->base.irqs) + goto res_create_fail; + } + + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.timing_generators[i] = + dce112_timing_generator_create( + ctx, + i, + &dce112_tg_offsets[i]); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto res_create_fail; + } + + pool->base.mis[i] = dce112_mem_input_create(ctx, i); + if (pool->base.mis[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create memory input!\n"); + goto res_create_fail; + } + + pool->base.ipps[i] = dce112_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create input pixel processor!\n"); + goto res_create_fail; + } + + pool->base.transforms[i] = dce112_transform_create(ctx, i); + if (pool->base.transforms[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create transform!\n"); + goto res_create_fail; + } + + pool->base.opps[i] = dce112_opp_create( + ctx, + i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create output pixel processor!\n"); + goto res_create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dce112_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto res_create_fail; + } + pool->base.hw_i2cs[i] = dce112_i2c_hw_create(ctx, i); + if 
(pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create i2c engine!!\n"); + goto res_create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto res_create_fail; + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + /* Create hardware sequencer */ + dce112_hw_sequencer_construct(dc); + + bw_calcs_init(dc->bw_dceip, dc->bw_vbios, dc->ctx->asic_id); + + bw_calcs_data_update_from_pplib(dc); + + return true; + +res_create_fail: + dce112_resource_destruct(pool); + return false; +} + +struct resource_pool *dce112_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc) +{ + struct dce110_resource_pool *pool = + kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dce112_resource_construct(num_virtual_links, dc, pool)) + return &pool->base; + + kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h new file mode 100644 index 000000000000..1f57ebc6f9b4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h @@ -0,0 +1,57 @@ +/* +* Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DC_RESOURCE_DCE112_H__ +#define __DC_RESOURCE_DCE112_H__ + +#include "core_types.h" + +struct dc; +struct resource_pool; + +struct resource_pool *dce112_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc); + +enum dc_status dce112_validate_with_context( + struct dc *dc, + const struct dc_validation_set set[], + int set_count, + struct dc_state *context, + struct dc_state *old_context); + +bool dce112_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + bool fast_validate); + +enum dc_status dce112_add_stream_to_ctx( + struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *dc_stream); + + +#endif /* __DC_RESOURCE_DCE112_H__ */ + diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c new file mode 100644 index 000000000000..20662edd0ae4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -0,0 +1,1288 @@ +/* +* Copyright 2012-15 Advanced Micro Devices, Inc.cls +* + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" + + +#include "stream_encoder.h" +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dce120_resource.h" + +#include "dce112/dce112_resource.h" + +#include "dce110/dce110_resource.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce120/dce120_timing_generator.h" +#include "irq/dce120/irq_service_dce120.h" +#include "dce/dce_opp.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_ipp.h" +#include "dce/dce_mem_input.h" +#include "dce/dce_panel_cntl.h" + +#include "dce110/dce110_hwseq.h" +#include "dce120/dce120_hwseq.h" +#include "dce/dce_transform.h" +#include "clk_mgr.h" +#include "dce/dce_audio.h" +#include "dce/dce_link_encoder.h" +#include "dce/dce_stream_encoder.h" +#include "dce/dce_hwseq.h" +#include "dce/dce_abm.h" +#include "dce/dce_dmcu.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#include "dce/dce_12_0_offset.h" +#include "dce/dce_12_0_sh_mask.h" +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" +#include "nbio/nbio_6_1_offset.h" +#include "mmhub/mmhub_1_0_offset.h" +#include "mmhub/mmhub_1_0_sh_mask.h" +#include "reg_helper.h" + +#include "dce100/dce100_resource.h" + +#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f + #define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x220f + #define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x230f + #define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x240f + #define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x250f + #define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x260f + #define mmDP5_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x270f + #define mmDP6_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 +#endif + +enum dce120_clk_src_array_id { + DCE120_CLK_SRC_PLL0, + DCE120_CLK_SRC_PLL1, + DCE120_CLK_SRC_PLL2, + DCE120_CLK_SRC_PLL3, + DCE120_CLK_SRC_PLL4, + DCE120_CLK_SRC_PLL5, + + DCE120_CLK_SRC_TOTAL +}; + +static const struct dce110_timing_generator_offsets dce120_tg_offsets[] = { + { + .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), + }, + { + .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), + }, + { + .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), + }, + { + .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), + }, + { + .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), + }, + { + .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC0_CRTC_CONTROL), + } +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file */ + +#define BASE_INNER(seg) \ + DCE_BASE__INST0_SEG ## seg + +#define NBIO_BASE_INNER(seg) \ + NBIF_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +/* compile time expand base address. 
*/ +#define BASE(seg) \ + BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* macros to expend register list macro defined in HW object header file + * end *********************/ + + +static const struct dce_dmcu_registers dmcu_regs = { + DMCU_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_dmcu_shift dmcu_shift = { + DMCU_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_dmcu_mask dmcu_mask = { + DMCU_MASK_SH_LIST_DCE110(_MASK) +}; + +static const struct dce_abm_registers abm_regs = { + ABM_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCE110(_MASK) +}; + +#define ipp_regs(id)\ +[id] = {\ + IPP_DCE110_REG_LIST_DCE_BASE(id)\ +} + +static const struct dce_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2), + ipp_regs(3), + ipp_regs(4), + ipp_regs(5) +}; + +static const struct dce_ipp_shift ipp_shift = { + IPP_DCE120_MASK_SH_LIST_SOC_BASE(__SHIFT) +}; + +static const struct dce_ipp_mask ipp_mask = { + IPP_DCE120_MASK_SH_LIST_SOC_BASE(_MASK) +}; + +#define transform_regs(id)\ +[id] = {\ + XFM_COMMON_REG_LIST_DCE110(id)\ +} + +static const struct dce_transform_registers xfm_regs[] = { + transform_regs(0), + transform_regs(1), + transform_regs(2), + transform_regs(3), + transform_regs(4), + transform_regs(5) +}; + +static const struct dce_transform_shift xfm_shift = { + XFM_COMMON_MASK_SH_LIST_SOC_BASE(__SHIFT) +}; + +static const struct dce_transform_mask xfm_mask = { + XFM_COMMON_MASK_SH_LIST_SOC_BASE(_MASK) +}; + +#define aux_regs(id)\ +[id] = {\ + AUX_REG_LIST(id)\ +} + +static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4), + aux_regs(5) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4), + hpd_regs(5) +}; + +#define link_regs(id)\ +[id] = {\ + LE_DCE120_REG_LIST(id), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ +} + +static const struct dce110_link_enc_registers link_enc_regs[] = { + link_regs(0), + link_regs(1), + link_regs(2), + link_regs(3), + link_regs(4), + link_regs(5), + link_regs(6), +}; + + +#define stream_enc_regs(id)\ +[id] = {\ + SE_COMMON_REG_LIST(id),\ + .TMDS_CNTL = 0,\ +} + +static const struct dce110_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4), + stream_enc_regs(5) +}; + +static const struct dce_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCE120(__SHIFT) +}; + +static const struct dce_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCE120(_MASK) +}; + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCE_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const 
struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCE12_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCE12_AUX_MASK_SH_LIST(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_DCE_120_REG_LIST(id),\ +} + +static const struct dce_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4), + opp_regs(5) +}; + +static const struct dce_opp_shift opp_shift = { + OPP_COMMON_MASK_SH_LIST_DCE_120(__SHIFT) +}; + +static const struct dce_opp_mask opp_mask = { + OPP_COMMON_MASK_SH_LIST_DCE_120(_MASK) +}; + #define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST(id), \ + .AUX_RESET_MASK = 0 \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), + aux_engine_regs(5) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6), +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +static int map_transmitter_id_to_phy_instance( + enum transmitter transmitter) +{ + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + return 0; + case TRANSMITTER_UNIPHY_B: + return 1; + case TRANSMITTER_UNIPHY_C: + return 2; + case TRANSMITTER_UNIPHY_D: + return 3; + case TRANSMITTER_UNIPHY_E: + return 4; + case TRANSMITTER_UNIPHY_F: + return 5; + case TRANSMITTER_UNIPHY_G: + return 6; + default: + ASSERT(0); + return 0; + } +} + +#define clk_src_regs(index, id)\ +[index] = {\ + CS_COMMON_REG_LIST_DCE_112(id),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E), + clk_src_regs(5, F) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCE_112(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCE_112(_MASK) +}; + +static struct output_pixel_processor *dce120_opp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce110_opp *opp = + kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); + + if (!opp) + return NULL; + + dce110_opp_construct(opp, + ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} +static struct dce_aux *dce120_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] 
= { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), + i2c_inst_regs(6), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) +}; + +static struct dce_i2c_hw *dce120_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dce112_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3 + NBIO_BASE(mmBIOS_SCRATCH_3_BASE_IDX), + .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 + NBIO_BASE(mmBIOS_SCRATCH_6_BASE_IDX) +}; + +static const struct resource_caps res_cap = { + .num_timing_generator = 6, + .num_audio = 7, + .num_stream_encoder = 6, + .num_pll = 6, + .num_ddc = 6, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + } +}; + +static const struct dc_debug_options debug_defaults = { + .disable_clock_gate = true, + .enable_legacy_fast_update = true, +}; + +static struct clock_source *dce120_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(*clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dce112_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static void dce120_clock_source_destroy(struct clock_source **clk_src) +{ + kfree(TO_DCE110_CLK_SRC(*clk_src)); + *clk_src = NULL; +} + + +static bool dce120_hw_sequencer_create(struct dc *dc) +{ + /* All registers used by dce11.2 match those in dce11 in offset and + * structure + */ + dce120_hw_sequencer_construct(dc); + + /*TODO Move to separate file and Override what is needed */ + + return true; +} + +static struct timing_generator *dce120_timing_generator_create( + struct dc_context *ctx, + uint32_t instance, + const struct dce110_timing_generator_offsets *offsets) +{ + struct dce110_timing_generator *tg110 = + kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); + + if (!tg110) + return NULL; + + dce120_timing_generator_construct(tg110, ctx, instance, offsets); + return &tg110->base; +} + +static void dce120_transform_destroy(struct transform **xfm) +{ + kfree(TO_DCE_TRANSFORM(*xfm)); + *xfm = NULL; +} + +static void dce120_resource_destruct(struct dce110_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.opps[i] != NULL) + dce110_opp_destroy(&pool->base.opps[i]); + + if (pool->base.transforms[i] != NULL) + dce120_transform_destroy(&pool->base.transforms[i]); + + if (pool->base.ipps[i] != NULL) + dce_ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.mis[i] != NULL) { + kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); + pool->base.mis[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + + if 
(pool->base.timing_generators[i] != NULL) { + kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) + kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) + dce120_clock_source_destroy( + &pool->base.clock_sources[i]); + } + + if (pool->base.dp_clock_source != NULL) + dce120_clock_source_destroy(&pool->base.dp_clock_source); + + if (pool->base.abm != NULL) + dce_abm_destroy(&pool->base.abm); + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + uint32_t reg_val = dm_read_reg_soc15(ctx, mmCC_DC_MISC_STRAPS, 0); + + straps->audio_stream_number = get_reg_field_value(reg_val, + CC_DC_MISC_STRAPS, + AUDIO_STREAM_NUMBER); + straps->hdmi_disable = get_reg_field_value(reg_val, + CC_DC_MISC_STRAPS, + HDMI_DISABLE); + + reg_val = dm_read_reg_soc15(ctx, mmDC_PINSTRAPS, 0); + straps->dc_pinstraps_audio = get_reg_field_value(reg_val, + DC_PINSTRAPS, + DC_PINSTRAPS_AUDIO); +} + +static struct audio *create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = false, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true, +}; + +static struct link_encoder *dce120_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dce110_link_encoder *enc110 = + kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); + int link_regs_id; + + if (!enc110) + return NULL; + + link_regs_id = + map_transmitter_id_to_phy_instance(enc_init_data->transmitter); + + dce110_link_encoder_construct(enc110, + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source]); + + return &enc110->base; +} + +static struct panel_cntl *dce120_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static struct input_pixel_processor *dce120_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); + + if (!ipp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce_ipp_construct(ipp, ctx, 
inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + +static struct stream_encoder *dce120_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dce110_stream_encoder *enc110 = + kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); + + if (!enc110) + return NULL; + + dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + return &enc110->base; +} + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCE120_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCE12_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCE12_MASK_SH_LIST(_MASK) +}; + +/* HWSEQ regs for VG20 */ +static const struct dce_hwseq_registers dce121_hwseq_reg = { + HWSEQ_VG20_REG_LIST() +}; + +static const struct dce_hwseq_shift dce121_hwseq_shift = { + HWSEQ_VG20_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask dce121_hwseq_mask = { + HWSEQ_VG20_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dce120_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} + +static struct dce_hwseq *dce121_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &dce121_hwseq_reg; + hws->shifts = &dce121_hwseq_shift; + hws->masks = &dce121_hwseq_mask; + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = create_audio, + .create_stream_encoder = dce120_stream_encoder_create, + .create_hwseq = dce120_hwseq_create, +}; + +static const struct resource_create_funcs dce121_res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = create_audio, + .create_stream_encoder = dce120_stream_encoder_create, + .create_hwseq = dce121_hwseq_create, +}; + + +#define mi_inst_regs(id) { MI_DCE12_REG_LIST(id) } +static const struct dce_mem_input_registers mi_regs[] = { + mi_inst_regs(0), + mi_inst_regs(1), + mi_inst_regs(2), + mi_inst_regs(3), + mi_inst_regs(4), + mi_inst_regs(5), +}; + +static const struct dce_mem_input_shift mi_shifts = { + MI_DCE12_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_mem_input_mask mi_masks = { + MI_DCE12_MASK_SH_LIST(_MASK) +}; + +static struct mem_input *dce120_mem_input_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), + GFP_KERNEL); + + if (!dce_mi) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce120_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); + return &dce_mi->base; +} + +static struct transform *dce120_transform_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_transform *transform = + kzalloc(sizeof(struct dce_transform), GFP_KERNEL); + + if (!transform) + return NULL; + + dce_transform_construct(transform, ctx, inst, + &xfm_regs[inst], &xfm_shift, &xfm_mask); + transform->lb_memory_size = 0x1404; /*5124*/ + return &transform->base; +} + +static void dce120_destroy_resource_pool(struct resource_pool **pool) +{ + struct 
dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); + + dce120_resource_destruct(dce110_pool); + kfree(dce110_pool); + *pool = NULL; +} + +static const struct resource_funcs dce120_res_pool_funcs = { + .destroy = dce120_destroy_resource_pool, + .link_enc_create = dce120_link_encoder_create, + .panel_cntl_create = dce120_panel_cntl_create, + .validate_bandwidth = dce112_validate_bandwidth, + .validate_plane = dce100_validate_plane, + .add_stream_to_ctx = dce112_add_stream_to_ctx, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link +}; + +static void bw_calcs_data_update_from_pplib(struct dc *dc) +{ + struct dm_pp_clock_levels_with_latency eng_clks = {0}; + struct dm_pp_clock_levels_with_latency mem_clks = {0}; + struct dm_pp_wm_sets_with_clock_ranges clk_ranges = {0}; + int i; + unsigned int clk; + unsigned int latency; + /*original logic in dal3*/ + int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; + + /*do system clock*/ + if (!dm_pp_get_clock_levels_by_type_with_latency( + dc->ctx, + DM_PP_CLOCK_TYPE_ENGINE_CLK, + &eng_clks) || eng_clks.num_levels == 0) { + + eng_clks.num_levels = 8; + clk = 300000; + + for (i = 0; i < eng_clks.num_levels; i++) { + eng_clks.data[i].clocks_in_khz = clk; + clk += 100000; + } + } + + /* convert all the clock fro kHz to fix point mHz TODO: wloop data */ + dc->bw_vbios->high_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels-1].clocks_in_khz, 1000); + dc->bw_vbios->mid1_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels/8].clocks_in_khz, 1000); + dc->bw_vbios->mid2_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*2/8].clocks_in_khz, 1000); + dc->bw_vbios->mid3_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz, 1000); + dc->bw_vbios->mid4_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*4/8].clocks_in_khz, 1000); + dc->bw_vbios->mid5_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*5/8].clocks_in_khz, 1000); + dc->bw_vbios->mid6_sclk = bw_frc_to_fixed( + eng_clks.data[eng_clks.num_levels*6/8].clocks_in_khz, 1000); + dc->bw_vbios->low_sclk = bw_frc_to_fixed( + eng_clks.data[0].clocks_in_khz, 1000); + + /*do memory clock*/ + if (!dm_pp_get_clock_levels_by_type_with_latency( + dc->ctx, + DM_PP_CLOCK_TYPE_MEMORY_CLK, + &mem_clks) || mem_clks.num_levels == 0) { + + mem_clks.num_levels = 3; + clk = 250000; + latency = 45; + + for (i = 0; i < eng_clks.num_levels; i++) { + mem_clks.data[i].clocks_in_khz = clk; + mem_clks.data[i].latency_in_us = latency; + clk += 500000; + latency -= 5; + } + + } + + /* we don't need to call PPLIB for validation clock since they + * also give us the highest sclk and highest mclk (UMA clock). + * ALSO always convert UMA clock (from PPLIB) to YCLK (HW formula): + * YCLK = UMACLK*m_memoryTypeMultiplier + */ + if (dc->bw_vbios->memory_type == bw_def_hbm) + memory_type_multiplier = MEMORY_TYPE_HBM; + + dc->bw_vbios->low_yclk = bw_frc_to_fixed( + mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); + dc->bw_vbios->mid_yclk = bw_frc_to_fixed( + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, + 1000); + dc->bw_vbios->high_yclk = bw_frc_to_fixed( + mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, + 1000); + + /* Now notify PPLib/SMU about which Watermarks sets they should select + * depending on DPM state they are in. 
And update BW MGR GFX Engine and + * Memory clock member variables for Watermarks calculations for each + * Watermark Set + */ + clk_ranges.num_wm_sets = 4; + clk_ranges.wm_clk_ranges[0].wm_set_id = WM_SET_A; + clk_ranges.wm_clk_ranges[0].wm_min_eng_clk_in_khz = + eng_clks.data[0].clocks_in_khz; + clk_ranges.wm_clk_ranges[0].wm_max_eng_clk_in_khz = + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz - 1; + clk_ranges.wm_clk_ranges[0].wm_min_mem_clk_in_khz = + mem_clks.data[0].clocks_in_khz; + clk_ranges.wm_clk_ranges[0].wm_max_mem_clk_in_khz = + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz - 1; + + clk_ranges.wm_clk_ranges[1].wm_set_id = WM_SET_B; + clk_ranges.wm_clk_ranges[1].wm_min_eng_clk_in_khz = + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz; + /* 5 GHz instead of data[7].clockInKHz to cover Overdrive */ + clk_ranges.wm_clk_ranges[1].wm_max_eng_clk_in_khz = 5000000; + clk_ranges.wm_clk_ranges[1].wm_min_mem_clk_in_khz = + mem_clks.data[0].clocks_in_khz; + clk_ranges.wm_clk_ranges[1].wm_max_mem_clk_in_khz = + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz - 1; + + clk_ranges.wm_clk_ranges[2].wm_set_id = WM_SET_C; + clk_ranges.wm_clk_ranges[2].wm_min_eng_clk_in_khz = + eng_clks.data[0].clocks_in_khz; + clk_ranges.wm_clk_ranges[2].wm_max_eng_clk_in_khz = + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz - 1; + clk_ranges.wm_clk_ranges[2].wm_min_mem_clk_in_khz = + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz; + /* 5 GHz instead of data[2].clockInKHz to cover Overdrive */ + clk_ranges.wm_clk_ranges[2].wm_max_mem_clk_in_khz = 5000000; + + clk_ranges.wm_clk_ranges[3].wm_set_id = WM_SET_D; + clk_ranges.wm_clk_ranges[3].wm_min_eng_clk_in_khz = + eng_clks.data[eng_clks.num_levels*3/8].clocks_in_khz; + /* 5 GHz instead of data[7].clockInKHz to cover Overdrive */ + clk_ranges.wm_clk_ranges[3].wm_max_eng_clk_in_khz = 5000000; + clk_ranges.wm_clk_ranges[3].wm_min_mem_clk_in_khz = + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz; + /* 5 GHz instead of data[2].clockInKHz to cover Overdrive */ + clk_ranges.wm_clk_ranges[3].wm_max_mem_clk_in_khz = 5000000; + + /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ + dm_pp_notify_wm_clock_changes(dc->ctx, &clk_ranges); +} + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = dm_read_reg_soc15(ctx, mmCC_DC_PIPE_DIS, 0); + /* VG20 support max 6 pipes */ + value = value & 0x3f; + return value; +} + +static bool dce120_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dce110_resource_pool *pool) +{ + unsigned int i; + int j; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data irq_init_data; + static const struct resource_create_funcs *res_funcs; + bool is_vg20 = ASICREV_IS_VEGA20_P(ctx->asic_id.hw_internal_rev); + uint32_t pipe_fuses; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap; + pool->base.funcs = &dce120_res_pool_funcs; + + /* TODO: Fill more data from GreenlandAsicCapability.cpp */ + pool->base.pipe_count = res_cap.num_timing_generator; + pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ + dc->caps.max_cursor_size = 128; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dual_link_dvi = true; + dc->caps.psp_setup_panel_mode = true; + dc->caps.extended_aux_timeout_support = 
false; + dc->debug = debug_defaults; + + /************************************************* + * Create resources * + *************************************************/ + + pool->base.clock_sources[DCE120_CLK_SRC_PLL0] = + dce120_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCE120_CLK_SRC_PLL1] = + dce120_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCE120_CLK_SRC_PLL2] = + dce120_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCE120_CLK_SRC_PLL3] = + dce120_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCE120_CLK_SRC_PLL4] = + dce120_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + pool->base.clock_sources[DCE120_CLK_SRC_PLL5] = + dce120_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL5, + &clk_src_regs[5], false); + pool->base.clk_src_count = DCE120_CLK_SRC_TOTAL; + + pool->base.dp_clock_source = + dce120_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto clk_src_create_fail; + } + } + + pool->base.dmcu = dce_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + + irq_init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dce120_create(&irq_init_data); + if (!pool->base.irqs) + goto irqs_create_fail; + + /* VG20: Pipe harvesting enabled, retrieve valid pipe fuses */ + if (is_vg20) + pipe_fuses = read_pipe_fuses(ctx); + + /* index to valid pipe resource */ + j = 0; + for (i = 0; i < pool->base.pipe_count; i++) { + if (is_vg20) { + if ((pipe_fuses & (1 << i)) != 0) { + dm_error("DC: skip invalid pipe %d!\n", i); + continue; + } + } + + pool->base.timing_generators[j] = + dce120_timing_generator_create( + ctx, + i, + &dce120_tg_offsets[i]); + if (pool->base.timing_generators[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto controller_create_fail; + } + + pool->base.mis[j] = dce120_mem_input_create(ctx, i); + + if (pool->base.mis[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create memory input!\n"); + goto controller_create_fail; + } + + pool->base.ipps[j] = dce120_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create input pixel processor!\n"); + goto controller_create_fail; + } + + pool->base.transforms[j] = dce120_transform_create(ctx, i); + if (pool->base.transforms[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create transform!\n"); + goto res_create_fail; + } + + pool->base.opps[j] = dce120_opp_create( + ctx, + i); + if (pool->base.opps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + } + + /* check next valid pipe */ + j++; + } + + for (i = 0; i < 
pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dce120_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto res_create_fail; + } + pool->base.hw_i2cs[i] = dce120_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create i2c engine!!\n"); + goto res_create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* valid pipe num */ + pool->base.pipe_count = j; + pool->base.timing_generator_count = j; + + if (is_vg20) + res_funcs = &dce121_res_create_funcs; + else + res_funcs = &res_create_funcs; + + if (!resource_construct(num_virtual_links, dc, &pool->base, res_funcs)) + goto res_create_fail; + + /* Create hardware sequencer */ + if (!dce120_hw_sequencer_create(dc)) + goto controller_create_fail; + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + bw_calcs_init(dc->bw_dceip, dc->bw_vbios, dc->ctx->asic_id); + + bw_calcs_data_update_from_pplib(dc); + + return true; + +irqs_create_fail: +controller_create_fail: +clk_src_create_fail: +res_create_fail: + + dce120_resource_destruct(pool); + + return false; +} + +struct resource_pool *dce120_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc) +{ + struct dce110_resource_pool *pool = + kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dce120_resource_construct(num_virtual_links, dc, pool)) + return &pool->base; + + kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h new file mode 100644 index 000000000000..3d1f3cf012f4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h @@ -0,0 +1,39 @@ +/* +* Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DC_RESOURCE_DCE120_H__ +#define __DC_RESOURCE_DCE120_H__ + +#include "core_types.h" + +struct dc; +struct resource_pool; + +struct resource_pool *dce120_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc); + +#endif /* __DC_RESOURCE_DCE120_H__ */ + diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt new file mode 100644 index 000000000000..19dd73bc9ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/CMakeLists.txt @@ -0,0 +1,4 @@ +dal3_subdirectory_sources( + dce80_resource.c + dce80_resource.h + ) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c new file mode 100644 index 000000000000..35a2cce0c2b8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -0,0 +1,1544 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce/dce_8_0_d.h" +#include "dce/dce_8_0_sh_mask.h" + +#include "dm_services.h" + +#include "link_encoder.h" +#include "stream_encoder.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "irq/dce80/irq_service_dce80.h" +#include "dce110/dce110_timing_generator.h" +#include "dce110/dce110_resource.h" +#include "dce80/dce80_timing_generator.h" +#include "dce/dce_mem_input.h" +#include "dce/dce_link_encoder.h" +#include "dce/dce_stream_encoder.h" +#include "dce/dce_ipp.h" +#include "dce/dce_transform.h" +#include "dce/dce_opp.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "dce80/dce80_hwseq.h" +#include "dce100/dce100_resource.h" +#include "dce/dce_panel_cntl.h" + +#include "reg_helper.h" + +#include "dce/dce_dmcu.h" +#include "dce/dce_aux.h" +#include "dce/dce_abm.h" +#include "dce/dce_i2c.h" +/* TODO remove this include */ + +#ifndef mmMC_HUB_RDREQ_DMIF_LIMIT +#include "gmc/gmc_7_1_d.h" +#include "gmc/gmc_7_1_sh_mask.h" +#endif + +#include "dce80/dce80_resource.h" + +#ifndef mmDP_DPHY_INTERNAL_CTRL +#define mmDP_DPHY_INTERNAL_CTRL 0x1CDE +#define mmDP0_DP_DPHY_INTERNAL_CTRL 0x1CDE +#define mmDP1_DP_DPHY_INTERNAL_CTRL 0x1FDE +#define mmDP2_DP_DPHY_INTERNAL_CTRL 0x42DE +#define mmDP3_DP_DPHY_INTERNAL_CTRL 0x45DE +#define mmDP4_DP_DPHY_INTERNAL_CTRL 0x48DE +#define mmDP5_DP_DPHY_INTERNAL_CTRL 0x4BDE +#define mmDP6_DP_DPHY_INTERNAL_CTRL 0x4EDE +#endif + + +#ifndef mmBIOS_SCRATCH_2 + #define mmBIOS_SCRATCH_2 0x05CB + #define mmBIOS_SCRATCH_3 0x05CC + #define mmBIOS_SCRATCH_6 0x05CF +#endif + +#ifndef mmDP_DPHY_FAST_TRAINING + #define mmDP_DPHY_FAST_TRAINING 0x1CCE + #define mmDP0_DP_DPHY_FAST_TRAINING 0x1CCE + #define mmDP1_DP_DPHY_FAST_TRAINING 0x1FCE + #define mmDP2_DP_DPHY_FAST_TRAINING 0x42CE + #define mmDP3_DP_DPHY_FAST_TRAINING 0x45CE + #define mmDP4_DP_DPHY_FAST_TRAINING 0x48CE + #define mmDP5_DP_DPHY_FAST_TRAINING 0x4BCE + #define mmDP6_DP_DPHY_FAST_TRAINING 0x4ECE +#endif + + +#ifndef mmHPD_DC_HPD_CONTROL + #define mmHPD_DC_HPD_CONTROL 0x189A + #define mmHPD0_DC_HPD_CONTROL 0x189A + #define mmHPD1_DC_HPD_CONTROL 0x18A2 + #define mmHPD2_DC_HPD_CONTROL 0x18AA + #define mmHPD3_DC_HPD_CONTROL 0x18B2 + #define mmHPD4_DC_HPD_CONTROL 0x18BA + #define mmHPD5_DC_HPD_CONTROL 0x18C2 +#endif + +#define DCE11_DIG_FE_CNTL 0x4a00 +#define DCE11_DIG_BE_CNTL 0x4a47 +#define DCE11_DP_SEC 0x4ac3 + +static const struct dce110_timing_generator_offsets dce80_tg_offsets[] = { + { + .crtc = (mmCRTC0_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmGRPH_CONTROL - mmGRPH_CONTROL), + .dmif = (mmDMIF_PG0_DPG_WATERMARK_MASK_CONTROL + - mmDPG_WATERMARK_MASK_CONTROL), + }, + { + .crtc = (mmCRTC1_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP1_GRPH_CONTROL - mmGRPH_CONTROL), + .dmif = (mmDMIF_PG1_DPG_WATERMARK_MASK_CONTROL + - mmDPG_WATERMARK_MASK_CONTROL), + }, + { + .crtc = (mmCRTC2_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP2_GRPH_CONTROL - mmGRPH_CONTROL), + .dmif = (mmDMIF_PG2_DPG_WATERMARK_MASK_CONTROL + - mmDPG_WATERMARK_MASK_CONTROL), + }, + { + .crtc = (mmCRTC3_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP3_GRPH_CONTROL - mmGRPH_CONTROL), + .dmif = (mmDMIF_PG3_DPG_WATERMARK_MASK_CONTROL + - mmDPG_WATERMARK_MASK_CONTROL), + }, + { + .crtc = (mmCRTC4_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = (mmDCP4_GRPH_CONTROL - mmGRPH_CONTROL), + .dmif = (mmDMIF_PG4_DPG_WATERMARK_MASK_CONTROL + - mmDPG_WATERMARK_MASK_CONTROL), + }, + { + .crtc = (mmCRTC5_CRTC_CONTROL - mmCRTC_CONTROL), + .dcp = 
(mmDCP5_GRPH_CONTROL - mmGRPH_CONTROL), + .dmif = (mmDMIF_PG5_DPG_WATERMARK_MASK_CONTROL + - mmDPG_WATERMARK_MASK_CONTROL), + } +}; + +/* set register offset */ +#define SR(reg_name)\ + .reg_name = mm ## reg_name + +/* set register offset with instance */ +#define SRI(reg_name, block, id)\ + .reg_name = mm ## block ## id ## _ ## reg_name + +#define ipp_regs(id)\ +[id] = {\ + IPP_COMMON_REG_LIST_DCE_BASE(id)\ +} + +static const struct dce_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2), + ipp_regs(3), + ipp_regs(4), + ipp_regs(5) +}; + +static const struct dce_ipp_shift ipp_shift = { + IPP_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce_ipp_mask ipp_mask = { + IPP_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +#define transform_regs(id)\ +[id] = {\ + XFM_COMMON_REG_LIST_DCE80(id)\ +} + +static const struct dce_transform_registers xfm_regs[] = { + transform_regs(0), + transform_regs(1), + transform_regs(2), + transform_regs(3), + transform_regs(4), + transform_regs(5) +}; + +static const struct dce_transform_shift xfm_shift = { + XFM_COMMON_MASK_SH_LIST_DCE80(__SHIFT) +}; + +static const struct dce_transform_mask xfm_mask = { + XFM_COMMON_MASK_SH_LIST_DCE80(_MASK) +}; + +#define aux_regs(id)\ +[id] = {\ + AUX_REG_LIST(id)\ +} + +static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4), + aux_regs(5) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4), + hpd_regs(5) +}; + +#define link_regs(id)\ +[id] = {\ + LE_DCE80_REG_LIST(id)\ +} + +static const struct dce110_link_enc_registers link_enc_regs[] = { + link_regs(0), + link_regs(1), + link_regs(2), + link_regs(3), + link_regs(4), + link_regs(5), + link_regs(6), +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_COMMON_REG_LIST_DCE_BASE(id),\ + .AFMT_CNTL = 0,\ +} + +static const struct dce110_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4), + stream_enc_regs(5), + stream_enc_regs(6) +}; + +static const struct dce_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCE80_100(__SHIFT) +}; + +static const struct dce_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCE80_100(_MASK) +}; + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCE_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_DCE_80_REG_LIST(id),\ +} + +static const struct dce_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4), + opp_regs(5) +}; + +static const struct dce_opp_shift opp_shift = { + OPP_COMMON_MASK_SH_LIST_DCE_80(__SHIFT) +}; + +static const struct dce_opp_mask opp_mask = { + OPP_COMMON_MASK_SH_LIST_DCE_80(_MASK) +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCE10_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCE10_AUX_MASK_SH_LIST(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST(id), \ + .AUX_RESET_MASK = 0 \ +} + +static const struct 
dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), + aux_engine_regs(5) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6), +}; + +static const struct dce_audio_shift audio_shift = { + AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define clk_src_regs(id)\ +[id] = {\ + CS_COMMON_REG_LIST_DCE_80(id),\ +} + + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0), + clk_src_regs(1), + clk_src_regs(2) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +static const struct bios_registers bios_regs = { + .BIOS_SCRATCH_3 = mmBIOS_SCRATCH_3, + .BIOS_SCRATCH_6 = mmBIOS_SCRATCH_6 +}; + +static const struct resource_caps res_cap = { + .num_timing_generator = 6, + .num_audio = 6, + .num_stream_encoder = 6, + .num_pll = 3, + .num_ddc = 6, +}; + +static const struct resource_caps res_cap_81 = { + .num_timing_generator = 4, + .num_audio = 7, + .num_stream_encoder = 7, + .num_pll = 3, + .num_ddc = 6, +}; + +static const struct resource_caps res_cap_83 = { + .num_timing_generator = 2, + .num_audio = 6, + .num_stream_encoder = 6, + .num_pll = 2, + .num_ddc = 2, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCE_RGB, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 1, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 1, + .fp16 = 1 + } +}; + +static const struct dc_debug_options debug_defaults = { + .enable_legacy_fast_update = true, +}; + +static const struct dce_dmcu_registers dmcu_regs = { + DMCU_DCE80_REG_LIST() +}; + +static const struct dce_dmcu_shift dmcu_shift = { + DMCU_MASK_SH_LIST_DCE80(__SHIFT) +}; + +static const struct dce_dmcu_mask dmcu_mask = { + DMCU_MASK_SH_LIST_DCE80(_MASK) +}; +static const struct dce_abm_registers abm_regs = { + ABM_DCE110_COMMON_REG_LIST() +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCE110(_MASK) +}; + +#define CTX ctx +#define REG(reg) mm ## reg + +#ifndef mmCC_DC_HDMI_STRAPS +#define mmCC_DC_HDMI_STRAPS 0x1918 +#define CC_DC_HDMI_STRAPS__HDMI_DISABLE_MASK 0x40 +#define CC_DC_HDMI_STRAPS__HDMI_DISABLE__SHIFT 0x6 +#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER_MASK 0x700 +#define CC_DC_HDMI_STRAPS__AUDIO_STREAM_NUMBER__SHIFT 0x8 +#endif + +static int map_transmitter_id_to_phy_instance( + enum transmitter transmitter) +{ + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + return 0; + case TRANSMITTER_UNIPHY_B: + return 1; + case TRANSMITTER_UNIPHY_C: + return 2; + case TRANSMITTER_UNIPHY_D: + return 3; + case TRANSMITTER_UNIPHY_E: + return 4; + case TRANSMITTER_UNIPHY_F: + return 5; + case TRANSMITTER_UNIPHY_G: + return 6; + default: + ASSERT(0); + return 0; + } +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + REG_GET_2(CC_DC_HDMI_STRAPS, + HDMI_DISABLE, &straps->hdmi_disable, + 
AUDIO_STREAM_NUMBER, &straps->audio_stream_number); + + REG_GET(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO, &straps->dc_pinstraps_audio); +} + +static struct audio *create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct timing_generator *dce80_timing_generator_create( + struct dc_context *ctx, + uint32_t instance, + const struct dce110_timing_generator_offsets *offsets) +{ + struct dce110_timing_generator *tg110 = + kzalloc(sizeof(struct dce110_timing_generator), GFP_KERNEL); + + if (!tg110) + return NULL; + + dce80_timing_generator_construct(tg110, ctx, instance, offsets); + return &tg110->base; +} + +static struct output_pixel_processor *dce80_opp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce110_opp *opp = + kzalloc(sizeof(struct dce110_opp), GFP_KERNEL); + + if (!opp) + return NULL; + + dce110_opp_construct(opp, + ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dce80_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), + i2c_inst_regs(6), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) +}; + +static struct dce_i2c_hw *dce80_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dce_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static struct dce_i2c_sw *dce80_i2c_sw_create( + struct dc_context *ctx) +{ + struct dce_i2c_sw *dce_i2c_sw = + kzalloc(sizeof(struct dce_i2c_sw), GFP_KERNEL); + + if (!dce_i2c_sw) + return NULL; + + dce_i2c_sw_construct(dce_i2c_sw, ctx); + + return dce_i2c_sw; +} +static struct stream_encoder *dce80_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dce110_stream_encoder *enc110 = + kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); + + if (!enc110) + return NULL; + + dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + return &enc110->base; +} + +#define SRII(reg_name, block, id)\ + .reg_name[id] = mm ## block ## id ## _ ## reg_name + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCE8_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCE8_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCE8_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dce80_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = 
&hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = create_audio, + .create_stream_encoder = dce80_stream_encoder_create, + .create_hwseq = dce80_hwseq_create, +}; + +#define mi_inst_regs(id) { \ + MI_DCE8_REG_LIST(id), \ + .MC_HUB_RDREQ_DMIF_LIMIT = mmMC_HUB_RDREQ_DMIF_LIMIT \ +} +static const struct dce_mem_input_registers mi_regs[] = { + mi_inst_regs(0), + mi_inst_regs(1), + mi_inst_regs(2), + mi_inst_regs(3), + mi_inst_regs(4), + mi_inst_regs(5), +}; + +static const struct dce_mem_input_shift mi_shifts = { + MI_DCE8_MASK_SH_LIST(__SHIFT), + .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE__SHIFT +}; + +static const struct dce_mem_input_mask mi_masks = { + MI_DCE8_MASK_SH_LIST(_MASK), + .ENABLE = MC_HUB_RDREQ_DMIF_LIMIT__ENABLE_MASK +}; + +static struct mem_input *dce80_mem_input_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_mem_input *dce_mi = kzalloc(sizeof(struct dce_mem_input), + GFP_KERNEL); + + if (!dce_mi) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); + dce_mi->wa.single_head_rdreq_dmif_limit = 2; + return &dce_mi->base; +} + +static void dce80_transform_destroy(struct transform **xfm) +{ + kfree(TO_DCE_TRANSFORM(*xfm)); + *xfm = NULL; +} + +static struct transform *dce80_transform_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_transform *transform = + kzalloc(sizeof(struct dce_transform), GFP_KERNEL); + + if (!transform) + return NULL; + + dce_transform_construct(transform, ctx, inst, + &xfm_regs[inst], &xfm_shift, &xfm_mask); + transform->prescaler_on = false; + return &transform->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 297000, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true +}; + +static struct link_encoder *dce80_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dce110_link_encoder *enc110 = + kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); + int link_regs_id; + + if (!enc110) + return NULL; + + link_regs_id = + map_transmitter_id_to_phy_instance(enc_init_data->transmitter); + + dce110_link_encoder_construct(enc110, + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source]); + return &enc110->base; +} + +static struct panel_cntl *dce80_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static struct clock_source *dce80_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dce110_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + 
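Every block in this file is created through the same idiom shown in dce80_clock_source_create() above: kzalloc() the wrapper object, run the block's *_construct() helper against its per-instance register/shift/mask tables, and hand back the embedded base pointer, freeing the wrapper on failure. A minimal sketch of that pattern follows; my_block, my_block_construct, my_regs, my_shift and my_mask are illustrative placeholders, not symbols from this patch.

/*
 * Hedged sketch of the create/construct idiom used throughout the DCE/DCN
 * resource files; all 'my_*' names below are placeholders.
 */
static struct my_block_base *my_block_create(struct dc_context *ctx, uint32_t inst)
{
	struct my_block *blk = kzalloc(sizeof(*blk), GFP_KERNEL);

	if (!blk)
		return NULL;	/* caller logs the error and unwinds */

	/* bind instance 'inst' to its static register/shift/mask tables */
	my_block_construct(blk, ctx, inst, &my_regs[inst], &my_shift, &my_mask);

	return &blk->base;
}

When the construct helper itself can fail, as dce110_clk_src_construct() does above, the wrapper is kfree()d and NULL is returned, so the caller's res_create_fail path can tear down everything created so far.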
+static void dce80_clock_source_destroy(struct clock_source **clk_src) +{ + kfree(TO_DCE110_CLK_SRC(*clk_src)); + *clk_src = NULL; +} + +static struct input_pixel_processor *dce80_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dce_ipp *ipp = kzalloc(sizeof(struct dce_ipp), GFP_KERNEL); + + if (!ipp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dce_ipp_construct(ipp, ctx, inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + +static void dce80_resource_destruct(struct dce110_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.opps[i] != NULL) + dce110_opp_destroy(&pool->base.opps[i]); + + if (pool->base.transforms[i] != NULL) + dce80_transform_destroy(&pool->base.transforms[i]); + + if (pool->base.ipps[i] != NULL) + dce_ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.mis[i] != NULL) { + kfree(TO_DCE_MEM_INPUT(pool->base.mis[i])); + pool->base.mis[i] = NULL; + } + + if (pool->base.timing_generators[i] != NULL) { + kfree(DCE110TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) + kfree(DCE110STRENC_FROM_STRENC(pool->base.stream_enc[i])); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dce80_clock_source_destroy(&pool->base.clock_sources[i]); + } + } + + if (pool->base.abm != NULL) + dce_abm_destroy(&pool->base.abm); + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); + + if (pool->base.dp_clock_source != NULL) + dce80_clock_source_destroy(&pool->base.dp_clock_source); + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i] != NULL) { + dce_aud_destroy(&pool->base.audios[i]); + } + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } +} + +static bool dce80_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + int i; + bool at_least_one_pipe = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream) + at_least_one_pipe = true; + } + + if (at_least_one_pipe) { + /* TODO implement when needed but for now hardcode max value*/ + context->bw_ctx.bw.dce.dispclk_khz = 681000; + context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; + } else { + context->bw_ctx.bw.dce.dispclk_khz = 0; + context->bw_ctx.bw.dce.yclk_khz = 0; + } + + return true; +} + +static bool dce80_validate_surface_sets( + struct dc_state *context) +{ + int i; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count == 0) + continue; + + if (context->stream_status[i].plane_count > 1) + return false; + + if (context->stream_status[i].plane_states[0]->format + >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return false; + } + + return true; +} + +static enum dc_status dce80_validate_global( + struct dc *dc, + struct dc_state *context) +{ + if (!dce80_validate_surface_sets(context)) + return DC_FAIL_SURFACE_VALIDATE; + + return DC_OK; +} + +static void dce80_destroy_resource_pool(struct 
resource_pool **pool) +{ + struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); + + dce80_resource_destruct(dce110_pool); + kfree(dce110_pool); + *pool = NULL; +} + +static const struct resource_funcs dce80_res_pool_funcs = { + .destroy = dce80_destroy_resource_pool, + .link_enc_create = dce80_link_encoder_create, + .panel_cntl_create = dce80_panel_cntl_create, + .validate_bandwidth = dce80_validate_bandwidth, + .validate_plane = dce100_validate_plane, + .add_stream_to_ctx = dce100_add_stream_to_ctx, + .validate_global = dce80_validate_global, + .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link +}; + +static bool dce80_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dce110_resource_pool *pool) +{ + unsigned int i; + struct dc_context *ctx = dc->ctx; + struct dc_bios *bp; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap; + pool->base.funcs = &dce80_res_pool_funcs; + + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = res_cap.num_timing_generator; + pool->base.timing_generator_count = res_cap.num_timing_generator; + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 40; + dc->caps.i2c_speed_in_khz_hdcp = 40; + dc->caps.max_cursor_size = 128; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dual_link_dvi = true; + dc->caps.extended_aux_timeout_support = false; + dc->debug = debug_defaults; + + /************************************************* + * Create resources * + *************************************************/ + + bp = ctx->dc_bios; + + if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { + pool->base.dp_clock_source = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + + pool->base.clock_sources[0] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false); + pool->base.clock_sources[1] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[2] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 3; + + } else { + pool->base.dp_clock_source = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); + + pool->base.clock_sources[0] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[1] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 2; + } + + if (pool->base.dp_clock_source == NULL) { + dm_error("DC: failed to create dp clock source!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + } + + pool->base.dmcu = dce_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + { + struct 
irq_service_init_data init_data; + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dce80_create(&init_data); + if (!pool->base.irqs) + goto res_create_fail; + } + + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.timing_generators[i] = dce80_timing_generator_create( + ctx, i, &dce80_tg_offsets[i]); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto res_create_fail; + } + + pool->base.mis[i] = dce80_mem_input_create(ctx, i); + if (pool->base.mis[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create memory input!\n"); + goto res_create_fail; + } + + pool->base.ipps[i] = dce80_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create input pixel processor!\n"); + goto res_create_fail; + } + + pool->base.transforms[i] = dce80_transform_create(ctx, i); + if (pool->base.transforms[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create transform!\n"); + goto res_create_fail; + } + + pool->base.opps[i] = dce80_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create output pixel processor!\n"); + goto res_create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dce80_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto res_create_fail; + } + pool->base.hw_i2cs[i] = dce80_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create i2c engine!!\n"); + goto res_create_fail; + } + pool->base.sw_i2cs[i] = dce80_i2c_sw_create(ctx); + if (pool->base.sw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create sw i2c!!\n"); + goto res_create_fail; + } + } + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->caps.disable_dp_clk_share = true; + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto res_create_fail; + + /* Create hardware sequencer */ + dce80_hw_sequencer_construct(dc); + + return true; + +res_create_fail: + dce80_resource_destruct(pool); + return false; +} + +struct resource_pool *dce80_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc) +{ + struct dce110_resource_pool *pool = + kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dce80_construct(num_virtual_links, dc, pool)) + return &pool->base; + + kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static bool dce81_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dce110_resource_pool *pool) +{ + unsigned int i; + struct dc_context *ctx = dc->ctx; + struct dc_bios *bp; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_81; + pool->base.funcs = &dce80_res_pool_funcs; + + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = res_cap_81.num_timing_generator; + pool->base.timing_generator_count = res_cap_81.num_timing_generator; + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 40; + dc->caps.i2c_speed_in_khz_hdcp = 40; + dc->caps.max_cursor_size = 128; + dc->caps.min_horizontal_blanking_period 
= 80; + dc->caps.is_apu = true; + + /************************************************* + * Create resources * + *************************************************/ + + bp = ctx->dc_bios; + + if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { + pool->base.dp_clock_source = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + + pool->base.clock_sources[0] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false); + pool->base.clock_sources[1] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[2] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 3; + + } else { + pool->base.dp_clock_source = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); + + pool->base.clock_sources[0] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[1] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 2; + } + + if (pool->base.dp_clock_source == NULL) { + dm_error("DC: failed to create dp clock source!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + } + + pool->base.dmcu = dce_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + { + struct irq_service_init_data init_data; + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dce80_create(&init_data); + if (!pool->base.irqs) + goto res_create_fail; + } + + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.timing_generators[i] = dce80_timing_generator_create( + ctx, i, &dce80_tg_offsets[i]); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto res_create_fail; + } + + pool->base.mis[i] = dce80_mem_input_create(ctx, i); + if (pool->base.mis[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create memory input!\n"); + goto res_create_fail; + } + + pool->base.ipps[i] = dce80_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create input pixel processor!\n"); + goto res_create_fail; + } + + pool->base.transforms[i] = dce80_transform_create(ctx, i); + if (pool->base.transforms[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create transform!\n"); + goto res_create_fail; + } + + pool->base.opps[i] = dce80_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create output pixel processor!\n"); + goto res_create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dce80_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto res_create_fail; + } + pool->base.hw_i2cs[i] = dce80_i2c_hw_create(ctx, i); + if 
(pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create i2c engine!!\n"); + goto res_create_fail; + } + pool->base.sw_i2cs[i] = dce80_i2c_sw_create(ctx); + if (pool->base.sw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create sw i2c!!\n"); + goto res_create_fail; + } + } + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->caps.disable_dp_clk_share = true; + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto res_create_fail; + + /* Create hardware sequencer */ + dce80_hw_sequencer_construct(dc); + + return true; + +res_create_fail: + dce80_resource_destruct(pool); + return false; +} + +struct resource_pool *dce81_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc) +{ + struct dce110_resource_pool *pool = + kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dce81_construct(num_virtual_links, dc, pool)) + return &pool->base; + + kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static bool dce83_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dce110_resource_pool *pool) +{ + unsigned int i; + struct dc_context *ctx = dc->ctx; + struct dc_bios *bp; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_83; + pool->base.funcs = &dce80_res_pool_funcs; + + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = res_cap_83.num_timing_generator; + pool->base.timing_generator_count = res_cap_83.num_timing_generator; + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 40; + dc->caps.i2c_speed_in_khz_hdcp = 40; + dc->caps.max_cursor_size = 128; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.is_apu = true; + dc->debug = debug_defaults; + + /************************************************* + * Create resources * + *************************************************/ + + bp = ctx->dc_bios; + + if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { + pool->base.dp_clock_source = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + + pool->base.clock_sources[0] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], false); + pool->base.clock_sources[1] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false); + pool->base.clk_src_count = 2; + + } else { + pool->base.dp_clock_source = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], true); + + pool->base.clock_sources[0] = + dce80_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false); + pool->base.clk_src_count = 1; + } + + if (pool->base.dp_clock_source == NULL) { + dm_error("DC: failed to create dp clock source!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + } + + pool->base.dmcu = dce_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + pool->base.abm = dce_abm_create(ctx, + 
&abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto res_create_fail; + } + + { + struct irq_service_init_data init_data; + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dce80_create(&init_data); + if (!pool->base.irqs) + goto res_create_fail; + } + + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.timing_generators[i] = dce80_timing_generator_create( + ctx, i, &dce80_tg_offsets[i]); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto res_create_fail; + } + + pool->base.mis[i] = dce80_mem_input_create(ctx, i); + if (pool->base.mis[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create memory input!\n"); + goto res_create_fail; + } + + pool->base.ipps[i] = dce80_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create input pixel processor!\n"); + goto res_create_fail; + } + + pool->base.transforms[i] = dce80_transform_create(ctx, i); + if (pool->base.transforms[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create transform!\n"); + goto res_create_fail; + } + + pool->base.opps[i] = dce80_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create output pixel processor!\n"); + goto res_create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dce80_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto res_create_fail; + } + pool->base.hw_i2cs[i] = dce80_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create i2c engine!!\n"); + goto res_create_fail; + } + pool->base.sw_i2cs[i] = dce80_i2c_sw_create(ctx); + if (pool->base.sw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create sw i2c!!\n"); + goto res_create_fail; + } + } + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->caps.disable_dp_clk_share = true; + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto res_create_fail; + + /* Create hardware sequencer */ + dce80_hw_sequencer_construct(dc); + + return true; + +res_create_fail: + dce80_resource_destruct(pool); + return false; +} + +struct resource_pool *dce83_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc) +{ + struct dce110_resource_pool *pool = + kzalloc(sizeof(struct dce110_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dce83_construct(num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h new file mode 100644 index 000000000000..eff31ab83a39 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h @@ -0,0 +1,47 @@ +/* +* Copyright 2012-15 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_RESOURCE_DCE80_H__ +#define __DC_RESOURCE_DCE80_H__ + +#include "core_types.h" + +struct dc; +struct resource_pool; + +struct resource_pool *dce80_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc); + +struct resource_pool *dce81_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc); + +struct resource_pool *dce83_create_resource_pool( + uint8_t num_virtual_links, + struct dc *dc); + +#endif /* __DC_RESOURCE_DCE80_H__ */ + diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c new file mode 100644 index 000000000000..af1b31f4e69a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -0,0 +1,1689 @@ +/* +* Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" +#include "dc.h" + +#include "dcn10/dcn10_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn10/dcn10_resource.h" +#include "dcn10/dcn10_ipp.h" +#include "dcn10/dcn10_mpc.h" + +#include "dcn10/dcn10_dwb.h" + +#include "irq/dcn10/irq_service_dcn10.h" +#include "dcn10/dcn10_dpp.h" +#include "dcn10/dcn10_optc.h" +#include "dcn10/dcn10_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_opp.h" +#include "dcn10/dcn10_link_encoder.h" +#include "dcn10/dcn10_stream_encoder.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dce112/dce112_resource.h" +#include "dcn10/dcn10_hubp.h" +#include "dcn10/dcn10_hubbub.h" +#include "dce/dce_panel_cntl.h" + +#include "soc15_hw_ip.h" +#include "vega10_ip_offset.h" + +#include "dcn/dcn_1_0_offset.h" +#include "dcn/dcn_1_0_sh_mask.h" + +#include "nbio/nbio_7_0_offset.h" + +#include "mmhub/mmhub_9_1_offset.h" +#include "mmhub/mmhub_9_1_sh_mask.h" + +#include "reg_helper.h" +#include "dce/dce_abm.h" +#include "dce/dce_dmcu.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f + #define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x220f + #define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x230f + #define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x240f + #define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x250f + #define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x260f + #define mmDP5_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x270f + #define mmDP6_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 +#endif + + +enum dcn10_clk_src_array_id { + DCN10_CLK_SRC_PLL0, + DCN10_CLK_SRC_PLL1, + DCN10_CLK_SRC_PLL2, + DCN10_CLK_SRC_PLL3, + DCN10_CLK_SRC_TOTAL, + DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3 +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file */ + +/* DCN */ +#define BASE_INNER(seg) \ + DCE_BASE__INST0_SEG ## seg + +#define BASE(seg) \ + BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## 0 ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## 0 ## _ ## block ## id + +/* set field/register/bitfield name */ +#define SFRB(field_name, reg_name, bitfield, post_fix)\ + .field_name = reg_name ## __ ## bitfield ## post_fix + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIF_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ 
+ mm ## reg_name + +/* macros to expend register list macro defined in HW object header file + * end *********************/ + + +static const struct dce_dmcu_registers dmcu_regs = { + DMCU_DCN10_REG_LIST() +}; + +static const struct dce_dmcu_shift dmcu_shift = { + DMCU_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dce_dmcu_mask dmcu_mask = { + DMCU_MASK_SH_LIST_DCN10(_MASK) +}; + +static const struct dce_abm_registers abm_regs = { + ABM_DCN10_REG_LIST(0) +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN10(_MASK) +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN10(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define aux_regs(id)\ +[id] = {\ + AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3) +}; + +#define link_regs(id)\ +[id] = {\ + LE_DCN10_REG_LIST(id), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ +} + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0), + link_regs(1), + link_regs(2), + link_regs(3) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN10(_MASK) +}; + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCN_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCN10_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN10_AUX_MASK_SH_LIST(_MASK) +}; + +#define ipp_regs(id)\ +[id] = {\ + IPP_REG_LIST_DCN10(id),\ +} + +static const struct dcn10_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2), + ipp_regs(3), +}; + +static const struct dcn10_ipp_shift ipp_shift = { + IPP_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dcn10_ipp_mask ipp_mask = { + IPP_MASK_SH_LIST_DCN10(_MASK), +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN10(id),\ +} + 
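The ipp_regs()/opp_regs() wrappers above lean on the SRI() macro defined near the top of this file, which token-pastes the block name, instance id and register name and adds the address-space segment base. Roughly, for a hypothetical register FOO on OPP instance 1 (FOO is a placeholder, not a register from this patch):

/*
 * Illustrative expansion only; the SRI()/BASE() macros earlier in this
 * file are the authoritative definition.
 *
 *   SRI(FOO, OPP, 1)
 * expands to approximately:
 *   .FOO = BASE(mmOPP1_FOO_BASE_IDX) + mmOPP1_FOO
 *
 * so each "[id] = { OPP_REG_LIST_DCN10(id) }" initializer ends up holding
 * absolute per-instance MMIO offsets for that OPP, while the shared
 * opp_shift/opp_mask tables carry the field shifts and masks once.
 */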
+static const struct dcn10_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), +}; + +static const struct dcn10_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dcn10_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN10(_MASK), +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST(id), \ + .AUX_RESET_MASK = 0 \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), + aux_engine_regs(5) +}; + +#define tf_regs(id)\ +[id] = {\ + TF_REG_LIST_DCN10(id),\ +} + +static const struct dcn_dpp_registers tf_regs[] = { + tf_regs(0), + tf_regs(1), + tf_regs(2), + tf_regs(3), +}; + +static const struct dcn_dpp_shift tf_shift = { + TF_REG_LIST_SH_MASK_DCN10(__SHIFT), + TF_DEBUG_REG_LIST_SH_DCN10 + +}; + +static const struct dcn_dpp_mask tf_mask = { + TF_REG_LIST_SH_MASK_DCN10(_MASK), + TF_DEBUG_REG_LIST_MASK_DCN10 +}; + +static const struct dcn_mpc_registers mpc_regs = { + MPC_COMMON_REG_LIST_DCN1_0(0), + MPC_COMMON_REG_LIST_DCN1_0(1), + MPC_COMMON_REG_LIST_DCN1_0(2), + MPC_COMMON_REG_LIST_DCN1_0(3), + MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(0), + MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(1), + MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(2), + MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(3) +}; + +static const struct dcn_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT),\ + SFRB(CUR_VUPDATE_LOCK_SET, CUR0_VUPDATE_LOCK_SET0, CUR0_VUPDATE_LOCK_SET, __SHIFT) +}; + +static const struct dcn_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN1_0(_MASK),\ + SFRB(CUR_VUPDATE_LOCK_SET, CUR0_VUPDATE_LOCK_SET0, CUR0_VUPDATE_LOCK_SET, _MASK) +}; + +#define tg_regs(id)\ +[id] = {TG_COMMON_REG_LIST_DCN1_0(id)} + +static const struct dcn_optc_registers tg_regs[] = { + tg_regs(0), + tg_regs(1), + tg_regs(2), + tg_regs(3), +}; + +static const struct dcn_optc_shift tg_shift = { + TG_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT) +}; + +static const struct dcn_optc_mask tg_mask = { + TG_COMMON_MASK_SH_LIST_DCN1_0(_MASK) +}; + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN10(id)\ +} + +static const struct dcn_mi_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3), +}; + +static const struct dcn_mi_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dcn_mi_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN10(_MASK) +}; + +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN10(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN10(_MASK) +}; + +static int map_transmitter_id_to_phy_instance( + enum transmitter transmitter) +{ + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + return 0; + break; + case TRANSMITTER_UNIPHY_B: + return 1; + break; + case TRANSMITTER_UNIPHY_C: + return 2; + break; + case TRANSMITTER_UNIPHY_D: + return 3; + break; + default: + ASSERT(0); + return 0; + } +} + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN1_0(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D) +}; + +static const struct dce110_clk_src_shift cs_shift = { + 
CS_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK) +}; + +static const struct resource_caps res_cap = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 4, + .num_stream_encoder = 4, + .num_pll = 4, + .num_ddc = 4, +}; + +static const struct resource_caps rv2_res_cap = { + .num_timing_generator = 3, + .num_opp = 3, + .num_video_plane = 3, + .num_audio = 3, + .num_stream_encoder = 3, + .num_pll = 3, + .num_ddc = 4, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 250, + .fp16 = 1 + } +}; + +static const struct dc_debug_options debug_defaults_drv = { + .sanity_checks = true, + .disable_dmcu = false, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + + /* raven smu dones't allow 0 disp clk, + * smu min disp clk limit is 50Mhz + * keep min disp clk 100Mhz avoid smu hang + */ + .min_disp_clk_khz = 100000, + + .disable_pplib_clock_request = false, + .disable_pplib_wm_range = false, + .pplib_wm_report_mode = WM_REPORT_DEFAULT, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = true, + .disable_dcc = DCC_ENABLE, + .voltage_align_fclk = true, + .disable_stereo_support = true, + .vsr_support = true, + .performance_trace = false, + .az_endpoint_mute_only = true, + .recovery_enabled = false, /*enable this by default after testing.*/ + .max_downscale_src_width = 3840, + .underflow_assert_delay_us = 0xFFFFFFFF, + .enable_legacy_fast_update = true, + .using_dml2 = false, +}; + +static const struct dc_debug_options debug_defaults_diags = { + .disable_dmcu = false, + .force_abm_enable = false, + .timing_trace = true, + .clock_trace = true, + .disable_stutter = true, + .disable_pplib_clock_request = true, + .disable_pplib_wm_range = true, + .underflow_assert_delay_us = 0xFFFFFFFF, +}; + +static void dcn10_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN10_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn10_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn10_dpp *dpp = + kzalloc(sizeof(struct dcn10_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + dpp1_construct(dpp, ctx, inst, + &tf_regs[inst], &tf_shift, &tf_mask); + return &dpp->base; +} + +static struct input_pixel_processor *dcn10_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn10_ipp *ipp = + kzalloc(sizeof(struct dcn10_ipp), GFP_KERNEL); + + if (!ipp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn10_ipp_construct(ipp, ctx, inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + + +static struct output_pixel_processor *dcn10_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn10_opp *opp = + kzalloc(sizeof(struct dcn10_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn10_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dcn10_aux_engine_create(struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + 
SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), + i2c_inst_regs(6), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCE110(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCE110(_MASK) +}; + +static struct dce_i2c_hw *dcn10_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn1_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct mpc *dcn10_mpc_create(struct dc_context *ctx) +{ + struct dcn10_mpc *mpc10 = kzalloc(sizeof(struct dcn10_mpc), + GFP_KERNEL); + + if (!mpc10) + return NULL; + + dcn10_mpc_construct(mpc10, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + 4); + + return &mpc10->base; +} + +static struct hubbub *dcn10_hubbub_create(struct dc_context *ctx) +{ + struct dcn10_hubbub *dcn10_hubbub = kzalloc(sizeof(struct dcn10_hubbub), + GFP_KERNEL); + + if (!dcn10_hubbub) + return NULL; + + hubbub1_construct(&dcn10_hubbub->base, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask); + + return &dcn10_hubbub->base; +} + +static struct timing_generator *dcn10_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &tg_regs[instance]; + tgn10->tg_shift = &tg_shift; + tgn10->tg_mask = &tg_mask; + + dcn10_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn10_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn10_link_encoder *enc10 = + kzalloc(sizeof(struct dcn10_link_encoder), GFP_KERNEL); + int link_regs_id; + + if (!enc10) + return NULL; + + link_regs_id = + map_transmitter_id_to_phy_instance(enc_init_data->transmitter); + + dcn10_link_encoder_construct(enc10, + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc10->base; +} + +static struct panel_cntl *dcn10_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static struct clock_source *dcn10_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + 
const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dce112_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); +} + +static struct audio *create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct stream_encoder *dcn10_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1 = + kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + + if (!enc1) + return NULL; + + dcn10_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + return &enc1->base; +} + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN1_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN1_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN1_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dcn10_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + hws->wa.DEGVIDCN10_253 = true; + hws->wa.false_optc_underflow = true; + hws->wa.DEGVIDCN10_254 = true; + + if ((ctx->asic_id.chip_family == FAMILY_RV) && + ASICREV_IS_RAVEN2(ctx->asic_id.hw_internal_rev)) + switch (ctx->asic_id.pci_revision_id) { + case PRID_POLLOCK_94: + case PRID_POLLOCK_95: + case PRID_POLLOCK_E9: + case PRID_POLLOCK_EA: + case PRID_POLLOCK_EB: + hws->wa.wait_hubpret_read_start_during_mpo_transition = true; + break; + default: + hws->wa.wait_hubpret_read_start_during_mpo_transition = false; + break; + } + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = create_audio, + .create_stream_encoder = dcn10_stream_encoder_create, + .create_hwseq = dcn10_hwseq_create, +}; + +static void dcn10_clock_source_destroy(struct clock_source **clk_src) +{ + kfree(TO_DCE110_CLK_SRC(*clk_src)); + *clk_src = NULL; +} + +static struct pp_smu_funcs *dcn10_pp_smu_create(struct dc_context *ctx) +{ + struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); + + if (!pp_smu) + return pp_smu; + + dm_pp_get_funcs(ctx, pp_smu); + return pp_smu; +} + +static void dcn10_resource_destruct(struct dcn10_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN10_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + + if (pool->base.dpps[i] != NULL) + 
dcn10_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN10_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn10_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn10_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + if (pool->base.abm != NULL) + dce_abm_destroy(&pool->base.abm); + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); + + kfree(pool->base.pp_smu); +} + +static struct hubp *dcn10_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn10_hubp *hubp1 = + kzalloc(sizeof(struct dcn10_hubp), GFP_KERNEL); + + if (!hubp1) + return NULL; + + dcn10_hubp_construct(hubp1, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask); + return &hubp1->base; +} + +static void get_pixel_clock_parameters( + const struct pipe_ctx *pipe_ctx, + struct pixel_clk_params *pixel_clk_params) +{ + const struct dc_stream_state *stream = pipe_ctx->stream; + pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; + pixel_clk_params->encoder_object_id = stream->link->link_enc->id; + pixel_clk_params->signal_type = pipe_ctx->stream->signal; + pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; + /* TODO: un-hardcode*/ + pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * + LINK_RATE_REF_FREQ_IN_KHZ; + pixel_clk_params->flags.ENABLE_SS = 0; + pixel_clk_params->color_depth = + stream->timing.display_color_depth; + pixel_clk_params->flags.DISPLAY_BLANKED = 1; + pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; + + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) + pixel_clk_params->color_depth = COLOR_DEPTH_888; + + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + pixel_clk_params->requested_pix_clk_100hz /= 2; + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + pixel_clk_params->requested_pix_clk_100hz *= 2; + +} + +static void build_clamping_params(struct dc_stream_state *stream) +{ + stream->clamping.clamping_level = CLAMPING_FULL_RANGE; + stream->clamping.c_depth = stream->timing.display_color_depth; + stream->clamping.pixel_encoding = stream->timing.pixel_encoding; +} + +static void build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +{ + + get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); + + pipe_ctx->clock_source->funcs->get_pix_clk_dividers( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + &pipe_ctx->pll_settings); + + pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; + + 
resource_build_bit_depth_reduction_params(pipe_ctx->stream, + &pipe_ctx->stream->bit_depth_params); + build_clamping_params(pipe_ctx->stream); +} + +static enum dc_status build_mapped_resource( + const struct dc *dc, + struct dc_state *context, + struct dc_stream_state *stream) +{ + struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + + build_pipe_hw_param(pipe_ctx); + return DC_OK; +} + +static enum dc_status dcn10_add_stream_to_ctx( + struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *dc_stream) +{ + enum dc_status result = DC_ERROR_UNEXPECTED; + + result = resource_map_pool_resources(dc, new_ctx, dc_stream); + + if (result == DC_OK) + result = resource_map_phy_clock_resources(dc, new_ctx, dc_stream); + + + if (result == DC_OK) + result = build_mapped_resource(dc, new_ctx, dc_stream); + + return result; +} + +static struct pipe_ctx *dcn10_acquire_free_pipe_for_layer( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *opp_head_pipe) +{ + struct resource_context *res_ctx = &new_ctx->res_ctx; + struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream); + struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe); + + if (!head_pipe) { + ASSERT(0); + return NULL; + } + + if (!idle_pipe) + return NULL; + + idle_pipe->stream = head_pipe->stream; + idle_pipe->stream_res.tg = head_pipe->stream_res.tg; + idle_pipe->stream_res.abm = head_pipe->stream_res.abm; + idle_pipe->stream_res.opp = head_pipe->stream_res.opp; + + idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; + + return idle_pipe; +} + +static bool dcn10_get_dcc_compression_cap(const struct dc *dc, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output) +{ + return dc->res_pool->hubbub->funcs->get_dcc_compression_cap( + dc->res_pool->hubbub, + input, + output); +} + +static void dcn10_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn10_resource_pool *dcn10_pool = TO_DCN10_RES_POOL(*pool); + + dcn10_resource_destruct(dcn10_pool); + kfree(dcn10_pool); + *pool = NULL; +} + +static bool dcn10_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported; + + DC_FP_START(); + voltage_supported = dcn_validate_bandwidth(dc, context, fast_validate); + DC_FP_END(); + + return voltage_supported; +} + +static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps) +{ + if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN + && caps->max_video_width != 0 + && plane_state->src_rect.width > caps->max_video_width) + return DC_FAIL_SURFACE_VALIDATE; + + return DC_OK; +} + +static enum dc_status dcn10_validate_global(struct dc *dc, struct dc_state *context) +{ + int i, j; + bool video_down_scaled = false; + bool video_large = false; + bool desktop_large = false; + bool dcc_disabled = false; + bool mpo_enabled = false; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count == 0) + continue; + + if (context->stream_status[i].plane_count > 2) + return DC_FAIL_UNSUPPORTED_1; + + if (context->stream_status[i].plane_count > 
1) + mpo_enabled = true; + + for (j = 0; j < context->stream_status[i].plane_count; j++) { + struct dc_plane_state *plane = + context->stream_status[i].plane_states[j]; + + + if (plane->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + + if (plane->src_rect.width > plane->dst_rect.width || + plane->src_rect.height > plane->dst_rect.height) + video_down_scaled = true; + + if (plane->src_rect.width >= 3840) + video_large = true; + + } else { + if (plane->src_rect.width >= 3840) + desktop_large = true; + if (!plane->dcc.enable) + dcc_disabled = true; + } + } + } + + /* Disable MPO in multi-display configurations. */ + if (context->stream_count > 1 && mpo_enabled) + return DC_FAIL_UNSUPPORTED_1; + + /* + * Workaround: On DCN10 there is UMC issue that causes underflow when + * playing 4k video on 4k desktop with video downscaled and single channel + * memory + */ + if (video_large && desktop_large && video_down_scaled && dcc_disabled && + dc->dcn_soc->number_of_channels == 1) + return DC_FAIL_SURFACE_VALIDATE; + + return DC_OK; +} + +static enum dc_status dcn10_patch_unknown_plane_state(struct dc_plane_state *plane_state) +{ + enum surface_pixel_format surf_pix_format = plane_state->format; + unsigned int bpp = resource_pixel_format_to_bpp(surf_pix_format); + + enum swizzle_mode_values swizzle = DC_SW_LINEAR; + + if (bpp == 64) + swizzle = DC_SW_64KB_D; + else + swizzle = DC_SW_64KB_S; + + plane_state->tiling_info.gfx9.swizzle = swizzle; + return DC_OK; +} + +struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * For CZ and later, we can allow DIG FE and BE to differ for all display types + */ + + if (j >= 0) + return pool->stream_enc[j]; + + return NULL; +} + +static const struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn10_get_dcc_compression_cap +}; + +static const struct resource_funcs dcn10_res_pool_funcs = { + .destroy = dcn10_destroy_resource_pool, + .link_enc_create = dcn10_link_encoder_create, + .panel_cntl_create = dcn10_panel_cntl_create, + .validate_bandwidth = dcn10_validate_bandwidth, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn10_acquire_free_pipe_for_layer, + .validate_plane = dcn10_validate_plane, + .validate_global = dcn10_validate_global, + .add_stream_to_ctx = dcn10_add_stream_to_ctx, + .patch_unknown_plane_state = dcn10_patch_unknown_plane_state, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link +}; + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = dm_read_reg_soc15(ctx, mmCC_DC_PIPE_DIS, 0); + /* RV1 support max 4 pipes */ + value = value & 0xf; + return value; +} + +static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks) +{ + int i; + + if (clks->num_levels == 0) + return false; + + for (i = 0; i < clks->num_levels; i++) + /* Ensure that the result is sane */ + if (clks->data[i].clocks_in_khz == 0) + return false; + + return true; +} + +static bool dcn10_resource_construct( + uint8_t num_virtual_links, + 
struct dc *dc, + struct dcn10_resource_pool *pool) +{ + int i; + int j; + struct dc_context *ctx = dc->ctx; + uint32_t pipe_fuses = read_pipe_fuses(ctx); + struct dm_pp_clock_levels_with_voltage fclks = {0}, dcfclks = {0}; + int min_fclk_khz, min_dcfclk_khz, socclk_khz; + bool res; + + ctx->dc_bios->regs = &bios_regs; + + if (ctx->dce_version == DCN_VERSION_1_01) + pool->base.res_cap = &rv2_res_cap; + else + pool->base.res_cap = &res_cap; + pool->base.funcs = &dcn10_res_pool_funcs; + + /* + * TODO fill in from actual raven resource when we create + * more than virtual encoder + */ + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + + /* max pipe num for ASIC before check pipe fuses */ + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + + if (dc->ctx->dce_version == DCN_VERSION_1_01) + pool->base.pipe_count = 3; + dc->caps.max_video_width = 3840; + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.max_slave_planes = 1; + dc->caps.max_slave_yuv_planes = 1; + dc->caps.max_slave_rgb_planes = 0; + dc->caps.is_apu = true; + dc->caps.post_blend_color_processing = false; + dc->caps.extended_aux_timeout_support = false; + + /* Raven DP PHY HBR2 eye diagram pattern is not stable. Use TP4 */ + dc->caps.force_dp_tps4_for_cp2520 = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 1; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 1; + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.dgam_rom_caps.pq = 0; + dc->caps.color.dpp.dgam_rom_caps.hlg = 0; + dc->caps.color.dpp.post_csc = 0; + dc->caps.color.dpp.gamma_corr = 0; + dc->caps.color.dpp.dgam_rom_for_yuv = 1; + + dc->caps.color.dpp.hw_3d_lut = 0; + dc->caps.color.dpp.ogam_ram = 1; // RGAM on DCN1 + dc->caps.color.dpp.ogam_rom_caps.srgb = 1; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 1; + + /* no post-blend color operations */ + dc->caps.color.mpc.gamut_remap = 0; + dc->caps.color.mpc.num_3dluts = 0; + dc->caps.color.mpc.shared_3d_lut = 0; + dc->caps.color.mpc.ogam_ram = 0; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 0; + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + else + dc->debug = debug_defaults_diags; + + /************************************************* + * Create resources * + *************************************************/ + + pool->base.clock_sources[DCN10_CLK_SRC_PLL0] = + dcn10_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN10_CLK_SRC_PLL1] = + dcn10_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN10_CLK_SRC_PLL2] = + 
dcn10_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + + if (dc->ctx->dce_version == DCN_VERSION_1_0) { + pool->base.clock_sources[DCN10_CLK_SRC_PLL3] = + dcn10_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + } + + pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL; + + if (dc->ctx->dce_version == DCN_VERSION_1_01) + pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL; + + pool->base.dp_clock_source = + dcn10_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + /* todo: not reuse phy_pll registers */ + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto fail; + } + } + + pool->base.dmcu = dcn10_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto fail; + } + + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto fail; + } + + dml_init_instance(&dc->dml, &dcn1_0_soc, &dcn1_0_ip, DML_PROJECT_RAVEN1); + memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); + memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); + + DC_FP_START(); + dcn10_resource_construct_fp(dc); + DC_FP_END(); + + if (!dc->config.is_vmin_only_asic) + if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev)) + switch (dc->ctx->asic_id.pci_revision_id) { + case PRID_DALI_DE: + case PRID_DALI_DF: + case PRID_DALI_E3: + case PRID_DALI_E4: + case PRID_POLLOCK_94: + case PRID_POLLOCK_95: + case PRID_POLLOCK_E9: + case PRID_POLLOCK_EA: + case PRID_POLLOCK_EB: + dc->config.is_vmin_only_asic = true; + break; + default: + break; + } + + pool->base.pp_smu = dcn10_pp_smu_create(ctx); + + /* + * Right now SMU/PPLIB and DAL all have the AZ D3 force PME notification * + * implemented. So AZ D3 should work.For issue 197007. * + */ + if (pool->base.pp_smu != NULL + && pool->base.pp_smu->rv_funcs.set_pme_wa_enable != NULL) + dc->debug.az_endpoint_mute_only = false; + + + if (!dc->debug.disable_pplib_clock_request) { + /* + * TODO: This is not the proper way to obtain + * fabric_and_dram_bandwidth, should be min(fclk, memclk). 
+ */ + res = dm_pp_get_clock_levels_by_type_with_voltage( + ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks); + + DC_FP_START(); + + if (res) + res = verify_clock_values(&fclks); + + if (res) + dcn_bw_update_from_pplib_fclks(dc, &fclks); + else + BREAK_TO_DEBUGGER(); + + DC_FP_END(); + + res = dm_pp_get_clock_levels_by_type_with_voltage( + ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks); + + DC_FP_START(); + + if (res) + res = verify_clock_values(&dcfclks); + + if (res) + dcn_bw_update_from_pplib_dcfclks(dc, &dcfclks); + else + BREAK_TO_DEBUGGER(); + + DC_FP_END(); + } + + dcn_bw_sync_calcs_and_dml(dc); + if (!dc->debug.disable_pplib_wm_range) { + dc->res_pool = &pool->base; + DC_FP_START(); + dcn_get_soc_clks( + dc, &min_fclk_khz, &min_dcfclk_khz, &socclk_khz); + DC_FP_END(); + dcn_bw_notify_pplib_of_wm_ranges( + dc, min_fclk_khz, min_dcfclk_khz, socclk_khz); + } + + { + struct irq_service_init_data init_data; + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn10_create(&init_data); + if (!pool->base.irqs) + goto fail; + } + + /* index to valid pipe resource */ + j = 0; + /* mem input -> ipp -> dpp -> opp -> TG */ + for (i = 0; i < pool->base.pipe_count; i++) { + /* if pipe is disabled, skip instance of HW pipe, + * i.e, skip ASIC register instance + */ + if ((pipe_fuses & (1 << i)) != 0) + continue; + + pool->base.hubps[j] = dcn10_hubp_create(ctx, i); + if (pool->base.hubps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create memory input!\n"); + goto fail; + } + + pool->base.ipps[j] = dcn10_ipp_create(ctx, i); + if (pool->base.ipps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create input pixel processor!\n"); + goto fail; + } + + pool->base.dpps[j] = dcn10_dpp_create(ctx, i); + if (pool->base.dpps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpp!\n"); + goto fail; + } + + pool->base.opps[j] = dcn10_opp_create(ctx, i); + if (pool->base.opps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto fail; + } + + pool->base.timing_generators[j] = dcn10_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto fail; + } + /* check next valid pipe */ + j++; + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn10_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto fail; + } + pool->base.hw_i2cs[i] = dcn10_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* valid pipe num */ + pool->base.pipe_count = j; + pool->base.timing_generator_count = j; + + /* within dml lib, it is hard code to 4. 
If ASIC pipe is fused, + * the value may be changed + */ + dc->dml.ip.max_num_dpp = pool->base.pipe_count; + dc->dcn_ip->max_num_dpp = pool->base.pipe_count; + + pool->base.mpc = dcn10_mpc_create(ctx); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto fail; + } + + pool->base.hubbub = dcn10_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto fail; + } + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto fail; + + dcn10_hw_sequencer_construct(dc); + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + return true; + +fail: + + dcn10_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn10_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn10_resource_pool *pool = + kzalloc(sizeof(struct dcn10_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn10_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h new file mode 100644 index 000000000000..bf8e33cd8147 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h @@ -0,0 +1,56 @@ +/* +* Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DC_RESOURCE_DCN10_H__ +#define __DC_RESOURCE_DCN10_H__ + +#include "core_types.h" +#include "dml/dcn10/dcn10_fpu.h" + +#define TO_DCN10_RES_POOL(pool)\ + container_of(pool, struct dcn10_resource_pool, base) + +struct dc; +struct resource_pool; +struct _vcs_dpi_display_pipe_params_st; + +extern struct _vcs_dpi_ip_params_st dcn1_0_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc; + +struct dcn10_resource_pool { + struct resource_pool base; +}; +struct resource_pool *dcn10_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + + +#endif /* __DC_RESOURCE_DCN10_H__ */ + diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c new file mode 100644 index 000000000000..f04bb5b1471d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -0,0 +1,2789 @@ +/* +* Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include + +#include "dm_services.h" +#include "dc.h" + +#include "dcn20/dcn20_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn20/dcn20_resource.h" + +#include "dml/dcn20/dcn20_fpu.h" + +#include "dcn10/dcn10_hubp.h" +#include "dcn10/dcn10_ipp.h" +#include "dcn20/dcn20_hubbub.h" +#include "dcn20/dcn20_mpc.h" +#include "dcn20/dcn20_hubp.h" +#include "irq/dcn20/irq_service_dcn20.h" +#include "dcn20/dcn20_dpp.h" +#include "dcn20/dcn20_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_resource.h" +#include "dcn20/dcn20_opp.h" + +#include "dcn20/dcn20_dsc.h" + +#include "dcn20/dcn20_link_encoder.h" +#include "dcn20/dcn20_stream_encoder.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dml/display_mode_vba.h" +#include "dcn20/dcn20_dccg.h" +#include "dcn20/dcn20_vmid.h" +#include "dce/dce_panel_cntl.h" + +#include "navi10_ip_offset.h" + +#include "dcn/dcn_2_0_0_offset.h" +#include "dcn/dcn_2_0_0_sh_mask.h" +#include "dpcs/dpcs_2_0_0_offset.h" +#include "dpcs/dpcs_2_0_0_sh_mask.h" + +#include "nbio/nbio_2_3_offset.h" + +#include "dcn20/dcn20_dwb.h" +#include "dcn20/dcn20_mmhubbub.h" + +#include "mmhub/mmhub_2_0_0_offset.h" +#include "mmhub/mmhub_2_0_0_sh_mask.h" + +#include "reg_helper.h" +#include "dce/dce_abm.h" +#include "dce/dce_dmcu.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" +#include "vm_helper.h" +#include "link_enc_cfg.h" + +#include "amdgpu_socbb.h" + +#include "link.h" +#define DC_LOGGER_INIT(logger) + +#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL + #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f + #define mmDP0_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP1_DP_DPHY_INTERNAL_CTRL 0x220f + #define mmDP1_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP2_DP_DPHY_INTERNAL_CTRL 0x230f + #define mmDP2_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP3_DP_DPHY_INTERNAL_CTRL 0x240f + #define mmDP3_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP4_DP_DPHY_INTERNAL_CTRL 0x250f + #define mmDP4_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP5_DP_DPHY_INTERNAL_CTRL 0x260f + #define mmDP5_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 + #define mmDP6_DP_DPHY_INTERNAL_CTRL 0x270f + #define mmDP6_DP_DPHY_INTERNAL_CTRL_BASE_IDX 2 +#endif + + +enum dcn20_clk_src_array_id { + DCN20_CLK_SRC_PLL0, + DCN20_CLK_SRC_PLL1, + DCN20_CLK_SRC_PLL2, + DCN20_CLK_SRC_PLL3, + DCN20_CLK_SRC_PLL4, + DCN20_CLK_SRC_PLL5, + DCN20_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file */ + +/* DCN */ +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRI2_DWB(reg_name, block, id)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name +#define SF_DWB(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + 
+#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \ + mmMM ## reg_name + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN2_0(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E), + clk_src_regs(5, F) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +static const struct dce_dmcu_registers dmcu_regs = { + DMCU_DCN10_REG_LIST() +}; + +static const struct dce_dmcu_shift dmcu_shift = { + DMCU_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dce_dmcu_mask dmcu_mask = { + DMCU_MASK_SH_LIST_DCN10(_MASK) +}; + +static const struct dce_abm_registers abm_regs = { + ABM_DCN20_REG_LIST() +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN20(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6), +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN2_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4), + stream_enc_regs(5), +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN20(_MASK) +}; + + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4), + aux_regs(5) +}; + +#define hpd_regs(id)\ +[id] = {\ + 
HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4), + hpd_regs(5) +}; + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN10_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + DPCS_DCN2_REG_LIST(id), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ +} + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), + link_regs(4, E), + link_regs(5, F) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT),\ + DPCS_DCN2_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK),\ + DPCS_DCN2_MASK_SH_LIST(_MASK) +}; + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCN_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +#define ipp_regs(id)\ +[id] = {\ + IPP_REG_LIST_DCN20(id),\ +} + +static const struct dcn10_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2), + ipp_regs(3), + ipp_regs(4), + ipp_regs(5), +}; + +static const struct dcn10_ipp_shift ipp_shift = { + IPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn10_ipp_mask ipp_mask = { + IPP_MASK_SH_LIST_DCN20(_MASK), +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN20(id),\ +} + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4), + opp_regs(5), +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), + aux_engine_regs(5) +}; + +#define tf_regs(id)\ +[id] = {\ + TF_REG_LIST_DCN20(id),\ + TF_REG_LIST_DCN20_COMMON_APPEND(id),\ +} + +static const struct dcn2_dpp_registers tf_regs[] = { + tf_regs(0), + tf_regs(1), + tf_regs(2), + tf_regs(3), + tf_regs(4), + tf_regs(5), +}; + +static const struct dcn2_dpp_shift tf_shift = { + TF_REG_LIST_SH_MASK_DCN20(__SHIFT), + TF_DEBUG_REG_LIST_SH_DCN20 +}; + +static const struct dcn2_dpp_mask tf_mask = { + TF_REG_LIST_SH_MASK_DCN20(_MASK), + TF_DEBUG_REG_LIST_MASK_DCN20 +}; + +#define dwbc_regs_dcn2(id)\ +[id] = {\ + DWBC_COMMON_REG_LIST_DCN2_0(id),\ + } + +static const struct dcn20_dwbc_registers dwbc20_regs[] = { + dwbc_regs_dcn2(0), +}; + +static const struct dcn20_dwbc_shift dwbc20_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dcn20_dwbc_mask dwbc20_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +#define mcif_wb_regs_dcn2(id)\ +[id] = {\ + MCIF_WB_COMMON_REG_LIST_DCN2_0(id),\ + } + +static const struct dcn20_mmhubbub_registers mcif_wb20_regs[] = { + mcif_wb_regs_dcn2(0), +}; + +static const struct dcn20_mmhubbub_shift mcif_wb20_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dcn20_mmhubbub_mask mcif_wb20_mask = { + 
MCIF_WB_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +static const struct dcn20_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN2_0(0), + MPC_REG_LIST_DCN2_0(1), + MPC_REG_LIST_DCN2_0(2), + MPC_REG_LIST_DCN2_0(3), + MPC_REG_LIST_DCN2_0(4), + MPC_REG_LIST_DCN2_0(5), + MPC_OUT_MUX_REG_LIST_DCN2_0(0), + MPC_OUT_MUX_REG_LIST_DCN2_0(1), + MPC_OUT_MUX_REG_LIST_DCN2_0(2), + MPC_OUT_MUX_REG_LIST_DCN2_0(3), + MPC_OUT_MUX_REG_LIST_DCN2_0(4), + MPC_OUT_MUX_REG_LIST_DCN2_0(5), + MPC_DBG_REG_LIST_DCN2_0() +}; + +static const struct dcn20_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), + MPC_DEBUG_REG_LIST_SH_DCN20 +}; + +static const struct dcn20_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), + MPC_DEBUG_REG_LIST_MASK_DCN20 +}; + +#define tg_regs(id)\ +[id] = {TG_COMMON_REG_LIST_DCN2_0(id)} + + +static const struct dcn_optc_registers tg_regs[] = { + tg_regs(0), + tg_regs(1), + tg_regs(2), + tg_regs(3), + tg_regs(4), + tg_regs(5) +}; + +static const struct dcn_optc_shift tg_shift = { + TG_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dcn_optc_mask tg_mask = { + TG_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN20(id)\ +} + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3), + hubp_regs(4), + hubp_regs(5) +}; + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN20(_MASK) +}; + +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN20(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN20(_MASK) +}; + +#define vmid_regs(id)\ +[id] = {\ + DCN20_VMID_REG_LIST(id)\ +} + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static int map_transmitter_id_to_phy_instance( + enum transmitter transmitter) +{ + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + return 0; + break; + case TRANSMITTER_UNIPHY_B: + return 1; + break; + case TRANSMITTER_UNIPHY_C: + return 2; + break; + case TRANSMITTER_UNIPHY_D: + return 3; + break; + case TRANSMITTER_UNIPHY_E: + return 4; + break; + case TRANSMITTER_UNIPHY_F: + return 5; + break; + default: + ASSERT(0); + return 0; + } +} + +#define dsc_regsDCN20(id)\ +[id] = {\ + DSC_REG_LIST_DCN20(id)\ +} + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1), + dsc_regsDCN20(2), + dsc_regsDCN20(3), + dsc_regsDCN20(4), + dsc_regsDCN20(5) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static const struct dccg_registers dccg_regs = { + 
DCCG_REG_LIST_DCN2() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN2(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN2(_MASK) +}; + +static const struct resource_caps res_cap_nv10 = { + .num_timing_generator = 6, + .num_opp = 6, + .num_video_plane = 6, + .num_audio = 7, + .num_stream_encoder = 6, + .num_pll = 6, + .num_dwb = 1, + .num_ddc = 6, + .num_vmid = 16, + .num_dsc = 6, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 250, + .fp16 = 1 + }, + 16, + 16 +}; +static const struct resource_caps res_cap_nv14 = { + .num_timing_generator = 5, + .num_opp = 5, + .num_video_plane = 5, + .num_audio = 6, + .num_stream_encoder = 5, + .num_pll = 5, + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_dsc = 5, +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = false, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = true, + .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 5120,/*upto 5K*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .enable_legacy_fast_update = true, + .using_dml2 = false, +}; + +void dcn20_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN20_DPP(*dpp)); + *dpp = NULL; +} + +struct dpp *dcn20_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_dpp *dpp = + kzalloc(sizeof(struct dcn20_dpp), GFP_ATOMIC); + + if (!dpp) + return NULL; + + if (dpp2_construct(dpp, ctx, inst, + &tf_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +struct input_pixel_processor *dcn20_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn10_ipp *ipp = + kzalloc(sizeof(struct dcn10_ipp), GFP_ATOMIC); + + if (!ipp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_ipp_construct(ipp, ctx, inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + + +struct output_pixel_processor *dcn20_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_ATOMIC); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +struct dce_aux *dcn20_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), + i2c_inst_regs(6), +}; + +static 
const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) +}; + +struct dce_i2c_hw *dcn20_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +struct mpc *dcn20_mpc_create(struct dc_context *ctx) +{ + struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc), + GFP_ATOMIC); + + if (!mpc20) + return NULL; + + dcn20_mpc_construct(mpc20, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + 6); + + return &mpc20->base; +} + +struct hubbub *dcn20_hubbub_create(struct dc_context *ctx) +{ + int i; + struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), + GFP_ATOMIC); + + if (!hubbub) + return NULL; + + hubbub2_construct(hubbub, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask); + + for (i = 0; i < res_cap_nv10.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub->base; +} + +struct timing_generator *dcn20_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_ATOMIC); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &tg_regs[instance]; + tgn10->tg_shift = &tg_shift; + tgn10->tg_mask = &tg_mask; + + dcn20_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +struct link_encoder *dcn20_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + int link_regs_id; + + if (!enc20) + return NULL; + + link_regs_id = + map_transmitter_id_to_phy_instance(enc_init_data->transmitter); + + dcn20_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +static struct panel_cntl *dcn20_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static struct clock_source *dcn20_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_ATOMIC); + + if (!clk_src) + return NULL; + + if (dcn20_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, 
&cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); +} + +static struct audio *dcn20_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +struct stream_encoder *dcn20_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1 = + kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + + if (!enc1) + return NULL; + + if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) { + if (eng_id >= ENGINE_ID_DIGD) + eng_id++; + } + + dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN2_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN2_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN2_MASK_SH_LIST(_MASK) +}; + +struct dce_hwseq *dcn20_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn20_create_audio, + .create_stream_encoder = dcn20_stream_encoder_create, + .create_hwseq = dcn20_hwseq_create, +}; + +static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu); + +void dcn20_clock_source_destroy(struct clock_source **clk_src) +{ + kfree(TO_DCE110_CLK_SRC(*clk_src)); + *clk_src = NULL; +} + + +struct display_stream_compressor *dcn20_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_ATOMIC); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +void dcn20_dsc_destroy(struct display_stream_compressor **dsc) +{ + kfree(container_of(*dsc, struct dcn20_dsc, base)); + *dsc = NULL; +} + + +static void dcn20_resource_destruct(struct dcn20_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn20_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + 
dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN20_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN20_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + + if (pool->base.abm != NULL) + dce_abm_destroy(&pool->base.abm); + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.pp_smu != NULL) + dcn20_pp_smu_destroy(&pool->base.pp_smu); + + if (pool->base.oem_device != NULL) { + struct dc *dc = pool->base.oem_device->ctx->dc; + + dc->link_srv->destroy_ddc_service(&pool->base.oem_device); + } +} + +struct hubp *dcn20_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_ATOMIC); + + if (!hubp2) + return NULL; + + if (hubp2_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static void get_pixel_clock_parameters( + struct pipe_ctx *pipe_ctx, + struct pixel_clk_params *pixel_clk_params) +{ + const struct dc_stream_state *stream = pipe_ctx->stream; + struct pipe_ctx *odm_pipe; + int opp_cnt = 1; + struct dc_link *link = stream->link; + struct link_encoder *link_enc = NULL; + struct dc *dc = pipe_ctx->stream->ctx->dc; + struct dce_hwseq *hws = dc->hwseq; + + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + opp_cnt++; + + pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; + + link_enc = link_enc_cfg_get_link_enc(link); + if (link_enc) + pixel_clk_params->encoder_object_id = link_enc->id; + + pixel_clk_params->signal_type = pipe_ctx->stream->signal; + pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; + /* TODO: un-hardcode*/ + /* TODO - DP2.0 HW: calculate requested_sym_clk for UHBR rates */ + pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * + LINK_RATE_REF_FREQ_IN_KHZ; + pixel_clk_params->flags.ENABLE_SS = 0; + pixel_clk_params->color_depth = + stream->timing.display_color_depth; + pixel_clk_params->flags.DISPLAY_BLANKED = 1; + 
pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; + + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) + pixel_clk_params->color_depth = COLOR_DEPTH_888; + + if (opp_cnt == 4) + pixel_clk_params->requested_pix_clk_100hz /= 4; + else if (optc2_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2) + pixel_clk_params->requested_pix_clk_100hz /= 2; + else if (hws->funcs.is_dp_dig_pixel_rate_div_policy) { + if (hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx)) + pixel_clk_params->requested_pix_clk_100hz /= 2; + } + + if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) + pixel_clk_params->requested_pix_clk_100hz *= 2; + +} + +static void build_clamping_params(struct dc_stream_state *stream) +{ + stream->clamping.clamping_level = CLAMPING_FULL_RANGE; + stream->clamping.c_depth = stream->timing.display_color_depth; + stream->clamping.pixel_encoding = stream->timing.pixel_encoding; +} + +static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +{ + + get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); + + pipe_ctx->clock_source->funcs->get_pix_clk_dividers( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + &pipe_ctx->pll_settings); + + pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; + + resource_build_bit_depth_reduction_params(pipe_ctx->stream, + &pipe_ctx->stream->bit_depth_params); + build_clamping_params(pipe_ctx->stream); + + return DC_OK; +} + +enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream) +{ + enum dc_status status = DC_OK; + struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + + + status = build_pipe_hw_param(pipe_ctx); + + return status; +} + + +void dcn20_acquire_dsc(const struct dc *dc, + struct resource_context *res_ctx, + struct display_stream_compressor **dsc, + int pipe_idx) +{ + int i; + const struct resource_pool *pool = dc->res_pool; + struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc; + + ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */ + *dsc = NULL; + + /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */ + if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { + *dsc = pool->dscs[pipe_idx]; + res_ctx->is_dsc_acquired[pipe_idx] = true; + return; + } + + /* Return old DSC to avoid the need for re-programming */ + if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) { + *dsc = dsc_old; + res_ctx->is_dsc_acquired[dsc_old->inst] = true; + return ; + } + + /* Find first free DSC */ + for (i = 0; i < pool->res_cap->num_dsc; i++) + if (!res_ctx->is_dsc_acquired[i]) { + *dsc = pool->dscs[i]; + res_ctx->is_dsc_acquired[i] = true; + break; + } +} + +void dcn20_release_dsc(struct resource_context *res_ctx, + const struct resource_pool *pool, + struct display_stream_compressor **dsc) +{ + int i; + + for (i = 0; i < pool->res_cap->num_dsc; i++) + if (pool->dscs[i] == *dsc) { + res_ctx->is_dsc_acquired[i] = false; + *dsc = NULL; + break; + } +} + + + +enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, + struct dc_state *dc_ctx, + struct dc_stream_state *dc_stream) +{ + enum dc_status result = DC_OK; + int i; + + /* Get a DSC if required and available */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = 
&dc_ctx->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->top_pipe) + continue; + + if (pipe_ctx->stream != dc_stream) + continue; + + if (pipe_ctx->stream_res.dsc) + continue; + + dcn20_acquire_dsc(dc, &dc_ctx->res_ctx, &pipe_ctx->stream_res.dsc, i); + + /* The number of DSCs can be less than the number of pipes */ + if (!pipe_ctx->stream_res.dsc) { + result = DC_NO_DSC_RESOURCE; + } + + break; + } + + return result; +} + + +static enum dc_status remove_dsc_from_stream_resource(struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *dc_stream) +{ + struct pipe_ctx *pipe_ctx = NULL; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + if (new_ctx->res_ctx.pipe_ctx[i].stream == dc_stream && !new_ctx->res_ctx.pipe_ctx[i].top_pipe) { + pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream_res.dsc) + dcn20_release_dsc(&new_ctx->res_ctx, dc->res_pool, &pipe_ctx->stream_res.dsc); + } + } + + if (!pipe_ctx) + return DC_ERROR_UNEXPECTED; + else + return DC_OK; +} + + +enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) +{ + enum dc_status result = DC_ERROR_UNEXPECTED; + + result = resource_map_pool_resources(dc, new_ctx, dc_stream); + + if (result == DC_OK) + result = resource_map_phy_clock_resources(dc, new_ctx, dc_stream); + + /* Get a DSC if required and available */ + if (result == DC_OK && dc_stream->timing.flags.DSC) + result = dcn20_add_dsc_to_stream_resource(dc, new_ctx, dc_stream); + + if (result == DC_OK) + result = dcn20_build_mapped_resource(dc, new_ctx, dc_stream); + + return result; +} + + +enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) +{ + enum dc_status result = DC_OK; + + result = remove_dsc_from_stream_resource(dc, new_ctx, dc_stream); + + return result; +} + +/** + * dcn20_split_stream_for_odm - Check if stream can be split for ODM + * + * @dc: DC object with resource pool info required for pipe split + * @res_ctx: Persistent state of resources + * @prev_odm_pipe: Reference to the previous ODM pipe + * @next_odm_pipe: Reference to the next ODM pipe + * + * This function takes a logically active pipe and a logically free pipe and + * halves all the scaling parameters that need to be halved while populating + * the free pipe with the required resources and configuring the next/previous + * ODM pipe pointers. + * + * Return: + * Return true if the stream can be split for ODM, otherwise return false. 
+ */ +bool dcn20_split_stream_for_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *prev_odm_pipe, + struct pipe_ctx *next_odm_pipe) +{ + int pipe_idx = next_odm_pipe->pipe_idx; + const struct resource_pool *pool = dc->res_pool; + + *next_odm_pipe = *prev_odm_pipe; + + next_odm_pipe->pipe_idx = pipe_idx; + next_odm_pipe->plane_res.mi = pool->mis[next_odm_pipe->pipe_idx]; + next_odm_pipe->plane_res.hubp = pool->hubps[next_odm_pipe->pipe_idx]; + next_odm_pipe->plane_res.ipp = pool->ipps[next_odm_pipe->pipe_idx]; + next_odm_pipe->plane_res.xfm = pool->transforms[next_odm_pipe->pipe_idx]; + next_odm_pipe->plane_res.dpp = pool->dpps[next_odm_pipe->pipe_idx]; + next_odm_pipe->plane_res.mpcc_inst = pool->dpps[next_odm_pipe->pipe_idx]->inst; + next_odm_pipe->stream_res.dsc = NULL; + if (prev_odm_pipe->next_odm_pipe && prev_odm_pipe->next_odm_pipe != next_odm_pipe) { + next_odm_pipe->next_odm_pipe = prev_odm_pipe->next_odm_pipe; + next_odm_pipe->next_odm_pipe->prev_odm_pipe = next_odm_pipe; + } + if (prev_odm_pipe->top_pipe && prev_odm_pipe->top_pipe->next_odm_pipe) { + prev_odm_pipe->top_pipe->next_odm_pipe->bottom_pipe = next_odm_pipe; + next_odm_pipe->top_pipe = prev_odm_pipe->top_pipe->next_odm_pipe; + } + if (prev_odm_pipe->bottom_pipe && prev_odm_pipe->bottom_pipe->next_odm_pipe) { + prev_odm_pipe->bottom_pipe->next_odm_pipe->top_pipe = next_odm_pipe; + next_odm_pipe->bottom_pipe = prev_odm_pipe->bottom_pipe->next_odm_pipe; + } + prev_odm_pipe->next_odm_pipe = next_odm_pipe; + next_odm_pipe->prev_odm_pipe = prev_odm_pipe; + + if (prev_odm_pipe->plane_state) { + struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data; + int new_width; + + /* HACTIVE halved for odm combine */ + sd->h_active /= 2; + /* Calculate new vp and recout for left pipe */ + /* Need at least 16 pixels width per side */ + if (sd->recout.x + 16 >= sd->h_active) + return false; + new_width = sd->h_active - sd->recout.x; + sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int( + sd->ratios.horz, sd->recout.width - new_width)); + sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int( + sd->ratios.horz_c, sd->recout.width - new_width)); + sd->recout.width = new_width; + + /* Calculate new vp and recout for right pipe */ + sd = &next_odm_pipe->plane_res.scl_data; + /* HACTIVE halved for odm combine */ + sd->h_active /= 2; + /* Need at least 16 pixels width per side */ + if (new_width <= 16) + return false; + new_width = sd->recout.width + sd->recout.x - sd->h_active; + sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int( + sd->ratios.horz, sd->recout.width - new_width)); + sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int( + sd->ratios.horz_c, sd->recout.width - new_width)); + sd->recout.width = new_width; + sd->viewport.x += dc_fixpt_floor(dc_fixpt_mul_int( + sd->ratios.horz, sd->h_active - sd->recout.x)); + sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int( + sd->ratios.horz_c, sd->h_active - sd->recout.x)); + sd->recout.x = 0; + } + if (!next_odm_pipe->top_pipe) + next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx]; + else + next_odm_pipe->stream_res.opp = next_odm_pipe->top_pipe->stream_res.opp; + if (next_odm_pipe->stream->timing.flags.DSC == 1 && !next_odm_pipe->top_pipe) { + dcn20_acquire_dsc(dc, res_ctx, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx); + ASSERT(next_odm_pipe->stream_res.dsc); + if (next_odm_pipe->stream_res.dsc == NULL) + return false; + } + + return true; +} + +void dcn20_split_stream_for_mpc( + struct resource_context *res_ctx, + 
const struct resource_pool *pool, + struct pipe_ctx *primary_pipe, + struct pipe_ctx *secondary_pipe) +{ + int pipe_idx = secondary_pipe->pipe_idx; + struct pipe_ctx *sec_bot_pipe = secondary_pipe->bottom_pipe; + + *secondary_pipe = *primary_pipe; + secondary_pipe->bottom_pipe = sec_bot_pipe; + + secondary_pipe->pipe_idx = pipe_idx; + secondary_pipe->plane_res.mi = pool->mis[secondary_pipe->pipe_idx]; + secondary_pipe->plane_res.hubp = pool->hubps[secondary_pipe->pipe_idx]; + secondary_pipe->plane_res.ipp = pool->ipps[secondary_pipe->pipe_idx]; + secondary_pipe->plane_res.xfm = pool->transforms[secondary_pipe->pipe_idx]; + secondary_pipe->plane_res.dpp = pool->dpps[secondary_pipe->pipe_idx]; + secondary_pipe->plane_res.mpcc_inst = pool->dpps[secondary_pipe->pipe_idx]->inst; + secondary_pipe->stream_res.dsc = NULL; + if (primary_pipe->bottom_pipe && primary_pipe->bottom_pipe != secondary_pipe) { + ASSERT(!secondary_pipe->bottom_pipe); + secondary_pipe->bottom_pipe = primary_pipe->bottom_pipe; + secondary_pipe->bottom_pipe->top_pipe = secondary_pipe; + } + primary_pipe->bottom_pipe = secondary_pipe; + secondary_pipe->top_pipe = primary_pipe; + + ASSERT(primary_pipe->plane_state); +} + +unsigned int dcn20_calc_max_scaled_time( + unsigned int time_per_pixel, + enum mmhubbub_wbif_mode mode, + unsigned int urgent_watermark) +{ + unsigned int time_per_byte = 0; + unsigned int total_y_free_entry = 0x200; /* two memory piece for luma */ + unsigned int total_c_free_entry = 0x140; /* two memory piece for chroma */ + unsigned int small_free_entry, max_free_entry; + unsigned int buf_lh_capability; + unsigned int max_scaled_time; + + if (mode == PACKED_444) /* packed mode */ + time_per_byte = time_per_pixel/4; + else if (mode == PLANAR_420_8BPC) + time_per_byte = time_per_pixel; + else if (mode == PLANAR_420_10BPC) /* p010 */ + time_per_byte = time_per_pixel * 819/1024; + + if (time_per_byte == 0) + time_per_byte = 1; + + small_free_entry = (total_y_free_entry > total_c_free_entry) ? total_c_free_entry : total_y_free_entry; + max_free_entry = (mode == PACKED_444) ? 
total_y_free_entry + total_c_free_entry : small_free_entry; + buf_lh_capability = max_free_entry*time_per_byte*32/16; /* there is 4bit fraction */ + max_scaled_time = buf_lh_capability - urgent_watermark; + return max_scaled_time; +} + +void dcn20_set_mcif_arb_params( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + enum mmhubbub_wbif_mode wbif_mode; + struct mcif_arb_params *wb_arb_params; + int i, j, dwb_pipe; + + /* Writeback MCIF_WB arbitration parameters */ + dwb_pipe = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + for (j = 0; j < MAX_DWB_PIPES; j++) { + if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].wb_enabled == false) + continue; + + //wb_arb_params = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j].mcif_arb_params; + wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[dwb_pipe]; + + if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].dwb_params.out_format == dwb_scaler_mode_yuv420) { + if (context->res_ctx.pipe_ctx[i].stream->writeback_info[j].dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC) + wbif_mode = PLANAR_420_8BPC; + else + wbif_mode = PLANAR_420_10BPC; + } else + wbif_mode = PACKED_444; + + DC_FP_START(); + dcn20_fpu_set_wb_arb_params(wb_arb_params, context, pipes, pipe_cnt, i); + DC_FP_END(); + + wb_arb_params->slice_lines = 32; + wb_arb_params->arbitration_slice = 2; + wb_arb_params->max_scaled_time = dcn20_calc_max_scaled_time(wb_arb_params->time_per_pixel, + wbif_mode, + wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */ + + dwb_pipe++; + + if (dwb_pipe >= MAX_DWB_PIPES) + return; + } + if (dwb_pipe >= MAX_DWB_PIPES) + return; + } +} + +bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx) +{ + int i; + + /* Validate DSC config, dsc count validation is already done */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dsc_config dsc_cfg; + struct pipe_ctx *odm_pipe; + int opp_cnt = 1; + + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + opp_cnt++; + + /* Only need to validate top pipe */ + if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC) + continue; + + dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + + stream->timing.h_border_right) / opp_cnt; + dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + + stream->timing.v_border_bottom; + dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; + dsc_cfg.color_depth = stream->timing.display_color_depth; + dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false; + dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; + dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + + if (!pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg)) + return false; + } + return true; +} + +struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, + struct resource_context *res_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *primary_pipe) +{ + struct pipe_ctx *secondary_pipe = NULL; + + if (dc && primary_pipe) { + int j; + int preferred_pipe_idx = 0; + + /* first check the prev dc state: + * if this primary pipe has a bottom pipe in prev. 
state + * and if the bottom pipe is still available (which it should be), + * pick that pipe as secondary + * Same logic applies for ODM pipes + */ + if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { + preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx; + if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; + } + } + if (secondary_pipe == NULL && + dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) { + preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx; + if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; + } + } + + /* + * if this primary pipe does not have a bottom pipe in prev. state, + * start backward and find a pipe that was not a bottom pipe in the + * prev. dc state. This way we make sure we keep the same assignment as + * last state and will not have to reprogram every pipe + */ + if (secondary_pipe == NULL) { + for (j = dc->res_pool->pipe_count - 1; j >= 0; j--) { + if (dc->current_state->res_ctx.pipe_ctx[j].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[j].prev_odm_pipe == NULL) { + preferred_pipe_idx = j; + + if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; + break; + } + } + } + } + /* + * We should never hit this assert unless assignments are shuffled around; + * if this happens we will probably hit a vsync tdr + */ + ASSERT(secondary_pipe); + /* + * search backwards for the second pipe to keep pipe + * assignment more consistent + */ + if (secondary_pipe == NULL) { + for (j = dc->res_pool->pipe_count - 1; j >= 0; j--) { + preferred_pipe_idx = j; + + if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; + break; + } + } + } + } + + return secondary_pipe; +} + +void dcn20_merge_pipes_for_validate( + struct dc *dc, + struct dc_state *context) +{ + int i; + + /* merge previously split odm pipes since mode support needs to make the decision */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *odm_pipe = pipe->next_odm_pipe; + + if (pipe->prev_odm_pipe) + continue; + + pipe->next_odm_pipe = NULL; + while (odm_pipe) { + struct pipe_ctx *next_odm_pipe = odm_pipe->next_odm_pipe; + + odm_pipe->plane_state = NULL; + odm_pipe->stream = NULL; + odm_pipe->top_pipe = NULL; + odm_pipe->bottom_pipe = NULL; + odm_pipe->prev_odm_pipe = NULL; + odm_pipe->next_odm_pipe = NULL; + if (odm_pipe->stream_res.dsc) + dcn20_release_dsc(&context->res_ctx, dc->res_pool, &odm_pipe->stream_res.dsc); + /* Clear plane_res and stream_res */ + memset(&odm_pipe->plane_res, 0, sizeof(odm_pipe->plane_res)); + memset(&odm_pipe->stream_res, 0, sizeof(odm_pipe->stream_res)); + odm_pipe = next_odm_pipe; + } + if (pipe->plane_state) + resource_build_scaling_params(pipe); + } + + /* merge previously mpc split pipes since mode support needs to make the decision */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe; 
+ + if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) + continue; + + pipe->bottom_pipe = hsplit_pipe->bottom_pipe; + if (hsplit_pipe->bottom_pipe) + hsplit_pipe->bottom_pipe->top_pipe = pipe; + hsplit_pipe->plane_state = NULL; + hsplit_pipe->stream = NULL; + hsplit_pipe->top_pipe = NULL; + hsplit_pipe->bottom_pipe = NULL; + + /* Clear plane_res and stream_res */ + memset(&hsplit_pipe->plane_res, 0, sizeof(hsplit_pipe->plane_res)); + memset(&hsplit_pipe->stream_res, 0, sizeof(hsplit_pipe->stream_res)); + if (pipe->plane_state) + resource_build_scaling_params(pipe); + } +} + +int dcn20_validate_apply_pipe_split_flags( + struct dc *dc, + struct dc_state *context, + int vlevel, + int *split, + bool *merge) +{ + int i, pipe_idx, vlevel_split; + int plane_count = 0; + bool force_split = false; + bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID; + struct vba_vars_st *v = &context->bw_ctx.dml.vba; + int max_mpc_comb = v->maxMpcComb; + + if (context->stream_count > 1) { + if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP) + avoid_split = true; + } else if (dc->debug.force_single_disp_pipe_split) + force_split = true; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /** + * Workaround for avoiding pipe-split in cases where we'd split + * planes that are too small, resulting in splits that aren't + * valid for the scaler. + */ + if (pipe->plane_state && + (pipe->plane_state->dst_rect.width <= 16 || + pipe->plane_state->dst_rect.height <= 16 || + pipe->plane_state->src_rect.width <= 16 || + pipe->plane_state->src_rect.height <= 16)) + avoid_split = true; + + /* TODO: fix dc bugs and remove this split threshold thing */ + if (pipe->stream && !pipe->prev_odm_pipe && + (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) + ++plane_count; + } + if (plane_count > dc->res_pool->pipe_count / 2) + avoid_split = true; + + /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct dc_crtc_timing timing; + + if (!pipe->stream) + continue; + else { + timing = pipe->stream->timing; + if (timing.h_border_left + timing.h_border_right + + timing.v_border_top + timing.v_border_bottom > 0) { + avoid_split = true; + break; + } + } + } + + /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ + if (avoid_split) { + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) + if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 && + v->ModeSupport[vlevel][0]) + break; + /* Impossible to not split this pipe */ + if (vlevel > context->bw_ctx.dml.soc.num_states) + vlevel = vlevel_split; + else + max_mpc_comb = 0; + pipe_idx++; + } + v->maxMpcComb = max_mpc_comb; + } + + /* Split loop sets which pipe should be split based on dml outputs and dc flags */ + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + int pipe_plane = v->pipe_plane[pipe_idx]; + bool split4mpc = context->stream_count == 1 && plane_count == 1 + && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4; + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4) + 
split[i] = 4; + else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2) + split[i] = 2; + + if ((pipe->stream->view_format == + VIEW_3D_FORMAT_SIDE_BY_SIDE || + pipe->stream->view_format == + VIEW_3D_FORMAT_TOP_AND_BOTTOM) && + (pipe->stream->timing.timing_3d_format == + TIMING_3D_FORMAT_TOP_AND_BOTTOM || + pipe->stream->timing.timing_3d_format == + TIMING_3D_FORMAT_SIDE_BY_SIDE)) + split[i] = 2; + if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { + split[i] = 2; + v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1; + } + if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) { + split[i] = 4; + v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1; + } + /*420 format workaround*/ + if (pipe->stream->timing.h_addressable > 7680 && + pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + split[i] = 4; + } + v->ODMCombineEnabled[pipe_plane] = + v->ODMCombineEnablePerState[vlevel][pipe_plane]; + + if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) { + if (resource_get_mpc_slice_count(pipe) == 2) { + /*If need split for mpc but 2 way split already*/ + if (split[i] == 4) + split[i] = 2; /* 2 -> 4 MPC */ + else if (split[i] == 2) + split[i] = 0; /* 2 -> 2 MPC */ + else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) + merge[i] = true; /* 2 -> 1 MPC */ + } else if (resource_get_mpc_slice_count(pipe) == 4) { + /*If need split for mpc but 4 way split already*/ + if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe) + || !pipe->bottom_pipe)) { + merge[i] = true; /* 4 -> 2 MPC */ + } else if (split[i] == 0 && pipe->top_pipe && + pipe->top_pipe->plane_state == pipe->plane_state) + merge[i] = true; /* 4 -> 1 MPC */ + split[i] = 0; + } else if (resource_get_odm_slice_count(pipe) > 1) { + /* ODM -> MPC transition */ + if (pipe->prev_odm_pipe) { + split[i] = 0; + merge[i] = true; + } + } + } else { + if (resource_get_odm_slice_count(pipe) == 2) { + /*If need split for odm but 2 way split already*/ + if (split[i] == 4) + split[i] = 2; /* 2 -> 4 ODM */ + else if (split[i] == 2) + split[i] = 0; /* 2 -> 2 ODM */ + else if (pipe->prev_odm_pipe) { + ASSERT(0); /* NOT expected yet */ + merge[i] = true; /* exit ODM */ + } + } else if (resource_get_odm_slice_count(pipe) == 4) { + /*If need split for odm but 4 way split already*/ + if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) + || !pipe->next_odm_pipe)) { + merge[i] = true; /* 4 -> 2 ODM */ + } else if (split[i] == 0 && pipe->prev_odm_pipe) { + ASSERT(0); /* NOT expected yet */ + merge[i] = true; /* exit ODM */ + } + split[i] = 0; + } else if (resource_get_mpc_slice_count(pipe) > 1) { + /* MPC -> ODM transition */ + ASSERT(0); /* NOT expected yet */ + if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + split[i] = 0; + merge[i] = true; + } + } + } + + /* Adjust dppclk when split is forced, do not bother with dispclk */ + if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1) { + DC_FP_START(); + dcn20_fpu_adjust_dppclk(v, vlevel, max_mpc_comb, pipe_idx, false); + DC_FP_END(); + } + pipe_idx++; + } + + return vlevel; +} + +bool dcn20_fast_validate_bw( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *pipe_split_from, + int *vlevel_out, + bool fast_validate) +{ + bool out = false; + int split[MAX_PIPES] = { 0 }; + int pipe_cnt, i, pipe_idx, vlevel; + + 
ASSERT(pipes); + if (!pipes) + return false; + + dcn20_merge_pipes_for_validate(dc, context); + + DC_FP_START(); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + DC_FP_END(); + + *pipe_cnt_out = pipe_cnt; + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (vlevel > context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + + /*initialize pipe_just_split_from to invalid idx*/ + for (i = 0; i < MAX_PIPES; i++) + pipe_split_from[i] = -1; + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe; + + if (!pipe->stream || pipe_split_from[i] >= 0) + continue; + + pipe_idx++; + + if (!pipe->top_pipe && !pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { + hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); + ASSERT(hsplit_pipe); + if (!dcn20_split_stream_for_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe)) + goto validate_fail; + pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; + dcn20_build_mapped_resource(dc, context, pipe->stream); + } + + if (!pipe->plane_state) + continue; + /* Skip 2nd half of already split pipe */ + if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state) + continue; + + /* We do not support mpo + odm at the moment */ + if (hsplit_pipe && hsplit_pipe->plane_state != pipe->plane_state + && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) + goto validate_fail; + + if (split[i] == 2) { + if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) { + /* pipe not split previously needs split */ + hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) { + DC_FP_START(); + dcn20_fpu_adjust_dppclk(&context->bw_ctx.dml.vba, vlevel, context->bw_ctx.dml.vba.maxMpcComb, pipe_idx, true); + DC_FP_END(); + continue; + } + if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { + if (!dcn20_split_stream_for_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe)) + goto validate_fail; + dcn20_build_mapped_resource(dc, context, pipe->stream); + } else { + dcn20_split_stream_for_mpc( + &context->res_ctx, dc->res_pool, + pipe, hsplit_pipe); + resource_build_scaling_params(pipe); + resource_build_scaling_params(hsplit_pipe); + } + pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; + } + } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { + /* merge should already have been done */ + ASSERT(0); + } + } + /* Actual dsc count per stream dsc validation*/ + if (!dcn20_validate_dsc(dc, context)) { + context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] = + DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + *vlevel_out = vlevel; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + +bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported; + display_e2e_pipe_params_st *pipes; + + pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + if (!pipes) + return false; + + DC_FP_START(); + voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate, pipes); + DC_FP_END(); + + kfree(pipes); + 
return voltage_supported; +} + +struct pipe_ctx *dcn20_acquire_free_pipe_for_layer( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *opp_head) +{ + struct resource_context *res_ctx = &new_ctx->res_ctx; + struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(res_ctx, opp_head->stream); + struct pipe_ctx *sec_dpp_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, otg_master); + + ASSERT(otg_master); + + if (!sec_dpp_pipe) + return NULL; + + sec_dpp_pipe->stream = opp_head->stream; + sec_dpp_pipe->stream_res.tg = opp_head->stream_res.tg; + sec_dpp_pipe->stream_res.opp = opp_head->stream_res.opp; + + sec_dpp_pipe->plane_res.hubp = pool->hubps[sec_dpp_pipe->pipe_idx]; + sec_dpp_pipe->plane_res.ipp = pool->ipps[sec_dpp_pipe->pipe_idx]; + sec_dpp_pipe->plane_res.dpp = pool->dpps[sec_dpp_pipe->pipe_idx]; + sec_dpp_pipe->plane_res.mpcc_inst = pool->dpps[sec_dpp_pipe->pipe_idx]->inst; + + return sec_dpp_pipe; +} + +bool dcn20_get_dcc_compression_cap(const struct dc *dc, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output) +{ + return dc->res_pool->hubbub->funcs->get_dcc_compression_cap( + dc->res_pool->hubbub, + input, + output); +} + +static void dcn20_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn20_resource_pool *dcn20_pool = TO_DCN20_RES_POOL(*pool); + + dcn20_resource_destruct(dcn20_pool); + kfree(dcn20_pool); + *pool = NULL; +} + + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + + +enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state) +{ + enum surface_pixel_format surf_pix_format = plane_state->format; + unsigned int bpp = resource_pixel_format_to_bpp(surf_pix_format); + + plane_state->tiling_info.gfx9.swizzle = DC_SW_64KB_S; + if (bpp == 64) + plane_state->tiling_info.gfx9.swizzle = DC_SW_64KB_D; + + return DC_OK; +} + +void dcn20_release_pipe(struct dc_state *context, + struct pipe_ctx *pipe, + const struct resource_pool *pool) +{ + if (resource_is_pipe_type(pipe, OPP_HEAD) && pipe->stream_res.dsc) + dcn20_release_dsc(&context->res_ctx, pool, &pipe->stream_res.dsc); + memset(pipe, 0, sizeof(*pipe)); +} + +static const struct resource_funcs dcn20_res_pool_funcs = { + .destroy = dcn20_destroy_resource_pool, + .link_enc_create = dcn20_link_encoder_create, + .panel_cntl_create = dcn20_panel_cntl_create, + .validate_bandwidth = dcn20_validate_bandwidth, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn20_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .set_mcif_arb_params = dcn20_set_mcif_arb_params, + .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link +}; + +bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn20_dwbc *dwbc20 = kzalloc(sizeof(struct dcn20_dwbc), + GFP_KERNEL); + + if (!dwbc20) { + dm_error("DC: failed to create dwbc20!\n"); + return false; + } + dcn20_dwbc_construct(dwbc20, ctx, + 
&dwbc20_regs[i], + &dwbc20_shift, + &dwbc20_mask, + i); + pool->dwbc[i] = &dwbc20->base; + } + return true; +} + +bool dcn20_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + ASSERT(pipe_count > 0); + + for (i = 0; i < pipe_count; i++) { + struct dcn20_mmhubbub *mcif_wb20 = kzalloc(sizeof(struct dcn20_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb20) { + dm_error("DC: failed to create mcif_wb20!\n"); + return false; + } + + dcn20_mmhubbub_construct(mcif_wb20, ctx, + &mcif_wb20_regs[i], + &mcif_wb20_shift, + &mcif_wb20_mask, + i); + + pool->mcif_wb[i] = &mcif_wb20->base; + } + return true; +} + +static struct pp_smu_funcs *dcn20_pp_smu_create(struct dc_context *ctx) +{ + struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_ATOMIC); + + if (!pp_smu) + return pp_smu; + + dm_pp_get_funcs(ctx, pp_smu); + + if (pp_smu->ctx.ver != PP_SMU_VER_NV) + pp_smu = memset(pp_smu, 0, sizeof(struct pp_smu_funcs)); + + return pp_smu; +} + +static void dcn20_pp_smu_destroy(struct pp_smu_funcs **pp_smu) +{ + if (pp_smu && *pp_smu) { + kfree(*pp_smu); + *pp_smu = NULL; + } +} + +static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( + uint32_t hw_internal_rev) +{ + if (ASICREV_IS_NAVI14_M(hw_internal_rev)) + return &dcn2_0_nv14_soc; + + if (ASICREV_IS_NAVI12_P(hw_internal_rev)) + return &dcn2_0_nv12_soc; + + return &dcn2_0_soc; +} + +static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params( + uint32_t hw_internal_rev) +{ + /* NV14 */ + if (ASICREV_IS_NAVI14_M(hw_internal_rev)) + return &dcn2_0_nv14_ip; + + /* NV12 and NV10 */ + return &dcn2_0_ip; +} + +static enum dml_project get_dml_project_version(uint32_t hw_internal_rev) +{ + return DML_PROJECT_NAVI10v2; +} + +static bool init_soc_bounding_box(struct dc *dc, + struct dcn20_resource_pool *pool) +{ + struct _vcs_dpi_soc_bounding_box_st *loaded_bb = + get_asic_rev_soc_bb(dc->ctx->asic_id.hw_internal_rev); + struct _vcs_dpi_ip_params_st *loaded_ip = + get_asic_rev_ip_params(dc->ctx->asic_id.hw_internal_rev); + + DC_LOGGER_INIT(dc->ctx->logger); + + if (pool->base.pp_smu) { + struct pp_smu_nv_clock_table max_clocks = {0}; + unsigned int uclk_states[8] = {0}; + unsigned int num_states = 0; + enum pp_smu_status status; + bool clock_limits_available = false; + bool uclk_states_available = false; + + if (pool->base.pp_smu->nv_funcs.get_uclk_dpm_states) { + status = (pool->base.pp_smu->nv_funcs.get_uclk_dpm_states) + (&pool->base.pp_smu->nv_funcs.pp_smu, uclk_states, &num_states); + + uclk_states_available = (status == PP_SMU_RESULT_OK); + } + + if (pool->base.pp_smu->nv_funcs.get_maximum_sustainable_clocks) { + status = (*pool->base.pp_smu->nv_funcs.get_maximum_sustainable_clocks) + (&pool->base.pp_smu->nv_funcs.pp_smu, &max_clocks); + /* SMU cannot set DCF clock to anything equal to or higher than SOC clock + */ + if (max_clocks.dcfClockInKhz >= max_clocks.socClockInKhz) + max_clocks.dcfClockInKhz = max_clocks.socClockInKhz - 1000; + clock_limits_available = (status == PP_SMU_RESULT_OK); + } + + if (clock_limits_available && uclk_states_available && num_states) { + DC_FP_START(); + dcn20_update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states); + DC_FP_END(); + } else if (clock_limits_available) { + DC_FP_START(); + dcn20_cap_soc_clocks(loaded_bb, max_clocks); + DC_FP_END(); + } + } + + loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; + loaded_ip->max_num_dpp = pool->base.pipe_count; + DC_FP_START(); + dcn20_patch_bounding_box(dc, 
loaded_bb); + DC_FP_END(); + return true; +} + +static bool dcn20_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn20_resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + struct ddc_service_init_data ddc_init_data = {0}; + struct _vcs_dpi_soc_bounding_box_st *loaded_bb = + get_asic_rev_soc_bb(ctx->asic_id.hw_internal_rev); + struct _vcs_dpi_ip_params_st *loaded_ip = + get_asic_rev_ip_params(ctx->asic_id.hw_internal_rev); + enum dml_project dml_project_version = + get_dml_project_version(ctx->asic_id.hw_internal_rev); + + ctx->dc_bios->regs = &bios_regs; + pool->base.funcs = &dcn20_res_pool_funcs; + + if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) { + pool->base.res_cap = &res_cap_nv14; + pool->base.pipe_count = 5; + pool->base.mpcc_count = 5; + } else { + pool->base.res_cap = &res_cap_nv10; + pool->base.pipe_count = 6; + pool->base.mpcc_count = 6; + } + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + + dc->caps.max_slave_planes = 1; + dc->caps.max_slave_yuv_planes = 1; + dc->caps.max_slave_rgb_planes = 1; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.extended_aux_timeout_support = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 1; + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.dgam_rom_caps.pq = 0; + dc->caps.color.dpp.dgam_rom_caps.hlg = 0; + dc->caps.color.dpp.post_csc = 0; + dc->caps.color.dpp.gamma_corr = 0; + dc->caps.color.dpp.dgam_rom_for_yuv = 1; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN2, only MPC ROM + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 0; + dc->caps.color.mpc.num_3dluts = 0; + dc->caps.color.mpc.shared_3d_lut = 0; + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->caps.dp_hdmi21_pcon_support = true; + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + //dcn2.0x + dc->work_arounds.dedcn20_305_wa = true; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + pool->base.clock_sources[DCN20_CLK_SRC_PLL0] = + dcn20_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + 
pool->base.clock_sources[DCN20_CLK_SRC_PLL1] = + dcn20_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL2] = + dcn20_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL3] = + dcn20_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL4] = + dcn20_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL5] = + dcn20_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL5, + &clk_src_regs[5], false); + pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL; + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn20_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + pool->base.dccg = dccg2_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + pool->base.dmcu = dcn20_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + if (pool->base.abm == NULL) { + dm_error("DC: failed to create abm!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + pool->base.pp_smu = dcn20_pp_smu_create(ctx); + + + if (!init_soc_bounding_box(dc, pool)) { + dm_error("DC: failed to initialize soc bounding box!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + dml_init_instance(&dc->dml, loaded_bb, loaded_ip, dml_project_version); + + if (!dc->debug.disable_pplib_wm_range) { + struct pp_smu_wm_range_sets ranges = {0}; + int i = 0; + + ranges.num_reader_wm_sets = 0; + + if (loaded_bb->num_states == 1) { + ranges.reader_wm_sets[0].wm_inst = i; + ranges.reader_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.reader_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.reader_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + + ranges.num_reader_wm_sets = 1; + } else if (loaded_bb->num_states > 1) { + for (i = 0; i < 4 && i < loaded_bb->num_states; i++) { + ranges.reader_wm_sets[i].wm_inst = i; + ranges.reader_wm_sets[i].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.reader_wm_sets[i].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + DC_FP_START(); + dcn20_fpu_set_wm_ranges(i, &ranges, loaded_bb); + DC_FP_END(); + + ranges.num_reader_wm_sets = i + 1; + } + + ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.reader_wm_sets[ranges.num_reader_wm_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + } + + ranges.num_writer_wm_sets = 1; + + ranges.writer_wm_sets[0].wm_inst = 0; + ranges.writer_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + 
ranges.writer_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + ranges.writer_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.writer_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + + /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ + if (pool->base.pp_smu->nv_funcs.set_wm_ranges) + pool->base.pp_smu->nv_funcs.set_wm_ranges(&pool->base.pp_smu->nv_funcs.pp_smu, &ranges); + } + + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn20_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* mem input -> ipp -> dpp -> opp -> TG */ + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.hubps[i] = dcn20_hubp_create(ctx, i); + if (pool->base.hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create memory input!\n"); + goto create_fail; + } + + pool->base.ipps[i] = dcn20_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create input pixel processor!\n"); + goto create_fail; + } + + pool->base.dpps[i] = dcn20_dpp_create(ctx, i); + if (pool->base.dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + } + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn20_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn20_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn20_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn20_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + + pool->base.timing_generator_count = i; + + pool->base.mpc = dcn20_mpc_create(ctx); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + pool->base.hubbub = dcn20_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn20_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + if (!dcn20_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + if (!dcn20_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + dcn20_hw_sequencer_construct(dc); + + // IF NV12, set PG function pointer to NULL. 
It's not that + // PG isn't supported for NV12, it's that we don't want to + // program the registers because that will cause more power + // to be consumed. We could have created dcn20_init_hw to get + // the same effect by checking ASIC rev, but there was a + // request at some point to not check ASIC rev on hw sequencer. + if (ASICREV_IS_NAVI12_P(dc->ctx->asic_id.hw_internal_rev)) { + dc->hwseq->funcs.enable_power_gating_plane = NULL; + dc->debug.disable_dpp_power_gate = true; + dc->debug.disable_hubp_power_gate = true; + } + + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { + ddc_init_data.ctx = dc->ctx; + ddc_init_data.link = NULL; + ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; + ddc_init_data.id.enum_id = 0; + ddc_init_data.id.type = OBJECT_TYPE_GENERIC; + pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); + } else { + pool->base.oem_device = NULL; + } + + return true; + +create_fail: + + dcn20_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn20_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn20_resource_pool *pool = + kzalloc(sizeof(struct dcn20_resource_pool), GFP_ATOMIC); + + if (!pool) + return NULL; + + if (dcn20_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h new file mode 100644 index 000000000000..37ecaccc5d12 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h @@ -0,0 +1,170 @@ +/* +* Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DC_RESOURCE_DCN20_H__ +#define __DC_RESOURCE_DCN20_H__ + +#include "core_types.h" +#include "dml/dcn20/dcn20_fpu.h" + +#define TO_DCN20_RES_POOL(pool)\ + container_of(pool, struct dcn20_resource_pool, base) + +struct dc; +struct resource_pool; +struct _vcs_dpi_display_pipe_params_st; + +extern struct _vcs_dpi_ip_params_st dcn2_0_ip; +extern struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc; +extern struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc; +extern struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc; + +struct dcn20_resource_pool { + struct resource_pool base; +}; +struct resource_pool *dcn20_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +struct link_encoder *dcn20_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data); + +unsigned int dcn20_calc_max_scaled_time( + unsigned int time_per_pixel, + enum mmhubbub_wbif_mode mode, + unsigned int urgent_watermark); + +struct pipe_ctx *dcn20_acquire_free_pipe_for_layer( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *opp_head_pipe); +void dcn20_release_pipe(struct dc_state *context, + struct pipe_ctx *pipe, + const struct resource_pool *pool); +struct stream_encoder *dcn20_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx); + +struct dce_hwseq *dcn20_hwseq_create( + struct dc_context *ctx); + +bool dcn20_get_dcc_compression_cap(const struct dc *dc, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output); + +void dcn20_dpp_destroy(struct dpp **dpp); + +struct dpp *dcn20_dpp_create( + struct dc_context *ctx, + uint32_t inst); + +struct input_pixel_processor *dcn20_ipp_create( + struct dc_context *ctx, uint32_t inst); + +struct output_pixel_processor *dcn20_opp_create( + struct dc_context *ctx, uint32_t inst); + +struct dce_aux *dcn20_aux_engine_create( + struct dc_context *ctx, uint32_t inst); + +struct dce_i2c_hw *dcn20_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst); + +void dcn20_clock_source_destroy(struct clock_source **clk_src); + +struct display_stream_compressor *dcn20_dsc_create( + struct dc_context *ctx, uint32_t inst); +void dcn20_dsc_destroy(struct display_stream_compressor **dsc); + +struct hubp *dcn20_hubp_create( + struct dc_context *ctx, + uint32_t inst); +struct timing_generator *dcn20_timing_generator_create( + struct dc_context *ctx, + uint32_t instance); +struct mpc *dcn20_mpc_create(struct dc_context *ctx); +struct hubbub *dcn20_hubbub_create(struct dc_context *ctx); + +bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool); +bool dcn20_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool); + +void dcn20_set_mcif_arb_params( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt); +bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); +void dcn20_merge_pipes_for_validate( + struct dc *dc, + struct dc_state *context); +int dcn20_validate_apply_pipe_split_flags( + struct dc *dc, + struct dc_state *context, + int vlevel, + int *split, + bool *merge); +void dcn20_release_dsc(struct resource_context *res_ctx, + const struct resource_pool *pool, + struct display_stream_compressor **dsc); +bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx); +void dcn20_split_stream_for_mpc( + struct resource_context 
*res_ctx, + const struct resource_pool *pool, + struct pipe_ctx *primary_pipe, + struct pipe_ctx *secondary_pipe); +bool dcn20_split_stream_for_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *prev_odm_pipe, + struct pipe_ctx *next_odm_pipe); +void dcn20_acquire_dsc(const struct dc *dc, + struct resource_context *res_ctx, + struct display_stream_compressor **dsc, + int pipe_idx); +struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, + struct resource_context *res_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *primary_pipe); +bool dcn20_fast_validate_bw( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *pipe_split_from, + int *vlevel_out, + bool fast_validate); + +enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); +enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); +enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream); +enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); +enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state); + +#endif /* __DC_RESOURCE_DCN20_H__ */ + diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c new file mode 100644 index 000000000000..914b234d7f6b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -0,0 +1,1308 @@ +/* +* Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" +#include "dc.h" + +#include "dcn201/dcn201_init.h" +#include "dml/dcn20/dcn20_fpu.h" +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn201_resource.h" + +#include "dcn20/dcn20_resource.h" + +#include "dcn10/dcn10_hubp.h" +#include "dcn10/dcn10_ipp.h" +#include "dcn201/dcn201_mpc.h" +#include "dcn201/dcn201_hubp.h" +#include "irq/dcn201/irq_service_dcn201.h" +#include "dcn201/dcn201_dpp.h" +#include "dcn201/dcn201_hubbub.h" +#include "dcn201/dcn201_dccg.h" +#include "dcn201/dcn201_optc.h" +#include "dcn201/dcn201_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn201/dcn201_opp.h" +#include "dcn201/dcn201_link_encoder.h" +#include "dcn20/dcn20_stream_encoder.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" +#include "dcn201/dcn201_hubbub.h" +#include "dcn10/dcn10_resource.h" + +#include "cyan_skillfish_ip_offset.h" + +#include "dcn/dcn_2_0_3_offset.h" +#include "dcn/dcn_2_0_3_sh_mask.h" +#include "dpcs/dpcs_2_0_3_offset.h" +#include "dpcs/dpcs_2_0_3_sh_mask.h" + +#include "mmhub/mmhub_2_0_0_offset.h" +#include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "nbio/nbio_7_4_offset.h" + +#include "reg_helper.h" + +#define MIN_DISP_CLK_KHZ 100000 +#define MIN_DPP_CLK_KHZ 100000 + +static struct _vcs_dpi_ip_params_st dcn201_ip = { + .gpuvm_enable = 0, + .hostvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_max_page_table_levels = 4, + .hostvm_cached_page_table_levels = 0, + .pte_group_size_bytes = 2048, + .rob_buffer_size_kbytes = 168, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs_luma = 84, + .pde_proc_buffer_size_64k_reqs = 48, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 789504, + .is_line_buffer_bpp_fixed = 0, + .line_buffer_fixed_bpp = 0, + .dcc_supported = true, + .max_line_buffer_lines = 12, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .writeback_chroma_line_buffer_width_pixels = 4, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_taps = 12, + .writeback_max_vscl_taps = 12, + .writeback_line_buffer_luma_buffer_size = 0, + .writeback_line_buffer_chroma_buffer_size = 9600, + .cursor_buffer_size = 8, + .cursor_chunk_size = 2, + .max_num_otg = 2, + .max_num_dpp = 4, + .max_num_wb = 0, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 8, + .max_vscl_ratio = 8, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 30, + .dppclk_delay_subtotal = 77, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_scl = 50, + .dppclk_delay_cnvc_formatter = 8, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 87, + .dcfclk_cstate_latency = 10, + .max_inter_dcn_tile_repeaters = 8, + .number_of_cursors = 1, +}; + +static struct _vcs_dpi_soc_bounding_box_st dcn201_soc = { + .clock_limits = { + { + .state = 0, + .dscclk_mhz = 400.0, + .dcfclk_mhz = 1000.0, + 
.fabricclk_mhz = 200.0, + .dispclk_mhz = 300.0, + .dppclk_mhz = 300.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1254.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 1, + .dscclk_mhz = 400.0, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 250.0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1254.0, + .dram_speed_mts = 3600.0, + }, + { + .state = 2, + .dscclk_mhz = 400.0, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 750.0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1254.0, + .dram_speed_mts = 6800.0, + }, + { + .state = 3, + .dscclk_mhz = 400.0, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 250.0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1254.0, + .dram_speed_mts = 14000.0, + }, + { + .state = 4, + .dscclk_mhz = 400.0, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 750.0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1254.0, + .dram_speed_mts = 14000.0, + } + }, + .num_states = 4, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .urgent_latency_us = 4.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 256, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 256, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 256, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 80.0, + .max_avg_sdp_bw_use_normal_percent = 80.0, + .max_avg_dram_bw_use_normal_percent = 69.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 80.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.3, + .downspread_percent = 0.3, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 128, + .urgent_out_of_order_return_per_channel_bytes = 256, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 16, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 250.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3000, + .use_urgent_burst_bw = 0, +}; + +enum dcn20_clk_src_array_id { + DCN20_CLK_SRC_PLL0, + DCN20_CLK_SRC_PLL1, + DCN20_CLK_SRC_TOTAL_DCN201 +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file */ + +/* DCN */ + +#undef BASE_INNER +#define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRI_IX(reg_name, block, id)\ + .reg_name = ix ## block ## id ## _ ## reg_name + +#define DCCG_SRII(reg_name, block, id)\ + .block 
## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \ + mmMM ## reg_name + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN201(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN2_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN20(_MASK) +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), +}; + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN_COMMON_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid) \ +} + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), +}; + +#define LINK_ENCODER_MASK_SH_LIST_DCN201(mask_sh)\ + LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh) + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN201(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN201(_MASK) +}; + +#define ipp_regs(id)\ +[id] = {\ + IPP_REG_LIST_DCN201(id),\ +} + +static const struct dcn10_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2), + ipp_regs(3), +}; + 
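/*
 * Illustrative expansion only (hypothetical register FOO in block BAR,
 * instance 2 -- these names are not taken from the patch): the SR/SRI/SRII
 * helpers above build each entry of a registers[] array by token-pasting
 * the block name, instance id and register name, so
 *
 *     SRI(FOO, BAR, 2)
 *
 * becomes
 *
 *     .FOO = BASE(mmBAR2_FOO_BASE_IDX) + mmBAR2_FOO
 *
 * where BASE() resolves the _BASE_IDX segment number to the IP's base
 * address. The matching *_shift and *_mask structures are then generated
 * from the same SH list, instantiated once with __SHIFT and once with _MASK.
 */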
+static const struct dcn10_ipp_shift ipp_shift = { + IPP_MASK_SH_LIST_DCN201(__SHIFT) +}; + +static const struct dcn10_ipp_mask ipp_mask = { + IPP_MASK_SH_LIST_DCN201(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN201(id),\ +} + +static const struct dcn201_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), +}; + +static const struct dcn201_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN201(__SHIFT) +}; + +static const struct dcn201_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN201(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUX_RESET_MASK = 0 \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1) +}; + +#define tf_regs(id)\ +[id] = {\ + TF_REG_LIST_DCN201(id),\ +} + +static const struct dcn201_dpp_registers tf_regs[] = { + tf_regs(0), + tf_regs(1), + tf_regs(2), + tf_regs(3), +}; + +static const struct dcn201_dpp_shift tf_shift = { + TF_REG_LIST_SH_MASK_DCN201(__SHIFT) +}; + +static const struct dcn201_dpp_mask tf_mask = { + TF_REG_LIST_SH_MASK_DCN201(_MASK) +}; + +static const struct dcn201_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN201(0), + MPC_REG_LIST_DCN201(1), + MPC_REG_LIST_DCN201(2), + MPC_REG_LIST_DCN201(3), + MPC_REG_LIST_DCN201(4), + MPC_OUT_MUX_REG_LIST_DCN201(0), + MPC_OUT_MUX_REG_LIST_DCN201(1), +}; + +static const struct dcn201_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN201(__SHIFT) +}; + +static const struct dcn201_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN201(_MASK) +}; + +#define tg_regs_dcn201(id)\ +[id] = {TG_COMMON_REG_LIST_DCN201(id)} + +static const struct dcn_optc_registers tg_regs[] = { + tg_regs_dcn201(0), + tg_regs_dcn201(1) +}; + +static const struct dcn_optc_shift tg_shift = { + TG_COMMON_MASK_SH_LIST_DCN201(__SHIFT) +}; + +static const struct dcn_optc_mask tg_mask = { + TG_COMMON_MASK_SH_LIST_DCN201(_MASK) +}; + +#define hubp_regsDCN201(id)\ +[id] = {\ + HUBP_REG_LIST_DCN201(id)\ +} + +static const struct dcn201_hubp_registers hubp_regs[] = { + hubp_regsDCN201(0), + hubp_regsDCN201(1), + hubp_regsDCN201(2), + hubp_regsDCN201(3) +}; + +static const struct dcn201_hubp_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN201(__SHIFT) +}; + +static const struct dcn201_hubp_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN201(_MASK) +}; + +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN201(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN201(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN201(_MASK) +}; + + +static const struct dccg_registers dccg_regs = { + DCCG_COMMON_REG_LIST_DCN_BASE() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(_MASK) +}; + +static const struct resource_caps res_cap_dnc201 = { + .num_timing_generator = 2, + .num_opp = 2, + .num_video_plane = 4, + .num_audio = 2, + .num_stream_encoder = 2, + .num_pll = 2, + .num_ddc = 2, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = false, + .fp16 = true, + .p010 = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 1 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 250, + .fp16 = 250 + }, + 64, + 64 +}; + +static const struct 
dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = true, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .az_endpoint_mute_only = true, + .max_downscale_src_width = 3840, + .disable_pplib_wm_range = true, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .enable_tri_buf = false, + .enable_legacy_fast_update = true, + .using_dml2 = false, +}; + +static void dcn201_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN201_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn201_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn201_dpp *dpp = + kzalloc(sizeof(struct dcn201_dpp), GFP_ATOMIC); + + if (!dpp) + return NULL; + + if (dpp201_construct(dpp, ctx, inst, + &tf_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + kfree(dpp); + return NULL; +} + +static struct input_pixel_processor *dcn201_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn10_ipp *ipp = + kzalloc(sizeof(struct dcn10_ipp), GFP_ATOMIC); + + if (!ipp) { + return NULL; + } + + dcn20_ipp_construct(ipp, ctx, inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + + +static struct output_pixel_processor *dcn201_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn201_opp *opp = + kzalloc(sizeof(struct dcn201_opp), GFP_ATOMIC); + + if (!opp) { + return NULL; + } + + dcn201_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dcn201_aux_engine_create(struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_ATOMIC); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) +}; + +static struct dce_i2c_hw *dcn201_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_ATOMIC); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static struct mpc *dcn201_mpc_create(struct dc_context *ctx, uint32_t num_mpcc) +{ + struct dcn201_mpc *mpc201 = kzalloc(sizeof(struct dcn201_mpc), + GFP_ATOMIC); + + if (!mpc201) + return NULL; + + dcn201_mpc_construct(mpc201, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc); + + return &mpc201->base; +} + +static struct hubbub *dcn201_hubbub_create(struct dc_context *ctx) +{ + struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), + GFP_ATOMIC); + + if (!hubbub) + return NULL; + + hubbub201_construct(hubbub, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask); + + return &hubbub->base; +} + +static struct timing_generator 
*dcn201_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_ATOMIC); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &tg_regs[instance]; + tgn10->tg_shift = &tg_shift; + tgn10->tg_mask = &tg_mask; + + dcn201_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn201_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_ATOMIC); + struct dcn10_link_encoder *enc10 = &enc20->enc10; + + if (!enc20) + return NULL; + + dcn201_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc10->base; +} + +static struct clock_source *dcn201_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_ATOMIC); + + if (!clk_src) + return NULL; + + if (dce112_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + kfree(clk_src); + return NULL; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), + + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); +} + +static struct audio *dcn201_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct stream_encoder *dcn201_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1 = + kzalloc(sizeof(struct dcn10_stream_encoder), GFP_ATOMIC); + + if (!enc1) + return NULL; + + dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN201_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN201_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN201_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dcn201_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_ATOMIC); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn201_create_audio, + .create_stream_encoder = dcn201_stream_encoder_create, + .create_hwseq = dcn201_hwseq_create, +}; + +static void 
dcn201_clock_source_destroy(struct clock_source **clk_src) +{ + kfree(TO_DCE110_CLK_SRC(*clk_src)); + *clk_src = NULL; +} + +static void dcn201_resource_destruct(struct dcn201_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + + if (pool->base.mpc != NULL) { + kfree(TO_DCN201_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn201_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN10_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn201_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn201_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); +} + +static struct hubp *dcn201_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn201_hubp *hubp201 = + kzalloc(sizeof(struct dcn201_hubp), GFP_ATOMIC); + + if (!hubp201) + return NULL; + + if (dcn201_hubp_construct(hubp201, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp201->base; + + kfree(hubp201); + return NULL; +} + +static struct pipe_ctx *dcn201_acquire_free_pipe_for_layer( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *opp_head_pipe) +{ + struct resource_context *res_ctx = &new_ctx->res_ctx; + struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream); + struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe); + + if (!head_pipe) + ASSERT(0); + + if (!idle_pipe) + return NULL; + + idle_pipe->stream = head_pipe->stream; + idle_pipe->stream_res.tg = head_pipe->stream_res.tg; + idle_pipe->stream_res.opp = head_pipe->stream_res.opp; + + idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; + + return idle_pipe; +} + +static bool dcn201_get_dcc_compression_cap(const struct dc *dc, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output) +{ + return dc->res_pool->hubbub->funcs->get_dcc_compression_cap( + 
dc->res_pool->hubbub, + input, + output); +} + +static void dcn201_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + DC_FP_START(); + dcn201_populate_dml_writeback_from_context_fpu(dc, res_ctx, pipes); + DC_FP_END(); +} + +static void dcn201_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn201_resource_pool *dcn201_pool = TO_DCN201_RES_POOL(*pool); + + dcn201_resource_destruct(dcn201_pool); + kfree(dcn201_pool); + *pool = NULL; +} + +static void dcn201_link_init(struct dc_link *link) +{ + if (link->ctx->dc_bios->integrated_info) + link->dp_ss_off = !link->ctx->dc_bios->integrated_info->dp_ss_control; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn201_get_dcc_compression_cap, +}; + +static struct resource_funcs dcn201_res_pool_funcs = { + .link_init = dcn201_link_init, + .destroy = dcn201_destroy_resource_pool, + .link_enc_create = dcn201_link_encoder_create, + .panel_cntl_create = NULL, + .validate_bandwidth = dcn20_validate_bandwidth, + .populate_dml_pipes = dcn20_populate_dml_pipes_from_context, + .add_stream_to_ctx = dcn20_add_stream_to_ctx, + .add_dsc_to_stream_resource = NULL, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn201_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .set_mcif_arb_params = dcn20_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link +}; + +static bool dcn201_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn201_resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dnc201; + pool->base.funcs = &dcn201_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + + pool->base.pipe_count = 4; + pool->base.mpcc_count = 5; + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.5 w/a applied by default*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + + dc->caps.max_slave_planes = 1; + dc->caps.max_slave_yuv_planes = 1; + dc->caps.max_slave_rgb_planes = 1; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.extended_aux_timeout_support = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 1; + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.dgam_rom_caps.pq = 0; + dc->caps.color.dpp.dgam_rom_caps.hlg = 0; + dc->caps.color.dpp.post_csc = 0; + dc->caps.color.dpp.gamma_corr = 0; + dc->caps.color.dpp.dgam_rom_for_yuv = 1; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN2 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + 
dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 0; + dc->caps.color.mpc.num_3dluts = 0; + dc->caps.color.mpc.shared_3d_lut = 0; + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->debug = debug_defaults_drv; + + /*a0 only, remove later*/ + dc->work_arounds.no_connect_phy_config = true; + dc->work_arounds.dedcn20_305_wa = true; + /************************************************* + * Create resources * + *************************************************/ + + pool->base.clock_sources[DCN20_CLK_SRC_PLL0] = + dcn201_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL1] = + dcn201_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + + pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN201; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn201_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + goto create_fail; + } + } + + pool->base.dccg = dccg201_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + goto create_fail; + } + + dcn201_ip.max_num_otg = pool->base.res_cap->num_timing_generator; + dcn201_ip.max_num_dpp = pool->base.pipe_count; + dml_init_instance(&dc->dml, &dcn201_soc, &dcn201_ip, DML_PROJECT_DCN201); + { + struct irq_service_init_data init_data; + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn201_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + } + + /* mem input -> ipp -> dpp -> opp -> TG */ + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.hubps[i] = dcn201_hubp_create(ctx, i); + if (pool->base.hubps[i] == NULL) { + dm_error( + "DC: failed to create memory input!\n"); + goto create_fail; + } + + pool->base.ipps[i] = dcn201_ipp_create(ctx, i); + if (pool->base.ipps[i] == NULL) { + dm_error( + "DC: failed to create input pixel processor!\n"); + goto create_fail; + } + + pool->base.dpps[i] = dcn201_dpp_create(ctx, i); + if (pool->base.dpps[i] == NULL) { + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn201_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn201_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn201_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn201_timing_generator_create( + ctx, i); + if 
(pool->base.timing_generators[i] == NULL) { + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + + pool->base.timing_generator_count = i; + + pool->base.mpc = dcn201_mpc_create(ctx, pool->base.mpcc_count); + if (pool->base.mpc == NULL) { + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + pool->base.hubbub = dcn201_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + dcn201_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + return true; + +create_fail: + + dcn201_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn201_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn201_resource_pool *pool = + kzalloc(sizeof(struct dcn201_resource_pool), GFP_ATOMIC); + + if (!pool) + return NULL; + + if (dcn201_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h new file mode 100644 index 000000000000..e0467d17d4ae --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h @@ -0,0 +1,50 @@ +/* +* Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DC_RESOURCE_DCN201_H__ +#define __DC_RESOURCE_DCN201_H__ + +#include "core_types.h" + +#define RRDPCS_PHY_DP_TX_PSTATE_POWER_UP 0x00000000 +#define RRDPCS_PHY_DP_TX_PSTATE_HOLD 0x00000001 +#define RRDPCS_PHY_DP_TX_PSTATE_HOLD_OFF 0x00000002 +#define RRDPCS_PHY_DP_TX_PSTATE_POWER_DOWN 0x00000003 + +#define TO_DCN201_RES_POOL(pool)\ + container_of(pool, struct dcn201_resource_pool, base) + +struct dc; +struct resource_pool; +struct _vcs_dpi_display_pipe_params_st; + +struct dcn201_resource_pool { + struct resource_pool base; +}; +struct resource_pool *dcn201_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +#endif /* __DC_RESOURCE_DCN201_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c new file mode 100644 index 000000000000..c07da45e1e2c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -0,0 +1,1745 @@ +/* +* Copyright 2018 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include + +#include "dm_services.h" +#include "dc.h" + +#include "dcn21/dcn21_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn20/dcn20_resource.h" +#include "dcn21/dcn21_resource.h" + +#include "dml/dcn20/dcn20_fpu.h" + +#include "clk_mgr.h" +#include "dcn10/dcn10_hubp.h" +#include "dcn10/dcn10_ipp.h" +#include "dcn20/dcn20_hubbub.h" +#include "dcn20/dcn20_mpc.h" +#include "dcn20/dcn20_hubp.h" +#include "dcn21/dcn21_hubp.h" +#include "irq/dcn21/irq_service_dcn21.h" +#include "dcn20/dcn20_dpp.h" +#include "dcn20/dcn20_optc.h" +#include "dcn21/dcn21_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn20/dcn20_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn21/dcn21_link_encoder.h" +#include "dcn20/dcn20_stream_encoder.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "virtual/virtual_stream_encoder.h" +#include "dml/display_mode_vba.h" +#include "dcn20/dcn20_dccg.h" +#include "dcn21/dcn21_dccg.h" +#include "dcn21/dcn21_hubbub.h" +#include "dcn10/dcn10_resource.h" +#include "dce/dce_panel_cntl.h" + +#include "dcn20/dcn20_dwb.h" +#include "dcn20/dcn20_mmhubbub.h" +#include "dpcs/dpcs_2_1_0_offset.h" +#include "dpcs/dpcs_2_1_0_sh_mask.h" + +#include "renoir_ip_offset.h" +#include "dcn/dcn_2_1_0_offset.h" +#include "dcn/dcn_2_1_0_sh_mask.h" + +#include "nbio/nbio_7_0_offset.h" + +#include "mmhub/mmhub_2_0_0_offset.h" +#include "mmhub/mmhub_2_0_0_sh_mask.h" + +#include "reg_helper.h" +#include "dce/dce_abm.h" +#include "dce/dce_dmcu.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" +#include "dcn21_resource.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" +#include "dce/dmub_psr.h" +#include "dce/dmub_abm.h" + +/* begin ********************* + * macros to expend register list macro defined in HW object header file */ + +/* DCN */ +#define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIF0_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \ + mmMM ## reg_name + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN2_1(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + 
clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E), +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +static const struct dce_dmcu_registers dmcu_regs = { + DMCU_DCN20_REG_LIST() +}; + +static const struct dce_dmcu_shift dmcu_shift = { + DMCU_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dce_dmcu_mask dmcu_mask = { + DMCU_MASK_SH_LIST_DCN10(_MASK) +}; + +static const struct dce_abm_registers abm_regs = { + ABM_DCN20_REG_LIST() +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN20(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_COMMON_REG_LIST_DCN_BASE() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN2_1(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN2_1(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN20(id),\ +} + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4), + opp_regs(5), +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define tg_regs(id)\ +[id] = {TG_COMMON_REG_LIST_DCN2_0(id)} + +static const struct dcn_optc_registers tg_regs[] = { + tg_regs(0), + tg_regs(1), + tg_regs(2), + tg_regs(3) +}; + +static const struct dcn_optc_shift tg_shift = { + TG_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dcn_optc_mask tg_mask = { + TG_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +static const struct dcn20_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN2_0(0), + MPC_REG_LIST_DCN2_0(1), + MPC_REG_LIST_DCN2_0(2), + MPC_REG_LIST_DCN2_0(3), + MPC_REG_LIST_DCN2_0(4), + MPC_REG_LIST_DCN2_0(5), + MPC_OUT_MUX_REG_LIST_DCN2_0(0), + MPC_OUT_MUX_REG_LIST_DCN2_0(1), + MPC_OUT_MUX_REG_LIST_DCN2_0(2), + MPC_OUT_MUX_REG_LIST_DCN2_0(3), + MPC_DBG_REG_LIST_DCN2_0() +}; + +static const struct dcn20_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT), + MPC_DEBUG_REG_LIST_SH_DCN20 +}; + +static const struct dcn20_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN2_0(_MASK), + MPC_DEBUG_REG_LIST_MASK_DCN20 +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN21(id)\ +} + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3) +}; + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN21(__SHIFT) +}; + 
+static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN21(_MASK) +}; + +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN21() +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN21(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN21(_MASK) +}; + + +#define vmid_regs(id)\ +[id] = {\ + DCN20_VMID_REG_LIST(id)\ +} + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +#define dsc_regsDCN20(id)\ +[id] = {\ + DSC_REG_LIST_DCN20(id)\ +} + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1), + dsc_regsDCN20(2), + dsc_regsDCN20(3), + dsc_regsDCN20(4), + dsc_regsDCN20(5) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +#define ipp_regs(id)\ +[id] = {\ + IPP_REG_LIST_DCN20(id),\ +} + +static const struct dcn10_ipp_registers ipp_regs[] = { + ipp_regs(0), + ipp_regs(1), + ipp_regs(2), + ipp_regs(3), +}; + +static const struct dcn10_ipp_shift ipp_shift = { + IPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn10_ipp_mask ipp_mask = { + IPP_MASK_SH_LIST_DCN20(_MASK), +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN20(id),\ +} + + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), +}; + +#define tf_regs(id)\ +[id] = {\ + TF_REG_LIST_DCN20(id),\ + TF_REG_LIST_DCN20_COMMON_APPEND(id),\ +} + +static const struct dcn2_dpp_registers tf_regs[] = { + tf_regs(0), + tf_regs(1), + tf_regs(2), + tf_regs(3), +}; + +static const struct dcn2_dpp_shift tf_shift = { + TF_REG_LIST_SH_MASK_DCN20(__SHIFT), + TF_DEBUG_REG_LIST_SH_DCN20 +}; + +static const struct dcn2_dpp_mask tf_mask = { + TF_REG_LIST_SH_MASK_DCN20(_MASK), + TF_DEBUG_REG_LIST_MASK_DCN20 +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN2_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4), +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN20(_MASK) +}; + +static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu); + +static struct input_pixel_processor *dcn21_ipp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn10_ipp *ipp = + kzalloc(sizeof(struct dcn10_ipp), GFP_KERNEL); + + if 
(!ipp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_ipp_construct(ipp, ctx, inst, + &ipp_regs[inst], &ipp_shift, &ipp_mask); + return &ipp->base; +} + +static struct dpp *dcn21_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_dpp *dpp = + kzalloc(sizeof(struct dcn20_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp2_construct(dpp, ctx, inst, + &tf_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +static struct dce_aux *dcn21_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} + +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) +}; + +static struct dce_i2c_hw *dcn21_i2c_hw_create(struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static const struct resource_caps res_cap_rn = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 4, // 4 audio endpoints. 
4 audio streams + .num_stream_encoder = 5, + .num_pll = 5, // maybe 3 because the last two used for USB-c + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_dsc = 3, +}; + +#ifdef DIAGS_BUILD +static const struct resource_caps res_cap_rn_FPGA_4pipe = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 7, + .num_stream_encoder = 4, + .num_pll = 4, + .num_dwb = 1, + .num_ddc = 4, + .num_dsc = 0, +}; + +static const struct resource_caps res_cap_rn_FPGA_2pipe_dsc = { + .num_timing_generator = 2, + .num_opp = 2, + .num_video_plane = 2, + .num_audio = 7, + .num_stream_encoder = 2, + .num_pll = 4, + .num_dwb = 1, + .num_ddc = 4, + .num_dsc = 2, +}; +#endif + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 250, + .fp16 = 250 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = false, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = true, + .min_disp_clk_khz = 100000, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 4096, + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = true, + .disable_48mhz_pwrdwn = false, + .usbc_combo_phy_reset_wa = true, + .dmub_command_table = true, + .use_max_lb = true, + .enable_legacy_fast_update = true, + .using_dml2 = false, +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + .disallow_replay = false, + }, + .ilr = { + .optimize_edp_link_rate = true, + }, +}; + +enum dcn20_clk_src_array_id { + DCN20_CLK_SRC_PLL0, + DCN20_CLK_SRC_PLL1, + DCN20_CLK_SRC_PLL2, + DCN20_CLK_SRC_PLL3, + DCN20_CLK_SRC_PLL4, + DCN20_CLK_SRC_TOTAL_DCN21 +}; + +static void dcn21_resource_destruct(struct dcn21_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn20_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + 
kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN20_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN20_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + if (pool->base.abm != NULL) { + if (pool->base.abm->ctx->dc->config.disable_dmcu) + dmub_abm_destroy(&pool->base.abm); + else + dce_abm_destroy(&pool->base.abm); + } + + if (pool->base.dmcu != NULL) + dce_dmcu_destroy(&pool->base.dmcu); + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.pp_smu != NULL) + dcn21_pp_smu_destroy(&pool->base.pp_smu); +} + +bool dcn21_fast_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *pipe_split_from, + int *vlevel_out, + bool fast_validate) +{ + bool out = false; + int split[MAX_PIPES] = { 0 }; + int pipe_cnt, i, pipe_idx, vlevel; + + ASSERT(pipes); + if (!pipes) + return false; + + dcn20_merge_pipes_for_validate(dc, context); + + DC_FP_START(); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + DC_FP_END(); + + *pipe_cnt_out = pipe_cnt; + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh_and_mclk_switch; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (vlevel > context->bw_ctx.dml.soc.num_states) { + /* + * If mode is unsupported or there's still no p-state support then + * fall back to favoring voltage. + * + * We don't actually support prefetch mode 2, so require that we + * at least support prefetch mode 1. 
+ */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel > context->bw_ctx.dml.soc.num_states) + goto validate_fail; + } + + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + if (!pipe->stream) + continue; + + /* We only support full screen mpo with ODM */ + if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled + && pipe->plane_state && mpo_pipe + && memcmp(&mpo_pipe->plane_state->clip_rect, + &pipe->stream->src, + sizeof(struct rect)) != 0) { + ASSERT(mpo_pipe->plane_state != pipe->plane_state); + goto validate_fail; + } + pipe_idx++; + } + + /*initialize pipe_just_split_from to invalid idx*/ + for (i = 0; i < MAX_PIPES; i++) + pipe_split_from[i] = -1; + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = pipe->bottom_pipe; + + if (!pipe->stream || pipe_split_from[i] >= 0) + continue; + + pipe_idx++; + + if (!pipe->top_pipe && !pipe->plane_state && context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { + hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); + ASSERT(hsplit_pipe); + if (!dcn20_split_stream_for_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe)) + goto validate_fail; + pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; + dcn20_build_mapped_resource(dc, context, pipe->stream); + } + + if (!pipe->plane_state) + continue; + /* Skip 2nd half of already split pipe */ + if (pipe->top_pipe && pipe->plane_state == pipe->top_pipe->plane_state) + continue; + + if (split[i] == 2) { + if (!hsplit_pipe || hsplit_pipe->plane_state != pipe->plane_state) { + /* pipe not split previously needs split */ + hsplit_pipe = dcn20_find_secondary_pipe(dc, &context->res_ctx, dc->res_pool, pipe); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) { + DC_FP_START(); + dcn20_fpu_adjust_dppclk(&context->bw_ctx.dml.vba, vlevel, context->bw_ctx.dml.vba.maxMpcComb, pipe_idx, true); + DC_FP_END(); + continue; + } + if (context->bw_ctx.dml.vba.ODMCombineEnabled[pipe_idx]) { + if (!dcn20_split_stream_for_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe)) + goto validate_fail; + dcn20_build_mapped_resource(dc, context, pipe->stream); + } else { + dcn20_split_stream_for_mpc( + &context->res_ctx, dc->res_pool, + pipe, hsplit_pipe); + resource_build_scaling_params(pipe); + resource_build_scaling_params(hsplit_pipe); + } + pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; + } + } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { + /* merge should already have been done */ + ASSERT(0); + } + } + /* Actual dsc count per stream dsc validation*/ + if (!dcn20_validate_dsc(dc, context)) { + context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] = + DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + *vlevel_out = vlevel; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + +/* + * Some of the functions further below use the FPU, so we need to wrap this + * with DC_FP_START()/DC_FP_END(). 
Use the same approach as for + * dcn20_validate_bandwidth in dcn20_resource.c. + */ +static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported; + display_e2e_pipe_params_st *pipes; + + pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + if (!pipes) + return false; + + DC_FP_START(); + voltage_supported = dcn21_validate_bandwidth_fp(dc, context, fast_validate, pipes); + DC_FP_END(); + + kfree(pipes); + return voltage_supported; +} + +static void dcn21_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn21_resource_pool *dcn21_pool = TO_DCN21_RES_POOL(*pool); + + dcn21_resource_destruct(dcn21_pool); + kfree(dcn21_pool); + *pool = NULL; +} + +static struct clock_source *dcn21_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn20_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static struct hubp *dcn21_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn21_hubp *hubp21 = + kzalloc(sizeof(struct dcn21_hubp), GFP_KERNEL); + + if (!hubp21) + return NULL; + + if (hubp21_construct(hubp21, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp21->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp21); + return NULL; +} + +static struct hubbub *dcn21_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub) + return NULL; + + hubbub21_construct(hubbub, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask); + + for (i = 0; i < res_cap_rn.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + hubbub->num_vmid = res_cap_rn.num_vmid; + + return &hubbub->base; +} + +static struct output_pixel_processor *dcn21_opp_create(struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct timing_generator *dcn21_timing_generator_create(struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &tg_regs[instance]; + tgn10->tg_shift = &tg_shift; + tgn10->tg_mask = &tg_mask; + + dcn20_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static struct mpc *dcn21_mpc_create(struct dc_context *ctx) +{ + struct dcn20_mpc *mpc20 = kzalloc(sizeof(struct dcn20_mpc), + GFP_KERNEL); + + if (!mpc20) + return NULL; + + dcn20_mpc_construct(mpc20, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + 6); + + return &mpc20->base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} 
+ + +static struct display_stream_compressor *dcn21_dsc_create(struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +static struct pp_smu_funcs *dcn21_pp_smu_create(struct dc_context *ctx) +{ + struct pp_smu_funcs *pp_smu = kzalloc(sizeof(*pp_smu), GFP_KERNEL); + + if (!pp_smu) + return pp_smu; + + dm_pp_get_funcs(ctx, pp_smu); + + if (pp_smu->ctx.ver != PP_SMU_VER_RN) + pp_smu = memset(pp_smu, 0, sizeof(struct pp_smu_funcs)); + + + return pp_smu; +} + +static void dcn21_pp_smu_destroy(struct pp_smu_funcs **pp_smu) +{ + if (pp_smu && *pp_smu) { + kfree(*pp_smu); + *pp_smu = NULL; + } +} + +static struct audio *dcn21_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static struct stream_encoder *dcn21_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1 = + kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + + if (!enc1) + return NULL; + + dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN21_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN21_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN21_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dcn21_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + hws->wa.DEGVIDCN21 = true; + hws->wa.disallow_self_refresh_during_multi_plane_transition = true; + } + return hws; +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn21_create_audio, + .create_stream_encoder = dcn21_stream_encoder_create, + .create_hwseq = dcn21_hwseq_create, +}; + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN2_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + DPCS_DCN21_REG_LIST(id), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ +} + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), + link_regs(4, E), +}; + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCN_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct 
dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN20(__SHIFT),\ + DPCS_DCN21_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN20(_MASK),\ + DPCS_DCN21_MASK_SH_LIST(_MASK) +}; + +static int map_transmitter_id_to_phy_instance( + enum transmitter transmitter) +{ + switch (transmitter) { + case TRANSMITTER_UNIPHY_A: + return 0; + break; + case TRANSMITTER_UNIPHY_B: + return 1; + break; + case TRANSMITTER_UNIPHY_C: + return 2; + break; + case TRANSMITTER_UNIPHY_D: + return 3; + break; + case TRANSMITTER_UNIPHY_E: + return 4; + break; + default: + ASSERT(0); + return 0; + } +} + +static struct link_encoder *dcn21_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn21_link_encoder *enc21 = + kzalloc(sizeof(struct dcn21_link_encoder), GFP_KERNEL); + int link_regs_id; + + if (!enc21) + return NULL; + + link_regs_id = + map_transmitter_id_to_phy_instance(enc_init_data->transmitter); + + dcn21_link_encoder_construct(enc21, + enc_init_data, + &link_enc_feature, + &link_enc_regs[link_regs_id], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc21->enc10.base; +} + +static struct panel_cntl *dcn21_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static void dcn21_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + +#define CTX ctx + +#define REG(reg_name) \ + (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* RV1 support max 4 pipes */ + value = value & 0xf; + return value; +} + +static enum dc_status dcn21_patch_unknown_plane_state(struct dc_plane_state *plane_state) +{ + if (plane_state->ctx->dc->debug.disable_dcc == DCC_ENABLE) { + plane_state->dcc.enable = 1; + /* align to our worst case block width */ + plane_state->dcc.meta_pitch = ((plane_state->src_rect.width + 1023) / 1024) * 1024; + } + + return dcn20_patch_unknown_plane_state(plane_state); +} + +static const struct resource_funcs dcn21_res_pool_funcs = { + .destroy = dcn21_destroy_resource_pool, + .link_enc_create = dcn21_link_encoder_create, + .panel_cntl_create = dcn21_panel_cntl_create, + .validate_bandwidth = dcn21_validate_bandwidth, + .populate_dml_pipes = dcn21_populate_dml_pipes_from_context, + .add_stream_to_ctx = dcn20_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .populate_dml_writeback_from_context = 
dcn20_populate_dml_writeback_from_context, + .patch_unknown_plane_state = dcn21_patch_unknown_plane_state, + .set_mcif_arb_params = dcn20_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .update_bw_bounding_box = dcn21_update_bw_bounding_box, + .get_panel_config_defaults = dcn21_get_panel_config_defaults, +}; + +static bool dcn21_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn21_resource_pool *pool) +{ + int i, j; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + uint32_t pipe_fuses = read_pipe_fuses(ctx); + uint32_t num_pipes; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_rn; +#ifdef DIAGS_BUILD + if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + //pool->base.res_cap = &res_cap_nv10_FPGA_2pipe_dsc; + pool->base.res_cap = &res_cap_rn_FPGA_4pipe; +#endif + + pool->base.funcs = &dcn21_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + + /* max pipe num for ASIC before check pipe fuses */ + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + + dc->caps.max_downscale_ratio = 200; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + + dc->caps.max_slave_planes = 1; + dc->caps.max_slave_yuv_planes = 1; + dc->caps.max_slave_rgb_planes = 1; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.is_apu = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 1; + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.dgam_rom_caps.pq = 0; + dc->caps.color.dpp.dgam_rom_caps.hlg = 0; + dc->caps.color.dpp.post_csc = 0; + dc->caps.color.dpp.gamma_corr = 0; + dc->caps.color.dpp.dgam_rom_for_yuv = 1; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN2 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 0; + dc->caps.color.mpc.num_3dluts = 0; + dc->caps.color.mpc.shared_3d_lut = 0; + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->caps.dp_hdmi21_pcon_support = true; + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + pool->base.clock_sources[DCN20_CLK_SRC_PLL0] 
= + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL1] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL2] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL3] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL4] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + pool->base.dccg = dccg21_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + if (!dc->config.disable_dmcu) { + pool->base.dmcu = dcn21_dmcu_create(ctx, + &dmcu_regs, + &dmcu_shift, + &dmcu_mask); + if (pool->base.dmcu == NULL) { + dm_error("DC: failed to create dmcu!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + dc->debug.dmub_command_table = false; + } + + if (dc->config.disable_dmcu) { + pool->base.psr = dmub_psr_create(ctx); + + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + if (dc->config.disable_dmcu) + pool->base.abm = dmub_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + else + pool->base.abm = dce_abm_create(ctx, + &abm_regs, + &abm_shift, + &abm_mask); + + pool->base.pp_smu = dcn21_pp_smu_create(ctx); + + num_pipes = dcn2_1_ip.max_num_dpp; + + for (i = 0; i < dcn2_1_ip.max_num_dpp; i++) + if (pipe_fuses & 1 << i) + num_pipes--; + dcn2_1_ip.max_num_dpp = num_pipes; + dcn2_1_ip.max_num_otg = num_pipes; + + dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); + + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn21_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + j = 0; + /* mem input -> ipp -> dpp -> opp -> TG */ + for (i = 0; i < pool->base.pipe_count; i++) { + /* if pipe is disabled, skip instance of HW pipe, + * i.e, skip ASIC register instance + */ + if ((pipe_fuses & (1 << i)) != 0) + continue; + + pool->base.hubps[j] = dcn21_hubp_create(ctx, i); + if (pool->base.hubps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create memory input!\n"); + goto create_fail; + } + + pool->base.ipps[j] = dcn21_ipp_create(ctx, i); + if (pool->base.ipps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create input pixel processor!\n"); + goto create_fail; + } + + pool->base.dpps[j] = dcn21_dpp_create(ctx, i); + if (pool->base.dpps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + + pool->base.opps[j] = dcn21_opp_create(ctx, i); + if (pool->base.opps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + + 
pool->base.timing_generators[j] = dcn21_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + j++; + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn21_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn21_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + pool->base.timing_generator_count = j; + pool->base.pipe_count = j; + pool->base.mpcc_count = j; + + pool->base.mpc = dcn21_mpc_create(ctx); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + pool->base.hubbub = dcn21_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn21_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + if (!dcn20_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + if (!dcn20_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + dcn21_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + return true; + +create_fail: + + dcn21_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn21_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn21_resource_pool *pool = + kzalloc(sizeof(struct dcn21_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn21_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h new file mode 100644 index 000000000000..f7ecc002c2f7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h @@ -0,0 +1,56 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN21_RESOURCE_H_ +#define _DCN21_RESOURCE_H_ + +#include "core_types.h" + +#define TO_DCN21_RES_POOL(pool)\ + container_of(pool, struct dcn21_resource_pool, base) + +struct dc; +struct resource_pool; +struct _vcs_dpi_display_pipe_params_st; + +extern struct _vcs_dpi_ip_params_st dcn2_1_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc; + +struct dcn21_resource_pool { + struct resource_pool base; +}; +struct resource_pool *dcn21_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); +bool dcn21_fast_validate_bw( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *pipe_split_from, + int *vlevel_out, + bool fast_validate); + +#endif /* _DCN21_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c new file mode 100644 index 000000000000..2b6dcb489b90 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -0,0 +1,2611 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + + +#include "dm_services.h" +#include "dc.h" + +#include "dcn30/dcn30_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn20/dcn20_resource.h" + +#include "dcn30_resource.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn30/dcn30_mpc.h" +#include "dcn30/dcn30_hubp.h" +#include "irq/dcn30/irq_service_dcn30.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn30/dcn30_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn30/dcn30_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn30/dcn30_dio_link_encoder.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dml/display_mode_vba.h" +#include "dcn30/dcn30_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "link.h" +#include "dce/dce_panel_cntl.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn30/dcn30_mmhubbub.h" + +#include "sienna_cichlid_ip_offset.h" +#include "dcn/dcn_3_0_0_offset.h" +#include "dcn/dcn_3_0_0_sh_mask.h" + +#include "nbio/nbio_7_4_offset.h" + +#include "dpcs/dpcs_3_0_0_offset.h" +#include "dpcs/dpcs_3_0_0_sh_mask.h" + +#include "mmhub/mmhub_2_0_0_offset.h" +#include "mmhub/mmhub_2_0_0_sh_mask.h" + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#include "dml/dcn30/dcn30_fpu.h" +#include "dml/dcn30/display_mode_vba_30.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" +#include "amdgpu_socbb.h" +#include "dc_dmub_srv.h" + +#define DC_LOGGER \ + dc->ctx->logger +#define DC_LOGGER_INIT(logger) + +enum dcn30_clk_src_array_id { + DCN30_CLK_SRC_PLL0, + DCN30_CLK_SRC_PLL1, + DCN30_CLK_SRC_PLL2, + DCN30_CLK_SRC_PLL3, + DCN30_CLK_SRC_PLL4, + DCN30_CLK_SRC_PLL5, + DCN30_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## 
block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(mmMM ## reg_name ## _BASE_IDX) + \ + mmMM ## reg_name + +/* CLOCK */ +#define CLK_BASE_INNER(seg) \ + CLK_BASE__INST0_SEG ## seg + +#define CLK_BASE(seg) \ + CLK_BASE_INNER(seg) + +#define CLK_SRI(reg_name, block, inst)\ + .reg_name = CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## _ ## inst ## _ ## reg_name + + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN2_0(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E), + clk_src_regs(5, F) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +#define abm_regs(id)\ +[id] = {\ + ABM_DCN30_REG_LIST(id)\ +} + +static const struct dce_abm_registers abm_regs[] = { + abm_regs(0), + abm_regs(1), + abm_regs(2), + abm_regs(3), + abm_regs(4), + abm_regs(5), +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN30(_MASK) +}; + + + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6) +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs(id)\ +[id] = {\ + VPG_DCN3_REG_LIST(id)\ +} + +static const struct dcn30_vpg_registers vpg_regs[] = { + vpg_regs(0), + vpg_regs(1), + vpg_regs(2), + vpg_regs(3), + vpg_regs(4), + vpg_regs(5), + vpg_regs(6), +}; + +static const struct dcn30_vpg_shift vpg_shift = { + DCN3_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_vpg_mask vpg_mask = { + DCN3_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs(id)\ +[id] = {\ + AFMT_DCN3_REG_LIST(id)\ +} + +static const struct dcn30_afmt_registers afmt_regs[] = { + afmt_regs(0), + afmt_regs(1), + afmt_regs(2), + afmt_regs(3), + afmt_regs(4), + afmt_regs(5), + afmt_regs(6), +}; + +static const struct dcn30_afmt_shift afmt_shift = { + DCN3_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_afmt_mask afmt_mask = { + DCN3_AFMT_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs(id)\ +[id] = {\ + 
SE_DCN3_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4), + stream_enc_regs(5) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4), + aux_regs(5) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4), + hpd_regs(5) +}; + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN3_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + DPCS_DCN2_REG_LIST(id), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ +} + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), + link_regs(4, E), + link_regs(5, F) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT),\ + DPCS_DCN2_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK),\ + DPCS_DCN2_MASK_SH_LIST(_MASK) +}; + + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCN_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs(id)\ +[id] = {\ + DPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn3_dpp_registers dpp_regs[] = { + dpp_regs(0), + dpp_regs(1), + dpp_regs(2), + dpp_regs(3), + dpp_regs(4), + dpp_regs(5), +}; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4), + opp_regs(5) +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4), + aux_engine_regs(5) +}; + +#define dwbc_regs_dcn3(id)\ +[id] = {\ + DWBC_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_dwbc_registers dwbc30_regs[] = { + dwbc_regs_dcn3(0), +}; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + 
DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define mcif_wb_regs_dcn3(id)\ +[id] = {\ + MCIF_WB_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { + mcif_wb_regs_dcn3(0) +}; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define dsc_regsDCN20(id)\ +[id] = {\ + DSC_REG_LIST_DCN20(id)\ +} + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1), + dsc_regsDCN20(2), + dsc_regsDCN20(3), + dsc_regsDCN20(4), + dsc_regsDCN20(5) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static const struct dcn30_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN3_0(0), + MPC_REG_LIST_DCN3_0(1), + MPC_REG_LIST_DCN3_0(2), + MPC_REG_LIST_DCN3_0(3), + MPC_REG_LIST_DCN3_0(4), + MPC_REG_LIST_DCN3_0(5), + MPC_OUT_MUX_REG_LIST_DCN3_0(0), + MPC_OUT_MUX_REG_LIST_DCN3_0(1), + MPC_OUT_MUX_REG_LIST_DCN3_0(2), + MPC_OUT_MUX_REG_LIST_DCN3_0(3), + MPC_OUT_MUX_REG_LIST_DCN3_0(4), + MPC_OUT_MUX_REG_LIST_DCN3_0(5), + MPC_RMU_GLOBAL_REG_LIST_DCN3AG, + MPC_RMU_REG_LIST_DCN3AG(0), + MPC_RMU_REG_LIST_DCN3AG(1), + MPC_RMU_REG_LIST_DCN3AG(2), + MPC_DWB_MUX_REG_LIST_DCN3_0(0), +}; + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define optc_regs(id)\ +[id] = {OPTC_COMMON_REG_LIST_DCN3_0(id)} + + +static const struct dcn_optc_registers optc_regs[] = { + optc_regs(0), + optc_regs(1), + optc_regs(2), + optc_regs(3), + optc_regs(4), + optc_regs(5) +}; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN30(id)\ +} + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3), + hubp_regs(4), + hubp_regs(5) +}; + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN30(_MASK) +}; + +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN30(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN30(_MASK) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_REG_LIST_DCN30() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN3(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN3(_MASK) +}; + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN30_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN30_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN30_MASK_SH_LIST(_MASK) +}; +#define vmid_regs(id)\ +[id] = {\ + DCN20_VMID_REG_LIST(id)\ +} + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + 
vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn3 = { + .num_timing_generator = 6, + .num_opp = 6, + .num_video_plane = 6, + .num_audio = 6, + .num_stream_encoder = 6, + .num_pll = 6, + .num_dwb = 1, + .num_ddc = 6, + .num_vmid = 16, + .num_mpc_3dlut = 3, + .num_dsc = 6, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + /* 6:1 downscaling ratio: 1000/6 = 166.666 */ + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 16, + 16 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, //No DMCU on DCN30 + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = true, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 7680,/*upto 8K*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .use_max_lb = true, + .exit_idle_opt_for_cursor_updates = true, + .enable_legacy_fast_update = false, + .using_dml2 = false, +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + .disallow_replay = false, + }, +}; + +static void dcn30_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN20_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn30_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn3_dpp *dpp = + kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp3_construct(dpp, ctx, inst, + &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +static struct output_pixel_processor *dcn30_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dcn30_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} + +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), + i2c_inst_regs(6), +}; + +static const struct dce_i2c_shift i2c_shifts = { 
+ I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct dce_i2c_hw *dcn30_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static struct mpc *dcn30_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), + GFP_KERNEL); + + if (!mpc30) + return NULL; + + dcn30_mpc_construct(mpc30, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct hubbub *dcn30_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub3) + return NULL; + + hubbub3_construct(hubbub3, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask); + + + for (i = 0; i < res_cap_dcn3.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub3->base; +} + +static struct timing_generator *dcn30_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn30_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn30_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + + dcn30_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +static struct panel_cntl *dcn30_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn30_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + 
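
The dcn30_*_create() helpers above (I2C, MPC, HUBBUB, OPTC, link encoder, panel control, audio) share one idiom: kzalloc() a generation-private wrapper, run the block-specific construct routine against the context, instance number and per-instance register/shift/mask tables, and return a pointer to the embedded base object that the rest of DC works with; where construction can fail they BREAK_TO_DEBUGGER(), free the wrapper and return NULL so the caller's create_fail path can unwind. A compact user-space sketch of that idiom, with hypothetical names (block_wrapper, construct_block(), debug_break()) and calloc()/free() standing in for kzalloc()/kfree():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* What callers of the create helper actually get back. */
struct block_base {
        int inst;
};

/* Hypothetical generation-private wrapper; base is the first member, so
 * the base pointer and the wrapper pointer share the same address. */
struct block_wrapper {
        struct block_base base;
        const int *regs;                /* per-instance register table */
};

static void debug_break(void) { }       /* stand-in for BREAK_TO_DEBUGGER() */

/* Stand-in for the block-specific *_construct() routine. */
static bool construct_block(struct block_wrapper *w, int inst, const int *regs)
{
        if (!regs)
                return false;           /* construction can fail */
        w->base.inst = inst;
        w->regs = regs;
        return true;
}

static struct block_base *block_create(int inst, const int *regs)
{
        struct block_wrapper *w = calloc(1, sizeof(*w));        /* kzalloc() */

        if (!w)
                return NULL;

        if (construct_block(w, inst, regs))
                return &w->base;        /* hand back only the embedded base */

        debug_break();                  /* loud failure, then unwind */
        free(w);
        return NULL;
}

int main(void)
{
        static const int regs0[1] = { 0 };
        struct block_base *base = block_create(0, regs0);

        printf("create %s\n", base ? "succeeded" : "failed");
        free(base);                     /* same address as the wrapper allocation */
        return 0;
}

Keeping the wrapper private and handing back only the base is what lets the shared resource_pool code stay generation-agnostic while each DCN variant supplies its own register lists.
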
+static struct vpg *dcn30_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); + + if (!vpg3) + return NULL; + + vpg3_construct(vpg3, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg3->base; +} + +static struct afmt *dcn30_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); + + if (!afmt3) + return NULL; + + afmt3_construct(afmt3, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + return &afmt3->base; +} + +static struct stream_encoder *dcn30_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn30_vpg_create(ctx, vpg_inst); + afmt = dcn30_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static struct dce_hwseq *dcn30_hwseq_create(struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn30_create_audio, + .create_stream_encoder = dcn30_stream_encoder_create, + .create_hwseq = dcn30_hwseq_create, +}; + +static void dcn30_resource_destruct(struct dcn30_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn30_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; 
+ } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.oem_device != NULL) { + struct dc *dc = pool->base.oem_device->ctx->dc; + + dc->link_srv->destroy_ddc_service(&pool->base.oem_device); + } +} + +static struct hubp *dcn30_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + + if (hubp3_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static bool dcn30_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + + dcn30_dwbc_construct(dwbc30, ctx, + &dwbc30_regs[i], + &dwbc30_shift, + &dwbc30_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +static bool dcn30_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + + dcn30_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn30_dsc_create( + struct dc_context *ctx, 
uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +enum dc_status dcn30_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) +{ + + return dcn20_add_stream_to_ctx(dc, new_ctx, dc_stream); +} + +static void dcn30_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn30_resource_pool *dcn30_pool = TO_DCN30_RES_POOL(*pool); + + dcn30_resource_destruct(dcn30_pool); + kfree(dcn30_pool); + *pool = NULL; +} + +static struct clock_source *dcn30_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn3_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +int dcn30_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + + DC_FP_START(); + dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + DC_FP_END(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + + pipes[pipe_cnt++].pipe.scale_ratio_depth.lb_depth = + dm_lb_16; + } + + return pipe_cnt; +} + +void dcn30_populate_dml_writeback_from_context( + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) +{ + DC_FP_START(); + dcn30_fpu_populate_dml_writeback_from_context(dc, res_ctx, pipes); + DC_FP_END(); +} + +unsigned int dcn30_calc_max_scaled_time( + unsigned int time_per_pixel, + enum mmhubbub_wbif_mode mode, + unsigned int urgent_watermark) +{ + unsigned int time_per_byte = 0; + unsigned int total_free_entry = 0xb40; + unsigned int buf_lh_capability; + unsigned int max_scaled_time; + + if (mode == PACKED_444) /* packed mode 32 bpp */ + time_per_byte = time_per_pixel/4; + else if (mode == PACKED_444_FP16) /* packed mode 64 bpp */ + time_per_byte = time_per_pixel/8; + + if (time_per_byte == 0) + time_per_byte = 1; + + buf_lh_capability = (total_free_entry*time_per_byte*32) >> 6; /* time_per_byte is in u6.6*/ + max_scaled_time = buf_lh_capability - urgent_watermark; + return max_scaled_time; +} + +void dcn30_set_mcif_arb_params( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + enum mmhubbub_wbif_mode wbif_mode; + struct display_mode_lib *dml = &context->bw_ctx.dml; + struct mcif_arb_params *wb_arb_params; + int i, j, dwb_pipe; + + /* Writeback MCIF_WB arbitration parameters */ + dwb_pipe = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + for (j = 0; j < MAX_DWB_PIPES; j++) { + struct dc_writeback_info *writeback_info = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j]; + + if (writeback_info->wb_enabled == false) + continue; + + //wb_arb_params = &context->res_ctx.pipe_ctx[i].stream->writeback_info[j].mcif_arb_params; + wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[dwb_pipe]; + + if 
(writeback_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_ARGB || + writeback_info->dwb_params.cnv_params.fc_out_format == DWB_OUT_FORMAT_64BPP_RGBA) + wbif_mode = PACKED_444_FP16; + else + wbif_mode = PACKED_444; + + DC_FP_START(); + dcn30_fpu_set_mcif_arb_params(wb_arb_params, dml, pipes, pipe_cnt, j); + DC_FP_END(); + wb_arb_params->time_per_pixel = (1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* time_per_pixel should be in u6.6 format */ + wb_arb_params->slice_lines = 32; + wb_arb_params->arbitration_slice = 2; /* irrelevant since there is no YUV output */ + wb_arb_params->max_scaled_time = dcn30_calc_max_scaled_time(wb_arb_params->time_per_pixel, + wbif_mode, + wb_arb_params->cli_watermark[0]); /* assume 4 watermark sets have the same value */ + + dwb_pipe++; + + if (dwb_pipe >= MAX_DWB_PIPES) + return; + } + if (dwb_pipe >= MAX_DWB_PIPES) + return; + } + +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +bool dcn30_acquire_post_bldn_3dlut( + struct resource_context *res_ctx, + const struct resource_pool *pool, + int mpcc_id, + struct dc_3dlut **lut, + struct dc_transfer_func **shaper) +{ + int i; + bool ret = false; + union dc_3dlut_state *state; + + ASSERT(*lut == NULL && *shaper == NULL); + *lut = NULL; + *shaper = NULL; + + for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { + if (!res_ctx->is_mpc_3dlut_acquired[i]) { + *lut = pool->mpc_lut[i]; + *shaper = pool->mpc_shaper[i]; + state = &pool->mpc_lut[i]->state; + res_ctx->is_mpc_3dlut_acquired[i] = true; + state->bits.rmu_idx_valid = 1; + state->bits.rmu_mux_num = i; + if (state->bits.rmu_mux_num == 0) + state->bits.mpc_rmu0_mux = mpcc_id; + else if (state->bits.rmu_mux_num == 1) + state->bits.mpc_rmu1_mux = mpcc_id; + else if (state->bits.rmu_mux_num == 2) + state->bits.mpc_rmu2_mux = mpcc_id; + ret = true; + break; + } + } + return ret; +} + +bool dcn30_release_post_bldn_3dlut( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_3dlut **lut, + struct dc_transfer_func **shaper) +{ + int i; + bool ret = false; + + for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { + if (pool->mpc_lut[i] == *lut && pool->mpc_shaper[i] == *shaper) { + res_ctx->is_mpc_3dlut_acquired[i] = false; + pool->mpc_lut[i]->state.raw = 0; + *lut = NULL; + *shaper = NULL; + ret = true; + break; + } + } + return ret; +} + +static bool is_soc_bounding_box_valid(struct dc *dc) +{ + uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; + + if (ASICREV_IS_SIENNA_CICHLID_P(hw_internal_rev)) + return true; + + return false; +} + +static bool init_soc_bounding_box(struct dc *dc, + struct dcn30_resource_pool *pool) +{ + struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_0_soc; + struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_0_ip; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (!is_soc_bounding_box_valid(dc)) { + DC_LOG_ERROR("%s: not valid soc bounding box\n", __func__); + return false; + } + + loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; + loaded_ip->max_num_dpp = pool->base.pipe_count; + loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + dcn20_patch_bounding_box(dc, loaded_bb); + DC_FP_START(); + patch_dcn30_soc_bounding_box(dc, &dcn3_0_soc); + DC_FP_END(); + + return true; +} + +static bool dcn30_split_stream_for_mpc_or_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *pri_pipe, + struct pipe_ctx *sec_pipe, + bool odm) +{ + int pipe_idx = 
sec_pipe->pipe_idx; + const struct resource_pool *pool = dc->res_pool; + + *sec_pipe = *pri_pipe; + + sec_pipe->pipe_idx = pipe_idx; + sec_pipe->plane_res.mi = pool->mis[pipe_idx]; + sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; + sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; + sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; + sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; + sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; + sec_pipe->stream_res.dsc = NULL; + if (odm) { + if (pri_pipe->next_odm_pipe) { + ASSERT(pri_pipe->next_odm_pipe != sec_pipe); + sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; + sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; + } + if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { + pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; + } + if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { + pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; + } + pri_pipe->next_odm_pipe = sec_pipe; + sec_pipe->prev_odm_pipe = pri_pipe; + + if (!sec_pipe->top_pipe) + sec_pipe->stream_res.opp = pool->opps[pipe_idx]; + else + sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; + if (sec_pipe->stream->timing.flags.DSC == 1) { + dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + ASSERT(sec_pipe->stream_res.dsc); + if (sec_pipe->stream_res.dsc == NULL) + return false; + } + } else { + if (pri_pipe->bottom_pipe) { + ASSERT(pri_pipe->bottom_pipe != sec_pipe); + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; + sec_pipe->bottom_pipe->top_pipe = sec_pipe; + } + pri_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe; + + ASSERT(pri_pipe->plane_state); + } + + return true; +} + +static struct pipe_ctx *dcn30_find_split_pipe( + struct dc *dc, + struct dc_state *context, + int old_index) +{ + struct pipe_ctx *pipe = NULL; + int i; + + if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[old_index]; + pipe->pipe_idx = old_index; + } + + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + } + + /* + * May need to fix pipes getting tossed from 1 opp to another on flip + * Add for debugging transient underflow during topology updates: + * ASSERT(pipe); + */ + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + + return pipe; +} + +noinline bool dcn30_internal_validate_bw( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate, + bool allow_self_refresh_only) +{ + bool out = false; + bool repopulate_pipes = false; + int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; + bool newly_split[MAX_PIPES] = { false }; + int pipe_cnt, i, pipe_idx, vlevel; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + ASSERT(pipes); + if (!pipes) + return false; + + context->bw_ctx.dml.vba.maxMpcComb = 0; + context->bw_ctx.dml.vba.VoltageLevel = 0; + context->bw_ctx.dml.vba.DRAMClockChangeSupport[0][0] = 
dm_dram_clock_change_vactive; + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (!fast_validate || !allow_self_refresh_only) { + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh_and_mclk_switch; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + /* This may adjust vlevel and maxMpcComb */ + if (vlevel < context->bw_ctx.dml.soc.num_states) + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + if (allow_self_refresh_only && + (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported)) { + /* + * If mode is unsupported or there's still no p-state support + * then fall back to favoring voltage. + * + * We don't actually support prefetch mode 2, so require that we + * at least support prefetch mode 1. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, sizeof(split)); + memset(merge, 0, sizeof(merge)); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + } + + dml_log_mode_support_params(&context->bw_ctx.dml); + + if (vlevel == context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + if (!dc->config.enable_windowed_mpo_odm) { + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; + + if (!pipe->stream) + continue; + + /* We only support full screen mpo with ODM */ + if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled + && pipe->plane_state && mpo_pipe + && memcmp(&mpo_pipe->plane_state->clip_rect, + &pipe->stream->src, + sizeof(struct rect)) != 0) { + ASSERT(mpo_pipe->plane_state != pipe->plane_state); + goto validate_fail; + } + pipe_idx++; + } + } + + /* merge pipes if necessary */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /*skip pipes that don't need merging*/ + if (!merge[i]) + continue; + + /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ + if (pipe->prev_odm_pipe) { + /*split off odm pipe*/ + pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; + if (pipe->next_odm_pipe) + pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; + + pipe->bottom_pipe = NULL; + pipe->next_odm_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + pipe->top_pipe = NULL; + pipe->prev_odm_pipe = NULL; + if (pipe->stream_res.dsc) + dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + struct pipe_ctx *top_pipe = pipe->top_pipe; 
+ struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; + + top_pipe->bottom_pipe = bottom_pipe; + if (bottom_pipe) + bottom_pipe->top_pipe = top_pipe; + + pipe->top_pipe = NULL; + pipe->bottom_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else + ASSERT(0); /* Should never try to merge master pipe */ + + } + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = NULL; + bool odm; + int old_index = -1; + + if (!pipe->stream || newly_split[i]) + continue; + + pipe_idx++; + odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; + + if (!pipe->plane_state && !odm) + continue; + + if (split[i]) { + if (odm) { + if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + } else { + if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else if (old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + } + hsplit_pipe = dcn30_find_split_pipe(dc, context, old_index); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) + goto validate_fail; + + if (!dcn30_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe, odm)) + goto validate_fail; + + newly_split[hsplit_pipe->pipe_idx] = true; + repopulate_pipes = true; + } + if (split[i] == 4) { + struct pipe_ctx *pipe_4to1; + + if (odm && old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn30_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn30_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + + if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe + && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn30_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn30_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + hsplit_pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + } + if (odm) + dcn20_build_mapped_resource(dc, context, pipe->stream); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if 
(pipe->plane_state) { + if (!resource_build_scaling_params(pipe)) + goto validate_fail; + } + } + + /* Actual dsc count per stream dsc validation*/ + if (!dcn20_validate_dsc(dc, context)) { + vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + if (repopulate_pipes) + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + context->bw_ctx.dml.vba.VoltageLevel = vlevel; + *vlevel_out = vlevel; + *pipe_cnt_out = pipe_cnt; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + +static int get_refresh_rate(struct dc_state *context) +{ + int refresh_rate = 0; + int h_v_total = 0; + struct dc_crtc_timing *timing = NULL; + + if (context == NULL || context->streams[0] == NULL) + return 0; + + /* check if refresh rate at least 120hz */ + timing = &context->streams[0]->timing; + if (timing == NULL) + return 0; + + h_v_total = timing->h_total * timing->v_total; + if (h_v_total == 0) + return 0; + + refresh_rate = ((timing->pix_clk_100hz * 100) / (h_v_total)) + 1; + return refresh_rate; +} + +#define MAX_STRETCHED_V_BLANK 500 // in micro-seconds +/* + * Scaling factor for v_blank stretch calculations considering timing in + * micro-seconds and pixel clock in 100hz. + * Note: the parenthesis are necessary to ensure the correct order of + * operation where V_SCALE is used. + */ +#define V_SCALE (10000 / MAX_STRETCHED_V_BLANK) + +static int get_frame_rate_at_max_stretch_100hz(struct dc_state *context) +{ + struct dc_crtc_timing *timing = NULL; + uint32_t sec_per_100_lines; + uint32_t max_v_blank; + uint32_t curr_v_blank; + uint32_t v_stretch_max; + uint32_t stretched_frame_pix_cnt; + uint32_t scaled_stretched_frame_pix_cnt; + uint32_t scaled_refresh_rate; + + if (context == NULL || context->streams[0] == NULL) + return 0; + + /* check if refresh rate at least 120hz */ + timing = &context->streams[0]->timing; + if (timing == NULL) + return 0; + + sec_per_100_lines = timing->pix_clk_100hz / timing->h_total + 1; + max_v_blank = sec_per_100_lines / V_SCALE + 1; + curr_v_blank = timing->v_total - timing->v_addressable; + v_stretch_max = (max_v_blank > curr_v_blank) ? 
(max_v_blank - curr_v_blank) : (0); + stretched_frame_pix_cnt = (v_stretch_max + timing->v_total) * timing->h_total; + scaled_stretched_frame_pix_cnt = stretched_frame_pix_cnt / 10000; + scaled_refresh_rate = (timing->pix_clk_100hz) / scaled_stretched_frame_pix_cnt + 1; + + return scaled_refresh_rate; +} + +static bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(struct dc_state *context) +{ + int refresh_rate_max_stretch_100hz; + int min_refresh_100hz; + + if (context == NULL || context->streams[0] == NULL) + return false; + + refresh_rate_max_stretch_100hz = get_frame_rate_at_max_stretch_100hz(context); + min_refresh_100hz = context->streams[0]->timing.min_refresh_in_uhz / 10000; + + if (refresh_rate_max_stretch_100hz < min_refresh_100hz) + return false; + + return true; +} + +bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context) +{ + int refresh_rate = 0; + const int minimum_refreshrate_supported = 120; + + if (context == NULL || context->streams[0] == NULL) + return false; + + if (context->streams[0]->sink->edid_caps.panel_patch.disable_fams) + return false; + + if (dc->debug.disable_fams) + return false; + + if (!dc->caps.dmub_caps.mclk_sw) + return false; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down) + return false; + + /* more then 1 monitor connected */ + if (context->stream_count != 1) + return false; + + refresh_rate = get_refresh_rate(context); + if (refresh_rate < minimum_refreshrate_supported) + return false; + + if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(context)) + return false; + + if (!context->streams[0]->allow_freesync) + return false; + + if (context->streams[0]->vrr_active_variable && dc->debug.disable_fams_gaming) + return false; + + context->streams[0]->fpo_in_use = true; + + return true; +} + +/* + * set up FPO watermarks, pstate, dram latency + */ +void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context) +{ + ASSERT(dc != NULL && context != NULL); + if (dc == NULL || context == NULL) + return; + + /* Set wm_a.pstate so high natural MCLK switches are impossible: 4 seconds */ + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; +} + +void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + DC_FP_START(); + dcn30_fpu_update_soc_for_wm_a(dc, context); + DC_FP_END(); +} + +void dcn30_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + DC_FP_START(); + dcn30_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); +} + +bool dcn30_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + DC_LOGGER_INIT(dc->ctx->logger); + + BW_VAL_TRACE_COUNT(); + + DC_FP_START(); + out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, true); + DC_FP_END(); + + if (pipe_cnt == 0) + goto validate_out; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + goto validate_out; + } + + DC_FP_START(); + if (dc->res_pool->funcs->calculate_wm_and_dlg) + dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, 
pipe_cnt, vlevel); + DC_FP_END(); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + kfree(pipes); + + BW_VAL_TRACE_FINISH(); + + return out; +} + +void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + unsigned int i, j; + unsigned int num_states = 0; + + unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; + unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; + + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200}; + unsigned int num_dcfclk_sta_targets = 4; + unsigned int num_uclk_states; + + struct dc_bounding_box_max_clk dcn30_bb_max_clk; + + memset(&dcn30_bb_max_clk, 0, sizeof(dcn30_bb_max_clk)); + + if (dc->ctx->dc_bios->vram_info.num_chans) + dcn3_0_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; + + DC_FP_START(); + dcn30_fpu_update_dram_channel_width_bytes(dc); + DC_FP_END(); + + if (bw_params->clk_table.entries[0].memclk_mhz) { + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > dcn30_bb_max_clk.max_dcfclk_mhz) + dcn30_bb_max_clk.max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > dcn30_bb_max_clk.max_dispclk_mhz) + dcn30_bb_max_clk.max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > dcn30_bb_max_clk.max_dppclk_mhz) + dcn30_bb_max_clk.max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > dcn30_bb_max_clk.max_phyclk_mhz) + dcn30_bb_max_clk.max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + } + + DC_FP_START(); + dcn30_fpu_update_max_clk(&dcn30_bb_max_clk); + DC_FP_END(); + + if (dcn30_bb_max_clk.max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array + dcfclk_sta_targets[num_dcfclk_sta_targets] = dcn30_bb_max_clk.max_dcfclk_mhz; + num_dcfclk_sta_targets++; + } else if (dcn30_bb_max_clk.max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates + for (i = 0; i < num_dcfclk_sta_targets; i++) { + if (dcfclk_sta_targets[i] > dcn30_bb_max_clk.max_dcfclk_mhz) { + dcfclk_sta_targets[i] = dcn30_bb_max_clk.max_dcfclk_mhz; + break; + } + } + // Update size of array since we "removed" duplicates + num_dcfclk_sta_targets = i + 1; + } + + num_uclk_states = bw_params->clk_table.num_entries; + + // Calculate optimal dcfclk for each uclk + for (i = 0; i < num_uclk_states; i++) { + DC_FP_START(); + dcn30_fpu_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + &optimal_dcfclk_for_uclk[i], NULL); + DC_FP_END(); + if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { + optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; + } + } + + // Calculate optimal uclk for each dcfclk sta target + for (i = 0; i < num_dcfclk_sta_targets; i++) { + for (j = 0; j < num_uclk_states; j++) { + if (dcfclk_sta_targets[i] < 
optimal_dcfclk_for_uclk[j]) { + optimal_uclk_for_dcfclk_sta_targets[i] = + bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + i = 0; + j = 0; + // create the final dcfclk and uclk table + while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } else { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } else { + j = num_uclk_states; + } + } + } + + while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } + + while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && + optimal_dcfclk_for_uclk[j] <= dcn30_bb_max_clk.max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } + + dcn3_0_soc.num_states = num_states; + DC_FP_START(); + dcn30_fpu_update_bw_bounding_box(dc, bw_params, &dcn30_bb_max_clk, dcfclk_mhz, dram_speed_mts); + DC_FP_END(); + } +} + +static void dcn30_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + +static const struct resource_funcs dcn30_res_pool_funcs = { + .destroy = dcn30_destroy_resource_pool, + .link_enc_create = dcn30_link_encoder_create, + .panel_cntl_create = dcn30_panel_cntl_create, + .validate_bandwidth = dcn30_validate_bandwidth, + .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, + .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, + .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn30_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn30_get_panel_config_defaults, +}; + +#define CTX ctx + +#define REG(reg_name) \ + (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* Support for max 6 pipes */ + value = value & 0x3f; + return value; +} + +static bool dcn30_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn30_resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + struct ddc_service_init_data ddc_init_data = {0}; + uint32_t pipe_fuses = read_pipe_fuses(ctx); + uint32_t num_pipes = 0; + + if (!(pipe_fuses == 0 || pipe_fuses == 0x3e)) { + 
BREAK_TO_DEBUGGER(); + dm_error("DC: Unexpected fuse recipe for navi2x !\n"); + /* fault to single pipe */ + pipe_fuses = 0x3e; + } + + DC_FP_START(); + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn3; + + pool->base.funcs = &dcn30_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a not applied by default*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.mall_size_per_mem_channel = 8; + /* total size = mall per channel * num channels * 1024 * 1024 */ + dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; + dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; + + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN3 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //3 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->caps.dp_hdmi21_pcon_support = true; + dc->caps.max_v_total = (1 << 15) - 1; + + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, + &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + } + + if 
(dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN30_CLK_SRC_PLL0] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN30_CLK_SRC_PLL1] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN30_CLK_SRC_PLL2] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN30_CLK_SRC_PLL3] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN30_CLK_SRC_PLL4] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + pool->base.clock_sources[DCN30_CLK_SRC_PLL5] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL5, + &clk_src_regs[5], false); + + pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* DCCG */ + pool->base.dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* PP Lib and SMU interfaces */ + init_soc_bounding_box(dc, pool); + + num_pipes = dcn3_0_ip.max_num_dpp; + + for (i = 0; i < dcn3_0_ip.max_num_dpp; i++) + if (pipe_fuses & 1 << i) + num_pipes--; + + dcn3_0_ip.max_num_dpp = num_pipes; + dcn3_0_ip.max_num_otg = num_pipes; + + dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); + + /* IRQ */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn30_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn30_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.hubps[i] = dcn30_hubp_create(ctx, i); + if (pool->base.hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[i] = dcn30_dpp_create(ctx, i); + if (pool->base.dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn30_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn30_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: 
failed to create tg!\n"); + goto create_fail; + } + } + pool->base.timing_generator_count = i; + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + + if (pool->base.psr == NULL) { + dm_error("DC: failed to create PSR obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* ABM */ + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.multiple_abms[i] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if (pool->base.multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + /* MPC and DSC */ + pool->base.mpc = dcn30_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn30_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn30_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn30_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn30_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn30_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* Audio, Stream Encoders including DIG and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn30_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { + ddc_init_data.ctx = dc->ctx; + ddc_init_data.link = NULL; + ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; + ddc_init_data.id.enum_id = 0; + ddc_init_data.id.type = OBJECT_TYPE_GENERIC; + pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); + } else { + pool->base.oem_device = NULL; + } + + DC_FP_END(); + + return true; + +create_fail: + + DC_FP_END(); + dcn30_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn30_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn30_resource_pool *pool = + kzalloc(sizeof(struct dcn30_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn30_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h new file mode 100644 index 000000000000..8e6b8b7368fd --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h @@ -0,0 +1,108 @@ +/* + * Copyright 2020 
Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN30_RESOURCE_H_ +#define _DCN30_RESOURCE_H_ + +#include "core_types.h" + +#define TO_DCN30_RES_POOL(pool)\ + container_of(pool, struct dcn30_resource_pool, base) + +struct dc; +struct resource_pool; +struct _vcs_dpi_display_pipe_params_st; + +extern struct _vcs_dpi_ip_params_st dcn3_0_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc; + +struct dcn30_resource_pool { + struct resource_pool base; +}; +struct resource_pool *dcn30_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +void dcn30_set_mcif_arb_params( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt); + +unsigned int dcn30_calc_max_scaled_time( + unsigned int time_per_pixel, + enum mmhubbub_wbif_mode mode, + unsigned int urgent_watermark); + +bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, + bool fast_validate); +bool dcn30_internal_validate_bw( + struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate, + bool allow_self_refresh_only); +void dcn30_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); +void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); +void dcn30_populate_dml_writeback_from_context( + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes); + +int dcn30_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); + +bool dcn30_acquire_post_bldn_3dlut( + struct resource_context *res_ctx, + const struct resource_pool *pool, + int mpcc_id, + struct dc_3dlut **lut, + struct dc_transfer_func **shaper); + +bool dcn30_release_post_bldn_3dlut( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_3dlut **lut, + struct dc_transfer_func **shaper); + +enum dc_status dcn30_add_stream_to_ctx( + struct dc *dc, + struct dc_state *new_ctx, + struct dc_stream_state *dc_stream); + +void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); + +bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); +void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state 
*context); +int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel); + +#endif /* _DCN30_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c new file mode 100644 index 000000000000..511ff6b5b985 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -0,0 +1,1728 @@ +/* + * Copyright 2019-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +#include "dm_services.h" +#include "dc.h" + +#include "dcn301/dcn301_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn30/dcn30_resource.h" +#include "dcn301_resource.h" + +#include "dcn20/dcn20_resource.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn301/dcn301_hubbub.h" +#include "dcn30/dcn30_mpc.h" +#include "dcn30/dcn30_hubp.h" +#include "irq/dcn30/irq_service_dcn30.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn301/dcn301_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn30/dcn30_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dml/display_mode_vba.h" +#include "dcn301/dcn301_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn301/dcn301_dio_link_encoder.h" +#include "dcn301/dcn301_panel_cntl.h" + +#include "vangogh_ip_offset.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn30/dcn30_mmhubbub.h" + +#include "dcn/dcn_3_0_1_offset.h" +#include "dcn/dcn_3_0_1_sh_mask.h" + +#include "nbio/nbio_7_2_0_offset.h" + +#include "dpcs/dpcs_3_0_0_offset.h" +#include "dpcs/dpcs_3_0_0_sh_mask.h" + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#include "dml/dcn30/dcn30_fpu.h" + +#include "dml/dcn30/display_mode_vba_30.h" +#include "dml/dcn301/dcn301_fpu.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" +#include "amdgpu_socbb.h" + +#define TO_DCN301_RES_POOL(pool)\ + container_of(pool, struct dcn301_resource_pool, base) + +#define DC_LOGGER \ + 
dc->ctx->logger +#define DC_LOGGER_INIT(logger) + +enum dcn301_clk_src_array_id { + DCN301_CLK_SRC_PLL0, + DCN301_CLK_SRC_PLL1, + DCN301_CLK_SRC_PLL2, + DCN301_CLK_SRC_PLL3, + DCN301_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(mm ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + mm ## reg_name_pre ## id ## _ ## reg_name_post + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX0_ ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(regMM ## reg_name ## _BASE_IDX) + \ + regMM ## reg_name + +/* CLOCK */ +#define CLK_BASE_INNER(seg) \ + CLK_BASE__INST0_SEG ## seg + +#define CLK_BASE(seg) \ + CLK_BASE_INNER(seg) + +#define CLK_SRI(reg_name, block, inst)\ + .reg_name = CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## _ ## inst ## _ ## reg_name + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN3_01(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +#define abm_regs(id)\ +[id] = {\ + ABM_DCN301_REG_LIST(id)\ +} + +static const struct dce_abm_registers abm_regs[] = { + 
abm_regs(0), + abm_regs(1), + abm_regs(2), + abm_regs(3), +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN30(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6) +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs(id)\ +[id] = {\ + VPG_DCN3_REG_LIST(id)\ +} + +static const struct dcn30_vpg_registers vpg_regs[] = { + vpg_regs(0), + vpg_regs(1), + vpg_regs(2), + vpg_regs(3), +}; + +static const struct dcn30_vpg_shift vpg_shift = { + DCN3_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_vpg_mask vpg_mask = { + DCN3_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs(id)\ +[id] = {\ + AFMT_DCN3_REG_LIST(id)\ +} + +static const struct dcn30_afmt_registers afmt_regs[] = { + afmt_regs(0), + afmt_regs(1), + afmt_regs(2), + afmt_regs(3), +}; + +static const struct dcn30_afmt_shift afmt_shift = { + DCN3_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_afmt_mask afmt_mask = { + DCN3_AFMT_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN3_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), +}; + + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN301_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + DPCS_DCN2_REG_LIST(id), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ +} + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN301(__SHIFT),\ + DPCS_DCN2_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN301(_MASK),\ + DPCS_DCN2_MASK_SH_LIST(_MASK) +}; + +#define panel_cntl_regs(id)\ +[id] = {\ + DCN301_PANEL_CNTL_REG_LIST(id),\ +} + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + panel_cntl_regs(0), + panel_cntl_regs(1), +}; + +static const 
struct dcn301_panel_cntl_shift panel_cntl_shift = { + DCN301_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn301_panel_cntl_mask panel_cntl_mask = { + DCN301_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs(id)\ +[id] = {\ + DPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn3_dpp_registers dpp_regs[] = { + dpp_regs(0), + dpp_regs(1), + dpp_regs(2), + dpp_regs(3), +}; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), +}; + +#define dwbc_regs_dcn3(id)\ +[id] = {\ + DWBC_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_dwbc_registers dwbc30_regs[] = { + dwbc_regs_dcn3(0), +}; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define mcif_wb_regs_dcn3(id)\ +[id] = {\ + MCIF_WB_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { + mcif_wb_regs_dcn3(0) +}; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define dsc_regsDCN20(id)\ +[id] = {\ + DSC_REG_LIST_DCN20(id)\ +} + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1), + dsc_regsDCN20(2), +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static const struct dcn30_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN3_0(0), + MPC_REG_LIST_DCN3_0(1), + MPC_REG_LIST_DCN3_0(2), + MPC_REG_LIST_DCN3_0(3), + MPC_OUT_MUX_REG_LIST_DCN3_0(0), + MPC_OUT_MUX_REG_LIST_DCN3_0(1), + MPC_OUT_MUX_REG_LIST_DCN3_0(2), + MPC_OUT_MUX_REG_LIST_DCN3_0(3), + MPC_RMU_GLOBAL_REG_LIST_DCN3AG, + MPC_RMU_REG_LIST_DCN3AG(0), + MPC_RMU_REG_LIST_DCN3AG(1), + MPC_DWB_MUX_REG_LIST_DCN3_0(0), +}; + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define optc_regs(id)\ +[id] = {OPTC_COMMON_REG_LIST_DCN3_0(id)} + + +static const struct dcn_optc_registers optc_regs[] = { + optc_regs(0), + optc_regs(1), + optc_regs(2), + optc_regs(3), +}; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN30(id)\ +} + +static const struct dcn_hubp2_registers 
hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3), +}; + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN30(_MASK) +}; + +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN301(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN301(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN301(_MASK) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_REG_LIST_DCN301() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN301(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN301(_MASK) +}; + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN301_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN301_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN301_MASK_SH_LIST(_MASK) +}; +#define vmid_regs(id)\ +[id] = {\ + DCN20_VMID_REG_LIST(id)\ +} + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static struct resource_caps res_cap_dcn301 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 4, + .num_stream_encoder = 4, + .num_pll = 4, + .num_dwb = 1, + .num_ddc = 4, + .num_vmid = 16, + .num_mpc_3dlut = 2, + .num_dsc = 3, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + /* 6:1 downscaling ratio: 1000/6 = 166.666 */ + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_dpp_power_gate = false, + .disable_hubp_power_gate = false, + .disable_clock_gate = true, + .disable_pplib_clock_request = true, + .disable_pplib_wm_range = true, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 7680,/*upto 8K*/ + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable + .dmub_command_table = true, + .use_max_lb = false, + .exit_idle_opt_for_cursor_updates = true, + .using_dml2 = false, +}; + +static void dcn301_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN20_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn301_dpp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn3_dpp *dpp = + kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp3_construct(dpp, ctx, inst, + &dpp_regs[inst], 
&tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} +static struct output_pixel_processor *dcn301_opp_create(struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dcn301_aux_engine_create(struct dc_context *ctx, uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) +}; + +static struct dce_i2c_hw *dcn301_i2c_hw_create(struct dc_context *ctx, uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct mpc *dcn301_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), + GFP_KERNEL); + + if (!mpc30) + return NULL; + + dcn30_mpc_construct(mpc30, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct hubbub *dcn301_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub3) + return NULL; + + hubbub301_construct(hubbub3, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask); + + + for (i = 0; i < res_cap_dcn301.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + hubbub3->num_vmid = res_cap_dcn301.num_vmid; + + return &hubbub3->base; +} + +static struct timing_generator *dcn301_timing_generator_create( + struct dc_context *ctx, uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn301_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn301_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct 
dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + + dcn301_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +static struct panel_cntl *dcn301_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dcn301_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dcn301_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dcn301_panel_cntl_construct(panel_cntl, + init_data, + &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, + &panel_cntl_mask); + + return &panel_cntl->base; +} + + +#define CTX ctx + +#define REG(reg_name) \ + (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* RV1 support max 4 pipes */ + value = value & 0xf; + return value; +} + + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn301_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn301_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); + + if (!vpg3) + return NULL; + + vpg3_construct(vpg3, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg3->base; +} + +static struct afmt *dcn301_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); + + if (!afmt3) + return NULL; + + afmt3_construct(afmt3, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + return &afmt3->base; +} + +static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn301_vpg_create(ctx, vpg_inst); + afmt = dcn301_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static struct dce_hwseq *dcn301_hwseq_create(struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn301_create_audio, + .create_stream_encoder = dcn301_stream_encoder_create, + .create_hwseq = dcn301_hwseq_create, +}; + +static void dcn301_destruct(struct 
dcn301_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn301_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); +} + +static struct hubp *dcn301_hubp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), 
GFP_KERNEL); + + if (!hubp2) + return NULL; + + if (hubp3_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static bool dcn301_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + + dcn30_dwbc_construct(dwbc30, ctx, + &dwbc30_regs[i], + &dwbc30_shift, + &dwbc30_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +static bool dcn301_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + + dcn30_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn301_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + + +static void dcn301_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn301_resource_pool *dcn301_pool = TO_DCN301_RES_POOL(*pool); + + dcn301_destruct(dcn301_pool); + kfree(dcn301_pool); + *pool = NULL; +} + +static struct clock_source *dcn301_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn301_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + + +static bool is_soc_bounding_box_valid(struct dc *dc) +{ + uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; + + if (ASICREV_IS_VANGOGH(hw_internal_rev)) + return true; + + return false; +} + +static bool init_soc_bounding_box(struct dc *dc, + struct dcn301_resource_pool *pool) +{ + struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_01_soc; + struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_01_ip; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (!is_soc_bounding_box_valid(dc)) { + DC_LOG_ERROR("%s: not valid soc bounding box\n", __func__); + return false; + } + + loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; + loaded_ip->max_num_dpp = pool->base.pipe_count; + DC_FP_START(); + dcn20_patch_bounding_box(dc, loaded_bb); + DC_FP_END(); + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + DC_FP_START(); + dcn301_fpu_init_soc_bounding_box(bb_info); + DC_FP_END(); + } + } + + return true; +} + + 
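All of the block constructors above follow the same create pattern: allocate the wrapper with kzalloc(), run the block-specific *_construct() helper against that instance's register/shift/mask tables, and hand back a pointer to the embedded base object; if construction fails, the wrapper is freed and NULL is returned after BREAK_TO_DEBUGGER(). A minimal sketch of that pattern, using a hypothetical "foo" block (the dcn301_foo, foo_construct and foo_regs names are placeholders, not real DCN301 symbols):

static struct foo *dcn301_foo_create(struct dc_context *ctx, uint32_t inst)
{
	/* wrapper that embeds the generic base object handed to callers */
	struct dcn301_foo *foo = kzalloc(sizeof(*foo), GFP_KERNEL);

	if (!foo)
		return NULL;

	/* bind instance 'inst' to its per-instance register/shift/mask tables */
	if (foo_construct(foo, ctx, inst, &foo_regs[inst], &foo_shift, &foo_mask))
		return &foo->base;

	/* construction failed: trap for the debugger, then release the wrapper */
	BREAK_TO_DEBUGGER();
	kfree(foo);
	return NULL;
}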
+static void set_wm_ranges( + struct pp_smu_funcs *pp_smu, + struct _vcs_dpi_soc_bounding_box_st *loaded_bb) +{ + struct pp_smu_wm_range_sets ranges = {0}; + int i; + + ranges.num_reader_wm_sets = 0; + + if (loaded_bb->num_states == 1) { + ranges.reader_wm_sets[0].wm_inst = 0; + ranges.reader_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.reader_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.reader_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + + ranges.num_reader_wm_sets = 1; + } else if (loaded_bb->num_states > 1) { + for (i = 0; i < 4 && i < loaded_bb->num_states; i++) { + ranges.reader_wm_sets[i].wm_inst = i; + ranges.reader_wm_sets[i].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.reader_wm_sets[i].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + DC_FP_START(); + dcn301_fpu_set_wm_ranges(i, &ranges, loaded_bb); + DC_FP_END(); + ranges.num_reader_wm_sets = i + 1; + } + + ranges.reader_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.reader_wm_sets[ranges.num_reader_wm_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + } + + ranges.num_writer_wm_sets = 1; + + ranges.writer_wm_sets[0].wm_inst = 0; + ranges.writer_wm_sets[0].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.writer_wm_sets[0].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + ranges.writer_wm_sets[0].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges.writer_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + + /* Notify PP Lib/SMU which Watermarks to use for which clock ranges */ + pp_smu->nv_funcs.set_wm_ranges(&pp_smu->nv_funcs.pp_smu, &ranges); +} + +static void dcn301_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + DC_FP_START(); + dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); +} + +static struct resource_funcs dcn301_res_pool_funcs = { + .destroy = dcn301_destroy_resource_pool, + .link_enc_create = dcn301_link_encoder_create, + .panel_cntl_create = dcn301_panel_cntl_create, + .validate_bandwidth = dcn30_validate_bandwidth, + .calculate_wm_and_dlg = dcn301_calculate_wm_and_dlg, + .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, + .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn301_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state +}; + +static bool dcn301_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn301_resource_pool *pool) +{ + int i, j; + struct dc_context *ctx = dc->ctx; + struct 
irq_service_init_data init_data; + uint32_t pipe_fuses = read_pipe_fuses(ctx); + uint32_t num_pipes = 0; + + DC_LOGGER_INIT(dc->ctx->logger); + + ctx->dc_bios->regs = &bios_regs; + + if (dc->ctx->asic_id.chip_id == DEVICE_ID_VGH_1435) + res_cap_dcn301.num_pll = 2; + pool->base.res_cap = &res_cap_dcn301; + + pool->base.funcs = &dcn301_res_pool_funcs; + + /************************************************* + * Resource + asic cap hardcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a enabled by default*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.is_apu = true; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN301 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->caps.dp_hdmi21_pcon_support = true; + + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + 
*************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN301_CLK_SRC_PLL0] = + dcn301_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN301_CLK_SRC_PLL1] = + dcn301_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN301_CLK_SRC_PLL2] = + dcn301_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN301_CLK_SRC_PLL3] = + dcn301_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + + pool->base.clk_src_count = DCN301_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn301_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* DCCG */ + pool->base.dccg = dccg301_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + init_soc_bounding_box(dc, pool); + + if (!dc->debug.disable_pplib_wm_range && pool->base.pp_smu->nv_funcs.set_wm_ranges) + set_wm_ranges(pool->base.pp_smu, &dcn3_01_soc); + + num_pipes = dcn3_01_ip.max_num_dpp; + + for (i = 0; i < dcn3_01_ip.max_num_dpp; i++) + if (pipe_fuses & 1 << i) + num_pipes--; + dcn3_01_ip.max_num_dpp = num_pipes; + dcn3_01_ip.max_num_otg = num_pipes; + + + dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30); + + /* IRQ */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn30_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn301_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + j = 0; + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->base.pipe_count; i++) { + + /* if pipe is disabled, skip instance of HW pipe, + * i.e, skip ASIC register instance + */ + if ((pipe_fuses & (1 << i)) != 0) { + DC_LOG_DEBUG("%s: fusing pipe %d\n", __func__, i); + continue; + } + + pool->base.hubps[j] = dcn301_hubp_create(ctx, i); + if (pool->base.hubps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[j] = dcn301_dpp_create(ctx, i); + if (pool->base.dpps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + + pool->base.opps[j] = dcn301_opp_create(ctx, i); + if (pool->base.opps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + + pool->base.timing_generators[j] = dcn301_timing_generator_create(ctx, i); + if (pool->base.timing_generators[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + j++; + } + pool->base.timing_generator_count = j; + pool->base.pipe_count = j; + pool->base.mpcc_count = j; + + /* ABM (or ABMs for NV2x) */ + /* TODO: */ + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.multiple_abms[i] = dmub_abm_create(ctx, + &abm_regs[i], + 
&abm_shift, + &abm_mask); + if (pool->base.multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* MPC and DSC */ + pool->base.mpc = dcn301_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn301_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn301_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn301_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn301_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn301_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn301_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + return true; + +create_fail: + + dcn301_destruct(pool); + + return false; +} + +struct resource_pool *dcn301_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn301_resource_pool *pool = + kzalloc(sizeof(struct dcn301_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn301_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h new file mode 100644 index 000000000000..ae8672680cdd --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h @@ -0,0 +1,45 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN301_RESOURCE_H_ +#define _DCN301_RESOURCE_H_ + +#include "core_types.h" + +struct dc; +struct resource_pool; +struct _vcs_dpi_display_pipe_params_st; + +extern struct _vcs_dpi_ip_params_st dcn3_01_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_01_soc; + +struct dcn301_resource_pool { + struct resource_pool base; +}; +struct resource_pool *dcn301_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +#endif /* _DCN301_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c new file mode 100644 index 000000000000..5791b5cc2875 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -0,0 +1,1518 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dcn302/dcn302_init.h" +#include "dcn302_resource.h" +#include "dcn302/dcn302_dccg.h" +#include "irq/dcn302/irq_service_dcn302.h" + +#include "dcn30/dcn30_dio_link_encoder.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn30/dcn30_dwb.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn30/dcn30_hubp.h" +#include "dcn30/dcn30_mmhubbub.h" +#include "dcn30/dcn30_mpc.h" +#include "dcn30/dcn30_opp.h" +#include "dcn30/dcn30_optc.h" +#include "dcn30/dcn30_resource.h" + +#include "dcn20/dcn20_dsc.h" +#include "dcn20/dcn20_resource.h" + +#include "dml/dcn30/dcn30_fpu.h" + +#include "dcn10/dcn10_resource.h" + +#include "link.h" +#include "dce/dce_abm.h" +#include "dce/dce_audio.h" +#include "dce/dce_aux.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_hwseq.h" +#include "dce/dce_i2c_hw.h" +#include "dce/dce_panel_cntl.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "clk_mgr.h" + +#include "hw_sequencer_private.h" +#include "reg_helper.h" +#include "resource.h" +#include "vm_helper.h" + +#include "dml/dcn302/dcn302_fpu.h" + +#include "dimgrey_cavefish_ip_offset.h" +#include "dcn/dcn_3_0_2_offset.h" +#include "dcn/dcn_3_0_2_sh_mask.h" +#include "dpcs/dpcs_3_0_0_offset.h" +#include "dpcs/dpcs_3_0_0_sh_mask.h" +#include "nbio/nbio_7_4_offset.h" +#include "amdgpu_socbb.h" + +#define DC_LOGGER \ + dc->ctx->logger +#define DC_LOGGER_INIT(logger) + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = true, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 7680,/*upto 8K*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .use_max_lb = true, + .exit_idle_opt_for_cursor_updates = true, + .enable_legacy_fast_update = false, + .using_dml2 = false, +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + .disallow_replay = false, + }, +}; + +enum dcn302_clk_src_array_id { + DCN302_CLK_SRC_PLL0, + DCN302_CLK_SRC_PLL1, + DCN302_CLK_SRC_PLL2, + DCN302_CLK_SRC_PLL3, + DCN302_CLK_SRC_PLL4, + DCN302_CLK_SRC_TOTAL +}; + +static const struct resource_caps res_cap_dcn302 = { + .num_timing_generator = 5, + .num_opp = 5, + .num_video_plane = 5, + .num_audio = 5, + .num_stream_encoder = 5, + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_mpc_3dlut = 2, + .num_dsc = 5, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + /* 6:1 downscaling ratio: 1000/6 = 166.666 */ + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 16, + 16 +}; + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* DCN */ +#define BASE_INNER(seg) 
DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name + +#define SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + mm ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + +#define SRII_DWB(reg_name, temp_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN30(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN30(_MASK) +}; + +#define vmid_regs(id)\ + [id] = { DCN20_VMID_REG_LIST(id) } + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static struct hubbub *dcn302_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), GFP_KERNEL); + + if (!hubbub3) + return NULL; + + hubbub3_construct(hubbub3, ctx, &hubbub_reg, &hubbub_shift, &hubbub_mask); + + for (i = 0; i < res_cap_dcn302.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub3->base; +} + +#define vpg_regs(id)\ + [id] = { VPG_DCN3_REG_LIST(id) } + +static const struct dcn30_vpg_registers vpg_regs[] = { + vpg_regs(0), + vpg_regs(1), + vpg_regs(2), + vpg_regs(3), + vpg_regs(4), + vpg_regs(5) +}; + +static const struct dcn30_vpg_shift vpg_shift = { + DCN3_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_vpg_mask vpg_mask = { + DCN3_VPG_MASK_SH_LIST(_MASK) +}; + +static struct vpg *dcn302_vpg_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); + + if (!vpg3) + return NULL; + + vpg3_construct(vpg3, ctx, inst, &vpg_regs[inst], &vpg_shift, &vpg_mask); + + return &vpg3->base; +} + +#define afmt_regs(id)\ + [id] = { AFMT_DCN3_REG_LIST(id) 
} + +static const struct dcn30_afmt_registers afmt_regs[] = { + afmt_regs(0), + afmt_regs(1), + afmt_regs(2), + afmt_regs(3), + afmt_regs(4), + afmt_regs(5) +}; + +static const struct dcn30_afmt_shift afmt_shift = { + DCN3_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_afmt_mask afmt_mask = { + DCN3_AFMT_MASK_SH_LIST(_MASK) +}; + +static struct afmt *dcn302_afmt_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); + + if (!afmt3) + return NULL; + + afmt3_construct(afmt3, ctx, inst, &afmt_regs[inst], &afmt_shift, &afmt_mask); + + return &afmt3->base; +} + +#define audio_regs(id)\ + [id] = { AUD_COMMON_REG_LIST(id) } + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6) +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +static struct audio *dcn302_create_audio(struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, &audio_regs[inst], &audio_shift, &audio_mask); +} + +#define stream_enc_regs(id)\ + [id] = { SE_DCN3_REG_LIST(id) } + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct stream_encoder *dcn302_stream_encoder_create(enum engine_id eng_id, struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGE) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn302_vpg_create(ctx, vpg_inst); + afmt = dcn302_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +#define clk_src_regs(index, pllid)\ + [index] = { CS_COMMON_REG_LIST_DCN3_02(index, pllid) } + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +static struct clock_source *dcn302_clock_source_create(struct dc_context *ctx, struct dc_bios *bios, + enum clock_source_id id, const struct dce110_clk_src_regs *regs, bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); 
+ + if (!clk_src) + return NULL; + + if (dcn3_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN302_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN302_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN302_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dcn302_hwseq_create(struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} + +#define hubp_regs(id)\ + [id] = { HUBP_REG_LIST_DCN30(id) } + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3), + hubp_regs(4) +}; + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct hubp *dcn302_hubp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_hubp *hubp2 = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + + if (hubp3_construct(hubp2, ctx, inst, &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +#define dpp_regs(id)\ + [id] = { DPP_REG_LIST_DCN30(id) } + +static const struct dcn3_dpp_registers dpp_regs[] = { + dpp_regs(0), + dpp_regs(1), + dpp_regs(2), + dpp_regs(3), + dpp_regs(4) +}; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30(_MASK) +}; + +static struct dpp *dcn302_dpp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp3_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +#define opp_regs(id)\ + [id] = { OPP_REG_LIST_DCN30(id) } + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3), + opp_regs(4) +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +static struct output_pixel_processor *dcn302_opp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +#define optc_regs(id)\ + [id] = { OPTC_COMMON_REG_LIST_DCN3_0(id) } + +static const struct dcn_optc_registers optc_regs[] = { + optc_regs(0), + optc_regs(1), + optc_regs(2), + optc_regs(3), + optc_regs(4) +}; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct timing_generator *dcn302_timing_generator_create(struct dc_context *ctx, uint32_t instance) +{ + struct optc *tgn10 = kzalloc(sizeof(struct optc), 
GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn30_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct dcn30_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN3_0(0), + MPC_REG_LIST_DCN3_0(1), + MPC_REG_LIST_DCN3_0(2), + MPC_REG_LIST_DCN3_0(3), + MPC_REG_LIST_DCN3_0(4), + MPC_OUT_MUX_REG_LIST_DCN3_0(0), + MPC_OUT_MUX_REG_LIST_DCN3_0(1), + MPC_OUT_MUX_REG_LIST_DCN3_0(2), + MPC_OUT_MUX_REG_LIST_DCN3_0(3), + MPC_OUT_MUX_REG_LIST_DCN3_0(4), + MPC_RMU_GLOBAL_REG_LIST_DCN3AG, + MPC_RMU_REG_LIST_DCN3AG(0), + MPC_RMU_REG_LIST_DCN3AG(1), + MPC_RMU_REG_LIST_DCN3AG(2), + MPC_DWB_MUX_REG_LIST_DCN3_0(0), +}; + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct mpc *dcn302_mpc_create(struct dc_context *ctx, int num_mpcc, int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL); + + if (!mpc30) + return NULL; + + dcn30_mpc_construct(mpc30, ctx, &mpc_regs, &mpc_shift, &mpc_mask, num_mpcc, num_rmu); + + return &mpc30->base; +} + +#define dsc_regsDCN20(id)\ +[id] = { DSC_REG_LIST_DCN20(id) } + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1), + dsc_regsDCN20(2), + dsc_regsDCN20(3), + dsc_regsDCN20(4) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static struct display_stream_compressor *dcn302_dsc_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +#define dwbc_regs_dcn3(id)\ +[id] = { DWBC_COMMON_REG_LIST_DCN30(id) } + +static const struct dcn30_dwbc_registers dwbc30_regs[] = { + dwbc_regs_dcn3(0) +}; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static bool dcn302_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + + dcn30_dwbc_construct(dwbc30, ctx, &dwbc30_regs[i], &dwbc30_shift, &dwbc30_mask, i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +#define mcif_wb_regs_dcn3(id)\ +[id] = { MCIF_WB_COMMON_REG_LIST_DCN30(id) } + +static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { + mcif_wb_regs_dcn3(0) +}; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static bool dcn302_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct 
dcn30_mmhubbub), GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + + dcn30_mmhubbub_construct(mcif_wb30, ctx, &mcif_wb30_regs[i], &mcif_wb30_shift, &mcif_wb30_mask, i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4) +}; + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static struct dce_aux *dcn302_aux_engine_create(struct dc_context *ctx, uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], &aux_mask, &aux_shift, ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} + +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5) +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) +}; + +static struct dce_i2c_hw *dcn302_i2c_hw_create(struct dc_context *ctx, uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +#define link_regs(id, phyid)\ + [id] = {\ + LE_DCN3_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + DPCS_DCN2_REG_LIST(id), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ + } + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), + link_regs(4, E) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT), + DPCS_DCN2_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK), + DPCS_DCN2_MASK_SH_LIST(_MASK) +}; + +#define aux_regs(id)\ + [id] = { DCN2_AUX_REG_LIST(id) } + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4) +}; + +#define hpd_regs(id)\ + [id] = { HPD_REG_LIST(id) } + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4) +}; + +static struct link_encoder *dcn302_link_encoder_create( + struct 
dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + + dcn30_link_encoder_construct(enc20, enc_init_data, &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], &le_shift, &le_mask); + + return &enc20->enc10.base; +} + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCN_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +static struct panel_cntl *dcn302_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dce_panel_cntl *panel_cntl = kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, init_data, &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, &panel_cntl_mask); + + return &panel_cntl->base; +} + +static void read_dce_straps(struct dc_context *ctx, struct resource_straps *straps) +{ + generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn302_create_audio, + .create_stream_encoder = dcn302_stream_encoder_create, + .create_hwseq = dcn302_hwseq_create, +}; + +static bool is_soc_bounding_box_valid(struct dc *dc) +{ + uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; + + if (ASICREV_IS_DIMGREY_CAVEFISH_P(hw_internal_rev)) + return true; + + return false; +} + +static bool init_soc_bounding_box(struct dc *dc, struct resource_pool *pool) +{ + struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_02_soc; + struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_02_ip; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (!is_soc_bounding_box_valid(dc)) { + DC_LOG_ERROR("%s: not valid soc bounding box\n", __func__); + return false; + } + + loaded_ip->max_num_otg = pool->pipe_count; + loaded_ip->max_num_dpp = pool->pipe_count; + loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + DC_FP_START(); + dcn20_patch_bounding_box(dc, loaded_bb); + DC_FP_END(); + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = { 0 }; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info( + dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + + DC_FP_START(); + dcn302_fpu_init_soc_bounding_box(bb_info); + DC_FP_END(); + } + } + + return true; +} + +static void dcn302_resource_destruct(struct resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (pool->stream_enc[i] != NULL) { + if (pool->stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->stream_enc[i]->vpg)); + pool->stream_enc[i]->vpg = NULL; + } + if (pool->stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->stream_enc[i]->afmt)); + pool->stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->stream_enc[i])); + pool->stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->res_cap->num_dsc; i++) { + if (pool->dscs[i] != NULL) + dcn20_dsc_destroy(&pool->dscs[i]); + } + + if (pool->mpc != NULL) { + kfree(TO_DCN20_MPC(pool->mpc)); + pool->mpc = NULL; + } + + if (pool->hubbub != NULL) { + 
kfree(pool->hubbub); + pool->hubbub = NULL; + } + + for (i = 0; i < pool->pipe_count; i++) { + if (pool->dpps[i] != NULL) { + kfree(TO_DCN20_DPP(pool->dpps[i])); + pool->dpps[i] = NULL; + } + + if (pool->hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->hubps[i])); + pool->hubps[i] = NULL; + } + + if (pool->irqs != NULL) + dal_irq_service_destroy(&pool->irqs); + } + + for (i = 0; i < pool->res_cap->num_ddc; i++) { + if (pool->engines[i] != NULL) + dce110_engine_destroy(&pool->engines[i]); + if (pool->hw_i2cs[i] != NULL) { + kfree(pool->hw_i2cs[i]); + pool->hw_i2cs[i] = NULL; + } + if (pool->sw_i2cs[i] != NULL) { + kfree(pool->sw_i2cs[i]); + pool->sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->res_cap->num_opp; i++) { + if (pool->opps[i] != NULL) + pool->opps[i]->funcs->opp_destroy(&pool->opps[i]); + } + + for (i = 0; i < pool->res_cap->num_timing_generator; i++) { + if (pool->timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->timing_generators[i])); + pool->timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->res_cap->num_dwb; i++) { + if (pool->dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->dwbc[i])); + pool->dwbc[i] = NULL; + } + if (pool->mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->mcif_wb[i])); + pool->mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->audio_count; i++) { + if (pool->audios[i]) + dce_aud_destroy(&pool->audios[i]); + } + + for (i = 0; i < pool->clk_src_count; i++) { + if (pool->clock_sources[i] != NULL) + dcn20_clock_source_destroy(&pool->clock_sources[i]); + } + + if (pool->dp_clock_source != NULL) + dcn20_clock_source_destroy(&pool->dp_clock_source); + + for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { + if (pool->mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->mpc_lut[i]); + pool->mpc_lut[i] = NULL; + } + if (pool->mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->mpc_shaper[i]); + pool->mpc_shaper[i] = NULL; + } + } + + for (i = 0; i < pool->pipe_count; i++) { + if (pool->multiple_abms[i] != NULL) + dce_abm_destroy(&pool->multiple_abms[i]); + } + + if (pool->psr != NULL) + dmub_psr_destroy(&pool->psr); + + if (pool->dccg != NULL) + dcn_dccg_destroy(&pool->dccg); + + if (pool->oem_device != NULL) { + struct dc *dc = pool->oem_device->ctx->dc; + + dc->link_srv->destroy_ddc_service(&pool->oem_device); + } +} + +static void dcn302_destroy_resource_pool(struct resource_pool **pool) +{ + dcn302_resource_destruct(*pool); + kfree(*pool); + *pool = NULL; +} + +void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn302_fpu_update_bw_bounding_box(dc, bw_params); + DC_FP_END(); +} + +static void dcn302_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + +static struct resource_funcs dcn302_res_pool_funcs = { + .destroy = dcn302_destroy_resource_pool, + .link_enc_create = dcn302_link_encoder_create, + .panel_cntl_create = dcn302_panel_cntl_create, + .validate_bandwidth = dcn30_validate_bandwidth, + .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, + .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, + .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = 
dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn302_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn302_get_panel_config_defaults, +}; + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_REG_LIST_DCN3_02() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN3_02(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN3_02(_MASK) +}; + +#define abm_regs(id)\ + [id] = { ABM_DCN302_REG_LIST(id) } + +static const struct dce_abm_registers abm_regs[] = { + abm_regs(0), + abm_regs(1), + abm_regs(2), + abm_regs(3), + abm_regs(4) +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN30(_MASK) +}; + +static bool dcn302_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + struct ddc_service_init_data ddc_init_data = {0}; + + ctx->dc_bios->regs = &bios_regs; + + pool->res_cap = &res_cap_dcn302; + + pool->funcs = &dcn302_res_pool_funcs; + + /************************************************* + * Resource + asic cap hardcoding * + *************************************************/ + pool->underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->pipe_count = pool->res_cap->num_timing_generator; + pool->mpcc_count = pool->res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.mall_size_per_mem_channel = 4; + /* total size = mall per channel * num channels * 1024 * 1024 */ + dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; + dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.max_v_total = (1 << 15) - 1; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no 
OGAM ROM on DCN3 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->res_cap->num_mpc_3dlut; //3 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->caps.dp_hdmi21_pcon_support = true; + + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, + &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->clock_sources[DCN302_CLK_SRC_PLL0] = + dcn302_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->clock_sources[DCN302_CLK_SRC_PLL1] = + dcn302_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->clock_sources[DCN302_CLK_SRC_PLL2] = + dcn302_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->clock_sources[DCN302_CLK_SRC_PLL3] = + dcn302_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->clock_sources[DCN302_CLK_SRC_PLL4] = + dcn302_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->clk_src_count = DCN302_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->dp_clock_source = + dcn302_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->clk_src_count; i++) { + if (pool->clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* DCCG */ + pool->dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* PP Lib and SMU interfaces */ + init_soc_bounding_box(dc, pool); + + /* DML */ + dml_init_instance(&dc->dml, &dcn3_02_soc, &dcn3_02_ip, DML_PROJECT_DCN30); + + /* IRQ */ + init_data.ctx = dc->ctx; + pool->irqs = dal_irq_service_dcn302_create(&init_data); + if (!pool->irqs) + goto create_fail; + + /* HUBBUB */ + pool->hubbub = dcn302_hubbub_create(ctx); + if (pool->hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: 
failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->pipe_count; i++) { + pool->hubps[i] = dcn302_hubp_create(ctx, i); + if (pool->hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->dpps[i] = dcn302_dpp_create(ctx, i); + if (pool->dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->res_cap->num_opp; i++) { + pool->opps[i] = dcn302_opp_create(ctx, i); + if (pool->opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->res_cap->num_timing_generator; i++) { + pool->timing_generators[i] = dcn302_timing_generator_create(ctx, i); + if (pool->timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + pool->timing_generator_count = i; + + /* PSR */ + pool->psr = dmub_psr_create(ctx); + if (pool->psr == NULL) { + dm_error("DC: failed to create psr!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* ABMs */ + for (i = 0; i < pool->res_cap->num_timing_generator; i++) { + pool->multiple_abms[i] = dmub_abm_create(ctx, &abm_regs[i], &abm_shift, &abm_mask); + if (pool->multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* MPC and DSC */ + pool->mpc = dcn302_mpc_create(ctx, pool->mpcc_count, pool->res_cap->num_mpc_3dlut); + if (pool->mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->res_cap->num_dsc; i++) { + pool->dscs[i] = dcn302_dsc_create(ctx, i); + if (pool->dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn302_dwbc_create(ctx, pool)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn302_mmhubbub_create(ctx, pool)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->res_cap->num_ddc; i++) { + pool->engines[i] = dcn302_aux_engine_create(ctx, i); + if (pool->engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->hw_i2cs[i] = dcn302_i2c_hw_create(ctx, i); + if (pool->hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->sw_i2cs[i] = NULL; + } + + /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, pool, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn302_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { + ddc_init_data.ctx = dc->ctx; + ddc_init_data.link = NULL; + ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; + ddc_init_data.id.enum_id = 0; + ddc_init_data.id.type = OBJECT_TYPE_GENERIC; + pool->oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); + } else { + pool->oem_device = NULL; + } + + return true; + +create_fail: + + 
dcn302_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn302_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc) +{ + struct resource_pool *pool = kzalloc(sizeof(struct resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn302_resource_construct(init_data->num_virtual_links, dc, pool)) + return pool; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h new file mode 100644 index 000000000000..9f24e73b92b3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h @@ -0,0 +1,38 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN302_RESOURCE_H_ +#define _DCN302_RESOURCE_H_ + +#include "core_types.h" + +extern struct _vcs_dpi_ip_params_st dcn3_02_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_02_soc; + +struct resource_pool *dcn302_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc); + +void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); + +#endif /* _DCN302_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c new file mode 100644 index 000000000000..25cd6236b054 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -0,0 +1,1448 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + */ + +#include "dcn303/dcn303_init.h" +#include "dcn303_resource.h" +#include "dcn303/dcn303_dccg.h" +#include "irq/dcn303/irq_service_dcn303.h" + +#include "dcn30/dcn30_dio_link_encoder.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn30/dcn30_dwb.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn30/dcn30_hubp.h" +#include "dcn30/dcn30_mmhubbub.h" +#include "dcn30/dcn30_mpc.h" +#include "dcn30/dcn30_opp.h" +#include "dcn30/dcn30_optc.h" +#include "dcn30/dcn30_resource.h" + +#include "dcn20/dcn20_dsc.h" +#include "dcn20/dcn20_resource.h" + +#include "dml/dcn30/dcn30_fpu.h" + +#include "dcn10/dcn10_resource.h" + +#include "link.h" + +#include "dce/dce_abm.h" +#include "dce/dce_audio.h" +#include "dce/dce_aux.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_hwseq.h" +#include "dce/dce_i2c_hw.h" +#include "dce/dce_panel_cntl.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "clk_mgr.h" + +#include "hw_sequencer_private.h" +#include "reg_helper.h" +#include "resource.h" +#include "vm_helper.h" + +#include "sienna_cichlid_ip_offset.h" +#include "dcn/dcn_3_0_3_offset.h" +#include "dcn/dcn_3_0_3_sh_mask.h" +#include "dpcs/dpcs_3_0_3_offset.h" +#include "dpcs/dpcs_3_0_3_sh_mask.h" +#include "nbio/nbio_2_3_offset.h" + +#include "dml/dcn303/dcn303_fpu.h" + +#define DC_LOGGER \ + dc->ctx->logger +#define DC_LOGGER_INIT(logger) + + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = true, + .pipe_split_policy = MPC_SPLIT_AVOID, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 7680,/*upto 8K*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .exit_idle_opt_for_cursor_updates = true, + .disable_idle_power_optimizations = false, + .using_dml2 = false, +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + .disallow_replay = false, + }, +}; + +enum dcn303_clk_src_array_id { + DCN303_CLK_SRC_PLL0, + DCN303_CLK_SRC_PLL1, + DCN303_CLK_SRC_TOTAL +}; + +static const struct resource_caps res_cap_dcn303 = { + .num_timing_generator = 2, + .num_opp = 2, + .num_video_plane = 2, + .num_audio = 2, + .num_stream_encoder = 2, + .num_dwb = 1, + .num_ddc = 2, + .num_vmid = 16, + .num_mpc_3dlut = 1, + .num_dsc = 2, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + .max_downscale_factor = { + .argb8888 = 600, + .nv12 = 600, + .fp16 = 600 + }, + 16, + 16 +}; + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + 
.reg_name = NBIO_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* DCN */ +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name + +#define SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + mm ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(mm ## reg_name ## _BASE_IDX) + mm ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + +#define SRII_DWB(reg_name, temp_name, block, id)\ + .reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + mm ## block ## id ## _ ## reg_name + +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN30(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN30(_MASK) +}; + +#define vmid_regs(id)\ + [id] = { DCN20_VMID_REG_LIST(id) } + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static struct hubbub *dcn303_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), GFP_KERNEL); + + if (!hubbub3) + return NULL; + + hubbub3_construct(hubbub3, ctx, &hubbub_reg, &hubbub_shift, &hubbub_mask); + + for (i = 0; i < res_cap_dcn303.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub3->base; +} + +#define vpg_regs(id)\ + [id] = { VPG_DCN3_REG_LIST(id) } + +static const struct dcn30_vpg_registers vpg_regs[] = { + vpg_regs(0), + vpg_regs(1), + vpg_regs(2) +}; + +static const struct dcn30_vpg_shift vpg_shift = { + DCN3_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_vpg_mask vpg_mask = { + DCN3_VPG_MASK_SH_LIST(_MASK) +}; + +static struct vpg *dcn303_vpg_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); + + if (!vpg3) + return NULL; + + vpg3_construct(vpg3, ctx, inst, &vpg_regs[inst], &vpg_shift, &vpg_mask); + + return 
&vpg3->base; +} + +#define afmt_regs(id)\ + [id] = { AFMT_DCN3_REG_LIST(id) } + +static const struct dcn30_afmt_registers afmt_regs[] = { + afmt_regs(0), + afmt_regs(1), + afmt_regs(2) +}; + +static const struct dcn30_afmt_shift afmt_shift = { + DCN3_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_afmt_mask afmt_mask = { + DCN3_AFMT_MASK_SH_LIST(_MASK) +}; + +static struct afmt *dcn303_afmt_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); + + if (!afmt3) + return NULL; + + afmt3_construct(afmt3, ctx, inst, &afmt_regs[inst], &afmt_shift, &afmt_mask); + + return &afmt3->base; +} + +#define audio_regs(id)\ + [id] = { AUD_COMMON_REG_LIST(id) } + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6) +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +static struct audio *dcn303_create_audio(struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, &audio_regs[inst], &audio_shift, &audio_mask); +} + +#define stream_enc_regs(id)\ + [id] = { SE_DCN3_REG_LIST(id) } + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id, struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGB) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn303_vpg_create(ctx, vpg_inst); + afmt = dcn303_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +#define clk_src_regs(index, pllid)\ + [index] = { CS_COMMON_REG_LIST_DCN3_03(index, pllid) } + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +static struct clock_source *dcn303_clock_source_create(struct dc_context *ctx, struct dc_bios *bios, + enum clock_source_id id, const struct dce110_clk_src_regs *regs, bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn3_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, 
&cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN303_REG_LIST() +}; + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN303_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN303_MASK_SH_LIST(_MASK) +}; + +static struct dce_hwseq *dcn303_hwseq_create(struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} + +#define hubp_regs(id)\ + [id] = { HUBP_REG_LIST_DCN30(id) } + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1) +}; + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct hubp *dcn303_hubp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_hubp *hubp2 = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + + if (hubp3_construct(hubp2, ctx, inst, &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +#define dpp_regs(id)\ + [id] = { DPP_REG_LIST_DCN30(id) } + +static const struct dcn3_dpp_registers dpp_regs[] = { + dpp_regs(0), + dpp_regs(1) +}; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30(_MASK) +}; + +static struct dpp *dcn303_dpp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp3_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +#define opp_regs(id)\ + [id] = { OPP_REG_LIST_DCN30(id) } + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1) +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +static struct output_pixel_processor *dcn303_opp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +#define optc_regs(id)\ + [id] = { OPTC_COMMON_REG_LIST_DCN3_0(id) } + +static const struct dcn_optc_registers optc_regs[] = { + optc_regs(0), + optc_regs(1) +}; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct timing_generator *dcn303_timing_generator_create(struct dc_context *ctx, uint32_t instance) +{ + struct optc *tgn10 = kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn30_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static 
const struct dcn30_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN3_0(0), + MPC_REG_LIST_DCN3_0(1), + MPC_OUT_MUX_REG_LIST_DCN3_0(0), + MPC_OUT_MUX_REG_LIST_DCN3_0(1), + MPC_RMU_GLOBAL_REG_LIST_DCN3AG, + MPC_RMU_REG_LIST_DCN3AG(0), + MPC_DWB_MUX_REG_LIST_DCN3_0(0), +}; + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN303(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN303(_MASK) +}; + +static struct mpc *dcn303_mpc_create(struct dc_context *ctx, int num_mpcc, int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL); + + if (!mpc30) + return NULL; + + dcn30_mpc_construct(mpc30, ctx, &mpc_regs, &mpc_shift, &mpc_mask, num_mpcc, num_rmu); + + return &mpc30->base; +} + +#define dsc_regsDCN20(id)\ +[id] = { DSC_REG_LIST_DCN20(id) } + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static struct display_stream_compressor *dcn303_dsc_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +#define dwbc_regs_dcn3(id)\ +[id] = { DWBC_COMMON_REG_LIST_DCN30(id) } + +static const struct dcn30_dwbc_registers dwbc30_regs[] = { + dwbc_regs_dcn3(0) +}; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static bool dcn303_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + + dcn30_dwbc_construct(dwbc30, ctx, &dwbc30_regs[i], &dwbc30_shift, &dwbc30_mask, i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +#define mcif_wb_regs_dcn3(id)\ +[id] = { MCIF_WB_COMMON_REG_LIST_DCN30(id) } + +static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { + mcif_wb_regs_dcn3(0) +}; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static bool dcn303_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + + dcn30_mmhubbub_construct(mcif_wb30, ctx, &mcif_wb30_regs[i], &mcif_wb30_shift, &mcif_wb30_mask, i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1) +}; + 
+static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static struct dce_aux *dcn303_aux_engine_create(struct dc_context *ctx, uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], &aux_mask, &aux_shift, ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} + +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2) +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN2(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN2(_MASK) +}; + +static struct dce_i2c_hw *dcn303_i2c_hw_create(struct dc_context *ctx, uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +#define link_regs(id, phyid)\ + [id] = {\ + LE_DCN3_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ + } + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN30(__SHIFT), + DPCS_DCN2_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN30(_MASK), + DPCS_DCN2_MASK_SH_LIST(_MASK) +}; + +#define aux_regs(id)\ + [id] = { DCN2_AUX_REG_LIST(id) } + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1) +}; + +#define hpd_regs(id)\ + [id] = { HPD_REG_LIST(id) } + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1) +}; + +static struct link_encoder *dcn303_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + + dcn30_link_encoder_construct(enc20, enc_init_data, &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], &le_shift, &le_mask); + + return &enc20->enc10.base; +} + +static const struct dce_panel_cntl_registers panel_cntl_regs[] = { + { DCN_PANEL_CNTL_REG_LIST() } +}; + +static const struct dce_panel_cntl_shift panel_cntl_shift = { + DCE_PANEL_CNTL_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_panel_cntl_mask panel_cntl_mask = { + DCE_PANEL_CNTL_MASK_SH_LIST(_MASK) +}; + +static struct panel_cntl *dcn303_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ 
+ struct dce_panel_cntl *panel_cntl = kzalloc(sizeof(struct dce_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dce_panel_cntl_construct(panel_cntl, init_data, &panel_cntl_regs[init_data->inst], + &panel_cntl_shift, &panel_cntl_mask); + + return &panel_cntl->base; +} + +static void read_dce_straps(struct dc_context *ctx, struct resource_straps *straps) +{ + generic_reg_get(ctx, mmDC_PINSTRAPS + BASE(mmDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); +} + +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn303_create_audio, + .create_stream_encoder = dcn303_stream_encoder_create, + .create_hwseq = dcn303_hwseq_create, +}; + +static bool is_soc_bounding_box_valid(struct dc *dc) +{ + uint32_t hw_internal_rev = dc->ctx->asic_id.hw_internal_rev; + + if (ASICREV_IS_BEIGE_GOBY_P(hw_internal_rev)) + return true; + + return false; +} + +static bool init_soc_bounding_box(struct dc *dc, struct resource_pool *pool) +{ + struct _vcs_dpi_soc_bounding_box_st *loaded_bb = &dcn3_03_soc; + struct _vcs_dpi_ip_params_st *loaded_ip = &dcn3_03_ip; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (!is_soc_bounding_box_valid(dc)) { + DC_LOG_ERROR("%s: not valid soc bounding box/n", __func__); + return false; + } + + loaded_ip->max_num_otg = pool->pipe_count; + loaded_ip->max_num_dpp = pool->pipe_count; + loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + DC_FP_START(); + dcn20_patch_bounding_box(dc, loaded_bb); + DC_FP_END(); + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = { 0 }; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info( + dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + DC_FP_START(); + dcn303_fpu_init_soc_bounding_box(bb_info); + DC_FP_END(); + } + } + + return true; +} + +static void dcn303_resource_destruct(struct resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (pool->stream_enc[i] != NULL) { + if (pool->stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->stream_enc[i]->vpg)); + pool->stream_enc[i]->vpg = NULL; + } + if (pool->stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->stream_enc[i]->afmt)); + pool->stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->stream_enc[i])); + pool->stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->res_cap->num_dsc; i++) { + if (pool->dscs[i] != NULL) + dcn20_dsc_destroy(&pool->dscs[i]); + } + + if (pool->mpc != NULL) { + kfree(TO_DCN20_MPC(pool->mpc)); + pool->mpc = NULL; + } + + if (pool->hubbub != NULL) { + kfree(pool->hubbub); + pool->hubbub = NULL; + } + + for (i = 0; i < pool->pipe_count; i++) { + if (pool->dpps[i] != NULL) { + kfree(TO_DCN20_DPP(pool->dpps[i])); + pool->dpps[i] = NULL; + } + + if (pool->hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->hubps[i])); + pool->hubps[i] = NULL; + } + + if (pool->irqs != NULL) + dal_irq_service_destroy(&pool->irqs); + } + + for (i = 0; i < pool->res_cap->num_ddc; i++) { + if (pool->engines[i] != NULL) + dce110_engine_destroy(&pool->engines[i]); + if (pool->hw_i2cs[i] != NULL) { + kfree(pool->hw_i2cs[i]); + pool->hw_i2cs[i] = NULL; + } + if (pool->sw_i2cs[i] != NULL) { + kfree(pool->sw_i2cs[i]); + pool->sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->res_cap->num_opp; i++) { + if (pool->opps[i] != NULL) + pool->opps[i]->funcs->opp_destroy(&pool->opps[i]); + } + + for (i = 0; i < pool->res_cap->num_timing_generator; i++) { + if 
(pool->timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->timing_generators[i])); + pool->timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->res_cap->num_dwb; i++) { + if (pool->dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->dwbc[i])); + pool->dwbc[i] = NULL; + } + if (pool->mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->mcif_wb[i])); + pool->mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->audio_count; i++) { + if (pool->audios[i]) + dce_aud_destroy(&pool->audios[i]); + } + + for (i = 0; i < pool->clk_src_count; i++) { + if (pool->clock_sources[i] != NULL) + dcn20_clock_source_destroy(&pool->clock_sources[i]); + } + + if (pool->dp_clock_source != NULL) + dcn20_clock_source_destroy(&pool->dp_clock_source); + + for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { + if (pool->mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->mpc_lut[i]); + pool->mpc_lut[i] = NULL; + } + if (pool->mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->mpc_shaper[i]); + pool->mpc_shaper[i] = NULL; + } + } + + for (i = 0; i < pool->pipe_count; i++) { + if (pool->multiple_abms[i] != NULL) + dce_abm_destroy(&pool->multiple_abms[i]); + } + + if (pool->psr != NULL) + dmub_psr_destroy(&pool->psr); + + if (pool->dccg != NULL) + dcn_dccg_destroy(&pool->dccg); + + if (pool->oem_device != NULL) { + struct dc *dc = pool->oem_device->ctx->dc; + + dc->link_srv->destroy_ddc_service(&pool->oem_device); + } +} + +static void dcn303_destroy_resource_pool(struct resource_pool **pool) +{ + dcn303_resource_destruct(*pool); + kfree(*pool); + *pool = NULL; +} + +static void dcn303_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + +void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn303_fpu_update_bw_bounding_box(dc, bw_params); + DC_FP_END(); +} + +static struct resource_funcs dcn303_res_pool_funcs = { + .destroy = dcn303_destroy_resource_pool, + .link_enc_create = dcn303_link_encoder_create, + .panel_cntl_create = dcn303_panel_cntl_create, + .validate_bandwidth = dcn30_validate_bandwidth, + .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, + .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, + .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn303_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn303_get_panel_config_defaults, +}; + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_REG_LIST_DCN3_03() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN3_03(__SHIFT) +}; + +static const struct 
dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN3_03(_MASK) +}; + +#define abm_regs(id)\ + [id] = { ABM_DCN302_REG_LIST(id) } + +static const struct dce_abm_registers abm_regs[] = { + abm_regs(0), + abm_regs(1) +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN30(_MASK) +}; + +static bool dcn303_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + struct ddc_service_init_data ddc_init_data; + + ctx->dc_bios->regs = &bios_regs; + + pool->res_cap = &res_cap_dcn303; + + pool->funcs = &dcn303_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->pipe_count = pool->res_cap->num_timing_generator; + pool->mpcc_count = pool->res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by derfault*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.mall_size_per_mem_channel = 4; + /* total size = mall per channel * num channels * 1024 * 1024 */ + dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * + dc->ctx->dc_bios->vram_info.num_chans * + 1024 * 1024; + dc->caps.cursor_cache_size = + dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; + dc->caps.max_slave_planes = 1; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.max_v_total = (1 << 15) - 1; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN3 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->res_cap->num_mpc_3dlut; //3 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->caps.dp_hdmi21_pcon_support = true; + + dc->config.dc_mode_clk_limit_support = true; + /* read VBIOS LTTPR caps */ + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + 
dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->clock_sources[DCN303_CLK_SRC_PLL0] = + dcn303_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->clock_sources[DCN303_CLK_SRC_PLL1] = + dcn303_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + + pool->clk_src_count = DCN303_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->dp_clock_source = + dcn303_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->clk_src_count; i++) { + if (pool->clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* DCCG */ + pool->dccg = dccg30_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* PP Lib and SMU interfaces */ + init_soc_bounding_box(dc, pool); + + /* DML */ + dml_init_instance(&dc->dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30); + + /* IRQ */ + init_data.ctx = dc->ctx; + pool->irqs = dal_irq_service_dcn303_create(&init_data); + if (!pool->irqs) + goto create_fail; + + /* HUBBUB */ + pool->hubbub = dcn303_hubbub_create(ctx); + if (pool->hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->pipe_count; i++) { + pool->hubps[i] = dcn303_hubp_create(ctx, i); + if (pool->hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->dpps[i] = dcn303_dpp_create(ctx, i); + if (pool->dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->res_cap->num_opp; i++) { + pool->opps[i] = dcn303_opp_create(ctx, i); + if (pool->opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->res_cap->num_timing_generator; i++) { + pool->timing_generators[i] = dcn303_timing_generator_create(ctx, i); + if (pool->timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + pool->timing_generator_count = i; + + /* PSR */ + pool->psr = dmub_psr_create(ctx); + if (pool->psr == NULL) { + dm_error("DC: failed to create psr!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* ABM */ + for (i = 0; i < pool->res_cap->num_timing_generator; i++) { + pool->multiple_abms[i] = dmub_abm_create(ctx, &abm_regs[i], &abm_shift, &abm_mask); + if (pool->multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm 
for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* MPC and DSC */ + pool->mpc = dcn303_mpc_create(ctx, pool->mpcc_count, pool->res_cap->num_mpc_3dlut); + if (pool->mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->res_cap->num_dsc; i++) { + pool->dscs[i] = dcn303_dsc_create(ctx, i); + if (pool->dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn303_dwbc_create(ctx, pool)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn303_mmhubbub_create(ctx, pool)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->res_cap->num_ddc; i++) { + pool->engines[i] = dcn303_aux_engine_create(ctx, i); + if (pool->engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->hw_i2cs[i] = dcn303_i2c_hw_create(ctx, i); + if (pool->hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->sw_i2cs[i] = NULL; + } + + /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, pool, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn303_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { + ddc_init_data.ctx = dc->ctx; + ddc_init_data.link = NULL; + ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; + ddc_init_data.id.enum_id = 0; + ddc_init_data.id.type = OBJECT_TYPE_GENERIC; + pool->oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); + } else { + pool->oem_device = NULL; + } + + return true; + +create_fail: + + dcn303_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn303_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc) +{ + struct resource_pool *pool = kzalloc(sizeof(struct resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn303_resource_construct(init_data->num_virtual_links, dc, pool)) + return pool; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h new file mode 100644 index 000000000000..37cf1525820b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + */ + +#ifndef _DCN303_RESOURCE_H_ +#define _DCN303_RESOURCE_H_ + +#include "core_types.h" + +extern struct _vcs_dpi_ip_params_st dcn3_03_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc; + +struct resource_pool *dcn303_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc); + +void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); + +#endif /* _DCN303_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c new file mode 100644 index 000000000000..31035fc3d868 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -0,0 +1,2218 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + + +#include "dm_services.h" +#include "dc.h" + +#include "dcn31/dcn31_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn31_resource.h" + +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" + +#include "dml/dcn30/dcn30_fpu.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn30/dcn30_mpc.h" +#include "dcn31/dcn31_hubp.h" +#include "irq/dcn31/irq_service_dcn31.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn31/dcn31_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn30/dcn30_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_link_encoder.h" +#include "dcn31/dcn31_apg.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn31/dcn31_vpg.h" +#include "dcn31/dcn31_afmt.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" +#include "dcn31/dcn31_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "dcn31/dcn31_panel_cntl.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn30/dcn30_mmhubbub.h" + +// TODO: change include headers /amd/include/asic_reg after upstream +#include "yellow_carp_offset.h" +#include "dcn/dcn_3_1_2_offset.h" +#include "dcn/dcn_3_1_2_sh_mask.h" +#include "nbio/nbio_7_2_0_offset.h" +#include "dpcs/dpcs_4_2_0_offset.h" +#include "dpcs/dpcs_4_2_0_sh_mask.h" +#include "mmhub/mmhub_2_3_0_offset.h" +#include "mmhub/mmhub_2_3_0_sh_mask.h" + + +#define regDCHUBBUB_DEBUG_CTRL_0 0x04d6 +#define regDCHUBBUB_DEBUG_CTRL_0_BASE_IDX 2 +#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10 +#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" +#include "dce/dmub_replay.h" + +#include "dml/dcn30/display_mode_vba_30.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" + +#include "link_enc_cfg.h" + +#define DC_LOGGER \ + dc->ctx->logger +#define DC_LOGGER_INIT(logger) + +enum dcn31_clk_src_array_id { + DCN31_CLK_SRC_PLL0, + DCN31_CLK_SRC_PLL1, + DCN31_CLK_SRC_PLL2, + DCN31_CLK_SRC_PLL3, + DCN31_CLK_SRC_PLL4, + DCN30_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + 
.RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + reg ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX1_ ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ + mm ## reg_name + +/* CLOCK */ +#define CLK_BASE_INNER(seg) \ + CLK_BASE__INST0_SEG ## seg + +#define CLK_BASE(seg) \ + CLK_BASE_INNER(seg) + +#define CLK_SRI(reg_name, block, inst)\ + .reg_name = CLK_BASE(reg ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## _ ## inst ## _ ## reg_name + + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN3_0(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E) +}; +/*pll_id being rempped in dmub, in driver it is logical instance*/ +static const struct dce110_clk_src_regs clk_src_regs_b0[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, F), + clk_src_regs(3, G), + clk_src_regs(4, E) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +#define abm_regs(id)\ +[id] = {\ + ABM_DCN302_REG_LIST(id)\ +} + +static const struct dce_abm_registers abm_regs[] = { + abm_regs(0), + abm_regs(1), + abm_regs(2), + abm_regs(3), +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN30(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6) +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs(id)\ +[id] = {\ + VPG_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_vpg_registers vpg_regs[] = 
{ + vpg_regs(0), + vpg_regs(1), + vpg_regs(2), + vpg_regs(3), + vpg_regs(4), + vpg_regs(5), + vpg_regs(6), + vpg_regs(7), + vpg_regs(8), + vpg_regs(9), +}; + +static const struct dcn31_vpg_shift vpg_shift = { + DCN31_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_vpg_mask vpg_mask = { + DCN31_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs(id)\ +[id] = {\ + AFMT_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_afmt_registers afmt_regs[] = { + afmt_regs(0), + afmt_regs(1), + afmt_regs(2), + afmt_regs(3), + afmt_regs(4), + afmt_regs(5) +}; + +static const struct dcn31_afmt_shift afmt_shift = { + DCN31_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_afmt_mask afmt_mask = { + DCN31_AFMT_MASK_SH_LIST(_MASK) +}; + +#define apg_regs(id)\ +[id] = {\ + APG_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_apg_registers apg_regs[] = { + apg_regs(0), + apg_regs(1), + apg_regs(2), + apg_regs(3) +}; + +static const struct dcn31_apg_shift apg_shift = { + DCN31_APG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_apg_mask apg_mask = { + DCN31_APG_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN3_REG_LIST(id)\ +} + +/* Some encoders won't be initialized here - but they're logical, not physical. */ +static const struct dcn10_stream_enc_registers stream_enc_regs[ENGINE_ID_COUNT] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4) +}; + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN31_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + DPCS_DCN31_REG_LIST(id), \ +} + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), + link_regs(4, E) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ + DPCS_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ + DPCS_DCN31_MASK_SH_LIST(_MASK) +}; + +#define hpo_dp_stream_encoder_reg_list(id)\ +[id] = {\ + DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\ +} + +static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = { + hpo_dp_stream_encoder_reg_list(0), + hpo_dp_stream_encoder_reg_list(1), + hpo_dp_stream_encoder_reg_list(2), + hpo_dp_stream_encoder_reg_list(3), +}; + +static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) +}; + +#define hpo_dp_link_encoder_reg_list(id)\ +[id] = {\ + 
DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\ + DCN3_1_RDPCSTX_REG_LIST(0),\ + DCN3_1_RDPCSTX_REG_LIST(1),\ + DCN3_1_RDPCSTX_REG_LIST(2),\ + DCN3_1_RDPCSTX_REG_LIST(3),\ + DCN3_1_RDPCSTX_REG_LIST(4)\ +} + +static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = { + hpo_dp_link_encoder_reg_list(0), + hpo_dp_link_encoder_reg_list(1), +}; + +static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { + DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { + DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs(id)\ +[id] = {\ + DPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn3_dpp_registers dpp_regs[] = { + dpp_regs(0), + dpp_regs(1), + dpp_regs(2), + dpp_regs(3) +}; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3) +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4) +}; + +#define dwbc_regs_dcn3(id)\ +[id] = {\ + DWBC_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_dwbc_registers dwbc30_regs[] = { + dwbc_regs_dcn3(0), +}; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define mcif_wb_regs_dcn3(id)\ +[id] = {\ + MCIF_WB_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { + mcif_wb_regs_dcn3(0) +}; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define dsc_regsDCN20(id)\ +[id] = {\ + DSC_REG_LIST_DCN20(id)\ +} + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1), + dsc_regsDCN20(2) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static const struct dcn30_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN3_0(0), + MPC_REG_LIST_DCN3_0(1), + MPC_REG_LIST_DCN3_0(2), + MPC_REG_LIST_DCN3_0(3), + MPC_OUT_MUX_REG_LIST_DCN3_0(0), + MPC_OUT_MUX_REG_LIST_DCN3_0(1), + MPC_OUT_MUX_REG_LIST_DCN3_0(2), + MPC_OUT_MUX_REG_LIST_DCN3_0(3), + MPC_RMU_GLOBAL_REG_LIST_DCN3AG, + MPC_RMU_REG_LIST_DCN3AG(0), + MPC_RMU_REG_LIST_DCN3AG(1), + //MPC_RMU_REG_LIST_DCN3AG(2), + MPC_DWB_MUX_REG_LIST_DCN3_0(0), +}; + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define optc_regs(id)\ +[id] = 
{OPTC_COMMON_REG_LIST_DCN3_1(id)} + +static const struct dcn_optc_registers optc_regs[] = { + optc_regs(0), + optc_regs(1), + optc_regs(2), + optc_regs(3) +}; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN3_1(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN3_1(_MASK) +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN30(id)\ +} + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3) +}; + + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN31(_MASK) +}; +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN31(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN31(_MASK) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_REG_LIST_DCN31() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN31(_MASK) +}; + + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + reg ## reg_name_pre ## id ## _ ## reg_name_post + + +#define HWSEQ_DCN31_REG_LIST()\ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ + SR(DIO_MEM_PWR_CTRL), \ + SR(ODM_MEM_PWR_CTRL3), \ + SR(DMU_MEM_PWR_CNTL), \ + SR(MMHUBBUB_MEM_PWR_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCFCLK_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ + SRII(PIXEL_RATE_CNTL, OTG, 0), \ + SRII(PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PIXEL_RATE_CNTL, OTG, 3),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ + SR(MICROSECOND_TIME_BASE_DIV), \ + SR(MILLISECOND_TIME_BASE_DIV), \ + SR(DISPCLK_FREQ_CHANGE_CNTL), \ + SR(RBBMIF_TIMEOUT_DIS), \ + SR(RBBMIF_TIMEOUT_DIS_2), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ + SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ + SR(MPC_CRC_CTRL), \ + SR(MPC_CRC_RESULT_GB), \ + SR(MPC_CRC_RESULT_C), \ + SR(MPC_CRC_RESULT_AR), \ + SR(DOMAIN0_PG_CONFIG), \ + SR(DOMAIN1_PG_CONFIG), \ + SR(DOMAIN2_PG_CONFIG), \ + SR(DOMAIN3_PG_CONFIG), \ + SR(DOMAIN16_PG_CONFIG), \ + SR(DOMAIN17_PG_CONFIG), \ + SR(DOMAIN18_PG_CONFIG), \ + SR(DOMAIN0_PG_STATUS), \ + SR(DOMAIN1_PG_STATUS), \ + SR(DOMAIN2_PG_STATUS), \ + SR(DOMAIN3_PG_STATUS), \ + SR(DOMAIN16_PG_STATUS), \ + SR(DOMAIN17_PG_STATUS), \ + SR(DOMAIN18_PG_STATUS), \ + SR(D1VGA_CONTROL), \ + SR(D2VGA_CONTROL), \ + SR(D3VGA_CONTROL), \ + SR(D4VGA_CONTROL), \ + SR(D5VGA_CONTROL), \ + SR(D6VGA_CONTROL), \ + SR(DC_IP_REQUEST_CNTL), \ + SR(AZALIA_AUDIO_DTO), \ + SR(AZALIA_CONTROLLER_CLOCK_GATING), \ + SR(HPO_TOP_HW_CONTROL) + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN31_REG_LIST() +}; + +#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ + HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ 
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ + HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ + HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ + HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ + HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ + HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh) + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN31_MASK_SH_LIST(_MASK) +}; +#define vmid_regs(id)\ +[id] = {\ + DCN20_VMID_REG_LIST(id)\ +} + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn31 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 5, + .num_stream_encoder = 5, + .num_dig_link_enc = 5, + .num_hpo_dp_stream_encoder = 4, + .num_hpo_dp_link_encoder = 2, + .num_pll = 5, + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_mpc_3dlut = 2, + .num_dsc = 3, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + // 6:1 downscaling ratio: 1000/6 = 166.666 + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, 
+ .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = false, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 4096,/*upto true 4K*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = true, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .pstate_enabled = true, + .use_max_lb = true, + .enable_mem_low_power = { + .bits = { + .vga = true, + .i2c = true, + .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled + .dscl = true, + .cm = true, + .mpc = true, + .optc = true, + .vpg = true, + .afmt = true, + } + }, + .disable_z10 = true, + .enable_legacy_fast_update = true, + .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ + .dml_hostvm_override = DML_HOSTVM_OVERRIDE_FALSE, + .using_dml2 = false, +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + .disallow_replay = false, + }, + .ilr = { + .optimize_edp_link_rate = true, + }, +}; + +static void dcn31_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN20_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn31_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn3_dpp *dpp = + kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp3_construct(dpp, ctx, inst, + &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +static struct output_pixel_processor *dcn31_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dcn31_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct dce_i2c_hw *dcn31_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct mpc *dcn31_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), + GFP_KERNEL); + + if (!mpc30) + return NULL; + + dcn30_mpc_construct(mpc30, 
ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub3) + return NULL; + + hubbub31_construct(hubbub3, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask, + dcn3_1_ip.det_buffer_size_kbytes, + dcn3_1_ip.pixel_chunk_size_kbytes, + dcn3_1_ip.config_return_buffer_size_in_kbytes); + + + for (i = 0; i < res_cap_dcn31.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub3->base; +} + +static struct timing_generator *dcn31_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn31_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + + dcn31_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +/* Create a minimal link encoder object not associated with a particular + * physical connector. 
+ * resource_funcs.link_enc_create_minimal + */ +static struct link_encoder *dcn31_link_enc_create_minimal( + struct dc_context *ctx, enum engine_id eng_id) +{ + struct dcn20_link_encoder *enc20; + + if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) + return NULL; + + enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + if (!enc20) + return NULL; + + dcn31_link_encoder_construct_minimal( + enc20, + ctx, + &link_enc_feature, + &link_enc_regs[eng_id - ENGINE_ID_DIGA], + eng_id); + + return &enc20->enc10.base; +} + +static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dcn31_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dcn31_panel_cntl_construct(panel_cntl, init_data); + + return &panel_cntl->base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn31_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn31_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); + + if (!vpg31) + return NULL; + + vpg31_construct(vpg31, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg31->base; +} + +static struct afmt *dcn31_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); + + if (!afmt31) + return NULL; + + afmt31_construct(afmt31, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + // Light sleep by default, no need to power down here + + return &afmt31->base; +} + +static struct apg *dcn31_apg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); + + if (!apg31) + return NULL; + + apg31_construct(apg31, ctx, inst, + &apg_regs[inst], + &apg_shift, + &apg_mask); + + return &apg31->base; +} + +static struct stream_encoder *dcn31_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + afmt = dcn31_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; + struct vpg *vpg; + struct apg *apg; + uint32_t hpo_dp_inst; + uint32_t vpg_inst; + uint32_t apg_inst; + + ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); + hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; + + /* Mapping of VPG register blocks to 
HPO DP block instance: + * VPG[6] -> HPO_DP[0] + * VPG[7] -> HPO_DP[1] + * VPG[8] -> HPO_DP[2] + * VPG[9] -> HPO_DP[3] + */ + vpg_inst = hpo_dp_inst + 6; + + /* Mapping of APG register blocks to HPO DP block instance: + * APG[0] -> HPO_DP[0] + * APG[1] -> HPO_DP[1] + * APG[2] -> HPO_DP[2] + * APG[3] -> HPO_DP[3] + */ + apg_inst = hpo_dp_inst; + + /* allocate HPO stream encoder and create VPG sub-block */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + apg = dcn31_apg_create(ctx, apg_inst); + + if (!hpo_dp_enc31 || !vpg || !apg) { + kfree(hpo_dp_enc31); + kfree(vpg); + kfree(apg); + return NULL; + } + + dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, + hpo_dp_inst, eng_id, vpg, apg, + &hpo_dp_stream_enc_regs[hpo_dp_inst], + &hpo_dp_se_shift, &hpo_dp_se_mask); + + return &hpo_dp_enc31->base; +} + +static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( + uint8_t inst, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; + + /* allocate HPO link encoder */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); + + hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, + &hpo_dp_link_enc_regs[inst], + &hpo_dp_le_shift, &hpo_dp_le_mask); + + return &hpo_dp_enc31->base; +} + +static struct dce_hwseq *dcn31_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn31_create_audio, + .create_stream_encoder = dcn31_stream_encoder_create, + .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, + .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, + .create_hwseq = dcn31_hwseq_create, +}; + +static void dcn31_resource_destruct(struct dcn31_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { + if (pool->base.hpo_dp_stream_enc[i] != NULL) { + if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); + pool->base.hpo_dp_stream_enc[i]->vpg = NULL; + } + if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { + kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); + pool->base.hpo_dp_stream_enc[i]->apg = NULL; + } + kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); + pool->base.hpo_dp_stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { + if (pool->base.hpo_dp_link_enc[i] != NULL) { + kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); + pool->base.hpo_dp_link_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + 
dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn31_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.replay != NULL) + dmub_replay_destroy(&pool->base.replay); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); +} + +static struct hubp *dcn31_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + + if (hubp31_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc30) { + 
dm_error("DC: failed to create dwbc30!\n"); + return false; + } + + dcn30_dwbc_construct(dwbc30, ctx, + &dwbc30_regs[i], + &dwbc30_shift, + &dwbc30_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + + dcn30_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn31_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +static void dcn31_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn31_resource_pool *dcn31_pool = TO_DCN31_RES_POOL(*pool); + + dcn31_resource_destruct(dcn31_pool); + kfree(dcn31_pool); + *pool = NULL; +} + +static struct clock_source *dcn31_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn3_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static bool is_dual_plane(enum surface_pixel_format format) +{ + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; +} + +int dcn31x_populate_dml_pipes_from_context(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + uint32_t pipe_cnt; + int i; + + dc_assert_fp_enabled(); + + pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + for (i = 0; i < pipe_cnt; i++) { + pipes[i].pipe.src.gpuvm = 1; + if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE) { + //pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; + pipes[i].pipe.src.hostvm = dc->vm_pa_config.is_hvm_enabled; + } else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_FALSE) + pipes[i].pipe.src.hostvm = false; + else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_TRUE) + pipes[i].pipe.src.hostvm = true; + } + return pipe_cnt; +} + +int dcn31_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe; + bool upscaled = false; + + DC_FP_START(); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + DC_FP_END(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + if (pipe->plane_state && + (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || + 
pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) + upscaled = true; + + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. + */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + pipes[pipe_cnt].pipe.src.gpuvm = true; + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + pipes[pipe_cnt].pipe.src.dcc_rate = 3; + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); + + + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + + pipe_cnt++; + } + context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE; + dc->config.enable_4to1MPC = false; + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { + if (is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { + dc->config.enable_4to1MPC = true; + } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { + /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + pipes[0].pipe.src.unbounded_req_mode = true; + } + } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count + && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; + } else if (context->stream_count >= 3 && upscaled) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + } + + return pipe_cnt; +} + +void dcn31_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + DC_FP_START(); + dcn31_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); +} + +void +dcn31_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + DC_FP_START(); + dcn30_populate_dml_writeback_from_context(dc, res_ctx, pipes); + DC_FP_END(); +} + +void +dcn31_set_mcif_arb_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + DC_FP_START(); + dcn30_set_mcif_arb_params(dc, context, pipes, pipe_cnt); + DC_FP_END(); +} + +bool dcn31_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + DC_LOGGER_INIT(dc->ctx->logger); + + BW_VAL_TRACE_COUNT(); + + DC_FP_START(); + out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, true); + DC_FP_END(); + + // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg + if (pipe_cnt == 0) + fast_validate = false; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + 
goto validate_out; + } + if (dc->res_pool->funcs->calculate_wm_and_dlg) + dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + kfree(pipes); + + BW_VAL_TRACE_FINISH(); + + return out; +} + +static void dcn31_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static struct resource_funcs dcn31_res_pool_funcs = { + .destroy = dcn31_destroy_resource_pool, + .link_enc_create = dcn31_link_encoder_create, + .link_enc_create_minimal = dcn31_link_enc_create_minimal, + .link_encs_assign = link_enc_cfg_link_encs_assign, + .link_enc_unassign = link_enc_cfg_link_enc_unassign, + .panel_cntl_create = dcn31_panel_cntl_create, + .validate_bandwidth = dcn31_validate_bandwidth, + .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, + .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, + .populate_dml_pipes = dcn31_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn31_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn31_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn31_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn31_get_panel_config_defaults, +}; + +static struct clock_source *dcn30_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + BREAK_TO_DEBUGGER(); + return NULL; +} + +static bool dcn31_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn31_resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn31; + + pool->base.funcs = &dcn31_res_pool_funcs; + + /************************************************* + * Resource + asic cap hardcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ + dc->caps.max_cursor_size = 256; + 
dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; + dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; + dc->caps.edp_dsc_support = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.is_apu = true; + dc->caps.zstate_support = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN301 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->config.use_old_fixed_vs_sequence = true; + + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN31_CLK_SRC_PLL0] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL1] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + /*move phypllx_pixclk_resync to dmub next*/ + if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs_b0[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + 
&clk_src_regs_b0[3], false); + } else { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + } + + pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* TODO: DCCG */ + pool->base.dccg = dccg31_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* TODO: IRQ */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn31_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn31_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.hubps[i] = dcn31_hubp_create(ctx, i); + if (pool->base.hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[i] = dcn31_dpp_create(ctx, i); + if (pool->base.dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn31_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn31_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + pool->base.timing_generator_count = i; + + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* Replay */ + pool->base.replay = dmub_replay_create(ctx); + if (pool->base.replay == NULL) { + dm_error("DC: failed to create replay obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* ABM */ + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.multiple_abms[i] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if (pool->base.multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* MPC and DSC */ + pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) 
{ + pool->base.dscs[i] = dcn31_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn31_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn31_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP && + dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 && + !dc->debug.dpia_debug.bits.disable_dpia) { + /* YELLOW CARP B0 has 4 DPIA's */ + pool->base.usb4_dpia_count = 4; + } + + if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1) + pool->base.usb4_dpia_count = 4; + + /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn31_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp; + + return true; + +create_fail: + dcn31_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn31_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn31_resource_pool *pool = + kzalloc(sizeof(struct dcn31_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn31_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h new file mode 100644 index 000000000000..901436591ed4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h @@ -0,0 +1,97 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN31_RESOURCE_H_ +#define _DCN31_RESOURCE_H_ + +#include "core_types.h" + +#define TO_DCN31_RES_POOL(pool)\ + container_of(pool, struct dcn31_resource_pool, base) + +extern struct _vcs_dpi_ip_params_st dcn3_1_ip; + +struct dcn31_resource_pool { + struct resource_pool base; +}; + +bool dcn31_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate); +void dcn31_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); +int dcn31_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); +void +dcn31_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes); +void +dcn31_set_mcif_arb_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt); + +struct resource_pool *dcn31_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +/*temp: B0 specific before switch to dcn313 headers*/ +#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL +#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e +#define regPHYPLLF_PIXCLK_RESYNC_CNTL_BASE_IDX 1 +#define regPHYPLLG_PIXCLK_RESYNC_CNTL 0x005f +#define regPHYPLLG_PIXCLK_RESYNC_CNTL_BASE_IDX 1 + +//PHYPLLF_PIXCLK_RESYNC_CNTL +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L + +//PHYPLLG_PIXCLK_RESYNC_CNTL +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L +#endif +#endif /* _DCN31_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c new file mode 100644 
index 000000000000..677361d74a4e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -0,0 +1,2180 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +#include "dm_services.h" +#include "dc.h" + +#include "dcn31/dcn31_init.h" +#include "dcn314/dcn314_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn314_resource.h" + +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" +#include "dcn31/dcn31_resource.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn30/dcn30_mpc.h" +#include "dcn31/dcn31_hubp.h" +#include "irq/dcn31/irq_service_dcn31.h" +#include "irq/dcn314/irq_service_dcn314.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn314/dcn314_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn30/dcn30_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn314/dcn314_dio_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_link_encoder.h" +#include "dcn31/dcn31_apg.h" +#include "dcn31/dcn31_vpg.h" +#include "dcn31/dcn31_afmt.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" +#include "dml/dcn314/dcn314_fpu.h" +#include "dcn314/dcn314_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "dcn31/dcn31_panel_cntl.h" +#include "dcn314/dcn314_hwseq.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn30/dcn30_mmhubbub.h" + +#include "dcn/dcn_3_1_4_offset.h" +#include "dcn/dcn_3_1_4_sh_mask.h" +#include "dpcs/dpcs_3_1_4_offset.h" +#include "dpcs/dpcs_3_1_4_sh_mask.h" + +#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10 +#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L + +#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE__SHIFT 0x0 +#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE_MASK 0x0000000FL + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dmub_replay.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" +#include 
"dml/dcn314/display_mode_vba_314.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" + +#include "link_enc_cfg.h" + +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define DCN_BASE__INST0_SEG2 0x000034C0 +#define DCN_BASE__INST0_SEG3 0x00009000 + +#define NBIO_BASE__INST0_SEG1 0x00000014 + +#define MAX_INSTANCE 7 +#define MAX_SEGMENT 8 + +#define regBIF_BX2_BIOS_SCRATCH_2 0x003a +#define regBIF_BX2_BIOS_SCRATCH_2_BASE_IDX 1 +#define regBIF_BX2_BIOS_SCRATCH_3 0x003b +#define regBIF_BX2_BIOS_SCRATCH_3_BASE_IDX 1 +#define regBIF_BX2_BIOS_SCRATCH_6 0x003e +#define regBIF_BX2_BIOS_SCRATCH_6_BASE_IDX 1 + +#define DC_LOGGER \ + dc->ctx->logger +#define DC_LOGGER_INIT(logger) + +enum dcn31_clk_src_array_id { + DCN31_CLK_SRC_PLL0, + DCN31_CLK_SRC_PLL1, + DCN31_CLK_SRC_PLL2, + DCN31_CLK_SRC_PLL3, + DCN31_CLK_SRC_PLL4, + DCN30_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +/* TODO awful hack. fixup dcn20_dwb.h */ +#undef BASE_INNER +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + reg ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX2_ ## reg_name + +/* MMHUB */ +#define MMHUB_BASE_INNER(seg) \ + MMHUB_BASE__INST0_SEG ## seg + +#define MMHUB_BASE(seg) \ + MMHUB_BASE_INNER(seg) + +#define MMHUB_SR(reg_name)\ + .reg_name = MMHUB_BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +/* CLOCK */ +#define CLK_BASE_INNER(seg) \ + CLK_BASE__INST0_SEG ## seg + +#define CLK_BASE(seg) \ + CLK_BASE_INNER(seg) + +#define CLK_SRI(reg_name, block, inst)\ + .reg_name = CLK_BASE(reg ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## _ ## inst ## _ ## reg_name + + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define clk_src_regs(index, pllid)\ +[index] = {\ + 
CS_COMMON_REG_LIST_DCN3_0(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN3_1_4(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN3_1_4(_MASK) +}; + +#define abm_regs(id)\ +[id] = {\ + ABM_DCN302_REG_LIST(id)\ +} + +static const struct dce_abm_registers abm_regs[] = { + abm_regs(0), + abm_regs(1), + abm_regs(2), + abm_regs(3), +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN30(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6) +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs(id)\ +[id] = {\ + VPG_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_vpg_registers vpg_regs[] = { + vpg_regs(0), + vpg_regs(1), + vpg_regs(2), + vpg_regs(3), + vpg_regs(4), + vpg_regs(5), + vpg_regs(6), + vpg_regs(7), + vpg_regs(8), + vpg_regs(9), +}; + +static const struct dcn31_vpg_shift vpg_shift = { + DCN31_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_vpg_mask vpg_mask = { + DCN31_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs(id)\ +[id] = {\ + AFMT_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_afmt_registers afmt_regs[] = { + afmt_regs(0), + afmt_regs(1), + afmt_regs(2), + afmt_regs(3), + afmt_regs(4), + afmt_regs(5) +}; + +static const struct dcn31_afmt_shift afmt_shift = { + DCN31_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_afmt_mask afmt_mask = { + DCN31_AFMT_MASK_SH_LIST(_MASK) +}; + +#define apg_regs(id)\ +[id] = {\ + APG_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_apg_registers apg_regs[] = { + apg_regs(0), + apg_regs(1), + apg_regs(2), + apg_regs(3) +}; + +static const struct dcn31_apg_shift apg_shift = { + DCN31_APG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_apg_mask apg_mask = { + DCN31_APG_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN314_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN314(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN314(_MASK) +}; + + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + 
hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4) +}; + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN31_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ +} + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), + link_regs(4, E) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), + DPCS_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), + DPCS_DCN31_MASK_SH_LIST(_MASK) +}; + +#define hpo_dp_stream_encoder_reg_list(id)\ +[id] = {\ + DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\ +} + +static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = { + hpo_dp_stream_encoder_reg_list(0), + hpo_dp_stream_encoder_reg_list(1), + hpo_dp_stream_encoder_reg_list(2), + hpo_dp_stream_encoder_reg_list(3) +}; + +static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) +}; + + +#define hpo_dp_link_encoder_reg_list(id)\ +[id] = {\ + DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\ + DCN3_1_RDPCSTX_REG_LIST(0),\ + DCN3_1_RDPCSTX_REG_LIST(1),\ + DCN3_1_RDPCSTX_REG_LIST(2),\ +} + +static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = { + hpo_dp_link_encoder_reg_list(0), + hpo_dp_link_encoder_reg_list(1), +}; + +static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { + DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { + DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs(id)\ +[id] = {\ + DPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn3_dpp_registers dpp_regs[] = { + dpp_regs(0), + dpp_regs(1), + dpp_regs(2), + dpp_regs(3) +}; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3) +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4) +}; + +#define dwbc_regs_dcn3(id)\ +[id] = {\ + DWBC_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_dwbc_registers dwbc30_regs[] = { + dwbc_regs_dcn3(0), +}; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define mcif_wb_regs_dcn3(id)\ +[id] = {\ + 
MCIF_WB_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { + mcif_wb_regs_dcn3(0) +}; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define dsc_regsDCN314(id)\ +[id] = {\ + DSC_REG_LIST_DCN20(id)\ +} + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN314(0), + dsc_regsDCN314(1), + dsc_regsDCN314(2), + dsc_regsDCN314(3) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static const struct dcn30_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN3_0(0), + MPC_REG_LIST_DCN3_0(1), + MPC_REG_LIST_DCN3_0(2), + MPC_REG_LIST_DCN3_0(3), + MPC_OUT_MUX_REG_LIST_DCN3_0(0), + MPC_OUT_MUX_REG_LIST_DCN3_0(1), + MPC_OUT_MUX_REG_LIST_DCN3_0(2), + MPC_OUT_MUX_REG_LIST_DCN3_0(3), + MPC_RMU_GLOBAL_REG_LIST_DCN3AG, + MPC_RMU_REG_LIST_DCN3AG(0), + MPC_RMU_REG_LIST_DCN3AG(1), + //MPC_RMU_REG_LIST_DCN3AG(2), + MPC_DWB_MUX_REG_LIST_DCN3_0(0), +}; + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define optc_regs(id)\ +[id] = {OPTC_COMMON_REG_LIST_DCN3_14(id)} + +static const struct dcn_optc_registers optc_regs[] = { + optc_regs(0), + optc_regs(1), + optc_regs(2), + optc_regs(3) +}; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN3_14(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN3_14(_MASK) +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN30(id)\ +} + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3) +}; + + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN31(_MASK) +}; +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN31(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN31(_MASK) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_REG_LIST_DCN314() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN314(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN314(_MASK) +}; + + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + reg ## reg_name_pre ## id ## _ ## reg_name_post + + +#define HWSEQ_DCN31_REG_LIST()\ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ + SR(DIO_MEM_PWR_CTRL), \ + SR(ODM_MEM_PWR_CTRL3), \ + SR(DMU_MEM_PWR_CNTL), \ + SR(MMHUBBUB_MEM_PWR_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCFCLK_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ + SRII(PIXEL_RATE_CNTL, OTG, 0), \ + SRII(PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PIXEL_RATE_CNTL, OTG, 3),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ + 
SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ + SR(MICROSECOND_TIME_BASE_DIV), \ + SR(MILLISECOND_TIME_BASE_DIV), \ + SR(DISPCLK_FREQ_CHANGE_CNTL), \ + SR(RBBMIF_TIMEOUT_DIS), \ + SR(RBBMIF_TIMEOUT_DIS_2), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ + SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ + SR(MPC_CRC_CTRL), \ + SR(MPC_CRC_RESULT_GB), \ + SR(MPC_CRC_RESULT_C), \ + SR(MPC_CRC_RESULT_AR), \ + SR(DOMAIN0_PG_CONFIG), \ + SR(DOMAIN1_PG_CONFIG), \ + SR(DOMAIN2_PG_CONFIG), \ + SR(DOMAIN3_PG_CONFIG), \ + SR(DOMAIN16_PG_CONFIG), \ + SR(DOMAIN17_PG_CONFIG), \ + SR(DOMAIN18_PG_CONFIG), \ + SR(DOMAIN19_PG_CONFIG), \ + SR(DOMAIN0_PG_STATUS), \ + SR(DOMAIN1_PG_STATUS), \ + SR(DOMAIN2_PG_STATUS), \ + SR(DOMAIN3_PG_STATUS), \ + SR(DOMAIN16_PG_STATUS), \ + SR(DOMAIN17_PG_STATUS), \ + SR(DOMAIN18_PG_STATUS), \ + SR(DOMAIN19_PG_STATUS), \ + SR(D1VGA_CONTROL), \ + SR(D2VGA_CONTROL), \ + SR(D3VGA_CONTROL), \ + SR(D4VGA_CONTROL), \ + SR(D5VGA_CONTROL), \ + SR(D6VGA_CONTROL), \ + SR(DC_IP_REQUEST_CNTL), \ + SR(AZALIA_AUDIO_DTO), \ + SR(AZALIA_CONTROLLER_CLOCK_GATING), \ + SR(HPO_TOP_HW_CONTROL) + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN31_REG_LIST() +}; + +#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ + HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ + HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ + HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ + HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ + HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ + HWS_SF(, 
HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh) + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN31_MASK_SH_LIST(_MASK) +}; +#define vmid_regs(id)\ +[id] = {\ + DCN20_VMID_REG_LIST(id)\ +} + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn314 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 5, + .num_stream_encoder = 5, + .num_dig_link_enc = 5, + .num_hpo_dp_stream_encoder = 4, + .num_hpo_dp_link_encoder = 2, + .num_pll = 5, + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_mpc_3dlut = 2, + .num_dsc = 4, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + // 6:1 downscaling ratio: 1000/6 = 166.666 + // 4:1 downscaling ratio for ARGB888 to prevent underflow during P010 playback: 1000/4 = 250 + .max_downscale_factor = { + .argb8888 = 250, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_z10 = false, + .enable_z9_disable_interface = true, + .minimum_z8_residency_time = 2000, + .psr_skip_crtc_disable = true, + .replay_skip_crtc_disabled = true, + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_dpp_power_gate = false, + .disable_hubp_power_gate = false, + .disable_pplib_clock_request = false, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 4096,/*upto true 4k*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = true, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .pstate_enabled = true, + .use_max_lb = true, + .enable_mem_low_power = { + .bits = { + .vga = true, + .i2c = true, + .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled + .dscl = true, + .cm = true, + .mpc = true, + .optc = true, + .vpg = true, + .afmt = true, + } + }, + + .root_clock_optimization = { + .bits = { + .dpp = true, + .dsc = true, + .hdmistream = true, + .hdmichar = true, + .dpstream = true, + .symclk32_se = false, + .symclk32_le = true, + .symclk_fe = true, + .physymclk = true, + .dpiasymclk = true, + } + }, + + .seamless_boot_odm_combine = true, + .using_dml2 = false, +}; + +static const struct dc_debug_options debug_defaults_diags = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = true, + .clock_trace = true, + .disable_dpp_power_gate = true, + .disable_hubp_power_gate = true, + .disable_clock_gate = true, + .disable_pplib_clock_request = true, + .disable_pplib_wm_range = true, 
+ .disable_stutter = false, + .scl_reset_length10 = true, + .dwb_fi_phase = -1, // -1 = disable + .dmub_command_table = true, + .enable_tri_buf = true, + .use_max_lb = true +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + .disallow_replay = false, + }, + .ilr = { + .optimize_edp_link_rate = true, + }, +}; + +static void dcn31_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN20_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn31_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn3_dpp *dpp = + kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp3_construct(dpp, ctx, inst, + &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +static struct output_pixel_processor *dcn31_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dcn31_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +/* ========================================================== */ + +/* + * DPIA index | Preferred Encoder | Host Router + * 0 | C | 0 + * 1 | First Available | 0 + * 2 | D | 1 + * 3 | First Available | 1 + */ +/* ========================================================== */ +static const enum engine_id dpia_to_preferred_enc_id_table[] = { + ENGINE_ID_DIGC, + ENGINE_ID_DIGC, + ENGINE_ID_DIGD, + ENGINE_ID_DIGD +}; + +static enum engine_id dcn314_get_preferred_eng_id_dpia(unsigned int dpia_index) +{ + return dpia_to_preferred_enc_id_table[dpia_index]; +} + +static struct dce_i2c_hw *dcn31_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct mpc *dcn31_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), + GFP_KERNEL); + + if (!mpc30) + return NULL; + + dcn30_mpc_construct(mpc30, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub3) + return NULL; + + hubbub31_construct(hubbub3, ctx, + 
&hubbub_reg, + &hubbub_shift, + &hubbub_mask, + dcn3_14_ip.det_buffer_size_kbytes, + dcn3_14_ip.pixel_chunk_size_kbytes, + dcn3_14_ip.config_return_buffer_size_in_kbytes); + + + for (i = 0; i < res_cap_dcn314.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub3->base; +} + +static struct timing_generator *dcn31_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn314_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + + dcn31_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +/* Create a minimal link encoder object not associated with a particular + * physical connector. 
+ * resource_funcs.link_enc_create_minimal + */ +static struct link_encoder *dcn31_link_enc_create_minimal( + struct dc_context *ctx, enum engine_id eng_id) +{ + struct dcn20_link_encoder *enc20; + + if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) + return NULL; + + enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + if (!enc20) + return NULL; + + dcn31_link_encoder_construct_minimal( + enc20, + ctx, + &link_enc_feature, + &link_enc_regs[eng_id - ENGINE_ID_DIGA], + eng_id); + + return &enc20->enc10.base; +} + +static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dcn31_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dcn31_panel_cntl_construct(panel_cntl, init_data); + + return &panel_cntl->base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn31_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn31_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); + + if (!vpg31) + return NULL; + + vpg31_construct(vpg31, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg31->base; +} + +static struct afmt *dcn31_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); + + if (!afmt31) + return NULL; + + afmt31_construct(afmt31, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + // Light sleep by default, no need to power down here + + return &afmt31->base; +} + +static struct apg *dcn31_apg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); + + if (!apg31) + return NULL; + + apg31_construct(apg31, ctx, inst, + &apg_regs[inst], + &apg_shift, + &apg_mask); + + return &apg31->base; +} + +static struct stream_encoder *dcn314_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id < ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + afmt = dcn31_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + + dcn314_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; + struct vpg *vpg; + struct apg *apg; + uint32_t hpo_dp_inst; + uint32_t vpg_inst; + uint32_t apg_inst; + + ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); + hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; + + /* Mapping of VPG register blocks to 
HPO DP block instance: + * VPG[6] -> HPO_DP[0] + * VPG[7] -> HPO_DP[1] + * VPG[8] -> HPO_DP[2] + * VPG[9] -> HPO_DP[3] + */ + //Uses offset index 5-8, but actually maps to vpg_inst 6-9 + vpg_inst = hpo_dp_inst + 5; + + /* Mapping of APG register blocks to HPO DP block instance: + * APG[0] -> HPO_DP[0] + * APG[1] -> HPO_DP[1] + * APG[2] -> HPO_DP[2] + * APG[3] -> HPO_DP[3] + */ + apg_inst = hpo_dp_inst; + + /* allocate HPO stream encoder and create VPG sub-block */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + apg = dcn31_apg_create(ctx, apg_inst); + + if (!hpo_dp_enc31 || !vpg || !apg) { + kfree(hpo_dp_enc31); + kfree(vpg); + kfree(apg); + return NULL; + } + + dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, + hpo_dp_inst, eng_id, vpg, apg, + &hpo_dp_stream_enc_regs[hpo_dp_inst], + &hpo_dp_se_shift, &hpo_dp_se_mask); + + return &hpo_dp_enc31->base; +} + +static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( + uint8_t inst, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; + + /* allocate HPO link encoder */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); + + hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, + &hpo_dp_link_enc_regs[inst], + &hpo_dp_le_shift, &hpo_dp_le_mask); + + return &hpo_dp_enc31->base; +} + +static struct dce_hwseq *dcn314_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn31_create_audio, + .create_stream_encoder = dcn314_stream_encoder_create, + .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, + .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, + .create_hwseq = dcn314_hwseq_create, +}; + +static void dcn314_resource_destruct(struct dcn314_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { + if (pool->base.hpo_dp_stream_enc[i] != NULL) { + if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); + pool->base.hpo_dp_stream_enc[i]->vpg = NULL; + } + if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { + kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); + pool->base.hpo_dp_stream_enc[i]->apg = NULL; + } + kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); + pool->base.hpo_dp_stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { + if (pool->base.hpo_dp_link_enc[i] != NULL) { + kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); + pool->base.hpo_dp_link_enc[i] = NULL; + } + } + + for (i = 0; i < 
pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn31_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) + dal_irq_service_destroy(&pool->base.irqs); + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.replay != NULL) + dmub_replay_destroy(&pool->base.replay); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); +} + +static struct hubp *dcn31_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + + if (hubp31_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = 
kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + + dcn30_dwbc_construct(dwbc30, ctx, + &dwbc30_regs[i], + &dwbc30_shift, + &dwbc30_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + + dcn30_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn314_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +static void dcn314_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn314_resource_pool *dcn314_pool = TO_DCN314_RES_POOL(*pool); + + dcn314_resource_destruct(dcn314_pool); + kfree(dcn314_pool); + *pool = NULL; +} + +static struct clock_source *dcn31_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + BREAK_TO_DEBUGGER(); + kfree(clk_src); + return NULL; +} + +static int dcn314_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int pipe_cnt; + + DC_FP_START(); + pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate); + DC_FP_END(); + + return pipe_cnt; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn314_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); +} + +static void dcn314_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + +static bool filter_modes_for_single_channel_workaround(struct dc *dc, + struct dc_state *context) +{ + // Filter 2K@240Hz+8K@24fps above combination timing if memory only has single dimm LPDDR + if (dc->clk_mgr->bw_params->vram_type == 34 && + dc->clk_mgr->bw_params->num_channels < 2 && + context->stream_count > 1) { + int total_phy_pix_clk = 0; + + for (int i = 0; i < context->stream_count; i++) + if (context->res_ctx.pipe_ctx[i].stream) + total_phy_pix_clk += context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; + + if (total_phy_pix_clk >= (1148928+826260)) //2K@240Hz+8K@24fps + return true; + } + return false; +} + +bool dcn314_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = 
kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + DC_LOGGER_INIT(dc->ctx->logger); + + BW_VAL_TRACE_COUNT(); + + if (filter_modes_for_single_channel_workaround(dc, context)) + goto validate_fail; + + DC_FP_START(); + // do not support self refresh only + out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, false); + DC_FP_END(); + + // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg + if (pipe_cnt == 0) + fast_validate = false; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + goto validate_out; + } + if (dc->res_pool->funcs->calculate_wm_and_dlg) + dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + kfree(pipes); + + BW_VAL_TRACE_FINISH(); + + return out; +} + +static struct resource_funcs dcn314_res_pool_funcs = { + .destroy = dcn314_destroy_resource_pool, + .link_enc_create = dcn31_link_encoder_create, + .link_enc_create_minimal = dcn31_link_enc_create_minimal, + .link_encs_assign = link_enc_cfg_link_encs_assign, + .link_enc_unassign = link_enc_cfg_link_enc_unassign, + .panel_cntl_create = dcn31_panel_cntl_create, + .validate_bandwidth = dcn314_validate_bandwidth, + .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, + .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, + .populate_dml_pipes = dcn314_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn314_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn314_get_panel_config_defaults, + .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, +}; + +static struct clock_source *dcn30_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + BREAK_TO_DEBUGGER(); + kfree(clk_src); + return NULL; +} + +static bool dcn314_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn314_resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn314; + pool->base.funcs = &dcn314_res_pool_funcs; + + 
/************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 400; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; + dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; + dc->caps.edp_dsc_support = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.is_apu = true; + dc->caps.seamless_odm = true; + + dc->caps.zstate_support = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN301 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->caps.max_disp_clock_khz_at_vmin = 650000; + + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + else + dc->debug = debug_defaults_diags; + + /* Disable pipe power gating */ + dc->debug.disable_dpp_power_gate = true; + dc->debug.disable_hubp_power_gate = true; + + /* Disable root clock optimization */ + dc->debug.root_clock_optimization.u32All = 0; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + 
*************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN31_CLK_SRC_PLL0] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL1] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + pool->base.dccg = dccg314_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn314_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn31_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.hubps[i] = dcn31_hubp_create(ctx, i); + if (pool->base.hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[i] = dcn31_dpp_create(ctx, i); + if (pool->base.dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn31_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn31_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + pool->base.timing_generator_count = i; + + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* Replay */ + pool->base.replay = dmub_replay_create(ctx); + if (pool->base.replay == NULL) { + dm_error("DC: failed to create replay obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* ABM */ + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.multiple_abms[i] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if (pool->base.multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto 
create_fail; + } + } + + /* MPC and DSC */ + pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn314_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn31_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn31_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* DCN314 has 4 DPIA */ + pool->base.usb4_dpia_count = 4; + + /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn314_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + dc->dcn_ip->max_num_dpp = dcn3_14_ip.max_num_dpp; + + return true; + +create_fail: + + dcn314_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn314_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn314_resource_pool *pool = + kzalloc(sizeof(struct dcn314_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn314_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h new file mode 100644 index 000000000000..49ffe71018df --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN314_RESOURCE_H_ +#define _DCN314_RESOURCE_H_ + +#include "core_types.h" + +extern struct _vcs_dpi_ip_params_st dcn3_14_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc; + +#define TO_DCN314_RES_POOL(pool)\ + container_of(pool, struct dcn314_resource_pool, base) + +struct dcn314_resource_pool { + struct resource_pool base; +}; + +bool dcn314_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate); + +struct resource_pool *dcn314_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +#endif /* _DCN314_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c new file mode 100644 index 000000000000..cb8024eee8e4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -0,0 +1,2151 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + + +#include "dm_services.h" +#include "dc.h" + +#include "dcn31/dcn31_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn315_resource.h" + +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" +#include "dcn31/dcn31_resource.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn30/dcn30_mpc.h" +#include "dcn31/dcn31_hubp.h" +#include "irq/dcn315/irq_service_dcn315.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn31/dcn31_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn30/dcn30_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_link_encoder.h" +#include "dcn31/dcn31_apg.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn31/dcn31_vpg.h" +#include "dcn31/dcn31_afmt.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" +#include "dcn31/dcn31_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "dcn31/dcn31_panel_cntl.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn30/dcn30_mmhubbub.h" + +#include "dcn/dcn_3_1_5_offset.h" +#include "dcn/dcn_3_1_5_sh_mask.h" +#include "dpcs/dpcs_4_2_2_offset.h" +#include "dpcs/dpcs_4_2_2_sh_mask.h" + +#define NBIO_BASE__INST0_SEG0 0x00000000 +#define NBIO_BASE__INST0_SEG1 0x00000014 +#define NBIO_BASE__INST0_SEG2 0x00000D20 +#define NBIO_BASE__INST0_SEG3 0x00010400 +#define NBIO_BASE__INST0_SEG4 0x0241B000 +#define NBIO_BASE__INST0_SEG5 0x04040000 + +#define DPCS_BASE__INST0_SEG0 0x00000012 +#define DPCS_BASE__INST0_SEG1 0x000000C0 +#define DPCS_BASE__INST0_SEG2 0x000034C0 +#define DPCS_BASE__INST0_SEG3 0x00009000 +#define DPCS_BASE__INST0_SEG4 0x02403C00 +#define DPCS_BASE__INST0_SEG5 0 + +#define DCN_BASE__INST0_SEG0 0x00000012 +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define DCN_BASE__INST0_SEG2 0x000034C0 +#define DCN_BASE__INST0_SEG3 0x00009000 +#define DCN_BASE__INST0_SEG4 0x02403C00 +#define DCN_BASE__INST0_SEG5 0 + +#define regBIF_BX_PF2_RSMU_INDEX 0x0000 +#define regBIF_BX_PF2_RSMU_INDEX_BASE_IDX 1 +#define regBIF_BX_PF2_RSMU_DATA 0x0001 +#define regBIF_BX_PF2_RSMU_DATA_BASE_IDX 1 +#define regBIF_BX2_BIOS_SCRATCH_6 0x003e +#define regBIF_BX2_BIOS_SCRATCH_6_BASE_IDX 1 +#define BIF_BX2_BIOS_SCRATCH_6__BIOS_SCRATCH_6__SHIFT 0x0 +#define BIF_BX2_BIOS_SCRATCH_6__BIOS_SCRATCH_6_MASK 0xFFFFFFFFL +#define regBIF_BX2_BIOS_SCRATCH_2 0x003a +#define regBIF_BX2_BIOS_SCRATCH_2_BASE_IDX 1 +#define BIF_BX2_BIOS_SCRATCH_2__BIOS_SCRATCH_2__SHIFT 0x0 +#define BIF_BX2_BIOS_SCRATCH_2__BIOS_SCRATCH_2_MASK 0xFFFFFFFFL +#define regBIF_BX2_BIOS_SCRATCH_3 0x003b +#define regBIF_BX2_BIOS_SCRATCH_3_BASE_IDX 1 +#define BIF_BX2_BIOS_SCRATCH_3__BIOS_SCRATCH_3__SHIFT 0x0 +#define BIF_BX2_BIOS_SCRATCH_3__BIOS_SCRATCH_3_MASK 0xFFFFFFFFL + +#define regDCHUBBUB_DEBUG_CTRL_0 0x04d6 +#define regDCHUBBUB_DEBUG_CTRL_0_BASE_IDX 2 +#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10 +#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#include 
"dml/dcn30/display_mode_vba_30.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" + +#include "link_enc_cfg.h" + +#define DCN3_15_MAX_DET_SIZE 384 +#define DCN3_15_CRB_SEGMENT_SIZE_KB 64 +#define DCN3_15_MAX_DET_SEGS (DCN3_15_MAX_DET_SIZE / DCN3_15_CRB_SEGMENT_SIZE_KB) +/* Minimum 3 extra segments need to be in compbuf and claimable to guarantee seamless mpo transitions */ +#define MIN_RESERVED_DET_SEGS 3 + +enum dcn31_clk_src_array_id { + DCN31_CLK_SRC_PLL0, + DCN31_CLK_SRC_PLL1, + DCN31_CLK_SRC_PLL2, + DCN31_CLK_SRC_PLL3, + DCN31_CLK_SRC_PLL4, + DCN30_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + reg ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX2_ ## reg_name + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN3_0(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +#define abm_regs(id)\ +[id] = {\ + ABM_DCN302_REG_LIST(id)\ +} + +static const struct dce_abm_registers abm_regs[] = { + abm_regs(0), + abm_regs(1), + abm_regs(2), + abm_regs(3), +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN30(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct 
dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6) +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs(id)\ +[id] = {\ + VPG_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_vpg_registers vpg_regs[] = { + vpg_regs(0), + vpg_regs(1), + vpg_regs(2), + vpg_regs(3), + vpg_regs(4), + vpg_regs(5), + vpg_regs(6), + vpg_regs(7), + vpg_regs(8), + vpg_regs(9), +}; + +static const struct dcn31_vpg_shift vpg_shift = { + DCN31_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_vpg_mask vpg_mask = { + DCN31_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs(id)\ +[id] = {\ + AFMT_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_afmt_registers afmt_regs[] = { + afmt_regs(0), + afmt_regs(1), + afmt_regs(2), + afmt_regs(3), + afmt_regs(4), + afmt_regs(5) +}; + +static const struct dcn31_afmt_shift afmt_shift = { + DCN31_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_afmt_mask afmt_mask = { + DCN31_AFMT_MASK_SH_LIST(_MASK) +}; + +#define apg_regs(id)\ +[id] = {\ + APG_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_apg_registers apg_regs[] = { + apg_regs(0), + apg_regs(1), + apg_regs(2), + apg_regs(3) +}; + +static const struct dcn31_apg_shift apg_shift = { + DCN31_APG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_apg_mask apg_mask = { + DCN31_APG_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN3_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4) +}; + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN31_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + DPCS_DCN31_REG_LIST(id), \ +} + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), + link_regs(4, E) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ + DPCS_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ + DPCS_DCN31_MASK_SH_LIST(_MASK) +}; + +#define 
hpo_dp_stream_encoder_reg_list(id)\ +[id] = {\ + DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\ +} + +static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = { + hpo_dp_stream_encoder_reg_list(0), + hpo_dp_stream_encoder_reg_list(1), + hpo_dp_stream_encoder_reg_list(2), + hpo_dp_stream_encoder_reg_list(3), +}; + +static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) +}; + + +#define hpo_dp_link_encoder_reg_list(id)\ +[id] = {\ + DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\ + DCN3_1_RDPCSTX_REG_LIST(0),\ + DCN3_1_RDPCSTX_REG_LIST(1),\ + DCN3_1_RDPCSTX_REG_LIST(2),\ + DCN3_1_RDPCSTX_REG_LIST(3),\ + DCN3_1_RDPCSTX_REG_LIST(4)\ +} + +static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = { + hpo_dp_link_encoder_reg_list(0), + hpo_dp_link_encoder_reg_list(1), +}; + +static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { + DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { + DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs(id)\ +[id] = {\ + DPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn3_dpp_registers dpp_regs[] = { + dpp_regs(0), + dpp_regs(1), + dpp_regs(2), + dpp_regs(3) +}; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3) +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4) +}; + +#define dwbc_regs_dcn3(id)\ +[id] = {\ + DWBC_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_dwbc_registers dwbc30_regs[] = { + dwbc_regs_dcn3(0), +}; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define mcif_wb_regs_dcn3(id)\ +[id] = {\ + MCIF_WB_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { + mcif_wb_regs_dcn3(0) +}; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define dsc_regsDCN20(id)\ +[id] = {\ + DSC_REG_LIST_DCN20(id)\ +} + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1), + dsc_regsDCN20(2) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static const struct dcn30_mpc_registers mpc_regs = { + 
MPC_REG_LIST_DCN3_0(0), + MPC_REG_LIST_DCN3_0(1), + MPC_REG_LIST_DCN3_0(2), + MPC_REG_LIST_DCN3_0(3), + MPC_OUT_MUX_REG_LIST_DCN3_0(0), + MPC_OUT_MUX_REG_LIST_DCN3_0(1), + MPC_OUT_MUX_REG_LIST_DCN3_0(2), + MPC_OUT_MUX_REG_LIST_DCN3_0(3), + MPC_DWB_MUX_REG_LIST_DCN3_0(0), +}; + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define optc_regs(id)\ +[id] = {OPTC_COMMON_REG_LIST_DCN3_1(id)} + +static const struct dcn_optc_registers optc_regs[] = { + optc_regs(0), + optc_regs(1), + optc_regs(2), + optc_regs(3) +}; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN3_1(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN3_1(_MASK) +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN30(id)\ +} + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3) +}; + + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN31(_MASK) +}; +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN31(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN31(_MASK) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_REG_LIST_DCN31() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN31(_MASK) +}; + + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + reg ## reg_name_pre ## id ## _ ## reg_name_post + + +#define HWSEQ_DCN31_REG_LIST()\ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ + SR(DIO_MEM_PWR_CTRL), \ + SR(ODM_MEM_PWR_CTRL3), \ + SR(DMU_MEM_PWR_CNTL), \ + SR(MMHUBBUB_MEM_PWR_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCFCLK_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ + SRII(PIXEL_RATE_CNTL, OTG, 0), \ + SRII(PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PIXEL_RATE_CNTL, OTG, 3),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ + SR(MICROSECOND_TIME_BASE_DIV), \ + SR(MILLISECOND_TIME_BASE_DIV), \ + SR(DISPCLK_FREQ_CHANGE_CNTL), \ + SR(RBBMIF_TIMEOUT_DIS), \ + SR(RBBMIF_TIMEOUT_DIS_2), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ + SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ + SR(MPC_CRC_CTRL), \ + SR(MPC_CRC_RESULT_GB), \ + SR(MPC_CRC_RESULT_C), \ + SR(MPC_CRC_RESULT_AR), \ + SR(DOMAIN0_PG_CONFIG), \ + SR(DOMAIN1_PG_CONFIG), \ + SR(DOMAIN2_PG_CONFIG), \ + SR(DOMAIN3_PG_CONFIG), \ + SR(DOMAIN16_PG_CONFIG), \ + SR(DOMAIN17_PG_CONFIG), \ + SR(DOMAIN18_PG_CONFIG), \ + SR(DOMAIN0_PG_STATUS), \ + SR(DOMAIN1_PG_STATUS), \ + SR(DOMAIN2_PG_STATUS), \ + SR(DOMAIN3_PG_STATUS), \ + SR(DOMAIN16_PG_STATUS), \ + SR(DOMAIN17_PG_STATUS), \ + SR(DOMAIN18_PG_STATUS), \ + SR(D1VGA_CONTROL), \ + SR(D2VGA_CONTROL), \ + SR(D3VGA_CONTROL), \ + SR(D4VGA_CONTROL), \ + SR(D5VGA_CONTROL), \ + SR(D6VGA_CONTROL), \ + 
SR(DC_IP_REQUEST_CNTL), \ + SR(AZALIA_AUDIO_DTO), \ + SR(AZALIA_CONTROLLER_CLOCK_GATING), \ + SR(HPO_TOP_HW_CONTROL) + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN31_REG_LIST() +}; + +#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ + HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ + HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ + HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ + HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ + HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ + HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh) + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN31_MASK_SH_LIST(_MASK) +}; +#define vmid_regs(id)\ +[id] = {\ + DCN20_VMID_REG_LIST(id)\ +} + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn31 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 5, + .num_stream_encoder = 5, + .num_dig_link_enc = 5, + .num_hpo_dp_stream_encoder = 4, + .num_hpo_dp_link_encoder = 2, + .num_pll = 5, + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_mpc_3dlut = 2, + .num_dsc = 3, +}; + +static const struct dc_plane_cap 
plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + // 6:1 downscaling ratio: 1000/6 = 166.666 + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_z10 = true, /*hw not support it*/ + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = false, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 4096,/*upto true 4k*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .pstate_enabled = true, + .use_max_lb = true, + .enable_mem_low_power = { + .bits = { + .vga = true, + .i2c = true, + .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled + .dscl = true, + .cm = true, + .mpc = true, + .optc = true, + .vpg = true, + .afmt = true, + } + }, + .enable_legacy_fast_update = true, + .psr_power_use_phy_fsm = 0, + .using_dml2 = false, +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + .disallow_replay = false, + }, + .ilr = { + .optimize_edp_link_rate = true, + }, +}; + +static void dcn31_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN20_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn31_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn3_dpp *dpp = + kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp3_construct(dpp, ctx, inst, + &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +static struct output_pixel_processor *dcn31_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dcn31_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct dce_i2c_hw *dcn31_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct 
dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct mpc *dcn31_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), + GFP_KERNEL); + + if (!mpc30) + return NULL; + + dcn30_mpc_construct(mpc30, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub3) + return NULL; + + hubbub31_construct(hubbub3, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask, + dcn3_15_ip.det_buffer_size_kbytes, + dcn3_15_ip.pixel_chunk_size_kbytes, + dcn3_15_ip.config_return_buffer_size_in_kbytes); + + + for (i = 0; i < res_cap_dcn31.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub3->base; +} + +static struct timing_generator *dcn31_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn31_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + + dcn31_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +/* Create a minimal link encoder object not associated with a particular + * physical connector. 
+ * resource_funcs.link_enc_create_minimal + */ +static struct link_encoder *dcn31_link_enc_create_minimal( + struct dc_context *ctx, enum engine_id eng_id) +{ + struct dcn20_link_encoder *enc20; + + if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) + return NULL; + + enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + if (!enc20) + return NULL; + + dcn31_link_encoder_construct_minimal( + enc20, + ctx, + &link_enc_feature, + &link_enc_regs[eng_id - ENGINE_ID_DIGA], + eng_id); + + return &enc20->enc10.base; +} + +static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dcn31_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dcn31_panel_cntl_construct(panel_cntl, init_data); + + return &panel_cntl->base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn31_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn31_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); + + if (!vpg31) + return NULL; + + vpg31_construct(vpg31, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg31->base; +} + +static struct afmt *dcn31_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); + + if (!afmt31) + return NULL; + + afmt31_construct(afmt31, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + // Light sleep by default, no need to power down here + + return &afmt31->base; +} + +static struct apg *dcn31_apg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); + + if (!apg31) + return NULL; + + apg31_construct(apg31, ctx, inst, + &apg_regs[inst], + &apg_shift, + &apg_mask); + + return &apg31->base; +} + +static struct stream_encoder *dcn315_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /*PHYB is wired off in HW, allow front end to remapping, otherwise needs more changes*/ + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + afmt = dcn31_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; + struct vpg *vpg; + struct apg *apg; + uint32_t hpo_dp_inst; + uint32_t vpg_inst; + uint32_t apg_inst; + + ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= 
ENGINE_ID_HPO_DP_3)); + hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; + + /* Mapping of VPG register blocks to HPO DP block instance: + * VPG[6] -> HPO_DP[0] + * VPG[7] -> HPO_DP[1] + * VPG[8] -> HPO_DP[2] + * VPG[9] -> HPO_DP[3] + */ + vpg_inst = hpo_dp_inst + 6; + + /* Mapping of APG register blocks to HPO DP block instance: + * APG[0] -> HPO_DP[0] + * APG[1] -> HPO_DP[1] + * APG[2] -> HPO_DP[2] + * APG[3] -> HPO_DP[3] + */ + apg_inst = hpo_dp_inst; + + /* allocate HPO stream encoder and create VPG sub-block */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + apg = dcn31_apg_create(ctx, apg_inst); + + if (!hpo_dp_enc31 || !vpg || !apg) { + kfree(hpo_dp_enc31); + kfree(vpg); + kfree(apg); + return NULL; + } + + dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, + hpo_dp_inst, eng_id, vpg, apg, + &hpo_dp_stream_enc_regs[hpo_dp_inst], + &hpo_dp_se_shift, &hpo_dp_se_mask); + + return &hpo_dp_enc31->base; +} + +static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( + uint8_t inst, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; + + /* allocate HPO link encoder */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); + + hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, + &hpo_dp_link_enc_regs[inst], + &hpo_dp_le_shift, &hpo_dp_le_mask); + + return &hpo_dp_enc31->base; +} + +static struct dce_hwseq *dcn31_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn31_create_audio, + .create_stream_encoder = dcn315_stream_encoder_create, + .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, + .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, + .create_hwseq = dcn31_hwseq_create, +}; + +static void dcn315_resource_destruct(struct dcn315_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { + if (pool->base.hpo_dp_stream_enc[i] != NULL) { + if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); + pool->base.hpo_dp_stream_enc[i]->vpg = NULL; + } + if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { + kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); + pool->base.hpo_dp_stream_enc[i]->apg = NULL; + } + kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); + pool->base.hpo_dp_stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { + if (pool->base.hpo_dp_link_enc[i] != NULL) { + kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); + pool->base.hpo_dp_link_enc[i] = NULL; + } 
+ } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn31_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); +} + +static struct hubp *dcn31_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + + if (hubp31_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + 
if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + + dcn30_dwbc_construct(dwbc30, ctx, + &dwbc30_regs[i], + &dwbc30_shift, + &dwbc30_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + + dcn30_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn31_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +static void dcn315_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn315_resource_pool *dcn31_pool = TO_DCN315_RES_POOL(*pool); + + dcn315_resource_destruct(dcn31_pool); + kfree(dcn31_pool); + *pool = NULL; +} + +static struct clock_source *dcn31_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static bool is_dual_plane(enum surface_pixel_format format) +{ + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; +} + +static int source_format_to_bpp (enum source_format_class SourcePixelFormat) +{ + if (SourcePixelFormat == dm_444_64) + return 8; + else if (SourcePixelFormat == dm_444_16) + return 2; + else if (SourcePixelFormat == dm_444_8) + return 1; + else if (SourcePixelFormat == dm_rgbe_alpha) + return 5; + else if (SourcePixelFormat == dm_420_8) + return 3; + else if (SourcePixelFormat == dm_420_12) + return 6; + else + return 4; +} + +static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context) +{ + int i; + struct resource_context *res_ctx = &context->res_ctx; + + /*Don't apply for single stream*/ + if (context->stream_count < 2) + return false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + + /*Don't apply if scaling*/ + if (res_ctx->pipe_ctx[i].stream->src.width != res_ctx->pipe_ctx[i].stream->dst.width || + res_ctx->pipe_ctx[i].stream->src.height != res_ctx->pipe_ctx[i].stream->dst.height || + (res_ctx->pipe_ctx[i].plane_state && (res_ctx->pipe_ctx[i].plane_state->src_rect.width + != res_ctx->pipe_ctx[i].plane_state->dst_rect.width || + res_ctx->pipe_ctx[i].plane_state->src_rect.height + != res_ctx->pipe_ctx[i].plane_state->dst_rect.height))) + return false; + /*Don't apply if MPO to avoid transition issues*/ + if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state != res_ctx->pipe_ctx[i].plane_state) + return false; + } + return true; +} + +static int 
dcn315_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt, crb_idx, crb_pipes; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe = NULL; + const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB; + int remaining_det_segs = max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB; + bool pixel_rate_crb = allow_pixel_rate_crb(dc, context); + + DC_FP_START(); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + DC_FP_END(); + + for (i = 0, pipe_cnt = 0, crb_pipes = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. + */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + pipes[pipe_cnt].pipe.src.dcc_rate = 3; + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + if (pixel_rate_crb && !pipe->top_pipe && !pipe->prev_odm_pipe) { + int bpp = source_format_to_bpp(pipes[pipe_cnt].pipe.src.source_format); + /* Ceil to crb segment size */ + int approx_det_segs_required_for_pstate = dcn_get_approx_det_segs_required_for_pstate( + &context->bw_ctx.dml.soc, timing->pix_clk_100hz, bpp, DCN3_15_CRB_SEGMENT_SIZE_KB); + + if (approx_det_segs_required_for_pstate <= 2 * DCN3_15_MAX_DET_SEGS) { + bool split_required = approx_det_segs_required_for_pstate > DCN3_15_MAX_DET_SEGS; + split_required = split_required || timing->pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc); + split_required = split_required || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120); + + /* Minimum 2 segments to allow mpc/odm combine if its used later */ + if (approx_det_segs_required_for_pstate < 2) + approx_det_segs_required_for_pstate = 2; + if (split_required) + approx_det_segs_required_for_pstate += approx_det_segs_required_for_pstate % 2; + pipes[pipe_cnt].pipe.src.det_size_override = approx_det_segs_required_for_pstate; + remaining_det_segs -= approx_det_segs_required_for_pstate; + } else + remaining_det_segs = -1; + crb_pipes++; + } + DC_FP_END(); + + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + pipe_cnt++; + } + + /* Spread remaining unreserved crb evenly among all pipes*/ + if (pixel_rate_crb) { + for (i = 0, pipe_cnt = 0, crb_idx = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &res_ctx->pipe_ctx[i]; + if (!pipe->stream) + continue; + + /* Do not use asymetric crb if not enough for pstate support */ + if (remaining_det_segs < 0) { + pipes[pipe_cnt].pipe.src.det_size_override = 0; + pipe_cnt++; + continue; + } + + if (!pipe->top_pipe && !pipe->prev_odm_pipe) { + bool split_required = pipe->stream->timing.pix_clk_100hz >= 
dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc) + || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120); + + if (remaining_det_segs > MIN_RESERVED_DET_SEGS) + pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes + + (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0); + if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) { + /* Clamp to 2 pipe split max det segments */ + remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS); + pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS; + } + if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) { + /* If we are splitting we must have an even number of segments */ + remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2; + pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2; + } + /* Convert segments into size for DML use */ + pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB; + + crb_idx++; + } + pipe_cnt++; + } + } + + if (pipe_cnt) + context->bw_ctx.dml.ip.det_buffer_size_kbytes = + (max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB / pipe_cnt) * DCN3_15_CRB_SEGMENT_SIZE_KB; + if (context->bw_ctx.dml.ip.det_buffer_size_kbytes > DCN3_15_MAX_DET_SIZE) + context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_15_MAX_DET_SIZE; + + dc->config.enable_4to1MPC = false; + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { + if (is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { + dc->config.enable_4to1MPC = true; + context->bw_ctx.dml.ip.det_buffer_size_kbytes = + (max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB / 4) * DCN3_15_CRB_SEGMENT_SIZE_KB; + } else if (!is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 5120 + && pipe->stream->timing.pix_clk_100hz < dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)) { + /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + pipes[0].pipe.src.unbounded_req_mode = true; + } + } + + return pipe_cnt; +} + +static void dcn315_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static struct resource_funcs dcn315_res_pool_funcs = { + .destroy = dcn315_destroy_resource_pool, + .link_enc_create = dcn31_link_encoder_create, + .link_enc_create_minimal = dcn31_link_enc_create_minimal, + .link_encs_assign = link_enc_cfg_link_encs_assign, + .link_enc_unassign = link_enc_cfg_link_enc_unassign, + .panel_cntl_create = dcn31_panel_cntl_create, + .validate_bandwidth = dcn31_validate_bandwidth, + .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, + .update_soc_for_wm_a = dcn315_update_soc_for_wm_a, + .populate_dml_pipes = dcn315_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn31_populate_dml_writeback_from_context, + .set_mcif_arb_params = 
dcn31_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn315_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn315_get_panel_config_defaults, +}; + +static bool dcn315_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn315_resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn31; + + pool->base.funcs = &dcn315_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; + dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; + dc->caps.edp_dsc_support = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.is_apu = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN301 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware 
= true; + } + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN31_CLK_SRC_PLL0] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL1] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* TODO: DCCG */ + pool->base.dccg = dccg31_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* TODO: IRQ */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn315_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn31_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.hubps[i] = dcn31_hubp_create(ctx, i); + if (pool->base.hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[i] = dcn31_dpp_create(ctx, i); + if (pool->base.dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn31_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn31_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + pool->base.timing_generator_count = i; + + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* ABM */ + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.multiple_abms[i] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if 
(pool->base.multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* MPC and DSC */ + pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn31_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn31_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn31_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn31_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + dc->dcn_ip->max_num_dpp = dcn3_15_ip.max_num_dpp; + + return true; + +create_fail: + + dcn315_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn315_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn315_resource_pool *pool = + kzalloc(sizeof(struct dcn315_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn315_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h new file mode 100644 index 000000000000..22849eaa6f24 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h @@ -0,0 +1,44 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN315_RESOURCE_H_ +#define _DCN315_RESOURCE_H_ + +#include "core_types.h" + +#define TO_DCN315_RES_POOL(pool)\ + container_of(pool, struct dcn315_resource_pool, base) + +extern struct _vcs_dpi_ip_params_st dcn3_15_ip; + +struct dcn315_resource_pool { + struct resource_pool base; +}; + +struct resource_pool *dcn315_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +#endif /* _DCN315_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c new file mode 100644 index 000000000000..b9753d4606f8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -0,0 +1,2038 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + + +#include "dm_services.h" +#include "dc.h" + +#include "dcn31/dcn31_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn316_resource.h" + +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" +#include "dcn31/dcn31_resource.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn30/dcn30_mpc.h" +#include "dcn31/dcn31_hubp.h" +#include "irq/dcn31/irq_service_dcn31.h" +#include "dcn30/dcn30_dpp.h" +#include "dcn31/dcn31_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn30/dcn30_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_link_encoder.h" +#include "dcn31/dcn31_apg.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn31/dcn31_vpg.h" +#include "dcn31/dcn31_afmt.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" +#include "dcn31/dcn31_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "dcn31/dcn31_panel_cntl.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn30/dcn30_mmhubbub.h" + +#include "dcn/dcn_3_1_6_offset.h" +#include "dcn/dcn_3_1_6_sh_mask.h" +#include "dpcs/dpcs_4_2_3_offset.h" +#include "dpcs/dpcs_4_2_3_sh_mask.h" + +#define regBIF_BX1_BIOS_SCRATCH_2 0x003a +#define regBIF_BX1_BIOS_SCRATCH_2_BASE_IDX 1 +#define regBIF_BX1_BIOS_SCRATCH_3 0x003b +#define regBIF_BX1_BIOS_SCRATCH_3_BASE_IDX 1 +#define regBIF_BX1_BIOS_SCRATCH_6 0x003e +#define regBIF_BX1_BIOS_SCRATCH_6_BASE_IDX 1 + +#define regDCHUBBUB_DEBUG_CTRL_0 0x04d6 +#define regDCHUBBUB_DEBUG_CTRL_0_BASE_IDX 2 +#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH__SHIFT 0x10 +#define DCHUBBUB_DEBUG_CTRL_0__DET_DEPTH_MASK 0x01FF0000L + +#define DCN_BASE__INST0_SEG0 0x00000012 +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define DCN_BASE__INST0_SEG2 0x000034C0 +#define DCN_BASE__INST0_SEG3 0x00009000 +#define DCN_BASE__INST0_SEG4 0x02403C00 +#define DCN_BASE__INST0_SEG5 0 + +#define DPCS_BASE__INST0_SEG0 0x00000012 +#define DPCS_BASE__INST0_SEG1 0x000000C0 +#define DPCS_BASE__INST0_SEG2 0x000034C0 +#define DPCS_BASE__INST0_SEG3 0x00009000 +#define DPCS_BASE__INST0_SEG4 0x02403C00 +#define DPCS_BASE__INST0_SEG5 0 + +#define NBIO_BASE__INST0_SEG0 0x00000000 +#define NBIO_BASE__INST0_SEG1 0x00000014 +#define NBIO_BASE__INST0_SEG2 0x00000D20 +#define NBIO_BASE__INST0_SEG3 0x00010400 +#define NBIO_BASE__INST0_SEG4 0x0241B000 +#define NBIO_BASE__INST0_SEG5 0x04040000 + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#include "dml/dcn30/display_mode_vba_30.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" + +#include "link_enc_cfg.h" + +#define DCN3_16_MAX_DET_SIZE 384 +#define DCN3_16_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_16_CRB_SEGMENT_SIZE_KB 64 + +enum dcn31_clk_src_array_id { + DCN31_CLK_SRC_PLL0, + DCN31_CLK_SRC_PLL1, + DCN31_CLK_SRC_PLL2, + DCN31_CLK_SRC_PLL3, + DCN31_CLK_SRC_PLL4, + DCN30_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +#define 
BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRI(reg_name, block, id)\ + .reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + .reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + .reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define DCCG_SRII(reg_name, block, id)\ + .block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + reg ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) \ + NBIO_BASE__INST0_SEG ## seg + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + .reg_name = NBIO_BASE(regBIF_BX1_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX1_ ## reg_name + +static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_3), + NBIO_SR(BIOS_SCRATCH_6) +}; + +#define clk_src_regs(index, pllid)\ +[index] = {\ + CS_COMMON_REG_LIST_DCN3_0(index, pllid),\ +} + +static const struct dce110_clk_src_regs clk_src_regs[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, C), + clk_src_regs(3, D), + clk_src_regs(4, E) +}; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN2_0(_MASK) +}; + +#define abm_regs(id)\ +[id] = {\ + ABM_DCN302_REG_LIST(id)\ +} + +static const struct dce_abm_registers abm_regs[] = { + abm_regs(0), + abm_regs(1), + abm_regs(2), + abm_regs(3), +}; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN30(_MASK) +}; + +#define audio_regs(id)\ +[id] = {\ + AUD_COMMON_REG_LIST(id)\ +} + +static const struct dce_audio_registers audio_regs[] = { + audio_regs(0), + audio_regs(1), + audio_regs(2), + audio_regs(3), + audio_regs(4), + audio_regs(5), + audio_regs(6) +}; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs(id)\ +[id] = {\ + VPG_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_vpg_registers 
vpg_regs[] = { + vpg_regs(0), + vpg_regs(1), + vpg_regs(2), + vpg_regs(3), + vpg_regs(4), + vpg_regs(5), + vpg_regs(6), + vpg_regs(7), + vpg_regs(8), + vpg_regs(9), +}; + +static const struct dcn31_vpg_shift vpg_shift = { + DCN31_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_vpg_mask vpg_mask = { + DCN31_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs(id)\ +[id] = {\ + AFMT_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_afmt_registers afmt_regs[] = { + afmt_regs(0), + afmt_regs(1), + afmt_regs(2), + afmt_regs(3), + afmt_regs(4), + afmt_regs(5) +}; + +static const struct dcn31_afmt_shift afmt_shift = { + DCN31_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_afmt_mask afmt_mask = { + DCN31_AFMT_MASK_SH_LIST(_MASK) +}; + + +#define apg_regs(id)\ +[id] = {\ + APG_DCN31_REG_LIST(id)\ +} + +static const struct dcn31_apg_registers apg_regs[] = { + apg_regs(0), + apg_regs(1), + apg_regs(2), + apg_regs(3) +}; + +static const struct dcn31_apg_shift apg_shift = { + DCN31_APG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_apg_mask apg_mask = { + DCN31_APG_MASK_SH_LIST(_MASK) +}; + + +#define stream_enc_regs(id)\ +[id] = {\ + SE_DCN3_REG_LIST(id)\ +} + +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { + stream_enc_regs(0), + stream_enc_regs(1), + stream_enc_regs(2), + stream_enc_regs(3), + stream_enc_regs(4) +}; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + + +#define aux_regs(id)\ +[id] = {\ + DCN2_AUX_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { + aux_regs(0), + aux_regs(1), + aux_regs(2), + aux_regs(3), + aux_regs(4) +}; + +#define hpd_regs(id)\ +[id] = {\ + HPD_REG_LIST(id)\ +} + +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { + hpd_regs(0), + hpd_regs(1), + hpd_regs(2), + hpd_regs(3), + hpd_regs(4) +}; + +#define link_regs(id, phyid)\ +[id] = {\ + LE_DCN31_REG_LIST(id), \ + UNIPHY_DCN2_REG_LIST(phyid), \ + DPCS_DCN31_REG_LIST(id), \ +} + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +static const struct dcn10_link_enc_registers link_enc_regs[] = { + link_regs(0, A), + link_regs(1, B), + link_regs(2, C), + link_regs(3, D), + link_regs(4, E) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ + DPCS_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ + DPCS_DCN31_MASK_SH_LIST(_MASK) +}; + + + +#define hpo_dp_stream_encoder_reg_list(id)\ +[id] = {\ + DCN3_1_HPO_DP_STREAM_ENC_REG_LIST(id)\ +} + +static const struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[] = { + hpo_dp_stream_encoder_reg_list(0), + hpo_dp_stream_encoder_reg_list(1), + hpo_dp_stream_encoder_reg_list(2), + hpo_dp_stream_encoder_reg_list(3), +}; + +static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) +}; + + +#define hpo_dp_link_encoder_reg_list(id)\ +[id] = {\ + DCN3_1_HPO_DP_LINK_ENC_REG_LIST(id),\ + DCN3_1_RDPCSTX_REG_LIST(0),\ + 
DCN3_1_RDPCSTX_REG_LIST(1),\ + DCN3_1_RDPCSTX_REG_LIST(2),\ + DCN3_1_RDPCSTX_REG_LIST(3),\ + DCN3_1_RDPCSTX_REG_LIST(4)\ +} + +static const struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[] = { + hpo_dp_link_encoder_reg_list(0), + hpo_dp_link_encoder_reg_list(1), +}; + +static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { + DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { + DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) +}; + + +#define dpp_regs(id)\ +[id] = {\ + DPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn3_dpp_registers dpp_regs[] = { + dpp_regs(0), + dpp_regs(1), + dpp_regs(2), + dpp_regs(3) +}; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30(_MASK) +}; + +#define opp_regs(id)\ +[id] = {\ + OPP_REG_LIST_DCN30(id),\ +} + +static const struct dcn20_opp_registers opp_regs[] = { + opp_regs(0), + opp_regs(1), + opp_regs(2), + opp_regs(3) +}; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs(id)\ +[id] = {\ + AUX_COMMON_REG_LIST0(id), \ + .AUXN_IMPCAL = 0, \ + .AUXP_IMPCAL = 0, \ + .AUX_RESET_MASK = DP_AUX0_AUX_CONTROL__AUX_RESET_MASK, \ +} + +static const struct dce110_aux_registers aux_engine_regs[] = { + aux_engine_regs(0), + aux_engine_regs(1), + aux_engine_regs(2), + aux_engine_regs(3), + aux_engine_regs(4) +}; + +#define dwbc_regs_dcn3(id)\ +[id] = {\ + DWBC_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_dwbc_registers dwbc30_regs[] = { + dwbc_regs_dcn3(0), +}; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define mcif_wb_regs_dcn3(id)\ +[id] = {\ + MCIF_WB_COMMON_REG_LIST_DCN30(id),\ +} + +static const struct dcn30_mmhubbub_registers mcif_wb30_regs[] = { + mcif_wb_regs_dcn3(0) +}; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define dsc_regsDCN20(id)\ +[id] = {\ + DSC_REG_LIST_DCN20(id)\ +} + +static const struct dcn20_dsc_registers dsc_regs[] = { + dsc_regsDCN20(0), + dsc_regsDCN20(1), + dsc_regsDCN20(2) +}; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static const struct dcn30_mpc_registers mpc_regs = { + MPC_REG_LIST_DCN3_0(0), + MPC_REG_LIST_DCN3_0(1), + MPC_REG_LIST_DCN3_0(2), + MPC_REG_LIST_DCN3_0(3), + MPC_OUT_MUX_REG_LIST_DCN3_0(0), + MPC_OUT_MUX_REG_LIST_DCN3_0(1), + MPC_OUT_MUX_REG_LIST_DCN3_0(2), + MPC_OUT_MUX_REG_LIST_DCN3_0(3), + MPC_RMU_GLOBAL_REG_LIST_DCN3AG, + MPC_RMU_REG_LIST_DCN3AG(0), + MPC_RMU_REG_LIST_DCN3AG(1), + //MPC_RMU_REG_LIST_DCN3AG(2), + MPC_DWB_MUX_REG_LIST_DCN3_0(0), +}; + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define optc_regs(id)\ +[id] = {OPTC_COMMON_REG_LIST_DCN3_1(id)} + +static const struct dcn_optc_registers optc_regs[] = { + 
optc_regs(0), + optc_regs(1), + optc_regs(2), + optc_regs(3) +}; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN3_1(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN3_1(_MASK) +}; + +#define hubp_regs(id)\ +[id] = {\ + HUBP_REG_LIST_DCN30(id)\ +} + +static const struct dcn_hubp2_registers hubp_regs[] = { + hubp_regs(0), + hubp_regs(1), + hubp_regs(2), + hubp_regs(3) +}; + + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN31(_MASK) +}; +static const struct dcn_hubbub_registers hubbub_reg = { + HUBBUB_REG_LIST_DCN31(0) +}; + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN31(_MASK) +}; + +static const struct dccg_registers dccg_regs = { + DCCG_REG_LIST_DCN31() +}; + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN31(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN31(_MASK) +}; + + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + reg ## reg_name_pre ## id ## _ ## reg_name_post + + +#define HWSEQ_DCN31_REG_LIST()\ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ + SR(DIO_MEM_PWR_CTRL), \ + SR(ODM_MEM_PWR_CTRL3), \ + SR(DMU_MEM_PWR_CNTL), \ + SR(MMHUBBUB_MEM_PWR_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCFCLK_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ + SRII(PIXEL_RATE_CNTL, OTG, 0), \ + SRII(PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PIXEL_RATE_CNTL, OTG, 3),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ + SR(MICROSECOND_TIME_BASE_DIV), \ + SR(MILLISECOND_TIME_BASE_DIV), \ + SR(DISPCLK_FREQ_CHANGE_CNTL), \ + SR(RBBMIF_TIMEOUT_DIS), \ + SR(RBBMIF_TIMEOUT_DIS_2), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ + SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ + SR(MPC_CRC_CTRL), \ + SR(MPC_CRC_RESULT_GB), \ + SR(MPC_CRC_RESULT_C), \ + SR(MPC_CRC_RESULT_AR), \ + SR(DOMAIN0_PG_CONFIG), \ + SR(DOMAIN1_PG_CONFIG), \ + SR(DOMAIN2_PG_CONFIG), \ + SR(DOMAIN3_PG_CONFIG), \ + SR(DOMAIN16_PG_CONFIG), \ + SR(DOMAIN17_PG_CONFIG), \ + SR(DOMAIN18_PG_CONFIG), \ + SR(DOMAIN0_PG_STATUS), \ + SR(DOMAIN1_PG_STATUS), \ + SR(DOMAIN2_PG_STATUS), \ + SR(DOMAIN3_PG_STATUS), \ + SR(DOMAIN16_PG_STATUS), \ + SR(DOMAIN17_PG_STATUS), \ + SR(DOMAIN18_PG_STATUS), \ + SR(D1VGA_CONTROL), \ + SR(D2VGA_CONTROL), \ + SR(D3VGA_CONTROL), \ + SR(D4VGA_CONTROL), \ + SR(D5VGA_CONTROL), \ + SR(D6VGA_CONTROL), \ + SR(DC_IP_REQUEST_CNTL), \ + SR(AZALIA_AUDIO_DTO), \ + SR(AZALIA_CONTROLLER_CLOCK_GATING), \ + SR(HPO_TOP_HW_CONTROL) + +static const struct dce_hwseq_registers hwseq_reg = { + HWSEQ_DCN31_REG_LIST() +}; + +#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ + HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, 
DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ + HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ + HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ + HWS_SF(, DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ + HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ + HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh) + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN31_MASK_SH_LIST(_MASK) +}; +#define vmid_regs(id)\ +[id] = {\ + DCN20_VMID_REG_LIST(id)\ +} + +static const struct dcn_vmid_registers vmid_regs[] = { + vmid_regs(0), + vmid_regs(1), + vmid_regs(2), + vmid_regs(3), + vmid_regs(4), + vmid_regs(5), + vmid_regs(6), + vmid_regs(7), + vmid_regs(8), + vmid_regs(9), + vmid_regs(10), + vmid_regs(11), + vmid_regs(12), + vmid_regs(13), + vmid_regs(14), + vmid_regs(15) +}; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn31 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 5, + .num_stream_encoder = 5, + .num_dig_link_enc = 5, + .num_hpo_dp_stream_encoder = 4, + .num_hpo_dp_link_encoder = 2, + .num_pll = 5, + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_mpc_3dlut = 2, + .num_dsc = 3, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + // 6:1 downscaling ratio: 1000/6 = 166.666 + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_z10 = true, /*hw not support it*/ + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = 
false, + .clock_trace = true, + .disable_pplib_clock_request = false, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 4096,/*upto true 4k*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .pstate_enabled = true, + .use_max_lb = true, + .enable_mem_low_power = { + .bits = { + .vga = true, + .i2c = true, + .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled + .dscl = true, + .cm = true, + .mpc = true, + .optc = true, + .vpg = true, + .afmt = true, + } + }, + .enable_legacy_fast_update = true, + .using_dml2 = false, +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + .disallow_replay = false, + }, + .ilr = { + .optimize_edp_link_rate = true, + }, +}; + +static void dcn31_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN20_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn31_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn3_dpp *dpp = + kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp) + return NULL; + + if (dpp3_construct(dpp, ctx, inst, + &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + +static struct output_pixel_processor *dcn31_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dcn20_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp->base; +} + +static struct dce_aux *dcn31_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs(id) { I2C_HW_ENGINE_COMMON_REG_LIST_DCN30(id) } + +static const struct dce_i2c_registers i2c_hw_regs[] = { + i2c_inst_regs(1), + i2c_inst_regs(2), + i2c_inst_regs(3), + i2c_inst_regs(4), + i2c_inst_regs(5), +}; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct dce_i2c_hw *dcn31_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct mpc *dcn31_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), + GFP_KERNEL); + + if (!mpc30) + return NULL; + + dcn30_mpc_construct(mpc30, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct hubbub *dcn31_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = 
kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub3) + return NULL; + + hubbub31_construct(hubbub3, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask, + dcn3_16_ip.det_buffer_size_kbytes, + dcn3_16_ip.pixel_chunk_size_kbytes, + dcn3_16_ip.config_return_buffer_size_in_kbytes); + + + for (i = 0; i < res_cap_dcn31.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub3->base; +} + +static struct timing_generator *dcn31_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn31_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + + dcn31_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +/* Create a minimal link encoder object not associated with a particular + * physical connector. 
+ * resource_funcs.link_enc_create_minimal + */ +static struct link_encoder *dcn31_link_enc_create_minimal( + struct dc_context *ctx, enum engine_id eng_id) +{ + struct dcn20_link_encoder *enc20; + + if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) + return NULL; + + enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + if (!enc20) + return NULL; + + dcn31_link_encoder_construct_minimal( + enc20, + ctx, + &link_enc_feature, + &link_enc_regs[eng_id - ENGINE_ID_DIGA], + eng_id); + + return &enc20->enc10.base; +} + +static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dcn31_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dcn31_panel_cntl_construct(panel_cntl, init_data); + + return &panel_cntl->base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn31_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn31_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); + + if (!vpg31) + return NULL; + + vpg31_construct(vpg31, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg31->base; +} + +static struct afmt *dcn31_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); + + if (!afmt31) + return NULL; + + afmt31_construct(afmt31, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + // Light sleep by default, no need to power down here + + return &afmt31->base; +} + + +static struct apg *dcn31_apg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); + + if (!apg31) + return NULL; + + apg31_construct(apg31, ctx, inst, + &apg_regs[inst], + &apg_shift, + &apg_mask); + + return &apg31->base; +} + + +static struct stream_encoder *dcn316_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + afmt = dcn31_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + + dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + + +static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; + struct vpg *vpg; + struct apg *apg; + uint32_t hpo_dp_inst; + uint32_t vpg_inst; + uint32_t apg_inst; + + ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); + hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; + + /* Mapping of VPG register 
blocks to HPO DP block instance: + * VPG[6] -> HPO_DP[0] + * VPG[7] -> HPO_DP[1] + * VPG[8] -> HPO_DP[2] + * VPG[9] -> HPO_DP[3] + */ + vpg_inst = hpo_dp_inst + 6; + + /* Mapping of APG register blocks to HPO DP block instance: + * APG[0] -> HPO_DP[0] + * APG[1] -> HPO_DP[1] + * APG[2] -> HPO_DP[2] + * APG[3] -> HPO_DP[3] + */ + apg_inst = hpo_dp_inst; + + /* allocate HPO stream encoder and create VPG sub-block */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + apg = dcn31_apg_create(ctx, apg_inst); + + if (!hpo_dp_enc31 || !vpg || !apg) { + kfree(hpo_dp_enc31); + kfree(vpg); + kfree(apg); + return NULL; + } + + dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, + hpo_dp_inst, eng_id, vpg, apg, + &hpo_dp_stream_enc_regs[hpo_dp_inst], + &hpo_dp_se_shift, &hpo_dp_se_mask); + + return &hpo_dp_enc31->base; +} + +static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( + uint8_t inst, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; + + /* allocate HPO link encoder */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); + + hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, + &hpo_dp_link_enc_regs[inst], + &hpo_dp_le_shift, &hpo_dp_le_mask); + + return &hpo_dp_enc31->base; +} + + +static struct dce_hwseq *dcn31_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn31_create_audio, + .create_stream_encoder = dcn316_stream_encoder_create, + .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, + .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, + .create_hwseq = dcn31_hwseq_create, +}; + +static void dcn316_resource_destruct(struct dcn316_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { + if (pool->base.hpo_dp_stream_enc[i] != NULL) { + if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); + pool->base.hpo_dp_stream_enc[i]->vpg = NULL; + } + if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { + kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); + pool->base.hpo_dp_stream_enc[i]->apg = NULL; + } + kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); + pool->base.hpo_dp_stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { + if (pool->base.hpo_dp_link_enc[i] != NULL) { + kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); + pool->base.hpo_dp_link_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + 
dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn31_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); +} + +static struct hubp *dcn31_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + + if (hubp31_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static bool dcn31_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + + 
dcn30_dwbc_construct(dwbc30, ctx, + &dwbc30_regs[i], + &dwbc30_shift, + &dwbc30_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +static bool dcn31_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + + dcn30_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn31_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + return &dsc->base; +} + +static void dcn316_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn316_resource_pool *dcn31_pool = TO_DCN316_RES_POOL(*pool); + + dcn316_resource_destruct(dcn31_pool); + kfree(dcn31_pool); + *pool = NULL; +} + +static struct clock_source *dcn31_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + + BREAK_TO_DEBUGGER(); + return NULL; +} + +static bool is_dual_plane(enum surface_pixel_format format) +{ + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; +} + +static int dcn316_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe; + const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_16_MIN_COMPBUF_SIZE_KB; + + DC_FP_START(); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + DC_FP_END(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. 
+ */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + pipes[pipe_cnt].pipe.src.dcc_rate = 3; + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); + + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + + pipe_cnt++; + } + + if (pipe_cnt) + context->bw_ctx.dml.ip.det_buffer_size_kbytes = + (max_usable_det / DCN3_16_CRB_SEGMENT_SIZE_KB / pipe_cnt) * DCN3_16_CRB_SEGMENT_SIZE_KB; + if (context->bw_ctx.dml.ip.det_buffer_size_kbytes > DCN3_16_MAX_DET_SIZE) + context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_16_MAX_DET_SIZE; + ASSERT(context->bw_ctx.dml.ip.det_buffer_size_kbytes >= DCN3_16_DEFAULT_DET_SIZE); + dc->config.enable_4to1MPC = false; + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { + if (is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { + dc->config.enable_4to1MPC = true; + context->bw_ctx.dml.ip.det_buffer_size_kbytes = + (max_usable_det / DCN3_16_CRB_SEGMENT_SIZE_KB / 4) * DCN3_16_CRB_SEGMENT_SIZE_KB; + } else if (!is_dual_plane(pipe->plane_state->format)) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + pipes[0].pipe.src.unbounded_req_mode = true; + } + } + + return pipe_cnt; +} + +static void dcn316_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static struct resource_funcs dcn316_res_pool_funcs = { + .destroy = dcn316_destroy_resource_pool, + .link_enc_create = dcn31_link_encoder_create, + .link_enc_create_minimal = dcn31_link_enc_create_minimal, + .link_encs_assign = link_enc_cfg_link_encs_assign, + .link_enc_unassign = link_enc_cfg_link_enc_unassign, + .panel_cntl_create = dcn31_panel_cntl_create, + .validate_bandwidth = dcn31_validate_bandwidth, + .calculate_wm_and_dlg = dcn31_calculate_wm_and_dlg, + .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, + .populate_dml_pipes = dcn316_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn31_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn31_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn316_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn316_get_panel_config_defaults, +}; + +static bool dcn316_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn316_resource_pool *pool) +{ + int i; + struct dc_context *ctx = 
dc->ctx; + struct irq_service_init_data init_data; + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn31; + + pool->base.funcs = &dcn316_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.5 w/a applied by default*/ + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; + dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; + dc->caps.edp_dsc_support = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.is_apu = true; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN301 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN31_CLK_SRC_PLL0] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL1] = + 
dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN30_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn31_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* TODO: DCCG */ + pool->base.dccg = dccg31_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* TODO: IRQ */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn31_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn31_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.hubps[i] = dcn31_hubp_create(ctx, i); + if (pool->base.hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[i] = dcn31_dpp_create(ctx, i); + if (pool->base.dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn31_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn31_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + pool->base.timing_generator_count = i; + + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* ABM */ + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.multiple_abms[i] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if (pool->base.multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* MPC and DSC */ + pool->base.mpc = dcn31_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn31_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to 
create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn31_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn31_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn31_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + dc->dcn_ip->max_num_dpp = dcn3_16_ip.max_num_dpp; + + return true; + +create_fail: + + dcn316_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn316_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn316_resource_pool *pool = + kzalloc(sizeof(struct dcn316_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn316_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h new file mode 100644 index 000000000000..aba6d634131b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h @@ -0,0 +1,44 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef _DCN316_RESOURCE_H_ +#define _DCN316_RESOURCE_H_ + +#include "core_types.h" + +#define TO_DCN316_RES_POOL(pool)\ + container_of(pool, struct dcn316_resource_pool, base) + +extern struct _vcs_dpi_ip_params_st dcn3_16_ip; + +struct dcn316_resource_pool { + struct resource_pool base; +}; + +struct resource_pool *dcn316_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +#endif /* _DCN316_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c new file mode 100644 index 000000000000..36e4c7bef403 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -0,0 +1,2862 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" +#include "dc.h" + +#include "dcn32/dcn32_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn32_resource.h" + +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn32/dcn32_hubbub.h" +#include "dcn32/dcn32_mpc.h" +#include "dcn32/dcn32_hubp.h" +#include "irq/dcn32/irq_service_dcn32.h" +#include "dcn32/dcn32_dpp.h" +#include "dcn32/dcn32_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn30/dcn30_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn32/dcn32_dio_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_link_encoder.h" +#include "dcn32/dcn32_hpo_dp_link_encoder.h" +#include "dcn31/dcn31_apg.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn32/dcn32_dio_link_encoder.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dml/display_mode_vba.h" +#include "dcn32/dcn32_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "link.h" +#include "dcn31/dcn31_panel_cntl.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn32/dcn32_mmhubbub.h" + +#include "dcn/dcn_3_2_0_offset.h" +#include "dcn/dcn_3_2_0_sh_mask.h" +#include "nbio/nbio_4_3_0_offset.h" + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#include "dml/dcn30/display_mode_vba_30.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" +#include "dml/dcn32/dcn32_fpu.h" + +#include "dml2/dml2_wrapper.h" + +#define DC_LOGGER_INIT(logger) + +enum dcn32_clk_src_array_id { + DCN32_CLK_SRC_PLL0, + DCN32_CLK_SRC_PLL1, + DCN32_CLK_SRC_PLL2, + DCN32_CLK_SRC_PLL3, + DCN32_CLK_SRC_PLL4, + DCN32_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name +#define SR_ARR(reg_name, id) \ + REG_STRUCT[id].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name + +#define SR_ARR_INIT(reg_name, id, value) \ + REG_STRUCT[id].reg_name = value + +#define SRI(reg_name, block, id)\ + REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_ARR(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SR_ARR_I2C(reg_name, id) \ + REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name + +#define SRI_ARR_I2C(reg_name, block, id)\ + REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_ARR_ALPHABET(reg_name, block, index, id)\ + REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name 
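+/*
+ * Note: unlike the compile-time DCN_BASE__INST0_SEG* bases used by the older
+ * DCN resource files above, the SR/SRI/*_ARR macros in this file fill a
+ * caller-defined REG_STRUCT at runtime, using the per-segment register base
+ * offsets provided in ctx->dcn_reg_offsets.
+ */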
+#define SRI2_ARR(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_ARR_2(reg_name, block, id, inst)\ + REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define DCCG_SRII(reg_name, block, id)\ + REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + reg ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg] + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX0_ ## reg_name +#define NBIO_SR_ARR(reg_name, id)\ + REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX0_ ## reg_name + +#undef CTX +#define CTX ctx +#define REG(reg_name) \ + (ctx->dcn_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) + +static struct bios_registers bios_regs; + +#define bios_regs_init() \ + ( \ + NBIO_SR(BIOS_SCRATCH_3),\ + NBIO_SR(BIOS_SCRATCH_6)\ + ) + +#define clk_src_regs_init(index, pllid)\ + CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) + +static struct dce110_clk_src_regs clk_src_regs[5]; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN3_2(_MASK) +}; + +#define abm_regs_init(id)\ + ABM_DCN32_REG_LIST_RI(id) + +static struct dce_abm_registers abm_regs[4]; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN32(_MASK) +}; + +#define audio_regs_init(id)\ + AUD_COMMON_REG_LIST_RI(id) + +static struct dce_audio_registers audio_regs[5]; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs_init(id)\ + VPG_DCN3_REG_LIST_RI(id) + +static struct dcn30_vpg_registers vpg_regs[10]; + +static const struct dcn30_vpg_shift vpg_shift = { + DCN3_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_vpg_mask 
vpg_mask = { + DCN3_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs_init(id)\ + AFMT_DCN3_REG_LIST_RI(id) + +static struct dcn30_afmt_registers afmt_regs[6]; + +static const struct dcn30_afmt_shift afmt_shift = { + DCN3_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_afmt_mask afmt_mask = { + DCN3_AFMT_MASK_SH_LIST(_MASK) +}; + +#define apg_regs_init(id)\ + APG_DCN31_REG_LIST_RI(id) + +static struct dcn31_apg_registers apg_regs[4]; + +static const struct dcn31_apg_shift apg_shift = { + DCN31_APG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_apg_mask apg_mask = { + DCN31_APG_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs_init(id)\ + SE_DCN32_REG_LIST_RI(id) + +static struct dcn10_stream_enc_registers stream_enc_regs[5]; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + + +#define aux_regs_init(id)\ + DCN2_AUX_REG_LIST_RI(id) + +static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5]; + +#define hpd_regs_init(id)\ + HPD_REG_LIST_RI(id) + +static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5]; + +#define link_regs_init(id, phyid)\ + ( \ + LE_DCN31_REG_LIST_RI(id), \ + UNIPHY_DCN2_REG_LIST_RI(id, phyid)\ + ) + /*DPCS_DCN31_REG_LIST(id),*/ \ + +static struct dcn10_link_enc_registers link_enc_regs[5]; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ + //DPCS_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ + //DPCS_DCN31_MASK_SH_LIST(_MASK) +}; + +#define hpo_dp_stream_encoder_reg_init(id)\ + DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) + +static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4]; + +static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) +}; + + +#define hpo_dp_link_encoder_reg_init(id)\ + DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) + /*DCN3_1_RDPCSTX_REG_LIST(0),*/ + /*DCN3_1_RDPCSTX_REG_LIST(1),*/ + /*DCN3_1_RDPCSTX_REG_LIST(2),*/ + /*DCN3_1_RDPCSTX_REG_LIST(3),*/ + +static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[2]; + +static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { + DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { + DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs_init(id)\ + DPP_REG_LIST_DCN30_COMMON_RI(id) + +static struct dcn3_dpp_registers dpp_regs[4]; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30_COMMON(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30_COMMON(_MASK) +}; + + +#define opp_regs_init(id)\ + OPP_REG_LIST_DCN30_RI(id) + +static struct dcn20_opp_registers opp_regs[4]; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs_init(id)\ + ( \ + AUX_COMMON_REG_LIST0_RI(id), \ + SR_ARR_INIT(AUXN_IMPCAL, id, 0), \ + SR_ARR_INIT(AUXP_IMPCAL, id, 0), \ + SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK), \ + SR_ARR_INIT(AUX_RESET_MASK, id, 
DP_AUX0_AUX_CONTROL__AUX_RESET_MASK)\ + ) + +static struct dce110_aux_registers aux_engine_regs[5]; + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +#define dwbc_regs_dcn3_init(id)\ + DWBC_COMMON_REG_LIST_DCN30_RI(id) + +static struct dcn30_dwbc_registers dwbc30_regs[1]; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define mcif_wb_regs_dcn3_init(id)\ + MCIF_WB_COMMON_REG_LIST_DCN32_RI(id) + +static struct dcn30_mmhubbub_registers mcif_wb30_regs[1]; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define dsc_regsDCN20_init(id)\ + DSC_REG_LIST_DCN20_RI(id) + +static struct dcn20_dsc_registers dsc_regs[4]; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static struct dcn30_mpc_registers mpc_regs; + +#define dcn_mpc_regs_init() \ + MPC_REG_LIST_DCN3_2_RI(0),\ + MPC_REG_LIST_DCN3_2_RI(1),\ + MPC_REG_LIST_DCN3_2_RI(2),\ + MPC_REG_LIST_DCN3_2_RI(3),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ + MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define optc_regs_init(id)\ + OPTC_COMMON_REG_LIST_DCN3_2_RI(id) + +static struct dcn_optc_registers optc_regs[4]; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN3_2(_MASK) +}; + +#define hubp_regs_init(id)\ + HUBP_REG_LIST_DCN32_RI(id) + +static struct dcn_hubp2_registers hubp_regs[4]; + + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask = { + HUBP_MASK_SH_LIST_DCN32(_MASK) +}; + +static struct dcn_hubbub_registers hubbub_reg; +#define hubbub_reg_init()\ + HUBBUB_REG_LIST_DCN32_RI(0) + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN32(_MASK) +}; + +static struct dccg_registers dccg_regs; + +#define dccg_regs_init()\ + DCCG_REG_LIST_DCN32_RI() + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN32(_MASK) +}; + + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + reg ## reg_name_pre ## id ## _ ## reg_name_post + + +#define HWSEQ_DCN32_REG_LIST()\ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DIO_MEM_PWR_CTRL), \ + SR(ODM_MEM_PWR_CTRL3), \ + SR(MMHUBBUB_MEM_PWR_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCFCLK_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ + 
SRII(PIXEL_RATE_CNTL, OTG, 0), \ + SRII(PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PIXEL_RATE_CNTL, OTG, 3),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ + SR(MICROSECOND_TIME_BASE_DIV), \ + SR(MILLISECOND_TIME_BASE_DIV), \ + SR(DISPCLK_FREQ_CHANGE_CNTL), \ + SR(RBBMIF_TIMEOUT_DIS), \ + SR(RBBMIF_TIMEOUT_DIS_2), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ + SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ + SR(MPC_CRC_CTRL), \ + SR(MPC_CRC_RESULT_GB), \ + SR(MPC_CRC_RESULT_C), \ + SR(MPC_CRC_RESULT_AR), \ + SR(DOMAIN0_PG_CONFIG), \ + SR(DOMAIN1_PG_CONFIG), \ + SR(DOMAIN2_PG_CONFIG), \ + SR(DOMAIN3_PG_CONFIG), \ + SR(DOMAIN16_PG_CONFIG), \ + SR(DOMAIN17_PG_CONFIG), \ + SR(DOMAIN18_PG_CONFIG), \ + SR(DOMAIN19_PG_CONFIG), \ + SR(DOMAIN0_PG_STATUS), \ + SR(DOMAIN1_PG_STATUS), \ + SR(DOMAIN2_PG_STATUS), \ + SR(DOMAIN3_PG_STATUS), \ + SR(DOMAIN16_PG_STATUS), \ + SR(DOMAIN17_PG_STATUS), \ + SR(DOMAIN18_PG_STATUS), \ + SR(DOMAIN19_PG_STATUS), \ + SR(D1VGA_CONTROL), \ + SR(D2VGA_CONTROL), \ + SR(D3VGA_CONTROL), \ + SR(D4VGA_CONTROL), \ + SR(D5VGA_CONTROL), \ + SR(D6VGA_CONTROL), \ + SR(DC_IP_REQUEST_CNTL), \ + SR(AZALIA_AUDIO_DTO), \ + SR(AZALIA_CONTROLLER_CLOCK_GATING) + +static struct dce_hwseq_registers hwseq_reg; + +#define hwseq_reg_init()\ + HWSEQ_DCN32_REG_LIST() + +#define HWSEQ_DCN32_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ + HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ + HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ + HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ + HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, 
VGA_MEM_PWR_FORCE, mask_sh) + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN32_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN32_MASK_SH_LIST(_MASK) +}; +#define vmid_regs_init(id)\ + DCN20_VMID_REG_LIST_RI(id) + +static struct dcn_vmid_registers vmid_regs[16]; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn32 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 5, + .num_stream_encoder = 5, + .num_hpo_dp_stream_encoder = 4, + .num_hpo_dp_link_encoder = 2, + .num_pll = 5, + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_mpc_3dlut = 4, + .num_dsc = 4, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + // 6:1 downscaling ratio: 1000/6 = 166.666 + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = false, + .pipe_split_policy = MPC_SPLIT_AVOID, // Due to CRB, no need to MPC split anymore + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 7680,/*upto 8K*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .enable_mem_low_power = { + .bits = { + .vga = false, + .i2c = false, + .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled + .dscl = false, + .cm = false, + .mpc = false, + .optc = true, + } + }, + .use_max_lb = true, + .force_disable_subvp = false, + .exit_idle_opt_for_cursor_updates = true, + .using_dml2 = false, + .enable_single_display_2to1_odm_policy = true, + + /* Must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/ + .enable_double_buffered_dsc_pg_support = true, + .enable_dp_dig_pixel_rate_div_policy = 1, + .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback" + .alloc_extra_way_for_cursor = true, + .min_prefetch_in_strobe_ns = 60000, // 60us + .disable_unbounded_requesting = false, + .override_dispclk_programming = true, + .disable_fpo_optimizations = false, + .fpo_vactive_margin_us = 2000, // 2000us + .disable_fpo_vactive = false, + .disable_boot_optimizations = false, + .disable_subvp_high_refresh = false, + .disable_dp_plus_plus_wa = true, + .fpo_vactive_min_active_margin_us = 200, + .fpo_vactive_max_blank_us = 1000, + .enable_legacy_fast_update = false, +}; + +static struct dce_aux *dcn32_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT aux_engine_regs + aux_engine_regs_init(0), + aux_engine_regs_init(1), + aux_engine_regs_init(2), + aux_engine_regs_init(3), + 
aux_engine_regs_init(4); + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs_init(id)\ + I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) + +static struct dce_i2c_registers i2c_hw_regs[5]; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct dce_i2c_hw *dcn32_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT i2c_hw_regs + i2c_inst_regs_init(1), + i2c_inst_regs_init(2), + i2c_inst_regs_init(3), + i2c_inst_regs_init(4), + i2c_inst_regs_init(5); + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static struct clock_source *dcn32_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static struct hubbub *dcn32_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub2 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub2) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT hubbub_reg + hubbub_reg_init(); + +#undef REG_STRUCT +#define REG_STRUCT vmid_regs + vmid_regs_init(0), + vmid_regs_init(1), + vmid_regs_init(2), + vmid_regs_init(3), + vmid_regs_init(4), + vmid_regs_init(5), + vmid_regs_init(6), + vmid_regs_init(7), + vmid_regs_init(8), + vmid_regs_init(9), + vmid_regs_init(10), + vmid_regs_init(11), + vmid_regs_init(12), + vmid_regs_init(13), + vmid_regs_init(14), + vmid_regs_init(15); + + hubbub32_construct(hubbub2, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask, + ctx->dc->dml.ip.det_buffer_size_kbytes, + ctx->dc->dml.ip.pixel_chunk_size_kbytes, + ctx->dc->dml.ip.config_return_buffer_size_in_kbytes); + + + for (i = 0; i < res_cap_dcn32.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub2->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub2->base; +} + +static struct hubp *dcn32_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT hubp_regs + hubp_regs_init(0), + hubp_regs_init(1), + hubp_regs_init(2), + hubp_regs_init(3); + + if (hubp32_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static void dcn32_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN30_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn32_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn3_dpp *dpp3 = + kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + 
if (!dpp3) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dpp_regs + dpp_regs_init(0), + dpp_regs_init(1), + dpp_regs_init(2), + dpp_regs_init(3); + + if (dpp32_construct(dpp3, ctx, inst, + &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp3->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp3); + return NULL; +} + +static struct mpc *dcn32_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), + GFP_KERNEL); + + if (!mpc30) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT mpc_regs + dcn_mpc_regs_init(); + + dcn32_mpc_construct(mpc30, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct output_pixel_processor *dcn32_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp2 = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp2) { + BREAK_TO_DEBUGGER(); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT opp_regs + opp_regs_init(0), + opp_regs_init(1), + opp_regs_init(2), + opp_regs_init(3); + + dcn20_opp_construct(opp2, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp2->base; +} + + +static struct timing_generator *dcn32_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT optc_regs + optc_regs_init(0), + optc_regs_init(1), + optc_regs_init(2), + optc_regs_init(3); + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn32_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn32_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT link_enc_aux_regs + aux_regs_init(0), + aux_regs_init(1), + aux_regs_init(2), + aux_regs_init(3), + aux_regs_init(4); + +#undef REG_STRUCT +#define REG_STRUCT link_enc_hpd_regs + hpd_regs_init(0), + hpd_regs_init(1), + hpd_regs_init(2), + hpd_regs_init(3), + hpd_regs_init(4); + +#undef REG_STRUCT +#define REG_STRUCT link_enc_regs + link_regs_init(0, A), + link_regs_init(1, B), + link_regs_init(2, C), + link_regs_init(3, D), + link_regs_init(4, E); + + dcn32_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +struct panel_cntl *dcn32_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dcn31_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dcn31_panel_cntl_construct(panel_cntl, init_data); + + return &panel_cntl->base; +} + +static void 
read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, ctx->dcn_reg_offsets[regDC_PINSTRAPS_BASE_IDX] + regDC_PINSTRAPS, + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn32_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + +#undef REG_STRUCT +#define REG_STRUCT audio_regs + audio_regs_init(0), + audio_regs_init(1), + audio_regs_init(2), + audio_regs_init(3), + audio_regs_init(4); + + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn32_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); + + if (!vpg3) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT vpg_regs + vpg_regs_init(0), + vpg_regs_init(1), + vpg_regs_init(2), + vpg_regs_init(3), + vpg_regs_init(4), + vpg_regs_init(5), + vpg_regs_init(6), + vpg_regs_init(7), + vpg_regs_init(8), + vpg_regs_init(9); + + vpg3_construct(vpg3, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg3->base; +} + +static struct afmt *dcn32_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); + + if (!afmt3) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT afmt_regs + afmt_regs_init(0), + afmt_regs_init(1), + afmt_regs_init(2), + afmt_regs_init(3), + afmt_regs_init(4), + afmt_regs_init(5); + + afmt3_construct(afmt3, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + return &afmt3->base; +} + +static struct apg *dcn31_apg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); + + if (!apg31) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT apg_regs + apg_regs_init(0), + apg_regs_init(1), + apg_regs_init(2), + apg_regs_init(3); + + apg31_construct(apg31, ctx, inst, + &apg_regs[inst], + &apg_shift, + &apg_mask); + + return &apg31->base; +} + +static struct stream_encoder *dcn32_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn32_vpg_create(ctx, vpg_inst); + afmt = dcn32_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT stream_enc_regs + stream_enc_regs_init(0), + stream_enc_regs_init(1), + stream_enc_regs_init(2), + stream_enc_regs_init(3), + stream_enc_regs_init(4); + + dcn32_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static struct hpo_dp_stream_encoder *dcn32_hpo_dp_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; + struct vpg *vpg; + struct apg *apg; + uint32_t hpo_dp_inst; + uint32_t vpg_inst; + uint32_t apg_inst; + + ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); + hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; + + /* Mapping of VPG register blocks to HPO DP block instance: + * VPG[6] 
-> HPO_DP[0] + * VPG[7] -> HPO_DP[1] + * VPG[8] -> HPO_DP[2] + * VPG[9] -> HPO_DP[3] + */ + vpg_inst = hpo_dp_inst + 6; + + /* Mapping of APG register blocks to HPO DP block instance: + * APG[0] -> HPO_DP[0] + * APG[1] -> HPO_DP[1] + * APG[2] -> HPO_DP[2] + * APG[3] -> HPO_DP[3] + */ + apg_inst = hpo_dp_inst; + + /* allocate HPO stream encoder and create VPG sub-block */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); + vpg = dcn32_vpg_create(ctx, vpg_inst); + apg = dcn31_apg_create(ctx, apg_inst); + + if (!hpo_dp_enc31 || !vpg || !apg) { + kfree(hpo_dp_enc31); + kfree(vpg); + kfree(apg); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT hpo_dp_stream_enc_regs + hpo_dp_stream_encoder_reg_init(0), + hpo_dp_stream_encoder_reg_init(1), + hpo_dp_stream_encoder_reg_init(2), + hpo_dp_stream_encoder_reg_init(3); + + dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, + hpo_dp_inst, eng_id, vpg, apg, + &hpo_dp_stream_enc_regs[hpo_dp_inst], + &hpo_dp_se_shift, &hpo_dp_se_mask); + + return &hpo_dp_enc31->base; +} + +static struct hpo_dp_link_encoder *dcn32_hpo_dp_link_encoder_create( + uint8_t inst, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; + + /* allocate HPO link encoder */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); + +#undef REG_STRUCT +#define REG_STRUCT hpo_dp_link_enc_regs + hpo_dp_link_encoder_reg_init(0), + hpo_dp_link_encoder_reg_init(1); + + hpo_dp_link_encoder32_construct(hpo_dp_enc31, ctx, inst, + &hpo_dp_link_enc_regs[inst], + &hpo_dp_le_shift, &hpo_dp_le_mask); + + return &hpo_dp_enc31->base; +} + +static struct dce_hwseq *dcn32_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + +#undef REG_STRUCT +#define REG_STRUCT hwseq_reg + hwseq_reg_init(); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn32_create_audio, + .create_stream_encoder = dcn32_stream_encoder_create, + .create_hpo_dp_stream_encoder = dcn32_hpo_dp_stream_encoder_create, + .create_hpo_dp_link_encoder = dcn32_hpo_dp_link_encoder_create, + .create_hwseq = dcn32_hwseq_create, +}; + +static void dcn32_resource_destruct(struct dcn32_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { + if (pool->base.hpo_dp_stream_enc[i] != NULL) { + if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); + pool->base.hpo_dp_stream_enc[i]->vpg = NULL; + } + if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { + kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); + pool->base.hpo_dp_stream_enc[i]->apg = NULL; + } + kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); + 
pool->base.hpo_dp_stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { + if (pool->base.hpo_dp_link_enc[i] != NULL) { + kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); + pool->base.hpo_dp_link_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(TO_DCN20_HUBBUB(pool->base.hubbub)); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn32_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.oem_device != NULL) { + struct dc *dc = pool->base.oem_device->ctx->dc; + + dc->link_srv->destroy_ddc_service(&pool->base.oem_device); + } +} + + +static bool dcn32_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t dwb_count = pool->res_cap->num_dwb; + + for (i = 0; i < dwb_count; i++) 
{ + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + +#undef REG_STRUCT +#define REG_STRUCT dwbc30_regs + dwbc_regs_dcn3_init(0); + + dcn30_dwbc_construct(dwbc30, ctx, + &dwbc30_regs[i], + &dwbc30_shift, + &dwbc30_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +static bool dcn32_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t dwb_count = pool->res_cap->num_dwb; + + for (i = 0; i < dwb_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + +#undef REG_STRUCT +#define REG_STRUCT mcif_wb30_regs + mcif_wb_regs_dcn3_init(0); + + dcn32_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn32_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT dsc_regs + dsc_regsDCN20_init(0), + dsc_regsDCN20_init(1), + dsc_regsDCN20_init(2), + dsc_regsDCN20_init(3); + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + + dsc->max_image_width = 6016; + + return &dsc->base; +} + +static void dcn32_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn32_resource_pool *dcn32_pool = TO_DCN32_RES_POOL(*pool); + + dcn32_resource_destruct(dcn32_pool); + kfree(dcn32_pool); + *pool = NULL; +} + +bool dcn32_acquire_post_bldn_3dlut( + struct resource_context *res_ctx, + const struct resource_pool *pool, + int mpcc_id, + struct dc_3dlut **lut, + struct dc_transfer_func **shaper) +{ + bool ret = false; + + ASSERT(*lut == NULL && *shaper == NULL); + *lut = NULL; + *shaper = NULL; + + if (!res_ctx->is_mpc_3dlut_acquired[mpcc_id]) { + *lut = pool->mpc_lut[mpcc_id]; + *shaper = pool->mpc_shaper[mpcc_id]; + res_ctx->is_mpc_3dlut_acquired[mpcc_id] = true; + ret = true; + } + return ret; +} + +bool dcn32_release_post_bldn_3dlut( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_3dlut **lut, + struct dc_transfer_func **shaper) +{ + int i; + bool ret = false; + + for (i = 0; i < pool->res_cap->num_mpc_3dlut; i++) { + if (pool->mpc_lut[i] == *lut && pool->mpc_shaper[i] == *shaper) { + res_ctx->is_mpc_3dlut_acquired[i] = false; + pool->mpc_lut[i]->state.raw = 0; + *lut = NULL; + *shaper = NULL; + ret = true; + break; + } + } + return ret; +} + +static void dcn32_enable_phantom_plane(struct dc *dc, + struct dc_state *context, + struct dc_stream_state *phantom_stream, + unsigned int dc_pipe_idx) +{ + struct dc_plane_state *phantom_plane = NULL; + struct dc_plane_state *prev_phantom_plane = NULL; + struct pipe_ctx *curr_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; + + while (curr_pipe) { + if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) + phantom_plane = prev_phantom_plane; + else + phantom_plane = dc_create_plane_state(dc); + + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); + memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, + sizeof(phantom_plane->scaling_quality)); + 
memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); + memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); + memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); + memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, + sizeof(phantom_plane->plane_size)); + memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, + sizeof(phantom_plane->tiling_info)); + memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); + phantom_plane->format = curr_pipe->plane_state->format; + phantom_plane->rotation = curr_pipe->plane_state->rotation; + phantom_plane->visible = curr_pipe->plane_state->visible; + + /* Shadow pipe has small viewport. */ + phantom_plane->clip_rect.y = 0; + phantom_plane->clip_rect.height = phantom_stream->src.height; + + phantom_plane->is_phantom = true; + + dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); + + curr_pipe = curr_pipe->bottom_pipe; + prev_phantom_plane = phantom_plane; + } +} + +static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx) +{ + struct dc_stream_state *phantom_stream = NULL; + struct pipe_ctx *ref_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; + + phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); + phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; + phantom_stream->dpms_off = true; + phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; + phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; + ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; + ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; + + /* stream has limited viewport and small timing */ + memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); + memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); + memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); + DC_FP_START(); + dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx); + DC_FP_END(); + + dc_add_stream_to_ctx(dc, context, phantom_stream); + return phantom_stream; +} + +void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context) +{ + int i; + struct dc_plane_state *phantom_plane = NULL; + struct dc_stream_state *phantom_stream = NULL; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (resource_is_pipe_type(pipe, OTG_MASTER) && + resource_is_pipe_type(pipe, DPP_PIPE) && + pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + phantom_plane = pipe->plane_state; + phantom_stream = pipe->stream; + + dc_plane_state_retain(phantom_plane); + dc_stream_retain(phantom_stream); + } + } +} + +// return true if pipes were removed from ctx, false otherwise +bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update) +{ + int i; + bool removed_pipe = false; + struct dc_plane_state *phantom_plane = NULL; + struct dc_stream_state *phantom_stream = NULL; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + // build scaling params for phantom pipes + if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == 
SUBVP_PHANTOM) { + phantom_plane = pipe->plane_state; + phantom_stream = pipe->stream; + + dc_rem_all_planes_for_stream(dc, pipe->stream, context); + dc_remove_stream_from_ctx(dc, context, pipe->stream); + + /* Ref count is incremented on allocation and also when added to the context. + * Therefore we must call release for the phantom plane and stream once + * they are removed from the ctx to finally decrement the refcount to 0 to free. + */ + dc_plane_state_release(phantom_plane); + dc_stream_release(phantom_stream); + + removed_pipe = true; + } + + /* For non-full updates, a shallow copy of the current state + * is created. In this case we don't want to erase the current + * state (there can be 2 HIRQL threads, one in flip, and one in + * checkMPO), which can cause a race condition. + * + * This is just a workaround, needs a proper fix. + */ + if (!fast_update) { + // Clear all phantom stream info + if (pipe->stream) { + pipe->stream->mall_stream_config.type = SUBVP_NONE; + pipe->stream->mall_stream_config.paired_stream = NULL; + } + + if (pipe->plane_state) { + pipe->plane_state->is_phantom = false; + } + } + } + return removed_pipe; +} + +/* TODO: Input to this function should indicate which pipe indexes (or streams) + * require a phantom pipe / stream + */ +void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int index) +{ + struct dc_stream_state *phantom_stream = NULL; + unsigned int i; + + // The index of the DC pipe passed into this function is guaranteed to + // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't + // already have phantom pipe assigned, etc.) by previous checks. + phantom_stream = dcn32_enable_phantom_stream(dc, context, pipes, pipe_cnt, index); + dcn32_enable_phantom_plane(dc, context, phantom_stream, index); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + // Build scaling params for phantom pipes which were newly added. + // We determine which phantom pipes were added by comparing with + // the phantom stream. + if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream && + pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipe->stream->use_dynamic_meta = false; + pipe->plane_state->flip_immediate = false; + if (!resource_build_scaling_params(pipe)) { + // Log / remove phantom pipes since failed to build scaling params + } + } + } +} + +static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_validate) +{ + bool out = false; + + BW_VAL_TRACE_SETUP(); + + int vlevel = 0; + int pipe_cnt = 0; + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + struct mall_temp_config mall_temp_config; + + /* To handle Freesync properly, set FreeSync DML parameters + * to their default state for the first stage of validation + */ + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; + + DC_LOGGER_INIT(dc->ctx->logger); + + /* For fast validation, there are situations where a shallow copy + * of the dc->current_state is created for the validation. In this case + * we want to save and restore the mall config because we always + * tear down subvp at the beginning of validation (and don't attempt + * to add it back if it's fast validation). 
If we don't restore the + * subvp config in cases of fast validation + shallow copy of the + * dc->current_state, the dc->current_state will have a partially + * removed subvp state when we did not intend to remove it. + */ + if (fast_validate) { + memset(&mall_temp_config, 0, sizeof(mall_temp_config)); + dcn32_save_mall_state(dc, context, &mall_temp_config); + } + + BW_VAL_TRACE_COUNT(); + + DC_FP_START(); + out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); + + if (fast_validate) + dcn32_restore_mall_state(dc, context, &mall_temp_config); + + if (pipe_cnt == 0) + goto validate_out; + + if (!out) + goto validate_fail; + + BW_VAL_TRACE_END_VOLTAGE_LEVEL(); + + if (fast_validate) { + BW_VAL_TRACE_SKIP(fast); + goto validate_out; + } + + dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); + + dcn32_override_min_req_memclk(dc, context); + + BW_VAL_TRACE_END_WATERMARKS(); + + goto validate_out; + +validate_fail: + DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", + dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); + + BW_VAL_TRACE_SKIP(fail); + out = false; + +validate_out: + kfree(pipes); + + BW_VAL_TRACE_FINISH(); + + return out; +} + +bool dcn32_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + if (dc->debug.using_dml2) + out = dml2_validate(dc, context, fast_validate); + else + out = dml1_validate(dc, context, fast_validate); + return out; +} + +int dcn32_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe = NULL; + bool subvp_in_use = false; + struct dc_crtc_timing *timing; + + dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + pipes[pipe_cnt].pipe.src.gpuvm = true; + DC_FP_START(); + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19; + + /* Only populate DML input with subvp info for full updates. + * This is just a workaround -- needs a proper fix. 
+ */ + if (!fast_validate) { + switch (pipe->stream->mall_stream_config.type) { + case SUBVP_MAIN: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; + subvp_in_use = true; + break; + case SUBVP_PHANTOM: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; + pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; + // Disallow unbounded req for SubVP according to DCHUB programming guide + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + break; + case SUBVP_NONE: + pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable; + pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable; + break; + default: + break; + } + } + + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + + + pipe_cnt++; + } + + /* For DET allocation, we don't want to use DML policy (not optimal for utilizing all + * the DET available for each pipe). Use the DET override input to maintain our driver + * policy. + */ + dcn32_set_det_allocations(dc, context, pipes); + + // In general cases we want to keep the dram clock change requirement + // (prefer configs that support MCLK switch). Only override to false + // for SubVP + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || subvp_in_use) + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false; + else + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; + + return pipe_cnt; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, + .get_subvp_en = dcn32_subvp_in_use, +}; + +void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + DC_FP_START(); + dcn32_calculate_wm_and_dlg_fpu(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); +} + +static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn32_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); +} + +static struct resource_funcs dcn32_res_pool_funcs = { + .destroy = dcn32_destroy_resource_pool, + .link_enc_create = dcn32_link_encoder_create, + .link_enc_create_minimal = NULL, + .panel_cntl_create = dcn32_panel_cntl_create, + .validate_bandwidth = dcn32_validate_bandwidth, + .calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg, + .populate_dml_pipes = dcn32_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn32_acquire_free_pipe_as_secondary_dpp_pipe, + .acquire_free_pipe_as_secondary_opp_head = dcn32_acquire_free_pipe_as_secondary_opp_head, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn32_acquire_post_bldn_3dlut, + 
.release_post_bldn_3dlut = dcn32_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn32_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, + .add_phantom_pipes = dcn32_add_phantom_pipes, + .remove_phantom_pipes = dcn32_remove_phantom_pipes, + .retain_phantom_pipes = dcn32_retain_phantom_pipes, + .save_mall_state = dcn32_save_mall_state, + .restore_mall_state = dcn32_restore_mall_state, +}; + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* DCN32 support max 4 pipes */ + value = value & 0xf; + return value; +} + + +static bool dcn32_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn32_resource_pool *pool) +{ + int i, j; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + struct ddc_service_init_data ddc_init_data = {0}; + uint32_t pipe_fuses = 0; + uint32_t num_pipes = 4; + +#undef REG_STRUCT +#define REG_STRUCT bios_regs + bios_regs_init(); + +#undef REG_STRUCT +#define REG_STRUCT clk_src_regs + clk_src_regs_init(0, A), + clk_src_regs_init(1, B), + clk_src_regs_init(2, C), + clk_src_regs_init(3, D), + clk_src_regs_init(4, E); + +#undef REG_STRUCT +#define REG_STRUCT abm_regs + abm_regs_init(0), + abm_regs_init(1), + abm_regs_init(2), + abm_regs_init(3); + +#undef REG_STRUCT +#define REG_STRUCT dccg_regs + dccg_regs_init(); + + DC_FP_START(); + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn32; + /* max number of pipes for ASIC before checking for pipe fuses */ + num_pipes = pool->base.res_cap->num_timing_generator; + pipe_fuses = read_pipe_fuses(ctx); + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) + if (pipe_fuses & 1 << i) + num_pipes--; + + if (pipe_fuses & 1) + ASSERT(0); //Unexpected - Pipe 0 should always be fully functional! + + if (pipe_fuses & CC_DC_PIPE_DIS__DC_FULL_DIS_MASK) + ASSERT(0); //Entire DCN is harvested! + + /* within dml lib, initial value is hard coded, if ASIC pipe is fused, the + * value will be changed, update max_num_dpp and max_num_otg for dml. 
+ */ + dcn3_2_ip.max_num_dpp = num_pipes; + dcn3_2_ip.max_num_otg = num_pipes; + + pool->base.funcs = &dcn32_res_pool_funcs; + + /************************************************* + * Resource + asic cap hardcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.timing_generator_count = num_pipes; + pool->base.pipe_count = num_pipes; + pool->base.mpcc_count = num_pipes; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ + /* TODO: Bring max_cursor_size back to 256 after subvp cursor corruption is fixed */ + dc->caps.max_cursor_size = 64; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.mall_size_per_mem_channel = 4; + dc->caps.mall_size_total = 0; + dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; + + dc->caps.cache_line_size = 64; + dc->caps.cache_num_ways = 16; + + /* Calculate the available MALL space */ + dc->caps.max_cab_allocation_bytes = dcn32_calc_num_avail_chans_for_mall( + dc, dc->ctx->dc_bios->vram_info.num_chans) * + dc->caps.mall_size_per_mem_channel * 1024 * 1024; + dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; + + dc->caps.subvp_fw_processing_delay_us = 15; + dc->caps.subvp_drr_max_vblank_margin_us = 40; + dc->caps.subvp_prefetch_end_to_mall_start_us = 15; + dc->caps.subvp_swath_height_margin_lines = 16; + dc->caps.subvp_pstate_allow_width_us = 20; + dc->caps.subvp_vertical_int_margin_us = 30; + dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin + + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; + dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; + dc->caps.edp_dsc_support = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.seamless_odm = true; + dc->caps.max_v_total = (1 << 15) - 1; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1 + // no OGAM ROM on DCN2 and later ASICs + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //4, configurable to be before or after BLND in MPCC + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + 
dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + + dc->config.dc_mode_clk_limit_support = true; + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN32_CLK_SRC_PLL0] = + dcn32_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN32_CLK_SRC_PLL1] = + dcn32_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN32_CLK_SRC_PLL2] = + dcn32_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN32_CLK_SRC_PLL3] = + dcn32_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN32_CLK_SRC_PLL4] = + dcn32_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN32_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn32_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* DCCG */ + pool->base.dccg = dccg32_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* DML */ + dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); + + /* IRQ Service */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn32_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn32_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs, TGs, ABMs */ + for (i = 0, j = 0; i < pool->base.res_cap->num_timing_generator; i++) { + + /* if pipe is disabled, skip instance of HW pipe, + * i.e, skip ASIC register instance + */ + if (pipe_fuses & 1 << i) + continue; + + /* HUBPs */ + pool->base.hubps[j] = dcn32_hubp_create(ctx, i); + if (pool->base.hubps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + /* DPPs */ + pool->base.dpps[j] = dcn32_dpp_create(ctx, i); + if (pool->base.dpps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + + /* OPPs */ + pool->base.opps[j] = dcn32_opp_create(ctx, i); + 
if (pool->base.opps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + + /* TGs */ + pool->base.timing_generators[j] = dcn32_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + + /* ABMs */ + pool->base.multiple_abms[j] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if (pool->base.multiple_abms[j] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* index for resource pool arrays for next valid pipe */ + j++; + } + + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* MPCCs */ + pool->base.mpc = dcn32_mpc_create(ctx, pool->base.res_cap->num_timing_generator, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + /* DSCs */ + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn32_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB */ + if (!dcn32_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + /* MMHUBBUB */ + if (!dcn32_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn32_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn32_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* Audio, HWSeq, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer init functions and Plane caps */ + dcn32_hw_sequencer_init_functions(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { + ddc_init_data.ctx = dc->ctx; + ddc_init_data.link = NULL; + ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; + ddc_init_data.id.enum_id = 0; + ddc_init_data.id.type = OBJECT_TYPE_GENERIC; + pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); + } else { + pool->base.oem_device = NULL; + } + + dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; + dc->dml2_options.use_native_pstate_optimization = false; + dc->dml2_options.use_native_soc_bb_construction = true; + dc->dml2_options.minimize_dispclk_using_odm = true; + + dc->dml2_options.callbacks.dc = dc; + dc->dml2_options.callbacks.build_scaling_params = &resource_build_scaling_params; + dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = 
&dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; + dc->dml2_options.callbacks.acquire_secondary_pipe_for_mpc_odm = &dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy; + dc->dml2_options.callbacks.update_pipes_for_stream_with_slice_count = &resource_update_pipes_for_stream_with_slice_count; + dc->dml2_options.callbacks.update_pipes_for_plane_with_slice_count = &resource_update_pipes_for_plane_with_slice_count; + dc->dml2_options.callbacks.get_mpc_slice_index = &resource_get_mpc_slice_index; + dc->dml2_options.callbacks.get_odm_slice_index = &resource_get_odm_slice_index; + dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; + + dc->dml2_options.svp_pstate.callbacks.dc = dc; + dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context; + dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx; + dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params; + dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state; + dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context; + dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx; + dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink; + dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release; + dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release; + dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; + + dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; + dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; + dc->dml2_options.svp_pstate.subvp_pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us; + dc->dml2_options.svp_pstate.subvp_swath_height_margin_lines = dc->caps.subvp_swath_height_margin_lines; + + dc->dml2_options.svp_pstate.force_disable_subvp = dc->debug.force_disable_subvp; + dc->dml2_options.svp_pstate.force_enable_subvp = dc->debug.force_subvp_mclk_switch; + + dc->dml2_options.mall_cfg.cache_line_size_bytes = dc->caps.cache_line_size; + dc->dml2_options.mall_cfg.cache_num_ways = dc->caps.cache_num_ways; + dc->dml2_options.mall_cfg.max_cab_allocation_bytes = dc->caps.max_cab_allocation_bytes; + dc->dml2_options.mall_cfg.mblk_height_4bpe_pixels = DCN3_2_MBLK_HEIGHT_4BPE; + dc->dml2_options.mall_cfg.mblk_height_8bpe_pixels = DCN3_2_MBLK_HEIGHT_8BPE; + dc->dml2_options.mall_cfg.mblk_size_bytes = DCN3_2_MALL_MBLK_SIZE_BYTES; + dc->dml2_options.mall_cfg.mblk_width_pixels = DCN3_2_MBLK_WIDTH; + + dc->dml2_options.max_segments_per_hubp = 18; + dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE; + dc->dml2_options.map_dc_pipes_with_callbacks = true; + + if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) + dc->config.sdpif_request_limit_words_per_umc = 16; + + DC_FP_END(); + + return true; + +create_fail: + + DC_FP_END(); + + dcn32_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn32_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn32_resource_pool *pool = + kzalloc(sizeof(struct dcn32_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn32_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return 
NULL;
+}
+
+/*
+ * Find the most optimal free pipe from res_ctx, which could be used as a
+ * secondary dpp pipe for the input opp head pipe.
+ *
+ * a free pipe - a pipe in the input res_ctx not yet used for any streams or
+ * planes.
+ * secondary dpp pipe - a pipe that gets inserted into a head OPP pipe's MPC
+ * blending tree. This is typically used for rendering MPO planes or
+ * additional offset areas in MPCC combine.
+ *
+ * Hardware Transition Minimization Algorithm for Finding a Secondary DPP Pipe
+ * -------------------------------------------------------------------------
+ *
+ * PROBLEM:
+ *
+ * 1. There is a hardware limitation that a secondary DPP pipe cannot be
+ * transferred from one MPC blending tree to the other in a single frame.
+ * Otherwise it could cause glitches on the screen.
+ *
+ * For instance, we cannot transition from state 1 to state 2 in one frame. This
+ * is because PIPE1 is transferred from PIPE0's MPC blending tree over to
+ * PIPE2's MPC blending tree, which is not supported by hardware.
+ * To support this transition we need to first remove PIPE1 from PIPE0's MPC
+ * blending tree in one frame and then insert PIPE1 to PIPE2's MPC blending tree
+ * in the next frame. This is not optimal as it will delay the flip for two
+ * frames.
+ *
+ * State 1:
+ * PIPE0 -- secondary DPP pipe --> (PIPE1)
+ * PIPE2 -- secondary DPP pipe --> NONE
+ *
+ * State 2:
+ * PIPE0 -- secondary DPP pipe --> NONE
+ * PIPE2 -- secondary DPP pipe --> (PIPE1)
+ *
+ * 2. In general we also want to minimize unnecessary changes in pipe topology.
+ * If a pipe is already added in the current blending tree and there are no
+ * changes to plane topology, we don't want to swap it with another free pipe
+ * unnecessarily in every update. Powering up and down a pipe would require a
+ * full update which delays the flip for 1 frame. If we use the original pipe
+ * we don't have to toggle its power. So we can flip faster.
+ */
+static int find_optimal_free_pipe_as_secondary_dpp_pipe(
+		const struct resource_context *cur_res_ctx,
+		struct resource_context *new_res_ctx,
+		const struct resource_pool *pool,
+		const struct pipe_ctx *new_opp_head)
+{
+	const struct pipe_ctx *cur_opp_head;
+	int free_pipe_idx;
+
+	cur_opp_head = &cur_res_ctx->pipe_ctx[new_opp_head->pipe_idx];
+	free_pipe_idx = resource_find_free_pipe_used_in_cur_mpc_blending_tree(
+			cur_res_ctx, new_res_ctx, cur_opp_head);
+
+	/* Up until here, if we have not found a free secondary pipe, we will
+	 * need to wait for at least one frame to complete the transition
+	 * sequence.
+	 */
+	if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+		free_pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+				cur_res_ctx, new_res_ctx, pool);
+
+	/* Up until here, if we have not found a free secondary pipe, we will
+	 * need to wait for at least two frames to complete the transition
+	 * sequence. It really doesn't matter which pipe we decide to take from
+	 * the currently enabled pipes; swapping only one pipe or several pipes
+	 * won't save any frame time either way.
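+	 * In other words, one frame is needed to detach the pipe from the MPC
+	 * tree it currently sits in and another frame to attach it to the new
+	 * tree, which is exactly the two-step sequence described above.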
+ */ + if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) + free_pipe_idx = resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine( + cur_res_ctx, new_res_ctx, pool); + + if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) + free_pipe_idx = resource_find_any_free_pipe(new_res_ctx, pool); + + return free_pipe_idx; +} + +static struct pipe_ctx *find_idle_secondary_pipe_check_mpo( + struct resource_context *res_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *primary_pipe) +{ + int i; + struct pipe_ctx *secondary_pipe = NULL; + struct pipe_ctx *next_odm_mpo_pipe = NULL; + int primary_index, preferred_pipe_idx; + struct pipe_ctx *old_primary_pipe = NULL; + + /* + * Modified from find_idle_secondary_pipe + * With windowed MPO and ODM, we want to avoid the case where we want a + * free pipe for the left side but the free pipe is being used on the + * right side. + * Add check on current_state if the primary_pipe is the left side, + * to check the right side ( primary_pipe->next_odm_pipe ) to see if + * it is using a pipe for MPO ( primary_pipe->next_odm_pipe->bottom_pipe ) + * - If so, then don't use this pipe + * EXCEPTION - 3 plane ( 2 MPO plane ) case + * - in this case, the primary pipe has already gotten a free pipe for the + * MPO window in the left + * - when it tries to get a free pipe for the MPO window on the right, + * it will see that it is already assigned to the right side + * ( primary_pipe->next_odm_pipe ). But in this case, we want this + * free pipe, since it will be for the right side. So add an + * additional condition, that skipping the free pipe on the right only + * applies if the primary pipe has no bottom pipe currently assigned + */ + if (primary_pipe) { + primary_index = primary_pipe->pipe_idx; + old_primary_pipe = &primary_pipe->stream->ctx->dc->current_state->res_ctx.pipe_ctx[primary_index]; + if ((old_primary_pipe->next_odm_pipe) && (old_primary_pipe->next_odm_pipe->bottom_pipe) + && (!primary_pipe->bottom_pipe)) + next_odm_mpo_pipe = old_primary_pipe->next_odm_pipe->bottom_pipe; + + preferred_pipe_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx; + if ((res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) && + !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == preferred_pipe_idx)) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; + } + } + + /* + * search backwards for the second pipe to keep pipe + * assignment more consistent + */ + if (!secondary_pipe) + for (i = pool->pipe_count - 1; i >= 0; i--) { + if ((res_ctx->pipe_ctx[i].stream == NULL) && + !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == i)) { + secondary_pipe = &res_ctx->pipe_ctx[i]; + secondary_pipe->pipe_idx = i; + break; + } + } + + return secondary_pipe; +} + +static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( + struct dc_state *state, + const struct resource_pool *pool, + struct dc_stream_state *stream, + const struct pipe_ctx *head_pipe) +{ + struct resource_context *res_ctx = &state->res_ctx; + struct pipe_ctx *idle_pipe, *pipe; + struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; + int head_index; + + if (!head_pipe) + ASSERT(0); + + /* + * Modified from dcn20_acquire_idle_pipe_for_layer + * Check if head_pipe in old_context already has bottom_pipe allocated. + * - If so, check if that pipe is available in the current context. 
+ * -- If so, reuse pipe from old_context + */ + head_index = head_pipe->pipe_idx; + pipe = &old_ctx->pipe_ctx[head_index]; + if (pipe->bottom_pipe && res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx].stream == NULL) { + idle_pipe = &res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx]; + idle_pipe->pipe_idx = pipe->bottom_pipe->pipe_idx; + } else { + idle_pipe = find_idle_secondary_pipe_check_mpo(res_ctx, pool, head_pipe); + if (!idle_pipe) + return NULL; + } + + idle_pipe->stream = head_pipe->stream; + idle_pipe->stream_res.tg = head_pipe->stream_res.tg; + idle_pipe->stream_res.opp = head_pipe->stream_res.opp; + + idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; + + return idle_pipe; +} + +static int find_optimal_free_pipe_as_secondary_opp_head( + const struct resource_context *cur_res_ctx, + struct resource_context *new_res_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *new_otg_master) +{ + const struct pipe_ctx *cur_otg_master; + int free_pipe_idx; + + cur_otg_master = &cur_res_ctx->pipe_ctx[new_otg_master->pipe_idx]; + free_pipe_idx = resource_find_free_pipe_used_as_sec_opp_head_by_cur_otg_master( + cur_res_ctx, new_res_ctx, cur_otg_master); + + /* Up until here if we have not found a free secondary pipe, we will + * need to wait for at least one frame to complete the transition + * sequence. + */ + if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) + free_pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx( + cur_res_ctx, new_res_ctx, pool); + + if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) + free_pipe_idx = resource_find_any_free_pipe(new_res_ctx, pool); + + return free_pipe_idx; +} + +struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *opp_head_pipe) +{ + + int free_pipe_idx; + struct pipe_ctx *free_pipe; + + if (!opp_head_pipe->stream->ctx->dc->config.enable_windowed_mpo_odm) + return dcn32_acquire_idle_pipe_for_head_pipe_in_layer( + new_ctx, pool, opp_head_pipe->stream, opp_head_pipe); + + free_pipe_idx = find_optimal_free_pipe_as_secondary_dpp_pipe( + &cur_ctx->res_ctx, &new_ctx->res_ctx, + pool, opp_head_pipe); + if (free_pipe_idx >= 0) { + free_pipe = &new_ctx->res_ctx.pipe_ctx[free_pipe_idx]; + free_pipe->pipe_idx = free_pipe_idx; + free_pipe->stream = opp_head_pipe->stream; + free_pipe->stream_res.tg = opp_head_pipe->stream_res.tg; + free_pipe->stream_res.opp = opp_head_pipe->stream_res.opp; + + free_pipe->plane_res.hubp = pool->hubps[free_pipe->pipe_idx]; + free_pipe->plane_res.ipp = pool->ipps[free_pipe->pipe_idx]; + free_pipe->plane_res.dpp = pool->dpps[free_pipe->pipe_idx]; + free_pipe->plane_res.mpcc_inst = + pool->dpps[free_pipe->pipe_idx]->inst; + } else { + ASSERT(opp_head_pipe); + free_pipe = NULL; + } + + return free_pipe; +} + +struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *otg_master) +{ + int free_pipe_idx = find_optimal_free_pipe_as_secondary_opp_head( + &cur_ctx->res_ctx, &new_ctx->res_ctx, + pool, otg_master); + struct pipe_ctx *free_pipe; + + if (free_pipe_idx >= 0) { + free_pipe = &new_ctx->res_ctx.pipe_ctx[free_pipe_idx]; + free_pipe->pipe_idx = free_pipe_idx; + 
free_pipe->stream = otg_master->stream;
+		free_pipe->stream_res.tg = otg_master->stream_res.tg;
+		free_pipe->stream_res.dsc = NULL;
+		free_pipe->stream_res.opp = pool->opps[free_pipe_idx];
+		free_pipe->plane_res.mi = pool->mis[free_pipe_idx];
+		free_pipe->plane_res.hubp = pool->hubps[free_pipe_idx];
+		free_pipe->plane_res.ipp = pool->ipps[free_pipe_idx];
+		free_pipe->plane_res.xfm = pool->transforms[free_pipe_idx];
+		free_pipe->plane_res.dpp = pool->dpps[free_pipe_idx];
+		free_pipe->plane_res.mpcc_inst = pool->dpps[free_pipe_idx]->inst;
+		if (free_pipe->stream->timing.flags.DSC == 1) {
+			dcn20_acquire_dsc(free_pipe->stream->ctx->dc,
+					&new_ctx->res_ctx,
+					&free_pipe->stream_res.dsc,
+					free_pipe_idx);
+			ASSERT(free_pipe->stream_res.dsc);
+			if (free_pipe->stream_res.dsc == NULL) {
+				memset(free_pipe, 0, sizeof(*free_pipe));
+				free_pipe = NULL;
+			}
+		}
+	} else {
+		ASSERT(otg_master);
+		free_pipe = NULL;
+	}
+
+	return free_pipe;
+}
+
+unsigned int dcn32_calc_num_avail_chans_for_mall(struct dc *dc, int num_chans)
+{
+	/*
+	 * DCN32 and DCN321 SKUs may have different sizes for MALL
+	 * but we may not be able to access all the MALL space.
+	 * If num_chans is a power of 2, then we can access all
+	 * of the available MALL space. Otherwise, we can only
+	 * access:
+	 *
+	 * max_cab_size_in_bytes = total_cache_size_in_bytes *
+	 *   ((2^floor(log2(num_chans)))/num_chans)
+	 *
+	 * e.g. for num_chans = 48: 2^floor(log2(48)) = 32, so only
+	 *   32/48 of the cache (32 channels' worth) is accessible.
+	 *
+	 * Calculating the MALL sizes for all available SKUs, we
+	 * have come up with the following simplified check.
+	 * - we have max_chans which provides the max MALL size.
+	 *   Each channel supports 4 MB of MALL, so:
+	 *
+	 *   total_cache_size_in_bytes = max_chans * 4 MB
+	 *
+	 * - we have avail_chans which shows the number of channels
+	 *   we can use if we can't access the entire MALL space.
+	 *   It is generally half of max_chans
+	 * - so we use the following checks:
+	 *
+	 *   if (num_chans == max_chans), return max_chans
+	 *   if (num_chans < max_chans), return avail_chans
+	 *
+	 * - the exception is GC_11_0_0, where we can't access max_chans,
+	 *   so we define max_avail_chans as the maximum available
+	 *   MALL space
+	 *
+	 */
+	int gc_11_0_0_max_chans = 48;
+	int gc_11_0_0_max_avail_chans = 32;
+	int gc_11_0_0_avail_chans = 16;
+	int gc_11_0_3_max_chans = 16;
+	int gc_11_0_3_avail_chans = 8;
+	int gc_11_0_2_max_chans = 8;
+	int gc_11_0_2_avail_chans = 4;
+
+	if (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev)) {
+		return (num_chans == gc_11_0_0_max_chans) ?
+			gc_11_0_0_max_avail_chans : gc_11_0_0_avail_chans;
+	} else if (ASICREV_IS_GC_11_0_2(dc->ctx->asic_id.hw_internal_rev)) {
+		return (num_chans == gc_11_0_2_max_chans) ?
+			gc_11_0_2_max_chans : gc_11_0_2_avail_chans;
+	} else { // if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev)) {
+		return (num_chans == gc_11_0_3_max_chans) ?
+			gc_11_0_3_max_chans : gc_11_0_3_avail_chans;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
new file mode 100644
index 000000000000..b931008114c9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -0,0 +1,1263 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN32_RESOURCE_H_ +#define _DCN32_RESOURCE_H_ + +#include "core_types.h" + +#define DCN3_2_DEFAULT_DET_SIZE 256 +#define DCN3_2_MAX_DET_SIZE 1152 +#define DCN3_2_MIN_DET_SIZE 128 +#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_2_DET_SEG_SIZE 64 +#define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024 +#define DCN3_2_MBLK_WIDTH 128 +#define DCN3_2_MBLK_HEIGHT_4BPE 128 +#define DCN3_2_MBLK_HEIGHT_8BPE 64 +#define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq +#define SUBVP_HIGH_REFRESH_LIST_LEN 4 +#define DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ 1800 +#define DCN3_2_VMIN_DISPCLK_HZ 717000000 + +#define TO_DCN32_RES_POOL(pool)\ + container_of(pool, struct dcn32_resource_pool, base) + +extern struct _vcs_dpi_ip_params_st dcn3_2_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc; + +struct subvp_high_refresh_list { + int min_refresh; + int max_refresh; + struct resolution { + int width; + int height; + } res[SUBVP_HIGH_REFRESH_LIST_LEN]; +}; + +struct dcn32_resource_pool { + struct resource_pool base; +}; + +struct resource_pool *dcn32_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +struct panel_cntl *dcn32_panel_cntl_create( + const struct panel_cntl_init_data *init_data); + +bool dcn32_acquire_post_bldn_3dlut( + struct resource_context *res_ctx, + const struct resource_pool *pool, + int mpcc_id, + struct dc_3dlut **lut, + struct dc_transfer_func **shaper); + +bool dcn32_release_post_bldn_3dlut( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_3dlut **lut, + struct dc_transfer_func **shaper); + +bool dcn32_remove_phantom_pipes(struct dc *dc, + struct dc_state *context, bool fast_update); + +void dcn32_retain_phantom_pipes(struct dc *dc, + struct dc_state *context); + +void dcn32_add_phantom_pipes(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int index); + +bool dcn32_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate); + +int dcn32_populate_dml_pipes_from_context( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); + +void dcn32_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +uint32_t dcn32_helper_mall_bytes_to_ways( + struct dc *dc, + 
uint32_t total_size_in_mall_bytes); + +uint32_t dcn32_helper_calculate_mall_bytes_for_cursor( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool ignore_cursor_buf); + +uint32_t dcn32_helper_calculate_num_ways_for_subvp( + struct dc *dc, + struct dc_state *context); + +void dcn32_merge_pipes_for_subvp(struct dc *dc, + struct dc_state *context); + +bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, + struct dc_state *context); + +bool dcn32_subvp_in_use(struct dc *dc, + struct dc_state *context); + +bool dcn32_mpo_in_use(struct dc_state *context); + +bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context); +bool dcn32_is_center_timing(struct pipe_ctx *pipe); +bool dcn32_is_psr_capable(struct pipe_ctx *pipe); + +struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *opp_head_pipe); + +struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head( + const struct dc_state *cur_ctx, + struct dc_state *new_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *otg_master); + +void dcn32_release_pipe(struct dc_state *context, + struct pipe_ctx *pipe, + const struct resource_pool *pool); + +void dcn32_determine_det_override(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes); + +void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes); + +void dcn32_save_mall_state(struct dc *dc, + struct dc_state *context, + struct mall_temp_config *temp_config); + +void dcn32_restore_mall_state(struct dc *dc, + struct dc_state *context, + struct mall_temp_config *temp_config); + +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context); + +bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe); + +bool dcn32_allow_subvp_high_refresh_rate(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe); + +unsigned int dcn32_calc_num_avail_chans_for_mall(struct dc *dc, int num_chans); + +double dcn32_determine_max_vratio_prefetch(struct dc *dc, struct dc_state *context); + +bool dcn32_check_native_scaling_for_res(struct pipe_ctx *pipe, unsigned int width, unsigned int height); + +bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context); + +bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int vlevel); + +/* definitions for run time init of reg offsets */ + +/* CLK SRC */ +#define CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) \ + SRI_ARR_ALPHABET(PIXCLK_RESYNC_CNTL, PHYPLL, index, pllid), \ + SRII_ARR_2(PHASE, DP_DTO, 0, index), \ + SRII_ARR_2(PHASE, DP_DTO, 1, index), \ + SRII_ARR_2(PHASE, DP_DTO, 2, index), \ + SRII_ARR_2(PHASE, DP_DTO, 3, index), \ + SRII_ARR_2(MODULO, DP_DTO, 0, index), \ + SRII_ARR_2(MODULO, DP_DTO, 1, index), \ + SRII_ARR_2(MODULO, DP_DTO, 2, index), \ + SRII_ARR_2(MODULO, DP_DTO, 3, index), \ + SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 0, index), \ + SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 1, index), \ + SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 2, index), \ + SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 3, index) + +/* ABM */ +#define ABM_DCN32_REG_LIST_RI(id) \ + SRI_ARR(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \ + SRI_ARR(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \ + SRI_ARR(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \ + SRI_ARR(DC_ABM1_HG_MISC_CTRL, ABM, id), \ + SRI_ARR(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \ + SRI_ARR(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \ + 
SRI_ARR(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \ + SRI_ARR(BL1_PWM_USER_LEVEL, ABM, id), \ + SRI_ARR(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \ + SRI_ARR(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \ + SRI_ARR(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \ + SRI_ARR(DC_ABM1_ACE_THRES_12, ABM, id), NBIO_SR_ARR(BIOS_SCRATCH_2, id) + +/* Audio */ +#define AUD_COMMON_REG_LIST_RI(id) \ + SRI_ARR(AZALIA_F0_CODEC_ENDPOINT_INDEX, AZF0ENDPOINT, id), \ + SRI_ARR(AZALIA_F0_CODEC_ENDPOINT_DATA, AZF0ENDPOINT, id), \ + SR_ARR(AZALIA_F0_CODEC_FUNCTION_PARAMETER_STREAM_FORMATS, id), \ + SR_ARR(AZALIA_F0_CODEC_FUNCTION_PARAMETER_SUPPORTED_SIZE_RATES, id), \ + SR_ARR(AZALIA_F0_CODEC_FUNCTION_PARAMETER_POWER_STATES, id), \ + SR_ARR(DCCG_AUDIO_DTO_SOURCE, id), SR_ARR(DCCG_AUDIO_DTO0_MODULE, id), \ + SR_ARR(DCCG_AUDIO_DTO0_PHASE, id), SR_ARR(DCCG_AUDIO_DTO1_MODULE, id), \ + SR_ARR(DCCG_AUDIO_DTO1_PHASE, id) \ + +/* VPG */ + +#define VPG_DCN3_REG_LIST_RI(id) \ + SRI_ARR(VPG_GENERIC_STATUS, VPG, id), \ + SRI_ARR(VPG_GENERIC_PACKET_ACCESS_CTRL, VPG, id), \ + SRI_ARR(VPG_GENERIC_PACKET_DATA, VPG, id), \ + SRI_ARR(VPG_GSP_FRAME_UPDATE_CTRL, VPG, id), \ + SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id) + +/* AFMT */ +#define AFMT_DCN3_REG_LIST_RI(id) \ + SRI_ARR(AFMT_INFOFRAME_CONTROL0, AFMT, id), \ + SRI_ARR(AFMT_VBI_PACKET_CONTROL, AFMT, id), \ + SRI_ARR(AFMT_AUDIO_PACKET_CONTROL, AFMT, id), \ + SRI_ARR(AFMT_AUDIO_PACKET_CONTROL2, AFMT, id), \ + SRI_ARR(AFMT_AUDIO_SRC_CONTROL, AFMT, id), \ + SRI_ARR(AFMT_60958_0, AFMT, id), SRI_ARR(AFMT_60958_1, AFMT, id), \ + SRI_ARR(AFMT_60958_2, AFMT, id), SRI_ARR(AFMT_MEM_PWR, AFMT, id) + +/* APG */ +#define APG_DCN31_REG_LIST_RI(id) \ + SRI_ARR(APG_CONTROL, APG, id), SRI_ARR(APG_CONTROL2, APG, id), \ + SRI_ARR(APG_MEM_PWR, APG, id), SRI_ARR(APG_DBG_GEN_CONTROL, APG, id) + +/* Stream encoder */ +#define SE_DCN32_REG_LIST_RI(id) \ + SRI_ARR(AFMT_CNTL, DIG, id), SRI_ARR(DIG_FE_CNTL, DIG, id), \ + SRI_ARR(HDMI_CONTROL, DIG, id), SRI_ARR(HDMI_DB_CONTROL, DIG, id), \ + SRI_ARR(HDMI_GC, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \ + SRI_ARR(HDMI_INFOFRAME_CONTROL0, DIG, id), \ + SRI_ARR(HDMI_INFOFRAME_CONTROL1, DIG, id), \ + SRI_ARR(HDMI_VBI_PACKET_CONTROL, DIG, id), \ + SRI_ARR(HDMI_AUDIO_PACKET_CONTROL, DIG, id), \ + SRI_ARR(HDMI_ACR_PACKET_CONTROL, DIG, id), \ + SRI_ARR(HDMI_ACR_32_0, DIG, id), SRI_ARR(HDMI_ACR_32_1, DIG, id), \ + SRI_ARR(HDMI_ACR_44_0, DIG, id), SRI_ARR(HDMI_ACR_44_1, DIG, id), \ + SRI_ARR(HDMI_ACR_48_0, DIG, id), SRI_ARR(HDMI_ACR_48_1, DIG, id), \ + SRI_ARR(DP_DB_CNTL, DP, id), SRI_ARR(DP_MSA_MISC, DP, id), \ + SRI_ARR(DP_MSA_VBID_MISC, DP, id), SRI_ARR(DP_MSA_COLORIMETRY, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM1, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM2, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM3, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM4, DP, id), \ + SRI_ARR(DP_MSE_RATE_CNTL, DP, id), SRI_ARR(DP_MSE_RATE_UPDATE, DP, id), \ + SRI_ARR(DP_PIXEL_FORMAT, DP, id), SRI_ARR(DP_SEC_CNTL, DP, id), \ + SRI_ARR(DP_SEC_CNTL1, DP, id), 
SRI_ARR(DP_SEC_CNTL2, DP, id), \ + SRI_ARR(DP_SEC_CNTL5, DP, id), SRI_ARR(DP_SEC_CNTL6, DP, id), \ + SRI_ARR(DP_STEER_FIFO, DP, id), SRI_ARR(DP_VID_M, DP, id), \ + SRI_ARR(DP_VID_N, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ + SRI_ARR(DP_VID_TIMING, DP, id), SRI_ARR(DP_SEC_AUD_N, DP, id), \ + SRI_ARR(DP_SEC_TIMESTAMP, DP, id), SRI_ARR(DP_DSC_CNTL, DP, id), \ + SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ + SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ + SRI_ARR(DP_SEC_FRAMING4, DP, id), SRI_ARR(DP_GSP11_CNTL, DP, id), \ + SRI_ARR(DME_CONTROL, DME, id), \ + SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ + SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ + SRI_ARR(DIG_FE_CNTL, DIG, id), SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \ + SRI_ARR(DIG_FIFO_CTRL0, DIG, id) + +/* Aux regs */ + +#define AUX_REG_LIST_RI(id) \ + SRI_ARR(AUX_CONTROL, DP_AUX, id), SRI_ARR(AUX_DPHY_RX_CONTROL0, DP_AUX, id), \ + SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id) + +#define DCN2_AUX_REG_LIST_RI(id) \ + AUX_REG_LIST_RI(id), SRI_ARR(AUX_DPHY_TX_CONTROL, DP_AUX, id) + +/* HDP */ +#define HPD_REG_LIST_RI(id) SRI_ARR(DC_HPD_CONTROL, HPD, id) + +/* Link encoder */ +#define LE_DCN3_REG_LIST_RI(id) \ + SRI_ARR(DIG_BE_CNTL, DIG, id), SRI_ARR(DIG_BE_EN_CNTL, DIG, id), \ + SRI_ARR(TMDS_CTL_BITS, DIG, id), \ + SRI_ARR(TMDS_DCBALANCER_CONTROL, DIG, id), SRI_ARR(DP_CONFIG, DP, id), \ + SRI_ARR(DP_DPHY_CNTL, DP, id), SRI_ARR(DP_DPHY_PRBS_CNTL, DP, id), \ + SRI_ARR(DP_DPHY_SCRAM_CNTL, DP, id), SRI_ARR(DP_DPHY_SYM0, DP, id), \ + SRI_ARR(DP_DPHY_SYM1, DP, id), SRI_ARR(DP_DPHY_SYM2, DP, id), \ + SRI_ARR(DP_DPHY_TRAINING_PATTERN_SEL, DP, id), \ + SRI_ARR(DP_LINK_CNTL, DP, id), SRI_ARR(DP_LINK_FRAMING_CNTL, DP, id), \ + SRI_ARR(DP_MSE_SAT0, DP, id), SRI_ARR(DP_MSE_SAT1, DP, id), \ + SRI_ARR(DP_MSE_SAT2, DP, id), SRI_ARR(DP_MSE_SAT_UPDATE, DP, id), \ + SRI_ARR(DP_SEC_CNTL, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ + SRI_ARR(DP_DPHY_FAST_TRAINING, DP, id), SRI_ARR(DP_SEC_CNTL1, DP, id), \ + SRI_ARR(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \ + SRI_ARR(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id) + +#define LE_DCN31_REG_LIST_RI(id) \ + LE_DCN3_REG_LIST_RI(id), SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \ + SR_ARR(DIO_LINKA_CNTL, id), SR_ARR(DIO_LINKB_CNTL, id), \ + SR_ARR(DIO_LINKC_CNTL, id), SR_ARR(DIO_LINKD_CNTL, id), \ + SR_ARR(DIO_LINKE_CNTL, id), SR_ARR(DIO_LINKF_CNTL, id) + +#define UNIPHY_DCN2_REG_LIST_RI(id, phyid) \ + SRI_ARR_ALPHABET(CLOCK_ENABLE, SYMCLK, id, phyid), \ + SRI_ARR_ALPHABET(CHANNEL_XBAR_CNTL, UNIPHY, id, phyid) + +/* HPO DP stream encoder */ +#define DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) \ + SR_ARR(DP_STREAM_MAPPER_CONTROL0, id), \ + SR_ARR(DP_STREAM_MAPPER_CONTROL1, id), \ + SR_ARR(DP_STREAM_MAPPER_CONTROL2, id), \ + SR_ARR(DP_STREAM_MAPPER_CONTROL3, id), \ + SRI_ARR(DP_STREAM_ENC_CLOCK_CONTROL, DP_STREAM_ENC, id), \ + SRI_ARR(DP_STREAM_ENC_INPUT_MUX_CONTROL, DP_STREAM_ENC, id), \ + SRI_ARR(DP_STREAM_ENC_AUDIO_CONTROL, DP_STREAM_ENC, id), \ + SRI_ARR(DP_STREAM_ENC_CLOCK_RAMP_ADJUSTER_FIFO_STATUS_CONTROL0, DP_STREAM_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_PIXEL_FORMAT, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_PIXEL_FORMAT_DOUBLE_BUFFER_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA0, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA1, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA2, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA3, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA4, DP_SYM32_ENC, id), \ + 
SRI_ARR(DP_SYM32_ENC_VID_MSA5, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA6, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA7, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA8, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_MSA_DOUBLE_BUFFER_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_FIFO_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_STREAM_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_VBID_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_SDP_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL0, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL2, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL3, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL5, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_SDP_GSP_CONTROL11, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_SDP_METADATA_PACKET_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_SDP_AUDIO_CONTROL0, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_VID_CRC_CONTROL, DP_SYM32_ENC, id), \ + SRI_ARR(DP_SYM32_ENC_HBLANK_CONTROL, DP_SYM32_ENC, id) + +/* HPO DP link encoder regs */ +#define DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) \ + SRI_ARR(DP_LINK_ENC_CLOCK_CONTROL, DP_LINK_ENC, id), \ + SRI_ARR(DP_DPHY_SYM32_CONTROL, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_STATUS, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CONFIG, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_PRBS_SEED0, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_PRBS_SEED1, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_PRBS_SEED2, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_PRBS_SEED3, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_SQ_PULSE, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM0, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM1, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM2, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM3, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM4, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM5, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM6, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM7, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM8, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM9, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_TP_CUSTOM10, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_SAT_VC0, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_SAT_VC1, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_SAT_VC2, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_SAT_VC3, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL0, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL1, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL2, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL3, DP_DPHY_SYM32, id), \ + SRI_ARR(DP_DPHY_SYM32_SAT_UPDATE, DP_DPHY_SYM32, id) + +/* DPP */ +#define DPP_REG_LIST_DCN30_COMMON_RI(id) \ + SRI_ARR(CM_DEALPHA, CM, id), SRI_ARR(CM_MEM_PWR_STATUS, CM, id), \ + SRI_ARR(CM_BIAS_CR_R, CM, id), SRI_ARR(CM_BIAS_Y_G_CB_B, CM, id), \ + SRI_ARR(PRE_DEGAM, CNVC_CFG, id), SRI_ARR(CM_GAMCOR_CONTROL, CM, id), \ + SRI_ARR(CM_GAMCOR_LUT_CONTROL, CM, id), \ + SRI_ARR(CM_GAMCOR_LUT_INDEX, CM, id), \ + SRI_ARR(CM_GAMCOR_LUT_INDEX, CM, id), \ + SRI_ARR(CM_GAMCOR_LUT_DATA, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_R, CM, id), \ + 
SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_REGION_0_1, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_REGION_32_33, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_OFFSET_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_OFFSET_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_OFFSET_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_REGION_0_1, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_REGION_32_33, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_OFFSET_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_OFFSET_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_OFFSET_R, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_B, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_G, CM, id), \ + SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_R, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_CONTROL, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_C11_C12, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_C13_C14, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_C21_C22, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_C23_C24, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_C31_C32, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_C33_C34, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_B_C11_C12, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_B_C13_C14, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_B_C21_C22, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_B_C23_C24, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_B_C31_C32, CM, id), \ + SRI_ARR(CM_GAMUT_REMAP_B_C33_C34, CM, id), \ + SRI_ARR(DSCL_EXT_OVERSCAN_LEFT_RIGHT, DSCL, id), \ + SRI_ARR(DSCL_EXT_OVERSCAN_TOP_BOTTOM, DSCL, id), \ + SRI_ARR(OTG_H_BLANK, DSCL, id), SRI_ARR(OTG_V_BLANK, DSCL, id), \ + SRI_ARR(SCL_MODE, DSCL, id), SRI_ARR(LB_DATA_FORMAT, DSCL, id), \ + SRI_ARR(LB_MEMORY_CTRL, DSCL, id), SRI_ARR(DSCL_AUTOCAL, DSCL, id), \ + SRI_ARR(DSCL_CONTROL, DSCL, id), \ + SRI_ARR(SCL_TAP_CONTROL, DSCL, id), \ + SRI_ARR(SCL_COEF_RAM_TAP_SELECT, DSCL, id), \ + SRI_ARR(SCL_COEF_RAM_TAP_DATA, DSCL, id), \ + SRI_ARR(DSCL_2TAP_CONTROL, DSCL, id), SRI_ARR(MPC_SIZE, DSCL, id), \ + SRI_ARR(SCL_HORZ_FILTER_SCALE_RATIO, DSCL, id), \ + SRI_ARR(SCL_VERT_FILTER_SCALE_RATIO, DSCL, id), \ + SRI_ARR(SCL_HORZ_FILTER_SCALE_RATIO_C, DSCL, id), \ + SRI_ARR(SCL_VERT_FILTER_SCALE_RATIO_C, DSCL, id), \ + SRI_ARR(SCL_HORZ_FILTER_INIT, DSCL, id), \ + SRI_ARR(SCL_HORZ_FILTER_INIT_C, DSCL, id), \ + SRI_ARR(SCL_VERT_FILTER_INIT, DSCL, id), \ + SRI_ARR(SCL_VERT_FILTER_INIT_C, DSCL, id), \ + SRI_ARR(RECOUT_START, DSCL, id), SRI_ARR(RECOUT_SIZE, DSCL, id), \ + SRI_ARR(PRE_DEALPHA, CNVC_CFG, id), SRI_ARR(PRE_REALPHA, 
CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_MODE, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_C11_C12, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_C33_C34, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_B_C11_C12, CNVC_CFG, id), \ + SRI_ARR(PRE_CSC_B_C33_C34, CNVC_CFG, id), \ + SRI_ARR(CM_POST_CSC_CONTROL, CM, id), \ + SRI_ARR(CM_POST_CSC_C11_C12, CM, id), \ + SRI_ARR(CM_POST_CSC_C33_C34, CM, id), \ + SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \ + SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \ + SRI_ARR(CM_MEM_PWR_CTRL, CM, id), SRI_ARR(CM_CONTROL, CM, id), \ + SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \ + SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ + SRI_ARR(CURSOR0_CONTROL, CNVC_CUR, id), \ + SRI_ARR(CURSOR0_COLOR0, CNVC_CUR, id), \ + SRI_ARR(CURSOR0_COLOR1, CNVC_CUR, id), \ + SRI_ARR(CURSOR0_FP_SCALE_BIAS, CNVC_CUR, id), \ + SRI_ARR(DPP_CONTROL, DPP_TOP, id), SRI_ARR(CM_HDR_MULT_COEF, CM, id), \ + SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ + SRI_ARR(ALPHA_2BIT_LUT, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_BIAS_R, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_BIAS_G, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_BIAS_B, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_SCALE_R, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_SCALE_G, CNVC_CFG, id), \ + SRI_ARR(FCNV_FP_SCALE_B, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_CONTROL, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_ALPHA, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_RED, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_GREEN, CNVC_CFG, id), \ + SRI_ARR(COLOR_KEYER_BLUE, CNVC_CFG, id), \ + SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ + SRI_ARR(OBUF_MEM_PWR_CTRL, DSCL, id), \ + SRI_ARR(DSCL_MEM_PWR_STATUS, DSCL, id), \ + SRI_ARR(DSCL_MEM_PWR_CTRL, DSCL, id) + +/* OPP */ +#define OPP_REG_LIST_DCN_RI(id) \ + SRI_ARR(FMT_BIT_DEPTH_CONTROL, FMT, id), SRI_ARR(FMT_CONTROL, FMT, id), \ + SRI_ARR(FMT_DITHER_RAND_R_SEED, FMT, id), \ + SRI_ARR(FMT_DITHER_RAND_G_SEED, FMT, id), \ + SRI_ARR(FMT_DITHER_RAND_B_SEED, FMT, id), \ + SRI_ARR(FMT_CLAMP_CNTL, FMT, id), \ + SRI_ARR(FMT_DYNAMIC_EXP_CNTL, FMT, id), \ + SRI_ARR(FMT_MAP420_MEMORY_CONTROL, FMT, id), \ + SRI_ARR(OPPBUF_CONTROL, OPPBUF, id), \ + SRI_ARR(OPPBUF_3D_PARAMETERS_0, OPPBUF, id), \ + SRI_ARR(OPPBUF_3D_PARAMETERS_1, OPPBUF, id), \ + SRI_ARR(OPP_PIPE_CONTROL, OPP_PIPE, id) \ + +#define OPP_REG_LIST_DCN10_RI(id) OPP_REG_LIST_DCN_RI(id) + +#define OPP_DPG_REG_LIST_RI(id) \ + SRI_ARR(DPG_CONTROL, DPG, id), SRI_ARR(DPG_DIMENSIONS, DPG, id), \ + SRI_ARR(DPG_OFFSET_SEGMENT, DPG, id), SRI_ARR(DPG_COLOUR_B_CB, DPG, id), \ + SRI_ARR(DPG_COLOUR_G_Y, DPG, id), SRI_ARR(DPG_COLOUR_R_CR, DPG, id), \ + SRI_ARR(DPG_RAMP_CONTROL, DPG, id), SRI_ARR(DPG_STATUS, DPG, id) + +#define OPP_REG_LIST_DCN30_RI(id) \ + OPP_REG_LIST_DCN10_RI(id), OPP_DPG_REG_LIST_RI(id), \ + SRI_ARR(FMT_422_CONTROL, FMT, id) + +/* Aux engine regs */ +#define AUX_COMMON_REG_LIST0_RI(id) \ + SRI_ARR(AUX_CONTROL, DP_AUX, id), SRI_ARR(AUX_ARB_CONTROL, DP_AUX, id), \ + SRI_ARR(AUX_SW_DATA, DP_AUX, id), SRI_ARR(AUX_SW_CONTROL, DP_AUX, id), \ + SRI_ARR(AUX_INTERRUPT_CONTROL, DP_AUX, id), \ + SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id), \ + SRI_ARR(AUX_SW_STATUS, DP_AUX, id) + +/* DWBC */ +#define DWBC_COMMON_REG_LIST_DCN30_RI(id) \ + SR_ARR(DWB_ENABLE_CLK_CTRL, id), SR_ARR(DWB_MEM_PWR_CTRL, id), \ + SR_ARR(FC_MODE_CTRL, id), SR_ARR(FC_FLOW_CTRL, id), \ + SR_ARR(FC_WINDOW_START, id), SR_ARR(FC_WINDOW_SIZE, id), \ + SR_ARR(FC_SOURCE_SIZE, id), SR_ARR(DWB_UPDATE_CTRL, id), \ + SR_ARR(DWB_CRC_CTRL, id), SR_ARR(DWB_CRC_MASK_R_G, id), \ + SR_ARR(DWB_CRC_MASK_B_A, id), SR_ARR(DWB_CRC_VAL_R_G, id), \ + SR_ARR(DWB_CRC_VAL_B_A, id), SR_ARR(DWB_OUT_CTRL, id), \ + 
SR_ARR(DWB_MMHUBBUB_BACKPRESSURE_CNT_EN, id), \ + SR_ARR(DWB_MMHUBBUB_BACKPRESSURE_CNT, id), \ + SR_ARR(DWB_HOST_READ_CONTROL, id), SR_ARR(DWB_SOFT_RESET, id), \ + SR_ARR(DWB_HDR_MULT_COEF, id), SR_ARR(DWB_GAMUT_REMAP_MODE, id), \ + SR_ARR(DWB_GAMUT_REMAP_COEF_FORMAT, id), \ + SR_ARR(DWB_GAMUT_REMAPA_C11_C12, id), \ + SR_ARR(DWB_GAMUT_REMAPA_C13_C14, id), \ + SR_ARR(DWB_GAMUT_REMAPA_C21_C22, id), \ + SR_ARR(DWB_GAMUT_REMAPA_C23_C24, id), \ + SR_ARR(DWB_GAMUT_REMAPA_C31_C32, id), \ + SR_ARR(DWB_GAMUT_REMAPA_C33_C34, id), \ + SR_ARR(DWB_GAMUT_REMAPB_C11_C12, id), \ + SR_ARR(DWB_GAMUT_REMAPB_C13_C14, id), \ + SR_ARR(DWB_GAMUT_REMAPB_C21_C22, id), \ + SR_ARR(DWB_GAMUT_REMAPB_C23_C24, id), \ + SR_ARR(DWB_GAMUT_REMAPB_C31_C32, id), \ + SR_ARR(DWB_GAMUT_REMAPB_C33_C34, id), SR_ARR(DWB_OGAM_CONTROL, id), \ + SR_ARR(DWB_OGAM_LUT_INDEX, id), SR_ARR(DWB_OGAM_LUT_DATA, id), \ + SR_ARR(DWB_OGAM_LUT_CONTROL, id), \ + SR_ARR(DWB_OGAM_RAMA_START_CNTL_B, id), \ + SR_ARR(DWB_OGAM_RAMA_START_CNTL_G, id), \ + SR_ARR(DWB_OGAM_RAMA_START_CNTL_R, id), \ + SR_ARR(DWB_OGAM_RAMA_START_BASE_CNTL_B, id), \ + SR_ARR(DWB_OGAM_RAMA_START_SLOPE_CNTL_B, id), \ + SR_ARR(DWB_OGAM_RAMA_START_BASE_CNTL_G, id), \ + SR_ARR(DWB_OGAM_RAMA_START_SLOPE_CNTL_G, id), \ + SR_ARR(DWB_OGAM_RAMA_START_BASE_CNTL_R, id), \ + SR_ARR(DWB_OGAM_RAMA_START_SLOPE_CNTL_R, id), \ + SR_ARR(DWB_OGAM_RAMA_END_CNTL1_B, id), \ + SR_ARR(DWB_OGAM_RAMA_END_CNTL2_B, id), \ + SR_ARR(DWB_OGAM_RAMA_END_CNTL1_G, id), \ + SR_ARR(DWB_OGAM_RAMA_END_CNTL2_G, id), \ + SR_ARR(DWB_OGAM_RAMA_END_CNTL1_R, id), \ + SR_ARR(DWB_OGAM_RAMA_END_CNTL2_R, id), \ + SR_ARR(DWB_OGAM_RAMA_OFFSET_B, id), SR_ARR(DWB_OGAM_RAMA_OFFSET_G, id), \ + SR_ARR(DWB_OGAM_RAMA_OFFSET_R, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_0_1, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_2_3, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_4_5, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_6_7, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_8_9, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_10_11, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_12_13, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_14_15, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_16_17, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_18_19, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_20_21, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_22_23, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_24_25, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_26_27, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_28_29, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_30_31, id), \ + SR_ARR(DWB_OGAM_RAMA_REGION_32_33, id), \ + SR_ARR(DWB_OGAM_RAMB_START_CNTL_B, id), \ + SR_ARR(DWB_OGAM_RAMB_START_CNTL_G, id), \ + SR_ARR(DWB_OGAM_RAMB_START_CNTL_R, id), \ + SR_ARR(DWB_OGAM_RAMB_START_BASE_CNTL_B, id), \ + SR_ARR(DWB_OGAM_RAMB_START_SLOPE_CNTL_B, id), \ + SR_ARR(DWB_OGAM_RAMB_START_BASE_CNTL_G, id), \ + SR_ARR(DWB_OGAM_RAMB_START_SLOPE_CNTL_G, id), \ + SR_ARR(DWB_OGAM_RAMB_START_BASE_CNTL_R, id), \ + SR_ARR(DWB_OGAM_RAMB_START_SLOPE_CNTL_R, id), \ + SR_ARR(DWB_OGAM_RAMB_END_CNTL1_B, id), \ + SR_ARR(DWB_OGAM_RAMB_END_CNTL2_B, id), \ + SR_ARR(DWB_OGAM_RAMB_END_CNTL1_G, id), \ + SR_ARR(DWB_OGAM_RAMB_END_CNTL2_G, id), \ + SR_ARR(DWB_OGAM_RAMB_END_CNTL1_R, id), \ + SR_ARR(DWB_OGAM_RAMB_END_CNTL2_R, id), \ + SR_ARR(DWB_OGAM_RAMB_OFFSET_B, id), SR_ARR(DWB_OGAM_RAMB_OFFSET_G, id), \ + SR_ARR(DWB_OGAM_RAMB_OFFSET_R, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_0_1, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_2_3, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_4_5, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_6_7, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_8_9, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_10_11, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_12_13, id), 
\ + SR_ARR(DWB_OGAM_RAMB_REGION_14_15, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_16_17, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_18_19, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_20_21, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_22_23, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_24_25, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_26_27, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_28_29, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_30_31, id), \ + SR_ARR(DWB_OGAM_RAMB_REGION_32_33, id) + +/* MCIF */ + +#define MCIF_WB_COMMON_REG_LIST_DCN32_RI(inst) \ + SRI2_ARR(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUFMGR_STATUS, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_PITCH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_1_STATUS, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_1_STATUS2, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_2_STATUS, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_2_STATUS2, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_3_STATUS, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_3_STATUS2, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_4_STATUS, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_4_STATUS2, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_SCLK_CHANGE, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_1_ADDR_Y, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_1_ADDR_C, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_2_ADDR_Y, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_2_ADDR_C, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_3_ADDR_Y, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_3_ADDR_C, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_4_ADDR_Y, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_4_ADDR_C, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUFMGR_VCE_CONTROL, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_NB_PSTATE_LATENCY_WATERMARK, MMHUBBUB, inst), \ + SRI2_ARR(MCIF_WB_NB_PSTATE_CONTROL, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_WATERMARK, MMHUBBUB, inst), \ + SRI2_ARR(MCIF_WB_CLOCK_GATER_CONTROL, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_SELF_REFRESH_CONTROL, MCIF_WB, inst), \ + SRI2_ARR(MULTI_LEVEL_QOS_CTRL, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_SECURITY_LEVEL, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_LUMA_SIZE, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_CHROMA_SIZE, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_1_ADDR_Y_HIGH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_1_ADDR_C_HIGH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_2_ADDR_Y_HIGH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_2_ADDR_C_HIGH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_3_ADDR_Y_HIGH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_3_ADDR_C_HIGH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_4_ADDR_Y_HIGH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_4_ADDR_C_HIGH, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_1_RESOLUTION, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_2_RESOLUTION, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_3_RESOLUTION, MCIF_WB, inst), \ + SRI2_ARR(MCIF_WB_BUF_4_RESOLUTION, MCIF_WB, inst), \ + SRI2_ARR(MMHUBBUB_MEM_PWR_CNTL, MMHUBBUB, inst), \ + SRI2_ARR(MMHUBBUB_WARMUP_ADDR_REGION, MMHUBBUB, inst), \ + SRI2_ARR(MMHUBBUB_WARMUP_BASE_ADDR_HIGH, MMHUBBUB, inst), \ + SRI2_ARR(MMHUBBUB_WARMUP_BASE_ADDR_LOW, MMHUBBUB, inst), \ + SRI2_ARR(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB, inst) + +/* DSC */ + +#define DSC_REG_LIST_DCN20_RI(id) \ + SRI_ARR(DSC_TOP_CONTROL, DSC_TOP, id), \ + SRI_ARR(DSC_DEBUG_CONTROL, DSC_TOP, id), \ + SRI_ARR(DSCC_CONFIG0, DSCC, id), SRI_ARR(DSCC_CONFIG1, DSCC, id), \ + SRI_ARR(DSCC_STATUS, DSCC, id), \ + SRI_ARR(DSCC_INTERRUPT_CONTROL_STATUS, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG0, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG1, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG2, DSCC, id), \ + 
SRI_ARR(DSCC_PPS_CONFIG3, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG4, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG5, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG6, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG7, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG8, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG9, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG10, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG11, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG12, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG13, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG14, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG15, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG16, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG17, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG18, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG19, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG20, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG21, DSCC, id), \ + SRI_ARR(DSCC_PPS_CONFIG22, DSCC, id), \ + SRI_ARR(DSCC_MEM_POWER_CONTROL, DSCC, id), \ + SRI_ARR(DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC, id), \ + SRI_ARR(DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC, id), \ + SRI_ARR(DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC, id), \ + SRI_ARR(DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC, id), \ + SRI_ARR(DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC, id), \ + SRI_ARR(DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC, id), \ + SRI_ARR(DSCC_MAX_ABS_ERROR0, DSCC, id), \ + SRI_ARR(DSCC_MAX_ABS_ERROR1, DSCC, id), \ + SRI_ARR(DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \ + SRI_ARR(DSCCIF_CONFIG1, DSCCIF, id), \ + SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) + +/* MPC */ + +#define MPC_DWB_MUX_REG_LIST_DCN3_0_RI(inst) \ + SRII_DWB(DWB_MUX, MUX, MPC_DWB, inst) + +#define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst) \ + SRII(MUX, MPC_OUT, inst), VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst) + +#define MPC_OUT_MUX_REG_LIST_DCN3_0_RI(inst) \ + MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst), SRII(CSC_MODE, MPC_OUT, inst), \ + SRII(CSC_C11_C12_A, MPC_OUT, inst), SRII(CSC_C33_C34_A, MPC_OUT, inst), \ + SRII(CSC_C11_C12_B, MPC_OUT, inst), SRII(CSC_C33_C34_B, MPC_OUT, inst), \ + SRII(DENORM_CONTROL, MPC_OUT, inst), \ + SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst), \ + SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst), SR(MPC_OUT_CSC_COEF_FORMAT) + +#define MPC_COMMON_REG_LIST_DCN1_0_RI(inst) \ + SRII(MPCC_TOP_SEL, MPCC, inst), SRII(MPCC_BOT_SEL, MPCC, inst), \ + SRII(MPCC_CONTROL, MPCC, inst), SRII(MPCC_STATUS, MPCC, inst), \ + SRII(MPCC_OPP_ID, MPCC, inst), SRII(MPCC_BG_G_Y, MPCC, inst), \ + SRII(MPCC_BG_R_CR, MPCC, inst), SRII(MPCC_BG_B_CB, MPCC, inst), \ + SRII(MPCC_SM_CONTROL, MPCC, inst), \ + SRII(MPCC_UPDATE_LOCK_SEL, MPCC, inst) + +#define MPC_REG_LIST_DCN3_0_RI(inst) \ + MPC_COMMON_REG_LIST_DCN1_0_RI(inst), SRII(MPCC_TOP_GAIN, MPCC, inst), \ + SRII(MPCC_BOT_GAIN_INSIDE, MPCC, inst), \ + SRII(MPCC_BOT_GAIN_OUTSIDE, MPCC, inst), \ + SRII(MPCC_MEM_PWR_CTRL, MPCC, inst), \ + SRII(MPCC_OGAM_LUT_INDEX, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_LUT_DATA, MPCC_OGAM, inst), \ + SRII(MPCC_GAMUT_REMAP_COEF_FORMAT, MPCC_OGAM, inst), \ + SRII(MPCC_GAMUT_REMAP_MODE, MPCC_OGAM, inst), \ + SRII(MPC_GAMUT_REMAP_C11_C12_A, MPCC_OGAM, inst), \ + SRII(MPC_GAMUT_REMAP_C33_C34_A, MPCC_OGAM, inst), 
\ + SRII(MPC_GAMUT_REMAP_C11_C12_B, MPCC_OGAM, inst), \ + SRII(MPC_GAMUT_REMAP_C33_C34_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_CNTL_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_CNTL_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_CNTL_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_SLOPE_CNTL_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_SLOPE_CNTL_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_SLOPE_CNTL_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_END_CNTL1_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_END_CNTL2_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_END_CNTL1_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_END_CNTL2_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_END_CNTL1_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_END_CNTL2_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_REGION_0_1, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_REGION_32_33, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_OFFSET_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_OFFSET_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_OFFSET_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_BASE_CNTL_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_BASE_CNTL_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMA_START_BASE_CNTL_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_CNTL_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_CNTL_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_CNTL_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_SLOPE_CNTL_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_SLOPE_CNTL_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_SLOPE_CNTL_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_END_CNTL1_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_END_CNTL2_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_END_CNTL1_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_END_CNTL2_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_END_CNTL1_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_END_CNTL2_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_REGION_0_1, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_REGION_32_33, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_OFFSET_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_OFFSET_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_OFFSET_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_B, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_G, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_R, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_CONTROL, MPCC_OGAM, inst), \ + SRII(MPCC_OGAM_LUT_CONTROL, MPCC_OGAM, inst) + +#define MPC_REG_LIST_DCN3_2_RI(inst) \ + MPC_REG_LIST_DCN3_0_RI(inst),\ + SRII(MPCC_MOVABLE_CM_LOCATION_CONTROL, MPCC, inst),\ + SRII(MPCC_MCM_SHAPER_CONTROL, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_OFFSET_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_OFFSET_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_OFFSET_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_SCALE_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_SCALE_G_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_LUT_INDEX, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_LUT_DATA, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_LUT_WRITE_EN_MASK, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_START_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_END_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_0_1, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_2_3, MPCC_MCM, inst),\ + 
SRII(MPCC_MCM_SHAPER_RAMA_REGION_4_5, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_6_7, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_8_9, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_10_11, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_12_13, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_14_15, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_16_17, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_18_19, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_20_21, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_22_23, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_24_25, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_26_27, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_28_29, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_30_31, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMA_REGION_32_33, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_START_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_END_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_0_1, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_2_3, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_4_5, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_6_7, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_8_9, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_10_11, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_12_13, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_14_15, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_16_17, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_18_19, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_20_21, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_22_23, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_24_25, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_26_27, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_28_29, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_30_31, MPCC_MCM, inst),\ + SRII(MPCC_MCM_SHAPER_RAMB_REGION_32_33, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_MODE, MPCC_MCM, inst), /*TODO: may need to add other 3DLUT regs*/\ + SRII(MPCC_MCM_3DLUT_INDEX, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_DATA, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_DATA_30BIT, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_READ_WRITE_CONTROL, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_OUT_NORM_FACTOR, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_OUT_OFFSET_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_OUT_OFFSET_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_3DLUT_OUT_OFFSET_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_CONTROL, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_LUT_INDEX, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_LUT_DATA, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_LUT_CONTROL, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_SLOPE_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_START_BASE_CNTL_R, MPCC_MCM, inst),\ + 
SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL1_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_END_CNTL2_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_OFFSET_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_0_1, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_2_3, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_4_5, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_6_7, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_8_9, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_10_11, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_12_13, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_14_15, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_16_17, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_18_19, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_20_21, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_22_23, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_24_25, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_26_27, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_28_29, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_30_31, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMA_REGION_32_33, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_SLOPE_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_START_BASE_CNTL_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL1_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_END_CNTL2_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_B, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_G, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_OFFSET_R, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_0_1, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_2_3, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_4_5, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_6_7, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_8_9, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_10_11, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_12_13, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_14_15, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_16_17, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_18_19, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_20_21, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_22_23, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_24_25, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_26_27, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_28_29, MPCC_MCM, inst),\ + SRII(MPCC_MCM_1DLUT_RAMB_REGION_30_31, MPCC_MCM, inst),\ + 
SRII(MPCC_MCM_1DLUT_RAMB_REGION_32_33, MPCC_MCM, inst),\ + SRII(MPCC_MCM_MEM_PWR_CTRL, MPCC_MCM, inst) +/* OPTC */ + +#define OPTC_COMMON_REG_LIST_DCN3_2_RI(inst) \ + SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst), \ + SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst), \ + SRI_ARR(OTG_VREADY_PARAM, OTG, inst), \ + SRI_ARR(OTG_MASTER_UPDATE_LOCK, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL0, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL4, OTG, inst), \ + SRI_ARR(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst), \ + SRI_ARR(OTG_H_TOTAL, OTG, inst), \ + SRI_ARR(OTG_H_BLANK_START_END, OTG, inst), \ + SRI_ARR(OTG_H_SYNC_A, OTG, inst), SRI_ARR(OTG_H_SYNC_A_CNTL, OTG, inst), \ + SRI_ARR(OTG_H_TIMING_CNTL, OTG, inst), SRI_ARR(OTG_V_TOTAL, OTG, inst), \ + SRI_ARR(OTG_V_BLANK_START_END, OTG, inst), \ + SRI_ARR(OTG_V_SYNC_A, OTG, inst), SRI_ARR(OTG_V_SYNC_A_CNTL, OTG, inst), \ + SRI_ARR(OTG_CONTROL, OTG, inst), SRI_ARR(OTG_STEREO_CONTROL, OTG, inst), \ + SRI_ARR(OTG_3D_STRUCTURE_CONTROL, OTG, inst), \ + SRI_ARR(OTG_STEREO_STATUS, OTG, inst), \ + SRI_ARR(OTG_V_TOTAL_MAX, OTG, inst), \ + SRI_ARR(OTG_V_TOTAL_MIN, OTG, inst), \ + SRI_ARR(OTG_V_TOTAL_CONTROL, OTG, inst), \ + SRI_ARR(OTG_TRIGA_CNTL, OTG, inst), \ + SRI_ARR(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst), \ + SRI_ARR(OTG_STATIC_SCREEN_CONTROL, OTG, inst), \ + SRI_ARR(OTG_STATUS_FRAME_COUNT, OTG, inst), \ + SRI_ARR(OTG_STATUS, OTG, inst), SRI_ARR(OTG_STATUS_POSITION, OTG, inst), \ + SRI_ARR(OTG_NOM_VERT_POSITION, OTG, inst), \ + SRI_ARR(OTG_M_CONST_DTO0, OTG, inst), \ + SRI_ARR(OTG_M_CONST_DTO1, OTG, inst), \ + SRI_ARR(OTG_CLOCK_CONTROL, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst), \ + SRI_ARR(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst), \ + SRI_ARR(OPTC_INPUT_CLOCK_CONTROL, ODM, inst), \ + SRI_ARR(OPTC_DATA_SOURCE_SELECT, ODM, inst), \ + SRI_ARR(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst), \ + SRI_ARR(CONTROL, VTG, inst), SRI_ARR(OTG_VERT_SYNC_CONTROL, OTG, inst), \ + SRI_ARR(OTG_GSL_CONTROL, OTG, inst), SRI_ARR(OTG_CRC_CNTL, OTG, inst), \ + SRI_ARR(OTG_CRC0_DATA_RG, OTG, inst), \ + SRI_ARR(OTG_CRC0_DATA_B, OTG, inst), \ + SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst), \ + SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst), \ + SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst), \ + SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst), \ + SR_ARR(GSL_SOURCE_SELECT, inst), \ + SRI_ARR(OTG_TRIGA_MANUAL_TRIG, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), \ + SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), \ + SRI_ARR(OTG_GSL_WINDOW_X, OTG, inst), \ + SRI_ARR(OTG_GSL_WINDOW_Y, OTG, inst), \ + SRI_ARR(OTG_VUPDATE_KEEPOUT, OTG, inst), \ + SRI_ARR(OTG_DSC_START_POSITION, OTG, inst), \ + SRI_ARR(OTG_DRR_TRIGGER_WINDOW, OTG, inst), \ + SRI_ARR(OTG_DRR_V_TOTAL_CHANGE, OTG, inst), \ + SRI_ARR(OPTC_DATA_FORMAT_CONTROL, ODM, inst), \ + SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst), \ + SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ + SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ + SRI_ARR(OTG_DRR_CONTROL, OTG, inst) + +/* HUBP */ + +#define HUBP_REG_LIST_DCN_VM_RI(id) \ + SRI_ARR(NOM_PARAMETERS_0, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_1, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_2, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_3, HUBPREQ, id), \ + SRI_ARR(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id) 
+#define HUBP_REG_LIST_DCN_RI(id) \ + SRI_ARR(DCHUBP_CNTL, HUBP, id), SRI_ARR(HUBPREQ_DEBUG_DB, HUBP, id), \ + SRI_ARR(HUBPREQ_DEBUG, HUBP, id), SRI_ARR(DCSURF_ADDR_CONFIG, HUBP, id), \ + SRI_ARR(DCSURF_TILING_CONFIG, HUBP, id), \ + SRI_ARR(DCSURF_SURFACE_PITCH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_PITCH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_CONFIG, HUBP, id), \ + SRI_ARR(DCSURF_FLIP_CONTROL, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRI_VIEWPORT_DIMENSION, HUBP, id), \ + SRI_ARR(DCSURF_PRI_VIEWPORT_START, HUBP, id), \ + SRI_ARR(DCSURF_SEC_VIEWPORT_DIMENSION, HUBP, id), \ + SRI_ARR(DCSURF_SEC_VIEWPORT_START, HUBP, id), \ + SRI_ARR(DCSURF_PRI_VIEWPORT_DIMENSION_C, HUBP, id), \ + SRI_ARR(DCSURF_PRI_VIEWPORT_START_C, HUBP, id), \ + SRI_ARR(DCSURF_SEC_VIEWPORT_DIMENSION_C, HUBP, id), \ + SRI_ARR(DCSURF_SEC_VIEWPORT_START_C, HUBP, id), \ + SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_META_SURFACE_ADDRESS, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_META_SURFACE_ADDRESS, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_PRIMARY_META_SURFACE_ADDRESS_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SECONDARY_META_SURFACE_ADDRESS_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_INUSE, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_INUSE_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_INUSE_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_INUSE_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_HIGH, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_CONTROL, HUBPREQ, id), \ + SRI_ARR(DCSURF_SURFACE_FLIP_INTERRUPT, HUBPREQ, id), \ + SRI_ARR(HUBPRET_CONTROL, HUBPRET, id), \ + SRI_ARR(HUBPRET_READ_LINE_STATUS, HUBPRET, id), \ + SRI_ARR(DCN_EXPANSION_MODE, HUBPREQ, id), \ + SRI_ARR(DCHUBP_REQ_SIZE_CONFIG, HUBP, id), \ + SRI_ARR(DCHUBP_REQ_SIZE_CONFIG_C, HUBP, id), \ + SRI_ARR(BLANK_OFFSET_0, HUBPREQ, id), \ + SRI_ARR(BLANK_OFFSET_1, HUBPREQ, id), \ + SRI_ARR(DST_DIMENSIONS, HUBPREQ, id), \ + SRI_ARR(DST_AFTER_SCALER, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_0, HUBPREQ, id), \ + SRI_ARR(REF_FREQ_TO_PIX_FREQ, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_1, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_3, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_4, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_5, HUBPREQ, id), \ + SRI_ARR(PER_LINE_DELIVERY_PRE, HUBPREQ, id), \ + SRI_ARR(PER_LINE_DELIVERY, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_2, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_4, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_6, HUBPREQ, id), \ + SRI_ARR(NOM_PARAMETERS_7, HUBPREQ, id), \ + SRI_ARR(DCN_TTU_QOS_WM, HUBPREQ, id), \ + SRI_ARR(DCN_GLOBAL_TTU_CNTL, HUBPREQ, id), \ + SRI_ARR(DCN_SURF0_TTU_CNTL0, HUBPREQ, id), \ + 
SRI_ARR(DCN_SURF0_TTU_CNTL1, HUBPREQ, id), \ + SRI_ARR(DCN_SURF1_TTU_CNTL0, HUBPREQ, id), \ + SRI_ARR(DCN_SURF1_TTU_CNTL1, HUBPREQ, id), \ + SRI_ARR(DCN_CUR0_TTU_CNTL0, HUBPREQ, id), \ + SRI_ARR(DCN_CUR0_TTU_CNTL1, HUBPREQ, id), \ + SRI_ARR(HUBP_CLK_CNTL, HUBP, id) +#define HUBP_REG_LIST_DCN2_COMMON_RI(id) \ + HUBP_REG_LIST_DCN_RI(id), HUBP_REG_LIST_DCN_VM_RI(id), \ + SRI_ARR(PREFETCH_SETTINGS, HUBPREQ, id), \ + SRI_ARR(PREFETCH_SETTINGS_C, HUBPREQ, id), \ + SRI_ARR(DCN_VM_SYSTEM_APERTURE_LOW_ADDR, HUBPREQ, id), \ + SRI_ARR(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, HUBPREQ, id), \ + SRI_ARR(CURSOR_SETTINGS, HUBPREQ, id), \ + SRI_ARR(CURSOR_SURFACE_ADDRESS_HIGH, CURSOR0_, id), \ + SRI_ARR(CURSOR_SURFACE_ADDRESS, CURSOR0_, id), \ + SRI_ARR(CURSOR_SIZE, CURSOR0_, id), \ + SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \ + SRI_ARR(CURSOR_POSITION, CURSOR0_, id), \ + SRI_ARR(CURSOR_HOT_SPOT, CURSOR0_, id), \ + SRI_ARR(CURSOR_DST_OFFSET, CURSOR0_, id), \ + SRI_ARR(DMDATA_ADDRESS_HIGH, CURSOR0_, id), \ + SRI_ARR(DMDATA_ADDRESS_LOW, CURSOR0_, id), \ + SRI_ARR(DMDATA_CNTL, CURSOR0_, id), \ + SRI_ARR(DMDATA_SW_CNTL, CURSOR0_, id), \ + SRI_ARR(DMDATA_QOS_CNTL, CURSOR0_, id), \ + SRI_ARR(DMDATA_SW_DATA, CURSOR0_, id), \ + SRI_ARR(DMDATA_STATUS, CURSOR0_, id), \ + SRI_ARR(FLIP_PARAMETERS_0, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_1, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_2, HUBPREQ, id), \ + SRI_ARR(DCN_CUR1_TTU_CNTL0, HUBPREQ, id), \ + SRI_ARR(DCN_CUR1_TTU_CNTL1, HUBPREQ, id), \ + SRI_ARR(DCSURF_FLIP_CONTROL2, HUBPREQ, id), \ + SRI_ARR(VMID_SETTINGS_0, HUBPREQ, id) +#define HUBP_REG_LIST_DCN21_RI(id) \ + HUBP_REG_LIST_DCN2_COMMON_RI(id), SRI_ARR(FLIP_PARAMETERS_3, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_4, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_5, HUBPREQ, id), \ + SRI_ARR(FLIP_PARAMETERS_6, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_5, HUBPREQ, id), \ + SRI_ARR(VBLANK_PARAMETERS_6, HUBPREQ, id) +#define HUBP_REG_LIST_DCN30_RI(id) \ + HUBP_REG_LIST_DCN21_RI(id), SRI_ARR(DCN_DMDATA_VM_CNTL, HUBPREQ, id) +#define HUBP_REG_LIST_DCN32_RI(id) \ + HUBP_REG_LIST_DCN30_RI(id), SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ + SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ + SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id) + +/* HUBBUB */ + +#define HUBBUB_REG_LIST_DCN32_RI(id) \ + SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A), \ + SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B), \ + SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C), \ + SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D), \ + SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \ + SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), SR(DCHUBBUB_ARB_SAT_LEVEL), \ + SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_SOFT_RESET), SR(DCHUBBUB_CRC_CTRL), \ + SR(DCN_VM_FB_LOCATION_BASE), SR(DCN_VM_FB_LOCATION_TOP), \ + SR(DCN_VM_FB_OFFSET), SR(DCN_VM_AGP_BOT), SR(DCN_VM_AGP_TOP), \ + SR(DCN_VM_AGP_BASE), HUBBUB_SR_WATERMARK_REG_LIST(), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A), SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_C), SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C), \ + SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D), \ + SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A), \ + SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B), \ + SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C), \ + SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D), SR(DCHUBBUB_DET0_CTRL), \ + SR(DCHUBBUB_DET1_CTRL), SR(DCHUBBUB_DET2_CTRL), SR(DCHUBBUB_DET3_CTRL), \ + SR(DCHUBBUB_COMPBUF_CTRL), SR(COMPBUF_RESERVED_SPACE), \ + 
SR(DCHUBBUB_DEBUG_CTRL_0), \ + SR(DCHUBBUB_ARB_USR_RETRAINING_CNTL), \ + SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A), \ + SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B), \ + SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C), \ + SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D), \ + SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A), \ + SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B), \ + SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C), \ + SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D), \ + SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A), \ + SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B), \ + SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C), \ + SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D), \ + SR(DCHUBBUB_ARB_MALL_CNTL), \ + SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \ + SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \ + SR(SDPIF_REQUEST_RATE_LIMIT) + +/* DCCG */ + +#define DCCG_REG_LIST_DCN32_RI() \ + SR(DPPCLK_DTO_CTRL), DCCG_SRII(DTO_PARAM, DPPCLK, 0), \ + DCCG_SRII(DTO_PARAM, DPPCLK, 1), DCCG_SRII(DTO_PARAM, DPPCLK, 2), \ + DCCG_SRII(DTO_PARAM, DPPCLK, 3), DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0), \ + SR(PHYASYMCLK_CLOCK_CNTL), SR(PHYBSYMCLK_CLOCK_CNTL), \ + SR(PHYCSYMCLK_CLOCK_CNTL), SR(PHYDSYMCLK_CLOCK_CNTL), \ + SR(PHYESYMCLK_CLOCK_CNTL), SR(DPSTREAMCLK_CNTL), SR(HDMISTREAMCLK_CNTL), \ + SR(SYMCLK32_SE_CNTL), SR(SYMCLK32_LE_CNTL), \ + DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0), DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1), \ + DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2), DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3), \ + DCCG_SRII(MODULO, DTBCLK_DTO, 0), DCCG_SRII(MODULO, DTBCLK_DTO, 1), \ + DCCG_SRII(MODULO, DTBCLK_DTO, 2), DCCG_SRII(MODULO, DTBCLK_DTO, 3), \ + DCCG_SRII(PHASE, DTBCLK_DTO, 0), DCCG_SRII(PHASE, DTBCLK_DTO, 1), \ + DCCG_SRII(PHASE, DTBCLK_DTO, 2), DCCG_SRII(PHASE, DTBCLK_DTO, 3), \ + SR(DCCG_AUDIO_DTBCLK_DTO_MODULO), SR(DCCG_AUDIO_DTBCLK_DTO_PHASE), \ + SR(OTG_PIXEL_RATE_DIV), SR(DTBCLK_P_CNTL), \ + SR(DCCG_AUDIO_DTO_SOURCE), SR(DENTIST_DISPCLK_CNTL) + +/* VMID */ +#define DCN20_VMID_REG_LIST_RI(id) \ + SRI_ARR(CNTL, DCN_VM_CONTEXT, id), \ + SRI_ARR(PAGE_TABLE_BASE_ADDR_HI32, DCN_VM_CONTEXT, id), \ + SRI_ARR(PAGE_TABLE_BASE_ADDR_LO32, DCN_VM_CONTEXT, id), \ + SRI_ARR(PAGE_TABLE_START_ADDR_HI32, DCN_VM_CONTEXT, id), \ + SRI_ARR(PAGE_TABLE_START_ADDR_LO32, DCN_VM_CONTEXT, id), \ + SRI_ARR(PAGE_TABLE_END_ADDR_HI32, DCN_VM_CONTEXT, id), \ + SRI_ARR(PAGE_TABLE_END_ADDR_LO32, DCN_VM_CONTEXT, id) + +/* I2C HW */ + +#define I2C_HW_ENGINE_COMMON_REG_LIST_RI(id) \ + SRI_ARR_I2C(SETUP, DC_I2C_DDC, id), SRI_ARR_I2C(SPEED, DC_I2C_DDC, id), \ + SRI_ARR_I2C(HW_STATUS, DC_I2C_DDC, id), \ + SR_ARR_I2C(DC_I2C_ARBITRATION, id), \ + SR_ARR_I2C(DC_I2C_CONTROL, id), SR_ARR_I2C(DC_I2C_SW_STATUS, id), \ + SR_ARR_I2C(DC_I2C_TRANSACTION0, id), SR_ARR_I2C(DC_I2C_TRANSACTION1, id),\ + SR_ARR_I2C(DC_I2C_TRANSACTION2, id), SR_ARR_I2C(DC_I2C_TRANSACTION3, id),\ + SR_ARR_I2C(DC_I2C_DATA, id), SR_ARR_I2C(MICROSECOND_TIME_BASE_DIV, id) + +#define I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) \ + I2C_HW_ENGINE_COMMON_REG_LIST_RI(id), SR_ARR_I2C(DIO_MEM_PWR_CTRL, id), \ + SR_ARR_I2C(DIO_MEM_PWR_STATUS, id) + +#endif /* _DCN32_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c new file mode 100644 index 000000000000..bedb70b98162 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -0,0 +1,2065 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dm_services.h" +#include "dc.h" + +#include "dcn32/dcn32_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn32/dcn32_resource.h" +#include "dcn321_resource.h" + +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" + +#include "dml/dcn321/dcn321_fpu.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn32/dcn32_hubbub.h" +#include "dcn32/dcn32_mpc.h" +#include "dcn32/dcn32_hubp.h" +#include "irq/dcn32/irq_service_dcn32.h" +#include "dcn32/dcn32_dpp.h" +#include "dcn32/dcn32_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn30/dcn30_opp.h" +#include "dcn20/dcn20_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn30/dcn30_dio_stream_encoder.h" +#include "dcn32/dcn32_dio_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_link_encoder.h" +#include "dcn32/dcn32_hpo_dp_link_encoder.h" +#include "dcn31/dcn31_apg.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn32/dcn32_dio_link_encoder.h" +#include "dcn321/dcn321_dio_link_encoder.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dml/display_mode_vba.h" +#include "dcn32/dcn32_dccg.h" +#include "dcn10/dcn10_resource.h" +#include "link.h" +#include "dcn31/dcn31_panel_cntl.h" + +#include "dcn30/dcn30_dwb.h" +#include "dcn32/dcn32_mmhubbub.h" + +#include "dcn/dcn_3_2_1_offset.h" +#include "dcn/dcn_3_2_1_sh_mask.h" +#include "nbio/nbio_4_3_0_offset.h" + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" + +#include "dml/dcn30/display_mode_vba_30.h" +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" + +#define DC_LOGGER_INIT(logger) + +enum dcn321_clk_src_array_id { + DCN321_CLK_SRC_PLL0, + DCN321_CLK_SRC_PLL1, + DCN321_CLK_SRC_PLL2, + DCN321_CLK_SRC_PLL3, + DCN321_CLK_SRC_PLL4, + DCN321_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + REG_STRUCT.reg_name = 
BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name +#define SR_ARR(reg_name, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name +#define SR_ARR_INIT(reg_name, id, value)\ + REG_STRUCT[id].reg_name = value + +#define SRI(reg_name, block, id)\ + REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_ARR(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SR_ARR_I2C(reg_name, id) \ + REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name + +#define SRI_ARR_I2C(reg_name, block, id)\ + REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_ARR_ALPHABET(reg_name, block, index, id)\ + REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name +#define SRI2_ARR(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_ARR_2(reg_name, block, id, inst)\ + REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## temp_name + +#define DCCG_SRII(reg_name, block, id)\ + REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define VUPDATE_SRII(reg_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + reg ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg] + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX0_ ## reg_name +#define NBIO_SR_ARR(reg_name, id)\ + REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX0_ ## reg_name + +#define CTX ctx +#define REG(reg_name) \ + (ctx->dcn_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) + +static struct bios_registers bios_regs; + +#define bios_regs_init() \ + ( \ + NBIO_SR(BIOS_SCRATCH_3),\ + NBIO_SR(BIOS_SCRATCH_6)\ + ) + +#define clk_src_regs_init(index, pllid)\ + CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) + +static struct dce110_clk_src_regs clk_src_regs[5]; + +static const struct dce110_clk_src_shift cs_shift = { + CS_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = 
{ + CS_COMMON_MASK_SH_LIST_DCN3_2(_MASK) +}; + +#define abm_regs_init(id)\ + ABM_DCN32_REG_LIST_RI(id) + +static struct dce_abm_registers abm_regs[4]; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN32(_MASK) +}; + +#define audio_regs_init(id)\ + AUD_COMMON_REG_LIST_RI(id) + +static struct dce_audio_registers audio_regs[5]; + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs_init(id)\ + VPG_DCN3_REG_LIST_RI(id) + +static struct dcn30_vpg_registers vpg_regs[10]; + +static const struct dcn30_vpg_shift vpg_shift = { + DCN3_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_vpg_mask vpg_mask = { + DCN3_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs_init(id)\ + AFMT_DCN3_REG_LIST_RI(id) + +static struct dcn30_afmt_registers afmt_regs[6]; + +static const struct dcn30_afmt_shift afmt_shift = { + DCN3_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn30_afmt_mask afmt_mask = { + DCN3_AFMT_MASK_SH_LIST(_MASK) +}; + +#define apg_regs_init(id)\ + APG_DCN31_REG_LIST_RI(id) + +static struct dcn31_apg_registers apg_regs[4]; + +static const struct dcn31_apg_shift apg_shift = { + DCN31_APG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_apg_mask apg_mask = { + DCN31_APG_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs_init(id)\ + SE_DCN32_REG_LIST_RI(id) + +static struct dcn10_stream_enc_registers stream_enc_regs[5]; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + + +#define aux_regs_init(id)\ + DCN2_AUX_REG_LIST_RI(id) + +static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5]; + +#define hpd_regs_init(id)\ + HPD_REG_LIST_RI(id) + +static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5]; + +#define link_regs_init(id, phyid)\ + ( \ + LE_DCN31_REG_LIST_RI(id), \ + UNIPHY_DCN2_REG_LIST_RI(id, phyid)\ + ) + /*DPCS_DCN31_REG_LIST(id),*/ \ + +static struct dcn10_link_enc_registers link_enc_regs[5]; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN31(__SHIFT), \ +// DPCS_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN31(_MASK), \ +// DPCS_DCN31_MASK_SH_LIST(_MASK) +}; + +#define hpo_dp_stream_encoder_reg_init(id)\ + DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) + +static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4]; + +static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) +}; + + +#define hpo_dp_link_encoder_reg_init(id)\ + DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) + /*DCN3_1_RDPCSTX_REG_LIST(0),*/ + /*DCN3_1_RDPCSTX_REG_LIST(1),*/ + /*DCN3_1_RDPCSTX_REG_LIST(2),*/ + /*DCN3_1_RDPCSTX_REG_LIST(3),*/ + +static struct dcn31_hpo_dp_link_encoder_registers 
hpo_dp_link_enc_regs[2]; + +static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { + DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { + DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs_init(id)\ + DPP_REG_LIST_DCN30_COMMON_RI(id) + +static struct dcn3_dpp_registers dpp_regs[4]; + +static const struct dcn3_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN30_COMMON(__SHIFT) +}; + +static const struct dcn3_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN30_COMMON(_MASK) +}; + + +#define opp_regs_init(id)\ + OPP_REG_LIST_DCN30_RI(id) + +static struct dcn20_opp_registers opp_regs[4]; + +static const struct dcn20_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN20(__SHIFT) +}; + +static const struct dcn20_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN20(_MASK) +}; + +#define aux_engine_regs_init(id) \ + ( \ + AUX_COMMON_REG_LIST0_RI(id), SR_ARR_INIT(AUXN_IMPCAL, id, 0), \ + SR_ARR_INIT(AUXP_IMPCAL, id, 0), \ + SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK), \ + SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK)\ + ) + +static struct dce110_aux_registers aux_engine_regs[5]; + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +#define dwbc_regs_dcn3_init(id)\ + DWBC_COMMON_REG_LIST_DCN30_RI(id) + +static struct dcn30_dwbc_registers dwbc30_regs[1]; + +static const struct dcn30_dwbc_shift dwbc30_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dcn30_dwbc_mask dwbc30_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +#define mcif_wb_regs_dcn3_init(id)\ + MCIF_WB_COMMON_REG_LIST_DCN32_RI(id) + +static struct dcn30_mmhubbub_registers mcif_wb30_regs[1]; + +static const struct dcn30_mmhubbub_shift mcif_wb30_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define dsc_regsDCN20_init(id)\ + DSC_REG_LIST_DCN20_RI(id) + +static struct dcn20_dsc_registers dsc_regs[4]; + +static const struct dcn20_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN20(__SHIFT) +}; + +static const struct dcn20_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN20(_MASK) +}; + +static struct dcn30_mpc_registers mpc_regs; +#define dcn_mpc_regs_init()\ + MPC_REG_LIST_DCN3_2_RI(0),\ + MPC_REG_LIST_DCN3_2_RI(1),\ + MPC_REG_LIST_DCN3_2_RI(2),\ + MPC_REG_LIST_DCN3_2_RI(3),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ + MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define optc_regs_init(id)\ + OPTC_COMMON_REG_LIST_DCN3_2_RI(id) + +static struct dcn_optc_registers optc_regs[4]; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN3_2(_MASK) +}; + +#define hubp_regs_init(id) \ + HUBP_REG_LIST_DCN32_RI(id) + +static struct dcn_hubp2_registers hubp_regs[4]; + +static const struct dcn_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn_hubp2_mask hubp_mask 
= { + HUBP_MASK_SH_LIST_DCN32(_MASK) +}; + +static struct dcn_hubbub_registers hubbub_reg; +#define hubbub_reg_init()\ + HUBBUB_REG_LIST_DCN32_RI(0) + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN32(_MASK) +}; + +static struct dccg_registers dccg_regs; + +#define dccg_regs_init()\ + DCCG_REG_LIST_DCN32_RI() + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN32(_MASK) +}; + + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + reg ## reg_name_pre ## id ## _ ## reg_name_post + + +#define HWSEQ_DCN32_REG_LIST()\ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DIO_MEM_PWR_CTRL), \ + SR(ODM_MEM_PWR_CTRL3), \ + SR(MMHUBBUB_MEM_PWR_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCFCLK_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ + SRII(PIXEL_RATE_CNTL, OTG, 0), \ + SRII(PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PIXEL_RATE_CNTL, OTG, 3),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ + SR(MICROSECOND_TIME_BASE_DIV), \ + SR(MILLISECOND_TIME_BASE_DIV), \ + SR(DISPCLK_FREQ_CHANGE_CNTL), \ + SR(RBBMIF_TIMEOUT_DIS), \ + SR(RBBMIF_TIMEOUT_DIS_2), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ + SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ + SR(MPC_CRC_CTRL), \ + SR(MPC_CRC_RESULT_GB), \ + SR(MPC_CRC_RESULT_C), \ + SR(MPC_CRC_RESULT_AR), \ + SR(DOMAIN0_PG_CONFIG), \ + SR(DOMAIN1_PG_CONFIG), \ + SR(DOMAIN2_PG_CONFIG), \ + SR(DOMAIN3_PG_CONFIG), \ + SR(DOMAIN16_PG_CONFIG), \ + SR(DOMAIN17_PG_CONFIG), \ + SR(DOMAIN18_PG_CONFIG), \ + SR(DOMAIN19_PG_CONFIG), \ + SR(DOMAIN0_PG_STATUS), \ + SR(DOMAIN1_PG_STATUS), \ + SR(DOMAIN2_PG_STATUS), \ + SR(DOMAIN3_PG_STATUS), \ + SR(DOMAIN16_PG_STATUS), \ + SR(DOMAIN17_PG_STATUS), \ + SR(DOMAIN18_PG_STATUS), \ + SR(DOMAIN19_PG_STATUS), \ + SR(D1VGA_CONTROL), \ + SR(D2VGA_CONTROL), \ + SR(D3VGA_CONTROL), \ + SR(D4VGA_CONTROL), \ + SR(D5VGA_CONTROL), \ + SR(D6VGA_CONTROL), \ + SR(DC_IP_REQUEST_CNTL), \ + SR(AZALIA_AUDIO_DTO), \ + SR(AZALIA_CONTROLLER_CLOCK_GATING) + +static struct dce_hwseq_registers hwseq_reg; + +#define hwseq_reg_init()\ + HWSEQ_DCN32_REG_LIST() + +#define HWSEQ_DCN32_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ + HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), 
\ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ + HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ + HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ + HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh) + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN32_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN32_MASK_SH_LIST(_MASK) +}; +#define vmid_regs_init(id)\ + DCN20_VMID_REG_LIST_RI(id) + +static struct dcn_vmid_registers vmid_regs[16]; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn321 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 5, + .num_stream_encoder = 5, + .num_hpo_dp_stream_encoder = 4, + .num_hpo_dp_link_encoder = 2, + .num_pll = 5, + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_mpc_3dlut = 4, + .num_dsc = 4, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + // 6:1 downscaling ratio: 1000/6 = 166.666 + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = false, + .pipe_split_policy = MPC_SPLIT_AVOID, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 7680,/*upto 8K*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .enable_mem_low_power = { + .bits = { + .vga = false, + .i2c = false, + .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled + .dscl = false, + .cm = false, + .mpc = false, + .optc = true, + } + }, + .use_max_lb = true, + .force_disable_subvp = false, + .exit_idle_opt_for_cursor_updates = true, + .enable_single_display_2to1_odm_policy = true, + + /*must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/ + .enable_double_buffered_dsc_pg_support = true, 
+ .enable_dp_dig_pixel_rate_div_policy = 1, + .allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback" + .alloc_extra_way_for_cursor = true, + .min_prefetch_in_strobe_ns = 60000, // 60us + .disable_unbounded_requesting = false, + .override_dispclk_programming = true, + .disable_fpo_optimizations = false, + .fpo_vactive_margin_us = 2000, // 2000us + .disable_fpo_vactive = false, + .disable_boot_optimizations = false, + .disable_subvp_high_refresh = false, + .fpo_vactive_min_active_margin_us = 200, + .fpo_vactive_max_blank_us = 1000, + .enable_legacy_fast_update = false, + .disable_dc_mode_overwrite = true, + .using_dml2 = false, +}; + +static struct dce_aux *dcn321_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT aux_engine_regs + aux_engine_regs_init(0), + aux_engine_regs_init(1), + aux_engine_regs_init(2), + aux_engine_regs_init(3), + aux_engine_regs_init(4); + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} +#define i2c_inst_regs_init(id)\ + I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) + +static struct dce_i2c_registers i2c_hw_regs[5]; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN30(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN30(_MASK) +}; + +static struct dce_i2c_hw *dcn321_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT i2c_hw_regs + i2c_inst_regs_init(1), + i2c_inst_regs_init(2), + i2c_inst_regs_init(3), + i2c_inst_regs_init(4), + i2c_inst_regs_init(5); + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} + +static struct clock_source *dcn321_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + kfree(clk_src); + BREAK_TO_DEBUGGER(); + return NULL; +} + +static struct hubbub *dcn321_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub2 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub2) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT hubbub_reg + hubbub_reg_init(); + +#undef REG_STRUCT +#define REG_STRUCT vmid_regs + vmid_regs_init(0), + vmid_regs_init(1), + vmid_regs_init(2), + vmid_regs_init(3), + vmid_regs_init(4), + vmid_regs_init(5), + vmid_regs_init(6), + vmid_regs_init(7), + vmid_regs_init(8), + vmid_regs_init(9), + vmid_regs_init(10), + vmid_regs_init(11), + vmid_regs_init(12), + vmid_regs_init(13), + vmid_regs_init(14), + vmid_regs_init(15); + + hubbub32_construct(hubbub2, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask, + ctx->dc->dml.ip.det_buffer_size_kbytes, + ctx->dc->dml.ip.pixel_chunk_size_kbytes, + 
ctx->dc->dml.ip.config_return_buffer_size_in_kbytes); + + + for (i = 0; i < res_cap_dcn321.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub2->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub2->base; +} + +static struct hubp *dcn321_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT hubp_regs + hubp_regs_init(0), + hubp_regs_init(1), + hubp_regs_init(2), + hubp_regs_init(3); + + if (hubp32_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static void dcn321_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN30_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn321_dpp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn3_dpp *dpp3 = + kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + + if (!dpp3) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dpp_regs + dpp_regs_init(0), + dpp_regs_init(1), + dpp_regs_init(2), + dpp_regs_init(3); + + if (dpp32_construct(dpp3, ctx, inst, + &dpp_regs[inst], &tf_shift, &tf_mask)) + return &dpp3->base; + + BREAK_TO_DEBUGGER(); + kfree(dpp3); + return NULL; +} + +static struct mpc *dcn321_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), + GFP_KERNEL); + + if (!mpc30) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT mpc_regs + dcn_mpc_regs_init(); + + dcn32_mpc_construct(mpc30, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct output_pixel_processor *dcn321_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp2 = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp2) { + BREAK_TO_DEBUGGER(); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT opp_regs + opp_regs_init(0), + opp_regs_init(1), + opp_regs_init(2), + opp_regs_init(3); + + dcn20_opp_construct(opp2, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + return &opp2->base; +} + + +static struct timing_generator *dcn321_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT optc_regs + optc_regs_init(0), + optc_regs_init(1), + optc_regs_init(2), + optc_regs_init(3); + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn32_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn321_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + +#undef REG_STRUCT +#define 
REG_STRUCT link_enc_aux_regs + aux_regs_init(0), + aux_regs_init(1), + aux_regs_init(2), + aux_regs_init(3), + aux_regs_init(4); + +#undef REG_STRUCT +#define REG_STRUCT link_enc_hpd_regs + hpd_regs_init(0), + hpd_regs_init(1), + hpd_regs_init(2), + hpd_regs_init(3), + hpd_regs_init(4); + +#undef REG_STRUCT +#define REG_STRUCT link_enc_regs + link_regs_init(0, A), + link_regs_init(1, B), + link_regs_init(2, C), + link_regs_init(3, D), + link_regs_init(4, E); + + dcn321_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, ctx->dcn_reg_offsets[regDC_PINSTRAPS_BASE_IDX] + regDC_PINSTRAPS, + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn321_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + +#undef REG_STRUCT +#define REG_STRUCT audio_regs + audio_regs_init(0), + audio_regs_init(1), + audio_regs_init(2), + audio_regs_init(3), + audio_regs_init(4); + + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn321_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_vpg *vpg3 = kzalloc(sizeof(struct dcn30_vpg), GFP_KERNEL); + + if (!vpg3) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT vpg_regs + vpg_regs_init(0), + vpg_regs_init(1), + vpg_regs_init(2), + vpg_regs_init(3), + vpg_regs_init(4), + vpg_regs_init(5), + vpg_regs_init(6), + vpg_regs_init(7), + vpg_regs_init(8), + vpg_regs_init(9); + + vpg3_construct(vpg3, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg3->base; +} + +static struct afmt *dcn321_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn30_afmt *afmt3 = kzalloc(sizeof(struct dcn30_afmt), GFP_KERNEL); + + if (!afmt3) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT afmt_regs + afmt_regs_init(0), + afmt_regs_init(1), + afmt_regs_init(2), + afmt_regs_init(3), + afmt_regs_init(4), + afmt_regs_init(5); + + afmt3_construct(afmt3, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + return &afmt3->base; +} + +static struct apg *dcn321_apg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); + + if (!apg31) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT apg_regs + apg_regs_init(0), + apg_regs_init(1), + apg_regs_init(2), + apg_regs_init(3); + + apg31_construct(apg31, ctx, inst, + &apg_regs[inst], + &apg_shift, + &apg_mask); + + return &apg31->base; +} + +static struct stream_encoder *dcn321_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn321_vpg_create(ctx, vpg_inst); + afmt = dcn321_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT stream_enc_regs + 
stream_enc_regs_init(0), + stream_enc_regs_init(1), + stream_enc_regs_init(2), + stream_enc_regs_init(3), + stream_enc_regs_init(4); + + dcn32_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static struct hpo_dp_stream_encoder *dcn321_hpo_dp_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; + struct vpg *vpg; + struct apg *apg; + uint32_t hpo_dp_inst; + uint32_t vpg_inst; + uint32_t apg_inst; + + ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); + hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; + + /* Mapping of VPG register blocks to HPO DP block instance: + * VPG[6] -> HPO_DP[0] + * VPG[7] -> HPO_DP[1] + * VPG[8] -> HPO_DP[2] + * VPG[9] -> HPO_DP[3] + */ + vpg_inst = hpo_dp_inst + 6; + + /* Mapping of APG register blocks to HPO DP block instance: + * APG[0] -> HPO_DP[0] + * APG[1] -> HPO_DP[1] + * APG[2] -> HPO_DP[2] + * APG[3] -> HPO_DP[3] + */ + apg_inst = hpo_dp_inst; + + /* allocate HPO stream encoder and create VPG sub-block */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); + vpg = dcn321_vpg_create(ctx, vpg_inst); + apg = dcn321_apg_create(ctx, apg_inst); + + if (!hpo_dp_enc31 || !vpg || !apg) { + kfree(hpo_dp_enc31); + kfree(vpg); + kfree(apg); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT hpo_dp_stream_enc_regs + hpo_dp_stream_encoder_reg_init(0), + hpo_dp_stream_encoder_reg_init(1), + hpo_dp_stream_encoder_reg_init(2), + hpo_dp_stream_encoder_reg_init(3); + + dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, + hpo_dp_inst, eng_id, vpg, apg, + &hpo_dp_stream_enc_regs[hpo_dp_inst], + &hpo_dp_se_shift, &hpo_dp_se_mask); + + return &hpo_dp_enc31->base; +} + +static struct hpo_dp_link_encoder *dcn321_hpo_dp_link_encoder_create( + uint8_t inst, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; + + /* allocate HPO link encoder */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); + +#undef REG_STRUCT +#define REG_STRUCT hpo_dp_link_enc_regs + hpo_dp_link_encoder_reg_init(0), + hpo_dp_link_encoder_reg_init(1); + + hpo_dp_link_encoder32_construct(hpo_dp_enc31, ctx, inst, + &hpo_dp_link_enc_regs[inst], + &hpo_dp_le_shift, &hpo_dp_le_mask); + + return &hpo_dp_enc31->base; +} + +static struct dce_hwseq *dcn321_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + +#undef REG_STRUCT +#define REG_STRUCT hwseq_reg + hwseq_reg_init(); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn321_create_audio, + .create_stream_encoder = dcn321_stream_encoder_create, + .create_hpo_dp_stream_encoder = dcn321_hpo_dp_stream_encoder_create, + .create_hpo_dp_link_encoder = dcn321_hpo_dp_link_encoder_create, + .create_hwseq = dcn321_hwseq_create, +}; + +static void dcn321_resource_destruct(struct dcn321_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if (pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if 
(pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { + if (pool->base.hpo_dp_stream_enc[i] != NULL) { + if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); + pool->base.hpo_dp_stream_enc[i]->vpg = NULL; + } + if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { + kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); + pool->base.hpo_dp_stream_enc[i]->apg = NULL; + } + kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); + pool->base.hpo_dp_stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { + if (pool->base.hpo_dp_link_enc[i] != NULL) { + kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); + pool->base.hpo_dp_link_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(TO_DCN20_HUBBUB(pool->base.hubbub)); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn321_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) + dal_irq_service_destroy(&pool->base.irqs); + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + 
} + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); + + if (pool->base.oem_device != NULL) { + struct dc *dc = pool->base.oem_device->ctx->dc; + + dc->link_srv->destroy_ddc_service(&pool->base.oem_device); + } +} + + +static bool dcn321_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t dwb_count = pool->res_cap->num_dwb; + + for (i = 0; i < dwb_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + +#undef REG_STRUCT +#define REG_STRUCT dwbc30_regs + dwbc_regs_dcn3_init(0); + + dcn30_dwbc_construct(dwbc30, ctx, + &dwbc30_regs[i], + &dwbc30_shift, + &dwbc30_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + } + return true; +} + +static bool dcn321_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t dwb_count = pool->res_cap->num_dwb; + + for (i = 0; i < dwb_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + +#undef REG_STRUCT +#define REG_STRUCT mcif_wb30_regs + mcif_wb_regs_dcn3_init(0); + + dcn32_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb30_regs[i], + &mcif_wb30_shift, + &mcif_wb30_mask, + i); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn321_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT dsc_regs + dsc_regsDCN20_init(0), + dsc_regsDCN20_init(1), + dsc_regsDCN20_init(2), + dsc_regsDCN20_init(3); + + dsc2_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + + dsc->max_image_width = 6016; + + return &dsc->base; +} + +static void dcn321_destroy_resource_pool(struct resource_pool **pool) +{ + struct dcn321_resource_pool *dcn321_pool = TO_DCN321_RES_POOL(*pool); + + dcn321_resource_destruct(dcn321_pool); + kfree(dcn321_pool); + *pool = NULL; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, + .get_subvp_en = dcn32_subvp_in_use, +}; + +static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + DC_FP_START(); + dcn321_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); +} + +static struct resource_funcs dcn321_res_pool_funcs = { + .destroy = dcn321_destroy_resource_pool, + .link_enc_create = dcn321_link_encoder_create, + .link_enc_create_minimal = NULL, + .panel_cntl_create = dcn32_panel_cntl_create, + .validate_bandwidth = dcn32_validate_bandwidth, + .calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg, + .populate_dml_pipes = dcn32_populate_dml_pipes_from_context, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn32_acquire_free_pipe_as_secondary_dpp_pipe, + .acquire_free_pipe_as_secondary_opp_head = dcn32_acquire_free_pipe_as_secondary_opp_head, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx 
= dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn32_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn32_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn321_update_bw_bounding_box, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, + .add_phantom_pipes = dcn32_add_phantom_pipes, + .remove_phantom_pipes = dcn32_remove_phantom_pipes, + .retain_phantom_pipes = dcn32_retain_phantom_pipes, + .save_mall_state = dcn32_save_mall_state, + .restore_mall_state = dcn32_restore_mall_state, +}; + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* DCN321 support max 4 pipes */ + value = value & 0xf; + return value; +} + + +static bool dcn321_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn321_resource_pool *pool) +{ + int i, j; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + struct ddc_service_init_data ddc_init_data = {0}; + uint32_t pipe_fuses = 0; + uint32_t num_pipes = 4; + +#undef REG_STRUCT +#define REG_STRUCT bios_regs + bios_regs_init(); + +#undef REG_STRUCT +#define REG_STRUCT clk_src_regs + clk_src_regs_init(0, A), + clk_src_regs_init(1, B), + clk_src_regs_init(2, C), + clk_src_regs_init(3, D), + clk_src_regs_init(4, E); + +#undef REG_STRUCT +#define REG_STRUCT abm_regs + abm_regs_init(0), + abm_regs_init(1), + abm_regs_init(2), + abm_regs_init(3); + +#undef REG_STRUCT +#define REG_STRUCT dccg_regs + dccg_regs_init(); + + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn321; + /* max number of pipes for ASIC before checking for pipe fuses */ + num_pipes = pool->base.res_cap->num_timing_generator; + pipe_fuses = read_pipe_fuses(ctx); + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) + if (pipe_fuses & 1 << i) + num_pipes--; + + if (pipe_fuses & 1) + ASSERT(0); //Unexpected - Pipe 0 should always be fully functional! + + if (pipe_fuses & CC_DC_PIPE_DIS__DC_FULL_DIS_MASK) + ASSERT(0); //Entire DCN is harvested! + + /* within dml lib, initial value is hard coded, if ASIC pipe is fused, the + * value will be changed, update max_num_dpp and max_num_otg for dml. 
+ */ + dcn3_21_ip.max_num_dpp = num_pipes; + dcn3_21_ip.max_num_otg = num_pipes; + + pool->base.funcs = &dcn321_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.timing_generator_count = num_pipes; + pool->base.pipe_count = num_pipes; + pool->base.mpcc_count = num_pipes; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ + /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ + dc->caps.max_cursor_size = 64; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.mall_size_per_mem_channel = 4; + dc->caps.mall_size_total = 0; + dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; + dc->caps.cache_line_size = 64; + dc->caps.cache_num_ways = 16; + + /* Calculate the available MALL space */ + dc->caps.max_cab_allocation_bytes = dcn32_calc_num_avail_chans_for_mall( + dc, dc->ctx->dc_bios->vram_info.num_chans) * + dc->caps.mall_size_per_mem_channel * 1024 * 1024; + dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; + + dc->caps.subvp_fw_processing_delay_us = 15; + dc->caps.subvp_drr_max_vblank_margin_us = 40; + dc->caps.subvp_prefetch_end_to_mall_start_us = 15; + dc->caps.subvp_swath_height_margin_lines = 16; + dc->caps.subvp_pstate_allow_width_us = 20; + dc->caps.subvp_vertical_int_margin_us = 30; + dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; + dc->caps.edp_dsc_support = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.max_v_total = (1 << 15) - 1; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN2 and later ASICs + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //4, configurable to be before or after BLND in MPCC + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + dc->config.dc_mode_clk_limit_support = true; + /* read VBIOS LTTPR caps */ + { + if 
(ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN321_CLK_SRC_PLL0] = + dcn321_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN321_CLK_SRC_PLL1] = + dcn321_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN321_CLK_SRC_PLL2] = + dcn321_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN321_CLK_SRC_PLL3] = + dcn321_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN321_CLK_SRC_PLL4] = + dcn321_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN321_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn321_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* DCCG */ + pool->base.dccg = dccg32_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* DML */ + dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); + + /* IRQ Service */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn32_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn321_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs, TGs, ABMs */ + for (i = 0, j = 0; i < pool->base.res_cap->num_timing_generator; i++) { + + /* if pipe is disabled, skip instance of HW pipe, + * i.e, skip ASIC register instance + */ + if (pipe_fuses & 1 << i) + continue; + + pool->base.hubps[j] = dcn321_hubp_create(ctx, i); + if (pool->base.hubps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[j] = dcn321_dpp_create(ctx, i); + if (pool->base.dpps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + + pool->base.opps[j] = dcn321_opp_create(ctx, i); + if (pool->base.opps[j] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + + pool->base.timing_generators[j] = dcn321_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[j] == NULL) { 
+ BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + + pool->base.multiple_abms[j] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if (pool->base.multiple_abms[j] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* index for resource pool arrays for next valid pipe */ + j++; + } + + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* MPCCs */ + pool->base.mpc = dcn321_mpc_create(ctx, pool->base.res_cap->num_timing_generator, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + /* DSCs */ + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn321_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB */ + if (!dcn321_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + /* MMHUBBUB */ + if (!dcn321_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn321_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn321_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* Audio, HWSeq, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer init functions and Plane caps */ + dcn32_hw_sequencer_init_functions(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { + ddc_init_data.ctx = dc->ctx; + ddc_init_data.link = NULL; + ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id; + ddc_init_data.id.enum_id = 0; + ddc_init_data.id.type = OBJECT_TYPE_GENERIC; + pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data); + } else { + pool->base.oem_device = NULL; + } + + dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; + dc->dml2_options.use_native_pstate_optimization = false; + dc->dml2_options.use_native_soc_bb_construction = true; + dc->dml2_options.minimize_dispclk_using_odm = true; + + dc->dml2_options.callbacks.dc = dc; + dc->dml2_options.callbacks.build_scaling_params = &resource_build_scaling_params; + dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; + dc->dml2_options.callbacks.acquire_secondary_pipe_for_mpc_odm = &dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy; + dc->dml2_options.callbacks.update_pipes_for_stream_with_slice_count = &resource_update_pipes_for_stream_with_slice_count; + 
dc->dml2_options.callbacks.update_pipes_for_plane_with_slice_count = &resource_update_pipes_for_plane_with_slice_count; + dc->dml2_options.callbacks.get_mpc_slice_index = &resource_get_mpc_slice_index; + dc->dml2_options.callbacks.get_odm_slice_index = &resource_get_odm_slice_index; + dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; + + dc->dml2_options.svp_pstate.callbacks.dc = dc; + dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context; + dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx; + dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params; + dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state; + dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context; + dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx; + dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink; + dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release; + dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release; + dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; + + dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; + dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; + dc->dml2_options.svp_pstate.subvp_pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us; + dc->dml2_options.svp_pstate.subvp_swath_height_margin_lines = dc->caps.subvp_swath_height_margin_lines; + + dc->dml2_options.svp_pstate.force_disable_subvp = dc->debug.force_disable_subvp; + dc->dml2_options.svp_pstate.force_enable_subvp = dc->debug.force_subvp_mclk_switch; + + dc->dml2_options.mall_cfg.cache_line_size_bytes = dc->caps.cache_line_size; + dc->dml2_options.mall_cfg.cache_num_ways = dc->caps.cache_num_ways; + dc->dml2_options.mall_cfg.max_cab_allocation_bytes = dc->caps.max_cab_allocation_bytes; + dc->dml2_options.mall_cfg.mblk_height_4bpe_pixels = DCN3_2_MBLK_HEIGHT_4BPE; + dc->dml2_options.mall_cfg.mblk_height_8bpe_pixels = DCN3_2_MBLK_HEIGHT_8BPE; + dc->dml2_options.mall_cfg.mblk_size_bytes = DCN3_2_MALL_MBLK_SIZE_BYTES; + dc->dml2_options.mall_cfg.mblk_width_pixels = DCN3_2_MBLK_WIDTH; + + dc->dml2_options.max_segments_per_hubp = 18; + dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE; + + return true; + +create_fail: + + dcn321_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn321_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn321_resource_pool *pool = + kzalloc(sizeof(struct dcn321_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn321_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h new file mode 100644 index 000000000000..82cbf009f2d3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h @@ -0,0 +1,45 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN321_RESOURCE_H_ +#define _DCN321_RESOURCE_H_ + +#include "core_types.h" + +#define TO_DCN321_RES_POOL(pool)\ + container_of(pool, struct dcn321_resource_pool, base) + +extern struct _vcs_dpi_ip_params_st dcn3_21_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc; + +struct dcn321_resource_pool { + struct resource_pool base; +}; + +struct resource_pool *dcn321_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +#endif /* _DCN321_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c new file mode 100644 index 000000000000..a4027bf053c5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -0,0 +1,2141 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dm_services.h" +#include "dc.h" + +#include "dcn31/dcn31_init.h" +#include "dcn35/dcn35_init.h" + +#include "resource.h" +#include "include/irq_service_interface.h" +#include "dcn35_resource.h" +#include "dml2/dml2_wrapper.h" + +#include "dcn20/dcn20_resource.h" +#include "dcn30/dcn30_resource.h" +#include "dcn31/dcn31_resource.h" +#include "dcn32/dcn32_resource.h" + +#include "dcn10/dcn10_ipp.h" +#include "dcn30/dcn30_hubbub.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn35/dcn35_hubbub.h" +#include "dcn32/dcn32_mpc.h" +#include "dcn35/dcn35_hubp.h" +#include "irq/dcn35/irq_service_dcn35.h" +#include "dcn35/dcn35_dpp.h" +#include "dcn35/dcn35_optc.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dce110/dce110_hwseq.h" +#include "dcn35/dcn35_opp.h" +#include "dcn35/dcn35_dsc.h" +#include "dcn30/dcn30_vpg.h" +#include "dcn30/dcn30_afmt.h" +#include "dcn31/dcn31_dio_link_encoder.h" +#include "dcn35/dcn35_dio_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_stream_encoder.h" +#include "dcn31/dcn31_hpo_dp_link_encoder.h" +#include "dcn32/dcn32_hpo_dp_link_encoder.h" +#include "link.h" +#include "dcn31/dcn31_apg.h" +#include "dcn32/dcn32_dio_link_encoder.h" +#include "dcn31/dcn31_vpg.h" +#include "dcn31/dcn31_afmt.h" +#include "dce/dce_clock_source.h" +#include "dce/dce_audio.h" +#include "dce/dce_hwseq.h" +#include "clk_mgr.h" +#include "virtual/virtual_stream_encoder.h" +#include "dce110/dce110_resource.h" +#include "dml/display_mode_vba.h" +#include "dcn35/dcn35_dccg.h" +#include "dcn35/dcn35_pg_cntl.h" +#include "dcn10/dcn10_resource.h" +#include "dcn31/dcn31_panel_cntl.h" +#include "dcn35/dcn35_hwseq.h" +#include "dcn35/dcn35_dio_link_encoder.h" +#include "dml/dcn31/dcn31_fpu.h" /*todo*/ +#include "dml/dcn35/dcn35_fpu.h" +#include "dcn35/dcn35_dwb.h" +#include "dcn35/dcn35_mmhubbub.h" + +#include "dcn/dcn_3_5_0_offset.h" +#include "dcn/dcn_3_5_0_sh_mask.h" +#include "nbio/nbio_7_11_0_offset.h" +#include "mmhub/mmhub_3_3_0_offset.h" +#include "mmhub/mmhub_3_3_0_sh_mask.h" + +#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE__SHIFT 0x0 +#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE_MASK 0x0000000FL + +#include "reg_helper.h" +#include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" +#include "dce/dce_aux.h" +#include "dce/dce_i2c.h" +#include "dml/dcn31/display_mode_vba_31.h" /*temp*/ +#include "vm_helper.h" +#include "dcn20/dcn20_vmid.h" + +#include "link_enc_cfg.h" +#define DC_LOGGER_INIT(logger) + +enum dcn35_clk_src_array_id { + DCN35_CLK_SRC_PLL0, + DCN35_CLK_SRC_PLL1, + DCN35_CLK_SRC_PLL2, + DCN35_CLK_SRC_PLL3, + DCN35_CLK_SRC_PLL4, + DCN35_CLK_SRC_TOTAL +}; + +/* begin ********************* + * macros to expend register list macro defined in HW object header file + */ + +/* DCN */ +/* TODO awful hack. 
fixup dcn20_dwb.h */ +#undef BASE_INNER +#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SR_ARR(reg_name, id) \ + REG_STRUCT[id].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name + +#define SR_ARR_INIT(reg_name, id, value) \ + REG_STRUCT[id].reg_name = value + +#define SRI(reg_name, block, id)\ + REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_ARR(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SR_ARR_I2C(reg_name, id) \ + REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name + +#define SRI_ARR_I2C(reg_name, block, id)\ + REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI_ARR_ALPHABET(reg_name, block, index, id)\ + REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRI2(reg_name, block, id)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRI2_ARR(reg_name, block, id)\ + REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define SRIR(var_name, reg_name, block, id)\ + .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII(reg_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_ARR_2(reg_name, block, id, inst)\ + REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_MPC_RMU(reg_name, block, id)\ + .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define SRII_DWB(reg_name, temp_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## temp_name + +#define SF_DWB2(reg_name, block, id, field_name, post_fix) \ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define DCCG_SRII(reg_name, block, id)\ + REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ + reg ## block ## id ## _ ## reg_name + +#define VUPDATE_SRII(reg_name, block, id)\ + REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + reg ## reg_name ## _ ## block ## id + +/* NBIO */ +#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg] + +#define NBIO_BASE(seg) \ + NBIO_BASE_INNER(seg) + +#define NBIO_SR(reg_name)\ + REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX2_ ## reg_name + +#define NBIO_SR_ARR(reg_name, id)\ + REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \ + regBIF_BX2_ ## reg_name + +#define bios_regs_init() \ + ( \ + NBIO_SR(BIOS_SCRATCH_3),\ + NBIO_SR(BIOS_SCRATCH_6)\ + ) + +static struct bios_registers bios_regs; + +#define clk_src_regs_init(index, pllid)\ + CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) + +static struct dce110_clk_src_regs clk_src_regs[5]; + +static const struct dce110_clk_src_shift cs_shift = { + 
CS_COMMON_MASK_SH_LIST_DCN3_1_4(__SHIFT) +}; + +static const struct dce110_clk_src_mask cs_mask = { + CS_COMMON_MASK_SH_LIST_DCN3_1_4(_MASK) +}; + +#define abm_regs_init(id)\ + ABM_DCN32_REG_LIST_RI(id) + +static struct dce_abm_registers abm_regs[4]; + +static const struct dce_abm_shift abm_shift = { + ABM_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct dce_abm_mask abm_mask = { + ABM_MASK_SH_LIST_DCN35(_MASK) +}; + +#define audio_regs_init(id)\ + AUD_COMMON_REG_LIST_RI(id) + +static struct dce_audio_registers audio_regs[7]; + + +#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\ + SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\ + AUD_COMMON_MASK_SH_LIST_BASE(mask_sh) + +static const struct dce_audio_shift audio_shift = { + DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_audio_mask audio_mask = { + DCE120_AUD_COMMON_MASK_SH_LIST(_MASK) +}; + +#define vpg_regs_init(id)\ + VPG_DCN31_REG_LIST_RI(id) + +static struct dcn31_vpg_registers vpg_regs[10]; + +static const struct dcn31_vpg_shift vpg_shift = { + DCN31_VPG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_vpg_mask vpg_mask = { + DCN31_VPG_MASK_SH_LIST(_MASK) +}; + +#define afmt_regs_init(id)\ + AFMT_DCN31_REG_LIST_RI(id) + +static struct dcn31_afmt_registers afmt_regs[6]; + +static const struct dcn31_afmt_shift afmt_shift = { + DCN31_AFMT_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_afmt_mask afmt_mask = { + DCN31_AFMT_MASK_SH_LIST(_MASK) +}; + +#define apg_regs_init(id)\ + APG_DCN31_REG_LIST_RI(id) + +static struct dcn31_apg_registers apg_regs[4]; + +static const struct dcn31_apg_shift apg_shift = { + DCN31_APG_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_apg_mask apg_mask = { + DCN31_APG_MASK_SH_LIST(_MASK) +}; + +#define stream_enc_regs_init(id)\ + SE_DCN35_REG_LIST_RI(id) + +static struct dcn10_stream_enc_registers stream_enc_regs[5]; + +static const struct dcn10_stream_encoder_shift se_shift = { + SE_COMMON_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN35(_MASK) +}; + +#define aux_regs_init(id)\ + DCN2_AUX_REG_LIST_RI(id) + +static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5]; + +#define hpd_regs_init(id)\ + HPD_REG_LIST_RI(id) + +static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5]; + + +static const struct dce110_aux_registers_shift aux_shift = { + DCN_AUX_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce110_aux_registers_mask aux_mask = { + DCN_AUX_MASK_SH_LIST(_MASK) +}; + +#define link_regs_init(id, phyid)\ + ( \ + LE_DCN35_REG_LIST_RI(id), \ + UNIPHY_DCN2_REG_LIST_RI(id, phyid)\ + ) + +static struct dcn10_link_enc_registers link_enc_regs[5]; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN35(__SHIFT), \ + //DPCS_DCN31_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN35(_MASK), \ + //DPCS_DCN31_MASK_SH_LIST(_MASK) +}; + +#define hpo_dp_stream_encoder_reg_init(id)\ + DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) + +static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4]; + +static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = { + DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK) +}; + +#define 
hpo_dp_link_encoder_reg_init(id)\ + DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) + /*DCN3_1_RDPCSTX_REG_LIST(0),*/ + /*DCN3_1_RDPCSTX_REG_LIST(1),*/ + /*DCN3_1_RDPCSTX_REG_LIST(2),*/ + /*DCN3_1_RDPCSTX_REG_LIST(3),*/ + +static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[2]; + +static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = { + DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = { + DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(_MASK) +}; + +#define dpp_regs_init(id)\ + DPP_REG_LIST_DCN35_RI(id) + +static struct dcn3_dpp_registers dpp_regs[4]; + +static const struct dcn35_dpp_shift tf_shift = { + DPP_REG_LIST_SH_MASK_DCN35(__SHIFT) +}; + +static const struct dcn35_dpp_mask tf_mask = { + DPP_REG_LIST_SH_MASK_DCN35(_MASK) +}; + +#define opp_regs_init(id)\ + OPP_REG_LIST_DCN35_RI(id) + +static struct dcn35_opp_registers opp_regs[4]; + +static const struct dcn35_opp_shift opp_shift = { + OPP_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct dcn35_opp_mask opp_mask = { + OPP_MASK_SH_LIST_DCN35(_MASK) +}; + +#define aux_engine_regs_init(id)\ + ( \ + AUX_COMMON_REG_LIST0_RI(id), \ + SR_ARR_INIT(AUXN_IMPCAL, id, 0), \ + SR_ARR_INIT(AUXP_IMPCAL, id, 0), \ + SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK) \ + ) + +static struct dce110_aux_registers aux_engine_regs[5]; + +#define dwbc_regs_dcn3_init(id)\ + DWBC_COMMON_REG_LIST_DCN30_RI(id) + +static struct dcn30_dwbc_registers dwbc35_regs[1]; + +static const struct dcn35_dwbc_shift dwbc35_shift = { + DWBC_COMMON_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct dcn35_dwbc_mask dwbc35_mask = { + DWBC_COMMON_MASK_SH_LIST_DCN35(_MASK) +}; + +#define mcif_wb_regs_dcn3_init(id)\ + MCIF_WB_COMMON_REG_LIST_DCN3_5_RI(id) + +static struct dcn35_mmhubbub_registers mcif_wb35_regs[1]; + +static const struct dcn35_mmhubbub_shift mcif_wb35_shift = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT) +}; + +static const struct dcn35_mmhubbub_mask mcif_wb35_mask = { + MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(_MASK) +}; + +#define dsc_regsDCN35_init(id)\ + DSC_REG_LIST_DCN20_RI(id) + +static struct dcn20_dsc_registers dsc_regs[4]; + +static const struct dcn35_dsc_shift dsc_shift = { + DSC_REG_LIST_SH_MASK_DCN35(__SHIFT) +}; + +static const struct dcn35_dsc_mask dsc_mask = { + DSC_REG_LIST_SH_MASK_DCN35(_MASK) +}; + +static struct dcn30_mpc_registers mpc_regs; + +#define dcn_mpc_regs_init() \ + MPC_REG_LIST_DCN3_2_RI(0),\ + MPC_REG_LIST_DCN3_2_RI(1),\ + MPC_REG_LIST_DCN3_2_RI(2),\ + MPC_REG_LIST_DCN3_2_RI(3),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\ + MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\ + MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0) + +static const struct dcn30_mpc_shift mpc_shift = { + MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct dcn30_mpc_mask mpc_mask = { + MPC_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define optc_regs_init(id)\ + OPTC_COMMON_REG_LIST_DCN3_5_RI(id) + +static struct dcn_optc_registers optc_regs[4]; + +static const struct dcn_optc_shift optc_shift = { + OPTC_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT) +}; + +static const struct dcn_optc_mask optc_mask = { + OPTC_COMMON_MASK_SH_LIST_DCN3_5(_MASK) +}; + +#define hubp_regs_init(id)\ + HUBP_REG_LIST_DCN30_RI(id) + +static struct dcn_hubp2_registers hubp_regs[4]; + + +static const struct dcn35_hubp2_shift hubp_shift = { + HUBP_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct dcn35_hubp2_mask 
hubp_mask = { + HUBP_MASK_SH_LIST_DCN35(_MASK) +}; + +static struct dcn_hubbub_registers hubbub_reg; + +#define hubbub_reg_init()\ + HUBBUB_REG_LIST_DCN35(0) + +static const struct dcn_hubbub_shift hubbub_shift = { + HUBBUB_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct dcn_hubbub_mask hubbub_mask = { + HUBBUB_MASK_SH_LIST_DCN35(_MASK) +}; + +static struct dccg_registers dccg_regs; + +#define dccg_regs_init()\ + DCCG_REG_LIST_DCN35() + +static const struct dccg_shift dccg_shift = { + DCCG_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct dccg_mask dccg_mask = { + DCCG_MASK_SH_LIST_DCN35(_MASK) +}; + +static struct pg_cntl_registers pg_cntl_regs; + +#define pg_cntl_dcn35_regs_init() \ + PG_CNTL_REG_LIST_DCN35() + +static const struct pg_cntl_shift pg_cntl_shift = { + PG_CNTL_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct pg_cntl_mask pg_cntl_mask = { + PG_CNTL_MASK_SH_LIST_DCN35(_MASK) +}; + +#define SRII2(reg_name_pre, reg_name_post, id)\ + .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \ + ## id ## _ ## reg_name_post ## _BASE_IDX) + \ + reg ## reg_name_pre ## id ## _ ## reg_name_post + +static struct dce_hwseq_registers hwseq_reg; + +#define hwseq_reg_init()\ + HWSEQ_DCN35_REG_LIST() + +#define HWSEQ_DCN35_MASK_SH_LIST(mask_sh)\ + HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ + HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ + HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ + HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + 
HWS_SF(, DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN23_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \ + HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \ + HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \ + HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \ + HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \ + HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \ + HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, DISPCLK_R_DMU_GATE_DIS, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, DISPCLK_G_RBBMIF_GATE_DIS, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, RBBMIF_FGCG_REP_DIS, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, DPREFCLK_ALLOW_DS_CLKSTOP, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, DISPCLK_ALLOW_DS_CLKSTOP, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, DPPCLK_ALLOW_DS_CLKSTOP, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, DTBCLK_ALLOW_DS_CLKSTOP, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, DCFCLK_ALLOW_DS_CLKSTOP, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, DPIACLK_ALLOW_DS_CLKSTOP, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, LONO_FGCG_REP_DIS, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE, mask_sh),\ + HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh) + +static const struct dce_hwseq_shift hwseq_shift = { + HWSEQ_DCN35_MASK_SH_LIST(__SHIFT) +}; + +static const struct dce_hwseq_mask hwseq_mask = { + HWSEQ_DCN35_MASK_SH_LIST(_MASK) +}; + +#define vmid_regs_init(id)\ + DCN20_VMID_REG_LIST_RI(id) + +static struct dcn_vmid_registers vmid_regs[16]; + +static const struct dcn20_vmid_shift vmid_shifts = { + DCN20_VMID_MASK_SH_LIST(__SHIFT) +}; + +static const struct dcn20_vmid_mask vmid_masks = { + DCN20_VMID_MASK_SH_LIST(_MASK) +}; + +static const struct resource_caps res_cap_dcn35 = { + .num_timing_generator = 4, + .num_opp = 4, + .num_video_plane = 4, + .num_audio = 5, + .num_stream_encoder = 5, + .num_dig_link_enc = 5, + .num_hpo_dp_stream_encoder = 4, + .num_hpo_dp_link_encoder = 2, + .num_pll = 4,/*1 c10 edp, 3xc20 combo PHY*/ + .num_dwb = 1, + .num_ddc = 5, + .num_vmid = 16, + .num_mpc_3dlut = 
2, + .num_dsc = 4, +}; + +static const struct dc_plane_cap plane_cap = { + .type = DC_PLANE_TYPE_DCN_UNIVERSAL, + .per_pixel_alpha = true, + + .pixel_format_support = { + .argb8888 = true, + .nv12 = true, + .fp16 = true, + .p010 = true, + .ayuv = false, + }, + + .max_upscale_factor = { + .argb8888 = 16000, + .nv12 = 16000, + .fp16 = 16000 + }, + + // 6:1 downscaling ratio: 1000/6 = 166.666 + .max_downscale_factor = { + .argb8888 = 167, + .nv12 = 167, + .fp16 = 167 + }, + 64, + 64 +}; + +static const struct dc_debug_options debug_defaults_drv = { + .disable_dmcu = true, + .force_abm_enable = false, + .timing_trace = false, + .clock_trace = true, + .disable_pplib_clock_request = false, + .pipe_split_policy = MPC_SPLIT_AVOID, + .force_single_disp_pipe_split = false, + .disable_dcc = DCC_ENABLE, + .disable_dpp_power_gate = true, + .disable_hubp_power_gate = true, + .disable_clock_gate = true, + .disable_dsc_power_gate = true, + .vsr_support = true, + .performance_trace = false, + .max_downscale_src_width = 4096,/*upto true 4k*/ + .disable_pplib_wm_range = false, + .scl_reset_length10 = true, + .sanity_checks = false, + .underflow_assert_delay_us = 0xFFFFFFFF, + .dwb_fi_phase = -1, // -1 = disable, + .dmub_command_table = true, + .pstate_enabled = true, + .use_max_lb = true, + .enable_mem_low_power = { + .bits = { + .vga = false, + .i2c = true, + .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled + .dscl = true, + .cm = true, + .mpc = true, + .optc = true, + .vpg = true, + .afmt = true, + } + }, + .root_clock_optimization = { + .bits = { + .dpp = true, + .dsc = true,/*dscclk and dsc pg*/ + .hdmistream = true, + .hdmichar = true, + .dpstream = true, + .symclk32_se = true, + .symclk32_le = true, + .symclk_fe = true, + .physymclk = true, + .dpiasymclk = true, + } + }, + .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT, + .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/ + .using_dml2 = true, + .support_eDP1_5 = true, + .enable_hpo_pg_support = false, + .enable_legacy_fast_update = true, + .enable_single_display_2to1_odm_policy = false, + .disable_idle_power_optimizations = true, + .dmcub_emulation = false, + .disable_boot_optimizations = false, + .disable_unbounded_requesting = false, + .disable_mem_low_power = false, + //must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions + .enable_double_buffered_dsc_pg_support = true, + .enable_dp_dig_pixel_rate_div_policy = 1, + .disable_z10 = false, + .ignore_pg = true, + .psp_disabled_wa = true, + .ips2_eval_delay_us = 200, + .ips2_entry_delay_us = 400 +}; + +static const struct dc_panel_config panel_config_defaults = { + .psr = { + .disable_psr = false, + .disallow_psrsu = false, + }, + .ilr = { + .optimize_edp_link_rate = true, + }, +}; + +static void dcn35_dpp_destroy(struct dpp **dpp) +{ + kfree(TO_DCN20_DPP(*dpp)); + *dpp = NULL; +} + +static struct dpp *dcn35_dpp_create(struct dc_context *ctx, uint32_t inst) +{ + struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL); + bool success = (dpp != NULL); + + if (!success) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT dpp_regs + dpp_regs_init(0), + dpp_regs_init(1), + dpp_regs_init(2), + dpp_regs_init(3); + + success = dpp35_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift, + &tf_mask); + if (success) { + dpp35_set_fgcg( + dpp, + ctx->dc->debug.enable_fine_grain_clock_gating.bits.dpp); + return &dpp->base; + } + + BREAK_TO_DEBUGGER(); + kfree(dpp); + return NULL; +} + 
+static struct output_pixel_processor *dcn35_opp_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_opp *opp = + kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL); + + if (!opp) { + BREAK_TO_DEBUGGER(); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT opp_regs + opp_regs_init(0), + opp_regs_init(1), + opp_regs_init(2), + opp_regs_init(3); + + dcn35_opp_construct(opp, ctx, inst, + &opp_regs[inst], &opp_shift, &opp_mask); + + dcn35_opp_set_fgcg(opp, ctx->dc->debug.enable_fine_grain_clock_gating.bits.opp); + + return &opp->base; +} + +static struct dce_aux *dcn31_aux_engine_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct aux_engine_dce110 *aux_engine = + kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL); + + if (!aux_engine) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT aux_engine_regs + aux_engine_regs_init(0), + aux_engine_regs_init(1), + aux_engine_regs_init(2), + aux_engine_regs_init(3), + aux_engine_regs_init(4); + + dce110_aux_engine_construct(aux_engine, ctx, inst, + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, + &aux_engine_regs[inst], + &aux_mask, + &aux_shift, + ctx->dc->caps.extended_aux_timeout_support); + + return &aux_engine->base; +} + +#define i2c_inst_regs_init(id)\ + I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) + +static struct dce_i2c_registers i2c_hw_regs[5]; + +static const struct dce_i2c_shift i2c_shifts = { + I2C_COMMON_MASK_SH_LIST_DCN35(__SHIFT) +}; + +static const struct dce_i2c_mask i2c_masks = { + I2C_COMMON_MASK_SH_LIST_DCN35(_MASK) +}; + +/* ========================================================== */ + +/* + * DPIA index | Preferred Encoder | Host Router + * 0 | C | 0 + * 1 | First Available | 0 + * 2 | D | 1 + * 3 | First Available | 1 + */ +/* ========================================================== */ +static const enum engine_id dpia_to_preferred_enc_id_table[] = { + ENGINE_ID_DIGC, + ENGINE_ID_DIGC, + ENGINE_ID_DIGD, + ENGINE_ID_DIGD +}; + +static enum engine_id dcn35_get_preferred_eng_id_dpia(unsigned int dpia_index) +{ + return dpia_to_preferred_enc_id_table[dpia_index]; +} + +static struct dce_i2c_hw *dcn31_i2c_hw_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dce_i2c_hw *dce_i2c_hw = + kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL); + + if (!dce_i2c_hw) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT i2c_hw_regs + i2c_inst_regs_init(1), + i2c_inst_regs_init(2), + i2c_inst_regs_init(3), + i2c_inst_regs_init(4), + i2c_inst_regs_init(5); + + dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst, + &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks); + + return dce_i2c_hw; +} +static struct mpc *dcn35_mpc_create( + struct dc_context *ctx, + int num_mpcc, + int num_rmu) +{ + struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL); + + if (!mpc30) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT mpc_regs + dcn_mpc_regs_init(); + + dcn32_mpc_construct(mpc30, ctx, + &mpc_regs, + &mpc_shift, + &mpc_mask, + num_mpcc, + num_rmu); + + return &mpc30->base; +} + +static struct hubbub *dcn35_hubbub_create(struct dc_context *ctx) +{ + int i; + + struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub), + GFP_KERNEL); + + if (!hubbub3) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT hubbub_reg + hubbub_reg_init(); + +#undef REG_STRUCT +#define REG_STRUCT vmid_regs + vmid_regs_init(0), + vmid_regs_init(1), + vmid_regs_init(2), + vmid_regs_init(3), + vmid_regs_init(4), + vmid_regs_init(5), + vmid_regs_init(6), + vmid_regs_init(7), + vmid_regs_init(8), + 
vmid_regs_init(9), + vmid_regs_init(10), + vmid_regs_init(11), + vmid_regs_init(12), + vmid_regs_init(13), + vmid_regs_init(14), + vmid_regs_init(15); + + hubbub35_construct(hubbub3, ctx, + &hubbub_reg, + &hubbub_shift, + &hubbub_mask, + 384,/*ctx->dc->dml.ip.det_buffer_size_kbytes,*/ + 8, /*ctx->dc->dml.ip.pixel_chunk_size_kbytes,*/ + 1792 /*ctx->dc->dml.ip.config_return_buffer_size_in_kbytes*/); + + + for (i = 0; i < res_cap_dcn35.num_vmid; i++) { + struct dcn20_vmid *vmid = &hubbub3->vmid[i]; + + vmid->ctx = ctx; + + vmid->regs = &vmid_regs[i]; + vmid->shifts = &vmid_shifts; + vmid->masks = &vmid_masks; + } + + return &hubbub3->base; +} + +static struct timing_generator *dcn35_timing_generator_create( + struct dc_context *ctx, + uint32_t instance) +{ + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); + + if (!tgn10) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT optc_regs + optc_regs_init(0), + optc_regs_init(1), + optc_regs_init(2), + optc_regs_init(3); + + tgn10->base.inst = instance; + tgn10->base.ctx = ctx; + + tgn10->tg_regs = &optc_regs[instance]; + tgn10->tg_shift = &optc_shift; + tgn10->tg_mask = &optc_mask; + + dcn35_timing_generator_init(tgn10); + + return &tgn10->base; +} + +static const struct encoder_feature_support link_enc_feature = { + .max_hdmi_deep_color = COLOR_DEPTH_121212, + .max_hdmi_pixel_clock = 600000, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = true, + .fec_supported = true, + .flags.bits.IS_HBR2_CAPABLE = true, + .flags.bits.IS_HBR3_CAPABLE = true, + .flags.bits.IS_TPS3_CAPABLE = true, + .flags.bits.IS_TPS4_CAPABLE = true +}; + +static struct link_encoder *dcn35_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) +{ + struct dcn20_link_encoder *enc20 = + kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + + if (!enc20) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT link_enc_aux_regs + aux_regs_init(0), + aux_regs_init(1), + aux_regs_init(2), + aux_regs_init(3), + aux_regs_init(4); + +#undef REG_STRUCT +#define REG_STRUCT link_enc_hpd_regs + hpd_regs_init(0), + hpd_regs_init(1), + hpd_regs_init(2), + hpd_regs_init(3), + hpd_regs_init(4); + +#undef REG_STRUCT +#define REG_STRUCT link_enc_regs + link_regs_init(0, A), + link_regs_init(1, B), + link_regs_init(2, C), + link_regs_init(3, D), + link_regs_init(4, E); + + dcn35_link_encoder_construct(enc20, + enc_init_data, + &link_enc_feature, + &link_enc_regs[enc_init_data->transmitter], + &link_enc_aux_regs[enc_init_data->channel - 1], + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); + + return &enc20->enc10.base; +} + +/* Create a minimal link encoder object not associated with a particular + * physical connector. 
+ * resource_funcs.link_enc_create_minimal + */ +static struct link_encoder *dcn31_link_enc_create_minimal( + struct dc_context *ctx, enum engine_id eng_id) +{ + struct dcn20_link_encoder *enc20; + + if ((eng_id - ENGINE_ID_DIGA) > ctx->dc->res_pool->res_cap->num_dig_link_enc) + return NULL; + + enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); + if (!enc20) + return NULL; + + dcn31_link_encoder_construct_minimal( + enc20, + ctx, + &link_enc_feature, + &link_enc_regs[eng_id - ENGINE_ID_DIGA], + eng_id); + + return &enc20->enc10.base; +} + +static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data) +{ + struct dcn31_panel_cntl *panel_cntl = + kzalloc(sizeof(struct dcn31_panel_cntl), GFP_KERNEL); + + if (!panel_cntl) + return NULL; + + dcn31_panel_cntl_construct(panel_cntl, init_data); + + return &panel_cntl->base; +} + +static void read_dce_straps( + struct dc_context *ctx, + struct resource_straps *straps) +{ + generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX), + FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio); + +} + +static struct audio *dcn31_create_audio( + struct dc_context *ctx, unsigned int inst) +{ + +#undef REG_STRUCT +#define REG_STRUCT audio_regs + audio_regs_init(0), + audio_regs_init(1), + audio_regs_init(2), + audio_regs_init(3), + audio_regs_init(4); + audio_regs_init(5); + audio_regs_init(6); + + return dce_audio_create(ctx, inst, + &audio_regs[inst], &audio_shift, &audio_mask); +} + +static struct vpg *dcn31_vpg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL); + + if (!vpg31) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT vpg_regs + vpg_regs_init(0), + vpg_regs_init(1), + vpg_regs_init(2), + vpg_regs_init(3), + vpg_regs_init(4), + vpg_regs_init(5), + vpg_regs_init(6), + vpg_regs_init(7), + vpg_regs_init(8), + vpg_regs_init(9); + + vpg31_construct(vpg31, ctx, inst, + &vpg_regs[inst], + &vpg_shift, + &vpg_mask); + + return &vpg31->base; +} + +static struct afmt *dcn31_afmt_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL); + + if (!afmt31) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT afmt_regs + afmt_regs_init(0), + afmt_regs_init(1), + afmt_regs_init(2), + afmt_regs_init(3), + afmt_regs_init(4), + afmt_regs_init(5); + + afmt31_construct(afmt31, ctx, inst, + &afmt_regs[inst], + &afmt_shift, + &afmt_mask); + + // Light sleep by default, no need to power down here + + return &afmt31->base; +} + +static struct apg *dcn31_apg_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL); + + if (!apg31) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT apg_regs + apg_regs_init(0), + apg_regs_init(1), + apg_regs_init(2), + apg_regs_init(3); + + apg31_construct(apg31, ctx, inst, + &apg_regs[inst], + &apg_shift, + &apg_mask); + + return &apg31->base; +} + +static struct stream_encoder *dcn35_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn10_stream_encoder *enc1; + struct vpg *vpg; + struct afmt *afmt; + int vpg_inst; + int afmt_inst; + + /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ + if (eng_id <= ENGINE_ID_DIGF) { + vpg_inst = eng_id; + afmt_inst = eng_id; + } else + return NULL; + + enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, 
vpg_inst); + afmt = dcn31_afmt_create(ctx, afmt_inst); + + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT stream_enc_regs + stream_enc_regs_init(0), + stream_enc_regs_init(1), + stream_enc_regs_init(2), + stream_enc_regs_init(3), + stream_enc_regs_init(4); + + dcn35_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, + eng_id, vpg, afmt, + &stream_enc_regs[eng_id], + &se_shift, &se_mask); + + return &enc1->base; +} + +static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( + enum engine_id eng_id, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31; + struct vpg *vpg; + struct apg *apg; + uint32_t hpo_dp_inst; + uint32_t vpg_inst; + uint32_t apg_inst; + + ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3)); + hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0; + + /* Mapping of VPG register blocks to HPO DP block instance: + * VPG[6] -> HPO_DP[0] + * VPG[7] -> HPO_DP[1] + * VPG[8] -> HPO_DP[2] + * VPG[9] -> HPO_DP[3] + */ + vpg_inst = hpo_dp_inst + 6; + + /* Mapping of APG register blocks to HPO DP block instance: + * APG[0] -> HPO_DP[0] + * APG[1] -> HPO_DP[1] + * APG[2] -> HPO_DP[2] + * APG[3] -> HPO_DP[3] + */ + apg_inst = hpo_dp_inst; + + /* allocate HPO stream encoder and create VPG sub-block */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL); + vpg = dcn31_vpg_create(ctx, vpg_inst); + apg = dcn31_apg_create(ctx, apg_inst); + + if (!hpo_dp_enc31 || !vpg || !apg) { + kfree(hpo_dp_enc31); + kfree(vpg); + kfree(apg); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT hpo_dp_stream_enc_regs + hpo_dp_stream_encoder_reg_init(0), + hpo_dp_stream_encoder_reg_init(1), + hpo_dp_stream_encoder_reg_init(2), + hpo_dp_stream_encoder_reg_init(3); + + dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios, + hpo_dp_inst, eng_id, vpg, apg, + &hpo_dp_stream_enc_regs[hpo_dp_inst], + &hpo_dp_se_shift, &hpo_dp_se_mask); + + return &hpo_dp_enc31->base; +} + +static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create( + uint8_t inst, + struct dc_context *ctx) +{ + struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31; + + /* allocate HPO link encoder */ + hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL); + +#undef REG_STRUCT +#define REG_STRUCT hpo_dp_link_enc_regs + hpo_dp_link_encoder_reg_init(0), + hpo_dp_link_encoder_reg_init(1); + + hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst, + &hpo_dp_link_enc_regs[inst], + &hpo_dp_le_shift, &hpo_dp_le_mask); + + return &hpo_dp_enc31->base; +} + +static struct dce_hwseq *dcn35_hwseq_create( + struct dc_context *ctx) +{ + struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL); + +#undef REG_STRUCT +#define REG_STRUCT hwseq_reg + hwseq_reg_init(); + + if (hws) { + hws->ctx = ctx; + hws->regs = &hwseq_reg; + hws->shifts = &hwseq_shift; + hws->masks = &hwseq_mask; + } + return hws; +} +static const struct resource_create_funcs res_create_funcs = { + .read_dce_straps = read_dce_straps, + .create_audio = dcn31_create_audio, + .create_stream_encoder = dcn35_stream_encoder_create, + .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create, + .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create, + .create_hwseq = dcn35_hwseq_create, +}; + +static void dcn35_resource_destruct(struct dcn35_resource_pool *pool) +{ + unsigned int i; + + for (i = 0; i < pool->base.stream_enc_count; i++) { + if 
(pool->base.stream_enc[i] != NULL) { + if (pool->base.stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg)); + pool->base.stream_enc[i]->vpg = NULL; + } + if (pool->base.stream_enc[i]->afmt != NULL) { + kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt)); + pool->base.stream_enc[i]->afmt = NULL; + } + kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i])); + pool->base.stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) { + if (pool->base.hpo_dp_stream_enc[i] != NULL) { + if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) { + kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg)); + pool->base.hpo_dp_stream_enc[i]->vpg = NULL; + } + if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) { + kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg)); + pool->base.hpo_dp_stream_enc[i]->apg = NULL; + } + kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i])); + pool->base.hpo_dp_stream_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) { + if (pool->base.hpo_dp_link_enc[i] != NULL) { + kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i])); + pool->base.hpo_dp_link_enc[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + if (pool->base.dscs[i] != NULL) + dcn20_dsc_destroy(&pool->base.dscs[i]); + } + + if (pool->base.mpc != NULL) { + kfree(TO_DCN20_MPC(pool->base.mpc)); + pool->base.mpc = NULL; + } + if (pool->base.hubbub != NULL) { + kfree(pool->base.hubbub); + pool->base.hubbub = NULL; + } + for (i = 0; i < pool->base.pipe_count; i++) { + if (pool->base.dpps[i] != NULL) + dcn35_dpp_destroy(&pool->base.dpps[i]); + + if (pool->base.ipps[i] != NULL) + pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]); + + if (pool->base.hubps[i] != NULL) { + kfree(TO_DCN20_HUBP(pool->base.hubps[i])); + pool->base.hubps[i] = NULL; + } + + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } + } + + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + if (pool->base.engines[i] != NULL) + dce110_engine_destroy(&pool->base.engines[i]); + if (pool->base.hw_i2cs[i] != NULL) { + kfree(pool->base.hw_i2cs[i]); + pool->base.hw_i2cs[i] = NULL; + } + if (pool->base.sw_i2cs[i] != NULL) { + kfree(pool->base.sw_i2cs[i]); + pool->base.sw_i2cs[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + if (pool->base.opps[i] != NULL) + pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]); + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.timing_generators[i] != NULL) { + kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i])); + pool->base.timing_generators[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_dwb; i++) { + if (pool->base.dwbc[i] != NULL) { + kfree(TO_DCN30_DWBC(pool->base.dwbc[i])); + pool->base.dwbc[i] = NULL; + } + if (pool->base.mcif_wb[i] != NULL) { + kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i])); + pool->base.mcif_wb[i] = NULL; + } + } + + for (i = 0; i < pool->base.audio_count; i++) { + if (pool->base.audios[i]) + dce_aud_destroy(&pool->base.audios[i]); + } + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] != NULL) { + dcn20_clock_source_destroy(&pool->base.clock_sources[i]); + pool->base.clock_sources[i] = NULL; + } + } + + for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) { + if (pool->base.mpc_lut[i] != NULL) { + 
dc_3dlut_func_release(pool->base.mpc_lut[i]); + pool->base.mpc_lut[i] = NULL; + } + if (pool->base.mpc_shaper[i] != NULL) { + dc_transfer_func_release(pool->base.mpc_shaper[i]); + pool->base.mpc_shaper[i] = NULL; + } + } + + if (pool->base.dp_clock_source != NULL) { + dcn20_clock_source_destroy(&pool->base.dp_clock_source); + pool->base.dp_clock_source = NULL; + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + if (pool->base.multiple_abms[i] != NULL) + dce_abm_destroy(&pool->base.multiple_abms[i]); + } + + if (pool->base.psr != NULL) + dmub_psr_destroy(&pool->base.psr); + + if (pool->base.pg_cntl != NULL) + dcn_pg_cntl_destroy(&pool->base.pg_cntl); + + if (pool->base.dccg != NULL) + dcn_dccg_destroy(&pool->base.dccg); +} + +static struct hubp *dcn35_hubp_create( + struct dc_context *ctx, + uint32_t inst) +{ + struct dcn20_hubp *hubp2 = + kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL); + + if (!hubp2) + return NULL; + +#undef REG_STRUCT +#define REG_STRUCT hubp_regs + hubp_regs_init(0), + hubp_regs_init(1), + hubp_regs_init(2), + hubp_regs_init(3); + + if (hubp35_construct(hubp2, ctx, inst, + &hubp_regs[inst], &hubp_shift, &hubp_mask)) + return &hubp2->base; + + BREAK_TO_DEBUGGER(); + kfree(hubp2); + return NULL; +} + +static void dcn35_dwbc_init(struct dcn30_dwbc *dwbc30, struct dc_context *ctx) +{ + dcn35_dwbc_set_fgcg( + dwbc30, ctx->dc->debug.enable_fine_grain_clock_gating.bits.dwb); +} + +static bool dcn35_dwbc_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), + GFP_KERNEL); + + if (!dwbc30) { + dm_error("DC: failed to create dwbc30!\n"); + return false; + } + +#undef REG_STRUCT +#define REG_STRUCT dwbc35_regs + dwbc_regs_dcn3_init(0); + + dcn35_dwbc_construct(dwbc30, ctx, + &dwbc35_regs[i], + &dwbc35_shift, + &dwbc35_mask, + i); + + pool->dwbc[i] = &dwbc30->base; + + dcn35_dwbc_init(dwbc30, ctx); + } + return true; +} + +static void dcn35_mmhubbub_init(struct dcn30_mmhubbub *mcif_wb30, + struct dc_context *ctx) +{ + dcn35_mmhubbub_set_fgcg( + mcif_wb30, + ctx->dc->debug.enable_fine_grain_clock_gating.bits.mmhubbub); +} + +static bool dcn35_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool) +{ + int i; + uint32_t pipe_count = pool->res_cap->num_dwb; + + for (i = 0; i < pipe_count; i++) { + struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), + GFP_KERNEL); + + if (!mcif_wb30) { + dm_error("DC: failed to create mcif_wb30!\n"); + return false; + } + +#undef REG_STRUCT +#define REG_STRUCT mcif_wb35_regs + mcif_wb_regs_dcn3_init(0); + + dcn35_mmhubbub_construct(mcif_wb30, ctx, + &mcif_wb35_regs[i], + &mcif_wb35_shift, + &mcif_wb35_mask, + i); + + dcn35_mmhubbub_init(mcif_wb30, ctx); + + pool->mcif_wb[i] = &mcif_wb30->base; + } + return true; +} + +static struct display_stream_compressor *dcn35_dsc_create( + struct dc_context *ctx, uint32_t inst) +{ + struct dcn20_dsc *dsc = + kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL); + + if (!dsc) { + BREAK_TO_DEBUGGER(); + return NULL; + } + +#undef REG_STRUCT +#define REG_STRUCT dsc_regs + dsc_regsDCN35_init(0), + dsc_regsDCN35_init(1), + dsc_regsDCN35_init(2), + dsc_regsDCN35_init(3); + + dsc35_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask); + dsc35_set_fgcg(dsc, + ctx->dc->debug.enable_fine_grain_clock_gating.bits.dsc); + return &dsc->base; +} + +static void dcn35_destroy_resource_pool(struct 
resource_pool **pool) +{ + struct dcn35_resource_pool *dcn35_pool = TO_DCN35_RES_POOL(*pool); + + dcn35_resource_destruct(dcn35_pool); + kfree(dcn35_pool); + *pool = NULL; +} + +static struct clock_source *dcn35_clock_source_create( + struct dc_context *ctx, + struct dc_bios *bios, + enum clock_source_id id, + const struct dce110_clk_src_regs *regs, + bool dp_clk_src) +{ + struct dce110_clk_src *clk_src = + kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL); + + if (!clk_src) + return NULL; + + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, + regs, &cs_shift, &cs_mask)) { + clk_src->base.dp_clk_src = dp_clk_src; + return &clk_src->base; + } + + BREAK_TO_DEBUGGER(); + return NULL; +} + +static struct dc_cap_funcs cap_funcs = { + .get_dcc_compression_cap = dcn20_get_dcc_compression_cap +}; + +static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config) +{ + *panel_config = panel_config_defaults; +} + + +static bool dcn35_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out = false; + + out = dml2_validate(dc, context, fast_validate); + + return out; +} + + +static struct resource_funcs dcn35_res_pool_funcs = { + .destroy = dcn35_destroy_resource_pool, + .link_enc_create = dcn35_link_encoder_create, + .link_enc_create_minimal = dcn31_link_enc_create_minimal, + .link_encs_assign = link_enc_cfg_link_encs_assign, + .link_enc_unassign = link_enc_cfg_link_enc_unassign, + .panel_cntl_create = dcn31_panel_cntl_create, + .validate_bandwidth = dcn35_validate_bandwidth, + .calculate_wm_and_dlg = NULL, + .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, + .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, + .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, + .release_pipe = dcn20_release_pipe, + .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, + .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, + .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, + .set_mcif_arb_params = dcn30_set_mcif_arb_params, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link, + .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut, + .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut, + .update_bw_bounding_box = dcn35_update_bw_bounding_box_fpu, + .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, + .get_panel_config_defaults = dcn35_get_panel_config_defaults, + .get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia, +}; + +static bool dcn35_resource_construct( + uint8_t num_virtual_links, + struct dc *dc, + struct dcn35_resource_pool *pool) +{ + int i; + struct dc_context *ctx = dc->ctx; + struct irq_service_init_data init_data; + +#undef REG_STRUCT +#define REG_STRUCT bios_regs + bios_regs_init(); + +#undef REG_STRUCT +#define REG_STRUCT clk_src_regs + clk_src_regs_init(0, A), + clk_src_regs_init(1, B), + clk_src_regs_init(2, C), + clk_src_regs_init(3, D), + clk_src_regs_init(4, E); + +#undef REG_STRUCT +#define REG_STRUCT abm_regs + abm_regs_init(0), + abm_regs_init(1), + abm_regs_init(2), + abm_regs_init(3); + +#undef REG_STRUCT +#define REG_STRUCT dccg_regs + dccg_regs_init(); + + ctx->dc_bios->regs = &bios_regs; + + pool->base.res_cap = &res_cap_dcn35; + + pool->base.funcs = &dcn35_res_pool_funcs; + + /************************************************* + * Resource + asic cap harcoding * + *************************************************/ + 
pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE; + pool->base.pipe_count = pool->base.res_cap->num_timing_generator; + pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; + dc->caps.max_downscale_ratio = 600; + dc->caps.i2c_speed_in_khz = 100; + dc->caps.i2c_speed_in_khz_hdcp = 100; + dc->caps.max_cursor_size = 256; + dc->caps.min_horizontal_blanking_period = 80; + dc->caps.dmdata_alloc_size = 2048; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; + dc->caps.post_blend_color_processing = true; + dc->caps.force_dp_tps4_for_cp2520 = true; + if (dc->config.forceHBR2CP2520) + dc->caps.force_dp_tps4_for_cp2520 = false; + dc->caps.dp_hpo = true; + dc->caps.dp_hdmi21_pcon_support = true; + + dc->caps.edp_dsc_support = true; + dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; + dc->caps.is_apu = true; + dc->caps.seamless_odm = true; + + dc->caps.zstate_support = true; + dc->caps.ips_support = true; + dc->caps.max_v_total = (1 << 15) - 1; + + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1; + dc->caps.color.dpp.dgam_rom_caps.pq = 1; + dc->caps.color.dpp.dgam_rom_caps.hlg = 1; + dc->caps.color.dpp.post_csc = 1; + dc->caps.color.dpp.gamma_corr = 1; + dc->caps.color.dpp.dgam_rom_for_yuv = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1 + // no OGAM ROM on DCN301 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 1; + dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2 + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order + * to provide some margin. + * It's expected for furture ASIC to have equal or higher value, in order to + * have determinstic power improvement from generate to genration. 
+ * (i.e., we should not expect new ASIC generation with lower vmin rate) + */ + dc->caps.max_disp_clock_khz_at_vmin = 650000; + + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.use_default_clock_table = false; + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) + dc->debug = debug_defaults_drv; + + // Init the vm_helper + if (dc->vm_helper) + vm_helper_init(dc->vm_helper, 16); + + /************************************************* + * Create resources * + *************************************************/ + + /* Clock Sources for Pixel Clock*/ + pool->base.clock_sources[DCN35_CLK_SRC_PLL0] = + dcn35_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL0, + &clk_src_regs[0], false); + pool->base.clock_sources[DCN35_CLK_SRC_PLL1] = + dcn35_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL1, + &clk_src_regs[1], false); + pool->base.clock_sources[DCN35_CLK_SRC_PLL2] = + dcn35_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); + pool->base.clock_sources[DCN35_CLK_SRC_PLL3] = + dcn35_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN35_CLK_SRC_PLL4] = + dcn35_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); + + pool->base.clk_src_count = DCN35_CLK_SRC_TOTAL; + + /* todo: not reuse phy_pll registers */ + pool->base.dp_clock_source = + dcn35_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_ID_DP_DTO, + &clk_src_regs[0], true); + + for (i = 0; i < pool->base.clk_src_count; i++) { + if (pool->base.clock_sources[i] == NULL) { + dm_error("DC: failed to create clock sources!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + /*temp till dml2 fully work without dml1*/ + dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); + + /* TODO: DCCG */ + pool->base.dccg = dccg35_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask); + if (pool->base.dccg == NULL) { + dm_error("DC: failed to create dccg!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + +#undef REG_STRUCT +#define REG_STRUCT pg_cntl_regs + pg_cntl_dcn35_regs_init(); + + pool->base.pg_cntl = pg_cntl35_create(ctx, &pg_cntl_regs, &pg_cntl_shift, &pg_cntl_mask); + if (pool->base.pg_cntl == NULL) { + dm_error("DC: failed to create power gate control!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* TODO: IRQ */ + init_data.ctx = dc->ctx; + pool->base.irqs = dal_irq_service_dcn35_create(&init_data); + if (!pool->base.irqs) + goto create_fail; + + /* HUBBUB */ + pool->base.hubbub = dcn35_hubbub_create(ctx); + if (pool->base.hubbub == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create hubbub!\n"); + goto create_fail; + } + + /* HUBPs, DPPs, OPPs and TGs */ + for (i = 0; i < pool->base.pipe_count; i++) { + pool->base.hubps[i] = dcn35_hubp_create(ctx, i); + if (pool->base.hubps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create hubps!\n"); + goto create_fail; + } + + pool->base.dpps[i] = 
dcn35_dpp_create(ctx, i); + if (pool->base.dpps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create dpps!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_opp; i++) { + pool->base.opps[i] = dcn35_opp_create(ctx, i); + if (pool->base.opps[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC: failed to create output pixel processor!\n"); + goto create_fail; + } + } + + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.timing_generators[i] = dcn35_timing_generator_create( + ctx, i); + if (pool->base.timing_generators[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create tg!\n"); + goto create_fail; + } + } + pool->base.timing_generator_count = i; + + /* PSR */ + pool->base.psr = dmub_psr_create(ctx); + if (pool->base.psr == NULL) { + dm_error("DC: failed to create psr obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + + /* ABM */ + for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { + pool->base.multiple_abms[i] = dmub_abm_create(ctx, + &abm_regs[i], + &abm_shift, + &abm_mask); + if (pool->base.multiple_abms[i] == NULL) { + dm_error("DC: failed to create abm for pipe %d!\n", i); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + } + + /* MPC and DSC */ + pool->base.mpc = dcn35_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut); + if (pool->base.mpc == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mpc!\n"); + goto create_fail; + } + + for (i = 0; i < pool->base.res_cap->num_dsc; i++) { + pool->base.dscs[i] = dcn35_dsc_create(ctx, i); + if (pool->base.dscs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create display stream compressor %d!\n", i); + goto create_fail; + } + } + + /* DWB and MMHUBBUB */ + if (!dcn35_dwbc_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create dwbc!\n"); + goto create_fail; + } + + if (!dcn35_mmhubbub_create(ctx, &pool->base)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: failed to create mcif_wb!\n"); + goto create_fail; + } + + /* AUX and I2C */ + for (i = 0; i < pool->base.res_cap->num_ddc; i++) { + pool->base.engines[i] = dcn31_aux_engine_create(ctx, i); + if (pool->base.engines[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create aux engine!!\n"); + goto create_fail; + } + pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i); + if (pool->base.hw_i2cs[i] == NULL) { + BREAK_TO_DEBUGGER(); + dm_error( + "DC:failed to create hw i2c!!\n"); + goto create_fail; + } + pool->base.sw_i2cs[i] = NULL; + } + + /* DCN3.5 has 6 DPIA */ + pool->base.usb4_dpia_count = 4; + if (dc->debug.dpia_debug.bits.disable_dpia) + pool->base.usb4_dpia_count = 0; + + /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ + if (!resource_construct(num_virtual_links, dc, &pool->base, + &res_create_funcs)) + goto create_fail; + + /* HW Sequencer and Plane caps */ + dcn35_hw_sequencer_construct(dc); + + dc->caps.max_planes = pool->base.pipe_count; + + for (i = 0; i < dc->caps.max_planes; ++i) + dc->caps.planes[i] = plane_cap; + + dc->cap_funcs = cap_funcs; + + dc->dcn_ip->max_num_dpp = pool->base.pipe_count; + + dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; + dc->dml2_options.use_native_pstate_optimization = true; + dc->dml2_options.use_native_soc_bb_construction = true; + if (dc->config.EnableMinDispClkODM) + dc->dml2_options.minimize_dispclk_using_odm = true; + dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm; + + 
dc->dml2_options.callbacks.dc = dc; + dc->dml2_options.callbacks.build_scaling_params = &resource_build_scaling_params; + dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; + dc->dml2_options.callbacks.acquire_secondary_pipe_for_mpc_odm = &dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy; + dc->dml2_options.callbacks.update_pipes_for_stream_with_slice_count = &resource_update_pipes_for_stream_with_slice_count; + dc->dml2_options.callbacks.update_pipes_for_plane_with_slice_count = &resource_update_pipes_for_plane_with_slice_count; + dc->dml2_options.callbacks.get_mpc_slice_index = &resource_get_mpc_slice_index; + dc->dml2_options.callbacks.get_odm_slice_index = &resource_get_odm_slice_index; + dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; + dc->dml2_options.max_segments_per_hubp = 24; + + dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ + + if (dc->config.sdpif_request_limit_words_per_umc == 0) + dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/ + + return true; + +create_fail: + + dcn35_resource_destruct(pool); + + return false; +} + +struct resource_pool *dcn35_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc) +{ + struct dcn35_resource_pool *pool = + kzalloc(sizeof(struct dcn35_resource_pool), GFP_KERNEL); + + if (!pool) + return NULL; + + if (dcn35_resource_construct(init_data->num_virtual_links, dc, pool)) + return &pool->base; + + BREAK_TO_DEBUGGER(); + kfree(pool); + return NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h new file mode 100644 index 000000000000..99aea102e3f7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h @@ -0,0 +1,310 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef _DCN35_RESOURCE_H_ +#define _DCN35_RESOURCE_H_ + +#include "core_types.h" + +#define DCN3_5_VMIN_DISPCLK_HZ 717000000 +#define TO_DCN35_RES_POOL(pool)\ + container_of(pool, struct dcn35_resource_pool, base) + +extern struct _vcs_dpi_ip_params_st dcn3_5_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc; + +struct dcn35_resource_pool { + struct resource_pool base; +}; + +struct resource_pool *dcn35_create_resource_pool( + const struct dc_init_data *init_data, + struct dc *dc); + +/* Defs for runtime init of registers */ + +#define OPP_REG_LIST_DCN20_RI(id) \ + OPP_REG_LIST_DCN10_RI(id), \ + OPP_DPG_REG_LIST_RI(id), \ + SRI_ARR(FMT_422_CONTROL, FMT, id), \ + SRI_ARR(OPPBUF_CONTROL1, OPPBUF, id) + +#define OPP_REG_LIST_DCN35_RI(id) \ + OPP_REG_LIST_DCN20_RI(id), \ + SRI2_ARR(OPP_TOP_CLK_CONTROL, OPP, id) + +#define VPG_DCN31_REG_LIST_RI(id) \ + SRI_ARR(VPG_GENERIC_STATUS, VPG, id), \ + SRI_ARR(VPG_GENERIC_PACKET_ACCESS_CTRL, VPG, id), \ + SRI_ARR(VPG_GENERIC_PACKET_DATA, VPG, id), \ + SRI_ARR(VPG_GSP_FRAME_UPDATE_CTRL, VPG, id), \ + SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id), \ + SRI_ARR(VPG_MEM_PWR, VPG, id) + +#define AFMT_DCN31_REG_LIST_RI(id) \ + SRI_ARR(AFMT_INFOFRAME_CONTROL0, AFMT, id), \ + SRI_ARR(AFMT_VBI_PACKET_CONTROL, AFMT, id), \ + SRI_ARR(AFMT_AUDIO_PACKET_CONTROL, AFMT, id), \ + SRI_ARR(AFMT_AUDIO_PACKET_CONTROL2, AFMT, id), \ + SRI_ARR(AFMT_AUDIO_SRC_CONTROL, AFMT, id), \ + SRI_ARR(AFMT_60958_0, AFMT, id), \ + SRI_ARR(AFMT_60958_1, AFMT, id), \ + SRI_ARR(AFMT_60958_2, AFMT, id), \ + SRI_ARR(AFMT_MEM_PWR, AFMT, id) + +/* Stream encoder */ +#define SE_DCN35_REG_LIST_RI(id) \ + SRI_ARR(AFMT_CNTL, DIG, id), \ + SRI_ARR(DIG_FE_CNTL, DIG, id), \ + SRI_ARR(HDMI_CONTROL, DIG, id), \ + SRI_ARR(HDMI_DB_CONTROL, DIG, id), \ + SRI_ARR(HDMI_GC, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \ + SRI_ARR(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \ + SRI_ARR(HDMI_INFOFRAME_CONTROL0, DIG, id), \ + SRI_ARR(HDMI_INFOFRAME_CONTROL1, DIG, id), \ + SRI_ARR(HDMI_VBI_PACKET_CONTROL, DIG, id), \ + SRI_ARR(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\ + SRI_ARR(HDMI_ACR_PACKET_CONTROL, DIG, id),\ + SRI_ARR(HDMI_ACR_32_0, DIG, id),\ + SRI_ARR(HDMI_ACR_32_1, DIG, id),\ + SRI_ARR(HDMI_ACR_44_0, DIG, id),\ + SRI_ARR(HDMI_ACR_44_1, DIG, id),\ + SRI_ARR(HDMI_ACR_48_0, DIG, id),\ + SRI_ARR(HDMI_ACR_48_1, DIG, id),\ + SRI_ARR(DP_DB_CNTL, DP, id), \ + SRI_ARR(DP_MSA_MISC, DP, id), \ + SRI_ARR(DP_MSA_VBID_MISC, DP, id), \ + SRI_ARR(DP_MSA_COLORIMETRY, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM1, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM2, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM3, DP, id), \ + SRI_ARR(DP_MSA_TIMING_PARAM4, DP, id), \ + SRI_ARR(DP_MSE_RATE_CNTL, DP, id), \ + SRI_ARR(DP_MSE_RATE_UPDATE, DP, id), \ + SRI_ARR(DP_PIXEL_FORMAT, DP, id), \ + SRI_ARR(DP_SEC_CNTL, DP, id), \ + SRI_ARR(DP_SEC_CNTL1, DP, id), \ + SRI_ARR(DP_SEC_CNTL2, DP, id), \ + SRI_ARR(DP_SEC_CNTL5, DP, id), \ + SRI_ARR(DP_SEC_CNTL6, DP, id), \ + SRI_ARR(DP_STEER_FIFO, DP, id), \ + SRI_ARR(DP_VID_M, DP, id), \ + 
SRI_ARR(DP_VID_N, DP, id), \ + SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \ + SRI_ARR(DP_VID_TIMING, DP, id), \ + SRI_ARR(DP_SEC_AUD_N, DP, id), \ + SRI_ARR(DP_SEC_TIMESTAMP, DP, id), \ + SRI_ARR(DP_DSC_CNTL, DP, id), \ + SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ + SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ + SRI_ARR(DP_SEC_FRAMING4, DP, id), \ + SRI_ARR(DP_GSP11_CNTL, DP, id), \ + SRI_ARR(DME_CONTROL, DME, id),\ + SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \ + SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \ + SRI_ARR(DIG_FE_CNTL, DIG, id), \ + SRI_ARR(DIG_FE_EN_CNTL, DIG, id), \ + SRI_ARR(DIG_FE_CLK_CNTL, DIG, id), \ + SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \ + SRI_ARR(DIG_FIFO_CTRL0, DIG, id), \ + SRI_ARR(STREAM_MAPPER_CONTROL, DIG, id) + +#define LE_DCN35_REG_LIST_RI(id)\ + LE_DCN3_REG_LIST_RI(id),\ + SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \ + SR_ARR(DIO_LINKA_CNTL, id), \ + SR_ARR(DIO_LINKB_CNTL, id), \ + SR_ARR(DIO_LINKC_CNTL, id), \ + SR_ARR(DIO_LINKD_CNTL, id), \ + SR_ARR(DIO_LINKE_CNTL, id), \ + SR_ARR(DIO_LINKF_CNTL, id),\ + SRI_ARR(DIG_BE_CLK_CNTL, DIG, id),\ + SR_ARR(DIO_CLK_CNTL, id) + +#define MCIF_WB_COMMON_REG_LIST_DCN3_5_RI(inst) \ + MCIF_WB_COMMON_REG_LIST_DCN32_RI(inst), \ + SRI2_ARR(MMHUBBUB_CLOCK_CNTL, MMHUBBUB, inst) + +#define HWSEQ_DCN35_REG_LIST()\ + SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ + SR(DIO_MEM_PWR_CTRL), \ + SR(ODM_MEM_PWR_CTRL3), \ + SR(MMHUBBUB_MEM_PWR_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL), \ + SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCCG_GATE_DISABLE_CNTL5), \ + SR(DCFCLK_CNTL),\ + SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ + SRII(PIXEL_RATE_CNTL, OTG, 0), \ + SRII(PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PIXEL_RATE_CNTL, OTG, 3),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\ + SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\ + SR(MICROSECOND_TIME_BASE_DIV), \ + SR(MILLISECOND_TIME_BASE_DIV), \ + SR(DISPCLK_FREQ_CHANGE_CNTL), \ + SR(RBBMIF_TIMEOUT_DIS), \ + SR(RBBMIF_TIMEOUT_DIS_2), \ + SR(DCHUBBUB_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_CTRL), \ + SR(DPP_TOP0_DPP_CRC_VAL_B_A), \ + SR(DPP_TOP0_DPP_CRC_VAL_R_G), \ + SR(MPC_CRC_CTRL), \ + SR(MPC_CRC_RESULT_GB), \ + SR(MPC_CRC_RESULT_C), \ + SR(MPC_CRC_RESULT_AR), \ + SR(DOMAIN0_PG_CONFIG), \ + SR(DOMAIN1_PG_CONFIG), \ + SR(DOMAIN2_PG_CONFIG), \ + SR(DOMAIN3_PG_CONFIG), \ + SR(DOMAIN16_PG_CONFIG), \ + SR(DOMAIN17_PG_CONFIG), \ + SR(DOMAIN18_PG_CONFIG), \ + SR(DOMAIN19_PG_CONFIG), \ + SR(DOMAIN0_PG_STATUS), \ + SR(DOMAIN1_PG_STATUS), \ + SR(DOMAIN2_PG_STATUS), \ + SR(DOMAIN3_PG_STATUS), \ + SR(DOMAIN16_PG_STATUS), \ + SR(DOMAIN17_PG_STATUS), \ + SR(DOMAIN18_PG_STATUS), \ + SR(DOMAIN19_PG_STATUS), \ + SR(DC_IP_REQUEST_CNTL), \ + SR(AZALIA_AUDIO_DTO), \ + SR(AZALIA_CONTROLLER_CLOCK_GATING), \ + SR(HPO_TOP_HW_CONTROL),\ + SR(DMU_CLK_CNTL) + +/* OPTC */ +#define OPTC_COMMON_REG_LIST_DCN3_5_RI(inst) \ + SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst),\ + SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst),\ + SRI_ARR(OTG_VREADY_PARAM, OTG, inst),\ + SRI_ARR(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ + SRI_ARR(OTG_GLOBAL_CONTROL0, OTG, inst),\ + SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI_ARR(OTG_GLOBAL_CONTROL4, OTG, inst),\ + SRI_ARR(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ + SRI_ARR(OTG_H_TOTAL, OTG, inst),\ + SRI_ARR(OTG_H_BLANK_START_END, OTG, inst),\ + SRI_ARR(OTG_H_SYNC_A, OTG, inst),\ + SRI_ARR(OTG_H_SYNC_A_CNTL, OTG, inst),\ + SRI_ARR(OTG_H_TIMING_CNTL, OTG, inst),\ + 
SRI_ARR(OTG_V_TOTAL, OTG, inst),\ + SRI_ARR(OTG_V_BLANK_START_END, OTG, inst),\ + SRI_ARR(OTG_V_SYNC_A, OTG, inst),\ + SRI_ARR(OTG_V_SYNC_A_CNTL, OTG, inst),\ + SRI_ARR(OTG_CONTROL, OTG, inst),\ + SRI_ARR(OTG_STEREO_CONTROL, OTG, inst),\ + SRI_ARR(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ + SRI_ARR(OTG_STEREO_STATUS, OTG, inst),\ + SRI_ARR(OTG_V_TOTAL_MAX, OTG, inst),\ + SRI_ARR(OTG_V_TOTAL_MIN, OTG, inst),\ + SRI_ARR(OTG_V_TOTAL_CONTROL, OTG, inst),\ + SRI_ARR(OTG_TRIGA_CNTL, OTG, inst),\ + SRI_ARR(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ + SRI_ARR(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ + SRI_ARR(OTG_STATUS_FRAME_COUNT, OTG, inst),\ + SRI_ARR(OTG_STATUS, OTG, inst),\ + SRI_ARR(OTG_STATUS_POSITION, OTG, inst),\ + SRI_ARR(OTG_NOM_VERT_POSITION, OTG, inst),\ + SRI_ARR(OTG_M_CONST_DTO0, OTG, inst),\ + SRI_ARR(OTG_M_CONST_DTO1, OTG, inst),\ + SRI_ARR(OTG_CLOCK_CONTROL, OTG, inst),\ + SRI_ARR(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ + SRI_ARR(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ + SRI_ARR(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ + SRI_ARR(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ + SRI_ARR(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ + SRI_ARR(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ + SRI_ARR(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ + SRI_ARR(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ + SRI_ARR(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ + SRI_ARR(CONTROL, VTG, inst),\ + SRI_ARR(OTG_VERT_SYNC_CONTROL, OTG, inst),\ + SRI_ARR(OTG_GSL_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC_CNTL, OTG, inst),\ + SRI_ARR(OTG_CRC0_DATA_RG, OTG, inst),\ + SRI_ARR(OTG_CRC0_DATA_B, OTG, inst),\ + SRI_ARR(OTG_CRC1_DATA_RG, OTG, inst),\ + SRI_ARR(OTG_CRC1_DATA_B, OTG, inst),\ + SRI_ARR(OTG_CRC2_DATA_RG, OTG, inst),\ + SRI_ARR(OTG_CRC2_DATA_B, OTG, inst),\ + SRI_ARR(OTG_CRC3_DATA_RG, OTG, inst),\ + SRI_ARR(OTG_CRC3_DATA_B, OTG, inst),\ + SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC1_WINDOWA_X_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC1_WINDOWA_Y_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC1_WINDOWB_X_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC1_WINDOWB_Y_CONTROL, OTG, inst),\ + SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG, inst),\ + SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG, inst),\ + SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG, inst),\ + SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG, inst),\ + SRI_ARR(OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG, inst),\ + SRI_ARR(OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG, inst),\ + SRI_ARR(OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG, inst),\ + SRI_ARR(OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG, inst),\ + SR_ARR(GSL_SOURCE_SELECT, inst),\ + SRI_ARR(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\ + SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI_ARR(OTG_GSL_WINDOW_X, OTG, inst),\ + SRI_ARR(OTG_GSL_WINDOW_Y, OTG, inst),\ + SRI_ARR(OTG_VUPDATE_KEEPOUT, OTG, inst),\ + SRI_ARR(OTG_DSC_START_POSITION, OTG, inst),\ + SRI_ARR(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ + SRI_ARR(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ + SRI_ARR(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ + SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst),\ + SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst),\ + SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst),\ + SRI_ARR(OTG_DRR_CONTROL, OTG, inst),\ + SRI2_ARR(OPTC_CLOCK_CONTROL, OPTC, inst) + +/* DPP */ +#define DPP_REG_LIST_DCN35_RI(id)\ + DPP_REG_LIST_DCN30_COMMON_RI(id) + +#endif /* 
_DCN35_RESOURCE_H_ */ -- cgit v1.2.3 From 68cfc5d8e459f50e5f46dca3b0f3c97a75f39975 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 20 Oct 2023 16:26:19 -0400 Subject: drm/amd/display: Try to acquire a free OTG master not used in cur ctx first [WHY & HOW] The current otg master pipe allocation logic is not optimized based current resource context. We should try to acquire a free OTG master not used in cur cts first to avoid unnecessary pipe switch from current state. Acked-by: Alex Hung Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 6159d819c5c5..49f8c90f0d2b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3586,6 +3586,7 @@ static void mark_seamless_boot_stream( * |________|_______________|___________|_____________| */ static bool acquire_otg_master_pipe_for_stream( + const struct dc_state *cur_ctx, struct dc_state *new_ctx, const struct resource_pool *pool, struct dc_stream_state *stream) @@ -3599,7 +3600,10 @@ static bool acquire_otg_master_pipe_for_stream( int pipe_idx; struct pipe_ctx *pipe_ctx = NULL; - pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool); + pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx( + &cur_ctx->res_ctx, &new_ctx->res_ctx, pool); + if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) + pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool); if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) { pipe_ctx = &new_ctx->res_ctx.pipe_ctx[pipe_idx]; memset(pipe_ctx, 0, sizeof(*pipe_ctx)); @@ -3659,7 +3663,7 @@ enum dc_status resource_map_pool_resources( if (!acquired) /* acquire new resources */ - acquired = acquire_otg_master_pipe_for_stream( + acquired = acquire_otg_master_pipe_for_stream(dc->current_state, context, pool, stream); pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); -- cgit v1.2.3 From 673d6d73eba79a1205ac403b68ef63da1c823da2 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Mon, 30 Oct 2023 15:21:12 -0400 Subject: drm/amd/display: Prefer currently used OTG master when acquiring free pipe [WHY & HOW] When acquiring an OTG master pipe we should prefer currently enabled OTG master pipes first. If there are no free pipes used as current OTG master pipe then we will try to acquire a currently unused free pipe as new OTG master instead of tearing down current secondary pipes from ODM or MPC combine. 
Reviewed-by: Alvin Lee Reviewed-by: Dillon Varone Acked-by: Alex Hung Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 37 ++++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/inc/resource.h | 12 ++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 49f8c90f0d2b..42a927710743 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1764,6 +1764,29 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx( return free_pipe_idx; } +int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx( + const struct resource_context *cur_res_ctx, + struct resource_context *new_res_ctx, + const struct resource_pool *pool) +{ + int free_pipe_idx = FREE_PIPE_INDEX_NOT_FOUND; + const struct pipe_ctx *new_pipe, *cur_pipe; + int i; + + for (i = 0; i < pool->pipe_count; i++) { + cur_pipe = &cur_res_ctx->pipe_ctx[i]; + new_pipe = &new_res_ctx->pipe_ctx[i]; + + if (resource_is_pipe_type(cur_pipe, OTG_MASTER) && + resource_is_pipe_type(new_pipe, FREE_PIPE)) { + free_pipe_idx = i; + break; + } + } + + return free_pipe_idx; +} + int resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine( const struct resource_context *cur_res_ctx, struct resource_context *new_res_ctx, @@ -3600,8 +3623,20 @@ static bool acquire_otg_master_pipe_for_stream( int pipe_idx; struct pipe_ctx *pipe_ctx = NULL; - pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx( + /* + * Upper level code is responsible to optimize unnecessary addition and + * removal for unchanged streams. So unchanged stream will keep the same + * OTG master instance allocated. When current stream is removed and a + * new stream is added, we want to reuse the OTG instance made available + * by the removed stream first. If not found, we try to avoid of using + * any free pipes already used in current context as this could tear + * down exiting ODM/MPC/MPO configuration unnecessarily. + */ + pipe_idx = recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx( &cur_ctx->res_ctx, &new_ctx->res_ctx, pool); + if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) + pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx( + &cur_ctx->res_ctx, &new_ctx->res_ctx, pool); if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND) pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool); if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) { diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 06ca8bfb91e7..0458d2d749f4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -500,6 +500,18 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx( struct resource_context *new_res_ctx, const struct resource_pool *pool); +/* + * Look for a free pipe in new resource context that is used in current resource + * context as an OTG master pipe. + * + * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise + * pipe idx of the free pipe + */ +int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx( + const struct resource_context *cur_res_ctx, + struct resource_context *new_res_ctx, + const struct resource_pool *pool); + /* * Look for a free pipe in new resource context that is used as a secondary DPP * pipe in any MPCC combine in current resource context. 
-- cgit v1.2.3 From 5f70d4ff8095a2ad362d2a00eb8d9f7e20f3daa1 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Thu, 12 Oct 2023 12:55:47 -0400 Subject: drm/amd/display: Enable DCN clock gating for DCN35 [WHY & HOW] Enable DCN clock gating for DCN35. Disable DTBCLK gate before link training and re-enable afterwards Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h | 6 +++- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c | 30 ++++++++++++++++ drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h | 7 +++- .../gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c | 10 +----- .../gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h | 1 - .../gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h | 15 +++++++- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 42 +++++++++++++++------- drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h | 2 -- .../amd/display/dc/resource/dcn35/dcn35_resource.c | 16 +++++++-- .../amd/display/dc/resource/dcn35/dcn35_resource.h | 1 + .../amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h | 8 +++++ 11 files changed, 108 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h index ab6d09c6fe34..76da59d8caaf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h @@ -291,7 +291,11 @@ type SYMCLKB_FE_SRC_SEL;\ type SYMCLKC_FE_SRC_SEL;\ type SYMCLKD_FE_SRC_SEL;\ - type SYMCLKE_FE_SRC_SEL; + type SYMCLKE_FE_SRC_SEL;\ + type DTBCLK_P0_GATE_DISABLE;\ + type DTBCLK_P1_GATE_DISABLE;\ + type DTBCLK_P2_GATE_DISABLE;\ + type DTBCLK_P3_GATE_DISABLE;\ struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c index 479f3683c0b7..142efd390d86 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c @@ -256,6 +256,21 @@ static void dccg35_set_dtbclk_dto( if (params->ref_dtbclk_khz && req_dtbclk_khz) { uint32_t modulo, phase; + switch (params->otg_inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1); + break; + } + // phase / modulo = dtbclk / dtbclk ref modulo = params->ref_dtbclk_khz * 1000; phase = req_dtbclk_khz * 1000; @@ -280,6 +295,21 @@ static void dccg35_set_dtbclk_dto( REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], PIPE_DTO_SRC_SEL[params->otg_inst], 2); } else { + switch (params->otg_inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 0); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 0); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 0); + break; + } + REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], DTBCLK_DTO_ENABLE[params->otg_inst], 0, PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 
0 : 1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h index 423feb4c2f3f..bde48bee0119 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h @@ -34,6 +34,7 @@ #define DCCG_REG_LIST_DCN35() \ DCCG_REG_LIST_DCN314(),\ SR(DPPCLK_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL5),\ SR(DCCG_GATE_DISABLE_CNTL6),\ SR(DCCG_GLOBAL_FGCG_REP_CNTL),\ SR(SYMCLKA_CLOCK_ENABLE),\ @@ -174,7 +175,11 @@ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\ - DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh) + DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\ struct dccg *dccg35_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c index 46f71ff08fd1..0f60c40e1fc5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c @@ -332,13 +332,6 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) pg_cntl->pg_res_enable[PG_DCIO] = power_on; } -void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on) -{ - struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); - - REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, power_on ? 
1 : 0); -} - static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl) { struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); @@ -508,8 +501,7 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = { .mpcc_pg_control = pg_cntl35_mpcc_pg_control, .opp_pg_control = pg_cntl35_opp_pg_control, .optc_pg_control = pg_cntl35_optc_pg_control, - .dwb_pg_control = pg_cntl35_dwb_pg_control, - .set_force_poweron_domain22 = pg_cntl35_set_force_poweron_domain22 + .dwb_pg_control = pg_cntl35_dwb_pg_control }; struct pg_cntl *pg_cntl35_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h index 069dae08e222..3de240884d22 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h @@ -183,7 +183,6 @@ void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on); void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl); -void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on); struct pg_cntl *pg_cntl35_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h index 44b4df6469d1..52f045cfd52a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h @@ -682,6 +682,7 @@ struct dce_hwseq_registers { uint32_t DCHUBBUB_ARB_HOSTVM_CNTL; uint32_t HPO_TOP_HW_CONTROL; uint32_t DMU_CLK_CNTL; + uint32_t DCCG_GATE_DISABLE_CNTL4; uint32_t DCCG_GATE_DISABLE_CNTL5; }; /* set field name */ @@ -1199,7 +1200,19 @@ struct dce_hwseq_registers { type PHYBSYMCLK_ROOT_GATE_DISABLE;\ type PHYCSYMCLK_ROOT_GATE_DISABLE;\ type PHYDSYMCLK_ROOT_GATE_DISABLE;\ - type PHYESYMCLK_ROOT_GATE_DISABLE; + type PHYESYMCLK_ROOT_GATE_DISABLE;\ + type DTBCLK_P0_GATE_DISABLE;\ + type DTBCLK_P1_GATE_DISABLE;\ + type DTBCLK_P2_GATE_DISABLE;\ + type DTBCLK_P3_GATE_DISABLE;\ + type DPSTREAMCLK0_GATE_DISABLE;\ + type DPSTREAMCLK1_GATE_DISABLE;\ + type DPSTREAMCLK2_GATE_DISABLE;\ + type DPSTREAMCLK3_GATE_DISABLE;\ + type DPIASYMCLK0_GATE_DISABLE;\ + type DPIASYMCLK1_GATE_DISABLE;\ + type DPIASYMCLK2_GATE_DISABLE;\ + type DPIASYMCLK3_GATE_DISABLE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 5a8258287438..39260371beb9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -145,17 +145,36 @@ void dcn35_init_hw(struct dc *dc) hws->funcs.bios_golden_init(dc); } - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); - - /* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */ - REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, - PHYBSYMCLK_ROOT_GATE_DISABLE, 1, - PHYCSYMCLK_ROOT_GATE_DISABLE, 1, - PHYDSYMCLK_ROOT_GATE_DISABLE, 1, - PHYESYMCLK_ROOT_GATE_DISABLE, 1); + if (!dc->debug.disable_clock_gate) { + REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); + REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + /* Disable gating for PHYASYMCLK. 
This will be enabled in dccg if needed */ + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, + PHYBSYMCLK_ROOT_GATE_DISABLE, 1, + PHYCSYMCLK_ROOT_GATE_DISABLE, 1, + PHYDSYMCLK_ROOT_GATE_DISABLE, 1, + PHYESYMCLK_ROOT_GATE_DISABLE, 1); + + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL4, + DPIASYMCLK0_GATE_DISABLE, 0, + DPIASYMCLK1_GATE_DISABLE, 0, + DPIASYMCLK2_GATE_DISABLE, 0, + DPIASYMCLK3_GATE_DISABLE, 0); + + REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0xFFFFFFFF); + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5, + DTBCLK_P0_GATE_DISABLE, 0, + DTBCLK_P1_GATE_DISABLE, 0, + DTBCLK_P2_GATE_DISABLE, 0, + DTBCLK_P3_GATE_DISABLE, 0); + REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK0_GATE_DISABLE, 0, + DPSTREAMCLK1_GATE_DISABLE, 0, + DPSTREAMCLK2_GATE_DISABLE, 0, + DPSTREAMCLK3_GATE_DISABLE, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); + } // Initialize the dccg if (res_pool->dccg->funcs->dccg_init) @@ -332,9 +351,6 @@ void dcn35_init_hw(struct dc *dc) if (dc->res_pool->pg_cntl) { if (dc->res_pool->pg_cntl->funcs->init_pg_status) dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl); - - if (dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22) - dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22(dc->res_pool->pg_cntl, false); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index b9812afb886b..00ea3864dd4d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -47,8 +47,6 @@ struct pg_cntl_funcs { void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*init_pg_status)(struct pg_cntl *pg_cntl); - - void (*set_force_poweron_domain22)(struct pg_cntl *pg_cntl, bool power_on); }; #endif //__DC_PG_CNTL_H__ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index a4027bf053c5..5c935d94a95c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -626,7 +626,19 @@ static struct dce_hwseq_registers hwseq_reg; HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ - HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh) + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK0_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK1_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK2_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK3_GATE_DISABLE, mask_sh) static const struct 
dce_hwseq_shift hwseq_shift = { HWSEQ_DCN35_MASK_SH_LIST(__SHIFT) @@ -705,7 +717,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .disable_dpp_power_gate = true, .disable_hubp_power_gate = true, - .disable_clock_gate = true, + .disable_clock_gate = false, .disable_dsc_power_gate = true, .vsr_support = true, .performance_trace = false, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h index 99aea102e3f7..a51c4a9eaafe 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h @@ -166,6 +166,7 @@ struct resource_pool *dcn35_create_resource_pool( SR(MMHUBBUB_MEM_PWR_CNTL), \ SR(DCCG_GATE_DISABLE_CNTL), \ SR(DCCG_GATE_DISABLE_CNTL2), \ + SR(DCCG_GATE_DISABLE_CNTL4), \ SR(DCCG_GATE_DISABLE_CNTL5), \ SR(DCFCLK_CNTL),\ SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \ diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h index b64664879211..fca72e2ec929 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h @@ -6220,12 +6220,20 @@ #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE__SHIFT 0x3 #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE__SHIFT 0x4 #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE__SHIFT 0x11 +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE__SHIFT 0x17 +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE__SHIFT 0x18 +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE__SHIFT 0x19 +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE__SHIFT 0x1a #define DCCG_GATE_DISABLE_CNTL4__PHYA_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000001L #define DCCG_GATE_DISABLE_CNTL4__PHYB_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000002L #define DCCG_GATE_DISABLE_CNTL4__PHYC_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000004L #define DCCG_GATE_DISABLE_CNTL4__PHYD_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000008L #define DCCG_GATE_DISABLE_CNTL4__PHYE_REFCLK_ROOT_GATE_DISABLE_MASK 0x00000010L #define DCCG_GATE_DISABLE_CNTL4__HDMICHARCLK0_ROOT_GATE_DISABLE_MASK 0x00020000L +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK0_GATE_DISABLE_MASK 0x00800000L +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK1_GATE_DISABLE_MASK 0x01000000L +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK2_GATE_DISABLE_MASK 0x02000000L +#define DCCG_GATE_DISABLE_CNTL4__DPIASYMCLK3_GATE_DISABLE_MASK 0x04000000L #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_SRC_SEL__SHIFT 0x0 #define DPSTREAMCLK_CNTL__DPSTREAMCLK0_EN__SHIFT 0x3 #define DPSTREAMCLK_CNTL__DPSTREAMCLK1_SRC_SEL__SHIFT 0x4 -- cgit v1.2.3 From cc6201b773f12388c234aa10145322ccc429959e Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Tue, 31 Oct 2023 16:57:36 -0400 Subject: drm/amd/display: Add disable timeout option [WHY] Driver continues running whenever there is is timeout from smu or dmcub. It is difficult to track failure state when dcn, dc or dmcub changes on root failure. [HOW] Add disable_timeout option to halt driver whenever there is a failure in response. 
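The effect of the new option is easiest to see as a single polling helper. A minimal sketch of the pattern used in the dc_dmub_srv changes below (the wait_for_idle_or_forever() wrapper is hypothetical; dmub_srv_wait_for_idle(), DMUB_STATUS_OK and debug.disable_timeout are the names from the patch):

	/*
	 * Poll DMUB for completion; when debug.disable_timeout is set, never
	 * give up, so the failing state is held for inspection instead of
	 * being skipped over.
	 */
	static enum dmub_status wait_for_idle_or_forever(struct dc_dmub_srv *dc_dmub_srv)
	{
		enum dmub_status status;

		do {
			status = dmub_srv_wait_for_idle(dc_dmub_srv->dmub, 100000);
		} while (status != DMUB_STATUS_OK &&
			 dc_dmub_srv->ctx->dc->debug.disable_timeout);

		return status;
	}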
Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Duncan Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 29 +++++++++++++++++----- 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c index b6b8c3ca1572..af0a0f292595 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c @@ -116,6 +116,9 @@ static uint32_t dcn35_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un msleep(delay_us/1000); else if (delay_us > 0) udelay(delay_us); + + if (clk_mgr->base.ctx->dc->debug.disable_timeout) + max_retries++; } while (max_retries--); return res_val; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9316b737a8ba..ae54ef6837f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -978,6 +978,7 @@ struct dc_debug_options { bool psp_disabled_wa; unsigned int ips2_eval_delay_us; unsigned int ips2_entry_delay_us; + bool disable_timeout; }; struct gpu_info_soc_bounding_box_v1_0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 0e07699c1e83..53400cc05b5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -241,7 +241,12 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun // Wait for DMUB to process command if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { - status = dmub_srv_wait_for_idle(dmub, 100000); + if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { + do { + status = dmub_srv_wait_for_idle(dmub, 100000); + } while (status != DMUB_STATUS_OK); + } else + status = dmub_srv_wait_for_idle(dmub, 100000); if (status != DMUB_STATUS_OK) { DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); @@ -1147,10 +1152,16 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) return true; if (wait) { - status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000); - if (status != DMUB_STATUS_OK) { - DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status); - return false; + if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { + do { + status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000); + } while (status != DMUB_STATUS_OK); + } else { + status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status); + return false; + } } } else return dmub_srv_is_hw_pwr_up(dc_dmub_srv->dmub); @@ -1187,7 +1198,7 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) const uint32_t max_num_polls = 10000; uint32_t allow_state = 0; uint32_t commit_state = 0; - uint32_t i; + int i; if (dc->debug.dmcub_emulation) return; @@ -1220,6 +1231,9 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) break; udelay(1); + + if (dc->debug.disable_timeout) + i--; } ASSERT(i < max_num_polls); @@ -1242,6 +1256,9 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) break; udelay(1); + + if (dc->debug.disable_timeout) + i--; } ASSERT(i < max_num_polls); } -- cgit v1.2.3 From c21a764a98cb59d673cad3da64f35f4dec951951 Mon Sep 17 00:00:00 2001 From: 
Krunoslav Kovac Date: Wed, 1 Nov 2023 15:59:56 -0400 Subject: drm/amd/display: Send PQ bit in AMD VSIF [WHY & HOW] PB9 bit 5 was added to signal PQ EOTF in AMD vendor specific infoframe. This change sets it when appropriate. Reviewed-by: Aric Cyr Acked-by: Alex Hung Signed-off-by: Krunoslav Kovac Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index ccecddafeb05..47296d155c3a 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -693,10 +693,12 @@ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, if (app_tf != TRANSFER_FUNC_UNKNOWN) { infopacket->valid = true; - if (app_tf != TRANSFER_FUNC_PQ2084) { + if (app_tf == TRANSFER_FUNC_PQ2084) + infopacket->sb[9] |= 0x20; // PB9 = [Bit 5 = PQ EOTF Active] + else { infopacket->sb[6] |= 0x08; // PB6 = [Bit 3 = Native Color Active] if (app_tf == TRANSFER_FUNC_GAMMA_22) - infopacket->sb[9] |= 0x04; // PB6 = [Bit 2 = Gamma 2.2 EOTF Active] + infopacket->sb[9] |= 0x04; // PB9 = [Bit 2 = Gamma 2.2 EOTF Active] } } } -- cgit v1.2.3 From c29085d29562990559163302d9e28d1e88223d90 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 30 Oct 2023 16:23:08 -0400 Subject: drm/amd/display: Enable DSC Flag in MST Mode Validation [WHY & HOW] When dsc is possible, MST mode validation includes: 1. if maximum dsc compression cannot fit into end to end bw, mode pruned 2. if native bw cannot fit into end to end bw, try to enabled dsc to see whether a feasible dsc config can be found 3. 
if native bw can fit into end to end bw, mode supported Reviewed-by: Wayne Lin Acked-by: Alex Hung Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 44 +++++++++++++--------- 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index ec87e31c6fe8..8d7d4024f285 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1602,9 +1602,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( struct dc_link_settings cur_link_settings; unsigned int end_to_end_bw_in_kbps = 0; unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; - unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; - uint16_t full_pbn = aconnector->mst_output_port->full_pbn; + struct dc_dsc_config_options dsc_options = {0}; /* * Consider the case with the depth of the mst topology tree is equal or less than 2 @@ -1620,30 +1619,39 @@ enum dc_status dm_dp_mst_is_port_support_mode( (aconnector->mst_output_port->passthrough_aux || aconnector->dsc_aux == &aconnector->mst_output_port->aux)) { cur_link_settings = stream->link->verified_link_cap; + upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, &cur_link_settings); + down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn); - upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, - &cur_link_settings); - down_link_bw_in_kbps = kbps_from_pbn(full_pbn); - - /* pick the bottleneck */ - end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, - down_link_bw_in_kbps); - - /* - * use the maximum dsc compression bandwidth as the required - * bandwidth for the mode - */ - max_compressed_bw_in_kbps = bw_range.min_kbps; + /* pick the end to end bw bottleneck */ + end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps); - if (end_to_end_bw_in_kbps < max_compressed_bw_in_kbps) { - DRM_DEBUG_DRIVER("Mode does not fit into DSC pass-through bandwidth validation\n"); + if (end_to_end_bw_in_kbps < bw_range.min_kbps) { + DRM_DEBUG_DRIVER("maximum dsc compression cannot fit into end-to-end bandwidth\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } + + if (end_to_end_bw_in_kbps < bw_range.stream_kbps) { + dc_dsc_get_default_config_option(stream->link->dc, &dsc_options); + dsc_options.max_target_bpp_limit_override_x16 = aconnector->base.display_info.max_dsc_bpp * 16; + if (dc_dsc_compute_config(stream->sink->ctx->dc->res_pool->dscs[0], + &stream->sink->dsc_caps.dsc_dec_caps, + &dsc_options, + end_to_end_bw_in_kbps, + &stream->timing, + dc_link_get_highest_encoding_format(stream->link), + &stream->timing.dsc_cfg)) { + stream->timing.flags.DSC = 1; + DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc and dsc config found\n"); + } else { + DRM_DEBUG_DRIVER("end-to-end bandwidth require dsc but dsc config not found\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; + } + } } else { /* check if mode could be supported within full_pbn */ bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); - if (pbn > full_pbn) + if (pbn > aconnector->mst_output_port->full_pbn) return DC_FAIL_BANDWIDTH_VALIDATE; } -- cgit v1.2.3 From ed4ae8f77f2c4ff05244db99330d1eff828d9f7d Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 
4 Nov 2023 01:53:13 -0400 Subject: drm/amd/display: Add new command to disable replay timing resync [WHY & HOW] Add new command to disable replay timing resync Acked-by: Alex Hung Signed-off-by: Anthony Koo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index aa6e6923afed..55573083bc31 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2876,6 +2876,10 @@ enum dmub_cmd_replay_type { * Set power opt and coasting vtotal. */ DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL = 4, + /** + * Set disabled iiming sync. + */ + DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED = 5, }; /** @@ -3038,6 +3042,27 @@ struct dmub_cmd_replay_set_power_opt_data { uint32_t power_opt; }; +/** + * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command. + */ +struct dmub_cmd_replay_set_timing_sync_data { + /** + * Panel Instance. + * Panel isntance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[3]; + /** + * REPLAY set_timing_sync + */ + bool timing_sync_supported; +}; + /** * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command. */ @@ -3104,6 +3129,20 @@ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal { struct dmub_cmd_replay_set_coasting_vtotal_data replay_set_coasting_vtotal_data; }; +/** + * Definition of a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command. + */ +struct dmub_rb_cmd_replay_set_timing_sync { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command. + */ + struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data; +}; + /** * Set of HW components that can be locked. * @@ -4237,6 +4276,8 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL command. */ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal replay_set_power_opt_and_coasting_vtotal; + + struct dmub_rb_cmd_replay_set_timing_sync replay_set_timing_sync; }; /** -- cgit v1.2.3 From 8a2553d5c7ade00d1b508bbd418d5c4803c12fdd Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 6 Nov 2023 19:30:51 -0700 Subject: drm/amd/display: Add missing chips for HDCP [WHAT] Add missing HDCP ID in the message id enum. 
Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/include/hdcp_msg_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h index 42229b4effdc..eced9ad91f1d 100644 --- a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h +++ b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h @@ -69,6 +69,11 @@ enum hdcp_message_id { HDCP_MESSAGE_ID_READ_RXSTATUS, HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE, + /* PS175 chip */ + + HDCP_MESSAGE_ID_WRITE_PS175_CMD, + HDCP_MESSAGE_ID_READ_PS175_RSP, + HDCP_MESSAGE_ID_MAX }; -- cgit v1.2.3 From 466a7d115326ece682c2b60d1c77d1d0b9010b4f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Nov 2023 16:34:51 -0600 Subject: drm/amd: Use the first non-dGPU PCI device for BW limits When bandwidth limits are looked up using pcie_bandwidth_available() virtual links such as USB4 are analyzed which might not represent the real speed. Furthermore devices may change speeds autonomously which may introduce conditional variation to the results reported in the status registers. Instead look at the capabilities of first PCI device outside of dGPU to decide upper limits that the dGPU will work at. For eGPU this effectively means that it will use the speed of the link partner. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925#note_2145860 Link: https://www.usb.org/document-library/usb4r-specification-v20 USB4 V2 with Errata and ECN through June 2023 Section 11.2.1 Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 37 ++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 151f8b950b72..ec3bf949ee2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5737,6 +5737,39 @@ recover_end: return r; } +/** + * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner + * + * @adev: amdgpu_device pointer + * @speed: pointer to the speed of the link + * @width: pointer to the width of the link + * + * Evaluate the hierarchy to find the speed and bandwidth capabilities of the + * first physical partner to an AMD dGPU. + * This will exclude any virtual switches and links. 
+ */ +static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *parent = adev->pdev; + + if (!speed || !width) + return; + + *speed = PCI_SPEED_UNKNOWN; + *width = PCIE_LNK_WIDTH_UNKNOWN; + + while ((parent = pci_upstream_bridge(parent))) { + /* skip upstream/downstream switches internal to dGPU*/ + if (parent->vendor == PCI_VENDOR_ID_ATI) + continue; + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + break; + } +} + /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -5770,8 +5803,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) return; - pcie_bandwidth_available(adev->pdev, NULL, - &platform_speed_cap, &platform_link_width); + amdgpu_device_partner_bandwidth(adev, &platform_speed_cap, + &platform_link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ -- cgit v1.2.3 From d9b3a066dfcd3fe50b4dc561d8510c43c0ad8863 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Nov 2023 16:34:52 -0600 Subject: drm/amd: Exclude dGPUs in eGPU enclosures from DPM quirks The PCIe speed capabilities advertised by a USB4 or TBT3 link are limited to PCIe gen 1 per the USB4 spec. In reality the speed will change dynamically based on fabric conditions and other traffic. DPM is disabled when dGPUs are connected directly to Intel hosts since the PCIe root port isn't able to handle dynamic speed switching. As this limitation is specifically for PCIe root ports in the SoC, don't apply it when connected to an eGPU enclosure connected to an Intel host. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2885 Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ec3bf949ee2f..b5edf40b5d03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1551,11 +1551,15 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 */ -static bool amdgpu_device_pcie_dynamic_switching_supported(void) +static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev) { #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); + /* eGPU change speeds based on USB4 fabric conditions */ + if (dev_is_removable(adev->dev)) + return true; + if (c->x86_vendor == X86_VENDOR_INTEL) return false; #endif @@ -2395,7 +2399,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; - if (!amdgpu_device_pcie_dynamic_switching_supported()) + if (!amdgpu_device_pcie_dynamic_switching_supported(adev)) adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; total = true; -- cgit v1.2.3 From 59e4db5375f587954eb779ac9c7888a6c81c306b Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 5 Nov 2023 20:22:06 -0500 Subject: drm/amd/display: Promote DC to 3.2.260 - Add missing chips for 
HDCP - Add new command to disable replay timing resync - Fix encoder disable logic - Enable DSC Flag in MST Mode Validation - Change the DMCUB mailbox memory location from FB to inbox - Add disable timeout option - Negate IPS allow and commit bits - Enable DCN clock gating for DCN35 - Prefer currently used OTG master when acquiring free pipe - Try to acquire a free OTG master not used in cur ctx first - Clear dpcd_sink_ext_caps if not set - Enable fast plane updates on DCN3.2 and above - Add null checks for 8K60 lightup - Refactor resource into component directory - Fix DSC not Enabled on Direct MST Sink - Guard against invalid RPTR/WPTR being set - Enable CM low mem power optimization - Fix a debugfs null pointer error Acked-by: Alex Hung Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index ae54ef6837f9..541e781267b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.259" +#define DC_VER "3.2.260" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 75fb313c55fa102f973c440f55dc63ffc61f3b54 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Tue, 31 Oct 2023 12:32:54 +0800 Subject: drm/amdgpu: expose the connected port num info through sysfs By catting the xgmi_port_num sysfs node, it prints out the info in the format of : -> : for one xgmi link. For example, in case of 4 sockets fully and evenly connected setup, it would be like as below for the first node in the hive. 01:02 -> 02:03 01:03 -> 02:02 01:07 -> 03:04 01:04 -> 03:07 01:06 -> 04:05 01:05 -> 04:06 Based on the fact that there is two xgmi links between each socket pair, "01:02 -> 02:03" means that the current socket in question use the port 2 to connect with port 3 of the second node in the hive and so on. v2: print out the src/dst node id for each xgmi link (lijo) v3: replace the current_node++ with +1 to align with dst node (le) and use the dev_err instead of pr_err (lijo) v4: fix checkpatch warning (alex) Signed-off-by: Shiwu Zhang Acked-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index bd20cb3b9819..44d8c1a11e1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -413,6 +413,38 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, return sysfs_emit(buf, "%s\n", buf); } +static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i, j, size = 0; + int current_node; + /* + * get the node id in the sysfs for the current socket and show + * it in the port num info output in the sysfs for easy reading. + * it is NOT the one retrieved from xgmi ta. 
+ */ + for (i = 0; i < top->num_nodes; i++) { + if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) { + current_node = i; + break; + } + } + + for (i = 0; i < top->num_nodes; i++) { + for (j = 0; j < top->nodes[i].num_links; j++) + /* node id in sysfs starts from 1 rather than 0 so +1 here */ + size += sysfs_emit_at(buf, size, "%02x:%02x -> %02x:%02x\n", current_node + 1, + top->nodes[i].port_num[j].src_xgmi_port_num, i + 1, + top->nodes[i].port_num[j].dst_xgmi_port_num); + } + + return size; +} + #define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) static ssize_t amdgpu_xgmi_show_error(struct device *dev, struct device_attribute *attr, @@ -452,6 +484,7 @@ static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL); static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL); static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL); +static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -487,6 +520,13 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, if (ret) pr_err("failed to create xgmi_num_links\n"); + /* Create xgmi port num file if supported */ + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { + ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num); + if (ret) + dev_err(adev->dev, "failed to create xgmi_port_num\n"); + } + /* Create sysfs link to hive info folder on the first device */ if (hive->kobj.parent != (&adev->dev->kobj)) { ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj, @@ -517,6 +557,8 @@ remove_file: device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) + device_remove_file(adev->dev, &dev_attr_xgmi_port_num); success: return ret; @@ -533,6 +575,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) + device_remove_file(adev->dev, &dev_attr_xgmi_port_num); if (hive->kobj.parent != (&adev->dev->kobj)) sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info"); -- cgit v1.2.3 From a3cc7dbe9957f856b84a504687a85e22e02a49db Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Sep 2023 15:18:16 +0530 Subject: drm/amdgpu: add pm metrics structure definition Define the pm metrics structures to be exposed via sysfs. 
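A rough sketch of how a consumer would split the blob into the common header and the opaque payload (the parse_pm_metrics() helper is hypothetical, and it assumes structure_size counts the whole blob including the header):

	/* Hypothetical reader for the versioned metrics blob defined below. */
	static int parse_pm_metrics(const void *buf, size_t len)
	{
		const struct amdgpu_pm_metrics *m = buf;
		size_t payload;

		if (len < sizeof(m->common_header) ||
		    m->common_header.structure_size < sizeof(m->common_header) ||
		    m->common_header.structure_size > len)
			return -EINVAL;

		payload = m->common_header.structure_size - sizeof(m->common_header);

		/*
		 * data[0..payload) is decoded according to pmmetrics_version,
		 * for the firmware identified by pmfw_version and
		 * mp1_ip_discovery_version.
		 */
		return 0;
	}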
Signed-off-by: Alex Deucher Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Yang Wang --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index b14231f470dc..f9c438d16c56 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -1127,4 +1127,19 @@ struct gpu_metrics_v3_0 { /* Metrics table alpha filter time constant [us] */ uint32_t time_filter_alphavalue; }; + +struct amdgpu_pmmetrics_header { + uint16_t structure_size; + uint16_t pad; + uint32_t mp1_ip_discovery_version; + uint32_t pmfw_version; + uint32_t pmmetrics_version; +}; + +struct amdgpu_pm_metrics { + struct amdgpu_pmmetrics_header common_header; + + uint8_t data[]; +}; + #endif -- cgit v1.2.3 From 425285d39afddaf4a9dab36045b816af0cc3e400 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 9 Nov 2023 11:37:18 +0800 Subject: drm/amdgpu: add amdgpu runpm usage trace for separate funcs Add trace for amdgpu runpm separate funcs usage and this will help debugging on the case of runpm usage missed to dereference. In the normal case the runpm usage count referred by one kind of functionality pairwise and usage should be changed from 1 to 0, otherwise there will be an issue in the amdgpu runpm usage dereference. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 15 +++++++++++++++ 3 files changed, 21 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e7e87a3b2601..decbbe3d4f06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -42,6 +42,7 @@ #include #include #include +#include "amdgpu_trace.h" /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation @@ -63,6 +64,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, attach->peer2peer = false; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(1, __func__); if (r < 0) goto out; @@ -70,6 +72,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, out: pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); return r; } @@ -90,6 +93,7 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index dc230212746a..70bff8cecfda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -183,6 +183,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); pm_runtime_get_noresume(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(1, __func__); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { struct dma_fence *old; @@ -310,6 +311,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) 
dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); } while (last_seq != seq); return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 2fd1bfb35916..f539b1d00234 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -554,6 +554,21 @@ TRACE_EVENT(amdgpu_reset_reg_dumps, __entry->value) ); +TRACE_EVENT(amdgpu_runpm_reference_dumps, + TP_PROTO(uint32_t index, const char *func), + TP_ARGS(index, func), + TP_STRUCT__entry( + __field(uint32_t, index) + __string(func, func) + ), + TP_fast_assign( + __entry->index = index; + __assign_str(func, func); + ), + TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n", + __entry->index, + __get_str(func)) +); #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif -- cgit v1.2.3 From 699d392903c3cebb7d2a2a3505ec9047c419dcd7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 12 Nov 2023 10:09:44 +0530 Subject: drm/amdgpu: Add function parameter 'xcc_mask' not described in 'amdgpu_vm_flush_compute_tlb' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1373: warning: Function parameter or member 'xcc_mask' not described in 'amdgpu_vm_flush_compute_tlb' Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Cc: "Pan, Xinhui" Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d59154ddaaed..7da71b6a9dc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1443,6 +1443,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * @vm: requested vm * @flush_type: flush type + * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush. * * Flush TLB if needed for a compute VM. * -- cgit v1.2.3 From 89a410b2e416f2216b29183d6b8537abeccc7abb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 15 Nov 2023 10:21:18 -0800 Subject: drm/i915/dg2: Wa_18028616096 now applies to all DG2 The workaround database was just updated to extend this workaround to DG2-G11 (whereas previously it applied only to G10 and G12). 
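The change relocates the register write rather than altering it; a simplified sketch of the result (the enclosing IS_DG2() check is an assumption here, since it sits outside the hunk context shown below):

	if (IS_DG2(i915)) {
		/* Wa_22015475538:dg2 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);

		/* Wa_18028616096 - now applied to G10, G11 and G12 alike */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
	}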
Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20231115182117.2551522-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 63205edfea50..9bc0654efdc0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2937,6 +2937,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * Wa_22015475538:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + + /* Wa_18028616096 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); } if (IS_DG2_G11(i915)) { @@ -2965,11 +2968,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li true); } - if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) { - /* Wa_18028616096 */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); - } - if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_mcr_masked_en(wal, -- cgit v1.2.3 From 3c7a5eb700661e8905ab4e50c2d09c6568125280 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Thu, 16 Nov 2023 13:25:11 -0800 Subject: drm/i915/mtl: Update Wa_22018931422 Commit 78cc55e0b64c ("drm/i915/mcr: Hold GT forcewake during steering operations") introduced the workaround which was in early stages. With a valid lineage number update Workaround for future tracking. Cc: Matt Roper Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20231116212511.1760446-1-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 34913912d8ae..e253750a51c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -388,8 +388,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) * registers. This wakeref will be released in the unlock * routine. * - * This is expected to become a formally documented/numbered - * workaround soon. + * Wa_22018931422 */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT); -- cgit v1.2.3 From 44eea8d08078bbce4d0f76c16706ab57ec38da62 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 13 Nov 2023 08:54:56 +0000 Subject: drm/i915: Remove return type from i915_drm_client_remove_object There is no need to return anything in the version which was merged and also the implementation of the !CONFIG_PROC_FS wasn't returning anything, causing a build failure there. 
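The failure mode is visible from the stub alone; reduced to its essentials, the pre-fix !CONFIG_PROC_FS variant was (simplified from the diff below):

	/*
	 * Declared to return bool but with no return statement - this is the
	 * missing-return diagnostic the build robot reported. Making both
	 * variants void resolves it.
	 */
	static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
	{
	}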
Signed-off-by: Tvrtko Ursulin Fixes: e4ae85e364fc ("drm/i915: Add ability for tracking buffer objects per client") Cc: Aravind Iddamsetty Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311110104.8TlHVxUI-lkp@intel.com/ Reviewed-by: Jani Nikula Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231113085457.199053-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drm_client.c | 6 ++---- drivers/gpu/drm/i915/i915_drm_client.h | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 7efffdaa508d..be9acfd9410e 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -191,22 +191,20 @@ void i915_drm_client_add_object(struct i915_drm_client *client, spin_unlock_irqrestore(&client->objects_lock, flags); } -bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj) +void i915_drm_client_remove_object(struct drm_i915_gem_object *obj) { struct i915_drm_client *client = fetch_and_zero(&obj->client); unsigned long flags; /* Object may not be associated with a client. */ if (!client) - return false; + return; spin_lock_irqsave(&client->objects_lock, flags); list_del_rcu(&obj->client_link); spin_unlock_irqrestore(&client->objects_lock, flags); i915_drm_client_put(client); - - return true; } void i915_drm_client_add_context_objects(struct i915_drm_client *client, diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index 69cedfcd3d69..a439dd789936 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -70,7 +70,7 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file); #ifdef CONFIG_PROC_FS void i915_drm_client_add_object(struct i915_drm_client *client, struct drm_i915_gem_object *obj); -bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj); +void i915_drm_client_remove_object(struct drm_i915_gem_object *obj); void i915_drm_client_add_context_objects(struct i915_drm_client *client, struct intel_context *ce); #else @@ -79,7 +79,8 @@ static inline void i915_drm_client_add_object(struct i915_drm_client *client, { } -static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj) +static inline void +i915_drm_client_remove_object(struct drm_i915_gem_object *obj) { } -- cgit v1.2.3 From e31b380741bfa27d274a9f9610fd732e1204ea24 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 13 Nov 2023 08:54:57 +0000 Subject: drm/i915: Add __rcu annotation to cursor when iterating client objects __rcu annotation is needed to avoid the sparse warnings such as: .../i915_drm_client.c:92:9: sparse: sparse: incompatible types in comparison expression (different address spaces): .../i915_drm_client.c:92:9: sparse: struct list_head [noderef] __rcu * .../i915_drm_client.c:92:9: sparse: struct list_head * Signed-off-by: Tvrtko Ursulin Fixes: 968853033d8a ("drm/i915: Implement fdinfo memory stats printing") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311110610.h0m6ydI5-lkp@intel.com/ Cc: Andi Shyti Cc: Aravind Iddamsetty Reviewed-by: Jani Nikula Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231113085457.199053-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drm_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git 
a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index be9acfd9410e..fa6852713bee 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -78,7 +78,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) struct drm_i915_private *i915 = fpriv->i915; struct drm_i915_gem_object *obj; struct intel_memory_region *mr; - struct list_head *pos; + struct list_head __rcu *pos; unsigned int id; /* Public objects. */ -- cgit v1.2.3 From dfed6b58d54f3a5d7e6bc1fb060e2c936330eba2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Nov 2023 08:44:56 +0000 Subject: drm/i915/gsc: Mark internal GSC engine with reserved uabi class The GSC CS is not exposed to the user, so we skipped assigning a uabi class number for it. However, the trace logs use the uabi class and instance to identify the engine, so leaving uabi class unset makes the GSC CS show up as the RCS in those logs. Given that the engine is not exposed to the user, we can't add a new case in the uabi enum, so we insted internally define a kernel internal class as -1. At the same time remove special handling for the name and complete the uabi_classes array so internal class is automatically correctly assigned. Engine will show as 65535:0 other0 in the logs/traces which should be unique enough. v2: * Fix uabi class u8 vs u16 type confusion. Signed-off-by: Tvrtko Ursulin Fixes: 194babe26bdc ("drm/i915/mtl: don't expose GSC command streamer to the user") Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231116084456.291533-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine_user.c | 39 ++++++++++++++++------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 118164ddbb2e..833987015b8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -41,12 +41,15 @@ void intel_engine_add_user(struct intel_engine_cs *engine) llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist); } -static const u8 uabi_classes[] = { +#define I915_NO_UABI_CLASS ((u16)(-1)) + +static const u16 uabi_classes[] = { [RENDER_CLASS] = I915_ENGINE_CLASS_RENDER, [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE, + [OTHER_CLASS] = I915_NO_UABI_CLASS, /* Not exposed to users, no uabi class. 
*/ }; static int engine_cmp(void *priv, const struct list_head *A, @@ -200,6 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16 void intel_engines_driver_register(struct drm_i915_private *i915) { + u16 name_instance, other_instance = 0; struct legacy_ring ring = {}; struct list_head *it, *next; struct rb_node **p, *prev; @@ -216,27 +220,28 @@ void intel_engines_driver_register(struct drm_i915_private *i915) if (intel_gt_has_unrecoverable_error(engine->gt)) continue; /* ignore incomplete engines */ - /* - * We don't want to expose the GSC engine to the users, but we - * still rename it so it is easier to identify in the debug logs - */ - if (engine->id == GSC0) { - engine_rename(engine, "gsc", 0); - continue; - } - GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; + if (engine->uabi_class == I915_NO_UABI_CLASS) { + name_instance = other_instance++; + } else { + GEM_BUG_ON(engine->uabi_class >= + ARRAY_SIZE(i915->engine_uabi_class_count)); + name_instance = + i915->engine_uabi_class_count[engine->uabi_class]++; + } + engine->uabi_instance = name_instance; - GEM_BUG_ON(engine->uabi_class >= - ARRAY_SIZE(i915->engine_uabi_class_count)); - engine->uabi_instance = - i915->engine_uabi_class_count[engine->uabi_class]++; - - /* Replace the internal name with the final user facing name */ + /* + * Replace the internal name with the final user and log facing + * name. + */ engine_rename(engine, intel_engine_class_repr(engine->class), - engine->uabi_instance); + name_instance); + + if (engine->uabi_class == I915_NO_UABI_CLASS) + continue; rb_link_node(&engine->uabi_node, prev, p); rb_insert_color(&engine->uabi_node, &i915->uabi_engines); -- cgit v1.2.3 From b49e894c3fd83f67aae2a4778b98ea3838e41020 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 30 Oct 2023 18:40:12 +0100 Subject: drm/i915: Replace custom intel runtime_pm tracker with ref_tracker library Beside reusing existing code, the main advantage of ref_tracker is tracking per instance of wakeref. It allows also to catch double put. On the other side we lose information about the first acquire and the last release, but the advantages outweigh it. 
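The ref_tracker API the code is being moved onto boils down to four calls; a minimal usage sketch (function names and signatures as used in the diff below; the variables and the "i915-rpm" label are illustrative only):

	struct ref_tracker_dir dir;
	struct ref_tracker *tracker = NULL;

	ref_tracker_dir_init(&dir, 16, "i915-rpm");	/* 16 = dead-reference quarantine depth */

	ref_tracker_alloc(&dir, &tracker, GFP_NOWAIT);	/* get: records the caller's stack */
	/* ... wakeref held ... */
	ref_tracker_free(&dir, &tracker);		/* put: a second free of the same tracker is caught */

	ref_tracker_dir_exit(&dir);			/* reports anything still outstanding */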
Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231030-ref_tracker_i915-v1-1-006fe6b96421@intel.com --- drivers/gpu/drm/i915/Kconfig.debug | 4 + drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- drivers/gpu/drm/i915/i915_driver.c | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 221 ++------------------- drivers/gpu/drm/i915/intel_runtime_pm.h | 11 +- drivers/gpu/drm/i915/intel_wakeref.c | 28 +++ drivers/gpu/drm/i915/intel_wakeref.h | 35 +++- 7 files changed, 86 insertions(+), 217 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 2d21930d5501..8d82d7a1a9c5 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -24,7 +24,9 @@ config DRM_I915_DEBUG select DEBUG_FS select PREEMPT_COUNT select I2C_CHARDEV + select REF_TRACKER select STACKDEPOT + select STACKTRACE select DRM_DP_AUX_CHARDEV select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) @@ -231,7 +233,9 @@ config DRM_I915_DEBUG_RUNTIME_PM bool "Enable extra state checking for runtime PM" depends on DRM_I915 default n + select REF_TRACKER select STACKDEPOT + select STACKTRACE help Choose this option to turn on extra state checking for the runtime PM functionality. This may introduce overhead during diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index e25785ae1c20..f78bfcf2ce00 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -405,7 +405,7 @@ print_async_put_domains_state(struct i915_power_domains *power_domains) struct drm_i915_private, display.power.domains); - drm_dbg(&i915->drm, "async_put_wakeref %u\n", + drm_dbg(&i915->drm, "async_put_wakeref %lu\n", power_domains->async_put_wakeref); print_power_domains(power_domains, "async_put_domains[0]", diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 802de2c6decb..db6f7c30adde 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1037,7 +1037,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_power_domains_driver_remove(i915); enable_rpm_wakeref_asserts(&i915->runtime_pm); - intel_runtime_pm_driver_release(&i915->runtime_pm); + intel_runtime_pm_driver_last_release(&i915->runtime_pm); } static bool suspend_to_idle(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8743153fad87..91491111dbd5 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -52,182 +52,37 @@ #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) -#include - -#define STACKDEPTH 8 - -static noinline depot_stack_handle_t __save_depot_stack(void) -{ - unsigned long entries[STACKDEPTH]; - unsigned int n; - - n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); - return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN); -} - static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { - spin_lock_init(&rpm->debug.lock); - stack_depot_init(); + ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, dev_name(rpm->kdev)); } -static noinline depot_stack_handle_t +static intel_wakeref_t track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { - depot_stack_handle_t stack, *stacks; - unsigned long flags; - - if 
(rpm->no_wakeref_tracking) - return -1; - - stack = __save_depot_stack(); - if (!stack) + if (!rpm->available || rpm->no_wakeref_tracking) return -1; - spin_lock_irqsave(&rpm->debug.lock, flags); - - if (!rpm->debug.count) - rpm->debug.last_acquire = stack; - - stacks = krealloc(rpm->debug.owners, - (rpm->debug.count + 1) * sizeof(*stacks), - GFP_NOWAIT | __GFP_NOWARN); - if (stacks) { - stacks[rpm->debug.count++] = stack; - rpm->debug.owners = stacks; - } else { - stack = -1; - } - - spin_unlock_irqrestore(&rpm->debug.lock, flags); - - return stack; + return intel_ref_tracker_alloc(&rpm->debug); } static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, - depot_stack_handle_t stack) + intel_wakeref_t wakeref) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); - unsigned long flags, n; - bool found = false; - - if (unlikely(stack == -1)) + if (!rpm->available || rpm->no_wakeref_tracking) return; - spin_lock_irqsave(&rpm->debug.lock, flags); - for (n = rpm->debug.count; n--; ) { - if (rpm->debug.owners[n] == stack) { - memmove(rpm->debug.owners + n, - rpm->debug.owners + n + 1, - (--rpm->debug.count - n) * sizeof(stack)); - found = true; - break; - } - } - spin_unlock_irqrestore(&rpm->debug.lock, flags); - - if (drm_WARN(&i915->drm, !found, - "Unmatched wakeref (tracking %lu), count %u\n", - rpm->debug.count, atomic_read(&rpm->wakeref_count))) { - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN); - if (!buf) - return; - - stack_depot_snprint(stack, buf, PAGE_SIZE, 2); - DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf); - - stack = READ_ONCE(rpm->debug.last_release); - if (stack) { - stack_depot_snprint(stack, buf, PAGE_SIZE, 2); - DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf); - } - - kfree(buf); - } + intel_ref_tracker_free(&rpm->debug, wakeref); } -static int cmphandle(const void *_a, const void *_b) +static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm) { - const depot_stack_handle_t * const a = _a, * const b = _b; - - if (*a < *b) - return -1; - else if (*a > *b) - return 1; - else - return 0; -} - -static void -__print_intel_runtime_pm_wakeref(struct drm_printer *p, - const struct intel_runtime_pm_debug *dbg) -{ - unsigned long i; - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN); - if (!buf) - return; - - if (dbg->last_acquire) { - stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2); - drm_printf(p, "Wakeref last acquired:\n%s", buf); - } - - if (dbg->last_release) { - stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2); - drm_printf(p, "Wakeref last released:\n%s", buf); - } - - drm_printf(p, "Wakeref count: %lu\n", dbg->count); - - sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL); - - for (i = 0; i < dbg->count; i++) { - depot_stack_handle_t stack = dbg->owners[i]; - unsigned long rep; - - rep = 1; - while (i + 1 < dbg->count && dbg->owners[i + 1] == stack) - rep++, i++; - stack_depot_snprint(stack, buf, PAGE_SIZE, 2); - drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf); - } - - kfree(buf); -} - -static noinline void -__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug, - struct intel_runtime_pm_debug *saved) -{ - *saved = *debug; - - debug->owners = NULL; - debug->count = 0; - debug->last_release = __save_depot_stack(); -} - -static void -dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug) -{ - if (debug->count) { - struct drm_printer p = drm_debug_printer("i915"); - - 
__print_intel_runtime_pm_wakeref(&p, debug); - } - - kfree(debug->owners); + ref_tracker_dir_exit(&rpm->debug); } static noinline void __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm) { - struct intel_runtime_pm_debug dbg = {}; unsigned long flags; if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count, @@ -235,60 +90,14 @@ __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm) flags)) return; - __untrack_all_wakerefs(&rpm->debug, &dbg); + ref_tracker_dir_print_locked(&rpm->debug, INTEL_REFTRACK_PRINT_LIMIT); spin_unlock_irqrestore(&rpm->debug.lock, flags); - - dump_and_free_wakeref_tracking(&dbg); -} - -static noinline void -untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm) -{ - struct intel_runtime_pm_debug dbg = {}; - unsigned long flags; - - spin_lock_irqsave(&rpm->debug.lock, flags); - __untrack_all_wakerefs(&rpm->debug, &dbg); - spin_unlock_irqrestore(&rpm->debug.lock, flags); - - dump_and_free_wakeref_tracking(&dbg); } void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, struct drm_printer *p) { - struct intel_runtime_pm_debug dbg = {}; - - do { - unsigned long alloc = dbg.count; - depot_stack_handle_t *s; - - spin_lock_irq(&rpm->debug.lock); - dbg.count = rpm->debug.count; - if (dbg.count <= alloc) { - memcpy(dbg.owners, - rpm->debug.owners, - dbg.count * sizeof(*s)); - } - dbg.last_acquire = rpm->debug.last_acquire; - dbg.last_release = rpm->debug.last_release; - spin_unlock_irq(&rpm->debug.lock); - if (dbg.count <= alloc) - break; - - s = krealloc(dbg.owners, - dbg.count * sizeof(*s), - GFP_NOWAIT | __GFP_NOWARN); - if (!s) - goto out; - - dbg.owners = s; - } while (1); - - __print_intel_runtime_pm_wakeref(p, &dbg); - -out: - kfree(dbg.owners); + intel_ref_tracker_show(&rpm->debug, p); } #else @@ -297,14 +106,14 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { } -static depot_stack_handle_t +static intel_wakeref_t track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { return -1; } static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, - intel_wakeref_t wref) + intel_wakeref_t wakeref) { } @@ -639,7 +448,11 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm) "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n", intel_rpm_raw_wakeref_count(count), intel_rpm_wakelock_count(count)); +} +void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm) +{ + intel_runtime_pm_driver_release(rpm); untrack_all_intel_runtime_pm_wakerefs(rpm); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h index f79cda7a2503..f3cb7d4c7000 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.h +++ b/drivers/gpu/drm/i915/intel_runtime_pm.h @@ -77,15 +77,7 @@ struct intel_runtime_pm { * paired rpm_put) we can remove corresponding pairs of and keep * the array trimmed to active wakerefs. 
*/ - struct intel_runtime_pm_debug { - spinlock_t lock; - - depot_stack_handle_t last_acquire; - depot_stack_handle_t last_release; - - depot_stack_handle_t *owners; - unsigned long count; - } debug; + struct ref_tracker_dir debug; #endif }; @@ -189,6 +181,7 @@ void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm); void intel_runtime_pm_enable(struct intel_runtime_pm *rpm); void intel_runtime_pm_disable(struct intel_runtime_pm *rpm); void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm); +void intel_runtime_pm_driver_last_release(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm); diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 623a69089386..2401b88b55a4 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -191,3 +191,31 @@ void intel_wakeref_auto_fini(struct intel_wakeref_auto *wf) intel_wakeref_auto(wf, 0); INTEL_WAKEREF_BUG_ON(wf->wakeref); } + +void intel_ref_tracker_show(struct ref_tracker_dir *dir, + struct drm_printer *p) +{ + const size_t buf_size = PAGE_SIZE; + char *buf, *sb, *se; + size_t count; + + buf = kmalloc(buf_size, GFP_NOWAIT); + if (!buf) + return; + + count = ref_tracker_dir_snprint(dir, buf, buf_size); + if (!count) + goto free; + /* printk does not like big buffers, so we split it */ + for (sb = buf; *sb; sb = se + 1) { + se = strchrnul(sb, '\n'); + drm_printf(p, "%.*s", (int)(se - sb + 1), sb); + if (!*se) + break; + } + if (count >= buf_size) + drm_printf(p, "\n...dropped %zd extra bytes of leak report.\n", + count + 1 - buf_size); +free: + kfree(buf); +} diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index ec881b097368..909cad13ac1f 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -7,16 +7,25 @@ #ifndef INTEL_WAKEREF_H #define INTEL_WAKEREF_H +#include + #include #include #include #include #include #include +#include +#include #include #include #include +typedef unsigned long intel_wakeref_t; + +#define INTEL_REFTRACK_DEAD_COUNT 16 +#define INTEL_REFTRACK_PRINT_LIMIT 16 + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) #define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr) #else @@ -26,8 +35,6 @@ struct intel_runtime_pm; struct intel_wakeref; -typedef depot_stack_handle_t intel_wakeref_t; - struct intel_wakeref_ops { int (*get)(struct intel_wakeref *wf); int (*put)(struct intel_wakeref *wf); @@ -261,6 +268,30 @@ __intel_wakeref_defer_park(struct intel_wakeref *wf) */ int intel_wakeref_wait_for_idle(struct intel_wakeref *wf); +#define INTEL_WAKEREF_DEF ((intel_wakeref_t)(-1)) + +static inline intel_wakeref_t intel_ref_tracker_alloc(struct ref_tracker_dir *dir) +{ + struct ref_tracker *user = NULL; + + ref_tracker_alloc(dir, &user, GFP_NOWAIT); + + return (intel_wakeref_t)user ?: INTEL_WAKEREF_DEF; +} + +static inline void intel_ref_tracker_free(struct ref_tracker_dir *dir, + intel_wakeref_t handle) +{ + struct ref_tracker *user; + + user = (handle == INTEL_WAKEREF_DEF) ? 
NULL : (void *)handle; + + ref_tracker_free(dir, &user); +} + +void intel_ref_tracker_show(struct ref_tracker_dir *dir, + struct drm_printer *p); + struct intel_wakeref_auto { struct drm_i915_private *i915; struct timer_list timer; -- cgit v1.2.3 From 5e4e06e4087eb91b0e5405ed42e792415d055e45 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 30 Oct 2023 18:40:13 +0100 Subject: drm/i915: Track gt pm wakerefs Track every intel_gt_pm_get() until its corresponding release in intel_gt_pm_put() by returning a cookie to the caller for acquire that must be passed by on released. When there is an imbalance, we can see who either tried to free a stale wakeref, or who forgot to free theirs. v2: track recently added calls in gen8_ggtt_bind_get_ce and destroyed_worker_func Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231030-ref_tracker_i915-v1-2-006fe6b96421@intel.com --- drivers/gpu/drm/i915/Kconfig.debug | 14 ++++++++ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 14 ++++---- .../drm/i915/gem/selftests/i915_gem_coherency.c | 10 +++--- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 14 ++++---- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 +++++--- drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h | 3 +- drivers/gpu/drm/i915/gt/intel_context.h | 4 +-- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 7 ++-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 ++ .../gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 16 +++++---- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 12 ++++--- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 38 +++++++++++++++++----- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 4 +-- drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 20 +++++++----- drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 5 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 10 +++--- drivers/gpu/drm/i915/gt/selftest_rps.c | 17 ++++++---- drivers/gpu/drm/i915/gt/selftest_slpc.c | 5 +-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 14 ++++---- drivers/gpu/drm/i915/i915_pmu.c | 16 +++++---- drivers/gpu/drm/i915/intel_wakeref.c | 7 +++- drivers/gpu/drm/i915/intel_wakeref.h | 38 ++++++++++++++++++++-- 24 files changed, 197 insertions(+), 90 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 8d82d7a1a9c5..5b7162076850 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -40,6 +40,7 @@ config DRM_I915_DEBUG select DRM_I915_DEBUG_GEM_ONCE select DRM_I915_DEBUG_MMIO select DRM_I915_DEBUG_RUNTIME_PM + select DRM_I915_DEBUG_WAKEREF select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST default n @@ -244,3 +245,16 @@ config DRM_I915_DEBUG_RUNTIME_PM Recommended for driver developers only. If in doubt, say "N" + +config DRM_I915_DEBUG_WAKEREF + bool "Enable extra tracking for wakerefs" + depends on DRM_I915 + select REF_TRACKER + select STACKDEPOT + select STACKTRACE + help + Choose this option to turn on extra state checking and usage + tracking for the wakerefPM functionality. This may introduce + overhead during driver runtime. 
+ + If in doubt, say "N" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 45b9d9e34b8b..b1aa62dfb155 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -253,6 +253,8 @@ struct i915_execbuffer { struct intel_gt *gt; /* gt for the execbuf */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ + intel_wakeref_t wakeref; + intel_wakeref_t wakeref_gt0; /** our requests to build */ struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; @@ -2719,13 +2721,13 @@ eb_select_engine(struct i915_execbuffer *eb) for_each_child(ce, child) intel_context_get(child); - intel_gt_pm_get(gt); + eb->wakeref = intel_gt_pm_get(ce->engine->gt); /* * Keep GT0 active on MTL so that i915_vma_parked() doesn't * free VMAs while execbuf ioctl is validating VMAs. */ if (gt->info.id) - intel_gt_pm_get(to_gt(gt->i915)); + eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915)); if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { err = intel_context_alloc_state(ce); @@ -2765,9 +2767,9 @@ eb_select_engine(struct i915_execbuffer *eb) err: if (gt->info.id) - intel_gt_pm_put(to_gt(gt->i915)); + intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0); - intel_gt_pm_put(gt); + intel_gt_pm_put(ce->engine->gt, eb->wakeref); for_each_child(ce, child) intel_context_put(child); intel_context_put(ce); @@ -2785,8 +2787,8 @@ eb_put_engine(struct i915_execbuffer *eb) * i915_vma_parked() from interfering while execbuf validates vmas. */ if (eb->gt->info.id) - intel_gt_pm_put(to_gt(eb->gt->i915)); - intel_gt_pm_put(eb->gt); + intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0); + intel_gt_pm_put(eb->context->engine->gt, eb->wakeref); for_each_child(eb->context, child) intel_context_put(child); intel_context_put(eb->context); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 3bef1beec7cb..3fd68a099a85 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -85,6 +85,7 @@ out: static int gtt_set(struct context *ctx, unsigned long offset, u32 v) { + intel_wakeref_t wakeref; struct i915_vma *vma; u32 __iomem *map; int err = 0; @@ -99,7 +100,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) if (IS_ERR(vma)) return PTR_ERR(vma); - intel_gt_pm_get(vma->vm->gt); + wakeref = intel_gt_pm_get(vma->vm->gt); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); @@ -112,12 +113,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) i915_vma_unpin_iomap(vma); out_rpm: - intel_gt_pm_put(vma->vm->gt); + intel_gt_pm_put(vma->vm->gt, wakeref); return err; } static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) { + intel_wakeref_t wakeref; struct i915_vma *vma; u32 __iomem *map; int err = 0; @@ -132,7 +134,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) if (IS_ERR(vma)) return PTR_ERR(vma); - intel_gt_pm_get(vma->vm->gt); + wakeref = intel_gt_pm_get(vma->vm->gt); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); @@ -145,7 +147,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) i915_vma_unpin_iomap(vma); out_rpm: - intel_gt_pm_put(vma->vm->gt); + intel_gt_pm_put(vma->vm->gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 72957a36a36b..2c51a2c452fc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -630,14 +630,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(to_gt(i915)); + intel_gt_pm_get_untracked(to_gt(i915)); cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { igt_flush_test(i915); - intel_gt_pm_put(to_gt(i915)); + intel_gt_pm_put_untracked(to_gt(i915)); i915_gem_driver_register__shrinker(i915); } @@ -778,6 +778,7 @@ err_obj: static int gtt_set(struct drm_i915_gem_object *obj) { + intel_wakeref_t wakeref; struct i915_vma *vma; void __iomem *map; int err = 0; @@ -786,7 +787,7 @@ static int gtt_set(struct drm_i915_gem_object *obj) if (IS_ERR(vma)) return PTR_ERR(vma); - intel_gt_pm_get(vma->vm->gt); + wakeref = intel_gt_pm_get(vma->vm->gt); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); if (IS_ERR(map)) { @@ -798,12 +799,13 @@ static int gtt_set(struct drm_i915_gem_object *obj) i915_vma_unpin_iomap(vma); out: - intel_gt_pm_put(vma->vm->gt); + intel_gt_pm_put(vma->vm->gt, wakeref); return err; } static int gtt_check(struct drm_i915_gem_object *obj) { + intel_wakeref_t wakeref; struct i915_vma *vma; void __iomem *map; int err = 0; @@ -812,7 +814,7 @@ static int gtt_check(struct drm_i915_gem_object *obj) if (IS_ERR(vma)) return PTR_ERR(vma); - intel_gt_pm_get(vma->vm->gt); + wakeref = intel_gt_pm_get(vma->vm->gt); map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); if (IS_ERR(map)) { @@ -828,7 +830,7 @@ static int gtt_check(struct drm_i915_gem_object *obj) i915_vma_unpin_iomap(vma); out: - intel_gt_pm_put(vma->vm->gt); + intel_gt_pm_put(vma->vm->gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index ecc990ec1b95..d650beb8ed22 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b) static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { + intel_wakeref_t wakeref; + /* * Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference. */ - if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt))) + wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt); + if (GEM_WARN_ON(!wakeref)) return; /* @@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) * which we can add a new waiter and avoid the cost of re-enabling * the irq. */ - WRITE_ONCE(b->irq_armed, true); + WRITE_ONCE(b->irq_armed, wakeref); /* Requests may have completed before we could enable the interrupt. 
*/ if (!b->irq_enabled++ && b->irq_enable(b)) @@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { + intel_wakeref_t wakeref = b->irq_armed; + GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) b->irq_disable(b); - WRITE_ONCE(b->irq_armed, false); - intel_gt_pm_put_async(b->irq_engine->gt); + WRITE_ONCE(b->irq_armed, 0); + intel_gt_pm_put_async(b->irq_engine->gt, wakeref); } static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 72dfd3748c4c..bdf09fd67b6e 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -13,6 +13,7 @@ #include #include "intel_engine_types.h" +#include "intel_wakeref.h" /* * Rather than have every client wait upon all user interrupts, @@ -43,7 +44,7 @@ struct intel_breadcrumbs { spinlock_t irq_lock; /* protects the interrupt from hardirq context */ struct irq_work irq_work; /* for use from inside irq_lock */ unsigned int irq_enabled; - bool irq_armed; + intel_wakeref_t irq_armed; /* Not all breadcrumbs are attached to physical HW */ intel_engine_mask_t engine_mask; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index a80e3b7c24ff..25564c01507e 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -212,7 +212,7 @@ static inline void intel_context_enter(struct intel_context *ce) return; ce->ops->enter(ce); - intel_gt_pm_get(ce->vm->gt); + ce->wakeref = intel_gt_pm_get(ce->vm->gt); } static inline void intel_context_mark_active(struct intel_context *ce) @@ -229,7 +229,7 @@ static inline void intel_context_exit(struct intel_context *ce) if (--ce->active_count) return; - intel_gt_pm_put_async(ce->vm->gt); + intel_gt_pm_put_async(ce->vm->gt, ce->wakeref); ce->ops->exit(ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index aceaac28a33e..7eccbd70d89f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -17,6 +17,7 @@ #include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" +#include "intel_wakeref.h" #include "uc/intel_guc_fwif.h" @@ -112,6 +113,7 @@ struct intel_context { u32 ring_size; struct intel_ring *ring; struct intel_timeline *timeline; + intel_wakeref_t wakeref; unsigned long flags; #define CONTEXT_BARRIER_BIT 0 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index e91fc881dbf1..96bdb93a948d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf) ENGINE_TRACE(engine, "\n"); - intel_gt_pm_get(engine->gt); + engine->wakeref_track = intel_gt_pm_get(engine->gt); /* Discard stale context state from across idling */ ce = engine->kernel_context; @@ -122,6 +122,7 @@ __queue_and_release_pm(struct i915_request *rq, */ GEM_BUG_ON(rq->context->active_count != 1); __intel_gt_pm_get(engine->gt); + rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref); /* * We have to serialise all potential retirement paths with our @@ -285,7 +286,7 @@ static int __engine_park(struct intel_wakeref *wf) engine->park(engine); /* While gt calls i915_vma_parked(), we have to 
break the lock cycle */ - intel_gt_pm_put_async(engine->gt); + intel_gt_pm_put_async(engine->gt, engine->wakeref_track); return 0; } @@ -296,7 +297,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_engine_init__pm(struct intel_engine_cs *engine) { - intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops); + intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name); intel_engine_init_heartbeat(engine); intel_gsc_idle_msg_enable(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 8769760257fd..960e6be2042f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -446,7 +446,9 @@ struct intel_engine_cs { unsigned long serial; unsigned long wakeref_serial; + intel_wakeref_t wakeref_track; struct intel_wakeref wakeref; + struct file *default_state; struct { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index e8f42ec6b1b4..42aade0faf2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq, execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); if (engine->fw_domain && !--engine->fw_active) intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); - intel_gt_pm_put_async(engine->gt); + intel_gt_pm_put_async_untracked(engine->gt); /* * If this is part of a virtual engine, its next request may diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 76b0ee2b906b..21a7e3191c18 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -296,7 +296,7 @@ static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt) return intel_gt_is_bind_context_ready(gt); } -static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) +static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref) { struct intel_context *ce; struct intel_gt *gt = ggtt->vm.gt; @@ -313,7 +313,8 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) * would conflict with fs_reclaim trying to allocate memory while * doing rpm_resume(). 
*/ - if (!intel_gt_pm_get_if_awake(gt)) + *wakeref = intel_gt_pm_get_if_awake(gt); + if (!*wakeref) return NULL; intel_engine_pm_get(ce->engine); @@ -321,10 +322,10 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) return ce; } -static void gen8_ggtt_bind_put_ce(struct intel_context *ce) +static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref) { intel_engine_pm_put(ce->engine); - intel_gt_pm_put(ce->engine->gt); + intel_gt_pm_put(ce->engine->gt, wakeref); } static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, @@ -337,12 +338,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, struct sgt_iter iter; struct i915_request *rq; struct intel_context *ce; + intel_wakeref_t wakeref; u32 *cs; if (!num_entries) return true; - ce = gen8_ggtt_bind_get_ce(ggtt); + ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref); if (!ce) return false; @@ -418,13 +420,13 @@ queue_err_rq: offset += n_ptes; } - gen8_ggtt_bind_put_ce(ce); + gen8_ggtt_bind_put_ce(ce, wakeref); return true; err_rq: i915_request_put(rq); put_ce: - gen8_ggtt_bind_put_ce(ce); + gen8_ggtt_bind_put_ce(ce, wakeref); return false; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 471b7cdc10ba..220ac4f92edf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -28,19 +28,20 @@ static void user_forcewake(struct intel_gt *gt, bool suspend) { int count = atomic_read(>->user_wakeref); + intel_wakeref_t wakeref; /* Inside suspend/resume so single threaded, no races to worry about. */ if (likely(!count)) return; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); if (suspend) { GEM_BUG_ON(count > atomic_read(>->wakeref.count)); atomic_sub(count, >->wakeref.count); } else { atomic_add(count, >->wakeref.count); } - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); } static void runtime_begin(struct intel_gt *gt) @@ -138,7 +139,7 @@ void intel_gt_pm_init_early(struct intel_gt *gt) * runtime_pm is per-device rather than per-tile, so this is still the * correct structure. 
*/ - intel_wakeref_init(>->wakeref, gt->i915, &wf_ops); + intel_wakeref_init(>->wakeref, gt->i915, &wf_ops, "GT"); seqcount_mutex_init(>->stats.lock, >->wakeref.mutex); } @@ -236,6 +237,7 @@ int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err; err = intel_gt_has_unrecoverable_error(gt); @@ -252,7 +254,7 @@ int intel_gt_resume(struct intel_gt *gt) */ gt_sanitize(gt, true); - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); @@ -295,7 +297,7 @@ int intel_gt_resume(struct intel_gt *gt) out_fw: intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); intel_gt_bind_context_set_ready(gt); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b1eeb5b33918..911fd0160221 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) return intel_wakeref_is_active(>->wakeref); } -static inline void intel_gt_pm_get(struct intel_gt *gt) +static inline void intel_gt_pm_get_untracked(struct intel_gt *gt) { intel_wakeref_get(>->wakeref); } +static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt) +{ + intel_gt_pm_get_untracked(gt); + return intel_wakeref_track(>->wakeref); +} + static inline void __intel_gt_pm_get(struct intel_gt *gt) { __intel_wakeref_get(>->wakeref); } -static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) +static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt) { - return intel_wakeref_get_if_active(>->wakeref); + if (!intel_wakeref_get_if_active(>->wakeref)) + return 0; + + return intel_wakeref_track(>->wakeref); } static inline void intel_gt_pm_might_get(struct intel_gt *gt) @@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt) intel_wakeref_might_get(>->wakeref); } -static inline void intel_gt_pm_put(struct intel_gt *gt) +static inline void intel_gt_pm_put_untracked(struct intel_gt *gt) { intel_wakeref_put(>->wakeref); } -static inline void intel_gt_pm_put_async(struct intel_gt *gt) +static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle) +{ + intel_wakeref_untrack(>->wakeref, handle); + intel_gt_pm_put_untracked(gt); +} + +static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt) { intel_wakeref_put_async(>->wakeref); } @@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) intel_wakeref_might_put(>->wakeref); } -#define with_intel_gt_pm(gt, tmp) \ - for (tmp = 1, intel_gt_pm_get(gt); tmp; \ - intel_gt_pm_put(gt), tmp = 0) +static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle) +{ + intel_wakeref_untrack(>->wakeref, handle); + intel_gt_pm_put_async_untracked(gt); +} + +#define with_intel_gt_pm(gt, wf) \ + for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0) /** * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent @@ -64,7 +84,7 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) * @wf: pointer to a temporary wakeref. 
*/ #define with_intel_gt_pm_if_awake(gt, wf) \ - for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) + for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0) static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index f900cc68d6d9..7114c116e928 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -27,7 +27,7 @@ void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt) { atomic_inc(>->user_wakeref); - intel_gt_pm_get(gt); + intel_gt_pm_get_untracked(gt); if (GRAPHICS_VER(gt->i915) >= 6) intel_uncore_forcewake_user_get(gt->uncore); } @@ -36,7 +36,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt) { if (GRAPHICS_VER(gt->i915) >= 6) intel_uncore_forcewake_user_put(gt->uncore); - intel_gt_pm_put(gt); + intel_gt_pm_put_untracked(gt); atomic_dec(>->user_wakeref); } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 86cecf7a1105..5ffa5e30f419 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B) return *a - *b; } -static void perf_begin(struct intel_gt *gt) +static intel_wakeref_t perf_begin(struct intel_gt *gt) { - intel_gt_pm_get(gt); + intel_wakeref_t wakeref = intel_gt_pm_get(gt); /* Boost gpufreq to max [waitboost] and keep it fixed */ atomic_inc(>->rps.num_waiters); queue_work(gt->i915->unordered_wq, >->rps.work); flush_work(>->rps.work); + + return wakeref; } -static int perf_end(struct intel_gt *gt) +static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref) { atomic_dec(>->rps.num_waiters); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return igt_flush_test(gt->i915); } @@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - perf_begin(gt); + wakeref = perf_begin(gt); for_each_engine(engine, gt, id) { struct intel_context *ce = engine->kernel_context; struct i915_vma *batch; @@ -207,7 +210,7 @@ out: pr_info("%s: MI_BB_START cycles: %u\n", engine->name, trifilter(cycles)); } - if (perf_end(gt)) + if (perf_end(gt, wakeref)) err = -EIO; return err; @@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? 
*/ return 0; - perf_begin(gt); + wakeref = perf_begin(gt); for_each_engine(engine, gt, id) { struct intel_context *ce = engine->kernel_context; struct i915_vma *base, *nop; @@ -364,7 +368,7 @@ out: pr_info("%s: 16K MI_NOOP cycles: %u\n", engine->name, trifilter(cycles)); } - if (perf_end(gt)) + if (perf_end(gt, wakeref)) err = -EIO; return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 0971241707ce..33351deeea4f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (!gt->clock_frequency) { /* unknown */ @@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg) if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for_each_engine(engine, gt, id) { @@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg) } intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 79aa6ac66ad2..f40de408cd3a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -261,11 +261,12 @@ static int igt_atomic_reset(void *arg) { struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; + intel_wakeref_t wakeref; int err = 0; /* Check that the resets are usable from atomic context */ - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ @@ -296,7 +297,7 @@ static int igt_atomic_reset(void *arg) unlock: igt_global_reset_unlock(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } @@ -307,6 +308,7 @@ static int igt_atomic_engine_reset(void *arg) const typeof(*igt_atomic_phases) *p; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; /* Check that the resets are usable from atomic context */ @@ -317,7 +319,7 @@ static int igt_atomic_engine_reset(void *arg) if (intel_uc_uses_guc_submission(>->uc)) return 0; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ @@ -365,7 +367,7 @@ static int igt_atomic_engine_reset(void *arg) out_unlock: igt_global_reset_unlock(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index fb30f733b036..dcef8d498919 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -224,6 +224,7 @@ int live_rps_clock_interval(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; int err = 0; if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) @@ -236,7 +237,7 @@ int live_rps_clock_interval(void *arg) saved_work = rps->work.func; rps->work.func = dummy_rps_work; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_rps_disable(>->rps); intel_gt_check_clock_frequency(gt); @@ -355,7 +356,7 @@ int live_rps_clock_interval(void *arg) } intel_rps_enable(>->rps); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, 
wakeref); igt_spinner_fini(&spin); @@ -376,6 +377,7 @@ int live_rps_control(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; int err = 0; /* @@ -398,7 +400,7 @@ int live_rps_control(void *arg) saved_work = rps->work.func; rps->work.func = dummy_rps_work; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; ktime_t min_dt, max_dt; @@ -488,7 +490,7 @@ int live_rps_control(void *arg) break; } } - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); igt_spinner_fini(&spin); @@ -1023,6 +1025,7 @@ int live_rps_interrupt(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; u32 pm_events; int err = 0; @@ -1033,9 +1036,9 @@ int live_rps_interrupt(void *arg) if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6) return 0; - intel_gt_pm_get(gt); - pm_events = rps->pm_events; - intel_gt_pm_put(gt); + pm_events = 0; + with_intel_gt_pm(gt, wakeref) + pm_events = rps->pm_events; if (!pm_events) { pr_err("No RPS PM events registered, but RPS is enabled?\n"); return -ENODEV; diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 952c8d52d68a..302d0540295d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -266,6 +266,7 @@ static int run_test(struct intel_gt *gt, int test_type) struct intel_rps *rps = >->rps; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; struct igt_spinner spin; u32 slpc_min_freq, slpc_max_freq; int err = 0; @@ -311,7 +312,7 @@ static int run_test(struct intel_gt *gt, int test_type) } intel_gt_pm_wait_for_idle(gt); - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; u32 max_act_freq; @@ -397,7 +398,7 @@ static int run_test(struct intel_gt *gt, int test_type) if (igt_flush_test(gt->i915)) err = -EIO; - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); igt_spinner_fini(&spin); intel_gt_pm_wait_for_idle(gt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index d37698bd6b91..04f8377fd7a3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1107,7 +1107,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) if (deregister) guc_signal_context_fence(ce); if (destroyed) { - intel_gt_pm_put_async(guc_to_gt(guc)); + intel_gt_pm_put_async_untracked(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } @@ -1303,6 +1303,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) unsigned long flags; u32 reset_count; bool in_reset; + intel_wakeref_t wakeref; spin_lock_irqsave(&guc->timestamp.lock, flags); @@ -1325,7 +1326,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) * start_gt_clk is derived from GuC state. To get a consistent * view of activity, we query the GuC state only if gt is awake. */ - if (!in_reset && intel_gt_pm_get_if_awake(gt)) { + wakeref = in_reset ? 
0 : intel_gt_pm_get_if_awake(gt); + if (wakeref) { stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; /* @@ -1334,7 +1336,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) */ guc_update_engine_gt_clks(engine); guc_update_pm_timestamp(guc, now); - intel_gt_pm_put_async(gt); + intel_gt_pm_put_async(gt, wakeref); if (i915_reset_count(gpu_error) != reset_count) { *stats = stats_saved; guc->timestamp.gt_stamp = gt_stamp_saved; @@ -3385,9 +3387,9 @@ static void destroyed_worker_func(struct work_struct *w) struct intel_guc *guc = container_of(w, struct intel_guc, submission_state.destroyed_worker); struct intel_gt *gt = guc_to_gt(guc); - int tmp; + intel_wakeref_t wakeref; - with_intel_gt_pm(gt, tmp) + with_intel_gt_pm(gt, wakeref) deregister_destroyed_contexts(guc); } @@ -4894,7 +4896,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, intel_context_put(ce); } else if (context_destroyed(ce)) { /* Context has been destroyed */ - intel_gt_pm_put_async(guc_to_gt(guc)); + intel_gt_pm_put_async_untracked(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 878a27e1c8ef..21eb0c5b320d 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -223,19 +223,19 @@ static u64 get_rc6(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; const unsigned int gt_id = gt->info.id; struct i915_pmu *pmu = &i915->pmu; + intel_wakeref_t wakeref; unsigned long flags; - bool awake = false; u64 val; - if (intel_gt_pm_get_if_awake(gt)) { + wakeref = intel_gt_pm_get_if_awake(gt); + if (wakeref) { val = __get_rc6(gt); - intel_gt_pm_put_async(gt); - awake = true; + intel_gt_pm_put_async(gt, wakeref); } spin_lock_irqsave(&pmu->lock, flags); - if (awake) { + if (wakeref) { store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val); } else { /* @@ -439,12 +439,14 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) const unsigned int gt_id = gt->info.id; struct i915_pmu *pmu = &i915->pmu; struct intel_rps *rps = >->rps; + intel_wakeref_t wakeref; if (!frequency_sampling_enabled(pmu, gt_id)) return; /* Report 0/0 (actual/requested) frequency while parked. 
*/ - if (!intel_gt_pm_get_if_awake(gt)) + wakeref = intel_gt_pm_get_if_awake(gt); + if (!wakeref) return; if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) { @@ -473,7 +475,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) period_ns / 1000); } - intel_gt_pm_put_async(gt); + intel_gt_pm_put_async(gt, wakeref); } static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 2401b88b55a4..dea2f63184f8 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -99,7 +99,8 @@ static void __intel_wakeref_put_work(struct work_struct *wrk) void __intel_wakeref_init(struct intel_wakeref *wf, struct drm_i915_private *i915, const struct intel_wakeref_ops *ops, - struct intel_wakeref_lockclass *key) + struct intel_wakeref_lockclass *key, + const char *name) { wf->i915 = i915; wf->ops = ops; @@ -111,6 +112,10 @@ void __intel_wakeref_init(struct intel_wakeref *wf, INIT_DELAYED_WORK(&wf->work, __intel_wakeref_put_work); lockdep_init_map(&wf->work.work.lockdep_map, "wakeref.work", &key->work, 0); + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) + ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, name); +#endif } int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 909cad13ac1f..68aa3be48251 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -50,6 +50,10 @@ struct intel_wakeref { const struct intel_wakeref_ops *ops; struct delayed_work work; + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) + struct ref_tracker_dir debug; +#endif }; struct intel_wakeref_lockclass { @@ -60,11 +64,12 @@ struct intel_wakeref_lockclass { void __intel_wakeref_init(struct intel_wakeref *wf, struct drm_i915_private *i915, const struct intel_wakeref_ops *ops, - struct intel_wakeref_lockclass *key); -#define intel_wakeref_init(wf, i915, ops) do { \ + struct intel_wakeref_lockclass *key, + const char *name); +#define intel_wakeref_init(wf, i915, ops, name) do { \ static struct intel_wakeref_lockclass __key; \ \ - __intel_wakeref_init((wf), (i915), (ops), &__key); \ + __intel_wakeref_init((wf), (i915), (ops), &__key, name); \ } while (0) int __intel_wakeref_get_first(struct intel_wakeref *wf); @@ -292,6 +297,33 @@ static inline void intel_ref_tracker_free(struct ref_tracker_dir *dir, void intel_ref_tracker_show(struct ref_tracker_dir *dir, struct drm_printer *p); +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) + +static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf) +{ + return intel_ref_tracker_alloc(&wf->debug); +} + +static inline void intel_wakeref_untrack(struct intel_wakeref *wf, + intel_wakeref_t handle) +{ + intel_ref_tracker_free(&wf->debug, handle); +} + +#else + +static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf) +{ + return -1; +} + +static inline void intel_wakeref_untrack(struct intel_wakeref *wf, + intel_wakeref_t handle) +{ +} + +#endif + struct intel_wakeref_auto { struct drm_i915_private *i915; struct timer_list timer; -- cgit v1.2.3 From 548b61a8ce18dec8757fcc112eac5bd125161408 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 15 Nov 2023 14:44:09 -0800 Subject: drm/msm/gpu: Move gpu devcore's to gpu device The dpu devcore's are already associated with the dpu device. 
So we should associate the gpu devcore's with the gpu device, for easier classification. Signed-off-by: Rob Clark Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/567738/ --- drivers/gpu/drm/msm/msm_gpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 7f64c6667300..2b7c9db3ded3 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -292,8 +292,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, /* Set the active crash state to be dumped on failure */ gpu->crashstate = state; - /* FIXME: Release the crashstate if this errors out? */ - dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL, + dev_coredumpm(&gpu->pdev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL, msm_gpu_devcoredump_read, msm_gpu_devcoredump_free); } #else -- cgit v1.2.3 From 4bea53b9c7c72fd12a0ceebe88a71723c0a514b8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 17 Nov 2023 07:14:19 -0800 Subject: drm/msm: Reduce fallout of fence signaling vs reclaim hangs Until various PM devfreq/QoS and interconnect patches land, we could potentially trigger reclaim from gpu scheduler thread, and under enough memory pressure that could trigger a sort of deadlock. Eventually the wait will timeout and we'll move on to consider other GEM objects. But given that there is still a potential for deadlock/stalling, we should reduce the timeout to contain the damage. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/568031/ --- drivers/gpu/drm/msm/msm_gem_shrinker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 5a7d48c02c4b..07ca4ddfe4e3 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -75,7 +75,7 @@ static bool wait_for_idle(struct drm_gem_object *obj) { enum dma_resv_usage usage = dma_resv_usage_rw(true); - return dma_resv_wait_timeout(obj->resv, usage, false, 1000) > 0; + return dma_resv_wait_timeout(obj->resv, usage, false, 10) > 0; } static bool -- cgit v1.2.3 From 12578c075f89d6bd1b8af21751fbc2e1f78d2ce0 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 17 Nov 2023 07:24:28 -0800 Subject: drm/msm/gpu: Skip retired submits in recover worker If we somehow raced with submit retiring, either while waiting for worker to have a chance to run or acquiring the gpu lock, then the recover worker should just bail. 
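[Editorial sketch, not part of this patch: the race window exists because recovery is deferred to a worker. The names below are simplified from msm_gpu.c and the exact signatures may differ; the point is only that the submit can retire between hangcheck queueing the work and the worker taking gpu->lock, so the worker's fence lookup can legitimately come back empty.]

    /*
     * Illustrative only (assumed shape of the hangcheck path in msm_gpu.c):
     * hangcheck defers recovery to a worker instead of recovering inline.
     */
    static void hangcheck_handler(struct timer_list *t)
    {
            struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);

            /* fence appeared stuck -> defer recovery to the worker */
            kthread_queue_work(gpu->worker, &gpu->recover_work);
    }

    /*
     * Between this point and recover_worker() acquiring gpu->lock, the fence
     * may still signal and retirement may drop the submit; find_submit() in
     * the worker then returns NULL, and with this patch the worker just bails.
     */
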
Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/568034/ --- drivers/gpu/drm/msm/msm_gpu.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 2b7c9db3ded3..095390774f22 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -365,29 +365,31 @@ static void recover_worker(struct kthread_work *work) DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name); submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); - if (submit) { - /* Increment the fault counts */ - submit->queue->faults++; - if (submit->aspace) - submit->aspace->faults++; - get_comm_cmdline(submit, &comm, &cmd); + /* + * If the submit retired while we were waiting for the worker to run, + * or waiting to acquire the gpu lock, then nothing more to do. + */ + if (!submit) + goto out_unlock; - if (comm && cmd) { - DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", - gpu->name, comm, cmd); + /* Increment the fault counts */ + submit->queue->faults++; + if (submit->aspace) + submit->aspace->faults++; - msm_rd_dump_submit(priv->hangrd, submit, - "offending task: %s (%s)", comm, cmd); - } else { - msm_rd_dump_submit(priv->hangrd, submit, NULL); - } + get_comm_cmdline(submit, &comm, &cmd); + + if (comm && cmd) { + DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", + gpu->name, comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", comm, cmd); } else { - /* - * We couldn't attribute this fault to any particular context, - * so increment the global fault count instead. - */ - gpu->global_faults++; + DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name); + + msm_rd_dump_submit(priv->hangrd, submit, NULL); } /* Record the crash state */ @@ -440,6 +442,7 @@ static void recover_worker(struct kthread_work *work) pm_runtime_put(&gpu->pdev->dev); +out_unlock: mutex_unlock(&gpu->lock); msm_gpu_retire(gpu); -- cgit v1.2.3 From f6afe4f09f6605b725b39670b3568ab2927c9b43 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 10 Oct 2023 08:54:25 +0300 Subject: drm/msm: don't create GPU-related debugfs files with no GPU present If there is no GPU present, skip creation of the GPU-related debugfs files, making the MSM's debugfs more usable. 
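[Editorial note, not part of this patch: the "no GPU present" test used below is priv->gpu_pdev, which is assumed to be populated only when an adreno GPU actually binds. A rough sketch of that assumption, loosely based on adreno_device.c; names and signatures are illustrative:]

    /*
     * Sketch (assumed from the adreno bind path, not taken from this patch):
     * priv->gpu_pdev is only set once a GPU binds, so "!priv->gpu_pdev" is a
     * reliable "no GPU present" check for the debugfs code.
     */
    static int adreno_bind(struct device *dev, struct device *master, void *data)
    {
            struct msm_drm_private *priv = dev_get_drvdata(master);

            /* ... probe and validate the GPU ... */

            priv->gpu_pdev = to_platform_device(dev);

            return 0;
    }
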
Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/561742/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_debugfs.c | 41 ++++++++++++++++++++++++--------------- drivers/gpu/drm/msm/msm_rd.c | 3 +++ 2 files changed, 28 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 04d304eed223..4494f6d1c7cb 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -304,36 +304,21 @@ int msm_debugfs_late_init(struct drm_device *dev) return ret; } -void msm_debugfs_init(struct drm_minor *minor) +static void msm_debugfs_gpu_init(struct drm_minor *minor) { struct drm_device *dev = minor->dev; struct msm_drm_private *priv = dev->dev_private; struct dentry *gpu_devfreq; - drm_debugfs_create_files(msm_debugfs_list, - ARRAY_SIZE(msm_debugfs_list), - minor->debugfs_root, minor); - debugfs_create_file("gpu", S_IRUSR, minor->debugfs_root, dev, &msm_gpu_fops); - if (priv->kms) { - drm_debugfs_create_files(msm_kms_debugfs_list, - ARRAY_SIZE(msm_kms_debugfs_list), - minor->debugfs_root, minor); - debugfs_create_file("kms", S_IRUSR, minor->debugfs_root, - dev, &msm_kms_fops); - } - debugfs_create_u32("hangcheck_period_ms", 0600, minor->debugfs_root, &priv->hangcheck_period); debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root, &priv->disable_err_irq); - debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root, - dev, &shrink_fops); - gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root); debugfs_create_bool("idle_clamp",0600, gpu_devfreq, @@ -344,6 +329,30 @@ void msm_debugfs_init(struct drm_minor *minor) debugfs_create_u32("downdifferential",0600, gpu_devfreq, &priv->gpu_devfreq_config.downdifferential); +} + +void msm_debugfs_init(struct drm_minor *minor) +{ + struct drm_device *dev = minor->dev; + struct msm_drm_private *priv = dev->dev_private; + + drm_debugfs_create_files(msm_debugfs_list, + ARRAY_SIZE(msm_debugfs_list), + minor->debugfs_root, minor); + + if (priv->gpu_pdev) + msm_debugfs_gpu_init(minor); + + if (priv->kms) { + drm_debugfs_create_files(msm_kms_debugfs_list, + ARRAY_SIZE(msm_kms_debugfs_list), + minor->debugfs_root, minor); + debugfs_create_file("kms", S_IRUSR, minor->debugfs_root, + dev, &msm_kms_fops); + } + + debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root, + dev, &shrink_fops); if (priv->kms && priv->kms->funcs->debugfs_init) priv->kms->funcs->debugfs_init(priv->kms, minor); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 5adc51f7ab59..ca44fd291c5b 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -270,6 +270,9 @@ int msm_rd_debugfs_init(struct drm_minor *minor) struct msm_rd_state *rd; int ret; + if (!priv->gpu_pdev) + return 0; + /* only create on first minor: */ if (priv->rd) return 0; -- cgit v1.2.3 From d1912f6972b857e63577e8094f5f1242d9fe3c7d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 6 Nov 2023 10:50:25 -0800 Subject: drm/msm: Small uabi fixes Correct the minor version exposed and error return value for MSM_INFO_GET_NAME. 
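[Editorial sketch, not part of this patch: a userspace view of why -ETOOSMALL is the more useful error. Field, macro and header names are assumed from include/uapi/drm/msm_drm.h and libdrm (header paths may vary); callers typically probe the name length first and can now tell "buffer too small" apart from a genuinely invalid request.]

    /* Userspace sketch (assumed libdrm/uapi names): two-step MSM_INFO_GET_NAME. */
    #include <stdint.h>
    #include <stdio.h>
    #include <xf86drm.h>
    #include <drm/msm_drm.h>

    static void print_bo_name(int fd, uint32_t handle)
    {
            struct drm_msm_gem_info req = {
                    .handle = handle,
                    .info = MSM_INFO_GET_NAME,
            };
            char name[32];

            /* value == 0: kernel only reports the length of the debug name */
            drmIoctl(fd, DRM_IOCTL_MSM_GEM_INFO, &req);

            req.value = (uintptr_t)name;
            req.len = sizeof(name);
            if (drmIoctl(fd, DRM_IOCTL_MSM_GEM_INFO, &req)) {
                    /* a short buffer now fails as ETOOSMALL, not EINVAL */
                    return;
            }

            /* the name is not NUL-terminated; req.len holds its length */
            printf("bo name: %.*s\n", (int)req.len, name);
    }
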
Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/566155/ --- drivers/gpu/drm/msm/msm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 2aae7d107f33..d751d2023f84 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -39,7 +39,7 @@ * - 1.11.0 - Add wait boost (MSM_WAIT_FENCE_BOOST, MSM_PREP_BOOST) */ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 10 +#define MSM_VERSION_MINOR 11 #define MSM_VERSION_PATCHLEVEL 0 static void msm_deinit_vram(struct drm_device *ddev); @@ -620,7 +620,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, break; case MSM_INFO_GET_NAME: if (args->value && (args->len < strlen(msm_obj->name))) { - ret = -EINVAL; + ret = -ETOOSMALL; break; } args->len = strlen(msm_obj->name); -- cgit v1.2.3 From 9902cb999e4e913d98e8afe4b36c08e4a793e1ce Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 6 Nov 2023 10:50:26 -0800 Subject: drm/msm/gem: Add metadata The EXT_external_objects extension is a bit awkward as it doesn't pass explicit modifiers, leaving the importer to guess with incomplete information. In the case of vk (turnip) exporting and gl (freedreno) importing, the "OPTIMAL_TILING_EXT" layout depends on VkImageCreateInfo flags (among other things), which the importer does not know. Which unfortunately leaves us with the need for a metadata back-channel. The contents of the metadata are defined by userspace. The EXT_external_objects extension is only required to work between compatible versions of gl and vk drivers, as defined by device and driver UUIDs. v2: add missing metadata kfree v3: Rework to move copy_from/to_user out from under gem obj lock to avoid angering lockdep about deadlocks against fs-reclaim Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/566157/ --- drivers/gpu/drm/msm/msm_drv.c | 92 ++++++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/msm/msm_gem.c | 1 + drivers/gpu/drm/msm/msm_gem.h | 4 ++ include/uapi/drm/msm_drm.h | 2 + 4 files changed, 98 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index d751d2023f84..75d60aab5780 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -37,9 +37,10 @@ * - 1.9.0 - Add MSM_SUBMIT_FENCE_SN_IN * - 1.10.0 - Add MSM_SUBMIT_BO_NO_IMPLICIT * - 1.11.0 - Add wait boost (MSM_WAIT_FENCE_BOOST, MSM_PREP_BOOST) + * - 1.12.0 - Add MSM_INFO_SET_METADATA and MSM_INFO_GET_METADATA */ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 11 +#define MSM_VERSION_MINOR 12 #define MSM_VERSION_PATCHLEVEL 0 static void msm_deinit_vram(struct drm_device *ddev); @@ -546,6 +547,85 @@ static int msm_ioctl_gem_info_set_iova(struct drm_device *dev, return msm_gem_set_iova(obj, ctx->aspace, iova); } +static int msm_ioctl_gem_info_set_metadata(struct drm_gem_object *obj, + __user void *metadata, + u32 metadata_size) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + void *buf; + int ret; + + /* Impose a moderate upper bound on metadata size: */ + if (metadata_size > 128) { + return -EOVERFLOW; + } + + /* Use a temporary buf to keep copy_from_user() outside of gem obj lock: */ + buf = memdup_user(metadata, metadata_size); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + ret = msm_gem_lock_interruptible(obj); + if (ret) + goto out; + + msm_obj->metadata = + 
krealloc(msm_obj->metadata, metadata_size, GFP_KERNEL); + msm_obj->metadata_size = metadata_size; + memcpy(msm_obj->metadata, buf, metadata_size); + + msm_gem_unlock(obj); + +out: + kfree(buf); + + return ret; +} + +static int msm_ioctl_gem_info_get_metadata(struct drm_gem_object *obj, + __user void *metadata, + u32 *metadata_size) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + void *buf; + int ret, len; + + if (!metadata) { + /* + * Querying the size is inherently racey, but + * EXT_external_objects expects the app to confirm + * via device and driver UUIDs that the exporter and + * importer versions match. All we can do from the + * kernel side is check the length under obj lock + * when userspace tries to retrieve the metadata + */ + *metadata_size = msm_obj->metadata_size; + return 0; + } + + ret = msm_gem_lock_interruptible(obj); + if (ret) + return ret; + + /* Avoid copy_to_user() under gem obj lock: */ + len = msm_obj->metadata_size; + buf = kmemdup(msm_obj->metadata, len, GFP_KERNEL); + + msm_gem_unlock(obj); + + if (*metadata_size < len) { + ret = -ETOOSMALL; + } else if (copy_to_user(metadata, buf, len)) { + ret = -EFAULT; + } else { + *metadata_size = len; + } + + kfree(buf); + + return 0; +} + static int msm_ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file) { @@ -568,6 +648,8 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, break; case MSM_INFO_SET_NAME: case MSM_INFO_GET_NAME: + case MSM_INFO_SET_METADATA: + case MSM_INFO_GET_METADATA: break; default: return -EINVAL; @@ -630,6 +712,14 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, ret = -EFAULT; } break; + case MSM_INFO_SET_METADATA: + ret = msm_ioctl_gem_info_set_metadata( + obj, u64_to_user_ptr(args->value), args->len); + break; + case MSM_INFO_GET_METADATA: + ret = msm_ioctl_gem_info_get_metadata( + obj, u64_to_user_ptr(args->value), &args->len); + break; } drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index db1e748daa75..32303cc8e646 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1058,6 +1058,7 @@ static void msm_gem_free_object(struct drm_gem_object *obj) drm_gem_object_release(obj); + kfree(msm_obj->metadata); kfree(msm_obj); } diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 8ddef5443140..6352bd3cc5ed 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -108,6 +108,10 @@ struct msm_gem_object { char name[32]; /* Identifier to print for the debugfs files */ + /* userspace metadata backchannel */ + void *metadata; + u32 metadata_size; + /** * pin_count: Number of times the pages are pinned * diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 6c34272a13fd..6f2a7ad04aa4 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -139,6 +139,8 @@ struct drm_msm_gem_new { #define MSM_INFO_GET_NAME 0x03 /* get debug name, returned by pointer */ #define MSM_INFO_SET_IOVA 0x04 /* set the iova, passed by value */ #define MSM_INFO_GET_FLAGS 0x05 /* get the MSM_BO_x flags */ +#define MSM_INFO_SET_METADATA 0x06 /* set userspace metadata */ +#define MSM_INFO_GET_METADATA 0x07 /* get userspace metadata */ struct drm_msm_gem_info { __u32 handle; /* in */ -- cgit v1.2.3 From b2acb89af1a400be721bcb14f137aa22b509caba Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 24 Oct 2023 10:08:04 -0700 Subject: drm/msm/gem: Demote userspace errors to DRM_UT_DRIVER Error messages 
resulting from incorrect usage of the kernel uabi should not spam dmesg by default. But it is useful to enable them to debug userspace. So demote to DRM_UT_DRIVER. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/564189/ --- drivers/gpu/drm/msm/msm_gem.c | 6 +++--- drivers/gpu/drm/msm/msm_gem_submit.c | 36 +++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 32303cc8e646..175ee4ab8a6f 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -226,9 +226,9 @@ static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj, msm_gem_assert_locked(obj); - if (GEM_WARN_ON(msm_obj->madv > madv)) { - DRM_DEV_ERROR(obj->dev->dev, "Invalid madv state: %u vs %u\n", - msm_obj->madv, madv); + if (msm_obj->madv > madv) { + DRM_DEV_DEBUG_DRIVER(obj->dev->dev, "Invalid madv state: %u vs %u\n", + msm_obj->madv, madv); return ERR_PTR(-EBUSY); } diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 99744de6c05a..207b6ba1565d 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -17,6 +17,12 @@ #include "msm_gem.h" #include "msm_gpu_trace.h" +/* For userspace errors, use DRM_UT_DRIVER.. so that userspace can enable + * error msgs for debugging, but we don't spam dmesg by default + */ +#define SUBMIT_ERROR(submit, fmt, ...) \ + DRM_DEV_DEBUG_DRIVER((submit)->dev->dev, fmt, ##__VA_ARGS__) + /* * Cmdstream submission: */ @@ -136,7 +142,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, if ((submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) || !(submit_bo.flags & MANDATORY_FLAGS)) { - DRM_ERROR("invalid flags: %x\n", submit_bo.flags); + SUBMIT_ERROR(submit, "invalid flags: %x\n", submit_bo.flags); ret = -EINVAL; i = 0; goto out; @@ -158,7 +164,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, */ obj = idr_find(&file->object_idr, submit->bos[i].handle); if (!obj) { - DRM_ERROR("invalid handle %u at index %u\n", submit->bos[i].handle, i); + SUBMIT_ERROR(submit, "invalid handle %u at index %u\n", submit->bos[i].handle, i); ret = -EINVAL; goto out_unlock; } @@ -202,13 +208,13 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: break; default: - DRM_ERROR("invalid type: %08x\n", submit_cmd.type); + SUBMIT_ERROR(submit, "invalid type: %08x\n", submit_cmd.type); return -EINVAL; } if (submit_cmd.size % 4) { - DRM_ERROR("non-aligned cmdstream buffer size: %u\n", - submit_cmd.size); + SUBMIT_ERROR(submit, "non-aligned cmdstream buffer size: %u\n", + submit_cmd.size); ret = -EINVAL; goto out; } @@ -306,8 +312,8 @@ retry: fail: if (ret == -EALREADY) { - DRM_ERROR("handle %u at index %u already on submit list\n", - submit->bos[i].handle, i); + SUBMIT_ERROR(submit, "handle %u at index %u already on submit list\n", + submit->bos[i].handle, i); ret = -EINVAL; } @@ -448,8 +454,8 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, struct drm_gem_object **obj, uint64_t *iova, bool *valid) { if (idx >= submit->nr_bos) { - DRM_ERROR("invalid buffer index: %u (out of %u)\n", - idx, submit->nr_bos); + SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n", + idx, submit->nr_bos); return -EINVAL; } @@ -475,7 +481,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob return 0; if (offset % 4) { - DRM_ERROR("non-aligned 
cmdstream buffer: %u\n", offset); + SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: %u\n", offset); return -EINVAL; } @@ -497,8 +503,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob bool valid; if (submit_reloc.submit_offset % 4) { - DRM_ERROR("non-aligned reloc offset: %u\n", - submit_reloc.submit_offset); + SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n", + submit_reloc.submit_offset); ret = -EINVAL; goto out; } @@ -508,7 +514,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob if ((off >= (obj->size / 4)) || (off < last_offset)) { - DRM_ERROR("invalid offset %u at reloc %u\n", off, i); + SUBMIT_ERROR(submit, "invalid offset %u at reloc %u\n", off, i); ret = -EINVAL; goto out; } @@ -881,7 +887,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (!submit->cmd[i].size || ((submit->cmd[i].size + submit->cmd[i].offset) > obj->size / 4)) { - DRM_ERROR("invalid cmdstream size: %u\n", submit->cmd[i].size * 4); + SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4); ret = -EINVAL; goto out; } @@ -893,7 +899,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (!gpu->allow_relocs) { if (submit->cmd[i].nr_relocs) { - DRM_ERROR("relocs not allowed\n"); + SUBMIT_ERROR(submit, "relocs not allowed\n"); ret = -EINVAL; goto out; } -- cgit v1.2.3 From 6c15808d9b7640c3209d53cd2d8d56cfbf9f7175 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 24 Oct 2023 10:08:05 -0700 Subject: drm/msm/gem: Demote allocations to __GFP_NOWARN For allocations with userspace controlled size, we should not warn on allocation failure. Fixes KASAN splat: WARNING: CPU: 6 PID: 29557 at mm/page_alloc.c:5398 __alloc_pages+0x160c/0x2204 Modules linked in: bridge stp llc hci_vhci tun veth xt_cgroup uinput xt_MASQUERADE rfcomm ip6table_nat fuse 8021q r8153_ecm cdc_ether usbnet r8152 mii venus_enc venus_dec uvcvideo algif_hash algif_skcipher af_alg qcom_spmi_adc_tm5 qcom_spmi_adc5 qcom_vadc_common qcom_spmi_temp_alarm cros_ec_typec typec hci_uart btqca qcom_stats snd_soc_sc7180 venus_core ath10k_snoc ath10k_core ath coresight_tmc coresight_replicator coresight_etm4x coresight_funnel snd_soc_lpass_sc7180 mac80211 coresight bluetooth ecdh_generic ecc cfg80211 cros_ec_sensorhub lzo_rle lzo_compress zram joydev CPU: 6 PID: 29557 Comm: syz-executor Not tainted 5.15.110-lockdep-19320-g89d010b0a9df #1 45bdd400697a78353f2927c116615abba810e5dd Hardware name: Google Kingoftown (DT) pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __alloc_pages+0x160c/0x2204 lr : __alloc_pages+0x58/0x2204 sp : ffffffc0214176c0 x29: ffffffc0214178a0 x28: ffffff801f7b4000 x27: 0000000000000000 x26: ffffff808a4fa000 x25: 1ffffff011290781 x24: ffffff808a59c000 x23: 0000000000000010 x22: ffffffc0080e6980 x21: 0000000000000010 x20: 0000000000000000 x19: 00000000080001f8 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000020000500 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000001 x11: 0000000000000000 x10: 1ffffff804282f06 x9 : 0000000000000000 x8 : ffffffc021417848 x7 : 0000000000000000 x6 : ffffffc0082ac788 x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000010 x2 : 0000000000000008 x1 : 0000000000000000 x0 : ffffffc021417830 Call trace: __alloc_pages+0x160c/0x2204 kmalloc_order+0x50/0xf4 kmalloc_order_trace+0x38/0x18c __kmalloc+0x300/0x45c msm_ioctl_gem_submit+0x284/0x5988 drm_ioctl_kernel+0x270/0x418 drm_ioctl+0x5e0/0xbf8 __arm64_sys_ioctl+0x154/0x1d0 
invoke_syscall+0x98/0x278 el0_svc_common+0x214/0x274 do_el0_svc+0x9c/0x19c el0_svc+0x5c/0xc0 el0t_64_sync_handler+0x78/0x108 el0t_64_sync+0x1a4/0x1a8 Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/564191/ --- drivers/gpu/drm/msm/msm_gem_submit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 207b6ba1565d..30d72191cee6 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -43,7 +43,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, if (sz > SIZE_MAX) return ERR_PTR(-ENOMEM); - submit = kzalloc(sz, GFP_KERNEL); + submit = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); if (!submit) return ERR_PTR(-ENOMEM); @@ -234,7 +234,7 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, ret = -ENOMEM; goto out; } - submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL); + submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN); if (!submit->cmd[i].relocs) { ret = -ENOMEM; goto out; -- cgit v1.2.3 From 85884871921000b9bca2184077b1159771e50047 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 22 Nov 2023 13:48:22 +0100 Subject: i915: make inject_virtual_interrupt() void The single caller of inject_virtual_interrupt() ignores the return value anyway. This allows us to simplify eventfd_signal() in follow-up patches. Link: https://lore.kernel.org/r/20231122-vfs-eventfd-signal-v2-1-bd549b14ce0c@kernel.org Reviewed-by: Jan Kara Reviewed-by: Zhenyu Wang Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- drivers/gpu/drm/i915/gvt/interrupt.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index de3f5903d1a7..b32ba5f2b240 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -422,7 +422,7 @@ static void init_irq_map(struct intel_gvt_irq *irq) #define MSI_CAP_DATA(offset) (offset + 8) #define MSI_CAP_EN 0x1 -static int inject_virtual_interrupt(struct intel_vgpu *vgpu) +static void inject_virtual_interrupt(struct intel_vgpu *vgpu) { unsigned long offset = vgpu->gvt->device_info.msi_cap_offset; u16 control, data; @@ -434,10 +434,10 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu) /* Do not generate MSI if MSIEN is disabled */ if (!(control & MSI_CAP_EN)) - return 0; + return; if (WARN(control & GENMASK(15, 1), "only support one MSI format\n")) - return -EINVAL; + return; trace_inject_msi(vgpu->id, addr, data); @@ -451,10 +451,9 @@ static int inject_virtual_interrupt(struct intel_vgpu *vgpu) * returned and don't inject interrupt into guest. */ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) - return -ESRCH; - if (vgpu->msi_trigger && eventfd_signal(vgpu->msi_trigger, 1) != 1) - return -EFAULT; - return 0; + return; + if (vgpu->msi_trigger) + eventfd_signal(vgpu->msi_trigger, 1); } static void propagate_event(struct intel_gvt_irq *irq, -- cgit v1.2.3 From 9aa6a662c309e6f8770972840948af963bd6ff34 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 12:49:40 +0200 Subject: drm/mediatek: mtk_disp_gamma: Fix breakage due to merge issue While the commit that was sent to the mailing lists was fine, something happened during merge and the mtk_gamma_set() function got broken as a writel() was turned into a readl(). 
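Concretely, the end of mtk_gamma_set() is a read-modify-write of the CFG register, and it is the final store that went missing in the merge (abridged sketch based on the hunk below; assumes cfg_val was read back from DISP_GAMMA_CFG earlier in the function):

	/* Disable RELAY mode to pass the processed image */
	cfg_val &= ~GAMMA_RELAY_MODE;
	writel(cfg_val, gamma->regs + DISP_GAMMA_CFG);	/* the merge turned this into "cfg_val = readl(...)", so the register was never updated */
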
Fix that by changing that back to the expected writel(). Fixes: a6b39cd248f3 ("drm/mediatek: De-commonize disp_aal/disp_gamma gamma_set functions") Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20231025104940.140605-1-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_gamma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c index f81dc34c9c3e..c1bc8b00d938 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c @@ -203,7 +203,7 @@ void mtk_gamma_set(struct device *dev, struct drm_crtc_state *state) /* Disable RELAY mode to pass the processed image */ cfg_val &= ~GAMMA_RELAY_MODE; - cfg_val = readl(gamma->regs + DISP_GAMMA_CFG); + writel(cfg_val, gamma->regs + DISP_GAMMA_CFG); } void mtk_gamma_config(struct device *dev, unsigned int w, -- cgit v1.2.3 From 4662817aed5a9d6c695658d0105d8ff4b84ac6cb Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 5 Sep 2023 10:49:21 +0200 Subject: drm/mediatek: fix kernel oops if no crtc is found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_crtc_from_index(0) might return NULL if there are no CRTCs registered at all which will lead to a kernel oops in mtk_drm_crtc_dma_dev_get(). Add the missing return value check. Fixes: 0d9eee9118b7 ("drm/mediatek: Add drm ovl_adaptor sub driver for MT8195") Signed-off-by: Michael Walle Reviewed-by: Nícolas F. R. A. Prado Tested-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Tested-by: Eugen Hristev Reviewed-by: Eugen Hristev Link: https://patchwork.kernel.org/project/dri-devel/patch/20230905084922.3908121-1-mwalle@kernel.org/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 2dfaa613276a..2b0c35cacbc6 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -443,6 +443,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) struct mtk_drm_private *private = drm->dev_private; struct mtk_drm_private *priv_n; struct device *dma_dev = NULL; + struct drm_crtc *crtc; int ret, i, j; if (drm_firmware_drivers_only()) @@ -519,7 +520,9 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* Use OVL device for all DMA memory allocations */ - dma_dev = mtk_drm_crtc_dma_dev_get(drm_crtc_from_index(drm, 0)); + crtc = drm_crtc_from_index(drm, 0); + if (crtc) + dma_dev = mtk_drm_crtc_dma_dev_get(crtc); if (!dma_dev) { ret = -ENODEV; dev_err(drm->dev, "Need at least one OVL device\n"); -- cgit v1.2.3 From 58046e6cf811464b8a6f269dc6a40a8cb91a8a68 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 22 Nov 2023 11:31:36 +0200 Subject: drm/i915: Stop printing pipe name as hex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Print the pipe name in ascii rather than hex. 
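For reference, pipe_name() maps the pipe enum to an ASCII letter (assumed here to be the usual "(p) + 'A'" helper from the display headers), so the warning's argument was already the letter; formatting it with "%x" merely dumped its character code:

	#define pipe_name(p)	((p) + 'A')	/* PIPE_A == 0 -> 'A', PIPE_B -> 'B', ... */

	/* with "%x": pipe_name(PIPE_B) prints as "42" (the character code in hex);
	 * with "%c": it prints as "B", which is what the message intends. */
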
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231122093137.1509-3-ville.syrjala@linux.intel.com Reviewed-by: Jouni Högander --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 7958d0bd851e..ef57dad1a9cb 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -4537,7 +4537,7 @@ void intel_shared_dpll_state_verify(struct intel_atomic_state *state, "pll active mismatch (didn't expect pipe %c in active mask (0x%x))\n", pipe_name(crtc->pipe), pll->active_mask); I915_STATE_WARN(i915, pll->state.pipe_mask & pipe_mask, - "pll enabled crtcs mismatch (found %x in enabled mask (0x%x))\n", + "pll enabled crtcs mismatch (found pipe %c in enabled mask (0x%x))\n", pipe_name(crtc->pipe), pll->state.pipe_mask); } } -- cgit v1.2.3 From b90fccfb5cde406365c33aa21ee87da83bbfca02 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 22 Nov 2023 11:31:37 +0200 Subject: drm/i915: Move the SDP split debug spew to the correct place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding ad-hoc debug prints all over the place is not good. Move the SDP split debug spew into the proper place (state dumper). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231122093137.1509-4-ville.syrjala@linux.intel.com Reviewed-by: Jouni Högander --- drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 7 ------- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 66fe880af8f3..30336453041d 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -261,6 +261,9 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, drm_dbg_kms(&i915->drm, "fec: %s, enhanced framing: %s\n", str_enabled_disabled(pipe_config->fec_enable), str_enabled_disabled(pipe_config->enhanced_framing)); + + drm_dbg_kms(&i915->drm, "sdp split: %s\n", + str_enabled_disabled(pipe_config->sdp_split_enable)); } drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n", diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 6d5a794b6b71..f9b5fc2fdbcb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2844,19 +2844,12 @@ intel_dp_audio_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - struct drm_connector *connector = conn_state->connector; - pipe_config->has_audio = intel_dp_has_audio(encoder, pipe_config, conn_state) && intel_audio_compute_config(encoder, pipe_config, conn_state); pipe_config->sdp_split_enable = pipe_config->has_audio && intel_dp_is_uhbr(pipe_config); - - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] SDP split enable: %s\n", - connector->base.id, connector->name, - str_yes_no(pipe_config->sdp_split_enable)); } int -- cgit v1.2.3 From 39d5b6a64ace77d0c11c398d272218df5f939abb Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Thu, 23 Nov 2023 11:26:15 +0800 Subject: 
drm/bridge: panel: Check device dependency before managing device link Some panel devices already depend on DRM device, like the panel in arch/arm/boot/dts/st/ste-ux500-samsung-skomer.dts, because DRM device is the ancestor of those panel devices. device_link_add() would fail by returning a NULL pointer for those panel devices because of the existing dependency. So, check the dependency by calling device_is_dependent() before adding or deleting device link between panel device and DRM device so that the link is managed only for independent panel devices. Fixes: 887878014534 ("drm/bridge: panel: Fix device link for DRM_BRIDGE_ATTACH_NO_CONNECTOR") Fixes: 199cf07ebd2b ("drm/bridge: panel: Add a device link between drm device and panel device") Reported-by: Linus Walleij Closes: https://lore.kernel.org/lkml/CACRpkdaGzXD6HbiX7mVUNJAJtMEPG00Pp6+nJ1P0JrfJ-ArMvQ@mail.gmail.com/T/ Tested-by: Linus Walleij Signed-off-by: Liu Ying Signed-off-by: Linus Walleij Link: https://patchwork.freedesktop.org/patch/msgid/20231123032615.3760488-1-victor.liu@nxp.com --- drivers/gpu/drm/bridge/panel.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index e48823a4f1ed..5e8980023407 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -23,6 +23,7 @@ struct panel_bridge { struct drm_panel *panel; struct device_link *link; u32 connector_type; + bool is_independent; }; static inline struct panel_bridge * @@ -67,12 +68,17 @@ static int panel_bridge_attach(struct drm_bridge *bridge, struct drm_device *drm_dev = bridge->dev; int ret; - panel_bridge->link = device_link_add(drm_dev->dev, panel->dev, - DL_FLAG_STATELESS); - if (!panel_bridge->link) { - DRM_ERROR("Failed to add device link between %s and %s\n", - dev_name(drm_dev->dev), dev_name(panel->dev)); - return -EINVAL; + panel_bridge->is_independent = !device_is_dependent(drm_dev->dev, + panel->dev); + + if (panel_bridge->is_independent) { + panel_bridge->link = device_link_add(drm_dev->dev, panel->dev, + DL_FLAG_STATELESS); + if (!panel_bridge->link) { + DRM_ERROR("Failed to add device link between %s and %s\n", + dev_name(drm_dev->dev), dev_name(panel->dev)); + return -EINVAL; + } } if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) @@ -80,7 +86,8 @@ static int panel_bridge_attach(struct drm_bridge *bridge, if (!bridge->encoder) { DRM_ERROR("Missing encoder\n"); - device_link_del(panel_bridge->link); + if (panel_bridge->is_independent) + device_link_del(panel_bridge->link); return -ENODEV; } @@ -92,7 +99,8 @@ static int panel_bridge_attach(struct drm_bridge *bridge, panel_bridge->connector_type); if (ret) { DRM_ERROR("Failed to initialize connector\n"); - device_link_del(panel_bridge->link); + if (panel_bridge->is_independent) + device_link_del(panel_bridge->link); return ret; } @@ -115,7 +123,8 @@ static void panel_bridge_detach(struct drm_bridge *bridge) struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); struct drm_connector *connector = &panel_bridge->connector; - device_link_del(panel_bridge->link); + if (panel_bridge->is_independent) + device_link_del(panel_bridge->link); /* * Cleanup the connector if we know it was initialized. 
-- cgit v1.2.3 From cf1aaa7d4a719f0bdd9c246c0fac8247cb54ddd7 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Sat, 25 Nov 2023 11:11:50 -0800 Subject: drm/msm/a6xx: Add missing BIT(7) to REG_A6XX_UCHE_CLIENT_PF Downstream always set BIT(7) Signed-off-by: Danylo Piliaiev Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/568930/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 7a0220d29a23..e5558d5e0aa2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1782,7 +1782,7 @@ static int hw_init(struct msm_gpu *gpu) else gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff); - gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1); + gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1); /* Set weights for bicubic filtering */ if (adreno_is_a650_family(adreno_gpu)) { -- cgit v1.2.3 From 07e6de738aa6f0e873463e9ca88bdb7081c4bfd4 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Sat, 25 Nov 2023 11:11:51 -0800 Subject: drm/msm/a690: Fix reg values for a690 KGSL doesn't support a690 so all reg values were the same as on a660. Now we know the values and they are different from the windows driver. This fixes hangs on D3D12 games and some CTS tests. Signed-off-by: Danylo Piliaiev Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/568931/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index e5558d5e0aa2..723f9b97734d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1312,6 +1312,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu) || + adreno_is_a690(adreno_gpu) || adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)) { /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */ @@ -1321,13 +1322,6 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) uavflagprd_inv = 2; } - if (adreno_is_a690(adreno_gpu)) { - hbb_lo = 2; - amsbc = 1; - rgb565_predicator = 1; - uavflagprd_inv = 2; - } - if (adreno_is_7c3(adreno_gpu)) { hbb_lo = 1; amsbc = 1; @@ -1741,7 +1735,9 @@ static int hw_init(struct msm_gpu *gpu) /* Setting the primFifo thresholds default values, * and vccCacheSkipDis=1 bit (0x200) for A640 and newer */ - if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu) || adreno_is_a690(adreno_gpu)) + if (adreno_is_a690(adreno_gpu)) + gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00800200); + else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200); @@ -1775,6 +1771,8 @@ static int hw_init(struct msm_gpu *gpu) if (adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)) gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff); + else if (adreno_is_a690(adreno_gpu)) + gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff); else if (adreno_is_a619(adreno_gpu)) gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff); else if (adreno_is_a610(adreno_gpu)) @@ 
-1808,12 +1806,17 @@ static int hw_init(struct msm_gpu *gpu) a6xx_set_cp_protect(gpu); if (adreno_is_a660_family(adreno_gpu)) { - gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); + if (adreno_is_a690(adreno_gpu)) + gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801); + else + gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); } + if (adreno_is_a690(adreno_gpu)) + gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90); /* Set dualQ + disable afull for A660 GPU */ - if (adreno_is_a660(adreno_gpu)) + else if (adreno_is_a660(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); else if (adreno_is_a7xx(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, -- cgit v1.2.3 From fe4c5f662097978b6c91c23a13c24ed92339a180 Mon Sep 17 00:00:00 2001 From: "Jason-JH.Lin" Date: Wed, 20 Sep 2023 17:06:58 +0800 Subject: drm/mediatek: Add spinlock for setting vblank event in atomic_begin Add spinlock protection to avoid race condition on vblank event between mtk_drm_crtc_atomic_begin() and mtk_drm_finish_page_flip(). Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Jason-JH.Lin Suggested-by: AngeloGioacchino Del Regno Reviewed-by: Alexandre Mergnat Reviewed-by: Fei Shao Tested-by: Fei Shao Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20230920090658.31181-1-jason-jh.lin@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index c277b9fae950..0a7d60c191b8 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -788,6 +788,7 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc, crtc); struct mtk_crtc_state *mtk_crtc_state = to_mtk_crtc_state(crtc_state); struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + unsigned long flags; if (mtk_crtc->event && mtk_crtc_state->base.event) DRM_ERROR("new event while there is still a pending event\n"); @@ -795,7 +796,11 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc, if (mtk_crtc_state->base.event) { mtk_crtc_state->base.event->pipe = drm_crtc_index(crtc); WARN_ON(drm_crtc_vblank_get(crtc) != 0); + + spin_lock_irqsave(&crtc->dev->event_lock, flags); mtk_crtc->event = mtk_crtc_state->base.event; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + mtk_crtc_state->base.event = NULL; } } -- cgit v1.2.3 From b1f5279b5981f9ed851163ee661692f42397982f Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Mon, 20 Nov 2023 10:26:05 +0200 Subject: drm/i915/psr: Move plane sel fetch configuration into plane source files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently selective fetch configuration for planes is implemented in psr code. More suitable place for this code is where everything else is configured for planes -> move it into skl_universal_plane.c and intel_cursor.c. This also allows us to drop hooks for cursor handling. 
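In practice, dropping the cursor hooks means the old shared arm helper in intel_psr.c no longer needs to branch on the plane type (condensed from the code removed in the diff below):

	if (plane->id == PLANE_CURSOR)
		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
				  plane_state->ctl);
	else
		intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id),
				  PLANE_SEL_FETCH_CTL_ENABLE);

After the move each caller simply writes its own value: intel_cursor.c writes plane_state->ctl and skl_universal_plane.c writes PLANE_SEL_FETCH_CTL_ENABLE.
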
v3: Checkpatch warnings fixed v2: Removed setting sel_fetch_area->y1/y2 as -1 Signed-off-by: Jouni Högander Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231120082606.3156488-2-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_cursor.c | 33 +++++++++- drivers/gpu/drm/i915/display/intel_psr.c | 75 ---------------------- drivers/gpu/drm/i915/display/intel_psr.h | 10 --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 75 +++++++++++++++++++++- 4 files changed, 102 insertions(+), 91 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index b342fad180ca..c025cafda7d9 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -21,6 +21,7 @@ #include "intel_fb_pin.h" #include "intel_frontbuffer.h" #include "intel_psr.h" +#include "intel_psr_regs.h" #include "skl_watermark.h" /* Cursor formats */ @@ -484,6 +485,32 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, return 0; } +static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), + plane_state->ctl); +} + +static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0); +} + /* TODO: split into noarm+arm pair */ static void i9xx_cursor_update_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -531,10 +558,10 @@ static void i9xx_cursor_update_arm(struct intel_plane *plane, skl_write_cursor_wm(plane, crtc_state); if (plane_state) - intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state, - plane_state); + i9xx_cursor_update_sel_fetch_arm(plane, crtc_state, + plane_state); else - intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state); + i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state); if (plane->cursor.base != base || plane->cursor.size != fbc_ctl || diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 15c1804dcd59..e7d65282fb88 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1917,81 +1917,6 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp) intel_de_write(dev_priv, CURSURFLIVE(intel_dp->psr.pipe), 0); } -void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - enum pipe pipe = plane->pipe; - - if (!crtc_state->enable_psr2_sel_fetch) - return; - - intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0); -} - -void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - struct drm_i915_private *i915 = to_i915(plane->base.dev); - enum pipe pipe = plane->pipe; - - if (!crtc_state->enable_psr2_sel_fetch) - return; - - if (plane->id == PLANE_CURSOR) - 
intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), - plane_state->ctl); - else - intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), - PLANE_SEL_FETCH_CTL_ENABLE); -} - -void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - int color_plane) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - enum pipe pipe = plane->pipe; - const struct drm_rect *clip; - u32 val; - int x, y; - - if (!crtc_state->enable_psr2_sel_fetch) - return; - - if (plane->id == PLANE_CURSOR) - return; - - clip = &plane_state->psr2_sel_fetch_area; - - val = (clip->y1 + plane_state->uapi.dst.y1) << 16; - val |= plane_state->uapi.dst.x1; - intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_POS(pipe, plane->id), val); - - x = plane_state->view.color_plane[color_plane].x; - - /* - * From Bspec: UV surface Start Y Position = half of Y plane Y - * start position. - */ - if (!color_plane) - y = plane_state->view.color_plane[color_plane].y + clip->y1; - else - y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2; - - val = y << 16 | x; - - intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_OFFSET(pipe, plane->id), - val); - - /* Sizes are 0 based */ - val = (drm_rect_height(clip) - 1) << 16; - val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1; - intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val); -} - void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 6a1f4573852b..143e0595c097 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -55,16 +55,6 @@ bool intel_psr_enabled(struct intel_dp *intel_dp); int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state); -void intel_psr2_program_plane_sel_fetch_noarm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, - int color_plane); -void intel_psr2_program_plane_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state); - -void intel_psr2_disable_plane_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state); void intel_psr_pause(struct intel_dp *intel_dp); void intel_psr_resume(struct intel_dp *intel_dp); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index f5c77a018e10..0cac8be0c77b 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -18,6 +18,7 @@ #include "intel_fbc.h" #include "intel_frontbuffer.h" #include "intel_psr.h" +#include "intel_psr_regs.h" #include "skl_scaler.h" #include "skl_universal_plane.h" #include "skl_watermark.h" @@ -629,6 +630,18 @@ skl_plane_disable_arm(struct intel_plane *plane, intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0); } +static void icl_plane_disable_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + 
intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0); +} + static void icl_plane_disable_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) @@ -642,7 +655,7 @@ icl_plane_disable_arm(struct intel_plane *plane, skl_write_plane_wm(plane, crtc_state); - intel_psr2_disable_plane_sel_fetch_arm(plane, crtc_state); + icl_plane_disable_sel_fetch_arm(plane, crtc_state); intel_de_write_fw(dev_priv, PLANE_CTL(pipe, plane_id), 0); intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), 0); } @@ -1197,6 +1210,48 @@ skl_plane_update_arm(struct intel_plane *plane, skl_plane_surf(plane_state, 0)); } +static void icl_plane_update_sel_fetch_noarm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int color_plane) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + const struct drm_rect *clip; + u32 val; + int x, y; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + clip = &plane_state->psr2_sel_fetch_area; + + val = (clip->y1 + plane_state->uapi.dst.y1) << 16; + val |= plane_state->uapi.dst.x1; + intel_de_write_fw(i915, PLANE_SEL_FETCH_POS(pipe, plane->id), val); + + x = plane_state->view.color_plane[color_plane].x; + + /* + * From Bspec: UV surface Start Y Position = half of Y plane Y + * start position. + */ + if (!color_plane) + y = plane_state->view.color_plane[color_plane].y + clip->y1; + else + y = plane_state->view.color_plane[color_plane].y + clip->y1 / 2; + + val = y << 16 | x; + + intel_de_write_fw(i915, PLANE_SEL_FETCH_OFFSET(pipe, plane->id), + val); + + /* Sizes are 0 based */ + val = (drm_rect_height(clip) - 1) << 16; + val |= (drm_rect_width(&plane_state->uapi.src) >> 16) - 1; + intel_de_write_fw(i915, PLANE_SEL_FETCH_SIZE(pipe, plane->id), val); +} + static void icl_plane_update_noarm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -1269,7 +1324,21 @@ icl_plane_update_noarm(struct intel_plane *plane, if (plane_state->force_black) icl_plane_csc_load_black(plane); - intel_psr2_program_plane_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane); + icl_plane_update_sel_fetch_noarm(plane, crtc_state, plane_state, color_plane); +} + +static void icl_plane_update_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *i915 = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), + PLANE_SEL_FETCH_CTL_ENABLE); } static void @@ -1296,7 +1365,7 @@ icl_plane_update_arm(struct intel_plane *plane, if (plane_state->scaler_id >= 0) skl_program_plane_scaler(plane, crtc_state, plane_state); - intel_psr2_program_plane_sel_fetch_arm(plane, crtc_state, plane_state); + icl_plane_update_sel_fetch_arm(plane, crtc_state, plane_state); /* * The control register self-arms if the plane was previously -- cgit v1.2.3 From a4f477e6ac171ccdea38556437493c3c5222bbe5 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Mon, 20 Nov 2023 10:26:06 +0200 Subject: drm/i915/psr: Add proper handling for disabling sel fetch for planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are enabling selective fetch for all planes that are visible. This is suboptimal as we might be fetching for memory for planes that are not part of selective update. 
Fix this by adding proper handling for disabling plane selective fetch: If plane previously part of selective update is now not part of update: Add it into updated planes and let the plane configuration to disable selective fetch for it. v3: Checkpatch warnings fixed v2: - Add setting sel_fetch_area->y1/y2 to -1 - Remove setting again local sel_fetch_area variable Signed-off-by: Jouni Högander Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231120082606.3156488-3-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_cursor.c | 21 ++++++++++++--------- drivers/gpu/drm/i915/display/intel_psr.c | 13 ++++++++++++- drivers/gpu/drm/i915/display/skl_universal_plane.c | 7 +++++-- 3 files changed, 29 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index c025cafda7d9..a515ae2831f8 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -485,22 +485,21 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, return 0; } -static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(plane->base.dev); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; if (!crtc_state->enable_psr2_sel_fetch) return; - intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), - plane_state->ctl); + intel_de_write_fw(dev_priv, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0); } -static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state) +static void i9xx_cursor_update_sel_fetch_arm(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { struct drm_i915_private *i915 = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; @@ -508,7 +507,11 @@ static void i9xx_cursor_disable_sel_fetch_arm(struct intel_plane *plane, if (!crtc_state->enable_psr2_sel_fetch) return; - intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), 0); + if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0) + intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), + plane_state->ctl); + else + i9xx_cursor_disable_sel_fetch_arm(plane, crtc_state); } /* TODO: split into noarm+arm pair */ diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index e7d65282fb88..36e4a1e9b98f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -2176,8 +2176,19 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, continue; inter = pipe_clip; - if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) + sel_fetch_area = &new_plane_state->psr2_sel_fetch_area; + if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst)) { + sel_fetch_area->y1 = -1; + sel_fetch_area->y2 = -1; + /* + * if plane sel fetch was previously enabled -> + * disable it + */ + if (drm_rect_height(&old_plane_state->psr2_sel_fetch_area) > 0) + crtc_state->update_planes |= BIT(plane->id); + continue; + } if (!psr2_sel_fetch_plane_state_supported(new_plane_state)) { full_update = true; diff --git 
a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 0cac8be0c77b..511dc1544854 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1337,8 +1337,11 @@ static void icl_plane_update_sel_fetch_arm(struct intel_plane *plane, if (!crtc_state->enable_psr2_sel_fetch) return; - intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), - PLANE_SEL_FETCH_CTL_ENABLE); + if (drm_rect_height(&plane_state->psr2_sel_fetch_area) > 0) + intel_de_write_fw(i915, PLANE_SEL_FETCH_CTL(pipe, plane->id), + PLANE_SEL_FETCH_CTL_ENABLE); + else + icl_plane_disable_sel_fetch_arm(plane, crtc_state); } static void -- cgit v1.2.3 From c13f87efa7488fcd5f4d6e89c8f9d5bb072f9e6e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 28 Nov 2023 00:10:20 +0100 Subject: Revert "drm/bridge: panel: Check device dependency before managing device link" This reverts commit 39d5b6a64ace77d0c11c398d272218df5f939abb. This patch was causing build errors by using an unexported function from the device core, which Greg questions the saneness in exporting. Link: https://lore.kernel.org/lkml/CACRpkdaGzXD6HbiX7mVUNJAJtMEPG00Pp6+nJ1P0JrfJ-ArMvQ@mail.gmail.com/T/ Signed-off-by: Linus Walleij Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20231128-revert-panel-fix-v1-2-69bb05048dae@linaro.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231128-revert-panel-fix-v1-2-69bb05048dae@linaro.org --- drivers/gpu/drm/bridge/panel.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 5e8980023407..e48823a4f1ed 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -23,7 +23,6 @@ struct panel_bridge { struct drm_panel *panel; struct device_link *link; u32 connector_type; - bool is_independent; }; static inline struct panel_bridge * @@ -68,17 +67,12 @@ static int panel_bridge_attach(struct drm_bridge *bridge, struct drm_device *drm_dev = bridge->dev; int ret; - panel_bridge->is_independent = !device_is_dependent(drm_dev->dev, - panel->dev); - - if (panel_bridge->is_independent) { - panel_bridge->link = device_link_add(drm_dev->dev, panel->dev, - DL_FLAG_STATELESS); - if (!panel_bridge->link) { - DRM_ERROR("Failed to add device link between %s and %s\n", - dev_name(drm_dev->dev), dev_name(panel->dev)); - return -EINVAL; - } + panel_bridge->link = device_link_add(drm_dev->dev, panel->dev, + DL_FLAG_STATELESS); + if (!panel_bridge->link) { + DRM_ERROR("Failed to add device link between %s and %s\n", + dev_name(drm_dev->dev), dev_name(panel->dev)); + return -EINVAL; } if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) @@ -86,8 +80,7 @@ static int panel_bridge_attach(struct drm_bridge *bridge, if (!bridge->encoder) { DRM_ERROR("Missing encoder\n"); - if (panel_bridge->is_independent) - device_link_del(panel_bridge->link); + device_link_del(panel_bridge->link); return -ENODEV; } @@ -99,8 +92,7 @@ static int panel_bridge_attach(struct drm_bridge *bridge, panel_bridge->connector_type); if (ret) { DRM_ERROR("Failed to initialize connector\n"); - if (panel_bridge->is_independent) - device_link_del(panel_bridge->link); + device_link_del(panel_bridge->link); return ret; } @@ -123,8 +115,7 @@ static void panel_bridge_detach(struct drm_bridge *bridge) struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); 
struct drm_connector *connector = &panel_bridge->connector; - if (panel_bridge->is_independent) - device_link_del(panel_bridge->link); + device_link_del(panel_bridge->link); /* * Cleanup the connector if we know it was initialized. -- cgit v1.2.3 From 9b6a59e5db87c2c6b3ca0391176ed4358623d5e4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 28 Nov 2023 00:10:21 +0100 Subject: Revert "drm/bridge: panel: Add a device link between drm device and panel device" This reverts commit 199cf07ebd2b0d41185ac79b895547d45610b681. This patch creates bugs on devices where the DRM device is the ancestor of the panel devices. Attempts to fix this have failed because it leads to using device core functionality which is questionable. Reported-by: Linus Walleij Link: https://lore.kernel.org/lkml/CACRpkdaGzXD6HbiX7mVUNJAJtMEPG00Pp6+nJ1P0JrfJ-ArMvQ@mail.gmail.com/T/ Signed-off-by: Linus Walleij Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20231128-revert-panel-fix-v1-3-69bb05048dae@linaro.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231128-revert-panel-fix-v1-3-69bb05048dae@linaro.org --- drivers/gpu/drm/bridge/panel.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index e48823a4f1ed..7f41525f7a6e 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -4,8 +4,6 @@ * Copyright (C) 2017 Broadcom */ -#include - #include #include #include @@ -21,7 +19,6 @@ struct panel_bridge { struct drm_bridge bridge; struct drm_connector connector; struct drm_panel *panel; - struct device_link *link; u32 connector_type; }; @@ -63,24 +60,13 @@ static int panel_bridge_attach(struct drm_bridge *bridge, { struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); struct drm_connector *connector = &panel_bridge->connector; - struct drm_panel *panel = panel_bridge->panel; - struct drm_device *drm_dev = bridge->dev; int ret; - panel_bridge->link = device_link_add(drm_dev->dev, panel->dev, - DL_FLAG_STATELESS); - if (!panel_bridge->link) { - DRM_ERROR("Failed to add device link between %s and %s\n", - dev_name(drm_dev->dev), dev_name(panel->dev)); - return -EINVAL; - } - if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) return 0; if (!bridge->encoder) { DRM_ERROR("Missing encoder\n"); - device_link_del(panel_bridge->link); return -ENODEV; } @@ -92,7 +78,6 @@ static int panel_bridge_attach(struct drm_bridge *bridge, panel_bridge->connector_type); if (ret) { DRM_ERROR("Failed to initialize connector\n"); - device_link_del(panel_bridge->link); return ret; } @@ -115,8 +100,6 @@ static void panel_bridge_detach(struct drm_bridge *bridge) struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); struct drm_connector *connector = &panel_bridge->connector; - device_link_del(panel_bridge->link); - /* * Cleanup the connector if we know it was initialized. * -- cgit v1.2.3 From b9c02e1052650af56d4487efa5fade3fb70e3653 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 6 Nov 2023 12:48:27 +0100 Subject: drm/gpuvm: Fix deprecated license identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "GPL-2.0-only" in the license header was incorrectly changed to the now deprecated "GPL-2.0". Fix. 
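For reference, SPDX deprecated the bare "GPL-2.0" (and "GPL-2.0+") spellings in favour of the explicit "GPL-2.0-only" / "GPL-2.0-or-later" forms, so the dual-licensed gpuvm files should carry (as restored by the diff below):

	// SPDX-License-Identifier: GPL-2.0-only OR MIT		(drm_gpuvm.c)
	/* SPDX-License-Identifier: GPL-2.0-only OR MIT */	(drm_gpuvm.h)
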
Cc: Maxime Ripard Cc: Danilo Krummrich Reported-by: David Edelsohn Closes: https://lore.kernel.org/dri-devel/5lfrhdpkwhpgzipgngojs3tyqfqbesifzu5nf4l5q3nhfdhcf2@25nmiq7tfrew/T/#m5c356d68815711eea30dd94cc6f7ea8cd4344fe3 Fixes: f7749a549b4f ("drm/gpuvm: Dual-licence the drm_gpuvm code GPL-2.0 OR MIT") Signed-off-by: Thomas Hellström Acked-by: Maxime Ripard Acked-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231106114827.62492-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/drm_gpuvm.c | 2 +- include/drm/drm_gpuvm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 08c088319652..b80d4e1cc9b7 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT +// SPDX-License-Identifier: GPL-2.0-only OR MIT /* * Copyright (c) 2022 Red Hat. * diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index bdfafc4a7705..c7a0594bdab1 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ #ifndef __DRM_GPUVM_H__ #define __DRM_GPUVM_H__ -- cgit v1.2.3 From 3652117f854819a148ff0fbe4492587d3520b5e5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 22 Nov 2023 13:48:23 +0100 Subject: eventfd: simplify eventfd_signal() Ever since the eventfd type was introduced back in 2007 in commit e1ad7468c77d ("signal/timer/event: eventfd core") the eventfd_signal() function only ever passed 1 as a value for @n. There's no point in keeping that additional argument. Link: https://lore.kernel.org/r/20231122-vfs-eventfd-signal-v2-2-bd549b14ce0c@kernel.org Acked-by: Xu Yilun Acked-by: Andrew Donnellan # ocxl Acked-by: Eric Farman # s390 Reviewed-by: Jan Kara Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- arch/x86/kvm/hyperv.c | 2 +- arch/x86/kvm/xen.c | 2 +- drivers/accel/habanalabs/common/device.c | 2 +- drivers/fpga/dfl.c | 2 +- drivers/gpu/drm/drm_syncobj.c | 6 +++--- drivers/gpu/drm/i915/gvt/interrupt.c | 2 +- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/misc/ocxl/file.c | 2 +- drivers/s390/cio/vfio_ccw_chp.c | 2 +- drivers/s390/cio/vfio_ccw_drv.c | 4 ++-- drivers/s390/cio/vfio_ccw_ops.c | 6 +++--- drivers/s390/crypto/vfio_ap_ops.c | 2 +- drivers/usb/gadget/function/f_fs.c | 4 ++-- drivers/vdpa/vdpa_user/vduse_dev.c | 6 +++--- drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 2 +- drivers/vfio/pci/vfio_pci_core.c | 6 +++--- drivers/vfio/pci/vfio_pci_intrs.c | 12 ++++++------ drivers/vfio/platform/vfio_platform_irq.c | 4 ++-- drivers/vhost/vdpa.c | 4 ++-- drivers/vhost/vhost.c | 10 +++++----- drivers/vhost/vhost.h | 2 +- drivers/virt/acrn/ioeventfd.c | 2 +- drivers/xen/privcmd.c | 2 +- fs/aio.c | 2 +- fs/eventfd.c | 11 ++++------- include/linux/eventfd.h | 4 ++-- mm/memcontrol.c | 10 +++++----- mm/vmpressure.c | 2 +- samples/vfio-mdev/mtty.c | 4 ++-- virt/kvm/eventfd.c | 4 ++-- 30 files changed, 61 insertions(+), 64 deletions(-) (limited to 'drivers/gpu') diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 238afd7335e4..4943f6b2bbee 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2388,7 +2388,7 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h if (!eventfd) return HV_STATUS_INVALID_PORT_ID; - eventfd_signal(eventfd, 1); + eventfd_signal(eventfd); return HV_STATUS_SUCCESS; } diff --git 
a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index e53fad915a62..523bb6df5ac9 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -2088,7 +2088,7 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r) if (ret < 0 && ret != -ENOTCONN) return false; } else { - eventfd_signal(evtchnfd->deliver.eventfd.ctx, 1); + eventfd_signal(evtchnfd->deliver.eventfd.ctx); } *r = 0; diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 9711e8fc979d..3a89644f087c 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -2044,7 +2044,7 @@ static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 notifier_event->events_mask |= event_mask; if (notifier_event->eventfd) - eventfd_signal(notifier_event->eventfd, 1); + eventfd_signal(notifier_event->eventfd); mutex_unlock(¬ifier_event->lock); } diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c index dd7a783d53b5..e73f88050f08 100644 --- a/drivers/fpga/dfl.c +++ b/drivers/fpga/dfl.c @@ -1872,7 +1872,7 @@ static irqreturn_t dfl_irq_handler(int irq, void *arg) { struct eventfd_ctx *trigger = arg; - eventfd_signal(trigger, 1); + eventfd_signal(trigger); return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 01da6789d044..b9cc62982196 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1365,7 +1365,7 @@ static void syncobj_eventfd_entry_fence_func(struct dma_fence *fence, struct syncobj_eventfd_entry *entry = container_of(cb, struct syncobj_eventfd_entry, fence_cb); - eventfd_signal(entry->ev_fd_ctx, 1); + eventfd_signal(entry->ev_fd_ctx); syncobj_eventfd_entry_free(entry); } @@ -1388,13 +1388,13 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj, entry->fence = fence; if (entry->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) { - eventfd_signal(entry->ev_fd_ctx, 1); + eventfd_signal(entry->ev_fd_ctx); syncobj_eventfd_entry_free(entry); } else { ret = dma_fence_add_callback(fence, &entry->fence_cb, syncobj_eventfd_entry_fence_func); if (ret == -ENOENT) { - eventfd_signal(entry->ev_fd_ctx, 1); + eventfd_signal(entry->ev_fd_ctx); syncobj_eventfd_entry_free(entry); } } diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index b32ba5f2b240..c8e7dfc9f791 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -453,7 +453,7 @@ static void inject_virtual_interrupt(struct intel_vgpu *vgpu) if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return; if (vgpu->msi_trigger) - eventfd_signal(vgpu->msi_trigger, 1); + eventfd_signal(vgpu->msi_trigger); } static void propagate_event(struct intel_gvt_irq *irq, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 8ba53edf2311..869369cb5b5f 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2498,7 +2498,7 @@ static void dispatch_event_fd(struct list_head *fd_list, list_for_each_entry_rcu(item, fd_list, xa_list) { if (item->eventfd) - eventfd_signal(item->eventfd, 1); + eventfd_signal(item->eventfd); else deliver_event(item, data); } diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index ac69b7f361f5..7eb74711ac96 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -184,7 +184,7 @@ static irqreturn_t irq_handler(void *private) { struct eventfd_ctx *ev_ctx = private; - eventfd_signal(ev_ctx, 1); + 
eventfd_signal(ev_ctx); return IRQ_HANDLED; } diff --git a/drivers/s390/cio/vfio_ccw_chp.c b/drivers/s390/cio/vfio_ccw_chp.c index d3f3a611f95b..38c176cf6295 100644 --- a/drivers/s390/cio/vfio_ccw_chp.c +++ b/drivers/s390/cio/vfio_ccw_chp.c @@ -115,7 +115,7 @@ static ssize_t vfio_ccw_crw_region_read(struct vfio_ccw_private *private, /* Notify the guest if more CRWs are on our queue */ if (!list_empty(&private->crw) && private->crw_trigger) - eventfd_signal(private->crw_trigger, 1); + eventfd_signal(private->crw_trigger); return ret; } diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 43601816ea4e..bfb35cfce1ef 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -112,7 +112,7 @@ void vfio_ccw_sch_io_todo(struct work_struct *work) private->state = VFIO_CCW_STATE_IDLE; if (private->io_trigger) - eventfd_signal(private->io_trigger, 1); + eventfd_signal(private->io_trigger); } void vfio_ccw_crw_todo(struct work_struct *work) @@ -122,7 +122,7 @@ void vfio_ccw_crw_todo(struct work_struct *work) private = container_of(work, struct vfio_ccw_private, crw_work); if (!list_empty(&private->crw) && private->crw_trigger) - eventfd_signal(private->crw_trigger, 1); + eventfd_signal(private->crw_trigger); } /* diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index cba4971618ff..ea532a8a4a0c 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -421,7 +421,7 @@ static int vfio_ccw_mdev_set_irqs(struct vfio_ccw_private *private, case VFIO_IRQ_SET_DATA_NONE: { if (*ctx) - eventfd_signal(*ctx, 1); + eventfd_signal(*ctx); return 0; } case VFIO_IRQ_SET_DATA_BOOL: @@ -432,7 +432,7 @@ static int vfio_ccw_mdev_set_irqs(struct vfio_ccw_private *private, return -EFAULT; if (trigger && *ctx) - eventfd_signal(*ctx, 1); + eventfd_signal(*ctx); return 0; } case VFIO_IRQ_SET_DATA_EVENTFD: @@ -612,7 +612,7 @@ static void vfio_ccw_mdev_request(struct vfio_device *vdev, unsigned int count) "Relaying device request to user (#%u)\n", count); - eventfd_signal(private->req_trigger, 1); + eventfd_signal(private->req_trigger); } else if (count == 0) { dev_notice(dev, "No device request channel registered, blocked until released by user\n"); diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 4db538a55192..542b5be73a6a 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1794,7 +1794,7 @@ static void vfio_ap_mdev_request(struct vfio_device *vdev, unsigned int count) "Relaying device request to user (#%u)\n", count); - eventfd_signal(matrix_mdev->req_trigger, 1); + eventfd_signal(matrix_mdev->req_trigger); } else if (count == 0) { dev_notice(dev, "No device request registered, blocked until released by user\n"); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index efe3e3b85769..fdd0fc7b8f25 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -831,7 +831,7 @@ static void ffs_user_copy_worker(struct work_struct *work) io_data->kiocb->ki_complete(io_data->kiocb, ret); if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd) - eventfd_signal(io_data->ffs->ffs_eventfd, 1); + eventfd_signal(io_data->ffs->ffs_eventfd); if (io_data->read) kfree(io_data->to_free); @@ -2738,7 +2738,7 @@ static void __ffs_event_add(struct ffs_data *ffs, ffs->ev.types[ffs->ev.count++] = type; wake_up_locked(&ffs->ev.waitq); if (ffs->ffs_eventfd) - eventfd_signal(ffs->ffs_eventfd, 1); + 
eventfd_signal(ffs->ffs_eventfd); } static void ffs_event_add(struct ffs_data *ffs, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 0ddd4b8abecb..6cb5ce4a8b9a 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -493,7 +493,7 @@ static void vduse_vq_kick(struct vduse_virtqueue *vq) goto unlock; if (vq->kickfd) - eventfd_signal(vq->kickfd, 1); + eventfd_signal(vq->kickfd); else vq->kicked = true; unlock: @@ -911,7 +911,7 @@ static int vduse_kickfd_setup(struct vduse_dev *dev, eventfd_ctx_put(vq->kickfd); vq->kickfd = ctx; if (vq->ready && vq->kicked && vq->kickfd) { - eventfd_signal(vq->kickfd, 1); + eventfd_signal(vq->kickfd); vq->kicked = false; } spin_unlock(&vq->kick_lock); @@ -960,7 +960,7 @@ static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq) spin_lock_irq(&vq->irq_lock); if (vq->ready && vq->cb.trigger) { - eventfd_signal(vq->cb.trigger, 1); + eventfd_signal(vq->cb.trigger); signal = true; } spin_unlock_irq(&vq->irq_lock); diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index c51229fccbd6..d62fbfff20b8 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -54,7 +54,7 @@ static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg) { struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg; - eventfd_signal(mc_irq->trigger, 1); + eventfd_signal(mc_irq->trigger); return IRQ_HANDLED; } diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 1929103ee59a..1cbc990d42e0 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -443,7 +443,7 @@ static int vfio_pci_core_runtime_resume(struct device *dev) */ down_write(&vdev->memory_lock); if (vdev->pm_wake_eventfd_ctx) { - eventfd_signal(vdev->pm_wake_eventfd_ctx, 1); + eventfd_signal(vdev->pm_wake_eventfd_ctx); __vfio_pci_runtime_pm_exit(vdev); } up_write(&vdev->memory_lock); @@ -1883,7 +1883,7 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) pci_notice_ratelimited(pdev, "Relaying device request to user (#%u)\n", count); - eventfd_signal(vdev->req_trigger, 1); + eventfd_signal(vdev->req_trigger); } else if (count == 0) { pci_warn(pdev, "No device request channel registered, blocked until released by user\n"); @@ -2302,7 +2302,7 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, mutex_lock(&vdev->igate); if (vdev->err_trigger) - eventfd_signal(vdev->err_trigger, 1); + eventfd_signal(vdev->err_trigger); mutex_unlock(&vdev->igate); diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index cbb4bcbfbf83..237beac83809 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -94,7 +94,7 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) ctx = vfio_irq_ctx_get(vdev, 0); if (WARN_ON_ONCE(!ctx)) return; - eventfd_signal(ctx->trigger, 1); + eventfd_signal(ctx->trigger); } } @@ -342,7 +342,7 @@ static irqreturn_t vfio_msihandler(int irq, void *arg) { struct eventfd_ctx *trigger = arg; - eventfd_signal(trigger, 1); + eventfd_signal(trigger); return IRQ_HANDLED; } @@ -689,11 +689,11 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev, if (!ctx) continue; if (flags & VFIO_IRQ_SET_DATA_NONE) { - eventfd_signal(ctx->trigger, 1); + eventfd_signal(ctx->trigger); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t *bools = data; if (bools[i - start]) - 
eventfd_signal(ctx->trigger, 1); + eventfd_signal(ctx->trigger); } } return 0; @@ -707,7 +707,7 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx, if (flags & VFIO_IRQ_SET_DATA_NONE) { if (*ctx) { if (count) { - eventfd_signal(*ctx, 1); + eventfd_signal(*ctx); } else { eventfd_ctx_put(*ctx); *ctx = NULL; @@ -722,7 +722,7 @@ static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx, trigger = *(uint8_t *)data; if (trigger && *ctx) - eventfd_signal(*ctx, 1); + eventfd_signal(*ctx); return 0; } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c index 665197caed89..61a1bfb68ac7 100644 --- a/drivers/vfio/platform/vfio_platform_irq.c +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -155,7 +155,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) spin_unlock_irqrestore(&irq_ctx->lock, flags); if (ret == IRQ_HANDLED) - eventfd_signal(irq_ctx->trigger, 1); + eventfd_signal(irq_ctx->trigger); return ret; } @@ -164,7 +164,7 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id) { struct vfio_platform_irq *irq_ctx = dev_id; - eventfd_signal(irq_ctx->trigger, 1); + eventfd_signal(irq_ctx->trigger); return IRQ_HANDLED; } diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 30df5c58db73..8d9f958946d6 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -178,7 +178,7 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; if (call_ctx) - eventfd_signal(call_ctx, 1); + eventfd_signal(call_ctx); return IRQ_HANDLED; } @@ -189,7 +189,7 @@ static irqreturn_t vhost_vdpa_config_cb(void *private) struct eventfd_ctx *config_ctx = v->config_ctx; if (config_ctx) - eventfd_signal(config_ctx, 1); + eventfd_signal(config_ctx); return IRQ_HANDLED; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index e0c181ad17e3..045f666b4f12 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2248,7 +2248,7 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, len -= l; if (!len) { if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); + eventfd_signal(vq->log_ctx); return 0; } } @@ -2271,7 +2271,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) log_used(vq, (used - (void __user *)vq->used), sizeof vq->used->flags); if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); + eventfd_signal(vq->log_ctx); } return 0; } @@ -2289,7 +2289,7 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq) log_used(vq, (used - (void __user *)vq->used), sizeof *vhost_avail_event(vq)); if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); + eventfd_signal(vq->log_ctx); } return 0; } @@ -2715,7 +2715,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, log_used(vq, offsetof(struct vring_used, idx), sizeof vq->used->idx); if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); + eventfd_signal(vq->log_ctx); } return r; } @@ -2763,7 +2763,7 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { /* Signal the Guest tell them we used something up. 
*/ if (vq->call_ctx.ctx && vhost_notify(dev, vq)) - eventfd_signal(vq->call_ctx.ctx, 1); + eventfd_signal(vq->call_ctx.ctx); } EXPORT_SYMBOL_GPL(vhost_signal); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index f60d5f7bef94..9e942fcda5c3 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -249,7 +249,7 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb, #define vq_err(vq, fmt, ...) do { \ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ if ((vq)->error_ctx) \ - eventfd_signal((vq)->error_ctx, 1);\ + eventfd_signal((vq)->error_ctx);\ } while (0) enum { diff --git a/drivers/virt/acrn/ioeventfd.c b/drivers/virt/acrn/ioeventfd.c index ac4037e9f947..4e845c6ca0b5 100644 --- a/drivers/virt/acrn/ioeventfd.c +++ b/drivers/virt/acrn/ioeventfd.c @@ -223,7 +223,7 @@ static int acrn_ioeventfd_handler(struct acrn_ioreq_client *client, mutex_lock(&client->vm->ioeventfds_lock); p = hsm_ioeventfd_match(client->vm, addr, val, size, req->type); if (p) - eventfd_signal(p->eventfd, 1); + eventfd_signal(p->eventfd); mutex_unlock(&client->vm->ioeventfds_lock); return 0; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 1ce7f3c7a950..7efc0c62e984 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -1147,7 +1147,7 @@ static irqreturn_t ioeventfd_interrupt(int irq, void *dev_id) if (ioreq->addr == kioeventfd->addr + VIRTIO_MMIO_QUEUE_NOTIFY && ioreq->size == kioeventfd->addr_len && (ioreq->data & QUEUE_NOTIFY_VQ_MASK) == kioeventfd->vq) { - eventfd_signal(kioeventfd->eventfd, 1); + eventfd_signal(kioeventfd->eventfd); state = STATE_IORESP_READY; break; } diff --git a/fs/aio.c b/fs/aio.c index d02842156b35..7c691cf84cc7 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1166,7 +1166,7 @@ static void aio_complete(struct aio_kiocb *iocb) * from IRQ context. */ if (iocb->ki_eventfd) - eventfd_signal(iocb->ki_eventfd, 1); + eventfd_signal(iocb->ki_eventfd); /* * We have to order our ring_info tail store above and test diff --git a/fs/eventfd.c b/fs/eventfd.c index 33a918f9566c..d2f7d2d8a351 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -72,22 +72,19 @@ __u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask) } /** - * eventfd_signal - Adds @n to the eventfd counter. + * eventfd_signal - Increment the event counter * @ctx: [in] Pointer to the eventfd context. - * @n: [in] Value of the counter to be added to the eventfd internal counter. - * The value cannot be negative. * * This function is supposed to be called by the kernel in paths that do not * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX * value, and we signal this as overflow condition by returning a EPOLLERR * to poll(2). * - * Returns the amount by which the counter was incremented. This will be less - * than @n if the counter has overflowed. + * Returns the amount by which the counter was incremented. 
*/ -__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) +__u64 eventfd_signal(struct eventfd_ctx *ctx) { - return eventfd_signal_mask(ctx, n, 0); + return eventfd_signal_mask(ctx, 1, 0); } EXPORT_SYMBOL_GPL(eventfd_signal); diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index b9d83652c097..562089431551 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -35,7 +35,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx); struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); -__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); +__u64 eventfd_signal(struct eventfd_ctx *ctx); __u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); @@ -58,7 +58,7 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) return ERR_PTR(-ENOSYS); } -static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n) +static inline int eventfd_signal(struct eventfd_ctx *ctx) { return -ENOSYS; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 774bd6e21e27..dfbb1d3b77b7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4378,7 +4378,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) * only one element of the array here. */ for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--) - eventfd_signal(t->entries[i].eventfd, 1); + eventfd_signal(t->entries[i].eventfd); /* i = current_threshold + 1 */ i++; @@ -4390,7 +4390,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) * only one element of the array here. */ for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++) - eventfd_signal(t->entries[i].eventfd, 1); + eventfd_signal(t->entries[i].eventfd); /* Update current_threshold */ t->current_threshold = i - 1; @@ -4430,7 +4430,7 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) spin_lock(&memcg_oom_lock); list_for_each_entry(ev, &memcg->oom_notify, list) - eventfd_signal(ev->eventfd, 1); + eventfd_signal(ev->eventfd); spin_unlock(&memcg_oom_lock); return 0; @@ -4649,7 +4649,7 @@ static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg, /* already in OOM ? */ if (memcg->under_oom) - eventfd_signal(eventfd, 1); + eventfd_signal(eventfd); spin_unlock(&memcg_oom_lock); return 0; @@ -4941,7 +4941,7 @@ static void memcg_event_remove(struct work_struct *work) event->unregister_event(memcg, event->eventfd); /* Notify userspace the event is going away. 
*/ - eventfd_signal(event->eventfd, 1); + eventfd_signal(event->eventfd); eventfd_ctx_put(event->eventfd); kfree(event); diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 22c6689d9302..bd5183dfd879 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -169,7 +169,7 @@ static bool vmpressure_event(struct vmpressure *vmpr, continue; if (level < ev->level) continue; - eventfd_signal(ev->efd, 1); + eventfd_signal(ev->efd); ret = true; } mutex_unlock(&vmpr->events_lock); diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 69ba0281f9e0..2284b3751240 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -234,10 +234,10 @@ static void mtty_trigger_interrupt(struct mdev_state *mdev_state) if (is_msi(mdev_state)) { if (mdev_state->msi_evtfd) - eventfd_signal(mdev_state->msi_evtfd, 1); + eventfd_signal(mdev_state->msi_evtfd); } else if (is_intx(mdev_state)) { if (mdev_state->intx_evtfd && !mdev_state->intx_mask) { - eventfd_signal(mdev_state->intx_evtfd, 1); + eventfd_signal(mdev_state->intx_evtfd); mdev_state->intx_mask = true; } } diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 89912a17f5d5..c0e230f4c3e9 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -61,7 +61,7 @@ static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler) list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, srcu_read_lock_held(&resampler->kvm->irq_srcu)) - eventfd_signal(irqfd->resamplefd, 1); + eventfd_signal(irqfd->resamplefd); } /* @@ -786,7 +786,7 @@ ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, if (!ioeventfd_in_range(p, addr, len, val)) return -EOPNOTSUPP; - eventfd_signal(p->eventfd, 1); + eventfd_signal(p->eventfd); return 0; } -- cgit v1.2.3 From 97137bd3ffc5c5972ef3e27d145250c1750f8dc4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 27 Nov 2023 11:00:44 -0800 Subject: drm/i915/dg2: Drop Wa_22014600077 This workaround has been dropped from all DG2 variants in the latest workaround database update. Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20231127190043.4099109-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 9bc0654efdc0..4cbf9e512645 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2357,14 +2357,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 0, true); } - if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { - /* Wa_22014600077:dg2 */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), - 0 /* Wa_14012342262 write-only reg, so skip verification */, - true); - } - if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* -- cgit v1.2.3 From e9ba37d9f9a6872b069dd893bd86a7d77ba8c153 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Aug 2023 13:15:20 +1000 Subject: nouveau: find the smallest page allocation to cover a buffer alloc. With the new uapi we don't have the comp flags on the allocation, so we shouldn't be using the first size that works, we should be iterating until we get the correct one. This reduces allocations from 2MB to 64k in lots of places. 
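For illustration only, the selection rule amounts to walking the supported page sizes from largest to smallest, always remembering the current candidate, and stopping at the first size the buffer can still fill completely. A hypothetical standalone helper (assumed inputs, not the nouveau code; page_shift[] assumed sorted from largest to smallest and all entries usable) would look roughly like:

	static int pick_smallest_covering_page(const u8 *page_shift, int count, u64 size)
	{
		int i, pi = -1;

		for (i = 0; i < count; i++) {
			pi = i;	/* remember the smallest usable size seen so far */
			if (size >= 1ULL << page_shift[i])
				break;	/* buffer still fills a whole page of this size */
		}

		/* e.g. the 64k entry rather than 2MB when those are the only usable sizes */
		return pi;
	}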
Fixes dEQP-VK.memory.allocation.basic.size_8KiB.forward.count_4000 on my ampere/gsp system. Cc: stable@vger.kernel.org # v6.6 Signed-off-by: Dave Airlie Reviewed-by: Faith Ekstrand Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20230811031520.248341-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0f3bd187ede6..280d1d9a559b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -318,8 +318,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; - if (pi < 0) - pi = i; + /* pick the last one as it will be smallest. */ + pi = i; + /* Stop once the buffer is larger than the current page size. */ if (*size >= 1ULL << vmm->page[i].shift) break; -- cgit v1.2.3 From 45b7955b774f82680db71f460fa01bfcdaaeb514 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Nov 2023 15:56:33 +0300 Subject: nouveau/gsp/r535: remove a stray unlock in r535_gsp_rpc_send() This unlock doesn't belong here and it leads to a double unlock in the caller, r535_gsp_rpc_push(). Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Signed-off-by: Dan Carpenter Reviewed-by: Timur Tabi Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/a0293812-c05d-45f0-a535-3f24fe582c02@moroto.mountain --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index dc44f5c7833f..818e5c73b7a6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -365,10 +365,8 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc) } ret = r535_gsp_cmdq_push(gsp, rpc); - if (ret) { - mutex_unlock(&gsp->cmdq.mutex); + if (ret) return ERR_PTR(ret); - } if (wait) { msg = r535_gsp_msg_recv(gsp, fn, repc); -- cgit v1.2.3 From 52fdb99cc436014a417750150928c8ff1f69ae66 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 16 Nov 2023 12:11:43 -0600 Subject: nouveau/gsp: replace zero-length array with flex-array member and use __counted_by Fake flexible arrays (zero-length and one-element arrays) are deprecated, and should be replaced by flexible-array members. So, replace zero-length array with a flexible-array member in `struct PACKED_REGISTRY_TABLE`. Also annotate array `entries` with `__counted_by()` to prepare for the coming implementation by GCC and Clang of the `__counted_by` attribute. Flexible array members annotated with `__counted_by` can have their accesses bounds-checked at run-time via `CONFIG_UBSAN_BOUNDS` (for array indexing) and `CONFIG_FORTIFY_SOURCE` (for strcpy/memcpy-family functions). 
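As background, a minimal self-contained sketch of this pattern (hypothetical structure and function names, not the nouveau headers) might look like:

	#include <linux/types.h>
	#include <linux/overflow.h>	/* struct_size() */
	#include <linux/slab.h>		/* kzalloc() */

	struct reg_entry {
		u32 key;
		u32 value;
	};

	struct reg_table {
		u32 num_entries;
		struct reg_entry entries[] __counted_by(num_entries);
	};

	static struct reg_table *reg_table_alloc(u32 n)
	{
		/* struct_size() sizes the header plus n trailing elements without overflow */
		struct reg_table *t = kzalloc(struct_size(t, entries, n), GFP_KERNEL);

		if (t)
			t->num_entries = n;	/* counter must be valid before entries[] is indexed */
		return t;
	}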
This fixes multiple -Warray-bounds warnings: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c:1069:29: warning: array subscript 0 is outside array bounds of 'PACKED_REGISTRY_ENTRY[0]' [-Warray-bounds=] drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c:1070:29: warning: array subscript 0 is outside array bounds of 'PACKED_REGISTRY_ENTRY[0]' [-Warray-bounds=] drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c:1071:29: warning: array subscript 0 is outside array bounds of 'PACKED_REGISTRY_ENTRY[0]' [-Warray-bounds=] drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c:1072:29: warning: array subscript 0 is outside array bounds of 'PACKED_REGISTRY_ENTRY[0]' [-Warray-bounds=] While there, also make use of the struct_size() helper, and address checkpatch.pl warning: WARNING: please, no spaces at the start of a line This results in no differences in binary output. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/ZVZbX7C5suLMiBf+@work --- .../drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h index 754c6af42f30..10121218f4d3 100644 --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h @@ -38,7 +38,7 @@ typedef struct PACKED_REGISTRY_TABLE { NvU32 size; NvU32 numEntries; - PACKED_REGISTRY_ENTRY entries[0]; + PACKED_REGISTRY_ENTRY entries[] __counted_by(numEntries); } PACKED_REGISTRY_TABLE; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 818e5c73b7a6..f6725a5f5bfb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1046,7 +1046,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) char *strings; int str_offset; int i; - size_t rpc_size = sizeof(*rpc) + sizeof(rpc->entries[0]) * NV_GSP_REG_NUM_ENTRIES; + size_t rpc_size = struct_size(rpc, entries, NV_GSP_REG_NUM_ENTRIES); /* add strings + null terminator */ for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) -- cgit v1.2.3 From 503579448db93f9fbcc93cd99a1f2d5aa4b2cda6 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Nov 2023 08:44:56 +0000 Subject: drm/i915/gsc: Mark internal GSC engine with reserved uabi class The GSC CS is not exposed to the user, so we skipped assigning a uabi class number for it. However, the trace logs use the uabi class and instance to identify the engine, so leaving uabi class unset makes the GSC CS show up as the RCS in those logs. Given that the engine is not exposed to the user, we can't add a new case in the uabi enum, so we instead internally define a kernel internal class as -1. At the same time remove special handling for the name and complete the uabi_classes array so internal class is automatically correctly assigned. Engine will show as 65535:0 other0 in the logs/traces which should be unique enough. v2: * Fix uabi class u8 vs u16 type confusion. 
Signed-off-by: Tvrtko Ursulin Fixes: 194babe26bdc ("drm/i915/mtl: don't expose GSC command streamer to the user") Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231116084456.291533-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit dfed6b58d54f3a5d7e6bc1fb060e2c936330eba2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_user.c | 39 ++++++++++++++++------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 118164ddbb2e..833987015b8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -41,12 +41,15 @@ void intel_engine_add_user(struct intel_engine_cs *engine) llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist); } -static const u8 uabi_classes[] = { +#define I915_NO_UABI_CLASS ((u16)(-1)) + +static const u16 uabi_classes[] = { [RENDER_CLASS] = I915_ENGINE_CLASS_RENDER, [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE, + [OTHER_CLASS] = I915_NO_UABI_CLASS, /* Not exposed to users, no uabi class. */ }; static int engine_cmp(void *priv, const struct list_head *A, @@ -200,6 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16 void intel_engines_driver_register(struct drm_i915_private *i915) { + u16 name_instance, other_instance = 0; struct legacy_ring ring = {}; struct list_head *it, *next; struct rb_node **p, *prev; @@ -216,27 +220,28 @@ void intel_engines_driver_register(struct drm_i915_private *i915) if (intel_gt_has_unrecoverable_error(engine->gt)) continue; /* ignore incomplete engines */ - /* - * We don't want to expose the GSC engine to the users, but we - * still rename it so it is easier to identify in the debug logs - */ - if (engine->id == GSC0) { - engine_rename(engine, "gsc", 0); - continue; - } - GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; + if (engine->uabi_class == I915_NO_UABI_CLASS) { + name_instance = other_instance++; + } else { + GEM_BUG_ON(engine->uabi_class >= + ARRAY_SIZE(i915->engine_uabi_class_count)); + name_instance = + i915->engine_uabi_class_count[engine->uabi_class]++; + } + engine->uabi_instance = name_instance; - GEM_BUG_ON(engine->uabi_class >= - ARRAY_SIZE(i915->engine_uabi_class_count)); - engine->uabi_instance = - i915->engine_uabi_class_count[engine->uabi_class]++; - - /* Replace the internal name with the final user facing name */ + /* + * Replace the internal name with the final user and log facing + * name. 
+ */ engine_rename(engine, intel_engine_class_repr(engine->class), - engine->uabi_instance); + name_instance); + + if (engine->uabi_class == I915_NO_UABI_CLASS) + continue; rb_link_node(&engine->uabi_node, prev, p); rb_insert_color(&engine->uabi_node, &i915->uabi_engines); -- cgit v1.2.3 From f76f83a83c8fdbb62acbf8bd945f10821768145b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 14 Nov 2023 16:23:33 +0200 Subject: drm/i915: Also check for VGA converter in eDP probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unfortunately even the HPD based detection added in commit cfe5bdfb27fa ("drm/i915: Check HPD live state during eDP probe") fails to detect that the VBT's eDP/DDI-A is a ghost on Asus B360M-A (CFL+CNP). On that board eDP/DDI-A has its HPD asserted despite nothing being actually connected there :( The straps/fuses also indicate that the eDP port is present. So if one boots with a VGA monitor connected the eDP probe will mistake the DP->VGA converter hooked to DDI-E for an eDP panel on DDI-A. As a last resort check what kind of DP device we've detected, and if it looks like a DP->VGA converter then conclude that the eDP port should be ignored. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9636 Fixes: cfe5bdfb27fa ("drm/i915: Check HPD live state during eDP probe") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231114142333.15799-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit fcd479a79120bf0cd507d85f898297a3b868dda6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2c1034578984..2852958dd4e7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6037,8 +6037,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * (eg. Acer Chromebook C710), so we'll check it only if multiple * ports are attempting to use the same AUX CH, according to VBT. */ - if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) && - !intel_digital_port_connected(encoder)) { + if (intel_bios_dp_has_shared_aux_ch(encoder->devdata)) { /* * If this fails, presume the DPCD answer came * from some other port using the same AUX CH. @@ -6046,10 +6045,27 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * FIXME maybe cleaner to check this before the * DPCD read? Would need sort out the VDD handling... */ - drm_info(&dev_priv->drm, - "[ENCODER:%d:%s] HPD is down, disabling eDP\n", - encoder->base.base.id, encoder->base.name); - goto out_vdd_off; + if (!intel_digital_port_connected(encoder)) { + drm_info(&dev_priv->drm, + "[ENCODER:%d:%s] HPD is down, disabling eDP\n", + encoder->base.base.id, encoder->base.name); + goto out_vdd_off; + } + + /* + * Unfortunately even the HPD based detection fails on + * eg. Asus B360M-A (CFL+CNP), so as a last resort fall + * back to checking for a VGA branch device. Only do this + * on known affected platforms to minimize false positives. 
+ */ + if (DISPLAY_VER(dev_priv) == 9 && drm_dp_is_branch(intel_dp->dpcd) && + (intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_TYPE_MASK) == + DP_DWN_STRM_PORT_TYPE_ANALOG) { + drm_info(&dev_priv->drm, + "[ENCODER:%d:%s] VGA converter detected, disabling eDP\n", + encoder->base.base.id, encoder->base.name); + goto out_vdd_off; + } } mutex_lock(&dev_priv->drm.mode_config.mutex); -- cgit v1.2.3 From d21a3962d3042e6f56ad324cf18bdd64a1e6ecfa Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 21 Nov 2023 07:43:15 +0200 Subject: drm/i915: Call intel_pre_plane_updates() also for pipes getting enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to call intel_pre_plane_updates() for any pipe going through a modeset whether the pipe was previously enabled or not. This is in fact needed to apply all the necessary clock gating workarounds/etc. Restore the correct behaviour. Fixes: 39919997322f ("drm/i915: Disable all planes before modesetting any pipes") Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-3-ville.syrjala@linux.intel.com (cherry picked from commit e0d5ce11ed0a21bb2bf328ad82fd261783c7ad88) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 28d85e1e858e..a2a806262c9e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6853,10 +6853,11 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state) if (!intel_crtc_needs_modeset(new_crtc_state)) continue; + intel_pre_plane_update(state, crtc); + if (!old_crtc_state->hw.active) continue; - intel_pre_plane_update(state, crtc); intel_crtc_disable_planes(state, crtc); } -- cgit v1.2.3 From b844c6bae2b89b4a4e102eb326e35c632308dd85 Mon Sep 17 00:00:00 2001 From: Vinod Govindapillai Date: Fri, 10 Nov 2023 11:32:25 +0200 Subject: drm/i915/xe2lpd: remove the FBC restriction if PSR2 is enabled In earlier versions, FBC was restricted if PSR2 is enabled. From xe2lpd onwards no such restrictions are needed anymore. 
HSD: 14014305387 Signed-off-by: Vinod Govindapillai Reviewed-by: Mika Kahola Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20231110093225.39573-2-vinod.govindapillai@intel.com --- drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 63f389a1707d..f17a1afb4929 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1235,7 +1235,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * Recommendation is to keep this combination disabled * Bspec: 50422 HSD: 14010260002 */ - if (DISPLAY_VER(i915) >= 12 && crtc_state->has_psr2) { + if (IS_DISPLAY_VER(i915, 12, 14) && crtc_state->has_psr2) { plane_state->no_fbc_reason = "PSR2 enabled"; return 0; } -- cgit v1.2.3 From fcebbe2fa3443e400657d71182610219750d1c1e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 24 Nov 2023 10:27:31 +0200 Subject: drm/i915/psr: Include some basic PSR information in the state dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently no one can figure out what the PSR code is doing since we're not including any of it in the basic state dump. Add at least the bare minimum there. v2: Also dump has_panel_replay (Jouni) Cc: Jouni Högander Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231124082735.25470-1-ville.syrjala@linux.intel.com Reviewed-by: Jouni Högander --- drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 30336453041d..30b8ddeae8ce 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -264,6 +264,12 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, drm_dbg_kms(&i915->drm, "sdp split: %s\n", str_enabled_disabled(pipe_config->sdp_split_enable)); + + drm_dbg_kms(&i915->drm, "psr: %s, psr2: %s, panel replay: %s, selective fetch: %s\n", + str_enabled_disabled(pipe_config->has_psr), + str_enabled_disabled(pipe_config->has_psr2), + str_enabled_disabled(pipe_config->has_panel_replay), + str_enabled_disabled(pipe_config->enable_psr2_sel_fetch)); } drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n", -- cgit v1.2.3 From e0ef2daa8ca8ce4dbc2fd0959e383b753a87fd7d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:25 +0200 Subject: drm/i915: Skip some timing checks on BXT/GLK DSI transcoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently some BXT/GLK systems have DSI panels whose timings don't agree with the normal cpu transcoder hblank>=32 limitation. This is perhaps fine as there are no specific hblank/etc. limits listed for the BXT/GLK DSI transcoders. Move those checks out from the global intel_mode_valid() into connector specific .mode_valid() hooks, skipping BXT/GLK DSI connectors. We'll leave the basic [hv]display/[hv]total checks in intel_mode_valid() as those seem like sensible upper limits regardless of the transcoder used. 
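In sketch form, the per-connector pattern applied by the diff below is (foo_mode_valid being a hypothetical stand-in for each connector's hook):

	static enum drm_mode_status
	foo_mode_valid(struct drm_connector *connector,
		       struct drm_display_mode *mode)
	{
		struct drm_i915_private *i915 = to_i915(connector->dev);
		enum drm_mode_status status;

		/* shared CPU transcoder timing limits first ... */
		status = intel_cpu_transcoder_mode_valid(i915, mode);
		if (status != MODE_OK)
			return status;

		/* ... then the pre-existing connector specific checks */
		return MODE_OK;
	}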
Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9720 Fixes: 8f4b1068e7fc ("drm/i915: Check some transcoder timing minimum limits") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/icl_dsi.c | 7 +++++++ drivers/gpu/drm/i915/display/intel_crt.c | 5 +++++ drivers/gpu/drm/i915/display/intel_display.c | 10 ++++++++++ drivers/gpu/drm/i915/display/intel_display.h | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dvo.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_hdmi.c | 4 ++++ drivers/gpu/drm/i915/display/intel_lvds.c | 5 +++++ drivers/gpu/drm/i915/display/intel_sdvo.c | 8 +++++++- drivers/gpu/drm/i915/display/intel_tv.c | 8 +++++++- drivers/gpu/drm/i915/display/vlv_dsi.c | 18 +++++++++++++++++- 12 files changed, 79 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 481fcb650850..ac456a2275db 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1440,6 +1440,13 @@ static void gen11_dsi_post_disable(struct intel_atomic_state *state, static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + /* FIXME: DSC? */ return intel_dsi_mode_valid(connector, mode); } diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 0e33a0523a75..abaacea5c2cc 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -348,8 +348,13 @@ intel_crt_mode_valid(struct drm_connector *connector, struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); int max_dotclk = dev_priv->max_dotclk_freq; + enum drm_mode_status status; int max_clock; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 5cf162628b95..23b077f43614 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7734,6 +7734,16 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev, mode->vtotal > vtotal_max) return MODE_V_ILLEGAL; + return MODE_OK; +} + +enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode) +{ + /* + * Additional transcoder timing limits, + * excluding BXT/GLK DSI transcoders. 
+ */ if (DISPLAY_VER(dev_priv) >= 5) { if (mode->hdisplay < 64 || mode->htotal - mode->hdisplay < 32) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 8548f49e3972..f4a0773f0fca 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -402,6 +402,9 @@ enum drm_mode_status intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, const struct drm_display_mode *mode, bool bigjoiner); +enum drm_mode_status +intel_cpu_transcoder_mode_valid(struct drm_i915_private *i915, + const struct drm_display_mode *mode); enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port); bool is_trans_port_sync_mode(const struct intel_crtc_state *state); bool is_trans_port_sync_master(const struct intel_crtc_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f9b5fc2fdbcb..68b2e69dca39 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1227,6 +1227,10 @@ intel_dp_mode_valid(struct drm_connector *_connector, enum drm_mode_status status; bool dsc = false, bigjoiner = false; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 63364c9602ef..0514f825baf5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1282,6 +1282,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, return 0; } + *status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (*status != MODE_OK) + return 0; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) { *status = MODE_NO_DBLESCAN; return 0; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 55d6743374bd..9111e9d46486 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -217,11 +217,17 @@ intel_dvo_mode_valid(struct drm_connector *_connector, struct drm_display_mode *mode) { struct intel_connector *connector = to_intel_connector(_connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dvo *intel_dvo = intel_attached_dvo(connector); const struct drm_display_mode *fixed_mode = intel_panel_fixed_mode(connector, mode); int max_dotclk = to_i915(connector->base.dev)->max_dotclk_freq; int target_clock = mode->clock; + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index ab18cfc19c0a..39e4f5f7c817 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1983,6 +1983,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector, bool ycbcr_420_only; enum intel_output_format sink_format; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING) clock *= 2; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 4b114fde57b1..958b6fd0d741 100644 --- 
a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -389,11 +389,16 @@ intel_lvds_mode_valid(struct drm_connector *_connector, struct drm_display_mode *mode) { struct intel_connector *connector = to_intel_connector(_connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); const struct drm_display_mode *fixed_mode = intel_panel_fixed_mode(connector, mode); int max_pixclk = to_i915(connector->base.dev)->max_dotclk_freq; enum drm_mode_status status; + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index c7af7e046188..0362d02d70b6 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1930,13 +1930,19 @@ static enum drm_mode_status intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; bool has_hdmi_sink = intel_has_hdmi_sink(intel_sdvo_connector, connector->state); + int max_dotclk = i915->max_dotclk_freq; + enum drm_mode_status status; int clock = mode->clock; + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 31a79fdfc812..2ee4f0d95851 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -958,8 +958,14 @@ static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + int max_dotclk = i915->max_dotclk_freq; + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index bda49734ca33..5bda97c96810 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1544,9 +1544,25 @@ static const struct drm_encoder_funcs intel_dsi_funcs = { .destroy = intel_dsi_encoder_destroy, }; +static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + struct drm_i915_private *i915 = to_i915(connector->dev); + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + } + + return intel_dsi_mode_valid(connector, mode); +} + static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs = { .get_modes = intel_dsi_get_modes, - .mode_valid = intel_dsi_mode_valid, + .mode_valid = vlv_dsi_mode_valid, .atomic_check = intel_digital_connector_atomic_check, }; -- cgit v1.2.3 From 
c1799032d2ef6616113b733428dfaa2199a5604b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:26 +0200 Subject: drm/i915/mst: Fix .mode_valid_ctx() return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .mode_valid_ctx() returns an errno, not the mode status. Fix the code to do the right thing. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 0514f825baf5..0680a42f7d2a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1366,11 +1366,15 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, * Big joiner configuration needs DSC for TGL which is not true for * XE_LPD where uncompressed joiner is supported. */ - if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) - return MODE_CLOCK_HIGH; + if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) { + *status = MODE_CLOCK_HIGH; + return 0; + } - if (mode_rate > max_rate && !dsc) - return MODE_CLOCK_HIGH; + if (mode_rate > max_rate && !dsc) { + *status = MODE_CLOCK_HIGH; + return 0; + } *status = intel_mode_valid_max_plane_size(dev_priv, mode, false); return 0; -- cgit v1.2.3 From 9c058492b16f90bb772cb0dad567e8acc68e155d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:27 +0200 Subject: drm/i915/mst: Reject modes that require the bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have no bigjoiner support in the MST code, so .mode_valid() pretending otherwise is just going to result in black screens for users. Reject any mode that needs the joiner. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 0680a42f7d2a..b665fe6ef871 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1332,6 +1332,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, if (intel_dp_need_bigjoiner(intel_dp, mode->hdisplay, target_clock)) { bigjoiner = true; max_dotclk *= 2; + + /* TODO: add support for bigjoiner */ + *status = MODE_CLOCK_HIGH; + return 0; } if (DISPLAY_VER(dev_priv) >= 10 && -- cgit v1.2.3 From 8dfce5f3095b79236b585bfa0e291b77ba4b6dbd Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:28 +0200 Subject: drm/i915: Clean up some DISPLAY_VER checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the >= and < operators for the DISPLAY_VER checks everywhere. This is what most of the code does, but especially recently random pieces of code have started doing this differently for no good reason. 
Conversion done with the following cocci: @find@ expression i915; constant ver; @@ ( DISPLAY_VER(i915) <= ver | DISPLAY_VER(i915) > ver ) @script:python inc@ old_ver << find.ver; new_ver; @@ coccinelle.new_ver = str(int(old_ver) + 1) @@ expression find.i915; constant find.ver; identifier inc.new_ver; @@ ( - DISPLAY_VER(i915) <= ver + DISPLAY_VER(i915) < new_ver | - DISPLAY_VER(i915) > ver + DISPLAY_VER(i915) >= new_ver ) Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/i9xx_wm.c | 8 ++++---- drivers/gpu/drm/i915/display/intel_bw.c | 7 ++++--- drivers/gpu/drm/i915/display/intel_cdclk.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++---- drivers/gpu/drm/i915/display/intel_display_device.h | 2 +- drivers/gpu/drm/i915/display/intel_display_irq.c | 2 +- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- drivers/gpu/drm/i915/display/intel_lvds.c | 2 +- drivers/gpu/drm/i915/display/intel_psr.c | 8 ++++---- 10 files changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index b37c0d02d500..03e8fb6caa83 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -2477,7 +2477,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv, * FIFO size is only half of the self * refresh FIFO size on ILK/SNB. */ - if (DISPLAY_VER(dev_priv) <= 6) + if (DISPLAY_VER(dev_priv) < 7) fifo_size /= 2; } @@ -2818,7 +2818,7 @@ static int ilk_compute_pipe_wm(struct intel_atomic_state *state, usable_level = dev_priv->display.wm.num_levels - 1; /* ILK/SNB: LP2+ watermarks only w/o sprites */ - if (DISPLAY_VER(dev_priv) <= 6 && pipe_wm->sprites_enabled) + if (DISPLAY_VER(dev_priv) < 7 && pipe_wm->sprites_enabled) usable_level = 1; /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */ @@ -2961,7 +2961,7 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv, int last_enabled_level = num_levels - 1; /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ - if ((DISPLAY_VER(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) && + if ((DISPLAY_VER(dev_priv) < 7 || IS_IVYBRIDGE(dev_priv)) && config->num_pipes_active > 1) last_enabled_level = 0; @@ -3060,7 +3060,7 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv, * Always set WM_LP_SPRITE_EN when spr_val != 0, even if the * level is disabled. Doing otherwise could cause underruns. */ - if (DISPLAY_VER(dev_priv) <= 6 && r->spr_val) { + if (DISPLAY_VER(dev_priv) < 7 && r->spr_val) { drm_WARN_ON(&dev_priv->drm, wm_lp != 1); results->wm_lp_spr[wm_lp - 1] |= WM_LP_SPRITE_ENABLE; } diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index bef96db62c80..7f2a50b4f494 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -87,7 +87,8 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, return ret; dclk = val & 0xffff; - sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000); + sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) >= 12 ? 
500 : 0), + 1000); sp->t_rp = (val & 0xff0000) >> 16; sp->t_rcd = (val & 0xff000000) >> 24; @@ -480,7 +481,7 @@ static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12) qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1); - if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels) + if (DISPLAY_VER(dev_priv) >= 12 && num_channels > qi.max_numchannels) drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels."); if (qi.max_numchannels != 0) num_channels = min_t(u8, num_channels, qi.max_numchannels); @@ -897,7 +898,7 @@ static int icl_find_qgv_points(struct drm_i915_private *i915, unsigned int idx; unsigned int max_data_rate; - if (DISPLAY_VER(i915) > 11) + if (DISPLAY_VER(i915) >= 12) idx = tgl_max_bw_index(i915, num_active_planes, i); else idx = icl_max_bw_index(i915, num_active_planes, i); diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index b93d1ad7936d..8bb6bab7c8cd 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2597,7 +2597,7 @@ static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) * Since PPC = 2 with bigjoiner * => CDCLK >= compressed_bpp * Pixel clock / 2 * Bigjoiner Interface bits */ - int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24; + int bigjoiner_interface_bits = DISPLAY_VER(i915) >= 14 ? 36 : 24; int min_cdclk_bj = (to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) * pixel_clock) / (2 * bigjoiner_interface_bits); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 23b077f43614..9dc22fc8b3d3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2627,7 +2627,7 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta crtc_vblank_start = 1; } - if (DISPLAY_VER(dev_priv) > 3) + if (DISPLAY_VER(dev_priv) >= 4) intel_de_write(dev_priv, TRANS_VSYNCSHIFT(cpu_transcoder), vsyncshift); @@ -3167,7 +3167,7 @@ static void bdw_set_pipe_misc(const struct intel_crtc_state *crtc_state) break; case 36: /* Port output 12BPC defined for ADLP+ */ - if (DISPLAY_VER(dev_priv) > 12) + if (DISPLAY_VER(dev_priv) >= 13) val |= PIPE_MISC_BPC_12_ADLP; break; default: @@ -3224,7 +3224,7 @@ int bdw_get_pipe_misc_bpp(struct intel_crtc *crtc) * MIPI DSI HW readout. */ case PIPE_MISC_BPC_12_ADLP: - if (DISPLAY_VER(dev_priv) > 12) + if (DISPLAY_VER(dev_priv) >= 13) return 36; fallthrough; default: @@ -7763,7 +7763,7 @@ enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *de * Cantiga+ cannot handle modes with a hsync front porch of 0. * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw. 
*/ - if ((DISPLAY_VER(dev_priv) > 4 || IS_G4X(dev_priv)) && + if ((DISPLAY_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) && mode->hsync_start == mode->hdisplay) return MODE_H_ILLEGAL; diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 4299cc452e05..79e9f1c3e241 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -49,7 +49,7 @@ struct drm_printer; #define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc) #define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0) #define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg) -#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) > 2) +#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) >= 3) #define HAS_GMBUS_IRQ(i915) (DISPLAY_VER(i915) >= 4) #define HAS_GMBUS_BURST_READ(i915) (DISPLAY_VER(i915) >= 10 || IS_KABYLAKE(i915)) #define HAS_GMCH(i915) (DISPLAY_INFO(i915)->has_gmch) diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index bff4a76310c0..5a3e10e73e59 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1653,7 +1653,7 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (HAS_PCH_SPLIT(dev_priv)) ibx_irq_postinstall(dev_priv); - if (DISPLAY_VER(dev_priv) <= 10) + if (DISPLAY_VER(dev_priv) < 11) de_misc_masked |= GEN8_DE_MISC_GSE; if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 68b2e69dca39..0b24c0cba94e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1890,7 +1890,7 @@ static int dsc_src_max_compressed_bpp(struct intel_dp *intel_dp) * Max Compressed bpp for Gen 13+ is 27bpp. * For earlier platform is 23bpp. (Bspec:49259). 
*/ - if (DISPLAY_VER(i915) <= 12) + if (DISPLAY_VER(i915) < 13) return 23; else return 27; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index b665fe6ef871..e8940acea8ad 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -54,7 +54,7 @@ static int intel_dp_mst_check_constraints(struct drm_i915_private *i915, int bpp struct intel_crtc_state *crtc_state, bool dsc) { - if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) <= 13 && dsc) { + if (intel_dp_is_uhbr(crtc_state) && DISPLAY_VER(i915) < 14 && dsc) { int output_bpp = bpp; /* DisplayPort 2 128b/132b, bits per lane is always 32 */ int symbol_clock = crtc_state->port_clock / 32; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 958b6fd0d741..221f5c6c871b 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -185,7 +185,7 @@ static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, /* Convert from 100ms to 100us units */ pps->t4 = val * 1000; - if (DISPLAY_VER(dev_priv) <= 4 && + if (DISPLAY_VER(dev_priv) < 5 && pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) { drm_dbg_kms(&dev_priv->drm, "Panel power timings uninitialized, " diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 36e4a1e9b98f..6029bb71276c 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -806,10 +806,10 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val |= EDP_PSR2_IDLE_FRAMES(psr_compute_idle_frames(intel_dp)); - if (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv)) + if (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv)) val |= EDP_SU_TRACK_ENABLE; - if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12) + if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) < 13) val |= EDP_Y_COORDINATE_ENABLE; val |= EDP_PSR2_FRAME_BEFORE_SU(frames_before_su_entry(intel_dp)); @@ -1094,7 +1094,7 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d return true; /* Not supported <13 / Wa_22012279113:adl-p */ - if (DISPLAY_VER(dev_priv) <= 13 || intel_dp->edp_dpcd[0] < DP_EDP_14b) + if (DISPLAY_VER(dev_priv) < 14 || intel_dp->edp_dpcd[0] < DP_EDP_14b) return false; crtc_state->req_psr2_sdp_prior_scanline = true; @@ -1221,7 +1221,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, * over PSR2. */ if (crtc_state->dsc.compression_enable && - (DISPLAY_VER(dev_priv) <= 13 && !IS_ALDERLAKE_P(dev_priv))) { + (DISPLAY_VER(dev_priv) < 14 && !IS_ALDERLAKE_P(dev_priv))) { drm_dbg_kms(&dev_priv->drm, "PSR2 cannot be enabled since DSC is enabled\n"); return false; -- cgit v1.2.3 From fc1ccc16271a0526518f19f460fed63d575a8a42 Mon Sep 17 00:00:00 2001 From: xiazhengqiao Date: Wed, 29 Nov 2023 16:41:15 +0800 Subject: drm/panel: starry-2081101qfh032011-53g: Fine tune the panel power sequence For the "starry, 2081101qfh032011-53g" panel, it is stipulated in the panel spec that MIPI needs to keep the LP11 state before the lcm_reset pin is pulled high. 
Fixes: 6069b66cd962 ("drm/panel: support for STARRY 2081101QFH032011-53G MIPI-DSI panel") Signed-off-by: xiazhengqiao Reviewed-by: Jessica Zhang Link: https://lore.kernel.org/r/20231129084115.7918-1-xiazhengqiao@huaqin.corp-partner.google.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231129084115.7918-1-xiazhengqiao@huaqin.corp-partner.google.com --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index be8f48e3c1db..c4c0f08e9202 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1764,6 +1764,7 @@ static const struct panel_desc starry_qfh032011_53g_desc = { .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_LPM, .init_cmds = starry_qfh032011_53g_init_cmd, + .lp11_before_reset = true, }; static const struct drm_display_mode starry_himax83102_j02_default_mode = { -- cgit v1.2.3 From fb18fe0fdf22a2f4512a8b644bb5ea1473829cda Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 29 Nov 2023 17:07:15 +0800 Subject: drm/panel: nt36523: fix return value check in nt36523_probe() mipi_dsi_device_register_full() never returns NULL pointer, it will return ERR_PTR() when it fails, so replace the check with IS_ERR(). Fixes: 0993234a0045 ("drm/panel: Add driver for Novatek NT36523") Signed-off-by: Yang Yingliang Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231129090715.856263-1-yangyingliang@huaweicloud.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231129090715.856263-1-yangyingliang@huaweicloud.com --- drivers/gpu/drm/panel/panel-novatek-nt36523.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36523.c b/drivers/gpu/drm/panel/panel-novatek-nt36523.c index 9b9a7eb1bc60..a189ce236328 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt36523.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt36523.c @@ -1254,9 +1254,9 @@ static int nt36523_probe(struct mipi_dsi_device *dsi) return dev_err_probe(dev, -EPROBE_DEFER, "cannot get secondary DSI host\n"); pinfo->dsi[1] = mipi_dsi_device_register_full(dsi1_host, info); - if (!pinfo->dsi[1]) { + if (IS_ERR(pinfo->dsi[1])) { dev_err(dev, "cannot get secondary DSI device\n"); - return -ENODEV; + return PTR_ERR(pinfo->dsi[1]); } } -- cgit v1.2.3 From fd2096500acb8b57a66a75ec7985049a5650cff1 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sun, 26 Nov 2023 21:42:01 +0000 Subject: drm/i915/irq: Improve error logging for unexpected DE Misc interrupts Dump the iir value in hex when the interrupt is unexpected. 
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9652#note_2178501 Cc: Jani Nikula Signed-off-by: Rahul Rameshbabu Reviewed-by: Chaitanya Kumar Borah Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231126214142.102106-1-sergeantsagara@protonmail.com --- drivers/gpu/drm/i915/display/intel_display_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 5a3e10e73e59..f8ed53f30b2e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -896,7 +896,7 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) } if (!found) - drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt\n"); + drm_err(&dev_priv->drm, "Unexpected DE Misc interrupt: 0x%08x\n", iir); } static void gen11_dsi_te_interrupt_handler(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From ef32c3cc9c62252986f09e06b4e525742cd91529 Mon Sep 17 00:00:00 2001 From: heminhong Date: Tue, 14 Nov 2023 10:43:41 +0800 Subject: drm/i915: correct the input parameter on _intel_dsb_commit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the dewake_scanline parameter is defined as an unsigned int, a type that is always greater than or equal to 0. When the _intel_dsb_commit() function is called by intel_dsb_commit(), the dewake_scanline argument may be passed an int value. So the dewake_scanline parameter needs to be defined as an int. Fixes: f83b94d23770 ("drm/i915/dsb: Use DEwake to combat PkgC latency") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310052201.AnVbpgPr-lkp@intel.com/ Cc: Ville Syrjälä Cc: Uma Shankar Signed-off-by: heminhong Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231114024341.14524-1-heminhong@kylinos.cn --- drivers/gpu/drm/i915/display/intel_dsb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 9598d50f68f2..482c28b5c2de 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -341,7 +341,7 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) } static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, - unsigned int dewake_scanline) + int dewake_scanline) { struct intel_crtc *crtc = dsb->crtc; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); -- cgit v1.2.3 From 0f82a1b94862da255ac791e11f2c3610f5ad5f26 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Tue, 28 Nov 2023 15:54:51 +0530 Subject: drm/i915/display: Fix IP version of the WAs WAs 14011508470 and 14011503030 were applied on IP versions beyond which they are applicable. Fixed the IP version checks for these workarounds. 
Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Gustavo Sousa Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20231128102451.825242-1-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index e390595d7341..f23080a4368d 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -1697,14 +1697,14 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, if (resume) intel_dmc_load_program(dev_priv); - /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p */ - if (DISPLAY_VER(dev_priv) >= 12) + /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */ + if (IS_DISPLAY_IP_RANGE(dev_priv, IP_VER(12, 0), IP_VER(13, 0))) intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0, DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR); /* Wa_14011503030:xelpd */ - if (DISPLAY_VER(dev_priv) >= 13) + if (DISPLAY_VER(dev_priv) == 13) intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0); } -- cgit v1.2.3 From 613ecd6563d2716192e69624105fe1939d104663 Mon Sep 17 00:00:00 2001 From: André Almeida Date: Fri, 10 Nov 2023 12:23:28 -0500 Subject: drm/amd: Document device reset methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document what each amdgpu driver reset method does. Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4d100f8d75bf..ea2656bd714f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -508,6 +508,31 @@ struct amdgpu_allowed_register_entry { bool grbm_indexed; }; +/** + * enum amd_reset_method - Methods for resetting AMD GPU devices + * + * @AMD_RESET_METHOD_NONE: The device will not be reset. + * @AMD_RESET_LEGACY: Method reserved for SI, CIK and VI ASICs. + * @AMD_RESET_MODE0: Reset the entire ASIC. Not currently available for the + * any device. + * @AMD_RESET_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.) + * individually. Suitable only for some discrete GPU, not + * available for all ASICs. + * @AMD_RESET_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs + * are reset depends on the ASIC. Notably doesn't reset IPs + * shared with the CPU on APUs or the memory controllers (so + * VRAM is not lost). Not available on all ASICs. + * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card + * but without powering off the PCI bus. Suitable only for + * discrete GPUs. + * @AMD_RESET_PCI: Does a full bus reset using core Linux subsystem PCI reset + * and does a secondary bus reset or FLR, depending on what the + * underlying hardware supports. + * + * Methods available for AMD GPU driver for resetting the device. Not all + * methods are suitable for every device. User can override the method using + * module parameter `reset_method`. 
+ */ enum amd_reset_method { AMD_RESET_METHOD_NONE = -1, AMD_RESET_METHOD_LEGACY = 0, -- cgit v1.2.3 From 534eee82356c220649dc9c2ea90099f39fb1cb62 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 12 Nov 2023 09:30:51 +0530 Subject: drm/amd/display: Remove redundant DRM device struct in amdgpu_dm_, mst_types.c Declaration of 'struct drm_device *dev' is redundant, as 'connector->dev' & 'dev_get_drvdata(kdev)' can be directly passed to 'drm_to_adev', without any intermediate DRM device 'dev' variable Cc: Roman Li Cc: Hamza Mahfooz Cc: Harry Wentland Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++------ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ee97814ebd99..ba3f7b232a16 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -894,8 +894,7 @@ static int dm_early_init(void *handle); /* Allocate memory for FBC compressed data */ static void amdgpu_dm_fbc_init(struct drm_connector *connector) { - struct drm_device *dev = connector->dev; - struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_device *adev = drm_to_adev(connector->dev); struct dm_compressor_info *compressor = &adev->dm.compressor; struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector); struct drm_display_mode *mode; @@ -989,8 +988,7 @@ static int amdgpu_dm_audio_component_bind(struct device *kdev, static void amdgpu_dm_audio_component_unbind(struct device *kdev, struct device *hda_kdev, void *data) { - struct drm_device *dev = dev_get_drvdata(kdev); - struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(kdev)); struct drm_audio_component *acomp = data; acomp->ops = NULL; @@ -10788,8 +10786,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, struct dm_connector_state *dm_con_state = NULL; struct dc_sink *sink; - struct drm_device *dev = connector->dev; - struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_device *adev = drm_to_adev(connector->dev); struct amdgpu_hdmi_vsdb_info vsdb_info = {0}; bool freesync_capable = false; enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 8d7d4024f285..a6995688d7ad 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -424,8 +424,7 @@ dm_mst_atomic_best_encoder(struct drm_connector *connector, { struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state, connector); - struct drm_device *dev = connector->dev; - struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_device *adev = drm_to_adev(connector->dev); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc); return &adev->dm.mst_encoders[acrtc->crtc_id].base; -- cgit v1.2.3 From 12c2d3b5f5bc4ecb470a4bc06424914c145e8c03 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 29 Sep 2023 10:51:02 +0530 Subject: drm/amd/pm: Add support to fetch pm metrics sample Add API support to fetch a snapshot of power 
management metrics from PMFW. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 17 +++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 12 ++++++++++++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 15 +++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 10 ++++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 ++- 6 files changed, 57 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index f9c438d16c56..6a1d31e8fdd1 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -431,6 +431,7 @@ struct amd_pm_funcs { int (*set_df_cstate)(void *handle, enum pp_df_cstate state); int (*set_xgmi_pstate)(void *handle, uint32_t pstate); ssize_t (*get_gpu_metrics)(void *handle, void **table); + ssize_t (*get_pm_metrics)(void *handle, void *pmmetrics, size_t size); int (*set_watermarks_for_clock_ranges)(void *handle, struct pp_smu_wm_range_sets *ranges); int (*display_disable_memory_clock_switch)(void *handle, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 1ae3b81548fa..4717884a2bc4 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -1299,6 +1299,23 @@ int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table) return ret; } +ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics, + size_t size) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret = 0; + + if (!pp_funcs->get_pm_metrics) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->get_pm_metrics(adev->powerplay.pp_handle, pm_metrics, + size); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev, uint32_t *fan_mode) { diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index feccd2a7120d..9b3fef5474b1 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -511,6 +511,18 @@ int amdgpu_dpm_get_power_profile_mode(struct amdgpu_device *adev, int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev, long *input, uint32_t size); int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table); + +/** + * @get_pm_metrics: Get one snapshot of power management metrics from PMFW. The + * sample is copied to pm_metrics buffer. It's expected to be allocated by the + * caller and size of the allocated buffer is passed. Max size expected for a + * metrics sample is 4096 bytes. 
+ * + * Return: Actual size of the metrics sample + */ +ssize_t amdgpu_dpm_get_pm_metrics(struct amdgpu_device *adev, void *pm_metrics, + size_t size); + int amdgpu_dpm_get_fan_control_mode(struct amdgpu_device *adev, uint32_t *fan_mode); int amdgpu_dpm_set_fan_speed_pwm(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 37c2605f9b35..930e7e3532ad 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3189,6 +3189,20 @@ static ssize_t smu_sys_get_gpu_metrics(void *handle, void **table) return smu->ppt_funcs->get_gpu_metrics(smu, table); } +static ssize_t smu_sys_get_pm_metrics(void *handle, void *pm_metrics, + size_t size) +{ + struct smu_context *smu = handle; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (!smu->ppt_funcs->get_pm_metrics) + return -EOPNOTSUPP; + + return smu->ppt_funcs->get_pm_metrics(smu, pm_metrics, size); +} + static int smu_enable_mgpu_fan_boost(void *handle) { struct smu_context *smu = handle; @@ -3330,6 +3344,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .set_df_cstate = smu_set_df_cstate, .set_xgmi_pstate = smu_set_xgmi_pstate, .get_gpu_metrics = smu_sys_get_gpu_metrics, + .get_pm_metrics = smu_sys_get_pm_metrics, .set_watermarks_for_clock_ranges = smu_set_watermarks_for_clock_ranges, .display_disable_memory_clock_switch = smu_display_disable_memory_clock_switch, .get_max_sustainable_clocks_by_dc = smu_get_max_sustainable_clocks_by_dc, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 8def291b18bc..32600ba74bf1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -253,6 +253,7 @@ struct smu_table { uint64_t mc_address; void *cpu_addr; struct amdgpu_bo *bo; + uint32_t version; }; enum smu_perf_level_designation { @@ -1252,6 +1253,15 @@ struct pptable_funcs { */ ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table); + /** + * @get_pm_metrics: Get one snapshot of power management metrics from + * PMFW. + * + * Return: Size of the metrics sample + */ + ssize_t (*get_pm_metrics)(struct smu_context *smu, void *pm_metrics, + size_t size); + /** * @enable_mgpu_fan_boost: Enable multi-GPU fan boost. 
*/ diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 9dd47d91093e..ea8ea99b7436 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -259,7 +259,8 @@ __SMU_DUMMY_MAP(PowerUpUmsch), \ __SMU_DUMMY_MAP(PowerDownUmsch), \ __SMU_DUMMY_MAP(SetSoftMaxVpe), \ - __SMU_DUMMY_MAP(SetSoftMinVpe), + __SMU_DUMMY_MAP(SetSoftMinVpe), \ + __SMU_DUMMY_MAP(GetMetricsVersion), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type -- cgit v1.2.3 From f9a45b76a1883b081fbe15466b11d0264e85d372 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 29 Sep 2023 11:35:19 +0530 Subject: drm/amd/pm: Add pm metrics support to SMU v13.0.6 Add support to fetch PM metrics sample from SMU v13.0.6 Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f723a4190ee5..8ccb8354bb49 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -120,6 +120,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +// clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -128,6 +129,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0), MSG_MAP(RequestI2cTransaction, PPSMC_MSG_RequestI2cTransaction, 0), MSG_MAP(GetMetricsTable, PPSMC_MSG_GetMetricsTable, 1), + MSG_MAP(GetMetricsVersion, PPSMC_MSG_GetMetricsVersion, 1), MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetEnabledSmuFeaturesHigh, 1), MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetEnabledSmuFeaturesLow, 1), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), @@ -171,6 +173,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), }; +// clang-format on static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = { CLK_MAP(SOCCLK, PPCLK_SOCCLK), CLK_MAP(FCLK, PPCLK_FCLK), @@ -428,6 +431,41 @@ static int smu_v13_0_6_get_metrics_table(struct smu_context *smu, return 0; } +static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu, + void *metrics, size_t max_size) +{ + struct smu_table_context *smu_tbl_ctxt = &smu->smu_table; + uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version; + uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size; + struct amdgpu_pm_metrics *pm_metrics = metrics; + uint32_t pmfw_version; + int ret; + + if (!pm_metrics || !max_size) + return -EINVAL; + + if (max_size < (table_size + sizeof(pm_metrics->common_header))) + return -EOVERFLOW; + + /* Don't use cached metrics data */ + ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true); + if (ret) + return ret; + + smu_cmn_get_smc_version(smu, NULL, &pmfw_version); + + memset(&pm_metrics->common_header, 0, + sizeof(pm_metrics->common_header)); + pm_metrics->common_header.mp1_ip_discovery_version = + IP_VERSION(13, 0, 6); + 
pm_metrics->common_header.pmfw_version = pmfw_version; + pm_metrics->common_header.pmmetrics_version = table_version; + pm_metrics->common_header.structure_size = + sizeof(pm_metrics->common_header) + table_size; + + return pm_metrics->common_header.structure_size; +} + static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; @@ -435,6 +473,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; int ret, i, retry = 100; + uint32_t table_version; /* Store one-time values in driver PPTable */ if (!pptable->Init) { @@ -453,6 +492,13 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) if (!retry) return -ETIME; + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion, + &table_version); + if (ret) + return ret; + smu_table->tables[SMU_TABLE_SMU_METRICS].version = + table_version; + pptable->MaxSocketPowerLimit = SMUQ10_ROUND(metrics->MaxSocketPowerLimit); pptable->MaxGfxclkFrequency = @@ -2864,6 +2910,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics, + .get_pm_metrics = smu_v13_0_6_get_pm_metrics, .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported, -- cgit v1.2.3 From 223aad1be34e1169ee7210bce05726cc5ef1fd66 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 29 Sep 2023 11:56:49 +0530 Subject: drm/amd/pm: Add sysfs attribute to get pm metrics Add sysfs attribute to read power management metrics. A snapshot is captured to the buffer when the attribute is read. 
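For illustration only (not part of this patch): a userspace consumer of the new attribute could look roughly like the sketch below. The sysfs path is an assumption for this example (the attribute is created on the GPU's PCI device), and the 4096-byte buffer matches the documented maximum metrics sample size.

/* Hedged sketch: read one PM metrics snapshot from an assumed sysfs path. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/class/drm/card0/device/pm_metrics"; /* assumed location */
	char buf[4096]; /* documented maximum sample size */
	int fd = open(path, O_RDONLY);
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read(fd, buf, sizeof(buf)); /* each read returns one snapshot */
	close(fd);
	if (n < 0) {
		perror("read");
		return 1;
	}
	printf("read %zd bytes of PM metrics\n", n);
	return 0;
}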
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index ca2ece24e1e0..e1497296afee 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1799,6 +1799,44 @@ static ssize_t amdgpu_set_apu_thermal_cap(struct device *dev, return count; } +static int amdgpu_pm_metrics_attr_update(struct amdgpu_device *adev, + struct amdgpu_device_attr *attr, + uint32_t mask, + enum amdgpu_device_attr_states *states) +{ + if (amdgpu_dpm_get_pm_metrics(adev, NULL, 0) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + + return 0; +} + +static ssize_t amdgpu_get_pm_metrics(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size = 0; + int ret; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + size = amdgpu_dpm_get_pm_metrics(adev, buf, PAGE_SIZE); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; +} + /** * DOC: gpu_metrics * @@ -2096,6 +2134,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(smartshift_bias, ATTR_FLAG_BASIC, .attr_update = ss_bias_attr_update), AMDGPU_DEVICE_ATTR_RW(xgmi_plpd_policy, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(pm_metrics, ATTR_FLAG_BASIC, + .attr_update = amdgpu_pm_metrics_attr_update), }; static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, -- cgit v1.2.3 From cee6de122461de699aaa7932b33466c6d259eabb Mon Sep 17 00:00:00 2001 From: Dennis Chan Date: Fri, 27 Oct 2023 11:00:36 +0800 Subject: drm/amd/display: Add new Replay command and Disabled Replay Timing Resync [why] To support dynamic switching for Replay timing sync mechanism. Reviewed-by: ChunTao Tso Acked-by: Hamza Mahfooz Signed-off-by: Dennis Chan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 10 +++++++++ drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h | 3 +++ drivers/gpu/drm/amd/display/dc/inc/link.h | 3 +++ drivers/gpu/drm/amd/display/dc/link/link_factory.c | 1 + .../dc/link/protocols/link_edp_panel_control.c | 24 ++++++++++++++++++++++ .../dc/link/protocols/link_edp_panel_control.h | 3 +++ 6 files changed, 44 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index fcb825e4f1bb..edf60c4f318c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1022,6 +1022,16 @@ enum replay_coasting_vtotal_type { PR_COASTING_TYPE_NUM, }; +/* + * This is general Interface for Replay to + * set an 32 bit variable to dmub + * The Message_type indicates which variable + * passed to DMUB. 
+ */ +enum replay_FW_Message_type { + Replay_Set_Timing_Sync_Supported, +}; + union replay_error_status { struct { unsigned char STATE_TRANSITION_ERROR :1; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index e8385bbf51fc..427bc47a676e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -45,6 +45,9 @@ struct dmub_replay_funcs { struct replay_context *replay_context, uint8_t panel_inst); void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt, uint8_t panel_inst); + void (*replay_send_cmd)(struct dmub_replay *dmub, + enum replay_FW_Message_type msg, unsigned int panel_inst, + uint32_t cmd_data); void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal, uint8_t panel_inst); void (*replay_residency)(struct dmub_replay *dmub, diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index d7685368140a..dd3f53151d8b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -281,6 +281,9 @@ struct link_service { const unsigned int *power_opts); bool (*edp_setup_replay)(struct dc_link *link, const struct dc_stream_state *stream); + bool (*edp_send_replay_cmd)(struct dc_link *link, + enum replay_FW_Message_type msg, + uint32_t params); bool (*edp_set_coasting_vtotal)( struct dc_link *link, uint16_t coasting_vtotal); bool (*edp_replay_residency)(const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 7abfc67d10a6..6b306ea58b9b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -213,6 +213,7 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s link_srv->edp_get_replay_state = edp_get_replay_state; link_srv->edp_set_replay_allow_active = edp_set_replay_allow_active; link_srv->edp_setup_replay = edp_setup_replay; + link_srv->edp_send_replay_cmd = edp_send_replay_cmd; link_srv->edp_set_coasting_vtotal = edp_set_coasting_vtotal; link_srv->edp_replay_residency = edp_replay_residency; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e32a7974a4bc..c52b51b2b4b3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -1007,6 +1007,30 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream return true; } +/* + * This is general Interface for Replay to set an 32 bit variable to dmub + * replay_FW_Message_type: Indicates which instruction or variable pass to DMUB + * cmd_data: Value of the config. 
+ */ +bool edp_send_replay_cmd(struct dc_link *link, + enum replay_FW_Message_type msg, + uint32_t cmd_data) +{ + struct dc *dc = link->ctx->dc; + struct dmub_replay *replay = dc->res_pool->replay; + unsigned int panel_inst; + + if (!replay) + return false; + + if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) + return false; + + replay->funcs->replay_send_cmd(replay, msg, cmd_data, panel_inst); + + return true; +} + bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal) { struct dc *dc = link->ctx->dc; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index ebf7deb63d13..6b223580ac8a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -57,6 +57,9 @@ bool edp_set_replay_allow_active(struct dc_link *dc_link, const bool *enable, bool wait, bool force_static, const unsigned int *power_opts); bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream); +bool edp_send_replay_cmd(struct dc_link *link, + enum replay_FW_Message_type msg, + uint32_t params); bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal); bool edp_replay_residency(const struct dc_link *link, unsigned int *residency, const bool is_start, const bool is_alpm); -- cgit v1.2.3 From 1c22d6ce53280763bcb4cb24d4f71111fff4a526 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 6 Nov 2023 11:20:15 -0500 Subject: drm/amd/display: Include udelay when waiting for INBOX0 ACK When waiting for the ACK for INBOX0 message, we have to ensure to include the udelay for proper wait time Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 22fc4ba96def..38360adc53d9 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -1077,6 +1077,7 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti ack = dmub->hw_funcs.read_inbox0_ack_register(dmub); if (ack) return DMUB_STATUS_OK; + udelay(1); } return DMUB_STATUS_TIMEOUT; } -- cgit v1.2.3 From 3f3b08be58834339b00f28d19c20d684cdec704f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 3 Nov 2023 10:01:01 -0400 Subject: drm/amd/display: Add z-state support policy for dcn35 [Why] DML2 means that the dcn3x policy for calculating z-state support no longer runs from validate_bandwidth. This means we are unconditionally allowing Z8, the hardware default. 
[How] Port the policy over to DCN35, but with a few modifications: - Don't use min_dst_y_next_start as a check for Z8/Z10 allow - Add support for overriding the Z10 stutter period per ASIC - Cleanup the code to make the policy assignment more clear Reviewed-by: Charlene Liu Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 34 ++++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h | 2 ++ .../amd/display/dc/resource/dcn35/dcn35_resource.c | 7 +++++ 4 files changed, 44 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 541e781267b9..349629858205 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -874,6 +874,7 @@ struct dc_debug_options { unsigned int seamless_boot_odm_combine; unsigned int force_odm_combine_4to1; //bit vector based on otg inst int minimum_z8_residency_time; + int minimum_z10_residency_time; bool disable_z9_mpc; unsigned int force_fclk_khz; bool enable_tri_buf; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index a5fe523668e9..dee80429fc4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -507,3 +507,37 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, return pipe_cnt; } + +void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW; + unsigned int i, plane_count = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + if (plane_count == 0) { + support = DCN_ZSTATE_SUPPORT_ALLOW; + } else if (plane_count == 1 && context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + bool is_pwrseq0 = link && link->link_index == 0; + bool is_psr1 = link && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr; + int minmum_z8_residency = + dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; + bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; + int minmum_z10_residency = + dc->debug.minimum_z10_residency_time > 0 ? dc->debug.minimum_z10_residency_time : 5000; + bool allow_z10 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z10_residency; + + if (is_pwrseq0 && allow_z10) + support = DCN_ZSTATE_SUPPORT_ALLOW; + else if (is_pwrseq0 && is_psr1) + support = allow_z8 ? 
DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; + else if (allow_z8) + support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; + } + + context->bw_ctx.bw.dcn.clk.zstate_support = support; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h index e8d5a170893e..067480fc3691 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h @@ -39,4 +39,6 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, display_e2e_pipe_params_st *pipes, bool fast_validate); +void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 5c935d94a95c..0d5a03c6d812 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1724,6 +1724,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, out = dml2_validate(dc, context, fast_validate); + if (fast_validate) + return out; + + DC_FP_START(); + dcn35_decide_zstate_support(dc, context); + DC_FP_END(); + return out; } -- cgit v1.2.3 From a2d3c69261178df7d4c1350d5ef67375d399acd3 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Fri, 17 Nov 2023 05:16:59 +0000 Subject: drm/amdkfd: Copy HW exception data to user event Fixes issue where user events of type KFD_EVENT_TYPE_HW_EXCEPTION do not have valid data Signed-off-by: David Yat Sin Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 0f58be65132f..739721254a5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -880,6 +880,10 @@ static int copy_signaled_event_data(uint32_t num_events, dst = &data[i].memory_exception_data; src = &event->memory_exception_data; size = sizeof(struct kfd_hsa_memory_exception_data); + } else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + dst = &data[i].memory_exception_data; + src = &event->hw_exception_data; + size = sizeof(struct kfd_hsa_hw_exception_data); } else if (event->type == KFD_EVENT_TYPE_SIGNAL && waiter->event_age_enabled) { dst = &data[i].signal_event_data.last_event_age; -- cgit v1.2.3 From 35c425f5cc251417ad681475dc9901ab6d3244ea Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 16 Nov 2023 13:57:07 -0500 Subject: drm/amdgpu: update xgmi num links info post gc9.4.2 GC IP 9.4.2 and up support TA reporting of the number of xGMI links between peers. 
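As background (an illustrative sketch, not part of this patch): amdgpu packs discovered IP versions into a single integer, which is why a plain "< IP_VERSION(9, 4, 2)" comparison rejects every GC revision older than 9.4.2 (for example GC 9.4.1) while letting 9.4.2 (Aldebaran) and newer parts such as 9.4.3 take the xGMI link query path. The packing below is an assumption for this sketch, not a quote of the driver header.

/* Illustration: pack (major, minor, rev) so integer ordering matches version ordering. */
#include <assert.h>
#include <stdint.h>

#define PACK_IP_VERSION(maj, min, rev) \
	(((uint32_t)(maj) << 16) | ((uint32_t)(min) << 8) | (uint32_t)(rev))

int main(void)
{
	assert(PACK_IP_VERSION(9, 4, 1) < PACK_IP_VERSION(9, 4, 2));  /* older GC: skipped */
	assert(PACK_IP_VERSION(9, 4, 3) >= PACK_IP_VERSION(9, 4, 2)); /* 9.4.2 and up: supported */
	return 0;
}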
Tested-by: Vignesh Chander Signed-off-by: Jonathan Kim Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 6ab17330a6ed..2d22f7d45512 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -547,7 +547,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, struct amdgpu_device *adev = dst, *peer_adev; int num_links; - if (adev->asic_type != CHIP_ALDEBARAN) + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) return 0; if (src) -- cgit v1.2.3 From 76c5d6900908439386b0045a6130150150079300 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 6 Nov 2023 17:29:33 -0500 Subject: drm/amd/display: Update DCN35 watermarks [Why & How] Update to the new values per HW team request. Affects both stutter and z8. Reviewed-by: Charlene Liu Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 32 +++++++++++----------- .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 8 +++--- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 507a7cf56711..3469f692d6ea 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -443,32 +443,32 @@ static struct wm_table ddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, } @@ -480,32 +480,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + 
.sr_enter_plus_exit_time_us = 16.0, .valid = true, }, } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index dee80429fc4c..30d78ad91b9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -164,10 +164,10 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 50.0, /*changed from 442.0*/ - .sr_enter_plus_exit_z8_time_us = 50.0,/*changed from 560.0*/ + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_z8_time_us = 525.0, + .sr_enter_plus_exit_z8_time_us = 715.0, .fclk_change_latency_us = 20.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, -- cgit v1.2.3 From c4290449f8fbecc55013c6125b50908b5359a8fd Mon Sep 17 00:00:00 2001 From: Ian Chen Date: Mon, 6 Nov 2023 15:46:19 +0800 Subject: drm/amd/display: add skip_implict_edp_power_control flag for dce110 If the link requests to skip implicit eDP power control, we should honor that request. Reviewed-by: Robin Chen Acked-by: Hamza Mahfooz Signed-off-by: Ian Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 960a55e06375..0a331d17ee92 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -3115,7 +3115,8 @@ void dce110_disable_link_output(struct dc_link *link, struct dmcu *dmcu = dc->res_pool->dmcu; if (signal == SIGNAL_TYPE_EDP && - link->dc->hwss.edp_backlight_control) + link->dc->hwss.edp_backlight_control && + !link->skip_implict_edp_power_control) link->dc->hwss.edp_backlight_control(link, false); else if (dmcu != NULL && dmcu->funcs->lock_phy) dmcu->funcs->lock_phy(dmcu); -- cgit v1.2.3 From 613a81995575889753ca44d70d33e84a1d21bae5 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Mon, 6 Nov 2023 16:47:19 -0500 Subject: drm/amd/display: fix a pipe mapping error in dcn32_fpu [why] In dcn32 DML pipes are ordered the same as dc pipes but only for used pipes. For example, if dc pipe 1 and 2 are used, their dml pipe indices would be 0 and 1 respectively. However update_pipe_slice_table_with_split_flags doesn't skip indices for free pipes. This causes us to not reference correct dml pipe output when building pipe topology. [how] Use two variables to iterate dc and dml pipes respectively and only increment dml pipe index when current dc pipe is not free. 
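As a standalone illustration of the index mapping described above (a simplified sketch, not the driver code): with four dc pipes of which only pipes 1 and 2 are used, the dml configuration only holds entries 0 and 1, so the dml index may advance only when a used dc pipe is visited.

/* Sketch: map used dc pipe indices onto dense dml pipe indices. */
#include <stdbool.h>
#include <stdio.h>

#define NUM_PIPES 4

int main(void)
{
	bool pipe_in_use[NUM_PIPES] = { false, true, true, false };
	int dc_idx, dml_idx = 0;

	for (dc_idx = 0; dc_idx < NUM_PIPES; dc_idx++) {
		if (!pipe_in_use[dc_idx])
			continue; /* free pipe: no dml entry, index must not advance */
		printf("dc pipe %d -> dml pipe %d\n", dc_idx, dml_idx);
		dml_idx++;
	}
	return 0;
}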
Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Chaitanya Dhere Acked-by: Hamza Mahfooz Signed-off-by: Wenjing Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9ec4172d1c2d..44b0666e53b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1192,13 +1192,16 @@ static bool update_pipe_slice_table_with_split_flags( */ struct pipe_ctx *pipe; bool odm; - int i; + int dc_pipe_idx, dml_pipe_idx = 0; bool updated = false; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; + for (dc_pipe_idx = 0; + dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) { + pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; + if (resource_is_pipe_type(pipe, FREE_PIPE)) + continue; - if (merge[i]) { + if (merge[dc_pipe_idx]) { if (resource_is_pipe_type(pipe, OPP_HEAD)) /* merging OPP head means reducing ODM slice * count by 1 @@ -1213,17 +1216,18 @@ static bool update_pipe_slice_table_with_split_flags( updated = true; } - if (split[i]) { - odm = vba->ODMCombineEnabled[vba->pipe_plane[i]] != + if (split[dc_pipe_idx]) { + odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] != dm_odm_combine_mode_disabled; if (odm && resource_is_pipe_type(pipe, OPP_HEAD)) update_slice_table_for_stream( - table, pipe->stream, split[i] - 1); + table, pipe->stream, split[dc_pipe_idx] - 1); else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE)) update_slice_table_for_plane(table, pipe, - pipe->plane_state, split[i] - 1); + pipe->plane_state, split[dc_pipe_idx] - 1); updated = true; } + dml_pipe_idx++; } return updated; } -- cgit v1.2.3 From 702e2fb579e000382c219c58dacef4f733511a36 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Nov 2023 11:04:45 +0800 Subject: drm/amdgpu: Retire query/reset_ras_err_status from gfx_v9_4_3 Not needed anymore. 
Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 156 -------------------------------- 1 file changed, 156 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 40d06d32bb74..21865668b787 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3882,150 +3882,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } -static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t data; - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); - } - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); - } - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regVML2_WALKER_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, - 0x3); - } -} - -static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev, - uint32_t status, int xcc_id) -{ - struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; - uint32_t i, simd, wave; - uint32_t wave_status; - uint32_t wave_pc_lo, wave_pc_hi; - uint32_t wave_exec_lo, wave_exec_hi; - uint32_t wave_inst_dw0, wave_inst_dw1; - uint32_t wave_ib_sts; - - for (i = 0; i < 32; i++) { - if (!((i << 1) & status)) - continue; - - simd = i / cu_info->max_waves_per_simd; - wave = i % cu_info->max_waves_per_simd; - - wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS); - wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO); - wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI); - wave_exec_lo = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO); - wave_exec_hi = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI); - wave_inst_dw0 = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0); - wave_inst_dw1 = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1); - wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS); - - dev_info( - adev->dev, - "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n", - simd, wave, wave_status, - ((uint64_t)wave_pc_hi << 32 | wave_pc_lo), - ((uint64_t)wave_exec_hi << 32 | wave_exec_lo), - ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0), - wave_ib_sts); - } -} - -static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t se_idx, sh_idx, cu_idx; - uint32_t status; - - mutex_lock(&adev->grbm_idx_mutex); - for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { - for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { - for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { - gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, - cu_idx, xcc_id); - status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS); - if (status != 0) { - dev_info( - adev->dev, - "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n", - se_idx, sh_idx, cu_idx); - 
gfx_v9_4_3_log_cu_timeout_status( - adev, status, xcc_id); - } - /* clear old status */ - WREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS, 0); - } - } - } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, - void *ras_error_status, int xcc_id) -{ - gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id); -} - -static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3); -} - -static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t se_idx, sh_idx, cu_idx; - - mutex_lock(&adev->grbm_idx_mutex); - for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { - for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { - for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { - gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, - cu_idx, xcc_id); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS, 0); - } - } - } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, - void *ras_error_status, int xcc_id) -{ - gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); -} - static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { @@ -4067,16 +3923,6 @@ static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev) amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count); } -static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) -{ - amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); -} - -static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev) -{ - amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status); -} - static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev) { amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); @@ -4394,8 +4240,6 @@ struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = { struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = { .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count, .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count, - .query_ras_error_status = &gfx_v9_4_3_query_ras_error_status, - .reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status, }; struct amdgpu_gfx_ras gfx_v9_4_3_ras = { -- cgit v1.2.3 From 2e9b152325f649923b9324fa8ea5f1a5289145bb Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 1 Aug 2023 10:37:41 -0400 Subject: drm/amdgpu: optimize RLC powerdown notification on Vangogh The smu needs to get the rlc power-down message to keep the rlc state in sync with the smu. The rlc state update message needs to be sent when the smu begins its suspend sequence; otherwise the SMU will crash if the rlc state is not notified by the driver, and the rlc state will probably change after that notification. So the driver needs to notify the rlc state to the smu at the end of the suspend sequence in amdgpu_device_suspend(), which can
make sure the rlc state is correctly set to SMU. [ 101.000590] amdgpu 0000:03:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x0000001E SMN_C2PMSG_82:0x00000000 [ 101.000598] amdgpu 0000:03:00.0: amdgpu: Failed to disable gfxoff! [ 110.838026] amdgpu 0000:03:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x0000001E SMN_C2PMSG_82:0x00000000 [ 110.838035] amdgpu 0000:03:00.0: amdgpu: Failed to disable smu features. [ 110.838039] amdgpu 0000:03:00.0: amdgpu: Fail to disable dpm features! [ 110.838040] [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block failed -62 [ 110.884394] PM: suspend of devices aborted after 21213.620 msecs [ 110.884402] PM: start suspend of devices aborted after 21213.882 msecs [ 110.884405] PM: Some devices failed to suspend, or early wake event detected Reviewed-by: Yifan Zhang Signed-off-by: Perry Yuan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 18 ++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 5 +++++ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 5 ++--- drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 1 + 8 files changed, 43 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b5edf40b5d03..5d3a83193115 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4549,6 +4549,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); + r = amdgpu_dpm_notify_rlc_state(adev, false); + if (r) + return r; + return 0; } diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 6a1d31e8fdd1..8ebba87f4289 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -445,6 +445,7 @@ struct amd_pm_funcs { struct dpm_clocks *clock_table); int (*get_smu_prv_buf_details)(void *handle, void **addr, size_t *size); void (*pm_compute_clocks)(void *handle); + int (*notify_rlc_state)(void *handle, bool en); }; struct metrics_table_header { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 4717884a2bc4..97b40c5fa1ff 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -181,6 +181,24 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en) +{ + int ret = 0; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (pp_funcs && pp_funcs->notify_rlc_state) { + mutex_lock(&adev->pm.mutex); + + ret = pp_funcs->notify_rlc_state( + adev->powerplay.pp_handle, + en); + + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 9b3fef5474b1..d76b0a60db44 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -415,6 +415,8 @@ int amdgpu_dpm_mode1_reset(struct 
amdgpu_device *adev); int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, enum pp_mp1_state mp1_state); +int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en); + int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev); int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 930e7e3532ad..f464610a959f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1710,6 +1710,16 @@ static int smu_disable_dpms(struct smu_context *smu) } } + /* Notify SMU RLC is going to be off, stop RLC and SMU interaction. + * otherwise SMU will hang while interacting with RLC if RLC is halted + * this is a WA for Vangogh asic which fix the SMU hang issue. + */ + ret = smu_notify_rlc_state(smu, false); + if (ret) { + dev_err(adev->dev, "Fail to notify rlc status!\n"); + return ret; + } + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) && !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) && !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 32600ba74bf1..c125253df20b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1370,6 +1370,11 @@ struct pptable_funcs { * management. */ int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable); + + /** + * @notify_rlc_state: Notify RLC power state to SMU. + */ + int (*notify_rlc_state)(struct smu_context *smu, bool en); }; typedef enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 762b31455a0b..2ff6deedef95 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -2193,8 +2193,7 @@ static int vangogh_get_dpm_clock_table(struct smu_context *smu, struct dpm_clock return 0; } - -static int vangogh_system_features_control(struct smu_context *smu, bool en) +static int vangogh_notify_rlc_state(struct smu_context *smu, bool en) { struct amdgpu_device *adev = smu->adev; int ret = 0; @@ -2523,7 +2522,7 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .print_clk_levels = vangogh_common_print_clk_levels, .set_default_dpm_table = vangogh_set_default_dpm_tables, .set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters, - .system_features_control = vangogh_system_features_control, + .notify_rlc_state = vangogh_notify_rlc_state, .feature_is_enabled = smu_cmn_feature_is_enabled, .set_power_profile_mode = vangogh_set_power_profile_mode, .get_power_profile_mode = vangogh_get_power_profile_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index 80b3c3efc006..64766ac69c53 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -97,6 +97,7 @@ #define smu_get_default_config_table_settings(smu, config_table) smu_ppt_funcs(get_default_config_table_settings, -EOPNOTSUPP, smu, config_table) #define smu_set_config_table(smu, config_table) smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table) #define smu_init_pptable_microcode(smu) smu_ppt_funcs(init_pptable_microcode, 0, smu) +#define smu_notify_rlc_state(smu, en) smu_ppt_funcs(notify_rlc_state, 0, smu, en) #endif #endif -- cgit v1.2.3 From 
cfab803884f426b36b58dbe1f86f99742767c208 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 2 Nov 2023 14:59:13 -0400 Subject: drm/amd/display: update pixel clock params after stream slice count change in context [why] When ODM slice count is changed, otg master pipe's pixel clock params is no longer valid as the value is dependent on ODM slice count. Reviewed-by: Chaitanya Dhere Acked-by: Hamza Mahfooz Signed-off-by: Wenjing Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 9 ++++++--- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 6 +----- drivers/gpu/drm/amd/display/dc/inc/core_types.h | 1 + .../drm/amd/display/dc/resource/dcn20/dcn20_resource.c | 16 ++++++++++------ .../drm/amd/display/dc/resource/dcn20/dcn20_resource.h | 1 + .../drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 1 + .../drm/amd/display/dc/resource/dcn321/dcn321_resource.c | 1 + 7 files changed, 21 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 42a927710743..84d632700949 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2256,7 +2256,7 @@ static struct pipe_ctx *get_last_dpp_pipe_in_mpcc_combine( } static bool update_pipe_params_after_odm_slice_count_change( - const struct dc_stream_state *stream, + struct pipe_ctx *otg_master, struct dc_state *context, const struct resource_pool *pool) { @@ -2266,9 +2266,12 @@ static bool update_pipe_params_after_odm_slice_count_change( for (i = 0; i < pool->pipe_count && result; i++) { pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream == stream && pipe->plane_state) + if (pipe->stream == otg_master->stream && pipe->plane_state) result = resource_build_scaling_params(pipe); } + + if (pool->funcs->build_pipe_pix_clk_params) + pool->funcs->build_pipe_pix_clk_params(otg_master); return result; } @@ -2951,7 +2954,7 @@ bool resource_update_pipes_for_stream_with_slice_count( otg_master, new_ctx, pool); if (result) result = update_pipe_params_after_odm_slice_count_change( - otg_master->stream, new_ctx, pool); + otg_master, new_ctx, pool); return result; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 44b0666e53b0..e7f13e28caa3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1237,15 +1237,11 @@ static void update_pipes_with_slice_table(struct dc *dc, struct dc_state *contex { int i; - for (i = 0; i < table->odm_combine_count; i++) { + for (i = 0; i < table->odm_combine_count; i++) resource_update_pipes_for_stream_with_slice_count(context, dc->current_state, dc->res_pool, table->odm_combines[i].stream, table->odm_combines[i].slice_count); - /* TODO: move this into the function above */ - dcn20_build_mapped_resource(dc, context, - table->odm_combines[i].stream); - } for (i = 0; i < table->mpc_combine_count; i++) resource_update_pipes_for_plane_with_slice_count(context, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index bac1420b1de8..10397d4dfb07 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -205,6 +205,7 @@ struct resource_funcs { void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); void (*save_mall_state)(struct dc *dc, 
struct dc_state *context, struct mall_temp_config *temp_config); void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); + void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx); }; struct audio_support{ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index f04bb5b1471d..f9c5bc624be3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -1273,15 +1273,19 @@ static void build_clamping_params(struct dc_stream_state *stream) stream->clamping.pixel_encoding = stream->timing.pixel_encoding; } -static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx) { - get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); - pipe_ctx->clock_source->funcs->get_pix_clk_dividers( - pipe_ctx->clock_source, - &pipe_ctx->stream_res.pix_clk_params, - &pipe_ctx->pll_settings); + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + &pipe_ctx->pll_settings); +} + +static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) +{ + + dcn20_build_pipe_pix_clk_params(pipe_ctx); pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h index 37ecaccc5d12..4cee3fa11a7f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h @@ -165,6 +165,7 @@ enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream); enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state); +void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx); #endif /* __DC_RESOURCE_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 36e4c7bef403..f6cbcc9b4006 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2041,6 +2041,7 @@ static struct resource_funcs dcn32_res_pool_funcs = { .retain_phantom_pipes = dcn32_retain_phantom_pipes, .save_mall_state = dcn32_save_mall_state, .restore_mall_state = dcn32_restore_mall_state, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index bedb70b98162..12986fe0b289 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1609,6 +1609,7 @@ static struct resource_funcs dcn321_res_pool_funcs = { .retain_phantom_pipes = dcn32_retain_phantom_pipes, .save_mall_state = dcn32_save_mall_state, .restore_mall_state = dcn32_restore_mall_state, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, }; static uint32_t 
read_pipe_fuses(struct dc_context *ctx) -- cgit v1.2.3 From 1290183db494641772c18d063c34e9c8f720c61c Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 2 Nov 2023 15:02:42 -0400 Subject: drm/amd/display: always use mpc factor of 2 for stereo timings [why] In the new pipe resource management logic, the special handling for stereo timings is missing. This commit implements the same stereo timings handling as old pipe resource management code. Reviewed-by: Chaitanya Dhere Acked-by: Hamza Mahfooz Signed-off-by: Wenjing Liu Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml2_dc_resource_mgmt.c | 26 +++++++++++++++------- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 2 +- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h | 2 +- 3 files changed, 20 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 1a2b24cc6b61..0baf39d64a2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -772,18 +772,29 @@ static unsigned int get_mpc_factor(struct dml2_context *ctx, const struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, - const struct dc_stream_status *status, unsigned int stream_id, + const struct dc_stream_status *status, + const struct dc_stream_state *stream, int plane_idx) { unsigned int plane_id; unsigned int cfg_idx; + unsigned int mpc_factor; - get_plane_id(ctx, state, status->plane_states[plane_idx], stream_id, plane_idx, &plane_id); + get_plane_id(ctx, state, status->plane_states[plane_idx], + stream->stream_id, plane_idx, &plane_id); cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); - if (ctx->architecture == dml2_architecture_20) - return (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; - ASSERT(false); - return 1; + if (ctx->architecture == dml2_architecture_20) { + mpc_factor = (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; + } else { + mpc_factor = 1; + ASSERT(false); + } + + /* For stereo timings, we need to pipe split */ + if (dml2_is_stereo_timing(stream)) + mpc_factor = 2; + + return mpc_factor; } static unsigned int get_odm_factor( @@ -820,14 +831,13 @@ static void populate_mpc_factors_for_stream( unsigned int mpc_factors[MAX_PIPES]) { const struct dc_stream_status *status = &state->stream_status[stream_idx]; - unsigned int stream_id = state->streams[stream_idx]->stream_id; int i; for (i = 0; i < status->plane_count; i++) if (odm_factor == 1) mpc_factors[i] = get_mpc_factor( ctx, state, disp_cfg, mapping, status, - stream_id, i); + state->streams[stream_idx], i); else mpc_factors[i] = 1; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 2498b8341199..33eab80e89a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -468,7 +468,7 @@ bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc return need_recalculation; } -bool dml2_is_stereo_timing(struct dc_stream_state *stream) +bool dml2_is_stereo_timing(const struct dc_stream_state *stream) { bool is_stereo = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h index 23b9028337d4..5842d6d3c4b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h +++ 
b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h @@ -42,7 +42,7 @@ void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_st void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx); int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id); bool is_dtbclk_required(const struct dc *dc, struct dc_state *context); -bool dml2_is_stereo_timing(struct dc_stream_state *stream); +bool dml2_is_stereo_timing(const struct dc_stream_state *stream); /* * dml2_dc_construct_pipes - This function will determine if we need additional pipes based -- cgit v1.2.3 From 80061d6b58a99f1fffb97a7f3592234a5fe0a3fe Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 7 Nov 2023 11:12:45 -0500 Subject: drm/amd/display: Add Z8 watermarks for DML2 bbox overrides [Why] We can override SR watermarks but not Z8 ones. [How] Add new parameters for Z8 matching the SR ones and feed them into the states. These also weren't being applied to every state, so make sure that we loop over and update all SOC states if given an override. Reviewed-by: Jun Lei Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml2_translation_helper.c | 47 +++++++++++++++------- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h | 2 + 2 files changed, 34 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 75171bee6f71..2b9638c6d9b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -341,25 +341,42 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, break; } - /* Override from passed values, mainly for debugging purposes, if available */ - if (dml2->config.bbox_overrides.sr_exit_latency_us) { - p->in_states->state_array[0].sr_exit_time_us = dml2->config.bbox_overrides.sr_exit_latency_us; - } + /* Override from passed values, if available */ + for (i = 0; i < p->in_states->num_states; i++) { + if (dml2->config.bbox_overrides.sr_exit_latency_us) { + p->in_states->state_array[i].sr_exit_time_us = + dml2->config.bbox_overrides.sr_exit_latency_us; + } - if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { - p->in_states->state_array[0].sr_enter_plus_exit_time_us = dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; - } + if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { + p->in_states->state_array[i].sr_enter_plus_exit_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; + } - if (dml2->config.bbox_overrides.urgent_latency_us) { - p->in_states->state_array[0].urgent_latency_pixel_data_only_us = dml2->config.bbox_overrides.urgent_latency_us; - } + if (dml2->config.bbox_overrides.sr_exit_z8_time_us) { + p->in_states->state_array[i].sr_exit_z8_time_us = + dml2->config.bbox_overrides.sr_exit_z8_time_us; + } - if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { - p->in_states->state_array[0].dram_clock_change_latency_us = dml2->config.bbox_overrides.dram_clock_change_latency_us; - } + if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) { + p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us; + } + + if (dml2->config.bbox_overrides.urgent_latency_us) { + 
p->in_states->state_array[i].urgent_latency_pixel_data_only_us = + dml2->config.bbox_overrides.urgent_latency_us; + } - if (dml2->config.bbox_overrides.fclk_change_latency_us) { - p->in_states->state_array[0].fclk_change_latency_us = dml2->config.bbox_overrides.fclk_change_latency_us; + if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { + p->in_states->state_array[i].dram_clock_change_latency_us = + dml2->config.bbox_overrides.dram_clock_change_latency_us; + } + + if (dml2->config.bbox_overrides.fclk_change_latency_us) { + p->in_states->state_array[i].fclk_change_latency_us = + dml2->config.bbox_overrides.fclk_change_latency_us; + } } /* DCFCLK stas values are project specific */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 317f90776d97..fe15baa4bf09 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -139,6 +139,8 @@ struct dml2_soc_bbox_overrides { double urgent_latency_us; double sr_exit_latency_us; double sr_enter_plus_exit_latency_us; + double sr_exit_z8_time_us; + double sr_enter_plus_exit_z8_time_us; double dram_clock_change_latency_us; double fclk_change_latency_us; unsigned int dram_num_chan; -- cgit v1.2.3 From 4b8251e019ea17037667e6d61aa5e66d5b4f51d2 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Nov 2023 10:14:21 +0800 Subject: drm/amdgpu: Do not issue gpu reset from nbio v7_9 bif interrupt In nbio v7_9, host driver should not issu gpu reset Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 23f26f8caad4..25a3da83e0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -611,11 +611,6 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device dev_info(adev->dev, "RAS controller interrupt triggered " "by NBIF error\n"); - - /* ras_controller_int is dedicated for nbif ras error, - * not the global interrupt for sync flood - */ - amdgpu_ras_reset_gpu(adev); } amdgpu_ras_error_data_fini(&err_data); -- cgit v1.2.3 From 20b07b0cb3a0a2fb3a6daf00f645925be77ec80c Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Mon, 20 Nov 2023 11:31:32 -0600 Subject: drm/amdgpu: Force order between a read and write to the same address Setting register to force ordering to prevent read/write or write/read hazards for un-cached modes. 
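The change lands as a golden-register entry for regTCP_CNTL in the init sequence (see the diff that follows). An entry of that form pairs a register with a mask and a value; conceptually it is a masked read-modify-write applied once at init. The sketch below is illustrative only - it is not the in-tree soc15 register-sequence helper, and read_reg()/write_reg() are hypothetical MMIO accessors.

/* Illustrative sketch of applying a mask/value "golden" register entry
 * as a masked read-modify-write. read_reg()/write_reg() are hypothetical
 * MMIO accessors, not amdgpu APIs.
 */
#include <stdint.h>

struct golden_entry {
	uint32_t offset;    /* register offset, e.g. a TCP_CNTL-style register */
	uint32_t and_mask;  /* bits this entry is allowed to touch */
	uint32_t or_value;  /* value to program within those bits */
};

uint32_t read_reg(uint32_t offset);
void write_reg(uint32_t offset, uint32_t val);

static void apply_golden_settings(const struct golden_entry *e, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++) {
		uint32_t v = read_reg(e[i].offset);

		v &= ~e[i].and_mask;                /* clear the covered bits */
		v |= e[i].or_value & e[i].and_mask; /* force them to the new value */
		write_reg(e[i].offset, v);
	}
}

Under the read-modify-write shown here, an entry whose mask and value are both 0x20000000 simply turns on one ordering-control bit and preserves the rest of the register.
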
Signed-off-by: Alex Sierra Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++++ drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0c6133cc5e57..40ce12323164 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -89,6 +89,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); +static const struct soc15_reg_golden golden_settings_gc_11_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) +}; + static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), @@ -304,6 +308,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) default: break; } + soc15_program_register_sequence(adev, + golden_settings_gc_11_0, + (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); + } static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h index c92c4b83253f..4bff1ef8a9a6 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h @@ -6369,6 +6369,8 @@ #define regTCP_INVALIDATE_BASE_IDX 1 #define regTCP_STATUS 0x19a1 #define regTCP_STATUS_BASE_IDX 1 +#define regTCP_CNTL 0x19a2 +#define regTCP_CNTL_BASE_IDX 1 #define regTCP_CNTL2 0x19a3 #define regTCP_CNTL2_BASE_IDX 1 #define regTCP_DEBUG_INDEX 0x19a5 -- cgit v1.2.3 From 251027968a7230f18c353e25634cc7e25d9ab953 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 7 Nov 2023 11:15:16 -0500 Subject: drm/amd/display: Feed SR and Z8 watermarks into DML2 for DCN35 [Why] We've updated the table but the values aren't being reflected in DML2 calculation. [How] Pass them into the bbox overrides. 
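Conceptually, the change is a straight copy from the DCN3.5 SOC table into the DML2 bounding-box override fields, which the per-state override loop added in the previous patch then applies to every state. The sketch below only illustrates that copy; the struct types and feed_soc_latencies() are stand-ins, not the driver's types (the real assignments operate on dc->dml2_options.bbox_overrides, as the diff that follows shows).

/* Illustrative only: mirror SOC latency table entries into DML2-style
 * bounding-box overrides so the override loop can apply them per state.
 */
struct soc_latency_table {
	double sr_exit_time_us;
	double sr_enter_plus_exit_time_us;
	double sr_exit_z8_time_us;
	double sr_enter_plus_exit_z8_time_us;
	double dram_clock_change_latency_us;
};

struct bbox_latency_overrides {
	double sr_exit_latency_us;
	double sr_enter_plus_exit_latency_us;
	double sr_exit_z8_time_us;
	double sr_enter_plus_exit_z8_time_us;
	double dram_clock_change_latency_us;
};

static void feed_soc_latencies(const struct soc_latency_table *soc,
			       struct bbox_latency_overrides *ovr)
{
	ovr->dram_clock_change_latency_us = soc->dram_clock_change_latency_us;
	ovr->sr_exit_latency_us = soc->sr_exit_time_us;
	ovr->sr_enter_plus_exit_latency_us = soc->sr_enter_plus_exit_time_us;
	ovr->sr_exit_z8_time_us = soc->sr_exit_z8_time_us;
	ovr->sr_enter_plus_exit_z8_time_us = soc->sr_enter_plus_exit_z8_time_us;
}
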
Reviewed-by: Jun Lei Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 30d78ad91b9c..21c17d3296a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -329,6 +329,15 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, /*temp till dml2 fully work without dml1*/ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); + + /* Update latency values */ + dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us; + + dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us; + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us; + + dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us; + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us; } static bool is_dual_plane(enum surface_pixel_format format) -- cgit v1.2.3 From a953cd8cac6be69fba0b66e6fb46d1324d797af4 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 7 Nov 2023 15:07:56 -0500 Subject: drm/amd/display: Fix MPCC 1DLUT programming [Why] Wrong function is used to translate LUT values to HW format, leading to visible artifacting in some cases. [How] Use the correct cm3_helper function. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Krunoslav Kovac Acked-by: Hamza Mahfooz Signed-off-by: Ilya Bakoulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 6a65af8c36b9..5f7f474ef51c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -487,8 +487,7 @@ bool dcn32_set_mcm_luts( if (plane_state->blend_tf->type == TF_TYPE_HWPWL) lut_params = &plane_state->blend_tf->pwl; else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - plane_state->blend_tf, + cm3_helper_translate_curve_to_hw_format(plane_state->blend_tf, &dpp_base->regamma_params, false); lut_params = &dpp_base->regamma_params; } @@ -503,8 +502,7 @@ bool dcn32_set_mcm_luts( else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) { // TODO: dpp_base replace ASSERT(false); - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - plane_state->in_shaper_func, + cm3_helper_translate_curve_to_hw_format(plane_state->in_shaper_func, &dpp_base->shaper_params, true); lut_params = &dpp_base->shaper_params; } -- cgit v1.2.3 From 40436ce7ccfec5c616e2e48d0ec2c905637c7397 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 7 Nov 2023 17:01:49 -0500 Subject: drm/amd/display: Use DRAM speed from validation for dummy p-state [Description] When choosing which dummy p-state latency to use, we need to use the DRAM speed from validation. The DRAMSpeed DML variable can change because we use different input params to DML when populating watermarks set B. 
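In other words, the DRAM speed has to be latched right after validation, before the later DML pass used to populate watermark set B overwrites vba.DRAMSpeed, and the dummy p-state latency selection must use that latched value. A minimal sketch of the pattern follows, with hypothetical helper names rather than DC functions.

/* Illustrative pattern only: snapshot a validated value before later
 * model runs mutate the shared VBA state, then consume the snapshot.
 */
struct vba_state {
	double DRAMSpeed; /* repopulated by every DML run */
};

void run_dml_for_wm_set_b(struct vba_state *vba);            /* hypothetical */
double dummy_pstate_latency_for(double min_dram_speed_mts);  /* hypothetical */

static double pick_dummy_pstate_latency(struct vba_state *vba)
{
	/* Capture the DRAM speed produced by validation... */
	double dram_speed_from_validation = vba->DRAMSpeed;

	/* ...because this pass feeds DML different inputs and overwrites it. */
	run_dml_for_wm_set_b(vba);

	/* Decide based on the snapshot, not the now-stale shared field. */
	return dummy_pstate_latency_for(dram_speed_from_validation);
}
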
Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index e7f13e28caa3..92e2ddc9ab7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2231,6 +2231,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, int i, pipe_idx, vlevel_temp = 0; double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed; double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation; bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; @@ -2418,7 +2419,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, } if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + min_dram_speed_mts = dram_speed_from_validation; min_dram_speed_mts_margin = 160; context->bw_ctx.dml.soc.dram_clock_change_latency_us = -- cgit v1.2.3 From 6c22fb07e0c2935d97a86509f16f755ab895f2c8 Mon Sep 17 00:00:00 2001 From: Bhuvana Chandra Pinninti Date: Wed, 18 Oct 2023 19:16:17 +0530 Subject: drm/amd/display: Refactor DSC into component folder [why] To refactor DSC and make DSC files unit testable. [how] moved the dcnxx_dsc.c and .h files into corresponding dcn folders inside the dsc and cleared the linkage errors. 
Reviewed-by: Wenjing Liu Acked-by: Hamza Mahfooz Signed-off-by: Bhuvana Chandra Pinninti Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Makefile | 1 + drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 2 - drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c | 780 --------------------- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h | 589 ---------------- drivers/gpu/drm/amd/display/dc/dcn35/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c | 60 -- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h | 59 -- drivers/gpu/drm/amd/display/dc/dsc/Makefile | 26 + .../gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c | 780 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h | 589 ++++++++++++++++ .../gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c | 60 ++ .../gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h | 59 ++ drivers/gpu/drm/amd/display/dc/dsc/dsc.h | 112 +++ drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h | 112 --- 14 files changed, 1628 insertions(+), 1603 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h create mode 100644 drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c create mode 100644 drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h create mode 100644 drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c create mode 100644 drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h create mode 100644 drivers/gpu/drm/amd/display/dc/dsc/dsc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index 71192fc81a20..7b0959da2cac 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -31,6 +31,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 1cac1eca8111..93ac45802e44 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -7,8 +7,6 @@ DCN20 = dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \ dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o -DCN20 += dcn20_dsc.o - AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN20) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c deleted file mode 100644 index c9ae2d8f0096..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ /dev/null @@ -1,780 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include - -#include "reg_helper.h" -#include "dcn20_dsc.h" -#include "dsc/dscc_types.h" -#include "dsc/rc_calc.h" - -static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals); - -/* Object I/F functions */ -static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s); -static bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg); -static void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, - struct dsc_optc_config *dsc_optc_cfg); -static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe); -static void dsc2_disable(struct display_stream_compressor *dsc); -static void dsc2_disconnect(struct display_stream_compressor *dsc); - -static const struct dsc_funcs dcn20_dsc_funcs = { - .dsc_get_enc_caps = dsc2_get_enc_caps, - .dsc_read_state = dsc2_read_state, - .dsc_validate_stream = dsc2_validate_stream, - .dsc_set_config = dsc2_set_config, - .dsc_get_packed_pps = dsc2_get_packed_pps, - .dsc_enable = dsc2_enable, - .dsc_disable = dsc2_disable, - .dsc_disconnect = dsc2_disconnect, -}; - -/* Macro definitios for REG_SET macros*/ -#define CTX \ - dsc20->base.ctx - -#define REG(reg)\ - dsc20->dsc_regs->reg - -#undef FN -#define FN(reg_name, field_name) \ - dsc20->dsc_shift->field_name, dsc20->dsc_mask->field_name -#define DC_LOGGER \ - dsc->ctx->logger - -enum dsc_bits_per_comp { - DSC_BPC_8 = 8, - DSC_BPC_10 = 10, - DSC_BPC_12 = 12, - DSC_BPC_UNKNOWN -}; - -/* API functions (external or via structure->function_pointer) */ - -void dsc2_construct(struct dcn20_dsc *dsc, - struct dc_context *ctx, - int inst, - const struct dcn20_dsc_registers *dsc_regs, - const struct dcn20_dsc_shift *dsc_shift, - const struct dcn20_dsc_mask *dsc_mask) -{ - dsc->base.ctx = ctx; - dsc->base.inst = inst; - dsc->base.funcs = &dcn20_dsc_funcs; - - dsc->dsc_regs = dsc_regs; - dsc->dsc_shift = dsc_shift; - dsc->dsc_mask = dsc_mask; - - dsc->max_image_width = 5184; -} - - -#define DCN20_MAX_PIXEL_CLOCK_Mhz 1188 -#define DCN20_MAX_DISPLAY_CLOCK_Mhz 1200 - -/* This returns the capabilities for a single DSC encoder engine. Number of slices and total throughput - * can be doubled, tripled etc. by using additional DSC engines. 
- */ -void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) -{ - dsc_enc_caps->dsc_version = 0x21; /* v1.2 - DP spec defined it in reverse order and we kept it */ - - dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 = 1; - - dsc_enc_caps->lb_bit_depth = 13; - dsc_enc_caps->is_block_pred_supported = true; - - dsc_enc_caps->color_formats.bits.RGB = 1; - dsc_enc_caps->color_formats.bits.YCBCR_444 = 1; - dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1; - dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0; - dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1; - - dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; - dsc_enc_caps->color_depth.bits.COLOR_DEPTH_10_BPC = 1; - dsc_enc_caps->color_depth.bits.COLOR_DEPTH_12_BPC = 1; - - /* Maximum total throughput with all the slices combined. This is different from how DP spec specifies it. - * Our decoder's total throughput in Pix/s is equal to DISPCLK. This is then shared between slices. - * The value below is the absolute maximum value. The actual throughput may be lower, but it'll always - * be sufficient to process the input pixel rate fed into a single DSC engine. - */ - dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz; - - /* For pixel clock bigger than a single-pipe limit we'll need two engines, which then doubles our - * throughput and number of slices, but also introduces a lower limit of 2 slices - */ - if (pixel_clock_100Hz >= DCN20_MAX_PIXEL_CLOCK_Mhz*10000) { - dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 0; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 = 1; - dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 2; - } - - /* For pixel clock bigger than a single-pipe limit needing four engines ODM 4:1, which then quardruples our - * throughput and number of slices - */ - if (pixel_clock_100Hz > DCN20_MAX_PIXEL_CLOCK_Mhz*10000*2) { - dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 = 1; - dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 4; - } - - dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ - dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ -} - - -/* this function read dsc related register fields to be logged later in dcn10_log_hw_state - * into a dcn_dsc_state struct. 
- */ -static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s) -{ - struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); - - REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &s->dsc_clock_en); - REG_GET(DSCC_PPS_CONFIG3, SLICE_WIDTH, &s->dsc_slice_width); - REG_GET(DSCC_PPS_CONFIG1, BITS_PER_PIXEL, &s->dsc_bits_per_pixel); - REG_GET(DSCC_PPS_CONFIG3, SLICE_HEIGHT, &s->dsc_slice_height); - REG_GET(DSCC_PPS_CONFIG1, CHUNK_SIZE, &s->dsc_chunk_size); - REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width); - REG_GET(DSCC_PPS_CONFIG2, PIC_HEIGHT, &s->dsc_pic_height); - REG_GET(DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, &s->dsc_slice_bpg_offset); - REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &s->dsc_fw_en, - DSCRM_DSC_OPP_PIPE_SOURCE, &s->dsc_opp_source); -} - - -static bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg) -{ - struct dsc_optc_config dsc_optc_cfg; - struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); - - if (dsc_cfg->pic_width > dsc20->max_image_width) - return false; - - return dsc_prepare_config(dsc_cfg, &dsc20->reg_vals, &dsc_optc_cfg); -} - - -void dsc_config_log(struct display_stream_compressor *dsc, const struct dsc_config *config) -{ - DC_LOG_DSC("\tnum_slices_h %d", config->dc_dsc_cfg.num_slices_h); - DC_LOG_DSC("\tnum_slices_v %d", config->dc_dsc_cfg.num_slices_v); - DC_LOG_DSC("\tbits_per_pixel %d (%d.%04d)", - config->dc_dsc_cfg.bits_per_pixel, - config->dc_dsc_cfg.bits_per_pixel / 16, - ((config->dc_dsc_cfg.bits_per_pixel % 16) * 10000) / 16); - DC_LOG_DSC("\tcolor_depth %d", config->color_depth); -} - -static void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, - struct dsc_optc_config *dsc_optc_cfg) -{ - bool is_config_ok; - struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); - - DC_LOG_DSC("Setting DSC Config at DSC inst %d", dsc->inst); - dsc_config_log(dsc, dsc_cfg); - is_config_ok = dsc_prepare_config(dsc_cfg, &dsc20->reg_vals, dsc_optc_cfg); - ASSERT(is_config_ok); - DC_LOG_DSC("programming DSC Picture Parameter Set (PPS):"); - dsc_log_pps(dsc, &dsc20->reg_vals.pps); - dsc_write_to_registers(dsc, &dsc20->reg_vals); -} - - -bool dsc2_get_packed_pps(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, uint8_t *dsc_packed_pps) -{ - bool is_config_ok; - struct dsc_reg_values dsc_reg_vals; - struct dsc_optc_config dsc_optc_cfg; - - memset(&dsc_reg_vals, 0, sizeof(dsc_reg_vals)); - memset(&dsc_optc_cfg, 0, sizeof(dsc_optc_cfg)); - - DC_LOG_DSC("Getting packed DSC PPS for DSC Config:"); - dsc_config_log(dsc, dsc_cfg); - DC_LOG_DSC("DSC Picture Parameter Set (PPS):"); - is_config_ok = dsc_prepare_config(dsc_cfg, &dsc_reg_vals, &dsc_optc_cfg); - ASSERT(is_config_ok); - drm_dsc_pps_payload_pack((struct drm_dsc_picture_parameter_set *)dsc_packed_pps, &dsc_reg_vals.pps); - dsc_log_pps(dsc, &dsc_reg_vals.pps); - - return is_config_ok; -} - - -static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe) -{ - struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); - int dsc_clock_en; - int dsc_fw_config; - int enabled_opp_pipe; - - DC_LOG_DSC("enable DSC %d at opp pipe %d", dsc->inst, opp_pipe); - - REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en); - REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe); - if ((dsc_clock_en || dsc_fw_config) && enabled_opp_pipe != opp_pipe) { - DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already enabled!", dsc->inst, enabled_opp_pipe); - ASSERT(0); - } - 
- REG_UPDATE(DSC_TOP_CONTROL, - DSC_CLOCK_EN, 1); - - REG_UPDATE_2(DSCRM_DSC_FORWARD_CONFIG, - DSCRM_DSC_FORWARD_EN, 1, - DSCRM_DSC_OPP_PIPE_SOURCE, opp_pipe); -} - - -static void dsc2_disable(struct display_stream_compressor *dsc) -{ - struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); - int dsc_clock_en; - int dsc_fw_config; - int enabled_opp_pipe; - - DC_LOG_DSC("disable DSC %d", dsc->inst); - - REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en); - REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe); - if (!dsc_clock_en || !dsc_fw_config) { - DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already disabled!", dsc->inst, enabled_opp_pipe); - ASSERT(0); - } - - REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG, - DSCRM_DSC_FORWARD_EN, 0); - - REG_UPDATE(DSC_TOP_CONTROL, - DSC_CLOCK_EN, 0); -} - -static void dsc2_disconnect(struct display_stream_compressor *dsc) -{ - struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); - - DC_LOG_DSC("disconnect DSC %d", dsc->inst); - - REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG, - DSCRM_DSC_FORWARD_EN, 0); -} - -/* This module's internal functions */ -void dsc_log_pps(struct display_stream_compressor *dsc, struct drm_dsc_config *pps) -{ - int i; - int bits_per_pixel = pps->bits_per_pixel; - - DC_LOG_DSC("\tdsc_version_major %d", pps->dsc_version_major); - DC_LOG_DSC("\tdsc_version_minor %d", pps->dsc_version_minor); - DC_LOG_DSC("\tbits_per_component %d", pps->bits_per_component); - DC_LOG_DSC("\tline_buf_depth %d", pps->line_buf_depth); - DC_LOG_DSC("\tblock_pred_enable %d", pps->block_pred_enable); - DC_LOG_DSC("\tconvert_rgb %d", pps->convert_rgb); - DC_LOG_DSC("\tsimple_422 %d", pps->simple_422); - DC_LOG_DSC("\tvbr_enable %d", pps->vbr_enable); - DC_LOG_DSC("\tbits_per_pixel %d (%d.%04d)", bits_per_pixel, bits_per_pixel / 16, ((bits_per_pixel % 16) * 10000) / 16); - DC_LOG_DSC("\tpic_height %d", pps->pic_height); - DC_LOG_DSC("\tpic_width %d", pps->pic_width); - DC_LOG_DSC("\tslice_height %d", pps->slice_height); - DC_LOG_DSC("\tslice_width %d", pps->slice_width); - DC_LOG_DSC("\tslice_chunk_size %d", pps->slice_chunk_size); - DC_LOG_DSC("\tinitial_xmit_delay %d", pps->initial_xmit_delay); - DC_LOG_DSC("\tinitial_dec_delay %d", pps->initial_dec_delay); - DC_LOG_DSC("\tinitial_scale_value %d", pps->initial_scale_value); - DC_LOG_DSC("\tscale_increment_interval %d", pps->scale_increment_interval); - DC_LOG_DSC("\tscale_decrement_interval %d", pps->scale_decrement_interval); - DC_LOG_DSC("\tfirst_line_bpg_offset %d", pps->first_line_bpg_offset); - DC_LOG_DSC("\tnfl_bpg_offset %d", pps->nfl_bpg_offset); - DC_LOG_DSC("\tslice_bpg_offset %d", pps->slice_bpg_offset); - DC_LOG_DSC("\tinitial_offset %d", pps->initial_offset); - DC_LOG_DSC("\tfinal_offset %d", pps->final_offset); - DC_LOG_DSC("\tflatness_min_qp %d", pps->flatness_min_qp); - DC_LOG_DSC("\tflatness_max_qp %d", pps->flatness_max_qp); - /* DC_LOG_DSC("\trc_parameter_set %d", pps->rc_parameter_set); */ - DC_LOG_DSC("\tnative_420 %d", pps->native_420); - DC_LOG_DSC("\tnative_422 %d", pps->native_422); - DC_LOG_DSC("\tsecond_line_bpg_offset %d", pps->second_line_bpg_offset); - DC_LOG_DSC("\tnsl_bpg_offset %d", pps->nsl_bpg_offset); - DC_LOG_DSC("\tsecond_line_offset_adj %d", pps->second_line_offset_adj); - DC_LOG_DSC("\trc_model_size %d", pps->rc_model_size); - DC_LOG_DSC("\trc_edge_factor %d", pps->rc_edge_factor); - DC_LOG_DSC("\trc_quant_incr_limit0 %d", pps->rc_quant_incr_limit0); - DC_LOG_DSC("\trc_quant_incr_limit1 %d", pps->rc_quant_incr_limit1); - 
DC_LOG_DSC("\trc_tgt_offset_high %d", pps->rc_tgt_offset_high); - DC_LOG_DSC("\trc_tgt_offset_low %d", pps->rc_tgt_offset_low); - - for (i = 0; i < NUM_BUF_RANGES - 1; i++) - DC_LOG_DSC("\trc_buf_thresh[%d] %d", i, pps->rc_buf_thresh[i]); - - for (i = 0; i < NUM_BUF_RANGES; i++) { - DC_LOG_DSC("\trc_range_parameters[%d].range_min_qp %d", i, pps->rc_range_params[i].range_min_qp); - DC_LOG_DSC("\trc_range_parameters[%d].range_max_qp %d", i, pps->rc_range_params[i].range_max_qp); - DC_LOG_DSC("\trc_range_parameters[%d].range_bpg_offset %d", i, pps->rc_range_params[i].range_bpg_offset); - } -} - -void dsc_override_rc_params(struct rc_params *rc, const struct dc_dsc_rc_params_override *override) -{ - uint8_t i; - - rc->rc_model_size = override->rc_model_size; - for (i = 0; i < DC_DSC_RC_BUF_THRESH_SIZE; i++) - rc->rc_buf_thresh[i] = override->rc_buf_thresh[i]; - for (i = 0; i < DC_DSC_QP_SET_SIZE; i++) { - rc->qp_min[i] = override->rc_minqp[i]; - rc->qp_max[i] = override->rc_maxqp[i]; - rc->ofs[i] = override->rc_offset[i]; - } - - rc->rc_tgt_offset_hi = override->rc_tgt_offset_hi; - rc->rc_tgt_offset_lo = override->rc_tgt_offset_lo; - rc->rc_edge_factor = override->rc_edge_factor; - rc->rc_quant_incr_limit0 = override->rc_quant_incr_limit0; - rc->rc_quant_incr_limit1 = override->rc_quant_incr_limit1; - - rc->initial_fullness_offset = override->initial_fullness_offset; - rc->initial_xmit_delay = override->initial_delay; - - rc->flatness_min_qp = override->flatness_min_qp; - rc->flatness_max_qp = override->flatness_max_qp; - rc->flatness_det_thresh = override->flatness_det_thresh; -} - -bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values *dsc_reg_vals, - struct dsc_optc_config *dsc_optc_cfg) -{ - struct dsc_parameters dsc_params; - struct rc_params rc; - - /* Validate input parameters */ - ASSERT(dsc_cfg->dc_dsc_cfg.num_slices_h); - ASSERT(dsc_cfg->dc_dsc_cfg.num_slices_v); - ASSERT(dsc_cfg->dc_dsc_cfg.version_minor == 1 || dsc_cfg->dc_dsc_cfg.version_minor == 2); - ASSERT(dsc_cfg->pic_width); - ASSERT(dsc_cfg->pic_height); - ASSERT((dsc_cfg->dc_dsc_cfg.version_minor == 1 && - (8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 13)) || - (dsc_cfg->dc_dsc_cfg.version_minor == 2 && - ((8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 15) || - dsc_cfg->dc_dsc_cfg.linebuf_depth == 0))); - ASSERT(96 <= dsc_cfg->dc_dsc_cfg.bits_per_pixel && dsc_cfg->dc_dsc_cfg.bits_per_pixel <= 0x3ff); // 6.0 <= bits_per_pixel <= 63.9375 - - if (!dsc_cfg->dc_dsc_cfg.num_slices_v || !dsc_cfg->dc_dsc_cfg.num_slices_h || - !(dsc_cfg->dc_dsc_cfg.version_minor == 1 || dsc_cfg->dc_dsc_cfg.version_minor == 2) || - !dsc_cfg->pic_width || !dsc_cfg->pic_height || - !((dsc_cfg->dc_dsc_cfg.version_minor == 1 && // v1.1 line buffer depth range: - 8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 13) || - (dsc_cfg->dc_dsc_cfg.version_minor == 2 && // v1.2 line buffer depth range: - ((8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 15) || - dsc_cfg->dc_dsc_cfg.linebuf_depth == 0))) || - !(96 <= dsc_cfg->dc_dsc_cfg.bits_per_pixel && dsc_cfg->dc_dsc_cfg.bits_per_pixel <= 0x3ff)) { - dm_output_to_console("%s: Invalid parameters\n", __func__); - return false; - } - - dsc_init_reg_values(dsc_reg_vals); - - /* Copy input config */ - dsc_reg_vals->pixel_format = dsc_dc_pixel_encoding_to_dsc_pixel_format(dsc_cfg->pixel_encoding, dsc_cfg->dc_dsc_cfg.ycbcr422_simple); - dsc_reg_vals->num_slices_h = 
dsc_cfg->dc_dsc_cfg.num_slices_h; - dsc_reg_vals->num_slices_v = dsc_cfg->dc_dsc_cfg.num_slices_v; - dsc_reg_vals->pps.dsc_version_minor = dsc_cfg->dc_dsc_cfg.version_minor; - dsc_reg_vals->pps.pic_width = dsc_cfg->pic_width; - dsc_reg_vals->pps.pic_height = dsc_cfg->pic_height; - dsc_reg_vals->pps.bits_per_component = dsc_dc_color_depth_to_dsc_bits_per_comp(dsc_cfg->color_depth); - dsc_reg_vals->pps.block_pred_enable = dsc_cfg->dc_dsc_cfg.block_pred_enable; - dsc_reg_vals->pps.line_buf_depth = dsc_cfg->dc_dsc_cfg.linebuf_depth; - dsc_reg_vals->alternate_ich_encoding_en = dsc_reg_vals->pps.dsc_version_minor == 1 ? 0 : 1; - dsc_reg_vals->ich_reset_at_eol = (dsc_cfg->is_odm || dsc_reg_vals->num_slices_h > 1) ? 0xF : 0; - - // TODO: in addition to validating slice height (pic height must be divisible by slice height), - // see what happens when the same condition doesn't apply for slice_width/pic_width. - dsc_reg_vals->pps.slice_width = dsc_cfg->pic_width / dsc_cfg->dc_dsc_cfg.num_slices_h; - dsc_reg_vals->pps.slice_height = dsc_cfg->pic_height / dsc_cfg->dc_dsc_cfg.num_slices_v; - - ASSERT(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height); - if (!(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height)) { - dm_output_to_console("%s: pix height %d not divisible by num_slices_v %d\n\n", __func__, dsc_cfg->pic_height, dsc_cfg->dc_dsc_cfg.num_slices_v); - return false; - } - - dsc_reg_vals->bpp_x32 = dsc_cfg->dc_dsc_cfg.bits_per_pixel << 1; - if (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420 || dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422) - dsc_reg_vals->pps.bits_per_pixel = dsc_reg_vals->bpp_x32; - else - dsc_reg_vals->pps.bits_per_pixel = dsc_reg_vals->bpp_x32 >> 1; - - dsc_reg_vals->pps.convert_rgb = dsc_reg_vals->pixel_format == DSC_PIXFMT_RGB ? 
1 : 0; - dsc_reg_vals->pps.native_422 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422); - dsc_reg_vals->pps.native_420 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420); - dsc_reg_vals->pps.simple_422 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422); - - calc_rc_params(&rc, &dsc_reg_vals->pps); - - if (dsc_cfg->dc_dsc_cfg.rc_params_ovrd) - dsc_override_rc_params(&rc, dsc_cfg->dc_dsc_cfg.rc_params_ovrd); - - if (dscc_compute_dsc_parameters(&dsc_reg_vals->pps, &rc, &dsc_params)) { - dm_output_to_console("%s: DSC config failed\n", __func__); - return false; - } - - dsc_update_from_dsc_parameters(dsc_reg_vals, &dsc_params); - - dsc_optc_cfg->bytes_per_pixel = dsc_params.bytes_per_pixel; - dsc_optc_cfg->slice_width = dsc_reg_vals->pps.slice_width; - dsc_optc_cfg->is_pixel_format_444 = dsc_reg_vals->pixel_format == DSC_PIXFMT_RGB || - dsc_reg_vals->pixel_format == DSC_PIXFMT_YCBCR444 || - dsc_reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422; - - return true; -} - - -enum dsc_pixel_format dsc_dc_pixel_encoding_to_dsc_pixel_format(enum dc_pixel_encoding dc_pix_enc, bool is_ycbcr422_simple) -{ - enum dsc_pixel_format dsc_pix_fmt = DSC_PIXFMT_UNKNOWN; - - /* NOTE: We don't support DSC_PIXFMT_SIMPLE_YCBCR422 */ - - switch (dc_pix_enc) { - case PIXEL_ENCODING_RGB: - dsc_pix_fmt = DSC_PIXFMT_RGB; - break; - case PIXEL_ENCODING_YCBCR422: - if (is_ycbcr422_simple) - dsc_pix_fmt = DSC_PIXFMT_SIMPLE_YCBCR422; - else - dsc_pix_fmt = DSC_PIXFMT_NATIVE_YCBCR422; - break; - case PIXEL_ENCODING_YCBCR444: - dsc_pix_fmt = DSC_PIXFMT_YCBCR444; - break; - case PIXEL_ENCODING_YCBCR420: - dsc_pix_fmt = DSC_PIXFMT_NATIVE_YCBCR420; - break; - default: - dsc_pix_fmt = DSC_PIXFMT_UNKNOWN; - break; - } - - ASSERT(dsc_pix_fmt != DSC_PIXFMT_UNKNOWN); - return dsc_pix_fmt; -} - - -enum dsc_bits_per_comp dsc_dc_color_depth_to_dsc_bits_per_comp(enum dc_color_depth dc_color_depth) -{ - enum dsc_bits_per_comp bpc = DSC_BPC_UNKNOWN; - - switch (dc_color_depth) { - case COLOR_DEPTH_888: - bpc = DSC_BPC_8; - break; - case COLOR_DEPTH_101010: - bpc = DSC_BPC_10; - break; - case COLOR_DEPTH_121212: - bpc = DSC_BPC_12; - break; - default: - bpc = DSC_BPC_UNKNOWN; - break; - } - - return bpc; -} - - -void dsc_init_reg_values(struct dsc_reg_values *reg_vals) -{ - int i; - - memset(reg_vals, 0, sizeof(struct dsc_reg_values)); - - /* Non-PPS values */ - reg_vals->dsc_clock_enable = 1; - reg_vals->dsc_clock_gating_disable = 0; - reg_vals->underflow_recovery_en = 0; - reg_vals->underflow_occurred_int_en = 0; - reg_vals->underflow_occurred_status = 0; - reg_vals->ich_reset_at_eol = 0; - reg_vals->alternate_ich_encoding_en = 0; - reg_vals->rc_buffer_model_size = 0; - /*reg_vals->disable_ich = 0;*/ - reg_vals->dsc_dbg_en = 0; - - for (i = 0; i < 4; i++) - reg_vals->rc_buffer_model_overflow_int_en[i] = 0; - - /* PPS values */ - reg_vals->pps.dsc_version_minor = 2; - reg_vals->pps.dsc_version_major = 1; - reg_vals->pps.line_buf_depth = 9; - reg_vals->pps.bits_per_component = 8; - reg_vals->pps.block_pred_enable = 1; - reg_vals->pps.slice_chunk_size = 0; - reg_vals->pps.pic_width = 0; - reg_vals->pps.pic_height = 0; - reg_vals->pps.slice_width = 0; - reg_vals->pps.slice_height = 0; - reg_vals->pps.initial_xmit_delay = 170; - reg_vals->pps.initial_dec_delay = 0; - reg_vals->pps.initial_scale_value = 0; - reg_vals->pps.scale_increment_interval = 0; - reg_vals->pps.scale_decrement_interval = 0; - reg_vals->pps.nfl_bpg_offset = 0; - reg_vals->pps.slice_bpg_offset = 0; - reg_vals->pps.nsl_bpg_offset = 0; - 
reg_vals->pps.initial_offset = 6144; - reg_vals->pps.final_offset = 0; - reg_vals->pps.flatness_min_qp = 3; - reg_vals->pps.flatness_max_qp = 12; - reg_vals->pps.rc_model_size = 8192; - reg_vals->pps.rc_edge_factor = 6; - reg_vals->pps.rc_quant_incr_limit0 = 11; - reg_vals->pps.rc_quant_incr_limit1 = 11; - reg_vals->pps.rc_tgt_offset_low = 3; - reg_vals->pps.rc_tgt_offset_high = 3; -} - -/* Updates dsc_reg_values::reg_vals::xxx fields based on the values from computed params. - * This is required because dscc_compute_dsc_parameters returns a modified PPS, which in turn - * affects non-PPS register values. - */ -void dsc_update_from_dsc_parameters(struct dsc_reg_values *reg_vals, const struct dsc_parameters *dsc_params) -{ - int i; - - reg_vals->pps = dsc_params->pps; - - // pps_computed will have the "expanded" values; need to shift them to make them fit for regs. - for (i = 0; i < NUM_BUF_RANGES - 1; i++) - reg_vals->pps.rc_buf_thresh[i] = reg_vals->pps.rc_buf_thresh[i] >> 6; - - reg_vals->rc_buffer_model_size = dsc_params->rc_buffer_model_size; -} - -static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals) -{ - uint32_t temp_int; - struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); - - REG_SET(DSC_DEBUG_CONTROL, 0, - DSC_DBG_EN, reg_vals->dsc_dbg_en); - - // dsccif registers - REG_SET_5(DSCCIF_CONFIG0, 0, - INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN, reg_vals->underflow_recovery_en, - INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN, reg_vals->underflow_occurred_int_en, - INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS, reg_vals->underflow_occurred_status, - INPUT_PIXEL_FORMAT, reg_vals->pixel_format, - DSCCIF_CONFIG0__BITS_PER_COMPONENT, reg_vals->pps.bits_per_component); - - REG_SET_2(DSCCIF_CONFIG1, 0, - PIC_WIDTH, reg_vals->pps.pic_width, - PIC_HEIGHT, reg_vals->pps.pic_height); - - // dscc registers - if (dsc20->dsc_mask->ICH_RESET_AT_END_OF_LINE == 0) { - REG_SET_3(DSCC_CONFIG0, 0, - NUMBER_OF_SLICES_PER_LINE, reg_vals->num_slices_h - 1, - ALTERNATE_ICH_ENCODING_EN, reg_vals->alternate_ich_encoding_en, - NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, reg_vals->num_slices_v - 1); - } else { - REG_SET_4(DSCC_CONFIG0, 0, ICH_RESET_AT_END_OF_LINE, - reg_vals->ich_reset_at_eol, NUMBER_OF_SLICES_PER_LINE, - reg_vals->num_slices_h - 1, ALTERNATE_ICH_ENCODING_EN, - reg_vals->alternate_ich_encoding_en, NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, - reg_vals->num_slices_v - 1); - } - - REG_SET(DSCC_CONFIG1, 0, - DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, reg_vals->rc_buffer_model_size); - /*REG_SET_2(DSCC_CONFIG1, 0, - DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, reg_vals->rc_buffer_model_size, - DSCC_DISABLE_ICH, reg_vals->disable_ich);*/ - - REG_SET_4(DSCC_INTERRUPT_CONTROL_STATUS, 0, - DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED_INT_EN, reg_vals->rc_buffer_model_overflow_int_en[0], - DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED_INT_EN, reg_vals->rc_buffer_model_overflow_int_en[1], - DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED_INT_EN, reg_vals->rc_buffer_model_overflow_int_en[2], - DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED_INT_EN, reg_vals->rc_buffer_model_overflow_int_en[3]); - - REG_SET_3(DSCC_PPS_CONFIG0, 0, - DSC_VERSION_MINOR, reg_vals->pps.dsc_version_minor, - LINEBUF_DEPTH, reg_vals->pps.line_buf_depth, - DSCC_PPS_CONFIG0__BITS_PER_COMPONENT, reg_vals->pps.bits_per_component); - - if (reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420 || reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422) - temp_int = reg_vals->bpp_x32; - else - temp_int = 
reg_vals->bpp_x32 >> 1; - - REG_SET_7(DSCC_PPS_CONFIG1, 0, - BITS_PER_PIXEL, temp_int, - SIMPLE_422, reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422, - CONVERT_RGB, reg_vals->pixel_format == DSC_PIXFMT_RGB, - BLOCK_PRED_ENABLE, reg_vals->pps.block_pred_enable, - NATIVE_422, reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422, - NATIVE_420, reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420, - CHUNK_SIZE, reg_vals->pps.slice_chunk_size); - - REG_SET_2(DSCC_PPS_CONFIG2, 0, - PIC_WIDTH, reg_vals->pps.pic_width, - PIC_HEIGHT, reg_vals->pps.pic_height); - - REG_SET_2(DSCC_PPS_CONFIG3, 0, - SLICE_WIDTH, reg_vals->pps.slice_width, - SLICE_HEIGHT, reg_vals->pps.slice_height); - - REG_SET(DSCC_PPS_CONFIG4, 0, - INITIAL_XMIT_DELAY, reg_vals->pps.initial_xmit_delay); - - REG_SET_2(DSCC_PPS_CONFIG5, 0, - INITIAL_SCALE_VALUE, reg_vals->pps.initial_scale_value, - SCALE_INCREMENT_INTERVAL, reg_vals->pps.scale_increment_interval); - - REG_SET_3(DSCC_PPS_CONFIG6, 0, - SCALE_DECREMENT_INTERVAL, reg_vals->pps.scale_decrement_interval, - FIRST_LINE_BPG_OFFSET, reg_vals->pps.first_line_bpg_offset, - SECOND_LINE_BPG_OFFSET, reg_vals->pps.second_line_bpg_offset); - - REG_SET_2(DSCC_PPS_CONFIG7, 0, - NFL_BPG_OFFSET, reg_vals->pps.nfl_bpg_offset, - SLICE_BPG_OFFSET, reg_vals->pps.slice_bpg_offset); - - REG_SET_2(DSCC_PPS_CONFIG8, 0, - NSL_BPG_OFFSET, reg_vals->pps.nsl_bpg_offset, - SECOND_LINE_OFFSET_ADJ, reg_vals->pps.second_line_offset_adj); - - REG_SET_2(DSCC_PPS_CONFIG9, 0, - INITIAL_OFFSET, reg_vals->pps.initial_offset, - FINAL_OFFSET, reg_vals->pps.final_offset); - - REG_SET_3(DSCC_PPS_CONFIG10, 0, - FLATNESS_MIN_QP, reg_vals->pps.flatness_min_qp, - FLATNESS_MAX_QP, reg_vals->pps.flatness_max_qp, - RC_MODEL_SIZE, reg_vals->pps.rc_model_size); - - REG_SET_5(DSCC_PPS_CONFIG11, 0, - RC_EDGE_FACTOR, reg_vals->pps.rc_edge_factor, - RC_QUANT_INCR_LIMIT0, reg_vals->pps.rc_quant_incr_limit0, - RC_QUANT_INCR_LIMIT1, reg_vals->pps.rc_quant_incr_limit1, - RC_TGT_OFFSET_LO, reg_vals->pps.rc_tgt_offset_low, - RC_TGT_OFFSET_HI, reg_vals->pps.rc_tgt_offset_high); - - REG_SET_4(DSCC_PPS_CONFIG12, 0, - RC_BUF_THRESH0, reg_vals->pps.rc_buf_thresh[0], - RC_BUF_THRESH1, reg_vals->pps.rc_buf_thresh[1], - RC_BUF_THRESH2, reg_vals->pps.rc_buf_thresh[2], - RC_BUF_THRESH3, reg_vals->pps.rc_buf_thresh[3]); - - REG_SET_4(DSCC_PPS_CONFIG13, 0, - RC_BUF_THRESH4, reg_vals->pps.rc_buf_thresh[4], - RC_BUF_THRESH5, reg_vals->pps.rc_buf_thresh[5], - RC_BUF_THRESH6, reg_vals->pps.rc_buf_thresh[6], - RC_BUF_THRESH7, reg_vals->pps.rc_buf_thresh[7]); - - REG_SET_4(DSCC_PPS_CONFIG14, 0, - RC_BUF_THRESH8, reg_vals->pps.rc_buf_thresh[8], - RC_BUF_THRESH9, reg_vals->pps.rc_buf_thresh[9], - RC_BUF_THRESH10, reg_vals->pps.rc_buf_thresh[10], - RC_BUF_THRESH11, reg_vals->pps.rc_buf_thresh[11]); - - REG_SET_5(DSCC_PPS_CONFIG15, 0, - RC_BUF_THRESH12, reg_vals->pps.rc_buf_thresh[12], - RC_BUF_THRESH13, reg_vals->pps.rc_buf_thresh[13], - RANGE_MIN_QP0, reg_vals->pps.rc_range_params[0].range_min_qp, - RANGE_MAX_QP0, reg_vals->pps.rc_range_params[0].range_max_qp, - RANGE_BPG_OFFSET0, reg_vals->pps.rc_range_params[0].range_bpg_offset); - - REG_SET_6(DSCC_PPS_CONFIG16, 0, - RANGE_MIN_QP1, reg_vals->pps.rc_range_params[1].range_min_qp, - RANGE_MAX_QP1, reg_vals->pps.rc_range_params[1].range_max_qp, - RANGE_BPG_OFFSET1, reg_vals->pps.rc_range_params[1].range_bpg_offset, - RANGE_MIN_QP2, reg_vals->pps.rc_range_params[2].range_min_qp, - RANGE_MAX_QP2, reg_vals->pps.rc_range_params[2].range_max_qp, - RANGE_BPG_OFFSET2, 
reg_vals->pps.rc_range_params[2].range_bpg_offset); - - REG_SET_6(DSCC_PPS_CONFIG17, 0, - RANGE_MIN_QP3, reg_vals->pps.rc_range_params[3].range_min_qp, - RANGE_MAX_QP3, reg_vals->pps.rc_range_params[3].range_max_qp, - RANGE_BPG_OFFSET3, reg_vals->pps.rc_range_params[3].range_bpg_offset, - RANGE_MIN_QP4, reg_vals->pps.rc_range_params[4].range_min_qp, - RANGE_MAX_QP4, reg_vals->pps.rc_range_params[4].range_max_qp, - RANGE_BPG_OFFSET4, reg_vals->pps.rc_range_params[4].range_bpg_offset); - - REG_SET_6(DSCC_PPS_CONFIG18, 0, - RANGE_MIN_QP5, reg_vals->pps.rc_range_params[5].range_min_qp, - RANGE_MAX_QP5, reg_vals->pps.rc_range_params[5].range_max_qp, - RANGE_BPG_OFFSET5, reg_vals->pps.rc_range_params[5].range_bpg_offset, - RANGE_MIN_QP6, reg_vals->pps.rc_range_params[6].range_min_qp, - RANGE_MAX_QP6, reg_vals->pps.rc_range_params[6].range_max_qp, - RANGE_BPG_OFFSET6, reg_vals->pps.rc_range_params[6].range_bpg_offset); - - REG_SET_6(DSCC_PPS_CONFIG19, 0, - RANGE_MIN_QP7, reg_vals->pps.rc_range_params[7].range_min_qp, - RANGE_MAX_QP7, reg_vals->pps.rc_range_params[7].range_max_qp, - RANGE_BPG_OFFSET7, reg_vals->pps.rc_range_params[7].range_bpg_offset, - RANGE_MIN_QP8, reg_vals->pps.rc_range_params[8].range_min_qp, - RANGE_MAX_QP8, reg_vals->pps.rc_range_params[8].range_max_qp, - RANGE_BPG_OFFSET8, reg_vals->pps.rc_range_params[8].range_bpg_offset); - - REG_SET_6(DSCC_PPS_CONFIG20, 0, - RANGE_MIN_QP9, reg_vals->pps.rc_range_params[9].range_min_qp, - RANGE_MAX_QP9, reg_vals->pps.rc_range_params[9].range_max_qp, - RANGE_BPG_OFFSET9, reg_vals->pps.rc_range_params[9].range_bpg_offset, - RANGE_MIN_QP10, reg_vals->pps.rc_range_params[10].range_min_qp, - RANGE_MAX_QP10, reg_vals->pps.rc_range_params[10].range_max_qp, - RANGE_BPG_OFFSET10, reg_vals->pps.rc_range_params[10].range_bpg_offset); - - REG_SET_6(DSCC_PPS_CONFIG21, 0, - RANGE_MIN_QP11, reg_vals->pps.rc_range_params[11].range_min_qp, - RANGE_MAX_QP11, reg_vals->pps.rc_range_params[11].range_max_qp, - RANGE_BPG_OFFSET11, reg_vals->pps.rc_range_params[11].range_bpg_offset, - RANGE_MIN_QP12, reg_vals->pps.rc_range_params[12].range_min_qp, - RANGE_MAX_QP12, reg_vals->pps.rc_range_params[12].range_max_qp, - RANGE_BPG_OFFSET12, reg_vals->pps.rc_range_params[12].range_bpg_offset); - - REG_SET_6(DSCC_PPS_CONFIG22, 0, - RANGE_MIN_QP13, reg_vals->pps.rc_range_params[13].range_min_qp, - RANGE_MAX_QP13, reg_vals->pps.rc_range_params[13].range_max_qp, - RANGE_BPG_OFFSET13, reg_vals->pps.rc_range_params[13].range_bpg_offset, - RANGE_MIN_QP14, reg_vals->pps.rc_range_params[14].range_min_qp, - RANGE_MAX_QP14, reg_vals->pps.rc_range_params[14].range_max_qp, - RANGE_BPG_OFFSET14, reg_vals->pps.rc_range_params[14].range_bpg_offset); - -} - diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h deleted file mode 100644 index ba869387c3c5..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h +++ /dev/null @@ -1,589 +0,0 @@ -/* Copyright 2017 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ -#ifndef __DCN20_DSC_H__ -#define __DCN20_DSC_H__ - -#include "dsc.h" -#include "dsc/dscc_types.h" -#include - -#define TO_DCN20_DSC(dsc)\ - container_of(dsc, struct dcn20_dsc, base) - -#define DSC_REG_LIST_DCN20(id) \ - SRI(DSC_TOP_CONTROL, DSC_TOP, id),\ - SRI(DSC_DEBUG_CONTROL, DSC_TOP, id),\ - SRI(DSCC_CONFIG0, DSCC, id),\ - SRI(DSCC_CONFIG1, DSCC, id),\ - SRI(DSCC_STATUS, DSCC, id),\ - SRI(DSCC_INTERRUPT_CONTROL_STATUS, DSCC, id),\ - SRI(DSCC_PPS_CONFIG0, DSCC, id),\ - SRI(DSCC_PPS_CONFIG1, DSCC, id),\ - SRI(DSCC_PPS_CONFIG2, DSCC, id),\ - SRI(DSCC_PPS_CONFIG3, DSCC, id),\ - SRI(DSCC_PPS_CONFIG4, DSCC, id),\ - SRI(DSCC_PPS_CONFIG5, DSCC, id),\ - SRI(DSCC_PPS_CONFIG6, DSCC, id),\ - SRI(DSCC_PPS_CONFIG7, DSCC, id),\ - SRI(DSCC_PPS_CONFIG8, DSCC, id),\ - SRI(DSCC_PPS_CONFIG9, DSCC, id),\ - SRI(DSCC_PPS_CONFIG10, DSCC, id),\ - SRI(DSCC_PPS_CONFIG11, DSCC, id),\ - SRI(DSCC_PPS_CONFIG12, DSCC, id),\ - SRI(DSCC_PPS_CONFIG13, DSCC, id),\ - SRI(DSCC_PPS_CONFIG14, DSCC, id),\ - SRI(DSCC_PPS_CONFIG15, DSCC, id),\ - SRI(DSCC_PPS_CONFIG16, DSCC, id),\ - SRI(DSCC_PPS_CONFIG17, DSCC, id),\ - SRI(DSCC_PPS_CONFIG18, DSCC, id),\ - SRI(DSCC_PPS_CONFIG19, DSCC, id),\ - SRI(DSCC_PPS_CONFIG20, DSCC, id),\ - SRI(DSCC_PPS_CONFIG21, DSCC, id),\ - SRI(DSCC_PPS_CONFIG22, DSCC, id),\ - SRI(DSCC_MEM_POWER_CONTROL, DSCC, id),\ - SRI(DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC, id),\ - SRI(DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC, id),\ - SRI(DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC, id),\ - SRI(DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC, id),\ - SRI(DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC, id),\ - SRI(DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC, id),\ - SRI(DSCC_MAX_ABS_ERROR0, DSCC, id),\ - SRI(DSCC_MAX_ABS_ERROR1, DSCC, id),\ - SRI(DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL, DSCC, id),\ - SRI(DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id),\ - SRI(DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id),\ - SRI(DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id),\ - SRI(DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL, DSCC, id),\ - SRI(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id),\ - SRI(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id),\ - SRI(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id),\ - SRI(DSCCIF_CONFIG0, DSCCIF, id),\ - SRI(DSCCIF_CONFIG1, DSCCIF, id),\ - SRI(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) - - -#define DSC_SF(reg_name, field_name, post_fix)\ - .field_name = reg_name ## __ ## field_name ## post_fix - -//Used in resolving the 
corner case with duplicate field name -#define DSC2_SF(reg_name, field_name, post_fix)\ - .field_name = reg_name ## _ ## field_name ## post_fix - -#define DSC_REG_LIST_SH_MASK_DCN20(mask_sh)\ - DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_CLOCK_EN, mask_sh), \ - DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DISPCLK_R_GATE_DIS, mask_sh), \ - DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DSCCLK_R_GATE_DIS, mask_sh), \ - DSC_SF(DSC_TOP0_DSC_DEBUG_CONTROL, DSC_DBG_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_CONFIG0, ICH_RESET_AT_END_OF_LINE, mask_sh), \ - DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_PER_LINE, mask_sh), \ - DSC_SF(DSCC0_DSCC_CONFIG0, ALTERNATE_ICH_ENCODING_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, mask_sh), \ - DSC_SF(DSCC0_DSCC_CONFIG1, DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, mask_sh), \ - /*DSC_SF(DSCC0_DSCC_CONFIG1, DSCC_DISABLE_ICH, mask_sh),*/ \ - DSC_SF(DSCC0_DSCC_STATUS, DSCC_DOUBLE_BUFFER_REG_UPDATE_PENDING, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER0_OVERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER1_OVERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER2_OVERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER3_OVERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER0_UNDERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER1_UNDERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER2_UNDERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER3_UNDERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER0_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER1_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER2_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER3_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER0_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER1_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER2_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER3_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG0, DSC_VERSION_MINOR, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG0, 
DSC_VERSION_MAJOR, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG0, PPS_IDENTIFIER, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG0, LINEBUF_DEPTH, mask_sh), \ - DSC2_SF(DSCC0, DSCC_PPS_CONFIG0__BITS_PER_COMPONENT, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG1, BITS_PER_PIXEL, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG1, VBR_ENABLE, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG1, SIMPLE_422, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG1, CONVERT_RGB, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG1, BLOCK_PRED_ENABLE, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG1, NATIVE_422, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG1, NATIVE_420, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG1, CHUNK_SIZE, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG2, PIC_WIDTH, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG2, PIC_HEIGHT, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG3, SLICE_WIDTH, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG3, SLICE_HEIGHT, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG4, INITIAL_XMIT_DELAY, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG4, INITIAL_DEC_DELAY, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG5, INITIAL_SCALE_VALUE, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG5, SCALE_INCREMENT_INTERVAL, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG6, SCALE_DECREMENT_INTERVAL, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG6, FIRST_LINE_BPG_OFFSET, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG6, SECOND_LINE_BPG_OFFSET, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG7, NFL_BPG_OFFSET, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG8, NSL_BPG_OFFSET, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG8, SECOND_LINE_OFFSET_ADJ, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG9, INITIAL_OFFSET, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG9, FINAL_OFFSET, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG10, FLATNESS_MIN_QP, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG10, FLATNESS_MAX_QP, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG10, RC_MODEL_SIZE, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_EDGE_FACTOR, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_QUANT_INCR_LIMIT0, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_QUANT_INCR_LIMIT1, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_TGT_OFFSET_LO, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_TGT_OFFSET_HI, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH0, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH1, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH2, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH3, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH4, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH5, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH6, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH7, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH8, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH9, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH10, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH11, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RC_BUF_THRESH12, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RC_BUF_THRESH13, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_MIN_QP0, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_MAX_QP0, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_BPG_OFFSET0, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MIN_QP1, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MAX_QP1, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_BPG_OFFSET1, mask_sh), \ - 
DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MIN_QP2, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MAX_QP2, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_BPG_OFFSET2, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MIN_QP3, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MAX_QP3, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_BPG_OFFSET3, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MIN_QP4, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MAX_QP4, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_BPG_OFFSET4, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MIN_QP5, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MAX_QP5, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_BPG_OFFSET5, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MIN_QP6, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MAX_QP6, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_BPG_OFFSET6, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MIN_QP7, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MAX_QP7, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_BPG_OFFSET7, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MIN_QP8, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MAX_QP8, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_BPG_OFFSET8, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MIN_QP9, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MAX_QP9, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_BPG_OFFSET9, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MIN_QP10, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MAX_QP10, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_BPG_OFFSET10, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MIN_QP11, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MAX_QP11, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_BPG_OFFSET11, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MIN_QP12, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MAX_QP12, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_BPG_OFFSET12, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MIN_QP13, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MAX_QP13, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_BPG_OFFSET13, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MIN_QP14, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MAX_QP14, mask_sh), \ - DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_BPG_OFFSET14, mask_sh), \ - DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_DEFAULT_MEM_LOW_POWER_STATE, mask_sh), \ - DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_MEM_PWR_FORCE, mask_sh), \ - DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_MEM_PWR_DIS, mask_sh), \ - DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_MEM_PWR_STATE, mask_sh), \ - DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_NATIVE_422_MEM_PWR_FORCE, mask_sh), \ - DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_NATIVE_422_MEM_PWR_DIS, mask_sh), \ - DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_NATIVE_422_MEM_PWR_STATE, mask_sh), \ - DSC_SF(DSCC0_DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC_R_Y_SQUARED_ERROR_LOWER, mask_sh), \ - DSC_SF(DSCC0_DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC_R_Y_SQUARED_ERROR_UPPER, mask_sh), \ - DSC_SF(DSCC0_DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC_G_CB_SQUARED_ERROR_LOWER, mask_sh), \ - DSC_SF(DSCC0_DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC_G_CB_SQUARED_ERROR_UPPER, mask_sh), \ - DSC_SF(DSCC0_DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC_B_CR_SQUARED_ERROR_LOWER, mask_sh), \ - DSC_SF(DSCC0_DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC_B_CR_SQUARED_ERROR_UPPER, mask_sh), \ - DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR0, 
DSCC_R_Y_MAX_ABS_ERROR, mask_sh), \ - DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR0, DSCC_G_CB_MAX_ABS_ERROR, mask_sh), \ - DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR1, DSCC_B_CR_MAX_ABS_ERROR, mask_sh), \ - DSC_SF(DSCC0_DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL, DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL, mask_sh), \ - DSC_SF(DSCC0_DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL, DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL, mask_sh), \ - DSC_SF(DSCC0_DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL, DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL, mask_sh), \ - DSC_SF(DSCC0_DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL, DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL, mask_sh), \ - DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL, mask_sh), \ - DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, mask_sh), \ - DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, mask_sh), \ - DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, mask_sh), \ - DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN, mask_sh), \ - DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ - DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS, mask_sh), \ - DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_PIXEL_FORMAT, mask_sh), \ - DSC2_SF(DSCCIF0, DSCCIF_CONFIG0__BITS_PER_COMPONENT, mask_sh), \ - DSC_SF(DSCCIF0_DSCCIF_CONFIG0, DOUBLE_BUFFER_REG_UPDATE_PENDING, mask_sh), \ - DSC_SF(DSCCIF0_DSCCIF_CONFIG1, PIC_WIDTH, mask_sh), \ - DSC_SF(DSCCIF0_DSCCIF_CONFIG1, PIC_HEIGHT, mask_sh), \ - DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, mask_sh), \ - DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_OPP_PIPE_SOURCE, mask_sh) - - - -#define DSC_FIELD_LIST_DCN20(type)\ - type DSC_CLOCK_EN; \ - type DSC_DISPCLK_R_GATE_DIS; \ - type DSC_DSCCLK_R_GATE_DIS; \ - type DSC_DBG_EN; \ - type DSC_TEST_CLOCK_MUX_SEL; \ - type ICH_RESET_AT_END_OF_LINE; \ - type NUMBER_OF_SLICES_PER_LINE; \ - type ALTERNATE_ICH_ENCODING_EN; \ - type NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION; \ - type DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE; \ - /*type DSCC_DISABLE_ICH;*/ \ - type DSCC_DOUBLE_BUFFER_REG_UPDATE_PENDING; \ - type DSCC_RATE_BUFFER0_OVERFLOW_OCCURRED; \ - type DSCC_RATE_BUFFER1_OVERFLOW_OCCURRED; \ - type DSCC_RATE_BUFFER2_OVERFLOW_OCCURRED; \ - type DSCC_RATE_BUFFER3_OVERFLOW_OCCURRED; \ - type DSCC_RATE_BUFFER0_UNDERFLOW_OCCURRED; \ - type DSCC_RATE_BUFFER1_UNDERFLOW_OCCURRED; \ - type DSCC_RATE_BUFFER2_UNDERFLOW_OCCURRED; \ - type DSCC_RATE_BUFFER3_UNDERFLOW_OCCURRED; \ - type DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED; \ - type DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED; \ - type DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED; \ - type DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED; \ - type DSCC_RATE_BUFFER0_OVERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_BUFFER1_OVERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_BUFFER2_OVERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_BUFFER3_OVERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_BUFFER0_UNDERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_BUFFER1_UNDERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_BUFFER2_UNDERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_BUFFER3_UNDERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED_INT_EN; \ - type DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED_INT_EN; \ - type 
DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED_INT_EN; \ - type DSC_VERSION_MINOR; \ - type DSC_VERSION_MAJOR; \ - type PPS_IDENTIFIER; \ - type LINEBUF_DEPTH; \ - type DSCC_PPS_CONFIG0__BITS_PER_COMPONENT; \ - type BITS_PER_PIXEL; \ - type VBR_ENABLE; \ - type SIMPLE_422; \ - type CONVERT_RGB; \ - type BLOCK_PRED_ENABLE; \ - type NATIVE_422; \ - type NATIVE_420; \ - type CHUNK_SIZE; \ - type PIC_WIDTH; \ - type PIC_HEIGHT; \ - type SLICE_WIDTH; \ - type SLICE_HEIGHT; \ - type INITIAL_XMIT_DELAY; \ - type INITIAL_DEC_DELAY; \ - type INITIAL_SCALE_VALUE; \ - type SCALE_INCREMENT_INTERVAL; \ - type SCALE_DECREMENT_INTERVAL; \ - type FIRST_LINE_BPG_OFFSET; \ - type SECOND_LINE_BPG_OFFSET; \ - type NFL_BPG_OFFSET; \ - type SLICE_BPG_OFFSET; \ - type NSL_BPG_OFFSET; \ - type SECOND_LINE_OFFSET_ADJ; \ - type INITIAL_OFFSET; \ - type FINAL_OFFSET; \ - type FLATNESS_MIN_QP; \ - type FLATNESS_MAX_QP; \ - type RC_MODEL_SIZE; \ - type RC_EDGE_FACTOR; \ - type RC_QUANT_INCR_LIMIT0; \ - type RC_QUANT_INCR_LIMIT1; \ - type RC_TGT_OFFSET_LO; \ - type RC_TGT_OFFSET_HI; \ - type RC_BUF_THRESH0; \ - type RC_BUF_THRESH1; \ - type RC_BUF_THRESH2; \ - type RC_BUF_THRESH3; \ - type RC_BUF_THRESH4; \ - type RC_BUF_THRESH5; \ - type RC_BUF_THRESH6; \ - type RC_BUF_THRESH7; \ - type RC_BUF_THRESH8; \ - type RC_BUF_THRESH9; \ - type RC_BUF_THRESH10; \ - type RC_BUF_THRESH11; \ - type RC_BUF_THRESH12; \ - type RC_BUF_THRESH13; \ - type RANGE_MIN_QP0; \ - type RANGE_MAX_QP0; \ - type RANGE_BPG_OFFSET0; \ - type RANGE_MIN_QP1; \ - type RANGE_MAX_QP1; \ - type RANGE_BPG_OFFSET1; \ - type RANGE_MIN_QP2; \ - type RANGE_MAX_QP2; \ - type RANGE_BPG_OFFSET2; \ - type RANGE_MIN_QP3; \ - type RANGE_MAX_QP3; \ - type RANGE_BPG_OFFSET3; \ - type RANGE_MIN_QP4; \ - type RANGE_MAX_QP4; \ - type RANGE_BPG_OFFSET4; \ - type RANGE_MIN_QP5; \ - type RANGE_MAX_QP5; \ - type RANGE_BPG_OFFSET5; \ - type RANGE_MIN_QP6; \ - type RANGE_MAX_QP6; \ - type RANGE_BPG_OFFSET6; \ - type RANGE_MIN_QP7; \ - type RANGE_MAX_QP7; \ - type RANGE_BPG_OFFSET7; \ - type RANGE_MIN_QP8; \ - type RANGE_MAX_QP8; \ - type RANGE_BPG_OFFSET8; \ - type RANGE_MIN_QP9; \ - type RANGE_MAX_QP9; \ - type RANGE_BPG_OFFSET9; \ - type RANGE_MIN_QP10; \ - type RANGE_MAX_QP10; \ - type RANGE_BPG_OFFSET10; \ - type RANGE_MIN_QP11; \ - type RANGE_MAX_QP11; \ - type RANGE_BPG_OFFSET11; \ - type RANGE_MIN_QP12; \ - type RANGE_MAX_QP12; \ - type RANGE_BPG_OFFSET12; \ - type RANGE_MIN_QP13; \ - type RANGE_MAX_QP13; \ - type RANGE_BPG_OFFSET13; \ - type RANGE_MIN_QP14; \ - type RANGE_MAX_QP14; \ - type RANGE_BPG_OFFSET14; \ - type DSCC_DEFAULT_MEM_LOW_POWER_STATE; \ - type DSCC_MEM_PWR_FORCE; \ - type DSCC_MEM_PWR_DIS; \ - type DSCC_MEM_PWR_STATE; \ - type DSCC_NATIVE_422_MEM_PWR_FORCE; \ - type DSCC_NATIVE_422_MEM_PWR_DIS; \ - type DSCC_NATIVE_422_MEM_PWR_STATE; \ - type DSCC_R_Y_SQUARED_ERROR_LOWER; \ - type DSCC_R_Y_SQUARED_ERROR_UPPER; \ - type DSCC_G_CB_SQUARED_ERROR_LOWER; \ - type DSCC_G_CB_SQUARED_ERROR_UPPER; \ - type DSCC_B_CR_SQUARED_ERROR_LOWER; \ - type DSCC_B_CR_SQUARED_ERROR_UPPER; \ - type DSCC_R_Y_MAX_ABS_ERROR; \ - type DSCC_G_CB_MAX_ABS_ERROR; \ - type DSCC_B_CR_MAX_ABS_ERROR; \ - type DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL; \ - type DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL; \ - type DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL; \ - type DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL; \ - type DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL; \ - type DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL; \ - type DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL; \ - type 
DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL; \ - type DSCC_UPDATE_PENDING_STATUS; \ - type DSCC_UPDATE_TAKEN_STATUS; \ - type DSCC_UPDATE_TAKEN_ACK; \ - type DSCC_RATE_BUFFER0_FULLNESS_LEVEL; \ - type DSCC_RATE_BUFFER1_FULLNESS_LEVEL; \ - type DSCC_RATE_BUFFER2_FULLNESS_LEVEL; \ - type DSCC_RATE_BUFFER3_FULLNESS_LEVEL; \ - type DSCC_RATE_CONTROL_BUFFER0_FULLNESS_LEVEL; \ - type DSCC_RATE_CONTROL_BUFFER1_FULLNESS_LEVEL; \ - type DSCC_RATE_CONTROL_BUFFER2_FULLNESS_LEVEL; \ - type DSCC_RATE_CONTROL_BUFFER3_FULLNESS_LEVEL; \ - type DSCC_RATE_BUFFER0_INITIAL_XMIT_DELAY_REACHED; \ - type DSCC_RATE_BUFFER1_INITIAL_XMIT_DELAY_REACHED; \ - type DSCC_RATE_BUFFER2_INITIAL_XMIT_DELAY_REACHED; \ - type DSCC_RATE_BUFFER3_INITIAL_XMIT_DELAY_REACHED; \ - type INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN; \ - type INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN; \ - type INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS; \ - type INPUT_PIXEL_FORMAT; \ - type DSCCIF_CONFIG0__BITS_PER_COMPONENT; \ - type DOUBLE_BUFFER_REG_UPDATE_PENDING; \ - type DSCCIF_UPDATE_PENDING_STATUS; \ - type DSCCIF_UPDATE_TAKEN_STATUS; \ - type DSCCIF_UPDATE_TAKEN_ACK; \ - type DSCRM_DSC_FORWARD_EN; \ - type DSCRM_DSC_OPP_PIPE_SOURCE - -struct dcn20_dsc_registers { - uint32_t DSC_TOP_CONTROL; - uint32_t DSC_DEBUG_CONTROL; - uint32_t DSCC_CONFIG0; - uint32_t DSCC_CONFIG1; - uint32_t DSCC_STATUS; - uint32_t DSCC_INTERRUPT_CONTROL_STATUS; - uint32_t DSCC_PPS_CONFIG0; - uint32_t DSCC_PPS_CONFIG1; - uint32_t DSCC_PPS_CONFIG2; - uint32_t DSCC_PPS_CONFIG3; - uint32_t DSCC_PPS_CONFIG4; - uint32_t DSCC_PPS_CONFIG5; - uint32_t DSCC_PPS_CONFIG6; - uint32_t DSCC_PPS_CONFIG7; - uint32_t DSCC_PPS_CONFIG8; - uint32_t DSCC_PPS_CONFIG9; - uint32_t DSCC_PPS_CONFIG10; - uint32_t DSCC_PPS_CONFIG11; - uint32_t DSCC_PPS_CONFIG12; - uint32_t DSCC_PPS_CONFIG13; - uint32_t DSCC_PPS_CONFIG14; - uint32_t DSCC_PPS_CONFIG15; - uint32_t DSCC_PPS_CONFIG16; - uint32_t DSCC_PPS_CONFIG17; - uint32_t DSCC_PPS_CONFIG18; - uint32_t DSCC_PPS_CONFIG19; - uint32_t DSCC_PPS_CONFIG20; - uint32_t DSCC_PPS_CONFIG21; - uint32_t DSCC_PPS_CONFIG22; - uint32_t DSCC_MEM_POWER_CONTROL; - uint32_t DSCC_R_Y_SQUARED_ERROR_LOWER; - uint32_t DSCC_R_Y_SQUARED_ERROR_UPPER; - uint32_t DSCC_G_CB_SQUARED_ERROR_LOWER; - uint32_t DSCC_G_CB_SQUARED_ERROR_UPPER; - uint32_t DSCC_B_CR_SQUARED_ERROR_LOWER; - uint32_t DSCC_B_CR_SQUARED_ERROR_UPPER; - uint32_t DSCC_MAX_ABS_ERROR0; - uint32_t DSCC_MAX_ABS_ERROR1; - uint32_t DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL; - uint32_t DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL; - uint32_t DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL; - uint32_t DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL; - uint32_t DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL; - uint32_t DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL; - uint32_t DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL; - uint32_t DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL; - uint32_t DSCCIF_CONFIG0; - uint32_t DSCCIF_CONFIG1; - uint32_t DSCRM_DSC_FORWARD_CONFIG; -}; - - -struct dcn20_dsc_shift { - DSC_FIELD_LIST_DCN20(uint8_t); -}; - -struct dcn20_dsc_mask { - DSC_FIELD_LIST_DCN20(uint32_t); -}; - -/* DSCCIF_CONFIG.INPUT_PIXEL_FORMAT values */ -enum dsc_pixel_format { - DSC_PIXFMT_RGB, - DSC_PIXFMT_YCBCR444, - DSC_PIXFMT_SIMPLE_YCBCR422, - DSC_PIXFMT_NATIVE_YCBCR422, - DSC_PIXFMT_NATIVE_YCBCR420, - DSC_PIXFMT_UNKNOWN -}; - -struct dsc_reg_values { - /* PPS registers */ - struct drm_dsc_config pps; - - /* Additional registers */ - uint32_t dsc_clock_enable; - uint32_t dsc_clock_gating_disable; - uint32_t underflow_recovery_en; - uint32_t underflow_occurred_int_en; 
- uint32_t underflow_occurred_status; - enum dsc_pixel_format pixel_format; - uint32_t ich_reset_at_eol; - uint32_t alternate_ich_encoding_en; - uint32_t num_slices_h; - uint32_t num_slices_v; - uint32_t rc_buffer_model_size; - uint32_t disable_ich; - uint32_t bpp_x32; - uint32_t dsc_dbg_en; - uint32_t rc_buffer_model_overflow_int_en[4]; -}; - -struct dcn20_dsc { - struct display_stream_compressor base; - const struct dcn20_dsc_registers *dsc_regs; - const struct dcn20_dsc_shift *dsc_shift; - const struct dcn20_dsc_mask *dsc_mask; - - struct dsc_reg_values reg_vals; - - int max_image_width; -}; - -void dsc_config_log(struct display_stream_compressor *dsc, - const struct dsc_config *config); - -void dsc_log_pps(struct display_stream_compressor *dsc, - struct drm_dsc_config *pps); - -void dsc_override_rc_params(struct rc_params *rc, - const struct dc_dsc_rc_params_override *override); - -bool dsc_prepare_config(const struct dsc_config *dsc_cfg, - struct dsc_reg_values *dsc_reg_vals, - struct dsc_optc_config *dsc_optc_cfg); - -enum dsc_pixel_format dsc_dc_pixel_encoding_to_dsc_pixel_format(enum dc_pixel_encoding dc_pix_enc, - bool is_ycbcr422_simple); - -enum dsc_bits_per_comp dsc_dc_color_depth_to_dsc_bits_per_comp(enum dc_color_depth dc_color_depth); - -void dsc_init_reg_values(struct dsc_reg_values *reg_vals); - -void dsc_update_from_dsc_parameters(struct dsc_reg_values *reg_vals, const struct dsc_parameters *dsc_params); - -void dsc2_construct(struct dcn20_dsc *dsc, - struct dc_context *ctx, - int inst, - const struct dcn20_dsc_registers *dsc_regs, - const struct dcn20_dsc_shift *dsc_shift, - const struct dcn20_dsc_mask *dsc_mask); - -void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, - int pixel_clock_100Hz); - -bool dsc2_get_packed_pps(struct display_stream_compressor *dsc, - const struct dsc_config *dsc_cfg, - uint8_t *dsc_packed_pps); - -#endif - diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile index 719afb5a3b12..85a307babab9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile @@ -12,7 +12,7 @@ DCN35 = dcn35_init.o dcn35_dio_stream_encoder.o \ dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \ - dcn35_dsc.o dcn35_hubp.o dcn35_hubbub.o \ + dcn35_hubp.o dcn35_hubbub.o \ dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o AMD_DAL_DCN35 = $(addprefix $(AMDDALPATH)/dc/dcn35/,$(DCN35)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c deleted file mode 100644 index 71d2dff9986d..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.c +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dcn35_dsc.h" -#include "reg_helper.h" - -/* Macro definitios for REG_SET macros*/ -#define CTX \ - dsc20->base.ctx - -#define REG(reg)\ - dsc20->dsc_regs->reg - -#undef FN -#define FN(reg_name, field_name) \ - ((const struct dcn35_dsc_shift *)(dsc20->dsc_shift))->field_name, \ - ((const struct dcn35_dsc_mask *)(dsc20->dsc_mask))->field_name - -#define DC_LOGGER \ - dsc->ctx->logger - -void dsc35_construct(struct dcn20_dsc *dsc, - struct dc_context *ctx, - int inst, - const struct dcn20_dsc_registers *dsc_regs, - const struct dcn35_dsc_shift *dsc_shift, - const struct dcn35_dsc_mask *dsc_mask) -{ - dsc2_construct(dsc, ctx, inst, dsc_regs, - (const struct dcn20_dsc_shift *)(dsc_shift), - (const struct dcn20_dsc_mask *)(dsc_mask)); -} - -void dsc35_set_fgcg(struct dcn20_dsc *dsc20, bool enable) -{ - REG_UPDATE(DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, !enable); -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h deleted file mode 100644 index 133ad38842cc..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dsc.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DCN35_DSC_H__ -#define __DCN35_DSC_H__ - -#include "dcn20/dcn20_dsc.h" - -#define DSC_REG_LIST_SH_MASK_DCN35(mask_sh) \ - DSC_REG_LIST_SH_MASK_DCN20(mask_sh), \ - DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, mask_sh) - -#define DSC_FIELD_LIST_DCN35(type) \ - struct { \ - DSC_FIELD_LIST_DCN20(type); \ - type DSC_FGCG_REP_DIS; \ - } - -struct dcn35_dsc_shift { - DSC_FIELD_LIST_DCN35(uint8_t); -}; - -struct dcn35_dsc_mask { - DSC_FIELD_LIST_DCN35(uint32_t); -}; - -void dsc35_construct(struct dcn20_dsc *dsc, - struct dc_context *ctx, - int inst, - const struct dcn20_dsc_registers *dsc_regs, - const struct dcn35_dsc_shift *dsc_shift, - const struct dcn35_dsc_mask *dsc_mask); - -void dsc35_set_fgcg(struct dcn20_dsc *dsc20, bool enable); - -#endif /* __DCN35_DSC_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index a2537229ee88..b183ba5a692e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -1,8 +1,34 @@ # SPDX-License-Identifier: MIT # # Makefile for the 'dsc' sub-component of DAL. + +ifdef CONFIG_DRM_AMD_DC_FP + +############################################################################### +# DCN20 +############################################################################### +DSC_DCN20 = dcn20_dsc.o + +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn20/,$(DSC_DCN20)) + + + + +############################################################################### +# DCN35 +############################################################################### + +DSC_DCN35 = dcn35_dsc.o + +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn35/,$(DSC_DCN35)) + + + +endif + DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o AMD_DAL_DSC = $(addprefix $(AMDDALPATH)/dc/dsc/,$(DSC)) AMD_DISPLAY_FILES += $(AMD_DAL_DSC) + diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c new file mode 100644 index 000000000000..c9ae2d8f0096 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c @@ -0,0 +1,780 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include + +#include "reg_helper.h" +#include "dcn20_dsc.h" +#include "dsc/dscc_types.h" +#include "dsc/rc_calc.h" + +static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals); + +/* Object I/F functions */ +static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s); +static bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg); +static void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, + struct dsc_optc_config *dsc_optc_cfg); +static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe); +static void dsc2_disable(struct display_stream_compressor *dsc); +static void dsc2_disconnect(struct display_stream_compressor *dsc); + +static const struct dsc_funcs dcn20_dsc_funcs = { + .dsc_get_enc_caps = dsc2_get_enc_caps, + .dsc_read_state = dsc2_read_state, + .dsc_validate_stream = dsc2_validate_stream, + .dsc_set_config = dsc2_set_config, + .dsc_get_packed_pps = dsc2_get_packed_pps, + .dsc_enable = dsc2_enable, + .dsc_disable = dsc2_disable, + .dsc_disconnect = dsc2_disconnect, +}; + +/* Macro definitios for REG_SET macros*/ +#define CTX \ + dsc20->base.ctx + +#define REG(reg)\ + dsc20->dsc_regs->reg + +#undef FN +#define FN(reg_name, field_name) \ + dsc20->dsc_shift->field_name, dsc20->dsc_mask->field_name +#define DC_LOGGER \ + dsc->ctx->logger + +enum dsc_bits_per_comp { + DSC_BPC_8 = 8, + DSC_BPC_10 = 10, + DSC_BPC_12 = 12, + DSC_BPC_UNKNOWN +}; + +/* API functions (external or via structure->function_pointer) */ + +void dsc2_construct(struct dcn20_dsc *dsc, + struct dc_context *ctx, + int inst, + const struct dcn20_dsc_registers *dsc_regs, + const struct dcn20_dsc_shift *dsc_shift, + const struct dcn20_dsc_mask *dsc_mask) +{ + dsc->base.ctx = ctx; + dsc->base.inst = inst; + dsc->base.funcs = &dcn20_dsc_funcs; + + dsc->dsc_regs = dsc_regs; + dsc->dsc_shift = dsc_shift; + dsc->dsc_mask = dsc_mask; + + dsc->max_image_width = 5184; +} + + +#define DCN20_MAX_PIXEL_CLOCK_Mhz 1188 +#define DCN20_MAX_DISPLAY_CLOCK_Mhz 1200 + +/* This returns the capabilities for a single DSC encoder engine. Number of slices and total throughput + * can be doubled, tripled etc. by using additional DSC engines. + */ +void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) +{ + dsc_enc_caps->dsc_version = 0x21; /* v1.2 - DP spec defined it in reverse order and we kept it */ + + dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 = 1; + + dsc_enc_caps->lb_bit_depth = 13; + dsc_enc_caps->is_block_pred_supported = true; + + dsc_enc_caps->color_formats.bits.RGB = 1; + dsc_enc_caps->color_formats.bits.YCBCR_444 = 1; + dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1; + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0; + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1; + + dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; + dsc_enc_caps->color_depth.bits.COLOR_DEPTH_10_BPC = 1; + dsc_enc_caps->color_depth.bits.COLOR_DEPTH_12_BPC = 1; + + /* Maximum total throughput with all the slices combined. This is different from how DP spec specifies it. + * Our decoder's total throughput in Pix/s is equal to DISPCLK. This is then shared between slices. + * The value below is the absolute maximum value. 
The actual throughput may be lower, but it'll always + * be sufficient to process the input pixel rate fed into a single DSC engine. + */ + dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz; + + /* For pixel clock bigger than a single-pipe limit we'll need two engines, which then doubles our + * throughput and number of slices, but also introduces a lower limit of 2 slices + */ + if (pixel_clock_100Hz >= DCN20_MAX_PIXEL_CLOCK_Mhz*10000) { + dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 0; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 = 1; + dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 2; + } + + /* For pixel clock bigger than a single-pipe limit needing four engines ODM 4:1, which then quardruples our + * throughput and number of slices + */ + if (pixel_clock_100Hz > DCN20_MAX_PIXEL_CLOCK_Mhz*10000*2) { + dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 = 1; + dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 4; + } + + dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ + dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ +} + + +/* this function read dsc related register fields to be logged later in dcn10_log_hw_state + * into a dcn_dsc_state struct. + */ +static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s) +{ + struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); + + REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &s->dsc_clock_en); + REG_GET(DSCC_PPS_CONFIG3, SLICE_WIDTH, &s->dsc_slice_width); + REG_GET(DSCC_PPS_CONFIG1, BITS_PER_PIXEL, &s->dsc_bits_per_pixel); + REG_GET(DSCC_PPS_CONFIG3, SLICE_HEIGHT, &s->dsc_slice_height); + REG_GET(DSCC_PPS_CONFIG1, CHUNK_SIZE, &s->dsc_chunk_size); + REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width); + REG_GET(DSCC_PPS_CONFIG2, PIC_HEIGHT, &s->dsc_pic_height); + REG_GET(DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, &s->dsc_slice_bpg_offset); + REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &s->dsc_fw_en, + DSCRM_DSC_OPP_PIPE_SOURCE, &s->dsc_opp_source); +} + + +static bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg) +{ + struct dsc_optc_config dsc_optc_cfg; + struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); + + if (dsc_cfg->pic_width > dsc20->max_image_width) + return false; + + return dsc_prepare_config(dsc_cfg, &dsc20->reg_vals, &dsc_optc_cfg); +} + + +void dsc_config_log(struct display_stream_compressor *dsc, const struct dsc_config *config) +{ + DC_LOG_DSC("\tnum_slices_h %d", config->dc_dsc_cfg.num_slices_h); + DC_LOG_DSC("\tnum_slices_v %d", config->dc_dsc_cfg.num_slices_v); + DC_LOG_DSC("\tbits_per_pixel %d (%d.%04d)", + config->dc_dsc_cfg.bits_per_pixel, + config->dc_dsc_cfg.bits_per_pixel / 16, + ((config->dc_dsc_cfg.bits_per_pixel % 16) * 10000) / 16); + DC_LOG_DSC("\tcolor_depth %d", config->color_depth); +} + +static void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, + struct dsc_optc_config *dsc_optc_cfg) +{ + bool is_config_ok; + struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); + + DC_LOG_DSC("Setting DSC Config at DSC inst %d", dsc->inst); + dsc_config_log(dsc, dsc_cfg); + is_config_ok = dsc_prepare_config(dsc_cfg, &dsc20->reg_vals, dsc_optc_cfg); + ASSERT(is_config_ok); + DC_LOG_DSC("programming DSC Picture Parameter Set (PPS):"); + dsc_log_pps(dsc, &dsc20->reg_vals.pps); + dsc_write_to_registers(dsc, &dsc20->reg_vals); +} + + +bool dsc2_get_packed_pps(struct 
display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, uint8_t *dsc_packed_pps) +{ + bool is_config_ok; + struct dsc_reg_values dsc_reg_vals; + struct dsc_optc_config dsc_optc_cfg; + + memset(&dsc_reg_vals, 0, sizeof(dsc_reg_vals)); + memset(&dsc_optc_cfg, 0, sizeof(dsc_optc_cfg)); + + DC_LOG_DSC("Getting packed DSC PPS for DSC Config:"); + dsc_config_log(dsc, dsc_cfg); + DC_LOG_DSC("DSC Picture Parameter Set (PPS):"); + is_config_ok = dsc_prepare_config(dsc_cfg, &dsc_reg_vals, &dsc_optc_cfg); + ASSERT(is_config_ok); + drm_dsc_pps_payload_pack((struct drm_dsc_picture_parameter_set *)dsc_packed_pps, &dsc_reg_vals.pps); + dsc_log_pps(dsc, &dsc_reg_vals.pps); + + return is_config_ok; +} + + +static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe) +{ + struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); + int dsc_clock_en; + int dsc_fw_config; + int enabled_opp_pipe; + + DC_LOG_DSC("enable DSC %d at opp pipe %d", dsc->inst, opp_pipe); + + REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en); + REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe); + if ((dsc_clock_en || dsc_fw_config) && enabled_opp_pipe != opp_pipe) { + DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already enabled!", dsc->inst, enabled_opp_pipe); + ASSERT(0); + } + + REG_UPDATE(DSC_TOP_CONTROL, + DSC_CLOCK_EN, 1); + + REG_UPDATE_2(DSCRM_DSC_FORWARD_CONFIG, + DSCRM_DSC_FORWARD_EN, 1, + DSCRM_DSC_OPP_PIPE_SOURCE, opp_pipe); +} + + +static void dsc2_disable(struct display_stream_compressor *dsc) +{ + struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); + int dsc_clock_en; + int dsc_fw_config; + int enabled_opp_pipe; + + DC_LOG_DSC("disable DSC %d", dsc->inst); + + REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en); + REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe); + if (!dsc_clock_en || !dsc_fw_config) { + DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already disabled!", dsc->inst, enabled_opp_pipe); + ASSERT(0); + } + + REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG, + DSCRM_DSC_FORWARD_EN, 0); + + REG_UPDATE(DSC_TOP_CONTROL, + DSC_CLOCK_EN, 0); +} + +static void dsc2_disconnect(struct display_stream_compressor *dsc) +{ + struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); + + DC_LOG_DSC("disconnect DSC %d", dsc->inst); + + REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG, + DSCRM_DSC_FORWARD_EN, 0); +} + +/* This module's internal functions */ +void dsc_log_pps(struct display_stream_compressor *dsc, struct drm_dsc_config *pps) +{ + int i; + int bits_per_pixel = pps->bits_per_pixel; + + DC_LOG_DSC("\tdsc_version_major %d", pps->dsc_version_major); + DC_LOG_DSC("\tdsc_version_minor %d", pps->dsc_version_minor); + DC_LOG_DSC("\tbits_per_component %d", pps->bits_per_component); + DC_LOG_DSC("\tline_buf_depth %d", pps->line_buf_depth); + DC_LOG_DSC("\tblock_pred_enable %d", pps->block_pred_enable); + DC_LOG_DSC("\tconvert_rgb %d", pps->convert_rgb); + DC_LOG_DSC("\tsimple_422 %d", pps->simple_422); + DC_LOG_DSC("\tvbr_enable %d", pps->vbr_enable); + DC_LOG_DSC("\tbits_per_pixel %d (%d.%04d)", bits_per_pixel, bits_per_pixel / 16, ((bits_per_pixel % 16) * 10000) / 16); + DC_LOG_DSC("\tpic_height %d", pps->pic_height); + DC_LOG_DSC("\tpic_width %d", pps->pic_width); + DC_LOG_DSC("\tslice_height %d", pps->slice_height); + DC_LOG_DSC("\tslice_width %d", pps->slice_width); + DC_LOG_DSC("\tslice_chunk_size %d", pps->slice_chunk_size); + DC_LOG_DSC("\tinitial_xmit_delay %d", pps->initial_xmit_delay); + 
DC_LOG_DSC("\tinitial_dec_delay %d", pps->initial_dec_delay); + DC_LOG_DSC("\tinitial_scale_value %d", pps->initial_scale_value); + DC_LOG_DSC("\tscale_increment_interval %d", pps->scale_increment_interval); + DC_LOG_DSC("\tscale_decrement_interval %d", pps->scale_decrement_interval); + DC_LOG_DSC("\tfirst_line_bpg_offset %d", pps->first_line_bpg_offset); + DC_LOG_DSC("\tnfl_bpg_offset %d", pps->nfl_bpg_offset); + DC_LOG_DSC("\tslice_bpg_offset %d", pps->slice_bpg_offset); + DC_LOG_DSC("\tinitial_offset %d", pps->initial_offset); + DC_LOG_DSC("\tfinal_offset %d", pps->final_offset); + DC_LOG_DSC("\tflatness_min_qp %d", pps->flatness_min_qp); + DC_LOG_DSC("\tflatness_max_qp %d", pps->flatness_max_qp); + /* DC_LOG_DSC("\trc_parameter_set %d", pps->rc_parameter_set); */ + DC_LOG_DSC("\tnative_420 %d", pps->native_420); + DC_LOG_DSC("\tnative_422 %d", pps->native_422); + DC_LOG_DSC("\tsecond_line_bpg_offset %d", pps->second_line_bpg_offset); + DC_LOG_DSC("\tnsl_bpg_offset %d", pps->nsl_bpg_offset); + DC_LOG_DSC("\tsecond_line_offset_adj %d", pps->second_line_offset_adj); + DC_LOG_DSC("\trc_model_size %d", pps->rc_model_size); + DC_LOG_DSC("\trc_edge_factor %d", pps->rc_edge_factor); + DC_LOG_DSC("\trc_quant_incr_limit0 %d", pps->rc_quant_incr_limit0); + DC_LOG_DSC("\trc_quant_incr_limit1 %d", pps->rc_quant_incr_limit1); + DC_LOG_DSC("\trc_tgt_offset_high %d", pps->rc_tgt_offset_high); + DC_LOG_DSC("\trc_tgt_offset_low %d", pps->rc_tgt_offset_low); + + for (i = 0; i < NUM_BUF_RANGES - 1; i++) + DC_LOG_DSC("\trc_buf_thresh[%d] %d", i, pps->rc_buf_thresh[i]); + + for (i = 0; i < NUM_BUF_RANGES; i++) { + DC_LOG_DSC("\trc_range_parameters[%d].range_min_qp %d", i, pps->rc_range_params[i].range_min_qp); + DC_LOG_DSC("\trc_range_parameters[%d].range_max_qp %d", i, pps->rc_range_params[i].range_max_qp); + DC_LOG_DSC("\trc_range_parameters[%d].range_bpg_offset %d", i, pps->rc_range_params[i].range_bpg_offset); + } +} + +void dsc_override_rc_params(struct rc_params *rc, const struct dc_dsc_rc_params_override *override) +{ + uint8_t i; + + rc->rc_model_size = override->rc_model_size; + for (i = 0; i < DC_DSC_RC_BUF_THRESH_SIZE; i++) + rc->rc_buf_thresh[i] = override->rc_buf_thresh[i]; + for (i = 0; i < DC_DSC_QP_SET_SIZE; i++) { + rc->qp_min[i] = override->rc_minqp[i]; + rc->qp_max[i] = override->rc_maxqp[i]; + rc->ofs[i] = override->rc_offset[i]; + } + + rc->rc_tgt_offset_hi = override->rc_tgt_offset_hi; + rc->rc_tgt_offset_lo = override->rc_tgt_offset_lo; + rc->rc_edge_factor = override->rc_edge_factor; + rc->rc_quant_incr_limit0 = override->rc_quant_incr_limit0; + rc->rc_quant_incr_limit1 = override->rc_quant_incr_limit1; + + rc->initial_fullness_offset = override->initial_fullness_offset; + rc->initial_xmit_delay = override->initial_delay; + + rc->flatness_min_qp = override->flatness_min_qp; + rc->flatness_max_qp = override->flatness_max_qp; + rc->flatness_det_thresh = override->flatness_det_thresh; +} + +bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values *dsc_reg_vals, + struct dsc_optc_config *dsc_optc_cfg) +{ + struct dsc_parameters dsc_params; + struct rc_params rc; + + /* Validate input parameters */ + ASSERT(dsc_cfg->dc_dsc_cfg.num_slices_h); + ASSERT(dsc_cfg->dc_dsc_cfg.num_slices_v); + ASSERT(dsc_cfg->dc_dsc_cfg.version_minor == 1 || dsc_cfg->dc_dsc_cfg.version_minor == 2); + ASSERT(dsc_cfg->pic_width); + ASSERT(dsc_cfg->pic_height); + ASSERT((dsc_cfg->dc_dsc_cfg.version_minor == 1 && + (8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 
13)) || + (dsc_cfg->dc_dsc_cfg.version_minor == 2 && + ((8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 15) || + dsc_cfg->dc_dsc_cfg.linebuf_depth == 0))); + ASSERT(96 <= dsc_cfg->dc_dsc_cfg.bits_per_pixel && dsc_cfg->dc_dsc_cfg.bits_per_pixel <= 0x3ff); // 6.0 <= bits_per_pixel <= 63.9375 + + if (!dsc_cfg->dc_dsc_cfg.num_slices_v || !dsc_cfg->dc_dsc_cfg.num_slices_h || + !(dsc_cfg->dc_dsc_cfg.version_minor == 1 || dsc_cfg->dc_dsc_cfg.version_minor == 2) || + !dsc_cfg->pic_width || !dsc_cfg->pic_height || + !((dsc_cfg->dc_dsc_cfg.version_minor == 1 && // v1.1 line buffer depth range: + 8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 13) || + (dsc_cfg->dc_dsc_cfg.version_minor == 2 && // v1.2 line buffer depth range: + ((8 <= dsc_cfg->dc_dsc_cfg.linebuf_depth && dsc_cfg->dc_dsc_cfg.linebuf_depth <= 15) || + dsc_cfg->dc_dsc_cfg.linebuf_depth == 0))) || + !(96 <= dsc_cfg->dc_dsc_cfg.bits_per_pixel && dsc_cfg->dc_dsc_cfg.bits_per_pixel <= 0x3ff)) { + dm_output_to_console("%s: Invalid parameters\n", __func__); + return false; + } + + dsc_init_reg_values(dsc_reg_vals); + + /* Copy input config */ + dsc_reg_vals->pixel_format = dsc_dc_pixel_encoding_to_dsc_pixel_format(dsc_cfg->pixel_encoding, dsc_cfg->dc_dsc_cfg.ycbcr422_simple); + dsc_reg_vals->num_slices_h = dsc_cfg->dc_dsc_cfg.num_slices_h; + dsc_reg_vals->num_slices_v = dsc_cfg->dc_dsc_cfg.num_slices_v; + dsc_reg_vals->pps.dsc_version_minor = dsc_cfg->dc_dsc_cfg.version_minor; + dsc_reg_vals->pps.pic_width = dsc_cfg->pic_width; + dsc_reg_vals->pps.pic_height = dsc_cfg->pic_height; + dsc_reg_vals->pps.bits_per_component = dsc_dc_color_depth_to_dsc_bits_per_comp(dsc_cfg->color_depth); + dsc_reg_vals->pps.block_pred_enable = dsc_cfg->dc_dsc_cfg.block_pred_enable; + dsc_reg_vals->pps.line_buf_depth = dsc_cfg->dc_dsc_cfg.linebuf_depth; + dsc_reg_vals->alternate_ich_encoding_en = dsc_reg_vals->pps.dsc_version_minor == 1 ? 0 : 1; + dsc_reg_vals->ich_reset_at_eol = (dsc_cfg->is_odm || dsc_reg_vals->num_slices_h > 1) ? 0xF : 0; + + // TODO: in addition to validating slice height (pic height must be divisible by slice height), + // see what happens when the same condition doesn't apply for slice_width/pic_width. + dsc_reg_vals->pps.slice_width = dsc_cfg->pic_width / dsc_cfg->dc_dsc_cfg.num_slices_h; + dsc_reg_vals->pps.slice_height = dsc_cfg->pic_height / dsc_cfg->dc_dsc_cfg.num_slices_v; + + ASSERT(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height); + if (!(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height)) { + dm_output_to_console("%s: pix height %d not divisible by num_slices_v %d\n\n", __func__, dsc_cfg->pic_height, dsc_cfg->dc_dsc_cfg.num_slices_v); + return false; + } + + dsc_reg_vals->bpp_x32 = dsc_cfg->dc_dsc_cfg.bits_per_pixel << 1; + if (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420 || dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422) + dsc_reg_vals->pps.bits_per_pixel = dsc_reg_vals->bpp_x32; + else + dsc_reg_vals->pps.bits_per_pixel = dsc_reg_vals->bpp_x32 >> 1; + + dsc_reg_vals->pps.convert_rgb = dsc_reg_vals->pixel_format == DSC_PIXFMT_RGB ? 
1 : 0; + dsc_reg_vals->pps.native_422 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422); + dsc_reg_vals->pps.native_420 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420); + dsc_reg_vals->pps.simple_422 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422); + + calc_rc_params(&rc, &dsc_reg_vals->pps); + + if (dsc_cfg->dc_dsc_cfg.rc_params_ovrd) + dsc_override_rc_params(&rc, dsc_cfg->dc_dsc_cfg.rc_params_ovrd); + + if (dscc_compute_dsc_parameters(&dsc_reg_vals->pps, &rc, &dsc_params)) { + dm_output_to_console("%s: DSC config failed\n", __func__); + return false; + } + + dsc_update_from_dsc_parameters(dsc_reg_vals, &dsc_params); + + dsc_optc_cfg->bytes_per_pixel = dsc_params.bytes_per_pixel; + dsc_optc_cfg->slice_width = dsc_reg_vals->pps.slice_width; + dsc_optc_cfg->is_pixel_format_444 = dsc_reg_vals->pixel_format == DSC_PIXFMT_RGB || + dsc_reg_vals->pixel_format == DSC_PIXFMT_YCBCR444 || + dsc_reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422; + + return true; +} + + +enum dsc_pixel_format dsc_dc_pixel_encoding_to_dsc_pixel_format(enum dc_pixel_encoding dc_pix_enc, bool is_ycbcr422_simple) +{ + enum dsc_pixel_format dsc_pix_fmt = DSC_PIXFMT_UNKNOWN; + + /* NOTE: We don't support DSC_PIXFMT_SIMPLE_YCBCR422 */ + + switch (dc_pix_enc) { + case PIXEL_ENCODING_RGB: + dsc_pix_fmt = DSC_PIXFMT_RGB; + break; + case PIXEL_ENCODING_YCBCR422: + if (is_ycbcr422_simple) + dsc_pix_fmt = DSC_PIXFMT_SIMPLE_YCBCR422; + else + dsc_pix_fmt = DSC_PIXFMT_NATIVE_YCBCR422; + break; + case PIXEL_ENCODING_YCBCR444: + dsc_pix_fmt = DSC_PIXFMT_YCBCR444; + break; + case PIXEL_ENCODING_YCBCR420: + dsc_pix_fmt = DSC_PIXFMT_NATIVE_YCBCR420; + break; + default: + dsc_pix_fmt = DSC_PIXFMT_UNKNOWN; + break; + } + + ASSERT(dsc_pix_fmt != DSC_PIXFMT_UNKNOWN); + return dsc_pix_fmt; +} + + +enum dsc_bits_per_comp dsc_dc_color_depth_to_dsc_bits_per_comp(enum dc_color_depth dc_color_depth) +{ + enum dsc_bits_per_comp bpc = DSC_BPC_UNKNOWN; + + switch (dc_color_depth) { + case COLOR_DEPTH_888: + bpc = DSC_BPC_8; + break; + case COLOR_DEPTH_101010: + bpc = DSC_BPC_10; + break; + case COLOR_DEPTH_121212: + bpc = DSC_BPC_12; + break; + default: + bpc = DSC_BPC_UNKNOWN; + break; + } + + return bpc; +} + + +void dsc_init_reg_values(struct dsc_reg_values *reg_vals) +{ + int i; + + memset(reg_vals, 0, sizeof(struct dsc_reg_values)); + + /* Non-PPS values */ + reg_vals->dsc_clock_enable = 1; + reg_vals->dsc_clock_gating_disable = 0; + reg_vals->underflow_recovery_en = 0; + reg_vals->underflow_occurred_int_en = 0; + reg_vals->underflow_occurred_status = 0; + reg_vals->ich_reset_at_eol = 0; + reg_vals->alternate_ich_encoding_en = 0; + reg_vals->rc_buffer_model_size = 0; + /*reg_vals->disable_ich = 0;*/ + reg_vals->dsc_dbg_en = 0; + + for (i = 0; i < 4; i++) + reg_vals->rc_buffer_model_overflow_int_en[i] = 0; + + /* PPS values */ + reg_vals->pps.dsc_version_minor = 2; + reg_vals->pps.dsc_version_major = 1; + reg_vals->pps.line_buf_depth = 9; + reg_vals->pps.bits_per_component = 8; + reg_vals->pps.block_pred_enable = 1; + reg_vals->pps.slice_chunk_size = 0; + reg_vals->pps.pic_width = 0; + reg_vals->pps.pic_height = 0; + reg_vals->pps.slice_width = 0; + reg_vals->pps.slice_height = 0; + reg_vals->pps.initial_xmit_delay = 170; + reg_vals->pps.initial_dec_delay = 0; + reg_vals->pps.initial_scale_value = 0; + reg_vals->pps.scale_increment_interval = 0; + reg_vals->pps.scale_decrement_interval = 0; + reg_vals->pps.nfl_bpg_offset = 0; + reg_vals->pps.slice_bpg_offset = 0; + reg_vals->pps.nsl_bpg_offset = 0; + 
reg_vals->pps.initial_offset = 6144; + reg_vals->pps.final_offset = 0; + reg_vals->pps.flatness_min_qp = 3; + reg_vals->pps.flatness_max_qp = 12; + reg_vals->pps.rc_model_size = 8192; + reg_vals->pps.rc_edge_factor = 6; + reg_vals->pps.rc_quant_incr_limit0 = 11; + reg_vals->pps.rc_quant_incr_limit1 = 11; + reg_vals->pps.rc_tgt_offset_low = 3; + reg_vals->pps.rc_tgt_offset_high = 3; +} + +/* Updates dsc_reg_values::reg_vals::xxx fields based on the values from computed params. + * This is required because dscc_compute_dsc_parameters returns a modified PPS, which in turn + * affects non-PPS register values. + */ +void dsc_update_from_dsc_parameters(struct dsc_reg_values *reg_vals, const struct dsc_parameters *dsc_params) +{ + int i; + + reg_vals->pps = dsc_params->pps; + + // pps_computed will have the "expanded" values; need to shift them to make them fit for regs. + for (i = 0; i < NUM_BUF_RANGES - 1; i++) + reg_vals->pps.rc_buf_thresh[i] = reg_vals->pps.rc_buf_thresh[i] >> 6; + + reg_vals->rc_buffer_model_size = dsc_params->rc_buffer_model_size; +} + +static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals) +{ + uint32_t temp_int; + struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc); + + REG_SET(DSC_DEBUG_CONTROL, 0, + DSC_DBG_EN, reg_vals->dsc_dbg_en); + + // dsccif registers + REG_SET_5(DSCCIF_CONFIG0, 0, + INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN, reg_vals->underflow_recovery_en, + INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN, reg_vals->underflow_occurred_int_en, + INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS, reg_vals->underflow_occurred_status, + INPUT_PIXEL_FORMAT, reg_vals->pixel_format, + DSCCIF_CONFIG0__BITS_PER_COMPONENT, reg_vals->pps.bits_per_component); + + REG_SET_2(DSCCIF_CONFIG1, 0, + PIC_WIDTH, reg_vals->pps.pic_width, + PIC_HEIGHT, reg_vals->pps.pic_height); + + // dscc registers + if (dsc20->dsc_mask->ICH_RESET_AT_END_OF_LINE == 0) { + REG_SET_3(DSCC_CONFIG0, 0, + NUMBER_OF_SLICES_PER_LINE, reg_vals->num_slices_h - 1, + ALTERNATE_ICH_ENCODING_EN, reg_vals->alternate_ich_encoding_en, + NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, reg_vals->num_slices_v - 1); + } else { + REG_SET_4(DSCC_CONFIG0, 0, ICH_RESET_AT_END_OF_LINE, + reg_vals->ich_reset_at_eol, NUMBER_OF_SLICES_PER_LINE, + reg_vals->num_slices_h - 1, ALTERNATE_ICH_ENCODING_EN, + reg_vals->alternate_ich_encoding_en, NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, + reg_vals->num_slices_v - 1); + } + + REG_SET(DSCC_CONFIG1, 0, + DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, reg_vals->rc_buffer_model_size); + /*REG_SET_2(DSCC_CONFIG1, 0, + DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, reg_vals->rc_buffer_model_size, + DSCC_DISABLE_ICH, reg_vals->disable_ich);*/ + + REG_SET_4(DSCC_INTERRUPT_CONTROL_STATUS, 0, + DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED_INT_EN, reg_vals->rc_buffer_model_overflow_int_en[0], + DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED_INT_EN, reg_vals->rc_buffer_model_overflow_int_en[1], + DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED_INT_EN, reg_vals->rc_buffer_model_overflow_int_en[2], + DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED_INT_EN, reg_vals->rc_buffer_model_overflow_int_en[3]); + + REG_SET_3(DSCC_PPS_CONFIG0, 0, + DSC_VERSION_MINOR, reg_vals->pps.dsc_version_minor, + LINEBUF_DEPTH, reg_vals->pps.line_buf_depth, + DSCC_PPS_CONFIG0__BITS_PER_COMPONENT, reg_vals->pps.bits_per_component); + + if (reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420 || reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422) + temp_int = reg_vals->bpp_x32; + else + temp_int = 
reg_vals->bpp_x32 >> 1; + + REG_SET_7(DSCC_PPS_CONFIG1, 0, + BITS_PER_PIXEL, temp_int, + SIMPLE_422, reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422, + CONVERT_RGB, reg_vals->pixel_format == DSC_PIXFMT_RGB, + BLOCK_PRED_ENABLE, reg_vals->pps.block_pred_enable, + NATIVE_422, reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422, + NATIVE_420, reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420, + CHUNK_SIZE, reg_vals->pps.slice_chunk_size); + + REG_SET_2(DSCC_PPS_CONFIG2, 0, + PIC_WIDTH, reg_vals->pps.pic_width, + PIC_HEIGHT, reg_vals->pps.pic_height); + + REG_SET_2(DSCC_PPS_CONFIG3, 0, + SLICE_WIDTH, reg_vals->pps.slice_width, + SLICE_HEIGHT, reg_vals->pps.slice_height); + + REG_SET(DSCC_PPS_CONFIG4, 0, + INITIAL_XMIT_DELAY, reg_vals->pps.initial_xmit_delay); + + REG_SET_2(DSCC_PPS_CONFIG5, 0, + INITIAL_SCALE_VALUE, reg_vals->pps.initial_scale_value, + SCALE_INCREMENT_INTERVAL, reg_vals->pps.scale_increment_interval); + + REG_SET_3(DSCC_PPS_CONFIG6, 0, + SCALE_DECREMENT_INTERVAL, reg_vals->pps.scale_decrement_interval, + FIRST_LINE_BPG_OFFSET, reg_vals->pps.first_line_bpg_offset, + SECOND_LINE_BPG_OFFSET, reg_vals->pps.second_line_bpg_offset); + + REG_SET_2(DSCC_PPS_CONFIG7, 0, + NFL_BPG_OFFSET, reg_vals->pps.nfl_bpg_offset, + SLICE_BPG_OFFSET, reg_vals->pps.slice_bpg_offset); + + REG_SET_2(DSCC_PPS_CONFIG8, 0, + NSL_BPG_OFFSET, reg_vals->pps.nsl_bpg_offset, + SECOND_LINE_OFFSET_ADJ, reg_vals->pps.second_line_offset_adj); + + REG_SET_2(DSCC_PPS_CONFIG9, 0, + INITIAL_OFFSET, reg_vals->pps.initial_offset, + FINAL_OFFSET, reg_vals->pps.final_offset); + + REG_SET_3(DSCC_PPS_CONFIG10, 0, + FLATNESS_MIN_QP, reg_vals->pps.flatness_min_qp, + FLATNESS_MAX_QP, reg_vals->pps.flatness_max_qp, + RC_MODEL_SIZE, reg_vals->pps.rc_model_size); + + REG_SET_5(DSCC_PPS_CONFIG11, 0, + RC_EDGE_FACTOR, reg_vals->pps.rc_edge_factor, + RC_QUANT_INCR_LIMIT0, reg_vals->pps.rc_quant_incr_limit0, + RC_QUANT_INCR_LIMIT1, reg_vals->pps.rc_quant_incr_limit1, + RC_TGT_OFFSET_LO, reg_vals->pps.rc_tgt_offset_low, + RC_TGT_OFFSET_HI, reg_vals->pps.rc_tgt_offset_high); + + REG_SET_4(DSCC_PPS_CONFIG12, 0, + RC_BUF_THRESH0, reg_vals->pps.rc_buf_thresh[0], + RC_BUF_THRESH1, reg_vals->pps.rc_buf_thresh[1], + RC_BUF_THRESH2, reg_vals->pps.rc_buf_thresh[2], + RC_BUF_THRESH3, reg_vals->pps.rc_buf_thresh[3]); + + REG_SET_4(DSCC_PPS_CONFIG13, 0, + RC_BUF_THRESH4, reg_vals->pps.rc_buf_thresh[4], + RC_BUF_THRESH5, reg_vals->pps.rc_buf_thresh[5], + RC_BUF_THRESH6, reg_vals->pps.rc_buf_thresh[6], + RC_BUF_THRESH7, reg_vals->pps.rc_buf_thresh[7]); + + REG_SET_4(DSCC_PPS_CONFIG14, 0, + RC_BUF_THRESH8, reg_vals->pps.rc_buf_thresh[8], + RC_BUF_THRESH9, reg_vals->pps.rc_buf_thresh[9], + RC_BUF_THRESH10, reg_vals->pps.rc_buf_thresh[10], + RC_BUF_THRESH11, reg_vals->pps.rc_buf_thresh[11]); + + REG_SET_5(DSCC_PPS_CONFIG15, 0, + RC_BUF_THRESH12, reg_vals->pps.rc_buf_thresh[12], + RC_BUF_THRESH13, reg_vals->pps.rc_buf_thresh[13], + RANGE_MIN_QP0, reg_vals->pps.rc_range_params[0].range_min_qp, + RANGE_MAX_QP0, reg_vals->pps.rc_range_params[0].range_max_qp, + RANGE_BPG_OFFSET0, reg_vals->pps.rc_range_params[0].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG16, 0, + RANGE_MIN_QP1, reg_vals->pps.rc_range_params[1].range_min_qp, + RANGE_MAX_QP1, reg_vals->pps.rc_range_params[1].range_max_qp, + RANGE_BPG_OFFSET1, reg_vals->pps.rc_range_params[1].range_bpg_offset, + RANGE_MIN_QP2, reg_vals->pps.rc_range_params[2].range_min_qp, + RANGE_MAX_QP2, reg_vals->pps.rc_range_params[2].range_max_qp, + RANGE_BPG_OFFSET2, 
reg_vals->pps.rc_range_params[2].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG17, 0, + RANGE_MIN_QP3, reg_vals->pps.rc_range_params[3].range_min_qp, + RANGE_MAX_QP3, reg_vals->pps.rc_range_params[3].range_max_qp, + RANGE_BPG_OFFSET3, reg_vals->pps.rc_range_params[3].range_bpg_offset, + RANGE_MIN_QP4, reg_vals->pps.rc_range_params[4].range_min_qp, + RANGE_MAX_QP4, reg_vals->pps.rc_range_params[4].range_max_qp, + RANGE_BPG_OFFSET4, reg_vals->pps.rc_range_params[4].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG18, 0, + RANGE_MIN_QP5, reg_vals->pps.rc_range_params[5].range_min_qp, + RANGE_MAX_QP5, reg_vals->pps.rc_range_params[5].range_max_qp, + RANGE_BPG_OFFSET5, reg_vals->pps.rc_range_params[5].range_bpg_offset, + RANGE_MIN_QP6, reg_vals->pps.rc_range_params[6].range_min_qp, + RANGE_MAX_QP6, reg_vals->pps.rc_range_params[6].range_max_qp, + RANGE_BPG_OFFSET6, reg_vals->pps.rc_range_params[6].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG19, 0, + RANGE_MIN_QP7, reg_vals->pps.rc_range_params[7].range_min_qp, + RANGE_MAX_QP7, reg_vals->pps.rc_range_params[7].range_max_qp, + RANGE_BPG_OFFSET7, reg_vals->pps.rc_range_params[7].range_bpg_offset, + RANGE_MIN_QP8, reg_vals->pps.rc_range_params[8].range_min_qp, + RANGE_MAX_QP8, reg_vals->pps.rc_range_params[8].range_max_qp, + RANGE_BPG_OFFSET8, reg_vals->pps.rc_range_params[8].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG20, 0, + RANGE_MIN_QP9, reg_vals->pps.rc_range_params[9].range_min_qp, + RANGE_MAX_QP9, reg_vals->pps.rc_range_params[9].range_max_qp, + RANGE_BPG_OFFSET9, reg_vals->pps.rc_range_params[9].range_bpg_offset, + RANGE_MIN_QP10, reg_vals->pps.rc_range_params[10].range_min_qp, + RANGE_MAX_QP10, reg_vals->pps.rc_range_params[10].range_max_qp, + RANGE_BPG_OFFSET10, reg_vals->pps.rc_range_params[10].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG21, 0, + RANGE_MIN_QP11, reg_vals->pps.rc_range_params[11].range_min_qp, + RANGE_MAX_QP11, reg_vals->pps.rc_range_params[11].range_max_qp, + RANGE_BPG_OFFSET11, reg_vals->pps.rc_range_params[11].range_bpg_offset, + RANGE_MIN_QP12, reg_vals->pps.rc_range_params[12].range_min_qp, + RANGE_MAX_QP12, reg_vals->pps.rc_range_params[12].range_max_qp, + RANGE_BPG_OFFSET12, reg_vals->pps.rc_range_params[12].range_bpg_offset); + + REG_SET_6(DSCC_PPS_CONFIG22, 0, + RANGE_MIN_QP13, reg_vals->pps.rc_range_params[13].range_min_qp, + RANGE_MAX_QP13, reg_vals->pps.rc_range_params[13].range_max_qp, + RANGE_BPG_OFFSET13, reg_vals->pps.rc_range_params[13].range_bpg_offset, + RANGE_MIN_QP14, reg_vals->pps.rc_range_params[14].range_min_qp, + RANGE_MAX_QP14, reg_vals->pps.rc_range_params[14].range_max_qp, + RANGE_BPG_OFFSET14, reg_vals->pps.rc_range_params[14].range_bpg_offset); + +} + diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h new file mode 100644 index 000000000000..ba869387c3c5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h @@ -0,0 +1,589 @@ +/* Copyright 2017 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#ifndef __DCN20_DSC_H__ +#define __DCN20_DSC_H__ + +#include "dsc.h" +#include "dsc/dscc_types.h" +#include + +#define TO_DCN20_DSC(dsc)\ + container_of(dsc, struct dcn20_dsc, base) + +#define DSC_REG_LIST_DCN20(id) \ + SRI(DSC_TOP_CONTROL, DSC_TOP, id),\ + SRI(DSC_DEBUG_CONTROL, DSC_TOP, id),\ + SRI(DSCC_CONFIG0, DSCC, id),\ + SRI(DSCC_CONFIG1, DSCC, id),\ + SRI(DSCC_STATUS, DSCC, id),\ + SRI(DSCC_INTERRUPT_CONTROL_STATUS, DSCC, id),\ + SRI(DSCC_PPS_CONFIG0, DSCC, id),\ + SRI(DSCC_PPS_CONFIG1, DSCC, id),\ + SRI(DSCC_PPS_CONFIG2, DSCC, id),\ + SRI(DSCC_PPS_CONFIG3, DSCC, id),\ + SRI(DSCC_PPS_CONFIG4, DSCC, id),\ + SRI(DSCC_PPS_CONFIG5, DSCC, id),\ + SRI(DSCC_PPS_CONFIG6, DSCC, id),\ + SRI(DSCC_PPS_CONFIG7, DSCC, id),\ + SRI(DSCC_PPS_CONFIG8, DSCC, id),\ + SRI(DSCC_PPS_CONFIG9, DSCC, id),\ + SRI(DSCC_PPS_CONFIG10, DSCC, id),\ + SRI(DSCC_PPS_CONFIG11, DSCC, id),\ + SRI(DSCC_PPS_CONFIG12, DSCC, id),\ + SRI(DSCC_PPS_CONFIG13, DSCC, id),\ + SRI(DSCC_PPS_CONFIG14, DSCC, id),\ + SRI(DSCC_PPS_CONFIG15, DSCC, id),\ + SRI(DSCC_PPS_CONFIG16, DSCC, id),\ + SRI(DSCC_PPS_CONFIG17, DSCC, id),\ + SRI(DSCC_PPS_CONFIG18, DSCC, id),\ + SRI(DSCC_PPS_CONFIG19, DSCC, id),\ + SRI(DSCC_PPS_CONFIG20, DSCC, id),\ + SRI(DSCC_PPS_CONFIG21, DSCC, id),\ + SRI(DSCC_PPS_CONFIG22, DSCC, id),\ + SRI(DSCC_MEM_POWER_CONTROL, DSCC, id),\ + SRI(DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC, id),\ + SRI(DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC, id),\ + SRI(DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC, id),\ + SRI(DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC, id),\ + SRI(DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC, id),\ + SRI(DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC, id),\ + SRI(DSCC_MAX_ABS_ERROR0, DSCC, id),\ + SRI(DSCC_MAX_ABS_ERROR1, DSCC, id),\ + SRI(DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL, DSCC, id),\ + SRI(DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id),\ + SRI(DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id),\ + SRI(DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id),\ + SRI(DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL, DSCC, id),\ + SRI(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id),\ + SRI(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id),\ + SRI(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id),\ + SRI(DSCCIF_CONFIG0, DSCCIF, id),\ + SRI(DSCCIF_CONFIG1, DSCCIF, id),\ + SRI(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) + + +#define DSC_SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +//Used in resolving the 
corner case with duplicate field name +#define DSC2_SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## _ ## field_name ## post_fix + +#define DSC_REG_LIST_SH_MASK_DCN20(mask_sh)\ + DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_CLOCK_EN, mask_sh), \ + DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DISPCLK_R_GATE_DIS, mask_sh), \ + DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DSCCLK_R_GATE_DIS, mask_sh), \ + DSC_SF(DSC_TOP0_DSC_DEBUG_CONTROL, DSC_DBG_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG0, ICH_RESET_AT_END_OF_LINE, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_PER_LINE, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG0, ALTERNATE_ICH_ENCODING_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, mask_sh), \ + DSC_SF(DSCC0_DSCC_CONFIG1, DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, mask_sh), \ + /*DSC_SF(DSCC0_DSCC_CONFIG1, DSCC_DISABLE_ICH, mask_sh),*/ \ + DSC_SF(DSCC0_DSCC_STATUS, DSCC_DOUBLE_BUFFER_REG_UPDATE_PENDING, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER0_OVERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER1_OVERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER2_OVERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER3_OVERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER0_UNDERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER1_UNDERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER2_UNDERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER3_UNDERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER0_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER1_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER2_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER3_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER0_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER1_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER2_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_BUFFER3_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL_STATUS, DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG0, DSC_VERSION_MINOR, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG0, 
DSC_VERSION_MAJOR, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG0, PPS_IDENTIFIER, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG0, LINEBUF_DEPTH, mask_sh), \ + DSC2_SF(DSCC0, DSCC_PPS_CONFIG0__BITS_PER_COMPONENT, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, BITS_PER_PIXEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, VBR_ENABLE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, SIMPLE_422, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, CONVERT_RGB, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, BLOCK_PRED_ENABLE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, NATIVE_422, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, NATIVE_420, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG1, CHUNK_SIZE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG2, PIC_WIDTH, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG2, PIC_HEIGHT, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG3, SLICE_WIDTH, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG3, SLICE_HEIGHT, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG4, INITIAL_XMIT_DELAY, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG4, INITIAL_DEC_DELAY, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG5, INITIAL_SCALE_VALUE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG5, SCALE_INCREMENT_INTERVAL, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG6, SCALE_DECREMENT_INTERVAL, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG6, FIRST_LINE_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG6, SECOND_LINE_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG7, NFL_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG8, NSL_BPG_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG8, SECOND_LINE_OFFSET_ADJ, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG9, INITIAL_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG9, FINAL_OFFSET, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG10, FLATNESS_MIN_QP, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG10, FLATNESS_MAX_QP, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG10, RC_MODEL_SIZE, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_EDGE_FACTOR, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_QUANT_INCR_LIMIT0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_QUANT_INCR_LIMIT1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_TGT_OFFSET_LO, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_TGT_OFFSET_HI, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH2, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH3, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH4, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH5, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH6, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH7, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH8, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH9, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH10, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH11, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RC_BUF_THRESH12, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RC_BUF_THRESH13, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_MIN_QP0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_MAX_QP0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_BPG_OFFSET0, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MIN_QP1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MAX_QP1, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_BPG_OFFSET1, mask_sh), \ + 
DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MIN_QP2, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MAX_QP2, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_BPG_OFFSET2, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MIN_QP3, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MAX_QP3, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_BPG_OFFSET3, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MIN_QP4, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MAX_QP4, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_BPG_OFFSET4, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MIN_QP5, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MAX_QP5, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_BPG_OFFSET5, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MIN_QP6, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MAX_QP6, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_BPG_OFFSET6, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MIN_QP7, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MAX_QP7, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_BPG_OFFSET7, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MIN_QP8, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MAX_QP8, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_BPG_OFFSET8, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MIN_QP9, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MAX_QP9, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_BPG_OFFSET9, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MIN_QP10, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MAX_QP10, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_BPG_OFFSET10, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MIN_QP11, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MAX_QP11, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_BPG_OFFSET11, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MIN_QP12, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MAX_QP12, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_BPG_OFFSET12, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MIN_QP13, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MAX_QP13, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_BPG_OFFSET13, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MIN_QP14, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MAX_QP14, mask_sh), \ + DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_BPG_OFFSET14, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_DEFAULT_MEM_LOW_POWER_STATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_MEM_PWR_FORCE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_MEM_PWR_DIS, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_MEM_PWR_STATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_NATIVE_422_MEM_PWR_FORCE, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_NATIVE_422_MEM_PWR_DIS, mask_sh), \ + DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL, DSCC_NATIVE_422_MEM_PWR_STATE, mask_sh), \ + DSC_SF(DSCC0_DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC_R_Y_SQUARED_ERROR_LOWER, mask_sh), \ + DSC_SF(DSCC0_DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC_R_Y_SQUARED_ERROR_UPPER, mask_sh), \ + DSC_SF(DSCC0_DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC_G_CB_SQUARED_ERROR_LOWER, mask_sh), \ + DSC_SF(DSCC0_DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC_G_CB_SQUARED_ERROR_UPPER, mask_sh), \ + DSC_SF(DSCC0_DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC_B_CR_SQUARED_ERROR_LOWER, mask_sh), \ + DSC_SF(DSCC0_DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC_B_CR_SQUARED_ERROR_UPPER, mask_sh), \ + DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR0, 
DSCC_R_Y_MAX_ABS_ERROR, mask_sh), \ + DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR0, DSCC_G_CB_MAX_ABS_ERROR, mask_sh), \ + DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR1, DSCC_B_CR_MAX_ABS_ERROR, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL, DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL, DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL, DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL, DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, mask_sh), \ + DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_PIXEL_FORMAT, mask_sh), \ + DSC2_SF(DSCCIF0, DSCCIF_CONFIG0__BITS_PER_COMPONENT, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG0, DOUBLE_BUFFER_REG_UPDATE_PENDING, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG1, PIC_WIDTH, mask_sh), \ + DSC_SF(DSCCIF0_DSCCIF_CONFIG1, PIC_HEIGHT, mask_sh), \ + DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, mask_sh), \ + DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_OPP_PIPE_SOURCE, mask_sh) + + + +#define DSC_FIELD_LIST_DCN20(type)\ + type DSC_CLOCK_EN; \ + type DSC_DISPCLK_R_GATE_DIS; \ + type DSC_DSCCLK_R_GATE_DIS; \ + type DSC_DBG_EN; \ + type DSC_TEST_CLOCK_MUX_SEL; \ + type ICH_RESET_AT_END_OF_LINE; \ + type NUMBER_OF_SLICES_PER_LINE; \ + type ALTERNATE_ICH_ENCODING_EN; \ + type NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE; \ + /*type DSCC_DISABLE_ICH;*/ \ + type DSCC_DOUBLE_BUFFER_REG_UPDATE_PENDING; \ + type DSCC_RATE_BUFFER0_OVERFLOW_OCCURRED; \ + type DSCC_RATE_BUFFER1_OVERFLOW_OCCURRED; \ + type DSCC_RATE_BUFFER2_OVERFLOW_OCCURRED; \ + type DSCC_RATE_BUFFER3_OVERFLOW_OCCURRED; \ + type DSCC_RATE_BUFFER0_UNDERFLOW_OCCURRED; \ + type DSCC_RATE_BUFFER1_UNDERFLOW_OCCURRED; \ + type DSCC_RATE_BUFFER2_UNDERFLOW_OCCURRED; \ + type DSCC_RATE_BUFFER3_UNDERFLOW_OCCURRED; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED; \ + type DSCC_RATE_BUFFER0_OVERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_BUFFER1_OVERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_BUFFER2_OVERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_BUFFER3_OVERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_BUFFER0_UNDERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_BUFFER1_UNDERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_BUFFER2_UNDERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_BUFFER3_UNDERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL0_OVERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL1_OVERFLOW_OCCURRED_INT_EN; \ + type DSCC_RATE_CONTROL_BUFFER_MODEL2_OVERFLOW_OCCURRED_INT_EN; \ + type 
DSCC_RATE_CONTROL_BUFFER_MODEL3_OVERFLOW_OCCURRED_INT_EN; \ + type DSC_VERSION_MINOR; \ + type DSC_VERSION_MAJOR; \ + type PPS_IDENTIFIER; \ + type LINEBUF_DEPTH; \ + type DSCC_PPS_CONFIG0__BITS_PER_COMPONENT; \ + type BITS_PER_PIXEL; \ + type VBR_ENABLE; \ + type SIMPLE_422; \ + type CONVERT_RGB; \ + type BLOCK_PRED_ENABLE; \ + type NATIVE_422; \ + type NATIVE_420; \ + type CHUNK_SIZE; \ + type PIC_WIDTH; \ + type PIC_HEIGHT; \ + type SLICE_WIDTH; \ + type SLICE_HEIGHT; \ + type INITIAL_XMIT_DELAY; \ + type INITIAL_DEC_DELAY; \ + type INITIAL_SCALE_VALUE; \ + type SCALE_INCREMENT_INTERVAL; \ + type SCALE_DECREMENT_INTERVAL; \ + type FIRST_LINE_BPG_OFFSET; \ + type SECOND_LINE_BPG_OFFSET; \ + type NFL_BPG_OFFSET; \ + type SLICE_BPG_OFFSET; \ + type NSL_BPG_OFFSET; \ + type SECOND_LINE_OFFSET_ADJ; \ + type INITIAL_OFFSET; \ + type FINAL_OFFSET; \ + type FLATNESS_MIN_QP; \ + type FLATNESS_MAX_QP; \ + type RC_MODEL_SIZE; \ + type RC_EDGE_FACTOR; \ + type RC_QUANT_INCR_LIMIT0; \ + type RC_QUANT_INCR_LIMIT1; \ + type RC_TGT_OFFSET_LO; \ + type RC_TGT_OFFSET_HI; \ + type RC_BUF_THRESH0; \ + type RC_BUF_THRESH1; \ + type RC_BUF_THRESH2; \ + type RC_BUF_THRESH3; \ + type RC_BUF_THRESH4; \ + type RC_BUF_THRESH5; \ + type RC_BUF_THRESH6; \ + type RC_BUF_THRESH7; \ + type RC_BUF_THRESH8; \ + type RC_BUF_THRESH9; \ + type RC_BUF_THRESH10; \ + type RC_BUF_THRESH11; \ + type RC_BUF_THRESH12; \ + type RC_BUF_THRESH13; \ + type RANGE_MIN_QP0; \ + type RANGE_MAX_QP0; \ + type RANGE_BPG_OFFSET0; \ + type RANGE_MIN_QP1; \ + type RANGE_MAX_QP1; \ + type RANGE_BPG_OFFSET1; \ + type RANGE_MIN_QP2; \ + type RANGE_MAX_QP2; \ + type RANGE_BPG_OFFSET2; \ + type RANGE_MIN_QP3; \ + type RANGE_MAX_QP3; \ + type RANGE_BPG_OFFSET3; \ + type RANGE_MIN_QP4; \ + type RANGE_MAX_QP4; \ + type RANGE_BPG_OFFSET4; \ + type RANGE_MIN_QP5; \ + type RANGE_MAX_QP5; \ + type RANGE_BPG_OFFSET5; \ + type RANGE_MIN_QP6; \ + type RANGE_MAX_QP6; \ + type RANGE_BPG_OFFSET6; \ + type RANGE_MIN_QP7; \ + type RANGE_MAX_QP7; \ + type RANGE_BPG_OFFSET7; \ + type RANGE_MIN_QP8; \ + type RANGE_MAX_QP8; \ + type RANGE_BPG_OFFSET8; \ + type RANGE_MIN_QP9; \ + type RANGE_MAX_QP9; \ + type RANGE_BPG_OFFSET9; \ + type RANGE_MIN_QP10; \ + type RANGE_MAX_QP10; \ + type RANGE_BPG_OFFSET10; \ + type RANGE_MIN_QP11; \ + type RANGE_MAX_QP11; \ + type RANGE_BPG_OFFSET11; \ + type RANGE_MIN_QP12; \ + type RANGE_MAX_QP12; \ + type RANGE_BPG_OFFSET12; \ + type RANGE_MIN_QP13; \ + type RANGE_MAX_QP13; \ + type RANGE_BPG_OFFSET13; \ + type RANGE_MIN_QP14; \ + type RANGE_MAX_QP14; \ + type RANGE_BPG_OFFSET14; \ + type DSCC_DEFAULT_MEM_LOW_POWER_STATE; \ + type DSCC_MEM_PWR_FORCE; \ + type DSCC_MEM_PWR_DIS; \ + type DSCC_MEM_PWR_STATE; \ + type DSCC_NATIVE_422_MEM_PWR_FORCE; \ + type DSCC_NATIVE_422_MEM_PWR_DIS; \ + type DSCC_NATIVE_422_MEM_PWR_STATE; \ + type DSCC_R_Y_SQUARED_ERROR_LOWER; \ + type DSCC_R_Y_SQUARED_ERROR_UPPER; \ + type DSCC_G_CB_SQUARED_ERROR_LOWER; \ + type DSCC_G_CB_SQUARED_ERROR_UPPER; \ + type DSCC_B_CR_SQUARED_ERROR_LOWER; \ + type DSCC_B_CR_SQUARED_ERROR_UPPER; \ + type DSCC_R_Y_MAX_ABS_ERROR; \ + type DSCC_G_CB_MAX_ABS_ERROR; \ + type DSCC_B_CR_MAX_ABS_ERROR; \ + type DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL; \ + type DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL; \ + type DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL; \ + type DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL; \ + type DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL; \ + type DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL; \ + type DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL; \ + type 
DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL; \ + type DSCC_UPDATE_PENDING_STATUS; \ + type DSCC_UPDATE_TAKEN_STATUS; \ + type DSCC_UPDATE_TAKEN_ACK; \ + type DSCC_RATE_BUFFER0_FULLNESS_LEVEL; \ + type DSCC_RATE_BUFFER1_FULLNESS_LEVEL; \ + type DSCC_RATE_BUFFER2_FULLNESS_LEVEL; \ + type DSCC_RATE_BUFFER3_FULLNESS_LEVEL; \ + type DSCC_RATE_CONTROL_BUFFER0_FULLNESS_LEVEL; \ + type DSCC_RATE_CONTROL_BUFFER1_FULLNESS_LEVEL; \ + type DSCC_RATE_CONTROL_BUFFER2_FULLNESS_LEVEL; \ + type DSCC_RATE_CONTROL_BUFFER3_FULLNESS_LEVEL; \ + type DSCC_RATE_BUFFER0_INITIAL_XMIT_DELAY_REACHED; \ + type DSCC_RATE_BUFFER1_INITIAL_XMIT_DELAY_REACHED; \ + type DSCC_RATE_BUFFER2_INITIAL_XMIT_DELAY_REACHED; \ + type DSCC_RATE_BUFFER3_INITIAL_XMIT_DELAY_REACHED; \ + type INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN; \ + type INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN; \ + type INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS; \ + type INPUT_PIXEL_FORMAT; \ + type DSCCIF_CONFIG0__BITS_PER_COMPONENT; \ + type DOUBLE_BUFFER_REG_UPDATE_PENDING; \ + type DSCCIF_UPDATE_PENDING_STATUS; \ + type DSCCIF_UPDATE_TAKEN_STATUS; \ + type DSCCIF_UPDATE_TAKEN_ACK; \ + type DSCRM_DSC_FORWARD_EN; \ + type DSCRM_DSC_OPP_PIPE_SOURCE + +struct dcn20_dsc_registers { + uint32_t DSC_TOP_CONTROL; + uint32_t DSC_DEBUG_CONTROL; + uint32_t DSCC_CONFIG0; + uint32_t DSCC_CONFIG1; + uint32_t DSCC_STATUS; + uint32_t DSCC_INTERRUPT_CONTROL_STATUS; + uint32_t DSCC_PPS_CONFIG0; + uint32_t DSCC_PPS_CONFIG1; + uint32_t DSCC_PPS_CONFIG2; + uint32_t DSCC_PPS_CONFIG3; + uint32_t DSCC_PPS_CONFIG4; + uint32_t DSCC_PPS_CONFIG5; + uint32_t DSCC_PPS_CONFIG6; + uint32_t DSCC_PPS_CONFIG7; + uint32_t DSCC_PPS_CONFIG8; + uint32_t DSCC_PPS_CONFIG9; + uint32_t DSCC_PPS_CONFIG10; + uint32_t DSCC_PPS_CONFIG11; + uint32_t DSCC_PPS_CONFIG12; + uint32_t DSCC_PPS_CONFIG13; + uint32_t DSCC_PPS_CONFIG14; + uint32_t DSCC_PPS_CONFIG15; + uint32_t DSCC_PPS_CONFIG16; + uint32_t DSCC_PPS_CONFIG17; + uint32_t DSCC_PPS_CONFIG18; + uint32_t DSCC_PPS_CONFIG19; + uint32_t DSCC_PPS_CONFIG20; + uint32_t DSCC_PPS_CONFIG21; + uint32_t DSCC_PPS_CONFIG22; + uint32_t DSCC_MEM_POWER_CONTROL; + uint32_t DSCC_R_Y_SQUARED_ERROR_LOWER; + uint32_t DSCC_R_Y_SQUARED_ERROR_UPPER; + uint32_t DSCC_G_CB_SQUARED_ERROR_LOWER; + uint32_t DSCC_G_CB_SQUARED_ERROR_UPPER; + uint32_t DSCC_B_CR_SQUARED_ERROR_LOWER; + uint32_t DSCC_B_CR_SQUARED_ERROR_UPPER; + uint32_t DSCC_MAX_ABS_ERROR0; + uint32_t DSCC_MAX_ABS_ERROR1; + uint32_t DSCC_RATE_BUFFER0_MAX_FULLNESS_LEVEL; + uint32_t DSCC_RATE_BUFFER1_MAX_FULLNESS_LEVEL; + uint32_t DSCC_RATE_BUFFER2_MAX_FULLNESS_LEVEL; + uint32_t DSCC_RATE_BUFFER3_MAX_FULLNESS_LEVEL; + uint32_t DSCC_RATE_CONTROL_BUFFER0_MAX_FULLNESS_LEVEL; + uint32_t DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL; + uint32_t DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL; + uint32_t DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL; + uint32_t DSCCIF_CONFIG0; + uint32_t DSCCIF_CONFIG1; + uint32_t DSCRM_DSC_FORWARD_CONFIG; +}; + + +struct dcn20_dsc_shift { + DSC_FIELD_LIST_DCN20(uint8_t); +}; + +struct dcn20_dsc_mask { + DSC_FIELD_LIST_DCN20(uint32_t); +}; + +/* DSCCIF_CONFIG.INPUT_PIXEL_FORMAT values */ +enum dsc_pixel_format { + DSC_PIXFMT_RGB, + DSC_PIXFMT_YCBCR444, + DSC_PIXFMT_SIMPLE_YCBCR422, + DSC_PIXFMT_NATIVE_YCBCR422, + DSC_PIXFMT_NATIVE_YCBCR420, + DSC_PIXFMT_UNKNOWN +}; + +struct dsc_reg_values { + /* PPS registers */ + struct drm_dsc_config pps; + + /* Additional registers */ + uint32_t dsc_clock_enable; + uint32_t dsc_clock_gating_disable; + uint32_t underflow_recovery_en; + uint32_t underflow_occurred_int_en; 
+ uint32_t underflow_occurred_status; + enum dsc_pixel_format pixel_format; + uint32_t ich_reset_at_eol; + uint32_t alternate_ich_encoding_en; + uint32_t num_slices_h; + uint32_t num_slices_v; + uint32_t rc_buffer_model_size; + uint32_t disable_ich; + uint32_t bpp_x32; + uint32_t dsc_dbg_en; + uint32_t rc_buffer_model_overflow_int_en[4]; +}; + +struct dcn20_dsc { + struct display_stream_compressor base; + const struct dcn20_dsc_registers *dsc_regs; + const struct dcn20_dsc_shift *dsc_shift; + const struct dcn20_dsc_mask *dsc_mask; + + struct dsc_reg_values reg_vals; + + int max_image_width; +}; + +void dsc_config_log(struct display_stream_compressor *dsc, + const struct dsc_config *config); + +void dsc_log_pps(struct display_stream_compressor *dsc, + struct drm_dsc_config *pps); + +void dsc_override_rc_params(struct rc_params *rc, + const struct dc_dsc_rc_params_override *override); + +bool dsc_prepare_config(const struct dsc_config *dsc_cfg, + struct dsc_reg_values *dsc_reg_vals, + struct dsc_optc_config *dsc_optc_cfg); + +enum dsc_pixel_format dsc_dc_pixel_encoding_to_dsc_pixel_format(enum dc_pixel_encoding dc_pix_enc, + bool is_ycbcr422_simple); + +enum dsc_bits_per_comp dsc_dc_color_depth_to_dsc_bits_per_comp(enum dc_color_depth dc_color_depth); + +void dsc_init_reg_values(struct dsc_reg_values *reg_vals); + +void dsc_update_from_dsc_parameters(struct dsc_reg_values *reg_vals, const struct dsc_parameters *dsc_params); + +void dsc2_construct(struct dcn20_dsc *dsc, + struct dc_context *ctx, + int inst, + const struct dcn20_dsc_registers *dsc_regs, + const struct dcn20_dsc_shift *dsc_shift, + const struct dcn20_dsc_mask *dsc_mask); + +void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, + int pixel_clock_100Hz); + +bool dsc2_get_packed_pps(struct display_stream_compressor *dsc, + const struct dsc_config *dsc_cfg, + uint8_t *dsc_packed_pps); + +#endif + diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c new file mode 100644 index 000000000000..71d2dff9986d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dcn35_dsc.h" +#include "reg_helper.h" + +/* Macro definitios for REG_SET macros*/ +#define CTX \ + dsc20->base.ctx + +#define REG(reg)\ + dsc20->dsc_regs->reg + +#undef FN +#define FN(reg_name, field_name) \ + ((const struct dcn35_dsc_shift *)(dsc20->dsc_shift))->field_name, \ + ((const struct dcn35_dsc_mask *)(dsc20->dsc_mask))->field_name + +#define DC_LOGGER \ + dsc->ctx->logger + +void dsc35_construct(struct dcn20_dsc *dsc, + struct dc_context *ctx, + int inst, + const struct dcn20_dsc_registers *dsc_regs, + const struct dcn35_dsc_shift *dsc_shift, + const struct dcn35_dsc_mask *dsc_mask) +{ + dsc2_construct(dsc, ctx, inst, dsc_regs, + (const struct dcn20_dsc_shift *)(dsc_shift), + (const struct dcn20_dsc_mask *)(dsc_mask)); +} + +void dsc35_set_fgcg(struct dcn20_dsc *dsc20, bool enable) +{ + REG_UPDATE(DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, !enable); +} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h new file mode 100644 index 000000000000..133ad38842cc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN35_DSC_H__ +#define __DCN35_DSC_H__ + +#include "dcn20/dcn20_dsc.h" + +#define DSC_REG_LIST_SH_MASK_DCN35(mask_sh) \ + DSC_REG_LIST_SH_MASK_DCN20(mask_sh), \ + DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, mask_sh) + +#define DSC_FIELD_LIST_DCN35(type) \ + struct { \ + DSC_FIELD_LIST_DCN20(type); \ + type DSC_FGCG_REP_DIS; \ + } + +struct dcn35_dsc_shift { + DSC_FIELD_LIST_DCN35(uint8_t); +}; + +struct dcn35_dsc_mask { + DSC_FIELD_LIST_DCN35(uint32_t); +}; + +void dsc35_construct(struct dcn20_dsc *dsc, + struct dc_context *ctx, + int inst, + const struct dcn20_dsc_registers *dsc_regs, + const struct dcn35_dsc_shift *dsc_shift, + const struct dcn35_dsc_mask *dsc_mask); + +void dsc35_set_fgcg(struct dcn20_dsc *dsc20, bool enable); + +#endif /* __DCN35_DSC_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h new file mode 100644 index 000000000000..4b27f29d0d80 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h @@ -0,0 +1,112 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#ifndef __DAL_DSC_H__ +#define __DAL_DSC_H__ + +#include "dc_dsc.h" +#include "dc_hw_types.h" +#include "dc_types.h" +/* do not include any other headers + * or else it might break Edid Utility functionality. + */ + + +/* Input parameters for configuring DSC from the outside of DSC */ +struct dsc_config { + uint32_t pic_width; + uint32_t pic_height; + enum dc_pixel_encoding pixel_encoding; + enum dc_color_depth color_depth; /* Bits per component */ + bool is_odm; + struct dc_dsc_config dc_dsc_cfg; +}; + + +/* Output parameters for configuring DSC-related part of OPTC */ +struct dsc_optc_config { + uint32_t slice_width; /* Slice width in pixels */ + uint32_t bytes_per_pixel; /* Bytes per pixel in u3.28 format */ + bool is_pixel_format_444; /* 'true' if pixel format is 'RGB 444' or 'Simple YCbCr 4:2:2' (4:2:2 upsampled to 4:4:4)' */ +}; + + +struct dcn_dsc_state { + uint32_t dsc_clock_en; + uint32_t dsc_slice_width; + uint32_t dsc_bits_per_pixel; + uint32_t dsc_slice_height; + uint32_t dsc_pic_width; + uint32_t dsc_pic_height; + uint32_t dsc_slice_bpg_offset; + uint32_t dsc_chunk_size; + uint32_t dsc_fw_en; + uint32_t dsc_opp_source; +}; + + +/* DSC encoder capabilities + * They differ from the DPCD DSC caps because they are based on AMD DSC encoder caps. + */ +union dsc_enc_slice_caps { + struct { + uint8_t NUM_SLICES_1 : 1; + uint8_t NUM_SLICES_2 : 1; + uint8_t NUM_SLICES_3 : 1; /* This one is not per DSC spec, but our encoder supports it */ + uint8_t NUM_SLICES_4 : 1; + uint8_t NUM_SLICES_8 : 1; + uint8_t NUM_SLICES_12 : 1; + uint8_t NUM_SLICES_16 : 1; + } bits; + uint8_t raw; +}; + +struct dsc_enc_caps { + uint8_t dsc_version; + union dsc_enc_slice_caps slice_caps; + int32_t lb_bit_depth; + bool is_block_pred_supported; + union dsc_color_formats color_formats; + union dsc_color_depth color_depth; + int32_t max_total_throughput_mps; /* Maximum total throughput with all the slices combined */ + int32_t max_slice_width; + uint32_t bpp_increment_div; /* bpp increment divisor, e.g. 
if 16, it's 1/16th of a bit */ + uint32_t edp_sink_max_bits_per_pixel; + bool is_dp; +}; + +struct dsc_funcs { + void (*dsc_get_enc_caps)(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); + void (*dsc_read_state)(struct display_stream_compressor *dsc, struct dcn_dsc_state *s); + bool (*dsc_validate_stream)(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg); + void (*dsc_set_config)(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, + struct dsc_optc_config *dsc_optc_cfg); + bool (*dsc_get_packed_pps)(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, + uint8_t *dsc_packed_pps); + void (*dsc_enable)(struct display_stream_compressor *dsc, int opp_pipe); + void (*dsc_disable)(struct display_stream_compressor *dsc); + void (*dsc_disconnect)(struct display_stream_compressor *dsc); +}; + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h deleted file mode 100644 index 4b27f29d0d80..000000000000 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ -#ifndef __DAL_DSC_H__ -#define __DAL_DSC_H__ - -#include "dc_dsc.h" -#include "dc_hw_types.h" -#include "dc_types.h" -/* do not include any other headers - * or else it might break Edid Utility functionality. 
- */ - - -/* Input parameters for configuring DSC from the outside of DSC */ -struct dsc_config { - uint32_t pic_width; - uint32_t pic_height; - enum dc_pixel_encoding pixel_encoding; - enum dc_color_depth color_depth; /* Bits per component */ - bool is_odm; - struct dc_dsc_config dc_dsc_cfg; -}; - - -/* Output parameters for configuring DSC-related part of OPTC */ -struct dsc_optc_config { - uint32_t slice_width; /* Slice width in pixels */ - uint32_t bytes_per_pixel; /* Bytes per pixel in u3.28 format */ - bool is_pixel_format_444; /* 'true' if pixel format is 'RGB 444' or 'Simple YCbCr 4:2:2' (4:2:2 upsampled to 4:4:4)' */ -}; - - -struct dcn_dsc_state { - uint32_t dsc_clock_en; - uint32_t dsc_slice_width; - uint32_t dsc_bits_per_pixel; - uint32_t dsc_slice_height; - uint32_t dsc_pic_width; - uint32_t dsc_pic_height; - uint32_t dsc_slice_bpg_offset; - uint32_t dsc_chunk_size; - uint32_t dsc_fw_en; - uint32_t dsc_opp_source; -}; - - -/* DSC encoder capabilities - * They differ from the DPCD DSC caps because they are based on AMD DSC encoder caps. - */ -union dsc_enc_slice_caps { - struct { - uint8_t NUM_SLICES_1 : 1; - uint8_t NUM_SLICES_2 : 1; - uint8_t NUM_SLICES_3 : 1; /* This one is not per DSC spec, but our encoder supports it */ - uint8_t NUM_SLICES_4 : 1; - uint8_t NUM_SLICES_8 : 1; - uint8_t NUM_SLICES_12 : 1; - uint8_t NUM_SLICES_16 : 1; - } bits; - uint8_t raw; -}; - -struct dsc_enc_caps { - uint8_t dsc_version; - union dsc_enc_slice_caps slice_caps; - int32_t lb_bit_depth; - bool is_block_pred_supported; - union dsc_color_formats color_formats; - union dsc_color_depth color_depth; - int32_t max_total_throughput_mps; /* Maximum total throughput with all the slices combined */ - int32_t max_slice_width; - uint32_t bpp_increment_div; /* bpp increment divisor, e.g. if 16, it's 1/16th of a bit */ - uint32_t edp_sink_max_bits_per_pixel; - bool is_dp; -}; - -struct dsc_funcs { - void (*dsc_get_enc_caps)(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); - void (*dsc_read_state)(struct display_stream_compressor *dsc, struct dcn_dsc_state *s); - bool (*dsc_validate_stream)(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg); - void (*dsc_set_config)(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, - struct dsc_optc_config *dsc_optc_cfg); - bool (*dsc_get_packed_pps)(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, - uint8_t *dsc_packed_pps); - void (*dsc_enable)(struct display_stream_compressor *dsc, int opp_pipe); - void (*dsc_disable)(struct display_stream_compressor *dsc); - void (*dsc_disconnect)(struct display_stream_compressor *dsc); -}; - -#endif -- cgit v1.2.3 From c77b0008591094d454c1f340d1e82b5ebe2d918d Mon Sep 17 00:00:00 2001 From: Max Tseng Date: Tue, 7 Nov 2023 15:00:03 +0800 Subject: drm/amd/display: replay: generalize the send command function usage Augment the function to allow send different format data in different use case. 
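For reference, a minimal caller sketch of the reworked interface (editorial illustration, not part of the patch): the union member sync_data, the enum value Replay_Set_Timing_Sync_Supported and the edp_send_replay_cmd() signature are taken from the diffs below; the timing_sync_supported field inside sync_data is assumed for illustration only.

	/* Hypothetical caller, assuming sync_data carries a timing_sync_supported flag */
	static bool example_set_timing_sync(struct dc_link *link, bool supported)
	{
		union dmub_replay_cmd_set cmd = {0};

		cmd.sync_data.timing_sync_supported = supported; /* assumed field name */

		/* panel instance lookup is handled inside edp_send_replay_cmd() */
		return edp_send_replay_cmd(link, Replay_Set_Timing_Sync_Supported, &cmd);
	}

With the command data packed into one union, new message types only need a new member in dmub_replay_cmd_set rather than another parameter on every function in the call chain.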
Reviewed-by: Dennis Chan Acked-by: Hamza Mahfooz Signed-off-by: Max Tseng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h | 3 +-- drivers/gpu/drm/amd/display/dc/inc/link.h | 2 +- .../drm/amd/display/dc/link/protocols/link_edp_panel_control.c | 4 ++-- .../drm/amd/display/dc/link/protocols/link_edp_panel_control.h | 2 +- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 10 ++++++++++ 6 files changed, 16 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index edf60c4f318c..6f5da510e8de 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1029,6 +1029,7 @@ enum replay_coasting_vtotal_type { * passed to DMUB. */ enum replay_FW_Message_type { + Replay_Msg_Not_Support = -1, Replay_Set_Timing_Sync_Supported, }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index 427bc47a676e..368711f76335 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -46,8 +46,7 @@ struct dmub_replay_funcs { void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt, uint8_t panel_inst); void (*replay_send_cmd)(struct dmub_replay *dmub, - enum replay_FW_Message_type msg, unsigned int panel_inst, - uint32_t cmd_data); + enum replay_FW_Message_type msg, unsigned int panel_inst, union dmub_replay_cmd_set *cmd_element); void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal, uint8_t panel_inst); void (*replay_residency)(struct dmub_replay *dmub, diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index dd3f53151d8b..7439865d1b50 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -283,7 +283,7 @@ struct link_service { const struct dc_stream_state *stream); bool (*edp_send_replay_cmd)(struct dc_link *link, enum replay_FW_Message_type msg, - uint32_t params); + union dmub_replay_cmd_set *cmd_data); bool (*edp_set_coasting_vtotal)( struct dc_link *link, uint16_t coasting_vtotal); bool (*edp_replay_residency)(const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index c52b51b2b4b3..6bc8ec47e267 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -1014,7 +1014,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream */ bool edp_send_replay_cmd(struct dc_link *link, enum replay_FW_Message_type msg, - uint32_t cmd_data) + union dmub_replay_cmd_set *cmd_data) { struct dc *dc = link->ctx->dc; struct dmub_replay *replay = dc->res_pool->replay; @@ -1026,7 +1026,7 @@ bool edp_send_replay_cmd(struct dc_link *link, if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) return false; - replay->funcs->replay_send_cmd(replay, msg, cmd_data, panel_inst); + replay->funcs->replay_send_cmd(replay, msg, panel_inst, cmd_data); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 6b223580ac8a..39526bd40178 100644 --- 
a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -59,7 +59,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream); bool edp_send_replay_cmd(struct dc_link *link, enum replay_FW_Message_type msg, - uint32_t params); + union dmub_replay_cmd_set *cmd_data); bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal); bool edp_replay_residency(const struct dc_link *link, unsigned int *residency, const bool is_start, const bool is_alpm); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 55573083bc31..5f06cf4c663f 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -3143,6 +3143,16 @@ struct dmub_rb_cmd_replay_set_timing_sync { struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data; }; +/** + * Definition union of replay command set + */ +union dmub_replay_cmd_set { + /** + * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data. + */ + struct dmub_cmd_replay_set_timing_sync_data sync_data; +}; + /** * Set of HW components that can be locked. * -- cgit v1.2.3 From 220db802cb505e6ec3b3e0018ac0233205632a72 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 27 Oct 2023 14:12:51 -0400 Subject: drm/amd/display: Do not read DPREFCLK spread info from LUT on DCN35 [WHY] Currently DCN35 does not spread DPREFCLK [HOW] Remove hardcoded table with nonzero caps Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Michael Strauss Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 3469f692d6ea..0f3f6a9d5144 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -515,11 +515,6 @@ static DpmClocks_t_dcn35 dummy_clocks; static struct dcn35_watermarks dummy_wms = { 0 }; -static struct dcn35_ss_info_table ss_info_table = { - .ss_divider = 1000, - .ss_percentage = {0, 0, 375, 375, 375} -}; - static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table) { int i, num_valid_sets; @@ -965,21 +960,6 @@ struct clk_mgr_funcs dcn35_fpga_funcs = { .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, }; -static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) -{ - uint32_t clock_source; - struct dc_context *ctx = clk_mgr->base.ctx; - - REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); - - clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; - - if (clk_mgr->dprefclk_ss_percentage != 0) { - clk_mgr->ss_on_dprefclk = true; - clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider; - } -} - void dcn35_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_dcn35 *clk_mgr, @@ -1052,8 +1032,6 @@ void dcn35_clk_mgr_construct( dce_clock_read_ss_info(&clk_mgr->base); /*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/ - dcn35_read_ss_info_from_lut(&clk_mgr->base); - clk_mgr->base.base.bw_params = &dcn35_bw_params; if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { -- cgit v1.2.3 From 
75a3371e8ffdab2e504f4326daab60f8fb15fdf1 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 8 Nov 2023 17:16:28 -0500 Subject: drm/amd/display: Increase num voltage states to 40 [Description] If during driver init stage there are greater than 20 intermediary voltage states while constructing the SOC BB we could hit issues because we will index outside of the clock_limits array and start overwriting data. Increase the total number of states to 40 to avoid this issue. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dc_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h index 2cbdd75429ff..6e669a2c5b2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h @@ -36,7 +36,7 @@ * Define the maximum amount of states supported by the ASIC. Every ASIC has a * specific number of states; this macro defines the maximum number of states. */ -#define DC__VOLTAGE_STATES 20 +#define DC__VOLTAGE_STATES 40 #define DC__NUM_DPP__4 1 #define DC__NUM_DPP__0_PRESENT 1 #define DC__NUM_DPP__1_PRESENT 1 -- cgit v1.2.3 From 37f4382b64a2b01109a0ed5c05f58d3f86385e10 Mon Sep 17 00:00:00 2001 From: Max Tseng Date: Wed, 8 Nov 2023 11:31:50 +0800 Subject: drm/amd/display: replay: Augment Frameupdate Command [Why] Sending certain Frameupdate number for Replay Power Evaluation Reviewed-by: Dennis Chan Acked-by: Hamza Mahfooz Signed-off-by: Max Tseng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h | 2 +- .../dc/link/protocols/link_edp_panel_control.c | 10 ++++- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 51 ++++++++++++++++++++++ 4 files changed, 61 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 6f5da510e8de..4a60d2c47686 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1031,6 +1031,7 @@ enum replay_coasting_vtotal_type { enum replay_FW_Message_type { Replay_Msg_Not_Support = -1, Replay_Set_Timing_Sync_Supported, + Replay_Set_Residency_Frameupdate_Timer, }; union replay_error_status { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index 368711f76335..b3ee90a0b8b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -46,7 +46,7 @@ struct dmub_replay_funcs { void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt, uint8_t panel_inst); void (*replay_send_cmd)(struct dmub_replay *dmub, - enum replay_FW_Message_type msg, unsigned int panel_inst, union dmub_replay_cmd_set *cmd_element); + enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element); void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal, uint8_t panel_inst); void (*replay_residency)(struct dmub_replay *dmub, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 6bc8ec47e267..fdeb8dff5485 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ 
b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -1023,10 +1023,16 @@ bool edp_send_replay_cmd(struct dc_link *link, if (!replay) return false; - if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) + DC_LOGGER_INIT(link->ctx->logger); + + if (dc_get_edp_link_panel_inst(dc, link, &panel_inst)) + cmd_data->panel_inst = panel_inst; + else { + DC_LOG_DC("%s(): get edp panel inst fail ", __func__); return false; + } - replay->funcs->replay_send_cmd(replay, msg, panel_inst, cmd_data); + replay->funcs->replay_send_cmd(replay, msg, cmd_data); return true; } diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 5f06cf4c663f..d1becbb5aa29 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2880,6 +2880,10 @@ enum dmub_cmd_replay_type { * Set disabled iiming sync. */ DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED = 5, + /** + * Set Residency Frameupdate Timer. + */ + DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6, }; /** @@ -3143,14 +3147,57 @@ struct dmub_rb_cmd_replay_set_timing_sync { struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data; }; +/** + * Data passed from driver to FW in DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. + */ +struct dmub_cmd_replay_frameupdate_timer_data { + /** + * Panel Instance. + * Panel isntance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * Replay Frameupdate Timer Enable or not + */ + uint8_t enable; + /** + * REPLAY force reflash frame update number + */ + uint16_t frameupdate_count; +}; +/** + * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER + */ +struct dmub_rb_cmd_replay_set_frameupdate_timer { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command. + */ + struct dmub_cmd_replay_frameupdate_timer_data data; +}; + /** * Definition union of replay command set */ union dmub_replay_cmd_set { + /** + * Panel Instance. + * Panel isntance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; /** * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data. */ struct dmub_cmd_replay_set_timing_sync_data sync_data; + /** + * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data. + */ + struct dmub_cmd_replay_frameupdate_timer_data timer_data; }; /** @@ -4288,6 +4335,10 @@ union dmub_rb_cmd { struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal replay_set_power_opt_and_coasting_vtotal; struct dmub_rb_cmd_replay_set_timing_sync replay_set_timing_sync; + /** + * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. + */ + struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer; }; /** -- cgit v1.2.3 From 43b8ac4b34ec239bccf4a692c1227ef51a95a4d2 Mon Sep 17 00:00:00 2001 From: Camille Cho Date: Fri, 3 Nov 2023 12:08:42 +0800 Subject: drm/amd/display: Simplify brightness initialization [Why] Remove the brightness cache in DC. It uses a single value to represent the brightness for both SDR and HDR mode. This leads to flash in HDR on/off. It also unconditionally programs brightness as in HDR mode. This may introduce garbage on SDR mode in miniLED panel. [How] Simplify the initialization flow by removing the DC cache and taking what panel has as default. 
Expand the mechanism for PWM to DPCD Aux to restore cached brightness value generally. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Krunoslav Kovac Acked-by: Hamza Mahfooz Signed-off-by: Camille Cho Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 - drivers/gpu/drm/amd/display/dc/dc_types.h | 4 ---- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 2 +- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 3 +-- .../display/dc/link/protocols/link_edp_panel_control.c | 16 +++------------- .../display/dc/link/protocols/link_edp_panel_control.h | 1 - 6 files changed, 5 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 349629858205..18b8d7f3f738 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1610,7 +1610,6 @@ struct dc_link { enum edp_revision edp_revision; union dpcd_sink_ext_caps dpcd_sink_ext_caps; - struct backlight_settings backlight_settings; struct psr_settings psr_settings; struct replay_settings replay_settings; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 4a60d2c47686..a2f6c994a2a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -991,10 +991,6 @@ struct link_mst_stream_allocation_table { struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM]; }; -struct backlight_settings { - uint32_t backlight_millinits; -}; - /* PSR feature flags */ struct psr_settings { bool psr_feature_enabled; // PSR is supported by sink diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index f2fe523f914f..24153b0df503 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -879,7 +879,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, (link->dpcd_sink_ext_caps.bits.oled == 1)) { dpcd_set_source_specific_data(link); msleep(post_oui_delay); - set_cached_brightness_aux(link); + set_default_brightness_aux(link); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 34a4a8c0e18c..f8e01ca09d96 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2142,8 +2142,7 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (link->dpcd_sink_ext_caps.bits.oled == 1 || link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 || link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) { - set_cached_brightness_aux(link); - + set_default_brightness_aux(link); if (link->dpcd_sink_ext_caps.bits.oled == 1) msleep(bl_oled_enable_delay); edp_backlight_enable_aux(link, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index fdeb8dff5485..ac0fa88b52a0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -170,7 +170,6 @@ bool edp_set_backlight_level_nits(struct dc_link *link, *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits; *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; - 
link->backlight_settings.backlight_millinits = backlight_millinits; if (!link->dpcd_caps.panel_luminance_control) { if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL, @@ -288,9 +287,9 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if < 1 nits or > 5000, it might be wrong readback - if (default_backlight < 1000 || default_backlight > 5000000) - default_backlight = 150000; // + // if > 5000, it might be wrong readback + if (default_backlight > 5000000) + default_backlight = 150000; return edp_set_backlight_level_nits(link, true, default_backlight, 0); @@ -298,15 +297,6 @@ bool set_default_brightness_aux(struct dc_link *link) return false; } -bool set_cached_brightness_aux(struct dc_link *link) -{ - if (link->backlight_settings.backlight_millinits) - return edp_set_backlight_level_nits(link, true, - link->backlight_settings.backlight_millinits, 0); - else - return set_default_brightness_aux(link); - return false; -} bool edp_is_ilr_optimization_enabled(struct dc_link *link) { if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 39526bd40178..b7493ff4fcee 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -30,7 +30,6 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); bool set_default_brightness_aux(struct dc_link *link); -bool set_cached_brightness_aux(struct dc_link *link); void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd); int edp_get_backlight_level(const struct dc_link *link); bool edp_get_backlight_level_nits(struct dc_link *link, -- cgit v1.2.3 From 83a79dd6f4fb54c8cfe3ecbd378817047687a9b2 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Mon, 7 Aug 2023 15:34:39 +0800 Subject: drm/amd/display: adjust flow for deallocation mst payload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] MST relevant variables are maintained at drm side. As the result, we still have to call drm_dp_remove_payload_part2() to update the relevant values regardless the link is under mst mode or not. We used to have a workaround patch to tackle this: commit 3d8fcc6740c9 ("drm/amd/display: Extract temp drm mst deallocation wa into its own function") Now it's time to remove the workaround and adjust the flow. [How] During deallocate_mst_payload(), source actually doesn't send out ALLOCATE_PAYLOAD at the end as like the flow in allocate_mst_payload(). Call function dm_helpers_dp_mst_send_payload_allocation() at the end of deallocate_mst_payload() is a bit confusing. Separate dm_helpers_dp_mst_send_payload_allocation() into 2 functions. Have a new function dm_helpers_dp_mst_update_mst_mgr_for_deallocation() to replace dm_helpers_dp_mst_send_payload_allocation() for payload deallocation. 
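In caller terms the split looks like this (a sketch using the names introduced by this patch; surrounding error handling elided):

	/* before: one helper, steered by an enable flag */
	dm_helpers_dp_mst_send_payload_allocation(stream->ctx, stream, false);

	/* after: deallocation gets its own helper, no flag to misread */
	dm_helpers_dp_mst_update_mst_mgr_for_deallocation(stream->ctx, stream);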
Cc: Ville Syrjälä Reviewed-by: Wenjing Liu Acked-by: Hamza Mahfooz Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 ---- .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 53 +++++++---- drivers/gpu/drm/amd/display/dc/dc.h | 1 - drivers/gpu/drm/amd/display/dc/dm_helpers.h | 12 ++- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 105 ++------------------- 5 files changed, 50 insertions(+), 138 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ba3f7b232a16..2be64c593c87 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1715,23 +1715,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ adev->dm.dc->debug.ignore_cable_id = true; - /* TODO: There is a new drm mst change where the freedom of - * vc_next_start_slot update is revoked/moved into drm, instead of in - * driver. This forces us to make sure to get vc_next_start_slot updated - * in drm function each time without considering if mst_state is active - * or not. Otherwise, next time hotplug will give wrong start_slot - * number. We are implementing a temporary solution to even notify drm - * mst deallocation when link is no longer of MST type when uncommitting - * the stream so we will have more time to work on a proper solution. - * Ideally when dm_helpers_dp_mst_stop_top_mgr message is triggered, we - * should notify drm to do a complete "reset" of its states and stop - * calling further drm mst functions when link is no longer of an MST - * type. This could happen when we unplug an MST hubs/displays. When - * uncommit stream comes later after unplug, we should just reset - * hardware states only. 
- */ - adev->dm.dc->debug.temp_mst_deallocation_sequence = true; - if (adev->dm.dc->caps.dp_hdmi21_pcon_support) DRM_INFO("DP-HDMI FRL PCON supported\n"); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index c7a29bb737e2..e44ba5c1c48e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -333,15 +333,14 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger( return ACT_SUCCESS; } -bool dm_helpers_dp_mst_send_payload_allocation( +void dm_helpers_dp_mst_send_payload_allocation( struct dc_context *ctx, - const struct dc_stream_state *stream, - bool enable) + const struct dc_stream_state *stream) { struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_state *mst_state; struct drm_dp_mst_topology_mgr *mst_mgr; - struct drm_dp_mst_atomic_payload *new_payload, old_payload; + struct drm_dp_mst_atomic_payload *new_payload; enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD; enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD; int ret = 0; @@ -349,25 +348,13 @@ bool dm_helpers_dp_mst_send_payload_allocation( aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; if (!aconnector || !aconnector->mst_root) - return false; + return; mst_mgr = &aconnector->mst_root->mst_mgr; mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); - new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); - if (!enable) { - set_flag = MST_CLEAR_ALLOCATED_PAYLOAD; - clr_flag = MST_ALLOCATE_NEW_PAYLOAD; - } - - if (enable) { - ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload); - } else { - dm_helpers_construct_old_payload(mst_mgr, mst_state, - new_payload, &old_payload); - drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload); - } + ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload); if (ret) { amdgpu_dm_set_mst_status(&aconnector->mst_status, @@ -378,10 +365,36 @@ bool dm_helpers_dp_mst_send_payload_allocation( amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false); } - - return true; } +void dm_helpers_dp_mst_update_mst_mgr_for_deallocation( + struct dc_context *ctx, + const struct dc_stream_state *stream) +{ + struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_state *mst_state; + struct drm_dp_mst_topology_mgr *mst_mgr; + struct drm_dp_mst_atomic_payload *new_payload, old_payload; + enum mst_progress_status set_flag = MST_CLEAR_ALLOCATED_PAYLOAD; + enum mst_progress_status clr_flag = MST_ALLOCATE_NEW_PAYLOAD; + + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + + if (!aconnector || !aconnector->mst_root) + return; + + mst_mgr = &aconnector->mst_root->mst_mgr; + mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); + new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); + dm_helpers_construct_old_payload(mst_mgr, mst_state, + new_payload, &old_payload); + + drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload); + + amdgpu_dm_set_mst_status(&aconnector->mst_status, set_flag, true); + amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false); + } + void dm_dtn_log_begin(struct dc_context *ctx, struct dc_log_buffer_ctx *log_ctx) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 18b8d7f3f738..85771ef98cd7 100644 
--- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -956,7 +956,6 @@ struct dc_debug_options { unsigned int min_prefetch_in_strobe_ns; bool disable_unbounded_requesting; bool dig_fifo_off_in_blank; - bool temp_mst_deallocation_sequence; bool override_dispclk_programming; bool otg_crc_db; bool disallow_dispclk_dppclk_ds; diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 7ce9a5b6c33b..6d7a15dcf8a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -103,10 +103,16 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger( /* * Sends ALLOCATE_PAYLOAD message. */ -bool dm_helpers_dp_mst_send_payload_allocation( +void dm_helpers_dp_mst_send_payload_allocation( struct dc_context *ctx, - const struct dc_stream_state *stream, - bool enable); + const struct dc_stream_state *stream); + +/* + * Update mst manager relevant variables + */ +void dm_helpers_dp_mst_update_mst_mgr_for_deallocation( + struct dc_context *ctx, + const struct dc_stream_state *stream); bool dm_helpers_dp_mst_start_top_mgr( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index f8e01ca09d96..aa0a086aa7fc 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -1247,86 +1247,6 @@ static void remove_stream_from_alloc_table( } } -static enum dc_status deallocate_mst_payload_with_temp_drm_wa( - struct pipe_ctx *pipe_ctx) -{ - struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->link; - struct dc_dp_mst_stream_allocation_table proposed_table = {0}; - struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0); - int i; - bool mst_mode = (link->type == dc_connection_mst_branch); - /* adjust for drm changes*/ - const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); - const struct dc_link_settings empty_link_settings = {0}; - DC_LOGGER_INIT(link->ctx->logger); - - if (link_hwss->ext.set_throttled_vcp_size) - link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp); - if (link_hwss->ext.set_hblank_min_symbol_width) - link_hwss->ext.set_hblank_min_symbol_width(pipe_ctx, - &empty_link_settings, - avg_time_slots_per_mtp); - - if (dm_helpers_dp_mst_write_payload_allocation_table( - stream->ctx, - stream, - &proposed_table, - false)) - update_mst_stream_alloc_table( - link, - pipe_ctx->stream_res.stream_enc, - pipe_ctx->stream_res.hpo_dp_stream_enc, - &proposed_table); - else - DC_LOG_WARNING("Failed to update" - "MST allocation table for" - "pipe idx:%d\n", - pipe_ctx->pipe_idx); - - DC_LOG_MST("%s" - "stream_count: %d: ", - __func__, - link->mst_stream_alloc_table.stream_count); - - for (i = 0; i < MAX_CONTROLLER_NUM; i++) { - DC_LOG_MST("stream_enc[%d]: %p " - "stream[%d].hpo_dp_stream_enc: %p " - "stream[%d].vcp_id: %d " - "stream[%d].slot_count: %d\n", - i, - (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, - i, - (void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc, - i, - link->mst_stream_alloc_table.stream_allocations[i].vcp_id, - i, - link->mst_stream_alloc_table.stream_allocations[i].slot_count); - } - - if (link_hwss->ext.update_stream_allocation_table == NULL || - link_dp_get_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { - DC_LOG_DEBUG("Unknown encoding format\n"); - return 
DC_ERROR_UNEXPECTED; - } - - link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, - &link->mst_stream_alloc_table); - - if (mst_mode) { - dm_helpers_dp_mst_poll_for_allocation_change_trigger( - stream->ctx, - stream); - } - - dm_helpers_dp_mst_send_payload_allocation( - stream->ctx, - stream, - false); - - return DC_OK; -} - static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; @@ -1339,9 +1259,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) const struct dc_link_settings empty_link_settings = {0}; DC_LOGGER_INIT(link->ctx->logger); - if (link->dc->debug.temp_mst_deallocation_sequence) - return deallocate_mst_payload_with_temp_drm_wa(pipe_ctx); - /* deallocate_mst_payload is called before disable link. When mode or * disable/enable monitor, new stream is created which is not in link * stream[] yet. For this, payload is not allocated yet, so de-alloc @@ -1414,16 +1331,14 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, &link->mst_stream_alloc_table); - if (mst_mode) { + if (mst_mode) dm_helpers_dp_mst_poll_for_allocation_change_trigger( stream->ctx, stream); - dm_helpers_dp_mst_send_payload_allocation( - stream->ctx, - stream, - false); - } + dm_helpers_dp_mst_update_mst_mgr_for_deallocation( + stream->ctx, + stream); return DC_OK; } @@ -1504,12 +1419,10 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) stream->ctx, stream); - if (ret != ACT_LINK_LOST) { + if (ret != ACT_LINK_LOST) dm_helpers_dp_mst_send_payload_allocation( stream->ctx, - stream, - true); - } + stream); /* slot X.Y for only current stream */ pbn_per_slot = get_pbn_per_slot(stream); @@ -1769,8 +1682,7 @@ enum dc_status link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in /* send ALLOCATE_PAYLOAD sideband message with updated pbn */ dm_helpers_dp_mst_send_payload_allocation( stream->ctx, - stream, - true); + stream); /* notify immediate branch device table update */ if (dm_helpers_dp_mst_write_payload_allocation_table( @@ -1899,8 +1811,7 @@ enum dc_status link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_ /* send ALLOCATE_PAYLOAD sideband message with updated pbn */ dm_helpers_dp_mst_send_payload_allocation( stream->ctx, - stream, - true); + stream); } /* increase throttled vcp size */ -- cgit v1.2.3 From 5f2a404cbccec0c8d6635f0997cea2ac226d25d4 Mon Sep 17 00:00:00 2001 From: Dennis Chan Date: Tue, 31 Oct 2023 10:09:02 +0800 Subject: drm/amd/display: Disable Timing sync check in Full-Screen Video Case [why] If Panel max link off frame count is low, it will cause low residency for Replay, then Disabled timing sync check in Full screen Video Case. 
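The gating logic itself is not part of this diff; a hypothetical use of the new enum could look like the following (illustrative sketch only, not the patch's actual policy code; allow_timing_sync is a made-up local):

	/* Hypothetical: only request timing sync when the panel reports at
	 * least a "good" maximum link-off frame count. */
	if (link->replay_settings.link_off_frame_count_level >=
	    PR_LINK_OFF_FRAME_COUNT_GOOD)
		allow_timing_sync = true;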
Reviewed-by: Robin Chen Acked-by: Hamza Mahfooz Signed-off-by: Dennis Chan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 66 ++++++++++++++++++------- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 9 ++-- 2 files changed, 51 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index a2f6c994a2a9..7313cfe69498 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1018,6 +1018,12 @@ enum replay_coasting_vtotal_type { PR_COASTING_TYPE_NUM, }; +enum replay_link_off_frame_count_level { + PR_LINK_OFF_FRAME_COUNT_FAIL = 0x0, + PR_LINK_OFF_FRAME_COUNT_GOOD = 0x2, + PR_LINK_OFF_FRAME_COUNT_BEST = 0x6, +}; + /* * This is general Interface for Replay to * set an 32 bit variable to dmub @@ -1041,26 +1047,48 @@ union replay_error_status { }; struct replay_config { - bool replay_supported; // Replay feature is supported - unsigned int replay_power_opt_supported; // Power opt flags that are supported - bool replay_smu_opt_supported; // SMU optimization is supported - unsigned int replay_enable_option; // Replay enablement option - uint32_t debug_flags; // Replay debug flags - bool replay_timing_sync_supported; // Replay desync is supported - bool force_disable_desync_error_check; // Replay desync is supported - bool received_desync_error_hpd; //Replay Received Desync Error HPD. - union replay_error_status replay_error_status; // Replay error status -}; - -/* Replay feature flags */ + /* Replay feature is supported */ + bool replay_supported; + /* Power opt flags that are supported */ + unsigned int replay_power_opt_supported; + /* SMU optimization is supported */ + bool replay_smu_opt_supported; + /* Replay enablement option */ + unsigned int replay_enable_option; + /* Replay debug flags */ + uint32_t debug_flags; + /* Replay sync is supported */ + bool replay_timing_sync_supported; + /* Replay Disable desync error check. */ + bool force_disable_desync_error_check; + /* Replay Received Desync Error HPD. 
*/ + bool received_desync_error_hpd; + /* Replay feature is supported long vblank */ + bool replay_support_fast_resync_in_ultra_sleep_mode; + /* Replay error status */ + union replay_error_status replay_error_status; +}; + +/* Replay feature flags*/ struct replay_settings { - struct replay_config config; // Replay configuration - bool replay_feature_enabled; // Replay feature is ready for activating - bool replay_allow_active; // Replay is currently active - unsigned int replay_power_opt_active; // Power opt flags that are activated currently - bool replay_smu_opt_enable; // SMU optimization is enabled - uint16_t coasting_vtotal; // Current Coasting vtotal - uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; // Coasting vtotal table + /* Replay configuration */ + struct replay_config config; + /* Replay feature is ready for activating */ + bool replay_feature_enabled; + /* Replay is currently active */ + bool replay_allow_active; + /* Replay is currently active */ + bool replay_allow_long_vblank; + /* Power opt flags that are activated currently */ + unsigned int replay_power_opt_active; + /* SMU optimization is enabled */ + bool replay_smu_opt_enable; + /* Current Coasting vtotal */ + uint16_t coasting_vtotal; + /* Coasting vtotal table */ + uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; + /* Maximum link off frame count */ + enum replay_link_off_frame_count_level link_off_frame_count_level; }; /* To split out "global" and "per-panel" config settings. diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index d1becbb5aa29..a08073fc92ae 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -3056,15 +3056,14 @@ struct dmub_cmd_replay_set_timing_sync_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; - /** - * Explicit padding to 4 byte boundary. + * REPLAY set_timing_sync */ - uint8_t pad[3]; + uint8_t timing_sync_supported; /** - * REPLAY set_timing_sync + * Explicit padding to 4 byte boundary. */ - bool timing_sync_supported; + uint8_t pad[2]; }; /** -- cgit v1.2.3 From 8f3656ce65d6d550247a85fdb5c54a5b65cc2252 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 9 Nov 2023 10:50:30 -0500 Subject: drm/amd/display: Enable SubVP on 1080p60 displays [Description] - Previously SubVP would never be selected on 1080p60 displays because it has too much vactive margin. However, implement a change to allow it like how 1440p60 is allowed. 
- Add a new struct such that we have a list of allowed modes for enabling subvp with vactive margin (currently 1080p60 and 1440p60) - Also ensure to block drr + vblank cases to prevent unexpected enablement of new display configs - Update SW cursor fallback for these new potential cases as well Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 32 +++++++++++++++-- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 41 +++++++++++++--------- .../amd/display/dc/resource/dcn32/dcn32_resource.h | 10 ++++++ 4 files changed, 65 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 84d632700949..f3a9fdd2340d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -5316,7 +5316,7 @@ bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_st if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 && ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) return true; - else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 && + else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 1080 && ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) return true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index ef0a2b01734d..389ac7ae1154 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -665,6 +665,30 @@ bool dcn32_check_native_scaling_for_res(struct pipe_ctx *pipe, unsigned int widt return is_native_scaling; } +/** + * disallow_subvp_in_active_plus_blank() - Function to determine disallowed subvp + drr/vblank configs + * + * @pipe: subvp pipe to be used for the subvp + drr/vblank config + * + * Since subvp is being enabled on more configs (such as 1080p60), we want + * to explicitly block any configs that we don't want to enable. We do not + * want to enable any 1080p60 (SubVP) + drr / vblank configs since these + * are already convered by FPO. 
+ * + * Return: True if disallowed, false otherwise + */ +static bool disallow_subvp_in_active_plus_blank(struct pipe_ctx *pipe) +{ + bool disallow = false; + + if (resource_is_pipe_type(pipe, OPP_HEAD) && + resource_is_pipe_type(pipe, DPP_PIPE)) { + if (pipe->stream->timing.v_addressable == 1080 && pipe->stream->timing.h_addressable == 1920) + disallow = true; + } + return disallow; +} + /** * dcn32_subvp_drr_admissable() - Determine if SubVP + DRR config is admissible * @@ -688,6 +712,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) bool drr_pipe_found = false; bool drr_psr_capable = false; uint64_t refresh_rate = 0; + bool subvp_disallow = false; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -697,6 +722,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { subvp_count++; + subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); @@ -713,7 +739,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) } } - if (subvp_count == 1 && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable && + if (subvp_count == 1 && !subvp_disallow && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable && ((uint32_t)refresh_rate < 120)) result = true; @@ -746,6 +772,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int struct vba_vars_st *vba = &context->bw_ctx.dml.vba; bool vblank_psr_capable = false; uint64_t refresh_rate = 0; + bool subvp_disallow = false; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -755,6 +782,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { subvp_count++; + subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); @@ -772,7 +800,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int } if (subvp_count == 1 && non_subvp_pipes == 1 && !drr_pipe_found && !vblank_psr_capable && - ((uint32_t)refresh_rate < 120) && + ((uint32_t)refresh_rate < 120) && !subvp_disallow && vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) result = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 92e2ddc9ab7e..c4ffb4b92b56 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -45,6 +45,14 @@ static const struct subvp_high_refresh_list subvp_high_refresh_list = { {.width = 1920, .height = 1080, }}, }; +static const struct subvp_active_margin_list subvp_active_margin_list = { + .min_refresh = 55, + .max_refresh = 65, + .res = { + {.width = 2560, .height = 1440, }, + {.width = 1920, .height = 1080, }}, +}; + struct _vcs_dpi_ip_params_st dcn3_2_ip = { .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, @@ -3295,25 +3303,24 @@ bool dcn32_allow_subvp_with_active_margin(struct 
pipe_ctx *pipe) { bool allow = false; uint32_t refresh_rate = 0; + uint32_t min_refresh = subvp_active_margin_list.min_refresh; + uint32_t max_refresh = subvp_active_margin_list.max_refresh; + uint32_t i; - /* Allow subvp on displays that have active margin for 2560x1440@60hz displays - * only for now. There must be no scaling as well. - * - * For now we only enable on 2560x1440@60hz displays to enable 4K60 + 1440p60 configs - * for p-state switching. - */ - if (pipe->stream && pipe->plane_state) { - refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) - / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); - if (pipe->stream->timing.v_addressable == 1440 && - pipe->stream->timing.h_addressable == 2560 && - refresh_rate >= 55 && refresh_rate <= 65 && - pipe->plane_state->src_rect.height == 1440 && - pipe->plane_state->src_rect.width == 2560 && - pipe->plane_state->dst_rect.height == 1440 && - pipe->plane_state->dst_rect.width == 2560) + for (i = 0; i < SUBVP_ACTIVE_MARGIN_LIST_LEN; i++) { + uint32_t width = subvp_active_margin_list.res[i].width; + uint32_t height = subvp_active_margin_list.res[i].height; + + refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); + + if (refresh_rate >= min_refresh && refresh_rate <= max_refresh && + dcn32_check_native_scaling_for_res(pipe, width, height)) { allow = true; + break; + } } return allow; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index b931008114c9..b2f20e6cfb38 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -39,6 +39,7 @@ #define DCN3_2_MBLK_HEIGHT_8BPE 64 #define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq #define SUBVP_HIGH_REFRESH_LIST_LEN 4 +#define SUBVP_ACTIVE_MARGIN_LIST_LEN 2 #define DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ 1800 #define DCN3_2_VMIN_DISPCLK_HZ 717000000 @@ -57,6 +58,15 @@ struct subvp_high_refresh_list { } res[SUBVP_HIGH_REFRESH_LIST_LEN]; }; +struct subvp_active_margin_list { + int min_refresh; + int max_refresh; + struct { + int width; + int height; + } res[SUBVP_ACTIVE_MARGIN_LIST_LEN]; +}; + struct dcn32_resource_pool { struct resource_pool base; }; -- cgit v1.2.3 From ee95135bfeecf67b313b5573054b03aa6dbc76f8 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Tue, 21 Nov 2023 16:54:59 +0800 Subject: drm/amdgpu: add init_registers for nbio v7.11 enable init_registers callback func for nbio v7.11. 
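The callback follows the usual NBIO read-modify-write pattern; a generic sketch of that pattern (register and field names below are placeholders, not real registers):

	def = data = RREG32_SOC15(NBIO, 0, regEXAMPLE_CTRL);
	data = REG_SET_FIELD(data, EXAMPLE_CTRL, EXAMPLE_FIELD, 1);
	if (def != data)	/* write back only when something changed */
		WREG32_SOC15(NBIO, 0, regEXAMPLE_CTRL, data);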
Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c | 18 +++++++------- .../amd/include/asic_reg/nbio/nbio_7_11_0_offset.h | 2 ++ .../include/asic_reg/nbio/nbio_7_11_0_sh_mask.h | 29 ++++++++++++++++++++++ 3 files changed, 40 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 676ab1d20d2f..1f52b4b1db03 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -259,17 +259,17 @@ const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = { static void nbio_v7_11_init_registers(struct amdgpu_device *adev) { -/* uint32_t def, data; + uint32_t def, data; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); - def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); - data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, - CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); - data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, - CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); - if (def != data) - WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); -*/ } static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h index ff30f04be591..7ee3d291120d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h @@ -781,6 +781,8 @@ #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2_BASE_IDX 5 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1 0x420187 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1_BASE_IDX 5 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3 0x4201c6 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3_BASE_IDX 5 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h index 7f131999a263..eb8c556d9c93 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h @@ -24646,6 +24646,35 @@ //BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L +//BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE__SHIFT 0x8 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE__SHIFT 0x9 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE__SHIFT 0xb +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE__SHIFT 0xd +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN__SHIFT 0xf +#define 
BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE__SHIFT 0x10 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE__SHIFT 0x11 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE__SHIFT 0x14 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE__SHIFT 0x15 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE__SHIFT 0x18 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE__SHIFT 0x19 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT 0x1b +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT 0x1c +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE__SHIFT 0x1e +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE_MASK 0x00000100L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE_MASK 0x00000600L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE_MASK 0x00001800L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE_MASK 0x00006000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN_MASK 0x00008000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE_MASK 0x00010000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE_MASK 0x000E0000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE_MASK 0x00100000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE_MASK 0x00E00000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE_MASK 0x01000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE_MASK 0x06000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK 0x08000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK 0x30000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE_MASK 0xC0000000L // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp //BIF_CFG_DEV0_RC0_VENDOR_ID -- cgit v1.2.3 From 88f4b10a793262c4d6cf2566b1d210ec76f87867 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 21 Nov 2023 11:06:51 +0800 Subject: drm/amdgpu: fix memory overflow in the IB test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a memory overflow issue in the gfx IB test for some ASICs. At least 20 bytes are needed for the IB test packet. v2: correct code indentation errors. 
(Christian) Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 40ce12323164..8ed4a6fb147a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -427,7 +427,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 885ebd703260..1943beb135c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e3ff6e46f3f7..69c500910746 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1039,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 21865668b787..00b21ece081f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -297,8 +297,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; -- cgit v1.2.3 From c4b8394e76adba4f50a3c2696c75b214a291e24a Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Thu, 9 Nov 2023 00:04:36 -0500 Subject: drm/amd/display: Fix tiled display misalignment [Why] When otg workaround is applied during clock update, otgs of tiled display went out of sync. [How] To call dc_trigger_sync() after clock update to sync otgs again. 
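The resulting ordering in dc_commit_state_no_check becomes (a sketch of the sequence; see the hunk below):

	wait_for_no_pipes_pending(dc, context);
	dc->hwss.optimize_bandwidth(dc, context);	/* clock update may apply the OTG workaround */
	dc_trigger_sync(dc, context);			/* re-sync the OTGs afterwards */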
Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 76b47f178127..eab713c0da0d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1964,6 +1964,10 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c wait_for_no_pipes_pending(dc, context); /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); + /* Need to do otg sync again as otg could be out of sync due to otg + * workaround applied during clock update + */ + dc_trigger_sync(dc, context); } if (dc->hwss.update_dsc_pg) -- cgit v1.2.3 From fcd94ef1b3e78f7dc76309c9611915018d2d62a3 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 8 Nov 2023 10:55:53 -0500 Subject: drm/amd/display: Remove min_dst_y_next_start check for Z8 [Why] Flickering occurs on DRR supported panels when engaged in DRR due to min_dst_y_next becoming larger than the frame size itself. [How] In general, we should be able to enter Z8 when this is engaged but it might be a net power loss even if the calculation wasn't bugged. Don't support enabling Z8 during the DRR region. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Syed Hassan Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 7fc8b18096ba..ec77b2b41ba3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -950,10 +950,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc { int plane_count; int i; - unsigned int min_dst_y_next_start_us; plane_count = 0; - min_dst_y_next_start_us = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; @@ -975,26 +973,15 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { struct dc_link *link = context->streams[0]->sink->link; struct dc_stream_status *stream_status = &context->stream_status[0]; - struct dc_stream_state *current_stream = context->streams[0]; int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? 
dc->debug.minimum_z8_residency_time : 1000; bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; bool is_pwrseq0 = link->link_index == 0; - bool isFreesyncVideo; - - isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max; - isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) { - min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us; - break; - } - } /* Don't support multi-plane configurations */ if (stream_status->plane_count > 1) return DCN_ZSTATE_SUPPORT_DISALLOW; - if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000)) + if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0) return DCN_ZSTATE_SUPPORT_ALLOW; else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr) return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; -- cgit v1.2.3 From d642b0100bf8c95e88e8396b7191b35807dabb4c Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 8 Nov 2023 10:59:00 -0500 Subject: drm/amd/display: Update min Z8 residency time to 2100 for DCN314 [Why] Some panels with residency period of 2054 exhibit flickering with Z8 at the end of the frame. [How] As a workaround, increase the limit to block these panels. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Syed Hassan Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 677361d74a4e..c97391edb5ff 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -871,7 +871,7 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .enable_z9_disable_interface = true, - .minimum_z8_residency_time = 2000, + .minimum_z8_residency_time = 2100, .psr_skip_crtc_disable = true, .replay_skip_crtc_disabled = true, .disable_dmcu = true, -- cgit v1.2.3 From a5e90392fdda05ce842810bb749f3d210c3ffc65 Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Thu, 9 Nov 2023 17:53:49 -0500 Subject: Revert "drm/amd/display: Enable CM low mem power optimization" This reverts commit fcfc6ceec3ebb725a0d6381a1120e7cd546e1df4. [why] Flickering observed. Regression search pointed to this being the offending commit. 
Reviewed-by: Charlene Liu Reviewed-by: Yihan Zhu Acked-by: Hamza Mahfooz Signed-off-by: Gabe Teeger Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c | 13 +++++-------- .../gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 1a2adb354718..994b21ed272f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -71,24 +71,21 @@ void mpc32_power_on_blnd_lut( { struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); -/* if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) { if (power_on) { REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0); REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5); } else if (!mpc->ctx->dc->debug.disable_mem_low_power) { - //TODO: change to mpc - dpp_base->ctx->dc->optimized_required = true; - dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; + ASSERT(false); + /* TODO: change to mpc + * dpp_base->ctx->dc->optimized_required = true; + * dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; + */ } } else { REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, MPCC_MCM_1DLUT_MEM_PWR_FORCE, power_on == true ? 0 : 1); } -*/ - - REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, - MPCC_MCM_1DLUT_MEM_PWR_FORCE, power_on == true ? 0 : 1); } static enum dc_lut_mode mpc32_get_post1dlut_current(struct mpc *mpc, uint32_t mpcc_id) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 0d5a03c6d812..53eefba0b9dc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = { .i2c = true, .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled .dscl = true, - .cm = true, + .cm = false, .mpc = true, .optc = true, .vpg = true, -- cgit v1.2.3 From 0e6a12884ca72520b195d95f1fa1bf790678cd6c Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 8 Nov 2023 14:38:29 +0800 Subject: drm/amdgpu: correct the amdgpu runtime dereference usage count Fix the amdgpu runpm dereference usage count. 
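The invariant being restored is the standard runtime-PM balance; a generic sketch of that rule (not the driver's actual code path):

	int r = pm_runtime_get_sync(dev->dev);

	if (r < 0) {
		/* get_sync bumps the usage count even on failure */
		pm_runtime_put_autosuspend(dev->dev);
		return r;
	}
	/* ... touch the hardware ... */
	pm_runtime_put_autosuspend(dev->dev);	/* exactly one put per get */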
Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 0cacd0b9f8be..b8fbe97efe1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -340,14 +340,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, adev->have_disp_power_ref = true; return ret; } - /* if we have no active crtcs, then drop the power ref - * we got before + /* if we have no active crtcs, then go to + * drop the power ref we got before */ - if (!active && adev->have_disp_power_ref) { - pm_runtime_put_autosuspend(dev->dev); + if (!active && adev->have_disp_power_ref) adev->have_disp_power_ref = false; - } - out: /* drop the power reference we got coming in here */ pm_runtime_put_autosuspend(dev->dev); -- cgit v1.2.3 From f4233efedf75572e49efd08202b1a07196949b4a Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 9 Nov 2023 18:08:17 -0500 Subject: drm/amd/display: If P-State is supported try SubVP for smaller vlevel [Description] - To reduce vlevel further, we can try to apply subvp on configs that already support p-state since the natural p-state support may not allow for DPM0. - Add code to try subvp to reduce UCLK DPM level further if already supported, but don't use subvp if it does not optimize the DPM level even lower Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index c4ffb4b92b56..26411d4e9730 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1416,6 +1416,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, unsigned int dc_pipe_idx = 0; int i = 0; bool found_supported_config = false; + int vlevel_temp = 0; dc_assert_fp_enabled(); @@ -1448,13 +1449,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, */ if (!dc->debug.force_disable_subvp && !dc->caps.dmub_caps.gecc_enable && dcn32_all_pipes_have_stream_and_plane(dc, context) && !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && !is_test_pattern_enabled(context) && - (*vlevel == context->bw_ctx.dml.soc.num_states || + (*vlevel == context->bw_ctx.dml.soc.num_states || (vba->DRAMSpeedPerState[*vlevel] != vba->DRAMSpeedPerState[0] && + vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) || vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || dc->debug.force_subvp_mclk_switch)) { dcn32_merge_pipes_for_subvp(dc, context); memset(merge, 0, MAX_PIPES * sizeof(bool)); + vlevel_temp = *vlevel; /* to re-initialize viewport after the pipe merge */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; @@ -1523,6 +1526,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, } } + if (vba->DRAMSpeedPerState[*vlevel] >= vba->DRAMSpeedPerState[vlevel_temp]) + found_supported_config = false; + // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) // remove 
phantom pipes and repopulate dml pipes if (!found_supported_config) { -- cgit v1.2.3 From 5a9a2cc8ae1889c4002850b00fd4fd9691dfac4e Mon Sep 17 00:00:00 2001 From: Zhongwei Date: Wed, 8 Nov 2023 16:34:36 +0800 Subject: drm/amd/display: force toggle rate wa for first link training for a retimer [WHY] Handover from DMUB to driver does not perform link rate toggle. It might cause link training failure for boot up. [HOW] Force toggle rate wa for first link train. link->vendor_specific_lttpr_link_rate_wa should be zero then. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Michael Strauss Acked-by: Hamza Mahfooz Signed-off-by: Zhongwei Signed-off-by: Alex Deucher --- .../dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c index fd8f6f198146..68096d12f52f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c @@ -115,7 +115,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq lt_settings->cr_pattern_time = 16000; /* Fixed VS/PE specific: Toggle link rate */ - apply_toggle_rate_wa = (link->vendor_specific_lttpr_link_rate_wa == target_rate); + apply_toggle_rate_wa = ((link->vendor_specific_lttpr_link_rate_wa == target_rate) || (link->vendor_specific_lttpr_link_rate_wa == 0)); target_rate = get_dpcd_link_rate(<_settings->link_settings); toggle_rate = (target_rate == 0x6) ? 0xA : 0x6; @@ -271,7 +271,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( /* Vendor specific: Toggle link rate */ toggle_rate = (rate == 0x6) ? 0xA : 0x6; - if (link->vendor_specific_lttpr_link_rate_wa == rate) { + if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) { core_link_write_dpcd( link, DP_LINK_BW_SET, @@ -617,7 +617,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( /* Vendor specific: Toggle link rate */ toggle_rate = (rate == 0x6) ? 0xA : 0x6; - if (link->vendor_specific_lttpr_link_rate_wa == rate) { + if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) { core_link_write_dpcd( link, DP_LINK_BW_SET, -- cgit v1.2.3 From 3d0fe49454652117522f60bfbefb978ba0e5300b Mon Sep 17 00:00:00 2001 From: Parandhaman K Date: Thu, 9 Nov 2023 15:52:17 +0530 Subject: drm/amd/display: Refactor OPTC into component folder [why] Move all optc files to unique folder optc. [how] creating optc repo in dc, and moved the dcnxx_optc.c and .h files into corresponding new folders inside the optc and cleared the linkage errors by adding relative paths in the Makefile.template. 
Reviewed-by: Martin Leung Acked-by: Hamza Mahfooz Signed-off-by: Parandhaman K Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Makefile | 1 + drivers/gpu/drm/amd/display/dc/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 2 +- .../display/dc/dcn10/dcn10_hw_sequencer_debug.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 1621 -------------------- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 599 -------- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c | 587 ------- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h | 124 -- drivers/gpu/drm/amd/display/dc/dcn201/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn201/dcn201_optc.c | 202 --- .../gpu/drm/amd/display/dc/dcn201/dcn201_optc.h | 74 - drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 1 - drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c | 393 ----- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h | 359 ----- drivers/gpu/drm/amd/display/dc/dcn301/Makefile | 3 +- .../gpu/drm/amd/display/dc/dcn301/dcn301_optc.c | 185 --- .../gpu/drm/amd/display/dc/dcn301/dcn301_optc.h | 36 - drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c | 310 ---- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h | 267 ---- drivers/gpu/drm/amd/display/dc/dcn314/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn314/dcn314_optc.c | 273 ---- .../gpu/drm/amd/display/dc/dcn314/dcn314_optc.h | 255 --- drivers/gpu/drm/amd/display/dc/dcn32/Makefile | 6 +- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 357 ----- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h | 187 --- drivers/gpu/drm/amd/display/dc/dcn35/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c | 290 ---- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h | 74 - drivers/gpu/drm/amd/display/dc/optc/Makefile | 108 ++ .../gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c | 1621 ++++++++++++++++++++ .../gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h | 599 ++++++++ .../gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c | 587 +++++++ .../gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h | 124 ++ .../drm/amd/display/dc/optc/dcn201/dcn201_optc.c | 202 +++ .../drm/amd/display/dc/optc/dcn201/dcn201_optc.h | 74 + .../gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c | 393 +++++ .../gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h | 359 +++++ .../drm/amd/display/dc/optc/dcn301/dcn301_optc.c | 185 +++ .../drm/amd/display/dc/optc/dcn301/dcn301_optc.h | 36 + .../gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c | 310 ++++ .../gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h | 267 ++++ .../drm/amd/display/dc/optc/dcn314/dcn314_optc.c | 273 ++++ .../drm/amd/display/dc/optc/dcn314/dcn314_optc.h | 255 +++ .../gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c | 357 +++++ .../gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h | 187 +++ .../gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c | 290 ++++ .../gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h | 74 + 49 files changed, 6314 insertions(+), 6207 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c delete mode 
100644 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/Makefile create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c create mode 100644 drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index 7b0959da2cac..92a5c5efcf92 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -32,6 +32,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/optc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index ab51a065cf0e..390e7a99be54 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -22,7 +22,7 @@ # # Makefile for Display Core (dc) component. 
-DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource +DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource optc ifdef CONFIG_DRM_AMD_DC_FP diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 1eb7418ced3a..0dd62934a18c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -24,7 +24,7 @@ DCN10 = dcn10_init.o dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ - dcn10_dpp.o dcn10_opp.o dcn10_optc.o \ + dcn10_dpp.o dcn10_opp.o \ dcn10_hubp.o dcn10_mpc.o \ dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \ dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index 92fdab731f4a..9033b39e0e0c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -32,7 +32,7 @@ #include "dce/dce_hwseq.h" #include "abm.h" #include "dmcu.h" -#include "dcn10_optc.h" +#include "dcn10/dcn10_optc.h" #include "dcn10/dcn10_dpp.h" #include "dcn10/dcn10_mpc.h" #include "timing_generator.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c deleted file mode 100644 index 0e8f4f36c87c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ /dev/null @@ -1,1621 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - - -#include "reg_helper.h" -#include "dcn10_optc.h" -#include "dc.h" -#include "dc_trace.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - -#define STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN 0x100 - -/** - * apply_front_porch_workaround() - This is a workaround for a bug that has - * existed since R5xx and has not been fixed - * keep Front porch at minimum 2 for Interlaced - * mode or 1 for progressive. - * - * @timing: Timing parameters used to configure DCN blocks. 
- */ -static void apply_front_porch_workaround(struct dc_crtc_timing *timing) -{ - if (timing->flags.INTERLACE == 1) { - if (timing->v_front_porch < 2) - timing->v_front_porch = 2; - } else { - if (timing->v_front_porch < 1) - timing->v_front_porch = 1; - } -} - -void optc1_program_global_sync( - struct timing_generator *optc, - int vready_offset, - int vstartup_start, - int vupdate_offset, - int vupdate_width) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - optc1->vready_offset = vready_offset; - optc1->vstartup_start = vstartup_start; - optc1->vupdate_offset = vupdate_offset; - optc1->vupdate_width = vupdate_width; - - if (optc1->vstartup_start == 0) { - BREAK_TO_DEBUGGER(); - return; - } - - REG_SET(OTG_VSTARTUP_PARAM, 0, - VSTARTUP_START, optc1->vstartup_start); - - REG_SET_2(OTG_VUPDATE_PARAM, 0, - VUPDATE_OFFSET, optc1->vupdate_offset, - VUPDATE_WIDTH, optc1->vupdate_width); - - REG_SET(OTG_VREADY_PARAM, 0, - VREADY_OFFSET, optc1->vready_offset); -} - -static void optc1_disable_stereo(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_STEREO_CONTROL, 0, - OTG_STEREO_EN, 0); - - REG_SET_2(OTG_3D_STRUCTURE_CONTROL, 0, - OTG_3D_STRUCTURE_EN, 0, - OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); -} - -void optc1_setup_vertical_interrupt0( - struct timing_generator *optc, - uint32_t start_line, - uint32_t end_line) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, - OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, - OTG_VERTICAL_INTERRUPT0_LINE_END, end_line); -} - -void optc1_setup_vertical_interrupt1( - struct timing_generator *optc, - uint32_t start_line) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0, - OTG_VERTICAL_INTERRUPT1_LINE_START, start_line); -} - -void optc1_setup_vertical_interrupt2( - struct timing_generator *optc, - uint32_t start_line) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_VERTICAL_INTERRUPT2_POSITION, 0, - OTG_VERTICAL_INTERRUPT2_LINE_START, start_line); -} - -/** - * optc1_program_timing() - used by mode timing set Program - * CRTC Timing Registers - OTG_H_*, - * OTG_V_*, Pixel repetition. - * Including SYNC. Call BIOS command table to program Timings. - * - * @optc: timing_generator instance. - * @dc_crtc_timing: Timing parameters used to configure DCN blocks. - * @vready_offset: Vready's starting position. - * @vstartup_start: Vstartup period. - * @vupdate_offset: Vupdate starting position. - * @vupdate_width: Vupdate duration. - * @signal: DC signal types. - * @use_vbios: to program timings from BIOS command table. 
- * - */ -void optc1_program_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - int vready_offset, - int vstartup_start, - int vupdate_offset, - int vupdate_width, - const enum signal_type signal, - bool use_vbios) -{ - struct dc_crtc_timing patched_crtc_timing; - uint32_t asic_blank_end; - uint32_t asic_blank_start; - uint32_t v_total; - uint32_t v_sync_end; - uint32_t h_sync_polarity, v_sync_polarity; - uint32_t start_point = 0; - uint32_t field_num = 0; - enum h_timing_div_mode h_div = H_TIMING_NO_DIV; - - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - optc1->signal = signal; - optc1->vready_offset = vready_offset; - optc1->vstartup_start = vstartup_start; - optc1->vupdate_offset = vupdate_offset; - optc1->vupdate_width = vupdate_width; - patched_crtc_timing = *dc_crtc_timing; - apply_front_porch_workaround(&patched_crtc_timing); - optc1->orginal_patched_timing = patched_crtc_timing; - - /* Load horizontal timing */ - - /* CRTC_H_TOTAL = vesa.h_total - 1 */ - REG_SET(OTG_H_TOTAL, 0, - OTG_H_TOTAL, patched_crtc_timing.h_total - 1); - - /* h_sync_start = 0, h_sync_end = vesa.h_sync_width */ - REG_UPDATE_2(OTG_H_SYNC_A, - OTG_H_SYNC_A_START, 0, - OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width); - - /* blank_start = line end - front porch */ - asic_blank_start = patched_crtc_timing.h_total - - patched_crtc_timing.h_front_porch; - - /* blank_end = blank_start - active */ - asic_blank_end = asic_blank_start - - patched_crtc_timing.h_border_right - - patched_crtc_timing.h_addressable - - patched_crtc_timing.h_border_left; - - REG_UPDATE_2(OTG_H_BLANK_START_END, - OTG_H_BLANK_START, asic_blank_start, - OTG_H_BLANK_END, asic_blank_end); - - /* h_sync polarity */ - h_sync_polarity = patched_crtc_timing.flags.HSYNC_POSITIVE_POLARITY ? - 0 : 1; - - REG_UPDATE(OTG_H_SYNC_A_CNTL, - OTG_H_SYNC_A_POL, h_sync_polarity); - - v_total = patched_crtc_timing.v_total - 1; - - REG_SET(OTG_V_TOTAL, 0, - OTG_V_TOTAL, v_total); - - /* In case of V_TOTAL_CONTROL is on, make sure OTG_V_TOTAL_MAX and - * OTG_V_TOTAL_MIN are equal to V_TOTAL. - */ - optc->funcs->set_vtotal_min_max(optc, v_total, v_total); - - /* v_sync_start = 0, v_sync_end = v_sync_width */ - v_sync_end = patched_crtc_timing.v_sync_width; - - REG_UPDATE_2(OTG_V_SYNC_A, - OTG_V_SYNC_A_START, 0, - OTG_V_SYNC_A_END, v_sync_end); - - /* blank_start = frame end - front porch */ - asic_blank_start = patched_crtc_timing.v_total - - patched_crtc_timing.v_front_porch; - - /* blank_end = blank_start - active */ - asic_blank_end = asic_blank_start - - patched_crtc_timing.v_border_bottom - - patched_crtc_timing.v_addressable - - patched_crtc_timing.v_border_top; - - REG_UPDATE_2(OTG_V_BLANK_START_END, - OTG_V_BLANK_START, asic_blank_start, - OTG_V_BLANK_END, asic_blank_end); - - /* v_sync polarity */ - v_sync_polarity = patched_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? 
- 0 : 1; - - REG_UPDATE(OTG_V_SYNC_A_CNTL, - OTG_V_SYNC_A_POL, v_sync_polarity); - - if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT || - optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - optc1->signal == SIGNAL_TYPE_EDP) { - start_point = 1; - if (patched_crtc_timing.flags.INTERLACE == 1) - field_num = 1; - } - - /* Interlace */ - if (REG(OTG_INTERLACE_CONTROL)) { - if (patched_crtc_timing.flags.INTERLACE == 1) - REG_UPDATE(OTG_INTERLACE_CONTROL, - OTG_INTERLACE_ENABLE, 1); - else - REG_UPDATE(OTG_INTERLACE_CONTROL, - OTG_INTERLACE_ENABLE, 0); - } - - /* VTG enable set to 0 first VInit */ - REG_UPDATE(CONTROL, - VTG0_ENABLE, 0); - - /* original code is using VTG offset to address OTG reg, seems wrong */ - REG_UPDATE_2(OTG_CONTROL, - OTG_START_POINT_CNTL, start_point, - OTG_FIELD_NUMBER_CNTL, field_num); - - optc->funcs->program_global_sync(optc, - vready_offset, - vstartup_start, - vupdate_offset, - vupdate_width); - - optc->funcs->set_vtg_params(optc, dc_crtc_timing, true); - - /* TODO - * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1 - * program_horz_count_by_2 - * for DVI 30bpp mode, 0 otherwise - * program_horz_count_by_2(optc, &patched_crtc_timing); - */ - - /* Enable stereo - only when we need to pack 3D frame. Other types - * of stereo handled in explicit call - */ - - if (optc1_is_two_pixels_per_containter(&patched_crtc_timing) || optc1->opp_count == 2) - h_div = H_TIMING_DIV_BY2; - - if (REG(OPTC_DATA_FORMAT_CONTROL) && optc1->tg_mask->OPTC_DATA_FORMAT != 0) { - uint32_t data_fmt = 0; - - if (patched_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) - data_fmt = 1; - else if (patched_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) - data_fmt = 2; - - REG_UPDATE(OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, data_fmt); - } - - if (optc1->tg_mask->OTG_H_TIMING_DIV_MODE != 0) { - if (optc1->opp_count == 4) - h_div = H_TIMING_DIV_BY4; - - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_MODE, h_div); - } else { - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_BY2, h_div); - } -} - -/** - * optc1_set_vtg_params - Set Vertical Timing Generator (VTG) parameters - * - * @optc: timing_generator struct used to extract the optc parameters - * @dc_crtc_timing: Timing parameters configured - * @program_fp2: Boolean value indicating if FP2 will be programmed or not - * - * OTG is responsible for generating the global sync signals, including - * vertical timing information for each HUBP in the dcfclk domain. Each VTG is - * associated with one OTG that provides HUBP with vertical timing information - * (i.e., there is 1:1 correspondence between OTG and VTG). This function is - * responsible for setting the OTG parameters to the VTG during the pipe - * programming. 
- */ -void optc1_set_vtg_params(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2) -{ - struct dc_crtc_timing patched_crtc_timing; - uint32_t asic_blank_end; - uint32_t v_init; - uint32_t v_fp2 = 0; - int32_t vertical_line_start; - - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - patched_crtc_timing = *dc_crtc_timing; - apply_front_porch_workaround(&patched_crtc_timing); - - /* VCOUNT_INIT is the start of blank */ - v_init = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch; - - /* end of blank = v_init - active */ - asic_blank_end = v_init - - patched_crtc_timing.v_border_bottom - - patched_crtc_timing.v_addressable - - patched_crtc_timing.v_border_top; - - /* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */ - vertical_line_start = asic_blank_end - optc1->vstartup_start + 1; - if (vertical_line_start < 0) - v_fp2 = -vertical_line_start; - - /* Interlace */ - if (REG(OTG_INTERLACE_CONTROL)) { - if (patched_crtc_timing.flags.INTERLACE == 1) { - v_init = v_init / 2; - if ((optc1->vstartup_start/2)*2 > asic_blank_end) - v_fp2 = v_fp2 / 2; - } - } - - if (program_fp2) - REG_UPDATE_2(CONTROL, - VTG0_FP2, v_fp2, - VTG0_VCOUNT_INIT, v_init); - else - REG_UPDATE(CONTROL, VTG0_VCOUNT_INIT, v_init); -} - -void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - uint32_t blank_data_double_buffer_enable = enable ? 1 : 0; - - REG_UPDATE(OTG_DOUBLE_BUFFER_CONTROL, - OTG_BLANK_DATA_DOUBLE_BUFFER_EN, blank_data_double_buffer_enable); -} - -/** - * optc1_set_timing_double_buffer() - DRR double buffering control - * - * Sets double buffer point for V_TOTAL, H_TOTAL, VTOTAL_MIN, - * VTOTAL_MAX, VTOTAL_MIN_SEL and VTOTAL_MAX_SEL registers. - * - * @optc: timing_generator instance. - * @enable: Enable DRR double buffering control if true, disable otherwise. - * - * Options: any time, start of frame, dp start of frame (range timing) - */ -void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t mode = enable ? 2 : 0; - - REG_UPDATE(OTG_DOUBLE_BUFFER_CONTROL, - OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mode); -} - -/** - * optc1_unblank_crtc() - Call ASIC Control Object to UnBlank CRTC. - * - * @optc: timing_generator instance. - */ -static void optc1_unblank_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE_2(OTG_BLANK_CONTROL, - OTG_BLANK_DATA_EN, 0, - OTG_BLANK_DE_MODE, 0); - - /* W/A for automated testing - * Automated testing will fail underflow test as there - * sporadic underflows which occur during the optc blank - * sequence. As a w/a, clear underflow on unblank. - * This prevents the failure, but will not mask actual - * underflow that affect real use cases. - */ - optc1_clear_optc_underflow(optc); -} - -/** - * optc1_blank_crtc() - Call ASIC Control Object to Blank CRTC. - * - * @optc: timing_generator instance. 
- */ - -static void optc1_blank_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE_2(OTG_BLANK_CONTROL, - OTG_BLANK_DATA_EN, 1, - OTG_BLANK_DE_MODE, 0); - - optc1_set_blank_data_double_buffer(optc, false); -} - -void optc1_set_blank(struct timing_generator *optc, - bool enable_blanking) -{ - if (enable_blanking) - optc1_blank_crtc(optc); - else - optc1_unblank_crtc(optc); -} - -bool optc1_is_blanked(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t blank_en; - uint32_t blank_state; - - REG_GET_2(OTG_BLANK_CONTROL, - OTG_BLANK_DATA_EN, &blank_en, - OTG_CURRENT_BLANK_STATE, &blank_state); - - return blank_en && blank_state; -} - -void optc1_enable_optc_clock(struct timing_generator *optc, bool enable) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - if (enable) { - REG_UPDATE_2(OPTC_INPUT_CLOCK_CONTROL, - OPTC_INPUT_CLK_EN, 1, - OPTC_INPUT_CLK_GATE_DIS, 1); - - REG_WAIT(OPTC_INPUT_CLOCK_CONTROL, - OPTC_INPUT_CLK_ON, 1, - 1, 1000); - - /* Enable clock */ - REG_UPDATE_2(OTG_CLOCK_CONTROL, - OTG_CLOCK_EN, 1, - OTG_CLOCK_GATE_DIS, 1); - REG_WAIT(OTG_CLOCK_CONTROL, - OTG_CLOCK_ON, 1, - 1, 1000); - } else { - - //last chance to clear underflow, otherwise, it will always there due to clock is off. - if (optc->funcs->is_optc_underflow_occurred(optc) == true) - optc->funcs->clear_optc_underflow(optc); - - REG_UPDATE_2(OTG_CLOCK_CONTROL, - OTG_CLOCK_GATE_DIS, 0, - OTG_CLOCK_EN, 0); - - REG_UPDATE_2(OPTC_INPUT_CLOCK_CONTROL, - OPTC_INPUT_CLK_GATE_DIS, 0, - OPTC_INPUT_CLK_EN, 0); - } -} - -/** - * optc1_enable_crtc() - Enable CRTC - call ASIC Control Object to enable Timing generator. - * - * @optc: timing_generator instance. - */ -static bool optc1_enable_crtc(struct timing_generator *optc) -{ - /* TODO FPGA wait for answer - * OTG_MASTER_UPDATE_MODE != CRTC_MASTER_UPDATE_MODE - * OTG_MASTER_UPDATE_LOCK != CRTC_MASTER_UPDATE_LOCK - */ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* opp instance for OTG. For DCN1.0, ODM is remoed. - * OPP and OPTC should 1:1 mapping - */ - REG_UPDATE(OPTC_DATA_SOURCE_SELECT, - OPTC_SRC_SEL, optc->inst); - - /* VTG enable first is for HW workaround */ - REG_UPDATE(CONTROL, - VTG0_ENABLE, 1); - - REG_SEQ_START(); - - /* Enable CRTC */ - REG_UPDATE_2(OTG_CONTROL, - OTG_DISABLE_POINT_CNTL, 3, - OTG_MASTER_EN, 1); - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); - - return true; -} - -/* disable_crtc - call ASIC Control Object to disable Timing generator. */ -bool optc1_disable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE_2(OTG_CONTROL, - OTG_DISABLE_POINT_CNTL, 3, - OTG_MASTER_EN, 0); - - REG_UPDATE(CONTROL, - VTG0_ENABLE, 0); - - /* CRTC disabled, so disable clock. 
*/ - REG_WAIT(OTG_CLOCK_CONTROL, - OTG_BUSY, 0, - 1, 100000); - - return true; -} - - -void optc1_program_blank_color( - struct timing_generator *optc, - const struct tg_color *black_color) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_3(OTG_BLACK_COLOR, 0, - OTG_BLACK_COLOR_B_CB, black_color->color_b_cb, - OTG_BLACK_COLOR_G_Y, black_color->color_g_y, - OTG_BLACK_COLOR_R_CR, black_color->color_r_cr); -} - -bool optc1_validate_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *timing) -{ - uint32_t v_blank; - uint32_t h_blank; - uint32_t min_v_blank; - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - ASSERT(timing != NULL); - - v_blank = (timing->v_total - timing->v_addressable - - timing->v_border_top - timing->v_border_bottom); - - h_blank = (timing->h_total - timing->h_addressable - - timing->h_border_right - - timing->h_border_left); - - if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE && - timing->timing_3d_format != TIMING_3D_FORMAT_HW_FRAME_PACKING && - timing->timing_3d_format != TIMING_3D_FORMAT_TOP_AND_BOTTOM && - timing->timing_3d_format != TIMING_3D_FORMAT_SIDE_BY_SIDE && - timing->timing_3d_format != TIMING_3D_FORMAT_FRAME_ALTERNATE && - timing->timing_3d_format != TIMING_3D_FORMAT_INBAND_FA) - return false; - - /* Temporarily blocking interlacing mode until it's supported */ - if (timing->flags.INTERLACE == 1) - return false; - - /* Check maximum number of pixels supported by Timing Generator - * (Currently will never fail, in order to fail needs display which - * needs more than 8192 horizontal and - * more than 8192 vertical total pixels) - */ - if (timing->h_total > optc1->max_h_total || - timing->v_total > optc1->max_v_total) - return false; - - - if (h_blank < optc1->min_h_blank) - return false; - - if (timing->h_sync_width < optc1->min_h_sync_width || - timing->v_sync_width < optc1->min_v_sync_width) - return false; - - min_v_blank = timing->flags.INTERLACE?optc1->min_v_blank_interlace:optc1->min_v_blank; - - if (v_blank < min_v_blank) - return false; - - return true; - -} - -/* - * get_vblank_counter - * - * @brief - * Get counter for vertical blanks. use register CRTC_STATUS_FRAME_COUNT which - * holds the counter of frames. - * - * @param - * struct timing_generator *optc - [in] timing generator which controls the - * desired CRTC - * - * @return - * Counter of frames, which should equal to number of vblanks. 
- */ -uint32_t optc1_get_vblank_counter(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t frame_count; - - REG_GET(OTG_STATUS_FRAME_COUNT, - OTG_FRAME_COUNT, &frame_count); - - return frame_count; -} - -void optc1_lock(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_GLOBAL_CONTROL0, 0, - OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 1); - - REG_WAIT(OTG_MASTER_UPDATE_LOCK, - UPDATE_LOCK_STATUS, 1, - 1, 10); - - TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); -} - -void optc1_unlock(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 0); - - TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, false); -} - -void optc1_get_position(struct timing_generator *optc, - struct crtc_position *position) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_GET_2(OTG_STATUS_POSITION, - OTG_HORZ_COUNT, &position->horizontal_count, - OTG_VERT_COUNT, &position->vertical_count); - - REG_GET(OTG_NOM_VERT_POSITION, - OTG_VERT_COUNT_NOM, &position->nominal_vcount); -} - -bool optc1_is_counter_moving(struct timing_generator *optc) -{ - struct crtc_position position1, position2; - - optc->funcs->get_position(optc, &position1); - optc->funcs->get_position(optc, &position2); - - if (position1.horizontal_count == position2.horizontal_count && - position1.vertical_count == position2.vertical_count) - return false; - else - return true; -} - -bool optc1_did_triggered_reset_occur( - struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t occurred_force, occurred_vsync; - - REG_GET(OTG_FORCE_COUNT_NOW_CNTL, - OTG_FORCE_COUNT_NOW_OCCURRED, &occurred_force); - - REG_GET(OTG_VERT_SYNC_CONTROL, - OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, &occurred_vsync); - - return occurred_vsync != 0 || occurred_force != 0; -} - -void optc1_disable_reset_trigger(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_WRITE(OTG_TRIGA_CNTL, 0); - - REG_SET(OTG_FORCE_COUNT_NOW_CNTL, 0, - OTG_FORCE_COUNT_NOW_CLEAR, 1); - - REG_SET(OTG_VERT_SYNC_CONTROL, 0, - OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, 1); -} - -void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t falling_edge; - - REG_GET(OTG_V_SYNC_A_CNTL, - OTG_V_SYNC_A_POL, &falling_edge); - - if (falling_edge) - REG_SET_3(OTG_TRIGA_CNTL, 0, - /* vsync signal from selected OTG pipe based - * on OTG_TRIG_SOURCE_PIPE_SELECT setting - */ - OTG_TRIGA_SOURCE_SELECT, 20, - OTG_TRIGA_SOURCE_PIPE_SELECT, source_tg_inst, - /* always detect falling edge */ - OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 1); - else - REG_SET_3(OTG_TRIGA_CNTL, 0, - /* vsync signal from selected OTG pipe based - * on OTG_TRIG_SOURCE_PIPE_SELECT setting - */ - OTG_TRIGA_SOURCE_SELECT, 20, - OTG_TRIGA_SOURCE_PIPE_SELECT, source_tg_inst, - /* always detect rising edge */ - OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1); - - REG_SET(OTG_FORCE_COUNT_NOW_CNTL, 0, - /* force H count to H_TOTAL and V count to V_TOTAL in - * progressive mode and V_TOTAL-1 in interlaced mode - */ - OTG_FORCE_COUNT_NOW_MODE, 2); -} - -void optc1_enable_crtc_reset( - struct timing_generator *optc, - int source_tg_inst, - struct crtc_trigger_info *crtc_tp) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t falling_edge = 0; - uint32_t rising_edge = 0; - - switch 
(crtc_tp->event) { - - case CRTC_EVENT_VSYNC_RISING: - rising_edge = 1; - break; - - case CRTC_EVENT_VSYNC_FALLING: - falling_edge = 1; - break; - } - - REG_SET_4(OTG_TRIGA_CNTL, 0, - /* vsync signal from selected OTG pipe based - * on OTG_TRIG_SOURCE_PIPE_SELECT setting - */ - OTG_TRIGA_SOURCE_SELECT, 20, - OTG_TRIGA_SOURCE_PIPE_SELECT, source_tg_inst, - /* always detect falling edge */ - OTG_TRIGA_RISING_EDGE_DETECT_CNTL, rising_edge, - OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, falling_edge); - - switch (crtc_tp->delay) { - case TRIGGER_DELAY_NEXT_LINE: - REG_SET(OTG_VERT_SYNC_CONTROL, 0, - OTG_AUTO_FORCE_VSYNC_MODE, 1); - break; - case TRIGGER_DELAY_NEXT_PIXEL: - REG_SET(OTG_FORCE_COUNT_NOW_CNTL, 0, - /* force H count to H_TOTAL and V count to V_TOTAL in - * progressive mode and V_TOTAL-1 in interlaced mode - */ - OTG_FORCE_COUNT_NOW_MODE, 2); - break; - } -} - -void optc1_wait_for_state(struct timing_generator *optc, - enum crtc_state state) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - switch (state) { - case CRTC_STATE_VBLANK: - REG_WAIT(OTG_STATUS, - OTG_V_BLANK, 1, - 1, 100000); /* 1 vupdate at 10hz */ - break; - - case CRTC_STATE_VACTIVE: - REG_WAIT(OTG_STATUS, - OTG_V_ACTIVE_DISP, 1, - 1, 100000); /* 1 vupdate at 10hz */ - break; - - default: - break; - } -} - -void optc1_set_early_control( - struct timing_generator *optc, - uint32_t early_cntl) -{ - /* asic design change, do not need this control - * empty for share caller logic - */ -} - - -void optc1_set_static_screen_control( - struct timing_generator *optc, - uint32_t event_triggers, - uint32_t num_frames) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - // By register spec, it only takes 8 bit value - if (num_frames > 0xFF) - num_frames = 0xFF; - - /* Bit 8 is no longer applicable in RV for PSR case, - * set bit 8 to 0 if given - */ - if ((event_triggers & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN) - != 0) - event_triggers = event_triggers & - ~STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN; - - REG_SET_2(OTG_STATIC_SCREEN_CONTROL, 0, - OTG_STATIC_SCREEN_EVENT_MASK, event_triggers, - OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); -} - -static void optc1_setup_manual_trigger(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_GLOBAL_CONTROL2, 0, - MANUAL_FLOW_CONTROL_SEL, optc->inst); - - REG_SET_8(OTG_TRIGA_CNTL, 0, - OTG_TRIGA_SOURCE_SELECT, 22, - OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, - OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1, - OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0, - OTG_TRIGA_POLARITY_SELECT, 0, - OTG_TRIGA_FREQUENCY_SELECT, 0, - OTG_TRIGA_DELAY, 0, - OTG_TRIGA_CLEAR, 1); -} - -static void optc1_program_manual_trigger(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_MANUAL_FLOW_CONTROL, 0, - MANUAL_FLOW_CONTROL, 1); - - REG_SET(OTG_MANUAL_FLOW_CONTROL, 0, - MANUAL_FLOW_CONTROL, 0); -} - -/** - * optc1_set_drr() - Program dynamic refresh rate registers m_OTGx_OTG_V_TOTAL_*. - * - * @optc: timing_generator instance. - * @params: parameters used for Dynamic Refresh Rate. 
- */ -void optc1_set_drr( - struct timing_generator *optc, - const struct drr_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - if (params != NULL && - params->vertical_total_max > 0 && - params->vertical_total_min > 0) { - - if (params->vertical_total_mid != 0) { - - REG_SET(OTG_V_TOTAL_MID, 0, - OTG_V_TOTAL_MID, params->vertical_total_mid - 1); - - REG_UPDATE_2(OTG_V_TOTAL_CONTROL, - OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, - OTG_VTOTAL_MID_FRAME_NUM, - (uint8_t)params->vertical_total_mid_frame_num); - - } - - optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); - - REG_UPDATE_5(OTG_V_TOTAL_CONTROL, - OTG_V_TOTAL_MIN_SEL, 1, - OTG_V_TOTAL_MAX_SEL, 1, - OTG_FORCE_LOCK_ON_EVENT, 0, - OTG_SET_V_TOTAL_MIN_MASK_EN, 0, - OTG_SET_V_TOTAL_MIN_MASK, 0); - } - - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); -} - -void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_V_TOTAL_MAX, 0, - OTG_V_TOTAL_MAX, vtotal_max); - - REG_SET(OTG_V_TOTAL_MIN, 0, - OTG_V_TOTAL_MIN, vtotal_min); -} - -static void optc1_set_test_pattern( - struct timing_generator *optc, - /* TODO: replace 'controller_dp_test_pattern' by 'test_pattern_mode' - * because this is not DP-specific (which is probably somewhere in DP - * encoder) */ - enum controller_dp_test_pattern test_pattern, - enum dc_color_depth color_depth) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - enum test_pattern_color_format bit_depth; - enum test_pattern_dyn_range dyn_range; - enum test_pattern_mode mode; - uint32_t pattern_mask; - uint32_t pattern_data; - /* color ramp generator mixes 16-bits color */ - uint32_t src_bpc = 16; - /* requested bpc */ - uint32_t dst_bpc; - uint32_t index; - /* RGB values of the color bars. - * Produce two RGB colors: RGB0 - white (all Fs) - * and RGB1 - black (all 0s) - * (three RGB components for two colors) - */ - uint16_t src_color[6] = {0xFFFF, 0xFFFF, 0xFFFF, 0x0000, - 0x0000, 0x0000}; - /* dest color (converted to the specified color format) */ - uint16_t dst_color[6]; - uint32_t inc_base; - - /* translate to bit depth */ - switch (color_depth) { - case COLOR_DEPTH_666: - bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_6; - break; - case COLOR_DEPTH_888: - bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_8; - break; - case COLOR_DEPTH_101010: - bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_10; - break; - case COLOR_DEPTH_121212: - bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_12; - break; - default: - bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_8; - break; - } - - switch (test_pattern) { - case CONTROLLER_DP_TEST_PATTERN_COLORSQUARES: - case CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA: - { - dyn_range = (test_pattern == - CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA ? - TEST_PATTERN_DYN_RANGE_CEA : - TEST_PATTERN_DYN_RANGE_VESA); - mode = TEST_PATTERN_MODE_COLORSQUARES_RGB; - - REG_UPDATE_2(OTG_TEST_PATTERN_PARAMETERS, - OTG_TEST_PATTERN_VRES, 6, - OTG_TEST_PATTERN_HRES, 6); - - REG_UPDATE_4(OTG_TEST_PATTERN_CONTROL, - OTG_TEST_PATTERN_EN, 1, - OTG_TEST_PATTERN_MODE, mode, - OTG_TEST_PATTERN_DYNAMIC_RANGE, dyn_range, - OTG_TEST_PATTERN_COLOR_FORMAT, bit_depth); - } - break; - - case CONTROLLER_DP_TEST_PATTERN_VERTICALBARS: - case CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS: - { - mode = (test_pattern == - CONTROLLER_DP_TEST_PATTERN_VERTICALBARS ? 
- TEST_PATTERN_MODE_VERTICALBARS : - TEST_PATTERN_MODE_HORIZONTALBARS); - - switch (bit_depth) { - case TEST_PATTERN_COLOR_FORMAT_BPC_6: - dst_bpc = 6; - break; - case TEST_PATTERN_COLOR_FORMAT_BPC_8: - dst_bpc = 8; - break; - case TEST_PATTERN_COLOR_FORMAT_BPC_10: - dst_bpc = 10; - break; - default: - dst_bpc = 8; - break; - } - - /* adjust color to the required colorFormat */ - for (index = 0; index < 6; index++) { - /* dst = 2^dstBpc * src / 2^srcBpc = src >> - * (srcBpc - dstBpc); - */ - dst_color[index] = - src_color[index] >> (src_bpc - dst_bpc); - /* CRTC_TEST_PATTERN_DATA has 16 bits, - * lowest 6 are hardwired to ZERO - * color bits should be left aligned to MSB - * XXXXXXXXXX000000 for 10 bit, - * XXXXXXXX00000000 for 8 bit and XXXXXX0000000000 for 6 - */ - dst_color[index] <<= (16 - dst_bpc); - } - - REG_WRITE(OTG_TEST_PATTERN_PARAMETERS, 0); - - /* We have to write the mask before data, similar to pipeline. - * For example, for 8 bpc, if we want RGB0 to be magenta, - * and RGB1 to be cyan, - * we need to make 7 writes: - * MASK DATA - * 000001 00000000 00000000 set mask to R0 - * 000010 11111111 00000000 R0 255, 0xFF00, set mask to G0 - * 000100 00000000 00000000 G0 0, 0x0000, set mask to B0 - * 001000 11111111 00000000 B0 255, 0xFF00, set mask to R1 - * 010000 00000000 00000000 R1 0, 0x0000, set mask to G1 - * 100000 11111111 00000000 G1 255, 0xFF00, set mask to B1 - * 100000 11111111 00000000 B1 255, 0xFF00 - * - * we will make a loop of 6 in which we prepare the mask, - * then write, then prepare the color for next write. - * first iteration will write mask only, - * but each next iteration color prepared in - * previous iteration will be written within new mask, - * the last component will written separately, - * mask is not changing between 6th and 7th write - * and color will be prepared by last iteration - */ - - /* write color, color values mask in CRTC_TEST_PATTERN_MASK - * is B1, G1, R1, B0, G0, R0 - */ - pattern_data = 0; - for (index = 0; index < 6; index++) { - /* prepare color mask, first write PATTERN_DATA - * will have all zeros - */ - pattern_mask = (1 << index); - - /* write color component */ - REG_SET_2(OTG_TEST_PATTERN_COLOR, 0, - OTG_TEST_PATTERN_MASK, pattern_mask, - OTG_TEST_PATTERN_DATA, pattern_data); - - /* prepare next color component, - * will be written in the next iteration - */ - pattern_data = dst_color[index]; - } - /* write last color component, - * it's been already prepared in the loop - */ - REG_SET_2(OTG_TEST_PATTERN_COLOR, 0, - OTG_TEST_PATTERN_MASK, pattern_mask, - OTG_TEST_PATTERN_DATA, pattern_data); - - /* enable test pattern */ - REG_UPDATE_4(OTG_TEST_PATTERN_CONTROL, - OTG_TEST_PATTERN_EN, 1, - OTG_TEST_PATTERN_MODE, mode, - OTG_TEST_PATTERN_DYNAMIC_RANGE, 0, - OTG_TEST_PATTERN_COLOR_FORMAT, bit_depth); - } - break; - - case CONTROLLER_DP_TEST_PATTERN_COLORRAMP: - { - mode = (bit_depth == - TEST_PATTERN_COLOR_FORMAT_BPC_10 ? 
- TEST_PATTERN_MODE_DUALRAMP_RGB : - TEST_PATTERN_MODE_SINGLERAMP_RGB); - - switch (bit_depth) { - case TEST_PATTERN_COLOR_FORMAT_BPC_6: - dst_bpc = 6; - break; - case TEST_PATTERN_COLOR_FORMAT_BPC_8: - dst_bpc = 8; - break; - case TEST_PATTERN_COLOR_FORMAT_BPC_10: - dst_bpc = 10; - break; - default: - dst_bpc = 8; - break; - } - - /* increment for the first ramp for one color gradation - * 1 gradation for 6-bit color is 2^10 - * gradations in 16-bit color - */ - inc_base = (src_bpc - dst_bpc); - - switch (bit_depth) { - case TEST_PATTERN_COLOR_FORMAT_BPC_6: - { - REG_UPDATE_5(OTG_TEST_PATTERN_PARAMETERS, - OTG_TEST_PATTERN_INC0, inc_base, - OTG_TEST_PATTERN_INC1, 0, - OTG_TEST_PATTERN_HRES, 6, - OTG_TEST_PATTERN_VRES, 6, - OTG_TEST_PATTERN_RAMP0_OFFSET, 0); - } - break; - case TEST_PATTERN_COLOR_FORMAT_BPC_8: - { - REG_UPDATE_5(OTG_TEST_PATTERN_PARAMETERS, - OTG_TEST_PATTERN_INC0, inc_base, - OTG_TEST_PATTERN_INC1, 0, - OTG_TEST_PATTERN_HRES, 8, - OTG_TEST_PATTERN_VRES, 6, - OTG_TEST_PATTERN_RAMP0_OFFSET, 0); - } - break; - case TEST_PATTERN_COLOR_FORMAT_BPC_10: - { - REG_UPDATE_5(OTG_TEST_PATTERN_PARAMETERS, - OTG_TEST_PATTERN_INC0, inc_base, - OTG_TEST_PATTERN_INC1, inc_base + 2, - OTG_TEST_PATTERN_HRES, 8, - OTG_TEST_PATTERN_VRES, 5, - OTG_TEST_PATTERN_RAMP0_OFFSET, 384 << 6); - } - break; - default: - break; - } - - REG_WRITE(OTG_TEST_PATTERN_COLOR, 0); - - /* enable test pattern */ - REG_WRITE(OTG_TEST_PATTERN_CONTROL, 0); - - REG_SET_4(OTG_TEST_PATTERN_CONTROL, 0, - OTG_TEST_PATTERN_EN, 1, - OTG_TEST_PATTERN_MODE, mode, - OTG_TEST_PATTERN_DYNAMIC_RANGE, 0, - OTG_TEST_PATTERN_COLOR_FORMAT, bit_depth); - } - break; - case CONTROLLER_DP_TEST_PATTERN_VIDEOMODE: - { - REG_WRITE(OTG_TEST_PATTERN_CONTROL, 0); - REG_WRITE(OTG_TEST_PATTERN_COLOR, 0); - REG_WRITE(OTG_TEST_PATTERN_PARAMETERS, 0); - } - break; - default: - break; - - } -} - -void optc1_get_crtc_scanoutpos( - struct timing_generator *optc, - uint32_t *v_blank_start, - uint32_t *v_blank_end, - uint32_t *h_position, - uint32_t *v_position) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - struct crtc_position position; - - REG_GET_2(OTG_V_BLANK_START_END, - OTG_V_BLANK_START, v_blank_start, - OTG_V_BLANK_END, v_blank_end); - - optc1_get_position(optc, &position); - - *h_position = position.horizontal_count; - *v_position = position.vertical_count; -} - -static void optc1_enable_stereo(struct timing_generator *optc, - const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - if (flags) { - uint32_t stereo_en; - stereo_en = flags->FRAME_PACKED == 0 ? 1 : 0; - - if (flags->PROGRAM_STEREO) - REG_UPDATE_3(OTG_STEREO_CONTROL, - OTG_STEREO_EN, stereo_en, - OTG_STEREO_SYNC_OUTPUT_LINE_NUM, 0, - OTG_STEREO_SYNC_OUTPUT_POLARITY, flags->RIGHT_EYE_POLARITY == 0 ? 0 : 1); - - if (flags->PROGRAM_POLARITY) - REG_UPDATE(OTG_STEREO_CONTROL, - OTG_STEREO_EYE_FLAG_POLARITY, - flags->RIGHT_EYE_POLARITY == 0 ? 
0 : 1); - - if (flags->DISABLE_STEREO_DP_SYNC) - REG_UPDATE(OTG_STEREO_CONTROL, - OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, 1); - - if (flags->PROGRAM_STEREO) - REG_UPDATE_2(OTG_3D_STRUCTURE_CONTROL, - OTG_3D_STRUCTURE_EN, flags->FRAME_PACKED, - OTG_3D_STRUCTURE_STEREO_SEL_OVR, flags->FRAME_PACKED); - - } -} - -void optc1_program_stereo(struct timing_generator *optc, - const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags) -{ - if (flags->PROGRAM_STEREO) - optc1_enable_stereo(optc, timing, flags); - else - optc1_disable_stereo(optc); -} - - -bool optc1_is_stereo_left_eye(struct timing_generator *optc) -{ - bool ret = false; - uint32_t left_eye = 0; - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_GET(OTG_STEREO_STATUS, - OTG_STEREO_CURRENT_EYE, &left_eye); - if (left_eye == 1) - ret = true; - else - ret = false; - - return ret; -} - -bool optc1_get_hw_timing(struct timing_generator *tg, - struct dc_crtc_timing *hw_crtc_timing) -{ - struct dcn_otg_state s = {0}; - - if (tg == NULL || hw_crtc_timing == NULL) - return false; - - optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s); - - hw_crtc_timing->h_total = s.h_total + 1; - hw_crtc_timing->h_addressable = s.h_total - ((s.h_total - s.h_blank_start) + s.h_blank_end); - hw_crtc_timing->h_front_porch = s.h_total + 1 - s.h_blank_start; - hw_crtc_timing->h_sync_width = s.h_sync_a_end - s.h_sync_a_start; - - hw_crtc_timing->v_total = s.v_total + 1; - hw_crtc_timing->v_addressable = s.v_total - ((s.v_total - s.v_blank_start) + s.v_blank_end); - hw_crtc_timing->v_front_porch = s.v_total + 1 - s.v_blank_start; - hw_crtc_timing->v_sync_width = s.v_sync_a_end - s.v_sync_a_start; - - return true; -} - - -void optc1_read_otg_state(struct optc *optc1, - struct dcn_otg_state *s) -{ - REG_GET(OTG_CONTROL, - OTG_MASTER_EN, &s->otg_enabled); - - REG_GET_2(OTG_V_BLANK_START_END, - OTG_V_BLANK_START, &s->v_blank_start, - OTG_V_BLANK_END, &s->v_blank_end); - - REG_GET(OTG_V_SYNC_A_CNTL, - OTG_V_SYNC_A_POL, &s->v_sync_a_pol); - - REG_GET(OTG_V_TOTAL, - OTG_V_TOTAL, &s->v_total); - - REG_GET(OTG_V_TOTAL_MAX, - OTG_V_TOTAL_MAX, &s->v_total_max); - - REG_GET(OTG_V_TOTAL_MIN, - OTG_V_TOTAL_MIN, &s->v_total_min); - - REG_GET(OTG_V_TOTAL_CONTROL, - OTG_V_TOTAL_MAX_SEL, &s->v_total_max_sel); - - REG_GET(OTG_V_TOTAL_CONTROL, - OTG_V_TOTAL_MIN_SEL, &s->v_total_min_sel); - - REG_GET_2(OTG_V_SYNC_A, - OTG_V_SYNC_A_START, &s->v_sync_a_start, - OTG_V_SYNC_A_END, &s->v_sync_a_end); - - REG_GET_2(OTG_H_BLANK_START_END, - OTG_H_BLANK_START, &s->h_blank_start, - OTG_H_BLANK_END, &s->h_blank_end); - - REG_GET_2(OTG_H_SYNC_A, - OTG_H_SYNC_A_START, &s->h_sync_a_start, - OTG_H_SYNC_A_END, &s->h_sync_a_end); - - REG_GET(OTG_H_SYNC_A_CNTL, - OTG_H_SYNC_A_POL, &s->h_sync_a_pol); - - REG_GET(OTG_H_TOTAL, - OTG_H_TOTAL, &s->h_total); - - REG_GET(OPTC_INPUT_GLOBAL_CONTROL, - OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status); - - REG_GET(OTG_VERTICAL_INTERRUPT1_CONTROL, - OTG_VERTICAL_INTERRUPT1_INT_ENABLE, &s->vertical_interrupt1_en); - - REG_GET(OTG_VERTICAL_INTERRUPT1_POSITION, - OTG_VERTICAL_INTERRUPT1_LINE_START, &s->vertical_interrupt1_line); - - REG_GET(OTG_VERTICAL_INTERRUPT2_CONTROL, - OTG_VERTICAL_INTERRUPT2_INT_ENABLE, &s->vertical_interrupt2_en); - - REG_GET(OTG_VERTICAL_INTERRUPT2_POSITION, - OTG_VERTICAL_INTERRUPT2_LINE_START, &s->vertical_interrupt2_line); -} - -bool optc1_get_otg_active_size(struct timing_generator *optc, - uint32_t *otg_active_width, - uint32_t *otg_active_height) -{ - uint32_t otg_enabled; - uint32_t v_blank_start; - uint32_t 
v_blank_end; - uint32_t h_blank_start; - uint32_t h_blank_end; - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - - REG_GET(OTG_CONTROL, - OTG_MASTER_EN, &otg_enabled); - - if (otg_enabled == 0) - return false; - - REG_GET_2(OTG_V_BLANK_START_END, - OTG_V_BLANK_START, &v_blank_start, - OTG_V_BLANK_END, &v_blank_end); - - REG_GET_2(OTG_H_BLANK_START_END, - OTG_H_BLANK_START, &h_blank_start, - OTG_H_BLANK_END, &h_blank_end); - - *otg_active_width = v_blank_start - v_blank_end; - *otg_active_height = h_blank_start - h_blank_end; - return true; -} - -void optc1_clear_optc_underflow(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, 1); -} - -void optc1_tg_init(struct timing_generator *optc) -{ - optc1_set_blank_data_double_buffer(optc, true); - optc1_set_timing_double_buffer(optc, true); - optc1_clear_optc_underflow(optc); -} - -bool optc1_is_tg_enabled(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t otg_enabled = 0; - - REG_GET(OTG_CONTROL, OTG_MASTER_EN, &otg_enabled); - - return (otg_enabled != 0); - -} - -bool optc1_is_optc_underflow_occurred(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t underflow_occurred = 0; - - REG_GET(OPTC_INPUT_GLOBAL_CONTROL, - OPTC_UNDERFLOW_OCCURRED_STATUS, - &underflow_occurred); - - return (underflow_occurred == 1); -} - -bool optc1_configure_crc(struct timing_generator *optc, - const struct crc_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* Cannot configure crc on a CRTC that is disabled */ - if (!optc1_is_tg_enabled(optc)) - return false; - - REG_WRITE(OTG_CRC_CNTL, 0); - - if (!params->enable) - return true; - - /* Program frame boundaries */ - /* Window A x axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, - OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, - OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); - - /* Window A y axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, - OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, - OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); - - /* Window B x axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, - OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, - OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); - - /* Window B y axis start and end. */ - REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, - OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, - OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); - - /* Set crc mode and selection, and enable. Only using CRC0*/ - REG_UPDATE_3(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1); - - return true; -} - -/** - * optc1_get_crc - Capture CRC result per component - * - * @optc: timing_generator instance. - * @r_cr: 16-bit primary CRC signature for red data. - * @g_y: 16-bit primary CRC signature for green data. - * @b_cb: 16-bit primary CRC signature for blue data. - * - * This function reads the CRC signature from the OPTC registers. Notice that - * we have three registers to keep the CRC result per color component (RGB). - * - * Returns: - * If CRC is disabled, return false; otherwise, return true, and the CRC - * results in the parameters. 
- */ -bool optc1_get_crc(struct timing_generator *optc, - uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) -{ - uint32_t field = 0; - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_GET(OTG_CRC_CNTL, OTG_CRC_EN, &field); - - /* Early return if CRC is not enabled for this CRTC */ - if (!field) - return false; - - /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ - REG_GET_2(OTG_CRC0_DATA_RG, - CRC0_R_CR, r_cr, - CRC0_G_Y, g_y); - - /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ - REG_GET(OTG_CRC0_DATA_B, - CRC0_B_CB, b_cb); - - return true; -} - -static const struct timing_generator_funcs dcn10_tg_funcs = { - .validate_timing = optc1_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc1_enable_crtc, - .disable_crtc = optc1_disable_crtc, - /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank = optc1_set_blank, - .is_blanked = optc1_is_blanked, - .set_blank_color = optc1_program_blank_color, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .disable_reset_trigger = optc1_disable_reset_trigger, - .lock = optc1_lock, - .unlock = optc1_unlock, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc1_set_drr, - .get_last_used_drr_vtotal = NULL, - .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .set_test_pattern = optc1_set_test_pattern, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .set_blank_data_double_buffer = optc1_set_blank_data_double_buffer, - .tg_init = optc1_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .get_crc = optc1_get_crc, - .configure_crc = optc1_configure_crc, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc1_program_manual_trigger, - .setup_manual_trigger = optc1_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, -}; - -void dcn10_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn10_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 4; - optc1->min_v_sync_width = 1; -} - -/* "Containter" vs. "pixel" is a concept within HW blocks, mostly those closer to the back-end. It works like this: - * - * - In most of the formats (RGB or YCbCr 4:4:4, 4:2:2 uncompressed and DSC 4:2:2 Simple) pixel rate is the same as - * containter rate. 
- * - * - In 4:2:0 (DSC or uncompressed) there are two pixels per container, hence the target container rate has to be - * halved to maintain the correct pixel rate. - * - * - Unlike 4:2:2 uncompressed, DSC 4:2:2 Native also has two pixels per container (this happens when DSC is applied - * to it) and has to be treated the same as 4:2:0, i.e. target containter rate has to be halved in this case as well. - * - */ -bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) -{ - bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420; - - two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422 - && !timing->dsc_cfg.ycbcr422_simple); - return two_pix; -} - diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h deleted file mode 100644 index ab81594a7fad..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DC_TIMING_GENERATOR_DCN10_H__ -#define __DC_TIMING_GENERATOR_DCN10_H__ - -#include "optc.h" - -#define DCN10TG_FROM_TG(tg)\ - container_of(tg, struct optc, base) - -#define TG_COMMON_REG_LIST_DCN(inst) \ - SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ - SRI(OTG_VUPDATE_PARAM, OTG, inst),\ - SRI(OTG_VREADY_PARAM, OTG, inst),\ - SRI(OTG_BLANK_CONTROL, OTG, inst),\ - SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\ - SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ - SRI(OTG_H_TOTAL, OTG, inst),\ - SRI(OTG_H_BLANK_START_END, OTG, inst),\ - SRI(OTG_H_SYNC_A, OTG, inst),\ - SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\ - SRI(OTG_H_TIMING_CNTL, OTG, inst),\ - SRI(OTG_V_TOTAL, OTG, inst),\ - SRI(OTG_V_BLANK_START_END, OTG, inst),\ - SRI(OTG_V_SYNC_A, OTG, inst),\ - SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\ - SRI(OTG_INTERLACE_CONTROL, OTG, inst),\ - SRI(OTG_CONTROL, OTG, inst),\ - SRI(OTG_STEREO_CONTROL, OTG, inst),\ - SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ - SRI(OTG_STEREO_STATUS, OTG, inst),\ - SRI(OTG_V_TOTAL_MAX, OTG, inst),\ - SRI(OTG_V_TOTAL_MID, OTG, inst),\ - SRI(OTG_V_TOTAL_MIN, OTG, inst),\ - SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\ - SRI(OTG_TRIGA_CNTL, OTG, inst),\ - SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ - SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ - SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\ - SRI(OTG_STATUS, OTG, inst),\ - SRI(OTG_STATUS_POSITION, OTG, inst),\ - SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ - SRI(OTG_BLACK_COLOR, OTG, inst),\ - SRI(OTG_CLOCK_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ - SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ - SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ - SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ - SRI(CONTROL, VTG, inst),\ - SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ - SRI(OTG_MASTER_UPDATE_MODE, OTG, inst),\ - SRI(OTG_GSL_CONTROL, OTG, inst),\ - SRI(OTG_CRC_CNTL, OTG, inst),\ - SRI(OTG_CRC0_DATA_RG, OTG, inst),\ - SRI(OTG_CRC0_DATA_B, OTG, inst),\ - SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ - SR(GSL_SOURCE_SELECT),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst) - -#define TG_COMMON_REG_LIST_DCN1_0(inst) \ - TG_COMMON_REG_LIST_DCN(inst),\ - SRI(OTG_TEST_PATTERN_PARAMETERS, OTG, inst),\ - SRI(OTG_TEST_PATTERN_CONTROL, OTG, inst),\ - SRI(OTG_TEST_PATTERN_COLOR, OTG, inst),\ - SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst) - - -struct dcn_optc_registers { - uint32_t OTG_GLOBAL_CONTROL1; - uint32_t OTG_GLOBAL_CONTROL2; - uint32_t OTG_VERT_SYNC_CONTROL; - uint32_t OTG_MASTER_UPDATE_MODE; - uint32_t OTG_GSL_CONTROL; - uint32_t OTG_VSTARTUP_PARAM; - uint32_t OTG_VUPDATE_PARAM; - uint32_t OTG_VREADY_PARAM; - uint32_t OTG_BLANK_CONTROL; - uint32_t OTG_MASTER_UPDATE_LOCK; - uint32_t OTG_GLOBAL_CONTROL0; - uint32_t OTG_DOUBLE_BUFFER_CONTROL; - uint32_t OTG_H_TOTAL; - uint32_t OTG_H_BLANK_START_END; - uint32_t OTG_H_SYNC_A; - uint32_t OTG_H_SYNC_A_CNTL; - uint32_t OTG_H_TIMING_CNTL; - uint32_t OTG_V_TOTAL; - uint32_t OTG_V_BLANK_START_END; - uint32_t OTG_V_SYNC_A; - uint32_t OTG_V_SYNC_A_CNTL; - uint32_t OTG_INTERLACE_CONTROL; - uint32_t OTG_CONTROL; - uint32_t 
OTG_STEREO_CONTROL; - uint32_t OTG_3D_STRUCTURE_CONTROL; - uint32_t OTG_STEREO_STATUS; - uint32_t OTG_V_TOTAL_MAX; - uint32_t OTG_V_TOTAL_MID; - uint32_t OTG_V_TOTAL_MIN; - uint32_t OTG_V_TOTAL_CONTROL; - uint32_t OTG_TRIGA_CNTL; - uint32_t OTG_TRIGA_MANUAL_TRIG; - uint32_t OTG_MANUAL_FLOW_CONTROL; - uint32_t OTG_FORCE_COUNT_NOW_CNTL; - uint32_t OTG_STATIC_SCREEN_CONTROL; - uint32_t OTG_STATUS_FRAME_COUNT; - uint32_t OTG_STATUS; - uint32_t OTG_STATUS_POSITION; - uint32_t OTG_NOM_VERT_POSITION; - uint32_t OTG_BLACK_COLOR; - uint32_t OTG_TEST_PATTERN_PARAMETERS; - uint32_t OTG_TEST_PATTERN_CONTROL; - uint32_t OTG_TEST_PATTERN_COLOR; - uint32_t OTG_CLOCK_CONTROL; - uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL; - uint32_t OTG_VERTICAL_INTERRUPT0_POSITION; - uint32_t OTG_VERTICAL_INTERRUPT1_CONTROL; - uint32_t OTG_VERTICAL_INTERRUPT1_POSITION; - uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL; - uint32_t OTG_VERTICAL_INTERRUPT2_POSITION; - uint32_t OPTC_INPUT_CLOCK_CONTROL; - uint32_t OPTC_DATA_SOURCE_SELECT; - uint32_t OPTC_MEMORY_CONFIG; - uint32_t OPTC_INPUT_GLOBAL_CONTROL; - uint32_t CONTROL; - uint32_t OTG_GSL_WINDOW_X; - uint32_t OTG_GSL_WINDOW_Y; - uint32_t OTG_VUPDATE_KEEPOUT; - uint32_t OTG_CRC_CNTL; - uint32_t OTG_CRC_CNTL2; - uint32_t OTG_CRC0_DATA_RG; - uint32_t OTG_CRC0_DATA_B; - uint32_t OTG_CRC1_DATA_B; - uint32_t OTG_CRC2_DATA_B; - uint32_t OTG_CRC3_DATA_B; - uint32_t OTG_CRC1_DATA_RG; - uint32_t OTG_CRC2_DATA_RG; - uint32_t OTG_CRC3_DATA_RG; - uint32_t OTG_CRC0_WINDOWA_X_CONTROL; - uint32_t OTG_CRC0_WINDOWA_Y_CONTROL; - uint32_t OTG_CRC0_WINDOWB_X_CONTROL; - uint32_t OTG_CRC0_WINDOWB_Y_CONTROL; - uint32_t OTG_CRC1_WINDOWA_X_CONTROL; - uint32_t OTG_CRC1_WINDOWA_Y_CONTROL; - uint32_t OTG_CRC1_WINDOWB_X_CONTROL; - uint32_t OTG_CRC1_WINDOWB_Y_CONTROL; - uint32_t GSL_SOURCE_SELECT; - uint32_t DWB_SOURCE_SELECT; - uint32_t OTG_DSC_START_POSITION; - uint32_t OPTC_DATA_FORMAT_CONTROL; - uint32_t OPTC_BYTES_PER_PIXEL; - uint32_t OPTC_WIDTH_CONTROL; - uint32_t OTG_DRR_CONTROL; - uint32_t OTG_BLANK_DATA_COLOR; - uint32_t OTG_BLANK_DATA_COLOR_EXT; - uint32_t OTG_DRR_TRIGGER_WINDOW; - uint32_t OTG_M_CONST_DTO0; - uint32_t OTG_M_CONST_DTO1; - uint32_t OTG_DRR_V_TOTAL_CHANGE; - uint32_t OTG_GLOBAL_CONTROL4; - uint32_t OTG_CRC0_WINDOWA_X_CONTROL_READBACK; - uint32_t OTG_CRC0_WINDOWA_Y_CONTROL_READBACK; - uint32_t OTG_CRC0_WINDOWB_X_CONTROL_READBACK; - uint32_t OTG_CRC0_WINDOWB_Y_CONTROL_READBACK; - uint32_t OTG_CRC1_WINDOWA_X_CONTROL_READBACK; - uint32_t OTG_CRC1_WINDOWA_Y_CONTROL_READBACK; - uint32_t OTG_CRC1_WINDOWB_X_CONTROL_READBACK; - uint32_t OTG_CRC1_WINDOWB_Y_CONTROL_READBACK; - uint32_t OPTC_CLOCK_CONTROL; -}; - -#define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ - SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ - SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ - SF(OTG0_OTG_BLANK_CONTROL, OTG_BLANK_DATA_EN, mask_sh),\ - SF(OTG0_OTG_BLANK_CONTROL, OTG_BLANK_DE_MODE, mask_sh),\ - SF(OTG0_OTG_BLANK_CONTROL, OTG_CURRENT_BLANK_STATE, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_BLANK_DATA_DOUBLE_BUFFER_EN, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - 
SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ - SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_BY2, mask_sh),\ - SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ - SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MID, OTG_V_TOTAL_MID, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK_EN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_FRAME_NUM, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ - SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, 
OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ - SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ - SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_B_CB, mask_sh),\ - SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_G_Y, mask_sh),\ - SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_R_CR, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ - SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ - SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ - SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_MODE, MASTER_UPDATE_INTERLACED_MODE, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, 
mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh) - - - -#define TG_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\ - TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_INC0, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_INC1, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_VRES, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_HRES, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_RAMP0_OFFSET, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_EN, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_MODE, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_DYNAMIC_RANGE, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_COLOR_FORMAT, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_MASK, mask_sh),\ - SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_DATA, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SRC_SEL, mask_sh),\ - SF(OTG0_OTG_MANUAL_FLOW_CONTROL, MANUAL_FLOW_CONTROL, mask_sh),\ - -#define TG_REG_FIELD_LIST_DCN1_0(type) \ - type VSTARTUP_START;\ - type VUPDATE_OFFSET;\ - type VUPDATE_WIDTH;\ - type VREADY_OFFSET;\ - type OTG_BLANK_DATA_EN;\ - type OTG_BLANK_DE_MODE;\ - type OTG_CURRENT_BLANK_STATE;\ - type OTG_MASTER_UPDATE_LOCK;\ - type UPDATE_LOCK_STATUS;\ - type OTG_UPDATE_PENDING;\ - type OTG_MASTER_UPDATE_LOCK_SEL;\ - type OTG_BLANK_DATA_DOUBLE_BUFFER_EN;\ - type OTG_H_TOTAL;\ - type OTG_H_BLANK_START;\ - type OTG_H_BLANK_END;\ - type OTG_H_SYNC_A_START;\ - type OTG_H_SYNC_A_END;\ - type OTG_H_SYNC_A_POL;\ - type OTG_H_TIMING_DIV_BY2;\ - type OTG_V_TOTAL;\ - type OTG_V_BLANK_START;\ - type OTG_V_BLANK_END;\ - type OTG_V_SYNC_A_START;\ - type OTG_V_SYNC_A_END;\ - type OTG_V_SYNC_A_POL;\ - type OTG_INTERLACE_ENABLE;\ - type OTG_MASTER_EN;\ - type OTG_START_POINT_CNTL;\ - type OTG_DISABLE_POINT_CNTL;\ - type OTG_FIELD_NUMBER_CNTL;\ - type OTG_CURRENT_MASTER_EN_STATE;\ - type OTG_STEREO_EN;\ - type OTG_STEREO_SYNC_OUTPUT_LINE_NUM;\ - type OTG_STEREO_SYNC_OUTPUT_POLARITY;\ - type OTG_STEREO_EYE_FLAG_POLARITY;\ - type OTG_STEREO_CURRENT_EYE;\ - type OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP;\ - type OTG_3D_STRUCTURE_EN;\ - type OTG_3D_STRUCTURE_V_UPDATE_MODE;\ - type OTG_3D_STRUCTURE_STEREO_SEL_OVR;\ - type OTG_V_TOTAL_MAX;\ - type OTG_V_TOTAL_MID;\ - type OTG_V_TOTAL_MIN;\ - type OTG_V_TOTAL_MIN_SEL;\ - type OTG_V_TOTAL_MAX_SEL;\ - type OTG_VTOTAL_MID_REPLACING_MAX_EN;\ - type OTG_VTOTAL_MID_FRAME_NUM;\ - type OTG_FORCE_LOCK_ON_EVENT;\ - type OTG_SET_V_TOTAL_MIN_MASK_EN;\ - type OTG_SET_V_TOTAL_MIN_MASK;\ - type OTG_FORCE_COUNT_NOW_CLEAR;\ - type OTG_FORCE_COUNT_NOW_MODE;\ - type OTG_FORCE_COUNT_NOW_OCCURRED;\ - type OTG_TRIGA_SOURCE_SELECT;\ - type OTG_TRIGA_SOURCE_PIPE_SELECT;\ - type OTG_TRIGA_RISING_EDGE_DETECT_CNTL;\ - type OTG_TRIGA_FALLING_EDGE_DETECT_CNTL;\ - type OTG_TRIGA_POLARITY_SELECT;\ - type OTG_TRIGA_FREQUENCY_SELECT;\ - type OTG_TRIGA_DELAY;\ - type OTG_TRIGA_CLEAR;\ - type OTG_TRIGA_MANUAL_TRIG;\ - type OTG_STATIC_SCREEN_EVENT_MASK;\ - type OTG_STATIC_SCREEN_FRAME_COUNT;\ - type OTG_FRAME_COUNT;\ - type OTG_V_BLANK;\ - type OTG_V_ACTIVE_DISP;\ - type OTG_HORZ_COUNT;\ - type OTG_VERT_COUNT;\ - type OTG_VERT_COUNT_NOM;\ - type OTG_BLACK_COLOR_B_CB;\ - type OTG_BLACK_COLOR_G_Y;\ - type OTG_BLACK_COLOR_R_CR;\ - type OTG_BLANK_DATA_COLOR_BLUE_CB;\ - type 
OTG_BLANK_DATA_COLOR_GREEN_Y;\ - type OTG_BLANK_DATA_COLOR_RED_CR;\ - type OTG_BLANK_DATA_COLOR_BLUE_CB_EXT;\ - type OTG_BLANK_DATA_COLOR_GREEN_Y_EXT;\ - type OTG_BLANK_DATA_COLOR_RED_CR_EXT;\ - type OTG_VTOTAL_MID_REPLACING_MIN_EN;\ - type OTG_TEST_PATTERN_INC0;\ - type OTG_TEST_PATTERN_INC1;\ - type OTG_TEST_PATTERN_VRES;\ - type OTG_TEST_PATTERN_HRES;\ - type OTG_TEST_PATTERN_RAMP0_OFFSET;\ - type OTG_TEST_PATTERN_EN;\ - type OTG_TEST_PATTERN_MODE;\ - type OTG_TEST_PATTERN_DYNAMIC_RANGE;\ - type OTG_TEST_PATTERN_COLOR_FORMAT;\ - type OTG_TEST_PATTERN_MASK;\ - type OTG_TEST_PATTERN_DATA;\ - type OTG_BUSY;\ - type OTG_CLOCK_EN;\ - type OTG_CLOCK_ON;\ - type OTG_CLOCK_GATE_DIS;\ - type OTG_VERTICAL_INTERRUPT0_INT_ENABLE;\ - type OTG_VERTICAL_INTERRUPT0_LINE_START;\ - type OTG_VERTICAL_INTERRUPT0_LINE_END;\ - type OTG_VERTICAL_INTERRUPT1_INT_ENABLE;\ - type OTG_VERTICAL_INTERRUPT1_LINE_START;\ - type OTG_VERTICAL_INTERRUPT2_INT_ENABLE;\ - type OTG_VERTICAL_INTERRUPT2_LINE_START;\ - type OPTC_INPUT_CLK_EN;\ - type OPTC_INPUT_CLK_ON;\ - type OPTC_INPUT_CLK_GATE_DIS;\ - type OPTC_UNDERFLOW_OCCURRED_STATUS;\ - type OPTC_UNDERFLOW_CLEAR;\ - type OPTC_SRC_SEL;\ - type VTG0_ENABLE;\ - type VTG0_FP2;\ - type VTG0_VCOUNT_INIT;\ - type OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED;\ - type OTG_FORCE_VSYNC_NEXT_LINE_CLEAR;\ - type OTG_AUTO_FORCE_VSYNC_MODE;\ - type MASTER_UPDATE_INTERLACED_MODE;\ - type OTG_GSL0_EN;\ - type OTG_GSL1_EN;\ - type OTG_GSL2_EN;\ - type OTG_GSL_MASTER_EN;\ - type OTG_GSL_FORCE_DELAY;\ - type OTG_GSL_CHECK_ALL_FIELDS;\ - type OTG_GSL_WINDOW_START_X;\ - type OTG_GSL_WINDOW_END_X;\ - type OTG_GSL_WINDOW_START_Y;\ - type OTG_GSL_WINDOW_END_Y;\ - type OTG_RANGE_TIMING_DBUF_UPDATE_MODE;\ - type OTG_GSL_MASTER_MODE;\ - type OTG_MASTER_UPDATE_LOCK_GSL_EN;\ - type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET;\ - type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET;\ - type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;\ - type OTG_CRC_CONT_EN;\ - type OTG_CRC0_SELECT;\ - type OTG_CRC_EN;\ - type CRC0_R_CR;\ - type CRC0_G_Y;\ - type CRC0_B_CB;\ - type CRC1_R_CR;\ - type CRC1_G_Y;\ - type CRC1_B_CB;\ - type CRC2_R_CR;\ - type CRC2_G_Y;\ - type CRC2_B_CB;\ - type CRC3_R_CR;\ - type CRC3_G_Y;\ - type CRC3_B_CB;\ - type OTG_CRC0_WINDOWA_X_START;\ - type OTG_CRC0_WINDOWA_X_END;\ - type OTG_CRC0_WINDOWA_Y_START;\ - type OTG_CRC0_WINDOWA_Y_END;\ - type OTG_CRC0_WINDOWB_X_START;\ - type OTG_CRC0_WINDOWB_X_END;\ - type OTG_CRC0_WINDOWB_Y_START;\ - type OTG_CRC0_WINDOWB_Y_END;\ - type OTG_CRC_WINDOW_DB_EN;\ - type OTG_CRC1_WINDOWA_X_START;\ - type OTG_CRC1_WINDOWA_X_END;\ - type OTG_CRC1_WINDOWA_Y_START;\ - type OTG_CRC1_WINDOWA_Y_END;\ - type OTG_CRC1_WINDOWB_X_START;\ - type OTG_CRC1_WINDOWB_X_END;\ - type OTG_CRC1_WINDOWB_Y_START;\ - type OTG_CRC1_WINDOWB_Y_END;\ - type GSL0_READY_SOURCE_SEL;\ - type GSL1_READY_SOURCE_SEL;\ - type GSL2_READY_SOURCE_SEL;\ - type MANUAL_FLOW_CONTROL;\ - type MANUAL_FLOW_CONTROL_SEL; - -#define TG_REG_FIELD_LIST(type) \ - TG_REG_FIELD_LIST_DCN1_0(type)\ - type OTG_V_SYNC_MODE;\ - type OTG_DRR_TRIGGER_WINDOW_START_X;\ - type OTG_DRR_TRIGGER_WINDOW_END_X;\ - type OTG_DRR_V_TOTAL_CHANGE_LIMIT;\ - type OTG_OUT_MUX;\ - type OTG_M_CONST_DTO_PHASE;\ - type OTG_M_CONST_DTO_MODULO;\ - type MASTER_UPDATE_LOCK_DB_X;\ - type MASTER_UPDATE_LOCK_DB_Y;\ - type MASTER_UPDATE_LOCK_DB_EN;\ - type GLOBAL_UPDATE_LOCK_EN;\ - type DIG_UPDATE_LOCATION;\ - type OTG_DSC_START_POSITION_X;\ - type OTG_DSC_START_POSITION_LINE_NUM;\ - type OPTC_NUM_OF_INPUT_SEGMENT;\ - type OPTC_SEG0_SRC_SEL;\ - type 
OPTC_SEG1_SRC_SEL;\ - type OPTC_SEG2_SRC_SEL;\ - type OPTC_SEG3_SRC_SEL;\ - type OPTC_MEM_SEL;\ - type OPTC_DATA_FORMAT;\ - type OPTC_DSC_MODE;\ - type OPTC_DSC_BYTES_PER_PIXEL;\ - type OPTC_DSC_SLICE_WIDTH;\ - type OPTC_SEGMENT_WIDTH;\ - type OPTC_DWB0_SOURCE_SELECT;\ - type OPTC_DWB1_SOURCE_SELECT;\ - type MASTER_UPDATE_LOCK_DB_START_X;\ - type MASTER_UPDATE_LOCK_DB_END_X;\ - type MASTER_UPDATE_LOCK_DB_START_Y;\ - type MASTER_UPDATE_LOCK_DB_END_Y;\ - type DIG_UPDATE_POSITION_X;\ - type DIG_UPDATE_POSITION_Y;\ - type OTG_H_TIMING_DIV_MODE;\ - type OTG_DRR_TIMING_DBUF_UPDATE_MODE;\ - type OTG_CRC_DSC_MODE;\ - type OTG_CRC_DATA_STREAM_COMBINE_MODE;\ - type OTG_CRC_DATA_STREAM_SPLIT_MODE;\ - type OTG_CRC_DATA_FORMAT;\ - type OTG_V_TOTAL_LAST_USED_BY_DRR;\ - type OTG_DRR_TIMING_DBUF_UPDATE_PENDING; - -#define TG_REG_FIELD_LIST_DCN3_2(type) \ - type OTG_H_TIMING_DIV_MODE_MANUAL; - - -#define TG_REG_FIELD_LIST_DCN3_5(type) \ - type OTG_CRC0_WINDOWA_X_START_READBACK;\ - type OTG_CRC0_WINDOWA_X_END_READBACK;\ - type OTG_CRC0_WINDOWA_Y_START_READBACK;\ - type OTG_CRC0_WINDOWA_Y_END_READBACK;\ - type OTG_CRC0_WINDOWB_X_START_READBACK;\ - type OTG_CRC0_WINDOWB_X_END_READBACK;\ - type OTG_CRC0_WINDOWB_Y_START_READBACK;\ - type OTG_CRC0_WINDOWB_Y_END_READBACK; \ - type OTG_CRC1_WINDOWA_X_START_READBACK;\ - type OTG_CRC1_WINDOWA_X_END_READBACK;\ - type OTG_CRC1_WINDOWA_Y_START_READBACK;\ - type OTG_CRC1_WINDOWA_Y_END_READBACK;\ - type OTG_CRC1_WINDOWB_X_START_READBACK;\ - type OTG_CRC1_WINDOWB_X_END_READBACK;\ - type OTG_CRC1_WINDOWB_Y_START_READBACK;\ - type OTG_CRC1_WINDOWB_Y_END_READBACK;\ - type OPTC_FGCG_REP_DIS; - -struct dcn_optc_shift { - TG_REG_FIELD_LIST(uint8_t) - TG_REG_FIELD_LIST_DCN3_2(uint8_t) - TG_REG_FIELD_LIST_DCN3_5(uint8_t) -}; - -struct dcn_optc_mask { - TG_REG_FIELD_LIST(uint32_t) - TG_REG_FIELD_LIST_DCN3_2(uint32_t) - TG_REG_FIELD_LIST_DCN3_5(uint32_t) -}; - -void dcn10_timing_generator_init(struct optc *optc); - -#endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 93ac45802e44..bd760442ff89 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -3,7 +3,7 @@ # Makefile for DCN. DCN20 = dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \ - dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \ + dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_mmhubbub.o \ dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c deleted file mode 100644 index 58bdbd859bf9..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ /dev/null @@ -1,587 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "reg_helper.h" -#include "dcn20_optc.h" -#include "dc.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - -/** - * optc2_enable_crtc() - Enable CRTC - call ASIC Control Object to enable Timing generator. - * - * @optc: timing_generator instance. - * - * Return: If CRTC is enabled, return true. - * - */ -bool optc2_enable_crtc(struct timing_generator *optc) -{ - /* TODO FPGA wait for answer - * OTG_MASTER_UPDATE_MODE != CRTC_MASTER_UPDATE_MODE - * OTG_MASTER_UPDATE_LOCK != CRTC_MASTER_UPDATE_LOCK - */ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* opp instance for OTG. For DCN1.0, ODM is remoed. - * OPP and OPTC should 1:1 mapping - */ - REG_UPDATE(OPTC_DATA_SOURCE_SELECT, - OPTC_SEG0_SRC_SEL, optc->inst); - - /* VTG enable first is for HW workaround */ - REG_UPDATE(CONTROL, - VTG0_ENABLE, 1); - - REG_SEQ_START(); - - /* Enable CRTC */ - REG_UPDATE_2(OTG_CONTROL, - OTG_DISABLE_POINT_CNTL, 3, - OTG_MASTER_EN, 1); - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); - - return true; -} - -/** - * optc2_set_gsl() - Assign OTG to GSL groups, - * set one of the OTGs to be master & rest are slaves - * - * @optc: timing_generator instance. - * @params: pointer to gsl_params - */ -void optc2_set_gsl(struct timing_generator *optc, - const struct gsl_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - -/* - * There are (MAX_OPTC+1)/2 gsl groups available for use. - * In each group (assign an OTG to a group by setting OTG_GSLX_EN = 1, - * set one of the OTGs to be the master (OTG_GSL_MASTER_EN = 1) and the rest are slaves. - */ - REG_UPDATE_5(OTG_GSL_CONTROL, - OTG_GSL0_EN, params->gsl0_en, - OTG_GSL1_EN, params->gsl1_en, - OTG_GSL2_EN, params->gsl2_en, - OTG_GSL_MASTER_EN, params->gsl_master_en, - OTG_GSL_MASTER_MODE, params->gsl_master_mode); -} - - -void optc2_set_gsl_source_select( - struct timing_generator *optc, - int group_idx, - uint32_t gsl_ready_signal) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - switch (group_idx) { - case 1: - REG_UPDATE(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, gsl_ready_signal); - break; - case 2: - REG_UPDATE(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, gsl_ready_signal); - break; - case 3: - REG_UPDATE(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, gsl_ready_signal); - break; - default: - break; - } -} - -/* Set DSC-related configuration. 
- * dsc_mode: 0 disables DSC, other values enable DSC in specified format - * sc_bytes_per_pixel: Bytes per pixel in u3.28 format - * dsc_slice_width: Slice width in pixels - */ -void optc2_set_dsc_config(struct timing_generator *optc, - enum optc_dsc_mode dsc_mode, - uint32_t dsc_bytes_per_pixel, - uint32_t dsc_slice_width) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OPTC_DATA_FORMAT_CONTROL, - OPTC_DSC_MODE, dsc_mode); - - REG_SET(OPTC_BYTES_PER_PIXEL, 0, - OPTC_DSC_BYTES_PER_PIXEL, dsc_bytes_per_pixel); - - REG_UPDATE(OPTC_WIDTH_CONTROL, - OPTC_DSC_SLICE_WIDTH, dsc_slice_width); -} - -/* Get DSC-related configuration. - * dsc_mode: 0 disables DSC, other values enable DSC in specified format - */ -void optc2_get_dsc_status(struct timing_generator *optc, - uint32_t *dsc_mode) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_GET(OPTC_DATA_FORMAT_CONTROL, - OPTC_DSC_MODE, dsc_mode); -} - - -/*TEMP: Need to figure out inheritance model here.*/ -bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) -{ - return optc1_is_two_pixels_per_containter(timing); -} - -void optc2_set_odm_bypass(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t h_div_2 = 0; - - REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 0, - OPTC_SEG0_SRC_SEL, optc->inst, - OPTC_SEG1_SRC_SEL, 0xf); - REG_WRITE(OTG_H_TIMING_CNTL, 0); - - h_div_2 = optc2_is_two_pixels_per_containter(dc_crtc_timing); - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_BY2, h_div_2); - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, 0); - optc1->opp_count = 1; -} - -void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, - struct dc_crtc_timing *timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) - / opp_cnt; - uint32_t memory_mask; - - ASSERT(opp_cnt == 2); - - /* TODO: In pseudocode but does not affect maximus, delete comment if we dont need on asic - * REG_SET(OTG_GLOBAL_CONTROL2, 0, GLOBAL_UPDATE_LOCK_EN, 1); - * Program OTG register MASTER_UPDATE_LOCK_DB_X/Y to the position before DP frame start - * REG_SET_2(OTG_GLOBAL_CONTROL1, 0, - * MASTER_UPDATE_LOCK_DB_X, 160, - * MASTER_UPDATE_LOCK_DB_Y, 240); - */ - - /* 2 pieces of memory required for up to 5120 displays, 4 for up to 8192, - * however, for ODM combine we can simplify by always using 4. - * To make sure there's no overlap, each instance "reserves" 2 memories and - * they are uniquely combined here. 
- */ - memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); - - if (REG(OPTC_MEMORY_CONFIG)) - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, memory_mask); - - REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 1, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1]); - - REG_UPDATE(OPTC_WIDTH_CONTROL, - OPTC_SEGMENT_WIDTH, mpcc_hactive); - - REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_BY2, 1); - optc1->opp_count = opp_cnt; -} - -void optc2_get_optc_source(struct timing_generator *optc, - uint32_t *num_of_src_opp, - uint32_t *src_opp_id_0, - uint32_t *src_opp_id_1) -{ - uint32_t num_of_input_segments; - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_GET_3(OPTC_DATA_SOURCE_SELECT, - OPTC_NUM_OF_INPUT_SEGMENT, &num_of_input_segments, - OPTC_SEG0_SRC_SEL, src_opp_id_0, - OPTC_SEG1_SRC_SEL, src_opp_id_1); - - if (num_of_input_segments == 1) - *num_of_src_opp = 2; - else - *num_of_src_opp = 1; - - /* Work around VBIOS not updating OPTC_NUM_OF_INPUT_SEGMENT */ - if (*src_opp_id_1 == 0xf) - *num_of_src_opp = 1; -} - -static void optc2_set_dwb_source(struct timing_generator *optc, - uint32_t dwb_pipe_inst) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - if (dwb_pipe_inst == 0) - REG_UPDATE(DWB_SOURCE_SELECT, - OPTC_DWB0_SOURCE_SELECT, optc->inst); - else if (dwb_pipe_inst == 1) - REG_UPDATE(DWB_SOURCE_SELECT, - OPTC_DWB1_SOURCE_SELECT, optc->inst); -} - -static void optc2_align_vblanks( - struct timing_generator *optc_master, - struct timing_generator *optc_slave, - uint32_t master_pixel_clock_100Hz, - uint32_t slave_pixel_clock_100Hz, - uint8_t master_clock_divider, - uint8_t slave_clock_divider) -{ - /* accessing slave OTG registers */ - struct optc *optc1 = DCN10TG_FROM_TG(optc_slave); - - uint32_t master_v_active = 0; - uint32_t master_h_total = 0; - uint32_t slave_h_total = 0; - uint64_t L, XY; - uint32_t X, Y, p = 10000; - uint32_t master_update_lock; - - /* disable slave OTG */ - REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); - /* wait until disabled */ - REG_WAIT(OTG_CONTROL, - OTG_CURRENT_MASTER_EN_STATE, - 0, 10, 5000); - - REG_GET(OTG_H_TOTAL, OTG_H_TOTAL, &slave_h_total); - - /* assign slave OTG to be controlled by master update lock */ - REG_SET(OTG_GLOBAL_CONTROL0, 0, - OTG_MASTER_UPDATE_LOCK_SEL, optc_master->inst); - - /* accessing master OTG registers */ - optc1 = DCN10TG_FROM_TG(optc_master); - - /* saving update lock state, not sure if it's needed */ - REG_GET(OTG_MASTER_UPDATE_LOCK, - OTG_MASTER_UPDATE_LOCK, &master_update_lock); - /* unlocking master OTG */ - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 0); - - REG_GET(OTG_V_BLANK_START_END, - OTG_V_BLANK_START, &master_v_active); - REG_GET(OTG_H_TOTAL, OTG_H_TOTAL, &master_h_total); - - /* calculate when to enable slave OTG */ - L = (uint64_t)p * slave_h_total * master_pixel_clock_100Hz; - L = div_u64(L, master_h_total); - L = div_u64(L, slave_pixel_clock_100Hz); - XY = div_u64(L, p); - Y = master_v_active - XY - 1; - X = div_u64(((XY + 1) * p - L) * master_h_total, p * master_clock_divider); - - /* - * set master OTG to unlock when V/H - * counters reach calculated values - */ - REG_UPDATE(OTG_GLOBAL_CONTROL1, - MASTER_UPDATE_LOCK_DB_EN, 1); - REG_UPDATE_2(OTG_GLOBAL_CONTROL1, - MASTER_UPDATE_LOCK_DB_X, - X, - MASTER_UPDATE_LOCK_DB_Y, - Y); - - /* lock master OTG */ - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 1); - REG_WAIT(OTG_MASTER_UPDATE_LOCK, - UPDATE_LOCK_STATUS, 1, 1, 10); - - /* accessing slave OTG registers */ - optc1 = 
DCN10TG_FROM_TG(optc_slave); - - /* - * enable slave OTG, the OTG is locked with - * master's update lock, so it will not run - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 1); - - /* accessing master OTG registers */ - optc1 = DCN10TG_FROM_TG(optc_master); - - /* - * unlock master OTG. When master H/V counters reach - * DB_XY point, slave OTG will start - */ - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 0); - - /* accessing slave OTG registers */ - optc1 = DCN10TG_FROM_TG(optc_slave); - - /* wait for slave OTG to start running*/ - REG_WAIT(OTG_CONTROL, - OTG_CURRENT_MASTER_EN_STATE, - 1, 10, 5000); - - /* accessing master OTG registers */ - optc1 = DCN10TG_FROM_TG(optc_master); - - /* disable the XY point*/ - REG_UPDATE(OTG_GLOBAL_CONTROL1, - MASTER_UPDATE_LOCK_DB_EN, 0); - REG_UPDATE_2(OTG_GLOBAL_CONTROL1, - MASTER_UPDATE_LOCK_DB_X, - 0, - MASTER_UPDATE_LOCK_DB_Y, - 0); - - /*restore master update lock*/ - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, master_update_lock); - - /* accessing slave OTG registers */ - optc1 = DCN10TG_FROM_TG(optc_slave); - /* restore slave to be controlled by it's own */ - REG_SET(OTG_GLOBAL_CONTROL0, 0, - OTG_MASTER_UPDATE_LOCK_SEL, optc_slave->inst); - -} - -void optc2_triplebuffer_lock(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_GLOBAL_CONTROL0, 0, - OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); - - REG_SET(OTG_VUPDATE_KEEPOUT, 0, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); - - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 1); - - REG_WAIT(OTG_MASTER_UPDATE_LOCK, - UPDATE_LOCK_STATUS, 1, - 1, 10); -} - -void optc2_triplebuffer_unlock(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 0); - - REG_SET(OTG_VUPDATE_KEEPOUT, 0, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 0); - -} - -void optc2_lock_doublebuffer_enable(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t v_blank_start = 0; - uint32_t h_blank_start = 0; - - REG_UPDATE(OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_EN, 1); - - REG_UPDATE_2(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 1, - DIG_UPDATE_LOCATION, 20); - - REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START, &v_blank_start); - - REG_GET(OTG_H_BLANK_START_END, OTG_H_BLANK_START, &h_blank_start); - - REG_UPDATE_2(OTG_GLOBAL_CONTROL1, - MASTER_UPDATE_LOCK_DB_X, - (h_blank_start - 200 - 1) / optc1->opp_count, - MASTER_UPDATE_LOCK_DB_Y, - v_blank_start - 1); - - REG_SET_3(OTG_VUPDATE_KEEPOUT, 0, - MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, 0, - MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, 100, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); -} - -void optc2_lock_doublebuffer_disable(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE_2(OTG_GLOBAL_CONTROL1, - MASTER_UPDATE_LOCK_DB_X, - 0, - MASTER_UPDATE_LOCK_DB_Y, - 0); - - REG_UPDATE_2(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 0, - DIG_UPDATE_LOCATION, 0); - - REG_UPDATE(OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_EN, 0); -} - -void optc2_setup_manual_trigger(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* Set the min/max selectors unconditionally so that - * DMCUB fw may change OTG timings when necessary - * TODO: Remove the w/a after fixing the issue in DMCUB firmware - */ - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, - OTG_V_TOTAL_MIN_SEL, 1, - OTG_V_TOTAL_MAX_SEL, 1, - 
OTG_FORCE_LOCK_ON_EVENT, 0, - OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ - - REG_SET_8(OTG_TRIGA_CNTL, 0, - OTG_TRIGA_SOURCE_SELECT, 21, - OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, - OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1, - OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0, - OTG_TRIGA_POLARITY_SELECT, 0, - OTG_TRIGA_FREQUENCY_SELECT, 0, - OTG_TRIGA_DELAY, 0, - OTG_TRIGA_CLEAR, 1); -} - -void optc2_program_manual_trigger(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_TRIGA_MANUAL_TRIG, 0, - OTG_TRIGA_MANUAL_TRIG, 1); -} - -bool optc2_configure_crc(struct timing_generator *optc, - const struct crc_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_2(OTG_CRC_CNTL2, 0, - OTG_CRC_DSC_MODE, params->dsc_mode, - OTG_CRC_DATA_STREAM_COMBINE_MODE, params->odm_mode); - - return optc1_configure_crc(optc, params); -} - - -void optc2_get_last_used_drr_vtotal(struct timing_generator *optc, uint32_t *refresh_rate) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_GET(OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, refresh_rate); -} - -static struct timing_generator_funcs dcn20_tg_funcs = { - .validate_timing = optc1_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc2_enable_crtc, - .disable_crtc = optc1_disable_crtc, - /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. 
Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank = optc1_set_blank, - .is_blanked = optc1_is_blanked, - .set_blank_color = optc1_program_blank_color, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .triplebuffer_lock = optc2_triplebuffer_lock, - .triplebuffer_unlock = optc2_triplebuffer_unlock, - .disable_reset_trigger = optc1_disable_reset_trigger, - .lock = optc1_lock, - .unlock = optc1_unlock, - .lock_doublebuffer_enable = optc2_lock_doublebuffer_enable, - .lock_doublebuffer_disable = optc2_lock_doublebuffer_disable, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc1_set_drr, - .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, - .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .set_blank_data_double_buffer = optc1_set_blank_data_double_buffer, - .tg_init = optc1_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .setup_global_swap_lock = NULL, - .get_crc = optc1_get_crc, - .configure_crc = optc2_configure_crc, - .set_dsc_config = optc2_set_dsc_config, - .get_dsc_status = optc2_get_dsc_status, - .set_dwb_source = optc2_set_dwb_source, - .set_odm_bypass = optc2_set_odm_bypass, - .set_odm_combine = optc2_set_odm_combine, - .get_optc_source = optc2_get_optc_source, - .set_gsl = optc2_set_gsl, - .set_gsl_source_select = optc2_set_gsl_source_select, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc2_program_manual_trigger, - .setup_manual_trigger = optc2_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, - .align_vblanks = optc2_align_vblanks, -}; - -void dcn20_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn20_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 4;// Minimum HSYNC = 8 pixels asked By HW in the first place for no actual reason. Oculus Rift S will not light up with 8 as it's hsyncWidth is 6. Changing it to 4 to fix that issue. - optc1->min_v_sync_width = 1; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h deleted file mode 100644 index f7968b9ca16e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_OPTC_DCN20_H__ -#define __DC_OPTC_DCN20_H__ - -#include "../dcn10/dcn10_optc.h" - -#define TG_COMMON_REG_LIST_DCN2_0(inst) \ - TG_COMMON_REG_LIST_DCN(inst),\ - SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_GSL_WINDOW_X, OTG, inst),\ - SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ - SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ - SRI(OTG_DSC_START_POSITION, OTG, inst),\ - SRI(OTG_CRC_CNTL2, OTG, inst),\ - SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ - SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ - SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ - SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ - SR(DWB_SOURCE_SELECT),\ - SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst), \ - SRI(OTG_DRR_CONTROL, OTG, inst) - -#define TG_COMMON_MASK_SH_LIST_DCN2_0(mask_sh)\ - TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, DIG_UPDATE_LOCATION, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ - SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ - SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_MANUAL_FLOW_CONTROL, MANUAL_FLOW_CONTROL, mask_sh), \ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) - -void dcn20_timing_generator_init(struct optc *optc); - -void optc2_get_last_used_drr_vtotal(struct timing_generator *optc, - uint32_t *refresh_rate); - -bool optc2_enable_crtc(struct 
timing_generator *optc); - -void optc2_set_gsl(struct timing_generator *optc, - const struct gsl_params *params); - -void optc2_set_gsl_source_select(struct timing_generator *optc, - int group_idx, - uint32_t gsl_ready_signal); - -void optc2_set_dsc_config(struct timing_generator *optc, - enum optc_dsc_mode dsc_mode, - uint32_t dsc_bytes_per_pixel, - uint32_t dsc_slice_width); - -void optc2_get_dsc_status(struct timing_generator *optc, - uint32_t *dsc_mode); - -void optc2_set_odm_bypass(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing); - -void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, - struct dc_crtc_timing *timing); - -void optc2_get_optc_source(struct timing_generator *optc, - uint32_t *num_of_src_opp, - uint32_t *src_opp_id_0, - uint32_t *src_opp_id_1); - -void optc2_triplebuffer_lock(struct timing_generator *optc); -void optc2_triplebuffer_unlock(struct timing_generator *optc); -void optc2_lock_doublebuffer_disable(struct timing_generator *optc); -void optc2_lock_doublebuffer_enable(struct timing_generator *optc); -void optc2_setup_manual_trigger(struct timing_generator *optc); -void optc2_program_manual_trigger(struct timing_generator *optc); -bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); -bool optc2_configure_crc(struct timing_generator *optc, - const struct crc_params *params); -#endif /* __DC_OPTC_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile index c069a894db92..a101e6511555 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile @@ -3,7 +3,7 @@ # Makefile for DCN. DCN201 = dcn201_init.o \ dcn201_hubbub.o\ - dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \ + dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_dpp.o \ dcn201_dccg.o dcn201_link_encoder.o AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c deleted file mode 100644 index 70fcbec03fb6..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "reg_helper.h" -#include "dcn201_optc.h" -#include "dcn10/dcn10_optc.h" -#include "dc.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - -/*TEMP: Need to figure out inheritance model here.*/ -bool optc201_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) -{ - return optc1_is_two_pixels_per_containter(timing); -} - -static void optc201_triplebuffer_lock(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_GLOBAL_CONTROL0, 0, - OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); - REG_SET(OTG_VUPDATE_KEEPOUT, 0, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 1); - - REG_WAIT(OTG_MASTER_UPDATE_LOCK, - UPDATE_LOCK_STATUS, 1, - 1, 10); -} - -static void optc201_triplebuffer_unlock(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 0); - REG_SET(OTG_VUPDATE_KEEPOUT, 0, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 0); - -} - -static bool optc201_validate_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *timing) -{ - uint32_t v_blank; - uint32_t h_blank; - uint32_t min_v_blank; - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - ASSERT(timing != NULL); - - v_blank = (timing->v_total - timing->v_addressable - - timing->v_border_top - timing->v_border_bottom); - - h_blank = (timing->h_total - timing->h_addressable - - timing->h_border_right - - timing->h_border_left); - - if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE && - timing->timing_3d_format != TIMING_3D_FORMAT_HW_FRAME_PACKING && - timing->timing_3d_format != TIMING_3D_FORMAT_TOP_AND_BOTTOM && - timing->timing_3d_format != TIMING_3D_FORMAT_SIDE_BY_SIDE && - timing->timing_3d_format != TIMING_3D_FORMAT_FRAME_ALTERNATE && - timing->timing_3d_format != TIMING_3D_FORMAT_INBAND_FA) - return false; - - /* Check maximum number of pixels supported by Timing Generator - * (Currently will never fail, in order to fail needs display which - * needs more than 8192 horizontal and - * more than 8192 vertical total pixels) - */ - if (timing->h_total > optc1->max_h_total || - timing->v_total > optc1->max_v_total) - return false; - - if (h_blank < optc1->min_h_blank) - return false; - - if (timing->h_sync_width < optc1->min_h_sync_width || - timing->v_sync_width < optc1->min_v_sync_width) - return false; - - min_v_blank = timing->flags.INTERLACE?optc1->min_v_blank_interlace:optc1->min_v_blank; - - if (v_blank < min_v_blank) - return false; - - return true; - -} - -static void optc201_get_optc_source(struct timing_generator *optc, - uint32_t *num_of_src_opp, - uint32_t *src_opp_id_0, - uint32_t *src_opp_id_1) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_GET(OPTC_DATA_SOURCE_SELECT, - OPTC_SEG0_SRC_SEL, src_opp_id_0); - - *num_of_src_opp = 1; -} - -static struct timing_generator_funcs dcn201_tg_funcs = { - .validate_timing = optc201_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc2_enable_crtc, - .disable_crtc = optc1_disable_crtc, - /* used by 
enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank = optc1_set_blank, - .is_blanked = optc1_is_blanked, - .set_blank_color = optc1_program_blank_color, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .disable_reset_trigger = optc1_disable_reset_trigger, - .triplebuffer_lock = optc201_triplebuffer_lock, - .triplebuffer_unlock = optc201_triplebuffer_unlock, - .lock = optc1_lock, - .unlock = optc1_unlock, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc1_set_drr, - .get_last_used_drr_vtotal = NULL, - .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .set_blank_data_double_buffer = optc1_set_blank_data_double_buffer, - .tg_init = optc1_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .get_crc = optc1_get_crc, - .configure_crc = optc2_configure_crc, - .set_dsc_config = optc2_set_dsc_config, - .set_dwb_source = NULL, - .get_optc_source = optc201_get_optc_source, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc2_program_manual_trigger, - .setup_manual_trigger = optc2_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, -}; - -void dcn201_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn201_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 8; - optc1->min_v_sync_width = 1; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h deleted file mode 100644 index e9545b73513a..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_OPTC_DCN201_H__ -#define __DC_OPTC_DCN201_H__ - -#include "dcn20/dcn20_optc.h" - -#define TG_COMMON_REG_LIST_DCN201(inst) \ - TG_COMMON_REG_LIST_DCN(inst),\ - SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_GSL_WINDOW_X, OTG, inst),\ - SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ - SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ - SRI(OTG_DSC_START_POSITION, OTG, inst),\ - SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ - SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ - SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ - SR(DWB_SOURCE_SELECT) - -#define TG_COMMON_MASK_SH_LIST_DCN201(mask_sh)\ - TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ - SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ - SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh) - -void dcn201_timing_generator_init(struct optc *optc); - -bool optc201_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 9dcf06c0954d..cd95f322235e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -28,7 +28,6 @@ DCN30 := \ dcn30_hubbub.o \ dcn30_hubp.o \ dcn30_dpp.o \ - dcn30_optc.o \ dcn30_dccg.o \ dcn30_mpc.o dcn30_vpg.o \ dcn30_afmt.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c deleted file mode 100644 index b97bdb868a0e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "reg_helper.h" -#include "dcn30_optc.h" -#include "dc.h" -#include "dcn_calc_math.h" -#include "dc_dmub_srv.h" - -#include "dml/dcn30/dcn30_fpu.h" -#include "dc_trace.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - -void optc3_triplebuffer_lock(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OTG_GLOBAL_CONTROL2, - OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); - - REG_SET(OTG_VUPDATE_KEEPOUT, 0, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); - - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 1); - - REG_WAIT(OTG_MASTER_UPDATE_LOCK, - UPDATE_LOCK_STATUS, 1, - 1, 10); - - TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); -} - -void optc3_lock_doublebuffer_enable(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t v_blank_start = 0; - uint32_t v_blank_end = 0; - uint32_t h_blank_start = 0; - uint32_t h_blank_end = 0; - - REG_GET_2(OTG_V_BLANK_START_END, - OTG_V_BLANK_START, &v_blank_start, - OTG_V_BLANK_END, &v_blank_end); - REG_GET_2(OTG_H_BLANK_START_END, - OTG_H_BLANK_START, &h_blank_start, - OTG_H_BLANK_END, &h_blank_end); - - REG_UPDATE_2(OTG_GLOBAL_CONTROL1, - MASTER_UPDATE_LOCK_DB_START_Y, v_blank_start - 1, - MASTER_UPDATE_LOCK_DB_END_Y, v_blank_start); - REG_UPDATE_2(OTG_GLOBAL_CONTROL4, - DIG_UPDATE_POSITION_X, h_blank_start - 180 - 1, - DIG_UPDATE_POSITION_Y, v_blank_start - 1); - // there is a DIG_UPDATE_VCOUNT_MODE and it is 0. 
- - REG_UPDATE_3(OTG_GLOBAL_CONTROL0, - MASTER_UPDATE_LOCK_DB_START_X, h_blank_start - 200 - 1, - MASTER_UPDATE_LOCK_DB_END_X, h_blank_start - 180, - MASTER_UPDATE_LOCK_DB_EN, 1); - REG_UPDATE(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 1); - - REG_SET_3(OTG_VUPDATE_KEEPOUT, 0, - MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, 0, - MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, 100, - OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); - - TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); -} - -void optc3_lock_doublebuffer_disable(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE_2(OTG_GLOBAL_CONTROL0, - MASTER_UPDATE_LOCK_DB_START_X, 0, - MASTER_UPDATE_LOCK_DB_END_X, 0); - REG_UPDATE_2(OTG_GLOBAL_CONTROL1, - MASTER_UPDATE_LOCK_DB_START_Y, 0, - MASTER_UPDATE_LOCK_DB_END_Y, 0); - - REG_UPDATE(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 0); - REG_UPDATE(OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, 0); - - TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); -} - -void optc3_lock(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OTG_GLOBAL_CONTROL2, - OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); - REG_SET(OTG_MASTER_UPDATE_LOCK, 0, - OTG_MASTER_UPDATE_LOCK, 1); - - REG_WAIT(OTG_MASTER_UPDATE_LOCK, - UPDATE_LOCK_STATUS, 1, - 1, 10); - - TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); -} - -void optc3_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OTG_CONTROL, OTG_OUT_MUX, dest); -} - -void optc3_program_blank_color(struct timing_generator *optc, - const struct tg_color *blank_color) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_3(OTG_BLANK_DATA_COLOR, 0, - OTG_BLANK_DATA_COLOR_BLUE_CB, blank_color->color_b_cb, - OTG_BLANK_DATA_COLOR_GREEN_Y, blank_color->color_g_y, - OTG_BLANK_DATA_COLOR_RED_CR, blank_color->color_r_cr); - - REG_SET_3(OTG_BLANK_DATA_COLOR_EXT, 0, - OTG_BLANK_DATA_COLOR_BLUE_CB_EXT, blank_color->color_b_cb >> 10, - OTG_BLANK_DATA_COLOR_GREEN_Y_EXT, blank_color->color_g_y >> 10, - OTG_BLANK_DATA_COLOR_RED_CR_EXT, blank_color->color_r_cr >> 10); -} - -void optc3_set_drr_trigger_window(struct timing_generator *optc, - uint32_t window_start, uint32_t window_end) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_2(OTG_DRR_TRIGGER_WINDOW, 0, - OTG_DRR_TRIGGER_WINDOW_START_X, window_start, - OTG_DRR_TRIGGER_WINDOW_END_X, window_end); -} - -void optc3_set_vtotal_change_limit(struct timing_generator *optc, - uint32_t limit) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - - REG_SET(OTG_DRR_V_TOTAL_CHANGE, 0, - OTG_DRR_V_TOTAL_CHANGE_LIMIT, limit); -} - - -/* Set DSC-related configuration. 
- * dsc_mode: 0 disables DSC, other values enable DSC in specified format - * sc_bytes_per_pixel: Bytes per pixel in u3.28 format - * dsc_slice_width: Slice width in pixels - */ -void optc3_set_dsc_config(struct timing_generator *optc, - enum optc_dsc_mode dsc_mode, - uint32_t dsc_bytes_per_pixel, - uint32_t dsc_slice_width) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - optc2_set_dsc_config(optc, dsc_mode, dsc_bytes_per_pixel, dsc_slice_width); - REG_UPDATE(OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, 0); -} - -void optc3_set_odm_bypass(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - enum h_timing_div_mode h_div = H_TIMING_NO_DIV; - - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 0, - OPTC_SEG0_SRC_SEL, optc->inst, - OPTC_SEG1_SRC_SEL, 0xf, - OPTC_SEG2_SRC_SEL, 0xf, - OPTC_SEG3_SRC_SEL, 0xf - ); - - h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_MODE, h_div); - - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, 0); - optc1->opp_count = 1; -} - -void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, - struct dc_crtc_timing *timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) - / opp_cnt; - uint32_t memory_mask = 0; - - /* TODO: In pseudocode but does not affect maximus, delete comment if we dont need on asic - * REG_SET(OTG_GLOBAL_CONTROL2, 0, GLOBAL_UPDATE_LOCK_EN, 1); - * Program OTG register MASTER_UPDATE_LOCK_DB_X/Y to the position before DP frame start - * REG_SET_2(OTG_GLOBAL_CONTROL1, 0, - * MASTER_UPDATE_LOCK_DB_X, 160, - * MASTER_UPDATE_LOCK_DB_Y, 240); - */ - - ASSERT(opp_cnt == 2 || opp_cnt == 4); - - /* 2 pieces of memory required for up to 5120 displays, 4 for up to 8192, - * however, for ODM combine we can simplify by always using 4. - */ - if (opp_cnt == 2) { - /* To make sure there's no memory overlap, each instance "reserves" 2 - * memories and they are uniquely combined here. - */ - memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); - } else if (opp_cnt == 4) { - /* To make sure there's no memory overlap, each instance "reserves" 1 - * memory and they are uniquely combined here. - */ - memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2) | 0x1 << (opp_id[2] * 2) | 0x1 << (opp_id[3] * 2); - } - - if (REG(OPTC_MEMORY_CONFIG)) - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, memory_mask); - - if (opp_cnt == 2) { - REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 1, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1]); - } else if (opp_cnt == 4) { - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 3, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1], - OPTC_SEG2_SRC_SEL, opp_id[2], - OPTC_SEG3_SRC_SEL, opp_id[3]); - } - - REG_UPDATE(OPTC_WIDTH_CONTROL, - OPTC_SEGMENT_WIDTH, mpcc_hactive); - - REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1); - optc1->opp_count = opp_cnt; -} - -/** - * optc3_set_timing_double_buffer() - DRR double buffering control - * - * Sets double buffer point for V_TOTAL, H_TOTAL, VTOTAL_MIN, - * VTOTAL_MAX, VTOTAL_MIN_SEL and VTOTAL_MAX_SEL registers. - * - * @optc: timing_generator instance. - * @enable: Enable DRR double buffering control if true, disable otherwise. 
- * - * Options: any time, start of frame, dp start of frame (range timing) - */ -static void optc3_set_timing_double_buffer(struct timing_generator *optc, bool enable) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t mode = enable ? 2 : 0; - - REG_UPDATE(OTG_DOUBLE_BUFFER_CONTROL, - OTG_DRR_TIMING_DBUF_UPDATE_MODE, mode); -} - -void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_WAIT(OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_PENDING, 0, 2, 100000); /* 1 vupdate at 5hz */ - -} - -void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max) -{ - struct dc *dc = optc->ctx->dc; - - if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams) - dc_dmub_srv_drr_update_cmd(dc, optc->inst, vtotal_min, vtotal_max); - else - optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max); -} - -void optc3_tg_init(struct timing_generator *optc) -{ - optc3_set_timing_double_buffer(optc, true); - optc1_clear_optc_underflow(optc); -} - -static struct timing_generator_funcs dcn30_tg_funcs = { - .validate_timing = optc1_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc2_enable_crtc, - .disable_crtc = optc1_disable_crtc, - /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. 
Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank_color = optc3_program_blank_color, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .triplebuffer_lock = optc3_triplebuffer_lock, - .triplebuffer_unlock = optc2_triplebuffer_unlock, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .disable_reset_trigger = optc1_disable_reset_trigger, - .lock = optc3_lock, - .unlock = optc1_unlock, - .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, - .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc1_set_drr, - .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, - .set_vtotal_min_max = optc3_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .tg_init = optc3_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .setup_global_swap_lock = NULL, - .get_crc = optc1_get_crc, - .configure_crc = optc2_configure_crc, - .set_dsc_config = optc3_set_dsc_config, - .get_dsc_status = optc2_get_dsc_status, - .set_dwb_source = NULL, - .set_odm_bypass = optc3_set_odm_bypass, - .set_odm_combine = optc3_set_odm_combine, - .get_optc_source = optc2_get_optc_source, - .set_out_mux = optc3_set_out_mux, - .set_drr_trigger_window = optc3_set_drr_trigger_window, - .set_vtotal_change_limit = optc3_set_vtotal_change_limit, - .set_gsl = optc2_set_gsl, - .set_gsl_source_select = optc2_set_gsl_source_select, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc2_program_manual_trigger, - .setup_manual_trigger = optc2_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, - .wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear, -}; - -void dcn30_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn30_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 4; - optc1->min_v_sync_width = 1; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h deleted file mode 100644 index d3a056c12b0d..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_OPTC_DCN30_H__ -#define __DC_OPTC_DCN30_H__ - -#include "dcn20/dcn20_optc.h" - -#define V_TOTAL_REGS_DCN30_SRI(inst) - -#define OPTC_COMMON_REG_LIST_DCN3_BASE(inst) \ - SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ - SRI(OTG_VUPDATE_PARAM, OTG, inst),\ - SRI(OTG_VREADY_PARAM, OTG, inst),\ - SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL4, OTG, inst),\ - SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ - SRI(OTG_H_TOTAL, OTG, inst),\ - SRI(OTG_H_BLANK_START_END, OTG, inst),\ - SRI(OTG_H_SYNC_A, OTG, inst),\ - SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\ - SRI(OTG_H_TIMING_CNTL, OTG, inst),\ - SRI(OTG_V_TOTAL, OTG, inst),\ - SRI(OTG_V_BLANK_START_END, OTG, inst),\ - SRI(OTG_V_SYNC_A, OTG, inst),\ - SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\ - SRI(OTG_CONTROL, OTG, inst),\ - SRI(OTG_STEREO_CONTROL, OTG, inst),\ - SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ - SRI(OTG_STEREO_STATUS, OTG, inst),\ - SRI(OTG_V_TOTAL_MAX, OTG, inst),\ - SRI(OTG_V_TOTAL_MIN, OTG, inst),\ - SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\ - V_TOTAL_REGS_DCN30_SRI(inst)\ - SRI(OTG_TRIGA_CNTL, OTG, inst),\ - SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ - SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ - SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\ - SRI(OTG_STATUS, OTG, inst),\ - SRI(OTG_STATUS_POSITION, OTG, inst),\ - SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ - SRI(OTG_BLANK_DATA_COLOR, OTG, inst),\ - SRI(OTG_BLANK_DATA_COLOR_EXT, OTG, inst),\ - SRI(OTG_M_CONST_DTO0, OTG, inst),\ - SRI(OTG_M_CONST_DTO1, OTG, inst),\ - SRI(OTG_CLOCK_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ - SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ - SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ - SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ - SRI(CONTROL, VTG, inst),\ - SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ - SRI(OTG_GSL_CONTROL, OTG, inst),\ - SRI(OTG_CRC_CNTL, OTG, inst),\ - SRI(OTG_CRC_CNTL2, OTG, inst),\ - SRI(OTG_CRC0_DATA_RG, OTG, inst),\ - SRI(OTG_CRC0_DATA_B, OTG, inst),\ - SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ - SR(GSL_SOURCE_SELECT),\ - SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\ - SRI(OTG_DRR_CONTROL, OTG, inst) - - -#define OPTC_COMMON_REG_LIST_DCN3_0(inst) \ - OPTC_COMMON_REG_LIST_DCN3_BASE(inst),\ - SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_GSL_WINDOW_X, OTG, inst),\ - SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ - SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ - SRI(OTG_DSC_START_POSITION, OTG, inst),\ - SRI(OTG_CRC_CNTL2, OTG, inst),\ - SRI(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ - SRI(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ - SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ - SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ - SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ - SRI(OPTC_MEMORY_CONFIG, ODM, 
inst),\ - SR(DWB_SOURCE_SELECT) - -#define DCN30_VTOTAL_REGS_SF(mask_sh) - -#define OPTC_COMMON_MASK_SH_LIST_DCN3_BASE(mask_sh)\ - SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ - SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ - SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ - SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK_EN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ - DCN30_VTOTAL_REGS_SF(mask_sh)\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ - 
SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ - SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ - SF(OTG0_OTG_BLANK_DATA_COLOR, OTG_BLANK_DATA_COLOR_BLUE_CB, mask_sh),\ - SF(OTG0_OTG_BLANK_DATA_COLOR, OTG_BLANK_DATA_COLOR_GREEN_Y, mask_sh),\ - SF(OTG0_OTG_BLANK_DATA_COLOR, OTG_BLANK_DATA_COLOR_RED_CR, mask_sh),\ - SF(OTG0_OTG_BLANK_DATA_COLOR_EXT, OTG_BLANK_DATA_COLOR_BLUE_CB_EXT, mask_sh),\ - SF(OTG0_OTG_BLANK_DATA_COLOR_EXT, OTG_BLANK_DATA_COLOR_GREEN_Y_EXT, mask_sh),\ - SF(OTG0_OTG_BLANK_DATA_COLOR_EXT, OTG_BLANK_DATA_COLOR_RED_CR_EXT, mask_sh),\ - SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ - SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ - SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ - SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ - SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, 
mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ - SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) - -#define OPTC_COMMON_MASK_SH_LIST_DCN3_0(mask_sh)\ - OPTC_COMMON_MASK_SH_LIST_DCN3_BASE(mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ - SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ - SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ - SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, 
OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ - SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ - SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_BY2, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_PENDING, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_BLANK_DATA_DOUBLE_BUFFER_EN, mask_sh) - -#define OPTC_COMMON_MASK_SH_LIST_DCN30(mask_sh)\ - OPTC_COMMON_MASK_SH_LIST_DCN3_BASE(mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ - SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ - SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ - SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ - SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ - SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_PENDING, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh) - -void dcn30_timing_generator_init(struct optc *optc1); - -void optc3_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest); - -void optc3_lock(struct timing_generator *optc); - -void optc3_lock_doublebuffer_enable(struct timing_generator *optc); - -void optc3_lock_doublebuffer_disable(struct timing_generator *optc); - -void optc3_set_drr_trigger_window(struct timing_generator *optc, - uint32_t window_start, uint32_t window_end); - -void optc3_triplebuffer_lock(struct timing_generator *optc); - -void optc3_program_blank_color(struct timing_generator *optc, - const struct tg_color *blank_color); - -void 
optc3_set_vtotal_change_limit(struct timing_generator *optc, - uint32_t limit); - -void optc3_set_dsc_config(struct timing_generator *optc, - enum optc_dsc_mode dsc_mode, - uint32_t dsc_bytes_per_pixel, - uint32_t dsc_slice_width); - -void optc3_set_timing_db_mode(struct timing_generator *optc, bool enable); - -void optc3_set_odm_bypass(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing); -void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, - struct dc_crtc_timing *timing); -void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc); -void optc3_tg_init(struct timing_generator *optc); -void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); -#endif /* __DC_OPTC_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index cbf59d7e78c4..090011300dcd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -11,8 +11,7 @@ # Makefile for dcn30. DCN301 = dcn301_init.o dcn301_dccg.o \ - dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o \ - dcn301_optc.o + dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c deleted file mode 100644 index b3cfcb887905..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "reg_helper.h" -#include "dcn301_optc.h" -#include "dc.h" -#include "dcn_calc_math.h" -#include "dc_dmub_srv.h" - -#include "dml/dcn30/dcn30_fpu.h" -#include "dc_trace.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - - -/** - * optc301_set_drr() - Program dynamic refresh rate registers m_OTGx_OTG_V_TOTAL_*. - * - * @optc: timing_generator instance. - * @params: parameters used for Dynamic Refresh Rate. 
- */ -void optc301_set_drr( - struct timing_generator *optc, - const struct drr_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - if (params != NULL && - params->vertical_total_max > 0 && - params->vertical_total_min > 0) { - - if (params->vertical_total_mid != 0) { - - REG_SET(OTG_V_TOTAL_MID, 0, - OTG_V_TOTAL_MID, params->vertical_total_mid - 1); - - REG_UPDATE_2(OTG_V_TOTAL_CONTROL, - OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, - OTG_VTOTAL_MID_FRAME_NUM, - (uint8_t)params->vertical_total_mid_frame_num); - - } - - optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); - - REG_UPDATE_5(OTG_V_TOTAL_CONTROL, - OTG_V_TOTAL_MIN_SEL, 1, - OTG_V_TOTAL_MAX_SEL, 1, - OTG_FORCE_LOCK_ON_EVENT, 0, - OTG_SET_V_TOTAL_MIN_MASK_EN, 0, - OTG_SET_V_TOTAL_MIN_MASK, 0); - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); - - } else { - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, - OTG_SET_V_TOTAL_MIN_MASK, 0, - OTG_V_TOTAL_MIN_SEL, 0, - OTG_V_TOTAL_MAX_SEL, 0, - OTG_FORCE_LOCK_ON_EVENT, 0); - - optc->funcs->set_vtotal_min_max(optc, 0, 0); - } -} - - -void optc301_setup_manual_trigger(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_8(OTG_TRIGA_CNTL, 0, - OTG_TRIGA_SOURCE_SELECT, 21, - OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, - OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1, - OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0, - OTG_TRIGA_POLARITY_SELECT, 0, - OTG_TRIGA_FREQUENCY_SELECT, 0, - OTG_TRIGA_DELAY, 0, - OTG_TRIGA_CLEAR, 1); -} - -static struct timing_generator_funcs dcn30_tg_funcs = { - .validate_timing = optc1_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc2_enable_crtc, - .disable_crtc = optc1_disable_crtc, - /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. 
Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank_color = optc3_program_blank_color, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .triplebuffer_lock = optc3_triplebuffer_lock, - .triplebuffer_unlock = optc2_triplebuffer_unlock, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .disable_reset_trigger = optc1_disable_reset_trigger, - .lock = optc3_lock, - .unlock = optc1_unlock, - .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, - .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc301_set_drr, - .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, - .set_vtotal_min_max = optc3_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .tg_init = optc3_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .setup_global_swap_lock = NULL, - .get_crc = optc1_get_crc, - .configure_crc = optc2_configure_crc, - .set_dsc_config = optc3_set_dsc_config, - .get_dsc_status = optc2_get_dsc_status, - .set_dwb_source = NULL, - .set_odm_bypass = optc3_set_odm_bypass, - .set_odm_combine = optc3_set_odm_combine, - .get_optc_source = optc2_get_optc_source, - .set_out_mux = optc3_set_out_mux, - .set_drr_trigger_window = optc3_set_drr_trigger_window, - .set_vtotal_change_limit = optc3_set_vtotal_change_limit, - .set_gsl = optc2_set_gsl, - .set_gsl_source_select = optc2_set_gsl_source_select, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc2_program_manual_trigger, - .setup_manual_trigger = optc301_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, - .wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear, -}; - -void dcn301_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn30_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 4; - optc1->min_v_sync_width = 1; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h deleted file mode 100644 index b49585682a15..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_OPTC_DCN301_H__ -#define __DC_OPTC_DCN301_H__ - -#include "dcn20/dcn20_optc.h" -#include "dcn30/dcn30_optc.h" - -void dcn301_timing_generator_init(struct optc *optc1); -void optc301_setup_manual_trigger(struct timing_generator *optc); -void optc301_set_drr(struct timing_generator *optc, const struct drr_params *params); - -#endif /* __DC_OPTC_DCN301_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index 212287008c0a..11a2662e58ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -11,7 +11,7 @@ # Makefile for dcn31. DCN31 = dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \ - dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \ + dcn31_dccg.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \ dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ dcn31_afmt.o dcn31_vpg.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c deleted file mode 100644 index 63a677c8ee27..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ /dev/null @@ -1,310 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dcn31_optc.h" - -#include "dcn30/dcn30_optc.h" -#include "reg_helper.h" -#include "dc.h" -#include "dcn_calc_math.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - -static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, - struct dc_crtc_timing *timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) - / opp_cnt; - uint32_t memory_mask = 0; - int mem_count_per_opp = (mpcc_hactive + 2559) / 2560; - - /* Assume less than 6 pipes */ - if (opp_cnt == 4) { - if (mem_count_per_opp == 1) - memory_mask = 0xf; - else { - ASSERT(mem_count_per_opp == 2); - memory_mask = 0xff; - } - } else if (mem_count_per_opp == 1) - memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2); - else if (mem_count_per_opp == 2) - memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); - else if (mem_count_per_opp == 3) - memory_mask = 0x77; - else if (mem_count_per_opp == 4) - memory_mask = 0xff; - - if (REG(OPTC_MEMORY_CONFIG)) - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, memory_mask); - - if (opp_cnt == 2) { - REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 1, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1]); - } else if (opp_cnt == 4) { - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 3, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1], - OPTC_SEG2_SRC_SEL, opp_id[2], - OPTC_SEG3_SRC_SEL, opp_id[3]); - } - - REG_UPDATE(OPTC_WIDTH_CONTROL, - OPTC_SEGMENT_WIDTH, mpcc_hactive); - - REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1); - optc1->opp_count = opp_cnt; -} - -/* - * Enable CRTC - call ASIC Control Object to enable Timing generator. - */ -static bool optc31_enable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ - REG_UPDATE(OPTC_DATA_SOURCE_SELECT, - OPTC_SEG0_SRC_SEL, optc->inst); - - /* VTG enable first is for HW workaround */ - REG_UPDATE(CONTROL, - VTG0_ENABLE, 1); - - REG_SEQ_START(); - - /* Enable CRTC */ - REG_UPDATE_2(OTG_CONTROL, - OTG_DISABLE_POINT_CNTL, 2, - OTG_MASTER_EN, 1); - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); - - return true; -} - -/* disable_crtc - call ASIC Control Object to disable Timing generator. */ -static bool optc31_disable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 0); - - REG_UPDATE(CONTROL, - VTG0_ENABLE, 0); - - /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, - OTG_BUSY, 0, - 1, 100000); - optc1_clear_optc_underflow(optc); - - return true; -} - -bool optc31_immediate_disable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE_2(OTG_CONTROL, - OTG_DISABLE_POINT_CNTL, 0, - OTG_MASTER_EN, 0); - - REG_UPDATE(CONTROL, - VTG0_ENABLE, 0); - - /* CRTC disabled, so disable clock. 
*/ - REG_WAIT(OTG_CLOCK_CONTROL, - OTG_BUSY, 0, - 1, 100000); - - /* clear the false state */ - optc1_clear_optc_underflow(optc); - - return true; -} - -void optc31_set_drr( - struct timing_generator *optc, - const struct drr_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - if (params != NULL && - params->vertical_total_max > 0 && - params->vertical_total_min > 0) { - - if (params->vertical_total_mid != 0) { - - REG_SET(OTG_V_TOTAL_MID, 0, - OTG_V_TOTAL_MID, params->vertical_total_mid - 1); - - REG_UPDATE_2(OTG_V_TOTAL_CONTROL, - OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, - OTG_VTOTAL_MID_FRAME_NUM, - (uint8_t)params->vertical_total_mid_frame_num); - - } - - optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); - - /* - * MIN_MASK_EN is gone and MASK is now always enabled. - * - * To get it to it work with manual trigger we need to make sure - * we program the correct bit. - */ - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, - OTG_V_TOTAL_MIN_SEL, 1, - OTG_V_TOTAL_MAX_SEL, 1, - OTG_FORCE_LOCK_ON_EVENT, 0, - OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ - - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); - } else { - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, - OTG_SET_V_TOTAL_MIN_MASK, 0, - OTG_V_TOTAL_MIN_SEL, 0, - OTG_V_TOTAL_MAX_SEL, 0, - OTG_FORCE_LOCK_ON_EVENT, 0); - - optc->funcs->set_vtotal_min_max(optc, 0, 0); - } -} - -void optc3_init_odm(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 0, - OPTC_SEG0_SRC_SEL, optc->inst, - OPTC_SEG1_SRC_SEL, 0xf, - OPTC_SEG2_SRC_SEL, 0xf, - OPTC_SEG3_SRC_SEL, 0xf - ); - - REG_SET(OTG_H_TIMING_CNTL, 0, - OTG_H_TIMING_DIV_MODE, 0); - - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, 0); - optc1->opp_count = 1; -} - -static struct timing_generator_funcs dcn31_tg_funcs = { - .validate_timing = optc1_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc31_enable_crtc, - .disable_crtc = optc31_disable_crtc, - .immediate_disable_crtc = optc31_immediate_disable_crtc, - /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. 
Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank_color = optc3_program_blank_color, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .triplebuffer_lock = optc3_triplebuffer_lock, - .triplebuffer_unlock = optc2_triplebuffer_unlock, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .disable_reset_trigger = optc1_disable_reset_trigger, - .lock = optc3_lock, - .unlock = optc1_unlock, - .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, - .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc31_set_drr, - .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, - .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .tg_init = optc3_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .setup_global_swap_lock = NULL, - .get_crc = optc1_get_crc, - .configure_crc = optc2_configure_crc, - .set_dsc_config = optc3_set_dsc_config, - .get_dsc_status = optc2_get_dsc_status, - .set_dwb_source = NULL, - .set_odm_bypass = optc3_set_odm_bypass, - .set_odm_combine = optc31_set_odm_combine, - .get_optc_source = optc2_get_optc_source, - .set_out_mux = optc3_set_out_mux, - .set_drr_trigger_window = optc3_set_drr_trigger_window, - .set_vtotal_change_limit = optc3_set_vtotal_change_limit, - .set_gsl = optc2_set_gsl, - .set_gsl_source_select = optc2_set_gsl_source_select, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc2_program_manual_trigger, - .setup_manual_trigger = optc2_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, - .init_odm = optc3_init_odm, -}; - -void dcn31_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn31_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 4; - optc1->min_v_sync_width = 1; -} - diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h deleted file mode 100644 index 30b81a448ce2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_OPTC_DCN31_H__ -#define __DC_OPTC_DCN31_H__ - -#include "dcn10/dcn10_optc.h" - -#define OPTC_COMMON_REG_LIST_DCN3_1(inst) \ - SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ - SRI(OTG_VUPDATE_PARAM, OTG, inst),\ - SRI(OTG_VREADY_PARAM, OTG, inst),\ - SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL4, OTG, inst),\ - SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ - SRI(OTG_H_TOTAL, OTG, inst),\ - SRI(OTG_H_BLANK_START_END, OTG, inst),\ - SRI(OTG_H_SYNC_A, OTG, inst),\ - SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\ - SRI(OTG_H_TIMING_CNTL, OTG, inst),\ - SRI(OTG_V_TOTAL, OTG, inst),\ - SRI(OTG_V_BLANK_START_END, OTG, inst),\ - SRI(OTG_V_SYNC_A, OTG, inst),\ - SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\ - SRI(OTG_CONTROL, OTG, inst),\ - SRI(OTG_STEREO_CONTROL, OTG, inst),\ - SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ - SRI(OTG_STEREO_STATUS, OTG, inst),\ - SRI(OTG_V_TOTAL_MAX, OTG, inst),\ - SRI(OTG_V_TOTAL_MIN, OTG, inst),\ - SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\ - SRI(OTG_TRIGA_CNTL, OTG, inst),\ - SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ - SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ - SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\ - SRI(OTG_STATUS, OTG, inst),\ - SRI(OTG_STATUS_POSITION, OTG, inst),\ - SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ - SRI(OTG_M_CONST_DTO0, OTG, inst),\ - SRI(OTG_M_CONST_DTO1, OTG, inst),\ - SRI(OTG_CLOCK_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ - SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ - SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ - SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ - SRI(CONTROL, VTG, inst),\ - SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ - SRI(OTG_GSL_CONTROL, OTG, inst),\ - SRI(OTG_CRC_CNTL, OTG, inst),\ - SRI(OTG_CRC0_DATA_RG, OTG, inst),\ - SRI(OTG_CRC0_DATA_B, OTG, inst),\ - SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ - SR(GSL_SOURCE_SELECT),\ - SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_GSL_WINDOW_X, OTG, inst),\ - SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ - SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ - SRI(OTG_DSC_START_POSITION, OTG, inst),\ - SRI(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ - SRI(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ - SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ - SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ - SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ - SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ - SRI(OTG_CRC_CNTL2, OTG, inst),\ - SR(DWB_SOURCE_SELECT),\ - SRI(OTG_DRR_CONTROL, OTG, inst) - -#define OPTC_COMMON_MASK_SH_LIST_DCN3_1(mask_sh)\ - SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ - SF(OTG0_OTG_VREADY_PARAM, 
VREADY_OFFSET, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ - SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ - SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, 
OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ - SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ - SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ - SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ - SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ - SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ - SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, 
mask_sh),\ - SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ - SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ - SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ - SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ - SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ - SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ - SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) - -void dcn31_timing_generator_init(struct optc *optc1); - -bool optc31_immediate_disable_crtc(struct timing_generator *optc); - -void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); - -void optc3_init_odm(struct timing_generator *optc); - -#endif /* __DC_OPTC_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile index 6ea47e00d62d..d5c177346a3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile @@ -11,7 +11,7 @@ # Makefile for dcn314. 
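The dcn314_optc.c removed further below sizes the ODM combine memory mask from h_active and opp_cnt ("Each memory instance is 2048x... bits to support half line of 4096"). As a rough stand-alone check of that arithmetic only — the helper name and printf harness here are illustrative and not part of the driver or this patch — the mapping from active width to memory-instance count works out as:

    #include <stdio.h>

    /* same rounding as the removed optc*_set_odm_combine(): one memory
     * instance per started 2048 pixels of active width */
    static int odm_mem_count(int h_active)
    {
            return (h_active + 2047) / 2048;
    }

    int main(void)
    {
            /* 3840-wide: 2 instances -> mask 0x3 in the opp_cnt == 4 branch */
            /* 7680-wide: 4 instances -> mask 0xf in the opp_cnt == 4 branch */
            printf("%d %d\n", odm_mem_count(3840), odm_mem_count(7680));
            return 0;
    }

In the two-OPP branch the removed code instead shifts a per-OPP mask by opp_id * 2, so the same instance count lands on whichever memory instances are paired with the two OPPs actually in use.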
DCN314 = dcn314_init.o \ - dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o + dcn314_dio_stream_encoder.o dcn314_dccg.o AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c deleted file mode 100644 index 0086cafb0f7a..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ /dev/null @@ -1,273 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dcn314_optc.h" - -#include "dcn30/dcn30_optc.h" -#include "dcn31/dcn31_optc.h" -#include "reg_helper.h" -#include "dc.h" -#include "dcn_calc_math.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - -/* - * Enable CRTC - * Enable CRTC - call ASIC Control Object to enable Timing generator. 
- */ - -static void optc314_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, - struct dc_crtc_timing *timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t memory_mask = 0; - int h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; - int mpcc_hactive = h_active / opp_cnt; - /* Each memory instance is 2048x(314x2) bits to support half line of 4096 */ - int odm_mem_count = (h_active + 2047) / 2048; - - /* - * display <= 4k : 2 memories + 2 pipes - * 4k < display <= 8k : 4 memories + 2 pipes - * 8k < display <= 12k : 6 memories + 4 pipes - */ - if (opp_cnt == 4) { - if (odm_mem_count <= 2) - memory_mask = 0x3; - else if (odm_mem_count <= 4) - memory_mask = 0xf; - else - memory_mask = 0x3f; - } else { - if (odm_mem_count <= 2) - memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2); - else if (odm_mem_count <= 4) - memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); - else - memory_mask = 0x77; - } - - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, memory_mask); - - if (opp_cnt == 2) { - REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 1, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1]); - } else if (opp_cnt == 4) { - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 3, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1], - OPTC_SEG2_SRC_SEL, opp_id[2], - OPTC_SEG3_SRC_SEL, opp_id[3]); - } - - REG_UPDATE(OPTC_WIDTH_CONTROL, - OPTC_SEGMENT_WIDTH, mpcc_hactive); - - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_MODE, opp_cnt - 1); - optc1->opp_count = opp_cnt; -} - -static bool optc314_enable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ - REG_UPDATE(OPTC_DATA_SOURCE_SELECT, - OPTC_SEG0_SRC_SEL, optc->inst); - - /* VTG enable first is for HW workaround */ - REG_UPDATE(CONTROL, - VTG0_ENABLE, 1); - - REG_SEQ_START(); - - /* Enable CRTC */ - REG_UPDATE_2(OTG_CONTROL, - OTG_DISABLE_POINT_CNTL, 2, - OTG_MASTER_EN, 1); - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); - - return true; -} - -/* disable_crtc */ -static bool optc314_disable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 0); - - REG_UPDATE(CONTROL, - VTG0_ENABLE, 0); - - /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, - OTG_BUSY, 0, - 1, 100000); - - return true; -} - -static void optc314_phantom_crtc_post_enable(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* Disable immediately. */ - REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0); - - /* CRTC disabled, so disable clock. 
*/ - REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); -} - -static void optc314_set_odm_bypass(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - enum h_timing_div_mode h_div = H_TIMING_NO_DIV; - - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 0, - OPTC_SEG0_SRC_SEL, optc->inst, - OPTC_SEG1_SRC_SEL, 0xf, - OPTC_SEG2_SRC_SEL, 0xf, - OPTC_SEG3_SRC_SEL, 0xf - ); - - h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_MODE, h_div); - - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, 0); - optc1->opp_count = 1; -} - -static void optc314_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_MODE_MANUAL, manual_mode ? 1 : 0); -} - - -static struct timing_generator_funcs dcn314_tg_funcs = { - .validate_timing = optc1_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc314_enable_crtc, - .disable_crtc = optc314_disable_crtc, - .immediate_disable_crtc = optc31_immediate_disable_crtc, - .phantom_crtc_post_enable = optc314_phantom_crtc_post_enable, - /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. 
Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank_color = optc3_program_blank_color, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .triplebuffer_lock = optc3_triplebuffer_lock, - .triplebuffer_unlock = optc2_triplebuffer_unlock, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .disable_reset_trigger = optc1_disable_reset_trigger, - .lock = optc3_lock, - .unlock = optc1_unlock, - .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, - .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc31_set_drr, - .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, - .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .tg_init = optc3_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .setup_global_swap_lock = NULL, - .get_crc = optc1_get_crc, - .configure_crc = optc1_configure_crc, - .set_dsc_config = optc3_set_dsc_config, - .get_dsc_status = optc2_get_dsc_status, - .set_dwb_source = NULL, - .get_optc_source = optc2_get_optc_source, - .set_out_mux = optc3_set_out_mux, - .set_drr_trigger_window = optc3_set_drr_trigger_window, - .set_vtotal_change_limit = optc3_set_vtotal_change_limit, - .set_gsl = optc2_set_gsl, - .set_gsl_source_select = optc2_set_gsl_source_select, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc2_program_manual_trigger, - .setup_manual_trigger = optc2_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, - .init_odm = optc3_init_odm, - .set_odm_bypass = optc314_set_odm_bypass, - .set_odm_combine = optc314_set_odm_combine, - .set_h_timing_div_manual_mode = optc314_set_h_timing_div_manual_mode, -}; - -void dcn314_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn314_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 4; - optc1->min_v_sync_width = 1; -} - diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h deleted file mode 100644 index 99c098e76116..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h +++ /dev/null @@ -1,255 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_OPTC_DCN314_H__ -#define __DC_OPTC_DCN314_H__ - -#include "dcn10/dcn10_optc.h" - -#define OPTC_COMMON_REG_LIST_DCN3_14(inst) \ - SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ - SRI(OTG_VUPDATE_PARAM, OTG, inst),\ - SRI(OTG_VREADY_PARAM, OTG, inst),\ - SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL4, OTG, inst),\ - SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ - SRI(OTG_H_TOTAL, OTG, inst),\ - SRI(OTG_H_BLANK_START_END, OTG, inst),\ - SRI(OTG_H_SYNC_A, OTG, inst),\ - SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\ - SRI(OTG_H_TIMING_CNTL, OTG, inst),\ - SRI(OTG_V_TOTAL, OTG, inst),\ - SRI(OTG_V_BLANK_START_END, OTG, inst),\ - SRI(OTG_V_SYNC_A, OTG, inst),\ - SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\ - SRI(OTG_CONTROL, OTG, inst),\ - SRI(OTG_STEREO_CONTROL, OTG, inst),\ - SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ - SRI(OTG_STEREO_STATUS, OTG, inst),\ - SRI(OTG_V_TOTAL_MAX, OTG, inst),\ - SRI(OTG_V_TOTAL_MIN, OTG, inst),\ - SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\ - SRI(OTG_TRIGA_CNTL, OTG, inst),\ - SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ - SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ - SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\ - SRI(OTG_STATUS, OTG, inst),\ - SRI(OTG_STATUS_POSITION, OTG, inst),\ - SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ - SRI(OTG_M_CONST_DTO0, OTG, inst),\ - SRI(OTG_M_CONST_DTO1, OTG, inst),\ - SRI(OTG_CLOCK_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ - SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ - SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ - SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ - SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ - SRI(CONTROL, VTG, inst),\ - SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ - SRI(OTG_GSL_CONTROL, OTG, inst),\ - SRI(OTG_CRC_CNTL, OTG, inst),\ - SRI(OTG_CRC0_DATA_RG, OTG, inst),\ - SRI(OTG_CRC0_DATA_B, OTG, inst),\ - SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ - SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ - SR(GSL_SOURCE_SELECT),\ - SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ - SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ - SRI(OTG_GSL_WINDOW_X, OTG, inst),\ - SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ - SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ - SRI(OTG_DSC_START_POSITION, OTG, inst),\ - SRI(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ - SRI(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ - SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ - SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ - SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ - SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ - SRI(OTG_DRR_CONTROL, OTG, inst) - -#define OPTC_COMMON_MASK_SH_LIST_DCN3_14(mask_sh)\ - SF(OTG0_OTG_VSTARTUP_PARAM, 
VSTARTUP_START, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ - SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ - SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ - SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ - 
SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ - SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ - SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ - SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ - SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ - SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ - SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, 
OTG_CRC0_WINDOWB_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ - SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ - SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ - SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ - SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ - SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ - SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ - SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) - -void dcn314_timing_generator_init(struct optc *optc1); - -#endif /* __DC_OPTC_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile index 3bb17dd01e4c..905b74b53092 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile @@ -11,9 +11,9 @@ # Makefile for dcn32. 
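optc32_set_drr() in the dcn32_optc.c removed below hands set_vtotal_min_max() the requested vertical totals minus one, and programs OTG_V_TOTAL_MID the same way. A worked example of the values involved, using a hypothetical 1080p DRR range (148.5 MHz pixel clock, htotal 2200, 60 Hz down to 50 Hz) rather than anything taken from this patch:

    #include <stdio.h>

    int main(void)
    {
            const long pixel_clock_hz = 148500000;
            const int htotal = 2200;

            int vtotal_60hz = pixel_clock_hz / (htotal * 60L);   /* 1125 */
            int vtotal_50hz = pixel_clock_hz / (htotal * 50L);   /* 1350 */

            /* values optc32_set_drr() would pass to set_vtotal_min_max() */
            printf("OTG_V_TOTAL_MIN = %d, OTG_V_TOTAL_MAX = %d\n",
                   vtotal_60hz - 1, vtotal_50hz - 1);
            return 0;
    }

When vertical_total_mid is non-zero, the removed code additionally programs OTG_V_TOTAL_MID (again minus one) and enables OTG_VTOTAL_MID_REPLACING_MAX_EN with the requested frame count.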
DCN32 = dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \ - dcn32_dccg.o dcn32_optc.o dcn32_mmhubbub.o dcn32_hubp.o dcn32_dpp.o \ - dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \ - dcn32_resource_helpers.o dcn32_mpc.o + dcn32_mmhubbub.o dcn32_dpp.o dcn32_hubp.o dcn32_mpc.o \ + dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_resource_helpers.o \ + dcn32_hpo_dp_link_encoder.o AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c deleted file mode 100644 index a2c4db2cebdd..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dcn32_optc.h" - -#include "dcn30/dcn30_optc.h" -#include "dcn31/dcn31_optc.h" -#include "reg_helper.h" -#include "dc.h" -#include "dcn_calc_math.h" -#include "dc_dmub_srv.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - -static void optc32_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, - struct dc_crtc_timing *timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t memory_mask = 0; - int h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; - int mpcc_hactive = h_active / opp_cnt; - /* Each memory instance is 2048x(32x2) bits to support half line of 4096 */ - int odm_mem_count = (h_active + 2047) / 2048; - - /* - * display <= 4k : 2 memories + 2 pipes - * 4k < display <= 8k : 4 memories + 2 pipes - * 8k < display <= 12k : 6 memories + 4 pipes - */ - if (opp_cnt == 4) { - if (odm_mem_count <= 2) - memory_mask = 0x3; - else if (odm_mem_count <= 4) - memory_mask = 0xf; - else - memory_mask = 0x3f; - } else { - if (odm_mem_count <= 2) - memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2); - else if (odm_mem_count <= 4) - memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); - else - memory_mask = 0x77; - } - - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, memory_mask); - - if (opp_cnt == 2) { - REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 1, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1]); - } else if (opp_cnt == 4) { - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 3, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1], - OPTC_SEG2_SRC_SEL, opp_id[2], - OPTC_SEG3_SRC_SEL, opp_id[3]); - } - - REG_UPDATE(OPTC_WIDTH_CONTROL, - OPTC_SEGMENT_WIDTH, mpcc_hactive); - - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_MODE, opp_cnt - 1); - optc1->opp_count = opp_cnt; -} - -void optc32_get_odm_combine_segments(struct timing_generator *tg, int *odm_combine_segments) -{ - struct optc *optc1 = DCN10TG_FROM_TG(tg); - int segments; - - REG_GET(OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, &segments); - - switch (segments) { - case 0: - *odm_combine_segments = 1; - break; - case 1: - *odm_combine_segments = 2; - break; - case 3: - *odm_combine_segments = 4; - break; - /* 2 is reserved */ - case 2: - default: - *odm_combine_segments = -1; - } -} - -void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_MODE_MANUAL, manual_mode ? 1 : 0); -} -/** - * optc32_enable_crtc() - Enable CRTC - call ASIC Control Object to enable Timing generator. - * - * @optc: timing_generator instance. - * - * Return: If CRTC is enabled, return true. 
- */ -static bool optc32_enable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ - REG_UPDATE(OPTC_DATA_SOURCE_SELECT, - OPTC_SEG0_SRC_SEL, optc->inst); - - /* VTG enable first is for HW workaround */ - REG_UPDATE(CONTROL, - VTG0_ENABLE, 1); - - REG_SEQ_START(); - - /* Enable CRTC */ - REG_UPDATE_2(OTG_CONTROL, - OTG_DISABLE_POINT_CNTL, 2, - OTG_MASTER_EN, 1); - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); - - return true; -} - -/* disable_crtc */ -static bool optc32_disable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 0); - - REG_UPDATE(CONTROL, - VTG0_ENABLE, 0); - - /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, - OTG_BUSY, 0, - 1, 150000); - - return true; -} - -static void optc32_phantom_crtc_post_enable(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* Disable immediately. */ - REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0); - - /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); -} - -static void optc32_disable_phantom_otg(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); -} - -void optc32_set_odm_bypass(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - enum h_timing_div_mode h_div = H_TIMING_NO_DIV; - - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 0, - OPTC_SEG0_SRC_SEL, optc->inst, - OPTC_SEG1_SRC_SEL, 0xf, - OPTC_SEG2_SRC_SEL, 0xf, - OPTC_SEG3_SRC_SEL, 0xf - ); - - h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); - REG_UPDATE(OTG_H_TIMING_CNTL, - OTG_H_TIMING_DIV_MODE, h_div); - - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, 0); - optc1->opp_count = 1; -} - -static void optc32_setup_manual_trigger(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - struct dc *dc = optc->ctx->dc; - - if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams) - dc_dmub_srv_set_drr_manual_trigger_cmd(dc, optc->inst); - else { - /* - * MIN_MASK_EN is gone and MASK is now always enabled. - * - * To get it to it work with manual trigger we need to make sure - * we program the correct bit. 
- */ - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, - OTG_V_TOTAL_MIN_SEL, 1, - OTG_V_TOTAL_MAX_SEL, 1, - OTG_FORCE_LOCK_ON_EVENT, 0, - OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ - - // Setup manual flow control for EOF via TRIG_A - optc->funcs->setup_manual_trigger(optc); - } -} - -static void optc32_set_drr( - struct timing_generator *optc, - const struct drr_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - if (params != NULL && - params->vertical_total_max > 0 && - params->vertical_total_min > 0) { - - if (params->vertical_total_mid != 0) { - - REG_SET(OTG_V_TOTAL_MID, 0, - OTG_V_TOTAL_MID, params->vertical_total_mid - 1); - - REG_UPDATE_2(OTG_V_TOTAL_CONTROL, - OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, - OTG_VTOTAL_MID_FRAME_NUM, - (uint8_t)params->vertical_total_mid_frame_num); - - } - - optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); - } - - optc32_setup_manual_trigger(optc); -} - -static struct timing_generator_funcs dcn32_tg_funcs = { - .validate_timing = optc1_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc32_enable_crtc, - .disable_crtc = optc32_disable_crtc, - .phantom_crtc_post_enable = optc32_phantom_crtc_post_enable, - .disable_phantom_crtc = optc32_disable_phantom_otg, - /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. 
Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank_color = optc3_program_blank_color, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .triplebuffer_lock = optc3_triplebuffer_lock, - .triplebuffer_unlock = optc2_triplebuffer_unlock, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .disable_reset_trigger = optc1_disable_reset_trigger, - .lock = optc3_lock, - .unlock = optc1_unlock, - .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, - .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc32_set_drr, - .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, - .set_vtotal_min_max = optc3_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .tg_init = optc3_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .setup_global_swap_lock = NULL, - .get_crc = optc1_get_crc, - .configure_crc = optc1_configure_crc, - .set_dsc_config = optc3_set_dsc_config, - .get_dsc_status = optc2_get_dsc_status, - .set_dwb_source = NULL, - .set_odm_bypass = optc32_set_odm_bypass, - .set_odm_combine = optc32_set_odm_combine, - .get_odm_combine_segments = optc32_get_odm_combine_segments, - .set_h_timing_div_manual_mode = optc32_set_h_timing_div_manual_mode, - .get_optc_source = optc2_get_optc_source, - .set_out_mux = optc3_set_out_mux, - .set_drr_trigger_window = optc3_set_drr_trigger_window, - .set_vtotal_change_limit = optc3_set_vtotal_change_limit, - .set_gsl = optc2_set_gsl, - .set_gsl_source_select = optc2_set_gsl_source_select, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc2_program_manual_trigger, - .setup_manual_trigger = optc2_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, -}; - -void dcn32_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn32_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 4; - optc1->min_v_sync_width = 1; -} - diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h deleted file mode 100644 index 8ce3b178cab0..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_OPTC_DCN32_H__ -#define __DC_OPTC_DCN32_H__ - -#include "dcn10/dcn10_optc.h" - -#define OPTC_COMMON_MASK_SH_LIST_DCN3_2(mask_sh)\ - SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ - SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ - SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ - SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ - SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ - SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ - SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ - SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ - SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ - SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ - SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ - SF(OTG0_OTG_V_TOTAL_CONTROL, 
OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ - SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ - SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ - SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ - SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ - SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ - SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ - SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ - SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ - SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ - SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ - SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ - SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ - SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ - SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ - SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ - SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ - SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ - 
SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ - SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ - SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ - SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ - SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ - SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ - SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ - SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ - SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ - SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ - SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ - SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ - SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ - SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ - SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ - SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ - SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) - -void dcn32_timing_generator_init(struct optc *optc1); -void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode); -void optc32_get_odm_combine_segments(struct timing_generator *tg, int *odm_combine_segments); -void optc32_set_odm_bypass(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing); - -#endif /* __DC_OPTC_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile index 85a307babab9..fa7ec82ae5f5 
100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile @@ -11,7 +11,7 @@ # Makefile for DCN35. DCN35 = dcn35_init.o dcn35_dio_stream_encoder.o \ - dcn35_dio_link_encoder.o dcn35_dccg.o dcn35_optc.o \ + dcn35_dio_link_encoder.o dcn35_dccg.o \ dcn35_hubp.o dcn35_hubbub.o \ dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c deleted file mode 100644 index a4a39f1638cf..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.c +++ /dev/null @@ -1,290 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dcn35_optc.h" - -#include "dcn30/dcn30_optc.h" -#include "dcn31/dcn31_optc.h" -#include "dcn32/dcn32_optc.h" -#include "reg_helper.h" -#include "dc.h" -#include "dcn_calc_math.h" - -#define REG(reg)\ - optc1->tg_regs->reg - -#define CTX \ - optc1->base.ctx - -#undef FN -#define FN(reg_name, field_name) \ - optc1->tg_shift->field_name, optc1->tg_mask->field_name - -/** - * optc35_set_odm_combine() - Enable CRTC - call ASIC Control Object to enable Timing generator. - * - * @optc: Output Pipe Timing Combine instance reference. - * @opp_id: Output Plane Processor instance ID. - * @opp_cnt: Output Plane Processor count. - * @timing: Timing parameters used to configure DCN blocks. - * - * Return: void. 
- */ -static void optc35_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, - struct dc_crtc_timing *timing) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t memory_mask = 0; - int h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; - int mpcc_hactive = h_active / opp_cnt; - /* Each memory instance is 2048x(314x2) bits to support half line of 4096 */ - int odm_mem_count = (h_active + 2047) / 2048; - - /* - * display <= 4k : 2 memories + 2 pipes - * 4k < display <= 8k : 4 memories + 2 pipes - * 8k < display <= 12k : 6 memories + 4 pipes - */ - if (opp_cnt == 4) { - if (odm_mem_count <= 2) - memory_mask = 0x3; - else if (odm_mem_count <= 4) - memory_mask = 0xf; - else - memory_mask = 0x3f; - } else { - if (odm_mem_count <= 2) - memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2); - else if (odm_mem_count <= 4) - memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); - else - memory_mask = 0x77; - } - - REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, memory_mask); - - if (opp_cnt == 2) { - REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 1, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1]); - } else if (opp_cnt == 4) { - REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, - OPTC_NUM_OF_INPUT_SEGMENT, 3, - OPTC_SEG0_SRC_SEL, opp_id[0], - OPTC_SEG1_SRC_SEL, opp_id[1], - OPTC_SEG2_SRC_SEL, opp_id[2], - OPTC_SEG3_SRC_SEL, opp_id[3]); - } - - REG_UPDATE(OPTC_WIDTH_CONTROL, - OPTC_SEGMENT_WIDTH, mpcc_hactive); - - REG_UPDATE(OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, opp_cnt - 1); - optc1->opp_count = opp_cnt; -} - -static bool optc35_enable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ - REG_UPDATE(OPTC_DATA_SOURCE_SELECT, - OPTC_SEG0_SRC_SEL, optc->inst); - - /* VTG enable first is for HW workaround */ - REG_UPDATE(CONTROL, - VTG0_ENABLE, 1); - - REG_SEQ_START(); - - /* Enable CRTC */ - REG_UPDATE_2(OTG_CONTROL, - OTG_DISABLE_POINT_CNTL, 2, - OTG_MASTER_EN, 1); - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); - - return true; -} - -/* disable_crtc */ -static bool optc35_disable_crtc(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 0); - - REG_UPDATE(CONTROL, - VTG0_ENABLE, 0); - - /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, - OTG_BUSY, 0, - 1, 100000); - optc1_clear_optc_underflow(optc); - - return true; -} - -static void optc35_phantom_crtc_post_enable(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - /* Disable immediately. */ - REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0); - - /* CRTC disabled, so disable clock. 
*/ - REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); -} - -static bool optc35_configure_crc(struct timing_generator *optc, - const struct crc_params *params) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - - if (!optc1_is_tg_enabled(optc)) - return false; - REG_WRITE(OTG_CRC_CNTL, 0); - if (!params->enable) - return true; - REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, - OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, - OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); - REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, - OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, - OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); - REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, - OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, - OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); - REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, - OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, - OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); - if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) { - REG_UPDATE_4(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1, - OTG_CRC_WINDOW_DB_EN, 1); - } else - REG_UPDATE_3(OTG_CRC_CNTL, - OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, - OTG_CRC0_SELECT, params->selection, - OTG_CRC_EN, 1); - return true; -} - -static struct timing_generator_funcs dcn35_tg_funcs = { - .validate_timing = optc1_validate_timing, - .program_timing = optc1_program_timing, - .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, - .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, - .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, - .enable_crtc = optc35_enable_crtc, - .disable_crtc = optc35_disable_crtc, - .immediate_disable_crtc = optc31_immediate_disable_crtc, - .phantom_crtc_post_enable = optc35_phantom_crtc_post_enable, - /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = optc1_is_counter_moving, - .get_position = optc1_get_position, - .get_frame_count = optc1_get_vblank_counter, - .get_scanoutpos = optc1_get_crtc_scanoutpos, - .get_otg_active_size = optc1_get_otg_active_size, - .set_early_control = optc1_set_early_control, - /* used by enable_timing_synchronization. 
Not need for FPGA */ - .wait_for_state = optc1_wait_for_state, - .set_blank_color = optc3_program_blank_color, - .did_triggered_reset_occur = optc1_did_triggered_reset_occur, - .triplebuffer_lock = optc3_triplebuffer_lock, - .triplebuffer_unlock = optc2_triplebuffer_unlock, - .enable_reset_trigger = optc1_enable_reset_trigger, - .enable_crtc_reset = optc1_enable_crtc_reset, - .disable_reset_trigger = optc1_disable_reset_trigger, - .lock = optc3_lock, - .unlock = optc1_unlock, - .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, - .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, - .enable_optc_clock = optc1_enable_optc_clock, - .set_drr = optc31_set_drr, - .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, - .set_vtotal_min_max = optc1_set_vtotal_min_max, - .set_static_screen_control = optc1_set_static_screen_control, - .program_stereo = optc1_program_stereo, - .is_stereo_left_eye = optc1_is_stereo_left_eye, - .tg_init = optc3_tg_init, - .is_tg_enabled = optc1_is_tg_enabled, - .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, - .clear_optc_underflow = optc1_clear_optc_underflow, - .setup_global_swap_lock = NULL, - .get_crc = optc1_get_crc, - .configure_crc = optc35_configure_crc, - .set_dsc_config = optc3_set_dsc_config, - .get_dsc_status = optc2_get_dsc_status, - .set_dwb_source = NULL, - .set_odm_bypass = optc32_set_odm_bypass, - .set_odm_combine = optc35_set_odm_combine, - .get_optc_source = optc2_get_optc_source, - .set_h_timing_div_manual_mode = optc32_set_h_timing_div_manual_mode, - .set_out_mux = optc3_set_out_mux, - .set_drr_trigger_window = optc3_set_drr_trigger_window, - .set_vtotal_change_limit = optc3_set_vtotal_change_limit, - .set_gsl = optc2_set_gsl, - .set_gsl_source_select = optc2_set_gsl_source_select, - .set_vtg_params = optc1_set_vtg_params, - .program_manual_trigger = optc2_program_manual_trigger, - .setup_manual_trigger = optc2_setup_manual_trigger, - .get_hw_timing = optc1_get_hw_timing, - .init_odm = optc3_init_odm, -}; - -void dcn35_timing_generator_init(struct optc *optc1) -{ - optc1->base.funcs = &dcn35_tg_funcs; - - optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; - optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - - optc1->min_h_blank = 32; - optc1->min_v_blank = 3; - optc1->min_v_blank_interlace = 5; - optc1->min_h_sync_width = 4; - optc1->min_v_sync_width = 1; - - dcn35_timing_generator_set_fgcg( - optc1, CTX->dc->debug.enable_fine_grain_clock_gating.bits.optc); -} - -void dcn35_timing_generator_set_fgcg(struct optc *optc1, bool enable) -{ - REG_UPDATE(OPTC_CLOCK_CONTROL, OPTC_FGCG_REP_DIS, !enable); -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h deleted file mode 100644 index 1f422e4c468f..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_optc.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_OPTC_DCN35_H__ -#define __DC_OPTC_DCN35_H__ - -#include "dcn10/dcn10_optc.h" -#include "dcn32/dcn32_optc.h" -#define OPTC_COMMON_MASK_SH_LIST_DCN3_5(mask_sh)\ - OPTC_COMMON_MASK_SH_LIST_DCN3_2(mask_sh),\ - SF(OTG0_OTG_CRC_CNTL, OTG_CRC_WINDOW_DB_EN, mask_sh),\ - SF(OTG0_OTG_CRC1_DATA_RG, CRC1_R_CR, mask_sh),\ - SF(OTG0_OTG_CRC1_DATA_RG, CRC1_G_Y, mask_sh),\ - SF(OTG0_OTG_CRC1_DATA_B, CRC1_B_CB, mask_sh),\ - SF(OTG0_OTG_CRC2_DATA_RG, CRC2_R_CR, mask_sh),\ - SF(OTG0_OTG_CRC2_DATA_RG, CRC2_G_Y, mask_sh),\ - SF(OTG0_OTG_CRC2_DATA_B, CRC2_B_CB, mask_sh),\ - SF(OTG0_OTG_CRC3_DATA_RG, CRC3_R_CR, mask_sh),\ - SF(OTG0_OTG_CRC3_DATA_RG, CRC3_G_Y, mask_sh),\ - SF(OTG0_OTG_CRC3_DATA_B, CRC3_B_CB, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_START, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_END, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_END, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_START, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_END, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_START, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_END, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG_CRC0_WINDOWA_X_START_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG_CRC0_WINDOWA_X_END_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG_CRC0_WINDOWA_Y_START_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG_CRC0_WINDOWA_Y_END_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG_CRC0_WINDOWB_X_START_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG_CRC0_WINDOWB_X_END_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG_CRC0_WINDOWB_Y_START_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG_CRC0_WINDOWB_Y_END_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG_CRC1_WINDOWA_X_START_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG_CRC1_WINDOWA_X_END_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG_CRC1_WINDOWA_Y_START_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG_CRC1_WINDOWA_Y_END_READBACK, mask_sh),\ - 
SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG_CRC1_WINDOWB_X_START_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG_CRC1_WINDOWB_X_END_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG_CRC1_WINDOWB_Y_START_READBACK, mask_sh),\ - SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG_CRC1_WINDOWB_Y_END_READBACK, mask_sh),\ - SF(OPTC_CLOCK_CONTROL, OPTC_FGCG_REP_DIS, mask_sh) - -void dcn35_timing_generator_init(struct optc *optc1); - -void dcn35_timing_generator_set_fgcg(struct optc *optc1, bool enable); - -#endif /* __DC_OPTC_DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/Makefile b/drivers/gpu/drm/amd/display/dc/optc/Makefile new file mode 100644 index 000000000000..bb213335fb9f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/Makefile @@ -0,0 +1,108 @@ + +# Copyright 2022 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# Makefile for the 'optc' sub-component of DAL. 
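+#
+# Every per-generation block below follows the same three-line pattern; as an
+# illustrative sketch only ("dcnXX" is a placeholder, not an entry in this
+# patch):
+#
+#   OPTC_DCNXX = dcnXX_optc.o
+#   AMD_DAL_OPTC_DCNXX = $(addprefix $(AMDDALPATH)/dc/optc/dcnXX/,$(OPTC_DCNXX))
+#   AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCNXX)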
+# + + +ifdef CONFIG_DRM_AMD_DC_FP +############################################################################### +# DCN +############################################################################### + +OPTC_DCN10 = dcn10_optc.o + +AMD_DAL_OPTC_DCN10 = $(addprefix $(AMDDALPATH)/dc/optc/dcn10/,$(OPTC_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN10) + +############################################################################### + +OPTC_DCN20 = dcn20_optc.o + +AMD_DAL_OPTC_DCN20 = $(addprefix $(AMDDALPATH)/dc/optc/dcn20/,$(OPTC_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN20) + +############################################################################### + +OPTC_DCN201 = dcn201_optc.o + +AMD_DAL_OPTC_DCN201 = $(addprefix $(AMDDALPATH)/dc/optc/dcn201/,$(OPTC_DCN201)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN201) + +############################################################################### + +############################################################################### + +############################################################################### + +OPTC_DCN30 = dcn30_optc.o + +AMD_DAL_OPTC_DCN30 = $(addprefix $(AMDDALPATH)/dc/optc/dcn30/,$(OPTC_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN30) + +############################################################################### + +OPTC_DCN301 = dcn301_optc.o + +AMD_DAL_OPTC_DCN301 = $(addprefix $(AMDDALPATH)/dc/optc/dcn301/,$(OPTC_DCN301)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN301) + +############################################################################### + +OPTC_DCN31 = dcn31_optc.o + +AMD_DAL_OPTC_DCN31 = $(addprefix $(AMDDALPATH)/dc/optc/dcn31/,$(OPTC_DCN31)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN31) + +############################################################################### + +OPTC_DCN314 = dcn314_optc.o + +AMD_DAL_OPTC_DCN314 = $(addprefix $(AMDDALPATH)/dc/optc/dcn314/,$(OPTC_DCN314)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN314) + +############################################################################### + +OPTC_DCN32 = dcn32_optc.o + +AMD_DAL_OPTC_DCN32 = $(addprefix $(AMDDALPATH)/dc/optc/dcn32/,$(OPTC_DCN32)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN32) + +############################################################################### + +OPTC_DCN35 = dcn35_optc.o + +AMD_DAL_OPTC_DCN35 = $(addprefix $(AMDDALPATH)/dc/optc/dcn35/,$(OPTC_DCN35)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN35) + +############################################################################### + +############################################################################### +endif diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c new file mode 100644 index 000000000000..0e8f4f36c87c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -0,0 +1,1621 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +#include "reg_helper.h" +#include "dcn10_optc.h" +#include "dc.h" +#include "dc_trace.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + +#define STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN 0x100 + +/** + * apply_front_porch_workaround() - This is a workaround for a bug that has + * existed since R5xx and has not been fixed + * keep Front porch at minimum 2 for Interlaced + * mode or 1 for progressive. + * + * @timing: Timing parameters used to configure DCN blocks. + */ +static void apply_front_porch_workaround(struct dc_crtc_timing *timing) +{ + if (timing->flags.INTERLACE == 1) { + if (timing->v_front_porch < 2) + timing->v_front_porch = 2; + } else { + if (timing->v_front_porch < 1) + timing->v_front_porch = 1; + } +} + +void optc1_program_global_sync( + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; + + if (optc1->vstartup_start == 0) { + BREAK_TO_DEBUGGER(); + return; + } + + REG_SET(OTG_VSTARTUP_PARAM, 0, + VSTARTUP_START, optc1->vstartup_start); + + REG_SET_2(OTG_VUPDATE_PARAM, 0, + VUPDATE_OFFSET, optc1->vupdate_offset, + VUPDATE_WIDTH, optc1->vupdate_width); + + REG_SET(OTG_VREADY_PARAM, 0, + VREADY_OFFSET, optc1->vready_offset); +} + +static void optc1_disable_stereo(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_STEREO_CONTROL, 0, + OTG_STEREO_EN, 0); + + REG_SET_2(OTG_3D_STRUCTURE_CONTROL, 0, + OTG_3D_STRUCTURE_EN, 0, + OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); +} + +void optc1_setup_vertical_interrupt0( + struct timing_generator *optc, + uint32_t start_line, + uint32_t end_line) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, + OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, + OTG_VERTICAL_INTERRUPT0_LINE_END, end_line); +} + +void optc1_setup_vertical_interrupt1( + struct timing_generator *optc, + uint32_t start_line) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0, + OTG_VERTICAL_INTERRUPT1_LINE_START, start_line); +} + +void optc1_setup_vertical_interrupt2( + struct timing_generator *optc, + uint32_t start_line) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_VERTICAL_INTERRUPT2_POSITION, 0, + OTG_VERTICAL_INTERRUPT2_LINE_START, start_line); +} + +/** + * optc1_program_timing() - used by mode timing set Program + * CRTC Timing Registers - OTG_H_*, + * OTG_V_*, Pixel repetition. + * Including SYNC. Call BIOS command table to program Timings. + * + * @optc: timing_generator instance. + * @dc_crtc_timing: Timing parameters used to configure DCN blocks. 
+ * @vready_offset: Vready's starting position. + * @vstartup_start: Vstartup period. + * @vupdate_offset: Vupdate starting position. + * @vupdate_width: Vupdate duration. + * @signal: DC signal types. + * @use_vbios: to program timings from BIOS command table. + * + */ +void optc1_program_timing( + struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, + bool use_vbios) +{ + struct dc_crtc_timing patched_crtc_timing; + uint32_t asic_blank_end; + uint32_t asic_blank_start; + uint32_t v_total; + uint32_t v_sync_end; + uint32_t h_sync_polarity, v_sync_polarity; + uint32_t start_point = 0; + uint32_t field_num = 0; + enum h_timing_div_mode h_div = H_TIMING_NO_DIV; + + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + optc1->signal = signal; + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; + patched_crtc_timing = *dc_crtc_timing; + apply_front_porch_workaround(&patched_crtc_timing); + optc1->orginal_patched_timing = patched_crtc_timing; + + /* Load horizontal timing */ + + /* CRTC_H_TOTAL = vesa.h_total - 1 */ + REG_SET(OTG_H_TOTAL, 0, + OTG_H_TOTAL, patched_crtc_timing.h_total - 1); + + /* h_sync_start = 0, h_sync_end = vesa.h_sync_width */ + REG_UPDATE_2(OTG_H_SYNC_A, + OTG_H_SYNC_A_START, 0, + OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width); + + /* blank_start = line end - front porch */ + asic_blank_start = patched_crtc_timing.h_total - + patched_crtc_timing.h_front_porch; + + /* blank_end = blank_start - active */ + asic_blank_end = asic_blank_start - + patched_crtc_timing.h_border_right - + patched_crtc_timing.h_addressable - + patched_crtc_timing.h_border_left; + + REG_UPDATE_2(OTG_H_BLANK_START_END, + OTG_H_BLANK_START, asic_blank_start, + OTG_H_BLANK_END, asic_blank_end); + + /* h_sync polarity */ + h_sync_polarity = patched_crtc_timing.flags.HSYNC_POSITIVE_POLARITY ? + 0 : 1; + + REG_UPDATE(OTG_H_SYNC_A_CNTL, + OTG_H_SYNC_A_POL, h_sync_polarity); + + v_total = patched_crtc_timing.v_total - 1; + + REG_SET(OTG_V_TOTAL, 0, + OTG_V_TOTAL, v_total); + + /* In case of V_TOTAL_CONTROL is on, make sure OTG_V_TOTAL_MAX and + * OTG_V_TOTAL_MIN are equal to V_TOTAL. + */ + optc->funcs->set_vtotal_min_max(optc, v_total, v_total); + + /* v_sync_start = 0, v_sync_end = v_sync_width */ + v_sync_end = patched_crtc_timing.v_sync_width; + + REG_UPDATE_2(OTG_V_SYNC_A, + OTG_V_SYNC_A_START, 0, + OTG_V_SYNC_A_END, v_sync_end); + + /* blank_start = frame end - front porch */ + asic_blank_start = patched_crtc_timing.v_total - + patched_crtc_timing.v_front_porch; + + /* blank_end = blank_start - active */ + asic_blank_end = asic_blank_start - + patched_crtc_timing.v_border_bottom - + patched_crtc_timing.v_addressable - + patched_crtc_timing.v_border_top; + + REG_UPDATE_2(OTG_V_BLANK_START_END, + OTG_V_BLANK_START, asic_blank_start, + OTG_V_BLANK_END, asic_blank_end); + + /* v_sync polarity */ + v_sync_polarity = patched_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? 
+ 0 : 1; + + REG_UPDATE(OTG_V_SYNC_A_CNTL, + OTG_V_SYNC_A_POL, v_sync_polarity); + + if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT || + optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + optc1->signal == SIGNAL_TYPE_EDP) { + start_point = 1; + if (patched_crtc_timing.flags.INTERLACE == 1) + field_num = 1; + } + + /* Interlace */ + if (REG(OTG_INTERLACE_CONTROL)) { + if (patched_crtc_timing.flags.INTERLACE == 1) + REG_UPDATE(OTG_INTERLACE_CONTROL, + OTG_INTERLACE_ENABLE, 1); + else + REG_UPDATE(OTG_INTERLACE_CONTROL, + OTG_INTERLACE_ENABLE, 0); + } + + /* VTG enable set to 0 first VInit */ + REG_UPDATE(CONTROL, + VTG0_ENABLE, 0); + + /* original code is using VTG offset to address OTG reg, seems wrong */ + REG_UPDATE_2(OTG_CONTROL, + OTG_START_POINT_CNTL, start_point, + OTG_FIELD_NUMBER_CNTL, field_num); + + optc->funcs->program_global_sync(optc, + vready_offset, + vstartup_start, + vupdate_offset, + vupdate_width); + + optc->funcs->set_vtg_params(optc, dc_crtc_timing, true); + + /* TODO + * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1 + * program_horz_count_by_2 + * for DVI 30bpp mode, 0 otherwise + * program_horz_count_by_2(optc, &patched_crtc_timing); + */ + + /* Enable stereo - only when we need to pack 3D frame. Other types + * of stereo handled in explicit call + */ + + if (optc1_is_two_pixels_per_containter(&patched_crtc_timing) || optc1->opp_count == 2) + h_div = H_TIMING_DIV_BY2; + + if (REG(OPTC_DATA_FORMAT_CONTROL) && optc1->tg_mask->OPTC_DATA_FORMAT != 0) { + uint32_t data_fmt = 0; + + if (patched_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) + data_fmt = 1; + else if (patched_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + data_fmt = 2; + + REG_UPDATE(OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, data_fmt); + } + + if (optc1->tg_mask->OTG_H_TIMING_DIV_MODE != 0) { + if (optc1->opp_count == 4) + h_div = H_TIMING_DIV_BY4; + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, h_div); + } else { + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_BY2, h_div); + } +} + +/** + * optc1_set_vtg_params - Set Vertical Timing Generator (VTG) parameters + * + * @optc: timing_generator struct used to extract the optc parameters + * @dc_crtc_timing: Timing parameters configured + * @program_fp2: Boolean value indicating if FP2 will be programmed or not + * + * OTG is responsible for generating the global sync signals, including + * vertical timing information for each HUBP in the dcfclk domain. Each VTG is + * associated with one OTG that provides HUBP with vertical timing information + * (i.e., there is 1:1 correspondence between OTG and VTG). This function is + * responsible for setting the OTG parameters to the VTG during the pipe + * programming. 
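+ *
+ * As an illustrative sketch (hypothetical numbers, not taken from this
+ * change): for a 1080p timing with v_total = 1125, v_front_porch = 4, no
+ * vertical borders and vstartup_start = 84, the function computes
+ *   v_init              = 1125 - 4            = 1121
+ *   asic_blank_end      = 1121 - 0 - 1080 - 0 = 41
+ *   vertical_line_start = 41 - 84 + 1         = -42
+ * so v_fp2 = 42; when vertical_line_start is non-negative, v_fp2 stays 0.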
+ */ +void optc1_set_vtg_params(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2) +{ + struct dc_crtc_timing patched_crtc_timing; + uint32_t asic_blank_end; + uint32_t v_init; + uint32_t v_fp2 = 0; + int32_t vertical_line_start; + + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + patched_crtc_timing = *dc_crtc_timing; + apply_front_porch_workaround(&patched_crtc_timing); + + /* VCOUNT_INIT is the start of blank */ + v_init = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch; + + /* end of blank = v_init - active */ + asic_blank_end = v_init - + patched_crtc_timing.v_border_bottom - + patched_crtc_timing.v_addressable - + patched_crtc_timing.v_border_top; + + /* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */ + vertical_line_start = asic_blank_end - optc1->vstartup_start + 1; + if (vertical_line_start < 0) + v_fp2 = -vertical_line_start; + + /* Interlace */ + if (REG(OTG_INTERLACE_CONTROL)) { + if (patched_crtc_timing.flags.INTERLACE == 1) { + v_init = v_init / 2; + if ((optc1->vstartup_start/2)*2 > asic_blank_end) + v_fp2 = v_fp2 / 2; + } + } + + if (program_fp2) + REG_UPDATE_2(CONTROL, + VTG0_FP2, v_fp2, + VTG0_VCOUNT_INIT, v_init); + else + REG_UPDATE(CONTROL, VTG0_VCOUNT_INIT, v_init); +} + +void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + uint32_t blank_data_double_buffer_enable = enable ? 1 : 0; + + REG_UPDATE(OTG_DOUBLE_BUFFER_CONTROL, + OTG_BLANK_DATA_DOUBLE_BUFFER_EN, blank_data_double_buffer_enable); +} + +/** + * optc1_set_timing_double_buffer() - DRR double buffering control + * + * Sets double buffer point for V_TOTAL, H_TOTAL, VTOTAL_MIN, + * VTOTAL_MAX, VTOTAL_MIN_SEL and VTOTAL_MAX_SEL registers. + * + * @optc: timing_generator instance. + * @enable: Enable DRR double buffering control if true, disable otherwise. + * + * Options: any time, start of frame, dp start of frame (range timing) + */ +void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t mode = enable ? 2 : 0; + + REG_UPDATE(OTG_DOUBLE_BUFFER_CONTROL, + OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mode); +} + +/** + * optc1_unblank_crtc() - Call ASIC Control Object to UnBlank CRTC. + * + * @optc: timing_generator instance. + */ +static void optc1_unblank_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE_2(OTG_BLANK_CONTROL, + OTG_BLANK_DATA_EN, 0, + OTG_BLANK_DE_MODE, 0); + + /* W/A for automated testing + * Automated testing will fail underflow test as there + * sporadic underflows which occur during the optc blank + * sequence. As a w/a, clear underflow on unblank. + * This prevents the failure, but will not mask actual + * underflow that affect real use cases. + */ + optc1_clear_optc_underflow(optc); +} + +/** + * optc1_blank_crtc() - Call ASIC Control Object to Blank CRTC. + * + * @optc: timing_generator instance. 
+ */ + +static void optc1_blank_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE_2(OTG_BLANK_CONTROL, + OTG_BLANK_DATA_EN, 1, + OTG_BLANK_DE_MODE, 0); + + optc1_set_blank_data_double_buffer(optc, false); +} + +void optc1_set_blank(struct timing_generator *optc, + bool enable_blanking) +{ + if (enable_blanking) + optc1_blank_crtc(optc); + else + optc1_unblank_crtc(optc); +} + +bool optc1_is_blanked(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t blank_en; + uint32_t blank_state; + + REG_GET_2(OTG_BLANK_CONTROL, + OTG_BLANK_DATA_EN, &blank_en, + OTG_CURRENT_BLANK_STATE, &blank_state); + + return blank_en && blank_state; +} + +void optc1_enable_optc_clock(struct timing_generator *optc, bool enable) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (enable) { + REG_UPDATE_2(OPTC_INPUT_CLOCK_CONTROL, + OPTC_INPUT_CLK_EN, 1, + OPTC_INPUT_CLK_GATE_DIS, 1); + + REG_WAIT(OPTC_INPUT_CLOCK_CONTROL, + OPTC_INPUT_CLK_ON, 1, + 1, 1000); + + /* Enable clock */ + REG_UPDATE_2(OTG_CLOCK_CONTROL, + OTG_CLOCK_EN, 1, + OTG_CLOCK_GATE_DIS, 1); + REG_WAIT(OTG_CLOCK_CONTROL, + OTG_CLOCK_ON, 1, + 1, 1000); + } else { + + //last chance to clear underflow, otherwise, it will always there due to clock is off. + if (optc->funcs->is_optc_underflow_occurred(optc) == true) + optc->funcs->clear_optc_underflow(optc); + + REG_UPDATE_2(OTG_CLOCK_CONTROL, + OTG_CLOCK_GATE_DIS, 0, + OTG_CLOCK_EN, 0); + + REG_UPDATE_2(OPTC_INPUT_CLOCK_CONTROL, + OPTC_INPUT_CLK_GATE_DIS, 0, + OPTC_INPUT_CLK_EN, 0); + } +} + +/** + * optc1_enable_crtc() - Enable CRTC - call ASIC Control Object to enable Timing generator. + * + * @optc: timing_generator instance. + */ +static bool optc1_enable_crtc(struct timing_generator *optc) +{ + /* TODO FPGA wait for answer + * OTG_MASTER_UPDATE_MODE != CRTC_MASTER_UPDATE_MODE + * OTG_MASTER_UPDATE_LOCK != CRTC_MASTER_UPDATE_LOCK + */ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* opp instance for OTG. For DCN1.0, ODM is remoed. + * OPP and OPTC should 1:1 mapping + */ + REG_UPDATE(OPTC_DATA_SOURCE_SELECT, + OPTC_SRC_SEL, optc->inst); + + /* VTG enable first is for HW workaround */ + REG_UPDATE(CONTROL, + VTG0_ENABLE, 1); + + REG_SEQ_START(); + + /* Enable CRTC */ + REG_UPDATE_2(OTG_CONTROL, + OTG_DISABLE_POINT_CNTL, 3, + OTG_MASTER_EN, 1); + + REG_SEQ_SUBMIT(); + REG_SEQ_WAIT_DONE(); + + return true; +} + +/* disable_crtc - call ASIC Control Object to disable Timing generator. */ +bool optc1_disable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE_2(OTG_CONTROL, + OTG_DISABLE_POINT_CNTL, 3, + OTG_MASTER_EN, 0); + + REG_UPDATE(CONTROL, + VTG0_ENABLE, 0); + + /* CRTC disabled, so disable clock. 
*/ + REG_WAIT(OTG_CLOCK_CONTROL, + OTG_BUSY, 0, + 1, 100000); + + return true; +} + + +void optc1_program_blank_color( + struct timing_generator *optc, + const struct tg_color *black_color) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_3(OTG_BLACK_COLOR, 0, + OTG_BLACK_COLOR_B_CB, black_color->color_b_cb, + OTG_BLACK_COLOR_G_Y, black_color->color_g_y, + OTG_BLACK_COLOR_R_CR, black_color->color_r_cr); +} + +bool optc1_validate_timing( + struct timing_generator *optc, + const struct dc_crtc_timing *timing) +{ + uint32_t v_blank; + uint32_t h_blank; + uint32_t min_v_blank; + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + ASSERT(timing != NULL); + + v_blank = (timing->v_total - timing->v_addressable - + timing->v_border_top - timing->v_border_bottom); + + h_blank = (timing->h_total - timing->h_addressable - + timing->h_border_right - + timing->h_border_left); + + if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE && + timing->timing_3d_format != TIMING_3D_FORMAT_HW_FRAME_PACKING && + timing->timing_3d_format != TIMING_3D_FORMAT_TOP_AND_BOTTOM && + timing->timing_3d_format != TIMING_3D_FORMAT_SIDE_BY_SIDE && + timing->timing_3d_format != TIMING_3D_FORMAT_FRAME_ALTERNATE && + timing->timing_3d_format != TIMING_3D_FORMAT_INBAND_FA) + return false; + + /* Temporarily blocking interlacing mode until it's supported */ + if (timing->flags.INTERLACE == 1) + return false; + + /* Check maximum number of pixels supported by Timing Generator + * (Currently will never fail, in order to fail needs display which + * needs more than 8192 horizontal and + * more than 8192 vertical total pixels) + */ + if (timing->h_total > optc1->max_h_total || + timing->v_total > optc1->max_v_total) + return false; + + + if (h_blank < optc1->min_h_blank) + return false; + + if (timing->h_sync_width < optc1->min_h_sync_width || + timing->v_sync_width < optc1->min_v_sync_width) + return false; + + min_v_blank = timing->flags.INTERLACE?optc1->min_v_blank_interlace:optc1->min_v_blank; + + if (v_blank < min_v_blank) + return false; + + return true; + +} + +/* + * get_vblank_counter + * + * @brief + * Get counter for vertical blanks. use register CRTC_STATUS_FRAME_COUNT which + * holds the counter of frames. + * + * @param + * struct timing_generator *optc - [in] timing generator which controls the + * desired CRTC + * + * @return + * Counter of frames, which should equal to number of vblanks. 
+ */ +uint32_t optc1_get_vblank_counter(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t frame_count; + + REG_GET(OTG_STATUS_FRAME_COUNT, + OTG_FRAME_COUNT, &frame_count); + + return frame_count; +} + +void optc1_lock(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_GLOBAL_CONTROL0, 0, + OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 1); + + REG_WAIT(OTG_MASTER_UPDATE_LOCK, + UPDATE_LOCK_STATUS, 1, + 1, 10); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); +} + +void optc1_unlock(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 0); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, false); +} + +void optc1_get_position(struct timing_generator *optc, + struct crtc_position *position) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET_2(OTG_STATUS_POSITION, + OTG_HORZ_COUNT, &position->horizontal_count, + OTG_VERT_COUNT, &position->vertical_count); + + REG_GET(OTG_NOM_VERT_POSITION, + OTG_VERT_COUNT_NOM, &position->nominal_vcount); +} + +bool optc1_is_counter_moving(struct timing_generator *optc) +{ + struct crtc_position position1, position2; + + optc->funcs->get_position(optc, &position1); + optc->funcs->get_position(optc, &position2); + + if (position1.horizontal_count == position2.horizontal_count && + position1.vertical_count == position2.vertical_count) + return false; + else + return true; +} + +bool optc1_did_triggered_reset_occur( + struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t occurred_force, occurred_vsync; + + REG_GET(OTG_FORCE_COUNT_NOW_CNTL, + OTG_FORCE_COUNT_NOW_OCCURRED, &occurred_force); + + REG_GET(OTG_VERT_SYNC_CONTROL, + OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, &occurred_vsync); + + return occurred_vsync != 0 || occurred_force != 0; +} + +void optc1_disable_reset_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_WRITE(OTG_TRIGA_CNTL, 0); + + REG_SET(OTG_FORCE_COUNT_NOW_CNTL, 0, + OTG_FORCE_COUNT_NOW_CLEAR, 1); + + REG_SET(OTG_VERT_SYNC_CONTROL, 0, + OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, 1); +} + +void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t falling_edge; + + REG_GET(OTG_V_SYNC_A_CNTL, + OTG_V_SYNC_A_POL, &falling_edge); + + if (falling_edge) + REG_SET_3(OTG_TRIGA_CNTL, 0, + /* vsync signal from selected OTG pipe based + * on OTG_TRIG_SOURCE_PIPE_SELECT setting + */ + OTG_TRIGA_SOURCE_SELECT, 20, + OTG_TRIGA_SOURCE_PIPE_SELECT, source_tg_inst, + /* always detect falling edge */ + OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 1); + else + REG_SET_3(OTG_TRIGA_CNTL, 0, + /* vsync signal from selected OTG pipe based + * on OTG_TRIG_SOURCE_PIPE_SELECT setting + */ + OTG_TRIGA_SOURCE_SELECT, 20, + OTG_TRIGA_SOURCE_PIPE_SELECT, source_tg_inst, + /* always detect rising edge */ + OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1); + + REG_SET(OTG_FORCE_COUNT_NOW_CNTL, 0, + /* force H count to H_TOTAL and V count to V_TOTAL in + * progressive mode and V_TOTAL-1 in interlaced mode + */ + OTG_FORCE_COUNT_NOW_MODE, 2); +} + +void optc1_enable_crtc_reset( + struct timing_generator *optc, + int source_tg_inst, + struct crtc_trigger_info *crtc_tp) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t falling_edge = 0; + uint32_t rising_edge = 0; + + switch 
(crtc_tp->event) { + + case CRTC_EVENT_VSYNC_RISING: + rising_edge = 1; + break; + + case CRTC_EVENT_VSYNC_FALLING: + falling_edge = 1; + break; + } + + REG_SET_4(OTG_TRIGA_CNTL, 0, + /* vsync signal from selected OTG pipe based + * on OTG_TRIG_SOURCE_PIPE_SELECT setting + */ + OTG_TRIGA_SOURCE_SELECT, 20, + OTG_TRIGA_SOURCE_PIPE_SELECT, source_tg_inst, + /* always detect falling edge */ + OTG_TRIGA_RISING_EDGE_DETECT_CNTL, rising_edge, + OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, falling_edge); + + switch (crtc_tp->delay) { + case TRIGGER_DELAY_NEXT_LINE: + REG_SET(OTG_VERT_SYNC_CONTROL, 0, + OTG_AUTO_FORCE_VSYNC_MODE, 1); + break; + case TRIGGER_DELAY_NEXT_PIXEL: + REG_SET(OTG_FORCE_COUNT_NOW_CNTL, 0, + /* force H count to H_TOTAL and V count to V_TOTAL in + * progressive mode and V_TOTAL-1 in interlaced mode + */ + OTG_FORCE_COUNT_NOW_MODE, 2); + break; + } +} + +void optc1_wait_for_state(struct timing_generator *optc, + enum crtc_state state) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + switch (state) { + case CRTC_STATE_VBLANK: + REG_WAIT(OTG_STATUS, + OTG_V_BLANK, 1, + 1, 100000); /* 1 vupdate at 10hz */ + break; + + case CRTC_STATE_VACTIVE: + REG_WAIT(OTG_STATUS, + OTG_V_ACTIVE_DISP, 1, + 1, 100000); /* 1 vupdate at 10hz */ + break; + + default: + break; + } +} + +void optc1_set_early_control( + struct timing_generator *optc, + uint32_t early_cntl) +{ + /* asic design change, do not need this control + * empty for share caller logic + */ +} + + +void optc1_set_static_screen_control( + struct timing_generator *optc, + uint32_t event_triggers, + uint32_t num_frames) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + // By register spec, it only takes 8 bit value + if (num_frames > 0xFF) + num_frames = 0xFF; + + /* Bit 8 is no longer applicable in RV for PSR case, + * set bit 8 to 0 if given + */ + if ((event_triggers & STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN) + != 0) + event_triggers = event_triggers & + ~STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN; + + REG_SET_2(OTG_STATIC_SCREEN_CONTROL, 0, + OTG_STATIC_SCREEN_EVENT_MASK, event_triggers, + OTG_STATIC_SCREEN_FRAME_COUNT, num_frames); +} + +static void optc1_setup_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_GLOBAL_CONTROL2, 0, + MANUAL_FLOW_CONTROL_SEL, optc->inst); + + REG_SET_8(OTG_TRIGA_CNTL, 0, + OTG_TRIGA_SOURCE_SELECT, 22, + OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, + OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1, + OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0, + OTG_TRIGA_POLARITY_SELECT, 0, + OTG_TRIGA_FREQUENCY_SELECT, 0, + OTG_TRIGA_DELAY, 0, + OTG_TRIGA_CLEAR, 1); +} + +static void optc1_program_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_MANUAL_FLOW_CONTROL, 0, + MANUAL_FLOW_CONTROL, 1); + + REG_SET(OTG_MANUAL_FLOW_CONTROL, 0, + MANUAL_FLOW_CONTROL, 0); +} + +/** + * optc1_set_drr() - Program dynamic refresh rate registers m_OTGx_OTG_V_TOTAL_*. + * + * @optc: timing_generator instance. + * @params: parameters used for Dynamic Refresh Rate. 
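+ *
+ * As an illustrative sketch (hypothetical numbers, not taken from this
+ * change): for a 1080p60 timing with pixel clock 148.5 MHz, h_total = 2200
+ * and v_total = 1125, a 48-60 Hz variable refresh window would use roughly
+ *   params->vertical_total_min = 1125  (60 Hz)
+ *   params->vertical_total_max = 1406  (~48 Hz = 148500000 / (2200 * 1406))
+ * and this function then programs those values minus one into
+ * OTG_V_TOTAL_MIN/MAX and enables the min/max selects.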
+ */ +void optc1_set_drr( + struct timing_generator *optc, + const struct drr_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + + if (params->vertical_total_mid != 0) { + + REG_SET(OTG_V_TOTAL_MID, 0, + OTG_V_TOTAL_MID, params->vertical_total_mid - 1); + + REG_UPDATE_2(OTG_V_TOTAL_CONTROL, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, + OTG_VTOTAL_MID_FRAME_NUM, + (uint8_t)params->vertical_total_mid_frame_num); + + } + + optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); + + REG_UPDATE_5(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK_EN, 0, + OTG_SET_V_TOTAL_MIN_MASK, 0); + } + + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); +} + +void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_V_TOTAL_MAX, 0, + OTG_V_TOTAL_MAX, vtotal_max); + + REG_SET(OTG_V_TOTAL_MIN, 0, + OTG_V_TOTAL_MIN, vtotal_min); +} + +static void optc1_set_test_pattern( + struct timing_generator *optc, + /* TODO: replace 'controller_dp_test_pattern' by 'test_pattern_mode' + * because this is not DP-specific (which is probably somewhere in DP + * encoder) */ + enum controller_dp_test_pattern test_pattern, + enum dc_color_depth color_depth) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + enum test_pattern_color_format bit_depth; + enum test_pattern_dyn_range dyn_range; + enum test_pattern_mode mode; + uint32_t pattern_mask; + uint32_t pattern_data; + /* color ramp generator mixes 16-bits color */ + uint32_t src_bpc = 16; + /* requested bpc */ + uint32_t dst_bpc; + uint32_t index; + /* RGB values of the color bars. + * Produce two RGB colors: RGB0 - white (all Fs) + * and RGB1 - black (all 0s) + * (three RGB components for two colors) + */ + uint16_t src_color[6] = {0xFFFF, 0xFFFF, 0xFFFF, 0x0000, + 0x0000, 0x0000}; + /* dest color (converted to the specified color format) */ + uint16_t dst_color[6]; + uint32_t inc_base; + + /* translate to bit depth */ + switch (color_depth) { + case COLOR_DEPTH_666: + bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_6; + break; + case COLOR_DEPTH_888: + bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_8; + break; + case COLOR_DEPTH_101010: + bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_10; + break; + case COLOR_DEPTH_121212: + bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_12; + break; + default: + bit_depth = TEST_PATTERN_COLOR_FORMAT_BPC_8; + break; + } + + switch (test_pattern) { + case CONTROLLER_DP_TEST_PATTERN_COLORSQUARES: + case CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA: + { + dyn_range = (test_pattern == + CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA ? + TEST_PATTERN_DYN_RANGE_CEA : + TEST_PATTERN_DYN_RANGE_VESA); + mode = TEST_PATTERN_MODE_COLORSQUARES_RGB; + + REG_UPDATE_2(OTG_TEST_PATTERN_PARAMETERS, + OTG_TEST_PATTERN_VRES, 6, + OTG_TEST_PATTERN_HRES, 6); + + REG_UPDATE_4(OTG_TEST_PATTERN_CONTROL, + OTG_TEST_PATTERN_EN, 1, + OTG_TEST_PATTERN_MODE, mode, + OTG_TEST_PATTERN_DYNAMIC_RANGE, dyn_range, + OTG_TEST_PATTERN_COLOR_FORMAT, bit_depth); + } + break; + + case CONTROLLER_DP_TEST_PATTERN_VERTICALBARS: + case CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS: + { + mode = (test_pattern == + CONTROLLER_DP_TEST_PATTERN_VERTICALBARS ? 
+ TEST_PATTERN_MODE_VERTICALBARS : + TEST_PATTERN_MODE_HORIZONTALBARS); + + switch (bit_depth) { + case TEST_PATTERN_COLOR_FORMAT_BPC_6: + dst_bpc = 6; + break; + case TEST_PATTERN_COLOR_FORMAT_BPC_8: + dst_bpc = 8; + break; + case TEST_PATTERN_COLOR_FORMAT_BPC_10: + dst_bpc = 10; + break; + default: + dst_bpc = 8; + break; + } + + /* adjust color to the required colorFormat */ + for (index = 0; index < 6; index++) { + /* dst = 2^dstBpc * src / 2^srcBpc = src >> + * (srcBpc - dstBpc); + */ + dst_color[index] = + src_color[index] >> (src_bpc - dst_bpc); + /* CRTC_TEST_PATTERN_DATA has 16 bits, + * lowest 6 are hardwired to ZERO + * color bits should be left aligned to MSB + * XXXXXXXXXX000000 for 10 bit, + * XXXXXXXX00000000 for 8 bit and XXXXXX0000000000 for 6 + */ + dst_color[index] <<= (16 - dst_bpc); + } + + REG_WRITE(OTG_TEST_PATTERN_PARAMETERS, 0); + + /* We have to write the mask before data, similar to pipeline. + * For example, for 8 bpc, if we want RGB0 to be magenta, + * and RGB1 to be cyan, + * we need to make 7 writes: + * MASK DATA + * 000001 00000000 00000000 set mask to R0 + * 000010 11111111 00000000 R0 255, 0xFF00, set mask to G0 + * 000100 00000000 00000000 G0 0, 0x0000, set mask to B0 + * 001000 11111111 00000000 B0 255, 0xFF00, set mask to R1 + * 010000 00000000 00000000 R1 0, 0x0000, set mask to G1 + * 100000 11111111 00000000 G1 255, 0xFF00, set mask to B1 + * 100000 11111111 00000000 B1 255, 0xFF00 + * + * we will make a loop of 6 in which we prepare the mask, + * then write, then prepare the color for next write. + * first iteration will write mask only, + * but each next iteration color prepared in + * previous iteration will be written within new mask, + * the last component will written separately, + * mask is not changing between 6th and 7th write + * and color will be prepared by last iteration + */ + + /* write color, color values mask in CRTC_TEST_PATTERN_MASK + * is B1, G1, R1, B0, G0, R0 + */ + pattern_data = 0; + for (index = 0; index < 6; index++) { + /* prepare color mask, first write PATTERN_DATA + * will have all zeros + */ + pattern_mask = (1 << index); + + /* write color component */ + REG_SET_2(OTG_TEST_PATTERN_COLOR, 0, + OTG_TEST_PATTERN_MASK, pattern_mask, + OTG_TEST_PATTERN_DATA, pattern_data); + + /* prepare next color component, + * will be written in the next iteration + */ + pattern_data = dst_color[index]; + } + /* write last color component, + * it's been already prepared in the loop + */ + REG_SET_2(OTG_TEST_PATTERN_COLOR, 0, + OTG_TEST_PATTERN_MASK, pattern_mask, + OTG_TEST_PATTERN_DATA, pattern_data); + + /* enable test pattern */ + REG_UPDATE_4(OTG_TEST_PATTERN_CONTROL, + OTG_TEST_PATTERN_EN, 1, + OTG_TEST_PATTERN_MODE, mode, + OTG_TEST_PATTERN_DYNAMIC_RANGE, 0, + OTG_TEST_PATTERN_COLOR_FORMAT, bit_depth); + } + break; + + case CONTROLLER_DP_TEST_PATTERN_COLORRAMP: + { + mode = (bit_depth == + TEST_PATTERN_COLOR_FORMAT_BPC_10 ? 
+ TEST_PATTERN_MODE_DUALRAMP_RGB : + TEST_PATTERN_MODE_SINGLERAMP_RGB); + + switch (bit_depth) { + case TEST_PATTERN_COLOR_FORMAT_BPC_6: + dst_bpc = 6; + break; + case TEST_PATTERN_COLOR_FORMAT_BPC_8: + dst_bpc = 8; + break; + case TEST_PATTERN_COLOR_FORMAT_BPC_10: + dst_bpc = 10; + break; + default: + dst_bpc = 8; + break; + } + + /* increment for the first ramp for one color gradation + * 1 gradation for 6-bit color is 2^10 + * gradations in 16-bit color + */ + inc_base = (src_bpc - dst_bpc); + + switch (bit_depth) { + case TEST_PATTERN_COLOR_FORMAT_BPC_6: + { + REG_UPDATE_5(OTG_TEST_PATTERN_PARAMETERS, + OTG_TEST_PATTERN_INC0, inc_base, + OTG_TEST_PATTERN_INC1, 0, + OTG_TEST_PATTERN_HRES, 6, + OTG_TEST_PATTERN_VRES, 6, + OTG_TEST_PATTERN_RAMP0_OFFSET, 0); + } + break; + case TEST_PATTERN_COLOR_FORMAT_BPC_8: + { + REG_UPDATE_5(OTG_TEST_PATTERN_PARAMETERS, + OTG_TEST_PATTERN_INC0, inc_base, + OTG_TEST_PATTERN_INC1, 0, + OTG_TEST_PATTERN_HRES, 8, + OTG_TEST_PATTERN_VRES, 6, + OTG_TEST_PATTERN_RAMP0_OFFSET, 0); + } + break; + case TEST_PATTERN_COLOR_FORMAT_BPC_10: + { + REG_UPDATE_5(OTG_TEST_PATTERN_PARAMETERS, + OTG_TEST_PATTERN_INC0, inc_base, + OTG_TEST_PATTERN_INC1, inc_base + 2, + OTG_TEST_PATTERN_HRES, 8, + OTG_TEST_PATTERN_VRES, 5, + OTG_TEST_PATTERN_RAMP0_OFFSET, 384 << 6); + } + break; + default: + break; + } + + REG_WRITE(OTG_TEST_PATTERN_COLOR, 0); + + /* enable test pattern */ + REG_WRITE(OTG_TEST_PATTERN_CONTROL, 0); + + REG_SET_4(OTG_TEST_PATTERN_CONTROL, 0, + OTG_TEST_PATTERN_EN, 1, + OTG_TEST_PATTERN_MODE, mode, + OTG_TEST_PATTERN_DYNAMIC_RANGE, 0, + OTG_TEST_PATTERN_COLOR_FORMAT, bit_depth); + } + break; + case CONTROLLER_DP_TEST_PATTERN_VIDEOMODE: + { + REG_WRITE(OTG_TEST_PATTERN_CONTROL, 0); + REG_WRITE(OTG_TEST_PATTERN_COLOR, 0); + REG_WRITE(OTG_TEST_PATTERN_PARAMETERS, 0); + } + break; + default: + break; + + } +} + +void optc1_get_crtc_scanoutpos( + struct timing_generator *optc, + uint32_t *v_blank_start, + uint32_t *v_blank_end, + uint32_t *h_position, + uint32_t *v_position) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + struct crtc_position position; + + REG_GET_2(OTG_V_BLANK_START_END, + OTG_V_BLANK_START, v_blank_start, + OTG_V_BLANK_END, v_blank_end); + + optc1_get_position(optc, &position); + + *h_position = position.horizontal_count; + *v_position = position.vertical_count; +} + +static void optc1_enable_stereo(struct timing_generator *optc, + const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (flags) { + uint32_t stereo_en; + stereo_en = flags->FRAME_PACKED == 0 ? 1 : 0; + + if (flags->PROGRAM_STEREO) + REG_UPDATE_3(OTG_STEREO_CONTROL, + OTG_STEREO_EN, stereo_en, + OTG_STEREO_SYNC_OUTPUT_LINE_NUM, 0, + OTG_STEREO_SYNC_OUTPUT_POLARITY, flags->RIGHT_EYE_POLARITY == 0 ? 0 : 1); + + if (flags->PROGRAM_POLARITY) + REG_UPDATE(OTG_STEREO_CONTROL, + OTG_STEREO_EYE_FLAG_POLARITY, + flags->RIGHT_EYE_POLARITY == 0 ? 
0 : 1); + + if (flags->DISABLE_STEREO_DP_SYNC) + REG_UPDATE(OTG_STEREO_CONTROL, + OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, 1); + + if (flags->PROGRAM_STEREO) + REG_UPDATE_2(OTG_3D_STRUCTURE_CONTROL, + OTG_3D_STRUCTURE_EN, flags->FRAME_PACKED, + OTG_3D_STRUCTURE_STEREO_SEL_OVR, flags->FRAME_PACKED); + + } +} + +void optc1_program_stereo(struct timing_generator *optc, + const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags) +{ + if (flags->PROGRAM_STEREO) + optc1_enable_stereo(optc, timing, flags); + else + optc1_disable_stereo(optc); +} + + +bool optc1_is_stereo_left_eye(struct timing_generator *optc) +{ + bool ret = false; + uint32_t left_eye = 0; + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OTG_STEREO_STATUS, + OTG_STEREO_CURRENT_EYE, &left_eye); + if (left_eye == 1) + ret = true; + else + ret = false; + + return ret; +} + +bool optc1_get_hw_timing(struct timing_generator *tg, + struct dc_crtc_timing *hw_crtc_timing) +{ + struct dcn_otg_state s = {0}; + + if (tg == NULL || hw_crtc_timing == NULL) + return false; + + optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s); + + hw_crtc_timing->h_total = s.h_total + 1; + hw_crtc_timing->h_addressable = s.h_total - ((s.h_total - s.h_blank_start) + s.h_blank_end); + hw_crtc_timing->h_front_porch = s.h_total + 1 - s.h_blank_start; + hw_crtc_timing->h_sync_width = s.h_sync_a_end - s.h_sync_a_start; + + hw_crtc_timing->v_total = s.v_total + 1; + hw_crtc_timing->v_addressable = s.v_total - ((s.v_total - s.v_blank_start) + s.v_blank_end); + hw_crtc_timing->v_front_porch = s.v_total + 1 - s.v_blank_start; + hw_crtc_timing->v_sync_width = s.v_sync_a_end - s.v_sync_a_start; + + return true; +} + + +void optc1_read_otg_state(struct optc *optc1, + struct dcn_otg_state *s) +{ + REG_GET(OTG_CONTROL, + OTG_MASTER_EN, &s->otg_enabled); + + REG_GET_2(OTG_V_BLANK_START_END, + OTG_V_BLANK_START, &s->v_blank_start, + OTG_V_BLANK_END, &s->v_blank_end); + + REG_GET(OTG_V_SYNC_A_CNTL, + OTG_V_SYNC_A_POL, &s->v_sync_a_pol); + + REG_GET(OTG_V_TOTAL, + OTG_V_TOTAL, &s->v_total); + + REG_GET(OTG_V_TOTAL_MAX, + OTG_V_TOTAL_MAX, &s->v_total_max); + + REG_GET(OTG_V_TOTAL_MIN, + OTG_V_TOTAL_MIN, &s->v_total_min); + + REG_GET(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MAX_SEL, &s->v_total_max_sel); + + REG_GET(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, &s->v_total_min_sel); + + REG_GET_2(OTG_V_SYNC_A, + OTG_V_SYNC_A_START, &s->v_sync_a_start, + OTG_V_SYNC_A_END, &s->v_sync_a_end); + + REG_GET_2(OTG_H_BLANK_START_END, + OTG_H_BLANK_START, &s->h_blank_start, + OTG_H_BLANK_END, &s->h_blank_end); + + REG_GET_2(OTG_H_SYNC_A, + OTG_H_SYNC_A_START, &s->h_sync_a_start, + OTG_H_SYNC_A_END, &s->h_sync_a_end); + + REG_GET(OTG_H_SYNC_A_CNTL, + OTG_H_SYNC_A_POL, &s->h_sync_a_pol); + + REG_GET(OTG_H_TOTAL, + OTG_H_TOTAL, &s->h_total); + + REG_GET(OPTC_INPUT_GLOBAL_CONTROL, + OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status); + + REG_GET(OTG_VERTICAL_INTERRUPT1_CONTROL, + OTG_VERTICAL_INTERRUPT1_INT_ENABLE, &s->vertical_interrupt1_en); + + REG_GET(OTG_VERTICAL_INTERRUPT1_POSITION, + OTG_VERTICAL_INTERRUPT1_LINE_START, &s->vertical_interrupt1_line); + + REG_GET(OTG_VERTICAL_INTERRUPT2_CONTROL, + OTG_VERTICAL_INTERRUPT2_INT_ENABLE, &s->vertical_interrupt2_en); + + REG_GET(OTG_VERTICAL_INTERRUPT2_POSITION, + OTG_VERTICAL_INTERRUPT2_LINE_START, &s->vertical_interrupt2_line); +} + +bool optc1_get_otg_active_size(struct timing_generator *optc, + uint32_t *otg_active_width, + uint32_t *otg_active_height) +{ + uint32_t otg_enabled; + uint32_t v_blank_start; + uint32_t 
v_blank_end;
+	uint32_t h_blank_start;
+	uint32_t h_blank_end;
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+
+	REG_GET(OTG_CONTROL,
+			OTG_MASTER_EN, &otg_enabled);
+
+	if (otg_enabled == 0)
+		return false;
+
+	REG_GET_2(OTG_V_BLANK_START_END,
+			OTG_V_BLANK_START, &v_blank_start,
+			OTG_V_BLANK_END, &v_blank_end);
+
+	REG_GET_2(OTG_H_BLANK_START_END,
+			OTG_H_BLANK_START, &h_blank_start,
+			OTG_H_BLANK_END, &h_blank_end);
+
+	*otg_active_width = h_blank_start - h_blank_end;
+	*otg_active_height = v_blank_start - v_blank_end;
+	return true;
+}
+
+void optc1_clear_optc_underflow(struct timing_generator *optc)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+	REG_UPDATE(OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, 1);
+}
+
+void optc1_tg_init(struct timing_generator *optc)
+{
+	optc1_set_blank_data_double_buffer(optc, true);
+	optc1_set_timing_double_buffer(optc, true);
+	optc1_clear_optc_underflow(optc);
+}
+
+bool optc1_is_tg_enabled(struct timing_generator *optc)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+	uint32_t otg_enabled = 0;
+
+	REG_GET(OTG_CONTROL, OTG_MASTER_EN, &otg_enabled);
+
+	return (otg_enabled != 0);
+
+}
+
+bool optc1_is_optc_underflow_occurred(struct timing_generator *optc)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+	uint32_t underflow_occurred = 0;
+
+	REG_GET(OPTC_INPUT_GLOBAL_CONTROL,
+			OPTC_UNDERFLOW_OCCURRED_STATUS,
+			&underflow_occurred);
+
+	return (underflow_occurred == 1);
+}
+
+bool optc1_configure_crc(struct timing_generator *optc,
+			  const struct crc_params *params)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+	/* Cannot configure crc on a CRTC that is disabled */
+	if (!optc1_is_tg_enabled(optc))
+		return false;
+
+	REG_WRITE(OTG_CRC_CNTL, 0);
+
+	if (!params->enable)
+		return true;
+
+	/* Program frame boundaries */
+	/* Window A x axis start and end. */
+	REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL,
+			OTG_CRC0_WINDOWA_X_START, params->windowa_x_start,
+			OTG_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+	/* Window A y axis start and end. */
+	REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL,
+			OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+			OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+	/* Window B x axis start and end. */
+	REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL,
+			OTG_CRC0_WINDOWB_X_START, params->windowb_x_start,
+			OTG_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+	/* Window B y axis start and end. */
+	REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL,
+			OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+			OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+	/* Set crc mode and selection, and enable. Only using CRC0*/
+	REG_UPDATE_3(OTG_CRC_CNTL,
+			OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+			OTG_CRC0_SELECT, params->selection,
+			OTG_CRC_EN, 1);
+
+	return true;
+}
+
+/**
+ * optc1_get_crc - Capture CRC result per component
+ *
+ * @optc: timing_generator instance.
+ * @r_cr: 16-bit primary CRC signature for red data.
+ * @g_y: 16-bit primary CRC signature for green data.
+ * @b_cb: 16-bit primary CRC signature for blue data.
+ *
+ * This function reads the CRC signature from the OPTC registers. Notice that
+ * we have three registers to keep the CRC result per color component (RGB).
+ *
+ * Returns:
+ * If CRC is disabled, return false; otherwise, return true, and the CRC
+ * results in the parameters.
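+ *
+ * A minimal usage sketch (hypothetical caller, not part of this change):
+ *   uint32_t r_cr, g_y, b_cb;
+ *
+ *   if (tg->funcs->get_crc(tg, &r_cr, &g_y, &b_cb))
+ *           DC_LOG_DEBUG("CRC0: %04x %04x %04x", r_cr, g_y, b_cb);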
+ */ +bool optc1_get_crc(struct timing_generator *optc, + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) +{ + uint32_t field = 0; + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OTG_CRC_CNTL, OTG_CRC_EN, &field); + + /* Early return if CRC is not enabled for this CRTC */ + if (!field) + return false; + + /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ + REG_GET_2(OTG_CRC0_DATA_RG, + CRC0_R_CR, r_cr, + CRC0_G_Y, g_y); + + /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ + REG_GET(OTG_CRC0_DATA_B, + CRC0_B_CB, b_cb); + + return true; +} + +static const struct timing_generator_funcs dcn10_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc1_enable_crtc, + .disable_crtc = optc1_disable_crtc, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank = optc1_set_blank, + .is_blanked = optc1_is_blanked, + .set_blank_color = optc1_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc1_lock, + .unlock = optc1_unlock, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc1_set_drr, + .get_last_used_drr_vtotal = NULL, + .set_vtotal_min_max = optc1_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .set_test_pattern = optc1_set_test_pattern, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .set_blank_data_double_buffer = optc1_set_blank_data_double_buffer, + .tg_init = optc1_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .get_crc = optc1_get_crc, + .configure_crc = optc1_configure_crc, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc1_program_manual_trigger, + .setup_manual_trigger = optc1_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, +}; + +void dcn10_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn10_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4; + optc1->min_v_sync_width = 1; +} + +/* "Containter" vs. "pixel" is a concept within HW blocks, mostly those closer to the back-end. It works like this: + * + * - In most of the formats (RGB or YCbCr 4:4:4, 4:2:2 uncompressed and DSC 4:2:2 Simple) pixel rate is the same as + * containter rate. 
+ * + * - In 4:2:0 (DSC or uncompressed) there are two pixels per container, hence the target container rate has to be + * halved to maintain the correct pixel rate. + * + * - Unlike 4:2:2 uncompressed, DSC 4:2:2 Native also has two pixels per container (this happens when DSC is applied + * to it) and has to be treated the same as 4:2:0, i.e. target containter rate has to be halved in this case as well. + * + */ +bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) +{ + bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420; + + two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422 + && !timing->dsc_cfg.ycbcr422_simple); + return two_pix; +} + diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h new file mode 100644 index 000000000000..ab81594a7fad --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h @@ -0,0 +1,599 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DC_TIMING_GENERATOR_DCN10_H__ +#define __DC_TIMING_GENERATOR_DCN10_H__ + +#include "optc.h" + +#define DCN10TG_FROM_TG(tg)\ + container_of(tg, struct optc, base) + +#define TG_COMMON_REG_LIST_DCN(inst) \ + SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ + SRI(OTG_VUPDATE_PARAM, OTG, inst),\ + SRI(OTG_VREADY_PARAM, OTG, inst),\ + SRI(OTG_BLANK_CONTROL, OTG, inst),\ + SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\ + SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ + SRI(OTG_H_TOTAL, OTG, inst),\ + SRI(OTG_H_BLANK_START_END, OTG, inst),\ + SRI(OTG_H_SYNC_A, OTG, inst),\ + SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\ + SRI(OTG_H_TIMING_CNTL, OTG, inst),\ + SRI(OTG_V_TOTAL, OTG, inst),\ + SRI(OTG_V_BLANK_START_END, OTG, inst),\ + SRI(OTG_V_SYNC_A, OTG, inst),\ + SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\ + SRI(OTG_INTERLACE_CONTROL, OTG, inst),\ + SRI(OTG_CONTROL, OTG, inst),\ + SRI(OTG_STEREO_CONTROL, OTG, inst),\ + SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ + SRI(OTG_STEREO_STATUS, OTG, inst),\ + SRI(OTG_V_TOTAL_MAX, OTG, inst),\ + SRI(OTG_V_TOTAL_MID, OTG, inst),\ + SRI(OTG_V_TOTAL_MIN, OTG, inst),\ + SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\ + SRI(OTG_TRIGA_CNTL, OTG, inst),\ + SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ + SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ + SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\ + SRI(OTG_STATUS, OTG, inst),\ + SRI(OTG_STATUS_POSITION, OTG, inst),\ + SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ + SRI(OTG_BLACK_COLOR, OTG, inst),\ + SRI(OTG_CLOCK_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ + SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ + SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ + SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ + SRI(CONTROL, VTG, inst),\ + SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ + SRI(OTG_MASTER_UPDATE_MODE, OTG, inst),\ + SRI(OTG_GSL_CONTROL, OTG, inst),\ + SRI(OTG_CRC_CNTL, OTG, inst),\ + SRI(OTG_CRC0_DATA_RG, OTG, inst),\ + SRI(OTG_CRC0_DATA_B, OTG, inst),\ + SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ + SR(GSL_SOURCE_SELECT),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst) + +#define TG_COMMON_REG_LIST_DCN1_0(inst) \ + TG_COMMON_REG_LIST_DCN(inst),\ + SRI(OTG_TEST_PATTERN_PARAMETERS, OTG, inst),\ + SRI(OTG_TEST_PATTERN_CONTROL, OTG, inst),\ + SRI(OTG_TEST_PATTERN_COLOR, OTG, inst),\ + SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst) + + +struct dcn_optc_registers { + uint32_t OTG_GLOBAL_CONTROL1; + uint32_t OTG_GLOBAL_CONTROL2; + uint32_t OTG_VERT_SYNC_CONTROL; + uint32_t OTG_MASTER_UPDATE_MODE; + uint32_t OTG_GSL_CONTROL; + uint32_t OTG_VSTARTUP_PARAM; + uint32_t OTG_VUPDATE_PARAM; + uint32_t OTG_VREADY_PARAM; + uint32_t OTG_BLANK_CONTROL; + uint32_t OTG_MASTER_UPDATE_LOCK; + uint32_t OTG_GLOBAL_CONTROL0; + uint32_t OTG_DOUBLE_BUFFER_CONTROL; + uint32_t OTG_H_TOTAL; + uint32_t OTG_H_BLANK_START_END; + uint32_t OTG_H_SYNC_A; + uint32_t OTG_H_SYNC_A_CNTL; + uint32_t OTG_H_TIMING_CNTL; + uint32_t OTG_V_TOTAL; + uint32_t OTG_V_BLANK_START_END; + uint32_t OTG_V_SYNC_A; + uint32_t OTG_V_SYNC_A_CNTL; + uint32_t OTG_INTERLACE_CONTROL; + uint32_t OTG_CONTROL; + uint32_t 
OTG_STEREO_CONTROL; + uint32_t OTG_3D_STRUCTURE_CONTROL; + uint32_t OTG_STEREO_STATUS; + uint32_t OTG_V_TOTAL_MAX; + uint32_t OTG_V_TOTAL_MID; + uint32_t OTG_V_TOTAL_MIN; + uint32_t OTG_V_TOTAL_CONTROL; + uint32_t OTG_TRIGA_CNTL; + uint32_t OTG_TRIGA_MANUAL_TRIG; + uint32_t OTG_MANUAL_FLOW_CONTROL; + uint32_t OTG_FORCE_COUNT_NOW_CNTL; + uint32_t OTG_STATIC_SCREEN_CONTROL; + uint32_t OTG_STATUS_FRAME_COUNT; + uint32_t OTG_STATUS; + uint32_t OTG_STATUS_POSITION; + uint32_t OTG_NOM_VERT_POSITION; + uint32_t OTG_BLACK_COLOR; + uint32_t OTG_TEST_PATTERN_PARAMETERS; + uint32_t OTG_TEST_PATTERN_CONTROL; + uint32_t OTG_TEST_PATTERN_COLOR; + uint32_t OTG_CLOCK_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT0_POSITION; + uint32_t OTG_VERTICAL_INTERRUPT1_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT1_POSITION; + uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT2_POSITION; + uint32_t OPTC_INPUT_CLOCK_CONTROL; + uint32_t OPTC_DATA_SOURCE_SELECT; + uint32_t OPTC_MEMORY_CONFIG; + uint32_t OPTC_INPUT_GLOBAL_CONTROL; + uint32_t CONTROL; + uint32_t OTG_GSL_WINDOW_X; + uint32_t OTG_GSL_WINDOW_Y; + uint32_t OTG_VUPDATE_KEEPOUT; + uint32_t OTG_CRC_CNTL; + uint32_t OTG_CRC_CNTL2; + uint32_t OTG_CRC0_DATA_RG; + uint32_t OTG_CRC0_DATA_B; + uint32_t OTG_CRC1_DATA_B; + uint32_t OTG_CRC2_DATA_B; + uint32_t OTG_CRC3_DATA_B; + uint32_t OTG_CRC1_DATA_RG; + uint32_t OTG_CRC2_DATA_RG; + uint32_t OTG_CRC3_DATA_RG; + uint32_t OTG_CRC0_WINDOWA_X_CONTROL; + uint32_t OTG_CRC0_WINDOWA_Y_CONTROL; + uint32_t OTG_CRC0_WINDOWB_X_CONTROL; + uint32_t OTG_CRC0_WINDOWB_Y_CONTROL; + uint32_t OTG_CRC1_WINDOWA_X_CONTROL; + uint32_t OTG_CRC1_WINDOWA_Y_CONTROL; + uint32_t OTG_CRC1_WINDOWB_X_CONTROL; + uint32_t OTG_CRC1_WINDOWB_Y_CONTROL; + uint32_t GSL_SOURCE_SELECT; + uint32_t DWB_SOURCE_SELECT; + uint32_t OTG_DSC_START_POSITION; + uint32_t OPTC_DATA_FORMAT_CONTROL; + uint32_t OPTC_BYTES_PER_PIXEL; + uint32_t OPTC_WIDTH_CONTROL; + uint32_t OTG_DRR_CONTROL; + uint32_t OTG_BLANK_DATA_COLOR; + uint32_t OTG_BLANK_DATA_COLOR_EXT; + uint32_t OTG_DRR_TRIGGER_WINDOW; + uint32_t OTG_M_CONST_DTO0; + uint32_t OTG_M_CONST_DTO1; + uint32_t OTG_DRR_V_TOTAL_CHANGE; + uint32_t OTG_GLOBAL_CONTROL4; + uint32_t OTG_CRC0_WINDOWA_X_CONTROL_READBACK; + uint32_t OTG_CRC0_WINDOWA_Y_CONTROL_READBACK; + uint32_t OTG_CRC0_WINDOWB_X_CONTROL_READBACK; + uint32_t OTG_CRC0_WINDOWB_Y_CONTROL_READBACK; + uint32_t OTG_CRC1_WINDOWA_X_CONTROL_READBACK; + uint32_t OTG_CRC1_WINDOWA_Y_CONTROL_READBACK; + uint32_t OTG_CRC1_WINDOWB_X_CONTROL_READBACK; + uint32_t OTG_CRC1_WINDOWB_Y_CONTROL_READBACK; + uint32_t OPTC_CLOCK_CONTROL; +}; + +#define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ + SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ + SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ + SF(OTG0_OTG_BLANK_CONTROL, OTG_BLANK_DATA_EN, mask_sh),\ + SF(OTG0_OTG_BLANK_CONTROL, OTG_BLANK_DE_MODE, mask_sh),\ + SF(OTG0_OTG_BLANK_CONTROL, OTG_CURRENT_BLANK_STATE, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_BLANK_DATA_DOUBLE_BUFFER_EN, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + 
SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ + SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_BY2, mask_sh),\ + SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ + SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MID, OTG_V_TOTAL_MID, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK_EN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_FRAME_NUM, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ + SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, 
OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ + SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ + SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_B_CB, mask_sh),\ + SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_G_Y, mask_sh),\ + SF(OTG0_OTG_BLACK_COLOR, OTG_BLACK_COLOR_R_CR, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ + SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ + SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ + SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_MODE, MASTER_UPDATE_INTERLACED_MODE, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, 
mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh) + + + +#define TG_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\ + TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_INC0, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_INC1, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_VRES, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_HRES, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_RAMP0_OFFSET, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_EN, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_MODE, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_DYNAMIC_RANGE, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_COLOR_FORMAT, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_MASK, mask_sh),\ + SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_DATA, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SRC_SEL, mask_sh),\ + SF(OTG0_OTG_MANUAL_FLOW_CONTROL, MANUAL_FLOW_CONTROL, mask_sh),\ + +#define TG_REG_FIELD_LIST_DCN1_0(type) \ + type VSTARTUP_START;\ + type VUPDATE_OFFSET;\ + type VUPDATE_WIDTH;\ + type VREADY_OFFSET;\ + type OTG_BLANK_DATA_EN;\ + type OTG_BLANK_DE_MODE;\ + type OTG_CURRENT_BLANK_STATE;\ + type OTG_MASTER_UPDATE_LOCK;\ + type UPDATE_LOCK_STATUS;\ + type OTG_UPDATE_PENDING;\ + type OTG_MASTER_UPDATE_LOCK_SEL;\ + type OTG_BLANK_DATA_DOUBLE_BUFFER_EN;\ + type OTG_H_TOTAL;\ + type OTG_H_BLANK_START;\ + type OTG_H_BLANK_END;\ + type OTG_H_SYNC_A_START;\ + type OTG_H_SYNC_A_END;\ + type OTG_H_SYNC_A_POL;\ + type OTG_H_TIMING_DIV_BY2;\ + type OTG_V_TOTAL;\ + type OTG_V_BLANK_START;\ + type OTG_V_BLANK_END;\ + type OTG_V_SYNC_A_START;\ + type OTG_V_SYNC_A_END;\ + type OTG_V_SYNC_A_POL;\ + type OTG_INTERLACE_ENABLE;\ + type OTG_MASTER_EN;\ + type OTG_START_POINT_CNTL;\ + type OTG_DISABLE_POINT_CNTL;\ + type OTG_FIELD_NUMBER_CNTL;\ + type OTG_CURRENT_MASTER_EN_STATE;\ + type OTG_STEREO_EN;\ + type OTG_STEREO_SYNC_OUTPUT_LINE_NUM;\ + type OTG_STEREO_SYNC_OUTPUT_POLARITY;\ + type OTG_STEREO_EYE_FLAG_POLARITY;\ + type OTG_STEREO_CURRENT_EYE;\ + type OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP;\ + type OTG_3D_STRUCTURE_EN;\ + type OTG_3D_STRUCTURE_V_UPDATE_MODE;\ + type OTG_3D_STRUCTURE_STEREO_SEL_OVR;\ + type OTG_V_TOTAL_MAX;\ + type OTG_V_TOTAL_MID;\ + type OTG_V_TOTAL_MIN;\ + type OTG_V_TOTAL_MIN_SEL;\ + type OTG_V_TOTAL_MAX_SEL;\ + type OTG_VTOTAL_MID_REPLACING_MAX_EN;\ + type OTG_VTOTAL_MID_FRAME_NUM;\ + type OTG_FORCE_LOCK_ON_EVENT;\ + type OTG_SET_V_TOTAL_MIN_MASK_EN;\ + type OTG_SET_V_TOTAL_MIN_MASK;\ + type OTG_FORCE_COUNT_NOW_CLEAR;\ + type OTG_FORCE_COUNT_NOW_MODE;\ + type OTG_FORCE_COUNT_NOW_OCCURRED;\ + type OTG_TRIGA_SOURCE_SELECT;\ + type OTG_TRIGA_SOURCE_PIPE_SELECT;\ + type OTG_TRIGA_RISING_EDGE_DETECT_CNTL;\ + type OTG_TRIGA_FALLING_EDGE_DETECT_CNTL;\ + type OTG_TRIGA_POLARITY_SELECT;\ + type OTG_TRIGA_FREQUENCY_SELECT;\ + type OTG_TRIGA_DELAY;\ + type OTG_TRIGA_CLEAR;\ + type OTG_TRIGA_MANUAL_TRIG;\ + type OTG_STATIC_SCREEN_EVENT_MASK;\ + type OTG_STATIC_SCREEN_FRAME_COUNT;\ + type OTG_FRAME_COUNT;\ + type OTG_V_BLANK;\ + type OTG_V_ACTIVE_DISP;\ + type OTG_HORZ_COUNT;\ + type OTG_VERT_COUNT;\ + type OTG_VERT_COUNT_NOM;\ + type OTG_BLACK_COLOR_B_CB;\ + type OTG_BLACK_COLOR_G_Y;\ + type OTG_BLACK_COLOR_R_CR;\ + type OTG_BLANK_DATA_COLOR_BLUE_CB;\ + type 
OTG_BLANK_DATA_COLOR_GREEN_Y;\ + type OTG_BLANK_DATA_COLOR_RED_CR;\ + type OTG_BLANK_DATA_COLOR_BLUE_CB_EXT;\ + type OTG_BLANK_DATA_COLOR_GREEN_Y_EXT;\ + type OTG_BLANK_DATA_COLOR_RED_CR_EXT;\ + type OTG_VTOTAL_MID_REPLACING_MIN_EN;\ + type OTG_TEST_PATTERN_INC0;\ + type OTG_TEST_PATTERN_INC1;\ + type OTG_TEST_PATTERN_VRES;\ + type OTG_TEST_PATTERN_HRES;\ + type OTG_TEST_PATTERN_RAMP0_OFFSET;\ + type OTG_TEST_PATTERN_EN;\ + type OTG_TEST_PATTERN_MODE;\ + type OTG_TEST_PATTERN_DYNAMIC_RANGE;\ + type OTG_TEST_PATTERN_COLOR_FORMAT;\ + type OTG_TEST_PATTERN_MASK;\ + type OTG_TEST_PATTERN_DATA;\ + type OTG_BUSY;\ + type OTG_CLOCK_EN;\ + type OTG_CLOCK_ON;\ + type OTG_CLOCK_GATE_DIS;\ + type OTG_VERTICAL_INTERRUPT0_INT_ENABLE;\ + type OTG_VERTICAL_INTERRUPT0_LINE_START;\ + type OTG_VERTICAL_INTERRUPT0_LINE_END;\ + type OTG_VERTICAL_INTERRUPT1_INT_ENABLE;\ + type OTG_VERTICAL_INTERRUPT1_LINE_START;\ + type OTG_VERTICAL_INTERRUPT2_INT_ENABLE;\ + type OTG_VERTICAL_INTERRUPT2_LINE_START;\ + type OPTC_INPUT_CLK_EN;\ + type OPTC_INPUT_CLK_ON;\ + type OPTC_INPUT_CLK_GATE_DIS;\ + type OPTC_UNDERFLOW_OCCURRED_STATUS;\ + type OPTC_UNDERFLOW_CLEAR;\ + type OPTC_SRC_SEL;\ + type VTG0_ENABLE;\ + type VTG0_FP2;\ + type VTG0_VCOUNT_INIT;\ + type OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED;\ + type OTG_FORCE_VSYNC_NEXT_LINE_CLEAR;\ + type OTG_AUTO_FORCE_VSYNC_MODE;\ + type MASTER_UPDATE_INTERLACED_MODE;\ + type OTG_GSL0_EN;\ + type OTG_GSL1_EN;\ + type OTG_GSL2_EN;\ + type OTG_GSL_MASTER_EN;\ + type OTG_GSL_FORCE_DELAY;\ + type OTG_GSL_CHECK_ALL_FIELDS;\ + type OTG_GSL_WINDOW_START_X;\ + type OTG_GSL_WINDOW_END_X;\ + type OTG_GSL_WINDOW_START_Y;\ + type OTG_GSL_WINDOW_END_Y;\ + type OTG_RANGE_TIMING_DBUF_UPDATE_MODE;\ + type OTG_GSL_MASTER_MODE;\ + type OTG_MASTER_UPDATE_LOCK_GSL_EN;\ + type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET;\ + type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET;\ + type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;\ + type OTG_CRC_CONT_EN;\ + type OTG_CRC0_SELECT;\ + type OTG_CRC_EN;\ + type CRC0_R_CR;\ + type CRC0_G_Y;\ + type CRC0_B_CB;\ + type CRC1_R_CR;\ + type CRC1_G_Y;\ + type CRC1_B_CB;\ + type CRC2_R_CR;\ + type CRC2_G_Y;\ + type CRC2_B_CB;\ + type CRC3_R_CR;\ + type CRC3_G_Y;\ + type CRC3_B_CB;\ + type OTG_CRC0_WINDOWA_X_START;\ + type OTG_CRC0_WINDOWA_X_END;\ + type OTG_CRC0_WINDOWA_Y_START;\ + type OTG_CRC0_WINDOWA_Y_END;\ + type OTG_CRC0_WINDOWB_X_START;\ + type OTG_CRC0_WINDOWB_X_END;\ + type OTG_CRC0_WINDOWB_Y_START;\ + type OTG_CRC0_WINDOWB_Y_END;\ + type OTG_CRC_WINDOW_DB_EN;\ + type OTG_CRC1_WINDOWA_X_START;\ + type OTG_CRC1_WINDOWA_X_END;\ + type OTG_CRC1_WINDOWA_Y_START;\ + type OTG_CRC1_WINDOWA_Y_END;\ + type OTG_CRC1_WINDOWB_X_START;\ + type OTG_CRC1_WINDOWB_X_END;\ + type OTG_CRC1_WINDOWB_Y_START;\ + type OTG_CRC1_WINDOWB_Y_END;\ + type GSL0_READY_SOURCE_SEL;\ + type GSL1_READY_SOURCE_SEL;\ + type GSL2_READY_SOURCE_SEL;\ + type MANUAL_FLOW_CONTROL;\ + type MANUAL_FLOW_CONTROL_SEL; + +#define TG_REG_FIELD_LIST(type) \ + TG_REG_FIELD_LIST_DCN1_0(type)\ + type OTG_V_SYNC_MODE;\ + type OTG_DRR_TRIGGER_WINDOW_START_X;\ + type OTG_DRR_TRIGGER_WINDOW_END_X;\ + type OTG_DRR_V_TOTAL_CHANGE_LIMIT;\ + type OTG_OUT_MUX;\ + type OTG_M_CONST_DTO_PHASE;\ + type OTG_M_CONST_DTO_MODULO;\ + type MASTER_UPDATE_LOCK_DB_X;\ + type MASTER_UPDATE_LOCK_DB_Y;\ + type MASTER_UPDATE_LOCK_DB_EN;\ + type GLOBAL_UPDATE_LOCK_EN;\ + type DIG_UPDATE_LOCATION;\ + type OTG_DSC_START_POSITION_X;\ + type OTG_DSC_START_POSITION_LINE_NUM;\ + type OPTC_NUM_OF_INPUT_SEGMENT;\ + type OPTC_SEG0_SRC_SEL;\ + type 
OPTC_SEG1_SRC_SEL;\ + type OPTC_SEG2_SRC_SEL;\ + type OPTC_SEG3_SRC_SEL;\ + type OPTC_MEM_SEL;\ + type OPTC_DATA_FORMAT;\ + type OPTC_DSC_MODE;\ + type OPTC_DSC_BYTES_PER_PIXEL;\ + type OPTC_DSC_SLICE_WIDTH;\ + type OPTC_SEGMENT_WIDTH;\ + type OPTC_DWB0_SOURCE_SELECT;\ + type OPTC_DWB1_SOURCE_SELECT;\ + type MASTER_UPDATE_LOCK_DB_START_X;\ + type MASTER_UPDATE_LOCK_DB_END_X;\ + type MASTER_UPDATE_LOCK_DB_START_Y;\ + type MASTER_UPDATE_LOCK_DB_END_Y;\ + type DIG_UPDATE_POSITION_X;\ + type DIG_UPDATE_POSITION_Y;\ + type OTG_H_TIMING_DIV_MODE;\ + type OTG_DRR_TIMING_DBUF_UPDATE_MODE;\ + type OTG_CRC_DSC_MODE;\ + type OTG_CRC_DATA_STREAM_COMBINE_MODE;\ + type OTG_CRC_DATA_STREAM_SPLIT_MODE;\ + type OTG_CRC_DATA_FORMAT;\ + type OTG_V_TOTAL_LAST_USED_BY_DRR;\ + type OTG_DRR_TIMING_DBUF_UPDATE_PENDING; + +#define TG_REG_FIELD_LIST_DCN3_2(type) \ + type OTG_H_TIMING_DIV_MODE_MANUAL; + + +#define TG_REG_FIELD_LIST_DCN3_5(type) \ + type OTG_CRC0_WINDOWA_X_START_READBACK;\ + type OTG_CRC0_WINDOWA_X_END_READBACK;\ + type OTG_CRC0_WINDOWA_Y_START_READBACK;\ + type OTG_CRC0_WINDOWA_Y_END_READBACK;\ + type OTG_CRC0_WINDOWB_X_START_READBACK;\ + type OTG_CRC0_WINDOWB_X_END_READBACK;\ + type OTG_CRC0_WINDOWB_Y_START_READBACK;\ + type OTG_CRC0_WINDOWB_Y_END_READBACK; \ + type OTG_CRC1_WINDOWA_X_START_READBACK;\ + type OTG_CRC1_WINDOWA_X_END_READBACK;\ + type OTG_CRC1_WINDOWA_Y_START_READBACK;\ + type OTG_CRC1_WINDOWA_Y_END_READBACK;\ + type OTG_CRC1_WINDOWB_X_START_READBACK;\ + type OTG_CRC1_WINDOWB_X_END_READBACK;\ + type OTG_CRC1_WINDOWB_Y_START_READBACK;\ + type OTG_CRC1_WINDOWB_Y_END_READBACK;\ + type OPTC_FGCG_REP_DIS; + +struct dcn_optc_shift { + TG_REG_FIELD_LIST(uint8_t) + TG_REG_FIELD_LIST_DCN3_2(uint8_t) + TG_REG_FIELD_LIST_DCN3_5(uint8_t) +}; + +struct dcn_optc_mask { + TG_REG_FIELD_LIST(uint32_t) + TG_REG_FIELD_LIST_DCN3_2(uint32_t) + TG_REG_FIELD_LIST_DCN3_5(uint32_t) +}; + +void dcn10_timing_generator_init(struct optc *optc); + +#endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c new file mode 100644 index 000000000000..58bdbd859bf9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c @@ -0,0 +1,587 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
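[Editorial aside, not part of the patch: dcn_optc_shift and dcn_optc_mask above are produced by expanding the same TG_REG_FIELD_LIST() body twice, once with uint8_t to hold bit-shift values and once with uint32_t to hold bit masks. Below is a minimal standalone sketch of that field-list pattern; the two example fields and the 15-bit mask value are invented for illustration, and the last computation mirrors how dcn10_timing_generator_init() derives max_h_total from the mask.]

#include <stdint.h>
#include <stdio.h>

/* One field list drives both structs, as TG_REG_FIELD_LIST() does above. */
#define EXAMPLE_FIELD_LIST(type) \
        type OTG_H_TOTAL;        \
        type OTG_V_TOTAL;

struct example_shift { EXAMPLE_FIELD_LIST(uint8_t) };
struct example_mask  { EXAMPLE_FIELD_LIST(uint32_t) };

int main(void)
{
        /* Hypothetical field layout: 15-bit totals starting at bit 0. */
        struct example_shift shift = { .OTG_H_TOTAL = 0, .OTG_V_TOTAL = 0 };
        struct example_mask mask   = { .OTG_H_TOTAL = 0x7fff, .OTG_V_TOTAL = 0x7fff };
        uint32_t reg = 0x0000089f;      /* pretend raw OTG_H_TOTAL readback */

        /* Field extraction, the way the REG_GET() helpers do it */
        uint32_t h_total = (reg & mask.OTG_H_TOTAL) >> shift.OTG_H_TOTAL;

        /* Largest programmable total, as in dcn10_timing_generator_init() */
        uint32_t max_h_total = mask.OTG_H_TOTAL + 1;

        printf("h_total = %u, max_h_total = %u\n",
               (unsigned int)h_total, (unsigned int)max_h_total);
        return 0;
}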
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "dcn20_optc.h"
+#include "dc.h"
+
+#define REG(reg)\
+        optc1->tg_regs->reg
+
+#define CTX \
+        optc1->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+        optc1->tg_shift->field_name, optc1->tg_mask->field_name
+
+/**
+ * optc2_enable_crtc() - Enable CRTC - call ASIC Control Object to enable Timing generator.
+ *
+ * @optc: timing_generator instance.
+ *
+ * Return: If CRTC is enabled, return true.
+ *
+ */
+bool optc2_enable_crtc(struct timing_generator *optc)
+{
+        /* TODO FPGA wait for answer
+         * OTG_MASTER_UPDATE_MODE != CRTC_MASTER_UPDATE_MODE
+         * OTG_MASTER_UPDATE_LOCK != CRTC_MASTER_UPDATE_LOCK
+         */
+        struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+        /* OPP instance for OTG. For DCN1.0, ODM is removed.
+         * OPP and OPTC should have a 1:1 mapping.
+         */
+        REG_UPDATE(OPTC_DATA_SOURCE_SELECT,
+                        OPTC_SEG0_SRC_SEL, optc->inst);
+
+        /* VTG enable first is for HW workaround */
+        REG_UPDATE(CONTROL,
+                        VTG0_ENABLE, 1);
+
+        REG_SEQ_START();
+
+        /* Enable CRTC */
+        REG_UPDATE_2(OTG_CONTROL,
+                        OTG_DISABLE_POINT_CNTL, 3,
+                        OTG_MASTER_EN, 1);
+
+        REG_SEQ_SUBMIT();
+        REG_SEQ_WAIT_DONE();
+
+        return true;
+}
+
+/**
+ * optc2_set_gsl() - Assign OTG to GSL groups,
+ * set one of the OTGs to be master & rest are slaves
+ *
+ * @optc: timing_generator instance.
+ * @params: pointer to gsl_params
+ */
+void optc2_set_gsl(struct timing_generator *optc,
+                   const struct gsl_params *params)
+{
+        struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+/*
+ * There are (MAX_OPTC+1)/2 gsl groups available for use.
+ * In each group (assign an OTG to a group by setting OTG_GSLX_EN = 1),
+ * set one of the OTGs to be the master (OTG_GSL_MASTER_EN = 1) and the rest are slaves.
+ */
+        REG_UPDATE_5(OTG_GSL_CONTROL,
+                OTG_GSL0_EN, params->gsl0_en,
+                OTG_GSL1_EN, params->gsl1_en,
+                OTG_GSL2_EN, params->gsl2_en,
+                OTG_GSL_MASTER_EN, params->gsl_master_en,
+                OTG_GSL_MASTER_MODE, params->gsl_master_mode);
+}
+
+
+void optc2_set_gsl_source_select(
+                struct timing_generator *optc,
+                int group_idx,
+                uint32_t gsl_ready_signal)
+{
+        struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+        switch (group_idx) {
+        case 1:
+                REG_UPDATE(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, gsl_ready_signal);
+                break;
+        case 2:
+                REG_UPDATE(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, gsl_ready_signal);
+                break;
+        case 3:
+                REG_UPDATE(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, gsl_ready_signal);
+                break;
+        default:
+                break;
+        }
+}
+
+/* Set DSC-related configuration.
+ *   dsc_mode: 0 disables DSC, other values enable DSC in specified format
+ *   dsc_bytes_per_pixel: Bytes per pixel in u3.28 format
+ *   dsc_slice_width: Slice width in pixels
+ */
+void optc2_set_dsc_config(struct timing_generator *optc,
+                enum optc_dsc_mode dsc_mode,
+                uint32_t dsc_bytes_per_pixel,
+                uint32_t dsc_slice_width)
+{
+        struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+        REG_UPDATE(OPTC_DATA_FORMAT_CONTROL,
+                OPTC_DSC_MODE, dsc_mode);
+
+        REG_SET(OPTC_BYTES_PER_PIXEL, 0,
+                OPTC_DSC_BYTES_PER_PIXEL, dsc_bytes_per_pixel);
+
+        REG_UPDATE(OPTC_WIDTH_CONTROL,
+                OPTC_DSC_SLICE_WIDTH, dsc_slice_width);
+}
+
+/* Get DSC-related configuration.
+ * dsc_mode: 0 disables DSC, other values enable DSC in specified format + */ +void optc2_get_dsc_status(struct timing_generator *optc, + uint32_t *dsc_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OPTC_DATA_FORMAT_CONTROL, + OPTC_DSC_MODE, dsc_mode); +} + + +/*TEMP: Need to figure out inheritance model here.*/ +bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) +{ + return optc1_is_two_pixels_per_containter(timing); +} + +void optc2_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t h_div_2 = 0; + + REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf); + REG_WRITE(OTG_H_TIMING_CNTL, 0); + + h_div_2 = optc2_is_two_pixels_per_containter(dc_crtc_timing); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_BY2, h_div_2); + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, + struct dc_crtc_timing *timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) + / opp_cnt; + uint32_t memory_mask; + + ASSERT(opp_cnt == 2); + + /* TODO: In pseudocode but does not affect maximus, delete comment if we dont need on asic + * REG_SET(OTG_GLOBAL_CONTROL2, 0, GLOBAL_UPDATE_LOCK_EN, 1); + * Program OTG register MASTER_UPDATE_LOCK_DB_X/Y to the position before DP frame start + * REG_SET_2(OTG_GLOBAL_CONTROL1, 0, + * MASTER_UPDATE_LOCK_DB_X, 160, + * MASTER_UPDATE_LOCK_DB_Y, 240); + */ + + /* 2 pieces of memory required for up to 5120 displays, 4 for up to 8192, + * however, for ODM combine we can simplify by always using 4. + * To make sure there's no overlap, each instance "reserves" 2 memories and + * they are uniquely combined here. 
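+ * For example, with opp_id[0] = 0 and opp_id[1] = 1 the mask computed
+ * below is (0x3 << 0) | (0x3 << 2) = 0xf, i.e. memory instances 0-3
+ * are claimed by this OPTC.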
+ */ + memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); + + if (REG(OPTC_MEMORY_CONFIG)) + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, memory_mask); + + REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 1, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1]); + + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, mpcc_hactive); + + REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_BY2, 1); + optc1->opp_count = opp_cnt; +} + +void optc2_get_optc_source(struct timing_generator *optc, + uint32_t *num_of_src_opp, + uint32_t *src_opp_id_0, + uint32_t *src_opp_id_1) +{ + uint32_t num_of_input_segments; + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET_3(OPTC_DATA_SOURCE_SELECT, + OPTC_NUM_OF_INPUT_SEGMENT, &num_of_input_segments, + OPTC_SEG0_SRC_SEL, src_opp_id_0, + OPTC_SEG1_SRC_SEL, src_opp_id_1); + + if (num_of_input_segments == 1) + *num_of_src_opp = 2; + else + *num_of_src_opp = 1; + + /* Work around VBIOS not updating OPTC_NUM_OF_INPUT_SEGMENT */ + if (*src_opp_id_1 == 0xf) + *num_of_src_opp = 1; +} + +static void optc2_set_dwb_source(struct timing_generator *optc, + uint32_t dwb_pipe_inst) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (dwb_pipe_inst == 0) + REG_UPDATE(DWB_SOURCE_SELECT, + OPTC_DWB0_SOURCE_SELECT, optc->inst); + else if (dwb_pipe_inst == 1) + REG_UPDATE(DWB_SOURCE_SELECT, + OPTC_DWB1_SOURCE_SELECT, optc->inst); +} + +static void optc2_align_vblanks( + struct timing_generator *optc_master, + struct timing_generator *optc_slave, + uint32_t master_pixel_clock_100Hz, + uint32_t slave_pixel_clock_100Hz, + uint8_t master_clock_divider, + uint8_t slave_clock_divider) +{ + /* accessing slave OTG registers */ + struct optc *optc1 = DCN10TG_FROM_TG(optc_slave); + + uint32_t master_v_active = 0; + uint32_t master_h_total = 0; + uint32_t slave_h_total = 0; + uint64_t L, XY; + uint32_t X, Y, p = 10000; + uint32_t master_update_lock; + + /* disable slave OTG */ + REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); + /* wait until disabled */ + REG_WAIT(OTG_CONTROL, + OTG_CURRENT_MASTER_EN_STATE, + 0, 10, 5000); + + REG_GET(OTG_H_TOTAL, OTG_H_TOTAL, &slave_h_total); + + /* assign slave OTG to be controlled by master update lock */ + REG_SET(OTG_GLOBAL_CONTROL0, 0, + OTG_MASTER_UPDATE_LOCK_SEL, optc_master->inst); + + /* accessing master OTG registers */ + optc1 = DCN10TG_FROM_TG(optc_master); + + /* saving update lock state, not sure if it's needed */ + REG_GET(OTG_MASTER_UPDATE_LOCK, + OTG_MASTER_UPDATE_LOCK, &master_update_lock); + /* unlocking master OTG */ + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 0); + + REG_GET(OTG_V_BLANK_START_END, + OTG_V_BLANK_START, &master_v_active); + REG_GET(OTG_H_TOTAL, OTG_H_TOTAL, &master_h_total); + + /* calculate when to enable slave OTG */ + L = (uint64_t)p * slave_h_total * master_pixel_clock_100Hz; + L = div_u64(L, master_h_total); + L = div_u64(L, slave_pixel_clock_100Hz); + XY = div_u64(L, p); + Y = master_v_active - XY - 1; + X = div_u64(((XY + 1) * p - L) * master_h_total, p * master_clock_divider); + + /* + * set master OTG to unlock when V/H + * counters reach calculated values + */ + REG_UPDATE(OTG_GLOBAL_CONTROL1, + MASTER_UPDATE_LOCK_DB_EN, 1); + REG_UPDATE_2(OTG_GLOBAL_CONTROL1, + MASTER_UPDATE_LOCK_DB_X, + X, + MASTER_UPDATE_LOCK_DB_Y, + Y); + + /* lock master OTG */ + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 1); + REG_WAIT(OTG_MASTER_UPDATE_LOCK, + UPDATE_LOCK_STATUS, 1, 1, 10); + + /* accessing slave OTG registers */ + optc1 = 
DCN10TG_FROM_TG(optc_slave); + + /* + * enable slave OTG, the OTG is locked with + * master's update lock, so it will not run + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 1); + + /* accessing master OTG registers */ + optc1 = DCN10TG_FROM_TG(optc_master); + + /* + * unlock master OTG. When master H/V counters reach + * DB_XY point, slave OTG will start + */ + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 0); + + /* accessing slave OTG registers */ + optc1 = DCN10TG_FROM_TG(optc_slave); + + /* wait for slave OTG to start running*/ + REG_WAIT(OTG_CONTROL, + OTG_CURRENT_MASTER_EN_STATE, + 1, 10, 5000); + + /* accessing master OTG registers */ + optc1 = DCN10TG_FROM_TG(optc_master); + + /* disable the XY point*/ + REG_UPDATE(OTG_GLOBAL_CONTROL1, + MASTER_UPDATE_LOCK_DB_EN, 0); + REG_UPDATE_2(OTG_GLOBAL_CONTROL1, + MASTER_UPDATE_LOCK_DB_X, + 0, + MASTER_UPDATE_LOCK_DB_Y, + 0); + + /*restore master update lock*/ + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, master_update_lock); + + /* accessing slave OTG registers */ + optc1 = DCN10TG_FROM_TG(optc_slave); + /* restore slave to be controlled by it's own */ + REG_SET(OTG_GLOBAL_CONTROL0, 0, + OTG_MASTER_UPDATE_LOCK_SEL, optc_slave->inst); + +} + +void optc2_triplebuffer_lock(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_GLOBAL_CONTROL0, 0, + OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); + + REG_SET(OTG_VUPDATE_KEEPOUT, 0, + OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); + + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 1); + + REG_WAIT(OTG_MASTER_UPDATE_LOCK, + UPDATE_LOCK_STATUS, 1, + 1, 10); +} + +void optc2_triplebuffer_unlock(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 0); + + REG_SET(OTG_VUPDATE_KEEPOUT, 0, + OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 0); + +} + +void optc2_lock_doublebuffer_enable(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t v_blank_start = 0; + uint32_t h_blank_start = 0; + + REG_UPDATE(OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_EN, 1); + + REG_UPDATE_2(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 1, + DIG_UPDATE_LOCATION, 20); + + REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START, &v_blank_start); + + REG_GET(OTG_H_BLANK_START_END, OTG_H_BLANK_START, &h_blank_start); + + REG_UPDATE_2(OTG_GLOBAL_CONTROL1, + MASTER_UPDATE_LOCK_DB_X, + (h_blank_start - 200 - 1) / optc1->opp_count, + MASTER_UPDATE_LOCK_DB_Y, + v_blank_start - 1); + + REG_SET_3(OTG_VUPDATE_KEEPOUT, 0, + MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, 0, + MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, 100, + OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); +} + +void optc2_lock_doublebuffer_disable(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE_2(OTG_GLOBAL_CONTROL1, + MASTER_UPDATE_LOCK_DB_X, + 0, + MASTER_UPDATE_LOCK_DB_Y, + 0); + + REG_UPDATE_2(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 0, + DIG_UPDATE_LOCATION, 0); + + REG_UPDATE(OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_EN, 0); +} + +void optc2_setup_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* Set the min/max selectors unconditionally so that + * DMCUB fw may change OTG timings when necessary + * TODO: Remove the w/a after fixing the issue in DMCUB firmware + */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + 
OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + + REG_SET_8(OTG_TRIGA_CNTL, 0, + OTG_TRIGA_SOURCE_SELECT, 21, + OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, + OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1, + OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0, + OTG_TRIGA_POLARITY_SELECT, 0, + OTG_TRIGA_FREQUENCY_SELECT, 0, + OTG_TRIGA_DELAY, 0, + OTG_TRIGA_CLEAR, 1); +} + +void optc2_program_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_TRIGA_MANUAL_TRIG, 0, + OTG_TRIGA_MANUAL_TRIG, 1); +} + +bool optc2_configure_crc(struct timing_generator *optc, + const struct crc_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_2(OTG_CRC_CNTL2, 0, + OTG_CRC_DSC_MODE, params->dsc_mode, + OTG_CRC_DATA_STREAM_COMBINE_MODE, params->odm_mode); + + return optc1_configure_crc(optc, params); +} + + +void optc2_get_last_used_drr_vtotal(struct timing_generator *optc, uint32_t *refresh_rate) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, refresh_rate); +} + +static struct timing_generator_funcs dcn20_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc2_enable_crtc, + .disable_crtc = optc1_disable_crtc, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. 
Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank = optc1_set_blank, + .is_blanked = optc1_is_blanked, + .set_blank_color = optc1_program_blank_color, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .triplebuffer_lock = optc2_triplebuffer_lock, + .triplebuffer_unlock = optc2_triplebuffer_unlock, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc1_lock, + .unlock = optc1_unlock, + .lock_doublebuffer_enable = optc2_lock_doublebuffer_enable, + .lock_doublebuffer_disable = optc2_lock_doublebuffer_disable, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc1_set_drr, + .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, + .set_vtotal_min_max = optc1_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .set_blank_data_double_buffer = optc1_set_blank_data_double_buffer, + .tg_init = optc1_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .setup_global_swap_lock = NULL, + .get_crc = optc1_get_crc, + .configure_crc = optc2_configure_crc, + .set_dsc_config = optc2_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, + .set_dwb_source = optc2_set_dwb_source, + .set_odm_bypass = optc2_set_odm_bypass, + .set_odm_combine = optc2_set_odm_combine, + .get_optc_source = optc2_get_optc_source, + .set_gsl = optc2_set_gsl, + .set_gsl_source_select = optc2_set_gsl_source_select, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc2_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, + .align_vblanks = optc2_align_vblanks, +}; + +void dcn20_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn20_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4;// Minimum HSYNC = 8 pixels asked By HW in the first place for no actual reason. Oculus Rift S will not light up with 8 as it's hsyncWidth is 6. Changing it to 4 to fix that issue. + optc1->min_v_sync_width = 1; +} diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h new file mode 100644 index 000000000000..c2e03ced392e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h @@ -0,0 +1,124 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_OPTC_DCN20_H__ +#define __DC_OPTC_DCN20_H__ + +#include "dcn10/dcn10_optc.h" + +#define TG_COMMON_REG_LIST_DCN2_0(inst) \ + TG_COMMON_REG_LIST_DCN(inst),\ + SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_GSL_WINDOW_X, OTG, inst),\ + SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ + SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ + SRI(OTG_DSC_START_POSITION, OTG, inst),\ + SRI(OTG_CRC_CNTL2, OTG, inst),\ + SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ + SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ + SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ + SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ + SR(DWB_SOURCE_SELECT),\ + SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst), \ + SRI(OTG_DRR_CONTROL, OTG, inst) + +#define TG_COMMON_MASK_SH_LIST_DCN2_0(mask_sh)\ + TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, DIG_UPDATE_LOCATION, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ + SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ + SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_MANUAL_FLOW_CONTROL, MANUAL_FLOW_CONTROL, mask_sh), \ + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + +void dcn20_timing_generator_init(struct optc *optc); + +void optc2_get_last_used_drr_vtotal(struct timing_generator *optc, + uint32_t *refresh_rate); + +bool optc2_enable_crtc(struct timing_generator 
*optc); + +void optc2_set_gsl(struct timing_generator *optc, + const struct gsl_params *params); + +void optc2_set_gsl_source_select(struct timing_generator *optc, + int group_idx, + uint32_t gsl_ready_signal); + +void optc2_set_dsc_config(struct timing_generator *optc, + enum optc_dsc_mode dsc_mode, + uint32_t dsc_bytes_per_pixel, + uint32_t dsc_slice_width); + +void optc2_get_dsc_status(struct timing_generator *optc, + uint32_t *dsc_mode); + +void optc2_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing); + +void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, + struct dc_crtc_timing *timing); + +void optc2_get_optc_source(struct timing_generator *optc, + uint32_t *num_of_src_opp, + uint32_t *src_opp_id_0, + uint32_t *src_opp_id_1); + +void optc2_triplebuffer_lock(struct timing_generator *optc); +void optc2_triplebuffer_unlock(struct timing_generator *optc); +void optc2_lock_doublebuffer_disable(struct timing_generator *optc); +void optc2_lock_doublebuffer_enable(struct timing_generator *optc); +void optc2_setup_manual_trigger(struct timing_generator *optc); +void optc2_program_manual_trigger(struct timing_generator *optc); +bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); +bool optc2_configure_crc(struct timing_generator *optc, + const struct crc_params *params); +#endif /* __DC_OPTC_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c new file mode 100644 index 000000000000..70fcbec03fb6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c @@ -0,0 +1,202 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
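[Editorial aside, not part of the patch: a standalone sketch of the decoding that optc2_get_optc_source(), declared above, performs on the OPTC source-select register: one input segment normally means two OPPs feed the OPTC (ODM combine), but a SEG1 source select of 0xf marks the second segment as unused. The register values passed in main() are invented for the example.]

#include <stdint.h>
#include <stdio.h>

static uint32_t decode_num_of_src_opp(uint32_t num_of_input_segments,
                                      uint32_t src_opp_id_1)
{
        uint32_t num_of_src_opp = (num_of_input_segments == 1) ? 2 : 1;

        /* Work around VBIOS leaving OPTC_NUM_OF_INPUT_SEGMENT stale */
        if (src_opp_id_1 == 0xf)
                num_of_src_opp = 1;

        return num_of_src_opp;
}

int main(void)
{
        printf("%u\n", (unsigned int)decode_num_of_src_opp(1, 1));   /* 2: true ODM combine */
        printf("%u\n", (unsigned int)decode_num_of_src_opp(1, 0xf)); /* 1: second segment unused */
        return 0;
}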
+ * + * Authors: AMD + * + */ + +#include "reg_helper.h" +#include "dcn201_optc.h" +#include "dcn10/dcn10_optc.h" +#include "dc.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + +/*TEMP: Need to figure out inheritance model here.*/ +bool optc201_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) +{ + return optc1_is_two_pixels_per_containter(timing); +} + +static void optc201_triplebuffer_lock(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_GLOBAL_CONTROL0, 0, + OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); + REG_SET(OTG_VUPDATE_KEEPOUT, 0, + OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 1); + + REG_WAIT(OTG_MASTER_UPDATE_LOCK, + UPDATE_LOCK_STATUS, 1, + 1, 10); +} + +static void optc201_triplebuffer_unlock(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 0); + REG_SET(OTG_VUPDATE_KEEPOUT, 0, + OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 0); + +} + +static bool optc201_validate_timing( + struct timing_generator *optc, + const struct dc_crtc_timing *timing) +{ + uint32_t v_blank; + uint32_t h_blank; + uint32_t min_v_blank; + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + ASSERT(timing != NULL); + + v_blank = (timing->v_total - timing->v_addressable - + timing->v_border_top - timing->v_border_bottom); + + h_blank = (timing->h_total - timing->h_addressable - + timing->h_border_right - + timing->h_border_left); + + if (timing->timing_3d_format != TIMING_3D_FORMAT_NONE && + timing->timing_3d_format != TIMING_3D_FORMAT_HW_FRAME_PACKING && + timing->timing_3d_format != TIMING_3D_FORMAT_TOP_AND_BOTTOM && + timing->timing_3d_format != TIMING_3D_FORMAT_SIDE_BY_SIDE && + timing->timing_3d_format != TIMING_3D_FORMAT_FRAME_ALTERNATE && + timing->timing_3d_format != TIMING_3D_FORMAT_INBAND_FA) + return false; + + /* Check maximum number of pixels supported by Timing Generator + * (Currently will never fail, in order to fail needs display which + * needs more than 8192 horizontal and + * more than 8192 vertical total pixels) + */ + if (timing->h_total > optc1->max_h_total || + timing->v_total > optc1->max_v_total) + return false; + + if (h_blank < optc1->min_h_blank) + return false; + + if (timing->h_sync_width < optc1->min_h_sync_width || + timing->v_sync_width < optc1->min_v_sync_width) + return false; + + min_v_blank = timing->flags.INTERLACE?optc1->min_v_blank_interlace:optc1->min_v_blank; + + if (v_blank < min_v_blank) + return false; + + return true; + +} + +static void optc201_get_optc_source(struct timing_generator *optc, + uint32_t *num_of_src_opp, + uint32_t *src_opp_id_0, + uint32_t *src_opp_id_1) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, src_opp_id_0); + + *num_of_src_opp = 1; +} + +static struct timing_generator_funcs dcn201_tg_funcs = { + .validate_timing = optc201_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc2_enable_crtc, + .disable_crtc = optc1_disable_crtc, + /* used by 
enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank = optc1_set_blank, + .is_blanked = optc1_is_blanked, + .set_blank_color = optc1_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .triplebuffer_lock = optc201_triplebuffer_lock, + .triplebuffer_unlock = optc201_triplebuffer_unlock, + .lock = optc1_lock, + .unlock = optc1_unlock, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc1_set_drr, + .get_last_used_drr_vtotal = NULL, + .set_vtotal_min_max = optc1_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .set_blank_data_double_buffer = optc1_set_blank_data_double_buffer, + .tg_init = optc1_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .get_crc = optc1_get_crc, + .configure_crc = optc2_configure_crc, + .set_dsc_config = optc2_set_dsc_config, + .set_dwb_source = NULL, + .get_optc_source = optc201_get_optc_source, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc2_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, +}; + +void dcn201_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn201_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 8; + optc1->min_v_sync_width = 1; +} diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h new file mode 100644 index 000000000000..e9545b73513a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h @@ -0,0 +1,74 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_OPTC_DCN201_H__ +#define __DC_OPTC_DCN201_H__ + +#include "dcn20/dcn20_optc.h" + +#define TG_COMMON_REG_LIST_DCN201(inst) \ + TG_COMMON_REG_LIST_DCN(inst),\ + SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_GSL_WINDOW_X, OTG, inst),\ + SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ + SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ + SRI(OTG_DSC_START_POSITION, OTG, inst),\ + SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ + SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ + SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ + SR(DWB_SOURCE_SELECT) + +#define TG_COMMON_MASK_SH_LIST_DCN201(mask_sh)\ + TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ + SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ + SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh) + +void dcn201_timing_generator_init(struct optc *optc); + +bool optc201_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c new file mode 100644 index 000000000000..b97bdb868a0e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c @@ -0,0 +1,393 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "reg_helper.h" +#include "dcn30_optc.h" +#include "dc.h" +#include "dcn_calc_math.h" +#include "dc_dmub_srv.h" + +#include "dml/dcn30/dcn30_fpu.h" +#include "dc_trace.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + +void optc3_triplebuffer_lock(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_GLOBAL_CONTROL2, + OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); + + REG_SET(OTG_VUPDATE_KEEPOUT, 0, + OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); + + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 1); + + REG_WAIT(OTG_MASTER_UPDATE_LOCK, + UPDATE_LOCK_STATUS, 1, + 1, 10); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); +} + +void optc3_lock_doublebuffer_enable(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t v_blank_start = 0; + uint32_t v_blank_end = 0; + uint32_t h_blank_start = 0; + uint32_t h_blank_end = 0; + + REG_GET_2(OTG_V_BLANK_START_END, + OTG_V_BLANK_START, &v_blank_start, + OTG_V_BLANK_END, &v_blank_end); + REG_GET_2(OTG_H_BLANK_START_END, + OTG_H_BLANK_START, &h_blank_start, + OTG_H_BLANK_END, &h_blank_end); + + REG_UPDATE_2(OTG_GLOBAL_CONTROL1, + MASTER_UPDATE_LOCK_DB_START_Y, v_blank_start - 1, + MASTER_UPDATE_LOCK_DB_END_Y, v_blank_start); + REG_UPDATE_2(OTG_GLOBAL_CONTROL4, + DIG_UPDATE_POSITION_X, h_blank_start - 180 - 1, + DIG_UPDATE_POSITION_Y, v_blank_start - 1); + // there is a DIG_UPDATE_VCOUNT_MODE and it is 0. 
+ + REG_UPDATE_3(OTG_GLOBAL_CONTROL0, + MASTER_UPDATE_LOCK_DB_START_X, h_blank_start - 200 - 1, + MASTER_UPDATE_LOCK_DB_END_X, h_blank_start - 180, + MASTER_UPDATE_LOCK_DB_EN, 1); + REG_UPDATE(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 1); + + REG_SET_3(OTG_VUPDATE_KEEPOUT, 0, + MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, 0, + MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, 100, + OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); +} + +void optc3_lock_doublebuffer_disable(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE_2(OTG_GLOBAL_CONTROL0, + MASTER_UPDATE_LOCK_DB_START_X, 0, + MASTER_UPDATE_LOCK_DB_END_X, 0); + REG_UPDATE_2(OTG_GLOBAL_CONTROL1, + MASTER_UPDATE_LOCK_DB_START_Y, 0, + MASTER_UPDATE_LOCK_DB_END_Y, 0); + + REG_UPDATE(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 0); + REG_UPDATE(OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, 0); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); +} + +void optc3_lock(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_GLOBAL_CONTROL2, + OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); + REG_SET(OTG_MASTER_UPDATE_LOCK, 0, + OTG_MASTER_UPDATE_LOCK, 1); + + REG_WAIT(OTG_MASTER_UPDATE_LOCK, + UPDATE_LOCK_STATUS, 1, + 1, 10); + + TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true); +} + +void optc3_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_CONTROL, OTG_OUT_MUX, dest); +} + +void optc3_program_blank_color(struct timing_generator *optc, + const struct tg_color *blank_color) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_3(OTG_BLANK_DATA_COLOR, 0, + OTG_BLANK_DATA_COLOR_BLUE_CB, blank_color->color_b_cb, + OTG_BLANK_DATA_COLOR_GREEN_Y, blank_color->color_g_y, + OTG_BLANK_DATA_COLOR_RED_CR, blank_color->color_r_cr); + + REG_SET_3(OTG_BLANK_DATA_COLOR_EXT, 0, + OTG_BLANK_DATA_COLOR_BLUE_CB_EXT, blank_color->color_b_cb >> 10, + OTG_BLANK_DATA_COLOR_GREEN_Y_EXT, blank_color->color_g_y >> 10, + OTG_BLANK_DATA_COLOR_RED_CR_EXT, blank_color->color_r_cr >> 10); +} + +void optc3_set_drr_trigger_window(struct timing_generator *optc, + uint32_t window_start, uint32_t window_end) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_2(OTG_DRR_TRIGGER_WINDOW, 0, + OTG_DRR_TRIGGER_WINDOW_START_X, window_start, + OTG_DRR_TRIGGER_WINDOW_END_X, window_end); +} + +void optc3_set_vtotal_change_limit(struct timing_generator *optc, + uint32_t limit) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + + REG_SET(OTG_DRR_V_TOTAL_CHANGE, 0, + OTG_DRR_V_TOTAL_CHANGE_LIMIT, limit); +} + + +/* Set DSC-related configuration. 
+ * dsc_mode: 0 disables DSC, other values enable DSC in specified format + * sc_bytes_per_pixel: Bytes per pixel in u3.28 format + * dsc_slice_width: Slice width in pixels + */ +void optc3_set_dsc_config(struct timing_generator *optc, + enum optc_dsc_mode dsc_mode, + uint32_t dsc_bytes_per_pixel, + uint32_t dsc_slice_width) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + optc2_set_dsc_config(optc, dsc_mode, dsc_bytes_per_pixel, dsc_slice_width); + REG_UPDATE(OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, 0); +} + +void optc3_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + enum h_timing_div_mode h_div = H_TIMING_NO_DIV; + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, h_div); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, + struct dc_crtc_timing *timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) + / opp_cnt; + uint32_t memory_mask = 0; + + /* TODO: In pseudocode but does not affect maximus, delete comment if we dont need on asic + * REG_SET(OTG_GLOBAL_CONTROL2, 0, GLOBAL_UPDATE_LOCK_EN, 1); + * Program OTG register MASTER_UPDATE_LOCK_DB_X/Y to the position before DP frame start + * REG_SET_2(OTG_GLOBAL_CONTROL1, 0, + * MASTER_UPDATE_LOCK_DB_X, 160, + * MASTER_UPDATE_LOCK_DB_Y, 240); + */ + + ASSERT(opp_cnt == 2 || opp_cnt == 4); + + /* 2 pieces of memory required for up to 5120 displays, 4 for up to 8192, + * however, for ODM combine we can simplify by always using 4. + */ + if (opp_cnt == 2) { + /* To make sure there's no memory overlap, each instance "reserves" 2 + * memories and they are uniquely combined here. + */ + memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); + } else if (opp_cnt == 4) { + /* To make sure there's no memory overlap, each instance "reserves" 1 + * memory and they are uniquely combined here. + */ + memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2) | 0x1 << (opp_id[2] * 2) | 0x1 << (opp_id[3] * 2); + } + + if (REG(OPTC_MEMORY_CONFIG)) + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, memory_mask); + + if (opp_cnt == 2) { + REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 1, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1]); + } else if (opp_cnt == 4) { + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 3, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1], + OPTC_SEG2_SRC_SEL, opp_id[2], + OPTC_SEG3_SRC_SEL, opp_id[3]); + } + + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, mpcc_hactive); + + REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1); + optc1->opp_count = opp_cnt; +} + +/** + * optc3_set_timing_double_buffer() - DRR double buffering control + * + * Sets double buffer point for V_TOTAL, H_TOTAL, VTOTAL_MIN, + * VTOTAL_MAX, VTOTAL_MIN_SEL and VTOTAL_MAX_SEL registers. + * + * @optc: timing_generator instance. + * @enable: Enable DRR double buffering control if true, disable otherwise. 
+ * + * Options: any time, start of frame, dp start of frame (range timing) + */ +static void optc3_set_timing_double_buffer(struct timing_generator *optc, bool enable) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t mode = enable ? 2 : 0; + + REG_UPDATE(OTG_DOUBLE_BUFFER_CONTROL, + OTG_DRR_TIMING_DBUF_UPDATE_MODE, mode); +} + +void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_WAIT(OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_PENDING, 0, 2, 100000); /* 1 vupdate at 5hz */ + +} + +void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max) +{ + struct dc *dc = optc->ctx->dc; + + if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams) + dc_dmub_srv_drr_update_cmd(dc, optc->inst, vtotal_min, vtotal_max); + else + optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max); +} + +void optc3_tg_init(struct timing_generator *optc) +{ + optc3_set_timing_double_buffer(optc, true); + optc1_clear_optc_underflow(optc); +} + +static struct timing_generator_funcs dcn30_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc2_enable_crtc, + .disable_crtc = optc1_disable_crtc, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. 
Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank_color = optc3_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .triplebuffer_lock = optc3_triplebuffer_lock, + .triplebuffer_unlock = optc2_triplebuffer_unlock, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc3_lock, + .unlock = optc1_unlock, + .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, + .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc1_set_drr, + .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, + .set_vtotal_min_max = optc3_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .tg_init = optc3_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .setup_global_swap_lock = NULL, + .get_crc = optc1_get_crc, + .configure_crc = optc2_configure_crc, + .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, + .set_dwb_source = NULL, + .set_odm_bypass = optc3_set_odm_bypass, + .set_odm_combine = optc3_set_odm_combine, + .get_optc_source = optc2_get_optc_source, + .set_out_mux = optc3_set_out_mux, + .set_drr_trigger_window = optc3_set_drr_trigger_window, + .set_vtotal_change_limit = optc3_set_vtotal_change_limit, + .set_gsl = optc2_set_gsl, + .set_gsl_source_select = optc2_set_gsl_source_select, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc2_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, + .wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear, +}; + +void dcn30_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn30_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4; + optc1->min_v_sync_width = 1; +} diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h new file mode 100644 index 000000000000..d3a056c12b0d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h @@ -0,0 +1,359 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_OPTC_DCN30_H__ +#define __DC_OPTC_DCN30_H__ + +#include "dcn20/dcn20_optc.h" + +#define V_TOTAL_REGS_DCN30_SRI(inst) + +#define OPTC_COMMON_REG_LIST_DCN3_BASE(inst) \ + SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ + SRI(OTG_VUPDATE_PARAM, OTG, inst),\ + SRI(OTG_VREADY_PARAM, OTG, inst),\ + SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL4, OTG, inst),\ + SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ + SRI(OTG_H_TOTAL, OTG, inst),\ + SRI(OTG_H_BLANK_START_END, OTG, inst),\ + SRI(OTG_H_SYNC_A, OTG, inst),\ + SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\ + SRI(OTG_H_TIMING_CNTL, OTG, inst),\ + SRI(OTG_V_TOTAL, OTG, inst),\ + SRI(OTG_V_BLANK_START_END, OTG, inst),\ + SRI(OTG_V_SYNC_A, OTG, inst),\ + SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\ + SRI(OTG_CONTROL, OTG, inst),\ + SRI(OTG_STEREO_CONTROL, OTG, inst),\ + SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ + SRI(OTG_STEREO_STATUS, OTG, inst),\ + SRI(OTG_V_TOTAL_MAX, OTG, inst),\ + SRI(OTG_V_TOTAL_MIN, OTG, inst),\ + SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\ + V_TOTAL_REGS_DCN30_SRI(inst)\ + SRI(OTG_TRIGA_CNTL, OTG, inst),\ + SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ + SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ + SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\ + SRI(OTG_STATUS, OTG, inst),\ + SRI(OTG_STATUS_POSITION, OTG, inst),\ + SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ + SRI(OTG_BLANK_DATA_COLOR, OTG, inst),\ + SRI(OTG_BLANK_DATA_COLOR_EXT, OTG, inst),\ + SRI(OTG_M_CONST_DTO0, OTG, inst),\ + SRI(OTG_M_CONST_DTO1, OTG, inst),\ + SRI(OTG_CLOCK_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ + SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ + SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ + SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ + SRI(CONTROL, VTG, inst),\ + SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ + SRI(OTG_GSL_CONTROL, OTG, inst),\ + SRI(OTG_CRC_CNTL, OTG, inst),\ + SRI(OTG_CRC_CNTL2, OTG, inst),\ + SRI(OTG_CRC0_DATA_RG, OTG, inst),\ + SRI(OTG_CRC0_DATA_B, OTG, inst),\ + SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ + SR(GSL_SOURCE_SELECT),\ + SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\ + SRI(OTG_DRR_CONTROL, OTG, inst) + + +#define OPTC_COMMON_REG_LIST_DCN3_0(inst) \ + OPTC_COMMON_REG_LIST_DCN3_BASE(inst),\ + SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_GSL_WINDOW_X, OTG, inst),\ + SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ + SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ + SRI(OTG_DSC_START_POSITION, OTG, inst),\ + SRI(OTG_CRC_CNTL2, OTG, inst),\ + SRI(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ + SRI(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ + SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ + SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ + SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ + SRI(OPTC_MEMORY_CONFIG, ODM, 
inst),\ + SR(DWB_SOURCE_SELECT) + +#define DCN30_VTOTAL_REGS_SF(mask_sh) + +#define OPTC_COMMON_MASK_SH_LIST_DCN3_BASE(mask_sh)\ + SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ + SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ + SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK_EN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ + DCN30_VTOTAL_REGS_SF(mask_sh)\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ + 
SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ + SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ + SF(OTG0_OTG_BLANK_DATA_COLOR, OTG_BLANK_DATA_COLOR_BLUE_CB, mask_sh),\ + SF(OTG0_OTG_BLANK_DATA_COLOR, OTG_BLANK_DATA_COLOR_GREEN_Y, mask_sh),\ + SF(OTG0_OTG_BLANK_DATA_COLOR, OTG_BLANK_DATA_COLOR_RED_CR, mask_sh),\ + SF(OTG0_OTG_BLANK_DATA_COLOR_EXT, OTG_BLANK_DATA_COLOR_BLUE_CB_EXT, mask_sh),\ + SF(OTG0_OTG_BLANK_DATA_COLOR_EXT, OTG_BLANK_DATA_COLOR_GREEN_Y_EXT, mask_sh),\ + SF(OTG0_OTG_BLANK_DATA_COLOR_EXT, OTG_BLANK_DATA_COLOR_RED_CR_EXT, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ + SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ + SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ + SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, 
mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ + SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + +#define OPTC_COMMON_MASK_SH_LIST_DCN3_0(mask_sh)\ + OPTC_COMMON_MASK_SH_LIST_DCN3_BASE(mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ + SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ + SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ + SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, 
OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ + SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_BY2, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_BLANK_DATA_DOUBLE_BUFFER_EN, mask_sh) + +#define OPTC_COMMON_MASK_SH_LIST_DCN30(mask_sh)\ + OPTC_COMMON_MASK_SH_LIST_DCN3_BASE(mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ + SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ + SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ + SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ + SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh) + +void dcn30_timing_generator_init(struct optc *optc1); + +void optc3_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest); + +void optc3_lock(struct timing_generator *optc); + +void optc3_lock_doublebuffer_enable(struct timing_generator *optc); + +void optc3_lock_doublebuffer_disable(struct timing_generator *optc); + +void optc3_set_drr_trigger_window(struct timing_generator *optc, + uint32_t window_start, uint32_t window_end); + +void optc3_triplebuffer_lock(struct timing_generator *optc); + +void optc3_program_blank_color(struct timing_generator *optc, + const struct tg_color *blank_color); + +void 
optc3_set_vtotal_change_limit(struct timing_generator *optc, + uint32_t limit); + +void optc3_set_dsc_config(struct timing_generator *optc, + enum optc_dsc_mode dsc_mode, + uint32_t dsc_bytes_per_pixel, + uint32_t dsc_slice_width); + +void optc3_set_timing_db_mode(struct timing_generator *optc, bool enable); + +void optc3_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing); +void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, + struct dc_crtc_timing *timing); +void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc); +void optc3_tg_init(struct timing_generator *optc); +void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); +#endif /* __DC_OPTC_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c new file mode 100644 index 000000000000..b3cfcb887905 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c @@ -0,0 +1,185 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "reg_helper.h" +#include "dcn301_optc.h" +#include "dc.h" +#include "dcn_calc_math.h" +#include "dc_dmub_srv.h" + +#include "dml/dcn30/dcn30_fpu.h" +#include "dc_trace.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + + +/** + * optc301_set_drr() - Program dynamic refresh rate registers m_OTGx_OTG_V_TOTAL_*. + * + * @optc: timing_generator instance. + * @params: parameters used for Dynamic Refresh Rate. 
+ */ +void optc301_set_drr( + struct timing_generator *optc, + const struct drr_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + + if (params->vertical_total_mid != 0) { + + REG_SET(OTG_V_TOTAL_MID, 0, + OTG_V_TOTAL_MID, params->vertical_total_mid - 1); + + REG_UPDATE_2(OTG_V_TOTAL_CONTROL, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, + OTG_VTOTAL_MID_FRAME_NUM, + (uint8_t)params->vertical_total_mid_frame_num); + + } + + optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); + + REG_UPDATE_5(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK_EN, 0, + OTG_SET_V_TOTAL_MIN_MASK, 0); + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); + + } else { + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_SET_V_TOTAL_MIN_MASK, 0, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_FORCE_LOCK_ON_EVENT, 0); + + optc->funcs->set_vtotal_min_max(optc, 0, 0); + } +} + + +void optc301_setup_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_8(OTG_TRIGA_CNTL, 0, + OTG_TRIGA_SOURCE_SELECT, 21, + OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst, + OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1, + OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0, + OTG_TRIGA_POLARITY_SELECT, 0, + OTG_TRIGA_FREQUENCY_SELECT, 0, + OTG_TRIGA_DELAY, 0, + OTG_TRIGA_CLEAR, 1); +} + +static struct timing_generator_funcs dcn30_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc2_enable_crtc, + .disable_crtc = optc1_disable_crtc, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. 
Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank_color = optc3_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .triplebuffer_lock = optc3_triplebuffer_lock, + .triplebuffer_unlock = optc2_triplebuffer_unlock, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc3_lock, + .unlock = optc1_unlock, + .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, + .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc301_set_drr, + .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, + .set_vtotal_min_max = optc3_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .tg_init = optc3_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .setup_global_swap_lock = NULL, + .get_crc = optc1_get_crc, + .configure_crc = optc2_configure_crc, + .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, + .set_dwb_source = NULL, + .set_odm_bypass = optc3_set_odm_bypass, + .set_odm_combine = optc3_set_odm_combine, + .get_optc_source = optc2_get_optc_source, + .set_out_mux = optc3_set_out_mux, + .set_drr_trigger_window = optc3_set_drr_trigger_window, + .set_vtotal_change_limit = optc3_set_vtotal_change_limit, + .set_gsl = optc2_set_gsl, + .set_gsl_source_select = optc2_set_gsl_source_select, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc301_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, + .wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear, +}; + +void dcn301_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn30_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4; + optc1->min_v_sync_width = 1; +} diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h new file mode 100644 index 000000000000..b49585682a15 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h @@ -0,0 +1,36 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_OPTC_DCN301_H__ +#define __DC_OPTC_DCN301_H__ + +#include "dcn20/dcn20_optc.h" +#include "dcn30/dcn30_optc.h" + +void dcn301_timing_generator_init(struct optc *optc1); +void optc301_setup_manual_trigger(struct timing_generator *optc); +void optc301_set_drr(struct timing_generator *optc, const struct drr_params *params); + +#endif /* __DC_OPTC_DCN301_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c new file mode 100644 index 000000000000..63a677c8ee27 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c @@ -0,0 +1,310 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dcn31_optc.h" + +#include "dcn30/dcn30_optc.h" +#include "reg_helper.h" +#include "dc.h" +#include "dcn_calc_math.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + +static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, + struct dc_crtc_timing *timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) + / opp_cnt; + uint32_t memory_mask = 0; + int mem_count_per_opp = (mpcc_hactive + 2559) / 2560; + + /* Assume less than 6 pipes */ + if (opp_cnt == 4) { + if (mem_count_per_opp == 1) + memory_mask = 0xf; + else { + ASSERT(mem_count_per_opp == 2); + memory_mask = 0xff; + } + } else if (mem_count_per_opp == 1) + memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2); + else if (mem_count_per_opp == 2) + memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); + else if (mem_count_per_opp == 3) + memory_mask = 0x77; + else if (mem_count_per_opp == 4) + memory_mask = 0xff; + + if (REG(OPTC_MEMORY_CONFIG)) + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, memory_mask); + + if (opp_cnt == 2) { + REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 1, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1]); + } else if (opp_cnt == 4) { + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 3, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1], + OPTC_SEG2_SRC_SEL, opp_id[2], + OPTC_SEG3_SRC_SEL, opp_id[3]); + } + + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, mpcc_hactive); + + REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1); + optc1->opp_count = opp_cnt; +} + +/* + * Enable CRTC - call ASIC Control Object to enable Timing generator. + */ +static bool optc31_enable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ + REG_UPDATE(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, optc->inst); + + /* VTG enable first is for HW workaround */ + REG_UPDATE(CONTROL, + VTG0_ENABLE, 1); + + REG_SEQ_START(); + + /* Enable CRTC */ + REG_UPDATE_2(OTG_CONTROL, + OTG_DISABLE_POINT_CNTL, 2, + OTG_MASTER_EN, 1); + + REG_SEQ_SUBMIT(); + REG_SEQ_WAIT_DONE(); + + return true; +} + +/* disable_crtc - call ASIC Control Object to disable Timing generator. */ +static bool optc31_disable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + + REG_UPDATE(CONTROL, + VTG0_ENABLE, 0); + + /* CRTC disabled, so disable clock. */ + REG_WAIT(OTG_CLOCK_CONTROL, + OTG_BUSY, 0, + 1, 100000); + optc1_clear_optc_underflow(optc); + + return true; +} + +bool optc31_immediate_disable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE_2(OTG_CONTROL, + OTG_DISABLE_POINT_CNTL, 0, + OTG_MASTER_EN, 0); + + REG_UPDATE(CONTROL, + VTG0_ENABLE, 0); + + /* CRTC disabled, so disable clock. 
*/ + REG_WAIT(OTG_CLOCK_CONTROL, + OTG_BUSY, 0, + 1, 100000); + + /* clear the false state */ + optc1_clear_optc_underflow(optc); + + return true; +} + +void optc31_set_drr( + struct timing_generator *optc, + const struct drr_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + + if (params->vertical_total_mid != 0) { + + REG_SET(OTG_V_TOTAL_MID, 0, + OTG_V_TOTAL_MID, params->vertical_total_mid - 1); + + REG_UPDATE_2(OTG_V_TOTAL_CONTROL, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, + OTG_VTOTAL_MID_FRAME_NUM, + (uint8_t)params->vertical_total_mid_frame_num); + + } + + optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); + + /* + * MIN_MASK_EN is gone and MASK is now always enabled. + * + * To get it to it work with manual trigger we need to make sure + * we program the correct bit. + */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); + } else { + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_SET_V_TOTAL_MIN_MASK, 0, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_FORCE_LOCK_ON_EVENT, 0); + + optc->funcs->set_vtotal_min_max(optc, 0, 0); + } +} + +void optc3_init_odm(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + REG_SET(OTG_H_TIMING_CNTL, 0, + OTG_H_TIMING_DIV_MODE, 0); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +static struct timing_generator_funcs dcn31_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc31_enable_crtc, + .disable_crtc = optc31_disable_crtc, + .immediate_disable_crtc = optc31_immediate_disable_crtc, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. 
Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank_color = optc3_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .triplebuffer_lock = optc3_triplebuffer_lock, + .triplebuffer_unlock = optc2_triplebuffer_unlock, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc3_lock, + .unlock = optc1_unlock, + .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, + .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc31_set_drr, + .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, + .set_vtotal_min_max = optc1_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .tg_init = optc3_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .setup_global_swap_lock = NULL, + .get_crc = optc1_get_crc, + .configure_crc = optc2_configure_crc, + .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, + .set_dwb_source = NULL, + .set_odm_bypass = optc3_set_odm_bypass, + .set_odm_combine = optc31_set_odm_combine, + .get_optc_source = optc2_get_optc_source, + .set_out_mux = optc3_set_out_mux, + .set_drr_trigger_window = optc3_set_drr_trigger_window, + .set_vtotal_change_limit = optc3_set_vtotal_change_limit, + .set_gsl = optc2_set_gsl, + .set_gsl_source_select = optc2_set_gsl_source_select, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc2_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, + .init_odm = optc3_init_odm, +}; + +void dcn31_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn31_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4; + optc1->min_v_sync_width = 1; +} + diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h new file mode 100644 index 000000000000..30b81a448ce2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h @@ -0,0 +1,267 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_OPTC_DCN31_H__ +#define __DC_OPTC_DCN31_H__ + +#include "dcn10/dcn10_optc.h" + +#define OPTC_COMMON_REG_LIST_DCN3_1(inst) \ + SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ + SRI(OTG_VUPDATE_PARAM, OTG, inst),\ + SRI(OTG_VREADY_PARAM, OTG, inst),\ + SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL4, OTG, inst),\ + SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ + SRI(OTG_H_TOTAL, OTG, inst),\ + SRI(OTG_H_BLANK_START_END, OTG, inst),\ + SRI(OTG_H_SYNC_A, OTG, inst),\ + SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\ + SRI(OTG_H_TIMING_CNTL, OTG, inst),\ + SRI(OTG_V_TOTAL, OTG, inst),\ + SRI(OTG_V_BLANK_START_END, OTG, inst),\ + SRI(OTG_V_SYNC_A, OTG, inst),\ + SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\ + SRI(OTG_CONTROL, OTG, inst),\ + SRI(OTG_STEREO_CONTROL, OTG, inst),\ + SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ + SRI(OTG_STEREO_STATUS, OTG, inst),\ + SRI(OTG_V_TOTAL_MAX, OTG, inst),\ + SRI(OTG_V_TOTAL_MIN, OTG, inst),\ + SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\ + SRI(OTG_TRIGA_CNTL, OTG, inst),\ + SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ + SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ + SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\ + SRI(OTG_STATUS, OTG, inst),\ + SRI(OTG_STATUS_POSITION, OTG, inst),\ + SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ + SRI(OTG_M_CONST_DTO0, OTG, inst),\ + SRI(OTG_M_CONST_DTO1, OTG, inst),\ + SRI(OTG_CLOCK_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ + SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ + SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ + SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ + SRI(CONTROL, VTG, inst),\ + SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ + SRI(OTG_GSL_CONTROL, OTG, inst),\ + SRI(OTG_CRC_CNTL, OTG, inst),\ + SRI(OTG_CRC0_DATA_RG, OTG, inst),\ + SRI(OTG_CRC0_DATA_B, OTG, inst),\ + SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ + SR(GSL_SOURCE_SELECT),\ + SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_GSL_WINDOW_X, OTG, inst),\ + SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ + SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ + SRI(OTG_DSC_START_POSITION, OTG, inst),\ + SRI(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ + SRI(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ + SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ + SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ + SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ + SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ + SRI(OTG_CRC_CNTL2, OTG, inst),\ + SR(DWB_SOURCE_SELECT),\ + SRI(OTG_DRR_CONTROL, OTG, inst) + +#define OPTC_COMMON_MASK_SH_LIST_DCN3_1(mask_sh)\ + SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ + SF(OTG0_OTG_VREADY_PARAM, 
VREADY_OFFSET, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ + SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, 
OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ + SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ + SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ + SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ + SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, 
mask_sh),\ + SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ + SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ + SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ + SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB0_SOURCE_SELECT, mask_sh),\ + SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ + SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + +void dcn31_timing_generator_init(struct optc *optc1); + +bool optc31_immediate_disable_crtc(struct timing_generator *optc); + +void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); + +void optc3_init_odm(struct timing_generator *optc); + +#endif /* __DC_OPTC_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c new file mode 100644 index 000000000000..0086cafb0f7a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dcn314_optc.h" + +#include "dcn30/dcn30_optc.h" +#include "dcn31/dcn31_optc.h" +#include "reg_helper.h" +#include "dc.h" +#include "dcn_calc_math.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + +/* + * Enable CRTC + * Enable CRTC - call ASIC Control Object to enable Timing generator. + */ + +static void optc314_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, + struct dc_crtc_timing *timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t memory_mask = 0; + int h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; + int mpcc_hactive = h_active / opp_cnt; + /* Each memory instance is 2048x(314x2) bits to support half line of 4096 */ + int odm_mem_count = (h_active + 2047) / 2048; + + /* + * display <= 4k : 2 memories + 2 pipes + * 4k < display <= 8k : 4 memories + 2 pipes + * 8k < display <= 12k : 6 memories + 4 pipes + */ + if (opp_cnt == 4) { + if (odm_mem_count <= 2) + memory_mask = 0x3; + else if (odm_mem_count <= 4) + memory_mask = 0xf; + else + memory_mask = 0x3f; + } else { + if (odm_mem_count <= 2) + memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2); + else if (odm_mem_count <= 4) + memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); + else + memory_mask = 0x77; + } + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, memory_mask); + + if (opp_cnt == 2) { + REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 1, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1]); + } else if (opp_cnt == 4) { + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 3, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1], + OPTC_SEG2_SRC_SEL, opp_id[2], + OPTC_SEG3_SRC_SEL, opp_id[3]); + } + + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, mpcc_hactive); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, opp_cnt - 1); + optc1->opp_count = opp_cnt; +} + +static bool optc314_enable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ + REG_UPDATE(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, optc->inst); + + /* VTG enable first is for HW workaround */ + REG_UPDATE(CONTROL, + VTG0_ENABLE, 1); + + 
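+	/* Issue the OTG enable update as a submitted register sequence and wait for it to complete */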
REG_SEQ_START(); + + /* Enable CRTC */ + REG_UPDATE_2(OTG_CONTROL, + OTG_DISABLE_POINT_CNTL, 2, + OTG_MASTER_EN, 1); + + REG_SEQ_SUBMIT(); + REG_SEQ_WAIT_DONE(); + + return true; +} + +/* disable_crtc */ +static bool optc314_disable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + + REG_UPDATE(CONTROL, + VTG0_ENABLE, 0); + + /* CRTC disabled, so disable clock. */ + REG_WAIT(OTG_CLOCK_CONTROL, + OTG_BUSY, 0, + 1, 100000); + + return true; +} + +static void optc314_phantom_crtc_post_enable(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* Disable immediately. */ + REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0); + + /* CRTC disabled, so disable clock. */ + REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); +} + +static void optc314_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + enum h_timing_div_mode h_div = H_TIMING_NO_DIV; + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, h_div); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +static void optc314_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE_MANUAL, manual_mode ? 1 : 0); +} + + +static struct timing_generator_funcs dcn314_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc314_enable_crtc, + .disable_crtc = optc314_disable_crtc, + .immediate_disable_crtc = optc31_immediate_disable_crtc, + .phantom_crtc_post_enable = optc314_phantom_crtc_post_enable, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. 
Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank_color = optc3_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .triplebuffer_lock = optc3_triplebuffer_lock, + .triplebuffer_unlock = optc2_triplebuffer_unlock, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc3_lock, + .unlock = optc1_unlock, + .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, + .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc31_set_drr, + .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, + .set_vtotal_min_max = optc1_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .tg_init = optc3_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .setup_global_swap_lock = NULL, + .get_crc = optc1_get_crc, + .configure_crc = optc1_configure_crc, + .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, + .set_dwb_source = NULL, + .get_optc_source = optc2_get_optc_source, + .set_out_mux = optc3_set_out_mux, + .set_drr_trigger_window = optc3_set_drr_trigger_window, + .set_vtotal_change_limit = optc3_set_vtotal_change_limit, + .set_gsl = optc2_set_gsl, + .set_gsl_source_select = optc2_set_gsl_source_select, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc2_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, + .init_odm = optc3_init_odm, + .set_odm_bypass = optc314_set_odm_bypass, + .set_odm_combine = optc314_set_odm_combine, + .set_h_timing_div_manual_mode = optc314_set_h_timing_div_manual_mode, +}; + +void dcn314_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn314_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4; + optc1->min_v_sync_width = 1; +} + diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h new file mode 100644 index 000000000000..99c098e76116 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_OPTC_DCN314_H__ +#define __DC_OPTC_DCN314_H__ + +#include "dcn10/dcn10_optc.h" + +#define OPTC_COMMON_REG_LIST_DCN3_14(inst) \ + SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ + SRI(OTG_VUPDATE_PARAM, OTG, inst),\ + SRI(OTG_VREADY_PARAM, OTG, inst),\ + SRI(OTG_MASTER_UPDATE_LOCK, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL0, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL4, OTG, inst),\ + SRI(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\ + SRI(OTG_H_TOTAL, OTG, inst),\ + SRI(OTG_H_BLANK_START_END, OTG, inst),\ + SRI(OTG_H_SYNC_A, OTG, inst),\ + SRI(OTG_H_SYNC_A_CNTL, OTG, inst),\ + SRI(OTG_H_TIMING_CNTL, OTG, inst),\ + SRI(OTG_V_TOTAL, OTG, inst),\ + SRI(OTG_V_BLANK_START_END, OTG, inst),\ + SRI(OTG_V_SYNC_A, OTG, inst),\ + SRI(OTG_V_SYNC_A_CNTL, OTG, inst),\ + SRI(OTG_CONTROL, OTG, inst),\ + SRI(OTG_STEREO_CONTROL, OTG, inst),\ + SRI(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\ + SRI(OTG_STEREO_STATUS, OTG, inst),\ + SRI(OTG_V_TOTAL_MAX, OTG, inst),\ + SRI(OTG_V_TOTAL_MIN, OTG, inst),\ + SRI(OTG_V_TOTAL_CONTROL, OTG, inst),\ + SRI(OTG_TRIGA_CNTL, OTG, inst),\ + SRI(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\ + SRI(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\ + SRI(OTG_STATUS_FRAME_COUNT, OTG, inst),\ + SRI(OTG_STATUS, OTG, inst),\ + SRI(OTG_STATUS_POSITION, OTG, inst),\ + SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ + SRI(OTG_M_CONST_DTO0, OTG, inst),\ + SRI(OTG_M_CONST_DTO1, OTG, inst),\ + SRI(OTG_CLOCK_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ + SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ + SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ + SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ + SRI(CONTROL, VTG, inst),\ + SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ + SRI(OTG_GSL_CONTROL, OTG, inst),\ + SRI(OTG_CRC_CNTL, OTG, inst),\ + SRI(OTG_CRC0_DATA_RG, OTG, inst),\ + SRI(OTG_CRC0_DATA_B, OTG, inst),\ + SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\ + SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\ + SR(GSL_SOURCE_SELECT),\ + SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL1, OTG, inst),\ + SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\ + SRI(OTG_GSL_WINDOW_X, OTG, inst),\ + SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ + SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ + SRI(OTG_DSC_START_POSITION, OTG, inst),\ + SRI(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ + SRI(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ + SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ + SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ + SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ + SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ + SRI(OTG_DRR_CONTROL, OTG, inst) + +#define OPTC_COMMON_MASK_SH_LIST_DCN3_14(mask_sh)\ + SF(OTG0_OTG_VSTARTUP_PARAM, 
VSTARTUP_START, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ + SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ + SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ + 
SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ + SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ + SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ + SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ + SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, 
OTG_CRC0_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ + SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ + SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ + SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ + SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ + SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + +void dcn314_timing_generator_init(struct optc *optc1); + +#endif /* __DC_OPTC_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c new file mode 100644 index 000000000000..a2c4db2cebdd --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -0,0 +1,357 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dcn32_optc.h" + +#include "dcn30/dcn30_optc.h" +#include "dcn31/dcn31_optc.h" +#include "reg_helper.h" +#include "dc.h" +#include "dcn_calc_math.h" +#include "dc_dmub_srv.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + +static void optc32_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, + struct dc_crtc_timing *timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t memory_mask = 0; + int h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; + int mpcc_hactive = h_active / opp_cnt; + /* Each memory instance is 2048x(32x2) bits to support half line of 4096 */ + int odm_mem_count = (h_active + 2047) / 2048; + + /* + * display <= 4k : 2 memories + 2 pipes + * 4k < display <= 8k : 4 memories + 2 pipes + * 8k < display <= 12k : 6 memories + 4 pipes + */ + if (opp_cnt == 4) { + if (odm_mem_count <= 2) + memory_mask = 0x3; + else if (odm_mem_count <= 4) + memory_mask = 0xf; + else + memory_mask = 0x3f; + } else { + if (odm_mem_count <= 2) + memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2); + else if (odm_mem_count <= 4) + memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); + else + memory_mask = 0x77; + } + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, memory_mask); + + if (opp_cnt == 2) { + REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 1, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1]); + } else if (opp_cnt == 4) { + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 3, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1], + OPTC_SEG2_SRC_SEL, opp_id[2], + OPTC_SEG3_SRC_SEL, opp_id[3]); + } + + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, mpcc_hactive); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, opp_cnt - 1); + optc1->opp_count = opp_cnt; +} + +void optc32_get_odm_combine_segments(struct timing_generator *tg, int *odm_combine_segments) +{ + struct optc *optc1 = DCN10TG_FROM_TG(tg); + int segments; + + REG_GET(OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, &segments); + + switch (segments) { + case 0: + *odm_combine_segments = 1; + break; + case 1: + *odm_combine_segments = 2; + break; + case 3: + *odm_combine_segments = 4; + break; + /* 2 is reserved */ + 
case 2: + default: + *odm_combine_segments = -1; + } +} + +void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE_MANUAL, manual_mode ? 1 : 0); +} +/** + * optc32_enable_crtc() - Enable CRTC - call ASIC Control Object to enable Timing generator. + * + * @optc: timing_generator instance. + * + * Return: If CRTC is enabled, return true. + */ +static bool optc32_enable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ + REG_UPDATE(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, optc->inst); + + /* VTG enable first is for HW workaround */ + REG_UPDATE(CONTROL, + VTG0_ENABLE, 1); + + REG_SEQ_START(); + + /* Enable CRTC */ + REG_UPDATE_2(OTG_CONTROL, + OTG_DISABLE_POINT_CNTL, 2, + OTG_MASTER_EN, 1); + + REG_SEQ_SUBMIT(); + REG_SEQ_WAIT_DONE(); + + return true; +} + +/* disable_crtc */ +static bool optc32_disable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + + REG_UPDATE(CONTROL, + VTG0_ENABLE, 0); + + /* CRTC disabled, so disable clock. */ + REG_WAIT(OTG_CLOCK_CONTROL, + OTG_BUSY, 0, + 1, 150000); + + return true; +} + +static void optc32_phantom_crtc_post_enable(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* Disable immediately. */ + REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0); + + /* CRTC disabled, so disable clock. */ + REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); +} + +static void optc32_disable_phantom_otg(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); +} + +void optc32_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + enum h_timing_div_mode h_div = H_TIMING_NO_DIV; + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, h_div); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +static void optc32_setup_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + struct dc *dc = optc->ctx->dc; + + if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams) + dc_dmub_srv_set_drr_manual_trigger_cmd(dc, optc->inst); + else { + /* + * MIN_MASK_EN is gone and MASK is now always enabled. + * + * To get it to it work with manual trigger we need to make sure + * we program the correct bit. 
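+		 * Bit 1 of OTG_SET_V_TOTAL_MIN_MASK corresponds to TRIG_A, so the
+		 * update below ties the V_TOTAL_MIN reload to the manual TRIG_A event.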
+ */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); + } +} + +static void optc32_set_drr( + struct timing_generator *optc, + const struct drr_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + + if (params->vertical_total_mid != 0) { + + REG_SET(OTG_V_TOTAL_MID, 0, + OTG_V_TOTAL_MID, params->vertical_total_mid - 1); + + REG_UPDATE_2(OTG_V_TOTAL_CONTROL, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, + OTG_VTOTAL_MID_FRAME_NUM, + (uint8_t)params->vertical_total_mid_frame_num); + + } + + optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); + } + + optc32_setup_manual_trigger(optc); +} + +static struct timing_generator_funcs dcn32_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc32_enable_crtc, + .disable_crtc = optc32_disable_crtc, + .phantom_crtc_post_enable = optc32_phantom_crtc_post_enable, + .disable_phantom_crtc = optc32_disable_phantom_otg, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. 
Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank_color = optc3_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .triplebuffer_lock = optc3_triplebuffer_lock, + .triplebuffer_unlock = optc2_triplebuffer_unlock, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc3_lock, + .unlock = optc1_unlock, + .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, + .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc32_set_drr, + .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, + .set_vtotal_min_max = optc3_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .tg_init = optc3_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .setup_global_swap_lock = NULL, + .get_crc = optc1_get_crc, + .configure_crc = optc1_configure_crc, + .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, + .set_dwb_source = NULL, + .set_odm_bypass = optc32_set_odm_bypass, + .set_odm_combine = optc32_set_odm_combine, + .get_odm_combine_segments = optc32_get_odm_combine_segments, + .set_h_timing_div_manual_mode = optc32_set_h_timing_div_manual_mode, + .get_optc_source = optc2_get_optc_source, + .set_out_mux = optc3_set_out_mux, + .set_drr_trigger_window = optc3_set_drr_trigger_window, + .set_vtotal_change_limit = optc3_set_vtotal_change_limit, + .set_gsl = optc2_set_gsl, + .set_gsl_source_select = optc2_set_gsl_source_select, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc2_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, +}; + +void dcn32_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn32_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4; + optc1->min_v_sync_width = 1; +} + diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h new file mode 100644 index 000000000000..8ce3b178cab0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h @@ -0,0 +1,187 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_OPTC_DCN32_H__ +#define __DC_OPTC_DCN32_H__ + +#include "dcn10/dcn10_optc.h" + +#define OPTC_COMMON_MASK_SH_LIST_DCN3_2(mask_sh)\ + SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\ + SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\ + SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\ + SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\ + SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\ + SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\ + SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\ + SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\ + SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\ + 
SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\ + SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\ + SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\ + SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\ + SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\ + SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\ + SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\ + SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\ + SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ + SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\ + SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ + SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ + SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ + SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\ + SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\ + 
SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\ + SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\ + SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\ + SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ + SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\ + SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \ + SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \ + SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ + SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ + SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\ + SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\ + SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\ + SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\ + SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\ + SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ + SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + +void dcn32_timing_generator_init(struct optc *optc1); +void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode); +void optc32_get_odm_combine_segments(struct timing_generator *tg, int *odm_combine_segments); +void optc32_set_odm_bypass(struct 
timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing); + +#endif /* __DC_OPTC_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c new file mode 100644 index 000000000000..a4a39f1638cf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dcn35_optc.h" + +#include "dcn30/dcn30_optc.h" +#include "dcn31/dcn31_optc.h" +#include "dcn32/dcn32_optc.h" +#include "reg_helper.h" +#include "dc.h" +#include "dcn_calc_math.h" + +#define REG(reg)\ + optc1->tg_regs->reg + +#define CTX \ + optc1->base.ctx + +#undef FN +#define FN(reg_name, field_name) \ + optc1->tg_shift->field_name, optc1->tg_mask->field_name + +/** + * optc35_set_odm_combine() - Enable CRTC - call ASIC Control Object to enable Timing generator. + * + * @optc: Output Pipe Timing Combine instance reference. + * @opp_id: Output Plane Processor instance ID. + * @opp_cnt: Output Plane Processor count. + * @timing: Timing parameters used to configure DCN blocks. + * + * Return: void. 
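+ *
+ * For example, a 7680 pixel wide timing split across four OPPs gives
+ * odm_mem_count = (7680 + 2047) / 2048 = 4, so OPTC_MEM_SEL is programmed
+ * to 0xf and OPTC_SEGMENT_WIDTH to 7680 / 4 = 1920.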
+ */ +static void optc35_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt, + struct dc_crtc_timing *timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t memory_mask = 0; + int h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; + int mpcc_hactive = h_active / opp_cnt; + /* Each memory instance is 2048x(314x2) bits to support half line of 4096 */ + int odm_mem_count = (h_active + 2047) / 2048; + + /* + * display <= 4k : 2 memories + 2 pipes + * 4k < display <= 8k : 4 memories + 2 pipes + * 8k < display <= 12k : 6 memories + 4 pipes + */ + if (opp_cnt == 4) { + if (odm_mem_count <= 2) + memory_mask = 0x3; + else if (odm_mem_count <= 4) + memory_mask = 0xf; + else + memory_mask = 0x3f; + } else { + if (odm_mem_count <= 2) + memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2); + else if (odm_mem_count <= 4) + memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); + else + memory_mask = 0x77; + } + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, memory_mask); + + if (opp_cnt == 2) { + REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 1, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1]); + } else if (opp_cnt == 4) { + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 3, + OPTC_SEG0_SRC_SEL, opp_id[0], + OPTC_SEG1_SRC_SEL, opp_id[1], + OPTC_SEG2_SRC_SEL, opp_id[2], + OPTC_SEG3_SRC_SEL, opp_id[3]); + } + + REG_UPDATE(OPTC_WIDTH_CONTROL, + OPTC_SEGMENT_WIDTH, mpcc_hactive); + + REG_UPDATE(OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, opp_cnt - 1); + optc1->opp_count = opp_cnt; +} + +static bool optc35_enable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* opp instance for OTG, 1 to 1 mapping and odm will adjust */ + REG_UPDATE(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, optc->inst); + + /* VTG enable first is for HW workaround */ + REG_UPDATE(CONTROL, + VTG0_ENABLE, 1); + + REG_SEQ_START(); + + /* Enable CRTC */ + REG_UPDATE_2(OTG_CONTROL, + OTG_DISABLE_POINT_CNTL, 2, + OTG_MASTER_EN, 1); + + REG_SEQ_SUBMIT(); + REG_SEQ_WAIT_DONE(); + + return true; +} + +/* disable_crtc */ +static bool optc35_disable_crtc(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + + REG_UPDATE(CONTROL, + VTG0_ENABLE, 0); + + /* CRTC disabled, so disable clock. */ + REG_WAIT(OTG_CLOCK_CONTROL, + OTG_BUSY, 0, + 1, 100000); + optc1_clear_optc_underflow(optc); + + return true; +} + +static void optc35_phantom_crtc_post_enable(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + /* Disable immediately. */ + REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0); + + /* CRTC disabled, so disable clock. 
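+	 * The REG_WAIT below polls OTG_BUSY every 1 us, up to 100000 times, before moving on.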
*/ + REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); +} + +static bool optc35_configure_crc(struct timing_generator *optc, + const struct crc_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (!optc1_is_tg_enabled(optc)) + return false; + REG_WRITE(OTG_CRC_CNTL, 0); + if (!params->enable) + return true; + REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL, + OTG_CRC0_WINDOWA_X_START, params->windowa_x_start, + OTG_CRC0_WINDOWA_X_END, params->windowa_x_end); + REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL, + OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start, + OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end); + REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL, + OTG_CRC0_WINDOWB_X_START, params->windowb_x_start, + OTG_CRC0_WINDOWB_X_END, params->windowb_x_end); + REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL, + OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start, + OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end); + if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) { + REG_UPDATE_4(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC0_SELECT, params->selection, + OTG_CRC_EN, 1, + OTG_CRC_WINDOW_DB_EN, 1); + } else + REG_UPDATE_3(OTG_CRC_CNTL, + OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0, + OTG_CRC0_SELECT, params->selection, + OTG_CRC_EN, 1); + return true; +} + +static struct timing_generator_funcs dcn35_tg_funcs = { + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc35_enable_crtc, + .disable_crtc = optc35_disable_crtc, + .immediate_disable_crtc = optc31_immediate_disable_crtc, + .phantom_crtc_post_enable = optc35_phantom_crtc_post_enable, + /* used by enable_timing_synchronization. Not need for FPGA */ + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, + .set_early_control = optc1_set_early_control, + /* used by enable_timing_synchronization. 
Not need for FPGA */ + .wait_for_state = optc1_wait_for_state, + .set_blank_color = optc3_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .triplebuffer_lock = optc3_triplebuffer_lock, + .triplebuffer_unlock = optc2_triplebuffer_unlock, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc3_lock, + .unlock = optc1_unlock, + .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, + .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc31_set_drr, + .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, + .set_vtotal_min_max = optc1_set_vtotal_min_max, + .set_static_screen_control = optc1_set_static_screen_control, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .tg_init = optc3_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, + .setup_global_swap_lock = NULL, + .get_crc = optc1_get_crc, + .configure_crc = optc35_configure_crc, + .set_dsc_config = optc3_set_dsc_config, + .get_dsc_status = optc2_get_dsc_status, + .set_dwb_source = NULL, + .set_odm_bypass = optc32_set_odm_bypass, + .set_odm_combine = optc35_set_odm_combine, + .get_optc_source = optc2_get_optc_source, + .set_h_timing_div_manual_mode = optc32_set_h_timing_div_manual_mode, + .set_out_mux = optc3_set_out_mux, + .set_drr_trigger_window = optc3_set_drr_trigger_window, + .set_vtotal_change_limit = optc3_set_vtotal_change_limit, + .set_gsl = optc2_set_gsl, + .set_gsl_source_select = optc2_set_gsl_source_select, + .set_vtg_params = optc1_set_vtg_params, + .program_manual_trigger = optc2_program_manual_trigger, + .setup_manual_trigger = optc2_setup_manual_trigger, + .get_hw_timing = optc1_get_hw_timing, + .init_odm = optc3_init_odm, +}; + +void dcn35_timing_generator_init(struct optc *optc1) +{ + optc1->base.funcs = &dcn35_tg_funcs; + + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; + + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 4; + optc1->min_v_sync_width = 1; + + dcn35_timing_generator_set_fgcg( + optc1, CTX->dc->debug.enable_fine_grain_clock_gating.bits.optc); +} + +void dcn35_timing_generator_set_fgcg(struct optc *optc1, bool enable) +{ + REG_UPDATE(OPTC_CLOCK_CONTROL, OPTC_FGCG_REP_DIS, !enable); +} diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h new file mode 100644 index 000000000000..1f422e4c468f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_OPTC_DCN35_H__ +#define __DC_OPTC_DCN35_H__ + +#include "dcn10/dcn10_optc.h" +#include "dcn32/dcn32_optc.h" +#define OPTC_COMMON_MASK_SH_LIST_DCN3_5(mask_sh)\ + OPTC_COMMON_MASK_SH_LIST_DCN3_2(mask_sh),\ + SF(OTG0_OTG_CRC_CNTL, OTG_CRC_WINDOW_DB_EN, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_RG, CRC1_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_RG, CRC1_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC1_DATA_B, CRC1_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC2_DATA_RG, CRC2_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC2_DATA_RG, CRC2_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC2_DATA_B, CRC2_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC3_DATA_RG, CRC3_R_CR, mask_sh),\ + SF(OTG0_OTG_CRC3_DATA_RG, CRC3_G_Y, mask_sh),\ + SF(OTG0_OTG_CRC3_DATA_B, CRC3_B_CB, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_END, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_START, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_END, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG_CRC0_WINDOWA_X_START_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG_CRC0_WINDOWA_X_END_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG_CRC0_WINDOWA_Y_START_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG_CRC0_WINDOWA_Y_END_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG_CRC0_WINDOWB_X_START_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG_CRC0_WINDOWB_X_END_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG_CRC0_WINDOWB_Y_START_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG_CRC0_WINDOWB_Y_END_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG_CRC1_WINDOWA_X_START_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG_CRC1_WINDOWA_X_END_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG_CRC1_WINDOWA_Y_START_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG_CRC1_WINDOWA_Y_END_READBACK, mask_sh),\ + 
SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG_CRC1_WINDOWB_X_START_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG_CRC1_WINDOWB_X_END_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG_CRC1_WINDOWB_Y_START_READBACK, mask_sh),\ + SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG_CRC1_WINDOWB_Y_END_READBACK, mask_sh),\ + SF(OPTC_CLOCK_CONTROL, OPTC_FGCG_REP_DIS, mask_sh) + +void dcn35_timing_generator_init(struct optc *optc1); + +void dcn35_timing_generator_set_fgcg(struct optc *optc1, bool enable); + +#endif /* __DC_OPTC_DCN35_H__ */ -- cgit v1.2.3 From 5fcf74e002f152db0c39a7cdafa082c952cc5640 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 3 Nov 2023 18:07:11 -0400 Subject: drm/amd/display: Update DCN35 clock table policy [Why] The new table doesn't have an implicit mapping between Fclk SOC voltage and MemClk and it currently builds the table off of number of Fclk states rather than DcfClock states. The DML table in use is not correct for functionality or power and does not align with our existing policies for DCN3x. [How] Build the table based on DcfClock with the following assumptions: 1. Raising Soc voltage is the most expensive operation, so assume that running at max DispClock or DppClock is preferable. 2. Assume that we can run at max Fclk / MemClk at any state, but restrict the maximum state to the very last entry in the table as the worst case scenario. 3. Assume that Fclk always has a 2x multiplier on DcfClock unless the table specifies something lower. Reviewed-by: Taimur Hassan Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 77 ++++++++++++++-------- 1 file changed, 51 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 0f3f6a9d5144..19f8d83698be 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -648,27 +648,47 @@ static unsigned int convert_wck_ratio(uint8_t wck_ratio) return 1; } +static inline uint32_t calc_dram_speed_mts(const MemPstateTable_t *entry) +{ + return entry->UClk * convert_wck_ratio(entry->WckRatio) * 2; +} + static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, struct integrated_info *bios_info, DpmClocks_t_dcn35 *clock_table) { struct clk_bw_params *bw_params = clk_mgr->base.bw_params; struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; - uint32_t max_pstate = 0, max_uclk = 0, max_fclk = 0; - uint32_t min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0; int i; + /* Determine min/max p-state values. */ for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { - if (is_valid_clock_value(clock_table->MemPstateTable[i].UClk) && - clock_table->MemPstateTable[i].UClk > max_uclk) { - max_uclk = clock_table->MemPstateTable[i].UClk; + uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); + + if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) { + max_dram_speed_mts = dram_speed_mts; max_pstate = i; } } - /* We expect the table to contain at least one valid Uclk entry. 
*/ - ASSERT(is_valid_clock_value(max_uclk)); + min_dram_speed_mts = max_dram_speed_mts; + min_pstate = max_pstate; + for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); + + if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) { + min_dram_speed_mts = dram_speed_mts; + min_pstate = i; + } + } + + /* We expect the table to contain at least one valid P-state entry. */ + ASSERT(clock_table->NumMemPstatesEnabled && + is_valid_clock_value(max_dram_speed_mts) && + is_valid_clock_value(min_dram_speed_mts)); /* dispclk and dppclk can be max at any voltage, same number of levels for both */ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && @@ -678,47 +698,46 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); } else { + /* Invalid number of entries in the table from PMFW. */ ASSERT(0); } - if (clock_table->NumFclkLevelsEnabled <= NUM_FCLK_DPM_LEVELS) - max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, - clock_table->NumFclkLevelsEnabled); - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { - uint32_t min_uclk = clock_table->MemPstateTable[0].UClk; - int j; + /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ + ASSERT(clock_table->NumDcfClkLevelsEnabled > 0); - for (j = 1; j < clock_table->NumMemPstatesEnabled; j++) { - if (is_valid_clock_value(clock_table->MemPstateTable[j].UClk) && - clock_table->MemPstateTable[j].UClk < min_uclk && - clock_table->MemPstateTable[j].Voltage <= clock_table->SocVoltage[i]) { - min_uclk = clock_table->MemPstateTable[j].UClk; - min_pstate = j; - } - } + max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled); + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + int j; + + /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i]) - break; + break; bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz; bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz; bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz; - bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + + /* Now update clocks we do read */ bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[min_pstate].MemClk; bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[min_pstate].Voltage; bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; - bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( - clock_table->MemPstateTable[min_pstate].WckRatio); - } + bw_params->clk_table.entries[i].wck_ratio = + convert_wck_ratio(clock_table->MemPstateTable[min_pstate].WckRatio); + + /* Dcfclk and Fclk are tied, but at a different ratio */ + bw_params->clk_table.entries[i].fclk_mhz = min(max_fclk, 2 * clock_table->DcfClocks[i]); + } /* Make sure to include at least one entry at highest pstate */ if (max_pstate != min_pstate || i == 0) { 
if (i > MAX_NUM_DPM_LVL - 1) i = MAX_NUM_DPM_LVL - 1; + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[max_pstate].MemClk; bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[max_pstate].Voltage; @@ -734,6 +753,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk } bw_params->clk_table.num_entries = i--; + /* Make sure all highest clocks are included*/ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); bw_params->clk_table.entries[i].dispclk_mhz = @@ -752,6 +772,11 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk bw_params->clk_table.num_entries_per_clk.num_fclk_levels = clock_table->NumFclkLevelsEnabled; bw_params->clk_table.num_entries_per_clk.num_memclk_levels = clock_table->NumMemPstatesEnabled; bw_params->clk_table.num_entries_per_clk.num_socclk_levels = clock_table->NumSocClkLevelsEnabled; + + /* + * Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ for (i = 0; i < bw_params->clk_table.num_entries; i++) { if (!bw_params->clk_table.entries[i].fclk_mhz) { bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; -- cgit v1.2.3 From f19c115d9c3c4f386c4662cc7b02ae1ffc2374af Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 10 Nov 2023 10:06:09 -0500 Subject: drm/amd/display: Remove config update [Why] Prevent overwrite of dc->config.use_default_clock_table, as it should be pre-configured. Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 53eefba0b9dc..1a0ed1c7e2d4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1876,7 +1876,7 @@ static bool dcn35_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; - dc->config.use_default_clock_table = false; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { -- cgit v1.2.3 From fbd2076c31e3281dea7b475d80211b7a6f1500da Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 22 Nov 2023 15:17:22 -0500 Subject: drm/amdkfd: Use common function for IP version check KFD_GC_VERSION was recently updated to use a new function for IP version checks. As a result, use KFD_GC_VERSION as the common function for all IP version checks in KFD. 
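As an illustrative sketch of the conversion (the actual hunk follows below), open-coded lookups of the GC IP version on the amdgpu device are replaced with the existing KFD accessor:

	/* before: raw IP-version array lookup on the amdgpu device */
	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
		return dev->nodes[0];

	/* after: the common KFD helper resolving the same GC IP version */
	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3))
		return dev->nodes[0];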
Signed-off-by: Mukul Joshi Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a40f8cfc6aa5..45366b4ca976 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1127,7 +1127,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, struct kfd_dev *dev = adev->kfd.dev; uint32_t i; - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) return dev->nodes[0]; for (i = 0; i < dev->num_nodes; i++) -- cgit v1.2.3 From 22136ff27c4e01fae81f6588033363a46c72ed8c Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 10 Nov 2023 10:15:28 -0500 Subject: drm/amd/display: Fix conversions between bytes and KB [Why] There are a number of instances where we convert HostVMMinPageSize or GPUVMMinPageSize from bytes to KB by dividing (rather than multiplying) and vice versa. Additionally, in some cases, a parameter is passed through DML in KB but later checked as if it were in bytes. Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 16 ++++++++-------- .../drm/amd/display/dc/dml2/dml2_translation_helper.c | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 510be909cd75..59718ee33e51 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; 
CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ 
void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2b9638c6d9b0..48caa34a5ce7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -515,8 +515,8 @@ void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment; out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes; out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes; - out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes * 1024; - out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes * 1024; + out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024; + out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024; out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes; out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent; out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent; -- cgit v1.2.3 From 2d1c884a535fcca74814553132d41c15dc9831ef Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Fri, 10 Nov 2023 11:33:45 -0500 Subject: drm/amd/display: Fix black screen on video playback with embedded panel [why] We have dynamic power control in driver but should be ignored when power is forced on. [how] Bypass any power control when it's forced on. 
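As a sketch of the approach (hedged illustration only; the real hunks are in the diff below), each affected power-gating handler reads the domain's DOMAIN_POWER_FORCEON bit first and bails out before touching any gating controls:

	uint32_t power_forceon;

	/* DOMAIN25 for the HPO domain, DOMAIN22 for the IO clock domain */
	REG_GET(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
	if (power_forceon)
		return; /* power is forced on; skip dynamic power gating */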
Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Sung Joon Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 1 - drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 19f8d83698be..63a0b885b6f0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -1132,7 +1132,6 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_dpp_power_gate = false; ctx->dc->debug.disable_hubp_power_gate = false; ctx->dc->debug.disable_dsc_power_gate = false; - ctx->dc->debug.disable_hpo_power_gate = false; } else { /*let's reset the config control flag*/ ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c index 0f60c40e1fc5..53bd0ae4bab5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c @@ -261,6 +261,7 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on) uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 0 : 2; uint32_t org_ip_request_cntl; + uint32_t power_forceon; bool block_enabled; if (pg_cntl->ctx->dc->debug.ignore_pg || @@ -277,6 +278,10 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on) return; } + REG_GET(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon); + if (power_forceon) + return; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); @@ -304,6 +309,7 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 0 : 2; uint32_t org_ip_request_cntl; + uint32_t power_forceon; bool block_enabled; if (pg_cntl->ctx->dc->debug.ignore_pg || @@ -319,6 +325,10 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) return; } + REG_GET(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon); + if (power_forceon) + return; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); -- cgit v1.2.3 From 641220b2a53c64efb8327ffbbc3bfcf96b5a613f Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 11 Nov 2023 22:47:50 -0500 Subject: drm/amd/display: [FW Promotion] Release 0.0.193.0 - Add a tracing framework, to measure duration, execution count and longest duration of main loop/vsync interrupt work GPINT command is used to start/stop the measurements. 
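A hedged usage sketch (the helper call and timeout below are illustrative assumptions, not part of this change; only the GPINT command definition is added here): a debug path would start a measurement by sending the new GPINT with argument 1 and stop it again with argument 0, roughly

	/* illustrative helper/signature; 1 enables, 0 disables measurement */
	dmub_srv_send_gpint_command(dmub, DMUB_GPINT__TRACE_DMUB_WAKE_ACTIVITY,
				    1, 30);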
Acked-by: Hamza Mahfooz Signed-off-by: Anthony Koo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index a08073fc92ae..b7d9360bfdc8 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -185,8 +185,7 @@ union abm_flags { unsigned int disable_abm_requested : 1; /** - * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled - * immediately. + * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled immediately. */ unsigned int disable_abm_immediately : 1; @@ -866,6 +865,13 @@ enum dmub_gpint_command { * DESC: Updates the trace buffer mask bit32~bit63. */ DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119, + + /** + * DESC: Enable measurements for various task duration + * ARGS: 0 - Disable measurement + * 1 - Enable measurement + */ + DMUB_GPINT__TRACE_DMUB_WAKE_ACTIVITY = 123, }; /** -- cgit v1.2.3 From db4616f7667c9d1f733ec360a754a4d7fd32c28e Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 13 Nov 2023 09:07:22 -0500 Subject: drm/amd/display: 3.2.261 This version brings along the following: - DCN314 fixes - DCN32 fixes - DCN35 fixes - DML2 fixes - eDP fixes - HDR fixes - MST fixes - Replay fixes - SubVP support for more configs Acked-by: Hamza Mahfooz Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 85771ef98cd7..6074ac1b180a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.260" +#define DC_VER "3.2.261" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 201761b5eb57c3fad810cde555795c3b5721a031 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 9 Nov 2023 10:50:38 +0530 Subject: drm/amdgpu: Move mca debug mode decision to ras Refactor code such that ras block decides the default mca debug mode, and not swsmu block. By default mca debug mode is set to false. v2: squash in uninitialized value fix (Alex) Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 +++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 12 ------------ 4 files changed, 13 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index cf33eb219e25..54f2f346579e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -377,7 +377,7 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val) struct amdgpu_device *adev = (struct amdgpu_device *)data; int ret; - ret = amdgpu_mca_smu_set_debug_mode(adev, val ? true : false); + ret = amdgpu_ras_set_mca_debug_mode(adev, val ? 
true : false); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a3dc68e98910..f6b47ebce9d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3132,6 +3132,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + amdgpu_ras_set_mca_debug_mode(adev, false); + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { if (!node->ras_obj) { dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); @@ -3405,12 +3407,18 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) return 0; } -void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) +int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; - if (con) - con->is_mca_debug_mode = enable; + if (con) { + ret = amdgpu_mca_smu_set_debug_mode(adev, enable); + if (!ret) + con->is_mca_debug_mode = enable; + } + + return ret; } bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 19161916ac46..6a941eb8fb8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -773,7 +773,7 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); -void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); +int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, unsigned int *mode); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 8ccb8354bb49..dda2249c4994 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1516,7 +1516,6 @@ static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable) if (smu->smc_fw_version < 0x554800) return 0; - amdgpu_ras_set_mca_debug_mode(smu->adev, enable); return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead, enable ? 
0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK, NULL); @@ -2346,16 +2345,6 @@ static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, return ret; } -static int smu_v13_0_6_post_init(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - - if (!amdgpu_sriov_vf(adev) && adev->ras_enabled) - return smu_v13_0_6_mca_set_debug_mode(smu, false); - - return 0; -} - static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -2920,7 +2909,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .i2c_init = smu_v13_0_6_i2c_control_init, .i2c_fini = smu_v13_0_6_i2c_control_fini, .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, - .post_init = smu_v13_0_6_post_init, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) -- cgit v1.2.3 From 7a6931a476d30f0d6bf70b01a925f76f92d23940 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Wed, 22 Nov 2023 14:50:34 -0500 Subject: drm/amd/display: fix ABM disablement On recent versions of DMUB firmware, if we want to completely disable ABM we have to pass ABM_LEVEL_IMMEDIATE_DISABLE as the requested ABM level to DMUB. Otherwise, LCD eDP displays are unable to reach their maximum brightness levels. So, to fix this whenever the user requests an ABM level of 0 pass ABM_LEVEL_IMMEDIATE_DISABLE to DMUB instead. Also, to keep the user's experience consistent map ABM_LEVEL_IMMEDIATE_DISABLE to 0 when a user tries to read the requested ABM level. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2be64c593c87..39a4b47b6804 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6248,7 +6248,7 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, dm_new_state->underscan_enable = val; ret = 0; } else if (property == adev->mode_info.abm_level_property) { - dm_new_state->abm_level = val; + dm_new_state->abm_level = val ?: ABM_LEVEL_IMMEDIATE_DISABLE; ret = 0; } @@ -6293,7 +6293,8 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector, *val = dm_state->underscan_enable; ret = 0; } else if (property == adev->mode_info.abm_level_property) { - *val = dm_state->abm_level; + *val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ? 
+ dm_state->abm_level : 0; ret = 0; } @@ -6366,7 +6367,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->pbn = 0; if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) - state->abm_level = amdgpu_dm_abm_level; + state->abm_level = amdgpu_dm_abm_level ?: + ABM_LEVEL_IMMEDIATE_DISABLE; __drm_atomic_helper_connector_reset(connector, &state->base); } -- cgit v1.2.3 From 01a1526ac4c8d9342d3d8b703751f3fc5ce487ba Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Fri, 3 Nov 2023 14:55:37 -0400 Subject: drm/amd/display: update dcn315 lpddr pstate latency [WHY/HOW] Increase the pstate latency to improve ac/dc transition Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Dmytro Laktyushkin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index b2c4f97afc8b..8776055bbeaa 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -334,7 +334,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -342,7 +342,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -350,7 +350,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -358,7 +358,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, -- cgit v1.2.3 From bcdbd6f607bacb51743ac73f13f40d015cb9de53 Mon Sep 17 00:00:00 2001 From: RutingZhang Date: Tue, 21 Nov 2023 12:36:20 +0800 Subject: drm/amd/display: remove unnecessary braces to fix coding style checkpatch complains that: WARNING: braces {} are not necessary for single statement blocks + if (pool->base.irqs != NULL) { + dal_irq_service_destroy(&pool->base.irqs); + } Fixed it by removing unnecessary braces to fix the coding style issue. 
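For reference, the single-statement block reads as follows after dropping the braces (same behavior, checkpatch-clean):

	if (pool->base.irqs != NULL)
		dal_irq_service_destroy(&pool->base.irqs);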
Signed-off-by: RutingZhang Reviewed-by: Dongliang Mu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index c07da45e1e2c..65d337731f56 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -713,9 +713,8 @@ static void dcn21_resource_destruct(struct dcn21_resource_pool *pool) pool->base.hubps[i] = NULL; } - if (pool->base.irqs != NULL) { + if (pool->base.irqs != NULL) dal_irq_service_destroy(&pool->base.irqs); - } } for (i = 0; i < pool->base.res_cap->num_ddc; i++) { -- cgit v1.2.3 From ca0b006939f9701ab2e14a08ed9ef77a8014d2c5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Nov 2023 09:39:18 -0500 Subject: drm/amdgpu: fix AGP addressing when GART is not at 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This worked by luck if the GART aperture ended up at 0. When we ended up moving GART on some chips, the GART aperture ended up offsetting the AGP address since the resource->start is a GART offset, not an MC address. Fix this by moving the AGP address setup into amdgpu_bo_gpu_offset_no_check(). v2: check mem_type before checking agp v3: check if the ttm bo has a ttm_tt allocated yet Fixes: 67318cb84341 ("drm/amdgpu/gmc11: set gart placement GC11") Tested-by: Mario Limonciello Reported-by: Jesse Zhang Reported-by: Yifan Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: christian.koenig@amd.com Cc: mario.limonciello@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +--- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 5f71414190e9..d2f273d77e59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -181,6 +181,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + if (!bo->ttm) + return AMDGPU_BO_INVALID_OFFSET; + if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached) return AMDGPU_BO_INVALID_OFFSET; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cef920a93924..d79b4ca1ecfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1527,10 +1527,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint64_t offset; + uint64_t offset = AMDGPU_BO_INVALID_OFFSET; - offset = (bo->tbo.resource->start << PAGE_SHIFT) + - amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); + if (bo->tbo.resource->mem_type == TTM_PL_TT) + offset = amdgpu_gmc_agp_addr(&bo->tbo); + + if (offset == AMDGPU_BO_INVALID_OFFSET) + offset = (bo->tbo.resource->start << PAGE_SHIFT) + + amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); return amdgpu_gmc_sign_extend(offset); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 05991c5c8ddb..ab4a762aed5b 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -959,10 +959,8 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; addr = amdgpu_gmc_agp_addr(bo); - if (addr != AMDGPU_BO_INVALID_OFFSET) { - bo->resource->start = addr >> PAGE_SHIFT; + if (addr != AMDGPU_BO_INVALID_OFFSET) return 0; - } /* allocate GART space */ placement.num_placement = 1; -- cgit v1.2.3 From b0e5c88d8a88bdcc9834409387e10a5ae1b2753e Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 23 Nov 2023 15:33:22 +0800 Subject: drm/amd/pm: fix a memleak in aldebaran_tables_init When kzalloc() for smu_table->ecc_table fails, we should free the previously allocated resources to prevent memleak. Fixes: edd794208555 ("drm/amd/pm: add message smu to get ecc_table v2") Signed-off-by: Dinghao Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 1a6675d70a4b..f1440869d1ce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -257,8 +257,11 @@ static int aldebaran_tables_init(struct smu_context *smu) } smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); - if (!smu_table->ecc_table) + if (!smu_table->ecc_table) { + kfree(smu_table->metrics_table); + kfree(smu_table->gpu_metrics_table); return -ENOMEM; + } return 0; } -- cgit v1.2.3 From 7b194fdccb8458779687063e582cf218a0920c29 Mon Sep 17 00:00:00 2001 From: Lu Yao Date: Thu, 23 Nov 2023 09:22:34 +0800 Subject: drm/amdgpu: Fix cat debugfs amdgpu_regs_didt causes kernel null pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For 'AMDGPU_FAMILY_SI' family cards, in 'si_common_early_init' func, init 'didt_rreg' and 'didt_wreg' to 'NULL'. But in func 'amdgpu_debugfs_regs_didt_read/write', using 'RREG32_DIDT' 'WREG32_DIDT' lacks of relevant judgment. And other 'amdgpu_ip_block_version' that use these two definitions won't be added for 'AMDGPU_FAMILY_SI'. So, add null pointer judgment before calling. 
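The shape of the fix, as a short sketch (the complete hunks follow below): both the read and the write handler now check the callback before dereferencing it and return -EOPNOTSUPP on ASICs that never register DIDT accessors:

	/* SI parts leave didt_rreg/didt_wreg as NULL; bail out gracefully */
	if (!adev->didt_rreg)
		return -EOPNOTSUPP;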
Reviewed-by: Christian König Signed-off-by: Lu Yao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a53f436fa9f1..0e61ebdb3f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -638,6 +638,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_rreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -694,6 +697,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_wreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); -- cgit v1.2.3 From bd1f6a31e7762ebc99b97f3eda5e5ea3708fa792 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 24 Nov 2023 09:56:32 -0600 Subject: drm/amd: Enable PCIe PME from D3 When dGPU is put into BOCO it may be in D3cold but still able send PME on display hotplug event. For this to work it must be enabled as wake source from D3. When runpm is enabled use pci_wake_from_d3() to mark wakeup as enabled by default. Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8f24cabe2155..8b33b130ea36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2263,6 +2263,8 @@ retry_init: pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + pci_wake_from_d3(pdev, TRUE); + /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: -- cgit v1.2.3 From 2e583200907cc43f062321bf751fe4b0960dbecf Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 13 Nov 2023 13:12:44 -0500 Subject: drm/amd/display: block dcn315 dynamic crb allocation when unintended [WHY/HOW] Limit the dynamic crb to dual stream configs that include eDP Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Dmytro Laktyushkin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index cb8024eee8e4..515ba435f759 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1631,8 +1631,10 @@ static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context) int i; struct resource_context *res_ctx = &context->res_ctx; - /*Don't apply for single stream*/ - if (context->stream_count < 2) + /* Only apply for dual stream scenarios with edp*/ + if (context->stream_count != 2) + return false; + if (context->streams[0]->signal != SIGNAL_TYPE_EDP && context->streams[1]->signal != SIGNAL_TYPE_EDP) return false; for (i = 0; i < 
dc->res_pool->pipe_count; i++) { -- cgit v1.2.3 From 4fc26c2f912b5d9232dc4432fb1b7bfd6f016be6 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 13 Oct 2023 12:13:28 -0400 Subject: drm/amd/display: Update Fixed VS/PE Retimer Sequence [WHY/HOW] Add a new AUX sequence provided by vendor to improve interop with specific display configurations. Reviewed-by: Wenjing Liu Acked-by: Tom Chung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c index 68096d12f52f..7087cdc9e977 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c @@ -205,6 +205,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18}; const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03}; const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06}; + const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87}; enum link_training_result status = LINK_TRAINING_SUCCESS; uint8_t lane = 0; union down_spread_ctrl downspread = {0}; @@ -293,6 +294,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( DP_DOWNSPREAD_CTRL, lt_settings->link_settings.link_spread); + link_configure_fixed_vs_pe_retimer(link->ddc, + &vendor_lttpr_write_data_dpmf[0], + sizeof(vendor_lttpr_write_data_dpmf)); + if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) { link_configure_fixed_vs_pe_retimer(link->ddc, &vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1)); @@ -552,6 +557,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18}; const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03}; const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06}; + const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87}; enum link_training_result status = LINK_TRAINING_SUCCESS; uint8_t lane = 0; union down_spread_ctrl downspread = {0}; @@ -639,6 +645,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( DP_DOWNSPREAD_CTRL, lt_settings->link_settings.link_spread); + link_configure_fixed_vs_pe_retimer(link->ddc, + &vendor_lttpr_write_data_dpmf[0], + sizeof(vendor_lttpr_write_data_dpmf)); + if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) { link_configure_fixed_vs_pe_retimer(link->ddc, &vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1)); -- cgit v1.2.3 From 9a1c1339abf972477aeef4ea037e650f49c5892d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 27 Oct 2023 18:21:55 -0400 Subject: drm/amdkfd: Run restore_workers on freezable WQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make restore workers freezable so we don't have to explicitly flush them in suspend and GPU reset code paths, and we don't accidentally try to restore BOs while the GPU is suspended. 
Not having to flush restore_work also helps avoid lock/fence dependencies in the GPU reset case where we're not allowed to wait for fences. A side effect of this is, that we can now have multiple concurrent threads trying to signal the same eviction fence. Rework eviction fence signaling and replacement to account for that. The GPU reset path can no longer rely on restore_process_worker to resume queues because evict/restore workers can run independently of it. Instead call a new restore_process_helper directly. This is an RFC and request for testing. v2: - Reworked eviction fence signaling - Introduced restore_process_helper v3: - Handle unsignaled eviction fences in restore_process_bos Signed-off-by: Felix Kuehling Acked-by: Christian König Tested-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 68 ++++++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 87 +++++++++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +- 3 files changed, 104 insertions(+), 55 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b98c0969d3be..73288f9ccaf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1384,7 +1384,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, amdgpu_amdkfd_restore_userptr_worker); *process_info = info; - *ef = dma_fence_get(&info->eviction_fence->base); } vm->process_info = *process_info; @@ -1415,6 +1414,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, list_add_tail(&vm->vm_list_node, &(vm->process_info->vm_list_head)); vm->process_info->n_vms++; + + *ef = dma_fence_get(&vm->process_info->eviction_fence->base); mutex_unlock(&vm->process_info->lock); return 0; @@ -1426,10 +1427,7 @@ validate_pd_fail: reserve_pd_fail: vm->process_info = NULL; if (info) { - /* Two fence references: one in info and one in *ef */ dma_fence_put(&info->eviction_fence->base); - dma_fence_put(*ef); - *ef = NULL; *process_info = NULL; put_pid(info->pid); create_evict_fence_fail: @@ -1623,7 +1621,8 @@ int amdgpu_amdkfd_criu_resume(void *p) goto out_unlock; } WRITE_ONCE(pinfo->block_mmu_notifications, false); - schedule_delayed_work(&pinfo->restore_userptr_work, 0); + queue_delayed_work(system_freezable_wq, + &pinfo->restore_userptr_work, 0); out_unlock: mutex_unlock(&pinfo->lock); @@ -2426,7 +2425,8 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); - schedule_delayed_work(&process_info->restore_userptr_work, + queue_delayed_work(system_freezable_wq, + &process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); } mutex_unlock(&process_info->notifier_lock); @@ -2749,7 +2749,8 @@ unlock_out: /* If validation failed, reschedule another attempt */ if (evicted_bos) { - schedule_delayed_work(&process_info->restore_userptr_work, + queue_delayed_work(system_freezable_wq, + &process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); kfd_smi_event_queue_restore_rescheduled(mm); @@ -2758,6 +2759,23 @@ unlock_out: put_task_struct(usertask); } +static void replace_eviction_fence(struct dma_fence **ef, + struct dma_fence *new_ef) +{ + struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true + /* protected by process_info->lock */); + + /* If we're replacing an unsignaled eviction fence, 
that fence will + * never be signaled, and if anyone is still waiting on that fence, + * they will hang forever. This should never happen. We should only + * replace the fence in restore_work that only gets scheduled after + * eviction work signaled the fence. + */ + WARN_ONCE(!dma_fence_is_signaled(old_ef), + "Replacing unsignaled eviction fence"); + dma_fence_put(old_ef); +} + /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given * KFD process identified by process_info * @@ -2781,7 +2799,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) struct amdkfd_process_info *process_info = info; struct amdgpu_vm *peer_vm; struct kgd_mem *mem; - struct amdgpu_amdkfd_fence *new_fence; struct list_head duplicate_save; struct amdgpu_sync sync_obj; unsigned long failed_size = 0; @@ -2907,22 +2924,35 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) /* Wait for validate and PT updates to finish */ amdgpu_sync_wait(&sync_obj, false); - /* Release old eviction fence and create new one, because fence only - * goes from unsignaled to signaled, fence cannot be reused. - * Use context and mm from the old fence. + /* The old eviction fence may be unsignaled if restore happens + * after a GPU reset or suspend/resume. Keep the old fence in that + * case. Otherwise release the old eviction fence and create new + * one, because fence only goes from unsignaled to signaled once + * and cannot be reused. Use context and mm from the old fence. + * + * If an old eviction fence signals after this check, that's OK. + * Anyone signaling an eviction fence must stop the queues first + * and schedule another restore worker. */ - new_fence = amdgpu_amdkfd_fence_create( + if (dma_fence_is_signaled(&process_info->eviction_fence->base)) { + struct amdgpu_amdkfd_fence *new_fence = + amdgpu_amdkfd_fence_create( process_info->eviction_fence->base.context, process_info->eviction_fence->mm, NULL); - if (!new_fence) { - pr_err("Failed to create eviction fence\n"); - ret = -ENOMEM; - goto validate_map_fail; + + if (!new_fence) { + pr_err("Failed to create eviction fence\n"); + ret = -ENOMEM; + goto validate_map_fail; + } + dma_fence_put(&process_info->eviction_fence->base); + process_info->eviction_fence = new_fence; + replace_eviction_fence(ef, dma_fence_get(&new_fence->base)); + } else { + WARN_ONCE(*ef != &process_info->eviction_fence->base, + "KFD eviction fence doesn't match KGD process_info"); } - dma_fence_put(&process_info->eviction_fence->base); - process_info->eviction_fence = new_fence; - *ef = dma_fence_get(&new_fence->base); /* Attach new eviction fence to all BOs except pinned ones */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index c10d050e1a61..71df51fcc1b0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -664,7 +664,8 @@ int kfd_process_create_wq(void) if (!kfd_process_wq) kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0); if (!kfd_restore_wq) - kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0); + kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", + WQ_FREEZABLE); if (!kfd_process_wq || !kfd_restore_wq) { kfd_process_destroy_wq(); @@ -1642,6 +1643,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, struct amdgpu_fpriv *drv_priv; struct amdgpu_vm *avm; struct kfd_process *p; + struct dma_fence *ef; struct kfd_node 
*dev; int ret; @@ -1661,11 +1663,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm, &p->kgd_process_info, - &p->ef); + &ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; } + RCU_INIT_POINTER(p->ef, ef); pdd->drm_priv = drm_file->private_data; ret = kfd_process_device_reserve_ib_mem(pdd); @@ -1908,6 +1911,21 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node, return -EINVAL; } +static int signal_eviction_fence(struct kfd_process *p) +{ + struct dma_fence *ef; + int ret; + + rcu_read_lock(); + ef = dma_fence_get_rcu_safe(&p->ef); + rcu_read_unlock(); + + ret = dma_fence_signal(ef); + dma_fence_put(ef); + + return ret; +} + static void evict_process_worker(struct work_struct *work) { int ret; @@ -1920,31 +1938,46 @@ static void evict_process_worker(struct work_struct *work) * lifetime of this thread, kfd_process p will be valid */ p = container_of(dwork, struct kfd_process, eviction_work); - WARN_ONCE(p->last_eviction_seqno != p->ef->seqno, - "Eviction fence mismatch\n"); - - /* Narrow window of overlap between restore and evict work - * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos - * unreserves KFD BOs, it is possible to evicted again. But - * restore has few more steps of finish. So lets wait for any - * previous restore work to complete - */ - flush_delayed_work(&p->restore_work); pr_debug("Started evicting pasid 0x%x\n", p->pasid); ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM); if (!ret) { - dma_fence_signal(p->ef); - dma_fence_put(p->ef); - p->ef = NULL; - queue_delayed_work(kfd_restore_wq, &p->restore_work, + /* If another thread already signaled the eviction fence, + * they are responsible stopping the queues and scheduling + * the restore work. + */ + if (!signal_eviction_fence(p)) + queue_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); + else + kfd_process_restore_queues(p); pr_debug("Finished evicting pasid 0x%x\n", p->pasid); } else pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid); } +static int restore_process_helper(struct kfd_process *p) +{ + int ret = 0; + + /* VMs may not have been acquired yet during debugging. */ + if (p->kgd_process_info) { + ret = amdgpu_amdkfd_gpuvm_restore_process_bos( + p->kgd_process_info, &p->ef); + if (ret) + return ret; + } + + ret = kfd_process_restore_queues(p); + if (!ret) + pr_debug("Finished restoring pasid 0x%x\n", p->pasid); + else + pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid); + + return ret; +} + static void restore_process_worker(struct work_struct *work) { struct delayed_work *dwork; @@ -1970,24 +2003,15 @@ static void restore_process_worker(struct work_struct *work) */ p->last_restore_timestamp = get_jiffies_64(); - /* VMs may not have been acquired yet during debugging. 
*/ - if (p->kgd_process_info) - ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, - &p->ef); + + ret = restore_process_helper(p); if (ret) { pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n", p->pasid, PROCESS_BACK_OFF_TIME_MS); ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); WARN(!ret, "reschedule restore work failed\n"); - return; } - - ret = kfd_process_restore_queues(p); - if (!ret) - pr_debug("Finished restoring pasid 0x%x\n", p->pasid); - else - pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid); } void kfd_suspend_all_processes(void) @@ -1998,14 +2022,9 @@ void kfd_suspend_all_processes(void) WARN(debug_evictions, "Evicting all processes"); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - cancel_delayed_work_sync(&p->eviction_work); - flush_delayed_work(&p->restore_work); - if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND)) pr_err("Failed to suspend process 0x%x\n", p->pasid); - dma_fence_signal(p->ef); - dma_fence_put(p->ef); - p->ef = NULL; + signal_eviction_fence(p); } srcu_read_unlock(&kfd_processes_srcu, idx); } @@ -2017,7 +2036,7 @@ int kfd_resume_all_processes(void) int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { + if (restore_process_helper(p)) { pr_err("Restore process %d failed during resume\n", p->pasid); ret = -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f2f3c338fd94..613f19510287 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1870,7 +1870,7 @@ out_reschedule: /* If validation failed, reschedule another attempt */ if (evicted_ranges) { pr_debug("reschedule to restore svm range\n"); - schedule_delayed_work(&svms->restore_work, + queue_delayed_work(system_freezable_wq, &svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); kfd_smi_event_queue_restore_rescheduled(mm); @@ -1946,7 +1946,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, pr_debug("failed to quiesce KFD\n"); pr_debug("schedule to restore svm %p ranges\n", svms); - schedule_delayed_work(&svms->restore_work, + queue_delayed_work(system_freezable_wq, &svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); } else { unsigned long s, l; -- cgit v1.2.3 From 1919fd6bb09f61015549b9e5a5af1541b41f45d9 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 18 Nov 2023 14:39:40 -0500 Subject: drm/amd/display: [FW Promotion] Release 0.0.194.0 - Add a new dmub command in enum dmub_cmd_cab_type Reviewed-by: Tom Chung Acked-by: Tom Chung Signed-off-by: Anthony Koo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index b7d9360bfdc8..a365f6c096de 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -1345,6 +1345,10 @@ enum dmub_cmd_cab_type { * Fit surfaces in CAB (i.e. CAB enable) */ DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB = 2, + /** + * Do not fit surfaces in CAB (i.e. 
no CAB) + */ + DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB = 3, }; /** -- cgit v1.2.3 From 061a5bf210cd7b941627092309ff6035a017cda3 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 14 Nov 2023 11:22:09 -0500 Subject: drm/amd/display: Allow DTBCLK disable for DCN35 [Why] DTBCLK is enabled on idle and it will burn power. [How] There's a few issues here: - Always enabling DTBCLK on clock manager init - Setting refclk when DTBCLK is supposed to be disabled - Not applying the correct calculated version refclk, but instead the base value which might be zero On dtbclk_en change we'll message PMFW to enable or disable the clock accordingly. The DTBDTO will be then based on refclk, but it will be set to the default fixed value if there was nothing calculated in DML despite the clock being considered enabled. Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 27 ++++++++++------------ 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 63a0b885b6f0..d5fde7d23fbf 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -232,6 +232,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (dc->work_arounds.skip_clock_update) return; + /* DTBCLK is fixed, so set a default if unspecified. */ + if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) + new_clocks->ref_dtbclk_khz = 600000; + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state @@ -265,8 +269,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { dcn35_smu_set_dtbclk(clk_mgr, true); - dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + + dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz); + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; } /* check that we're not already in D0 */ @@ -314,17 +320,12 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, update_dispclk = true; } - if (!new_clocks->dtbclk_en) { - new_clocks->ref_dtbclk_khz = 600000; - } - /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ if (!dc->debug.disable_dtb_ref_clk_switch && - should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000)) { - /* DCCG requires KHz precision for DTBCLK */ - dcn35_smu_set_dtbclk(clk_mgr, true); - - dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); + should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, + clk_mgr_base->clks.ref_dtbclk_khz / 1000)) { + dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz); + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; } if (dpp_clock_lowered) { @@ -1048,12 +1049,8 @@ void dcn35_clk_mgr_construct( dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); - 
clk_mgr->base.base.clks.ref_dtbclk_khz = dcn35_smu_get_dtbclk(&clk_mgr->base); - - if (!clk_mgr->base.base.clks.ref_dtbclk_khz) - dcn35_smu_set_dtbclk(&clk_mgr->base, true); + clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; - clk_mgr->base.base.clks.dtbclk_en = true; dce_clock_read_ss_info(&clk_mgr->base); /*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/ -- cgit v1.2.3 From ca0ad76089a8171d7a075e50b7beecf092f8fd0c Mon Sep 17 00:00:00 2001 From: Candice Li Date: Fri, 24 Nov 2023 09:33:47 +0800 Subject: drm/amdgpu: Update EEPROM I2C address for smu v13_0_0 Check smu v13_0_0 SKU type to select EEPROM I2C address. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 65aa218380be..2fde93b00cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -214,6 +214,12 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, control->i2c_address = EEPROM_I2C_MADDR_0; return true; case IP_VERSION(13, 0, 0): + if (strnstr(atom_ctx->vbios_pn, "D707", + sizeof(atom_ctx->vbios_pn))) + control->i2c_address = EEPROM_I2C_MADDR_0; + else + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): control->i2c_address = EEPROM_I2C_MADDR_4; -- cgit v1.2.3 From d581ceab26a1be9fe94befe2604cbe99eadf1acc Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Mon, 6 Nov 2023 18:07:51 +0800 Subject: drm/amdkfd: Free gang_ctx_bo and wptr_bo in pqm_uninit [Why] Memory leaks of gang_ctx_bo and wptr_bo. [How] Free gang_ctx_bo and wptr_bo in pqm_uninit. v2: add a common function pqm_clean_queue_resource to free queue's resources. v3: reset pdd->pqd.num_gws when destorying GWS queue. 
Reviewed-by: Felix Kuehling Signed-off-by: ZhenGuo Yin Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 54 +++++++++++++--------- 1 file changed, 33 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 77649392e233..77f493262e05 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -169,16 +169,43 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) return 0; } +static void pqm_clean_queue_resource(struct process_queue_manager *pqm, + struct process_queue_node *pqn) +{ + struct kfd_node *dev; + struct kfd_process_device *pdd; + + dev = pqn->q->device; + + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return; + } + + if (pqn->q->gws) { + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && + !dev->kfd->shared_resources.enable_mes) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, pqn->q->gws); + pdd->qpd.num_gws = 0; + } + + if (dev->kfd->shared_resources.enable_mes) { + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo); + if (pqn->q->wptr_bo) + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + } +} + void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q && pqn->q->gws && - KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && - !pqn->q->device->kfd->shared_resources.enable_mes) - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (pqn->q) + pqm_clean_queue_resource(pqm, pqn); + kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); list_del(&pqn->process_queue_list); @@ -461,22 +488,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) goto err_destroy_queue; } - if (pqn->q->gws) { - if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && - !dev->kfd->shared_resources.enable_mes) - amdgpu_amdkfd_remove_gws_from_process( - pqm->process->kgd_process_info, - pqn->q->gws); - pdd->qpd.num_gws = 0; - } - - if (dev->kfd->shared_resources.enable_mes) { - amdgpu_amdkfd_free_gtt_mem(dev->adev, - pqn->q->gang_ctx_bo); - if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); - - } + pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } -- cgit v1.2.3 From 5290ed0a8b261115fe4965a6d95a642b0742d159 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Thu, 16 Nov 2023 15:28:53 -0500 Subject: drm/amd/display: Add DSC granular throughput adjustment [Why/How] Update DSC DPCD parsing to take granular throughput adjustment into consideration. 
Reviewed-by: Wenjing Liu Acked-by: Tom Chung Signed-off-by: Ilya Bakoulin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index e8b5f17beb96..0df6c55eb326 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -331,8 +331,9 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, int buff_block_size; int buff_size; - if (!dsc_buff_block_size_from_dpcd(dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT], - &buff_block_size)) + if (!dsc_buff_block_size_from_dpcd( + dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] & 0x03, + &buff_block_size)) return false; buff_size = dpcd_dsc_basic_data[DP_DSC_RC_BUF_SIZE - DP_DSC_SUPPORT] + 1; @@ -357,10 +358,15 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, { int dpcd_throughput = dpcd_dsc_basic_data[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT]; + int dsc_throughput_granular_delta; + + dsc_throughput_granular_delta = dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] >> 3; + dsc_throughput_granular_delta *= 2; if (!dsc_throughput_from_dpcd(dpcd_throughput & DP_DSC_THROUGHPUT_MODE_0_MASK, &dsc_sink_caps->throughput_mode_0_mps)) return false; + dsc_sink_caps->throughput_mode_0_mps += dsc_throughput_granular_delta; dpcd_throughput = (dpcd_throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> DP_DSC_THROUGHPUT_MODE_1_SHIFT; if (!dsc_throughput_from_dpcd(dpcd_throughput, &dsc_sink_caps->throughput_mode_1_mps)) -- cgit v1.2.3 From 33a6e409165cd23d1dc580031cb749550ca18517 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 10 Nov 2023 10:24:20 -0500 Subject: drm/amd/display: Fix some HostVM parameters in DML [Why] A number of DML parameters related to HostVM were either missing or being set incorrectly, which may cause inaccuracies in calculating margins and determining BW limitations. [How] Correct these values where needed and populate the missing values. 
Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Taimur Hassan Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 33 ++++++++++++++++++++++ .../amd/display/dc/dml2/dml2_translation_helper.c | 9 ++++-- 2 files changed, 39 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 21c17d3296a3..39cf1ae3a3e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -330,6 +330,39 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); + /*copy to dml2, before dml2_create*/ + if (clk_table->num_entries > 2) { + + for (i = 0; i < clk_table->num_entries; i++) { + dc->dml2_options.bbox_overrides.clks_table.num_states = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz = + clock_limits[i].dcfclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz = + clock_limits[i].fabricclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz = + clock_limits[i].dispclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz = + clock_limits[i].dppclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz = + clock_limits[i].socclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = + clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = + clk_table->num_entries; + } + } + /* Update latency values */ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 48caa34a5ce7..fa8fe5bf7e57 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -1057,9 +1057,12 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat } //Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file - dml_dispcfg->plane.GPUVMEnable = true; - dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4; - dml_dispcfg->plane.HostVMEnable = false; + dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable; + dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels; + dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable; + dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels; + if 
(dml2->v20.dml_core_ctx.ip.hostvm_enable) + dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; dml2_populate_pipe_to_plane_index_mapping(dml2, context); -- cgit v1.2.3 From 70378005378a23fbfe0d4c44dac4187cad07da94 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 19 Nov 2023 23:02:26 -0500 Subject: drm/amd/display: Promote DAL to 3.2.262 This version brings along following fixes: - Add DSC granular throughput adjustment - Allow DTBCLK disable for DCN35 - Update Fixed VS/PE Retimer Sequence - Block dcn315 dynamic crb allocation when unintended - Update dcn315 lpddr pstate latency - Fix some HostVM parameters in DML Reviewed-by: Tom Chung Acked-by: Tom Chung Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6074ac1b180a..e8c1599ab18a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.261" +#define DC_VER "3.2.262" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 9a5095e785c38ab8d9f3d91f4ee76f4f73ec4adc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 13 Sep 2023 12:00:44 -0400 Subject: drm/amdgpu: add amdgpu_reg_state.h This header defines the reg state structures exposed via sysfs for umr debugging. v2: add content type Signed-off-by: Alex Deucher Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 + drivers/gpu/drm/amd/include/amdgpu_reg_state.h | 140 +++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/amdgpu_reg_state.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ea2656bd714f..0af8ac81facd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -110,6 +110,7 @@ #include "amdgpu_ras.h" #include "amdgpu_xcp.h" #include "amdgpu_seq64.h" +#include "amdgpu_reg_state.h" #define MAX_GPU_INSTANCE 64 @@ -612,6 +613,10 @@ struct amdgpu_asic_funcs { const struct amdgpu_video_codecs **codecs); /* encode "> 32bits" smn addressing */ u64 (*encode_ext_smn_addressing)(int ext_id); + + ssize_t (*get_reg_state)(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size); }; /* diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h new file mode 100644 index 000000000000..3dbdb118db59 --- /dev/null +++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h @@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_REG_STATE_H__ +#define __AMDGPU_REG_STATE_H__ + +enum amdgpu_reg_state { + AMDGPU_REG_STATE_TYPE_INVALID = 0, + AMDGPU_REG_STATE_TYPE_XGMI = 1, + AMDGPU_REG_STATE_TYPE_WAFL = 2, + AMDGPU_REG_STATE_TYPE_PCIE = 3, + AMDGPU_REG_STATE_TYPE_USR = 4, + AMDGPU_REG_STATE_TYPE_USR_1 = 5 +}; + +struct amdgpu_reg_state_header { + uint16_t structure_size; + uint8_t format_revision; + uint8_t content_revision; + uint8_t state_type; + uint8_t num_instances; + uint16_t pad; +}; + +enum amdgpu_reg_inst_state { + AMDGPU_INST_S_OK, + AMDGPU_INST_S_EDISABLED, + AMDGPU_INST_S_EACCESS, +}; + +struct amdgpu_smn_reg_data { + uint64_t addr; + uint32_t value; + uint32_t pad; +}; + +struct amdgpu_reg_inst_header { + uint16_t instance; + uint16_t state; + uint16_t num_smn_regs; + uint16_t pad; +}; + + +struct amdgpu_regs_xgmi_v1_0 { + struct amdgpu_reg_inst_header inst_header; + + struct amdgpu_smn_reg_data smn_reg_values[]; +}; + +struct amdgpu_reg_state_xgmi_v1_0 { + /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_XGMI */ + struct amdgpu_reg_state_header common_header; + + struct amdgpu_regs_xgmi_v1_0 xgmi_state_regs[]; +}; + +struct amdgpu_regs_wafl_v1_0 { + struct amdgpu_reg_inst_header inst_header; + + struct amdgpu_smn_reg_data smn_reg_values[]; +}; + +struct amdgpu_reg_state_wafl_v1_0 { + /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_WAFL */ + struct amdgpu_reg_state_header common_header; + + struct amdgpu_regs_wafl_v1_0 wafl_state_regs[]; +}; + +struct amdgpu_regs_pcie_v1_0 { + struct amdgpu_reg_inst_header inst_header; + + uint16_t device_status; + uint16_t link_status; + uint32_t sub_bus_number_latency; + uint32_t pcie_corr_err_status; + uint32_t pcie_uncorr_err_status; + + struct amdgpu_smn_reg_data smn_reg_values[]; +}; + +struct amdgpu_reg_state_pcie_v1_0 { + /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_PCIE */ + struct amdgpu_reg_state_header common_header; + + struct amdgpu_regs_pcie_v1_0 pci_state_regs[]; +}; + +struct amdgpu_regs_usr_v1_0 { + struct amdgpu_reg_inst_header inst_header; + + struct amdgpu_smn_reg_data smn_reg_values[]; +}; + +struct amdgpu_reg_state_usr_v1_0 { + /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_USR */ + struct amdgpu_reg_state_header common_header; + + struct amdgpu_regs_usr_v1_0 usr_state_regs[]; +}; + +static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size, + uint16_t num_regs) +{ + return num_inst 
* + (inst_size + num_regs * sizeof(struct amdgpu_smn_reg_data)); +} + +#define amdgpu_asic_get_reg_state_supported(adev) \ + ((adev)->asic_funcs->get_reg_state ? 1 : 0) + +#define amdgpu_asic_get_reg_state(adev, state, buf, size) \ + ((adev)->asic_funcs->get_reg_state ? \ + (adev)->asic_funcs->get_reg_state((adev), (state), (buf), \ + (size)) : \ + 0) + +#endif -- cgit v1.2.3 From af39e6f4d8032b101907cc2ac12a21a778da568d Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 6 Oct 2023 13:49:03 +0530 Subject: drm/amdgpu: Add reg_state sysfs attribute Add reg_state attribute to fetch the register snapshot of different IPs like XGMI, WAFL,PCIE and USR. To get a snapshot for a particular IP 1) Open the sysfs file 2) Seek to the offset as defined in amdgpu_sysfs_reg_offset 3) Read Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 62 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/include/amdgpu_reg_state.h | 13 ++++++ 2 files changed, 75 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5d3a83193115..f29d0faf956e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -162,6 +162,65 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); +static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t ppos, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t bytes_read; + + switch (ppos) { + case AMDGPU_SYS_REG_STATE_XGMI: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count); + break; + case AMDGPU_SYS_REG_STATE_WAFL: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count); + break; + case AMDGPU_SYS_REG_STATE_PCIE: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count); + break; + case AMDGPU_SYS_REG_STATE_USR: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_USR, buf, count); + break; + case AMDGPU_SYS_REG_STATE_USR_1: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count); + break; + default: + return -EINVAL; + } + + return bytes_read; +} + +BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, + AMDGPU_SYS_REG_STATE_END); + +int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev) +{ + int ret; + + if (!amdgpu_asic_get_reg_state_supported(adev)) + return 0; + + ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state); + + return ret; +} + +void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_asic_get_reg_state_supported(adev)) + return; + sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state); +} + /** * DOC: board_info * @@ -4233,6 +4292,7 @@ fence_driver_init: "Could not create amdgpu board attributes\n"); amdgpu_fru_sysfs_init(adev); + amdgpu_reg_state_sysfs_init(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) r = amdgpu_pmu_init(adev); @@ -4355,6 +4415,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); amdgpu_fru_sysfs_fini(adev); + amdgpu_reg_state_sysfs_fini(adev); + /* disable ras feature must before 
hw fini */ amdgpu_ras_pre_fini(adev); diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h index 3dbdb118db59..be519c8edf49 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h +++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h @@ -34,6 +34,15 @@ enum amdgpu_reg_state { AMDGPU_REG_STATE_TYPE_USR_1 = 5 }; +enum amdgpu_sysfs_reg_offset { + AMDGPU_SYS_REG_STATE_XGMI = 0x0000, + AMDGPU_SYS_REG_STATE_WAFL = 0x1000, + AMDGPU_SYS_REG_STATE_PCIE = 0x2000, + AMDGPU_SYS_REG_STATE_USR = 0x3000, + AMDGPU_SYS_REG_STATE_USR_1 = 0x4000, + AMDGPU_SYS_REG_STATE_END = 0x5000, +}; + struct amdgpu_reg_state_header { uint16_t structure_size; uint8_t format_revision; @@ -137,4 +146,8 @@ static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size, (size)) : \ 0) + +int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev); +void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev); + #endif -- cgit v1.2.3 From 081a6eda2b25092e1466f09eb46d829488b75730 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 6 Oct 2023 14:20:45 +0530 Subject: drm/amdgpu: Read aquavanjaram PCIE register state Add support to read aqua vanjaram PCIE register state Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 115 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15.h | 4 + 3 files changed, 120 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 3f715e7fe1a9..0a4598480dd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -24,6 +24,7 @@ #include "soc15.h" #include "soc15_common.h" +#include "amdgpu_reg_state.h" #include "amdgpu_xcp.h" #include "gfx_v9_4_3.h" #include "gfxhub_v1_2.h" @@ -656,3 +657,117 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) return 0; } + +static void aqua_read_smn(struct amdgpu_device *adev, + struct amdgpu_smn_reg_data *regdata, + uint64_t smn_addr) +{ + regdata->addr = smn_addr; + regdata->value = RREG32_PCIE(smn_addr); +} + +struct aqua_reg_list { + uint64_t start_addr; + uint32_t num_regs; + uint32_t incrx; +}; + +#define DW_ADDR_INCR 4 + +#define smnreg_0x1A340218 0x1A340218 +#define smnreg_0x1A3402E4 0x1A3402E4 +#define smnreg_0x1A340294 0x1A340294 +#define smreg_0x1A380088 0x1A380088 + +#define NUM_PCIE_SMN_REGS 14 + +static struct aqua_reg_list pcie_reg_addrs[] = { + { smnreg_0x1A340218, 1, 0 }, + { smnreg_0x1A3402E4, 1, 0 }, + { smnreg_0x1A340294, 6, DW_ADDR_INCR }, + { smreg_0x1A380088, 6, DW_ADDR_INCR }, +}; + +static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_pcie_v1_0 *pcie_regs; + struct amdgpu_smn_reg_data *reg_data; + struct pci_dev *us_pdev, *ds_pdev; + int aer_cap, r, n; + + if (!buf || !max_size) + return -EINVAL; + + pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf; + + szbuf = sizeof(*pcie_reg_state) + + amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS); + /* Only one instance of pcie regs */ + if (max_size < szbuf) + return -EOVERFLOW; + + pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf + + sizeof(*pcie_reg_state)); + pcie_regs->inst_header.instance = 0; + pcie_regs->inst_header.state = 
AMDGPU_INST_S_OK; + pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS; + + reg_data = pcie_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) { + start_addr = pcie_reg_addrs[r].start_addr; + incrx = pcie_reg_addrs[r].incrx; + num_regs = pcie_reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn(adev, reg_data, start_addr + n * incrx); + ++reg_data; + } + } + + ds_pdev = pci_upstream_bridge(adev->pdev); + us_pdev = pci_upstream_bridge(ds_pdev); + + pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA, + &pcie_regs->device_status); + pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA, + &pcie_regs->link_status); + + aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR); + if (aer_cap) { + pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS, + &pcie_regs->pcie_corr_err_status); + pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS, + &pcie_regs->pcie_uncorr_err_status); + } + + pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS, + &pcie_regs->sub_bus_number_latency); + + pcie_reg_state->common_header.structure_size = szbuf; + pcie_reg_state->common_header.format_revision = 1; + pcie_reg_state->common_header.content_revision = 0; + pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE; + pcie_reg_state->common_header.num_instances = 1; + + return pcie_reg_state->common_header.structure_size; +} + +ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size) +{ + ssize_t size; + + switch (reg_state) { + case AMDGPU_REG_STATE_TYPE_PCIE: + size = aqua_vanjaram_read_pcie_state(adev, buf, max_size); + break; + default: + return -EINVAL; + } + + return size; +} diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index d4b8d62f4294..e3d41e8aac9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -902,6 +902,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .pre_asic_init = &soc15_pre_asic_init, .query_video_codecs = &soc15_query_video_codecs, .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing, + .get_reg_state = &aqua_vanjaram_get_reg_state, }; static int soc15_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index eac54042c6c0..1444b7765e4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -27,6 +27,7 @@ #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" +#include "amdgpu_reg_state.h" extern const struct amdgpu_ip_block_version vega10_common_ip_block; @@ -114,6 +115,9 @@ int aldebaran_reg_base_init(struct amdgpu_device *adev); void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev); u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id); int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev); +ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size); void vega10_doorbell_index_init(struct amdgpu_device *adev); void vega20_doorbell_index_init(struct amdgpu_device *adev); -- cgit v1.2.3 From 92e508eaf337d465f0574dda18d805bb4df138bc Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 7 Oct 2023 15:41:27 +0530 Subject: drm/amdgpu: Read aquavanjaram XGMI register state Add support to read state of XGMI links in aquavanjaram SOC. 
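To illustrate the consumer side, here is a hypothetical userspace sketch of the open/seek/read sequence described in the reg_state sysfs patch above. The PCI device path and the 4 KiB buffer size are assumptions made for the example; the per-IP offsets come from enum amdgpu_sysfs_reg_offset (XGMI = 0x0000, WAFL = 0x1000, PCIE = 0x2000, USR = 0x3000).

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	const off_t xgmi_offset = 0x0000;	/* AMDGPU_SYS_REG_STATE_XGMI */
	char buf[4096];
	ssize_t n;
	int fd;

	/* Device path is an example; use the actual PCI BDF of the GPU. */
	fd = open("/sys/bus/pci/devices/0000:03:00.0/reg_state", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* pread() combines the seek to the per-IP offset with the read. */
	n = pread(fd, buf, sizeof(buf), xgmi_offset);
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes of XGMI register state\n", n);

	close(fd);
	return 0;
}
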
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 96 ++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 0a4598480dd5..a00b8c6f0a94 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -674,6 +674,15 @@ struct aqua_reg_list { #define DW_ADDR_INCR 4 +static void aqua_read_smn_ext(struct amdgpu_device *adev, + struct amdgpu_smn_reg_data *regdata, + uint64_t smn_addr, int i) +{ + regdata->addr = + smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i); + regdata->value = RREG32_PCIE_EXT(regdata->addr); +} + #define smnreg_0x1A340218 0x1A340218 #define smnreg_0x1A3402E4 0x1A3402E4 #define smnreg_0x1A340294 0x1A340294 @@ -755,6 +764,90 @@ static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev, return pcie_reg_state->common_header.structure_size; } +#define smnreg_0x11A00050 0x11A00050 +#define smnreg_0x11A00180 0x11A00180 +#define smnreg_0x11A00070 0x11A00070 +#define smnreg_0x11A00200 0x11A00200 +#define smnreg_0x11A0020C 0x11A0020C +#define smnreg_0x11A00210 0x11A00210 +#define smnreg_0x11A00108 0x11A00108 + +#define XGMI_LINK_REG(smnreg, l) ((smnreg) | (l << 20)) + +#define NUM_XGMI_SMN_REGS 25 + +static struct aqua_reg_list xgmi_reg_addrs[] = { + { smnreg_0x11A00050, 1, 0 }, + { smnreg_0x11A00180, 16, DW_ADDR_INCR }, + { smnreg_0x11A00070, 4, DW_ADDR_INCR }, + { smnreg_0x11A00200, 1, 0 }, + { smnreg_0x11A0020C, 1, 0 }, + { smnreg_0x11A00210, 1, 0 }, + { smnreg_0x11A00108, 1, 0 }, +}; + +static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_xgmi_v1_0 *xgmi_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_xgmi_instances = 8; + int inst = 0, i, j, r, n; + const int xgmi_inst = 2; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf; + + szbuf = sizeof(*xgmi_reg_state) + + amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs), + NUM_XGMI_SMN_REGS); + /* Only one instance of pcie regs */ + if (max_size < szbuf) + return -EOVERFLOW; + + p = &xgmi_reg_state->xgmi_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + for (j = 0; j < xgmi_inst; ++j) { + xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p; + xgmi_regs->inst_header.instance = inst++; + + xgmi_regs->inst_header.state = AMDGPU_INST_S_OK; + xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS; + + reg_data = xgmi_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) { + start_addr = xgmi_reg_addrs[r].start_addr; + incrx = xgmi_reg_addrs[r].incrx; + num_regs = xgmi_reg_addrs[r].num_regs; + + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext( + adev, reg_data, + XGMI_LINK_REG(start_addr, j) + + n * incrx, + i); + ++reg_data; + } + } + p = reg_data; + } + } + + xgmi_reg_state->common_header.structure_size = szbuf; + xgmi_reg_state->common_header.format_revision = 1; + xgmi_reg_state->common_header.content_revision = 0; + xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI; + xgmi_reg_state->common_header.num_instances = max_xgmi_instances; + + return xgmi_reg_state->common_header.structure_size; +} + ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device 
*adev, enum amdgpu_reg_state reg_state, void *buf, size_t max_size) @@ -765,6 +858,9 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, case AMDGPU_REG_STATE_TYPE_PCIE: size = aqua_vanjaram_read_pcie_state(adev, buf, max_size); break; + case AMDGPU_REG_STATE_TYPE_XGMI: + size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size); + break; default: return -EINVAL; } -- cgit v1.2.3 From 36fd9969fa53c40e8a58192714d9a3624cbe04e3 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 28 Nov 2023 16:47:14 +0530 Subject: drm/amdgpu: Use another offset for GC 9.4.3 remap The legacy region at 0x7F000 maps to valid registers in GC 9.4.3 SOCs. Use 0x1A000 offset instead as MMIO register remap region. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index e3d41e8aac9d..9ad4d6d3122b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1162,6 +1162,11 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x46; + /* GC 9.4.3 uses MMIO register region hole at a different offset */ + if (!amdgpu_sriov_vf(adev)) { + adev->rmmio_remap.reg_offset = 0x1A000; + adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000; + } break; default: /* FIXME: not supported yet */ -- cgit v1.2.3 From 562f33836f519a235e5c5e71bcc723ab1faccd2f Mon Sep 17 00:00:00 2001 From: Clint Taylor Date: Tue, 28 Nov 2023 11:03:29 -0800 Subject: drm/i915/dgfx: DGFX uses direct VBT pin mapping DDC pin mapping for DGFX cards uses direct VBT pin mapping Cc: Lucas De Marchi Cc: Matt Roper Signed-off-by: Clint Taylor Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20231128190329.1335562-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/display/intel_bios.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 2fd72b2fd109..3e7e96acb24a 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2201,6 +2201,9 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) const u8 *ddc_pin_map; int i, n_entries; + if (IS_DGFX(i915)) + return vbt_pin; + if (INTEL_PCH_TYPE(i915) >= PCH_LNL || HAS_PCH_MTP(i915) || IS_ALDERLAKE_P(i915)) { ddc_pin_map = adlp_ddc_pin_map; @@ -2208,8 +2211,6 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) } else if (IS_ALDERLAKE_S(i915)) { ddc_pin_map = adls_ddc_pin_map; n_entries = ARRAY_SIZE(adls_ddc_pin_map); - } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) { - return vbt_pin; } else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); -- cgit v1.2.3 From 3c9ea68cb61bd7e5bd312c06a12adada74ff5805 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 6 Nov 2023 11:20:15 -0500 Subject: drm/amd/display: Include udelay when waiting for INBOX0 ACK When waiting for the ACK for INBOX0 message, we have to ensure to include the udelay for proper wait time Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- 
drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 22fc4ba96def..38360adc53d9 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -1077,6 +1077,7 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti ack = dmub->hw_funcs.read_inbox0_ack_register(dmub); if (ack) return DMUB_STATUS_OK; + udelay(1); } return DMUB_STATUS_TIMEOUT; } -- cgit v1.2.3 From c95f12b7b724abee5e8c3727db066c63c0876db3 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 3 Nov 2023 10:01:01 -0400 Subject: drm/amd/display: Add z-state support policy for dcn35 [Why] DML2 means that the dcn3x policy for calculating z-state support no longer runs from validate_bandwidth. This means we are unconditionally allowing Z8, the hardware default. [How] Port the policy over to DCN35, but with a few modifications: - Don't use min_dst_y_next_start as a check for Z8/Z10 allow - Add support for overriding the Z10 stutter period per ASIC - Cleanup the code to make the policy assignment more clear Reviewed-by: Charlene Liu Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../gpu/drm/amd/display/dc/dcn35/dcn35_resource.c | 7 +++++ .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 34 ++++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h | 2 ++ 4 files changed, 44 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9316b737a8ba..c4e5c3350b75 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -874,6 +874,7 @@ struct dc_debug_options { unsigned int seamless_boot_odm_combine; unsigned int force_odm_combine_4to1; //bit vector based on otg inst int minimum_z8_residency_time; + int minimum_z10_residency_time; bool disable_z9_mpc; unsigned int force_fclk_khz; bool enable_tri_buf; diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index c7e011d26d41..8f1a9c959bb5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -1712,6 +1712,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, out = dml2_validate(dc, context, fast_validate); + if (fast_validate) + return out; + + DC_FP_START(); + dcn35_decide_zstate_support(dc, context); + DC_FP_END(); + return out; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index a5fe523668e9..dee80429fc4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -507,3 +507,37 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, return pipe_cnt; } + +void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW; + unsigned int i, plane_count = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + if (plane_count == 0) { + support = DCN_ZSTATE_SUPPORT_ALLOW; + } else if (plane_count == 1 && context->stream_count == 1 && context->streams[0]->signal == 
SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + bool is_pwrseq0 = link && link->link_index == 0; + bool is_psr1 = link && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr; + int minmum_z8_residency = + dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; + bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; + int minmum_z10_residency = + dc->debug.minimum_z10_residency_time > 0 ? dc->debug.minimum_z10_residency_time : 5000; + bool allow_z10 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z10_residency; + + if (is_pwrseq0 && allow_z10) + support = DCN_ZSTATE_SUPPORT_ALLOW; + else if (is_pwrseq0 && is_psr1) + support = allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; + else if (allow_z8) + support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; + } + + context->bw_ctx.bw.dcn.clk.zstate_support = support; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h index e8d5a170893e..067480fc3691 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h @@ -39,4 +39,6 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, display_e2e_pipe_params_st *pipes, bool fast_validate); +void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context); + #endif -- cgit v1.2.3 From b9eab9e0aad3285651040e8ab86f64f6c4e51956 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 16 Nov 2023 13:57:07 -0500 Subject: drm/amdgpu: update xgmi num links info post gc9.4.2 GC IP 9.4.2 and up support TA reporting of the number of xGMI links between peers. Tested-by: Vignesh Chander Signed-off-by: Jonathan Kim Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b8412202a1b0..75dc58470393 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -547,7 +547,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, struct amdgpu_device *adev = dst, *peer_adev; int num_links; - if (adev->asic_type != CHIP_ALDEBARAN) + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) return 0; if (src) -- cgit v1.2.3 From fdf84f10e2b08b6e3cbfc672e5dd2cebf4317dea Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 6 Nov 2023 17:29:33 -0500 Subject: drm/amd/display: Update DCN35 watermarks [Why & How] Update to the new values per HW team request. Affects both stutter and z8. 
Reviewed-by: Charlene Liu Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 32 +++++++++++----------- .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 8 +++--- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 507a7cf56711..3469f692d6ea 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -443,32 +443,32 @@ static struct wm_table ddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, } @@ -480,32 +480,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index dee80429fc4c..30d78ad91b9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -164,10 +164,10 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 50.0, /*changed from 442.0*/ - .sr_enter_plus_exit_z8_time_us = 50.0,/*changed from 560.0*/ + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_z8_time_us = 525.0, + .sr_enter_plus_exit_z8_time_us = 715.0, .fclk_change_latency_us = 20.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, -- cgit v1.2.3 From dc9b0c2af004fe7d9d7b67015fadcb0a7123c740 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Mon, 6 Nov 2023 16:47:19 -0500 Subject: drm/amd/display: fix a pipe mapping error in 
dcn32_fpu [why] In dcn32 DML pipes are ordered the same as dc pipes but only for used pipes. For example, if dc pipe 1 and 2 are used, their dml pipe indices would be 0 and 1 respectively. However update_pipe_slice_table_with_split_flags doesn't skip indices for free pipes. This causes us to not reference correct dml pipe output when building pipe topology. [how] Use two variables to iterate dc and dml pipes respectively and only increment dml pipe index when current dc pipe is not free. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Chaitanya Dhere Acked-by: Hamza Mahfooz Signed-off-by: Wenjing Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9ec4172d1c2d..44b0666e53b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1192,13 +1192,16 @@ static bool update_pipe_slice_table_with_split_flags( */ struct pipe_ctx *pipe; bool odm; - int i; + int dc_pipe_idx, dml_pipe_idx = 0; bool updated = false; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; + for (dc_pipe_idx = 0; + dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) { + pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; + if (resource_is_pipe_type(pipe, FREE_PIPE)) + continue; - if (merge[i]) { + if (merge[dc_pipe_idx]) { if (resource_is_pipe_type(pipe, OPP_HEAD)) /* merging OPP head means reducing ODM slice * count by 1 @@ -1213,17 +1216,18 @@ static bool update_pipe_slice_table_with_split_flags( updated = true; } - if (split[i]) { - odm = vba->ODMCombineEnabled[vba->pipe_plane[i]] != + if (split[dc_pipe_idx]) { + odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] != dm_odm_combine_mode_disabled; if (odm && resource_is_pipe_type(pipe, OPP_HEAD)) update_slice_table_for_stream( - table, pipe->stream, split[i] - 1); + table, pipe->stream, split[dc_pipe_idx] - 1); else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE)) update_slice_table_for_plane(table, pipe, - pipe->plane_state, split[i] - 1); + pipe->plane_state, split[dc_pipe_idx] - 1); updated = true; } + dml_pipe_idx++; } return updated; } -- cgit v1.2.3 From 8c4e9105b2a8ab4ac4e6eeb479951ba6a3b4e897 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 1 Aug 2023 10:37:41 -0400 Subject: drm/amdgpu: optimize RLC powerdown notification on Vangogh The smu needs to get the rlc power down message to sync the rlc state with smu, the rlc state updating message need to be sent at while smu begin suspend sequence , otherwise SMU will crash while RLC state is not notified by driver, and rlc state probally changed after that notification, so it needs to notify rlc state to smu at the end of the suspend sequence in amdgpu_device_suspend() that can make sure the rlc state is correctly set to SMU. [ 101.000590] amdgpu 0000:03:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x0000001E SMN_C2PMSG_82:0x00000000 [ 101.000598] amdgpu 0000:03:00.0: amdgpu: Failed to disable gfxoff! [ 110.838026] amdgpu 0000:03:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x0000001E SMN_C2PMSG_82:0x00000000 [ 110.838035] amdgpu 0000:03:00.0: amdgpu: Failed to disable smu features. [ 110.838039] amdgpu 0000:03:00.0: amdgpu: Fail to disable dpm features! 
[ 110.838040] [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block failed -62 [ 110.884394] PM: suspend of devices aborted after 21213.620 msecs [ 110.884402] PM: start suspend of devices aborted after 21213.882 msecs [ 110.884405] PM: Some devices failed to suspend, or early wake event detected Reviewed-by: Yifan Zhang Signed-off-by: Perry Yuan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ drivers/gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 18 ++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 5 +++++ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 5 ++--- drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 1 + 8 files changed, 43 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7eeaf0aa7f81..5c0817cbc7c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4538,6 +4538,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); + r = amdgpu_dpm_notify_rlc_state(adev, false); + if (r) + return r; + return 0; } diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index cd3c40a86029..0d1209f2cf31 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -444,6 +444,7 @@ struct amd_pm_funcs { struct dpm_clocks *clock_table); int (*get_smu_prv_buf_details)(void *handle, void **addr, size_t *size); void (*pm_compute_clocks)(void *handle); + int (*notify_rlc_state)(void *handle, bool en); }; struct metrics_table_header { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 08cb79401410..8ec11da0319f 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -181,6 +181,24 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en) +{ + int ret = 0; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (pp_funcs && pp_funcs->notify_rlc_state) { + mutex_lock(&adev->pm.mutex); + + ret = pp_funcs->notify_rlc_state( + adev->powerplay.pp_handle, + en); + + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index feccd2a7120d..482ea30147ab 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -415,6 +415,8 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, enum pp_mp1_state mp1_state); +int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en); + int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev); int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 1ead323f1c78..e1a5ee911dbb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1710,6 
+1710,16 @@ static int smu_disable_dpms(struct smu_context *smu) } } + /* Notify SMU RLC is going to be off, stop RLC and SMU interaction. + * otherwise SMU will hang while interacting with RLC if RLC is halted + * this is a WA for Vangogh asic which fix the SMU hang issue. + */ + ret = smu_notify_rlc_state(smu, false); + if (ret) { + dev_err(adev->dev, "Fail to notify rlc status!\n"); + return ret; + } + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) && !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) && !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 8def291b18bc..23fa71cafb14 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1360,6 +1360,11 @@ struct pptable_funcs { * management. */ int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable); + + /** + * @notify_rlc_state: Notify RLC power state to SMU. + */ + int (*notify_rlc_state)(struct smu_context *smu, bool en); }; typedef enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 762b31455a0b..2ff6deedef95 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -2193,8 +2193,7 @@ static int vangogh_get_dpm_clock_table(struct smu_context *smu, struct dpm_clock return 0; } - -static int vangogh_system_features_control(struct smu_context *smu, bool en) +static int vangogh_notify_rlc_state(struct smu_context *smu, bool en) { struct amdgpu_device *adev = smu->adev; int ret = 0; @@ -2523,7 +2522,7 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .print_clk_levels = vangogh_common_print_clk_levels, .set_default_dpm_table = vangogh_set_default_dpm_tables, .set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters, - .system_features_control = vangogh_system_features_control, + .notify_rlc_state = vangogh_notify_rlc_state, .feature_is_enabled = smu_cmn_feature_is_enabled, .set_power_profile_mode = vangogh_set_power_profile_mode, .get_power_profile_mode = vangogh_get_power_profile_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index 80b3c3efc006..64766ac69c53 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -97,6 +97,7 @@ #define smu_get_default_config_table_settings(smu, config_table) smu_ppt_funcs(get_default_config_table_settings, -EOPNOTSUPP, smu, config_table) #define smu_set_config_table(smu, config_table) smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table) #define smu_init_pptable_microcode(smu) smu_ppt_funcs(init_pptable_microcode, 0, smu) +#define smu_notify_rlc_state(smu, en) smu_ppt_funcs(notify_rlc_state, 0, smu, en) #endif #endif -- cgit v1.2.3 From 0652a1c8a4a434a9766ca6bc52487c907df1864d Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 7 Nov 2023 11:12:45 -0500 Subject: drm/amd/display: Add Z8 watermarks for DML2 bbox overrides [Why] We can override SR watermarks but not Z8 ones. [How] Add new parameters for Z8 matching the SR ones and feed them into the states. These also weren't being applied to every state, so make sure that we loop over and update all SOC states if given an override. 
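As a simplified sketch (placeholder types and names, not the actual DML
structures), the override handling now walks every state instead of only
touching state 0:

  /* Sketch only: apply a non-zero override to every SOC state. */
  struct state_entry { double sr_exit_z8_time_us; };
  struct bbox_overrides { double sr_exit_z8_time_us; };

  static void apply_z8_overrides(struct state_entry *states, int num_states,
                                 const struct bbox_overrides *ovr)
  {
          int i;

          for (i = 0; i < num_states; i++) {
                  if (ovr->sr_exit_z8_time_us)
                          states[i].sr_exit_z8_time_us = ovr->sr_exit_z8_time_us;
          }
  }

The same pattern applies to the SR, urgent, DRAM-clock-change and Fclk
latency overrides in the diff below.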
Reviewed-by: Jun Lei Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml2_translation_helper.c | 47 +++++++++++++++------- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h | 2 + 2 files changed, 34 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 75171bee6f71..2b9638c6d9b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -341,25 +341,42 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, break; } - /* Override from passed values, mainly for debugging purposes, if available */ - if (dml2->config.bbox_overrides.sr_exit_latency_us) { - p->in_states->state_array[0].sr_exit_time_us = dml2->config.bbox_overrides.sr_exit_latency_us; - } + /* Override from passed values, if available */ + for (i = 0; i < p->in_states->num_states; i++) { + if (dml2->config.bbox_overrides.sr_exit_latency_us) { + p->in_states->state_array[i].sr_exit_time_us = + dml2->config.bbox_overrides.sr_exit_latency_us; + } - if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { - p->in_states->state_array[0].sr_enter_plus_exit_time_us = dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; - } + if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { + p->in_states->state_array[i].sr_enter_plus_exit_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; + } - if (dml2->config.bbox_overrides.urgent_latency_us) { - p->in_states->state_array[0].urgent_latency_pixel_data_only_us = dml2->config.bbox_overrides.urgent_latency_us; - } + if (dml2->config.bbox_overrides.sr_exit_z8_time_us) { + p->in_states->state_array[i].sr_exit_z8_time_us = + dml2->config.bbox_overrides.sr_exit_z8_time_us; + } - if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { - p->in_states->state_array[0].dram_clock_change_latency_us = dml2->config.bbox_overrides.dram_clock_change_latency_us; - } + if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) { + p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us; + } + + if (dml2->config.bbox_overrides.urgent_latency_us) { + p->in_states->state_array[i].urgent_latency_pixel_data_only_us = + dml2->config.bbox_overrides.urgent_latency_us; + } - if (dml2->config.bbox_overrides.fclk_change_latency_us) { - p->in_states->state_array[0].fclk_change_latency_us = dml2->config.bbox_overrides.fclk_change_latency_us; + if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { + p->in_states->state_array[i].dram_clock_change_latency_us = + dml2->config.bbox_overrides.dram_clock_change_latency_us; + } + + if (dml2->config.bbox_overrides.fclk_change_latency_us) { + p->in_states->state_array[i].fclk_change_latency_us = + dml2->config.bbox_overrides.fclk_change_latency_us; + } } /* DCFCLK stas values are project specific */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 317f90776d97..fe15baa4bf09 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -139,6 +139,8 @@ struct dml2_soc_bbox_overrides { double urgent_latency_us; double sr_exit_latency_us; double sr_enter_plus_exit_latency_us; + double sr_exit_z8_time_us; + 
double sr_enter_plus_exit_z8_time_us; double dram_clock_change_latency_us; double fclk_change_latency_us; unsigned int dram_num_chan; -- cgit v1.2.3 From 884e9b0827e889a8742e203ccd052101fb0b945d Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Nov 2023 10:14:21 +0800 Subject: drm/amdgpu: Do not issue gpu reset from nbio v7_9 bif interrupt In nbio v7_9, host driver should not issu gpu reset Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 23f26f8caad4..25a3da83e0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -611,11 +611,6 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device dev_info(adev->dev, "RAS controller interrupt triggered " "by NBIF error\n"); - - /* ras_controller_int is dedicated for nbif ras error, - * not the global interrupt for sync flood - */ - amdgpu_ras_reset_gpu(adev); } amdgpu_ras_error_data_fini(&err_data); -- cgit v1.2.3 From 4b27a33c3b173bef1d19ba89e0b9b812b4fddd25 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Mon, 20 Nov 2023 11:31:32 -0600 Subject: drm/amdgpu: Force order between a read and write to the same address Setting register to force ordering to prevent read/write or write/read hazards for un-cached modes. Signed-off-by: Alex Sierra Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++++ drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0c6133cc5e57..40ce12323164 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -89,6 +89,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); +static const struct soc15_reg_golden golden_settings_gc_11_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) +}; + static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), @@ -304,6 +308,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) default: break; } + soc15_program_register_sequence(adev, + golden_settings_gc_11_0, + (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); + } static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h index c92c4b83253f..4bff1ef8a9a6 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h @@ -6369,6 +6369,8 @@ #define regTCP_INVALIDATE_BASE_IDX 1 #define regTCP_STATUS 0x19a1 #define regTCP_STATUS_BASE_IDX 1 +#define regTCP_CNTL 0x19a2 +#define regTCP_CNTL_BASE_IDX 1 #define regTCP_CNTL2 0x19a3 #define regTCP_CNTL2_BASE_IDX 1 #define regTCP_DEBUG_INDEX 0x19a5 -- cgit v1.2.3 From cab667a87133d409ff18913fd53c2324803ea8d2 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 7 Nov 2023 11:15:16 -0500 Subject: 
drm/amd/display: Feed SR and Z8 watermarks into DML2 for DCN35 [Why] We've updated the table but the values aren't being reflected in DML2 calculation. [How] Pass them into the bbox overrides. Reviewed-by: Jun Lei Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 30d78ad91b9c..21c17d3296a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -329,6 +329,15 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, /*temp till dml2 fully work without dml1*/ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); + + /* Update latency values */ + dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us; + + dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us; + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us; + + dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us; + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us; } static bool is_dual_plane(enum surface_pixel_format format) -- cgit v1.2.3 From 6f395cebdd8927fbffdc3a55a14fcacf93634359 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 7 Nov 2023 15:07:56 -0500 Subject: drm/amd/display: Fix MPCC 1DLUT programming [Why] Wrong function is used to translate LUT values to HW format, leading to visible artifacting in some cases. [How] Use the correct cm3_helper function. 
Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Krunoslav Kovac Acked-by: Hamza Mahfooz Signed-off-by: Ilya Bakoulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 6a65af8c36b9..5f7f474ef51c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -487,8 +487,7 @@ bool dcn32_set_mcm_luts( if (plane_state->blend_tf->type == TF_TYPE_HWPWL) lut_params = &plane_state->blend_tf->pwl; else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - plane_state->blend_tf, + cm3_helper_translate_curve_to_hw_format(plane_state->blend_tf, &dpp_base->regamma_params, false); lut_params = &dpp_base->regamma_params; } @@ -503,8 +502,7 @@ bool dcn32_set_mcm_luts( else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) { // TODO: dpp_base replace ASSERT(false); - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - plane_state->in_shaper_func, + cm3_helper_translate_curve_to_hw_format(plane_state->in_shaper_func, &dpp_base->shaper_params, true); lut_params = &dpp_base->shaper_params; } -- cgit v1.2.3 From 9be601135ba8ac69880c01606c82140f2dde105e Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 7 Nov 2023 17:01:49 -0500 Subject: drm/amd/display: Use DRAM speed from validation for dummy p-state [Description] When choosing which dummy p-state latency to use, we need to use the DRAM speed from validation. The DRAMSpeed DML variable can change because we use different input params to DML when populating watermarks set B. 
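In sketch form (placeholder types, assuming a single validation pass followed
by later DML runs that can overwrite the variable), the fix is to snapshot the
validated value and use the snapshot for the dummy p-state decision:

  /* Sketch only: cache the validated DRAM speed before it can change. */
  struct dml_vba_sketch { double DRAMSpeed; };

  static double pick_min_dram_speed_mts(const struct dml_vba_sketch *vba_after_validation)
  {
          /* Taken immediately after mode validation... */
          double dram_speed_from_validation = vba_after_validation->DRAMSpeed;

          /*
           * ...later watermark set B population re-runs DML with different
           * inputs and may modify DRAMSpeed, so the dummy p-state latency
           * choice must use the cached value, not the live one.
           */
          return dram_speed_from_validation;
  }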
Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 44b0666e53b0..b46cde525066 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2235,6 +2235,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, int i, pipe_idx, vlevel_temp = 0; double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed; double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation; bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; @@ -2422,7 +2423,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, } if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + min_dram_speed_mts = dram_speed_from_validation; min_dram_speed_mts_margin = 160; context->bw_ctx.dml.soc.dram_clock_change_latency_us = -- cgit v1.2.3 From 9976421f8cb26e22b611ad7036c8b26340dcce25 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 27 Oct 2023 14:12:51 -0400 Subject: drm/amd/display: Do not read DPREFCLK spread info from LUT on DCN35 [WHY] Currently DCN35 does not spread DPREFCLK [HOW] Remove hardcoded table with nonzero caps Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Michael Strauss Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 3469f692d6ea..0f3f6a9d5144 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -515,11 +515,6 @@ static DpmClocks_t_dcn35 dummy_clocks; static struct dcn35_watermarks dummy_wms = { 0 }; -static struct dcn35_ss_info_table ss_info_table = { - .ss_divider = 1000, - .ss_percentage = {0, 0, 375, 375, 375} -}; - static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table) { int i, num_valid_sets; @@ -965,21 +960,6 @@ struct clk_mgr_funcs dcn35_fpga_funcs = { .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, }; -static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) -{ - uint32_t clock_source; - struct dc_context *ctx = clk_mgr->base.ctx; - - REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); - - clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; - - if (clk_mgr->dprefclk_ss_percentage != 0) { - clk_mgr->ss_on_dprefclk = true; - clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider; - } -} - void dcn35_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_dcn35 *clk_mgr, @@ -1052,8 +1032,6 @@ void dcn35_clk_mgr_construct( dce_clock_read_ss_info(&clk_mgr->base); /*when clk src is from FCH, it could have ss, same clock 
src as DPREF clk*/ - dcn35_read_ss_info_from_lut(&clk_mgr->base); - clk_mgr->base.base.bw_params = &dcn35_bw_params; if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { -- cgit v1.2.3 From 67e38874b85b8df7b23d29f78ac3d7ecccd9519d Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 8 Nov 2023 17:16:28 -0500 Subject: drm/amd/display: Increase num voltage states to 40 [Description] If during driver init stage there are greater than 20 intermediary voltage states while constructing the SOC BB we could hit issues because we will index outside of the clock_limits array and start overwriting data. Increase the total number of states to 40 to avoid this issue. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dc_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h index 2cbdd75429ff..6e669a2c5b2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h @@ -36,7 +36,7 @@ * Define the maximum amount of states supported by the ASIC. Every ASIC has a * specific number of states; this macro defines the maximum number of states. */ -#define DC__VOLTAGE_STATES 20 +#define DC__VOLTAGE_STATES 40 #define DC__NUM_DPP__4 1 #define DC__NUM_DPP__0_PRESENT 1 #define DC__NUM_DPP__1_PRESENT 1 -- cgit v1.2.3 From d9e865826c202b262f9ee3f17a03cc4ac5d44ced Mon Sep 17 00:00:00 2001 From: Camille Cho Date: Fri, 3 Nov 2023 12:08:42 +0800 Subject: drm/amd/display: Simplify brightness initialization [Why] Remove the brightness cache in DC. It uses a single value to represent the brightness for both SDR and HDR mode. This leads to flash in HDR on/off. It also unconditionally programs brightness as in HDR mode. This may introduce garbage on SDR mode in miniLED panel. [How] Simplify the initialization flow by removing the DC cache and taking what panel has as default. Expand the mechanism for PWM to DPCD Aux to restore cached brightness value generally. 
Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Krunoslav Kovac Acked-by: Hamza Mahfooz Signed-off-by: Camille Cho Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 - drivers/gpu/drm/amd/display/dc/dc_types.h | 4 ---- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 2 +- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 3 +-- .../display/dc/link/protocols/link_edp_panel_control.c | 16 +++------------- .../display/dc/link/protocols/link_edp_panel_control.h | 1 - 6 files changed, 5 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c4e5c3350b75..2cafd644baff 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1609,7 +1609,6 @@ struct dc_link { enum edp_revision edp_revision; union dpcd_sink_ext_caps dpcd_sink_ext_caps; - struct backlight_settings backlight_settings; struct psr_settings psr_settings; struct replay_settings replay_settings; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index fcb825e4f1bb..35d146217aef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -991,10 +991,6 @@ struct link_mst_stream_allocation_table { struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM]; }; -struct backlight_settings { - uint32_t backlight_millinits; -}; - /* PSR feature flags */ struct psr_settings { bool psr_feature_enabled; // PSR is supported by sink diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index f2fe523f914f..24153b0df503 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -879,7 +879,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, (link->dpcd_sink_ext_caps.bits.oled == 1)) { dpcd_set_source_specific_data(link); msleep(post_oui_delay); - set_cached_brightness_aux(link); + set_default_brightness_aux(link); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 34a4a8c0e18c..f8e01ca09d96 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2142,8 +2142,7 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (link->dpcd_sink_ext_caps.bits.oled == 1 || link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 || link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) { - set_cached_brightness_aux(link); - + set_default_brightness_aux(link); if (link->dpcd_sink_ext_caps.bits.oled == 1) msleep(bl_oled_enable_delay); edp_backlight_enable_aux(link, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e32a7974a4bc..996e4ee99023 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -170,7 +170,6 @@ bool edp_set_backlight_level_nits(struct dc_link *link, *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits; *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; - link->backlight_settings.backlight_millinits = backlight_millinits; if (!link->dpcd_caps.panel_luminance_control) { if 
(core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL, @@ -288,9 +287,9 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if < 1 nits or > 5000, it might be wrong readback - if (default_backlight < 1000 || default_backlight > 5000000) - default_backlight = 150000; // + // if > 5000, it might be wrong readback + if (default_backlight > 5000000) + default_backlight = 150000; return edp_set_backlight_level_nits(link, true, default_backlight, 0); @@ -298,15 +297,6 @@ bool set_default_brightness_aux(struct dc_link *link) return false; } -bool set_cached_brightness_aux(struct dc_link *link) -{ - if (link->backlight_settings.backlight_millinits) - return edp_set_backlight_level_nits(link, true, - link->backlight_settings.backlight_millinits, 0); - else - return set_default_brightness_aux(link); - return false; -} bool edp_is_ilr_optimization_enabled(struct dc_link *link) { if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index ebf7deb63d13..a034288ad75d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -30,7 +30,6 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); bool set_default_brightness_aux(struct dc_link *link); -bool set_cached_brightness_aux(struct dc_link *link); void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd); int edp_get_backlight_level(const struct dc_link *link); bool edp_get_backlight_level_nits(struct dc_link *link, -- cgit v1.2.3 From 5c908a3586492d469aef4f60f74f5298b7cb68af Mon Sep 17 00:00:00 2001 From: Li Ma Date: Tue, 21 Nov 2023 16:54:59 +0800 Subject: drm/amdgpu: add init_registers for nbio v7.11 enable init_registers callback func for nbio v7.11. 
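The callback follows the usual read/modify/write idiom for these BIF
registers; roughly, with stub helpers standing in for the SOC15 register
accessors and field macros (names here are placeholders):

  /* Sketch only: read once, set the field, write back only on change. */
  static unsigned int read_pcie_mst_ctrl_3(void)     { return 0; }
  static void write_pcie_mst_ctrl_3(unsigned int v)  { (void)v; }
  #define MAX_READ_REQUEST_SIZE_MODE_BIT (1u << 27)  /* placeholder for REG_SET_FIELD() */

  static void init_registers_sketch(void)
  {
          unsigned int def, data;

          def = data = read_pcie_mst_ctrl_3();
          data |= MAX_READ_REQUEST_SIZE_MODE_BIT;

          if (def != data)
                  write_pcie_mst_ctrl_3(data);
  }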
Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c | 18 +++++++------- .../amd/include/asic_reg/nbio/nbio_7_11_0_offset.h | 2 ++ .../include/asic_reg/nbio/nbio_7_11_0_sh_mask.h | 29 ++++++++++++++++++++++ 3 files changed, 40 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 676ab1d20d2f..1f52b4b1db03 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -259,17 +259,17 @@ const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = { static void nbio_v7_11_init_registers(struct amdgpu_device *adev) { -/* uint32_t def, data; + uint32_t def, data; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); - def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); - data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, - CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); - data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, - CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); - if (def != data) - WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); -*/ } static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h index ff30f04be591..7ee3d291120d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h @@ -781,6 +781,8 @@ #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2_BASE_IDX 5 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1 0x420187 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1_BASE_IDX 5 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3 0x4201c6 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3_BASE_IDX 5 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h index 7f131999a263..eb8c556d9c93 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h @@ -24646,6 +24646,35 @@ //BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L +//BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE__SHIFT 0x8 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE__SHIFT 0x9 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE__SHIFT 0xb +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE__SHIFT 0xd +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN__SHIFT 0xf +#define 
BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE__SHIFT 0x10 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE__SHIFT 0x11 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE__SHIFT 0x14 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE__SHIFT 0x15 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE__SHIFT 0x18 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE__SHIFT 0x19 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT 0x1b +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT 0x1c +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE__SHIFT 0x1e +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE_MASK 0x00000100L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE_MASK 0x00000600L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE_MASK 0x00001800L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE_MASK 0x00006000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN_MASK 0x00008000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE_MASK 0x00010000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE_MASK 0x000E0000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE_MASK 0x00100000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE_MASK 0x00E00000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE_MASK 0x01000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE_MASK 0x06000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK 0x08000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK 0x30000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE_MASK 0xC0000000L // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp //BIF_CFG_DEV0_RC0_VENDOR_ID -- cgit v1.2.3 From 6b0b7789a7a5f3e69185449f891beea58e563f9b Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 21 Nov 2023 11:06:51 +0800 Subject: drm/amdgpu: fix memory overflow in the IB test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a memory overflow issue in the gfx IB test for some ASICs. At least 20 bytes are needed for the IB test packet. v2: correct code indentation errors. 
(Christian) Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 40ce12323164..8ed4a6fb147a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -427,7 +427,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 885ebd703260..1943beb135c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e3ff6e46f3f7..69c500910746 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1039,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 40d06d32bb74..4a09cc0d8ce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -297,8 +297,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; -- cgit v1.2.3 From 0eec708ec3c2cb4076cd239605eb6d51e7c23e77 Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Wed, 22 Nov 2023 11:15:23 -0800 Subject: drm/i915/pxp: Add drm_dbgs for critical PXP events. Debugging PXP issues can't even begin without understanding precedding sequence of important events. Add drm_dbg into the most important PXP events. v5 : - rebase. v4 : - rebase. v3 : - move gt_dbg to after mutex block in function i915_gsc_proxy_component_bind. (Vivaik) v2 : - remove __func__ since drm_dbg covers that (Jani). - add timeout dbg of the restart from front-end (Alan). 
Signed-off-by: Alan Previn Reviewed-by: Vivaik Balasubrawmanian Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20231122191523.58379-1-alan.previn.teres.alexis@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c | 2 ++ drivers/gpu/drm/i915/pxp/intel_pxp.c | 15 ++++++++++++--- drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 5 +++-- drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 6 +++++- drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 1 + 5 files changed, 23 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c index 5f138de3c14f..40817ebcca71 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -322,6 +322,7 @@ static int i915_gsc_proxy_component_bind(struct device *i915_kdev, gsc->proxy.component = data; gsc->proxy.component->mei_dev = mei_kdev; mutex_unlock(&gsc->proxy.mutex); + gt_dbg(gt, "GSC proxy mei component bound\n"); return 0; } @@ -342,6 +343,7 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev, with_intel_runtime_pm(&i915->runtime_pm, wakeref) intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE), HECI_H_CSR_IE | HECI_H_CSR_RST, 0); + gt_dbg(gt, "GSC proxy mei component unbound\n"); } static const struct component_ops i915_gsc_proxy_component_ops = { diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index dc327cf40b5a..e11f562b1876 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -303,6 +303,8 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp) if (!pxp->arb_is_valid) return 0; + + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for suspend/fini"); /* * To ensure synchronous and coherent session teardown completion * in response to suspend or shutdown triggers, don't use a worker. @@ -324,6 +326,8 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp) if (pxp->arb_is_valid) return 0; + + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for restart"); /* * The arb-session is currently inactive and we are doing a reset and restart * due to a runtime event. Use the worker that was designed for this. 
@@ -332,8 +336,11 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp) timeout = intel_pxp_get_backend_timeout_ms(pxp); - if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) + if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) { + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: restart backend timed out (%d ms)", + timeout); return -ETIMEDOUT; + } return 0; } @@ -414,10 +421,12 @@ int intel_pxp_start(struct intel_pxp *pxp) int ret = 0; ret = intel_pxp_get_readiness_status(pxp, PXP_READINESS_TIMEOUT); - if (ret < 0) + if (ret < 0) { + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: tried but not-avail (%d)", ret); return ret; - else if (ret > 1) + } else if (ret > 1) { return -EIO; /* per UAPI spec, user may retry later */ + } mutex_lock(&pxp->arb_mutex); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c index 91e9622c07d0..d81750b9bdda 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c @@ -40,11 +40,12 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir) GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) { /* immediately mark PXP as inactive on termination */ intel_pxp_mark_termination_in_progress(pxp); - pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED; + pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED | + PXP_EVENT_TYPE_IRQ; } if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT) - pxp->session_events |= PXP_TERMINATION_COMPLETE; + pxp->session_events |= PXP_TERMINATION_COMPLETE | PXP_EVENT_TYPE_IRQ; if (pxp->session_events) queue_work(system_unbound_wq, &pxp->session_work); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index 0a3e66b0265e..091c86e03d1a 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -137,8 +137,10 @@ void intel_pxp_terminate(struct intel_pxp *pxp, bool post_invalidation_needs_res static void pxp_terminate_complete(struct intel_pxp *pxp) { /* Re-create the arb session after teardown handle complete */ - if (fetch_and_zero(&pxp->hw_state_invalidated)) + if (fetch_and_zero(&pxp->hw_state_invalidated)) { + drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: creating arb_session after invalidation"); pxp_create_arb_session(pxp); + } complete_all(&pxp->termination); } @@ -157,6 +159,8 @@ static void pxp_session_work(struct work_struct *work) if (!events) return; + drm_dbg(>->i915->drm, "PXP: processing event-flags 0x%08x", events); + if (events & PXP_INVAL_REQUIRED) intel_pxp_invalidate(pxp); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h index 7e11fa8034b2..07864b584cf4 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h @@ -124,6 +124,7 @@ struct intel_pxp { #define PXP_TERMINATION_REQUEST BIT(0) #define PXP_TERMINATION_COMPLETE BIT(1) #define PXP_INVAL_REQUIRED BIT(2) +#define PXP_EVENT_TYPE_IRQ BIT(3) }; #endif /* __INTEL_PXP_TYPES_H__ */ -- cgit v1.2.3 From 08448812acb2ab701cd5ff7e1a1dc97f7f10260c Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 8 Nov 2023 10:55:53 -0500 Subject: drm/amd/display: Remove min_dst_y_next_start check for Z8 [Why] Flickering occurs on DRR supported panels when engaged in DRR due to min_dst_y_next becoming larger than the frame size itself. 
[How] In general, we should be able to enter Z8 when this is engaged but it might be a net power loss even if the calculation wasn't bugged. Don't support enabling Z8 during the DRR region. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Syed Hassan Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 7fc8b18096ba..ec77b2b41ba3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -950,10 +950,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc { int plane_count; int i; - unsigned int min_dst_y_next_start_us; plane_count = 0; - min_dst_y_next_start_us = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; @@ -975,26 +973,15 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { struct dc_link *link = context->streams[0]->sink->link; struct dc_stream_status *stream_status = &context->stream_status[0]; - struct dc_stream_state *current_stream = context->streams[0]; int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; bool is_pwrseq0 = link->link_index == 0; - bool isFreesyncVideo; - - isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max; - isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) { - min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us; - break; - } - } /* Don't support multi-plane configurations */ if (stream_status->plane_count > 1) return DCN_ZSTATE_SUPPORT_DISALLOW; - if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000)) + if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0) return DCN_ZSTATE_SUPPORT_ALLOW; else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr) return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; -- cgit v1.2.3 From 4636a211980052ca0df90265c8a3ed2d46099091 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 8 Nov 2023 10:59:00 -0500 Subject: drm/amd/display: Update min Z8 residency time to 2100 for DCN314 [Why] Some panels with residency period of 2054 exhibit flickering with Z8 at the end of the frame. [How] As a workaround, increase the limit to block these panels. 
Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Syed Hassan Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 677361d74a4e..c97391edb5ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -871,7 +871,7 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .enable_z9_disable_interface = true, - .minimum_z8_residency_time = 2000, + .minimum_z8_residency_time = 2100, .psr_skip_crtc_disable = true, .replay_skip_crtc_disabled = true, .disable_dmcu = true, -- cgit v1.2.3 From c6df7f313794c3ad41a49b9a7c95da369db607f3 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 8 Nov 2023 14:38:29 +0800 Subject: drm/amdgpu: correct the amdgpu runtime dereference usage count Fix the amdgpu runpm dereference usage count. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 0cacd0b9f8be..b8fbe97efe1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -340,14 +340,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, adev->have_disp_power_ref = true; return ret; } - /* if we have no active crtcs, then drop the power ref - * we got before + /* if we have no active crtcs, then go to + * drop the power ref we got before */ - if (!active && adev->have_disp_power_ref) { - pm_runtime_put_autosuspend(dev->dev); + if (!active && adev->have_disp_power_ref) adev->have_disp_power_ref = false; - } - out: /* drop the power reference we got coming in here */ pm_runtime_put_autosuspend(dev->dev); -- cgit v1.2.3 From eb28018943fed7639dfea1c9ec9c756ec692b99a Mon Sep 17 00:00:00 2001 From: Zhongwei Date: Wed, 8 Nov 2023 16:34:36 +0800 Subject: drm/amd/display: force toggle rate wa for first link training for a retimer [WHY] Handover from DMUB to driver does not perform link rate toggle. It might cause link training failure for boot up. [HOW] Force toggle rate wa for first link train. link->vendor_specific_lttpr_link_rate_wa should be zero then. 
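In sketch form (placeholder names), the guard around the workaround is
widened so that the rate toggle also happens on the very first link
training after handover, when no rate has been cached yet:

  /* Sketch only: a cached value of 0 means "first training after handover". */
  static int needs_toggle_rate_wa(unsigned int cached_rate_wa, unsigned int target_rate)
  {
          return cached_rate_wa == target_rate || cached_rate_wa == 0;
  }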
Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Michael Strauss Acked-by: Hamza Mahfooz Signed-off-by: Zhongwei Signed-off-by: Alex Deucher --- .../dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c index fd8f6f198146..68096d12f52f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c @@ -115,7 +115,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq lt_settings->cr_pattern_time = 16000; /* Fixed VS/PE specific: Toggle link rate */ - apply_toggle_rate_wa = (link->vendor_specific_lttpr_link_rate_wa == target_rate); + apply_toggle_rate_wa = ((link->vendor_specific_lttpr_link_rate_wa == target_rate) || (link->vendor_specific_lttpr_link_rate_wa == 0)); target_rate = get_dpcd_link_rate(<_settings->link_settings); toggle_rate = (target_rate == 0x6) ? 0xA : 0x6; @@ -271,7 +271,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( /* Vendor specific: Toggle link rate */ toggle_rate = (rate == 0x6) ? 0xA : 0x6; - if (link->vendor_specific_lttpr_link_rate_wa == rate) { + if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) { core_link_write_dpcd( link, DP_LINK_BW_SET, @@ -617,7 +617,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( /* Vendor specific: Toggle link rate */ toggle_rate = (rate == 0x6) ? 0xA : 0x6; - if (link->vendor_specific_lttpr_link_rate_wa == rate) { + if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) { core_link_write_dpcd( link, DP_LINK_BW_SET, -- cgit v1.2.3 From d60f56b92d3c59364a54618d557d7f9ba5939b21 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 3 Nov 2023 18:07:11 -0400 Subject: drm/amd/display: Update DCN35 clock table policy [Why] The new table doesn't have an implicit mapping between Fclk SOC voltage and MemClk and it currently builds the table off of number of Fclk states rather than DcfClock states. The DML table in use is not correct for functionality or power and does not align with our existing policies for DCN3x. [How] Build the table based on DcfClock with the following assumptions: 1. Raising Soc voltage is the most expensive operation, so assume that running at max DispClock or DppClock is preferable. 2. Assume that we can run at max Fclk / MemClk at any state, but restrict the maximum state to the very last entry in the table as the worst case scenario. 3. Assume that Fclk always has a 2x multiplier on DcfClock unless the table specifies something lower. 
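Assumption 3 in particular reduces to deriving Fclk from DcfClock at a 2x
ratio and clamping it to the highest Fclk the table reports; as a sketch
(placeholder names):

  /* Sketch only: Fclk tied to DcfClock at 2x, capped at the table maximum. */
  static unsigned int derive_fclk_mhz(unsigned int dcfclk_mhz, unsigned int max_fclk_mhz)
  {
          unsigned int fclk_mhz = 2 * dcfclk_mhz;

          return fclk_mhz < max_fclk_mhz ? fclk_mhz : max_fclk_mhz;
  }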
Reviewed-by: Taimur Hassan Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 77 ++++++++++++++-------- 1 file changed, 51 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 0f3f6a9d5144..19f8d83698be 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -648,27 +648,47 @@ static unsigned int convert_wck_ratio(uint8_t wck_ratio) return 1; } +static inline uint32_t calc_dram_speed_mts(const MemPstateTable_t *entry) +{ + return entry->UClk * convert_wck_ratio(entry->WckRatio) * 2; +} + static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, struct integrated_info *bios_info, DpmClocks_t_dcn35 *clock_table) { struct clk_bw_params *bw_params = clk_mgr->base.bw_params; struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; - uint32_t max_pstate = 0, max_uclk = 0, max_fclk = 0; - uint32_t min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0; int i; + /* Determine min/max p-state values. */ for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { - if (is_valid_clock_value(clock_table->MemPstateTable[i].UClk) && - clock_table->MemPstateTable[i].UClk > max_uclk) { - max_uclk = clock_table->MemPstateTable[i].UClk; + uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); + + if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) { + max_dram_speed_mts = dram_speed_mts; max_pstate = i; } } - /* We expect the table to contain at least one valid Uclk entry. */ - ASSERT(is_valid_clock_value(max_uclk)); + min_dram_speed_mts = max_dram_speed_mts; + min_pstate = max_pstate; + for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); + + if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) { + min_dram_speed_mts = dram_speed_mts; + min_pstate = i; + } + } + + /* We expect the table to contain at least one valid P-state entry. */ + ASSERT(clock_table->NumMemPstatesEnabled && + is_valid_clock_value(max_dram_speed_mts) && + is_valid_clock_value(min_dram_speed_mts)); /* dispclk and dppclk can be max at any voltage, same number of levels for both */ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && @@ -678,47 +698,46 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); } else { + /* Invalid number of entries in the table from PMFW. 
*/ ASSERT(0); } - if (clock_table->NumFclkLevelsEnabled <= NUM_FCLK_DPM_LEVELS) - max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, - clock_table->NumFclkLevelsEnabled); - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { - uint32_t min_uclk = clock_table->MemPstateTable[0].UClk; - int j; + /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ + ASSERT(clock_table->NumDcfClkLevelsEnabled > 0); - for (j = 1; j < clock_table->NumMemPstatesEnabled; j++) { - if (is_valid_clock_value(clock_table->MemPstateTable[j].UClk) && - clock_table->MemPstateTable[j].UClk < min_uclk && - clock_table->MemPstateTable[j].Voltage <= clock_table->SocVoltage[i]) { - min_uclk = clock_table->MemPstateTable[j].UClk; - min_pstate = j; - } - } + max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled); + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + int j; + + /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i]) - break; + break; bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz; bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz; bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz; - bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + + /* Now update clocks we do read */ bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[min_pstate].MemClk; bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[min_pstate].Voltage; bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; - bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( - clock_table->MemPstateTable[min_pstate].WckRatio); - } + bw_params->clk_table.entries[i].wck_ratio = + convert_wck_ratio(clock_table->MemPstateTable[min_pstate].WckRatio); + + /* Dcfclk and Fclk are tied, but at a different ratio */ + bw_params->clk_table.entries[i].fclk_mhz = min(max_fclk, 2 * clock_table->DcfClocks[i]); + } /* Make sure to include at least one entry at highest pstate */ if (max_pstate != min_pstate || i == 0) { if (i > MAX_NUM_DPM_LVL - 1) i = MAX_NUM_DPM_LVL - 1; + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[max_pstate].MemClk; bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[max_pstate].Voltage; @@ -734,6 +753,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk } bw_params->clk_table.num_entries = i--; + /* Make sure all highest clocks are included*/ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); bw_params->clk_table.entries[i].dispclk_mhz = @@ -752,6 +772,11 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk bw_params->clk_table.num_entries_per_clk.num_fclk_levels = clock_table->NumFclkLevelsEnabled; bw_params->clk_table.num_entries_per_clk.num_memclk_levels = clock_table->NumMemPstatesEnabled; bw_params->clk_table.num_entries_per_clk.num_socclk_levels = clock_table->NumSocClkLevelsEnabled; + 
+ /* + * Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ for (i = 0; i < bw_params->clk_table.num_entries; i++) { if (!bw_params->clk_table.entries[i].fclk_mhz) { bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; -- cgit v1.2.3 From c5ca994445a6c99012e70ed6f3550f07efa4c341 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 10 Nov 2023 10:06:09 -0500 Subject: drm/amd/display: Remove config update [Why] Prevent overwrite of dc->config.use_default_clock_table, as it should be pre-configured. Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index 8f1a9c959bb5..70ef1e7ff841 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -1864,7 +1864,7 @@ static bool dcn35_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; - dc->config.use_default_clock_table = false; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { -- cgit v1.2.3 From 2f86bf79b63dbe6963ebc647b77a5f576a906b40 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 22 Nov 2023 15:17:22 -0500 Subject: drm/amdkfd: Use common function for IP version check KFD_GC_VERSION was recently updated to use a new function for IP version checks. As a result, use KFD_GC_VERSION as the common function for all IP version checks in KFD. Signed-off-by: Mukul Joshi Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9cc32f577e38..4c8e278a0d0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1128,7 +1128,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, struct kfd_dev *dev = adev->kfd.dev; uint32_t i; - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) return dev->nodes[0]; for (i = 0; i < dev->num_nodes; i++) -- cgit v1.2.3 From dcf6cd7f35de572a946e2805ed32eb20d429a881 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 10 Nov 2023 10:15:28 -0500 Subject: drm/amd/display: Fix conversions between bytes and KB [Why] There are a number of instances where we convert HostVMMinPageSize or GPUVMMinPageSize from bytes to KB by dividing (rather than multiplying) and vice versa. Additionally, in some cases, a parameter is passed through DML in KB but later checked as if it were in bytes. 
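To make the conversion directions concrete, here is a minimal sketch; the helper names are invented for illustration, and the real fields are the SoC/DML parameters touched in the diff below.

#include <stdint.h>

/* Sketch only: correct conversion directions for the fields discussed above. */
static uint32_t bytes_to_kbytes(uint32_t bytes)  { return bytes / 1024; }
static uint32_t kbytes_to_bytes(uint32_t kbytes) { return kbytes * 1024; }

/* Translating SoC params (stored in bytes) into a DML field that is in KB
 * must divide, e.g.:
 *     hostvm_min_page_size_kbytes = bytes_to_kbytes(hostvm_min_page_size_bytes);
 * and a routine that expects bytes must be handed the KB value multiplied
 * back up, e.g.:
 *     HostVMMinPageSize = kbytes_to_bytes(hostvm_min_page_size_kbytes);
 */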
Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 16 ++++++++-------- .../drm/amd/display/dc/dml2/dml2_translation_helper.c | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 510be909cd75..59718ee33e51 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc 
CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2b9638c6d9b0..48caa34a5ce7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -515,8 +515,8 @@ void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment; out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes; out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes; - out->gpuvm_min_page_size_kbytes = 
in_soc_params->gpuvm_min_page_size_bytes * 1024; - out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes * 1024; + out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024; + out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024; out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes; out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent; out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent; -- cgit v1.2.3 From 47831f4860d4e8cdfee4910d2b76ccd892fd72d1 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Fri, 10 Nov 2023 11:33:45 -0500 Subject: drm/amd/display: Fix black screen on video playback with embedded panel [why] We have dynamic power control in driver but should be ignored when power is forced on. [how] Bypass any power control when it's forced on. Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Sung Joon Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 1 - drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 19f8d83698be..63a0b885b6f0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -1132,7 +1132,6 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_dpp_power_gate = false; ctx->dc->debug.disable_hubp_power_gate = false; ctx->dc->debug.disable_dsc_power_gate = false; - ctx->dc->debug.disable_hpo_power_gate = false; } else { /*let's reset the config control flag*/ ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c index 46f71ff08fd1..d19db8e9b8a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c @@ -261,6 +261,7 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on) uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 0 : 2; uint32_t org_ip_request_cntl; + uint32_t power_forceon; bool block_enabled; if (pg_cntl->ctx->dc->debug.ignore_pg || @@ -277,6 +278,10 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on) return; } + REG_GET(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon); + if (power_forceon) + return; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); @@ -304,6 +309,7 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 
0 : 2; uint32_t org_ip_request_cntl; + uint32_t power_forceon; bool block_enabled; if (pg_cntl->ctx->dc->debug.ignore_pg || @@ -319,6 +325,10 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) return; } + REG_GET(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon); + if (power_forceon) + return; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); -- cgit v1.2.3 From b9f46f0b98784e40288ee393f863f553fde062fa Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Wed, 22 Nov 2023 14:50:34 -0500 Subject: drm/amd/display: fix ABM disablement On recent versions of DMUB firmware, if we want to completely disable ABM we have to pass ABM_LEVEL_IMMEDIATE_DISABLE as the requested ABM level to DMUB. Otherwise, LCD eDP displays are unable to reach their maximum brightness levels. So, to fix this whenever the user requests an ABM level of 0 pass ABM_LEVEL_IMMEDIATE_DISABLE to DMUB instead. Also, to keep the user's experience consistent map ABM_LEVEL_IMMEDIATE_DISABLE to 0 when a user tries to read the requested ABM level. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ee97814ebd99..b452796fc6d3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6267,7 +6267,7 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, dm_new_state->underscan_enable = val; ret = 0; } else if (property == adev->mode_info.abm_level_property) { - dm_new_state->abm_level = val; + dm_new_state->abm_level = val ?: ABM_LEVEL_IMMEDIATE_DISABLE; ret = 0; } @@ -6312,7 +6312,8 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector, *val = dm_state->underscan_enable; ret = 0; } else if (property == adev->mode_info.abm_level_property) { - *val = dm_state->abm_level; + *val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ? 
+ dm_state->abm_level : 0; ret = 0; } @@ -6385,7 +6386,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->pbn = 0; if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) - state->abm_level = amdgpu_dm_abm_level; + state->abm_level = amdgpu_dm_abm_level ?: + ABM_LEVEL_IMMEDIATE_DISABLE; __drm_atomic_helper_connector_reset(connector, &state->base); } -- cgit v1.2.3 From c92da0403d373c03ea5c65c0260c7db6762013b0 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Fri, 3 Nov 2023 14:55:37 -0400 Subject: drm/amd/display: update dcn315 lpddr pstate latency [WHY/HOW] Increase the pstate latency to improve ac/dc transition Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Dmytro Laktyushkin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index b2c4f97afc8b..8776055bbeaa 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -334,7 +334,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -342,7 +342,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -350,7 +350,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -358,7 +358,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, -- cgit v1.2.3 From e222b36e9649404cc0770a6d778d69cf73bcd440 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Nov 2023 09:39:18 -0500 Subject: drm/amdgpu: fix AGP addressing when GART is not at 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This worked by luck if the GART aperture ended up at 0. When we ended up moving GART on some chips, the GART aperture ended up offsetting the AGP address since the resource->start is a GART offset, not an MC address. Fix this by moving the AGP address setup into amdgpu_bo_gpu_offset_no_check(). 
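A small worked example of the distinction may help; the addresses below are hypothetical and only illustrate why a GART page offset stored in resource->start doubles as an MC address only when the GART aperture happens to start at 0.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
    /* Hypothetical values for illustration only. */
    uint64_t gart_domain_start = 0x0000800000000000ULL; /* GART aperture MC base */
    uint64_t agp_addr          = 0x000000f400000000ULL; /* AGP MC address of a BO */

    /* Old path (simplified): the AGP MC address was stashed in
     * resource->start, a field that normally holds a GART page offset,
     * and the GPU-offset helper then added the GART base on top, which
     * is only correct when that base is 0. */
    uint64_t start      = agp_addr >> PAGE_SHIFT;
    uint64_t old_offset = (start << PAGE_SHIFT) + gart_domain_start;

    /* New path (simplified): TT BOs with a valid AGP address use it directly. */
    uint64_t new_offset = agp_addr;

    printf("old=0x%llx new=0x%llx\n",
           (unsigned long long)old_offset, (unsigned long long)new_offset);
    return 0;
}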
v2: check mem_type before checking agp v3: check if the ttm bo has a ttm_tt allocated yet Fixes: 67318cb84341 ("drm/amdgpu/gmc11: set gart placement GC11") Tested-by: Mario Limonciello Reported-by: Jesse Zhang Reported-by: Yifan Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: christian.koenig@amd.com Cc: mario.limonciello@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +--- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 5f71414190e9..d2f273d77e59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -181,6 +181,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + if (!bo->ttm) + return AMDGPU_BO_INVALID_OFFSET; + if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached) return AMDGPU_BO_INVALID_OFFSET; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cef920a93924..d79b4ca1ecfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1527,10 +1527,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint64_t offset; + uint64_t offset = AMDGPU_BO_INVALID_OFFSET; - offset = (bo->tbo.resource->start << PAGE_SHIFT) + - amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); + if (bo->tbo.resource->mem_type == TTM_PL_TT) + offset = amdgpu_gmc_agp_addr(&bo->tbo); + + if (offset == AMDGPU_BO_INVALID_OFFSET) + offset = (bo->tbo.resource->start << PAGE_SHIFT) + + amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); return amdgpu_gmc_sign_extend(offset); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 05991c5c8ddb..ab4a762aed5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -959,10 +959,8 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; addr = amdgpu_gmc_agp_addr(bo); - if (addr != AMDGPU_BO_INVALID_OFFSET) { - bo->resource->start = addr >> PAGE_SHIFT; + if (addr != AMDGPU_BO_INVALID_OFFSET) return 0; - } /* allocate GART space */ placement.num_placement = 1; -- cgit v1.2.3 From 7a88f23e768491bae653b444a96091d2aaeb0818 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 23 Nov 2023 15:33:22 +0800 Subject: drm/amd/pm: fix a memleak in aldebaran_tables_init When kzalloc() for smu_table->ecc_table fails, we should free the previously allocated resources to prevent memleak. 
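The fix follows the usual error-unwind rule: when a later allocation fails, everything allocated earlier in the same init path must be released before returning. A minimal, self-contained sketch of that pattern is below; the names are illustrative and userspace allocators stand in for kzalloc()/kfree(), while the actual patch (in the diff that follows) frees the earlier tables explicitly before returning -ENOMEM.

#include <stdlib.h>
#include <errno.h>

struct tables { void *metrics, *gpu_metrics, *ecc; };

static int tables_init(struct tables *t, size_t metrics_sz, size_t gpu_sz, size_t ecc_sz)
{
    t->metrics = calloc(1, metrics_sz);
    if (!t->metrics)
        return -ENOMEM;

    t->gpu_metrics = calloc(1, gpu_sz);
    if (!t->gpu_metrics)
        goto err_free_metrics;

    t->ecc = calloc(1, ecc_sz);
    if (!t->ecc)
        goto err_free_gpu_metrics;

    return 0;

err_free_gpu_metrics:
    free(t->gpu_metrics);
err_free_metrics:
    free(t->metrics);
    return -ENOMEM;
}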
Fixes: edd794208555 ("drm/amd/pm: add message smu to get ecc_table v2") Signed-off-by: Dinghao Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 1a6675d70a4b..f1440869d1ce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -257,8 +257,11 @@ static int aldebaran_tables_init(struct smu_context *smu) } smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); - if (!smu_table->ecc_table) + if (!smu_table->ecc_table) { + kfree(smu_table->metrics_table); + kfree(smu_table->gpu_metrics_table); return -ENOMEM; + } return 0; } -- cgit v1.2.3 From 6967741d26c87300a51b5e50d4acd104bc1a9759 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 24 Nov 2023 09:56:32 -0600 Subject: drm/amd: Enable PCIe PME from D3 When dGPU is put into BOCO it may be in D3cold but still able send PME on display hotplug event. For this to work it must be enabled as wake source from D3. When runpm is enabled use pci_wake_from_d3() to mark wakeup as enabled by default. Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8f24cabe2155..8b33b130ea36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2263,6 +2263,8 @@ retry_init: pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + pci_wake_from_d3(pdev, TRUE); + /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: -- cgit v1.2.3 From 2161e09cd05a50d80736fe397145340d2e8f6c05 Mon Sep 17 00:00:00 2001 From: Lu Yao Date: Thu, 23 Nov 2023 09:22:34 +0800 Subject: drm/amdgpu: Fix cat debugfs amdgpu_regs_didt causes kernel null pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For 'AMDGPU_FAMILY_SI' family cards, in 'si_common_early_init' func, init 'didt_rreg' and 'didt_wreg' to 'NULL'. But in func 'amdgpu_debugfs_regs_didt_read/write', using 'RREG32_DIDT' 'WREG32_DIDT' lacks of relevant judgment. And other 'amdgpu_ip_block_version' that use these two definitions won't be added for 'AMDGPU_FAMILY_SI'. So, add null pointer judgment before calling. 
Reviewed-by: Christian König Signed-off-by: Lu Yao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a53f436fa9f1..0e61ebdb3f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -638,6 +638,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_rreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -694,6 +697,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_wreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); -- cgit v1.2.3 From 27750e176a4f8549e13fa91f311b29a2e40d47be Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 14 Nov 2023 11:22:09 -0500 Subject: drm/amd/display: Allow DTBCLK disable for DCN35 [Why] DTBCLK is enabled on idle and it will burn power. [How] There's a few issues here: - Always enabling DTBCLK on clock manager init - Setting refclk when DTBCLK is supposed to be disabled - Not applying the correct calculated version refclk, but instead the base value which might be zero On dtbclk_en change we'll message PMFW to enable or disable the clock accordingly. The DTBDTO will be then based on refclk, but it will be set to the default fixed value if there was nothing calculated in DML despite the clock being considered enabled. Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 27 ++++++++++------------ 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 63a0b885b6f0..d5fde7d23fbf 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -232,6 +232,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (dc->work_arounds.skip_clock_update) return; + /* DTBCLK is fixed, so set a default if unspecified. 
*/ + if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) + new_clocks->ref_dtbclk_khz = 600000; + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state @@ -265,8 +269,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { dcn35_smu_set_dtbclk(clk_mgr, true); - dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + + dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz); + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; } /* check that we're not already in D0 */ @@ -314,17 +320,12 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, update_dispclk = true; } - if (!new_clocks->dtbclk_en) { - new_clocks->ref_dtbclk_khz = 600000; - } - /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ if (!dc->debug.disable_dtb_ref_clk_switch && - should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000)) { - /* DCCG requires KHz precision for DTBCLK */ - dcn35_smu_set_dtbclk(clk_mgr, true); - - dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); + should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, + clk_mgr_base->clks.ref_dtbclk_khz / 1000)) { + dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz); + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; } if (dpp_clock_lowered) { @@ -1048,12 +1049,8 @@ void dcn35_clk_mgr_construct( dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); - clk_mgr->base.base.clks.ref_dtbclk_khz = dcn35_smu_get_dtbclk(&clk_mgr->base); - - if (!clk_mgr->base.base.clks.ref_dtbclk_khz) - dcn35_smu_set_dtbclk(&clk_mgr->base, true); + clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; - clk_mgr->base.base.clks.dtbclk_en = true; dce_clock_read_ss_info(&clk_mgr->base); /*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/ -- cgit v1.2.3 From e0409021e34af50e7b6f31635c8d21583d7c43dd Mon Sep 17 00:00:00 2001 From: Candice Li Date: Fri, 24 Nov 2023 09:33:47 +0800 Subject: drm/amdgpu: Update EEPROM I2C address for smu v13_0_0 Check smu v13_0_0 SKU type to select EEPROM I2C address. 
Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 65aa218380be..2fde93b00cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -214,6 +214,12 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, control->i2c_address = EEPROM_I2C_MADDR_0; return true; case IP_VERSION(13, 0, 0): + if (strnstr(atom_ctx->vbios_pn, "D707", + sizeof(atom_ctx->vbios_pn))) + control->i2c_address = EEPROM_I2C_MADDR_0; + else + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): control->i2c_address = EEPROM_I2C_MADDR_4; -- cgit v1.2.3 From 72838777aa38352e20301e123b97110c456cd38e Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Mon, 6 Nov 2023 18:07:51 +0800 Subject: drm/amdkfd: Free gang_ctx_bo and wptr_bo in pqm_uninit [Why] Memory leaks of gang_ctx_bo and wptr_bo. [How] Free gang_ctx_bo and wptr_bo in pqm_uninit. v2: add a common function pqm_clean_queue_resource to free queue's resources. v3: reset pdd->pqd.num_gws when destorying GWS queue. Reviewed-by: Felix Kuehling Signed-off-by: ZhenGuo Yin Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 54 +++++++++++++--------- 1 file changed, 33 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 77649392e233..77f493262e05 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -169,16 +169,43 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) return 0; } +static void pqm_clean_queue_resource(struct process_queue_manager *pqm, + struct process_queue_node *pqn) +{ + struct kfd_node *dev; + struct kfd_process_device *pdd; + + dev = pqn->q->device; + + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return; + } + + if (pqn->q->gws) { + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && + !dev->kfd->shared_resources.enable_mes) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, pqn->q->gws); + pdd->qpd.num_gws = 0; + } + + if (dev->kfd->shared_resources.enable_mes) { + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo); + if (pqn->q->wptr_bo) + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + } +} + void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q && pqn->q->gws && - KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && - !pqn->q->device->kfd->shared_resources.enable_mes) - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (pqn->q) + pqm_clean_queue_resource(pqm, pqn); + kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); list_del(&pqn->process_queue_list); @@ -461,22 +488,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) goto err_destroy_queue; } - if (pqn->q->gws) { - if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && - 
!dev->kfd->shared_resources.enable_mes) - amdgpu_amdkfd_remove_gws_from_process( - pqm->process->kgd_process_info, - pqn->q->gws); - pdd->qpd.num_gws = 0; - } - - if (dev->kfd->shared_resources.enable_mes) { - amdgpu_amdkfd_free_gtt_mem(dev->adev, - pqn->q->gang_ctx_bo); - if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); - - } + pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } -- cgit v1.2.3 From a2ab248d94f5af2c609c8c3329875f92324782c5 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 10 Nov 2023 10:24:20 -0500 Subject: drm/amd/display: Fix some HostVM parameters in DML [Why] A number of DML parameters related to HostVM were either missing or being set incorrectly, which may cause inaccuracies in calculating margins and determining BW limitations. [How] Correct these values where needed and populate the missing values. Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Taimur Hassan Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 33 ++++++++++++++++++++++ .../amd/display/dc/dml2/dml2_translation_helper.c | 9 ++++-- 2 files changed, 39 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 21c17d3296a3..39cf1ae3a3e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -330,6 +330,39 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); + /*copy to dml2, before dml2_create*/ + if (clk_table->num_entries > 2) { + + for (i = 0; i < clk_table->num_entries; i++) { + dc->dml2_options.bbox_overrides.clks_table.num_states = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz = + clock_limits[i].dcfclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz = + clock_limits[i].fabricclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz = + clock_limits[i].dispclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz = + clock_limits[i].dppclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz = + clock_limits[i].socclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = + clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = + clk_table->num_entries; + } + } + /* Update latency values */ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 
48caa34a5ce7..fa8fe5bf7e57 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -1057,9 +1057,12 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat } //Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file - dml_dispcfg->plane.GPUVMEnable = true; - dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4; - dml_dispcfg->plane.HostVMEnable = false; + dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable; + dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels; + dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable; + dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels; + if (dml2->v20.dml_core_ctx.ip.hostvm_enable) + dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; dml2_populate_pipe_to_plane_index_mapping(dml2, context); -- cgit v1.2.3 From ed6e4f0a27ebafffbd12bf3878ab004787685d8a Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 28 Nov 2023 16:47:14 +0530 Subject: drm/amdgpu: Use another offset for GC 9.4.3 remap The legacy region at 0x7F000 maps to valid registers in GC 9.4.3 SOCs. Use 0x1A000 offset instead as MMIO register remap region. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index d4b8d62f4294..c82776e5e9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1161,6 +1161,11 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x46; + /* GC 9.4.3 uses MMIO register region hole at a different offset */ + if (!amdgpu_sriov_vf(adev)) { + adev->rmmio_remap.reg_offset = 0x1A000; + adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000; + } break; default: /* FIXME: not supported yet */ -- cgit v1.2.3 From 88a2b4d34a64bba914c4e245c6de3ca42bea93cf Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 22 Nov 2023 14:28:40 -0600 Subject: nouveau/gsp: document some aspects of GSP-RM Document a few aspects of communication with GSP-RM. These comments are derived from notes made during early development of GSP-RM support in Nouveau, but were not included in the initial patch set. Reviewed-by: Dave Airlie Signed-off-by: Timur Tabi Reviewed-by: Danilo Krummrich Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231122202840.2565153-1-ttabi@nvidia.com --- .../common/shared/msgq/inc/msgq/msgq_priv.h | 51 ++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 82 ++++++++++++++++++++++ 2 files changed, 133 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h index 5a2f273d95c8..0e32e71e123f 100644 --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h @@ -26,6 +26,49 @@ * DEALINGS IN THE SOFTWARE. 
*/ +/** + * msgqTxHeader -- TX queue data structure + * @version: the version of this structure, must be 0 + * @size: the size of the entire queue, including this header + * @msgSize: the padded size of queue element, 16 is minimum + * @msgCount: the number of elements in this queue + * @writePtr: head index of this queue + * @flags: 1 = swap the RX pointers + * @rxHdrOff: offset of readPtr in this structure + * @entryOff: offset of beginning of queue (msgqRxHeader), relative to + * beginning of this structure + * + * The command queue is a queue of RPCs that are sent from the driver to the + * GSP. The status queue is a queue of messages/responses from GSP-RM to the + * driver. Although the driver allocates memory for both queues, the command + * queue is owned by the driver and the status queue is owned by GSP-RM. In + * addition, the headers of the two queues must not share the same 4K page. + * + * Each queue is prefixed with this data structure. The idea is that a queue + * and its header are written to only by their owner. That is, only the + * driver writes to the command queue and command queue header, and only the + * GSP writes to the status (receive) queue and its header. + * + * This is enforced by the concept of "swapping" the RX pointers. This is + * why the 'flags' field must be set to 1. 'rxHdrOff' is how the GSP knows + * where the where the tail pointer of its status queue. + * + * When the driver writes a new RPC to the command queue, it updates writePtr. + * When it reads a new message from the status queue, it updates readPtr. In + * this way, the GSP knows when a new command is in the queue (it polls + * writePtr) and it knows how much free space is in the status queue (it + * checks readPtr). The driver never cares about how much free space is in + * the status queue. + * + * As usual, producers write to the head pointer, and consumers read from the + * tail pointer. When head == tail, the queue is empty. + * + * So to summarize: + * command.writePtr = head of command queue + * command.readPtr = tail of status queue + * status.writePtr = head of status queue + * status.readPtr = tail of command queue + */ typedef struct { NvU32 version; // queue version @@ -38,6 +81,14 @@ typedef struct NvU32 entryOff; // Offset of entries from start of backing store. } msgqTxHeader; +/** + * msgqRxHeader - RX queue data structure + * @readPtr: tail index of the other queue + * + * Although this is a separate struct, it could easily be merged into + * msgqTxHeader. msgqTxHeader.rxHdrOff is simply the offset of readPtr + * from the beginning of msgqTxHeader. + */ typedef struct { NvU32 readPtr; // message id of last message read diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index f6725a5f5bfb..44fb86841c05 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1377,6 +1377,13 @@ r535_gsp_msg_post_event(void *priv, u32 fn, void *repv, u32 repc) return 0; } +/** + * r535_gsp_msg_run_cpu_sequencer() -- process I/O commands from the GSP + * + * The GSP sequencer is a list of I/O commands that the GSP can send to + * the driver to perform for various purposes. The most common usage is to + * perform a special mid-initialization reset. 
+ */ static int r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc) { @@ -1716,6 +1723,23 @@ r535_gsp_libos_id8(const char *name) return id; } +/** + * create_pte_array() - creates a PTE array of a physically contiguous buffer + * @ptes: pointer to the array + * @addr: base address of physically contiguous buffer (GSP_PAGE_SIZE aligned) + * @size: size of the buffer + * + * GSP-RM sometimes expects physically-contiguous buffers to have an array of + * "PTEs" for each page in that buffer. Although in theory that allows for + * the buffer to be physically discontiguous, GSP-RM does not currently + * support that. + * + * In this case, the PTEs are DMA addresses of each page of the buffer. Since + * the buffer is physically contiguous, calculating all the PTEs is simple + * math. + * + * See memdescGetPhysAddrsForGpu() + */ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) { unsigned int num_pages = DIV_ROUND_UP_ULL(size, GSP_PAGE_SIZE); @@ -1725,6 +1749,35 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) ptes[i] = (u64)addr + (i << GSP_PAGE_SHIFT); } +/** + * r535_gsp_libos_init() -- create the libos arguments structure + * + * The logging buffers are byte queues that contain encoded printf-like + * messages from GSP-RM. They need to be decoded by a special application + * that can parse the buffers. + * + * The 'loginit' buffer contains logs from early GSP-RM init and + * exception dumps. The 'logrm' buffer contains the subsequent logs. Both are + * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE. + * + * The physical address map for the log buffer is stored in the buffer + * itself, starting with offset 1. Offset 0 contains the "put" pointer. + * + * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is + * configured for a larger page size (e.g. 64K pages), we need to give + * the GSP an array of 4K pages. Fortunately, since the buffer is + * physically contiguous, it's simple math to calculate the addresses. + * + * The buffers must be a multiple of GSP_PAGE_SIZE. GSP-RM also currently + * ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the + * buffers to be physically contiguous anyway. + * + * The memory allocated for the arguments must remain until the GSP sends the + * init_done RPC. + * + * See _kgspInitLibosLoggingStructures (allocates memory for buffers) + * See kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) + */ static int r535_gsp_libos_init(struct nvkm_gsp *gsp) { @@ -1835,6 +1888,35 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3) nvkm_gsp_mem_dtor(gsp, &rx3->mem[i]); } +/** + * nvkm_gsp_radix3_sg - build a radix3 table from a S/G list + * + * The GSP uses a three-level page table, called radix3, to map the firmware. + * Each 64-bit "pointer" in the table is either the bus address of an entry in + * the next table (for levels 0 and 1) or the bus address of the next page in + * the GSP firmware image itself. + * + * Level 0 contains a single entry in one page that points to the first page + * of level 1. + * + * Level 1, since it's also only one page in size, contains up to 512 entries, + * one for each page in Level 2. + * + * Level 2 can be up to 512 pages in size, and each of those entries points to + * the next page of the firmware image. Since there can be up to 512*512 + * pages, that limits the size of the firmware to 512*512*GSP_PAGE_SIZE = 1GB. 
+ * + * Internally, the GSP has its window into system memory, but the base + * physical address of the aperture is not 0. In fact, it varies depending on + * the GPU architecture. Since the GPU is a PCI device, this window is + * accessed via DMA and is therefore bound by IOMMU translation. The end + * result is that GSP-RM must translate the bus addresses in the table to GSP + * physical addresses. All this should happen transparently. + * + * Returns 0 on success, or negative error code + * + * See kgspCreateRadix3_IMPL + */ static int nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size, struct nvkm_gsp_radix3 *rx3) -- cgit v1.2.3 From 698e19da2914a0021a088b2b5d101d1854862315 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Mon, 13 Nov 2023 14:49:53 -0800 Subject: drm/i915: Skip pxp init if gt is wedged The gt wedged could be triggered by missing guc firmware file, HW not working, etc. Once triggered, it means all gt usage is dead, therefore we can't enable pxp under this fatal error condition. v2: Updated commit message. v3: Updated return code check. Signed-off-by: Zhanjun Dong Reviewed-by: Alan Previn Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20231113224953.378534-1-zhanjun.dong@intel.com --- drivers/gpu/drm/i915/i915_driver.c | 4 +++- drivers/gpu/drm/i915/pxp/intel_pxp.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index db6f7c30adde..8a17eb7f9321 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -804,7 +804,9 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_cleanup_modeset2; - intel_pxp_init(i915); + ret = intel_pxp_init(i915); + if (ret != -ENODEV) + drm_dbg(&i915->drm, "pxp init failed with %d\n", ret); ret = intel_display_driver_probe(i915); if (ret) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index e11f562b1876..75278e78ca90 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -199,6 +199,9 @@ int intel_pxp_init(struct drm_i915_private *i915) struct intel_gt *gt; bool is_full_feature = false; + if (intel_gt_is_wedged(to_gt(i915))) + return -ENOTCONN; + /* * NOTE: Get the ctrl_gt before checking intel_pxp_is_supported since * we still need it if PXP's backend tee transport is needed. -- cgit v1.2.3 From cb9c919364653eeafb49e7ff5cd32f1ad64063ac Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 30 Nov 2023 11:08:52 +1000 Subject: nouveau/tu102: flush all pdbs on vmm flush This is a hack around a bug exposed with the GSP code, I'm not sure what is happening exactly, but it appears some of our flushes don't result in proper tlb invalidation for out BAR2 and we get a BAR2 fault from GSP and it all dies. 
Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231130010852.4034774-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index e34bc6076401..8379e72d77ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -31,7 +31,7 @@ tu102_vmm_flush(struct nvkm_vmm *vmm, int depth) type |= 0x00000001; /* PAGE_ALL */ if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) - type |= 0x00000004; /* HUB_ONLY */ + type |= 0x00000006; /* HUB_ONLY | ALL PDB (hack) */ mutex_lock(&vmm->mmu->mutex); -- cgit v1.2.3 From 7701ce26c747322de1f1a87f8e32792582f33249 Mon Sep 17 00:00:00 2001 From: Adrián Larumbe Date: Sat, 25 Nov 2023 20:52:02 +0000 Subject: drm/panfrost: Consider dma-buf imported objects as resident MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A GEM object constructed from a dma-buf imported sgtable should be regarded as being memory resident, because the dma-buf API mandates backing storage to be allocated when attachment succeeds. Signed-off-by: Adrián Larumbe Fixes: 9ccdac7aa822 ("drm/panfrost: Add fdinfo support for memory stats") Reported-by: Boris Brezillon Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20231125205438.375407-2-adrian.larumbe@collabora.com --- drivers/gpu/drm/panfrost/panfrost_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 0cf64456e29a..d47b40b82b0b 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -200,7 +200,7 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj struct panfrost_gem_object *bo = to_panfrost_bo(obj); enum drm_gem_object_status res = 0; - if (bo->base.pages) + if (bo->base.base.import_attach || bo->base.pages) res |= DRM_GEM_OBJECT_RESIDENT; if (bo->base.madv == PANFROST_MADV_DONTNEED) -- cgit v1.2.3 From 64111a0e22a9d4e0de7a5d04e7d5c21d0af4b900 Mon Sep 17 00:00:00 2001 From: Adrián Larumbe Date: Sat, 25 Nov 2023 20:52:03 +0000 Subject: drm/panfrost: Fix incorrect updating of current device frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was noticed when setting the Panfrost's DVFS device to the performant governor, GPU frequency as reported by fdinfo had dropped to 0 permamently. There are two separate issues causing this behaviour: - Not initialising the device's current_frequency variable to its original value during device probe(). - Updating said variable in Panfrost devfreq's get_dev_status() rather than after the new OPP's frequency had been retrieved in target(), which meant the old frequency would be assigned instead. 
Signed-off-by: Adrián Larumbe Fixes: f11b0417eec2 ("drm/panfrost: Add fdinfo support GPU load metrics") Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20231125205438.375407-3-adrian.larumbe@collabora.com --- drivers/gpu/drm/panfrost/panfrost_devfreq.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index f59c82ea8870..2d30da38c2c3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -29,14 +29,20 @@ static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfr static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { + struct panfrost_device *ptdev = dev_get_drvdata(dev); struct dev_pm_opp *opp; + int err; opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(opp)) return PTR_ERR(opp); dev_pm_opp_put(opp); - return dev_pm_opp_set_rate(dev, *freq); + err = dev_pm_opp_set_rate(dev, *freq); + if (!err) + ptdev->pfdevfreq.current_frequency = *freq; + + return err; } static void panfrost_devfreq_reset(struct panfrost_devfreq *pfdevfreq) @@ -58,7 +64,6 @@ static int panfrost_devfreq_get_dev_status(struct device *dev, spin_lock_irqsave(&pfdevfreq->lock, irqflags); panfrost_devfreq_update_utilization(pfdevfreq); - pfdevfreq->current_frequency = status->current_frequency; status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time, pfdevfreq->idle_time)); @@ -164,6 +169,14 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev) panfrost_devfreq_profile.initial_freq = cur_freq; + /* + * We could wait until panfrost_devfreq_target() to set this value, but + * since the simple_ondemand governor works asynchronously, there's a + * chance by the time someone opens the device's fdinfo file, current + * frequency hasn't been updated yet, so let's just do an early set. + */ + pfdevfreq->current_frequency = cur_freq; + /* * Set the recommend OPP this will enable and configure the regulator * if any and will avoid a switch off by regulator_late_cleanup() -- cgit v1.2.3 From 0514f63cfff38a0dcb7ba9c5f245827edc0c5107 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 17 Nov 2023 16:44:17 -0500 Subject: Revert "drm/prime: Unexport helpers for fd/handle conversion" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 71a7974ac7019afeec105a54447ae1dc7216cbb3. These helper functions are needed for KFD to export and import DMABufs the right way without duplicating the tracking of DMABufs associated with GEM objects while ensuring that move notifier callbacks are working as intended. 
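For the KFD use case this revert serves, the consuming driver goes through the re-exported helpers instead of duplicating fd/handle tracking. The sketch below is illustrative only - the kfd_*() wrapper names are hypothetical - but the helper signatures match the declarations restored in include/drm/drm_prime.h further down:

#include <linux/types.h>
#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/drm_prime.h>

/* Export an existing GEM handle as a dma-buf fd for another device or API.
 * @flags takes the usual PRIME flags (DRM_CLOEXEC, DRM_RDWR). */
static int kfd_export_gem_as_dmabuf(struct drm_device *dev,
				    struct drm_file *file_priv,
				    u32 handle, u32 flags, int *fd)
{
	return drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, fd);
}

/* Import a dma-buf fd and get back a GEM handle tracked by the DRM core,
 * so duplicate imports and move-notify keep working as described above. */
static int kfd_import_dmabuf_as_gem(struct drm_device *dev,
				    struct drm_file *file_priv,
				    int fd, u32 *handle)
{
	return drm_gem_prime_fd_to_handle(dev, file_priv, fd, handle);
}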
CC: Christian König CC: Thomas Zimmermann Acked-by: Thomas Zimmermann Acked-by: Daniel Vetter Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_prime.c | 33 ++++++++++++++++++--------------- include/drm/drm_prime.h | 7 +++++++ 2 files changed, 25 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 63b709a67471..834a5e28abbe 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -278,7 +278,7 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf) } EXPORT_SYMBOL(drm_gem_dmabuf_release); -/* +/** * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers * @dev: drm_device to import into * @file_priv: drm file-private structure @@ -292,9 +292,9 @@ EXPORT_SYMBOL(drm_gem_dmabuf_release); * * Returns 0 on success or a negative error code on failure. */ -static int drm_gem_prime_fd_to_handle(struct drm_device *dev, - struct drm_file *file_priv, int prime_fd, - uint32_t *handle) +int drm_gem_prime_fd_to_handle(struct drm_device *dev, + struct drm_file *file_priv, int prime_fd, + uint32_t *handle) { struct dma_buf *dma_buf; struct drm_gem_object *obj; @@ -360,6 +360,7 @@ out_put: dma_buf_put(dma_buf); return ret; } +EXPORT_SYMBOL(drm_gem_prime_fd_to_handle); int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -408,7 +409,7 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev, return dmabuf; } -/* +/** * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure @@ -421,10 +422,10 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev, * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. */ -static int drm_gem_prime_handle_to_fd(struct drm_device *dev, - struct drm_file *file_priv, uint32_t handle, - uint32_t flags, - int *prime_fd) +int drm_gem_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags, + int *prime_fd) { struct drm_gem_object *obj; int ret = 0; @@ -506,6 +507,7 @@ out_unlock: return ret; } +EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -864,9 +866,9 @@ EXPORT_SYMBOL(drm_prime_get_contiguous_size); * @obj: GEM object to export * @flags: flags like DRM_CLOEXEC and DRM_RDWR * - * This is the implementation of the &drm_gem_object_funcs.export functions - * for GEM drivers using the PRIME helpers. It is used as the default for - * drivers that do not set their own. + * This is the implementation of the &drm_gem_object_funcs.export functions for GEM drivers + * using the PRIME helpers. It is used as the default in + * drm_gem_prime_handle_to_fd(). */ struct dma_buf *drm_gem_prime_export(struct drm_gem_object *obj, int flags) @@ -962,9 +964,10 @@ EXPORT_SYMBOL(drm_gem_prime_import_dev); * @dev: drm_device to import into * @dma_buf: dma-buf object to import * - * This is the implementation of the gem_prime_import functions for GEM - * drivers using the PRIME helpers. It is the default for drivers that do - * not set their own &drm_driver.gem_prime_import. + * This is the implementation of the gem_prime_import functions for GEM drivers + * using the PRIME helpers. Drivers can use this as their + * &drm_driver.gem_prime_import implementation. 
It is used as the default + * implementation in drm_gem_prime_fd_to_handle(). * * Drivers must arrange to call drm_prime_gem_destroy() from their * &drm_gem_object_funcs.free hook when using this function.
diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index a7abf9f3e697..2a1d01e5b56b 100644
--- a/include/drm/drm_prime.h
+++ b/include/drm/drm_prime.h
@@ -60,12 +60,19 @@ enum dma_data_direction; struct drm_device; struct drm_gem_object;
+struct drm_file;
 /* core prime functions */ struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev, struct dma_buf_export_info *exp_info); void drm_gem_dmabuf_release(struct dma_buf *dma_buf);
+int drm_gem_prime_fd_to_handle(struct drm_device *dev,
+ struct drm_file *file_priv, int prime_fd, uint32_t *handle);
+int drm_gem_prime_handle_to_fd(struct drm_device *dev,
+ struct drm_file *file_priv, uint32_t handle, uint32_t flags,
+ int *prime_fd);
+
 /* helper functions for exporting */ int drm_gem_map_attach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach);
-- cgit v1.2.3 From b7d2a4da38fb558832b70c6f45929649a9d114a3 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 13 Nov 2023 17:00:15 -0800 Subject: drm/i915/guc: Fix for potential false positives in GuC hang selftest
Noticed that the hangcheck selftest is submitting a non-preemptible spinner. That means that even if the GuC does not die, the heartbeat will still kick in and trigger a reset, which rather defeats the purpose of the test - to verify that the heartbeat will kick in if the GuC itself has died.
The test is deliberately killing the GuC, so it should never hit the case of a non-dead GuC. But it is not impossible that the kill might fail at some future point due to other driver re-work. So, make the spinner pre-emptible. That way the heartbeat can get through if the GuC is alive and context switching. Thus a reset only happens if the GuC dies. Thus, if the kill should stop working the test will now fail rather than claim to pass.
Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231114010016.234570-2-John.C.Harrison@Intel.com
--- drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu')
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c index 34b5d952e2bc..26fdc392fce6 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -74,7 +74,7 @@ static int intel_hang_guc(void *arg) goto err; }
- rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 intel_context_put(ce); if (IS_ERR(rq)) { ret = PTR_ERR(rq);
-- cgit v1.2.3 From 706785c19fe92186815bdb9ae0148c4ba7262669 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Mon, 13 Nov 2023 17:00:16 -0800 Subject: drm/i915/guc: Add a selftest for FAST_REQUEST errors
There is a mechanism for reporting errors from fire-and-forget H2G messages. This is the only way to find out about almost any error in the GuC backend submission path. So it would be useful to know that it is working.
v2: Fix some dumb over-complications and a couple of typos - review feedback from Daniele.
Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231114010016.234570-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 9 +++ drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 115 ++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2b6dfe62c8f2..e22c12ce245a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -297,6 +297,10 @@ struct intel_guc { * @number_guc_id_stolen: The number of guc_ids that have been stolen */ int number_guc_id_stolen; + /** + * @fast_response_selftest: Backdoor to CT handler for fast response selftest + */ + u32 fast_response_selftest; #endif }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 89e314b3756b..ed6ce73ef3b0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r found = true; break; } + +#ifdef CONFIG_DRM_I915_SELFTEST + if (!found && ct_to_guc(ct)->fast_response_selftest) { + CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n"); + ct_to_guc(ct)->fast_response_selftest++; + found = true; + } +#endif + if (!found) { CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n", len, hxg[0], fence, ct->requests.last_fence); diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index bfb72143566f..c900aac85adb 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -286,11 +286,126 @@ err_wakeref: return ret; } +/* + * Send a context schedule H2G message with an invalid context id. + * This should generate a GUC_RESULT_INVALID_CONTEXT response. + */ +static int bad_h2g(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT, + 0x12345678, + }; + + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); +} + +/* + * Set a spinner running to make sure the system is alive and active, + * then send a bad but asynchronous H2G command and wait to see if an + * error response is returned. If no response is received or if the + * spinner dies then the test will fail. 
+ */ +#define FAST_RESPONSE_TIMEOUT_MS 1000 +static int intel_guc_fast_request(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt); + bool spinning = false; + int ret = 0; + + if (!engine) + return 0; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + gt_err(gt, "Failed to create spinner request: %pe\n", ce); + goto err_pm; + } + + ret = igt_spinner_init(&spin, engine->gt); + if (ret) { + gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret)); + goto err_pm; + } + spinning = true; + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + gt_err(gt, "Failed to create spinner request: %pe\n", rq); + goto err_spin; + } + + ret = request_add_spin(rq, &spin); + if (ret) { + gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + gt->uc.guc.fast_response_selftest = 1; + + ret = bad_h2g(>->uc.guc); + if (ret) { + gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq), + FAST_RESPONSE_TIMEOUT_MS); + if (ret) { + gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + if (i915_request_completed(rq)) { + gt_err(gt, "Spinner died waiting for fast request error!\n"); + ret = -EIO; + goto err_rq; + } + + if (gt->uc.guc.fast_response_selftest != 2) { + gt_err(gt, "Unexpected fast response count: %d\n", + gt->uc.guc.fast_response_selftest); + goto err_rq; + } + + igt_spinner_end(&spin); + spinning = false; + + ret = intel_selftest_wait_for_rq(rq); + if (ret) { + gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + +err_rq: + i915_request_put(rq); + +err_spin: + if (spinning) + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + +err_pm: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + return ret; +} + int intel_guc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(intel_guc_scrub_ctbs), SUBTEST(intel_guc_steal_guc_ids), + SUBTEST(intel_guc_fast_request), }; struct intel_gt *gt = to_gt(i915); -- cgit v1.2.3 From 04fcc3fec5dbd316b0b1fb2b9f8a39bfbe07af50 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Thu, 23 Nov 2023 16:47:33 +0800 Subject: drm/amdgpu: Skip access gfx11 golden registers under SRIOV [Why] Golden registers are PF-only registers on gfx11. RLCG interface will return "out-of-range" under SRIOV VF. [How] Skip access gfx11 golden registers under SRIOV. 
Reviewed-by: Horace Chen Signed-off-by: ZhenGuo Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8ed4a6fb147a..288e926e5cd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -293,6 +293,9 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): -- cgit v1.2.3 From 00f9d49bce844e8196e0c2ea298f9a41a11129d9 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 30 Nov 2023 12:58:14 +0800 Subject: drm/amdgpu: Fix missing mca debugfs node Use amdgpu_ip_version() helper function to check ip version. The ip version contains other information, use the helper function to avoid reading wrong value. Reviewed-by: Lijo Lazar Signed-off-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 54f2f346579e..210aea590a52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -485,7 +485,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_se void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) { #if defined(CONFIG_DEBUG_FS) - if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + if (!root || amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6)) return; debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops); -- cgit v1.2.3 From 9596ffe1cc99dd699e595ea971a2c8ccd2735e21 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Wed, 29 Nov 2023 10:59:53 +0800 Subject: drm/amdgpu: distinguish rlc fw for different SKU For some SKU, rlc firmware should use different one compared with the normal rlc firmware. 
Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 288e926e5cd4..c659ef0f47ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -67,6 +67,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); @@ -567,7 +568,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } if (!amdgpu_sriov_vf(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && + adev->pdev->revision == 0xCE) + snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin"); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); if (err) goto out; -- cgit v1.2.3 From f875f61b1fd626a4223a5bdf0339b5372c689e13 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 30 Nov 2023 18:39:03 +0800 Subject: drm/amdgpu: enable mca debug mode on APU by default enable MCA debug mode on APU device by default. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index f6b47ebce9d6..72634d675e27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3132,7 +3132,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; - amdgpu_ras_set_mca_debug_mode(adev, false); + /* enable MCA debug on APU device */ + amdgpu_ras_set_mca_debug_mode(adev, !!(adev->flags & AMD_IS_APU)); list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { if (!node->ras_obj) { -- cgit v1.2.3 From 39c960bbf9d9ea862398759e75736cfb68c3446f Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 29 Nov 2023 07:22:30 -0800 Subject: drm/radeon/r600_cs: Fix possible int overflows in r600_cs_check_reg() While improbable, there may be a chance of hitting integer overflow when the result of radeon_get_ib_value() gets shifted left. Avoid it by casting one of the operands to larger data type (u64). Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. 
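The failure mode here (and in the r100 fix that follows) is easiest to see in isolation: when a 32-bit value is shifted or multiplied without first widening one operand, the arithmetic is evaluated in 32 bits and the high bits are lost before the result ever reaches the 64-bit destination. A standalone illustration with a hypothetical value (not taken from a real command stream):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ib_val = 0x04000000;	/* hypothetical dword read from the IB */

	uint64_t lost = ib_val << 8;		/* shift done in 32 bits: result is 0 */
	uint64_t kept = (uint64_t)ib_val << 8;	/* widen first, as the fix does */

	printf("without cast: 0x%llx, with cast: 0x%llx\n",
	       (unsigned long long)lost, (unsigned long long)kept);
	return 0;
}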
Fixes: 1729dd33d20b ("drm/radeon/kms: r600 CS parser fixes") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_cs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 638f861af80f..6cf54a747749 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1275,7 +1275,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } tmp = (reg - CB_COLOR0_BASE) / 4; - track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; + track->cb_color_bo_offset[tmp] = (u64)radeon_get_ib_value(p, idx) << 8; ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; @@ -1302,7 +1302,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - track->htile_offset = radeon_get_ib_value(p, idx) << 8; + track->htile_offset = (u64)radeon_get_ib_value(p, idx) << 8; ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->htile_bo = reloc->robj; track->db_dirty = true; -- cgit v1.2.3 From b5c5baa458faa5430c445acd9a17481274d77ccf Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 29 Nov 2023 07:22:12 -0800 Subject: drm/radeon/r100: Fix integer overflow issues in r100_cs_track_check() It may be possible, albeit unlikely, to encounter integer overflow during the multiplication of several unsigned int variables, the result being assigned to a variable 'size' of wider type. Prevent this potential behaviour by converting one of the multiples to unsigned long. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 0242f74d29df ("drm/radeon: clean up CS functions in r100.c") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index affa9e0309b2..cfeca2694d5f 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2321,7 +2321,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) switch (prim_walk) { case 1: for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * track->max_indx * 4; + size = track->arrays[i].esize * track->max_indx * 4UL; if (track->arrays[i].robj == NULL) { DRM_ERROR("(PW %u) Vertex array %u no buffer " "bound\n", prim_walk, i); @@ -2340,7 +2340,7 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) break; case 2: for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * (nverts - 1) * 4; + size = track->arrays[i].esize * (nverts - 1) * 4UL; if (track->arrays[i].robj == NULL) { DRM_ERROR("(PW %u) Vertex array %u no buffer " "bound\n", prim_walk, i); -- cgit v1.2.3 From 71225e1c930942cb1e042fc08c5cc0c4ef30e95e Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 8 Aug 2023 11:04:16 -0700 Subject: drm/radeon: check return value of radeon_ring_lock() In the unlikely event of radeon_ring_lock() failing, its errno return value should be processed. This patch checks said return value and prints a debug message in case of an error. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. 
Fixes: 48c0c902e2e6 ("drm/radeon/kms: add support for CP setup on SI") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index a91012447b56..85e9cba49cec 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3611,6 +3611,10 @@ static int si_cp_start(struct radeon_device *rdev) for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) { ring = &rdev->ring[i]; r = radeon_ring_lock(rdev, ring, 2); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } /* clear the compute context state */ radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0)); -- cgit v1.2.3 From b719a9c15d52d4f56bdea8241a5d90fd9197ce99 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 28 Nov 2023 18:35:09 -0600 Subject: drm/amd/display: Fix NULL pointer dereference at hibernate During hibernate sequence the source context might not have a clk_mgr. So don't use it to look for DML2 support. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2980 Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2") Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index f3a9fdd2340d..e1c02527d04a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -4554,7 +4554,7 @@ void dc_resource_state_copy_construct( struct dml2_context *dml2 = NULL; // Need to preserve allocated dml2 context - if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2) + if (src_ctx->clk_mgr && src_ctx->clk_mgr->ctx->dc->debug.using_dml2) dml2 = dst_ctx->bw_ctx.dml2; #endif @@ -4562,7 +4562,7 @@ void dc_resource_state_copy_construct( #ifdef CONFIG_DRM_AMD_DC_FP // Preserve allocated dml2 context - if (src_ctx->clk_mgr->ctx->dc->debug.using_dml2) + if (src_ctx->clk_mgr && src_ctx->clk_mgr->ctx->dc->debug.using_dml2) dst_ctx->bw_ctx.dml2 = dml2; #endif -- cgit v1.2.3 From 96d7e79401364c6e9a63af5f74f76792b03cb832 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 21 Nov 2023 07:43:14 +0200 Subject: drm/i915: Check pipe active state in {planes,vrr}_{enabling,disabling}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit {planes,vrr}_{enabling,disabling}() are supposed to indicate whether the specific hardware feature is supposed to be enabling or disabling. That can only makes sense if the pipe is active overall. So check for that before we go poking at the hardware. I think we're semi-safe currently on due to: - intel_pre_plane_update() doesn't get called when the pipe was not-active prior to the commit, but this is actually a bug. This saves vrr_disabling(), and vrr_enabling() is called from deeper down where we have already checked hw.active. 
- active_planes mirrors the crtc's hw.active Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231121054324.9988-2-ville.syrjala@linux.intel.com (cherry picked from commit bc53c4d56eb24dbe56cd2c66ef4e9fc9393b1533) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a2a806262c9e..94330108145e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -906,12 +906,18 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state) static bool planes_enabling(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + if (!new_crtc_state->hw.active) + return false; + return is_enabling(active_planes, old_crtc_state, new_crtc_state); } static bool planes_disabling(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + if (!old_crtc_state->hw.active) + return false; + return is_disabling(active_planes, old_crtc_state, new_crtc_state); } @@ -928,6 +934,9 @@ static bool vrr_params_changed(const struct intel_crtc_state *old_crtc_state, static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + if (!new_crtc_state->hw.active) + return false; + return is_enabling(vrr.enable, old_crtc_state, new_crtc_state) || (new_crtc_state->vrr.enable && (new_crtc_state->update_m_n || new_crtc_state->update_lrr || @@ -937,6 +946,9 @@ static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state, static bool vrr_disabling(const struct intel_crtc_state *old_crtc_state, const struct intel_crtc_state *new_crtc_state) { + if (!old_crtc_state->hw.active) + return false; + return is_disabling(vrr.enable, old_crtc_state, new_crtc_state) || (old_crtc_state->vrr.enable && (new_crtc_state->update_m_n || new_crtc_state->update_lrr || -- cgit v1.2.3 From 00cb022753e29a1c5993fa7d291378750377bd70 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 29 Nov 2023 19:33:16 +0200 Subject: drm/i915: use PIPE_CONF_CHECK_BOOL() for bool members Don't treat bools as integers. 
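The follow-up patch below enforces this rule at compile time. A standalone sketch of the check it builds on - assuming __same_type() is the usual wrapper around __builtin_types_compatible_p(), as in the kernel's compiler_types.h - looks like this:

#include <stdbool.h>
#include <stdio.h>

#define same_type(a, b) __builtin_types_compatible_p(__typeof__(a), __typeof__(b))

struct fake_crtc_state {
	bool enable;		/* must go through the BOOL variant    */
	int  pixel_rate;	/* must go through the integer variant */
};

#define CHECK_I(s, field) \
	_Static_assert(!same_type((s).field, bool), #field " is bool, use CHECK_BOOL")
#define CHECK_BOOL(s, field) \
	_Static_assert(same_type((s).field, bool), #field " is not bool, use CHECK_I")

int main(void)
{
	struct fake_crtc_state s = { .enable = true, .pixel_rate = 148500 };

	CHECK_BOOL(s, enable);		/* compiles */
	CHECK_I(s, pixel_rate);		/* compiles */
	/* CHECK_I(s, enable); would fail the build, like the i915 change */

	printf("enable=%d pixel_rate=%d\n", s.enable, s.pixel_rate);
	return 0;
}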
v2: Rebase Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20231129173317.1192269-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9dc22fc8b3d3..d62cdae7ab6b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5091,8 +5091,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) - PIPE_CONF_CHECK_I(hw.enable); - PIPE_CONF_CHECK_I(hw.active); + PIPE_CONF_CHECK_BOOL(hw.enable); + PIPE_CONF_CHECK_BOOL(hw.active); PIPE_CONF_CHECK_I(cpu_transcoder); PIPE_CONF_CHECK_I(mst_master_transcoder); @@ -5301,8 +5301,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(dsc.config.second_line_bpg_offset); PIPE_CONF_CHECK_I(dsc.config.nsl_bpg_offset); - PIPE_CONF_CHECK_I(dsc.compression_enable); - PIPE_CONF_CHECK_I(dsc.dsc_split); + PIPE_CONF_CHECK_BOOL(dsc.compression_enable); + PIPE_CONF_CHECK_BOOL(dsc.dsc_split); PIPE_CONF_CHECK_I(dsc.compressed_bpp_x16); PIPE_CONF_CHECK_BOOL(splitter.enable); -- cgit v1.2.3 From 9f82f1655fdbaf598a0106f7268ff99a606be434 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 29 Nov 2023 19:33:17 +0200 Subject: drm/i915: add bool type checks in PIPE_CONF_CHECK_* Avoid bool/int mismatches in state checker macros. Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20231129173317.1192269-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d62cdae7ab6b..7d48bcdd5797 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4923,6 +4923,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ + BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \ + __stringify(name) " is bool"); \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected 0x%08x, found 0x%08x)", \ current_config->name, \ @@ -4933,6 +4935,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_X_WITH_MASK(name, mask) do { \ if ((current_config->name & (mask)) != (pipe_config->name & (mask))) { \ + BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \ + __stringify(name) " is bool"); \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected 0x%08x, found 0x%08x)", \ current_config->name & (mask), \ @@ -4943,6 +4947,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ + BUILD_BUG_ON_MSG(__same_type(current_config->name, bool), \ + __stringify(name) " is bool"); \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected %i, found %i)", \ current_config->name, \ @@ -4953,6 +4959,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_BOOL(name) do { \ if 
(current_config->name != pipe_config->name) { \ + BUILD_BUG_ON_MSG(!__same_type(current_config->name, bool), \ + __stringify(name) " is not bool"); \ pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected %s, found %s)", \ str_yes_no(current_config->name), \ -- cgit v1.2.3 From 561322c3bc14bb59f26120a9135eabc140284f86 Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Wed, 29 Nov 2023 14:22:21 +0200 Subject: drm/i915/display: Skip state verification with TBT-ALT mode With TBT-ALT mode we are not programming C20 chip PLL's and hence we don't need to check state verification. We don't need to program DP link signal levels i.e.pre-emphasis and voltage swing either. This patch fixes dmesg errors like this one "[drm] ERROR PHY F Write 0c06 failed after 3 retries." Signed-off-by: Mika Kahola Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20231129122221.1109084-1-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index a8fa76580802..5fbec5784b83 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -415,9 +415,15 @@ void intel_cx0_phy_set_signal_levels(struct intel_encoder *encoder, struct drm_i915_private *i915 = to_i915(encoder->base.dev); const struct intel_ddi_buf_trans *trans; enum phy phy = intel_port_to_phy(i915, encoder->port); - u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder); + u8 owned_lane_mask; intel_wakeref_t wakeref; int n_entries, ln; + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + + if (intel_tc_port_in_tbt_alt_mode(dig_port)) + return; + + owned_lane_mask = intel_cx0_get_owned_lane_mask(i915, encoder); wakeref = intel_cx0_phy_transaction_begin(encoder); @@ -3136,6 +3142,9 @@ void intel_cx0pll_state_verify(struct intel_atomic_state *state, encoder = intel_get_crtc_new_encoder(state, new_crtc_state); phy = intel_port_to_phy(i915, encoder->port); + if (intel_tc_port_in_tbt_alt_mode(enc_to_dig_port(encoder))) + return; + intel_cx0pll_readout_hw_state(encoder, &mpll_hw_state); if (intel_is_c10phy(i915, phy)) -- cgit v1.2.3 From 80d20fd99124800749d605c733911a8d9da78e2b Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Wed, 15 Nov 2023 11:07:18 +0200 Subject: drm/i915/display: split i915 specific code from intel_fbdev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split out code from intel_fbdev that can not be share between i915 and xe. Create new i915 specific source/header file intel_fbdev_fb.[ch] which contains this code. 
Signed-off-by: Jouni Högander Reviewed-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20231115090719.3210079-2-jouni.hogander@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/display/intel_fbdev.c | 108 ++---------------------- drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 115 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_fbdev_fb.h | 21 +++++ 4 files changed, 146 insertions(+), 101 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_fbdev_fb.c create mode 100644 drivers/gpu/drm/i915/display/intel_fbdev_fb.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 65e984242089..eadc9e612962 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -318,7 +318,8 @@ i915-$(CONFIG_ACPI) += \ display/intel_acpi.o \ display/intel_opregion.o i915-$(CONFIG_DRM_FBDEV_EMULATION) += \ - display/intel_fbdev.o + display/intel_fbdev.o \ + display/intel_fbdev_fb.o i915-$(CONFIG_DEBUG_FS) += \ display/intel_display_debugfs.o \ display/intel_display_debugfs_params.o \ diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 31d0d695d567..b7e4b7830e45 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -43,7 +43,6 @@ #include #include -#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_mman.h" #include "i915_drv.h" @@ -51,6 +50,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_fbdev.h" +#include "intel_fbdev_fb.h" #include "intel_frontbuffer.h" struct intel_fbdev { @@ -146,65 +146,6 @@ static const struct fb_ops intelfb_ops = { .fb_mmap = intel_fbdev_mmap, }; -static int intelfb_alloc(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct intel_fbdev *ifbdev = to_intel_fbdev(helper); - struct drm_framebuffer *fb; - struct drm_device *dev = helper->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_mode_fb_cmd2 mode_cmd = {}; - struct drm_i915_gem_object *obj; - int size; - - /* we don't do packed 24bpp */ - if (sizes->surface_bpp == 24) - sizes->surface_bpp = 32; - - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - mode_cmd.pitches[0] = ALIGN(mode_cmd.width * - DIV_ROUND_UP(sizes->surface_bpp, 8), 64); - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - size = mode_cmd.pitches[0] * mode_cmd.height; - size = PAGE_ALIGN(size); - - obj = ERR_PTR(-ENODEV); - if (HAS_LMEM(dev_priv)) { - obj = i915_gem_object_create_lmem(dev_priv, size, - I915_BO_ALLOC_CONTIGUOUS | - I915_BO_ALLOC_USER); - } else { - /* - * If the FB is too big, just don't use it since fbdev is not very - * important and we should probably use that space with FBC or other - * features. - * - * Also skip stolen on MTL as Wa_22018444074 mitigation. 
- */ - if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size) - obj = i915_gem_object_create_stolen(dev_priv, size); - if (IS_ERR(obj)) - obj = i915_gem_object_create_shmem(dev_priv, size); - } - - if (IS_ERR(obj)) { - drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj); - return PTR_ERR(obj); - } - - fb = intel_framebuffer_create(obj, &mode_cmd); - i915_gem_object_put(obj); - if (IS_ERR(fb)) - return PTR_ERR(fb); - - ifbdev->fb = to_intel_framebuffer(fb); - return 0; -} - static int intelfb_create(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { @@ -213,7 +154,6 @@ static int intelfb_create(struct drm_fb_helper *helper, struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt; const struct i915_gtt_view view = { .type = I915_GTT_VIEW_NORMAL, }; @@ -222,9 +162,7 @@ static int intelfb_create(struct drm_fb_helper *helper, struct i915_vma *vma; unsigned long flags = 0; bool prealloc = false; - void __iomem *vaddr; struct drm_i915_gem_object *obj; - struct i915_gem_ww_ctx ww; int ret; mutex_lock(&ifbdev->hpd_lock); @@ -245,12 +183,13 @@ static int intelfb_create(struct drm_fb_helper *helper, intel_fb = ifbdev->fb = NULL; } if (!intel_fb || drm_WARN_ON(dev, !intel_fb_obj(&intel_fb->base))) { + struct drm_framebuffer *fb; drm_dbg_kms(&dev_priv->drm, "no BIOS fb, allocating a new one\n"); - ret = intelfb_alloc(helper, sizes); - if (ret) - return ret; - intel_fb = ifbdev->fb; + fb = intel_fbdev_fb_alloc(helper, sizes); + if (IS_ERR(fb)) + return PTR_ERR(fb); + intel_fb = ifbdev->fb = to_intel_framebuffer(fb); } else { drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n"); prealloc = true; @@ -283,49 +222,18 @@ static int intelfb_create(struct drm_fb_helper *helper, info->fbops = &intelfb_ops; obj = intel_fb_obj(&intel_fb->base); - if (i915_gem_object_is_lmem(obj)) { - struct intel_memory_region *mem = obj->mm.region; - - /* Use fbdev's framebuffer from lmem for discrete */ - info->fix.smem_start = - (unsigned long)(mem->io_start + - i915_gem_object_get_dma_address(obj, 0)); - info->fix.smem_len = obj->base.size; - } else { - /* Our framebuffer is the entirety of fbdev's system memory */ - info->fix.smem_start = - (unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma)); - info->fix.smem_len = vma->size; - } - - for_i915_gem_ww(&ww, ret, false) { - ret = i915_gem_object_lock(vma->obj, &ww); - - if (ret) - continue; - - vaddr = i915_vma_pin_iomap(vma); - if (IS_ERR(vaddr)) { - drm_err(&dev_priv->drm, - "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); - ret = PTR_ERR(vaddr); - continue; - } - } + ret = intel_fbdev_fb_fill_info(dev_priv, info, obj, vma); if (ret) goto out_unpin; - info->screen_base = vaddr; - info->screen_size = vma->size; - drm_fb_helper_fill_info(info, &ifbdev->helper, sizes); /* If the object is shmemfs backed, it will have given us zeroed pages. * If the object is stolen however, it will be full of whatever * garbage was left in there. 
*/ - if (!i915_gem_object_is_shmem(vma->obj) && !prealloc) + if (!i915_gem_object_is_shmem(obj) && !prealloc) memset_io(info->screen_base, 0, info->screen_size); /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c new file mode 100644 index 000000000000..717c3a3237c4 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +#include "gem/i915_gem_lmem.h" + +#include "i915_drv.h" +#include "intel_display_types.h" +#include "intel_fbdev_fb.h" + +struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_framebuffer *fb; + struct drm_device *dev = helper->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_mode_fb_cmd2 mode_cmd = {}; + struct drm_i915_gem_object *obj; + int size; + + /* we don't do packed 24bpp */ + if (sizes->surface_bpp == 24) + sizes->surface_bpp = 32; + + mode_cmd.width = sizes->surface_width; + mode_cmd.height = sizes->surface_height; + + mode_cmd.pitches[0] = ALIGN(mode_cmd.width * + DIV_ROUND_UP(sizes->surface_bpp, 8), 64); + mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, + sizes->surface_depth); + + size = mode_cmd.pitches[0] * mode_cmd.height; + size = PAGE_ALIGN(size); + + obj = ERR_PTR(-ENODEV); + if (HAS_LMEM(dev_priv)) { + obj = i915_gem_object_create_lmem(dev_priv, size, + I915_BO_ALLOC_CONTIGUOUS | + I915_BO_ALLOC_USER); + } else { + /* + * If the FB is too big, just don't use it since fbdev is not very + * important and we should probably use that space with FBC or other + * features. + * + * Also skip stolen on MTL as Wa_22018444074 mitigation. 
+ */ + if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size) + obj = i915_gem_object_create_stolen(dev_priv, size); + if (IS_ERR(obj)) + obj = i915_gem_object_create_shmem(dev_priv, size); + } + + if (IS_ERR(obj)) { + drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj); + return ERR_PTR(-ENOMEM); + } + + fb = intel_framebuffer_create(obj, &mode_cmd); + i915_gem_object_put(obj); + + return fb; +} + +int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, + struct drm_i915_gem_object *obj, struct i915_vma *vma) +{ + struct i915_gem_ww_ctx ww; + void __iomem *vaddr; + int ret; + + if (i915_gem_object_is_lmem(obj)) { + struct intel_memory_region *mem = obj->mm.region; + + /* Use fbdev's framebuffer from lmem for discrete */ + info->fix.smem_start = + (unsigned long)(mem->io_start + + i915_gem_object_get_dma_address(obj, 0)); + info->fix.smem_len = obj->base.size; + } else { + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; + + /* Our framebuffer is the entirety of fbdev's system memory */ + info->fix.smem_start = + (unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma)); + info->fix.smem_len = vma->size; + } + + for_i915_gem_ww(&ww, ret, false) { + ret = i915_gem_object_lock(vma->obj, &ww); + + if (ret) + continue; + + vaddr = i915_vma_pin_iomap(vma); + if (IS_ERR(vaddr)) { + drm_err(&i915->drm, + "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); + ret = PTR_ERR(vaddr); + continue; + } + } + + if (ret) + return ret; + + info->screen_base = vaddr; + info->screen_size = intel_bo_to_drm_bo(obj)->size; + + return 0; +} diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h new file mode 100644 index 000000000000..a395b2c65d33 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_FBDEV_FB_H__ +#define __INTEL_FBDEV_FB_H__ + +struct drm_fb_helper; +struct drm_fb_helper_surface_size; +struct drm_i915_gem_object; +struct drm_i915_private; +struct fb_info; +struct i915_vma; + +struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, + struct drm_fb_helper_surface_size *sizes); +int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, + struct drm_i915_gem_object *obj, struct i915_vma *vma); + +#endif -- cgit v1.2.3 From c952bf11ace50b03fce14dbc15a092fdc9a6d2c8 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Wed, 15 Nov 2023 11:07:19 +0200 Subject: drm/i915/display: use intel_bo_to_drm_bo in intel_fbdev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are preparing for Xe driver. I915 and Xe object implementation are differing. Do not use i915_gem_object->base directly. Instead use intel_bo_to_drm_bo. 
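The accessor itself is not part of this hunk; the idea it captures is the usual "embedded base object" pattern, modelled standalone below (fake structs, not the real i915 definitions - in i915 the accessor is expected to simply return &obj->base):

#include <stdio.h>

struct drm_gem_object { unsigned long size; };

/* Driver-specific object embedding the generic one (layout faked here). */
struct driver_gem_object {
	struct drm_gem_object base;
	int placement_flags;
};

/* Shared display code only ever uses the accessor, so another driver
 * (e.g. xe) can supply its own definition without the call sites changing. */
static struct drm_gem_object *bo_to_drm_bo(struct driver_gem_object *obj)
{
	return &obj->base;
}

int main(void)
{
	struct driver_gem_object obj = { .base = { .size = 4096 }, .placement_flags = 0 };

	/* instead of reaching into obj.base.size directly: */
	printf("fb object size: %lu\n", bo_to_drm_bo(&obj)->size);
	return 0;
}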
Signed-off-by: Jouni Högander Signed-off-by: Maarten Lankhorst Reviewed-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20231115090719.3210079-3-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_fbdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index b7e4b7830e45..99894a855ef0 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -332,12 +332,12 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, continue; } - if (obj->base.size > max_size) { + if (intel_bo_to_drm_bo(obj)->size > max_size) { drm_dbg_kms(&i915->drm, "found possible fb from [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); fb = to_intel_framebuffer(plane_state->uapi.fb); - max_size = obj->base.size; + max_size = intel_bo_to_drm_bo(obj)->size; } } -- cgit v1.2.3 From 3b2894c967377a49be084b9b39b21b2315bd9b2c Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Mon, 16 Oct 2023 22:38:20 +0000 Subject: drm/modes: replace deprecated strncpy with strscpy_pad `strncpy` is deprecated for use on NUL-terminated destination strings [1] and as such we should prefer more robust and less ambiguous string interfaces. We should NUL-pad as there are full struct copies happening in places: | struct drm_mode_modeinfo umode; | | ... | struct drm_property_blob *blob; | | drm_mode_convert_to_umode(&umode, mode); | blob = drm_property_create_blob(crtc->dev, | sizeof(umode), &umode); A suitable replacement is `strscpy_pad` due to the fact that it guarantees both NUL-termination and NUL-padding on the destination buffer. Additionally, replace size macro `DRM_DISPLAY_MODE_LEN` with sizeof() to more directly tie the maximum buffer size to the destination buffer: | struct drm_display_mode { | ... | char name[DRM_DISPLAY_MODE_LEN]; Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://github.com/KSPP/linux/issues/90 Cc: linux-hardening@vger.kernel.org Cc: Xu Panda Signed-off-by: Justin Stitt Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20231016-strncpy-drivers-gpu-drm-drm_modes-c-v2-1-d0b60686e1c6@google.com Signed-off-by: Kees Cook --- drivers/gpu/drm/drm_modes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index ac9a406250c5..893f52ee4926 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -2617,8 +2617,7 @@ void drm_mode_convert_to_umode(struct drm_mode_modeinfo *out, break; } - strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN); - out->name[DRM_DISPLAY_MODE_LEN-1] = 0; + strscpy_pad(out->name, in->name, sizeof(out->name)); } /** @@ -2659,8 +2658,7 @@ int drm_mode_convert_umode(struct drm_device *dev, * useful for the kernel->userspace direction anyway. 
*/ out->type = in->type & DRM_MODE_TYPE_ALL; - strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN); - out->name[DRM_DISPLAY_MODE_LEN-1] = 0; + strscpy_pad(out->name, in->name, sizeof(out->name)); /* Clearing picture aspect ratio bits from out flags, * as the aspect-ratio information is not stored in -- cgit v1.2.3 From e6c0de5f445091d250b75dabc4c60dd2643b8c98 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Mon, 11 Sep 2023 15:16:27 -0700 Subject: drm/msm/dpu: try multirect based on mdp clock limits It's certainly possible that for large resolutions a single DPU SSPP cannot process the image without exceeding the MDP clock limits but it can still process it in multirect mode because the source rectangles will get divided and can fall within the MDP clock limits. If the SSPP cannot process the image even in multirect mode, then it will be rejected in dpu_plane_atomic_check_pipe(). Hence try using multirect for resolutions which cannot be processed by a single SSPP without exceeding the MDP clock limits. changes in v2: - use crtc_state's adjusted_mode instead of mode - fix the UBWC condition to check maxlinewidth Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/556817/ Link: https://lore.kernel.org/r/20230911221627.9569-2-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 3eef5e025e12..1e0da38c6f2a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -824,6 +824,8 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, plane); int ret = 0, min_scale; struct dpu_plane *pdpu = to_dpu_plane(plane); + struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); + u64 max_mdp_clk_rate = kms->perf.max_core_clk_rate; struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state); struct dpu_sw_pipe *pipe = &pstate->pipe; struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; @@ -892,14 +894,16 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, max_linewidth = pdpu->catalog->caps->max_linewidth; - if (drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) { + if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) || + _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) { /* * In parallel multirect case only the half of the usual width * is supported for tiled formats. If we are here, we know that * full width is more than max_linewidth, thus each rect is * wider than allowed. */ - if (DPU_FORMAT_IS_UBWC(fmt)) { + if (DPU_FORMAT_IS_UBWC(fmt) && + drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) { DPU_DEBUG_PLANE(pdpu, "invalid src " DRM_RECT_FMT " line:%u, tiled format\n", DRM_RECT_ARG(&pipe_cfg->src_rect), max_linewidth); return -E2BIG; -- cgit v1.2.3 From a9bd555de5e9042fdf8ab8d6080b86f45c68ddf6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 9 Oct 2023 19:56:27 +0300 Subject: drm/msm/dpu: enable SmartDMA on SM8450 Enable the SmartDMA / multirect support on the SM8450 platform to support higher resoltion modes. 
Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/561590/ Link: https://lore.kernel.org/r/20231009165627.2691015-1-dmitry.baryshkov@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index 7742f52be859..d18145c226da 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x32c, - .features = VIG_SC7180_MASK, + .features = VIG_SC7180_MASK_SDMA, .sblk = &sm8250_vig_sblk_0, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x32c, - .features = VIG_SC7180_MASK, + .features = VIG_SC7180_MASK_SDMA, .sblk = &sm8250_vig_sblk_1, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x32c, - .features = VIG_SC7180_MASK, + .features = VIG_SC7180_MASK_SDMA, .sblk = &sm8250_vig_sblk_2, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x32c, - .features = VIG_SC7180_MASK, + .features = VIG_SC7180_MASK_SDMA, .sblk = &sm8250_vig_sblk_3, .xin_id = 12, .type = SSPP_TYPE_VIG, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x32c, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &sdm845_dma_sblk_0, .xin_id = 1, .type = SSPP_TYPE_DMA, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x32c, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &sdm845_dma_sblk_1, .xin_id = 5, .type = SSPP_TYPE_DMA, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x32c, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &sdm845_dma_sblk_2, .xin_id = 9, .type = SSPP_TYPE_DMA, @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x32c, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &sdm845_dma_sblk_3, .xin_id = 13, .type = SSPP_TYPE_DMA, -- cgit v1.2.3 From 921e32bf6c0c86f774226577cb743d654f9bcfd9 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Fri, 8 Sep 2023 12:33:13 -0700 Subject: drm/msm/dpu: enable smartdma on sm8350 To support high resolutions on sm8350, enable smartdma in its catalog. 
Signed-off-by: Abhinav Kumar Tested-by: Dmitry Baryshkov Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/556561/ Link: https://lore.kernel.org/r/20230908193314.27008-1-quic_abhinavk@quicinc.com [DB: rebased on top of msm-next] Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index 1709ba57f384..5aaa24281906 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, + .features = VIG_SC7180_MASK_SDMA, .sblk = &sm8250_vig_sblk_0, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f8, - .features = VIG_SC7180_MASK, + .features = VIG_SC7180_MASK_SDMA, .sblk = &sm8250_vig_sblk_1, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f8, - .features = VIG_SC7180_MASK, + .features = VIG_SC7180_MASK_SDMA, .sblk = &sm8250_vig_sblk_2, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f8, - .features = VIG_SC7180_MASK, + .features = VIG_SC7180_MASK_SDMA, .sblk = &sm8250_vig_sblk_3, .xin_id = 12, .type = SSPP_TYPE_VIG, @@ -106,7 +106,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &sdm845_dma_sblk_0, .xin_id = 1, .type = SSPP_TYPE_DMA, @@ -114,7 +114,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &sdm845_dma_sblk_1, .xin_id = 5, .type = SSPP_TYPE_DMA, @@ -122,7 +122,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &sdm845_dma_sblk_2, .xin_id = 9, .type = SSPP_TYPE_DMA, @@ -130,7 +130,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f8, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &sdm845_dma_sblk_3, .xin_id = 13, .type = SSPP_TYPE_DMA, -- cgit v1.2.3 From 96ab215b2d5edc39310c00f50b33d8ab72ac3fe3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2023 22:04:03 +0200 Subject: drm/msm/a6xx: add QMP dependency When QMP is in a loadable module, the A6xx GPU driver fails to link as built-in: x86_64-linux-ld: drivers/gpu/drm/msm/adreno/a6xx_gmu.o: in function `a6xx_gmu_resume': a6xx_gmu.c:(.text+0xd62): undefined reference to `qmp_send' Add the usual dependency that still allows compiling without QMP but otherwise avoids the broken combination of options. 
Fixes: 88a0997f2f949 ("drm/msm/a6xx: Send ACD state to QMP at GMU resume") Signed-off-by: Arnd Bergmann Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/562945/ Link: https://lore.kernel.org/r/20231016200415.791090-1-arnd@kernel.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 6309a857ca31..ad70b611b44f 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -6,6 +6,7 @@ config DRM_MSM depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST depends on COMMON_CLK depends on IOMMU_SUPPORT + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n depends on QCOM_OCMEM || QCOM_OCMEM=n depends on QCOM_LLCC || QCOM_LLCC=n depends on QCOM_COMMAND_DB || QCOM_COMMAND_DB=n -- cgit v1.2.3 From ab8420418c2e67c81549ce483ff6a472c6dc2833 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 19 Oct 2023 13:44:19 +0300 Subject: drm/msm/dp: cleanup debugfs handling Currently there are two subdirs for DP debugfs files, e.g. DP-1, created by the drm core for the connector, and the msm_dp-DP-1, created by the DP driver itself. Merge those two, so that there are no extraneous connector-related subdirs. Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/563523/ Link: https://lore.kernel.org/r/20231019104419.1032329-1-dmitry.baryshkov@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 11 ------ drivers/gpu/drm/msm/dp/dp_debug.c | 69 ++++++++------------------------- drivers/gpu/drm/msm/dp/dp_debug.h | 23 ++++------- drivers/gpu/drm/msm/dp/dp_display.c | 5 +-- drivers/gpu/drm/msm/dp/dp_display.h | 1 + drivers/gpu/drm/msm/dp/dp_drm.c | 16 ++++++++ drivers/gpu/drm/msm/msm_drv.h | 6 --- 7 files changed, 42 insertions(+), 89 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index fe7267b3bff5..babec724c76a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -274,9 +274,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) struct dpu_kms *dpu_kms = to_dpu_kms(kms); void *p = dpu_hw_util_get_log_mask_ptr(); struct dentry *entry; - struct drm_device *dev; - struct msm_drm_private *priv; - int i; if (!p) return -EINVAL; @@ -285,9 +282,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) if (minor->type != DRM_MINOR_PRIMARY) return 0; - dev = dpu_kms->dev; - priv = dev->dev_private; - entry = debugfs_create_dir("debug", minor->debugfs_root); debugfs_create_x32(DPU_DEBUGFS_HWMASKNAME, 0600, entry, p); @@ -297,11 +291,6 @@ static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor) dpu_debugfs_core_irq_init(dpu_kms, entry); dpu_debugfs_sspp_init(dpu_kms, entry); - for (i = 0; i < ARRAY_SIZE(priv->dp); i++) { - if (priv->dp[i]) - msm_dp_debugfs_init(priv->dp[i], minor); - } - return dpu_core_perf_debugfs_init(dpu_kms, entry); } #endif diff --git a/drivers/gpu/drm/msm/dp/dp_debug.c b/drivers/gpu/drm/msm/dp/dp_debug.c index 3bba901afe33..6c281dc095b9 100644 --- a/drivers/gpu/drm/msm/dp/dp_debug.c +++ b/drivers/gpu/drm/msm/dp/dp_debug.c @@ -19,13 +19,9 @@ #define DEBUG_NAME "msm_dp" struct dp_debug_private { - struct dentry *root; - struct dp_link *link; struct dp_panel *panel; struct drm_connector *connector; - struct device *dev; - 
struct drm_device *drm_dev; struct dp_debug dp_debug; }; @@ -204,35 +200,33 @@ static const struct file_operations test_active_fops = { .write = dp_test_active_write }; -static void dp_debug_init(struct dp_debug *dp_debug, struct drm_minor *minor) +static void dp_debug_init(struct dp_debug *dp_debug, struct dentry *root, bool is_edp) { - char path[64]; struct dp_debug_private *debug = container_of(dp_debug, struct dp_debug_private, dp_debug); - snprintf(path, sizeof(path), "msm_dp-%s", debug->connector->name); - - debug->root = debugfs_create_dir(path, minor->debugfs_root); - - debugfs_create_file("dp_debug", 0444, debug->root, + debugfs_create_file("dp_debug", 0444, root, debug, &dp_debug_fops); - debugfs_create_file("msm_dp_test_active", 0444, - debug->root, - debug, &test_active_fops); + if (!is_edp) { + debugfs_create_file("msm_dp_test_active", 0444, + root, + debug, &test_active_fops); - debugfs_create_file("msm_dp_test_data", 0444, - debug->root, - debug, &dp_test_data_fops); + debugfs_create_file("msm_dp_test_data", 0444, + root, + debug, &dp_test_data_fops); - debugfs_create_file("msm_dp_test_type", 0444, - debug->root, - debug, &dp_test_type_fops); + debugfs_create_file("msm_dp_test_type", 0444, + root, + debug, &dp_test_type_fops); + } } struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_link *link, - struct drm_connector *connector, struct drm_minor *minor) + struct drm_connector *connector, + struct dentry *root, bool is_edp) { struct dp_debug_private *debug; struct dp_debug *dp_debug; @@ -253,46 +247,15 @@ struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, debug->dp_debug.debug_en = false; debug->link = link; debug->panel = panel; - debug->dev = dev; - debug->drm_dev = minor->dev; - debug->connector = connector; dp_debug = &debug->dp_debug; dp_debug->vdisplay = 0; dp_debug->hdisplay = 0; dp_debug->vrefresh = 0; - dp_debug_init(dp_debug, minor); + dp_debug_init(dp_debug, root, is_edp); return dp_debug; error: return ERR_PTR(rc); } - -static int dp_debug_deinit(struct dp_debug *dp_debug) -{ - struct dp_debug_private *debug; - - if (!dp_debug) - return -EINVAL; - - debug = container_of(dp_debug, struct dp_debug_private, dp_debug); - - debugfs_remove_recursive(debug->root); - - return 0; -} - -void dp_debug_put(struct dp_debug *dp_debug) -{ - struct dp_debug_private *debug; - - if (!dp_debug) - return; - - debug = container_of(dp_debug, struct dp_debug_private, dp_debug); - - dp_debug_deinit(dp_debug); - - devm_kfree(debug->dev, debug); -} diff --git a/drivers/gpu/drm/msm/dp/dp_debug.h b/drivers/gpu/drm/msm/dp/dp_debug.h index 124227873d58..9b3b2e702f65 100644 --- a/drivers/gpu/drm/msm/dp/dp_debug.h +++ b/drivers/gpu/drm/msm/dp/dp_debug.h @@ -34,7 +34,8 @@ struct dp_debug { * @panel: instance of panel module * @link: instance of link module * @connector: double pointer to display connector - * @minor: pointer to drm minor number after device registration + * @root: connector's debugfs root + * @is_edp: set for eDP connectors / panels * return: pointer to allocated debug module data * * This function sets up the debug module and provides a way @@ -43,31 +44,21 @@ struct dp_debug { struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_link *link, struct drm_connector *connector, - struct drm_minor *minor); - -/** - * dp_debug_put() - * - * Cleans up dp_debug instance - * - * @dp_debug: instance of dp_debug - */ -void dp_debug_put(struct dp_debug *dp_debug); + struct dentry *root, + bool is_edp); 
#else static inline struct dp_debug *dp_debug_get(struct device *dev, struct dp_panel *panel, struct dp_link *link, - struct drm_connector *connector, struct drm_minor *minor) + struct drm_connector *connector, + struct dentry *root, + bool is_edp) { return ERR_PTR(-EINVAL); } -static inline void dp_debug_put(struct dp_debug *dp_debug) -{ -} - #endif /* defined(CONFIG_DEBUG_FS) */ #endif /* _DP_DEBUG_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index e329e03e068d..2ac9d61501c7 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -715,7 +715,6 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) static void dp_display_deinit_sub_modules(struct dp_display_private *dp) { - dp_debug_put(dp->debug); dp_audio_put(dp->audio); dp_panel_put(dp->panel); dp_aux_put(dp->aux); @@ -1451,7 +1450,7 @@ bool msm_dp_wide_bus_available(const struct msm_dp *dp_display) return dp->wide_bus_en; } -void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor) +void dp_display_debugfs_init(struct msm_dp *dp_display, struct dentry *root, bool is_edp) { struct dp_display_private *dp; struct device *dev; @@ -1462,7 +1461,7 @@ void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor) dp->debug = dp_debug_get(dev, dp->panel, dp->link, dp->dp_display.connector, - minor); + root, is_edp); if (IS_ERR(dp->debug)) { rc = PTR_ERR(dp->debug); DRM_ERROR("failed to initialize debug, rc = %d\n", rc); diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index f66cdbc35785..5e2fbd8318e9 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -42,5 +42,6 @@ int dp_display_get_test_bpp(struct msm_dp *dp_display); void dp_display_signal_audio_start(struct msm_dp *dp_display); void dp_display_signal_audio_complete(struct msm_dp *dp_display); void dp_display_set_psr(struct msm_dp *dp, bool enter); +void dp_display_debugfs_init(struct msm_dp *dp_display, struct dentry *dentry, bool is_edp); #endif /* _DP_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index 40e7344180e3..21850a384b10 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -90,6 +90,13 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector * return rc; } +static void dp_bridge_debugfs_init(struct drm_bridge *bridge, struct dentry *root) +{ + struct msm_dp *dp = to_dp_bridge(bridge)->dp_display; + + dp_display_debugfs_init(dp, root, false); +} + static const struct drm_bridge_funcs dp_bridge_ops = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, @@ -105,6 +112,7 @@ static const struct drm_bridge_funcs dp_bridge_ops = { .hpd_enable = dp_bridge_hpd_enable, .hpd_disable = dp_bridge_hpd_disable, .hpd_notify = dp_bridge_hpd_notify, + .debugfs_init = dp_bridge_debugfs_init, }; static int edp_bridge_atomic_check(struct drm_bridge *drm_bridge, @@ -260,6 +268,13 @@ static enum drm_mode_status edp_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; } +static void edp_bridge_debugfs_init(struct drm_bridge *bridge, struct dentry *root) +{ + struct msm_dp *dp = to_dp_bridge(bridge)->dp_display; + + dp_display_debugfs_init(dp, root, true); +} + static const struct drm_bridge_funcs edp_bridge_ops = { .atomic_enable = edp_bridge_atomic_enable, .atomic_disable = 
edp_bridge_atomic_disable, @@ -270,6 +285,7 @@ static const struct drm_bridge_funcs edp_bridge_ops = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .atomic_check = edp_bridge_atomic_check, + .debugfs_init = edp_bridge_debugfs_init, }; int dp_bridge_init(struct msm_dp *dp_display, struct drm_device *dev, diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index cd5bf658df66..628ef3e663ea 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -389,7 +389,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, void msm_dp_irq_postinstall(struct msm_dp *dp_display); void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display); -void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor); bool msm_dp_wide_bus_available(const struct msm_dp *dp_display); #else @@ -415,11 +414,6 @@ static inline void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm { } -static inline void msm_dp_debugfs_init(struct msm_dp *dp_display, - struct drm_minor *minor) -{ -} - static inline bool msm_dp_wide_bus_available(const struct msm_dp *dp_display) { return false; -- cgit v1.2.3 From 062aeadeba1d3822b1704235de9a0a0024a78683 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:03:52 +0300 Subject: drm/msm/mdp5: use devres-managed allocation for configuration data Use devm_kzalloc to create configuration data structure. This allows us to remove corresponding kfree and drop mdp5_cfg_destroy() function. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546156/ Link: https://lore.kernel.org/r/20230708010407.3871346-3-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c | 24 +++++------------------- drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h | 1 - drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 2 -- 3 files changed, 5 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c index 694d54341337..c5179e4c393c 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c @@ -1350,23 +1350,17 @@ int mdp5_cfg_get_hw_rev(struct mdp5_cfg_handler *cfg_handler) return cfg_handler->revision; } -void mdp5_cfg_destroy(struct mdp5_cfg_handler *cfg_handler) -{ - kfree(cfg_handler); -} - struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, uint32_t major, uint32_t minor) { struct drm_device *dev = mdp5_kms->dev; struct mdp5_cfg_handler *cfg_handler; const struct mdp5_cfg_handler *cfg_handlers; - int i, ret = 0, num_handlers; + int i, num_handlers; - cfg_handler = kzalloc(sizeof(*cfg_handler), GFP_KERNEL); + cfg_handler = devm_kzalloc(dev->dev, sizeof(*cfg_handler), GFP_KERNEL); if (unlikely(!cfg_handler)) { - ret = -ENOMEM; - goto fail; + return ERR_PTR(-ENOMEM); } switch (major) { @@ -1381,8 +1375,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, default: DRM_DEV_ERROR(dev->dev, "unexpected MDP major version: v%d.%d\n", major, minor); - ret = -ENXIO; - goto fail; + return ERR_PTR(-ENXIO); } /* only after mdp5_cfg global pointer's init can we access the hw */ @@ -1396,8 +1389,7 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, if (unlikely(!mdp5_cfg)) { DRM_DEV_ERROR(dev->dev, "unexpected MDP minor revision: v%d.%d\n", major, minor); - ret = -ENXIO; - 
goto fail; + return ERR_PTR(-ENXIO); } cfg_handler->revision = minor; @@ -1406,10 +1398,4 @@ struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, DBG("MDP5: %s hw config selected", mdp5_cfg->name); return cfg_handler; - -fail: - if (cfg_handler) - mdp5_cfg_destroy(cfg_handler); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h index c2502cc33864..26c5d8b4ab46 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h @@ -121,6 +121,5 @@ int mdp5_cfg_get_hw_rev(struct mdp5_cfg_handler *cfg_hnd); struct mdp5_cfg_handler *mdp5_cfg_init(struct mdp5_kms *mdp5_kms, uint32_t major, uint32_t minor); -void mdp5_cfg_destroy(struct mdp5_cfg_handler *cfg_hnd); #endif /* __MDP5_CFG_H__ */ diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 11d9fc2c6bf5..9f0467c8f172 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -629,8 +629,6 @@ static void mdp5_destroy(struct mdp5_kms *mdp5_kms) mdp5_ctlm_destroy(mdp5_kms->ctlm); if (mdp5_kms->smp) mdp5_smp_destroy(mdp5_kms->smp); - if (mdp5_kms->cfg) - mdp5_cfg_destroy(mdp5_kms->cfg); for (i = 0; i < mdp5_kms->num_intfs; i++) kfree(mdp5_kms->intfs[i]); -- cgit v1.2.3 From 4c1f4c1f1b43dbe399b93cf712ce31cc0b5181e6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:03:53 +0300 Subject: drm/msm/mdp5: use devres-managed allocation for CTL manager data Use devm_kzalloc to create CTL manager data structure. This allows us to remove corresponding kfree and drop mdp5_ctlm_destroy() function. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546159/ Link: https://lore.kernel.org/r/20230708010407.3871346-4-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c | 21 ++++----------------- drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h | 1 - drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 2 -- 3 files changed, 4 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c index 1220f2b20e05..666de99a46a5 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c @@ -681,11 +681,6 @@ void mdp5_ctlm_hw_reset(struct mdp5_ctl_manager *ctl_mgr) } } -void mdp5_ctlm_destroy(struct mdp5_ctl_manager *ctl_mgr) -{ - kfree(ctl_mgr); -} - struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, void __iomem *mmio_base, struct mdp5_cfg_handler *cfg_hnd) { @@ -697,18 +692,16 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, unsigned long flags; int c, ret; - ctl_mgr = kzalloc(sizeof(*ctl_mgr), GFP_KERNEL); + ctl_mgr = devm_kzalloc(dev->dev, sizeof(*ctl_mgr), GFP_KERNEL); if (!ctl_mgr) { DRM_DEV_ERROR(dev->dev, "failed to allocate CTL manager\n"); - ret = -ENOMEM; - goto fail; + return ERR_PTR(-ENOMEM); } if (WARN_ON(ctl_cfg->count > MAX_CTL)) { DRM_DEV_ERROR(dev->dev, "Increase static pool size to at least %d\n", ctl_cfg->count); - ret = -ENOSPC; - goto fail; + return ERR_PTR(-ENOSPC); } /* initialize the CTL manager: */ @@ -727,7 +720,7 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, DRM_DEV_ERROR(dev->dev, "CTL_%d: base is null!\n", c); ret = -EINVAL; spin_unlock_irqrestore(&ctl_mgr->pool_lock, flags); - goto fail; + return ERR_PTR(ret); } ctl->ctlm = ctl_mgr; ctl->id = c; @@ -755,10 +748,4 @@ 
struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, DBG("Pool of %d CTLs created.", ctl_mgr->nctl); return ctl_mgr; - -fail: - if (ctl_mgr) - mdp5_ctlm_destroy(ctl_mgr); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h index c2af68aa77ae..9020e8efc4e4 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h @@ -17,7 +17,6 @@ struct mdp5_ctl_manager; struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, void __iomem *mmio_base, struct mdp5_cfg_handler *cfg_hnd); void mdp5_ctlm_hw_reset(struct mdp5_ctl_manager *ctlm); -void mdp5_ctlm_destroy(struct mdp5_ctl_manager *ctlm); /* * CTL prototypes: diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 9f0467c8f172..d2bd14e8dadf 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -625,8 +625,6 @@ static void mdp5_destroy(struct mdp5_kms *mdp5_kms) { int i; - if (mdp5_kms->ctlm) - mdp5_ctlm_destroy(mdp5_kms->ctlm); if (mdp5_kms->smp) mdp5_smp_destroy(mdp5_kms->smp); -- cgit v1.2.3 From 1ad175c2c884a8ee56fb669648ef655a5d3f22fe Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:03:54 +0300 Subject: drm/msm/mdp5: use devres-managed allocation for mixer data Use devm_kzalloc to create mixer data structure. This allows us to remove corresponding kfree and drop mdp5_mixer_destroy() function. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546166/ Link: https://lore.kernel.org/r/20230708010407.3871346-5-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 5 +---- drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c | 10 +++------- drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h | 4 ++-- 3 files changed, 6 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index d2bd14e8dadf..3189b3b0a743 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -211,9 +211,6 @@ static void mdp5_kms_destroy(struct msm_kms *kms) struct msm_gem_address_space *aspace = kms->aspace; int i; - for (i = 0; i < mdp5_kms->num_hwmixers; i++) - mdp5_mixer_destroy(mdp5_kms->hwmixers[i]); - for (i = 0; i < mdp5_kms->num_hwpipes; i++) mdp5_pipe_destroy(mdp5_kms->hwpipes[i]); @@ -720,7 +717,7 @@ static int hwmixer_init(struct mdp5_kms *mdp5_kms) for (i = 0; i < hw_cfg->lm.count; i++) { struct mdp5_hw_mixer *mixer; - mixer = mdp5_mixer_init(&hw_cfg->lm.instances[i]); + mixer = mdp5_mixer_init(dev, &hw_cfg->lm.instances[i]); if (IS_ERR(mixer)) { ret = PTR_ERR(mixer); DRM_DEV_ERROR(dev->dev, "failed to construct LM%d (%d)\n", diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c index 2536def2a000..2822b533f807 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c @@ -140,20 +140,16 @@ int mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer) return 0; } -void mdp5_mixer_destroy(struct mdp5_hw_mixer *mixer) -{ - kfree(mixer); -} - static const char * const mixer_names[] = { "LM0", "LM1", "LM2", "LM3", "LM4", "LM5", }; -struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm) +struct mdp5_hw_mixer *mdp5_mixer_init(struct drm_device *dev, + const struct mdp5_lm_instance *lm) { struct 
mdp5_hw_mixer *mixer; - mixer = kzalloc(sizeof(*mixer), GFP_KERNEL); + mixer = devm_kzalloc(dev->dev, sizeof(*mixer), GFP_KERNEL); if (!mixer) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h index 545ee223b9d7..2bedd75835bc 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h @@ -25,8 +25,8 @@ struct mdp5_hw_mixer_state { struct drm_crtc *hwmixer_to_crtc[8]; }; -struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm); -void mdp5_mixer_destroy(struct mdp5_hw_mixer *lm); +struct mdp5_hw_mixer *mdp5_mixer_init(struct drm_device *dev, + const struct mdp5_lm_instance *lm); int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc, uint32_t caps, struct mdp5_hw_mixer **mixer, struct mdp5_hw_mixer **r_mixer); -- cgit v1.2.3 From 323e9a18d6e195f44eaaa3d3fe92fce9073fe298 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:03:55 +0300 Subject: drm/msm/mdp5: use devres-managed allocation for pipe data Use devm_kzalloc to create pipe data structure. This allows us to remove corresponding kfree and drop mdp5_pipe_destroy() function. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546171/ Link: https://lore.kernel.org/r/20230708010407.3871346-6-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 6 +----- drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c | 10 +++------- drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h | 4 ++-- 3 files changed, 6 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 3189b3b0a743..b918c9dc0afb 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -209,10 +209,6 @@ static void mdp5_kms_destroy(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); struct msm_gem_address_space *aspace = kms->aspace; - int i; - - for (i = 0; i < mdp5_kms->num_hwpipes; i++) - mdp5_pipe_destroy(mdp5_kms->hwpipes[i]); if (aspace) { aspace->mmu->funcs->detach(aspace->mmu); @@ -645,7 +641,7 @@ static int construct_pipes(struct mdp5_kms *mdp5_kms, int cnt, for (i = 0; i < cnt; i++) { struct mdp5_hw_pipe *hwpipe; - hwpipe = mdp5_pipe_init(pipes[i], offsets[i], caps); + hwpipe = mdp5_pipe_init(dev, pipes[i], offsets[i], caps); if (IS_ERR(hwpipe)) { ret = PTR_ERR(hwpipe); DRM_DEV_ERROR(dev->dev, "failed to construct pipe for %s (%d)\n", diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c index e4b8a789835a..99b2c30b1d48 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c @@ -151,17 +151,13 @@ int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) return 0; } -void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe) -{ - kfree(hwpipe); -} - -struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, +struct mdp5_hw_pipe *mdp5_pipe_init(struct drm_device *dev, + enum mdp5_pipe pipe, uint32_t reg_offset, uint32_t caps) { struct mdp5_hw_pipe *hwpipe; - hwpipe = kzalloc(sizeof(*hwpipe), GFP_KERNEL); + hwpipe = devm_kzalloc(dev->dev, sizeof(*hwpipe), GFP_KERNEL); if (!hwpipe) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h index cca67938cab2..452138821f60 100644 --- 
a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h @@ -39,8 +39,8 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, struct mdp5_hw_pipe **r_hwpipe); int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); -struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, +struct mdp5_hw_pipe *mdp5_pipe_init(struct drm_device *dev, + enum mdp5_pipe pipe, uint32_t reg_offset, uint32_t caps); -void mdp5_pipe_destroy(struct mdp5_hw_pipe *hwpipe); #endif /* __MDP5_PIPE_H__ */ -- cgit v1.2.3 From 531d5313d934325c10721e1d22e352dc4c4a236e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:03:56 +0300 Subject: drm/msm/mdp5: use devres-managed allocation for SMP data Use devm_kzalloc to create SMP data structure. This allows us to remove corresponding kfree and drop mdp5_smp_destroy() function. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546172/ Link: https://lore.kernel.org/r/20230708010407.3871346-7-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 3 --- drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c | 19 ++++--------------- drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h | 1 - 3 files changed, 4 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index b918c9dc0afb..4cfb1a8e903e 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -618,9 +618,6 @@ static void mdp5_destroy(struct mdp5_kms *mdp5_kms) { int i; - if (mdp5_kms->smp) - mdp5_smp_destroy(mdp5_kms->smp); - for (i = 0; i < mdp5_kms->num_intfs; i++) kfree(mdp5_kms->intfs[i]); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c index b68682c1b5bc..8b59562e29e2 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c @@ -370,23 +370,17 @@ void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p) drm_modeset_unlock(&mdp5_kms->glob_state_lock); } -void mdp5_smp_destroy(struct mdp5_smp *smp) -{ - kfree(smp); -} struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_block *cfg) { + struct drm_device *dev = mdp5_kms->dev; struct mdp5_smp_state *state; struct mdp5_global_state *global_state; struct mdp5_smp *smp; - int ret; - smp = kzalloc(sizeof(*smp), GFP_KERNEL); - if (unlikely(!smp)) { - ret = -ENOMEM; - goto fail; - } + smp = devm_kzalloc(dev->dev, sizeof(*smp), GFP_KERNEL); + if (unlikely(!smp)) + return ERR_PTR(-ENOMEM); smp->dev = mdp5_kms->dev; smp->blk_cnt = cfg->mmb_count; @@ -400,9 +394,4 @@ struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_ memcpy(smp->reserved, cfg->reserved, sizeof(smp->reserved)); return smp; -fail: - if (smp) - mdp5_smp_destroy(smp); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h index ba5618e136c3..d8b6a11413d9 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h @@ -68,7 +68,6 @@ struct mdp5_smp; struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_block *cfg); -void mdp5_smp_destroy(struct mdp5_smp *smp); void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p); -- cgit v1.2.3 From 6de8288bf668ef4eba1258a523faf32a8d406d9e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: 
Sat, 8 Jul 2023 04:03:57 +0300 Subject: drm/msm/mdp5: use devres-managed allocation for INTF data Use devm_kzalloc to create INTF data structure. This allows us to remove corresponding kfree() call. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546163/ Link: https://lore.kernel.org/r/20230708010407.3871346-8-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 4cfb1a8e903e..48f447f4e183 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -616,11 +616,6 @@ fail: static void mdp5_destroy(struct mdp5_kms *mdp5_kms) { - int i; - - for (i = 0; i < mdp5_kms->num_intfs; i++) - kfree(mdp5_kms->intfs[i]); - if (mdp5_kms->rpm_enabled) pm_runtime_disable(&mdp5_kms->pdev->dev); @@ -741,7 +736,7 @@ static int interface_init(struct mdp5_kms *mdp5_kms) if (intf_types[i] == INTF_DISABLED) continue; - intf = kzalloc(sizeof(*intf), GFP_KERNEL); + intf = devm_kzalloc(dev->dev, sizeof(*intf), GFP_KERNEL); if (!intf) { DRM_DEV_ERROR(dev->dev, "failed to construct INTF%d\n", i); return -ENOMEM; -- cgit v1.2.3 From 6f235e3d6b1894a808cefdf32e45998a6459794f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:03:58 +0300 Subject: drm/msm/mdp5: use drmm-managed allocation for mdp5_crtc Change struct mdp5_crtc allocation to use drmm_crtc_alloc(). This removes the need to perform any actions on CRTC destruction. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546169/ Link: https://lore.kernel.org/r/20230708010407.3871346-9-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 86036dd4e1e8..4a3db2ea1689 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -172,14 +173,11 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val) drm_gem_object_put(val); } -static void mdp5_crtc_destroy(struct drm_crtc *crtc) +static void mdp5_crtc_flip_cleanup(struct drm_device *dev, void *ptr) { - struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); + struct mdp5_crtc *mdp5_crtc = ptr; - drm_crtc_cleanup(crtc); drm_flip_work_cleanup(&mdp5_crtc->unref_cursor_work); - - kfree(mdp5_crtc); } static inline u32 mdp5_lm_use_fg_alpha_mask(enum mdp_mixer_stage_id stage) @@ -1147,7 +1145,6 @@ static void mdp5_crtc_reset(struct drm_crtc *crtc) static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { .set_config = drm_atomic_helper_set_config, - .destroy = mdp5_crtc_destroy, .page_flip = drm_atomic_helper_page_flip, .reset = mdp5_crtc_reset, .atomic_duplicate_state = mdp5_crtc_duplicate_state, @@ -1161,7 +1158,6 @@ static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { static const struct drm_crtc_funcs mdp5_crtc_funcs = { .set_config = drm_atomic_helper_set_config, - .destroy = mdp5_crtc_destroy, .page_flip = drm_atomic_helper_page_flip, .reset = mdp5_crtc_reset, .atomic_duplicate_state = mdp5_crtc_duplicate_state, @@ -1327,10 +1323,16 @@ struct 
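A minimal sketch of the drmm CRTC pattern used here, with illustrative names rather than the driver's own: the wrapper struct is allocated against the drm_device, so it is freed automatically on the final device unref and the drm_crtc_funcs no longer need a .destroy hook.

#include <drm/drm_crtc.h>
#include <drm/drm_managed.h>

struct my_crtc {                               /* stands in for mdp5_crtc */
        struct drm_crtc base;
        /* driver-private CRTC state ... */
};

static struct drm_crtc *my_crtc_init(struct drm_device *dev,
                                     struct drm_plane *plane,
                                     struct drm_plane *cursor_plane,
                                     const struct drm_crtc_funcs *funcs)
{
        struct my_crtc *my_crtc;

        /* Allocation, drm_crtc_init_with_planes() and cleanup in one call. */
        my_crtc = drmm_crtc_alloc_with_planes(dev, struct my_crtc, base,
                                              plane, cursor_plane, funcs, NULL);
        if (IS_ERR(my_crtc))
                return ERR_CAST(my_crtc);

        return &my_crtc->base;
}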
drm_crtc *mdp5_crtc_init(struct drm_device *dev, { struct drm_crtc *crtc = NULL; struct mdp5_crtc *mdp5_crtc; + int ret; - mdp5_crtc = kzalloc(sizeof(*mdp5_crtc), GFP_KERNEL); - if (!mdp5_crtc) - return ERR_PTR(-ENOMEM); + mdp5_crtc = drmm_crtc_alloc_with_planes(dev, struct mdp5_crtc, base, + plane, cursor_plane, + cursor_plane ? + &mdp5_crtc_no_lm_cursor_funcs : + &mdp5_crtc_funcs, + NULL); + if (IS_ERR(mdp5_crtc)) + return ERR_CAST(mdp5_crtc); crtc = &mdp5_crtc->base; @@ -1346,13 +1348,11 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true; - drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, - cursor_plane ? - &mdp5_crtc_no_lm_cursor_funcs : - &mdp5_crtc_funcs, NULL); - drm_flip_work_init(&mdp5_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); + ret = drmm_add_action_or_reset(dev, mdp5_crtc_flip_cleanup, mdp5_crtc); + if (ret) + return ERR_PTR(ret); drm_crtc_helper_add(crtc, &mdp5_crtc_helper_funcs); -- cgit v1.2.3 From 669afee4a17ebb758889b1470b993efe9c66d5eb Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:03:59 +0300 Subject: drm/msm/mdp5: use drmm-managed allocation for mdp5_encoder Change struct mdp5_encoder allocation to use drmm_encoder_alloc(). This removes the need to perform any actions on encoder destruction. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546164/ Link: https://lore.kernel.org/r/20230708010407.3871346-10-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c | 29 ++++------------------------ 1 file changed, 4 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c index 79d67c495780..8db97083e14d 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c @@ -16,17 +16,6 @@ static struct mdp5_kms *get_kms(struct drm_encoder *encoder) return to_mdp5_kms(to_mdp_kms(priv->kms)); } -static void mdp5_encoder_destroy(struct drm_encoder *encoder) -{ - struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder); - drm_encoder_cleanup(encoder); - kfree(mdp5_encoder); -} - -static const struct drm_encoder_funcs mdp5_encoder_funcs = { - .destroy = mdp5_encoder_destroy, -}; - static void mdp5_vid_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -342,13 +331,11 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev, struct mdp5_encoder *mdp5_encoder; int enc_type = (intf->type == INTF_DSI) ? 
DRM_MODE_ENCODER_DSI : DRM_MODE_ENCODER_TMDS; - int ret; - mdp5_encoder = kzalloc(sizeof(*mdp5_encoder), GFP_KERNEL); - if (!mdp5_encoder) { - ret = -ENOMEM; - goto fail; - } + mdp5_encoder = drmm_encoder_alloc(dev, struct mdp5_encoder, base, + NULL, enc_type, NULL); + if (IS_ERR(mdp5_encoder)) + return ERR_CAST(mdp5_encoder); encoder = &mdp5_encoder->base; mdp5_encoder->ctl = ctl; @@ -356,15 +343,7 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev, spin_lock_init(&mdp5_encoder->intf_lock); - drm_encoder_init(dev, encoder, &mdp5_encoder_funcs, enc_type, NULL); - drm_encoder_helper_add(encoder, &mdp5_encoder_helper_funcs); return encoder; - -fail: - if (encoder) - mdp5_encoder_destroy(encoder); - - return ERR_PTR(ret); } -- cgit v1.2.3 From 54f1fbcb47d45bbd9737cd0115e44fd80acf4073 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:04:01 +0300 Subject: drm/msm/mdp4: use bulk regulators API for LCDC encoder Switch mdp4_lcdc_encoder to using regulator_bulk_* API instead of enumerating regulators by hand. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546173/ Link: https://lore.kernel.org/r/20230708010407.3871346-12-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c | 51 +++++++---------------- 1 file changed, 15 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 10eb3e5b218e..67c118bb30ca 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -18,7 +18,7 @@ struct mdp4_lcdc_encoder { struct drm_panel *panel; struct clk *lcdc_clk; unsigned long int pixclock; - struct regulator *regs[3]; + struct regulator_bulk_data regs[3]; bool enabled; uint32_t bsc; }; @@ -271,12 +271,10 @@ static void mdp4_lcdc_encoder_mode_set(struct drm_encoder *encoder, static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; struct mdp4_lcdc_encoder *mdp4_lcdc_encoder = to_mdp4_lcdc_encoder(encoder); struct mdp4_kms *mdp4_kms = get_kms(encoder); struct drm_panel *panel; - int i, ret; if (WARN_ON(!mdp4_lcdc_encoder->enabled)) return; @@ -301,11 +299,8 @@ static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder) clk_disable_unprepare(mdp4_lcdc_encoder->lcdc_clk); - for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) { - ret = regulator_disable(mdp4_lcdc_encoder->regs[i]); - if (ret) - DRM_DEV_ERROR(dev->dev, "failed to disable regulator: %d\n", ret); - } + regulator_bulk_disable(ARRAY_SIZE(mdp4_lcdc_encoder->regs), + mdp4_lcdc_encoder->regs); mdp4_lcdc_encoder->enabled = false; } @@ -319,7 +314,7 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder) struct mdp4_kms *mdp4_kms = get_kms(encoder); struct drm_panel *panel; uint32_t config; - int i, ret; + int ret; if (WARN_ON(mdp4_lcdc_encoder->enabled)) return; @@ -339,11 +334,10 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder) mdp4_crtc_set_config(encoder->crtc, config); mdp4_crtc_set_intf(encoder->crtc, INTF_LCDC_DTV, 0); - for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) { - ret = regulator_enable(mdp4_lcdc_encoder->regs[i]); - if (ret) - DRM_DEV_ERROR(dev->dev, "failed to enable regulator: %d\n", ret); - } + ret = regulator_bulk_enable(ARRAY_SIZE(mdp4_lcdc_encoder->regs), + mdp4_lcdc_encoder->regs); + if (ret) + DRM_DEV_ERROR(dev->dev, "failed 
to enable regulators: %d\n", ret); DBG("setting lcdc_clk=%lu", pc); ret = clk_set_rate(mdp4_lcdc_encoder->lcdc_clk, pc); @@ -385,7 +379,6 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, { struct drm_encoder *encoder = NULL; struct mdp4_lcdc_encoder *mdp4_lcdc_encoder; - struct regulator *reg; int ret; mdp4_lcdc_encoder = kzalloc(sizeof(*mdp4_lcdc_encoder), GFP_KERNEL); @@ -411,29 +404,15 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, } /* TODO: different regulators in other cases? */ - reg = devm_regulator_get(dev->dev, "lvds-vccs-3p3v"); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev->dev, "failed to get lvds-vccs-3p3v: %d\n", ret); - goto fail; - } - mdp4_lcdc_encoder->regs[0] = reg; - - reg = devm_regulator_get(dev->dev, "lvds-pll-vdda"); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev->dev, "failed to get lvds-pll-vdda: %d\n", ret); - goto fail; - } - mdp4_lcdc_encoder->regs[1] = reg; + mdp4_lcdc_encoder->regs[0].supply = "lvds-vccs-3p3v"; + mdp4_lcdc_encoder->regs[1].supply = "lvds-vccs-3p3v"; + mdp4_lcdc_encoder->regs[2].supply = "lvds-vdda"; - reg = devm_regulator_get(dev->dev, "lvds-vdda"); - if (IS_ERR(reg)) { - ret = PTR_ERR(reg); - DRM_DEV_ERROR(dev->dev, "failed to get lvds-vdda: %d\n", ret); + ret = devm_regulator_bulk_get(dev->dev, + ARRAY_SIZE(mdp4_lcdc_encoder->regs), + mdp4_lcdc_encoder->regs); + if (ret) goto fail; - } - mdp4_lcdc_encoder->regs[2] = reg; return encoder; -- cgit v1.2.3 From 783ad6e6312fe28a005a3378128cdbbaab211527 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:04:02 +0300 Subject: drm/msm/mdp4: use drmm-managed allocation for mdp4_crtc Change struct mdp4_crtc allocation to use drmm_crtc_alloc(). This removes the need to perform any actions on CRTC destruction. 
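The drm_flip_work queue used for cursor unreferencing is not itself drmm-managed, so the driver registers a release action for it. A hedged sketch of that part of the pattern, with hypothetical names:

#include <drm/drm_flip_work.h>
#include <drm/drm_managed.h>

struct my_crtc {
        struct drm_crtc base;
        struct drm_flip_work unref_cursor_work;
};

/* Runs automatically when the drm_device is released. */
static void my_crtc_flip_cleanup(struct drm_device *dev, void *ptr)
{
        struct my_crtc *my_crtc = ptr;

        drm_flip_work_cleanup(&my_crtc->unref_cursor_work);
}

static int my_crtc_init_flip_work(struct drm_device *dev,
                                  struct my_crtc *my_crtc,
                                  drm_flip_func_t worker)
{
        drm_flip_work_init(&my_crtc->unref_cursor_work, "unref cursor", worker);

        /* On failure the action fires immediately, so nothing leaks. */
        return drmm_add_action_or_reset(dev, my_crtc_flip_cleanup, my_crtc);
}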
Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546184/ Link: https://lore.kernel.org/r/20230708010407.3871346-13-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c | 33 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c index 169f9de4a12a..5e5c31b44a8a 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -123,16 +124,6 @@ static void unref_cursor_worker(struct drm_flip_work *work, void *val) drm_gem_object_put(val); } -static void mdp4_crtc_destroy(struct drm_crtc *crtc) -{ - struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - - drm_crtc_cleanup(crtc); - drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work); - - kfree(mdp4_crtc); -} - /* statically (for now) map planes to mixer stage (z-order): */ static const int idxs[] = { [VG1] = 1, @@ -475,7 +466,6 @@ static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) static const struct drm_crtc_funcs mdp4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, - .destroy = mdp4_crtc_destroy, .page_flip = drm_atomic_helper_page_flip, .cursor_set = mdp4_crtc_cursor_set, .cursor_move = mdp4_crtc_cursor_move, @@ -616,6 +606,13 @@ static const char *dma_names[] = { "DMA_P", "DMA_S", "DMA_E", }; +static void mdp4_crtc_flip_cleanup(struct drm_device *dev, void *ptr) +{ + struct mdp4_crtc *mdp4_crtc = ptr; + + drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work); +} + /* initialize crtc */ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, struct drm_plane *plane, int id, int ovlp_id, @@ -623,10 +620,13 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, { struct drm_crtc *crtc = NULL; struct mdp4_crtc *mdp4_crtc; + int ret; - mdp4_crtc = kzalloc(sizeof(*mdp4_crtc), GFP_KERNEL); - if (!mdp4_crtc) - return ERR_PTR(-ENOMEM); + mdp4_crtc = drmm_crtc_alloc_with_planes(dev, struct mdp4_crtc, base, + plane, NULL, + &mdp4_crtc_funcs, NULL); + if (IS_ERR(mdp4_crtc)) + return ERR_CAST(mdp4_crtc); crtc = &mdp4_crtc->base; @@ -648,9 +648,10 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, drm_flip_work_init(&mdp4_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); + ret = drmm_add_action_or_reset(dev, mdp4_crtc_flip_cleanup, mdp4_crtc); + if (ret) + return ERR_PTR(ret); - drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs, - NULL); drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs); return crtc; -- cgit v1.2.3 From e79571e8708bdcbd12c85ec0fdef3c6b99e34e0f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:04:03 +0300 Subject: drm/msm/mdp4: use drmm-managed allocation for mdp4_dsi_encoder Change struct mdp4_dsi_encoder allocation to use drmm_encoder_alloc(). This removes the need to perform any actions on this encoder destruction. 
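A condensed sketch of the drmm encoder pattern applied in this and the following two patches, again with hypothetical names: with drmm_encoder_alloc() the funcs pointer may be NULL and no .destroy callback or error-unwind path is needed.

#include <drm/drm_encoder.h>
#include <drm/drm_managed.h>

struct my_encoder {
        struct drm_encoder base;
        /* driver-private encoder state ... */
};

static struct drm_encoder *my_encoder_init(struct drm_device *dev)
{
        struct my_encoder *my_encoder;

        my_encoder = drmm_encoder_alloc(dev, struct my_encoder, base,
                                        NULL, DRM_MODE_ENCODER_DSI, NULL);
        if (IS_ERR(my_encoder))
                return ERR_CAST(my_encoder);

        /* Helper funcs are still added by hand; only cleanup is managed. */
        return &my_encoder->base;
}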
Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546180/ Link: https://lore.kernel.org/r/20230708010407.3871346-14-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c | 32 ++++-------------------- 1 file changed, 5 insertions(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c index 39b8fe53c29d..74dafe7106be 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c @@ -26,18 +26,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder) return to_mdp4_kms(to_mdp_kms(priv->kms)); } -static void mdp4_dsi_encoder_destroy(struct drm_encoder *encoder) -{ - struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder); - - drm_encoder_cleanup(encoder); - kfree(mdp4_dsi_encoder); -} - -static const struct drm_encoder_funcs mdp4_dsi_encoder_funcs = { - .destroy = mdp4_dsi_encoder_destroy, -}; - static void mdp4_dsi_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -148,28 +136,18 @@ static const struct drm_encoder_helper_funcs mdp4_dsi_encoder_helper_funcs = { /* initialize encoder */ struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev) { - struct drm_encoder *encoder = NULL; + struct drm_encoder *encoder; struct mdp4_dsi_encoder *mdp4_dsi_encoder; - int ret; - mdp4_dsi_encoder = kzalloc(sizeof(*mdp4_dsi_encoder), GFP_KERNEL); - if (!mdp4_dsi_encoder) { - ret = -ENOMEM; - goto fail; - } + mdp4_dsi_encoder = drmm_encoder_alloc(dev, struct mdp4_dsi_encoder, base, + NULL, DRM_MODE_ENCODER_DSI, NULL); + if (IS_ERR(mdp4_dsi_encoder)) + return ERR_CAST(mdp4_dsi_encoder); encoder = &mdp4_dsi_encoder->base; - drm_encoder_init(dev, encoder, &mdp4_dsi_encoder_funcs, - DRM_MODE_ENCODER_DSI, NULL); drm_encoder_helper_add(encoder, &mdp4_dsi_encoder_helper_funcs); return encoder; - -fail: - if (encoder) - mdp4_dsi_encoder_destroy(encoder); - - return ERR_PTR(ret); } #endif /* CONFIG_DRM_MSM_DSI */ -- cgit v1.2.3 From 93d6e1b82b93df1a7e58cd511663197f1208a8b0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:04:04 +0300 Subject: drm/msm/mdp4: use drmm-managed allocation for mdp4_dtv_encoder Change struct mdp4_dtv_encoder allocation to use drmm_encoder_alloc(). This removes the need to perform any actions on this encoder destruction. 
Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546178/ Link: https://lore.kernel.org/r/20230708010407.3871346-15-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c | 37 +++++------------------- 1 file changed, 7 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c index 88645dbc3785..3b70764b48c4 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c @@ -25,17 +25,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder) return to_mdp4_kms(to_mdp_kms(priv->kms)); } -static void mdp4_dtv_encoder_destroy(struct drm_encoder *encoder) -{ - struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder); - drm_encoder_cleanup(encoder); - kfree(mdp4_dtv_encoder); -} - -static const struct drm_encoder_funcs mdp4_dtv_encoder_funcs = { - .destroy = mdp4_dtv_encoder_destroy, -}; - static void mdp4_dtv_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -173,41 +162,29 @@ long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate) /* initialize encoder */ struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev) { - struct drm_encoder *encoder = NULL; + struct drm_encoder *encoder; struct mdp4_dtv_encoder *mdp4_dtv_encoder; - int ret; - mdp4_dtv_encoder = kzalloc(sizeof(*mdp4_dtv_encoder), GFP_KERNEL); - if (!mdp4_dtv_encoder) { - ret = -ENOMEM; - goto fail; - } + mdp4_dtv_encoder = drmm_encoder_alloc(dev, struct mdp4_dtv_encoder, base, + NULL, DRM_MODE_ENCODER_TMDS, NULL); + if (IS_ERR(mdp4_dtv_encoder)) + return ERR_CAST(mdp4_dtv_encoder); encoder = &mdp4_dtv_encoder->base; - drm_encoder_init(dev, encoder, &mdp4_dtv_encoder_funcs, - DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs); mdp4_dtv_encoder->hdmi_clk = devm_clk_get(dev->dev, "hdmi_clk"); if (IS_ERR(mdp4_dtv_encoder->hdmi_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get hdmi_clk\n"); - ret = PTR_ERR(mdp4_dtv_encoder->hdmi_clk); - goto fail; + return ERR_CAST(mdp4_dtv_encoder->hdmi_clk); } mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "tv_clk"); if (IS_ERR(mdp4_dtv_encoder->mdp_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get tv_clk\n"); - ret = PTR_ERR(mdp4_dtv_encoder->mdp_clk); - goto fail; + return ERR_CAST(mdp4_dtv_encoder->mdp_clk); } return encoder; - -fail: - if (encoder) - mdp4_dtv_encoder_destroy(encoder); - - return ERR_PTR(ret); } -- cgit v1.2.3 From 2c24668cc068fc69bf3ffb32234c7e6e91184a82 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 8 Jul 2023 04:04:05 +0300 Subject: drm/msm/mdp4: use drmm-managed allocation for mdp4_lcdc_encoder Change struct mdp4_lcdc_encoder allocation to use drmm_encoder_alloc(). This removes the need to perform any actions on this encoder destruction. 
Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/546176/ Link: https://lore.kernel.org/r/20230708010407.3871346-16-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c | 36 +++++------------------ 1 file changed, 7 insertions(+), 29 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 67c118bb30ca..576995ddce37 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -30,18 +30,6 @@ static struct mdp4_kms *get_kms(struct drm_encoder *encoder) return to_mdp4_kms(to_mdp_kms(priv->kms)); } -static void mdp4_lcdc_encoder_destroy(struct drm_encoder *encoder) -{ - struct mdp4_lcdc_encoder *mdp4_lcdc_encoder = - to_mdp4_lcdc_encoder(encoder); - drm_encoder_cleanup(encoder); - kfree(mdp4_lcdc_encoder); -} - -static const struct drm_encoder_funcs mdp4_lcdc_encoder_funcs = { - .destroy = mdp4_lcdc_encoder_destroy, -}; - /* this should probably be a helper: */ static struct drm_connector *get_connector(struct drm_encoder *encoder) { @@ -377,30 +365,26 @@ long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate) struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, struct device_node *panel_node) { - struct drm_encoder *encoder = NULL; + struct drm_encoder *encoder; struct mdp4_lcdc_encoder *mdp4_lcdc_encoder; int ret; - mdp4_lcdc_encoder = kzalloc(sizeof(*mdp4_lcdc_encoder), GFP_KERNEL); - if (!mdp4_lcdc_encoder) { - ret = -ENOMEM; - goto fail; - } + mdp4_lcdc_encoder = drmm_encoder_alloc(dev, struct mdp4_lcdc_encoder, base, + NULL, DRM_MODE_ENCODER_LVDS, NULL); + if (IS_ERR(mdp4_lcdc_encoder)) + return ERR_CAST(mdp4_lcdc_encoder); mdp4_lcdc_encoder->panel_node = panel_node; encoder = &mdp4_lcdc_encoder->base; - drm_encoder_init(dev, encoder, &mdp4_lcdc_encoder_funcs, - DRM_MODE_ENCODER_LVDS, NULL); drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs); /* TODO: do we need different pll in other cases? */ mdp4_lcdc_encoder->lcdc_clk = mpd4_lvds_pll_init(dev); if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get lvds_clk\n"); - ret = PTR_ERR(mdp4_lcdc_encoder->lcdc_clk); - goto fail; + return ERR_CAST(mdp4_lcdc_encoder->lcdc_clk); } /* TODO: different regulators in other cases? */ @@ -412,13 +396,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, ARRAY_SIZE(mdp4_lcdc_encoder->regs), mdp4_lcdc_encoder->regs); if (ret) - goto fail; + return ERR_PTR(ret); return encoder; - -fail: - if (encoder) - mdp4_lcdc_encoder_destroy(encoder); - - return ERR_PTR(ret); } -- cgit v1.2.3 From c6721b3c6423d8a348ae885a0f4c85e14f9bf85c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 28 Nov 2023 00:54:01 +0300 Subject: drm/msm/mdp4: flush vblank event on disable Flush queued events when disabling the crtc. This avoids timeouts when we come back and wait for dependencies (like the previous frame's flip_done). 
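The mechanism, in a hedged sketch with illustrative names: once the CRTC is off no further vblank interrupt will complete the queued pageflip event, so .atomic_disable sends it by hand under dev->event_lock; otherwise the next enable stalls waiting for the previous frame's flip_done.

static void my_crtc_atomic_disable(struct drm_crtc *crtc,
                                   struct drm_atomic_state *state)
{
        struct drm_device *dev = crtc->dev;
        unsigned long flags;

        /* ... shut the hardware pipe down first ... */

        if (crtc->state->event && !crtc->state->active) {
                spin_lock_irqsave(&dev->event_lock, flags);
                drm_crtc_send_vblank_event(crtc, crtc->state->event);
                crtc->state->event = NULL;
                spin_unlock_irqrestore(&dev->event_lock, flags);
        }
}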
Fixes: c8afe684c95c ("drm/msm: basic KMS driver for snapdragon") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/569127/ Link: https://lore.kernel.org/r/20231127215401.4064128-1-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c index 5e5c31b44a8a..75f93e346282 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c @@ -260,6 +260,7 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc, { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct mdp4_kms *mdp4_kms = get_kms(crtc); + unsigned long flags; DBG("%s", mdp4_crtc->name); @@ -272,6 +273,14 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc, mdp_irq_unregister(&mdp4_kms->base, &mdp4_crtc->err); mdp4_disable(mdp4_kms); + if (crtc->state->event && !crtc->state->active) { + WARN_ON(mdp4_crtc->event); + spin_lock_irqsave(&mdp4_kms->dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irqrestore(&mdp4_kms->dev->event_lock, flags); + } + mdp4_crtc->enabled = false; } -- cgit v1.2.3 From 82c2a5751227056a643455d080a389a4b0bfe184 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 1 Dec 2023 15:19:43 -0800 Subject: drm/msm/dp: tie dp_display_irq_handler() with dp driver Currently the dp_display_request_irq() is executed at msm_dp_modeset_init() which ties irq registering to the DPU device's life cycle, while depending on resources that are released as the DP device is torn down. Move register DP driver irq handler to dp_display_probe() to have dp_display_irq_handler() IRQ tied with DP device. In addition, use platform_get_irq() to retrieve irq number from platform device directly. 
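A minimal sketch of the resulting probe-time IRQ setup (driver and handler names are made up, not the DP driver's): platform_get_irq() reports errors as a negative errno, and requesting the IRQ against &pdev->dev ties the handler's lifetime to the DP platform device rather than to the DRM device.

#include <linux/interrupt.h>
#include <linux/platform_device.h>

static irqreturn_t my_dp_isr(int irq, void *data)
{
        /* ... dispatch HPD/AUX/controller interrupts ... */
        return IRQ_HANDLED;
}

static int my_dp_probe(struct platform_device *pdev)
{
        int irq, ret;

        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;

        ret = devm_request_irq(&pdev->dev, irq, my_dp_isr,
                               IRQF_TRIGGER_HIGH, "my_dp_isr", pdev);
        if (ret < 0)
                return ret;

        return 0;
}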
Changes in v5: -- reworded commit text as review comments at change #4 -- tear down component if failed at dp_display_request_irq() Changes in v4: -- delete dp->irq check at dp_display_request_irq() Changes in v3: -- move calling dp_display_irq_handler() to probe Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570069/ Link: https://lore.kernel.org/r/1701472789-25951-2-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 32 +++++++++++++------------------- drivers/gpu/drm/msm/dp/dp_display.h | 1 - 2 files changed, 13 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 2ac9d61501c7..36bdba19467a 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1183,26 +1183,18 @@ static irqreturn_t dp_display_irq_handler(int irq, void *dev_id) return ret; } -int dp_display_request_irq(struct msm_dp *dp_display) +static int dp_display_request_irq(struct dp_display_private *dp) { int rc = 0; - struct dp_display_private *dp; - - if (!dp_display) { - DRM_ERROR("invalid input\n"); - return -EINVAL; - } - - dp = container_of(dp_display, struct dp_display_private, dp_display); + struct platform_device *pdev = dp->dp_display.pdev; - dp->irq = irq_of_parse_and_map(dp->dp_display.pdev->dev.of_node, 0); + dp->irq = platform_get_irq(pdev, 0); if (!dp->irq) { DRM_ERROR("failed to get irq\n"); return -EINVAL; } - rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq, - dp_display_irq_handler, + rc = devm_request_irq(&pdev->dev, dp->irq, dp_display_irq_handler, IRQF_TRIGGER_HIGH, "dp_display_isr", dp); if (rc < 0) { DRM_ERROR("failed to request IRQ%u: %d\n", @@ -1277,13 +1269,21 @@ static int dp_display_probe(struct platform_device *pdev) platform_set_drvdata(pdev, &dp->dp_display); + rc = dp_display_request_irq(dp); + if (rc) + goto err; + rc = component_add(&pdev->dev, &dp_display_comp_ops); if (rc) { DRM_ERROR("component add failed, rc=%d\n", rc); - dp_display_deinit_sub_modules(dp); + goto err; } return rc; + +err: + dp_display_deinit_sub_modules(dp); + return rc; } static void dp_display_remove(struct platform_device *pdev) @@ -1536,12 +1536,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, dp_priv = container_of(dp_display, struct dp_display_private, dp_display); - ret = dp_display_request_irq(dp_display); - if (ret) { - DRM_ERROR("request_irq failed, ret=%d\n", ret); - return ret; - } - ret = dp_display_get_next_bridge(dp_display); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index 5e2fbd8318e9..b4a8be0abd9a 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -36,7 +36,6 @@ struct msm_dp { int dp_display_set_plugged_cb(struct msm_dp *dp_display, hdmi_codec_plugged_cb fn, struct device *codec_dev); int dp_display_get_modes(struct msm_dp *dp_display); -int dp_display_request_irq(struct msm_dp *dp_display); bool dp_display_check_video_test(struct msm_dp *dp_display); int dp_display_get_test_bpp(struct msm_dp *dp_display); void dp_display_signal_audio_start(struct msm_dp *dp_display); -- cgit v1.2.3 From aa1131204e587535b9b48ed6d1b9187942bc939e Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 1 Dec 2023 15:19:44 -0800 Subject: drm/msm/dp: rename is_connected with link_ready The is_connected flag is set to true after DP 
mainlink successfully finishes link training to enter into ST_MAINLINK_READY state rather than being set after the DP dongle is connected. Rename the is_connected flag with link_ready flag to match the state of DP driver's state machine. Changes in v5: -- reworded commit text according to review comments from change #4 Changes in v4: -- reworded commit text Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570063/ Link: https://lore.kernel.org/r/1701472789-25951-3-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 19 +++++++++---------- drivers/gpu/drm/msm/dp/dp_display.h | 2 +- drivers/gpu/drm/msm/dp/dp_drm.c | 14 +++++++------- 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 36bdba19467a..a3d7cb1943e2 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -351,12 +351,11 @@ static void dp_display_send_hpd_event(struct msm_dp *dp_display) drm_helper_hpd_irq_event(connector->dev); } - static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { - if ((hpd && dp->dp_display.is_connected) || - (!hpd && !dp->dp_display.is_connected)) { + if ((hpd && dp->dp_display.link_ready) || + (!hpd && !dp->dp_display.link_ready)) { drm_dbg_dp(dp->drm_dev, "HPD already %s\n", (hpd ? "on" : "off")); return 0; @@ -370,7 +369,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, dp->panel->dpcd, dp->panel->downstream_ports); } - dp->dp_display.is_connected = hpd; + dp->dp_display.link_ready = hpd; drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", dp->dp_display.connector_type, hpd); @@ -912,7 +911,7 @@ int dp_display_set_plugged_cb(struct msm_dp *dp_display, dp_display->plugged_cb = fn; dp_display->codec_dev = codec_dev; - plugged = dp_display->is_connected; + plugged = dp_display->link_ready; dp_display_handle_plugged_change(dp_display, plugged); return 0; @@ -1343,16 +1342,16 @@ static int dp_pm_resume(struct device *dev) * also only signal audio when disconnected */ if (dp->link->sink_count) { - dp->dp_display.is_connected = true; + dp->dp_display.link_ready = true; } else { - dp->dp_display.is_connected = false; + dp->dp_display.link_ready = false; dp_display_handle_plugged_change(dp_display, false); } drm_dbg_dp(dp->drm_dev, "After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n", dp->dp_display.connector_type, dp->link->sink_count, - dp->dp_display.is_connected, dp->core_initialized, + dp->dp_display.link_ready, dp->core_initialized, dp->phy_initialized, dp_display->power_on); mutex_unlock(&dp->event_mutex); @@ -1740,8 +1739,8 @@ void dp_bridge_hpd_notify(struct drm_bridge *bridge, return; } - if (!dp_display->is_connected && status == connector_status_connected) + if (!dp_display->link_ready && status == connector_status_connected) dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0); - else if (dp_display->is_connected && status == connector_status_disconnected) + else if (dp_display->link_ready && status == connector_status_disconnected) dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0); } diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index b4a8be0abd9a..102f3507d824 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -17,7 +17,7 @@ struct msm_dp { struct drm_bridge *bridge; struct drm_connector 
*connector; struct drm_bridge *next_bridge; - bool is_connected; + bool link_ready; bool audio_enabled; bool power_on; unsigned int connector_type; diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index 21850a384b10..0efb501490cc 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -24,10 +24,10 @@ static enum drm_connector_status dp_bridge_detect(struct drm_bridge *bridge) dp = to_dp_bridge(bridge)->dp_display; - drm_dbg_dp(dp->drm_dev, "is_connected = %s\n", - (dp->is_connected) ? "true" : "false"); + drm_dbg_dp(dp->drm_dev, "link_ready = %s\n", + (dp->link_ready) ? "true" : "false"); - return (dp->is_connected) ? connector_status_connected : + return (dp->link_ready) ? connector_status_connected : connector_status_disconnected; } @@ -40,8 +40,8 @@ static int dp_bridge_atomic_check(struct drm_bridge *bridge, dp = to_dp_bridge(bridge)->dp_display; - drm_dbg_dp(dp->drm_dev, "is_connected = %s\n", - (dp->is_connected) ? "true" : "false"); + drm_dbg_dp(dp->drm_dev, "link_ready = %s\n", + (dp->link_ready) ? "true" : "false"); /* * There is no protection in the DRM framework to check if the display @@ -55,7 +55,7 @@ static int dp_bridge_atomic_check(struct drm_bridge *bridge, * After that this piece of code can be removed. */ if (bridge->ops & DRM_BRIDGE_OP_HPD) - return (dp->is_connected) ? 0 : -ENOTCONN; + return (dp->link_ready) ? 0 : -ENOTCONN; return 0; } @@ -78,7 +78,7 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector * dp = to_dp_bridge(bridge)->dp_display; /* pluggable case assumes EDID is read when HPD */ - if (dp->is_connected) { + if (dp->link_ready) { rc = dp_display_get_modes(dp); if (rc <= 0) { DRM_ERROR("failed to get DP sink modes, rc=%d\n", rc); -- cgit v1.2.3 From e467e0bde881a667ccde9c7963f1042c01818059 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 1 Dec 2023 15:19:45 -0800 Subject: drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes Currently DP driver use drm_helper_hpd_irq_event(), bypassing drm bridge framework, to report HPD status changes to user space frame work. Replace it with drm_bridge_hpd_notify() since DP driver is part of drm bridge. Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570067/ Link: https://lore.kernel.org/r/1701472789-25951-4-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index a3d7cb1943e2..148ac1629ef2 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -340,26 +340,10 @@ static const struct component_ops dp_display_comp_ops = { .unbind = dp_display_unbind, }; -static void dp_display_send_hpd_event(struct msm_dp *dp_display) -{ - struct dp_display_private *dp; - struct drm_connector *connector; - - dp = container_of(dp_display, struct dp_display_private, dp_display); - - connector = dp->dp_display.connector; - drm_helper_hpd_irq_event(connector->dev); -} - static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { - if ((hpd && dp->dp_display.link_ready) || - (!hpd && !dp->dp_display.link_ready)) { - drm_dbg_dp(dp->drm_dev, "HPD already %s\n", - (hpd ? 
"on" : "off")); - return 0; - } + struct drm_bridge *bridge = dp->dp_display.bridge; /* reset video pattern flag on disconnect */ if (!hpd) { @@ -373,7 +357,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", dp->dp_display.connector_type, hpd); - dp_display_send_hpd_event(&dp->dp_display); + drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready); return 0; } -- cgit v1.2.3 From 9179fd9596a47de3ff2947101751315ea00bd7ef Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 1 Dec 2023 15:19:46 -0800 Subject: drm/msm/dp: move parser->parse() and dp_power_client_init() to probe Original both parser->parse() and dp_power_client_init() are done at dp_display_bind() since eDP population is done at binding time. In the preparation of having eDP population done at probe() time, move both function from dp_display_bind() to dp_display_probe(). Changes in v6: -- move dp_power_client_deinit() to remove() Changes in v5: -- explain why parser->parse() and dp_power_client_init() are moved to probe time -- tear down sub modules if failed Changes in v4: -- split this patch out of "incorporate pm_runtime framework into DP driver" patch Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570065/ Link: https://lore.kernel.org/r/1701472789-25951-5-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 148ac1629ef2..bae37e459d34 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -275,11 +275,6 @@ static int dp_display_bind(struct device *dev, struct device *master, dp->dp_display.drm_dev = drm; priv->dp[dp->id] = &dp->dp_display; - rc = dp->parser->parse(dp->parser); - if (rc) { - DRM_ERROR("device tree parsing failed\n"); - goto end; - } dp->drm_dev = drm; @@ -290,11 +285,6 @@ static int dp_display_bind(struct device *dev, struct device *master, goto end; } - rc = dp_power_client_init(dp->power); - if (rc) { - DRM_ERROR("Power client create failed\n"); - goto end; - } rc = dp_register_audio_driver(dev, dp->audio); if (rc) { @@ -327,7 +317,6 @@ static void dp_display_unbind(struct device *dev, struct device *master, of_dp_aux_depopulate_bus(dp->aux); - dp_power_client_deinit(dp->power); dp_unregister_audio_driver(dev, dp->audio); dp_aux_unregister(dp->aux); dp->drm_dev = NULL; @@ -1240,6 +1229,18 @@ static int dp_display_probe(struct platform_device *pdev) return -EPROBE_DEFER; } + rc = dp->parser->parse(dp->parser); + if (rc) { + DRM_ERROR("device tree parsing failed\n"); + goto err; + } + + rc = dp_power_client_init(dp->power); + if (rc) { + DRM_ERROR("Power client create failed\n"); + goto err; + } + /* setup event q */ mutex_init(&dp->event_mutex); init_waitqueue_head(&dp->event_q); @@ -1274,6 +1275,7 @@ static void dp_display_remove(struct platform_device *pdev) struct dp_display_private *dp = dev_get_dp_display_private(&pdev->dev); component_del(&pdev->dev, &dp_display_comp_ops); + dp_power_client_deinit(dp->power); dp_display_deinit_sub_modules(dp); platform_set_drvdata(pdev, NULL); -- cgit v1.2.3 From 5814b8bf086a1402a7fab4021aa58d4b84246db5 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 1 Dec 2023 15:19:47 -0800 Subject: drm/msm/dp: incorporate pm_runtime framework into DP 
driver Currently DP driver is executed independent of PM runtime framework. This leads msm eDP panel can not being detected by edp_panel driver during generic_edp_panel_probe() due to AUX DPCD read failed at edp panel driver. Incorporate PM runtime framework into DP driver so that host controller's power and clocks are enable/disable through PM runtime mechanism. Once PM runtime framework is incorporated into DP driver, waking up device from power up path is not necessary. Hence remove it. After incorporating pm_runtime framework into eDP/DP driver, dp_pm_suspend() to handle power off both DP phy and controller during suspend and dp_pm_resume() to handle power on both DP phy and controller during resume are not necessary. Therefore both dp_pm_suspend() and dp_pm_resume() are dropped and replace with dp_pm_runtime_suspend() and dp_pm_runtime_resume() respectively. Changes in v9: -- silent compiler warning message at dp_power_init() and dp_power_deinit() with W1 flag Changes in v7: -- add comments to dp_pm_runtime_resume() -- add comments to dp_bridge_hpd_enable() -- delete dp->hpd_state = ST_DISCONNECTED from dp_bridge_hpd_notify() Changes in v6: -- delete dp_power_client_deinit(dp->power); -- remove if (!dp->dp_display.is_edp) condition checkout at plug_handle() -- remove if (!dp->dp_display.is_edp) condition checkout at unplug_handle() -- add IRQF_NO_AUTOEN to devm_request_irq() -- add enable_irq() and disable_irq() to pm_runtime_resume()/suspend() -- del dp->hpd_state = ST_DISCONNECTED from dp_bridge_hpd_disable() Changes in v5: -- remove pm_runtime_put_autosuspend feature, use pm_runtime_put_sync() -- squash add pm_runtime_force_suspend()/resume() patch into this patch Changes in v4: -- reworded commit text to explain why pm_framework is required for edp panel -- reworded commit text to explain autosuspend is choiced -- delete EV_POWER_PM_GET and PM_EV_POWER_PUT from changes #3 -- delete dp_display_pm_get() and dp_display_pm_Put() from changes #3 -- return value from pm_runtime_resume_and_get() directly -- check return value of devm_pm_runtime_enable() -- delete pm_runtime_xxx from dp_display_remove() -- drop dp_display_host_init() from EV_HPD_INIT_SETUP -- drop both dp_pm_prepare() and dp_pm_compete() from this change -- delete ST_SUSPENDED state -- rewording commit text to add more details regrading the purpose of this change Changes in v3: -- incorporate removing pm_runtime_xx() from dp_pwer.c to this patch -- use pm_runtime_resume_and_get() instead of pm_runtime_get() -- error checking pm_runtime_resume_and_get() return value -- add EV_POWER_PM_GET and PM_EV_POWER_PUT to handle HPD_GPIO case -- replace dp_pm_suspend() with pm_runtime_force_suspend() -- replace dp_pm_resume() with pm_runtime_force_resume() Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570073/ Link: https://lore.kernel.org/r/1701472789-25951-6-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_aux.c | 5 + drivers/gpu/drm/msm/dp/dp_display.c | 181 ++++++++++++++---------------------- drivers/gpu/drm/msm/dp/dp_power.c | 32 +------ drivers/gpu/drm/msm/dp/dp_power.h | 11 --- 4 files changed, 77 insertions(+), 152 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index 8e3b677f35e6..10b6eeb7bebf 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -291,6 +291,10 @@ static ssize_t dp_aux_transfer(struct 
drm_dp_aux *dp_aux, return -EINVAL; } + ret = pm_runtime_resume_and_get(dp_aux->dev); + if (ret) + return ret; + mutex_lock(&aux->mutex); if (!aux->initted) { ret = -EIO; @@ -364,6 +368,7 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux, exit: mutex_unlock(&aux->mutex); + pm_runtime_put_sync(dp_aux->dev); return ret; } diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index bae37e459d34..05143ab9c6a4 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -49,7 +49,6 @@ enum { ST_CONNECTED, ST_DISCONNECT_PENDING, ST_DISPLAY_OFF, - ST_SUSPENDED, }; enum { @@ -309,10 +308,6 @@ static void dp_display_unbind(struct device *dev, struct device *master, struct dp_display_private *dp = dev_get_dp_display_private(dev); struct msm_drm_private *priv = dev_get_drvdata(master); - /* disable all HPD interrupts */ - if (dp->core_initialized) - dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); - kthread_stop(dp->ev_tsk); of_dp_aux_depopulate_bus(dp->aux); @@ -542,6 +537,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) { u32 state; int ret; + struct platform_device *pdev = dp->dp_display.pdev; mutex_lock(&dp->event_mutex); @@ -549,7 +545,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n", dp->dp_display.connector_type, state); - if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) { + if (state == ST_DISPLAY_OFF) { mutex_unlock(&dp->event_mutex); return 0; } @@ -566,7 +562,13 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) return 0; } - ret = dp_display_usbpd_configure_cb(&dp->dp_display.pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret) { + DRM_ERROR("failed to pm_runtime_resume\n"); + return ret; + } + + ret = dp_display_usbpd_configure_cb(&pdev->dev); if (ret) { /* link train failed */ dp->hpd_state = ST_DISCONNECTED; } else { @@ -598,6 +600,7 @@ static void dp_display_handle_plugged_change(struct msm_dp *dp_display, static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) { u32 state; + struct platform_device *pdev = dp->dp_display.pdev; mutex_lock(&dp->event_mutex); @@ -648,6 +651,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) dp->dp_display.connector_type, state); /* uevent will complete disconnection part */ + pm_runtime_put_sync(&pdev->dev); mutex_unlock(&dp->event_mutex); return 0; } @@ -663,7 +667,7 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n", dp->dp_display.connector_type, state); - if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) { + if (state == ST_DISPLAY_OFF) { mutex_unlock(&dp->event_mutex); return 0; } @@ -1075,7 +1079,6 @@ static int hpd_event_thread(void *data) switch (todo->event_id) { case EV_HPD_INIT_SETUP: - dp_display_host_init(dp_priv); break; case EV_HPD_PLUG_INT: dp_hpd_plug_handle(dp_priv, todo->data); @@ -1167,7 +1170,9 @@ static int dp_display_request_irq(struct dp_display_private *dp) } rc = devm_request_irq(&pdev->dev, dp->irq, dp_display_irq_handler, - IRQF_TRIGGER_HIGH, "dp_display_isr", dp); + IRQF_TRIGGER_HIGH|IRQF_NO_AUTOEN, + "dp_display_isr", dp); + if (rc < 0) { DRM_ERROR("failed to request IRQ%u: %d\n", dp->irq, rc); @@ -1253,6 +1258,10 @@ static int dp_display_probe(struct platform_device *pdev) platform_set_drvdata(pdev, &dp->dp_display); + rc = 
devm_pm_runtime_enable(&pdev->dev); + if (rc) + goto err; + rc = dp_display_request_irq(dp); if (rc) goto err; @@ -1275,115 +1284,52 @@ static void dp_display_remove(struct platform_device *pdev) struct dp_display_private *dp = dev_get_dp_display_private(&pdev->dev); component_del(&pdev->dev, &dp_display_comp_ops); - dp_power_client_deinit(dp->power); dp_display_deinit_sub_modules(dp); platform_set_drvdata(pdev, NULL); } -static int dp_pm_resume(struct device *dev) +static int dp_pm_runtime_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct msm_dp *dp_display = platform_get_drvdata(pdev); - struct dp_display_private *dp; - int sink_count = 0; - - dp = container_of(dp_display, struct dp_display_private, dp_display); - - mutex_lock(&dp->event_mutex); - - drm_dbg_dp(dp->drm_dev, - "Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n", - dp->dp_display.connector_type, dp->core_initialized, - dp->phy_initialized, dp_display->power_on); - - /* start from disconnected state */ - dp->hpd_state = ST_DISCONNECTED; - - /* turn on dp ctrl/phy */ - dp_display_host_init(dp); - - if (dp_display->is_edp) - dp_catalog_ctrl_hpd_enable(dp->catalog); + struct dp_display_private *dp = dev_get_dp_display_private(dev); - if (dp_catalog_link_is_connected(dp->catalog)) { - /* - * set sink to normal operation mode -- D0 - * before dpcd read - */ - dp_display_host_phy_init(dp); - dp_link_psm_config(dp->link, &dp->panel->link_info, false); - sink_count = drm_dp_read_sink_count(dp->aux); - if (sink_count < 0) - sink_count = 0; + disable_irq(dp->irq); + if (dp->dp_display.is_edp) { dp_display_host_phy_exit(dp); + dp_catalog_ctrl_hpd_disable(dp->catalog); } - - dp->link->sink_count = sink_count; - /* - * can not declared display is connected unless - * HDMI cable is plugged in and sink_count of - * dongle become 1 - * also only signal audio when disconnected - */ - if (dp->link->sink_count) { - dp->dp_display.link_ready = true; - } else { - dp->dp_display.link_ready = false; - dp_display_handle_plugged_change(dp_display, false); - } - - drm_dbg_dp(dp->drm_dev, - "After, type=%d sink=%d conn=%d core_init=%d phy_init=%d power=%d\n", - dp->dp_display.connector_type, dp->link->sink_count, - dp->dp_display.link_ready, dp->core_initialized, - dp->phy_initialized, dp_display->power_on); - - mutex_unlock(&dp->event_mutex); + dp_display_host_deinit(dp); return 0; } -static int dp_pm_suspend(struct device *dev) +static int dp_pm_runtime_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct msm_dp *dp_display = platform_get_drvdata(pdev); - struct dp_display_private *dp; - - dp = container_of(dp_display, struct dp_display_private, dp_display); - - mutex_lock(&dp->event_mutex); - - drm_dbg_dp(dp->drm_dev, - "Before, type=%d core_inited=%d phy_inited=%d power_on=%d\n", - dp->dp_display.connector_type, dp->core_initialized, - dp->phy_initialized, dp_display->power_on); - - /* mainlink enabled */ - if (dp_power_clk_status(dp->power, DP_CTRL_PM)) - dp_ctrl_off_link_stream(dp->ctrl); - - dp_display_host_phy_exit(dp); - - /* host_init will be called at pm_resume */ - dp_display_host_deinit(dp); - - dp->hpd_state = ST_SUSPENDED; - - drm_dbg_dp(dp->drm_dev, - "After, type=%d core_inited=%d phy_inited=%d power_on=%d\n", - dp->dp_display.connector_type, dp->core_initialized, - dp->phy_initialized, dp_display->power_on); + struct dp_display_private *dp = dev_get_dp_display_private(dev); - mutex_unlock(&dp->event_mutex); + /* + * for eDP, host 
cotroller, HPD block and PHY are enabled here + * but with HPD irq disabled + * + * for DP, only host controller is enabled here. + * HPD block is enabled at dp_bridge_hpd_enable() + * PHY will be enabled at plugin handler later + */ + dp_display_host_init(dp); + if (dp->dp_display.is_edp) { + dp_catalog_ctrl_hpd_enable(dp->catalog); + dp_display_host_phy_init(dp); + } + enable_irq(dp->irq); return 0; } static const struct dev_pm_ops dp_pm_ops = { - .suspend = dp_pm_suspend, - .resume = dp_pm_resume, + SET_RUNTIME_PM_OPS(dp_pm_runtime_suspend, dp_pm_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver dp_display_driver = { @@ -1466,10 +1412,6 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) aux_bus = of_get_child_by_name(dev->of_node, "aux-bus"); if (aux_bus && dp->is_edp) { - dp_display_host_init(dp_priv); - dp_catalog_ctrl_hpd_enable(dp_priv->catalog); - dp_display_host_phy_init(dp_priv); - /* * The code below assumes that the panel will finish probing * by the time devm_of_dp_aux_populate_ep_devices() returns. @@ -1566,6 +1508,11 @@ void dp_bridge_atomic_enable(struct drm_bridge *drm_bridge, dp_hpd_plug_handle(dp_display, 0); mutex_lock(&dp_display->event_mutex); + if (pm_runtime_resume_and_get(&dp->pdev->dev)) { + DRM_ERROR("failed to pm_runtime_resume\n"); + mutex_unlock(&dp_display->event_mutex); + return; + } state = dp_display->hpd_state; if (state != ST_DISPLAY_OFF && state != ST_MAINLINK_READY) { @@ -1630,10 +1577,9 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge, mutex_lock(&dp_display->event_mutex); state = dp_display->hpd_state; - if (state != ST_DISCONNECT_PENDING && state != ST_CONNECTED) { - mutex_unlock(&dp_display->event_mutex); - return; - } + if (state != ST_DISCONNECT_PENDING && state != ST_CONNECTED) + drm_dbg_dp(dp->drm_dev, "type=%d wrong hpd_state=%d\n", + dp->connector_type, state); dp_display_disable(dp_display); @@ -1646,6 +1592,8 @@ void dp_bridge_atomic_post_disable(struct drm_bridge *drm_bridge, } drm_dbg_dp(dp->drm_dev, "type=%d Done\n", dp->connector_type); + + pm_runtime_put_sync(&dp->pdev->dev); mutex_unlock(&dp_display->event_mutex); } @@ -1684,7 +1632,21 @@ void dp_bridge_hpd_enable(struct drm_bridge *bridge) struct msm_dp *dp_display = dp_bridge->dp_display; struct dp_display_private *dp = container_of(dp_display, struct dp_display_private, dp_display); + /* + * this is for external DP with hpd irq enabled case, + * step-1: dp_pm_runtime_resume() enable dp host only + * step-2: enable hdp block and have hpd irq enabled here + * step-3: waiting for plugin irq while phy is not initialized + * step-4: DP PHY is initialized at plugin handler before link training + * + */ mutex_lock(&dp->event_mutex); + if (pm_runtime_resume_and_get(&dp_display->pdev->dev)) { + DRM_ERROR("failed to resume power\n"); + mutex_unlock(&dp->event_mutex); + return; + } + dp_catalog_ctrl_hpd_enable(dp->catalog); /* enable HDP interrupts */ @@ -1706,6 +1668,8 @@ void dp_bridge_hpd_disable(struct drm_bridge *bridge) dp_catalog_ctrl_hpd_disable(dp->catalog); dp_display->internal_hpd = false; + + pm_runtime_put_sync(&dp_display->pdev->dev); mutex_unlock(&dp->event_mutex); } @@ -1720,11 +1684,6 @@ void dp_bridge_hpd_notify(struct drm_bridge *bridge, if (dp_display->internal_hpd) return; - if (!dp->core_initialized) { - drm_dbg_dp(dp->drm_dev, "not initialized\n"); - return; - } - if (!dp_display->link_ready && status == connector_status_connected) dp_add_event(dp, 
EV_HPD_PLUG_INT, 0, 0); else if (dp_display->link_ready && status == connector_status_disconnected) diff --git a/drivers/gpu/drm/msm/dp/dp_power.c b/drivers/gpu/drm/msm/dp/dp_power.c index 5cb84ca40e9e..c4843dd69f47 100644 --- a/drivers/gpu/drm/msm/dp/dp_power.c +++ b/drivers/gpu/drm/msm/dp/dp_power.c @@ -152,45 +152,17 @@ int dp_power_client_init(struct dp_power *dp_power) power = container_of(dp_power, struct dp_power_private, dp_power); - pm_runtime_enable(power->dev); - return dp_power_clk_init(power); } -void dp_power_client_deinit(struct dp_power *dp_power) -{ - struct dp_power_private *power; - - power = container_of(dp_power, struct dp_power_private, dp_power); - - pm_runtime_disable(power->dev); -} - int dp_power_init(struct dp_power *dp_power) { - int rc = 0; - struct dp_power_private *power = NULL; - - power = container_of(dp_power, struct dp_power_private, dp_power); - - pm_runtime_get_sync(power->dev); - - rc = dp_power_clk_enable(dp_power, DP_CORE_PM, true); - if (rc) - pm_runtime_put_sync(power->dev); - - return rc; + return dp_power_clk_enable(dp_power, DP_CORE_PM, true); } int dp_power_deinit(struct dp_power *dp_power) { - struct dp_power_private *power; - - power = container_of(dp_power, struct dp_power_private, dp_power); - - dp_power_clk_enable(dp_power, DP_CORE_PM, false); - pm_runtime_put_sync(power->dev); - return 0; + return dp_power_clk_enable(dp_power, DP_CORE_PM, false); } struct dp_power *dp_power_get(struct device *dev, struct dp_parser *parser) diff --git a/drivers/gpu/drm/msm/dp/dp_power.h b/drivers/gpu/drm/msm/dp/dp_power.h index a3dec200785e..55ada51edb57 100644 --- a/drivers/gpu/drm/msm/dp/dp_power.h +++ b/drivers/gpu/drm/msm/dp/dp_power.h @@ -80,17 +80,6 @@ int dp_power_clk_enable(struct dp_power *power, enum dp_pm_type pm_type, */ int dp_power_client_init(struct dp_power *power); -/** - * dp_power_clinet_deinit() - de-initialize clock and regulator modules - * - * @power: instance of power module - * return: 0 for success, error for failure. - * - * This API will de-initialize the DisplayPort's clocks and regulator - * modules. - */ -void dp_power_client_deinit(struct dp_power *power); - /** * dp_power_get() - configure and get the DisplayPort power module data * -- cgit v1.2.3 From 2b3aabc9caa2452653ef22bdfd15af65f0dff164 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 1 Dec 2023 15:19:48 -0800 Subject: drm/msm/dp: delete EV_HPD_INIT_SETUP EV_HPD_INIT_SETUP flag is used to trigger the initialization of external DP host controller. Since external DP host controller initialization had been incorporated into pm_runtime_resume(), this flag became obsolete. msm_dp_irq_postinstall() which triggers EV_HPD_INIT_SETUP event is obsoleted accordingly. 
Changes in v4: -- reworded commit text -- drop EV_HPD_INIT_SETUP -- drop msm_dp_irq_postinstall() Changes in v3: -- drop EV_HPD_INIT_SETUP and msm_dp_irq_postinstall() Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570075/ Link: https://lore.kernel.org/r/1701472789-25951-7-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 4 ---- drivers/gpu/drm/msm/dp/dp_display.c | 16 ---------------- drivers/gpu/drm/msm/msm_drv.h | 5 ----- 3 files changed, 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index babec724c76a..9b2efd7acc87 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -845,7 +845,6 @@ static int dpu_irq_postinstall(struct msm_kms *kms) { struct msm_drm_private *priv; struct dpu_kms *dpu_kms = to_dpu_kms(kms); - int i; if (!dpu_kms || !dpu_kms->dev) return -EINVAL; @@ -854,9 +853,6 @@ static int dpu_irq_postinstall(struct msm_kms *kms) if (!priv) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(priv->dp); i++) - msm_dp_irq_postinstall(priv->dp[i]); - return 0; } diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 05143ab9c6a4..50054f28e5b2 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -54,7 +54,6 @@ enum { enum { EV_NO_EVENT, /* hpd events */ - EV_HPD_INIT_SETUP, EV_HPD_PLUG_INT, EV_IRQ_HPD_INT, EV_HPD_UNPLUG_INT, @@ -1078,8 +1077,6 @@ static int hpd_event_thread(void *data) spin_unlock_irqrestore(&dp_priv->event_lock, flag); switch (todo->event_id) { - case EV_HPD_INIT_SETUP: - break; case EV_HPD_PLUG_INT: dp_hpd_plug_handle(dp_priv, todo->data); break; @@ -1359,19 +1356,6 @@ void __exit msm_dp_unregister(void) platform_driver_unregister(&dp_display_driver); } -void msm_dp_irq_postinstall(struct msm_dp *dp_display) -{ - struct dp_display_private *dp; - - if (!dp_display) - return; - - dp = container_of(dp_display, struct dp_display_private, dp_display); - - if (!dp_display->is_edp) - dp_add_event(dp, EV_HPD_INIT_SETUP, 0, 0); -} - bool msm_dp_wide_bus_available(const struct msm_dp *dp_display) { struct dp_display_private *dp; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 628ef3e663ea..a205127ccc93 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -386,7 +386,6 @@ int __init msm_dp_register(void); void __exit msm_dp_unregister(void); int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, struct drm_encoder *encoder); -void msm_dp_irq_postinstall(struct msm_dp *dp_display); void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display); bool msm_dp_wide_bus_available(const struct msm_dp *dp_display); @@ -406,10 +405,6 @@ static inline int msm_dp_modeset_init(struct msm_dp *dp_display, return -EINVAL; } -static inline void msm_dp_irq_postinstall(struct msm_dp *dp_display) -{ -} - static inline void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp_display) { } -- cgit v1.2.3 From e2969ee302522b0b29506a9d17cebb5b02b5ce5c Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 1 Dec 2023 15:19:49 -0800 Subject: drm/msm/dp: move of_dp_aux_populate_bus() to eDP probe() Currently eDP population is done at msm_dp_modeset_init() which happen at binding time. 
Move eDP population to be done at display probe time so that probe deferral cases can be handled effectively. wait_for_hpd_asserted callback is added during drm_dp_aux_init() to ensure eDP's HPD is up before proceeding eDP population. Changes in v5: -- inline dp_display_auxbus_population() and delete it Changes in v4: -- delete duplicate initialize code to dp_aux before drm_dp_aux_register() -- delete of_get_child_by_name(dev->of_node, "aux-bus") and inline the function -- not initialize rc = 0 Changes in v3: -- add done_probing callback into devm_of_dp_aux_populate_bus() Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570074/ Link: https://lore.kernel.org/r/1701472789-25951-8-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_aux.c | 34 ++++++++++++++++----- drivers/gpu/drm/msm/dp/dp_display.c | 59 +++++++++++++++---------------------- 2 files changed, 51 insertions(+), 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index 10b6eeb7bebf..03f4951c49f4 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -479,7 +479,6 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux) int dp_aux_register(struct drm_dp_aux *dp_aux) { - struct dp_aux_private *aux; int ret; if (!dp_aux) { @@ -487,12 +486,7 @@ int dp_aux_register(struct drm_dp_aux *dp_aux) return -EINVAL; } - aux = container_of(dp_aux, struct dp_aux_private, dp_aux); - - aux->dp_aux.name = "dpu_dp_aux"; - aux->dp_aux.dev = aux->dev; - aux->dp_aux.transfer = dp_aux_transfer; - ret = drm_dp_aux_register(&aux->dp_aux); + ret = drm_dp_aux_register(dp_aux); if (ret) { DRM_ERROR("%s: failed to register drm aux: %d\n", __func__, ret); @@ -507,6 +501,21 @@ void dp_aux_unregister(struct drm_dp_aux *dp_aux) drm_dp_aux_unregister(dp_aux); } +static int dp_wait_hpd_asserted(struct drm_dp_aux *dp_aux, + unsigned long wait_us) +{ + int ret; + struct dp_aux_private *aux; + + aux = container_of(dp_aux, struct dp_aux_private, dp_aux); + + pm_runtime_get_sync(aux->dev); + ret = dp_catalog_aux_wait_for_hpd_connect_state(aux->catalog); + pm_runtime_put_sync(aux->dev); + + return ret; +} + struct drm_dp_aux *dp_aux_get(struct device *dev, struct dp_catalog *catalog, bool is_edp) { @@ -530,6 +539,17 @@ struct drm_dp_aux *dp_aux_get(struct device *dev, struct dp_catalog *catalog, aux->catalog = catalog; aux->retry_cnt = 0; + /* + * Use the drm_dp_aux_init() to use the aux adapter + * before registering AUX with the DRM device so that + * msm eDP panel can be detected by generic_dep_panel_probe(). 
+ */ + aux->dp_aux.name = "dpu_dp_aux"; + aux->dp_aux.dev = dev; + aux->dp_aux.transfer = dp_aux_transfer; + aux->dp_aux.wait_hpd_asserted = dp_wait_hpd_asserted; + drm_dp_aux_init(&aux->dp_aux); + return &aux->dp_aux; } diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 50054f28e5b2..3a360c87e2dc 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1198,6 +1198,17 @@ static const struct msm_dp_desc *dp_display_get_desc(struct platform_device *pde return NULL; } +static int dp_auxbus_done_probe(struct drm_dp_aux *aux) +{ + int rc; + + rc = component_add(aux->dev, &dp_display_comp_ops); + if (rc) + DRM_ERROR("eDP component add failed, rc=%d\n", rc); + + return rc; +} + static int dp_display_probe(struct platform_device *pdev) { int rc = 0; @@ -1263,10 +1274,18 @@ static int dp_display_probe(struct platform_device *pdev) if (rc) goto err; - rc = component_add(&pdev->dev, &dp_display_comp_ops); - if (rc) { - DRM_ERROR("component add failed, rc=%d\n", rc); - goto err; + if (dp->dp_display.is_edp) { + rc = devm_of_dp_aux_populate_bus(dp->aux, dp_auxbus_done_probe); + if (rc) { + DRM_ERROR("eDP auxbus population failed, rc=%d\n", rc); + goto err; + } + } else { + rc = component_add(&pdev->dev, &dp_display_comp_ops); + if (rc) { + DRM_ERROR("component add failed, rc=%d\n", rc); + goto err; + } } return rc; @@ -1282,7 +1301,6 @@ static void dp_display_remove(struct platform_device *pdev) component_del(&pdev->dev, &dp_display_comp_ops); dp_display_deinit_sub_modules(dp); - platform_set_drvdata(pdev, NULL); } @@ -1388,29 +1406,8 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) { int rc; struct dp_display_private *dp_priv; - struct device_node *aux_bus; - struct device *dev; dp_priv = container_of(dp, struct dp_display_private, dp_display); - dev = &dp_priv->dp_display.pdev->dev; - aux_bus = of_get_child_by_name(dev->of_node, "aux-bus"); - - if (aux_bus && dp->is_edp) { - /* - * The code below assumes that the panel will finish probing - * by the time devm_of_dp_aux_populate_ep_devices() returns. - * This isn't a great assumption since it will fail if the - * panel driver is probed asynchronously but is the best we - * can do without a bigger driver reorganization. - */ - rc = of_dp_aux_populate_bus(dp_priv->aux, NULL); - of_node_put(aux_bus); - if (rc) - goto error; - } else if (dp->is_edp) { - DRM_ERROR("eDP aux_bus not found\n"); - return -ENODEV; - } /* * External bridges are mandatory for eDP interfaces: one has to @@ -1423,17 +1420,9 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) if (!dp->is_edp && rc == -ENODEV) return 0; - if (!rc) { + if (!rc) dp->next_bridge = dp_priv->parser->next_bridge; - return 0; - } -error: - if (dp->is_edp) { - of_dp_aux_depopulate_bus(dp_priv->aux); - dp_display_host_phy_exit(dp_priv); - dp_display_host_deinit(dp_priv); - } return rc; } -- cgit v1.2.3 From 0b414c731432917c83353c446e60ee838c9a9cfd Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 30 Nov 2023 11:21:18 -0800 Subject: drm/msm/dpu: Correct UBWC settings for sc8280xp The UBWC settings need to match between the display and GPU. When we updated the GPU settings, we forgot to make the corresponding update on the display side. 
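For quick reference, the corrected catalog entry reads roughly as follows once the one-line change below is applied. The field values are copied from the hunk later in this patch; fields outside the visible diff context are omitted, so take this as an illustrative sketch of the sc8280xp UBWC data rather than the complete structure:

static const struct msm_mdss_data sc8280xp_data = {
	.ubwc_dec_version = UBWC_4_0,
	.ubwc_swizzle = 6,
	.ubwc_static = 1,
	.highest_bank_bit = 3,	/* was 2; must match the GPU's UBWC programming */
	.macrotile_mode = 1,
};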
Reported-by: Steev Klimaszewski Fixes: 07e6de738aa6 ("drm/msm/a690: Fix reg values for a690") Signed-off-by: Rob Clark Tested-by: Steev Klimaszewski Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/569817/ Link: https://lore.kernel.org/r/20231130192119.32538-1-robdclark@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_mdss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 6865db1e3ce8..29bb38f0bb2c 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -545,7 +545,7 @@ static const struct msm_mdss_data sc8280xp_data = { .ubwc_dec_version = UBWC_4_0, .ubwc_swizzle = 6, .ubwc_static = 1, - .highest_bank_bit = 2, + .highest_bank_bit = 3, .macrotile_mode = 1, }; -- cgit v1.2.3 From 9cad81143ef000695b32e92048d8ec0481e5ea3d Mon Sep 17 00:00:00 2001 From: Paloma Arellano Date: Thu, 30 Nov 2023 14:47:37 -0800 Subject: drm/msm/dpu: Capture dpu snapshot when frame_done_timer timeouts Trigger a devcoredump to dump dpu registers and capture the drm atomic state when the frame_done_timer timeouts. v2: Optimize the format in which frame_done_timeout_cnt is incremented v3: Describe parameter frame_done_timeout_cnt in dpu_encoder_virt Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312010225.2OJWLKmA-lkp@intel.com/ Reviewed-by: Dmitry Baryshkov Signed-off-by: Paloma Arellano Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/569834/ Link: https://lore.kernel.org/r/20231130224740.24383-1-quic_parellan@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 1cf7ff6caff4..1c3560e1c625 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -151,6 +151,8 @@ enum dpu_enc_rc_states { * @crtc_frame_event_cb: callback handler for frame event * @crtc_frame_event_cb_data: callback handler private data * @frame_done_timeout_ms: frame done timeout in ms + * @frame_done_timeout_cnt: atomic counter tracking the number of frame + * done timeouts * @frame_done_timer: watchdog timer for frame done event * @disp_info: local copy of msm_display_info struct * @idle_pc_supported: indicate if idle power collaps is supported @@ -191,6 +193,7 @@ struct dpu_encoder_virt { void *crtc_frame_event_cb_data; atomic_t frame_done_timeout_ms; + atomic_t frame_done_timeout_cnt; struct timer_list frame_done_timer; struct msm_display_info disp_info; @@ -1204,6 +1207,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc, dpu_enc->dsc = dpu_encoder_get_dsc_config(drm_enc); + atomic_set(&dpu_enc->frame_done_timeout_cnt, 0); + if (disp_info->intf_type == INTF_DP) dpu_enc->wide_bus_en = msm_dp_wide_bus_available(priv->dp[index]); else if (disp_info->intf_type == INTF_DSI) @@ -2115,11 +2120,12 @@ static int _dpu_encoder_status_show(struct seq_file *s, void *data) for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - seq_printf(s, "intf:%d wb:%d vsync:%8d underrun:%8d ", + seq_printf(s, "intf:%d wb:%d vsync:%8d underrun:%8d frame_done_cnt:%d", phys->hw_intf ? phys->hw_intf->idx - INTF_0 : -1, phys->hw_wb ? 
phys->hw_wb->idx - WB_0 : -1, atomic_read(&phys->vsync_cnt), - atomic_read(&phys->underrun_cnt)); + atomic_read(&phys->underrun_cnt), + atomic_read(&dpu_enc->frame_done_timeout_cnt)); seq_printf(s, "mode: %s\n", dpu_encoder_helper_get_intf_type(phys->intf_mode)); } @@ -2341,6 +2347,9 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) DPU_ERROR_ENC(dpu_enc, "frame done timeout\n"); + if (atomic_inc_return(&dpu_enc->frame_done_timeout_cnt) == 1) + msm_disp_snapshot_state(drm_enc->dev); + event = DPU_ENCODER_FRAME_EVENT_ERROR; trace_dpu_enc_frame_done_timeout(DRMID(drm_enc), event); dpu_enc->crtc_frame_event_cb(dpu_enc->crtc_frame_event_cb_data, event); @@ -2392,6 +2401,7 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, goto fail; atomic_set(&dpu_enc->frame_done_timeout_ms, 0); + atomic_set(&dpu_enc->frame_done_timeout_cnt, 0); timer_setup(&dpu_enc->frame_done_timer, dpu_encoder_frame_done_timeout, 0); -- cgit v1.2.3 From 7cc2621f16b644bb7af37987cb471311641a9e56 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 30 Nov 2023 16:35:01 -0800 Subject: drm/msm/dpu: Add missing safe_lut_tbl in sc8180x catalog Similar to SC8280XP, the misconfigured SAFE logic causes rather significant delays in __arm_smmu_tlb_sync(), resulting in poor performance for things such as USB. Introduce appropriate SAFE values for SC8180X to correct this. Fixes: f3af2d6ee9ab ("drm/msm/dpu: Add SC8180x to hw catalog") Signed-off-by: Bjorn Andersson Reported-by: Anton Bambura Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/569840/ Link: https://lore.kernel.org/r/20231130-sc8180x-dpu-safe-lut-v1-1-a8a6bbac36b8@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index e07f4c8c25b9..9ffc8804a6fc 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -367,6 +367,7 @@ static const struct dpu_perf_cfg sc8180x_perf_data = { .min_llcc_ib = 800000, .min_dram_ib = 800000, .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff0, 0xf000, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sc7180_qos_linear), .entries = sc7180_qos_linear -- cgit v1.2.3 From 3d07a411b4faaf2b498760ccf12888f8de529de0 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 20 Jun 2023 13:43:20 +0200 Subject: drm/msm/dsi: Use pm_runtime_resume_and_get to prevent refcnt leaks This helper has been introduced to avoid programmer errors (missing _put calls leading to dangling refcnt) when using pm_runtime_get, use it. While at it, start checking the return value. 
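To make the refcount hazard concrete, here is a minimal, hypothetical enable path (not taken from this patch; the function name and device pointer are illustrative) showing the pattern the change adopts:

static int example_phy_enable_resource(struct device *dev)
{
	int ret;

	/*
	 * pm_runtime_get_sync() raises the usage count even when the resume
	 * fails, so every error path would need a pm_runtime_put() to avoid
	 * leaking the reference.  pm_runtime_resume_and_get() drops the
	 * count itself on failure, so the caller can simply propagate the
	 * error -- which is also why the return value is now checked.
	 */
	ret = pm_runtime_resume_and_get(dev);
	if (ret)
		return ret;

	/* ... access the hardware ... */

	pm_runtime_put_sync(dev);
	return 0;
}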
Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Fixes: 5c8290284402 ("drm/msm/dsi: Split PHY drivers to separate files") Patchwork: https://patchwork.freedesktop.org/patch/543350/ Link: https://lore.kernel.org/r/20230620-topic-dsiphy_rpm-v2-1-a11a751f34f0@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 05621e5e7d63..b6314bb66d2f 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -516,7 +516,9 @@ static int dsi_phy_enable_resource(struct msm_dsi_phy *phy) struct device *dev = &phy->pdev->dev; int ret; - pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); + if (ret) + return ret; ret = clk_prepare_enable(phy->ahb_clk); if (ret) { -- cgit v1.2.3 From 6ab502bc1cf3147ea1d8540d04b83a7a4cb6d1f1 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 20 Jun 2023 13:43:21 +0200 Subject: drm/msm/dsi: Enable runtime PM Some devices power the DSI PHY/PLL through a power rail that we model as a GENPD. Enable runtime PM to make it suspendable. Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/543352/ Link: https://lore.kernel.org/r/20230620-topic-dsiphy_rpm-v2-2-a11a751f34f0@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index b6314bb66d2f..e49ebd9f6326 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -691,6 +691,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(phy->ahb_clk), "Unable to get ahb clk\n"); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + return ret; + /* PLL init will call into clk_register which requires * register access, so we need to enable power and ahb clock. */ -- cgit v1.2.3 From 72207699ff76d4392244c8d9850aaef0160dc6b4 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Sun, 3 Dec 2023 13:48:37 +0200 Subject: drm/i915/display: use intel_bo_to_drm_bo in intel_fb.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are preparing for Xe driver. I915 and Xe object implementation are differing. Do not use i915_gem_object->base directly. Instead use intel_bo_to_drm_bo. Also use drm_gem_object_put instead of i915_gem_object_put. This should be ok as i915_gem_object_put is really just doing __drm_gem_object_put. 
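The equivalence this relies on can be pictured with a sketch of the accessor; the exact definition lives elsewhere in i915 and is only assumed here, so treat it as illustrative rather than authoritative:

/* Assumed shape of the helper: the GEM object is embedded in the i915 object. */
static inline struct drm_gem_object *
intel_bo_to_drm_bo(struct drm_i915_gem_object *obj)
{
	return &obj->base;
}

Under that assumption, drm_gem_object_put(intel_bo_to_drm_bo(obj)) releases the same reference that i915_gem_object_put(obj) would, since the latter is described above as a thin wrapper around __drm_gem_object_put().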
Signed-off-by: Jouni Högander Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231203114840.841311-2-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 77226ec00cb1..b17600aba577 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1657,10 +1657,10 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer * max_size = max(max_size, offset + size); } - if (mul_u32_u32(max_size, tile_size) > obj->base.size) { + if (mul_u32_u32(max_size, tile_size) > intel_bo_to_drm_bo(obj)->size) { drm_dbg_kms(&i915->drm, "fb too big for bo (need %llu bytes, have %zu bytes)\n", - mul_u32_u32(max_size, tile_size), obj->base.size); + mul_u32_u32(max_size, tile_size), intel_bo_to_drm_bo(obj)->size); return -EINVAL; } @@ -1889,7 +1889,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb, unsigned int *handle) { struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_private *i915 = to_i915(intel_bo_to_drm_bo(obj)->dev); if (i915_gem_object_is_userptr(obj)) { drm_dbg(&i915->drm, @@ -1897,7 +1897,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb, return -EINVAL; } - return drm_gem_handle_create(file, &obj->base, handle); + return drm_gem_handle_create(file, intel_bo_to_drm_bo(obj), handle); } struct frontbuffer_fence_cb { @@ -1975,7 +1975,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_i915_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *dev_priv = to_i915(intel_bo_to_drm_bo(obj)->dev); struct drm_framebuffer *fb = &intel_fb->base; u32 max_stride; unsigned int tiling, stride; @@ -2153,7 +2153,7 @@ intel_user_framebuffer_create(struct drm_device *dev, } fb = intel_framebuffer_create(obj, &mode_cmd); - i915_gem_object_put(obj); + drm_gem_object_put(intel_bo_to_drm_bo(obj)); return fb; } -- cgit v1.2.3 From 6383f69bd2ccd4765b22d60f12576891daa36c1a Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Sun, 3 Dec 2023 13:48:38 +0200 Subject: drm/i915/display: Convert intel_fb_modifier_to_tiling as non-static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are about to split i915 specific code from intel_fb.c. Convert intel_fb_modifier_to_tiling as non-static to allow calling it from split code. 
Signed-off-by: Jouni Högander Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231203114840.841311-3-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 40 ++++++++++++++++----------------- drivers/gpu/drm/i915/display/intel_fb.h | 2 ++ 2 files changed, 22 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index b17600aba577..f42a693f96c1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -301,6 +301,26 @@ lookup_format_info(const struct drm_format_info formats[], return NULL; } +unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) +{ + u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps & + INTEL_PLANE_CAP_TILING_MASK; + + switch (tiling_caps) { + case INTEL_PLANE_CAP_TILING_Y: + return I915_TILING_Y; + case INTEL_PLANE_CAP_TILING_X: + return I915_TILING_X; + case INTEL_PLANE_CAP_TILING_4: + case INTEL_PLANE_CAP_TILING_Yf: + case INTEL_PLANE_CAP_TILING_NONE: + return I915_TILING_NONE; + default: + MISSING_CASE(tiling_caps); + return I915_TILING_NONE; + } +} + /** * intel_fb_get_format_info: Get a modifier specific format information * @cmd: FB add command structure @@ -737,26 +757,6 @@ intel_fb_align_height(const struct drm_framebuffer *fb, return ALIGN(height, tile_height); } -static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) -{ - u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps & - INTEL_PLANE_CAP_TILING_MASK; - - switch (tiling_caps) { - case INTEL_PLANE_CAP_TILING_Y: - return I915_TILING_Y; - case INTEL_PLANE_CAP_TILING_X: - return I915_TILING_X; - case INTEL_PLANE_CAP_TILING_4: - case INTEL_PLANE_CAP_TILING_Yf: - case INTEL_PLANE_CAP_TILING_NONE: - return I915_TILING_NONE; - default: - MISSING_CASE(tiling_caps); - return I915_TILING_NONE; - } -} - bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier) { return HAS_DPT(i915) && modifier != DRM_FORMAT_MOD_LINEAR; diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index e85167d6bc34..23db6628f53e 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -95,4 +95,6 @@ intel_user_framebuffer_create(struct drm_device *dev, bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier); bool intel_fb_uses_dpt(const struct drm_framebuffer *fb); +unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier); + #endif /* __INTEL_FB_H__ */ -- cgit v1.2.3 From ae424921a5ca763fef4be46f900065db0b0870ae Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Sun, 3 Dec 2023 13:48:39 +0200 Subject: drm/i915/display: Handle invalid fb_modifier in intel_fb_modifier_to_tiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lookup_modifier is returning INTEL_PLANE_CAP_TILING_4 on invalid fb_modifier value. Use lookup_modifier_or_null in intel_fb_modifier_to_tiling and return I915_TILING_NONE in case lookup_modifier_or_null returns null. 
Signed-off-by: Jouni Högander Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231203114840.841311-4-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index f42a693f96c1..20d58a021a42 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -303,7 +303,14 @@ lookup_format_info(const struct drm_format_info formats[], unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) { - u8 tiling_caps = lookup_modifier(fb_modifier)->plane_caps & + const struct intel_modifier_desc *md; + u8 tiling_caps; + + md = lookup_modifier_or_null(fb_modifier); + if (!md) + return I915_TILING_NONE; + + tiling_caps = lookup_modifier_or_null(fb_modifier)->plane_caps & INTEL_PLANE_CAP_TILING_MASK; switch (tiling_caps) { -- cgit v1.2.3 From 5f449ed05da8bb2a470b17962978f0347ba399d2 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Sun, 3 Dec 2023 13:48:40 +0200 Subject: drm/i915/display: Split i915 specific code away from intel_fb.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are preparing for Xe driver. Backing object implementation is differing between i915 and Xe. Split i915 specific code into separate source file built only for i915. v9: - Use ERR_CAST v8: - return original error code from intel_fb_bo_lookup_valid_bo on failure v7: - drop #include - s/user_mode_cmd/mode_cmd/ - Use passed i915 pointer instead of to_i915(obj->base.dev) v6: Add missing intel_fb_bo.[ch] v5: - Keep drm_any_plane_has_format check in intel_fb.c - Use mode_cmd instead of user_mode_cmd for intel_fb_bo_lookup_valid_bo v4: Move drm_any_plane_has_format check into intel_fb_bo.c v3: Fix failure handling in intel_framebuffer_init v2: Couple of fixes to error value handling Signed-off-by: Jouni Högander Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231203114840.841311-5-jouni.hogander@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_fb.c | 73 ++++-------------------- drivers/gpu/drm/i915/display/intel_fb_bo.c | 92 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_fb_bo.h | 24 ++++++++ 4 files changed, 127 insertions(+), 63 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_fb_bo.c create mode 100644 drivers/gpu/drm/i915/display/intel_fb_bo.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index eadc9e612962..e777686190ca 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -280,6 +280,7 @@ i915-y += \ display/intel_dsb.o \ display/intel_dsb_buffer.o \ display/intel_fb.o \ + display/intel_fb_bo.o \ display/intel_fb_pin.o \ display/intel_fbc.o \ display/intel_fdi.o \ diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 20d58a021a42..96663f9ac431 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -4,7 +4,6 @@ */ #include -#include #include #include @@ -15,6 +14,7 @@ #include "intel_display_types.h" #include "intel_dpt.h" #include "intel_fb.h" +#include "intel_fb_bo.h" #include "intel_frontbuffer.h" #define check_array_bounds(i915, a, i) drm_WARN_ON(&(i915)->drm, (i) >= ARRAY_SIZE(a)) @@ -1985,7 +1985,6 @@ int intel_framebuffer_init(struct 
intel_framebuffer *intel_fb, struct drm_i915_private *dev_priv = to_i915(intel_bo_to_drm_bo(obj)->dev); struct drm_framebuffer *fb = &intel_fb->base; u32 max_stride; - unsigned int tiling, stride; int ret = -EINVAL; int i; @@ -1993,32 +1992,11 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (!intel_fb->frontbuffer) return -ENOMEM; - i915_gem_object_lock(obj, NULL); - tiling = i915_gem_object_get_tiling(obj); - stride = i915_gem_object_get_stride(obj); - i915_gem_object_unlock(obj); - - if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { - /* - * If there's a fence, enforce that - * the fb modifier and tiling mode match. - */ - if (tiling != I915_TILING_NONE && - tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { - drm_dbg_kms(&dev_priv->drm, - "tiling_mode doesn't match fb modifier\n"); - goto err; - } - } else { - if (tiling == I915_TILING_X) { - mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; - } else if (tiling == I915_TILING_Y) { - drm_dbg_kms(&dev_priv->drm, - "No Y tiling for legacy addfb\n"); - goto err; - } - } + ret = intel_fb_bo_framebuffer_init(intel_fb, obj, mode_cmd); + if (ret) + goto err; + ret = -EINVAL; if (!drm_any_plane_has_format(&dev_priv->drm, mode_cmd->pixel_format, mode_cmd->modifier[0])) { @@ -2028,17 +2006,6 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err; } - /* - * gen2/3 display engine uses the fence if present, - * so the tiling mode must match the fb modifier exactly. - */ - if (DISPLAY_VER(dev_priv) < 4 && - tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { - drm_dbg_kms(&dev_priv->drm, - "tiling_mode must match fb modifier exactly on gen2/3\n"); - goto err; - } - max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format, mode_cmd->modifier[0]); if (mode_cmd->pitches[0] > max_stride) { @@ -2050,17 +2017,6 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err; } - /* - * If there's a fence, enforce that - * the fb pitch and fence stride match. - */ - if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) { - drm_dbg_kms(&dev_priv->drm, - "pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], stride); - goto err; - } - /* FIXME need to adjust LINOFF/TILEOFF accordingly. 
*/ if (mode_cmd->offsets[0] != 0) { drm_dbg_kms(&dev_priv->drm, @@ -2144,20 +2100,11 @@ intel_user_framebuffer_create(struct drm_device *dev, struct drm_framebuffer *fb; struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; - struct drm_i915_private *i915; - - obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]); - if (!obj) - return ERR_PTR(-ENOENT); - - /* object is backed with LMEM for discrete */ - i915 = to_i915(obj->base.dev); - if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) { - /* object is "remote", not in local memory */ - i915_gem_object_put(obj); - drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n"); - return ERR_PTR(-EREMOTE); - } + struct drm_i915_private *i915 = to_i915(dev); + + obj = intel_fb_bo_lookup_valid_bo(i915, filp, &mode_cmd); + if (IS_ERR(obj)) + return ERR_CAST(obj); fb = intel_framebuffer_create(obj, &mode_cmd); drm_gem_object_put(intel_bo_to_drm_bo(obj)); diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.c b/drivers/gpu/drm/i915/display/intel_fb_bo.c new file mode 100644 index 000000000000..ce86eff7b226 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_fb_bo.c @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include "gem/i915_gem_object.h" + +#include "i915_drv.h" +#include "intel_fb.h" +#include "intel_fb_bo.h" + +int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int tiling, stride; + + i915_gem_object_lock(obj, NULL); + tiling = i915_gem_object_get_tiling(obj); + stride = i915_gem_object_get_stride(obj); + i915_gem_object_unlock(obj); + + if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { + /* + * If there's a fence, enforce that + * the fb modifier and tiling mode match. + */ + if (tiling != I915_TILING_NONE && + tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { + drm_dbg_kms(&i915->drm, + "tiling_mode doesn't match fb modifier\n"); + return -EINVAL; + } + } else { + if (tiling == I915_TILING_X) { + mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; + } else if (tiling == I915_TILING_Y) { + drm_dbg_kms(&i915->drm, + "No Y tiling for legacy addfb\n"); + return -EINVAL; + } + } + + /* + * gen2/3 display engine uses the fence if present, + * so the tiling mode must match the fb modifier exactly. + */ + if (DISPLAY_VER(i915) < 4 && + tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { + drm_dbg_kms(&i915->drm, + "tiling_mode must match fb modifier exactly on gen2/3\n"); + return -EINVAL; + } + + /* + * If there's a fence, enforce that + * the fb pitch and fence stride match. 
+ */ + if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) { + drm_dbg_kms(&i915->drm, + "pitch (%d) must match tiling stride (%d)\n", + mode_cmd->pitches[0], stride); + return -EINVAL; + } + + return 0; +} + +struct drm_i915_gem_object * +intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915, + struct drm_file *filp, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_lookup(filp, mode_cmd->handles[0]); + if (!obj) + return ERR_PTR(-ENOENT); + + /* object is backed with LMEM for discrete */ + if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) { + /* object is "remote", not in local memory */ + i915_gem_object_put(obj); + drm_dbg_kms(&i915->drm, "framebuffer must reside in local memory\n"); + return ERR_PTR(-EREMOTE); + } + + return obj; +} diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.h b/drivers/gpu/drm/i915/display/intel_fb_bo.h new file mode 100644 index 000000000000..dd06ceec8601 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_fb_bo.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __INTEL_FB_BO_H__ +#define __INTEL_FB_BO_H__ + +struct drm_file; +struct drm_mode_fb_cmd2; +struct drm_i915_gem_object; +struct drm_i915_private; +struct intel_framebuffer; + +int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); + +struct drm_i915_gem_object * +intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915, + struct drm_file *filp, + const struct drm_mode_fb_cmd2 *user_mode_cmd); + +#endif -- cgit v1.2.3 From 687eb09b1d76d01401dd9b22efb34931c3f1e21d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 29 Nov 2023 19:35:06 +0200 Subject: drm/i915/syncmap: squelch a sparse warning The code is fine, really, but tweak it to get rid of the sparse warning: drivers/gpu/drm/i915/selftests/i915_syncmap.c:80:54: warning: dubious: x | !y Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20231129173506.1194437-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/selftests/i915_syncmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c index 47f4ae18a1ef..88fa845e9f4a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_syncmap.c +++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c @@ -77,7 +77,7 @@ __sync_print(struct i915_syncmap *p, for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) { buf = __sync_print(__sync_child(p)[i], buf, sz, depth + 1, - last << 1 | !!(p->bitmap >> (i + 1)), + last << 1 | ((p->bitmap >> (i + 1)) ? 1 : 0), i); } } -- cgit v1.2.3 From 1116efbff3b106ec131e833f0e78f35c923d0104 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Nov 2023 16:01:28 +0200 Subject: drm/i915/display: Don't use "proxy" headers The driver uses math.h and not util_macros.h. 
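A rough illustration of the point, not taken from the patch (which exact math.h helpers intel_snps_phy.c relies on is an assumption here): including the specific header documents the real dependency instead of leaning on whatever an umbrella header happens to pull in.

	/* before: util_macros.h was only useful for what it pulls in indirectly */
	#include <linux/util_macros.h>

	/* after: name the header actually used, e.g. for DIV_ROUND_UP()/
	 * DIV_ROUND_CLOSEST()-style helpers provided by math.h */
	#include <linux/math.h>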
Signed-off-by: Andy Shevchenko Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231129140129.699767-1-andriy.shevchenko@linux.intel.com --- drivers/gpu/drm/i915/display/intel_snps_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c index ce5a73a4cc89..bc61e736f9b3 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_phy.c +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -3,7 +3,7 @@ * Copyright © 2019 Intel Corporation */ -#include +#include #include "i915_reg.h" #include "intel_ddi.h" -- cgit v1.2.3 From 4de77156a2acdec0014fa89fc1766a7410d726ff Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 1 Dec 2023 17:11:30 +0100 Subject: drm/i915/dsi: Use devm_gpiod_get() for all GPIOs soc_gpio_set_value() already uses devm_gpiod_get(), lets be consistent and use devm_gpiod_get() for all GPIOs. This allows removing the intel_dsi_vbt_gpio_cleanup() function, which only function was to put the GPIO-descriptors. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231201161130.23976-1-hdegoede@redhat.com --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 17 ++--------------- drivers/gpu/drm/i915/display/intel_dsi_vbt.h | 1 - drivers/gpu/drm/i915/display/vlv_dsi.c | 10 +--------- 3 files changed, 3 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 275d0218394c..a5d7fc8418c9 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -922,7 +922,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) gpiod_add_lookup_table(gpiod_lookup_table); if (want_panel_gpio) { - intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", flags); + intel_dsi->gpio_panel = devm_gpiod_get(dev->dev, "panel", flags); if (IS_ERR(intel_dsi->gpio_panel)) { drm_err(&dev_priv->drm, "Failed to own gpio for panel control\n"); @@ -932,7 +932,7 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) if (want_backlight_gpio) { intel_dsi->gpio_backlight = - gpiod_get(dev->dev, "backlight", flags); + devm_gpiod_get(dev->dev, "backlight", flags); if (IS_ERR(intel_dsi->gpio_backlight)) { drm_err(&dev_priv->drm, "Failed to own gpio for backlight control\n"); @@ -943,16 +943,3 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) if (gpiod_lookup_table) gpiod_remove_lookup_table(gpiod_lookup_table); } - -void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi) -{ - if (intel_dsi->gpio_panel) { - gpiod_put(intel_dsi->gpio_panel); - intel_dsi->gpio_panel = NULL; - } - - if (intel_dsi->gpio_backlight) { - gpiod_put(intel_dsi->gpio_backlight); - intel_dsi->gpio_backlight = NULL; - } -} diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h index 468d873fab1a..3462fcc760e6 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.h +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.h @@ -13,7 +13,6 @@ struct intel_dsi; bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on); -void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi); void intel_dsi_vbt_exec_sequence(struct intel_dsi 
*intel_dsi, enum mipi_seq seq_id); void intel_dsi_log_params(struct intel_dsi *intel_dsi); diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 5bda97c96810..9b33b8a74d64 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1532,16 +1532,8 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) } } -static void intel_dsi_encoder_destroy(struct drm_encoder *encoder) -{ - struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); - - intel_dsi_vbt_gpio_cleanup(intel_dsi); - intel_encoder_destroy(encoder); -} - static const struct drm_encoder_funcs intel_dsi_funcs = { - .destroy = intel_dsi_encoder_destroy, + .destroy = intel_encoder_destroy, }; static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector, -- cgit v1.2.3 From 801207c18834e289960c515bc41d243aa623763b Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 4 Dec 2023 09:13:14 -0800 Subject: drm/msm/dp: add a missing unlock in dp_hpd_plug_handle() When pm_runtime_resume_and_get() fails, unlock before returning. Fixes: 5814b8bf086a ("drm/msm/dp: incorporate pm_runtime framework into DP driver") Signed-off-by: Harshit Mogalapalli Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570395/ Link: https://lore.kernel.org/r/20231204171317.192427-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 3a360c87e2dc..20c5cf7986b2 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -564,6 +564,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) ret = pm_runtime_resume_and_get(&pdev->dev); if (ret) { DRM_ERROR("failed to pm_runtime_resume\n"); + mutex_unlock(&dp->event_mutex); return ret; } -- cgit v1.2.3 From 46b1f1b839cad600de3ad7ed999bd0155c528746 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 01:40:25 +0200 Subject: drm/msm/dpu: populate SSPP scaler block version The function _dpu_hw_sspp_setup_scaler3() passes and dpu_hw_setup_scaler3() uses scaler_blk.version to determine in which way the scaler (QSEED3) block should be programmed. However up to now we were not setting this field. Set it now, splitting the vig_sblk data which has different version fields. 
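As a worked example (illustrative only, not part of the change): the SSPP_SCALER_VER() macro added below packs the QSEED major/minor revision into the single u32 that dpu_hw_setup_scaler3() keys off, matching what the hardware reports in its QSEED3_HW_VERSION register.

	#include <linux/types.h>

	#define SSPP_SCALER_VER(maj, min)	(((maj) << 16) | (min))

	/* versions assigned in the hunks below */
	static const u32 sdm845_qseed3 = SSPP_SCALER_VER(1, 3);  /* 0x00010003 */
	static const u32 sm8150_qseed3 = SSPP_SCALER_VER(1, 4);  /* 0x00010004 */
	static const u32 sm8250_qseed4 = SSPP_SCALER_VER(3, 0);  /* 0x00030000 */
	static const u32 sm8450_qseed4 = SSPP_SCALER_VER(3, 1);  /* 0x00030001 */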
Reported-by: Marijn Suijten Fixes: 9b6f4fedaac2 ("drm/msm/dpu: Add SM6125 support") Fixes: 27f0df03f3ff ("drm/msm/dpu: Add SM6375 support") Fixes: 3186acba5cdc ("drm/msm/dpu: Add SM6350 support") Fixes: efcd0107727c ("drm/msm/dpu: add support for SM8550") Fixes: 4a352c2fc15a ("drm/msm/dpu: Introduce SC8280XP") Fixes: 0e91bcbb0016 ("drm/msm/dpu: Add SM8350 to hw catalog") Fixes: 100d7ef6995d ("drm/msm/dpu: add support for SM8450") Fixes: 3581b7062cec ("drm/msm/disp/dpu1: add support for display on SM6115") Fixes: dabfdd89eaa9 ("drm/msm/disp/dpu1: add inline rotation support for sc7280") Fixes: f3af2d6ee9ab ("drm/msm/dpu: Add SC8180x to hw catalog") Fixes: 94391a14fc27 ("drm/msm/dpu1: Add MSM8998 to hw catalog") Fixes: af776a3e1c30 ("drm/msm/dpu: add SM8250 to hw catalog") Fixes: 386fced3f76f ("drm/msm/dpu: add SM8150 to hw catalog") Fixes: b75ab05a3479 ("msm:disp:dpu1: add scaler support on SC7180 display") Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570098/ Link: https://lore.kernel.org/r/20231201234234.2065610-2-dmitry.baryshkov@linaro.org --- .../gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h | 8 +- .../drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h | 8 +- .../gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h | 8 +- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 95 +++++++++++++++++----- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 3 +- 5 files changed, 87 insertions(+), 35 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 9392ad2b4d3f..c022e57864a4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -77,7 +77,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_0, + .sblk = &sm8150_vig_sblk_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -85,7 +85,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_1, + .sblk = &sm8150_vig_sblk_1, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -93,7 +93,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_2, + .sblk = &sm8150_vig_sblk_2, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -101,7 +101,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_3, + .sblk = &sm8150_vig_sblk_3, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index 9ffc8804a6fc..cb0758f0829d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_0, + .sblk = &sm8150_vig_sblk_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = 
DPU_CLK_CTRL_VIG0, @@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_1, + .sblk = &sm8150_vig_sblk_1, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_2, + .sblk = &sm8150_vig_sblk_2, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sdm845_vig_sblk_3, + .sblk = &sm8150_vig_sblk_3, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index d18145c226da..72b0f547242f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x32c, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_0, + .sblk = &sm8450_vig_sblk_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x32c, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_1, + .sblk = &sm8450_vig_sblk_1, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x32c, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_2, + .sblk = &sm8450_vig_sblk_2, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x32c, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_3, + .sblk = &sm8450_vig_sblk_3, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index a1aada630780..7056c08b9957 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -249,14 +249,17 @@ static const uint32_t wb2_formats[] = { * SSPP sub blocks config *************************************************************/ +#define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min)) + /* SSPP common configuration */ -#define _VIG_SBLK(sdma_pri, qseed_ver) \ +#define _VIG_SBLK(sdma_pri, qseed_ver, scaler_ver) \ { \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ .maxupscale = MAX_UPSCALE_RATIO, \ .smart_dma_priority = sdma_pri, \ .scaler_blk = {.name = "scaler", \ .id = qseed_ver, \ + .version = scaler_ver, \ .base = 0xa00, .len = 0xa0,}, \ .csc_blk = {.name = "csc", \ .id = DPU_SSPP_CSC_10BIT, \ @@ -268,13 +271,14 @@ static const uint32_t wb2_formats[] = { .rotation_cfg = NULL, \ } -#define _VIG_SBLK_ROT(sdma_pri, qseed_ver, rot_cfg) \ +#define _VIG_SBLK_ROT(sdma_pri, qseed_ver, scaler_ver, rot_cfg) \ { \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ .maxupscale = MAX_UPSCALE_RATIO, \ .smart_dma_priority = 
sdma_pri, \ .scaler_blk = {.name = "scaler", \ .id = qseed_ver, \ + .version = scaler_ver, \ .base = 0xa00, .len = 0xa0,}, \ .csc_blk = {.name = "csc", \ .id = DPU_SSPP_CSC_10BIT, \ @@ -298,13 +302,17 @@ static const uint32_t wb2_formats[] = { } static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3); + _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3); + _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3); + _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3); + _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 2)); static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = { .rot_maxheight = 1088, @@ -313,13 +321,30 @@ static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = { }; static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = - _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3); + _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 = - _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3); + _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3); + _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3); + _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 3)); + +static const struct dpu_sspp_sub_blks sm8150_vig_sblk_0 = + _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 4)); +static const struct dpu_sspp_sub_blks sm8150_vig_sblk_1 = + _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 4)); +static const struct dpu_sspp_sub_blks sm8150_vig_sblk_2 = + _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 4)); +static const struct dpu_sspp_sub_blks sm8150_vig_sblk_3 = + _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3, + SSPP_SCALER_VER(1, 4)); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(1); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK(2); @@ -327,34 +352,60 @@ static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(3); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(4); static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 = - _VIG_SBLK(4, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(4, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 = - _VIG_SBLK_ROT(4, DPU_SSPP_SCALER_QSEED4, &dpu_rot_sc7280_cfg_v2); + _VIG_SBLK_ROT(4, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 0), + &dpu_rot_sc7280_cfg_v2); static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 = - _VIG_SBLK(2, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(2, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 = - _VIG_SBLK(3, DPU_SSPP_SCALER_QSEED3LITE); + _VIG_SBLK(3, DPU_SSPP_SCALER_QSEED3LITE, + SSPP_SCALER_VER(2, 4)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 = - _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 = - _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4, 
+ SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 0)); + +static const struct dpu_sspp_sub_blks sm8450_vig_sblk_0 = + _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 1)); +static const struct dpu_sspp_sub_blks sm8450_vig_sblk_1 = + _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 1)); +static const struct dpu_sspp_sub_blks sm8450_vig_sblk_2 = + _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 1)); +static const struct dpu_sspp_sub_blks sm8450_vig_sblk_3 = + _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 = - _VIG_SBLK(9, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(9, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 = - _VIG_SBLK(10, DPU_SSPP_SCALER_QSEED4); + _VIG_SBLK(10, DPU_SSPP_SCALER_QSEED4, + SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(5); static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(6); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index df024e10d3a3..62445075306a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -265,7 +265,8 @@ enum { /** * struct dpu_scaler_blk: Scaler information * @info: HW register and features supported by this sub-blk - * @version: qseed block revision + * @version: qseed block revision, on QSEED3+ platforms this is the value of + * scaler_blk.base + QSEED3_HW_VERSION registers. */ struct dpu_scaler_blk { DPU_HW_SUBBLK_INFO; -- cgit v1.2.3 From 07b852c91cbe2c9985d218ab7e2dd1ba51beb9e6 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sat, 2 Dec 2023 01:40:26 +0200 Subject: drm/msm/dpu: Drop unused get_scaler_ver callback from SSPP This pointer callback is never used and should be removed. 
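A minimal sketch of where the scaler revision now comes from (illustrative; the helper name is made up): with the MMIO read helper gone, the value lives in the catalog entry populated by the previous patch and can be read straight from the SSPP capabilities.

	/* hypothetical helper: catalog lookup instead of dpu_hw_get_scaler3_ver() */
	static u32 example_sspp_scaler_version(const struct dpu_hw_sspp *ctx)
	{
		/* set via SSPP_SCALER_VER() in dpu_hw_catalog.c */
		return ctx->cap->sblk->scaler_blk.version;
	}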
Signed-off-by: Marijn Suijten Reviewed-by: Dmitry Baryshkov [DB: dropped the helpers completely, which are unused now] Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570096/ Link: https://lore.kernel.org/r/20231201234234.2065610-3-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c | 13 +------------ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h | 6 ------ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c | 6 ------ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 3 --- 4 files changed, 1 insertion(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index 8e3c65989c49..b408d456c123 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -395,15 +395,6 @@ static void _dpu_hw_sspp_setup_scaler3(struct dpu_hw_sspp *ctx, format); } -static u32 _dpu_hw_sspp_get_scaler3_ver(struct dpu_hw_sspp *ctx) -{ - if (!ctx) - return 0; - - return dpu_hw_get_scaler3_ver(&ctx->hw, - ctx->cap->sblk->scaler_blk.base); -} - /* * dpu_hw_sspp_setup_rects() */ @@ -616,10 +607,8 @@ static void _setup_layer_ops(struct dpu_hw_sspp *c, if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) || test_bit(DPU_SSPP_SCALER_QSEED3LITE, &features) || - test_bit(DPU_SSPP_SCALER_QSEED4, &features)) { + test_bit(DPU_SSPP_SCALER_QSEED4, &features)) c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3; - c->ops.get_scaler_ver = _dpu_hw_sspp_get_scaler3_ver; - } if (test_bit(DPU_SSPP_CDP, &features)) c->ops.setup_cdp = dpu_hw_sspp_setup_cdp; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h index f93969fddb22..b094ea23ad32 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h @@ -296,12 +296,6 @@ struct dpu_hw_sspp_ops { struct dpu_hw_scaler3_cfg *scaler3_cfg, const struct dpu_format *format); - /** - * get_scaler_ver - get scaler h/w version - * @ctx: Pointer to pipe context - */ - u32 (*get_scaler_ver)(struct dpu_hw_sspp *ctx); - /** * setup_cdp - setup client driven prefetch * @pipe: Pointer to software pipe context diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c index 18b16b2d2bf5..0b05061e3e62 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c @@ -381,12 +381,6 @@ end: DPU_REG_WRITE(c, QSEED3_OP_MODE + scaler_offset, op_mode); } -u32 dpu_hw_get_scaler3_ver(struct dpu_hw_blk_reg_map *c, - u32 scaler_offset) -{ - return DPU_REG_READ(c, QSEED3_HW_VERSION + scaler_offset); -} - void dpu_hw_csc_setup(struct dpu_hw_blk_reg_map *c, u32 csc_reg_off, const struct dpu_csc_cfg *data, bool csc10) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index 4bea139081bc..fe083b2e5696 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -340,9 +340,6 @@ void dpu_hw_setup_scaler3(struct dpu_hw_blk_reg_map *c, u32 scaler_offset, u32 scaler_version, const struct dpu_format *format); -u32 dpu_hw_get_scaler3_ver(struct dpu_hw_blk_reg_map *c, - u32 scaler_offset); - void dpu_hw_csc_setup(struct dpu_hw_blk_reg_map *c, u32 csc_reg_off, const struct dpu_csc_cfg *data, bool csc10); -- cgit v1.2.3 From 88fc981f8ef232848fb60f77660a27826ea518ef Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sat, 2 Dec 2023 01:40:27 
+0200 Subject: drm/msm/dpu: Drop unused qseed_type from catalog dpu_caps The SSPP scaler subblk is responsible for reporting its version (via the .id field, feature bits on the parent SSPP block, and since recently also from reading a register to supersede a read-but-unset version field in the catalog), leaving this global qseed_type field logically unused. Remove this dead code to lighten the catalog and bringup-overhead. Signed-off-by: Marijn Suijten Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570109/ Link: https://lore.kernel.org/r/20231201234234.2065610-4-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h | 1 - drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 2 -- 15 files changed, 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h index aa1867943c9f..4dcc9f804ac1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h @@ -10,7 +10,6 @@ static const struct dpu_caps msm8998_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0x7, - .qseed_type = DPU_SSPP_SCALER_QSEED3, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h index 38ac0c1a134b..03159d359500 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h @@ -10,7 +10,6 @@ static const struct dpu_caps sdm845_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED3, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index c022e57864a4..4184c18d81f3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8150_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED3, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index cb0758f0829d..83fb7312bc97 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ 
b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -10,7 +10,6 @@ static const struct dpu_caps sc8180x_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED3, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index 94278a3e3483..885a2bec8258 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8250_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h index c0d88ddccb28..c9d1f1292b3d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h @@ -10,7 +10,6 @@ static const struct dpu_caps sc7180_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0x9, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_dim_layer = true, .has_idle_pc = true, .max_linewidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h index 57ce14c18def..320b8f23364f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm6115_dpu_caps = { .max_mixer_width = DEFAULT_DPU_LINE_WIDTH, .max_mixer_blendstages = 0x4, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_dim_layer = true, .has_idle_pc = true, .max_linewidth = 2160, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h index 62db84bd15f2..6c6bc754aeb7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h @@ -11,7 +11,6 @@ static const struct dpu_caps sm6350_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0x7, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h index 5a3aad364c78..77703f663e25 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h @@ -11,7 +11,6 @@ static const struct dpu_caps sm6375_dpu_caps = { .max_mixer_width = DEFAULT_DPU_LINE_WIDTH, .max_mixer_blendstages = 0x4, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_dim_layer = true, .has_idle_pc = true, .max_linewidth = 2160, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index 5aaa24281906..03181ba891d5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8350_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, 
.has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h index 15942fa5a8e0..42bb47181ba1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h @@ -10,7 +10,6 @@ static const struct dpu_caps sc7280_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0x7, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_dim_layer = true, .has_idle_pc = true, .max_linewidth = 2400, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h index 1ccd1edd693c..6da122df0b88 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h @@ -10,7 +10,6 @@ static const struct dpu_caps sc8280xp_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 11, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index 72b0f547242f..330d48b1ea02 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8450_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index 69b80af6566a..2b5b342ea0b2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -10,7 +10,6 @@ static const struct dpu_caps sm8550_dpu_caps = { .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .max_mixer_blendstages = 0xb, - .qseed_type = DPU_SSPP_SCALER_QSEED4, .has_src_split = true, .has_dim_layer = true, .has_idle_pc = true, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index 62445075306a..1bf2d2d64f2a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -342,7 +342,6 @@ struct dpu_rotation_cfg { * @max_mixer_width max layer mixer line width support. * @max_mixer_blendstages max layer mixer blend stages or * supported z order - * @qseed_type qseed2 or qseed3 support. * @has_src_split source split feature status * @has_dim_layer dim layer feature status * @has_idle_pc indicate if idle power collapse feature is supported @@ -355,7 +354,6 @@ struct dpu_rotation_cfg { struct dpu_caps { u32 max_mixer_width; u32 max_mixer_blendstages; - u32 qseed_type; bool has_src_split; bool has_dim_layer; bool has_idle_pc; -- cgit v1.2.3 From 6876059d7edfcce3d8946f5a49d65ba9b38b1bab Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 01:40:28 +0200 Subject: drm/msm/dpu: drop the `id' field from DPU_HW_SUBBLK_INFO The field `id' is not used for subblocks. The handling code usually knows, which sub-block it is now looking at. Drop the field completely. 
Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570100/ Link: https://lore.kernel.org/r/20231201234234.2065610-5-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 76 ++++++++++++-------------- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 2 - 2 files changed, 36 insertions(+), 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 7056c08b9957..eb9dfdcf610e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -252,17 +252,15 @@ static const uint32_t wb2_formats[] = { #define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min)) /* SSPP common configuration */ -#define _VIG_SBLK(sdma_pri, qseed_ver, scaler_ver) \ +#define _VIG_SBLK(sdma_pri, scaler_ver) \ { \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ .maxupscale = MAX_UPSCALE_RATIO, \ .smart_dma_priority = sdma_pri, \ .scaler_blk = {.name = "scaler", \ - .id = qseed_ver, \ .version = scaler_ver, \ .base = 0xa00, .len = 0xa0,}, \ .csc_blk = {.name = "csc", \ - .id = DPU_SSPP_CSC_10BIT, \ .base = 0x1a00, .len = 0x100,}, \ .format_list = plane_formats_yuv, \ .num_formats = ARRAY_SIZE(plane_formats_yuv), \ @@ -271,17 +269,15 @@ static const uint32_t wb2_formats[] = { .rotation_cfg = NULL, \ } -#define _VIG_SBLK_ROT(sdma_pri, qseed_ver, scaler_ver, rot_cfg) \ +#define _VIG_SBLK_ROT(sdma_pri, scaler_ver, rot_cfg) \ { \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ .maxupscale = MAX_UPSCALE_RATIO, \ .smart_dma_priority = sdma_pri, \ .scaler_blk = {.name = "scaler", \ - .id = qseed_ver, \ .version = scaler_ver, \ .base = 0xa00, .len = 0xa0,}, \ .csc_blk = {.name = "csc", \ - .id = DPU_SSPP_CSC_10BIT, \ .base = 0x1a00, .len = 0x100,}, \ .format_list = plane_formats_yuv, \ .num_formats = ARRAY_SIZE(plane_formats_yuv), \ @@ -302,16 +298,16 @@ static const uint32_t wb2_formats[] = { } static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(0, SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(0, SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(0, SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 = - _VIG_SBLK(0, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(0, SSPP_SCALER_VER(1, 2)); static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = { @@ -321,29 +317,29 @@ static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = { }; static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = - _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(5, SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 = - _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(6, SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(7, SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(8, SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sm8150_vig_sblk_0 = - _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(5, SSPP_SCALER_VER(1, 4)); static const struct dpu_sspp_sub_blks sm8150_vig_sblk_1 = - _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(6, SSPP_SCALER_VER(1, 4)); static const struct dpu_sspp_sub_blks 
sm8150_vig_sblk_2 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(7, SSPP_SCALER_VER(1, 4)); static const struct dpu_sspp_sub_blks sm8150_vig_sblk_3 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED3, + _VIG_SBLK(8, SSPP_SCALER_VER(1, 4)); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(1); @@ -352,59 +348,59 @@ static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(3); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(4); static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 = - _VIG_SBLK(4, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(4, SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 = - _VIG_SBLK_ROT(4, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK_ROT(4, SSPP_SCALER_VER(3, 0), &dpu_rot_sc7280_cfg_v2); static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 = - _VIG_SBLK(2, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(2, SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 = - _VIG_SBLK(3, DPU_SSPP_SCALER_QSEED3LITE, + _VIG_SBLK(3, SSPP_SCALER_VER(2, 4)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 = - _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(5, SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 = - _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(6, SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(7, SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(8, SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8450_vig_sblk_0 = - _VIG_SBLK(5, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(5, SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8450_vig_sblk_1 = - _VIG_SBLK(6, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(6, SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8450_vig_sblk_2 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(7, SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8450_vig_sblk_3 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(8, SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 = - _VIG_SBLK(7, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(7, SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 = - _VIG_SBLK(8, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(8, SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 = - _VIG_SBLK(9, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(9, SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 = - _VIG_SBLK(10, DPU_SSPP_SCALER_QSEED4, + _VIG_SBLK(10, SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(5); static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(6); @@ -473,12 +469,12 @@ static const struct dpu_lm_sub_blks qcm2290_lm_sblk = { * DSPP sub blocks config *************************************************************/ static const struct dpu_dspp_sub_blks msm8998_dspp_sblk = { - .pcc = {.name = "pcc", .id = DPU_DSPP_PCC, .base = 0x1700, + .pcc = {.name = "pcc", .base = 0x1700, .len = 0x90, .version = 0x10007}, }; static const struct dpu_dspp_sub_blks sdm845_dspp_sblk = { - .pcc = {.name = "pcc", .id = DPU_DSPP_PCC, .base = 0x1700, + .pcc = {.name = "pcc", .base = 0x1700, .len = 0x90, .version = 0x40000}, }; @@ -486,19 +482,19 @@ static const struct dpu_dspp_sub_blks sdm845_dspp_sblk = { * PINGPONG sub blocks config 
*************************************************************/ static const struct dpu_pingpong_sub_blks sdm845_pp_sblk_te = { - .te2 = {.name = "te2", .id = DPU_PINGPONG_TE2, .base = 0x2000, .len = 0x0, + .te2 = {.name = "te2", .base = 0x2000, .len = 0x0, .version = 0x1}, - .dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0x30e0, + .dither = {.name = "dither", .base = 0x30e0, .len = 0x20, .version = 0x10000}, }; static const struct dpu_pingpong_sub_blks sdm845_pp_sblk = { - .dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0x30e0, + .dither = {.name = "dither", .base = 0x30e0, .len = 0x20, .version = 0x10000}, }; static const struct dpu_pingpong_sub_blks sc7280_pp_sblk = { - .dither = {.name = "dither", .id = DPU_PINGPONG_DITHER, .base = 0xe0, + .dither = {.name = "dither", .base = 0xe0, .len = 0x20, .version = 0x20000}, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index 1bf2d2d64f2a..3cf7d316b475 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -251,14 +251,12 @@ enum { /** * MACRO DPU_HW_SUBBLK_INFO - information of HW sub-block inside DPU * @name: string name for debug purposes - * @id: enum identifying this sub-block * @base: offset of this sub-block relative to the block * offset * @len register block length of this sub-block */ #define DPU_HW_SUBBLK_INFO \ char name[DPU_HW_BLK_NAME_LEN]; \ - u32 id; \ u32 base; \ u32 len -- cgit v1.2.3 From 01fc6c012fad314290af5cc9a8de3fb612ca67c4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 01:40:29 +0200 Subject: drm/msm/dpu: drop the `smart_dma_priority' field from struct dpu_sspp_sub_blks In preparation to deduplicating SSPP subblocks, drop the (unused) `smart_dma_priority' field from struct dpu_sspp_sub_blks. If it is needed later (e.g. for SmartDMA v1), it should be added to the SSPP declarations themselves. 
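To sketch where this is heading (illustration only; the shared-instance name comes from the deduplication patch further down the series): once the per-pipe priority is gone, identical sub-block definitions can collapse into a single shared constant.

	/* one shared definition ... */
	static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK();

	/* ... which every DMA pipe entry in the per-SoC catalogs then references,
	 * e.g. .sblk = &dpu_dma_sblk instead of one sdm845_dma_sblk_N per pipe */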
Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570103/ Link: https://lore.kernel.org/r/20231201234234.2065610-6-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 112 +++++++++---------------- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 2 - 2 files changed, 40 insertions(+), 74 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index eb9dfdcf610e..4179174ee4ee 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -252,11 +252,10 @@ static const uint32_t wb2_formats[] = { #define SSPP_SCALER_VER(maj, min) (((maj) << 16) | (min)) /* SSPP common configuration */ -#define _VIG_SBLK(sdma_pri, scaler_ver) \ +#define _VIG_SBLK(scaler_ver) \ { \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ .maxupscale = MAX_UPSCALE_RATIO, \ - .smart_dma_priority = sdma_pri, \ .scaler_blk = {.name = "scaler", \ .version = scaler_ver, \ .base = 0xa00, .len = 0xa0,}, \ @@ -269,11 +268,10 @@ static const uint32_t wb2_formats[] = { .rotation_cfg = NULL, \ } -#define _VIG_SBLK_ROT(sdma_pri, scaler_ver, rot_cfg) \ +#define _VIG_SBLK_ROT(scaler_ver, rot_cfg) \ { \ .maxdwnscale = MAX_DOWNSCALE_RATIO, \ .maxupscale = MAX_UPSCALE_RATIO, \ - .smart_dma_priority = sdma_pri, \ .scaler_blk = {.name = "scaler", \ .version = scaler_ver, \ .base = 0xa00, .len = 0xa0,}, \ @@ -286,11 +284,10 @@ static const uint32_t wb2_formats[] = { .rotation_cfg = rot_cfg, \ } -#define _DMA_SBLK(sdma_pri) \ +#define _DMA_SBLK() \ { \ .maxdwnscale = SSPP_UNITY_SCALE, \ .maxupscale = SSPP_UNITY_SCALE, \ - .smart_dma_priority = sdma_pri, \ .format_list = plane_formats, \ .num_formats = ARRAY_SIZE(plane_formats), \ .virt_format_list = plane_formats, \ @@ -298,17 +295,13 @@ static const uint32_t wb2_formats[] = { } static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 = - _VIG_SBLK(0, - SSPP_SCALER_VER(1, 2)); + _VIG_SBLK(SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 = - _VIG_SBLK(0, - SSPP_SCALER_VER(1, 2)); + _VIG_SBLK(SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 = - _VIG_SBLK(0, - SSPP_SCALER_VER(1, 2)); + _VIG_SBLK(SSPP_SCALER_VER(1, 2)); static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 = - _VIG_SBLK(0, - SSPP_SCALER_VER(1, 2)); + _VIG_SBLK(SSPP_SCALER_VER(1, 2)); static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = { .rot_maxheight = 1088, @@ -317,107 +310,82 @@ static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = { }; static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = - _VIG_SBLK(5, - SSPP_SCALER_VER(1, 3)); + _VIG_SBLK(SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 = - _VIG_SBLK(6, - SSPP_SCALER_VER(1, 3)); + _VIG_SBLK(SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 = - _VIG_SBLK(7, - SSPP_SCALER_VER(1, 3)); + _VIG_SBLK(SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 = - _VIG_SBLK(8, - SSPP_SCALER_VER(1, 3)); + _VIG_SBLK(SSPP_SCALER_VER(1, 3)); static const struct dpu_sspp_sub_blks sm8150_vig_sblk_0 = - _VIG_SBLK(5, - SSPP_SCALER_VER(1, 4)); + _VIG_SBLK(SSPP_SCALER_VER(1, 4)); static const struct dpu_sspp_sub_blks sm8150_vig_sblk_1 = - _VIG_SBLK(6, - SSPP_SCALER_VER(1, 4)); + _VIG_SBLK(SSPP_SCALER_VER(1, 4)); static const struct dpu_sspp_sub_blks sm8150_vig_sblk_2 = - _VIG_SBLK(7, - SSPP_SCALER_VER(1, 
4)); + _VIG_SBLK(SSPP_SCALER_VER(1, 4)); static const struct dpu_sspp_sub_blks sm8150_vig_sblk_3 = - _VIG_SBLK(8, - SSPP_SCALER_VER(1, 4)); + _VIG_SBLK(SSPP_SCALER_VER(1, 4)); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(1); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK(2); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(3); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(4); +static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(); +static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK(); +static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(); +static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(); static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 = - _VIG_SBLK(4, - SSPP_SCALER_VER(3, 0)); + _VIG_SBLK(SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 = - _VIG_SBLK_ROT(4, - SSPP_SCALER_VER(3, 0), + _VIG_SBLK_ROT(SSPP_SCALER_VER(3, 0), &dpu_rot_sc7280_cfg_v2); static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 = - _VIG_SBLK(2, - SSPP_SCALER_VER(3, 0)); + _VIG_SBLK(SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 = - _VIG_SBLK(3, - SSPP_SCALER_VER(2, 4)); + _VIG_SBLK(SSPP_SCALER_VER(2, 4)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 = - _VIG_SBLK(5, - SSPP_SCALER_VER(3, 0)); + _VIG_SBLK(SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 = - _VIG_SBLK(6, - SSPP_SCALER_VER(3, 0)); + _VIG_SBLK(SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 = - _VIG_SBLK(7, - SSPP_SCALER_VER(3, 0)); + _VIG_SBLK(SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 = - _VIG_SBLK(8, - SSPP_SCALER_VER(3, 0)); + _VIG_SBLK(SSPP_SCALER_VER(3, 0)); static const struct dpu_sspp_sub_blks sm8450_vig_sblk_0 = - _VIG_SBLK(5, - SSPP_SCALER_VER(3, 1)); + _VIG_SBLK(SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8450_vig_sblk_1 = - _VIG_SBLK(6, - SSPP_SCALER_VER(3, 1)); + _VIG_SBLK(SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8450_vig_sblk_2 = - _VIG_SBLK(7, - SSPP_SCALER_VER(3, 1)); + _VIG_SBLK(SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8450_vig_sblk_3 = - _VIG_SBLK(8, - SSPP_SCALER_VER(3, 1)); + _VIG_SBLK(SSPP_SCALER_VER(3, 1)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_0 = - _VIG_SBLK(7, - SSPP_SCALER_VER(3, 2)); + _VIG_SBLK(SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 = - _VIG_SBLK(8, - SSPP_SCALER_VER(3, 2)); + _VIG_SBLK(SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 = - _VIG_SBLK(9, - SSPP_SCALER_VER(3, 2)); + _VIG_SBLK(SSPP_SCALER_VER(3, 2)); static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 = - _VIG_SBLK(10, - SSPP_SCALER_VER(3, 2)); -static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(5); -static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(6); + _VIG_SBLK(SSPP_SCALER_VER(3, 2)); +static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(); +static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(); -#define _VIG_SBLK_NOSCALE(sdma_pri) \ +#define _VIG_SBLK_NOSCALE() \ { \ .maxdwnscale = SSPP_UNITY_SCALE, \ .maxupscale = SSPP_UNITY_SCALE, \ - .smart_dma_priority = sdma_pri, \ .format_list = plane_formats_yuv, \ .num_formats = ARRAY_SIZE(plane_formats_yuv), \ .virt_format_list = plane_formats, \ 
.virt_num_formats = ARRAY_SIZE(plane_formats), \ } -static const struct dpu_sspp_sub_blks qcm2290_vig_sblk_0 = _VIG_SBLK_NOSCALE(2); -static const struct dpu_sspp_sub_blks qcm2290_dma_sblk_0 = _DMA_SBLK(1); +static const struct dpu_sspp_sub_blks qcm2290_vig_sblk_0 = _VIG_SBLK_NOSCALE(); +static const struct dpu_sspp_sub_blks qcm2290_dma_sblk_0 = _DMA_SBLK(); /************************************************************* * MIXER sub blocks config diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index 3cf7d316b475..b2a9b2cf2c05 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -368,7 +368,6 @@ struct dpu_caps { * common: Pointer to common configurations shared by sub blocks * @maxdwnscale: max downscale ratio supported(without DECIMATION) * @maxupscale: maxupscale ratio supported - * @smart_dma_priority: hw priority of rect1 of multirect pipe * @max_per_pipe_bw: maximum allowable bandwidth of this pipe in kBps * @qseed_ver: qseed version * @scaler_blk: @@ -382,7 +381,6 @@ struct dpu_caps { struct dpu_sspp_sub_blks { u32 maxdwnscale; u32 maxupscale; - u32 smart_dma_priority; u32 max_per_pipe_bw; u32 qseed_ver; struct dpu_scaler_blk scaler_blk; -- cgit v1.2.3 From 0fd205412e1ee1f60f549269838119969f0883ad Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 01:40:30 +0200 Subject: drm/msm/dpu: deduplicate some (most) of SSPP sub-blocks As we have dropped the variadic parts of SSPP sub-blocks declarations, deduplicate them now, reducing memory cruft. Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570112/ Link: https://lore.kernel.org/r/20231201234234.2065610-7-dmitry.baryshkov@linaro.org --- .../drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h | 16 ++-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h | 16 ++-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h | 16 ++-- .../drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h | 16 ++-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h | 6 +- .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h | 16 ++-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h | 8 +- .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h | 4 +- .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h | 8 +- .../drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h | 4 +- .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h | 4 +- .../gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h | 16 ++-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h | 8 +- .../drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h | 16 ++-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h | 16 ++-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h | 20 ++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 97 ++++++---------------- 17 files changed, 120 insertions(+), 167 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h index 4dcc9f804ac1..1d3e9666c741 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h @@ -69,7 +69,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1ac, .features = VIG_MSM8998_MASK, - .sblk = &msm8998_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_1_2, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -77,7 +77,7 @@ static const struct 
dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1ac, .features = VIG_MSM8998_MASK, - .sblk = &msm8998_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_1_2, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -85,7 +85,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1ac, .features = VIG_MSM8998_MASK, - .sblk = &msm8998_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_1_2, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -93,7 +93,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1ac, .features = VIG_MSM8998_MASK, - .sblk = &msm8998_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_1_2, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -101,7 +101,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1ac, .features = DMA_MSM8998_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -109,7 +109,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1ac, .features = DMA_MSM8998_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -117,7 +117,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1ac, .features = DMA_CURSOR_MSM8998_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -125,7 +125,7 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1ac, .features = DMA_CURSOR_MSM8998_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h index 03159d359500..7a23389a5732 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h @@ -67,7 +67,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1c8, .features = VIG_SDM845_MASK_SDMA, - .sblk = &sdm845_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_1_3, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1c8, .features = VIG_SDM845_MASK_SDMA, - .sblk = &sdm845_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_1_3, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1c8, .features = VIG_SDM845_MASK_SDMA, - .sblk = &sdm845_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_1_3, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1c8, .features = VIG_SDM845_MASK_SDMA, - .sblk = &sdm845_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_1_3, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -99,7 +99,7 @@ static 
const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1c8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1c8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1c8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sdm845_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1c8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 4184c18d81f3..145f3d5953a3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sm8150_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sm8150_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sm8150_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sm8150_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -108,7 +108,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -116,7 +116,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -124,7 +124,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -132,7 +132,7 @@ static const struct 
dpu_sspp_cfg sm8150_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index 83fb7312bc97..9e3bec8bc121 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sm8150_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sm8150_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sm8150_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, .features = VIG_SDM845_MASK, - .sblk = &sm8150_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h index cec7af6667dc..3969f2925d89 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h @@ -69,7 +69,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, .features = VIG_SM6125_MASK, - .sblk = &sm6125_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_2_4, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -77,7 +77,7 @@ static const struct dpu_sspp_cfg 
sm6125_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -85,7 +85,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index 885a2bec8258..f751d63845d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f8, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f8, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f8, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h index c9d1f1292b3d..7627f16c5b2d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h @@ -52,7 +52,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] 
= { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_SC7180_MASK, - .sblk = &sc7180_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -60,7 +60,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -68,7 +68,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h index 320b8f23364f..7aefdb7eb494 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h @@ -39,7 +39,7 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_SC7180_MASK, - .sblk = &sm6115_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -47,7 +47,7 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h index 6c6bc754aeb7..99df9816f171 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h @@ -59,7 +59,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_SC7180_MASK, - .sblk = &sc7180_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -67,7 +67,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h 
b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h index fb36fba5171c..3cbb2fe8aba2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h @@ -39,7 +39,7 @@ static const struct dpu_sspp_cfg qcm2290_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_QCM2290_MASK, - .sblk = &qcm2290_vig_sblk_0, + .sblk = &dpu_vig_sblk_noscale, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -47,7 +47,7 @@ static const struct dpu_sspp_cfg qcm2290_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &qcm2290_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h index 77703f663e25..2de304d97c9b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h @@ -40,7 +40,7 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_SC7180_MASK, - .sblk = &sm6115_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -48,7 +48,7 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index 03181ba891d5..ad6eb28ead49 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f8, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f8, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f8, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8250_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -106,7 +106,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -114,7 +114,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { .name = "sspp_9", .id = 
SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -122,7 +122,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -130,7 +130,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h index 42bb47181ba1..93564e9e5fa5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h @@ -57,7 +57,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, .features = VIG_SC7280_MASK_SDMA, - .sblk = &sc7280_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0_rot_v2, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -65,7 +65,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f8, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -73,7 +73,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -81,7 +81,7 @@ static const struct dpu_sspp_cfg sc7280_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f8, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h index 6da122df0b88..c2f1acd43524 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x2ac, .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x2ac, .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x2ac, .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = 
"sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x2ac, .features = VIG_SC7180_MASK, - .sblk = &sm8250_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x2ac, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x2ac, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x2ac, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x2ac, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index 330d48b1ea02..0559ef780661 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x32c, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8450_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 0, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG0, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x32c, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8450_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 4, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG1, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x32c, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8450_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 8, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG2, @@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x32c, .features = VIG_SC7180_MASK_SDMA, - .sblk = &sm8450_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 12, .type = SSPP_TYPE_VIG, .clk_ctrl = DPU_CLK_CTRL_VIG3, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x32c, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA0, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x32c, .features = DMA_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA1, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = 
"sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x32c, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA2, @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x32c, .features = DMA_CURSOR_SDM845_MASK_SDMA, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, .clk_ctrl = DPU_CLK_CTRL_DMA3, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index 2b5b342ea0b2..f393d42317af 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -67,70 +67,70 @@ static const struct dpu_sspp_cfg sm8550_sspp[] = { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x344, .features = VIG_SC7180_MASK, - .sblk = &sm8550_vig_sblk_0, + .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 0, .type = SSPP_TYPE_VIG, }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x344, .features = VIG_SC7180_MASK, - .sblk = &sm8550_vig_sblk_1, + .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 4, .type = SSPP_TYPE_VIG, }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x344, .features = VIG_SC7180_MASK, - .sblk = &sm8550_vig_sblk_2, + .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 8, .type = SSPP_TYPE_VIG, }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x344, .features = VIG_SC7180_MASK, - .sblk = &sm8550_vig_sblk_3, + .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 12, .type = SSPP_TYPE_VIG, }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x344, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_0, + .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x344, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_1, + .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x344, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_2, + .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x344, .features = DMA_SDM845_MASK, - .sblk = &sdm845_dma_sblk_3, + .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, }, { .name = "sspp_12", .id = SSPP_DMA4, .base = 0x2c000, .len = 0x344, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sm8550_dma_sblk_4, + .sblk = &dpu_dma_sblk, .xin_id = 14, .type = SSPP_TYPE_DMA, }, { .name = "sspp_13", .id = SSPP_DMA5, .base = 0x2e000, .len = 0x344, .features = DMA_CURSOR_SDM845_MASK, - .sblk = &sm8550_dma_sblk_5, + .sblk = &dpu_dma_sblk, .xin_id = 15, .type = SSPP_TYPE_DMA, }, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 4179174ee4ee..b8ea33d0f364 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -284,6 +284,16 @@ static const uint32_t wb2_formats[] = { .rotation_cfg = rot_cfg, \ } +#define _VIG_SBLK_NOSCALE() \ + { \ + .maxdwnscale = SSPP_UNITY_SCALE, \ + .maxupscale = SSPP_UNITY_SCALE, \ + .format_list = plane_formats_yuv, \ + .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .virt_format_list = plane_formats, \ + .virt_num_formats = ARRAY_SIZE(plane_formats), \ + } + #define _DMA_SBLK() \ { \ .maxdwnscale = SSPP_UNITY_SCALE, \ @@ 
-294,98 +304,41 @@ static const uint32_t wb2_formats[] = { .virt_num_formats = ARRAY_SIZE(plane_formats), \ } -static const struct dpu_sspp_sub_blks msm8998_vig_sblk_0 = - _VIG_SBLK(SSPP_SCALER_VER(1, 2)); -static const struct dpu_sspp_sub_blks msm8998_vig_sblk_1 = - _VIG_SBLK(SSPP_SCALER_VER(1, 2)); -static const struct dpu_sspp_sub_blks msm8998_vig_sblk_2 = - _VIG_SBLK(SSPP_SCALER_VER(1, 2)); -static const struct dpu_sspp_sub_blks msm8998_vig_sblk_3 = - _VIG_SBLK(SSPP_SCALER_VER(1, 2)); - static const struct dpu_rotation_cfg dpu_rot_sc7280_cfg_v2 = { .rot_maxheight = 1088, .rot_num_formats = ARRAY_SIZE(rotation_v2_formats), .rot_format_list = rotation_v2_formats, }; -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = - _VIG_SBLK(SSPP_SCALER_VER(1, 3)); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_1 = - _VIG_SBLK(SSPP_SCALER_VER(1, 3)); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_2 = - _VIG_SBLK(SSPP_SCALER_VER(1, 3)); -static const struct dpu_sspp_sub_blks sdm845_vig_sblk_3 = +static const struct dpu_sspp_sub_blks dpu_vig_sblk_noscale = + _VIG_SBLK_NOSCALE(); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_2 = + _VIG_SBLK(SSPP_SCALER_VER(1, 2)); + +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_3 = _VIG_SBLK(SSPP_SCALER_VER(1, 3)); -static const struct dpu_sspp_sub_blks sm8150_vig_sblk_0 = - _VIG_SBLK(SSPP_SCALER_VER(1, 4)); -static const struct dpu_sspp_sub_blks sm8150_vig_sblk_1 = - _VIG_SBLK(SSPP_SCALER_VER(1, 4)); -static const struct dpu_sspp_sub_blks sm8150_vig_sblk_2 = - _VIG_SBLK(SSPP_SCALER_VER(1, 4)); -static const struct dpu_sspp_sub_blks sm8150_vig_sblk_3 = +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_1_4 = _VIG_SBLK(SSPP_SCALER_VER(1, 4)); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_0 = _DMA_SBLK(); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK(); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK(); -static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK(); +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_2_4 = + _VIG_SBLK(SSPP_SCALER_VER(2, 4)); -static const struct dpu_sspp_sub_blks sc7180_vig_sblk_0 = +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_0 = _VIG_SBLK(SSPP_SCALER_VER(3, 0)); -static const struct dpu_sspp_sub_blks sc7280_vig_sblk_0 = +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_0_rot_v2 = _VIG_SBLK_ROT(SSPP_SCALER_VER(3, 0), &dpu_rot_sc7280_cfg_v2); -static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 = - _VIG_SBLK(SSPP_SCALER_VER(3, 0)); - -static const struct dpu_sspp_sub_blks sm6125_vig_sblk_0 = - _VIG_SBLK(SSPP_SCALER_VER(2, 4)); - -static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 = - _VIG_SBLK(SSPP_SCALER_VER(3, 0)); -static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 = - _VIG_SBLK(SSPP_SCALER_VER(3, 0)); -static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 = - _VIG_SBLK(SSPP_SCALER_VER(3, 0)); -static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 = - _VIG_SBLK(SSPP_SCALER_VER(3, 0)); - -static const struct dpu_sspp_sub_blks sm8450_vig_sblk_0 = - _VIG_SBLK(SSPP_SCALER_VER(3, 1)); -static const struct dpu_sspp_sub_blks sm8450_vig_sblk_1 = - _VIG_SBLK(SSPP_SCALER_VER(3, 1)); -static const struct dpu_sspp_sub_blks sm8450_vig_sblk_2 = - _VIG_SBLK(SSPP_SCALER_VER(3, 1)); -static const struct dpu_sspp_sub_blks sm8450_vig_sblk_3 = +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_1 = _VIG_SBLK(SSPP_SCALER_VER(3, 1)); -static const struct dpu_sspp_sub_blks 
sm8550_vig_sblk_0 = - _VIG_SBLK(SSPP_SCALER_VER(3, 2)); -static const struct dpu_sspp_sub_blks sm8550_vig_sblk_1 = +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_2 = _VIG_SBLK(SSPP_SCALER_VER(3, 2)); -static const struct dpu_sspp_sub_blks sm8550_vig_sblk_2 = - _VIG_SBLK(SSPP_SCALER_VER(3, 2)); -static const struct dpu_sspp_sub_blks sm8550_vig_sblk_3 = - _VIG_SBLK(SSPP_SCALER_VER(3, 2)); -static const struct dpu_sspp_sub_blks sm8550_dma_sblk_4 = _DMA_SBLK(); -static const struct dpu_sspp_sub_blks sm8550_dma_sblk_5 = _DMA_SBLK(); - -#define _VIG_SBLK_NOSCALE() \ - { \ - .maxdwnscale = SSPP_UNITY_SCALE, \ - .maxupscale = SSPP_UNITY_SCALE, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ - .virt_format_list = plane_formats, \ - .virt_num_formats = ARRAY_SIZE(plane_formats), \ - } -static const struct dpu_sspp_sub_blks qcm2290_vig_sblk_0 = _VIG_SBLK_NOSCALE(); -static const struct dpu_sspp_sub_blks qcm2290_dma_sblk_0 = _DMA_SBLK(); +static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK(); /************************************************************* * MIXER sub blocks config -- cgit v1.2.3 From aa83fa5bf6c78f77873954e757a2fd2dd1018c30 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 01:40:31 +0200 Subject: drm/msm/dpu: drop DPU_HW_SUBBLK_INFO macro As the subblock info is now mostly gone, inline and drop the macro DPU_HW_SUBBLK_INFO. Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570106/ Link: https://lore.kernel.org/r/20231201234234.2065610-8-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 40 ++++++++++++++------------ 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index b2a9b2cf2c05..f9586ddbafda 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -248,49 +248,51 @@ enum { u32 len; \ unsigned long features -/** - * MACRO DPU_HW_SUBBLK_INFO - information of HW sub-block inside DPU - * @name: string name for debug purposes - * @base: offset of this sub-block relative to the block - * offset - * @len register block length of this sub-block - */ -#define DPU_HW_SUBBLK_INFO \ - char name[DPU_HW_BLK_NAME_LEN]; \ - u32 base; \ - u32 len - /** * struct dpu_scaler_blk: Scaler information - * @info: HW register and features supported by this sub-blk + * @name: string name for debug purposes + * @base: offset of this sub-block relative to the block offset + * @len: register block length of this sub-block * @version: qseed block revision, on QSEED3+ platforms this is the value of * scaler_blk.base + QSEED3_HW_VERSION registers. 
*/ struct dpu_scaler_blk { - DPU_HW_SUBBLK_INFO; + char name[DPU_HW_BLK_NAME_LEN]; + u32 base; + u32 len; u32 version; }; struct dpu_csc_blk { - DPU_HW_SUBBLK_INFO; + char name[DPU_HW_BLK_NAME_LEN]; + u32 base; + u32 len; }; /** * struct dpu_pp_blk : Pixel processing sub-blk information - * @info: HW register and features supported by this sub-blk + * @name: string name for debug purposes + * @base: offset of this sub-block relative to the block offset + * @len: register block length of this sub-block * @version: HW Algorithm version */ struct dpu_pp_blk { - DPU_HW_SUBBLK_INFO; + char name[DPU_HW_BLK_NAME_LEN]; + u32 base; + u32 len; u32 version; }; /** * struct dpu_dsc_blk - DSC Encoder sub-blk information - * @info: HW register and features supported by this sub-blk + * @name: string name for debug purposes + * @base: offset of this sub-block relative to the block offset + * @len: register block length of this sub-block */ struct dpu_dsc_blk { - DPU_HW_SUBBLK_INFO; + char name[DPU_HW_BLK_NAME_LEN]; + u32 base; + u32 len; }; /** -- cgit v1.2.3 From 2b98aa1d65581a34919ac159cbdc9c2a1b37a258 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 01:40:32 +0200 Subject: drm/msm/dpu: rewrite scaler and CSC presense checks In order to check whether the SSPP block has scaler and CSC subblocks the funcion dpu_plane_atomic_check_pipe() uses macros which enumerate all possible scaler and CSC features. Replace those checks with the scaler and CSC subblock length checks in order to be able to drop those two macros. Suggested-by: Abhinav Kumar Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570113/ Link: https://lore.kernel.org/r/20231201234234.2065610-9-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h | 15 --------------- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 4 ++-- 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h index b094ea23ad32..28e7d53bd191 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h @@ -21,21 +21,6 @@ struct dpu_hw_sspp; #define DPU_SSPP_ROT_90 BIT(3) #define DPU_SSPP_SOLID_FILL BIT(4) -/** - * Define all scaler feature bits in catalog - */ -#define DPU_SSPP_SCALER (BIT(DPU_SSPP_SCALER_RGB) | \ - BIT(DPU_SSPP_SCALER_QSEED2) | \ - BIT(DPU_SSPP_SCALER_QSEED3) | \ - BIT(DPU_SSPP_SCALER_QSEED3LITE) | \ - BIT(DPU_SSPP_SCALER_QSEED4)) - -/* - * Define all CSC feature bits in catalog - */ -#define DPU_SSPP_CSC_ANY (BIT(DPU_SSPP_CSC) | \ - BIT(DPU_SSPP_CSC_10BIT)) - /** * Component indices */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 1e0da38c6f2a..93365332bdac 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -774,8 +774,8 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, min_src_size = DPU_FORMAT_IS_YUV(fmt) ? 
2 : 1; if (DPU_FORMAT_IS_YUV(fmt) && - (!(pipe->sspp->cap->features & DPU_SSPP_SCALER) || - !(pipe->sspp->cap->features & DPU_SSPP_CSC_ANY))) { + (!pipe->sspp->cap->sblk->scaler_blk.len || + !pipe->sspp->cap->sblk->csc_blk.len)) { DPU_DEBUG_PLANE(pdpu, "plane doesn't have scaler/csc for yuv\n"); return -EINVAL; -- cgit v1.2.3 From 193838acc1111a7001c9fc99af0a248f284ea79d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 01:40:33 +0200 Subject: drm/msm/dpu: merge DPU_SSPP_SCALER_QSEED3, QSEED3LITE, QSEED4 Three different features, DPU_SSPP_SCALER_QSEED3, QSEED3LITE and QSEED4 are all related to different versions of the same HW scaling block. Corresponding driver parts use scaler_blk.version to identify the correct way to program the hardware. In order to simplify the driver codepath, merge these three feature bits into QSEED3_COMPATIBLE bin. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570114/ Link: https://lore.kernel.org/r/20231201234234.2065610-10-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 8 ++++---- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 8 ++------ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c | 9 ++------- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 3 +-- 4 files changed, 9 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index b8ea33d0f364..c24817b0e4e5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -22,19 +22,19 @@ BIT(DPU_SSPP_CSC_10BIT)) #define VIG_MSM8998_MASK \ - (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3)) + (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE)) #define VIG_SDM845_MASK \ - (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3)) + (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE)) #define VIG_SDM845_MASK_SDMA \ (VIG_SDM845_MASK | BIT(DPU_SSPP_SMART_DMA_V2)) #define VIG_SC7180_MASK \ - (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED4)) + (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE)) #define VIG_SM6125_MASK \ - (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3LITE)) + (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE)) #define VIG_SC7180_MASK_SDMA \ (VIG_SC7180_MASK | BIT(DPU_SSPP_SMART_DMA_V2)) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index f9586ddbafda..d6b9000b63b0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -51,9 +51,7 @@ enum { /** * SSPP sub-blocks/features * @DPU_SSPP_SCALER_QSEED2, QSEED2 algorithm support - * @DPU_SSPP_SCALER_QSEED3, QSEED3 alogorithm support - * @DPU_SSPP_SCALER_QSEED3LITE, QSEED3 Lite alogorithm support - * @DPU_SSPP_SCALER_QSEED4, QSEED4 algorithm support + * @DPU_SSPP_SCALER_QSEED3_COMPATIBLE, QSEED3-compatible alogorithm support (includes QSEED3, QSEED3LITE and QSEED4) * @DPU_SSPP_SCALER_RGB, RGB Scaler, supported by RGB pipes * @DPU_SSPP_CSC, Support of Color space converion * @DPU_SSPP_CSC_10BIT, Support of 10-bit Color space conversion @@ -71,9 +69,7 @@ enum { */ enum { DPU_SSPP_SCALER_QSEED2 = 0x1, - DPU_SSPP_SCALER_QSEED3, - DPU_SSPP_SCALER_QSEED3LITE, - DPU_SSPP_SCALER_QSEED4, + DPU_SSPP_SCALER_QSEED3_COMPATIBLE, DPU_SSPP_SCALER_RGB, DPU_SSPP_CSC, 
DPU_SSPP_CSC_10BIT, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index b408d456c123..26307d9fc81d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -605,9 +605,7 @@ static void _setup_layer_ops(struct dpu_hw_sspp *c, test_bit(DPU_SSPP_SMART_DMA_V2, &c->cap->features)) c->ops.setup_multirect = dpu_hw_sspp_setup_multirect; - if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) || - test_bit(DPU_SSPP_SCALER_QSEED3LITE, &features) || - test_bit(DPU_SSPP_SCALER_QSEED4, &features)) + if (test_bit(DPU_SSPP_SCALER_QSEED3_COMPATIBLE, &features)) c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3; if (test_bit(DPU_SSPP_CDP, &features)) @@ -643,10 +641,7 @@ int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms, cfg->len, kms); - if (cfg->features & BIT(DPU_SSPP_SCALER_QSEED3) || - cfg->features & BIT(DPU_SSPP_SCALER_QSEED3LITE) || - cfg->features & BIT(DPU_SSPP_SCALER_QSEED2) || - cfg->features & BIT(DPU_SSPP_SCALER_QSEED4)) + if (sblk->scaler_blk.len) dpu_debugfs_create_regset32("scaler_blk", 0400, debugfs_root, sblk->scaler_blk.base + cfg->base, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 93365332bdac..4d7f3894f4d7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -470,8 +470,7 @@ static void _dpu_plane_setup_scaler3(struct dpu_hw_sspp *pipe_hw, scale_cfg->src_height[i] /= chroma_subsmpl_v; } - if (pipe_hw->cap->features & - BIT(DPU_SSPP_SCALER_QSEED4)) { + if (pipe_hw->cap->sblk->scaler_blk.version >= 0x3000) { scale_cfg->preload_x[i] = DPU_QSEED4_DEFAULT_PRELOAD_H; scale_cfg->preload_y[i] = DPU_QSEED4_DEFAULT_PRELOAD_V; } else { -- cgit v1.2.3 From 223fb06fbc26e42f9cdf0ca80fb575916548d74b Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 01:40:34 +0200 Subject: drm/msm/gpu: drop duplicating VIG feature masks After folding QSEED3LITE and QSEED4 feature bits into QSEED3_COMPATIBLE several VIG feature masks became equal. Drop these duplicates. 
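
Read together with the two preceding patches, the design is: the catalog keeps a single QSEED3-compatible capability bit, and the exact scaler generation is taken from scaler_blk.version, which is what makes the per-SoC VIG masks identical and lets this patch drop the duplicates. The stand-alone sketch below mirrors that pattern with purely illustrative names (dpu_example_*, EXAMPLE_*); it is not the driver's code, just the idea under the same assumptions the patches state.

/*
 * Illustrative sketch only -- not kernel code. One "QSEED3 compatible"
 * feature bit gates scaler programming; the generation-specific choice
 * comes from the sub-block's version, not from extra feature bits.
 */
#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_SCALER_QSEED3_COMPATIBLE	(1u << 0)
#define EXAMPLE_SCALER_VER(major, minor)	(((major) << 16) | (minor))

struct dpu_example_sblk {
	uint32_t scaler_version;	/* e.g. EXAMPLE_SCALER_VER(3, 0) */
};

struct dpu_example_pipe_cfg {
	const char *name;
	uint32_t features;
	const struct dpu_example_sblk *sblk;
};

/* One shared sub-block per scaler revision, as in the deduplicated catalog. */
static const struct dpu_example_sblk example_sblk_qseed3_3_0 = {
	.scaler_version = EXAMPLE_SCALER_VER(3, 0),
};
static const struct dpu_example_sblk example_sblk_qseed3_3_1 = {
	.scaler_version = EXAMPLE_SCALER_VER(3, 1),
};

/* The same feature mask now serves every QSEED3-compatible SoC. */
static const struct dpu_example_pipe_cfg example_pipes[] = {
	{ "vig0_sm8250", EXAMPLE_SCALER_QSEED3_COMPATIBLE, &example_sblk_qseed3_3_0 },
	{ "vig0_sm8450", EXAMPLE_SCALER_QSEED3_COMPATIBLE, &example_sblk_qseed3_3_1 },
};

static void example_setup_scaler(const struct dpu_example_pipe_cfg *cfg)
{
	if (!(cfg->features & EXAMPLE_SCALER_QSEED3_COMPATIBLE)) {
		printf("%s: no QSEED3-compatible scaler\n", cfg->name);
		return;
	}

	/* The version check replaces the old per-generation feature bits. */
	if (cfg->sblk->scaler_version >= EXAMPLE_SCALER_VER(3, 0))
		printf("%s: use QSEED4-style preload values\n", cfg->name);
	else
		printf("%s: use legacy QSEED3 preload values\n", cfg->name);
}

int main(void)
{
	for (size_t i = 0; i < sizeof(example_pipes) / sizeof(example_pipes[0]); i++)
		example_setup_scaler(&example_pipes[i]);
	return 0;
}

The practical benefit is that a new scaler revision only needs a new sub-block version value in the catalog, not another feature bit threaded through every per-SoC mask.
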
Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570107/ Link: https://lore.kernel.org/r/20231201234234.2065610-11-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h | 8 ++++---- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h | 8 ++++---- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h | 8 ++++---- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h | 8 ++++---- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h | 8 ++++---- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 11 +---------- 11 files changed, 26 insertions(+), 35 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h index 3969f2925d89..76b2ec0d2489 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h @@ -68,7 +68,7 @@ static const struct dpu_sspp_cfg sm6125_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, - .features = VIG_SM6125_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_2_4, .xin_id = 0, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index f751d63845d1..9acf07108899 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sm8250_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h index 7627f16c5b2d..783269c6ead1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h @@ -51,7 +51,7 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, diff --git 
a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h index 7aefdb7eb494..43f64a005f5a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h @@ -38,7 +38,7 @@ static const struct dpu_sspp_cfg sm6115_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h index 99df9816f171..e17a30be7525 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h @@ -58,7 +58,7 @@ static const struct dpu_sspp_cfg sm6350_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h index 2de304d97c9b..a06c8634d2d7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h @@ -39,7 +39,7 @@ static const struct dpu_sspp_cfg sm6375_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index ad6eb28ead49..bed87250e68c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -73,7 +73,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -81,7 +81,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -89,7 +89,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -97,7 +97,7 @@ static const struct dpu_sspp_cfg sm8350_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f8, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h index c2f1acd43524..8b5c5031e2d9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h @@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x2ac, - .features = VIG_SC7180_MASK, + .features = 
VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x2ac, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x2ac, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sc8280xp_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x2ac, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_0, .xin_id = 12, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index 0559ef780661..7a647e1f729d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -74,7 +74,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x32c, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -82,7 +82,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x32c, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -90,7 +90,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x32c, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -98,7 +98,7 @@ static const struct dpu_sspp_cfg sm8450_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x32c, - .features = VIG_SC7180_MASK_SDMA, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_3_1, .xin_id = 12, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index f393d42317af..bf56265967c0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -66,28 +66,28 @@ static const struct dpu_sspp_cfg sm8550_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x344, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 0, .type = SSPP_TYPE_VIG, }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x344, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 4, .type = SSPP_TYPE_VIG, }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x344, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 8, .type = SSPP_TYPE_VIG, }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x344, - .features = VIG_SC7180_MASK, + .features = VIG_SDM845_MASK, .sblk = &dpu_vig_sblk_qseed3_3_2, .xin_id = 12, .type = SSPP_TYPE_VIG, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index c24817b0e4e5..ec10f68cf0b2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -30,15 +30,6 @@ #define VIG_SDM845_MASK_SDMA \ (VIG_SDM845_MASK | BIT(DPU_SSPP_SMART_DMA_V2)) -#define VIG_SC7180_MASK \ - (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE)) - -#define VIG_SM6125_MASK \ - (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3_COMPATIBLE)) - -#define VIG_SC7180_MASK_SDMA \ - (VIG_SC7180_MASK | BIT(DPU_SSPP_SMART_DMA_V2)) - #define VIG_QCM2290_MASK (VIG_BASE_MASK | BIT(DPU_SSPP_QOS_8LVL)) #define DMA_MSM8998_MASK \ @@ -47,7 +38,7 @@ BIT(DPU_SSPP_CDP) | BIT(DPU_SSPP_EXCL_RECT)) #define VIG_SC7280_MASK \ - (VIG_SC7180_MASK | BIT(DPU_SSPP_INLINE_ROTATION)) + (VIG_SDM845_MASK | BIT(DPU_SSPP_INLINE_ROTATION)) #define VIG_SC7280_MASK_SDMA \ (VIG_SC7280_MASK | BIT(DPU_SSPP_SMART_DMA_V2)) -- cgit v1.2.3 From 3c13a56e435348ffc094ff5a3b3133311673390e Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 16 Oct 2023 22:18:11 -0400 Subject: drm/msm: mdss: add support for SDM670 Add support for the MDSS block on the SDM670 platform. Reviewed-by: Dmitry Baryshkov Signed-off-by: Richard Acayan Patchwork: https://patchwork.freedesktop.org/patch/562963/ Link: https://lore.kernel.org/r/20231017021805.1083350-13-mailingradian@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_mdss.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 29bb38f0bb2c..6f390c850577 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -549,6 +549,12 @@ static const struct msm_mdss_data sc8280xp_data = { .macrotile_mode = 1, }; +static const struct msm_mdss_data sdm670_data = { + .ubwc_enc_version = UBWC_2_0, + .ubwc_dec_version = UBWC_2_0, + .highest_bank_bit = 1, +}; + static const struct msm_mdss_data sdm845_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, @@ -607,6 +613,7 @@ static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,mdss" }, { .compatible = "qcom,msm8998-mdss", .data = &msm8998_data }, { .compatible = "qcom,qcm2290-mdss", .data = &qcm2290_data }, + { .compatible = "qcom,sdm670-mdss", .data = &sdm670_data }, { .compatible = "qcom,sdm845-mdss", .data = &sdm845_data }, { .compatible = "qcom,sc7180-mdss", .data = &sc7180_data }, { .compatible = "qcom,sc7280-mdss", .data = &sc7280_data }, -- cgit v1.2.3 From e140b7e496b73c116454b0a0265bec4cacc9ca40 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Mon, 16 Oct 2023 22:18:12 -0400 Subject: drm/msm/dpu: Add hw revision 4.1 (SDM670) The Snapdragon 670 uses similar clocks (with one frequency added) to the Snapdragon 845 but reports DPU revision 4.1. Add support for this DPU with configuration from the Pixel 3a downstream kernel. Since revision 4.0 is SDM845, reuse some configuration from its catalog entry. 
Link: https://android.googlesource.com/kernel/msm/+/368478b0ae76566927a2769a2bf24dfe7f38bb78/arch/arm64/boot/dts/qcom/sdm670-sde.dtsi Signed-off-by: Richard Acayan Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/562965/ Link: https://lore.kernel.org/r/20231017021805.1083350-14-mailingradian@gmail.com Signed-off-by: Dmitry Baryshkov --- .../gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h | 104 +++++++++++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 1 + 4 files changed, 107 insertions(+) create mode 100644 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h new file mode 100644 index 000000000000..cbbdaebe357e --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2023, Richard Acayan. All rights reserved. + */ + +#ifndef _DPU_4_1_SDM670_H +#define _DPU_4_1_SDM670_H + +static const struct dpu_mdp_cfg sdm670_mdp = { + .name = "top_0", + .base = 0x0, .len = 0x45c, + .features = BIT(DPU_MDP_AUDIO_SELECT), + .clk_ctrls = { + [DPU_CLK_CTRL_VIG0] = { .reg_off = 0x2ac, .bit_off = 0 }, + [DPU_CLK_CTRL_VIG1] = { .reg_off = 0x2b4, .bit_off = 0 }, + [DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, + [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, + }, +}; + +static const struct dpu_sspp_cfg sdm670_sspp[] = { + { + .name = "sspp_0", .id = SSPP_VIG0, + .base = 0x4000, .len = 0x1c8, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_1_3, + .xin_id = 0, + .type = SSPP_TYPE_VIG, + .clk_ctrl = DPU_CLK_CTRL_VIG0, + }, { + .name = "sspp_1", .id = SSPP_VIG1, + .base = 0x6000, .len = 0x1c8, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_1_3, + .xin_id = 4, + .type = SSPP_TYPE_VIG, + .clk_ctrl = DPU_CLK_CTRL_VIG0, + }, { + .name = "sspp_8", .id = SSPP_DMA0, + .base = 0x24000, .len = 0x1c8, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 1, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA0, + }, { + .name = "sspp_9", .id = SSPP_DMA1, + .base = 0x26000, .len = 0x1c8, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 5, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA1, + }, { + .name = "sspp_10", .id = SSPP_DMA2, + .base = 0x28000, .len = 0x1c8, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 9, + .type = SSPP_TYPE_DMA, + .clk_ctrl = DPU_CLK_CTRL_DMA2, + }, +}; + +static const struct dpu_dsc_cfg sdm670_dsc[] = { + { + .name = "dsc_0", .id = DSC_0, + .base = 0x80000, .len = 0x140, + }, { + .name = "dsc_1", .id = DSC_1, + .base = 0x80400, .len = 0x140, + }, +}; + +static const struct dpu_mdss_version sdm670_mdss_ver = { + .core_major_ver = 4, + .core_minor_ver = 1, +}; + +const struct dpu_mdss_cfg dpu_sdm670_cfg = { + .mdss_ver = &sdm670_mdss_ver, + .caps = &sdm845_dpu_caps, + .mdp = &sdm670_mdp, + .ctl_count = ARRAY_SIZE(sdm845_ctl), + .ctl = sdm845_ctl, + .sspp_count = ARRAY_SIZE(sdm670_sspp), + .sspp = sdm670_sspp, + .mixer_count = ARRAY_SIZE(sdm845_lm), + 
.mixer = sdm845_lm, + .pingpong_count = ARRAY_SIZE(sdm845_pp), + .pingpong = sdm845_pp, + .dsc_count = ARRAY_SIZE(sdm670_dsc), + .dsc = sdm670_dsc, + .intf_count = ARRAY_SIZE(sdm845_intf), + .intf = sdm845_intf, + .vbif_count = ARRAY_SIZE(sdm845_vbif), + .vbif = sdm845_vbif, + .perf = &sdm845_perf_data, +}; + +#endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index ec10f68cf0b2..21aaa7cb5acd 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -613,6 +613,7 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = { #include "catalog/dpu_3_0_msm8998.h" #include "catalog/dpu_4_0_sdm845.h" +#include "catalog/dpu_4_1_sdm670.h" #include "catalog/dpu_5_0_sm8150.h" #include "catalog/dpu_5_1_sc8180x.h" diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index d6b9000b63b0..476214905e76 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -820,6 +820,7 @@ struct dpu_mdss_cfg { extern const struct dpu_mdss_cfg dpu_msm8998_cfg; extern const struct dpu_mdss_cfg dpu_sdm845_cfg; +extern const struct dpu_mdss_cfg dpu_sdm670_cfg; extern const struct dpu_mdss_cfg dpu_sm8150_cfg; extern const struct dpu_mdss_cfg dpu_sc8180x_cfg; extern const struct dpu_mdss_cfg dpu_sm8250_cfg; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 9b2efd7acc87..4426033252ca 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1334,6 +1334,7 @@ static const struct dev_pm_ops dpu_pm_ops = { static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,msm8998-dpu", .data = &dpu_msm8998_cfg, }, { .compatible = "qcom,qcm2290-dpu", .data = &dpu_qcm2290_cfg, }, + { .compatible = "qcom,sdm670-dpu", .data = &dpu_sdm670_cfg, }, { .compatible = "qcom,sdm845-dpu", .data = &dpu_sdm845_cfg, }, { .compatible = "qcom,sc7180-dpu", .data = &dpu_sc7180_cfg, }, { .compatible = "qcom,sc7280-dpu", .data = &dpu_sc7280_cfg, }, -- cgit v1.2.3 From b94747f7d8c77e1b261afba8b8cdc9a56e35c9a3 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 30 Oct 2023 11:36:27 +0100 Subject: drm/msm/dpu: add support for SM8650 DPU Add DPU version 10.0 support for the SM8650 platform. Signed-off-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/564975/ Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-5-43f1887c82b8@linaro.org Signed-off-by: Dmitry Baryshkov --- .../drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h | 457 +++++++++++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 26 ++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h | 3 + drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 1 + 5 files changed, 488 insertions(+) create mode 100644 drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h new file mode 100644 index 000000000000..04d2a73dd942 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h @@ -0,0 +1,457 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022. Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2015-2018, 2020 The Linux Foundation. 
All rights reserved. + */ + +#ifndef _DPU_10_0_SM8650_H +#define _DPU_10_0_SM8650_H + +static const struct dpu_caps sm8650_dpu_caps = { + .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH, + .max_mixer_blendstages = 0xb, + .has_src_split = true, + .has_dim_layer = true, + .has_idle_pc = true, + .has_3d_merge = true, + .max_linewidth = 8192, + .pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE, +}; + +static const struct dpu_mdp_cfg sm8650_mdp = { + .name = "top_0", + .base = 0, .len = 0x494, + .features = BIT(DPU_MDP_PERIPH_0_REMOVED), + .clk_ctrls = { + [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, + }, +}; + +/* FIXME: get rid of DPU_CTL_SPLIT_DISPLAY in favour of proper ACTIVE_CTL support */ +static const struct dpu_ctl_cfg sm8650_ctl[] = { + { + .name = "ctl_0", .id = CTL_0, + .base = 0x15000, .len = 0x1000, + .features = CTL_SM8550_MASK | BIT(DPU_CTL_SPLIT_DISPLAY), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 9), + }, { + .name = "ctl_1", .id = CTL_1, + .base = 0x16000, .len = 0x1000, + .features = CTL_SM8550_MASK | BIT(DPU_CTL_SPLIT_DISPLAY), + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 10), + }, { + .name = "ctl_2", .id = CTL_2, + .base = 0x17000, .len = 0x1000, + .features = CTL_SM8550_MASK, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 11), + }, { + .name = "ctl_3", .id = CTL_3, + .base = 0x18000, .len = 0x1000, + .features = CTL_SM8550_MASK, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 12), + }, { + .name = "ctl_4", .id = CTL_4, + .base = 0x19000, .len = 0x1000, + .features = CTL_SM8550_MASK, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 13), + }, { + .name = "ctl_5", .id = CTL_5, + .base = 0x1a000, .len = 0x1000, + .features = CTL_SM8550_MASK, + .intr_start = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 23), + }, +}; + +static const struct dpu_sspp_cfg sm8650_sspp[] = { + { + .name = "sspp_0", .id = SSPP_VIG0, + .base = 0x4000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 0, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_1", .id = SSPP_VIG1, + .base = 0x6000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 4, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_2", .id = SSPP_VIG2, + .base = 0x8000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 8, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_3", .id = SSPP_VIG3, + .base = 0xa000, .len = 0x344, + .features = VIG_SDM845_MASK_SDMA, + .sblk = &dpu_vig_sblk_qseed3_3_3, + .xin_id = 12, + .type = SSPP_TYPE_VIG, + }, { + .name = "sspp_8", .id = SSPP_DMA0, + .base = 0x24000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 1, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_9", .id = SSPP_DMA1, + .base = 0x26000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 5, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_10", .id = SSPP_DMA2, + .base = 0x28000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 9, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_11", .id = SSPP_DMA3, + .base = 0x2a000, .len = 0x344, + .features = DMA_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 13, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_12", .id = SSPP_DMA4, + .base = 0x2c000, .len = 0x344, + .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 14, + .type = SSPP_TYPE_DMA, + }, { + .name = "sspp_13", .id = SSPP_DMA5, + .base = 0x2e000, .len = 0x344, 
+ .features = DMA_CURSOR_SDM845_MASK_SDMA, + .sblk = &dpu_dma_sblk, + .xin_id = 15, + .type = SSPP_TYPE_DMA, + }, +}; + +static const struct dpu_lm_cfg sm8650_lm[] = { + { + .name = "lm_0", .id = LM_0, + .base = 0x44000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_1, + .pingpong = PINGPONG_0, + .dspp = DSPP_0, + }, { + .name = "lm_1", .id = LM_1, + .base = 0x45000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_0, + .pingpong = PINGPONG_1, + .dspp = DSPP_1, + }, { + .name = "lm_2", .id = LM_2, + .base = 0x46000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_3, + .pingpong = PINGPONG_2, + }, { + .name = "lm_3", .id = LM_3, + .base = 0x47000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_2, + .pingpong = PINGPONG_3, + }, { + .name = "lm_4", .id = LM_4, + .base = 0x48000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_5, + .pingpong = PINGPONG_4, + }, { + .name = "lm_5", .id = LM_5, + .base = 0x49000, .len = 0x400, + .features = MIXER_SDM845_MASK, + .sblk = &sdm845_lm_sblk, + .lm_pair = LM_4, + .pingpong = PINGPONG_5, + }, +}; + +static const struct dpu_dspp_cfg sm8650_dspp[] = { + { + .name = "dspp_0", .id = DSPP_0, + .base = 0x54000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, { + .name = "dspp_1", .id = DSPP_1, + .base = 0x56000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, { + .name = "dspp_2", .id = DSPP_2, + .base = 0x58000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, { + .name = "dspp_3", .id = DSPP_3, + .base = 0x5a000, .len = 0x1800, + .features = DSPP_SC7180_MASK, + .sblk = &sdm845_dspp_sblk, + }, +}; + +static const struct dpu_pingpong_cfg sm8650_pp[] = { + { + .name = "pingpong_0", .id = PINGPONG_0, + .base = 0x69000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_0, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), + }, { + .name = "pingpong_1", .id = PINGPONG_1, + .base = 0x6a000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_0, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), + }, { + .name = "pingpong_2", .id = PINGPONG_2, + .base = 0x6b000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_1, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10), + }, { + .name = "pingpong_3", .id = PINGPONG_3, + .base = 0x6c000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_1, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11), + }, { + .name = "pingpong_4", .id = PINGPONG_4, + .base = 0x6d000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_2, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30), + }, { + .name = "pingpong_5", .id = PINGPONG_5, + .base = 0x6e000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_2, + .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), + }, { + .name = "pingpong_6", .id = PINGPONG_6, + .base = 0x66000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_3, + }, { + .name = "pingpong_7", .id = PINGPONG_7, + .base = 0x66400, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = 
&sc7280_pp_sblk, + .merge_3d = MERGE_3D_3, + }, { + .name = "pingpong_8", .id = PINGPONG_8, + .base = 0x7e000, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_4, + }, { + .name = "pingpong_9", .id = PINGPONG_9, + .base = 0x7e400, .len = 0, + .features = BIT(DPU_PINGPONG_DITHER), + .sblk = &sc7280_pp_sblk, + .merge_3d = MERGE_3D_4, + }, +}; + +static const struct dpu_merge_3d_cfg sm8650_merge_3d[] = { + { + .name = "merge_3d_0", .id = MERGE_3D_0, + .base = 0x4e000, .len = 0x8, + }, { + .name = "merge_3d_1", .id = MERGE_3D_1, + .base = 0x4f000, .len = 0x8, + }, { + .name = "merge_3d_2", .id = MERGE_3D_2, + .base = 0x50000, .len = 0x8, + }, { + .name = "merge_3d_3", .id = MERGE_3D_3, + .base = 0x66700, .len = 0x8, + }, { + .name = "merge_3d_4", .id = MERGE_3D_4, + .base = 0x7e700, .len = 0x8, + }, +}; + +/* + * NOTE: Each display compression engine (DCE) contains dual hard + * slice DSC encoders so both share same base address but with + * its own different sub block address. + */ +static const struct dpu_dsc_cfg sm8650_dsc[] = { + { + .name = "dce_0_0", .id = DSC_0, + .base = 0x80000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &dsc_sblk_0, + }, { + .name = "dce_0_1", .id = DSC_1, + .base = 0x80000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &dsc_sblk_1, + }, { + .name = "dce_1_0", .id = DSC_2, + .base = 0x81000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &dsc_sblk_0, + }, { + .name = "dce_1_1", .id = DSC_3, + .base = 0x81000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2) | BIT(DPU_DSC_NATIVE_42x_EN), + .sblk = &dsc_sblk_1, + }, { + .name = "dce_2_0", .id = DSC_4, + .base = 0x82000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2), + .sblk = &dsc_sblk_0, + }, { + .name = "dce_2_1", .id = DSC_5, + .base = 0x82000, .len = 0x6, + .features = BIT(DPU_DSC_HW_REV_1_2), + .sblk = &dsc_sblk_1, + }, +}; + +static const struct dpu_wb_cfg sm8650_wb[] = { + { + .name = "wb_2", .id = WB_2, + .base = 0x65000, .len = 0x2c8, + .features = WB_SM8250_MASK, + .format_list = wb2_formats, + .num_formats = ARRAY_SIZE(wb2_formats), + .xin_id = 6, + .vbif_idx = VBIF_RT, + .maxlinewidth = 4096, + .intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4), + }, +}; + +static const struct dpu_intf_cfg sm8650_intf[] = { + { + .name = "intf_0", .id = INTF_0, + .base = 0x34000, .len = 0x280, + .features = INTF_SC7280_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), + }, { + .name = "intf_1", .id = INTF_1, + .base = 0x35000, .len = 0x300, + .features = INTF_SC7280_MASK, + .type = INTF_DSI, + .controller_id = MSM_DSI_CONTROLLER_0, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), + .intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF1_TEAR_INTR, 2), + }, { + .name = "intf_2", .id = INTF_2, + .base = 0x36000, .len = 0x300, + .features = INTF_SC7280_MASK, + .type = INTF_DSI, + .controller_id = MSM_DSI_CONTROLLER_1, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), + .intr_tear_rd_ptr = DPU_IRQ_IDX(MDP_INTF2_TEAR_INTR, 2), + }, { + .name = "intf_3", .id = INTF_3, + .base = 0x37000, .len = 0x280, + 
.features = INTF_SC7280_MASK, + .type = INTF_DP, + .controller_id = MSM_DP_CONTROLLER_1, + .prog_fetch_lines_worst_case = 24, + .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), + .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), + }, +}; + +static const struct dpu_perf_cfg sm8650_perf_data = { + .max_bw_low = 17000000, + .max_bw_high = 27000000, + .min_core_ib = 2500000, + .min_llcc_ib = 0, + .min_dram_ib = 800000, + .min_prefill_lines = 35, + /* FIXME: lut tables */ + .danger_lut_tbl = {0x3ffff, 0x3ffff, 0x0}, + .safe_lut_tbl = {0xfe00, 0xfe00, 0xffff}, + .qos_lut_tbl = { + {.nentry = ARRAY_SIZE(sc7180_qos_linear), + .entries = sc7180_qos_linear + }, + {.nentry = ARRAY_SIZE(sc7180_qos_macrotile), + .entries = sc7180_qos_macrotile + }, + {.nentry = ARRAY_SIZE(sc7180_qos_nrt), + .entries = sc7180_qos_nrt + }, + /* TODO: macrotile-qseed is different from macrotile */ + }, + .cdp_cfg = { + {.rd_enable = 1, .wr_enable = 1}, + {.rd_enable = 1, .wr_enable = 0} + }, + .clk_inefficiency_factor = 105, + .bw_inefficiency_factor = 120, +}; + +static const struct dpu_mdss_version sm8650_mdss_ver = { + .core_major_ver = 10, + .core_minor_ver = 0, +}; + +const struct dpu_mdss_cfg dpu_sm8650_cfg = { + .mdss_ver = &sm8650_mdss_ver, + .caps = &sm8650_dpu_caps, + .mdp = &sm8650_mdp, + .ctl_count = ARRAY_SIZE(sm8650_ctl), + .ctl = sm8650_ctl, + .sspp_count = ARRAY_SIZE(sm8650_sspp), + .sspp = sm8650_sspp, + .mixer_count = ARRAY_SIZE(sm8650_lm), + .mixer = sm8650_lm, + .dspp_count = ARRAY_SIZE(sm8650_dspp), + .dspp = sm8650_dspp, + .pingpong_count = ARRAY_SIZE(sm8650_pp), + .pingpong = sm8650_pp, + .dsc_count = ARRAY_SIZE(sm8650_dsc), + .dsc = sm8650_dsc, + .merge_3d_count = ARRAY_SIZE(sm8650_merge_3d), + .merge_3d = sm8650_merge_3d, + .wb_count = ARRAY_SIZE(sm8650_wb), + .wb = sm8650_wb, + .intf_count = ARRAY_SIZE(sm8650_intf), + .intf = sm8650_intf, + .vbif_count = ARRAY_SIZE(sm8650_vbif), + .vbif = sm8650_vbif, + .perf = &sm8650_perf_data, +}; + +#endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 21aaa7cb5acd..d52aae54bbd5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -329,6 +329,9 @@ static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_1 = static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_2 = _VIG_SBLK(SSPP_SCALER_VER(3, 2)); +static const struct dpu_sspp_sub_blks dpu_vig_sblk_qseed3_3_3 = + _VIG_SBLK(SSPP_SCALER_VER(3, 3)); + static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK(); /************************************************************* @@ -431,6 +434,7 @@ static const u32 msm8998_rt_pri_lvl[] = {1, 2, 2, 2}; static const u32 msm8998_nrt_pri_lvl[] = {1, 1, 1, 1}; static const u32 sdm845_rt_pri_lvl[] = {3, 3, 4, 4, 5, 5, 6, 6}; static const u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3}; +static const u32 sm8650_rt_pri_lvl[] = {4, 4, 5, 5, 5, 5, 5, 6}; static const struct dpu_vbif_dynamic_ot_cfg msm8998_ot_rdwr_cfg[] = { { @@ -517,6 +521,26 @@ static const struct dpu_vbif_cfg sm8550_vbif[] = { }, }; +static const struct dpu_vbif_cfg sm8650_vbif[] = { + { + .name = "vbif_rt", .id = VBIF_RT, + .base = 0, .len = 0x1074, + .features = BIT(DPU_VBIF_QOS_REMAP), + .xin_halt_timeout = 0x4000, + .qos_rp_remap_size = 0x40, + .qos_rt_tbl = { + .npriority_lvl = ARRAY_SIZE(sm8650_rt_pri_lvl), + .priority_lvl = sm8650_rt_pri_lvl, + }, + .qos_nrt_tbl = { + .npriority_lvl = ARRAY_SIZE(sdm845_nrt_pri_lvl), + .priority_lvl = 
sdm845_nrt_pri_lvl, + }, + .memtype_count = 16, + .memtype = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, + }, +}; + /************************************************************* * PERF data config *************************************************************/ @@ -633,3 +657,5 @@ static const struct dpu_qos_lut_entry sc7180_qos_nrt[] = { #include "catalog/dpu_8_1_sm8450.h" #include "catalog/dpu_9_0_sm8550.h" + +#include "catalog/dpu_10_0_sm8650.h" diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index 476214905e76..e3c0d007481b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -835,5 +835,6 @@ extern const struct dpu_mdss_cfg dpu_sc7280_cfg; extern const struct dpu_mdss_cfg dpu_sc8280xp_cfg; extern const struct dpu_mdss_cfg dpu_sm8450_cfg; extern const struct dpu_mdss_cfg dpu_sm8550_cfg; +extern const struct dpu_mdss_cfg dpu_sm8650_cfg; #endif /* _DPU_HW_CATALOG_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h index d85157acfbf8..a6702b2bfc68 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h @@ -195,6 +195,8 @@ enum dpu_pingpong { PINGPONG_5, PINGPONG_6, PINGPONG_7, + PINGPONG_8, + PINGPONG_9, PINGPONG_S0, PINGPONG_MAX }; @@ -204,6 +206,7 @@ enum dpu_merge_3d { MERGE_3D_1, MERGE_3D_2, MERGE_3D_3, + MERGE_3D_4, MERGE_3D_MAX }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 4426033252ca..c1cb500adc07 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1349,6 +1349,7 @@ static const struct of_device_id dpu_dt_match[] = { { .compatible = "qcom,sm8350-dpu", .data = &dpu_sm8350_cfg, }, { .compatible = "qcom,sm8450-dpu", .data = &dpu_sm8450_cfg, }, { .compatible = "qcom,sm8550-dpu", .data = &dpu_sm8550_cfg, }, + { .compatible = "qcom,sm8650-dpu", .data = &dpu_sm8650_cfg, }, {} }; MODULE_DEVICE_TABLE(of, dpu_dt_match); -- cgit v1.2.3 From e6488c2a354103b6a3212f2fffa854e235d8a80e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 30 Oct 2023 11:36:28 +0100 Subject: drm/msm: mdss: add support for SM8650 Add Mobile Display Subsystem (MDSS) support for the SM8650 platform. Reviewed-by: Dmitry Baryshkov Signed-off-by: Neil Armstrong Patchwork: https://patchwork.freedesktop.org/patch/564980/ Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-6-43f1887c82b8@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/msm_mdss.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 6f390c850577..16b567a39b8b 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -628,6 +628,7 @@ static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,sm8350-mdss", .data = &sm8250_data }, { .compatible = "qcom,sm8450-mdss", .data = &sm8250_data }, { .compatible = "qcom,sm8550-mdss", .data = &sm8550_data }, + { .compatible = "qcom,sm8650-mdss", .data = &sm8550_data}, {} }; MODULE_DEVICE_TABLE(of, mdss_dt_match); -- cgit v1.2.3 From 3a73e376cff31c6cbc99f0e44068db3a769684d8 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 30 Oct 2023 11:36:29 +0100 Subject: drm/msm: dsi: add support for DSI-PHY on SM8650 Add DSI PHY support for the SM8650 platform. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Neil Armstrong Patchwork: https://patchwork.freedesktop.org/patch/564976/ Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-7-43f1887c82b8@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 2 ++ drivers/gpu/drm/msm/dsi/phy/dsi_phy.h | 1 + drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 27 +++++++++++++++++++++++++++ 3 files changed, 30 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index e49ebd9f6326..24a347fe2998 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -587,6 +587,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { .data = &dsi_phy_5nm_8450_cfgs }, { .compatible = "qcom,sm8550-dsi-phy-4nm", .data = &dsi_phy_4nm_8550_cfgs }, + { .compatible = "qcom,sm8650-dsi-phy-4nm", + .data = &dsi_phy_4nm_8650_cfgs }, #endif {} }; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 8b640d174785..e4275d3ad581 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -62,6 +62,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_7nm_7280_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8350_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_5nm_8450_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs; struct msm_dsi_dphy_timing { u32 clk_zero; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 3b1ed02f644d..c66193f2dc0d 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -1121,6 +1121,10 @@ static const struct regulator_bulk_data dsi_phy_7nm_37750uA_regulators[] = { { .supply = "vdds", .init_load_uA = 37550 }, }; +static const struct regulator_bulk_data dsi_phy_7nm_98000uA_regulators[] = { + { .supply = "vdds", .init_load_uA = 98000 }, +}; + static const struct regulator_bulk_data dsi_phy_7nm_97800uA_regulators[] = { { .supply = "vdds", .init_load_uA = 97800 }, }; @@ -1281,3 +1285,26 @@ const struct msm_dsi_phy_cfg dsi_phy_4nm_8550_cfgs = { .num_dsi_phy = 2, .quirks = DSI_PHY_7NM_QUIRK_V5_2, }; + +const struct msm_dsi_phy_cfg dsi_phy_4nm_8650_cfgs = { + .has_phy_lane = true, + .regulator_data = dsi_phy_7nm_98000uA_regulators, + .num_regulators = ARRAY_SIZE(dsi_phy_7nm_98000uA_regulators), + .ops = { + .enable = dsi_7nm_phy_enable, + .disable = dsi_7nm_phy_disable, + .pll_init = dsi_pll_7nm_init, + .save_pll_state = dsi_7nm_pll_save_state, + .restore_pll_state = dsi_7nm_pll_restore_state, + .set_continuous_clock = dsi_7nm_set_continuous_clock, + }, + .min_pll_rate = 600000000UL, +#ifdef CONFIG_64BIT + .max_pll_rate = 5000000000UL, +#else + .max_pll_rate = ULONG_MAX, +#endif + .io_start = { 0xae95000, 0xae97000 }, + .num_dsi_phy = 2, + .quirks = DSI_PHY_7NM_QUIRK_V5_2, +}; -- cgit v1.2.3 From fec254cc752d0449714c026f021d3043abda15e5 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 30 Oct 2023 11:36:30 +0100 Subject: drm/msm: dsi: add support for DSI 2.8.0 Add DSI Controller version 2.8.0 support for the SM8650 platform. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Neil Armstrong Patchwork: https://patchwork.freedesktop.org/patch/564977/ Link: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-mdss-v2-8-43f1887c82b8@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/dsi_cfg.c | 17 +++++++++++++++++ drivers/gpu/drm/msm/dsi/dsi_cfg.h | 1 + 2 files changed, 18 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index 1f98ff74ceb0..10ba7d153d1c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -190,6 +190,21 @@ static const struct msm_dsi_config sm8550_dsi_cfg = { }, }; +static const struct regulator_bulk_data sm8650_dsi_regulators[] = { + { .supply = "vdda", .init_load_uA = 16600 }, /* 1.2 V */ +}; + +static const struct msm_dsi_config sm8650_dsi_cfg = { + .io_offset = DSI_6G_REG_SHIFT, + .regulator_data = sm8650_dsi_regulators, + .num_regulators = ARRAY_SIZE(sm8650_dsi_regulators), + .bus_clk_names = dsi_v2_4_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_v2_4_clk_names), + .io_start = { + { 0xae94000, 0xae96000 }, + }, +}; + static const struct regulator_bulk_data sc7280_dsi_regulators[] = { { .supply = "vdda", .init_load_uA = 8350 }, /* 1.2 V */ { .supply = "refgen" }, @@ -281,6 +296,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_7_0, &sm8550_dsi_cfg, &msm_dsi_6g_v2_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_8_0, + &sm8650_dsi_cfg, &msm_dsi_6g_v2_host_ops}, }; const struct msm_dsi_cfg_handler *msm_dsi_cfg_get(u32 major, u32 minor) diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index 43f0dd74edb6..4c9b4b37681b 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -28,6 +28,7 @@ #define MSM_DSI_6G_VER_MINOR_V2_5_0 0x20050000 #define MSM_DSI_6G_VER_MINOR_V2_6_0 0x20060000 #define MSM_DSI_6G_VER_MINOR_V2_7_0 0x20070000 +#define MSM_DSI_6G_VER_MINOR_V2_8_0 0x20080000 #define MSM_DSI_V2_VER_MINOR_8064 0x0 -- cgit v1.2.3 From ded61d7dc5a0f8cfe7390aba33187c862d09b177 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 01:42:44 +0300 Subject: drm/msm/mdss: switch mdss to use devm_of_icc_get() Stop using hand-written reset function for ICC release, use devm_of_icc_get() instead. 
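For context on the API swap: devm_of_icc_get() returns an interconnect path whose icc_put() is handled automatically by devres when the device unbinds, so the hand-rolled cleanup action removed below becomes unnecessary. A minimal before/after sketch, assuming a single "mdp0-mem" path and with error handling trimmed (put_icc_path_action is a hypothetical stand-in for the driver's old icc_put() wrapper):

	/* before: acquire the path, then register a manual cleanup action */
	path = of_icc_get(dev, "mdp0-mem");
	ret = devm_add_action_or_reset(dev, put_icc_path_action, path);

	/* after: devres releases the path on driver unbind */
	path = devm_of_icc_get(dev, "mdp0-mem");
	if (IS_ERR_OR_NULL(path))
		return PTR_ERR_OR_ZERO(path);
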
Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570161/ Link: https://lore.kernel.org/r/20231202224247.1282567-2-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/msm_mdss.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 16b567a39b8b..92a290b36959 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -50,14 +50,14 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev, struct icc_path *path0; struct icc_path *path1; - path0 = of_icc_get(dev, "mdp0-mem"); + path0 = devm_of_icc_get(dev, "mdp0-mem"); if (IS_ERR_OR_NULL(path0)) return PTR_ERR_OR_ZERO(path0); msm_mdss->path[0] = path0; msm_mdss->num_paths = 1; - path1 = of_icc_get(dev, "mdp1-mem"); + path1 = devm_of_icc_get(dev, "mdp1-mem"); if (!IS_ERR_OR_NULL(path1)) { msm_mdss->path[1] = path1; msm_mdss->num_paths++; @@ -66,15 +66,6 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev, return 0; } -static void msm_mdss_put_icc_path(void *data) -{ - struct msm_mdss *msm_mdss = data; - int i; - - for (i = 0; i < msm_mdss->num_paths; i++) - icc_put(msm_mdss->path[i]); -} - static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw) { int i; @@ -391,9 +382,6 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5 dev_dbg(&pdev->dev, "mapped mdss address space @%pK\n", msm_mdss->mmio); ret = msm_mdss_parse_data_bus_icc_path(&pdev->dev, msm_mdss); - if (ret) - return ERR_PTR(ret); - ret = devm_add_action_or_reset(&pdev->dev, msm_mdss_put_icc_path, msm_mdss); if (ret) return ERR_PTR(ret); -- cgit v1.2.3 From fabaf176322d687b91a4acf1630c0d0a7d097faa Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sun, 3 Dec 2023 01:42:45 +0300 Subject: drm/msm/mdss: Rename path references to mdp_path The DPU1 driver needs to handle all MDPn<->DDR paths, as well as CPU<->SLAVE_DISPLAY_CFG. The former ones share how their values are calculated, but the latter one has static predefines spanning all SoCs. In preparation for supporting the CPU<->SLAVE_DISPLAY_CFG path, rename the path-related struct members to include "mdp_". 
Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570163/ Link: https://lore.kernel.org/r/20231202224247.1282567-3-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/msm_mdss.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 92a290b36959..a1a077e2bac8 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -40,8 +40,8 @@ struct msm_mdss { struct irq_domain *domain; } irq_controller; const struct msm_mdss_data *mdss_data; - struct icc_path *path[2]; - u32 num_paths; + struct icc_path *mdp_path[2]; + u32 num_mdp_paths; }; static int msm_mdss_parse_data_bus_icc_path(struct device *dev, @@ -54,13 +54,13 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev, if (IS_ERR_OR_NULL(path0)) return PTR_ERR_OR_ZERO(path0); - msm_mdss->path[0] = path0; - msm_mdss->num_paths = 1; + msm_mdss->mdp_path[0] = path0; + msm_mdss->num_mdp_paths = 1; path1 = devm_of_icc_get(dev, "mdp1-mem"); if (!IS_ERR_OR_NULL(path1)) { - msm_mdss->path[1] = path1; - msm_mdss->num_paths++; + msm_mdss->mdp_path[1] = path1; + msm_mdss->num_mdp_paths++; } return 0; @@ -70,8 +70,8 @@ static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw) { int i; - for (i = 0; i < msm_mdss->num_paths; i++) - icc_set_bw(msm_mdss->path[i], 0, Bps_to_icc(bw)); + for (i = 0; i < msm_mdss->num_mdp_paths; i++) + icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(bw)); } static void msm_mdss_irq(struct irq_desc *desc) -- cgit v1.2.3 From 7323694e118a24ab954d3a16fe59a68b311cb462 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 01:42:46 +0300 Subject: drm/msm/mdss: inline msm_mdss_icc_request_bw() There are just two places where we set the bandwidth: in the resume and in the suspend paths. Drop the wrapping function msm_mdss_icc_request_bw() and call icc_set_bw() directly. Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570168/ Link: https://lore.kernel.org/r/20231202224247.1282567-4-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/msm_mdss.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index a1a077e2bac8..776579ac69d8 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -66,14 +66,6 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev, return 0; } -static void msm_mdss_icc_request_bw(struct msm_mdss *msm_mdss, unsigned long bw) -{ - int i; - - for (i = 0; i < msm_mdss->num_mdp_paths; i++) - icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(bw)); -} - static void msm_mdss_irq(struct irq_desc *desc) { struct msm_mdss *msm_mdss = irq_desc_get_handler_data(desc); @@ -227,14 +219,15 @@ const struct msm_mdss_data *msm_mdss_get_mdss_data(struct device *dev) static int msm_mdss_enable(struct msm_mdss *msm_mdss) { - int ret; + int ret, i; /* * Several components have AXI clocks that can only be turned on if * the interconnect is enabled (non-zero bandwidth). Let's make sure * that the interconnects are at least at a minimum amount. 
*/ - msm_mdss_icc_request_bw(msm_mdss, MIN_IB_BW); + for (i = 0; i < msm_mdss->num_mdp_paths; i++) + icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(MIN_IB_BW)); ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks); if (ret) { @@ -286,8 +279,12 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss) static int msm_mdss_disable(struct msm_mdss *msm_mdss) { + int i; + clk_bulk_disable_unprepare(msm_mdss->num_clocks, msm_mdss->clocks); - msm_mdss_icc_request_bw(msm_mdss, 0); + + for (i = 0; i < msm_mdss->num_mdp_paths; i++) + icc_set_bw(msm_mdss->mdp_path[i], 0, 0); return 0; } -- cgit v1.2.3 From a55c8ff252d374acb6f78b979cadc38073ce95e8 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 01:42:47 +0300 Subject: drm/msm/mdss: Handle the reg bus ICC path Apart from the already handled data bus (MAS_MDP_Pn<->DDR), there's another path that needs to be handled to ensure MDSS functions properly, namely the "reg bus", a.k.a the CPU-MDSS interconnect. Gating that path may have a variety of effects, from none to otherwise inexplicable DSI timeouts. Provide a way for MDSS driver to vote on this bus. A note regarding vote values. Newer platforms have corresponding bandwidth values in the vendor DT files. For the older platforms there was a static vote in the mdss_mdp and rotator drivers. I choose to be conservative here and choose this value as a default. Co-developed-by: Konrad Dybcio Signed-off-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570164/ Link: https://lore.kernel.org/r/20231202224247.1282567-5-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/msm_mdss.c | 49 ++++++++++++++++++++++++++++++++++++++---- drivers/gpu/drm/msm/msm_mdss.h | 1 + 2 files changed, 46 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 776579ac69d8..455b2e3a0cdd 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -28,6 +28,8 @@ #define MIN_IB_BW 400000000UL /* Min ib vote 400MB */ +#define DEFAULT_REG_BW 153600 /* Used in mdss fbdev driver */ + struct msm_mdss { struct device *dev; @@ -42,6 +44,7 @@ struct msm_mdss { const struct msm_mdss_data *mdss_data; struct icc_path *mdp_path[2]; u32 num_mdp_paths; + struct icc_path *reg_bus_path; }; static int msm_mdss_parse_data_bus_icc_path(struct device *dev, @@ -49,6 +52,7 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev, { struct icc_path *path0; struct icc_path *path1; + struct icc_path *reg_bus_path; path0 = devm_of_icc_get(dev, "mdp0-mem"); if (IS_ERR_OR_NULL(path0)) @@ -63,6 +67,10 @@ static int msm_mdss_parse_data_bus_icc_path(struct device *dev, msm_mdss->num_mdp_paths++; } + reg_bus_path = of_icc_get(dev, "cpu-cfg"); + if (!IS_ERR_OR_NULL(reg_bus_path)) + msm_mdss->reg_bus_path = reg_bus_path; + return 0; } @@ -229,6 +237,13 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss) for (i = 0; i < msm_mdss->num_mdp_paths; i++) icc_set_bw(msm_mdss->mdp_path[i], 0, Bps_to_icc(MIN_IB_BW)); + if (msm_mdss->mdss_data && msm_mdss->mdss_data->reg_bus_bw) + icc_set_bw(msm_mdss->reg_bus_path, 0, + msm_mdss->mdss_data->reg_bus_bw); + else + icc_set_bw(msm_mdss->reg_bus_path, 0, + DEFAULT_REG_BW); + ret = clk_bulk_prepare_enable(msm_mdss->num_clocks, msm_mdss->clocks); if (ret) { dev_err(msm_mdss->dev, "clock enable failed, ret:%d\n", ret); @@ -286,6 +301,9 @@ static int msm_mdss_disable(struct msm_mdss *msm_mdss) for (i = 0; 
i < msm_mdss->num_mdp_paths; i++) icc_set_bw(msm_mdss->mdp_path[i], 0, 0); + if (msm_mdss->reg_bus_path) + icc_set_bw(msm_mdss->reg_bus_path, 0, 0); + return 0; } @@ -372,6 +390,8 @@ static struct msm_mdss *msm_mdss_init(struct platform_device *pdev, bool is_mdp5 if (!msm_mdss) return ERR_PTR(-ENOMEM); + msm_mdss->mdss_data = of_device_get_match_data(&pdev->dev); + msm_mdss->mmio = devm_platform_ioremap_resource_byname(pdev, is_mdp5 ? "mdss_phys" : "mdss"); if (IS_ERR(msm_mdss->mmio)) return ERR_CAST(msm_mdss->mmio); @@ -462,8 +482,6 @@ static int mdss_probe(struct platform_device *pdev) if (IS_ERR(mdss)) return PTR_ERR(mdss); - mdss->mdss_data = of_device_get_match_data(&pdev->dev); - platform_set_drvdata(pdev, mdss); /* @@ -495,11 +513,13 @@ static const struct msm_mdss_data msm8998_data = { .ubwc_enc_version = UBWC_1_0, .ubwc_dec_version = UBWC_1_0, .highest_bank_bit = 2, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data qcm2290_data = { /* no UBWC */ .highest_bank_bit = 0x2, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sc7180_data = { @@ -507,6 +527,7 @@ static const struct msm_mdss_data sc7180_data = { .ubwc_dec_version = UBWC_2_0, .ubwc_static = 0x1e, .highest_bank_bit = 0x3, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sc7280_data = { @@ -516,6 +537,7 @@ static const struct msm_mdss_data sc7280_data = { .ubwc_static = 1, .highest_bank_bit = 1, .macrotile_mode = 1, + .reg_bus_bw = 74000, }; static const struct msm_mdss_data sc8180x_data = { @@ -523,6 +545,7 @@ static const struct msm_mdss_data sc8180x_data = { .ubwc_dec_version = UBWC_3_0, .highest_bank_bit = 3, .macrotile_mode = 1, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sc8280xp_data = { @@ -532,6 +555,7 @@ static const struct msm_mdss_data sc8280xp_data = { .ubwc_static = 1, .highest_bank_bit = 3, .macrotile_mode = 1, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sdm670_data = { @@ -544,6 +568,7 @@ static const struct msm_mdss_data sdm845_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .highest_bank_bit = 2, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sm6350_data = { @@ -552,12 +577,14 @@ static const struct msm_mdss_data sm6350_data = { .ubwc_swizzle = 6, .ubwc_static = 0x1e, .highest_bank_bit = 1, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sm8150_data = { .ubwc_enc_version = UBWC_3_0, .ubwc_dec_version = UBWC_3_0, .highest_bank_bit = 2, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sm6115_data = { @@ -566,6 +593,7 @@ static const struct msm_mdss_data sm6115_data = { .ubwc_swizzle = 7, .ubwc_static = 0x11f, .highest_bank_bit = 0x1, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sm6125_data = { @@ -583,6 +611,18 @@ static const struct msm_mdss_data sm8250_data = { /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, .macrotile_mode = 1, + .reg_bus_bw = 76800, +}; + +static const struct msm_mdss_data sm8350_data = { + .ubwc_enc_version = UBWC_4_0, + .ubwc_dec_version = UBWC_4_0, + .ubwc_swizzle = 6, + .ubwc_static = 1, + /* TODO: highest_bank_bit = 2 for LP_DDR4 */ + .highest_bank_bit = 3, + .macrotile_mode = 1, + .reg_bus_bw = 74000, }; static const struct msm_mdss_data sm8550_data = { @@ -593,6 +633,7 @@ static const struct msm_mdss_data sm8550_data = { /* TODO: highest_bank_bit = 2 for LP_DDR4 */ .highest_bank_bit = 3, .macrotile_mode = 1, + .reg_bus_bw = 57000, }; static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,mdss" }, @@ -610,8 
+651,8 @@ static const struct of_device_id mdss_dt_match[] = { { .compatible = "qcom,sm6375-mdss", .data = &sm6350_data }, { .compatible = "qcom,sm8150-mdss", .data = &sm8150_data }, { .compatible = "qcom,sm8250-mdss", .data = &sm8250_data }, - { .compatible = "qcom,sm8350-mdss", .data = &sm8250_data }, - { .compatible = "qcom,sm8450-mdss", .data = &sm8250_data }, + { .compatible = "qcom,sm8350-mdss", .data = &sm8350_data }, + { .compatible = "qcom,sm8450-mdss", .data = &sm8350_data }, { .compatible = "qcom,sm8550-mdss", .data = &sm8550_data }, { .compatible = "qcom,sm8650-mdss", .data = &sm8550_data}, {} diff --git a/drivers/gpu/drm/msm/msm_mdss.h b/drivers/gpu/drm/msm/msm_mdss.h index 02bbab42adbc..3afef4b1786d 100644 --- a/drivers/gpu/drm/msm/msm_mdss.h +++ b/drivers/gpu/drm/msm/msm_mdss.h @@ -14,6 +14,7 @@ struct msm_mdss_data { u32 ubwc_static; u32 highest_bank_bit; u32 macrotile_mode; + u32 reg_bus_bw; }; #define UBWC_1_0 0x10000000 -- cgit v1.2.3 From 26513300978f7285c3e776c144f27ef71be61f57 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Dec 2023 08:27:36 +0100 Subject: drm/bridge: tc358768: select CONFIG_VIDEOMODE_HELPERS A dependency on this feature was recently introduced: x86_64-linux-ld: vmlinux.o: in function `tc358768_bridge_pre_enable': tc358768.c:(.text+0xbe3dae): undefined reference to `drm_display_mode_to_videomode' Make sure this is always enabled. Fixes: e5fb21678136 ("drm/bridge: tc358768: Use struct videomode") Signed-off-by: Arnd Bergmann Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231204072814.968816-1-arnd@kernel.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231204072814.968816-1-arnd@kernel.org --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index ba82a1142adf..3e6a4e2044c0 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -313,6 +313,7 @@ config DRM_TOSHIBA_TC358768 select REGMAP_I2C select DRM_PANEL select DRM_MIPI_DSI + select VIDEOMODE_HELPERS help Toshiba TC358768AXBG/TC358778XBG DSI bridge chip driver. -- cgit v1.2.3 From 20c2dbff342aec13bf93c2f6c951da198916a455 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:25 +0200 Subject: drm/i915: Skip some timing checks on BXT/GLK DSI transcoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently some BXT/GLK systems have DSI panels whose timings don't agree with the normal cpu transcoder hblank>=32 limitation. This is perhaps fine as there are no specific hblank/etc. limits listed for the BXT/GLK DSI transcoders. Move those checks out from the global intel_mode_valid() into into connector specific .mode_valid() hooks, skipping BXT/GLK DSI connectors. We'll leave the basic [hv]display/[hv]total checks in intel_mode_valid() as those seem like sensible upper limits regardless of the transcoder used. 
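To make the resulting structure concrete: each connector type now performs the transcoder timing check in its own .mode_valid() hook via the new intel_cpu_transcoder_mode_valid() helper, while the BXT/GLK DSI path simply skips it. A rough sketch of the per-connector pattern (the hook name is a placeholder; the real hooks are in the diff below):

	static enum drm_mode_status
	example_mode_valid(struct drm_connector *connector,
			   struct drm_display_mode *mode)
	{
		struct drm_i915_private *i915 = to_i915(connector->dev);
		enum drm_mode_status status;

		/* hblank/hsync/vsync minimums that do not apply to BXT/GLK DSI */
		status = intel_cpu_transcoder_mode_valid(i915, mode);
		if (status != MODE_OK)
			return status;

		/* existing per-connector clock and flag checks follow */
		return MODE_OK;
	}
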
Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9720 Fixes: 8f4b1068e7fc ("drm/i915: Check some transcoder timing minimum limits") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit e0ef2daa8ca8ce4dbc2fd0959e383b753a87fd7d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/icl_dsi.c | 7 +++++++ drivers/gpu/drm/i915/display/intel_crt.c | 5 +++++ drivers/gpu/drm/i915/display/intel_display.c | 10 ++++++++++ drivers/gpu/drm/i915/display/intel_display.h | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dvo.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_hdmi.c | 4 ++++ drivers/gpu/drm/i915/display/intel_lvds.c | 5 +++++ drivers/gpu/drm/i915/display/intel_sdvo.c | 8 +++++++- drivers/gpu/drm/i915/display/intel_tv.c | 8 +++++++- drivers/gpu/drm/i915/display/vlv_dsi.c | 18 +++++++++++++++++- 12 files changed, 79 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index c4585e445198..67143a0f5189 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1440,6 +1440,13 @@ static void gen11_dsi_post_disable(struct intel_atomic_state *state, static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + /* FIXME: DSC? */ return intel_dsi_mode_valid(connector, mode); } diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 913e5d230a4d..6f6b348b8a40 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -348,8 +348,13 @@ intel_crt_mode_valid(struct drm_connector *connector, struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); int max_dotclk = dev_priv->max_dotclk_freq; + enum drm_mode_status status; int max_clock; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 94330108145e..4ee7d569b379 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7869,6 +7869,16 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev, mode->vtotal > vtotal_max) return MODE_V_ILLEGAL; + return MODE_OK; +} + +enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode) +{ + /* + * Additional transcoder timing limits, + * excluding BXT/GLK DSI transcoders. 
+ */ if (DISPLAY_VER(dev_priv) >= 5) { if (mode->hdisplay < 64 || mode->htotal - mode->hdisplay < 32) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 0e5dffe8f018..a05c7e2b782e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -403,6 +403,9 @@ enum drm_mode_status intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, const struct drm_display_mode *mode, bool bigjoiner); +enum drm_mode_status +intel_cpu_transcoder_mode_valid(struct drm_i915_private *i915, + const struct drm_display_mode *mode); enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port); bool is_trans_port_sync_mode(const struct intel_crtc_state *state); bool is_trans_port_sync_master(const struct intel_crtc_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2852958dd4e7..b21bcd40f111 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1172,6 +1172,10 @@ intel_dp_mode_valid(struct drm_connector *_connector, enum drm_mode_status status; bool dsc = false, bigjoiner = false; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 851b312bd844..4816e4e77f83 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -959,6 +959,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, return 0; } + *status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (*status != MODE_OK) + return 0; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) { *status = MODE_NO_DBLESCAN; return 0; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 55d6743374bd..9111e9d46486 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -217,11 +217,17 @@ intel_dvo_mode_valid(struct drm_connector *_connector, struct drm_display_mode *mode) { struct intel_connector *connector = to_intel_connector(_connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dvo *intel_dvo = intel_attached_dvo(connector); const struct drm_display_mode *fixed_mode = intel_panel_fixed_mode(connector, mode); int max_dotclk = to_i915(connector->base.dev)->max_dotclk_freq; int target_clock = mode->clock; + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index ac315f8e7820..bfa456fa7d25 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1983,6 +1983,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector, bool ycbcr_420_only; enum intel_output_format sink_format; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING) clock *= 2; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 2a4ca7e65775..bcbdd1984fd9 100644 --- 
a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -389,11 +389,16 @@ intel_lvds_mode_valid(struct drm_connector *_connector, struct drm_display_mode *mode) { struct intel_connector *connector = to_intel_connector(_connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); const struct drm_display_mode *fixed_mode = intel_panel_fixed_mode(connector, mode); int max_pixclk = to_i915(connector->base.dev)->max_dotclk_freq; enum drm_mode_status status; + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index a636f42ceae5..a9ac7d45d1f3 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1921,13 +1921,19 @@ static enum drm_mode_status intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; bool has_hdmi_sink = intel_has_hdmi_sink(intel_sdvo_connector, connector->state); + int max_dotclk = i915->max_dotclk_freq; + enum drm_mode_status status; int clock = mode->clock; + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 31a79fdfc812..2ee4f0d95851 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -958,8 +958,14 @@ static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + int max_dotclk = i915->max_dotclk_freq; + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 55da627a8b8d..f488394d3108 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1541,9 +1541,25 @@ static const struct drm_encoder_funcs intel_dsi_funcs = { .destroy = intel_dsi_encoder_destroy, }; +static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + struct drm_i915_private *i915 = to_i915(connector->dev); + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + } + + return intel_dsi_mode_valid(connector, mode); +} + static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs = { .get_modes = intel_dsi_get_modes, - .mode_valid = intel_dsi_mode_valid, + .mode_valid = vlv_dsi_mode_valid, .atomic_check = intel_digital_connector_atomic_check, }; -- cgit v1.2.3 From 
7cf82b25dd91d7f330d9df2de868caca14289ba1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:26 +0200 Subject: drm/i915/mst: Fix .mode_valid_ctx() return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .mode_valid_ctx() returns an errno, not the mode status. Fix the code to do the right thing. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit c1799032d2ef6616113b733428dfaa2199a5604b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 4816e4e77f83..1bf21bd9a26b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1031,11 +1031,15 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, * Big joiner configuration needs DSC for TGL which is not true for * XE_LPD where uncompressed joiner is supported. */ - if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) - return MODE_CLOCK_HIGH; + if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) { + *status = MODE_CLOCK_HIGH; + return 0; + } - if (mode_rate > max_rate && !dsc) - return MODE_CLOCK_HIGH; + if (mode_rate > max_rate && !dsc) { + *status = MODE_CLOCK_HIGH; + return 0; + } *status = intel_mode_valid_max_plane_size(dev_priv, mode, false); return 0; -- cgit v1.2.3 From dd7eb65c493615fda7d459501c3d4a46e00ea5ba Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 27 Nov 2023 16:50:27 +0200 Subject: drm/i915/mst: Reject modes that require the bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have no bigjoiner support in the MST code, so .mode_valid() pretending otherwise is just going to result in black screens for users. Reject any mode that needs the joiner.
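Both MST fixes above hinge on the .mode_valid_ctx() calling convention, where the return value is only an errno and the actual mode verdict travels through the *status out-parameter. A loose sketch of that convention (example_mode_valid_ctx() and the clock limit are invented for illustration, not taken from i915):

#include <drm/drm_connector.h>
#include <drm/drm_modes.h>
#include <drm/drm_modeset_helper_vtables.h>

/*
 * Illustrative only: return 0 (or a negative errno such as -EDEADLK on
 * lock contention) and report whether the mode is usable via *status.
 * Returning MODE_CLOCK_HIGH directly would be misread as an errno.
 */
static int example_mode_valid_ctx(struct drm_connector *connector,
				  struct drm_display_mode *mode,
				  struct drm_modeset_acquire_ctx *ctx,
				  enum drm_mode_status *status)
{
	const int example_max_dotclk = 600000; /* made-up limit, in kHz */

	if (mode->clock > example_max_dotclk) {
		*status = MODE_CLOCK_HIGH;
		return 0;
	}

	*status = MODE_OK;
	return 0;
}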
Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 9c058492b16f90bb772cb0dad567e8acc68e155d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 1bf21bd9a26b..aa1061262613 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -997,6 +997,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, if (intel_dp_need_bigjoiner(intel_dp, mode->hdisplay, target_clock)) { bigjoiner = true; max_dotclk *= 2; + + /* TODO: add support for bigjoiner */ + *status = MODE_CLOCK_HIGH; + return 0; } if (DISPLAY_VER(dev_priv) >= 10 && -- cgit v1.2.3 From 9f269070abe9c45dc60abc84e29326f855317eac Mon Sep 17 00:00:00 2001 From: heminhong Date: Tue, 14 Nov 2023 10:43:41 +0800 Subject: drm/i915: correct the input parameter on _intel_dsb_commit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current, the dewake_scanline variable is defined as unsigned int, an unsigned int variable that is always greater than or equal to 0. when _intel_dsb_commit function is called by intel_dsb_commit function, the dewake_scanline variable may have an int value. So the dewake_scanline variable is necessary to defined as an int. Fixes: f83b94d23770 ("drm/i915/dsb: Use DEwake to combat PkgC latency") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310052201.AnVbpgPr-lkp@intel.com/ Cc: Ville Syrjälä Cc: Uma Shankar Signed-off-by: heminhong Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231114024341.14524-1-heminhong@kylinos.cn (cherry picked from commit ef32c3cc9c62252986f09e06b4e525742cd91529) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dsb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 78b6fe24dcd8..7fd6280c54a7 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -340,7 +340,7 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) } static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, - unsigned int dewake_scanline) + int dewake_scanline) { struct intel_crtc *crtc = dsb->crtc; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); -- cgit v1.2.3 From 01a39f1c4f1220a4e6a25729fae87ff5794cbc52 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 4 Dec 2023 22:24:43 +0200 Subject: drm/i915: Fix ADL+ tiled plane stride when the POT stride is smaller than the original MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plane_view_scanout_stride() currently assumes that we had to pad the mapping stride with dummy pages in order to align it. But that is not the case if the original fb stride exceeds the aligned stride used to populate the remapped view, which is calculated from the user specified framebuffer width rather than the user specified framebuffer stride. 
Ignore the original fb stride in this case and just stick to the POT aligned stride. Getting this wrong will cause the plane to fetch the wrong data, and can lead to fault errors if the page tables at the bogus location aren't even populated. TODO: figure out if this is OK for CCS, or if we should instead increase the width of the view to cover the entire user specified fb stride instead... Cc: Imre Deak Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231204202443.31247-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak Reviewed-by: Juha-Pekka Heikkila --- drivers/gpu/drm/i915/display/intel_fb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 96663f9ac431..32a7ee1c5b34 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1381,7 +1381,8 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane, struct drm_i915_private *i915 = to_i915(fb->base.dev); unsigned int stride_tiles; - if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) + if ((IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) && + src_stride_tiles < dst_stride_tiles) stride_tiles = src_stride_tiles; else stride_tiles = dst_stride_tiles; -- cgit v1.2.3 From 134c78c962279e423f5d699e3d3b379cbf4885c8 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:33 +0300 Subject: drm/msm/dpu: cleanup dpu_kms_hw_init error path It was noticed that dpu_kms_hw_init()'s error path contains several labels which point to the same code path. Replace all of them with a single label. Suggested-by: Konrad Dybcio Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570035/ Link: https://lore.kernel.org/r/20231201211845.1026967-2-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index c1cb500adc07..cc6e2c05e794 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1063,7 +1063,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) if (!dpu_kms->catalog) { DPU_ERROR("device config not known!\n"); rc = -EINVAL; - goto power_error; + goto err_pm_put; } /* @@ -1073,26 +1073,26 @@ static int dpu_kms_hw_init(struct msm_kms *kms) rc = _dpu_kms_mmu_init(dpu_kms); if (rc) { DPU_ERROR("dpu_kms_mmu_init failed: %d\n", rc); - goto power_error; + goto err_pm_put; } dpu_kms->mdss = msm_mdss_get_mdss_data(dpu_kms->pdev->dev.parent); if (IS_ERR(dpu_kms->mdss)) { rc = PTR_ERR(dpu_kms->mdss); DPU_ERROR("failed to get MDSS data: %d\n", rc); - goto power_error; + goto err_pm_put; } if (!dpu_kms->mdss) { rc = -EINVAL; DPU_ERROR("NULL MDSS data\n"); - goto power_error; + goto err_pm_put; } rc = dpu_rm_init(&dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio); if (rc) { DPU_ERROR("rm init failed: %d\n", rc); - goto power_error; + goto err_pm_put; } dpu_kms->rm_init = true; @@ -1104,7 +1104,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) rc = PTR_ERR(dpu_kms->hw_mdp); DPU_ERROR("failed to get hw_mdp: %d\n", rc); dpu_kms->hw_mdp = NULL; - goto power_error; + goto err_pm_put; } for (i = 0; i < dpu_kms->catalog->vbif_count; i++) { @@ -1115,7 +1115,7 @@ static 
int dpu_kms_hw_init(struct msm_kms *kms) if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed to init vbif %d: %d\n", vbif->id, rc); - goto power_error; + goto err_pm_put; } dpu_kms->hw_vbif[vbif->id] = hw; @@ -1131,7 +1131,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) rc = dpu_core_perf_init(&dpu_kms->perf, dpu_kms->catalog->perf, max_core_clk_rate); if (rc) { DPU_ERROR("failed to init perf %d\n", rc); - goto perf_err; + goto err_pm_put; } dpu_kms->hw_intr = dpu_hw_intr_init(dpu_kms->mmio, dpu_kms->catalog); @@ -1139,7 +1139,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) rc = PTR_ERR(dpu_kms->hw_intr); DPU_ERROR("hw_intr init failed: %d\n", rc); dpu_kms->hw_intr = NULL; - goto hw_intr_init_err; + goto err_pm_put; } dev->mode_config.min_width = 0; @@ -1164,7 +1164,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) rc = _dpu_kms_drm_obj_init(dpu_kms); if (rc) { DPU_ERROR("modeset init failed: %d\n", rc); - goto drm_obj_init_err; + goto err_pm_put; } dpu_vbif_init_memtypes(dpu_kms); @@ -1173,10 +1173,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) return 0; -drm_obj_init_err: -hw_intr_init_err: -perf_err: -power_error: +err_pm_put: pm_runtime_put_sync(&dpu_kms->pdev->dev); error: _dpu_kms_hw_destroy(dpu_kms); -- cgit v1.2.3 From b830b06f008755526646bb1d12500c8ccd8be335 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:34 +0300 Subject: drm/msm/dpu: remove IS_ERR_OR_NULL for dpu_hw_intr_init() error handling Using IS_ERR_OR_NULL() together with PTR_ERR() is a typical mistake. If the value is NULL, then the function will return 0 instead of a proper return code. Replace IS_ERR_OR_NULL() with IS_ERR() in the dpu_hw_intr_init() error check. Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570036/ Link: https://lore.kernel.org/r/20231201211845.1026967-3-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index cc6e2c05e794..75663f162618 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1135,7 +1135,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) } dpu_kms->hw_intr = dpu_hw_intr_init(dpu_kms->mmio, dpu_kms->catalog); - if (IS_ERR_OR_NULL(dpu_kms->hw_intr)) { + if (IS_ERR(dpu_kms->hw_intr)) { rc = PTR_ERR(dpu_kms->hw_intr); DPU_ERROR("hw_intr init failed: %d\n", rc); dpu_kms->hw_intr = NULL; -- cgit v1.2.3 From b19e6f7dd2e7cc47bed565064fd7ff52f9424e52 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:35 +0300 Subject: drm/msm/dpu: use devres-managed allocation for interrupts data Use devm_kzalloc to create interrupts data structure. This allows us to remove corresponding kfree and drop dpu_hw_intr_destroy() function. 
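Note that the diff below actually switches to drmm_kzalloc() rather than devm_kzalloc(), tying the allocation to the drm_device lifetime. A rough sketch of the pattern this and the following patches repeat (struct example_hw_blk and example_hw_blk_init() are placeholder names, not DPU symbols):

#include <linux/err.h>
#include <drm/drm_managed.h>

/*
 * Rough sketch only: drmm_kzalloc() frees the allocation automatically
 * when the drm_device is released, so the matching kfree()/*_destroy()
 * helper can be dropped entirely.
 */
struct example_hw_blk {
	void __iomem *base;
};

static struct example_hw_blk *example_hw_blk_init(struct drm_device *dev,
						  void __iomem *addr)
{
	struct example_hw_blk *blk;

	blk = drmm_kzalloc(dev, sizeof(*blk), GFP_KERNEL);
	if (!blk)
		return ERR_PTR(-ENOMEM);

	blk->base = addr;

	return blk;	/* no explicit teardown needed by the caller */
}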
Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570038/ Link: https://lore.kernel.org/r/20231201211845.1026967-4-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c | 14 ++++++-------- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h | 11 ++++------- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 4 +--- 3 files changed, 11 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c index 088807db2c83..946dd0135dff 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c @@ -6,6 +6,8 @@ #include #include +#include + #include "dpu_core_irq.h" #include "dpu_kms.h" #include "dpu_hw_interrupts.h" @@ -472,8 +474,9 @@ u32 dpu_core_irq_read(struct dpu_kms *dpu_kms, return intr_status; } -struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, - const struct dpu_mdss_cfg *m) +struct dpu_hw_intr *dpu_hw_intr_init(struct drm_device *dev, + void __iomem *addr, + const struct dpu_mdss_cfg *m) { struct dpu_hw_intr *intr; unsigned int i; @@ -481,7 +484,7 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, if (!addr || !m) return ERR_PTR(-EINVAL); - intr = kzalloc(sizeof(*intr), GFP_KERNEL); + intr = drmm_kzalloc(dev, sizeof(*intr), GFP_KERNEL); if (!intr) return ERR_PTR(-ENOMEM); @@ -512,11 +515,6 @@ struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, return intr; } -void dpu_hw_intr_destroy(struct dpu_hw_intr *intr) -{ - kfree(intr); -} - int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, unsigned int irq_idx, void (*irq_cb)(void *arg), diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h index 53a21ebc57e8..564b750a28fe 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h @@ -70,15 +70,12 @@ struct dpu_hw_intr { /** * dpu_hw_intr_init(): Initializes the interrupts hw object + * @dev: Corresponding device for devres management * @addr: mapped register io address of MDP * @m: pointer to MDSS catalog data */ -struct dpu_hw_intr *dpu_hw_intr_init(void __iomem *addr, - const struct dpu_mdss_cfg *m); +struct dpu_hw_intr *dpu_hw_intr_init(struct drm_device *dev, + void __iomem *addr, + const struct dpu_mdss_cfg *m); -/** - * dpu_hw_intr_destroy(): Cleanup interrutps hw object - * @intr: pointer to interrupts hw object - */ -void dpu_hw_intr_destroy(struct dpu_hw_intr *intr); #endif diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 75663f162618..34d707db1d0c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -795,8 +795,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) { int i; - if (dpu_kms->hw_intr) - dpu_hw_intr_destroy(dpu_kms->hw_intr); dpu_kms->hw_intr = NULL; /* safe to call these more than once during shutdown */ @@ -1134,7 +1132,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) goto err_pm_put; } - dpu_kms->hw_intr = dpu_hw_intr_init(dpu_kms->mmio, dpu_kms->catalog); + dpu_kms->hw_intr = dpu_hw_intr_init(dev, dpu_kms->mmio, dpu_kms->catalog); if (IS_ERR(dpu_kms->hw_intr)) { rc = PTR_ERR(dpu_kms->hw_intr); DPU_ERROR("hw_intr init failed: %d\n", rc); -- cgit v1.2.3 From bdfa47d9b17a2335666a741a98857f9e5e482dc8 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 
00:18:36 +0300 Subject: drm/msm/dpu: use devres-managed allocation for VBIF data Use devm_kzalloc to create VBIF data structure. This allows us to remove corresponding kfree and drop dpu_hw_vbif_destroy() function. Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570040/ Link: https://lore.kernel.org/r/20231201211845.1026967-5-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c | 14 ++++++-------- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h | 8 ++++---- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 11 +++-------- 3 files changed, 13 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c index a5121a50b2bb..98e34afde2d2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c @@ -2,6 +2,8 @@ /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ +#include + #include "dpu_hwio.h" #include "dpu_hw_catalog.h" #include "dpu_hw_vbif.h" @@ -211,12 +213,13 @@ static void _setup_vbif_ops(struct dpu_hw_vbif_ops *ops, ops->set_write_gather_en = dpu_hw_set_write_gather_en; } -struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg, - void __iomem *addr) +struct dpu_hw_vbif *dpu_hw_vbif_init(struct drm_device *dev, + const struct dpu_vbif_cfg *cfg, + void __iomem *addr) { struct dpu_hw_vbif *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -234,8 +237,3 @@ struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg, return c; } - -void dpu_hw_vbif_destroy(struct dpu_hw_vbif *vbif) -{ - kfree(vbif); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h index 7e10d2a172b4..e2b4307500e4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h @@ -108,12 +108,12 @@ struct dpu_hw_vbif { /** * dpu_hw_vbif_init() - Initializes the VBIF driver for the passed * VBIF catalog entry. 
+ * @dev: Corresponding device for devres management * @cfg: VBIF catalog entry for which driver object is required * @addr: Mapped register io address of MDSS */ -struct dpu_hw_vbif *dpu_hw_vbif_init(const struct dpu_vbif_cfg *cfg, - void __iomem *addr); - -void dpu_hw_vbif_destroy(struct dpu_hw_vbif *vbif); +struct dpu_hw_vbif *dpu_hw_vbif_init(struct drm_device *dev, + const struct dpu_vbif_cfg *cfg, + void __iomem *addr); #endif /*_DPU_HW_VBIF_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 34d707db1d0c..a1753c54aaac 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -800,13 +800,8 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) /* safe to call these more than once during shutdown */ _dpu_kms_mmu_destroy(dpu_kms); - if (dpu_kms->catalog) { - for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { - if (dpu_kms->hw_vbif[i]) { - dpu_hw_vbif_destroy(dpu_kms->hw_vbif[i]); - dpu_kms->hw_vbif[i] = NULL; - } - } + for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) { + dpu_kms->hw_vbif[i] = NULL; } if (dpu_kms->rm_init) @@ -1109,7 +1104,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) struct dpu_hw_vbif *hw; const struct dpu_vbif_cfg *vbif = &dpu_kms->catalog->vbif[i]; - hw = dpu_hw_vbif_init(vbif, dpu_kms->vbif[vbif->id]); + hw = dpu_hw_vbif_init(dev, vbif, dpu_kms->vbif[vbif->id]); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed to init vbif %d: %d\n", vbif->id, rc); -- cgit v1.2.3 From 1e897dcc4c673b9d585c09ddcdbe7fab0934e64f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:37 +0300 Subject: drm/msm/dpu: use devres-managed allocation for MDP TOP Use devm_kzalloc to create MDP TOP structure. This allows us to remove corresponding kfree and drop dpu_hw_mdp_destroy() function. Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570047/ Link: https://lore.kernel.org/r/20231201211845.1026967-6-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c | 17 +++++++---------- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h | 8 +++++--- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 5 ++--- 3 files changed, 14 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c index 24e734768a72..05e48cf4ec1d 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c @@ -2,6 +2,8 @@ /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. 
*/ +#include + #include "dpu_hwio.h" #include "dpu_hw_catalog.h" #include "dpu_hw_top.h" @@ -247,16 +249,17 @@ static void _setup_mdp_ops(struct dpu_hw_mdp_ops *ops, ops->intf_audio_select = dpu_hw_intf_audio_select; } -struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg, - void __iomem *addr, - const struct dpu_mdss_cfg *m) +struct dpu_hw_mdp *dpu_hw_mdptop_init(struct drm_device *dev, + const struct dpu_mdp_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_cfg *m) { struct dpu_hw_mdp *mdp; if (!addr) return ERR_PTR(-EINVAL); - mdp = kzalloc(sizeof(*mdp), GFP_KERNEL); + mdp = drmm_kzalloc(dev, sizeof(*mdp), GFP_KERNEL); if (!mdp) return ERR_PTR(-ENOMEM); @@ -271,9 +274,3 @@ struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg, return mdp; } - -void dpu_hw_mdp_destroy(struct dpu_hw_mdp *mdp) -{ - kfree(mdp); -} - diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h index 8b1463d2b2f0..6f3dc98087df 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h @@ -145,13 +145,15 @@ struct dpu_hw_mdp { /** * dpu_hw_mdptop_init - initializes the top driver for the passed config + * @dev: Corresponding device for devres management * @cfg: MDP TOP configuration from catalog * @addr: Mapped register io address of MDP * @m: Pointer to mdss catalog data */ -struct dpu_hw_mdp *dpu_hw_mdptop_init(const struct dpu_mdp_cfg *cfg, - void __iomem *addr, - const struct dpu_mdss_cfg *m); +struct dpu_hw_mdp *dpu_hw_mdptop_init(struct drm_device *dev, + const struct dpu_mdp_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_cfg *m); void dpu_hw_mdp_destroy(struct dpu_hw_mdp *mdp); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index a1753c54aaac..5fcb256302b2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -810,8 +810,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) dpu_kms->catalog = NULL; - if (dpu_kms->hw_mdp) - dpu_hw_mdp_destroy(dpu_kms->hw_mdp); dpu_kms->hw_mdp = NULL; } @@ -1090,7 +1088,8 @@ static int dpu_kms_hw_init(struct msm_kms *kms) dpu_kms->rm_init = true; - dpu_kms->hw_mdp = dpu_hw_mdptop_init(dpu_kms->catalog->mdp, + dpu_kms->hw_mdp = dpu_hw_mdptop_init(dev, + dpu_kms->catalog->mdp, dpu_kms->mmio, dpu_kms->catalog); if (IS_ERR(dpu_kms->hw_mdp)) { -- cgit v1.2.3 From a106ed98af6848ef5810b12f5c9e2e0566f1d9c4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:38 +0300 Subject: drm/msm/dpu: use devres-managed allocation for HW blocks Use devm_kzalloc to create HW block structure. This allows us to remove corresponding kfree and drop all dpu_hw_*_destroy() functions as well as dpu_rm_destroy(), which becomes empty afterwards. 
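Since dpu_hw_intr_init() only ever returns a valid pointer or an ERR_PTR(), never NULL, IS_ERR() is the right caller-side test; pairing IS_ERR_OR_NULL() with PTR_ERR() is the trap the earlier dpu_kms patch removes, because PTR_ERR(NULL) evaluates to 0 and a NULL return would be reported as success. A rough caller-side sketch (example_init_intr() is an invented wrapper around the call site shown in the diff):

#include <linux/err.h>

/* Sketch only: mirrors the dpu_kms_hw_init() call site from the diff. */
static int example_init_intr(struct dpu_kms *dpu_kms, struct drm_device *dev)
{
	dpu_kms->hw_intr = dpu_hw_intr_init(dev, dpu_kms->mmio,
					    dpu_kms->catalog);
	if (IS_ERR(dpu_kms->hw_intr))
		return PTR_ERR(dpu_kms->hw_intr);

	return 0;
}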
Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570041/ Link: https://lore.kernel.org/r/20231201211845.1026967-7-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c | 19 +++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h | 16 ++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c | 12 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h | 10 ++- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c | 7 +- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c | 16 ++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h | 12 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c | 16 ++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h | 13 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c | 14 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h | 12 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c | 14 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h | 13 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c | 15 ++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h | 14 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c | 17 ++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h | 16 ++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c | 15 ++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h | 13 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 8 +-- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 1 - drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c | 90 ++++--------------------- drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h | 11 +-- 23 files changed, 127 insertions(+), 247 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c index 86182c734606..e7b680a151d6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c @@ -4,6 +4,9 @@ */ #include + +#include + #include "dpu_hwio.h" #include "dpu_hw_ctl.h" #include "dpu_kms.h" @@ -680,14 +683,15 @@ static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops, ops->set_active_pipes = dpu_hw_ctl_set_fetch_pipe_active; }; -struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg, - void __iomem *addr, - u32 mixer_count, - const struct dpu_lm_cfg *mixer) +struct dpu_hw_ctl *dpu_hw_ctl_init(struct drm_device *dev, + const struct dpu_ctl_cfg *cfg, + void __iomem *addr, + u32 mixer_count, + const struct dpu_lm_cfg *mixer) { struct dpu_hw_ctl *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -702,8 +706,3 @@ struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg, return c; } - -void dpu_hw_ctl_destroy(struct dpu_hw_ctl *ctx) -{ - kfree(ctx); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h index 1c242298ff2e..279ebd8dfbff 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h @@ -274,20 +274,16 @@ static inline struct dpu_hw_ctl *to_dpu_hw_ctl(struct dpu_hw_blk *hw) /** * dpu_hw_ctl_init() - Initializes the ctl_path hw driver object. * Should be called before accessing any ctl_path register. 
+ * @dev: Corresponding device for devres management * @cfg: ctl_path catalog entry for which driver object is required * @addr: mapped register io address of MDP * @mixer_count: Number of mixers in @mixer * @mixer: Pointer to an array of Layer Mixers defined in the catalog */ -struct dpu_hw_ctl *dpu_hw_ctl_init(const struct dpu_ctl_cfg *cfg, - void __iomem *addr, - u32 mixer_count, - const struct dpu_lm_cfg *mixer); - -/** - * dpu_hw_ctl_destroy(): Destroys ctl driver context - * should be called to free the context - */ -void dpu_hw_ctl_destroy(struct dpu_hw_ctl *ctx); +struct dpu_hw_ctl *dpu_hw_ctl_init(struct drm_device *dev, + const struct dpu_ctl_cfg *cfg, + void __iomem *addr, + u32 mixer_count, + const struct dpu_lm_cfg *mixer); #endif /*_DPU_HW_CTL_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index 509dbaa51d87..5e9aad1b2aa2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -3,6 +3,8 @@ * Copyright (c) 2020-2022, Linaro Limited */ +#include + #include #include "dpu_kms.h" @@ -188,12 +190,13 @@ static void _setup_dsc_ops(struct dpu_hw_dsc_ops *ops, ops->dsc_bind_pingpong_blk = dpu_hw_dsc_bind_pingpong_blk; }; -struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg, +struct dpu_hw_dsc *dpu_hw_dsc_init(struct drm_device *dev, + const struct dpu_dsc_cfg *cfg, void __iomem *addr) { struct dpu_hw_dsc *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -206,8 +209,3 @@ struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg, return c; } - -void dpu_hw_dsc_destroy(struct dpu_hw_dsc *dsc) -{ - kfree(dsc); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h index d5b597ab8c5c..989c88d2449b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h @@ -64,20 +64,24 @@ struct dpu_hw_dsc { /** * dpu_hw_dsc_init() - Initializes the DSC hw driver object. + * @dev: Corresponding device for devres management * @cfg: DSC catalog entry for which driver object is required * @addr: Mapped register io address of MDP * Return: Error code or allocated dpu_hw_dsc context */ -struct dpu_hw_dsc *dpu_hw_dsc_init(const struct dpu_dsc_cfg *cfg, - void __iomem *addr); +struct dpu_hw_dsc *dpu_hw_dsc_init(struct drm_device *dev, + const struct dpu_dsc_cfg *cfg, + void __iomem *addr); /** * dpu_hw_dsc_init_1_2() - initializes the v1.2 DSC hw driver object + * @dev: Corresponding device for devres management * @cfg: DSC catalog entry for which driver object is required * @addr: Mapped register io address of MDP * Returns: Error code or allocated dpu_hw_dsc context */ -struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(const struct dpu_dsc_cfg *cfg, +struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(struct drm_device *dev, + const struct dpu_dsc_cfg *cfg, void __iomem *addr); /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c index 24fe1d98eb86..ba193b0376fe 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c @@ -4,6 +4,8 @@ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. 
All rights reserved */ +#include + #include #include "dpu_kms.h" @@ -367,12 +369,13 @@ static void _setup_dcs_ops_1_2(struct dpu_hw_dsc_ops *ops, ops->dsc_bind_pingpong_blk = dpu_hw_dsc_bind_pingpong_blk_1_2; } -struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(const struct dpu_dsc_cfg *cfg, +struct dpu_hw_dsc *dpu_hw_dsc_init_1_2(struct drm_device *dev, + const struct dpu_dsc_cfg *cfg, void __iomem *addr) { struct dpu_hw_dsc *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index 9419b2209af8..b1da88e2935f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -2,6 +2,8 @@ /* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ +#include + #include "dpu_hwio.h" #include "dpu_hw_catalog.h" #include "dpu_hw_lm.h" @@ -68,15 +70,16 @@ static void _setup_dspp_ops(struct dpu_hw_dspp *c, c->ops.setup_pcc = dpu_setup_dspp_pcc; } -struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg, - void __iomem *addr) +struct dpu_hw_dspp *dpu_hw_dspp_init(struct drm_device *dev, + const struct dpu_dspp_cfg *cfg, + void __iomem *addr) { struct dpu_hw_dspp *c; if (!addr) return ERR_PTR(-EINVAL); - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -90,10 +93,3 @@ struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg, return c; } - -void dpu_hw_dspp_destroy(struct dpu_hw_dspp *dspp) -{ - kfree(dspp); -} - - diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h index bea965681330..3b435690b6cc 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h @@ -81,18 +81,14 @@ static inline struct dpu_hw_dspp *to_dpu_hw_dspp(struct dpu_hw_blk *hw) /** * dpu_hw_dspp_init() - Initializes the DSPP hw driver object. * should be called once before accessing every DSPP. 
+ * @dev: Corresponding device for devres management * @cfg: DSPP catalog entry for which driver object is required * @addr: Mapped register io address of MDP * Return: pointer to structure or ERR_PTR */ -struct dpu_hw_dspp *dpu_hw_dspp_init(const struct dpu_dspp_cfg *cfg, - void __iomem *addr); - -/** - * dpu_hw_dspp_destroy(): Destroys DSPP driver context - * @dspp: Pointer to DSPP driver context - */ -void dpu_hw_dspp_destroy(struct dpu_hw_dspp *dspp); +struct dpu_hw_dspp *dpu_hw_dspp_init(struct drm_device *dev, + const struct dpu_dspp_cfg *cfg, + void __iomem *addr); #endif /*_DPU_HW_DSPP_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index e8b8908d3e12..0b6a0a7dcc39 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -12,6 +12,8 @@ #include +#include + #define INTF_TIMING_ENGINE_EN 0x000 #define INTF_CONFIG 0x004 #define INTF_HSYNC_CTL 0x008 @@ -527,8 +529,10 @@ static void dpu_hw_intf_program_intf_cmd_cfg(struct dpu_hw_intf *ctx, DPU_REG_WRITE(&ctx->hw, INTF_CONFIG2, intf_cfg2); } -struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev) +struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev, + const struct dpu_intf_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev) { struct dpu_hw_intf *c; @@ -537,7 +541,7 @@ struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg, return NULL; } - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -581,9 +585,3 @@ struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg, return c; } - -void dpu_hw_intf_destroy(struct dpu_hw_intf *intf) -{ - kfree(intf); -} - diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h index c539025c418b..215401bb042e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h @@ -131,17 +131,14 @@ struct dpu_hw_intf { /** * dpu_hw_intf_init() - Initializes the INTF driver for the passed * interface catalog entry. + * @dev: Corresponding device for devres management * @cfg: interface catalog entry for which driver object is required * @addr: mapped register io address of MDP * @mdss_rev: dpu core's major and minor versions */ -struct dpu_hw_intf *dpu_hw_intf_init(const struct dpu_intf_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev); - -/** - * dpu_hw_intf_destroy(): Destroys INTF driver context - * @intf: Pointer to INTF driver context - */ -void dpu_hw_intf_destroy(struct dpu_hw_intf *intf); +struct dpu_hw_intf *dpu_hw_intf_init(struct drm_device *dev, + const struct dpu_intf_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev); #endif /*_DPU_HW_INTF_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c index d1c3bd8379ea..25af52ab602f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c @@ -4,6 +4,8 @@ * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. 
*/ +#include + #include "dpu_kms.h" #include "dpu_hw_catalog.h" #include "dpu_hwio.h" @@ -156,8 +158,9 @@ static void _setup_mixer_ops(struct dpu_hw_lm_ops *ops, ops->collect_misr = dpu_hw_lm_collect_misr; } -struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg, - void __iomem *addr) +struct dpu_hw_mixer *dpu_hw_lm_init(struct drm_device *dev, + const struct dpu_lm_cfg *cfg, + void __iomem *addr) { struct dpu_hw_mixer *c; @@ -166,7 +169,7 @@ struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg, return NULL; } - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -180,8 +183,3 @@ struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg, return c; } - -void dpu_hw_lm_destroy(struct dpu_hw_mixer *lm) -{ - kfree(lm); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h index 36992d046a53..8835fd106413 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h @@ -95,16 +95,12 @@ static inline struct dpu_hw_mixer *to_dpu_hw_mixer(struct dpu_hw_blk *hw) /** * dpu_hw_lm_init() - Initializes the mixer hw driver object. * should be called once before accessing every mixer. + * @dev: Corresponding device for devres management * @cfg: mixer catalog entry for which driver object is required * @addr: mapped register io address of MDP */ -struct dpu_hw_mixer *dpu_hw_lm_init(const struct dpu_lm_cfg *cfg, - void __iomem *addr); - -/** - * dpu_hw_lm_destroy(): Destroys layer mixer driver context - * @lm: Pointer to LM driver context - */ -void dpu_hw_lm_destroy(struct dpu_hw_mixer *lm); +struct dpu_hw_mixer *dpu_hw_lm_init(struct drm_device *dev, + const struct dpu_lm_cfg *cfg, + void __iomem *addr); #endif /*_DPU_HW_LM_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c index 90e0e05eff8d..ddfa40a959cb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c @@ -4,6 +4,8 @@ #include +#include + #include "dpu_hw_mdss.h" #include "dpu_hwio.h" #include "dpu_hw_catalog.h" @@ -37,12 +39,13 @@ static void _setup_merge_3d_ops(struct dpu_hw_merge_3d *c, c->ops.setup_3d_mode = dpu_hw_merge_3d_setup_3d_mode; }; -struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg, - void __iomem *addr) +struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(struct drm_device *dev, + const struct dpu_merge_3d_cfg *cfg, + void __iomem *addr) { struct dpu_hw_merge_3d *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -55,8 +58,3 @@ struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg, return c; } - -void dpu_hw_merge_3d_destroy(struct dpu_hw_merge_3d *hw) -{ - kfree(hw); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h index 19cec5e88722..c192f02ec1ab 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h @@ -48,18 +48,13 @@ static inline struct dpu_hw_merge_3d *to_dpu_hw_merge_3d(struct dpu_hw_blk *hw) /** * dpu_hw_merge_3d_init() - Initializes the merge_3d driver for the passed * merge3d catalog entry. 
+ * @dev: Corresponding device for devres management * @cfg: Pingpong catalog entry for which driver object is required * @addr: Mapped register io address of MDP * Return: Error code or allocated dpu_hw_merge_3d context */ -struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(const struct dpu_merge_3d_cfg *cfg, - void __iomem *addr); - -/** - * dpu_hw_merge_3d_destroy - destroys merge_3d driver context - * should be called to free the context - * @pp: Pointer to PP driver context returned by dpu_hw_merge_3d_init - */ -void dpu_hw_merge_3d_destroy(struct dpu_hw_merge_3d *pp); +struct dpu_hw_merge_3d *dpu_hw_merge_3d_init(struct drm_device *dev, + const struct dpu_merge_3d_cfg *cfg, + void __iomem *addr); #endif /*_DPU_HW_MERGE3D_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c index 057cac7f5d93..2db4c6fba37a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c @@ -4,6 +4,8 @@ #include +#include + #include "dpu_hw_mdss.h" #include "dpu_hwio.h" #include "dpu_hw_catalog.h" @@ -281,12 +283,14 @@ static int dpu_hw_pp_setup_dsc(struct dpu_hw_pingpong *pp) return 0; } -struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev) +struct dpu_hw_pingpong *dpu_hw_pingpong_init(struct drm_device *dev, + const struct dpu_pingpong_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev) { struct dpu_hw_pingpong *c; - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -317,8 +321,3 @@ struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg, return c; } - -void dpu_hw_pingpong_destroy(struct dpu_hw_pingpong *pp) -{ - kfree(pp); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h index 0d541ca5b056..a48b69fd79a3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h @@ -121,19 +121,15 @@ static inline struct dpu_hw_pingpong *to_dpu_hw_pingpong(struct dpu_hw_blk *hw) /** * dpu_hw_pingpong_init() - initializes the pingpong driver for the passed * pingpong catalog entry. 
+ * @dev: Corresponding device for devres management * @cfg: Pingpong catalog entry for which driver object is required * @addr: Mapped register io address of MDP * @mdss_rev: dpu core's major and minor versions * Return: Error code or allocated dpu_hw_pingpong context */ -struct dpu_hw_pingpong *dpu_hw_pingpong_init(const struct dpu_pingpong_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev); - -/** - * dpu_hw_pingpong_destroy - destroys pingpong driver context - * should be called to free the context - * @pp: Pointer to PP driver context returned by dpu_hw_pingpong_init - */ -void dpu_hw_pingpong_destroy(struct dpu_hw_pingpong *pp); +struct dpu_hw_pingpong *dpu_hw_pingpong_init(struct drm_device *dev, + const struct dpu_pingpong_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev); #endif /*_DPU_HW_PINGPONG_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index 26307d9fc81d..0bf8a83e8df3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -11,6 +11,7 @@ #include "msm_mdss.h" #include +#include #define DPU_FETCH_CONFIG_RESET_VALUE 0x00000087 @@ -669,16 +670,18 @@ int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms, } #endif -struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg, - void __iomem *addr, const struct msm_mdss_data *mdss_data, - const struct dpu_mdss_version *mdss_rev) +struct dpu_hw_sspp *dpu_hw_sspp_init(struct drm_device *dev, + const struct dpu_sspp_cfg *cfg, + void __iomem *addr, + const struct msm_mdss_data *mdss_data, + const struct dpu_mdss_version *mdss_rev) { struct dpu_hw_sspp *hw_pipe; if (!addr) return ERR_PTR(-EINVAL); - hw_pipe = kzalloc(sizeof(*hw_pipe), GFP_KERNEL); + hw_pipe = drmm_kzalloc(dev, sizeof(*hw_pipe), GFP_KERNEL); if (!hw_pipe) return ERR_PTR(-ENOMEM); @@ -693,9 +696,3 @@ struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg, return hw_pipe; } - -void dpu_hw_sspp_destroy(struct dpu_hw_sspp *ctx) -{ - kfree(ctx); -} - diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h index 28e7d53bd191..b7dc52312c39 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h @@ -318,21 +318,17 @@ struct dpu_kms; /** * dpu_hw_sspp_init() - Initializes the sspp hw driver object. * Should be called once before accessing every pipe. + * @dev: Corresponding device for devres management * @cfg: Pipe catalog entry for which driver object is required * @addr: Mapped register io address of MDP * @mdss_data: UBWC / MDSS configuration data * @mdss_rev: dpu core's major and minor versions */ -struct dpu_hw_sspp *dpu_hw_sspp_init(const struct dpu_sspp_cfg *cfg, - void __iomem *addr, const struct msm_mdss_data *mdss_data, - const struct dpu_mdss_version *mdss_rev); - -/** - * dpu_hw_sspp_destroy(): Destroys SSPP driver context - * should be called during Hw pipe cleanup. 
- * @ctx: Pointer to SSPP driver context returned by dpu_hw_sspp_init - */ -void dpu_hw_sspp_destroy(struct dpu_hw_sspp *ctx); +struct dpu_hw_sspp *dpu_hw_sspp_init(struct drm_device *dev, + const struct dpu_sspp_cfg *cfg, + void __iomem *addr, + const struct msm_mdss_data *mdss_data, + const struct dpu_mdss_version *mdss_rev); int _dpu_hw_sspp_init_debugfs(struct dpu_hw_sspp *hw_pipe, struct dpu_kms *kms, struct dentry *entry); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c index 9668fb97c047..ed0e80616129 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c @@ -3,6 +3,8 @@ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved */ +#include + #include "dpu_hw_mdss.h" #include "dpu_hwio.h" #include "dpu_hw_catalog.h" @@ -208,15 +210,17 @@ static void _setup_wb_ops(struct dpu_hw_wb_ops *ops, ops->setup_clk_force_ctrl = dpu_hw_wb_setup_clk_force_ctrl; } -struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev) +struct dpu_hw_wb *dpu_hw_wb_init(struct drm_device *dev, + const struct dpu_wb_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev) { struct dpu_hw_wb *c; if (!addr) return ERR_PTR(-EINVAL); - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); @@ -230,8 +234,3 @@ struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg, return c; } - -void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb) -{ - kfree(hw_wb); -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h index 88792f450a92..e671796ea379 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h @@ -76,18 +76,15 @@ struct dpu_hw_wb { /** * dpu_hw_wb_init() - Initializes the writeback hw driver object. + * @dev: Corresponding device for devres management * @cfg: wb_path catalog entry for which driver object is required * @addr: mapped register io address of MDP * @mdss_rev: dpu core's major and minor versions * Return: Error code or allocated dpu_hw_wb context */ -struct dpu_hw_wb *dpu_hw_wb_init(const struct dpu_wb_cfg *cfg, - void __iomem *addr, const struct dpu_mdss_version *mdss_rev); - -/** - * dpu_hw_wb_destroy(): Destroy writeback hw driver object. 
- * @hw_wb: Pointer to writeback hw driver object - */ -void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb); +struct dpu_hw_wb *dpu_hw_wb_init(struct drm_device *dev, + const struct dpu_wb_cfg *cfg, + void __iomem *addr, + const struct dpu_mdss_version *mdss_rev); #endif /*_DPU_HW_WB_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 5fcb256302b2..dc24fe4bb3b0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -804,10 +804,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) dpu_kms->hw_vbif[i] = NULL; } - if (dpu_kms->rm_init) - dpu_rm_destroy(&dpu_kms->rm); - dpu_kms->rm_init = false; - dpu_kms->catalog = NULL; dpu_kms->hw_mdp = NULL; @@ -1080,14 +1076,12 @@ static int dpu_kms_hw_init(struct msm_kms *kms) goto err_pm_put; } - rc = dpu_rm_init(&dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio); + rc = dpu_rm_init(dev, &dpu_kms->rm, dpu_kms->catalog, dpu_kms->mdss, dpu_kms->mmio); if (rc) { DPU_ERROR("rm init failed: %d\n", rc); goto err_pm_put; } - dpu_kms->rm_init = true; - dpu_kms->hw_mdp = dpu_hw_mdptop_init(dev, dpu_kms->catalog->mdp, dpu_kms->mmio, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index b6f53ca6e962..df6271017b80 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -88,7 +88,6 @@ struct dpu_kms { struct drm_private_obj global_state; struct dpu_rm rm; - bool rm_init; struct dpu_hw_vbif *hw_vbif[VBIF_MAX]; struct dpu_hw_mdp *hw_mdp; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 8759466e2f37..0bb28cf4a6cb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -34,72 +34,8 @@ struct dpu_rm_requirements { struct msm_display_topology topology; }; -int dpu_rm_destroy(struct dpu_rm *rm) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(rm->dspp_blks); i++) { - struct dpu_hw_dspp *hw; - - if (rm->dspp_blks[i]) { - hw = to_dpu_hw_dspp(rm->dspp_blks[i]); - dpu_hw_dspp_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->pingpong_blks); i++) { - struct dpu_hw_pingpong *hw; - - if (rm->pingpong_blks[i]) { - hw = to_dpu_hw_pingpong(rm->pingpong_blks[i]); - dpu_hw_pingpong_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->merge_3d_blks); i++) { - struct dpu_hw_merge_3d *hw; - - if (rm->merge_3d_blks[i]) { - hw = to_dpu_hw_merge_3d(rm->merge_3d_blks[i]); - dpu_hw_merge_3d_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->mixer_blks); i++) { - struct dpu_hw_mixer *hw; - - if (rm->mixer_blks[i]) { - hw = to_dpu_hw_mixer(rm->mixer_blks[i]); - dpu_hw_lm_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->ctl_blks); i++) { - struct dpu_hw_ctl *hw; - - if (rm->ctl_blks[i]) { - hw = to_dpu_hw_ctl(rm->ctl_blks[i]); - dpu_hw_ctl_destroy(hw); - } - } - for (i = 0; i < ARRAY_SIZE(rm->hw_intf); i++) - dpu_hw_intf_destroy(rm->hw_intf[i]); - - for (i = 0; i < ARRAY_SIZE(rm->dsc_blks); i++) { - struct dpu_hw_dsc *hw; - - if (rm->dsc_blks[i]) { - hw = to_dpu_hw_dsc(rm->dsc_blks[i]); - dpu_hw_dsc_destroy(hw); - } - } - - for (i = 0; i < ARRAY_SIZE(rm->hw_wb); i++) - dpu_hw_wb_destroy(rm->hw_wb[i]); - - for (i = 0; i < ARRAY_SIZE(rm->hw_sspp); i++) - dpu_hw_sspp_destroy(rm->hw_sspp[i]); - - return 0; -} - -int dpu_rm_init(struct dpu_rm *rm, +int dpu_rm_init(struct drm_device *dev, + struct dpu_rm *rm, const struct dpu_mdss_cfg *cat, const struct msm_mdss_data *mdss_data, void __iomem 
*mmio) @@ -119,7 +55,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_mixer *hw; const struct dpu_lm_cfg *lm = &cat->mixer[i]; - hw = dpu_hw_lm_init(lm, mmio); + hw = dpu_hw_lm_init(dev, lm, mmio); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed lm object creation: err %d\n", rc); @@ -132,7 +68,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_merge_3d *hw; const struct dpu_merge_3d_cfg *merge_3d = &cat->merge_3d[i]; - hw = dpu_hw_merge_3d_init(merge_3d, mmio); + hw = dpu_hw_merge_3d_init(dev, merge_3d, mmio); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed merge_3d object creation: err %d\n", @@ -146,7 +82,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_pingpong *hw; const struct dpu_pingpong_cfg *pp = &cat->pingpong[i]; - hw = dpu_hw_pingpong_init(pp, mmio, cat->mdss_ver); + hw = dpu_hw_pingpong_init(dev, pp, mmio, cat->mdss_ver); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed pingpong object creation: err %d\n", @@ -162,7 +98,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_intf *hw; const struct dpu_intf_cfg *intf = &cat->intf[i]; - hw = dpu_hw_intf_init(intf, mmio, cat->mdss_ver); + hw = dpu_hw_intf_init(dev, intf, mmio, cat->mdss_ver); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed intf object creation: err %d\n", rc); @@ -175,7 +111,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_wb *hw; const struct dpu_wb_cfg *wb = &cat->wb[i]; - hw = dpu_hw_wb_init(wb, mmio, cat->mdss_ver); + hw = dpu_hw_wb_init(dev, wb, mmio, cat->mdss_ver); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed wb object creation: err %d\n", rc); @@ -188,7 +124,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_ctl *hw; const struct dpu_ctl_cfg *ctl = &cat->ctl[i]; - hw = dpu_hw_ctl_init(ctl, mmio, cat->mixer_count, cat->mixer); + hw = dpu_hw_ctl_init(dev, ctl, mmio, cat->mixer_count, cat->mixer); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed ctl object creation: err %d\n", rc); @@ -201,7 +137,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_dspp *hw; const struct dpu_dspp_cfg *dspp = &cat->dspp[i]; - hw = dpu_hw_dspp_init(dspp, mmio); + hw = dpu_hw_dspp_init(dev, dspp, mmio); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed dspp object creation: err %d\n", rc); @@ -215,9 +151,9 @@ int dpu_rm_init(struct dpu_rm *rm, const struct dpu_dsc_cfg *dsc = &cat->dsc[i]; if (test_bit(DPU_DSC_HW_REV_1_2, &dsc->features)) - hw = dpu_hw_dsc_init_1_2(dsc, mmio); + hw = dpu_hw_dsc_init_1_2(dev, dsc, mmio); else - hw = dpu_hw_dsc_init(dsc, mmio); + hw = dpu_hw_dsc_init(dev, dsc, mmio); if (IS_ERR(hw)) { rc = PTR_ERR(hw); @@ -231,7 +167,7 @@ int dpu_rm_init(struct dpu_rm *rm, struct dpu_hw_sspp *hw; const struct dpu_sspp_cfg *sspp = &cat->sspp[i]; - hw = dpu_hw_sspp_init(sspp, mmio, mdss_data, cat->mdss_ver); + hw = dpu_hw_sspp_init(dev, sspp, mmio, mdss_data, cat->mdss_ver); if (IS_ERR(hw)) { rc = PTR_ERR(hw); DPU_ERROR("failed sspp object creation: err %d\n", rc); @@ -243,8 +179,6 @@ int dpu_rm_init(struct dpu_rm *rm, return 0; fail: - dpu_rm_destroy(rm); - return rc ? rc : -EFAULT; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h index 2b551566cbf4..36752d837be4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h @@ -38,24 +38,19 @@ struct dpu_rm { /** * dpu_rm_init - Read hardware catalog and create reservation tracking objects * for all HW blocks. 
+ * @dev: Corresponding device for devres management * @rm: DPU Resource Manager handle * @cat: Pointer to hardware catalog * @mdss_data: Pointer to MDSS / UBWC configuration * @mmio: mapped register io address of MDP * @Return: 0 on Success otherwise -ERROR */ -int dpu_rm_init(struct dpu_rm *rm, +int dpu_rm_init(struct drm_device *dev, + struct dpu_rm *rm, const struct dpu_mdss_cfg *cat, const struct msm_mdss_data *mdss_data, void __iomem *mmio); -/** - * dpu_rm_destroy - Free all memory allocated by dpu_rm_init - * @rm: DPU Resource Manager handle - * @Return: 0 on Success otherwise -ERROR - */ -int dpu_rm_destroy(struct dpu_rm *rm); - /** * dpu_rm_reserve - Given a CRTC->Encoder->Connector display chain, analyze * the use connections and user requirements, specified through related -- cgit v1.2.3 From b0311c1c4e069e2e15f2e2a32bccbcf628be42ec Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:39 +0300 Subject: drm/msm/dpu: drop unused dpu_plane::lock The field dpu_plane::lock was never used for protecting any kind of data. Drop it now. Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570043/ Link: https://lore.kernel.org/r/20231201211845.1026967-8-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 4d7f3894f4d7..97d2211de0e3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -78,8 +78,6 @@ static const uint32_t qcom_compressed_supported_formats[] = { struct dpu_plane { struct drm_plane base; - struct mutex lock; - enum dpu_sspp pipe; uint32_t color_fill; @@ -1230,8 +1228,6 @@ static void dpu_plane_destroy(struct drm_plane *plane) if (pstate->r_pipe.sspp) _dpu_plane_set_qos_ctrl(plane, &pstate->r_pipe, false); - mutex_destroy(&pdpu->lock); - /* this will destroy the states as well */ drm_plane_cleanup(plane); @@ -1491,8 +1487,6 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, /* success! finalize initialization */ drm_plane_helper_add(plane, &dpu_plane_helper_funcs); - mutex_init(&pdpu->lock); - DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name, pipe, plane->base.id); return plane; -- cgit v1.2.3 From bcc54a4c063a823cb75b71116762673b380a1ef6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:40 +0300 Subject: drm/msm/dpu: remove QoS teardown on plane destruction There is little point in disabling QoS on plane destruction: it happens during DPU device destruction process, after which there will be no running planes. Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570049/ Link: https://lore.kernel.org/r/20231201211845.1026967-9-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 97d2211de0e3..dae7d57f5881 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1217,17 +1217,10 @@ static void dpu_plane_atomic_update(struct drm_plane *plane, static void dpu_plane_destroy(struct drm_plane *plane) { struct dpu_plane *pdpu = plane ? 
to_dpu_plane(plane) : NULL; - struct dpu_plane_state *pstate; DPU_DEBUG_PLANE(pdpu, "\n"); if (pdpu) { - pstate = to_dpu_plane_state(plane->state); - _dpu_plane_set_qos_ctrl(plane, &pstate->pipe, false); - - if (pstate->r_pipe.sspp) - _dpu_plane_set_qos_ctrl(plane, &pstate->r_pipe, false); - /* this will destroy the states as well */ drm_plane_cleanup(plane); -- cgit v1.2.3 From 0e00f9af95bbce1f1d2f193d08e80b446329dd57 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:41 +0300 Subject: drm/msm/dpu: use drmm-managed allocation for dpu_plane Change struct dpu_plane allocation to use drmm_universal_plane_alloc(). This removes the need to perform any actions on plane destruction. Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570042/ Link: https://lore.kernel.org/r/20231201211845.1026967-10-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 46 +++++++------------------------ 1 file changed, 10 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index dae7d57f5881..3235ab132540 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1214,20 +1214,6 @@ static void dpu_plane_atomic_update(struct drm_plane *plane, } } -static void dpu_plane_destroy(struct drm_plane *plane) -{ - struct dpu_plane *pdpu = plane ? to_dpu_plane(plane) : NULL; - - DPU_DEBUG_PLANE(pdpu, "\n"); - - if (pdpu) { - /* this will destroy the states as well */ - drm_plane_cleanup(plane); - - kfree(pdpu); - } -} - static void dpu_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { @@ -1397,7 +1383,6 @@ static bool dpu_plane_format_mod_supported(struct drm_plane *plane, static const struct drm_plane_funcs dpu_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, - .destroy = dpu_plane_destroy, .reset = dpu_plane_reset, .atomic_duplicate_state = dpu_plane_duplicate_state, .atomic_destroy_state = dpu_plane_destroy_state, @@ -1425,35 +1410,28 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, struct dpu_hw_sspp *pipe_hw; uint32_t num_formats; uint32_t supported_rotations; - int ret = -EINVAL; - - /* create and zero local structure */ - pdpu = kzalloc(sizeof(*pdpu), GFP_KERNEL); - if (!pdpu) { - DPU_ERROR("[%u]failed to allocate local plane struct\n", pipe); - ret = -ENOMEM; - return ERR_PTR(ret); - } - - /* cache local stuff for later */ - plane = &pdpu->base; - pdpu->pipe = pipe; + int ret; /* initialize underlying h/w driver */ pipe_hw = dpu_rm_get_sspp(&kms->rm, pipe); if (!pipe_hw || !pipe_hw->cap || !pipe_hw->cap->sblk) { DPU_ERROR("[%u]SSPP is invalid\n", pipe); - goto clean_plane; + return ERR_PTR(-EINVAL); } format_list = pipe_hw->cap->sblk->format_list; num_formats = pipe_hw->cap->sblk->num_formats; - ret = drm_universal_plane_init(dev, plane, 0xff, &dpu_plane_funcs, + pdpu = drmm_universal_plane_alloc(dev, struct dpu_plane, base, + 0xff, &dpu_plane_funcs, format_list, num_formats, supported_format_modifiers, type, NULL); - if (ret) - goto clean_plane; + if (IS_ERR(pdpu)) + return ERR_CAST(pdpu); + + /* cache local stuff for later */ + plane = &pdpu->base; + pdpu->pipe = pipe; pdpu->catalog = kms->catalog; @@ -1483,8 +1461,4 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, DPU_DEBUG("%s created for pipe:%u id:%u\n", plane->name, pipe, plane->base.id); 
return plane; - -clean_plane: - kfree(pdpu); - return ERR_PTR(ret); } -- cgit v1.2.3 From 3637af92de2b1f51a4ecd5e21ada2d62d3a5a6b5 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:42 +0300 Subject: drm/msm/dpu: use drmm-managed allocation for dpu_crtc Change struct dpu_crtc allocation to use drmm_crtc_alloc_with_planes(). This removes the need to perform any actions on CRTC destruction. Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570054/ Link: https://lore.kernel.org/r/20231201211845.1026967-11-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 3c475f8042b0..a798c10036e1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -51,17 +51,6 @@ static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc) return to_dpu_kms(priv->kms); } -static void dpu_crtc_destroy(struct drm_crtc *crtc) -{ - struct dpu_crtc *dpu_crtc = to_dpu_crtc(crtc); - - if (!crtc) - return; - - drm_crtc_cleanup(crtc); - kfree(dpu_crtc); -} - static struct drm_encoder *get_encoder_from_crtc(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -1435,7 +1424,6 @@ static int dpu_crtc_late_register(struct drm_crtc *crtc) static const struct drm_crtc_funcs dpu_crtc_funcs = { .set_config = drm_atomic_helper_set_config, - .destroy = dpu_crtc_destroy, .page_flip = drm_atomic_helper_page_flip, .reset = dpu_crtc_reset, .atomic_duplicate_state = dpu_crtc_duplicate_state, @@ -1469,9 +1457,13 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, struct dpu_crtc *dpu_crtc; int i, ret; - dpu_crtc = kzalloc(sizeof(*dpu_crtc), GFP_KERNEL); - if (!dpu_crtc) - return ERR_PTR(-ENOMEM); + dpu_crtc = drmm_crtc_alloc_with_planes(dev, struct dpu_crtc, base, + plane, cursor, + &dpu_crtc_funcs, + NULL); + + if (IS_ERR(dpu_crtc)) + return ERR_CAST(dpu_crtc); crtc = &dpu_crtc->base; crtc->dev = dev; @@ -1491,9 +1483,6 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, dpu_crtc_frame_event_work); } - drm_crtc_init_with_planes(dev, crtc, plane, cursor, &dpu_crtc_funcs, - NULL); - drm_crtc_helper_add(crtc, &dpu_crtc_helper_funcs); if (dpu_kms->catalog->dspp_count) -- cgit v1.2.3 From 73169b45e1ed296b4357a694f5fa586ac0643ac1 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:43 +0300 Subject: drm/msm/dpu: use drmm-managed allocation for dpu_encoder_phys Change struct allocation of encoder's phys backend data to use drmm_kzalloc(). This removes the need to perform any actions on encoder destruction. 
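For reference, the pattern being adopted here looks roughly like the sketch below. It is only an illustration, assuming a made-up example_phys_enc structure; the one real helper is drmm_kzalloc(), which ties the allocation's lifetime to the drm_device so no destroy callback or explicit kfree() is needed:

  #include <drm/drm_managed.h>
  #include <linux/err.h>

  /* illustrative private data, not a structure from this patch */
  struct example_phys_enc {
          int irq_idx;
  };

  static struct example_phys_enc *example_phys_init(struct drm_device *dev)
  {
          struct example_phys_enc *phys;

          /* freed automatically when @dev is released */
          phys = drmm_kzalloc(dev, sizeof(*phys), GFP_KERNEL);
          if (!phys)
                  return ERR_PTR(-ENOMEM);

          phys->irq_idx = -1;
          return phys;
  }

The cmd, vid and wb backends below follow the same shape, replacing their kzalloc()/kfree() pairs.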
Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570051/ Link: https://lore.kernel.org/r/20231201211845.1026967-12-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 9 +++++---- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h | 8 +++++--- .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c | 15 ++++----------- .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 13 ++++--------- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 21 ++++----------------- 5 files changed, 22 insertions(+), 44 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 1c3560e1c625..cd2e153860b5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2179,6 +2179,7 @@ static void dpu_encoder_early_unregister(struct drm_encoder *encoder) } static int dpu_encoder_virt_add_phys_encs( + struct drm_device *dev, struct msm_display_info *disp_info, struct dpu_encoder_virt *dpu_enc, struct dpu_enc_phys_init_params *params) @@ -2200,7 +2201,7 @@ static int dpu_encoder_virt_add_phys_encs( if (disp_info->intf_type == INTF_WB) { - enc = dpu_encoder_phys_wb_init(params); + enc = dpu_encoder_phys_wb_init(dev, params); if (IS_ERR(enc)) { DPU_ERROR_ENC(dpu_enc, "failed to init wb enc: %ld\n", @@ -2211,7 +2212,7 @@ static int dpu_encoder_virt_add_phys_encs( dpu_enc->phys_encs[dpu_enc->num_phys_encs] = enc; ++dpu_enc->num_phys_encs; } else if (disp_info->is_cmd_mode) { - enc = dpu_encoder_phys_cmd_init(params); + enc = dpu_encoder_phys_cmd_init(dev, params); if (IS_ERR(enc)) { DPU_ERROR_ENC(dpu_enc, "failed to init cmd enc: %ld\n", @@ -2222,7 +2223,7 @@ static int dpu_encoder_virt_add_phys_encs( dpu_enc->phys_encs[dpu_enc->num_phys_encs] = enc; ++dpu_enc->num_phys_encs; } else { - enc = dpu_encoder_phys_vid_init(params); + enc = dpu_encoder_phys_vid_init(dev, params); if (IS_ERR(enc)) { DPU_ERROR_ENC(dpu_enc, "failed to init vid enc: %ld\n", @@ -2311,7 +2312,7 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc, break; } - ret = dpu_encoder_virt_add_phys_encs(disp_info, + ret = dpu_encoder_virt_add_phys_encs(dpu_kms->dev, disp_info, dpu_enc, &phys_params); if (ret) { DPU_ERROR_ENC(dpu_enc, "failed to add phys encs\n"); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h index 6f04c3d56e77..5dc53b65040e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h @@ -281,22 +281,24 @@ struct dpu_encoder_wait_info { * @p: Pointer to init params structure * Return: Error code or newly allocated encoder */ -struct dpu_encoder_phys *dpu_encoder_phys_vid_init( +struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p); /** * dpu_encoder_phys_cmd_init - Construct a new command mode physical encoder + * @dev: Corresponding device for devres management * @p: Pointer to init params structure * Return: Error code or newly allocated encoder */ -struct dpu_encoder_phys *dpu_encoder_phys_cmd_init( +struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p); /** * dpu_encoder_phys_wb_init - initialize writeback encoder + * @dev: Corresponding device for devres management * @init: Pointer to init info structure with initialization params */ -struct dpu_encoder_phys 
*dpu_encoder_phys_wb_init( +struct dpu_encoder_phys *dpu_encoder_phys_wb_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p); /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index be185fe69793..d24f45d1f654 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -13,6 +13,8 @@ #include "dpu_trace.h" #include "disp/msm_disp_snapshot.h" +#include + #define DPU_DEBUG_CMDENC(e, fmt, ...) DPU_DEBUG("enc%d intf%d " fmt, \ (e) && (e)->base.parent ? \ (e)->base.parent->base.id : -1, \ @@ -558,14 +560,6 @@ static void dpu_encoder_phys_cmd_disable(struct dpu_encoder_phys *phys_enc) phys_enc->enable_state = DPU_ENC_DISABLED; } -static void dpu_encoder_phys_cmd_destroy(struct dpu_encoder_phys *phys_enc) -{ - struct dpu_encoder_phys_cmd *cmd_enc = - to_dpu_encoder_phys_cmd(phys_enc); - - kfree(cmd_enc); -} - static void dpu_encoder_phys_cmd_prepare_for_kickoff( struct dpu_encoder_phys *phys_enc) { @@ -731,7 +725,6 @@ static void dpu_encoder_phys_cmd_init_ops( ops->atomic_mode_set = dpu_encoder_phys_cmd_atomic_mode_set; ops->enable = dpu_encoder_phys_cmd_enable; ops->disable = dpu_encoder_phys_cmd_disable; - ops->destroy = dpu_encoder_phys_cmd_destroy; ops->control_vblank_irq = dpu_encoder_phys_cmd_control_vblank_irq; ops->wait_for_commit_done = dpu_encoder_phys_cmd_wait_for_commit_done; ops->prepare_for_kickoff = dpu_encoder_phys_cmd_prepare_for_kickoff; @@ -746,7 +739,7 @@ static void dpu_encoder_phys_cmd_init_ops( ops->get_line_count = dpu_encoder_phys_cmd_get_line_count; } -struct dpu_encoder_phys *dpu_encoder_phys_cmd_init( +struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p) { struct dpu_encoder_phys *phys_enc = NULL; @@ -754,7 +747,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init( DPU_DEBUG("intf\n"); - cmd_enc = kzalloc(sizeof(*cmd_enc), GFP_KERNEL); + cmd_enc = drmm_kzalloc(dev, sizeof(*cmd_enc), GFP_KERNEL); if (!cmd_enc) { DPU_ERROR("failed to allocate\n"); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index a01fda711883..69bc1b2e514c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -11,6 +11,8 @@ #include "dpu_trace.h" #include "disp/msm_disp_snapshot.h" +#include + #define DPU_DEBUG_VIDENC(e, fmt, ...) DPU_DEBUG("enc%d intf%d " fmt, \ (e) && (e)->parent ? 
\ (e)->parent->base.id : -1, \ @@ -438,12 +440,6 @@ skip_flush: phys_enc->enable_state = DPU_ENC_ENABLING; } -static void dpu_encoder_phys_vid_destroy(struct dpu_encoder_phys *phys_enc) -{ - DPU_DEBUG_VIDENC(phys_enc, "\n"); - kfree(phys_enc); -} - static int dpu_encoder_phys_vid_wait_for_vblank( struct dpu_encoder_phys *phys_enc) { @@ -681,7 +677,6 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops) ops->atomic_mode_set = dpu_encoder_phys_vid_atomic_mode_set; ops->enable = dpu_encoder_phys_vid_enable; ops->disable = dpu_encoder_phys_vid_disable; - ops->destroy = dpu_encoder_phys_vid_destroy; ops->control_vblank_irq = dpu_encoder_phys_vid_control_vblank_irq; ops->wait_for_commit_done = dpu_encoder_phys_vid_wait_for_commit_done; ops->wait_for_vblank = dpu_encoder_phys_vid_wait_for_vblank; @@ -694,7 +689,7 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops) ops->get_frame_count = dpu_encoder_phys_vid_get_frame_count; } -struct dpu_encoder_phys *dpu_encoder_phys_vid_init( +struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p) { struct dpu_encoder_phys *phys_enc = NULL; @@ -704,7 +699,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_vid_init( return ERR_PTR(-EINVAL); } - phys_enc = kzalloc(sizeof(*phys_enc), GFP_KERNEL); + phys_enc = drmm_kzalloc(dev, sizeof(*phys_enc), GFP_KERNEL); if (!phys_enc) { DPU_ERROR("failed to create encoder due to memory allocation error\n"); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 0b6a761d68b7..bb94909caa25 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -8,6 +8,7 @@ #include #include +#include #include "dpu_encoder_phys.h" #include "dpu_formats.h" @@ -580,20 +581,6 @@ static void dpu_encoder_phys_wb_disable(struct dpu_encoder_phys *phys_enc) phys_enc->enable_state = DPU_ENC_DISABLED; } -/** - * dpu_encoder_phys_wb_destroy - destroy writeback encoder - * @phys_enc: Pointer to physical encoder - */ -static void dpu_encoder_phys_wb_destroy(struct dpu_encoder_phys *phys_enc) -{ - if (!phys_enc) - return; - - DPU_DEBUG("[wb:%d]\n", phys_enc->hw_wb->idx - WB_0); - - kfree(phys_enc); -} - static void dpu_encoder_phys_wb_prepare_wb_job(struct dpu_encoder_phys *phys_enc, struct drm_writeback_job *job) { @@ -689,7 +676,6 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops) ops->atomic_mode_set = dpu_encoder_phys_wb_atomic_mode_set; ops->enable = dpu_encoder_phys_wb_enable; ops->disable = dpu_encoder_phys_wb_disable; - ops->destroy = dpu_encoder_phys_wb_destroy; ops->atomic_check = dpu_encoder_phys_wb_atomic_check; ops->wait_for_commit_done = dpu_encoder_phys_wb_wait_for_commit_done; ops->prepare_for_kickoff = dpu_encoder_phys_wb_prepare_for_kickoff; @@ -705,9 +691,10 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops) /** * dpu_encoder_phys_wb_init - initialize writeback encoder + * @dev: Corresponding device for devres management * @p: Pointer to init info structure with initialization params */ -struct dpu_encoder_phys *dpu_encoder_phys_wb_init( +struct dpu_encoder_phys *dpu_encoder_phys_wb_init(struct drm_device *dev, struct dpu_enc_phys_init_params *p) { struct dpu_encoder_phys *phys_enc = NULL; @@ -720,7 +707,7 @@ struct dpu_encoder_phys *dpu_encoder_phys_wb_init( return ERR_PTR(-EINVAL); } - wb_enc = kzalloc(sizeof(*wb_enc), 
GFP_KERNEL); + wb_enc = drmm_kzalloc(dev, sizeof(*wb_enc), GFP_KERNEL); if (!wb_enc) { DPU_ERROR("failed to allocate wb phys_enc enc\n"); return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 3285f4acb23c6ea611583fe32c4ad231daf15a08 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:44 +0300 Subject: drm/msm/dpu: drop dpu_encoder_phys_ops::destroy Drop the dpu_encoder_phys_ops' destroy() callback. No phys backend implements it anymore, so it is useless. Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570058/ Link: https://lore.kernel.org/r/20231201211845.1026967-13-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 18 ------------------ drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h | 2 -- 2 files changed, 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index cd2e153860b5..6167aa664b30 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -455,24 +455,6 @@ static void dpu_encoder_destroy(struct drm_encoder *drm_enc) dpu_enc = to_dpu_encoder_virt(drm_enc); DPU_DEBUG_ENC(dpu_enc, "\n"); - mutex_lock(&dpu_enc->enc_lock); - - for (i = 0; i < dpu_enc->num_phys_encs; i++) { - struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; - - if (phys->ops.destroy) { - phys->ops.destroy(phys); - --dpu_enc->num_phys_encs; - dpu_enc->phys_encs[i] = NULL; - } - } - - if (dpu_enc->num_phys_encs) - DPU_ERROR_ENC(dpu_enc, "expected 0 num_phys_encs not %d\n", - dpu_enc->num_phys_encs); - dpu_enc->num_phys_encs = 0; - mutex_unlock(&dpu_enc->enc_lock); - drm_encoder_cleanup(drm_enc); mutex_destroy(&dpu_enc->enc_lock); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h index 5dc53b65040e..b6b48e2c63ef 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h @@ -72,7 +72,6 @@ struct dpu_encoder_phys; * @enable: DRM Call. Enable a DRM mode. * @disable: DRM Call. Disable mode. * @atomic_check: DRM Call. Atomic check new DRM state. - * @destroy: DRM Call. Destroy and release resources. * @control_vblank_irq Register/Deregister for VBLANK IRQ * @wait_for_commit_done: Wait for hardware to have flushed the * current pending frames to hardware @@ -102,7 +101,6 @@ struct dpu_encoder_phys_ops { int (*atomic_check)(struct dpu_encoder_phys *encoder, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state); - void (*destroy)(struct dpu_encoder_phys *encoder); int (*control_vblank_irq)(struct dpu_encoder_phys *enc, bool enable); int (*wait_for_commit_done)(struct dpu_encoder_phys *phys_enc); int (*wait_for_tx_complete)(struct dpu_encoder_phys *phys_enc); -- cgit v1.2.3 From cd42c56d9c0bb6007939408b4fbc62bbeccc9037 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 2 Dec 2023 00:18:45 +0300 Subject: drm/msm/dpu: use drmm-managed allocation for dpu_encoder_virt It is incorrect to use devm-managed memory allocations for DRM data structures exposed to userspace. They should use drmm_ allocations. Change struct dpu_encoder allocation to use drmm_encoder_alloc(). This removes the need to perform any actions on encoder destruction. 
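As a rough illustration (generic names, not the dpu code itself), drmm_encoder_alloc() allocates the wrapper structure with the embedded drm_encoder already initialized and registers cleanup with the drm_device, which is why the funcs table can drop its .destroy hook:

  #include <drm/drm_encoder.h>
  #include <drm/drm_managed.h>
  #include <linux/err.h>

  struct example_encoder {                /* illustrative wrapper */
          struct drm_encoder base;
          int intf;
  };

  static const struct drm_encoder_funcs example_encoder_funcs = {
          /* no .destroy: cleanup is handled by the managed allocation */
  };

  static struct drm_encoder *example_encoder_create(struct drm_device *dev)
  {
          struct example_encoder *enc;

          enc = drmm_encoder_alloc(dev, struct example_encoder, base,
                                   &example_encoder_funcs,
                                   DRM_MODE_ENCODER_TMDS, NULL);
          if (IS_ERR(enc))
                  return ERR_CAST(enc);

          return &enc->base;
  }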
Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570053/ Link: https://lore.kernel.org/r/20231201211845.1026967-14-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 50 ++++++----------------------- 1 file changed, 10 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 6167aa664b30..aa1a1646b322 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -442,23 +442,6 @@ int dpu_encoder_get_linecount(struct drm_encoder *drm_enc) return linecount; } -static void dpu_encoder_destroy(struct drm_encoder *drm_enc) -{ - struct dpu_encoder_virt *dpu_enc = NULL; - int i = 0; - - if (!drm_enc) { - DPU_ERROR("invalid encoder\n"); - return; - } - - dpu_enc = to_dpu_encoder_virt(drm_enc); - DPU_DEBUG_ENC(dpu_enc, "\n"); - - drm_encoder_cleanup(drm_enc); - mutex_destroy(&dpu_enc->enc_lock); -} - void dpu_encoder_helper_split_config( struct dpu_encoder_phys *phys_enc, enum dpu_intf interface) @@ -2346,7 +2329,6 @@ static const struct drm_encoder_helper_funcs dpu_encoder_helper_funcs = { }; static const struct drm_encoder_funcs dpu_encoder_funcs = { - .destroy = dpu_encoder_destroy, .late_register = dpu_encoder_late_register, .early_unregister = dpu_encoder_early_unregister, }; @@ -2357,20 +2339,13 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, { struct msm_drm_private *priv = dev->dev_private; struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms); - struct drm_encoder *drm_enc = NULL; - struct dpu_encoder_virt *dpu_enc = NULL; - int ret = 0; + struct dpu_encoder_virt *dpu_enc; + int ret; - dpu_enc = devm_kzalloc(dev->dev, sizeof(*dpu_enc), GFP_KERNEL); - if (!dpu_enc) - return ERR_PTR(-ENOMEM); - - ret = drm_encoder_init(dev, &dpu_enc->base, &dpu_encoder_funcs, - drm_enc_mode, NULL); - if (ret) { - devm_kfree(dev->dev, dpu_enc); - return ERR_PTR(ret); - } + dpu_enc = drmm_encoder_alloc(dev, struct dpu_encoder_virt, base, + &dpu_encoder_funcs, drm_enc_mode, NULL); + if (IS_ERR(dpu_enc)) + return ERR_CAST(dpu_enc); drm_encoder_helper_add(&dpu_enc->base, &dpu_encoder_helper_funcs); @@ -2380,8 +2355,10 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, mutex_init(&dpu_enc->rc_lock); ret = dpu_encoder_setup_display(dpu_enc, dpu_kms, disp_info); - if (ret) - goto fail; + if (ret) { + DPU_ERROR("failed to setup encoder\n"); + return ERR_PTR(-ENOMEM); + } atomic_set(&dpu_enc->frame_done_timeout_ms, 0); atomic_set(&dpu_enc->frame_done_timeout_cnt, 0); @@ -2397,13 +2374,6 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev, DPU_DEBUG_ENC(dpu_enc, "created\n"); return &dpu_enc->base; - -fail: - DPU_ERROR("failed to create encoder\n"); - if (drm_enc) - dpu_encoder_destroy(drm_enc); - - return ERR_PTR(ret); } int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, -- cgit v1.2.3 From e843ca2f30e630675e2d2a75c96f4844f2854430 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 03:24:37 +0300 Subject: drm/msm/dpu: correct clk bit for WB2 block On sc7280 there are two clk bits for WB2: vbif_cli and clk_ctrl. While programming the VBIF params of WB, the driver should be toggling the former bit, while the sc7180_mdp, sc7280_mdp and sm8250_mdp structs list the latter one. Correct that to ensure proper programming sequence for WB2 on these platforms. 
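For context, a catalog entry such as [DPU_CLK_CTRL_WB2] = { .reg_off, .bit_off } ends up in a read-modify-write of the MDP top registers, roughly as in the hedged sketch below (the helper name and parameters are invented for illustration, this is not the driver's actual function), so a wrong offset/bit silently toggles an unrelated clock:

  #include <linux/bits.h>
  #include <linux/io.h>

  /* schematic only: force a client clock on or off via its catalog entry */
  static void example_clk_force_ctrl(void __iomem *mdp_base,
                                     u32 reg_off, u32 bit_off, bool enable)
  {
          u32 val = readl_relaxed(mdp_base + reg_off);

          if (enable)
                  val |= BIT(bit_off);
          else
                  val &= ~BIT(bit_off);

          writel_relaxed(val, mdp_base + reg_off);
  }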
Fixes: 255f056181ac ("drm/msm/dpu: sc7180: add missing WB2 clock control") Fixes: 3ce166380567 ("drm/msm/dpu: add writeback support for sc7280") Fixes: 53324b99bd7b ("drm/msm/dpu: add writeback blocks to the sm8250 DPU catalog") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Tested-by: Paloma Arellano Patchwork: https://patchwork.freedesktop.org/patch/570185/ Link: https://lore.kernel.org/r/20231203002437.1291595-1-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h | 2 +- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index 9acf07108899..2359c16e9206 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -31,7 +31,7 @@ static const struct dpu_mdp_cfg sm8250_mdp = { [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, [DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 }, [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, - [DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h index 783269c6ead1..bcfedfc8251a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h @@ -24,7 +24,7 @@ static const struct dpu_mdp_cfg sc7180_mdp = { [DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 }, [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2c4, .bit_off = 8 }, - [DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h index 93564e9e5fa5..209675de6742 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h @@ -24,7 +24,7 @@ static const struct dpu_mdp_cfg sc7280_mdp = { [DPU_CLK_CTRL_DMA0] = { .reg_off = 0x2ac, .bit_off = 8 }, [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2c4, .bit_off = 8 }, - [DPU_CLK_CTRL_WB2] = { .reg_off = 0x3b8, .bit_off = 24 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, }, }; -- cgit v1.2.3 From e0f04e41e8eedd4e5a1275f2318df7e1841855f2 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 4 Dec 2023 09:32:33 +0100 Subject: drm/atomic-helpers: Invoke end_fb_access while owning plane state Invoke drm_plane_helper_funcs.end_fb_access before drm_atomic_helper_commit_hw_done(). The latter function hands over ownership of the plane state to the following commit, which might free it. Releasing resources in end_fb_access then operates on undefined state. This bug has been observed with non-blocking commits when they are being queued up quickly. Here is an example stack trace from the bug report. The plane state has been free'd already, so the pages for drm_gem_fb_vunmap() are gone. Unable to handle kernel paging request at virtual address 0000000100000049 [...] 
drm_gem_fb_vunmap+0x18/0x74 drm_gem_end_shadow_fb_access+0x1c/0x2c drm_atomic_helper_cleanup_planes+0x58/0xd8 drm_atomic_helper_commit_tail+0x90/0xa0 commit_tail+0x15c/0x188 commit_work+0x14/0x20 Fix this by running end_fb_access immediately after updating all planes in drm_atomic_helper_commit_planes(). The existing clean-up helper drm_atomic_helper_cleanup_planes() now only handles cleanup_fb. For aborted commits, roll back from drm_atomic_helper_prepare_planes() in the new helper drm_atomic_helper_unprepare_planes(). This case is different from regular cleanup, as we have to release the new state; regular cleanup releases the old state. The new helper also invokes cleanup_fb for all planes. The changes mostly involve DRM's atomic helpers. Only two drivers, i915 and nouveau, implement their own commit function. Update them to invoke drm_atomic_helper_unprepare_planes(). Drivers with custom commit_tail function do not require changes. v4: * fix documentation (kernel test robot) v3: * add drm_atomic_helper_unprepare_planes() for rolling back * use correct state for end_fb_access v2: * fix test in drm_atomic_helper_cleanup_planes() Reported-by: Alyssa Ross Closes: https://lore.kernel.org/dri-devel/87leazm0ya.fsf@alyssa.is/ Suggested-by: Daniel Vetter Fixes: 94d879eaf7fb ("drm/atomic-helper: Add {begin,end}_fb_access to plane helpers") Tested-by: Alyssa Ross Reviewed-by: Alyssa Ross Signed-off-by: Thomas Zimmermann Cc: # v6.2+ Link: https://patchwork.freedesktop.org/patch/msgid/20231204083247.22006-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_atomic_helper.c | 78 ++++++++++++++++++---------- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- include/drm/drm_atomic_helper.h | 2 + 4 files changed, 56 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 2444fc33dd7c..68ffcc0b00dc 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -2012,7 +2012,7 @@ int drm_atomic_helper_commit(struct drm_device *dev, return ret; drm_atomic_helper_async_commit(dev, state); - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); return 0; } @@ -2072,7 +2072,7 @@ int drm_atomic_helper_commit(struct drm_device *dev, return 0; err: - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); return ret; } EXPORT_SYMBOL(drm_atomic_helper_commit); @@ -2650,6 +2650,39 @@ fail_prepare_fb: } EXPORT_SYMBOL(drm_atomic_helper_prepare_planes); +/** + * drm_atomic_helper_unprepare_planes - release plane resources on aborts + * @dev: DRM device + * @state: atomic state object with old state structures + * + * This function cleans up plane state, specifically framebuffers, from the + * atomic state. It undoes the effects of drm_atomic_helper_prepare_planes() + * when aborting an atomic commit. For cleaning up after a successful commit + * use drm_atomic_helper_cleanup_planes(). 
+ */ +void drm_atomic_helper_unprepare_planes(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_plane *plane; + struct drm_plane_state *new_plane_state; + int i; + + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->end_fb_access) + funcs->end_fb_access(plane, new_plane_state); + } + + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->cleanup_fb) + funcs->cleanup_fb(plane, new_plane_state); + } +} +EXPORT_SYMBOL(drm_atomic_helper_unprepare_planes); + static bool plane_crtc_active(const struct drm_plane_state *state) { return state->crtc && state->crtc->state->active; @@ -2784,6 +2817,17 @@ void drm_atomic_helper_commit_planes(struct drm_device *dev, funcs->atomic_flush(crtc, old_state); } + + /* + * Signal end of framebuffer access here before hw_done. After hw_done, + * a later commit might have already released the plane state. + */ + for_each_old_plane_in_state(old_state, plane, old_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->end_fb_access) + funcs->end_fb_access(plane, old_plane_state); + } } EXPORT_SYMBOL(drm_atomic_helper_commit_planes); @@ -2911,40 +2955,22 @@ EXPORT_SYMBOL(drm_atomic_helper_disable_planes_on_crtc); * configuration. Hence the old configuration must be perserved in @old_state to * be able to call this function. * - * This function must also be called on the new state when the atomic update - * fails at any point after calling drm_atomic_helper_prepare_planes(). + * This function may not be called on the new state when the atomic update + * fails at any point after calling drm_atomic_helper_prepare_planes(). Use + * drm_atomic_helper_unprepare_planes() in this case. */ void drm_atomic_helper_cleanup_planes(struct drm_device *dev, struct drm_atomic_state *old_state) { struct drm_plane *plane; - struct drm_plane_state *old_plane_state, *new_plane_state; + struct drm_plane_state *old_plane_state; int i; - for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) { + for_each_old_plane_in_state(old_state, plane, old_plane_state, i) { const struct drm_plane_helper_funcs *funcs = plane->helper_private; - if (funcs->end_fb_access) - funcs->end_fb_access(plane, new_plane_state); - } - - for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) { - const struct drm_plane_helper_funcs *funcs; - struct drm_plane_state *plane_state; - - /* - * This might be called before swapping when commit is aborted, - * in which case we have to cleanup the new state. 
- */ - if (old_plane_state == plane->state) - plane_state = new_plane_state; - else - plane_state = old_plane_state; - - funcs = plane->helper_private; - if (funcs->cleanup_fb) - funcs->cleanup_fb(plane, plane_state); + funcs->cleanup_fb(plane, old_plane_state); } } EXPORT_SYMBOL(drm_atomic_helper_cleanup_planes); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 28d85e1e858e..02f873738905 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7475,7 +7475,7 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) intel_color_cleanup_commit(new_crtc_state); - drm_atomic_helper_cleanup_planes(dev, &state->base); + drm_atomic_helper_unprepare_planes(dev, &state->base); intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); return ret; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 7840b6428afb..118807e38422 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2474,7 +2474,7 @@ nv50_disp_atomic_commit(struct drm_device *dev, err_cleanup: if (ret) - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); done: pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 536a0b0091c3..006b5c977ad7 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -97,6 +97,8 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, int drm_atomic_helper_prepare_planes(struct drm_device *dev, struct drm_atomic_state *state); +void drm_atomic_helper_unprepare_planes(struct drm_device *dev, + struct drm_atomic_state *state); #define DRM_PLANE_COMMIT_ACTIVE_ONLY BIT(0) #define DRM_PLANE_COMMIT_NO_DISABLE_AFTER_MODESET BIT(1) -- cgit v1.2.3 From fd2ef5fa3556549c565f5b7a07776d899a8ed8b7 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 1 Dec 2023 08:38:15 +0800 Subject: drm/amdgpu: disable MCBP by default Disable MCBP(mid command buffer preemption) by default as old Mesa hangs with it. We shall not enable the feature that breaks old usermode driver. Fixes: 50a7c8765ca6 ("drm/amdgpu: enable mcbp by default on gfx9") Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6c0cf64d465a..d5b950fd1d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3861,10 +3861,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; else if (amdgpu_mcbp == 0) adev->gfx.mcbp = false; - else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) && - (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) && - adev->gfx.num_gfx_rings) - adev->gfx.mcbp = true; if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; -- cgit v1.2.3 From d5e78f1c2611e22204490b679d962d8f51762969 Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Wed, 29 Nov 2023 19:02:57 -0500 Subject: drm/amd/amdgpu: Move vcn4 fw_shared init to a single function - Move VCN4's fw_shared initialization to a separated function. 
This way, the function can be reused at different locations. Signed-off-by: Bokun Zhang Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 45 ++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 48bfcd0d558b..28b7f96b42e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -100,6 +100,31 @@ static int vcn_v4_0_early_init(void *handle) return amdgpu_vcn_early_init(adev); } +static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(4, 0, 2)) { + fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; + fw_shared->drm_key_wa.method = + AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; + } + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); + + return 0; +} + /** * vcn_v4_0_sw_init - sw init for VCN block * @@ -124,8 +149,6 @@ static int vcn_v4_0_sw_init(void *handle) return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) continue; @@ -161,23 +184,7 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); - fw_shared->sq.is_enabled = 1; - - fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); - fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? - AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; - - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 2)) { - fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; - fw_shared->drm_key_wa.method = - AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; - } - - if (amdgpu_vcnfw_log) - amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + vcn_v4_0_fw_shared_init(adev, i); } if (amdgpu_sriov_vf(adev)) { -- cgit v1.2.3 From e57cd73f971194e94bc42d57b9fcb184c93a8754 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 1 Dec 2023 06:24:57 -0700 Subject: drm/amd/display: Optimize fast validation cases Optimize fast validation cases to only validate the highest voltage level. This works because during fast validation we only care if the mode can be supported or not (at any vlevel). 
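Schematically (this is only an illustration of the idea, not the DML code), the change narrows the voltage-state sweep when the caller only needs a pass/fail answer:

  #include <linux/types.h>

  /* fast validation only needs to know whether the highest, most capable
   * state can support the mode; full validation still sweeps all states */
  static bool example_mode_supported(const bool *state_ok, int num_states,
                                     bool fast_validate)
  {
          int i, start_state = fast_validate ? num_states - 1 : 0;

          for (i = start_state; i < num_states; i++)
                  if (state_ok[i])
                          return true;

          return false;
  }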
Tested-by: Daniel Wheeler Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/dcn30/display_mode_vba_30.c | 29 +++++++++++++--------- .../amd/display/dc/resource/dcn30/dcn30_resource.c | 2 ++ 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 3686f1e7de3a..63c48c29ba49 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -3542,7 +3542,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { struct vba_vars_st *v = &mode_lib->vba; int MinPrefetchMode, MaxPrefetchMode; - int i; + int i, start_state; unsigned int j, k, m; bool EnoughWritebackUnits = true; bool WritebackModeSupport = true; @@ -3553,6 +3553,11 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + if (mode_lib->validate_max_state) + start_state = v->soc.num_states - 1; + else + start_state = 0; + CalculateMinAndMaxPrefetchMode( mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &MinPrefetchMode, &MaxPrefetchMode); @@ -3851,7 +3856,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SingleDPPViewportSizeSupportPerPlane, &v->ViewportSizeSupport[0][0]); - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); @@ -4007,7 +4012,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*Total Available Pipes Support Check*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { v->TotalAvailablePipesSupport[i][j] = true; @@ -4046,7 +4051,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { v->RequiresDSC[i][k] = false; v->RequiresFEC[i][k] = false; @@ -4174,7 +4179,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { v->DIOSupport[i] = true; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi) @@ -4185,7 +4190,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - for (i = 0; i < v->soc.num_states; ++i) { + for (i = start_state; i < v->soc.num_states; ++i) { v->ODMCombine4To1SupportCheckOK[i] = true; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 @@ -4197,7 +4202,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /* Skip dscclk validation: as long as dispclk is supported, 
dscclk is also implicitly supported */ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { v->NotEnoughDSCUnits[i] = false; v->TotalDSCUnitsRequired = 0.0; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { @@ -4217,7 +4222,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*DSC Delay per state*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { if (v->OutputBppPerState[i][k] == BPP_INVALID) { v->BPP = 0.0; @@ -4333,7 +4338,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; } - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; @@ -5075,7 +5080,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*PTE Buffer Size Check*/ - for (i = 0; i < v->soc.num_states; i++) { + for (i = start_state; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { v->PTEBufferSizeNotExceeded[i][j] = true; for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { @@ -5136,7 +5141,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*Mode Support, Voltage State and SOC Configuration*/ - for (i = v->soc.num_states - 1; i >= 0; i--) { + for (i = v->soc.num_states - 1; i >= start_state; i--) { for (j = 0; j < 2; j++) { if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1 @@ -5158,7 +5163,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } { unsigned int MaximumMPCCombine = 0; - for (i = v->soc.num_states; i >= 0; i--) { + for (i = v->soc.num_states; i >= start_state; i--) { if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { v->VoltageLevel = i; v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 2b6dcb489b90..37a64186f324 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -1682,6 +1682,7 @@ noinline bool dcn30_internal_validate_bw( * We don't actually support prefetch mode 2, so require that we * at least support prefetch mode 1. 
*/ + context->bw_ctx.dml.validate_max_state = fast_validate; context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = dm_allow_self_refresh; @@ -1691,6 +1692,7 @@ noinline bool dcn30_internal_validate_bw( memset(merge, 0, sizeof(merge)); vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); } + context->bw_ctx.dml.validate_max_state = false; } dml_log_mode_support_params(&context->bw_ctx.dml); -- cgit v1.2.3 From 885c71ad791c1709f668a37f701d33e6872a902f Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 1 Dec 2023 06:24:58 -0700 Subject: drm/amd/display: initialize all the dpm level's stutter latency Fix issue when override level bigger than default. Levels 5, 6, and 7 had zero stutter latency, this is because override level being initialized after stutter latency inits. Tested-by: Daniel Wheeler Reviewed-by: Syed Hassan Reviewed-by: Allen Pan Acked-by: Rodrigo Siqueira Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index fa8fe5bf7e57..255af7875c08 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -341,6 +341,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, break; } + if (dml2->config.bbox_overrides.clks_table.num_states) + p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; + /* Override from passed values, if available */ for (i = 0; i < p->in_states->num_states; i++) { if (dml2->config.bbox_overrides.sr_exit_latency_us) { @@ -397,7 +400,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } /* Copy clocks tables entries, if available */ if (dml2->config.bbox_overrides.clks_table.num_states) { - p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; -- cgit v1.2.3 From dd2c5fac91d46df9dc1bf025ef23eff4704bd85f Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Fri, 1 Dec 2023 06:24:59 -0700 Subject: drm/amd/display: Add ODM check during pipe split/merge validation [why] When querying DML for a vlevel after pipes have been split or merged the ODM policy would revert to a default policy, which could cause the query to use the incorrect ODM status. In this case ODM 2to1 was validated, but the last DML query would assume no ODM and return the incorrect vlevel. [how] Added ODM check to apply the correct ODM policy before querying DML. 
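The mapping applied before the final DML query can be summarized by the sketch below; the stand-alone enum exists only for illustration, while the real code assigns the corresponding dm_odm_combine_policy_* values to pipes[].pipe.dest.odm_combine_policy:

  /* illustration of the slice-count to ODM-policy mapping */
  enum example_odm_policy {
          EXAMPLE_ODM_POLICY_DAL,         /* 1 slice: let DAL decide */
          EXAMPLE_ODM_POLICY_2TO1,        /* 2 slices: force ODM 2:1 */
          EXAMPLE_ODM_POLICY_4TO1,        /* 4 slices: force ODM 4:1 */
  };

  static enum example_odm_policy example_odm_policy(int odm_slice_count)
  {
          switch (odm_slice_count) {
          case 2:
                  return EXAMPLE_ODM_POLICY_2TO1;
          case 4:
                  return EXAMPLE_ODM_POLICY_4TO1;
          case 1:
          default:
                  return EXAMPLE_ODM_POLICY_DAL;
          }
  }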
Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Rodrigo Siqueira Signed-off-by: Relja Vojvodic Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 26 ++++++++++++++++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 3 +++ .../amd/display/dc/resource/dcn32/dcn32_resource.h | 2 ++ 3 files changed, 31 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 389ac7ae1154..e8159a459bce 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -806,3 +806,29 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int return result; } + +void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe = NULL; + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + int odm_slice_count = 0; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + odm_slice_count = resource_get_odm_slice_count(pipe); + + if (odm_slice_count == 1) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + else if (odm_slice_count == 2) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + else if (odm_slice_count == 4) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1; + + pipe_cnt++; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 26411d4e9730..de209ca0cf8c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2192,6 +2192,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, int i; pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes); /* repopulate_pipes = 1 means the pipes were either split or merged. 
In this case * we have to re-calculate the DET allocation and run through DML once more to @@ -2200,7 +2201,9 @@ bool dcn32_internal_validate_bw(struct dc *dc, * */ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel == context->bw_ctx.dml.soc.num_states) { /* failed after DET size changes */ goto validate_fail; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index b2f20e6cfb38..9ca799da1a56 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -193,6 +193,8 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context); bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int vlevel); +void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes); + /* definitions for run time init of reg offsets */ /* CLK SRC */ -- cgit v1.2.3 From fa745b554733ff0ed9ff918a0a53267300444c88 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 1 Dec 2023 06:25:00 -0700 Subject: drm/amd/display: Only enumerate top local sink as DP2 output [WHY] Many DCN generations only have two HPO link encoders and therefore only support driving a max of two DP2 PHYs. DP2 MST hubs currently can not pass 3x display validation as each downstream sink is enumerated as separate DP2 output. [HOW] Count MST hubs once by treating only 1st remote sink in topology as an encoder. Tested-by: Daniel Wheeler Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Michael Strauss Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 33eab80e89a8..6ba393e5b8ee 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -157,6 +157,15 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) { /* If this assert is hit then we have a link encoder dynamic management issue */ ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); + + /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ + if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal) && + (pipe_ctx->stream->link->remote_sinks[0] == pipe_ctx->stream->sink)); + } + return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal)); -- cgit v1.2.3 From a546a27684407942604bccdf3b62f0765c0f6399 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Fri, 1 Dec 2023 23:38:12 -0600 Subject: drm/amdkfd: Use partial migrations/mapping for GPU/CPU page faults in SVM This patch implements partial migration/mapping for gpu/cpu page faults in SVM according to migration granularity(default 2MB). A svm range may include pages from both system ram and vram of one gpu now. 
These chagnes are expected to improve migration performance and reduce mmu callback and TLB flush workloads. Signed-off-by: Xiaogang Chen Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 146 +++++++++++++------------ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 4 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 176 +++++++++++++++---------------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 9 +- 4 files changed, 176 insertions(+), 159 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6c25dab051d5..b854cbf06dce 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, goto out_free; } if (cpages != npages) - pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + pr_debug("partial migration, 0x%lx/0x%llx pages collected\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); @@ -479,6 +479,8 @@ out: * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @start_mgr: start page to migrate + * @last_mgr: last page to migrate * @mm: the process mm structure * @trigger: reason of migration * @@ -489,6 +491,7 @@ out: */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start_mgr, unsigned long last_mgr, struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; @@ -498,10 +501,10 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long cpages = 0; long r = 0; - if (prange->actual_loc == best_loc) { - pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", - prange->svms, prange->start, prange->last, best_loc); - return 0; + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", + start_mgr, last_mgr, prange->start, prange->last); + return -EFAULT; } node = svm_range_get_node_by_id(prange, best_loc); @@ -510,18 +513,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, return -ENODEV; } - pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, - prange->start, prange->last, best_loc); + pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n", + prange->svms, start_mgr, last_mgr, prange->start, prange->last, + best_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; r = svm_range_vram_node_new(node, prange, true); if (r) { dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r); return r; } - ttm_res_offset = prange->offset << PAGE_SHIFT; + ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT; for (addr = start; addr < end;) { unsigned long next; @@ -544,8 +548,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) { prange->actual_loc = best_loc; - svm_range_dma_unmap(prange); - } else { + prange->vram_pages = prange->vram_pages + cpages; + } else if (!prange->actual_loc) { + /* if no page migrated and all pages from prange are at + * sys ram drop svm_bo got from svm_range_vram_node_new + */ 
svm_range_vram_node_free(prange); } @@ -663,9 +670,8 @@ out_oom: * Context: Process context, caller hold mmap read lock, prange->migrate_mutex * * Return: - * 0 - success with all pages migrated * negative values - indicate error - * positive values - partial migration, number of pages not migrated + * positive values or zero - number of pages got migrated */ static long svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, @@ -676,6 +682,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, uint64_t npages = (end - start) >> PAGE_SHIFT; unsigned long upages = npages; unsigned long cpages = 0; + unsigned long mpages = 0; struct amdgpu_device *adev = node->adev; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; @@ -725,10 +732,10 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, goto out_free; } if (cpages != npages) - pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n", + pr_debug("partial migration, 0x%lx/0x%llx pages collected\n", cpages, npages); else - pr_debug("0x%lx pages migrated\n", cpages); + pr_debug("0x%lx pages collected\n", cpages); r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, scratch, npages); @@ -751,17 +758,21 @@ out_free: kvfree(buf); out: if (!r && cpages) { + mpages = cpages - upages; pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) - WRITE_ONCE(pdd->page_out, pdd->page_out + cpages); + WRITE_ONCE(pdd->page_out, pdd->page_out + mpages); } - return r ? r : upages; + + return r ? r : mpages; } /** * svm_migrate_vram_to_ram - migrate svm range from device to system * @prange: range structure * @mm: process mm, use current->mm if NULL + * @start_mgr: start page need be migrated to sys ram + * @last_mgr: last page need be migrated to sys ram * @trigger: reason of migration * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback * @@ -771,6 +782,7 @@ out: * 0 - OK, otherwise error code */ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, + unsigned long start_mgr, unsigned long last_mgr, uint32_t trigger, struct page *fault_page) { struct kfd_node *node; @@ -778,26 +790,33 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, unsigned long addr; unsigned long start; unsigned long end; - unsigned long upages = 0; + unsigned long mpages = 0; long r = 0; + /* this pragne has no any vram page to migrate to sys ram */ if (!prange->actual_loc) { pr_debug("[0x%lx 0x%lx] already migrated to ram\n", prange->start, prange->last); return 0; } + if (start_mgr < prange->start || last_mgr > prange->last) { + pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n", + start_mgr, last_mgr, prange->start, prange->last); + return -EFAULT; + } + node = svm_range_get_node_by_id(prange, prange->actual_loc); if (!node) { pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc); return -ENODEV; } pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n", - prange->svms, prange, prange->start, prange->last, + prange->svms, prange, start_mgr, last_mgr, prange->actual_loc); - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = start_mgr << PAGE_SHIFT; + end = (last_mgr + 1) << PAGE_SHIFT; for (addr = start; addr < end;) { unsigned long next; @@ -816,14 +835,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, pr_debug("failed %ld to migrate prange %p\n", r, prange); break; } else { - upages += r; + mpages += r; } addr = 
next; } - if (r >= 0 && !upages) { - svm_range_vram_node_free(prange); - prange->actual_loc = 0; + if (r >= 0) { + prange->vram_pages -= mpages; + + /* prange does not have vram page set its actual_loc to system + * and drop its svm_bo ref + */ + if (prange->vram_pages == 0 && prange->ttm_res) { + prange->actual_loc = 0; + svm_range_vram_node_free(prange); + } } return r < 0 ? r : 0; @@ -833,17 +859,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, * svm_migrate_vram_to_vram - migrate svm range from device to device * @prange: range structure * @best_loc: the device to migrate to + * @start: start page need be migrated to sys ram + * @last: last page need be migrated to sys ram * @mm: process mm, use current->mm if NULL * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * + * migrate all vram pages in prange to sys ram, then migrate + * [start, last] pages from sys ram to gpu node best_loc. + * * Return: * 0 - OK, otherwise error code */ static int svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm, uint32_t trigger) + unsigned long start, unsigned long last, + struct mm_struct *mm, uint32_t trigger) { int r, retries = 3; @@ -855,7 +887,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); do { - r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL); + r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last, + trigger, NULL); if (r) return r; } while (prange->actual_loc && --retries); @@ -863,17 +896,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, if (prange->actual_loc) return -EDEADLK; - return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); + return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger); } int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start, unsigned long last, struct mm_struct *mm, uint32_t trigger) { - if (!prange->actual_loc) - return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); + if (!prange->actual_loc || prange->actual_loc == best_loc) + return svm_migrate_ram_to_vram(prange, best_loc, start, last, + mm, trigger); + else - return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger); + return svm_migrate_vram_to_vram(prange, best_loc, start, last, + mm, trigger); } @@ -889,10 +926,9 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, */ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) { + unsigned long start, last, size; unsigned long addr = vmf->address; struct svm_range_bo *svm_bo; - enum svm_work_list_ops op; - struct svm_range *parent; struct svm_range *prange; struct kfd_process *p; struct mm_struct *mm; @@ -929,51 +965,31 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) mutex_lock(&p->svms.lock); - prange = svm_range_from_addr(&p->svms, addr, &parent); + prange = svm_range_from_addr(&p->svms, addr, NULL); if (!prange) { pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr); r = -EFAULT; goto out_unlock_svms; } - mutex_lock(&parent->migrate_mutex); - if (prange != parent) - mutex_lock_nested(&prange->migrate_mutex, 1); + mutex_lock(&prange->migrate_mutex); if (!prange->actual_loc) goto out_unlock_prange; - svm_range_lock(parent); - if (prange != parent) - mutex_lock_nested(&prange->lock, 1); - r = svm_range_split_by_granularity(p, mm, addr, parent, prange); - if (prange 
!= parent) - mutex_unlock(&prange->lock); - svm_range_unlock(parent); - if (r) { - pr_debug("failed %d to split range by granularity\n", r); - goto out_unlock_prange; - } + /* Align migration range start and size to granularity size */ + size = 1UL << prange->granularity; + start = max(ALIGN_DOWN(addr, size), prange->start); + last = min(ALIGN(addr + 1, size) - 1, prange->last); - r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, - KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, - vmf->page); + r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last, + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page); if (r) pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n", - r, prange->svms, prange, prange->start, prange->last); - - /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ - if (p->xnack_enabled && parent == prange) - op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP; - else - op = SVM_OP_UPDATE_RANGE_NOTIFIER; - svm_range_add_list_work(&p->svms, parent, mm, op); - schedule_deferred_list_work(&p->svms); + r, prange->svms, prange, start, last); out_unlock_prange: - if (prange != parent) - mutex_unlock(&prange->migrate_mutex); - mutex_unlock(&parent->migrate_mutex); + mutex_unlock(&prange->migrate_mutex); out_unlock_svms: mutex_unlock(&p->svms.lock); out_unref_process: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 487f26368164..2eebf67f9c2c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR { }; int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + unsigned long start, unsigned long last, struct mm_struct *mm, uint32_t trigger); + int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, + unsigned long start, unsigned long last, uint32_t trigger, struct page *fault_page); + unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 613f19510287..617d20f0380f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -158,12 +158,13 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr) static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns, uint32_t gpuidx) + unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; dma_addr_t *addr = prange->dma_addr[gpuidx]; struct device *dev = adev->dev; struct page *page; + uint64_t vram_pages_dev; int i, r; if (!addr) { @@ -173,6 +174,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, prange->dma_addr[gpuidx] = addr; } + vram_pages_dev = 0; addr += offset; for (i = 0; i < npages; i++) { if (svm_is_valid_dma_mapping_addr(dev, addr[i])) @@ -182,6 +184,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, if (is_zone_device_page(page)) { struct amdgpu_device *bo_adev = prange->svm_bo->node->adev; + vram_pages_dev++; addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + bo_adev->vm_manager.vram_base_offset - bo_adev->kfd.pgmap.range.start; @@ -198,13 +201,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n", addr[i] >> PAGE_SHIFT, page_to_pfn(page)); } + *vram_pages = vram_pages_dev; return 0; } 
static int svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns) + unsigned long *hmm_pfns, uint64_t *vram_pages) { struct kfd_process *p; uint32_t gpuidx; @@ -223,7 +227,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, } r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages, - hmm_pfns, gpuidx); + hmm_pfns, gpuidx, vram_pages); if (r) break; } @@ -349,6 +353,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->child_list); atomic_set(&prange->invalid, 0); prange->validate_timestamp = 0; + prange->vram_pages = 0; mutex_init(&prange->migrate_mutex); mutex_init(&prange->lock); @@ -395,6 +400,8 @@ static void svm_range_bo_release(struct kref *kref) prange->start, prange->last); mutex_lock(&prange->lock); prange->svm_bo = NULL; + /* prange should not hold vram page now */ + WARN_ONCE(prange->actual_loc, "prange should not hold vram page"); mutex_unlock(&prange->lock); spin_lock(&svm_bo->list_lock); @@ -975,6 +982,11 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, new->svm_bo = svm_range_bo_ref(old->svm_bo); new->ttm_res = old->ttm_res; + /* set new's vram_pages as old range's now, the acurate vram_pages + * will be updated during mapping + */ + new->vram_pages = min(old->vram_pages, new->npages); + spin_lock(&new->svm_bo->list_lock); list_add(&new->svm_bo_list, &new->svm_bo->range_list); spin_unlock(&new->svm_bo->list_lock); @@ -1135,66 +1147,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, list_add_tail(&pchild->child_list, &prange->child_list); } -/** - * svm_range_split_by_granularity - collect ranges within granularity boundary - * - * @p: the process with svms list - * @mm: mm structure - * @addr: the vm fault address in pages, to split the prange - * @parent: parent range if prange is from child list - * @prange: prange to split - * - * Trims @prange to be a single aligned block of prange->granularity if - * possible. The head and tail are added to the child_list in @parent. - * - * Context: caller must hold mmap_read_lock and prange->lock - * - * Return: - * 0 - OK, otherwise error code - */ -int -svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, - unsigned long addr, struct svm_range *parent, - struct svm_range *prange) -{ - struct svm_range *head, *tail; - unsigned long start, last, size; - int r; - - /* Align splited range start and size to granularity size, then a single - * PTE will be used for whole range, this reduces the number of PTE - * updated and the L1 TLB space used for translation. 
- */ - size = 1UL << prange->granularity; - start = ALIGN_DOWN(addr, size); - last = ALIGN(addr + 1, size) - 1; - - pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n", - prange->svms, prange->start, prange->last, start, last, size); - - if (start > prange->start) { - r = svm_range_split(prange, start, prange->last, &head); - if (r) - return r; - svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE); - } - - if (last < prange->last) { - r = svm_range_split(prange, prange->start, last, &tail); - if (r) - return r; - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); - } - - /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ - if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) { - prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP; - pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n", - prange, prange->start, prange->last, - SVM_OP_ADD_RANGE_AND_MAP); - } - return 0; -} static bool svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b) { @@ -1614,12 +1566,14 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) * 5. Release page table (and SVM BO) reservation */ static int svm_range_validate_and_map(struct mm_struct *mm, + unsigned long map_start, unsigned long map_last, struct svm_range *prange, int32_t gpuidx, bool intr, bool wait, bool flush_tlb) { struct svm_validate_context *ctx; unsigned long start, end, addr; struct kfd_process *p; + uint64_t vram_pages; void *owner; int32_t idx; int r = 0; @@ -1688,11 +1642,15 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } } + vram_pages = 0; start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; for (addr = start; !r && addr < end; ) { struct hmm_range *hmm_range; + unsigned long map_start_vma; + unsigned long map_last_vma; struct vm_area_struct *vma; + uint64_t vram_pages_vma; unsigned long next = 0; unsigned long offset; unsigned long npages; @@ -1721,9 +1679,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm, if (!r) { offset = (addr - start) >> PAGE_SHIFT; r = svm_range_dma_map(prange, ctx->bitmap, offset, npages, - hmm_range->hmm_pfns); + hmm_range->hmm_pfns, &vram_pages_vma); if (r) pr_debug("failed %d to dma map range\n", r); + else + vram_pages += vram_pages_vma; } svm_range_lock(prange); @@ -1737,9 +1697,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm, r = -EAGAIN; } - if (!r) - r = svm_range_map_to_gpus(prange, offset, npages, readonly, - ctx->bitmap, wait, flush_tlb); + if (!r) { + map_start_vma = max(map_start, prange->start + offset); + map_last_vma = min(map_last, prange->start + offset + npages - 1); + if (map_start_vma <= map_last_vma) { + offset = map_start_vma - prange->start; + npages = map_last_vma - map_start_vma + 1; + r = svm_range_map_to_gpus(prange, offset, npages, readonly, + ctx->bitmap, wait, flush_tlb); + } + } if (!r && next == end) prange->mapped_to_gpu = true; @@ -1749,6 +1716,19 @@ static int svm_range_validate_and_map(struct mm_struct *mm, addr = next; } + if (addr == end) { + prange->vram_pages = vram_pages; + + /* if prange does not include any vram page and it + * has not released svm_bo drop its svm_bo reference + * and set its actaul_loc to sys ram + */ + if (!vram_pages && prange->ttm_res) { + prange->actual_loc = 0; + svm_range_vram_node_free(prange); + } + } + svm_range_unreserve_bos(ctx); if (!r) prange->validate_timestamp = ktime_get_boottime(); @@ -1832,8 +1812,8 @@ static void svm_range_restore_work(struct work_struct *work) */ 
mutex_lock(&prange->migrate_mutex); - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - false, true, false); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, false, true, false); if (r) pr_debug("failed %d to map 0x%lx to gpus\n", r, prange->start); @@ -2001,6 +1981,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old) new->actual_loc = old->actual_loc; new->granularity = old->granularity; new->mapped_to_gpu = old->mapped_to_gpu; + new->vram_pages = old->vram_pages; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); @@ -2908,6 +2889,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, uint64_t addr, bool write_fault) { + unsigned long start, last, size; struct mm_struct *mm = NULL; struct svm_range_list *svms; struct svm_range *prange; @@ -3043,40 +3025,44 @@ retry_write_locked: kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr, write_fault, timestamp); - if (prange->actual_loc != best_loc) { + /* Align migration range start and size to granularity size */ + size = 1UL << prange->granularity; + start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start); + last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last); + if (prange->actual_loc != 0 || best_loc != 0) { migration = true; + if (best_loc) { - r = svm_migrate_to_vram(prange, best_loc, mm, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU); + r = svm_migrate_to_vram(prange, best_loc, start, last, + mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU); if (r) { pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", r, addr); /* Fallback to system memory if migration to * VRAM failed */ - if (prange->actual_loc) - r = svm_migrate_vram_to_ram(prange, mm, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, - NULL); + if (prange->actual_loc && prange->actual_loc != best_loc) + r = svm_migrate_vram_to_ram(prange, mm, start, last, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL); else r = 0; } } else { - r = svm_migrate_vram_to_ram(prange, mm, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, - NULL); + r = svm_migrate_vram_to_ram(prange, mm, start, last, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL); } if (r) { pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", - r, svms, prange->start, prange->last); + r, svms, start, last); goto out_unlock_range; } } - r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false); + r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false, + false, false); if (r) pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", - r, svms, prange->start, prange->last); + r, svms, start, last); kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr, migration); @@ -3422,18 +3408,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, *migrated = false; best_loc = svm_range_best_prefetch_location(prange); - if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || - best_loc == prange->actual_loc) + /* when best_loc is a gpu node and same as prange->actual_loc + * we still need do migration as prange->actual_loc !=0 does + * not mean all pages in prange are vram. hmm migrate will pick + * up right pages during migration. 
+ */ + if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) || + (best_loc == 0 && prange->actual_loc == 0)) return 0; if (!best_loc) { - r = svm_migrate_vram_to_ram(prange, mm, + r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last, KFD_MIGRATE_TRIGGER_PREFETCH, NULL); *migrated = !r; return r; } - r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH); + r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last, + mm, KFD_MIGRATE_TRIGGER_PREFETCH); *migrated = !r; return r; @@ -3488,7 +3480,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mutex_lock(&prange->migrate_mutex); do { + /* migrate all vram pages in this prange to sys ram + * after that prange->actual_loc should be zero + */ r = svm_migrate_vram_to_ram(prange, mm, + prange->start, prange->last, KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL); } while (!r && prange->actual_loc && --retries); @@ -3612,8 +3608,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu; - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - true, true, flush_tlb); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, true, true, flush_tlb); if (r) pr_debug("failed %d to map svm range\n", r); @@ -3627,8 +3623,8 @@ out_unlock_range: pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n", prange, prange->start, prange->last); mutex_lock(&prange->migrate_mutex); - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, - true, true, prange->mapped_to_gpu); + r = svm_range_validate_and_map(mm, prange->start, prange->last, prange, + MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu); if (r) pr_debug("failed %d on remap svm range\n", r); mutex_unlock(&prange->migrate_mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index c528df1d0ba2..026863a0abcd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -78,6 +78,7 @@ struct svm_work_list_item { * @update_list:link list node used to add to update_list * @mapping: bo_va mapping structure to create and update GPU page table * @npages: number of pages + * @vram_pages: vram pages number in this svm_range * @dma_addr: dma mapping address on each GPU for system memory physical page * @ttm_res: vram ttm resource map * @offset: range start offset within mm_nodes @@ -88,7 +89,9 @@ struct svm_work_list_item { * @flags: flags defined as KFD_IOCTL_SVM_FLAG_* * @perferred_loc: perferred location, 0 for CPU, or GPU id * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id - * @actual_loc: the actual location, 0 for CPU, or GPU id + * @actual_loc: this svm_range location. 0: all pages are from sys ram; + * GPU id: this svm_range may include vram pages from GPU with + * id actual_loc. 
* @granularity:migration granularity, log2 num pages * @invalid: not 0 means cpu page table is invalidated * @validate_timestamp: system timestamp when range is validated @@ -112,6 +115,7 @@ struct svm_range { struct list_head list; struct list_head update_list; uint64_t npages; + uint64_t vram_pages; dma_addr_t *dma_addr[MAX_GPU_INSTANCE]; struct ttm_resource *ttm_res; uint64_t offset; @@ -168,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange, int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); -int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, - unsigned long addr, struct svm_range *parent, - struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, uint64_t addr, bool write_fault); -- cgit v1.2.3 From 62fbfdbbe3a1f188a6310d9418956b918840cd33 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Fri, 1 Dec 2023 06:25:01 -0700 Subject: drm/amd/display: Add missing dcn35 RCO registers [Why] Some registers needed for root clock gating in dcn35 are not defined in the dccg header. [How] Add the needed registers and temporarily disable some register writes that are now taking place successfully until the registers can be properly enabled. Tested-by: Daniel Wheeler Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Signed-off-by: Daniel Miess Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h | 32 ++++++++++++ drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c | 62 ++++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h | 51 +++++++++++++++++++ 3 files changed, 143 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h index 76da59d8caaf..ef5c22f41563 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h @@ -296,6 +296,38 @@ type DTBCLK_P1_GATE_DISABLE;\ type DTBCLK_P2_GATE_DISABLE;\ type DTBCLK_P3_GATE_DISABLE;\ + type DSCCLK0_ROOT_GATE_DISABLE;\ + type DSCCLK1_ROOT_GATE_DISABLE;\ + type DSCCLK2_ROOT_GATE_DISABLE;\ + type DSCCLK3_ROOT_GATE_DISABLE;\ + type SYMCLKA_FE_ROOT_GATE_DISABLE;\ + type SYMCLKB_FE_ROOT_GATE_DISABLE;\ + type SYMCLKC_FE_ROOT_GATE_DISABLE;\ + type SYMCLKD_FE_ROOT_GATE_DISABLE;\ + type SYMCLKE_FE_ROOT_GATE_DISABLE;\ + type DPPCLK0_ROOT_GATE_DISABLE;\ + type DPPCLK1_ROOT_GATE_DISABLE;\ + type DPPCLK2_ROOT_GATE_DISABLE;\ + type DPPCLK3_ROOT_GATE_DISABLE;\ + type HDMISTREAMCLK0_ROOT_GATE_DISABLE;\ + type SYMCLKA_ROOT_GATE_DISABLE;\ + type SYMCLKB_ROOT_GATE_DISABLE;\ + type SYMCLKC_ROOT_GATE_DISABLE;\ + type SYMCLKD_ROOT_GATE_DISABLE;\ + type SYMCLKE_ROOT_GATE_DISABLE;\ + type PHYA_REFCLK_ROOT_GATE_DISABLE;\ + type PHYB_REFCLK_ROOT_GATE_DISABLE;\ + type PHYC_REFCLK_ROOT_GATE_DISABLE;\ + type PHYD_REFCLK_ROOT_GATE_DISABLE;\ + type PHYE_REFCLK_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK0_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK1_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK2_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK3_ROOT_GATE_DISABLE;\ + type DPSTREAMCLK0_GATE_DISABLE;\ + type DPSTREAMCLK1_GATE_DISABLE;\ + type DPSTREAMCLK2_GATE_DISABLE;\ + type DPSTREAMCLK3_GATE_DISABLE;\ struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c index 
142efd390d86..f1ba7bb792ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c @@ -506,6 +506,64 @@ static void dccg35_dpp_root_clock_control( dccg->dpp_clock_gated[dpp_inst] = !clock_on; } +static void dccg35_disable_symclk32_se( + struct dccg *dccg, + int hpo_se_inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* set refclk as the source for symclk32_se */ + switch (hpo_se_inst) { + case 0: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE0_SRC_SEL, 0, + SYMCLK32_SE0_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) { + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, 0); +// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, +// SYMCLK32_ROOT_SE0_GATE_DISABLE, 0); + } + break; + case 1: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE1_SRC_SEL, 0, + SYMCLK32_SE1_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) { + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, 0); +// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, +// SYMCLK32_ROOT_SE1_GATE_DISABLE, 0); + } + break; + case 2: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE2_SRC_SEL, 0, + SYMCLK32_SE2_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) { + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, 0); +// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, +// SYMCLK32_ROOT_SE2_GATE_DISABLE, 0); + } + break; + case 3: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE3_SRC_SEL, 0, + SYMCLK32_SE3_EN, 0); + if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) { + REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, 0); +// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3, +// SYMCLK32_ROOT_SE3_GATE_DISABLE, 0); + } + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + void dccg35_init(struct dccg *dccg) { int otg_inst; @@ -514,7 +572,7 @@ void dccg35_init(struct dccg *dccg) * will cause DCN to hang. 
*/ for (otg_inst = 0; otg_inst < 4; otg_inst++) - dccg31_disable_symclk32_se(dccg, otg_inst); + dccg35_disable_symclk32_se(dccg, otg_inst); if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) for (otg_inst = 0; otg_inst < 2; otg_inst++) @@ -788,7 +846,7 @@ static const struct dccg_funcs dccg35_funcs = { .dccg_init = dccg35_init, .set_dpstreamclk = dccg35_set_dpstreamclk, .enable_symclk32_se = dccg31_enable_symclk32_se, - .disable_symclk32_se = dccg31_disable_symclk32_se, + .disable_symclk32_se = dccg35_disable_symclk32_se, .enable_symclk32_le = dccg31_enable_symclk32_le, .disable_symclk32_le = dccg31_disable_symclk32_le, .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h index bde48bee0119..1586a45ca3bd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.h @@ -34,6 +34,7 @@ #define DCCG_REG_LIST_DCN35() \ DCCG_REG_LIST_DCN314(),\ SR(DPPCLK_CTRL),\ + SR(DCCG_GATE_DISABLE_CNTL4),\ SR(DCCG_GATE_DISABLE_CNTL5),\ SR(DCCG_GATE_DISABLE_CNTL6),\ SR(DCCG_GLOBAL_FGCG_REP_CNTL),\ @@ -180,6 +181,56 @@ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\ + 
DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYE_REFCLK_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\ + DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\ + DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL, DISPCLK_DCCG_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\ + DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\ struct dccg *dccg35_create( struct dc_context *ctx, -- cgit v1.2.3 From b17ef04bf3a4346d66404454d6a646343ddc9749 Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Fri, 1 Dec 2023 06:25:01 -0700 Subject: drm/amd/display: Pass pwrseq inst for backlight and ABM [Why] OTG inst and pwrseq inst mapping is not aligned, therefore we cannot use otg_inst as pwrseq inst to get the DCIO register. [How] 1. Pass the correct pwrseq instance to dmub when setting the abm pipe. 2. LVTMA control index change from panel_inst to pwrseq_inst.
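A rough sketch of the idea (only the DIGA/DIGB-to-0/1 mapping and the 0xF fallback are taken from the translate_dig_inst_to_pwrseq_inst() hunk added to link_factory.c further down; the rest is a simplified illustration, not DC code): the power sequencer index follows the DIG engine that drives the eDP/LVDS link, not the OTG instance, so indexing pwrseq/DCIO registers by otg_inst can program the wrong sequencer.

static unsigned int pwrseq_inst_for_dig(enum engine_id eng_id)
{
	/* pwrseq follows the DIG engine, not the OTG instance */
	switch (eng_id) {
	case ENGINE_ID_DIGA:
		return 0;
	case ENGINE_ID_DIGB:
		return 1;
	default:
		return 0xF;	/* unknown/unsupported */
	}
}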
Tested-by: Daniel Wheeler Reviewed-by: Phil Hsieh Acked-by: Rodrigo Siqueira Signed-off-by: Lewis Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 4 +- .../gpu/drm/amd/display/dc/bios/command_table2.c | 12 ++--- .../gpu/drm/amd/display/dc/bios/command_table2.h | 2 +- drivers/gpu/drm/amd/display/dc/dc_bios_types.h | 2 +- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 8 ++- drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c | 7 ++- drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h | 2 +- .../drm/amd/display/dc/dcn31/dcn31_panel_cntl.c | 5 +- .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 16 +++--- .../drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c | 36 ++++++++++--- drivers/gpu/drm/amd/display/dc/inc/hw/abm.h | 3 +- drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h | 2 + drivers/gpu/drm/amd/display/dc/link/link_factory.c | 59 +++++++++++++++------- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 14 ++++- 14 files changed, 119 insertions(+), 53 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 7cdb1a8a0ba0..2fb804eb5702 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1691,7 +1691,7 @@ static enum bp_result bios_parser_enable_disp_power_gating( static enum bp_result bios_parser_enable_lvtma_control( struct dc_bios *dcb, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait) { struct bios_parser *bp = BP_FROM_DCB(dcb); @@ -1699,7 +1699,7 @@ static enum bp_result bios_parser_enable_lvtma_control( if (!bp->cmd_tbl.enable_lvtma_control) return BP_RESULT_FAILURE; - return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, bypass_panel_control_wait); + return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, pwrseq_instance, bypass_panel_control_wait); } static bool bios_parser_is_accelerated_mode( diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 90a02d7bd3da..ab0adabf9dd4 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -976,7 +976,7 @@ static unsigned int get_smu_clock_info_v3_1(struct bios_parser *bp, uint8_t id) static enum bp_result enable_lvtma_control( struct bios_parser *bp, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait); static void init_enable_lvtma_control(struct bios_parser *bp) @@ -989,7 +989,7 @@ static void init_enable_lvtma_control(struct bios_parser *bp) static void enable_lvtma_control_dmcub( struct dc_dmub_srv *dmcub, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait) { @@ -1002,8 +1002,8 @@ static void enable_lvtma_control_dmcub( DMUB_CMD__VBIOS_LVTMA_CONTROL; cmd.lvtma_control.data.uc_pwr_action = uc_pwr_on; - cmd.lvtma_control.data.panel_inst = - panel_instance; + cmd.lvtma_control.data.pwrseq_inst = + pwrseq_instance; cmd.lvtma_control.data.bypass_panel_control_wait = bypass_panel_control_wait; dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); @@ -1012,7 +1012,7 @@ static void enable_lvtma_control_dmcub( static enum bp_result enable_lvtma_control( struct bios_parser *bp, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait) { enum bp_result result = 
BP_RESULT_FAILURE; @@ -1021,7 +1021,7 @@ static enum bp_result enable_lvtma_control( bp->base.ctx->dc->debug.dmub_command_table) { enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv, uc_pwr_on, - panel_instance, + pwrseq_instance, bypass_panel_control_wait); return BP_RESULT_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h index b6d09bf6cf72..41c8c014397f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h @@ -96,7 +96,7 @@ struct cmd_tbl { struct bios_parser *bp, uint8_t id); enum bp_result (*enable_lvtma_control)(struct bios_parser *bp, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait); }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index be9aa1a71847..26940d94d8fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -140,7 +140,7 @@ struct dc_vbios_funcs { enum bp_result (*enable_lvtma_control)( struct dc_bios *bios, uint8_t uc_pwr_on, - uint8_t panel_instance, + uint8_t pwrseq_instance, uint8_t bypass_panel_control_wait); enum bp_result (*get_soc_bb_info)( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index d3e6544022b7..930fd929e93a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -145,7 +145,11 @@ static bool dmub_abm_save_restore_ex( return ret; } -static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst) +static bool dmub_abm_set_pipe_ex(struct abm *abm, + uint32_t otg_inst, + uint32_t option, + uint32_t panel_inst, + uint32_t pwrseq_inst) { bool ret = false; unsigned int feature_support; @@ -153,7 +157,7 @@ static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t op feature_support = abm_feature_support(abm, panel_inst); if (feature_support == ABM_LCD_SUPPORT) - ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst); + ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst, pwrseq_inst); return ret; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index 592a8f7a1c6d..42c802afc468 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -254,7 +254,11 @@ bool dmub_abm_save_restore( return true; } -bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst) +bool dmub_abm_set_pipe(struct abm *abm, + uint32_t otg_inst, + uint32_t option, + uint32_t panel_inst, + uint32_t pwrseq_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = abm->ctx; @@ -264,6 +268,7 @@ bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint cmd.abm_set_pipe.header.type = DMUB_CMD__ABM; cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE; cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst; + cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst; cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option; cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst; cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h index 
853564d7f471..07ea6c8d414f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h @@ -44,7 +44,7 @@ bool dmub_abm_save_restore( struct dc_context *dc, unsigned int panel_inst, struct abm_save_restore *pData); -bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst); +bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst); bool dmub_abm_set_backlight_level(struct abm *abm, unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 217acd4e292a..d849b1eaa4a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -50,7 +50,7 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub cmd->panel_cntl.header.type = DMUB_CMD__PANEL_CNTL; cmd->panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_QUERY_BACKLIGHT_INFO; cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data); - cmd->panel_cntl.data.inst = dcn31_panel_cntl->base.inst; + cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst; return dm_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); } @@ -78,7 +78,7 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL; cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_HW_INIT; cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data); - cmd.panel_cntl.data.inst = dcn31_panel_cntl->base.inst; + cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst; cmd.panel_cntl.data.bl_pwm_cntl = panel_cntl->stored_backlight_registers.BL_PWM_CNTL; cmd.panel_cntl.data.bl_pwm_period_cntl = panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL; cmd.panel_cntl.data.bl_pwm_ref_div1 = @@ -157,4 +157,5 @@ void dcn31_panel_cntl_construct( dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs; dcn31_panel_cntl->base.ctx = init_data->ctx; dcn31_panel_cntl->base.inst = init_data->inst; + dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 0a331d17ee92..c73fe5e9b361 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -790,7 +790,7 @@ void dce110_edp_power_control( struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; enum bp_result bp_result; - uint8_t panel_instance; + uint8_t pwrseq_instance; if (dal_graphics_object_id_get_connector_id(link->link_enc->connector) @@ -873,7 +873,7 @@ void dce110_edp_power_control( cntl.coherent = false; cntl.lanes_number = LANE_COUNT_FOUR; cntl.hpd_sel = link->link_enc->hpd_source; - panel_instance = link->panel_cntl->inst; + pwrseq_instance = link->panel_cntl->pwrseq_inst; if (ctx->dc->ctx->dmub_srv && ctx->dc->debug.dmub_command_table) { @@ -881,11 +881,11 @@ void dce110_edp_power_control( if (cntl.action == TRANSMITTER_CONTROL_POWER_ON) { bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, LVTMA_CONTROL_POWER_ON, - panel_instance, link->link_powered_externally); + pwrseq_instance, link->link_powered_externally); } else { bp_result = 
ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, LVTMA_CONTROL_POWER_OFF, - panel_instance, link->link_powered_externally); + pwrseq_instance, link->link_powered_externally); } } @@ -956,7 +956,7 @@ void dce110_edp_backlight_control( { struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; - uint8_t panel_instance; + uint8_t pwrseq_instance; unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; unsigned int post_T7_delay = OLED_POST_T7_DELAY; @@ -1009,7 +1009,7 @@ void dce110_edp_backlight_control( */ /* dc_service_sleep_in_milliseconds(50); */ /*edp 1.2*/ - panel_instance = link->panel_cntl->inst; + pwrseq_instance = link->panel_cntl->pwrseq_inst; if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) { if (!link->dc->config.edp_no_power_sequencing) @@ -1034,11 +1034,11 @@ void dce110_edp_backlight_control( if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, LVTMA_CONTROL_LCD_BLON, - panel_instance, link->link_powered_externally); + pwrseq_instance, link->link_powered_externally); else ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios, LVTMA_CONTROL_LCD_BLOFF, - panel_instance, link->link_powered_externally); + pwrseq_instance, link->link_powered_externally); } link_transmitter_control(ctx->dc_bios, &cntl); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 467812cf3368..08783ad097d2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -137,7 +137,8 @@ void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx) pipe_ctx->stream->dpms_off = true; } -static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst) +static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, + uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = abm->ctx; @@ -147,6 +148,7 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t optio cmd.abm_set_pipe.header.type = DMUB_CMD__ABM; cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE; cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst; + cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst; cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option; cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst; cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary; @@ -179,7 +181,6 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) struct abm *abm = pipe_ctx->stream_res.abm; uint32_t otg_inst = pipe_ctx->stream_res.tg->inst; struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; - struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu; if (dmcu) { @@ -190,9 +191,13 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) if (abm && panel_cntl) { if (abm->funcs && abm->funcs->set_pipe_ex) { abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, - panel_cntl->inst); + panel_cntl->inst, panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, panel_cntl->inst); + dmub_abm_set_pipe(abm, + otg_inst, + SET_ABM_PIPE_IMMEDIATELY_DISABLE, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } panel_cntl->funcs->store_backlight_level(panel_cntl); } @@ -212,9 +217,16 @@ void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) if (abm && panel_cntl) { if 
(abm->funcs && abm->funcs->set_pipe_ex) { - abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst); + abm->funcs->set_pipe_ex(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst); + dmub_abm_set_pipe(abm, otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } } } @@ -237,9 +249,17 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, if (abm && panel_cntl) { if (abm->funcs && abm->funcs->set_pipe_ex) { - abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst); + abm->funcs->set_pipe_ex(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst); + dmub_abm_set_pipe(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h index 33db15d69f23..9f521cf0fc5a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h @@ -64,7 +64,8 @@ struct abm_funcs { bool (*set_pipe_ex)(struct abm *abm, unsigned int otg_inst, unsigned int option, - unsigned int panel_inst); + unsigned int panel_inst, + unsigned int pwrseq_inst); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h index 24af9d80b937..248adc1705e3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h @@ -56,12 +56,14 @@ struct panel_cntl_funcs { struct panel_cntl_init_data { struct dc_context *ctx; uint32_t inst; + uint32_t pwrseq_inst; }; struct panel_cntl { const struct panel_cntl_funcs *funcs; struct dc_context *ctx; uint32_t inst; + uint32_t pwrseq_inst; /* registers setting needs to be saved and restored at InitBacklight */ struct panel_cntl_backlight_registers stored_backlight_registers; }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 6b306ea58b9b..5464d8d26bd3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -369,6 +369,30 @@ static enum transmitter translate_encoder_to_transmitter( } } +static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link) +{ + uint8_t pwrseq_inst = 0xF; + struct dc_context *dc_ctx = link->dc->ctx; + + DC_LOGGER_INIT(dc_ctx->logger); + + switch (link->eng_id) { + case ENGINE_ID_DIGA: + pwrseq_inst = 0; + break; + case ENGINE_ID_DIGB: + pwrseq_inst = 1; + break; + default: + DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id); + ASSERT(false); + break; + } + + return pwrseq_inst; +} + + static void link_destruct(struct dc_link *link) { int i; @@ -596,24 +620,6 @@ static bool construct_phy(struct dc_link *link, link->ddc_hw_inst = dal_ddc_get_line(get_ddc_pin(link->ddc)); - - if (link->dc->res_pool->funcs->panel_cntl_create && - (link->link_id.id == CONNECTOR_ID_EDP || - link->link_id.id == CONNECTOR_ID_LVDS)) { - panel_cntl_init_data.ctx = dc_ctx; - panel_cntl_init_data.inst = - panel_cntl_init_data.ctx->dc_edp_id_count; - link->panel_cntl = - link->dc->res_pool->funcs->panel_cntl_create( - &panel_cntl_init_data); - panel_cntl_init_data.ctx->dc_edp_id_count++; - - if (link->panel_cntl == NULL) { - 
DC_ERROR("Failed to create link panel_cntl!\n"); - goto panel_cntl_create_fail; - } - } - enc_init_data.ctx = dc_ctx; bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0, &enc_init_data.encoder); @@ -644,6 +650,23 @@ static bool construct_phy(struct dc_link *link, link->dc->res_pool->dig_link_enc_count++; link->link_enc_hw_inst = link->link_enc->transmitter; + + if (link->dc->res_pool->funcs->panel_cntl_create && + (link->link_id.id == CONNECTOR_ID_EDP || + link->link_id.id == CONNECTOR_ID_LVDS)) { + panel_cntl_init_data.ctx = dc_ctx; + panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count; + panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link); + link->panel_cntl = + link->dc->res_pool->funcs->panel_cntl_create( + &panel_cntl_init_data); + panel_cntl_init_data.ctx->dc_edp_id_count++; + + if (link->panel_cntl == NULL) { + DC_ERROR("Failed to create link panel_cntl!\n"); + goto panel_cntl_create_fail; + } + } for (i = 0; i < 4; i++) { if (bp_funcs->get_device_tag(dc_ctx->dc_bios, link->link_id, i, diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index a365f6c096de..3c092064c72e 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -3498,6 +3498,16 @@ struct dmub_cmd_abm_set_pipe_data { * TODO: Remove. */ uint8_t ramping_boundary; + + /** + * PwrSeq HW Instance. + */ + uint8_t pwrseq_inst; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[3]; }; /** @@ -3878,7 +3888,7 @@ enum dmub_cmd_panel_cntl_type { * struct dmub_cmd_panel_cntl_data - Panel control data. */ struct dmub_cmd_panel_cntl_data { - uint32_t inst; /**< panel instance */ + uint32_t pwrseq_inst; /**< pwrseq instance */ uint32_t current_backlight; /* in/out */ uint32_t bl_pwm_cntl; /* in/out */ uint32_t bl_pwm_period_cntl; /* in/out */ @@ -3937,7 +3947,7 @@ struct dmub_cmd_lvtma_control_data { uint8_t uc_pwr_action; /**< LVTMA_ACTION */ uint8_t bypass_panel_control_wait; uint8_t reserved_0[2]; /**< For future use */ - uint8_t panel_inst; /**< LVTMA control instance */ + uint8_t pwrseq_inst; /**< LVTMA control instance */ uint8_t reserved_1[3]; /**< For future use */ }; -- cgit v1.2.3 From d5df648ec830cfd775bdacb3a3640c1e16de90f2 Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Fri, 1 Dec 2023 06:25:03 -0700 Subject: drm/amd/display: Change dither policy for 10bpc to round We use spatial dither by default for all output bpc (6/8/10). While it makes some sense for FP16, for ARGB2101010 surfaces it makes little sense as even if we skip color pipeline to preserve bit accuracy, spatial dither adds random noise so a few percent pixels are 1 bit off. This commit chages the 10bpc out dither policy to rounding. Also, in Polaris/Vega times, policy used to be round for 10bpc out; it looks like it got inadvertently changed for Navi. Difference is only detectable with capture cards. 
Tested-by: Daniel Wheeler Reviewed-by: Aric Cyr Reviewed-by: Anthony Koo Acked-by: Rodrigo Siqueira Signed-off-by: Krunoslav Kovac Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index e1c02527d04a..4382d9ae4292 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -4773,7 +4773,7 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, option = DITHER_OPTION_SPATIAL8; break; case COLOR_DEPTH_101010: - option = DITHER_OPTION_SPATIAL10; + option = DITHER_OPTION_TRUN10; break; default: option = DITHER_OPTION_DISABLE; @@ -4799,6 +4799,8 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; + if (option == DITHER_OPTION_TRUN10) + fmt_bit_depth->flags.TRUNCATE_MODE = 1; } /* special case - Formatter can only reduce by 4 bits at most. -- cgit v1.2.3 From ab779466166348eecf17d20f620aa9a47965c934 Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Fri, 1 Dec 2023 06:25:04 -0700 Subject: drm/amd/display: Increase scratch buffer size [Why] Larger data blocks are expected to be transferred between driver and FW in the future. [How] Embiggen the scratch buffer to a cromulent size. Tested-by: Daniel Wheeler Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Josip Pavic Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 38360adc53d9..a329a6ecf4dd 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -64,7 +64,7 @@ /* Default scratch mem size. */ -#define DMUB_SCRATCH_MEM_SIZE (256) +#define DMUB_SCRATCH_MEM_SIZE (1024) /* Number of windows in use. */ #define DMUB_NUM_WINDOWS (DMUB_WINDOW_TOTAL) -- cgit v1.2.3 From 23cf5a5cd33a518b6bdbe9966dc49f1cf6bfe532 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 1 Dec 2023 06:25:05 -0700 Subject: drm/amd/display: insert drv-pmfw log + rollback to new context Rollback to new context for active display: this was the previously tested sequence. Avoid doing the OTG master toggle if there is no active display at all; this w/a was for the fifo error.
Tested-by: Daniel Wheeler Reviewed-by: Chris Park Acked-by: Rodrigo Siqueira Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 27 ++++++++------ .../drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c | 43 ++++++++++++++++------ 2 files changed, 47 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index d5fde7d23fbf..9e0fa01ecb79 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -80,12 +80,12 @@ static int dcn35_get_active_display_cnt_wa( struct dc *dc, - struct dc_state *context) + struct dc_state *context, + int *all_active_disps) { - int i, display_count; + int i, display_count = 0; bool tmds_present = false; - display_count = 0; for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; @@ -103,7 +103,8 @@ static int dcn35_get_active_display_cnt_wa( link->link_enc->funcs->is_dig_enabled(link->link_enc)) display_count++; } - + if (all_active_disps != NULL) + *all_active_disps = display_count; /* WA for hang on HDMI after display off back on*/ if (display_count == 0 && tmds_present) display_count = 1; @@ -224,15 +225,16 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; + int display_count = 0; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; + int all_active_disps = 0; if (dc->work_arounds.skip_clock_update) return; - /* DTBCLK is fixed, so set a default if unspecified. 
*/ + display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps); if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) new_clocks->ref_dtbclk_khz = 600000; @@ -254,7 +256,6 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } /* check that we're not already in lower */ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { - display_count = dcn35_get_active_display_cnt_wa(dc, context); /* if we can go lower, go lower */ if (display_count == 0) clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; @@ -311,11 +312,13 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); + if (all_active_disps != 0) { + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); + } else + dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); update_dispclk = true; } @@ -826,7 +829,7 @@ static void dcn35_set_low_power_state(struct clk_mgr *clk_mgr_base) struct dc_state *context = dc->current_state; if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { - display_count = dcn35_get_active_display_cnt_wa(dc, context); + display_count = dcn35_get_active_display_cnt_wa(dc, context, NULL); /* if we can go lower, go lower */ if (display_count == 0) clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c index af0a0f292595..786b62ab8d0f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c @@ -279,7 +279,7 @@ void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, u clk_mgr, VBIOSSMC_MSG_SetDisplayIdleOptimizations, idle_info); - smu_print("VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info = %d\n", idle_info); + smu_print("%s: VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info = %x\n", __func__, idle_info); } void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable) @@ -298,7 +298,7 @@ void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool e clk_mgr, VBIOSSMC_MSG_SetDisplayIdleOptimizations, idle_info.data); - smu_print("dcn35_smu_enable_phy_refclk_pwrdwn = %d\n", enable ? 1 : 0); + smu_print("%s smu_enable_phy_refclk_pwrdwn = %d\n", __func__, enable ? 
1 : 0); } void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr) @@ -310,6 +310,7 @@ void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr) clk_mgr, VBIOSSMC_MSG_UpdatePmeRestore, 0); + smu_print("%s: SMC_MSG_UpdatePmeRestore\n", __func__); } void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high) @@ -350,7 +351,7 @@ void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr) void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support) { - unsigned int msg_id, param; + unsigned int msg_id, param, retv; if (!clk_mgr->smu_present) return; @@ -360,27 +361,32 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst case DCN_ZSTATE_SUPPORT_ALLOW: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10) | (1 << 9) | (1 << 8); + smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_DISALLOW: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = 0; + smu_print("%s: SMC_MSG_AllowZstatesEntr msg_id = DISALLOW, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10); + smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10) | (1 << 8); + smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 8); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_ONLY, param = %d\n", __func__, param); break; default: //DCN_ZSTATE_SUPPORT_UNKNOWN @@ -390,11 +396,11 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst } - dcn35_smu_send_msg_with_param( + retv = dcn35_smu_send_msg_with_param( clk_mgr, msg_id, param); - smu_print("dcn35_smu_set_zstate_support msg_id = %d, param = %d\n", msg_id, param); + smu_print("%s: msg_id = %d, param = 0x%x, return = %d\n", __func__, msg_id, param, retv); } int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr) @@ -408,7 +414,7 @@ int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr) VBIOSSMC_MSG_GetDprefclkFreq, 0); - smu_print("dcn35_smu_get_DPREF clk = %d mhz\n", dprefclk); + smu_print("%s: SMU DPREF clk = %d mhz\n", __func__, dprefclk); return dprefclk * 1000; } @@ -423,7 +429,7 @@ int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr) VBIOSSMC_MSG_GetDtbclkFreq, 0); - smu_print("dcn35_smu_get_dtbclk = %d mhz\n", dtbclk); + smu_print("%s: get_dtbclk = %dmhz\n", __func__, dtbclk); return dtbclk * 1000; } /* Arg = 1: Turn DTB on; 0: Turn DTB CLK OFF. when it is on, it is 600MHZ */ @@ -436,7 +442,7 @@ void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable) clk_mgr, VBIOSSMC_MSG_SetDtbClk, enable); - smu_print("dcn35_smu_set_dtbclk = %d \n", enable ? 1 : 0); + smu_print("%s: smu_set_dtbclk = %d\n", __func__, enable ? 1 : 0); } void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable) @@ -445,30 +451,45 @@ void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *cl clk_mgr, VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown, enable); + smu_print("%s: smu_enable_48mhz_tmdp_refclk_pwrdwn = %d\n", __func__, enable ? 
1 : 0); } int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr) { - return dcn35_smu_send_msg_with_param( + int retv; + + retv = dcn35_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_DispPsrExit, 0); + smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv); + return retv; } int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr) { - return dcn35_smu_send_msg_with_param( + int retv; + + retv = dcn35_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_QueryIPS2Support, 0); + + smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv); + return retv; } void dcn35_smu_write_ips_scratch(struct clk_mgr_internal *clk_mgr, uint32_t param) { REG_WRITE(MP1_SMN_C2PMSG_71, param); + smu_print("%s: write_ips_scratch = %x\n", __func__, param); } uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr) { - return REG_READ(MP1_SMN_C2PMSG_71); + uint32_t retv; + + retv = REG_READ(MP1_SMN_C2PMSG_71); + smu_print("%s: dcn35_smu_read_ips_scratch = %x\n", __func__, retv); + return retv; } -- cgit v1.2.3 From 94bbf802efd0a8f13147d6664af6e653637340a8 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Fri, 1 Dec 2023 06:25:06 -0700 Subject: drm/amd/display: Fix MST PBN/X.Y value calculations Changing PBN calculation to be more in line with spec. We don't need to inflate PBN_NATIVE value by the 1.006 margin, since that is already taken care of in the get_pbn_per_slot function. Tested-by: Daniel Wheeler Reviewed-by: Wenjing Liu Acked-by: Rodrigo Siqueira Signed-off-by: Ilya Bakoulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index aa0a086aa7fc..b16b2e14312e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -1057,18 +1057,21 @@ static struct fixed31_32 get_pbn_from_bw_in_kbps(uint64_t kbps) uint32_t denominator = 1; /* - * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 + * The 1.006 factor (margin 5300ppm + 300ppm ~ 0.6% as per spec) is not + * required when determining PBN/time slot utilization on the link between + * us and the branch, since that overhead is already accounted for in + * the get_pbn_per_slot function. + * * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on * common multiplier to render an integer PBN for all link rate/lane * counts combinations * calculate - * peak_kbps *= (1006/1000) * peak_kbps *= (64/54) - * peak_kbps *= 8 convert to bytes + * peak_kbps /= (8 * 1000) convert to bytes */ - numerator = 64 * PEAK_FACTOR_X1000; - denominator = 54 * 8 * 1000 * 1000; + numerator = 64; + denominator = 54 * 8 * 1000; kbps *= numerator; peak_kbps = dc_fixpt_from_fraction(kbps, denominator); -- cgit v1.2.3 From 823423b8ec7b56e22dad83e171c9ca6418679169 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 1 Dec 2023 06:25:07 -0700 Subject: drm/amd/display: Use channel_width = 2 for vram table 3.0 VBIOS has suggested to use channel_width=2 for any ASIC that uses vram info 3.0. 
This is because channel_width in the vram table no longer represents the memory width Tested-by: Daniel Wheeler Reviewed-by: Samson Tam Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 2fb804eb5702..01abc2f3081a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2386,7 +2386,13 @@ static enum bp_result get_vram_info_v30( return BP_RESULT_BADBIOSTABLE; info->num_chans = info_v30->channel_num; - info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8; + /* As suggested by VBIOS we should always use + * dram_channel_width_bytes = 2 when using VRAM + * table version 3.0. This is because the channel_width + * param in the VRAM info table is changed in 7000 series and + * no longer represents the memory channel width. + */ + info->dram_channel_width_bytes = 2; return result; } -- cgit v1.2.3 From 2cbed167d2698f10a67f47f14aaac7d498f6dfb7 Mon Sep 17 00:00:00 2001 From: Johnson Chen Date: Fri, 1 Dec 2023 06:25:08 -0700 Subject: drm/amd/display: Fix null pointer Add guard for NULL pointer access Tested-by: Daniel Wheeler Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Signed-off-by: Johnson Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 6ba393e5b8ee..38fa7441df51 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -158,6 +158,8 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) /* If this assert is hit then we have a link encoder dynamic management issue */ ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); + if (pipe_ctx->stream == NULL) + return false; /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { return (pipe_ctx->stream_res.hpo_dp_stream_enc && -- cgit v1.2.3 From d24e50e1005fd584e0fea138aa153349e13b4d94 Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 1 Dec 2023 06:25:09 -0700 Subject: drm/amd/display: Skip DPIA-specific DP LL automation flag for non-DPIA links [Why] The is_automated flag logic only applies to USB4 DPIA links during DP LL compliance test automation. The flag should not be set for non-DPIA cases. [How] Add check for DPIA link endpoint type before setting the flag. Also, rename is_automated to skip_fallback_on_link_loss for clarity. 
Tested-by: Daniel Wheeler Reviewed-by: Meenakshikumar Somasundaram Reviewed-by: Wenjing Liu Acked-by: Rodrigo Siqueira Signed-off-by: George Shen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 8 +++++++- .../gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c | 6 ++++-- .../gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e8c1599ab18a..6ee4db8c0548 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1541,7 +1541,13 @@ struct dc_link { bool is_dig_mapping_flexible; bool hpd_status; /* HPD status of link without physical HPD pin. */ bool is_hpd_pending; /* Indicates a new received hpd */ - bool is_automated; /* Indicates automated testing */ + + /* USB4 DPIA links skip verifying link cap, instead performing the fallback method + * for every link training. This is incompatible with DP LL compliance automation, + * which expects the same link settings to be used every retry on a link loss. + * This flag is used to skip the fallback when link loss occurs during automation. + */ + bool skip_fallback_on_link_loss; bool edp_sink_present; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 0c00e94e90b1..ae271c830c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -265,7 +265,7 @@ void dp_handle_link_loss(struct dc_link *link) for (i = count - 1; i >= 0; i--) { // Always use max settings here for DP 1.4a LL Compliance CTS - if (link->is_automated) { + if (link->skip_fallback_on_link_loss) { pipes[i]->link_config.dp_link_settings.lane_count = link->verified_link_cap.lane_count; pipes[i]->link_config.dp_link_settings.link_rate = @@ -404,7 +404,9 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link, if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { // Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC - link->is_automated = true; + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + link->skip_fallback_on_link_loss = true; + device_service_clear.bits.AUTOMATED_TEST = 1; core_link_write_dpcd( link, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index 4f4e899e5c46..e8dda44b23cb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -811,7 +811,7 @@ static enum link_training_result dpia_training_eq_transparent( /* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4 * has to share encoders unlike DP and USBC */ - if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->is_automated && retries_eq)) { + if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->skip_fallback_on_link_loss && retries_eq)) { result = LINK_TRAINING_SUCCESS; break; } @@ -1037,7 +1037,7 @@ enum link_training_result dpia_perform_link_training( */ if (result == LINK_TRAINING_SUCCESS) { fsleep(5000); - if (!link->is_automated) + if (!link->skip_fallback_on_link_loss) result = dp_check_link_loss_status(link, <_settings); } else if (result == LINK_TRAINING_ABORT) 
dpia_training_abort(link, <_settings, repeater_id); -- cgit v1.2.3 From d218291579de53fad8242ad1ae732604de25b635 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 1 Dec 2023 06:25:10 -0700 Subject: drm/amd/display: Update BIOS FW info table revision [Why] BIOS FW info version 3.5 is introduced to support new ASICs, but it's content is currently same as 3.4. [How] Include minor version 5 in parsing to enable support. Tested-by: Daniel Wheeler Reviewed-by: Dillon Varone Acked-by: Rodrigo Siqueira Signed-off-by: Chris Park Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 01abc2f3081a..971473c69b32 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1747,6 +1747,7 @@ static enum bp_result bios_parser_get_firmware_info( result = get_firmware_info_v3_2(bp, info); break; case 4: + case 5: result = get_firmware_info_v3_4(bp, info); break; default: -- cgit v1.2.3 From c59397eff9439bbc8b9a9835142e99ea0abf9cde Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 1 Dec 2023 06:25:11 -0700 Subject: drm/amd/display: revert removing otg toggle w/a back when no active display w/a use case: - dual display, compliance, toggling between the displays - switching between 120Hz 420 -> 144Hz 444 and vice versa - switching between 144Hz -> 60Hz TMDS or vice versa It'd typically involve TMDS in some capacity since that's the only link signal we leave the OTG running but DIO/PHY off you can hit this in cases where you have multiple displays as well it syncs with the first active OTG, so if you had OTG[0] mapped and FIFO off you'd hit it even if OTG[1] was mapped and had FIFO Tested-by: Daniel Wheeler Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 10 ++++------ drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c | 6 +++--- 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 9e0fa01ecb79..439414ef464a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -312,13 +312,11 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - if (all_active_disps != 0) { - dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); - dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); - } else - dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); + dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c index 786b62ab8d0f..d6db9d7fced2 100644 --- 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c @@ -475,14 +475,14 @@ int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr) VBIOSSMC_MSG_QueryIPS2Support, 0); - smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv); + //smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv); return retv; } void dcn35_smu_write_ips_scratch(struct clk_mgr_internal *clk_mgr, uint32_t param) { REG_WRITE(MP1_SMN_C2PMSG_71, param); - smu_print("%s: write_ips_scratch = %x\n", __func__, param); + //smu_print("%s: write_ips_scratch = %x\n", __func__, param); } uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr) @@ -490,6 +490,6 @@ uint32_t dcn35_smu_read_ips_scratch(struct clk_mgr_internal *clk_mgr) uint32_t retv; retv = REG_READ(MP1_SMN_C2PMSG_71); - smu_print("%s: dcn35_smu_read_ips_scratch = %x\n", __func__, retv); + //smu_print("%s: dcn35_smu_read_ips_scratch = %x\n", __func__, retv); return retv; } -- cgit v1.2.3 From e6ae4c47e8f8941fde115434fd8884e4e972cf6b Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 1 Dec 2023 06:25:12 -0700 Subject: drm/amd/display: Pass debug watermarks through to DCN35 DML2 [Why] The DC debug options currently do not function for dynamically adjusting our watermarks. [How] Hook them up before passing them to DML2. Also make sure we're using dc->bb_overrides since dc->debug isn't populated during dc_construct. Tested-by: Daniel Wheeler Reviewed-by: Michael Strauss Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 39cf1ae3a3e1..e9d88f52717b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -326,6 +326,25 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, dcn3_5_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000.0; } + + if (dc->bb_overrides.dram_clock_change_latency_ns > 0) + dcn3_5_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + + if (dc->bb_overrides.sr_exit_time_ns > 0) + dcn3_5_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0) + dcn3_5_soc.sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_exit_z8_time_ns > 0) + dcn3_5_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0) + dcn3_5_soc.sr_enter_plus_exit_z8_time_us = + dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0; + /*temp till dml2 fully work without dml1*/ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); -- cgit v1.2.3 From 43484c4bdb6eb2f74cec61e4e7cfcb6ce8e69e2f Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Fri, 1 Dec 2023 06:25:13 -0700 Subject: drm/amd/display: Added delay to DPM log HW registers were being read to quickly, causing incorrect values to be logged after a clock frequency was changed Tested-by: Daniel Wheeler Reviewed-by: Martin Leung Acked-by: Rodrigo Siqueira Signed-off-by: Relja Vojvodic Signed-off-by: Alex Deucher --- 
.../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 26 +++++++++++++--------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index a496930b1f9c..95c0b49b531a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -460,18 +460,24 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base) static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr_internal *clk_mgr) { - unsigned int dispclk_khz_reg = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK - unsigned int dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK - unsigned int dprefclk_khz_reg = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK - unsigned int dcfclk_khz_reg = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK - unsigned int dtbclk_khz_reg = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK - unsigned int fclk_khz_reg = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK + unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg, + fclk_khz_reg; + int dramclk_khz_override, fclk_khz_override, num_fclk_levels; + + msleep(5); + + dispclk_khz_reg = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK + dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK + dprefclk_khz_reg = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK + dcfclk_khz_reg = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK + dtbclk_khz_reg = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK + fclk_khz_reg = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK // Overrides for these clocks in case there is no p_state change support - int dramclk_khz_override = new_clocks->dramclk_khz; - int fclk_khz_override = new_clocks->fclk_khz; + dramclk_khz_override = new_clocks->dramclk_khz; + fclk_khz_override = new_clocks->fclk_khz; - int num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; + num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; if (!new_clocks->p_state_change_support) { dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000; @@ -707,7 +713,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, dmcu->funcs->set_psr_wait_loop(dmcu, clk_mgr_base->clks.dispclk_khz / 1000 / 7); - if (dc->config.enable_auto_dpm_test_logs) { + if (dc->config.enable_auto_dpm_test_logs && safe_to_lower) { dcn32_auto_dpm_test_log(new_clocks, clk_mgr); } } -- cgit v1.2.3 From d3586c707b8f64cbe5b778cfe59ac4b8a4be0d3b Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 1 Dec 2023 06:25:14 -0700 Subject: drm/amd/display: keep domain24 power on if eDP not exist This is w/a: we need to keep domain 24 power up in driver side, and let dmubfw handle it for S0i3. For last display unplugged, if OTG in PG, no interrupt call back coming. 
Tested-by: Daniel Wheeler Reviewed-by: Chris Park Acked-by: Rodrigo Siqueira Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 39260371beb9..09498aa92096 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -979,6 +979,8 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, bool hpo_frl_stream_enc_acquired = false; bool hpo_dp_stream_enc_acquired = false; int i = 0, j = 0; + int edp_num = 0; + struct dc_link *edp_links[MAX_NUM_EDP] = { NULL }; memset(update_state, 0, sizeof(struct pg_block_update)); @@ -1019,10 +1021,24 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (pipe_ctx->stream_res.opp) update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false; + } + /*domain24 controls all the otg, mpc, opp, as long as one otg is still up, avoid enabling OTG PG*/ + for (i = 0; i < dc->res_pool->timing_generator_count; i++) { + struct timing_generator *tg = dc->res_pool->timing_generators[i]; + if (tg && tg->funcs->is_tg_enabled(tg)) { + update_state->pg_pipe_res_update[PG_OPTC][i] = false; + break; + } + } - if (pipe_ctx->stream_res.tg) - update_state->pg_pipe_res_update[PG_OPTC][pipe_ctx->stream_res.tg->inst] = false; + dc_get_edp_links(dc, edp_links, &edp_num); + if (edp_num == 0 || + ((!edp_links[0] || !edp_links[0]->edp_sink_present) && + (!edp_links[1] || !edp_links[1]->edp_sink_present))) { + /*eDP not exist on this config, keep Domain24 power on, for S0i3, this will be handled in dmubfw*/ + update_state->pg_pipe_res_update[PG_OPTC][0] = false; } + } void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, @@ -1156,8 +1172,10 @@ void dcn35_block_power_control(struct dc *dc, pg_cntl->funcs->dwb_pg_control(pg_cntl, power_on); } + /*this will need all the clients to unregister optc interruts let dmubfw handle this*/ if (pg_cntl->funcs->plane_otg_pg_control) pg_cntl->funcs->plane_otg_pg_control(pg_cntl, power_on); + } void dcn35_root_clock_control(struct dc *dc, -- cgit v1.2.3 From 2ce156482a6fef349d2eba98e5070c412d3af662 Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Fri, 1 Dec 2023 06:25:15 -0700 Subject: drm/amd/display: Fix disable_otg_wa logic [Why] When switching to another HDMI mode, we are unnecesarilly disabling/enabling FIFO causing both HPO and DIG registers to be set at the same time when only HPO is supposed to be set. This can lead to a system hang the next time we change refresh rates as there are cases when we don't disable OTG/FIFO but FIFO is enabled when it isn't supposed to be. [How] Removing the enable/disable FIFO entirely. 
Tested-by: Daniel Wheeler Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Susanto Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 439414ef464a..8d4c0b209872 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -127,21 +127,13 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * continue; if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || !pipe->stream->link_enc)) { - struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; - if (disable) { - if (stream_enc && stream_enc->funcs->disable_fifo) - pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); - if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); - - if (stream_enc && stream_enc->funcs->enable_fifo) - pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); } } } -- cgit v1.2.3 From 21afc872fbc29cd68cfde816d1df4d55848c3f61 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Fri, 1 Dec 2023 06:25:16 -0700 Subject: drm/amd/display: Add monitor patch for specific eDP [WHY] Some eDP panels's ext caps don't write initial value cause the value of dpcd_addr(0x317) is random. It means that sometimes the eDP will clarify it is OLED, miniLED...etc cause the backlight control interface is incorrect. [HOW] Add a new panel patch to remove sink ext caps(HDR,OLED...etc) Tested-by: Daniel Wheeler Reviewed-by: Sun peng Li Acked-by: Rodrigo Siqueira Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index b4696ec621c4..eaf8d9f48244 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -64,6 +64,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_fams = true; break; + /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ + case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): + case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.remove_sink_ext_caps = true; + break; default: return; } -- cgit v1.2.3 From cfa96a14e89d8341a7308acc4c6168991d4fdac0 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Fri, 1 Dec 2023 06:25:17 -0700 Subject: drm/amd/display: add MPC MCM 1D LUT clock gating programming Missing clock gating programming blocks memory power on from boot up. 
Tested-by: Daniel Wheeler Reviewed-by: Chris Park Acked-by: Rodrigo Siqueira Signed-off-by: Yihan Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c | 3 ++- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c index 994b21ed272f..e789e654c387 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c @@ -71,12 +71,13 @@ void mpc32_power_on_blnd_lut( { struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, MPCC_MCM_1DLUT_MEM_PWR_DIS, power_on); + if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) { if (power_on) { REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0); REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5); } else if (!mpc->ctx->dc->debug.disable_mem_low_power) { - ASSERT(false); /* TODO: change to mpc * dpp_base->ctx->dc->optimized_required = true; * dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 1a0ed1c7e2d4..13324422ff50 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = { .i2c = true, .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled .dscl = true, - .cm = false, + .cm = true, .mpc = true, .optc = true, .vpg = true, -- cgit v1.2.3 From abd26a3252cbd1a3ae4e46d37596d176fe50b41a Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 1 Dec 2023 06:25:18 -0700 Subject: drm/amd/display: Add dml2 copy functions Add function to handle deep copying dml2 context. 
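A minimal usage sketch of the new helpers, assuming a caller shaped like dc_copy_state() in the hunk below (the error handling shown is only illustrative):

	/* Illustrative sketch: duplicate the DML2 context attached to a dc_state
	 * using the dml2_create_copy() helper added by this patch.
	 */
	if (new_ctx->bw_ctx.dml2 &&
	    !dml2_create_copy(&new_ctx->bw_ctx.dml2, src_ctx->bw_ctx.dml2)) {
		/* allocation inside dml2_create_copy() failed */
		dc_release_state(new_ctx);
		return NULL;
	}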
Tested-by: Daniel Wheeler Reviewed-by: Chaitanya Dhere Acked-by: Rodrigo Siqueira Signed-off-by: Dillon Varone Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 15 +++-------- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 29 +++++++++++++++++++++- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h | 4 +++ 3 files changed, 36 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index eab713c0da0d..102d00a9a24f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2258,23 +2258,16 @@ struct dc_state *dc_copy_state(struct dc_state *src_ctx) { int i, j; struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL); -#ifdef CONFIG_DRM_AMD_DC_FP - struct dml2_context *dml2 = NULL; -#endif if (!new_ctx) return NULL; memcpy(new_ctx, src_ctx, sizeof(struct dc_state)); #ifdef CONFIG_DRM_AMD_DC_FP - if (new_ctx->bw_ctx.dml2) { - dml2 = kzalloc(sizeof(struct dml2_context), GFP_KERNEL); - if (!dml2) - return NULL; - - memcpy(dml2, src_ctx->bw_ctx.dml2, sizeof(struct dml2_context)); - new_ctx->bw_ctx.dml2 = dml2; - } + if (new_ctx->bw_ctx.dml2 && !dml2_create_copy(&new_ctx->bw_ctx.dml2, src_ctx->bw_ctx.dml2)) { + dc_release_state(new_ctx); + return NULL; + } #endif for (i = 0; i < MAX_PIPES; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 8f231418870f..9d354fde6908 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -691,10 +691,15 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, bool fast_v return out; } +static inline struct dml2_context *dml2_allocate_memory(void) +{ + return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); +} + bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { // Allocate Mode Lib Ctx - *dml2 = (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + *dml2 = dml2_allocate_memory(); if (!(*dml2)) return false; @@ -745,3 +750,25 @@ void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, *fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0]; *dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0]; } + +void dml2_copy(struct dml2_context *dst_dml2, + struct dml2_context *src_dml2) +{ + /* copy Mode Lib Ctx */ + memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context)); +} + +bool dml2_create_copy(struct dml2_context **dst_dml2, + struct dml2_context *src_dml2) +{ + /* Allocate Mode Lib Ctx */ + *dst_dml2 = dml2_allocate_memory(); + + if (!(*dst_dml2)) + return false; + + /* copy Mode Lib Ctx */ + dml2_copy(*dst_dml2, src_dml2); + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index fe15baa4bf09..0de6886969c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -191,6 +191,10 @@ bool dml2_create(const struct dc *in_dc, struct dml2_context **dml2); void dml2_destroy(struct dml2_context *dml2); +void dml2_copy(struct dml2_context *dst_dml2, + struct dml2_context *src_dml2); +bool dml2_create_copy(struct dml2_context **dst_dml2, + struct dml2_context 
*src_dml2); /* * dml2_validate - Determines if a display configuration is supported or not. -- cgit v1.2.3 From dd5c6362ddcd8bdb07704faff8648593885ecfa1 Mon Sep 17 00:00:00 2001 From: Dennis Chan Date: Fri, 1 Dec 2023 06:25:19 -0700 Subject: drm/amd/display: Fix Replay Desync Error IRQ handler In previous case, Replay didn't identify the IRQ type, This commit fixes the issues for the interrupt. Tested-by: Daniel Wheeler Reviewed-by: Robin Chen Acked-by: Rodrigo Siqueira Signed-off-by: Dennis Chan Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/protocols/link_dp_irq_handler.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index ae271c830c9a..ba69874be5a4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -190,9 +190,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) /*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/ union psr_error_status replay_error_status; - if (link->replay_settings.config.force_disable_desync_error_check) - return; - if (!link->replay_settings.replay_feature_enabled) return; @@ -210,9 +207,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) &replay_error_status.raw, sizeof(replay_error_status.raw)); - if (replay_configuration.bits.DESYNC_ERROR_STATUS) - link->replay_settings.config.received_desync_error_hpd = 1; - link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR = replay_error_status.bits.LINK_CRC_ERROR; link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR = @@ -225,6 +219,12 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) link->replay_settings.config.replay_error_status.bits.STATE_TRANSITION_ERROR) { bool allow_active; + if (link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR) + link->replay_settings.config.received_desync_error_hpd = 1; + + if (link->replay_settings.config.force_disable_desync_error_check) + return; + /* Acknowledge and clear configuration bits */ dm_helpers_dp_write_dpcd( link->ctx, -- cgit v1.2.3 From 08a32addf17317b9fac55be9b31275cbf6e41fb7 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 1 Dec 2023 06:25:20 -0700 Subject: drm/amd/display: add support for DTO genarated dscclk Current implementation will choose to use refclk as dscclk. This is not recommended by hardware team as refclk is a fixed value which could cause unnecessary power consumption or it could be not enough for large DSC timings. So we are adding new interfaces so we could switch to use dynamically generated DSCCLK by DTO. So DSCCLK is programmable based on current pixel clock and dispclk. 
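As a minimal sketch of the arithmetic behind the new guard (assuming pix_clk_100hz is expressed in units of 100 Hz, as elsewhere in DC; the names mirror the hunks below):

	/* Illustrative sketch only: why 480000 is the cutoff used below.
	 *   480000 * 100 Hz = 48 MHz pixel clock = 3 * 16 MHz
	 * Below this, a DTO-generated DSCCLK could drop under the ~16 MHz floor
	 * described in the code comments, so the refclk-based DSCCLK is kept.
	 */
	bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
				     stream->timing.pix_clk_100hz > 480000;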
Tested-by: Daniel Wheeler Reviewed-by: Chaitanya Dhere Acked-by: Rodrigo Siqueira Signed-off-by: Wenjing Liu Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 25 ++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 4 ++++ drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 27 +++++++++++++++++++++- 3 files changed, 55 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 5f7f474ef51c..5c323718ec90 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -989,9 +989,22 @@ static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream, static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; + struct dc *dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; struct pipe_ctx *odm_pipe; int opp_cnt = 1; + struct dccg *dccg = dc->res_pool->dccg; + /* It has been found that when DSCCLK is lower than 16Mhz, we will get DCN + * register access hung. When DSCCLk is based on refclk, DSCCLk is always a + * fixed value higher than 16Mhz so the issue doesn't occur. When DSCCLK is + * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings + * with DSC such as 480p60Hz, the dispclk could be low enough to trigger + * this problem. We are implementing a workaround here to keep using dscclk + * based on fixed value refclk when timing is smaller than 3x16Mhz (i.e + * 48Mhz) pixel clock to avoid hitting this problem. + */ + bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) && + stream->timing.pix_clk_100hz > 480000; ASSERT(dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) @@ -1014,12 +1027,16 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; ASSERT(odm_dsc); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); } dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; dsc_cfg.pic_width *= opp_cnt; @@ -1039,9 +1056,13 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) OPTC_DSC_DISABLED, 0, 0); /* disable DSC block */ + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst); dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { ASSERT(odm_pipe->stream_res.dsc); + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst); odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); } } @@ -1124,6 +1145,10 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe->stream_res.dsc) { struct display_stream_compressor 
*dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc; + struct dccg *dccg = dc->res_pool->dccg; + + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, dsc->inst); /* disconnect DSC block from stream */ dsc->funcs->dsc_disconnect(dsc); } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index ce2f0c0e82bd..6b44557fcb1a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -201,6 +201,10 @@ struct dccg_funcs { struct dccg *dccg, enum streamclk_source src, uint32_t otg_inst); + void (*set_dto_dscclk)( + struct dccg *dccg, + uint32_t dsc_inst); + void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); }; #endif //__DAL_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index b16b2e14312e..5fe8b4871c77 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -776,10 +776,26 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable) */ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) { + /* TODO: Move this to HWSS as this is hardware programming sequence not a + * link layer sequence + */ struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; + struct dc *dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; struct pipe_ctx *odm_pipe; int opp_cnt = 1; + struct dccg *dccg = dc->res_pool->dccg; + /* It has been found that when DSCCLK is lower than 16Mhz, we will get DCN + * register access hung. When DSCCLk is based on refclk, DSCCLk is always a + * fixed value higher than 16Mhz so the issue doesn't occur. When DSCCLK is + * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings + * with DSC such as 480p60Hz, the dispclk could be low enough to trigger + * this problem. We are implementing a workaround here to keep using dscclk + * based on fixed value refclk when timing is smaller than 3x16Mhz (i.e + * 48Mhz) pixel clock to avoid hitting this problem. 
+ */ + bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) && + stream->timing.pix_clk_100hz > 480000; DC_LOGGER_INIT(dsc->ctx->logger); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) @@ -802,11 +818,15 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); } dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; dsc_cfg.pic_width *= opp_cnt; @@ -856,9 +876,14 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* disable DSC block */ + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst); pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst); odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); + } } } -- cgit v1.2.3 From b6411638c026fde33046f5515a5a7d37af1da146 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:21 -0700 Subject: drm/amd/display: Avoid virtual stream encoder if not explicitly requested Virtual stream encoder should not be a free match for thunderbolt or usbc, and thus should be avoided. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index af1b31f4e69a..d08d10969251 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -1250,7 +1250,10 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( /* Store first available for MST second display * in daisy chain use case */ - j = i; + + if (pool->stream_enc[i]->id != ENGINE_ID_VIRTUAL) + j = i; + if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id == link->link_enc->preferred_engine) return pool->stream_enc[i]; -- cgit v1.2.3 From 80af8859b46d1fa386871f71bad95db9ff50ad62 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 1 Dec 2023 06:25:22 -0700 Subject: drm/amd/display: Skip entire amdgpu_dm build if !CONFIG_DRM_AMD_DC [WHY] Previously this only excluded build for a few amdgpu_dm binaries which makes no sense. 
[HOW] Wrap the entire Makefile in "ifneq ($(CONFIG_DRM_AMD_DC),)" Tested-by: Daniel Wheeler Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/Makefile | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 8bf94920d23e..063205ecb137 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -25,22 +25,24 @@ +ifneq ($(CONFIG_DRM_AMD_DC),) AMDGPUDM = \ amdgpu_dm.o \ amdgpu_dm_plane.o \ amdgpu_dm_crtc.o \ amdgpu_dm_irq.o \ amdgpu_dm_mst_types.o \ - amdgpu_dm_color.o + amdgpu_dm_color.o \ + amdgpu_dm_services.o \ + amdgpu_dm_helpers.o \ + amdgpu_dm_pp_smu.o \ + amdgpu_dm_psr.o \ + amdgpu_dm_replay.o ifdef CONFIG_DRM_AMD_DC_FP AMDGPUDM += dc_fpu.o endif -ifneq ($(CONFIG_DRM_AMD_DC),) -AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o amdgpu_dm_psr.o amdgpu_dm_replay.o -endif - AMDGPUDM += amdgpu_dm_hdcp.o ifneq ($(CONFIG_DEBUG_FS),) @@ -52,3 +54,4 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc AMDGPU_DM = $(addprefix $(AMDDALPATH)/amdgpu_dm/,$(AMDGPUDM)) AMD_DISPLAY_FILES += $(AMDGPU_DM) +endif -- cgit v1.2.3 From 198891fd2902fba155fe23f8ad27c9cf8cd8286d Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 1 Dec 2023 06:25:23 -0700 Subject: drm/amd/display: Create one virtual connector in DC [WHAT] Prepare a virtual connector for writeback. Tested-by: Daniel Wheeler Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 11 +++++++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bc7b8a325353..9b791e3f7e33 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1675,6 +1675,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0]; init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0]; + /* Enable DWB for tested platforms only */ + if (adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0)) + init_data.num_virtual_links = 1; + INIT_LIST_HEAD(&adev->dm.da_list); retrieve_dmi_info(&adev->dm); @@ -4465,6 +4469,11 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) continue; } + link = dc_get_link_at_index(dm->dc, i); + + if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) + continue; + aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL); if (!aconnector) goto fail; @@ -4483,8 +4492,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; } - link = dc_get_link_at_index(dm->dc, i); - if (!dc_link_detect_connection_type(link, &new_connection_type)) DRM_ERROR("KMS: Failed to detect connector\n"); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 97798cee876e..5d62805f3bdf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -96,7 +96,8 @@ static void enable_memory_low_power(struct dc *dc) if 
(dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) { // Power down VPGs for (i = 0; i < dc->res_pool->stream_enc_count; i++) - dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg); + if (dc->res_pool->stream_enc[i]->vpg) + dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg); #if defined(CONFIG_DRM_AMD_DC_FP) for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++) dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg); -- cgit v1.2.3 From dfc03588cf8ce2af8ef810cd226dc98ee4fbac38 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:24 -0700 Subject: drm/amd/display: Initialize writeback connector [WHAT] Create a drm_writeback_connector when connector signal equals SIGNAL_TYPE_VIRTUAL. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/Makefile | 3 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 +- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c | 209 +++++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h | 35 ++++ 4 files changed, 265 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 063205ecb137..ab2a97e354da 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -37,7 +37,8 @@ AMDGPUDM = \ amdgpu_dm_helpers.o \ amdgpu_dm_pp_smu.o \ amdgpu_dm_psr.o \ - amdgpu_dm_replay.o + amdgpu_dm_replay.o \ + amdgpu_dm_wb.o ifdef CONFIG_DRM_AMD_DC_FP AMDGPUDM += dc_fpu.o diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9b791e3f7e33..51a873658535 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -54,6 +54,7 @@ #include "amdgpu_dm_crtc.h" #include "amdgpu_dm_hdcp.h" #include +#include "amdgpu_dm_wb.h" #include "amdgpu_pm.h" #include "amdgpu_atombios.h" @@ -4471,8 +4472,25 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) link = dc_get_link_at_index(dm->dc, i); - if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) + if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) { + struct drm_writeback_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL); + + if (!wbcon) { + DRM_ERROR("KMS: Failed to allocate writeback connector\n"); + continue; + } + + if (amdgpu_dm_wb_connector_init(dm, wbcon)) { + DRM_ERROR("KMS: Failed to initialize writeback connector\n"); + kfree(wbcon); + continue; + } + + link->psr_settings.psr_feature_enabled = false; + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; + continue; + } aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL); if (!aconnector) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c new file mode 100644 index 000000000000..74e656696d8e --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dm_services_types.h" + +#include "amdgpu.h" +#include "amdgpu_dm.h" +#include "amdgpu_dm_wb.h" +#include "amdgpu_display.h" + +#include +#include + +static const u32 amdgpu_dm_wb_formats[] = { + DRM_FORMAT_XRGB2101010, +}; + +static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct drm_framebuffer *fb; + const struct drm_display_mode *mode = &crtc_state->mode; + bool found = false; + uint8_t i; + + if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + return 0; + + fb = conn_state->writeback_job->fb; + if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) { + DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n", + fb->width, fb->height); + return -EINVAL; + } + + for (i = 0; i < sizeof(amdgpu_dm_wb_formats) / sizeof(u32); i++) { + if (fb->format->format == amdgpu_dm_wb_formats[i]) + found = true; + } + + if (!found) { + DRM_DEBUG_KMS("Invalid pixel format %p4cc\n", + &fb->format->format); + return -EINVAL; + } + + return 0; +} + + +static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + + return drm_add_modes_noedid(connector, dev->mode_config.max_width, + dev->mode_config.max_height); +} + +static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector, + struct drm_writeback_job *job) +{ + struct amdgpu_framebuffer *afb; + struct drm_gem_object *obj; + struct amdgpu_device *adev; + struct amdgpu_bo *rbo; + uint32_t domain; + int r; + + if (!job->fb) { + DRM_DEBUG_KMS("No FB bound\n"); + return 0; + } + + afb = to_amdgpu_framebuffer(job->fb); + obj = job->fb->obj[0]; + rbo = gem_to_amdgpu_bo(obj); + adev = amdgpu_ttm_adev(rbo->tbo.bdev); + + r = amdgpu_bo_reserve(rbo, true); + if (r) { + dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + return r; + } + + r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + if (r) { + dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + goto error_unlock; + } + + domain = amdgpu_display_supported_domains(adev, rbo->flags); + + r = amdgpu_bo_pin(rbo, domain); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to pin framebuffer with error %d\n", r); + goto error_unlock; + } + + r = amdgpu_ttm_alloc_gart(&rbo->tbo); + if (unlikely(r != 0)) { + DRM_ERROR("%p bind failed\n", rbo); + goto 
error_unpin; + } + + amdgpu_bo_unreserve(rbo); + + afb->address = amdgpu_bo_gpu_offset(rbo); + + amdgpu_bo_ref(rbo); + + return 0; + +error_unpin: + amdgpu_bo_unpin(rbo); + +error_unlock: + amdgpu_bo_unreserve(rbo); + return r; +} + +static void amdgpu_dm_wb_cleanup_job(struct drm_writeback_connector *connector, + struct drm_writeback_job *job) +{ + struct amdgpu_bo *rbo; + int r; + + if (!job->fb) + return; + + rbo = gem_to_amdgpu_bo(job->fb->obj[0]); + r = amdgpu_bo_reserve(rbo, false); + if (unlikely(r)) { + DRM_ERROR("failed to reserve rbo before unpin\n"); + return; + } + + amdgpu_bo_unpin(rbo); + amdgpu_bo_unreserve(rbo); + amdgpu_bo_unref(&rbo); +} + +static const struct drm_encoder_helper_funcs amdgpu_dm_wb_encoder_helper_funcs = { + .atomic_check = amdgpu_dm_wb_encoder_atomic_check, +}; + +static const struct drm_connector_funcs amdgpu_dm_wb_connector_funcs = { + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = amdgpu_dm_connector_funcs_reset, + .atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = { + .get_modes = amdgpu_dm_wb_connector_get_modes, + .prepare_writeback_job = amdgpu_dm_wb_prepare_job, + .cleanup_writeback_job = amdgpu_dm_wb_cleanup_job, +}; + +int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm, + struct drm_writeback_connector *wbcon) +{ + int res = 0; + + drm_connector_helper_add(&wbcon->base, &amdgpu_dm_wb_conn_helper_funcs); + + res = drm_writeback_connector_init(&dm->adev->ddev, wbcon, + &amdgpu_dm_wb_connector_funcs, + &amdgpu_dm_wb_encoder_helper_funcs, + amdgpu_dm_wb_formats, + ARRAY_SIZE(amdgpu_dm_wb_formats), + amdgpu_dm_get_encoder_crtc_mask(dm->adev)); + + if (res) + return res; + /* + * Some of the properties below require access to state, like bpc. + * Allocate some default initial connector state with our reset helper. + */ + if (wbcon->base.funcs->reset) + wbcon->base.funcs->reset(&wbcon->base); + + return 0; +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h new file mode 100644 index 000000000000..0bc9df7e7ee1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __AMDGPU_DM_WB_H__ +#define __AMDGPU_DM_WB_H__ + +#include + +int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm, + struct drm_writeback_connector *wbcon); + +#endif -- cgit v1.2.3 From 7db7ade270ae8e177cc8bd09753745e7c2dc92e7 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 1 Dec 2023 06:25:25 -0700 Subject: drm/amd/display: Skip writeback connector when we get amdgpu_dm_connector [WHY] Writeback connectors are based on a different object: drm_writeback_connector, and are therefore different from amdgpu_dm_connector. We need to be careful to ensure code designed for amdgpu_dm_connector doesn't inadvertently try to operate on a drm_writeback_connector. [HOW] Skip them when connector type is DRM_MODE_CONNECTOR_WRITEBACK. Tested-by: Daniel Wheeler Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 62 ++++++++++++++++++++-- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 3 ++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 22 +++++--- 3 files changed, 76 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 51a873658535..1d9a10384f93 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -728,6 +728,10 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (link && aconnector->dc_link == link) { if (notify->type == DMUB_NOTIFICATION_HPD) @@ -951,6 +955,10 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port, drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->audio_inst != port) continue; @@ -2257,6 +2265,10 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type == dc_connection_mst_branch && aconnector->mst_mgr.aux) { @@ -2385,6 +2397,10 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type != dc_connection_mst_branch || aconnector->mst_root) @@ -2906,6 +2922,10 @@ static int dm_resume(void *handle) /* Do detection*/ drm_connector_list_iter_begin(ddev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + 
aconnector = to_amdgpu_dm_connector(connector); if (!aconnector->dc_link) @@ -3479,6 +3499,9 @@ static void register_hpd_handlers(struct amdgpu_device *adev) list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); dc_link = aconnector->dc_link; @@ -5516,10 +5539,13 @@ static void fill_stream_properties_from_drm_display_mode( { struct dc_crtc_timing *timing_out = &stream->timing; const struct drm_display_info *info = &connector->display_info; - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *aconnector = NULL; struct hdmi_vendor_infoframe hv_frame; struct hdmi_avi_infoframe avi_frame; + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + aconnector = to_amdgpu_dm_connector(connector); + memset(&hv_frame, 0, sizeof(hv_frame)); memset(&avi_frame, 0, sizeof(avi_frame)); @@ -6964,6 +6990,9 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, for_each_new_connector_in_state(state, connector, new_con_state, i) { + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (!aconnector->mst_output_port) @@ -8523,6 +8552,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev, if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) continue; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + notify: aconnector = to_amdgpu_dm_connector(connector); @@ -8556,6 +8588,9 @@ notify: if (!status) continue; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); mutex_lock(&adev->dm.audio_lock); @@ -8779,7 +8814,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *aconnector; + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + aconnector = to_amdgpu_dm_connector(connector); if (!adev->dm.hdcp_workqueue) continue; @@ -9173,10 +9213,15 @@ out: void dm_restore_drm_connector_state(struct drm_device *dev, struct drm_connector *connector) { - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *aconnector; struct amdgpu_crtc *disconnected_acrtc; struct dm_crtc_state *acrtc_state; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return; + + aconnector = to_amdgpu_dm_connector(connector); + if (!aconnector->dc_sink || !connector->state || !connector->encoder) return; @@ -9253,12 +9298,16 @@ static void get_freesync_config_for_crtc( struct dm_connector_state *new_con_state) { struct mod_freesync_config config = {0}; - struct amdgpu_dm_connector *aconnector = - to_amdgpu_dm_connector(new_con_state->base.connector); + struct amdgpu_dm_connector *aconnector; struct drm_display_mode *mode = &new_crtc_state->base.mode; int vrefresh = drm_mode_vrefresh(mode); bool fs_vid_mode = false; + if (new_con_state->base.connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return; + + aconnector = 
to_amdgpu_dm_connector(new_con_state->base.connector); + new_crtc_state->vrr_supported = new_con_state->freesync_capable && vrefresh >= aconnector->min_vfreq && vrefresh <= aconnector->max_vfreq; @@ -10088,6 +10137,9 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm if (conn_state->crtc != crtc) continue; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (!aconnector->mst_output_port || !aconnector->mst_root) aconnector = NULL; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 52ecfa746b54..f936a35fa9eb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -326,6 +326,9 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) if (!connector->state || connector->state->crtc != crtc) continue; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconn = to_amdgpu_dm_connector(connector); break; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 51467f132c26..58b880acb087 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -894,10 +894,15 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { - struct amdgpu_dm_connector *amdgpu_dm_connector = - to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *amdgpu_dm_connector; + const struct dc_link *dc_link; - const struct dc_link *dc_link = amdgpu_dm_connector->dc_link; + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + amdgpu_dm_connector = to_amdgpu_dm_connector(connector); + + dc_link = amdgpu_dm_connector->dc_link; if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { dc_interrupt_set(adev->dm.dc, @@ -930,9 +935,14 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { - struct amdgpu_dm_connector *amdgpu_dm_connector = - to_amdgpu_dm_connector(connector); - const struct dc_link *dc_link = amdgpu_dm_connector->dc_link; + struct amdgpu_dm_connector *amdgpu_dm_connector; + const struct dc_link *dc_link; + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + amdgpu_dm_connector = to_amdgpu_dm_connector(connector); + dc_link = amdgpu_dm_connector->dc_link; if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { dc_interrupt_set(adev->dm.dc, -- cgit v1.2.3 From 748b091d641638e68330b1b24195eaba9aadf997 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 1 Dec 2023 06:25:26 -0700 Subject: drm/amd/display: Return drm_connector from find_first_crtc_matching_connector [WHY] We will be dealing with two types of connector: amdgpu_dm_connector and drm_writeback_connector. [HOW] We want to find both and then cast to the appriopriate type afterwards. 
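As an aside for reviewers (illustrative only, not one of the hunks below), the caller pattern this series moves toward looks roughly like the following; "state", "crtc" and the locals are placeholders for whatever the call site already has:

	struct drm_connector *connector;
	struct amdgpu_dm_connector *aconnector = NULL;

	/* Look up the generic DRM connector feeding this CRTC. */
	connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);

	/*
	 * Only downcast for non-writeback connectors; writeback connectors
	 * are built on drm_writeback_connector, not amdgpu_dm_connector.
	 */
	if (connector && connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
		aconnector = to_amdgpu_dm_connector(connector);
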
Tested-by: Daniel Wheeler Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 4 +++- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1d9a10384f93..5c89a043ac10 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2664,7 +2664,7 @@ static int dm_suspend(void *handle) return 0; } -struct amdgpu_dm_connector * +struct drm_connector * amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, struct drm_crtc *crtc) { @@ -2677,7 +2677,7 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, crtc_from_state = new_con_state->crtc; if (crtc_from_state == crtc) - return to_amdgpu_dm_connector(connector); + return connector; } return NULL; @@ -9407,6 +9407,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, * update changed items */ struct amdgpu_crtc *acrtc = NULL; + struct drm_connector *connector = NULL; struct amdgpu_dm_connector *aconnector = NULL; struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL; struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL; @@ -9416,7 +9417,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); acrtc = to_amdgpu_crtc(crtc); - aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); + connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); + aconnector = to_amdgpu_dm_connector(connector); /* TODO This hack should go away */ if (aconnector && enable) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 3d480be802cb..3710f4d0f2cb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -834,7 +834,7 @@ struct dc_stream_state * int dm_atomic_get_state(struct drm_atomic_state *state, struct dm_atomic_state **dm_state); -struct amdgpu_dm_connector * +struct drm_connector * amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index b599efda3b19..19ae5587dd5a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1500,14 +1500,16 @@ int pre_validate_dsc(struct drm_atomic_state *state, int ind = find_crtc_index_in_state_by_stream(state, stream); if (ind >= 0) { + struct drm_connector *connector; struct amdgpu_dm_connector *aconnector; struct drm_connector_state *drm_new_conn_state; struct dm_connector_state *dm_new_conn_state; struct dm_crtc_state *dm_old_crtc_state; - aconnector = + connector = amdgpu_dm_find_first_crtc_matching_connector(state, state->crtcs[ind].ptr); + aconnector = to_amdgpu_dm_connector(connector); drm_new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base); -- cgit v1.2.3 From 
3e094a2875260543ca74838decc0c995d3765096 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 1 Dec 2023 06:25:27 -0700 Subject: drm/amd/display: Use drm_connector in create_stream_for_sink [WHAT] We need to use this function for both amdgpu_dm_connectors and drm_writeback_connectors. Modify it to operate on a drm_connector as a common base. Tested-by: Daniel Wheeler Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 65 +++++++++++++---------- 1 file changed, 37 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5c89a043ac10..46477192265e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5558,6 +5558,7 @@ static void fill_stream_properties_from_drm_display_mode( && stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; else if (drm_mode_is_420_also(info, mode_in) + && aconnector && aconnector->force_yuv420_output) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444) @@ -5593,7 +5594,7 @@ static void fill_stream_properties_from_drm_display_mode( timing_out->hdmi_vic = hv_frame.vic; } - if (is_freesync_video_mode(mode_in, aconnector)) { + if (aconnector && is_freesync_video_mode(mode_in, aconnector)) { timing_out->h_addressable = mode_in->hdisplay; timing_out->h_total = mode_in->htotal; timing_out->h_sync_width = mode_in->hsync_end - mode_in->hsync_start; @@ -6070,14 +6071,14 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, } static struct dc_stream_state * -create_stream_for_sink(struct amdgpu_dm_connector *aconnector, +create_stream_for_sink(struct drm_connector *connector, const struct drm_display_mode *drm_mode, const struct dm_connector_state *dm_state, const struct dc_stream_state *old_stream, int requested_bpc) { + struct amdgpu_dm_connector *aconnector = NULL; struct drm_display_mode *preferred_mode = NULL; - struct drm_connector *drm_connector; const struct drm_connector_state *con_state = &dm_state->base; struct dc_stream_state *stream = NULL; struct drm_display_mode mode; @@ -6096,20 +6097,22 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, drm_mode_init(&mode, drm_mode); memset(&saved_mode, 0, sizeof(saved_mode)); - if (aconnector == NULL) { - DRM_ERROR("aconnector is NULL!\n"); + if (connector == NULL) { + DRM_ERROR("connector is NULL!\n"); return stream; } - drm_connector = &aconnector->base; - - if (!aconnector->dc_sink) { - sink = create_fake_sink(aconnector); - if (!sink) - return stream; - } else { - sink = aconnector->dc_sink; - dc_sink_retain(sink); + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) { + aconnector = NULL; + aconnector = to_amdgpu_dm_connector(connector); + if (!aconnector->dc_sink) { + sink = create_fake_sink(aconnector); + if (!sink) + return stream; + } else { + sink = aconnector->dc_sink; + dc_sink_retain(sink); + } } stream = dc_create_stream_for_sink(sink); @@ -6119,12 +6122,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, goto finish; } + /* We leave this NULL for writeback connectors */ stream->dm_stream_context = aconnector; stream->timing.flags.LTE_340MCSC_SCRAMBLE = - drm_connector->display_info.hdmi.scdc.scrambling.low_rates; + 
connector->display_info.hdmi.scdc.scrambling.low_rates; - list_for_each_entry(preferred_mode, &aconnector->base.modes, head) { + list_for_each_entry(preferred_mode, &connector->modes, head) { /* Search for preferred mode */ if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) { native_mode_found = true; @@ -6133,7 +6137,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, } if (!native_mode_found) preferred_mode = list_first_entry_or_null( - &aconnector->base.modes, + &connector->modes, struct drm_display_mode, head); @@ -6147,7 +6151,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, * and the modelist may not be filled in time. */ DRM_DEBUG_DRIVER("No preferred mode found\n"); - } else { + } else if (aconnector) { recalculate_timing = is_freesync_video_mode(&mode, aconnector); if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); @@ -6170,13 +6174,17 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, */ if (!scale || mode_refresh != preferred_refresh) fill_stream_properties_from_drm_display_mode( - stream, &mode, &aconnector->base, con_state, NULL, + stream, &mode, connector, con_state, NULL, requested_bpc); else fill_stream_properties_from_drm_display_mode( - stream, &mode, &aconnector->base, con_state, old_stream, + stream, &mode, connector, con_state, old_stream, requested_bpc); + /* The rest isn't needed for writeback connectors */ + if (!aconnector) + goto finish; + if (aconnector->timing_changed) { drm_dbg(aconnector->base.dev, "overriding timing for automated test, bpc %d, changing to %d\n", @@ -6194,7 +6202,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, fill_audio_info( &stream->audio_info, - drm_connector, + connector, sink); update_stream_signal(stream, sink); @@ -6662,7 +6670,7 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, enum dc_status dc_result = DC_OK; do { - stream = create_stream_for_sink(aconnector, drm_mode, + stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, requested_bpc); if (stream == NULL) { @@ -9418,15 +9426,16 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); acrtc = to_amdgpu_crtc(crtc); connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); - aconnector = to_amdgpu_dm_connector(connector); + if (connector && connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + aconnector = to_amdgpu_dm_connector(connector); /* TODO This hack should go away */ - if (aconnector && enable) { + if (connector && enable) { /* Make sure fake sink is created in plug-in scenario */ drm_new_conn_state = drm_atomic_get_new_connector_state(state, - &aconnector->base); + connector); drm_old_conn_state = drm_atomic_get_old_connector_state(state, - &aconnector->base); + connector); if (IS_ERR(drm_new_conn_state)) { ret = PTR_ERR_OR_ZERO(drm_new_conn_state); @@ -9573,7 +9582,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, * added MST connectors not found in existing crtc_state in the chained mode * TODO: need to dig out the root cause of that */ - if (!aconnector) + if (!connector) goto skip_modeset; if (modereset_required(new_crtc_state)) @@ -9616,7 +9625,7 @@ skip_modeset: * We want to do dc stream updates that do not require a * full modeset below. 
*/ - if (!(enable && aconnector && new_crtc_state->active)) + if (!(enable && connector && new_crtc_state->active)) return 0; /* * Given above conditions, the dc state cannot be NULL because: -- cgit v1.2.3 From dbf5d3d02987faa0eec3710dd687cd912362d7b5 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:28 -0700 Subject: drm/amd/display: Check writeback connectors in create_validate_stream_for_sink [WHY & HOW] This is to check connector type to avoid unhandled null pointer for writeback connectors. Tested-by: Daniel Wheeler Fixes: 60e034f28600 ("drm/amd/display: Revert "drm/amd/display: Use drm_connector in create_validate_stream_for_sink"") Signed-off-by: Alex Hung Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 46477192265e..315a747f9750 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6678,6 +6678,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, break; } + if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return stream; + dc_result = dc_validate_stream(adev->dm.dc, stream); if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream); @@ -9426,7 +9429,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); acrtc = to_amdgpu_crtc(crtc); connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc); - if (connector && connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + if (connector) aconnector = to_amdgpu_dm_connector(connector); /* TODO This hack should go away */ -- cgit v1.2.3 From ff73d4cdde18bc4607ff10c53351715ee1164be0 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 1 Dec 2023 06:25:29 -0700 Subject: drm/amd/display: Create amdgpu_dm_wb_connector [WHY] We need to track the dc_link and it would get confusing if re-using the amdgpu_dm_connector. [HOW] Creating new amdgpu_dm_wb_connector. 
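For reference, a minimal (non-authoritative) sketch of how later patches in this series recover the dc_link from a generic connector through the new wrapper; "connector" is assumed to already be a writeback connector here:

	struct drm_writeback_connector *wbcon;
	struct amdgpu_dm_wb_connector *dm_wbcon;
	struct dc_link *link;

	/* drm_connector -> drm_writeback_connector -> amdgpu_dm_wb_connector */
	wbcon = drm_connector_to_writeback(connector);
	dm_wbcon = to_amdgpu_dm_wb_connector(wbcon);

	/* The new wrapper carries the dc_link for this writeback connector. */
	link = dm_wbcon->link;
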
Tested-by: Daniel Wheeler Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 8 ++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c | 16 +++++++++++----- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h | 3 ++- 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 315a747f9750..0f41328510c2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4496,14 +4496,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) link = dc_get_link_at_index(dm->dc, i); if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) { - struct drm_writeback_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL); + struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL); if (!wbcon) { DRM_ERROR("KMS: Failed to allocate writeback connector\n"); continue; } - if (amdgpu_dm_wb_connector_init(dm, wbcon)) { + if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) { DRM_ERROR("KMS: Failed to initialize writeback connector\n"); kfree(wbcon); continue; @@ -7609,6 +7609,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, struct dc_link *link = dc_get_link_at_index(dc, link_index); struct amdgpu_i2c_adapter *i2c; + /* Not needed for writeback connector */ link->priv = aconnector; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 3710f4d0f2cb..0a9974017eba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -32,6 +32,7 @@ #include #include #include "link_service_types.h" +#include /* * This file contains the definition for amdgpu_display_manager @@ -714,6 +715,13 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status, #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) +struct amdgpu_dm_wb_connector { + struct drm_writeback_connector base; + struct dc_link *link; +}; + +#define to_amdgpu_dm_wb_connector(x) container_of(x, struct amdgpu_dm_wb_connector, base) + extern const struct amdgpu_ip_block_version dm_ip_block; struct dm_plane_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index 74e656696d8e..b3e634b0f712 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -30,6 +30,7 @@ #include "amdgpu_dm.h" #include "amdgpu_dm_wb.h" #include "amdgpu_display.h" +#include "dc.h" #include #include @@ -183,13 +184,18 @@ static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = }; int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm, - struct drm_writeback_connector *wbcon) + struct amdgpu_dm_wb_connector *wbcon, + uint32_t link_index) { + struct dc *dc = dm->dc; + struct dc_link *link = dc_get_link_at_index(dc, link_index); int res = 0; - drm_connector_helper_add(&wbcon->base, &amdgpu_dm_wb_conn_helper_funcs); + wbcon->link = link; - res = drm_writeback_connector_init(&dm->adev->ddev, wbcon, + drm_connector_helper_add(&wbcon->base.base, &amdgpu_dm_wb_conn_helper_funcs); + + res = drm_writeback_connector_init(&dm->adev->ddev, 
&wbcon->base, &amdgpu_dm_wb_connector_funcs, &amdgpu_dm_wb_encoder_helper_funcs, amdgpu_dm_wb_formats, @@ -202,8 +208,8 @@ int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm, * Some of the properties below require access to state, like bpc. * Allocate some default initial connector state with our reset helper. */ - if (wbcon->base.funcs->reset) - wbcon->base.funcs->reset(&wbcon->base); + if (wbcon->base.base.funcs->reset) + wbcon->base.base.funcs->reset(&wbcon->base.base); return 0; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h index 0bc9df7e7ee1..13d31c857dee 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h @@ -30,6 +30,7 @@ #include int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm, - struct drm_writeback_connector *wbcon); + struct amdgpu_dm_wb_connector *dm_wbcon, + uint32_t link_index); #endif -- cgit v1.2.3 From 1fb9d7b975baba081724be8ff6370b1a71a8aea4 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 1 Dec 2023 06:25:30 -0700 Subject: drm/amd/display: Create fake sink and stream for writeback connector [WHAT] Writeback connectors don't have a physical sink but DC still needs a sink to function. Create a fake sink and stream for writeback connectors Tested-by: Daniel Wheeler Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 +++++++++++++++-------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0f41328510c2..06ccfc9669c7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5715,13 +5715,13 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, } static struct dc_sink * -create_fake_sink(struct amdgpu_dm_connector *aconnector) +create_fake_sink(struct dc_link *link) { struct dc_sink_init_data sink_init_data = { 0 }; struct dc_sink *sink = NULL; - sink_init_data.link = aconnector->dc_link; - sink_init_data.sink_signal = aconnector->dc_link->connector_signal; + sink_init_data.link = link; + sink_init_data.sink_signal = link->connector_signal; sink = dc_sink_create(&sink_init_data); if (!sink) { @@ -6092,6 +6092,7 @@ create_stream_for_sink(struct drm_connector *connector, enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN; struct dsc_dec_dpcd_caps dsc_caps; + struct dc_link *link = NULL; struct dc_sink *sink = NULL; drm_mode_init(&mode, drm_mode); @@ -6105,14 +6106,24 @@ create_stream_for_sink(struct drm_connector *connector, if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) { aconnector = NULL; aconnector = to_amdgpu_dm_connector(connector); - if (!aconnector->dc_sink) { - sink = create_fake_sink(aconnector); - if (!sink) - return stream; - } else { - sink = aconnector->dc_sink; - dc_sink_retain(sink); - } + link = aconnector->dc_link; + } else { + struct drm_writeback_connector *wbcon = NULL; + struct amdgpu_dm_wb_connector *dm_wbcon = NULL; + + wbcon = drm_connector_to_writeback(connector); + dm_wbcon = to_amdgpu_dm_wb_connector(wbcon); + link = dm_wbcon->link; + } + + if (!aconnector || !aconnector->dc_sink) { + sink = create_fake_sink(link); + if (!sink) + return stream; + + } else { + sink = aconnector->dc_sink; + 
dc_sink_retain(sink); } stream = dc_create_stream_for_sink(sink); -- cgit v1.2.3 From c81e13b929df2fd16dce87ac36672978f10eae1c Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:31 -0700 Subject: drm/amd/display: Hande writeback request from userspace [WHAT] Handle writeback requests and fill in the required information for DWB programming and setup. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 3 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 159 ++++++++++++++++++++++ 2 files changed, 162 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 32fe05c810c6..a166d7684719 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -416,6 +416,9 @@ struct amdgpu_crtc { int otg_inst; struct drm_pending_vblank_event *event; + + bool wb_pending; + struct drm_writeback_connector *wb_conn; }; struct amdgpu_encoder_atom_dig { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 06ccfc9669c7..311f815452e0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -578,6 +578,7 @@ static void dm_crtc_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; struct amdgpu_device *adev = irq_params->adev; + struct drm_writeback_job *job; struct amdgpu_crtc *acrtc; unsigned long flags; int vrr_active; @@ -586,6 +587,21 @@ static void dm_crtc_high_irq(void *interrupt_params) if (!acrtc) return; + if (acrtc->wb_pending) { + if (acrtc->wb_conn) { + spin_lock_irqsave(&acrtc->wb_conn->job_lock, flags); + job = list_first_entry_or_null(&acrtc->wb_conn->job_queue, + struct drm_writeback_job, + list_entry); + spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags); + + if (job) + drm_writeback_signal_completion(acrtc->wb_conn, 0); + } else + DRM_ERROR("%s: no amdgpu_crtc wb_conn\n", __func__); + acrtc->wb_pending = false; + } + vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc); drm_dbg_vbl(adev_to_drm(adev), @@ -8639,6 +8655,12 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_stat stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state); } +static void dm_clear_writeback(struct amdgpu_display_manager *dm, + struct dm_crtc_state *crtc_state) +{ + dc_stream_remove_writeback(dm->dc, crtc_state->stream, 0); +} + static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, struct dc_state *dc_state) { @@ -8648,9 +8670,34 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; + struct drm_connector_state *old_con_state; + struct drm_connector *connector; bool mode_set_reset_required = false; u32 i; + /* Disable writeback */ + for_each_old_connector_in_state(state, connector, old_con_state, i) { + struct dm_connector_state *dm_old_con_state; + struct amdgpu_crtc *acrtc; + + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + old_crtc_state = NULL; + + dm_old_con_state = to_dm_connector_state(old_con_state); + if (!dm_old_con_state->base.crtc) + continue; + + acrtc = to_amdgpu_crtc(dm_old_con_state->base.crtc); + if (acrtc) + old_crtc_state = 
drm_atomic_get_old_crtc_state(state, &acrtc->base); + + dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); + + dm_clear_writeback(dm, dm_old_crtc_state); + } + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); @@ -8787,6 +8834,97 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, } } +static void dm_set_writeback(struct amdgpu_display_manager *dm, + struct dm_crtc_state *crtc_state, + struct drm_connector *connector, + struct drm_connector_state *new_con_state) +{ + struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector); + struct amdgpu_crtc *acrtc; + struct dc_writeback_info *wb_info; + struct pipe_ctx *pipe = NULL; + struct amdgpu_framebuffer *afb; + int i = 0; + + wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL); + if (!wb_info) { + DRM_ERROR("Failed to allocate wb_info\n"); + return; + } + + acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc); + if (!acrtc) { + DRM_ERROR("no amdgpu_crtc found\n"); + return; + } + + afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb); + if (!afb) { + DRM_ERROR("No amdgpu_framebuffer found\n"); + return; + } + + for (i = 0; i < MAX_PIPES; i++) { + if (dm->dc->current_state->res_ctx.pipe_ctx[i].stream == crtc_state->stream) { + pipe = &dm->dc->current_state->res_ctx.pipe_ctx[i]; + break; + } + } + + /* fill in wb_info */ + wb_info->wb_enabled = true; + + wb_info->dwb_pipe_inst = 0; + wb_info->dwb_params.dwbscl_black_color = 0; + wb_info->dwb_params.hdr_mult = 0x1F000; + wb_info->dwb_params.csc_params.gamut_adjust_type = CM_GAMUT_ADJUST_TYPE_BYPASS; + wb_info->dwb_params.csc_params.gamut_coef_format = CM_GAMUT_REMAP_COEF_FORMAT_S2_13; + wb_info->dwb_params.output_depth = DWB_OUTPUT_PIXEL_DEPTH_10BPC; + wb_info->dwb_params.cnv_params.cnv_out_bpc = DWB_CNV_OUT_BPC_10BPC; + + /* width & height from crtc */ + wb_info->dwb_params.cnv_params.src_width = acrtc->base.mode.crtc_hdisplay; + wb_info->dwb_params.cnv_params.src_height = acrtc->base.mode.crtc_vdisplay; + wb_info->dwb_params.dest_width = acrtc->base.mode.crtc_hdisplay; + wb_info->dwb_params.dest_height = acrtc->base.mode.crtc_vdisplay; + + wb_info->dwb_params.cnv_params.crop_en = false; + wb_info->dwb_params.stereo_params.stereo_enabled = false; + + wb_info->dwb_params.cnv_params.out_max_pix_val = 0x3ff; // 10 bits + wb_info->dwb_params.cnv_params.out_min_pix_val = 0; + wb_info->dwb_params.cnv_params.fc_out_format = DWB_OUT_FORMAT_32BPP_ARGB; + wb_info->dwb_params.cnv_params.out_denorm_mode = DWB_OUT_DENORM_BYPASS; + + wb_info->dwb_params.out_format = dwb_scaler_mode_bypass444; + + wb_info->dwb_params.capture_rate = dwb_capture_rate_0; + + wb_info->dwb_params.scaler_taps.h_taps = 4; + wb_info->dwb_params.scaler_taps.v_taps = 4; + wb_info->dwb_params.scaler_taps.h_taps_c = 2; + wb_info->dwb_params.scaler_taps.v_taps_c = 2; + wb_info->dwb_params.subsample_position = DWB_INTERSTITIAL_SUBSAMPLING; + + wb_info->mcif_buf_params.luma_pitch = afb->base.pitches[0]; + wb_info->mcif_buf_params.chroma_pitch = afb->base.pitches[1]; + + for (i = 0; i < DWB_MCIF_BUF_COUNT; i++) { + wb_info->mcif_buf_params.luma_address[i] = afb->address; + wb_info->mcif_buf_params.chroma_address[i] = 0; + } + + wb_info->mcif_buf_params.p_vmid = 1; + wb_info->mcif_warmup_params.p_vmid = 1; + wb_info->writeback_source_plane = pipe->plane_state; + + dc_stream_add_writeback(dm->dc, crtc_state->stream, wb_info); + + acrtc->wb_pending = true; + acrtc->wb_conn = wb_conn; + drm_writeback_queue_job(wb_conn, 
new_con_state); +} + /** * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. * @state: The atomic state to commit @@ -9119,6 +9257,27 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_commit_planes(state, dev, dm, crtc, wait_for_vblank); } + /* Enable writeback */ + for_each_new_connector_in_state(state, connector, new_con_state, i) { + struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); + + if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) + continue; + + if (!new_con_state->writeback_job) + continue; + + new_crtc_state = NULL; + + if (acrtc) + new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); + + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + + dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state); + } + /* Update audio instances for each connector. */ amdgpu_dm_commit_audio(dev, state); -- cgit v1.2.3 From f772f902b28662188636faba88e2a10bdb08e128 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:32 -0700 Subject: drm/amd/display: Add writeback enable/disable in dc [WHAT] The enable and disable writeback calls need to be included in the coressponding functions in dc_stream. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 33 ++++++++++++++++++++++ .../drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 4 +++ 2 files changed, 37 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 4bdf105d1d71..e71d622098d7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -467,6 +467,25 @@ bool dc_stream_add_writeback(struct dc *dc, struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; dwb->otg_inst = stream_status->primary_otg_inst; } + + if (!dc->hwss.update_bandwidth(dc, dc->current_state)) { + dm_error("DC: update_bandwidth failed!\n"); + return false; + } + + /* enable writeback */ + if (dc->hwss.enable_writeback) { + struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; + + if (dwb->funcs->is_enabled(dwb)) { + /* writeback pipe already enabled, only need to update */ + dc->hwss.update_writeback(dc, wb_info, dc->current_state); + } else { + /* Enable writeback pipe from scratch*/ + dc->hwss.enable_writeback(dc, wb_info, dc->current_state); + } + } + return true; } @@ -511,6 +530,20 @@ bool dc_stream_remove_writeback(struct dc *dc, } stream->num_wb_info = j; + /* recalculate and apply DML parameters */ + if (!dc->hwss.update_bandwidth(dc, dc->current_state)) { + dm_error("DC: update_bandwidth failed!\n"); + return false; + } + + /* disable writeback */ + if (dc->hwss.disable_writeback) { + struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst]; + + if (dwb->funcs->is_enabled(dwb)) + dc->hwss.disable_writeback(dc, dwb_pipe_inst); + } + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index d71faf2ecd41..fd8a8c10a201 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -367,6 +367,10 @@ void dcn30_enable_writeback( DC_LOG_DWB("%s dwb_pipe_inst = %d, mpcc_inst = %d",\ __func__, wb_info->dwb_pipe_inst,\ 
wb_info->mpcc_inst); + + /* Warmup interface */ + dcn30_mmhubbub_warmup(dc, 1, wb_info); + /* Update writeback pipe */ dcn30_set_writeback(dc, wb_info, context); -- cgit v1.2.3 From c09919e6ea5fefd49d8b7b54aa5b222937163108 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:33 -0700 Subject: drm/amd/display: Fix writeback_info never got updated [WHY] wb_enabled field is set to false before it is used, and the following code will never be executed. [HOW] Setting wb_enable to false after all removal work is completed. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index e71d622098d7..8e7bc4a30f27 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -509,18 +509,13 @@ bool dc_stream_remove_writeback(struct dc *dc, return false; } -// stream->writeback_info[dwb_pipe_inst].wb_enabled = false; - for (i = 0; i < stream->num_wb_info; i++) { - /*dynamic update*/ - if (stream->writeback_info[i].wb_enabled && - stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) { - stream->writeback_info[i].wb_enabled = false; - } - } - /* remove writeback info for disabled writeback pipes from stream */ for (i = 0, j = 0; i < stream->num_wb_info; i++) { if (stream->writeback_info[i].wb_enabled) { + + if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) + stream->writeback_info[i].wb_enabled = false; + if (j < i) /* trim the array */ memcpy(&stream->writeback_info[j], &stream->writeback_info[i], -- cgit v1.2.3 From 86ecd796a88e26e025f184ff6a2e8872a6dc9ac7 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:34 -0700 Subject: drm/amd/display: Validate hw_points_num before using it [WHAT] hw_points_num is 0 before ogam LUT is programmed; however, function "dwb3_program_ogam_pwl" assumes hw_points_num is always greater than 0, i.e. substracting it by 1 as an array index. [HOW] Check hw_points_num is not equal to 0 before using it. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c index 701c7d8bc038..03a50c32fcfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c @@ -243,6 +243,9 @@ static bool dwb3_program_ogam_lut( return false; } + if (params->hw_points_num == 0) + return false; + REG_SET(DWB_OGAM_CONTROL, 0, DWB_OGAM_MODE, 2); current_mode = dwb3_get_ogam_current(dwbc30); -- cgit v1.2.3 From ab37b88ed9de9de8d582683f7ea17059f1251a7f Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:35 -0700 Subject: drm/amd/display: Fix writeback_info is not removed [WHY] Counter j was not updated to present the num of writeback_info when writeback pipes are removed. [HOW] update j (num of writeback info) under the correct condition. 
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 8e7bc4a30f27..3dd9f4ab2261 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -516,11 +516,12 @@ bool dc_stream_remove_writeback(struct dc *dc, if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) stream->writeback_info[i].wb_enabled = false; - if (j < i) - /* trim the array */ + /* trim the array */ + if (j < i) { memcpy(&stream->writeback_info[j], &stream->writeback_info[i], sizeof(struct dc_writeback_info)); - j++; + j++; + } } } stream->num_wb_info = j; -- cgit v1.2.3 From f872e2f5f0beabd34c03799a5c597f6ba47b51cc Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:36 -0700 Subject: drm/amd/display: Add writeback enable field (wb_enabled) [WHAT] Add a new field to keep track whether a crtc is previously writeback-enabled. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index a166d7684719..d8083972e393 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -418,6 +418,7 @@ struct amdgpu_crtc { struct drm_pending_vblank_event *event; bool wb_pending; + bool wb_enabled; struct drm_writeback_connector *wb_conn; }; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 311f815452e0..b45b248808f7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8693,9 +8693,13 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, if (acrtc) old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); + if (!acrtc->wb_enabled) + continue; + dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); dm_clear_writeback(dm, dm_old_crtc_state); + acrtc->wb_enabled = false; } for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, @@ -9273,9 +9277,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (acrtc) new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); + if (acrtc->wb_enabled) + continue; + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state); + acrtc->wb_enabled = true; } /* Update audio instances for each connector. */ -- cgit v1.2.3 From fdf43d25e38f9e6e6a3cdb15335c198fb6d5dcb9 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:37 -0700 Subject: drm/amd/display: Setup for mmhubbub3_warmup_mcif with big buffer [WHY] Hardware may require different warmup approaches - big buffer or individual buffers. [HOW] Setup warmup for big buffer when it is required by specific hardware. 
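A short sketch of what "big buffer" means here, using the field names from the hunk below (for illustration): the warmup region starts at the framebuffer address and spans one full destination frame, i.e. bytes-per-line times the destination height:

	/*
	 * Warm up one contiguous region covering a whole destination frame:
	 * luma_pitch is in bytes per line, dest_height in lines.
	 */
	wb_info->mcif_warmup_params.start_address.quad_part = afb->address;
	wb_info->mcif_warmup_params.region_size =
		wb_info->mcif_buf_params.luma_pitch *
		wb_info->dwb_params.dest_height;
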
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b45b248808f7..7e5e4be99b60 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8844,6 +8844,7 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, struct drm_connector_state *new_con_state) { struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector); + struct amdgpu_device *adev = dm->adev; struct amdgpu_crtc *acrtc; struct dc_writeback_info *wb_info; struct pipe_ctx *pipe = NULL; @@ -8919,6 +8920,11 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, } wb_info->mcif_buf_params.p_vmid = 1; + if (adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0)) { + wb_info->mcif_warmup_params.start_address.quad_part = afb->address; + wb_info->mcif_warmup_params.region_size = + wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height; + } wb_info->mcif_warmup_params.p_vmid = 1; wb_info->writeback_source_plane = pipe->plane_state; -- cgit v1.2.3 From 50ad10cba6cd1c7f0ac9049f2c2c6b7589b510d0 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:38 -0700 Subject: drm/amd/display: Add new set_fc_enable to struct dwbc_funcs [WHAT] Add a function to enable and disable DWB's frame captures. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h | 2 ++ drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h | 4 ++++ 3 files changed, 29 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c index 0d98918bf0fc..1b9d9495f76d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c @@ -130,6 +130,28 @@ bool dwb3_disable(struct dwbc *dwbc) return true; } +void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable) +{ + struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc); + unsigned int pre_locked; + + REG_GET(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, &pre_locked); + + /* Lock DWB registers */ + if (pre_locked == 0) + REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 1); + + /* Disable FC */ + REG_UPDATE(FC_MODE_CTRL, FC_FRAME_CAPTURE_EN, enable); + + /* Unlock DWB registers */ + if (pre_locked == 0) + REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 0); + + DC_LOG_DWB("%s dwb3_fc_disabled at inst = %d", __func__, dwbc->inst); +} + + bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params) { struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc); @@ -226,6 +248,7 @@ static const struct dwbc_funcs dcn30_dwbc_funcs = { .disable = dwb3_disable, .update = dwb3_update, .is_enabled = dwb3_is_enabled, + .set_fc_enable = dwb3_set_fc_enable, .set_stereo = dwb3_set_stereo, .set_new_content = dwb3_set_new_content, .dwb_program_output_csc = NULL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h index a5d1b81e768d..332634b76aac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h @@ 
-877,6 +877,8 @@ bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params); bool dwb3_is_enabled(struct dwbc *dwbc); +void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable); + void dwb3_set_stereo(struct dwbc *dwbc, struct dwb_stereo_params *stereo_params); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h index 86b711dcc785..729ca0064e94 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h @@ -188,6 +188,10 @@ struct dwbc_funcs { bool (*is_enabled)( struct dwbc *dwbc); + void (*set_fc_enable)( + struct dwbc *dwbc, + enum dwb_frame_capture_enable enable); + void (*set_stereo)( struct dwbc *dwbc, struct dwb_stereo_params *stereo_params); -- cgit v1.2.3 From 87ce0e62694115cfe4210a17c269d6855d2a139b Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 1 Dec 2023 06:25:39 -0700 Subject: drm/amd/display: Disable DWB frame capture to emulate oneshot [WHY] drm_writeback requires to capture exact one frame in each writeback call. [HOW] frame_capture is disabled after each writeback is completed. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++++++++++- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 27 +++++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 4 ++++ 3 files changed, 44 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7e5e4be99b60..6aabe8a3ffef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -595,8 +595,20 @@ static void dm_crtc_high_irq(void *interrupt_params) list_entry); spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags); - if (job) + if (job) { + unsigned int v_total, refresh_hz; + struct dc_stream_state *stream = acrtc->dm_irq_params.stream; + + v_total = stream->adjust.v_total_max ? 
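The oneshot emulation in the CRTC interrupt, sketched with the names used in the hunk below (approximate, for illustration only): wait roughly one frame so the in-flight capture finishes, signal the writeback job, then turn frame capture back off:

	/* Derive the refresh rate from the stream timing. */
	v_total = stream->adjust.v_total_max ? stream->adjust.v_total_max
					     : stream->timing.v_total;
	refresh_hz = div_u64((u64)stream->timing.pix_clk_100hz * 100,
			     v_total * stream->timing.h_total);

	mdelay(1000 / refresh_hz);	/* ~one frame */

	drm_writeback_signal_completion(acrtc->wb_conn, 0);

	/* Stop capturing further frames until the next writeback job. */
	dc_stream_fc_disable_writeback(adev->dm.dc,
				       acrtc->dm_irq_params.stream, 0);
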
+ stream->adjust.v_total_max : stream->timing.v_total; + refresh_hz = div_u64((uint64_t) stream->timing.pix_clk_100hz * + 100LL, (v_total * stream->timing.h_total)); + mdelay(1000 / refresh_hz); + drm_writeback_signal_completion(acrtc->wb_conn, 0); + dc_stream_fc_disable_writeback(adev->dm.dc, + acrtc->dm_irq_params.stream, 0); + } } else DRM_ERROR("%s: no amdgpu_crtc wb_conn\n", __func__); acrtc->wb_pending = false; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 3dd9f4ab2261..38cd29b210c0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -489,6 +489,33 @@ bool dc_stream_add_writeback(struct dc *dc, return true; } +bool dc_stream_fc_disable_writeback(struct dc *dc, + struct dc_stream_state *stream, + uint32_t dwb_pipe_inst) +{ + struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst]; + + if (stream == NULL) { + dm_error("DC: dc_stream is NULL!\n"); + return false; + } + + if (dwb_pipe_inst >= MAX_DWB_PIPES) { + dm_error("DC: writeback pipe is invalid!\n"); + return false; + } + + if (stream->num_wb_info > MAX_DWB_PIPES) { + dm_error("DC: num_wb_info is invalid!\n"); + return false; + } + + if (dwb->funcs->set_fc_enable) + dwb->funcs->set_fc_enable(dwb, DWB_FRAME_CAPTURE_DISABLE); + + return true; +} + bool dc_stream_remove_writeback(struct dc *dc, struct dc_stream_state *stream, uint32_t dwb_pipe_inst) diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index e61eea6db29c..4ac48c346a33 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -454,6 +454,10 @@ bool dc_stream_add_writeback(struct dc *dc, struct dc_stream_state *stream, struct dc_writeback_info *wb_info); +bool dc_stream_fc_disable_writeback(struct dc *dc, + struct dc_stream_state *stream, + uint32_t dwb_pipe_inst); + bool dc_stream_remove_writeback(struct dc *dc, struct dc_stream_state *stream, uint32_t dwb_pipe_inst); -- cgit v1.2.3 From 514a1cc940c264007805c02173dd5490c0a59f48 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Fri, 1 Dec 2023 06:25:40 -0700 Subject: drm/amd/display: Fix array-index-out-of-bounds in dml2 [Why] UBSAN errors observed in dmesg. array-index-out-of-bounds in dml2/display_mode_core.c [How] Fix the index. 
Tested-by: Daniel Wheeler Acked-by: Rodrigo Siqueira Signed-off-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 59718ee33e51..4d1336e5afc2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -9447,12 +9447,12 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc // Output CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; + CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0]; CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; + CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0]; CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0]; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( &mode_lib->scratch, -- cgit v1.2.3 From d3e78612e949e16088b6ee83647b28499c24954d Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 1 Dec 2023 06:25:41 -0700 Subject: drm/amd/display: Adjust code style This simple commit adjusts part of the code style in some of the dc bios files. Tested-by: Daniel Wheeler Reviewed-by: Aurabindo Pillai Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 63 +++++++++++----------- 1 file changed, 30 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 971473c69b32..aef964f1bcbe 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2215,22 +2215,22 @@ static enum bp_result bios_parser_get_disp_connector_caps_info( switch (bp->object_info_tbl.revision.minor) { case 4: - default: - object = get_bios_object(bp, object_id); - - if (!object) - return BP_RESULT_BADINPUT; - - record = get_disp_connector_caps_record(bp, object); - if (!record) - return BP_RESULT_NORECORD; - - info->INTERNAL_DISPLAY = - (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0; - info->INTERNAL_DISPLAY_BL = - (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0; - break; - case 5: + default: + object = get_bios_object(bp, object_id); + + if (!object) + return BP_RESULT_BADINPUT; + + record = get_disp_connector_caps_record(bp, object); + if (!record) + return BP_RESULT_NORECORD; + + info->INTERNAL_DISPLAY = + (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 
1 : 0; + info->INTERNAL_DISPLAY_BL = + (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0; + break; + case 5: object_path_v3 = get_bios_object_from_path_v3(bp, object_id); if (!object_path_v3) @@ -3330,27 +3330,28 @@ static enum bp_result get_bracket_layout_record( DC_LOG_DETECTION_EDID_PARSER("Invalid slot_layout_info\n"); return BP_RESULT_BADINPUT; } + tbl = &bp->object_info_tbl; v1_4 = tbl->v1_4; v1_5 = tbl->v1_5; result = BP_RESULT_NORECORD; switch (bp->object_info_tbl.revision.minor) { - case 4: - default: - for (i = 0; i < v1_4->number_of_path; ++i) { - if (bracket_layout_id == - v1_4->display_path[i].display_objid) { - result = update_slot_layout_info(dcb, i, slot_layout_info); - break; - } + case 4: + default: + for (i = 0; i < v1_4->number_of_path; ++i) { + if (bracket_layout_id == v1_4->display_path[i].display_objid) { + result = update_slot_layout_info(dcb, i, slot_layout_info); + break; } - break; - case 5: - for (i = 0; i < v1_5->number_of_path; ++i) - result = update_slot_layout_info_v2(dcb, i, slot_layout_info); - break; + } + break; + case 5: + for (i = 0; i < v1_5->number_of_path; ++i) + result = update_slot_layout_info_v2(dcb, i, slot_layout_info); + break; } + return result; } @@ -3359,9 +3360,7 @@ static enum bp_result bios_get_board_layout_info( struct board_layout_info *board_layout_info) { unsigned int i; - struct bios_parser *bp; - static enum bp_result record_result; unsigned int max_slots; @@ -3371,7 +3370,6 @@ static enum bp_result bios_get_board_layout_info( 0, 0 }; - bp = BP_FROM_DCB(dcb); if (board_layout_info == NULL) { @@ -3552,7 +3550,6 @@ static const struct dc_vbios_funcs vbios_funcs = { .bios_parser_destroy = firmware_parser_destroy, .get_board_layout_info = bios_get_board_layout_info, - /* TODO: use this fn in hw init?*/ .pack_data_tables = bios_parser_pack_data_tables, .get_atom_dc_golden_table = bios_get_atom_dc_golden_table, -- cgit v1.2.3 From 107d678f6aecb4421975a25127b6bf521504b39e Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 1 Dec 2023 06:25:42 -0700 Subject: drm/amd/display: Update code comment to be more accurate Tested-by: Daniel Wheeler Reviewed-by: Aurabindo Pillai Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 102d00a9a24f..9d3925603979 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1521,7 +1521,7 @@ static void program_timing_sync( } - /* remove any other pipes that are already been synced */ + /* remove any other unblanked pipes as they have already been synced */ if (dc->config.use_pipe_ctx_sync_logic) { /* check pipe's syncd to decide which pipe to be removed */ for (j = 1; j < group_size; j++) { @@ -1534,6 +1534,7 @@ static void program_timing_sync( pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd; } } else { + /* remove any other pipes by checking valid plane */ for (j = j + 1; j < group_size; j++) { bool is_blanked; -- cgit v1.2.3 From c0c22ed7c9fd6e6d50f61ed7347e60342e958e6f Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Fri, 1 Dec 2023 06:25:43 -0700 Subject: drm/amd/display: 3.2.263 This version brings along following fixes: * Enable writeback. * Add multiple fixes for DML2 and DCN35. * Introduce small code style adjustments. 
Acked-by: Rodrigo Siqueira Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6ee4db8c0548..61d08349a0d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.262" +#define DC_VER "3.2.263" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 24d29d5b189590497947510046eb685e5e2452b6 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Tue, 14 Nov 2023 16:17:51 +0800 Subject: drm/amd/swsmu: update smu v14_0_0 driver if version and metrics table Increment the driver interface version and add new members to the metrics table. Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 17 +++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 10 +++ .../pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h | 77 +++++++++++++--------- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 46 ++++++++++++- 4 files changed, 115 insertions(+), 35 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 8ebba87f4289..36a5ad8c00c5 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -1086,6 +1086,10 @@ struct gpu_metrics_v3_0 { uint16_t average_dram_reads; /* time filtered DRAM write bandwidth [MB/sec] */ uint16_t average_dram_writes; + /* time filtered IPU read bandwidth [MB/sec] */ + uint16_t average_ipu_reads; + /* time filtered IPU write bandwidth [MB/sec] */ + uint16_t average_ipu_writes; /* Driver attached timestamp (in ns) */ uint64_t system_clock_counter; @@ -1105,6 +1109,8 @@ struct gpu_metrics_v3_0 { uint32_t average_all_core_power; /* calculated core power [mW] */ uint16_t average_core_power[16]; + /* time filtered total system power [mW] */ + uint16_t average_sys_power; /* maximum IRM defined STAPM power limit [mW] */ uint16_t stapm_power_limit; /* time filtered STAPM power limit [mW] */ @@ -1117,6 +1123,8 @@ struct gpu_metrics_v3_0 { uint16_t average_ipuclk_frequency; uint16_t average_fclk_frequency; uint16_t average_vclk_frequency; + uint16_t average_uclk_frequency; + uint16_t average_mpipu_frequency; /* Current clocks */ /* target core frequency [MHz] */ @@ -1126,6 +1134,15 @@ struct gpu_metrics_v3_0 { /* GFXCLK frequency limit enforced on GFX [MHz] */ uint16_t current_gfx_maxfreq; + /* Throttle Residency (ASIC dependent) */ + uint32_t throttle_residency_prochot; + uint32_t throttle_residency_spl; + uint32_t throttle_residency_fppt; + uint32_t throttle_residency_sppt; + uint32_t throttle_residency_thm_core; + uint32_t throttle_residency_thm_gfx; + uint32_t throttle_residency_thm_soc; + /* Metrics table alpha filter time constant [us] */ uint32_t time_filter_alphavalue; }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index c125253df20b..c2265e027ca8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1418,6 +1418,16 @@ typedef enum { METRICS_PCIE_WIDTH, METRICS_CURR_FANPWM, METRICS_CURR_SOCKETPOWER, + METRICS_AVERAGE_VPECLK, + METRICS_AVERAGE_IPUCLK, + METRICS_AVERAGE_MPIPUCLK, +
METRICS_THROTTLER_RESIDENCY_PROCHOT, + METRICS_THROTTLER_RESIDENCY_SPL, + METRICS_THROTTLER_RESIDENCY_FPPT, + METRICS_THROTTLER_RESIDENCY_SPPT, + METRICS_THROTTLER_RESIDENCY_THM_CORE, + METRICS_THROTTLER_RESIDENCY_THM_GFX, + METRICS_THROTTLER_RESIDENCY_THM_SOC, } MetricsMember_t; enum smu_cmn2asic_mapping_type { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index 22f88842a7fd..8f42771e1f0a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 6 +#define PMFW_DRIVER_IF_VERSION 7 typedef struct { int32_t value; @@ -150,37 +150,50 @@ typedef struct { } DpmClocks_t; typedef struct { - uint16_t CoreFrequency[16]; //Target core frequency [MHz] - uint16_t CorePower[16]; //CAC calculated core power [mW] - uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] - uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] - uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] - uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] - uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] - uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] - uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] - uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] - uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] - uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] - uint16_t GfxActivity; //Time filtered GFX busy % [0-100] - uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] - uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] - uint16_t VcnActivity; //Time filtered VCN busy % [0-100] - uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] - uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] - uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] - uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] - uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] - uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] - uint16_t IpuPower; //Time filtered IPU power [mW] - uint32_t ApuPower; //Time filtered APU power [mW] - uint32_t GfxPower; //Time filtered GFX power [mW] - uint32_t dGpuPower; //Time filtered dGPU power [mW] - uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] - uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] - uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] - uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] - uint32_t spare[16]; + uint16_t CoreFrequency[16]; //Target core frequency [MHz] + uint16_t CorePower[16]; //CAC calculated core power [mW] + uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] + uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] + uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] + uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] + uint16_t 
StapmCurrentLimit; //Time filtered STAPM power limit [mW] + uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] + uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] + uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] + uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] + uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] + uint16_t GfxActivity; //Time filtered GFX busy % [0-100] + uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] + uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] + uint16_t VcnActivity; //Time filtered VCN busy % [0-100] + uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] + uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] + uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] + uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] + uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] + uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] + uint16_t IpuPower; //Time filtered IPU power [mW] + uint32_t ApuPower; //Time filtered APU power [mW] + uint32_t GfxPower; //Time filtered GFX power [mW] + uint32_t dGpuPower; //Time filtered dGPU power [mW] + uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] + uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] + uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] + uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] + uint16_t MemclkFrequency; //Time filtered target MEMCLK frequency [MHz] + uint16_t MpipuclkFrequency; //Time filtered target MPIPUCLK frequency [MHz] + uint16_t IpuReads; //Time filtered IPU read bandwidth [MB/sec] + uint16_t IpuWrites; //Time filtered IPU write bandwidth [MB/sec] + uint32_t ThrottleResidency_PROCHOT; //Counter that is incremented on every metrics table update when PROCHOT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_SPL; //Counter that is incremented on every metrics table update when SPL was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_FPPT; //Counter that is incremented on every metrics table update when fast PPT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_SPPT; //Counter that is incremented on every metrics table update when slow PPT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_CORE; //Counter that is incremented on every metrics table update when CORE thermal throttling was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_GFX; //Counter that is incremented on every metrics table update when GFX thermal throttling was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_SOC; //Counter that is incremented on every metrics table update when SOC thermal throttling was engaged [PM_TIMER cycles] + uint16_t Psys; //Time filtered Psys power [mW] + uint16_t spare1; + uint32_t spare[6]; } SmuMetrics_t; //ISP tile definitions diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 03b38c3a9968..94ccdbfd7090 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -246,11 +246,20 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = 0; 
break; case METRICS_AVERAGE_UCLK: - *value = 0; + *value = metrics->MemclkFrequency; break; case METRICS_AVERAGE_FCLK: *value = metrics->FclkFrequency; break; + case METRICS_AVERAGE_VPECLK: + *value = metrics->VpeclkFrequency; + break; + case METRICS_AVERAGE_IPUCLK: + *value = metrics->IpuclkFrequency; + break; + case METRICS_AVERAGE_MPIPUCLK: + *value = metrics->MpipuclkFrequency; + break; case METRICS_AVERAGE_GFXACTIVITY: *value = metrics->GfxActivity / 100; break; @@ -270,8 +279,26 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = metrics->SocTemperature / 100 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; - case METRICS_THROTTLER_STATUS: - *value = 0; + case METRICS_THROTTLER_RESIDENCY_PROCHOT: + *value = metrics->ThrottleResidency_PROCHOT; + break; + case METRICS_THROTTLER_RESIDENCY_SPL: + *value = metrics->ThrottleResidency_SPL; + break; + case METRICS_THROTTLER_RESIDENCY_FPPT: + *value = metrics->ThrottleResidency_FPPT; + break; + case METRICS_THROTTLER_RESIDENCY_SPPT: + *value = metrics->ThrottleResidency_SPPT; + break; + case METRICS_THROTTLER_RESIDENCY_THM_CORE: + *value = metrics->ThrottleResidency_THM_CORE; + break; + case METRICS_THROTTLER_RESIDENCY_THM_GFX: + *value = metrics->ThrottleResidency_THM_GFX; + break; + case METRICS_THROTTLER_RESIDENCY_THM_SOC: + *value = metrics->ThrottleResidency_THM_SOC; break; case METRICS_VOLTAGE_VDDGFX: *value = 0; @@ -498,6 +525,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, sizeof(uint16_t) * 16); gpu_metrics->average_dram_reads = metrics.DRAMReads; gpu_metrics->average_dram_writes = metrics.DRAMWrites; + gpu_metrics->average_ipu_reads = metrics.IpuReads; + gpu_metrics->average_ipu_writes = metrics.IpuWrites; gpu_metrics->average_socket_power = metrics.SocketPower; gpu_metrics->average_ipu_power = metrics.IpuPower; @@ -505,6 +534,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_power = metrics.GfxPower; gpu_metrics->average_dgpu_power = metrics.dGpuPower; gpu_metrics->average_all_core_power = metrics.AllCorePower; + gpu_metrics->average_sys_power = metrics.Psys; memcpy(&gpu_metrics->average_core_power[0], &metrics.CorePower[0], sizeof(uint16_t) * 16); @@ -515,6 +545,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_fclk_frequency = metrics.FclkFrequency; gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; + gpu_metrics->average_mpipu_frequency = metrics.MpipuclkFrequency; memcpy(&gpu_metrics->current_coreclk[0], &metrics.CoreFrequency[0], @@ -522,6 +554,14 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->current_core_maxfreq = metrics.InfrastructureCpuMaxFreq; gpu_metrics->current_gfx_maxfreq = metrics.InfrastructureGfxMaxFreq; + gpu_metrics->throttle_residency_prochot = metrics.ThrottleResidency_PROCHOT; + gpu_metrics->throttle_residency_spl = metrics.ThrottleResidency_SPL; + gpu_metrics->throttle_residency_fppt = metrics.ThrottleResidency_FPPT; + gpu_metrics->throttle_residency_sppt = metrics.ThrottleResidency_SPPT; + gpu_metrics->throttle_residency_thm_core = metrics.ThrottleResidency_THM_CORE; + gpu_metrics->throttle_residency_thm_gfx = metrics.ThrottleResidency_THM_GFX; + gpu_metrics->throttle_residency_thm_soc = metrics.ThrottleResidency_THM_SOC; + gpu_metrics->time_filter_alphavalue = metrics.FilterAlphaValue; 
gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); -- cgit v1.2.3 From 0d65efcbe350f1e9e96f24905df4929188e80d56 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 4 Dec 2023 10:17:57 +0800 Subject: drm/amd/pm: support new mca smu error code decoding support new mca smu error code decoding from smu 85.86.0 for smu v13.0.6 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 2b488fcf2f95..e51e8918e667 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -46,6 +46,8 @@ #define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) #define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) +#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0) + enum amdgpu_mca_ip { AMDGPU_MCA_IP_UNKNOW = -1, AMDGPU_MCA_IP_PSP = 0, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index dda2249c4994..ddd782fbee7a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2635,6 +2635,7 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) { + struct smu_context *smu = adev->powerplay.pp_handle; uint32_t errcode, instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); @@ -2642,7 +2643,13 @@ static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amd if (instlo != 0x03b30400) return false; - errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) { + errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); + errcode &= 0xff; + } else { + errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + } + return mca_smu_check_error_code(adev, mca_ras, errcode); } -- cgit v1.2.3 From 71a9d7a2a1034e72074335ac2aff4f9b8013ae6c Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Nov 2023 10:43:02 +0800 Subject: drm/amdgpu: Update fw version for boot time error query Boot time error query is not available until fw a10109 Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 3cf4684d0d3f..5f46877f78cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -821,7 +821,7 @@ static int psp_v13_0_query_boot_status(struct psp_context *psp) if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) return 0; - if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007) + if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109) return 0; for_each_inst(i, inst_mask) { -- cgit v1.2.3 From 04a71f110446eb6ffdaaa13407b4c1bf286db760 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 4 Dec 
2023 10:44:32 +0800 Subject: drm/amdgpu: optimize the printing order of error data sort error data list to optimize the printing order. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 72634d675e27..bacb59d8b701 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_ras.h" @@ -3674,6 +3675,21 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void) return err_node; } +static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b) +{ + struct ras_err_node *nodea = container_of(a, struct ras_err_node, node); + struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node); + struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info; + struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info; + + if (unlikely(infoa->socket_id != infob->socket_id)) + return infoa->socket_id - infob->socket_id; + else + return infoa->die_id - infob->die_id; + + return 0; +} + static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info) { @@ -3691,6 +3707,7 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); + list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp); return &err_node->err_info; } -- cgit v1.2.3 From 7a2464fac80d42f6f8819fed97a553e9c2f43310 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 30 Nov 2023 15:50:16 +0800 Subject: drm/radeon: check the alloc_workqueue return value in radeon_crtc_init() check the alloc_workqueue return value in radeon_crtc_init() to avoid null-ptr-deref. 
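One detail worth noting in the diff below: alloc_workqueue() is not only checked, it is also moved ahead of drm_crtc_init(), so the failure path only has to kfree() the not-yet-registered radeon_crtc. The following is a standalone sketch of that error-handling pattern in plain userspace C (the names are invented stand-ins, not radeon code): do every allocation that can fail before publishing the object, so that bailing out never requires unwinding a registration.

/* alloc_order_demo.c - allocate fallible resources before registering the object */
#include <stdio.h>
#include <stdlib.h>

struct crtc {
	int id;
	void *flip_queue;
};

static struct crtc *registered_crtc;		/* stand-in for rdev->mode_info.crtcs[] */

static int crtc_init(int id)
{
	struct crtc *c = calloc(1, sizeof(*c));

	if (!c)
		return -1;

	c->flip_queue = malloc(64);		/* stand-in for alloc_workqueue() */
	if (!c->flip_queue) {
		free(c);			/* nothing registered yet: one free and out */
		return -1;
	}

	c->id = id;
	registered_crtc = c;			/* stand-in for drm_crtc_init()/registration */
	return 0;
}

int main(void)
{
	printf("crtc_init: %s\n", crtc_init(0) ? "failed" : "ok");
	return 0;
}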
Fixes: fa7f517cb26e ("drm/radeon: rework page flip handling v4") Signed-off-by: Yang Yingliang Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 901e75ec70ff..efd18c8d84c8 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -687,11 +687,16 @@ static void radeon_crtc_init(struct drm_device *dev, int index) if (radeon_crtc == NULL) return; + radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0); + if (!radeon_crtc->flip_queue) { + kfree(radeon_crtc); + return; + } + drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs); drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256); radeon_crtc->crtc_id = index; - radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0); rdev->mode_info.crtcs[index] = radeon_crtc; if (rdev->family >= CHIP_BONAIRE) { -- cgit v1.2.3 From 72ada8603e36291ad91e4f40f10ef742ef79bc4e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 30 Nov 2023 17:34:07 -0500 Subject: drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml Does the same thing as: commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") Reviewed-by: Harry Wentland Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311302107.hUDXVyWT-lkp@intel.com/ Fixes: 67e38874b85b ("drm/amd/display: Increase num voltage states to 40") Signed-off-by: Alex Deucher Cc: Alvin Lee Cc: Hamza Mahfooz Cc: Samson Tam Cc: Harry Wentland --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ea7d60f9a9b4..6042a5a6a44f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -61,8 +61,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) -- cgit v1.2.3 From 13ac7c0e30e87e006cfad67ce4337268f65d4333 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 7 Oct 2023 16:53:24 +0530 Subject: drm/amdgpu: Read aquavanjaram WAFL register state Add support to read state of WAFL links in aquavanjaram SOC. 
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 76 ++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index a00b8c6f0a94..eec15f1c143f 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -848,6 +848,79 @@ static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev, return xgmi_reg_state->common_header.structure_size; } +#define smnreg_0x11C00070 0x11C00070 +#define smnreg_0x11C00210 0x11C00210 + +static struct aqua_reg_list wafl_reg_addrs[] = { + { smnreg_0x11C00070, 4, DW_ADDR_INCR }, + { smnreg_0x11C00210, 1, 0 }, +}; + +#define WAFL_LINK_REG(smnreg, l) ((smnreg) | (l << 20)) + +#define NUM_WAFL_SMN_REGS 5 + +static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_wafl_v1_0 *wafl_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_wafl_instances = 8; + int inst = 0, i, j, r, n; + const int wafl_inst = 2; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf; + + szbuf = sizeof(*wafl_reg_state) + + amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs), + NUM_WAFL_SMN_REGS); + + if (max_size < szbuf) + return -EOVERFLOW; + + p = &wafl_reg_state->wafl_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + for (j = 0; j < wafl_inst; ++j) { + wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p; + wafl_regs->inst_header.instance = inst++; + + wafl_regs->inst_header.state = AMDGPU_INST_S_OK; + wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS; + + reg_data = wafl_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) { + start_addr = wafl_reg_addrs[r].start_addr; + incrx = wafl_reg_addrs[r].incrx; + num_regs = wafl_reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext( + adev, reg_data, + WAFL_LINK_REG(start_addr, j) + + n * incrx, + i); + ++reg_data; + } + } + p = reg_data; + } + } + + wafl_reg_state->common_header.structure_size = szbuf; + wafl_reg_state->common_header.format_revision = 1; + wafl_reg_state->common_header.content_revision = 0; + wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL; + wafl_reg_state->common_header.num_instances = max_wafl_instances; + + return wafl_reg_state->common_header.structure_size; +} + ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, enum amdgpu_reg_state reg_state, void *buf, size_t max_size) @@ -861,6 +934,9 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, case AMDGPU_REG_STATE_TYPE_XGMI: size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size); break; + case AMDGPU_REG_STATE_TYPE_WAFL: + size = aqua_vanjaram_read_wafl_state(adev, buf, max_size); + break; default: return -EINVAL; } -- cgit v1.2.3 From 650f0487d6cd95c4e07a41d3a464d0f60a983a15 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 7 Oct 2023 19:52:11 +0530 Subject: drm/amdgpu: Read aquavanjaram USR register state Add support to read state of USR links in aquavanjaram SOC. 
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 127 +++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index eec15f1c143f..d6f808acfb17 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -921,6 +921,125 @@ static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev, return wafl_reg_state->common_header.structure_size; } +#define smnreg_0x1B311060 0x1B311060 +#define smnreg_0x1B411060 0x1B411060 +#define smnreg_0x1B511060 0x1B511060 +#define smnreg_0x1B611060 0x1B611060 + +#define smnreg_0x1C307120 0x1C307120 +#define smnreg_0x1C317120 0x1C317120 + +#define smnreg_0x1C320830 0x1C320830 +#define smnreg_0x1C380830 0x1C380830 +#define smnreg_0x1C3D0830 0x1C3D0830 +#define smnreg_0x1C420830 0x1C420830 + +#define smnreg_0x1C320100 0x1C320100 +#define smnreg_0x1C380100 0x1C380100 +#define smnreg_0x1C3D0100 0x1C3D0100 +#define smnreg_0x1C420100 0x1C420100 + +#define smnreg_0x1B310500 0x1B310500 +#define smnreg_0x1C300400 0x1C300400 + +#define USR_CAKE_INCR 0x11000 +#define USR_LINK_INCR 0x100000 +#define USR_CP_INCR 0x10000 + +#define NUM_USR_SMN_REGS 20 + +struct aqua_reg_list usr_reg_addrs[] = { + { smnreg_0x1B311060, 4, DW_ADDR_INCR }, + { smnreg_0x1B411060, 4, DW_ADDR_INCR }, + { smnreg_0x1B511060, 4, DW_ADDR_INCR }, + { smnreg_0x1B611060, 4, DW_ADDR_INCR }, + { smnreg_0x1C307120, 2, DW_ADDR_INCR }, + { smnreg_0x1C317120, 2, DW_ADDR_INCR }, +}; + +#define NUM_USR1_SMN_REGS 46 +struct aqua_reg_list usr1_reg_addrs[] = { + { smnreg_0x1C320830, 6, USR_CAKE_INCR }, + { smnreg_0x1C380830, 5, USR_CAKE_INCR }, + { smnreg_0x1C3D0830, 5, USR_CAKE_INCR }, + { smnreg_0x1C420830, 4, USR_CAKE_INCR }, + { smnreg_0x1C320100, 6, USR_CAKE_INCR }, + { smnreg_0x1C380100, 5, USR_CAKE_INCR }, + { smnreg_0x1C3D0100, 5, USR_CAKE_INCR }, + { smnreg_0x1C420100, 4, USR_CAKE_INCR }, + { smnreg_0x1B310500, 4, USR_LINK_INCR }, + { smnreg_0x1C300400, 2, USR_CP_INCR }, +}; + +static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev, + void *buf, size_t max_size, + int reg_state) +{ + uint32_t start_addr, incrx, num_regs, szbuf, num_smn; + struct amdgpu_reg_state_usr_v1_0 *usr_reg_state; + struct amdgpu_regs_usr_v1_0 *usr_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_usr_instances = 4; + struct aqua_reg_list *reg_addrs; + int inst = 0, i, n, r, arr_size; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + switch (reg_state) { + case AMDGPU_REG_STATE_TYPE_USR: + arr_size = ARRAY_SIZE(usr_reg_addrs); + reg_addrs = usr_reg_addrs; + num_smn = NUM_USR_SMN_REGS; + break; + case AMDGPU_REG_STATE_TYPE_USR_1: + arr_size = ARRAY_SIZE(usr1_reg_addrs); + reg_addrs = usr1_reg_addrs; + num_smn = NUM_USR1_SMN_REGS; + break; + default: + return -EINVAL; + } + + usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf; + + szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances, + sizeof(*usr_regs), + num_smn); + if (max_size < szbuf) + return -EOVERFLOW; + + p = &usr_reg_state->usr_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + usr_regs = (struct amdgpu_regs_usr_v1_0 *)p; + usr_regs->inst_header.instance = inst++; + usr_regs->inst_header.state = AMDGPU_INST_S_OK; + usr_regs->inst_header.num_smn_regs = num_smn; + reg_data = usr_regs->smn_reg_values; + + for (r = 0; r < arr_size; r++) { + start_addr 
= reg_addrs[r].start_addr; + incrx = reg_addrs[r].incrx; + num_regs = reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext(adev, reg_data, + start_addr + n * incrx, i); + reg_data++; + } + } + p = reg_data; + } + + usr_reg_state->common_header.structure_size = szbuf; + usr_reg_state->common_header.format_revision = 1; + usr_reg_state->common_header.content_revision = 0; + usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR; + usr_reg_state->common_header.num_instances = max_usr_instances; + + return usr_reg_state->common_header.structure_size; +} + ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, enum amdgpu_reg_state reg_state, void *buf, size_t max_size) @@ -937,6 +1056,14 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, case AMDGPU_REG_STATE_TYPE_WAFL: size = aqua_vanjaram_read_wafl_state(adev, buf, max_size); break; + case AMDGPU_REG_STATE_TYPE_USR: + size = aqua_vanjaram_read_usr_state(adev, buf, max_size, + AMDGPU_REG_STATE_TYPE_USR); + break; + case AMDGPU_REG_STATE_TYPE_USR_1: + size = aqua_vanjaram_read_usr_state( + adev, buf, max_size, AMDGPU_REG_STATE_TYPE_USR_1); + break; default: return -EINVAL; } -- cgit v1.2.3 From 3b35dd87c5969637ab5aa6666bbab6e6929c9e16 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 30 Nov 2023 16:53:21 -0500 Subject: drm/amd: Add a DC debug mask for DML2 [Why&How] To enable testing/development of DML2, expose a new debug mask for future use. Signed-off-by: Aurabindo Pillai Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_shared.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 7f98394338c2..bf7f258c324a 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -257,6 +257,7 @@ enum DC_DEBUG_MASK { DC_DISABLE_MPO = 0x40, DC_DISABLE_REPLAY = 0x50, DC_ENABLE_DPIA_TRACE = 0x80, + DC_ENABLE_DML2 = 0x100, }; enum amd_dpm_forced_level; -- cgit v1.2.3 From 4657b3e45683223b5d982ec13a6e2cd367004bb6 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 18:06:55 +0530 Subject: drm/amdgpu: Restrict extended wait to PSP v13.0.6 Only PSPv13.0.6 SOCs take a longer time to reach steady state. Other PSPv13 based SOCs don't need extended wait. Also, reduce PSPv13.0.6 wait time. 
Cc: stable@vger.kernel.org Fixes: fc5988907156 ("drm/amdgpu: update retry times for psp vmbx wait") Fixes: d8c1925ba8cd ("drm/amdgpu: update retry times for psp BL wait") Link: https://lore.kernel.org/amd-gfx/34dd4c66-f7bf-44aa-af8f-c82889dd652c@amd.com/ Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 5f46877f78cf..df1844d0800f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -60,7 +60,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); #define GFX_CMD_USB_PD_USE_LFB 0x480 /* Retry times for vmbx ready wait */ -#define PSP_VMBX_POLLING_LIMIT 20000 +#define PSP_VMBX_POLLING_LIMIT 3000 /* VBIOS gfl defines */ #define MBOX_READY_MASK 0x80000000 @@ -161,14 +161,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int retry_loop, ret; + int retry_loop, retry_cnt, ret; + retry_cnt = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ? + PSP_VMBX_POLLING_LIMIT : + 10; /* Wait for bootloader to signify that it is ready having bit 31 of * C2PMSG_35 set to 1. All other bits are expected to be cleared. * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { + for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); -- cgit v1.2.3 From 0737df9ed0997f5b8addd6e2b9699a8c6edba2e4 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Mon, 4 Dec 2023 16:57:56 +0800 Subject: drm/radeon/dpm: fix a memleak in sumo_parse_power_table The rdev->pm.dpm.ps allocated by kcalloc should be freed in every following error-handling path. However, in the error-handling of rdev->pm.power_state[i].clock_info the rdev->pm.dpm.ps is not freed, resulting in a memleak in this function. Fixes: 80ea2c129c76 ("drm/radeon/kms: add dpm support for sumo asics (v2)") Signed-off-by: Zhipeng Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/sumo_dpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index f74f381af05f..d49c145db437 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -1493,8 +1493,10 @@ static int sumo_parse_power_table(struct radeon_device *rdev) non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; - if (!rdev->pm.power_state[i].clock_info) + if (!rdev->pm.power_state[i].clock_info) { + kfree(rdev->pm.dpm.ps); return -EINVAL; + } ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL); if (ps == NULL) { kfree(rdev->pm.dpm.ps); -- cgit v1.2.3 From 28c28d7f77c06ac2c0b8f9c82bc04eba22912b3b Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Mon, 4 Dec 2023 18:21:54 +0800 Subject: drm/radeon/trinity_dpm: fix a memleak in trinity_parse_power_table The rdev->pm.dpm.ps allocated by kcalloc should be freed in every following error-handling path. 
However, in the error-handling of rdev->pm.power_state[i].clock_info the rdev->pm.dpm.ps is not freed, resulting in a memleak in this function. Fixes: d70229f70447 ("drm/radeon/kms: add dpm support for trinity asics") Signed-off-by: Zhipeng Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/trinity_dpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 08ea1c864cb2..ef1cc7bad20a 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1726,8 +1726,10 @@ static int trinity_parse_power_table(struct radeon_device *rdev) non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; - if (!rdev->pm.power_state[i].clock_info) + if (!rdev->pm.power_state[i].clock_info) { + kfree(rdev->pm.dpm.ps); return -EINVAL; + } ps = kzalloc(sizeof(struct sumo_ps), GFP_KERNEL); if (ps == NULL) { kfree(rdev->pm.dpm.ps); -- cgit v1.2.3 From 44f3356e36c2082f0f91c4f6b8859c577cee14a4 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 24 Nov 2023 08:32:06 -0500 Subject: drm/amd/amdgpu: Add SMUIO headers for 10.0.2 These were requested by a UMR user for debugging purposes. Signed-off-by: Tom St Denis Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../include/asic_reg/smuio/smuio_10_0_2_offset.h | 102 ++++++++++++ .../include/asic_reg/smuio/smuio_10_0_2_sh_mask.h | 184 +++++++++++++++++++++ 2 files changed, 286 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h new file mode 100644 index 000000000000..a4dd372c0541 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _smuio_10_0_2_OFFSET_HEADER + +// addressBlock: smuio_smuio_misc_SmuSmuioDec +// base address: 0x5a000 +#define mmSMUIO_MCM_CONFIG 0x0023 +#define mmSMUIO_MCM_CONFIG_BASE_IDX 0 +#define mmIP_DISCOVERY_VERSION 0x0000 +#define mmIP_DISCOVERY_VERSION_BASE_IDX 1 +#define mmIO_SMUIO_PINSTRAP 0x01b1 +#define mmIO_SMUIO_PINSTRAP_BASE_IDX 1 +#define mmSCRATCH_REGISTER0 0x01b2 +#define mmSCRATCH_REGISTER0_BASE_IDX 1 +#define mmSCRATCH_REGISTER1 0x01b3 +#define mmSCRATCH_REGISTER1_BASE_IDX 1 +#define mmSCRATCH_REGISTER2 0x01b4 +#define mmSCRATCH_REGISTER2_BASE_IDX 1 +#define mmSCRATCH_REGISTER3 0x01b5 +#define mmSCRATCH_REGISTER3_BASE_IDX 1 +#define mmSCRATCH_REGISTER4 0x01b6 +#define mmSCRATCH_REGISTER4_BASE_IDX 1 +#define mmSCRATCH_REGISTER5 0x01b7 +#define mmSCRATCH_REGISTER5_BASE_IDX 1 +#define mmSCRATCH_REGISTER6 0x01b8 +#define mmSCRATCH_REGISTER6_BASE_IDX 1 +#define mmSCRATCH_REGISTER7 0x01b9 +#define mmSCRATCH_REGISTER7_BASE_IDX 1 + + +// addressBlock: smuio_smuio_reset_SmuSmuioDec +// base address: 0x5a300 +#define mmSMUIO_MP_RESET_INTR 0x00c1 +#define mmSMUIO_MP_RESET_INTR_BASE_IDX 0 +#define mmSMUIO_SOC_HALT 0x00c2 +#define mmSMUIO_SOC_HALT_BASE_IDX 0 +#define mmSMUIO_GFX_MISC_CNTL 0x00c8 +#define mmSMUIO_GFX_MISC_CNTL_BASE_IDX 0 + + +// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec +// base address: 0x5a000 +#define mmPWROK_REFCLK_GAP_CYCLES 0x0001 +#define mmPWROK_REFCLK_GAP_CYCLES_BASE_IDX 1 +#define mmGOLDEN_TSC_INCREMENT_UPPER 0x0004 +#define mmGOLDEN_TSC_INCREMENT_UPPER_BASE_IDX 1 +#define mmGOLDEN_TSC_INCREMENT_LOWER 0x0005 +#define mmGOLDEN_TSC_INCREMENT_LOWER_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_UPPER 0x0025 +#define mmGOLDEN_TSC_COUNT_UPPER_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER 0x0026 +#define mmGOLDEN_TSC_COUNT_LOWER_BASE_IDX 1 +#define mmGFX_GOLDEN_TSC_SHADOW_UPPER 0x0029 +#define mmGFX_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX 1 +#define mmGFX_GOLDEN_TSC_SHADOW_LOWER 0x002a +#define mmGFX_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX 1 +#define mmSOC_GOLDEN_TSC_SHADOW_UPPER 0x002b +#define mmSOC_GOLDEN_TSC_SHADOW_UPPER_BASE_IDX 1 +#define mmSOC_GOLDEN_TSC_SHADOW_LOWER 0x002c +#define mmSOC_GOLDEN_TSC_SHADOW_LOWER_BASE_IDX 1 +#define mmSOC_GAP_PWROK 0x002d +#define mmSOC_GAP_PWROK_BASE_IDX 1 + +// addressBlock: smuio_smuio_swtimer_SmuSmuioDec +// base address: 0x5ac40 +#define mmPWR_VIRT_RESET_REQ 0x0110 +#define mmPWR_VIRT_RESET_REQ_BASE_IDX 1 +#define mmPWR_DISP_TIMER_CONTROL 0x0111 +#define mmPWR_DISP_TIMER_CONTROL_BASE_IDX 1 +#define mmPWR_DISP_TIMER2_CONTROL 0x0113 +#define mmPWR_DISP_TIMER2_CONTROL_BASE_IDX 1 +#define mmPWR_DISP_TIMER_GLOBAL_CONTROL 0x0115 +#define mmPWR_DISP_TIMER_GLOBAL_CONTROL_BASE_IDX 1 +#define mmPWR_IH_CONTROL 0x0116 +#define mmPWR_IH_CONTROL_BASE_IDX 1 + +// addressBlock: smuio_smuio_svi0_SmuSmuioDec +// base address: 0x6f000 +#define mmSMUSVI0_TEL_PLANE0 0x520e +#define mmSMUSVI0_TEL_PLANE0_BASE_IDX 1 +#define mmSMUSVI0_PLANE0_CURRENTVID 0x5217 +#define mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX 1 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h new file mode 100644 index 000000000000..d10ae61c346b --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _smuio_10_0_2_SH_MASK_HEADER + +// addressBlock: smuio_smuio_misc_SmuSmuioDec +//SMUIO_MCM_CONFIG +#define SMUIO_MCM_CONFIG__DIE_ID__SHIFT 0x0 +#define SMUIO_MCM_CONFIG__PKG_TYPE__SHIFT 0x2 +#define SMUIO_MCM_CONFIG__SOCKET_ID__SHIFT 0x5 +#define SMUIO_MCM_CONFIG__PKG_SUBTYPE__SHIFT 0x6 +#define SMUIO_MCM_CONFIG__CONSOLE_K__SHIFT 0x10 +#define SMUIO_MCM_CONFIG__CONSOLE_A__SHIFT 0x11 +#define SMUIO_MCM_CONFIG__DIE_ID_MASK 0x00000003L +#define SMUIO_MCM_CONFIG__PKG_TYPE_MASK 0x0000001CL +#define SMUIO_MCM_CONFIG__SOCKET_ID_MASK 0x00000020L +#define SMUIO_MCM_CONFIG__PKG_SUBTYPE_MASK 0x000000C0L +#define SMUIO_MCM_CONFIG__CONSOLE_K_MASK 0x00010000L +#define SMUIO_MCM_CONFIG__CONSOLE_A_MASK 0x00020000L +//IP_DISCOVERY_VERSION +#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION__SHIFT 0x0 +#define IP_DISCOVERY_VERSION__IP_DISCOVERY_VERSION_MASK 0xFFFFFFFFL +//IO_SMUIO_PINSTRAP +#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN__SHIFT 0x0 +#define IO_SMUIO_PINSTRAP__AUD__SHIFT 0x3 +#define IO_SMUIO_PINSTRAP__AUD_PORT_CONN_MASK 0x00000007L +#define IO_SMUIO_PINSTRAP__AUD_MASK 0x00000018L +//SCRATCH_REGISTER0 +#define SCRATCH_REGISTER0__ScratchPad0__SHIFT 0x0 +#define SCRATCH_REGISTER0__ScratchPad0_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER1 +#define SCRATCH_REGISTER1__ScratchPad1__SHIFT 0x0 +#define SCRATCH_REGISTER1__ScratchPad1_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER2 +#define SCRATCH_REGISTER2__ScratchPad2__SHIFT 0x0 +#define SCRATCH_REGISTER2__ScratchPad2_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER3 +#define SCRATCH_REGISTER3__ScratchPad3__SHIFT 0x0 +#define SCRATCH_REGISTER3__ScratchPad3_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER4 +#define SCRATCH_REGISTER4__ScratchPad4__SHIFT 0x0 +#define SCRATCH_REGISTER4__ScratchPad4_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER5 +#define SCRATCH_REGISTER5__ScratchPad5__SHIFT 0x0 +#define SCRATCH_REGISTER5__ScratchPad5_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER6 +#define SCRATCH_REGISTER6__ScratchPad6__SHIFT 0x0 +#define SCRATCH_REGISTER6__ScratchPad6_MASK 0xFFFFFFFFL +//SCRATCH_REGISTER7 +#define SCRATCH_REGISTER7__ScratchPad7__SHIFT 0x0 +#define SCRATCH_REGISTER7__ScratchPad7_MASK 0xFFFFFFFFL + +// addressBlock: smuio_smuio_reset_SmuSmuioDec +//SMUIO_MP_RESET_INTR +#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR__SHIFT 0x0 +#define SMUIO_MP_RESET_INTR__SMUIO_MP_RESET_INTR_MASK 0x00000001L +//SMUIO_SOC_HALT +#define SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN__SHIFT 0x2 +#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN__SHIFT 0x3 +#define 
SMUIO_SOC_HALT__WDT_FORCE_PWROK_EN_MASK 0x00000004L +#define SMUIO_SOC_HALT__WDT_FORCE_RESETn_EN_MASK 0x00000008L +//SMUIO_GFX_MISC_CNTL +#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff__SHIFT 0x0 +#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS__SHIFT 0x1 +#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH__SHIFT 0x3 +#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN__SHIFT 0x4 +#define SMUIO_GFX_MISC_CNTL__SMU_GFX_cold_vs_gfxoff_MASK 0x00000001L +#define SMUIO_GFX_MISC_CNTL__PWR_GFXOFF_STATUS_MASK 0x00000006L +#define SMUIO_GFX_MISC_CNTL__PWR_GFX_DLDO_CLK_SWITCH_MASK 0x00000008L +#define SMUIO_GFX_MISC_CNTL__PWR_GFX_RLC_CGPG_EN_MASK 0x00000010L + +// addressBlock: smuio_smuio_ccxctrl_SmuSmuioDec +//PWROK_REFCLK_GAP_CYCLES +#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles__SHIFT 0x0 +#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles__SHIFT 0x8 +#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PreAssertion_clkgap_cycles_MASK 0x000000FFL +#define PWROK_REFCLK_GAP_CYCLES__Pwrok_PostAssertion_clkgap_cycles_MASK 0x0000FF00L +//GOLDEN_TSC_INCREMENT_UPPER +#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper__SHIFT 0x0 +#define GOLDEN_TSC_INCREMENT_UPPER__GoldenTscIncrementUpper_MASK 0x00FFFFFFL +//GOLDEN_TSC_INCREMENT_LOWER +#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower__SHIFT 0x0 +#define GOLDEN_TSC_INCREMENT_LOWER__GoldenTscIncrementLower_MASK 0xFFFFFFFFL +//GOLDEN_TSC_COUNT_UPPER +#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper__SHIFT 0x0 +#define GOLDEN_TSC_COUNT_UPPER__GoldenTscCountUpper_MASK 0x00FFFFFFL +//GOLDEN_TSC_COUNT_LOWER +#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower__SHIFT 0x0 +#define GOLDEN_TSC_COUNT_LOWER__GoldenTscCountLower_MASK 0xFFFFFFFFL +//GFX_GOLDEN_TSC_SHADOW_UPPER +#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper__SHIFT 0x0 +#define GFX_GOLDEN_TSC_SHADOW_UPPER__GfxGoldenTscShadowUpper_MASK 0x00FFFFFFL +//GFX_GOLDEN_TSC_SHADOW_LOWER +#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower__SHIFT 0x0 +#define GFX_GOLDEN_TSC_SHADOW_LOWER__GfxGoldenTscShadowLower_MASK 0xFFFFFFFFL +//SOC_GOLDEN_TSC_SHADOW_UPPER +#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper__SHIFT 0x0 +#define SOC_GOLDEN_TSC_SHADOW_UPPER__SocGoldenTscShadowUpper_MASK 0x00FFFFFFL +//SOC_GOLDEN_TSC_SHADOW_LOWER +#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower__SHIFT 0x0 +#define SOC_GOLDEN_TSC_SHADOW_LOWER__SocGoldenTscShadowLower_MASK 0xFFFFFFFFL +//SOC_GAP_PWROK +#define SOC_GAP_PWROK__soc_gap_pwrok__SHIFT 0x0 +#define SOC_GAP_PWROK__soc_gap_pwrok_MASK 0x00000001L + +// addressBlock: smuio_smuio_swtimer_SmuSmuioDec +//PWR_VIRT_RESET_REQ +#define PWR_VIRT_RESET_REQ__VF_FLR__SHIFT 0x0 +#define PWR_VIRT_RESET_REQ__PF_FLR__SHIFT 0x1f +#define PWR_VIRT_RESET_REQ__VF_FLR_MASK 0x7FFFFFFFL +#define PWR_VIRT_RESET_REQ__PF_FLR_MASK 0x80000000L +//PWR_DISP_TIMER_CONTROL +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT__SHIFT 0x0 +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT 0x19 +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT 0x1a +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK__SHIFT 0x1b +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT 0x1c +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE__SHIFT 0x1d +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE__SHIFT 0x1e +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_COUNT_MASK 0x01FFFFFFL +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_ENABLE_MASK 0x02000000L +#define 
PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_DISABLE_MASK 0x04000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MASK_MASK 0x08000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_STAT_AK_MASK 0x10000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_TYPE_MASK 0x20000000L +#define PWR_DISP_TIMER_CONTROL__DISP_TIMER_INT_MODE_MASK 0x40000000L +//PWR_DISP_TIMER2_CONTROL +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT__SHIFT 0x0 +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE__SHIFT 0x19 +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE__SHIFT 0x1a +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK__SHIFT 0x1b +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK__SHIFT 0x1c +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE__SHIFT 0x1d +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE__SHIFT 0x1e +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_COUNT_MASK 0x01FFFFFFL +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_ENABLE_MASK 0x02000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_DISABLE_MASK 0x04000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MASK_MASK 0x08000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_STAT_AK_MASK 0x10000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_TYPE_MASK 0x20000000L +#define PWR_DISP_TIMER2_CONTROL__DISP_TIMER_INT_MODE_MASK 0x40000000L +//PWR_DISP_TIMER_GLOBAL_CONTROL +#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH__SHIFT 0x0 +#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN__SHIFT 0xa +#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_WIDTH_MASK 0x000003FFL +#define PWR_DISP_TIMER_GLOBAL_CONTROL__DISP_TIMER_PULSE_EN_MASK 0x00000400L +//PWR_IH_CONTROL +#define PWR_IH_CONTROL__MAX_CREDIT__SHIFT 0x0 +#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK__SHIFT 0x5 +#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK__SHIFT 0x6 +#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN__SHIFT 0x1f +#define PWR_IH_CONTROL__MAX_CREDIT_MASK 0x0000001FL +#define PWR_IH_CONTROL__DISP_TIMER_TRIGGER_MASK_MASK 0x00000020L +#define PWR_IH_CONTROL__DISP_TIMER2_TRIGGER_MASK_MASK 0x00000040L +#define PWR_IH_CONTROL__PWR_IH_CLK_GATE_EN_MASK 0x80000000L + +// addressBlock: smuio_smuio_svi0_SmuSmuioDec +//SMUSVI0_TEL_PLANE0 +#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR__SHIFT 0x0 +#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR__SHIFT 0x10 +#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_IDDCOR_MASK 0x000000FFL +#define SMUSVI0_TEL_PLANE0__SVI0_PLANE0_VDDCOR_MASK 0x01FF0000L +//SMUSVI0_PLANE0_CURRENTVID +#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT 0x18 +#define SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK 0xFF000000L + +#endif -- cgit v1.2.3 From 6146081d58e3dd0c50ceb5a70a6906640727ff96 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 12:37:34 +0530 Subject: drm/amdgpu: Add NULL checks for function pointers Check if function is implemented before making the call. 
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 9ad4d6d3122b..9043ebf1e161 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1424,9 +1424,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio.funcs->get_clockgating_state(adev, flags); + if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state) + adev->nbio.funcs->get_clockgating_state(adev, flags); - adev->hdp.funcs->get_clock_gating_state(adev, flags); + if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) + adev->hdp.funcs->get_clock_gating_state(adev, flags); if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { /* AMD_CG_SUPPORT_DRM_MGCG */ @@ -1441,9 +1443,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) } /* AMD_CG_SUPPORT_ROM_MGCG */ - adev->smuio.funcs->get_clock_gating_state(adev, flags); + if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state) + adev->smuio.funcs->get_clock_gating_state(adev, flags); - adev->df.funcs->get_clockgating_state(adev, flags); + if (adev->df.funcs && adev->df.funcs->get_clockgating_state) + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, -- cgit v1.2.3 From 828afefd4b0636d92386d4b2c1f53fff87505aba Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 30 Nov 2023 15:58:21 +0530 Subject: drm/amdgpu: Update HDP 4.4.2 clock gating flags HDP 4.4.2 clockgating is enabled by default, update the flags accordingly. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 49e934975719..4db6bb73ead4 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, { int data; + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* Default enabled */ + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + return; + } /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) -- cgit v1.2.3 From b12fb2953915b092aaef956f6e80783fa70b9f40 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 30 Nov 2023 16:35:58 +0530 Subject: drm/amdgpu: Avoid querying DRM MGCG status MP0 v13.0.6 SOCs don't support DRM MGCG. 
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 9043ebf1e161..15033efec2ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1430,7 +1430,8 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) adev->hdp.funcs->get_clock_gating_state(adev, flags); - if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); if (!(data & 0x01000000)) -- cgit v1.2.3 From c03581986234044f2eeae308b7840e0083981034 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 29 Nov 2023 15:44:25 -0500 Subject: drm/amdgpu: fix buffer funcs setting order on suspend We need to disable this after the last eviction call, but before we disable the SDMA IP. Fixes: b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level") Link: https://lore.kernel.org/r/87edgv4x3i.fsf@vps.thesusis.net Reviewed-by: Luben Tuikov Tested-by: Phillip Susi Signed-off-by: Alex Deucher Cc: Phillip Susi Cc: Luben Tuikov --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d5b950fd1d85..138c7b37af69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4600,6 +4600,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (r) return r; + amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); -- cgit v1.2.3 From e17768691dd8d0664413de3123621daa0504054c Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Wed, 29 Nov 2023 19:11:22 -0500 Subject: drm/amd/amdgpu: SRIOV full reset issue with VCN - After a full reset, VF's FB will be cleaned. This includes the VCN's fw_shared memory. However, there is no suspend-resume routine for SRIOV VF. Therefore, the data in the fw_shared memory will be lost forever and it causes engine hang later on. 
We must repopulate the data in fw_shared during SRIOV hw_init Signed-off-by: Bokun Zhang Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 28b7f96b42e5..169ed400ee7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1280,6 +1280,9 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; + // Must re/init fw_shared at beginning + vcn_v4_0_fw_shared_init(adev, i); + table_size = 0; MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, -- cgit v1.2.3 From d6a57588666301acd9d42d3b00d74240964f07f6 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 1 Dec 2023 08:38:15 +0800 Subject: drm/amdgpu: disable MCBP by default Disable MCBP(mid command buffer preemption) by default as old Mesa hangs with it. We shall not enable the feature that breaks old usermode driver. Fixes: 50a7c8765ca6 ("drm/amdgpu: enable mcbp by default on gfx9") Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c0817cbc7c2..1cc1ae2743c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3791,10 +3791,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; else if (amdgpu_mcbp == 0) adev->gfx.mcbp = false; - else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) && - (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) && - adev->gfx.num_gfx_rings) - adev->gfx.mcbp = true; if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; -- cgit v1.2.3 From fec05adc40c25a028c9dfa9d540f800a2d433f80 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 1 Dec 2023 06:25:07 -0700 Subject: drm/amd/display: Use channel_width = 2 for vram table 3.0 VBIOS has suggested to use channel_width=2 for any ASIC that uses vram info 3.0. This is because channel_width in the vram table no longer represents the memory width Tested-by: Daniel Wheeler Reviewed-by: Samson Tam Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 7cdb1a8a0ba0..6a96810a477e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2386,7 +2386,13 @@ static enum bp_result get_vram_info_v30( return BP_RESULT_BADBIOSTABLE; info->num_chans = info_v30->channel_num; - info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8; + /* As suggested by VBIOS we should always use + * dram_channel_width_bytes = 2 when using VRAM + * table version 3.0. This is because the channel_width + * param in the VRAM info table is changed in 7000 series and + * no longer represents the memory channel width. 
+ */ + info->dram_channel_width_bytes = 2; return result; } -- cgit v1.2.3 From 3d71a8726e05a35beb9de394e86ce896d69e563f Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Fri, 1 Dec 2023 06:25:16 -0700 Subject: drm/amd/display: Add monitor patch for specific eDP [WHY] Some eDP panels's ext caps don't write initial value cause the value of dpcd_addr(0x317) is random. It means that sometimes the eDP will clarify it is OLED, miniLED...etc cause the backlight control interface is incorrect. [HOW] Add a new panel patch to remove sink ext caps(HDR,OLED...etc) Tested-by: Daniel Wheeler Reviewed-by: Sun peng Li Acked-by: Rodrigo Siqueira Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index c7a29bb737e2..aac98f93545a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -63,6 +63,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_fams = true; break; + /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ + case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): + case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.remove_sink_ext_caps = true; + break; default: return; } -- cgit v1.2.3 From 9f7cb03e3c32613fb5891e10ce3ff9169b09ba69 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Fri, 1 Dec 2023 06:25:40 -0700 Subject: drm/amd/display: Fix array-index-out-of-bounds in dml2 [Why] UBSAN errors observed in dmesg. array-index-out-of-bounds in dml2/display_mode_core.c [How] Fix the index. 
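(Illustration only, not part of the patch: a tiny standalone C example, unrelated to the DML code, showing the kind of array-index-out-of-bounds access that UBSAN reports at runtime when the program is built with -fsanitize=bounds.)

/* Build and run: gcc -fsanitize=bounds -g oob.c && ./a.out
 * UBSAN prints "index 4 out of bounds for type 'int [4]'". */
#include <stdio.h>

int main(int argc, char **argv)
{
	int support[4] = { 0, 1, 2, 3 };
	int j = argc + 3;	/* j == 4 when run with no arguments */

	(void)argv;
	printf("%d\n", support[j]);	/* one past the end: flagged by UBSAN */
	return 0;
}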
Tested-by: Daniel Wheeler Acked-by: Rodrigo Siqueira Signed-off-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 59718ee33e51..4d1336e5afc2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -9447,12 +9447,12 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc // Output CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; + CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0]; CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; + CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0]; CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0]; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( &mode_lib->scratch, -- cgit v1.2.3 From 78825df90d427b26964bf9610eaac30542ee9e2d Mon Sep 17 00:00:00 2001 From: Li Ma Date: Tue, 14 Nov 2023 16:17:51 +0800 Subject: drm/amd/swsmu: update smu v14_0_0 driver if version and metrics table Increment the driver if version and add new mems to the mertics table. 
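(Illustration only, not part of the patch: a simplified, hypothetical sketch of why a change to a shared metrics structure goes hand in hand with bumping the driver interface version. This is not the actual SMU firmware contract; the names and the version check are invented for illustration.)

#include <stdint.h>
#include <stdio.h>

#define DRIVER_IF_VERSION 7	/* bumped together with the struct layout */

struct metrics_v7 {
	uint16_t fclk_mhz;
	uint16_t memclk_mhz;	/* new member added in version 7 */
};

static void read_metrics(uint32_t fw_if_version, const struct metrics_v7 *m)
{
	printf("fclk: %u MHz\n", m->fclk_mhz);

	/* Only interpret fields that the reporting firmware actually fills in. */
	if (fw_if_version >= DRIVER_IF_VERSION)
		printf("memclk: %u MHz\n", m->memclk_mhz);
	else
		printf("memclk: not reported by this firmware version\n");
}

int main(void)
{
	struct metrics_v7 m = { .fclk_mhz = 1800, .memclk_mhz = 1200 };

	read_metrics(6, &m);	/* older firmware: skip the new member */
	read_metrics(7, &m);	/* matching interface: report everything */
	return 0;
}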
Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 17 +++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 10 +++ .../pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h | 77 +++++++++++++--------- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 46 ++++++++++++- 4 files changed, 115 insertions(+), 35 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 0d1209f2cf31..1c5049e894e3 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -1085,6 +1085,10 @@ struct gpu_metrics_v3_0 { uint16_t average_dram_reads; /* time filtered DRAM write bandwidth [MB/sec] */ uint16_t average_dram_writes; + /* time filtered IPU read bandwidth [MB/sec] */ + uint16_t average_ipu_reads; + /* time filtered IPU write bandwidth [MB/sec] */ + uint16_t average_ipu_writes; /* Driver attached timestamp (in ns) */ uint64_t system_clock_counter; @@ -1104,6 +1108,8 @@ struct gpu_metrics_v3_0 { uint32_t average_all_core_power; /* calculated core power [mW] */ uint16_t average_core_power[16]; + /* time filtered total system power [mW] */ + uint16_t average_sys_power; /* maximum IRM defined STAPM power limit [mW] */ uint16_t stapm_power_limit; /* time filtered STAPM power limit [mW] */ @@ -1116,6 +1122,8 @@ struct gpu_metrics_v3_0 { uint16_t average_ipuclk_frequency; uint16_t average_fclk_frequency; uint16_t average_vclk_frequency; + uint16_t average_uclk_frequency; + uint16_t average_mpipu_frequency; /* Current clocks */ /* target core frequency [MHz] */ @@ -1125,6 +1133,15 @@ struct gpu_metrics_v3_0 { /* GFXCLK frequency limit enforced on GFX [MHz] */ uint16_t current_gfx_maxfreq; + /* Throttle Residency (ASIC dependent) */ + uint32_t throttle_residency_prochot; + uint32_t throttle_residency_spl; + uint32_t throttle_residency_fppt; + uint32_t throttle_residency_sppt; + uint32_t throttle_residency_thm_core; + uint32_t throttle_residency_thm_gfx; + uint32_t throttle_residency_thm_soc; + /* Metrics table alpha filter time constant [us] */ uint32_t time_filter_alphavalue; }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 23fa71cafb14..f8b2e6cc2568 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1408,6 +1408,16 @@ typedef enum { METRICS_PCIE_WIDTH, METRICS_CURR_FANPWM, METRICS_CURR_SOCKETPOWER, + METRICS_AVERAGE_VPECLK, + METRICS_AVERAGE_IPUCLK, + METRICS_AVERAGE_MPIPUCLK, + METRICS_THROTTLER_RESIDENCY_PROCHOT, + METRICS_THROTTLER_RESIDENCY_SPL, + METRICS_THROTTLER_RESIDENCY_FPPT, + METRICS_THROTTLER_RESIDENCY_SPPT, + METRICS_THROTTLER_RESIDENCY_THM_CORE, + METRICS_THROTTLER_RESIDENCY_THM_GFX, + METRICS_THROTTLER_RESIDENCY_THM_SOC, } MetricsMember_t; enum smu_cmn2asic_mapping_type { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index 22f88842a7fd..8f42771e1f0a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 6 +#define PMFW_DRIVER_IF_VERSION 7 typedef struct { int32_t value; @@ -150,37 +150,50 @@ 
typedef struct { } DpmClocks_t; typedef struct { - uint16_t CoreFrequency[16]; //Target core frequency [MHz] - uint16_t CorePower[16]; //CAC calculated core power [mW] - uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] - uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] - uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] - uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] - uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] - uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] - uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] - uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] - uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] - uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] - uint16_t GfxActivity; //Time filtered GFX busy % [0-100] - uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] - uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] - uint16_t VcnActivity; //Time filtered VCN busy % [0-100] - uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] - uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] - uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] - uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] - uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] - uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] - uint16_t IpuPower; //Time filtered IPU power [mW] - uint32_t ApuPower; //Time filtered APU power [mW] - uint32_t GfxPower; //Time filtered GFX power [mW] - uint32_t dGpuPower; //Time filtered dGPU power [mW] - uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] - uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] - uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] - uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] - uint32_t spare[16]; + uint16_t CoreFrequency[16]; //Target core frequency [MHz] + uint16_t CorePower[16]; //CAC calculated core power [mW] + uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] + uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] + uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] + uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] + uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] + uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] + uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] + uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] + uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] + uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] + uint16_t GfxActivity; //Time filtered GFX busy % [0-100] + uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] + uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] + uint16_t VcnActivity; //Time filtered VCN busy % [0-100] + uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] + uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] + uint16_t IpuBusy[8]; //Time 
filtered IPU per-column busy % [0-100] + uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] + uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] + uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] + uint16_t IpuPower; //Time filtered IPU power [mW] + uint32_t ApuPower; //Time filtered APU power [mW] + uint32_t GfxPower; //Time filtered GFX power [mW] + uint32_t dGpuPower; //Time filtered dGPU power [mW] + uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] + uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] + uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] + uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] + uint16_t MemclkFrequency; //Time filtered target MEMCLK frequency [MHz] + uint16_t MpipuclkFrequency; //Time filtered target MPIPUCLK frequency [MHz] + uint16_t IpuReads; //Time filtered IPU read bandwidth [MB/sec] + uint16_t IpuWrites; //Time filtered IPU write bandwidth [MB/sec] + uint32_t ThrottleResidency_PROCHOT; //Counter that is incremented on every metrics table update when PROCHOT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_SPL; //Counter that is incremented on every metrics table update when SPL was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_FPPT; //Counter that is incremented on every metrics table update when fast PPT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_SPPT; //Counter that is incremented on every metrics table update when slow PPT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_CORE; //Counter that is incremented on every metrics table update when CORE thermal throttling was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_GFX; //Counter that is incremented on every metrics table update when GFX thermal throttling was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_SOC; //Counter that is incremented on every metrics table update when SOC thermal throttling was engaged [PM_TIMER cycles] + uint16_t Psys; //Time filtered Psys power [mW] + uint16_t spare1; + uint32_t spare[6]; } SmuMetrics_t; //ISP tile definitions diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 03b38c3a9968..94ccdbfd7090 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -246,11 +246,20 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = 0; break; case METRICS_AVERAGE_UCLK: - *value = 0; + *value = metrics->MemclkFrequency; break; case METRICS_AVERAGE_FCLK: *value = metrics->FclkFrequency; break; + case METRICS_AVERAGE_VPECLK: + *value = metrics->VpeclkFrequency; + break; + case METRICS_AVERAGE_IPUCLK: + *value = metrics->IpuclkFrequency; + break; + case METRICS_AVERAGE_MPIPUCLK: + *value = metrics->MpipuclkFrequency; + break; case METRICS_AVERAGE_GFXACTIVITY: *value = metrics->GfxActivity / 100; break; @@ -270,8 +279,26 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = metrics->SocTemperature / 100 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; - case METRICS_THROTTLER_STATUS: - *value = 0; + case METRICS_THROTTLER_RESIDENCY_PROCHOT: + *value = metrics->ThrottleResidency_PROCHOT; + break; + case METRICS_THROTTLER_RESIDENCY_SPL: + *value = metrics->ThrottleResidency_SPL; + break; + case 
METRICS_THROTTLER_RESIDENCY_FPPT: + *value = metrics->ThrottleResidency_FPPT; + break; + case METRICS_THROTTLER_RESIDENCY_SPPT: + *value = metrics->ThrottleResidency_SPPT; + break; + case METRICS_THROTTLER_RESIDENCY_THM_CORE: + *value = metrics->ThrottleResidency_THM_CORE; + break; + case METRICS_THROTTLER_RESIDENCY_THM_GFX: + *value = metrics->ThrottleResidency_THM_GFX; + break; + case METRICS_THROTTLER_RESIDENCY_THM_SOC: + *value = metrics->ThrottleResidency_THM_SOC; break; case METRICS_VOLTAGE_VDDGFX: *value = 0; @@ -498,6 +525,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, sizeof(uint16_t) * 16); gpu_metrics->average_dram_reads = metrics.DRAMReads; gpu_metrics->average_dram_writes = metrics.DRAMWrites; + gpu_metrics->average_ipu_reads = metrics.IpuReads; + gpu_metrics->average_ipu_writes = metrics.IpuWrites; gpu_metrics->average_socket_power = metrics.SocketPower; gpu_metrics->average_ipu_power = metrics.IpuPower; @@ -505,6 +534,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_power = metrics.GfxPower; gpu_metrics->average_dgpu_power = metrics.dGpuPower; gpu_metrics->average_all_core_power = metrics.AllCorePower; + gpu_metrics->average_sys_power = metrics.Psys; memcpy(&gpu_metrics->average_core_power[0], &metrics.CorePower[0], sizeof(uint16_t) * 16); @@ -515,6 +545,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_fclk_frequency = metrics.FclkFrequency; gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; + gpu_metrics->average_mpipu_frequency = metrics.MpipuclkFrequency; memcpy(&gpu_metrics->current_coreclk[0], &metrics.CoreFrequency[0], @@ -522,6 +554,14 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->current_core_maxfreq = metrics.InfrastructureCpuMaxFreq; gpu_metrics->current_gfx_maxfreq = metrics.InfrastructureGfxMaxFreq; + gpu_metrics->throttle_residency_prochot = metrics.ThrottleResidency_PROCHOT; + gpu_metrics->throttle_residency_spl = metrics.ThrottleResidency_SPL; + gpu_metrics->throttle_residency_fppt = metrics.ThrottleResidency_FPPT; + gpu_metrics->throttle_residency_sppt = metrics.ThrottleResidency_SPPT; + gpu_metrics->throttle_residency_thm_core = metrics.ThrottleResidency_THM_CORE; + gpu_metrics->throttle_residency_thm_gfx = metrics.ThrottleResidency_THM_GFX; + gpu_metrics->throttle_residency_thm_soc = metrics.ThrottleResidency_THM_SOC; + gpu_metrics->time_filter_alphavalue = metrics.FilterAlphaValue; gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); -- cgit v1.2.3 From 37c57631c18661c4c0dc415e75afd143ed89e098 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 4 Dec 2023 10:17:57 +0800 Subject: drm/amd/pm: support new mca smu error code decoding support new mca smu error code decoding from smu 85.86.0 for smu v13.0.6 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 2b488fcf2f95..e51e8918e667 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -46,6 +46,8 @@ #define MCA_REG__STATUS__ERRORCODEEXT(x) 
MCA_REG_FIELD(x, 21, 16) #define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) +#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0) + enum amdgpu_mca_ip { AMDGPU_MCA_IP_UNKNOW = -1, AMDGPU_MCA_IP_PSP = 0, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 0e5a77c3c2e2..900a2d9e6d85 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2593,13 +2593,20 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) { + struct smu_context *smu = adev->powerplay.pp_handle; uint32_t errcode, instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); if (instlo != 0x03b30400) return false; - errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) { + errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); + errcode &= 0xff; + } else { + errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + } + return mca_smu_check_error_code(adev, mca_ras, errcode); } -- cgit v1.2.3 From 0e8af20517197934cc04f8e361c6bbe198c327fd Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Nov 2023 10:43:02 +0800 Subject: drm/amdgpu: Update fw version for boot time error query Boot time error query is not available until fw a10109 Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 3cf4684d0d3f..5f46877f78cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -821,7 +821,7 @@ static int psp_v13_0_query_boot_status(struct psp_context *psp) if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) return 0; - if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007) + if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109) return 0; for_each_inst(i, inst_mask) { -- cgit v1.2.3 From dbf3850d12baf3ba8a80c302f538d1b01940aef7 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 4 Dec 2023 10:44:32 +0800 Subject: drm/amdgpu: optimize the printing order of error data sort error data list to optimize the printing order. 
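(Illustration only, not part of the patch: a small standalone C sketch of the two-level ordering the new comparator aims for, socket id first and die id second, shown here with qsort() on plain structs. The data and types are hypothetical, not the RAS error structures.)

#include <stdio.h>
#include <stdlib.h>

struct mcm_info {
	int socket_id;
	int die_id;
};

/* Order primarily by socket, then by die within the same socket. */
static int mcm_cmp(const void *a, const void *b)
{
	const struct mcm_info *ia = a, *ib = b;

	if (ia->socket_id != ib->socket_id)
		return ia->socket_id - ib->socket_id;
	return ia->die_id - ib->die_id;
}

int main(void)
{
	struct mcm_info errs[] = {
		{ 1, 0 }, { 0, 1 }, { 1, 1 }, { 0, 0 },
	};
	size_t i, n = sizeof(errs) / sizeof(errs[0]);

	qsort(errs, n, sizeof(errs[0]), mcm_cmp);

	for (i = 0; i < n; i++)
		printf("socket %d die %d\n", errs[i].socket_id, errs[i].die_id);
	return 0;
}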
Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a3dc68e98910..63fb4cd85e53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_ras.h" @@ -3665,6 +3666,21 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void) return err_node; } +static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b) +{ + struct ras_err_node *nodea = container_of(a, struct ras_err_node, node); + struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node); + struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info; + struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info; + + if (unlikely(infoa->socket_id != infob->socket_id)) + return infoa->socket_id - infob->socket_id; + else + return infoa->die_id - infob->die_id; + + return 0; +} + static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info) { @@ -3682,6 +3698,7 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); + list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp); return &err_node->err_info; } -- cgit v1.2.3 From 5b750b22530fe53bf7fd6a30baacd53ada26911b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 30 Nov 2023 17:34:07 -0500 Subject: drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml Does the same thing as: commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") Reviewed-by: Harry Wentland Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311302107.hUDXVyWT-lkp@intel.com/ Fixes: 67e38874b85b ("drm/amd/display: Increase num voltage states to 40") Signed-off-by: Alex Deucher Cc: Alvin Lee Cc: Hamza Mahfooz Cc: Samson Tam Cc: Harry Wentland --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ea7d60f9a9b4..6042a5a6a44f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -61,8 +61,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) -- cgit v1.2.3 From 6fce23a4d8c5f93bf80b7f122449fbb97f1e40dd Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 18:06:55 +0530 Subject: drm/amdgpu: Restrict extended wait to PSP v13.0.6 Only PSPv13.0.6 SOCs take a longer time to reach steady state. Other PSPv13 based SOCs don't need extended wait. Also, reduce PSPv13.0.6 wait time. 
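(Illustration only, not part of the patch: a self-contained C sketch of the general pattern used above, picking a per-variant retry budget and polling until the device reports ready or the budget runs out. The register read, delays and counts are stand-ins, not the PSP values.)

#include <stdbool.h>
#include <stdio.h>

#define RETRIES_SLOW_VARIANT 3000	/* variant known to need a long wait */
#define RETRIES_DEFAULT      10

static int poll_count;

static bool device_ready(void)
{
	/* Stand-in for a status register read; "ready" after a few polls. */
	return ++poll_count > 5;
}

static int wait_for_ready(bool slow_variant)
{
	int budget = slow_variant ? RETRIES_SLOW_VARIANT : RETRIES_DEFAULT;
	int i;

	for (i = 0; i < budget; i++) {
		if (device_ready())
			return 0;
		/* a real driver would sleep or delay between polls here */
	}
	return -1;	/* timed out */
}

int main(void)
{
	printf("%s\n", wait_for_ready(false) == 0 ? "device ready" : "timed out");
	return 0;
}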
Cc: stable@vger.kernel.org Fixes: fc5988907156 ("drm/amdgpu: update retry times for psp vmbx wait") Fixes: d8c1925ba8cd ("drm/amdgpu: update retry times for psp BL wait") Link: https://lore.kernel.org/amd-gfx/34dd4c66-f7bf-44aa-af8f-c82889dd652c@amd.com/ Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 5f46877f78cf..df1844d0800f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -60,7 +60,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); #define GFX_CMD_USB_PD_USE_LFB 0x480 /* Retry times for vmbx ready wait */ -#define PSP_VMBX_POLLING_LIMIT 20000 +#define PSP_VMBX_POLLING_LIMIT 3000 /* VBIOS gfl defines */ #define MBOX_READY_MASK 0x80000000 @@ -161,14 +161,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int retry_loop, ret; + int retry_loop, retry_cnt, ret; + retry_cnt = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ? + PSP_VMBX_POLLING_LIMIT : + 10; /* Wait for bootloader to signify that it is ready having bit 31 of * C2PMSG_35 set to 1. All other bits are expected to be cleared. * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { + for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); -- cgit v1.2.3 From 81577503efb49f4ad76af22f9941d72900ef4aab Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 12:37:34 +0530 Subject: drm/amdgpu: Add NULL checks for function pointers Check if function is implemented before making the call. 
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c82776e5e9aa..edd6344d1e2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1423,9 +1423,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio.funcs->get_clockgating_state(adev, flags); + if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state) + adev->nbio.funcs->get_clockgating_state(adev, flags); - adev->hdp.funcs->get_clock_gating_state(adev, flags); + if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) + adev->hdp.funcs->get_clock_gating_state(adev, flags); if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { /* AMD_CG_SUPPORT_DRM_MGCG */ @@ -1440,9 +1442,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) } /* AMD_CG_SUPPORT_ROM_MGCG */ - adev->smuio.funcs->get_clock_gating_state(adev, flags); + if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state) + adev->smuio.funcs->get_clock_gating_state(adev, flags); - adev->df.funcs->get_clockgating_state(adev, flags); + if (adev->df.funcs && adev->df.funcs->get_clockgating_state) + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, -- cgit v1.2.3 From 555e39f0270b1a1c51224044be9922b4c3a4c27f Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 30 Nov 2023 15:58:21 +0530 Subject: drm/amdgpu: Update HDP 4.4.2 clock gating flags HDP 4.4.2 clockgating is enabled by default, update the flags accordingly. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 49e934975719..4db6bb73ead4 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, { int data; + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* Default enabled */ + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + return; + } /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) -- cgit v1.2.3 From 27b024a88acba17c8e3a71ff4fd425064851e3b7 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 30 Nov 2023 16:35:58 +0530 Subject: drm/amdgpu: Avoid querying DRM MGCG status MP0 v13.0.6 SOCs don't support DRM MGCG. 
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index edd6344d1e2b..51342809af03 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1429,7 +1429,8 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) adev->hdp.funcs->get_clock_gating_state(adev, flags); - if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); if (!(data & 0x01000000)) -- cgit v1.2.3 From dab96d8b61aab1a4f99d0b86964a6c40e7bb1756 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 29 Nov 2023 15:44:25 -0500 Subject: drm/amdgpu: fix buffer funcs setting order on suspend We need to disable this after the last eviction call, but before we disable the SDMA IP. Fixes: b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level") Link: https://lore.kernel.org/r/87edgv4x3i.fsf@vps.thesusis.net Reviewed-by: Luben Tuikov Tested-by: Phillip Susi Signed-off-by: Alex Deucher Cc: Phillip Susi Cc: Luben Tuikov --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1cc1ae2743c1..1f64d8cbb14d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4527,6 +4527,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (r) return r; + amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); -- cgit v1.2.3 From 73bf1c9ae6c054c53b8e84452c5e46f86dd28246 Mon Sep 17 00:00:00 2001 From: Xiang Yang Date: Sat, 12 Aug 2023 14:27:48 +0800 Subject: drm/exynos: fix a potential error pointer dereference Smatch reports the warning below: drivers/gpu/drm/exynos/exynos_hdmi.c:1864 hdmi_bind() error: 'crtc' dereferencing possible ERR_PTR() The return value of exynos_drm_crtc_get_by_type maybe ERR_PTR(-ENODEV), which can not be used directly. Fix this by checking the return value before using it. Signed-off-by: Xiang Yang Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index f3aaa4ea3e68..dd9903eab563 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1861,6 +1861,8 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data) return ret; crtc = exynos_drm_crtc_get_by_type(drm_dev, EXYNOS_DISPLAY_TYPE_HDMI); + if (IS_ERR(crtc)) + return PTR_ERR(crtc); crtc->pipe_clk = &hdata->phy_clk; ret = hdmi_create_connector(encoder); -- cgit v1.2.3 From 8d1b7809684c688005706125b804e1f9792d2b1b Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Wed, 1 Nov 2023 18:36:51 +0900 Subject: drm/exynos: fix a wrong error checking Fix a wrong error checking in exynos_drm_dma.c module. 
In the exynos_drm_register_dma function, both arm_iommu_create_mapping() and iommu_get_domain_for_dev() functions are expected to return NULL as an error. However, the error checking is performed using the statement if(IS_ERR(mapping)), which doesn't provide a suitable error value. So check if 'mapping' is NULL, and if it is, return -ENODEV. This issue[1] was reported by Dan. Changelog v1: - fix build warning. [1] https://lore.kernel.org/all/33e52277-1349-472b-a55b-ab5c3462bfcf@moroto.mountain/ Reported-by : Dan Carpenter Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index a971590b8132..e2c7373f20c6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -107,18 +107,16 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev, return 0; if (!priv->mapping) { - void *mapping; + void *mapping = NULL; if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) mapping = arm_iommu_create_mapping(&platform_bus_type, EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE); else if (IS_ENABLED(CONFIG_IOMMU_DMA)) mapping = iommu_get_domain_for_dev(priv->dma_dev); - else - mapping = ERR_PTR(-ENODEV); - if (IS_ERR(mapping)) - return PTR_ERR(mapping); + if (!mapping) + return -ENODEV; priv->mapping = mapping; } -- cgit v1.2.3 From dcdf1bbe82f4b2a301a3692a0b1942c3fda70644 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 1 Dec 2023 12:00:32 +0200 Subject: drm/i915: handle uncore spinlock when not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uncore code may not always be available (e.g. when we build the display code with Xe), so we can't always rely on having the uncore's spinlock. To handle this, split the spin_lock/unlock_irqsave/restore() into spin_lock/unlock() followed by a call to local_irq_save/restore() and create wrapper functions for locking and unlocking the uncore's spinlock. In these functions, we have a condition check and only actually try to lock/unlock the spinlock when I915 is defined, and thus uncore is available. This keeps the ifdefs contained in these new functions and all such logic inside the display code. Cc: Tvrtko Ursulin Cc: Jani Nikula Cc: Ville Syrjala Cc: Rodrigo Vivi Signed-off-by: Luca Coelho Reviewed-by: Jouni Högander Signed-off-by: Jouni Högander Link: https://patchwork.freedesktop.org/patch/msgid/20231201100032.1367589-1-luciano.coelho@intel.com --- drivers/gpu/drm/i915/display/intel_vblank.c | 51 +++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 2cec2abf9746..fe256bf7b485 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -265,6 +265,32 @@ int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline) return (scanline + vtotal - crtc->scanline_offset) % vtotal; } +/* + * The uncore version of the spin lock functions is used to decide + * whether we need to lock the uncore lock or not. This is only + * needed in i915, not in Xe. 
+ * + * This lock in i915 is needed because some old platforms (at least + * IVB and possibly HSW as well), which are not supported in Xe, need + * all register accesses to the same cacheline to be serialized, + * otherwise they may hang. + */ +static void intel_vblank_section_enter(struct drm_i915_private *i915) + __acquires(i915->uncore.lock) +{ +#ifdef I915 + spin_lock(&i915->uncore.lock); +#endif +} + +static void intel_vblank_section_exit(struct drm_i915_private *i915) + __releases(i915->uncore.lock) +{ +#ifdef I915 + spin_unlock(&i915->uncore.lock); +#endif +} + static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, bool in_vblank_irq, int *vpos, int *hpos, @@ -302,11 +328,12 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, } /* - * Lock uncore.lock, as we will do multiple timing critical raw - * register reads, potentially with preemption disabled, so the - * following code must not block on uncore.lock. + * Enter vblank critical section, as we will do multiple + * timing critical raw register reads, potentially with + * preemption disabled, so the following code must not block. */ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + local_irq_save(irqflags); + intel_vblank_section_enter(dev_priv); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -374,7 +401,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + intel_vblank_section_exit(dev_priv); + local_irq_restore(irqflags); /* * While in vblank, position will be negative @@ -412,9 +440,13 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) unsigned long irqflags; int position; - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + local_irq_save(irqflags); + intel_vblank_section_enter(dev_priv); + position = __intel_get_crtc_scanline(crtc); - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + + intel_vblank_section_exit(dev_priv); + local_irq_restore(irqflags); return position; } @@ -537,7 +569,7 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, * Need to audit everything to make sure it's safe. */ spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags); - spin_lock(&i915->uncore.lock); + intel_vblank_section_enter(i915); drm_calc_timestamping_constants(&crtc->base, &adjusted_mode); @@ -546,7 +578,6 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, crtc->mode_flags = mode_flags; crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state); - - spin_unlock(&i915->uncore.lock); + intel_vblank_section_exit(i915); spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags); } -- cgit v1.2.3 From 261200eb7030dc796f08c1ad778bd0b18b19451b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Dec 2023 14:15:45 +0200 Subject: drm/i915/rpm: add rpm_to_i915() helper around container_of() Reduce the duplication. 
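(Illustration only, not part of the patch: a userspace C rendition of the container_of() technique that rpm_to_i915() wraps, recovering a pointer to the enclosing structure from a pointer to one of its members. The structures here are hypothetical, not the i915 types.)

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct runtime_pm {
	int wakeref_count;
};

struct device_private {
	int id;
	struct runtime_pm runtime_pm;	/* embedded member */
};

/* Helper in the spirit of rpm_to_i915(): keep the cast in one place. */
static struct device_private *rpm_to_priv(struct runtime_pm *rpm)
{
	return container_of(rpm, struct device_private, runtime_pm);
}

int main(void)
{
	struct device_private dev = { .id = 42 };
	struct runtime_pm *rpm = &dev.runtime_pm;

	printf("owner id = %d\n", rpm_to_priv(rpm)->id);
	return 0;
}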
Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20231205121545.2338665-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8743153fad87..156cb1536b47 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -50,6 +50,11 @@ * present for a given platform. */ +static struct drm_i915_private *rpm_to_i915(struct intel_runtime_pm *rpm) +{ + return container_of(rpm, struct drm_i915_private, runtime_pm); +} + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) #include @@ -349,9 +354,7 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock) static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm, bool wakelock) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); int ret; ret = pm_runtime_get_sync(rpm->kdev); @@ -556,9 +559,7 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref) */ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); struct device *kdev = rpm->kdev; /* @@ -611,9 +612,7 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm) void intel_runtime_pm_disable(struct intel_runtime_pm *rpm) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); struct device *kdev = rpm->kdev; /* Transfer rpm ownership back to core */ @@ -628,9 +627,7 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm) void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm) { - struct drm_i915_private *i915 = container_of(rpm, - struct drm_i915_private, - runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); int count = atomic_read(&rpm->wakeref_count); intel_wakeref_auto_fini(&rpm->userfault_wakeref); @@ -645,8 +642,7 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm) void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm) { - struct drm_i915_private *i915 = - container_of(rpm, struct drm_i915_private, runtime_pm); + struct drm_i915_private *i915 = rpm_to_i915(rpm); struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct device *kdev = &pdev->dev; -- cgit v1.2.3 From 922181a52de923a2220998a26d84d94889dd6e97 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Dec 2023 15:41:41 +0200 Subject: drm/i915: use intel_connector in intel_connector_debugfs_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prefer struct intel_connector over struct drm_connector. 
Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231205134143.2427661-1-jani.nikula@intel.com --- .../gpu/drm/i915/display/intel_display_debugfs.c | 53 +++++++++++----------- 1 file changed, 26 insertions(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index a7675a7339c3..32539dfd506e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1566,59 +1566,58 @@ DEFINE_SHOW_ATTRIBUTE(intel_crtc_pipe); /** * intel_connector_debugfs_add - add i915 specific connector debugfs files - * @intel_connector: pointer to a registered drm_connector + * @connector: pointer to a registered intel_connector * * Cleanup will be done by drm_connector_unregister() through a call to * drm_debugfs_connector_remove(). */ -void intel_connector_debugfs_add(struct intel_connector *intel_connector) +void intel_connector_debugfs_add(struct intel_connector *connector) { - struct drm_connector *connector = &intel_connector->base; - struct dentry *root = connector->debugfs_entry; - struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct dentry *root = connector->base.debugfs_entry; + int connector_type = connector->base.connector_type; /* The connector must have been registered beforehands. */ if (!root) return; - intel_drrs_connector_debugfs_add(intel_connector); - intel_psr_connector_debugfs_add(intel_connector); + intel_drrs_connector_debugfs_add(connector); + intel_psr_connector_debugfs_add(connector); - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) + if (connector_type == DRM_MODE_CONNECTOR_eDP) debugfs_create_file("i915_panel_timings", S_IRUGO, root, - connector, &i915_panel_fops); + &connector->base, &i915_panel_fops); - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || - connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || - connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) { + if (connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector_type == DRM_MODE_CONNECTOR_HDMIB) { debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root, - connector, &i915_hdcp_sink_capability_fops); + &connector->base, &i915_hdcp_sink_capability_fops); } - if (DISPLAY_VER(dev_priv) >= 11 && - ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - !to_intel_connector(connector)->mst_port) || - connector->connector_type == DRM_MODE_CONNECTOR_eDP)) { + if (DISPLAY_VER(i915) >= 11 && + ((connector_type == DRM_MODE_CONNECTOR_DisplayPort && !connector->mst_port) || + connector_type == DRM_MODE_CONNECTOR_eDP)) { debugfs_create_file("i915_dsc_fec_support", 0644, root, - connector, &i915_dsc_fec_support_fops); + &connector->base, &i915_dsc_fec_support_fops); debugfs_create_file("i915_dsc_bpc", 0644, root, - connector, &i915_dsc_bpc_fops); + &connector->base, &i915_dsc_bpc_fops); debugfs_create_file("i915_dsc_output_format", 0644, root, - connector, &i915_dsc_output_format_fops); + &connector->base, &i915_dsc_output_format_fops); debugfs_create_file("i915_dsc_fractional_bpp", 0644, root, - connector, &i915_dsc_fractional_bpp_fops); + &connector->base, &i915_dsc_fractional_bpp_fops); } - if (connector->connector_type == DRM_MODE_CONNECTOR_DSI || - connector->connector_type == DRM_MODE_CONNECTOR_eDP || - 
connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || - connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || - connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) + if (connector_type == DRM_MODE_CONNECTOR_DSI || + connector_type == DRM_MODE_CONNECTOR_eDP || + connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector_type == DRM_MODE_CONNECTOR_HDMIB) debugfs_create_file("i915_lpsp_capability", 0444, root, - connector, &i915_lpsp_capability_fops); + &connector->base, &i915_lpsp_capability_fops); } /** -- cgit v1.2.3 From 77bdb83f0dbc8dd64c07bba08ecd2ac83030a508 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Dec 2023 15:41:42 +0200 Subject: drm/i915: pass struct intel_connector to connector debugfs fops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prefer struct intel_connector over struct drm_connector, and unify the declarations in the fops. Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231205134143.2427661-2-jani.nikula@intel.com --- .../gpu/drm/i915/display/intel_display_debugfs.c | 134 ++++++++++----------- 1 file changed, 66 insertions(+), 68 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 32539dfd506e..85e5e21a859c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1117,11 +1117,10 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) static int i915_panel_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct intel_dp *intel_dp = - intel_attached_dp(to_intel_connector(connector)); + struct intel_connector *connector = m->private; + struct intel_dp *intel_dp = intel_attached_dp(connector); - if (connector->status != connector_status_connected) + if (connector->base.status != connector_status_connected) return -ENODEV; seq_printf(m, "Panel power up delay: %d\n", @@ -1139,23 +1138,23 @@ DEFINE_SHOW_ATTRIBUTE(i915_panel); static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_i915_private *i915 = to_i915(connector->dev); - struct intel_connector *intel_connector = to_intel_connector(connector); + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); int ret; ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex); if (ret) return ret; - if (!connector->encoder || connector->status != connector_status_connected) { + if (!connector->base.encoder || + connector->base.status != connector_status_connected) { ret = -ENODEV; goto out; } - seq_printf(m, "%s:%d HDCP version: ", connector->name, - connector->base.id); - intel_hdcp_info(m, intel_connector); + seq_printf(m, "%s:%d HDCP version: ", connector->base.name, + connector->base.base.id); + intel_hdcp_info(m, connector); out: drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); @@ -1166,16 +1165,16 @@ DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability); static int i915_lpsp_capability_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_i915_private *i915 = to_i915(connector->dev); - struct intel_encoder *encoder; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 
= to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); + int connector_type = connector->base.connector_type; bool lpsp_capable = false; - encoder = intel_attached_encoder(to_intel_connector(connector)); if (!encoder) return -ENODEV; - if (connector->status != connector_status_connected) + if (connector->base.status != connector_status_connected) return -ENODEV; if (DISPLAY_VER(i915) >= 13) @@ -1188,15 +1187,15 @@ static int i915_lpsp_capability_show(struct seq_file *m, void *data) */ lpsp_capable = encoder->port <= PORT_B; else if (DISPLAY_VER(i915) == 11) - lpsp_capable = (connector->connector_type == DRM_MODE_CONNECTOR_DSI || - connector->connector_type == DRM_MODE_CONNECTOR_eDP); + lpsp_capable = (connector_type == DRM_MODE_CONNECTOR_DSI || + connector_type == DRM_MODE_CONNECTOR_eDP); else if (IS_DISPLAY_VER(i915, 9, 10)) lpsp_capable = (encoder->port == PORT_A && - (connector->connector_type == DRM_MODE_CONNECTOR_DSI || - connector->connector_type == DRM_MODE_CONNECTOR_eDP || - connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)); + (connector_type == DRM_MODE_CONNECTOR_DSI || + connector_type == DRM_MODE_CONNECTOR_eDP || + connector_type == DRM_MODE_CONNECTOR_DisplayPort)); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - lpsp_capable = connector->connector_type == DRM_MODE_CONNECTOR_eDP; + lpsp_capable = connector_type == DRM_MODE_CONNECTOR_eDP; seq_printf(m, "LPSP: %s\n", lpsp_capable ? "capable" : "incapable"); @@ -1206,7 +1205,7 @@ DEFINE_SHOW_ATTRIBUTE(i915_lpsp_capability); static int i915_dsc_fec_support_show(struct seq_file *m, void *data) { - struct intel_connector *connector = to_intel_connector(m->private); + struct intel_connector *connector = m->private; struct drm_i915_private *i915 = to_i915(connector->base.dev); struct drm_crtc *crtc; struct intel_dp *intel_dp; @@ -1276,13 +1275,13 @@ static ssize_t i915_dsc_fec_support_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { + struct seq_file *m = file->private_data; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); bool dsc_enable = false; int ret; - struct drm_connector *connector = - ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); if (len == 0) return 0; @@ -1320,22 +1319,22 @@ static const struct file_operations i915_dsc_fec_support_fops = { static int i915_dsc_bpc_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_device *dev = connector->dev; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); struct drm_crtc *crtc; struct intel_crtc_state *crtc_state; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); int ret; if (!encoder) return -ENODEV; - ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex); + ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex); if (ret) return ret; - crtc = connector->state->crtc; - if (connector->status != connector_status_connected || !crtc) { + 
crtc = connector->base.state->crtc; + if (connector->base.status != connector_status_connected || !crtc) { ret = -ENODEV; goto out; } @@ -1343,7 +1342,7 @@ static int i915_dsc_bpc_show(struct seq_file *m, void *data) crtc_state = to_intel_crtc_state(crtc->state); seq_printf(m, "Input_BPC: %d\n", crtc_state->dsc.config.bits_per_component); -out: drm_modeset_unlock(&dev->mode_config.connection_mutex); +out: drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); return ret; } @@ -1352,9 +1351,9 @@ static ssize_t i915_dsc_bpc_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { - struct drm_connector *connector = - ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + struct seq_file *m = file->private_data; + struct intel_connector *connector = m->private; + struct intel_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); int dsc_bpc = 0; int ret; @@ -1386,22 +1385,22 @@ static const struct file_operations i915_dsc_bpc_fops = { static int i915_dsc_output_format_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_device *dev = connector->dev; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); struct drm_crtc *crtc; struct intel_crtc_state *crtc_state; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); int ret; if (!encoder) return -ENODEV; - ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex); + ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex); if (ret) return ret; - crtc = connector->state->crtc; - if (connector->status != connector_status_connected || !crtc) { + crtc = connector->base.state->crtc; + if (connector->base.status != connector_status_connected || !crtc) { ret = -ENODEV; goto out; } @@ -1410,7 +1409,7 @@ static int i915_dsc_output_format_show(struct seq_file *m, void *data) seq_printf(m, "DSC_Output_Format: %s\n", intel_output_format_name(crtc_state->output_format)); -out: drm_modeset_unlock(&dev->mode_config.connection_mutex); +out: drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); return ret; } @@ -1419,9 +1418,9 @@ static ssize_t i915_dsc_output_format_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { - struct drm_connector *connector = - ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + struct seq_file *m = file->private_data; + struct intel_connector *connector = m->private; + struct intel_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); int dsc_output_format = 0; int ret; @@ -1453,33 +1452,32 @@ static const struct file_operations i915_dsc_output_format_fops = { static int i915_dsc_fractional_bpp_show(struct seq_file *m, void *data) { - struct drm_connector *connector = m->private; - struct drm_device *dev = connector->dev; + struct intel_connector *connector = m->private; + struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_encoder *encoder = intel_attached_encoder(connector); struct drm_crtc *crtc; struct intel_dp *intel_dp; - struct intel_connector *intel_connector = to_intel_connector(connector); - struct 
intel_encoder *encoder = intel_attached_encoder(intel_connector); int ret; if (!encoder) return -ENODEV; - ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex); + ret = drm_modeset_lock_single_interruptible(&i915->drm.mode_config.connection_mutex); if (ret) return ret; - crtc = connector->state->crtc; - if (connector->status != connector_status_connected || !crtc) { + crtc = connector->base.state->crtc; + if (connector->base.status != connector_status_connected || !crtc) { ret = -ENODEV; goto out; } - intel_dp = intel_attached_dp(intel_connector); + intel_dp = intel_attached_dp(connector); seq_printf(m, "Force_DSC_Fractional_BPP_Enable: %s\n", str_yes_no(intel_dp->force_dsc_fractional_bpp_en)); out: - drm_modeset_unlock(&dev->mode_config.connection_mutex); + drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); return ret; } @@ -1488,10 +1486,10 @@ static ssize_t i915_dsc_fractional_bpp_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { - struct drm_connector *connector = - ((struct seq_file *)file->private_data)->private; - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct seq_file *m = file->private_data; + struct intel_connector *connector = m->private; + struct intel_encoder *encoder = intel_attached_encoder(connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); bool dsc_fractional_bpp_enable = false; int ret; @@ -1586,29 +1584,29 @@ void intel_connector_debugfs_add(struct intel_connector *connector) if (connector_type == DRM_MODE_CONNECTOR_eDP) debugfs_create_file("i915_panel_timings", S_IRUGO, root, - &connector->base, &i915_panel_fops); + connector, &i915_panel_fops); if (connector_type == DRM_MODE_CONNECTOR_DisplayPort || connector_type == DRM_MODE_CONNECTOR_HDMIA || connector_type == DRM_MODE_CONNECTOR_HDMIB) { debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root, - &connector->base, &i915_hdcp_sink_capability_fops); + connector, &i915_hdcp_sink_capability_fops); } if (DISPLAY_VER(i915) >= 11 && ((connector_type == DRM_MODE_CONNECTOR_DisplayPort && !connector->mst_port) || connector_type == DRM_MODE_CONNECTOR_eDP)) { debugfs_create_file("i915_dsc_fec_support", 0644, root, - &connector->base, &i915_dsc_fec_support_fops); + connector, &i915_dsc_fec_support_fops); debugfs_create_file("i915_dsc_bpc", 0644, root, - &connector->base, &i915_dsc_bpc_fops); + connector, &i915_dsc_bpc_fops); debugfs_create_file("i915_dsc_output_format", 0644, root, - &connector->base, &i915_dsc_output_format_fops); + connector, &i915_dsc_output_format_fops); debugfs_create_file("i915_dsc_fractional_bpp", 0644, root, - &connector->base, &i915_dsc_fractional_bpp_fops); + connector, &i915_dsc_fractional_bpp_fops); } if (connector_type == DRM_MODE_CONNECTOR_DSI || @@ -1617,7 +1615,7 @@ void intel_connector_debugfs_add(struct intel_connector *connector) connector_type == DRM_MODE_CONNECTOR_HDMIA || connector_type == DRM_MODE_CONNECTOR_HDMIB) debugfs_create_file("i915_lpsp_capability", 0444, root, - &connector->base, &i915_lpsp_capability_fops); + connector, &i915_lpsp_capability_fops); } /** -- cgit v1.2.3 From f270b7087dc8369d21018541157a270a023e7f21 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Dec 2023 15:41:43 +0200 Subject: drm/i915: use octal permissions in display debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Octal permissions are preferred over the symbolics ones. Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231205134143.2427661-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 85e5e21a859c..0cb819fc2a0c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -1096,7 +1096,7 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(intel_display_debugfs_files); i++) { debugfs_create_file(intel_display_debugfs_files[i].name, - S_IRUGO | S_IWUSR, + 0644, minor->debugfs_root, to_i915(minor->dev), intel_display_debugfs_files[i].fops); @@ -1583,13 +1583,13 @@ void intel_connector_debugfs_add(struct intel_connector *connector) intel_psr_connector_debugfs_add(connector); if (connector_type == DRM_MODE_CONNECTOR_eDP) - debugfs_create_file("i915_panel_timings", S_IRUGO, root, + debugfs_create_file("i915_panel_timings", 0444, root, connector, &i915_panel_fops); if (connector_type == DRM_MODE_CONNECTOR_DisplayPort || connector_type == DRM_MODE_CONNECTOR_HDMIA || connector_type == DRM_MODE_CONNECTOR_HDMIB) { - debugfs_create_file("i915_hdcp_sink_capability", S_IRUGO, root, + debugfs_create_file("i915_hdcp_sink_capability", 0444, root, connector, &i915_hdcp_sink_capability_fops); } -- cgit v1.2.3 From 7054b551de18e9875fbdf8d4f3baade428353545 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 25 Oct 2023 12:11:31 +0200 Subject: drm/i915/display: Use i915_gem_object_get_dma_address to get dma address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Works better for xe like that. obj is no longer const. 
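For reference, the access change can be reduced to two lines (an illustrative sketch, not the full cursor path; both helpers are the real i915 interfaces touched in the diff below):

    /* Before: pull the DMA address straight out of the object's sg_table. */
    base = sg_dma_address(obj->mm.pages->sgl);

    /* After: ask the GEM layer for the DMA address of page 0, which also gives
     * xe a single interface to implement behind the same call.
     */
    base = i915_gem_object_get_dma_address(obj, 0);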
Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20231204134946.16219-1-maarten.lankhorst@linux.intel.com Reviewed-by: Jouni Högander --- drivers/gpu/drm/i915/display/intel_cursor.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index a515ae2831f8..926e2de00eb5 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -24,6 +24,8 @@ #include "intel_psr_regs.h" #include "skl_watermark.h" +#include "gem/i915_gem_object.h" + /* Cursor formats */ static const u32 intel_cursor_formats[] = { DRM_FORMAT_ARGB8888, @@ -34,11 +36,11 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); const struct drm_framebuffer *fb = plane_state->hw.fb; - const struct drm_i915_gem_object *obj = intel_fb_obj(fb); + struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 base; if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) - base = sg_dma_address(obj->mm.pages->sgl); + base = i915_gem_object_get_dma_address(obj, 0); else base = intel_plane_ggtt_offset(plane_state); -- cgit v1.2.3 From 0647ece3819b018cb62a71c3bcb7c2c3243e78ac Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 1 Dec 2023 12:21:08 +0000 Subject: drm/i915/selftests: Fix engine reset count storage for multi-tile Engine->id namespace is per-tile so struct igt_live_test->reset_engine[] needs to be two-dimensional so engine reset counts from all tiles can be stored with no aliasing. With aliasing, if we had a real multi-tile platform, the reset counts would be incorrect for same engine instance on different tiles. 
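To make the aliasing concrete, a simplified sketch (the array bounds are the real driver limits from the diff below; count() and the gt/id values are placeholders):

    /* engine->id is a per-tile namespace, so a 1D snapshot lets a second tile
     * overwrite the first tile's counts:
     */
    unsigned int reset_engine[I915_NUM_ENGINES];
    reset_engine[id] = count(gt0, id);      /* written for tile 0 ...             */
    reset_engine[id] = count(gt1, id);      /* ... silently overwritten by tile 1 */

    /* Keying the snapshot by tile as well gives one slot per (gt, engine): */
    unsigned int reset_engine2[I915_MAX_GT][I915_NUM_ENGINES];
    reset_engine2[0][id] = count(gt0, id);
    reset_engine2[1][id] = count(gt1, id);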
Signed-off-by: Tvrtko Ursulin Fixes: 0c29efa23f5c ("drm/i915/selftests: Consider multi-gt instead of to_gt()") Reported-by: Alan Previn Teres Alexis Cc: Tejas Upadhyay Cc: Andi Shyti Cc: Daniele Ceraolo Spurio Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/selftests/igt_live_test.c | 9 +++++---- drivers/gpu/drm/i915/selftests/igt_live_test.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 4ddc6d902752..7d41874a49c5 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -37,8 +37,9 @@ int igt_live_test_begin(struct igt_live_test *t, } for_each_engine(engine, gt, id) - t->reset_engine[id] = - i915_reset_engine_count(&i915->gpu_error, engine); + t->reset_engine[i][id] = + i915_reset_engine_count(&i915->gpu_error, + engine); } t->reset_global = i915_reset_count(&i915->gpu_error); @@ -66,14 +67,14 @@ int igt_live_test_end(struct igt_live_test *t) for_each_gt(gt, i915, i) { for_each_engine(engine, gt, id) { - if (t->reset_engine[id] == + if (t->reset_engine[i][id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n", t->func, t->name, engine->name, i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); + t->reset_engine[i][id]); return -EIO; } } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h index 36ed42736c52..83e3ad430922 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -7,6 +7,7 @@ #ifndef IGT_LIVE_TEST_H #define IGT_LIVE_TEST_H +#include "gt/intel_gt_defines.h" /* for I915_MAX_GT */ #include "gt/intel_engine.h" /* for I915_NUM_ENGINES */ struct drm_i915_private; @@ -17,7 +18,7 @@ struct igt_live_test { const char *name; unsigned int reset_global; - unsigned int reset_engine[I915_NUM_ENGINES]; + unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES]; }; /* -- cgit v1.2.3 From cf9cb028ac56696ff879af1154c4b2f0b12701fd Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 1 Dec 2023 12:21:09 +0000 Subject: drm/i915: Use internal class when counting engine resets Commit 503579448db9 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") made the GSC0 engine not have a valid uabi class and so broke the engine reset counting, which in turn was made class based in cb823ed9915b ("drm/i915/gt: Use intel_gt as the primary object for handling resets"). Despite the title and commit text of the latter is not mentioning it (and has left the storage array incorrectly sized), tracking by class, despite it adding aliasing in hypthotetical multi-tile systems, is handy for virtual engines which for instance do not have a valid engine->id. Therefore we keep that but just change it to use the internal class which is always valid. We also add a helper to increment the count, which aligns with the existing getter. What was broken without this fix were out of bounds reads every time a reset would happen on the GSC0 engine, or during selftests when storing and cross-checking the counts in igt_live_test_begin and igt_live_test_end. 
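Reduced to the relevant lines, the before/after looks like this (a sketch; the identifiers are the ones visible in the diff below):

    /* Before: array sized by engine count but indexed by uabi class.  GSC0 now
     * carries a reserved uabi class, so the index can land past the array end.
     */
    atomic_t reset_engine_count[I915_NUM_ENGINES];
    atomic_inc(&error->reset_engine_count[engine->uabi_class]);

    /* After: size by class, index by the always-valid internal class, and bump
     * the count through one helper that matches the existing getter.
     */
    atomic_t reset_engine_count[MAX_ENGINE_CLASS];
    i915_increase_reset_engine_count(&i915->gpu_error, engine);
            /* -> atomic_inc(&error->reset_engine_count[engine->class]); */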
Signed-off-by: Tvrtko Ursulin Fixes: dfed6b58d54f ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") [tursulin: fixed Fixes tag] Reported-by: Alan Previn Teres Alexis Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 +++-- drivers/gpu/drm/i915/i915_gpu_error.h | 12 ++++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index d5ed904f355d..6801f8b95c53 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1293,7 +1293,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) if (msg) drm_notice(&engine->i915->drm, "Resetting %s for %s\n", engine->name, msg); - atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); + i915_increase_reset_engine_count(&engine->i915->gpu_error, engine); ret = intel_gt_reset_engine(engine); if (ret) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 04f8377fd7a3..58ea285c51d4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -5003,7 +5003,8 @@ static void capture_error_state(struct intel_guc *guc, if (match) { intel_engine_set_hung_context(e, ce); engine_mask |= e->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, + e); } } @@ -5015,7 +5016,7 @@ static void capture_error_state(struct intel_guc *guc, } else { intel_engine_set_hung_context(ce->engine, ce); engine_mask = ce->engine->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); } with_intel_runtime_pm(&i915->runtime_pm, wakeref) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 68c964d6720a..b75052e69a2b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -16,6 +16,7 @@ #include "display/intel_display_device.h" #include "gt/intel_engine.h" +#include "gt/intel_engine_types.h" #include "gt/intel_gt_types.h" #include "gt/uc/intel_uc_fw.h" @@ -232,7 +233,7 @@ struct i915_gpu_error { atomic_t reset_count; /** Number of times an engine has been reset */ - atomic_t reset_engine_count[I915_NUM_ENGINES]; + atomic_t reset_engine_count[MAX_ENGINE_CLASS]; }; struct drm_i915_error_state_buf { @@ -255,7 +256,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, const struct intel_engine_cs *engine) { - return atomic_read(&error->reset_engine_count[engine->uabi_class]); + return atomic_read(&error->reset_engine_count[engine->class]); +} + +static inline void +i915_increase_reset_engine_count(struct i915_gpu_error *error, + const struct intel_engine_cs *engine) +{ + atomic_inc(&error->reset_engine_count[engine->class]); } #define CORE_DUMP_FLAG_NONE 0x0 -- cgit v1.2.3 From 5a6c9a05e55cb2972396cc991af9d74c8c15029a Mon Sep 17 00:00:00 2001 From: Lingkai Dong Date: Wed, 6 Dec 2023 13:51:58 +0000 Subject: drm: Fix FD ownership check in drm_master_check_perm() MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DRM subsystem keeps a record of the owner of a DRM device file descriptor using thread group ID (TGID) instead of process ID (PID), to ensures all threads within the same userspace process are considered the owner. However, the DRM master ownership check compares the current thread's PID against the record, so the thread is incorrectly considered to be not the FD owner if the PID is not equal to the TGID. This causes DRM ioctls to be denied master privileges, even if the same thread that opened the FD performs an ioctl. Fix this by checking TGID. Fixes: 4230cea89cafb ("drm: Track clients by tgid and not tid") Signed-off-by: Lingkai Dong Reviewed-by: Christian König Reviewed-by: Tvrtko Ursulin Cc: # v6.4+ Link: https://patchwork.freedesktop.org/patch/msgid/PA6PR08MB107665920BE9A96658CDA04CE8884A@PA6PR08MB10766.eurprd08.prod.outlook.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 2ed2585ded37..6899b3dc1f12 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -236,7 +236,7 @@ static int drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv) { if (file_priv->was_master && - rcu_access_pointer(file_priv->pid) == task_pid(current)) + rcu_access_pointer(file_priv->pid) == task_tgid(current)) return 0; if (!capable(CAP_SYS_ADMIN)) -- cgit v1.2.3 From 10690b8a49bceafb1badf0ad91842a359e796d8b Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Thu, 7 Dec 2023 10:34:51 +0200 Subject: drm/i915/display: Add intel_fb_bo_framebuffer_fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe needs intel_fb_bo_framebuffer_fini for taking care of unpinning the fb and taking reference. In i915 this can be empty. Also move intel_frontbuffer_get to be done after intel_fb_bo_framebuffer_init to have reasonable sequences: intel_fb_bo_framebuffer_init intel_frontbuffer_get ... 
intel_frontbuffer_put intel_fb_bo_framebuffer_fini v2: Empty function instead of define Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20231207083451.2184562-1-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 32 ++++++++++++++++++------------ drivers/gpu/drm/i915/display/intel_fb_bo.c | 5 +++++ drivers/gpu/drm/i915/display/intel_fb_bo.h | 2 ++ 3 files changed, 26 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 32a7ee1c5b34..4f9d2e57a770 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1889,6 +1889,8 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) intel_frontbuffer_put(intel_fb->frontbuffer); + intel_fb_bo_framebuffer_fini(intel_fb_obj(fb)); + kfree(intel_fb); } @@ -1989,13 +1991,15 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, int ret = -EINVAL; int i; - intel_fb->frontbuffer = intel_frontbuffer_get(obj); - if (!intel_fb->frontbuffer) - return -ENOMEM; - ret = intel_fb_bo_framebuffer_init(intel_fb, obj, mode_cmd); if (ret) + return ret; + + intel_fb->frontbuffer = intel_frontbuffer_get(obj); + if (!intel_fb->frontbuffer) { + ret = -ENOMEM; goto err; + } ret = -EINVAL; if (!drm_any_plane_has_format(&dev_priv->drm, @@ -2004,7 +2008,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(&dev_priv->drm, "unsupported pixel format %p4cc / modifier 0x%llx\n", &mode_cmd->pixel_format, mode_cmd->modifier[0]); - goto err; + goto err_frontbuffer_put; } max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format, @@ -2015,7 +2019,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], max_stride); - goto err; + goto err_frontbuffer_put; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. 
*/ @@ -2023,7 +2027,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(&dev_priv->drm, "plane 0 offset (0x%08x) must be 0\n", mode_cmd->offsets[0]); - goto err; + goto err_frontbuffer_put; } drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); @@ -2034,7 +2038,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(&dev_priv->drm, "bad plane %d handle\n", i); - goto err; + goto err_frontbuffer_put; } stride_alignment = intel_fb_stride_alignment(fb, i); @@ -2042,7 +2046,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(&dev_priv->drm, "plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); - goto err; + goto err_frontbuffer_put; } if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) { @@ -2053,7 +2057,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, "ccs aux plane %d pitch (%d) must be %d\n", i, fb->pitches[i], ccs_aux_stride); - goto err; + goto err_frontbuffer_put; } } @@ -2062,7 +2066,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, ret = intel_fill_fb_info(dev_priv, intel_fb); if (ret) - goto err; + goto err_frontbuffer_put; if (intel_fb_uses_dpt(fb)) { struct i915_address_space *vm; @@ -2071,7 +2075,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (IS_ERR(vm)) { drm_dbg_kms(&dev_priv->drm, "failed to create DPT\n"); ret = PTR_ERR(vm); - goto err; + goto err_frontbuffer_put; } intel_fb->dpt_vm = vm; @@ -2088,8 +2092,10 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, err_free_dpt: if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); -err: +err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); +err: + intel_fb_bo_framebuffer_fini(obj); return ret; } diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.c b/drivers/gpu/drm/i915/display/intel_fb_bo.c index ce86eff7b226..4be09541e509 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_bo.c +++ b/drivers/gpu/drm/i915/display/intel_fb_bo.c @@ -11,6 +11,11 @@ #include "intel_fb.h" #include "intel_fb_bo.h" +void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj) +{ + /* Nothing to do for i915 */ +} + int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_i915_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd) diff --git a/drivers/gpu/drm/i915/display/intel_fb_bo.h b/drivers/gpu/drm/i915/display/intel_fb_bo.h index dd06ceec8601..232bf898b013 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_bo.h +++ b/drivers/gpu/drm/i915/display/intel_fb_bo.h @@ -12,6 +12,8 @@ struct drm_i915_gem_object; struct drm_i915_private; struct intel_framebuffer; +void intel_fb_bo_framebuffer_fini(struct drm_i915_gem_object *obj); + int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_i915_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd); -- cgit v1.2.3 From 3e743b0fcb90551106c13837548079b91d661384 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 12 Apr 2023 14:46:33 +0800 Subject: drm/mediatek: Use devm_platform_ioremap_resource() Remove variable 'res' and convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_ioremap_resource(), as this is exactly what this function does. 
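The same two-call-to-one-call conversion repeats in every probe function below; condensed (a sketch, with the surrounding driver structure omitted):

    /* Before: look up the MEM resource, then map it. */
    res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    regs = devm_ioremap_resource(&pdev->dev, res);

    /* After: one helper performs both the lookup and the ioremap. */
    regs = devm_platform_ioremap_resource(pdev, 0);
    if (IS_ERR(regs))
            return PTR_ERR(regs);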
Signed-off-by: Yang Li Reviewed-by: Matthias Brugger Reviewed-by: Alexandre Mergnat Link: https://patchwork.kernel.org/project/dri-devel/patch/20230412064635.41315-1-yang.lee@linux.alibaba.com/ Link: https://patchwork.kernel.org/project/dri-devel/patch/20230412064635.41315-2-yang.lee@linux.alibaba.com/ Link: https://patchwork.kernel.org/project/dri-devel/patch/20230412064635.41315-3-yang.lee@linux.alibaba.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_cec.c | 4 +--- drivers/gpu/drm/mediatek/mtk_disp_aal.c | 4 +--- drivers/gpu/drm/mediatek/mtk_disp_ccorr.c | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c index f47f417d8ba6..8519e9bade36 100644 --- a/drivers/gpu/drm/mediatek/mtk_cec.c +++ b/drivers/gpu/drm/mediatek/mtk_cec.c @@ -185,7 +185,6 @@ static int mtk_cec_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mtk_cec *cec; - struct resource *res; int ret; cec = devm_kzalloc(dev, sizeof(*cec), GFP_KERNEL); @@ -195,8 +194,7 @@ static int mtk_cec_probe(struct platform_device *pdev) platform_set_drvdata(pdev, cec); spin_lock_init(&cec->lock); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - cec->regs = devm_ioremap_resource(dev, res); + cec->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cec->regs)) { ret = PTR_ERR(cec->regs); dev_err(dev, "Failed to ioremap cec: %d\n", ret); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_aal.c b/drivers/gpu/drm/mediatek/mtk_disp_aal.c index 2209159d8855..40fe403086c3 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_aal.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_aal.c @@ -168,7 +168,6 @@ static int mtk_disp_aal_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mtk_disp_aal *priv; - struct resource *res; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -181,8 +180,7 @@ static int mtk_disp_aal_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->regs = devm_ioremap_resource(dev, res); + priv->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->regs)) { dev_err(dev, "failed to ioremap aal\n"); return PTR_ERR(priv->regs); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c index 4234ff7485e8..465cddce0d32 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ccorr.c @@ -153,7 +153,6 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mtk_disp_ccorr *priv; - struct resource *res; int ret; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -166,8 +165,7 @@ static int mtk_disp_ccorr_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->regs = devm_ioremap_resource(dev, res); + priv->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->regs)) { dev_err(dev, "failed to ioremap ccorr\n"); return PTR_ERR(priv->regs); -- cgit v1.2.3 From df2a5f74e6eda50e1376a32bd60402a28ed51c8e Mon Sep 17 00:00:00 2001 From: Dmitrii Galantsev Date: Wed, 6 Dec 2023 02:04:52 -0600 Subject: drm/amd/pm: fix pp_*clk_od typo Fix pp_dpm_sclk_od and pp_dpm_mclk_od typos. Those were defined as pp_*clk_od but used as pp_dpm_*clk_od instead. This change removes the _dpm part. 
Fixes: 8cfd6a05750c ("drm/amd/pm: Hide irrelevant pm device attributes") Signed-off-by: Dmitrii Galantsev Reviewed-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e1497296afee..2cd995b0ceba 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2238,10 +2238,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) { if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE) *states = ATTR_STATE_UNSUPPORTED; - } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) { + } else if (DEVICE_ATTR_IS(pp_mclk_od)) { if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; - } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) { + } else if (DEVICE_ATTR_IS(pp_sclk_od)) { if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(apu_thermal_cap)) { -- cgit v1.2.3 From 0497ae6f8830816d9277a8d5c8d9bf5966f292e1 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 5 Dec 2023 14:55:04 -0500 Subject: drm/amd/display: fix hw rotated modes when PSR-SU is enabled We currently don't support dirty rectangles on hardware rotated modes. So, if a user is using hardware rotated modes with PSR-SU enabled, use PSR-SU FFU for all rotated planes (including cursor planes). Cc: stable@vger.kernel.org Fixes: 30ebe41582d1 ("drm/amd/display: add FB_DAMAGE_CLIPS support") Reported-by: Kai-Heng Feng Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2952 Tested-by: Kai-Heng Feng Tested-by: Bin Li Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 12 ++++++++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 3 ++- 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6aabe8a3ffef..90868a54d4dd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5241,6 +5241,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, if (plane->type == DRM_PLANE_TYPE_CURSOR) return; + if (new_plane_state->rotation != DRM_MODE_ROTATE_0) + goto ffu; + num_clips = drm_plane_get_damage_clips_count(new_plane_state); clips = drm_plane_get_damage_clips(new_plane_state); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 9649934ea186..e2a3aa8812df 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -465,6 +465,7 @@ struct dc_cursor_mi_param { struct fixed31_32 v_scale_ratio; enum dc_rotation_angle rotation; bool mirror; + struct dc_stream_state *stream; }; /* IPP related types */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 139cf31d2e45..89c3bf0fe0c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1077,8 +1077,16 @@ void hubp2_cursor_set_position( if 
(src_y_offset < 0) src_y_offset = 0; /* Save necessary cursor info x, y position. w, h is saved in attribute func. */ - hubp->cur_rect.x = src_x_offset + param->viewport.x; - hubp->cur_rect.y = src_y_offset + param->viewport.y; + if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 && + param->rotation != ROTATION_ANGLE_0) { + hubp->cur_rect.x = 0; + hubp->cur_rect.y = 0; + hubp->cur_rect.w = param->stream->timing.h_addressable; + hubp->cur_rect.h = param->stream->timing.v_addressable; + } else { + hubp->cur_rect.x = src_x_offset + param->viewport.x; + hubp->cur_rect.y = src_y_offset + param->viewport.y; + } } void hubp2_clk_cntl(struct hubp *hubp, bool enable) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 2b8b8366538e..cdb903116eb7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3417,7 +3417,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert, .rotation = pipe_ctx->plane_state->rotation, - .mirror = pipe_ctx->plane_state->horizontal_mirror + .mirror = pipe_ctx->plane_state->horizontal_mirror, + .stream = pipe_ctx->stream, }; bool pipe_split_on = false; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || -- cgit v1.2.3 From a2020be69490ee8778c59a02e7b270dfeecffbd4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 6 Dec 2023 12:08:26 -0600 Subject: drm/amd/display: Restore guard against default backlight value < 1 nit Mark reports that brightness is not restored after Xorg dpms screen blank. This behavior was introduced by commit d9e865826c20 ("drm/amd/display: Simplify brightness initialization") which dropped the cached backlight value in display code, but also removed code for when the default value read back was less than 1 nit. Restore this code so that the backlight brightness is restored to the correct default value in this circumstance. 
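The restored guard is a plausibility check on the value read back over AUX; assuming the usual 1/1000-nit units this interface works in, it reads as:

    /* Readbacks below 1 nit (1000) or above 5000 nits (5000000) are treated as
     * bogus and replaced with the 150-nit default (150000).
     */
    if (default_backlight < 1000 || default_backlight > 5000000)
            default_backlight = 150000;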
Reported-by: Mark Herbert Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3031 Cc: stable@vger.kernel.org Cc: Camille Cho Cc: Krunoslav Kovac Cc: Hamza Mahfooz Fixes: d9e865826c20 ("drm/amd/display: Simplify brightness initialization") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index ac0fa88b52a0..bf53a86ea817 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -287,8 +287,8 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if > 5000, it might be wrong readback - if (default_backlight > 5000000) + // if < 1 nits or > 5000, it might be wrong readback + if (default_backlight < 1000 || default_backlight > 5000000) default_backlight = 150000; return edp_set_backlight_level_nits(link, true, -- cgit v1.2.3 From b2662d4cc4ce2db4bd55e00a528b1d35be82c6c3 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Thu, 23 Nov 2023 13:59:44 -0500 Subject: drm/amdgpu: SW part of MES event log enablement This is the generic SW part, prepare the event log buffer and dump it through debugfs Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 61 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 5 +++ 4 files changed, 70 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index c480192e33f5..424bed738296 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2147,6 +2147,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_firmware_init(adev); amdgpu_ta_if_debugfs_init(adev); + amdgpu_debugfs_mes_event_log_init(adev); + #if defined(CONFIG_DRM_AMD_DC) if (adev->dc_enabled) dtn_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 371a6f0deb29..0425432d8659 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev); void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev); + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 9ddbf1494326..e544b823abf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -98,6 +98,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) return 0; } +static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + 
&adev->mes.event_log_gpu_obj, + &adev->mes.event_log_gpu_addr, + &adev->mes.event_log_cpu_addr); + if (r) { + dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r); + return r; + } + + memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + + return 0; + +} + static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) { bitmap_free(adev->mes.doorbell_bitmap); @@ -182,8 +202,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev) if (r) goto error; + r = amdgpu_mes_event_log_init(adev); + if (r) + goto error_doorbell; + return 0; +error_doorbell: + amdgpu_mes_doorbell_free(adev); error: amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); @@ -199,6 +225,10 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, + &adev->mes.event_log_gpu_addr, + &adev->mes.event_log_cpu_addr); + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); amdgpu_device_wb_free(adev, adev->mes.read_val_offs); @@ -1479,3 +1509,34 @@ out: amdgpu_ucode_release(&adev->mes.fw[pipe]); return r; } + +#if defined(CONFIG_DEBUG_FS) + +static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) +{ + struct amdgpu_device *adev = m->private; + uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); + + seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, + mem, PAGE_SIZE, false); + + return 0; +} + + +DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); + +#endif + +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) +{ + +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + + debugfs_create_file("amdgpu_mes_event_log", 0444, root, + adev, &amdgpu_debugfs_mes_event_log_fops); + +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index a27b424ffe00..894b9b133000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -133,6 +133,11 @@ struct amdgpu_mes { uint32_t num_mes_dbs; unsigned long *doorbell_bitmap; + /* MES event log buffer */ + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; + void *event_log_cpu_addr; + /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; }; -- cgit v1.2.3 From 47c4533543af4759b7668a06c1a2ce06cdc71173 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Thu, 23 Nov 2023 14:52:55 -0500 Subject: drm/amdgpu: Enable event log on MES 11 Enable event log through the HW specific FW API Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 ++ drivers/gpu/drm/amd/include/mes_v11_api_def.h | 1 + 2 files changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 4dfec56e1b7f..26d71a22395d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -408,6 +408,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; return mes_v11_0_submit_pkt_and_poll_completion(mes, 
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index b1db2b190187..1fbfd1aa987e 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -232,6 +232,7 @@ union MESAPI_SET_HW_RESOURCES { }; uint32_t oversubscription_timer; uint64_t doorbell_info; + uint64_t event_intr_history_gpu_mc_ptr; }; uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; -- cgit v1.2.3 From c5a761e2fe58631e1cd0249a729ce1934a11bcbe Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 23 Nov 2023 13:41:12 +0000 Subject: drm/mediatek: Stop using iommu_present() Remove the pointless check. If an IOMMU is providing transparent DMA API ops for any device(s) we care about, the DT code will have enforced the appropriate probe ordering already. And if the IOMMU *is* entirely absent, then attempting to go ahead with CMA and either suceeding or failing decisively seems more useful than deferring forever. Signed-off-by: Robin Murphy Reviewed-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/fd1b62aa006556f29f37535814abfe41be63f7ae.1700746094.git.robin.murphy@arm.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 2dfaa613276a..48581da51857 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -5,7 +5,6 @@ */ #include -#include #include #include #include @@ -608,9 +607,6 @@ static int mtk_drm_bind(struct device *dev) struct drm_device *drm; int ret, i; - if (!iommu_present(&platform_bus_type)) - return -EPROBE_DEFER; - pdev = of_find_device_by_node(private->mutex_node); if (!pdev) { dev_err(dev, "Waiting for disp-mutex device %pOF\n", -- cgit v1.2.3 From 1b2d98bdd7b7c64265732f5f0dace4c52c9ba8a8 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 7 Dec 2023 17:37:18 +0100 Subject: drm/msm/dp: Add DisplayPort controller for SM8650 The Qualcomm SM8650 platform comes with a DisplayPort controller with a different base offset than the previous SM8550 SoC, add support for this in the DisplayPort driver. 
Signed-off-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571132/ Link: https://lore.kernel.org/r/20231207-topic-sm8650-upstream-dp-v1-2-b762c06965bb@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 20c5cf7986b2..fd4e62fe163f 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -168,6 +168,11 @@ static const struct msm_dp_desc sm8350_dp_descs[] = { {} }; +static const struct msm_dp_desc sm8650_dp_descs[] = { + { .io_start = 0x0af54000, .id = MSM_DP_CONTROLLER_0, .connector_type = DRM_MODE_CONNECTOR_DisplayPort }, + {} +}; + static const struct of_device_id dp_dt_match[] = { { .compatible = "qcom,sc7180-dp", .data = &sc7180_dp_descs }, { .compatible = "qcom,sc7280-dp", .data = &sc7280_dp_descs }, @@ -178,6 +183,7 @@ static const struct of_device_id dp_dt_match[] = { { .compatible = "qcom,sc8280xp-edp", .data = &sc8280xp_edp_descs }, { .compatible = "qcom,sdm845-dp", .data = &sc7180_dp_descs }, { .compatible = "qcom,sm8350-dp", .data = &sm8350_dp_descs }, + { .compatible = "qcom,sm8650-dp", .data = &sm8650_dp_descs }, {} }; -- cgit v1.2.3 From c4ac0c6c96f0985357c5866cf22fd642390409dd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Dec 2023 15:02:05 +0300 Subject: drm/msm/dp: Fix platform_get_irq() check The platform_get_irq() function returns negative error codes. It never returns zero. Fix the check accordingly. Fixes: 82c2a5751227 ("drm/msm/dp: tie dp_display_irq_handler() with dp driver") Signed-off-by: Dan Carpenter Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/570755/ Link: https://lore.kernel.org/r/c12bb69b-d676-4345-9712-48aab48f2b48@moroto.mountain Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index fd4e62fe163f..f8a9e8403c26 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1168,9 +1168,9 @@ static int dp_display_request_irq(struct dp_display_private *dp) struct platform_device *pdev = dp->dp_display.pdev; dp->irq = platform_get_irq(pdev, 0); - if (!dp->irq) { + if (dp->irq < 0) { DRM_ERROR("failed to get irq\n"); - return -EINVAL; + return dp->irq; } rc = devm_request_irq(&pdev->dev, dp->irq, dp_display_irq_handler, -- cgit v1.2.3 From a08935fc859b22884dcb6b5126d3a986467101ce Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 4 Oct 2023 06:19:03 +0300 Subject: drm/msm/dpu: drop MSM_ENC_VBLANK support There are no in-kernel users of MSM_ENC_VBLANK wait type. Drop it together with the corresponding wait_for_vblank callback. 
Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/560701/ Link: https://lore.kernel.org/r/20231004031903.518223-1-dmitry.baryshkov@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 3 --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h | 1 - .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c | 28 ---------------------- .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 9 ++++--- drivers/gpu/drm/msm/msm_drv.h | 2 -- 5 files changed, 4 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index aa1a1646b322..889e9bb42715 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2400,9 +2400,6 @@ int dpu_encoder_wait_for_event(struct drm_encoder *drm_enc, case MSM_ENC_TX_COMPLETE: fn_wait = phys->ops.wait_for_tx_complete; break; - case MSM_ENC_VBLANK: - fn_wait = phys->ops.wait_for_vblank; - break; default: DPU_ERROR_ENC(dpu_enc, "unknown wait event %d\n", event); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h index b6b48e2c63ef..e2934a6702d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h @@ -104,7 +104,6 @@ struct dpu_encoder_phys_ops { int (*control_vblank_irq)(struct dpu_encoder_phys *enc, bool enable); int (*wait_for_commit_done)(struct dpu_encoder_phys *phys_enc); int (*wait_for_tx_complete)(struct dpu_encoder_phys *phys_enc); - int (*wait_for_vblank)(struct dpu_encoder_phys *phys_enc); void (*prepare_for_kickoff)(struct dpu_encoder_phys *phys_enc); void (*handle_post_kickoff)(struct dpu_encoder_phys *phys_enc); void (*trigger_start)(struct dpu_encoder_phys *phys_enc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index d24f45d1f654..76f1f66e41cd 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -675,33 +675,6 @@ static int dpu_encoder_phys_cmd_wait_for_commit_done( return _dpu_encoder_phys_cmd_wait_for_ctl_start(phys_enc); } -static int dpu_encoder_phys_cmd_wait_for_vblank( - struct dpu_encoder_phys *phys_enc) -{ - int rc = 0; - struct dpu_encoder_phys_cmd *cmd_enc; - struct dpu_encoder_wait_info wait_info; - - cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); - - /* only required for master controller */ - if (!dpu_encoder_phys_cmd_is_master(phys_enc)) - return rc; - - wait_info.wq = &cmd_enc->pending_vblank_wq; - wait_info.atomic_cnt = &cmd_enc->pending_vblank_cnt; - wait_info.timeout_ms = KICKOFF_TIMEOUT_MS; - - atomic_inc(&cmd_enc->pending_vblank_cnt); - - rc = dpu_encoder_helper_wait_for_irq(phys_enc, - phys_enc->irq[INTR_IDX_RDPTR], - dpu_encoder_phys_cmd_te_rd_ptr_irq, - &wait_info); - - return rc; -} - static void dpu_encoder_phys_cmd_handle_post_kickoff( struct dpu_encoder_phys *phys_enc) { @@ -729,7 +702,6 @@ static void dpu_encoder_phys_cmd_init_ops( ops->wait_for_commit_done = dpu_encoder_phys_cmd_wait_for_commit_done; ops->prepare_for_kickoff = dpu_encoder_phys_cmd_prepare_for_kickoff; ops->wait_for_tx_complete = dpu_encoder_phys_cmd_wait_for_tx_complete; - ops->wait_for_vblank = dpu_encoder_phys_cmd_wait_for_vblank; ops->trigger_start = dpu_encoder_phys_cmd_trigger_start; ops->needs_single_flush = dpu_encoder_phys_cmd_needs_single_flush; ops->irq_control = 
dpu_encoder_phys_cmd_irq_control; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 69bc1b2e514c..2b4e5b5eff44 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -440,7 +440,7 @@ skip_flush: phys_enc->enable_state = DPU_ENC_ENABLING; } -static int dpu_encoder_phys_vid_wait_for_vblank( +static int dpu_encoder_phys_vid_wait_for_tx_complete( struct dpu_encoder_phys *phys_enc) { struct dpu_encoder_wait_info wait_info; @@ -554,7 +554,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc) * scanout buffer) don't latch properly.. */ if (dpu_encoder_phys_vid_is_master(phys_enc)) { - ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc); + ret = dpu_encoder_phys_vid_wait_for_tx_complete(phys_enc); if (ret) { atomic_set(&phys_enc->pending_kickoff_cnt, 0); DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n", @@ -574,7 +574,7 @@ static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc) spin_lock_irqsave(phys_enc->enc_spinlock, lock_flags); dpu_encoder_phys_inc_pending(phys_enc); spin_unlock_irqrestore(phys_enc->enc_spinlock, lock_flags); - ret = dpu_encoder_phys_vid_wait_for_vblank(phys_enc); + ret = dpu_encoder_phys_vid_wait_for_tx_complete(phys_enc); if (ret) { atomic_set(&phys_enc->pending_kickoff_cnt, 0); DRM_ERROR("wait disable failed: id:%u intf:%d ret:%d\n", @@ -679,8 +679,7 @@ static void dpu_encoder_phys_vid_init_ops(struct dpu_encoder_phys_ops *ops) ops->disable = dpu_encoder_phys_vid_disable; ops->control_vblank_irq = dpu_encoder_phys_vid_control_vblank_irq; ops->wait_for_commit_done = dpu_encoder_phys_vid_wait_for_commit_done; - ops->wait_for_vblank = dpu_encoder_phys_vid_wait_for_vblank; - ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_vblank; + ops->wait_for_tx_complete = dpu_encoder_phys_vid_wait_for_tx_complete; ops->irq_control = dpu_encoder_phys_vid_irq_control; ops->prepare_for_kickoff = dpu_encoder_phys_vid_prepare_for_kickoff; ops->handle_post_kickoff = dpu_encoder_phys_vid_handle_post_kickoff; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index a205127ccc93..c0446fa66b98 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -78,12 +78,10 @@ enum msm_dsi_controller { * enum msm_event_wait - type of HW events to wait for * @MSM_ENC_COMMIT_DONE - wait for the driver to flush the registers to HW * @MSM_ENC_TX_COMPLETE - wait for the HW to transfer the frame to panel - * @MSM_ENC_VBLANK - wait for the HW VBLANK event (for driver-internal waiters) */ enum msm_event_wait { MSM_ENC_COMMIT_DONE = 0, MSM_ENC_TX_COMPLETE, - MSM_ENC_VBLANK, }; /** -- cgit v1.2.3 From 3823119b9c2b5f9e9b760336f75bc989b805cde6 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Fri, 21 Jul 2023 09:14:46 -0700 Subject: drm/crtc: Fix uninit-value bug in drm_mode_setcrtc The connector_set contains uninitialized values when allocated with kmalloc_array. However, in the "out" branch, the logic assumes that any element in connector_set would be equal to NULL if failed to initialize, which causes the bug reported by Syzbot. The fix is to use an extra variable to keep track of how many connectors are initialized indeed, and use that variable to decrease any refcounts in the "out" branch. 
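The shape of the fix, stripped down (a sketch; the lookup and error paths are simplified relative to the diff below):

    /* Count only the entries that were actually looked up, and unwind only
     * those on the error path -- kmalloc_array() does not zero the buffer, so
     * untouched slots cannot be trusted to be NULL.
     */
    num_connectors = 0;
    for (i = 0; i < crtc_req->count_connectors; i++) {
            connector_set[i] = drm_connector_lookup(dev, file_priv, out_id);
            if (!connector_set[i])
                    goto out;               /* slots past i were never written */
            num_connectors++;
    }
    /* ... build and apply the mode set ... */
    out:
            for (i = 0; i < num_connectors; i++)
                    drm_connector_put(connector_set[i]);

Zeroing the allocation up front would also have worked, but counting what was initialized keeps the existing kmalloc_array() call unchanged.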
Reported-by: syzbot+4fad2e57beb6397ab2fc@syzkaller.appspotmail.com Signed-off-by: Ziqi Zhao Reported-and-tested-by: syzbot+4fad2e57beb6397ab2fc@syzkaller.appspotmail.com Tested-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20230721161446.8602-1-astrajoan@yahoo.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_crtc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index df9bf3c9206e..d718c17ab1e9 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -715,8 +715,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; - int ret; - int i; + int ret, i, num_connectors; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; @@ -851,6 +850,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, goto out; } + num_connectors = 0; for (i = 0; i < crtc_req->count_connectors; i++) { connector_set[i] = NULL; set_connectors_ptr = (uint32_t __user *)(unsigned long)crtc_req->set_connectors_ptr; @@ -871,6 +871,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, connector->name); connector_set[i] = connector; + num_connectors++; } } @@ -879,7 +880,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, set.y = crtc_req->y; set.mode = mode; set.connectors = connector_set; - set.num_connectors = crtc_req->count_connectors; + set.num_connectors = num_connectors; set.fb = fb; if (drm_drv_uses_atomic_modeset(dev)) @@ -892,7 +893,7 @@ out: drm_framebuffer_put(fb); if (connector_set) { - for (i = 0; i < crtc_req->count_connectors; i++) { + for (i = 0; i < num_connectors; i++) { if (connector_set[i]) drm_connector_put(connector_set[i]); } -- cgit v1.2.3 From 34880b18733efff60b68d074ff74d018ffc309a7 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 5 Dec 2023 14:09:37 +0100 Subject: drm/i915/selftests: wait for active idle event in i915_active_unlock_wait After i915_active_unlock_wait i915_active can be still non-idle due to barrier async handling in signal_irq_work. As a result one can observe following errors: bcs0: heartbeat pulse did not flush idle tasks *ERROR* pulse active pulse_active [i915]:pulse_retire [i915] *ERROR* pulse count: 0 *ERROR* pulse preallocated barriers? no To prevent it let's wait explicitly for idleness. 
v2: wait only in live_idle tests Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231205-selftest_wait_for_active_idle_event-v2-1-1437d0bf9829@intel.com --- drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 273d440a53e3..bc441ce7b380 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -84,7 +84,7 @@ static struct pulse *pulse_create(void) static void pulse_unlock_wait(struct pulse *p) { - i915_active_unlock_wait(&p->active); + wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ); } static int __live_idle_pulse(struct intel_engine_cs *engine, -- cgit v1.2.3 From be5bcc4be9d9d3ae294072441a66fe39b74e5bba Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 6 Dec 2023 19:43:22 +0100 Subject: drm/i915/guc: Create the guc_to_i915() wrapper Given a reference to "guc", the guc_to_i915() returns the pointer to "i915" private data. Signed-off-by: Andi Shyti Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20231206184322.57111-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt.h | 5 +++++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 10 +++++----- drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++-- 8 files changed, 17 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 9ffdb05e231e..2f81c1c79238 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -118,6 +118,11 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) return container_of(gsc, struct intel_gt, gsc); } +static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) +{ + return guc_to_gt(guc)->i915; +} + void intel_gt_common_init_early(struct intel_gt *gt); int intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 3f3df1166b86..2b450c43bbd7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -330,7 +330,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) static u32 guc_ctl_devid(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index a4da0208c883..a1cd40d80517 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -355,7 +355,7 @@ guc_capture_alloc_steered_lists(struct intel_guc *guc, static const struct __guc_mmio_reg_descr_group * guc_capture_get_device_reglist(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); const struct __guc_mmio_reg_descr_group *lists; 
if (GRAPHICS_VER(i915) >= 12) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index ed6ce73ef3b0..0d5197c0824a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -265,7 +265,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) u32 *cmds; int err; - err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO); + err = i915_inject_probe_error(guc_to_i915(guc), -ENXIO); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 55bc8b55fbc0..bf16351c9349 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -520,7 +520,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log) static int guc_log_relay_create(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; int ret; @@ -573,7 +573,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log) static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); intel_wakeref_t wakeref; _guc_log_copy_debuglogs_for_relay(log); @@ -589,7 +589,7 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) static u32 __get_default_log_level(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); /* A negative value means "use platform/config default" */ if (i915->params.guc_log_level < 0) { @@ -664,7 +664,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log) int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); intel_wakeref_t wakeref; int ret = 0; @@ -796,7 +796,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) static void guc_log_relay_stop(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); if (!log->relay.started) return; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c index 1adec6de223c..9df7927304ae 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -14,7 +14,7 @@ static bool __guc_rc_supported(struct intel_guc *guc) { /* GuC RC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; + GRAPHICS_VER(guc_to_i915(guc)) >= 12; } static bool __guc_rc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 2dfb07cc4b33..3e681ab6fbf9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -34,7 +34,7 @@ static bool __detect_slpc_supported(struct intel_guc *guc) { /* GuC SLPC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; + GRAPHICS_VER(guc_to_i915(guc)) >= 12; } static bool 
__guc_slpc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 58ea285c51d4..a259f1118c5a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4626,12 +4626,12 @@ static bool __guc_submission_supported(struct intel_guc *guc) { /* GuC submission is unavailable for pre-Gen11 */ return intel_guc_is_supported(guc) && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; + GRAPHICS_VER(guc_to_i915(guc)) >= 11; } static bool __guc_submission_selected(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); if (!intel_guc_submission_is_supported(guc)) return false; -- cgit v1.2.3 From 6e455f5dcdd15fa28edf0ffb5b44d3508512dccf Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Dec 2023 15:12:38 +0200 Subject: drm/crtc: fix uninitialized variable use Commit 3823119b9c2b ("drm/crtc: Fix uninit-value bug in drm_mode_setcrtc") was supposed to fix use of an uninitialized variable, but introduced another. num_connectors is only initialized if crtc_req->count_connectors > 0, but it's used regardless. Fix it. Fixes: 3823119b9c2b ("drm/crtc: Fix uninit-value bug in drm_mode_setcrtc") Cc: syzbot+4fad2e57beb6397ab2fc@syzkaller.appspotmail.com Cc: Ziqi Zhao Cc: Maxime Ripard Cc: Maarten Lankhorst Cc: Thomas Zimmermann Signed-off-by: Jani Nikula Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231208131238.2924571-1-jani.nikula@intel.com --- drivers/gpu/drm/drm_crtc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index d718c17ab1e9..cb90e70d85e8 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -715,7 +715,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; - int ret, i, num_connectors; + int ret, i, num_connectors = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; @@ -850,7 +850,6 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, goto out; } - num_connectors = 0; for (i = 0; i < crtc_req->count_connectors; i++) { connector_set[i] = NULL; set_connectors_ptr = (uint32_t __user *)(unsigned long)crtc_req->set_connectors_ptr; -- cgit v1.2.3 From 2c12eb36f849256f5eb00ffaee9bf99396fd3814 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 5 Dec 2023 20:03:08 +0200 Subject: drm/i915: Fix remapped stride with CCS on ADL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ADL+ the hardware automagically calculates the CCS AUX surface stride from the main surface stride, so when remapping we can't really play a lot of tricks with the main surface stride, or else the AUX surface stride would get miscalculated and no longer match the actual data layout in memory. Supposedly we could remap in 256 main surface tile units (AUX page(4096)/cachline(64)*4(4x1 main surface tiles per AUX cacheline)=256 main surface tiles), but the extra complexity is probably not worth the hassle. So let's just make sure our mapping stride is calculated from the full framebuffer stride (instead of the framebuffer width). 
This way the stride we program into PLANE_STRIDE will be the original framebuffer stride, and thus there will be no change to the AUX stride/layout. Cc: stable@vger.kernel.org Cc: Imre Deak Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231205180308.7505-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_fb.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 4f9d2e57a770..81316b21314d 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1509,8 +1509,20 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p size += remap_info->size; } else { - unsigned int dst_stride = plane_view_dst_stride_tiles(fb, color_plane, - remap_info->width); + unsigned int dst_stride; + + /* + * The hardware automagically calculates the CCS AUX surface + * stride from the main surface stride so can't really remap a + * smaller subset (unless we'd remap in whole AUX page units). + */ + if (intel_fb_needs_pot_stride_remap(fb) && + intel_fb_is_ccs_modifier(fb->base.modifier)) + dst_stride = remap_info->src_stride; + else + dst_stride = remap_info->width; + + dst_stride = plane_view_dst_stride_tiles(fb, color_plane, dst_stride); assign_chk_ovf(i915, remap_info->dst_stride, dst_stride); color_plane_info->mapping_stride = dst_stride * -- cgit v1.2.3 From e81144106e21271c619f0c722a09e27ccb8c043d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:34 +0200 Subject: drm/i915: Fix intel_atomic_setup_scalers() plane_state handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the plane_state variable is declared outside the scaler_users loop in intel_atomic_setup_scalers(), and it's never reset back to NULL inside the loop we may end up calling intel_atomic_setup_scaler() with a non-NULL plane state for the pipe scaling case. That is bad because intel_atomic_setup_scaler() determines whether we are doing plane scaling or pipe scaling based on plane_state!=NULL. The end result is that we may miscalculate the scaler mode for pipe scaling. The hardware becomes somewhat upset if we end up in this situation when scanning out a planar format on a SDR plane. We end up programming the pipe scaler into planar mode as well, and the result is a screenfull of garbage. Fix the situation by making sure we pass the correct plane_state==NULL when calculating the scaler mode for pipe scaling. 
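As an aside, the bug class is easy to reproduce outside the driver. A minimal sketch with made-up names (not the skl_scaler.c code): a pointer declared outside the loop and only conditionally reassigned keeps whatever the previous iteration left in it, whereas declaring it inside the loop resets it to NULL on every pass.

#include <stdio.h>

struct user { int is_plane; };

static void assign_scalers(const struct user *users, int n)
{
	for (int i = 0; i < n; i++) {
		/* declared per iteration, so it cannot leak stale state */
		const struct user *plane_state = NULL;

		if (users[i].is_plane)
			plane_state = &users[i];

		/* plane_state != NULL now really means "plane scaling" */
		printf("user %d: %s scaling\n", i,
		       plane_state ? "plane" : "pipe");
	}
}

int main(void)
{
	/* a plane user followed by a pipe user: the second must print "pipe" */
	struct user users[] = { { 1 }, { 0 } };

	assign_scalers(users, 2);
	return 0;
}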
Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/skl_scaler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 1e7c97243fcf..8a934bada624 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -504,7 +504,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; - struct intel_plane_state *plane_state = NULL; struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct drm_atomic_state *drm_state = crtc_state->uapi.state; @@ -536,6 +535,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, /* walkthrough scaler_users bits and start assigning scalers */ for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) { + struct intel_plane_state *plane_state = NULL; int *scaler_id; const char *name; int idx, ret; -- cgit v1.2.3 From e05a67fdd3c9293827d44a0dfa3618429b832d59 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:35 +0200 Subject: drm/i915: Streamline intel_dsc_pps_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_dsc_pps_read() is rather convoluted. Make it legible. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_vdsc.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 5f2fb702e367..17d6572f9d0a 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -812,13 +812,13 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state) } static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps, - bool *check_equal) + bool *all_equal) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); i915_reg_t dsc_reg[2]; int i, vdsc_per_pipe, dsc_reg_num; - u32 val = 0; + u32 val; vdsc_per_pipe = intel_dsc_get_vdsc_per_pipe(crtc_state); dsc_reg_num = min_t(int, ARRAY_SIZE(dsc_reg), vdsc_per_pipe); @@ -827,20 +827,13 @@ static u32 intel_dsc_pps_read(struct intel_crtc_state *crtc_state, int pps, intel_dsc_get_pps_reg(crtc_state, pps, dsc_reg, dsc_reg_num); - if (check_equal) - *check_equal = true; - - for (i = 0; i < dsc_reg_num; i++) { - u32 tmp; + *all_equal = true; - tmp = intel_de_read(i915, dsc_reg[i]); + val = intel_de_read(i915, dsc_reg[0]); - if (i == 0) { - val = tmp; - } else if (check_equal && tmp != val) { - *check_equal = false; - break; - } else if (!check_equal) { + for (i = 1; i < dsc_reg_num; i++) { + if (intel_de_read(i915, dsc_reg[i]) != val) { + *all_equal = false; break; } } -- cgit v1.2.3 From f175de44d0cf5aa688747b96bad0e596a50eaad7 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:36 +0200 Subject: drm/i915: Drop redundant NULL check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_bios_get_dsc_params() is only called from 
gen11_dsi_dsc_compute_config() and it always passes a non-NULL crtc_state in. Drop the redundant check. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 3e7e96acb24a..aa169b0055e9 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -3475,8 +3475,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder, if (!devdata->dsc) return false; - if (crtc_state) - fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc); + fill_dsc(crtc_state, devdata->dsc, dsc_max_bpc); return true; } -- cgit v1.2.3 From ed1566a982213c1a8a39cac26aa4c53d289ed4bc Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:37 +0200 Subject: drm/i915: Drop crtc NULL check from intel_crtc_active() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_crtc_active() is never called with a NULL crtc. Drop the redundant NULL check. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-5-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/i9xx_wm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index 03e8fb6caa83..11ca9572e8b3 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -608,7 +608,7 @@ static bool intel_crtc_active(struct intel_crtc *crtc) * crtc->state->active once we have proper CRTC states wired up * for atomic. */ - return crtc && crtc->active && crtc->base.primary->state->fb && + return crtc->active && crtc->base.primary->state->fb && crtc->config->hw.adjusted_mode.crtc_clock; } -- cgit v1.2.3 From a599d302ae00917038777fad09107576375e2c95 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:38 +0200 Subject: drm/i915: Drop NULL fb check from intel_fb_uses_dpt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_fb_uses_dpt() should not be called with a NULL fb, so drop the check. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-6-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 81316b21314d..44ae6c1e1d46 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -771,7 +771,7 @@ bool intel_fb_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier) bool intel_fb_uses_dpt(const struct drm_framebuffer *fb) { - return fb && to_i915(fb->dev)->display.params.enable_dpt && + return to_i915(fb->dev)->display.params.enable_dpt && intel_fb_modifier_uses_dpt(to_i915(fb->dev), fb->modifier); } -- cgit v1.2.3 From 7f4f756df7a0716b15176f6fa0552e3480a2b981 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:39 +0200 Subject: drm/i915: Drop redunant null check from intel_get_frame_time_us() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_get_frame_time_us() is never called with a NULL crtc_state so drop the redundant check. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-7-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_psr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 6029bb71276c..0058b07a7cda 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -893,7 +893,7 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder cpu_trans static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate) { - if (!cstate || !cstate->hw.active) + if (!cstate->hw.active) return 0; return DIV_ROUND_UP(1000 * 1000, -- cgit v1.2.3 From e81f48512aa42d633015f182d2ecf91299803077 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:40 +0200 Subject: drm/i915: s/cstate/crtc_state/ in intel_get_frame_time_us() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use standard variable name 'crtc_state' instead of 'cstate'. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-8-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_psr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 0058b07a7cda..b6e2e70e1290 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -891,13 +891,13 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder cpu_trans return false; } -static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate) +static u32 intel_get_frame_time_us(const struct intel_crtc_state *crtc_state) { - if (!cstate->hw.active) + if (!crtc_state->hw.active) return 0; return DIV_ROUND_UP(1000 * 1000, - drm_mode_vrefresh(&cstate->hw.adjusted_mode)); + drm_mode_vrefresh(&crtc_state->hw.adjusted_mode)); } static void psr2_program_idle_frames(struct intel_dp *intel_dp, -- cgit v1.2.3 From c6117b33a173717714a8dbbf9d14ca85db79725e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:41 +0200 Subject: drm/i915/tv: Drop redundant null checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Neither 'tv_mode' or 'color_conversion' can be NULL, so drop the pointless checks. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-9-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_tv.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 2ee4f0d95851..d4386cb3569e 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1417,9 +1417,6 @@ set_tv_mode_timings(struct drm_i915_private *dev_priv, static void set_color_conversion(struct drm_i915_private *dev_priv, const struct color_conversion *color_conversion) { - if (!color_conversion) - return; - intel_de_write(dev_priv, TV_CSC_Y, (color_conversion->ry << 16) | color_conversion->gy); intel_de_write(dev_priv, TV_CSC_Y2, @@ -1454,9 +1451,6 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, int xpos, ypos; unsigned int xsize, ysize; - if (!tv_mode) - return; /* can't happen (mode_prepare prevents this) */ - tv_ctl = intel_de_read(dev_priv, TV_CTL); tv_ctl &= TV_CTL_SAVE; -- cgit v1.2.3 From 877fd09a120d0acee073fbada79fad2ab35396c2 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Thu, 7 Dec 2023 14:10:23 -0800 Subject: drm/i915/mtl: Use port clock compatible numbers for C20 phy In C20 pll_state link_bit_rate and clock fields are bit redundant. Since many of the helpers assume the clock values, which are different from link_bit_rate for dp2.0, convert the helpers to use the numbers that are compatible with link_bit_rate. Currently link_bit_rate is compatible with crtc_state->port_clock. The function intel_c20pll_calc_port_clock returns the number which is compatible with crtc_state->port_clock. In order to avoid extra conversions b/ween clock and link_bit_rate, remove "clock" field from the C20 pll_state and then rename "link_bit_rate" as "clock". While at it rely on crtc_state->port_clock during C20 Pll programming. 
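For reference, an editorial sketch (not part of the patch) of how the old and new DP 2.0 table values relate: the old entries were the 128b/132b symbol clock, while the new ones match crtc_state->port_clock, i.e. the link bit rate divided by ten, so the two differ by the 32-bit symbol size.

#include <stdio.h>

/*
 * port_clock follows the usual i915 convention of link bit rate / 10,
 * e.g. 1000000 for UHBR10 (10 Gbps).
 */
static unsigned int dp2_symbol_clock_khz(unsigned int port_clock)
{
	return port_clock * 10 / 32;	/* 128b/132b symbols are 32 bits */
}

int main(void)
{
	const unsigned int uhbr[] = { 1000000, 1350000, 2000000 };

	for (int i = 0; i < 3; i++)
		printf("port_clock %u -> symbol clock %u kHz\n",
		       uhbr[i], dp2_symbol_clock_khz(uhbr[i]));
	/* prints 312500, 421875 and 625000: the old values the diff below replaces */
	return 0;
}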
Cc: Clint Taylor Cc: Mika Kahola Signed-off-by: Radhakrishna Sripada Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20231207221025.2032207-2-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 43 ++++++++++++++-------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 5fbec5784b83..7d412be996ea 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2117,7 +2117,7 @@ int intel_cx0pll_calc_state(struct intel_crtc_state *crtc_state, static bool intel_c20_use_mplla(u32 clock) { /* 10G and 20G rates use MPLLA */ - if (clock == 312500 || clock == 625000) + if (clock == 1000000 || clock == 2000000) return true; return false; @@ -2192,7 +2192,7 @@ void intel_c20pll_dump_hw_state(struct drm_i915_private *i915, drm_dbg_kms(&i915->drm, "cmn[0] = 0x%.4x, cmn[1] = 0x%.4x, cmn[2] = 0x%.4x, cmn[3] = 0x%.4x\n", hw_state->cmn[0], hw_state->cmn[1], hw_state->cmn[2], hw_state->cmn[3]); - if (intel_c20_use_mplla(hw_state->clock)) { + if (intel_c20_use_mplla(hw_state->link_bit_rate)) { for (i = 0; i < ARRAY_SIZE(hw_state->mplla); i++) drm_dbg_kms(&i915->drm, "mplla[%d] = 0x%.4x\n", i, hw_state->mplla[i]); } else { @@ -2220,11 +2220,11 @@ static u8 intel_c20_get_dp_rate(u32 clock) return 6; case 432000: /* 4.32 Gbps eDP */ return 7; - case 312500: /* 10 Gbps DP2.0 */ + case 1000000: /* 10 Gbps DP2.0 */ return 8; - case 421875: /* 13.5 Gbps DP2.0 */ + case 1350000: /* 13.5 Gbps DP2.0 */ return 9; - case 625000: /* 20 Gbps DP2.0*/ + case 2000000: /* 20 Gbps DP2.0 */ return 10; case 648000: /* 6.48 Gbps eDP*/ return 11; @@ -2242,13 +2242,13 @@ static u8 intel_c20_get_hdmi_rate(u32 clock) return 0; switch (clock) { - case 166670: /* 3 Gbps */ - case 333330: /* 6 Gbps */ - case 666670: /* 12 Gbps */ + case 300000: /* 3 Gbps */ + case 600000: /* 6 Gbps */ + case 1200000: /* 12 Gbps */ return 1; - case 444440: /* 8 Gbps */ + case 800000: /* 8 Gbps */ return 2; - case 555560: /* 10 Gbps */ + case 1000000: /* 10 Gbps */ return 3; default: MISSING_CASE(clock); @@ -2259,7 +2259,7 @@ static u8 intel_c20_get_hdmi_rate(u32 clock) static bool is_dp2(u32 clock) { /* DP2.0 clock rates */ - if (clock == 312500 || clock == 421875 || clock == 625000) + if (clock == 1000000 || clock == 1350000 || clock == 2000000) return true; return false; @@ -2268,11 +2268,11 @@ static bool is_dp2(u32 clock) static bool is_hdmi_frl(u32 clock) { switch (clock) { - case 166670: /* 3 Gbps */ - case 333330: /* 6 Gbps */ - case 444440: /* 8 Gbps */ - case 555560: /* 10 Gbps */ - case 666670: /* 12 Gbps */ + case 300000: /* 3 Gbps */ + case 600000: /* 6 Gbps */ + case 800000: /* 8 Gbps */ + case 1000000: /* 10 Gbps */ + case 1200000: /* 12 Gbps */ return true; default: return false; @@ -2305,6 +2305,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915, const struct intel_c20pll_state *pll_state = &crtc_state->cx0pll_state.c20; bool dp = false; int lane = crtc_state->lane_count > 2 ? 
INTEL_CX0_BOTH_LANES : INTEL_CX0_LANE0; + u32 clock = crtc_state->port_clock; bool cntx; int i; @@ -2343,7 +2344,7 @@ static void intel_c20_pll_program(struct drm_i915_private *i915, } /* 3.3 mpllb or mplla configuration */ - if (intel_c20_use_mplla(pll_state->clock)) { + if (intel_c20_use_mplla(clock)) { for (i = 0; i < ARRAY_SIZE(pll_state->mplla); i++) { if (cntx) intel_c20_sram_write(i915, encoder->port, INTEL_CX0_LANE0, @@ -2370,23 +2371,23 @@ static void intel_c20_pll_program(struct drm_i915_private *i915, /* 4. Program custom width to match the link protocol */ intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_WIDTH, PHY_C20_CUSTOM_WIDTH_MASK, - PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(pll_state->clock, dp)), + PHY_C20_CUSTOM_WIDTH(intel_get_c20_custom_width(clock, dp)), MB_WRITE_COMMITTED); /* 5. For DP or 6. For HDMI */ if (dp) { intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE, BIT(6) | PHY_C20_CUSTOM_SERDES_MASK, - BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(pll_state->clock)), + BIT(6) | PHY_C20_CUSTOM_SERDES(intel_c20_get_dp_rate(clock)), MB_WRITE_COMMITTED); } else { intel_cx0_rmw(i915, encoder->port, lane, PHY_C20_VDR_CUSTOM_SERDES_RATE, BIT(7) | PHY_C20_CUSTOM_SERDES_MASK, - is_hdmi_frl(pll_state->clock) ? BIT(7) : 0, + is_hdmi_frl(clock) ? BIT(7) : 0, MB_WRITE_COMMITTED); intel_cx0_write(i915, encoder->port, INTEL_CX0_BOTH_LANES, PHY_C20_VDR_HDMI_RATE, - intel_c20_get_hdmi_rate(pll_state->clock), + intel_c20_get_hdmi_rate(clock), MB_WRITE_COMMITTED); } -- cgit v1.2.3 From 1103672fd6b8486c4cc1ab69623e9a080a00e022 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Thu, 7 Dec 2023 14:10:24 -0800 Subject: drm/i915/mtl: Remove misleading "clock" field from C20 pll_state The field link_bit_rate serves as the actual clock value for the C20 pll_state structure. Remove the misleading clock field. The subsequent patch would rename the link_bit_rate as the clock field. 
Cc: Clint Taylor Cc: Mika Kahola Signed-off-by: Radhakrishna Sripada Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20231207221025.2032207-3-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 18 ------------------ drivers/gpu/drm/i915/display/intel_display_types.h | 3 +-- 2 files changed, 1 insertion(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 7d412be996ea..d518b55d5150 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -746,7 +746,6 @@ static const struct intel_c10pll_state * const mtl_c10_edp_tables[] = { /* C20 basic DP 1.4 tables */ static const struct intel_c20pll_state mtl_c20_dp_rbr = { .link_bit_rate = 162000, - .clock = 162000, .tx = { 0xbe88, /* tx cfg0 */ 0x5800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -772,7 +771,6 @@ static const struct intel_c20pll_state mtl_c20_dp_rbr = { static const struct intel_c20pll_state mtl_c20_dp_hbr1 = { .link_bit_rate = 270000, - .clock = 270000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -798,7 +796,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr1 = { static const struct intel_c20pll_state mtl_c20_dp_hbr2 = { .link_bit_rate = 540000, - .clock = 540000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -824,7 +821,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr2 = { static const struct intel_c20pll_state mtl_c20_dp_hbr3 = { .link_bit_rate = 810000, - .clock = 810000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -851,7 +847,6 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr3 = { /* C20 basic DP 2.0 tables */ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = { .link_bit_rate = 1000000, /* 10 Gbps */ - .clock = 312500, .tx = { 0xbe21, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -876,7 +871,6 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = { static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = { .link_bit_rate = 1350000, /* 13.5 Gbps */ - .clock = 421875, .tx = { 0xbea0, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -902,7 +896,6 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = { static const struct intel_c20pll_state mtl_c20_dp_uhbr20 = { .link_bit_rate = 2000000, /* 20 Gbps */ - .clock = 625000, .tx = { 0xbe20, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1522,7 +1515,6 @@ static const struct intel_c10pll_state * const mtl_c10_hdmi_tables[] = { static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = { .link_bit_rate = 25175, - .clock = 25175, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1548,7 +1540,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = { static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = { .link_bit_rate = 27000, - .clock = 27000, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1574,7 +1565,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = { static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = { .link_bit_rate = 74250, - .clock = 74250, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1600,7 +1590,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = { static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = { .link_bit_rate = 148500, - 
.clock = 148500, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1626,7 +1615,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = { static const struct intel_c20pll_state mtl_c20_hdmi_594 = { .link_bit_rate = 594000, - .clock = 594000, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1652,7 +1640,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_594 = { static const struct intel_c20pll_state mtl_c20_hdmi_300 = { .link_bit_rate = 3000000, - .clock = 166670, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1678,7 +1665,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_300 = { static const struct intel_c20pll_state mtl_c20_hdmi_600 = { .link_bit_rate = 6000000, - .clock = 333330, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1704,7 +1690,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_600 = { static const struct intel_c20pll_state mtl_c20_hdmi_800 = { .link_bit_rate = 8000000, - .clock = 444440, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1730,7 +1715,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_800 = { static const struct intel_c20pll_state mtl_c20_hdmi_1000 = { .link_bit_rate = 10000000, - .clock = 555560, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1756,7 +1740,6 @@ static const struct intel_c20pll_state mtl_c20_hdmi_1000 = { static const struct intel_c20pll_state mtl_c20_hdmi_1200 = { .link_bit_rate = 12000000, - .clock = 666670, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -2006,7 +1989,6 @@ static int intel_c20_compute_hdmi_tmds_pll(u64 pixel_clock, struct intel_c20pll_ mpllb_ana_freq_vco = MPLLB_ANA_FREQ_VCO_0; pll_state->link_bit_rate = pixel_clock; - pll_state->clock = pixel_clock; pll_state->tx[0] = 0xbe88; pll_state->tx[1] = 0x9800; pll_state->tx[2] = 0x0000; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index b3e942f2eeb0..6b348d329957 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1022,8 +1022,7 @@ struct intel_c10pll_state { }; struct intel_c20pll_state { - u32 link_bit_rate; - u32 clock; /* in kHz */ + u32 link_bit_rate; /* in kHz */ u16 tx[3]; u16 cmn[4]; union { -- cgit v1.2.3 From 872ee9cc0219334486e19da20e56665e612fdcb7 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Thu, 7 Dec 2023 14:10:25 -0800 Subject: drm/i915/mtl: Rename the link_bit_rate to clock in C20 pll_state With the cleanup of the misleading clock value to avoid extra calculations to convert between link_bit_rate and clock, use one standard "clock" field for the c20 pll which works with crtc_state->port_clock field. 
Cc: Clint Taylor Cc: Mika Kahola Signed-off-by: Radhakrishna Sripada Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20231207221025.2032207-4-radhakrishna.sripada@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 42 +++++++++++----------- drivers/gpu/drm/i915/display/intel_display_types.h | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index d518b55d5150..4e6ea71ff629 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -745,7 +745,7 @@ static const struct intel_c10pll_state * const mtl_c10_edp_tables[] = { /* C20 basic DP 1.4 tables */ static const struct intel_c20pll_state mtl_c20_dp_rbr = { - .link_bit_rate = 162000, + .clock = 162000, .tx = { 0xbe88, /* tx cfg0 */ 0x5800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -770,7 +770,7 @@ static const struct intel_c20pll_state mtl_c20_dp_rbr = { }; static const struct intel_c20pll_state mtl_c20_dp_hbr1 = { - .link_bit_rate = 270000, + .clock = 270000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -795,7 +795,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr1 = { }; static const struct intel_c20pll_state mtl_c20_dp_hbr2 = { - .link_bit_rate = 540000, + .clock = 540000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -820,7 +820,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr2 = { }; static const struct intel_c20pll_state mtl_c20_dp_hbr3 = { - .link_bit_rate = 810000, + .clock = 810000, .tx = { 0xbe88, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -846,7 +846,7 @@ static const struct intel_c20pll_state mtl_c20_dp_hbr3 = { /* C20 basic DP 2.0 tables */ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = { - .link_bit_rate = 1000000, /* 10 Gbps */ + .clock = 1000000, /* 10 Gbps */ .tx = { 0xbe21, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -870,7 +870,7 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr10 = { }; static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = { - .link_bit_rate = 1350000, /* 13.5 Gbps */ + .clock = 1350000, /* 13.5 Gbps */ .tx = { 0xbea0, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -895,7 +895,7 @@ static const struct intel_c20pll_state mtl_c20_dp_uhbr13_5 = { }; static const struct intel_c20pll_state mtl_c20_dp_uhbr20 = { - .link_bit_rate = 2000000, /* 20 Gbps */ + .clock = 2000000, /* 20 Gbps */ .tx = { 0xbe20, /* tx cfg0 */ 0x4800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1514,7 +1514,7 @@ static const struct intel_c10pll_state * const mtl_c10_hdmi_tables[] = { }; static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = { - .link_bit_rate = 25175, + .clock = 25175, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1539,7 +1539,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_25_175 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = { - .link_bit_rate = 27000, + .clock = 27000, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1564,7 +1564,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_27_0 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_74_25 = { - .link_bit_rate = 74250, + .clock = 74250, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1589,7 +1589,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_74_25 
= { }; static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = { - .link_bit_rate = 148500, + .clock = 148500, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1614,7 +1614,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_148_5 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_594 = { - .link_bit_rate = 594000, + .clock = 594000, .tx = { 0xbe88, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1639,7 +1639,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_594 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_300 = { - .link_bit_rate = 3000000, + .clock = 3000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1664,7 +1664,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_300 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_600 = { - .link_bit_rate = 6000000, + .clock = 6000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1689,7 +1689,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_600 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_800 = { - .link_bit_rate = 8000000, + .clock = 8000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1714,7 +1714,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_800 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_1000 = { - .link_bit_rate = 10000000, + .clock = 10000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1739,7 +1739,7 @@ static const struct intel_c20pll_state mtl_c20_hdmi_1000 = { }; static const struct intel_c20pll_state mtl_c20_hdmi_1200 = { - .link_bit_rate = 12000000, + .clock = 12000000, .tx = { 0xbe98, /* tx cfg0 */ 0x9800, /* tx cfg1 */ 0x0000, /* tx cfg2 */ @@ -1988,7 +1988,7 @@ static int intel_c20_compute_hdmi_tmds_pll(u64 pixel_clock, struct intel_c20pll_ else mpllb_ana_freq_vco = MPLLB_ANA_FREQ_VCO_0; - pll_state->link_bit_rate = pixel_clock; + pll_state->clock = pixel_clock; pll_state->tx[0] = 0xbe88; pll_state->tx[1] = 0x9800; pll_state->tx[2] = 0x0000; @@ -2024,7 +2024,7 @@ static int intel_c20_phy_check_hdmi_link_rate(int clock) int i; for (i = 0; tables[i]; i++) { - if (clock == tables[i]->link_bit_rate) + if (clock == tables[i]->clock) return MODE_OK; } @@ -2076,7 +2076,7 @@ static int intel_c20pll_calc_state(struct intel_crtc_state *crtc_state, return -EINVAL; for (i = 0; tables[i]; i++) { - if (crtc_state->port_clock == tables[i]->link_bit_rate) { + if (crtc_state->port_clock == tables[i]->clock) { crtc_state->cx0pll_state.c20 = *tables[i]; return 0; } @@ -2174,7 +2174,7 @@ void intel_c20pll_dump_hw_state(struct drm_i915_private *i915, drm_dbg_kms(&i915->drm, "cmn[0] = 0x%.4x, cmn[1] = 0x%.4x, cmn[2] = 0x%.4x, cmn[3] = 0x%.4x\n", hw_state->cmn[0], hw_state->cmn[1], hw_state->cmn[2], hw_state->cmn[3]); - if (intel_c20_use_mplla(hw_state->link_bit_rate)) { + if (intel_c20_use_mplla(hw_state->clock)) { for (i = 0; i < ARRAY_SIZE(hw_state->mplla); i++) drm_dbg_kms(&i915->drm, "mplla[%d] = 0x%.4x\n", i, hw_state->mplla[i]); } else { diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 6b348d329957..2616bb6267a1 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1022,7 +1022,7 @@ struct intel_c10pll_state { }; struct intel_c20pll_state { - u32 link_bit_rate; /* in kHz */ + u32 clock; /* in kHz */ u16 tx[3]; u16 cmn[4]; union { -- 
cgit v1.2.3 From 6128becaeafa876048bd1b6a83d836329e4940c5 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 28 Sep 2023 18:24:49 +0300 Subject: drm/i915: Stop accessing crtc->state from the flip done irq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assuming crtc->state is pointing at the correct thing for the async flip commit is nonsense. If we had already queued up multiple commits this would point at the very lates crtc state even if the older commits hadn't even happened yet. Instead properly stage/arm the event like we do for async flips. Since we don't need to arm multiple of these at the same time we don't need a list like the normal vblank even processing uses. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230928152450.30109-1-ville.syrjala@linux.intel.com Reviewed-by: Arun R Murthy --- drivers/gpu/drm/i915/display/intel_crtc.c | 9 ++++++++- drivers/gpu/drm/i915/display/intel_display_irq.c | 9 ++++----- drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++ 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 1fd068e6e26c..8a84a31c7b48 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -553,8 +553,15 @@ void intel_pipe_update_start(struct intel_atomic_state *state, intel_psr_lock(new_crtc_state); - if (new_crtc_state->do_async_flip) + if (new_crtc_state->do_async_flip) { + spin_lock_irq(&crtc->base.dev->event_lock); + /* arm the event for the flip done irq handler */ + crtc->flip_done_event = new_crtc_state->uapi.event; + spin_unlock_irq(&crtc->base.dev->event_lock); + + new_crtc_state->uapi.event = NULL; return; + } if (intel_crtc_needs_vblank_work(new_crtc_state)) intel_crtc_vblank_work_init(new_crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index f8ed53f30b2e..6f2b45cfcf72 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -340,16 +340,15 @@ static void flip_done_handler(struct drm_i915_private *i915, enum pipe pipe) { struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe); - struct drm_crtc_state *crtc_state = crtc->base.state; - struct drm_pending_vblank_event *e = crtc_state->event; struct drm_device *dev = &i915->drm; unsigned long irqflags; spin_lock_irqsave(&dev->event_lock, irqflags); - crtc_state->event = NULL; - - drm_crtc_send_vblank_event(&crtc->base, e); + if (crtc->flip_done_event) { + drm_crtc_send_vblank_event(&crtc->base, crtc->flip_done_event); + crtc->flip_done_event = NULL; + } spin_unlock_irqrestore(&dev->event_lock, irqflags); } diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 2616bb6267a1..341d80c2b9de 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1475,6 +1475,9 @@ struct intel_crtc { struct intel_crtc_state *config; + /* armed event for async flip */ + struct drm_pending_vblank_event *flip_done_event; + /* Access to these should be protected by dev_priv->irq_lock. 
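A rough userspace analogue of the staging scheme described above (a sketch with a plain mutex standing in for the DRM event_lock, not the driver code): the commit path arms at most one pending event under the lock, and the handler consumes and clears it, so it never has to guess which crtc state the event belongs to.

#include <pthread.h>
#include <stdio.h>

struct event { int id; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct event *flip_done_event;	/* at most one armed event */

static void arm_flip_done(struct event *e)
{
	pthread_mutex_lock(&lock);
	flip_done_event = e;		/* staged for the handler */
	pthread_mutex_unlock(&lock);
}

static void flip_done_handler(void)
{
	pthread_mutex_lock(&lock);
	if (flip_done_event) {
		printf("send flip done event %d\n", flip_done_event->id);
		flip_done_event = NULL;	/* consumed */
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	struct event e = { .id = 1 };

	arm_flip_done(&e);
	flip_done_handler();	/* delivers event 1 */
	flip_done_handler();	/* nothing armed: no-op */
	return 0;
}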
*/ bool cpu_fifo_underrun_disabled; bool pch_fifo_underrun_disabled; -- cgit v1.2.3 From 362a8dba85ebedbf6939dad78bc6de398a2ef4e7 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 28 Sep 2023 18:24:50 +0300 Subject: drm/i915: Drop irqsave/restore for flip_done_handler() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since flip_done_handler() is always called from the irq handler we can skip the irqsave/restore dance. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230928152450.30109-2-ville.syrjala@linux.intel.com Reviewed-by: Arun R Murthy --- drivers/gpu/drm/i915/display/intel_display_irq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 6f2b45cfcf72..a7d8f3fc98de 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -340,17 +340,15 @@ static void flip_done_handler(struct drm_i915_private *i915, enum pipe pipe) { struct intel_crtc *crtc = intel_crtc_for_pipe(i915, pipe); - struct drm_device *dev = &i915->drm; - unsigned long irqflags; - spin_lock_irqsave(&dev->event_lock, irqflags); + spin_lock(&i915->drm.event_lock); if (crtc->flip_done_event) { drm_crtc_send_vblank_event(&crtc->base, crtc->flip_done_event); crtc->flip_done_event = NULL; } - spin_unlock_irqrestore(&dev->event_lock, irqflags); + spin_unlock(&i915->drm.event_lock); } static void hsw_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From 8814455a0e54ca353b4b0ad5105569d3fdb945cc Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 7 Dec 2023 21:30:47 +0000 Subject: drm/msm: Refactor UBWC config setting Split up calculating configuration parameters and programming them, so that we can expose them to userspace. 
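The diff below keeps the existing highest-bank-bit register encoding and only derives it from the new ubwc_config.highest_bank_bit field. As a quick worked example of that encoding (editorial sketch, not driver code): the hardware takes highest_bank_bit minus 13 and splits it into a low two-bit field and a high field.

#include <stdio.h>

int main(void)
{
	/* values the new code stores per GPU generation */
	const unsigned int highest_bank_bit[] = { 13, 14, 15, 16 };

	for (int i = 0; i < 4; i++) {
		unsigned int hbb = highest_bank_bit[i] - 13;
		unsigned int hbb_hi = hbb >> 2;
		unsigned int hbb_lo = hbb & 3;

		printf("highest_bank_bit %u -> hbb_hi %u, hbb_lo %u\n",
		       highest_bank_bit[i], hbb_hi, hbb_lo);
	}
	/* 16 yields hbb_lo 3, matching the old hard-coded a650/a7xx value */
	return 0;
}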
Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/571180/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 21 ++++--- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 101 ++++++++++++++++++-------------- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 9 +++ 3 files changed, 78 insertions(+), 53 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e5916c106796..c003f970189b 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -684,7 +684,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - u32 regbit; + u32 hbb; int ret; gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); @@ -820,18 +820,15 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F); - /* Set the highest bank bit */ - if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu)) - regbit = 2; - else - regbit = 1; + BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13); + hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13; - gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7); - gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1); + gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, hbb << 7); + gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, hbb << 1); if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) || adreno_is_a540(adreno_gpu)) - gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit); + gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, hbb); /* Disable All flat shading optimization (ALLFLATOPTDIS) */ gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10)); @@ -1785,5 +1782,11 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) /* Set up the preemption specific bits and pieces for each ringbuffer */ a5xx_preempt_init(gpu); + /* Set the highest bank bit */ + if (adreno_is_a540(adreno_gpu) || adreno_is_a530(adreno_gpu)) + adreno_gpu->ubwc_config.highest_bank_bit = 15; + else + adreno_gpu->ubwc_config.highest_bank_bit = 14; + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 723f9b97734d..c0bc924cd302 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1270,81 +1270,92 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]); } -static void a6xx_set_ubwc_config(struct msm_gpu *gpu) +static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); /* Unknown, introduced with A650 family, related to UBWC mode/ver 4 */ - u32 rgb565_predicator = 0; + gpu->ubwc_config.rgb565_predicator = 0; /* Unknown, introduced with A650 family */ - u32 uavflagprd_inv = 0; + gpu->ubwc_config.uavflagprd_inv = 0; /* Whether the minimum access length is 64 bits */ - u32 min_acc_len = 0; + gpu->ubwc_config.min_acc_len = 0; /* Entirely magic, per-GPU-gen value */ - u32 ubwc_mode = 0; + gpu->ubwc_config.ubwc_mode = 0; /* * The Highest Bank Bit value represents the bit of the highest DDR bank. - * We then subtract 13 from it (13 is the minimum value allowed by hw) and - * write the lowest two bits of the remaining value as hbb_lo and the - * one above it as hbb_hi to the hardware. This should ideally use DRAM - * type detection. + * This should ideally use DRAM type detection. 
*/ - u32 hbb_hi = 0; - u32 hbb_lo = 2; - /* Unknown, introduced with A640/680 */ - u32 amsbc = 0; + gpu->ubwc_config.highest_bank_bit = 15; - if (adreno_is_a610(adreno_gpu)) { - /* HBB = 14 */ - hbb_lo = 1; - min_acc_len = 1; - ubwc_mode = 1; + if (adreno_is_a610(gpu)) { + gpu->ubwc_config.highest_bank_bit = 14; + gpu->ubwc_config.min_acc_len = 1; + gpu->ubwc_config.ubwc_mode = 1; } /* a618 is using the hw default values */ - if (adreno_is_a618(adreno_gpu)) + if (adreno_is_a618(gpu)) return; - if (adreno_is_a619_holi(adreno_gpu)) - hbb_lo = 0; + if (adreno_is_a619_holi(gpu)) + gpu->ubwc_config.highest_bank_bit = 13; - if (adreno_is_a640_family(adreno_gpu)) - amsbc = 1; + if (adreno_is_a640_family(gpu)) + gpu->ubwc_config.amsbc = 1; - if (adreno_is_a650(adreno_gpu) || - adreno_is_a660(adreno_gpu) || - adreno_is_a690(adreno_gpu) || - adreno_is_a730(adreno_gpu) || - adreno_is_a740_family(adreno_gpu)) { + if (adreno_is_a650(gpu) || + adreno_is_a660(gpu) || + adreno_is_a690(gpu) || + adreno_is_a730(gpu) || + adreno_is_a740_family(gpu)) { /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */ - hbb_lo = 3; - amsbc = 1; - rgb565_predicator = 1; - uavflagprd_inv = 2; + gpu->ubwc_config.highest_bank_bit = 16; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.rgb565_predicator = 1; + gpu->ubwc_config.uavflagprd_inv = 2; } - if (adreno_is_7c3(adreno_gpu)) { - hbb_lo = 1; - amsbc = 1; - rgb565_predicator = 1; - uavflagprd_inv = 2; + if (adreno_is_7c3(gpu)) { + gpu->ubwc_config.highest_bank_bit = 14; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.rgb565_predicator = 1; + gpu->ubwc_config.uavflagprd_inv = 2; } +} + +static void a6xx_set_ubwc_config(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + /* + * We subtract 13 from the highest bank bit (13 is the minimum value + * allowed by hw) and write the lowest two bits of the remaining value + * as hbb_lo and the one above it as hbb_hi to the hardware. 
+ */ + BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13); + u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13; + u32 hbb_hi = hbb >> 2; + u32 hbb_lo = hbb & 3; gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, - rgb565_predicator << 11 | hbb_hi << 10 | amsbc << 4 | - min_acc_len << 3 | hbb_lo << 1 | ubwc_mode); + adreno_gpu->ubwc_config.rgb565_predicator << 11 | + hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 | + adreno_gpu->ubwc_config.min_acc_len << 3 | + hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, hbb_hi << 4 | - min_acc_len << 3 | hbb_lo << 1 | ubwc_mode); + adreno_gpu->ubwc_config.min_acc_len << 3 | + hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, hbb_hi << 10 | - uavflagprd_inv << 4 | min_acc_len << 3 | - hbb_lo << 1 | ubwc_mode); + adreno_gpu->ubwc_config.uavflagprd_inv << 4 | + adreno_gpu->ubwc_config.min_acc_len << 3 | + hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); if (adreno_is_a7xx(adreno_gpu)) gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL, FIELD_PREP(GENMASK(8, 5), hbb_lo)); - gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, min_acc_len << 23 | hbb_lo << 21); + gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, + adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21); } static int a6xx_cp_init(struct msm_gpu *gpu) @@ -2911,5 +2922,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); + a6xx_calc_ubwc_config(adreno_gpu); + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 80b3f6312116..bc14df96feb0 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -165,6 +165,15 @@ struct adreno_gpu { /* firmware: */ const struct firmware *fw[ADRENO_FW_MAX]; + struct { + u32 rgb565_predicator; + u32 uavflagprd_inv; + u32 min_acc_len; + u32 ubwc_mode; + u32 highest_bank_bit; + u32 amsbc; + } ubwc_config; + /* * Register offsets are different between some GPUs. * GPU specific offsets will be exported by GPU specific -- cgit v1.2.3 From 44a88fa45665318473bfdbb832eba1da2d0a3740 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 7 Dec 2023 21:30:48 +0000 Subject: drm/msm: Add param for the highest bank bit This parameter is programmed by the kernel and influences the tiling layout of images. Exposing it to userspace will allow it to tile/untile images correctly without guessing what value the kernel programmed, and allow us to change it in the future without breaking userspace. 
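For completeness, a hedged sketch of how userspace could read the new parameter once this lands, assuming the existing MSM GET_PARAM ioctl and libdrm; header locations and the render node path vary by system, so treat this as illustrative only.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <xf86drm.h>
#include <msm_drm.h>	/* from libdrm or the kernel uapi headers */

int main(void)
{
	struct drm_msm_param req;
	int fd = open("/dev/dri/renderD128", O_RDWR);	/* example node */

	if (fd < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.pipe = MSM_PIPE_3D0;
	req.param = MSM_PARAM_HIGHEST_BANK_BIT;

	if (drmIoctl(fd, DRM_IOCTL_MSM_GET_PARAM, &req) == 0)
		printf("highest bank bit: %llu\n",
		       (unsigned long long)req.value);

	close(fd);
	return 0;
}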
Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/571181/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 3 +++ include/uapi/drm/msm_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 3fe9fd240cc7..074fb498706f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -373,6 +373,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, return -EINVAL; *value = ctx->aspace->va_size; return 0; + case MSM_PARAM_HIGHEST_BANK_BIT: + *value = adreno_gpu->ubwc_config.highest_bank_bit; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 6f2a7ad04aa4..d8a6b3472760 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -86,6 +86,7 @@ struct drm_msm_timespec { #define MSM_PARAM_CMDLINE 0x0d /* WO: override for task cmdline */ #define MSM_PARAM_VA_START 0x0e /* RO: start of valid GPU iova range */ #define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */ +#define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From 3e6688fd96966b6c275e95c39aa367bc0a490ccd Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 30 Nov 2023 16:26:41 -0800 Subject: drm/msm/adreno: Fix A680 chip id The only A680 referenced from DeviceTree is patch level 1, which since commit '90b593ce1c9e ("drm/msm/adreno: Switch to chip-id for identifying GPU")' isn't a known chip id. Correct the chip id to allow the A680 to be recognized again. Fixes: 90b593ce1c9e ("drm/msm/adreno: Switch to chip-id for identifying GPU") Signed-off-by: Bjorn Andersson Patchwork: https://patchwork.freedesktop.org/patch/569839/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index f62ab5257e66..2ce7d7b1690d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -464,7 +464,7 @@ static const struct adreno_info gpulist[] = { { 190, 1 }, ), }, { - .chip_ids = ADRENO_CHIP_IDS(0x06080000), + .chip_ids = ADRENO_CHIP_IDS(0x06080001), .family = ADRENO_6XX_GEN2, .revn = 680, .fw = { -- cgit v1.2.3 From a3dec9cdf42bceccdf7a7185099a515ba0a8b29b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Nov 2023 16:38:45 -0800 Subject: drm/msm/gem: Remove "valid" tracking This was a small optimization for pre-soft-pin userspace. But mesa switched to soft-pin nearly 5yrs ago. So lets drop the optimization and simplify the code. 
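[For readers skimming the removal below, this is the essence of the optimization being dropped, paraphrased from the deleted hunks: with soft-pin userspace the presumed address always matches, so the bookkeeping no longer pays for itself.]

  /* Old behaviour (now removed): compare the iova userspace "presumed"
   * in the cmdstream against the actual pinned vma, and skip cmdstream
   * patching for the whole submit when every BO still matches. */
  if (vma->iova == submit->bos[i].iova) {
          submit->bos[i].flags |= BO_VALID;       /* presumed address still good */
  } else {
          submit->bos[i].iova = vma->iova;
          submit->bos[i].flags &= ~BO_VALID;      /* address changed... */
          submit->valid = false;                  /* ...so relocs must be applied */
  }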
Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/568328/ --- drivers/gpu/drm/msm/msm_gem.h | 2 -- drivers/gpu/drm/msm/msm_gem_submit.c | 44 +++++++----------------------------- 2 files changed, 8 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 6352bd3cc5ed..79195f2fd171 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -275,7 +275,6 @@ struct msm_gem_submit { struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool fault_dumped; /* Limit devcoredump dumping to one per submit */ - bool valid; /* true if no cmdstream patching needed */ bool in_rb; /* "sudo" mode, copy cmds into RB */ struct msm_ringbuffer *ring; unsigned int nr_cmds; @@ -292,7 +291,6 @@ struct msm_gem_submit { } *cmd; /* array of size nr_cmds */ struct { /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ -#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */ #define BO_LOCKED 0x4000 /* obj lock is held */ #define BO_PINNED 0x2000 /* obj (pages) is pinned and on active list */ uint32_t flags; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 7aa5e14ab9aa..2fdc373c2589 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -150,8 +150,6 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, submit->bos[i].handle = submit_bo.handle; submit->bos[i].flags = submit_bo.flags; - /* in validate_objects() we figure out if this is true: */ - submit->bos[i].iova = submit_bo.presumed; } spin_lock(&file->table_lock); @@ -278,9 +276,6 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) { unsigned cleanup_flags = BO_PINNED | BO_LOCKED; submit_cleanup_bo(submit, i, cleanup_flags); - - if (!(submit->bos[i].flags & BO_VALID)) - submit->bos[i].iova = 0; } /* This is where we make sure all the bo's are reserved and pin'd: */ @@ -390,8 +385,6 @@ static int submit_pin_objects(struct msm_gem_submit *submit) struct msm_drm_private *priv = submit->dev->dev_private; int i, ret = 0; - submit->valid = true; - for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; struct msm_gem_vma *vma; @@ -407,14 +400,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) if (ret) break; - if (vma->iova == submit->bos[i].iova) { - submit->bos[i].flags |= BO_VALID; - } else { - submit->bos[i].iova = vma->iova; - /* iova changed, so address in cmdstream is not valid: */ - submit->bos[i].flags &= ~BO_VALID; - submit->valid = false; - } + submit->bos[i].iova = vma->iova; } /* @@ -451,7 +437,7 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) } static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, - struct drm_gem_object **obj, uint64_t *iova, bool *valid) + struct drm_gem_object **obj, uint64_t *iova) { if (idx >= submit->nr_bos) { SUBMIT_ERROR(submit, "invalid buffer index: %u (out of %u)\n", @@ -463,8 +449,6 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, *obj = submit->bos[idx].obj; if (iova) *iova = submit->bos[idx].iova; - if (valid) - *valid = !!(submit->bos[idx].flags & BO_VALID); return 0; } @@ -477,9 +461,6 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob uint32_t *ptr; int ret = 0; - if (!nr_relocs) - return 0; - if (offset % 4) { SUBMIT_ERROR(submit, "non-aligned cmdstream buffer: 
%u\n", offset); return -EINVAL; @@ -500,7 +481,6 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob struct drm_msm_gem_submit_reloc submit_reloc = relocs[i]; uint32_t off; uint64_t iova; - bool valid; if (submit_reloc.submit_offset % 4) { SUBMIT_ERROR(submit, "non-aligned reloc offset: %u\n", @@ -519,13 +499,10 @@ static int submit_reloc(struct msm_gem_submit *submit, struct drm_gem_object *ob goto out; } - ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid); + ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova); if (ret) goto out; - if (valid) - continue; - iova += submit_reloc.reloc_offset; if (submit_reloc.shift < 0) @@ -879,8 +856,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_gem_object *obj; uint64_t iova; - ret = submit_bo(submit, submit->cmd[i].idx, - &obj, &iova, NULL); + ret = submit_bo(submit, submit->cmd[i].idx, &obj, &iova); if (ret) goto out; @@ -894,17 +870,13 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->cmd[i].iova = iova + (submit->cmd[i].offset * 4); - if (submit->valid) + if (likely(!submit->cmd[i].nr_relocs)) continue; if (!gpu->allow_relocs) { - if (submit->cmd[i].nr_relocs) { - SUBMIT_ERROR(submit, "relocs not allowed\n"); - ret = -EINVAL; - goto out; - } - - continue; + SUBMIT_ERROR(submit, "relocs not allowed\n"); + ret = -EINVAL; + goto out; } ret = submit_reloc(submit, obj, submit->cmd[i].offset * 4, -- cgit v1.2.3 From 202f98c19a11e335867a1906c2fb6c53680e4603 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Nov 2023 16:38:46 -0800 Subject: drm/msm/gem: Remove submit_unlock_unpin_bo() The only point it is called is before pinning objects, so the "unpin" part of the name is fiction. Just remove it and call submit_cleanup_bo() directly. Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/568330/ --- drivers/gpu/drm/msm/msm_gem_submit.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 2fdc373c2589..51726c9ddffd 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -272,12 +272,6 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, dma_resv_unlock(obj->resv); } -static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) -{ - unsigned cleanup_flags = BO_PINNED | BO_LOCKED; - submit_cleanup_bo(submit, i, cleanup_flags); -} - /* This is where we make sure all the bo's are reserved and pin'd: */ static int submit_lock_objects(struct msm_gem_submit *submit) { @@ -313,10 +307,10 @@ fail: } for (; i >= 0; i--) - submit_unlock_unpin_bo(submit, i); + submit_cleanup_bo(submit, i, BO_LOCKED); if (slow_locked > 0) - submit_unlock_unpin_bo(submit, slow_locked); + submit_cleanup_bo(submit, slow_locked, BO_LOCKED); if (ret == -EDEADLK) { struct drm_gem_object *obj = submit->bos[contended].obj; -- cgit v1.2.3 From ceab575cafed594fb3cee1bec01a0e4ed5e2d752 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Nov 2023 16:38:47 -0800 Subject: drm/msm/gem: Don't queue job to sched in error cases We shouldn't be running the job in error cases. This also avoids having to think too hard about where the objs get unpinned (and if necessary, the resv takes over tracking that the obj is busy).. ie. error cases it always happens synchronously, and normal cases it happens from scheduler job_run() callback. 
Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/568331/ --- drivers/gpu/drm/msm/msm_gem_submit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 51726c9ddffd..0bcdc589ceea 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -946,6 +946,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, } } + if (ret) + goto out; + submit_attach_object_fences(submit); /* The scheduler owns a ref now: */ -- cgit v1.2.3 From 2d7d2c4e84802485a1e765bd0732d41526dcf25c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Nov 2023 16:38:48 -0800 Subject: drm/msm/gem: Split out submit_unpin_objects() helper Untangle unpinning from unlock/unref loop. The unpin only happens in error paths so it is easier to decouple from the normal unlock path. Since we never have an intermediate state where a subset of buffers are pinned (ie. we never bail out of the pin or unpin loops) we can replace the bo state flag bit with a global flag in the submit. Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/568335/ --- drivers/gpu/drm/msm/msm_gem.h | 6 +++--- drivers/gpu/drm/msm/msm_gem_submit.c | 22 +++++++++++++++++----- drivers/gpu/drm/msm/msm_ringbuffer.c | 3 ++- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 79195f2fd171..80f4207120ea 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -274,8 +274,9 @@ struct msm_gem_submit { int fence_id; /* key into queue->fence_idr */ struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ - bool fault_dumped; /* Limit devcoredump dumping to one per submit */ - bool in_rb; /* "sudo" mode, copy cmds into RB */ + bool bos_pinned : 1; + bool fault_dumped:1;/* Limit devcoredump dumping to one per submit */ + bool in_rb : 1; /* "sudo" mode, copy cmds into RB */ struct msm_ringbuffer *ring; unsigned int nr_cmds; unsigned int nr_bos; @@ -292,7 +293,6 @@ struct msm_gem_submit { struct { /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ #define BO_LOCKED 0x4000 /* obj lock is held */ -#define BO_PINNED 0x2000 /* obj (pages) is pinned and on active list */ uint32_t flags; union { struct drm_gem_object *obj; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 0bcdc589ceea..208fad42f784 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -265,9 +265,6 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, */ submit->bos[i].flags &= ~cleanup_flags; - if (flags & BO_PINNED) - msm_gem_unpin_locked(obj); - if (flags & BO_LOCKED) dma_resv_unlock(obj->resv); } @@ -407,13 +404,28 @@ static int submit_pin_objects(struct msm_gem_submit *submit) mutex_lock(&priv->lru.lock); for (i = 0; i < submit->nr_bos; i++) { msm_gem_pin_obj_locked(submit->bos[i].obj); - submit->bos[i].flags |= BO_PINNED; } mutex_unlock(&priv->lru.lock); + submit->bos_pinned = true; + return ret; } +static void submit_unpin_objects(struct msm_gem_submit *submit) +{ + if (!submit->bos_pinned) + return; + + for (int i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = submit->bos[i].obj; + + msm_gem_unpin_locked(obj); + } + + submit->bos_pinned = false; +} + static void 
submit_attach_object_fences(struct msm_gem_submit *submit) { int i; @@ -525,7 +537,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error) unsigned i; if (error) - cleanup_flags |= BO_PINNED; + submit_unpin_objects(submit); for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 4968568e3b54..4bc13f7d005a 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -29,9 +29,10 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) struct drm_gem_object *obj = submit->bos[i].obj; msm_gem_unpin_active(obj); - submit->bos[i].flags &= ~BO_PINNED; } + submit->bos_pinned = false; + mutex_unlock(&priv->lru.lock); msm_gpu_submit(gpu, submit); -- cgit v1.2.3 From 3a48a40387e73e1a37428b11cb3ba56d4c108571 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Nov 2023 16:38:49 -0800 Subject: drm/msm/gem: Cleanup submit_cleanup_bo() Now that it only handles unlock duty, drop the superfluous arg and rename it. Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/568333/ --- drivers/gpu/drm/msm/msm_gem_submit.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 208fad42f784..2bb5d13726d5 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -248,14 +248,10 @@ out: return ret; } -/* Unwind bo state, according to cleanup_flags. In the success case, only - * the lock is dropped at the end of the submit (and active/pin ref is dropped - * later when the submit is retired). - */ -static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, - unsigned cleanup_flags) +static void submit_unlock_bo(struct msm_gem_submit *submit, int i) { struct drm_gem_object *obj = submit->bos[i].obj; + unsigned cleanup_flags = BO_LOCKED; unsigned flags = submit->bos[i].flags & cleanup_flags; /* @@ -304,10 +300,10 @@ fail: } for (; i >= 0; i--) - submit_cleanup_bo(submit, i, BO_LOCKED); + submit_unlock_bo(submit, i); if (slow_locked > 0) - submit_cleanup_bo(submit, slow_locked, BO_LOCKED); + submit_unlock_bo(submit, slow_locked); if (ret == -EDEADLK) { struct drm_gem_object *obj = submit->bos[contended].obj; @@ -533,7 +529,6 @@ out: */ static void submit_cleanup(struct msm_gem_submit *submit, bool error) { - unsigned cleanup_flags = BO_LOCKED; unsigned i; if (error) @@ -541,7 +536,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error) for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; - submit_cleanup_bo(submit, i, cleanup_flags); + submit_unlock_bo(submit, i); if (error) drm_gem_object_put(obj); } -- cgit v1.2.3 From 05d249352f1ae909230c230767ca8f4e9fdf8e7b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Nov 2023 16:38:50 -0800 Subject: drm/exec: Pass in initial # of objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cases where the # is known ahead of time, it is silly to do the table resize dance. 
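[Usage sketch of the new third argument: callers that already know how many objects they will lock can pre-size the table, while existing callers pass 0 and keep the grow-on-demand behaviour. Both lines are taken from patches in this series.]

  /* Known object count up front (from the msm_gem_submit conversion below): */
  drm_exec_init(&submit->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, submit->nr_bos);

  /* No useful estimate: pass 0 and the table starts at one page, as before. */
  drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);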
Signed-off-by: Rob Clark Reviewed-by: Christian König Patchwork: https://patchwork.freedesktop.org/patch/568338/ --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- drivers/gpu/drm/drm_exec.c | 13 ++++++++++--- drivers/gpu/drm/drm_gpuvm.c | 4 ++-- drivers/gpu/drm/imagination/pvr_job.c | 2 +- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +- drivers/gpu/drm/tests/drm_exec_test.c | 16 ++++++++-------- include/drm/drm_exec.h | 2 +- 13 files changed, 37 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 41fbc4fd0fac..0bd3c4a6267a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1137,7 +1137,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->n_vms = 1; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&ctx->exec) { ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2); drm_exec_retry_on_contention(&ctx->exec); @@ -1176,7 +1176,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, int ret; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&ctx->exec) { ctx->n_vms = 0; list_for_each_entry(entry, &mem->attachments, list) { @@ -2552,7 +2552,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); /* Reserve all BOs and page tables for validation */ drm_exec_until_all_locked(&exec) { /* Reserve all the page directories */ @@ -2793,7 +2793,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) mutex_lock(&process_info->lock); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e50be6500030..440e9a6786fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -66,7 +66,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, amdgpu_sync_create(&p->sync); drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + DRM_EXEC_IGNORE_DUPLICATES, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 720011019741..796fa6f1420b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -70,7 +70,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) @@ -110,7 +110,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, 
DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 84beeaa4d21c..49a5f1c73b3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -203,7 +203,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); drm_exec_retry_on_contention(&exec); @@ -739,7 +739,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { if (gobj) { r = drm_exec_lock_obj(&exec, gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 9ddbf1494326..abd0b9763904 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1122,7 +1122,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &ctx_data->meta_data_obj->tbo.base); @@ -1193,7 +1193,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, struct drm_exec exec; long r; - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &ctx_data->meta_data_obj->tbo.base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index ca45ba8ac171..bfbf59326ee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -86,7 +86,7 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); @@ -149,7 +149,7 @@ static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; long r; - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f2f3c338fd94..76d9f14ccc7c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1529,7 +1529,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr) uint32_t gpuidx; int r; - drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0); + drm_exec_init(&ctx->exec, intr ? 
DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0); drm_exec_until_all_locked(&ctx->exec) { for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c index 5d2809de4517..48ee851b61d9 100644 --- a/drivers/gpu/drm/drm_exec.c +++ b/drivers/gpu/drm/drm_exec.c @@ -69,16 +69,23 @@ static void drm_exec_unlock_all(struct drm_exec *exec) * drm_exec_init - initialize a drm_exec object * @exec: the drm_exec object to initialize * @flags: controls locking behavior, see DRM_EXEC_* defines + * @nr: the initial # of objects * * Initialize the object and make sure that we can track locked objects. + * + * If nr is non-zero then it is used as the initial objects table size. + * In either case, the table will grow (be re-allocated) on demand. */ -void drm_exec_init(struct drm_exec *exec, uint32_t flags) +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr) { + if (!nr) + nr = PAGE_SIZE / sizeof(void *); + exec->flags = flags; - exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); + exec->objects = kvmalloc_array(nr, sizeof(void *), GFP_KERNEL); /* If allocation here fails, just delay that till the first use */ - exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0; + exec->max_objects = exec->objects ? nr : 0; exec->num_objects = 0; exec->contended = DRM_EXEC_DUMMY; exec->prelocked = NULL; diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 9c0922c1a5a2..dc8edca61764 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -1250,7 +1250,7 @@ drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec) unsigned int num_fences = vm_exec->num_fences; int ret; - drm_exec_init(exec, vm_exec->flags); + drm_exec_init(exec, vm_exec->flags, 0); drm_exec_until_all_locked(exec) { ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences); @@ -1341,7 +1341,7 @@ drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, struct drm_exec *exec = &vm_exec->exec; int ret; - drm_exec_init(exec, vm_exec->flags); + drm_exec_init(exec, vm_exec->flags, 0); drm_exec_until_all_locked(exec) { ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range, diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c index 04139da6c04d..78c2f3c6dce0 100644 --- a/drivers/gpu/drm/imagination/pvr_job.c +++ b/drivers/gpu/drm/imagination/pvr_job.c @@ -746,7 +746,7 @@ pvr_submit_jobs(struct pvr_device *pvr_dev, struct pvr_file *pvr_file, if (err) goto out_job_data_cleanup; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); xa_init_flags(&signal_array, XA_FLAGS_ALLOC); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index dae3baf707a0..4f223c972c6a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1347,7 +1347,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job, } } - drm_exec_init(exec, vme->flags); + drm_exec_init(exec, vme->flags, 0); drm_exec_until_all_locked(exec) { ret = bind_lock_validate(job, exec, vme->num_fences); drm_exec_retry_on_contention(exec); diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c index 563949d777dd..81f928a429ba 100644 --- a/drivers/gpu/drm/tests/drm_exec_test.c +++ b/drivers/gpu/drm/tests/drm_exec_test.c @@ -46,7 +46,7 @@ static void sanitycheck(struct kunit *test) { 
struct drm_exec exec; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_fini(&exec); KUNIT_SUCCEED(test); } @@ -60,7 +60,7 @@ static void test_lock(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -80,7 +80,7 @@ static void test_lock_unlock(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -107,7 +107,7 @@ static void test_duplicates(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_lock_obj(&exec, &gobj); drm_exec_retry_on_contention(&exec); @@ -134,7 +134,7 @@ static void test_prepare(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { ret = drm_exec_prepare_obj(&exec, &gobj, 1); drm_exec_retry_on_contention(&exec); @@ -159,7 +159,7 @@ static void test_prepare_array(struct kunit *test) drm_gem_private_object_init(priv->drm, &gobj1, PAGE_SIZE); drm_gem_private_object_init(priv->drm, &gobj2, PAGE_SIZE); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) ret = drm_exec_prepare_array(&exec, array, ARRAY_SIZE(array), 1); @@ -174,14 +174,14 @@ static void test_multiple_loops(struct kunit *test) { struct drm_exec exec; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { break; } drm_exec_fini(&exec); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { break; diff --git a/include/drm/drm_exec.h b/include/drm/drm_exec.h index b5bf0b6da791..f1a66c048721 100644 --- a/include/drm/drm_exec.h +++ b/include/drm/drm_exec.h @@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended(struct drm_exec *exec) return !!exec->contended; } -void drm_exec_init(struct drm_exec *exec, uint32_t flags); +void drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr); void drm_exec_fini(struct drm_exec *exec); bool drm_exec_cleanup(struct drm_exec *exec); int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj); -- cgit v1.2.3 From a6397e63877e82528c67058e6e6e8d700682df50 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 20 Nov 2023 16:38:51 -0800 Subject: drm/msm/gem: Convert to drm_exec Replace the ww_mutex locking dance with the drm_exec helper. 
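[A condensed sketch of the drm_exec idiom that replaces the open-coded ww_mutex retry loop in the diff below; nr_bos and bos[] stand in for the submit's BO array, everything else is the drm_exec API as used in the patch.]

  struct drm_exec exec;
  int ret;

  drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, nr_bos);
  drm_exec_until_all_locked(&exec) {
          for (unsigned int i = 0; i < nr_bos; i++) {
                  /* locks the resv and reserves one fence slot */
                  ret = drm_exec_prepare_obj(&exec, bos[i], 1);
                  drm_exec_retry_on_contention(&exec);
                  if (ret)
                          goto unlock;
          }
  }

  /* ... pin, emit cmdstream, attach fences on the locked objects ... */

  unlock:
          drm_exec_fini(&exec);

drm_exec_retry_on_contention() hides the lock-slow/drop-everything/retry dance, including the -EDEADLK and -EALREADY special cases that submit_lock_objects() previously handled by hand.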
v2: Error path fixes, move drm_exec_fini so we only call it once (and only if we have drm_exec_init() Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/568342/ --- drivers/gpu/drm/msm/Kconfig | 1 + drivers/gpu/drm/msm/msm_gem.h | 5 +- drivers/gpu/drm/msm/msm_gem_submit.c | 119 +++++++---------------------------- 3 files changed, 24 insertions(+), 101 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index ad70b611b44f..f202f26adab2 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -17,6 +17,7 @@ config DRM_MSM select DRM_DP_AUX_BUS select DRM_DISPLAY_DP_HELPER select DRM_DISPLAY_HELPER + select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL select DRM_BRIDGE diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 80f4207120ea..8d414b072c29 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -9,6 +9,7 @@ #include #include +#include "drm/drm_exec.h" #include "drm/gpu_scheduler.h" #include "msm_drv.h" @@ -258,7 +259,7 @@ struct msm_gem_submit { struct msm_gpu *gpu; struct msm_gem_address_space *aspace; struct list_head node; /* node in ring submit list */ - struct ww_acquire_ctx ticket; + struct drm_exec exec; uint32_t seqno; /* Sequence number of the submit on the ring */ /* Hw fence, which is created when the scheduler executes the job, and @@ -291,8 +292,6 @@ struct msm_gem_submit { struct drm_msm_gem_submit_reloc *relocs; } *cmd; /* array of size nr_cmds */ struct { -/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ -#define BO_LOCKED 0x4000 /* obj lock is held */ uint32_t flags; union { struct drm_gem_object *obj; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 2bb5d13726d5..fba78193127d 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -248,85 +248,30 @@ out: return ret; } -static void submit_unlock_bo(struct msm_gem_submit *submit, int i) -{ - struct drm_gem_object *obj = submit->bos[i].obj; - unsigned cleanup_flags = BO_LOCKED; - unsigned flags = submit->bos[i].flags & cleanup_flags; - - /* - * Clear flags bit before dropping lock, so that the msm_job_run() - * path isn't racing with submit_cleanup() (ie. 
the read/modify/ - * write is protected by the obj lock in all paths) - */ - submit->bos[i].flags &= ~cleanup_flags; - - if (flags & BO_LOCKED) - dma_resv_unlock(obj->resv); -} - /* This is where we make sure all the bo's are reserved and pin'd: */ static int submit_lock_objects(struct msm_gem_submit *submit) { - int contended, slow_locked = -1, i, ret = 0; - -retry: - for (i = 0; i < submit->nr_bos; i++) { - struct drm_gem_object *obj = submit->bos[i].obj; - - if (slow_locked == i) - slow_locked = -1; + int ret; - contended = i; + drm_exec_init(&submit->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, submit->nr_bos); - if (!(submit->bos[i].flags & BO_LOCKED)) { - ret = dma_resv_lock_interruptible(obj->resv, - &submit->ticket); + drm_exec_until_all_locked (&submit->exec) { + for (unsigned i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = submit->bos[i].obj; + ret = drm_exec_prepare_obj(&submit->exec, obj, 1); + drm_exec_retry_on_contention(&submit->exec); if (ret) - goto fail; - submit->bos[i].flags |= BO_LOCKED; + goto error; } } - ww_acquire_done(&submit->ticket); - return 0; -fail: - if (ret == -EALREADY) { - SUBMIT_ERROR(submit, "handle %u at index %u already on submit list\n", - submit->bos[i].handle, i); - ret = -EINVAL; - } - - for (; i >= 0; i--) - submit_unlock_bo(submit, i); - - if (slow_locked > 0) - submit_unlock_bo(submit, slow_locked); - - if (ret == -EDEADLK) { - struct drm_gem_object *obj = submit->bos[contended].obj; - /* we lost out in a seqno race, lock and retry.. */ - ret = dma_resv_lock_slow_interruptible(obj->resv, - &submit->ticket); - if (!ret) { - submit->bos[contended].flags |= BO_LOCKED; - slow_locked = contended; - goto retry; - } - - /* Not expecting -EALREADY here, if the bo was already - * locked, we should have gotten -EALREADY already from - * the dma_resv_lock_interruptable() call. - */ - WARN_ON_ONCE(ret == -EALREADY); - } - +error: return ret; } -static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) +static int submit_fence_sync(struct msm_gem_submit *submit) { int i, ret = 0; @@ -334,22 +279,6 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) struct drm_gem_object *obj = submit->bos[i].obj; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; - /* NOTE: _reserve_shared() must happen before - * _add_shared_fence(), which makes this a slightly - * strange place to call it. OTOH this is a - * convenient can-fail point to hook it in. - */ - ret = dma_resv_reserve_fences(obj->resv, 1); - if (ret) - return ret; - - /* If userspace has determined that explicit fencing is - * used, it can disable implicit sync on the entire - * submit: - */ - if (no_implicit) - continue; - /* Otherwise userspace can ask for implicit sync to be * disabled on specific buffers. This is useful for internal * usermode driver managed buffers, suballocation, etc. 
@@ -529,17 +458,14 @@ out: */ static void submit_cleanup(struct msm_gem_submit *submit, bool error) { - unsigned i; - - if (error) + if (error) { submit_unpin_objects(submit); - - for (i = 0; i < submit->nr_bos; i++) { - struct drm_gem_object *obj = submit->bos[i].obj; - submit_unlock_bo(submit, i); - if (error) - drm_gem_object_put(obj); + /* job wasn't enqueued to scheduler, so early retirement: */ + msm_submit_retire(submit); } + + if (submit->exec.objects) + drm_exec_fini(&submit->exec); } void msm_submit_retire(struct msm_gem_submit *submit) @@ -733,7 +659,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_submit_post_dep *post_deps = NULL; struct drm_syncobj **syncobjs_to_reset = NULL; int out_fence_fd = -1; - bool has_ww_ticket = false; unsigned i; int ret; @@ -839,15 +764,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; /* copy_*_user while holding a ww ticket upsets lockdep */ - ww_acquire_init(&submit->ticket, &reservation_ww_class); - has_ww_ticket = true; ret = submit_lock_objects(submit); if (ret) goto out; - ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT)); - if (ret) - goto out; + if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) { + ret = submit_fence_sync(submit); + if (ret) + goto out; + } ret = submit_pin_objects(submit); if (ret) @@ -975,8 +900,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, out: submit_cleanup(submit, !!ret); - if (has_ww_ticket) - ww_acquire_fini(&submit->ticket); out_unlock: mutex_unlock(&queue->lock); out_post_unlock: -- cgit v1.2.3 From 236fa3873de8f1d4b0c7eaeb4c53b4d1220d55a0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 01:13:53 +0300 Subject: drm/ci: remove rebase-merge directory Gitlab runner can cache git repository, including the unfinished rebase merge status. New CI job will come as a fresh checkout, however this will not destroy the unfinished rebase, failing our build script. Destroy the unfinished rebase state. Suggested-by: David Heidelberg Signed-off-by: Dmitry Baryshkov Acked-by: Helen Koike Patchwork: https://patchwork.freedesktop.org/patch/570159/ Signed-off-by: Rob Clark --- drivers/gpu/drm/ci/build.sh | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/ci/build.sh b/drivers/gpu/drm/ci/build.sh index e5c5dcedd108..ca2923ed2290 100644 --- a/drivers/gpu/drm/ci/build.sh +++ b/drivers/gpu/drm/ci/build.sh @@ -58,6 +58,9 @@ git config --global user.email "fdo@example.com" git config --global user.name "freedesktop.org CI" git config --global pull.rebase true +# cleanup git state on the worker +rm -rf .git/rebase-merge + # Try to merge fixes from target repo if [ "$(git ls-remote --exit-code --heads ${UPSTREAM_REPO} ${TARGET_BRANCH}-external-fixes)" ]; then git pull ${UPSTREAM_REPO} ${TARGET_BRANCH}-external-fixes -- cgit v1.2.3 From 5169477081a1ed08924949e4893732de92ad7d25 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 9 Dec 2023 23:05:41 +0000 Subject: drm/i915/selftests: Fix spelling mistake "initialiased" -> "initialised" There is a spelling mistake in a pr_err error message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231209230541.4055786-1-colin.i.king@gmail.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index e57f9390076c..d684a70f2c04 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -504,7 +504,7 @@ static int igt_dmabuf_export_vmap(void *arg) } if (memchr_inv(ptr, 0, dmabuf->size)) { - pr_err("Exported object not initialiased to zero!\n"); + pr_err("Exported object not initialised to zero!\n"); err = -EINVAL; goto out; } -- cgit v1.2.3 From 23b392b94acb0499f69706c5808c099f590ebcf4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Dec 2023 20:05:51 +0200 Subject: drm/i915/edp: don't write to DP_LINK_BW_SET when using rate select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The eDP 1.5 spec adds a clarification for eDP 1.4x: > For eDP v1.4x, if the Source device chooses the Main-Link rate by way > of DPCD 00100h, the Sink device shall ignore DPCD 00115h[2:0]. We write 0 to DP_LINK_BW_SET (DPCD 100h) even when using DP_LINK_RATE_SET (DPCD 114h). Stop doing that, as it can cause the panel to ignore the rate set method. Moreover, 0 is a reserved value for DP_LINK_BW_SET, and should not be used. v2: Improve the comments (Ville) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9081 Tested-by: Animesh Manna Reviewed-by: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231205180551.2476228-1-jani.nikula@intel.com --- .../gpu/drm/i915/display/intel_dp_link_training.c | 31 +++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index dbc1b66c8ee4..1abfafbbfa75 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -650,19 +650,30 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 link_bw, u8 rate_select) { - u8 link_config[2]; + u8 lane_count = crtc_state->lane_count; - /* Write the link configuration data */ - link_config[0] = link_bw; - link_config[1] = crtc_state->lane_count; if (crtc_state->enhanced_framing) - link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); + lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN; + + if (link_bw) { + /* DP and eDP v1.3 and earlier link bw set method. */ + u8 link_config[] = { link_bw, lane_count }; - /* eDP 1.4 rate select method. */ - if (!link_bw) - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, - &rate_select, 1); + drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, + ARRAY_SIZE(link_config)); + } else { + /* + * eDP v1.4 and later link rate set method. + * + * eDP v1.4x sinks shall ignore DP_LINK_RATE_SET if + * DP_LINK_BW_SET is set. Avoid writing DP_LINK_BW_SET. + * + * eDP v1.5 sinks allow choosing either, and the last choice + * shall be active. 
+ */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LANE_COUNT_SET, lane_count); + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LINK_RATE_SET, rate_select); + } } /* -- cgit v1.2.3 From 7c7c863bf89c5f76d8c7fda177a81559b61dc15b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 1 Dec 2023 12:21:08 +0000 Subject: drm/i915/selftests: Fix engine reset count storage for multi-tile Engine->id namespace is per-tile so struct igt_live_test->reset_engine[] needs to be two-dimensional so engine reset counts from all tiles can be stored with no aliasing. With aliasing, if we had a real multi-tile platform, the reset counts would be incorrect for same engine instance on different tiles. Signed-off-by: Tvrtko Ursulin Fixes: 0c29efa23f5c ("drm/i915/selftests: Consider multi-gt instead of to_gt()") Reported-by: Alan Previn Teres Alexis Cc: Tejas Upadhyay Cc: Andi Shyti Cc: Daniele Ceraolo Spurio Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 0647ece3819b018cb62a71c3bcb7c2c3243e78ac) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/igt_live_test.c | 9 +++++---- drivers/gpu/drm/i915/selftests/igt_live_test.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 4ddc6d902752..7d41874a49c5 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -37,8 +37,9 @@ int igt_live_test_begin(struct igt_live_test *t, } for_each_engine(engine, gt, id) - t->reset_engine[id] = - i915_reset_engine_count(&i915->gpu_error, engine); + t->reset_engine[i][id] = + i915_reset_engine_count(&i915->gpu_error, + engine); } t->reset_global = i915_reset_count(&i915->gpu_error); @@ -66,14 +67,14 @@ int igt_live_test_end(struct igt_live_test *t) for_each_gt(gt, i915, i) { for_each_engine(engine, gt, id) { - if (t->reset_engine[id] == + if (t->reset_engine[i][id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n", t->func, t->name, engine->name, i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); + t->reset_engine[i][id]); return -EIO; } } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h index 36ed42736c52..83e3ad430922 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -7,6 +7,7 @@ #ifndef IGT_LIVE_TEST_H #define IGT_LIVE_TEST_H +#include "gt/intel_gt_defines.h" /* for I915_MAX_GT */ #include "gt/intel_engine.h" /* for I915_NUM_ENGINES */ struct drm_i915_private; @@ -17,7 +18,7 @@ struct igt_live_test { const char *name; unsigned int reset_global; - unsigned int reset_engine[I915_NUM_ENGINES]; + unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES]; }; /* -- cgit v1.2.3 From 1f721a93a528268fa97875cff515d1fcb69f4f44 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 1 Dec 2023 12:21:09 +0000 Subject: drm/i915: Use internal class when counting engine resets Commit 503579448db9 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") made the GSC0 engine not have a valid uabi class and so broke the engine reset counting, which in turn was made class based in cb823ed9915b ("drm/i915/gt: Use intel_gt as the primary object for handling resets"). 
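[The aliasing in one line: engine->id restarts at 0 on every GT, so a flat per-engine array mixes counts from different tiles; the fix below adds the GT index as the first dimension. Identifiers as in the diff.]

  unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES];

  for_each_gt(gt, i915, i)
          for_each_engine(engine, gt, id)
                  t->reset_engine[i][id] =
                          i915_reset_engine_count(&i915->gpu_error, engine);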
Despite the title and commit text of the latter is not mentioning it (and has left the storage array incorrectly sized), tracking by class, despite it adding aliasing in hypthotetical multi-tile systems, is handy for virtual engines which for instance do not have a valid engine->id. Therefore we keep that but just change it to use the internal class which is always valid. We also add a helper to increment the count, which aligns with the existing getter. What was broken without this fix were out of bounds reads every time a reset would happen on the GSC0 engine, or during selftests when storing and cross-checking the counts in igt_live_test_begin and igt_live_test_end. Signed-off-by: Tvrtko Ursulin Fixes: 503579448db9 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") [tursulin: fixed Fixes tag] Reported-by: Alan Previn Teres Alexis Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-2-tvrtko.ursulin@linux.intel.com (cherry picked from commit cf9cb028ac56696ff879af1154c4b2f0b12701fd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 +++-- drivers/gpu/drm/i915/i915_gpu_error.h | 12 ++++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index d5ed904f355d..6801f8b95c53 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1293,7 +1293,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) if (msg) drm_notice(&engine->i915->drm, "Resetting %s for %s\n", engine->name, msg); - atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); + i915_increase_reset_engine_count(&engine->i915->gpu_error, engine); ret = intel_gt_reset_engine(engine); if (ret) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index d37698bd6b91..17df71117cc7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -5001,7 +5001,8 @@ static void capture_error_state(struct intel_guc *guc, if (match) { intel_engine_set_hung_context(e, ce); engine_mask |= e->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, + e); } } @@ -5013,7 +5014,7 @@ static void capture_error_state(struct intel_guc *guc, } else { intel_engine_set_hung_context(ce->engine, ce); engine_mask = ce->engine->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); } with_intel_runtime_pm(&i915->runtime_pm, wakeref) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 9f5971f5e980..48f6c00402c4 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -16,6 +16,7 @@ #include "display/intel_display_device.h" #include "gt/intel_engine.h" +#include "gt/intel_engine_types.h" #include "gt/intel_gt_types.h" #include "gt/uc/intel_uc_fw.h" @@ -232,7 +233,7 @@ struct i915_gpu_error { atomic_t reset_count; /** Number of times an engine has been reset */ - atomic_t reset_engine_count[I915_NUM_ENGINES]; + atomic_t reset_engine_count[MAX_ENGINE_CLASS]; }; struct drm_i915_error_state_buf { @@ -255,7 +256,14 @@ 
static inline u32 i915_reset_count(struct i915_gpu_error *error) static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, const struct intel_engine_cs *engine) { - return atomic_read(&error->reset_engine_count[engine->uabi_class]); + return atomic_read(&error->reset_engine_count[engine->class]); +} + +static inline void +i915_increase_reset_engine_count(struct i915_gpu_error *error, + const struct intel_engine_cs *engine) +{ + atomic_inc(&error->reset_engine_count[engine->class]); } #define CORE_DUMP_FLAG_NONE 0x0 -- cgit v1.2.3 From 0ccd963fe555451b1f84e6d14d2b3ef03dd5c947 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 5 Dec 2023 20:03:08 +0200 Subject: drm/i915: Fix remapped stride with CCS on ADL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ADL+ the hardware automagically calculates the CCS AUX surface stride from the main surface stride, so when remapping we can't really play a lot of tricks with the main surface stride, or else the AUX surface stride would get miscalculated and no longer match the actual data layout in memory. Supposedly we could remap in 256 main surface tile units (AUX page(4096)/cachline(64)*4(4x1 main surface tiles per AUX cacheline)=256 main surface tiles), but the extra complexity is probably not worth the hassle. So let's just make sure our mapping stride is calculated from the full framebuffer stride (instead of the framebuffer width). This way the stride we program into PLANE_STRIDE will be the original framebuffer stride, and thus there will be no change to the AUX stride/layout. Cc: stable@vger.kernel.org Cc: Imre Deak Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231205180308.7505-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 2c12eb36f849256f5eb00ffaee9bf99396fd3814) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fb.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 19b35ece31f1..0dfc1b06255a 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1501,8 +1501,20 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p size += remap_info->size; } else { - unsigned int dst_stride = plane_view_dst_stride_tiles(fb, color_plane, - remap_info->width); + unsigned int dst_stride; + + /* + * The hardware automagically calculates the CCS AUX surface + * stride from the main surface stride so can't really remap a + * smaller subset (unless we'd remap in whole AUX page units). 
+ */ + if (intel_fb_needs_pot_stride_remap(fb) && + intel_fb_is_ccs_modifier(fb->base.modifier)) + dst_stride = remap_info->src_stride; + else + dst_stride = remap_info->width; + + dst_stride = plane_view_dst_stride_tiles(fb, color_plane, dst_stride); assign_chk_ovf(i915, remap_info->dst_stride, dst_stride); color_plane_info->mapping_stride = dst_stride * -- cgit v1.2.3 From c3070f080f9ba18dea92eaa21730f7ab85b5c8f4 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 7 Dec 2023 21:34:34 +0200 Subject: drm/i915: Fix intel_atomic_setup_scalers() plane_state handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the plane_state variable is declared outside the scaler_users loop in intel_atomic_setup_scalers(), and it's never reset back to NULL inside the loop we may end up calling intel_atomic_setup_scaler() with a non-NULL plane state for the pipe scaling case. That is bad because intel_atomic_setup_scaler() determines whether we are doing plane scaling or pipe scaling based on plane_state!=NULL. The end result is that we may miscalculate the scaler mode for pipe scaling. The hardware becomes somewhat upset if we end up in this situation when scanning out a planar format on a SDR plane. We end up programming the pipe scaler into planar mode as well, and the result is a screenfull of garbage. Fix the situation by making sure we pass the correct plane_state==NULL when calculating the scaler mode for pipe scaling. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit e81144106e21271c619f0c722a09e27ccb8c043d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/skl_scaler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 1e7c97243fcf..8a934bada624 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -504,7 +504,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; - struct intel_plane_state *plane_state = NULL; struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct drm_atomic_state *drm_state = crtc_state->uapi.state; @@ -536,6 +535,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, /* walkthrough scaler_users bits and start assigning scalers */ for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) { + struct intel_plane_state *plane_state = NULL; int *scaler_id; const char *name; int idx, ret; -- cgit v1.2.3 From 324b70e997aab0a7deab8cb90711faccda4e98c8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 4 Dec 2023 22:24:43 +0200 Subject: drm/i915: Fix ADL+ tiled plane stride when the POT stride is smaller than the original MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plane_view_scanout_stride() currently assumes that we had to pad the mapping stride with dummy pages in order to align it. But that is not the case if the original fb stride exceeds the aligned stride used to populate the remapped view, which is calculated from the user specified framebuffer width rather than the user specified framebuffer stride. Ignore the original fb stride in this case and just stick to the POT aligned stride. 
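[Worked example with hypothetical numbers: if the POT stride computed from the fb width is 32 tiles while the user-specified fb stride is 48 tiles, no dummy-page padding ever happened, so after this fix plane_view_scanout_stride() uses the 32-tile POT stride on ADL+. The original stride is only kept in the opposite case, e.g. a 24-tile fb stride that really was padded up to a 32-tile POT mapping stride.]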
Getting this wrong will cause the plane to fetch the wrong data, and can lead to fault errors if the page tables at the bogus location aren't even populated. TODO: figure out if this is OK for CCS, or if we should instead increase the width of the view to cover the entire user specified fb stride instead... Cc: Imre Deak Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231204202443.31247-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak Reviewed-by: Juha-Pekka Heikkila (cherry picked from commit 01a39f1c4f1220a4e6a25729fae87ff5794cbc52) Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 0dfc1b06255a..646f367a13f5 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1374,7 +1374,8 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane, struct drm_i915_private *i915 = to_i915(fb->base.dev); unsigned int stride_tiles; - if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) + if ((IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) && + src_stride_tiles < dst_stride_tiles) stride_tiles = src_stride_tiles; else stride_tiles = dst_stride_tiles; -- cgit v1.2.3 From e6861d8264cd43c5eb20196e53df36fd71ec5698 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Dec 2023 20:05:51 +0200 Subject: drm/i915/edp: don't write to DP_LINK_BW_SET when using rate select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The eDP 1.5 spec adds a clarification for eDP 1.4x: > For eDP v1.4x, if the Source device chooses the Main-Link rate by way > of DPCD 00100h, the Sink device shall ignore DPCD 00115h[2:0]. We write 0 to DP_LINK_BW_SET (DPCD 100h) even when using DP_LINK_RATE_SET (DPCD 114h). Stop doing that, as it can cause the panel to ignore the rate set method. Moreover, 0 is a reserved value for DP_LINK_BW_SET, and should not be used. 
v2: Improve the comments (Ville) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9081 Tested-by: Animesh Manna Reviewed-by: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231205180551.2476228-1-jani.nikula@intel.com (cherry picked from commit 23b392b94acb0499f69706c5808c099f590ebcf4) Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula --- .../gpu/drm/i915/display/intel_dp_link_training.c | 31 +++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index dbc1b66c8ee4..1abfafbbfa75 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -650,19 +650,30 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 link_bw, u8 rate_select) { - u8 link_config[2]; + u8 lane_count = crtc_state->lane_count; - /* Write the link configuration data */ - link_config[0] = link_bw; - link_config[1] = crtc_state->lane_count; if (crtc_state->enhanced_framing) - link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); + lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN; + + if (link_bw) { + /* DP and eDP v1.3 and earlier link bw set method. */ + u8 link_config[] = { link_bw, lane_count }; - /* eDP 1.4 rate select method. */ - if (!link_bw) - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, - &rate_select, 1); + drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, + ARRAY_SIZE(link_config)); + } else { + /* + * eDP v1.4 and later link rate set method. + * + * eDP v1.4x sinks shall ignore DP_LINK_RATE_SET if + * DP_LINK_BW_SET is set. Avoid writing DP_LINK_BW_SET. + * + * eDP v1.5 sinks allow choosing either, and the last choice + * shall be active. + */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LANE_COUNT_SET, lane_count); + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LINK_RATE_SET, rate_select); + } } /* -- cgit v1.2.3 From 759f14e20891de72e676d9d738eb2c573aa15f52 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 7 Dec 2023 11:38:21 +0200 Subject: drm/edid: also call add modes in EDID connector update fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the separate add modes call was added back in commit c533b5167c7e ("drm/edid: add separate drm_edid_connector_add_modes()"), it failed to address drm_edid_override_connector_update(). Also call add modes there. 
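For illustration, the intended pairing of the two calls after the split in c533b5167c7e, as a minimal sketch (the sketch_* helper and its signature are hypothetical; the actual one-line fix is in the diff below):

        static int sketch_get_modes(struct drm_connector *connector,
                                    const struct drm_edid *drm_edid)
        {
                /* Step 1: update connector state (EDID property, quirks, ...). */
                if (drm_edid_connector_update(connector, drm_edid))
                        return 0;

                /* Step 2: add the probed modes; returns the number of modes. */
                return drm_edid_connector_add_modes(connector);
        }
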
Reported-by: bbaa Closes: https://lore.kernel.org/r/930E9B4C7D91FDFF+29b34d89-8658-4910-966a-c772f320ea03@bbaa.fun Fixes: c533b5167c7e ("drm/edid: add separate drm_edid_connector_add_modes()") Cc: # v6.3+ Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207093821.2654267-1-jani.nikula@intel.com --- drivers/gpu/drm/drm_edid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 39db08f803ea..3b4065099872 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2309,7 +2309,8 @@ int drm_edid_override_connector_update(struct drm_connector *connector) override = drm_edid_override_get(connector); if (override) { - num_modes = drm_edid_connector_update(connector, override); + if (drm_edid_connector_update(connector, override) == 0) + num_modes = drm_edid_connector_add_modes(connector); drm_edid_free(override); -- cgit v1.2.3 From b6961d187fcd138981b8707dac87b9fcdbfe75d1 Mon Sep 17 00:00:00 2001 From: Stuart Lee Date: Fri, 10 Nov 2023 09:29:14 +0800 Subject: drm/mediatek: Fix access violation in mtk_drm_crtc_dma_dev_get Add error handling to check NULL input in mtk_drm_crtc_dma_dev_get function. While display path is not configured correctly, none of crtc is established. So the caller of mtk_drm_crtc_dma_dev_get may pass input parameter *crtc as NULL, Which may cause coredump when we try to get the container of NULL pointer. Fixes: cb1d6bcca542 ("drm/mediatek: Add dma dev get function") Signed-off-by: Stuart Lee Cc: stable@vger.kernel.org Reviewed-by: AngeloGioacchino DEl Regno Tested-by: Macpaul Lin Link: https://patchwork.kernel.org/project/dri-devel/patch/20231110012914.14884-2-stuart.lee@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 0a7d60c191b8..db43f9dff912 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -926,7 +926,14 @@ static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev, struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc) { - struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + struct mtk_drm_crtc *mtk_crtc = NULL; + + if (!crtc) + return NULL; + + mtk_crtc = to_mtk_crtc(crtc); + if (!mtk_crtc) + return NULL; return mtk_crtc->dma_dev; } -- cgit v1.2.3 From 62d35629da807d3a0729980e8012263ff84a0966 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 14:53:15 +0300 Subject: drm/msm/dpu: move encoder status to standard encoder debugfs dir Now as we have standard per-encoder debugfs directory, move DPU encoder status file to that directory. 
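For illustration, the pattern this converts to, as a minimal sketch (sketch_* names are hypothetical; the real conversion is in the diff below):

        static void sketch_encoder_debugfs_init(struct drm_encoder *encoder,
                                                struct dentry *root)
        {
                /* "root" is the per-encoder directory created by the DRM core. */
                debugfs_create_file("status", 0600, root, encoder,
                                    &sketch_status_fops);
        }

        static const struct drm_encoder_funcs sketch_encoder_funcs = {
                .debugfs_init = sketch_encoder_debugfs_init,
        };
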
Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570219/ Link: https://lore.kernel.org/r/20231203115315.1306124-4-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 45 ++++------------------------- 1 file changed, 6 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 889e9bb42715..250c21a4cfc7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -186,7 +186,6 @@ struct dpu_encoder_virt { struct drm_crtc *crtc; struct drm_connector *connector; - struct dentry *debugfs_root; struct mutex enc_lock; DECLARE_BITMAP(frame_busy_mask, MAX_PHYS_ENCODERS_PER_VIRTUAL); void (*crtc_frame_event_cb)(void *, u32 event); @@ -2078,7 +2077,8 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) #ifdef CONFIG_DEBUG_FS static int _dpu_encoder_status_show(struct seq_file *s, void *data) { - struct dpu_encoder_virt *dpu_enc = s->private; + struct drm_encoder *drm_enc = s->private; + struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); int i; mutex_lock(&dpu_enc->enc_lock); @@ -2101,48 +2101,16 @@ static int _dpu_encoder_status_show(struct seq_file *s, void *data) DEFINE_SHOW_ATTRIBUTE(_dpu_encoder_status); -static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) +static void dpu_encoder_debugfs_init(struct drm_encoder *drm_enc, struct dentry *root) { - struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc); - - char name[12]; - - if (!drm_enc->dev) { - DPU_ERROR("invalid encoder or kms\n"); - return -EINVAL; - } - - snprintf(name, sizeof(name), "encoder%u", drm_enc->base.id); - - /* create overall sub-directory for the encoder */ - dpu_enc->debugfs_root = debugfs_create_dir(name, - drm_enc->dev->primary->debugfs_root); - /* don't error check these */ debugfs_create_file("status", 0600, - dpu_enc->debugfs_root, dpu_enc, &_dpu_encoder_status_fops); - - return 0; + root, drm_enc, &_dpu_encoder_status_fops); } #else -static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc) -{ - return 0; -} +#define dpu_encoder_debugfs_init NULL #endif -static int dpu_encoder_late_register(struct drm_encoder *encoder) -{ - return _dpu_encoder_init_debugfs(encoder); -} - -static void dpu_encoder_early_unregister(struct drm_encoder *encoder) -{ - struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(encoder); - - debugfs_remove_recursive(dpu_enc->debugfs_root); -} - static int dpu_encoder_virt_add_phys_encs( struct drm_device *dev, struct msm_display_info *disp_info, @@ -2329,8 +2297,7 @@ static const struct drm_encoder_helper_funcs dpu_encoder_helper_funcs = { }; static const struct drm_encoder_funcs dpu_encoder_funcs = { - .late_register = dpu_encoder_late_register, - .early_unregister = dpu_encoder_early_unregister, + .debugfs_init = dpu_encoder_debugfs_init, }; struct drm_encoder *dpu_encoder_init(struct drm_device *dev, -- cgit v1.2.3 From 9fd2fbaabdb9dba947d1c14e5f4f217bc21afc34 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Dec 2023 11:28:30 -0500 Subject: drm/amdgpu: fix buffer funcs setting order on suspend harder Part of commit dab96d8b61aa ("drm/amdgpu: fix buffer funcs setting order on suspend") got dropped accidently. Add it back. 
Fixes: dab96d8b61aa ("drm/amdgpu: fix buffer funcs setting order on suspend") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1f64d8cbb14d..8dee52ce26d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4516,8 +4516,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_ras_suspend(adev); - amdgpu_ttm_set_buffer_funcs_status(adev, false); - amdgpu_device_ip_suspend_phase1(adev); if (!adev->in_s0ix) -- cgit v1.2.3 From dbfbf4740e40fbd39ceeb5c42ab301ac2edd7a9f Mon Sep 17 00:00:00 2001 From: Dmitrii Galantsev Date: Wed, 6 Dec 2023 02:04:52 -0600 Subject: drm/amd/pm: fix pp_*clk_od typo Fix pp_dpm_sclk_od and pp_dpm_mclk_od typos. Those were defined as pp_*clk_od but used as pp_dpm_*clk_od instead. This change removes the _dpm part. Fixes: 8cfd6a05750c ("drm/amd/pm: Hide irrelevant pm device attributes") Signed-off-by: Dmitrii Galantsev Reviewed-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index ca2ece24e1e0..49028dde0f87 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2198,10 +2198,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) { if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE) *states = ATTR_STATE_UNSUPPORTED; - } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) { + } else if (DEVICE_ATTR_IS(pp_mclk_od)) { if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; - } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) { + } else if (DEVICE_ATTR_IS(pp_sclk_od)) { if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(apu_thermal_cap)) { -- cgit v1.2.3 From f528ee145bd0076cd0ed7e7b2d435893e6329e98 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 5 Dec 2023 14:55:04 -0500 Subject: drm/amd/display: fix hw rotated modes when PSR-SU is enabled We currently don't support dirty rectangles on hardware rotated modes. So, if a user is using hardware rotated modes with PSR-SU enabled, use PSR-SU FFU for all rotated planes (including cursor planes). 
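For illustration, a condensed sketch of the guard added to fill_dc_dirty_rects() (the complete change is in the diff below):

        /*
         * Dirty rectangles are not supported on hardware rotated planes, so
         * bail out to the full frame update path before walking damage clips;
         * the ffu path reports a single clip covering the whole plane.
         */
        if (new_plane_state->rotation != DRM_MODE_ROTATE_0)
                goto ffu;
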
Cc: stable@vger.kernel.org Fixes: 30ebe41582d1 ("drm/amd/display: add FB_DAMAGE_CLIPS support") Reported-by: Kai-Heng Feng Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2952 Tested-by: Kai-Heng Feng Tested-by: Bin Li Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 12 ++++++++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 3 ++- 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b452796fc6d3..c8c00c2a5224 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5182,6 +5182,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, if (plane->type == DRM_PLANE_TYPE_CURSOR) return; + if (new_plane_state->rotation != DRM_MODE_ROTATE_0) + goto ffu; + num_clips = drm_plane_get_damage_clips_count(new_plane_state); clips = drm_plane_get_damage_clips(new_plane_state); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 9649934ea186..e2a3aa8812df 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -465,6 +465,7 @@ struct dc_cursor_mi_param { struct fixed31_32 v_scale_ratio; enum dc_rotation_angle rotation; bool mirror; + struct dc_stream_state *stream; }; /* IPP related types */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 139cf31d2e45..89c3bf0fe0c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1077,8 +1077,16 @@ void hubp2_cursor_set_position( if (src_y_offset < 0) src_y_offset = 0; /* Save necessary cursor info x, y position. w, h is saved in attribute func. 
*/ - hubp->cur_rect.x = src_x_offset + param->viewport.x; - hubp->cur_rect.y = src_y_offset + param->viewport.y; + if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 && + param->rotation != ROTATION_ANGLE_0) { + hubp->cur_rect.x = 0; + hubp->cur_rect.y = 0; + hubp->cur_rect.w = param->stream->timing.h_addressable; + hubp->cur_rect.h = param->stream->timing.v_addressable; + } else { + hubp->cur_rect.x = src_x_offset + param->viewport.x; + hubp->cur_rect.y = src_y_offset + param->viewport.y; + } } void hubp2_clk_cntl(struct hubp *hubp, bool enable) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 2b8b8366538e..cdb903116eb7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3417,7 +3417,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert, .rotation = pipe_ctx->plane_state->rotation, - .mirror = pipe_ctx->plane_state->horizontal_mirror + .mirror = pipe_ctx->plane_state->horizontal_mirror, + .stream = pipe_ctx->stream, }; bool pipe_split_on = false; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || -- cgit v1.2.3 From b96ab339ee50470d13a1faa6ad94d2218a7cd49f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 6 Dec 2023 12:08:26 -0600 Subject: drm/amd/display: Restore guard against default backlight value < 1 nit Mark reports that brightness is not restored after Xorg dpms screen blank. This behavior was introduced by commit d9e865826c20 ("drm/amd/display: Simplify brightness initialization") which dropped the cached backlight value in display code, but also removed code for when the default value read back was less than 1 nit. Restore this code so that the backlight brightness is restored to the correct default value in this circumstance. 
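The value read back over AUX is in units of 1/1000 nit, so the restored sanity window corresponds to 1..5000 nits with 150 nits as the fallback; a condensed sketch (the complete change is in the diff below):

        /* default_backlight is in millinits: 1000 == 1 nit. */
        if (!read_default_bl_aux(link, &default_backlight))
                default_backlight = 150000;     /* 150 nits fallback */

        /* Readback below 1 nit or above 5000 nits is treated as bogus. */
        if (default_backlight < 1000 || default_backlight > 5000000)
                default_backlight = 150000;
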
Reported-by: Mark Herbert Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3031 Cc: stable@vger.kernel.org Cc: Camille Cho Cc: Krunoslav Kovac Cc: Hamza Mahfooz Fixes: d9e865826c20 ("drm/amd/display: Simplify brightness initialization") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 996e4ee99023..e5cfaaef70b3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -287,8 +287,8 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if > 5000, it might be wrong readback - if (default_backlight > 5000000) + // if < 1 nits or > 5000, it might be wrong readback + if (default_backlight < 1000 || default_backlight > 5000000) default_backlight = 150000; return edp_set_backlight_level_nits(link, true, -- cgit v1.2.3 From 15d03119ed215177c52fb5c9edbe184b78263b65 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 24 Nov 2023 08:53:04 +0100 Subject: drm/i915/display: do not use cursor size reduction on MTL Cursor size reduction is not supported since MTL. Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20231124-cur_size_reduction-v1-1-30495dba475f@intel.com --- drivers/gpu/drm/i915/display/intel_display_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 79e9f1c3e241..fe4268813786 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -36,7 +36,7 @@ struct drm_printer; #define HAS_ASYNC_FLIPS(i915) (DISPLAY_VER(i915) >= 5) #define HAS_CDCLK_CRAWL(i915) (DISPLAY_INFO(i915)->has_cdclk_crawl) #define HAS_CDCLK_SQUASH(i915) (DISPLAY_INFO(i915)->has_cdclk_squash) -#define HAS_CUR_FBC(i915) (!HAS_GMCH(i915) && DISPLAY_VER(i915) >= 7) +#define HAS_CUR_FBC(i915) (!HAS_GMCH(i915) && IS_DISPLAY_VER(i915, 7, 13)) #define HAS_D12_PLANE_MINIMIZATION(i915) (IS_ROCKETLAKE(i915) || IS_ALDERLAKE_S(i915)) #define HAS_DDI(i915) (DISPLAY_INFO(i915)->has_ddi) #define HAS_DISPLAY(i915) (DISPLAY_RUNTIME_INFO(i915)->pipe_mask != 0) -- cgit v1.2.3 From 16ac5b21b31b439f03cdf44c153c5f5af94fb3eb Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 21 Sep 2023 12:26:52 -0700 Subject: drm/exynos: Call drm_atomic_helper_shutdown() at shutdown/unbind time Based on grepping through the source code this driver appears to be missing a call to drm_atomic_helper_shutdown() at system shutdown time and at driver unbind time. Among other things, this means that if a panel is in use that it won't be cleanly powered off at system shutdown time. The fact that we should call drm_atomic_helper_shutdown() in the case of OS shutdown/restart and at driver remove (or unbind) time comes straight out of the kernel doc "driver instance overview" in drm_drv.c. 
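For illustration, a minimal sketch of the shutdown hook this adds (the sketch_* name is hypothetical; the driver's actual implementation, including the unbind path, is in the diff below):

        static void sketch_platform_shutdown(struct platform_device *pdev)
        {
                struct drm_device *drm = platform_get_drvdata(pdev);

                /* drvdata may still be NULL if bind failed or never ran. */
                if (drm)
                        drm_atomic_helper_shutdown(drm);
        }
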
A few notes about this fix: - When adding drm_atomic_helper_shutdown() to the unbind path, I added it after drm_kms_helper_poll_fini() since that's when other drivers seemed to have it. - Technically with a previous patch, ("drm/atomic-helper: drm_atomic_helper_shutdown(NULL) should be a noop"), we don't actually need to check to see if our "drm" pointer is NULL before calling drm_atomic_helper_shutdown(). We'll leave the "if" test in, though, so that this patch can land without any dependencies. It could potentially be removed later. - This patch also makes sure to set the drvdata to NULL in the case of bind errors to make sure that shutdown can't access freed data. Suggested-by: Maxime Ripard Reviewed-by: Maxime Ripard Signed-off-by: Douglas Anderson Tested-by: Marek Szyprowski Reviewed-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 8399256cb5c9..5380fb6c55ae 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -300,6 +300,7 @@ err_mode_config_cleanup: drm_mode_config_cleanup(drm); exynos_drm_cleanup_dma(drm); kfree(private); + dev_set_drvdata(dev, NULL); err_free_drm: drm_dev_put(drm); @@ -313,6 +314,7 @@ static void exynos_drm_unbind(struct device *dev) drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); component_unbind_all(drm->dev, drm); drm_mode_config_cleanup(drm); @@ -350,9 +352,18 @@ static int exynos_drm_platform_remove(struct platform_device *pdev) return 0; } +static void exynos_drm_platform_shutdown(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + + if (drm) + drm_atomic_helper_shutdown(drm); +} + static struct platform_driver exynos_drm_platform_driver = { .probe = exynos_drm_platform_probe, .remove = exynos_drm_platform_remove, + .shutdown = exynos_drm_platform_shutdown, .driver = { .name = "exynos-drm", .pm = &exynos_drm_pm_ops, -- cgit v1.2.3 From 4fe7a1ecaa4104c58c58a12ec29f24894381bdcc Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 8 Nov 2023 13:09:12 +0900 Subject: drm/exynos: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König fix merge conflict and drop duplicated patch description. 
Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 6 ++---- drivers/gpu/drm/exynos/exynos7_drm_decon.c | 6 ++---- drivers/gpu/drm/exynos/exynos_dp.c | 6 ++---- drivers/gpu/drm/exynos/exynos_drm_drv.c | 5 ++--- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 6 ++---- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 6 ++---- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 6 ++---- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 6 ++---- drivers/gpu/drm/exynos/exynos_drm_mic.c | 6 ++---- drivers/gpu/drm/exynos/exynos_drm_rotator.c | 6 ++---- drivers/gpu/drm/exynos/exynos_drm_scaler.c | 6 ++---- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 6 ++---- drivers/gpu/drm/exynos/exynos_hdmi.c | 6 ++---- drivers/gpu/drm/exynos/exynos_mixer.c | 6 ++---- 14 files changed, 28 insertions(+), 55 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 4d986077738b..776f2f0b602d 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -862,18 +862,16 @@ err_disable_pm_runtime: return ret; } -static int exynos5433_decon_remove(struct platform_device *pdev) +static void exynos5433_decon_remove(struct platform_device *pdev) { pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &decon_component_ops); - - return 0; } struct platform_driver exynos5433_decon_driver = { .probe = exynos5433_decon_probe, - .remove = exynos5433_decon_remove, + .remove_new = exynos5433_decon_remove, .driver = { .name = "exynos5433-decon", .pm = pm_ptr(&exynos5433_decon_pm_ops), diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 0156a5e94435..0d185c0564b9 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -765,7 +765,7 @@ err_iounmap: return ret; } -static int decon_remove(struct platform_device *pdev) +static void decon_remove(struct platform_device *pdev) { struct decon_context *ctx = dev_get_drvdata(&pdev->dev); @@ -774,8 +774,6 @@ static int decon_remove(struct platform_device *pdev) iounmap(ctx->regs); component_del(&pdev->dev, &decon_component_ops); - - return 0; } static int exynos7_decon_suspend(struct device *dev) @@ -840,7 +838,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos7_decon_pm_ops, exynos7_decon_suspend, struct platform_driver decon_driver = { .probe = decon_probe, - .remove = decon_remove, + .remove_new = decon_remove, .driver = { .name = "exynos-decon", .pm = pm_ptr(&exynos7_decon_pm_ops), diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index 3404ec1367fb..ca31bad6c576 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -250,14 +250,12 @@ out: return component_add(&pdev->dev, &exynos_dp_ops); } -static int exynos_dp_remove(struct platform_device *pdev) +static void exynos_dp_remove(struct platform_device *pdev) { struct exynos_dp_device *dp = platform_get_drvdata(pdev); component_del(&pdev->dev, &exynos_dp_ops); analogix_dp_remove(dp->adp); - - return 0; } static int exynos_dp_suspend(struct device *dev) @@ -285,7 +283,7 @@ MODULE_DEVICE_TABLE(of, exynos_dp_match); struct platform_driver dp_driver = { .probe = exynos_dp_probe, - .remove = exynos_dp_remove, + .remove_new = exynos_dp_remove, .driver = { .name = "exynos-dp", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 5380fb6c55ae..7c59e1164a48 100644 
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -346,10 +346,9 @@ static int exynos_drm_platform_probe(struct platform_device *pdev) match); } -static int exynos_drm_platform_remove(struct platform_device *pdev) +static void exynos_drm_platform_remove(struct platform_device *pdev) { component_master_del(&pdev->dev, &exynos_drm_ops); - return 0; } static void exynos_drm_platform_shutdown(struct platform_device *pdev) @@ -362,7 +361,7 @@ static void exynos_drm_platform_shutdown(struct platform_device *pdev) static struct platform_driver exynos_drm_platform_driver = { .probe = exynos_drm_platform_probe, - .remove = exynos_drm_platform_remove, + .remove_new = exynos_drm_platform_remove, .shutdown = exynos_drm_platform_shutdown, .driver = { .name = "exynos-drm", diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 8de2714599fc..e81a576de398 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -1367,7 +1367,7 @@ err_pm_dis: return ret; } -static int fimc_remove(struct platform_device *pdev) +static void fimc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fimc_context *ctx = get_fimc_context(dev); @@ -1377,8 +1377,6 @@ static int fimc_remove(struct platform_device *pdev) pm_runtime_disable(dev); fimc_put_clocks(ctx); - - return 0; } static int fimc_runtime_suspend(struct device *dev) @@ -1410,7 +1408,7 @@ MODULE_DEVICE_TABLE(of, fimc_of_match); struct platform_driver fimc_driver = { .probe = fimc_probe, - .remove = fimc_remove, + .remove_new = fimc_remove, .driver = { .of_match_table = fimc_of_match, .name = "exynos-drm-fimc", diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 8dde7b1e9b35..a9f1c5c05894 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -1277,13 +1277,11 @@ err_disable_pm_runtime: return ret; } -static int fimd_remove(struct platform_device *pdev) +static void fimd_remove(struct platform_device *pdev) { pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &fimd_component_ops); - - return 0; } static int exynos_fimd_suspend(struct device *dev) @@ -1325,7 +1323,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(exynos_fimd_pm_ops, exynos_fimd_suspend, struct platform_driver fimd_driver = { .probe = fimd_probe, - .remove = fimd_remove, + .remove_new = fimd_remove, .driver = { .name = "exynos4-fb", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 414e585ec7dd..f3138423612e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1530,7 +1530,7 @@ err_destroy_slab: return ret; } -static int g2d_remove(struct platform_device *pdev) +static void g2d_remove(struct platform_device *pdev) { struct g2d_data *g2d = platform_get_drvdata(pdev); @@ -1545,8 +1545,6 @@ static int g2d_remove(struct platform_device *pdev) g2d_fini_cmdlist(g2d); destroy_workqueue(g2d->g2d_workq); kmem_cache_destroy(g2d->runqueue_slab); - - return 0; } static int g2d_suspend(struct device *dev) @@ -1609,7 +1607,7 @@ MODULE_DEVICE_TABLE(of, exynos_g2d_match); struct platform_driver g2d_driver = { .probe = g2d_probe, - .remove = g2d_remove, + .remove_new = g2d_remove, .driver = { .name = "exynos-drm-g2d", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 
35771fb4e85d..e9a769590415 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1309,15 +1309,13 @@ err_pm_dis: return ret; } -static int gsc_remove(struct platform_device *pdev) +static void gsc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; component_del(dev, &gsc_component_ops); pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - - return 0; } static int __maybe_unused gsc_runtime_suspend(struct device *dev) @@ -1422,7 +1420,7 @@ MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match); struct platform_driver gsc_driver = { .probe = gsc_probe, - .remove = gsc_remove, + .remove_new = gsc_remove, .driver = { .name = "exynos-drm-gsc", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 17bab5b1663f..e2920960180f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -442,7 +442,7 @@ err: return ret; } -static int exynos_mic_remove(struct platform_device *pdev) +static void exynos_mic_remove(struct platform_device *pdev) { struct exynos_mic *mic = platform_get_drvdata(pdev); @@ -450,8 +450,6 @@ static int exynos_mic_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); drm_bridge_remove(&mic->bridge); - - return 0; } static const struct of_device_id exynos_mic_of_match[] = { @@ -462,7 +460,7 @@ MODULE_DEVICE_TABLE(of, exynos_mic_of_match); struct platform_driver mic_driver = { .probe = exynos_mic_probe, - .remove = exynos_mic_remove, + .remove_new = exynos_mic_remove, .driver = { .name = "exynos-mic", .pm = pm_ptr(&exynos_mic_pm_ops), diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index ffb327c5139e..5f7516655b08 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -329,15 +329,13 @@ err_component: return ret; } -static int rotator_remove(struct platform_device *pdev) +static void rotator_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; component_del(dev, &rotator_component_ops); pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - - return 0; } static int rotator_runtime_suspend(struct device *dev) @@ -453,7 +451,7 @@ static DEFINE_RUNTIME_DEV_PM_OPS(rotator_pm_ops, rotator_runtime_suspend, struct platform_driver rotator_driver = { .probe = rotator_probe, - .remove = rotator_remove, + .remove_new = rotator_remove, .driver = { .name = "exynos-rotator", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index f2b8b09a6b4e..392f721f13ab 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -539,15 +539,13 @@ err_ippdrv_register: return ret; } -static int scaler_remove(struct platform_device *pdev) +static void scaler_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; component_del(dev, &scaler_component_ops); pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - - return 0; } static int clk_disable_unprepare_wrapper(struct clk *clk) @@ -721,7 +719,7 @@ MODULE_DEVICE_TABLE(of, exynos_scaler_match); struct platform_driver scaler_driver = { .probe = scaler_probe, - .remove = scaler_remove, + .remove_new = scaler_remove, .driver = { .name = "exynos-scaler", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 
f5e1adfcaa51..00382f28748a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -462,7 +462,7 @@ static int vidi_probe(struct platform_device *pdev) return component_add(dev, &vidi_component_ops); } -static int vidi_remove(struct platform_device *pdev) +static void vidi_remove(struct platform_device *pdev) { struct vidi_context *ctx = platform_get_drvdata(pdev); @@ -472,13 +472,11 @@ static int vidi_remove(struct platform_device *pdev) } component_del(&pdev->dev, &vidi_component_ops); - - return 0; } struct platform_driver vidi_driver = { .probe = vidi_probe, - .remove = vidi_remove, + .remove_new = vidi_remove, .driver = { .name = "exynos-drm-vidi", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index dd9903eab563..43bed6cbaaea 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -2069,7 +2069,7 @@ err_ddc: return ret; } -static int hdmi_remove(struct platform_device *pdev) +static void hdmi_remove(struct platform_device *pdev) { struct hdmi_context *hdata = platform_get_drvdata(pdev); @@ -2092,8 +2092,6 @@ static int hdmi_remove(struct platform_device *pdev) put_device(&hdata->ddc_adpt->dev); mutex_destroy(&hdata->mutex); - - return 0; } static int __maybe_unused exynos_hdmi_suspend(struct device *dev) @@ -2125,7 +2123,7 @@ static const struct dev_pm_ops exynos_hdmi_pm_ops = { struct platform_driver hdmi_driver = { .probe = hdmi_probe, - .remove = hdmi_remove, + .remove_new = hdmi_remove, .driver = { .name = "exynos-hdmi", .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index b302392ff0d7..6822333fd0e6 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1258,13 +1258,11 @@ static int mixer_probe(struct platform_device *pdev) return ret; } -static int mixer_remove(struct platform_device *pdev) +static void mixer_remove(struct platform_device *pdev) { pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &mixer_component_ops); - - return 0; } static int __maybe_unused exynos_mixer_suspend(struct device *dev) @@ -1338,5 +1336,5 @@ struct platform_driver mixer_driver = { .of_match_table = mixer_match_types, }, .probe = mixer_probe, - .remove = mixer_remove, + .remove_new = mixer_remove, }; -- cgit v1.2.3 From ead5a41c8f8a13ad7b1c9fd2d7edb1ea909b777f Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 1 Nov 2023 21:17:31 +0100 Subject: drm/exynos: dpi: Change connector type to DPI When exynos_drm_dpi.c was written, DRM_MODE_CONNECTOR_DPI did not exist yet and I guess that's the reason why DRM_MODE_CONNECTOR_VGA was used as the connector type. However, now it makes more sense to use DRM_MODE_CONNECTOR_DPI as the connector type. 
Signed-off-by: Paul Cercueil Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c index 378e5381978f..0dc36df6ada3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c @@ -101,7 +101,7 @@ static int exynos_dpi_create_connector(struct drm_encoder *encoder) ret = drm_connector_init(encoder->dev, connector, &exynos_dpi_connector_funcs, - DRM_MODE_CONNECTOR_VGA); + DRM_MODE_CONNECTOR_DPI); if (ret) { DRM_DEV_ERROR(ctx->dev, "failed to initialize connector with drm\n"); -- cgit v1.2.3 From bf9cd9fef9f15531680325f956f81317d46a159d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 7 Dec 2023 14:03:14 -0400 Subject: iommu/tegra: Use tegra_dev_iommu_get_stream_id() in the remaining places This API was defined to formalize the access to internal iommu details on some Tegra SOCs, but a few callers got missed. Add them. The helper already masks by 0xFFFF so remove this code from the callers. Suggested-by: Thierry Reding Reviewed-by: Thierry Reding Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v2-16e4def25ebb+820-iommu_fwspec_p1_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/dma/tegra186-gpc-dma.c | 8 +++----- drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c | 9 ++------- drivers/memory/tegra/tegra186.c | 14 ++++++++------ 3 files changed, 13 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c index fa4d4142a68a..88547a23825b 100644 --- a/drivers/dma/tegra186-gpc-dma.c +++ b/drivers/dma/tegra186-gpc-dma.c @@ -1348,8 +1348,8 @@ static int tegra_dma_program_sid(struct tegra_dma_channel *tdc, int stream_id) static int tegra_dma_probe(struct platform_device *pdev) { const struct tegra_dma_chip_data *cdata = NULL; - struct iommu_fwspec *iommu_spec; - unsigned int stream_id, i; + unsigned int i; + u32 stream_id; struct tegra_dma *tdma; int ret; @@ -1378,12 +1378,10 @@ static int tegra_dma_probe(struct platform_device *pdev) tdma->dma_dev.dev = &pdev->dev; - iommu_spec = dev_iommu_fwspec_get(&pdev->dev); - if (!iommu_spec) { + if (!tegra_dev_iommu_get_stream_id(&pdev->dev, &stream_id)) { dev_err(&pdev->dev, "Missing iommu stream-id\n"); return -EINVAL; } - stream_id = iommu_spec->ids[0] & 0xffff; ret = device_property_read_u32(&pdev->dev, "dma-channel-mask", &tdma->chan_mask); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c index e7e8fdf3adab..29682722b0b3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c @@ -28,19 +28,14 @@ static void gp10b_ltc_init(struct nvkm_ltc *ltc) { struct nvkm_device *device = ltc->subdev.device; - struct iommu_fwspec *spec; + u32 sid; nvkm_wr32(device, 0x17e27c, ltc->ltc_nr); nvkm_wr32(device, 0x17e000, ltc->ltc_nr); nvkm_wr32(device, 0x100800, ltc->ltc_nr); - spec = dev_iommu_fwspec_get(device->dev); - if (spec) { - u32 sid = spec->ids[0] & 0xffff; - - /* stream ID */ + if (tegra_dev_iommu_get_stream_id(device->dev, &sid)) nvkm_wr32(device, 0x160000, sid << 2); - } } static const struct nvkm_ltc_func diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c index 533f85a4b2bd..9cbf22a10a82 100644 --- a/drivers/memory/tegra/tegra186.c +++ b/drivers/memory/tegra/tegra186.c @@ -111,9 +111,12 
@@ static void tegra186_mc_client_sid_override(struct tegra_mc *mc, static int tegra186_mc_probe_device(struct tegra_mc *mc, struct device *dev) { #if IS_ENABLED(CONFIG_IOMMU_API) - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct of_phandle_args args; unsigned int i, index = 0; + u32 sid; + + if (!tegra_dev_iommu_get_stream_id(dev, &sid)) + return 0; while (!of_parse_phandle_with_args(dev->of_node, "interconnects", "#interconnect-cells", index, &args)) { @@ -121,11 +124,10 @@ static int tegra186_mc_probe_device(struct tegra_mc *mc, struct device *dev) for (i = 0; i < mc->soc->num_clients; i++) { const struct tegra_mc_client *client = &mc->soc->clients[i]; - if (client->id == args.args[0]) { - u32 sid = fwspec->ids[0] & MC_SID_STREAMID_OVERRIDE_MASK; - - tegra186_mc_client_sid_override(mc, client, sid); - } + if (client->id == args.args[0]) + tegra186_mc_client_sid_override( + mc, client, + sid & MC_SID_STREAMID_OVERRIDE_MASK); } } -- cgit v1.2.3 From 19544aa5f5ece80b12315fa68e51fb2ba6f01fa4 Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Tue, 28 Nov 2023 17:02:06 +0530 Subject: drm/amdgpu/jpeg: configure doorbell for each playback Doorbell is configured during start of each playback. v1 - add comment for the doorbell programming change Signed-off-by: Saleemkhan Jamadar Acked-by: Leo Liu Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 9df011323d4b..6ede85b28cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -155,13 +155,6 @@ static int jpeg_v4_0_5_hw_init(void *handle) struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); - - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); - r = amdgpu_ring_test_helper(ring); if (r) return r; @@ -336,6 +329,14 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, true); + /* doorbell programming is done for every playback */ + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + /* disable power gating */ r = jpeg_v4_0_5_disable_static_power_gating(adev); if (r) -- cgit v1.2.3 From a409c053b0b0cc0fc1af684d0b23bd5ca010c4cb Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Wed, 6 Dec 2023 14:52:25 -0500 Subject: drm/amd/display: Revert "Fix conversions between bytes and KB" [Why & How] HostVMMinPageSize is expected to be in KB according to spec, the checks later down the line reflect this as well. 
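For illustration, the convention this restores: hostvm_min_page_size_kbytes is passed through unscaled (the complete change is in the diff below):

        /* HostVMMinPageSize is consumed in KB, so pass the KB value through. */
        CalculateVMRowAndSwath_params->HostVMMinPageSize =
                mode_lib->ms.soc.hostvm_min_page_size_kbytes;
        /* e.g. a value of 64 here means 64 KiB, not 64 bytes */
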
Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 4d1336e5afc2..180f8a98a361 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; 
CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], -- cgit v1.2.3 From fb01ab528df324a140058a11e9b25e5efdf9671d Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 6 Dec 2023 14:52:28 -0500 Subject: drm/amd/display: Populate dtbclk from bounding box dtbclk is unavaliable from pmfw. 
Try to grab the value from bounding box Reviewed-by: Charlene Liu Acked-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 14 +++++++++----- .../gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 5 +++-- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 39cf1ae3a3e1..f154a3eb1d1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 1, @@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 2, @@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 3, @@ -151,7 +151,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 4, @@ -160,7 +160,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, }, .num_states = 5, @@ -348,6 +348,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = @@ -360,6 +362,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + clk_table->num_entries; } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index fa8fe5bf7e57..db06a5b749b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -423,8 +423,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) { - p->in_states->state_array[i].dtbclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; + if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0) + p->in_states->state_array[i].dtbclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; } for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) { -- cgit v1.2.3 From e7ab758741672acb21c5d841a9f0309d30e48a06 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 
Jun 2023 15:04:24 -0500 Subject: drm/amd/display: Disable PSR-SU on Parade 0803 TCON again When screen brightness is rapidly changed and PSR-SU is enabled the display hangs on panels with this TCON even on the latest DCN 3.1.4 microcode (0x8002a81 at this time). This was disabled previously as commit 072030b17830 ("drm/amd: Disable PSR-SU on Parade 0803 TCON") but reverted as commit 1e66a17ce546 ("Revert "drm/amd: Disable PSR-SU on Parade 0803 TCON"") in favor of testing for a new enough microcode (commit cd2e31a9ab93 ("drm/amd/display: Set minimum requirement for using PSR-SU on Phoenix")). As hangs are still happening specifically with this TCON, disable PSR-SU again for it until it can be root caused. Cc: stable@vger.kernel.org Cc: aaron.ma@canonical.com Cc: binli@gnome.org Cc: Marc Rossi Cc: Hamza Mahfooz Signed-off-by: Mario Limonciello Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2046131 Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index a522a7c02911..1675314a3ff2 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } -- cgit v1.2.3 From dd08ebf6c3525a7ea2186e636df064ea47281987 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 30 Mar 2023 17:31:57 -0400 Subject: drm/xe: Introduce a new DRM driver for Intel GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe, is a new driver for Intel GPUs that supports both integrated and discrete platforms starting with Tiger Lake (first Intel Xe Architecture). The code is at a stage where it is already functional and has experimental support for multiple platforms starting from Tiger Lake, with initial support implemented in Mesa (for Iris and Anv, our OpenGL and Vulkan drivers), as well as in NEO (for OpenCL and Level0). The new Xe driver leverages a lot from i915. As for display, the intent is to share the display code with the i915 driver so that there is maximum reuse there. But it is not added in this patch. This initial work is a collaboration of many people and unfortunately the big squashed patch won't fully honor the proper credits. 
But let's get some git quick stats so we can at least try to preserve some of the credits: Co-developed-by: Matthew Brost Co-developed-by: Matthew Auld Co-developed-by: Matt Roper Co-developed-by: Thomas Hellström Co-developed-by: Francois Dugast Co-developed-by: Lucas De Marchi Co-developed-by: Maarten Lankhorst Co-developed-by: Philippe Lecluse Co-developed-by: Nirmoy Das Co-developed-by: Jani Nikula Co-developed-by: José Roberto de Souza Co-developed-by: Rodrigo Vivi Co-developed-by: Dave Airlie Co-developed-by: Faith Ekstrand Co-developed-by: Daniel Vetter Co-developed-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Signed-off-by: Matthew Brost --- Documentation/gpu/drivers.rst | 1 + Documentation/gpu/xe/index.rst | 23 + Documentation/gpu/xe/xe_cs.rst | 8 + Documentation/gpu/xe/xe_firmware.rst | 34 + Documentation/gpu/xe/xe_gt_mcr.rst | 13 + Documentation/gpu/xe/xe_map.rst | 8 + Documentation/gpu/xe/xe_migrate.rst | 8 + Documentation/gpu/xe/xe_mm.rst | 14 + Documentation/gpu/xe/xe_pcode.rst | 14 + Documentation/gpu/xe/xe_pm.rst | 14 + Documentation/gpu/xe/xe_rtp.rst | 20 + Documentation/gpu/xe/xe_wa.rst | 14 + drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/xe/.gitignore | 2 + drivers/gpu/drm/xe/Kconfig | 63 + drivers/gpu/drm/xe/Kconfig.debug | 96 + drivers/gpu/drm/xe/Makefile | 121 + drivers/gpu/drm/xe/abi/guc_actions_abi.h | 219 ++ drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h | 249 ++ drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h | 189 ++ .../gpu/drm/xe/abi/guc_communication_mmio_abi.h | 49 + drivers/gpu/drm/xe/abi/guc_errors_abi.h | 37 + drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 322 ++ drivers/gpu/drm/xe/abi/guc_messages_abi.h | 234 ++ drivers/gpu/drm/xe/tests/Makefile | 4 + drivers/gpu/drm/xe/tests/xe_bo.c | 303 ++ drivers/gpu/drm/xe/tests/xe_bo_test.c | 25 + drivers/gpu/drm/xe/tests/xe_dma_buf.c | 259 ++ drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 23 + drivers/gpu/drm/xe/tests/xe_migrate.c | 378 +++ drivers/gpu/drm/xe/tests/xe_migrate_test.c | 23 + drivers/gpu/drm/xe/tests/xe_test.h | 66 + drivers/gpu/drm/xe/xe_bb.c | 97 + drivers/gpu/drm/xe/xe_bb.h | 27 + drivers/gpu/drm/xe/xe_bb_types.h | 20 + drivers/gpu/drm/xe/xe_bo.c | 1698 ++++++++++ drivers/gpu/drm/xe/xe_bo.h | 290 ++ drivers/gpu/drm/xe/xe_bo_doc.h | 179 + drivers/gpu/drm/xe/xe_bo_evict.c | 225 ++ drivers/gpu/drm/xe/xe_bo_evict.h | 15 + drivers/gpu/drm/xe/xe_bo_types.h | 73 + drivers/gpu/drm/xe/xe_debugfs.c | 129 + drivers/gpu/drm/xe/xe_debugfs.h | 13 + drivers/gpu/drm/xe/xe_device.c | 359 +++ drivers/gpu/drm/xe/xe_device.h | 126 + drivers/gpu/drm/xe/xe_device_types.h | 214 ++ drivers/gpu/drm/xe/xe_dma_buf.c | 307 ++ drivers/gpu/drm/xe/xe_dma_buf.h | 15 + drivers/gpu/drm/xe/xe_drv.h | 24 + drivers/gpu/drm/xe/xe_engine.c | 734 +++++ drivers/gpu/drm/xe/xe_engine.h | 54 + drivers/gpu/drm/xe/xe_engine_types.h | 208 ++ drivers/gpu/drm/xe/xe_exec.c | 390 +++ drivers/gpu/drm/xe/xe_exec.h | 14 + drivers/gpu/drm/xe/xe_execlist.c | 489 +++ drivers/gpu/drm/xe/xe_execlist.h | 21 + drivers/gpu/drm/xe/xe_execlist_types.h | 49 + drivers/gpu/drm/xe/xe_force_wake.c | 203 ++ drivers/gpu/drm/xe/xe_force_wake.h | 40 + drivers/gpu/drm/xe/xe_force_wake_types.h | 84 + drivers/gpu/drm/xe/xe_ggtt.c | 304 ++ drivers/gpu/drm/xe/xe_ggtt.h | 28 + drivers/gpu/drm/xe/xe_ggtt_types.h | 28 + drivers/gpu/drm/xe/xe_gpu_scheduler.c | 101 + drivers/gpu/drm/xe/xe_gpu_scheduler.h | 73 + drivers/gpu/drm/xe/xe_gpu_scheduler_types.h | 57 + drivers/gpu/drm/xe/xe_gt.c | 830 +++++ drivers/gpu/drm/xe/xe_gt.h | 64 + 
drivers/gpu/drm/xe/xe_gt_clock.c | 83 + drivers/gpu/drm/xe/xe_gt_clock.h | 13 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 160 + drivers/gpu/drm/xe/xe_gt_debugfs.h | 13 + drivers/gpu/drm/xe/xe_gt_mcr.c | 552 ++++ drivers/gpu/drm/xe/xe_gt_mcr.h | 26 + drivers/gpu/drm/xe/xe_gt_pagefault.c | 750 +++++ drivers/gpu/drm/xe/xe_gt_pagefault.h | 22 + drivers/gpu/drm/xe/xe_gt_sysfs.c | 55 + drivers/gpu/drm/xe/xe_gt_sysfs.h | 19 + drivers/gpu/drm/xe/xe_gt_sysfs_types.h | 26 + drivers/gpu/drm/xe/xe_gt_topology.c | 144 + drivers/gpu/drm/xe/xe_gt_topology.h | 20 + drivers/gpu/drm/xe/xe_gt_types.h | 320 ++ drivers/gpu/drm/xe/xe_guc.c | 875 +++++ drivers/gpu/drm/xe/xe_guc.h | 57 + drivers/gpu/drm/xe/xe_guc_ads.c | 676 ++++ drivers/gpu/drm/xe/xe_guc_ads.h | 17 + drivers/gpu/drm/xe/xe_guc_ads_types.h | 25 + drivers/gpu/drm/xe/xe_guc_ct.c | 1196 +++++++ drivers/gpu/drm/xe/xe_guc_ct.h | 62 + drivers/gpu/drm/xe/xe_guc_ct_types.h | 87 + drivers/gpu/drm/xe/xe_guc_debugfs.c | 105 + drivers/gpu/drm/xe/xe_guc_debugfs.h | 14 + drivers/gpu/drm/xe/xe_guc_engine_types.h | 52 + drivers/gpu/drm/xe/xe_guc_fwif.h | 392 +++ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 125 + drivers/gpu/drm/xe/xe_guc_hwconfig.h | 17 + drivers/gpu/drm/xe/xe_guc_log.c | 109 + drivers/gpu/drm/xe/xe_guc_log.h | 48 + drivers/gpu/drm/xe/xe_guc_log_types.h | 23 + drivers/gpu/drm/xe/xe_guc_pc.c | 843 +++++ drivers/gpu/drm/xe/xe_guc_pc.h | 15 + drivers/gpu/drm/xe/xe_guc_pc_types.h | 34 + drivers/gpu/drm/xe/xe_guc_reg.h | 147 + drivers/gpu/drm/xe/xe_guc_submit.c | 1695 ++++++++++ drivers/gpu/drm/xe/xe_guc_submit.h | 30 + drivers/gpu/drm/xe/xe_guc_types.h | 71 + drivers/gpu/drm/xe/xe_huc.c | 131 + drivers/gpu/drm/xe/xe_huc.h | 19 + drivers/gpu/drm/xe/xe_huc_debugfs.c | 71 + drivers/gpu/drm/xe/xe_huc_debugfs.h | 14 + drivers/gpu/drm/xe/xe_huc_types.h | 19 + drivers/gpu/drm/xe/xe_hw_engine.c | 658 ++++ drivers/gpu/drm/xe/xe_hw_engine.h | 27 + drivers/gpu/drm/xe/xe_hw_engine_types.h | 107 + drivers/gpu/drm/xe/xe_hw_fence.c | 230 ++ drivers/gpu/drm/xe/xe_hw_fence.h | 27 + drivers/gpu/drm/xe/xe_hw_fence_types.h | 72 + drivers/gpu/drm/xe/xe_irq.c | 565 ++++ drivers/gpu/drm/xe/xe_irq.h | 18 + drivers/gpu/drm/xe/xe_lrc.c | 841 +++++ drivers/gpu/drm/xe/xe_lrc.h | 50 + drivers/gpu/drm/xe/xe_lrc_types.h | 47 + drivers/gpu/drm/xe/xe_macros.h | 20 + drivers/gpu/drm/xe/xe_map.h | 93 + drivers/gpu/drm/xe/xe_migrate.c | 1168 +++++++ drivers/gpu/drm/xe/xe_migrate.h | 88 + drivers/gpu/drm/xe/xe_migrate_doc.h | 88 + drivers/gpu/drm/xe/xe_mmio.c | 466 +++ drivers/gpu/drm/xe/xe_mmio.h | 110 + drivers/gpu/drm/xe/xe_mocs.c | 557 ++++ drivers/gpu/drm/xe/xe_mocs.h | 29 + drivers/gpu/drm/xe/xe_module.c | 76 + drivers/gpu/drm/xe/xe_module.h | 13 + drivers/gpu/drm/xe/xe_pci.c | 651 ++++ drivers/gpu/drm/xe/xe_pci.h | 21 + drivers/gpu/drm/xe/xe_pcode.c | 296 ++ drivers/gpu/drm/xe/xe_pcode.h | 25 + drivers/gpu/drm/xe/xe_pcode_api.h | 40 + drivers/gpu/drm/xe/xe_platform_types.h | 32 + drivers/gpu/drm/xe/xe_pm.c | 207 ++ drivers/gpu/drm/xe/xe_pm.h | 24 + drivers/gpu/drm/xe/xe_preempt_fence.c | 157 + drivers/gpu/drm/xe/xe_preempt_fence.h | 61 + drivers/gpu/drm/xe/xe_preempt_fence_types.h | 33 + drivers/gpu/drm/xe/xe_pt.c | 1542 +++++++++ drivers/gpu/drm/xe/xe_pt.h | 54 + drivers/gpu/drm/xe/xe_pt_types.h | 57 + drivers/gpu/drm/xe/xe_pt_walk.c | 160 + drivers/gpu/drm/xe/xe_pt_walk.h | 161 + drivers/gpu/drm/xe/xe_query.c | 387 +++ drivers/gpu/drm/xe/xe_query.h | 14 + drivers/gpu/drm/xe/xe_reg_sr.c | 248 ++ drivers/gpu/drm/xe/xe_reg_sr.h | 28 + drivers/gpu/drm/xe/xe_reg_sr_types.h | 44 + 
drivers/gpu/drm/xe/xe_reg_whitelist.c | 73 + drivers/gpu/drm/xe/xe_reg_whitelist.h | 13 + drivers/gpu/drm/xe/xe_res_cursor.h | 226 ++ drivers/gpu/drm/xe/xe_ring_ops.c | 373 +++ drivers/gpu/drm/xe/xe_ring_ops.h | 17 + drivers/gpu/drm/xe/xe_ring_ops_types.h | 22 + drivers/gpu/drm/xe/xe_rtp.c | 144 + drivers/gpu/drm/xe/xe_rtp.h | 340 ++ drivers/gpu/drm/xe/xe_rtp_types.h | 105 + drivers/gpu/drm/xe/xe_sa.c | 96 + drivers/gpu/drm/xe/xe_sa.h | 42 + drivers/gpu/drm/xe/xe_sa_types.h | 19 + drivers/gpu/drm/xe/xe_sched_job.c | 246 ++ drivers/gpu/drm/xe/xe_sched_job.h | 76 + drivers/gpu/drm/xe/xe_sched_job_types.h | 46 + drivers/gpu/drm/xe/xe_step.c | 189 ++ drivers/gpu/drm/xe/xe_step.h | 18 + drivers/gpu/drm/xe/xe_step_types.h | 51 + drivers/gpu/drm/xe/xe_sync.c | 276 ++ drivers/gpu/drm/xe/xe_sync.h | 27 + drivers/gpu/drm/xe/xe_sync_types.h | 27 + drivers/gpu/drm/xe/xe_trace.c | 9 + drivers/gpu/drm/xe/xe_trace.h | 513 +++ drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c | 130 + drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h | 16 + drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h | 18 + drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 403 +++ drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 41 + drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 44 + drivers/gpu/drm/xe/xe_tuning.c | 39 + drivers/gpu/drm/xe/xe_tuning.h | 13 + drivers/gpu/drm/xe/xe_uc.c | 226 ++ drivers/gpu/drm/xe/xe_uc.h | 21 + drivers/gpu/drm/xe/xe_uc_debugfs.c | 26 + drivers/gpu/drm/xe/xe_uc_debugfs.h | 14 + drivers/gpu/drm/xe/xe_uc_fw.c | 406 +++ drivers/gpu/drm/xe/xe_uc_fw.h | 180 ++ drivers/gpu/drm/xe/xe_uc_fw_abi.h | 81 + drivers/gpu/drm/xe/xe_uc_fw_types.h | 112 + drivers/gpu/drm/xe/xe_uc_types.h | 25 + drivers/gpu/drm/xe/xe_vm.c | 3407 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.h | 141 + drivers/gpu/drm/xe/xe_vm_doc.h | 555 ++++ drivers/gpu/drm/xe/xe_vm_madvise.c | 347 ++ drivers/gpu/drm/xe/xe_vm_madvise.h | 15 + drivers/gpu/drm/xe/xe_vm_types.h | 337 ++ drivers/gpu/drm/xe/xe_wa.c | 326 ++ drivers/gpu/drm/xe/xe_wa.h | 18 + drivers/gpu/drm/xe/xe_wait_user_fence.c | 202 ++ drivers/gpu/drm/xe/xe_wait_user_fence.h | 15 + drivers/gpu/drm/xe/xe_wopcm.c | 263 ++ drivers/gpu/drm/xe/xe_wopcm.h | 16 + drivers/gpu/drm/xe/xe_wopcm_types.h | 26 + include/drm/xe_pciids.h | 195 ++ include/uapi/drm/xe_drm.h | 787 +++++ 210 files changed, 40575 insertions(+) create mode 100644 Documentation/gpu/xe/index.rst create mode 100644 Documentation/gpu/xe/xe_cs.rst create mode 100644 Documentation/gpu/xe/xe_firmware.rst create mode 100644 Documentation/gpu/xe/xe_gt_mcr.rst create mode 100644 Documentation/gpu/xe/xe_map.rst create mode 100644 Documentation/gpu/xe/xe_migrate.rst create mode 100644 Documentation/gpu/xe/xe_mm.rst create mode 100644 Documentation/gpu/xe/xe_pcode.rst create mode 100644 Documentation/gpu/xe/xe_pm.rst create mode 100644 Documentation/gpu/xe/xe_rtp.rst create mode 100644 Documentation/gpu/xe/xe_wa.rst create mode 100644 drivers/gpu/drm/xe/.gitignore create mode 100644 drivers/gpu/drm/xe/Kconfig create mode 100644 drivers/gpu/drm/xe/Kconfig.debug create mode 100644 drivers/gpu/drm/xe/Makefile create mode 100644 drivers/gpu/drm/xe/abi/guc_actions_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_errors_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_klvs_abi.h create mode 100644 drivers/gpu/drm/xe/abi/guc_messages_abi.h create mode 100644 drivers/gpu/drm/xe/tests/Makefile 
create mode 100644 drivers/gpu/drm/xe/tests/xe_bo.c create mode 100644 drivers/gpu/drm/xe/tests/xe_bo_test.c create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf.c create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf_test.c create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate.c create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate_test.c create mode 100644 drivers/gpu/drm/xe/tests/xe_test.h create mode 100644 drivers/gpu/drm/xe/xe_bb.c create mode 100644 drivers/gpu/drm/xe/xe_bb.h create mode 100644 drivers/gpu/drm/xe/xe_bb_types.h create mode 100644 drivers/gpu/drm/xe/xe_bo.c create mode 100644 drivers/gpu/drm/xe/xe_bo.h create mode 100644 drivers/gpu/drm/xe/xe_bo_doc.h create mode 100644 drivers/gpu/drm/xe/xe_bo_evict.c create mode 100644 drivers/gpu/drm/xe/xe_bo_evict.h create mode 100644 drivers/gpu/drm/xe/xe_bo_types.h create mode 100644 drivers/gpu/drm/xe/xe_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_device.c create mode 100644 drivers/gpu/drm/xe/xe_device.h create mode 100644 drivers/gpu/drm/xe/xe_device_types.h create mode 100644 drivers/gpu/drm/xe/xe_dma_buf.c create mode 100644 drivers/gpu/drm/xe/xe_dma_buf.h create mode 100644 drivers/gpu/drm/xe/xe_drv.h create mode 100644 drivers/gpu/drm/xe/xe_engine.c create mode 100644 drivers/gpu/drm/xe/xe_engine.h create mode 100644 drivers/gpu/drm/xe/xe_engine_types.h create mode 100644 drivers/gpu/drm/xe/xe_exec.c create mode 100644 drivers/gpu/drm/xe/xe_exec.h create mode 100644 drivers/gpu/drm/xe/xe_execlist.c create mode 100644 drivers/gpu/drm/xe/xe_execlist.h create mode 100644 drivers/gpu/drm/xe/xe_execlist_types.h create mode 100644 drivers/gpu/drm/xe/xe_force_wake.c create mode 100644 drivers/gpu/drm/xe/xe_force_wake.h create mode 100644 drivers/gpu/drm/xe/xe_force_wake_types.h create mode 100644 drivers/gpu/drm/xe/xe_ggtt.c create mode 100644 drivers/gpu/drm/xe/xe_ggtt.h create mode 100644 drivers/gpu/drm/xe/xe_ggtt_types.h create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler.c create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler.h create mode 100644 drivers/gpu/drm/xe/xe_gpu_scheduler_types.h create mode 100644 drivers/gpu/drm/xe/xe_gt.c create mode 100644 drivers/gpu/drm/xe/xe_gt.h create mode 100644 drivers/gpu/drm/xe/xe_gt_clock.c create mode 100644 drivers/gpu/drm/xe/xe_gt_clock.h create mode 100644 drivers/gpu/drm/xe/xe_gt_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_gt_mcr.c create mode 100644 drivers/gpu/drm/xe/xe_gt_mcr.h create mode 100644 drivers/gpu/drm/xe/xe_gt_pagefault.c create mode 100644 drivers/gpu/drm/xe/xe_gt_pagefault.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sysfs_types.h create mode 100644 drivers/gpu/drm/xe/xe_gt_topology.c create mode 100644 drivers/gpu/drm/xe/xe_gt_topology.h create mode 100644 drivers/gpu/drm/xe/xe_gt_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc.c create mode 100644 drivers/gpu/drm/xe/xe_guc.h create mode 100644 drivers/gpu/drm/xe/xe_guc_ads.c create mode 100644 drivers/gpu/drm/xe/xe_guc_ads.h create mode 100644 drivers/gpu/drm/xe/xe_guc_ads_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_ct.c create mode 100644 drivers/gpu/drm/xe/xe_guc_ct.h create mode 100644 drivers/gpu/drm/xe/xe_guc_ct_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_guc_debugfs.h create mode 100644 
drivers/gpu/drm/xe/xe_guc_engine_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_fwif.h create mode 100644 drivers/gpu/drm/xe/xe_guc_hwconfig.c create mode 100644 drivers/gpu/drm/xe/xe_guc_hwconfig.h create mode 100644 drivers/gpu/drm/xe/xe_guc_log.c create mode 100644 drivers/gpu/drm/xe/xe_guc_log.h create mode 100644 drivers/gpu/drm/xe/xe_guc_log_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_pc.c create mode 100644 drivers/gpu/drm/xe/xe_guc_pc.h create mode 100644 drivers/gpu/drm/xe/xe_guc_pc_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_reg.h create mode 100644 drivers/gpu/drm/xe/xe_guc_submit.c create mode 100644 drivers/gpu/drm/xe/xe_guc_submit.h create mode 100644 drivers/gpu/drm/xe/xe_guc_types.h create mode 100644 drivers/gpu/drm/xe/xe_huc.c create mode 100644 drivers/gpu/drm/xe/xe_huc.h create mode 100644 drivers/gpu/drm/xe/xe_huc_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_huc_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_huc_types.h create mode 100644 drivers/gpu/drm/xe/xe_hw_engine.c create mode 100644 drivers/gpu/drm/xe/xe_hw_engine.h create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_types.h create mode 100644 drivers/gpu/drm/xe/xe_hw_fence.c create mode 100644 drivers/gpu/drm/xe/xe_hw_fence.h create mode 100644 drivers/gpu/drm/xe/xe_hw_fence_types.h create mode 100644 drivers/gpu/drm/xe/xe_irq.c create mode 100644 drivers/gpu/drm/xe/xe_irq.h create mode 100644 drivers/gpu/drm/xe/xe_lrc.c create mode 100644 drivers/gpu/drm/xe/xe_lrc.h create mode 100644 drivers/gpu/drm/xe/xe_lrc_types.h create mode 100644 drivers/gpu/drm/xe/xe_macros.h create mode 100644 drivers/gpu/drm/xe/xe_map.h create mode 100644 drivers/gpu/drm/xe/xe_migrate.c create mode 100644 drivers/gpu/drm/xe/xe_migrate.h create mode 100644 drivers/gpu/drm/xe/xe_migrate_doc.h create mode 100644 drivers/gpu/drm/xe/xe_mmio.c create mode 100644 drivers/gpu/drm/xe/xe_mmio.h create mode 100644 drivers/gpu/drm/xe/xe_mocs.c create mode 100644 drivers/gpu/drm/xe/xe_mocs.h create mode 100644 drivers/gpu/drm/xe/xe_module.c create mode 100644 drivers/gpu/drm/xe/xe_module.h create mode 100644 drivers/gpu/drm/xe/xe_pci.c create mode 100644 drivers/gpu/drm/xe/xe_pci.h create mode 100644 drivers/gpu/drm/xe/xe_pcode.c create mode 100644 drivers/gpu/drm/xe/xe_pcode.h create mode 100644 drivers/gpu/drm/xe/xe_pcode_api.h create mode 100644 drivers/gpu/drm/xe/xe_platform_types.h create mode 100644 drivers/gpu/drm/xe/xe_pm.c create mode 100644 drivers/gpu/drm/xe/xe_pm.h create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence.c create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence.h create mode 100644 drivers/gpu/drm/xe/xe_preempt_fence_types.h create mode 100644 drivers/gpu/drm/xe/xe_pt.c create mode 100644 drivers/gpu/drm/xe/xe_pt.h create mode 100644 drivers/gpu/drm/xe/xe_pt_types.h create mode 100644 drivers/gpu/drm/xe/xe_pt_walk.c create mode 100644 drivers/gpu/drm/xe/xe_pt_walk.h create mode 100644 drivers/gpu/drm/xe/xe_query.c create mode 100644 drivers/gpu/drm/xe/xe_query.h create mode 100644 drivers/gpu/drm/xe/xe_reg_sr.c create mode 100644 drivers/gpu/drm/xe/xe_reg_sr.h create mode 100644 drivers/gpu/drm/xe/xe_reg_sr_types.h create mode 100644 drivers/gpu/drm/xe/xe_reg_whitelist.c create mode 100644 drivers/gpu/drm/xe/xe_reg_whitelist.h create mode 100644 drivers/gpu/drm/xe/xe_res_cursor.h create mode 100644 drivers/gpu/drm/xe/xe_ring_ops.c create mode 100644 drivers/gpu/drm/xe/xe_ring_ops.h create mode 100644 drivers/gpu/drm/xe/xe_ring_ops_types.h create mode 100644 drivers/gpu/drm/xe/xe_rtp.c 
create mode 100644 drivers/gpu/drm/xe/xe_rtp.h create mode 100644 drivers/gpu/drm/xe/xe_rtp_types.h create mode 100644 drivers/gpu/drm/xe/xe_sa.c create mode 100644 drivers/gpu/drm/xe/xe_sa.h create mode 100644 drivers/gpu/drm/xe/xe_sa_types.h create mode 100644 drivers/gpu/drm/xe/xe_sched_job.c create mode 100644 drivers/gpu/drm/xe/xe_sched_job.h create mode 100644 drivers/gpu/drm/xe/xe_sched_job_types.h create mode 100644 drivers/gpu/drm/xe/xe_step.c create mode 100644 drivers/gpu/drm/xe/xe_step.h create mode 100644 drivers/gpu/drm/xe/xe_step_types.h create mode 100644 drivers/gpu/drm/xe/xe_sync.c create mode 100644 drivers/gpu/drm/xe/xe_sync.h create mode 100644 drivers/gpu/drm/xe/xe_sync_types.h create mode 100644 drivers/gpu/drm/xe/xe_trace.c create mode 100644 drivers/gpu/drm/xe/xe_trace.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h create mode 100644 drivers/gpu/drm/xe/xe_tuning.c create mode 100644 drivers/gpu/drm/xe/xe_tuning.h create mode 100644 drivers/gpu/drm/xe/xe_uc.c create mode 100644 drivers/gpu/drm/xe/xe_uc.h create mode 100644 drivers/gpu/drm/xe/xe_uc_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_uc_debugfs.h create mode 100644 drivers/gpu/drm/xe/xe_uc_fw.c create mode 100644 drivers/gpu/drm/xe/xe_uc_fw.h create mode 100644 drivers/gpu/drm/xe/xe_uc_fw_abi.h create mode 100644 drivers/gpu/drm/xe/xe_uc_fw_types.h create mode 100644 drivers/gpu/drm/xe/xe_uc_types.h create mode 100644 drivers/gpu/drm/xe/xe_vm.c create mode 100644 drivers/gpu/drm/xe/xe_vm.h create mode 100644 drivers/gpu/drm/xe/xe_vm_doc.h create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.c create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.h create mode 100644 drivers/gpu/drm/xe/xe_vm_types.h create mode 100644 drivers/gpu/drm/xe/xe_wa.c create mode 100644 drivers/gpu/drm/xe/xe_wa.h create mode 100644 drivers/gpu/drm/xe/xe_wait_user_fence.c create mode 100644 drivers/gpu/drm/xe/xe_wait_user_fence.h create mode 100644 drivers/gpu/drm/xe/xe_wopcm.c create mode 100644 drivers/gpu/drm/xe/xe_wopcm.h create mode 100644 drivers/gpu/drm/xe/xe_wopcm_types.h create mode 100644 include/drm/xe_pciids.h create mode 100644 include/uapi/drm/xe_drm.h (limited to 'drivers/gpu') diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index cc6535f5f28c..b899cbc5c2b4 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -18,6 +18,7 @@ GPU Driver Documentation vkms bridge/dw-hdmi xen-front + xe/index afbc komeda-kms panfrost diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst new file mode 100644 index 000000000000..2fddf9ed251e --- /dev/null +++ b/Documentation/gpu/xe/index.rst @@ -0,0 +1,23 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======================= +drm/xe Intel GFX Driver +======================= + +The drm/xe driver supports some future GFX cards with rendering, display, +compute and media. Support for currently available platforms like TGL, ADL, +DG2, etc is provided to prototype the driver. + +.. 
toctree:: + :titlesonly: + + xe_mm + xe_map + xe_migrate + xe_cs + xe_pm + xe_pcode + xe_gt_mcr + xe_wa + xe_rtp + xe_firmware diff --git a/Documentation/gpu/xe/xe_cs.rst b/Documentation/gpu/xe/xe_cs.rst new file mode 100644 index 000000000000..e379aed4f5a8 --- /dev/null +++ b/Documentation/gpu/xe/xe_cs.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================== +Command submission +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c + :doc: Execbuf (User GPU command submission) diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst new file mode 100644 index 000000000000..c01246ae99f5 --- /dev/null +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -0,0 +1,34 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======== +Firmware +======== + +Firmware Layout +=============== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h + :doc: Firmware Layout + +Write Once Protected Content Memory (WOPCM) Layout +================================================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c + :doc: Write Once Protected Content Memory (WOPCM) Layout + +GuC CTB Blob +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c + :doc: GuC CTB Blob + +GuC Power Conservation (PC) +=========================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c + :doc: GuC Power Conservation (PC) + +Internal API +============ + +TODO diff --git a/Documentation/gpu/xe/xe_gt_mcr.rst b/Documentation/gpu/xe/xe_gt_mcr.rst new file mode 100644 index 000000000000..848c07bc36d0 --- /dev/null +++ b/Documentation/gpu/xe/xe_gt_mcr.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +============================================== +GT Multicast/Replicated (MCR) Register Support +============================================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c + :doc: GT Multicast/Replicated (MCR) Register Support + +Internal API +============ + +TODO diff --git a/Documentation/gpu/xe/xe_map.rst b/Documentation/gpu/xe/xe_map.rst new file mode 100644 index 000000000000..a098cfd2df04 --- /dev/null +++ b/Documentation/gpu/xe/xe_map.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========= +Map Layer +========= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h + :doc: Map layer diff --git a/Documentation/gpu/xe/xe_migrate.rst b/Documentation/gpu/xe/xe_migrate.rst new file mode 100644 index 000000000000..f92faec0ac94 --- /dev/null +++ b/Documentation/gpu/xe/xe_migrate.rst @@ -0,0 +1,8 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +============= +Migrate Layer +============= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h + :doc: Migrate Layer diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst new file mode 100644 index 000000000000..6c8fd8b4a466 --- /dev/null +++ b/Documentation/gpu/xe/xe_mm.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================= +Memory Management +================= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h + :doc: Buffer Objects (BO) + +Pagetable building +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pt.c + :doc: Pagetable building diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst new file mode 100644 index 000000000000..d2e22cc45061 --- /dev/null +++ b/Documentation/gpu/xe/xe_pcode.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +===== +Pcode +===== + +.. 
kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c + :doc: PCODE + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c + :internal: diff --git a/Documentation/gpu/xe/xe_pm.rst b/Documentation/gpu/xe/xe_pm.rst new file mode 100644 index 000000000000..6781cdfb24f6 --- /dev/null +++ b/Documentation/gpu/xe/xe_pm.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +======================== +Runtime Power Management +======================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c + :doc: Xe Power Management + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c + :internal: diff --git a/Documentation/gpu/xe/xe_rtp.rst b/Documentation/gpu/xe/xe_rtp.rst new file mode 100644 index 000000000000..7fdf4b6c1a04 --- /dev/null +++ b/Documentation/gpu/xe/xe_rtp.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========================= +Register Table Processing +========================= + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c + :doc: Register Table Processing + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c + :internal: diff --git a/Documentation/gpu/xe/xe_wa.rst b/Documentation/gpu/xe/xe_wa.rst new file mode 100644 index 000000000000..f8811cc6adcc --- /dev/null +++ b/Documentation/gpu/xe/xe_wa.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +==================== +Hardware workarounds +==================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c + :doc: Hardware workarounds + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c + :internal: diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 31cfe2c2a2af..2520db0b776e 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -276,6 +276,8 @@ source "drivers/gpu/drm/nouveau/Kconfig" source "drivers/gpu/drm/i915/Kconfig" +source "drivers/gpu/drm/xe/Kconfig" + source "drivers/gpu/drm/kmb/Kconfig" config DRM_VGEM diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 8ac6f4b9546e..104b42df2e95 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -134,6 +134,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/ obj-$(CONFIG_DRM_I915) += i915/ +obj-$(CONFIG_DRM_XE) += xe/ obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ obj-$(CONFIG_DRM_V3D) += v3d/ diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore new file mode 100644 index 000000000000..81972dce1aff --- /dev/null +++ b/drivers/gpu/drm/xe/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +*.hdrtest diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig new file mode 100644 index 000000000000..62f54e6d62d9 --- /dev/null +++ b/drivers/gpu/drm/xe/Kconfig @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_XE + tristate "Intel Xe Graphics" + depends on DRM && PCI && MMU + select INTERVAL_TREE + # we need shmfs for the swappable backing store, and in particular + # the shmem_readpage() which depends upon tmpfs + select SHMEM + select TMPFS + select DRM_BUDDY + select DRM_KMS_HELPER + select DRM_PANEL + select DRM_SUBALLOC_HELPER + select RELAY + select IRQ_WORK + select SYNC_FILE + select IOSF_MBI + select CRC32 + select SND_HDA_I915 if SND_HDA_CORE + select CEC_CORE if CEC_NOTIFIER + select 
VMAP_PFN + select DRM_TTM + select DRM_TTM_HELPER + select DRM_SCHED + select MMU_NOTIFIER + help + Experimental driver for Intel Xe series GPUs + + If "M" is selected, the module will be called xe. + +config DRM_XE_FORCE_PROBE + string "Force probe xe for selected Intel hardware IDs" + depends on DRM_XE + help + This is the default value for the xe.force_probe module + parameter. Using the module parameter overrides this option. + + Force probe the xe for Intel graphics devices that are + recognized but not properly supported by this kernel version. It is + recommended to upgrade to a kernel version with proper support as soon + as it is available. + + It can also be used to block the probe of recognized and fully + supported devices. + + Use "" to disable force probe. If in doubt, use this. + + Use "[,,...]" to force probe the xe for listed + devices. For example, "4500" or "4500,4571". + + Use "*" to force probe the driver for all known devices. + + Use "!" right before the ID to block the probe of the device. For + example, "4500,!4571" forces the probe of 4500 and blocks the probe of + 4571. + + Use "!*" to block the probe of the driver for all known devices. + +menu "drm/Xe Debugging" +depends on DRM_XE +depends on EXPERT +source "drivers/gpu/drm/xe/Kconfig.debug" +endmenu diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug new file mode 100644 index 000000000000..b61fd43a76fe --- /dev/null +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_XE_WERROR + bool "Force GCC to throw an error instead of a warning when compiling" + # As this may inadvertently break the build, only allow the user + # to shoot oneself in the foot iff they aim really hard + depends on EXPERT + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST + default n + help + Add -Werror to the build flags for (and only for) xe.ko. + Do not enable this unless you are writing code for the xe.ko module. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG + bool "Enable additional driver debugging" + depends on DRM_XE + depends on EXPERT + depends on !COMPILE_TEST + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG_VM + bool "Enable extra VM debugging info" + default n + help + Enable extra VM debugging info + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_DEBUG_MEM + bool "Enable passing SYS/LMEM addresses to user space" + default n + help + Pass object location trough uapi. Intended for extended + testing and development only. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_SIMPLE_ERROR_CAPTURE + bool "Enable simple error capture to dmesg on job timeout" + default n + help + Choose this option when debugging an unexpected job timeout + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_KUNIT_TEST + tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS + depends on DRM_XE && KUNIT + default KUNIT_ALL_TESTS + select DRM_EXPORT_FOR_TESTS if m + help + Choose this option to allow the driver to perform selftests under + the kunit framework + + Recommended for driver developers only. + + If in doubt, say "N". 
+ +config DRM_XE_LARGE_GUC_BUFFER + bool "Enable larger guc log buffer" + default n + help + Choose this option when debugging guc issues. + Buffer should be large enough for complex issues. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_XE_USERPTR_INVAL_INJECT + bool "Inject userptr invalidation -EINVAL errors" + default n + help + Choose this option when debugging error paths that + are hit during checks for userptr invalidations. + + Recomended for driver developers only. + If in doubt, say "N". diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile new file mode 100644 index 000000000000..228a87f2fe7b --- /dev/null +++ b/drivers/gpu/drm/xe/Makefile @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the drm device driver. This driver provides support for the +# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. + +# Add a set of useful warning flags and enable -Werror for CI to prevent +# trivial mistakes from creeping in. We have to do this piecemeal as we reject +# any patch that isn't warning clean, so turning on -Wall -Wextra (or W=1) we +# need to filter out dubious warnings. Still it is our interest +# to keep running locally with W=1 C=1 until we are completely clean. +# +# Note the danger in using -Wall -Wextra is that when CI updates gcc we +# will most likely get a sudden build breakage... Hopefully we will fix +# new warnings before CI updates! +subdir-ccflags-y := -Wall -Wextra +# making these call cc-disable-warning breaks when trying to build xe.mod.o +# by calling make M=drivers/gpu/drm/xe. This doesn't happen in upstream tree, +# so it was somehow fixed by the changes in the build system. Move it back to +# $(call cc-disable-warning, ...) after rebase. +subdir-ccflags-y += -Wno-unused-parameter +subdir-ccflags-y += -Wno-type-limits +#subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) +#subdir-ccflags-y += $(call cc-disable-warning, type-limits) +subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) +subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) +# clang warnings +subdir-ccflags-y += $(call cc-disable-warning, sign-compare) +subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) +subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) +subdir-ccflags-y += $(call cc-disable-warning, frame-address) +subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror + +# Fine grained warnings disable +CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init) + +subdir-ccflags-y += -I$(srctree)/$(src) + +# Please keep these build lists sorted! 
+ +# core driver code + +xe-y += xe_bb.o \ + xe_bo.o \ + xe_bo_evict.o \ + xe_debugfs.o \ + xe_device.o \ + xe_dma_buf.o \ + xe_engine.o \ + xe_exec.o \ + xe_execlist.o \ + xe_force_wake.o \ + xe_ggtt.o \ + xe_gpu_scheduler.o \ + xe_gt.o \ + xe_gt_clock.o \ + xe_gt_debugfs.o \ + xe_gt_mcr.o \ + xe_gt_pagefault.o \ + xe_gt_sysfs.o \ + xe_gt_topology.o \ + xe_guc.o \ + xe_guc_ads.o \ + xe_guc_ct.o \ + xe_guc_debugfs.o \ + xe_guc_hwconfig.o \ + xe_guc_log.o \ + xe_guc_pc.o \ + xe_guc_submit.o \ + xe_hw_engine.o \ + xe_hw_fence.o \ + xe_huc.o \ + xe_huc_debugfs.o \ + xe_irq.o \ + xe_lrc.o \ + xe_migrate.o \ + xe_mmio.o \ + xe_mocs.o \ + xe_module.o \ + xe_pci.o \ + xe_pcode.o \ + xe_pm.o \ + xe_preempt_fence.o \ + xe_pt.o \ + xe_pt_walk.o \ + xe_query.o \ + xe_reg_sr.o \ + xe_reg_whitelist.o \ + xe_rtp.o \ + xe_ring_ops.o \ + xe_sa.o \ + xe_sched_job.o \ + xe_step.o \ + xe_sync.o \ + xe_trace.o \ + xe_ttm_gtt_mgr.o \ + xe_ttm_vram_mgr.o \ + xe_tuning.o \ + xe_uc.o \ + xe_uc_debugfs.o \ + xe_uc_fw.o \ + xe_vm.o \ + xe_vm_madvise.o \ + xe_wait_user_fence.o \ + xe_wa.o \ + xe_wopcm.o + +# XXX: Needed for i915 register definitions. Will be removed after xe-regs. +subdir-ccflags-y += -I$(srctree)/drivers/gpu/drm/i915/ + +obj-$(CONFIG_DRM_XE) += xe.o +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ +\ +# header test +always-$(CONFIG_DRM_XE_WERROR) += \ + $(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h')) + +quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) + cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@ + +$(obj)/%.hdrtest: $(src)/%.h FORCE + $(call if_changed_dep,hdrtest) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h new file mode 100644 index 000000000000..3062e0e0d467 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_ACTIONS_ABI_H +#define _ABI_GUC_ACTIONS_ABI_H + +/** + * DOC: HOST2GUC_SELF_CFG + * + * This message is used by Host KMD to setup of the `GuC Self Config KLVs`_. + * + * This message must be sent as `MMIO HXG Message`_. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **KLV_LEN** - KLV length | + * | | | | + * | | | - 32 bit KLV = 1 | + * | | | - 64 bit KLV = 2 | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **VALUE32** - Bits 31-0 of the KLV value | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2) | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_SELF_CFG 0x0508 + +#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn + +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** + * DOC: HOST2GUC_CONTROL_CTB + * + * This H2G action allows Vf Host to enable or disable H2G and G2H `CT Buffer`_. + * + * This message must be sent as `MMIO HXG Message`_. 
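(A rough illustrative sketch, not part of the patch itself: how a host driver might pack the four dwords of the HOST2GUC_SELF_CFG request laid out above. The GUC_HXG_* header fields are assumed to come from guc_messages_abi.h, added elsewhere in this same series, and example_pack_self_cfg() is a hypothetical name used only for illustration.)

#include <linux/bitfield.h>
#include <linux/kernel.h>

/* Hypothetical helper, shown only to illustrate the message layout. */
static void example_pack_self_cfg(u32 *msg, u16 key, u16 len, u64 value)
{
	/* DW0: HXG header -- host origin, request type, SELF_CFG action */
	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			    GUC_ACTION_HOST2GUC_SELF_CFG);
	/* DW1: KLV key and length (1 dword for a 32-bit KLV, 2 for 64-bit) */
	msg[1] = FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
		 FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len);
	/* DW2/DW3: KLV value; DW3 is only consumed when len == 2 */
	msg[2] = lower_32_bits(value);
	msg[3] = upper_32_bits(value);
}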
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **CONTROL** - control `CTB based communication`_ | + * | | | | + * | | | - _`GUC_CTB_CONTROL_DISABLE` = 0 | + * | | | - _`GUC_CTB_CONTROL_ENABLE` = 1 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_CONTROL_CTB 0x4509 + +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL GUC_HXG_REQUEST_MSG_n_DATAn +#define GUC_CTB_CONTROL_DISABLE 0u +#define GUC_CTB_CONTROL_ENABLE 1u + +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/* legacy definitions */ + +enum xe_guc_action { + XE_GUC_ACTION_DEFAULT = 0x0, + XE_GUC_ACTION_REQUEST_PREEMPTION = 0x2, + XE_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3, + XE_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, + XE_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, + XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + XE_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40, + XE_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, + XE_GUC_ACTION_ENTER_S_STATE = 0x501, + XE_GUC_ACTION_EXIT_S_STATE = 0x502, + XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, + XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, + XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, + XE_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, + XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + XE_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, + XE_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + XE_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, + XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, + XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, + XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, + XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, + XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + XE_GUC_ACTION_GET_HWCONFIG = 0x4100, + XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502, + XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, + XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, + XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, + 
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, + XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, + XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, + XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, + XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, + XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY = 0x6004, + XE_GUC_ACTION_TLB_INVALIDATION = 0x7000, + XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001, + XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002, + XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, + XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, + XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, + XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, + XE_GUC_ACTION_LIMIT +}; + +enum xe_guc_rc_options { + XE_GUCRC_HOST_CONTROL, + XE_GUCRC_FIRMWARE_CONTROL, +}; + +enum xe_guc_preempt_options { + XE_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, + XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, +}; + +enum xe_guc_report_status { + XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, + XE_GUC_REPORT_STATUS_ACKED = 0x1, + XE_GUC_REPORT_STATUS_ERROR = 0x2, + XE_GUC_REPORT_STATUS_COMPLETE = 0x4, +}; + +enum xe_guc_sleep_state_status { + XE_GUC_SLEEP_STATE_SUCCESS = 0x1, + XE_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2, + XE_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3 +#define XE_GUC_SLEEP_STATE_INVALID_MASK 0x80000000 +}; + +#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0) +#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4 +#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) +#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) + +#define XE_GUC_TLB_INVAL_TYPE_SHIFT 0 +#define XE_GUC_TLB_INVAL_MODE_SHIFT 8 +/* Flush PPC or SMRO caches along with TLB invalidation request */ +#define XE_GUC_TLB_INVAL_FLUSH_CACHE (1 << 31) + +enum xe_guc_tlb_invalidation_type { + XE_GUC_TLB_INVAL_FULL = 0x0, + XE_GUC_TLB_INVAL_PAGE_SELECTIVE = 0x1, + XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX = 0x2, + XE_GUC_TLB_INVAL_GUC = 0x3, +}; + +/* + * 0: Heavy mode of Invalidation: + * The pipeline of the engine(s) for which the invalidation is targeted to is + * blocked, and all the in-flight transactions are guaranteed to be Globally + * Observed before completing the TLB invalidation + * 1: Lite mode of Invalidation: + * TLBs of the targeted engine(s) are immediately invalidated. + * In-flight transactions are NOT guaranteed to be Globally Observed before + * completing TLB invalidation. + * Light Invalidation Mode is to be used only when + * it can be guaranteed (by SW) that the address translations remain invariant + * for the in-flight transactions across the TLB invalidation. In other words, + * this mode can be used when the TLB invalidation is intended to clear out the + * stale cached translations that are no longer in use. Light Invalidation Mode + * is much faster than the Heavy Invalidation Mode, as it does not wait for the + * in-flight transactions to be GOd. 
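(A rough illustrative sketch, not part of the patch itself: how the invalidation type, mode and cache-flush bit above might be combined for a full, heavy-mode invalidation. The mode values are defined just below; the surrounding H2G plumbing, action header and sequence number handling are omitted.)

	/* Full-GT invalidation, heavy mode, with PPC/SMRO cache flush */
	u32 inval_dw = (XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT) |
		       (XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT) |
		       XE_GUC_TLB_INVAL_FLUSH_CACHE;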
+ */ +enum xe_guc_tlb_inval_mode { + XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0, + XE_GUC_TLB_INVAL_MODE_LITE = 0x1, +}; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h new file mode 100644 index 000000000000..811add10c30d --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_SLPC_ABI_H_ +#define _GUC_ACTIONS_SLPC_ABI_H_ + +#include + +/** + * DOC: SLPC SHARED DATA STRUCTURE + * + * +----+------+--------------------------------------------------------------+ + * | CL | Bytes| Description | + * +====+======+==============================================================+ + * | 1 | 0-3 | SHARED DATA SIZE | + * | +------+--------------------------------------------------------------+ + * | | 4-7 | GLOBAL STATE | + * | +------+--------------------------------------------------------------+ + * | | 8-11 | DISPLAY DATA ADDRESS | + * | +------+--------------------------------------------------------------+ + * | | 12:63| PADDING | + * +----+------+--------------------------------------------------------------+ + * | | 0:63 | PADDING(PLATFORM INFO) | + * +----+------+--------------------------------------------------------------+ + * | 3 | 0-3 | TASK STATE DATA | + * + +------+--------------------------------------------------------------+ + * | | 4:63 | PADDING | + * +----+------+--------------------------------------------------------------+ + * |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS | + * +----+------+--------------------------------------------------------------+ + * | | | PADDING + EXTRA RESERVED PAGE | + * +----+------+--------------------------------------------------------------+ + */ + +/* + * SLPC exposes certain parameters for global configuration by the host. + * These are referred to as override parameters, because in most cases + * the host will not need to modify the default values used by SLPC. + * SLPC remembers the default values which allows the host to easily restore + * them by simply unsetting the override. The host can set or unset override + * parameters during SLPC (re-)initialization using the SLPC Reset event. 
+ * The host can also set or unset override parameters on the fly using the + * Parameter Set and Parameter Unset events + */ + +#define SLPC_MAX_OVERRIDE_PARAMETERS 256 +#define SLPC_OVERRIDE_BITFIELD_SIZE \ + (SLPC_MAX_OVERRIDE_PARAMETERS / 32) + +#define SLPC_PAGE_SIZE_BYTES 4096 +#define SLPC_CACHELINE_SIZE_BYTES 64 +#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES) + +/* + * Cacheline size aligned (Total size needed for + * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes) + */ +#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES (((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \ + + ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \ + + (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \ + SLPC_CACHELINE_SIZE_BYTES) + +#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER (SLPC_SHARED_DATA_SIZE_BYTE_MAX - \ + (SLPC_SHARED_DATA_SIZE_BYTE_HEADER \ + + SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \ + + SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \ + + SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \ + + SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE)) + +enum slpc_task_enable { + SLPC_PARAM_TASK_DEFAULT = 0, + SLPC_PARAM_TASK_ENABLED, + SLPC_PARAM_TASK_DISABLED, + SLPC_PARAM_TASK_UNKNOWN +}; + +enum slpc_global_state { + SLPC_GLOBAL_STATE_NOT_RUNNING = 0, + SLPC_GLOBAL_STATE_INITIALIZING = 1, + SLPC_GLOBAL_STATE_RESETTING = 2, + SLPC_GLOBAL_STATE_RUNNING = 3, + SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4, + SLPC_GLOBAL_STATE_ERROR = 5 +}; + +enum slpc_param_id { + SLPC_PARAM_TASK_ENABLE_GTPERF = 0, + SLPC_PARAM_TASK_DISABLE_GTPERF = 1, + SLPC_PARAM_TASK_ENABLE_BALANCER = 2, + SLPC_PARAM_TASK_DISABLE_BALANCER = 3, + SLPC_PARAM_TASK_ENABLE_DCC = 4, + SLPC_PARAM_TASK_DISABLE_DCC = 5, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7, + SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8, + SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9, + SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10, + SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11, + SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12, + SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13, + SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14, + SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15, + SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16, + SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17, + SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18, + SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19, + SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20, + SLPC_PARAM_PWRGATE_RC_MODE = 21, + SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22, + SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23, + SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24, + SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25, + SLPC_PARAM_STRATEGIES = 26, + SLPC_PARAM_POWER_PROFILE = 27, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28, + SLPC_MAX_PARAM = 32, +}; + +enum slpc_media_ratio_mode { + SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0, + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1, + SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2, +}; + +enum slpc_gucrc_mode { + SLPC_GUCRC_MODE_HW = 0, + SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1, + SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2, + SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3, + + SLPC_GUCRC_MODE_MAX, +}; + +enum slpc_event_id { + SLPC_EVENT_RESET = 0, + SLPC_EVENT_SHUTDOWN = 1, + SLPC_EVENT_PLATFORM_INFO_CHANGE = 2, + 
SLPC_EVENT_DISPLAY_MODE_CHANGE = 3, + SLPC_EVENT_FLIP_COMPLETE = 4, + SLPC_EVENT_QUERY_TASK_STATE = 5, + SLPC_EVENT_PARAMETER_SET = 6, + SLPC_EVENT_PARAMETER_UNSET = 7, +}; + +struct slpc_task_state_data { + union { + u32 task_status_padding; + struct { + u32 status; +#define SLPC_GTPERF_TASK_ENABLED REG_BIT(0) +#define SLPC_DCC_TASK_ENABLED REG_BIT(11) +#define SLPC_IN_DCC REG_BIT(12) +#define SLPC_BALANCER_ENABLED REG_BIT(15) +#define SLPC_IBC_TASK_ENABLED REG_BIT(16) +#define SLPC_BALANCER_IA_LMT_ENABLED REG_BIT(17) +#define SLPC_BALANCER_IA_LMT_ACTIVE REG_BIT(18) + }; + }; + union { + u32 freq_padding; + struct { +#define SLPC_MAX_UNSLICE_FREQ_MASK REG_GENMASK(7, 0) +#define SLPC_MIN_UNSLICE_FREQ_MASK REG_GENMASK(15, 8) +#define SLPC_MAX_SLICE_FREQ_MASK REG_GENMASK(23, 16) +#define SLPC_MIN_SLICE_FREQ_MASK REG_GENMASK(31, 24) + u32 freq; + }; + }; +} __packed; + +struct slpc_shared_data_header { + /* Total size in bytes of this shared buffer. */ + u32 size; + u32 global_state; + u32 display_data_addr; +} __packed; + +struct slpc_override_params { + u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE]; + u32 values[SLPC_MAX_OVERRIDE_PARAMETERS]; +} __packed; + +struct slpc_shared_data { + struct slpc_shared_data_header header; + u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER - + sizeof(struct slpc_shared_data_header)]; + + u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO]; + + struct slpc_task_state_data task_state_data; + u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE - + sizeof(struct slpc_task_state_data)]; + + struct slpc_override_params override_params; + u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES - + sizeof(struct slpc_override_params)]; + + u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER]; + + /* PAGE 2 (4096 bytes), mode based parameter will be removed soon */ + u8 reserved_mode_definition[4096]; +} __packed; + +/** + * DOC: SLPC H2G MESSAGE FORMAT + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPM_REQUEST` = 0x3003 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:8 | **EVENT_ID** | + * + +-------+--------------------------------------------------------------+ + * | | 7:0 | **EVENT_ARGC** - number of data arguments | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **EVENT_DATA1** | + * +---+-------+--------------------------------------------------------------+ + * |...| 31:0 | ... 
| + * +---+-------+--------------------------------------------------------------+ + * |2+n| 31:0 | **EVENT_DATAn** | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST 0x3003 + +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \ + (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS 9 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \ + (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ + HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h new file mode 100644 index 000000000000..41244055cc0c --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_COMMUNICATION_CTB_ABI_H +#define _ABI_GUC_COMMUNICATION_CTB_ABI_H + +#include +#include + +#include "guc_messages_abi.h" + +/** + * DOC: CT Buffer + * + * Circular buffer used to send `CTB Message`_ + */ + +/** + * DOC: CTB Descriptor + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | **HEAD** - offset (in dwords) to the last dword that was | + * | | | read from the `CT Buffer`_. | + * | | | It can only be updated by the receiver. | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **TAIL** - offset (in dwords) to the last dword that was | + * | | | written to the `CT Buffer`_. | + * | | | It can only be updated by the sender. 
| + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **STATUS** - status of the CTB | + * | | | | + * | | | - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation) | + * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | + * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | + * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * +---+-------+--------------------------------------------------------------+ + * |...| | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 15| 31:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ + +struct guc_ct_buffer_desc { + u32 head; + u32 tail; + u32 status; +#define GUC_CTB_STATUS_NO_ERROR 0 +#define GUC_CTB_STATUS_OVERFLOW (1 << 0) +#define GUC_CTB_STATUS_UNDERFLOW (1 << 1) +#define GUC_CTB_STATUS_MISMATCH (1 << 2) + u32 reserved[13]; +} __packed; +static_assert(sizeof(struct guc_ct_buffer_desc) == 64); + +/** + * DOC: CTB Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **FENCE** - message identifier | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | **FORMAT** - format of the CTB message | + * | | | - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **RESERVED** | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **NUM_DWORDS** - length of the CTB message (w/o header) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | optional (depends on FORMAT) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HDR_LEN 1u +#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN +#define GUC_CTB_MSG_MAX_LEN 256u +#define GUC_CTB_MSG_0_FENCE (0xffff << 16) +#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_FORMAT_HXG 0u +#define GUC_CTB_MSG_0_RESERVED (0xf << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) + +/** + * DOC: CTB HXG Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | FENCE | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | [Embedded `HXG Message`_] | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HXG_MSG_MIN_LEN (GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN) +#define GUC_CTB_HXG_MSG_MAX_LEN GUC_CTB_MSG_MAX_LEN + +/** + * DOC: CTB based communication + * + * The CTB (command transport buffer) communication between Host and 
GuC + * is based on u32 data stream written to the shared buffer. One buffer can + * be used to transmit data only in one direction (one-directional channel). + * + * Current status of the each buffer is stored in the buffer descriptor. + * Buffer descriptor holds tail and head fields that represents active data + * stream. The tail field is updated by the data producer (sender), and head + * field is updated by the data consumer (receiver):: + * + * +------------+ + * | DESCRIPTOR | +=================+============+========+ + * +============+ | | MESSAGE(s) | | + * | address |--------->+=================+============+========+ + * +------------+ + * | head | ^-----head--------^ + * +------------+ + * | tail | ^---------tail-----------------^ + * +------------+ + * | size | ^---------------size--------------------^ + * +------------+ + * + * Each message in data stream starts with the single u32 treated as a header, + * followed by optional set of u32 data that makes message specific payload:: + * + * +------------+---------+---------+---------+ + * | MESSAGE | + * +------------+---------+---------+---------+ + * | msg[0] | [1] | ... | [n-1] | + * +------------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+ + * | | 0 | ... | n | + * +======+=====+=========+=========+=========+ + * | 31:16| code| | | | + * +------+-----+ | | | + * | 15:5|flags| | | | + * +------+-----+ | | | + * | 4:0| len| | | | + * +------+-----+---------+---------+---------+ + * + * ^-------------len-------------^ + * + * The message header consists of: + * + * - **len**, indicates length of the message payload (in u32) + * - **code**, indicates message code + * - **flags**, holds various bits to control message handling + */ + +/* + * Definition of the command transport message header (DW0) + * + * bit[4..0] message len (in dwords) + * bit[7..5] reserved + * bit[8] response (G2H only) + * bit[8] write fence to desc (H2G only) + * bit[9] write status to H2G buff (H2G only) + * bit[10] send status back via G2H (H2G only) + * bit[15..11] reserved + * bit[31..16] action code + */ +#define GUC_CT_MSG_LEN_SHIFT 0 +#define GUC_CT_MSG_LEN_MASK 0x1F +#define GUC_CT_MSG_IS_RESPONSE (1 << 8) +#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8) +#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9) +#define GUC_CT_MSG_SEND_STATUS (1 << 10) +#define GUC_CT_MSG_ACTION_SHIFT 16 +#define GUC_CT_MSG_ACTION_MASK 0xFFFF + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h new file mode 100644 index 000000000000..ef538e34f894 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_COMMUNICATION_MMIO_ABI_H +#define _ABI_GUC_COMMUNICATION_MMIO_ABI_H + +/** + * DOC: GuC MMIO based communication + * + * The MMIO based communication between Host and GuC relies on special + * hardware registers which format could be defined by the software + * (so called scratch registers). + * + * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H) + * messages, which maximum length depends on number of available scratch + * registers, is directly written into those scratch registers. 
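(Referring back to the CTB descriptor and data stream described above, a rough illustrative sketch of the sender-side space accounting that follows from that scheme, assuming a power-of-two buffer size in dwords; the helper name is hypothetical, and <linux/circ_buf.h> provides an equivalent CIRC_SPACE() computation.)

static u32 example_ctb_space(const struct guc_ct_buffer_desc *desc, u32 size)
{
	/*
	 * Dwords the sender may still write at 'tail' without overtaking
	 * 'head'; one slot is always left empty so a full buffer can be
	 * told apart from an empty one.
	 */
	return (desc->head - desc->tail - 1) & (size - 1);
}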
+ * + * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8, + * but no H2G command takes more than 4 parameters and the GuC firmware + * itself uses an 4-element array to store the H2G message. + * + * For Gen11+, there are additional 4 registers 0x190240-0x19024C, which + * are, regardless on lower count, preferred over legacy ones. + * + * The MMIO based communication is mainly used during driver initialization + * phase to setup the `CTB based communication`_ that will be used afterwards. + */ + +#define GUC_MAX_MMIO_MSG_LEN 4 + +/** + * DOC: MMIO HXG Message + * + * Format of the MMIO messages follows definitions of `HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | | + * +---+-------+ | + * |...| | [Embedded `HXG Message`_] | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h new file mode 100644 index 000000000000..ec83551bf9c0 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_ERRORS_ABI_H +#define _ABI_GUC_ERRORS_ABI_H + +enum xe_guc_response_status { + XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, +}; + +enum xe_guc_load_status { + XE_GUC_LOAD_STATUS_DEFAULT = 0x00, + XE_GUC_LOAD_STATUS_START = 0x01, + XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH = 0x02, + XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH = 0x03, + XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE = 0x04, + XE_GUC_LOAD_STATUS_GDT_DONE = 0x10, + XE_GUC_LOAD_STATUS_IDT_DONE = 0x20, + XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, + XE_GUC_LOAD_STATUS_GUCINT_DONE = 0x40, + XE_GUC_LOAD_STATUS_DPC_READY = 0x50, + XE_GUC_LOAD_STATUS_DPC_ERROR = 0x60, + XE_GUC_LOAD_STATUS_EXCEPTION = 0x70, + XE_GUC_LOAD_STATUS_INIT_DATA_INVALID = 0x71, + XE_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED = 0x72, + XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, + XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, + XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, + + XE_GUC_LOAD_STATUS_READY = 0xF0, +}; + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h new file mode 100644 index 000000000000..47094b9b044c --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _ABI_GUC_KLVS_ABI_H +#define _ABI_GUC_KLVS_ABI_H + +#include + +/** + * DOC: GuC KLV + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **KEY** - KLV key identifier | + * | | | - `GuC Self Config KLVs`_ | + * | | | - `GuC VGT Policy KLVs`_ | + * | | | - `GuC VF Configuration KLVs`_ | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **LEN** - length of VALUE (in 32bit dwords) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VALUE** - actual value of the KLV (format depends on KEY) | + * 
+---+-------+                                                              |
+ * |...|       |                                                              |
+ * +---+-------+                                                              |
+ * | n |  31:0 |                                                              |
+ * +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN				1u
+#define GUC_KLV_0_KEY				(0xffff << 16)
+#define GUC_KLV_0_LEN				(0xffff << 0)
+#define GUC_KLV_n_VALUE				(0xffffffff << 0)
+
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR` : 0x0900
+ *	Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *	status vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR` : 0x0901
+ *	Refers to 64 bit Global Gfx address (in bytes) of memory based interrupts
+ *	source vector for use by the GuC.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ *	Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ *	Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903
+ *	Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ *	Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904
+ *	Refers to size of H2G `CT Buffer`_ in bytes.
+ *	Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905
+ *	Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ *	Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906
+ *	Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ *	Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907
+ *	Refers to size of G2H `CT Buffer`_ in bytes.
+ *	Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY		0x0900
+#define GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY		0x0901
+#define GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY		0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY	0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY		0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN		1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY		0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY	0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY		0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN		1u
+
+/*
+ * Per context scheduling policy update keys.
+ */
+enum {
+	GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM			= 0x2001,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT			= 0x2002,
+	GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY			= 0x2003,
+	GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY	= 0x2004,
+	GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY			= 0x2005,
+
+	GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5,
+};
+
+/**
+ * DOC: GuC VGT Policy KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
+ *
+ * _`GUC_KLV_VGT_POLICY_SCHED_IF_IDLE` : 0x8001
+ *	This config sets whether strict scheduling is enabled whereby any VF
+ *	that doesn’t have work to submit is still allocated a fixed execution
+ *	time-slice, to ensure active VFs' execution is always consistent even
+ *	during other VF reprovisioning / rebooting events.
+ *	Changing this KLV impacts all VFs and takes effect on the next
+ *	VF-Switch event.
+ *
+ *	:0: don't schedule idle (default)
+ *	:1: schedule if idle
+ *
+ * _`GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD` : 0x8002
+ *	This config sets the sample period for tracking adverse event counters.
+ *	A sample period is the period in milliseconds during which events are
+ *	counted. This is applicable for all the VFs.
+ *
+ *	:0: adverse events are not counted (default)
+ *	:n: sample period in milliseconds
+ *
+ * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
+ *	This config controls whether the utilized HW engine is reset after a
+ *	VF switch (i.e. to clean up stale HW registers left behind by the
+ *	previous VF).
+ *
+ *	:0: don't reset (default)
+ *	:1: reset
+ */
+
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY		0x8001
+#define GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_LEN		1u
+
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY	0x8002
+#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN	1u
+
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY	0x8D00
+#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN	1u
+
+/**
+ * DOC: GuC VF Configuration KLVs
+ *
+ * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VF_CFG.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_START` : 0x0001
+ *	A 4K aligned start GTT address/offset assigned to VF.
+ *	Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_GGTT_SIZE` : 0x0002
+ *	A 4K aligned size of GGTT assigned to VF.
+ *	Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_LMEM_SIZE` : 0x0003
+ *	A 2M aligned size of local memory assigned to VF.
+ *	Value is 64 bits.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_CONTEXTS` : 0x0004
+ *	Refers to the number of contexts allocated to this VF.
+ *
+ *	:0: no contexts (default)
+ *	:1-65535: number of contexts (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_TILE_MASK` : 0x0005
+ *	For multi-tiled products, this field contains the bitwise-OR of tiles
+ *	assigned to the VF. Bit-0-set means VF has access to Tile-0,
+ *	Bit-31-set means VF has access to Tile-31, etc.
+ *	At least one tile will always be allocated.
+ *	If all bits are zero, VF KMD should treat this as a fatal error.
+ *	For single-tile products, this KLV config is ignored.
+ *
+ * _`GUC_KLV_VF_CFG_NUM_DOORBELLS` : 0x0006
+ *	Refers to the number of doorbells allocated to this VF.
+ *
+ *	:0: no doorbells (default)
+ *	:1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_EXEC_QUANTUM` : 0x8A01
+ *	This config sets the VFs-execution-quantum in milliseconds.
+ *	GUC will attempt to obey the maximum values as much as HW is capable
+ *	of, and this will never be perfectly exact (accumulated nano-second
+ *	granularity) since the GPU's clock time runs off a different crystal
+ *	from the CPU's clock. Changing this KLV on a VF that is currently
+ *	running a context won't take effect until a new context is scheduled in.
+ *	That said, when the PF is changing this value from 0xFFFFFFFF to
+ *	something else, it might never take effect if the VF is running an
+ *	infinitely long compute or shader kernel. In such a scenario, the
+ *	PF would need to trigger a VM PAUSE and then change the KLV to force
+ *	it to take effect. Such cases might typically happen on a 1PF+1VF
+ *	Virtualization config enabled for heavier workloads like AI/ML.
+ *
+ *	:0: infinite exec quantum (default)
+ *
+ * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02
+ *	This config sets the VF-preemption-timeout in microseconds.
+ *
+ *	GUC will attempt to obey the minimum and maximum values as much as
+ *	HW is capable of, and this will never be perfectly exact (accumulated
+ *	nano-second granularity) since the GPU's clock time runs off a
+ *	different crystal from the CPU's clock. Changing this KLV on a VF
+ *	that is currently running a context won't take effect until a new
+ *	context is scheduled in.
+ *	That said, when the PF is changing this value from 0xFFFFFFFF to
+ *	something else, it might never take effect if the VF is running an
+ *	infinitely long compute or shader kernel.
+ *	In this case, the PF would need to trigger a VM PAUSE and then change
+ *	the KLV to force it to take effect. Such cases might typically happen
+ *	on a 1PF+1VF Virtualization config enabled for heavier workloads like
+ *	AI/ML.
+ *
+ *	:0: no preemption timeout (default)
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03
+ *	This config sets the threshold for CAT errors caused by the VF.
+ *
+ *	:0: adverse events or errors will not be reported (default)
+ *	:n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET` : 0x8A04
+ *	This config sets the threshold for engine resets caused by the VF.
+ *
+ *	:0: adverse events or errors will not be reported (default)
+ *	:n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT` : 0x8A05
+ *	This config sets the threshold for page fault errors caused by the VF.
+ *
+ *	:0: adverse events or errors will not be reported (default)
+ *	:n: event occurrence count per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM` : 0x8A06
+ *	This config sets the threshold for H2G interrupts triggered by the VF.
+ *
+ *	:0: adverse events or errors will not be reported (default)
+ *	:n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM` : 0x8A07
+ *	This config sets the threshold for GT interrupts triggered by the VF's
+ *	workloads.
+ *
+ *	:0: adverse events or errors will not be reported (default)
+ *	:n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM` : 0x8A08
+ *	This config sets the threshold for doorbell rings triggered by the VF.
+ *
+ *	:0: adverse events or errors will not be reported (default)
+ *	:n: time (us) per sampling interval
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID` : 0x8A0A
+ *	Refers to the start index of doorbells assigned to this VF.
+ *
+ *	:0: (default)
+ *	:1-255: number of doorbells (Gen12)
+ *
+ * _`GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID` : 0x8A0B
+ *	Refers to the start index in the context array allocated to this VF’s use.
+ * + * :0: (default) + * :1-65535: number of contexts (Gen12) + */ + +#define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 +#define GUC_KLV_VF_CFG_GGTT_START_LEN 2u + +#define GUC_KLV_VF_CFG_GGTT_SIZE_KEY 0x0002 +#define GUC_KLV_VF_CFG_GGTT_SIZE_LEN 2u + +#define GUC_KLV_VF_CFG_LMEM_SIZE_KEY 0x0003 +#define GUC_KLV_VF_CFG_LMEM_SIZE_LEN 2u + +#define GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY 0x0004 +#define GUC_KLV_VF_CFG_NUM_CONTEXTS_LEN 1u + +#define GUC_KLV_VF_CFG_TILE_MASK_KEY 0x0005 +#define GUC_KLV_VF_CFG_TILE_MASK_LEN 1u + +#define GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY 0x0006 +#define GUC_KLV_VF_CFG_NUM_DOORBELLS_LEN 1u + +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY 0x8a01 +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN 1u + +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY 0x8a03 +#define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_KEY 0x8a04 +#define GUC_KLV_VF_CFG_THRESHOLD_ENGINE_RESET_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_KEY 0x8a05 +#define GUC_KLV_VF_CFG_THRESHOLD_PAGE_FAULT_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_KEY 0x8a06 +#define GUC_KLV_VF_CFG_THRESHOLD_H2G_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_KEY 0x8a07 +#define GUC_KLV_VF_CFG_THRESHOLD_IRQ_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_KEY 0x8a08 +#define GUC_KLV_VF_CFG_THRESHOLD_DOORBELL_STORM_LEN 1u + +#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY 0x8a0a +#define GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_LEN 1u + +#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b +#define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u + +#endif diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h new file mode 100644 index 000000000000..3d199016cf88 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2021 Intel Corporation + */ + +#ifndef _ABI_GUC_MESSAGES_ABI_H +#define _ABI_GUC_MESSAGES_ABI_H + +/** + * DOC: HXG Message + * + * All messages exchanged with GuC are defined using 32 bit dwords. + * First dword is treated as a message header. Remaining dwords are optional. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | | | | + * | 0 | 31 | **ORIGIN** - originator of the message | + * | | | - _`GUC_HXG_ORIGIN_HOST` = 0 | + * | | | - _`GUC_HXG_ORIGIN_GUC` = 1 | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | **TYPE** - message type | + * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 | + * | | | - _`GUC_HXG_TYPE_EVENT` = 1 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7 | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **AUX** - auxiliary data (depends on TYPE) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **PAYLOAD** - optional payload (depends on TYPE) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_MSG_MIN_LEN 1u +#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_ORIGIN_HOST 0u +#define GUC_HXG_ORIGIN_GUC 1u +#define GUC_HXG_MSG_0_TYPE (0x7 << 28) +#define GUC_HXG_TYPE_REQUEST 0u +#define GUC_HXG_TYPE_EVENT 1u +#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u +#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u +#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u +#define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u +#define GUC_HXG_MSG_0_AUX (0xfffffff << 0) +#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) + +/** + * DOC: HXG Request + * + * The `HXG Request`_ message should be used to initiate synchronous activity + * for which confirmation or return data is expected. + * + * The recipient of this message shall use `HXG Response`_, `HXG Failure`_ + * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_ + * message as a intermediate reply. + * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - request data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - requested action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Event + * + * The `HXG Event`_ message should be used to initiate asynchronous activity + * that does not involves immediate confirmation nor data. + * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. 
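+ *
+ * As a purely illustrative sketch (not part of the ABI definition itself),
+ * a host-side sender could assemble such an event header dword with
+ * FIELD_PREP() from linux/bitfield.h and the mask definitions below, where
+ * ``action`` and ``data0`` stand for caller-chosen placeholder values::
+ *
+ *	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ *		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+ *		 FIELD_PREP(GUC_HXG_EVENT_MSG_0_DATA0, data0) |
+ *		 FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION, action);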
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - event data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - event action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional event data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Busy + * + * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_ + * message if the recipient expects that it processing will be longer than default + * timeout. + * + * The @COUNTER field may be used as a progress indicator. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNTER** - progress indicator | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_BUSY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_BUSY_MSG_0_COUNTER GUC_HXG_MSG_0_AUX + +/** + * DOC: HXG Retry + * + * The `HXG Retry`_ message should be used by recipient to indicate that the + * `HXG Request`_ message was dropped and it should be resent again. + * + * The @REASON field may be used to provide additional information. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **REASON** - reason for retry | + * | | | - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0 | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RETRY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RETRY_MSG_0_REASON GUC_HXG_MSG_0_AUX +#define GUC_HXG_RETRY_REASON_UNSPECIFIED 0u + +/** + * DOC: HXG Failure + * + * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_ + * message that could not be processed due to an error. 
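+ *
+ * As an informal illustration only (``msg[0]`` stands for the received
+ * header dword), a receiver might classify such a reply with FIELD_GET()
+ * from linux/bitfield.h and then map the ERROR and HINT fields onto a
+ * driver-level error code::
+ *
+ *	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_RESPONSE_FAILURE) {
+ *		u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
+ *		u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
+ *	}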
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **HINT** - additional error hint | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ERROR** - error/result code | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) + +/** + * DOC: HXG Response + * + * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_ + * message that was successfully processed without an error. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX +#define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/* deprecated */ +#define INTEL_GUC_MSG_TYPE_SHIFT 28 +#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) +#define INTEL_GUC_MSG_DATA_SHIFT 16 +#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) +#define INTEL_GUC_MSG_CODE_SHIFT 0 +#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) + +enum intel_guc_msg_type { + INTEL_GUC_MSG_TYPE_REQUEST = 0x0, + INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, +}; + +#endif diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile new file mode 100644 index 000000000000..47056b6459e3 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_bo_test.o xe_dma_buf_test.o \ + xe_migrate_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c new file mode 100644 index 000000000000..87ac21cc8ca9 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_bo_evict.h" +#include "xe_pci.h" + +static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, + bool clear, u64 get_val, u64 assign_val, + struct kunit *test) +{ + struct dma_fence *fence; + struct ttm_tt *ttm; + struct page *page; + pgoff_t ccs_page; + long timeout; + u64 *cpu_map; + int ret; + u32 offset; + + /* Move bo to VRAM if not already there. 
*/ + ret = xe_bo_validate(bo, NULL, false); + if (ret) { + KUNIT_FAIL(test, "Failed to validate bo.\n"); + return ret; + } + + /* Optionally clear bo *and* CCS data in VRAM. */ + if (clear) { + fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0); + if (IS_ERR(fence)) { + KUNIT_FAIL(test, "Failed to submit bo clear.\n"); + return PTR_ERR(fence); + } + dma_fence_put(fence); + } + + /* Evict to system. CCS data should be copied. */ + ret = xe_bo_evict(bo, true); + if (ret) { + KUNIT_FAIL(test, "Failed to evict bo.\n"); + return ret; + } + + /* Sync all migration blits */ + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, + true, + 5 * HZ); + if (timeout <= 0) { + KUNIT_FAIL(test, "Failed to sync bo eviction.\n"); + return -ETIME; + } + + /* + * Bo with CCS data is now in system memory. Verify backing store + * and data integrity. Then assign for the next testing round while + * we still have a CPU map. + */ + ttm = bo->ttm.ttm; + if (!ttm || !ttm_tt_is_populated(ttm)) { + KUNIT_FAIL(test, "Bo was not in expected placement.\n"); + return -EINVAL; + } + + ccs_page = xe_bo_ccs_pages_start(bo) >> PAGE_SHIFT; + if (ccs_page >= ttm->num_pages) { + KUNIT_FAIL(test, "No TTM CCS pages present.\n"); + return -EINVAL; + } + + page = ttm->pages[ccs_page]; + cpu_map = kmap_local_page(page); + + /* Check first CCS value */ + if (cpu_map[0] != get_val) { + KUNIT_FAIL(test, + "Expected CCS readout 0x%016llx, got 0x%016llx.\n", + (unsigned long long)get_val, + (unsigned long long)cpu_map[0]); + ret = -EINVAL; + } + + /* Check last CCS value, or at least last value in page. */ + offset = xe_device_ccs_bytes(gt->xe, bo->size); + offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; + if (cpu_map[offset] != get_val) { + KUNIT_FAIL(test, + "Expected CCS readout 0x%016llx, got 0x%016llx.\n", + (unsigned long long)get_val, + (unsigned long long)cpu_map[offset]); + ret = -EINVAL; + } + + cpu_map[0] = assign_val; + cpu_map[offset] = assign_val; + kunmap_local(cpu_map); + + return ret; +} + +static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt, + struct kunit *test) +{ + struct xe_bo *bo; + u32 vram_bit; + int ret; + + /* TODO: Sanity check */ + vram_bit = XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id; + kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id, + gt->info.vram_id); + + bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device, + vram_bit); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "Failed to create bo.\n"); + return; + } + + kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); + ret = ccs_test_migrate(gt, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, + test); + if (ret) + goto out_unlock; + + kunit_info(test, "Verifying that CCS data survives migration.\n"); + ret = ccs_test_migrate(gt, bo, false, 0xdeadbeefdeadbeefULL, + 0xdeadbeefdeadbeefULL, test); + if (ret) + goto out_unlock; + + kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); + ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test); + +out_unlock: + xe_bo_unlock_no_vm(bo); + xe_bo_put(bo); +} + +static int ccs_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_gt *gt; + int id; + + if (!xe_device_has_flat_ccs(xe)) { + kunit_info(test, "Skipping non-flat-ccs device.\n"); + return 0; + } + + for_each_gt(gt, xe, id) + ccs_test_run_gt(xe, gt, test); + + return 0; +} + +void xe_ccs_migrate_kunit(struct kunit *test) +{ + xe_call_for_each_device(ccs_test_run_device); +} +EXPORT_SYMBOL(xe_ccs_migrate_kunit); + 
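+/*
+ * Eviction round-trip check for a single GT: create one VM-private bo and
+ * one externally pinned bo in VRAM, evict everything to system memory, run
+ * the kernel and user restore paths, and then verify that the pinned
+ * external bo ends up back in VRAM while the unpinned bo stays in system
+ * memory until it is revalidated.
+ */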
+static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test) +{ + struct xe_bo *bo, *external; + unsigned int bo_flags = XE_BO_CREATE_USER_BIT | + XE_BO_CREATE_VRAM_IF_DGFX(gt); + struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate); + struct ww_acquire_ctx ww; + int err, i; + + kunit_info(test, "Testing device %s gt id %u vram id %u\n", + dev_name(xe->drm.dev), gt->info.id, gt->info.vram_id); + + for (i = 0; i < 2; ++i) { + xe_vm_lock(vm, &ww, 0, false); + bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device, + bo_flags); + xe_vm_unlock(vm, &ww); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "bo create err=%pe\n", bo); + break; + } + + external = xe_bo_create(xe, NULL, NULL, 0x10000, + ttm_bo_type_device, bo_flags); + if (IS_ERR(external)) { + KUNIT_FAIL(test, "external bo create err=%pe\n", external); + goto cleanup_bo; + } + + xe_bo_lock(external, &ww, 0, false); + err = xe_bo_pin_external(external); + xe_bo_unlock(external, &ww); + if (err) { + KUNIT_FAIL(test, "external bo pin err=%pe\n", + ERR_PTR(err)); + goto cleanup_external; + } + + err = xe_bo_evict_all(xe); + if (err) { + KUNIT_FAIL(test, "evict err=%pe\n", ERR_PTR(err)); + goto cleanup_all; + } + + err = xe_bo_restore_kernel(xe); + if (err) { + KUNIT_FAIL(test, "restore kernel err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + + err = xe_bo_restore_user(xe); + if (err) { + KUNIT_FAIL(test, "restore user err=%pe\n", ERR_PTR(err)); + goto cleanup_all; + } + + if (!xe_bo_is_vram(external)) { + KUNIT_FAIL(test, "external bo is not vram\n"); + err = -EPROTO; + goto cleanup_all; + } + + if (xe_bo_is_vram(bo)) { + KUNIT_FAIL(test, "bo is vram\n"); + err = -EPROTO; + goto cleanup_all; + } + + if (i) { + down_read(&vm->lock); + xe_vm_lock(vm, &ww, 0, false); + err = xe_bo_validate(bo, bo->vm, false); + xe_vm_unlock(vm, &ww); + up_read(&vm->lock); + if (err) { + KUNIT_FAIL(test, "bo valid err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + xe_bo_lock(external, &ww, 0, false); + err = xe_bo_validate(external, NULL, false); + xe_bo_unlock(external, &ww); + if (err) { + KUNIT_FAIL(test, "external bo valid err=%pe\n", + ERR_PTR(err)); + goto cleanup_all; + } + } + + xe_bo_lock(external, &ww, 0, false); + xe_bo_unpin_external(external); + xe_bo_unlock(external, &ww); + + xe_bo_put(external); + xe_bo_put(bo); + continue; + +cleanup_all: + xe_bo_lock(external, &ww, 0, false); + xe_bo_unpin_external(external); + xe_bo_unlock(external, &ww); +cleanup_external: + xe_bo_put(external); +cleanup_bo: + xe_bo_put(bo); + break; + } + + xe_vm_put(vm); + + return 0; +} + +static int evict_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_gt *gt; + int id; + + if (!IS_DGFX(xe)) { + kunit_info(test, "Skipping non-discrete device %s.\n", + dev_name(xe->drm.dev)); + return 0; + } + + for_each_gt(gt, xe, id) + evict_test_run_gt(xe, gt, test); + + return 0; +} + +void xe_bo_evict_kunit(struct kunit *test) +{ + xe_call_for_each_device(evict_test_run_device); +} +EXPORT_SYMBOL(xe_bo_evict_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c new file mode 100644 index 000000000000..c8fa29b0b3b2 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +void xe_ccs_migrate_kunit(struct kunit *test); +void xe_bo_evict_kunit(struct kunit *test); + +static struct kunit_case xe_bo_tests[] = { + KUNIT_CASE(xe_ccs_migrate_kunit), + 
KUNIT_CASE(xe_bo_evict_kunit), + {} +}; + +static struct kunit_suite xe_bo_test_suite = { + .name = "xe_bo", + .test_cases = xe_bo_tests, +}; + +kunit_test_suite(xe_bo_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c new file mode 100644 index 000000000000..615d22e3f731 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_pci.h" + +static bool p2p_enabled(struct dma_buf_test_params *params) +{ + return IS_ENABLED(CONFIG_PCI_P2PDMA) && params->attach_ops && + params->attach_ops->allow_peer2peer; +} + +static bool is_dynamic(struct dma_buf_test_params *params) +{ + return IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY) && params->attach_ops && + params->attach_ops->move_notify; +} + +static void check_residency(struct kunit *test, struct xe_bo *exported, + struct xe_bo *imported, struct dma_buf *dmabuf) +{ + struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); + u32 mem_type; + int ret; + + xe_bo_assert_held(exported); + xe_bo_assert_held(imported); + + mem_type = XE_PL_VRAM0; + if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + /* No VRAM allowed */ + mem_type = XE_PL_TT; + else if (params->force_different_devices && !p2p_enabled(params)) + /* No P2P */ + mem_type = XE_PL_TT; + else if (params->force_different_devices && !is_dynamic(params) && + (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + /* Pin migrated to TT */ + mem_type = XE_PL_TT; + + if (!xe_bo_is_mem_type(exported, mem_type)) { + KUNIT_FAIL(test, "Exported bo was not in expected memory type.\n"); + return; + } + + if (xe_bo_is_pinned(exported)) + return; + + /* + * Evict exporter. Note that the gem object dma_buf member isn't + * set from xe_gem_prime_export(), and it's needed for the move_notify() + * functionality, so hack that up here. Evicting the exported bo will + * evict also the imported bo through the move_notify() functionality if + * importer is on a different device. If they're on the same device, + * the exporter and the importer should be the same bo. + */ + swap(exported->ttm.base.dma_buf, dmabuf); + ret = xe_bo_evict(exported, true); + swap(exported->ttm.base.dma_buf, dmabuf); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", + ret); + return; + } + + /* Verify that also importer has been evicted to SYSTEM */ + if (!xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) { + KUNIT_FAIL(test, "Importer wasn't properly evicted.\n"); + return; + } + + /* Re-validate the importer. This should move also exporter in. */ + ret = xe_bo_validate(imported, NULL, false); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", + ret); + return; + } + + /* + * If on different devices, the exporter is kept in system if + * possible, saving a migration step as the transfer is just + * likely as fast from system memory. 
+ */ + if (params->force_different_devices && + params->mem_mask & XE_BO_CREATE_SYSTEM_BIT) + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); + else + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + + if (params->force_different_devices) + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); + else + KUNIT_EXPECT_TRUE(test, exported == imported); +} + +static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); + struct drm_gem_object *import; + struct dma_buf *dmabuf; + struct xe_bo *bo; + + /* No VRAM on this device? */ + if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) && + (params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + return; + + kunit_info(test, "running %s\n", __func__); + bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device, + XE_BO_CREATE_USER_BIT | params->mem_mask); + if (IS_ERR(bo)) { + KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", + PTR_ERR(bo)); + return; + } + + dmabuf = xe_gem_prime_export(&bo->ttm.base, 0); + if (IS_ERR(dmabuf)) { + KUNIT_FAIL(test, "xe_gem_prime_export() failed with err=%ld\n", + PTR_ERR(dmabuf)); + goto out; + } + + import = xe_gem_prime_import(&xe->drm, dmabuf); + if (!IS_ERR(import)) { + struct xe_bo *import_bo = gem_to_xe_bo(import); + + /* + * Did import succeed when it shouldn't due to lack of p2p support? + */ + if (params->force_different_devices && + !p2p_enabled(params) && + !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + KUNIT_FAIL(test, + "xe_gem_prime_import() succeeded when it shouldn't have\n"); + } else { + int err; + + /* Is everything where we expect it to be? */ + xe_bo_lock_no_vm(import_bo, NULL); + err = xe_bo_validate(import_bo, NULL, false); + if (err && err != -EINTR && err != -ERESTARTSYS) + KUNIT_FAIL(test, + "xe_bo_validate() failed with err=%d\n", err); + + check_residency(test, bo, import_bo, dmabuf); + xe_bo_unlock_no_vm(import_bo); + } + drm_gem_object_put(import); + } else if (PTR_ERR(import) != -EOPNOTSUPP) { + /* Unexpected error code. */ + KUNIT_FAIL(test, + "xe_gem_prime_import failed with the wrong err=%ld\n", + PTR_ERR(import)); + } else if (!params->force_different_devices || + p2p_enabled(params) || + (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + /* Shouldn't fail if we can reuse same bo, use p2p or use system */ + KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", + PTR_ERR(import)); + } + dma_buf_put(dmabuf); +out: + drm_gem_object_put(&bo->ttm.base); +} + +static const struct dma_buf_attach_ops nop2p_attach_ops = { + .allow_peer2peer = false, + .move_notify = xe_dma_buf_move_notify +}; + +/* + * We test the implementation with bos of different residency and with + * importers with different capabilities; some lacking p2p support and some + * lacking dynamic capabilities (attach_ops == NULL). We also fake + * different devices avoiding the import shortcut that just reuses the same + * gem object. 
+ */ +static const struct dma_buf_test_params test_params[] = { + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_VRAM0_BIT}, + {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &xe_dma_buf_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .attach_ops = &nop2p_attach_ops, + .force_different_devices = true}, + + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT}, + {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + .force_different_devices = true}, + + {} +}; + +static int dma_buf_run_device(struct xe_device *xe) +{ + const struct dma_buf_test_params *params; + struct kunit *test = xe_cur_kunit(); + + for (params = test_params; params->mem_mask; ++params) { + struct dma_buf_test_params p = *params; + + p.base.id = XE_TEST_LIVE_DMA_BUF; + test->priv = &p; + xe_test_dmabuf_import_same_driver(xe); + } + + /* A non-zero return would halt iteration over driver devices */ + return 0; +} + +void xe_dma_buf_kunit(struct kunit *test) +{ + xe_call_for_each_device(dma_buf_run_device); +} +EXPORT_SYMBOL(xe_dma_buf_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c new file mode 100644 index 000000000000..7bb292da1193 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +void xe_dma_buf_kunit(struct kunit *test); + +static struct kunit_case xe_dma_buf_tests[] = { + KUNIT_CASE(xe_dma_buf_kunit), + {} +}; + +static struct kunit_suite xe_dma_buf_test_suite = { + .name = "xe_dma_buf", + .test_cases = xe_dma_buf_tests, +}; + +kunit_test_suite(xe_dma_buf_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c new file mode 100644 index 000000000000..0f3b819f0a34 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2022 Intel Corporation + */ + +#include + +#include "xe_pci.h" + +static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence, + const char *str, struct kunit *test) +{ + long ret; + + if (IS_ERR(fence)) { + KUNIT_FAIL(test, "Failed to create fence for %s: 
%li\n", str, + PTR_ERR(fence)); + return true; + } + if (!fence) + return true; + + ret = dma_fence_wait_timeout(fence, false, 5 * HZ); + if (ret <= 0) { + KUNIT_FAIL(test, "Fence timed out for %s: %li\n", str, ret); + return true; + } + + return false; +} + +static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, + struct xe_bb *bb, u32 second_idx, const char *str, + struct kunit *test) +{ + struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb, + m->batch_base_ofs, + second_idx); + struct dma_fence *fence; + + if (IS_ERR(job)) { + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", + PTR_ERR(job)); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + if (sanity_fence_failed(xe, fence, str, test)) + return -ETIMEDOUT; + + dma_fence_put(fence); + kunit_info(test, "%s: Job completed\n", str); + return 0; +} + +static void +sanity_populate_cb(struct xe_migrate_pt_update *pt_update, + struct xe_gt *gt, struct iosys_map *map, void *dst, + u32 qword_ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update) +{ + int i; + u64 *ptr = dst; + + for (i = 0; i < num_qwords; i++) + ptr[i] = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; +} + +static const struct xe_migrate_pt_update_ops sanity_ops = { + .populate = sanity_populate_cb, +}; + +#define check(_retval, _expected, str, _test) \ + do { if ((_retval) != (_expected)) { \ + KUNIT_FAIL(_test, "Sanity check failed: " str \ + " expected %llx, got %llx\n", \ + (u64)(_expected), (u64)(_retval)); \ + } } while (0) + +static void test_copy(struct xe_migrate *m, struct xe_bo *bo, + struct kunit *test) +{ + struct xe_device *xe = gt_to_xe(m->gt); + u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL; + bool big = bo->size >= SZ_2M; + struct dma_fence *fence; + const char *str = big ? "Copying big bo" : "Copying small bo"; + int err; + + struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL, + bo->size, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT); + if (IS_ERR(sysmem)) { + KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n", + str, PTR_ERR(sysmem)); + return; + } + + err = xe_bo_validate(sysmem, NULL, false); + if (err) { + KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n", + str, err); + goto out_unlock; + } + + err = xe_bo_vmap(sysmem); + if (err) { + KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n", + str, err); + goto out_unlock; + } + + xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); + fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0); + if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" : + "Clearing sysmem small bo", test)) { + retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); + check(retval, expected, "sysmem first offset should be cleared", + test); + retval = xe_map_rd(xe, &sysmem->vmap, sysmem->size - 8, u64); + check(retval, expected, "sysmem last offset should be cleared", + test); + } + dma_fence_put(fence); + + /* Try to copy 0xc0 from sysmem to lmem with 2MB or 64KiB/4KiB pages */ + xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + + fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource, + bo->ttm.resource); + if (!sanity_fence_failed(xe, fence, big ? 
"Copying big bo sysmem -> vram" : + "Copying small bo sysmem -> vram", test)) { + retval = xe_map_rd(xe, &bo->vmap, 0, u64); + check(retval, expected, + "sysmem -> vram bo first offset should be copied", test); + retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); + check(retval, expected, + "sysmem -> vram bo offset should be copied", test); + } + dma_fence_put(fence); + + /* And other way around.. slightly hacky.. */ + xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + + fence = xe_migrate_copy(m, sysmem, bo->ttm.resource, + sysmem->ttm.resource); + if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" : + "Copying small bo vram -> sysmem", test)) { + retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); + check(retval, expected, + "vram -> sysmem bo first offset should be copied", test); + retval = xe_map_rd(xe, &sysmem->vmap, bo->size - 8, u64); + check(retval, expected, + "vram -> sysmem bo last offset should be copied", test); + } + dma_fence_put(fence); + + xe_bo_vunmap(sysmem); +out_unlock: + xe_bo_unlock_no_vm(sysmem); + xe_bo_put(sysmem); +} + +static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, + struct kunit *test) +{ + struct xe_device *xe = gt_to_xe(m->gt); + struct dma_fence *fence; + u64 retval, expected; + int i; + + struct xe_vm_pgtable_update update = { + .ofs = 1, + .qwords = 0x10, + .pt_bo = pt, + }; + struct xe_migrate_pt_update pt_update = { + .ops = &sanity_ops, + }; + + /* Test xe_migrate_update_pgtables() updates the pagetable as expected */ + expected = 0xf0f0f0f0f0f0f0f0ULL; + xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); + + fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1, + NULL, 0, &pt_update); + if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) + return; + + dma_fence_put(fence); + retval = xe_map_rd(xe, &pt->vmap, 0, u64); + check(retval, expected, "PTE[0] must stay untouched", test); + + for (i = 0; i < update.qwords; i++) { + retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64); + check(retval, i * 0x1111111111111111ULL, "PTE update", test); + } + + retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords), + u64); + check(retval, expected, "PTE[0x11] must stay untouched", test); +} + +static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) +{ + struct xe_gt *gt = m->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny; + struct xe_res_cursor src_it; + struct dma_fence *fence; + u64 retval, expected; + struct xe_bb *bb; + int err; + u8 id = gt->info.id; + + err = xe_bo_vmap(bo); + if (err) { + KUNIT_FAIL(test, "Failed to vmap our pagetables: %li\n", + PTR_ERR(bo)); + return; + } + + big = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, SZ_4M, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(big)) { + KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); + goto vunmap; + } + + pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, GEN8_PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(pt)) { + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", + PTR_ERR(pt)); + goto free_big; + } + + tiny = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, + 2 * SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(tiny)) { + KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", + PTR_ERR(pt)); + 
goto free_pt; + } + + bb = xe_bb_new(m->gt, 32, xe->info.supports_usm); + if (IS_ERR(bb)) { + KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", + PTR_ERR(bb)); + goto free_tiny; + } + + kunit_info(test, "Starting tests, top level PT addr: %llx, special pagetable base addr: %llx\n", + xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE), + xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE)); + + /* First part of the test, are we updating our pagetable bo with a new entry? */ + xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); + expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0); + if (m->eng->vm->flags & XE_VM_FLAGS_64K) + expected |= GEN12_PTE_PS64; + xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), + &src_it, GEN8_PAGE_SIZE, pt); + run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); + + retval = xe_map_rd(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), + u64); + check(retval, expected, "PTE entry write", test); + + /* Now try to write data to our newly mapped 'pagetable', see if it succeeds */ + bb->len = 0; + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead); + expected = 0x12345678U; + + emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, + expected, IS_DGFX(xe)); + run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", + test); + + retval = xe_map_rd(xe, &pt->vmap, 0, u32); + check(retval, expected, "Write to PT after adding PTE", test); + + /* Sanity checks passed, try the full ones! */ + + /* Clear a small bo */ + kunit_info(test, "Clearing small buffer object\n"); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + expected = 0x224488ff; + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected); + if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) + goto out; + + dma_fence_put(fence); + retval = xe_map_rd(xe, &tiny->vmap, 0, u32); + check(retval, expected, "Command clear small first value", test); + retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + check(retval, expected, "Command clear small last value", test); + + if (IS_DGFX(xe)) { + kunit_info(test, "Copying small buffer object to system\n"); + test_copy(m, tiny, test); + } + + /* Clear a big bo with a fixed value */ + kunit_info(test, "Clearing big buffer object\n"); + xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + expected = 0x11223344U; + fence = xe_migrate_clear(m, big, big->ttm.resource, expected); + if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) + goto out; + + dma_fence_put(fence); + retval = xe_map_rd(xe, &big->vmap, 0, u32); + check(retval, expected, "Command clear big first value", test); + retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + check(retval, expected, "Command clear big last value", test); + + if (IS_DGFX(xe)) { + kunit_info(test, "Copying big buffer object to system\n"); + test_copy(m, big, test); + } + + test_pt_update(m, pt, test); + +out: + xe_bb_free(bb, NULL); +free_tiny: + xe_bo_unpin(tiny); + xe_bo_put(tiny); +free_pt: + xe_bo_unpin(pt); + xe_bo_put(pt); +free_big: + xe_bo_unpin(big); + xe_bo_put(big); +vunmap: + xe_bo_vunmap(m->pt_bo); +} + +static int migrate_test_run_device(struct xe_device *xe) +{ + struct kunit *test = xe_cur_kunit(); + struct xe_gt *gt; + int id; + + for_each_gt(gt, xe, id) { + struct xe_migrate *m = gt->migrate; + struct ww_acquire_ctx ww; + + kunit_info(test, "Testing gt id %d.\n", id); + 
xe_vm_lock(m->eng->vm, &ww, 0, true); + xe_migrate_sanity_test(m, test); + xe_vm_unlock(m->eng->vm, &ww); + } + + return 0; +} + +void xe_migrate_sanity_kunit(struct kunit *test) +{ + xe_call_for_each_device(migrate_test_run_device); +} +EXPORT_SYMBOL(xe_migrate_sanity_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c new file mode 100644 index 000000000000..ad779e2bd071 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +void xe_migrate_sanity_kunit(struct kunit *test); + +static struct kunit_case xe_migrate_tests[] = { + KUNIT_CASE(xe_migrate_sanity_kunit), + {} +}; + +static struct kunit_suite xe_migrate_test_suite = { + .name = "xe_migrate", + .test_cases = xe_migrate_tests, +}; + +kunit_test_suite(xe_migrate_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h new file mode 100644 index 000000000000..1ec502b5acf3 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_test.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __XE_TEST_H__ +#define __XE_TEST_H__ + +#include + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include +#include + +/* + * Each test that provides a kunit private test structure, place a test id + * here and point the kunit->priv to an embedded struct xe_test_priv. + */ +enum xe_test_priv_id { + XE_TEST_LIVE_DMA_BUF, +}; + +/** + * struct xe_test_priv - Base class for test private info + * @id: enum xe_test_priv_id to identify the subclass. + */ +struct xe_test_priv { + enum xe_test_priv_id id; +}; + +#define XE_TEST_DECLARE(x) x +#define XE_TEST_ONLY(x) unlikely(x) +#define XE_TEST_EXPORT +#define xe_cur_kunit() current->kunit_test + +/** + * xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by + * current->kunit->priv if it exists and is embedded in the expected subclass. + * @id: Id of the expected subclass. + * + * Return: NULL if the process is not a kunit test, and NULL if the + * current kunit->priv pointer is not pointing to an object of the expected + * subclass. A pointer to the embedded struct xe_test_priv otherwise. + */ +static inline struct xe_test_priv * +xe_cur_kunit_priv(enum xe_test_priv_id id) +{ + struct xe_test_priv *priv; + + if (!xe_cur_kunit()) + return NULL; + + priv = xe_cur_kunit()->priv; + return priv->id == id ? priv : NULL; +} + +#else /* if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) */ + +#define XE_TEST_DECLARE(x) +#define XE_TEST_ONLY(x) 0 +#define XE_TEST_EXPORT static +#define xe_cur_kunit() NULL +#define xe_cur_kunit_priv(_id) NULL + +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c new file mode 100644 index 000000000000..8b9209571fd0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bb.h" +#include "xe_sa.h" +#include "xe_device.h" +#include "xe_engine_types.h" +#include "xe_hw_fence.h" +#include "xe_sched_job.h" +#include "xe_vm_types.h" + +#include "gt/intel_gpu_commands.h" + +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) +{ + struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); + int err; + + if (!bb) + return ERR_PTR(-ENOMEM); + + bb->bo = xe_sa_bo_new(!usm ? 
>->kernel_bb_pool : + >->usm.bb_pool, 4 * dwords + 4); + if (IS_ERR(bb->bo)) { + err = PTR_ERR(bb->bo); + goto err; + } + + bb->cs = xe_sa_bo_cpu_addr(bb->bo); + bb->len = 0; + + return bb; +err: + kfree(bb); + return ERR_PTR(err); +} + +static struct xe_sched_job * +__xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr) +{ + u32 size = drm_suballoc_size(bb->bo); + + XE_BUG_ON((bb->len * 4 + 1) > size); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + + xe_sa_bo_flush_write(bb->bo); + + return xe_sched_job_create(kernel_eng, addr); +} + +struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng, + struct xe_bb *bb, u64 batch_base_ofs) +{ + u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo); + + XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION)); + + return __xe_bb_create_job(wa_eng, bb, &addr); +} + +struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng, + struct xe_bb *bb, + u64 batch_base_ofs, + u32 second_idx) +{ + u64 addr[2] = { + batch_base_ofs + drm_suballoc_soffset(bb->bo), + batch_base_ofs + drm_suballoc_soffset(bb->bo) + + 4 * second_idx, + }; + + BUG_ON(second_idx > bb->len); + BUG_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION)); + + return __xe_bb_create_job(kernel_eng, bb, addr); +} + +struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng, + struct xe_bb *bb) +{ + u64 addr = xe_sa_bo_gpu_addr(bb->bo); + + BUG_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION); + return __xe_bb_create_job(kernel_eng, bb, &addr); +} + +void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence) +{ + if (!bb) + return; + + xe_sa_bo_free(bb->bo, fence); + kfree(bb); +} diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h new file mode 100644 index 000000000000..0cc9260c9634 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BB_H_ +#define _XE_BB_H_ + +#include "xe_bb_types.h" + +struct dma_fence; + +struct xe_gt; +struct xe_engine; +struct xe_sched_job; + +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng, + struct xe_bb *bb); +struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng, + struct xe_bb *bb, u64 batch_ofs, + u32 second_idx); +struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng, + struct xe_bb *bb, u64 batch_ofs); +void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence); + +#endif diff --git a/drivers/gpu/drm/xe/xe_bb_types.h b/drivers/gpu/drm/xe/xe_bb_types.h new file mode 100644 index 000000000000..b7d30308cf90 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bb_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BB_TYPES_H_ +#define _XE_BB_TYPES_H_ + +#include + +struct drm_suballoc; + +struct xe_bb { + struct drm_suballoc *bo; + + u32 *cs; + u32 len; /* in dwords */ +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c new file mode 100644 index 000000000000..ef2c9196c113 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -0,0 +1,1698 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + + +#include "xe_bo.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "xe_device.h" +#include "xe_dma_buf.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_migrate.h" 
+#include "xe_preempt_fence.h" +#include "xe_res_cursor.h" +#include "xe_trace.h" +#include "xe_vm.h" + +static const struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = XE_PL_SYSTEM, + .flags = 0, +}; + +static struct ttm_placement sys_placement = { + .num_placement = 1, + .placement = &sys_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags, +}; + +bool mem_type_is_vram(u32 mem_type) +{ + return mem_type >= XE_PL_VRAM0; +} + +static bool resource_is_vram(struct ttm_resource *res) +{ + return mem_type_is_vram(res->mem_type); +} + +bool xe_bo_is_vram(struct xe_bo *bo) +{ + return resource_is_vram(bo->ttm.resource); +} + +static bool xe_bo_is_user(struct xe_bo *bo) +{ + return bo->flags & XE_BO_CREATE_USER_BIT; +} + +static struct xe_gt * +mem_type_to_gt(struct xe_device *xe, u32 mem_type) +{ + XE_BUG_ON(!mem_type_is_vram(mem_type)); + + return xe_device_get_gt(xe, mem_type - XE_PL_VRAM0); +} + +static void try_add_system(struct xe_bo *bo, struct ttm_place *places, + u32 bo_flags, u32 *c) +{ + if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { + places[*c] = (struct ttm_place) { + .mem_type = XE_PL_TT, + }; + *c += 1; + + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = XE_PL_TT; + } +} + +static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo, + struct ttm_place *places, u32 bo_flags, u32 *c) +{ + struct xe_gt *gt; + + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) { + gt = mem_type_to_gt(xe, XE_PL_VRAM0); + XE_BUG_ON(!gt->mem.vram.size); + + places[*c] = (struct ttm_place) { + .mem_type = XE_PL_VRAM0, + /* + * For eviction / restore on suspend / resume objects + * pinned in VRAM must be contiguous + */ + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT) ? + TTM_PL_FLAG_CONTIGUOUS : 0, + }; + *c += 1; + + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = XE_PL_VRAM0; + } +} + +static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo, + struct ttm_place *places, u32 bo_flags, u32 *c) +{ + struct xe_gt *gt; + + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) { + gt = mem_type_to_gt(xe, XE_PL_VRAM1); + XE_BUG_ON(!gt->mem.vram.size); + + places[*c] = (struct ttm_place) { + .mem_type = XE_PL_VRAM1, + /* + * For eviction / restore on suspend / resume objects + * pinned in VRAM must be contiguous + */ + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT) ? 
+ TTM_PL_FLAG_CONTIGUOUS : 0, + }; + *c += 1; + + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = XE_PL_VRAM1; + } +} + +static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags) +{ + struct ttm_place *places = bo->placements; + u32 c = 0; + + bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; + + /* The order of placements should indicate preferred location */ + + if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) { + try_add_system(bo, places, bo_flags, &c); + if (bo->props.preferred_gt == XE_GT1) { + try_add_vram1(xe, bo, places, bo_flags, &c); + try_add_vram0(xe, bo, places, bo_flags, &c); + } else { + try_add_vram0(xe, bo, places, bo_flags, &c); + try_add_vram1(xe, bo, places, bo_flags, &c); + } + } else if (bo->props.preferred_gt == XE_GT1) { + try_add_vram1(xe, bo, places, bo_flags, &c); + try_add_vram0(xe, bo, places, bo_flags, &c); + try_add_system(bo, places, bo_flags, &c); + } else { + try_add_vram0(xe, bo, places, bo_flags, &c); + try_add_vram1(xe, bo, places, bo_flags, &c); + try_add_system(bo, places, bo_flags, &c); + } + + if (!c) + return -EINVAL; + + bo->placement = (struct ttm_placement) { + .num_placement = c, + .placement = places, + .num_busy_placement = c, + .busy_placement = places, + }; + + return 0; +} + +int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags) +{ + xe_bo_assert_held(bo); + return __xe_bo_placement_for_flags(xe, bo, bo_flags); +} + +static void xe_evict_flags(struct ttm_buffer_object *tbo, + struct ttm_placement *placement) +{ + struct xe_bo *bo; + + if (!xe_bo_is_xe_bo(tbo)) { + /* Don't handle scatter gather BOs */ + if (tbo->type == ttm_bo_type_sg) { + placement->num_placement = 0; + placement->num_busy_placement = 0; + return; + } + + *placement = sys_placement; + return; + } + + /* + * For xe, sg bos that are evicted to system just triggers a + * rebind of the sg list upon subsequent validation to XE_PL_TT. 
+ */ + + bo = ttm_to_xe_bo(tbo); + switch (tbo->resource->mem_type) { + case XE_PL_VRAM0: + case XE_PL_VRAM1: + case XE_PL_TT: + default: + /* for now kick out to system */ + *placement = sys_placement; + break; + } +} + +struct xe_ttm_tt { + struct ttm_tt ttm; + struct device *dev; + struct sg_table sgt; + struct sg_table *sg; +}; + +static int xe_tt_map_sg(struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + unsigned long num_pages = tt->num_pages; + int ret; + + XE_BUG_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL); + + if (xe_tt->sg) + return 0; + + ret = sg_alloc_table_from_pages(&xe_tt->sgt, tt->pages, num_pages, + 0, (u64)num_pages << PAGE_SHIFT, + GFP_KERNEL); + if (ret) + return ret; + + xe_tt->sg = &xe_tt->sgt; + ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (ret) { + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + return ret; + } + + return 0; +} + +struct sg_table *xe_bo_get_sg(struct xe_bo *bo) +{ + struct ttm_tt *tt = bo->ttm.ttm; + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + return xe_tt->sg; +} + +static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, + u32 page_flags) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = xe_bo_device(bo); + struct xe_ttm_tt *tt; + int err; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) + return NULL; + + tt->dev = xe->drm.dev; + + /* TODO: Select caching mode */ + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, + bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached, + DIV_ROUND_UP(xe_device_ccs_bytes(xe_bo_device(bo), + bo->ttm.base.size), + PAGE_SIZE)); + if (err) { + kfree(tt); + return NULL; + } + + return &tt->ttm; +} + +static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, + struct ttm_operation_ctx *ctx) +{ + int err; + + /* + * dma-bufs are not populated with pages, and the dma- + * addresses are set up when moved to XE_PL_TT. 
+ */ + if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) + return 0; + + err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); + if (err) + return err; + + /* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */ + err = xe_tt_map_sg(tt); + if (err) + ttm_pool_free(&ttm_dev->pool, tt); + + return err; +} + +static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) + return; + + if (xe_tt->sg) { + dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } + + return ttm_pool_free(&ttm_dev->pool, tt); +} + +static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) +{ + ttm_tt_fini(tt); + kfree(tt); +} + +static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, + struct ttm_resource *mem) +{ + struct xe_device *xe = ttm_to_xe_device(bdev); + struct xe_gt *gt; + + switch (mem->mem_type) { + case XE_PL_SYSTEM: + case XE_PL_TT: + return 0; + case XE_PL_VRAM0: + case XE_PL_VRAM1: + gt = mem_type_to_gt(xe, mem->mem_type); + mem->bus.offset = mem->start << PAGE_SHIFT; + + if (gt->mem.vram.mapping && + mem->placement & TTM_PL_FLAG_CONTIGUOUS) + mem->bus.addr = (u8 *)gt->mem.vram.mapping + + mem->bus.offset; + + mem->bus.offset += gt->mem.vram.io_start; + mem->bus.is_iomem = true; + +#if !defined(CONFIG_X86) + mem->bus.caching = ttm_write_combined; +#endif + break; + default: + return -EINVAL; + } + return 0; +} + +static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, + const struct ttm_operation_ctx *ctx) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + struct xe_vma *vma; + int ret = 0; + + dma_resv_assert_held(bo->ttm.base.resv); + + if (!xe_device_in_fault_mode(xe) && !list_empty(&bo->vmas)) { + dma_resv_iter_begin(&cursor, bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + } + + list_for_each_entry(vma, &bo->vmas, bo_link) { + struct xe_vm *vm = vma->vm; + + trace_xe_vma_evict(vma); + + if (xe_vm_in_fault_mode(vm)) { + /* Wait for pending binds / unbinds. */ + long timeout; + + if (ctx->no_wait_gpu && + !dma_resv_test_signaled(bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP)) + return -EBUSY; + + timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP, + ctx->interruptible, + MAX_SCHEDULE_TIMEOUT); + if (timeout > 0) { + ret = xe_vm_invalidate_vma(vma); + XE_WARN_ON(ret); + } else if (!timeout) { + ret = -ETIME; + } else { + ret = timeout; + } + + } else { + bool vm_resv_locked = false; + struct xe_vm *vm = vma->vm; + + /* + * We need to put the vma on the vm's rebind_list, + * but need the vm resv to do so. If we can't verify + * that we indeed have it locked, put the vma an the + * vm's notifier.rebind_list instead and scoop later. + */ + if (dma_resv_trylock(&vm->resv)) + vm_resv_locked = true; + else if (ctx->resv != &vm->resv) { + spin_lock(&vm->notifier.list_lock); + list_move_tail(&vma->notifier.rebind_link, + &vm->notifier.rebind_list); + spin_unlock(&vm->notifier.list_lock); + continue; + } + + xe_vm_assert_held(vm); + if (list_empty(&vma->rebind_link) && vma->gt_present) + list_add_tail(&vma->rebind_link, &vm->rebind_list); + + if (vm_resv_locked) + dma_resv_unlock(&vm->resv); + } + } + + return ret; +} + +/* + * The dma-buf map_attachment() / unmap_attachment() is hooked up here. 
+ * Note that unmapping the attachment is deferred to the next + * map_attachment time, or to bo destroy (after idling) whichever comes first. + * This is to avoid syncing before unmap_attachment(), assuming that the + * caller relies on idling the reservation object before moving the + * backing store out. Should that assumption not hold, then we will be able + * to unconditionally call unmap_attachment() when moving out to system. + */ +static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, + struct ttm_resource *old_res, + struct ttm_resource *new_res) +{ + struct dma_buf_attachment *attach = ttm_bo->base.import_attach; + struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, + ttm); + struct sg_table *sg; + + XE_BUG_ON(!attach); + XE_BUG_ON(!ttm_bo->ttm); + + if (new_res->mem_type == XE_PL_SYSTEM) + goto out; + + if (ttm_bo->sg) { + dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + } + + sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sg)) + return PTR_ERR(sg); + + ttm_bo->sg = sg; + xe_tt->sg = sg; + +out: + ttm_bo_move_null(ttm_bo, new_res); + + return 0; +} + +/** + * xe_bo_move_notify - Notify subsystems of a pending move + * @bo: The buffer object + * @ctx: The struct ttm_operation_ctx controlling locking and waits. + * + * This function notifies subsystems of an upcoming buffer move. + * Upon receiving such a notification, subsystems should schedule + * halting access to the underlying pages and optionally add a fence + * to the buffer object's dma_resv object, that signals when access is + * stopped. The caller will wait on all dma_resv fences before + * starting the move. + * + * A subsystem may commence access to the object after obtaining + * bindings to the new backing memory under the object lock. + * + * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode, + * negative error code on error. + */ +static int xe_bo_move_notify(struct xe_bo *bo, + const struct ttm_operation_ctx *ctx) +{ + struct ttm_buffer_object *ttm_bo = &bo->ttm; + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + int ret; + + /* + * If this starts to call into many components, consider + * using a notification chain here. + */ + + if (xe_bo_is_pinned(bo)) + return -EINVAL; + + xe_bo_vunmap(bo); + ret = xe_bo_trigger_rebind(xe, bo, ctx); + if (ret) + return ret; + + /* Don't call move_notify() for imported dma-bufs. 
*/ + if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach) + dma_buf_move_notify(ttm_bo->base.dma_buf); + + return 0; +} + +static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *new_mem, + struct ttm_place *hop) +{ + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct ttm_resource *old_mem = ttm_bo->resource; + struct ttm_tt *ttm = ttm_bo->ttm; + struct xe_gt *gt = NULL; + struct dma_fence *fence; + bool move_lacks_source; + bool needs_clear; + int ret = 0; + + if (!old_mem) { + if (new_mem->mem_type != TTM_PL_SYSTEM) { + hop->mem_type = TTM_PL_SYSTEM; + hop->flags = TTM_PL_FLAG_TEMPORARY; + ret = -EMULTIHOP; + goto out; + } + + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + + if (ttm_bo->type == ttm_bo_type_sg) { + ret = xe_bo_move_notify(bo, ctx); + if (!ret) + ret = xe_bo_move_dmabuf(ttm_bo, old_mem, new_mem); + goto out; + } + + move_lacks_source = !resource_is_vram(old_mem) && + (!ttm || !ttm_tt_is_populated(ttm)); + + needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || + (!ttm && ttm_bo->type == ttm_bo_type_device); + + if ((move_lacks_source && !needs_clear) || + (old_mem->mem_type == XE_PL_SYSTEM && + new_mem->mem_type == XE_PL_TT)) { + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + + if (!move_lacks_source && !xe_bo_is_pinned(bo)) { + ret = xe_bo_move_notify(bo, ctx); + if (ret) + goto out; + } + + if (old_mem->mem_type == XE_PL_TT && + new_mem->mem_type == XE_PL_SYSTEM) { + long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, + DMA_RESV_USAGE_BOOKKEEP, + true, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + ret = timeout; + goto out; + } + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + + if (!move_lacks_source && + ((old_mem->mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) || + (resource_is_vram(old_mem) && + new_mem->mem_type == XE_PL_SYSTEM))) { + hop->fpfn = 0; + hop->lpfn = 0; + hop->mem_type = XE_PL_TT; + hop->flags = TTM_PL_FLAG_TEMPORARY; + ret = -EMULTIHOP; + goto out; + } + + if (bo->gt) + gt = bo->gt; + else if (resource_is_vram(new_mem)) + gt = mem_type_to_gt(xe, new_mem->mem_type); + else if (resource_is_vram(old_mem)) + gt = mem_type_to_gt(xe, old_mem->mem_type); + + XE_BUG_ON(!gt); + XE_BUG_ON(!gt->migrate); + + trace_xe_bo_move(bo); + xe_device_mem_access_get(xe); + + if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { + /* + * Kernel memory that is pinned should only be moved on suspend + * / resume, some of the pinned memory is required for the + * device to resume / use the GPU to move other evicted memory + * (user memory) around. This likely could be optimized a bit + * futher where we find the minimum set of pinned memory + * required for resume but for simplity doing a memcpy for all + * pinned memory. 
+ */ + ret = xe_bo_vmap(bo); + if (!ret) { + ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem); + + /* Create a new VMAP once kernel BO back in VRAM */ + if (!ret && resource_is_vram(new_mem)) { + void *new_addr = gt->mem.vram.mapping + + (new_mem->start << PAGE_SHIFT); + + XE_BUG_ON(new_mem->start != + bo->placements->fpfn); + + iosys_map_set_vaddr_iomem(&bo->vmap, new_addr); + } + } + } else { + if (move_lacks_source) + fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0); + else + fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + xe_device_mem_access_put(xe); + goto out; + } + ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true, + new_mem); + dma_fence_put(fence); + } + + xe_device_mem_access_put(xe); + trace_printk("new_mem->mem_type=%d\n", new_mem->mem_type); + +out: + return ret; + +} + +static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *bo, + unsigned long page_offset) +{ + struct xe_device *xe = ttm_to_xe_device(bo->bdev); + struct xe_gt *gt = mem_type_to_gt(xe, bo->resource->mem_type); + struct xe_res_cursor cursor; + + xe_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor); + return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT; +} + +static void __xe_bo_vunmap(struct xe_bo *bo); + +/* + * TODO: Move this function to TTM so we don't rely on how TTM does its + * locking, thereby abusing TTM internals. + */ +static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo) +{ + bool locked; + + XE_WARN_ON(kref_read(&ttm_bo->kref)); + + /* + * We can typically only race with TTM trylocking under the + * lru_lock, which will immediately be unlocked again since + * the ttm_bo refcount is zero at this point. So trylocking *should* + * always succeed here, as long as we hold the lru lock. + */ + spin_lock(&ttm_bo->bdev->lru_lock); + locked = dma_resv_trylock(ttm_bo->base.resv); + spin_unlock(&ttm_bo->bdev->lru_lock); + XE_WARN_ON(!locked); + + return locked; +} + +static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + struct dma_fence *replacement = NULL; + struct xe_bo *bo; + + if (!xe_bo_is_xe_bo(ttm_bo)) + return; + + bo = ttm_to_xe_bo(ttm_bo); + XE_WARN_ON(bo->created && kref_read(&ttm_bo->base.refcount)); + + /* + * Corner case where TTM fails to allocate memory and this BOs resv + * still points the VMs resv + */ + if (ttm_bo->base.resv != &ttm_bo->base._resv) + return; + + if (!xe_ttm_bo_lock_in_destructor(ttm_bo)) + return; + + /* + * Scrub the preempt fences if any. The unbind fence is already + * attached to the resv. + * TODO: Don't do this for external bos once we scrub them after + * unbind. + */ + dma_resv_for_each_fence(&cursor, ttm_bo->base.resv, + DMA_RESV_USAGE_BOOKKEEP, fence) { + if (xe_fence_is_xe_preempt(fence) && + !dma_fence_is_signaled(fence)) { + if (!replacement) + replacement = dma_fence_get_stub(); + + dma_resv_replace_fences(ttm_bo->base.resv, + fence->context, + replacement, + DMA_RESV_USAGE_BOOKKEEP); + } + } + dma_fence_put(replacement); + + dma_resv_unlock(ttm_bo->base.resv); +} + +static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) +{ + if (!xe_bo_is_xe_bo(ttm_bo)) + return; + + /* + * Object is idle and about to be destroyed. Release the + * dma-buf attachment. 
+ */ + if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) { + struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, + struct xe_ttm_tt, ttm); + + dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg, + DMA_BIDIRECTIONAL); + ttm_bo->sg = NULL; + xe_tt->sg = NULL; + } +} + +struct ttm_device_funcs xe_ttm_funcs = { + .ttm_tt_create = xe_ttm_tt_create, + .ttm_tt_populate = xe_ttm_tt_populate, + .ttm_tt_unpopulate = xe_ttm_tt_unpopulate, + .ttm_tt_destroy = xe_ttm_tt_destroy, + .evict_flags = xe_evict_flags, + .move = xe_bo_move, + .io_mem_reserve = xe_ttm_io_mem_reserve, + .io_mem_pfn = xe_ttm_io_mem_pfn, + .release_notify = xe_ttm_bo_release_notify, + .eviction_valuable = ttm_bo_eviction_valuable, + .delete_mem_notify = xe_ttm_bo_delete_mem_notify, +}; + +static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + + if (bo->ttm.base.import_attach) + drm_prime_gem_destroy(&bo->ttm.base, NULL); + drm_gem_object_release(&bo->ttm.base); + + WARN_ON(!list_empty(&bo->vmas)); + + if (bo->ggtt_node.size) + xe_ggtt_remove_bo(bo->gt->mem.ggtt, bo); + + if (bo->vm && xe_bo_is_user(bo)) + xe_vm_put(bo->vm); + + kfree(bo); +} + +static void xe_gem_object_free(struct drm_gem_object *obj) +{ + /* Our BO reference counting scheme works as follows: + * + * The gem object kref is typically used throughout the driver, + * and the gem object holds a ttm_buffer_object refcount, so + * that when the last gem object reference is put, which is when + * we end up in this function, we put also that ttm_buffer_object + * refcount. Anything using gem interfaces is then no longer + * allowed to access the object in a way that requires a gem + * refcount, including locking the object. + * + * driver ttm callbacks is allowed to use the ttm_buffer_object + * refcount directly if needed. 
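A practical consequence of this scheme is that the final put may have to be deferred out of atomic context. A minimal sketch, assuming the xe_bo_put_deferred()/xe_bo_put_commit() helpers declared in xe_bo.h keep the signatures shown later in this patch (the BO array and the spinlock here are placeholders, not part of the patch):

	#include <linux/llist.h>
	#include "xe_bo.h"

	/* Sketch only: defer final BO puts out of a spinlocked section. */
	static void example_put_bos_atomic(struct xe_bo **bos, int count,
					   spinlock_t *lock)
	{
		LLIST_HEAD(deferred);
		int i;

		spin_lock(lock);
		for (i = 0; i < count; i++)
			xe_bo_put_deferred(bos[i], &deferred); /* never sleeps */
		spin_unlock(lock);

		/* Outside the lock: actually free whatever hit refcount zero. */
		xe_bo_put_commit(&deferred);
	}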
+ */ + __xe_bo_vunmap(gem_to_xe_bo(obj)); + ttm_bo_put(container_of(obj, struct ttm_buffer_object, base)); +} + +static bool should_migrate_to_system(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + + return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic; +} + +static vm_fault_t xe_gem_fault(struct vm_fault *vmf) +{ + struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; + struct drm_device *ddev = tbo->base.dev; + vm_fault_t ret; + int idx, r = 0; + + ret = ttm_bo_vm_reserve(tbo, vmf); + if (ret) + return ret; + + if (drm_dev_enter(ddev, &idx)) { + struct xe_bo *bo = ttm_to_xe_bo(tbo); + + trace_xe_bo_cpu_fault(bo); + + if (should_migrate_to_system(bo)) { + r = xe_bo_migrate(bo, XE_PL_TT); + if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) + ret = VM_FAULT_NOPAGE; + else if (r) + ret = VM_FAULT_SIGBUS; + } + if (!ret) + ret = ttm_bo_vm_fault_reserved(vmf, + vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + + drm_dev_exit(idx); + } else { + ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + + dma_resv_unlock(tbo->base.resv); + return ret; +} + +static const struct vm_operations_struct xe_gem_vm_ops = { + .fault = xe_gem_fault, + .open = ttm_bo_vm_open, + .close = ttm_bo_vm_close, + .access = ttm_bo_vm_access +}; + +static const struct drm_gem_object_funcs xe_gem_object_funcs = { + .free = xe_gem_object_free, + .mmap = drm_gem_ttm_mmap, + .export = xe_gem_prime_export, + .vm_ops = &xe_gem_vm_ops, +}; + +/** + * xe_bo_alloc - Allocate storage for a struct xe_bo + * + * This funcition is intended to allocate storage to be used for input + * to __xe_bo_create_locked(), in the case a pointer to the bo to be + * created is needed before the call to __xe_bo_create_locked(). + * If __xe_bo_create_locked ends up never to be called, then the + * storage allocated with this function needs to be freed using + * xe_bo_free(). + * + * Return: A pointer to an uninitialized struct xe_bo on success, + * ERR_PTR(-ENOMEM) on error. + */ +struct xe_bo *xe_bo_alloc(void) +{ + struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL); + + if (!bo) + return ERR_PTR(-ENOMEM); + + return bo; +} + +/** + * xe_bo_free - Free storage allocated using xe_bo_alloc() + * @bo: The buffer object storage. + * + * Refer to xe_bo_alloc() documentation for valid use-cases. 
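For reference, a minimal sketch of the calling pattern these two helpers are meant for; the precondition and the flag choice are placeholders, and only the rule that xe_bo_free() is used when __xe_bo_create_locked() was never reached comes from the documentation above:

	#include "xe_bo.h"

	static struct xe_bo *example_prealloc_then_create(struct xe_device *xe,
							  size_t size,
							  bool precondition)
	{
		struct xe_bo *storage = xe_bo_alloc();

		if (IS_ERR(storage))
			return storage;

		if (!precondition) {
			/* __xe_bo_create_locked() never called: plain free. */
			xe_bo_free(storage);
			return ERR_PTR(-ENODEV);
		}

		/* On success the returned bo reuses @storage. */
		return __xe_bo_create_locked(xe, storage, NULL, NULL, size,
					     ttm_bo_type_kernel,
					     XE_BO_CREATE_SYSTEM_BIT);
	}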
+ */ +void xe_bo_free(struct xe_bo *bo) +{ + kfree(bo); +} + +struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_gt *gt, struct dma_resv *resv, + size_t size, enum ttm_bo_type type, + u32 flags) +{ + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement *placement; + uint32_t alignment; + int err; + + /* Only kernel objects should set GT */ + XE_BUG_ON(gt && type != ttm_bo_type_kernel); + + if (!bo) { + bo = xe_bo_alloc(); + if (IS_ERR(bo)) + return bo; + } + + if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT) && + !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { + size = ALIGN(size, SZ_64K); + flags |= XE_BO_INTERNAL_64K; + alignment = SZ_64K >> PAGE_SHIFT; + } else { + alignment = SZ_4K >> PAGE_SHIFT; + } + + bo->gt = gt; + bo->size = size; + bo->flags = flags; + bo->ttm.base.funcs = &xe_gem_object_funcs; + bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; + bo->props.preferred_gt = XE_BO_PROPS_INVALID; + bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; + bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL; + INIT_LIST_HEAD(&bo->vmas); + INIT_LIST_HEAD(&bo->pinned_link); + + drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); + + if (resv) { + ctx.allow_res_evict = true; + ctx.resv = resv; + } + + err = __xe_bo_placement_for_flags(xe, bo, bo->flags); + if (WARN_ON(err)) + return ERR_PTR(err); + + /* Defer populating type_sg bos */ + placement = (type == ttm_bo_type_sg || + bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement : + &bo->placement; + err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type, + placement, alignment, + &ctx, NULL, resv, xe_ttm_bo_destroy); + if (err) + return ERR_PTR(err); + + bo->created = true; + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); + + return bo; +} + +struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo; + int err; + + if (vm) + xe_vm_assert_held(vm); + bo = __xe_bo_create_locked(xe, NULL, gt, vm ? 
&vm->resv : NULL, size, + type, flags); + if (IS_ERR(bo)) + return bo; + + if (vm && xe_bo_is_user(bo)) + xe_vm_get(vm); + bo->vm = vm; + + if (flags & XE_BO_CREATE_GGTT_BIT) { + XE_BUG_ON(!gt); + + err = xe_ggtt_insert_bo(gt->mem.ggtt, bo); + if (err) + goto err_unlock_put_bo; + } + + return bo; + +err_unlock_put_bo: + xe_bo_unlock_vm_held(bo); + xe_bo_put(bo); + return ERR_PTR(err); +} + +struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags); + + if (!IS_ERR(bo)) + xe_bo_unlock_vm_held(bo); + + return bo; +} + +struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags); + int err; + + if (IS_ERR(bo)) + return bo; + + err = xe_bo_pin(bo); + if (err) + goto err_put; + + err = xe_bo_vmap(bo); + if (err) + goto err_unpin; + + xe_bo_unlock_vm_held(bo); + + return bo; + +err_unpin: + xe_bo_unpin(bo); +err_put: + xe_bo_unlock_vm_held(bo); + xe_bo_put(bo); + return ERR_PTR(err); +} + +struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, + const void *data, size_t size, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo = xe_bo_create_pin_map(xe, gt, NULL, + ALIGN(size, PAGE_SIZE), + type, flags); + if (IS_ERR(bo)) + return bo; + + xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); + + return bo; +} + +/* + * XXX: This is in the VM bind data path, likely should calculate this once and + * store, with a recalculation if the BO is moved. + */ +static uint64_t vram_region_io_offset(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type); + + return gt->mem.vram.io_start - xe->mem.vram.io_start; +} + +/** + * xe_bo_pin_external - pin an external BO + * @bo: buffer object to be pinned + * + * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD) + * BO. Unique call compared to xe_bo_pin as this function has it own set of + * asserts and code to ensure evict / restore on suspend / resume. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_bo_pin_external(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + int err; + + XE_BUG_ON(bo->vm); + XE_BUG_ON(!xe_bo_is_user(bo)); + + if (!xe_bo_is_pinned(bo)) { + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + + if (xe_bo_is_vram(bo)) { + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, + &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + } + } + + ttm_bo_pin(&bo->ttm); + + /* + * FIXME: If we always use the reserve / unreserve functions for locking + * we do not need this. + */ + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); + + return 0; +} + +int xe_bo_pin(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + int err; + + /* We currently don't expect user BO to be pinned */ + XE_BUG_ON(xe_bo_is_user(bo)); + + /* Pinned object must be in GGTT or have pinned flag */ + XE_BUG_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT))); + + /* + * No reason we can't support pinning imported dma-bufs we just don't + * expect to pin an imported dma-buf. 
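As a point of comparison, pinned kernel BOs normally reach this path through xe_bo_create_pin_map(). A rough sketch of that create/teardown pairing, assuming the helpers and flags declared elsewhere in this patch (size and placement flags are placeholders):

	#include "xe_bo.h"

	static struct xe_bo *example_kernel_bo_get(struct xe_gt *gt, size_t size)
	{
		/* Created pinned, GGTT-mapped and vmapped in one call. */
		return xe_bo_create_pin_map(gt_to_xe(gt), gt, NULL, size,
					    ttm_bo_type_kernel,
					    XE_BO_CREATE_VRAM_IF_DGFX(gt) |
					    XE_BO_CREATE_GGTT_BIT);
	}

	static void example_kernel_bo_put(struct xe_bo *bo)
	{
		/* Unpins under the BO lock and drops the reference. */
		xe_bo_unpin_map_no_vm(bo);
	}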
+ */ + XE_BUG_ON(bo->ttm.base.import_attach); + + /* We only expect at most 1 pin */ + XE_BUG_ON(xe_bo_is_pinned(bo)); + + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + + /* + * For pinned objects in on DGFX, we expect these objects to be in + * contiguous VRAM memory. Required eviction / restore during suspend / + * resume (force restore to same physical address). + */ + if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && + bo->flags & XE_BO_INTERNAL_TEST)) { + struct ttm_place *place = &(bo->placements[0]); + bool lmem; + + XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); + XE_BUG_ON(!mem_type_is_vram(place->mem_type)); + + place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) - + vram_region_io_offset(bo)) >> PAGE_SHIFT; + place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); + + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + spin_unlock(&xe->pinned.lock); + } + + ttm_bo_pin(&bo->ttm); + + /* + * FIXME: If we always use the reserve / unreserve functions for locking + * we do not need this. + */ + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); + + return 0; +} + +/** + * xe_bo_unpin_external - unpin an external BO + * @bo: buffer object to be unpinned + * + * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD) + * BO. Unique call compared to xe_bo_unpin as this function has it own set of + * asserts and code to ensure evict / restore on suspend / resume. + * + * Returns 0 for success, negative error code otherwise. + */ +void xe_bo_unpin_external(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + + XE_BUG_ON(bo->vm); + XE_BUG_ON(!xe_bo_is_pinned(bo)); + XE_BUG_ON(!xe_bo_is_user(bo)); + + if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) { + spin_lock(&xe->pinned.lock); + list_del_init(&bo->pinned_link); + spin_unlock(&xe->pinned.lock); + } + + ttm_bo_unpin(&bo->ttm); + + /* + * FIXME: If we always use the reserve / unreserve functions for locking + * we do not need this. + */ + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); +} + +void xe_bo_unpin(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + + XE_BUG_ON(bo->ttm.base.import_attach); + XE_BUG_ON(!xe_bo_is_pinned(bo)); + + if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && + bo->flags & XE_BO_INTERNAL_TEST)) { + XE_BUG_ON(list_empty(&bo->pinned_link)); + + spin_lock(&xe->pinned.lock); + list_del_init(&bo->pinned_link); + spin_unlock(&xe->pinned.lock); + } + + ttm_bo_unpin(&bo->ttm); +} + +/** + * xe_bo_validate() - Make sure the bo is in an allowed placement + * @bo: The bo, + * @vm: Pointer to a the vm the bo shares a locked dma_resv object with, or + * NULL. Used together with @allow_res_evict. + * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's + * reservation object. + * + * Make sure the bo is in allowed placement, migrating it if necessary. If + * needed, other bos will be evicted. If bos selected for eviction shares + * the @vm's reservation object, they can be evicted iff @allow_res_evict is + * set to true, otherwise they will be bypassed. + * + * Return: 0 on success, negative error code on failure. May return + * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal. 
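A minimal usage sketch, assuming the locking helpers keep the signatures shown elsewhere in this patch; the external/private split and the allow_res_evict choice are illustrative only:

	static int example_validate_external(struct xe_bo *bo)
	{
		struct ww_acquire_ctx ww;
		int err;

		err = xe_bo_lock(bo, &ww, 0, true);
		if (err)
			return err;

		err = xe_bo_validate(bo, NULL, false);
		xe_bo_unlock(bo, &ww);

		return err;
	}

	static int example_validate_vm_private(struct xe_vm *vm, struct xe_bo *bo)
	{
		struct ww_acquire_ctx ww;
		int err;

		/*
		 * Caller is assumed to already hold vm->lock (see the
		 * lockdep assert in xe_bo_validate()).
		 */
		err = xe_vm_lock(vm, &ww, 0, true);
		if (err)
			return err;

		/* BOs sharing @vm's reservation object may now be evicted. */
		err = xe_bo_validate(bo, vm, true);
		xe_vm_unlock(vm, &ww);

		return err;
	}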
+ */ +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) +{ + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + + if (vm) { + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + ctx.allow_res_evict = allow_res_evict; + ctx.resv = &vm->resv; + } + + return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); +} + +bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) +{ + if (bo->destroy == &xe_ttm_bo_destroy) + return true; + + return false; +} + +dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, + size_t page_size, bool *is_lmem) +{ + struct xe_res_cursor cur; + u64 page; + + if (!READ_ONCE(bo->ttm.pin_count)) + xe_bo_assert_held(bo); + + XE_BUG_ON(page_size > PAGE_SIZE); + page = offset >> PAGE_SHIFT; + offset &= (PAGE_SIZE - 1); + + *is_lmem = xe_bo_is_vram(bo); + + if (!*is_lmem) { + XE_BUG_ON(!bo->ttm.ttm); + + xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, + page_size, &cur); + return xe_res_dma(&cur) + offset; + } else { + struct xe_res_cursor cur; + + xe_res_first(bo->ttm.resource, page << PAGE_SHIFT, + page_size, &cur); + return cur.start + offset + vram_region_io_offset(bo); + } +} + +int xe_bo_vmap(struct xe_bo *bo) +{ + void *virtual; + bool is_iomem; + int ret; + + xe_bo_assert_held(bo); + + if (!iosys_map_is_null(&bo->vmap)) + return 0; + + /* + * We use this more or less deprecated interface for now since + * ttm_bo_vmap() doesn't offer the optimization of kmapping + * single page bos, which is done here. + * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap + * to use struct iosys_map. + */ + ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + if (ret) + return ret; + + virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); + if (is_iomem) + iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual); + else + iosys_map_set_vaddr(&bo->vmap, virtual); + + return 0; +} + +static void __xe_bo_vunmap(struct xe_bo *bo) +{ + if (!iosys_map_is_null(&bo->vmap)) { + iosys_map_clear(&bo->vmap); + ttm_bo_kunmap(&bo->kmap); + } +} + +void xe_bo_vunmap(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + __xe_bo_vunmap(bo); +} + +int xe_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_gem_create *args = data; + struct ww_acquire_ctx ww; + struct xe_vm *vm = NULL; + struct xe_bo *bo; + unsigned bo_flags = XE_BO_CREATE_USER_BIT; + u32 handle; + int err; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & + ~(XE_GEM_CREATE_FLAG_DEFER_BACKING | + XE_GEM_CREATE_FLAG_SCANOUT | + xe->info.mem_region_mask))) + return -EINVAL; + + /* at least one memory type must be specified */ + if (XE_IOCTL_ERR(xe, !(args->flags & xe->info.mem_region_mask))) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->handle)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->size & ~PAGE_MASK)) + return -EINVAL; + + if (args->vm_id) { + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + return -ENOENT; + err = xe_vm_lock(vm, &ww, 0, true); + if (err) { + xe_vm_put(vm); + return err; + } + } + + if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING) + bo_flags |= XE_BO_DEFER_BACKING; + + if (args->flags & XE_GEM_CREATE_FLAG_SCANOUT) + bo_flags |= XE_BO_SCANOUT_BIT; + + bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); + bo = xe_bo_create(xe, NULL, vm, 
args->size, ttm_bo_type_device, + bo_flags); + if (vm) { + xe_vm_unlock(vm, &ww); + xe_vm_put(vm); + } + + if (IS_ERR(bo)) + return PTR_ERR(bo); + + err = drm_gem_handle_create(file, &bo->ttm.base, &handle); + xe_bo_put(bo); + if (err) + return err; + + args->handle = handle; + + return 0; +} + +int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct drm_xe_gem_mmap_offset *args = data; + struct drm_gem_object *gem_obj; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags)) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file, args->handle); + if (XE_IOCTL_ERR(xe, !gem_obj)) + return -ENOENT; + + /* The mmap offset was set up at BO allocation time. */ + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + + xe_bo_put(gem_to_xe_bo(gem_obj)); + return 0; +} + +int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, + int num_resv, bool intr) +{ + struct ttm_validate_buffer tv_bo; + LIST_HEAD(objs); + LIST_HEAD(dups); + + XE_BUG_ON(!ww); + + tv_bo.num_shared = num_resv; + tv_bo.bo = &bo->ttm;; + list_add_tail(&tv_bo.head, &objs); + + return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); +} + +void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww) +{ + dma_resv_unlock(bo->ttm.base.resv); + ww_acquire_fini(ww); +} + +/** + * xe_bo_can_migrate - Whether a buffer object likely can be migrated + * @bo: The buffer object to migrate + * @mem_type: The TTM memory type intended to migrate to + * + * Check whether the buffer object supports migration to the + * given memory type. Note that pinning may affect the ability to migrate as + * returned by this function. + * + * This function is primarily intended as a helper for checking the + * possibility to migrate buffer objects and can be called without + * the object lock held. + * + * Return: true if migration is possible, false otherwise. + */ +bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type) +{ + unsigned int cur_place; + + if (bo->ttm.type == ttm_bo_type_kernel) + return true; + + if (bo->ttm.type == ttm_bo_type_sg) + return false; + + for (cur_place = 0; cur_place < bo->placement.num_placement; + cur_place++) { + if (bo->placements[cur_place].mem_type == mem_type) + return true; + } + + return false; +} + +static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = mem_type; +} + +/** + * xe_bo_migrate - Migrate an object to the desired region id + * @bo: The buffer object to migrate. + * @mem_type: The TTM region type to migrate to. + * + * Attempt to migrate the buffer object to the desired memory region. The + * buffer object may not be pinned, and must be locked. + * On successful completion, the object memory type will be updated, + * but an async migration task may not have completed yet, and to + * accomplish that, the object's kernel fences must be signaled with + * the object lock held. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -EINTR or -ERESTARTSYS if signal pending. 
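A usage sketch under those rules, assuming a sleepable caller and with error handling trimmed: migrate, then wait for the kernel fences with the object lock still held before touching the pages with the CPU.

	static int example_move_to_tt_for_cpu(struct xe_bo *bo)
	{
		struct ww_acquire_ctx ww;
		int err;

		err = xe_bo_lock(bo, &ww, 0, true);
		if (err)
			return err;

		err = xe_bo_migrate(bo, XE_PL_TT);
		if (!err)
			/* The move may still be in flight; wait it out. */
			dma_resv_wait_timeout(bo->ttm.base.resv,
					      DMA_RESV_USAGE_KERNEL, false,
					      MAX_SCHEDULE_TIMEOUT);

		xe_bo_unlock(bo, &ww);

		return err;
	}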
+ */ +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) +{ + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement placement; + struct ttm_place requested; + + xe_bo_assert_held(bo); + + if (bo->ttm.resource->mem_type == mem_type) + return 0; + + if (xe_bo_is_pinned(bo)) + return -EBUSY; + + if (!xe_bo_can_migrate(bo, mem_type)) + return -EINVAL; + + xe_place_from_ttm_type(mem_type, &requested); + placement.num_placement = 1; + placement.num_busy_placement = 1; + placement.placement = &requested; + placement.busy_placement = &requested; + + return ttm_bo_validate(&bo->ttm, &placement, &ctx); +} + +/** + * xe_bo_evict - Evict an object to evict placement + * @bo: The buffer object to migrate. + * @force_alloc: Set force_alloc in ttm_operation_ctx + * + * On successful completion, the object memory will be moved to evict + * placement. Ths function blocks until the object has been fully moved. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_bo_evict(struct xe_bo *bo, bool force_alloc) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false, + .force_alloc = force_alloc, + }; + struct ttm_placement placement; + int ret; + + xe_evict_flags(&bo->ttm, &placement); + ret = ttm_bo_validate(&bo->ttm, &placement, &ctx); + if (ret) + return ret; + + dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + + return 0; +} + +/** + * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when + * placed in system memory. + * @bo: The xe_bo + * + * If a bo has an allowable placement in XE_PL_TT memory, it can't use + * flat CCS compression, because the GPU then has no way to access the + * CCS metadata using relevant commands. For the opposite case, we need to + * allocate storage for the CCS metadata when the BO is not resident in + * VRAM memory. + * + * Return: true if extra pages need to be allocated, false otherwise. + */ +bool xe_bo_needs_ccs_pages(struct xe_bo *bo) +{ + return bo->ttm.type == ttm_bo_type_device && + !(bo->flags & XE_BO_CREATE_SYSTEM_BIT) && + (bo->flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT)); +} + +/** + * __xe_bo_release_dummy() - Dummy kref release function + * @kref: The embedded struct kref. + * + * Dummy release function for xe_bo_put_deferred(). Keep off. + */ +void __xe_bo_release_dummy(struct kref *kref) +{ +} + +/** + * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred(). + * @deferred: The lockless list used for the call to xe_bo_put_deferred(). + * + * Puts all bos whose put was deferred by xe_bo_put_deferred(). + * The @deferred list can be either an onstack local list or a global + * shared list used by a workqueue. + */ +void xe_bo_put_commit(struct llist_head *deferred) +{ + struct llist_node *freed; + struct xe_bo *bo, *next; + + if (!deferred) + return; + + freed = llist_del_all(deferred); + if (!freed) + return; + + llist_for_each_entry_safe(bo, next, freed, freed) + drm_gem_object_free(&bo->ttm.base.refcount); +} + +/** + * xe_bo_dumb_create - Create a dumb bo as backing for a fb + * @file_priv: ... + * @dev: ... + * @args: ... + * + * See dumb_create() hook in include/drm/drm_drv.h + * + * Return: ... 
+ */ +int xe_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_bo *bo; + uint32_t handle; + int cpp = DIV_ROUND_UP(args->bpp, 8); + int err; + u32 page_size = max_t(u32, PAGE_SIZE, + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K); + + args->pitch = ALIGN(args->width * cpp, 64); + args->size = ALIGN(mul_u32_u32(args->pitch, args->height), + page_size); + + bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device, + XE_BO_CREATE_VRAM_IF_DGFX(to_gt(xe)) | + XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&bo->ttm.base); + if (!err) + args->handle = handle; + return err; +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_bo.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h new file mode 100644 index 000000000000..1a49c0a3c4c6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_BO_H_ +#define _XE_BO_H_ + +#include "xe_bo_types.h" +#include "xe_macros.h" +#include "xe_vm_types.h" + +#define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ + +#define XE_BO_CREATE_USER_BIT BIT(1) +#define XE_BO_CREATE_SYSTEM_BIT BIT(2) +#define XE_BO_CREATE_VRAM0_BIT BIT(3) +#define XE_BO_CREATE_VRAM1_BIT BIT(4) +#define XE_BO_CREATE_VRAM_IF_DGFX(gt) \ + (IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \ + XE_BO_CREATE_SYSTEM_BIT) +#define XE_BO_CREATE_GGTT_BIT BIT(5) +#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6) +#define XE_BO_CREATE_PINNED_BIT BIT(7) +#define XE_BO_DEFER_BACKING BIT(8) +#define XE_BO_SCANOUT_BIT BIT(9) +/* this one is trigger internally only */ +#define XE_BO_INTERNAL_TEST BIT(30) +#define XE_BO_INTERNAL_64K BIT(31) + +#define PPAT_UNCACHED GENMASK_ULL(4, 3) +#define PPAT_CACHED_PDE 0 +#define PPAT_CACHED BIT_ULL(7) +#define PPAT_DISPLAY_ELLC BIT_ULL(4) + +#define GEN8_PTE_SHIFT 12 +#define GEN8_PAGE_SIZE (1 << GEN8_PTE_SHIFT) +#define GEN8_PTE_MASK (GEN8_PAGE_SIZE - 1) +#define GEN8_PDE_SHIFT (GEN8_PTE_SHIFT - 3) +#define GEN8_PDES (1 << GEN8_PDE_SHIFT) +#define GEN8_PDE_MASK (GEN8_PDES - 1) + +#define GEN8_64K_PTE_SHIFT 16 +#define GEN8_64K_PAGE_SIZE (1 << GEN8_64K_PTE_SHIFT) +#define GEN8_64K_PTE_MASK (GEN8_64K_PAGE_SIZE - 1) +#define GEN8_64K_PDE_MASK (GEN8_PDE_MASK >> 4) + +#define GEN8_PDE_PS_2M BIT_ULL(7) +#define GEN8_PDPE_PS_1G BIT_ULL(7) +#define GEN8_PDE_IPS_64K BIT_ULL(11) + +#define GEN12_GGTT_PTE_LM BIT_ULL(1) +#define GEN12_USM_PPGTT_PTE_AE BIT_ULL(10) +#define GEN12_PPGTT_PTE_LM BIT_ULL(11) +#define GEN12_PDE_64K BIT_ULL(6) +#define GEN12_PTE_PS64 BIT_ULL(8) + +#define GEN8_PAGE_PRESENT BIT_ULL(0) +#define GEN8_PAGE_RW BIT_ULL(1) + +#define PTE_READ_ONLY BIT(0) + +#define XE_PL_SYSTEM TTM_PL_SYSTEM +#define XE_PL_TT TTM_PL_TT +#define XE_PL_VRAM0 TTM_PL_VRAM +#define XE_PL_VRAM1 (XE_PL_VRAM0 + 1) + +#define XE_BO_PROPS_INVALID (-1) + +struct sg_table; + +struct xe_bo *xe_bo_alloc(void); +void xe_bo_free(struct xe_bo *bo); + +struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_gt *gt, struct dma_resv *resv, + size_t size, enum ttm_bo_type type, + u32 flags); +struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, + struct 
xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, + const void *data, size_t size, + enum ttm_bo_type type, u32 flags); + +int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, + u32 bo_flags); + +static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) +{ + return container_of(bo, struct xe_bo, ttm); +} + +static inline struct xe_bo *gem_to_xe_bo(const struct drm_gem_object *obj) +{ + return container_of(obj, struct xe_bo, ttm.base); +} + +#define xe_bo_device(bo) ttm_to_xe_device((bo)->ttm.bdev) + +static inline struct xe_bo *xe_bo_get(struct xe_bo *bo) +{ + if (bo) + drm_gem_object_get(&bo->ttm.base); + + return bo; +} + +static inline void xe_bo_put(struct xe_bo *bo) +{ + if (bo) + drm_gem_object_put(&bo->ttm.base); +} + +static inline void xe_bo_assert_held(struct xe_bo *bo) +{ + if (bo) + dma_resv_assert_held((bo)->ttm.base.resv); +} + +int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, + int num_resv, bool intr); + +void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww); + +static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) +{ + if (bo) { + XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv); + if (bo->vm) + xe_vm_assert_held(bo->vm); + else + dma_resv_unlock(bo->ttm.base.resv); + } +} + +static inline void xe_bo_lock_no_vm(struct xe_bo *bo, + struct ww_acquire_ctx *ctx) +{ + if (bo) { + XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && + bo->ttm.base.resv != &bo->ttm.base._resv)); + dma_resv_lock(bo->ttm.base.resv, ctx); + } +} + +static inline void xe_bo_unlock_no_vm(struct xe_bo *bo) +{ + if (bo) { + XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && + bo->ttm.base.resv != &bo->ttm.base._resv)); + dma_resv_unlock(bo->ttm.base.resv); + } +} + +int xe_bo_pin_external(struct xe_bo *bo); +int xe_bo_pin(struct xe_bo *bo); +void xe_bo_unpin_external(struct xe_bo *bo); +void xe_bo_unpin(struct xe_bo *bo); +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict); + +static inline bool xe_bo_is_pinned(struct xe_bo *bo) +{ + return bo->ttm.pin_count; +} + +static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) +{ + if (likely(bo)) { + xe_bo_lock_no_vm(bo, NULL); + xe_bo_unpin(bo); + xe_bo_unlock_no_vm(bo); + + xe_bo_put(bo); + } +} + +bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo); +dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, + size_t page_size, bool *is_lmem); + +static inline dma_addr_t +xe_bo_main_addr(struct xe_bo *bo, size_t page_size) +{ + bool is_lmem; + + return xe_bo_addr(bo, 0, page_size, &is_lmem); +} + +static inline u32 +xe_bo_ggtt_addr(struct xe_bo *bo) +{ + XE_BUG_ON(bo->ggtt_node.size > bo->size); + XE_BUG_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); + return bo->ggtt_node.start; +} + +int xe_bo_vmap(struct xe_bo *bo); +void xe_bo_vunmap(struct xe_bo *bo); + +bool mem_type_is_vram(u32 mem_type); +bool xe_bo_is_vram(struct xe_bo *bo); + +bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); + +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); +int xe_bo_evict(struct xe_bo *bo, bool force_alloc); + +extern struct ttm_device_funcs xe_ttm_funcs; + +int xe_gem_create_ioctl(struct 
drm_device *dev, void *data, + struct drm_file *file); +int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_bo_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); + +bool xe_bo_needs_ccs_pages(struct xe_bo *bo); + +static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) +{ + return PAGE_ALIGN(bo->ttm.base.size); +} + +void __xe_bo_release_dummy(struct kref *kref); + +/** + * xe_bo_put_deferred() - Put a buffer object with delayed final freeing + * @bo: The bo to put. + * @deferred: List to which to add the buffer object if we cannot put, or + * NULL if the function is to put unconditionally. + * + * Since the final freeing of an object includes both sleeping and (!) + * memory allocation in the dma_resv individualization, it's not ok + * to put an object from atomic context nor from within a held lock + * tainted by reclaim. In such situations we want to defer the final + * freeing until we've exited the restricting context, or in the worst + * case to a workqueue. + * This function either puts the object if possible without the refcount + * reaching zero, or adds it to the @deferred list if that was not possible. + * The caller needs to follow up with a call to xe_bo_put_commit() to actually + * put the bo iff this function returns true. It's safe to always + * follow up with a call to xe_bo_put_commit(). + * TODO: It's TTM that is the villain here. Perhaps TTM should add an + * interface like this. + * + * Return: true if @bo was the first object put on the @freed list, + * false otherwise. + */ +static inline bool +xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred) +{ + if (!deferred) { + xe_bo_put(bo); + return false; + } + + if (!kref_put(&bo->ttm.base.refcount, __xe_bo_release_dummy)) + return false; + + return llist_add(&bo->freed, deferred); +} + +void xe_bo_put_commit(struct llist_head *deferred); + +struct sg_table *xe_bo_get_sg(struct xe_bo *bo); + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +/** + * xe_bo_is_mem_type - Whether the bo currently resides in the given + * TTM memory type + * @bo: The bo to check. + * @mem_type: The TTM memory type. + * + * Return: true iff the bo resides in @mem_type, false otherwise. + */ +static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type) +{ + xe_bo_assert_held(bo); + return bo->ttm.resource->mem_type == mem_type; +} +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h new file mode 100644 index 000000000000..f57d440cc95a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_doc.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BO_DOC_H_ +#define _XE_BO_DOC_H_ + +/** + * DOC: Buffer Objects (BO) + * + * BO management + * ============= + * + * TTM manages (placement, eviction, etc...) all BOs in XE. + * + * BO creation + * =========== + * + * Create a chunk of memory which can be used by the GPU. Placement rules + * (sysmem or vram region) passed in upon creation. TTM handles placement of BO + * and can trigger eviction of other BOs to make space for the new BO. + * + * Kernel BOs + * ---------- + * + * A kernel BO is created as part of driver load (e.g. uC firmware images, GuC + * ADS, etc...) or a BO created as part of a user operation which requires + * a kernel BO (e.g. engine state, memory for page tables, etc...). 
These BOs + * are typically mapped in the GGTT (any kernel BOs aside memory for page tables + * are in the GGTT), are pinned (can't move or be evicted at runtime), have a + * vmap (XE can access the memory via xe_map layer) and have contiguous physical + * memory. + * + * More details of why kernel BOs are pinned and contiguous below. + * + * User BOs + * -------- + * + * A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is + * created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user + * access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All + * user BOs are evictable and user BOs are never pinned by XE. The allocation of + * the backing store can be defered from creation time until first use which is + * either mmap, bind, or pagefault. + * + * Private BOs + * ~~~~~~~~~~~ + * + * A private BO is a user BO created with a valid VM argument passed into the + * create IOCTL. If a BO is private it cannot be exported via prime FD and + * mappings can only be created for the BO within the VM it is tied to. Lastly, + * the BO dma-resv slots / lock point to the VM's dma-resv slots / lock (all + * private BOs to a VM share common dma-resv slots / lock). + * + * External BOs + * ~~~~~~~~~~~~ + * + * An external BO is a user BO created with a NULL VM argument passed into the + * create IOCTL. An external BO can be shared with different UMDs / devices via + * prime FD and the BO can be mapped into multiple VMs. An external BO has its + * own unique dma-resv slots / lock. An external BO will be in an array of all + * VMs which has a mapping of the BO. This allows VMs to lookup and lock all + * external BOs mapped in the VM as needed. + * + * BO placement + * ~~~~~~~~~~~~ + * + * When a user BO is created, a mask of valid placements is passed indicating + * which memory regions are considered valid. + * + * The memory region information is available via query uAPI (TODO: add link). + * + * BO validation + * ============= + * + * BO validation (ttm_bo_validate) refers to ensuring a BO has a valid + * placement. If a BO was swapped to temporary storage, a validation call will + * trigger a move back to a valid (location where GPU can access BO) placement. + * Validation of a BO may evict other BOs to make room for the BO being + * validated. + * + * BO eviction / moving + * ==================== + * + * All eviction (or in other words, moving a BO from one memory location to + * another) is routed through TTM with a callback into XE. + * + * Runtime eviction + * ---------------- + * + * Runtime evictions refers to during normal operations where TTM decides it + * needs to move a BO. Typically this is because TTM needs to make room for + * another BO and the evicted BO is first BO on LRU list that is not locked. + * + * An example of this is a new BO which can only be placed in VRAM but there is + * not space in VRAM. There could be multiple BOs which have sysmem and VRAM + * placement rules which currently reside in VRAM, TTM trigger a will move of + * one (or multiple) of these BO(s) until there is room in VRAM to place the new + * BO. The evicted BO(s) are valid but still need new bindings before the BO + * used again (exec or compute mode rebind worker). + * + * Another example would be, TTM can't find a BO to evict which has another + * valid placement. In this case TTM will evict one (or multiple) unlocked BO(s) + * to a temporary unreachable (invalid) placement. 
The evicted BO(s) are invalid + * and before next use need to be moved to a valid placement and rebound. + * + * In both cases, moves of these BOs are scheduled behind the fences in the BO's + * dma-resv slots. + * + * WW locking tries to ensures if 2 VMs use 51% of the memory forward progress + * is made on both VMs. + * + * Runtime eviction uses per a GT migration engine (TODO: link to migration + * engine doc) to do a GPU memcpy from one location to another. + * + * Rebinds after runtime eviction + * ------------------------------ + * + * When BOs are moved, every mapping (VMA) of the BO needs to rebound before + * the BO is used again. Every VMA is added to an evicted list of its VM when + * the BO is moved. This is safe because of the VM locking structure (TODO: link + * to VM locking doc). On the next use of a VM (exec or compute mode rebind + * worker) the evicted VMA list is checked and rebinds are triggered. In the + * case of faulting VM, the rebind is done in the page fault handler. + * + * Suspend / resume eviction of VRAM + * --------------------------------- + * + * During device suspend / resume VRAM may lose power which means the contents + * of VRAM's memory is blown away. Thus BOs present in VRAM at the time of + * suspend must be moved to sysmem in order for their contents to be saved. + * + * A simple TTM call (ttm_resource_manager_evict_all) can move all non-pinned + * (user) BOs to sysmem. External BOs that are pinned need to be manually + * evicted with a simple loop + xe_bo_evict call. It gets a little trickier + * with kernel BOs. + * + * Some kernel BOs are used by the GT migration engine to do moves, thus we + * can't move all of the BOs via the GT migration engine. For simplity, use a + * TTM memcpy (CPU) to move any kernel (pinned) BO on either suspend or resume. + * + * Some kernel BOs need to be restored to the exact same physical location. TTM + * makes this rather easy but the caveat is the memory must be contiguous. Again + * for simplity, we enforce that all kernel (pinned) BOs are contiguous and + * restored to the same physical location. + * + * Pinned external BOs in VRAM are restored on resume via the GPU. + * + * Rebinds after suspend / resume + * ------------------------------ + * + * Most kernel BOs have GGTT mappings which must be restored during the resume + * process. All user BOs are rebound after validation on their next use. + * + * Future work + * =========== + * + * Trim the list of BOs which is saved / restored via TTM memcpy on suspend / + * resume. All we really need to save / restore via TTM memcpy is the memory + * required for the GuC to load and the memory for the GT migrate engine to + * operate. + * + * Do not require kernel BOs to be contiguous in physical memory / restored to + * the same physical address on resume. In all likelihood the only memory that + * needs to be restored to the same physical address is memory used for page + * tables. All of that memory is allocated 1 page at time so the contiguous + * requirement isn't needed. Some work on the vmap code would need to be done if + * kernel BOs are not contiguous too. + * + * Make some kernel BO evictable rather than pinned. An example of this would be + * engine state, in all likelihood if the dma-slots of these BOs where properly + * used rather than pinning we could safely evict + rebind these BOs as needed. + * + * Some kernel BOs do not need to be restored on resume (e.g. 
GuC ADS as that is + * repopulated on resume), add flag to mark such objects as no save / restore. + */ + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c new file mode 100644 index 000000000000..7046dc203138 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_bo_evict.h" +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_gt.h" + +/** + * xe_bo_evict_all - evict all BOs from VRAM + * + * @xe: xe device + * + * Evict non-pinned user BOs first (via GPU), evict pinned external BOs next + * (via GPU), wait for evictions, and finally evict pinned kernel BOs via CPU. + * All eviction magic done via TTM calls. + * + * Evict == move VRAM BOs to temporary (typically system) memory. + * + * This function should be called before the device goes into a suspend state + * where the VRAM loses power. + */ +int xe_bo_evict_all(struct xe_device *xe) +{ + struct ttm_device *bdev = &xe->ttm; + struct ww_acquire_ctx ww; + struct xe_bo *bo; + struct xe_gt *gt; + struct list_head still_in_list; + u32 mem_type; + u8 id; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + /* User memory */ + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { + struct ttm_resource_manager *man = + ttm_manager_type(bdev, mem_type); + + if (man) { + ret = ttm_resource_manager_evict_all(bdev, man); + if (ret) + return ret; + } + } + + /* Pinned user memory in VRAM */ + INIT_LIST_HEAD(&still_in_list); + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.external_vram, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &still_in_list); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, &ww, 0, false); + ret = xe_bo_evict(bo, true); + xe_bo_unlock(bo, &ww); + xe_bo_put(bo); + if (ret) { + spin_lock(&xe->pinned.lock); + list_splice_tail(&still_in_list, + &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + return ret; + } + + spin_lock(&xe->pinned.lock); + } + list_splice_tail(&still_in_list, &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + + /* + * Wait for all user BO to be evicted as those evictions depend on the + * memory moved below. + */ + for_each_gt(gt, xe, id) + xe_gt_migrate_wait(gt); + + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &xe->pinned.evicted); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, &ww, 0, false); + ret = xe_bo_evict(bo, true); + xe_bo_unlock(bo, &ww); + xe_bo_put(bo); + if (ret) + return ret; + + spin_lock(&xe->pinned.lock); + } + spin_unlock(&xe->pinned.lock); + + return 0; +} + +/** + * xe_bo_restore_kernel - restore kernel BOs to VRAM + * + * @xe: xe device + * + * Move kernel BOs from temporary (typically system) memory to VRAM via CPU. All + * moves done via TTM calls. + * + * This function should be called early, before trying to init the GT, on device + * resume. 
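+ *
+ * As a rough, illustrative sketch only (the exact suspend / resume call sites
+ * live in the PM code and are not shown in this file), the expected ordering
+ * of the helpers in this file is:
+ *
+ *	suspend:	xe_bo_evict_all(xe);		- VRAM contents to sysmem
+ *	resume:		xe_bo_restore_kernel(xe);	- before GT init, via CPU
+ *			... GT init ...
+ *			xe_bo_restore_user(xe);		- after GT init, via GPU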
+ */ +int xe_bo_restore_kernel(struct xe_device *xe) +{ + struct ww_acquire_ctx ww; + struct xe_bo *bo; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.evicted, + typeof(*bo), pinned_link); + if (!bo) + break; + xe_bo_get(bo); + list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, &ww, 0, false); + ret = xe_bo_validate(bo, NULL, false); + xe_bo_unlock(bo, &ww); + if (ret) { + xe_bo_put(bo); + return ret; + } + + if (bo->flags & XE_BO_CREATE_GGTT_BIT) + xe_ggtt_map_bo(bo->gt->mem.ggtt, bo); + + /* + * We expect validate to trigger a move VRAM and our move code + * should setup the iosys map. + */ + XE_BUG_ON(iosys_map_is_null(&bo->vmap)); + XE_BUG_ON(!xe_bo_is_vram(bo)); + + xe_bo_put(bo); + + spin_lock(&xe->pinned.lock); + } + spin_unlock(&xe->pinned.lock); + + return 0; +} + +/** + * xe_bo_restore_user - restore pinned user BOs to VRAM + * + * @xe: xe device + * + * Move pinned user BOs from temporary (typically system) memory to VRAM via + * CPU. All moves done via TTM calls. + * + * This function should be called late, after GT init, on device resume. + */ +int xe_bo_restore_user(struct xe_device *xe) +{ + struct ww_acquire_ctx ww; + struct xe_bo *bo; + struct xe_gt *gt; + struct list_head still_in_list; + u8 id; + int ret; + + if (!IS_DGFX(xe)) + return 0; + + /* Pinned user memory in VRAM should be validated on resume */ + INIT_LIST_HEAD(&still_in_list); + spin_lock(&xe->pinned.lock); + for (;;) { + bo = list_first_entry_or_null(&xe->pinned.external_vram, + typeof(*bo), pinned_link); + if (!bo) + break; + list_move_tail(&bo->pinned_link, &still_in_list); + xe_bo_get(bo); + spin_unlock(&xe->pinned.lock); + + xe_bo_lock(bo, &ww, 0, false); + ret = xe_bo_validate(bo, NULL, false); + xe_bo_unlock(bo, &ww); + xe_bo_put(bo); + if (ret) { + spin_lock(&xe->pinned.lock); + list_splice_tail(&still_in_list, + &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + return ret; + } + + spin_lock(&xe->pinned.lock); + } + list_splice_tail(&still_in_list, &xe->pinned.external_vram); + spin_unlock(&xe->pinned.lock); + + /* Wait for validate to complete */ + for_each_gt(gt, xe, id) + xe_gt_migrate_wait(gt); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h new file mode 100644 index 000000000000..746894798852 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_evict.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BO_EVICT_H_ +#define _XE_BO_EVICT_H_ + +struct xe_device; + +int xe_bo_evict_all(struct xe_device *xe); +int xe_bo_restore_kernel(struct xe_device *xe); +int xe_bo_restore_user(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h new file mode 100644 index 000000000000..06de3330211d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_BO_TYPES_H_ +#define _XE_BO_TYPES_H_ + +#include + +#include +#include +#include +#include +#include + +struct xe_device; +struct xe_vm; + +#define XE_BO_MAX_PLACEMENTS 3 + +/** @xe_bo: XE buffer object */ +struct xe_bo { + /** @ttm: TTM base buffer object */ + struct ttm_buffer_object ttm; + /** @size: Size of this buffer object */ + size_t size; + /** @flags: flags for this buffer object */ + u32 flags; + /** @vm: VM this 
BO is attached to, for extobj this will be NULL */ + struct xe_vm *vm; + /** @gt: GT this BO is attached to (kernel BO only) */ + struct xe_gt *gt; + /** @vmas: List of VMAs for this BO */ + struct list_head vmas; + /** @placements: valid placements for this BO */ + struct ttm_place placements[XE_BO_MAX_PLACEMENTS]; + /** @placement: current placement for this BO */ + struct ttm_placement placement; + /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ + struct drm_mm_node ggtt_node; + /** @vmap: iosys map of this buffer */ + struct iosys_map vmap; + /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ + struct ttm_bo_kmap_obj kmap; + /** @pinned_link: link to present / evicted list of pinned BO */ + struct list_head pinned_link; + /** @props: BO user controlled properties */ + struct { + /** @preferred_mem: preferred memory class for this BO */ + s16 preferred_mem_class; + /** @prefered_gt: preferred GT for this BO */ + s16 preferred_gt; + /** @preferred_mem_type: preferred memory type */ + s32 preferred_mem_type; + /** + * @cpu_atomic: the CPU expects to do atomics operations to + * this BO + */ + bool cpu_atomic; + /** + * @device_atomic: the device expects to do atomics operations + * to this BO + */ + bool device_atomic; + } props; + /** @freed: List node for delayed put. */ + struct llist_node freed; + /** @created: Whether the bo has passed initial creation */ + bool created; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c new file mode 100644 index 000000000000..84db7b3f501e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_debugfs.h" +#include "xe_gt_debugfs.h" +#include "xe_step.h" + +#ifdef CONFIG_DRM_XE_DEBUG +#include "xe_bo_evict.h" +#include "xe_migrate.h" +#include "xe_vm.h" +#endif + +static struct xe_device *node_to_xe(struct drm_info_node *node) +{ + return to_xe_device(node->minor->dev); +} + +static int info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + struct xe_gt *gt; + u8 id; + + drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); + drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); + drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", + xe_step_name(xe->info.step.graphics), + xe_step_name(xe->info.step.media), + xe_step_name(xe->info.step.display), + xe_step_name(xe->info.step.basedie)); + drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx)); + drm_printf(&p, "platform %d\n", xe->info.platform); + drm_printf(&p, "subplatform %d\n", + xe->info.subplatform > XE_SUBPLATFORM_NONE ? 
xe->info.subplatform : 0); + drm_printf(&p, "devid 0x%x\n", xe->info.devid); + drm_printf(&p, "revid %d\n", xe->info.revid); + drm_printf(&p, "tile_count %d\n", xe->info.tile_count); + drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level); + drm_printf(&p, "enable_guc %s\n", str_yes_no(xe->info.enable_guc)); + drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm)); + drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); + for_each_gt(gt, xe, id) { + drm_printf(&p, "gt%d force wake %d\n", id, + xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT)); + drm_printf(&p, "gt%d engine_mask 0x%llx\n", id, + gt->info.engine_mask); + } + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"info", info, 0}, +}; + +static int forcewake_open(struct inode *inode, struct file *file) +{ + struct xe_device *xe = inode->i_private; + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + return 0; +} + +static int forcewake_release(struct inode *inode, struct file *file) +{ + struct xe_device *xe = inode->i_private; + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + return 0; +} + +static const struct file_operations forcewake_all_fops = { + .owner = THIS_MODULE, + .open = forcewake_open, + .release = forcewake_release, +}; + +void xe_debugfs_register(struct xe_device *xe) +{ + struct ttm_device *bdev = &xe->ttm; + struct drm_minor *minor = xe->drm.primary; + struct dentry *root = minor->debugfs_root; + struct ttm_resource_manager *man; + struct xe_gt *gt; + u32 mem_type; + u8 id; + + drm_debugfs_create_files(debugfs_list, + ARRAY_SIZE(debugfs_list), + root, minor); + + debugfs_create_file("forcewake_all", 0400, root, xe, + &forcewake_all_fops); + + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { + man = ttm_manager_type(bdev, mem_type); + + if (man) { + char name[16]; + + sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0); + ttm_resource_manager_create_debugfs(man, root, name); + } + } + + man = ttm_manager_type(bdev, XE_PL_TT); + ttm_resource_manager_create_debugfs(man, root, "gtt_mm"); + + for_each_gt(gt, xe, id) + xe_gt_debugfs_register(gt); +} diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h new file mode 100644 index 000000000000..715b8e2e0bd9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_DEBUGFS_H_ +#define _XE_DEBUGFS_H_ + +struct xe_device; + +void xe_debugfs_register(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c new file mode 100644 index 000000000000..93dea2b9c464 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_device.h" + +#include +#include +#include +#include +#include +#include + +#include "xe_bo.h" +#include "xe_debugfs.h" +#include "xe_dma_buf.h" +#include "xe_drv.h" +#include "xe_engine.h" +#include "xe_exec.h" +#include "xe_gt.h" +#include "xe_irq.h" +#include "xe_module.h" +#include "xe_mmio.h" +#include "xe_pcode.h" +#include "xe_pm.h" +#include "xe_query.h" +#include "xe_vm.h" +#include "xe_vm_madvise.h" +#include "xe_wait_user_fence.h" + +static int xe_file_open(struct drm_device *dev, struct drm_file *file) +{ + struct xe_file *xef; + + xef = 
kzalloc(sizeof(*xef), GFP_KERNEL); + if (!xef) + return -ENOMEM; + + xef->drm = file; + + mutex_init(&xef->vm.lock); + xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1); + + mutex_init(&xef->engine.lock); + xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1); + + file->driver_priv = xef; + return 0; +} + +static void device_kill_persitent_engines(struct xe_device *xe, + struct xe_file *xef); + +static void xe_file_close(struct drm_device *dev, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = file->driver_priv; + struct xe_vm *vm; + struct xe_engine *e; + unsigned long idx; + + mutex_lock(&xef->engine.lock); + xa_for_each(&xef->engine.xa, idx, e) { + xe_engine_kill(e); + xe_engine_put(e); + } + mutex_unlock(&xef->engine.lock); + mutex_destroy(&xef->engine.lock); + device_kill_persitent_engines(xe, xef); + + mutex_lock(&xef->vm.lock); + xa_for_each(&xef->vm.xa, idx, vm) + xe_vm_close_and_put(vm); + mutex_unlock(&xef->vm.lock); + mutex_destroy(&xef->vm.lock); + + kfree(xef); +} + +static const struct drm_ioctl_desc xe_ioctls[] = { + DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_ENGINE_SET_PROPERTY, xe_engine_set_property_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), +}; + +static const struct file_operations xe_driver_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release_noglobal, + .unlocked_ioctl = drm_ioctl, + .mmap = drm_gem_mmap, + .poll = drm_poll, + .read = drm_read, +// .compat_ioctl = i915_ioc32_compat_ioctl, + .llseek = noop_llseek, +}; + +static void xe_driver_release(struct drm_device *dev) +{ + struct xe_device *xe = to_xe_device(dev); + + pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL); +} + +static struct drm_driver driver = { + /* Don't use MTRRs here; the Xserver or userspace app should + * deal with them for Intel hardware. 
+ */ + .driver_features = + DRIVER_GEM | + DRIVER_RENDER | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, + .open = xe_file_open, + .postclose = xe_file_close, + + .gem_prime_import = xe_gem_prime_import, + + .dumb_create = xe_bo_dumb_create, + .dumb_map_offset = drm_gem_ttm_dumb_map_offset, + .release = &xe_driver_release, + + .ioctls = xe_ioctls, + .num_ioctls = ARRAY_SIZE(xe_ioctls), + .fops = &xe_driver_fops, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +static void xe_device_destroy(struct drm_device *dev, void *dummy) +{ + struct xe_device *xe = to_xe_device(dev); + + destroy_workqueue(xe->ordered_wq); + mutex_destroy(&xe->persitent_engines.lock); + ttm_device_fini(&xe->ttm); +} + +struct xe_device *xe_device_create(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct xe_device *xe; + int err; + + err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); + if (err) + return ERR_PTR(err); + + xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm); + if (IS_ERR(xe)) + return xe; + + err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev, + xe->drm.anon_inode->i_mapping, + xe->drm.vma_offset_manager, false, false); + if (WARN_ON(err)) + goto err_put; + + xe->info.devid = pdev->device; + xe->info.revid = pdev->revision; + xe->info.enable_guc = enable_guc; + + spin_lock_init(&xe->irq.lock); + + init_waitqueue_head(&xe->ufence_wq); + + mutex_init(&xe->usm.lock); + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1); + + mutex_init(&xe->persitent_engines.lock); + INIT_LIST_HEAD(&xe->persitent_engines.list); + + spin_lock_init(&xe->pinned.lock); + INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); + INIT_LIST_HEAD(&xe->pinned.external_vram); + INIT_LIST_HEAD(&xe->pinned.evicted); + + xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); + + mutex_init(&xe->sb_lock); + xe->enabled_irq_mask = ~0; + + err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); + if (err) + goto err_put; + + mutex_init(&xe->mem_access.lock); + return xe; + +err_put: + drm_dev_put(&xe->drm); + + return ERR_PTR(err); +} + +int xe_device_probe(struct xe_device *xe) +{ + struct xe_gt *gt; + int err; + u8 id; + + xe->info.mem_region_mask = 1; + + for_each_gt(gt, xe, id) { + err = xe_gt_alloc(xe, gt); + if (err) + return err; + } + + err = xe_mmio_init(xe); + if (err) + return err; + + for_each_gt(gt, xe, id) { + err = xe_pcode_probe(gt); + if (err) + return err; + } + + err = xe_irq_install(xe); + if (err) + return err; + + for_each_gt(gt, xe, id) { + err = xe_gt_init_early(gt); + if (err) + goto err_irq_shutdown; + } + + err = xe_mmio_probe_vram(xe); + if (err) + goto err_irq_shutdown; + + for_each_gt(gt, xe, id) { + err = xe_gt_init_noalloc(gt); + if (err) + goto err_irq_shutdown; + } + + for_each_gt(gt, xe, id) { + err = xe_gt_init(gt); + if (err) + goto err_irq_shutdown; + } + + err = drm_dev_register(&xe->drm, 0); + if (err) + goto err_irq_shutdown; + + xe_debugfs_register(xe); + + return 0; + +err_irq_shutdown: + xe_irq_shutdown(xe); + return err; +} + +void xe_device_remove(struct xe_device *xe) +{ + xe_irq_shutdown(xe); +} + +void xe_device_shutdown(struct xe_device *xe) +{ +} + +void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e) +{ + mutex_lock(&xe->persitent_engines.lock); + list_add_tail(&e->persitent.link, &xe->persitent_engines.list); + mutex_unlock(&xe->persitent_engines.lock); +} + +void 
xe_device_remove_persitent_engines(struct xe_device *xe, + struct xe_engine *e) +{ + mutex_lock(&xe->persitent_engines.lock); + if (!list_empty(&e->persitent.link)) + list_del(&e->persitent.link); + mutex_unlock(&xe->persitent_engines.lock); +} + +static void device_kill_persitent_engines(struct xe_device *xe, + struct xe_file *xef) +{ + struct xe_engine *e, *next; + + mutex_lock(&xe->persitent_engines.lock); + list_for_each_entry_safe(e, next, &xe->persitent_engines.list, + persitent.link) + if (e->persitent.xef == xef) { + xe_engine_kill(e); + list_del_init(&e->persitent.link); + } + mutex_unlock(&xe->persitent_engines.lock); +} + +#define SOFTWARE_FLAGS_SPR33 _MMIO(0x4F084) + +void xe_device_wmb(struct xe_device *xe) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + + wmb(); + if (IS_DGFX(xe)) + xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33.reg, 0); +} + +u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) +{ + return xe_device_has_flat_ccs(xe) ? + DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0; +} + +void xe_device_mem_access_get(struct xe_device *xe) +{ + bool resumed = xe_pm_runtime_resume_if_suspended(xe); + + mutex_lock(&xe->mem_access.lock); + if (xe->mem_access.ref++ == 0) + xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe); + mutex_unlock(&xe->mem_access.lock); + + /* The usage counter increased if device was immediately resumed */ + if (resumed) + xe_pm_runtime_put(xe); + + XE_WARN_ON(xe->mem_access.ref == U32_MAX); +} + +void xe_device_mem_access_put(struct xe_device *xe) +{ + mutex_lock(&xe->mem_access.lock); + if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm) + xe_pm_runtime_put(xe); + mutex_unlock(&xe->mem_access.lock); + + XE_WARN_ON(xe->mem_access.ref < 0); +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h new file mode 100644 index 000000000000..88d55671b068 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_DEVICE_H_ +#define _XE_DEVICE_H_ + +struct xe_engine; +struct xe_file; + +#include + +#include "xe_device_types.h" +#include "xe_macros.h" +#include "xe_force_wake.h" + +#include "gt/intel_gpu_commands.h" + +static inline struct xe_device *to_xe_device(const struct drm_device *dev) +{ + return container_of(dev, struct xe_device, drm); +} + +static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm) +{ + return container_of(ttm, struct xe_device, ttm); +} + +struct xe_device *xe_device_create(struct pci_dev *pdev, + const struct pci_device_id *ent); +int xe_device_probe(struct xe_device *xe); +void xe_device_remove(struct xe_device *xe); +void xe_device_shutdown(struct xe_device *xe); + +void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e); +void xe_device_remove_persitent_engines(struct xe_device *xe, + struct xe_engine *e); + +void xe_device_wmb(struct xe_device *xe); + +static inline struct xe_file *to_xe_file(const struct drm_file *file) +{ + return file->driver_priv; +} + +static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) +{ + struct xe_gt *gt; + + XE_BUG_ON(gt_id > XE_MAX_GT); + gt = xe->gt + gt_id; + XE_BUG_ON(gt->info.id != gt_id); + XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); + + return gt; +} + +/* + * FIXME: Placeholder until multi-gt lands. Once that lands, kill this function. 
+ */ +static inline struct xe_gt *to_gt(struct xe_device *xe) +{ + return xe->gt; +} + +static inline bool xe_device_guc_submission_enabled(struct xe_device *xe) +{ + return xe->info.enable_guc; +} + +static inline void xe_device_guc_submission_disable(struct xe_device *xe) +{ + xe->info.enable_guc = false; +} + +#define for_each_gt(gt__, xe__, id__) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__++)) \ + for_each_if ((gt__) = xe_device_get_gt((xe__), (id__))) + +static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt) +{ + return >->mmio.fw; +} + +void xe_device_mem_access_get(struct xe_device *xe); +void xe_device_mem_access_put(struct xe_device *xe); + +static inline void xe_device_assert_mem_access(struct xe_device *xe) +{ + XE_WARN_ON(!xe->mem_access.ref); +} + +static inline bool xe_device_mem_access_ongoing(struct xe_device *xe) +{ + bool ret; + + mutex_lock(&xe->mem_access.lock); + ret = xe->mem_access.ref; + mutex_unlock(&xe->mem_access.lock); + + return ret; +} + +static inline bool xe_device_in_fault_mode(struct xe_device *xe) +{ + return xe->usm.num_vm_in_fault_mode != 0; +} + +static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) +{ + return xe->usm.num_vm_in_non_fault_mode != 0; +} + +static inline bool xe_device_has_flat_ccs(struct xe_device *xe) +{ + return xe->info.has_flat_ccs; +} + +u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); +#endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h new file mode 100644 index 000000000000..d62ee85bfcbe --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_DEVICE_TYPES_H_ +#define _XE_DEVICE_TYPES_H_ + +#include + +#include +#include +#include + +#include "xe_gt_types.h" +#include "xe_platform_types.h" +#include "xe_step_types.h" + +#define XE_BO_INVALID_OFFSET LONG_MAX + +#define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100) +#define MEDIA_VER(xe) ((xe)->info.media_verx100 / 100) +#define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100) +#define MEDIA_VERx100(xe) ((xe)->info.media_verx100) +#define IS_DGFX(xe) ((xe)->info.is_dgfx) + +#define XE_VRAM_FLAGS_NEED64K BIT(0) + +#define XE_GT0 0 +#define XE_GT1 1 +#define XE_MAX_GT (XE_GT1 + 1) + +#define XE_MAX_ASID (BIT(20)) + +#define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step) \ + ((_xe)->info.platform == (_platform) && \ + (_xe)->info.step.graphics >= (min_step) && \ + (_xe)->info.step.graphics < (max_step)) +#define IS_SUBPLATFORM_STEP(_xe, _platform, sub, min_step, max_step) \ + ((_xe)->info.platform == (_platform) && \ + (_xe)->info.subplatform == (sub) && \ + (_xe)->info.step.graphics >= (min_step) && \ + (_xe)->info.step.graphics < (max_step)) + +/** + * struct xe_device - Top level struct of XE device + */ +struct xe_device { + /** @drm: drm device */ + struct drm_device drm; + + /** @info: device info */ + struct intel_device_info { + /** @graphics_verx100: graphics IP version */ + u32 graphics_verx100; + /** @media_verx100: media IP version */ + u32 media_verx100; + /** @mem_region_mask: mask of valid memory regions */ + u32 mem_region_mask; + /** @is_dgfx: is discrete device */ + bool is_dgfx; + /** @platform: XE platform enum */ + enum xe_platform platform; + /** @subplatform: XE subplatform enum */ + enum xe_subplatform subplatform; + /** @devid: device ID */ + u16 devid; + /** @revid: device revision */ + u8 revid; + /** @step: stepping information for 
each IP */ + struct xe_step_info step; + /** @dma_mask_size: DMA address bits */ + u8 dma_mask_size; + /** @vram_flags: Vram flags */ + u8 vram_flags; + /** @tile_count: Number of tiles */ + u8 tile_count; + /** @vm_max_level: Max VM level */ + u8 vm_max_level; + /** @media_ver: Media version */ + u8 media_ver; + /** @supports_usm: Supports unified shared memory */ + bool supports_usm; + /** @enable_guc: GuC submission enabled */ + bool enable_guc; + /** @has_flat_ccs: Whether flat CCS metadata is used */ + bool has_flat_ccs; + /** @has_4tile: Whether tile-4 tiling is supported */ + bool has_4tile; + } info; + + /** @irq: device interrupt state */ + struct { + /** @lock: lock for processing irq's on this device */ + spinlock_t lock; + + /** @enabled: interrupts enabled on this device */ + bool enabled; + } irq; + + /** @ttm: ttm device */ + struct ttm_device ttm; + + /** @mmio: mmio info for device */ + struct { + /** @size: size of MMIO space for device */ + size_t size; + /** @regs: pointer to MMIO space for device */ + void *regs; + } mmio; + + /** @mem: memory info for device */ + struct { + /** @vram: VRAM info for device */ + struct { + /** @io_start: start address of VRAM */ + resource_size_t io_start; + /** @size: size of VRAM */ + resource_size_t size; + /** @mapping: pointer to VRAM mappable space */ + void *__iomem mapping; + } vram; + } mem; + + /** @usm: unified memory state */ + struct { + /** @asid: convert a ASID to VM */ + struct xarray asid_to_vm; + /** @next_asid: next ASID, used to cyclical alloc asids */ + u32 next_asid; + /** @num_vm_in_fault_mode: number of VM in fault mode */ + u32 num_vm_in_fault_mode; + /** @num_vm_in_non_fault_mode: number of VM in non-fault mode */ + u32 num_vm_in_non_fault_mode; + /** @lock: protects UM state */ + struct mutex lock; + } usm; + + /** @persitent_engines: engines that are closed but still running */ + struct { + /** @lock: protects persitent engines */ + struct mutex lock; + /** @list: list of persitent engines */ + struct list_head list; + } persitent_engines; + + /** @pinned: pinned BO state */ + struct { + /** @lock: protected pinned BO list state */ + spinlock_t lock; + /** @evicted: pinned kernel BO that are present */ + struct list_head kernel_bo_present; + /** @evicted: pinned BO that have been evicted */ + struct list_head evicted; + /** @external_vram: pinned external BO in vram*/ + struct list_head external_vram; + } pinned; + + /** @ufence_wq: user fence wait queue */ + wait_queue_head_t ufence_wq; + + /** @ordered_wq: used to serialize compute mode resume */ + struct workqueue_struct *ordered_wq; + + /** @gt: graphics tile */ + struct xe_gt gt[XE_MAX_GT]; + + /** + * @mem_access: keep track of memory access in the device, possibly + * triggering additional actions when they occur. 
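+ *
+ * A minimal, illustrative usage sketch (not a specific call site) of the
+ * helpers that manage this state:
+ *
+ *	xe_device_mem_access_get(xe);	- take a reference, waking the device
+ *					  via runtime PM if needed
+ *	... access VRAM / MMIO ...	- xe_device_assert_mem_access(xe) can
+ *					  check that a reference is held
+ *	xe_device_mem_access_put(xe);	- drop the reference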
+ */ + struct { + /** @lock: protect the ref count */ + struct mutex lock; + /** @ref: ref count of memory accesses */ + u32 ref; + /** @hold_rpm: need to put rpm ref back at the end */ + bool hold_rpm; + } mem_access; + + /** @d3cold_allowed: Indicates if d3cold is a valid device state */ + bool d3cold_allowed; + + /* For pcode */ + struct mutex sb_lock; + + u32 enabled_irq_mask; +}; + +/** + * struct xe_file - file handle for XE driver + */ +struct xe_file { + /** @drm: base DRM file */ + struct drm_file *drm; + + /** @vm: VM state for file */ + struct { + /** @xe: xarray to store VMs */ + struct xarray xa; + /** @lock: protects file VM state */ + struct mutex lock; + } vm; + + /** @engine: Submission engine state for file */ + struct { + /** @xe: xarray to store engines */ + struct xarray xa; + /** @lock: protects file engine state */ + struct mutex lock; + } engine; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c new file mode 100644 index 000000000000..d09ff25bd940 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include +#include + +#include + +#include +#include + +#include "tests/xe_test.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_dma_buf.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_vm.h" + +MODULE_IMPORT_NS(DMA_BUF); + +static int xe_dma_buf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + + if (attach->peer2peer && + pci_p2pdma_distance(to_pci_dev(obj->dev->dev), attach->dev, false) < 0) + attach->peer2peer = false; + + if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT)) + return -EOPNOTSUPP; + + xe_device_mem_access_get(to_xe_device(obj->dev)); + return 0; +} + +static void xe_dma_buf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + + xe_device_mem_access_put(to_xe_device(obj->dev)); +} + +static int xe_dma_buf_pin(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + /* + * Migrate to TT first to increase the chance of non-p2p clients + * can attach. 
+ */ + (void)xe_bo_migrate(bo, XE_PL_TT); + xe_bo_pin_external(bo); + + return 0; +} + +static void xe_dma_buf_unpin(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + xe_bo_unpin_external(bo); +} + +static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, + enum dma_data_direction dir) +{ + struct dma_buf *dma_buf = attach->dmabuf; + struct drm_gem_object *obj = dma_buf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + struct sg_table *sgt; + int r = 0; + + if (!attach->peer2peer && !xe_bo_can_migrate(bo, XE_PL_TT)) + return ERR_PTR(-EOPNOTSUPP); + + if (!xe_bo_is_pinned(bo)) { + if (!attach->peer2peer || + bo->ttm.resource->mem_type == XE_PL_SYSTEM) { + if (xe_bo_can_migrate(bo, XE_PL_TT)) + r = xe_bo_migrate(bo, XE_PL_TT); + else + r = xe_bo_validate(bo, NULL, false); + } + if (r) + return ERR_PTR(r); + } + + switch (bo->ttm.resource->mem_type) { + case XE_PL_TT: + sgt = drm_prime_pages_to_sg(obj->dev, + bo->ttm.ttm->pages, + bo->ttm.ttm->num_pages); + if (IS_ERR(sgt)) + return sgt; + + if (dma_map_sgtable(attach->dev, sgt, dir, + DMA_ATTR_SKIP_CPU_SYNC)) + goto error_free; + break; + + case XE_PL_VRAM0: + case XE_PL_VRAM1: + r = xe_ttm_vram_mgr_alloc_sgt(xe_bo_device(bo), + bo->ttm.resource, 0, + bo->ttm.base.size, attach->dev, + dir, &sgt); + if (r) + return ERR_PTR(r); + break; + default: + return ERR_PTR(-EINVAL); + } + + return sgt; + +error_free: + sg_free_table(sgt); + kfree(sgt); + return ERR_PTR(-EBUSY); +} + +static void xe_dma_buf_unmap(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) +{ + struct dma_buf *dma_buf = attach->dmabuf; + struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); + + if (!xe_bo_is_vram(bo)) { + dma_unmap_sgtable(attach->dev, sgt, dir, 0); + sg_free_table(sgt); + kfree(sgt); + } else { + xe_ttm_vram_mgr_free_sgt(attach->dev, dir, sgt); + } +} + +static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + bool reads = (direction == DMA_BIDIRECTIONAL || + direction == DMA_FROM_DEVICE); + + if (!reads) + return 0; + + xe_bo_lock_no_vm(bo, NULL); + (void)xe_bo_migrate(bo, XE_PL_TT); + xe_bo_unlock_no_vm(bo); + + return 0; +} + +const struct dma_buf_ops xe_dmabuf_ops = { + .attach = xe_dma_buf_attach, + .detach = xe_dma_buf_detach, + .pin = xe_dma_buf_pin, + .unpin = xe_dma_buf_unpin, + .map_dma_buf = xe_dma_buf_map, + .unmap_dma_buf = xe_dma_buf_unmap, + .release = drm_gem_dmabuf_release, + .begin_cpu_access = xe_dma_buf_begin_cpu_access, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + +struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) +{ + struct xe_bo *bo = gem_to_xe_bo(obj); + struct dma_buf *buf; + + if (bo->vm) + return ERR_PTR(-EPERM); + + buf = drm_gem_prime_export(obj, flags); + if (!IS_ERR(buf)) + buf->ops = &xe_dmabuf_ops; + + return buf; +} + +static struct drm_gem_object * +xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, + struct dma_buf *dma_buf) +{ + struct dma_resv *resv = dma_buf->resv; + struct xe_device *xe = to_xe_device(dev); + struct xe_bo *bo; + int ret; + + dma_resv_lock(resv, NULL); + bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size, + ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto error; + } + dma_resv_unlock(resv); 
+ + return &bo->ttm.base; + +error: + dma_resv_unlock(resv); + return ERR_PTR(ret); +} + +static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->importer_priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + XE_WARN_ON(xe_bo_evict(bo, false)); +} + +static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = { + .allow_peer2peer = true, + .move_notify = xe_dma_buf_move_notify +}; + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + +struct dma_buf_test_params { + struct xe_test_priv base; + const struct dma_buf_attach_ops *attach_ops; + bool force_different_devices; + u32 mem_mask; +}; + +#define to_dma_buf_test_params(_priv) \ + container_of(_priv, struct dma_buf_test_params, base) +#endif + +struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + XE_TEST_DECLARE(struct dma_buf_test_params *test = + to_dma_buf_test_params + (xe_cur_kunit_priv(XE_TEST_LIVE_DMA_BUF));) + const struct dma_buf_attach_ops *attach_ops; + struct dma_buf_attachment *attach; + struct drm_gem_object *obj; + struct xe_bo *bo; + + if (dma_buf->ops == &xe_dmabuf_ops) { + obj = dma_buf->priv; + if (obj->dev == dev && + !XE_TEST_ONLY(test && test->force_different_devices)) { + /* + * Importing dmabuf exported from out own gem increases + * refcount on gem itself instead of f_count of dmabuf. + */ + drm_gem_object_get(obj); + return obj; + } + } + + /* + * Don't publish the bo until we have a valid attachment, and a + * valid attachment needs the bo address. So pre-create a bo before + * creating the attachment and publish. + */ + bo = xe_bo_alloc(); + if (IS_ERR(bo)) + return ERR_CAST(bo); + + attach_ops = &xe_dma_buf_attach_ops; +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + if (test) + attach_ops = test->attach_ops; +#endif + + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base); + if (IS_ERR(attach)) { + obj = ERR_CAST(attach); + goto out_err; + } + + /* Errors here will take care of freeing the bo. */ + obj = xe_dma_buf_init_obj(dev, bo, dma_buf); + if (IS_ERR(obj)) + return obj; + + + get_dma_buf(dma_buf); + obj->import_attach = attach; + return obj; + +out_err: + xe_bo_free(bo); + + return obj; +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_dma_buf.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.h b/drivers/gpu/drm/xe/xe_dma_buf.h new file mode 100644 index 000000000000..861dd28a862c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dma_buf.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_DMA_BUF_H_ +#define _XE_DMA_BUF_H_ + +#include + +struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags); +struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); + +#endif diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h new file mode 100644 index 000000000000..0377e5e4e35f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_DRV_H_ +#define _XE_DRV_H_ + +#include + +#define DRIVER_NAME "xe" +#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DATE "20201103" +#define DRIVER_TIMESTAMP 1604406085 + +/* Interface history: + * + * 1.1: Original. 
+ */ +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 1 +#define DRIVER_PATCHLEVEL 0 + +#endif diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c new file mode 100644 index 000000000000..63219bd98be7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_engine.h" + +#include +#include +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_migrate.h" +#include "xe_pm.h" +#include "xe_trace.h" +#include "xe_vm.h" + +static struct xe_engine *__xe_engine_create(struct xe_device *xe, + struct xe_vm *vm, + u32 logical_mask, + u16 width, struct xe_hw_engine *hwe, + u32 flags) +{ + struct xe_engine *e; + struct xe_gt *gt = hwe->gt; + int err; + int i; + + e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + kref_init(&e->refcount); + e->flags = flags; + e->hwe = hwe; + e->gt = gt; + if (vm) + e->vm = xe_vm_get(vm); + e->class = hwe->class; + e->width = width; + e->logical_mask = logical_mask; + e->fence_irq = >->fence_irq[hwe->class]; + e->ring_ops = gt->ring_ops[hwe->class]; + e->ops = gt->engine_ops; + INIT_LIST_HEAD(&e->persitent.link); + INIT_LIST_HEAD(&e->compute.link); + INIT_LIST_HEAD(&e->multi_gt_link); + + /* FIXME: Wire up to configurable default value */ + e->sched_props.timeslice_us = 1 * 1000; + e->sched_props.preempt_timeout_us = 640 * 1000; + + if (xe_engine_is_parallel(e)) { + e->parallel.composite_fence_ctx = dma_fence_context_alloc(1); + e->parallel.composite_fence_seqno = 1; + } + if (e->flags & ENGINE_FLAG_VM) { + e->bind.fence_ctx = dma_fence_context_alloc(1); + e->bind.fence_seqno = 1; + } + + for (i = 0; i < width; ++i) { + err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K); + if (err) + goto err_lrc; + } + + err = e->ops->init(e); + if (err) + goto err_lrc; + + return e; + +err_lrc: + for (i = i - 1; i >= 0; --i) + xe_lrc_finish(e->lrc + i); + kfree(e); + return ERR_PTR(err); +} + +struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hwe, u32 flags) +{ + struct ww_acquire_ctx ww; + struct xe_engine *e; + int err; + + if (vm) { + err = xe_vm_lock(vm, &ww, 0, true); + if (err) + return ERR_PTR(err); + } + e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags); + if (vm) + xe_vm_unlock(vm, &ww); + + return e; +} + +struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags) +{ + struct xe_hw_engine *hwe, *hwe0 = NULL; + enum xe_hw_engine_id id; + u32 logical_mask = 0; + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + if (hwe->class == class) { + logical_mask |= BIT(hwe->logical_instance); + if (!hwe0) + hwe0 = hwe; + } + } + + if (!logical_mask) + return ERR_PTR(-ENODEV); + + return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags); +} + +void xe_engine_destroy(struct kref *ref) +{ + struct xe_engine *e = container_of(ref, struct xe_engine, refcount); + struct xe_engine *engine, *next; + + if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) { + list_for_each_entry_safe(engine, next, &e->multi_gt_list, + multi_gt_link) + xe_engine_put(engine); + } + + e->ops->fini(e); +} + +void xe_engine_fini(struct xe_engine *e) +{ + int i; + + for (i = 0; i < e->width; ++i) + xe_lrc_finish(e->lrc + i); + if (e->vm) + 
xe_vm_put(e->vm); + + kfree(e); +} + +struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id) +{ + struct xe_engine *e; + + mutex_lock(&xef->engine.lock); + e = xa_load(&xef->engine.xa, id); + mutex_unlock(&xef->engine.lock); + + if (e) + xe_engine_get(e); + + return e; +} + +static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH && + !capable(CAP_SYS_NICE))) + return -EPERM; + + return e->ops->set_priority(e, value); +} + +static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_timeslice(e, value); +} + +static int engine_set_preemption_timeout(struct xe_device *xe, + struct xe_engine *e, u64 value, + bool create) +{ + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_preempt_timeout(e, value); +} + +static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM)) + return -EINVAL; + + if (value) { + struct xe_vm *vm = e->vm; + int err; + + if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm))) + return -EOPNOTSUPP; + + if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm))) + return -EOPNOTSUPP; + + if (XE_IOCTL_ERR(xe, e->width != 1)) + return -EINVAL; + + e->compute.context = dma_fence_context_alloc(1); + spin_lock_init(&e->compute.lock); + + err = xe_vm_add_compute_engine(vm, e); + if (XE_IOCTL_ERR(xe, err)) + return err; + + e->flags |= ENGINE_FLAG_COMPUTE_MODE; + e->flags &= ~ENGINE_FLAG_PERSISTENT; + } + + return 0; +} + +static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + return -EINVAL; + + if (value) + e->flags |= ENGINE_FLAG_PERSISTENT; + else + e->flags &= ~ENGINE_FLAG_PERSISTENT; + + return 0; +} + +static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_job_timeout(e, value); +} + +static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_trigger = value; + + return 0; +} + +static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_notify = value; + + return 0; +} + +static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_ERR(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_granularity = value; + + return 0; +} + +typedef int (*xe_engine_set_property_fn)(struct xe_device *xe, + struct xe_engine *e, + u64 value, bool create); + +static const xe_engine_set_property_fn engine_set_property_funcs[] = { + 
[XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority, + [XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice, + [XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout, + [XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode, + [XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence, + [XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout, + [XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger, + [XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify, + [XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity, +}; + +static int engine_user_ext_set_property(struct xe_device *xe, + struct xe_engine *e, + u64 extension, + bool create) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_engine_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, ext.property >= + ARRAY_SIZE(engine_set_property_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs)); + return engine_set_property_funcs[idx](xe, e, ext.value, create); +} + +typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe, + struct xe_engine *e, + u64 extension, + bool create); + +static const xe_engine_set_property_fn engine_user_extension_funcs[] = { + [XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, + u64 extensions, int ext_number, bool create) +{ + u64 __user *address = u64_to_user_ptr(extensions); + struct xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, ext.name >= + ARRAY_SIZE(engine_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, + ARRAY_SIZE(engine_user_extension_funcs)); + err = engine_user_extension_funcs[idx](xe, e, extensions, create); + if (XE_IOCTL_ERR(xe, err)) + return err; + + if (ext.next_extension) + return engine_user_extensions(xe, e, ext.next_extension, + ++ext_number, create); + + return 0; +} + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +static struct xe_hw_engine * +find_hw_engine(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + u32 idx; + + if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.tile_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +} + +static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 logical_mask = 0; + + if (XE_IOCTL_ERR(xe, width != 1)) + return 0; + if (XE_IOCTL_ERR(xe, num_placements != 1)) + return 0; + if (XE_IOCTL_ERR(xe, eci[0].engine_instance 
!= 0)) + return 0; + + eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + if (hwe->class == + user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) + logical_mask |= BIT(hwe->logical_instance); + } + + return logical_mask; +} + +static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) +{ + int len = width * num_placements; + int i, j, n; + u16 class; + u16 gt_id; + u32 return_mask = 0, prev_mask; + + if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) && + len > 1)) + return 0; + + for (i = 0; i < width; ++i) { + u32 current_mask = 0; + + for (j = 0; j < num_placements; ++j) { + struct xe_hw_engine *hwe; + + n = j * width + i; + + hwe = find_hw_engine(xe, eci[n]); + if (XE_IOCTL_ERR(xe, !hwe)) + return 0; + + if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe))) + return 0; + + if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) || + XE_IOCTL_ERR(xe, n && eci[n].engine_class != class)) + return 0; + + class = eci[n].engine_class; + gt_id = eci[n].gt_id; + + if (width == 1 || !i) + return_mask |= BIT(eci[n].engine_instance); + current_mask |= BIT(eci[n].engine_instance); + } + + /* Parallel submissions must be logically contiguous */ + if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1)) + return 0; + + prev_mask = current_mask; + } + + return return_mask; +} + +int xe_engine_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_create *args = data; + struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; + struct drm_xe_engine_class_instance __user *user_eci = + u64_to_user_ptr(args->instances); + struct xe_hw_engine *hwe; + struct xe_vm *vm, *migrate_vm; + struct xe_gt *gt; + struct xe_engine *e = NULL; + u32 logical_mask; + u32 id; + int len; + int err; + + if (XE_IOCTL_ERR(xe, args->flags)) + return -EINVAL; + + len = args->width * args->num_placements; + if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) + return -EINVAL; + + err = __copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * + len); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count)) + return -EINVAL; + + xe_pm_runtime_get(xe); + + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { + for_each_gt(gt, xe, id) { + struct xe_engine *new; + + if (xe_gt_is_media_type(gt)) + continue; + + eci[0].gt_id = gt->info.id; + logical_mask = bind_engine_logical_mask(xe, gt, eci, + args->width, + args->num_placements); + if (XE_IOCTL_ERR(xe, !logical_mask)) { + err = -EINVAL; + goto put_rpm; + } + + hwe = find_hw_engine(xe, eci[0]); + if (XE_IOCTL_ERR(xe, !hwe)) { + err = -EINVAL; + goto put_rpm; + } + + migrate_vm = xe_migrate_get_vm(gt->migrate); + new = xe_engine_create(xe, migrate_vm, logical_mask, + args->width, hwe, + ENGINE_FLAG_PERSISTENT | + ENGINE_FLAG_VM | + (id ? 
+ ENGINE_FLAG_BIND_ENGINE_CHILD : + 0)); + xe_vm_put(migrate_vm); + if (IS_ERR(new)) { + err = PTR_ERR(new); + if (e) + goto put_engine; + goto put_rpm; + } + if (id == 0) + e = new; + else + list_add_tail(&new->multi_gt_list, + &e->multi_gt_link); + } + } else { + gt = xe_device_get_gt(xe, eci[0].gt_id); + logical_mask = calc_validate_logical_mask(xe, gt, eci, + args->width, + args->num_placements); + if (XE_IOCTL_ERR(xe, !logical_mask)) { + err = -EINVAL; + goto put_rpm; + } + + hwe = find_hw_engine(xe, eci[0]); + if (XE_IOCTL_ERR(xe, !hwe)) { + err = -EINVAL; + goto put_rpm; + } + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) { + err = -ENOENT; + goto put_rpm; + } + + e = xe_engine_create(xe, vm, logical_mask, + args->width, hwe, ENGINE_FLAG_PERSISTENT); + xe_vm_put(vm); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto put_rpm; + } + } + + if (args->extensions) { + err = engine_user_extensions(xe, e, args->extensions, 0, true); + if (XE_IOCTL_ERR(xe, err)) + goto put_engine; + } + + if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) != + !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) { + err = -ENOTSUPP; + goto put_engine; + } + + e->persitent.xef = xef; + + mutex_lock(&xef->engine.lock); + err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&xef->engine.lock); + if (err) + goto put_engine; + + args->engine_id = id; + + return 0; + +put_engine: + xe_engine_kill(e); + xe_engine_put(e); +put_rpm: + xe_pm_runtime_put(xe); + return err; +} + +static void engine_kill_compute(struct xe_engine *e) +{ + if (!xe_vm_in_compute_mode(e->vm)) + return; + + down_write(&e->vm->lock); + list_del(&e->compute.link); + --e->vm->preempt.num_engines; + if (e->compute.pfence) { + dma_fence_enable_sw_signaling(e->compute.pfence); + dma_fence_put(e->compute.pfence); + e->compute.pfence = NULL; + } + up_write(&e->vm->lock); +} + +void xe_engine_kill(struct xe_engine *e) +{ + struct xe_engine *engine = e, *next; + + list_for_each_entry_safe(engine, next, &engine->multi_gt_list, + multi_gt_link) { + e->ops->kill(engine); + engine_kill_compute(engine); + } + + e->ops->kill(e); + engine_kill_compute(e); +} + +int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_destroy *args = data; + struct xe_engine *e; + + if (XE_IOCTL_ERR(xe, args->pad)) + return -EINVAL; + + mutex_lock(&xef->engine.lock); + e = xa_erase(&xef->engine.xa, args->engine_id); + mutex_unlock(&xef->engine.lock); + if (XE_IOCTL_ERR(xe, !e)) + return -ENOENT; + + if (!(e->flags & ENGINE_FLAG_PERSISTENT)) + xe_engine_kill(e); + else + xe_device_add_persitent_engines(xe, e); + + trace_xe_engine_close(e); + xe_engine_put(e); + xe_pm_runtime_put(xe); + + return 0; +} + +int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_set_property *args = data; + struct xe_engine *e; + int ret; + u32 idx; + + e = xe_engine_lookup(xef, args->engine_id); + if (XE_IOCTL_ERR(xe, !e)) + return -ENOENT; + + if (XE_IOCTL_ERR(xe, args->property >= + ARRAY_SIZE(engine_set_property_funcs))) { + ret = -EINVAL; + goto out; + } + + idx = array_index_nospec(args->property, + ARRAY_SIZE(engine_set_property_funcs)); + ret = engine_set_property_funcs[idx](xe, e, args->value, false); + if (XE_IOCTL_ERR(xe, ret)) + goto out; + + if 
(args->extensions) + ret = engine_user_extensions(xe, e, args->extensions, 0, + false); +out: + xe_engine_put(e); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h new file mode 100644 index 000000000000..4d1b609fea7e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_engine.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_ENGINE_H_ +#define _XE_ENGINE_H_ + +#include "xe_engine_types.h" +#include "xe_vm_types.h" + +struct drm_device; +struct drm_file; +struct xe_device; +struct xe_file; + +struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hw_engine, u32 flags); +struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags); + +void xe_engine_fini(struct xe_engine *e); +void xe_engine_destroy(struct kref *ref); + +struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id); + +static inline struct xe_engine *xe_engine_get(struct xe_engine *engine) +{ + kref_get(&engine->refcount); + return engine; +} + +static inline void xe_engine_put(struct xe_engine *engine) +{ + kref_put(&engine->refcount, xe_engine_destroy); +} + +static inline bool xe_engine_is_parallel(struct xe_engine *engine) +{ + return engine->width > 1; +} + +void xe_engine_kill(struct xe_engine *e); + +int xe_engine_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h new file mode 100644 index 000000000000..3dfa1c14e181 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_engine_types.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_ENGINE_TYPES_H_ +#define _XE_ENGINE_TYPES_H_ + +#include + +#include + +#include "xe_gpu_scheduler_types.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence_types.h" +#include "xe_lrc_types.h" + +struct xe_execlist_engine; +struct xe_gt; +struct xe_guc_engine; +struct xe_hw_engine; +struct xe_vm; + +enum xe_engine_priority { + XE_ENGINE_PRIORITY_UNSET = -2, /* For execlist usage only */ + XE_ENGINE_PRIORITY_LOW = 0, + XE_ENGINE_PRIORITY_NORMAL, + XE_ENGINE_PRIORITY_HIGH, + XE_ENGINE_PRIORITY_KERNEL, + + XE_ENGINE_PRIORITY_COUNT +}; + +/** + * struct xe_engine - Submission engine + * + * Contains all state necessary for submissions. Can either be a user object or + * a kernel object. + */ +struct xe_engine { + /** @gt: graphics tile this engine can submit to */ + struct xe_gt *gt; + /** + * @hwe: A hardware of the same class. May (physical engine) or may not + * (virtual engine) be where jobs actual engine up running. Should never + * really be used for submissions. 
+ */ + struct xe_hw_engine *hwe; + /** @refcount: ref count of this engine */ + struct kref refcount; + /** @vm: VM (address space) for this engine */ + struct xe_vm *vm; + /** @class: class of this engine */ + enum xe_engine_class class; + /** @priority: priority of this exec queue */ + enum xe_engine_priority priority; + /** + * @logical_mask: logical mask of where job submitted to engine can run + */ + u32 logical_mask; + /** @name: name of this engine */ + char name[MAX_FENCE_NAME_LEN]; + /** @width: width (number BB submitted per exec) of this engine */ + u16 width; + /** @fence_irq: fence IRQ used to signal job completion */ + struct xe_hw_fence_irq *fence_irq; + +#define ENGINE_FLAG_BANNED BIT(0) +#define ENGINE_FLAG_KERNEL BIT(1) +#define ENGINE_FLAG_PERSISTENT BIT(2) +#define ENGINE_FLAG_COMPUTE_MODE BIT(3) +#define ENGINE_FLAG_VM BIT(4) +#define ENGINE_FLAG_BIND_ENGINE_CHILD BIT(5) +#define ENGINE_FLAG_WA BIT(6) + + /** + * @flags: flags for this engine, should statically setup aside from ban + * bit + */ + unsigned long flags; + + union { + /** @multi_gt_list: list head for VM bind engines if multi-GT */ + struct list_head multi_gt_list; + /** @multi_gt_link: link for VM bind engines if multi-GT */ + struct list_head multi_gt_link; + }; + + union { + /** @execlist: execlist backend specific state for engine */ + struct xe_execlist_engine *execlist; + /** @guc: GuC backend specific state for engine */ + struct xe_guc_engine *guc; + }; + + /** + * @persitent: persitent engine state + */ + struct { + /** @xef: file which this engine belongs to */ + struct xe_file *xef; + /** @link: link in list of persitent engines */ + struct list_head link; + } persitent; + + union { + /** + * @parallel: parallel submission state + */ + struct { + /** @composite_fence_ctx: context composite fence */ + u64 composite_fence_ctx; + /** @composite_fence_seqno: seqno for composite fence */ + u32 composite_fence_seqno; + } parallel; + /** + * @bind: bind submission state + */ + struct { + /** @fence_ctx: context bind fence */ + u64 fence_ctx; + /** @fence_seqno: seqno for bind fence */ + u32 fence_seqno; + } bind; + }; + + /** @sched_props: scheduling properties */ + struct { + /** @timeslice_us: timeslice period in micro-seconds */ + u32 timeslice_us; + /** @preempt_timeout_us: preemption timeout in micro-seconds */ + u32 preempt_timeout_us; + } sched_props; + + /** @compute: compute engine state */ + struct { + /** @pfence: preemption fence */ + struct dma_fence *pfence; + /** @context: preemption fence context */ + u64 context; + /** @seqno: preemption fence seqno */ + u32 seqno; + /** @link: link into VM's list of engines */ + struct list_head link; + /** @lock: preemption fences lock */ + spinlock_t lock; + } compute; + + /** @usm: unified shared memory state */ + struct { + /** @acc_trigger: access counter trigger */ + u32 acc_trigger; + /** @acc_notify: access counter notify */ + u32 acc_notify; + /** @acc_granularity: access counter granularity */ + u32 acc_granularity; + } usm; + + /** @ops: submission backend engine operations */ + const struct xe_engine_ops *ops; + + /** @ring_ops: ring operations for this engine */ + const struct xe_ring_ops *ring_ops; + /** @entity: DRM sched entity for this engine (1 to 1 relationship) */ + struct drm_sched_entity *entity; + /** @lrc: logical ring context for this engine */ + struct xe_lrc lrc[0]; +}; + +/** + * struct xe_engine_ops - Submission backend engine operations + */ +struct xe_engine_ops { + /** @init: Initialize engine for submission backend */ 
+ int (*init)(struct xe_engine *e); + /** @kill: Kill inflight submissions for backend */ + void (*kill)(struct xe_engine *e); + /** @fini: Fini engine for submission backend */ + void (*fini)(struct xe_engine *e); + /** @set_priority: Set priority for engine */ + int (*set_priority)(struct xe_engine *e, + enum xe_engine_priority priority); + /** @set_timeslice: Set timeslice for engine */ + int (*set_timeslice)(struct xe_engine *e, u32 timeslice_us); + /** @set_preempt_timeout: Set preemption timeout for engine */ + int (*set_preempt_timeout)(struct xe_engine *e, u32 preempt_timeout_us); + /** @set_job_timeout: Set job timeout for engine */ + int (*set_job_timeout)(struct xe_engine *e, u32 job_timeout_ms); + /** + * @suspend: Suspend engine from executing, allowed to be called + * multiple times in a row before resume with the caveat that + * suspend_wait returns before calling suspend again. + */ + int (*suspend)(struct xe_engine *e); + /** + * @suspend_wait: Wait for an engine to suspend executing, should be + * call after suspend. + */ + void (*suspend_wait)(struct xe_engine *e); + /** + * @resume: Resume engine execution, engine must be in a suspended + * state and dma fence returned from most recent suspend call must be + * signalled when this function is called. + */ + void (*resume)(struct xe_engine *e); +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c new file mode 100644 index 000000000000..00f298acc436 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_exec.h" +#include "xe_macros.h" +#include "xe_sched_job.h" +#include "xe_sync.h" +#include "xe_vm.h" + +/** + * DOC: Execbuf (User GPU command submission) + * + * Execs have historically been rather complicated in DRM drivers (at least in + * the i915) because a few things: + * + * - Passing in a list BO which are read / written to creating implicit syncs + * - Binding at exec time + * - Flow controlling the ring at exec time + * + * In XE we avoid all of this complication by not allowing a BO list to be + * passed into an exec, using the dma-buf implicit sync uAPI, have binds as + * seperate operations, and using the DRM scheduler to flow control the ring. + * Let's deep dive on each of these. + * + * We can get away from a BO list by forcing the user to use in / out fences on + * every exec rather than the kernel tracking dependencies of BO (e.g. if the + * user knows an exec writes to a BO and reads from the BO in the next exec, it + * is the user's responsibility to pass in / out fence between the two execs). + * + * Implicit dependencies for external BOs are handled by using the dma-buf + * implicit dependency uAPI (TODO: add link). To make this works each exec must + * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external + * BO mapped in the VM. + * + * We do not allow a user to trigger a bind at exec time rather we have a VM + * bind IOCTL which uses the same in / out fence interface as exec. In that + * sense, a VM bind is basically the same operation as an exec from the user + * perspective. e.g. If an exec depends on a VM bind use the in / out fence + * interface (struct drm_xe_sync) to synchronize like syncing between two + * dependent execs. 
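+ *
+ * As a rough illustration (pseudo-code only, not the exact uAPI structs),
+ * chaining a bind and an exec through a shared syncobj would look like:
+ *
+ * .. code-block::
+ *
+ *	bind.syncs[0] = { syncobj A, SIGNAL }   <- A signals once the bind completes
+ *	VM_BIND(bind)
+ *	exec.syncs[0] = { syncobj A, WAIT }     <- the exec waits on A
+ *	EXEC(exec)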
+ * + * Although a user cannot trigger a bind, we still have to rebind userptrs in + * the VM that have been invalidated since the last exec, likewise we also have + * to rebind BOs that have been evicted by the kernel. We schedule these rebinds + * behind any pending kernel operations on any external BOs in VM or any BOs + * private to the VM. This is accomplished by the rebinds waiting on BOs + * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs + * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and + * in DMA_RESV_USAGE_WRITE for external BOs). + * + * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute + * mode VMs we use preempt fences and a rebind worker (TODO: add link). + * + * There is no need to flow control the ring in the exec as we write the ring at + * submission time and set the DRM scheduler max job limit SIZE_OF_RING / + * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the + * ring is available. + * + * All of this results in a rather simple exec implementation. + * + * Flow + * ~~~~ + * + * .. code-block:: + * + * Parse input arguments + * Wait for any async VM bind passed as in-fences to start + * <----------------------------------------------------------------------| + * Lock global VM lock in read mode | + * Pin userptrs (also finds userptr invalidated since last exec) | + * Lock exec (VM dma-resv lock, external BOs dma-resv locks) | + * Validate BOs that have been evicted | + * Create job | + * Rebind invalidated userptrs + evicted BOs (non-compute-mode) | + * Add rebind fence dependency to job | + * Add job VM dma-resv bookkeeping slot (non-compute mode) | + * Add job to external BOs dma-resv write slots (non-compute mode) | + * Check if any userptrs invalidated since pin ------ Drop locks ---------| + * Install in / out fences for job + * Submit job + * Unlock all + */ + +static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, + struct ttm_validate_buffer tv_onstack[], + struct ttm_validate_buffer **tv, + struct list_head *objs) +{ + struct xe_vm *vm = e->vm; + struct xe_vma *vma; + LIST_HEAD(dups); + int err; + + *tv = NULL; + if (xe_vm_no_dma_fences(e->vm)) + return 0; + + err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1); + if (err) + return err; + + /* + * Validate BOs that have been evicted (i.e. make sure the + * BOs have valid placements possibly moving an evicted BO back + * to a location where the GPU can access it). 
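+	 * Userptr VMAs are skipped here; they have no BO to validate and
+	 * are pinned / rebound through the userptr paths instead.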
+ */ + list_for_each_entry(vma, &vm->rebind_list, rebind_link) { + if (xe_vma_is_userptr(vma)) + continue; + + err = xe_bo_validate(vma->bo, vm, false); + if (err) { + xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs); + *tv = NULL; + return err; + } + } + + return 0; +} + +static void xe_exec_end(struct xe_engine *e, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer *tv, + struct ww_acquire_ctx *ww, + struct list_head *objs) +{ + if (!xe_vm_no_dma_fences(e->vm)) + xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs); +} + +int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec *args = data; + struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs); + u64 __user *addresses_user = u64_to_user_ptr(args->address); + struct xe_engine *engine; + struct xe_sync_entry *syncs = NULL; + u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; + struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; + struct ttm_validate_buffer *tv = NULL; + u32 i, num_syncs = 0; + struct xe_sched_job *job; + struct dma_fence *rebind_fence; + struct xe_vm *vm; + struct ww_acquire_ctx ww; + struct list_head objs; + bool write_locked; + int err = 0; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + engine = xe_engine_lookup(xef, args->engine_id); + if (XE_IOCTL_ERR(xe, !engine)) + return -ENOENT; + + if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) { + err = -ECANCELED; + goto err_engine; + } + + if (args->num_syncs) { + syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); + if (!syncs) { + err = -ENOMEM; + goto err_engine; + } + } + + vm = engine->vm; + + for (i = 0; i < args->num_syncs; i++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], + &syncs_user[i], true, + xe_vm_no_dma_fences(vm)); + if (err) + goto err_syncs; + } + + if (xe_engine_is_parallel(engine)) { + err = __copy_from_user(addresses, addresses_user, sizeof(u64) * + engine->width); + if (err) { + err = -EFAULT; + goto err_syncs; + } + } + + /* + * We can't install a job into the VM dma-resv shared slot before an + * async VM bind passed in as a fence without the risk of deadlocking as + * the bind can trigger an eviction which in turn depends on anything in + * the VM dma-resv shared slots. Not an ideal solution, but we wait for + * all dependent async VM binds to start (install correct fences into + * dma-resv slots) before moving forward. + */ + if (!xe_vm_no_dma_fences(vm) && + vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) { + for (i = 0; i < args->num_syncs; i++) { + struct dma_fence *fence = syncs[i].fence; + if (fence) { + err = xe_vm_async_fence_wait_start(fence); + if (err) + goto err_syncs; + } + } + } + +retry: + if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) { + err = down_write_killable(&vm->lock); + write_locked = true; + } else { + /* We don't allow execs while the VM is in error state */ + err = down_read_interruptible(&vm->lock); + write_locked = false; + } + if (err) + goto err_syncs; + + /* We don't allow execs while the VM is in error state */ + if (vm->async_ops.error) { + err = vm->async_ops.error; + goto err_unlock_list; + } + + /* + * Extreme corner where we exit a VM error state with a munmap style VM + * unbind inflight which requires a rebind. 
In this case the rebind + * needs to install some fences into the dma-resv slots. The worker to + * do this queued, let that worker make progress by dropping vm->lock, + * flushing the worker and retrying the exec. + */ + if (vm->async_ops.munmap_rebind_inflight) { + if (write_locked) + up_write(&vm->lock); + else + up_read(&vm->lock); + flush_work(&vm->async_ops.work); + goto retry; + } + + if (write_locked) { + err = xe_vm_userptr_pin(vm); + downgrade_write(&vm->lock); + write_locked = false; + if (err) + goto err_unlock_list; + } + + err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs); + if (err) + goto err_unlock_list; + + if (xe_vm_is_closed(engine->vm)) { + drm_warn(&xe->drm, "Trying to schedule after vm is closed\n"); + err = -EIO; + goto err_engine_end; + } + + job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ? + addresses : &args->address); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err_engine_end; + } + + /* + * Rebind any invalidated userptr or evicted BOs in the VM, non-compute + * VM mode only. + */ + rebind_fence = xe_vm_rebind(vm, false); + if (IS_ERR(rebind_fence)) { + err = PTR_ERR(rebind_fence); + goto err_put_job; + } + + /* + * We store the rebind_fence in the VM so subsequent execs don't get + * scheduled before the rebinds of userptrs / evicted BOs is complete. + */ + if (rebind_fence) { + dma_fence_put(vm->rebind_fence); + vm->rebind_fence = rebind_fence; + } + if (vm->rebind_fence) { + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &vm->rebind_fence->flags)) { + dma_fence_put(vm->rebind_fence); + vm->rebind_fence = NULL; + } else { + dma_fence_get(vm->rebind_fence); + err = drm_sched_job_add_dependency(&job->drm, + vm->rebind_fence); + if (err) + goto err_put_job; + } + } + + /* Wait behind munmap style rebinds */ + if (!xe_vm_no_dma_fences(vm)) { + err = drm_sched_job_add_resv_dependencies(&job->drm, + &vm->resv, + DMA_RESV_USAGE_KERNEL); + if (err) + goto err_put_job; + } + + for (i = 0; i < num_syncs && !err; i++) + err = xe_sync_entry_add_deps(&syncs[i], job); + if (err) + goto err_put_job; + + if (!xe_vm_no_dma_fences(vm)) { + err = down_read_interruptible(&vm->userptr.notifier_lock); + if (err) + goto err_put_job; + + err = __xe_vm_userptr_needs_repin(vm); + if (err) + goto err_repin; + } + + /* + * Point of no return, if we error after this point just set an error on + * the job and let the DRM scheduler / backend clean up the job. + */ + xe_sched_job_arm(job); + if (!xe_vm_no_dma_fences(vm)) { + /* Block userptr invalidations / BO eviction */ + dma_resv_add_fence(&vm->resv, + &job->drm.s_fence->finished, + DMA_RESV_USAGE_BOOKKEEP); + + /* + * Make implicit sync work across drivers, assuming all external + * BOs are written as we don't pass in a read / write list. 
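+	 * Treating every external BO as written can over-serialize readers
+	 * in other drivers, but it errs on the side of correctness.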
+ */ + xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished, + DMA_RESV_USAGE_WRITE); + } + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], job, + &job->drm.s_fence->finished); + + xe_sched_job_push(job); + +err_repin: + if (!xe_vm_no_dma_fences(vm)) + up_read(&vm->userptr.notifier_lock); +err_put_job: + if (err) + xe_sched_job_put(job); +err_engine_end: + xe_exec_end(engine, tv_onstack, tv, &ww, &objs); +err_unlock_list: + if (write_locked) + up_write(&vm->lock); + else + up_read(&vm->lock); + if (err == -EAGAIN) + goto retry; +err_syncs: + for (i = 0; i < num_syncs; i++) + xe_sync_entry_cleanup(&syncs[i]); + kfree(syncs); +err_engine: + xe_engine_put(engine); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_exec.h b/drivers/gpu/drm/xe/xe_exec.h new file mode 100644 index 000000000000..e4932494cea3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_EXEC_H_ +#define _XE_EXEC_H_ + +struct drm_device; +struct drm_file; + +int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c new file mode 100644 index 000000000000..47587571123a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include "xe_execlist.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_hw_fence.h" +#include "xe_gt.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_mocs.h" +#include "xe_ring_ops_types.h" +#include "xe_sched_job.h" + +#include "i915_reg.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_lrc_reg.h" +#include "gt/intel_engine_regs.h" + +#define XE_EXECLIST_HANG_LIMIT 1 + +#define GEN11_SW_CTX_ID_SHIFT 37 +#define GEN11_SW_CTX_ID_WIDTH 11 +#define XEHP_SW_CTX_ID_SHIFT 39 +#define XEHP_SW_CTX_ID_WIDTH 16 + +#define GEN11_SW_CTX_ID \ + GENMASK_ULL(GEN11_SW_CTX_ID_WIDTH + GEN11_SW_CTX_ID_SHIFT - 1, \ + GEN11_SW_CTX_ID_SHIFT) + +#define XEHP_SW_CTX_ID \ + GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \ + XEHP_SW_CTX_ID_SHIFT) + + +static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, + u32 ctx_id) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + u64 lrc_desc; + + printk(KERN_INFO "__start_lrc(%s, 0x%p, %u)\n", hwe->name, lrc, ctx_id); + + lrc_desc = xe_lrc_descriptor(lrc); + + if (GRAPHICS_VERx100(xe) >= 1250) { + XE_BUG_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); + lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id); + } else { + XE_BUG_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id)); + lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id); + } + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE) + xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg, + _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); + + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + lrc->ring.old_tail = lrc->ring.tail; + + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, the empirical + * evidence (esp. 
on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. + */ + wmb(); + + xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg, + xe_bo_ggtt_addr(hwe->hwsp)); + xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg); + xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg, + _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 0, + lower_32_bits(lrc_desc)); + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 4, + upper_32_bits(lrc_desc)); + xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg, + EL_CTRL_LOAD); +} + +static void __xe_execlist_port_start(struct xe_execlist_port *port, + struct xe_execlist_engine *exl) +{ + struct xe_device *xe = gt_to_xe(port->hwe->gt); + int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID); + + if (GRAPHICS_VERx100(xe) >= 1250) + max_ctx = FIELD_MAX(XEHP_SW_CTX_ID); + + xe_execlist_port_assert_held(port); + + if (port->running_exl != exl || !exl->has_run) { + port->last_ctx_id++; + + /* 0 is reserved for the kernel context */ + if (port->last_ctx_id > max_ctx) + port->last_ctx_id = 1; + } + + __start_lrc(port->hwe, exl->engine->lrc, port->last_ctx_id); + port->running_exl = exl; + exl->has_run = true; +} + +static void __xe_execlist_port_idle(struct xe_execlist_port *port) +{ + u32 noop[2] = { MI_NOOP, MI_NOOP }; + + xe_execlist_port_assert_held(port); + + if (!port->running_exl) + return; + + printk(KERN_INFO "__xe_execlist_port_idle(%d:%d)\n", port->hwe->class, + port->hwe->instance); + + xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop)); + __start_lrc(port->hwe, &port->hwe->kernel_lrc, 0); + port->running_exl = NULL; +} + +static bool xe_execlist_is_idle(struct xe_execlist_engine *exl) +{ + struct xe_lrc *lrc = exl->engine->lrc; + + return lrc->ring.tail == lrc->ring.old_tail; +} + +static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port) +{ + struct xe_execlist_engine *exl = NULL; + int i; + + xe_execlist_port_assert_held(port); + + for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) { + while (!list_empty(&port->active[i])) { + exl = list_first_entry(&port->active[i], + struct xe_execlist_engine, + active_link); + list_del(&exl->active_link); + + if (xe_execlist_is_idle(exl)) { + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; + continue; + } + + list_add_tail(&exl->active_link, &port->active[i]); + __xe_execlist_port_start(port, exl); + return; + } + } + + __xe_execlist_port_idle(port); +} + +static u64 read_execlist_status(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + u32 hi, lo; + + lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base).reg); + hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base).reg); + + printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class, + hwe->instance, hi, lo); + + return lo | (u64)hi << 32; +} + +static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port) +{ + u64 status; + + xe_execlist_port_assert_held(port); + + status = read_execlist_status(port->hwe); + if (status & BIT(7)) + return; + + __xe_execlist_port_start_next_active(port); +} + +static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe, + u16 intr_vec) +{ + struct xe_execlist_port *port = hwe->exl_port; + + spin_lock(&port->lock); + xe_execlist_port_irq_handler_locked(port); 
+ spin_unlock(&port->lock); +} + +static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, + enum xe_engine_priority priority) +{ + xe_execlist_port_assert_held(port); + + if (port->running_exl && port->running_exl->active_priority >= priority) + return; + + __xe_execlist_port_start_next_active(port); +} + +static void xe_execlist_make_active(struct xe_execlist_engine *exl) +{ + struct xe_execlist_port *port = exl->port; + enum xe_engine_priority priority = exl->active_priority; + + XE_BUG_ON(priority == XE_ENGINE_PRIORITY_UNSET); + XE_BUG_ON(priority < 0); + XE_BUG_ON(priority >= ARRAY_SIZE(exl->port->active)); + + spin_lock_irq(&port->lock); + + if (exl->active_priority != priority && + exl->active_priority != XE_ENGINE_PRIORITY_UNSET) { + /* Priority changed, move it to the right list */ + list_del(&exl->active_link); + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; + } + + if (exl->active_priority == XE_ENGINE_PRIORITY_UNSET) { + exl->active_priority = priority; + list_add_tail(&exl->active_link, &port->active[priority]); + } + + xe_execlist_port_wake_locked(exl->port, priority); + + spin_unlock_irq(&port->lock); +} + +static void xe_execlist_port_irq_fail_timer(struct timer_list *timer) +{ + struct xe_execlist_port *port = + container_of(timer, struct xe_execlist_port, irq_fail); + + spin_lock_irq(&port->lock); + xe_execlist_port_irq_handler_locked(port); + spin_unlock_irq(&port->lock); + + port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); + add_timer(&port->irq_fail); +} + +struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, + struct xe_hw_engine *hwe) +{ + struct drm_device *drm = &xe->drm; + struct xe_execlist_port *port; + int i; + + port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL); + if (!port) + return ERR_PTR(-ENOMEM); + + port->hwe = hwe; + + spin_lock_init(&port->lock); + for (i = 0; i < ARRAY_SIZE(port->active); i++) + INIT_LIST_HEAD(&port->active[i]); + + port->last_ctx_id = 1; + port->running_exl = NULL; + + hwe->irq_handler = xe_execlist_port_irq_handler; + + /* TODO: Fix the interrupt code so it doesn't race like mad */ + timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0); + port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); + add_timer(&port->irq_fail); + + return port; +} + +void xe_execlist_port_destroy(struct xe_execlist_port *port) +{ + del_timer(&port->irq_fail); + + /* Prevent an interrupt while we're destroying */ + spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock); + port->hwe->irq_handler = NULL; + spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock); +} + +static struct dma_fence * +execlist_run_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + struct xe_engine *e = job->engine; + struct xe_execlist_engine *exl = job->engine->execlist; + + e->ring_ops->emit_job(job); + xe_execlist_make_active(exl); + + return dma_fence_get(job->fence); +} + +static void execlist_job_free(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + + xe_sched_job_put(job); +} + +static const struct drm_sched_backend_ops drm_sched_ops = { + .run_job = execlist_run_job, + .free_job = execlist_job_free, +}; + +static int execlist_engine_init(struct xe_engine *e) +{ + struct drm_gpu_scheduler *sched; + struct xe_execlist_engine *exl; + int err; + + XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); + + exl = kzalloc(sizeof(*exl), GFP_KERNEL); + if (!exl) + return -ENOMEM; + + exl->engine = e; + + err = drm_sched_init(&exl->sched, 
&drm_sched_ops, NULL, 1, + e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, + XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT, + NULL, NULL, e->hwe->name, + gt_to_xe(e->gt)->drm.dev); + if (err) + goto err_free; + + sched = &exl->sched; + err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL); + if (err) + goto err_sched; + + exl->port = e->hwe->exl_port; + exl->has_run = false; + exl->active_priority = XE_ENGINE_PRIORITY_UNSET; + e->execlist = exl; + e->entity = &exl->entity; + + switch (e->class) { + case XE_ENGINE_CLASS_RENDER: + sprintf(e->name, "rcs%d", ffs(e->logical_mask) - 1); + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + sprintf(e->name, "vcs%d", ffs(e->logical_mask) - 1); + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + sprintf(e->name, "vecs%d", ffs(e->logical_mask) - 1); + break; + case XE_ENGINE_CLASS_COPY: + sprintf(e->name, "bcs%d", ffs(e->logical_mask) - 1); + break; + case XE_ENGINE_CLASS_COMPUTE: + sprintf(e->name, "ccs%d", ffs(e->logical_mask) - 1); + break; + default: + XE_WARN_ON(e->class); + } + + return 0; + +err_sched: + drm_sched_fini(&exl->sched); +err_free: + kfree(exl); + return err; +} + +static void execlist_engine_fini_async(struct work_struct *w) +{ + struct xe_execlist_engine *ee = + container_of(w, struct xe_execlist_engine, fini_async); + struct xe_engine *e = ee->engine; + struct xe_execlist_engine *exl = e->execlist; + unsigned long flags; + + XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); + + spin_lock_irqsave(&exl->port->lock, flags); + if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET)) + list_del(&exl->active_link); + spin_unlock_irqrestore(&exl->port->lock, flags); + + if (e->flags & ENGINE_FLAG_PERSISTENT) + xe_device_remove_persitent_engines(gt_to_xe(e->gt), e); + drm_sched_entity_fini(&exl->entity); + drm_sched_fini(&exl->sched); + kfree(exl); + + xe_engine_fini(e); +} + +static void execlist_engine_kill(struct xe_engine *e) +{ + /* NIY */ +} + +static void execlist_engine_fini(struct xe_engine *e) +{ + INIT_WORK(&e->execlist->fini_async, execlist_engine_fini_async); + queue_work(system_unbound_wq, &e->execlist->fini_async); +} + +static int execlist_engine_set_priority(struct xe_engine *e, + enum xe_engine_priority priority) +{ + /* NIY */ + return 0; +} + +static int execlist_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) +{ + /* NIY */ + return 0; +} + +static int execlist_engine_set_preempt_timeout(struct xe_engine *e, + u32 preempt_timeout_us) +{ + /* NIY */ + return 0; +} + +static int execlist_engine_set_job_timeout(struct xe_engine *e, + u32 job_timeout_ms) +{ + /* NIY */ + return 0; +} + +static int execlist_engine_suspend(struct xe_engine *e) +{ + /* NIY */ + return 0; +} + +static void execlist_engine_suspend_wait(struct xe_engine *e) + +{ + /* NIY */ +} + +static void execlist_engine_resume(struct xe_engine *e) +{ + xe_mocs_init_engine(e); +} + +static const struct xe_engine_ops execlist_engine_ops = { + .init = execlist_engine_init, + .kill = execlist_engine_kill, + .fini = execlist_engine_fini, + .set_priority = execlist_engine_set_priority, + .set_timeslice = execlist_engine_set_timeslice, + .set_preempt_timeout = execlist_engine_set_preempt_timeout, + .set_job_timeout = execlist_engine_set_job_timeout, + .suspend = execlist_engine_suspend, + .suspend_wait = execlist_engine_suspend_wait, + .resume = execlist_engine_resume, +}; + +int xe_execlist_init(struct xe_gt *gt) +{ + /* GuC submission enabled, nothing to do */ + if (xe_device_guc_submission_enabled(gt_to_xe(gt))) + return 0; + + 
gt->engine_ops = &execlist_engine_ops; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h new file mode 100644 index 000000000000..6a0442a6eff6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_EXECLIST_H_ +#define _XE_EXECLIST_H_ + +#include "xe_execlist_types.h" + +struct xe_device; +struct xe_gt; + +#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock); + +int xe_execlist_init(struct xe_gt *gt); +struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, + struct xe_hw_engine *hwe); +void xe_execlist_port_destroy(struct xe_execlist_port *port); + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h new file mode 100644 index 000000000000..9b1239b47292 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_EXECLIST_TYPES_H_ +#define _XE_EXECLIST_TYPES_H_ + +#include +#include +#include + +#include "xe_engine_types.h" + +struct xe_hw_engine; +struct xe_execlist_engine; + +struct xe_execlist_port { + struct xe_hw_engine *hwe; + + spinlock_t lock; + + struct list_head active[XE_ENGINE_PRIORITY_COUNT]; + + u32 last_ctx_id; + + struct xe_execlist_engine *running_exl; + + struct timer_list irq_fail; +}; + +struct xe_execlist_engine { + struct xe_engine *engine; + + struct drm_gpu_scheduler sched; + + struct drm_sched_entity entity; + + struct xe_execlist_port *port; + + bool has_run; + + struct work_struct fini_async; + + enum xe_engine_priority active_priority; + struct list_head active_link; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c new file mode 100644 index 000000000000..0320ce7ba3d1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_mmio.h" +#include "gt/intel_gt_regs.h" + +#define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 + +static struct xe_gt * +fw_to_gt(struct xe_force_wake *fw) +{ + return fw->gt; +} + +static struct xe_device * +fw_to_xe(struct xe_force_wake *fw) +{ + return gt_to_xe(fw_to_gt(fw)); +} + +static void domain_init(struct xe_force_wake_domain *domain, + enum xe_force_wake_domain_id id, + u32 reg, u32 ack, u32 val, u32 mask) +{ + domain->id = id; + domain->reg_ctl = reg; + domain->reg_ack = ack; + domain->val = val; + domain->mask = mask; +} + +#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) + +void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) +{ + struct xe_device *xe = gt_to_xe(gt); + + fw->gt = gt; + mutex_init(&fw->lock); + + /* Assuming gen11+ so assert this assumption is correct */ + XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + + if (xe->info.platform == XE_METEORLAKE) { + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], + XE_FW_DOMAIN_ID_GT, + FORCEWAKE_GT_GEN9.reg, + FORCEWAKE_ACK_GT_MTL.reg, + BIT(0), BIT(16)); + } else { + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], + XE_FW_DOMAIN_ID_GT, + FORCEWAKE_GT_GEN9.reg, + FORCEWAKE_ACK_GT_GEN9.reg, + BIT(0), BIT(16)); + } +} + +void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) +{ + int i, j; + + /* Assuming gen11+ so assert this assumption is correct */ + XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 
11); + + if (!xe_gt_is_media_type(gt)) + domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], + XE_FW_DOMAIN_ID_RENDER, + FORCEWAKE_RENDER_GEN9.reg, + FORCEWAKE_ACK_RENDER_GEN9.reg, + BIT(0), BIT(16)); + + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], + XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, + FORCEWAKE_MEDIA_VDBOX_GEN11(j).reg, + FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(j).reg, + BIT(0), BIT(16)); + } + + for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], + XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, + FORCEWAKE_MEDIA_VEBOX_GEN11(j).reg, + FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(j).reg, + BIT(0), BIT(16)); + } +} + +void xe_force_wake_prune(struct xe_gt *gt, struct xe_force_wake *fw) +{ + int i, j; + + /* Call after fuses have been read, prune domains that are fused off */ + + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) + if (!(gt->info.engine_mask & BIT(i))) + fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j].reg_ctl = 0; + + for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) + if (!(gt->info.engine_mask & BIT(i))) + fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j].reg_ctl = 0; +} + +static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) +{ + xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val); +} + +static int domain_wake_wait(struct xe_gt *gt, + struct xe_force_wake_domain *domain) +{ + return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, + XE_FORCE_WAKE_ACK_TIMEOUT_MS); +} + +static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) +{ + xe_mmio_write32(gt, domain->reg_ctl, domain->mask); +} + +static int domain_sleep_wait(struct xe_gt *gt, + struct xe_force_wake_domain *domain) +{ + return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val, + XE_FORCE_WAKE_ACK_TIMEOUT_MS); +} + +#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ + for (tmp__ = (mask__); tmp__ ;) \ + for_each_if((domain__ = ((fw__)->domains + \ + __mask_next_bit(tmp__))) && \ + domain__->reg_ctl) + +int xe_force_wake_get(struct xe_force_wake *fw, + enum xe_force_wake_domains domains) +{ + struct xe_device *xe = fw_to_xe(fw); + struct xe_gt *gt = fw_to_gt(fw); + struct xe_force_wake_domain *domain; + enum xe_force_wake_domains tmp, woken = 0; + int ret, ret2 = 0; + + mutex_lock(&fw->lock); + for_each_fw_domain_masked(domain, domains, fw, tmp) { + if (!domain->ref++) { + woken |= BIT(domain->id); + domain_wake(gt, domain); + } + } + for_each_fw_domain_masked(domain, woken, fw, tmp) { + ret = domain_wake_wait(gt, domain); + ret2 |= ret; + if (ret) + drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n", + domain->id, ret); + } + fw->awake_domains |= woken; + mutex_unlock(&fw->lock); + + return ret2; +} + +int xe_force_wake_put(struct xe_force_wake *fw, + enum xe_force_wake_domains domains) +{ + struct xe_device *xe = fw_to_xe(fw); + struct xe_gt *gt = fw_to_gt(fw); + struct xe_force_wake_domain *domain; + enum xe_force_wake_domains tmp, sleep = 0; + int ret, ret2 = 0; + + mutex_lock(&fw->lock); + for_each_fw_domain_masked(domain, domains, fw, tmp) { + if (!--domain->ref) { + sleep |= BIT(domain->id); + domain_sleep(gt, domain); + } + } + for_each_fw_domain_masked(domain, sleep, fw, tmp) { + ret = domain_sleep_wait(gt, domain); + ret2 |= 
ret; + if (ret) + drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n", + domain->id, ret); + } + fw->awake_domains &= ~sleep; + mutex_unlock(&fw->lock); + + return ret2; +} diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h new file mode 100644 index 000000000000..5adb8daa3b71 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_FORCE_WAKE_H_ +#define _XE_FORCE_WAKE_H_ + +#include "xe_force_wake_types.h" +#include "xe_macros.h" + +struct xe_gt; + +void xe_force_wake_init_gt(struct xe_gt *gt, + struct xe_force_wake *fw); +void xe_force_wake_init_engines(struct xe_gt *gt, + struct xe_force_wake *fw); +void xe_force_wake_prune(struct xe_gt *gt, + struct xe_force_wake *fw); +int xe_force_wake_get(struct xe_force_wake *fw, + enum xe_force_wake_domains domains); +int xe_force_wake_put(struct xe_force_wake *fw, + enum xe_force_wake_domains domains); + +static inline int +xe_force_wake_ref(struct xe_force_wake *fw, + enum xe_force_wake_domains domain) +{ + XE_BUG_ON(!domain); + return fw->domains[ffs(domain) - 1].ref; +} + +static inline void +xe_force_wake_assert_held(struct xe_force_wake *fw, + enum xe_force_wake_domains domain) +{ + XE_BUG_ON(!(fw->awake_domains & domain)); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h new file mode 100644 index 000000000000..208dd629d7b1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_FORCE_WAKE_TYPES_H_ +#define _XE_FORCE_WAKE_TYPES_H_ + +#include +#include + +enum xe_force_wake_domain_id { + XE_FW_DOMAIN_ID_GT = 0, + XE_FW_DOMAIN_ID_RENDER, + XE_FW_DOMAIN_ID_MEDIA, + XE_FW_DOMAIN_ID_MEDIA_VDBOX0, + XE_FW_DOMAIN_ID_MEDIA_VDBOX1, + XE_FW_DOMAIN_ID_MEDIA_VDBOX2, + XE_FW_DOMAIN_ID_MEDIA_VDBOX3, + XE_FW_DOMAIN_ID_MEDIA_VDBOX4, + XE_FW_DOMAIN_ID_MEDIA_VDBOX5, + XE_FW_DOMAIN_ID_MEDIA_VDBOX6, + XE_FW_DOMAIN_ID_MEDIA_VDBOX7, + XE_FW_DOMAIN_ID_MEDIA_VEBOX0, + XE_FW_DOMAIN_ID_MEDIA_VEBOX1, + XE_FW_DOMAIN_ID_MEDIA_VEBOX2, + XE_FW_DOMAIN_ID_MEDIA_VEBOX3, + XE_FW_DOMAIN_ID_GSC, + XE_FW_DOMAIN_ID_COUNT +}; + +enum xe_force_wake_domains { + XE_FW_GT = BIT(XE_FW_DOMAIN_ID_GT), + XE_FW_RENDER = BIT(XE_FW_DOMAIN_ID_RENDER), + XE_FW_MEDIA = BIT(XE_FW_DOMAIN_ID_MEDIA), + XE_FW_MEDIA_VDBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX0), + XE_FW_MEDIA_VDBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX1), + XE_FW_MEDIA_VDBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX2), + XE_FW_MEDIA_VDBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX3), + XE_FW_MEDIA_VDBOX4 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX4), + XE_FW_MEDIA_VDBOX5 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX5), + XE_FW_MEDIA_VDBOX6 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX6), + XE_FW_MEDIA_VDBOX7 = BIT(XE_FW_DOMAIN_ID_MEDIA_VDBOX7), + XE_FW_MEDIA_VEBOX0 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX0), + XE_FW_MEDIA_VEBOX1 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX1), + XE_FW_MEDIA_VEBOX2 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2), + XE_FW_MEDIA_VEBOX3 = BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3), + XE_FW_GSC = BIT(XE_FW_DOMAIN_ID_GSC), + XE_FORCEWAKE_ALL = BIT(XE_FW_DOMAIN_ID_COUNT) - 1 +}; + +/** + * struct xe_force_wake_domain - XE force wake domains + */ +struct xe_force_wake_domain { + /** @id: domain force wake id */ + enum xe_force_wake_domain_id id; + /** @reg_ctl: domain wake control register address */ + u32 reg_ctl; + /** @reg_ack: domain ack register 
address */ + u32 reg_ack; + /** @val: domain wake write value */ + u32 val; + /** @mask: domain mask */ + u32 mask; + /** @ref: domain reference */ + u32 ref; +}; + +/** + * struct xe_force_wake - XE force wake + */ +struct xe_force_wake { + /** @gt: back pointers to GT */ + struct xe_gt *gt; + /** @lock: protects everything force wake struct */ + struct mutex lock; + /** @awake_domains: mask of all domains awake */ + enum xe_force_wake_domains awake_domains; + /** @domains: force wake domains */ + struct xe_force_wake_domain domains[XE_FW_DOMAIN_ID_COUNT]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c new file mode 100644 index 000000000000..eab74a509f68 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_ggtt.h" + +#include +#include + +#include + +#include "xe_device.h" +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_mmio.h" +#include "xe_wopcm.h" + +#include "i915_reg.h" +#include "gt/intel_gt_regs.h" + +/* FIXME: Common file, preferably auto-gen */ +#define MTL_GGTT_PTE_PAT0 BIT(52) +#define MTL_GGTT_PTE_PAT1 BIT(53) + +u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) +{ + struct xe_device *xe = xe_bo_device(bo); + u64 pte; + bool is_lmem; + + pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem); + pte |= GEN8_PAGE_PRESENT; + + if (is_lmem) + pte |= GEN12_GGTT_PTE_LM; + + /* FIXME: vfunc + pass in caching rules */ + if (xe->info.platform == XE_METEORLAKE) { + pte |= MTL_GGTT_PTE_PAT0; + pte |= MTL_GGTT_PTE_PAT1; + } + + return pte; +} + +static unsigned int probe_gsm_size(struct pci_dev *pdev) +{ + u16 gmch_ctl, ggms; + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl); + ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK; + return ggms ? SZ_1M << ggms : 0; +} + +void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) +{ + XE_BUG_ON(addr & GEN8_PTE_MASK); + XE_BUG_ON(addr >= ggtt->size); + + writeq(pte, &ggtt->gsm[addr >> GEN8_PTE_SHIFT]); +} + +static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) +{ + u64 end = start + size - 1; + u64 scratch_pte; + + XE_BUG_ON(start >= end); + + if (ggtt->scratch) + scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); + else + scratch_pte = 0; + + while (start < end) { + xe_ggtt_set_pte(ggtt, start, scratch_pte); + start += GEN8_PAGE_SIZE; + } +} + +static void ggtt_fini_noalloc(struct drm_device *drm, void *arg) +{ + struct xe_ggtt *ggtt = arg; + + mutex_destroy(&ggtt->lock); + drm_mm_takedown(&ggtt->mm); + + xe_bo_unpin_map_no_vm(ggtt->scratch); +} + +int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + unsigned int gsm_size; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + ggtt->gt = gt; + + gsm_size = probe_gsm_size(pdev); + if (gsm_size == 0) { + drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); + return -ENOMEM; + } + + ggtt->gsm = gt->mmio.regs + SZ_8M; + ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE; + + /* + * 8B per entry, each points to a 4KB page. + * + * The GuC owns the WOPCM space, thus we can't allocate GGTT address in + * this area. Even though we likely configure the WOPCM to less than the + * maximum value, to simplify the driver load (no need to fetch HuC + + * GuC firmwares and determine there sizes before initializing the GGTT) + * just start the GGTT allocation above the max WOPCM size. 
This might + * waste space in the GGTT (WOPCM is 2MB on modern platforms) but we can + * live with this. + * + * Another benifit of this is the GuC bootrom can't access anything + * below the WOPCM max size so anything the bootom needs to access (e.g. + * a RSA key) needs to be placed in the GGTT above the WOPCM max size. + * Starting the GGTT allocations above the WOPCM max give us the correct + * placement for free. + */ + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), + ggtt->size - xe_wopcm_size(xe)); + mutex_init(&ggtt->lock); + + return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt); +} + +static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) +{ + struct drm_mm_node *hole; + u64 start, end; + + /* Display may have allocated inside ggtt, so be careful with clearing here */ + mutex_lock(&ggtt->lock); + drm_mm_for_each_hole(hole, &ggtt->mm, start, end) + xe_ggtt_clear(ggtt, start, end - start); + + xe_ggtt_invalidate(ggtt->gt); + mutex_unlock(&ggtt->lock); +} + +int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + ggtt->scratch = xe_bo_create_locked(xe, gt, NULL, GEN8_PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(ggtt->scratch)) { + err = PTR_ERR(ggtt->scratch); + goto err; + } + + err = xe_bo_pin(ggtt->scratch); + xe_bo_unlock_no_vm(ggtt->scratch); + if (err) { + xe_bo_put(ggtt->scratch); + goto err; + } + + xe_ggtt_initial_clear(ggtt); + return 0; +err: + ggtt->scratch = NULL; + return err; +} + +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) +#define PVC_GUC_TLB_INV_DESC0 _MMIO(0xcf7c) +#define PVC_GUC_TLB_INV_DESC0_VALID (1 << 0) +#define PVC_GUC_TLB_INV_DESC1 _MMIO(0xcf80) +#define PVC_GUC_TLB_INV_DESC1_INVALIDATE (1 << 6) + +void xe_ggtt_invalidate(struct xe_gt *gt) +{ + /* TODO: vfunc for GuC vs. non-GuC */ + + /* TODO: i915 makes comments about this being uncached and + * therefore flushing WC buffers. Is that really true here? 
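+	 * For now the write below is kept to mirror i915's GGTT
+	 * invalidation sequence.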
+ */ + xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN); + if (xe_device_guc_submission_enabled(gt_to_xe(gt))) { + struct xe_device *xe = gt_to_xe(gt); + + /* TODO: also use vfunc here */ + if (xe->info.platform == XE_PVC) { + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg, + PVC_GUC_TLB_INV_DESC0_VALID); + } else + xe_mmio_write32(gt, GEN12_GUC_TLB_INV_CR.reg, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + } +} + +void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) +{ + u64 addr, scratch_pte; + + scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); + + printk("%sGlobal GTT:", prefix); + for (addr = 0; addr < ggtt->size; addr += GEN8_PAGE_SIZE) { + unsigned int i = addr / GEN8_PAGE_SIZE; + + XE_BUG_ON(addr > U32_MAX); + if (ggtt->gsm[i] == scratch_pte) + continue; + + printk("%s ggtt[0x%08x] = 0x%016llx", + prefix, (u32)addr, ggtt->gsm[i]); + } +} + +int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, + u32 size, u32 align, u32 mm_flags) +{ + return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, + mm_flags); +} + +int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, + u32 size, u32 align) +{ + int ret; + + mutex_lock(&ggtt->lock); + ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, + align, DRM_MM_INSERT_HIGH); + mutex_unlock(&ggtt->lock); + + return ret; +} + +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + u64 start = bo->ggtt_node.start; + u64 offset, pte; + + for (offset = 0; offset < bo->size; offset += GEN8_PAGE_SIZE) { + pte = xe_ggtt_pte_encode(bo, offset); + xe_ggtt_set_pte(ggtt, start + offset, pte); + } + + xe_ggtt_invalidate(ggtt->gt); +} + +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + int err; + + if (XE_WARN_ON(bo->ggtt_node.size)) { + /* Someone's already inserted this BO in the GGTT */ + XE_BUG_ON(bo->ggtt_node.size != bo->size); + return 0; + } + + err = xe_bo_validate(bo, NULL, false); + if (err) + return err; + + mutex_lock(&ggtt->lock); + err = drm_mm_insert_node(&ggtt->mm, &bo->ggtt_node, bo->size); + if (!err) + xe_ggtt_map_bo(ggtt, bo); + mutex_unlock(&ggtt->lock); + + return 0; +} + +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) +{ + mutex_lock(&ggtt->lock); + + xe_ggtt_clear(ggtt, node->start, node->size); + drm_mm_remove_node(node); + node->size = 0; + + xe_ggtt_invalidate(ggtt->gt); + + mutex_unlock(&ggtt->lock); +} + +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + if (XE_WARN_ON(!bo->ggtt_node.size)) + return; + + /* This BO is not currently in the GGTT */ + XE_BUG_ON(bo->ggtt_node.size != bo->size); + + xe_ggtt_remove_node(ggtt, &bo->ggtt_node); +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h new file mode 100644 index 000000000000..289c6852ad1a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_GGTT_H_ +#define _XE_GGTT_H_ + +#include "xe_ggtt_types.h" + +u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset); +void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); +void xe_ggtt_invalidate(struct xe_gt *gt); +int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt); +int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt); +void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); + +int xe_ggtt_insert_special_node(struct xe_ggtt 
*ggtt, struct drm_mm_node *node, + u32 size, u32 align); +int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, + struct drm_mm_node *node, + u32 size, u32 align, u32 mm_flags); +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node); +void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h new file mode 100644 index 000000000000..e04193001763 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GGTT_TYPES_H_ +#define _XE_GGTT_TYPES_H_ + +#include + +struct xe_bo; +struct xe_gt; + +struct xe_ggtt { + struct xe_gt *gt; + + u64 size; + + struct xe_bo *scratch; + + struct mutex lock; + + u64 __iomem *gsm; + + struct drm_mm mm; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c new file mode 100644 index 000000000000..e4ad1d6ce1d5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gpu_scheduler.h" + +static void xe_sched_process_msg_queue(struct xe_gpu_scheduler *sched) +{ + if (!READ_ONCE(sched->base.pause_submit)) + queue_work(sched->base.submit_wq, &sched->work_process_msg); +} + +static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) +{ + struct xe_sched_msg *msg; + + spin_lock(&sched->base.job_list_lock); + msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); + if (msg) + xe_sched_process_msg_queue(sched); + spin_unlock(&sched->base.job_list_lock); +} + +static struct xe_sched_msg * +xe_sched_get_msg(struct xe_gpu_scheduler *sched) +{ + struct xe_sched_msg *msg; + + spin_lock(&sched->base.job_list_lock); + msg = list_first_entry_or_null(&sched->msgs, + struct xe_sched_msg, link); + if (msg) + list_del(&msg->link); + spin_unlock(&sched->base.job_list_lock); + + return msg; +} + +static void xe_sched_process_msg_work(struct work_struct *w) +{ + struct xe_gpu_scheduler *sched = + container_of(w, struct xe_gpu_scheduler, work_process_msg); + struct xe_sched_msg *msg; + + if (READ_ONCE(sched->base.pause_submit)) + return; + + msg = xe_sched_get_msg(sched); + if (msg) { + sched->ops->process_msg(msg); + + xe_sched_process_msg_queue_if_ready(sched); + } +} + +int xe_sched_init(struct xe_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + const struct xe_sched_backend_ops *xe_ops, + struct workqueue_struct *submit_wq, + uint32_t hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, + struct device *dev) +{ + sched->ops = xe_ops; + INIT_LIST_HEAD(&sched->msgs); + INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work); + + return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission, + hang_limit, timeout, timeout_wq, score, name, + dev); +} + +void xe_sched_fini(struct xe_gpu_scheduler *sched) +{ + xe_sched_submission_stop(sched); + drm_sched_fini(&sched->base); +} + +void xe_sched_submission_start(struct xe_gpu_scheduler *sched) +{ + drm_sched_wqueue_start(&sched->base); + queue_work(sched->base.submit_wq, &sched->work_process_msg); +} + +void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) +{ + 
drm_sched_wqueue_stop(&sched->base); + cancel_work_sync(&sched->work_process_msg); +} + +void xe_sched_add_msg(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + spin_lock(&sched->base.job_list_lock); + list_add_tail(&msg->link, &sched->msgs); + spin_unlock(&sched->base.job_list_lock); + + xe_sched_process_msg_queue(sched); +} diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h new file mode 100644 index 000000000000..10c6bb9c9386 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_SCHEDULER_H_ +#define _XE_GPU_SCHEDULER_H_ + +#include "xe_gpu_scheduler_types.h" +#include "xe_sched_job_types.h" + +int xe_sched_init(struct xe_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + const struct xe_sched_backend_ops *xe_ops, + struct workqueue_struct *submit_wq, + uint32_t hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, + struct device *dev); +void xe_sched_fini(struct xe_gpu_scheduler *sched); + +void xe_sched_submission_start(struct xe_gpu_scheduler *sched); +void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); + +void xe_sched_add_msg(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); + +static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) +{ + drm_sched_stop(&sched->base, NULL); +} + +static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) +{ + drm_sched_tdr_queue_imm(&sched->base); +} + +static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) +{ + drm_sched_resubmit_jobs(&sched->base); +} + +static inline bool +xe_sched_invalidate_job(struct xe_sched_job *job, int threshold) +{ + return drm_sched_invalidate_job(&job->drm, threshold); +} + +static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, + struct xe_sched_job *job) +{ + list_add(&job->drm.list, &sched->base.pending_list); +} + +static inline +struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) +{ + return list_first_entry_or_null(&sched->base.pending_list, + struct xe_sched_job, drm.list); +} + +static inline int +xe_sched_entity_init(struct xe_sched_entity *entity, + struct xe_gpu_scheduler *sched) +{ + return drm_sched_entity_init(entity, 0, + (struct drm_gpu_scheduler **)&sched, + 1, NULL); +} + +#define xe_sched_entity_fini drm_sched_entity_fini + +#endif diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h new file mode 100644 index 000000000000..6731b13da8bb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler_types.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_SCHEDULER_TYPES_H_ +#define _XE_GPU_SCHEDULER_TYPES_H_ + +#include + +/** + * struct xe_sched_msg - an in-band (relative to GPU scheduler run queue) + * message + * + * Generic enough for backend defined messages, backend can expand if needed. 
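+ *
+ * A backend would typically point @private_data at its own engine/context
+ * object and use @opcode to pick the operation (e.g. suspend, resume or
+ * cleanup) when its process_msg() handler runs.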
+ */ +struct xe_sched_msg { + /** @link: list link into the gpu scheduler list of messages */ + struct list_head link; + /** + * @private_data: opaque pointer to message private data (backend defined) + */ + void *private_data; + /** @opcode: opcode of message (backend defined) */ + unsigned int opcode; +}; + +/** + * struct xe_sched_backend_ops - Define the backend operations called by the + * scheduler + */ +struct xe_sched_backend_ops { + /** + * @process_msg: Process a message. Allowed to block, it is this + * function's responsibility to free message if dynamically allocated. + */ + void (*process_msg)(struct xe_sched_msg *msg); +}; + +/** + * struct xe_gpu_scheduler - Xe GPU scheduler + */ +struct xe_gpu_scheduler { + /** @base: DRM GPU scheduler */ + struct drm_gpu_scheduler base; + /** @ops: Xe scheduler ops */ + const struct xe_sched_backend_ops *ops; + /** @msgs: list of messages to be processed in @work_process_msg */ + struct list_head msgs; + /** @work_process_msg: processes messages */ + struct work_struct work_process_msg; +}; + +#define xe_sched_entity drm_sched_entity +#define xe_sched_policy drm_sched_policy + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c new file mode 100644 index 000000000000..5f8fa9d98d5a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include + +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_execlist.h" +#include "xe_force_wake.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_gt_clock.h" +#include "xe_gt_mcr.h" +#include "xe_gt_pagefault.h" +#include "xe_gt_sysfs.h" +#include "xe_gt_topology.h" +#include "xe_hw_fence.h" +#include "xe_irq.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_migrate.h" +#include "xe_mmio.h" +#include "xe_mocs.h" +#include "xe_reg_sr.h" +#include "xe_ring_ops.h" +#include "xe_sa.h" +#include "xe_sched_job.h" +#include "xe_ttm_gtt_mgr.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_tuning.h" +#include "xe_uc.h" +#include "xe_vm.h" +#include "xe_wa.h" +#include "xe_wopcm.h" + +#include "gt/intel_gt_regs.h" + +struct xe_gt *xe_find_full_gt(struct xe_gt *gt) +{ + struct xe_gt *search; + u8 id; + + XE_BUG_ON(!xe_gt_is_media_type(gt)); + + for_each_gt(search, gt_to_xe(gt), id) { + if (search->info.vram_id == gt->info.vram_id) + return search; + } + + XE_BUG_ON("NOT POSSIBLE"); + return NULL; +} + +int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) +{ + struct drm_device *drm = &xe->drm; + + XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); + + if (!xe_gt_is_media_type(gt)) { + gt->mem.ggtt = drmm_kzalloc(drm, sizeof(*gt->mem.ggtt), + GFP_KERNEL); + if (!gt->mem.ggtt) + return -ENOMEM; + + gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr), + GFP_KERNEL); + if (!gt->mem.vram_mgr) + return -ENOMEM; + + gt->mem.gtt_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.gtt_mgr), + GFP_KERNEL); + if (!gt->mem.gtt_mgr) + return -ENOMEM; + } else { + struct xe_gt *full_gt = xe_find_full_gt(gt); + + gt->mem.ggtt = full_gt->mem.ggtt; + gt->mem.vram_mgr = full_gt->mem.vram_mgr; + gt->mem.gtt_mgr = full_gt->mem.gtt_mgr; + } + + gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); + + return 0; +} + +/* FIXME: These should be in a common file */ +#define CHV_PPAT_SNOOP REG_BIT(6) +#define GEN8_PPAT_AGE(x) ((x)<<4) +#define GEN8_PPAT_LLCeLLC (3<<2) +#define GEN8_PPAT_LLCELLC (2<<2) +#define GEN8_PPAT_LLC (1<<2) 
+#define GEN8_PPAT_WB (3<<0) +#define GEN8_PPAT_WT (2<<0) +#define GEN8_PPAT_WC (1<<0) +#define GEN8_PPAT_UC (0<<0) +#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) +#define GEN12_PPAT_CLOS(x) ((x)<<2) + +static void tgl_setup_private_ppat(struct xe_gt *gt) +{ + /* TGL doesn't support LLC or AGE settings */ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB); +} + +static void pvc_setup_private_ppat(struct xe_gt *gt) +{ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, + GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, + GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, + GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, + GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB); +} + +#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) +#define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) +#define MTL_PPAT_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3) +#define MTL_PPAT_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1) +#define MTL_PPAT_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0) +#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3) +#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) +#define MTL_0_COH_NON REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0) + +static void mtl_setup_private_ppat(struct xe_gt *gt) +{ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, + MTL_PPAT_1_WT | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, + MTL_PPAT_3_UC | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, + MTL_PPAT_0_WB | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, + MTL_PPAT_0_WB | MTL_3_COH_2W); +} + +static void setup_private_ppat(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.platform == XE_METEORLAKE) + mtl_setup_private_ppat(gt); + else if (xe->info.platform == XE_PVC) + pvc_setup_private_ppat(gt); + else + tgl_setup_private_ppat(gt); +} + +static int gt_ttm_mgr_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + struct sysinfo si; + u64 gtt_size; + + si_meminfo(&si); + gtt_size = (u64)si.totalram * si.mem_unit * 3/4; + + if (gt->mem.vram.size) { + err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr); + if (err) + return err; + gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20), + gt->mem.vram.size), + gtt_size); + xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1; + } + + err = xe_ttm_gtt_mgr_init(gt, gt->mem.gtt_mgr, gtt_size); + if (err) + return err; + + return 0; +} + +static void gt_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + int i; + + destroy_workqueue(gt->ordered_wq); + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); +} + +static void 
gt_reset_worker(struct work_struct *w); + +int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) +{ + struct xe_sched_job *job; + struct xe_bb *bb; + struct dma_fence *fence; + u64 batch_ofs; + long timeout; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); + job = xe_bb_create_wa_job(e, bb, batch_ofs); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(bb); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) +{ + struct xe_reg_sr *sr = &e->hwe->reg_lrc; + struct xe_reg_sr_entry *entry; + unsigned long reg; + struct xe_sched_job *job; + struct xe_bb *bb; + struct dma_fence *fence; + u64 batch_ofs; + long timeout; + int count = 0; + + bb = xe_bb_new(gt, SZ_4K, false); /* Just pick a large BB size */ + if (IS_ERR(bb)) + return PTR_ERR(bb); + + xa_for_each(&sr->xa, reg, entry) + ++count; + + if (count) { + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count); + xa_for_each(&sr->xa, reg, entry) { + bb->cs[bb->len++] = reg; + bb->cs[bb->len++] = entry->set_bits; + } + } + bb->cs[bb->len++] = MI_NOOP; + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + + batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); + job = xe_bb_create_wa_job(e, bb, batch_ofs); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(bb); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +int xe_gt_record_default_lrcs(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int err = 0; + + for_each_hw_engine(hwe, gt, id) { + struct xe_engine *e, *nop_e; + struct xe_vm *vm; + void *default_lrc; + + if (gt->default_lrc[hwe->class]) + continue; + + xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe); + xe_wa_process_lrc(hwe); + + default_lrc = drmm_kzalloc(&xe->drm, + xe_lrc_size(xe, hwe->class), + GFP_KERNEL); + if (!default_lrc) + return -ENOMEM; + + vm = xe_migrate_get_vm(gt->migrate); + e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1, + hwe, ENGINE_FLAG_WA); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto put_vm; + } + + /* Prime golden LRC with known good state */ + err = emit_wa_job(gt, e); + if (err) + goto put_engine; + + nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), + 1, hwe, ENGINE_FLAG_WA); + if (IS_ERR(nop_e)) { + err = PTR_ERR(nop_e); + goto put_engine; + } + + /* Switch to different LRC */ + err = emit_nop_job(gt, nop_e); + if (err) + goto put_nop_e; + + /* Reload golden LRC to record the effect of any indirect W/A */ + err = emit_nop_job(gt, e); + if (err) + goto put_nop_e; + + xe_map_memcpy_from(xe, default_lrc, + &e->lrc[0].bo->vmap, + xe_lrc_pphwsp_offset(&e->lrc[0]), + xe_lrc_size(xe, hwe->class)); + + gt->default_lrc[hwe->class] = default_lrc; +put_nop_e: + xe_engine_put(nop_e); +put_engine: + xe_engine_put(e); +put_vm: + xe_vm_put(vm); + if (err) + break; + } + + return err; +} + +int xe_gt_init_early(struct xe_gt *gt) +{ + int err; + + xe_force_wake_init_gt(gt, gt_to_fw(gt)); + + err = 
xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); + + return 0; +} + +/** + * xe_gt_init_noalloc - Init GT up to the point where allocations can happen. + * @gt: The GT to initialize. + * + * This function prepares the GT to allow memory allocations to VRAM, but is not + * allowed to allocate memory itself. This state is useful for display readout, + * because the inherited display framebuffer will otherwise be overwritten as it + * is usually put at the start of VRAM. + * + * Returns: 0 on success, negative error code on error. + */ +int xe_gt_init_noalloc(struct xe_gt *gt) +{ + int err, err2; + + if (xe_gt_is_media_type(gt)) + return 0; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err; + + err = gt_ttm_mgr_init(gt); + if (err) + goto err_force_wake; + + err = xe_ggtt_init_noalloc(gt, gt->mem.ggtt); + +err_force_wake: + err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + XE_WARN_ON(err2); + xe_device_mem_access_put(gt_to_xe(gt)); +err: + return err; +} + +static int gt_fw_domain_init(struct xe_gt *gt) +{ + int err, i; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_hw_fence_irq; + + if (!xe_gt_is_media_type(gt)) { + err = xe_ggtt_init(gt, gt->mem.ggtt); + if (err) + goto err_force_wake; + } + + /* Allow driver to load if uC init fails (likely missing firmware) */ + err = xe_uc_init(>->uc); + XE_WARN_ON(err); + + err = xe_uc_init_hwconfig(>->uc); + if (err) + goto err_force_wake; + + /* Enables per hw engine IRQs */ + xe_gt_irq_postinstall(gt); + + /* Rerun MCR init as we now have hw engine list */ + xe_gt_mcr_init(gt); + + err = xe_hw_engines_init_early(gt); + if (err) + goto err_force_wake; + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + XE_WARN_ON(err); + xe_device_mem_access_put(gt_to_xe(gt)); + + return 0; + +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_hw_fence_irq: + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); + xe_device_mem_access_put(gt_to_xe(gt)); + + return err; +} + +static int all_fw_domain_init(struct xe_gt *gt) +{ + int err, i; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_hw_fence_irq; + + setup_private_ppat(gt); + + xe_reg_sr_apply_mmio(>->reg_sr, gt); + + err = xe_gt_clock_init(gt); + if (err) + goto err_force_wake; + + xe_mocs_init(gt); + err = xe_execlist_init(gt); + if (err) + goto err_force_wake; + + err = xe_hw_engines_init(gt); + if (err) + goto err_force_wake; + + err = xe_uc_init_post_hwconfig(>->uc); + if (err) + goto err_force_wake; + + /* + * FIXME: This should be ok as SA should only be used by gt->migrate and + * vm->gt->migrate and both should be pointing to a non-media GT. But to + * realy safe, convert gt->kernel_bb_pool to a pointer and point a media + * GT to the kernel_bb_pool on a real tile. 
+ */ + if (!xe_gt_is_media_type(gt)) { + err = xe_sa_bo_manager_init(gt, >->kernel_bb_pool, SZ_1M, 16); + if (err) + goto err_force_wake; + + /* + * USM has its only SA pool to non-block behind user operations + */ + if (gt_to_xe(gt)->info.supports_usm) { + err = xe_sa_bo_manager_init(gt, >->usm.bb_pool, + SZ_1M, 16); + if (err) + goto err_force_wake; + } + } + + if (!xe_gt_is_media_type(gt)) { + gt->migrate = xe_migrate_init(gt); + if (IS_ERR(gt->migrate)) + goto err_force_wake; + } else { + gt->migrate = xe_find_full_gt(gt)->migrate; + } + + err = xe_uc_init_hw(>->uc); + if (err) + goto err_force_wake; + + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + xe_device_mem_access_put(gt_to_xe(gt)); + + return 0; + +err_force_wake: + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); +err_hw_fence_irq: + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); + xe_device_mem_access_put(gt_to_xe(gt)); + + return err; +} + +int xe_gt_init(struct xe_gt *gt) +{ + int err; + int i; + + INIT_WORK(>->reset.worker, gt_reset_worker); + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) { + gt->ring_ops[i] = xe_ring_ops_get(gt, i); + xe_hw_fence_irq_init(>->fence_irq[i]); + } + + err = xe_gt_pagefault_init(gt); + if (err) + return err; + + xe_gt_sysfs_init(gt); + + err = gt_fw_domain_init(gt); + if (err) + return err; + + xe_force_wake_init_engines(gt, gt_to_fw(gt)); + + err = all_fw_domain_init(gt); + if (err) + return err; + + xe_force_wake_prune(gt, gt_to_fw(gt)); + + err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); + if (err) + return err; + + return 0; +} + +int do_gt_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL); + err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5); + if (err) + drm_err(&xe->drm, + "GT reset failed to clear GEN11_GRDOM_FULL\n"); + + return err; +} + +static int do_gt_restart(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int err; + + setup_private_ppat(gt); + + xe_reg_sr_apply_mmio(>->reg_sr, gt); + + err = xe_wopcm_init(>->uc.wopcm); + if (err) + return err; + + for_each_hw_engine(hwe, gt, id) + xe_hw_engine_enable_ring(hwe); + + err = xe_uc_init_hw(>->uc); + if (err) + return err; + + xe_mocs_init(gt); + err = xe_uc_start(>->uc); + if (err) + return err; + + for_each_hw_engine(hwe, gt, id) { + xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); + xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, + hwe->mmio_base, gt); + } + + return 0; +} + +static int gt_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + /* We only support GT resets with GuC submission */ + if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) + return -ENODEV; + + drm_info(&xe->drm, "GT reset started\n"); + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + xe_uc_stop_prepare(>->uc); + xe_gt_pagefault_reset(gt); + + err = xe_uc_stop(>->uc); + if (err) + goto err_out; + + err = do_gt_reset(gt); + if (err) + goto err_out; + + err = do_gt_restart(gt); + if (err) + goto err_out; + + xe_device_mem_access_put(gt_to_xe(gt)); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + + drm_info(&xe->drm, "GT reset done\n"); + + return 0; + +err_out: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + XE_WARN_ON(xe_uc_start(>->uc)); + xe_device_mem_access_put(gt_to_xe(gt)); + 
drm_err(&xe->drm, "GT reset failed, err=%d\n", err); + + return err; +} + +static void gt_reset_worker(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); + + gt_reset(gt); +} + +void xe_gt_reset_async(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + drm_info(&xe->drm, "Try GT reset\n"); + + /* Don't do a reset while one is already in flight */ + if (xe_uc_reset_prepare(>->uc)) + return; + + drm_info(&xe->drm, "Doing GT reset\n"); + queue_work(gt->ordered_wq, >->reset.worker); +} + +void xe_gt_suspend_prepare(struct xe_gt *gt) +{ + xe_device_mem_access_get(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + + xe_uc_stop_prepare(>->uc); + + xe_device_mem_access_put(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +} + +int xe_gt_suspend(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + /* For now suspend/resume is only allowed with GuC */ + if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) + return -ENODEV; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + err = xe_uc_suspend(>->uc); + if (err) + goto err_force_wake; + + xe_device_mem_access_put(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + drm_info(&xe->drm, "GT suspended\n"); + + return 0; + +err_force_wake: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + xe_device_mem_access_put(gt_to_xe(gt)); + drm_err(&xe->drm, "GT suspend failed: %d\n", err); + + return err; +} + +int xe_gt_resume(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + goto err_msg; + + err = do_gt_restart(gt); + if (err) + goto err_force_wake; + + xe_device_mem_access_put(gt_to_xe(gt)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + drm_info(&xe->drm, "GT resumed\n"); + + return 0; + +err_force_wake: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +err_msg: + xe_device_mem_access_put(gt_to_xe(gt)); + drm_err(&xe->drm, "GT resume failed: %d\n", err); + + return err; +} + +void xe_gt_migrate_wait(struct xe_gt *gt) +{ + xe_migrate_wait(gt->migrate); +} + +struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, + enum xe_engine_class class, + u16 instance, bool logical) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class && + ((!logical && hwe->instance == instance) || + (logical && hwe->logical_instance == instance))) + return hwe; + + return NULL; +} + +struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, + enum xe_engine_class class) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + switch (class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) + return hwe; + break; + default: + if (hwe->class == class) + return hwe; + } + } + + return NULL; +} diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h new file mode 100644 index 000000000000..5dc08a993cfe --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_H_ +#define _XE_GT_H_ + +#include + +#include 
"xe_device_types.h" +#include "xe_hw_engine.h" + +#define for_each_hw_engine(hwe__, gt__, id__) \ + for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \ + for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \ + xe_hw_engine_is_valid((hwe__))) + +int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt); +int xe_gt_init_early(struct xe_gt *gt); +int xe_gt_init_noalloc(struct xe_gt *gt); +int xe_gt_init(struct xe_gt *gt); +int xe_gt_record_default_lrcs(struct xe_gt *gt); +void xe_gt_suspend_prepare(struct xe_gt *gt); +int xe_gt_suspend(struct xe_gt *gt); +int xe_gt_resume(struct xe_gt *gt); +void xe_gt_reset_async(struct xe_gt *gt); +void xe_gt_migrate_wait(struct xe_gt *gt); + +struct xe_gt *xe_find_full_gt(struct xe_gt *gt); + +/** + * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the + * first that matches the same reset domain as @class + * @gt: GT structure + * @class: hw engine class to lookup + */ +struct xe_hw_engine * +xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class); + +struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, + enum xe_engine_class class, + u16 instance, + bool logical); + +static inline bool xe_gt_is_media_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MEDIA; +} + +static inline struct xe_device * gt_to_xe(struct xe_gt *gt) +{ + return gt->xe; +} + +static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(gt); + + return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY && + hwe->instance == gt->usm.reserved_bcs_instance; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c new file mode 100644 index 000000000000..575433e9718a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "i915_reg.h" +#include "gt/intel_gt_regs.h" + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_clock.h" +#include "xe_macros.h" +#include "xe_mmio.h" + +static u32 read_reference_ts_freq(struct xe_gt *gt) +{ + u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg); + u32 base_freq, frac_freq; + + base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; + base_freq *= 1000000; + + frac_freq = ((ts_override & + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); + frac_freq = 1000000 / (frac_freq + 1); + + return base_freq + frac_freq; +} + +static u32 get_crystal_clock_freq(u32 rpm_config_reg) +{ + const u32 f19_2_mhz = 19200000; + const u32 f24_mhz = 24000000; + const u32 f25_mhz = 25000000; + const u32 f38_4_mhz = 38400000; + u32 crystal_clock = + (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (crystal_clock) { + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: + return f38_4_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: + return f25_mhz; + default: + XE_BUG_ON("NOT_POSSIBLE"); + return 0; + } +} + +int xe_gt_clock_init(struct xe_gt *gt) +{ + u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE.reg); + u32 freq = 0; + + /* Assuming gen11+ so assert this assumption is correct */ + 
XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(gt); + } else { + u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg); + + freq = get_crystal_clock_freq(c0); + + /* + * Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). + */ + freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + } + + gt->info.clock_freq = freq; + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h new file mode 100644 index 000000000000..511923afd224 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_clock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_CLOCK_H_ +#define _XE_GT_CLOCK_H_ + +struct xe_gt; + +int xe_gt_clock_init(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c new file mode 100644 index 000000000000..cd1888784141 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_debugfs.h" +#include "xe_gt_mcr.h" +#include "xe_gt_pagefault.h" +#include "xe_gt_topology.h" +#include "xe_hw_engine.h" +#include "xe_macros.h" +#include "xe_uc_debugfs.h" + +static struct xe_gt *node_to_gt(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int hw_engines(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct xe_device *xe = gt_to_xe(gt); + struct drm_printer p = drm_seq_file_printer(m); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) { + xe_device_mem_access_put(xe); + return err; + } + + for_each_hw_engine(hwe, gt, id) + xe_hw_engine_print_state(hwe, &p); + + xe_device_mem_access_put(xe); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (err) + return err; + + return 0; +} + +static int force_reset(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + + xe_gt_reset_async(gt); + + return 0; +} + +static int sa_info(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + drm_suballoc_dump_debug_info(>->kernel_bb_pool.base, &p, + gt->kernel_bb_pool.gpu_addr); + + return 0; +} + +static int topology(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_gt_topology_dump(gt, &p); + + return 0; +} + +static int steering(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_gt_mcr_steering_dump(gt, &p); + + return 0; +} + +#ifdef CONFIG_DRM_XE_DEBUG +static int invalidate_tlb(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + int seqno; + int ret = 0; + + seqno = xe_gt_tlb_invalidation(gt); + XE_WARN_ON(seqno < 0); + if (seqno > 0) + ret = xe_gt_tlb_invalidation_wait(gt, seqno); + XE_WARN_ON(ret < 0); + + return 0; +} +#endif + +static const struct drm_info_list debugfs_list[] = { + {"hw_engines", hw_engines, 0}, 
+ {"force_reset", force_reset, 0}, + {"sa_info", sa_info, 0}, + {"topology", topology, 0}, + {"steering", steering, 0}, +#ifdef CONFIG_DRM_XE_DEBUG + {"invalidate_tlb", invalidate_tlb, 0}, +#endif +}; + +void xe_gt_debugfs_register(struct xe_gt *gt) +{ + struct drm_minor *minor = gt_to_xe(gt)->drm.primary; + struct dentry *root; + struct drm_info_list *local; + char name[8]; + int i; + + XE_BUG_ON(!minor->debugfs_root); + + sprintf(name, "gt%d", gt->info.id); + root = debugfs_create_dir(name, minor->debugfs_root); + if (IS_ERR(root)) { + XE_WARN_ON("Create GT directory failed"); + return; + } + + /* + * Allocate local copy as we need to pass in the GT to the debugfs + * entry and drm_debugfs_create_files just references the drm_info_list + * passed in (e.g. can't define this on the stack). + */ +#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) + local = drmm_kmalloc(>_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) { + XE_WARN_ON("Couldn't allocate memory"); + return; + } + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = gt; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + root, minor); + + xe_uc_debugfs_register(>->uc, root); +} diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h new file mode 100644 index 000000000000..5a329f118a57 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_DEBUGFS_H_ +#define _XE_GT_DEBUGFS_H_ + +struct xe_gt; + +void xe_gt_debugfs_register(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c new file mode 100644 index 000000000000..b69c0d6c6b2f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -0,0 +1,552 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_gt_topology.h" +#include "xe_gt_types.h" +#include "xe_mmio.h" + +#include "gt/intel_gt_regs.h" + +/** + * DOC: GT Multicast/Replicated (MCR) Register Support + * + * Some GT registers are designed as "multicast" or "replicated" registers: + * multiple instances of the same register share a single MMIO offset. MCR + * registers are generally used when the hardware needs to potentially track + * independent values of a register per hardware unit (e.g., per-subslice, + * per-L3bank, etc.). The specific types of replication that exist vary + * per-platform. + * + * MMIO accesses to MCR registers are controlled according to the settings + * programmed in the platform's MCR_SELECTOR register(s). MMIO writes to MCR + * registers can be done in either a (i.e., a single write updates all + * instances of the register to the same value) or unicast (a write updates only + * one specific instance). Reads of MCR registers always operate in a unicast + * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR. + * Selection of a specific MCR instance for unicast operations is referred to + * as "steering." + * + * If MCR register operations are steered toward a hardware unit that is + * fused off or currently powered down due to power gating, the MMIO operation + * is "terminated" by the hardware. Terminated read operations will return a + * value of zero and terminated unicast write operations will be silently + * ignored. 
+ */ + +enum { + MCR_OP_READ, + MCR_OP_WRITE +}; + +static const struct xe_mmio_range xelp_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +/* + * Although the bspec lists more "MSLICE" ranges than shown here, some of those + * are of a "GAM" subclass that has special rules and doesn't need to be + * included here. + */ +static const struct xe_mmio_range xehp_mslice_steering_table[] = { + { 0x00DD00, 0x00DDFF }, + { 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */ + {}, +}; + +static const struct xe_mmio_range xehp_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D880, 0x00D8FF }, + {}, +}; + +/* + * We have several types of MCR registers where steering to (0,0) will always + * provide us with a non-terminated value. We'll stick them all in the same + * table for simplicity. + */ +static const struct xe_mmio_range xehpc_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* HALF-BSLICE */ + { 0x008800, 0x00887F }, /* CC */ + { 0x008A80, 0x008AFF }, /* TILEPSMI */ + { 0x00B000, 0x00B0FF }, /* HALF-BSLICE */ + { 0x00B100, 0x00B3FF }, /* L3BANK */ + { 0x00C800, 0x00CFFF }, /* HALF-BSLICE */ + { 0x00D800, 0x00D8FF }, /* HALF-BSLICE */ + { 0x00DD00, 0x00DDFF }, /* BSLICE */ + { 0x00E900, 0x00E9FF }, /* HALF-BSLICE */ + { 0x00EC00, 0x00EEFF }, /* HALF-BSLICE */ + { 0x00F000, 0x00FFFF }, /* HALF-BSLICE */ + { 0x024180, 0x0241FF }, /* HALF-BSLICE */ + {}, +}; + +static const struct xe_mmio_range xelpg_instance0_steering_table[] = { + { 0x000B00, 0x000BFF }, /* SQIDI */ + { 0x001000, 0x001FFF }, /* SQIDI */ + { 0x004000, 0x0048FF }, /* GAM */ + { 0x008700, 0x0087FF }, /* SQIDI */ + { 0x00B000, 0x00B0FF }, /* NODE */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DDFF }, /* OAAL2 */ + {}, +}; + +static const struct xe_mmio_range xelpg_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +static const struct xe_mmio_range xelp_dss_steering_table[] = { + { 0x008150, 0x00815F }, + { 0x009520, 0x00955F }, + { 0x00DE80, 0x00E8FF }, + { 0x024A00, 0x024A7F }, + {}, +}; + +/* DSS steering is used for GSLICE ranges as well */ +static const struct xe_mmio_range xehp_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* GSLICE */ + { 0x005400, 0x007FFF }, /* GSLICE */ + { 0x008140, 0x00815F }, /* GSLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x008D00, 0x008DFF }, /* DSS */ + { 0x0094D0, 0x00955F }, /* GSLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00D800, 0x00D87F }, /* GSLICE */ + { 0x00DC00, 0x00DCFF }, /* GSLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved ) */ + { 0x017000, 0x017FFF }, /* GSLICE */ + { 0x024A00, 0x024A7F }, /* DSS */ + {}, +}; + +/* DSS steering is used for COMPUTE ranges as well */ +static const struct xe_mmio_range xehpc_dss_steering_table[] = { + { 0x008140, 0x00817F }, /* COMPUTE (0x8140-0x814F & 0x8160-0x817F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* COMPUTE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00DC00, 0x00DCFF }, /* COMPUTE */ + { 0x00DE80, 0x00E7FF }, /* DSS (0xDF00-0xE1FF reserved ) */ + {}, +}; + +/* DSS steering is used for SLICE ranges as well */ +static const struct xe_mmio_range xelpg_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* SLICE */ + { 0x005500, 0x007FFF }, /* SLICE */ + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ 
+ { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00DCFF }, /* SLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + {}, +}; + +static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = { + { 0x393200, 0x39323F }, + { 0x393400, 0x3934FF }, + {}, +}; + +/* + * DG2 GAM registers are a special case; this table is checked directly in + * xe_gt_mcr_get_nonterminated_steering and is not hooked up via + * gt->steering[]. + */ +static const struct xe_mmio_range dg2_gam_ranges[] = { + { 0x004000, 0x004AFF }, + { 0x00C800, 0x00CFFF }, + { 0x00F000, 0x00FFFF }, + {}, +}; + +static void init_steering_l3bank(struct xe_gt *gt) +{ + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, + xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, + xe_mmio_read32(gt, XEHP_FUSE4.reg)); + + /* + * Group selects mslice, instance selects bank within mslice. + * Bank 0 is always valid _except_ when the bank mask is 010b. + */ + gt->steering[L3BANK].group_target = __ffs(mslice_mask); + gt->steering[L3BANK].instance_target = + bank_mask & BIT(0) ? 0 : 2; + } else { + u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK, + ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + + gt->steering[L3BANK].group_target = 0; /* unused */ + gt->steering[L3BANK].instance_target = __ffs(fuse); + } +} + +static void init_steering_mslice(struct xe_gt *gt) +{ + u32 mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, + xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + + /* + * mslice registers are valid (not terminated) if either the meml3 + * associated with the mslice is present, or at least one DSS associated + * with the mslice is present. There will always be at least one meml3 + * so we can just use that to find a non-terminated mslice and ignore + * the DSS fusing. + */ + gt->steering[MSLICE].group_target = __ffs(mask); + gt->steering[MSLICE].instance_target = 0; /* unused */ + + /* + * LNCF termination is also based on mslice presence, so we'll set + * it up here. Either LNCF within a non-terminated mslice will work, + * so we just always pick LNCF 0 here. + */ + gt->steering[LNCF].group_target = __ffs(mask) << 1; + gt->steering[LNCF].instance_target = 0; /* unused */ +} + +static void init_steering_dss(struct xe_gt *gt) +{ + unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), + xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)); + unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4; + + gt->steering[DSS].group_target = dss / dss_per_grp; + gt->steering[DSS].instance_target = dss % dss_per_grp; +} + +static void init_steering_oaddrm(struct xe_gt *gt) +{ + /* + * First instance is only terminated if the entire first media slice + * is absent (i.e., no VCS0 or VECS0). 
+ */ + if (gt->info.engine_mask & (XE_HW_ENGINE_VCS0 | XE_HW_ENGINE_VECS0)) + gt->steering[OADDRM].group_target = 0; + else + gt->steering[OADDRM].group_target = 1; + + gt->steering[DSS].instance_target = 0; /* unused */ +} + +static void init_steering_inst0(struct xe_gt *gt) +{ + gt->steering[DSS].group_target = 0; /* unused */ + gt->steering[DSS].instance_target = 0; /* unused */ +} + +static const struct { + const char *name; + void (*init)(struct xe_gt *); +} xe_steering_types[] = { + { "L3BANK", init_steering_l3bank }, + { "MSLICE", init_steering_mslice }, + { "LNCF", NULL }, /* initialized by mslice init */ + { "DSS", init_steering_dss }, + { "OADDRM", init_steering_oaddrm }, + { "INSTANCE 0", init_steering_inst0 }, +}; + +void xe_gt_mcr_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); + + spin_lock_init(>->mcr_lock); + + if (gt->info.type == XE_GT_TYPE_MEDIA) { + drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); + + gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table; + } else if (GRAPHICS_VERx100(xe) >= 1270) { + gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table; + gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; + gt->steering[DSS].ranges = xelpg_dss_steering_table; + } else if (xe->info.platform == XE_PVC) { + gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table; + gt->steering[DSS].ranges = xehpc_dss_steering_table; + } else if (xe->info.platform == XE_DG2) { + gt->steering[MSLICE].ranges = xehp_mslice_steering_table; + gt->steering[LNCF].ranges = xehp_lncf_steering_table; + gt->steering[DSS].ranges = xehp_dss_steering_table; + } else { + gt->steering[L3BANK].ranges = xelp_l3bank_steering_table; + gt->steering[DSS].ranges = xelp_dss_steering_table; + } + + /* Select non-terminated steering target for each type */ + for (int i = 0; i < NUM_STEERING_TYPES; i++) + if (gt->steering[i].ranges && xe_steering_types[i].init) + xe_steering_types[i].init(gt); +} + +/* + * xe_gt_mcr_get_nonterminated_steering - find group/instance values that + * will steer a register to a non-terminated instance + * @gt: GT structure + * @reg: register for which the steering is required + * @group: return variable for group steering + * @instance: return variable for instance steering + * + * This function returns a group/instance pair that is guaranteed to work for + * read steering of the given register. Note that a value will be returned even + * if the register is not replicated and therefore does not actually require + * steering. + * + * Returns true if the caller should steer to the @group/@instance values + * returned. Returns false if the caller need not perform any steering (i.e., + * the DG2 GAM range special case). + */ +static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, + i915_mcr_reg_t reg, + u8 *group, u8 *instance) +{ + for (int type = 0; type < NUM_STEERING_TYPES; type++) { + if (!gt->steering[type].ranges) + continue; + + for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { + if (xe_mmio_in_range(>->steering[type].ranges[i], reg.reg)) { + *group = gt->steering[type].group_target; + *instance = gt->steering[type].instance_target; + return true; + } + } + } + + /* + * All MCR registers should usually be part of one of the steering + * ranges we're tracking. 
However there's one special case: DG2
+ * GAM registers are technically multicast registers, but are special
+ * in a number of ways:
+ * - they have their own dedicated steering control register (they
+ *   don't share 0xFDC with other MCR classes)
+ * - all reads should be directed to instance 1 (unicast reads against
+ *   other instances are not allowed), and instance 1 is already the
+ *   hardware's default steering target, which we never change
+ *
+ * Ultimately this means that we can just treat them as if they were
+ * unicast registers and all operations will work properly.
+ */
+	for (int i = 0; dg2_gam_ranges[i].end > 0; i++)
+		if (xe_mmio_in_range(&dg2_gam_ranges[i], reg.reg))
+			return false;
+
+	/*
+	 * Not found in a steering table and not a DG2 GAM register? We'll
+	 * just steer to 0/0 as a guess and raise a warning.
+	 */
+	drm_WARN(&gt_to_xe(gt)->drm, true,
+		 "Did not find MCR register %#x in any MCR steering table\n",
+		 reg.reg);
+	*group = 0;
+	*instance = 0;
+
+	return true;
+}
+
+#define STEER_SEMAPHORE		0xFD0
+
+/*
+ * Obtain exclusive access to MCR steering. On MTL and beyond we also need
+ * to synchronize with external clients (e.g., firmware), so a semaphore
+ * register will also need to be taken.
+ */
+static void mcr_lock(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	int ret = 0;
+
+	spin_lock(&gt->mcr_lock);
+
+	/*
+	 * Starting with MTL we also need to grab a semaphore register
+	 * to synchronize with external agents (e.g., firmware) that now
+	 * share the same steering control register.
+	 */
+	if (GRAPHICS_VERx100(xe) >= 1270)
+		ret = wait_for_us(xe_mmio_read32(gt, STEER_SEMAPHORE) == 0x1, 10);
+
+	drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT);
+}
+
+static void mcr_unlock(struct xe_gt *gt)
+{
+	/* Release hardware semaphore */
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
+		xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1);
+
+	spin_unlock(&gt->mcr_lock);
+}
+
+/*
+ * Access a register with specific MCR steering
+ *
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag,
+				int group, int instance, u32 value)
+{
+	u32 steer_reg, steer_val, val = 0;
+
+	lockdep_assert_held(&gt->mcr_lock);
+
+	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+		steer_reg = MTL_MCR_SELECTOR.reg;
+		steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+			REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);
+	} else {
+		steer_reg = GEN8_MCR_SELECTOR.reg;
+		steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, group) |
+			REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, instance);
+	}
+
+	/*
+	 * Always leave the hardware in multicast mode when doing reads
+	 * (see comment about Wa_22013088509 below) and only change it
+	 * to unicast mode when doing writes of a specific instance.
+	 *
+	 * No need to save old steering reg value.
+	 */
+	if (rw_flag == MCR_OP_READ)
+		steer_val |= GEN11_MCR_MULTICAST;
+
+	xe_mmio_write32(gt, steer_reg, steer_val);
+
+	if (rw_flag == MCR_OP_READ)
+		val = xe_mmio_read32(gt, reg.reg);
+	else
+		xe_mmio_write32(gt, reg.reg, value);
+
+	/*
+	 * If we turned off the multicast bit (during a write) we're required
+	 * to turn it back on before finishing. The group and instance values
+	 * don't matter since they'll be re-programmed on the next MCR
+	 * operation.
+ */ + if (rw_flag == MCR_OP_WRITE) + xe_mmio_write32(gt, steer_reg, GEN11_MCR_MULTICAST); + + return val; +} + +/** + * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register + * @gt: GT structure + * @reg: register to read + * + * Reads a GT MCR register. The read will be steered to a non-terminated + * instance (i.e., one that isn't fused off or powered down by power gating). + * This function assumes the caller is already holding any necessary forcewake + * domains. + * + * Returns the value from a non-terminated instance of @reg. + */ +u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg) +{ + u8 group, instance; + u32 val; + bool steer; + + steer = xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); + + if (steer) { + mcr_lock(gt); + val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, + group, instance, 0); + mcr_unlock(gt); + } else { + /* DG2 GAM special case rules; treat as if unicast */ + val = xe_mmio_read32(gt, reg.reg); + } + + return val; +} + +/** + * xe_gt_mcr_unicast_read - read a specific instance of an MCR register + * @gt: GT structure + * @reg: the MCR register to read + * @group: the MCR group + * @instance: the MCR instance + * + * Returns the value read from an MCR register after steering toward a specific + * group/instance. + */ +u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, + i915_mcr_reg_t reg, + int group, int instance) +{ + u32 val; + + mcr_lock(gt); + val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, group, instance, 0); + mcr_unlock(gt); + + return val; +} + +/** + * xe_gt_mcr_unicast_write - write a specific instance of an MCR register + * @gt: GT structure + * @reg: the MCR register to write + * @value: value to write + * @group: the MCR group + * @instance: the MCR instance + * + * Write an MCR register in unicast mode after steering toward a specific + * group/instance. + */ +void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value, + int group, int instance) +{ + mcr_lock(gt); + rw_with_mcr_steering(gt, reg, MCR_OP_WRITE, group, instance, value); + mcr_unlock(gt); +} + +/** + * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register + * @gt: GT structure + * @reg: the MCR register to write + * @value: value to write + * + * Write an MCR register in multicast mode to update all instances. + */ +void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value) +{ + /* + * Synchronize with any unicast operations. Once we have exclusive + * access, the MULTICAST bit should already be set, so there's no need + * to touch the steering register. 
+ */ + mcr_lock(gt); + xe_mmio_write32(gt, reg.reg, value); + mcr_unlock(gt); +} + +void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p) +{ + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + if (gt->steering[i].ranges) { + drm_printf(p, "%s steering: group=%#x, instance=%#x\n", + xe_steering_types[i].name, + gt->steering[i].group_target, + gt->steering[i].instance_target); + for (int j = 0; gt->steering[i].ranges[j].end; j++) + drm_printf(p, "\t0x%06x - 0x%06x\n", + gt->steering[i].ranges[j].start, + gt->steering[i].ranges[j].end); + } + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h new file mode 100644 index 000000000000..62ec6eb654a0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_MCR_H_ +#define _XE_GT_MCR_H_ + +#include "i915_reg_defs.h" + +struct drm_printer; +struct xe_gt; + +void xe_gt_mcr_init(struct xe_gt *gt); + +u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg, + int group, int instance); +u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg); + +void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value, + int group, int instance); +void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value); + +void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); + +#endif /* _XE_GT_MCR_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c new file mode 100644 index 000000000000..7125113b7390 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -0,0 +1,750 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include +#include + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_gt_pagefault.h" +#include "xe_migrate.h" +#include "xe_pt.h" +#include "xe_trace.h" +#include "xe_vm.h" + +struct pagefault { + u64 page_addr; + u32 asid; + u16 pdata; + u8 vfid; + u8 access_type; + u8 fault_type; + u8 fault_level; + u8 engine_class; + u8 engine_instance; + u8 fault_unsuccessful; +}; + +enum access_type { + ACCESS_TYPE_READ = 0, + ACCESS_TYPE_WRITE = 1, + ACCESS_TYPE_ATOMIC = 2, + ACCESS_TYPE_RESERVED = 3, +}; + +enum fault_type { + NOT_PRESENT = 0, + WRITE_ACCESS_VIOLATION = 1, + ATOMIC_ACCESS_VIOLATION = 2, +}; + +struct acc { + u64 va_range_base; + u32 asid; + u32 sub_granularity; + u8 granularity; + u8 vfid; + u8 access_type; + u8 engine_class; + u8 engine_instance; +}; + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static int send_tlb_invalidation(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + 0, + XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT | + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | + XE_GUC_TLB_INVAL_FLUSH_CACHE, + }; + int seqno; + int ret; + + /* + * XXX: The seqno algorithm relies on TLB invalidation being processed + * in order which they currently are, if that changes the algorithm will + * need to be updated. 
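+	 *
+	 * The wait side copes with wrap-around: seqnos are handed out modulo
+	 * TLB_INVALIDATION_SEQNO_MAX (skipping 0), so tlb_invalidation_seqno_past()
+	 * below treats a waited-on seqno as already completed when it trails the
+	 * last received seqno by more than half the seqno space. With made-up
+	 * numbers: if the last received seqno is 3, waiting on seqno 5 is still
+	 * pending, while waiting on seqno TLB_INVALIDATION_SEQNO_MAX - 2 is
+	 * considered done, since it can only have been issued before the counter
+	 * wrapped.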
+ */ + mutex_lock(&guc->ct.lock); + seqno = gt->usm.tlb_invalidation_seqno; + action[1] = seqno; + gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) % + TLB_INVALIDATION_SEQNO_MAX; + if (!gt->usm.tlb_invalidation_seqno) + gt->usm.tlb_invalidation_seqno = 1; + ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_TLB_INVALIDATE, 1); + if (!ret) + ret = seqno; + mutex_unlock(&guc->ct.lock); + + return ret; +} + +static bool access_is_atomic(enum access_type access_type) +{ + return access_type == ACCESS_TYPE_ATOMIC; +} + +static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma) +{ + return BIT(gt->info.id) & vma->gt_present && + !(BIT(gt->info.id) & vma->usm.gt_invalidated); +} + +static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup) +{ + if (lookup->start > vma->end || lookup->end < vma->start) + return false; + + return true; +} + +static bool only_needs_bo_lock(struct xe_bo *bo) +{ + return bo && bo->vm; +} + +static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) +{ + struct xe_vma *vma = NULL, lookup; + + lookup.start = page_addr; + lookup.end = lookup.start + SZ_4K - 1; + if (vm->usm.last_fault_vma) { /* Fast lookup */ + if (vma_matches(vm->usm.last_fault_vma, &lookup)) + vma = vm->usm.last_fault_vma; + } + if (!vma) + vma = xe_vm_find_overlapping_vma(vm, &lookup); + + return vma; +} + +static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_vm *vm; + struct xe_vma *vma = NULL; + struct xe_bo *bo; + LIST_HEAD(objs); + LIST_HEAD(dups); + struct ttm_validate_buffer tv_bo, tv_vm; + struct ww_acquire_ctx ww; + struct dma_fence *fence; + bool write_locked; + int ret = 0; + bool atomic; + + /* ASID to VM */ + mutex_lock(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, pf->asid); + if (vm) + xe_vm_get(vm); + mutex_unlock(&xe->usm.lock); + if (!vm || !xe_vm_in_fault_mode(vm)) + return -EINVAL; + +retry_userptr: + /* + * TODO: Avoid exclusive lock if VM doesn't have userptrs, or + * start out read-locked? 
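+	 *
+	 * Overall flow below: look up the faulting VMA by address, re-pin userptr
+	 * pages if they were invalidated, then lock the VM / BO dma-resv. Atomic
+	 * faults on BO-backed VMAs force a migration to VRAM, other faults just
+	 * ensure backing store exists, after which the VMA is rebound on the
+	 * faulting GT and we wait for the bind fence. On success a full TLB
+	 * invalidation is issued; the caller (pf_queue_work_func) then sends the
+	 * fault reply to the GuC.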
+ */ + down_write(&vm->lock); + write_locked = true; + vma = lookup_vma(vm, pf->page_addr); + if (!vma) { + ret = -EINVAL; + goto unlock_vm; + } + + if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) { + downgrade_write(&vm->lock); + write_locked = false; + } + + trace_xe_vma_pagefault(vma); + + atomic = access_is_atomic(pf->access_type); + + /* Check if VMA is valid */ + if (vma_is_valid(gt, vma) && !atomic) + goto unlock_vm; + + /* TODO: Validate fault */ + + if (xe_vma_is_userptr(vma) && write_locked) { + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + + ret = xe_vma_userptr_pin_pages(vma); + if (ret) + goto unlock_vm; + + downgrade_write(&vm->lock); + write_locked = false; + } + + /* Lock VM and BOs dma-resv */ + bo = vma->bo; + if (only_needs_bo_lock(bo)) { + /* This path ensures the BO's LRU is updated */ + ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); + } else { + tv_vm.num_shared = xe->info.tile_count; + tv_vm.bo = xe_vm_ttm_bo(vm); + list_add(&tv_vm.head, &objs); + if (bo) { + tv_bo.bo = &bo->ttm; + tv_bo.num_shared = xe->info.tile_count; + list_add(&tv_bo.head, &objs); + } + ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); + } + if (ret) + goto unlock_vm; + + if (atomic) { + if (xe_vma_is_userptr(vma)) { + ret = -EACCES; + goto unlock_dma_resv; + } + + /* Migrate to VRAM, move should invalidate the VMA first */ + ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id); + if (ret) + goto unlock_dma_resv; + } else if (bo) { + /* Create backing store if needed */ + ret = xe_bo_validate(bo, vm, true); + if (ret) + goto unlock_dma_resv; + } + + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + fence = __xe_pt_bind_vma(gt, vma, xe_gt_migrate_engine(gt), NULL, 0, + vma->gt_present & BIT(gt->info.id)); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto unlock_dma_resv; + } + + /* + * XXX: Should we drop the lock before waiting? This only helps if doing + * GPU binds which is currently only done if we have to wait for more + * than 10ms on a move. 
+ */ + dma_fence_wait(fence, false); + dma_fence_put(fence); + + if (xe_vma_is_userptr(vma)) + ret = xe_vma_userptr_check_repin(vma); + vma->usm.gt_invalidated &= ~BIT(gt->info.id); + +unlock_dma_resv: + if (only_needs_bo_lock(bo)) + xe_bo_unlock(bo, &ww); + else + ttm_eu_backoff_reservation(&ww, &objs); +unlock_vm: + if (!ret) + vm->usm.last_fault_vma = vma; + if (write_locked) + up_write(&vm->lock); + else + up_read(&vm->lock); + if (ret == -EAGAIN) + goto retry_userptr; + + if (!ret) { + /* + * FIXME: Doing a full TLB invalidation for now, likely could + * defer TLB invalidate + fault response to a callback of fence + * too + */ + ret = send_tlb_invalidation(>->uc.guc); + if (ret >= 0) + ret = 0; + } + xe_vm_put(vm); + + return ret; +} + +static int send_pagefault_reply(struct xe_guc *guc, + struct xe_guc_pagefault_reply *reply) +{ + u32 action[] = { + XE_GUC_ACTION_PAGE_FAULT_RES_DESC, + reply->dw0, + reply->dw1, + }; + + return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); +} + +static void print_pagefault(struct xe_device *xe, struct pagefault *pf) +{ + drm_warn(&xe->drm, "\n\tASID: %d\n" + "\tVFID: %d\n" + "\tPDATA: 0x%04x\n" + "\tFaulted Address: 0x%08x%08x\n" + "\tFaultType: %d\n" + "\tAccessType: %d\n" + "\tFaultLevel: %d\n" + "\tEngineClass: %d\n" + "\tEngineInstance: %d\n", + pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), + lower_32_bits(pf->page_addr), + pf->fault_type, pf->access_type, pf->fault_level, + pf->engine_class, pf->engine_instance); +} + +#define PF_MSG_LEN_DW 4 + +static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) +{ + const struct xe_guc_pagefault_desc *desc; + int ret = 0; + + spin_lock_irq(&pf_queue->lock); + if (pf_queue->head != pf_queue->tail) { + desc = (const struct xe_guc_pagefault_desc *) + (pf_queue->data + pf_queue->head); + + pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); + pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); + pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); + pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << + PFD_PDATA_HI_SHIFT; + pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); + pf->asid = FIELD_GET(PFD_ASID, desc->dw1); + pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); + pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); + pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); + pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << + PFD_VIRTUAL_ADDR_HI_SHIFT; + pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << + PFD_VIRTUAL_ADDR_LO_SHIFT; + + pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) % + PF_QUEUE_NUM_DW; + } else { + ret = -1; + } + spin_unlock_irq(&pf_queue->lock); + + return ret; +} + +static bool pf_queue_full(struct pf_queue *pf_queue) +{ + lockdep_assert_held(&pf_queue->lock); + + return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <= + PF_MSG_LEN_DW; +} + +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct pf_queue *pf_queue; + unsigned long flags; + u32 asid; + bool full; + + if (unlikely(len != PF_MSG_LEN_DW)) + return -EPROTO; + + asid = FIELD_GET(PFD_ASID, msg[1]); + pf_queue = >->usm.pf_queue[asid % NUM_PF_QUEUE]; + + spin_lock_irqsave(&pf_queue->lock, flags); + full = pf_queue_full(pf_queue); + if (!full) { + memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32)); + pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW; + queue_work(gt->usm.pf_wq, &pf_queue->worker); + } else { + XE_WARN_ON("PF Queue full, 
shouldn't be possible"); + } + spin_unlock_irqrestore(&pf_queue->lock, flags); + + return full ? -ENOSPC : 0; +} + +static void pf_queue_work_func(struct work_struct *w) +{ + struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); + struct xe_gt *gt = pf_queue->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_guc_pagefault_reply reply = {}; + struct pagefault pf = {}; + int ret; + + ret = get_pagefault(pf_queue, &pf); + if (ret) + return; + + ret = handle_pagefault(gt, &pf); + if (unlikely(ret)) { + print_pagefault(xe, &pf); + pf.fault_unsuccessful = 1; + drm_warn(&xe->drm, "Fault response: Unsuccessful %d\n", ret); + } + + reply.dw0 = FIELD_PREP(PFR_VALID, 1) | + FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | + FIELD_PREP(PFR_REPLY, PFR_ACCESS) | + FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | + FIELD_PREP(PFR_ASID, pf.asid); + + reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | + FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | + FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | + FIELD_PREP(PFR_PDATA, pf.pdata); + + send_pagefault_reply(>->uc.guc, &reply); +} + +static void acc_queue_work_func(struct work_struct *w); + +int xe_gt_pagefault_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int i; + + if (!xe->info.supports_usm) + return 0; + + gt->usm.tlb_invalidation_seqno = 1; + for (i = 0; i < NUM_PF_QUEUE; ++i) { + gt->usm.pf_queue[i].gt = gt; + spin_lock_init(>->usm.pf_queue[i].lock); + INIT_WORK(>->usm.pf_queue[i].worker, pf_queue_work_func); + } + for (i = 0; i < NUM_ACC_QUEUE; ++i) { + gt->usm.acc_queue[i].gt = gt; + spin_lock_init(>->usm.acc_queue[i].lock); + INIT_WORK(>->usm.acc_queue[i].worker, acc_queue_work_func); + } + + gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", + WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); + if (!gt->usm.pf_wq) + return -ENOMEM; + + gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", + WQ_UNBOUND | WQ_HIGHPRI, + NUM_ACC_QUEUE); + if (!gt->usm.acc_wq) + return -ENOMEM; + + return 0; +} + +void xe_gt_pagefault_reset(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int i; + + if (!xe->info.supports_usm) + return; + + for (i = 0; i < NUM_PF_QUEUE; ++i) { + spin_lock_irq(>->usm.pf_queue[i].lock); + gt->usm.pf_queue[i].head = 0; + gt->usm.pf_queue[i].tail = 0; + spin_unlock_irq(>->usm.pf_queue[i].lock); + } + + for (i = 0; i < NUM_ACC_QUEUE; ++i) { + spin_lock(>->usm.acc_queue[i].lock); + gt->usm.acc_queue[i].head = 0; + gt->usm.acc_queue[i].tail = 0; + spin_unlock(>->usm.acc_queue[i].lock); + } +} + +int xe_gt_tlb_invalidation(struct xe_gt *gt) +{ + return send_tlb_invalidation(>->uc.guc); +} + +static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) +{ + if (gt->usm.tlb_invalidation_seqno_recv >= seqno) + return true; + + if (seqno - gt->usm.tlb_invalidation_seqno_recv > + (TLB_INVALIDATION_SEQNO_MAX / 2)) + return true; + + return false; +} + +int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_guc *guc = >->uc.guc; + int ret; + + /* + * XXX: See above, this algorithm only works if seqno are always in + * order + */ + ret = wait_event_timeout(guc->ct.wq, + tlb_invalidation_seqno_past(gt, seqno), + HZ / 5); + if (!ret) { + drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n", + seqno, gt->usm.tlb_invalidation_seqno_recv); + return -ETIME; + } + + return 0; +} + +int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + int 
expected_seqno; + + if (unlikely(len != 1)) + return -EPROTO; + + /* Sanity check on seqno */ + expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) % + TLB_INVALIDATION_SEQNO_MAX; + XE_WARN_ON(expected_seqno != msg[0]); + + gt->usm.tlb_invalidation_seqno_recv = msg[0]; + smp_wmb(); + wake_up_all(&guc->ct.wq); + + return 0; +} + +static int granularity_in_byte(int val) +{ + switch (val) { + case 0: + return SZ_128K; + case 1: + return SZ_2M; + case 2: + return SZ_16M; + case 3: + return SZ_64M; + default: + return 0; + } +} + +static int sub_granularity_in_byte(int val) +{ + return (granularity_in_byte(val) / 32); +} + +static void print_acc(struct xe_device *xe, struct acc *acc) +{ + drm_warn(&xe->drm, "Access counter request:\n" + "\tType: %s\n" + "\tASID: %d\n" + "\tVFID: %d\n" + "\tEngine: %d:%d\n" + "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" + "\tSub_Granularity Vector: 0x%08x\n" + "\tVA Range base: 0x%016llx\n", + acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL", + acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, + granularity_in_byte(acc->granularity) / SZ_1K, + sub_granularity_in_byte(acc->granularity) / SZ_1K, + acc->sub_granularity, acc->va_range_base); +} + +static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) +{ + u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * + sub_granularity_in_byte(acc->granularity); + struct xe_vma lookup; + + lookup.start = page_va; + lookup.end = lookup.start + SZ_4K - 1; + + return xe_vm_find_overlapping_vma(vm, &lookup); +} + +static int handle_acc(struct xe_gt *gt, struct acc *acc) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_vm *vm; + struct xe_vma *vma; + struct xe_bo *bo; + LIST_HEAD(objs); + LIST_HEAD(dups); + struct ttm_validate_buffer tv_bo, tv_vm; + struct ww_acquire_ctx ww; + int ret = 0; + + /* We only support ACC_TRIGGER at the moment */ + if (acc->access_type != ACC_TRIGGER) + return -EINVAL; + + /* ASID to VM */ + mutex_lock(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, acc->asid); + if (vm) + xe_vm_get(vm); + mutex_unlock(&xe->usm.lock); + if (!vm || !xe_vm_in_fault_mode(vm)) + return -EINVAL; + + down_read(&vm->lock); + + /* Lookup VMA */ + vma = get_acc_vma(vm, acc); + if (!vma) { + ret = -EINVAL; + goto unlock_vm; + } + + trace_xe_vma_acc(vma); + + /* Userptr can't be migrated, nothing to do */ + if (xe_vma_is_userptr(vma)) + goto unlock_vm; + + /* Lock VM and BOs dma-resv */ + bo = vma->bo; + if (only_needs_bo_lock(bo)) { + /* This path ensures the BO's LRU is updated */ + ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); + } else { + tv_vm.num_shared = xe->info.tile_count; + tv_vm.bo = xe_vm_ttm_bo(vm); + list_add(&tv_vm.head, &objs); + tv_bo.bo = &bo->ttm; + tv_bo.num_shared = xe->info.tile_count; + list_add(&tv_bo.head, &objs); + ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); + } + if (ret) + goto unlock_vm; + + /* Migrate to VRAM, move should invalidate the VMA first */ + ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id); + + if (only_needs_bo_lock(bo)) + xe_bo_unlock(bo, &ww); + else + ttm_eu_backoff_reservation(&ww, &objs); +unlock_vm: + up_read(&vm->lock); + xe_vm_put(vm); + + return ret; +} + +#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) + +static int get_acc(struct acc_queue *acc_queue, struct acc *acc) +{ + const struct xe_guc_acc_desc *desc; + int ret = 0; + + spin_lock(&acc_queue->lock); + if (acc_queue->head != acc_queue->tail) { + desc = (const struct xe_guc_acc_desc *) + (acc_queue->data + 
acc_queue->head); + + acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); + acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | + FIELD_GET(ACC_SUBG_LO, desc->dw0); + acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); + acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); + acc->asid = FIELD_GET(ACC_ASID, desc->dw1); + acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); + acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); + acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, + desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); + } else { + ret = -1; + } + spin_unlock(&acc_queue->lock); + + return ret; +} + +static void acc_queue_work_func(struct work_struct *w) +{ + struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); + struct xe_gt *gt = acc_queue->gt; + struct xe_device *xe = gt_to_xe(gt); + struct acc acc = {}; + int ret; + + ret = get_acc(acc_queue, &acc); + if (ret) + return; + + ret = handle_acc(gt, &acc); + if (unlikely(ret)) { + print_acc(xe, &acc); + drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); + } +} + +#define ACC_MSG_LEN_DW 4 + +static bool acc_queue_full(struct acc_queue *acc_queue) +{ + lockdep_assert_held(&acc_queue->lock); + + return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <= + ACC_MSG_LEN_DW; +} + +int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct acc_queue *acc_queue; + u32 asid; + bool full; + + if (unlikely(len != ACC_MSG_LEN_DW)) + return -EPROTO; + + asid = FIELD_GET(ACC_ASID, msg[1]); + acc_queue = >->usm.acc_queue[asid % NUM_ACC_QUEUE]; + + spin_lock(&acc_queue->lock); + full = acc_queue_full(acc_queue); + if (!full) { + memcpy(acc_queue->data + acc_queue->tail, msg, + len * sizeof(u32)); + acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW; + queue_work(gt->usm.acc_wq, &acc_queue->worker); + } else { + drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); + } + spin_unlock(&acc_queue->lock); + + return full ? 
-ENOSPC : 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h new file mode 100644 index 000000000000..35f68027cc9c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_PAGEFAULT_H_ +#define _XE_GT_PAGEFAULT_H_ + +#include + +struct xe_gt; +struct xe_guc; + +int xe_gt_pagefault_init(struct xe_gt *gt); +void xe_gt_pagefault_reset(struct xe_gt *gt); +int xe_gt_tlb_invalidation(struct xe_gt *gt); +int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif /* _XE_GT_PAGEFAULT_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c new file mode 100644 index 000000000000..2d966d935b8e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include +#include "xe_gt.h" +#include "xe_gt_sysfs.h" + +static void xe_gt_sysfs_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static struct kobj_type xe_gt_sysfs_kobj_type = { + .release = xe_gt_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static void gt_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + kobject_put(gt->sysfs); +} + +int xe_gt_sysfs_init(struct xe_gt *gt) +{ + struct device *dev = gt_to_xe(gt)->drm.dev; + struct kobj_gt *kg; + int err; + + kg = kzalloc(sizeof(*kg), GFP_KERNEL); + if (!kg) + return -ENOMEM; + + kobject_init(&kg->base, &xe_gt_sysfs_kobj_type); + kg->gt = gt; + + err = kobject_add(&kg->base, &dev->kobj, "gt%d", gt->info.id); + if (err) { + kobject_put(&kg->base); + return err; + } + + gt->sysfs = &kg->base; + + err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_sysfs_fini, gt); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h new file mode 100644 index 000000000000..ecbfcc5c7d42 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_SYSFS_H_ +#define _XE_GT_SYSFS_H_ + +#include "xe_gt_sysfs_types.h" + +int xe_gt_sysfs_init(struct xe_gt *gt); + +static inline struct xe_gt * +kobj_to_gt(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_gt, base)->gt; +} + +#endif /* _XE_GT_SYSFS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h new file mode 100644 index 000000000000..d3bc6b83360f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sysfs_types.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_SYSFS_TYPES_H_ +#define _XE_GT_SYSFS_TYPES_H_ + +#include + +struct xe_gt; + +/** + * struct kobj_gt - A GT's kobject struct that connects the kobject and the GT + * + * When dealing with multiple GTs, this struct helps to understand which GT + * needs to be addressed on a given sysfs call. 
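+ * + * The kobject is embedded as @base so that sysfs callbacks can get back to + * the GT with container_of(); see kobj_to_gt() in xe_gt_sysfs.h.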
+ */ +struct kobj_gt { + /** @base: The actual kobject */ + struct kobject base; + /** @gt: A pointer to the GT itself */ + struct xe_gt *gt; +}; + +#endif /* _XE_GT_SYSFS_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c new file mode 100644 index 000000000000..8e02e362ba27 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_gt.h" +#include "xe_gt_topology.h" +#include "xe_mmio.h" + +#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) +#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) + +#define XELP_EU_ENABLE 0x9134 /* "_DISABLE" on Xe_LP */ +#define XELP_EU_MASK REG_GENMASK(7, 0) +#define XELP_GT_GEOMETRY_DSS_ENABLE 0x913c +#define XEHP_GT_COMPUTE_DSS_ENABLE 0x9144 +#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT 0x9148 + +static void +load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) +{ + va_list argp; + u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {}; + int i; + + if (drm_WARN_ON(>_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS)) + numregs = XE_MAX_DSS_FUSE_REGS; + + va_start(argp, numregs); + for (i = 0; i < numregs; i++) + fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, u32)); + va_end(argp); + + bitmap_from_arr32(mask, fuse_val, numregs * 32); +} + +static void +load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE); + u32 val = 0; + int i; + + BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1); + + /* + * Pre-Xe_HP platforms inverted the bit meaning (disable instead + * of enable). + */ + if (GRAPHICS_VERx100(xe) < 1250) + reg = ~reg & XELP_EU_MASK; + + /* On PVC, one bit = one EU */ + if (GRAPHICS_VERx100(xe) == 1260) { + val = reg; + } else { + /* All other platforms, one bit = 2 EU */ + for (i = 0; i < fls(reg); i++) + if (reg & BIT(i)) + val |= 0x3 << 2 * i; + } + + bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS); +} + +void +xe_gt_topology_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_printer p = drm_debug_printer("GT topology"); + int num_geometry_regs, num_compute_regs; + + if (GRAPHICS_VERx100(xe) == 1260) { + num_geometry_regs = 0; + num_compute_regs = 2; + } else if (GRAPHICS_VERx100(xe) >= 1250) { + num_geometry_regs = 1; + num_compute_regs = 1; + } else { + num_geometry_regs = 1; + num_compute_regs = 0; + } + + load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs, + XELP_GT_GEOMETRY_DSS_ENABLE); + load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, + XEHP_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); + + xe_gt_topology_dump(gt, &p); +} + +unsigned int +xe_gt_topology_count_dss(xe_dss_mask_t mask) +{ + return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS); +} + +u64 +xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize) +{ + xe_dss_mask_t per_dss_mask = {}; + u64 grpmask = 0; + + WARN_ON(DIV_ROUND_UP(XE_MAX_DSS_FUSE_BITS, grpsize) > BITS_PER_TYPE(grpmask)); + + bitmap_fill(per_dss_mask, grpsize); + for (int i = 0; !bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); i++) { + if (bitmap_intersects(mask, per_dss_mask, grpsize)) + grpmask |= BIT(i); + + bitmap_shift_right(mask, mask, grpsize, XE_MAX_DSS_FUSE_BITS); + } + + return grpmask; +} + +void +xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) +{ + drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS, + 
gt->fuse_topo.g_dss_mask); + drm_printf(p, "dss mask (compute): %*pb\n", XE_MAX_DSS_FUSE_BITS, + gt->fuse_topo.c_dss_mask); + + drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, + gt->fuse_topo.eu_mask_per_dss); + +} + +/* + * Used to obtain the index of the first DSS. Can start searching from the + * beginning of a specific dss group (e.g., gslice, cslice, etc.) if + * groupsize and groupnum are non-zero. + */ +unsigned int +xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum) +{ + return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); +} diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h new file mode 100644 index 000000000000..7a0abc64084f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __XE_GT_TOPOLOGY_H__ +#define __XE_GT_TOPOLOGY_H__ + +#include "xe_gt_types.h" + +struct drm_printer; + +void xe_gt_topology_init(struct xe_gt *gt); + +void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); + +unsigned int +xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum); + +#endif /* __XE_GT_TOPOLOGY_H__ */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h new file mode 100644 index 000000000000..c80a9215098d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GT_TYPES_H_ +#define _XE_GT_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence_types.h" +#include "xe_reg_sr_types.h" +#include "xe_sa_types.h" +#include "xe_uc_types.h" + +struct xe_engine_ops; +struct xe_ggtt; +struct xe_migrate; +struct xe_ring_ops; +struct xe_ttm_gtt_mgr; +struct xe_ttm_vram_mgr; + +enum xe_gt_type { + XE_GT_TYPE_UNINITIALIZED, + XE_GT_TYPE_MAIN, + XE_GT_TYPE_REMOTE, + XE_GT_TYPE_MEDIA, +}; + +#define XE_MAX_DSS_FUSE_REGS 2 +#define XE_MAX_EU_FUSE_REGS 1 + +typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; +typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; + +struct xe_mmio_range { + u32 start; + u32 end; +}; + +/* + * The hardware has multiple kinds of multicast register ranges that need + * special register steering (and future platforms are expected to add + * additional types). + * + * During driver startup, we initialize the steering control register to + * direct reads to a slice/subslice that are valid for the 'subslice' class + * of multicast registers. If another type of steering does not have any + * overlap in valid steering targets with 'subslice' style registers, we will + * need to explicitly re-steer reads of registers of the other type. + * + * Only the replication types that may need additional non-default steering + * are listed here. + */ +enum xe_steering_type { + L3BANK, + MSLICE, + LNCF, + DSS, + OADDRM, + + /* + * On some platforms there are multiple types of MCR registers that + * will always return a non-terminated value at instance (0, 0). We'll + * lump those all into a single category to keep things simple. + */ + INSTANCE0, + + NUM_STEERING_TYPES +}; + +/** + * struct xe_gt - Top level struct of a graphics tile + * + * A graphics tile may be a physical split (duplicate pieces of silicon, + * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). 
Either way + * this structure encapsulates everything a GT is (MMIO, VRAM, memory + * management, microcontrollers, and a set of hardware engines). + */ +struct xe_gt { + /** @xe: backpointer to XE device */ + struct xe_device *xe; + + /** @info: GT info */ + struct { + /** @type: type of GT */ + enum xe_gt_type type; + /** @id: id of GT */ + u8 id; + /** @vram_id: id of the VRAM for this GT */ + u8 vram_id; + /** @clock_freq: clock frequency */ + u32 clock_freq; + /** @engine_mask: mask of engines present on GT */ + u64 engine_mask; + } info; + + /** + * @mmio: mmio info for GT, can be subset of the global device mmio + * space + */ + struct { + /** @size: size of MMIO space on GT */ + size_t size; + /** @regs: pointer to MMIO space on GT */ + void *regs; + /** @fw: force wake for GT */ + struct xe_force_wake fw; + /** + * @adj_limit: adjust MMIO address if address is below this + * value + */ + u32 adj_limit; + /** @adj_offset: offset to add to MMIO address when adjusting */ + u32 adj_offset; + } mmio; + + /** + * @reg_sr: table with registers to be restored on GT init/resume/reset + */ + struct xe_reg_sr reg_sr; + + /** + * @mem: memory management info for GT, multiple GTs can point to same + * objects (virtual split) + */ + struct { + /** + * @vram: VRAM info for GT, multiple GTs can point to same info + * (virtual split), can be subset of global device VRAM + */ + struct { + /** @io_start: start address of VRAM */ + resource_size_t io_start; + /** @size: size of VRAM */ + resource_size_t size; + /** @mapping: pointer to VRAM mappable space */ + void *__iomem mapping; + } vram; + /** @vram_mgr: VRAM TTM manager */ + struct xe_ttm_vram_mgr *vram_mgr; + /** @gtt_mgr: GTT TTM manager */ + struct xe_ttm_gtt_mgr *gtt_mgr; + /** @ggtt: Global graphics translation table */ + struct xe_ggtt *ggtt; + } mem; + + /** @reset: state for GT resets */ + struct { + /** + * @worker: work so GT resets can be done async, allowing the + * reset code to safely flush all code paths + */ + struct work_struct worker; + } reset; + + /** @usm: unified shared memory state */ + struct { + /** + * @bb_pool: Pool from which batchbuffers, for USM operations + * (e.g. migrations, fixing page tables), are allocated. + * Dedicated pool needed so USM operations do not get blocked + * behind any user operations which may have resulted in a + * fault. + */ + struct xe_sa_manager bb_pool; + /** + * @reserved_bcs_instance: reserved BCS instance used for USM + * operations (e.g. migrations, fixing page tables) + */ + u16 reserved_bcs_instance; + /** + * @tlb_invalidation_seqno: TLB invalidation seqno, protected by + * CT lock + */ +#define TLB_INVALIDATION_SEQNO_MAX 0x100000 + int tlb_invalidation_seqno; + /** + * @tlb_invalidation_seqno_recv: last received TLB invalidation + * seqno, protected by CT lock + */ + int tlb_invalidation_seqno_recv; + /** @pf_wq: page fault work queue, unbound, high priority */ + struct workqueue_struct *pf_wq; + /** @acc_wq: access counter work queue, unbound, high priority */ + struct workqueue_struct *acc_wq; + /** + * @pf_queue: Page fault queue used to sync faults so they can + * be processed outside of the GuC CT lock. The queue is sized so + * it can sync all possible faults (1 per physical engine). + * Multiple queues exist so page faults from different VMs can + * be processed in parallel.
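+ * Each G2H fault message takes PF_MSG_LEN_DW (4) DWs of the + * PF_QUEUE_NUM_DW (128) DW buffer, so a queue can hold a bit under 32 + * outstanding faults before pf_queue_full() reports it full.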
+ */ + struct pf_queue { + /** @gt: back pointer to GT */ + struct xe_gt *gt; +#define PF_QUEUE_NUM_DW 128 + /** @data: data in the page fault queue */ + u32 data[PF_QUEUE_NUM_DW]; + /** + * @head: head pointer in DWs for page fault queue, + * moved by worker which processes faults. + */ + u16 head; + /** + * @tail: tail pointer in DWs for page fault queue, + * moved by G2H handler. + */ + u16 tail; + /** @lock: protects page fault queue */ + spinlock_t lock; + /** @worker: to process page faults */ + struct work_struct worker; +#define NUM_PF_QUEUE 4 + } pf_queue[NUM_PF_QUEUE]; + /** + * @acc_queue: Same as page fault queue, cannot process access + * counters under CT lock. + */ + struct acc_queue { + /** @gt: back pointer to GT */ + struct xe_gt *gt; +#define ACC_QUEUE_NUM_DW 128 + /** @data: data in the page fault queue */ + u32 data[ACC_QUEUE_NUM_DW]; + /** + * @head: head pointer in DWs for page fault queue, + * moved by worker which processes faults. + */ + u16 head; + /** + * @tail: tail pointer in DWs for page fault queue, + * moved by G2H handler. + */ + u16 tail; + /** @lock: protects page fault queue */ + spinlock_t lock; + /** @worker: to process access counters */ + struct work_struct worker; +#define NUM_ACC_QUEUE 4 + } acc_queue[NUM_ACC_QUEUE]; + } usm; + + /** @ordered_wq: used to serialize GT resets and TDRs */ + struct workqueue_struct *ordered_wq; + + /** @uc: micro controllers on the GT */ + struct xe_uc uc; + + /** @engine_ops: submission backend engine operations */ + const struct xe_engine_ops *engine_ops; + + /** + * @ring_ops: ring operations for this hw engine (1 per engine class) + */ + const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX]; + + /** @fence_irq: fence IRQs (1 per engine class) */ + struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX]; + + /** @default_lrc: default LRC state */ + void *default_lrc[XE_ENGINE_CLASS_MAX]; + + /** @hw_engines: hardware engines on the GT */ + struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES]; + + /** @kernel_bb_pool: Pool from which batchbuffers are allocated */ + struct xe_sa_manager kernel_bb_pool; + + /** @migrate: Migration helper for vram blits and clearing */ + struct xe_migrate *migrate; + + /** @pcode: GT's PCODE */ + struct { + /** @lock: protecting GT's PCODE mailbox data */ + struct mutex lock; + } pcode; + + /** @sysfs: sysfs' kobj used by xe_gt_sysfs */ + struct kobject *sysfs; + + /** @mocs: info */ + struct { + /** @uc_index: UC index */ + u8 uc_index; + /** @wb_index: WB index, only used on L3_CCS platforms */ + u8 wb_index; + } mocs; + + /** @fuse_topo: GT topology reported by fuse registers */ + struct { + /** @g_dss_mask: dual-subslices usable by geometry */ + xe_dss_mask_t g_dss_mask; + + /** @c_dss_mask: dual-subslices usable by compute */ + xe_dss_mask_t c_dss_mask; + + /** @eu_mask_per_dss: EU mask per DSS*/ + xe_eu_mask_t eu_mask_per_dss; + } fuse_topo; + + /** @steering: register steering for individual HW units */ + struct { + /* @ranges: register ranges used for this steering type */ + const struct xe_mmio_range *ranges; + + /** @group_target: target to steer accesses to */ + u16 group_target; + /** @instance_target: instance to steer accesses to */ + u16 instance_target; + } steering[NUM_STEERING_TYPES]; + + /** + * @mcr_lock: protects the MCR_SELECTOR register for the duration + * of a steered operation + */ + spinlock_t mcr_lock; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c new file mode 100644 index 000000000000..3c285d849ef6 --- /dev/null +++ 
b/drivers/gpu/drm/xe/xe_guc.c @@ -0,0 +1,875 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_guc.h" +#include "xe_guc_ads.h" +#include "xe_guc_ct.h" +#include "xe_guc_hwconfig.h" +#include "xe_guc_log.h" +#include "xe_guc_reg.h" +#include "xe_guc_pc.h" +#include "xe_guc_submit.h" +#include "xe_gt.h" +#include "xe_platform_types.h" +#include "xe_uc_fw.h" +#include "xe_wopcm.h" +#include "xe_mmio.h" +#include "xe_force_wake.h" +#include "i915_reg_defs.h" +#include "gt/intel_gt_regs.h" + +/* TODO: move to common file */ +#define GUC_PVC_MOCS_INDEX_MASK REG_GENMASK(25, 24) +#define PVC_MOCS_UC_INDEX 1 +#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(GUC_PVC_MOCS_INDEX_MASK,\ + index) + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static struct xe_device * +guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ +#define GUC_GGTT_TOP 0xFEE00000 +static u32 guc_bo_ggtt_addr(struct xe_guc *guc, + struct xe_bo *bo) +{ + u32 addr = xe_bo_ggtt_addr(bo); + + XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc))); + XE_BUG_ON(range_overflows_t(u32, addr, bo->size, GUC_GGTT_TOP)); + + return addr; +} + +static u32 guc_ctl_debug_flags(struct xe_guc *guc) +{ + u32 level = xe_guc_log_get_level(&guc->log); + u32 flags = 0; + + if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) + flags |= GUC_LOG_DISABLED; + else + flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << + GUC_LOG_VERBOSITY_SHIFT; + + return flags; +} + +static u32 guc_ctl_feature_flags(struct xe_guc *guc) +{ + return GUC_CTL_ENABLE_SLPC; +} + +static u32 guc_ctl_log_params_flags(struct xe_guc *guc) +{ + u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT; + u32 flags; + + #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) + #define LOG_UNIT SZ_1M + #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS + #else + #define LOG_UNIT SZ_4K + #define LOG_FLAG 0 + #endif + + #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) + #define CAPTURE_UNIT SZ_1M + #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS + #else + #define CAPTURE_UNIT SZ_4K + #define CAPTURE_FLAG 0 + #endif + + BUILD_BUG_ON(!CRASH_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); + BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); + + BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > + (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > + (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); + BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > + (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); + + flags = GUC_LOG_VALID | + GUC_LOG_NOTIFY_ON_HALF_FULL | + CAPTURE_FLAG | + LOG_FLAG | + ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | + ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | + ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << + GUC_LOG_CAPTURE_SHIFT) | + (offset << GUC_LOG_BUF_ADDR_SHIFT); + + #undef LOG_UNIT + #undef LOG_FLAG + #undef CAPTURE_UNIT + #undef CAPTURE_FLAG + + return flags; +} + +static u32 guc_ctl_ads_flags(struct xe_guc *guc) +{ + u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; + u32 flags = ads << GUC_ADS_ADDR_SHIFT; + + return flags; +} + +static u32 guc_ctl_wa_flags(struct xe_guc *guc) +{ + struct xe_device *xe = 
guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 flags = 0; + + /* Wa_22012773006:gen11,gen12 < XeHP */ + if (GRAPHICS_VER(xe) >= 11 && + GRAPHICS_VERx100(xe) < 1250) + flags |= GUC_WA_POLLCS; + + /* Wa_16011759253 */ + /* Wa_22011383443 */ + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0) || + IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0)) + flags |= GUC_WA_GAM_CREDITS; + + /* Wa_14014475959 */ + if (IS_PLATFORM_STEP(xe, XE_METEORLAKE, STEP_A0, STEP_B0) || + xe->info.platform == XE_DG2) + flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + + /* + * Wa_14012197797 + * Wa_22011391025 + * + * The same WA bit is used for both and 22011391025 is applicable to + * all DG2. + */ + if (xe->info.platform == XE_DG2) + flags |= GUC_WA_DUAL_QUEUE; + + /* + * Wa_2201180203 + * GUC_WA_PRE_PARSER causes media workload hang for PVC A0 and PCIe + * errors. Disable this for PVC A0 steppings. + */ + if (GRAPHICS_VER(xe) <= 12 && + !IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0)) + flags |= GUC_WA_PRE_PARSER; + + /* Wa_16011777198 */ + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || + IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, + STEP_B0)) + flags |= GUC_WA_RCS_RESET_BEFORE_RC6; + + /* + * Wa_22012727170 + * Wa_22012727685 + * + * This WA is applicable to PVC CT A0, but causes media regressions. + * Drop the WA for PVC. + */ + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || + IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, + STEP_FOREVER)) + flags |= GUC_WA_CONTEXT_ISOLATION; + + /* Wa_16015675438, Wa_18020744125 */ + if (!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) + flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; + + /* Wa_1509372804 */ + if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_C0)) + flags |= GUC_WA_RENDER_RST_RC6_EXIT; + + + return flags; +} + +static u32 guc_ctl_devid(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + + return (((u32)xe->info.devid) << 16) | xe->info.revid; +} + +static void guc_init_params(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 *params = guc->params; + int i; + + BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); + BUILD_BUG_ON(SOFT_SCRATCH_COUNT != GUC_CTL_MAX_DWORDS + 2); + + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); + params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); + params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); + params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); + params[GUC_CTL_WA] = guc_ctl_wa_flags(guc); + params[GUC_CTL_DEVID] = guc_ctl_devid(guc); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); +} + +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. 
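+ * + * The parameters are handed to the firmware through the SOFT_SCRATCH + * registers: SOFT_SCRATCH(0) is cleared and params[i] is written to + * SOFT_SCRATCH(1 + i); see guc_write_params() below.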
+ */ +void guc_write_params(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + int i; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + xe_mmio_write32(gt, SOFT_SCRATCH(0).reg, 0); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]); +} + +#define MEDIA_GUC_HOST_INTERRUPT _MMIO(0x190304) + +int xe_guc_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + guc->fw.type = XE_UC_FW_TYPE_GUC; + ret = xe_uc_fw_init(&guc->fw); + if (ret) + goto out; + + ret = xe_guc_log_init(&guc->log); + if (ret) + goto out; + + ret = xe_guc_ads_init(&guc->ads); + if (ret) + goto out; + + ret = xe_guc_ct_init(&guc->ct); + if (ret) + goto out; + + ret = xe_guc_pc_init(&guc->pc); + if (ret) + goto out; + + guc_init_params(guc); + + if (xe_gt_is_media_type(gt)) + guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg; + else + guc->notify_reg = GEN11_GUC_HOST_INTERRUPT.reg; + + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + + return 0; + +out: + drm_err(&xe->drm, "GuC init failed with %d", ret); + return ret; +} + +/** + * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load + * @guc: The GuC object + * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_init_post_hwconfig(struct xe_guc *guc) +{ + return xe_guc_ads_init_post_hwconfig(&guc->ads); +} + +int xe_guc_post_load_init(struct xe_guc *guc) +{ + xe_guc_ads_populate_post_load(&guc->ads); + + return 0; +} + +int xe_guc_reset(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 guc_status; + int ret; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC); + + ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5); + if (ret) { + drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", + xe_mmio_read32(gt, GEN6_GDRST.reg)); + goto err_out; + } + + guc_status = xe_mmio_read32(gt, GUC_STATUS.reg); + if (!(guc_status & GS_MIA_IN_RESET)) { + drm_err(&xe->drm, + "GuC status: 0x%x, MIA core expected to be in reset\n", + guc_status); + ret = -EIO; + goto err_out; + } + + return 0; + +err_out: + + return ret; +} + +static void guc_prepare_xfer(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = guc_to_xe(guc); + u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | + GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | + GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | + GUC_ENABLE_MIA_CLOCK_GATING; + + if (GRAPHICS_VERx100(xe) < 1250) + shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES | + GUC_ENABLE_MIA_CACHING; + + if (xe->info.platform == XE_PVC) + shim_flags |= PVC_GUC_MOCS_INDEX(PVC_MOCS_UC_INDEX); + + /* Must program this register before loading the ucode with DMA */ + xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags); + + xe_mmio_write32(gt, GEN9_GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE); +} + +/* + * Supporting MMIO & in memory RSA + */ +static int guc_xfer_rsa(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 rsa[UOS_RSA_SCRATCH_COUNT]; + size_t copied; + int i; + + if (guc->fw.rsa_size > 256) { + u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) + + xe_uc_fw_rsa_offset(&guc->fw); + xe_mmio_write32(gt, UOS_RSA_SCRATCH(0).reg, rsa_ggtt_addr); + return 0; + } + + copied = xe_uc_fw_copy_rsa(&guc->fw, rsa, sizeof(rsa)); + if (copied < sizeof(rsa)) + return -ENOMEM; + + for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) + xe_mmio_write32(gt, UOS_RSA_SCRATCH(i).reg, 
rsa[i]); + + return 0; +} + +/* + * Read the GuC status register (GUC_STATUS) and store it in the + * specified location; then return a boolean indicating whether + * the value matches either of two values representing completion + * of the GuC boot process. + * + * This is used for polling the GuC status in a wait_for() + * loop below. + */ +static bool guc_ready(struct xe_guc *guc, u32 *status) +{ + u32 val = xe_mmio_read32(guc_to_gt(guc), GUC_STATUS.reg); + u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); + + *status = val; + return uk_val == XE_GUC_LOAD_STATUS_READY; +} + +static int guc_wait_ucode(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 status; + int ret; + + /* + * Wait for the GuC to start up. + * NB: Docs recommend not using the interrupt for completion. + * Measurements indicate this should take no more than 20ms + * (assuming the GT clock is at maximum frequency). So, a + * timeout here indicates that the GuC has failed and is unusable. + * (Higher levels of the driver may decide to reset the GuC and + * attempt the ucode load again if this happens.) + * + * FIXME: There is a known (but exceedingly unlikely) race condition + * where the asynchronous frequency management code could reduce + * the GT clock while a GuC reload is in progress (during a full + * GT reset). A fix is in progress but there are complex locking + * issues to be resolved. In the meantime bump the timeout to + * 200ms. Even at slowest clock, this should be sufficient. And + * in the working case, a larger timeout makes no difference. + */ + ret = wait_for(guc_ready(guc, &status), 200); + if (ret) { + struct drm_device *drm = &xe->drm; + struct drm_printer p = drm_info_printer(drm->dev); + + drm_info(drm, "GuC load failed: status = 0x%08X\n", status); + drm_info(drm, "GuC load failed: status: Reset = %d, " + "BootROM = 0x%02X, UKernel = 0x%02X, " + "MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + drm_info(drm, "GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == + XE_GUC_LOAD_STATUS_EXCEPTION) { + drm_info(drm, "GuC firmware exception. EIP: %#x\n", + xe_mmio_read32(guc_to_gt(guc), + SOFT_SCRATCH(13).reg)); + ret = -ENXIO; + } + + xe_guc_log_print(&guc->log, &p); + } else { + drm_dbg(&xe->drm, "GuC successfully loaded"); + } + + return ret; +} + +static int __xe_guc_upload(struct xe_guc *guc) +{ + int ret; + + guc_write_params(guc); + guc_prepare_xfer(guc); + + /* + * Note that GuC needs the CSS header plus uKernel code to be copied + * by the DMA engine in one operation, whereas the RSA signature is + * loaded separately, either by copying it to the UOS_RSA_SCRATCH + * register (if key size <= 256) or through a ggtt-pinned vma (if key + * size > 256). The RSA size and therefore the way we provide it to the + * HW is fixed for each platform and hard-coded in the bootrom. + */ + ret = guc_xfer_rsa(guc); + if (ret) + goto out; + /* + * Current uCode expects the code to be loaded at 8k; locations below + * this are used for the stack. 
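+ * The 0x2000 offset passed to xe_uc_fw_upload() below is exactly that + * 8k boundary.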
+ */ + ret = xe_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE); + if (ret) + goto out; + + /* Wait for authentication */ + ret = guc_wait_ucode(guc); + if (ret) + goto out; + + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); + return 0; + +out: + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + return 0 /* FIXME: ret, don't want to stop load currently */; +} + +/** + * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table + * @guc: The GuC object + * + * This function uploads a minimal GuC that does not support submissions but + * in a state where the hwconfig table can be read. Next, it reads and parses + * the hwconfig table so it can be used for subsequent steps in the driver load. + * Lastly, it enables CT communication (XXX: this is needed for PFs/VFs only). + * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) +{ + int ret; + + xe_guc_ads_populate_minimal(&guc->ads); + + ret = __xe_guc_upload(guc); + if (ret) + return ret; + + ret = xe_guc_hwconfig_init(guc); + if (ret) + return ret; + + ret = xe_guc_enable_communication(guc); + if (ret) + return ret; + + return 0; +} + +int xe_guc_upload(struct xe_guc *guc) +{ + xe_guc_ads_populate(&guc->ads); + + return __xe_guc_upload(guc); +} + +static void guc_handle_mmio_msg(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 msg; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + msg = xe_mmio_read32(gt, SOFT_SCRATCH(15).reg); + msg &= XE_GUC_RECV_MSG_EXCEPTION | + XE_GUC_RECV_MSG_CRASH_DUMP_POSTED; + xe_mmio_write32(gt, SOFT_SCRATCH(15).reg, 0); + + if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED) + drm_err(&guc_to_xe(guc)->drm, + "Received early GuC crash dump notification!\n"); + + if (msg & XE_GUC_RECV_MSG_EXCEPTION) + drm_err(&guc_to_xe(guc)->drm, + "Received early GuC exception notification!\n"); +} + +void guc_enable_irq(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 events = xe_gt_is_media_type(gt) ? 
+ REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST) : + REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, + REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST)); + if (xe_gt_is_media_type(gt)) + xe_mmio_rmw32(gt, GEN11_GUC_SG_INTR_MASK.reg, events, 0); + else + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~events); +} + +int xe_guc_enable_communication(struct xe_guc *guc) +{ + int err; + + guc_enable_irq(guc); + + xe_mmio_rmw32(guc_to_gt(guc), GEN6_PMINTRMSK.reg, + ARAT_EXPIRED_INTRMSK, 0); + + err = xe_guc_ct_enable(&guc->ct); + if (err) + return err; + + guc_handle_mmio_msg(guc); + + return 0; +} + +int xe_guc_suspend(struct xe_guc *guc) +{ + int ret; + u32 action[] = { + XE_GUC_ACTION_CLIENT_SOFT_RESET, + }; + + ret = xe_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + if (ret) { + drm_err(&guc_to_xe(guc)->drm, + "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret); + return ret; + } + + xe_guc_sanitize(guc); + return 0; +} + +void xe_guc_notify(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + xe_mmio_write32(gt, guc->notify_reg, GUC_SEND_TRIGGER); +} + +int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) +{ + u32 action[] = { + XE_GUC_ACTION_AUTHENTICATE_HUC, + rsa_addr + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +#define MEDIA_SOFT_SCRATCH(n) _MMIO(0x190310 + (n) * 4) +#define MEDIA_SOFT_SCRATCH_COUNT 4 + +int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 header; + u32 reply_reg = xe_gt_is_media_type(gt) ? + MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg; + int ret; + int i; + + XE_BUG_ON(guc->ct.enabled); + XE_BUG_ON(!len); + XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT); + XE_BUG_ON(len > MEDIA_SOFT_SCRATCH_COUNT); + XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != + GUC_HXG_ORIGIN_HOST); + XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != + GUC_HXG_TYPE_REQUEST); + +retry: + /* Not in critical data-path, just do if else for GT type */ + if (xe_gt_is_media_type(gt)) { + for (i = 0; i < len; ++i) + xe_mmio_write32(gt, MEDIA_SOFT_SCRATCH(i).reg, + request[i]); +#define LAST_INDEX MEDIA_SOFT_SCRATCH_COUNT - 1 + xe_mmio_read32(gt, MEDIA_SOFT_SCRATCH(LAST_INDEX).reg); + } else { + for (i = 0; i < len; ++i) + xe_mmio_write32(gt, GEN11_SOFT_SCRATCH(i).reg, + request[i]); +#undef LAST_INDEX +#define LAST_INDEX GEN11_SOFT_SCRATCH_COUNT - 1 + xe_mmio_read32(gt, GEN11_SOFT_SCRATCH(LAST_INDEX).reg); + } + + xe_guc_notify(guc); + + ret = xe_mmio_wait32(gt, reply_reg, + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, + GUC_HXG_ORIGIN_GUC), + GUC_HXG_MSG_0_ORIGIN, + 50); + if (ret) { +timeout: + drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n", + request[0], xe_mmio_read32(gt, reply_reg)); + return ret; + } + + header = xe_mmio_read32(gt, reply_reg); + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_NO_RESPONSE_BUSY) { +#define done ({ header = xe_mmio_read32(gt, reply_reg); \ + FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != \ + GUC_HXG_ORIGIN_GUC || \ + FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != \ + GUC_HXG_TYPE_NO_RESPONSE_BUSY; }) + + ret = wait_for(done, 1000); + if (unlikely(ret)) + goto timeout; + if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != + GUC_HXG_ORIGIN_GUC)) + goto proto; +#undef done + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); + + 
drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %u\n", + request[0], reason); + goto retry; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == + GUC_HXG_TYPE_RESPONSE_FAILURE) { + u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); + u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); + + drm_err(&xe->drm, "mmio request %#x: failure %x/%u\n", + request[0], error, hint); + return -ENXIO; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != + GUC_HXG_TYPE_RESPONSE_SUCCESS) { +proto: + drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n", + request[0], header); + return -EPROTO; + } + + /* Use data from the GuC response as our return value */ + return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); +} + +static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) +{ + u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_HOST2GUC_SELF_CFG), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) | + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32, + lower_32_bits(val)), + FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64, + upper_32_bits(val)), + }; + int ret; + + XE_BUG_ON(len > 2); + XE_BUG_ON(len == 1 && upper_32_bits(val)); + + /* Self config must go over MMIO */ + ret = xe_guc_send_mmio(guc, request, ARRAY_SIZE(request)); + + if (unlikely(ret < 0)) + return ret; + if (unlikely(ret > 1)) + return -EPROTO; + if (unlikely(!ret)) + return -ENOKEY; + + return 0; +} + +int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val) +{ + return guc_self_cfg(guc, key, 1, val); +} + +int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val) +{ + return guc_self_cfg(guc, key, 2, val); +} + +void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) +{ + if (iir & GUC_INTR_GUC2HOST) + xe_guc_ct_irq_handler(&guc->ct); +} + +void xe_guc_sanitize(struct xe_guc *guc) +{ + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + xe_guc_ct_disable(&guc->ct); +} + +int xe_guc_reset_prepare(struct xe_guc *guc) +{ + return xe_guc_submit_reset_prepare(guc); +} + +void xe_guc_reset_wait(struct xe_guc *guc) +{ + xe_guc_submit_reset_wait(guc); +} + +void xe_guc_stop_prepare(struct xe_guc *guc) +{ + XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); +} + +int xe_guc_stop(struct xe_guc *guc) +{ + int ret; + + xe_guc_ct_disable(&guc->ct); + + ret = xe_guc_submit_stop(guc); + if (ret) + return ret; + + return 0; +} + +int xe_guc_start(struct xe_guc *guc) +{ + int ret; + + ret = xe_guc_submit_start(guc); + if (ret) + return ret; + + ret = xe_guc_pc_start(&guc->pc); + XE_WARN_ON(ret); + + return 0; +} + +void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 status; + int err; + int i; + + xe_uc_fw_print(&guc->fw, p); + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + status = xe_mmio_read32(gt, GUC_STATUS.reg); + + drm_printf(p, "\nGuC status 0x%08x:\n", status); + drm_printf(p, "\tBootrom status = 0x%x\n", + (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); + drm_printf(p, "\tuKernel status = 0x%x\n", + (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); + drm_printf(p, "\tMIA Core status = 0x%x\n", + (status & GS_MIA_MASK) >> GS_MIA_SHIFT); + drm_printf(p, "\tLog level = %d\n", + xe_guc_log_get_level(&guc->log)); + + drm_puts(p, "\nScratch registers:\n"); + for (i = 0; i < 
SOFT_SCRATCH_COUNT; i++) { + drm_printf(p, "\t%2d: \t0x%x\n", + i, xe_mmio_read32(gt, SOFT_SCRATCH(i).reg)); + } + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + + xe_guc_ct_print(&guc->ct, p); + xe_guc_submit_print(guc, p); +} diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h new file mode 100644 index 000000000000..72b71d75566c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_H_ +#define _XE_GUC_H_ + +#include "xe_hw_engine_types.h" +#include "xe_guc_types.h" +#include "xe_macros.h" + +struct drm_printer; + +int xe_guc_init(struct xe_guc *guc); +int xe_guc_init_post_hwconfig(struct xe_guc *guc); +int xe_guc_post_load_init(struct xe_guc *guc); +int xe_guc_reset(struct xe_guc *guc); +int xe_guc_upload(struct xe_guc *guc); +int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); +int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_suspend(struct xe_guc *guc); +void xe_guc_notify(struct xe_guc *guc); +int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); +int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len); +int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val); +int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val); +void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir); +void xe_guc_sanitize(struct xe_guc *guc); +void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_reset_prepare(struct xe_guc *guc); +void xe_guc_reset_wait(struct xe_guc *guc); +void xe_guc_stop_prepare(struct xe_guc *guc); +int xe_guc_stop(struct xe_guc *guc); +int xe_guc_start(struct xe_guc *guc); + +static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + return GUC_RENDER_CLASS; + case XE_ENGINE_CLASS_VIDEO_DECODE: + return GUC_VIDEO_CLASS; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return GUC_VIDEOENHANCE_CLASS; + case XE_ENGINE_CLASS_COPY: + return GUC_BLITTER_CLASS; + case XE_ENGINE_CLASS_COMPUTE: + return GUC_COMPUTE_CLASS; + case XE_ENGINE_CLASS_OTHER: + default: + XE_WARN_ON(class); + return -1; + } +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c new file mode 100644 index 000000000000..0c08cecaca40 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -0,0 +1,676 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ads.h" +#include "xe_guc_reg.h" +#include "xe_hw_engine.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_engine_regs.h" + +/* Slack of a few additional entries per engine */ +#define ADS_REGSET_EXTRA_MAX 8 + +static struct xe_guc * +ads_to_guc(struct xe_guc_ads *ads) +{ + return container_of(ads, struct xe_guc, ads); +} + +static struct xe_gt * +ads_to_gt(struct xe_guc_ads *ads) +{ + return container_of(ads, struct xe_gt, uc.guc.ads); +} + +static struct xe_device * +ads_to_xe(struct xe_guc_ads *ads) +{ + return gt_to_xe(ads_to_gt(ads)); +} + +static struct iosys_map * +ads_to_map(struct xe_guc_ads *ads) +{ + return &ads->bo->vmap; +} + +/* UM Queue parameters: */ +#define GUC_UM_QUEUE_SIZE (SZ_64K) +#define GUC_PAGE_RES_TIMEOUT_US (-1) + +/* + * The Additional Data Struct (ADS) has pointers for different buffers used by + * the GuC. 
One single gem object contains the ADS struct itself (guc_ads) and + * all the extra buffers indirectly linked via the ADS struct's entries. + * + * Layout of the ADS blob allocated for the GuC: + * + * +---------------------------------------+ <== base + * | guc_ads | + * +---------------------------------------+ + * | guc_policies | + * +---------------------------------------+ + * | guc_gt_system_info | + * +---------------------------------------+ + * | guc_engine_usage | + * +---------------------------------------+ + * | guc_um_init_params | + * +---------------------------------------+ <== static + * | guc_mmio_reg[countA] (engine 0.0) | + * | guc_mmio_reg[countB] (engine 0.1) | + * | guc_mmio_reg[countC] (engine 1.0) | + * | ... | + * +---------------------------------------+ <== dynamic + * | padding | + * +---------------------------------------+ <== 4K aligned + * | golden contexts | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | capture lists | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | UM queues | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | private data | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + */ +struct __guc_ads_blob { + struct guc_ads ads; + struct guc_policies policies; + struct guc_gt_system_info system_info; + struct guc_engine_usage engine_usage; + struct guc_um_init_params um_init_params; + /* From here on, location is dynamic! Refer to above diagram. */ + struct guc_mmio_reg regset[0]; +} __packed; + +#define ads_blob_read(ads_, field_) \ + xe_map_rd_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ + struct __guc_ads_blob, field_) + +#define ads_blob_write(ads_, field_, val_) \ + xe_map_wr_field(ads_to_xe(ads_), ads_to_map(ads_), 0, \ + struct __guc_ads_blob, field_, val_) + +#define info_map_write(xe_, map_, field_, val_) \ + xe_map_wr_field(xe_, map_, 0, struct guc_gt_system_info, field_, val_) + +#define info_map_read(xe_, map_, field_) \ + xe_map_rd_field(xe_, map_, 0, struct guc_gt_system_info, field_) + +static size_t guc_ads_regset_size(struct xe_guc_ads *ads) +{ + XE_BUG_ON(!ads->regset_size); + + return ads->regset_size; +} + +static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) +{ + return PAGE_ALIGN(ads->golden_lrc_size); +} + +static size_t guc_ads_capture_size(struct xe_guc_ads *ads) +{ + /* FIXME: Allocate a proper capture list */ + return PAGE_ALIGN(PAGE_SIZE); +} + +static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + + if (!xe->info.supports_usm) + return 0; + + return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX; +} + +static size_t guc_ads_private_data_size(struct xe_guc_ads *ads) +{ + return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size); +} + +static size_t guc_ads_regset_offset(struct xe_guc_ads *ads) +{ + return offsetof(struct __guc_ads_blob, regset); +} + +static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_regset_offset(ads) + + guc_ads_regset_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_golden_lrc_offset(ads) + + guc_ads_golden_lrc_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t 
guc_ads_um_queues_offset(struct xe_guc_ads *ads) +{ + u32 offset; + + offset = guc_ads_capture_offset(ads) + + guc_ads_capture_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) +{ + size_t offset; + + offset = guc_ads_um_queues_offset(ads) + + guc_ads_um_queues_size(ads); + + return PAGE_ALIGN(offset); +} + +static size_t guc_ads_size(struct xe_guc_ads *ads) +{ + return guc_ads_private_data_offset(ads) + + guc_ads_private_data_size(ads); +} + +static void guc_ads_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_ads *ads = arg; + + xe_bo_unpin_map_no_vm(ads->bo); +} + +static size_t calculate_regset_size(struct xe_gt *gt) +{ + struct xe_reg_sr_entry *sr_entry; + unsigned long sr_idx; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + unsigned int count = 0; + + for_each_hw_engine(hwe, gt, id) + xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) + count++; + + count += (ADS_REGSET_EXTRA_MAX + LNCFCMOCS_REG_COUNT) * XE_NUM_HW_ENGINES; + + return count * sizeof(struct guc_mmio_reg); +} + +static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 mask = 0; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class) + mask |= BIT(hwe->instance); + + return mask; +} + +static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + size_t total_size = 0, alloc_size, real_size; + int class; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + if (class == XE_ENGINE_CLASS_OTHER) + continue; + + if (!engine_enable_mask(gt, class)) + continue; + + real_size = xe_lrc_size(xe, class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + } + + return total_size; +} + +#define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) + +int xe_guc_ads_init(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_bo *bo; + int err; + + ads->golden_lrc_size = calculate_golden_lrc_size(ads); + ads->regset_size = calculate_regset_size(gt); + + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ads_size(ads) + + MAX_GOLDEN_LRC_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ads->bo = bo; + + err = drmm_add_action_or_reset(&xe->drm, guc_ads_fini, ads); + if (err) + return err; + + return 0; +} + +/** + * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load + * @ads: Additional data structures object + * + * Recalcuate golden_lrc_size & regset_size as the number hardware engines may + * have changed after the hwconfig was loaded. Also verify the new sizes fit in + * the already allocated ADS buffer object. + * + * Return: 0 on success, negative error code on error. 
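+ * Note that the size check below is only a XE_WARN_ON(); as currently + * implemented the function always returns 0.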
+ */ +int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) +{ + struct xe_gt *gt = ads_to_gt(ads); + u32 prev_regset_size = ads->regset_size; + + XE_BUG_ON(!ads->bo); + + ads->golden_lrc_size = calculate_golden_lrc_size(ads); + ads->regset_size = calculate_regset_size(gt); + + XE_WARN_ON(ads->golden_lrc_size + + (ads->regset_size - prev_regset_size) > + MAX_GOLDEN_LRC_SIZE); + + return 0; +} + +static void guc_policies_init(struct xe_guc_ads *ads) +{ + ads_blob_write(ads, policies.dpc_promote_time, + GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); + ads_blob_write(ads, policies.max_num_work_items, + GLOBAL_POLICY_MAX_NUM_WI); + ads_blob_write(ads, policies.global_flags, 0); + ads_blob_write(ads, policies.is_valid, 1); +} + +static void fill_engine_enable_masks(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + + info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_COPY)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE)); + info_map_write(xe, info_map, + engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE)); +} + +static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u8 guc_class; + + for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { + if (!info_map_read(xe, &info_map, + engine_enabled_masks[guc_class])) + continue; + + ads_blob_write(ads, ads.eng_state_size[guc_class], + guc_ads_golden_lrc_size(ads) - + xe_lrc_skip_size(xe)); + ads_blob_write(ads, ads.golden_context_lrca[guc_class], + xe_bo_ggtt_addr(ads->bo) + + guc_ads_golden_lrc_offset(ads)); + } +} + +static void guc_mapping_table_init_invalid(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int i, j; + + /* Table must be set to invalid values for entries not used */ + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) + for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) + info_map_write(xe, info_map, mapping_table[i][j], + GUC_MAX_INSTANCES_PER_CLASS); +} + +static void guc_mapping_table_init(struct xe_gt *gt, + struct iosys_map *info_map) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + guc_mapping_table_init_invalid(gt, info_map); + + for_each_hw_engine(hwe, gt, id) { + u8 guc_class; + + guc_class = xe_engine_class_to_guc_class(hwe->class); + info_map_write(xe, info_map, + mapping_table[guc_class][hwe->logical_instance], + hwe->instance); + } +} + +static void guc_capture_list_init(struct xe_guc_ads *ads) +{ + int i, j; + u32 addr = xe_bo_ggtt_addr(ads->bo) + guc_ads_capture_offset(ads); + + /* FIXME: Populate a proper capture list */ + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { + for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { + ads_blob_write(ads, ads.capture_instance[i][j], addr); + ads_blob_write(ads, ads.capture_class[i][j], addr); + } + + ads_blob_write(ads, ads.capture_global[i], addr); + } +} + +static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, + struct 
iosys_map *regset_map, + u32 reg, u32 flags, + unsigned int n_entry) +{ + struct guc_mmio_reg entry = { + .offset = reg, + .flags = flags, + /* TODO: steering */ + }; + + xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), + &entry, sizeof(entry)); +} + +static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, + struct iosys_map *regset_map, + struct xe_hw_engine *hwe) +{ + struct xe_hw_engine *hwe_rcs_reset_domain = + xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); + struct xe_reg_sr_entry *entry; + unsigned long idx; + unsigned count = 0; + const struct { + u32 reg; + u32 flags; + bool skip; + } *e, extra_regs[] = { + { .reg = RING_MODE_GEN7(hwe->mmio_base).reg, }, + { .reg = RING_HWS_PGA(hwe->mmio_base).reg, }, + { .reg = RING_IMR(hwe->mmio_base).reg, }, + { .reg = GEN12_RCU_MODE.reg, .flags = 0x3, + .skip = hwe != hwe_rcs_reset_domain }, + }; + u32 i; + + BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); + + xa_for_each(&hwe->reg_sr.xa, idx, entry) { + u32 flags = entry->masked_reg ? GUC_REGSET_MASKED : 0; + + guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++); + } + + for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { + if (e->skip) + continue; + + guc_mmio_regset_write_one(ads, regset_map, + e->reg, e->flags, count++); + } + + for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { + guc_mmio_regset_write_one(ads, regset_map, + GEN9_LNCFCMOCS(i).reg, 0, count++); + } + + XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg))); + + return count; +} + +static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) +{ + size_t regset_offset = guc_ads_regset_offset(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset; + struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + regset_offset); + + for_each_hw_engine(hwe, gt, id) { + unsigned int count; + u8 gc; + + /* + * 1. Write all MMIO entries for this engine to the table. No + * need to worry about fused-off engines and when there are + * entries in the regset: the reg_state_list has been zero'ed + * by xe_guc_ads_populate() + */ + count = guc_mmio_regset_write(ads, ®set_map, hwe); + if (!count) + continue; + + /* + * 2. 
Record in the header (ads.reg_state_list) the address + * location and number of entries + */ + gc = xe_engine_class_to_guc_class(hwe->class); + ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr); + ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count); + + addr += count * sizeof(struct guc_mmio_reg); + iosys_map_incr(®set_map, count * sizeof(struct guc_mmio_reg)); + } +} + +static void guc_um_init_params(struct xe_guc_ads *ads) +{ + u32 um_queue_offset = guc_ads_um_queues_offset(ads); + u64 base_dpa; + u32 base_ggtt; + int i; + + base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset; + base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset; + + for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) { + ads_blob_write(ads, um_init_params.queue_params[i].base_dpa, + base_dpa + (i * GUC_UM_QUEUE_SIZE)); + ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address, + base_ggtt + (i * GUC_UM_QUEUE_SIZE)); + ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes, + GUC_UM_QUEUE_SIZE); + } + + ads_blob_write(ads, um_init_params.page_response_timeout_in_us, + GUC_PAGE_RES_TIMEOUT_US); +} + +static void guc_doorbell_init(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + + if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { + u32 distdbreg = + xe_mmio_read32(gt, GEN12_DIST_DBS_POPULATED.reg); + + ads_blob_write(ads, + system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], + ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) + & GEN12_DOORBELLS_PER_SQIDI) + 1); + } +} + +/** + * xe_guc_ads_populate_minimal - populate minimal ADS + * @ads: Additional data structures object + * + * This function populates a minimal ADS that does not support submissions but + * enough so the GuC can load and the hwconfig table can be read. 
+ */ +void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) +{ + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u32 base = xe_bo_ggtt_addr(ads->bo); + + XE_BUG_ON(!ads->bo); + + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + guc_policies_init(ads); + guc_prep_golden_lrc_null(ads); + guc_mapping_table_init_invalid(gt, &info_map); + guc_doorbell_init(ads); + + ads_blob_write(ads, ads.scheduler_policies, base + + offsetof(struct __guc_ads_blob, policies)); + ads_blob_write(ads, ads.gt_system_info, base + + offsetof(struct __guc_ads_blob, system_info)); + ads_blob_write(ads, ads.private_data, base + + guc_ads_private_data_offset(ads)); +} + +void xe_guc_ads_populate(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + u32 base = xe_bo_ggtt_addr(ads->bo); + + XE_BUG_ON(!ads->bo); + + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + guc_policies_init(ads); + fill_engine_enable_masks(gt, &info_map); + guc_mmio_reg_state_init(ads); + guc_prep_golden_lrc_null(ads); + guc_mapping_table_init(gt, &info_map); + guc_capture_list_init(ads); + guc_doorbell_init(ads); + + if (xe->info.supports_usm) { + guc_um_init_params(ads); + ads_blob_write(ads, ads.um_init_data, base + + offsetof(struct __guc_ads_blob, um_init_params)); + } + + ads_blob_write(ads, ads.scheduler_policies, base + + offsetof(struct __guc_ads_blob, policies)); + ads_blob_write(ads, ads.gt_system_info, base + + offsetof(struct __guc_ads_blob, system_info)); + ads_blob_write(ads, ads.private_data, base + + guc_ads_private_data_offset(ads)); +} + +static void guc_populate_golden_lrc(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), + offsetof(struct __guc_ads_blob, system_info)); + size_t total_size = 0, alloc_size, real_size; + u32 addr_ggtt, offset; + int class; + + offset = guc_ads_golden_lrc_offset(ads); + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + u8 guc_class; + + if (class == XE_ENGINE_CLASS_OTHER) + continue; + + guc_class = xe_engine_class_to_guc_class(class); + + if (!info_map_read(xe, &info_map, + engine_enabled_masks[guc_class])) + continue; + + XE_BUG_ON(!gt->default_lrc[class]); + + real_size = xe_lrc_size(xe, class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists LRC registers. This is + * required to allow the GuC to restore just the engine state + * when a watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). 
+ */ + ads_blob_write(ads, ads.eng_state_size[guc_class], + real_size - xe_lrc_skip_size(xe)); + ads_blob_write(ads, ads.golden_context_lrca[guc_class], + addr_ggtt); + + xe_map_memcpy_to(xe, ads_to_map(ads), offset, + gt->default_lrc[class], real_size); + + addr_ggtt += alloc_size; + offset += alloc_size; + } + + XE_BUG_ON(total_size != ads->golden_lrc_size); +} + +void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) +{ + guc_populate_golden_lrc(ads); +} diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h new file mode 100644 index 000000000000..138ef6267671 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ADS_H_ +#define _XE_GUC_ADS_H_ + +#include "xe_guc_ads_types.h" + +int xe_guc_ads_init(struct xe_guc_ads *ads); +int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); +void xe_guc_ads_populate(struct xe_guc_ads *ads); +void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads); +void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h new file mode 100644 index 000000000000..4afe44bece4b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ADS_TYPES_H_ +#define _XE_GUC_ADS_TYPES_H_ + +#include + +struct xe_bo; + +/** + * struct xe_guc_ads - GuC additional data structures (ADS) + */ +struct xe_guc_ads { + /** @bo: XE BO for GuC ads blob */ + struct xe_bo *bo; + /** @golden_lrc_size: golden LRC size */ + size_t golden_lrc_size; + /** @regset_size: size of register set passed to GuC for save/restore */ + u32 regset_size; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c new file mode 100644 index 000000000000..61a424c41779 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -0,0 +1,1196 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include + +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_gt_pagefault.h" +#include "xe_guc_submit.h" +#include "xe_map.h" +#include "xe_trace.h" + +/* Used when a CT send wants to block and / or receive data */ +struct g2h_fence { + wait_queue_head_t wq; + u32 *response_buffer; + u32 seqno; + u16 response_len; + u16 error; + u16 hint; + u16 reason; + bool retry; + bool fail; + bool done; +}; + +static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) +{ + g2h_fence->response_buffer = response_buffer; + g2h_fence->response_len = 0; + g2h_fence->fail = false; + g2h_fence->retry = false; + g2h_fence->done = false; + g2h_fence->seqno = ~0x0; +} + +static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) +{ + return g2h_fence->seqno == ~0x0; +} + +static struct xe_guc * +ct_to_guc(struct xe_guc_ct *ct) +{ + return container_of(ct, struct xe_guc, ct); +} + +static struct xe_gt * +ct_to_gt(struct xe_guc_ct *ct) +{ + return container_of(ct, struct xe_gt, uc.guc.ct); +} + +static struct xe_device * +ct_to_xe(struct xe_guc_ct *ct) +{ + return gt_to_xe(ct_to_gt(ct)); +} + +/** + * DOC: GuC CTB Blob + * + * We allocate single blob to hold both CTB descriptors and buffers: + * + * +--------+-----------------------------------------------+------+ + * | offset | contents | size | 
+ * +========+===============================================+======+ + * | 0x0000 | H2G CTB Descriptor (send) | | + * +--------+-----------------------------------------------+ 4K | + * | 0x0800 | G2H CTB Descriptor (g2h) | | + * +--------+-----------------------------------------------+------+ + * | 0x1000 | H2G CT Buffer (send) | n*4K | + * | | | | + * +--------+-----------------------------------------------+------+ + * | 0x1000 | G2H CT Buffer (g2h) | m*4K | + * | + n*4K | | | + * +--------+-----------------------------------------------+------+ + * + * Size of each ``CT Buffer`` must be multiple of 4K. + * We don't expect too many messages in flight at any time, unless we are + * using the GuC submission. In that case each request requires a minimum + * 2 dwords which gives us a maximum 256 queue'd requests. Hopefully this + * enough space to avoid backpressure on the driver. We increase the size + * of the receive buffer (relative to the send) to ensure a G2H response + * CTB has a landing spot. + */ + +#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) +#define CTB_H2G_BUFFER_SIZE (SZ_4K) +#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) + +static size_t guc_ct_size(void) +{ + return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + + CTB_G2H_BUFFER_SIZE; +} + +static void guc_ct_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_ct *ct = arg; + + xa_destroy(&ct->fence_lookup); + xe_bo_unpin_map_no_vm(ct->bo); +} + +static void g2h_worker_func(struct work_struct *w); + +static void primelockdep(struct xe_guc_ct *ct) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&ct->lock); + fs_reclaim_release(GFP_KERNEL); +} + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_bo *bo; + int err; + + XE_BUG_ON(guc_ct_size() % PAGE_SIZE); + + mutex_init(&ct->lock); + spin_lock_init(&ct->fast_lock); + xa_init(&ct->fence_lookup); + ct->fence_context = dma_fence_context_alloc(1); + INIT_WORK(&ct->g2h_worker, g2h_worker_func); + init_waitqueue_head(&ct->wq); + + primelockdep(ct); + + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ct_size(), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ct->bo = bo; + + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); + if (err) + return err; + + return 0; +} + +#define desc_read(xe_, guc_ctb__, field_) \ + xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ + struct guc_ct_buffer_desc, field_) + +#define desc_write(xe_, guc_ctb__, field_, val_) \ + xe_map_wr_field(xe_, &guc_ctb__->desc, 0, \ + struct guc_ct_buffer_desc, field_, val_) + +static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, + struct iosys_map *map) +{ + h2g->size = CTB_H2G_BUFFER_SIZE / sizeof(u32); + h2g->resv_space = 0; + h2g->tail = 0; + h2g->head = 0; + h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) - + h2g->resv_space; + h2g->broken = false; + + h2g->desc = *map; + xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); + + h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2); +} + +static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, + struct iosys_map *map) +{ + g2h->size = CTB_G2H_BUFFER_SIZE / sizeof(u32); + g2h->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32); + g2h->head = 0; + g2h->tail = 0; + g2h->space = CIRC_SPACE(g2h->tail, 
g2h->head, g2h->size) - + g2h->resv_space; + g2h->broken = false; + + g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); + xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); + + g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 + + CTB_H2G_BUFFER_SIZE); +} + +static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) +{ + struct xe_guc *guc = ct_to_guc(ct); + u32 desc_addr, ctb_addr, size; + int err; + + desc_addr = xe_bo_ggtt_addr(ct->bo); + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2; + size = ct->ctbs.h2g.size * sizeof(u32); + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY, + desc_addr); + if (err) + return err; + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY, + ctb_addr); + if (err) + return err; + + return xe_guc_self_cfg32(guc, + GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY, + size); +} + +static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct) +{ + struct xe_guc *guc = ct_to_guc(ct); + u32 desc_addr, ctb_addr, size; + int err; + + desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; + ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 + + CTB_H2G_BUFFER_SIZE; + size = ct->ctbs.g2h.size * sizeof(u32); + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY, + desc_addr); + if (err) + return err; + + err = xe_guc_self_cfg64(guc, + GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY, + ctb_addr); + if (err) + return err; + + return xe_guc_self_cfg32(guc, + GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY, + size); +} + +static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) +{ + u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_HOST2GUC_CONTROL_CTB), + FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, + enable ? GUC_CTB_CONTROL_ENABLE : + GUC_CTB_CONTROL_DISABLE), + }; + int ret = xe_guc_send_mmio(ct_to_guc(ct), request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + +int xe_guc_ct_enable(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + int err; + + XE_BUG_ON(ct->enabled); + + guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); + guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); + + err = guc_ct_ctb_h2g_register(ct); + if (err) + goto err_out; + + err = guc_ct_ctb_g2h_register(ct); + if (err) + goto err_out; + + err = guc_ct_control_toggle(ct, true); + if (err) + goto err_out; + + mutex_lock(&ct->lock); + ct->g2h_outstanding = 0; + ct->enabled = true; + mutex_unlock(&ct->lock); + + smp_mb(); + wake_up_all(&ct->wq); + drm_dbg(&xe->drm, "GuC CT communication channel enabled\n"); + + return 0; + +err_out: + drm_err(&xe->drm, "Failed to enabled CT (%d)\n", err); + + return err; +} + +void xe_guc_ct_disable(struct xe_guc_ct *ct) +{ + mutex_lock(&ct->lock); + ct->enabled = false; + mutex_unlock(&ct->lock); + + xa_destroy(&ct->fence_lookup); +} + +static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) +{ + struct guc_ctb *h2g = &ct->ctbs.h2g; + + lockdep_assert_held(&ct->lock); + + if (cmd_len > h2g->space) { + h2g->head = desc_read(ct_to_xe(ct), h2g, head); + h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) - + h2g->resv_space; + if (cmd_len > h2g->space) + return false; + } + + return true; +} + +static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len) +{ + lockdep_assert_held(&ct->lock); + + return ct->ctbs.g2h.space > g2h_len; +} + +static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len) +{ + lockdep_assert_held(&ct->lock); + + if (!g2h_has_room(ct, g2h_len) || !h2g_has_room(ct, cmd_len)) + return -EBUSY; + + return 0; +} + +static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) +{ + lockdep_assert_held(&ct->lock); + ct->ctbs.h2g.space -= cmd_len; +} + +static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) +{ + XE_BUG_ON(g2h_len > ct->ctbs.g2h.space); + + if (g2h_len) { + spin_lock_irq(&ct->fast_lock); + ct->ctbs.g2h.space -= g2h_len; + ct->g2h_outstanding += num_g2h; + spin_unlock_irq(&ct->fast_lock); + } +} + +static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) +{ + lockdep_assert_held(&ct->fast_lock); + XE_WARN_ON(ct->ctbs.g2h.space + g2h_len > + ct->ctbs.g2h.size - ct->ctbs.g2h.resv_space); + + ct->ctbs.g2h.space += g2h_len; + --ct->g2h_outstanding; +} + +static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) +{ + spin_lock_irq(&ct->fast_lock); + __g2h_release_space(ct, g2h_len); + spin_unlock_irq(&ct->fast_lock); +} + +static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 ct_fence_value, bool want_response) +{ + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *h2g = &ct->ctbs.h2g; + u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; + u32 cmd_len = len + GUC_CTB_HDR_LEN; + u32 cmd_idx = 0, i; + u32 tail = h2g->tail; + struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, + tail * sizeof(u32)); + + lockdep_assert_held(&ct->lock); + XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN); + XE_BUG_ON(tail > h2g->size); + + /* Command will wrap, zero fill (NOPs), return and check credits again */ + if (tail + cmd_len > h2g->size) { + xe_map_memset(xe, &map, 0, 0, (h2g->size - tail) * sizeof(u32)); + h2g_reserve_space(ct, (h2g->size - tail)); + h2g->tail = 0; + desc_write(xe, h2g, tail, h2g->tail); + + return -EAGAIN; + } + + /* + * dw0: CT header (including fence) + * dw1: HXG header (including action code) + * dw2+: action data + */ + cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) 
| + FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | + FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); + if (want_response) { + cmd[cmd_idx++] = + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + } else { + cmd[cmd_idx++] = + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + } + for (i = 1; i < len; ++i) + cmd[cmd_idx++] = action[i]; + + /* Write H2G ensuring visable before descriptor update */ + xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32)); + xe_device_wmb(ct_to_xe(ct)); + + /* Update local copies */ + h2g->tail = (tail + cmd_len) % h2g->size; + h2g_reserve_space(ct, cmd_len); + + /* Update descriptor */ + desc_write(xe, h2g, tail, h2g->tail); + + return 0; +} + +static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 g2h_len, u32 num_g2h, + struct g2h_fence *g2h_fence) +{ + int ret; + + XE_BUG_ON(g2h_len && g2h_fence); + XE_BUG_ON(num_g2h && g2h_fence); + XE_BUG_ON(g2h_len && !num_g2h); + XE_BUG_ON(!g2h_len && num_g2h); + lockdep_assert_held(&ct->lock); + + if (unlikely(ct->ctbs.h2g.broken)) { + ret = -EPIPE; + goto out; + } + + if (unlikely(!ct->enabled)) { + ret = -ENODEV; + goto out; + } + + if (g2h_fence) { + g2h_len = GUC_CTB_HXG_MSG_MAX_LEN; + num_g2h = 1; + + if (g2h_fence_needs_alloc(g2h_fence)) { + void *ptr; + + g2h_fence->seqno = (ct->fence_seqno++ & 0xffff); + init_waitqueue_head(&g2h_fence->wq); + ptr = xa_store(&ct->fence_lookup, + g2h_fence->seqno, + g2h_fence, GFP_ATOMIC); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + goto out; + } + } + } + + xe_device_mem_access_get(ct_to_xe(ct)); +retry: + ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len); + if (unlikely(ret)) + goto put_wa; + + ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0, + !!g2h_fence); + if (unlikely(ret)) { + if (ret == -EAGAIN) + goto retry; + goto put_wa; + } + + g2h_reserve_space(ct, g2h_len, num_g2h); + xe_guc_notify(ct_to_guc(ct)); +put_wa: + xe_device_mem_access_put(ct_to_xe(ct)); +out: + + return ret; +} + +static void kick_reset(struct xe_guc_ct *ct) +{ + xe_gt_reset_async(ct_to_gt(ct)); +} + +static int dequeue_one_g2h(struct xe_guc_ct *ct); + +static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h, + struct g2h_fence *g2h_fence) +{ + struct drm_device *drm = &ct_to_xe(ct)->drm; + struct drm_printer p = drm_info_printer(drm->dev); + unsigned int sleep_period_ms = 1; + int ret; + + XE_BUG_ON(g2h_len && g2h_fence); + lockdep_assert_held(&ct->lock); + +try_again: + ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, + g2h_fence); + + /* + * We wait to try to restore credits for about 1 second before bailing. + * In the case of H2G credits we have no choice but just to wait for the + * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In + * the case of G2H we process any G2H in the channel, hopefully freeing + * credits as we consume the G2H messages. 
+ */ + if (unlikely(ret == -EBUSY && + !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { + struct guc_ctb *h2g = &ct->ctbs.h2g; + + if (sleep_period_ms == 1024) + goto broken; + + trace_xe_guc_ct_h2g_flow_control(h2g->head, h2g->tail, + h2g->size, h2g->space, + len + GUC_CTB_HDR_LEN); + msleep(sleep_period_ms); + sleep_period_ms <<= 1; + + goto try_again; + } else if (unlikely(ret == -EBUSY)) { + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *g2h = &ct->ctbs.g2h; + + trace_xe_guc_ct_g2h_flow_control(g2h->head, + desc_read(xe, g2h, tail), + g2h->size, g2h->space, + g2h_fence ? + GUC_CTB_HXG_MSG_MAX_LEN : + g2h_len); + +#define g2h_avail(ct) \ + (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.head) + if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding || + g2h_avail(ct), HZ)) + goto broken; +#undef g2h_avail + + if (dequeue_one_g2h(ct) < 0) + goto broken; + + goto try_again; + } + + return ret; + +broken: + drm_err(drm, "No forward progress on H2G, reset required"); + xe_guc_ct_print(ct, &p); + ct->ctbs.h2g.broken = true; + + return -EDEADLK; +} + +static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) +{ + int ret; + + XE_BUG_ON(g2h_len && g2h_fence); + + mutex_lock(&ct->lock); + ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); + mutex_unlock(&ct->lock); + + return ret; +} + +int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h) +{ + int ret; + + ret = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h) +{ + int ret; + + ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + int ret; + + lockdep_assert_held(&ct->lock); + + ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL); + if (ret == -EDEADLK) + kick_reset(ct); + + return ret; +} + +/* + * Check if a GT reset is in progress or will occur and if the GT reset brought + * the CT back up. Randomly picking 5 seconds for an upper limit to do a GT reset. + */ +static bool retry_failure(struct xe_guc_ct *ct, int ret) +{ + if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV)) + return false; + +#define ct_alive(ct) \ + (ct->enabled && !ct->ctbs.h2g.broken && !ct->ctbs.g2h.broken) + if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) + return false; +#undef ct_alive + + return true; +} + +static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer, bool no_fail) +{ + struct xe_device *xe = ct_to_xe(ct); + struct g2h_fence g2h_fence; + int ret = 0; + + /* + * We use a fence to implement blocking sends / receiving response data. + * The seqno of the fence is sent in the H2G, returned in the G2H, and + * an xarray is used as storage media with the seqno being the key. + * Fields in the fence hold success, failure, retry status and the + * response data. Safe to allocate on the stack as the xarray is the + * only reference and it cannot be present after this function exits.
+ */ +retry: + g2h_fence_init(&g2h_fence, response_buffer); +retry_same_fence: + ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence); + if (unlikely(ret == -ENOMEM)) { + void *ptr; + + /* Retry allocation /w GFP_KERNEL */ + ptr = xa_store(&ct->fence_lookup, + g2h_fence.seqno, + &g2h_fence, GFP_KERNEL); + if (IS_ERR(ptr)) { + return PTR_ERR(ptr); + } + + goto retry_same_fence; + } else if (unlikely(ret)) { + if (ret == -EDEADLK) + kick_reset(ct); + + if (no_fail && retry_failure(ct, ret)) + goto retry_same_fence; + + if (!g2h_fence_needs_alloc(&g2h_fence)) + xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + + return ret; + } + + ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ); + if (!ret) { + drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x", + g2h_fence.seqno, action[0]); + xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); + return -ETIME; + } + + if (g2h_fence.retry) { + drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d", + action[0], g2h_fence.reason); + goto retry; + } + if (g2h_fence.fail) { + drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d", + action[0], g2h_fence.error, g2h_fence.hint); + ret = -EIO; + } + + return ret > 0 ? 0 : ret; +} + +int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer) +{ + return guc_ct_send_recv(ct, action, len, response_buffer, false); +} + +int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 *response_buffer) +{ + return guc_ct_send_recv(ct, action, len, response_buffer, true); +} + +static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + + lockdep_assert_held(&ct->lock); + + switch (action) { + case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + g2h_release_space(ct, len); + } + + return 0; +} + +static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + u32 response_len = len - GUC_CTB_MSG_MIN_LEN; + u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]); + u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]); + struct g2h_fence *g2h_fence; + + lockdep_assert_held(&ct->lock); + + g2h_fence = xa_erase(&ct->fence_lookup, fence); + if (unlikely(!g2h_fence)) { + /* Don't tear down channel, as send could've timed out */ + drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence); + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); + return 0; + } + + XE_WARN_ON(fence != g2h_fence->seqno); + + if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { + g2h_fence->fail = true; + g2h_fence->error = + FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]); + g2h_fence->hint = + FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]); + } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + g2h_fence->retry = true; + g2h_fence->reason = + FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[0]); + } else if (g2h_fence->response_buffer) { + g2h_fence->response_len = response_len; + memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN, + response_len * sizeof(u32)); + } + + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); + + g2h_fence->done = true; + smp_mb(); + + wake_up(&g2h_fence->wq); + + return 0; +} + +static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + u32 header, hxg, origin, type; + int ret; + + lockdep_assert_held(&ct->lock); + + header = msg[0]; + hxg 
= msg[1]; + + origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg); + if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { + drm_err(&xe->drm, + "G2H channel broken on read, origin=%d, reset required\n", + origin); + ct->ctbs.g2h.broken = true; + + return -EPROTO; + } + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg); + switch (type) { + case GUC_HXG_TYPE_EVENT: + ret = parse_g2h_event(ct, msg, len); + break; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + case GUC_HXG_TYPE_RESPONSE_FAILURE: + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: + ret = parse_g2h_response(ct, msg, len); + break; + default: + drm_err(&xe->drm, + "G2H channel broken on read, type=%d, reset required\n", + type); + ct->ctbs.g2h.broken = true; + + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_guc *guc = ct_to_guc(ct); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; + u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + int ret = 0; + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + return 0; + + switch (action) { + case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + ret = xe_guc_sched_done_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + ret = xe_guc_deregister_done_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: + ret = xe_guc_engine_reset_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: + ret = xe_guc_engine_reset_failure_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: + /* Selftest only at the moment */ + break; + case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION: + case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE: + /* FIXME: Handle this */ + break; + case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR: + ret = xe_guc_engine_memory_cat_error_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + ret = xe_guc_pagefault_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + ret = xe_guc_tlb_invalidation_done_handler(guc, payload, + adj_len); + break; + case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: + ret = xe_guc_access_counter_notify_handler(guc, payload, + adj_len); + break; + default: + drm_err(&xe->drm, "unexpected action 0x%04x\n", action); + } + + if (ret) + drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", + action, ret); + + return 0; +} + +static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) +{ + struct xe_device *xe = ct_to_xe(ct); + struct guc_ctb *g2h = &ct->ctbs.g2h; + u32 tail, head, len; + s32 avail; + + lockdep_assert_held(&ct->fast_lock); + + if (!ct->enabled) + return -ENODEV; + + if (g2h->broken) + return -EPIPE; + + /* Calculate DW available to read */ + tail = desc_read(xe, g2h, tail); + avail = tail - g2h->head; + if (unlikely(avail == 0)) + return 0; + + if (avail < 0) + avail += g2h->size; + + /* Read header */ + xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->head, sizeof(u32)); + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; + if (len > avail) { + drm_err(&xe->drm, + "G2H channel broken on read, avail=%d, len=%d, reset required\n", + avail, len); + g2h->broken = true; + + return -EPROTO; + } + + head = (g2h->head + 1) % g2h->size; + avail = len - 1; + + /* Read G2H message */ + if (avail + head > g2h->size) { + u32 avail_til_wrap = g2h->size - head; + + 
xe_map_memcpy_from(xe, msg + 1, + &g2h->cmds, sizeof(u32) * head, + avail_til_wrap * sizeof(u32)); + xe_map_memcpy_from(xe, msg + 1 + avail_til_wrap, + &g2h->cmds, 0, + (avail - avail_til_wrap) * sizeof(u32)); + } else { + xe_map_memcpy_from(xe, msg + 1, + &g2h->cmds, sizeof(u32) * head, + avail * sizeof(u32)); + } + + if (fast_path) { + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + return 0; + + switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) { + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + break; /* Process these in fast-path */ + default: + return 0; + } + } + + /* Update local / descriptor header */ + g2h->head = (head + avail) % g2h->size; + desc_write(xe, g2h, head, g2h->head); + + return len; +} + +static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_guc *guc = ct_to_guc(ct); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; + u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + int ret = 0; + + switch (action) { + case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + ret = xe_guc_pagefault_handler(guc, payload, adj_len); + break; + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + __g2h_release_space(ct, len); + ret = xe_guc_tlb_invalidation_done_handler(guc, payload, + adj_len); + break; + default: + XE_WARN_ON("NOT_POSSIBLE"); + } + + if (ret) + drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", + action, ret); +} + +/** + * xe_guc_ct_fast_path - process critical G2H in the IRQ handler + * @ct: GuC CT object + * + * Anything related to page faults is critical for performance, process these + * critical G2H in the IRQ. This is safe as these handlers either just wake up + * waiters or queue another worker. 
+ */ +void xe_guc_ct_fast_path(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + int len; + + if (!xe_device_in_fault_mode(xe) || !xe_device_mem_access_ongoing(xe)) + return; + + spin_lock(&ct->fast_lock); + do { + len = g2h_read(ct, ct->fast_msg, true); + if (len > 0) + g2h_fast_path(ct, ct->fast_msg, len); + } while (len > 0); + spin_unlock(&ct->fast_lock); +} + +/* Returns less than zero on error, 0 on done, 1 on more available */ +static int dequeue_one_g2h(struct xe_guc_ct *ct) +{ + int len; + int ret; + + lockdep_assert_held(&ct->lock); + + spin_lock_irq(&ct->fast_lock); + len = g2h_read(ct, ct->msg, false); + spin_unlock_irq(&ct->fast_lock); + if (len <= 0) + return len; + + ret = parse_g2h_msg(ct, ct->msg, len); + if (unlikely(ret < 0)) + return ret; + + ret = process_g2h_msg(ct, ct->msg, len); + if (unlikely(ret < 0)) + return ret; + + return 1; +} + +static void g2h_worker_func(struct work_struct *w) +{ + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); + int ret; + + xe_device_mem_access_get(ct_to_xe(ct)); + do { + mutex_lock(&ct->lock); + ret = dequeue_one_g2h(ct); + mutex_unlock(&ct->lock); + + if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { + struct drm_device *drm = &ct_to_xe(ct)->drm; + struct drm_printer p = drm_info_printer(drm->dev); + + xe_guc_ct_print(ct, &p); + kick_reset(ct); + } + } while (ret == 1); + xe_device_mem_access_put(ct_to_xe(ct)); +} + +static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, + struct drm_printer *p) +{ + u32 head, tail; + + drm_printf(p, "\tsize: %d\n", ctb->size); + drm_printf(p, "\tresv_space: %d\n", ctb->resv_space); + drm_printf(p, "\thead: %d\n", ctb->head); + drm_printf(p, "\ttail: %d\n", ctb->tail); + drm_printf(p, "\tspace: %d\n", ctb->space); + drm_printf(p, "\tbroken: %d\n", ctb->broken); + + head = desc_read(xe, ctb, head); + tail = desc_read(xe, ctb, tail); + drm_printf(p, "\thead (memory): %d\n", head); + drm_printf(p, "\ttail (memory): %d\n", tail); + drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status)); + + if (head != tail) { + struct iosys_map map = + IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32)); + + while (head != tail) { + drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, + xe_map_rd(xe, &map, 0, u32)); + ++head; + if (head == ctb->size) { + head = 0; + map = ctb->cmds; + } else { + iosys_map_incr(&map, sizeof(u32)); + } + } + } +} + +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) +{ + if (ct->enabled) { + drm_puts(p, "\nH2G CTB (all sizes in DW):\n"); + guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p); + + drm_puts(p, "\nG2H CTB (all sizes in DW):\n"); + guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p); + drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding); + } else { + drm_puts(p, "\nCT disabled\n"); + } +} + +#ifdef XE_GUC_CT_SELFTEST +/* + * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow + * control once enough messages are sent (8k sends is enough). Verify forward + * progress, and verify credits are back at their expected values on exit.
+ */ +void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p) +{ + struct guc_ctb *g2h = &ct->ctbs.g2h; + u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1, }; + u32 bad_action[] = { XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 0, 0, }; + int ret; + int i; + + ct->suppress_irq_handler = true; + drm_puts(p, "Starting GuC CT selftest\n"); + + for (i = 0; i < 8192; ++i) { + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1); + if (ret) { + drm_printf(p, "Aborted pass %d, ret %d\n", i, ret); + xe_guc_ct_print(ct, p); + break; + } + } + + ct->suppress_irq_handler = false; + if (!ret) { + xe_guc_ct_irq_handler(ct); + msleep(200); + if (g2h->space != + CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) { + drm_printf(p, "Mismatch on space %d, %d\n", + g2h->space, + CIRC_SPACE(0, 0, g2h->size) - + g2h->resv_space); + ret = -EIO; + } + if (ct->g2h_outstanding) { + drm_printf(p, "Outstanding G2H, %d\n", + ct->g2h_outstanding); + ret = -EIO; + } + } + + /* Check failure path for blocking CTs too */ + xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action)); + if (g2h->space != + CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) { + drm_printf(p, "Mismatch on space %d, %d\n", + g2h->space, + CIRC_SPACE(0, 0, g2h->size) - + g2h->resv_space); + ret = -EIO; + } + if (ct->g2h_outstanding) { + drm_printf(p, "Outstanding G2H, %d\n", + ct->g2h_outstanding); + ret = -EIO; + } + + drm_printf(p, "GuC CT selftest done - %s\n", ret ? "FAIL" : "PASS"); +} +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h new file mode 100644 index 000000000000..49fb74f91e4d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_CT_H_ +#define _XE_GUC_CT_H_ + +#include "xe_guc_ct_types.h" + +struct drm_printer; + +int xe_guc_ct_init(struct xe_guc_ct *ct); +int xe_guc_ct_enable(struct xe_guc_ct *ct); +void xe_guc_ct_disable(struct xe_guc_ct *ct); +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); +void xe_guc_ct_fast_path(struct xe_guc_ct *ct); + +static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) +{ + wake_up_all(&ct->wq); +#ifdef XE_GUC_CT_SELFTEST + if (!ct->suppress_irq_handler && ct->enabled) + queue_work(system_unbound_wq, &ct->g2h_worker); +#else + if (ct->enabled) + queue_work(system_unbound_wq, &ct->g2h_worker); +#endif + xe_guc_ct_fast_path(ct); +} + +/* Basic CT send / receives */ +int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h); +int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 g2h_len, u32 num_g2h); +int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, + u32 *response_buffer); +static inline int +xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + return xe_guc_ct_send_recv(ct, action, len, NULL); +} + +/* This is only version of the send CT you can call from a G2H handler */ +int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, + u32 len); + +/* Can't fail because a GT reset is in progress */ +int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, + u32 len, u32 *response_buffer); +static inline int +xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len) +{ + return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL); +} + +#ifdef XE_GUC_CT_SELFTEST +void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p); +#endif + +#endif 
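To make the credit accounting behind this interface concrete, here is a minimal, hypothetical caller of the basic send path; the action payload and the 4-DW / one-credit G2H reservation mirror what the selftest in xe_guc_ct.c passes, and the helper name is made up purely for illustration (this is a sketch, not part of the patch).

    /* Hypothetical caller, for illustration only. */
    static int example_sched_engine_mode_set(struct xe_guc_ct *ct)
    {
            u32 action[] = {
                    XE_GUC_ACTION_SCHED_ENGINE_MODE_SET,
                    0,      /* payload, illustrative */
                    0,      /* payload, illustrative */
                    1,      /* enable */
            };

            /*
             * Reserve 4 DW of G2H space and one outstanding G2H credit for
             * the _DONE reply; both are released when the reply is parsed.
             */
            return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1);
    }

A blocking request that also needs the response data would instead go through xe_guc_ct_send_block(), which waits on the internal g2h_fence until the matching G2H response arrives.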
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h new file mode 100644 index 000000000000..17b148bf3735 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_CT_TYPES_H_ +#define _XE_GUC_CT_TYPES_H_ + +#include +#include +#include +#include +#include + +#include "abi/guc_communication_ctb_abi.h" + +#define XE_GUC_CT_SELFTEST + +struct xe_bo; + +/** + * struct guc_ctb - GuC command transport buffer (CTB) + */ +struct guc_ctb { + /** @desc: dma buffer map for CTB descriptor */ + struct iosys_map desc; + /** @cmds: dma buffer map for CTB commands */ + struct iosys_map cmds; + /** @size: size of CTB commands (DW) */ + u32 size; + /** @resv_space: reserved space of CTB commands (DW) */ + u32 resv_space; + /** @head: head of CTB commands (DW) */ + u32 head; + /** @tail: tail of CTB commands (DW) */ + u32 tail; + /** @space: space in CTB commands (DW) */ + u32 space; + /** @broken: channel broken */ + bool broken; +}; + +/** + * struct xe_guc_ct - GuC command transport (CT) layer + * + * Includes a pair of CT buffers for bi-directional communication and tracking + * for the H2G and G2H requests sent and received through the buffers. + */ +struct xe_guc_ct { + /** @bo: XE BO for CT */ + struct xe_bo *bo; + /** @lock: protects everything in CT layer */ + struct mutex lock; + /** @fast_lock: protects G2H channel and credits */ + spinlock_t fast_lock; + /** @ctbs: buffers for sending and receiving commands */ + struct { + /** @send: Host to GuC (H2G, send) channel */ + struct guc_ctb h2g; + /** @recv: GuC to Host (G2H, receive) channel */ + struct guc_ctb g2h; + } ctbs; + /** @g2h_outstanding: number of outstanding G2H */ + u32 g2h_outstanding; + /** @g2h_worker: worker to process G2H messages */ + struct work_struct g2h_worker; + /** @enabled: CT enabled */ + bool enabled; + /** @fence_seqno: G2H fence seqno - 16 bits used by CT */ + u32 fence_seqno; + /** @fence_context: context for G2H fence */ + u64 fence_context; + /** @fence_lookup: G2H fence lookup */ + struct xarray fence_lookup; + /** @wq: wait queue used for reliable CT sends and freeing G2H credits */ + wait_queue_head_t wq; +#ifdef XE_GUC_CT_SELFTEST + /** @suppress_irq_handler: force flow control to sender */ + bool suppress_irq_handler; +#endif + /** @msg: Message buffer */ + u32 msg[GUC_CTB_MSG_MAX_LEN]; + /** @fast_msg: Message buffer */ + u32 fast_msg[GUC_CTB_MSG_MAX_LEN]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c new file mode 100644 index 000000000000..916e9633b322 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_debugfs.h" +#include "xe_guc_log.h" +#include "xe_macros.h" + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static struct xe_device * +guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +static struct xe_guc *node_to_guc(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int guc_info(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + 
xe_device_mem_access_get(xe); + xe_guc_print_info(guc, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +static int guc_log(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_guc_log_print(&guc->log, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +#ifdef XE_GUC_CT_SELFTEST +static int guc_ct_selftest(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_guc_ct_selftest(&guc->ct, &p); + xe_device_mem_access_put(xe); + + return 0; +} +#endif + +static const struct drm_info_list debugfs_list[] = { + {"guc_info", guc_info, 0}, + {"guc_log", guc_log, 0}, +#ifdef XE_GUC_CT_SELFTEST + {"guc_ct_selftest", guc_ct_selftest, 0}, +#endif +}; + +void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) +{ + struct drm_minor *minor = guc_to_xe(guc)->drm.primary; + struct drm_info_list *local; + int i; + +#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) + local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) { + XE_WARN_ON("Couldn't allocate memory"); + return; + } + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = guc; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.h b/drivers/gpu/drm/xe/xe_guc_debugfs.h new file mode 100644 index 000000000000..4756dff26fca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_DEBUGFS_H_ +#define _XE_GUC_DEBUGFS_H_ + +struct dentry; +struct xe_guc; + +void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_engine_types.h new file mode 100644 index 000000000000..512615d1ce8c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_engine_types.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ENGINE_TYPES_H_ +#define _XE_GUC_ENGINE_TYPES_H_ + +#include +#include + +#include "xe_gpu_scheduler_types.h" + +struct dma_fence; +struct xe_engine; + +/** + * struct xe_guc_engine - GuC specific state for an xe_engine + */ +struct xe_guc_engine { + /** @engine: Backpointer to parent xe_engine */ + struct xe_engine *engine; + /** @sched: GPU scheduler for this xe_engine */ + struct xe_gpu_scheduler sched; + /** @entity: Scheduler entity for this xe_engine */ + struct xe_sched_entity entity; + /** + * @static_msgs: Static messages for this xe_engine, used when a message + * needs to sent through the GPU scheduler but memory allocations are + * not allowed. 
+ */ +#define MAX_STATIC_MSG_TYPE 3 + struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; + /** @fini_async: do final fini async from this worker */ + struct work_struct fini_async; + /** @resume_time: time of last resume */ + u64 resume_time; + /** @state: GuC specific state for this xe_engine */ + atomic_t state; + /** @wqi_head: work queue item tail */ + u32 wqi_head; + /** @wqi_tail: work queue item tail */ + u32 wqi_tail; + /** @id: GuC id for this xe_engine */ + u16 id; + /** @suspend_wait: wait queue used to wait on pending suspends */ + wait_queue_head_t suspend_wait; + /** @suspend_pending: a suspend of the engine is pending */ + bool suspend_pending; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h new file mode 100644 index 000000000000..f562404a6cf7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -0,0 +1,392 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_FWIF_H +#define _XE_GUC_FWIF_H + +#include + +#include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" +#include "abi/guc_errors_abi.h" +#include "abi/guc_communication_mmio_abi.h" +#include "abi/guc_communication_ctb_abi.h" +#include "abi/guc_klvs_abi.h" +#include "abi/guc_messages_abi.h" + +#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 +#define G2H_LEN_DW_DEREGISTER_CONTEXT 3 +#define G2H_LEN_DW_TLB_INVALIDATE 3 + +#define GUC_CONTEXT_DISABLE 0 +#define GUC_CONTEXT_ENABLE 1 + +#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 +#define GUC_CLIENT_PRIORITY_HIGH 1 +#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 +#define GUC_CLIENT_PRIORITY_NORMAL 3 +#define GUC_CLIENT_PRIORITY_NUM 4 + +#define GUC_RENDER_ENGINE 0 +#define GUC_VIDEO_ENGINE 1 +#define GUC_BLITTER_ENGINE 2 +#define GUC_VIDEOENHANCE_ENGINE 3 +#define GUC_VIDEO_ENGINE2 4 +#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1) + +#define GUC_RENDER_CLASS 0 +#define GUC_VIDEO_CLASS 1 +#define GUC_VIDEOENHANCE_CLASS 2 +#define GUC_BLITTER_CLASS 3 +#define GUC_COMPUTE_CLASS 4 +#define GUC_GSC_OTHER_CLASS 5 +#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS +#define GUC_MAX_ENGINE_CLASSES 16 +#define GUC_MAX_INSTANCES_PER_CLASS 32 + +/* Work item for submitting workloads into work queue of GuC. 
*/ +#define WQ_STATUS_ACTIVE 1 +#define WQ_STATUS_SUSPENDED 2 +#define WQ_STATUS_CMD_ERROR 3 +#define WQ_STATUS_ENGINE_ID_NOT_USED 4 +#define WQ_STATUS_SUSPENDED_FROM_RESET 5 +#define WQ_TYPE_NOOP 0x4 +#define WQ_TYPE_MULTI_LRC 0x5 +#define WQ_TYPE_MASK GENMASK(7, 0) +#define WQ_LEN_MASK GENMASK(26, 16) + +#define WQ_GUC_ID_MASK GENMASK(15, 0) +#define WQ_RING_TAIL_MASK GENMASK(28, 18) + +struct guc_wq_item { + u32 header; + u32 context_desc; + u32 submit_element_info; + u32 fence_id; +} __packed; + +struct guc_sched_wq_desc { + u32 head; + u32 tail; + u32 error_offset; + u32 wq_status; + u32 reserved[28]; +} __packed; + +/* Helper for context registration H2G */ +struct guc_ctxt_registration_info { + u32 flags; + u32 context_idx; + u32 engine_class; + u32 engine_submit_mask; + u32 wq_desc_lo; + u32 wq_desc_hi; + u32 wq_base_lo; + u32 wq_base_hi; + u32 wq_size; + u32 hwlrca_lo; + u32 hwlrca_hi; +}; +#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) + +/* 32-bit KLV structure as used by policy updates and others */ +struct guc_klv_generic_dw_t { + u32 kl; + u32 value; +} __packed; + +/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */ +struct guc_update_engine_policy_header { + u32 action; + u32 guc_id; +} __packed; + +struct guc_update_engine_policy { + struct guc_update_engine_policy_header header; + struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; +} __packed; + +/* GUC_CTL_* - Parameters for loading the GuC */ +#define GUC_CTL_LOG_PARAMS 0 +#define GUC_LOG_VALID BIT(0) +#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) +#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) +#define GUC_LOG_LOG_ALLOC_UNITS BIT(3) +#define GUC_LOG_CRASH_SHIFT 4 +#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) +#define GUC_LOG_DEBUG_SHIFT 6 +#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) +#define GUC_LOG_CAPTURE_SHIFT 10 +#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) +#define GUC_LOG_BUF_ADDR_SHIFT 12 + +#define GUC_CTL_WA 1 +#define GUC_WA_GAM_CREDITS BIT(10) +#define GUC_WA_DUAL_QUEUE BIT(11) +#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13) +#define GUC_WA_CONTEXT_ISOLATION BIT(15) +#define GUC_WA_PRE_PARSER BIT(14) +#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17) +#define GUC_WA_POLLCS BIT(18) +#define GUC_WA_RENDER_RST_RC6_EXIT BIT(19) +#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) + +#define GUC_CTL_FEATURE 2 +#define GUC_CTL_ENABLE_SLPC BIT(2) +#define GUC_CTL_DISABLE_SCHEDULER BIT(14) + +#define GUC_CTL_DEBUG 3 +#define GUC_LOG_VERBOSITY_SHIFT 0 +#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT) +#define GUC_LOG_VERBOSITY_MIN 0 +#define GUC_LOG_VERBOSITY_MAX 3 +#define GUC_LOG_VERBOSITY_MASK 0x0000000f +#define GUC_LOG_DESTINATION_MASK (3 << 4) +#define GUC_LOG_DISABLED (1 << 6) +#define GUC_PROFILE_ENABLED (1 << 7) + +#define GUC_CTL_ADS 4 +#define GUC_ADS_ADDR_SHIFT 1 +#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) + +#define GUC_CTL_DEVID 5 + +#define GUC_CTL_MAX_DWORDS 14 + +/* Scheduling policy settings */ + +#define GLOBAL_POLICY_MAX_NUM_WI 15 + +/* Don't reset an engine upon preemption failure */ +#define GLOBAL_POLICY_DISABLE_ENGINE_RESET BIT(0) + +#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 + +struct guc_policies { + u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; + /* In micro seconds. 
How much time to allow before DPC processing is + * called back via interrupt (to prevent DPC queue drain starving). + * Typically 1000s of micro seconds (example only, not granularity). */ + u32 dpc_promote_time; + + /* Must be set to take these new values. */ + u32 is_valid; + + /* Max number of WIs to process per call. A large value may keep CS + * idle. */ + u32 max_num_work_items; + + u32 global_flags; + u32 reserved[4]; +} __packed; + +/* GuC MMIO reg state struct */ +struct guc_mmio_reg { + u32 offset; + u32 value; + u32 flags; + u32 mask; +#define GUC_REGSET_MASKED BIT(0) +#define GUC_REGSET_MASKED_WITH_VALUE BIT(2) +#define GUC_REGSET_RESTORE_ONLY BIT(3) +} __packed; + +/* GuC register sets */ +struct guc_mmio_reg_set { + u32 address; + u16 count; + u16 reserved; +} __packed; + +/* Generic GT SysInfo data types */ +#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0 +#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1 +#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI 2 +#define GUC_GENERIC_GT_SYSINFO_MAX 16 + +/* HW info */ +struct guc_gt_system_info { + u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES]; + u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; +} __packed; + +enum { + GUC_CAPTURE_LIST_INDEX_PF = 0, + GUC_CAPTURE_LIST_INDEX_VF = 1, + GUC_CAPTURE_LIST_INDEX_MAX = 2, +}; + +/* GuC Additional Data Struct */ +struct guc_ads { + struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 reserved0; + u32 scheduler_policies; + u32 gt_system_info; + u32 reserved1; + u32 control_data; + u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; + u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; + u32 private_data; + u32 um_init_data; + u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; + u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; + u32 reserved[14]; +} __packed; + +/* Engine usage stats */ +struct guc_engine_usage_record { + u32 current_context_index; + u32 last_switch_in_stamp; + u32 reserved0; + u32 total_runtime; + u32 reserved1[4]; +} __packed; + +struct guc_engine_usage { + struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; +} __packed; + +/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ +enum xe_guc_recv_message { + XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), + XE_GUC_RECV_MSG_EXCEPTION = BIT(30), +}; + +/* Page fault structures */ +struct access_counter_desc { + u32 dw0; +#define ACCESS_COUNTER_TYPE BIT(0) +#define ACCESS_COUNTER_SUBG_LO GENMASK(31, 1) + + u32 dw1; +#define ACCESS_COUNTER_SUBG_HI BIT(0) +#define ACCESS_COUNTER_RSVD0 GENMASK(2, 1) +#define ACCESS_COUNTER_ENG_INSTANCE GENMASK(8, 3) +#define ACCESS_COUNTER_ENG_CLASS GENMASK(11, 9) +#define ACCESS_COUNTER_ASID GENMASK(31, 12) + + u32 dw2; +#define ACCESS_COUNTER_VFID GENMASK(5, 0) +#define ACCESS_COUNTER_RSVD1 GENMASK(7, 6) +#define ACCESS_COUNTER_GRANULARITY GENMASK(10, 8) +#define ACCESS_COUNTER_RSVD2 GENMASK(16, 11) +#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_LO GENMASK(31, 17) + + u32 dw3; +#define ACCESS_COUNTER_VIRTUAL_ADDR_RANGE_HI GENMASK(31, 0) +} __packed; + +enum guc_um_queue_type { + GUC_UM_HW_QUEUE_PAGE_FAULT = 0, + GUC_UM_HW_QUEUE_PAGE_FAULT_RESPONSE, + GUC_UM_HW_QUEUE_ACCESS_COUNTER, + GUC_UM_HW_QUEUE_MAX +}; + +struct guc_um_queue_params { + u64 base_dpa; + u32 base_ggtt_address; + u32 size_in_bytes; + u32 rsvd[4]; +} __packed; + 
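As an aside on the work queue item definitions earlier in this header: the WQ_* masks compose into the header and submit_element_info dwords of struct guc_wq_item. The sketch below is purely illustrative and not part of the hunk shown here; it assumes FIELD_PREP() from <linux/bitfield.h>, and wqi_len_dw, lrc_desc, guc_id and ring_tail are hypothetical placeholders.

    /* Hypothetical packing of one guc_wq_item, for illustration only. */
    struct guc_wq_item wqi = {
            .header = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
                      FIELD_PREP(WQ_LEN_MASK, wqi_len_dw),      /* payload length in DW (assumed) */
            .context_desc = lrc_desc,                           /* hypothetical context descriptor */
            .submit_element_info = FIELD_PREP(WQ_GUC_ID_MASK, guc_id) |
                                   FIELD_PREP(WQ_RING_TAIL_MASK, ring_tail),
            .fence_id = 0,
    };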
+struct guc_um_init_params { + u64 page_response_timeout_in_us; + u32 rsvd[6]; + struct guc_um_queue_params queue_params[GUC_UM_HW_QUEUE_MAX]; +} __packed; + +enum xe_guc_fault_reply_type { + PFR_ACCESS = 0, + PFR_ENGINE, + PFR_VFID, + PFR_ALL, + PFR_INVALID +}; + +enum xe_guc_response_desc_type { + TLB_INVALIDATION_DESC = 0, + FAULT_RESPONSE_DESC +}; + +struct xe_guc_pagefault_desc { + u32 dw0; +#define PFD_FAULT_LEVEL GENMASK(2, 0) +#define PFD_SRC_ID GENMASK(10, 3) +#define PFD_RSVD_0 GENMASK(17, 11) +#define XE2_PFD_TRVA_FAULT BIT(18) +#define PFD_ENG_INSTANCE GENMASK(24, 19) +#define PFD_ENG_CLASS GENMASK(27, 25) +#define PFD_PDATA_LO GENMASK(31, 28) + + u32 dw1; +#define PFD_PDATA_HI GENMASK(11, 0) +#define PFD_PDATA_HI_SHIFT 4 +#define PFD_ASID GENMASK(31, 12) + + u32 dw2; +#define PFD_ACCESS_TYPE GENMASK(1, 0) +#define PFD_FAULT_TYPE GENMASK(3, 2) +#define PFD_VFID GENMASK(9, 4) +#define PFD_RSVD_1 GENMASK(11, 10) +#define PFD_VIRTUAL_ADDR_LO GENMASK(31, 12) +#define PFD_VIRTUAL_ADDR_LO_SHIFT 12 + + u32 dw3; +#define PFD_VIRTUAL_ADDR_HI GENMASK(31, 0) +#define PFD_VIRTUAL_ADDR_HI_SHIFT 32 +} __packed; + +struct xe_guc_pagefault_reply { + u32 dw0; +#define PFR_VALID BIT(0) +#define PFR_SUCCESS BIT(1) +#define PFR_REPLY GENMASK(4, 2) +#define PFR_RSVD_0 GENMASK(9, 5) +#define PFR_DESC_TYPE GENMASK(11, 10) +#define PFR_ASID GENMASK(31, 12) + + u32 dw1; +#define PFR_VFID GENMASK(5, 0) +#define PFR_RSVD_1 BIT(6) +#define PFR_ENG_INSTANCE GENMASK(12, 7) +#define PFR_ENG_CLASS GENMASK(15, 13) +#define PFR_PDATA GENMASK(31, 16) + + u32 dw2; +#define PFR_RSVD_2 GENMASK(31, 0) +} __packed; + +struct xe_guc_acc_desc { + u32 dw0; +#define ACC_TYPE BIT(0) +#define ACC_TRIGGER 0 +#define ACC_NOTIFY 1 +#define ACC_SUBG_LO GENMASK(31, 1) + + u32 dw1; +#define ACC_SUBG_HI BIT(0) +#define ACC_RSVD0 GENMASK(2, 1) +#define ACC_ENG_INSTANCE GENMASK(8, 3) +#define ACC_ENG_CLASS GENMASK(11, 9) +#define ACC_ASID GENMASK(31, 12) + + u32 dw2; +#define ACC_VFID GENMASK(5, 0) +#define ACC_RSVD1 GENMASK(7, 6) +#define ACC_GRANULARITY GENMASK(10, 8) +#define ACC_RSVD2 GENMASK(16, 11) +#define ACC_VIRTUAL_ADDR_RANGE_LO GENMASK(31, 17) + + u32 dw3; +#define ACC_VIRTUAL_ADDR_RANGE_HI GENMASK(31, 0) +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c new file mode 100644 index 000000000000..8dfd48f71a7c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_hwconfig.h" +#include "xe_map.h" + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static struct xe_device * +guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size) +{ + u32 action[] = { + XE_GUC_ACTION_GET_HWCONFIG, + lower_32_bits(ggtt_addr), + upper_32_bits(ggtt_addr), + size, + }; + + return xe_guc_send_mmio(guc, action, ARRAY_SIZE(action)); +} + +static int guc_hwconfig_size(struct xe_guc *guc, u32 *size) +{ + int ret = send_get_hwconfig(guc, 0, 0); + + if (ret < 0) + return ret; + + *size = ret; + return 0; +} + +static int guc_hwconfig_copy(struct xe_guc *guc) +{ + int ret = send_get_hwconfig(guc, xe_bo_ggtt_addr(guc->hwconfig.bo), + guc->hwconfig.size); + + if (ret < 0) + return ret; + + return 0; +} + +static void 
guc_hwconfig_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + + xe_bo_unpin_map_no_vm(guc->hwconfig.bo); +} + +int xe_guc_hwconfig_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + struct xe_bo *bo; + u32 size; + int err; + + /* Initialization already done */ + if (guc->hwconfig.bo) + return 0; + + /* + * All hwconfig the same across GTs so only GT0 needs to be configured + */ + if (gt->info.id != XE_GT0) + return 0; + + /* ADL_P, DG2+ supports hwconfig table */ + if (GRAPHICS_VERx100(xe) < 1255 && xe->info.platform != XE_ALDERLAKE_P) + return 0; + + err = guc_hwconfig_size(guc, &size); + if (err) + return err; + if (!size) + return -EINVAL; + + bo = xe_bo_create_pin_map(xe, gt, NULL, PAGE_ALIGN(size), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + guc->hwconfig.bo = bo; + guc->hwconfig.size = size; + + err = drmm_add_action_or_reset(&xe->drm, guc_hwconfig_fini, guc); + if (err) + return err; + + return guc_hwconfig_copy(guc); +} + +u32 xe_guc_hwconfig_size(struct xe_guc *guc) +{ + return !guc->hwconfig.bo ? 0 : guc->hwconfig.size; +} + +void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) +{ + struct xe_device *xe = guc_to_xe(guc); + + XE_BUG_ON(!guc->hwconfig.bo); + + xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, + guc->hwconfig.size); +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h new file mode 100644 index 000000000000..b5794d641900 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_HWCONFIG_H_ +#define _XE_GUC_HWCONFIG_H_ + +#include + +struct xe_guc; + +int xe_guc_hwconfig_init(struct xe_guc *guc); +u32 xe_guc_hwconfig_size(struct xe_guc *guc); +void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c new file mode 100644 index 000000000000..7ec1b2bb1f8e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_guc_log.h" +#include "xe_map.h" +#include "xe_module.h" + +static struct xe_gt * +log_to_gt(struct xe_guc_log *log) +{ + return container_of(log, struct xe_gt, uc.guc.log); +} + +static struct xe_device * +log_to_xe(struct xe_guc_log *log) +{ + return gt_to_xe(log_to_gt(log)); +} + +static size_t guc_log_size(void) +{ + /* + * GuC Log buffer Layout + * + * +===============================+ 00B + * | Crash dump state header | + * +-------------------------------+ 32B + * | Debug state header | + * +-------------------------------+ 64B + * | Capture state header | + * +-------------------------------+ 96B + * | | + * +===============================+ PAGE_SIZE (4KB) + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE + * | Debug logs | + * +===============================+ + DEBUG_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE + */ + return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + + CAPTURE_BUFFER_SIZE; +} + +void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p) +{ + struct xe_device *xe = log_to_xe(log); + size_t size; + int i, j; + + XE_BUG_ON(!log->bo); + + size = log->bo->size; + +#define DW_PER_READ 128 + 
XE_BUG_ON(size % (DW_PER_READ * sizeof(u32))); + for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) { + u32 read[DW_PER_READ]; + + xe_map_memcpy_from(xe, read, &log->bo->vmap, i * sizeof(u32), + DW_PER_READ * sizeof(u32)); +#define DW_PER_PRINT 4 + for (j = 0; j < DW_PER_READ / DW_PER_PRINT; ++j) { + u32 *print = read + j * DW_PER_PRINT; + + drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(print + 0), *(print + 1), + *(print + 2), *(print + 3)); + } + } +} + +static void guc_log_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_log *log = arg; + + xe_bo_unpin_map_no_vm(log->bo); +} + +int xe_guc_log_init(struct xe_guc_log *log) +{ + struct xe_device *xe = log_to_xe(log); + struct xe_gt *gt = log_to_gt(log); + struct xe_bo *bo; + int err; + + bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size()); + log->bo = bo; + log->level = xe_guc_log_level; + + err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h new file mode 100644 index 000000000000..2d25ab28b4b3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_LOG_H_ +#define _XE_GUC_LOG_H_ + +#include "xe_guc_log_types.h" + +struct drm_printer; + +#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER) +#define CRASH_BUFFER_SIZE SZ_1M +#define DEBUG_BUFFER_SIZE SZ_8M +#define CAPTURE_BUFFER_SIZE SZ_2M +#else +#define CRASH_BUFFER_SIZE SZ_8K +#define DEBUG_BUFFER_SIZE SZ_64K +#define CAPTURE_BUFFER_SIZE SZ_16K +#endif +/* + * While we're using plain log level in i915, GuC controls are much more... + * "elaborate"? We have a couple of bits for verbosity, separate bit for actual + * log enabling, and separate bit for default logging - which "conveniently" + * ignores the enable bit. + */ +#define GUC_LOG_LEVEL_DISABLED 0 +#define GUC_LOG_LEVEL_NON_VERBOSE 1 +#define GUC_LOG_LEVEL_IS_ENABLED(x) ((x) > GUC_LOG_LEVEL_DISABLED) +#define GUC_LOG_LEVEL_IS_VERBOSE(x) ((x) > GUC_LOG_LEVEL_NON_VERBOSE) +#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \ + typeof(x) _x = (x); \ + GUC_LOG_LEVEL_IS_VERBOSE(_x) ? 
_x - 2 : 0; \ +}) +#define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) +#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) + +int xe_guc_log_init(struct xe_guc_log *log); +void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p); + +static inline u32 +xe_guc_log_get_level(struct xe_guc_log *log) +{ + return log->level; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h new file mode 100644 index 000000000000..125080d138a7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_LOG_TYPES_H_ +#define _XE_GUC_LOG_TYPES_H_ + +#include + +struct xe_bo; + +/** + * struct xe_guc_log - GuC log + */ +struct xe_guc_log { + /** @level: GuC log level */ + u32 level; + /** @bo: XE BO for GuC log */ + struct xe_bo *bo; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c new file mode 100644 index 000000000000..227e30a482e3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_types.h" +#include "xe_gt_sysfs.h" +#include "xe_guc_ct.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_pcode.h" +#include "i915_reg_defs.h" +#include "i915_reg.h" + +#include "intel_mchbar_regs.h" + +/* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */ +#define RP0_MASK REG_GENMASK(7, 0) +#define RP1_MASK REG_GENMASK(15, 8) +#define RPN_MASK REG_GENMASK(23, 16) + +#define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) +#define RPE_MASK REG_GENMASK(15, 8) + +#include "gt/intel_gt_regs.h" +/* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */ +#define REQ_RATIO_MASK REG_GENMASK(31, 23) + +/* For GEN6_GT_CORE_STATUS.reg to be merged when the definition moves to Xe */ +#define RCN_MASK REG_GENMASK(2, 0) + +#define GEN12_RPSTAT1 _MMIO(0x1381b4) +#define GEN12_CAGF_MASK REG_GENMASK(19, 11) + +#define GT_FREQUENCY_MULTIPLIER 50 +#define GEN9_FREQ_SCALER 3 + +/** + * DOC: GuC Power Conservation (PC) + * + * GuC Power Conservation (PC) supports multiple features for the most + * efficient and performing use of the GT when GuC submission is enabled, + * including frequency management, Render-C states management, and various + * algorithms for power balancing. + * + * Single Loop Power Conservation (SLPC) is the name given to the suite of + * connected power conservation features in the GuC firmware. The firmware + * exposes a programming interface to the host for the control of SLPC. + * + * Frequency management: + * ===================== + * + * Xe driver enables SLPC with all of its defaults features and frequency + * selection, which varies per platform. + * Xe's GuC PC provides a sysfs API for frequency management: + * + * device/gt#/freq_* *read-only* files: + * - freq_act: The actual resolved frequency decided by PCODE. + * - freq_cur: The current one requested by GuC PC to the Hardware. + * - freq_rpn: The Render Performance (RP) N level, which is the minimal one. + * - freq_rpe: The Render Performance (RP) E level, which is the efficient one. + * - freq_rp0: The Render Performance (RP) 0 level, which is the maximum one. + * + * device/gt#/freq_* *read-write* files: + * - freq_min: GuC PC min request. + * - freq_max: GuC PC max request. 
+ * If max <= min, then freq_min becomes a fixed frequency request. + * + * Render-C States: + * ================ + * + * Render-C states is also a GuC PC feature that is now enabled in Xe for + * all platforms. + * Xe's GuC PC provides a sysfs API for Render-C States: + * + * device/gt#/rc* *read-only* files: + * - rc_status: Provide the actual immediate status of Render-C: (rc0 or rc6) + * - rc6_residency: Provide the rc6_residency counter in units of 1.28 uSec. + * Prone to overflows. + */ + +static struct xe_guc * +pc_to_guc(struct xe_guc_pc *pc) +{ + return container_of(pc, struct xe_guc, pc); +} + +static struct xe_device * +pc_to_xe(struct xe_guc_pc *pc) +{ + struct xe_guc *guc = pc_to_guc(pc); + struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); + + return gt_to_xe(gt); +} + +static struct xe_gt * +pc_to_gt(struct xe_guc_pc *pc) +{ + return container_of(pc, struct xe_gt, uc.guc.pc); +} + +static struct xe_guc_pc * +dev_to_pc(struct device *dev) +{ + return &kobj_to_gt(&dev->kobj)->uc.guc.pc; +} + +static struct iosys_map * +pc_to_maps(struct xe_guc_pc *pc) +{ + return &pc->bo->vmap; +} + +#define slpc_shared_data_read(pc_, field_) \ + xe_map_rd_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \ + struct slpc_shared_data, field_) + +#define slpc_shared_data_write(pc_, field_, val_) \ + xe_map_wr_field(pc_to_xe(pc_), pc_to_maps(pc_), 0, \ + struct slpc_shared_data, field_, val_) + +#define SLPC_EVENT(id, count) \ + (FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ + FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) + +static bool pc_is_in_state(struct xe_guc_pc *pc, enum slpc_global_state state) +{ + xe_device_assert_mem_access(pc_to_xe(pc)); + return slpc_shared_data_read(pc, header.global_state) == state; +} + +static int pc_action_reset(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_RESET, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret)); + + return ret; +} + +static int pc_action_shutdown(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe", + ERR_PTR(ret)); + + return ret; +} + +static int pc_action_query_task_state(struct xe_guc_pc *pc) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + xe_bo_ggtt_addr(pc->bo), + 0, + }; + + if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + return -EAGAIN; + + /* Blocking here to ensure the results are ready before reading them */ + ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); + if (ret) + drm_err(&pc_to_xe(pc)->drm, + "GuC PC query task state failed: %pe", ERR_PTR(ret)); + + return ret; +} + +static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + int ret; + u32 action[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + + if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + return -EAGAIN; + + ret = xe_guc_ct_send(ct, action, 
ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe", + ERR_PTR(ret)); + + return ret; +} + +static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) +{ + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; + u32 action[] = { + XE_GUC_ACTION_SETUP_PC_GUCRC, + mode, + }; + int ret; + + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); + if (ret) + drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe", + ERR_PTR(ret)); + return ret; +} + +static u32 decode_freq(u32 raw) +{ + return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); +} + +static u32 pc_get_min_freq(struct xe_guc_pc *pc) +{ + u32 freq; + + freq = FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK, + slpc_shared_data_read(pc, task_state_data.freq)); + + return decode_freq(freq); +} + +static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) +{ + /* + * Let's only check for the rpn-rp0 range. If max < min, + * min becomes a fixed request. + */ + if (freq < pc->rpn_freq || freq > pc->rp0_freq) + return -EINVAL; + + /* + * GuC policy is to elevate minimum frequency to the efficient levels + * Our goal is to have the admin choices respected. + */ + pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + freq < pc->rpe_freq); + + return pc_action_set_param(pc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); +} + +static int pc_get_max_freq(struct xe_guc_pc *pc) +{ + u32 freq; + + freq = FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK, + slpc_shared_data_read(pc, task_state_data.freq)); + + return decode_freq(freq); +} + +static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) +{ + /* + * Let's only check for the rpn-rp0 range. If max < min, + * min becomes a fixed request. + * Also, overclocking is not supported. + */ + if (freq < pc->rpn_freq || freq > pc->rp0_freq) + return -EINVAL; + + return pc_action_set_param(pc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + freq); +} + +static void pc_update_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + /* + * For PVC we still need to use fused RP1 as the approximation for RPe + * For other platforms than PVC we get the resolved RPe directly from + * PCODE at a different register + */ + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg); + else + reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg); + + pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + + /* + * RPe is decided at runtime by PCODE. In the rare case where that's + * smaller than the fused min, we will trust the PCODE and use that + * as our minimum one. + */ + pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); +} + +static ssize_t freq_act_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kobject *kobj = &dev->kobj; + struct xe_gt *gt = kobj_to_gt(kobj); + u32 freq; + ssize_t ret; + + /* + * When in RC6, actual frequency is 0. Let's block RC6 so we are able + * to verify that our freq requests are really happening. 
+ */ + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + xe_device_mem_access_get(gt_to_xe(gt)); + freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg); + xe_device_mem_access_put(gt_to_xe(gt)); + + freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq); + ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return ret; +} +static DEVICE_ATTR_RO(freq_act); + +static ssize_t freq_cur_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kobject *kobj = &dev->kobj; + struct xe_gt *gt = kobj_to_gt(kobj); + u32 freq; + ssize_t ret; + + /* + * GuC SLPC plays with cur freq request when GuCRC is enabled + * Block RC6 for a more reliable read. + */ + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + xe_device_mem_access_get(gt_to_xe(gt)); + freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg); + xe_device_mem_access_put(gt_to_xe(gt)); + + freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); + ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return ret; +} +static DEVICE_ATTR_RO(freq_cur); + +static ssize_t freq_rp0_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", pc->rp0_freq); +} +static DEVICE_ATTR_RO(freq_rp0); + +static ssize_t freq_rpe_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + pc_update_rp_values(pc); + return sysfs_emit(buf, "%d\n", pc->rpe_freq); +} +static DEVICE_ATTR_RO(freq_rpe); + +static ssize_t freq_rpn_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", pc->rpn_freq); +} +static DEVICE_ATTR_RO(freq_rpn); + +static ssize_t freq_min_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + struct xe_gt *gt = pc_to_gt(pc); + ssize_t ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + /* + * GuC SLPC plays with min freq request when GuCRC is enabled + * Block RC6 for a more reliable read. 
+ */ + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + goto out; + + ret = pc_action_query_task_state(pc); + if (ret) + goto fw; + + ret = sysfs_emit(buf, "%d\n", pc_get_min_freq(pc)); + +fw: + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +static ssize_t freq_min_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = kstrtou32(buff, 0, &freq); + if (ret) + return ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_set_min_freq(pc, freq); + if (ret) + goto out; + + pc->user_requested_min = freq; + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret ?: count; +} +static DEVICE_ATTR_RW(freq_min); + +static ssize_t freq_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + ssize_t ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_action_query_task_state(pc); + if (ret) + goto out; + + ret = sysfs_emit(buf, "%d\n", pc_get_max_freq(pc)); + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +static ssize_t freq_max_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = kstrtou32(buff, 0, &freq); + if (ret) + return ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + mutex_lock(&pc->freq_lock); + if (!pc->freq_ready) { + /* Might be in the middle of a gt reset */ + ret = -EAGAIN; + goto out; + } + + ret = pc_set_max_freq(pc, freq); + if (ret) + goto out; + + pc->user_requested_max = freq; + +out: + mutex_unlock(&pc->freq_lock); + xe_device_mem_access_put(pc_to_xe(pc)); + return ret ?: count; +} +static DEVICE_ATTR_RW(freq_max); + +static ssize_t rc_status_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + xe_device_mem_access_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg); + xe_device_mem_access_put(gt_to_xe(gt)); + + switch (REG_FIELD_GET(RCN_MASK, reg)) { + case GEN6_RC6: + return sysfs_emit(buff, "rc6\n"); + case GEN6_RC0: + return sysfs_emit(buff, "rc0\n"); + default: + return -ENOENT; + } +} +static DEVICE_ATTR_RO(rc_status); + +static ssize_t rc6_residency_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + ssize_t ret; + + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg); + xe_device_mem_access_put(pc_to_xe(pc)); + + ret = sysfs_emit(buff, "%u\n", reg); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return ret; +} +static DEVICE_ATTR_RO(rc6_residency); + +static const struct attribute *pc_attrs[] = { + &dev_attr_freq_act.attr, + &dev_attr_freq_cur.attr, + &dev_attr_freq_rp0.attr, + &dev_attr_freq_rpe.attr, 
+ &dev_attr_freq_rpn.attr, + &dev_attr_freq_min.attr, + &dev_attr_freq_max.attr, + &dev_attr_rc_status.attr, + &dev_attr_rc6_residency.attr, + NULL +}; + +static void pc_init_fused_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + u32 reg; + + xe_device_assert_mem_access(pc_to_xe(pc)); + + if (xe->info.platform == XE_PVC) + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg); + else + reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP.reg); + pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + /* + * GuC defaults to some RPmax that is not actually achievable without + * overclocking. Let's adjust it to the Hardware RP0, which is the + * regular maximum + */ + if (pc_get_max_freq(pc) > pc->rp0_freq) + pc_set_max_freq(pc, pc->rp0_freq); + + /* + * Same thing happens for Server platforms where min is listed as + * RPMax + */ + if (pc_get_min_freq(pc) > pc->rp0_freq) + pc_set_min_freq(pc, pc->rp0_freq); + + return 0; +} + +static int pc_adjust_requested_freq(struct xe_guc_pc *pc) +{ + int ret = 0; + + lockdep_assert_held(&pc->freq_lock); + + if (pc->user_requested_min != 0) { + ret = pc_set_min_freq(pc, pc->user_requested_min); + if (ret) + return ret; + } + + if (pc->user_requested_max != 0) { + ret = pc_set_max_freq(pc, pc->user_requested_max); + if (ret) + return ret; + } + + return ret; +} + +static int pc_gucrc_disable(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret; + + xe_device_assert_mem_access(pc_to_xe(pc)); + + ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); + if (ret) + return ret; + + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + xe_mmio_write32(gt, GEN9_PG_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN6_RC_CONTROL.reg, 0); + xe_mmio_write32(gt, GEN6_RC_STATE.reg, 0); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return 0; +} + +static void pc_init_pcode_freq(struct xe_guc_pc *pc) +{ + u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); + u32 max = DIV_ROUND_CLOSEST(pc->rp0_freq, GT_FREQUENCY_MULTIPLIER); + + XE_WARN_ON(xe_pcode_init_min_freq_table(pc_to_gt(pc), min, max)); +} + +static int pc_init_freqs(struct xe_guc_pc *pc) +{ + int ret; + + mutex_lock(&pc->freq_lock); + + ret = pc_adjust_freq_bounds(pc); + if (ret) + goto out; + + ret = pc_adjust_requested_freq(pc); + if (ret) + goto out; + + pc_update_rp_values(pc); + + pc_init_pcode_freq(pc); + + /* + * The frequencies are really ready for use only after the user + * requested ones got restored. 
+ */ + pc->freq_ready = true; + +out: + mutex_unlock(&pc->freq_lock); + return ret; +} + +/** + * xe_guc_pc_start - Start GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_start(struct xe_guc_pc *pc) +{ + struct xe_device *xe = pc_to_xe(pc); + struct xe_gt *gt = pc_to_gt(pc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int ret; + + XE_WARN_ON(!xe_device_guc_submission_enabled(xe)); + + xe_device_mem_access_get(pc_to_xe(pc)); + + memset(pc->bo->vmap.vaddr, 0, size); + slpc_shared_data_write(pc, header.size, size); + + ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (ret) + return ret; + + ret = pc_action_reset(pc); + if (ret) + goto out; + + if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING), 5)) { + drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n"); + ret = -EIO; + goto out; + } + + ret = pc_init_freqs(pc); + if (ret) + goto out; + + if (xe->info.platform == XE_PVC) { + pc_gucrc_disable(pc); + ret = 0; + goto out; + } + + ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL); + +out: + xe_device_mem_access_put(pc_to_xe(pc)); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + return ret; +} + +/** + * xe_guc_pc_stop - Stop GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_stop(struct xe_guc_pc *pc) +{ + int ret; + + xe_device_mem_access_get(pc_to_xe(pc)); + + ret = pc_gucrc_disable(pc); + if (ret) + goto out; + + mutex_lock(&pc->freq_lock); + pc->freq_ready = false; + mutex_unlock(&pc->freq_lock); + + ret = pc_action_shutdown(pc); + if (ret) + goto out; + + if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING), 5)) { + drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); + ret = -EIO; + } + +out: + xe_device_mem_access_put(pc_to_xe(pc)); + return ret; +} + +static void pc_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc_pc *pc = arg; + + XE_WARN_ON(xe_guc_pc_stop(pc)); + sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); + xe_bo_unpin_map_no_vm(pc->bo); +} + +/** + * xe_guc_pc_init - Initialize GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ +int xe_guc_pc_init(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + mutex_init(&pc->freq_lock); + + bo = xe_bo_create_pin_map(xe, gt, NULL, size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + + if (IS_ERR(bo)) + return PTR_ERR(bo); + + pc->bo = bo; + + pc_init_fused_rp_values(pc); + + err = sysfs_create_files(gt->sysfs, pc_attrs); + if (err) + return err; + + err = drmm_add_action_or_reset(&xe->drm, pc_fini, pc); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h new file mode 100644 index 000000000000..da29e4934868 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_PC_H_ +#define _XE_GUC_PC_H_ + +#include "xe_guc_pc_types.h" + +int xe_guc_pc_init(struct xe_guc_pc *pc); +int xe_guc_pc_start(struct xe_guc_pc *pc); +int xe_guc_pc_stop(struct xe_guc_pc *pc); + +#endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h new file mode 100644 index 000000000000..39548e03acf4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -0,0 +1,34 @@ +/* 
SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_PC_TYPES_H_ +#define _XE_GUC_PC_TYPES_H_ + +#include +#include + +/** + * struct xe_guc_pc - GuC Power Conservation (PC) + */ +struct xe_guc_pc { + /** @bo: GGTT buffer object that is shared with GuC PC */ + struct xe_bo *bo; + /** @rp0_freq: HW RP0 frequency - The Maximum one */ + u32 rp0_freq; + /** @rpe_freq: HW RPe frequency - The Efficient one */ + u32 rpe_freq; + /** @rpn_freq: HW RPN frequency - The Minimum one */ + u32 rpn_freq; + /** @user_requested_min: Stash the minimum requested freq by user */ + u32 user_requested_min; + /** @user_requested_max: Stash the maximum requested freq by user */ + u32 user_requested_max; + /** @freq_lock: Let's protect the frequencies */ + struct mutex freq_lock; + /** @freq_ready: Only handle freq changes, if they are really ready */ + bool freq_ready; +}; + +#endif /* _XE_GUC_PC_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h new file mode 100644 index 000000000000..1e16a9b76ddc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_reg.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_REG_H_ +#define _XE_GUC_REG_H_ + +#include +#include + +#include "i915_reg.h" + +/* Definitions of GuC H/W registers, bits, etc */ + +#define GUC_STATUS _MMIO(0xc000) +#define GS_RESET_SHIFT 0 +#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) +#define GS_BOOTROM_SHIFT 1 +#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) +#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) +#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) +#define GS_UKERNEL_SHIFT 8 +#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) +#define GS_MIA_SHIFT 16 +#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) +#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) +#define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT) +#define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT) +#define GS_AUTH_STATUS_SHIFT 30 +#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) + +#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) +#define SOFT_SCRATCH_COUNT 16 + +#define GEN11_SOFT_SCRATCH(n) _MMIO(0x190240 + (n) * 4) +#define GEN11_SOFT_SCRATCH_COUNT 4 + +#define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) +#define UOS_RSA_SCRATCH_COUNT 64 + +#define DMA_ADDR_0_LOW _MMIO(0xc300) +#define DMA_ADDR_0_HIGH _MMIO(0xc304) +#define DMA_ADDR_1_LOW _MMIO(0xc308) +#define DMA_ADDR_1_HIGH _MMIO(0xc30c) +#define DMA_ADDRESS_SPACE_WOPCM (7 << 16) +#define DMA_ADDRESS_SPACE_GTT (8 << 16) +#define DMA_COPY_SIZE _MMIO(0xc310) +#define DMA_CTRL _MMIO(0xc314) +#define HUC_UKERNEL (1<<9) +#define UOS_MOVE (1<<4) +#define START_DMA (1<<0) +#define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define GUC_WOPCM_OFFSET_VALID (1<<0) +#define HUC_LOADING_AGENT_VCR (0<<1) +#define HUC_LOADING_AGENT_GUC (1<<1) +#define GUC_WOPCM_OFFSET_SHIFT 14 +#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) +#define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) + +#define HUC_STATUS2 _MMIO(0xD3B0) +#define HUC_FW_VERIFIED (1<<7) + +#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC) +#define HUC_LOAD_SUCCESSFUL (1 << 0) + +#define GUC_WOPCM_SIZE _MMIO(0xc050) +#define GUC_WOPCM_SIZE_LOCKED (1<<0) +#define GUC_WOPCM_SIZE_SHIFT 12 +#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) + +#define GEN8_GT_PM_CONFIG _MMIO(0x138140) +#define 
GEN9LP_GT_PM_CONFIG _MMIO(0x138140) +#define GEN9_GT_PM_CONFIG _MMIO(0x13816c) +#define GT_DOORBELL_ENABLE (1<<0) + +#define GEN8_GTCR _MMIO(0x4274) +#define GEN8_GTCR_INVALIDATE (1<<0) + +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) + +#define GUC_ARAT_C6DIS _MMIO(0xA178) + +#define GUC_SHIM_CONTROL _MMIO(0xc064) +#define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0) +#define GUC_ENABLE_READ_CACHE_LOGIC (1<<1) +#define GUC_ENABLE_MIA_CACHING (1<<2) +#define GUC_GEN10_MSGCH_ENABLE (1<<4) +#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9) +#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10) +#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) +#define GUC_GEN10_SHIM_WC_ENABLE (1<<21) + +#define GUC_SEND_INTERRUPT _MMIO(0xc4c8) +#define GUC_SEND_TRIGGER (1<<0) +#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) + +#define GUC_NUM_DOORBELLS 256 + +/* format of the HW-monitored doorbell cacheline */ +struct guc_doorbell_info { + u32 db_status; +#define GUC_DOORBELL_DISABLED 0 +#define GUC_DOORBELL_ENABLED 1 + + u32 cookie; + u32 reserved[14]; +} __packed; + +#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) +#define GEN8_DRB_VALID (1<<0) +#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) + +#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08) +#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16 +#define GEN12_DOORBELLS_PER_SQIDI (0xff) +#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff) + +#define DE_GUCRMR _MMIO(0x44054) + +#define GUC_BCS_RCS_IER _MMIO(0xC550) +#define GUC_VCS2_VCS1_IER _MMIO(0xC554) +#define GUC_WD_VECS_IER _MMIO(0xC558) +#define GUC_PM_P24C_IER _MMIO(0xC55C) + +/* GuC Interrupt Vector */ +#define GUC_INTR_GUC2HOST BIT(15) +#define GUC_INTR_EXEC_ERROR BIT(14) +#define GUC_INTR_DISPLAY_EVENT BIT(13) +#define GUC_INTR_SEM_SIG BIT(12) +#define GUC_INTR_IOMMU2GUC BIT(11) +#define GUC_INTR_DOORBELL_RANG BIT(10) +#define GUC_INTR_DMA_DONE BIT(9) +#define GUC_INTR_FATAL_ERROR BIT(8) +#define GUC_INTR_NOTIF_ERROR BIT(7) +#define GUC_INTR_SW_INT_6 BIT(6) +#define GUC_INTR_SW_INT_5 BIT(5) +#define GUC_INTR_SW_INT_4 BIT(4) +#define GUC_INTR_SW_INT_3 BIT(3) +#define GUC_INTR_SW_INT_2 BIT(2) +#define GUC_INTR_SW_INT_1 BIT(1) +#define GUC_INTR_SW_INT_0 BIT(0) + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c new file mode 100644 index 000000000000..e0d424c2b78c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -0,0 +1,1695 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include + +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_engine_types.h" +#include "xe_guc_submit.h" +#include "xe_gt.h" +#include "xe_force_wake.h" +#include "xe_gpu_scheduler.h" +#include "xe_hw_engine.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_map.h" +#include "xe_mocs.h" +#include "xe_ring_ops_types.h" +#include "xe_sched_job.h" +#include "xe_trace.h" +#include "xe_vm.h" + +#include "gt/intel_lrc_reg.h" + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static struct xe_device * +guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + +static struct xe_guc * +engine_to_guc(struct xe_engine *e) +{ + return &e->gt->uc.guc; +} + +/* + * Helpers for engine state, using an atomic as some of the bits can transition + * as the same time (e.g. 
a suspend can be happning at the same time as schedule + * engine done being processed). + */ +#define ENGINE_STATE_REGISTERED (1 << 0) +#define ENGINE_STATE_ENABLED (1 << 1) +#define ENGINE_STATE_PENDING_ENABLE (1 << 2) +#define ENGINE_STATE_PENDING_DISABLE (1 << 3) +#define ENGINE_STATE_DESTROYED (1 << 4) +#define ENGINE_STATE_SUSPENDED (1 << 5) +#define ENGINE_STATE_RESET (1 << 6) +#define ENGINE_STATE_KILLED (1 << 7) + +static bool engine_registered(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED; +} + +static void set_engine_registered(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state); +} + +static void clear_engine_registered(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state); +} + +static bool engine_enabled(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED; +} + +static void set_engine_enabled(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_ENABLED, &e->guc->state); +} + +static void clear_engine_enabled(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state); +} + +static bool engine_pending_enable(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE; +} + +static void set_engine_pending_enable(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state); +} + +static void clear_engine_pending_enable(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state); +} + +static bool engine_pending_disable(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE; +} + +static void set_engine_pending_disable(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state); +} + +static void clear_engine_pending_disable(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state); +} + +static bool engine_destroyed(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED; +} + +static void set_engine_destroyed(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state); +} + +static bool engine_banned(struct xe_engine *e) +{ + return (e->flags & ENGINE_FLAG_BANNED); +} + +static void set_engine_banned(struct xe_engine *e) +{ + e->flags |= ENGINE_FLAG_BANNED; +} + +static bool engine_suspended(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED; +} + +static void set_engine_suspended(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state); +} + +static void clear_engine_suspended(struct xe_engine *e) +{ + atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state); +} + +static bool engine_reset(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_RESET; +} + +static void set_engine_reset(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_RESET, &e->guc->state); +} + +static bool engine_killed(struct xe_engine *e) +{ + return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED; +} + +static void set_engine_killed(struct xe_engine *e) +{ + atomic_or(ENGINE_STATE_KILLED, &e->guc->state); +} + +static bool engine_killed_or_banned(struct xe_engine *e) +{ + return engine_killed(e) || engine_banned(e); +} + +static void guc_submit_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + + xa_destroy(&guc->submission_state.engine_lookup); + ida_destroy(&guc->submission_state.guc_ids); + bitmap_free(guc->submission_state.guc_ids_bitmap); +} + +#define GUC_ID_MAX 65535 
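/*
 * Descriptive note on the guc_id space defined here: ids in
 * [0, GUC_ID_NUMBER_SLRC) are handed out by an ida for single-LRC engines,
 * while the top GUC_ID_NUMBER_MLRC ids, starting at GUC_ID_START_MLRC, are
 * allocated from a bitmap as power-of-two sized regions for parallel
 * (multi-LRC) engines, as done by alloc_guc_id()/release_guc_id() below.
 */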
+#define GUC_ID_NUMBER_MLRC 4096 +#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) +#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC + +static const struct xe_engine_ops guc_engine_ops; + +static void primelockdep(struct xe_guc *guc) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + + mutex_lock(&guc->submission_state.lock); + might_lock(&guc->submission_state.suspend.lock); + mutex_unlock(&guc->submission_state.lock); + + fs_reclaim_release(GFP_KERNEL); +} + +int xe_guc_submit_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int err; + + guc->submission_state.guc_ids_bitmap = + bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL); + if (!guc->submission_state.guc_ids_bitmap) + return -ENOMEM; + + gt->engine_ops = &guc_engine_ops; + + mutex_init(&guc->submission_state.lock); + xa_init(&guc->submission_state.engine_lookup); + ida_init(&guc->submission_state.guc_ids); + + spin_lock_init(&guc->submission_state.suspend.lock); + guc->submission_state.suspend.context = dma_fence_context_alloc(1); + + primelockdep(guc); + + err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); + if (err) + return err; + + return 0; +} + +static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e) +{ + int ret; + void *ptr; + + /* + * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, + * worse case user gets -ENOMEM on engine create and has to try again. + * + * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent + * failure. + */ + lockdep_assert_held(&guc->submission_state.lock); + + if (xe_engine_is_parallel(e)) { + void *bitmap = guc->submission_state.guc_ids_bitmap; + + ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, + order_base_2(e->width)); + } else { + ret = ida_simple_get(&guc->submission_state.guc_ids, 0, + GUC_ID_NUMBER_SLRC, GFP_NOWAIT); + } + if (ret < 0) + return ret; + + e->guc->id = ret; + if (xe_engine_is_parallel(e)) + e->guc->id += GUC_ID_START_MLRC; + + ptr = xa_store(&guc->submission_state.engine_lookup, + e->guc->id, e, GFP_NOWAIT); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + goto err_release; + } + + return 0; + +err_release: + ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); + return ret; +} + +static void release_guc_id(struct xe_guc *guc, struct xe_engine *e) +{ + mutex_lock(&guc->submission_state.lock); + xa_erase(&guc->submission_state.engine_lookup, e->guc->id); + if (xe_engine_is_parallel(e)) + bitmap_release_region(guc->submission_state.guc_ids_bitmap, + e->guc->id - GUC_ID_START_MLRC, + order_base_2(e->width)); + else + ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); + mutex_unlock(&guc->submission_state.lock); +} + +struct engine_policy { + u32 count; + struct guc_update_engine_policy h2g; +}; + +static u32 __guc_engine_policy_action_size(struct engine_policy *policy) +{ + size_t bytes = sizeof(policy->h2g.header) + + (sizeof(policy->h2g.klv[0]) * policy->count); + + return bytes / sizeof(u32); +} + +static void __guc_engine_policy_start_klv(struct engine_policy *policy, + u16 guc_id) +{ + policy->h2g.header.action = + XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; + policy->h2g.header.guc_id = guc_id; + policy->count = 0; +} + +#define MAKE_ENGINE_POLICY_ADD(func, id) \ +static void __guc_engine_policy_add_##func(struct engine_policy *policy, \ + u32 data) \ +{ \ + XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ + \ + policy->h2g.klv[policy->count].kl = \ + 
FIELD_PREP(GUC_KLV_0_KEY, \ + GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ + FIELD_PREP(GUC_KLV_0_LEN, 1); \ + policy->h2g.klv[policy->count].value = data; \ + policy->count++; \ +} + +MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) +MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) +MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY) +#undef MAKE_ENGINE_POLICY_ADD + +static const int xe_engine_prio_to_guc[] = { + [XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, + [XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, + [XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, + [XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, +}; + +static void init_policies(struct xe_guc *guc, struct xe_engine *e) +{ + struct engine_policy policy; + enum xe_engine_priority prio = e->priority; + u32 timeslice_us = e->sched_props.timeslice_us; + u32 preempt_timeout_us = e->sched_props.preempt_timeout_us; + + XE_BUG_ON(!engine_registered(e)); + + __guc_engine_policy_start_klv(&policy, e->guc->id); + __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]); + __guc_engine_policy_add_execution_quantum(&policy, timeslice_us); + __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us); + + xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, + __guc_engine_policy_action_size(&policy), 0, 0); +} + +static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) +{ + struct engine_policy policy; + + __guc_engine_policy_start_klv(&policy, e->guc->id); + __guc_engine_policy_add_preemption_timeout(&policy, 1); + + xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, + __guc_engine_policy_action_size(&policy), 0, 0); +} + +#define PARALLEL_SCRATCH_SIZE 2048 +#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) +#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) +#define CACHELINE_BYTES 64 + +struct sync_semaphore { + u32 semaphore; + u8 unused[CACHELINE_BYTES - sizeof(u32)]; +}; + +struct parallel_scratch { + struct guc_sched_wq_desc wq_desc; + + struct sync_semaphore go; + struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; + + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - + sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)]; + + u32 wq[WQ_SIZE / sizeof(u32)]; +}; + +#define parallel_read(xe_, map_, field_) \ + xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_) +#define parallel_write(xe_, map_, field_, val_) \ + xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_) + +static void __register_mlrc_engine(struct xe_guc *guc, + struct xe_engine *e, + struct guc_ctxt_registration_info *info) +{ +#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) + u32 action[MAX_MLRC_REG_SIZE]; + int len = 0; + int i; + + XE_BUG_ON(!xe_engine_is_parallel(e)); + + action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = info->flags; + action[len++] = info->context_idx; + action[len++] = info->engine_class; + action[len++] = info->engine_submit_mask; + action[len++] = info->wq_desc_lo; + action[len++] = info->wq_desc_hi; + action[len++] = info->wq_base_lo; + action[len++] = info->wq_base_hi; + action[len++] = info->wq_size; + action[len++] = e->width; + action[len++] = info->hwlrca_lo; + action[len++] = info->hwlrca_hi; + + for (i = 1; i < e->width; ++i) { + struct xe_lrc *lrc = e->lrc + i; + + action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); + action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); + } + + XE_BUG_ON(len > MAX_MLRC_REG_SIZE); +#undef MAX_MLRC_REG_SIZE + + 
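/*
 * Note: the variable-length action assembled above is the
 * XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC H2G message: the registration
 * info and engine width, followed by one LRC descriptor (lo/hi pair) per
 * child context beyond the first. The send below reserves no G2H response
 * space (g2h_len and num_g2h are both 0).
 */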
xe_guc_ct_send(&guc->ct, action, len, 0, 0); +} + +static void __register_engine(struct xe_guc *guc, + struct guc_ctxt_registration_info *info) +{ + u32 action[] = { + XE_GUC_ACTION_REGISTER_CONTEXT, + info->flags, + info->context_idx, + info->engine_class, + info->engine_submit_mask, + info->wq_desc_lo, + info->wq_desc_hi, + info->wq_base_lo, + info->wq_base_hi, + info->wq_size, + info->hwlrca_lo, + info->hwlrca_hi, + }; + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); +} + +static void register_engine(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct xe_lrc *lrc = e->lrc; + struct guc_ctxt_registration_info info; + + XE_BUG_ON(engine_registered(e)); + + memset(&info, 0, sizeof(info)); + info.context_idx = e->guc->id; + info.engine_class = xe_engine_class_to_guc_class(e->class); + info.engine_submit_mask = e->logical_mask; + info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); + info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); + info.flags = CONTEXT_REGISTRATION_FLAG_KMD; + + if (xe_engine_is_parallel(e)) { + u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); + struct iosys_map map = xe_lrc_parallel_map(lrc); + + info.wq_desc_lo = lower_32_bits(ggtt_addr + + offsetof(struct parallel_scratch, wq_desc)); + info.wq_desc_hi = upper_32_bits(ggtt_addr + + offsetof(struct parallel_scratch, wq_desc)); + info.wq_base_lo = lower_32_bits(ggtt_addr + + offsetof(struct parallel_scratch, wq[0])); + info.wq_base_hi = upper_32_bits(ggtt_addr + + offsetof(struct parallel_scratch, wq[0])); + info.wq_size = WQ_SIZE; + + e->guc->wqi_head = 0; + e->guc->wqi_tail = 0; + xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); + parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); + } + + set_engine_registered(e); + trace_xe_engine_register(e); + if (xe_engine_is_parallel(e)) + __register_mlrc_engine(guc, e, &info); + else + __register_engine(guc, &info); + init_policies(guc, e); +} + +static u32 wq_space_until_wrap(struct xe_engine *e) +{ + return (WQ_SIZE - e->guc->wqi_tail); +} + +static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(e->lrc); + unsigned int sleep_period_ms = 1; + +#define AVAILABLE_SPACE \ + CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE) + if (wqi_size > AVAILABLE_SPACE) { +try_again: + e->guc->wqi_head = parallel_read(xe, map, wq_desc.head); + if (wqi_size > AVAILABLE_SPACE) { + if (sleep_period_ms == 1024) { + xe_gt_reset_async(e->gt); + return -ENODEV; + } + + msleep(sleep_period_ms); + sleep_period_ms <<= 1; + goto try_again; + } + } +#undef AVAILABLE_SPACE + + return 0; +} + +static int wq_noop_append(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(e->lrc); + u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1; + + if (wq_wait_for_space(e, wq_space_until_wrap(e))) + return -ENODEV; + + XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); + + parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)], + FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | + FIELD_PREP(WQ_LEN_MASK, len_dw)); + e->guc->wqi_tail = 0; + + return 0; +} + +static void wq_item_append(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(e->lrc); + u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3]; 
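/*
 * Note: a multi-LRC work queue item is a header dword (WQ_TYPE_MULTI_LRC
 * plus length), the parent LRC descriptor, a dword packing the guc_id and
 * the parent ring tail, a zero dword, and one ring-tail dword per child
 * LRC, which is why wqi_size below works out to (width + 3) dwords.
 */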
+ u32 wqi_size = (e->width + 3) * sizeof(u32); + u32 len_dw = (wqi_size / sizeof(u32)) - 1; + int i = 0, j; + + if (wqi_size > wq_space_until_wrap(e)) { + if (wq_noop_append(e)) + return; + } + if (wq_wait_for_space(e, wqi_size)) + return; + + wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | + FIELD_PREP(WQ_LEN_MASK, len_dw); + wqi[i++] = xe_lrc_descriptor(e->lrc); + wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) | + FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64)); + wqi[i++] = 0; + for (j = 1; j < e->width; ++j) { + struct xe_lrc *lrc = e->lrc + j; + + wqi[i++] = lrc->ring.tail / sizeof(u64); + } + + XE_BUG_ON(i != wqi_size / sizeof(u32)); + + iosys_map_incr(&map, offsetof(struct parallel_scratch, + wq[e->guc->wqi_tail / sizeof(u32)])); + xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); + e->guc->wqi_tail += wqi_size; + XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE); + + xe_device_wmb(xe); + + map = xe_lrc_parallel_map(e->lrc); + parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail); +} + +#define RESUME_PENDING ~0x0ull +static void submit_engine(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_lrc *lrc = e->lrc; + u32 action[3]; + u32 g2h_len = 0; + u32 num_g2h = 0; + int len = 0; + bool extra_submit = false; + + XE_BUG_ON(!engine_registered(e)); + + if (xe_engine_is_parallel(e)) + wq_item_append(e); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + + if (engine_suspended(e) && !xe_engine_is_parallel(e)) + return; + + if (!engine_enabled(e) && !engine_suspended(e)) { + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; + action[len++] = e->guc->id; + action[len++] = GUC_CONTEXT_ENABLE; + g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; + num_g2h = 1; + if (xe_engine_is_parallel(e)) + extra_submit = true; + + e->guc->resume_time = RESUME_PENDING; + set_engine_pending_enable(e); + set_engine_enabled(e); + trace_xe_engine_scheduling_enable(e); + } else { + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; + action[len++] = e->guc->id; + trace_xe_engine_submit(e); + } + + xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); + + if (extra_submit) { + len = 0; + action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; + action[len++] = e->guc->id; + trace_xe_engine_submit(e); + + xe_guc_ct_send(&guc->ct, action, len, 0, 0); + } +} + +static struct dma_fence * +guc_engine_run_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + struct xe_engine *e = job->engine; + + XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) && + !engine_banned(e) && !engine_suspended(e)); + + trace_xe_sched_job_run(job); + + if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) { + if (!engine_registered(e)) + register_engine(e); + e->ring_ops->emit_job(job); + submit_engine(e); + } + + if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) + return job->fence; + else + return dma_fence_get(job->fence); +} + +static void guc_engine_free_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + + trace_xe_sched_job_free(job); + xe_sched_job_put(job); +} + +static int guc_read_stopped(struct xe_guc *guc) +{ + return atomic_read(&guc->submission_state.stopped); +} + +#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable) \ + u32 action[] = { \ + XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ + e->guc->id, \ + GUC_CONTEXT_##enable_disable, \ + } + +static void disable_scheduling_deregister(struct xe_guc *guc, + struct xe_engine *e) +{ + MAKE_SCHED_CONTEXT_ACTION(e, DISABLE); + int 
ret; + + set_min_preemption_timeout(guc, e); + smp_rmb(); + ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) || + guc_read_stopped(guc), HZ * 5); + if (!ret) { + struct xe_gpu_scheduler *sched = &e->guc->sched; + + XE_WARN_ON("Pending enable failed to respond"); + xe_sched_submission_start(sched); + xe_gt_reset_async(e->gt); + xe_sched_tdr_queue_imm(sched); + return; + } + + clear_engine_enabled(e); + set_engine_pending_disable(e); + set_engine_destroyed(e); + trace_xe_engine_scheduling_disable(e); + + /* + * Reserve space for both G2H here as the 2nd G2H is sent from a G2H + * handler and we are not allowed to reserved G2H space in handlers. + */ + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + + G2H_LEN_DW_DEREGISTER_CONTEXT, 2); +} + +static void guc_engine_print(struct xe_engine *e, struct drm_printer *p); + +#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) +static void simple_error_capture(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + struct drm_printer p = drm_err_printer(""); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 adj_logical_mask = e->logical_mask; + u32 width_mask = (0x1 << e->width) - 1; + int i; + bool cookie; + + if (e->vm && !e->vm->error_capture.capture_once) { + e->vm->error_capture.capture_once = true; + cookie = dma_fence_begin_signalling(); + for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { + if (adj_logical_mask & BIT(i)) { + adj_logical_mask |= width_mask << i; + i += e->width; + } else { + ++i; + } + } + + xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_guc_ct_print(&guc->ct, &p); + guc_engine_print(e, &p); + for_each_hw_engine(hwe, guc_to_gt(guc), id) { + if (hwe->class != e->hwe->class || + !(BIT(hwe->logical_instance) & adj_logical_mask)) + continue; + xe_hw_engine_print_state(hwe, &p); + } + xe_analyze_vm(&p, e->vm, e->gt->info.id); + xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + dma_fence_end_signalling(cookie); + } +} +#else +static void simple_error_capture(struct xe_engine *e) +{ +} +#endif + +static enum drm_gpu_sched_stat +guc_engine_timedout_job(struct drm_sched_job *drm_job) +{ + struct xe_sched_job *job = to_xe_sched_job(drm_job); + struct xe_sched_job *tmp_job; + struct xe_engine *e = job->engine; + struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_device *xe = guc_to_xe(engine_to_guc(e)); + int err = -ETIME; + int i = 0; + + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { + XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL); + XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e)); + + drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), e->guc->id, e->flags); + simple_error_capture(e); + } else { + drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), e->guc->id, e->flags); + } + trace_xe_sched_job_timedout(job); + + /* Kill the run_job entry point */ + xe_sched_submission_stop(sched); + + /* + * Kernel jobs should never fail, nor should VM jobs if they do + * somethings has gone wrong and the GT needs a reset + */ + if (e->flags & ENGINE_FLAG_KERNEL || + (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) { + if (!xe_sched_invalidate_job(job, 2)) { + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); + xe_gt_reset_async(e->gt); + goto out; + } + } + + /* Engine state now stable, disable scheduling if needed */ + if (engine_enabled(e)) { + struct xe_guc *guc = 
engine_to_guc(e); + int ret; + + if (engine_reset(e)) + err = -EIO; + set_engine_banned(e); + xe_engine_get(e); + disable_scheduling_deregister(engine_to_guc(e), e); + + /* + * Must wait for scheduling to be disabled before signalling + * any fences, if GT broken the GT reset code should signal us. + * + * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault + * error) messages which can cause the schedule disable to get + * lost. If this occurs, trigger a GT reset to recover. + */ + smp_rmb(); + ret = wait_event_timeout(guc->ct.wq, + !engine_pending_disable(e) || + guc_read_stopped(guc), HZ * 5); + if (!ret) { + XE_WARN_ON("Schedule disable failed to respond"); + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); + xe_gt_reset_async(e->gt); + xe_sched_tdr_queue_imm(sched); + goto out; + } + } + + /* Stop fence signaling */ + xe_hw_fence_irq_stop(e->fence_irq); + + /* + * Fence state now stable, stop / start scheduler which cleans up any + * fences that are complete + */ + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); + xe_sched_tdr_queue_imm(&e->guc->sched); + + /* Mark all outstanding jobs as bad, thus completing them */ + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list) + xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED); + spin_unlock(&sched->base.job_list_lock); + + /* Start fence signaling */ + xe_hw_fence_irq_start(e->fence_irq); + +out: + return DRM_GPU_SCHED_STAT_NOMINAL; +} + +static void __guc_engine_fini_async(struct work_struct *w) +{ + struct xe_guc_engine *ge = + container_of(w, struct xe_guc_engine, fini_async); + struct xe_engine *e = ge->engine; + struct xe_guc *guc = engine_to_guc(e); + + trace_xe_engine_destroy(e); + + if (e->flags & ENGINE_FLAG_PERSISTENT) + xe_device_remove_persitent_engines(gt_to_xe(e->gt), e); + release_guc_id(guc, e); + xe_sched_entity_fini(&ge->entity); + xe_sched_fini(&ge->sched); + + if (!(e->flags & ENGINE_FLAG_KERNEL)) { + kfree(ge); + xe_engine_fini(e); + } +} + +static void guc_engine_fini_async(struct xe_engine *e) +{ + bool kernel = e->flags & ENGINE_FLAG_KERNEL; + + INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async); + queue_work(system_unbound_wq, &e->guc->fini_async); + + /* We must block on kernel engines so slabs are empty on driver unload */ + if (kernel) { + struct xe_guc_engine *ge = e->guc; + + flush_work(&ge->fini_async); + kfree(ge); + xe_engine_fini(e); + } +} + +static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e) +{ + /* + * Might be done from within the GPU scheduler, need to do async as we + * fini the scheduler when the engine is fini'd, the scheduler can't + * complete fini within itself (circular dependency). Async resolves + * this we and don't really care when everything is fini'd, just that it + * is. 
+ */ + guc_engine_fini_async(e); +} + +static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg) +{ + struct xe_engine *e = msg->private_data; + struct xe_guc *guc = engine_to_guc(e); + + XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL); + trace_xe_engine_cleanup_entity(e); + + if (engine_registered(e)) + disable_scheduling_deregister(guc, e); + else + __guc_engine_fini(guc, e); +} + +static bool guc_engine_allowed_to_change_state(struct xe_engine *e) +{ + return !engine_killed_or_banned(e) && engine_registered(e); +} + +static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg) +{ + struct xe_engine *e = msg->private_data; + struct xe_guc *guc = engine_to_guc(e); + + if (guc_engine_allowed_to_change_state(e)) + init_policies(guc, e); + kfree(msg); +} + +static void suspend_fence_signal(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + + XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) && + !guc_read_stopped(guc)); + XE_BUG_ON(!e->guc->suspend_pending); + + e->guc->suspend_pending = false; + smp_wmb(); + wake_up(&e->guc->suspend_wait); +} + +static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg) +{ + struct xe_engine *e = msg->private_data; + struct xe_guc *guc = engine_to_guc(e); + + if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) && + engine_enabled(e)) { + wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING || + guc_read_stopped(guc)); + + if (!guc_read_stopped(guc)) { + MAKE_SCHED_CONTEXT_ACTION(e, DISABLE); + s64 since_resume_ms = + ktime_ms_delta(ktime_get(), + e->guc->resume_time); + s64 wait_ms = e->vm->preempt.min_run_period_ms - + since_resume_ms; + + if (wait_ms > 0 && e->guc->resume_time) + msleep(wait_ms); + + set_engine_suspended(e); + clear_engine_enabled(e); + set_engine_pending_disable(e); + trace_xe_engine_scheduling_disable(e); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + } + } else if (e->guc->suspend_pending) { + set_engine_suspended(e); + suspend_fence_signal(e); + } +} + +static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg) +{ + struct xe_engine *e = msg->private_data; + struct xe_guc *guc = engine_to_guc(e); + + if (guc_engine_allowed_to_change_state(e)) { + MAKE_SCHED_CONTEXT_ACTION(e, ENABLE); + + e->guc->resume_time = RESUME_PENDING; + clear_engine_suspended(e); + set_engine_pending_enable(e); + set_engine_enabled(e); + trace_xe_engine_scheduling_enable(e); + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); + } else { + clear_engine_suspended(e); + } +} + +#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ +#define SET_SCHED_PROPS 2 +#define SUSPEND 3 +#define RESUME 4 + +static void guc_engine_process_msg(struct xe_sched_msg *msg) +{ + trace_xe_sched_msg_recv(msg); + + switch (msg->opcode) { + case CLEANUP: + __guc_engine_process_msg_cleanup(msg); + break; + case SET_SCHED_PROPS: + __guc_engine_process_msg_set_sched_props(msg); + break; + case SUSPEND: + __guc_engine_process_msg_suspend(msg); + break; + case RESUME: + __guc_engine_process_msg_resume(msg); + break; + default: + XE_BUG_ON("Unknown message type"); + } +} + +static const struct drm_sched_backend_ops drm_sched_ops = { + .run_job = guc_engine_run_job, + .free_job = guc_engine_free_job, + .timedout_job = guc_engine_timedout_job, +}; + +static const struct xe_sched_backend_ops xe_sched_ops = { + .process_msg = guc_engine_process_msg, +}; + +static int guc_engine_init(struct xe_engine 
*e) +{ + struct xe_gpu_scheduler *sched; + struct xe_guc *guc = engine_to_guc(e); + struct xe_guc_engine *ge; + long timeout; + int err; + + XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc))); + + ge = kzalloc(sizeof(*ge), GFP_KERNEL); + if (!ge) + return -ENOMEM; + + e->guc = ge; + ge->engine = e; + init_waitqueue_head(&ge->suspend_wait); + + timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5; + err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL, + e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, + 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, + e->name, gt_to_xe(e->gt)->drm.dev); + if (err) + goto err_free; + + sched = &ge->sched; + err = xe_sched_entity_init(&ge->entity, sched); + if (err) + goto err_sched; + e->priority = XE_ENGINE_PRIORITY_NORMAL; + + mutex_lock(&guc->submission_state.lock); + + err = alloc_guc_id(guc, e); + if (err) + goto err_entity; + + e->entity = &ge->entity; + + if (guc_read_stopped(guc)) + xe_sched_stop(sched); + + mutex_unlock(&guc->submission_state.lock); + + switch (e->class) { + case XE_ENGINE_CLASS_RENDER: + sprintf(e->name, "rcs%d", e->guc->id); + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + sprintf(e->name, "vcs%d", e->guc->id); + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + sprintf(e->name, "vecs%d", e->guc->id); + break; + case XE_ENGINE_CLASS_COPY: + sprintf(e->name, "bcs%d", e->guc->id); + break; + case XE_ENGINE_CLASS_COMPUTE: + sprintf(e->name, "ccs%d", e->guc->id); + break; + default: + XE_WARN_ON(e->class); + } + + trace_xe_engine_create(e); + + return 0; + +err_entity: + xe_sched_entity_fini(&ge->entity); +err_sched: + xe_sched_fini(&ge->sched); +err_free: + kfree(ge); + + return err; +} + +static void guc_engine_kill(struct xe_engine *e) +{ + trace_xe_engine_kill(e); + set_engine_killed(e); + xe_sched_tdr_queue_imm(&e->guc->sched); +} + +static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg, + u32 opcode) +{ + INIT_LIST_HEAD(&msg->link); + msg->opcode = opcode; + msg->private_data = e; + + trace_xe_sched_msg_add(msg); + xe_sched_add_msg(&e->guc->sched, msg); +} + +#define STATIC_MSG_CLEANUP 0 +#define STATIC_MSG_SUSPEND 1 +#define STATIC_MSG_RESUME 2 +static void guc_engine_fini(struct xe_engine *e) +{ + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP; + + if (!(e->flags & ENGINE_FLAG_KERNEL)) + guc_engine_add_msg(e, msg, CLEANUP); + else + __guc_engine_fini(engine_to_guc(e), e); +} + +static int guc_engine_set_priority(struct xe_engine *e, + enum xe_engine_priority priority) +{ + struct xe_sched_msg *msg; + + if (e->priority == priority || engine_killed_or_banned(e)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); + e->priority = priority; + + return 0; +} + +static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) +{ + struct xe_sched_msg *msg; + + if (e->sched_props.timeslice_us == timeslice_us || + engine_killed_or_banned(e)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + e->sched_props.timeslice_us = timeslice_us; + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); + + return 0; +} + +static int guc_engine_set_preempt_timeout(struct xe_engine *e, + u32 preempt_timeout_us) +{ + struct xe_sched_msg *msg; + + if (e->sched_props.preempt_timeout_us == preempt_timeout_us || + engine_killed_or_banned(e)) + return 0; + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + 
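/* + * As with the timeslice and priority setters above, update the + * cached value and queue a SET_SCHED_PROPS message; it is handled + * asynchronously by __guc_engine_process_msg_set_sched_props(), + * which re-runs init_policies() for this engine. + */ + 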
e->sched_props.preempt_timeout_us = preempt_timeout_us; + guc_engine_add_msg(e, msg, SET_SCHED_PROPS); + + return 0; +} + +static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms) +{ + struct xe_gpu_scheduler *sched = &e->guc->sched; + + XE_BUG_ON(engine_registered(e)); + XE_BUG_ON(engine_banned(e)); + XE_BUG_ON(engine_killed(e)); + + sched->base.timeout = job_timeout_ms; + + return 0; +} + +static int guc_engine_suspend(struct xe_engine *e) +{ + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND; + + if (engine_killed_or_banned(e) || e->guc->suspend_pending) + return -EINVAL; + + e->guc->suspend_pending = true; + guc_engine_add_msg(e, msg, SUSPEND); + + return 0; +} + +static void guc_engine_suspend_wait(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + + wait_event(e->guc->suspend_wait, !e->guc->suspend_pending || + guc_read_stopped(guc)); +} + +static void guc_engine_resume(struct xe_engine *e) +{ + struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME; + + XE_BUG_ON(e->guc->suspend_pending); + + xe_mocs_init_engine(e); + guc_engine_add_msg(e, msg, RESUME); +} + +/* + * All of these functions are an abstraction layer which other parts of XE can + * use to trap into the GuC backend. All of these functions, aside from init, + * really shouldn't do much other than trap into the DRM scheduler which + * synchronizes these operations. + */ +static const struct xe_engine_ops guc_engine_ops = { + .init = guc_engine_init, + .kill = guc_engine_kill, + .fini = guc_engine_fini, + .set_priority = guc_engine_set_priority, + .set_timeslice = guc_engine_set_timeslice, + .set_preempt_timeout = guc_engine_set_preempt_timeout, + .set_job_timeout = guc_engine_set_job_timeout, + .suspend = guc_engine_suspend, + .suspend_wait = guc_engine_suspend_wait, + .resume = guc_engine_resume, +}; + +static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e) +{ + struct xe_gpu_scheduler *sched = &e->guc->sched; + + /* Stop scheduling + flush any DRM scheduler operations */ + xe_sched_submission_stop(sched); + + /* Clean up lost G2H + reset engine state */ + if (engine_destroyed(e) && engine_registered(e)) { + if (engine_banned(e)) + xe_engine_put(e); + else + __guc_engine_fini(guc, e); + } + if (e->guc->suspend_pending) { + set_engine_suspended(e); + suspend_fence_signal(e); + } + atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, + &e->guc->state); + e->guc->resume_time = 0; + trace_xe_engine_stop(e); + + /* + * Ban any engine (aside from kernel and engines used for VM ops) with a + * started but not complete job or if a job has gone through a GT reset + * more than twice. + */ + if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) { + struct xe_sched_job *job = xe_sched_first_pending_job(sched); + + if (job) { + if ((xe_sched_job_started(job) && + !xe_sched_job_completed(job)) || + xe_sched_invalidate_job(job, 2)) { + trace_xe_sched_job_ban(job); + xe_sched_tdr_queue_imm(&e->guc->sched); + set_engine_banned(e); + } + } + } +} + +int xe_guc_submit_reset_prepare(struct xe_guc *guc) +{ + int ret; + + /* + * Using an atomic here rather than submission_state.lock as this + * function can be called while holding the CT lock (engine reset + * failure). submission_state.lock needs the CT lock to resubmit jobs. + * Atomic is not ideal, but it works to prevent against concurrent reset + * and releasing any TDRs waiting on guc->submission_state.stopped. 
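+ * + * Waiters observe the stop through guc_read_stopped(); the + * wake_up_all() below releases anyone blocked on guc->ct.wq, and + * xe_guc_submit_start() later decrements the counter and wakes the + * queue again once submission can resume.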
+ */ + ret = atomic_fetch_or(1, &guc->submission_state.stopped); + smp_wmb(); + wake_up_all(&guc->ct.wq); + + return ret; +} + +void xe_guc_submit_reset_wait(struct xe_guc *guc) +{ + wait_event(guc->ct.wq, !guc_read_stopped(guc)); +} + +int xe_guc_submit_stop(struct xe_guc *guc) +{ + struct xe_engine *e; + unsigned long index; + + XE_BUG_ON(guc_read_stopped(guc) != 1); + + mutex_lock(&guc->submission_state.lock); + + xa_for_each(&guc->submission_state.engine_lookup, index, e) + guc_engine_stop(guc, e); + + mutex_unlock(&guc->submission_state.lock); + + /* + * No one can enter the backend at this point, aside from new engine + * creation which is protected by guc->submission_state.lock. + */ + + return 0; +} + +static void guc_engine_start(struct xe_engine *e) +{ + struct xe_gpu_scheduler *sched = &e->guc->sched; + + if (!engine_killed_or_banned(e)) { + int i; + + trace_xe_engine_resubmit(e); + for (i = 0; i < e->width; ++i) + xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail); + xe_sched_resubmit_jobs(sched); + } + + xe_sched_submission_start(sched); +} + +int xe_guc_submit_start(struct xe_guc *guc) +{ + struct xe_engine *e; + unsigned long index; + + XE_BUG_ON(guc_read_stopped(guc) != 1); + + mutex_lock(&guc->submission_state.lock); + atomic_dec(&guc->submission_state.stopped); + xa_for_each(&guc->submission_state.engine_lookup, index, e) + guc_engine_start(e); + mutex_unlock(&guc->submission_state.lock); + + wake_up_all(&guc->ct.wq); + + return 0; +} + +static struct xe_engine * +g2h_engine_lookup(struct xe_guc *guc, u32 guc_id) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + + if (unlikely(guc_id >= GUC_ID_MAX)) { + drm_err(&xe->drm, "Invalid guc_id %u", guc_id); + return NULL; + } + + e = xa_load(&guc->submission_state.engine_lookup, guc_id); + if (unlikely(!e)) { + drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); + return NULL; + } + + XE_BUG_ON(e->guc->id != guc_id); + + return e; +} + +static void deregister_engine(struct xe_guc *guc, struct xe_engine *e) +{ + u32 action[] = { + XE_GUC_ACTION_DEREGISTER_CONTEXT, + e->guc->id, + }; + + trace_xe_engine_deregister(e); + + xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); +} + +int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + u32 guc_id = msg[0]; + + if (unlikely(len < 2)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + e = g2h_engine_lookup(guc, guc_id); + if (unlikely(!e)) + return -EPROTO; + + if (unlikely(!engine_pending_enable(e) && + !engine_pending_disable(e))) { + drm_err(&xe->drm, "Unexpected engine state 0x%04x", + atomic_read(&e->guc->state)); + return -EPROTO; + } + + trace_xe_engine_scheduling_done(e); + + if (engine_pending_enable(e)) { + e->guc->resume_time = ktime_get(); + clear_engine_pending_enable(e); + smp_wmb(); + wake_up_all(&guc->ct.wq); + } else { + clear_engine_pending_disable(e); + if (e->guc->suspend_pending) { + suspend_fence_signal(e); + } else { + if (engine_banned(e)) { + smp_wmb(); + wake_up_all(&guc->ct.wq); + } + deregister_engine(guc, e); + } + } + + return 0; +} + +int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + e = g2h_engine_lookup(guc, guc_id); + if (unlikely(!e)) + return -EPROTO; + + if (!engine_destroyed(e) || 
engine_pending_disable(e) || + engine_pending_enable(e) || engine_enabled(e)) { + drm_err(&xe->drm, "Unexpected engine state 0x%04x", + atomic_read(&e->guc->state)); + return -EPROTO; + } + + trace_xe_engine_deregister_done(e); + + clear_engine_registered(e); + if (engine_banned(e)) + xe_engine_put(e); + else + __guc_engine_fini(guc, e); + + return 0; +} + +int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + e = g2h_engine_lookup(guc, guc_id); + if (unlikely(!e)) + return -EPROTO; + + drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); + + /* FIXME: Do error capture, most likely async */ + + trace_xe_engine_reset(e); + + /* + * A banned engine is a NOP at this point (came from + * guc_engine_timedout_job). Otherwise, kick drm scheduler to cancel + * jobs by setting timeout of the job to the minimum value kicking + * guc_engine_timedout_job. + */ + set_engine_reset(e); + if (!engine_banned(e)) + xe_sched_tdr_queue_imm(&e->guc->sched); + + return 0; +} + +int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, + u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_engine *e; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + e = g2h_engine_lookup(guc, guc_id); + if (unlikely(!e)) + return -EPROTO; + + drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); + trace_xe_engine_memory_cat_error(e); + + /* Treat the same as engine reset */ + set_engine_reset(e); + if (!engine_banned(e)) + xe_sched_tdr_queue_imm(&e->guc->sched); + + return 0; +} + +int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + u8 guc_class, instance; + u32 reason; + + if (unlikely(len != 3)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + guc_class = msg[0]; + instance = msg[1]; + reason = msg[2]; + + /* Unexpected failure of a hardware feature, log an actual error */ + drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", + guc_class, instance, reason); + + xe_gt_reset_async(guc_to_gt(guc)); + + return 0; +} + +static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p) +{ + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(e->lrc); + int i; + + drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", + e->guc->wqi_head, parallel_read(xe, map, wq_desc.head)); + drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", + e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail)); + drm_printf(p, "\tWQ status: %u\n", + parallel_read(xe, map, wq_desc.wq_status)); + if (parallel_read(xe, map, wq_desc.head) != + parallel_read(xe, map, wq_desc.tail)) { + for (i = parallel_read(xe, map, wq_desc.head); + i != parallel_read(xe, map, wq_desc.tail); + i = (i + sizeof(u32)) % WQ_SIZE) + drm_printf(p, "\tWQ[%ld]: 0x%08x\n", i / sizeof(u32), + parallel_read(xe, map, wq[i / sizeof(u32)])); + } +} + +static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) +{ + struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_sched_job *job; + int i; + + drm_printf(p, "\nGuC ID: %d\n", e->guc->id); + drm_printf(p, "\tName: %s\n", e->name); + drm_printf(p, "\tClass: %d\n", e->class); + drm_printf(p, 
"\tLogical mask: 0x%x\n", e->logical_mask); + drm_printf(p, "\tWidth: %d\n", e->width); + drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount)); + drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout); + drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us); + drm_printf(p, "\tPreempt timeout: %u (us)\n", + e->sched_props.preempt_timeout_us); + for (i = 0; i < e->width; ++i ) { + struct xe_lrc *lrc = e->lrc + i; + + drm_printf(p, "\tHW Context Desc: 0x%08x\n", + lower_32_bits(xe_lrc_ggtt_addr(lrc))); + drm_printf(p, "\tLRC Head: (memory) %u\n", + xe_lrc_ring_head(lrc)); + drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", + lrc->ring.tail, + xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL)); + drm_printf(p, "\tStart seqno: (memory) %d\n", + xe_lrc_start_seqno(lrc)); + drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc)); + } + drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state)); + drm_printf(p, "\tFlags: 0x%lx\n", e->flags); + if (xe_engine_is_parallel(e)) + guc_engine_wq_print(e, p); + + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) + drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", + xe_sched_job_seqno(job), + dma_fence_is_signaled(job->fence) ? 1 : 0, + dma_fence_is_signaled(&job->drm.s_fence->finished) ? + 1 : 0); + spin_unlock(&sched->base.job_list_lock); +} + +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) +{ + struct xe_engine *e; + unsigned long index; + + if (!xe_device_guc_submission_enabled(guc_to_xe(guc))) + return; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.engine_lookup, index, e) + guc_engine_print(e, p); + mutex_unlock(&guc->submission_state.lock); +} diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h new file mode 100644 index 000000000000..8002734d6f24 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_SUBMIT_H_ +#define _XE_GUC_SUBMIT_H_ + +#include + +struct drm_printer; +struct xe_engine; +struct xe_guc; + +int xe_guc_submit_init(struct xe_guc *guc); +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); + +int xe_guc_submit_reset_prepare(struct xe_guc *guc); +void xe_guc_submit_reset_wait(struct xe_guc *guc); +int xe_guc_submit_stop(struct xe_guc *guc); +int xe_guc_submit_start(struct xe_guc *guc); + +int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, + u32 len); +int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h new file mode 100644 index 000000000000..ca177853cc12 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_TYPES_H_ +#define _XE_GUC_TYPES_H_ + +#include +#include + +#include "xe_guc_ads_types.h" +#include "xe_guc_ct_types.h" +#include "xe_guc_fwif.h" +#include "xe_guc_log_types.h" +#include "xe_guc_pc_types.h" +#include "xe_uc_fw_types.h" + +/** + * struct xe_guc - Graphic micro controller + */ +struct xe_guc { + /** @fw: Generic uC firmware 
management */ + struct xe_uc_fw fw; + /** @log: GuC log */ + struct xe_guc_log log; + /** @ads: GuC ads */ + struct xe_guc_ads ads; + /** @ct: GuC ct */ + struct xe_guc_ct ct; + /** @pc: GuC Power Conservation */ + struct xe_guc_pc pc; + /** @submission_state: GuC submission state */ + struct { + /** @engine_lookup: Lookup an xe_engine from guc_id */ + struct xarray engine_lookup; + /** @guc_ids: used to allocate new guc_ids, single-lrc */ + struct ida guc_ids; + /** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ + unsigned long *guc_ids_bitmap; + /** @stopped: submissions are stopped */ + atomic_t stopped; + /** @lock: protects submission state */ + struct mutex lock; + /** @suspend: suspend fence state */ + struct { + /** @lock: suspend fences lock */ + spinlock_t lock; + /** @context: suspend fences context */ + u64 context; + /** @seqno: suspend fences seqno */ + u32 seqno; + } suspend; + } submission_state; + /** @hwconfig: Hardware config state */ + struct { + /** @bo: buffer object of the hardware config */ + struct xe_bo *bo; + /** @size: size of the hardware config */ + u32 size; + } hwconfig; + + /** + * @notify_reg: Register which is written to notify GuC of H2G messages + */ + u32 notify_reg; + /** @params: Control params for fw initialization */ + u32 params[GUC_CTL_MAX_DWORDS]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c new file mode 100644 index 000000000000..93b22fac6e14 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_reg.h" +#include "xe_huc.h" +#include "xe_mmio.h" +#include "xe_uc_fw.h" + +static struct xe_gt * +huc_to_gt(struct xe_huc *huc) +{ + return container_of(huc, struct xe_gt, uc.huc); +} + +static struct xe_device * +huc_to_xe(struct xe_huc *huc) +{ + return gt_to_xe(huc_to_gt(huc)); +} + +static struct xe_guc * +huc_to_guc(struct xe_huc *huc) +{ + return &container_of(huc, struct xe_uc, huc)->guc; +} + +int xe_huc_init(struct xe_huc *huc) +{ + struct xe_device *xe = huc_to_xe(huc); + int ret; + + huc->fw.type = XE_UC_FW_TYPE_HUC; + ret = xe_uc_fw_init(&huc->fw); + if (ret) + goto out; + + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); + + return 0; + +out: + if (xe_uc_fw_is_disabled(&huc->fw)) { + drm_info(&xe->drm, "HuC disabled\n"); + return 0; + } + drm_err(&xe->drm, "HuC init failed with %d", ret); + return ret; +} + +int xe_huc_upload(struct xe_huc *huc) +{ + if (xe_uc_fw_is_disabled(&huc->fw)) + return 0; + return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); +} + +int xe_huc_auth(struct xe_huc *huc) +{ + struct xe_device *xe = huc_to_xe(huc); + struct xe_gt *gt = huc_to_gt(huc); + struct xe_guc *guc = huc_to_guc(huc); + int ret; + if (xe_uc_fw_is_disabled(&huc->fw)) + return 0; + + XE_BUG_ON(xe_uc_fw_is_running(&huc->fw)); + + if (!xe_uc_fw_is_loaded(&huc->fw)) + return -ENOEXEC; + + ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) + + xe_uc_fw_rsa_offset(&huc->fw)); + if (ret) { + drm_err(&xe->drm, "HuC: GuC did not ack Auth request %d\n", + ret); + goto fail; + } + + ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg, + HUC_LOAD_SUCCESSFUL, + HUC_LOAD_SUCCESSFUL, 100); + if (ret) { + drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); + goto fail; + } + + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); + drm_dbg(&xe->drm, "HuC 
authenticated\n"); + + return 0; + +fail: + drm_err(&xe->drm, "HuC authentication failed %d\n", ret); + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + + return ret; +} + +void xe_huc_sanitize(struct xe_huc *huc) +{ + if (xe_uc_fw_is_disabled(&huc->fw)) + return; + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); +} + +void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) +{ + struct xe_gt *gt = huc_to_gt(huc); + int err; + + xe_uc_fw_print(&huc->fw, p); + + if (xe_uc_fw_is_disabled(&huc->fw)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + drm_printf(p, "\nHuC status: 0x%08x\n", + xe_mmio_read32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg)); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h new file mode 100644 index 000000000000..5802c43b6ce2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_H_ +#define _XE_HUC_H_ + +#include "xe_huc_types.h" + +struct drm_printer; + +int xe_huc_init(struct xe_huc *huc); +int xe_huc_upload(struct xe_huc *huc); +int xe_huc_auth(struct xe_huc *huc); +void xe_huc_sanitize(struct xe_huc *huc); +void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c new file mode 100644 index 000000000000..268bac36336a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_huc.h" +#include "xe_huc_debugfs.h" +#include "xe_macros.h" + +static struct xe_gt * +huc_to_gt(struct xe_huc *huc) +{ + return container_of(huc, struct xe_gt, uc.huc); +} + +static struct xe_device * +huc_to_xe(struct xe_huc *huc) +{ + return gt_to_xe(huc_to_gt(huc)); +} + +static struct xe_huc *node_to_huc(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static int huc_info(struct seq_file *m, void *data) +{ + struct xe_huc *huc = node_to_huc(m->private); + struct xe_device *xe = huc_to_xe(huc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_device_mem_access_get(xe); + xe_huc_print_info(huc, &p); + xe_device_mem_access_put(xe); + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"huc_info", huc_info, 0}, +}; + +void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent) +{ + struct drm_minor *minor = huc_to_xe(huc)->drm.primary; + struct drm_info_list *local; + int i; + +#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) + local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) { + XE_WARN_ON("Couldn't allocate memory"); + return; + } + + memcpy(local, debugfs_list, DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = huc; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.h b/drivers/gpu/drm/xe/xe_huc_debugfs.h new file mode 100644 index 000000000000..ec58f1818804 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_DEBUGFS_H_ +#define _XE_HUC_DEBUGFS_H_ + +struct dentry; +struct xe_huc; + +void 
xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h new file mode 100644 index 000000000000..cae6d19097df --- /dev/null +++ b/drivers/gpu/drm/xe/xe_huc_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HUC_TYPES_H_ +#define _XE_HUC_TYPES_H_ + +#include "xe_uc_fw_types.h" + +/** + * struct xe_huc - HuC + */ +struct xe_huc { + /** @fw: Generic uC firmware management */ + struct xe_uc_fw fw; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c new file mode 100644 index 000000000000..fd89dd90131c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_hw_engine.h" + +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_execlist.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_topology.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_mmio.h" +#include "xe_reg_sr.h" +#include "xe_sched_job.h" +#include "xe_wa.h" + +#include "gt/intel_engine_regs.h" +#include "i915_reg.h" +#include "gt/intel_gt_regs.h" + +#define MAX_MMIO_BASES 3 +struct engine_info { + const char *name; + unsigned int class : 8; + unsigned int instance : 8; + enum xe_force_wake_domains domain; + /* mmio bases table *must* be sorted in reverse graphics_ver order */ + struct engine_mmio_base { + unsigned int graphics_ver : 8; + unsigned int base : 24; + } mmio_bases[MAX_MMIO_BASES]; +}; + +static const struct engine_info engine_infos[] = { + [XE_HW_ENGINE_RCS0] = { + .name = "rcs0", + .class = XE_ENGINE_CLASS_RENDER, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 1, .base = RENDER_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS0] = { + .name = "bcs0", + .class = XE_ENGINE_CLASS_COPY, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 6, .base = BLT_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS1] = { + .name = "bcs1", + .class = XE_ENGINE_CLASS_COPY, + .instance = 1, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS2] = { + .name = "bcs2", + .class = XE_ENGINE_CLASS_COPY, + .instance = 2, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS3] = { + .name = "bcs3", + .class = XE_ENGINE_CLASS_COPY, + .instance = 3, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS4] = { + .name = "bcs4", + .class = XE_ENGINE_CLASS_COPY, + .instance = 4, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS5] = { + .name = "bcs5", + .class = XE_ENGINE_CLASS_COPY, + .instance = 5, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS6] = { + .name = "bcs6", + .class = XE_ENGINE_CLASS_COPY, + .instance = 6, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE } + }, + }, + [XE_HW_ENGINE_BCS7] = { + .name = "bcs7", + .class = XE_ENGINE_CLASS_COPY, + .instance = 7, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE } + }, + }, + 
[XE_HW_ENGINE_BCS8] = { + .name = "bcs8", + .class = XE_ENGINE_CLASS_COPY, + .instance = 8, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE } + }, + }, + + [XE_HW_ENGINE_VCS0] = { + .name = "vcs0", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 0, + .domain = XE_FW_MEDIA_VDBOX0, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_BSD_RING_BASE }, + { .graphics_ver = 6, .base = GEN6_BSD_RING_BASE }, + { .graphics_ver = 4, .base = BSD_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS1] = { + .name = "vcs1", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 1, + .domain = XE_FW_MEDIA_VDBOX1, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE }, + { .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS2] = { + .name = "vcs2", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 2, + .domain = XE_FW_MEDIA_VDBOX2, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS3] = { + .name = "vcs3", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 3, + .domain = XE_FW_MEDIA_VDBOX3, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS4] = { + .name = "vcs4", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 4, + .domain = XE_FW_MEDIA_VDBOX4, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS5] = { + .name = "vcs5", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 5, + .domain = XE_FW_MEDIA_VDBOX5, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS6] = { + .name = "vcs6", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 6, + .domain = XE_FW_MEDIA_VDBOX6, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE } + }, + }, + [XE_HW_ENGINE_VCS7] = { + .name = "vcs7", + .class = XE_ENGINE_CLASS_VIDEO_DECODE, + .instance = 7, + .domain = XE_FW_MEDIA_VDBOX7, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE } + }, + }, + [XE_HW_ENGINE_VECS0] = { + .name = "vecs0", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 0, + .domain = XE_FW_MEDIA_VEBOX0, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE }, + { .graphics_ver = 7, .base = VEBOX_RING_BASE } + }, + }, + [XE_HW_ENGINE_VECS1] = { + .name = "vecs1", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 1, + .domain = XE_FW_MEDIA_VEBOX1, + .mmio_bases = { + { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE } + }, + }, + [XE_HW_ENGINE_VECS2] = { + .name = "vecs2", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 2, + .domain = XE_FW_MEDIA_VEBOX2, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE } + }, + }, + [XE_HW_ENGINE_VECS3] = { + .name = "vecs3", + .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, + .instance = 3, + .domain = XE_FW_MEDIA_VEBOX3, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } + }, + }, + [XE_HW_ENGINE_CCS0] = { + .name = "ccs0", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 0, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }, + }, + }, + [XE_HW_ENGINE_CCS1] = { + .name = "ccs1", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 1, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }, + }, + }, + [XE_HW_ENGINE_CCS2] = { + .name = "ccs2", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 2, + .domain = 
XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }, + }, + }, + [XE_HW_ENGINE_CCS3] = { + .name = "ccs3", + .class = XE_ENGINE_CLASS_COMPUTE, + .instance = 3, + .domain = XE_FW_RENDER, + .mmio_bases = { + { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }, + }, + }, +}; + +static u32 engine_info_mmio_base(const struct engine_info *info, + unsigned int graphics_ver) +{ + int i; + + for (i = 0; i < MAX_MMIO_BASES; i++) + if (graphics_ver >= info->mmio_bases[i].graphics_ver) + break; + + XE_BUG_ON(i == MAX_MMIO_BASES); + XE_BUG_ON(!info->mmio_bases[i].base); + + return info->mmio_bases[i].base; +} + +static void hw_engine_fini(struct drm_device *drm, void *arg) +{ + struct xe_hw_engine *hwe = arg; + + if (hwe->exl_port) + xe_execlist_port_destroy(hwe->exl_port); + xe_lrc_finish(&hwe->kernel_lrc); + + xe_bo_unpin_map_no_vm(hwe->hwsp); + + hwe->gt = NULL; +} + +static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, u32 reg, u32 val) +{ + XE_BUG_ON(reg & hwe->mmio_base); + xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); + + xe_mmio_write32(hwe->gt, reg + hwe->mmio_base, val); +} + +static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, u32 reg) +{ + XE_BUG_ON(reg & hwe->mmio_base); + xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); + + return xe_mmio_read32(hwe->gt, reg + hwe->mmio_base); +} + +void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) +{ + u32 ccs_mask = + xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask & BIT(0)) + xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg, + _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); + + hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0); + hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg, + xe_bo_ggtt_addr(hwe->hwsp)); + hw_engine_mmio_write32(hwe, RING_MODE_GEN7(0).reg, + _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg, + _MASKED_BIT_DISABLE(STOP_RING)); + hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg); +} + +static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, + enum xe_hw_engine_id id) +{ + struct xe_device *xe = gt_to_xe(gt); + const struct engine_info *info; + + if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name)) + return; + + if (!(gt->info.engine_mask & BIT(id))) + return; + + info = &engine_infos[id]; + + XE_BUG_ON(hwe->gt); + + hwe->gt = gt; + hwe->class = info->class; + hwe->instance = info->instance; + hwe->mmio_base = engine_info_mmio_base(info, GRAPHICS_VER(xe)); + hwe->domain = info->domain; + hwe->name = info->name; + hwe->fence_irq = >->fence_irq[info->class]; + hwe->engine_id = id; + + xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt)); + xe_wa_process_engine(hwe); + + xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt)); + xe_reg_whitelist_process_engine(hwe); +} + +static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, + enum xe_hw_engine_id id) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name); + XE_BUG_ON(!(gt->info.engine_mask & BIT(id))); + + xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); + xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt); + + hwe->hwsp = xe_bo_create_locked(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(hwe->hwsp)) { + err = PTR_ERR(hwe->hwsp); + goto err_name; + } + + err = 
xe_bo_pin(hwe->hwsp); + if (err) + goto err_unlock_put_hwsp; + + err = xe_bo_vmap(hwe->hwsp); + if (err) + goto err_unpin_hwsp; + + xe_bo_unlock_no_vm(hwe->hwsp); + + err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K); + if (err) + goto err_hwsp; + + if (!xe_device_guc_submission_enabled(xe)) { + hwe->exl_port = xe_execlist_port_create(xe, hwe); + if (IS_ERR(hwe->exl_port)) { + err = PTR_ERR(hwe->exl_port); + goto err_kernel_lrc; + } + } + + if (xe_device_guc_submission_enabled(xe)) + xe_hw_engine_enable_ring(hwe); + + /* We reserve the highest BCS instance for USM */ + if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY) + gt->usm.reserved_bcs_instance = hwe->instance; + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); + if (err) + return err; + + return 0; + +err_unpin_hwsp: + xe_bo_unpin(hwe->hwsp); +err_unlock_put_hwsp: + xe_bo_unlock_no_vm(hwe->hwsp); + xe_bo_put(hwe->hwsp); +err_kernel_lrc: + xe_lrc_finish(&hwe->kernel_lrc); +err_hwsp: + xe_bo_put(hwe->hwsp); +err_name: + hwe->name = NULL; + + return err; +} + +static void hw_engine_setup_logical_mapping(struct xe_gt *gt) +{ + int class; + + /* FIXME: Doing a simple logical mapping that works for most hardware */ + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int logical_instance = 0; + + for_each_hw_engine(hwe, gt, id) + if (hwe->class == class) + hwe->logical_instance = logical_instance++; + } +} + +static void read_fuses(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 media_fuse; + u16 vdbox_mask; + u16 vebox_mask; + u32 bcs_mask; + int i, j; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + /* + * FIXME: Hack job, thinking we should have table of vfuncs for each + * class which picks the correct vfunc based on IP version. 
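+ * + * Note that the VDBOX/VEBOX fuse register reports disable bits on + * pre-12.50 platforms, hence the inversion below so that both + * generations can be handled as an enable mask.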
+ */ + + media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg); + if (GRAPHICS_VERx100(xe) < 1250) + media_fuse = ~media_fuse; + + vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; + vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> + GEN11_GT_VEBOX_DISABLE_SHIFT; + + for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j) & vdbox_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "vcs%u fused off\n", j); + } + } + + for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j) & vebox_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "vecs%u fused off\n", j); + } + } + + bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg); + bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask); + + for (i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!(BIT(j/2) & bcs_mask)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "bcs%u fused off\n", j); + } + } + + /* TODO: compute engines */ +} + +int xe_hw_engines_init_early(struct xe_gt *gt) +{ + int i; + + read_fuses(gt); + + for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++) + hw_engine_init_early(gt, >->hw_engines[i], i); + + return 0; +} + +int xe_hw_engines_init(struct xe_gt *gt) +{ + int err; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + err = hw_engine_init(gt, hwe, id); + if (err) + return err; + } + + hw_engine_setup_logical_mapping(gt); + + return 0; +} + +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) +{ + wake_up_all(>_to_xe(hwe->gt)->ufence_wq); + + if (hwe->irq_handler) + hwe->irq_handler(hwe, intr_vec); + + if (intr_vec & GT_RENDER_USER_INTERRUPT) + xe_hw_fence_irq_run(hwe->fence_irq); +} + +void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p) +{ + if (!xe_hw_engine_is_valid(hwe)) + return; + + drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name, + hwe->logical_instance); + drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", + hwe->domain, + xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain)); + drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base); + + drm_printf(p, "\tHWSTAM: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_HWSTAM(0).reg)); + drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_HWS_PGA(0).reg)); + + drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0).reg)); + drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg)); + drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n", + hw_engine_mmio_read32(hwe, + RING_EXECLIST_SQ_CONTENTS(0).reg)); + drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", + hw_engine_mmio_read32(hwe, + RING_EXECLIST_SQ_CONTENTS(0).reg) + 4); + drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg)); + + drm_printf(p, "\tRING_START: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_START(0).reg)); + drm_printf(p, "\tRING_HEAD: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_HEAD(0).reg) & HEAD_ADDR); + drm_printf(p, "\tRING_TAIL: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_TAIL(0).reg) & TAIL_ADDR); + drm_printf(p, "\tRING_CTL: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_CTL(0).reg)); + drm_printf(p, 
"\tRING_MODE: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg)); + drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_MODE_GEN7(0).reg)); + + drm_printf(p, "\tRING_IMR: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_IMR(0).reg)); + drm_printf(p, "\tRING_ESR: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_ESR(0).reg)); + drm_printf(p, "\tRING_EMR: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EMR(0).reg)); + drm_printf(p, "\tRING_EIR: 0x%08x\n", + hw_engine_mmio_read32(hwe, RING_EIR(0).reg)); + + drm_printf(p, "\tACTHD: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0).reg), + hw_engine_mmio_read32(hwe, RING_ACTHD(0).reg)); + drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0).reg), + hw_engine_mmio_read32(hwe, RING_BBADDR(0).reg)); + drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0).reg), + hw_engine_mmio_read32(hwe, RING_DMA_FADD(0).reg)); + + drm_printf(p, "\tIPEIR: 0x%08x\n", + hw_engine_mmio_read32(hwe, IPEIR(0).reg)); + drm_printf(p, "\tIPEHR: 0x%08x\n\n", + hw_engine_mmio_read32(hwe, IPEHR(0).reg)); + + if (hwe->class == XE_ENGINE_CLASS_COMPUTE) + drm_printf(p, "\tGEN12_RCU_MODE: 0x%08x\n", + xe_mmio_read32(hwe->gt, GEN12_RCU_MODE.reg)); + +} + +u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, + enum xe_engine_class engine_class) +{ + u32 mask = 0; + enum xe_hw_engine_id id; + + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) { + if (engine_infos[id].class == engine_class && + gt->info.engine_mask & BIT(id)) + mask |= BIT(engine_infos[id].instance); + } + return mask; +} + +bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + + return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY && + hwe->instance == gt->usm.reserved_bcs_instance; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h new file mode 100644 index 000000000000..ceab65397256 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_H_ +#define _XE_HW_ENGINE_H_ + +#include "xe_hw_engine_types.h" + +struct drm_printer; + +int xe_hw_engines_init_early(struct xe_gt *gt); +int xe_hw_engines_init(struct xe_gt *gt); +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec); +void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); +void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p); +u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, + enum xe_engine_class engine_class); + +bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); +static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) +{ + return hwe->name; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h new file mode 100644 index 000000000000..05a2fdc381d7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_TYPES_H_ +#define _XE_HW_ENGINE_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_lrc_types.h" +#include "xe_reg_sr_types.h" + +/* See "Engine ID Definition" struct in the Icelake PRM */ +enum xe_engine_class { + XE_ENGINE_CLASS_RENDER = 0, + XE_ENGINE_CLASS_VIDEO_DECODE = 1, + XE_ENGINE_CLASS_VIDEO_ENHANCE = 2, + XE_ENGINE_CLASS_COPY = 3, + 
XE_ENGINE_CLASS_OTHER = 4, + XE_ENGINE_CLASS_COMPUTE = 5, + XE_ENGINE_CLASS_MAX = 6, +}; + +enum xe_hw_engine_id { + XE_HW_ENGINE_RCS0, + XE_HW_ENGINE_BCS0, + XE_HW_ENGINE_BCS1, + XE_HW_ENGINE_BCS2, + XE_HW_ENGINE_BCS3, + XE_HW_ENGINE_BCS4, + XE_HW_ENGINE_BCS5, + XE_HW_ENGINE_BCS6, + XE_HW_ENGINE_BCS7, + XE_HW_ENGINE_BCS8, + XE_HW_ENGINE_VCS0, + XE_HW_ENGINE_VCS1, + XE_HW_ENGINE_VCS2, + XE_HW_ENGINE_VCS3, + XE_HW_ENGINE_VCS4, + XE_HW_ENGINE_VCS5, + XE_HW_ENGINE_VCS6, + XE_HW_ENGINE_VCS7, + XE_HW_ENGINE_VECS0, + XE_HW_ENGINE_VECS1, + XE_HW_ENGINE_VECS2, + XE_HW_ENGINE_VECS3, + XE_HW_ENGINE_CCS0, + XE_HW_ENGINE_CCS1, + XE_HW_ENGINE_CCS2, + XE_HW_ENGINE_CCS3, + XE_NUM_HW_ENGINES, +}; + +/* FIXME: s/XE_HW_ENGINE_MAX_INSTANCE/XE_HW_ENGINE_MAX_COUNT */ +#define XE_HW_ENGINE_MAX_INSTANCE 9 + +struct xe_bo; +struct xe_execlist_port; +struct xe_gt; + +/** + * struct xe_hw_engine - Hardware engine + * + * Contains all the hardware engine state for physical instances. + */ +struct xe_hw_engine { + /** @gt: graphics tile this hw engine belongs to */ + struct xe_gt *gt; + /** @name: name of this hw engine */ + const char *name; + /** @class: class of this hw engine */ + enum xe_engine_class class; + /** @instance: physical instance of this hw engine */ + u16 instance; + /** @logical_instance: logical instance of this hw engine */ + u16 logical_instance; + /** @mmio_base: MMIO base address of this hw engine*/ + u32 mmio_base; + /** + * @reg_sr: table with registers to be restored on GT init/resume/reset + */ + struct xe_reg_sr reg_sr; + /** + * @reg_whitelist: table with registers to be whitelisted + */ + struct xe_reg_sr reg_whitelist; + /** + * @reg_lrc: LRC workaround registers + */ + struct xe_reg_sr reg_lrc; + /** @domain: force wake domain of this hw engine */ + enum xe_force_wake_domains domain; + /** @hwsp: hardware status page buffer object */ + struct xe_bo *hwsp; + /** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */ + struct xe_lrc kernel_lrc; + /** @exl_port: execlists port */ + struct xe_execlist_port *exl_port; + /** @fence_irq: fence IRQ to run when a hw engine IRQ is received */ + struct xe_hw_fence_irq *fence_irq; + /** @irq_handler: IRQ handler to run when hw engine IRQ is received */ + void (*irq_handler)(struct xe_hw_engine *, u16); + /** @engine_id: id for this hw engine */ + enum xe_hw_engine_id engine_id; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c new file mode 100644 index 000000000000..e56ca2867545 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_hw_fence.h" + +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_macros.h" +#include "xe_map.h" +#include "xe_trace.h" + +static struct kmem_cache *xe_hw_fence_slab; + +int __init xe_hw_fence_module_init(void) +{ + xe_hw_fence_slab = kmem_cache_create("xe_hw_fence", + sizeof(struct xe_hw_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_hw_fence_slab) + return -ENOMEM; + + return 0; +} + +void xe_hw_fence_module_exit(void) +{ + rcu_barrier(); + kmem_cache_destroy(xe_hw_fence_slab); +} + +static struct xe_hw_fence *fence_alloc(void) +{ + return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL); +} + +static void fence_free(struct rcu_head *rcu) +{ + struct xe_hw_fence *fence = + container_of(rcu, struct xe_hw_fence, dma.rcu); + + if (!WARN_ON_ONCE(!fence)) + 
kmem_cache_free(xe_hw_fence_slab, fence); +} + +static void hw_fence_irq_run_cb(struct irq_work *work) +{ + struct xe_hw_fence_irq *irq = container_of(work, typeof(*irq), work); + struct xe_hw_fence *fence, *next; + bool tmp; + + tmp = dma_fence_begin_signalling(); + spin_lock(&irq->lock); + if (irq->enabled) { + list_for_each_entry_safe(fence, next, &irq->pending, irq_link) { + struct dma_fence *dma_fence = &fence->dma; + + trace_xe_hw_fence_try_signal(fence); + if (dma_fence_is_signaled_locked(dma_fence)) { + trace_xe_hw_fence_signal(fence); + list_del_init(&fence->irq_link); + dma_fence_put(dma_fence); + } + } + } + spin_unlock(&irq->lock); + dma_fence_end_signalling(tmp); +} + +void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq) +{ + spin_lock_init(&irq->lock); + init_irq_work(&irq->work, hw_fence_irq_run_cb); + INIT_LIST_HEAD(&irq->pending); + irq->enabled = true; +} + +void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) +{ + struct xe_hw_fence *fence, *next; + unsigned long flags; + int err; + bool tmp; + + if (XE_WARN_ON(!list_empty(&irq->pending))) { + tmp = dma_fence_begin_signalling(); + spin_lock_irqsave(&irq->lock, flags); + list_for_each_entry_safe(fence, next, &irq->pending, irq_link) { + list_del_init(&fence->irq_link); + err = dma_fence_signal_locked(&fence->dma); + dma_fence_put(&fence->dma); + XE_WARN_ON(err); + } + spin_unlock_irqrestore(&irq->lock, flags); + dma_fence_end_signalling(tmp); + } +} + +void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) +{ + irq_work_queue(&irq->work); +} + +void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq) +{ + spin_lock_irq(&irq->lock); + irq->enabled = false; + spin_unlock_irq(&irq->lock); +} + +void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq) +{ + spin_lock_irq(&irq->lock); + irq->enabled = true; + spin_unlock_irq(&irq->lock); + + irq_work_queue(&irq->work); +} + +void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, + struct xe_hw_fence_irq *irq, const char *name) +{ + ctx->gt = gt; + ctx->irq = irq; + ctx->dma_fence_ctx = dma_fence_context_alloc(1); + ctx->next_seqno = 1; + sprintf(ctx->name, "%s", name); +} + +void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx) +{ +} + +static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence); + +static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence) +{ + return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock); +} + +static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); +} + +static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + return fence->ctx->name; +} + +static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + struct xe_device *xe = gt_to_xe(fence->ctx->gt); + u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); + + return dma_fence->error || + (s32)fence->dma.seqno <= (s32)seqno; +} + +static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + struct xe_hw_fence_irq *irq = xe_hw_fence_irq(fence); + + dma_fence_get(dma_fence); + list_add_tail(&fence->irq_link, &irq->pending); + + /* SW completed (no HW IRQ) so kick handler to signal fence */ + if (xe_hw_fence_signaled(dma_fence)) + xe_hw_fence_irq_run(irq); + + return true; +} + +static void 
xe_hw_fence_release(struct dma_fence *dma_fence) +{ + struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); + + trace_xe_hw_fence_free(fence); + XE_BUG_ON(!list_empty(&fence->irq_link)); + call_rcu(&dma_fence->rcu, fence_free); +} + +static const struct dma_fence_ops xe_hw_fence_ops = { + .get_driver_name = xe_hw_fence_get_driver_name, + .get_timeline_name = xe_hw_fence_get_timeline_name, + .enable_signaling = xe_hw_fence_enable_signaling, + .signaled = xe_hw_fence_signaled, + .release = xe_hw_fence_release, +}; + +static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence) +{ + if (XE_WARN_ON(fence->ops != &xe_hw_fence_ops)) + return NULL; + + return container_of(fence, struct xe_hw_fence, dma); +} + +struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, + struct iosys_map seqno_map) +{ + struct xe_hw_fence *fence; + + fence = fence_alloc(); + if (!fence) + return ERR_PTR(-ENOMEM); + + dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, + ctx->dma_fence_ctx, ctx->next_seqno++); + + fence->ctx = ctx; + fence->seqno_map = seqno_map; + INIT_LIST_HEAD(&fence->irq_link); + + trace_xe_hw_fence_create(fence); + + return fence; +} diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h new file mode 100644 index 000000000000..07f202db6526 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_HW_FENCE_H_ +#define _XE_HW_FENCE_H_ + +#include "xe_hw_fence_types.h" + +int xe_hw_fence_module_init(void); +void xe_hw_fence_module_exit(void); + +void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq); +void xe_hw_fence_irq_start(struct xe_hw_fence_irq *irq); + +void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, + struct xe_hw_fence_irq *irq, const char *name); +void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx); + +struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, + struct iosys_map seqno_map); + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h new file mode 100644 index 000000000000..a78e50eb3cb8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_HW_FENCE_TYPES_H_ +#define _XE_HW_FENCE_TYPES_H_ + +#include +#include +#include +#include +#include + +struct xe_gt; + +/** + * struct xe_hw_fence_irq - hardware fence IRQ handler + * + * One per engine class, signals completed xe_hw_fences, triggered via hw engine + * interrupt. On each trigger, search list of pending fences and signal. + */ +struct xe_hw_fence_irq { + /** @lock: protects all xe_hw_fences + pending list */ + spinlock_t lock; + /** @work: IRQ worker run to signal the fences */ + struct irq_work work; + /** @pending: list of pending xe_hw_fences */ + struct list_head pending; + /** @enabled: fence signaling enabled */ + bool enabled; +}; + +#define MAX_FENCE_NAME_LEN 16 + +/** + * struct xe_hw_fence_ctx - hardware fence context + * + * The context for a hardware fence. 1 to 1 relationship with xe_engine. Points + * to a xe_hw_fence_irq, maintains serial seqno. 
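+ * The context name doubles as the dma-fence timeline name returned by
+ * xe_hw_fence_get_timeline_name().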
+ */ +struct xe_hw_fence_ctx { + /** @gt: graphics tile of hardware fence context */ + struct xe_gt *gt; + /** @irq: fence irq handler */ + struct xe_hw_fence_irq *irq; + /** @dma_fence_ctx: dma fence context for hardware fence */ + u64 dma_fence_ctx; + /** @next_seqno: next seqno for hardware fence */ + u32 next_seqno; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; +}; + +/** + * struct xe_hw_fence - hardware fence + * + * Used to indicate a xe_sched_job is complete via a seqno written to memory. + * Signals on error or seqno past. + */ +struct xe_hw_fence { + /** @dma: base dma fence for hardware fence context */ + struct dma_fence dma; + /** @ctx: hardware fence context */ + struct xe_hw_fence_ctx *ctx; + /** @seqno_map: I/O map for seqno */ + struct iosys_map seqno_map; + /** @irq_link: Link in struct xe_hw_fence_irq.pending */ + struct list_head irq_link; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c new file mode 100644 index 000000000000..df2e3573201d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -0,0 +1,565 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include + +#include "xe_device.h" +#include "xe_drv.h" +#include "xe_guc.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_mmio.h" + +#include "i915_reg.h" +#include "gt/intel_gt_regs.h" + +static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) +{ + u32 val = xe_mmio_read32(gt, reg.reg); + + if (val == 0) + return; + + drm_WARN(>_to_xe(gt)->drm, 1, + "Interrupt register 0x%x is not zero: 0x%08x\n", + reg.reg, val); + xe_mmio_write32(gt, reg.reg, 0xffffffff); + xe_mmio_read32(gt, reg.reg); + xe_mmio_write32(gt, reg.reg, 0xffffffff); + xe_mmio_read32(gt, reg.reg); +} + +static void gen3_irq_init(struct xe_gt *gt, + i915_reg_t imr, u32 imr_val, + i915_reg_t ier, u32 ier_val, + i915_reg_t iir) +{ + gen3_assert_iir_is_zero(gt, iir); + + xe_mmio_write32(gt, ier.reg, ier_val); + xe_mmio_write32(gt, imr.reg, imr_val); + xe_mmio_read32(gt, imr.reg); +} +#define GEN3_IRQ_INIT(gt, type, imr_val, ier_val) \ + gen3_irq_init((gt), \ + type##IMR, imr_val, \ + type##IER, ier_val, \ + type##IIR) + +static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir, + i915_reg_t ier) +{ + xe_mmio_write32(gt, imr.reg, 0xffffffff); + xe_mmio_read32(gt, imr.reg); + + xe_mmio_write32(gt, ier.reg, 0); + + /* IIR can theoretically queue up two events. Be paranoid. */ + xe_mmio_write32(gt, iir.reg, 0xffffffff); + xe_mmio_read32(gt, iir.reg); + xe_mmio_write32(gt, iir.reg, 0xffffffff); + xe_mmio_read32(gt, iir.reg); +} +#define GEN3_IRQ_RESET(gt, type) \ + gen3_irq_reset((gt), type##IMR, type##IIR, type##IER) + +static u32 gen11_intr_disable(struct xe_gt *gt) +{ + xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, 0); + + /* + * Now with master disabled, get a sample of level indications + * for this interrupt. Indications will be cleared on related acks. + * New indications can and will light up during processing, + * and will generate new interrupt after enabling master. 
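+	 * The caller is expected to re-enable the master interrupt via
+	 * gen11_intr_enable() once the sampled bits have been handled.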
+ */ + return xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); +} + +static u32 +gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) +{ + u32 iir; + + if (!(master_ctl & GEN11_GU_MISC_IRQ)) + return 0; + + iir = xe_mmio_read32(gt, GEN11_GU_MISC_IIR.reg); + if (likely(iir)) + xe_mmio_write32(gt, GEN11_GU_MISC_IIR.reg, iir); + + return iir; +} + +static inline void gen11_intr_enable(struct xe_gt *gt, bool stall) +{ + xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, GEN11_MASTER_IRQ); + if (stall) + xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); +} + +static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +{ + u32 irqs, dmask, smask; + u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); + u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + + if (xe_device_guc_submission_enabled(xe)) { + irqs = GT_RENDER_USER_INTERRUPT | + GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; + } else { + irqs = GT_RENDER_USER_INTERRUPT | + GT_CS_MASTER_ERROR_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; + } + + dmask = irqs << 16 | irqs; + smask = irqs << 16; + + /* Enable RCS, BCS, VCS and VECS class interrupts. */ + xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, dmask); + xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, dmask); + if (ccs_mask) + xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, smask); + + /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ + xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~smask); + xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~smask); + if (bcs_mask & (BIT(1)|BIT(2))) + xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask); + if (bcs_mask & (BIT(3)|BIT(4))) + xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~dmask); + if (bcs_mask & (BIT(5)|BIT(6))) + xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask); + if (bcs_mask & (BIT(7)|BIT(8))) + xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~dmask); + //if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) + // intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask); + //if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) + // intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask); + xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~dmask); + //if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) + // intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask); + if (ccs_mask & (BIT(0)|BIT(1))) + xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~dmask); + if (ccs_mask & (BIT(2)|BIT(3))) + xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~dmask); + + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. 
+ */ + /* TODO: gt->pm_ier, gt->pm_imr */ + xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0); + + /* Same thing for GuC interrupts */ + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0); +} + +static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +{ + /* TODO: PCH */ + + gen11_gt_irq_postinstall(xe, gt); + + GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, + GEN11_GU_MISC_GSE); + + gen11_intr_enable(gt, true); +} + +static u32 +gen11_gt_engine_identity(struct xe_device *xe, + struct xe_gt *gt, + const unsigned int bank, + const unsigned int bit) +{ + u32 timeout_ts; + u32 ident; + + lockdep_assert_held(&xe->irq.lock); + + xe_mmio_write32(gt, GEN11_IIR_REG_SELECTOR(bank).reg, BIT(bit)); + + /* + * NB: Specs do not specify how long to spin wait, + * so we do ~100us as an educated guess. + */ + timeout_ts = (local_clock() >> 10) + 100; + do { + ident = xe_mmio_read32(gt, GEN11_INTR_IDENTITY_REG(bank).reg); + } while (!(ident & GEN11_INTR_DATA_VALID) && + !time_after32(local_clock() >> 10, timeout_ts)); + + if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { + drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", + bank, bit, ident); + return 0; + } + + xe_mmio_write32(gt, GEN11_INTR_IDENTITY_REG(bank).reg, + GEN11_INTR_DATA_VALID); + + return ident; +} + +#define OTHER_MEDIA_GUC_INSTANCE 16 + +static void +gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) +{ + if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + return xe_guc_irq_handler(>->uc.guc, iir); + if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) + return xe_guc_irq_handler(>->uc.guc, iir); + + if (instance != OTHER_GUC_INSTANCE && + instance != OTHER_MEDIA_GUC_INSTANCE) { + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", + instance, iir); + } +} + +static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, + u32 master_ctl, long unsigned int *intr_dw, + u32 *identity) +{ + unsigned int bank, bit; + u16 instance, intr_vec; + enum xe_engine_class class; + struct xe_hw_engine *hwe; + + spin_lock(&xe->irq.lock); + + for (bank = 0; bank < 2; bank++) { + if (!(master_ctl & GEN11_GT_DW_IRQ(bank))) + continue; + + if (!xe_gt_is_media_type(gt)) { + intr_dw[bank] = + xe_mmio_read32(gt, GEN11_GT_INTR_DW(bank).reg); + for_each_set_bit(bit, intr_dw + bank, 32) + identity[bit] = gen11_gt_engine_identity(xe, gt, + bank, + bit); + xe_mmio_write32(gt, GEN11_GT_INTR_DW(bank).reg, + intr_dw[bank]); + } + + for_each_set_bit(bit, intr_dw + bank, 32) { + class = GEN11_INTR_ENGINE_CLASS(identity[bit]); + instance = GEN11_INTR_ENGINE_INSTANCE(identity[bit]); + intr_vec = GEN11_INTR_ENGINE_INTR(identity[bit]); + + if (class == XE_ENGINE_CLASS_OTHER) { + gen11_gt_other_irq_handler(gt, instance, + intr_vec); + continue; + } + + hwe = xe_gt_hw_engine(gt, class, instance, false); + if (!hwe) + continue; + + xe_hw_engine_handle_irq(hwe, intr_vec); + } + } + + spin_unlock(&xe->irq.lock); +} + +static irqreturn_t gen11_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_gt *gt = xe_device_get_gt(xe, 0); /* Only 1 GT here */ + u32 master_ctl, gu_misc_iir; + long unsigned int intr_dw[2]; + u32 identity[32]; + + master_ctl = gen11_intr_disable(gt); + if (!master_ctl) { + gen11_intr_enable(gt, false); + return IRQ_NONE; + } + + gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + 
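+	/*
+	 * Also ack any GU MISC (GSE) interrupt; the returned value is not
+	 * consumed yet since display handling is not wired up at this point.
+	 */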
+ gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); + + gen11_intr_enable(gt, false); + + return IRQ_HANDLED; +} + +static u32 dg1_intr_disable(struct xe_device *xe) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + u32 val; + + /* First disable interrupts */ + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, 0); + + /* Get the indication levels and ack the master unit */ + val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg); + if (unlikely(!val)) + return 0; + + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, val); + + return val; +} + +static void dg1_intr_enable(struct xe_device *xe, bool stall) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, DG1_MSTR_IRQ); + if (stall) + xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg); +} + +static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +{ + gen11_gt_irq_postinstall(xe, gt); + + GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, + GEN11_GU_MISC_GSE); + + if (gt->info.id + 1 == xe->info.tile_count) + dg1_intr_enable(xe, true); +} + +static irqreturn_t dg1_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_gt *gt; + u32 master_tile_ctl, master_ctl = 0, gu_misc_iir; + long unsigned int intr_dw[2]; + u32 identity[32]; + u8 id; + + /* TODO: This really shouldn't be copied+pasted */ + + master_tile_ctl = dg1_intr_disable(xe); + if (!master_tile_ctl) { + dg1_intr_enable(xe, false); + return IRQ_NONE; + } + + for_each_gt(gt, xe, id) { + if ((master_tile_ctl & DG1_MSTR_TILE(gt->info.vram_id)) == 0) + continue; + + if (!xe_gt_is_media_type(gt)) + master_ctl = xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); + + /* + * We might be in irq handler just when PCIe DPC is initiated + * and all MMIO reads will be returned with all 1's. Ignore this + * irq as device is inaccessible. + */ + if (master_ctl == REG_GENMASK(31, 0)) { + dev_dbg(gt_to_xe(gt)->drm.dev, + "Ignore this IRQ as device might be in DPC containment.\n"); + return IRQ_HANDLED; + } + + if (!xe_gt_is_media_type(gt)) + xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, master_ctl); + gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + } + + gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); + + dg1_intr_enable(xe, false); + + return IRQ_HANDLED; +} + +static void gen11_gt_irq_reset(struct xe_gt *gt) +{ + u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); + u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + + /* Disable RCS, BCS, VCS and VECS class engines. */ + xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, 0); + if (ccs_mask) + xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, 0); + + /* Restore masks irqs on RCS, BCS, VCS and VECS engines. 
*/ + xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~0); + if (bcs_mask & (BIT(1)|BIT(2))) + xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0); + if (bcs_mask & (BIT(3)|BIT(4))) + xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~0); + if (bcs_mask & (BIT(5)|BIT(6))) + xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0); + if (bcs_mask & (BIT(7)|BIT(8))) + xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~0); +// if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) +// xe_mmio_write32(xe, GEN12_VCS4_VCS5_INTR_MASK.reg, ~0); +// if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) +// xe_mmio_write32(xe, GEN12_VCS6_VCS7_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~0); +// if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) +// xe_mmio_write32(xe, GEN12_VECS2_VECS3_INTR_MASK.reg, ~0); + if (ccs_mask & (BIT(0)|BIT(1))) + xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~0); + if (ccs_mask & (BIT(2)|BIT(3))) + xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~0); + + xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0); +} + +static void gen11_irq_reset(struct xe_gt *gt) +{ + gen11_intr_disable(gt); + + gen11_gt_irq_reset(gt); + + GEN3_IRQ_RESET(gt, GEN11_GU_MISC_); + GEN3_IRQ_RESET(gt, GEN8_PCU_); +} + +static void dg1_irq_reset(struct xe_gt *gt) +{ + if (gt->info.id == 0) + dg1_intr_disable(gt_to_xe(gt)); + + gen11_gt_irq_reset(gt); + + GEN3_IRQ_RESET(gt, GEN11_GU_MISC_); + GEN3_IRQ_RESET(gt, GEN8_PCU_); +} + +void xe_irq_reset(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) { + if (GRAPHICS_VERx100(xe) >= 1210) { + dg1_irq_reset(gt); + } else if (GRAPHICS_VER(xe) >= 11) { + gen11_irq_reset(gt); + } else { + drm_err(&xe->drm, "No interrupt reset hook"); + } + } +} + +void xe_gt_irq_postinstall(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (GRAPHICS_VERx100(xe) >= 1210) + dg1_irq_postinstall(xe, gt); + else if (GRAPHICS_VER(xe) >= 11) + gen11_irq_postinstall(xe, gt); + else + drm_err(&xe->drm, "No interrupt postinstall hook"); +} + +static void xe_irq_postinstall(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + xe_gt_irq_postinstall(gt); +} + +static irq_handler_t xe_irq_handler(struct xe_device *xe) +{ + if (GRAPHICS_VERx100(xe) >= 1210) { + return dg1_irq_handler; + } else if (GRAPHICS_VER(xe) >= 11) { + return gen11_irq_handler; + } else { + return NULL; + } +} + +static void irq_uninstall(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int irq = pdev->irq; + + if (!xe->irq.enabled) + return; + + xe->irq.enabled = false; + xe_irq_reset(xe); + free_irq(irq, xe); + if (pdev->msi_enabled) + pci_disable_msi(pdev); +} + +int xe_irq_install(struct xe_device *xe) +{ + int irq = to_pci_dev(xe->drm.dev)->irq; + static irq_handler_t irq_handler; + int err; + + irq_handler = xe_irq_handler(xe); + if (!irq_handler) { + drm_err(&xe->drm, "No supported interrupt handler"); + return -EINVAL; + } + + xe->irq.enabled = true; + + xe_irq_reset(xe); + + err = request_irq(irq, irq_handler, + IRQF_SHARED, DRIVER_NAME, xe); + if (err < 0) { + xe->irq.enabled = 
false; + return err; + } + + err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe); + if (err) + return err; + + return err; +} + +void xe_irq_shutdown(struct xe_device *xe) +{ + irq_uninstall(&xe->drm, xe); +} + +void xe_irq_suspend(struct xe_device *xe) +{ + spin_lock_irq(&xe->irq.lock); + xe->irq.enabled = false; + xe_irq_reset(xe); + spin_unlock_irq(&xe->irq.lock); +} + +void xe_irq_resume(struct xe_device *xe) +{ + spin_lock_irq(&xe->irq.lock); + xe->irq.enabled = true; + xe_irq_reset(xe); + xe_irq_postinstall(xe); + spin_unlock_irq(&xe->irq.lock); +} diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h new file mode 100644 index 000000000000..34ecf22b32d3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_IRQ_H_ +#define _XE_IRQ_H_ + +struct xe_device; +struct xe_gt; + +int xe_irq_install(struct xe_device *xe); +void xe_gt_irq_postinstall(struct xe_gt *gt); +void xe_irq_shutdown(struct xe_device *xe); +void xe_irq_suspend(struct xe_device *xe); +void xe_irq_resume(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c new file mode 100644 index 000000000000..056c2c5a0b81 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -0,0 +1,841 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_lrc.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine_types.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_hw_fence.h" +#include "xe_vm.h" + +#include "i915_reg.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_lrc_reg.h" +#include "gt/intel_engine_regs.h" + +#define GEN8_CTX_VALID (1 << 0) +#define GEN8_CTX_L3LLC_COHERENT (1 << 5) +#define GEN8_CTX_PRIVILEGE (1 << 8) +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 +#define INTEL_LEGACY_64B_CONTEXT 3 + +#define GEN11_ENGINE_CLASS_SHIFT 61 +#define GEN11_ENGINE_INSTANCE_SHIFT 48 + +static struct xe_device * +lrc_to_xe(struct xe_lrc *lrc) +{ + return gt_to_xe(lrc->fence_ctx.gt); +} + +size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + /* 14 pages since graphics_ver == 11 */ + return 14 * SZ_4K; + default: + WARN(1, "Unknown engine class: %d", class); + fallthrough; + case XE_ENGINE_CLASS_COPY: + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return 2 * SZ_4K; + } +} + +/* + * The per-platform tables are u8-encoded in @data. Decode @data and set the + * addresses' offset and commands in @regs. The following encoding is used + * for each byte. There are 2 steps: decoding commands and decoding addresses. + * + * Commands: + * [7]: create NOPs - number of NOPs are set in lower bits + * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set + * MI_LRI_FORCE_POSTED + * [5:0]: Number of NOPs or registers to set values to in case of + * MI_LOAD_REGISTER_IMM + * + * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" + * number of registers. They are set by using the REG/REG16 macros: the former + * is used for offsets smaller than 0x200 while the latter is for values bigger + * than that. 
Those macros already set all the bits documented below correctly: + * + * [7]: When a register offset needs more than 6 bits, use additional bytes, to + * follow, for the lower bits + * [6:0]: Register offset, without considering the engine base. + * + * This function only tweaks the commands and register offsets. Values are not + * filled out. + */ +static void set_offsets(u32 *regs, + const u8 *data, + const struct xe_hw_engine *hwe) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count) | \ + BUILD_BUG_ON_ZERO(count >= BIT(6))) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END 0 +{ + const u32 base = hwe->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + count = *data++ & ~BIT(7); + regs += count; + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + *regs |= MI_LRI_LRM_CS_MMIO; + regs++; + + XE_BUG_ON(!count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + regs[0] = base + (offset << 2); + regs += 2; + } while (--count); + } + + *regs = MI_BATCH_BUFFER_END | BIT(0); +} + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + +static const u8 dg2_xcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + NOP(3 + 9 + 1), + + LRI(51, POSTED), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + 
REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + REG(0x084), + NOP(1), + + END +}; + +static const u8 xehp_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +static const u8 dg2_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +static const u8 mtl_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(2), + LRI(2, POSTED), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) +{ + if (class == XE_ENGINE_CLASS_RENDER) { + if (GRAPHICS_VERx100(xe) >= 1270) + return mtl_rcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1255) + return dg2_rcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1250) + return xehp_rcs_offsets; + else + return gen12_rcs_offsets; + } else { + if (GRAPHICS_VERx100(xe) >= 1255) + return dg2_xcs_offsets; + else + return gen12_xcs_offsets; + } +} + +static void set_context_control(u32 * regs, struct xe_hw_engine *hwe) +{ + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) | + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + + /* TODO: Timestamp */ +} + +static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(hwe->gt); + + if (GRAPHICS_VERx100(xe) >= 1250) + return 0x70; + else + return 0x60; +} + +static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) +{ + int x; + + x = lrc_ring_mi_mode(hwe); + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; +} + +static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) +{ + return 0; +} + +u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) +{ + return lrc->ring.size; +} + +/* Make the magic macros work */ +#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset + +#define LRC_SEQNO_PPHWSP_OFFSET 512 +#define LRC_START_SEQNO_PPHWSP_OFFSET LRC_SEQNO_PPHWSP_OFFSET + 8 +#define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_PPHWSP_SIZE SZ_4K + +static size_t lrc_reg_size(struct xe_device *xe) +{ + if (GRAPHICS_VERx100(xe) >= 
1250) + return 96 * sizeof(u32); + else + return 80 * sizeof(u32); +} + +size_t xe_lrc_skip_size(struct xe_device *xe) +{ + return LRC_PPHWSP_SIZE + lrc_reg_size(xe); +} + +static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) +{ + /* The seqno is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; +} + +static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) +{ + /* The start seqno is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; +} + +static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) +{ + /* The parallel is stored in the driver-defined portion of PPHWSP */ + return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; +} + +static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; +} + +#define DECL_MAP_ADDR_HELPERS(elem) \ +static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ +{ \ + struct iosys_map map = lrc->bo->vmap; \ +\ + XE_BUG_ON(iosys_map_is_null(&map)); \ + iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ + return map; \ +} \ +static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ +{ \ + return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ +} \ + +DECL_MAP_ADDR_HELPERS(ring) +DECL_MAP_ADDR_HELPERS(pphwsp) +DECL_MAP_ADDR_HELPERS(seqno) +DECL_MAP_ADDR_HELPERS(regs) +DECL_MAP_ADDR_HELPERS(start_seqno) +DECL_MAP_ADDR_HELPERS(parallel) + +#undef DECL_MAP_ADDR_HELPERS + +u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_pphwsp_ggtt_addr(lrc); +} + +u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_regs_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + return xe_map_read32(xe, &map); +} + +void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_regs_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + xe_map_write32(xe, &map, val); +} + +static void *empty_lrc_data(struct xe_hw_engine *hwe) +{ + struct xe_device *xe = gt_to_xe(hwe->gt); + void *data; + u32 *regs; + + data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); + if (!data) + return NULL; + + /* 1st page: Per-Process of HW status Page */ + regs = data + LRC_PPHWSP_SIZE; + set_offsets(regs, reg_offsets(xe, hwe->class), hwe); + set_context_control(regs, hwe); + reset_stop_ring(regs, hwe); + + return data; +} + +static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) +{ + u64 desc = xe_vm_pdp4_descriptor(vm, lrc->full_gt); + + xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); + xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); +} + +#define PVC_CTX_ASID (0x2e + 1) +#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +#define ACC_GRANULARITY_S 20 +#define ACC_NOTIFY_S 16 + +int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + struct xe_engine *e, struct xe_vm *vm, u32 ring_size) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + struct iosys_map map; + void *init_data = NULL; + u32 arb_enable; + int err; + + lrc->flags = 0; + + lrc->bo = xe_bo_create_locked(xe, hwe->gt, vm, + ring_size + xe_lrc_size(xe, hwe->class), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(lrc->bo)) + return PTR_ERR(lrc->bo); + + if (xe_gt_is_media_type(hwe->gt)) 
+ lrc->full_gt = xe_find_full_gt(hwe->gt); + else + lrc->full_gt = hwe->gt; + + /* + * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address + * via VM bind calls. + */ + err = xe_bo_pin(lrc->bo); + if (err) + goto err_unlock_put_bo; + lrc->flags |= XE_LRC_PINNED; + + err = xe_bo_vmap(lrc->bo); + if (err) + goto err_unpin_bo; + + xe_bo_unlock_vm_held(lrc->bo); + + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, + hwe->fence_irq, hwe->name); + + if (!gt->default_lrc[hwe->class]) { + init_data = empty_lrc_data(hwe); + if (!init_data) { + xe_lrc_finish(lrc); + return -ENOMEM; + } + } + + /* + * Init Per-Process of HW status Page, LRC / context state to known + * values + */ + map = __xe_lrc_pphwsp_map(lrc); + if (!init_data) { + xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ + xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, + gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, + xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE); + } else { + xe_map_memcpy_to(xe, &map, 0, init_data, + xe_lrc_size(xe, hwe->class)); + kfree(init_data); + } + + if (vm) + xe_lrc_set_ppgtt(lrc, vm); + + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + if (xe->info.supports_usm && vm) { + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, + (e->usm.acc_granularity << + ACC_GRANULARITY_S) | vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, + (e->usm.acc_notify << ACC_NOTIFY_S) | + e->usm.acc_trigger); + } + + lrc->desc = GEN8_CTX_VALID; + lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT; + /* TODO: Priority */ + + /* While this appears to have something about privileged batches or + * some such, it really just means PPGTT mode. 
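+	 * Hence the bit is only set when the LRC is created with a VM (i.e.
+	 * PPGTT) attached.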
+ */ + if (vm) + lrc->desc |= GEN8_CTX_PRIVILEGE; + + if (GRAPHICS_VERx100(xe) < 1250) { + lrc->desc |= (u64)hwe->instance << GEN11_ENGINE_INSTANCE_SHIFT; + lrc->desc |= (u64)hwe->class << GEN11_ENGINE_CLASS_SHIFT; + } + + arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; + xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); + + return 0; + +err_unpin_bo: + if (lrc->flags & XE_LRC_PINNED) + xe_bo_unpin(lrc->bo); +err_unlock_put_bo: + xe_bo_unlock_vm_held(lrc->bo); + xe_bo_put(lrc->bo); + return err; +} + +void xe_lrc_finish(struct xe_lrc *lrc) +{ + struct ww_acquire_ctx ww; + + xe_hw_fence_ctx_finish(&lrc->fence_ctx); + if (lrc->flags & XE_LRC_PINNED) { + if (lrc->bo->vm) + xe_vm_lock(lrc->bo->vm, &ww, 0, false); + else + xe_bo_lock_no_vm(lrc->bo, NULL); + xe_bo_unpin(lrc->bo); + if (lrc->bo->vm) + xe_vm_unlock(lrc->bo->vm, &ww); + else + xe_bo_unlock_no_vm(lrc->bo); + } + xe_bo_put(lrc->bo); +} + +void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) +{ + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); +} + +u32 xe_lrc_ring_head(struct xe_lrc *lrc) +{ + return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; +} + +u32 xe_lrc_ring_space(struct xe_lrc *lrc) +{ + const u32 head = xe_lrc_ring_head(lrc); + const u32 tail = lrc->ring.tail; + const u32 size = lrc->ring.size; + + return ((head - tail - 1) & (size - 1)) + 1; +} + +static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, + const void *data, size_t size) +{ + struct xe_device *xe = lrc_to_xe(lrc); + + iosys_map_incr(&ring, lrc->ring.tail); + xe_map_memcpy_to(xe, &ring, 0, data, size); + lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); +} + +void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) +{ + struct iosys_map ring; + u32 rhs; + size_t aligned_size; + + XE_BUG_ON(!IS_ALIGNED(size, 4)); + aligned_size = ALIGN(size, 8); + + ring = __xe_lrc_ring_map(lrc); + + XE_BUG_ON(lrc->ring.tail >= lrc->ring.size); + rhs = lrc->ring.size - lrc->ring.tail; + if (size > rhs) { + __xe_lrc_write_ring(lrc, ring, data, rhs); + __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); + } else { + __xe_lrc_write_ring(lrc, ring, data, size); + } + + if (aligned_size > size) { + u32 noop = MI_NOOP; + + __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); + } +} + +u64 xe_lrc_descriptor(struct xe_lrc *lrc) +{ + return lrc->desc | xe_lrc_ggtt_addr(lrc); +} + +u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_seqno_ggtt_addr(lrc); +} + +struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc) +{ + return &xe_hw_fence_create(&lrc->fence_ctx, + __xe_lrc_seqno_map(lrc))->dma; +} + +s32 xe_lrc_seqno(struct xe_lrc *lrc) +{ + struct iosys_map map = __xe_lrc_seqno_map(lrc); + + return xe_map_read32(lrc_to_xe(lrc), &map); +} + +s32 xe_lrc_start_seqno(struct xe_lrc *lrc) +{ + struct iosys_map map = __xe_lrc_start_seqno_map(lrc); + + return xe_map_read32(lrc_to_xe(lrc), &map); +} + +u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_start_seqno_ggtt_addr(lrc); +} + +u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_parallel_ggtt_addr(lrc); +} + +struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) +{ + return __xe_lrc_parallel_map(lrc); +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h new file mode 100644 index 000000000000..e37f89e75ef8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _XE_LRC_H_ +#define 
_XE_LRC_H_ + +#include "xe_lrc_types.h" + +struct xe_device; +struct xe_engine; +enum xe_engine_class; +struct xe_hw_engine; +struct xe_vm; + +#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) + +int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + struct xe_engine *e, struct xe_vm *vm, u32 ring_size); +void xe_lrc_finish(struct xe_lrc *lrc); + +size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); +u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); + +void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head); +u32 xe_lrc_ring_head(struct xe_lrc *lrc); +u32 xe_lrc_ring_space(struct xe_lrc *lrc); +void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); + +u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); +u32 *xe_lrc_regs(struct xe_lrc *lrc); + +u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr); +void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val); + +u64 xe_lrc_descriptor(struct xe_lrc *lrc); + +u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc); +struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc); +s32 xe_lrc_seqno(struct xe_lrc *lrc); + +u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc); +s32 xe_lrc_start_seqno(struct xe_lrc *lrc); + +u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); +struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); + +size_t xe_lrc_skip_size(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h new file mode 100644 index 000000000000..2827efa2091d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_LRC_TYPES_H_ +#define _XE_LRC_TYPES_H_ + +#include "xe_hw_fence_types.h" + +struct xe_bo; + +/** + * struct xe_lrc - Logical ring context (LRC) and submission ring object + */ +struct xe_lrc { + /** + * @bo: buffer object (memory) for logical ring context, per process HW + * status page, and submission ring. 
+ */ + struct xe_bo *bo; + + /** @full_gt: full GT which this LRC belongs to */ + struct xe_gt *full_gt; + + /** @flags: LRC flags */ + u32 flags; +#define XE_LRC_PINNED BIT(1) + + /** @ring: submission ring state */ + struct { + /** @size: size of submission ring */ + u32 size; + /** @tail: tail of submission ring */ + u32 tail; + /** @old_tail: shadow of tail */ + u32 old_tail; + } ring; + + /** @desc: LRC descriptor */ + u64 desc; + + /** @fence_ctx: context for hw fence */ + struct xe_hw_fence_ctx fence_ctx; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h new file mode 100644 index 000000000000..0d24c124d202 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_MACROS_H_ +#define _XE_MACROS_H_ + +#include + +#define XE_EXTRA_DEBUG 1 +#define XE_WARN_ON WARN_ON +#define XE_BUG_ON BUG_ON + +#define XE_IOCTL_ERR(xe, cond) \ + ((cond) && (drm_info(&(xe)->drm, \ + "Ioctl argument check failed at %s:%d: %s", \ + __FILE__, __LINE__, #cond), 1)) + +#endif diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h new file mode 100644 index 000000000000..0bac1f73a80d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_map.h @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __XE_MAP_H__ +#define __XE_MAP_H__ + +#include + +#include + +/** + * DOC: Map layer + * + * All access to any memory shared with a device (both sysmem and vram) in the + * XE driver should go through this layer (xe_map). This layer is built on top + * of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory` + * and with extra hooks into the XE driver that allows adding asserts to memory + * accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics). 
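+ *
+ * A minimal illustrative read through this layer, mirroring how
+ * xe_hw_fence_signaled() samples a seqno that lives in device-visible
+ * memory:
+ *
+ * .. code-block:: c
+ *
+ *	u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);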
+ */ + +static inline void xe_map_memcpy_to(struct xe_device *xe, struct iosys_map *dst, + size_t dst_offset, const void *src, + size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memcpy_to(dst, dst_offset, src, len); +} + +static inline void xe_map_memcpy_from(struct xe_device *xe, void *dst, + const struct iosys_map *src, + size_t src_offset, size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memcpy_from(dst, src, src_offset, len); +} + +static inline void xe_map_memset(struct xe_device *xe, + struct iosys_map *dst, size_t offset, + int value, size_t len) +{ + xe_device_assert_mem_access(xe); + iosys_map_memset(dst, offset, value, len); +} + +/* FIXME: We likely should kill these two functions sooner or later */ +static inline u32 xe_map_read32(struct xe_device *xe, struct iosys_map *map) +{ + xe_device_assert_mem_access(xe); + + if (map->is_iomem) + return readl(map->vaddr_iomem); + else + return READ_ONCE(*(u32 *)map->vaddr); +} + +static inline void xe_map_write32(struct xe_device *xe, struct iosys_map *map, + u32 val) +{ + xe_device_assert_mem_access(xe); + + if (map->is_iomem) + writel(val, map->vaddr_iomem); + else + *(u32 *)map->vaddr = val; +} + +#define xe_map_rd(xe__, map__, offset__, type__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_rd(map__, offset__, type__); \ +}) + +#define xe_map_wr(xe__, map__, offset__, type__, val__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_wr(map__, offset__, type__, val__); \ +}) + +#define xe_map_rd_field(xe__, map__, struct_offset__, struct_type__, field__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_rd_field(map__, struct_offset__, struct_type__, field__); \ +}) + +#define xe_map_wr_field(xe__, map__, struct_offset__, struct_type__, field__, val__) ({ \ + struct xe_device *__xe = xe__; \ + xe_device_assert_mem_access(__xe); \ + iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__); \ +}) + +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c new file mode 100644 index 000000000000..7fc40e8009c3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -0,0 +1,1168 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ +#include "xe_migrate.h" + +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_engine.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_mocs.h" +#include "xe_pt.h" +#include "xe_res_cursor.h" +#include "xe_sched_job.h" +#include "xe_sync.h" +#include "xe_trace.h" +#include "xe_vm.h" + +#include +#include +#include +#include + +#include "gt/intel_gpu_commands.h" + +struct xe_migrate { + struct xe_engine *eng; + struct xe_gt *gt; + struct mutex job_mutex; + struct xe_bo *pt_bo; + struct xe_bo *cleared_bo; + u64 batch_base_ofs; + u64 usm_batch_base_ofs; + u64 cleared_vram_ofs; + struct dma_fence *fence; + struct drm_suballoc_manager vm_update_sa; +}; + +#define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. 
*/ +#define NUM_KERNEL_PDE 17 +#define NUM_PT_SLOTS 32 +#define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M) + +struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt) +{ + return gt->migrate->eng; +} + +static void xe_migrate_fini(struct drm_device *dev, void *arg) +{ + struct xe_migrate *m = arg; + struct ww_acquire_ctx ww; + + xe_vm_lock(m->eng->vm, &ww, 0, false); + xe_bo_unpin(m->pt_bo); + if (m->cleared_bo) + xe_bo_unpin(m->cleared_bo); + xe_vm_unlock(m->eng->vm, &ww); + + dma_fence_put(m->fence); + if (m->cleared_bo) + xe_bo_put(m->cleared_bo); + xe_bo_put(m->pt_bo); + drm_suballoc_manager_fini(&m->vm_update_sa); + mutex_destroy(&m->job_mutex); + xe_vm_close_and_put(m->eng->vm); + xe_engine_put(m->eng); +} + +static u64 xe_migrate_vm_addr(u64 slot, u32 level) +{ + XE_BUG_ON(slot >= NUM_PT_SLOTS); + + /* First slot is reserved for mapping of PT bo and bb, start from 1 */ + return (slot + 1ULL) << xe_pt_shift(level + 1); +} + +static u64 xe_migrate_vram_ofs(u64 addr) +{ + return addr + (256ULL << xe_pt_shift(2)); +} + +/* + * For flat CCS clearing we need a cleared chunk of memory to copy from, + * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy + * (it clears on only 14 bytes in each chunk of 16). + * If clearing the main surface one can use the part of the main surface + * already cleared, but for clearing as part of copying non-compressed + * data out of system memory, we don't readily have a cleared part of + * VRAM to copy from, so create one to use for that case. + */ +static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) +{ + struct xe_gt *gt = m->gt; + struct xe_device *xe = vm->xe; + size_t cleared_size; + u64 vram_addr; + bool is_vram; + + if (!xe_device_has_flat_ccs(xe)) + return 0; + + cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER); + cleared_size = PAGE_ALIGN(cleared_size); + m->cleared_bo = xe_bo_create_pin_map(xe, gt, vm, cleared_size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(m->cleared_bo)) + return PTR_ERR(m->cleared_bo); + + xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size); + vram_addr = xe_bo_addr(m->cleared_bo, 0, GEN8_PAGE_SIZE, &is_vram); + XE_BUG_ON(!is_vram); + m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr); + + return 0; +} + +static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, + struct xe_vm *vm) +{ + u8 id = gt->info.id; + u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; + u32 map_ofs, level, i; + struct xe_device *xe = gt_to_xe(m->gt); + struct xe_bo *bo, *batch = gt->kernel_bb_pool.bo; + u64 entry; + int ret; + + /* Can't bump NUM_PT_SLOTS too high */ + BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/GEN8_PAGE_SIZE); + /* Must be a multiple of 64K to support all platforms */ + BUILD_BUG_ON(NUM_PT_SLOTS * GEN8_PAGE_SIZE % SZ_64K); + /* And one slot reserved for the 4KiB page table updates */ + BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); + + /* Need to be sure everything fits in the first PT, or create more */ + XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M); + + bo = xe_bo_create_pin_map(vm->xe, m->gt, vm, + num_entries * GEN8_PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ret = xe_migrate_create_cleared_bo(m, vm); + if (ret) { + xe_bo_put(bo); + return ret; + } + + entry = gen8_pde_encode(bo, bo->size - GEN8_PAGE_SIZE, XE_CACHE_WB); + xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); 
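+
+	/*
+	 * The last 4K page of the new BO holds the highest-level page table
+	 * built below; the write above hooks it into entry 0 of the VM's root
+	 * page table so the rest of the structure becomes reachable.
+	 */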
+ + map_ofs = (num_entries - num_level) * GEN8_PAGE_SIZE; + + /* Map the entire BO in our level 0 pt */ + for (i = 0, level = 0; i < num_entries; level++) { + entry = gen8_pte_encode(NULL, bo, i * GEN8_PAGE_SIZE, + XE_CACHE_WB, 0, 0); + + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); + + if (vm->flags & XE_VM_FLAGS_64K) + i += 16; + else + i += 1; + } + + if (!IS_DGFX(xe)) { + XE_BUG_ON(xe->info.supports_usm); + + /* Write out batch too */ + m->batch_base_ofs = NUM_PT_SLOTS * GEN8_PAGE_SIZE; + for (i = 0; i < batch->size; + i += vm->flags & XE_VM_FLAGS_64K ? GEN8_64K_PAGE_SIZE : + GEN8_PAGE_SIZE) { + entry = gen8_pte_encode(NULL, batch, i, + XE_CACHE_WB, 0, 0); + + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, + entry); + level++; + } + } else { + bool is_lmem; + u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_lmem); + + m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); + + if (xe->info.supports_usm) { + batch = gt->usm.bb_pool.bo; + batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, + &is_lmem); + m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); + } + } + + for (level = 1; level < num_level; level++) { + u32 flags = 0; + + if (vm->flags & XE_VM_FLAGS_64K && level == 1) + flags = GEN12_PDE_64K; + + entry = gen8_pde_encode(bo, map_ofs + (level - 1) * + GEN8_PAGE_SIZE, XE_CACHE_WB); + xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE * level, u64, + entry | flags); + } + + /* Write PDE's that point to our BO. */ + for (i = 0; i < num_entries - num_level; i++) { + entry = gen8_pde_encode(bo, i * GEN8_PAGE_SIZE, + XE_CACHE_WB); + + xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE + + (i + 1) * 8, u64, entry); + } + + /* Identity map the entire vram at 256GiB offset */ + if (IS_DGFX(xe)) { + u64 pos, ofs, flags; + + level = 2; + ofs = map_ofs + GEN8_PAGE_SIZE * level + 256 * 8; + flags = GEN8_PAGE_RW | GEN8_PAGE_PRESENT | PPAT_CACHED | + GEN12_PPGTT_PTE_LM | GEN8_PDPE_PS_1G; + + /* + * Use 1GB pages, it shouldn't matter the physical amount of + * vram is less, when we don't access it. + */ + for (pos = 0; pos < xe->mem.vram.size; pos += SZ_1G, ofs += 8) + xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); + } + + /* + * Example layout created above, with root level = 3: + * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's + * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's + * [PT9...PT28]: Userspace PT's for VM_BIND, 4 KiB PTE's + * [PT29 = PDE 0] [PT30 = PDE 1] [PT31 = PDE 2] + * + * This makes the lowest part of the VM point to the pagetables. + * Hence the lowest 2M in the vm should point to itself, with a few writes + * and flushes, other parts of the VM can be used either for copying and + * clearing. + * + * For performance, the kernel reserves PDE's, so about 20 are left + * for async VM updates. + * + * To make it easier to work, each scratch PT is put in slot (1 + PT #) + * everywhere, this allows lockless updates to scratch pages by using + * the different addresses in VM. 
+ */ +#define NUM_VMUSA_UNIT_PER_PAGE 32 +#define VM_SA_UPDATE_UNIT_SIZE (GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) +#define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) + drm_suballoc_manager_init(&m->vm_update_sa, + (map_ofs / GEN8_PAGE_SIZE - NUM_KERNEL_PDE) * + NUM_VMUSA_UNIT_PER_PAGE, 0); + + m->pt_bo = bo; + return 0; +} + +struct xe_migrate *xe_migrate_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_migrate *m; + struct xe_vm *vm; + struct ww_acquire_ctx ww; + int err; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); + if (!m) + return ERR_PTR(-ENOMEM); + + m->gt = gt; + + /* Special layout, prepared below.. */ + vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | + XE_VM_FLAG_SET_GT_ID(gt)); + if (IS_ERR(vm)) + return ERR_CAST(vm); + + xe_vm_lock(vm, &ww, 0, false); + err = xe_migrate_prepare_vm(gt, m, vm); + xe_vm_unlock(vm, &ww); + if (err) { + xe_vm_close_and_put(vm); + return ERR_PTR(err); + } + + if (xe->info.supports_usm) { + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + XE_ENGINE_CLASS_COPY, + gt->usm.reserved_bcs_instance, + false); + if (!hwe) + return ERR_PTR(-EINVAL); + + m->eng = xe_engine_create(xe, vm, + BIT(hwe->logical_instance), 1, + hwe, ENGINE_FLAG_KERNEL); + } else { + m->eng = xe_engine_create_class(xe, gt, vm, + XE_ENGINE_CLASS_COPY, + ENGINE_FLAG_KERNEL); + } + if (IS_ERR(m->eng)) { + xe_vm_close_and_put(vm); + return ERR_CAST(m->eng); + } + + mutex_init(&m->job_mutex); + + err = drmm_add_action_or_reset(&xe->drm, xe_migrate_fini, m); + if (err) + return ERR_PTR(err); + + return m; +} + +static void emit_arb_clear(struct xe_bb *bb) +{ + /* 1 dword */ + bb->cs[bb->len++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; +} + +static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur) +{ + /* + * For VRAM we use identity mapped pages so we are limited to current + * cursor size. For system we program the pages ourselves so we have no + * such limitation. + */ + return min_t(u64, MAX_PREEMPTDISABLE_TRANSFER, + mem_type_is_vram(cur->mem_type) ? cur->size : + cur->remaining); +} + +static u32 pte_update_size(struct xe_migrate *m, + bool is_vram, + struct xe_res_cursor *cur, + u64 *L0, u64 *L0_ofs, u32 *L0_pt, + u32 cmd_size, u32 pt_ofs, u32 avail_pts) +{ + u32 cmds = 0; + + *L0_pt = pt_ofs; + if (!is_vram) { + /* Clip L0 to available size */ + u64 size = min(*L0, (u64)avail_pts * SZ_2M); + u64 num_4k_pages = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); + + *L0 = size; + *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); + + /* MI_STORE_DATA_IMM */ + cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff); + + /* PDE qwords */ + cmds += num_4k_pages * 2; + + /* Each chunk has a single blit command */ + cmds += cmd_size; + } else { + /* Offset into identity map. */ + *L0_ofs = xe_migrate_vram_ofs(cur->start); + cmds += cmd_size; + } + + return cmds; +} + +static void emit_pte(struct xe_migrate *m, + struct xe_bb *bb, u32 at_pt, + bool is_vram, + struct xe_res_cursor *cur, + u32 size, struct xe_bo *bo) +{ + u32 ptes; + u64 ofs = at_pt * GEN8_PAGE_SIZE; + u64 cur_ofs; + + /* + * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently + * we're only emitting VRAM PTEs during sanity tests, so when + * that's moved to a Kunit test, we should condition VRAM PTEs + * on running tests. 
+ */ + + ptes = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); + + while (ptes) { + u32 chunk = min(0x1ffU, ptes); + + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | + (chunk * 2 + 1); + bb->cs[bb->len++] = ofs; + bb->cs[bb->len++] = 0; + + cur_ofs = ofs; + ofs += chunk * 8; + ptes -= chunk; + + while (chunk--) { + u64 addr; + + XE_BUG_ON(cur->start & (PAGE_SIZE - 1)); + + if (is_vram) { + addr = cur->start; + + /* Is this a 64K PTE entry? */ + if ((m->eng->vm->flags & XE_VM_FLAGS_64K) && + !(cur_ofs & (16 * 8 - 1))) { + XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K)); + addr |= GEN12_PTE_PS64; + } + + addr |= GEN12_PPGTT_PTE_LM; + } else { + addr = xe_res_dma(cur); + } + addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + + xe_res_next(cur, PAGE_SIZE); + cur_ofs += 8; + } + } +} + +#define EMIT_COPY_CCS_DW 5 +static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, + u64 dst_ofs, bool dst_is_indirect, + u64 src_ofs, bool src_is_indirect, + u32 size) +{ + u32 *cs = bb->cs + bb->len; + u32 num_ccs_blks; + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + + num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), + NUM_CCS_BYTES_PER_BLOCK); + XE_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER); + *cs++ = XY_CTRL_SURF_COPY_BLT | + (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | + (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | + ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT; + *cs++ = lower_32_bits(src_ofs); + *cs++ = upper_32_bits(src_ofs) | + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); + *cs++ = lower_32_bits(dst_ofs); + *cs++ = upper_32_bits(dst_ofs) | + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); + + bb->len = cs - bb->cs; +} + +#define EMIT_COPY_DW 10 +static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, + u64 src_ofs, u64 dst_ofs, unsigned int size, + unsigned pitch) +{ + XE_BUG_ON(size / pitch > S16_MAX); + XE_BUG_ON(pitch / 4 > S16_MAX); + XE_BUG_ON(pitch > U16_MAX); + + bb->cs[bb->len++] = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); + bb->cs[bb->len++] = BLT_DEPTH_32 | pitch; + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; + bb->cs[bb->len++] = lower_32_bits(dst_ofs); + bb->cs[bb->len++] = upper_32_bits(dst_ofs); + bb->cs[bb->len++] = 0; + bb->cs[bb->len++] = pitch; + bb->cs[bb->len++] = lower_32_bits(src_ofs); + bb->cs[bb->len++] = upper_32_bits(src_ofs); +} + +static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, + enum dma_resv_usage usage) +{ + return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); +} + +static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) +{ + return usm ? m->usm_batch_base_ofs : m->batch_base_ofs; +} + +static u32 xe_migrate_ccs_copy(struct xe_migrate *m, + struct xe_bb *bb, + u64 src_ofs, bool src_is_vram, + u64 dst_ofs, bool dst_is_vram, u32 dst_size, + u64 ccs_ofs, bool copy_ccs) +{ + struct xe_gt *gt = m->gt; + u32 flush_flags = 0; + + if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) { + /* + * If the bo doesn't have any CCS metadata attached, we still + * need to clear it for security reasons. + */ + emit_copy_ccs(gt, bb, dst_ofs, true, m->cleared_vram_ofs, false, + dst_size); + flush_flags = MI_FLUSH_DW_CCS; + } else if (copy_ccs) { + if (!src_is_vram) + src_ofs = ccs_ofs; + else if (!dst_is_vram) + dst_ofs = ccs_ofs; + + /* + * At the moment, we don't support copying CCS metadata from + * system to system. 
+ */ + XE_BUG_ON(!src_is_vram && !dst_is_vram); + + emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs, + src_is_vram, dst_size); + if (dst_is_vram) + flush_flags = MI_FLUSH_DW_CCS; + } + + return flush_flags; +} + +struct dma_fence *xe_migrate_copy(struct xe_migrate *m, + struct xe_bo *bo, + struct ttm_resource *src, + struct ttm_resource *dst) +{ + struct xe_gt *gt = m->gt; + struct xe_device *xe = gt_to_xe(gt); + struct dma_fence *fence = NULL; + u64 size = bo->size; + struct xe_res_cursor src_it, dst_it, ccs_it; + u64 src_L0_ofs, dst_L0_ofs; + u32 src_L0_pt, dst_L0_pt; + u64 src_L0, dst_L0; + int pass = 0; + int err; + bool src_is_vram = mem_type_is_vram(src->mem_type); + bool dst_is_vram = mem_type_is_vram(dst->mem_type); + bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(bo); + bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); + + if (!src_is_vram) + xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it); + else + xe_res_first(src, 0, bo->size, &src_it); + if (!dst_is_vram) + xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &dst_it); + else + xe_res_first(dst, 0, bo->size, &dst_it); + + if (copy_system_ccs) + xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo), + PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), + &ccs_it); + + while (size) { + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */ + struct xe_sched_job *job; + struct xe_bb *bb; + u32 flush_flags; + u32 update_idx; + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + bool usm = xe->info.supports_usm; + + src_L0 = xe_migrate_res_sizes(&src_it); + dst_L0 = xe_migrate_res_sizes(&dst_it); + + drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", + pass++, src_L0, dst_L0); + + src_L0 = min(src_L0, dst_L0); + + batch_size += pte_update_size(m, src_is_vram, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + NUM_PT_PER_BLIT); + + batch_size += pte_update_size(m, dst_is_vram, &dst_it, &src_L0, + &dst_L0_ofs, &dst_L0_pt, 0, + NUM_PT_PER_BLIT, NUM_PT_PER_BLIT); + + if (copy_system_ccs) { + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, false, &ccs_it, &ccs_size, + &ccs_ofs, &ccs_pt, 0, + 2 * NUM_PT_PER_BLIT, + NUM_PT_PER_BLIT); + } + + /* Add copy commands size here */ + batch_size += EMIT_COPY_DW + + (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0); + + bb = xe_bb_new(gt, batch_size, usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto err_sync; + } + + /* Preemption is enabled again by the ring ops. 
*/ + if (!src_is_vram || !dst_is_vram) + emit_arb_clear(bb); + + if (!src_is_vram) + emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0, + bo); + else + xe_res_next(&src_it, src_L0); + + if (!dst_is_vram) + emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0, + bo); + else + xe_res_next(&dst_it, src_L0); + + if (copy_system_ccs) + emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, bo); + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, GEN8_PAGE_SIZE); + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram, + dst_L0_ofs, dst_is_vram, + src_L0, ccs_ofs, copy_ccs); + + mutex_lock(&m->job_mutex); + job = xe_bb_create_migration_job(m->eng, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err; + } + + xe_sched_job_add_migrate_flush(job, flush_flags); + if (!fence) { + err = job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); + if (err) + goto err_job; + } + + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + size -= src_L0; + continue; + +err_job: + xe_sched_job_put(job); +err: + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); + +err_sync: + /* Sync partial copy if any. */ + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + + return ERR_PTR(err); + } + + return fence; +} + +static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch, u32 value, bool is_vram) +{ + u32 *cs = bb->cs + bb->len; + u32 len = XY_FAST_COLOR_BLT_DW; + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + + if (GRAPHICS_VERx100(gt->xe) < 1250) + len = 11; + + *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | + (len - 2); + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | + (pitch - 1); + *cs++ = 0; + *cs++ = (size / pitch) << 16 | pitch / 4; + *cs++ = lower_32_bits(src_ofs); + *cs++ = upper_32_bits(src_ofs); + *cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT; + *cs++ = value; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + if (len > 11) { + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + } + + XE_BUG_ON(cs - bb->cs != len + bb->len); + bb->len += len; + + return 0; +} + +struct dma_fence *xe_migrate_clear(struct xe_migrate *m, + struct xe_bo *bo, + struct ttm_resource *dst, + u32 value) +{ + bool clear_vram = mem_type_is_vram(dst->mem_type); + struct xe_gt *gt = m->gt; + struct xe_device *xe = gt_to_xe(gt); + struct dma_fence *fence = NULL; + u64 size = bo->size; + struct xe_res_cursor src_it; + struct ttm_resource *src = dst; + int err; + int pass = 0; + + if (!clear_vram) + xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it); + else + xe_res_first(src, 0, bo->size, &src_it); + + while (size) { + u64 clear_L0_ofs; + u32 clear_L0_pt; + u32 flush_flags = 0; + u64 clear_L0; + struct xe_sched_job *job; + struct xe_bb *bb; + u32 batch_size, update_idx; + bool usm = xe->info.supports_usm; + + clear_L0 = xe_migrate_res_sizes(&src_it); + drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); + + /* Calculate final sizes and batch size.. 
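As a worked example of this sizing (illustrative numbers only, assuming 4 KiB pages and that XY_FAST_COLOR_BLT_DW is 16, as the tail of emit_clear() above implies): clearing a 2 MiB system-memory chunk reserves 2 dwords for the arb clear and MI_BATCH_BUFFER_END, 6 dwords of MI_STORE_DATA_IMM headers plus 1024 dwords of PTE payload from pte_update_size(), and 16 dwords for the color blit itself, roughly 1048 dwords in total. A VRAM clear skips the PTE writes entirely (the identity map is used) and, with flat CCS enabled, adds another EMIT_COPY_CCS_DW (5) dwords for the CCS clear instead.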
*/ + batch_size = 2 + + pte_update_size(m, clear_vram, &src_it, + &clear_L0, &clear_L0_ofs, &clear_L0_pt, + XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT); + if (xe_device_has_flat_ccs(xe) && clear_vram) + batch_size += EMIT_COPY_CCS_DW; + + /* Clear commands */ + + if (WARN_ON_ONCE(!clear_L0)) + break; + + bb = xe_bb_new(gt, batch_size, usm); + if (IS_ERR(bb)) { + err = PTR_ERR(bb); + goto err_sync; + } + + size -= clear_L0; + + /* TODO: Add dependencies here */ + + /* Preemption is enabled again by the ring ops. */ + if (!clear_vram) { + emit_arb_clear(bb); + emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0, + bo); + } else { + xe_res_next(&src_it, clear_L0); + } + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE, + value, clear_vram); + if (xe_device_has_flat_ccs(xe) && clear_vram) { + emit_copy_ccs(gt, bb, clear_L0_ofs, true, + m->cleared_vram_ofs, false, clear_L0); + flush_flags = MI_FLUSH_DW_CCS; + } + + mutex_lock(&m->job_mutex); + job = xe_bb_create_migration_job(m->eng, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err; + } + + xe_sched_job_add_migrate_flush(job, flush_flags); + + xe_sched_job_arm(job); + dma_fence_put(fence); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + dma_fence_put(m->fence); + m->fence = dma_fence_get(fence); + + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + continue; + +err: + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); +err_sync: + /* Sync partial copies if any. */ + if (fence) { + dma_fence_wait(m->fence, false); + dma_fence_put(fence); + } + + return ERR_PTR(err); + } + + return fence; +} + +static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs, + const struct xe_vm_pgtable_update *update, + struct xe_migrate_pt_update *pt_update) +{ + const struct xe_migrate_pt_update_ops *ops = pt_update->ops; + u32 chunk; + u32 ofs = update->ofs, size = update->qwords; + + /* + * If we have 512 entries (max), we would populate it ourselves, + * and update the PDE above it to the new pointer. + * The only time this can only happen if we have to update the top + * PDE. This requires a BO that is almost vm->size big. + * + * This shouldn't be possible in practice.. might change when 16K + * pages are used. Hence the BUG_ON. 
+ */ + XE_BUG_ON(update->qwords > 0x1ff); + if (!ppgtt_ofs) { + bool is_lmem; + + ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, + GEN8_PAGE_SIZE, + &is_lmem)); + XE_BUG_ON(!is_lmem); + } + + do { + u64 addr = ppgtt_ofs + ofs * 8; + chunk = min(update->qwords, 0x1ffU); + + /* Ensure populatefn can do memset64 by aligning bb->cs */ + if (!(bb->len & 1)) + bb->cs[bb->len++] = MI_NOOP; + + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | + (chunk * 2 + 1); + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + ops->populate(pt_update, gt, NULL, bb->cs + bb->len, ofs, chunk, + update); + + bb->len += chunk * 2; + ofs += chunk; + size -= chunk; + } while (size); +} + +struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m) +{ + return xe_vm_get(m->eng->vm); +} + +static struct dma_fence * +xe_migrate_update_pgtables_cpu(struct xe_migrate *m, + struct xe_vm *vm, struct xe_bo *bo, + const struct xe_vm_pgtable_update *updates, + u32 num_updates, bool wait_vm, + struct xe_migrate_pt_update *pt_update) +{ + const struct xe_migrate_pt_update_ops *ops = pt_update->ops; + struct dma_fence *fence; + int err; + u32 i; + + /* Wait on BO moves for 10 ms, then fall back to GPU job */ + if (bo) { + long wait; + + wait = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, + true, HZ / 100); + if (wait <= 0) + return ERR_PTR(-ETIME); + } + if (wait_vm) { + long wait; + + wait = dma_resv_wait_timeout(&vm->resv, + DMA_RESV_USAGE_BOOKKEEP, + true, HZ / 100); + if (wait <= 0) + return ERR_PTR(-ETIME); + } + + if (ops->pre_commit) { + err = ops->pre_commit(pt_update); + if (err) + return ERR_PTR(err); + } + for (i = 0; i < num_updates; i++) { + const struct xe_vm_pgtable_update *update = &updates[i]; + + ops->populate(pt_update, m->gt, &update->pt_bo->vmap, NULL, + update->ofs, update->qwords, update); + } + + trace_xe_vm_cpu_bind(vm); + xe_device_wmb(vm->xe); + + fence = dma_fence_get_stub(); + + return fence; +} + +static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) +{ + int i; + + for (i = 0; i < num_syncs; i++) { + struct dma_fence *fence = syncs[i].fence; + + if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags)) + return false; + } + + return true; +} + +static bool engine_is_idle(struct xe_engine *e) +{ + return !e || e->lrc[0].fence_ctx.next_seqno == 1 || + xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno; +} + +struct dma_fence * +xe_migrate_update_pgtables(struct xe_migrate *m, + struct xe_vm *vm, + struct xe_bo *bo, + struct xe_engine *eng, + const struct xe_vm_pgtable_update *updates, + u32 num_updates, + struct xe_sync_entry *syncs, u32 num_syncs, + struct xe_migrate_pt_update *pt_update) +{ + const struct xe_migrate_pt_update_ops *ops = pt_update->ops; + struct xe_gt *gt = m->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_sched_job *job; + struct dma_fence *fence; + struct drm_suballoc *sa_bo = NULL; + struct xe_vma *vma = pt_update->vma; + struct xe_bb *bb; + u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0; + u64 addr; + int err = 0; + bool usm = !eng && xe->info.supports_usm; + bool first_munmap_rebind = vma && vma->first_munmap_rebind; + + /* Use the CPU if no in syncs and engine is idle */ + if (no_in_syncs(syncs, num_syncs) && engine_is_idle(eng)) { + fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, + num_updates, + first_munmap_rebind, + pt_update); + if (!IS_ERR(fence) || fence == ERR_PTR(-EAGAIN)) + return fence; + } + + /* fixed + PTE entries */ + if (IS_DGFX(xe)) + 
batch_size = 2; + else + batch_size = 6 + num_updates * 2; + + for (i = 0; i < num_updates; i++) { + u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff); + + /* align noop + MI_STORE_DATA_IMM cmd prefix */ + batch_size += 4 * num_cmds + updates[i].qwords * 2; + } + + /* + * XXX: Create temp bo to copy from, if batch_size becomes too big? + * + * Worst case: Sum(2 * (each lower level page size) + (top level page size)) + * Should be reasonably bound.. + */ + XE_BUG_ON(batch_size >= SZ_128K); + + bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm); + if (IS_ERR(bb)) + return ERR_CAST(bb); + + /* For sysmem PTE's, need to map them in our hole.. */ + if (!IS_DGFX(xe)) { + ppgtt_ofs = NUM_KERNEL_PDE - 1; + if (eng) { + XE_BUG_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT); + + sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, + GFP_KERNEL, true, 0); + if (IS_ERR(sa_bo)) { + err = PTR_ERR(sa_bo); + goto err; + } + + ppgtt_ofs = NUM_KERNEL_PDE + + (drm_suballoc_soffset(sa_bo) / + NUM_VMUSA_UNIT_PER_PAGE); + page_ofs = (drm_suballoc_soffset(sa_bo) % + NUM_VMUSA_UNIT_PER_PAGE) * + VM_SA_UPDATE_UNIT_SIZE; + } + + /* Preemption is enabled again by the ring ops. */ + emit_arb_clear(bb); + + /* Map our PT's to gtt */ + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | + (num_updates * 2 + 1); + bb->cs[bb->len++] = ppgtt_ofs * GEN8_PAGE_SIZE + page_ofs; + bb->cs[bb->len++] = 0; /* upper_32_bits */ + + for (i = 0; i < num_updates; i++) { + struct xe_bo *pt_bo = updates[i].pt_bo; + + BUG_ON(pt_bo->size != SZ_4K); + + addr = gen8_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, + 0, 0); + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + } + + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + addr = xe_migrate_vm_addr(ppgtt_ofs, 0) + + (page_ofs / sizeof(u64)) * GEN8_PAGE_SIZE; + for (i = 0; i < num_updates; i++) + write_pgtable(m->gt, bb, addr + i * GEN8_PAGE_SIZE, + &updates[i], pt_update); + } else { + /* phys pages, no preamble required */ + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + update_idx = bb->len; + + /* Preemption is enabled again by the ring ops. 
*/ + emit_arb_clear(bb); + for (i = 0; i < num_updates; i++) + write_pgtable(m->gt, bb, 0, &updates[i], pt_update); + } + + if (!eng) + mutex_lock(&m->job_mutex); + + job = xe_bb_create_migration_job(eng ?: m->eng, bb, + xe_migrate_batch_base(m, usm), + update_idx); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto err_bb; + } + + /* Wait on BO move */ + if (bo) { + err = job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); + if (err) + goto err_job; + } + + /* + * Munmap style VM unbind, need to wait for all jobs to be complete / + * trigger preempts before moving forward + */ + if (first_munmap_rebind) { + err = job_add_deps(job, &vm->resv, + DMA_RESV_USAGE_BOOKKEEP); + if (err) + goto err_job; + } + + for (i = 0; !err && i < num_syncs; i++) + err = xe_sync_entry_add_deps(&syncs[i], job); + + if (err) + goto err_job; + + if (ops->pre_commit) { + err = ops->pre_commit(pt_update); + if (err) + goto err_job; + } + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + if (!eng) + mutex_unlock(&m->job_mutex); + + xe_bb_free(bb, fence); + drm_suballoc_free(sa_bo, fence); + + return fence; + +err_job: + xe_sched_job_put(job); +err_bb: + if (!eng) + mutex_unlock(&m->job_mutex); + xe_bb_free(bb, NULL); +err: + drm_suballoc_free(sa_bo, NULL); + return ERR_PTR(err); +} + +void xe_migrate_wait(struct xe_migrate *m) +{ + if (m->fence) + dma_fence_wait(m->fence, false); +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_migrate.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h new file mode 100644 index 000000000000..267057a3847f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __XE_MIGRATE__ +#define __XE_MIGRATE__ + +#include + +struct dma_fence; +struct iosys_map; +struct ttm_resource; + +struct xe_bo; +struct xe_gt; +struct xe_engine; +struct xe_migrate; +struct xe_migrate_pt_update; +struct xe_sync_entry; +struct xe_pt; +struct xe_vm; +struct xe_vm_pgtable_update; +struct xe_vma; + +struct xe_migrate_pt_update_ops { + /** + * populate() - Populate a command buffer or page-table with ptes. + * @pt_update: Embeddable callback argument. + * @gt: The gt for the current operation. + * @map: struct iosys_map into the memory to be populated. + * @pos: If @map is NULL, map into the memory to be populated. + * @ofs: qword offset into @map, unused if @map is NULL. + * @num_qwords: Number of qwords to write. + * @update: Information about the PTEs to be inserted. + * + * This interface is intended to be used as a callback into the + * page-table system to populate command buffers or shared + * page-tables with PTEs. + */ + void (*populate)(struct xe_migrate_pt_update *pt_update, + struct xe_gt *gt, struct iosys_map *map, + void *pos, u32 ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update); + + /** + * pre_commit(): Callback to be called just before arming the + * sched_job. + * @pt_update: Pointer to embeddable callback argument. + * + * Return: 0 on success, negative error code on error. 
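To make the populate() contract documented above concrete, here is a minimal sketch of an implementation that zaps a range of entries to an empty PTE; the function name and the empty value are illustrative and not part of this interface:

	static void example_populate(struct xe_migrate_pt_update *pt_update,
				     struct xe_gt *gt, struct iosys_map *map,
				     void *pos, u32 ofs, u32 num_qwords,
				     const struct xe_vm_pgtable_update *update)
	{
		u64 empty = 0;	/* illustrative "empty" PTE value */
		u32 i;

		if (map)	/* CPU path: write through the iosys_map at qword offset @ofs */
			for (i = 0; i < num_qwords; i++)
				iosys_map_wr(map, (ofs + i) * sizeof(u64), u64, empty);
		else		/* batch path: fill the command-buffer payload directly */
			memset64(pos, empty, num_qwords);
	}

This mirrors how write_pgtable() and xe_migrate_update_pgtables_cpu() invoke the callback: either with a NULL map and a position inside the batch buffer, or with an iosys_map into the page-table BO and a qword offset.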
+ */ + int (*pre_commit)(struct xe_migrate_pt_update *pt_update); +}; + +struct xe_migrate_pt_update { + const struct xe_migrate_pt_update_ops *ops; + struct xe_vma *vma; +}; + +struct xe_migrate *xe_migrate_init(struct xe_gt *gt); + +struct dma_fence *xe_migrate_copy(struct xe_migrate *m, + struct xe_bo *bo, + struct ttm_resource *src, + struct ttm_resource *dst); + +struct dma_fence *xe_migrate_clear(struct xe_migrate *m, + struct xe_bo *bo, + struct ttm_resource *dst, + u32 value); + +struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); + +struct dma_fence * +xe_migrate_update_pgtables(struct xe_migrate *m, + struct xe_vm *vm, + struct xe_bo *bo, + struct xe_engine *eng, + const struct xe_vm_pgtable_update *updates, + u32 num_updates, + struct xe_sync_entry *syncs, u32 num_syncs, + struct xe_migrate_pt_update *pt_update); + +void xe_migrate_wait(struct xe_migrate *m); + +struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt); +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h new file mode 100644 index 000000000000..6a68fdff08dc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_migrate_doc.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_MIGRATE_DOC_H_ +#define _XE_MIGRATE_DOC_H_ + +/** + * DOC: Migrate Layer + * + * The XE migrate layer is used generate jobs which can copy memory (eviction), + * clear memory, or program tables (binds). This layer exists in every GT, has + * a migrate engine, and uses a special VM for all generated jobs. + * + * Special VM details + * ================== + * + * The special VM is configured with a page structure where we can dynamically + * map BOs which need to be copied and cleared, dynamically map other VM's page + * table BOs for updates, and identity map the entire device's VRAM with 1 GB + * pages. + * + * Currently the page structure consists of 48 phyiscal pages with 16 being + * reserved for BO mapping during copies and clear, 1 reserved for kernel binds, + * several pages are needed to setup the identity mappings (exact number based + * on how many bits of address space the device has), and the rest are reserved + * user bind operations. + * + * TODO: Diagram of layout + * + * Bind jobs + * ========= + * + * A bind job consist of two batches and runs either on the migrate engine + * (kernel binds) or the bind engine passed in (user binds). In both cases the + * VM of the engine is the migrate VM. + * + * The first batch is used to update the migration VM page structure to point to + * the bind VM page table BOs which need to be updated. A physical page is + * required for this. If it is a user bind, the page is allocated from pool of + * pages reserved user bind operations with drm_suballoc managing this pool. If + * it is a kernel bind, the page reserved for kernel binds is used. + * + * The first batch is only required for devices without VRAM as when the device + * has VRAM the bind VM page table BOs are in VRAM and the identity mapping can + * be used. + * + * The second batch is used to program page table updated in the bind VM. Why + * not just one batch? Well the TLBs need to be invalidated between these two + * batches and that only can be done from the ring. + * + * When the bind job complete, the page allocated is returned the pool of pages + * reserved for user bind operations if a user bind. No need do this for kernel + * binds as the reserved kernel page is serially used by each job. 
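As a worked example of the page accounting described above (assuming the 4 KiB GEN8_PAGE_SIZE used elsewhere in this series): the page reserved for user binds is carved into NUM_VMUSA_UNIT_PER_PAGE = 32 units of 4096 / 32 = 128 bytes each, i.e. NUM_VMUSA_WRITES_PER_UNIT = 128 / 8 = 16 qword writes per unit, which is why xe_migrate_update_pgtables() limits a single user-bind job to 16 page-table updates from one suballocated unit.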
+ * + * Copy / clear jobs + * ================= + * + * A copy or clear job consist of two batches and runs on the migrate engine. + * + * Like binds, the first batch is used update the migration VM page structure. + * In copy jobs, we need to map the source and destination of the BO into page + * the structure. In clear jobs, we just need to add 1 mapping of BO into the + * page structure. We use the 16 reserved pages in migration VM for mappings, + * this gives us a maximum copy size of 16 MB and maximum clear size of 32 MB. + * + * The second batch is used do either do the copy or clear. Again similar to + * binds, two batches are required as the TLBs need to be invalidated from the + * ring between the batches. + * + * More than one job will be generated if the BO is larger than maximum copy / + * clear size. + * + * Future work + * =========== + * + * Update copy and clear code to use identity mapped VRAM. + * + * Can we rework the use of the pages async binds to use all the entries in each + * page? + * + * Using large pages for sysmem mappings. + * + * Is it possible to identity map the sysmem? We should explore this. + */ + +#endif diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c new file mode 100644 index 000000000000..42e2405f2f48 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_mmio.h" + +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_macros.h" +#include "xe_module.h" + +#include "i915_reg.h" +#include "gt/intel_engine_regs.h" +#include "gt/intel_gt_regs.h" + +#define XEHP_MTCFG_ADDR _MMIO(0x101800) +#define TILE_COUNT REG_GENMASK(15, 8) +#define GEN12_LMEM_BAR 2 + +static int xe_set_dma_info(struct xe_device *xe) +{ + unsigned int mask_size = xe->info.dma_mask_size; + int err; + + /* + * We don't have a max segment size, so set it to the max so sg's + * debugging layer doesn't complain + */ + dma_set_max_seg_size(xe->drm.dev, UINT_MAX); + + err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); + if (err) + goto mask_err; + + err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); + if (err) + goto mask_err; + + return 0; + +mask_err: + drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err); + return err; +} + +#ifdef CONFIG_64BIT +static int +_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int bar_size = pci_rebar_bytes_to_size(size); + int ret; + + if (pci_resource_len(pdev, resno)) + pci_release_resource(pdev, resno); + + ret = pci_resize_resource(pdev, resno, bar_size); + if (ret) { + drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe)\n", + resno, 1 << bar_size, ERR_PTR(ret)); + return -1; + } + + drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); + return 1; +} + +static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct pci_bus *root = pdev->bus; + struct resource *root_res; + resource_size_t rebar_size; + resource_size_t current_size; + u32 pci_cmd; + int i; + int ret; + u64 force_lmem_bar_size = xe_force_lmem_bar_size; + + current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR)); + + if (force_lmem_bar_size) { + u32 bar_sizes; + + rebar_size = force_lmem_bar_size * (resource_size_t)SZ_1M; + bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR); + + if 
(rebar_size == current_size) + return 0; + + if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || + rebar_size >= roundup_pow_of_two(lmem_size)) { + rebar_size = lmem_size; + drm_info(&xe->drm, + "Given bar size is not within supported size, setting it to default: %llu\n", + (u64)lmem_size >> 20); + } + } else { + rebar_size = current_size; + + if (rebar_size != roundup_pow_of_two(lmem_size)) + rebar_size = lmem_size; + else + return 0; + } + + while (root->parent) + root = root->parent; + + pci_bus_for_each_resource(root, root_res, i) { + if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && + root_res->start > 0x100000000ull) + break; + } + + if (!root_res) { + drm_info(&xe->drm, "Can't resize LMEM BAR - platform support is missing\n"); + return -1; + } + + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); + + ret = _resize_bar(xe, GEN12_LMEM_BAR, rebar_size); + + pci_assign_unassigned_bus_resources(pdev->bus); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); + return ret; +} +#else +static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) { return 0; } +#endif + +static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) +{ + if (!pci_resource_flags(pdev, bar)) + return false; + + if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET) + return false; + + if (!pci_resource_len(pdev, bar)) + return false; + + return true; +} + +int xe_mmio_probe_vram(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_gt *gt; + u8 id; + u64 lmem_size; + u64 original_size; + u64 current_size; + u64 flat_ccs_base; + int resize_result; + + if (!IS_DGFX(xe)) { + xe->mem.vram.mapping = 0; + xe->mem.vram.size = 0; + xe->mem.vram.io_start = 0; + + for_each_gt(gt, xe, id) { + gt->mem.vram.mapping = 0; + gt->mem.vram.size = 0; + gt->mem.vram.io_start = 0; + } + return 0; + } + + if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) { + drm_err(&xe->drm, "pci resource is not valid\n"); + return -ENXIO; + } + + gt = xe_device_get_gt(xe, 0); + lmem_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg); + + original_size = pci_resource_len(pdev, GEN12_LMEM_BAR); + + if (xe->info.has_flat_ccs) { + int err; + u32 reg; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); + lmem_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + + drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n", + lmem_size, flat_ccs_base); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + } else { + flat_ccs_base = lmem_size; + } + + resize_result = xe_resize_lmem_bar(xe, lmem_size); + current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); + xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); + + xe->mem.vram.size = min(current_size, lmem_size); + + if (!xe->mem.vram.size) + return -EIO; + + if (resize_result > 0) + drm_info(&xe->drm, "Successfully resize LMEM from %lluMiB to %lluMiB\n", + (u64)original_size >> 20, + (u64)current_size >> 20); + else if (xe->mem.vram.size < lmem_size && !xe_force_lmem_bar_size) + drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. 
Consider enabling 'Resizable BAR' support in your BIOS.\n", + (u64)xe->mem.vram.size >> 20); + if (xe->mem.vram.size < lmem_size) + drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", + lmem_size, xe->mem.vram.size); + +#ifdef CONFIG_64BIT + xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size); +#endif + + xe->mem.vram.size = min_t(u64, xe->mem.vram.size, flat_ccs_base); + + drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size); + + /* FIXME: Assuming equally partitioned VRAM, incorrect */ + if (xe->info.tile_count > 1) { + u8 adj_tile_count = xe->info.tile_count; + resource_size_t size, io_start; + + for_each_gt(gt, xe, id) + if (xe_gt_is_media_type(gt)) + --adj_tile_count; + + XE_BUG_ON(!adj_tile_count); + + size = xe->mem.vram.size / adj_tile_count; + io_start = xe->mem.vram.io_start; + + for_each_gt(gt, xe, id) { + if (id && !xe_gt_is_media_type(gt)) + io_start += size; + + gt->mem.vram.size = size; + gt->mem.vram.io_start = io_start; + gt->mem.vram.mapping = xe->mem.vram.mapping + + (io_start - xe->mem.vram.io_start); + + drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", + id, gt->info.vram_id, >->mem.vram.io_start, + >->mem.vram.size); + } + } else { + gt->mem.vram.size = xe->mem.vram.size; + gt->mem.vram.io_start = xe->mem.vram.io_start; + gt->mem.vram.mapping = xe->mem.vram.mapping; + + drm_info(&xe->drm, "VRAM: %pa\n", >->mem.vram.size); + } + return 0; +} + +static void xe_mmio_probe_tiles(struct xe_device *xe) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + u32 mtcfg; + u8 adj_tile_count; + u8 id; + + if (xe->info.tile_count == 1) + return; + + mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg); + adj_tile_count = xe->info.tile_count = + REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + if (xe->info.media_ver >= 13) + xe->info.tile_count *= 2; + + drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", + xe->info.tile_count, adj_tile_count); + + if (xe->info.tile_count > 1) { + const int mmio_bar = 0; + size_t size; + void *regs; + + if (adj_tile_count > 1) { + pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); + xe->mmio.size = SZ_16M * adj_tile_count; + xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), + mmio_bar, xe->mmio.size); + } + + size = xe->mmio.size / adj_tile_count; + regs = xe->mmio.regs; + + for_each_gt(gt, xe, id) { + if (id && !xe_gt_is_media_type(gt)) + regs += size; + gt->mmio.size = size; + gt->mmio.regs = regs; + } + } +} + +static void mmio_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); + if (xe->mem.vram.mapping) + iounmap(xe->mem.vram.mapping); +} + +int xe_mmio_init(struct xe_device *xe) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + const int mmio_bar = 0; + int err; + + /* + * Map the entire BAR, which includes registers (0-4MB), reserved space + * (4MB-8MB), and GGTT (8MB-16MB). Other parts of the driver (GTs, + * GGTTs) will derive the pointers they need from the mapping in the + * device structure. 
+ */ + xe->mmio.size = SZ_16M; + xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, + xe->mmio.size); + if (xe->mmio.regs == NULL) { + drm_err(&xe->drm, "failed to map registers\n"); + return -EIO; + } + + err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); + if (err) + return err; + + /* 1 GT for now, 1 to 1 mapping, may change on multi-GT devices */ + gt->mmio.size = xe->mmio.size; + gt->mmio.regs = xe->mmio.regs; + + /* + * The boot firmware initializes local memory and assesses its health. + * If memory training fails, the punit will have been instructed to + * keep the GT powered down; we won't be able to communicate with it + * and we should not continue with driver initialization. + */ + if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) { + drm_err(&xe->drm, "LMEM not initialized by firmware\n"); + return -ENODEV; + } + + err = xe_set_dma_info(xe); + if (err) + return err; + + xe_mmio_probe_tiles(xe); + + return 0; +} + +#define VALID_MMIO_FLAGS (\ + DRM_XE_MMIO_BITS_MASK |\ + DRM_XE_MMIO_READ |\ + DRM_XE_MMIO_WRITE) + +static const i915_reg_t mmio_read_whitelist[] = { + RING_TIMESTAMP(RENDER_RING_BASE), +}; + +int xe_mmio_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct drm_xe_mmio *args = data; + unsigned int bits_flag, bytes; + bool allowed; + int ret = 0; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value)) + return -EINVAL; + + allowed = capable(CAP_SYS_ADMIN); + if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) == DRM_XE_MMIO_READ)) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) { + if (mmio_read_whitelist[i].reg == args->addr) { + allowed = true; + break; + } + } + } + + if (XE_IOCTL_ERR(xe, !allowed)) + return -EPERM; + + bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK; + bytes = 1 << bits_flag; + if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size)) + return -EINVAL; + + xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); + + if (args->flags & DRM_XE_MMIO_WRITE) { + switch (bits_flag) { + case DRM_XE_MMIO_8BIT: + return -EINVAL; /* TODO */ + case DRM_XE_MMIO_16BIT: + return -EINVAL; /* TODO */ + case DRM_XE_MMIO_32BIT: + if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) + return -EINVAL; + xe_mmio_write32(to_gt(xe), args->addr, args->value); + break; + case DRM_XE_MMIO_64BIT: + xe_mmio_write64(to_gt(xe), args->addr, args->value); + break; + default: + drm_WARN(&xe->drm, 1, "Invalid MMIO bit size"); + ret = -EINVAL; + goto exit; + } + } + + if (args->flags & DRM_XE_MMIO_READ) { + switch (bits_flag) { + case DRM_XE_MMIO_8BIT: + return -EINVAL; /* TODO */ + case DRM_XE_MMIO_16BIT: + return -EINVAL; /* TODO */ + case DRM_XE_MMIO_32BIT: + args->value = xe_mmio_read32(to_gt(xe), args->addr); + break; + case DRM_XE_MMIO_64BIT: + args->value = xe_mmio_read64(to_gt(xe), args->addr); + break; + default: + drm_WARN(&xe->drm, 1, "Invalid MMIO bit size"); + ret = -EINVAL; + } + } + +exit: + xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h new file mode 100644 index 000000000000..09d24467096f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_MMIO_H_ +#define 
_XE_MMIO_H_ + +#include + +#include "xe_gt_types.h" + +/* + * FIXME: This header has been deemed evil and we need to kill it. Temporarily + * including so we can use 'wait_for' and unblock initial development. A follow + * should replace 'wait_for' with a sane version and drop including this header. + */ +#include "i915_utils.h" + +struct drm_device; +struct drm_file; +struct xe_device; + +int xe_mmio_init(struct xe_device *xe); + +static inline u8 xe_mmio_read8(struct xe_gt *gt, u32 reg) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + return readb(gt->mmio.regs + reg); +} + +static inline void xe_mmio_write32(struct xe_gt *gt, + u32 reg, u32 val) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + writel(val, gt->mmio.regs + reg); +} + +static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + return readl(gt->mmio.regs + reg); +} + +static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 mask, + u32 val) +{ + u32 old, reg_val; + + old = xe_mmio_read32(gt, reg); + reg_val = (old & mask) | val; + xe_mmio_write32(gt, reg, reg_val); + + return old; +} + +static inline void xe_mmio_write64(struct xe_gt *gt, + u32 reg, u64 val) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + writeq(val, gt->mmio.regs + reg); +} + +static inline u64 xe_mmio_read64(struct xe_gt *gt, u32 reg) +{ + if (reg < gt->mmio.adj_limit) + reg += gt->mmio.adj_offset; + + return readq(gt->mmio.regs + reg); +} + +static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, + u32 reg, u32 val, + u32 mask, u32 eval) +{ + u32 reg_val; + + xe_mmio_write32(gt, reg, val); + reg_val = xe_mmio_read32(gt, reg); + + return (reg_val & mask) != eval ? -EINVAL : 0; +} + +static inline int xe_mmio_wait32(struct xe_gt *gt, + u32 reg, u32 val, + u32 mask, u32 timeout_ms) +{ + return wait_for((xe_mmio_read32(gt, reg) & mask) == val, + timeout_ms); +} + +int xe_mmio_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg) +{ + return range && reg >= range->start && reg <= range->end; +} + +int xe_mmio_probe_vram(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c new file mode 100644 index 000000000000..86b966fffbe5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_gt.h" +#include "xe_platform_types.h" +#include "xe_mmio.h" +#include "xe_mocs.h" +#include "xe_step_types.h" + +#include "gt/intel_gt_regs.h" + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define mocs_dbg drm_dbg +#else +__printf(2, 3) +static inline void mocs_dbg(const struct drm_device *dev, + const char *format, ...) +{ /* noop */ } +#endif + +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum xe_mocs_info_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + XE_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the xxxx IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + XE_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. 
+ * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + XE_MOCS_CACHED, +}; + +enum { + HAS_GLOBAL_MOCS = BIT(0), + HAS_RENDER_L3CC = BIT(1), +}; + +struct xe_mocs_entry { + u32 control_value; + u16 l3cc_value; + u16 used; +}; + +struct xe_mocs_info { + unsigned int size; + unsigned int n_entries; + const struct xe_mocs_entry *table; + u8 uc_index; + u8 wb_index; + u8 unused_entries_index; +}; + +/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ +#define _LE_CACHEABILITY(value) ((value) << 0) +#define _LE_TGT_CACHE(value) ((value) << 2) +#define LE_LRUM(value) ((value) << 4) +#define LE_AOM(value) ((value) << 6) +#define LE_RSC(value) ((value) << 7) +#define LE_SCC(value) ((value) << 8) +#define LE_PFM(value) ((value) << 11) +#define LE_SCF(value) ((value) << 14) +#define LE_COS(value) ((value) << 15) +#define LE_SSE(value) ((value) << 17) + +/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ +#define L3_ESC(value) ((value) << 0) +#define L3_SCC(value) ((value) << 1) +#define _L3_CACHEABILITY(value) ((value) << 4) +#define L3_GLBGO(value) ((value) << 6) +#define L3_LKUP(value) ((value) << 7) + +/* Helper defines */ +#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ +#define PVC_NUM_MOCS_ENTRIES 3 +#define MTL_NUM_MOCS_ENTRIES 16 + +/* (e)LLC caching options */ +/* + * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means + * the same as LE_UC + */ +#define LE_0_PAGETABLE _LE_CACHEABILITY(0) +#define LE_1_UC _LE_CACHEABILITY(1) +#define LE_2_WT _LE_CACHEABILITY(2) +#define LE_3_WB _LE_CACHEABILITY(3) + +/* Target cache */ +#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) +#define LE_TC_1_LLC _LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) + +/* L3 caching options */ +#define L3_0_DIRECT _L3_CACHEABILITY(0) +#define L3_1_UC _L3_CACHEABILITY(1) +#define L3_2_RESERVED _L3_CACHEABILITY(2) +#define L3_3_WB _L3_CACHEABILITY(3) + +#define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ + [__idx] = { \ + .control_value = __control_value, \ + .l3cc_value = __l3cc_value, \ + .used = 1, \ + } + +/* + * MOCS tables + * + * These are the MOCS tables that are programmed across all the rings. + * The control value is programmed to all the rings that support the + * MOCS registers. While the l3cc_values are only programmed to the + * LNCFCMOCS0 - LNCFCMOCS32 registers. + * + * These tables are intended to be kept reasonably consistent across + * HW platforms, and for ICL+, be identical across OSes. To achieve + * that, for Icelake and above, list of entries is published as part + * of bspec. + * + * Entries not part of the following tables are undefined as far as + * userspace is concerned and shouldn't be relied upon. For Gen < 12 + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. + * + * The last few entries are reserved by the hardware. For ICL+ they + * should be initialized according to bspec and never used, for older + * platforms they should never be written to. + * + * NOTE1: These tables are part of bspec and defined as part of hardware + * interface for ICL+. For older platforms, they are part of kernel + * ABI. 
It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by + * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. + */ + +#define GEN11_MOCS_ENTRIES \ + /* Entries 0 and 1 are defined per-platform */ \ + /* Base - L3 + LLC */ \ + MOCS_ENTRY(2, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_3_WB), \ + /* Base - Uncached */ \ + MOCS_ENTRY(3, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_1_UC), \ + /* Base - L3 */ \ + MOCS_ENTRY(4, \ + LE_1_UC | LE_TC_1_LLC, \ + L3_3_WB), \ + /* Base - LLC */ \ + MOCS_ENTRY(5, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* Age 0 - LLC */ \ + MOCS_ENTRY(6, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_1_UC), \ + /* Age 0 - L3 + LLC */ \ + MOCS_ENTRY(7, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ + L3_3_WB), \ + /* Age: Don't Chg. - LLC */ \ + MOCS_ENTRY(8, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_1_UC), \ + /* Age: Don't Chg. - L3 + LLC */ \ + MOCS_ENTRY(9, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ + L3_3_WB), \ + /* No AOM - LLC */ \ + MOCS_ENTRY(10, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM - L3 + LLC */ \ + MOCS_ENTRY(11, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age 0 - LLC */ \ + MOCS_ENTRY(12, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age 0 - L3 + LLC */ \ + MOCS_ENTRY(13, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ + L3_3_WB), \ + /* No AOM; Age:DC - LLC */ \ + MOCS_ENTRY(14, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_1_UC), \ + /* No AOM; Age:DC - L3 + LLC */ \ + MOCS_ENTRY(15, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ + L3_3_WB), \ + /* Self-Snoop - L3 + LLC */ \ + MOCS_ENTRY(18, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(12.5%) */ \ + MOCS_ENTRY(19, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(25%) */ \ + MOCS_ENTRY(20, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(50%) */ \ + MOCS_ENTRY(21, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(75%) */ \ + MOCS_ENTRY(22, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ + L3_3_WB), \ + /* Skip Caching - L3 + LLC(87.5%) */ \ + MOCS_ENTRY(23, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ + L3_3_WB), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(62, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC), \ + /* HW Reserved - SW program but never use */ \ + MOCS_ENTRY(63, \ + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ + L3_1_UC) + +static const struct xe_mocs_entry tgl_mocs_desc[] = { + /* + * NOTE: + * Reserved and unspecified MOCS indices have been set to (L3 + LCC). + * These reserved entries should never be used, they may be changed + * to low performant variants with better coherency in the future if + * more entries are needed. We are programming index XE_MOCS_PTE(1) + * only, __init_mocs_table() take care to program unused index with + * this entry. 
+ */ + MOCS_ENTRY(XE_MOCS_PTE, + LE_0_PAGETABLE | LE_TC_0_PAGETABLE, + L3_1_UC), + GEN11_MOCS_ENTRIES, + + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + +static const struct xe_mocs_entry dg1_mocs_desc[] = { + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + /* WB - L3 */ + MOCS_ENTRY(5, 0, L3_3_WB), + /* WB - L3 50% */ + MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB), + /* WB - L3 25% */ + MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB), + /* WB - L3 12.5% */ + MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB), + + /* HDC:L1 + L3 */ + MOCS_ENTRY(48, 0, L3_3_WB), + /* HDC:L1 */ + MOCS_ENTRY(49, 0, L3_1_UC), + + /* HW Reserved */ + MOCS_ENTRY(60, 0, L3_1_UC), + MOCS_ENTRY(61, 0, L3_1_UC), + MOCS_ENTRY(62, 0, L3_1_UC), + MOCS_ENTRY(63, 0, L3_1_UC), +}; + +static const struct xe_mocs_entry gen12_mocs_desc[] = { + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + +static const struct xe_mocs_entry dg2_mocs_desc[] = { + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = { + /* Wa_14011441408: Set Go to Memory for MOCS#0 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct xe_mocs_entry pvc_mocs_desc[] = { + /* Error */ + MOCS_ENTRY(0, 0, L3_3_WB), + + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(2, 0, L3_3_WB), +}; + +static unsigned int get_mocs_settings(struct xe_device *xe, + struct xe_mocs_info *info) +{ + unsigned int flags; + + memset(info, 0, sizeof(struct xe_mocs_info)); + + info->unused_entries_index = XE_MOCS_PTE; + switch (xe->info.platform) { + case XE_PVC: + info->size = ARRAY_SIZE(pvc_mocs_desc); + info->table = pvc_mocs_desc; + info->n_entries = PVC_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->wb_index = 2; + info->unused_entries_index = 2; + break; + case XE_METEORLAKE: + info->size = ARRAY_SIZE(dg2_mocs_desc); + info->table = dg2_mocs_desc; + info->n_entries = 
MTL_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->unused_entries_index = 3; + break; + case XE_DG2: + if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 && + xe->info.step.graphics >= STEP_A0 && + xe->info.step.graphics <= STEP_B0) { + info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax); + info->table = dg2_mocs_desc_g10_ax; + } else { + info->size = ARRAY_SIZE(dg2_mocs_desc); + info->table = dg2_mocs_desc; + } + info->uc_index = 1; + info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->unused_entries_index = 3; + break; + case XE_DG1: + info->size = ARRAY_SIZE(dg1_mocs_desc); + info->table = dg1_mocs_desc; + info->uc_index = 1; + info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->uc_index = 1; + info->unused_entries_index = 5; + break; + case XE_TIGERLAKE: + info->size = ARRAY_SIZE(tgl_mocs_desc); + info->table = tgl_mocs_desc; + info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->uc_index = 3; + break; + case XE_ALDERLAKE_S: + case XE_ALDERLAKE_P: + info->size = ARRAY_SIZE(gen12_mocs_desc); + info->table = gen12_mocs_desc; + info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->uc_index = 3; + info->unused_entries_index = 2; + break; + default: + drm_err(&xe->drm, "Platform that should have a MOCS table does not.\n"); + return 0; + } + + if (XE_WARN_ON(info->size > info->n_entries)) + return 0; + + flags = HAS_RENDER_L3CC; + if (!IS_DGFX(xe)) + flags |= HAS_GLOBAL_MOCS; + + return flags; +} + +/* + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise XE_MOCS_PTE's value is returned in this case. + */ +static u32 get_entry_control(const struct xe_mocs_info *info, + unsigned int index) +{ + if (index < info->size && info->table[index].used) + return info->table[index].control_value; + return info->table[info->unused_entries_index].control_value; +} + +static void __init_mocs_table(struct xe_gt *gt, + const struct xe_mocs_info *info, + u32 addr) +{ + struct xe_device *xe = gt_to_xe(gt); + + unsigned int i; + u32 mocs; + + mocs_dbg(>->xe->drm, "entries:%d\n", info->n_entries); + drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, + "Unused entries index should have been defined\n"); + for (i = 0; + i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; + i++) { + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, _MMIO(addr + i * 4).reg, mocs); + xe_mmio_write32(gt, _MMIO(addr + i * 4).reg, mocs); + } +} + +/* + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is not zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. + */ +static u16 get_entry_l3cc(const struct xe_mocs_info *info, + unsigned int index) +{ + if (index < info->size && info->table[index].used) + return info->table[index].l3cc_value; + return info->table[info->unused_entries_index].l3cc_value; +} + +static u32 l3cc_combine(u16 low, u16 high) +{ + return low | (u32)high << 16; +} + +static void init_l3cc_table(struct xe_gt *gt, + const struct xe_mocs_info *info) +{ + unsigned int i; + u32 l3cc; + + mocs_dbg(>->xe->drm, "entries:%d\n", info->n_entries); + for (i = 0; + i < (info->n_entries + 1) / 2 ? 
+ (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), + get_entry_l3cc(info, 2 * i + 1))), 1 : 0; + i++) { + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, GEN9_LNCFCMOCS(i).reg, l3cc); + xe_mmio_write32(gt, GEN9_LNCFCMOCS(i).reg, l3cc); + } +} + +void xe_mocs_init_engine(const struct xe_engine *engine) +{ + struct xe_mocs_info table; + unsigned int flags; + + flags = get_mocs_settings(engine->gt->xe, &table); + if (!flags) + return; + + if (flags & HAS_RENDER_L3CC && engine->class == XE_ENGINE_CLASS_RENDER) + init_l3cc_table(engine->gt, &table); +} + +void xe_mocs_init(struct xe_gt *gt) +{ + struct xe_mocs_info table; + unsigned int flags; + + /* + * LLC and eDRAM control values are not applicable to dgfx + */ + flags = get_mocs_settings(gt->xe, &table); + mocs_dbg(>->xe->drm, "flag:0x%x\n", flags); + gt->mocs.uc_index = table.uc_index; + gt->mocs.wb_index = table.wb_index; + + if (flags & HAS_GLOBAL_MOCS) + __init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg); + + /* + * Initialize the L3CC table as part of mocs initalization to make + * sure the LNCFCMOCSx registers are programmed for the subsequent + * memory transactions including guc transactions + */ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt, &table); +} diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h new file mode 100644 index 000000000000..aba1abe216ab --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_MOCS_H_ +#define _XE_MOCS_H_ + +#include + +struct xe_engine; +struct xe_gt; + +void xe_mocs_init_engine(const struct xe_engine *engine); +void xe_mocs_init(struct xe_gt *gt); + +/** + * xe_mocs_index_to_value - Translate mocs index to the mocs value exected by + * most blitter commands. + * @mocs_index: index into the mocs tables + * + * Return: The corresponding mocs value to be programmed. + */ +static inline u32 xe_mocs_index_to_value(u32 mocs_index) +{ + return mocs_index << 1; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c new file mode 100644 index 000000000000..cc862553a252 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_module.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include + +#include "xe_drv.h" +#include "xe_hw_fence.h" +#include "xe_module.h" +#include "xe_pci.h" +#include "xe_sched_job.h" + +bool enable_guc = true; +module_param_named_unsafe(enable_guc, enable_guc, bool, 0444); +MODULE_PARM_DESC(enable_guc, "Enable GuC submission"); + +u32 xe_force_lmem_bar_size; +module_param_named(lmem_bar_size, xe_force_lmem_bar_size, uint, 0600); +MODULE_PARM_DESC(lmem_bar_size, "Set the lmem bar size(in MiB)"); + +int xe_guc_log_level = 5; +module_param_named(guc_log_level, xe_guc_log_level, int, 0600); +MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); + +char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE; +module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400); +MODULE_PARM_DESC(force_probe, + "Force probe options for specified devices. 
See CONFIG_DRM_XE_FORCE_PROBE for details."); + +struct init_funcs { + int (*init)(void); + void (*exit)(void); +}; +#define MAKE_INIT_EXIT_FUNCS(name) \ + { .init = xe_##name##_module_init, \ + .exit = xe_##name##_module_exit, } +static const struct init_funcs init_funcs[] = { + MAKE_INIT_EXIT_FUNCS(hw_fence), + MAKE_INIT_EXIT_FUNCS(sched_job), +}; + +static int __init xe_init(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { + err = init_funcs[i].init(); + if (err) { + while (i--) + init_funcs[i].exit(); + return err; + } + } + + return xe_register_pci_driver(); +} + +static void __exit xe_exit(void) +{ + int i; + + xe_unregister_pci_driver(); + + for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) + init_funcs[i].exit(); +} + +module_init(xe_init); +module_exit(xe_exit); + +MODULE_AUTHOR("Intel Corporation"); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h new file mode 100644 index 000000000000..2c6ee46f5595 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_module.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +/* Module modprobe variables */ +extern bool enable_guc; +extern bool enable_display; +extern u32 xe_force_lmem_bar_size; +extern int xe_guc_log_level; +extern char *xe_param_force_probe; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c new file mode 100644 index 000000000000..55d8a597a068 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -0,0 +1,651 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_pci.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "xe_drv.h" +#include "xe_device.h" +#include "xe_macros.h" +#include "xe_module.h" +#include "xe_pm.h" +#include "xe_step.h" + +#include "i915_reg.h" + +#define DEV_INFO_FOR_EACH_FLAG(func) \ + func(require_force_probe); \ + func(is_dgfx); \ + /* Keep has_* in alphabetical order */ \ + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_gt_desc { + enum xe_gt_type type; + u8 vram_id; + u64 engine_mask; + u32 mmio_adj_limit; + u32 mmio_adj_offset; +}; + +struct xe_device_desc { + u8 graphics_ver; + u8 graphics_rel; + u8 media_ver; + u8 media_rel; + + u64 platform_engine_mask; /* Engines supported by the HW */ + + enum xe_platform platform; + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + const struct xe_gt_desc *extra_gts; + + u8 dma_mask_size; /* available DMA address bits */ + + u8 gt; /* GT number, 0 if undefined */ + +#define DEFINE_FLAG(name) u8 name:1 + DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); +#undef DEFINE_FLAG + + u8 vram_flags; + u8 max_tiles; + u8 vm_max_level; + + bool supports_usm; + bool has_flat_ccs; + bool has_4tile; +}; + +#define PLATFORM(x) \ + .platform = (x), \ + .platform_name = #x + +#define NOP(x) x + +/* Keep in gen based order, and chronological order within a gen */ +#define GEN12_FEATURES \ + .require_force_probe = true, \ + .graphics_ver = 12, \ + .media_ver = 12, \ + .dma_mask_size = 39, \ + .max_tiles = 1, \ + .vm_max_level = 3, \ + .vram_flags = 0 + +static const struct xe_device_desc tgl_desc = { + GEN12_FEATURES, + PLATFORM(XE_TIGERLAKE), + .platform_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | + BIT(XE_HW_ENGINE_VCS2), +}; 
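For readers new to the descriptor macros above, a sketch of what two of them expand to (shown out of line here, for illustration only):

	/* DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG) inside struct xe_device_desc: */
		u8 require_force_probe:1;
		u8 is_dgfx:1;

	/* PLATFORM(XE_TIGERLAKE) inside tgl_desc: */
		.platform = XE_TIGERLAKE,
		.platform_name = "XE_TIGERLAKE",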
+ +static const struct xe_device_desc adl_s_desc = { + GEN12_FEATURES, + PLATFORM(XE_ALDERLAKE_S), + .platform_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | + BIT(XE_HW_ENGINE_VCS2), +}; + +static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 }; + +static const struct xe_device_desc adl_p_desc = { + GEN12_FEATURES, + PLATFORM(XE_ALDERLAKE_P), + .platform_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | + BIT(XE_HW_ENGINE_VCS2), + .subplatforms = (const struct xe_subplatform_desc[]) { + { XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids }, + {}, + }, +}; + +#define DGFX_FEATURES \ + .is_dgfx = 1 + +static const struct xe_device_desc dg1_desc = { + GEN12_FEATURES, + DGFX_FEATURES, + .graphics_rel = 10, + PLATFORM(XE_DG1), + .platform_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | + BIT(XE_HW_ENGINE_VCS2), +}; + +#define XE_HP_FEATURES \ + .require_force_probe = true, \ + .graphics_ver = 12, \ + .graphics_rel = 50, \ + .has_flat_ccs = true, \ + .dma_mask_size = 46, \ + .max_tiles = 1, \ + .vm_max_level = 3 + +#define XE_HPM_FEATURES \ + .media_ver = 12, \ + .media_rel = 50 + +static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 }; +static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 }; +static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; + +#define DG2_FEATURES \ + DGFX_FEATURES, \ + .graphics_rel = 55, \ + .media_rel = 55, \ + PLATFORM(XE_DG2), \ + .subplatforms = (const struct xe_subplatform_desc[]) { \ + { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ + { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \ + { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ + { } \ + }, \ + .platform_engine_mask = \ + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \ + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1) | \ + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \ + .require_force_probe = true, \ + .vram_flags = XE_VRAM_FLAGS_NEED64K, \ + .has_4tile = 1 + +static const struct xe_device_desc ats_m_desc = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + + DG2_FEATURES, +}; + +static const struct xe_device_desc dg2_desc = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + + DG2_FEATURES, +}; + +#define PVC_ENGINES \ + BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | \ + BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | \ + BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | \ + BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | \ + BIT(XE_HW_ENGINE_BCS8) | \ + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | \ + BIT(XE_HW_ENGINE_VCS2) | \ + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3) + +static const struct xe_gt_desc pvc_gts[] = { + { + .type = XE_GT_TYPE_REMOTE, + .vram_id = 1, + .engine_mask = PVC_ENGINES, + .mmio_adj_limit = 0, + .mmio_adj_offset = 0, + }, +}; + +static const __maybe_unused struct xe_device_desc pvc_desc = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + DGFX_FEATURES, + PLATFORM(XE_PVC), + .extra_gts = pvc_gts, + .graphics_rel = 60, + .has_flat_ccs = 0, + .media_rel = 60, + .platform_engine_mask = PVC_ENGINES, + .vram_flags = XE_VRAM_FLAGS_NEED64K, + .dma_mask_size = 52, + .max_tiles = 2, + .vm_max_level = 4, + .supports_usm = true, +}; + +#define MTL_MEDIA_ENGINES \ 
+ BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ + BIT(XE_HW_ENGINE_VECS0) /* TODO: GSC0 */ + +static const struct xe_gt_desc xelpmp_gts[] = { + { + .type = XE_GT_TYPE_MEDIA, + .vram_id = 0, + .engine_mask = MTL_MEDIA_ENGINES, + .mmio_adj_limit = 0x40000, + .mmio_adj_offset = 0x380000, + }, +}; + +#define MTL_MAIN_ENGINES \ + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \ + BIT(XE_HW_ENGINE_CCS0) + +static const struct xe_device_desc mtl_desc = { + /* + * Real graphics IP version will be obtained from hardware GMD_ID + * register. Value provided here is just for sanity checking. + */ + .require_force_probe = true, + .graphics_ver = 12, + .graphics_rel = 70, + .dma_mask_size = 46, + .max_tiles = 2, + .vm_max_level = 3, + .media_ver = 13, + PLATFORM(XE_METEORLAKE), + .extra_gts = xelpmp_gts, + .platform_engine_mask = MTL_MAIN_ENGINES, +}; + +#undef PLATFORM + +#define INTEL_VGA_DEVICE(id, info) { \ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, id), \ + PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16, \ + (unsigned long) info } + +/* + * Make sure any device matches here are from most specific to most + * general. For example, since the Quanta match is based on the subsystem + * and subvendor IDs, we need it to come before the more general IVB + * PCI ID matches, otherwise we'll use the wrong info struct above. + */ +static const struct pci_device_id pciidlist[] = { + XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc), + XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), + XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), + XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), + XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), + XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), + XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), + { } +}; +MODULE_DEVICE_TABLE(pci, pciidlist); + +#undef INTEL_VGA_DEVICE + +/* is device_id present in comma separated list of ids */ +static bool device_id_in_list(u16 device_id, const char *devices, bool negative) +{ + char *s, *p, *tok; + bool ret; + + if (!devices || !*devices) + return false; + + /* match everything */ + if (negative && strcmp(devices, "!*") == 0) + return true; + if (!negative && strcmp(devices, "*") == 0) + return true; + + s = kstrdup(devices, GFP_KERNEL); + if (!s) + return false; + + for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) { + u16 val; + + if (negative && tok[0] == '!') + tok++; + else if ((negative && tok[0] != '!') || + (!negative && tok[0] == '!')) + continue; + + if (kstrtou16(tok, 16, &val) == 0 && val == device_id) { + ret = true; + break; + } + } + + kfree(s); + + return ret; +} + +static bool id_forced(u16 device_id) +{ + return device_id_in_list(device_id, xe_param_force_probe, false); +} + +static bool id_blocked(u16 device_id) +{ + return device_id_in_list(device_id, xe_param_force_probe, true); +} + +static const struct xe_subplatform_desc * +subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc) +{ + const struct xe_subplatform_desc *sp; + const u16 *id; + + for (sp = desc->subplatforms; sp && sp->subplatform; sp++) + for (id = sp->pciidlist; *id; id++) + if (*id == xe->info.devid) + return sp; + + return NULL; +} + +static void xe_pci_remove(struct pci_dev *pdev) +{ + struct xe_device *xe; + + xe = pci_get_drvdata(pdev); + if (!xe) /* driver load aborted, nothing to cleanup */ + return; + + xe_device_remove(xe); + pci_set_drvdata(pdev, NULL); +} + +static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + const struct xe_device_desc *desc = (void *)ent->driver_data; + const struct xe_subplatform_desc *spd; + struct 
xe_device *xe; + struct xe_gt *gt; + u8 id; + int err; + + if (desc->require_force_probe && !id_forced(pdev->device)) { + dev_info(&pdev->dev, + "Your graphics device %04x is not officially supported\n" + "by xe driver in this kernel version. To force Xe probe,\n" + "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n" + "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n" + "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n", + pdev->device, pdev->device, pdev->device, + pdev->device, pdev->device); + return -ENODEV; + } + + if (id_blocked(pdev->device)) { + dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n", + pdev->vendor, pdev->device); + return -ENODEV; + } + + xe = xe_device_create(pdev, ent); + if (IS_ERR(xe)) + return PTR_ERR(xe); + + xe->info.graphics_verx100 = desc->graphics_ver * 100 + + desc->graphics_rel; + xe->info.media_verx100 = desc->media_ver * 100 + + desc->media_rel; + xe->info.is_dgfx = desc->is_dgfx; + xe->info.platform = desc->platform; + xe->info.dma_mask_size = desc->dma_mask_size; + xe->info.vram_flags = desc->vram_flags; + xe->info.tile_count = desc->max_tiles; + xe->info.vm_max_level = desc->vm_max_level; + xe->info.media_ver = desc->media_ver; + xe->info.supports_usm = desc->supports_usm; + xe->info.has_flat_ccs = desc->has_flat_ccs; + xe->info.has_4tile = desc->has_4tile; + + spd = subplatform_get(xe, desc); + xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE; + xe->info.step = xe_step_get(xe); + + for (id = 0; id < xe->info.tile_count; ++id) { + gt = xe->gt + id; + gt->info.id = id; + gt->xe = xe; + + if (id == 0) { + gt->info.type = XE_GT_TYPE_MAIN; + gt->info.vram_id = id; + gt->info.engine_mask = desc->platform_engine_mask; + gt->mmio.adj_limit = 0; + gt->mmio.adj_offset = 0; + } else { + gt->info.type = desc->extra_gts[id - 1].type; + gt->info.vram_id = desc->extra_gts[id - 1].vram_id; + gt->info.engine_mask = + desc->extra_gts[id - 1].engine_mask; + gt->mmio.adj_limit = + desc->extra_gts[id - 1].mmio_adj_limit; + gt->mmio.adj_offset = + desc->extra_gts[id - 1].mmio_adj_offset; + } + } + + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d", + desc->platform_name, spd ? 
spd->name : "", + xe->info.devid, xe->info.revid, + xe->info.is_dgfx, xe->info.graphics_verx100, + xe->info.media_verx100, + xe->info.dma_mask_size, xe->info.tile_count); + + drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n", + xe_step_name(xe->info.step.graphics), + xe_step_name(xe->info.step.media), + xe_step_name(xe->info.step.display), + xe_step_name(xe->info.step.basedie)); + + pci_set_drvdata(pdev, xe); + err = pci_enable_device(pdev); + if (err) { + drm_dev_put(&xe->drm); + return err; + } + + pci_set_master(pdev); + + if (pci_enable_msi(pdev) < 0) + drm_dbg(&xe->drm, "can't enable MSI"); + + err = xe_device_probe(xe); + if (err) { + pci_disable_device(pdev); + return err; + } + + xe_pm_runtime_init(xe); + + return 0; +} + +static void xe_pci_shutdown(struct pci_dev *pdev) +{ + xe_device_shutdown(pdev_to_xe_device(pdev)); +} + +#ifdef CONFIG_PM_SLEEP +static int xe_pci_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int err; + + err = xe_pm_suspend(pdev_to_xe_device(pdev)); + if (err) + return err; + + pci_save_state(pdev); + pci_disable_device(pdev); + + err = pci_set_power_state(pdev, PCI_D3hot); + if (err) + return err; + + return 0; +} + +static int xe_pci_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int err; + + err = pci_set_power_state(pdev, PCI_D0); + if (err) + return err; + + pci_restore_state(pdev); + + err = pci_enable_device(pdev); + if (err) + return err; + + pci_set_master(pdev); + + err = xe_pm_resume(pdev_to_xe_device(pdev)); + if (err) + return err; + + return 0; +} +#endif + +static int xe_pci_runtime_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + int err; + + err = xe_pm_runtime_suspend(xe); + if (err) + return err; + + pci_save_state(pdev); + + if (xe->d3cold_allowed) { + pci_disable_device(pdev); + pci_ignore_hotplug(pdev); + pci_set_power_state(pdev, PCI_D3cold); + } else { + pci_set_power_state(pdev, PCI_D3hot); + } + + return 0; +} + +static int xe_pci_runtime_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + int err; + + err = pci_set_power_state(pdev, PCI_D0); + if (err) + return err; + + pci_restore_state(pdev); + + if (xe->d3cold_allowed) { + err = pci_enable_device(pdev); + if (err) + return err; + + pci_set_master(pdev); + } + + return xe_pm_runtime_resume(xe); +} + +static int xe_pci_runtime_idle(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + + /* + * FIXME: d3cold should be allowed (true) if + * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe)) + * however the change to the buddy allocator broke the + * xe_bo_restore_kernel when the pci device is disabled + */ + xe->d3cold_allowed = false; + + return 0; +} + +static const struct dev_pm_ops xe_pm_ops = { + .suspend = xe_pci_suspend, + .resume = xe_pci_resume, + .freeze = xe_pci_suspend, + .thaw = xe_pci_resume, + .poweroff = xe_pci_suspend, + .restore = xe_pci_resume, + .runtime_suspend = xe_pci_runtime_suspend, + .runtime_resume = xe_pci_runtime_resume, + .runtime_idle = xe_pci_runtime_idle, +}; + +static struct pci_driver xe_pci_driver = { + .name = DRIVER_NAME, + .id_table = pciidlist, + .probe = xe_pci_probe, + .remove = xe_pci_remove, + .shutdown = xe_pci_shutdown, + .driver.pm = &xe_pm_ops, +}; + +int xe_register_pci_driver(void) +{ + return pci_register_driver(&xe_pci_driver); +} + +void 
xe_unregister_pci_driver(void) +{ + pci_unregister_driver(&xe_pci_driver); +} + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +static int dev_to_xe_device_fn(struct device *dev, void *data) + +{ + struct drm_device *drm = dev_get_drvdata(dev); + int (*xe_fn)(struct xe_device *xe) = data; + int ret = 0; + int idx; + + if (drm_dev_enter(drm, &idx)) + ret = xe_fn(to_xe_device(dev_get_drvdata(dev))); + drm_dev_exit(idx); + + return ret; +} + +/** + * xe_call_for_each_device - Iterate over all devices this driver binds to + * @xe_fn: Function to call for each device. + * + * This function iterated over all devices this driver binds to, and calls + * @xe_fn: for each one of them. If the called function returns anything else + * than 0, iteration is stopped and the return value is returned by this + * function. Across each function call, drm_dev_enter() / drm_dev_exit() is + * called for the corresponding drm device. + * + * Return: Zero or the error code of a call to @xe_fn returning an error + * code. + */ +int xe_call_for_each_device(xe_device_fn xe_fn) +{ + return driver_for_each_device(&xe_pci_driver.driver, NULL, + xe_fn, dev_to_xe_device_fn); +} +#endif diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h new file mode 100644 index 000000000000..9e3089549d5f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_PCI_H_ +#define _XE_PCI_H_ + +#include "tests/xe_test.h" + +int xe_register_pci_driver(void); +void xe_unregister_pci_driver(void); + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +struct xe_device; + +typedef int (*xe_device_fn)(struct xe_device *); + +int xe_call_for_each_device(xe_device_fn xe_fn); +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c new file mode 100644 index 000000000000..236159c8a6c0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_pcode_api.h" +#include "xe_pcode.h" + +#include "xe_gt.h" +#include "xe_mmio.h" + +#include + +/** + * DOC: PCODE + * + * Xe PCODE is the component responsible for interfacing with the PCODE + * firmware. + * It shall provide a very simple ABI to other Xe components, but be the + * single and consolidated place that will communicate with PCODE. All read + * and write operations to PCODE will be internal and private to this component. 
+ * + * What's next: + * - PCODE hw metrics + * - PCODE for display operations + */ + +static int pcode_mailbox_status(struct xe_gt *gt) +{ + u32 err; + static const struct pcode_err_decode err_decode[] = { + [PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"}, + [PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"}, + [PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"}, + [PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"}, + [PCODE_LOCKED] = {-EBUSY, "PCODE Locked"}, + [PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW, + "GT ratio out of range"}, + [PCODE_REJECTED] = {-EACCES, "PCODE Rejected"}, + [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, + }; + + lockdep_assert_held(>->pcode.lock); + + err = xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_ERROR_MASK; + if (err) { + drm_err(>_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, + err_decode[err].str ?: "Unknown"); + return err_decode[err].errno ?: -EPROTO; + } + + return 0; +} + +static bool pcode_mailbox_done(struct xe_gt *gt) +{ + lockdep_assert_held(>->pcode.lock); + return (xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) == 0; +} + +static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, + unsigned int timeout, bool return_data, bool atomic) +{ + lockdep_assert_held(>->pcode.lock); + + if (!pcode_mailbox_done(gt)) + return -EAGAIN; + + xe_mmio_write32(gt, PCODE_DATA0.reg, *data0); + xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0); + xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox); + + if (atomic) + _wait_for_atomic(pcode_mailbox_done(gt), timeout * 1000, 1); + else + wait_for(pcode_mailbox_done(gt), timeout); + + if (return_data) { + *data0 = xe_mmio_read32(gt, PCODE_DATA0.reg); + if (data1) + *data1 = xe_mmio_read32(gt, PCODE_DATA1.reg); + } + + return pcode_mailbox_status(gt); +} + +int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) +{ + int err; + + mutex_lock(>->pcode.lock); + err = pcode_mailbox_rw(gt, mbox, &data, NULL, timeout, false, false); + mutex_unlock(>->pcode.lock); + + return err; +} + +int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) +{ + int err; + + mutex_lock(>->pcode.lock); + err = pcode_mailbox_rw(gt, mbox, val, val1, 1, true, false); + mutex_unlock(>->pcode.lock); + + return err; +} + +static bool xe_pcode_try_request(struct xe_gt *gt, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + u32 *status, bool atomic) +{ + *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic); + + return (*status == 0) && ((request & reply_mask) == reply); +} + +/** + * xe_pcode_request - send PCODE request until acknowledgment + * @gt: gt + * @mbox: PCODE mailbox ID the request is targeted for + * @request: request ID + * @reply_mask: mask used to check for request acknowledgment + * @reply: value used to check for request acknowledgment + * @timeout_base_ms: timeout for polling with preemption enabled + * + * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. + * The request is acknowledged once the PCODE reply dword equals @reply after + * applying @reply_mask. Polling is first attempted with preemption enabled + * for @timeout_base_ms and if this times out for another 50 ms with + * preemption disabled. + * + * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some + * other error as reported by PCODE. 
+ */ +int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + u32 status; + int ret; + bool atomic = false; + + mutex_lock(>->pcode.lock); + +#define COND \ + xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, atomic) + + /* + * Prime the PCODE by doing a request first. Normally it guarantees + * that a subsequent request, at most @timeout_base_ms later, succeeds. + * _wait_for() doesn't guarantee when its passed condition is evaluated + * first, so send the first request explicitly. + */ + if (COND) { + ret = 0; + goto out; + } + ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10); + if (!ret) + goto out; + + /* + * The above can time out if the number of requests was low (2 in the + * worst case) _and_ PCODE was busy for some reason even after a + * (queued) request and @timeout_base_ms delay. As a workaround retry + * the poll with preemption disabled to maximize the number of + * requests. Increase the timeout from @timeout_base_ms to 50ms to + * account for interrupts that could reduce the number of these + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. + */ + drm_err(>_to_xe(gt)->drm, + "PCODE timeout, retrying with preemption disabled\n"); + drm_WARN_ON_ONCE(>_to_xe(gt)->drm, timeout_base_ms > 1); + preempt_disable(); + atomic = true; + ret = wait_for_atomic(COND, 50); + atomic = false; + preempt_enable(); + +out: + mutex_unlock(>->pcode.lock); + return status ? status : ret; +#undef COND +} +/** + * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table + * @gt: gt instance + * @min_gt_freq: Minimal (RPn) GT frequency in units of 50MHz. + * @max_gt_freq: Maximal (RP0) GT frequency in units of 50MHz. + * + * This function initialize PCODE's QOS frequency table for a proper minimal + * frequency/power steering decision, depending on the current requested GT + * frequency. For older platforms this was a more complete table including + * the IA freq. However for the latest platforms this table become a simple + * 1-1 Ring vs GT frequency. Even though, without setting it, PCODE might + * not take the right decisions for some memory frequencies and affect latency. 
+ * + * It returns 0 on success, and -ERROR number on failure, -EINVAL if max + * frequency is higher then the minimal, and other errors directly translated + * from the PCODE Error returs: + * - -ENXIO: "Illegal Command" + * - -ETIMEDOUT: "Timed out" + * - -EINVAL: "Illegal Data" + * - -ENXIO, "Illegal Subcommand" + * - -EBUSY: "PCODE Locked" + * - -EOVERFLOW, "GT ratio out of range" + * - -EACCES, "PCODE Rejected" + * - -EPROTO, "Unknown" + */ +int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, + u32 max_gt_freq) +{ + int ret; + u32 freq; + + if (IS_DGFX(gt_to_xe(gt))) + return 0; + + if (max_gt_freq <= min_gt_freq) + return -EINVAL; + + mutex_lock(>->pcode.lock); + for (freq = min_gt_freq; freq <= max_gt_freq; freq++) { + u32 data = freq << PCODE_FREQ_RING_RATIO_SHIFT | freq; + + ret = pcode_mailbox_rw(gt, PCODE_WRITE_MIN_FREQ_TABLE, + &data, NULL, 1, false, false); + if (ret) + goto unlock; + } + +unlock: + mutex_unlock(>->pcode.lock); + return ret; +} + +static bool pcode_dgfx_status_complete(struct xe_gt *gt) +{ + u32 data = DGFX_GET_INIT_STATUS; + int status = pcode_mailbox_rw(gt, DGFX_PCODE_STATUS, + &data, NULL, 1, true, false); + + return status == 0 && + (data & DGFX_INIT_STATUS_COMPLETE) == DGFX_INIT_STATUS_COMPLETE; +} + +/** + * xe_pcode_init - Ensure PCODE is initialized + * @gt: gt instance + * + * This function ensures that PCODE is properly initialized. To be called during + * probe and resume paths. + * + * It returns 0 on success, and -error number on failure. + */ +int xe_pcode_init(struct xe_gt *gt) +{ + int timeout = 180000; /* 3 min */ + int ret; + + if (!IS_DGFX(gt_to_xe(gt))) + return 0; + + mutex_lock(>->pcode.lock); + ret = wait_for(pcode_dgfx_status_complete(gt), timeout); + mutex_unlock(>->pcode.lock); + + if (ret) + drm_err(>_to_xe(gt)->drm, + "PCODE initialization timedout after: %d min\n", + timeout / 60000); + + return ret; +} + +/** + * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized. + * @gt: gt instance + * + * This function initializes the xe_pcode component, and when needed, it ensures + * that PCODE has properly performed its initialization and it is really ready + * to go. To be called once only during probe. + * + * It returns 0 on success, and -error number on failure. 
+ */ +int xe_pcode_probe(struct xe_gt *gt) +{ + mutex_init(>->pcode.lock); + + if (!IS_DGFX(gt_to_xe(gt))) + return 0; + + return xe_pcode_init(gt); +} diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h new file mode 100644 index 000000000000..3b4aa8c1a3ba --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PCODE_H_ +#define _XE_PCODE_H_ + +#include +struct xe_gt; + +int xe_pcode_probe(struct xe_gt *gt); +int xe_pcode_init(struct xe_gt *gt); +int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, + u32 max_gt_freq); +int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); +int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val, + int timeout_ms); +#define xe_pcode_write(gt, mbox, val) \ + xe_pcode_write_timeout(gt, mbox, val, 1) + +int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_ms); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h new file mode 100644 index 000000000000..0762c8a912c7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +/* Internal to xe_pcode */ + +#define PCODE_MAILBOX _MMIO(0x138124) +#define PCODE_READY REG_BIT(31) +#define PCODE_MB_PARAM2 REG_GENMASK(23, 16) +#define PCODE_MB_PARAM1 REG_GENMASK(15, 8) +#define PCODE_MB_COMMAND REG_GENMASK(7, 0) +#define PCODE_ERROR_MASK 0xFF +#define PCODE_SUCCESS 0x0 +#define PCODE_ILLEGAL_CMD 0x1 +#define PCODE_TIMEOUT 0x2 +#define PCODE_ILLEGAL_DATA 0x3 +#define PCODE_ILLEGAL_SUBCOMMAND 0x4 +#define PCODE_LOCKED 0x6 +#define PCODE_GT_RATIO_OUT_OF_RANGE 0x10 +#define PCODE_REJECTED 0x11 + +#define PCODE_DATA0 _MMIO(0x138128) +#define PCODE_DATA1 _MMIO(0x13812C) + +/* Min Freq QOS Table */ +#define PCODE_WRITE_MIN_FREQ_TABLE 0x8 +#define PCODE_READ_MIN_FREQ_TABLE 0x9 +#define PCODE_FREQ_RING_RATIO_SHIFT 16 + +/* PCODE Init */ +#define DGFX_PCODE_STATUS 0x7E +#define DGFX_GET_INIT_STATUS 0x0 +#define DGFX_INIT_STATUS_COMPLETE 0x1 + +struct pcode_err_decode { + int errno; + const char *str; +}; + diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h new file mode 100644 index 000000000000..72612c832e88 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PLATFORM_INFO_TYPES_H_ +#define _XE_PLATFORM_INFO_TYPES_H_ + +/* Keep in gen based order, and chronological order within a gen */ +enum xe_platform { + XE_PLATFORM_UNINITIALIZED = 0, + /* gen12 */ + XE_TIGERLAKE, + XE_ROCKETLAKE, + XE_DG1, + XE_DG2, + XE_PVC, + XE_ALDERLAKE_S, + XE_ALDERLAKE_P, + XE_METEORLAKE, +}; + +enum xe_subplatform { + XE_SUBPLATFORM_UNINITIALIZED = 0, + XE_SUBPLATFORM_NONE, + XE_SUBPLATFORM_DG2_G10, + XE_SUBPLATFORM_DG2_G11, + XE_SUBPLATFORM_DG2_G12, + XE_SUBPLATFORM_ADLP_RPLU, +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c new file mode 100644 index 000000000000..fb0355530e7b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include + +#include "xe_bo.h" +#include "xe_bo_evict.h" +#include "xe_device.h" +#include "xe_pm.h" +#include "xe_gt.h" +#include "xe_ggtt.h" +#include "xe_irq.h" 
+#include "xe_pcode.h" + +/** + * DOC: Xe Power Management + * + * Xe PM shall be guided by the simplicity. + * Use the simplest hook options whenever possible. + * Let's not reinvent the runtime_pm references and hooks. + * Shall have a clear separation of display and gt underneath this component. + * + * What's next: + * + * For now s2idle and s3 are only working in integrated devices. The next step + * is to iterate through all VRAM's BO backing them up into the system memory + * before allowing the system suspend. + * + * Also runtime_pm needs to be here from the beginning. + * + * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC + * and no wait boost. Frequency optimizations should come on a next stage. + */ + +/** + * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle + * @xe: xe device instance + * + * Return: 0 on success + */ +int xe_pm_suspend(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + for_each_gt(gt, xe, id) + xe_gt_suspend_prepare(gt); + + /* FIXME: Super racey... */ + err = xe_bo_evict_all(xe); + if (err) + return err; + + for_each_gt(gt, xe, id) { + err = xe_gt_suspend(gt); + if (err) + return err; + } + + xe_irq_suspend(xe); + + return 0; +} + +/** + * xe_pm_resume - Helper for System resume S3->S0 / S2idle->S0 + * @xe: xe device instance + * + * Return: 0 on success + */ +int xe_pm_resume(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + for_each_gt(gt, xe, id) { + err = xe_pcode_init(gt); + if (err) + return err; + } + + /* + * This only restores pinned memory which is the memory required for the + * GT(s) to resume. + */ + err = xe_bo_restore_kernel(xe); + if (err) + return err; + + xe_irq_resume(xe); + + for_each_gt(gt, xe, id) + xe_gt_resume(gt); + + err = xe_bo_restore_user(xe); + if (err) + return err; + + return 0; +} + +void xe_pm_runtime_init(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 1000); + pm_runtime_set_active(dev); + pm_runtime_allow(dev); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); +} + +int xe_pm_runtime_suspend(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + if (xe->d3cold_allowed) { + if (xe_device_mem_access_ongoing(xe)) + return -EBUSY; + + err = xe_bo_evict_all(xe); + if (err) + return err; + } + + for_each_gt(gt, xe, id) { + err = xe_gt_suspend(gt); + if (err) + return err; + } + + xe_irq_suspend(xe); + + return 0; +} + +int xe_pm_runtime_resume(struct xe_device *xe) +{ + struct xe_gt *gt; + u8 id; + int err; + + if (xe->d3cold_allowed) { + for_each_gt(gt, xe, id) { + err = xe_pcode_init(gt); + if (err) + return err; + } + + /* + * This only restores pinned memory which is the memory + * required for the GT(s) to resume. 
+ */ + err = xe_bo_restore_kernel(xe); + if (err) + return err; + } + + xe_irq_resume(xe); + + for_each_gt(gt, xe, id) + xe_gt_resume(gt); + + if (xe->d3cold_allowed) { + err = xe_bo_restore_user(xe); + if (err) + return err; + } + + return 0; +} + +int xe_pm_runtime_get(struct xe_device *xe) +{ + return pm_runtime_get_sync(xe->drm.dev); +} + +int xe_pm_runtime_put(struct xe_device *xe) +{ + pm_runtime_mark_last_busy(xe->drm.dev); + return pm_runtime_put_autosuspend(xe->drm.dev); +} + +/* Return true if resume operation happened and usage count was increased */ +bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe) +{ + /* In case we are suspended we need to immediately wake up */ + if (pm_runtime_suspended(xe->drm.dev)) + return !pm_runtime_resume_and_get(xe->drm.dev); + + return false; +} + +int xe_pm_runtime_get_if_active(struct xe_device *xe) +{ + WARN_ON(pm_runtime_suspended(xe->drm.dev)); + return pm_runtime_get_if_active(xe->drm.dev, true); +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h new file mode 100644 index 000000000000..b8c5f9558e26 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PM_H_ +#define _XE_PM_H_ + +#include + +struct xe_device; + +int xe_pm_suspend(struct xe_device *xe); +int xe_pm_resume(struct xe_device *xe); + +void xe_pm_runtime_init(struct xe_device *xe); +int xe_pm_runtime_suspend(struct xe_device *xe); +int xe_pm_runtime_resume(struct xe_device *xe); +int xe_pm_runtime_get(struct xe_device *xe); +int xe_pm_runtime_put(struct xe_device *xe); +bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe); +int xe_pm_runtime_get_if_active(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c new file mode 100644 index 000000000000..6ab9ff442766 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_engine.h" +#include "xe_preempt_fence.h" +#include "xe_vm.h" + +static void preempt_fence_work_func(struct work_struct *w) +{ + bool cookie = dma_fence_begin_signalling(); + struct xe_preempt_fence *pfence = + container_of(w, typeof(*pfence), preempt_work); + struct xe_engine *e = pfence->engine; + + if (pfence->error) + dma_fence_set_error(&pfence->base, pfence->error); + else + e->ops->suspend_wait(e); + + dma_fence_signal(&pfence->base); + dma_fence_end_signalling(cookie); + + queue_work(system_unbound_wq, &e->vm->preempt.rebind_work); + + xe_engine_put(e); +} + +static const char * +preempt_fence_get_driver_name(struct dma_fence *fence) +{ + return "xe"; +} + +static const char * +preempt_fence_get_timeline_name(struct dma_fence *fence) +{ + return "preempt"; +} + +static bool preempt_fence_enable_signaling(struct dma_fence *fence) +{ + struct xe_preempt_fence *pfence = + container_of(fence, typeof(*pfence), base); + struct xe_engine *e = pfence->engine; + + pfence->error = e->ops->suspend(e); + queue_work(system_unbound_wq, &pfence->preempt_work); + return true; +} + +static const struct dma_fence_ops preempt_fence_ops = { + .get_driver_name = preempt_fence_get_driver_name, + .get_timeline_name = preempt_fence_get_timeline_name, + .enable_signaling = preempt_fence_enable_signaling, +}; + +/** + * xe_preempt_fence_alloc() - Allocate a preempt fence with minimal + * initialization + * + * Allocate a preempt fence, and 
initialize its list head. + * If the preempt_fence allocated has been armed with + * xe_preempt_fence_arm(), it must be freed using dma_fence_put(). If not, + * it must be freed using xe_preempt_fence_free(). + * + * Return: A struct xe_preempt_fence pointer used for calling into + * xe_preempt_fence_arm() or xe_preempt_fence_free(). + * An error pointer on error. + */ +struct xe_preempt_fence *xe_preempt_fence_alloc(void) +{ + struct xe_preempt_fence *pfence; + + pfence = kmalloc(sizeof(*pfence), GFP_KERNEL); + if (!pfence) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&pfence->link); + INIT_WORK(&pfence->preempt_work, preempt_fence_work_func); + + return pfence; +} + +/** + * xe_preempt_fence_free() - Free a preempt fence allocated using + * xe_preempt_fence_alloc(). + * @pfence: pointer obtained from xe_preempt_fence_alloc(); + * + * Free a preempt fence that has not yet been armed. + */ +void xe_preempt_fence_free(struct xe_preempt_fence *pfence) +{ + list_del(&pfence->link); + kfree(pfence); +} + +/** + * xe_preempt_fence_arm() - Arm a preempt fence allocated using + * xe_preempt_fence_alloc(). + * @pfence: The struct xe_preempt_fence pointer returned from + * xe_preempt_fence_alloc(). + * @e: The struct xe_engine used for arming. + * @context: The dma-fence context used for arming. + * @seqno: The dma-fence seqno used for arming. + * + * Inserts the preempt fence into @context's timeline, takes @link off any + * list, and registers the struct xe_engine as the xe_engine to be preempted. + * + * Return: A pointer to a struct dma_fence embedded into the preempt fence. + * This function doesn't error. + */ +struct dma_fence * +xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e, + u64 context, u32 seqno) +{ + list_del_init(&pfence->link); + pfence->engine = xe_engine_get(e); + dma_fence_init(&pfence->base, &preempt_fence_ops, + &e->compute.lock, context, seqno); + + return &pfence->base; +} + +/** + * xe_preempt_fence_create() - Helper to create and arm a preempt fence. + * @e: The struct xe_engine used for arming. + * @context: The dma-fence context used for arming. + * @seqno: The dma-fence seqno used for arming. + * + * Allocates and inserts the preempt fence into @context's timeline, + * and registers @e as the struct xe_engine to be preempted. + * + * Return: A pointer to the resulting struct dma_fence on success. An error + * pointer on error. 
In particular if allocation fails it returns + * ERR_PTR(-ENOMEM); + */ +struct dma_fence * +xe_preempt_fence_create(struct xe_engine *e, + u64 context, u32 seqno) +{ + struct xe_preempt_fence *pfence; + + pfence = xe_preempt_fence_alloc(); + if (IS_ERR(pfence)) + return ERR_CAST(pfence); + + return xe_preempt_fence_arm(pfence, e, context, seqno); +} + +bool xe_fence_is_xe_preempt(const struct dma_fence *fence) +{ + return fence->ops == &preempt_fence_ops; +} diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h new file mode 100644 index 000000000000..4f3966103203 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_preempt_fence.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PREEMPT_FENCE_H_ +#define _XE_PREEMPT_FENCE_H_ + +#include "xe_preempt_fence_types.h" + +struct list_head; + +struct dma_fence * +xe_preempt_fence_create(struct xe_engine *e, + u64 context, u32 seqno); + +struct xe_preempt_fence *xe_preempt_fence_alloc(void); + +void xe_preempt_fence_free(struct xe_preempt_fence *pfence); + +struct dma_fence * +xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e, + u64 context, u32 seqno); + +static inline struct xe_preempt_fence * +to_preempt_fence(struct dma_fence *fence) +{ + return container_of(fence, struct xe_preempt_fence, base); +} + +/** + * xe_preempt_fence_link() - Return a link used to keep unarmed preempt + * fences on a list. + * @pfence: Pointer to the preempt fence. + * + * The link is embedded in the struct xe_preempt_fence. Use + * link_to_preempt_fence() to convert back to the preempt fence. + * + * Return: A pointer to an embedded struct list_head. + */ +static inline struct list_head * +xe_preempt_fence_link(struct xe_preempt_fence *pfence) +{ + return &pfence->link; +} + +/** + * to_preempt_fence_from_link() - Convert back to a preempt fence pointer + * from a link obtained with xe_preempt_fence_link(). + * @link: The struct list_head obtained from xe_preempt_fence_link(). + * + * Return: A pointer to the embedding struct xe_preempt_fence. + */ +static inline struct xe_preempt_fence * +to_preempt_fence_from_link(struct list_head *link) +{ + return container_of(link, struct xe_preempt_fence, link); +} + +bool xe_fence_is_xe_preempt(const struct dma_fence *fence); +#endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h new file mode 100644 index 000000000000..9d9efd8ff0ed --- /dev/null +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PREEMPT_FENCE_TYPES_H_ +#define _XE_PREEMPT_FENCE_TYPES_H_ + +#include +#include + +struct xe_engine; + +/** + * struct xe_preempt_fence - XE preempt fence + * + * A preemption fence which suspends the execution of an xe_engine on the + * hardware and triggers a callback once the xe_engine is complete. 
+ */ +struct xe_preempt_fence { + /** @base: dma fence base */ + struct dma_fence base; + /** @link: link into list of pending preempt fences */ + struct list_head link; + /** @engine: xe engine for this preempt fence */ + struct xe_engine *engine; + /** @preempt_work: work struct which issues preemption */ + struct work_struct preempt_work; + /** @error: preempt fence is in error state */ + int error; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c new file mode 100644 index 000000000000..81193ddd0af7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -0,0 +1,1542 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_migrate.h" +#include "xe_pt.h" +#include "xe_pt_types.h" +#include "xe_pt_walk.h" +#include "xe_vm.h" +#include "xe_res_cursor.h" + +struct xe_pt_dir { + struct xe_pt pt; + /** @dir: Directory structure for the xe_pt_walk functionality */ + struct xe_ptw_dir dir; +}; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) +#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) +#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) +#else +#define xe_pt_set_addr(__xe_pt, __addr) +#define xe_pt_addr(__xe_pt) 0ull +#endif + +static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; +static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; + +#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) + +static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) +{ + return container_of(pt, struct xe_pt_dir, pt); +} + +static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) +{ + return container_of(pt_dir->dir.entries[index], struct xe_pt, base); +} + +/** + * gen8_pde_encode() - Encode a page-table directory entry pointing to + * another page-table. + * @bo: The page-table bo of the page-table to point to. + * @bo_offset: Offset in the page-table bo to point to. + * @level: The cache level indicating the caching of @bo. + * + * TODO: Rename. + * + * Return: An encoded page directory entry. No errors. 
+ */ +u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level level) +{ + u64 pde; + bool is_lmem; + + pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem); + pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_lmem); + + /* FIXME: I don't think the PPAT handling is correct for MTL */ + + if (level != XE_CACHE_NONE) + pde |= PPAT_CACHED_PDE; + else + pde |= PPAT_UNCACHED; + + return pde; +} + +static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, + size_t page_size, bool *is_lmem) +{ + if (xe_vma_is_userptr(vma)) { + struct xe_res_cursor cur; + u64 page; + + *is_lmem = false; + page = offset >> PAGE_SHIFT; + offset &= (PAGE_SIZE - 1); + + xe_res_first_sg(vma->userptr.sg, page << PAGE_SHIFT, page_size, + &cur); + return xe_res_dma(&cur) + offset; + } else { + return xe_bo_addr(vma->bo, offset, page_size, is_lmem); + } +} + +static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, + u32 pt_level) +{ + pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~GEN8_PAGE_RW; + + /* FIXME: I don't think the PPAT handling is correct for MTL */ + + switch (cache) { + case XE_CACHE_NONE: + pte |= PPAT_UNCACHED; + break; + case XE_CACHE_WT: + pte |= PPAT_DISPLAY_ELLC; + break; + default: + pte |= PPAT_CACHED; + break; + } + + if (pt_level == 1) + pte |= GEN8_PDE_PS_2M; + else if (pt_level == 2) + pte |= GEN8_PDPE_PS_1G; + + /* XXX: Does hw support 1 GiB pages? */ + XE_BUG_ON(pt_level > 2); + + return pte; +} + +/** + * gen8_pte_encode() - Encode a page-table entry pointing to memory. + * @vma: The vma representing the memory to point to. + * @bo: If @vma is NULL, representing the memory to point to. + * @offset: The offset into @vma or @bo. + * @cache: The cache level indicating + * @flags: Currently only supports PTE_READ_ONLY for read-only access. + * @pt_level: The page-table level of the page-table into which the entry + * is to be inserted. + * + * TODO: Rename. + * + * Return: An encoded page-table entry. No errors. + */ +u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, + u64 offset, enum xe_cache_level cache, + u32 flags, u32 pt_level) +{ + u64 pte; + bool is_vram; + + if (vma) + pte = vma_addr(vma, offset, GEN8_PAGE_SIZE, &is_vram); + else + pte = xe_bo_addr(bo, offset, GEN8_PAGE_SIZE, &is_vram); + + if (is_vram) { + pte |= GEN12_PPGTT_PTE_LM; + if (vma && vma->use_atomic_access_pte_bit) + pte |= GEN12_USM_PPGTT_PTE_AE; + } + + return __gen8_pte_encode(pte, cache, flags, pt_level); +} + +static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm, + unsigned int level) +{ + u8 id = gt->info.id; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + if (!vm->scratch_bo[id]) + return 0; + + if (level == 0) { + u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0, + XE_CACHE_WB, 0, 0); + if (vm->flags & XE_VM_FLAGS_64K) + empty |= GEN12_PTE_PS64; + + return empty; + } else { + return gen8_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0, + XE_CACHE_WB); + } +} + +/** + * xe_pt_create() - Create a page-table. + * @vm: The vm to create for. + * @gt: The gt to create for. + * @level: The page-table level. + * + * Allocate and initialize a single struct xe_pt metadata structure. Also + * create the corresponding page-table bo, but don't initialize it. If the + * level is grater than zero, then it's assumed to be a directory page- + * table and the directory structure is also allocated and initialized to + * NULL pointers. 
+ * + * Return: A valid struct xe_pt pointer on success, Pointer error code on + * error. + */ +struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, + unsigned int level) +{ + struct xe_pt *pt; + struct xe_bo *bo; + size_t size; + int err; + + size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) + + GEN8_PDES * sizeof(struct xe_ptw *); + pt = kzalloc(size, GFP_KERNEL); + if (!pt) + return ERR_PTR(-ENOMEM); + + bo = xe_bo_create_pin_map(vm->xe, gt, vm, SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto err_kfree; + } + pt->bo = bo; + pt->level = level; + pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL; + + XE_BUG_ON(level > XE_VM_MAX_LEVEL); + + return pt; + +err_kfree: + kfree(pt); + return ERR_PTR(err); +} + +/** + * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero + * entries. + * @gt: The gt the scratch pagetable of which to use. + * @vm: The vm we populate for. + * @pt: The pagetable the bo of which to initialize. + * + * Populate the page-table bo of @pt with entries pointing into the gt's + * scratch page-table tree if any. Otherwise populate with zeros. + */ +void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, + struct xe_pt *pt) +{ + struct iosys_map *map = &pt->bo->vmap; + u64 empty; + int i; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + if (!vm->scratch_bo[gt->info.id]) { + /* + * FIXME: Some memory is allocated already allocated to zero? + * Find out which memory that is and avoid this memset... + */ + xe_map_memset(vm->xe, map, 0, 0, SZ_4K); + } else { + empty = __xe_pt_empty_pte(gt, vm, pt->level); + for (i = 0; i < GEN8_PDES; i++) + xe_pt_write(vm->xe, map, i, empty); + } +} + +/** + * xe_pt_shift() - Return the ilog2 value of the size of the address range of + * a page-table at a certain level. + * @level: The level. + * + * Return: The ilog2 value of the size of the address range of a page-table + * at level @level. + */ +unsigned int xe_pt_shift(unsigned int level) +{ + return GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * level; +} + +/** + * xe_pt_destroy() - Destroy a page-table tree. + * @pt: The root of the page-table tree to destroy. + * @flags: vm flags. Currently unused. + * @deferred: List head of lockless list for deferred putting. NULL for + * immediate putting. + * + * Puts the page-table bo, recursively calls xe_pt_destroy on all children + * and finally frees @pt. TODO: Can we remove the @flags argument? + */ +void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) +{ + int i; + + if (!pt) + return; + + XE_BUG_ON(!list_empty(&pt->bo->vmas)); + xe_bo_unpin(pt->bo); + xe_bo_put_deferred(pt->bo, deferred); + + if (pt->level > 0 && pt->num_live) { + struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); + + for (i = 0; i < GEN8_PDES; i++) { + if (xe_pt_entry(pt_dir, i)) + xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, + deferred); + } + } + kfree(pt); +} + +/** + * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the + * given gt and vm. + * @xe: xe device. + * @gt: gt to set up for. + * @vm: vm to set up for. + * + * Sets up a pagetable tree with one page-table per level and a single + * leaf bo. All pagetable entries point to the single page-table or, + * for L0, the single bo one level below. + * + * Return: 0 on success, negative error code on error. 
+ */ +int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm) +{ + u8 id = gt->info.id; + int i; + + vm->scratch_bo[id] = xe_bo_create(xe, gt, vm, SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(vm->scratch_bo[id])) + return PTR_ERR(vm->scratch_bo[id]); + xe_bo_pin(vm->scratch_bo[id]); + + for (i = 0; i < vm->pt_root[id]->level; i++) { + vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i); + if (IS_ERR(vm->scratch_pt[id][i])) + return PTR_ERR(vm->scratch_pt[id][i]); + + xe_pt_populate_empty(gt, vm, vm->scratch_pt[id][i]); + } + + return 0; +} + +/** + * DOC: Pagetable building + * + * Below we use the term "page-table" for both page-directories, containing + * pointers to lower level page-directories or page-tables, and level 0 + * page-tables that contain only page-table-entries pointing to memory pages. + * + * When inserting an address range in an already existing page-table tree + * there will typically be a set of page-tables that are shared with other + * address ranges, and a set that are private to this address range. + * The set of shared page-tables can be at most two per level, + * and those can't be updated immediately because the entries of those + * page-tables may still be in use by the gpu for other mappings. Therefore + * when inserting entries into those, we instead stage those insertions by + * adding insertion data into struct xe_vm_pgtable_update structures. This + * data, (subtrees for the cpu and page-table-entries for the gpu) is then + * added in a separate commit step. CPU-data is committed while still under the + * vm lock, the object lock and for userptr, the notifier lock in read mode. + * The GPU async data is committed either by the GPU or CPU after fulfilling + * relevant dependencies. + * For non-shared page-tables (and, in fact, for shared ones that aren't + * existing at the time of staging), we add the data in-place without the + * special update structures. This private part of the page-table tree will + * remain disconnected from the vm page-table tree until data is committed to + * the shared page tables of the vm tree in the commit phase. + */ + +struct xe_pt_update { + /** @update: The update structure we're building for this parent. */ + struct xe_vm_pgtable_update *update; + /** @parent: The parent. Used to detect a parent change. */ + struct xe_pt *parent; + /** @preexisting: Whether the parent was pre-existing or allocated */ + bool preexisting; +}; + +struct xe_pt_stage_bind_walk { + /** base: The base class. */ + struct xe_pt_walk base; + + /* Input parameters for the walk */ + /** @vm: The vm we're building for. */ + struct xe_vm *vm; + /** @gt: The gt we're building for. */ + struct xe_gt *gt; + /** @cache: Desired cache level for the ptes */ + enum xe_cache_level cache; + /** @default_pte: PTE flag only template. No address is associated */ + u64 default_pte; + /** @dma_offset: DMA offset to add to the PTE. */ + u64 dma_offset; + /** + * @needs_64k: This address range enforces 64K alignment and + * granularity. + */ + bool needs_64K; + /** + * @pte_flags: Flags determining PTE setup. These are not flags + * encoded directly in the PTE. See @default_pte for those. + */ + u32 pte_flags; + + /* Also input, but is updated during the walk*/ + /** @curs: The DMA address cursor. 
*/ + struct xe_res_cursor *curs; + /** @va_curs_start: The Virtual address coresponding to @curs->start */ + u64 va_curs_start; + + /* Output */ + struct xe_walk_update { + /** @wupd.entries: Caller provided storage. */ + struct xe_vm_pgtable_update *entries; + /** @wupd.num_used_entries: Number of update @entries used. */ + unsigned int num_used_entries; + /** @wupd.updates: Tracks the update entry at a given level */ + struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; + } wupd; + + /* Walk state */ + /** + * @l0_end_addr: The end address of the current l0 leaf. Used for + * 64K granularity detection. + */ + u64 l0_end_addr; + /** @addr_64K: The start address of the current 64K chunk. */ + u64 addr_64K; + /** @found_64: Whether @add_64K actually points to a 64K chunk. */ + bool found_64K; +}; + +static int +xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, + pgoff_t offset, bool alloc_entries) +{ + struct xe_pt_update *upd = &wupd->updates[parent->level]; + struct xe_vm_pgtable_update *entry; + + /* + * For *each level*, we could only have one active + * struct xt_pt_update at any one time. Once we move on to a + * new parent and page-directory, the old one is complete, and + * updates are either already stored in the build tree or in + * @wupd->entries + */ + if (likely(upd->parent == parent)) + return 0; + + upd->parent = parent; + upd->preexisting = true; + + if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) + return -EINVAL; + + entry = wupd->entries + wupd->num_used_entries++; + upd->update = entry; + entry->ofs = offset; + entry->pt_bo = parent->bo; + entry->pt = parent; + entry->flags = 0; + entry->qwords = 0; + + if (alloc_entries) { + entry->pt_entries = kmalloc_array(GEN8_PDES, + sizeof(*entry->pt_entries), + GFP_KERNEL); + if (!entry->pt_entries) + return -ENOMEM; + } + + return 0; +} + +/* + * NOTE: This is a very frequently called function so we allow ourselves + * to annotate (using branch prediction hints) the fastpath of updating a + * non-pre-existing pagetable with leaf ptes. + */ +static int +xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, + pgoff_t offset, struct xe_pt *xe_child, u64 pte) +{ + struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; + struct xe_pt_update *child_upd = xe_child ? + &xe_walk->wupd.updates[xe_child->level] : NULL; + int ret; + + ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); + if (unlikely(ret)) + return ret; + + /* + * Register this new pagetable so that it won't be recognized as + * a shared pagetable by a subsequent insertion. + */ + if (unlikely(child_upd)) { + child_upd->update = NULL; + child_upd->parent = xe_child; + child_upd->preexisting = false; + } + + if (likely(!upd->preexisting)) { + /* Continue building a non-connected subtree. */ + struct iosys_map *map = &parent->bo->vmap; + + if (unlikely(xe_child)) + parent->base.dir->entries[offset] = &xe_child->base; + + xe_pt_write(xe_walk->vm->xe, map, offset, pte); + parent->num_live++; + } else { + /* Shared pt. Stage update. */ + unsigned int idx; + struct xe_vm_pgtable_update *entry = upd->update; + + idx = offset - entry->ofs; + entry->pt_entries[idx].pt = xe_child; + entry->pt_entries[idx].pte = pte; + entry->qwords++; + } + + return 0; +} + +static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, + struct xe_pt_stage_bind_walk *xe_walk) +{ + u64 size, dma; + + /* Does the virtual range requested cover a huge pte? 
*/ + if (!xe_pt_covers(addr, next, level, &xe_walk->base)) + return false; + + /* Does the DMA segment cover the whole pte? */ + if (next - xe_walk->va_curs_start > xe_walk->curs->size) + return false; + + /* Is the DMA address huge PTE size aligned? */ + size = next - addr; + dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); + + return IS_ALIGNED(dma, size); +} + +/* + * Scan the requested mapping to check whether it can be done entirely + * with 64K PTEs. + */ +static bool +xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) +{ + struct xe_res_cursor curs = *xe_walk->curs; + + if (!IS_ALIGNED(addr, SZ_64K)) + return false; + + if (next > xe_walk->l0_end_addr) + return false; + + xe_res_next(&curs, addr - xe_walk->va_curs_start); + for (; addr < next; addr += SZ_64K) { + if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) + return false; + + xe_res_next(&curs, SZ_64K); + } + + return addr == next; +} + +/* + * For non-compact "normal" 4K level-0 pagetables, we want to try to group + * addresses together in 64K-contigous regions to add a 64K TLB hint for the + * device to the PTE. + * This function determines whether the address is part of such a + * segment. For VRAM in normal pagetables, this is strictly necessary on + * some devices. + */ +static bool +xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) +{ + /* Address is within an already found 64k region */ + if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) + return true; + + xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); + xe_walk->addr_64K = addr; + + return xe_walk->found_64K; +} + +static int +xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_stage_bind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); + struct xe_pt *xe_child; + bool covers; + int ret = 0; + u64 pte; + + /* Is this a leaf entry ?*/ + if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { + struct xe_res_cursor *curs = xe_walk->curs; + + XE_WARN_ON(xe_walk->va_curs_start != addr); + + pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset, + xe_walk->cache, xe_walk->pte_flags, + level); + pte |= xe_walk->default_pte; + + /* + * Set the GEN12_PTE_PS64 hint if possible, otherwise if + * this device *requires* 64K PTE size for VRAM, fail. + */ + if (level == 0 && !xe_parent->is_compact) { + if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) + pte |= GEN12_PTE_PS64; + else if (XE_WARN_ON(xe_walk->needs_64K)) + return -EINVAL; + } + + ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); + if (unlikely(ret)) + return ret; + + xe_res_next(curs, next - addr); + xe_walk->va_curs_start = next; + *action = ACTION_CONTINUE; + + return ret; + } + + /* + * Descending to lower level. Determine if we need to allocate a + * new page table or -directory, which we do if there is no + * previous one or there is one we can completely replace. 
+ */ + if (level == 1) { + walk->shifts = xe_normal_pt_shifts; + xe_walk->l0_end_addr = next; + } + + covers = xe_pt_covers(addr, next, level, &xe_walk->base); + if (covers || !*child) { + u64 flags = 0; + + xe_child = xe_pt_create(xe_walk->vm, xe_walk->gt, level - 1); + if (IS_ERR(xe_child)) + return PTR_ERR(xe_child); + + xe_pt_set_addr(xe_child, + round_down(addr, 1ull << walk->shifts[level])); + + if (!covers) + xe_pt_populate_empty(xe_walk->gt, xe_walk->vm, xe_child); + + *child = &xe_child->base; + + /* + * Prefer the compact pagetable layout for L0 if possible. + * TODO: Suballocate the pt bo to avoid wasting a lot of + * memory. + */ + if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 && + covers && xe_pt_scan_64K(addr, next, xe_walk)) { + walk->shifts = xe_compact_pt_shifts; + flags |= GEN12_PDE_64K; + xe_child->is_compact = true; + } + + pte = gen8_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags; + ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, + pte); + } + + *action = ACTION_SUBTREE; + return ret; +} + +static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { + .pt_entry = xe_pt_stage_bind_entry, +}; + +/** + * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address + * range. + * @gt: The gt we're building for. + * @vma: The vma indicating the address range. + * @entries: Storage for the update entries used for connecting the tree to + * the main tree at commit time. + * @num_entries: On output contains the number of @entries used. + * + * This function builds a disconnected page-table tree for a given address + * range. The tree is connected to the main vm tree for the gpu using + * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). + * The function builds xe_vm_pgtable_update structures for already existing + * shared page-tables, and non-existing shared and non-shared page-tables + * are built and populated directly. + * + * Return 0 on success, negative error code on error. 
+ */ +static int +xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, u32 *num_entries) +{ + struct xe_bo *bo = vma->bo; + bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo); + struct xe_res_cursor curs; + struct xe_pt_stage_bind_walk xe_walk = { + .base = { + .ops = &xe_pt_stage_bind_ops, + .shifts = xe_normal_pt_shifts, + .max_level = XE_PT_HIGHEST_LEVEL, + }, + .vm = vma->vm, + .gt = gt, + .curs = &curs, + .va_curs_start = vma->start, + .pte_flags = vma->pte_flags, + .wupd.entries = entries, + .needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram, + }; + struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + int ret; + + if (is_vram) { + xe_walk.default_pte = GEN12_PPGTT_PTE_LM; + if (vma && vma->use_atomic_access_pte_bit) + xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE; + xe_walk.dma_offset = gt->mem.vram.io_start - + gt_to_xe(gt)->mem.vram.io_start; + xe_walk.cache = XE_CACHE_WB; + } else { + if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT) + xe_walk.cache = XE_CACHE_WT; + else + xe_walk.cache = XE_CACHE_WB; + } + + xe_bo_assert_held(bo); + if (xe_vma_is_userptr(vma)) + xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1, + &curs); + else if (xe_bo_is_vram(bo)) + xe_res_first(bo->ttm.resource, vma->bo_offset, + vma->end - vma->start + 1, &curs); + else + xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset, + vma->end - vma->start + 1, &curs); + + ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1, + &xe_walk.base); + + *num_entries = xe_walk.wupd.num_used_entries; + return ret; +} + +/** + * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a + * shared pagetable. + * @addr: The start address within the non-shared pagetable. + * @end: The end address within the non-shared pagetable. + * @level: The level of the non-shared pagetable. + * @walk: Walk info. The function adjusts the walk action. + * @action: next action to perform (see enum page_walk_action) + * @offset: Ignored on input, First non-shared entry on output. + * @end_offset: Ignored on input, Last non-shared entry + 1 on output. + * + * A non-shared page-table has some entries that belong to the address range + * and others that don't. This function determines the entries that belong + * fully to the address range. Depending on level, some entries may + * partially belong to the address range (that can't happen at level 0). + * The function detects that and adjust those offsets to not include those + * partial entries. Iff it does detect partial entries, we know that there must + * be shared page tables also at lower levels, so it adjusts the walk action + * accordingly. + * + * Return: true if there were non-shared entries, false otherwise. 
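+ *
+ * As a worked example, assuming 2M entries at level 1, a walk over
+ * [2M + 64K, 6M) yields *offset = 2 and *end_offset = 3: the entry
+ * covering [4M, 6M) lies fully inside the range, while the entry covering
+ * [2M, 4M) is only partially inside it, so the action is switched to
+ * ACTION_SUBTREE to descend into that partial entry.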
+ */
+static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level,
+				    struct xe_pt_walk *walk,
+				    enum page_walk_action *action,
+				    pgoff_t *offset, pgoff_t *end_offset)
+{
+	u64 size = 1ull << walk->shifts[level];
+
+	*offset = xe_pt_offset(addr, level, walk);
+	*end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset;
+
+	if (!level)
+		return true;
+
+	/*
+	 * If addr or next are not size aligned, there are shared pts at lower
+	 * level, so in that case traverse down the subtree
+	 */
+	*action = ACTION_CONTINUE;
+	if (!IS_ALIGNED(addr, size)) {
+		*action = ACTION_SUBTREE;
+		(*offset)++;
+	}
+
+	if (!IS_ALIGNED(end, size)) {
+		*action = ACTION_SUBTREE;
+		(*end_offset)--;
+	}
+
+	return *end_offset > *offset;
+}
+
+struct xe_pt_zap_ptes_walk {
+	/** @base: The walk base-class */
+	struct xe_pt_walk base;
+
+	/* Input parameters for the walk */
+	/** @gt: The gt we're building for */
+	struct xe_gt *gt;
+
+	/* Output */
+	/** @needs_invalidate: Whether we need to invalidate TLB */
+	bool needs_invalidate;
+};
+
+static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset,
+				unsigned int level, u64 addr, u64 next,
+				struct xe_ptw **child,
+				enum page_walk_action *action,
+				struct xe_pt_walk *walk)
+{
+	struct xe_pt_zap_ptes_walk *xe_walk =
+		container_of(walk, typeof(*xe_walk), base);
+	struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+	pgoff_t end_offset;
+
+	XE_BUG_ON(!*child);
+	XE_BUG_ON(!level && xe_child->is_compact);
+
+	/*
+	 * Note that we're called from an entry callback, and we're dealing
+	 * with the child of that entry rather than the parent, so need to
+	 * adjust level down.
+	 */
+	if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset,
+				    &end_offset)) {
+		xe_map_memset(gt_to_xe(xe_walk->gt), &xe_child->bo->vmap,
+			      offset * sizeof(u64), 0,
+			      (end_offset - offset) * sizeof(u64));
+		xe_walk->needs_invalidate = true;
+	}
+
+	return 0;
+}
+
+static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = {
+	.pt_entry = xe_pt_zap_ptes_entry,
+};
+
+/**
+ * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range
+ * @gt: The gt we're zapping for.
+ * @vma: GPU VMA detailing address range.
+ *
+ * Eviction and Userptr invalidation need to be able to zap the
+ * gpu ptes of a given address range in pagefaulting mode.
+ * In order to be able to do that, this function needs access to the shared
+ * page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the necessary locks to ensure neither the page-table connectivity
+ * nor the page-table entries of the range are updated from under us.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required.
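+ *
+ * A caller is therefore expected to pair a zap with a TLB invalidation,
+ * roughly (where invalidate_tlb() stands in for whatever GT TLB
+ * invalidation mechanism the caller uses):
+ *
+ *	if (xe_pt_zap_ptes(gt, vma))
+ *		invalidate_tlb(gt, vma);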
+ */ +bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma) +{ + struct xe_pt_zap_ptes_walk xe_walk = { + .base = { + .ops = &xe_pt_zap_ptes_ops, + .shifts = xe_normal_pt_shifts, + .max_level = XE_PT_HIGHEST_LEVEL, + }, + .gt = gt, + }; + struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + + if (!(vma->gt_present & BIT(gt->info.id))) + return false; + + (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, + &xe_walk.base); + + return xe_walk.needs_invalidate; +} + +static void +xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt, + struct iosys_map *map, void *data, + u32 qword_ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update) +{ + struct xe_pt_entry *ptes = update->pt_entries; + u64 *ptr = data; + u32 i; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + for (i = 0; i < num_qwords; i++) { + if (map) + xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * + sizeof(u64), u64, ptes[i].pte); + else + ptr[i] = ptes[i].pte; + } +} + +static void xe_pt_abort_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries) +{ + u32 i, j; + + for (i = 0; i < num_entries; i++) { + if (!entries[i].pt_entries) + continue; + + for (j = 0; j < entries[i].qwords; j++) + xe_pt_destroy(entries[i].pt_entries[j].pt, vma->vm->flags, NULL); + kfree(entries[i].pt_entries); + } +} + +static void xe_pt_commit_locks_assert(struct xe_vma *vma) +{ + struct xe_vm *vm = vma->vm; + + lockdep_assert_held(&vm->lock); + + if (xe_vma_is_userptr(vma)) + lockdep_assert_held_read(&vm->userptr.notifier_lock); + else + dma_resv_assert_held(vma->bo->ttm.base.resv); + + dma_resv_assert_held(&vm->resv); +} + +static void xe_pt_commit_bind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, + u32 num_entries, bool rebind, + struct llist_head *deferred) +{ + u32 i, j; + + xe_pt_commit_locks_assert(vma); + + for (i = 0; i < num_entries; i++) { + struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; + + if (!rebind) + pt->num_live += entries[i].qwords; + + if (!pt->level) { + kfree(entries[i].pt_entries); + continue; + } + + pt_dir = as_xe_pt_dir(pt); + for (j = 0; j < entries[i].qwords; j++) { + u32 j_ = j + entries[i].ofs; + struct xe_pt *newpte = entries[i].pt_entries[j].pt; + + if (xe_pt_entry(pt_dir, j_)) + xe_pt_destroy(xe_pt_entry(pt_dir, j_), + vma->vm->flags, deferred); + + pt_dir->dir.entries[j_] = &newpte->base; + } + kfree(entries[i].pt_entries); + } +} + +static int +xe_pt_prepare_bind(struct xe_gt *gt, struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, u32 *num_entries, + bool rebind) +{ + int err; + + *num_entries = 0; + err = xe_pt_stage_bind(gt, vma, entries, num_entries); + if (!err) + BUG_ON(!*num_entries); + else /* abort! 
*/ + xe_pt_abort_bind(vma, entries, *num_entries); + + return err; +} + +static void xe_vm_dbg_print_entries(struct xe_device *xe, + const struct xe_vm_pgtable_update *entries, + unsigned int num_entries) +#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) +{ + unsigned int i; + + vm_dbg(&xe->drm, "%u entries to update\n", num_entries); + for (i = 0; i < num_entries; i++) { + const struct xe_vm_pgtable_update *entry = &entries[i]; + struct xe_pt *xe_pt = entry->pt; + u64 page_size = 1ull << xe_pt_shift(xe_pt->level); + u64 end; + u64 start; + + XE_BUG_ON(entry->pt->is_compact); + start = entry->ofs * page_size; + end = start + page_size * entry->qwords; + vm_dbg(&xe->drm, + "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", + i, xe_pt->level, entry->ofs, entry->qwords, + xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); + } +} +#else +{} +#endif + +#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT + +static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) +{ + u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2; + static u32 count; + + if (count++ % divisor == divisor - 1) { + struct xe_vm *vm = vma->vm; + + vma->userptr.divisor = divisor << 1; + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&vma->userptr.invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + return true; + } + + return false; +} + +#else + +static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma) +{ + return false; +} + +#endif + +/** + * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks + * @base: Base we derive from. + * @bind: Whether this is a bind or an unbind operation. A bind operation + * makes the pre-commit callback error with -EAGAIN if it detects a + * pending invalidation. + * @locked: Whether the pre-commit callback locked the userptr notifier lock + * and it needs unlocking. + */ +struct xe_pt_migrate_pt_update { + struct xe_migrate_pt_update base; + bool bind; + bool locked; +}; + +static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) +{ + struct xe_pt_migrate_pt_update *userptr_update = + container_of(pt_update, typeof(*userptr_update), base); + struct xe_vma *vma = pt_update->vma; + unsigned long notifier_seq = vma->userptr.notifier_seq; + struct xe_vm *vm = vma->vm; + + userptr_update->locked = false; + + /* + * Wait until nobody is running the invalidation notifier, and + * since we're exiting the loop holding the notifier lock, + * nobody can proceed invalidating either. + * + * Note that we don't update the vma->userptr.notifier_seq since + * we don't update the userptr pages. + */ + do { + down_read(&vm->userptr.notifier_lock); + if (!mmu_interval_read_retry(&vma->userptr.notifier, + notifier_seq)) + break; + + up_read(&vm->userptr.notifier_lock); + + if (userptr_update->bind) + return -EAGAIN; + + notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + } while (true); + + /* Inject errors to test_whether they are handled correctly */ + if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) { + up_read(&vm->userptr.notifier_lock); + return -EAGAIN; + } + + userptr_update->locked = true; + + return 0; +} + +static const struct xe_migrate_pt_update_ops bind_ops = { + .populate = xe_vm_populate_pgtable, +}; + +static const struct xe_migrate_pt_update_ops userptr_bind_ops = { + .populate = xe_vm_populate_pgtable, + .pre_commit = xe_pt_userptr_pre_commit, +}; + +/** + * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma + * address range. 
+ * @gt: The gt to bind for. + * @vma: The vma to bind. + * @e: The engine with which to do pipelined page-table updates. + * @syncs: Entries to sync on before binding the built tree to the live vm tree. + * @num_syncs: Number of @sync entries. + * @rebind: Whether we're rebinding this vma to the same address range without + * an unbind in-between. + * + * This function builds a page-table tree (see xe_pt_stage_bind() for more + * information on page-table building), and the xe_vm_pgtable_update entries + * abstracting the operations needed to attach it to the main vm tree. It + * then takes the relevant locks and updates the metadata side of the main + * vm tree and submits the operations for pipelined attachment of the + * gpu page-table to the vm main tree, (which can be done either by the + * cpu and the GPU). + * + * Return: A valid dma-fence representing the pipelined attachment operation + * on success, an error pointer on error. + */ +struct dma_fence * +__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs, + bool rebind) +{ + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; + struct xe_pt_migrate_pt_update bind_pt_update = { + .base = { + .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, + .vma = vma, + }, + .bind = true, + }; + struct xe_vm *vm = vma->vm; + u32 num_entries; + struct dma_fence *fence; + int err; + + bind_pt_update.locked = false; + xe_bo_assert_held(vma->bo); + xe_vm_assert_held(vm); + XE_BUG_ON(xe_gt_is_media_type(gt)); + + vm_dbg(&vma->vm->xe->drm, + "Preparing bind, with range [%llx...%llx) engine %p.\n", + vma->start, vma->end, e); + + err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind); + if (err) + goto err; + XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); + + xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); + + fence = xe_migrate_update_pgtables(gt->migrate, + vm, vma->bo, + e ? e : vm->eng[gt->info.id], + entries, num_entries, + syncs, num_syncs, + &bind_pt_update.base); + if (!IS_ERR(fence)) { + LLIST_HEAD(deferred); + + /* add shared fence now for pagetable delayed destroy */ + dma_resv_add_fence(&vm->resv, fence, !rebind && + vma->last_munmap_rebind ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + if (!xe_vma_is_userptr(vma) && !vma->bo->vm) + dma_resv_add_fence(vma->bo->ttm.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + xe_pt_commit_bind(vma, entries, num_entries, rebind, + bind_pt_update.locked ? &deferred : NULL); + + /* This vma is live (again?) now */ + vma->gt_present |= BIT(gt->info.id); + + if (bind_pt_update.locked) { + vma->userptr.initial_bind = true; + up_read(&vm->userptr.notifier_lock); + xe_bo_put_commit(&deferred); + } + if (!rebind && vma->last_munmap_rebind && + xe_vm_in_compute_mode(vm)) + queue_work(vm->xe->ordered_wq, + &vm->preempt.rebind_work); + } else { + if (bind_pt_update.locked) + up_read(&vm->userptr.notifier_lock); + xe_pt_abort_bind(vma, entries, num_entries); + } + + return fence; + +err: + return ERR_PTR(err); +} + +struct xe_pt_stage_unbind_walk { + /** @base: The pagewalk base-class. */ + struct xe_pt_walk base; + + /* Input parameters for the walk */ + /** @gt: The gt we're unbinding from. */ + struct xe_gt *gt; + + /** + * @modified_start: Walk range start, modified to include any + * shared pagetables that we're the only user of and can thus + * treat as private. + */ + u64 modified_start; + /** @modified_end: Walk range start, modified like @modified_start. 
*/ + u64 modified_end; + + /* Output */ + /* @wupd: Structure to track the page-table updates we're building */ + struct xe_walk_update wupd; +}; + +/* + * Check whether this range is the only one populating this pagetable, + * and in that case, update the walk range checks so that higher levels don't + * view us as a shared pagetable. + */ +static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, + const struct xe_pt *child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_stage_unbind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + unsigned int shift = walk->shifts[level]; + u64 size = 1ull << shift; + + if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && + ((next - addr) >> shift) == child->num_live) { + u64 size = 1ull << walk->shifts[level + 1]; + + *action = ACTION_CONTINUE; + + if (xe_walk->modified_start >= addr) + xe_walk->modified_start = round_down(addr, size); + if (xe_walk->modified_end <= next) + xe_walk->modified_end = round_up(next, size); + + return true; + } + + return false; +} + +static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + + XE_BUG_ON(!*child); + XE_BUG_ON(!level && xe_child->is_compact); + + xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); + + return 0; +} + +static int +xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk) +{ + struct xe_pt_stage_unbind_walk *xe_walk = + container_of(walk, typeof(*xe_walk), base); + struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); + pgoff_t end_offset; + u64 size = 1ull << walk->shifts[--level]; + + if (!IS_ALIGNED(addr, size)) + addr = xe_walk->modified_start; + if (!IS_ALIGNED(next, size)) + next = xe_walk->modified_end; + + /* Parent == *child is the root pt. Don't kill it. */ + if (parent != *child && + xe_pt_check_kill(addr, next, level, xe_child, action, walk)) + return 0; + + if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, + &end_offset)) + return 0; + + (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); + xe_walk->wupd.updates[level].update->qwords = end_offset - offset; + + return 0; +} + +static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { + .pt_entry = xe_pt_stage_unbind_entry, + .pt_post_descend = xe_pt_stage_unbind_post_descend, +}; + +/** + * xe_pt_stage_unbind() - Build page-table update structures for an unbind + * operation + * @gt: The gt we're unbinding for. + * @vma: The vma we're unbinding. + * @entries: Caller-provided storage for the update structures. + * + * Builds page-table update structures for an unbind operation. The function + * will attempt to remove all page-tables that we're the only user + * of, and for that to work, the unbind operation must be committed in the + * same critical section that blocks racing binds to the same page-table tree. + * + * Return: The number of entries used. 
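+ *
+ * Note that the number of entries never exceeds XE_VM_MAX_LEVEL * 2 + 1
+ * (the limit enforced in xe_pt_new_shared()), which is why callers such as
+ * __xe_pt_unbind_vma() can keep the @entries array on the stack.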
+ */ +static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma, + struct xe_vm_pgtable_update *entries) +{ + struct xe_pt_stage_unbind_walk xe_walk = { + .base = { + .ops = &xe_pt_stage_unbind_ops, + .shifts = xe_normal_pt_shifts, + .max_level = XE_PT_HIGHEST_LEVEL, + }, + .gt = gt, + .modified_start = vma->start, + .modified_end = vma->end + 1, + .wupd.entries = entries, + }; + struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + + (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, + &xe_walk.base); + + return xe_walk.wupd.num_used_entries; +} + +static void +xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, + struct xe_gt *gt, struct iosys_map *map, + void *ptr, u32 qword_ofs, u32 num_qwords, + const struct xe_vm_pgtable_update *update) +{ + struct xe_vma *vma = pt_update->vma; + u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level); + int i; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + if (map && map->is_iomem) + for (i = 0; i < num_qwords; ++i) + xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * + sizeof(u64), u64, empty); + else if (map) + memset64(map->vaddr + qword_ofs * sizeof(u64), empty, + num_qwords); + else + memset64(ptr, empty, num_qwords); +} + +static void +xe_pt_commit_unbind(struct xe_vma *vma, + struct xe_vm_pgtable_update *entries, u32 num_entries, + struct llist_head *deferred) +{ + u32 j; + + xe_pt_commit_locks_assert(vma); + + for (j = 0; j < num_entries; ++j) { + struct xe_vm_pgtable_update *entry = &entries[j]; + struct xe_pt *pt = entry->pt; + + pt->num_live -= entry->qwords; + if (pt->level) { + struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); + u32 i; + + for (i = entry->ofs; i < entry->ofs + entry->qwords; + i++) { + if (xe_pt_entry(pt_dir, i)) + xe_pt_destroy(xe_pt_entry(pt_dir, i), + vma->vm->flags, deferred); + + pt_dir->dir.entries[i] = NULL; + } + } + } +} + +static const struct xe_migrate_pt_update_ops unbind_ops = { + .populate = xe_migrate_clear_pgtable_callback, +}; + +static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { + .populate = xe_migrate_clear_pgtable_callback, + .pre_commit = xe_pt_userptr_pre_commit, +}; + +/** + * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma + * address range. + * @gt: The gt to unbind for. + * @vma: The vma to unbind. + * @e: The engine with which to do pipelined page-table updates. + * @syncs: Entries to sync on before disconnecting the tree to be destroyed. + * @num_syncs: Number of @sync entries. + * + * This function builds a the xe_vm_pgtable_update entries abstracting the + * operations needed to detach the page-table tree to be destroyed from the + * man vm tree. + * It then takes the relevant locks and submits the operations for + * pipelined detachment of the gpu page-table from the vm main tree, + * (which can be done either by the cpu and the GPU), Finally it frees the + * detached page-table tree. + * + * Return: A valid dma-fence representing the pipelined detachment operation + * on success, an error pointer on error. + */ +struct dma_fence * +__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; + struct xe_pt_migrate_pt_update unbind_pt_update = { + .base = { + .ops = xe_vma_is_userptr(vma) ? 
&userptr_unbind_ops : + &unbind_ops, + .vma = vma, + }, + }; + struct xe_vm *vm = vma->vm; + u32 num_entries; + struct dma_fence *fence = NULL; + LLIST_HEAD(deferred); + + xe_bo_assert_held(vma->bo); + xe_vm_assert_held(vm); + XE_BUG_ON(xe_gt_is_media_type(gt)); + + vm_dbg(&vma->vm->xe->drm, + "Preparing unbind, with range [%llx...%llx) engine %p.\n", + vma->start, vma->end, e); + + num_entries = xe_pt_stage_unbind(gt, vma, entries); + XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); + + xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); + + /* + * Even if we were already evicted and unbind to destroy, we need to + * clear again here. The eviction may have updated pagetables at a + * lower level, because it needs to be more conservative. + */ + fence = xe_migrate_update_pgtables(gt->migrate, + vm, NULL, e ? e : + vm->eng[gt->info.id], + entries, num_entries, + syncs, num_syncs, + &unbind_pt_update.base); + if (!IS_ERR(fence)) { + /* add shared fence now for pagetable delayed destroy */ + dma_resv_add_fence(&vm->resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + + /* This fence will be installed by caller when doing eviction */ + if (!xe_vma_is_userptr(vma) && !vma->bo->vm) + dma_resv_add_fence(vma->bo->ttm.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + xe_pt_commit_unbind(vma, entries, num_entries, + unbind_pt_update.locked ? &deferred : NULL); + vma->gt_present &= ~BIT(gt->info.id); + } + + if (!vma->gt_present) + list_del_init(&vma->rebind_link); + + if (unbind_pt_update.locked) { + XE_WARN_ON(!xe_vma_is_userptr(vma)); + + if (!vma->gt_present) { + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + } + up_read(&vm->userptr.notifier_lock); + xe_bo_put_commit(&deferred); + } + + return fence; +} diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h new file mode 100644 index 000000000000..1152043e5c63 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_PT_H_ +#define _XE_PT_H_ + +#include + +#include "xe_pt_types.h" + +struct dma_fence; +struct xe_bo; +struct xe_device; +struct xe_engine; +struct xe_gt; +struct xe_sync_entry; +struct xe_vm; +struct xe_vma; + +#define xe_pt_write(xe, map, idx, data) \ + xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data) + +unsigned int xe_pt_shift(unsigned int level); + +struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, + unsigned int level); + +int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm); + +void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, + struct xe_pt *pt); + +void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred); + +struct dma_fence * +__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs, + bool rebind); + +struct dma_fence * +__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs); + +bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma); + +u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level level); + +u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, + u64 offset, enum xe_cache_level cache, + u32 flags, u32 pt_level); +#endif diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h new file mode 100644 index 000000000000..2ed64c0a4485 --- /dev/null +++ 
b/drivers/gpu/drm/xe/xe_pt_types.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PT_TYPES_H_ +#define _XE_PT_TYPES_H_ + +#include "xe_pt_walk.h" + +enum xe_cache_level { + XE_CACHE_NONE, + XE_CACHE_WT, + XE_CACHE_WB, +}; + +#define XE_VM_MAX_LEVEL 4 + +struct xe_pt { + struct xe_ptw base; + struct xe_bo *bo; + unsigned int level; + unsigned int num_live; + bool rebind; + bool is_compact; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) + /** addr: Virtual address start address of the PT. */ + u64 addr; +#endif +}; + +struct xe_pt_entry { + struct xe_pt *pt; + u64 pte; +}; + +struct xe_vm_pgtable_update { + /** @bo: page table bo to write to */ + struct xe_bo *pt_bo; + + /** @ofs: offset inside this PTE to begin writing to (in qwords) */ + u32 ofs; + + /** @qwords: number of PTE's to write */ + u32 qwords; + + /** @pt: opaque pointer useful for the caller of xe_migrate_update_pgtables */ + struct xe_pt *pt; + + /** @pt_entries: Newly added pagetable entries */ + struct xe_pt_entry *pt_entries; + + /** @flags: Target flags */ + u32 flags; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c new file mode 100644 index 000000000000..0def89af4372 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ +#include "xe_pt_walk.h" + +/** + * DOC: GPU page-table tree walking. + * The utilities in this file are similar to the CPU page-table walk + * utilities in mm/pagewalk.c. The main difference is that we distinguish + * the various levels of a page-table tree with an unsigned integer rather + * than by name. 0 is the lowest level, and page-tables with level 0 can + * not be directories pointing to lower levels, whereas all other levels + * can. The user of the utilities determines the highest level. + * + * Nomenclature: + * Each struct xe_ptw, regardless of level is referred to as a page table, and + * multiple page tables typically form a page table tree with page tables at + * intermediate levels being page directories pointing at page tables at lower + * levels. A shared page table for a given address range is a page-table which + * is neither fully within nor fully outside the address range and that can + * thus be shared by two or more address ranges. + * + * Please keep this code generic so that it can used as a drm-wide page- + * table walker should other drivers find use for it. + */ +static u64 xe_pt_addr_end(u64 addr, u64 end, unsigned int level, + const struct xe_pt_walk *walk) +{ + u64 size = 1ull << walk->shifts[level]; + u64 tmp = round_up(addr + 1, size); + + return min_t(u64, tmp, end); +} + +static bool xe_pt_next(pgoff_t *offset, u64 *addr, u64 next, u64 end, + unsigned int level, const struct xe_pt_walk *walk) +{ + pgoff_t step = 1; + + /* Shared pt walk skips to the last pagetable */ + if (unlikely(walk->shared_pt_mode)) { + unsigned int shift = walk->shifts[level]; + u64 skip_to = round_down(end, 1ull << shift); + + if (skip_to > next) { + step += (skip_to - next) >> shift; + next = skip_to; + } + } + + *addr = next; + *offset += step; + + return next != end; +} + +/** + * xe_pt_walk_range() - Walk a range of a gpu page table tree with callbacks + * for each page-table entry in all levels. + * @parent: The root page table for walk start. + * @level: The root page table level. + * @addr: Virtual address start. + * @end: Virtual address end + 1. + * @walk: Walk info. 
+ * + * Similar to the CPU page-table walker, this is a helper to walk + * a gpu page table and call a provided callback function for each entry. + * + * Return: 0 on success, negative error code on error. The error is + * propagated from the callback and on error the walk is terminated. + */ +int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk) +{ + pgoff_t offset = xe_pt_offset(addr, level, walk); + struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL; + const struct xe_pt_walk_ops *ops = walk->ops; + enum page_walk_action action; + struct xe_ptw *child; + int err = 0; + u64 next; + + do { + next = xe_pt_addr_end(addr, end, level, walk); + if (walk->shared_pt_mode && xe_pt_covers(addr, next, level, + walk)) + continue; +again: + action = ACTION_SUBTREE; + child = entries ? entries[offset] : NULL; + err = ops->pt_entry(parent, offset, level, addr, next, + &child, &action, walk); + if (err) + break; + + /* Probably not needed yet for gpu pagetable walk. */ + if (unlikely(action == ACTION_AGAIN)) + goto again; + + if (likely(!level || !child || action == ACTION_CONTINUE)) + continue; + + err = xe_pt_walk_range(child, level - 1, addr, next, walk); + + if (!err && ops->pt_post_descend) + err = ops->pt_post_descend(parent, offset, level, addr, + next, &child, &action, walk); + if (err) + break; + + } while (xe_pt_next(&offset, &addr, next, end, level, walk)); + + return err; +} + +/** + * xe_pt_walk_shared() - Walk shared page tables of a page-table tree. + * @parent: Root page table directory. + * @level: Level of the root. + * @addr: Start address. + * @end: Last address + 1. + * @walk: Walk info. + * + * This function is similar to xe_pt_walk_range() but it skips page tables + * that are private to the range. Since the root (or @parent) page table is + * typically also a shared page table this function is different in that it + * calls the pt_entry callback and the post_descend callback also for the + * root. The root can be detected in the callbacks by checking whether + * parent == *child. + * Walking only the shared page tables is common for unbind-type operations + * where the page-table entries for an address range are cleared or detached + * from the main page-table tree. + * + * Return: 0 on success, negative error code on error: If a callback + * returns an error, the walk will be terminated and the error returned by + * this function. + */ +int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk) +{ + const struct xe_pt_walk_ops *ops = walk->ops; + enum page_walk_action action = ACTION_SUBTREE; + struct xe_ptw *child = parent; + int err; + + walk->shared_pt_mode = true; + err = walk->ops->pt_entry(parent, 0, level + 1, addr, end, + &child, &action, walk); + + if (err || action != ACTION_SUBTREE) + return err; + + err = xe_pt_walk_range(parent, level, addr, end, walk); + if (!err && ops->pt_post_descend) { + err = ops->pt_post_descend(parent, 0, level + 1, addr, end, + &child, &action, walk); + } + return err; +} diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h new file mode 100644 index 000000000000..42c51fa601ec --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef __XE_PT_WALK__ +#define __XE_PT_WALK__ + +#include +#include + +struct xe_ptw_dir; + +/** + * struct xe_ptw - base class for driver pagetable subclassing. 
+ * @dir: Pointer to an array of children if any. + * + * Drivers could subclass this, and if it's a page-directory, typically + * embed the xe_ptw_dir::entries array in the same allocation. + */ +struct xe_ptw { + struct xe_ptw_dir *dir; +}; + +/** + * struct xe_ptw_dir - page directory structure + * @entries: Array holding page directory children. + * + * It is the responsibility of the user to ensure @entries is + * correctly sized. + */ +struct xe_ptw_dir { + struct xe_ptw *entries[0]; +}; + +/** + * struct xe_pt_walk - Embeddable struct for walk parameters + */ +struct xe_pt_walk { + /** @ops: The walk ops used for the pagewalk */ + const struct xe_pt_walk_ops *ops; + /** + * @shifts: Array of page-table entry shifts used for the + * different levels, starting out with the leaf level 0 + * page-shift as the first entry. It's legal for this pointer to be + * changed during the walk. + */ + const u64 *shifts; + /** @max_level: Highest populated level in @sizes */ + unsigned int max_level; + /** + * @shared_pt_mode: Whether to skip all entries that are private + * to the address range and called only for entries that are + * shared with other address ranges. Such entries are referred to + * as shared pagetables. + */ + bool shared_pt_mode; +}; + +/** + * typedef xe_pt_entry_fn - gpu page-table-walk callback-function + * @parent: The parent page.table. + * @offset: The offset (number of entries) into the page table. + * @level: The level of @parent. + * @addr: The virtual address. + * @next: The virtual address for the next call, or end address. + * @child: Pointer to pointer to child page-table at this @offset. The + * function may modify the value pointed to if, for example, allocating a + * child page table. + * @action: The walk action to take upon return. See . + * @walk: The walk parameters. + */ +typedef int (*xe_pt_entry_fn)(struct xe_ptw *parent, pgoff_t offset, + unsigned int level, u64 addr, u64 next, + struct xe_ptw **child, + enum page_walk_action *action, + struct xe_pt_walk *walk); + +/** + * struct xe_pt_walk_ops - Walk callbacks. + */ +struct xe_pt_walk_ops { + /** + * @pt_entry: Callback to be called for each page table entry prior + * to descending to the next level. The returned value of the action + * function parameter is honored. + */ + xe_pt_entry_fn pt_entry; + /** + * @pt_post_descend: Callback to be called for each page table entry + * after return from descending to the next level. The returned value + * of the action function parameter is ignored. + */ + xe_pt_entry_fn pt_post_descend; +}; + +int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk); + +int xe_pt_walk_shared(struct xe_ptw *parent, unsigned int level, + u64 addr, u64 end, struct xe_pt_walk *walk); + +/** + * xe_pt_covers - Whether the address range covers an entire entry in @level + * @addr: Start of the range. + * @end: End of range + 1. + * @level: Page table level. + * @walk: Page table walk info. + * + * This function is a helper to aid in determining whether a leaf page table + * entry can be inserted at this @level. + * + * Return: Whether the range provided covers exactly an entry at this level. + */ +static inline bool xe_pt_covers(u64 addr, u64 end, unsigned int level, + const struct xe_pt_walk *walk) +{ + u64 pt_size = 1ull << walk->shifts[level]; + + return end - addr == pt_size && IS_ALIGNED(addr, pt_size); +} + +/** + * xe_pt_num_entries: Number of page-table entries of a given range at this + * level + * @addr: Start address. 
+ * @end: End address. + * @level: Page table level. + * @walk: Walk info. + * + * Return: The number of page table entries at this level between @start and + * @end. + */ +static inline pgoff_t +xe_pt_num_entries(u64 addr, u64 end, unsigned int level, + const struct xe_pt_walk *walk) +{ + u64 pt_size = 1ull << walk->shifts[level]; + + return (round_up(end, pt_size) - round_down(addr, pt_size)) >> + walk->shifts[level]; +} + +/** + * xe_pt_offset: Offset of the page-table entry for a given address. + * @addr: The address. + * @level: Page table level. + * @walk: Walk info. + * + * Return: The page table entry offset for the given address in a + * page table with size indicated by @level. + */ +static inline pgoff_t +xe_pt_offset(u64 addr, unsigned int level, const struct xe_pt_walk *walk) +{ + if (level < walk->max_level) + addr &= ((1ull << walk->shifts[level + 1]) - 1); + + return addr >> walk->shifts[level]; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c new file mode 100644 index 000000000000..6e904e97f456 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_query.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_macros.h" +#include "xe_query.h" +#include "xe_ggtt.h" +#include "xe_guc_hwconfig.h" + +static const enum xe_engine_class xe_to_user_engine_class[] = { + [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER, + [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY, + [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, + [XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE, + [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE, +}; + +static size_t calc_hw_engine_info_size(struct xe_device *xe) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_gt *gt; + u8 gt_id; + int i = 0; + + for_each_gt(gt, xe, gt_id) + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + i++; + } + + return i * sizeof(struct drm_xe_engine_class_instance); +} + +static int query_engines(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + size_t size = calc_hw_engine_info_size(xe); + struct drm_xe_engine_class_instance __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_engine_class_instance *hw_engine_info; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_gt *gt; + u8 gt_id; + int i = 0; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + hw_engine_info = kmalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !hw_engine_info)) + return -ENOMEM; + + for_each_gt(gt, xe, gt_id) + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + hw_engine_info[i].engine_class = + xe_to_user_engine_class[hwe->class]; + hw_engine_info[i].engine_instance = + hwe->logical_instance; + hw_engine_info[i++].gt_id = gt->info.id; + } + + if (copy_to_user(query_ptr, hw_engine_info, size)) { + kfree(hw_engine_info); + return -EFAULT; + } + kfree(hw_engine_info); + + return 0; +} + +static size_t calc_memory_usage_size(struct xe_device *xe) +{ + u32 num_managers = 1; + int i; + + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) + if (ttm_manager_type(&xe->ttm, i)) + num_managers++; + + return offsetof(struct drm_xe_query_mem_usage, regions[num_managers]); +} + +static int query_memory_usage(struct xe_device 
*xe, + struct drm_xe_device_query *query) +{ + size_t size = calc_memory_usage_size(xe); + struct drm_xe_query_mem_usage *usage; + struct drm_xe_query_mem_usage __user *query_ptr = + u64_to_user_ptr(query->data); + struct ttm_resource_manager *man; + int ret, i; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + usage = kmalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !usage)) + return -ENOMEM; + + usage->pad = 0; + + man = ttm_manager_type(&xe->ttm, XE_PL_TT); + usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM; + usage->regions[0].instance = 0; + usage->regions[0].pad = 0; + usage->regions[0].min_page_size = PAGE_SIZE; + usage->regions[0].max_page_size = PAGE_SIZE; + usage->regions[0].total_size = man->size << PAGE_SHIFT; + usage->regions[0].used = ttm_resource_manager_usage(man); + usage->num_regions = 1; + + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { + man = ttm_manager_type(&xe->ttm, i); + if (man) { + usage->regions[usage->num_regions].mem_class = + XE_MEM_REGION_CLASS_VRAM; + usage->regions[usage->num_regions].instance = + usage->num_regions; + usage->regions[usage->num_regions].pad = 0; + usage->regions[usage->num_regions].min_page_size = + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? + SZ_64K : PAGE_SIZE; + usage->regions[usage->num_regions].max_page_size = + SZ_1G; + usage->regions[usage->num_regions].total_size = + man->size; + usage->regions[usage->num_regions++].used = + ttm_resource_manager_usage(man); + } + } + + if (!copy_to_user(query_ptr, usage, size)) + ret = 0; + else + ret = -ENOSPC; + + kfree(usage); + return ret; +} + +static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) +{ + u32 num_params = XE_QUERY_CONFIG_NUM_PARAM; + size_t size = + sizeof(struct drm_xe_query_config) + num_params * sizeof(u64); + struct drm_xe_query_config __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_query_config *config; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + config = kzalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !config)) + return -ENOMEM; + + config->num_params = num_params; + config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = + xe->info.devid | (xe->info.revid << 16); + if (to_gt(xe)->mem.vram.size) + config->info[XE_QUERY_CONFIG_FLAGS] = + XE_QUERY_CONFIG_FLAGS_HAS_VRAM; + if (xe->info.enable_guc) + config->info[XE_QUERY_CONFIG_FLAGS] |= + XE_QUERY_CONFIG_FLAGS_USE_GUC; + config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] = + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
SZ_64K : SZ_4K; + config->info[XE_QUERY_CONFIG_VA_BITS] = 12 + + (9 * (xe->info.vm_max_level + 1)); + config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count; + config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] = + hweight_long(xe->info.mem_region_mask); + + if (copy_to_user(query_ptr, config, size)) { + kfree(config); + return -EFAULT; + } + kfree(config); + + return 0; +} + +static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) +{ + struct xe_gt *gt; + size_t size = sizeof(struct drm_xe_query_gts) + + xe->info.tile_count * sizeof(struct drm_xe_query_gt); + struct drm_xe_query_gts __user *query_ptr = + u64_to_user_ptr(query->data); + struct drm_xe_query_gts *gts; + u8 id; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + gts = kzalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !gts)) + return -ENOMEM; + + gts->num_gt = xe->info.tile_count; + for_each_gt(gt, xe, id) { + if (id == 0) + gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN; + else if (xe_gt_is_media_type(gt)) + gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA; + else + gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE; + gts->gts[id].instance = id; + gts->gts[id].clock_freq = gt->info.clock_freq; + if (!IS_DGFX(xe)) + gts->gts[id].native_mem_regions = 0x1; + else + gts->gts[id].native_mem_regions = + BIT(gt->info.vram_id) << 1; + gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^ + gts->gts[id].native_mem_regions; + } + + if (copy_to_user(query_ptr, gts, size)) { + kfree(gts); + return -EFAULT; + } + kfree(gts); + + return 0; +} + +static int query_hwconfig(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + size_t size = xe_guc_hwconfig_size(>->uc.guc); + void __user *query_ptr = u64_to_user_ptr(query->data); + void *hwconfig; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + hwconfig = kzalloc(size, GFP_KERNEL); + if (XE_IOCTL_ERR(xe, !hwconfig)) + return -ENOMEM; + + xe_device_mem_access_get(xe); + xe_guc_hwconfig_copy(>->uc.guc, hwconfig); + xe_device_mem_access_put(xe); + + if (copy_to_user(query_ptr, hwconfig, size)) { + kfree(hwconfig); + return -EFAULT; + } + kfree(hwconfig); + + return 0; +} + +static size_t calc_topo_query_size(struct xe_device *xe) +{ + return xe->info.tile_count * + (3 * sizeof(struct drm_xe_query_topology_mask) + + sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) + + sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) + + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); +} + +static void __user *copy_mask(void __user *ptr, + struct drm_xe_query_topology_mask *topo, + void *mask, size_t mask_size) +{ + topo->num_bytes = mask_size; + + if (copy_to_user(ptr, topo, sizeof(*topo))) + return ERR_PTR(-EFAULT); + ptr += sizeof(topo); + + if (copy_to_user(ptr, mask, mask_size)) + return ERR_PTR(-EFAULT); + ptr += mask_size; + + return ptr; +} + +static int query_gt_topology(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + void __user *query_ptr = u64_to_user_ptr(query->data); + size_t size = calc_topo_query_size(xe); + struct drm_xe_query_topology_mask topo; + struct xe_gt *gt; + int id; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_ERR(xe, query->size != size)) { + return -EINVAL; + } + + for_each_gt(gt, xe, id) { + topo.gt_id = id; + + topo.type = XE_TOPO_DSS_GEOMETRY; + query_ptr = 
copy_mask(query_ptr, &topo, + gt->fuse_topo.g_dss_mask, + sizeof(gt->fuse_topo.g_dss_mask)); + if (IS_ERR(query_ptr)) + return PTR_ERR(query_ptr); + + topo.type = XE_TOPO_DSS_COMPUTE; + query_ptr = copy_mask(query_ptr, &topo, + gt->fuse_topo.c_dss_mask, + sizeof(gt->fuse_topo.c_dss_mask)); + if (IS_ERR(query_ptr)) + return PTR_ERR(query_ptr); + + topo.type = XE_TOPO_EU_PER_DSS; + query_ptr = copy_mask(query_ptr, &topo, + gt->fuse_topo.eu_mask_per_dss, + sizeof(gt->fuse_topo.eu_mask_per_dss)); + if (IS_ERR(query_ptr)) + return PTR_ERR(query_ptr); + } + + return 0; +} + +static int (* const xe_query_funcs[])(struct xe_device *xe, + struct drm_xe_device_query *query) = { + query_engines, + query_memory_usage, + query_config, + query_gts, + query_hwconfig, + query_gt_topology, +}; + +int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct drm_xe_device_query *query = data; + u32 idx; + + if (XE_IOCTL_ERR(xe, query->extensions != 0)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, query->query > ARRAY_SIZE(xe_query_funcs))) + return -EINVAL; + + idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs)); + if (XE_IOCTL_ERR(xe, !xe_query_funcs[idx])) + return -EINVAL; + + return xe_query_funcs[idx](xe, query); +} diff --git a/drivers/gpu/drm/xe/xe_query.h b/drivers/gpu/drm/xe/xe_query.h new file mode 100644 index 000000000000..beeb7a8192b4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_query.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_QUERY_H_ +#define _XE_QUERY_H_ + +struct drm_device; +struct drm_file; + +int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c new file mode 100644 index 000000000000..16e025dcf2cc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_reg_sr.h" + +#include +#include +#include + +#include +#include + +#include "xe_rtp_types.h" +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_mcr.h" +#include "xe_macros.h" +#include "xe_mmio.h" + +#include "gt/intel_engine_regs.h" +#include "gt/intel_gt_regs.h" + +#define XE_REG_SR_GROW_STEP_DEFAULT 16 + +static void reg_sr_fini(struct drm_device *drm, void *arg) +{ + struct xe_reg_sr *sr = arg; + + xa_destroy(&sr->xa); + kfree(sr->pool.arr); + memset(&sr->pool, 0, sizeof(sr->pool)); +} + +int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) +{ + xa_init(&sr->xa); + memset(&sr->pool, 0, sizeof(sr->pool)); + sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT; + sr->name = name; + + return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); +} + +int xe_reg_sr_dump_kv(struct xe_reg_sr *sr, + struct xe_reg_sr_kv **dst) +{ + struct xe_reg_sr_kv *iter; + struct xe_reg_sr_entry *entry; + unsigned long idx; + + if (xa_empty(&sr->xa)) { + *dst = NULL; + return 0; + } + + *dst = kmalloc_array(sr->pool.used, sizeof(**dst), GFP_KERNEL); + if (!*dst) + return -ENOMEM; + + iter = *dst; + xa_for_each(&sr->xa, idx, entry) { + iter->k = idx; + iter->v = *entry; + iter++; + } + + return 0; +} + +static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) +{ + if (sr->pool.used == sr->pool.allocated) { + struct xe_reg_sr_entry *arr; + + arr = krealloc_array(sr->pool.arr, + ALIGN(sr->pool.allocated + 1, 
sr->pool.grow_step), + sizeof(*arr), GFP_KERNEL); + if (!arr) + return NULL; + + sr->pool.arr = arr; + sr->pool.allocated += sr->pool.grow_step; + } + + return &sr->pool.arr[sr->pool.used++]; +} + +static bool compatible_entries(const struct xe_reg_sr_entry *e1, + const struct xe_reg_sr_entry *e2) +{ + /* + * Don't allow overwriting values: clr_bits/set_bits should be disjoint + * when operating in the same register + */ + if (e1->clr_bits & e2->clr_bits || e1->set_bits & e2->set_bits || + e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits) + return false; + + if (e1->masked_reg != e2->masked_reg) + return false; + + if (e1->reg_type != e2->reg_type) + return false; + + return true; +} + +int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, + const struct xe_reg_sr_entry *e) +{ + unsigned long idx = reg; + struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx); + int ret; + + if (pentry) { + if (!compatible_entries(pentry, e)) { + ret = -EINVAL; + goto fail; + } + + pentry->clr_bits |= e->clr_bits; + pentry->set_bits |= e->set_bits; + pentry->read_mask |= e->read_mask; + + return 0; + } + + pentry = alloc_entry(sr); + if (!pentry) { + ret = -ENOMEM; + goto fail; + } + + *pentry = *e; + ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL)); + if (ret) + goto fail; + + return 0; + +fail: + DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n", + idx, e->clr_bits, e->set_bits, + str_yes_no(e->masked_reg), ret); + + return ret; +} + +static void apply_one_mmio(struct xe_gt *gt, u32 reg, + struct xe_reg_sr_entry *entry) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 val; + + /* + * If this is a masked register, need to figure what goes on the upper + * 16 bits: it's either the clr_bits (when using FIELD_SET and WR) or + * the set_bits, when using SET. + * + * When it's not masked, we have to read it from hardware, unless we are + * supposed to set all bits. + */ + if (entry->masked_reg) + val = (entry->clr_bits ?: entry->set_bits << 16); + else if (entry->clr_bits + 1) + val = (entry->reg_type == XE_RTP_REG_MCR ? 
+ xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) : + xe_mmio_read32(gt, reg)) & (~entry->clr_bits); + else + val = 0; + + /* + * TODO: add selftest to validate all tables, regardless of platform: + * - Masked registers can't have set_bits with upper bits set + * - set_bits must be contained in clr_bits + */ + val |= entry->set_bits; + + drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val); + + if (entry->reg_type == XE_RTP_REG_MCR) + xe_gt_mcr_multicast_write(gt, MCR_REG(reg), val); + else + xe_mmio_write32(gt, reg, val); +} + +void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_reg_sr_entry *entry; + unsigned long reg; + int err; + + drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name); + + err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); + if (err) + goto err_force_wake; + + xa_for_each(&sr->xa, reg, entry) + apply_one_mmio(gt, reg, entry); + + err = xe_force_wake_put(>->mmio.fw, XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + + return; + +err_force_wake: + drm_err(&xe->drm, "Failed to apply, err=%d\n", err); +} + +void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, + struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_reg_sr_entry *entry; + unsigned long reg; + unsigned int slot = 0; + int err; + + drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name); + + err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); + if (err) + goto err_force_wake; + + xa_for_each(&sr->xa, reg, entry) { + xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, + reg | entry->set_bits); + slot++; + } + + /* And clear the rest just in case of garbage */ + for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) + xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, + RING_NOPID(mmio_base).reg); + + err = xe_force_wake_put(>->mmio.fw, XE_FORCEWAKE_ALL); + XE_WARN_ON(err); + + return; + +err_force_wake: + drm_err(&xe->drm, "Failed to apply, err=%d\n", err); +} diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h new file mode 100644 index 000000000000..c3a9db251e92 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_REG_SR_ +#define _XE_REG_SR_ + +#include "xe_reg_sr_types.h" + +/* + * Reg save/restore bookkeeping + */ + +struct xe_device; +struct xe_gt; + +int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); +int xe_reg_sr_dump_kv(struct xe_reg_sr *sr, + struct xe_reg_sr_kv **dst); + +int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, + const struct xe_reg_sr_entry *e); +void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt); +void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, + struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h new file mode 100644 index 000000000000..2fa7ff3966ba --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_REG_SR_TYPES_ +#define _XE_REG_SR_TYPES_ + +#include +#include + +#include "i915_reg_defs.h" + +struct xe_reg_sr_entry { + u32 clr_bits; + u32 set_bits; + /* Mask for bits to consider when reading value back */ + u32 read_mask; + /* + * "Masked registers" are marked in spec as register with the upper 16 + * bits as a mask for the bits that is being updated on the lower 16 + * bits when 
writing to it. + */ + u8 masked_reg; + u8 reg_type; +}; + +struct xe_reg_sr_kv { + u32 k; + struct xe_reg_sr_entry v; +}; + +struct xe_reg_sr { + struct { + struct xe_reg_sr_entry *arr; + unsigned int used; + unsigned int allocated; + unsigned int grow_step; + } pool; + struct xarray xa; + const char *name; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c new file mode 100644 index 000000000000..2e0c87b72395 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_reg_whitelist.h" + +#include "xe_platform_types.h" +#include "xe_gt_types.h" +#include "xe_rtp.h" + +#include "../i915/gt/intel_engine_regs.h" +#include "../i915/gt/intel_gt_regs.h" + +#undef _MMIO +#undef MCR_REG +#define _MMIO(x) _XE_RTP_REG(x) +#define MCR_REG(x) _XE_RTP_MCR_REG(x) + +static bool match_not_render(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return hwe->class != XE_ENGINE_CLASS_RENDER; +} + +static const struct xe_rtp_entry register_whitelist[] = { + { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_WHITELIST_REGISTER(PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4) + }, + { XE_RTP_NAME("1508744258, 14012131227, 1808121037"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_WHITELIST_REGISTER(GEN7_COMMON_SLICE_CHICKEN1, 0) + }, + { XE_RTP_NAME("1806527549"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_WHITELIST_REGISTER(HIZ_CHICKEN, 0) + }, + { XE_RTP_NAME("allow_read_ctx_timestamp"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)), + XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0), + RING_FORCE_TO_NONPRIV_ACCESS_RD, + XE_RTP_FLAG(ENGINE_BASE)) + }, + { XE_RTP_NAME("16014440446_part_1"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_WHITELIST_REGISTER(_MMIO(0x4400), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64) + }, + { XE_RTP_NAME("16014440446_part_2"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_WHITELIST_REGISTER(_MMIO(0x4500), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64) + }, + {} +}; + +/** + * xe_reg_whitelist_process_engine - process table of registers to whitelist + * @hwe: engine instance to process whitelist for + * + * Process wwhitelist table for this platform, saving in @hwe all the + * registers that need to be whitelisted by the hardware so they can be accessed + * by userspace. 
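+ *
+ * A minimal usage sketch (illustrative only; the init-time caller shown
+ * here and the name string are assumptions, not part of this patch):
+ *
+ * .. code-block:: c
+ *
+ *	xe_reg_sr_init(&hwe->reg_whitelist, "whitelist", gt_to_xe(hwe->gt));
+ *	xe_reg_whitelist_process_engine(hwe);
+ *	xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, hwe->gt);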
+ */ +void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) +{ + xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe); +} diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h new file mode 100644 index 000000000000..6e861b1bdb01 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_REG_WHITELIST_ +#define _XE_REG_WHITELIST_ + +struct xe_hw_engine; + +void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h new file mode 100644 index 000000000000..f54409850d74 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XE_RES_CURSOR_H__ +#define __XE_RES_CURSOR_H__ + +#include + +#include +#include +#include +#include +#include + +#include "xe_bo.h" +#include "xe_macros.h" +#include "xe_ttm_vram_mgr.h" + +/* state back for walking over vram_mgr and gtt_mgr allocations */ +struct xe_res_cursor { + u64 start; + u64 size; + u64 remaining; + void *node; + u32 mem_type; + struct scatterlist *sgl; +}; + +/** + * xe_res_first - initialize a xe_res_cursor + * + * @res: TTM resource object to walk + * @start: Start of the range + * @size: Size of the range + * @cur: cursor object to initialize + * + * Start walking over the range of allocations between @start and @size. 
+ */ +static inline void xe_res_first(struct ttm_resource *res, + u64 start, u64 size, + struct xe_res_cursor *cur) +{ + struct drm_buddy_block *block; + struct list_head *head, *next; + + cur->sgl = NULL; + if (!res) + goto fallback; + + XE_BUG_ON(start + size > res->size); + + cur->mem_type = res->mem_type; + + switch (cur->mem_type) { + case XE_PL_VRAM0: + case XE_PL_VRAM1: + head = &to_xe_ttm_vram_mgr_resource(res)->blocks; + + block = list_first_entry_or_null(head, + struct drm_buddy_block, + link); + if (!block) + goto fallback; + + while (start >= xe_ttm_vram_mgr_block_size(block)) { + start -= xe_ttm_vram_mgr_block_size(block); + + next = block->link.next; + if (next != head) + block = list_entry(next, struct drm_buddy_block, + link); + } + + cur->start = xe_ttm_vram_mgr_block_start(block) + start; + cur->size = min(xe_ttm_vram_mgr_block_size(block) - start, + size); + cur->remaining = size; + cur->node = block; + break; + default: + goto fallback; + } + + return; + +fallback: + cur->start = start; + cur->size = size; + cur->remaining = size; + cur->node = NULL; + cur->mem_type = XE_PL_TT; + XE_WARN_ON(res && start + size > res->size); + return; +} + +static inline void __xe_res_sg_next(struct xe_res_cursor *cur) +{ + struct scatterlist *sgl = cur->sgl; + u64 start = cur->start; + + while (start >= sg_dma_len(sgl)) { + start -= sg_dma_len(sgl); + sgl = sg_next(sgl); + XE_BUG_ON(!sgl); + } + + cur->start = start; + cur->size = sg_dma_len(sgl) - start; + cur->sgl = sgl; +} + +/** + * xe_res_first_sg - initialize a xe_res_cursor with a scatter gather table + * + * @sg: scatter gather table to walk + * @start: Start of the range + * @size: Size of the range + * @cur: cursor object to initialize + * + * Start walking over the range of allocations between @start and @size. + */ +static inline void xe_res_first_sg(const struct sg_table *sg, + u64 start, u64 size, + struct xe_res_cursor *cur) +{ + XE_BUG_ON(!sg); + XE_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE) || + !IS_ALIGNED(size, PAGE_SIZE)); + cur->node = NULL; + cur->start = start; + cur->remaining = size; + cur->size = 0; + cur->sgl = sg->sgl; + cur->mem_type = XE_PL_TT; + __xe_res_sg_next(cur); +} + +/** + * xe_res_next - advance the cursor + * + * @cur: the cursor to advance + * @size: number of bytes to move forward + * + * Move the cursor @size bytes forwrad, walking to the next node if necessary. 
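+ *
+ * A typical walk over a resource looks roughly like the sketch below;
+ * consume_range() and the SZ_2M chunk size are illustrative assumptions,
+ * not helpers defined in this file:
+ *
+ * .. code-block:: c
+ *
+ *	struct xe_res_cursor cur;
+ *
+ *	xe_res_first(res, 0, size, &cur);
+ *	while (cur.remaining) {
+ *		u64 chunk = min_t(u64, cur.size, SZ_2M);
+ *
+ *		/* operate on [cur.start, cur.start + chunk) */
+ *		consume_range(cur.start, chunk);
+ *		xe_res_next(&cur, chunk);
+ *	}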
+ */ +static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) +{ + struct drm_buddy_block *block; + struct list_head *next; + u64 start; + + XE_BUG_ON(size > cur->remaining); + + cur->remaining -= size; + if (!cur->remaining) + return; + + if (cur->size > size) { + cur->size -= size; + cur->start += size; + return; + } + + if (cur->sgl) { + cur->start += size; + __xe_res_sg_next(cur); + return; + } + + switch (cur->mem_type) { + case XE_PL_VRAM0: + case XE_PL_VRAM1: + start = size - cur->size; + block = cur->node; + + next = block->link.next; + block = list_entry(next, struct drm_buddy_block, link); + + + while (start >= xe_ttm_vram_mgr_block_size(block)) { + start -= xe_ttm_vram_mgr_block_size(block); + + next = block->link.next; + block = list_entry(next, struct drm_buddy_block, link); + } + + cur->start = xe_ttm_vram_mgr_block_start(block) + start; + cur->size = min(xe_ttm_vram_mgr_block_size(block) - start, + cur->remaining); + cur->node = block; + break; + default: + return; + } +} + +/** + * xe_res_dma - return dma address of cursor at current position + * + * @cur: the cursor to return the dma address from + */ +static inline u64 xe_res_dma(const struct xe_res_cursor *cur) +{ + return cur->sgl ? sg_dma_address(cur->sgl) + cur->start : cur->start; +} +#endif diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c new file mode 100644 index 000000000000..fda7978a63e0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_engine_types.h" +#include "xe_gt.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_ring_ops.h" +#include "xe_sched_job.h" +#include "xe_vm_types.h" + +#include "i915_reg.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_lrc_reg.h" + +static u32 preparser_disable(bool state) +{ + return MI_ARB_CHECK | BIT(8) | state; +} + +static int emit_aux_table_inv(struct xe_gt *gt, u32 addr, u32 *dw, int i) +{ + dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; + dw[i++] = addr + gt->mmio.adj_offset; + dw[i++] = AUX_INV; + dw[i++] = MI_NOOP; + + return i; +} + +static int emit_user_interrupt(u32 *dw, int i) +{ + dw[i++] = MI_USER_INTERRUPT; + dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; + dw[i++] = MI_ARB_CHECK; + + return i; +} + +static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) +{ + dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2; + dw[i++] = addr; + dw[i++] = 0; + dw[i++] = value; + + return i; +} + +static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) +{ + dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = value; + + return i; +} + +static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) +{ + dw[i++] = MI_BATCH_BUFFER_START_GEN8 | ppgtt_flag; + dw[i++] = lower_32_bits(batch_addr); + dw[i++] = upper_32_bits(batch_addr); + + return i; +} + +static int emit_flush_invalidate(u32 flag, u32 *dw, int i) +{ + dw[i] = MI_FLUSH_DW + 1; + dw[i] |= flag; + dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_DW_STORE_INDEX; + + dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = ~0U; + + return i; +} + +static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i) +{ + u32 flags = PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | + PIPE_CONTROL_TLB_INVALIDATE | + 
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE | + PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_STORE_DATA_INDEX; + + flags &= ~mask_flags; + + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = flags; + dw[i++] = LRC_PPHWSP_SCRATCH_ADDR; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + +#define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21)) + +static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, + u32 *dw, int i) +{ + dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED; + dw[i++] = lower_32_bits(addr); + dw[i++] = upper_32_bits(addr); + dw[i++] = lower_32_bits(value); + dw[i++] = upper_32_bits(value); + + return i; +} + +static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, + int i) +{ + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL : + PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) | + PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE; + dw[i++] = addr; + dw[i++] = 0; + dw[i++] = value; + dw[i++] = 0; /* We're thrashing one extra dword. */ + + return i; +} + +static u32 get_ppgtt_flag(struct xe_sched_job *job) +{ + return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0; +} + +static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + + /* XXX: Conditional flushing possible */ + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(0, dw, i); + dw[i++] = preparser_disable(false); + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i); + + i = emit_user_interrupt(dw, i); + + XE_BUG_ON(i > MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + struct xe_gt *gt = job->engine->gt; + struct xe_device *xe = gt_to_xe(gt); + bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE; + + /* XXX: Conditional flushing possible */ + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(decode ? MI_INVALIDATE_BSD : 0, dw, i); + /* Wa_1809175790 */ + if (!xe->info.has_flat_ccs) { + if (decode) + i = emit_aux_table_inv(gt, GEN12_VD0_AUX_INV.reg, dw, i); + else + i = emit_aux_table_inv(gt, GEN12_VE0_AUX_INV.reg, dw, i); + } + dw[i++] = preparser_disable(false); + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i); + + i = emit_user_interrupt(dw, i); + + XE_BUG_ON(i > MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +/* + * 3D-related flags that can't be set on _engines_ that lack access to the 3D + * pipeline (i.e., CCS engines). 
+ */ +#define PIPE_CONTROL_3D_ENGINE_FLAGS (\ + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ + PIPE_CONTROL_TILE_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_STALL | \ + PIPE_CONTROL_STALL_AT_SCOREBOARD | \ + PIPE_CONTROL_PSD_SYNC | \ + PIPE_CONTROL_AMFS_FLUSH | \ + PIPE_CONTROL_VF_CACHE_INVALIDATE | \ + PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) + +/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */ +#define PIPE_CONTROL_3D_ARCH_FLAGS ( \ + PIPE_CONTROL_3D_ENGINE_FLAGS | \ + PIPE_CONTROL_INDIRECT_STATE_DISABLE | \ + PIPE_CONTROL_FLUSH_ENABLE | \ + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ + PIPE_CONTROL_DC_FLUSH_ENABLE) + +static void __emit_job_gen12_render_compute(struct xe_sched_job *job, + struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + u32 ppgtt_flag = get_ppgtt_flag(job); + struct xe_gt *gt = job->engine->gt; + struct xe_device *xe = gt_to_xe(gt); + bool pvc = xe->info.platform == XE_PVC; + u32 mask_flags = 0; + + /* XXX: Conditional flushing possible */ + dw[i++] = preparser_disable(true); + if (pvc) + mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; + else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) + mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; + i = emit_pipe_invalidate(mask_flags, dw, i); + /* Wa_1809175790 */ + if (!xe->info.has_flat_ccs) + i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i); + dw[i++] = preparser_disable(false); + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + + if (job->user_fence.used) + i = emit_store_imm_ppgtt_posted(job->user_fence.addr, + job->user_fence.value, + dw, i); + + i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, pvc, dw, i); + + i = emit_user_interrupt(dw, i); + + XE_BUG_ON(i > MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void emit_migration_job_gen12(struct xe_sched_job *job, + struct xe_lrc *lrc, u32 seqno) +{ + u32 dw[MAX_JOB_SIZE_DW], i = 0; + + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + + i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); + + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(0, dw, i); + dw[i++] = preparser_disable(false); + + i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i); + + dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | + MI_FLUSH_DW_OP_STOREDW) + 1; + dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT; + dw[i++] = 0; + dw[i++] = seqno; /* value */ + + i = emit_user_interrupt(dw, i); + + XE_BUG_ON(i > MAX_JOB_SIZE_DW); + + xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); +} + +static void emit_job_gen12_copy(struct xe_sched_job *job) +{ + int i; + + if (xe_sched_job_is_migration(job->engine)) { + emit_migration_job_gen12(job, job->engine->lrc, + xe_sched_job_seqno(job)); + return; + } + + for (i = 0; i < job->engine->width; ++i) + __emit_job_gen12_copy(job, job->engine->lrc + i, + job->batch_addr[i], + xe_sched_job_seqno(job)); +} + +static void emit_job_gen12_video(struct xe_sched_job *job) +{ + int i; + + /* FIXME: Not doing parallel handshake for now */ + for (i = 0; i < job->engine->width; ++i) + __emit_job_gen12_video(job, job->engine->lrc + i, + job->batch_addr[i], + xe_sched_job_seqno(job)); +} + +static void emit_job_gen12_render_compute(struct xe_sched_job *job) +{ + int i; + + for (i = 0; i < job->engine->width; ++i) + __emit_job_gen12_render_compute(job, job->engine->lrc + i, + job->batch_addr[i], + 
xe_sched_job_seqno(job)); +} + +static const struct xe_ring_ops ring_ops_gen12_copy = { + .emit_job = emit_job_gen12_copy, +}; + +static const struct xe_ring_ops ring_ops_gen12_video = { + .emit_job = emit_job_gen12_video, +}; + +static const struct xe_ring_ops ring_ops_gen12_render_compute = { + .emit_job = emit_job_gen12_render_compute, +}; + +const struct xe_ring_ops * +xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_COPY: + return &ring_ops_gen12_copy; + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return &ring_ops_gen12_video; + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + return &ring_ops_gen12_render_compute; + default: + return NULL; + } +} diff --git a/drivers/gpu/drm/xe/xe_ring_ops.h b/drivers/gpu/drm/xe/xe_ring_ops.h new file mode 100644 index 000000000000..e942735d76a6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RING_OPS_H_ +#define _XE_RING_OPS_H_ + +#include "xe_hw_engine_types.h" +#include "xe_ring_ops_types.h" + +struct xe_gt; + +const struct xe_ring_ops * +xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h new file mode 100644 index 000000000000..1ae56e2ee7b4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RING_OPS_TYPES_H_ +#define _XE_RING_OPS_TYPES_H_ + +struct xe_sched_job; + +#define MAX_JOB_SIZE_DW 48 +#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4) + +/** + * struct xe_ring_ops - Ring operations + */ +struct xe_ring_ops { + /** @emit_job: Write job to ring */ + void (*emit_job)(struct xe_sched_job *job); +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c new file mode 100644 index 000000000000..9e8d0e43c643 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_rtp.h" + +#include + +#include "xe_gt.h" +#include "xe_macros.h" +#include "xe_reg_sr.h" + +/** + * DOC: Register Table Processing + * + * Internal infrastructure to define how registers should be updated based on + * rules and actions. This can be used to define tables with multiple entries + * (one per register) that will be walked over at some point in time to apply + * the values to the registers that have matching rules. 
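+ *
+ * A minimal table and its processing might look like the sketch below; the
+ * register name, the bit and the destination xe_reg_sr are placeholders
+ * chosen for illustration, not definitions from this patch:
+ *
+ * .. code-block:: c
+ *
+ *	static const struct xe_rtp_entry example_table[] = {
+ *		{ XE_RTP_NAME("example-wa"),
+ *		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+ *		  XE_RTP_SET(EXAMPLE_CHICKEN_REG, EXAMPLE_BIT, XE_RTP_FLAG(MASKED_REG))
+ *		},
+ *		{}
+ *	};
+ *
+ *	xe_rtp_process(example_table, &hwe->reg_sr, gt, hwe);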
+ */ + +static bool rule_matches(struct xe_gt *gt, + struct xe_hw_engine *hwe, + const struct xe_rtp_entry *entry) +{ + const struct xe_device *xe = gt_to_xe(gt); + const struct xe_rtp_rule *r; + unsigned int i; + bool match; + + for (r = entry->rules, i = 0; i < entry->n_rules; + r = &entry->rules[++i]) { + switch (r->match_type) { + case XE_RTP_MATCH_PLATFORM: + match = xe->info.platform == r->platform; + break; + case XE_RTP_MATCH_SUBPLATFORM: + match = xe->info.platform == r->platform && + xe->info.subplatform == r->subplatform; + break; + case XE_RTP_MATCH_GRAPHICS_VERSION: + /* TODO: match display */ + match = xe->info.graphics_verx100 == r->ver_start; + break; + case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + match = xe->info.graphics_verx100 >= r->ver_start && + xe->info.graphics_verx100 <= r->ver_end; + break; + case XE_RTP_MATCH_MEDIA_VERSION: + match = xe->info.media_verx100 == r->ver_start; + break; + case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + match = xe->info.media_verx100 >= r->ver_start && + xe->info.media_verx100 <= r->ver_end; + break; + case XE_RTP_MATCH_STEP: + /* TODO: match media/display */ + match = xe->info.step.graphics >= r->step_start && + xe->info.step.graphics < r->step_end; + break; + case XE_RTP_MATCH_ENGINE_CLASS: + match = hwe->class == r->engine_class; + break; + case XE_RTP_MATCH_NOT_ENGINE_CLASS: + match = hwe->class != r->engine_class; + break; + case XE_RTP_MATCH_FUNC: + match = r->match_func(gt, hwe); + break; + case XE_RTP_MATCH_INTEGRATED: + match = !xe->info.is_dgfx; + break; + case XE_RTP_MATCH_DISCRETE: + match = xe->info.is_dgfx; + break; + + default: + XE_WARN_ON(r->match_type); + } + + if (!match) + return false; + } + + return true; +} + +static void rtp_add_sr_entry(const struct xe_rtp_entry *entry, + struct xe_gt *gt, + u32 mmio_base, + struct xe_reg_sr *sr) +{ + u32 reg = entry->regval.reg + mmio_base; + struct xe_reg_sr_entry sr_entry = { + .clr_bits = entry->regval.clr_bits, + .set_bits = entry->regval.set_bits, + .read_mask = entry->regval.read_mask, + .masked_reg = entry->regval.flags & XE_RTP_FLAG_MASKED_REG, + .reg_type = entry->regval.reg_type, + }; + + xe_reg_sr_add(sr, reg, &sr_entry); +} + +/** + * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr + * @entries: Table with RTP definitions + * @sr: Where to add an entry to with the values for matching. This can be + * viewed as the "coalesced view" of multiple the tables. The bits for each + * register set are expected not to collide with previously added entries + * @gt: The GT to be used for matching rules + * @hwe: Engine instance to use for matching rules and as mmio base + * + * Walk the table pointed by @entries (with an empty sentinel) and add all + * entries with matching rules to @sr. 
If @hwe is not NULL, its mmio_base is + * used to calculate the right register offset + */ +void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, + struct xe_gt *gt, struct xe_hw_engine *hwe) +{ + const struct xe_rtp_entry *entry; + + for (entry = entries; entry && entry->name; entry++) { + u32 mmio_base = 0; + + if (entry->regval.flags & XE_RTP_FLAG_FOREACH_ENGINE) { + struct xe_hw_engine *each_hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(each_hwe, gt, id) { + mmio_base = each_hwe->mmio_base; + + if (rule_matches(gt, each_hwe, entry)) + rtp_add_sr_entry(entry, gt, mmio_base, sr); + } + } else if (rule_matches(gt, hwe, entry)) { + if (entry->regval.flags & XE_RTP_FLAG_ENGINE_BASE) + mmio_base = hwe->mmio_base; + + rtp_add_sr_entry(entry, gt, mmio_base, sr); + } + } +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h new file mode 100644 index 000000000000..d4e11fdde77f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -0,0 +1,340 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RTP_ +#define _XE_RTP_ + +#include +#include + +#include "xe_rtp_types.h" + +#include "i915_reg_defs.h" + +/* + * Register table poke infrastructure + */ + +struct xe_hw_engine; +struct xe_gt; +struct xe_reg_sr; + +/* + * Helper macros - not to be used outside this header. + */ +/* This counts to 12. Any more, it will return 13th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x) +#define __CALL_FOR_EACH_2(MACRO_, x, ...) \ + MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__) +#define __CALL_FOR_EACH_3(MACRO_, x, ...) \ + MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__) +#define __CALL_FOR_EACH_4(MACRO_, x, ...) \ + MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__) + +#define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...) \ + CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__) +#define CALL_FOR_EACH(MACRO_, x, ...) \ + _CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__) + +#define _XE_RTP_REG(x_) (x_), \ + .reg_type = XE_RTP_REG_REGULAR +#define _XE_RTP_MCR_REG(x_) (x_), \ + .reg_type = XE_RTP_REG_MCR + +/* + * Helper macros for concatenating prefix - do not use them directly outside + * this header + */ +#define __ADD_XE_RTP_FLAG_PREFIX(x) CONCATENATE(XE_RTP_FLAG_, x) | +#define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) , + +/* + * Macros to encode rules to match against platform, IP version, stepping, etc. + * Shouldn't be used directly - see XE_RTP_RULES() + */ + +#define _XE_RTP_RULE_PLATFORM(plat__) \ + { .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ } + +#define _XE_RTP_RULE_SUBPLATFORM(plat__, sub__) \ + { .match_type = XE_RTP_MATCH_SUBPLATFORM, \ + .platform = plat__, .subplatform = sub__ } + +#define _XE_RTP_RULE_STEP(start__, end__) \ + { .match_type = XE_RTP_MATCH_STEP, \ + .step_start = start__, .step_end = end__ } + +#define _XE_RTP_RULE_ENGINE_CLASS(cls__) \ + { .match_type = XE_RTP_MATCH_ENGINE_CLASS, \ + .engine_class = (cls__) } + +/** + * XE_RTP_RULE_PLATFORM - Create rule matching platform + * @plat_: platform to match + * + * Refer to XE_RTP_RULES() for expected usage. 
+ */ +#define XE_RTP_RULE_PLATFORM(plat_) \ + _XE_RTP_RULE_PLATFORM(XE_##plat_) + +/** + * XE_RTP_RULE_SUBPLATFORM - Create rule matching platform and sub-platform + * @plat_: platform to match + * @sub_: sub-platform to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_SUBPLATFORM(plat_, sub_) \ + _XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_) + +/** + * XE_RTP_RULE_STEP - Create rule matching platform stepping + * @start_: First stepping matching the rule + * @end_: First stepping that does not match the rule + * + * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive on + * the left, exclusive on the right. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_STEP(start_, end_) \ + _XE_RTP_RULE_STEP(STEP_##start_, STEP_##end_) + +/** + * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class + * @cls_: Engine class to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_ENGINE_CLASS(cls_) \ + _XE_RTP_RULE_ENGINE_CLASS(XE_ENGINE_CLASS_##cls_) + +/** + * XE_RTP_RULE_FUNC - Create rule using callback function for match + * @func__: Function to call to decide if rule matches + * + * This allows more complex checks to be performed. The ``XE_RTP`` + * infrastructure will simply call the function @func__ passed to decide if this + * rule matches the device. + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_FUNC(func__) \ + { .match_type = XE_RTP_MATCH_FUNC, \ + .match_func = (func__) } + +/** + * XE_RTP_RULE_GRAPHICS_VERSION - Create rule matching graphics version + * @ver__: Graphics IP version to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_GRAPHICS_VERSION(ver__) \ + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION, \ + .ver_start = ver__, } + +/** + * XE_RTP_RULE_GRAPHICS_VERSION_RANGE - Create rule matching a range of graphics versions + * @ver_start__: First graphics IP version to match + * @ver_end__: Last graphics IP version to match + * + * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. + * inclusive on both sides + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_GRAPHICS_VERSION_RANGE(ver_start__, ver_end__) \ + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, \ + .ver_start = ver_start__, .ver_end = ver_end__, } + +/** + * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version + * @ver__: Media IP version to match + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_MEDIA_VERSION(ver__) \ + { .match_type = XE_RTP_MATCH_MEDIA_VERSION, \ + .ver_start = ver__, } + +/** + * XE_RTP_RULE_MEDIA_VERSION_RANGE - Create rule matching a range of media versions + * @ver_start__: First media IP version to match + * @ver_end__: Last media IP version to match + * + * Note that the range matching this rule is [ @ver_start__, @ver_end__ ], i.e. + * inclusive on both sides + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_MEDIA_VERSION_RANGE(ver_start__, ver_end__) \ + { .match_type = XE_RTP_MATCH_MEDIA_VERSION_RANGE, \ + .ver_start = ver_start__, .ver_end = ver_end__, } + +/** + * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices + * + * Refer to XE_RTP_RULES() for expected usage. 
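+ *
+ * For example, combined with another rule (an illustrative combination, not
+ * taken from an existing table):
+ *
+ * .. code-block:: c
+ *
+ *	XE_RTP_RULES(IS_INTEGRATED, GRAPHICS_VERSION(1200))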
+ */ +#define XE_RTP_RULE_IS_INTEGRATED \ + { .match_type = XE_RTP_MATCH_INTEGRATED } + +/** + * XE_RTP_RULE_IS_DISCRETE - Create a rule matching discrete graphics devices + * + * Refer to XE_RTP_RULES() for expected usage. + */ +#define XE_RTP_RULE_IS_DISCRETE \ + { .match_type = XE_RTP_MATCH_DISCRETE } + +/** + * XE_RTP_WR - Helper to write a value to the register, overriding all the bits + * @reg_: Register + * @val_: Value to set + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * The correspondent notation in bspec is: + * + * REGNAME = VALUE + */ +#define XE_RTP_WR(reg_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_), \ + .read_mask = (~0u), ##__VA_ARGS__ } + +/** + * XE_RTP_SET - Set bits from @val_ in the register. + * @reg_: Register + * @val_: Bits to set in the register + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * For masked registers this translates to a single write, while for other + * registers it's a RMW. The correspondent bspec notation is (example for bits 2 + * and 5, but could be any): + * + * REGNAME[2] = 1 + * REGNAME[5] = 1 + */ +#define XE_RTP_SET(reg_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_), \ + .read_mask = (val_), ##__VA_ARGS__ } + +/** + * XE_RTP_CLR: Clear bits from @val_ in the register. + * @reg_: Register + * @val_: Bits to clear in the register + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * For masked registers this translates to a single write, while for other + * registers it's a RMW. The correspondent bspec notation is (example for bits 2 + * and 5, but could be any): + * + * REGNAME[2] = 0 + * REGNAME[5] = 0 + */ +#define XE_RTP_CLR(reg_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = 0, \ + .read_mask = (val_), ##__VA_ARGS__ } + +/** + * XE_RTP_FIELD_SET: Set a bit range, defined by @mask_bits_, to the value in + * @reg_: Register + * @mask_bits_: Mask of bits to be changed in the register, forming a field + * @val_: Value to set in the field denoted by @mask_bits_ + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * For masked registers this translates to a single write, while for other + * registers it's a RMW. The correspondent bspec notation is: + * + * REGNAME[:] = VALUE + */ +#define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ + .read_mask = (mask_bits_), ##__VA_ARGS__ } + +#define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...) \ + .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ + .read_mask = 0, ##__VA_ARGS__ } + +/** + * XE_WHITELIST_REGISTER - Add register to userspace whitelist + * @reg_: Register + * @flags_: Whitelist-specific flags to set + * @...: Additional fields to override in the struct xe_rtp_regval entry + * + * Add a register to the whitelist, allowing userspace to modify the ster with + * regular user privileges. + */ +#define XE_WHITELIST_REGISTER(reg_, flags_, ...) \ + /* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\ + .regval = { .reg = reg_, .set_bits = (flags_), \ + .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \ + ##__VA_ARGS__ } + +/** + * XE_RTP_NAME - Helper to set the name in xe_rtp_entry + * @s_: Name describing this rule, often a HW-specific number + * + * TODO: maybe move this behind a debug config? 
+ */ +#define XE_RTP_NAME(s_) .name = (s_) + +/** + * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_regval entry + * @f1_: Last part of a ``XE_RTP_FLAG_*`` + * @...: Additional flags, defined like @f1_ + * + * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be + * easily used to define struct xe_rtp_regval entries. Example: + * + * .. code-block:: c + * + * const struct xe_rtp_entry wa_entries[] = { + * ... + * { XE_RTP_NAME("test-entry"), + * XE_RTP_FLAG(FOREACH_ENGINE, MASKED_REG), + * ... + * }, + * ... + * }; + */ +#define XE_RTP_FLAG(f1_, ...) \ + .flags = (CALL_FOR_EACH(__ADD_XE_RTP_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0) + +/** + * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry + * @r1: Last part of XE_RTP_MATCH_* + * @...: Additional rules, defined like @r1 + * + * At least one rule is needed and up to 4 are supported. Multiple rules are + * AND'ed together, i.e. all the rules must evaluate to true for the entry to + * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: + * + * .. code-block:: c + * + * const struct xe_rtp_entry wa_entries[] = { + * ... + * { XE_RTP_NAME("test-entry"), + * XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + * ... + * }, + * ... + * }; + */ +#define XE_RTP_RULES(r1, ...) \ + .n_rules = COUNT_ARGS(r1, ##__VA_ARGS__), \ + .rules = (struct xe_rtp_rule[]) { \ + CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__) \ + } + +void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, + struct xe_gt *gt, struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h new file mode 100644 index 000000000000..b55b556a2495 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_RTP_TYPES_ +#define _XE_RTP_TYPES_ + +#include + +#include "i915_reg_defs.h" + +struct xe_hw_engine; +struct xe_gt; + +enum { + XE_RTP_REG_REGULAR, + XE_RTP_REG_MCR, +}; + +/** + * struct xe_rtp_regval - register and value for rtp table + */ +struct xe_rtp_regval { + /** @reg: Register */ + u32 reg; + /* + * TODO: maybe we need a union here with a func pointer for cases + * that are too specific to be generalized + */ + /** @clr_bits: bits to clear when updating register */ + u32 clr_bits; + /** @set_bits: bits to set when updating register */ + u32 set_bits; +#define XE_RTP_NOCHECK .read_mask = 0 + /** @read_mask: mask for bits to consider when reading value back */ + u32 read_mask; +#define XE_RTP_FLAG_FOREACH_ENGINE BIT(0) +#define XE_RTP_FLAG_MASKED_REG BIT(1) +#define XE_RTP_FLAG_ENGINE_BASE BIT(2) + /** @flags: flags to apply on rule evaluation or action */ + u8 flags; + /** @reg_type: register type, see ``XE_RTP_REG_*`` */ + u8 reg_type; +}; + +enum { + XE_RTP_MATCH_PLATFORM, + XE_RTP_MATCH_SUBPLATFORM, + XE_RTP_MATCH_GRAPHICS_VERSION, + XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, + XE_RTP_MATCH_MEDIA_VERSION, + XE_RTP_MATCH_MEDIA_VERSION_RANGE, + XE_RTP_MATCH_INTEGRATED, + XE_RTP_MATCH_DISCRETE, + XE_RTP_MATCH_STEP, + XE_RTP_MATCH_ENGINE_CLASS, + XE_RTP_MATCH_NOT_ENGINE_CLASS, + XE_RTP_MATCH_FUNC, +}; + +/** struct xe_rtp_rule - match rule for processing entry */ +struct xe_rtp_rule { + u8 match_type; + + /* match filters */ + union { + /* MATCH_PLATFORM / MATCH_SUBPLATFORM */ + struct { + u8 platform; + u8 subplatform; + }; + /* + * MATCH_GRAPHICS_VERSION / XE_RTP_MATCH_GRAPHICS_VERSION_RANGE / + * 
MATCH_MEDIA_VERSION / XE_RTP_MATCH_MEDIA_VERSION_RANGE + */ + struct { + u32 ver_start; +#define XE_RTP_END_VERSION_UNDEFINED U32_MAX + u32 ver_end; + }; + /* MATCH_STEP */ + struct { + u8 step_start; + u8 step_end; + }; + /* MATCH_ENGINE_CLASS / MATCH_NOT_ENGINE_CLASS */ + struct { + u8 engine_class; + }; + /* MATCH_FUNC */ + bool (*match_func)(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + }; +}; + +/** struct xe_rtp_entry - Entry in an rtp table */ +struct xe_rtp_entry { + const char *name; + const struct xe_rtp_regval regval; + const struct xe_rtp_rule *rules; + unsigned int n_rules; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c new file mode 100644 index 000000000000..7403410cd806 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_sa.h" + +static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) +{ + struct xe_sa_manager *sa_manager = arg; + struct xe_bo *bo = sa_manager->bo; + + if (!bo) { + drm_err(drm, "no bo for sa manager\n"); + return; + } + + drm_suballoc_manager_fini(&sa_manager->base); + + if (bo->vmap.is_iomem) + kvfree(sa_manager->cpu_ptr); + + xe_bo_unpin_map_no_vm(bo); + sa_manager->bo = NULL; +} + +int xe_sa_bo_manager_init(struct xe_gt *gt, + struct xe_sa_manager *sa_manager, + u32 size, u32 align) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 managed_size = size - SZ_4K; + struct xe_bo *bo; + + sa_manager->bo = NULL; + + bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) { + drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", + PTR_ERR(bo)); + return PTR_ERR(bo); + } + sa_manager->bo = bo; + + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); + sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); + + if (bo->vmap.is_iomem) { + sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); + if (!sa_manager->cpu_ptr) { + xe_bo_unpin_map_no_vm(sa_manager->bo); + sa_manager->bo = NULL; + return -ENOMEM; + } + } else { + sa_manager->cpu_ptr = bo->vmap.vaddr; + memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); + } + + return drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, + sa_manager); +} + +struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, + unsigned size) +{ + return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0); +} + +void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) +{ + struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); + struct xe_device *xe = gt_to_xe(sa_manager->bo->gt); + + if (!sa_manager->bo->vmap.is_iomem) + return; + + xe_map_memcpy_to(xe, &sa_manager->bo->vmap, drm_suballoc_soffset(sa_bo), + xe_sa_bo_cpu_addr(sa_bo), + drm_suballoc_size(sa_bo)); +} + +void xe_sa_bo_free(struct drm_suballoc *sa_bo, + struct dma_fence *fence) +{ + drm_suballoc_free(sa_bo, fence); +} diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h new file mode 100644 index 000000000000..742282ef7179 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_SA_H_ +#define _XE_SA_H_ + +#include "xe_sa_types.h" + +struct dma_fence; +struct xe_bo; +struct xe_gt; + +int xe_sa_bo_manager_init(struct xe_gt *gt, + struct 
xe_sa_manager *sa_manager, + u32 size, u32 align); + +struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, + u32 size); +void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); +void xe_sa_bo_free(struct drm_suballoc *sa_bo, + struct dma_fence *fence); + +static inline struct xe_sa_manager * +to_xe_sa_manager(struct drm_suballoc_manager *mng) +{ + return container_of(mng, struct xe_sa_manager, base); +} + +static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa) +{ + return to_xe_sa_manager(sa->manager)->gpu_addr + + drm_suballoc_soffset(sa); +} + +static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa) +{ + return to_xe_sa_manager(sa->manager)->cpu_ptr + + drm_suballoc_soffset(sa); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h new file mode 100644 index 000000000000..2ef896aeca1d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ +#ifndef _XE_SA_TYPES_H_ +#define _XE_SA_TYPES_H_ + +#include + +struct xe_bo; + +struct xe_sa_manager { + struct drm_suballoc_manager base; + struct xe_bo *bo; + u64 gpu_addr; + void *cpu_ptr; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c new file mode 100644 index 000000000000..ab81bfe17e8a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_sched_job.h" + +#include +#include + +#include "xe_device_types.h" +#include "xe_engine.h" +#include "xe_gt.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_trace.h" +#include "xe_vm.h" + +static struct kmem_cache *xe_sched_job_slab; +static struct kmem_cache *xe_sched_job_parallel_slab; + +int __init xe_sched_job_module_init(void) +{ + xe_sched_job_slab = + kmem_cache_create("xe_sched_job", + sizeof(struct xe_sched_job) + + sizeof(u64), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_sched_job_slab) + return -ENOMEM; + + xe_sched_job_parallel_slab = + kmem_cache_create("xe_sched_job_parallel", + sizeof(struct xe_sched_job) + + sizeof(u64) * + XE_HW_ENGINE_MAX_INSTANCE , 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!xe_sched_job_parallel_slab) { + kmem_cache_destroy(xe_sched_job_slab); + return -ENOMEM; + } + + return 0; +} + +void xe_sched_job_module_exit(void) +{ + kmem_cache_destroy(xe_sched_job_slab); + kmem_cache_destroy(xe_sched_job_parallel_slab); +} + +static struct xe_sched_job *job_alloc(bool parallel) +{ + return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab : + xe_sched_job_slab, GFP_KERNEL); +} + +bool xe_sched_job_is_migration(struct xe_engine *e) +{ + return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION) && + !(e->flags & ENGINE_FLAG_WA); +} + +static void job_free(struct xe_sched_job *job) +{ + struct xe_engine *e = job->engine; + bool is_migration = xe_sched_job_is_migration(e); + + kmem_cache_free(xe_engine_is_parallel(job->engine) || is_migration ? 
+ xe_sched_job_parallel_slab : xe_sched_job_slab, job); +} + +struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, + u64 *batch_addr) +{ + struct xe_sched_job *job; + struct dma_fence **fences; + bool is_migration = xe_sched_job_is_migration(e); + int err; + int i, j; + u32 width; + + /* Migration and kernel engines have their own locking */ + if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM | + ENGINE_FLAG_WA))) { + lockdep_assert_held(&e->vm->lock); + if (!xe_vm_no_dma_fences(e->vm)) + xe_vm_assert_held(e->vm); + } + + job = job_alloc(xe_engine_is_parallel(e) || is_migration); + if (!job) + return ERR_PTR(-ENOMEM); + + job->engine = e; + kref_init(&job->refcount); + xe_engine_get(job->engine); + + err = drm_sched_job_init(&job->drm, e->entity, 1, NULL); + if (err) + goto err_free; + + if (!xe_engine_is_parallel(e)) { + job->fence = xe_lrc_create_seqno_fence(e->lrc); + if (IS_ERR(job->fence)) { + err = PTR_ERR(job->fence); + goto err_sched_job; + } + } else { + struct dma_fence_array *cf; + + fences = kmalloc_array(e->width, sizeof(*fences), GFP_KERNEL); + if (!fences) { + err = -ENOMEM; + goto err_sched_job; + } + + for (j = 0; j < e->width; ++j) { + fences[j] = xe_lrc_create_seqno_fence(e->lrc + j); + if (IS_ERR(fences[j])) { + err = PTR_ERR(fences[j]); + goto err_fences; + } + } + + cf = dma_fence_array_create(e->width, fences, + e->parallel.composite_fence_ctx, + e->parallel.composite_fence_seqno++, + false); + if (!cf) { + --e->parallel.composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + + /* Sanity check */ + for (j = 0; j < e->width; ++j) + XE_BUG_ON(cf->base.seqno != fences[j]->seqno); + + job->fence = &cf->base; + } + + width = e->width; + if (is_migration) + width = 2; + + for (i = 0; i < width; ++i) + job->batch_addr[i] = batch_addr[i]; + + trace_xe_sched_job_create(job); + return job; + +err_fences: + for (j = j - 1; j >= 0; --j) { + --e->lrc[j].fence_ctx.next_seqno; + dma_fence_put(fences[j]); + } + kfree(fences); +err_sched_job: + drm_sched_job_cleanup(&job->drm); +err_free: + xe_engine_put(e); + job_free(job); + return ERR_PTR(err); +} + +/** + * xe_sched_job_destroy - Destroy XE schedule job + * @ref: reference to XE schedule job + * + * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup + * base DRM schedule job, and free memory for XE schedule job. 
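+ *
+ * For context, the submission-side lifecycle that eventually ends up here is
+ * roughly the sketch below (error handling omitted; where the last reference
+ * is dropped depends on the caller):
+ *
+ * .. code-block:: c
+ *
+ *	job = xe_sched_job_create(e, batch_addr);
+ *	xe_sched_job_arm(job);
+ *	xe_sched_job_push(job);	/* takes and drops its own reference */
+ *	...
+ *	xe_sched_job_put(job);	/* last put ends up in xe_sched_job_destroy() */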
+ */ +void xe_sched_job_destroy(struct kref *ref) +{ + struct xe_sched_job *job = + container_of(ref, struct xe_sched_job, refcount); + + xe_engine_put(job->engine); + dma_fence_put(job->fence); + drm_sched_job_cleanup(&job->drm); + job_free(job); +} + +void xe_sched_job_set_error(struct xe_sched_job *job, int error) +{ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) + return; + + dma_fence_set_error(job->fence, error); + + if (dma_fence_is_array(job->fence)) { + struct dma_fence_array *array = + to_dma_fence_array(job->fence); + struct dma_fence **child = array->fences; + unsigned int nchild = array->num_fences; + + do { + struct dma_fence *current_fence = *child++; + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &current_fence->flags)) + continue; + dma_fence_set_error(current_fence, error); + } while (--nchild); + } + + trace_xe_sched_job_set_error(job); + + dma_fence_enable_sw_signaling(job->fence); + xe_hw_fence_irq_run(job->engine->fence_irq); +} + +bool xe_sched_job_started(struct xe_sched_job *job) +{ + struct xe_lrc *lrc = job->engine->lrc; + + return xe_lrc_start_seqno(lrc) >= xe_sched_job_seqno(job); +} + +bool xe_sched_job_completed(struct xe_sched_job *job) +{ + struct xe_lrc *lrc = job->engine->lrc; + + /* + * Can safely check just LRC[0] seqno as that is last seqno written when + * parallel handshake is done. + */ + + return xe_lrc_seqno(lrc) >= xe_sched_job_seqno(job); +} + +void xe_sched_job_arm(struct xe_sched_job *job) +{ + drm_sched_job_arm(&job->drm); +} + +void xe_sched_job_push(struct xe_sched_job *job) +{ + xe_sched_job_get(job); + trace_xe_sched_job_exec(job); + drm_sched_entity_push_job(&job->drm); + xe_sched_job_put(job); +} diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h new file mode 100644 index 000000000000..5315ad8656c2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_SCHED_JOB_H_ +#define _XE_SCHED_JOB_H_ + +#include "xe_sched_job_types.h" + +#define XE_SCHED_HANG_LIMIT 1 +#define XE_SCHED_JOB_TIMEOUT LONG_MAX + +int xe_sched_job_module_init(void); +void xe_sched_job_module_exit(void); + +struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, + u64 *batch_addr); +void xe_sched_job_destroy(struct kref *ref); + +/** + * xe_sched_job_get - get reference to XE schedule job + * @job: XE schedule job object + * + * Increment XE schedule job's reference count + */ +static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job) +{ + kref_get(&job->refcount); + return job; +} + +/** + * xe_sched_job_put - put reference to XE schedule job + * @job: XE schedule job object + * + * Decrement XE schedule job's reference count, call xe_sched_job_destroy when + * reference count == 0. 
+ */ +static inline void xe_sched_job_put(struct xe_sched_job *job) +{ + kref_put(&job->refcount, xe_sched_job_destroy); +} + +void xe_sched_job_set_error(struct xe_sched_job *job, int error); +static inline bool xe_sched_job_is_error(struct xe_sched_job *job) +{ + return job->fence->error < 0; +} + +bool xe_sched_job_started(struct xe_sched_job *job); +bool xe_sched_job_completed(struct xe_sched_job *job); + +void xe_sched_job_arm(struct xe_sched_job *job); +void xe_sched_job_push(struct xe_sched_job *job); + +static inline struct xe_sched_job * +to_xe_sched_job(struct drm_sched_job *drm) +{ + return container_of(drm, struct xe_sched_job, drm); +} + +static inline u32 xe_sched_job_seqno(struct xe_sched_job *job) +{ + return job->fence->seqno; +} + +static inline void +xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags) +{ + job->migrate_flush_flags = flags; +} + +bool xe_sched_job_is_migration(struct xe_engine *e); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h new file mode 100644 index 000000000000..fd1d75996127 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_SCHED_JOB_TYPES_H_ +#define _XE_SCHED_JOB_TYPES_H_ + +#include + +#include + +struct xe_engine; + +/** + * struct xe_sched_job - XE schedule job (batch buffer tracking) + */ +struct xe_sched_job { + /** @drm: base DRM scheduler job */ + struct drm_sched_job drm; + /** @engine: XE submission engine */ + struct xe_engine *engine; + /** @refcount: ref count of this job */ + struct kref refcount; + /** + * @fence: dma fence to indicate completion. 1 way relationship - job + * can safely reference fence, fence cannot safely reference job. + */ +#define JOB_FLAG_SUBMIT DMA_FENCE_FLAG_USER_BITS + struct dma_fence *fence; + /** @user_fence: write back value when BB is complete */ + struct { + /** @used: user fence is used */ + bool used; + /** @addr: address to write to */ + u64 addr; + /** @value: write back value */ + u64 value; + } user_fence; + /** @migrate_flush_flags: Additional flush flags for migration jobs */ + u32 migrate_flush_flags; + /** @batch_addr: batch buffer address of job */ + u64 batch_addr[0]; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c new file mode 100644 index 000000000000..ca77d0971529 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_step.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_step.h" + +#include "xe_device.h" +#include "xe_platform_types.h" + +/* + * Provide mapping between PCI's revision ID to the individual GMD + * (Graphics/Media/Display) stepping values that can be compared numerically. + * + * Some platforms may have unusual ways of mapping PCI revision ID to GMD + * steppings. E.g., in some cases a higher PCI revision may translate to a + * lower stepping of the GT and/or display IP. + * + * Also note that some revisions/steppings may have been set aside as + * placeholders but never materialized in real hardware; in those cases there + * may be jumps in the revision IDs or stepping values in the tables below. + */ + +/* + * Some platforms always have the same stepping value for GT and display; + * use a macro to define these to make it easier to identify the platforms + * where the two steppings can deviate. 
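+ *
+ * As a worked example of how the revision ID is consumed below (see
+ * xe_step_get()): on PVC the PCI revid is split into a base-die id in bits
+ * 5:3 and a GT revid in bits 2:0, so a hypothetical revid of 0x2e would
+ * select pvc_basedie_subids[0x5] (STEP_B3) and pvc_revid_step_tbl[0x6]
+ * (STEP_B1).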
+ */ +#define COMMON_GT_MEDIA_STEP(x_) \ + .graphics = STEP_##x_, \ + .media = STEP_##x_ + +#define COMMON_STEP(x_) \ + COMMON_GT_MEDIA_STEP(x_), \ + .graphics = STEP_##x_, \ + .media = STEP_##x_, \ + .display = STEP_##x_ + +__diag_push(); +__diag_ignore_all("-Woverride-init", "Allow field overrides in table"); + +/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ +static const struct xe_step_info tgl_revids[] = { + [0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, + [1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 }, +}; + +static const struct xe_step_info dg1_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + +static const struct xe_step_info adls_revids[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, +}; + +static const struct xe_step_info dg2_g10_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, +}; + +static const struct xe_step_info dg2_g11_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 }, + [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 }, +}; + +static const struct xe_step_info dg2_g12_revid_step_tbl[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 }, + [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 }, +}; + +static const struct xe_step_info pvc_revid_step_tbl[] = { + [0x3] = { .graphics = STEP_A0 }, + [0x5] = { .graphics = STEP_B0 }, + [0x6] = { .graphics = STEP_B1 }, + [0x7] = { .graphics = STEP_C0 }, +}; + +static const int pvc_basedie_subids[] = { + [0x0] = STEP_A0, + [0x3] = STEP_B0, + [0x4] = STEP_B1, + [0x5] = STEP_B3, +}; + +__diag_pop(); + +struct xe_step_info xe_step_get(struct xe_device *xe) +{ + const struct xe_step_info *revids = NULL; + struct xe_step_info step = {}; + u16 revid = xe->info.revid; + int size = 0; + const int *basedie_info = NULL; + int basedie_size = 0; + int baseid = 0; + + if (xe->info.platform == XE_PVC) { + baseid = FIELD_GET(GENMASK(5, 3), xe->info.revid); + revid = FIELD_GET(GENMASK(2, 0), xe->info.revid); + revids = pvc_revid_step_tbl; + size = ARRAY_SIZE(pvc_revid_step_tbl); + basedie_info = pvc_basedie_subids; + basedie_size = ARRAY_SIZE(pvc_basedie_subids); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10) { + revids = dg2_g10_revid_step_tbl; + size = ARRAY_SIZE(dg2_g10_revid_step_tbl); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G11) { + revids = dg2_g11_revid_step_tbl; + size = ARRAY_SIZE(dg2_g11_revid_step_tbl); + } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) { + revids = dg2_g12_revid_step_tbl; + size = ARRAY_SIZE(dg2_g12_revid_step_tbl); + } else if (xe->info.platform == XE_ALDERLAKE_S) { + revids = adls_revids; + size = ARRAY_SIZE(adls_revids); + } else if (xe->info.platform == XE_DG1) { + revids = dg1_revids; + size = ARRAY_SIZE(dg1_revids); + } else if (xe->info.platform == XE_TIGERLAKE) { + revids = tgl_revids; + size = ARRAY_SIZE(tgl_revids); + } + + /* Not using the stepping scheme for the platform yet. 
*/ + if (!revids) + return step; + + if (revid < size && revids[revid].graphics != STEP_NONE) { + step = revids[revid]; + } else { + drm_warn(&xe->drm, "Unknown revid 0x%02x\n", revid); + + /* + * If we hit a gap in the revid array, use the information for + * the next revid. + * + * This may be wrong in all sorts of ways, especially if the + * steppings in the array are not monotonically increasing, but + * it's better than defaulting to 0. + */ + while (revid < size && revids[revid].graphics == STEP_NONE) + revid++; + + if (revid < size) { + drm_dbg(&xe->drm, "Using steppings for revid 0x%02x\n", + revid); + step = revids[revid]; + } else { + drm_dbg(&xe->drm, "Using future steppings\n"); + step.graphics = STEP_FUTURE; + step.display = STEP_FUTURE; + } + } + + drm_WARN_ON(&xe->drm, step.graphics == STEP_NONE); + + if (basedie_info && basedie_size) { + if (baseid < basedie_size && basedie_info[baseid] != STEP_NONE) { + step.basedie = basedie_info[baseid]; + } else { + drm_warn(&xe->drm, "Unknown baseid 0x%02x\n", baseid); + step.basedie = STEP_FUTURE; + } + } + + return step; +} + +#define STEP_NAME_CASE(name) \ + case STEP_##name: \ + return #name; + +const char *xe_step_name(enum xe_step step) +{ + switch (step) { + STEP_NAME_LIST(STEP_NAME_CASE); + + default: + return "**"; + } +} diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h new file mode 100644 index 000000000000..0c596c8579fb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_step.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_STEP_H_ +#define _XE_STEP_H_ + +#include + +#include "xe_step_types.h" + +struct xe_device; + +struct xe_step_info xe_step_get(struct xe_device *xe); +const char *xe_step_name(enum xe_step step); + +#endif diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h new file mode 100644 index 000000000000..b7859f9647ca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_step_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_STEP_TYPES_H_ +#define _XE_STEP_TYPES_H_ + +#include + +struct xe_step_info { + u8 graphics; + u8 media; + u8 display; + u8 basedie; +}; + +#define STEP_ENUM_VAL(name) STEP_##name, + +#define STEP_NAME_LIST(func) \ + func(A0) \ + func(A1) \ + func(A2) \ + func(B0) \ + func(B1) \ + func(B2) \ + func(B3) \ + func(C0) \ + func(C1) \ + func(D0) \ + func(D1) \ + func(E0) \ + func(F0) \ + func(G0) \ + func(H0) \ + func(I0) \ + func(I1) \ + func(J0) + +/* + * Symbolic steppings that do not match the hardware. These are valid both as gt + * and display steppings as symbolic names. 
+ */ +enum xe_step { + STEP_NONE = 0, + STEP_NAME_LIST(STEP_ENUM_VAL) + STEP_FUTURE, + STEP_FOREVER, +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c new file mode 100644 index 000000000000..0fbd8d0978cf --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_sync.h" + +#include +#include +#include +#include +#include +#include + +#include "xe_device_types.h" +#include "xe_sched_job_types.h" +#include "xe_macros.h" + +#define SYNC_FLAGS_TYPE_MASK 0x3 +#define SYNC_FLAGS_FENCE_INSTALLED 0x10000 + +struct user_fence { + struct xe_device *xe; + struct kref refcount; + struct dma_fence_cb cb; + struct work_struct worker; + struct mm_struct *mm; + u64 __user *addr; + u64 value; +}; + +static void user_fence_destroy(struct kref *kref) +{ + struct user_fence *ufence = container_of(kref, struct user_fence, + refcount); + + mmdrop(ufence->mm); + kfree(ufence); +} + +static void user_fence_get(struct user_fence *ufence) +{ + kref_get(&ufence->refcount); +} + +static void user_fence_put(struct user_fence *ufence) +{ + kref_put(&ufence->refcount, user_fence_destroy); +} + +static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) +{ + struct user_fence *ufence; + + ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); + if (!ufence) + return NULL; + + ufence->xe = xe; + kref_init(&ufence->refcount); + ufence->addr = u64_to_user_ptr(addr); + ufence->value = value; + ufence->mm = current->mm; + mmgrab(ufence->mm); + + return ufence; +} + +static void user_fence_worker(struct work_struct *w) +{ + struct user_fence *ufence = container_of(w, struct user_fence, worker); + + if (mmget_not_zero(ufence->mm)) { + kthread_use_mm(ufence->mm); + if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) + XE_WARN_ON("Copy to user failed"); + kthread_unuse_mm(ufence->mm); + mmput(ufence->mm); + } + + wake_up_all(&ufence->xe->ufence_wq); + user_fence_put(ufence); +} + +static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +{ + INIT_WORK(&ufence->worker, user_fence_worker); + queue_work(ufence->xe->ordered_wq, &ufence->worker); + dma_fence_put(fence); +} + +static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct user_fence *ufence = container_of(cb, struct user_fence, cb); + + kick_ufence(ufence, fence); +} + +int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, + struct xe_sync_entry *sync, + struct drm_xe_sync __user *sync_user, + bool exec, bool no_dma_fences) +{ + struct drm_xe_sync sync_in; + int err; + + if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, sync_in.flags & + ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL))) + return -EINVAL; + + switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { + case DRM_XE_SYNC_SYNCOBJ: + if (XE_IOCTL_ERR(xe, no_dma_fences)) + return -ENOTSUPP; + + if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) + return -EINVAL; + + sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); + if (XE_IOCTL_ERR(xe, !sync->syncobj)) + return -ENOENT; + + if (!(sync_in.flags & DRM_XE_SYNC_SIGNAL)) { + sync->fence = drm_syncobj_fence_get(sync->syncobj); + if (XE_IOCTL_ERR(xe, !sync->fence)) + return -EINVAL; + } + break; + + case DRM_XE_SYNC_TIMELINE_SYNCOBJ: + if (XE_IOCTL_ERR(xe, no_dma_fences)) + return -ENOTSUPP; + + if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) + return -EINVAL; + + if 
(XE_IOCTL_ERR(xe, sync_in.timeline_value == 0)) + return -EINVAL; + + sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); + if (XE_IOCTL_ERR(xe, !sync->syncobj)) + return -ENOENT; + + if (sync_in.flags & DRM_XE_SYNC_SIGNAL) { + sync->chain_fence = dma_fence_chain_alloc(); + if (!sync->chain_fence) + return -ENOMEM; + } else { + sync->fence = drm_syncobj_fence_get(sync->syncobj); + if (XE_IOCTL_ERR(xe, !sync->fence)) + return -EINVAL; + + err = dma_fence_chain_find_seqno(&sync->fence, + sync_in.timeline_value); + if (err) + return err; + } + break; + + case DRM_XE_SYNC_DMA_BUF: + if (XE_IOCTL_ERR(xe, "TODO")) + return -EINVAL; + break; + + case DRM_XE_SYNC_USER_FENCE: + if (XE_IOCTL_ERR(xe, !(sync_in.flags & DRM_XE_SYNC_SIGNAL))) + return -ENOTSUPP; + + if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7)) + return -EINVAL; + + if (exec) { + sync->addr = sync_in.addr; + } else { + sync->ufence = user_fence_create(xe, sync_in.addr, + sync_in.timeline_value); + if (XE_IOCTL_ERR(xe, !sync->ufence)) + return -ENOMEM; + } + + break; + + default: + return -EINVAL; + } + + sync->flags = sync_in.flags; + sync->timeline_value = sync_in.timeline_value; + + return 0; +} + +int xe_sync_entry_wait(struct xe_sync_entry *sync) +{ + if (sync->fence) + dma_fence_wait(sync->fence, true); + + return 0; +} + +int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) +{ + int err; + + if (sync->fence) { + err = drm_sched_job_add_dependency(&job->drm, + dma_fence_get(sync->fence)); + if (err) { + dma_fence_put(sync->fence); + return err; + } + } + + return 0; +} + +bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, + struct dma_fence *fence) +{ + if (!(sync->flags & DRM_XE_SYNC_SIGNAL) || + sync->flags & SYNC_FLAGS_FENCE_INSTALLED) + return false; + + if (sync->chain_fence) { + drm_syncobj_add_point(sync->syncobj, sync->chain_fence, + fence, sync->timeline_value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + sync->chain_fence = NULL; + } else if (sync->syncobj) { + drm_syncobj_replace_fence(sync->syncobj, fence); + } else if (sync->ufence) { + int err; + + dma_fence_get(fence); + user_fence_get(sync->ufence); + err = dma_fence_add_callback(fence, &sync->ufence->cb, + user_fence_cb); + if (err == -ENOENT) { + kick_ufence(sync->ufence, fence); + } else if (err) { + XE_WARN_ON("failed to add user fence"); + user_fence_put(sync->ufence); + dma_fence_put(fence); + } + } else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) == + DRM_XE_SYNC_USER_FENCE) { + job->user_fence.used = true; + job->user_fence.addr = sync->addr; + job->user_fence.value = sync->timeline_value; + } + + /* TODO: external BO? 
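A note on the user-fence path above: on completion the kernel simply writes the 64-bit value captured at parse time to the 8-byte-aligned user address, from a worker that temporarily adopts the submitting process's mm. A hedged sketch of the userspace side of that contract; the structure and names are illustrative, and a real consumer would block in the driver's wait mechanism rather than spin:

#include <stdatomic.h>
#include <stdint.h>

/* Userspace view of a DRM_XE_SYNC_USER_FENCE slot: the address handed to
 * the kernel must be 8-byte aligned (the parse code rejects addr & 0x7),
 * and the kernel stores the supplied value there when the fence signals. */
struct ufence_slot {
	_Atomic uint64_t seqno;
};

static void ufence_busy_wait(const struct ufence_slot *slot, uint64_t value)
{
	/* Spin until the kernel's copy_to_user() of 'value' becomes visible.
	 * Illustration only; blocking waits belong in the driver. */
	while (atomic_load_explicit(&slot->seqno, memory_order_acquire) != value)
		;
}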
*/ + + sync->flags |= SYNC_FLAGS_FENCE_INSTALLED; + + return true; +} + +void xe_sync_entry_cleanup(struct xe_sync_entry *sync) +{ + if (sync->syncobj) + drm_syncobj_put(sync->syncobj); + if (sync->fence) + dma_fence_put(sync->fence); + if (sync->chain_fence) + dma_fence_put(&sync->chain_fence->base); + if (sync->ufence) + user_fence_put(sync->ufence); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h new file mode 100644 index 000000000000..4cbcf7a19911 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_SYNC_H_ +#define _XE_SYNC_H_ + +#include "xe_sync_types.h" + +struct xe_device; +struct xe_file; +struct xe_sched_job; + +int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, + struct xe_sync_entry *sync, + struct drm_xe_sync __user *sync_user, + bool exec, bool compute_mode); +int xe_sync_entry_wait(struct xe_sync_entry *sync); +int xe_sync_entry_add_deps(struct xe_sync_entry *sync, + struct xe_sched_job *job); +bool xe_sync_entry_signal(struct xe_sync_entry *sync, + struct xe_sched_job *job, + struct dma_fence *fence); +void xe_sync_entry_cleanup(struct xe_sync_entry *sync); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h new file mode 100644 index 000000000000..24fccc26cb53 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_SYNC_TYPES_H_ +#define _XE_SYNC_TYPES_H_ + +#include + +struct drm_syncobj; +struct dma_fence; +struct dma_fence_chain; +struct drm_xe_sync; +struct user_fence; + +struct xe_sync_entry { + struct drm_syncobj *syncobj; + struct dma_fence *fence; + struct dma_fence_chain *chain_fence; + struct user_fence *ufence; + u64 addr; + u64 timeline_value; + u32 flags; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c new file mode 100644 index 000000000000..2570c0b859c4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h new file mode 100644 index 000000000000..a5f963f1f6eb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -0,0 +1,513 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2022 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_H_ + +#include +#include + +#include "xe_bo_types.h" +#include "xe_engine_types.h" +#include "xe_gpu_scheduler_types.h" +#include "xe_gt_types.h" +#include "xe_guc_engine_types.h" +#include "xe_sched_job.h" +#include "xe_vm_types.h" + +DECLARE_EVENT_CLASS(xe_bo, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo), + + TP_STRUCT__entry( + __field(size_t, size) + __field(u32, flags) + __field(u64, vm) + ), + + TP_fast_assign( + __entry->size = bo->size; + __entry->flags = bo->flags; + __entry->vm = (u64)bo->vm; + ), + + TP_printk("size=%ld, flags=0x%02x, vm=0x%016llx", + __entry->size, __entry->flags, __entry->vm) +); + +DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + +DEFINE_EVENT(xe_bo, xe_bo_move, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + +DECLARE_EVENT_CLASS(xe_engine, + 
TP_PROTO(struct xe_engine *e), + TP_ARGS(e), + + TP_STRUCT__entry( + __field(enum xe_engine_class, class) + __field(u32, logical_mask) + __field(u8, gt_id) + __field(u16, width) + __field(u16, guc_id) + __field(u32, guc_state) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->class = e->class; + __entry->logical_mask = e->logical_mask; + __entry->gt_id = e->gt->info.id; + __entry->width = e->width; + __entry->guc_id = e->guc->id; + __entry->guc_state = atomic_read(&e->guc->state); + __entry->flags = e->flags; + ), + + TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", + __entry->class, __entry->logical_mask, + __entry->gt_id, __entry->width, __entry->guc_id, + __entry->guc_state, __entry->flags) +); + +DEFINE_EVENT(xe_engine, xe_engine_create, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_supress_resume, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_submit, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_scheduling_enable, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_scheduling_disable, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_scheduling_done, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_register, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_deregister, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_deregister_done, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_close, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_kill, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_cleanup_entity, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_destroy, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_reset, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_memory_cat_error, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_stop, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DEFINE_EVENT(xe_engine, xe_engine_resubmit, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + +DECLARE_EVENT_CLASS(xe_sched_job, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job), + + TP_STRUCT__entry( + __field(u32, seqno) + __field(u16, guc_id) + __field(u32, guc_state) + __field(u32, flags) + __field(int, error) + __field(u64, fence) + __field(u64, batch_addr) + ), + + TP_fast_assign( + __entry->seqno = xe_sched_job_seqno(job); + __entry->guc_id = job->engine->guc->id; + __entry->guc_state = + atomic_read(&job->engine->guc->state); + __entry->flags = job->engine->flags; + __entry->error = job->fence->error; + __entry->fence = (u64)job->fence; + __entry->batch_addr = (u64)job->batch_addr[0]; + ), + + TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + __entry->fence, __entry->seqno, __entry->guc_id, + __entry->batch_addr, __entry->guc_state, + __entry->flags, __entry->error) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_create, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_exec, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_run, 
+ TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_free, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_timedout, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_set_error, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DEFINE_EVENT(xe_sched_job, xe_sched_job_ban, + TP_PROTO(struct xe_sched_job *job), + TP_ARGS(job) +); + +DECLARE_EVENT_CLASS(xe_sched_msg, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg), + + TP_STRUCT__entry( + __field(u32, opcode) + __field(u16, guc_id) + ), + + TP_fast_assign( + __entry->opcode = msg->opcode; + __entry->guc_id = + ((struct xe_engine *)msg->private_data)->guc->id; + ), + + TP_printk("guc_id=%d, opcode=%u", __entry->guc_id, + __entry->opcode) +); + +DEFINE_EVENT(xe_sched_msg, xe_sched_msg_add, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg) +); + +DEFINE_EVENT(xe_sched_msg, xe_sched_msg_recv, + TP_PROTO(struct xe_sched_msg *msg), + TP_ARGS(msg) +); + +DECLARE_EVENT_CLASS(xe_hw_fence, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence), + + TP_STRUCT__entry( + __field(u64, ctx) + __field(u32, seqno) + __field(u64, fence) + ), + + TP_fast_assign( + __entry->ctx = fence->dma.context; + __entry->seqno = fence->dma.seqno; + __entry->fence = (u64)fence; + ), + + TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", + __entry->ctx, __entry->fence, __entry->seqno) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_signal, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_try_signal, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_hw_fence, xe_hw_fence_free, + TP_PROTO(struct xe_hw_fence *fence), + TP_ARGS(fence) +); + +DECLARE_EVENT_CLASS(xe_vma, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma), + + TP_STRUCT__entry( + __field(u64, vma) + __field(u32, asid) + __field(u64, start) + __field(u64, end) + __field(u64, ptr) + ), + + TP_fast_assign( + __entry->vma = (u64)vma; + __entry->asid = vma->vm->usm.asid; + __entry->start = vma->start; + __entry->end = vma->end; + __entry->ptr = (u64)vma->userptr.ptr; + ), + + TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", + __entry->vma, __entry->asid, __entry->start, + __entry->end, __entry->ptr) +) + +DEFINE_EVENT(xe_vma, xe_vma_flush, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pagefault, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_acc, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_fail, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_pf_bind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_unbind, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, 
xe_vma_userptr_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_usm_invalidate, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_evict, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, + TP_PROTO(struct xe_vma *vma), + TP_ARGS(vma) +); + +DECLARE_EVENT_CLASS(xe_vm, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __field(u64, vm) + __field(u32, asid) + ), + + TP_fast_assign( + __entry->vm = (u64)vm; + __entry->asid = vm->usm.asid; + ), + + TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, + __entry->asid) +); + +DEFINE_EVENT(xe_vm, xe_vm_create, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_free, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_restart, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + +TRACE_EVENT(xe_guc_ct_h2g_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_STRUCT__entry( + __field(u32, _head) + __field(u32, _tail) + __field(u32, size) + __field(u32, space) + __field(u32, len) + ), + + TP_fast_assign( + __entry->_head = _head; + __entry->_tail = _tail; + __entry->size = size; + __entry->space = space; + __entry->len = len; + ), + + TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +TRACE_EVENT(xe_guc_ct_g2h_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_STRUCT__entry( + __field(u32, _head) + __field(u32, _tail) + __field(u32, size) + __field(u32, space) + __field(u32, len) + ), + + TP_fast_assign( + __entry->_head = _head; + __entry->_tail = _tail; + __entry->size = size; + __entry->space = space; + __entry->len = len; + ), + + TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace +#include diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c new file mode 100644 index 000000000000..a0ba8bba84d1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include + +#include +#include +#include + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_ttm_gtt_mgr.h" + +struct xe_ttm_gtt_node { + struct ttm_buffer_object *tbo; + struct ttm_range_mgr_node base; +}; + +static inline struct xe_ttm_gtt_mgr * +to_gtt_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct xe_ttm_gtt_mgr, manager); +} + +static inline struct xe_ttm_gtt_node * +to_xe_ttm_gtt_node(struct ttm_resource *res) +{ + return container_of(res, struct xe_ttm_gtt_node, base.base); +} + +static int 
xe_ttm_gtt_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct xe_ttm_gtt_node *node; + int r; + + node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->tbo = tbo; + ttm_resource_init(tbo, place, &node->base.base); + + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && + ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) { + r = -ENOSPC; + goto err_fini; + } + + node->base.mm_nodes[0].start = 0; + node->base.mm_nodes[0].size = PFN_UP(node->base.base.size); + node->base.base.start = XE_BO_INVALID_OFFSET; + + *res = &node->base.base; + + return 0; + +err_fini: + ttm_resource_fini(man, &node->base.base); + kfree(node); + return r; +} + +static void xe_ttm_gtt_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct xe_ttm_gtt_node *node = to_xe_ttm_gtt_node(res); + + ttm_resource_fini(man, res); + kfree(node); +} + +static void xe_ttm_gtt_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + +} + +static const struct ttm_resource_manager_func xe_ttm_gtt_mgr_func = { + .alloc = xe_ttm_gtt_mgr_new, + .free = xe_ttm_gtt_mgr_del, + .debug = xe_ttm_gtt_mgr_debug +}; + +static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg) +{ + struct xe_ttm_gtt_mgr *mgr = arg; + struct xe_device *xe = gt_to_xe(mgr->gt); + struct ttm_resource_manager *man = &mgr->manager; + int err; + + ttm_resource_manager_set_used(man, false); + + err = ttm_resource_manager_evict_all(&xe->ttm, man); + if (err) + return; + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL); +} + +int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr, + u64 gtt_size) +{ + struct xe_device *xe = gt_to_xe(gt); + struct ttm_resource_manager *man = &mgr->manager; + int err; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + mgr->gt = gt; + man->use_tt = true; + man->func = &xe_ttm_gtt_mgr_func; + + ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); + + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, &mgr->manager); + ttm_resource_manager_set_used(man, true); + + err = drmm_add_action_or_reset(&xe->drm, ttm_gtt_mgr_fini, mgr); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h new file mode 100644 index 000000000000..d1d57cb9c2b8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTGM_GTT_MGR_H_ +#define _XE_TTGM_GTT_MGR_H_ + +#include "xe_ttm_gtt_mgr_types.h" + +struct xe_gt; + +int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr, + u64 gtt_size); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h new file mode 100644 index 000000000000..c66737488326 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_GTT_MGR_TYPES_H_ +#define _XE_TTM_GTT_MGR_TYPES_H_ + +#include + +struct xe_gt; + +struct xe_ttm_gtt_mgr { + struct xe_gt *gt; + struct ttm_resource_manager manager; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c new file mode 100644 index 000000000000..e391e81d3640 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -0,0 +1,403 
@@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include + +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_res_cursor.h" +#include "xe_ttm_vram_mgr.h" + +static inline struct xe_ttm_vram_mgr * +to_vram_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct xe_ttm_vram_mgr, manager); +} + +static inline struct xe_gt * +mgr_to_gt(struct xe_ttm_vram_mgr *mgr) +{ + return mgr->gt; +} + +static inline struct drm_buddy_block * +xe_ttm_vram_mgr_first_block(struct list_head *list) +{ + return list_first_entry_or_null(list, struct drm_buddy_block, link); +} + +static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 start, size; + + block = xe_ttm_vram_mgr_first_block(head); + if (!block) + return false; + + while (head != block->link.next) { + start = xe_ttm_vram_mgr_block_start(block); + size = xe_ttm_vram_mgr_block_size(block); + + block = list_entry(block->link.next, struct drm_buddy_block, + link); + if (start + size != xe_ttm_vram_mgr_block_start(block)) + return false; + } + + return true; +} + +static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + u64 max_bytes, cur_size, min_block_size; + struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct xe_ttm_vram_mgr_resource *vres; + u64 size, remaining_size, lpfn, fpfn; + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + unsigned long pages_per_block; + int r; + + lpfn = (u64)place->lpfn << PAGE_SHIFT; + if (!lpfn) + lpfn = man->size; + + fpfn = (u64)place->fpfn << PAGE_SHIFT; + + max_bytes = mgr->gt->mem.vram.size; + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + pages_per_block = ~0ul; + } else { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + pages_per_block = HPAGE_PMD_NR; +#else + /* default to 2MB */ + pages_per_block = 2UL << (20UL - PAGE_SHIFT); +#endif + + pages_per_block = max_t(uint32_t, pages_per_block, + tbo->page_alignment); + } + + vres = kzalloc(sizeof(*vres), GFP_KERNEL); + if (!vres) + return -ENOMEM; + + ttm_resource_init(tbo, place, &vres->base); + remaining_size = vres->base.size; + + /* bail out quickly if there's likely not enough VRAM for this BO */ + if (ttm_resource_manager_usage(man) > max_bytes) { + r = -ENOSPC; + goto error_fini; + } + + INIT_LIST_HEAD(&vres->blocks); + + if (place->flags & TTM_PL_FLAG_TOPDOWN) + vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + + if (fpfn || lpfn != man->size) + /* Allocate blocks in desired range */ + vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + + mutex_lock(&mgr->lock); + while (remaining_size) { + if (tbo->page_alignment) + min_block_size = tbo->page_alignment << PAGE_SHIFT; + else + min_block_size = mgr->default_page_size; + + XE_BUG_ON(min_block_size < mm->chunk_size); + + /* Limit maximum size to 2GiB due to SG table limitations */ + size = min(remaining_size, 2ULL << 30); + + if (size >= pages_per_block << PAGE_SHIFT) + min_block_size = pages_per_block << PAGE_SHIFT; + + cur_size = size; + + if (fpfn + size != place->lpfn << PAGE_SHIFT) { + /* + * Except for actual range allocation, modify the size and + * min_block_size conforming to continuous flag enablement + */ + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_block_size = size; + /* + * Modify the size value if size is not + * aligned with min_block_size + */ 
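To make the two rounding rules in this allocator concrete, a small userspace-style sketch with made-up sizes; 2 MiB stands in for the default block size assumed above:

#include <stdint.h>
#include <stdio.h>

#define SZ_2M	(2ull << 20)

static uint64_t round_up_to(uint64_t x, uint64_t a)	{ return (x + a - 1) / a * a; }

static uint64_t round_pow2(uint64_t x)
{
	uint64_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	uint64_t size = 5ull << 20;	/* 5 MiB request */

	/* TTM_PL_FLAG_CONTIGUOUS: size becomes a power of two (8 MiB) and
	 * min_block_size is raised to match, so one buddy block covers it. */
	printf("contiguous : %llu MiB\n", (unsigned long long)(round_pow2(size) >> 20));

	/* Otherwise size is only rounded up to the block size (6 MiB); the
	 * overshoot is trimmed off the last block afterwards. */
	printf("fragmented : %llu MiB\n", (unsigned long long)(round_up_to(size, SZ_2M) >> 20));
	return 0;
}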
+ } else if (!IS_ALIGNED(size, min_block_size)) { + size = round_up(size, min_block_size); + } + } + + r = drm_buddy_alloc_blocks(mm, fpfn, + lpfn, + size, + min_block_size, + &vres->blocks, + vres->flags); + if (unlikely(r)) + goto error_free_blocks; + + if (size > remaining_size) + remaining_size = 0; + else + remaining_size -= size; + } + mutex_unlock(&mgr->lock); + + if (cur_size != size) { + struct drm_buddy_block *block; + struct list_head *trim_list; + u64 original_size; + LIST_HEAD(temp); + + trim_list = &vres->blocks; + original_size = vres->base.size; + + /* + * If size value is rounded up to min_block_size, trim the last + * block to the required size + */ + if (!list_is_singular(&vres->blocks)) { + block = list_last_entry(&vres->blocks, typeof(*block), link); + list_move_tail(&block->link, &temp); + trim_list = &temp; + /* + * Compute the original_size value by subtracting the + * last block size with (aligned size - original size) + */ + original_size = xe_ttm_vram_mgr_block_size(block) - + (size - cur_size); + } + + mutex_lock(&mgr->lock); + drm_buddy_block_trim(mm, + original_size, + trim_list); + mutex_unlock(&mgr->lock); + + if (!list_empty(&temp)) + list_splice_tail(trim_list, &vres->blocks); + } + + vres->base.start = 0; + list_for_each_entry(block, &vres->blocks, link) { + unsigned long start; + + start = xe_ttm_vram_mgr_block_start(block) + + xe_ttm_vram_mgr_block_size(block); + start >>= PAGE_SHIFT; + + if (start > PFN_UP(vres->base.size)) + start -= PFN_UP(vres->base.size); + else + start = 0; + vres->base.start = max(vres->base.start, start); + } + + if (xe_is_vram_mgr_blocks_contiguous(&vres->blocks)) + vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + + *res = &vres->base; + return 0; + +error_free_blocks: + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); +error_fini: + ttm_resource_fini(man, &vres->base); + kfree(vres); + + return r; +} + +static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(res); + struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); + + ttm_resource_fini(man, res); + + kfree(vres); +} + +static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_print(mm, printer); + mutex_unlock(&mgr->lock); + drm_printf(printer, "man size:%llu\n", man->size); +} + +static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = { + .alloc = xe_ttm_vram_mgr_new, + .free = xe_ttm_vram_mgr_del, + .debug = xe_ttm_vram_mgr_debug +}; + +static void ttm_vram_mgr_fini(struct drm_device *drm, void *arg) +{ + struct xe_ttm_vram_mgr *mgr = arg; + struct xe_device *xe = gt_to_xe(mgr->gt); + struct ttm_resource_manager *man = &mgr->manager; + int err; + + ttm_resource_manager_set_used(man, false); + + err = ttm_resource_manager_evict_all(&xe->ttm, man); + if (err) + return; + + drm_buddy_fini(&mgr->mm); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + mgr->gt->info.vram_id, + NULL); +} + +int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr) +{ + struct xe_device *xe = gt_to_xe(gt); + struct ttm_resource_manager *man = &mgr->manager; + int err; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + 
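Stepping back to the trim step in xe_ttm_vram_mgr_new() above: when the request was rounded up, only the last buddy block is trimmed, by exactly the amount of the overshoot. The arithmetic, shown with the same made-up 5 MiB request as before:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cur_size   = 5ull << 20;	/* bytes actually requested */
	uint64_t size       = 6ull << 20;	/* request rounded to 2 MiB blocks */
	uint64_t last_block = 2ull << 20;	/* size of the final buddy block */

	/* mirrors: original_size = block_size - (size - cur_size) */
	uint64_t trimmed = last_block - (size - cur_size);

	printf("last block: %llu KiB -> %llu KiB after trim\n",
	       (unsigned long long)(last_block >> 10),
	       (unsigned long long)(trimmed >> 10));
	return 0;
}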
mgr->gt = gt; + man->func = &xe_ttm_vram_mgr_func; + + ttm_resource_manager_init(man, &xe->ttm, gt->mem.vram.size); + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + if (err) + return err; + + mutex_init(&mgr->lock); + mgr->default_page_size = PAGE_SIZE; + + ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + gt->info.vram_id, + &mgr->manager); + ttm_resource_manager_set_used(man, true); + + err = drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); + if (err) + return err; + + return 0; +} + +int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, + struct ttm_resource *res, + u64 offset, u64 length, + struct device *dev, + enum dma_data_direction dir, + struct sg_table **sgt) +{ + struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0); + struct xe_res_cursor cursor; + struct scatterlist *sg; + int num_entries = 0; + int i, r; + + *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL); + if (!*sgt) + return -ENOMEM; + + /* Determine the number of DRM_BUDDY blocks to export */ + xe_res_first(res, offset, length, &cursor); + while (cursor.remaining) { + num_entries++; + xe_res_next(&cursor, cursor.size); + } + + r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL); + if (r) + goto error_free; + + /* Initialize scatterlist nodes of sg_table */ + for_each_sgtable_sg((*sgt), sg, i) + sg->length = 0; + + /* + * Walk down DRM_BUDDY blocks to populate scatterlist nodes + * @note: Use iterator api to get first the DRM_BUDDY block + * and the number of bytes from it. Access the following + * DRM_BUDDY block(s) if more buffer needs to exported + */ + xe_res_first(res, offset, length, &cursor); + for_each_sgtable_sg((*sgt), sg, i) { + phys_addr_t phys = cursor.start + gt->mem.vram.io_start; + size_t size = cursor.size; + dma_addr_t addr; + + addr = dma_map_resource(dev, phys, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + r = dma_mapping_error(dev, addr); + if (r) + goto error_unmap; + + sg_set_page(sg, NULL, size, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = size; + + xe_res_next(&cursor, cursor.size); + } + + return 0; + +error_unmap: + for_each_sgtable_sg((*sgt), sg, i) { + if (!sg->length) + continue; + + dma_unmap_resource(dev, sg->dma_address, + sg->length, dir, + DMA_ATTR_SKIP_CPU_SYNC); + } + sg_free_table(*sgt); + +error_free: + kfree(*sgt); + return r; +} + +void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, + struct sg_table *sgt) +{ + struct scatterlist *sg; + int i; + + for_each_sgtable_sg(sgt, sg, i) + dma_unmap_resource(dev, sg->dma_address, + sg->length, dir, + DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(sgt); + kfree(sgt); +} diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h new file mode 100644 index 000000000000..537fccec4318 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_VRAM_MGR_H_ +#define _XE_TTM_VRAM_MGR_H_ + +#include "xe_ttm_vram_mgr_types.h" + +enum dma_data_direction; +struct xe_device; +struct xe_gt; + +int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr); +int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, + struct ttm_resource *res, + u64 offset, u64 length, + struct device *dev, + enum dma_data_direction dir, + struct sg_table **sgt); +void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, + struct sg_table *sgt); + +static inline u64 xe_ttm_vram_mgr_block_start(struct drm_buddy_block *block) +{ + return 
drm_buddy_block_offset(block); +} + +static inline u64 xe_ttm_vram_mgr_block_size(struct drm_buddy_block *block) +{ + return PAGE_SIZE << drm_buddy_block_order(block); +} + +static inline struct xe_ttm_vram_mgr_resource * +to_xe_ttm_vram_mgr_resource(struct ttm_resource *res) +{ + return container_of(res, struct xe_ttm_vram_mgr_resource, base); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h new file mode 100644 index 000000000000..39b93c71c21b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_VRAM_MGR_TYPES_H_ +#define _XE_TTM_VRAM_MGR_TYPES_H_ + +#include +#include + +struct xe_gt; + +/** + * struct xe_ttm_vram_mgr - XE TTM VRAM manager + * + * Manages placement of TTM resource in VRAM. + */ +struct xe_ttm_vram_mgr { + /** @gt: Graphics tile which the VRAM belongs to */ + struct xe_gt *gt; + /** @manager: Base TTM resource manager */ + struct ttm_resource_manager manager; + /** @mm: DRM buddy allocator which manages the VRAM */ + struct drm_buddy mm; + /** @default_page_size: default page size */ + u64 default_page_size; + /** @lock: protects allocations of VRAM */ + struct mutex lock; +}; + +/** + * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource + */ +struct xe_ttm_vram_mgr_resource { + /** @base: Base TTM resource */ + struct ttm_resource base; + /** @blocks: list of DRM buddy blocks */ + struct list_head blocks; + /** @flags: flags associated with the resource */ + unsigned long flags; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c new file mode 100644 index 000000000000..e043db037368 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_wa.h" + +#include "xe_platform_types.h" +#include "xe_gt_types.h" +#include "xe_rtp.h" + +#include "gt/intel_gt_regs.h" + +#undef _MMIO +#undef MCR_REG +#define _MMIO(x) _XE_RTP_REG(x) +#define MCR_REG(x) _XE_RTP_MCR_REG(x) + +static const struct xe_rtp_entry gt_tunings[] = { + { XE_RTP_NAME("Tuning: 32B Access Enable"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_SET(XEHP_SQCM, EN_32B_ACCESS) + }, + {} +}; + +static const struct xe_rtp_entry context_tunings[] = { + { XE_RTP_NAME("1604555607"), + XE_RTP_RULES(GRAPHICS_VERSION(1200)), + XE_RTP_FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128) + }, + {} +}; + +void xe_tuning_process_gt(struct xe_gt *gt) +{ + xe_rtp_process(gt_tunings, >->reg_sr, gt, NULL); +} diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h new file mode 100644 index 000000000000..66dbc93192bd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TUNING_ +#define _XE_TUNING_ + +struct xe_gt; + +void xe_tuning_process_gt(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c new file mode 100644 index 000000000000..938d14698003 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_device.h" +#include "xe_huc.h" +#include "xe_gt.h" +#include "xe_guc.h" +#include "xe_guc_pc.h" +#include "xe_guc_submit.h" +#include "xe_uc.h" +#include "xe_uc_fw.h" +#include 
"xe_wopcm.h" + +static struct xe_gt * +uc_to_gt(struct xe_uc *uc) +{ + return container_of(uc, struct xe_gt, uc); +} + +static struct xe_device * +uc_to_xe(struct xe_uc *uc) +{ + return gt_to_xe(uc_to_gt(uc)); +} + +/* Should be called once at driver load only */ +int xe_uc_init(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + ret = xe_guc_init(&uc->guc); + if (ret) + goto err; + + ret = xe_huc_init(&uc->huc); + if (ret) + goto err; + + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + + ret = xe_guc_submit_init(&uc->guc); + if (ret) + goto err; + + return 0; + +err: + /* If any uC firmwares not found, fall back to execlists */ + xe_device_guc_submission_disable(uc_to_xe(uc)); + + return ret; +} + +/** + * xe_uc_init_post_hwconfig - init Uc post hwconfig load + * @uc: The UC object + * + * Return: 0 on success, negative error code on error. + */ +int xe_uc_init_post_hwconfig(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_init_post_hwconfig(&uc->guc); +} + +static int uc_reset(struct xe_uc *uc) +{ + struct xe_device *xe = uc_to_xe(uc); + int ret; + + ret = xe_guc_reset(&uc->guc); + if (ret) { + drm_err(&xe->drm, "Failed to reset GuC, ret = %d\n", ret); + return ret; + } + + return 0; +} + +static int uc_sanitize(struct xe_uc *uc) +{ + xe_huc_sanitize(&uc->huc); + xe_guc_sanitize(&uc->guc); + + return uc_reset(uc); +} + +/** + * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig + * @uc: The UC object + * + * Return: 0 on success, negative error code on error. + */ +int xe_uc_init_hwconfig(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + ret = xe_guc_min_load_for_hwconfig(&uc->guc); + if (ret) + return ret; + + return 0; +} + +/* + * Should be called during driver load, after every GT reset, and after every + * suspend to reload / auth the firmwares. 
+ */ +int xe_uc_init_hw(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + ret = uc_sanitize(uc); + if (ret) + return ret; + + ret = xe_huc_upload(&uc->huc); + if (ret) + return ret; + + ret = xe_guc_upload(&uc->guc); + if (ret) + return ret; + + ret = xe_guc_enable_communication(&uc->guc); + if (ret) + return ret; + + ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); + if (ret) + return ret; + + ret = xe_guc_post_load_init(&uc->guc); + if (ret) + return ret; + + ret = xe_guc_pc_start(&uc->guc.pc); + if (ret) + return ret; + + /* We don't fail the driver load if HuC fails to auth, but let's warn */ + ret = xe_huc_auth(&uc->huc); + XE_WARN_ON(ret); + + return 0; +} + +int xe_uc_reset_prepare(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_reset_prepare(&uc->guc); +} + +void xe_uc_stop_prepare(struct xe_uc *uc) +{ + xe_guc_stop_prepare(&uc->guc); +} + +int xe_uc_stop(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_stop(&uc->guc); +} + +int xe_uc_start(struct xe_uc *uc) +{ + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + return xe_guc_start(&uc->guc); +} + +static void uc_reset_wait(struct xe_uc *uc) +{ + int ret; + +again: + xe_guc_reset_wait(&uc->guc); + + ret = xe_uc_reset_prepare(uc); + if (ret) + goto again; +} + +int xe_uc_suspend(struct xe_uc *uc) +{ + int ret; + + /* GuC submission not enabled, nothing to do */ + if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + return 0; + + uc_reset_wait(uc); + + ret = xe_uc_stop(uc); + if (ret) + return ret; + + return xe_guc_suspend(&uc->guc); +} diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h new file mode 100644 index 000000000000..380e722f95fc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_H_ +#define _XE_UC_H_ + +#include "xe_uc_types.h" + +int xe_uc_init(struct xe_uc *uc); +int xe_uc_init_hwconfig(struct xe_uc *uc); +int xe_uc_init_post_hwconfig(struct xe_uc *uc); +int xe_uc_init_hw(struct xe_uc *uc); +int xe_uc_reset_prepare(struct xe_uc *uc); +void xe_uc_stop_prepare(struct xe_uc *uc); +int xe_uc_stop(struct xe_uc *uc); +int xe_uc_start(struct xe_uc *uc); +int xe_uc_suspend(struct xe_uc *uc); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c new file mode 100644 index 000000000000..0a39ec5a6e99 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include + +#include "xe_gt.h" +#include "xe_guc_debugfs.h" +#include "xe_huc_debugfs.h" +#include "xe_macros.h" +#include "xe_uc_debugfs.h" + +void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent) +{ + struct dentry *root; + + root = debugfs_create_dir("uc", parent); + if (IS_ERR(root)) { + XE_WARN_ON("Create UC directory failed"); + return; + } + + xe_guc_debugfs_register(&uc->guc, root); + xe_huc_debugfs_register(&uc->huc, root); +} diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.h b/drivers/gpu/drm/xe/xe_uc_debugfs.h new file mode 100644 index 000000000000..a13382df2bd7 --- /dev/null +++ 
b/drivers/gpu/drm/xe/xe_uc_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_DEBUGFS_H_ +#define _XE_UC_DEBUGFS_H_ + +struct dentry; +struct xe_uc; + +void xe_uc_debugfs_register(struct xe_uc *uc, struct dentry *parent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c new file mode 100644 index 000000000000..86c47b7f0901 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include + +#include "xe_bo.h" +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_guc_reg.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_uc_fw.h" + +static struct xe_gt * +__uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type) +{ + if (type == XE_UC_FW_TYPE_GUC) + return container_of(uc_fw, struct xe_gt, uc.guc.fw); + + XE_BUG_ON(type != XE_UC_FW_TYPE_HUC); + return container_of(uc_fw, struct xe_gt, uc.huc.fw); +} + +static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw) +{ + return __uc_fw_to_gt(uc_fw, uc_fw->type); +} + +static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) +{ + return gt_to_xe(uc_fw_to_gt(uc_fw)); +} + +/* + * List of required GuC and HuC binaries per-platform. + * Must be ordered based on platform + revid, from newer to older. + */ +#define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ + fw_def(METEORLAKE, 0, guc_def(mtl, 70, 5, 2)) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 5, 2)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 5, 2)) \ + fw_def(PVC, 0, guc_def(pvc, 70, 5, 2)) \ + fw_def(DG2, 0, guc_def(dg2, 70, 5, 2)) \ + fw_def(DG1, 0, guc_def(dg1, 70, 5, 2)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 5, 2)) + +#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ + fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) + +#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \ + "xe/" \ + __stringify(prefix_) "_" name_ "_" \ + __stringify(major_) ".bin" + +#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ + "xe/" \ + __stringify(prefix_) name_ \ + __stringify(major_) "." \ + __stringify(minor_) "." 
\ + __stringify(patch_) ".bin" + +#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_) + +#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ + __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) + +/* All blobs need to be declared via MODULE_FIRMWARE() */ +#define XE_UC_MODULE_FW(platform_, revid_, uc_) \ + MODULE_FIRMWARE(uc_); + +XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH) +XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH) + +/* The below structs and macros are used to iterate across the list of blobs */ +struct __packed uc_fw_blob { + u8 major; + u8 minor; + const char *path; +}; + +#define UC_FW_BLOB(major_, minor_, path_) \ + { .major = major_, .minor = minor_, .path = path_ } + +#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB(major_, minor_, \ + MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_)) + +#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \ + UC_FW_BLOB(major_, minor_, \ + MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_)) + +struct __packed uc_fw_platform_requirement { + enum xe_platform p; + u8 rev; /* first platform rev using this FW */ + const struct uc_fw_blob blob; +}; + +#define MAKE_FW_LIST(platform_, revid_, uc_) \ +{ \ + .p = XE_##platform_, \ + .rev = revid_, \ + .blob = uc_, \ +}, + +struct fw_blobs_by_type { + const struct uc_fw_platform_requirement *blobs; + u32 count; +}; + +static void +uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) +{ + static const struct uc_fw_platform_requirement blobs_guc[] = { + XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) + }; + static const struct uc_fw_platform_requirement blobs_huc[] = { + XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) + }; + static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = { + [XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, + [XE_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, + }; + static const struct uc_fw_platform_requirement *fw_blobs; + enum xe_platform p = xe->info.platform; + u32 fw_count; + u8 rev = xe->info.revid; + int i; + + XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); + fw_blobs = blobs_all[uc_fw->type].blobs; + fw_count = blobs_all[uc_fw->type].count; + + for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) { + if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { + const struct uc_fw_blob *blob = &fw_blobs[i].blob; + + uc_fw->path = blob->path; + uc_fw->major_ver_wanted = blob->major; + uc_fw->minor_ver_wanted = blob->minor; + break; + } + } +} + +/** + * xe_uc_fw_copy_rsa - copy fw RSA to buffer + * + * @uc_fw: uC firmware + * @dst: dst buffer + * @max_len: max number of bytes to copy + * + * Return: number of copied bytes. 
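To see what the path macros above actually produce, here is a throwaway re-statement of them with the stringify helpers spelled out; the expansions match the MODULE_FIRMWARE() names generated from the defs (for example the METEORLAKE GuC entry and the DG1 HuC entry), and uc_fw_auto_select() then picks the first entry whose platform and minimum revid match, since the list is ordered newest first:

#include <stdio.h>

/* Local restatement of the kernel's path macros, just to show the output. */
#define __stringify_1(x) #x
#define __stringify(x)   __stringify_1(x)

#define GUC_PATH(prefix_, major_) \
	"xe/" __stringify(prefix_) "_guc_" __stringify(major_) ".bin"
#define HUC_PATH(prefix_, major_, minor_, bld_) \
	"xe/" __stringify(prefix_) "_huc_" __stringify(major_) "." \
	__stringify(minor_) "." __stringify(bld_) ".bin"

int main(void)
{
	puts(GUC_PATH(mtl, 70));	/* "xe/mtl_guc_70.bin"    */
	puts(HUC_PATH(dg1, 7, 9, 3));	/* "xe/dg1_huc_7.9.3.bin" */
	return 0;
}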
+ */ +size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + u32 size = min_t(u32, uc_fw->rsa_size, max_len); + + XE_BUG_ON(size % 4); + XE_BUG_ON(!xe_uc_fw_is_available(uc_fw)); + + xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap, + xe_uc_fw_rsa_offset(uc_fw), size); + + return size; +} + +static void uc_fw_fini(struct drm_device *drm, void *arg) +{ + struct xe_uc_fw *uc_fw = arg; + + if (!xe_uc_fw_is_available(uc_fw)) + return; + + xe_bo_unpin_map_no_vm(uc_fw->bo); + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); +} + +int xe_uc_fw_init(struct xe_uc_fw *uc_fw) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct device *dev = xe->drm.dev; + const struct firmware *fw = NULL; + struct uc_css_header *css; + struct xe_bo *obj; + size_t size; + int err; + + /* + * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status + * before we're looked at the HW caps to see if we have uc support + */ + BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); + XE_BUG_ON(uc_fw->status); + XE_BUG_ON(uc_fw->path); + + uc_fw_auto_select(xe, uc_fw); + xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? + XE_UC_FIRMWARE_SELECTED : + XE_UC_FIRMWARE_DISABLED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + + /* Transform no huc in the list into firmware disabled */ + if (uc_fw->type == XE_UC_FW_TYPE_HUC && !xe_uc_fw_is_supported(uc_fw)) { + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); + err = -ENOPKG; + return err; + } + err = request_firmware(&fw, uc_fw->path, dev); + if (err) + goto fail; + + /* Check the size of the blob before examining buffer contents */ + if (unlikely(fw->size < sizeof(struct uc_css_header))) { + drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, sizeof(struct uc_css_header)); + err = -ENODATA; + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Check integrity of size values inside CSS header */ + size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - + css->exponent_size_dw) * sizeof(u32); + if (unlikely(size != sizeof(struct uc_css_header))) { + drm_warn(&xe->drm, + "%s firmware %s: unexpected header size: %zu != %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, sizeof(struct uc_css_header)); + err = -EPROTO; + goto fail; + } + + /* uCode size must calculated from other sizes */ + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* now RSA */ + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At least, it should have header, uCode and RSA. Size of all three. 
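The CSS size checks in xe_uc_fw_init() below are easier to follow with numbers plugged in. A worked example with dword counts shaped like a typical GuC blob; the values are made up, and 128 bytes is the fixed CSS header size the driver compares against:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t header_size_dw   = 161;	/* CSS header + RSA key + modulus + exponent */
	uint32_t key_size_dw      = 64;		/* 256-byte RSA signature */
	uint32_t modulus_size_dw  = 64;
	uint32_t exponent_size_dw = 1;
	uint32_t size_dw          = 161 + 50000;	/* header + uCode payload */

	uint32_t css_dw   = header_size_dw - key_size_dw - modulus_size_dw - exponent_size_dw;
	uint32_t ucode_sz = (size_dw - header_size_dw) * 4;
	uint32_t rsa_sz   = key_size_dw * 4;

	printf("css header bytes : %u (must equal 128)\n", css_dw * 4);
	printf("ucode bytes      : %u, rsa bytes: %u\n", ucode_sz, rsa_sz);
	printf("minimum blob size: %u\n", 128 + ucode_sz + rsa_sz);
	return 0;
}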
*/ + size = sizeof(struct uc_css_header) + uc_fw->ucode_size + + uc_fw->rsa_size; + if (unlikely(fw->size < size)) { + drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + fw->size, size); + err = -ENOEXEC; + goto fail; + } + + /* Get version numbers from the CSS header */ + uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->sw_version); + uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->sw_version); + + if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + if (!xe_uc_fw_is_overridden(uc_fw)) { + err = -ENOEXEC; + goto fail; + } + } + + if (uc_fw->type == XE_UC_FW_TYPE_GUC) + uc_fw->private_data_size = css->private_data_size; + + obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(obj)) { + drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->bo = obj; + uc_fw->size = fw->size; + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE); + + release_firmware(fw); + + err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw); + if (err) + return err; + + return 0; + +fail: + xe_uc_fw_change_status(uc_fw, err == -ENOENT ? + XE_UC_FIRMWARE_MISSING : + XE_UC_FIRMWARE_ERROR); + + drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", + xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); + + release_firmware(fw); /* OK even if fw is NULL */ + return err; +} + +static u32 uc_fw_ggtt_offset(struct xe_uc_fw *uc_fw) +{ + return xe_bo_ggtt_addr(uc_fw->bo); +} + +static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + u32 src_offset; + int ret; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + /* Set the source address for the uCode */ + src_offset = uc_fw_ggtt_offset(uc_fw); + xe_mmio_write32(gt, DMA_ADDR_0_LOW.reg, lower_32_bits(src_offset)); + xe_mmio_write32(gt, DMA_ADDR_0_HIGH.reg, upper_32_bits(src_offset)); + + /* Set the DMA destination */ + xe_mmio_write32(gt, DMA_ADDR_1_LOW.reg, offset); + xe_mmio_write32(gt, DMA_ADDR_1_HIGH.reg, DMA_ADDRESS_SPACE_WOPCM); + + /* + * Set the transfer size. 
The header plus uCode will be copied to WOPCM + * via DMA, excluding any other components + */ + xe_mmio_write32(gt, DMA_COPY_SIZE.reg, + sizeof(struct uc_css_header) + uc_fw->ucode_size); + + /* Start the DMA */ + xe_mmio_write32(gt, DMA_CTRL.reg, + _MASKED_BIT_ENABLE(dma_flags | START_DMA)); + + /* Wait for DMA to finish */ + ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100); + if (ret) + drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", + xe_uc_fw_type_repr(uc_fw->type), + xe_mmio_read32(gt, DMA_CTRL.reg)); + + /* Disable the bits once DMA is over */ + xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags)); + + return ret; +} + +int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + int err; + + /* make sure the status was cleared the last time we reset the uc */ + XE_BUG_ON(xe_uc_fw_is_loaded(uc_fw)); + + if (!xe_uc_fw_is_loadable(uc_fw)) + return -ENOEXEC; + + /* Call custom loader */ + err = uc_fw_xfer(uc_fw, offset, dma_flags); + if (err) + goto fail; + + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_TRANSFERRED); + return 0; + +fail: + drm_err(&xe->drm, "Failed to load %s firmware %s (%d)\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + err); + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOAD_FAIL); + return err; +} + + +void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p) +{ + drm_printf(p, "%s firmware: %s\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); + drm_printf(p, "\tstatus: %s\n", + xe_uc_fw_status_repr(uc_fw->status)); + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); + drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); +} diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h new file mode 100644 index 000000000000..b0df5064b27d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_FW_H_ +#define _XE_UC_FW_H_ + +#include + +#include "xe_uc_fw_types.h" +#include "xe_uc_fw_abi.h" +#include "xe_macros.h" + +struct drm_printer; + +int xe_uc_fw_init(struct xe_uc_fw *uc_fw); +size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len); +int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags); +void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p); + +static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw) +{ + return sizeof(struct uc_css_header) + uc_fw->ucode_size; +} + +static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw, + enum xe_uc_fw_status status) +{ + uc_fw->__status = status; +} + +static inline +const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status) +{ + switch (status) { + case XE_UC_FIRMWARE_NOT_SUPPORTED: + return "N/A"; + case XE_UC_FIRMWARE_UNINITIALIZED: + return "UNINITIALIZED"; + case XE_UC_FIRMWARE_DISABLED: + return "DISABLED"; + case XE_UC_FIRMWARE_SELECTED: + return "SELECTED"; + case XE_UC_FIRMWARE_MISSING: + return "MISSING"; + case XE_UC_FIRMWARE_ERROR: + return "ERROR"; + case XE_UC_FIRMWARE_AVAILABLE: + return "AVAILABLE"; + case XE_UC_FIRMWARE_INIT_FAIL: + return "INIT FAIL"; + case XE_UC_FIRMWARE_LOADABLE: + return "LOADABLE"; + case XE_UC_FIRMWARE_LOAD_FAIL: + return "LOAD FAIL"; + case XE_UC_FIRMWARE_TRANSFERRED: + return "TRANSFERRED"; + case 
XE_UC_FIRMWARE_RUNNING: + return "RUNNING"; + } + return ""; +} + +static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status) +{ + switch (status) { + case XE_UC_FIRMWARE_NOT_SUPPORTED: + return -ENODEV; + case XE_UC_FIRMWARE_UNINITIALIZED: + return -EACCES; + case XE_UC_FIRMWARE_DISABLED: + return -EPERM; + case XE_UC_FIRMWARE_MISSING: + return -ENOENT; + case XE_UC_FIRMWARE_ERROR: + return -ENOEXEC; + case XE_UC_FIRMWARE_INIT_FAIL: + case XE_UC_FIRMWARE_LOAD_FAIL: + return -EIO; + case XE_UC_FIRMWARE_SELECTED: + return -ESTALE; + case XE_UC_FIRMWARE_AVAILABLE: + case XE_UC_FIRMWARE_LOADABLE: + case XE_UC_FIRMWARE_TRANSFERRED: + case XE_UC_FIRMWARE_RUNNING: + return 0; + } + return -EINVAL; +} + +static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type) +{ + switch (type) { + case XE_UC_FW_TYPE_GUC: + return "GuC"; + case XE_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + +static inline enum xe_uc_fw_status +__xe_uc_fw_status(struct xe_uc_fw *uc_fw) +{ + /* shouldn't call this before checking hw/blob availability */ + XE_BUG_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED); + return uc_fw->status; +} + +static inline bool xe_uc_fw_is_supported(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_NOT_SUPPORTED; +} + +static inline bool xe_uc_fw_is_enabled(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) > XE_UC_FIRMWARE_DISABLED; +} + +static inline bool xe_uc_fw_is_disabled(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_DISABLED; +} + +static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_AVAILABLE; +} + +static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE; +} + +static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_TRANSFERRED; +} + +static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw) +{ + return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING; +} + +static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) +{ + return uc_fw->user_overridden; +} + +static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) +{ + if (xe_uc_fw_is_loaded(uc_fw)) + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE); +} + +static inline u32 __xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) +{ + return sizeof(struct uc_css_header) + uc_fw->ucode_size; +} + +/** + * xe_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. + * @uc_fw: uC firmware. + * + * Get the size of the firmware and header that will be uploaded to WOPCM. + * + * Return: Upload firmware size, or zero on firmware fetch failure. 
+ */ +static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) +{ + if (!xe_uc_fw_is_available(uc_fw)) + return 0; + + return __xe_uc_fw_get_upload_size(uc_fw); +} + +#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/xe" + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h new file mode 100644 index 000000000000..dafd26cb0c41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_FW_ABI_H +#define _XE_UC_FW_ABI_H + +#include +#include + +/** + * DOC: Firmware Layout + * + * The GuC/HuC firmware layout looks like this:: + * + * +======================================================================+ + * | Firmware blob | + * +===============+===============+============+============+============+ + * | CSS header | uCode | RSA key | modulus | exponent | + * +===============+===============+============+============+============+ + * <-header size-> <---header size continued -----------> + * <--- size -----------------------------------------------------------> + * <-key size-> + * <-mod size-> + * <-exp size-> + * + * The firmware may or may not have modulus key and exponent data. The header, + * uCode and RSA signature are must-have components that will be used by driver. + * Length of each components, which is all in dwords, can be found in header. + * In the case that modulus and exponent are not present in fw, a.k.a truncated + * image, the length value still appears in header. + * + * Driver will do some basic fw size validation based on the following rules: + * + * 1. Header, uCode and RSA are must-have components. + * 2. All firmware components, if they present, are in the sequence illustrated + * in the layout table above. + * 3. Length info of each component can be found in header, in dwords. + * 4. Modulus and exponent key are not required by driver. They may not appear + * in fw. So driver will load a truncated firmware in this case. + */ + +struct uc_css_header { + u32 module_type; + /* + * header_size includes all non-uCode bits, including css_header, rsa + * key, modulus key and exponent data. 
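+	 * (per the layout documented above: the CSS header itself plus the
+	 * RSA key, modulus and exponent, all expressed in dwords).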
+ */ + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; + u32 date; +#define CSS_DATE_DAY (0xFF << 0) +#define CSS_DATE_MONTH (0xFF << 8) +#define CSS_DATE_YEAR (0xFFFF << 16) + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; + u32 time; +#define CSS_TIME_HOUR (0xFF << 0) +#define CSS_DATE_MIN (0xFF << 8) +#define CSS_DATE_SEC (0xFFFF << 16) + char username[8]; + char buildnumber[12]; + u32 sw_version; +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) + u32 reserved0[13]; + union { + u32 private_data_size; /* only applies to GuC */ + u32 reserved1; + }; + u32 header_info; +} __packed; +static_assert(sizeof(struct uc_css_header) == 128); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h new file mode 100644 index 000000000000..1cfd30a655df --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_FW_TYPES_H_ +#define _XE_UC_FW_TYPES_H_ + +#include + +struct xe_bo; + +/* + * +------------+---------------------------------------------------+ + * | PHASE | FIRMWARE STATUS TRANSITIONS | + * +============+===================================================+ + * | | UNINITIALIZED | + * +------------+- / | \ -+ + * | | DISABLED <--/ | \--> NOT_SUPPORTED | + * | init_early | V | + * | | SELECTED | + * +------------+- / | \ -+ + * | | MISSING <--/ | \--> ERROR | + * | fetch | V | + * | | AVAILABLE | + * +------------+- | \ -+ + * | | | \--> INIT FAIL | + * | init | V | + * | | /------> LOADABLE <----<-----------\ | + * +------------+- \ / \ \ \ -+ + * | | LOAD FAIL <--< \--> TRANSFERRED \ | + * | upload | \ / \ / | + * | | \---------/ \--> RUNNING | + * +------------+---------------------------------------------------+ + */ + +/* + * FIXME: Ported from the i915 and this is state machine is way too complicated. + * Circle back and simplify this. 
+ */ +enum xe_uc_fw_status { + XE_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */ + XE_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */ + XE_UC_FIRMWARE_DISABLED, /* disabled */ + XE_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */ + XE_UC_FIRMWARE_MISSING, /* blob not found on the system */ + XE_UC_FIRMWARE_ERROR, /* invalid format or version */ + XE_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + XE_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */ + XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ + XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */ + XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ + XE_UC_FIRMWARE_RUNNING /* init/auth done */ +}; + +enum xe_uc_fw_type { + XE_UC_FW_TYPE_GUC = 0, + XE_UC_FW_TYPE_HUC +}; +#define XE_UC_FW_NUM_TYPES 2 + +/** + * struct xe_uc_fw - XE micro controller firmware + */ +struct xe_uc_fw { + /** @type: type uC firmware */ + enum xe_uc_fw_type type; + union { + /** @status: firmware load status */ + const enum xe_uc_fw_status status; + /** + * @__status: private firmware load status - only to be used + * by firmware laoding code + */ + enum xe_uc_fw_status __status; + }; + /** @path: path to uC firmware */ + const char *path; + /** @user_overridden: user provided path to uC firmware via modparam */ + bool user_overridden; + /** @size: size of uC firmware including css header */ + size_t size; + + /** @bo: XE BO for uC firmware */ + struct xe_bo *bo; + + /* + * The firmware build process will generate a version header file with + * major and minor version defined. The versions are built into CSS + * header of firmware. The xe kernel driver set the minimal firmware + * version required per platform. + */ + + /** @major_ver_wanted: major firmware version wanted by platform */ + u16 major_ver_wanted; + /** @minor_ver_wanted: minor firmware version wanted by platform */ + u16 minor_ver_wanted; + /** @major_ver_found: major version found in firmware blob */ + u16 major_ver_found; + /** @minor_ver_found: major version found in firmware blob */ + u16 minor_ver_found; + + /** @rsa_size: RSA size */ + u32 rsa_size; + /** @ucode_size: micro kernel size */ + u32 ucode_size; + + /** @private_data_size: size of private data found in uC css header */ + u32 private_data_size; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h new file mode 100644 index 000000000000..49bef6498b85 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_uc_types.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_UC_TYPES_H_ +#define _XE_UC_TYPES_H_ + +#include "xe_guc_types.h" +#include "xe_huc_types.h" +#include "xe_wopcm_types.h" + +/** + * struct xe_uc - XE micro controllers + */ +struct xe_uc { + /** @guc: Graphics micro controller */ + struct xe_guc guc; + /** @huc: HuC */ + struct xe_huc huc; + /** @wopcm: WOPCM */ + struct xe_wopcm wopcm; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c new file mode 100644 index 000000000000..d47a8617c5b6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -0,0 +1,3407 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_vm.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_engine.h" +#include "xe_gt.h" +#include "xe_gt_pagefault.h" +#include "xe_migrate.h" +#include "xe_pm.h" +#include 
"xe_preempt_fence.h" +#include "xe_pt.h" +#include "xe_res_cursor.h" +#include "xe_trace.h" +#include "xe_sync.h" + +#define TEST_VM_ASYNC_OPS_ERROR + +/** + * xe_vma_userptr_check_repin() - Advisory check for repin needed + * @vma: The userptr vma + * + * Check if the userptr vma has been invalidated since last successful + * repin. The check is advisory only and can the function can be called + * without the vm->userptr.notifier_lock held. There is no guarantee that the + * vma userptr will remain valid after a lockless check, so typically + * the call needs to be followed by a proper check under the notifier_lock. + * + * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + */ +int xe_vma_userptr_check_repin(struct xe_vma *vma) +{ + return mmu_interval_check_retry(&vma->userptr.notifier, + vma->userptr.notifier_seq) ? + -EAGAIN : 0; +} + +int xe_vma_userptr_pin_pages(struct xe_vma *vma) +{ + struct xe_vm *vm = vma->vm; + struct xe_device *xe = vm->xe; + const unsigned long num_pages = + (vma->end - vma->start + 1) >> PAGE_SHIFT; + struct page **pages; + bool in_kthread = !current->mm; + unsigned long notifier_seq; + int pinned, ret, i; + bool read_only = vma->pte_flags & PTE_READ_ONLY; + + lockdep_assert_held(&vm->lock); + XE_BUG_ON(!xe_vma_is_userptr(vma)); +retry: + if (vma->destroyed) + return 0; + + notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + if (notifier_seq == vma->userptr.notifier_seq) + return 0; + + pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + if (vma->userptr.sg) { + dma_unmap_sgtable(xe->drm.dev, + vma->userptr.sg, + read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, 0); + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + } + + pinned = ret = 0; + if (in_kthread) { + if (!mmget_not_zero(vma->userptr.notifier.mm)) { + ret = -EFAULT; + goto mm_closed; + } + kthread_use_mm(vma->userptr.notifier.mm); + } + + while (pinned < num_pages) { + ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE, + num_pages - pinned, + read_only ? 0 : FOLL_WRITE, + &pages[pinned]); + if (ret < 0) { + if (in_kthread) + ret = 0; + break; + } + + pinned += ret; + ret = 0; + } + + if (in_kthread) { + kthread_unuse_mm(vma->userptr.notifier.mm); + mmput(vma->userptr.notifier.mm); + } +mm_closed: + if (ret) + goto out; + + ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned, + 0, (u64)pinned << PAGE_SHIFT, + GFP_KERNEL); + if (ret) { + vma->userptr.sg = NULL; + goto out; + } + vma->userptr.sg = &vma->userptr.sgt; + + ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg, + read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_NO_KERNEL_MAPPING); + if (ret) { + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + goto out; + } + + for (i = 0; i < pinned; ++i) { + if (!read_only) { + lock_page(pages[i]); + set_page_dirty(pages[i]); + unlock_page(pages[i]); + } + + mark_page_accessed(pages[i]); + } + +out: + release_pages(pages, pinned); + kvfree(pages); + + if (!(ret < 0)) { + vma->userptr.notifier_seq = notifier_seq; + if (xe_vma_userptr_check_repin(vma) == -EAGAIN) + goto retry; + } + + return ret < 0 ? 
ret : 0; +} + +static bool preempt_fences_waiting(struct xe_vm *vm) +{ + struct xe_engine *e; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + if (!e->compute.pfence || (e->compute.pfence && + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &e->compute.pfence->flags))) { + return true; + } + } + + return false; +} + +static void free_preempt_fences(struct list_head *list) +{ + struct list_head *link, *next; + + list_for_each_safe(link, next, list) + xe_preempt_fence_free(to_preempt_fence_from_link(link)); +} + +static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, + unsigned int *count) +{ + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + if (*count >= vm->preempt.num_engines) + return 0; + + for (; *count < vm->preempt.num_engines; ++(*count)) { + struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); + + if (IS_ERR(pfence)) + return PTR_ERR(pfence); + + list_move_tail(xe_preempt_fence_link(pfence), list); + } + + return 0; +} + +static int wait_for_existing_preempt_fences(struct xe_vm *vm) +{ + struct xe_engine *e; + + xe_vm_assert_held(vm); + + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + if (e->compute.pfence) { + long timeout = dma_fence_wait(e->compute.pfence, false); + + if (timeout < 0) + return -ETIME; + dma_fence_put(e->compute.pfence); + e->compute.pfence = NULL; + } + } + + return 0; +} + +static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) +{ + struct list_head *link; + struct xe_engine *e; + + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + struct dma_fence *fence; + + link = list->next; + XE_BUG_ON(link == list); + + fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), + e, e->compute.context, + ++e->compute.seqno); + dma_fence_put(e->compute.pfence); + e->compute.pfence = fence; + } +} + +static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) +{ + struct xe_engine *e; + struct ww_acquire_ctx ww; + int err; + + err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true); + if (err) + return err; + + list_for_each_entry(e, &vm->preempt.engines, compute.link) + if (e->compute.pfence) { + dma_resv_add_fence(bo->ttm.base.resv, + e->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP); + } + + xe_bo_unlock(bo, &ww); + return 0; +} + +/** + * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv + * @vm: The vm. + * @fence: The fence to add. + * @usage: The resv usage for the fence. + * + * Loops over all of the vm's external object bindings and adds a @fence + * with the given @usage to all of the external object's reservation + * objects. 
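+ *
+ * Note that the fence is not added to the vm's own reservation object;
+ * callers that also need it there add it separately. A minimal usage
+ * sketch, mirroring the preempt-fence reinstall path below:
+ *
+ *	dma_resv_add_fence(&vm->resv, fence, DMA_RESV_USAGE_BOOKKEEP);
+ *	xe_vm_fence_all_extobjs(vm, fence, DMA_RESV_USAGE_BOOKKEEP);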
+ */ +void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, + enum dma_resv_usage usage) +{ + struct xe_vma *vma; + + list_for_each_entry(vma, &vm->extobj.list, extobj.link) + dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage); +} + +static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) +{ + struct xe_engine *e; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + e->ops->resume(e); + + dma_resv_add_fence(&vm->resv, e->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP); + xe_vm_fence_all_extobjs(vm, e->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP); + } +} + +int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e) +{ + struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; + struct ttm_validate_buffer *tv; + struct ww_acquire_ctx ww; + struct list_head objs; + struct dma_fence *pfence; + int err; + bool wait; + + XE_BUG_ON(!xe_vm_in_compute_mode(vm)); + + down_write(&vm->lock); + + err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1); + if (err) + goto out_unlock_outer; + + pfence = xe_preempt_fence_create(e, e->compute.context, + ++e->compute.seqno); + if (!pfence) { + err = -ENOMEM; + goto out_unlock; + } + + list_add(&e->compute.link, &vm->preempt.engines); + ++vm->preempt.num_engines; + e->compute.pfence = pfence; + + down_read(&vm->userptr.notifier_lock); + + dma_resv_add_fence(&vm->resv, pfence, + DMA_RESV_USAGE_BOOKKEEP); + + xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP); + + /* + * Check to see if a preemption on VM is in flight or userptr + * invalidation, if so trigger this preempt fence to sync state with + * other preempt fences on the VM. + */ + wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); + if (wait) + dma_fence_enable_sw_signaling(pfence); + + up_read(&vm->userptr.notifier_lock); + +out_unlock: + xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs); +out_unlock_outer: + up_write(&vm->lock); + + return err; +} + +/** + * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs + * that need repinning. + * @vm: The VM. + * + * This function checks for whether the VM has userptrs that need repinning, + * and provides a release-type barrier on the userptr.notifier_lock after + * checking. + * + * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. + */ +int __xe_vm_userptr_needs_repin(struct xe_vm *vm) +{ + lockdep_assert_held_read(&vm->userptr.notifier_lock); + + return (list_empty(&vm->userptr.repin_list) && + list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; +} + +/** + * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv + * objects of the vm's external buffer objects. + * @vm: The vm. + * @ww: Pointer to a struct ww_acquire_ctx locking context. + * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct + * ttm_validate_buffers used for locking. + * @tv: Pointer to a pointer that on output contains the actual storage used. + * @objs: List head for the buffer objects locked. + * @intr: Whether to lock interruptible. + * @num_shared: Number of dma-fence slots to reserve in the locked objects. + * + * Locks the vm dma-resv objects and all the dma-resv objects of the + * buffer objects on the vm external object list. The TTM utilities require + * a list of struct ttm_validate_buffers pointing to the actual buffer + * objects to lock. 
Storage for those struct ttm_validate_buffers should + * be provided in @tv_onstack, and is typically reserved on the stack + * of the caller. If the size of @tv_onstack isn't sufficient, then + * storage will be allocated internally using kvmalloc(). + * + * The function performs deadlock handling internally, and after a + * successful return the ww locking transaction should be considered + * sealed. + * + * Return: 0 on success, Negative error code on error. In particular if + * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case + * of error, any locking performed has been reverted. + */ +int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer **tv, + struct list_head *objs, + bool intr, + unsigned int num_shared) +{ + struct ttm_validate_buffer *tv_vm, *tv_bo; + struct xe_vma *vma, *next; + LIST_HEAD(dups); + int err; + + lockdep_assert_held(&vm->lock); + + if (vm->extobj.entries < XE_ONSTACK_TV) { + tv_vm = tv_onstack; + } else { + tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm), + GFP_KERNEL); + if (!tv_vm) + return -ENOMEM; + } + tv_bo = tv_vm + 1; + + INIT_LIST_HEAD(objs); + list_for_each_entry(vma, &vm->extobj.list, extobj.link) { + tv_bo->num_shared = num_shared; + tv_bo->bo = &vma->bo->ttm; + + list_add_tail(&tv_bo->head, objs); + tv_bo++; + } + tv_vm->num_shared = num_shared; + tv_vm->bo = xe_vm_ttm_bo(vm); + list_add_tail(&tv_vm->head, objs); + err = ttm_eu_reserve_buffers(ww, objs, intr, &dups); + if (err) + goto out_err; + + spin_lock(&vm->notifier.list_lock); + list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list, + notifier.rebind_link) { + xe_bo_assert_held(vma->bo); + + list_del_init(&vma->notifier.rebind_link); + if (vma->gt_present && !vma->destroyed) + list_move_tail(&vma->rebind_link, &vm->rebind_list); + } + spin_unlock(&vm->notifier.list_lock); + + *tv = tv_vm; + return 0; + +out_err: + if (tv_vm != tv_onstack) + kvfree(tv_vm); + + return err; +} + +/** + * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by + * xe_vm_lock_dma_resv() + * @vm: The vm. + * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv(). + * @tv: The value of *@tv given by xe_vm_lock_dma_resv(). + * @ww: The ww_acquire_context used for locking. + * @objs: The list returned from xe_vm_lock_dma_resv(). + * + * Unlocks the reservation objects and frees any memory allocated by + * xe_vm_lock_dma_resv(). + */ +void xe_vm_unlock_dma_resv(struct xe_vm *vm, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer *tv, + struct ww_acquire_ctx *ww, + struct list_head *objs) +{ + /* + * Nothing should've been able to enter the list while we were locked, + * since we've held the dma-resvs of all the vm's external objects, + * and holding the dma_resv of an object is required for list + * addition, and we shouldn't add ourselves. 
+ */ + XE_WARN_ON(!list_empty(&vm->notifier.rebind_list)); + + ttm_eu_backoff_reservation(ww, objs); + if (tv && tv != tv_onstack) + kvfree(tv); +} + +static void preempt_rebind_work_func(struct work_struct *w) +{ + struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); + struct xe_vma *vma; + struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; + struct ttm_validate_buffer *tv; + struct ww_acquire_ctx ww; + struct list_head objs; + struct dma_fence *rebind_fence; + unsigned int fence_count = 0; + LIST_HEAD(preempt_fences); + int err; + long wait; + int __maybe_unused tries = 0; + + XE_BUG_ON(!xe_vm_in_compute_mode(vm)); + trace_xe_vm_rebind_worker_enter(vm); + + if (xe_vm_is_closed(vm)) { + trace_xe_vm_rebind_worker_exit(vm); + return; + } + + down_write(&vm->lock); + +retry: + if (vm->async_ops.error) + goto out_unlock_outer; + + /* + * Extreme corner where we exit a VM error state with a munmap style VM + * unbind inflight which requires a rebind. In this case the rebind + * needs to install some fences into the dma-resv slots. The worker to + * do this queued, let that worker make progress by dropping vm->lock + * and trying this again. + */ + if (vm->async_ops.munmap_rebind_inflight) { + up_write(&vm->lock); + flush_work(&vm->async_ops.work); + goto retry; + } + + if (xe_vm_userptr_check_repin(vm)) { + err = xe_vm_userptr_pin(vm); + if (err) + goto out_unlock_outer; + } + + err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, + false, vm->preempt.num_engines); + if (err) + goto out_unlock_outer; + + /* Fresh preempt fences already installed. Everyting is running. */ + if (!preempt_fences_waiting(vm)) + goto out_unlock; + + /* + * This makes sure vm is completely suspended and also balances + * xe_engine suspend- and resume; we resume *all* vm engines below. + */ + err = wait_for_existing_preempt_fences(vm); + if (err) + goto out_unlock; + + err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); + if (err) + goto out_unlock; + + list_for_each_entry(vma, &vm->rebind_list, rebind_link) { + if (xe_vma_is_userptr(vma) || vma->destroyed) + continue; + + err = xe_bo_validate(vma->bo, vm, false); + if (err) + goto out_unlock; + } + + rebind_fence = xe_vm_rebind(vm, true); + if (IS_ERR(rebind_fence)) { + err = PTR_ERR(rebind_fence); + goto out_unlock; + } + + if (rebind_fence) { + dma_fence_wait(rebind_fence, false); + dma_fence_put(rebind_fence); + } + + /* Wait on munmap style VM unbinds */ + wait = dma_resv_wait_timeout(&vm->resv, + DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (wait <= 0) { + err = -ETIME; + goto out_unlock; + } + +#define retry_required(__tries, __vm) \ + (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ + (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ + __xe_vm_userptr_needs_repin(__vm)) + + down_read(&vm->userptr.notifier_lock); + if (retry_required(tries, vm)) { + up_read(&vm->userptr.notifier_lock); + err = -EAGAIN; + goto out_unlock; + } + +#undef retry_required + + /* Point of no return. 
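+	 * Arming and reinstalling the preempt fences below cannot be
+	 * rolled back, so all fallible work is done above this point.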
*/ + arm_preempt_fences(vm, &preempt_fences); + resume_and_reinstall_preempt_fences(vm); + up_read(&vm->userptr.notifier_lock); + +out_unlock: + xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs); +out_unlock_outer: + if (err == -EAGAIN) { + trace_xe_vm_rebind_worker_retry(vm); + goto retry; + } + up_write(&vm->lock); + + free_preempt_fences(&preempt_fences); + + XE_WARN_ON(err < 0); /* TODO: Kill VM or put in error state */ + trace_xe_vm_rebind_worker_exit(vm); +} + +struct async_op_fence; +static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence); + +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier); + struct xe_vm *vm = vma->vm; + struct dma_resv_iter cursor; + struct dma_fence *fence; + long err; + + XE_BUG_ON(!xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + down_write(&vm->userptr.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + /* No need to stop gpu access if the userptr is not yet bound. */ + if (!vma->userptr.initial_bind) { + up_write(&vm->userptr.notifier_lock); + return true; + } + + /* + * Tell exec and rebind worker they need to repin and rebind this + * userptr. + */ + if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) { + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&vma->userptr.invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + } + + up_write(&vm->userptr.notifier_lock); + + /* + * Preempt fences turn into schedule disables, pipeline these. + * Note that even in fault mode, we need to wait for binds and + * unbinds to complete, and those are attached as BOOKMARK fences + * to the vm. + */ + dma_resv_iter_begin(&cursor, &vm->resv, + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + + err = dma_resv_wait_timeout(&vm->resv, + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + XE_WARN_ON(err <= 0); + + if (xe_vm_in_fault_mode(vm)) { + err = xe_vm_invalidate_vma(vma); + XE_WARN_ON(err); + } + + trace_xe_vma_userptr_invalidate_complete(vma); + + return true; +} + +static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { + .invalidate = vma_userptr_invalidate, +}; + +int xe_vm_userptr_pin(struct xe_vm *vm) +{ + struct xe_vma *vma, *next; + int err = 0; + LIST_HEAD(tmp_evict); + + lockdep_assert_held_write(&vm->lock); + + /* Collect invalidated userptrs */ + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(vma, next, &vm->userptr.invalidated, + userptr.invalidate_link) { + list_del_init(&vma->userptr.invalidate_link); + list_move_tail(&vma->userptr_link, &vm->userptr.repin_list); + } + spin_unlock(&vm->userptr.invalidated_lock); + + /* Pin and move to temporary list */ + list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) { + err = xe_vma_userptr_pin_pages(vma); + if (err < 0) + goto out_err; + + list_move_tail(&vma->userptr_link, &tmp_evict); + } + + /* Take lock and move to rebind_list for rebinding. 
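+	 * The vm dma-resv lock is taken here before splicing the freshly
+	 * pinned vmas onto the rebind_list.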
*/ + err = dma_resv_lock_interruptible(&vm->resv, NULL); + if (err) + goto out_err; + + list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) { + list_del_init(&vma->userptr_link); + list_move_tail(&vma->rebind_link, &vm->rebind_list); + } + + dma_resv_unlock(&vm->resv); + + return 0; + +out_err: + list_splice_tail(&tmp_evict, &vm->userptr.repin_list); + + return err; +} + +/** + * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs + * that need repinning. + * @vm: The VM. + * + * This function does an advisory check for whether the VM has userptrs that + * need repinning. + * + * Return: 0 if there are no indications of userptrs needing repinning, + * -EAGAIN if there are. + */ +int xe_vm_userptr_check_repin(struct xe_vm *vm) +{ + return (list_empty_careful(&vm->userptr.repin_list) && + list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; +} + +static struct dma_fence * +xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs); + +struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) +{ + struct dma_fence *fence = NULL; + struct xe_vma *vma, *next; + + lockdep_assert_held(&vm->lock); + if (xe_vm_no_dma_fences(vm) && !rebind_worker) + return NULL; + + xe_vm_assert_held(vm); + list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) { + XE_WARN_ON(!vma->gt_present); + + list_del_init(&vma->rebind_link); + dma_fence_put(fence); + if (rebind_worker) + trace_xe_vma_rebind_worker(vma); + else + trace_xe_vma_rebind_exec(vma); + fence = xe_vm_bind_vma(vma, NULL, NULL, 0); + if (IS_ERR(fence)) + return fence; + } + + return fence; +} + +static struct xe_vma *xe_vma_create(struct xe_vm *vm, + struct xe_bo *bo, + u64 bo_offset_or_userptr, + u64 start, u64 end, + bool read_only, + u64 gt_mask) +{ + struct xe_vma *vma; + struct xe_gt *gt; + u8 id; + + XE_BUG_ON(start >= end); + XE_BUG_ON(end >= vm->size); + + vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (!vma) { + vma = ERR_PTR(-ENOMEM); + return vma; + } + + INIT_LIST_HEAD(&vma->rebind_link); + INIT_LIST_HEAD(&vma->unbind_link); + INIT_LIST_HEAD(&vma->userptr_link); + INIT_LIST_HEAD(&vma->userptr.invalidate_link); + INIT_LIST_HEAD(&vma->notifier.rebind_link); + INIT_LIST_HEAD(&vma->extobj.link); + + vma->vm = vm; + vma->start = start; + vma->end = end; + if (read_only) + vma->pte_flags = PTE_READ_ONLY; + + if (gt_mask) { + vma->gt_mask = gt_mask; + } else { + for_each_gt(gt, vm->xe, id) + if (!xe_gt_is_media_type(gt)) + vma->gt_mask |= 0x1 << id; + } + + if (vm->xe->info.platform == XE_PVC) + vma->use_atomic_access_pte_bit = true; + + if (bo) { + xe_bo_assert_held(bo); + vma->bo_offset = bo_offset_or_userptr; + vma->bo = xe_bo_get(bo); + list_add_tail(&vma->bo_link, &bo->vmas); + } else /* userptr */ { + u64 size = end - start + 1; + int err; + + vma->userptr.ptr = bo_offset_or_userptr; + + err = mmu_interval_notifier_insert(&vma->userptr.notifier, + current->mm, + vma->userptr.ptr, size, + &vma_userptr_notifier_ops); + if (err) { + kfree(vma); + vma = ERR_PTR(err); + return vma; + } + + vma->userptr.notifier_seq = LONG_MAX; + xe_vm_get(vm); + } + + return vma; +} + +static bool vm_remove_extobj(struct xe_vma *vma) +{ + if (!list_empty(&vma->extobj.link)) { + vma->vm->extobj.entries--; + list_del_init(&vma->extobj.link); + return true; + } + return false; +} + +static void xe_vma_destroy_late(struct xe_vma *vma) +{ + struct xe_vm *vm = vma->vm; + struct xe_device *xe = vm->xe; + bool read_only = vma->pte_flags & PTE_READ_ONLY; + + if 
(xe_vma_is_userptr(vma)) { + if (vma->userptr.sg) { + dma_unmap_sgtable(xe->drm.dev, + vma->userptr.sg, + read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL, 0); + sg_free_table(vma->userptr.sg); + vma->userptr.sg = NULL; + } + + /* + * Since userptr pages are not pinned, we can't remove + * the notifer until we're sure the GPU is not accessing + * them anymore + */ + mmu_interval_notifier_remove(&vma->userptr.notifier); + xe_vm_put(vm); + } else { + xe_bo_put(vma->bo); + } + + kfree(vma); +} + +static void vma_destroy_work_func(struct work_struct *w) +{ + struct xe_vma *vma = + container_of(w, struct xe_vma, destroy_work); + + xe_vma_destroy_late(vma); +} + +static struct xe_vma * +bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm, + struct xe_vma *ignore) +{ + struct xe_vma *vma; + + list_for_each_entry(vma, &bo->vmas, bo_link) { + if (vma != ignore && vma->vm == vm && !vma->destroyed) + return vma; + } + + return NULL; +} + +static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm, + struct xe_vma *ignore) +{ + struct ww_acquire_ctx ww; + bool ret; + + xe_bo_lock(bo, &ww, 0, false); + ret = !!bo_has_vm_references_locked(bo, vm, ignore); + xe_bo_unlock(bo, &ww); + + return ret; +} + +static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) +{ + list_add(&vma->extobj.link, &vm->extobj.list); + vm->extobj.entries++; +} + +static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) +{ + struct xe_bo *bo = vma->bo; + + lockdep_assert_held_write(&vm->lock); + + if (bo_has_vm_references(bo, vm, vma)) + return; + + __vm_insert_extobj(vm, vma); +} + +static void vma_destroy_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); + + INIT_WORK(&vma->destroy_work, vma_destroy_work_func); + queue_work(system_unbound_wq, &vma->destroy_work); +} + +static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) +{ + struct xe_vm *vm = vma->vm; + + lockdep_assert_held_write(&vm->lock); + XE_BUG_ON(!list_empty(&vma->unbind_link)); + + if (xe_vma_is_userptr(vma)) { + XE_WARN_ON(!vma->destroyed); + spin_lock(&vm->userptr.invalidated_lock); + list_del_init(&vma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); + list_del(&vma->userptr_link); + } else { + xe_bo_assert_held(vma->bo); + list_del(&vma->bo_link); + + spin_lock(&vm->notifier.list_lock); + list_del(&vma->notifier.rebind_link); + spin_unlock(&vm->notifier.list_lock); + + if (!vma->bo->vm && vm_remove_extobj(vma)) { + struct xe_vma *other; + + other = bo_has_vm_references_locked(vma->bo, vm, NULL); + + if (other) + __vm_insert_extobj(vm, other); + } + } + + xe_vm_assert_held(vm); + if (!list_empty(&vma->rebind_link)) + list_del(&vma->rebind_link); + + if (fence) { + int ret = dma_fence_add_callback(fence, &vma->destroy_cb, + vma_destroy_cb); + + if (ret) { + XE_WARN_ON(ret != -ENOENT); + xe_vma_destroy_late(vma); + } + } else { + xe_vma_destroy_late(vma); + } +} + +static void xe_vma_destroy_unlocked(struct xe_vma *vma) +{ + struct ttm_validate_buffer tv[2]; + struct ww_acquire_ctx ww; + struct xe_bo *bo = vma->bo; + LIST_HEAD(objs); + LIST_HEAD(dups); + int err; + + memset(tv, 0, sizeof(tv)); + tv[0].bo = xe_vm_ttm_bo(vma->vm); + list_add(&tv[0].head, &objs); + + if (bo) { + tv[1].bo = &xe_bo_get(bo)->ttm; + list_add(&tv[1].head, &objs); + } + err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); + XE_WARN_ON(err); + + xe_vma_destroy(vma, NULL); + + ttm_eu_backoff_reservation(&ww, &objs); + if (bo) + 
xe_bo_put(bo); +} + +static struct xe_vma *to_xe_vma(const struct rb_node *node) +{ + BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0); + return (struct xe_vma *)node; +} + +static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b) +{ + if (a->end < b->start) { + return -1; + } else if (b->end < a->start) { + return 1; + } else { + return 0; + } +} + +static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b) +{ + return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0; +} + +int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node) +{ + struct xe_vma *cmp = to_xe_vma(node); + const struct xe_vma *own = key; + + if (own->start > cmp->end) + return 1; + + if (own->end < cmp->start) + return -1; + + return 0; +} + +struct xe_vma * +xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma) +{ + struct rb_node *node; + + if (xe_vm_is_closed(vm)) + return NULL; + + XE_BUG_ON(vma->end >= vm->size); + lockdep_assert_held(&vm->lock); + + node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb); + + return node ? to_xe_vma(node) : NULL; +} + +static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) +{ + XE_BUG_ON(vma->vm != vm); + lockdep_assert_held(&vm->lock); + + rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb); +} + +static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) +{ + XE_BUG_ON(vma->vm != vm); + lockdep_assert_held(&vm->lock); + + rb_erase(&vma->vm_node, &vm->vmas); + if (vm->usm.last_fault_vma == vma) + vm->usm.last_fault_vma = NULL; +} + +static void async_op_work_func(struct work_struct *w); +static void vm_destroy_work_func(struct work_struct *w); + +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +{ + struct xe_vm *vm; + int err, i = 0, number_gts = 0; + struct xe_gt *gt; + u8 id; + + vm = kzalloc(sizeof(*vm), GFP_KERNEL); + if (!vm) + return ERR_PTR(-ENOMEM); + + vm->xe = xe; + kref_init(&vm->refcount); + dma_resv_init(&vm->resv); + + vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1); + + vm->vmas = RB_ROOT; + vm->flags = flags; + + init_rwsem(&vm->lock); + + INIT_LIST_HEAD(&vm->rebind_list); + + INIT_LIST_HEAD(&vm->userptr.repin_list); + INIT_LIST_HEAD(&vm->userptr.invalidated); + init_rwsem(&vm->userptr.notifier_lock); + spin_lock_init(&vm->userptr.invalidated_lock); + + INIT_LIST_HEAD(&vm->notifier.rebind_list); + spin_lock_init(&vm->notifier.list_lock); + + INIT_LIST_HEAD(&vm->async_ops.pending); + INIT_WORK(&vm->async_ops.work, async_op_work_func); + spin_lock_init(&vm->async_ops.lock); + + INIT_WORK(&vm->destroy_work, vm_destroy_work_func); + + INIT_LIST_HEAD(&vm->preempt.engines); + vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ + + INIT_LIST_HEAD(&vm->extobj.list); + + if (!(flags & XE_VM_FLAG_MIGRATION)) { + /* We need to immeditatelly exit from any D3 state */ + xe_pm_runtime_get(xe); + xe_device_mem_access_get(xe); + } + + err = dma_resv_lock_interruptible(&vm->resv, NULL); + if (err) + goto err_put; + + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + vm->flags |= XE_VM_FLAGS_64K; + + for_each_gt(gt, xe, id) { + if (xe_gt_is_media_type(gt)) + continue; + + if (flags & XE_VM_FLAG_MIGRATION && + gt->info.id != XE_VM_FLAG_GT_ID(flags)) + continue; + + vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level); + if (IS_ERR(vm->pt_root[id])) { + err = PTR_ERR(vm->pt_root[id]); + vm->pt_root[id] = NULL; + goto err_destroy_root; + } + } + + if (flags & XE_VM_FLAG_SCRATCH_PAGE) { + for_each_gt(gt, xe, id) { + if (!vm->pt_root[id]) + continue; + + err = 
xe_pt_create_scratch(xe, gt, vm); + if (err) + goto err_scratch_pt; + } + } + + if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + vm->flags |= XE_VM_FLAG_COMPUTE_MODE; + } + + if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) { + vm->async_ops.fence.context = dma_fence_context_alloc(1); + vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS; + } + + /* Fill pt_root after allocating scratch tables */ + for_each_gt(gt, xe, id) { + if (!vm->pt_root[id]) + continue; + + xe_pt_populate_empty(gt, vm, vm->pt_root[id]); + } + dma_resv_unlock(&vm->resv); + + /* Kernel migration VM shouldn't have a circular loop.. */ + if (!(flags & XE_VM_FLAG_MIGRATION)) { + for_each_gt(gt, xe, id) { + struct xe_vm *migrate_vm; + struct xe_engine *eng; + + if (!vm->pt_root[id]) + continue; + + migrate_vm = xe_migrate_get_vm(gt->migrate); + eng = xe_engine_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, + ENGINE_FLAG_VM); + xe_vm_put(migrate_vm); + if (IS_ERR(eng)) { + xe_vm_close_and_put(vm); + return ERR_CAST(eng); + } + vm->eng[id] = eng; + number_gts++; + } + } + + if (number_gts > 1) + vm->composite_fence_ctx = dma_fence_context_alloc(1); + + mutex_lock(&xe->usm.lock); + if (flags & XE_VM_FLAG_FAULT_MODE) + xe->usm.num_vm_in_fault_mode++; + else if (!(flags & XE_VM_FLAG_MIGRATION)) + xe->usm.num_vm_in_non_fault_mode++; + mutex_unlock(&xe->usm.lock); + + trace_xe_vm_create(vm); + + return vm; + +err_scratch_pt: + for_each_gt(gt, xe, id) { + if (!vm->pt_root[id]) + continue; + + i = vm->pt_root[id]->level; + while (i) + if (vm->scratch_pt[id][--i]) + xe_pt_destroy(vm->scratch_pt[id][i], + vm->flags, NULL); + xe_bo_unpin(vm->scratch_bo[id]); + xe_bo_put(vm->scratch_bo[id]); + } +err_destroy_root: + for_each_gt(gt, xe, id) { + if (vm->pt_root[id]) + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + } + dma_resv_unlock(&vm->resv); +err_put: + dma_resv_fini(&vm->resv); + kfree(vm); + if (!(flags & XE_VM_FLAG_MIGRATION)) { + xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); + } + return ERR_PTR(err); +} + +static void flush_async_ops(struct xe_vm *vm) +{ + queue_work(system_unbound_wq, &vm->async_ops.work); + flush_work(&vm->async_ops.work); +} + +static void vm_error_capture(struct xe_vm *vm, int err, + u32 op, u64 addr, u64 size) +{ + struct drm_xe_vm_bind_op_error_capture capture; + u64 __user *address = + u64_to_user_ptr(vm->async_ops.error_capture.addr); + bool in_kthread = !current->mm; + + capture.error = err; + capture.op = op; + capture.addr = addr; + capture.size = size; + + if (in_kthread) { + if (!mmget_not_zero(vm->async_ops.error_capture.mm)) + goto mm_closed; + kthread_use_mm(vm->async_ops.error_capture.mm); + } + + if (copy_to_user(address, &capture, sizeof(capture))) + XE_WARN_ON("Copy to user failed"); + + if (in_kthread) { + kthread_unuse_mm(vm->async_ops.error_capture.mm); + mmput(vm->async_ops.error_capture.mm); + } + +mm_closed: + wake_up_all(&vm->async_ops.error_capture.wq); +} + +void xe_vm_close_and_put(struct xe_vm *vm) +{ + struct rb_root contested = RB_ROOT; + struct ww_acquire_ctx ww; + struct xe_device *xe = vm->xe; + struct xe_gt *gt; + u8 id; + + XE_BUG_ON(vm->preempt.num_engines); + + vm->size = 0; + smp_mb(); + flush_async_ops(vm); + if (xe_vm_in_compute_mode(vm)) + flush_work(&vm->preempt.rebind_work); + + for_each_gt(gt, xe, id) { + if (vm->eng[id]) { + xe_engine_kill(vm->eng[id]); + xe_engine_put(vm->eng[id]); + vm->eng[id] = NULL; + } + } + + down_write(&vm->lock); + xe_vm_lock(vm, &ww, 0, false); + while (vm->vmas.rb_node) 
{ + struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node); + + if (xe_vma_is_userptr(vma)) { + down_read(&vm->userptr.notifier_lock); + vma->destroyed = true; + up_read(&vm->userptr.notifier_lock); + } + + rb_erase(&vma->vm_node, &vm->vmas); + + /* easy case, remove from VMA? */ + if (xe_vma_is_userptr(vma) || vma->bo->vm) { + xe_vma_destroy(vma, NULL); + continue; + } + + rb_add(&vma->vm_node, &contested, xe_vma_less_cb); + } + + /* + * All vm operations will add shared fences to resv. + * The only exception is eviction for a shared object, + * but even so, the unbind when evicted would still + * install a fence to resv. Hence it's safe to + * destroy the pagetables immediately. + */ + for_each_gt(gt, xe, id) { + if (vm->scratch_bo[id]) { + u32 i; + + xe_bo_unpin(vm->scratch_bo[id]); + xe_bo_put(vm->scratch_bo[id]); + for (i = 0; i < vm->pt_root[id]->level; i++) + xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, + NULL); + } + } + xe_vm_unlock(vm, &ww); + + if (contested.rb_node) { + + /* + * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL + * Since we hold a refcount to the bo, we can remove and free + * the members safely without locking. + */ + while (contested.rb_node) { + struct xe_vma *vma = to_xe_vma(contested.rb_node); + + rb_erase(&vma->vm_node, &contested); + xe_vma_destroy_unlocked(vma); + } + } + + if (vm->async_ops.error_capture.addr) + wake_up_all(&vm->async_ops.error_capture.wq); + + XE_WARN_ON(!list_empty(&vm->extobj.list)); + up_write(&vm->lock); + + xe_vm_put(vm); +} + +static void vm_destroy_work_func(struct work_struct *w) +{ + struct xe_vm *vm = + container_of(w, struct xe_vm, destroy_work); + struct ww_acquire_ctx ww; + struct xe_device *xe = vm->xe; + struct xe_gt *gt; + u8 id; + void *lookup; + + /* xe_vm_close_and_put was not called? */ + XE_WARN_ON(vm->size); + + if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { + xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); + + mutex_lock(&xe->usm.lock); + lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); + XE_WARN_ON(lookup != vm); + mutex_unlock(&xe->usm.lock); + } + + /* + * XXX: We delay destroying the PT root until the VM if freed as PT root + * is needed for xe_vm_lock to work. If we remove that dependency this + * can be moved to xe_vm_close_and_put. 
+ */ + xe_vm_lock(vm, &ww, 0, false); + for_each_gt(gt, xe, id) { + if (vm->pt_root[id]) { + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + vm->pt_root[id] = NULL; + } + } + xe_vm_unlock(vm, &ww); + + mutex_lock(&xe->usm.lock); + if (vm->flags & XE_VM_FLAG_FAULT_MODE) + xe->usm.num_vm_in_fault_mode--; + else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) + xe->usm.num_vm_in_non_fault_mode--; + mutex_unlock(&xe->usm.lock); + + trace_xe_vm_free(vm); + dma_fence_put(vm->rebind_fence); + dma_resv_fini(&vm->resv); + kfree(vm); + +} + +void xe_vm_free(struct kref *ref) +{ + struct xe_vm *vm = container_of(ref, struct xe_vm, refcount); + + /* To destroy the VM we need to be able to sleep */ + queue_work(system_unbound_wq, &vm->destroy_work); +} + +struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) +{ + struct xe_vm *vm; + + mutex_lock(&xef->vm.lock); + vm = xa_load(&xef->vm.xa, id); + mutex_unlock(&xef->vm.lock); + + if (vm) + xe_vm_get(vm); + + return vm; +} + +u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt) +{ + XE_BUG_ON(xe_gt_is_media_type(full_gt)); + + return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0, + XE_CACHE_WB); +} + +static struct dma_fence * +xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_gt *gt; + struct dma_fence *fence = NULL; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + struct xe_vm *vm = vma->vm; + int cur_fence = 0, i; + int number_gts = hweight_long(vma->gt_present); + int err; + u8 id; + + trace_xe_vma_unbind(vma); + + if (number_gts > 1) { + fences = kmalloc_array(number_gts, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + } + + for_each_gt(gt, vm->xe, id) { + if (!(vma->gt_present & BIT(id))) + goto next; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + + fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_fences; + } + + if (fences) + fences[cur_fence++] = fence; + +next: + if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list)) + e = list_next_entry(e, multi_gt_list); + } + + if (fences) { + cf = dma_fence_array_create(number_gts, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + } + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence); + + return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence; + +err_fences: + if (fences) { + while (cur_fence) { + /* FIXME: Rewind the previous binds? 
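+			 * For now the error path only drops the fences
+			 * gathered so far.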
*/ + dma_fence_put(fences[--cur_fence]); + } + kfree(fences); + } + + return ERR_PTR(err); +} + +static struct dma_fence * +xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_gt *gt; + struct dma_fence *fence; + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + struct xe_vm *vm = vma->vm; + int cur_fence = 0, i; + int number_gts = hweight_long(vma->gt_mask); + int err; + u8 id; + + trace_xe_vma_bind(vma); + + if (number_gts > 1) { + fences = kmalloc_array(number_gts, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + } + + for_each_gt(gt, vm->xe, id) { + if (!(vma->gt_mask & BIT(id))) + goto next; + + XE_BUG_ON(xe_gt_is_media_type(gt)); + fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs, + vma->gt_present & BIT(id)); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_fences; + } + + if (fences) + fences[cur_fence++] = fence; + +next: + if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list)) + e = list_next_entry(e, multi_gt_list); + } + + if (fences) { + cf = dma_fence_array_create(number_gts, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + err = -ENOMEM; + goto err_fences; + } + } + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence); + + return cf ? &cf->base : fence; + +err_fences: + if (fences) { + while (cur_fence) { + /* FIXME: Rewind the previous binds? */ + dma_fence_put(fences[--cur_fence]); + } + kfree(fences); + } + + return ERR_PTR(err); +} + +struct async_op_fence { + struct dma_fence fence; + struct dma_fence_cb cb; + struct xe_vm *vm; + wait_queue_head_t wq; + bool started; +}; + +static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +async_op_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "async_op_fence"; +} + +static const struct dma_fence_ops async_op_fence_ops = { + .get_driver_name = async_op_fence_get_driver_name, + .get_timeline_name = async_op_fence_get_timeline_name, +}; + +static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct async_op_fence *afence = + container_of(cb, struct async_op_fence, cb); + + dma_fence_signal(&afence->fence); + xe_vm_put(afence->vm); + dma_fence_put(&afence->fence); +} + +static void add_async_op_fence_cb(struct xe_vm *vm, + struct dma_fence *fence, + struct async_op_fence *afence) +{ + int ret; + + if (!xe_vm_no_dma_fences(vm)) { + afence->started = true; + smp_wmb(); + wake_up_all(&afence->wq); + } + + afence->vm = xe_vm_get(vm); + dma_fence_get(&afence->fence); + ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb); + if (ret == -ENOENT) + dma_fence_signal(&afence->fence); + if (ret) { + xe_vm_put(vm); + dma_fence_put(&afence->fence); + } + XE_WARN_ON(ret && ret != -ENOENT); +} + +int xe_vm_async_fence_wait_start(struct dma_fence *fence) +{ + if (fence->ops == &async_op_fence_ops) { + struct async_op_fence *afence = + container_of(fence, struct async_op_fence, fence); + + XE_BUG_ON(xe_vm_no_dma_fences(afence->vm)); + + smp_rmb(); + return wait_event_interruptible(afence->wq, afence->started); + } + + return 0; +} + +static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence) +{ + struct dma_fence *fence; + + xe_vm_assert_held(vm); + + fence = 
xe_vm_bind_vma(vma, e, syncs, num_syncs); + if (IS_ERR(fence)) + return PTR_ERR(fence); + if (afence) + add_async_op_fence_cb(vm, fence, afence); + + dma_fence_put(fence); + return 0; +} + +static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e, + struct xe_bo *bo, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence) +{ + int err; + + xe_vm_assert_held(vm); + xe_bo_assert_held(bo); + + if (bo) { + err = xe_bo_validate(bo, vm, true); + if (err) + return err; + } + + return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence); +} + +static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence) +{ + struct dma_fence *fence; + + xe_vm_assert_held(vm); + xe_bo_assert_held(vma->bo); + + fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs); + if (IS_ERR(fence)) + return PTR_ERR(fence); + if (afence) + add_async_op_fence_cb(vm, fence, afence); + + xe_vma_destroy(vma, fence); + dma_fence_put(fence); + + return 0; +} + +static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm, + u64 value) +{ + if (XE_IOCTL_ERR(xe, !value)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) + return -ENOTSUPP; + + if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr)) + return -ENOTSUPP; + + vm->async_ops.error_capture.mm = current->mm; + vm->async_ops.error_capture.addr = value; + init_waitqueue_head(&vm->async_ops.error_capture.wq); + + return 0; +} + +typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm, + u64 value); + +static const xe_vm_set_property_fn vm_set_property_funcs[] = { + [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] = + vm_set_error_capture_address, +}; + +static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm, + u64 extension) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_vm_set_property ext; + int err; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, ext.property >= + ARRAY_SIZE(vm_set_property_funcs))) + return -EINVAL; + + return vm_set_property_funcs[ext.property](xe, vm, ext.value); +} + +typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm, + u64 extension); + +static const xe_vm_set_property_fn vm_user_extension_funcs[] = { + [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm, + u64 extensions, int ext_number) +{ + u64 __user *address = u64_to_user_ptr(extensions); + struct xe_user_extension ext; + int err; + + if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, ext.name >= + ARRAY_SIZE(vm_user_extension_funcs))) + return -EINVAL; + + err = vm_user_extension_funcs[ext.name](xe, vm, extensions); + if (XE_IOCTL_ERR(xe, err)) + return err; + + if (ext.next_extension) + return vm_user_extensions(xe, vm, ext.next_extension, + ++ext_number); + + return 0; +} + +#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \ + DRM_XE_VM_CREATE_COMPUTE_MODE | \ + DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \ + DRM_XE_VM_CREATE_FAULT_MODE) + +int xe_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); 
+ struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_create *args = data; + struct xe_vm *vm; + u32 id, asid; + int err; + u32 flags = 0; + + if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE && + args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE && + args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + xe_device_in_non_fault_mode(xe))) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) && + xe_device_in_fault_mode(xe))) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + !xe->info.supports_usm)) + return -EINVAL; + + if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE) + flags |= XE_VM_FLAG_SCRATCH_PAGE; + if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE) + flags |= XE_VM_FLAG_COMPUTE_MODE; + if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) + flags |= XE_VM_FLAG_ASYNC_BIND_OPS; + if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE) + flags |= XE_VM_FLAG_FAULT_MODE; + + vm = xe_vm_create(xe, flags); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + if (args->extensions) { + err = vm_user_extensions(xe, vm, args->extensions, 0); + if (XE_IOCTL_ERR(xe, err)) { + xe_vm_close_and_put(vm); + return err; + } + } + + mutex_lock(&xef->vm.lock); + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&xef->vm.lock); + if (err) { + xe_vm_close_and_put(vm); + return err; + } + + mutex_lock(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(0, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + mutex_unlock(&xe->usm.lock); + if (err) { + xe_vm_close_and_put(vm); + return err; + } + vm->usm.asid = asid; + + args->vm_id = id; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) + /* Warning: Security issue - never enable by default */ + args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, GEN8_PAGE_SIZE); +#endif + + return 0; +} + +int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_destroy *args = data; + struct xe_vm *vm; + + if (XE_IOCTL_ERR(xe, args->pad)) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + return -ENOENT; + xe_vm_put(vm); + + /* FIXME: Extend this check to non-compute mode VMs */ + if (XE_IOCTL_ERR(xe, vm->preempt.num_engines)) + return -EBUSY; + + mutex_lock(&xef->vm.lock); + xa_erase(&xef->vm.xa, args->vm_id); + mutex_unlock(&xef->vm.lock); + + xe_vm_close_and_put(vm); + + return 0; +} + +static const u32 region_to_mem_type[] = { + XE_PL_TT, + XE_PL_VRAM0, + XE_PL_VRAM1, +}; + +static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, u32 region, + struct xe_sync_entry *syncs, u32 num_syncs, + struct async_op_fence *afence) +{ + int err; + + XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type)); + + if (!xe_vma_is_userptr(vma)) { + err = xe_bo_migrate(vma->bo, region_to_mem_type[region]); + if (err) + return err; + } + + if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) { + return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs, + afence); + } else { + int i; + + /* Nothing to do, signal fences now */ + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, + 
dma_fence_get_stub()); + if (afence) + dma_fence_signal(&afence->fence); + return 0; + } +} + +#define VM_BIND_OP(op) (op & 0xffff) + +static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_bo *bo, u32 op, + u32 region, struct xe_sync_entry *syncs, + u32 num_syncs, struct async_op_fence *afence) +{ + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence); + case XE_VM_BIND_OP_UNMAP: + case XE_VM_BIND_OP_UNMAP_ALL: + return xe_vm_unbind(vm, vma, e, syncs, num_syncs, afence); + case XE_VM_BIND_OP_MAP_USERPTR: + return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence); + case XE_VM_BIND_OP_PREFETCH: + return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs, + afence); + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + return -EINVAL; + } +} + +struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm) +{ + int idx = vm->flags & XE_VM_FLAG_MIGRATION ? + XE_VM_FLAG_GT_ID(vm->flags) : 0; + + /* Safe to use index 0 as all BO in the VM share a single dma-resv lock */ + return &vm->pt_root[idx]->bo->ttm; +} + +static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv) +{ + tv->num_shared = 1; + tv->bo = xe_vm_ttm_bo(vm); +} + +static bool is_map_op(u32 op) +{ + return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP || + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR; +} + +static bool is_unmap_op(u32 op) +{ + return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP || + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL; +} + +static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_bo *bo, + struct drm_xe_vm_bind_op *bind_op, + struct xe_sync_entry *syncs, u32 num_syncs, + struct async_op_fence *afence) +{ + LIST_HEAD(objs); + LIST_HEAD(dups); + struct ttm_validate_buffer tv_bo, tv_vm; + struct ww_acquire_ctx ww; + struct xe_bo *vbo; + int err, i; + + lockdep_assert_held(&vm->lock); + XE_BUG_ON(!list_empty(&vma->unbind_link)); + + /* Binds deferred to faults, signal fences now */ + if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) && + !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, + dma_fence_get_stub()); + if (afence) + dma_fence_signal(&afence->fence); + return 0; + } + + xe_vm_tv_populate(vm, &tv_vm); + list_add_tail(&tv_vm.head, &objs); + vbo = vma->bo; + if (vbo) { + /* + * An unbind can drop the last reference to the BO and + * the BO is needed for ttm_eu_backoff_reservation so + * take a reference here. 
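+ * The reference is dropped again at the end of this function, after the reservation has been backed off.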
+ */ + xe_bo_get(vbo); + + tv_bo.bo = &vbo->ttm; + tv_bo.num_shared = 1; + list_add(&tv_bo.head, &objs); + } + +again: + err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups); + if (!err) { + err = __vm_bind_ioctl(vm, vma, e, bo, + bind_op->op, bind_op->region, syncs, + num_syncs, afence); + ttm_eu_backoff_reservation(&ww, &objs); + if (err == -EAGAIN && xe_vma_is_userptr(vma)) { + lockdep_assert_held_write(&vm->lock); + err = xe_vma_userptr_pin_pages(vma); + if (!err) + goto again; + } + } + xe_bo_put(vbo); + + return err; +} + +struct async_op { + struct xe_vma *vma; + struct xe_engine *engine; + struct xe_bo *bo; + struct drm_xe_vm_bind_op bind_op; + struct xe_sync_entry *syncs; + u32 num_syncs; + struct list_head link; + struct async_op_fence *fence; +}; + +static void async_op_cleanup(struct xe_vm *vm, struct async_op *op) +{ + while (op->num_syncs--) + xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); + kfree(op->syncs); + xe_bo_put(op->bo); + if (op->engine) + xe_engine_put(op->engine); + xe_vm_put(vm); + if (op->fence) + dma_fence_put(&op->fence->fence); + kfree(op); +} + +static struct async_op *next_async_op(struct xe_vm *vm) +{ + return list_first_entry_or_null(&vm->async_ops.pending, + struct async_op, link); +} + +static void vm_set_async_error(struct xe_vm *vm, int err) +{ + lockdep_assert_held(&vm->lock); + vm->async_ops.error = err; +} + +static void async_op_work_func(struct work_struct *w) +{ + struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work); + + for (;;) { + struct async_op *op; + int err; + + if (vm->async_ops.error && !xe_vm_is_closed(vm)) + break; + + spin_lock_irq(&vm->async_ops.lock); + op = next_async_op(vm); + if (op) + list_del_init(&op->link); + spin_unlock_irq(&vm->async_ops.lock); + + if (!op) + break; + + if (!xe_vm_is_closed(vm)) { + bool first, last; + + down_write(&vm->lock); +again: + first = op->vma->first_munmap_rebind; + last = op->vma->last_munmap_rebind; +#ifdef TEST_VM_ASYNC_OPS_ERROR +#define FORCE_ASYNC_OP_ERROR BIT(31) + if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) { + err = vm_bind_ioctl(vm, op->vma, op->engine, + op->bo, &op->bind_op, + op->syncs, op->num_syncs, + op->fence); + } else { + err = -ENOMEM; + op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR; + } +#else + err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo, + &op->bind_op, op->syncs, + op->num_syncs, op->fence); +#endif + /* + * In order for the fencing to work (stall behind + * existing jobs / prevent new jobs from running) all + * the dma-resv slots need to be programmed in a batch + * relative to execs / the rebind worker. The vm->lock + * ensure this. 
+ */ + if (!err && ((first && VM_BIND_OP(op->bind_op.op) == + XE_VM_BIND_OP_UNMAP) || + vm->async_ops.munmap_rebind_inflight)) { + if (last) { + op->vma->last_munmap_rebind = false; + vm->async_ops.munmap_rebind_inflight = + false; + } else { + vm->async_ops.munmap_rebind_inflight = + true; + + async_op_cleanup(vm, op); + + spin_lock_irq(&vm->async_ops.lock); + op = next_async_op(vm); + XE_BUG_ON(!op); + list_del_init(&op->link); + spin_unlock_irq(&vm->async_ops.lock); + + goto again; + } + } + if (err) { + trace_xe_vma_fail(op->vma); + drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d", + VM_BIND_OP(op->bind_op.op), + err); + + spin_lock_irq(&vm->async_ops.lock); + list_add(&op->link, &vm->async_ops.pending); + spin_unlock_irq(&vm->async_ops.lock); + + vm_set_async_error(vm, err); + up_write(&vm->lock); + + if (vm->async_ops.error_capture.addr) + vm_error_capture(vm, err, + op->bind_op.op, + op->bind_op.addr, + op->bind_op.range); + break; + } + up_write(&vm->lock); + } else { + trace_xe_vma_flush(op->vma); + + if (is_unmap_op(op->bind_op.op)) { + down_write(&vm->lock); + xe_vma_destroy_unlocked(op->vma); + up_write(&vm->lock); + } + + if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &op->fence->fence.flags)) { + if (!xe_vm_no_dma_fences(vm)) { + op->fence->started = true; + smp_wmb(); + wake_up_all(&op->fence->wq); + } + dma_fence_signal(&op->fence->fence); + } + } + + async_op_cleanup(vm, op); + } +} + +static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_bo *bo, + struct drm_xe_vm_bind_op *bind_op, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct async_op *op; + bool installed = false; + u64 seqno; + int i; + + lockdep_assert_held(&vm->lock); + + op = kmalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + return -ENOMEM; + } + + if (num_syncs) { + op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL); + if (!op->fence) { + kfree(op); + return -ENOMEM; + } + + seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno; + dma_fence_init(&op->fence->fence, &async_op_fence_ops, + &vm->async_ops.lock, e ? 
e->bind.fence_ctx : + vm->async_ops.fence.context, seqno); + + if (!xe_vm_no_dma_fences(vm)) { + op->fence->vm = vm; + op->fence->started = false; + init_waitqueue_head(&op->fence->wq); + } + } else { + op->fence = NULL; + } + op->vma = vma; + op->engine = e; + op->bo = bo; + op->bind_op = *bind_op; + op->syncs = syncs; + op->num_syncs = num_syncs; + INIT_LIST_HEAD(&op->link); + + for (i = 0; i < num_syncs; i++) + installed |= xe_sync_entry_signal(&syncs[i], NULL, + &op->fence->fence); + + if (!installed && op->fence) + dma_fence_signal(&op->fence->fence); + + spin_lock_irq(&vm->async_ops.lock); + list_add_tail(&op->link, &vm->async_ops.pending); + spin_unlock_irq(&vm->async_ops.lock); + + if (!vm->async_ops.error) + queue_work(system_unbound_wq, &vm->async_ops.work); + + return 0; +} + +static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, + struct xe_engine *e, struct xe_bo *bo, + struct drm_xe_vm_bind_op *bind_op, + struct xe_sync_entry *syncs, u32 num_syncs) +{ + struct xe_vma *__vma, *next; + struct list_head rebind_list; + struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL; + u32 num_in_syncs = 0, num_out_syncs = 0; + bool first = true, last; + int err; + int i; + + lockdep_assert_held(&vm->lock); + + /* Not a linked list of unbinds + rebinds, easy */ + if (list_empty(&vma->unbind_link)) + return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op, + syncs, num_syncs); + + /* + * Linked list of unbinds + rebinds, decompose syncs into 'in / out' + * passing the 'in' to the first operation and 'out' to the last. Also + * the reference counting is a little tricky, increment the VM / bind + * engine ref count on all but the last operation and increment the BOs + * ref count on each rebind. + */ + + XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP && + VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL && + VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH); + + /* Decompose syncs */ + if (num_syncs) { + in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL); + out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL); + if (!in_syncs || !out_syncs) { + err = -ENOMEM; + goto out_error; + } + + for (i = 0; i < num_syncs; ++i) { + bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL; + + if (signal) + out_syncs[num_out_syncs++] = syncs[i]; + else + in_syncs[num_in_syncs++] = syncs[i]; + } + } + + /* Do unbinds + move rebinds to new list */ + INIT_LIST_HEAD(&rebind_list); + list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) { + if (__vma->destroyed || + VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) { + list_del_init(&__vma->unbind_link); + xe_bo_get(bo); + err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma, + e ? xe_engine_get(e) : NULL, + bo, bind_op, first ? + in_syncs : NULL, + first ? num_in_syncs : 0); + if (err) { + xe_bo_put(bo); + xe_vm_put(vm); + if (e) + xe_engine_put(e); + goto out_error; + } + in_syncs = NULL; + first = false; + } else { + list_move_tail(&__vma->unbind_link, &rebind_list); + } + } + last = list_empty(&rebind_list); + if (!last) { + xe_vm_get(vm); + if (e) + xe_engine_get(e); + } + err = __vm_bind_ioctl_async(vm, vma, e, + bo, bind_op, + first ? in_syncs : + last ? out_syncs : NULL, + first ? num_in_syncs : + last ? 
num_out_syncs : 0); + if (err) { + if (!last) { + xe_vm_put(vm); + if (e) + xe_engine_put(e); + } + goto out_error; + } + in_syncs = NULL; + + /* Do rebinds */ + list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) { + list_del_init(&__vma->unbind_link); + last = list_empty(&rebind_list); + + if (xe_vma_is_userptr(__vma)) { + bind_op->op = XE_VM_BIND_FLAG_ASYNC | + XE_VM_BIND_OP_MAP_USERPTR; + } else { + bind_op->op = XE_VM_BIND_FLAG_ASYNC | + XE_VM_BIND_OP_MAP; + xe_bo_get(__vma->bo); + } + + if (!last) { + xe_vm_get(vm); + if (e) + xe_engine_get(e); + } + + err = __vm_bind_ioctl_async(vm, __vma, e, + __vma->bo, bind_op, last ? + out_syncs : NULL, + last ? num_out_syncs : 0); + if (err) { + if (!last) { + xe_vm_put(vm); + if (e) + xe_engine_put(e); + } + goto out_error; + } + } + + kfree(syncs); + return 0; + +out_error: + kfree(in_syncs); + kfree(out_syncs); + kfree(syncs); + + return err; +} + +static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, + u64 addr, u64 range, u32 op) +{ + struct xe_device *xe = vm->xe; + struct xe_vma *vma, lookup; + bool async = !!(op & XE_VM_BIND_FLAG_ASYNC); + + lockdep_assert_held(&vm->lock); + + lookup.start = addr; + lookup.end = addr + range - 1; + + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + case XE_VM_BIND_OP_MAP_USERPTR: + vma = xe_vm_find_overlapping_vma(vm, &lookup); + if (XE_IOCTL_ERR(xe, vma)) + return -EBUSY; + break; + case XE_VM_BIND_OP_UNMAP: + case XE_VM_BIND_OP_PREFETCH: + vma = xe_vm_find_overlapping_vma(vm, &lookup); + if (XE_IOCTL_ERR(xe, !vma) || + XE_IOCTL_ERR(xe, (vma->start != addr || + vma->end != addr + range - 1) && !async)) + return -EINVAL; + break; + case XE_VM_BIND_OP_UNMAP_ALL: + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + return -EINVAL; + } + + return 0; +} + +static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma) +{ + down_read(&vm->userptr.notifier_lock); + vma->destroyed = true; + up_read(&vm->userptr.notifier_lock); + xe_vm_remove_vma(vm, vma); +} + +static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma) +{ + int err; + + if (vma->bo && !vma->bo->vm) { + vm_insert_extobj(vm, vma); + err = add_preempt_fences(vm, vma->bo); + if (err) + return err; + } + + return 0; +} + +/* + * Find all overlapping VMAs in lookup range and add to a list in the returned + * VMA, all of VMAs found will be unbound. Also possibly add 2 new VMAs that + * need to be bound if first / last VMAs are not fully unbound. This is akin to + * how munmap works. 
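+ * E.g. unbinding the middle of a mapping leaves the head of the first VMA and the tail of the last VMA still mapped, so replacement VMAs covering those remnants are created here and later rebound.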
+ */ +static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, + struct xe_vma *lookup) +{ + struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup); + struct rb_node *node; + struct xe_vma *first = vma, *last = vma, *new_first = NULL, + *new_last = NULL, *__vma, *next; + int err = 0; + bool first_munmap_rebind = false; + + lockdep_assert_held(&vm->lock); + XE_BUG_ON(!vma); + + node = &vma->vm_node; + while ((node = rb_next(node))) { + if (!xe_vma_cmp_vma_cb(lookup, node)) { + __vma = to_xe_vma(node); + list_add_tail(&__vma->unbind_link, &vma->unbind_link); + last = __vma; + } else { + break; + } + } + + node = &vma->vm_node; + while ((node = rb_prev(node))) { + if (!xe_vma_cmp_vma_cb(lookup, node)) { + __vma = to_xe_vma(node); + list_add(&__vma->unbind_link, &vma->unbind_link); + first = __vma; + } else { + break; + } + } + + if (first->start != lookup->start) { + struct ww_acquire_ctx ww; + + if (first->bo) + err = xe_bo_lock(first->bo, &ww, 0, true); + if (err) + goto unwind; + new_first = xe_vma_create(first->vm, first->bo, + first->bo ? first->bo_offset : + first->userptr.ptr, + first->start, + lookup->start - 1, + (first->pte_flags & PTE_READ_ONLY), + first->gt_mask); + if (first->bo) + xe_bo_unlock(first->bo, &ww); + if (!new_first) { + err = -ENOMEM; + goto unwind; + } + if (!first->bo) { + err = xe_vma_userptr_pin_pages(new_first); + if (err) + goto unwind; + } + err = prep_replacement_vma(vm, new_first); + if (err) + goto unwind; + } + + if (last->end != lookup->end) { + struct ww_acquire_ctx ww; + u64 chunk = lookup->end + 1 - last->start; + + if (last->bo) + err = xe_bo_lock(last->bo, &ww, 0, true); + if (err) + goto unwind; + new_last = xe_vma_create(last->vm, last->bo, + last->bo ? last->bo_offset + chunk : + last->userptr.ptr + chunk, + last->start + chunk, + last->end, + (last->pte_flags & PTE_READ_ONLY), + last->gt_mask); + if (last->bo) + xe_bo_unlock(last->bo, &ww); + if (!new_last) { + err = -ENOMEM; + goto unwind; + } + if (!last->bo) { + err = xe_vma_userptr_pin_pages(new_last); + if (err) + goto unwind; + } + err = prep_replacement_vma(vm, new_last); + if (err) + goto unwind; + } + + prep_vma_destroy(vm, vma); + if (list_empty(&vma->unbind_link) && (new_first || new_last)) + vma->first_munmap_rebind = true; + list_for_each_entry(__vma, &vma->unbind_link, unbind_link) { + if ((new_first || new_last) && !first_munmap_rebind) { + __vma->first_munmap_rebind = true; + first_munmap_rebind = true; + } + prep_vma_destroy(vm, __vma); + } + if (new_first) { + xe_vm_insert_vma(vm, new_first); + list_add_tail(&new_first->unbind_link, &vma->unbind_link); + if (!new_last) + new_first->last_munmap_rebind = true; + } + if (new_last) { + xe_vm_insert_vma(vm, new_last); + list_add_tail(&new_last->unbind_link, &vma->unbind_link); + new_last->last_munmap_rebind = true; + } + + return vma; + +unwind: + list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) + list_del_init(&__vma->unbind_link); + if (new_last) { + prep_vma_destroy(vm, new_last); + xe_vma_destroy_unlocked(new_last); + } + if (new_first) { + prep_vma_destroy(vm, new_first); + xe_vma_destroy_unlocked(new_first); + } + + return ERR_PTR(err); +} + +/* + * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch + */ +static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, + struct xe_vma *lookup, + u32 region) +{ + struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma, + *next; + struct rb_node *node; + + if (!xe_vma_is_userptr(vma)) { + if 
(!xe_bo_can_migrate(vma->bo, region_to_mem_type[region])) + return ERR_PTR(-EINVAL); + } + + node = &vma->vm_node; + while ((node = rb_next(node))) { + if (!xe_vma_cmp_vma_cb(lookup, node)) { + __vma = to_xe_vma(node); + if (!xe_vma_is_userptr(__vma)) { + if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) + goto flush_list; + } + list_add_tail(&__vma->unbind_link, &vma->unbind_link); + } else { + break; + } + } + + node = &vma->vm_node; + while ((node = rb_prev(node))) { + if (!xe_vma_cmp_vma_cb(lookup, node)) { + __vma = to_xe_vma(node); + if (!xe_vma_is_userptr(__vma)) { + if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) + goto flush_list; + } + list_add(&__vma->unbind_link, &vma->unbind_link); + } else { + break; + } + } + + return vma; + +flush_list: + list_for_each_entry_safe(__vma, next, &vma->unbind_link, + unbind_link) + list_del_init(&__vma->unbind_link); + + return ERR_PTR(-EINVAL); +} + +static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm, + struct xe_bo *bo) +{ + struct xe_vma *first = NULL, *vma; + + lockdep_assert_held(&vm->lock); + xe_bo_assert_held(bo); + + list_for_each_entry(vma, &bo->vmas, bo_link) { + if (vma->vm != vm) + continue; + + prep_vma_destroy(vm, vma); + if (!first) + first = vma; + else + list_add_tail(&vma->unbind_link, &first->unbind_link); + } + + return first; +} + +static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, + struct xe_bo *bo, + u64 bo_offset_or_userptr, + u64 addr, u64 range, u32 op, + u64 gt_mask, u32 region) +{ + struct ww_acquire_ctx ww; + struct xe_vma *vma, lookup; + int err; + + lockdep_assert_held(&vm->lock); + + lookup.start = addr; + lookup.end = addr + range - 1; + + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + XE_BUG_ON(!bo); + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return ERR_PTR(err); + vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr, + addr + range - 1, + op & XE_VM_BIND_FLAG_READONLY, + gt_mask); + xe_bo_unlock(bo, &ww); + if (!vma) + return ERR_PTR(-ENOMEM); + + xe_vm_insert_vma(vm, vma); + if (!bo->vm) { + vm_insert_extobj(vm, vma); + err = add_preempt_fences(vm, bo); + if (err) { + prep_vma_destroy(vm, vma); + xe_vma_destroy_unlocked(vma); + + return ERR_PTR(err); + } + } + break; + case XE_VM_BIND_OP_UNMAP: + vma = vm_unbind_lookup_vmas(vm, &lookup); + break; + case XE_VM_BIND_OP_PREFETCH: + vma = vm_prefetch_lookup_vmas(vm, &lookup, region); + break; + case XE_VM_BIND_OP_UNMAP_ALL: + XE_BUG_ON(!bo); + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return ERR_PTR(err); + vma = vm_unbind_all_lookup_vmas(vm, bo); + if (!vma) + vma = ERR_PTR(-EINVAL); + xe_bo_unlock(bo, &ww); + break; + case XE_VM_BIND_OP_MAP_USERPTR: + XE_BUG_ON(bo); + + vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr, + addr + range - 1, + op & XE_VM_BIND_FLAG_READONLY, + gt_mask); + if (!vma) + return ERR_PTR(-ENOMEM); + + err = xe_vma_userptr_pin_pages(vma); + if (err) { + xe_vma_destroy(vma, NULL); + + return ERR_PTR(err); + } else { + xe_vm_insert_vma(vm, vma); + } + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + vma = ERR_PTR(-EINVAL); + } + + return vma; +} + +#ifdef TEST_VM_ASYNC_OPS_ERROR +#define SUPPORTED_FLAGS \ + (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \ + XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff) +#else +#define SUPPORTED_FLAGS \ + (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \ + XE_VM_BIND_FLAG_IMMEDIATE | 0xffff) +#endif +#define XE_64K_PAGE_MASK 0xffffull + +#define MAX_BINDS 512 /* FIXME: Picking random upper limit 
*/ + +static int vm_bind_ioctl_check_args(struct xe_device *xe, + struct drm_xe_vm_bind *args, + struct drm_xe_vm_bind_op **bind_ops, + bool *async) +{ + int err; + int i; + + if (XE_IOCTL_ERR(xe, args->extensions) || + XE_IOCTL_ERR(xe, !args->num_binds) || + XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS)) + return -EINVAL; + + if (args->num_binds > 1) { + u64 __user *bind_user = + u64_to_user_ptr(args->vector_of_binds); + + *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * + args->num_binds, GFP_KERNEL); + if (!*bind_ops) + return -ENOMEM; + + err = __copy_from_user(*bind_ops, bind_user, + sizeof(struct drm_xe_vm_bind_op) * + args->num_binds); + if (XE_IOCTL_ERR(xe, err)) { + err = -EFAULT; + goto free_bind_ops; + } + } else { + *bind_ops = &args->bind; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = (*bind_ops)[i].range; + u64 addr = (*bind_ops)[i].addr; + u32 op = (*bind_ops)[i].op; + u32 obj = (*bind_ops)[i].obj; + u64 obj_offset = (*bind_ops)[i].obj_offset; + u32 region = (*bind_ops)[i].region; + + if (i == 0) { + *async = !!(op & XE_VM_BIND_FLAG_ASYNC); + } else if (XE_IOCTL_ERR(xe, !*async) || + XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) || + XE_IOCTL_ERR(xe, VM_BIND_OP(op) == + XE_VM_BIND_OP_RESTART)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_ERR(xe, !*async && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_ERR(xe, !*async && + VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) > + XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) || + XE_IOCTL_ERR(xe, !obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) || + XE_IOCTL_ERR(xe, !obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_ERR(xe, addr && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_ERR(xe, range && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + XE_IOCTL_ERR(xe, obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_ERR(xe, obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_ERR(xe, region && + VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) || + XE_IOCTL_ERR(xe, !(BIT(region) & + xe->info.mem_region_mask)) || + XE_IOCTL_ERR(xe, obj && + VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) { + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) || + XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) || + XE_IOCTL_ERR(xe, range & ~PAGE_MASK) || + XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) != + XE_VM_BIND_OP_RESTART && + VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) { + err = -EINVAL; + goto free_bind_ops; + } + } + + return 0; + +free_bind_ops: + if (args->num_binds > 1) + kfree(*bind_ops); + return err; +} + +int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_bind *args = data; + struct drm_xe_sync __user *syncs_user; + struct xe_bo **bos = NULL; + struct xe_vma **vmas = NULL; + struct xe_vm *vm; + struct xe_engine *e = NULL; + u32 num_syncs; + struct xe_sync_entry *syncs = NULL; + struct drm_xe_vm_bind_op *bind_ops; + bool async; + int err; + int i, j = 0; + + err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async); + if (err) + return err; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) { + err = -EINVAL; + goto free_objs; + } + + if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { + DRM_ERROR("VM closed while we began 
looking up?\n"); + err = -ENOENT; + goto put_vm; + } + + if (args->engine_id) { + e = xe_engine_lookup(xef, args->engine_id); + if (XE_IOCTL_ERR(xe, !e)) { + err = -ENOENT; + goto put_vm; + } + if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) { + err = -EINVAL; + goto put_engine; + } + } + + if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) { + if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) + err = -ENOTSUPP; + if (XE_IOCTL_ERR(xe, !err && args->num_syncs)) + err = EINVAL; + if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error)) + err = -EPROTO; + + if (!err) { + down_write(&vm->lock); + trace_xe_vm_restart(vm); + vm_set_async_error(vm, 0); + up_write(&vm->lock); + + queue_work(system_unbound_wq, &vm->async_ops.work); + + /* Rebinds may have been blocked, give worker a kick */ + if (xe_vm_in_compute_mode(vm)) + queue_work(vm->xe->ordered_wq, + &vm->preempt.rebind_work); + } + + goto put_engine; + } + + if (XE_IOCTL_ERR(xe, !vm->async_ops.error && + async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) { + err = -ENOTSUPP; + goto put_engine; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + + if (XE_IOCTL_ERR(xe, range > vm->size) || + XE_IOCTL_ERR(xe, addr > vm->size - range)) { + err = -EINVAL; + goto put_engine; + } + + if (bind_ops[i].gt_mask) { + u64 valid_gts = BIT(xe->info.tile_count) - 1; + + if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask & + ~valid_gts)) { + err = -EINVAL; + goto put_engine; + } + } + } + + bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL); + if (!bos) { + err = -ENOMEM; + goto put_engine; + } + + vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL); + if (!vmas) { + err = -ENOMEM; + goto put_engine; + } + + for (i = 0; i < args->num_binds; ++i) { + struct drm_gem_object *gem_obj; + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + u32 obj = bind_ops[i].obj; + u64 obj_offset = bind_ops[i].obj_offset; + + if (!obj) + continue; + + gem_obj = drm_gem_object_lookup(file, obj); + if (XE_IOCTL_ERR(xe, !gem_obj)) { + err = -ENOENT; + goto put_obj; + } + bos[i] = gem_to_xe_bo(gem_obj); + + if (XE_IOCTL_ERR(xe, range > bos[i]->size) || + XE_IOCTL_ERR(xe, obj_offset > + bos[i]->size - range)) { + err = -EINVAL; + goto put_obj; + } + + if (bos[i]->flags & XE_BO_INTERNAL_64K) { + if (XE_IOCTL_ERR(xe, obj_offset & + XE_64K_PAGE_MASK) || + XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) || + XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) { + err = -EINVAL; + goto put_obj; + } + } + } + + if (args->num_syncs) { + syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); + if (!syncs) { + err = -ENOMEM; + goto put_obj; + } + } + + syncs_user = u64_to_user_ptr(args->syncs); + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], + &syncs_user[num_syncs], false, + xe_vm_no_dma_fences(vm)); + if (err) + goto free_syncs; + } + + err = down_write_killable(&vm->lock); + if (err) + goto free_syncs; + + /* Do some error checking first to make the unwind easier */ + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + u32 op = bind_ops[i].op; + + err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op); + if (err) + goto release_vm_lock; + } + + for (i = 0; i < args->num_binds; ++i) { + u64 range = bind_ops[i].range; + u64 addr = bind_ops[i].addr; + u32 op = bind_ops[i].op; + u64 obj_offset = bind_ops[i].obj_offset; + u64 gt_mask = bind_ops[i].gt_mask; + u32 region 
= bind_ops[i].region; + + vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset, + addr, range, op, gt_mask, + region); + if (IS_ERR(vmas[i])) { + err = PTR_ERR(vmas[i]); + vmas[i] = NULL; + goto destroy_vmas; + } + } + + for (j = 0; j < args->num_binds; ++j) { + struct xe_sync_entry *__syncs; + u32 __num_syncs = 0; + bool first_or_last = j == 0 || j == args->num_binds - 1; + + if (args->num_binds == 1) { + __num_syncs = num_syncs; + __syncs = syncs; + } else if (first_or_last && num_syncs) { + bool first = j == 0; + + __syncs = kmalloc(sizeof(*__syncs) * num_syncs, + GFP_KERNEL); + if (!__syncs) { + err = ENOMEM; + break; + } + + /* in-syncs on first bind, out-syncs on last bind */ + for (i = 0; i < num_syncs; ++i) { + bool signal = syncs[i].flags & + DRM_XE_SYNC_SIGNAL; + + if ((first && !signal) || (!first && signal)) + __syncs[__num_syncs++] = syncs[i]; + } + } else { + __num_syncs = 0; + __syncs = NULL; + } + + if (async) { + bool last = j == args->num_binds - 1; + + /* + * Each pass of async worker drops the ref, take a ref + * here, 1 set of refs taken above + */ + if (!last) { + if (e) + xe_engine_get(e); + xe_vm_get(vm); + } + + err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j], + bind_ops + j, __syncs, + __num_syncs); + if (err && !last) { + if (e) + xe_engine_put(e); + xe_vm_put(vm); + } + if (err) + break; + } else { + XE_BUG_ON(j != 0); /* Not supported */ + err = vm_bind_ioctl(vm, vmas[j], e, bos[j], + bind_ops + j, __syncs, + __num_syncs, NULL); + break; /* Needed so cleanup loops work */ + } + } + + /* Most of cleanup owned by the async bind worker */ + if (async && !err) { + up_write(&vm->lock); + if (args->num_binds > 1) + kfree(syncs); + goto free_objs; + } + +destroy_vmas: + for (i = j; err && i < args->num_binds; ++i) { + u32 op = bind_ops[i].op; + struct xe_vma *vma, *next; + + if (!vmas[i]) + break; + + list_for_each_entry_safe(vma, next, &vma->unbind_link, + unbind_link) { + list_del_init(&vma->unbind_link); + if (!vma->destroyed) { + prep_vma_destroy(vm, vma); + xe_vma_destroy_unlocked(vma); + } + } + + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + prep_vma_destroy(vm, vmas[i]); + xe_vma_destroy_unlocked(vmas[i]); + break; + case XE_VM_BIND_OP_MAP_USERPTR: + prep_vma_destroy(vm, vmas[i]); + xe_vma_destroy_unlocked(vmas[i]); + break; + } + } +release_vm_lock: + up_write(&vm->lock); +free_syncs: + while (num_syncs--) { + if (async && j && + !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL)) + continue; /* Still in async worker */ + xe_sync_entry_cleanup(&syncs[num_syncs]); + } + + kfree(syncs); +put_obj: + for (i = j; i < args->num_binds; ++i) + xe_bo_put(bos[i]); +put_engine: + if (e) + xe_engine_put(e); +put_vm: + xe_vm_put(vm); +free_objs: + kfree(bos); + kfree(vmas); + if (args->num_binds > 1) + kfree(bind_ops); + return err; +} + +/* + * XXX: Using the TTM wrappers for now, likely can call into dma-resv code + * directly to optimize. Also this likely should be an inline function. 
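+ * + * xe_vm_lock() reserves only the VM's dma-resv (through its page-table BO), reserving num_resv shared fence slots; xe_vm_unlock() drops that reservation again.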
+ */ +int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, + int num_resv, bool intr) +{ + struct ttm_validate_buffer tv_vm; + LIST_HEAD(objs); + LIST_HEAD(dups); + + XE_BUG_ON(!ww); + + tv_vm.num_shared = num_resv; + tv_vm.bo = xe_vm_ttm_bo(vm);; + list_add_tail(&tv_vm.head, &objs); + + return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); +} + +void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww) +{ + dma_resv_unlock(&vm->resv); + ww_acquire_fini(ww); +} + +/** + * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock + * @vma: VMA to invalidate + * + * Walks a list of page tables leaves which it memset the entries owned by this + * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is + * complete. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_vm_invalidate_vma(struct xe_vma *vma) +{ + struct xe_device *xe = vma->vm->xe; + struct xe_gt *gt; + u32 gt_needs_invalidate = 0; + int seqno[XE_MAX_GT]; + u8 id; + int ret; + + XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm)); + trace_xe_vma_usm_invalidate(vma); + + /* Check that we don't race with page-table updates */ + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (xe_vma_is_userptr(vma)) { + WARN_ON_ONCE(!mmu_interval_check_retry + (&vma->userptr.notifier, + vma->userptr.notifier_seq)); + WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv, + DMA_RESV_USAGE_BOOKKEEP)); + + } else { + xe_bo_assert_held(vma->bo); + } + } + + for_each_gt(gt, xe, id) { + if (xe_pt_zap_ptes(gt, vma)) { + gt_needs_invalidate |= BIT(id); + xe_device_wmb(xe); + seqno[id] = xe_gt_tlb_invalidation(gt); + if (seqno[id] < 0) + return seqno[id]; + } + } + + for_each_gt(gt, xe, id) { + if (gt_needs_invalidate & BIT(id)) { + ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]); + if (ret < 0) + return ret; + } + } + + vma->usm.gt_invalidated = vma->gt_mask; + + return 0; +} + +#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) +int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) +{ + struct rb_node *node; + bool is_lmem; + uint64_t addr; + + if (!down_read_trylock(&vm->lock)) { + drm_printf(p, " Failed to acquire VM lock to dump capture"); + return 0; + } + if (vm->pt_root[gt_id]) { + addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_lmem); + drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_lmem ? "LMEM" : "SYS"); + } + + for (node = rb_first(&vm->vmas); node; node = rb_next(node)) { + struct xe_vma *vma = to_xe_vma(node); + bool is_userptr = xe_vma_is_userptr(vma); + + if (is_userptr) { + struct xe_res_cursor cur; + + xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur); + addr = xe_res_dma(&cur); + } else { + addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_lmem); + } + drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", + vma->start, vma->end, vma->end - vma->start + 1ull, + addr, is_userptr ? "USR" : is_lmem ? 
"VRAM" : "SYS"); + } + up_read(&vm->lock); + + return 0; +} +#else +int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) +{ + return 0; +} +#endif diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h new file mode 100644 index 000000000000..3468ed9d0528 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_VM_H_ +#define _XE_VM_H_ + +#include "xe_macros.h" +#include "xe_map.h" +#include "xe_vm_types.h" + +struct drm_device; +struct drm_printer; +struct drm_file; + +struct ttm_buffer_object; +struct ttm_validate_buffer; + +struct xe_engine; +struct xe_file; +struct xe_sync_entry; + +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); +void xe_vm_free(struct kref *ref); + +struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); +int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); + +static inline struct xe_vm *xe_vm_get(struct xe_vm *vm) +{ + kref_get(&vm->refcount); + return vm; +} + +static inline void xe_vm_put(struct xe_vm *vm) +{ + kref_put(&vm->refcount, xe_vm_free); +} + +int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, + int num_resv, bool intr); + +void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww); + +static inline bool xe_vm_is_closed(struct xe_vm *vm) +{ + /* Only guaranteed not to change when vm->resv is held */ + return !vm->size; +} + +struct xe_vma * +xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma); + +#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) + +u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt); + +int xe_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_vm_bind_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +void xe_vm_close_and_put(struct xe_vm *vm); + +static inline bool xe_vm_in_compute_mode(struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_COMPUTE_MODE; +} + +static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_FAULT_MODE; +} + +static inline bool xe_vm_no_dma_fences(struct xe_vm *vm) +{ + return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm); +} + +int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e); + +int xe_vm_userptr_pin(struct xe_vm *vm); + +int __xe_vm_userptr_needs_repin(struct xe_vm *vm); + +int xe_vm_userptr_check_repin(struct xe_vm *vm); + +struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); + +int xe_vm_invalidate_vma(struct xe_vma *vma); + +int xe_vm_async_fence_wait_start(struct dma_fence *fence); + +extern struct ttm_device_funcs xe_ttm_funcs; + +struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm); + +static inline bool xe_vma_is_userptr(struct xe_vma *vma) +{ + return !vma->bo; +} + +int xe_vma_userptr_pin_pages(struct xe_vma *vma); + +int xe_vma_userptr_check_repin(struct xe_vma *vma); + +/* + * XE_ONSTACK_TV is used to size the tv_onstack array that is input + * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv(). 
+ */ +#define XE_ONSTACK_TV 20 +int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer **tv, + struct list_head *objs, + bool intr, + unsigned int num_shared); + +void xe_vm_unlock_dma_resv(struct xe_vm *vm, + struct ttm_validate_buffer *tv_onstack, + struct ttm_validate_buffer *tv, + struct ww_acquire_ctx *ww, + struct list_head *objs); + +void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, + enum dma_resv_usage usage); + +int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) +#define vm_dbg drm_dbg +#else +__printf(2, 3) +static inline void vm_dbg(const struct drm_device *dev, + const char *format, ...) +{ /* noop */ } +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h new file mode 100644 index 000000000000..5b6216964c45 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -0,0 +1,555 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_VM_DOC_H_ +#define _XE_VM_DOC_H_ + +/** + * DOC: XE VM (user address space) + * + * VM creation + * =========== + * + * Allocate a physical page for root of the page table structure, create default + * bind engine, and return a handle to the user. + * + * Scratch page + * ------------ + * + * If the VM is created with the flag, DRM_XE_VM_CREATE_SCRATCH_PAGE, set the + * entire page table structure defaults pointing to blank page allocated by the + * VM. Invalid memory access rather than fault just read / write to this page. + * + * VM bind (create GPU mapping for a BO or userptr) + * ================================================ + * + * Creates GPU mapings for a BO or userptr within a VM. VM binds uses the same + * in / out fence interface (struct drm_xe_sync) as execs which allows users to + * think of binds and execs as more or less the same operation. + * + * Operations + * ---------- + * + * XE_VM_BIND_OP_MAP - Create mapping for a BO + * XE_VM_BIND_OP_UNMAP - Destroy mapping for a BO / userptr + * XE_VM_BIND_OP_MAP_USERPTR - Create mapping for userptr + * + * Implementation details + * ~~~~~~~~~~~~~~~~~~~~~~ + * + * All bind operations are implemented via a hybrid approach of using the CPU + * and GPU to modify page tables. If a new physical page is allocated in the + * page table structure we populate that page via the CPU and insert that new + * page into the existing page table structure via a GPU job. Also any existing + * pages in the page table structure that need to be modified also are updated + * via the GPU job. As the root physical page is prealloced on VM creation our + * GPU job will always have at least 1 update. The in / out fences are passed to + * this job so again this is conceptually the same as an exec. + * + * Very simple example of few binds on an empty VM with 48 bits of address space + * and the resulting operations: + * + * .. 
code-block:: + * + * bind BO0 0x0-0x1000 + * alloc page level 3a, program PTE[0] to BO0 phys address (CPU) + * alloc page level 2, program PDE[0] page level 3a phys address (CPU) + * alloc page level 1, program PDE[0] page level 2 phys address (CPU) + * update root PDE[0] to page level 1 phys address (GPU) + * + * bind BO1 0x201000-0x202000 + * alloc page level 3b, program PTE[1] to BO1 phys address (CPU) + * update page level 2 PDE[1] to page level 3b phys address (GPU) + * + * bind BO2 0x1ff000-0x201000 + * update page level 3a PTE[511] to BO2 phys address (GPU) + * update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU) + * + * GPU bypass + * ~~~~~~~~~~ + * + * In the above example the steps using the GPU can be converted to CPU if the + * bind can be done immediately (all in-fences satisfied, VM dma-resv kernel + * slot is idle). + * + * Address space + * ------------- + * + * Depending on platform either 48 or 57 bits of address space is supported. + * + * Page sizes + * ---------- + * + * The minimum page size is either 4k or 64k depending on platform and memory + * placement (sysmem vs. VRAM). We enforce that binds must be aligned to the + * minimum page size. + * + * Larger pages (2M or 1GB) can be used for BOs in VRAM, the BO physical address + * is aligned to the larger page size, and VA is aligned to the larger page + * size. Larger pages for userptrs / BOs in sysmem should be possible but are not + * yet implemented. + * + * Sync error handling mode + * ------------------------ + * + * In both modes during the bind IOCTL the user input is validated. In sync + * error handling mode the newly bound BO is validated (potentially moved back + * to a region of memory where it can be used), page tables are updated by the + * CPU and the job to do the GPU binds is created in the IOCTL itself. This step + * can fail due to memory pressure. The user can recover by freeing memory and + * trying this operation again. + * + * Async error handling mode + * ------------------------- + * + * In async error handling the step of validating the BO, updating page tables, + * and generating a job is deferred to an async worker. As this step can now + * fail after the IOCTL has reported success we need an error handling flow from + * which the user can recover. + * + * The solution is for a user to register a user address with the VM which the + * VM uses to report errors to. The ufence wait interface can be used to wait on + * a VM going into an error state. Once an error is reported the VM's async + * worker is paused. While the VM's async worker is paused, sync + * XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the + * user believes the error state is fixed, the async worker can be resumed via + * the XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the + * first operation processed is the operation that caused the original error. + * + * Bind queues / engines + * --------------------- + * + * Think of the case where we have two bind operations A + B which are submitted + * in that order. A has in fences while B has none. If using a single bind + * queue, B is now blocked on A's in fences even though it is ready to run. This + * example is a real use case for VK sparse binding. We work around this + * limitation by implementing bind engines. + * + * In the bind IOCTL the user can optionally pass in an engine ID which must map + * to an engine which is of the special class DRM_XE_ENGINE_CLASS_VM_BIND.
+ * Underneath this is really a virtual engine that can run on any of the copy + * hardware engines. The job(s) created by each IOCTL are inserted into this + * engine's ring. In the example above, if A and B have different bind engines, B + * is free to pass A. If the engine ID field is omitted, the default bind queue + * for the VM is used. + * + * TODO: Explain race in issue 41 and how we solve it + * + * Array of bind operations + * ------------------------ + * + * The uAPI allows multiple bind operations to be passed in via a user array + * of struct drm_xe_vm_bind_op, in a single VM bind IOCTL. This interface + * matches the VK sparse binding API. The implementation is rather simple: parse + * the array into a list of operations, pass the in fences to the first operation, + * and pass the out fences to the last operation. The ordered nature of a bind + * engine makes this possible. + * + * Munmap semantics for unbinds + * ---------------------------- + * + * Munmap allows things like: + * + * .. code-block:: + * + * 0x0000-0x2000 and 0x3000-0x5000 have mappings + * Munmap 0x1000-0x4000, results in mappings 0x0000-0x1000 and 0x4000-0x5000 + * + * To support this semantic in the above example we decompose the above example + * into 4 operations: + * + * .. code-block:: + * + * unbind 0x0000-0x2000 + * unbind 0x3000-0x5000 + * rebind 0x0000-0x1000 + * rebind 0x4000-0x5000 + * + * Why not just do a partial unbind of 0x1000-0x2000 and 0x3000-0x4000? This + * falls apart when using large pages at the edges and the unbind forces us to + * use a smaller page size. For simplicity we always issue a set of unbinds + * unmapping anything in the range and at most 2 rebinds on the edges. + * + * Similar to an array of binds, in fences are passed to the first operation and + * out fences are signaled on the last operation. + * + * In this example there is a window of time where 0x0000-0x1000 and + * 0x4000-0x5000 are invalid but the user didn't ask for these addresses to be + * removed from the mapping. To work around this we treat any munmap style + * unbinds which require a rebind as kernel operations (BO eviction or userptr + * invalidation). The first operation waits on the VM's + * DMA_RESV_USAGE_PREEMPT_FENCE slots (waits for all pending jobs on the VM to + * complete / triggers preempt fences) and the last operation is installed in + * the VM's DMA_RESV_USAGE_KERNEL slot (blocks future jobs / the resume of a compute mode + * VM). The caveat is that all dma-resv slots must be updated atomically with respect + * to execs and the compute mode rebind worker. To accomplish this, hold the + * vm->lock in write mode from the first operation until the last. + * + * Deferred binds in fault mode + * ---------------------------- + * + * If a VM is in fault mode (TODO: link to fault mode), new bind operations that + * create mappings are by default deferred to the page fault handler (first + * use). This behavior can be overridden by setting the flag + * XE_VM_BIND_FLAG_IMMEDIATE which indicates that the mapping should be created + * immediately. + * + * User pointer + * ============ + * + * User pointers are user allocated memory (malloc'd, mmap'd, etc.) for which the + * user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO + * was created and then a binding was created. We bypass creating a dummy BO in + * XE and simply create a binding directly from the userptr. + * + * Invalidation + * ------------ + * + * Since this is core kernel managed memory the kernel can move this memory + * whenever it wants.
We register an invalidation MMU notifier to alert XE when + * a user pointer is about to move. The invalidation notifier needs to block + * until all pending users (jobs or compute mode engines) of the userptr are + * idle to ensure no faults. This is done by waiting on all of the VM's dma-resv slots. + * + * Rebinds + * ------- + * + * Either the next exec (non-compute) or rebind worker (compute mode) will + * rebind the userptr. The invalidation MMU notifier kicks the rebind worker + * after the VM dma-resv wait if the VM is in compute mode. + * + * Compute mode + * ============ + * + * A VM in compute mode enables long running workloads and ultra low latency + * submission (ULLS). ULLS is implemented via a continuously running batch + + * semaphores. This enables the user to insert jump-to-new-batch commands + * into the continuously running batch. In both cases these batches exceed the + * time a dma fence is allowed to exist for before signaling; as such, dma fences + * are not used when a VM is in compute mode. User fences (TODO: link user fence + * doc) are used instead to signal an operation's completion. + * + * Preempt fences + * -------------- + * + * If the kernel decides to move memory around (either userptr invalidate, BO + * eviction, or munmap style unbind which results in a rebind) and a batch is + * running on an engine, that batch can fault or cause memory corruption as + * page tables for the moved memory are no longer valid. To work around this we + * introduce the concept of preempt fences. When sw signaling is enabled on a + * preempt fence it tells the submission backend to kick that engine off the + * hardware and the preempt fence signals when the engine is off the hardware. + * Once all preempt fences are signaled for a VM the kernel can safely move the + * memory and kick the rebind worker which resumes all the engines' execution. + * + * A preempt fence, for every engine using the VM, is installed in the VM's + * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every + * engine using the VM, is also installed into the same dma-resv slot of every + * external BO mapped in the VM. + * + * Rebind worker + * ------------- + * + * The rebind worker is very similar to an exec. It is responsible for rebinding + * evicted BOs or userptrs, waiting on those operations, installing new preempt + * fences, and finally resuming execution of engines in the VM. + * + * Flow + * ~~~~ + * + * ..
code-block:: + * + * <----------------------------------------------------------------------| + * Check if VM is closed, if so bail out | + * Lock VM global lock in read mode | + * Pin userptrs (also finds userptr invalidated since last rebind worker) | + * Lock VM dma-resv and external BOs dma-resv | + * Validate BOs that have been evicted | + * Wait on and allocate new preempt fences for every engine using the VM | + * Rebind invalidated userptrs + evicted BOs | + * Wait on last rebind fence | + * Wait VM's DMA_RESV_USAGE_KERNEL dma-resv slot | + * Install preempt fences and issue resume for every engine using the VM | + * Check if any userptrs invalidated since pin | + * Squash resume for all engines | + * Unlock all | + * Wait all VM's dma-resv slots | + * Retry ---------------------------------------------------------- + * Release all engines waiting to resume + * Unlock all + * + * Timeslicing + * ----------- + * + * In order to prevent an engine from continuously being kicked off the hardware + * and making no forward progress, an engine has a period of time it is allowed to + * run after resume before it can be kicked off again. This effectively gives + * each engine a timeslice. + * + * Handling multiple GTs + * ===================== + * + * If a GT has slower access to some regions and the page table structure is in + * the slow region, the performance on that GT could be adversely affected. To + * work around this we allow a VM's page tables to be shadowed in multiple GTs. + * When a VM is created, a default bind engine and page table structure are created + * on each GT. + * + * Binds can optionally pass in a mask of GTs where a mapping should be created; + * if this mask is zero then we default to all the GTs where the VM has page + * tables. + * + * The implementation for this breaks down into a bunch of for_each_gt loops in + * various places plus exporting a composite fence for multi-GT binds to the + * user. + * + * Fault mode (unified shared memory) + * ================================== + * + * A VM in fault mode can be enabled on devices that support page faults. If + * page faults are enabled, using dma fences can potentially induce a deadlock: + * A pending page fault can hold up the GPU work which holds up the dma fence + * signaling, and memory allocation is usually required to resolve a page + * fault, but memory allocation is not allowed to gate dma fence signaling. As + * such, dma fences are not allowed when a VM is in fault mode. Because dma-fences + * are not allowed, long running workloads and ULLS are enabled on a faulting + * VM. + * + * Deferred VM binds + * ----------------- + * + * By default, on a faulting VM binds just allocate the VMA and the actual + * updating of the page tables is deferred to the page fault handler. This + * behavior can be overridden by setting the flag XE_VM_BIND_FLAG_IMMEDIATE in + * the VM bind which will then do the bind immediately. + * + * Page fault handler + * ------------------ + * + * Page faults are received in the G2H worker under the CT lock which is in the + * path of dma fences (no memory allocations are allowed, faults require memory + * allocations), thus we cannot process faults under the CT lock. Another issue + * is that faults issue TLB invalidations which require G2H credits and we cannot + * allocate G2H credits in the G2H handlers without deadlocking. Lastly, we do + * not want the CT lock to be an outer lock of the VM global lock (the VM global + * lock is required for fault processing).
+ * + * To work around the above issue with processing faults in the G2H worker, we + * sink faults to a buffer which is large enough to sink all possible faults on + * the GT (1 per hardware engine) and kick a worker to process the faults. Since + * the page fault G2Hs are already received in a worker, kicking another worker + * adds more latency to a critical performance path. We add a fast path in the + * G2H irq handler which looks at the first G2H and, if it is a page fault, we sink + * the fault to the buffer and kick the worker to process the fault. TLB + * invalidation responses are also in the critical path so these can also be + * processed in this fast path. + * + * Multiple buffers and workers are used and hashed over based on the ASID so + * faults from different VMs can be processed in parallel. + * + * The page fault handler itself is rather simple; the flow is below. + * + * .. code-block:: + * + * Lookup VM from ASID in page fault G2H + * Lock VM global lock in read mode + * Lookup VMA from address in page fault G2H + * Check if VMA is valid, if not bail + * Check if VMA's BO has backing store, if not allocate + * <----------------------------------------------------------------------| + * If userptr, pin pages | + * Lock VM & BO dma-resv locks | + * If atomic fault, migrate to VRAM, else validate BO location | + * Issue rebind | + * Wait on rebind to complete | + * Check if userptr invalidated since pin | + * Drop VM & BO dma-resv locks | + * Retry ---------------------------------------------------------- + * Unlock all + * Issue blocking TLB invalidation | + * Send page fault response to GuC + * + * Access counters + * --------------- + * + * Access counters can be configured to trigger a G2H indicating the device is + * accessing VMAs in system memory frequently, as a hint to migrate those VMAs to + * VRAM. + * + * Same as the page fault handler, access counter G2Hs cannot be processed in the + * G2H worker under the CT lock. Again we use a buffer to sink access counter + * G2Hs. Unlike page faults there is no upper bound, so if the buffer is full we + * simply drop the G2H. Access counters are a best case optimization and it is + * safe to drop these, unlike page faults. + * + * The access counter handler itself is rather simple; the flow is below. + * + * .. code-block:: + * + * Lookup VM from ASID in access counter G2H + * Lock VM global lock in read mode + * Lookup VMA from address in access counter G2H + * If userptr, bail, nothing to do + * Lock VM & BO dma-resv locks + * Issue migration to VRAM + * Unlock all + * + * Notice no rebind is issued in the access counter handler as the rebind will + * be issued on the next page fault. + * + * Caveats with eviction / user pointer invalidation + * ------------------------------------------------- + * + * In the case of eviction and user pointer invalidation on a faulting VM, there + * is no need to issue a rebind; rather, we just need to blow away the page tables + * for the VMAs and the page fault handler will rebind the VMAs when they fault. + * The caveat is that to update / read the page table structure the VM global lock is + * needed. In both the case of eviction and user pointer invalidation, locks are + * held which make acquiring the VM global lock impossible. To work around this + * every VMA maintains a list of leaf page table entries which should be written + * to zero to blow away the VMA's page tables. After writing zero to these + * entries a blocking TLB invalidate is issued.
At this point it is safe for the + * kernel to move the VMA's memory around. This is a necessary lockless + * algorithm and is safe as leaves cannot be changed while either an eviction or + * userptr invalidation is occurring. + * + * Locking + * ======= + * + * VM locking protects all of the core data paths (bind operations, execs, + * evictions, and compute mode rebind worker) in XE. + * + * Locks + * ----- + * + * VM global lock (vm->lock) - rw semaphore lock. Outermost lock which protects + * the list of userptrs mapped in the VM, the list of engines using this VM, and + * the array of external BOs mapped in the VM. When adding or removing any of the + * aforementioned state from the VM, this lock should be acquired in write mode. The VM + * bind path also acquires this lock in write mode while the exec / compute + * mode rebind worker acquire this lock in read mode. + * + * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv + * slots which are shared with any private BO in the VM. Expected to be acquired + * during VM binds, execs, and the compute mode rebind worker. This lock is also + * held when private BOs are being evicted. + * + * external BO dma-resv lock (bo->ttm.base.resv->lock) - WW lock. Protects + * external BO dma-resv slots. Expected to be acquired during VM binds (in + * addition to the VM dma-resv lock). All external BO dma-locks within a VM are + * expected to be acquired (in addition to the VM dma-resv lock) during execs + * and the compute mode rebind worker. This lock is also held when an external + * BO is being evicted. + * + * Putting it all together + * ----------------------- + * + * 1. An exec and bind operation with the same VM can't be executing at the same + * time (vm->lock). + * + * 2. A compute mode rebind worker and bind operation with the same VM can't be + * executing at the same time (vm->lock). + * + * 3. We can't add / remove userptrs or external BOs to a VM while an exec with + * the same VM is executing (vm->lock). + * + * 4. We can't add / remove userptrs, external BOs, or engines to a VM while a + * compute mode rebind worker with the same VM is executing (vm->lock). + * + * 5. Evictions within a VM can't happen while an exec with the same VM is + * executing (dma-resv locks). + * + * 6. Evictions within a VM can't happen while a compute mode rebind worker + * with the same VM is executing (dma-resv locks). + * + * dma-resv usage + * ============== + * + * As previously stated, to enforce the ordering of kernel ops (eviction, userptr + * invalidation, munmap style unbinds which result in a rebind), rebinds during + * execs, execs, and resumes in the rebind worker, we use both the VM's and + * external BOs' dma-resv slots. Let's try to make this as clear as possible. + * + * Slot installation + * ----------------- + * + * 1. Jobs from kernel ops install themselves into the DMA_RESV_USAGE_KERNEL + * slot of either an external BO or VM (depends on if the kernel op is operating on + * an external or private BO) + * + * 2. In non-compute mode, jobs from execs install themselves into the + * DMA_RESV_USAGE_BOOKKEEP slot of the VM + * + * 3. In non-compute mode, jobs from execs install themselves into the + * DMA_RESV_USAGE_WRITE slot of all external BOs in the VM + * + * 4. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot + * of the VM + * + * 5. Jobs from binds install themselves into the DMA_RESV_USAGE_BOOKKEEP slot + * of the external BO (if the bind is to an external BO, this is in addition to #4) + * + * 6.
Every engine using a compute mode VM has a preempt fence installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of the VM
+ *
+ * 7. Every engine using a compute mode VM has a preempt fence installed into
+ * the DMA_RESV_USAGE_PREEMPT_FENCE slot of all the external BOs in the VM
+ *
+ * Slot waiting
+ * ------------
+ *
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
+ * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or the VM (depending
+ * on whether the kernel op is operating on an external or a private BO)
+ *
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
+ * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or the VM
+ * (depending on whether the rebind is operating on an external or a private BO)
+ *
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on the
+ * last rebind job
+ *
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
+ * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
+ * or the VM (depending on whether the rebind is operating on an external or a
+ * private BO)
+ *
+ * 5. In compute mode, resumes in the rebind worker shall wait on the last
+ * rebind fence
+ *
+ * 6. In compute mode, resumes in the rebind worker shall wait on the
+ * DMA_RESV_USAGE_KERNEL slot of the VM
+ *
+ * Putting it all together
+ * -----------------------
+ *
+ * 1. New jobs from kernel ops are blocked behind any existing jobs from
+ * non-compute mode execs
+ *
+ * 2. New jobs from non-compute mode execs are blocked behind any existing jobs
+ * from kernel ops and rebinds
+ *
+ * 3. New jobs from kernel ops are blocked behind all preempt fences signaling in
+ * compute mode
+ *
+ * 4. Compute mode engine resumes are blocked behind any existing jobs from
+ * kernel ops and rebinds
+ *
+ * Future work
+ * ===========
+ *
+ * Support large pages for sysmem and userptr.
+ *
+ * Update page faults to handle BOs at page-level granularity (e.g. part of a BO
+ * could be in system memory while another part could be in VRAM).
+ *
+ * The page fault handler will likely be optimized a bit more (e.g. rebinds
+ * always wait on the dma-resv kernel slots of the VM or BO, while technically we
+ * only have to wait on the BO move; if using a job to do the rebind, we could
+ * avoid blocking in the page fault handler and rather attach a callback to the
+ * fence of the rebind job to signal page fault complete; our handling of
+ * short-circuiting atomic faults for bound VMAs could be better; etc...). We can
+ * tune all of this once we have benchmarks / performance numbers from workloads
+ * up and running.
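+ *
+ * Finally, the illustrative sketch referenced from the dma-resv usage section:
+ * a kernel op publishes its job fence in the kernel slot of the object it
+ * operates on, while a non-compute exec first waits for kernel ops and then
+ * bookkeeps its own job fence on the VM. This is only a minimal sketch using
+ * the generic dma-resv API, not the driver's actual helpers; the fence names
+ * are made up and the scheduler dependency handling is collapsed into a
+ * blocking wait for brevity.
+ *
+ * .. code-block::
+ *
+ *	dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+ *	dma_resv_add_fence(bo->ttm.base.resv, kernel_op_fence,
+ *			   DMA_RESV_USAGE_KERNEL);
+ *
+ *	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ *			      false, MAX_SCHEDULE_TIMEOUT);
+ *	dma_resv_reserve_fences(&vm->resv, 1);
+ *	dma_resv_add_fence(&vm->resv, exec_job_fence,
+ *			   DMA_RESV_USAGE_BOOKKEEP);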
+ */ + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c new file mode 100644 index 000000000000..4498aa2fbd47 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -0,0 +1,347 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include +#include + +#include "xe_bo.h" +#include "xe_vm.h" +#include "xe_vm_madvise.h" + +static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + u64 value) +{ + int i, err; + + if (XE_IOCTL_ERR(xe, value > XE_MEM_REGION_CLASS_VRAM)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, value == XE_MEM_REGION_CLASS_VRAM && + !xe->info.is_dgfx)) + return -EINVAL; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.preferred_mem_class = value; + xe_bo_placement_for_flags(xe, bo, bo->flags); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + int i, err; + + if (XE_IOCTL_ERR(xe, value > xe->info.tile_count)) + return -EINVAL; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.preferred_gt = value; + xe_bo_placement_for_flags(xe, bo, bo->flags); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_preferred_mem_class_gt(struct xe_device *xe, + struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + u64 value) +{ + int i, err; + u32 gt_id = upper_32_bits(value); + u32 mem_class = lower_32_bits(value); + + if (XE_IOCTL_ERR(xe, mem_class > XE_MEM_REGION_CLASS_VRAM)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, mem_class == XE_MEM_REGION_CLASS_VRAM && + !xe->info.is_dgfx)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, gt_id > xe->info.tile_count)) + return -EINVAL; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.preferred_mem_class = mem_class; + bo->props.preferred_gt = gt_id; + xe_bo_placement_for_flags(xe, bo, bo->flags); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + int i, err; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT))) + return -EINVAL; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.cpu_atomic = !!value; + + /* + * All future CPU accesses must be from system memory only, we + * just invalidate the CPU page tables which will trigger a + * migration on next access. 
+ */ + if (bo->props.cpu_atomic) + ttm_bo_unmap_virtual(&bo->ttm); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + int i, err; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) && + !(bo->flags & XE_BO_CREATE_VRAM1_BIT))) + return -EINVAL; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->props.device_atomic = !!value; + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_priority(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + int i, err; + + if (XE_IOCTL_ERR(xe, value > DRM_XE_VMA_PRIORITY_HIGH)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, value == DRM_XE_VMA_PRIORITY_HIGH && + !capable(CAP_SYS_NICE))) + return -EPERM; + + for (i = 0; i < num_vmas; ++i) { + struct xe_bo *bo; + struct ww_acquire_ctx ww; + + bo = vmas[i]->bo; + + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return err; + bo->ttm.priority = value; + ttm_bo_move_to_lru_tail(&bo->ttm); + xe_bo_unlock(bo, &ww); + } + + return 0; +} + +static int madvise_pin(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value) +{ + XE_WARN_ON("NIY"); + return 0; +} + +typedef int (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, u64 value); + +static const madvise_func madvise_funcs[] = { + [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS] = madvise_preferred_mem_class, + [DRM_XE_VM_MADVISE_PREFERRED_GT] = madvise_preferred_gt, + [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT] = + madvise_preferred_mem_class_gt, + [DRM_XE_VM_MADVISE_CPU_ATOMIC] = madvise_cpu_atomic, + [DRM_XE_VM_MADVISE_DEVICE_ATOMIC] = madvise_device_atomic, + [DRM_XE_VM_MADVISE_PRIORITY] = madvise_priority, + [DRM_XE_VM_MADVISE_PIN] = madvise_pin, +}; + +static struct xe_vma *node_to_vma(const struct rb_node *node) +{ + BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0); + return (struct xe_vma *)node; +} + +static struct xe_vma ** +get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range) +{ + struct xe_vma **vmas; + struct xe_vma *vma, *__vma, lookup; + int max_vmas = 8; + struct rb_node *node; + + lockdep_assert_held(&vm->lock); + + vmas = kmalloc(max_vmas * sizeof(*vmas), GFP_KERNEL); + if (!vmas) + return NULL; + + lookup.start = addr; + lookup.end = addr + range - 1; + + vma = xe_vm_find_overlapping_vma(vm, &lookup); + if (!vma) + return vmas; + + if (!xe_vma_is_userptr(vma)) { + vmas[*num_vmas] = vma; + *num_vmas += 1; + } + + node = &vma->vm_node; + while ((node = rb_next(node))) { + if (!xe_vma_cmp_vma_cb(&lookup, node)) { + __vma = node_to_vma(node); + if (xe_vma_is_userptr(__vma)) + continue; + + if (*num_vmas == max_vmas) { + struct xe_vma **__vmas = + krealloc(vmas, max_vmas * sizeof(*vmas), + GFP_KERNEL); + + if (!__vmas) + return NULL; + vmas = __vmas; + } + vmas[*num_vmas] = __vma; + *num_vmas += 1; + } else { + break; + } + } + + node = &vma->vm_node; + while ((node = rb_prev(node))) { + if (!xe_vma_cmp_vma_cb(&lookup, node)) { + __vma = node_to_vma(node); + if (xe_vma_is_userptr(__vma)) + continue; + + if (*num_vmas == max_vmas) { + struct xe_vma **__vmas = + krealloc(vmas, max_vmas * sizeof(*vmas), + GFP_KERNEL); + + if (!__vmas) + return NULL; + vmas = __vmas; + } + vmas[*num_vmas] = __vma; + *num_vmas += 1; + } else { + break; + } + } + + return vmas; +} + +int 
xe_vm_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_madvise *args = data; + struct xe_vm *vm; + struct xe_vma **vmas = NULL; + int num_vmas = 0, err = 0, idx; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->property > ARRAY_SIZE(madvise_funcs))) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { + err = -ENOENT; + goto put_vm; + } + + if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) { + err = -EINVAL; + goto put_vm; + } + + down_read(&vm->lock); + + vmas = get_vmas(vm, &num_vmas, args->addr, args->range); + if (XE_IOCTL_ERR(xe, err)) + goto unlock_vm; + + if (XE_IOCTL_ERR(xe, !vmas)) { + err = -ENOMEM; + goto unlock_vm; + } + + if (XE_IOCTL_ERR(xe, !num_vmas)) { + err = -EINVAL; + goto unlock_vm; + } + + idx = array_index_nospec(args->property, ARRAY_SIZE(madvise_funcs)); + err = madvise_funcs[idx](xe, vm, vmas, num_vmas, args->value); + +unlock_vm: + up_read(&vm->lock); +put_vm: + xe_vm_put(vm); + kfree(vmas); + return err; +} diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h new file mode 100644 index 000000000000..eecd33acd248 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_VM_MADVISE_H_ +#define _XE_VM_MADVISE_H_ + +struct drm_device; +struct drm_file; + +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h new file mode 100644 index 000000000000..2a3b911ab358 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_VM_TYPES_H_ +#define _XE_VM_TYPES_H_ + +#include +#include +#include +#include + +#include "xe_device_types.h" +#include "xe_pt_types.h" + +struct xe_bo; +struct xe_vm; + +struct xe_vma { + struct rb_node vm_node; + /** @vm: VM which this VMA belongs to */ + struct xe_vm *vm; + + /** + * @start: start address of this VMA within its address domain, end - + * start + 1 == VMA size + */ + u64 start; + /** @end: end address of this VMA within its address domain */ + u64 end; + /** @pte_flags: pte flags for this VMA */ + u32 pte_flags; + + /** @bo: BO if not a userptr, must be NULL is userptr */ + struct xe_bo *bo; + /** @bo_offset: offset into BO if not a userptr, unused for userptr */ + u64 bo_offset; + + /** @gt_mask: GT mask of where to create binding for this VMA */ + u64 gt_mask; + + /** + * @gt_present: GT mask of binding are present for this VMA. + * protected by vm->lock, vm->resv and for userptrs, + * vm->userptr.notifier_lock for writing. Needs either for reading, + * but if reading is done under the vm->lock only, it needs to be held + * in write mode. + */ + u64 gt_present; + + /** + * @destroyed: VMA is destroyed, in the sense that it shouldn't be + * subject to rebind anymore. This field must be written under + * the vm lock in write mode and the userptr.notifier_lock in + * either mode. Read under the vm lock or the userptr.notifier_lock in + * write mode. 
+ */ + bool destroyed; + + /** + * @first_munmap_rebind: VMA is first in a sequence of ops that triggers + * a rebind (munmap style VM unbinds). This indicates the operation + * using this VMA must wait on all dma-resv slots (wait for pending jobs + * / trigger preempt fences). + */ + bool first_munmap_rebind; + + /** + * @last_munmap_rebind: VMA is first in a sequence of ops that triggers + * a rebind (munmap style VM unbinds). This indicates the operation + * using this VMA must install itself into kernel dma-resv slot (blocks + * future jobs) and kick the rebind work in compute mode. + */ + bool last_munmap_rebind; + + /** @use_atomic_access_pte_bit: Set atomic access bit in PTE */ + bool use_atomic_access_pte_bit; + + union { + /** @bo_link: link into BO if not a userptr */ + struct list_head bo_link; + /** @userptr_link: link into VM repin list if userptr */ + struct list_head userptr_link; + }; + + /** + * @rebind_link: link into VM if this VMA needs rebinding, and + * if it's a bo (not userptr) needs validation after a possible + * eviction. Protected by the vm's resv lock. + */ + struct list_head rebind_link; + + /** + * @unbind_link: link or list head if an unbind of multiple VMAs, in + * single unbind op, is being done. + */ + struct list_head unbind_link; + + /** @destroy_cb: callback to destroy VMA when unbind job is done */ + struct dma_fence_cb destroy_cb; + + /** @destroy_work: worker to destroy this BO */ + struct work_struct destroy_work; + + /** @userptr: user pointer state */ + struct { + /** @ptr: user pointer */ + uintptr_t ptr; + /** @invalidate_link: Link for the vm::userptr.invalidated list */ + struct list_head invalidate_link; + /** + * @notifier: MMU notifier for user pointer (invalidation call back) + */ + struct mmu_interval_notifier notifier; + /** @sgt: storage for a scatter gather table */ + struct sg_table sgt; + /** @sg: allocated scatter gather table */ + struct sg_table *sg; + /** @notifier_seq: notifier sequence number */ + unsigned long notifier_seq; + /** + * @initial_bind: user pointer has been bound at least once. + * write: vm->userptr.notifier_lock in read mode and vm->resv held. + * read: vm->userptr.notifier_lock in write mode or vm->resv held. + */ + bool initial_bind; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) + u32 divisor; +#endif + } userptr; + + /** @usm: unified shared memory state */ + struct { + /** @gt_invalidated: VMA has been invalidated */ + u64 gt_invalidated; + } usm; + + struct { + struct list_head rebind_link; + } notifier; + + struct { + /** + * @extobj.link: Link into vm's external object list. + * protected by the vm lock. 
+ */ + struct list_head link; + } extobj; +}; + +struct xe_device; + +#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) + +struct xe_vm { + struct xe_device *xe; + + struct kref refcount; + + /* engine used for (un)binding vma's */ + struct xe_engine *eng[XE_MAX_GT]; + + /** Protects @rebind_list and the page-table structures */ + struct dma_resv resv; + + u64 size; + struct rb_root vmas; + + struct xe_pt *pt_root[XE_MAX_GT]; + struct xe_bo *scratch_bo[XE_MAX_GT]; + struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL]; + + /** @flags: flags for this VM, statically setup a creation time */ +#define XE_VM_FLAGS_64K BIT(0) +#define XE_VM_FLAG_COMPUTE_MODE BIT(1) +#define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2) +#define XE_VM_FLAG_MIGRATION BIT(3) +#define XE_VM_FLAG_SCRATCH_PAGE BIT(4) +#define XE_VM_FLAG_FAULT_MODE BIT(5) +#define XE_VM_FLAG_GT_ID(flags) (((flags) >> 6) & 0x3) +#define XE_VM_FLAG_SET_GT_ID(gt) ((gt)->info.id << 6) + unsigned long flags; + + /** @composite_fence_ctx: context composite fence */ + u64 composite_fence_ctx; + /** @composite_fence_seqno: seqno for composite fence */ + u32 composite_fence_seqno; + + /** + * @lock: outer most lock, protects objects of anything attached to this + * VM + */ + struct rw_semaphore lock; + + /** + * @rebind_list: list of VMAs that need rebinding, and if they are + * bos (not userptr), need validation after a possible eviction. The + * list is protected by @resv. + */ + struct list_head rebind_list; + + /** @rebind_fence: rebind fence from execbuf */ + struct dma_fence *rebind_fence; + + /** + * @destroy_work: worker to destroy VM, needed as a dma_fence signaling + * from an irq context can be last put and the destroy needs to be able + * to sleep. + */ + struct work_struct destroy_work; + + /** @extobj: bookkeeping for external objects. Protected by the vm lock */ + struct { + /** @enties: number of external BOs attached this VM */ + u32 entries; + /** @list: list of vmas with external bos attached */ + struct list_head list; + } extobj; + + /** @async_ops: async VM operations (bind / unbinds) */ + struct { + /** @list: list of pending async VM ops */ + struct list_head pending; + /** @work: worker to execute async VM ops */ + struct work_struct work; + /** @lock: protects list of pending async VM ops and fences */ + spinlock_t lock; + /** @error_capture: error capture state */ + struct { + /** @mm: user MM */ + struct mm_struct *mm; + /** + * @addr: user pointer to copy error capture state too + */ + u64 addr; + /** @wq: user fence wait queue for VM errors */ + wait_queue_head_t wq; + } error_capture; + /** @fence: fence state */ + struct { + /** @context: context of async fence */ + u64 context; + /** @seqno: seqno of async fence */ + u32 seqno; + } fence; + /** @error: error state for async VM ops */ + int error; + /** + * @munmap_rebind_inflight: an munmap style VM bind is in the + * middle of a set of ops which requires a rebind at the end. + */ + bool munmap_rebind_inflight; + } async_ops; + + /** @userptr: user pointer state */ + struct { + /** + * @userptr.repin_list: list of VMAs which are user pointers, + * and needs repinning. Protected by @lock. + */ + struct list_head repin_list; + /** + * @notifier_lock: protects notifier in write mode and + * submission in read mode. + */ + struct rw_semaphore notifier_lock; + /** + * @userptr.invalidated_lock: Protects the + * @userptr.invalidated list. 
+ */ + spinlock_t invalidated_lock; + /** + * @userptr.invalidated: List of invalidated userptrs, not yet + * picked + * up for revalidation. Protected from access with the + * @invalidated_lock. Removing items from the list + * additionally requires @lock in write mode, and adding + * items to the list requires the @userptr.notifer_lock in + * write mode. + */ + struct list_head invalidated; + } userptr; + + /** @preempt: preempt state */ + struct { + /** + * @min_run_period_ms: The minimum run period before preempting + * an engine again + */ + s64 min_run_period_ms; + /** @engines: list of engines attached to this VM */ + struct list_head engines; + /** @num_engines: number user engines attached to this VM */ + int num_engines; + /** + * @rebind_work: worker to rebind invalidated userptrs / evicted + * BOs + */ + struct work_struct rebind_work; + } preempt; + + /** @um: unified memory state */ + struct { + /** @asid: address space ID, unique to each VM */ + u32 asid; + /** + * @last_fault_vma: Last fault VMA, used for fast lookup when we + * get a flood of faults to the same VMA + */ + struct xe_vma *last_fault_vma; + } usm; + + /** + * @notifier: Lists and locks for temporary usage within notifiers where + * we either can't grab the vm lock or the vm resv. + */ + struct { + /** @notifier.list_lock: lock protecting @rebind_list */ + spinlock_t list_lock; + /** + * @notifier.rebind_list: list of vmas that we want to put on the + * main @rebind_list. This list is protected for writing by both + * notifier.list_lock, and the resv of the bo the vma points to, + * and for reading by the notifier.list_lock only. + */ + struct list_head rebind_list; + } notifier; + + /** @error_capture: allow to track errors */ + struct { + /** @capture_once: capture only one error per VM */ + bool capture_once; + } error_capture; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c new file mode 100644 index 000000000000..b56141ba7145 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_wa.h" + +#include + +#include "xe_device_types.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_hw_engine_types.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" +#include "xe_rtp.h" +#include "xe_step.h" + +#include "gt/intel_engine_regs.h" +#include "gt/intel_gt_regs.h" +#include "i915_reg.h" + +/** + * DOC: Hardware workarounds + * + * Hardware workarounds are register programming documented to be executed in + * the driver that fall outside of the normal programming sequences for a + * platform. There are some basic categories of workarounds, depending on + * how/when they are applied: + * + * - LRC workarounds: workarounds that touch registers that are + * saved/restored to/from the HW context image. The list is emitted (via Load + * Register Immediate commands) once when initializing the device and saved in + * the default context. That default context is then used on every context + * creation to have a "primed golden context", i.e. a context image that + * already contains the changes needed to all the registers. + * + * TODO: Although these workarounds are maintained here, they are not + * currently being applied. + * + * - Engine workarounds: the list of these WAs is applied whenever the specific + * engine is reset. It's also possible that a set of engine classes share a + * common power domain and they are reset together. 
This happens on some
+ * platforms with render and compute engines. In this case (at least) one of
+ * them needs to keep the workaround programming: the approach taken in the
+ * driver is to tie those workarounds to the first compute/render engine that
+ * is registered. When executing with GuC submission, engine resets are
+ * outside of kernel driver control, hence the list of registers involved is
+ * written once, on engine initialization, and then passed to GuC, which
+ * saves/restores their values before/after the reset takes place. See
+ * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ *
+ * - GT workarounds: the list of these WAs is applied whenever these registers
+ *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
+ *
+ * - Register whitelist: some workarounds need to be implemented in userspace,
+ *   but need to touch privileged registers. The whitelist in the kernel
+ *   instructs the hardware to allow the access to happen. From the kernel side,
+ *   this is just a special case of an MMIO workaround (as we write the list of
+ *   these to-be-whitelisted registers to some special HW registers).
+ *
+ * - Workaround batchbuffers: buffers that get executed automatically by the
+ *   hardware on every HW context restore. These buffers are created and
+ *   programmed in the default context so the hardware always goes through those
+ *   programming sequences when switching contexts. The support for workaround
+ *   batchbuffers is enabled by these hardware mechanisms:
+ *
+ *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
+ *      context, pointing the hardware to jump to that location when that offset
+ *      is reached in the context restore. The workaround batchbuffer in the
+ *      driver currently uses this mechanism for all platforms.
+ *
+ *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
+ *      pointing the hardware to a buffer to continue executing after the
+ *      engine registers are restored in a context restore sequence. This is
+ *      currently not used in the driver.
+ *
+ * - Other: There are WAs that, due to their nature, cannot be applied from a
+ *   central place. Those are peppered around the rest of the code, as needed.
+ *   Workarounds related to the display IP are the main example.
+ *
+ * .. [1] Technically, some registers are power-context saved and restored, so
+ *        they survive a suspend/resume. In practice, writing them again is not
+ *        too costly and simplifies things, so it's the approach taken in the
+ *        driver.
+ *
+ * .. note::
+ *    Hardware workarounds in xe work the same way as in i915, the difference
+ *    being how they are maintained in the code. xe uses the xe_rtp
+ *    infrastructure so the workarounds can be kept in tables, following a more
+ *    declarative approach rather than a procedural one.
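+ *
+ * To make that contrast concrete, below is the same hypothetical workaround
+ * expressed procedurally (i915 style) and declaratively (xe_rtp style).
+ * SOME_REG, SOME_BIT and the workaround number are placeholders, not real
+ * definitions:
+ *
+ * .. code-block::
+ *
+ *	if (IS_DG2(i915))
+ *		intel_uncore_rmw(uncore, SOME_REG, 0, SOME_BIT);
+ *
+ *	{ XE_RTP_NAME("xxxx"),
+ *	  XE_RTP_RULES(PLATFORM(DG2)),
+ *	  XE_RTP_SET(SOME_REG, SOME_BIT)
+ *	},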
+ */ + +#undef _MMIO +#undef MCR_REG +#define _MMIO(x) _XE_RTP_REG(x) +#define MCR_REG(x) _XE_RTP_MCR_REG(x) + +static bool match_14011060649(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return hwe->instance % 2 == 0; +} + +static const struct xe_rtp_entry gt_was[] = { + { XE_RTP_NAME("14011060649"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), + ENGINE_CLASS(VIDEO_DECODE), + FUNC(match_14011060649)), + XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS, + XE_RTP_FLAG(FOREACH_ENGINE)) + }, + { XE_RTP_NAME("16010515920"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), + STEP(A0, B0), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS, + XE_RTP_FLAG(FOREACH_ENGINE)) + }, + { XE_RTP_NAME("22010523718"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10)), + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011006942"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10)), + XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS) + }, + { XE_RTP_NAME("14010948348"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011037102"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011371254"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011431319/0"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLCGCTL9440, + GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011431319/1"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLCGCTL9444, + GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS) + }, + { XE_RTP_NAME("14010569222"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS) + }, + { XE_RTP_NAME("14011028019"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS) + }, + { XE_RTP_NAME("14014830051"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN) + }, + { XE_RTP_NAME("14015795083"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE) + }, + { XE_RTP_NAME("14011059788"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE) + }, + { XE_RTP_NAME("1409420604"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS) + }, + { XE_RTP_NAME("1408615072"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL) + }, + {} +}; + +static const struct xe_rtp_entry engine_was[] = { + { XE_RTP_NAME("14015227452"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN9_ROW_CHICKEN4, 
XEHP_DIS_BBL_SYSPIPE, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1606931601"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE) + }, + { XE_RTP_NAME("14010826681, 1606700617, 22010271021"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("18019627453"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1409804808"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), + ENGINE_CLASS(RENDER), + IS_INTEGRATED), + XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("14010229206, 1409085225"), + XE_RTP_RULES(GRAPHICS_VERSION(1200), + ENGINE_CLASS(RENDER), + IS_INTEGRATED), + XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), + XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), + XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), + XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), + XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("1406941453"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), + XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL, + XE_RTP_FLAG(MASKED_REG)) + }, + {} +}; + +static const struct xe_rtp_entry lrc_was[] = { + { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("16011163337"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + /* read verification is ignored due to 1608008084. 
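+	   * The value is still written; it is just not read back and verified.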
*/ + XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK, + FF_MODE2_GS_TIMER_224) + }, + { XE_RTP_NAME("1409044764"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3, + DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN, + XE_RTP_FLAG(MASKED_REG)) + }, + { XE_RTP_NAME("22010493298"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_SET(HIZ_CHICKEN, + DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE, + XE_RTP_FLAG(MASKED_REG)) + }, + {} +}; + +/** + * xe_wa_process_gt - process GT workaround table + * @gt: GT instance to process workarounds for + * + * Process GT workaround table for this platform, saving in @gt all the + * workarounds that need to be applied at the GT level. + */ +void xe_wa_process_gt(struct xe_gt *gt) +{ + xe_rtp_process(gt_was, >->reg_sr, gt, NULL); +} + +/** + * xe_wa_process_engine - process engine workaround table + * @hwe: engine instance to process workarounds for + * + * Process engine workaround table for this platform, saving in @hwe all the + * workarounds that need to be applied at the engine level that match this + * engine. + */ +void xe_wa_process_engine(struct xe_hw_engine *hwe) +{ + xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe); +} + +/** + * xe_wa_process_lrc - process context workaround table + * @hwe: engine instance to process workarounds for + * + * Process context workaround table for this platform, saving in @hwe all the + * workarounds that need to be applied on context restore. These are workarounds + * touching registers that are part of the HW context image. + */ +void xe_wa_process_lrc(struct xe_hw_engine *hwe) +{ + xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe); +} diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h new file mode 100644 index 000000000000..cd2307d58795 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_WA_ +#define _XE_WA_ + +struct xe_gt; +struct xe_hw_engine; + +void xe_wa_process_gt(struct xe_gt *gt); +void xe_wa_process_engine(struct xe_hw_engine *hwe); +void xe_wa_process_lrc(struct xe_hw_engine *hwe); + +void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c new file mode 100644 index 000000000000..8a8d814a0e7a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_macros.h" +#include "xe_vm.h" + +static int do_compare(u64 addr, u64 value, u64 mask, u16 op) +{ + u64 rvalue; + int err; + bool passed; + + err = copy_from_user(&rvalue, u64_to_user_ptr(addr), sizeof(rvalue)); + if (err) + return -EFAULT; + + switch (op) { + case DRM_XE_UFENCE_WAIT_EQ: + passed = (rvalue & mask) == (value & mask); + break; + case DRM_XE_UFENCE_WAIT_NEQ: + passed = (rvalue & mask) != (value & mask); + break; + case DRM_XE_UFENCE_WAIT_GT: + passed = (rvalue & mask) > (value & mask); + break; + case DRM_XE_UFENCE_WAIT_GTE: + passed = (rvalue & mask) >= (value & mask); + break; + case DRM_XE_UFENCE_WAIT_LT: + passed = (rvalue & mask) < (value & mask); + break; + case DRM_XE_UFENCE_WAIT_LTE: + passed = (rvalue & mask) <= (value & mask); + break; + default: + XE_BUG_ON("Not possible"); + } + + return passed ? 
0 : 1; +} + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +int check_hw_engines(struct xe_device *xe, + struct drm_xe_engine_class_instance *eci, + int num_engines) +{ + int i; + + for (i = 0; i < num_engines; ++i) { + enum xe_engine_class user_class = + user_to_xe_engine_class[eci[i].engine_class]; + + if (eci[i].gt_id >= xe->info.tile_count) + return -EINVAL; + + if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id), + user_class, eci[i].engine_instance, true)) + return -EINVAL; + } + + return 0; +} + +#define VALID_FLAGS (DRM_XE_UFENCE_WAIT_SOFT_OP | \ + DRM_XE_UFENCE_WAIT_ABSTIME | \ + DRM_XE_UFENCE_WAIT_VM_ERROR) +#define MAX_OP DRM_XE_UFENCE_WAIT_LTE + +int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + DEFINE_WAIT_FUNC(w_wait, woken_wake_function); + struct drm_xe_wait_user_fence *args = data; + struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; + struct drm_xe_engine_class_instance __user *user_eci = + u64_to_user_ptr(args->instances); + struct xe_vm *vm = NULL; + u64 addr = args->addr; + int err; + bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP || + args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR; + unsigned long timeout = args->timeout; + + if (XE_IOCTL_ERR(xe, args->extensions)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, args->op > MAX_OP)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, no_engines && + (args->num_engines || args->instances))) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !no_engines && !args->num_engines)) + return -EINVAL; + + if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) && + addr & 0x7)) + return -EINVAL; + + if (!no_engines) { + err = copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * + args->num_engines); + if (XE_IOCTL_ERR(xe, err)) + return -EFAULT; + + if (XE_IOCTL_ERR(xe, check_hw_engines(xe, eci, + args->num_engines))) + return -EINVAL; + } + + if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) { + if (XE_IOCTL_ERR(xe, args->vm_id >> 32)) + return -EINVAL; + + vm = xe_vm_lookup(to_xe_file(file), args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + return -ENOENT; + + if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) { + xe_vm_put(vm); + return -ENOTSUPP; + } + + addr = vm->async_ops.error_capture.addr; + } + + if (XE_IOCTL_ERR(xe, timeout > MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + /* + * FIXME: Very simple implementation at the moment, single wait queue + * for everything. Could be optimized to have a wait queue for every + * hardware engine. Open coding as 'do_compare' can sleep which doesn't + * work with the wait_event_* macros. 
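+	 * The condition of a wait_event_*() macro is evaluated after the task
+	 * state has been set, so a copy_from_user() that faults in there could
+	 * sleep and clobber that state; hence the explicit wait_woken() loop.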
+ */ + if (vm) + add_wait_queue(&vm->async_ops.error_capture.wq, &w_wait); + else + add_wait_queue(&xe->ufence_wq, &w_wait); + for (;;) { + if (vm && xe_vm_is_closed(vm)) { + err = -ENODEV; + break; + } + err = do_compare(addr, args->value, args->mask, args->op); + if (err <= 0) + break; + + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + if (!timeout) { + err = -ETIME; + break; + } + + timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout); + } + if (vm) { + remove_wait_queue(&vm->async_ops.error_capture.wq, &w_wait); + xe_vm_put(vm); + } else { + remove_wait_queue(&xe->ufence_wq, &w_wait); + } + if (XE_IOCTL_ERR(xe, err < 0)) + return err; + else if (XE_IOCTL_ERR(xe, !timeout)) + return -ETIME; + + /* + * Again very simple, return the time in jiffies that has past, may need + * a more precision + */ + if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME) + args->timeout = args->timeout - timeout; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h new file mode 100644 index 000000000000..0e268978f9e6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_WAIT_USER_FENCE_H_ +#define _XE_WAIT_USER_FENCE_H_ + +struct drm_device; +struct drm_file; + +int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c new file mode 100644 index 000000000000..e4a8d4a1899e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_guc_reg.h" +#include "xe_mmio.h" +#include "xe_uc_fw.h" +#include "xe_wopcm.h" + +#include "i915_utils.h" + +/** + * DOC: Write Once Protected Content Memory (WOPCM) Layout + * + * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and + * offset registers whose values are calculated and determined by HuC/GuC + * firmware size and set of hardware requirements/restrictions as shown below: + * + * :: + * + * +=========> +====================+ <== WOPCM Top + * ^ | HW contexts RSVD | + * | +===> +====================+ <== GuC WOPCM Top + * | ^ | | + * | | | | + * | | | | + * | GuC | | + * | WOPCM | | + * | Size +--------------------+ + * WOPCM | | GuC FW RSVD | + * | | +--------------------+ + * | | | GuC Stack RSVD | + * | | +------------------- + + * | v | GuC WOPCM RSVD | + * | +===> +====================+ <== GuC WOPCM base + * | | WOPCM RSVD | + * | +------------------- + <== HuC Firmware Top + * v | HuC FW | + * +=========> +====================+ <== WOPCM Base + * + * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top. + * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6 + * context). + */ + +/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */ +#define DGFX_WOPCM_SIZE SZ_4M /* FIXME: Larger size require + for 2 tile PVC, do a proper + probe sooner or later */ +#define MTL_WOPCM_SIZE SZ_4M /* FIXME: Larger size require + for MTL, do a proper probe + sooner or later */ +#define GEN11_WOPCM_SIZE SZ_2M +/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */ +#define WOPCM_RESERVED_SIZE SZ_16K + +/* 16KB reserved at the beginning of GuC WOPCM. 
*/ +#define GUC_WOPCM_RESERVED SZ_16K +/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */ +#define GUC_WOPCM_STACK_RESERVED SZ_8K + +/* GuC WOPCM Offset value needs to be aligned to 16KB. */ +#define GUC_WOPCM_OFFSET_ALIGNMENT (1UL << GUC_WOPCM_OFFSET_SHIFT) + +/* 36KB WOPCM reserved at the end of WOPCM on GEN11. */ +#define GEN11_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) + +static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm) +{ + return container_of(wopcm, struct xe_gt, uc.wopcm); +} + +static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm) +{ + return gt_to_xe(wopcm_to_gt(wopcm)); +} + +static u32 context_reserved_size(void) +{ + return GEN11_WOPCM_HW_CTX_RESERVED; +} + +static bool __check_layout(struct xe_device *xe, u32 wopcm_size, + u32 guc_wopcm_base, u32 guc_wopcm_size, + u32 guc_fw_size, u32 huc_fw_size) +{ + const u32 ctx_rsvd = context_reserved_size(); + u32 size; + + size = wopcm_size - ctx_rsvd; + if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) { + drm_err(&xe->drm, + "WOPCM: invalid GuC region layout: %uK + %uK > %uK\n", + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K, + size / SZ_1K); + return false; + } + + size = guc_fw_size + GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED; + if (unlikely(guc_wopcm_size < size)) { + drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n", + xe_uc_fw_type_repr(XE_UC_FW_TYPE_GUC), + guc_wopcm_size / SZ_1K, size / SZ_1K); + return false; + } + + size = huc_fw_size + WOPCM_RESERVED_SIZE; + if (unlikely(guc_wopcm_base < size)) { + drm_err(&xe->drm, "WOPCM: no space for %s: %uK < %uK\n", + xe_uc_fw_type_repr(XE_UC_FW_TYPE_HUC), + guc_wopcm_base / SZ_1K, size / SZ_1K); + return false; + } + + return true; +} + +static bool __wopcm_regs_locked(struct xe_gt *gt, + u32 *guc_wopcm_base, u32 *guc_wopcm_size) +{ + u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg); + u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg); + + if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) || + !(reg_base & GUC_WOPCM_OFFSET_VALID)) + return false; + + *guc_wopcm_base = reg_base & GUC_WOPCM_OFFSET_MASK; + *guc_wopcm_size = reg_size & GUC_WOPCM_SIZE_MASK; + return true; +} + +static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt, + struct xe_wopcm *wopcm) +{ + u32 base = wopcm->guc.base; + u32 size = wopcm->guc.size; + u32 huc_agent = xe_uc_fw_is_disabled(>->uc.huc.fw) ? 0 : + HUC_LOADING_AGENT_GUC; + u32 mask; + int err; + + XE_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK)); + XE_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK); + XE_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); + XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); + + mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; + err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE.reg, size, mask, + size | GUC_WOPCM_SIZE_LOCKED); + if (err) + goto err_out; + + mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; + err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET.reg, + base | huc_agent, mask, + base | huc_agent | + GUC_WOPCM_OFFSET_VALID); + if (err) + goto err_out; + + return 0; + +err_out: + drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n"); + drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET", + DMA_GUC_WOPCM_OFFSET.reg, + xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg)); + drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE", + GUC_WOPCM_SIZE.reg, + xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg)); + + return err; +} + +u32 xe_wopcm_size(struct xe_device *xe) +{ + return IS_DGFX(xe) ? DGFX_WOPCM_SIZE : + xe->info.platform == XE_METEORLAKE ? 
MTL_WOPCM_SIZE : + GEN11_WOPCM_SIZE; +} + +/** + * xe_wopcm_init() - Initialize the WOPCM structure. + * @wopcm: pointer to xe_wopcm. + * + * This function will partition WOPCM space based on GuC and HuC firmware sizes + * and will allocate max remaining for use by GuC. This function will also + * enforce platform dependent hardware restrictions on GuC WOPCM offset and + * size. It will fail the WOPCM init if any of these checks fail, so that the + * following WOPCM registers setup and GuC firmware uploading would be aborted. + */ +int xe_wopcm_init(struct xe_wopcm *wopcm) +{ + struct xe_device *xe = wopcm_to_xe(wopcm); + struct xe_gt *gt = wopcm_to_gt(wopcm); + u32 guc_fw_size = xe_uc_fw_get_upload_size(>->uc.guc.fw); + u32 huc_fw_size = xe_uc_fw_get_upload_size(>->uc.huc.fw); + u32 ctx_rsvd = context_reserved_size(); + u32 guc_wopcm_base; + u32 guc_wopcm_size; + bool locked; + int ret = 0; + + if (!guc_fw_size) + return -EINVAL; + + wopcm->size = xe_wopcm_size(xe); + drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + XE_BUG_ON(guc_fw_size >= wopcm->size); + XE_BUG_ON(huc_fw_size >= wopcm->size); + XE_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size); + + locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size); + if (locked) { + drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n", + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); + goto check; + } + + /* + * Aligned value of guc_wopcm_base will determine available WOPCM space + * for HuC firmware and mandatory reserved area. + */ + guc_wopcm_base = huc_fw_size + WOPCM_RESERVED_SIZE; + guc_wopcm_base = ALIGN(guc_wopcm_base, GUC_WOPCM_OFFSET_ALIGNMENT); + + /* + * Need to clamp guc_wopcm_base now to make sure the following math is + * correct. Formal check of whole WOPCM layout will be done below. + */ + guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd); + + /* Aligned remainings of usable WOPCM space can be assigned to GuC. */ + guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base; + guc_wopcm_size &= GUC_WOPCM_SIZE_MASK; + + drm_dbg(&xe->drm, "Calculated GuC WOPCM [%uK, %uK)\n", + guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); + +check: + if (__check_layout(xe, wopcm->size, guc_wopcm_base, guc_wopcm_size, + guc_fw_size, huc_fw_size)) { + wopcm->guc.base = guc_wopcm_base; + wopcm->guc.size = guc_wopcm_size; + XE_BUG_ON(!wopcm->guc.base); + XE_BUG_ON(!wopcm->guc.size); + } else { + drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n"); + return -E2BIG; + } + + if (!locked) + ret = __wopcm_init_regs(xe, gt, wopcm); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_wopcm.h b/drivers/gpu/drm/xe/xe_wopcm.h new file mode 100644 index 000000000000..0197a282460b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wopcm.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_WOPCM_H_ +#define _XE_WOPCM_H_ + +#include "xe_wopcm_types.h" + +struct xe_device; + +int xe_wopcm_init(struct xe_wopcm *wopcm); +u32 xe_wopcm_size(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h new file mode 100644 index 000000000000..486d850c4084 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_wopcm_types.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_WOPCM_TYPES_H_ +#define _XE_WOPCM_TYPES_H_ + +#include + +/** + * struct xe_wopcm - Overall WOPCM info and WOPCM regions. 
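+ *
+ * As a worked example of the partitioning done by xe_wopcm_init() above (the
+ * 2 MiB WOPCM size and the 36 KiB context reservation come from the defines in
+ * xe_wopcm.c, while the 512 KiB HuC image size is hypothetical): guc.base =
+ * ALIGN(512K + 16K, 16K) = 528K and guc.size = 2048K - 36K - 528K = 1484K, so
+ * the GuC region spans [528K, 2012K) and the remaining 36K below the WOPCM top
+ * holds the HW context reservation.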
+ */ +struct xe_wopcm { + /** @size: Size of overall WOPCM */ + u32 size; + /** @guc: GuC WOPCM Region info */ + struct { + /** @base: GuC WOPCM base which is offset from WOPCM base */ + u32 base; + /** @size: Size of the GuC WOPCM region */ + u32 size; + } guc; +}; + +#endif diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h new file mode 100644 index 000000000000..e539594ed939 --- /dev/null +++ b/include/drm/xe_pciids.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_PCIIDS_H_ +#define _XE_PCIIDS_H_ + +/* + * Lists below can be turned into initializers for a struct pci_device_id + * by defining INTEL_VGA_DEVICE: + * + * #define INTEL_VGA_DEVICE(id, info) { \ + * 0x8086, id, \ + * ~0, ~0, \ + * 0x030000, 0xff0000, \ + * (unsigned long) info } + * + * And then calling like: + * + * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__) + * + * To turn them into something else, just provide a different macro passed as + * first argument. + */ + +/* TGL */ +#define XE_TGL_GT1_IDS(MACRO__, ...) \ + MACRO__(0x9A60, ## __VA_ARGS__), \ + MACRO__(0x9A68, ## __VA_ARGS__), \ + MACRO__(0x9A70, ## __VA_ARGS__) + +#define XE_TGL_GT2_IDS(MACRO__, ...) \ + MACRO__(0x9A40, ## __VA_ARGS__), \ + MACRO__(0x9A49, ## __VA_ARGS__), \ + MACRO__(0x9A59, ## __VA_ARGS__), \ + MACRO__(0x9A78, ## __VA_ARGS__), \ + MACRO__(0x9AC0, ## __VA_ARGS__), \ + MACRO__(0x9AC9, ## __VA_ARGS__), \ + MACRO__(0x9AD9, ## __VA_ARGS__), \ + MACRO__(0x9AF8, ## __VA_ARGS__) + +#define XE_TGL_IDS(MACRO__, ...) \ + XE_TGL_GT1_IDS(MACRO__, ...), \ + XE_TGL_GT2_IDS(MACRO__, ...) + +/* RKL */ +#define XE_RKL_IDS(MACRO__, ...) \ + MACRO__(0x4C80, ## __VA_ARGS__), \ + MACRO__(0x4C8A, ## __VA_ARGS__), \ + MACRO__(0x4C8B, ## __VA_ARGS__), \ + MACRO__(0x4C8C, ## __VA_ARGS__), \ + MACRO__(0x4C90, ## __VA_ARGS__), \ + MACRO__(0x4C9A, ## __VA_ARGS__) + +/* DG1 */ +#define XE_DG1_IDS(MACRO__, ...) \ + MACRO__(0x4905, ## __VA_ARGS__), \ + MACRO__(0x4906, ## __VA_ARGS__), \ + MACRO__(0x4907, ## __VA_ARGS__), \ + MACRO__(0x4908, ## __VA_ARGS__), \ + MACRO__(0x4909, ## __VA_ARGS__) + +/* ADL-S */ +#define XE_ADLS_IDS(MACRO__, ...) \ + MACRO__(0x4680, ## __VA_ARGS__), \ + MACRO__(0x4682, ## __VA_ARGS__), \ + MACRO__(0x4688, ## __VA_ARGS__), \ + MACRO__(0x468A, ## __VA_ARGS__), \ + MACRO__(0x4690, ## __VA_ARGS__), \ + MACRO__(0x4692, ## __VA_ARGS__), \ + MACRO__(0x4693, ## __VA_ARGS__) + +/* ADL-P */ +#define XE_ADLP_IDS(MACRO__, ...) \ + MACRO__(0x46A0, ## __VA_ARGS__), \ + MACRO__(0x46A1, ## __VA_ARGS__), \ + MACRO__(0x46A2, ## __VA_ARGS__), \ + MACRO__(0x46A3, ## __VA_ARGS__), \ + MACRO__(0x46A6, ## __VA_ARGS__), \ + MACRO__(0x46A8, ## __VA_ARGS__), \ + MACRO__(0x46AA, ## __VA_ARGS__), \ + MACRO__(0x462A, ## __VA_ARGS__), \ + MACRO__(0x4626, ## __VA_ARGS__), \ + MACRO__(0x4628, ## __VA_ARGS__), \ + MACRO__(0x46B0, ## __VA_ARGS__), \ + MACRO__(0x46B1, ## __VA_ARGS__), \ + MACRO__(0x46B2, ## __VA_ARGS__), \ + MACRO__(0x46B3, ## __VA_ARGS__), \ + MACRO__(0x46C0, ## __VA_ARGS__), \ + MACRO__(0x46C1, ## __VA_ARGS__), \ + MACRO__(0x46C2, ## __VA_ARGS__), \ + MACRO__(0x46C3, ## __VA_ARGS__) + +/* ADL-N */ +#define XE_ADLN_IDS(MACRO__, ...) \ + MACRO__(0x46D0, ## __VA_ARGS__), \ + MACRO__(0x46D1, ## __VA_ARGS__), \ + MACRO__(0x46D2, ## __VA_ARGS__) + +/* RPL-S */ +#define XE_RPLS_IDS(MACRO__, ...) 
\ + MACRO__(0xA780, ## __VA_ARGS__), \ + MACRO__(0xA781, ## __VA_ARGS__), \ + MACRO__(0xA782, ## __VA_ARGS__), \ + MACRO__(0xA783, ## __VA_ARGS__), \ + MACRO__(0xA788, ## __VA_ARGS__), \ + MACRO__(0xA789, ## __VA_ARGS__), \ + MACRO__(0xA78A, ## __VA_ARGS__), \ + MACRO__(0xA78B, ## __VA_ARGS__) + +/* RPL-U */ +#define XE_RPLU_IDS(MACRO__, ...) \ + MACRO__(0xA721, ## __VA_ARGS__), \ + MACRO__(0xA7A1, ## __VA_ARGS__), \ + MACRO__(0xA7A9, ## __VA_ARGS__) + +/* RPL-P */ +#define XE_RPLP_IDS(MACRO__, ...) \ + MACRO__(0xA720, ## __VA_ARGS__), \ + MACRO__(0xA7A0, ## __VA_ARGS__), \ + MACRO__(0xA7A8, ## __VA_ARGS__) + +/* DG2 */ +#define XE_DG2_G10_IDS(MACRO__, ...) \ + MACRO__(0x5690, ## __VA_ARGS__), \ + MACRO__(0x5691, ## __VA_ARGS__), \ + MACRO__(0x5692, ## __VA_ARGS__), \ + MACRO__(0x56A0, ## __VA_ARGS__), \ + MACRO__(0x56A1, ## __VA_ARGS__), \ + MACRO__(0x56A2, ## __VA_ARGS__) + +#define XE_DG2_G11_IDS(MACRO__, ...) \ + MACRO__(0x5693, ## __VA_ARGS__), \ + MACRO__(0x5694, ## __VA_ARGS__), \ + MACRO__(0x5695, ## __VA_ARGS__), \ + MACRO__(0x5698, ## __VA_ARGS__), \ + MACRO__(0x56A5, ## __VA_ARGS__), \ + MACRO__(0x56A6, ## __VA_ARGS__), \ + MACRO__(0x56B0, ## __VA_ARGS__), \ + MACRO__(0x56B1, ## __VA_ARGS__) + +#define XE_DG2_G12_IDS(MACRO__, ...) \ + MACRO__(0x5696, ## __VA_ARGS__), \ + MACRO__(0x5697, ## __VA_ARGS__), \ + MACRO__(0x56A3, ## __VA_ARGS__), \ + MACRO__(0x56A4, ## __VA_ARGS__), \ + MACRO__(0x56B2, ## __VA_ARGS__), \ + MACRO__(0x56B3, ## __VA_ARGS__) + +#define XE_DG2_IDS(MACRO__, ...) \ + XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\ + XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\ + XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__) + +#define XE_ATS_M150_IDS(MACRO__, ...) \ + MACRO__(0x56C0, ## __VA_ARGS__) + +#define XE_ATS_M75_IDS(MACRO__, ...) \ + MACRO__(0x56C1, ## __VA_ARGS__) + +#define XE_ATS_M_IDS(MACRO__, ...) \ + XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\ + XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) + +/* MTL */ +#define XE_MTL_M_IDS(MACRO__, ...) \ + MACRO__(0x7D40, ## __VA_ARGS__), \ + MACRO__(0x7D43, ## __VA_ARGS__), \ + MACRO__(0x7DC0, ## __VA_ARGS__) + +#define XE_MTL_P_IDS(MACRO__, ...) \ + MACRO__(0x7D45, ## __VA_ARGS__), \ + MACRO__(0x7D47, ## __VA_ARGS__), \ + MACRO__(0x7D50, ## __VA_ARGS__), \ + MACRO__(0x7D55, ## __VA_ARGS__), \ + MACRO__(0x7DC5, ## __VA_ARGS__), \ + MACRO__(0x7DD0, ## __VA_ARGS__), \ + MACRO__(0x7DD5, ## __VA_ARGS__) + +#define XE_MTL_S_IDS(MACRO__, ...) \ + MACRO__(0x7D60, ## __VA_ARGS__), \ + MACRO__(0x7DE0, ## __VA_ARGS__) + +#define XE_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D66, ## __VA_ARGS__), \ + MACRO__(0x7D76, ## __VA_ARGS__) + +#define XE_MTL_IDS(MACRO__, ...) \ + XE_MTL_M_IDS(MACRO__, ## __VA_ARGS__), \ + XE_MTL_P_IDS(MACRO__, ## __VA_ARGS__), \ + XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__), \ + XE_ARL_IDS(MACRO__, ## __VA_ARGS__) + +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h new file mode 100644 index 000000000000..f64b1c785fad --- /dev/null +++ b/include/uapi/drm/xe_drm.h @@ -0,0 +1,787 @@ +/* + * Copyright 2021 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _UAPI_XE_DRM_H_ +#define _UAPI_XE_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ + +/** + * struct i915_user_extension - Base class for defining a chain of extensions + * + * Many interfaces need to grow over time. In most cases we can simply + * extend the struct and have userspace pass in more data. Another option, + * as demonstrated by Vulkan's approach to providing extensions for forward + * and backward compatibility, is to use a list of optional structs to + * provide those extra details. + * + * The key advantage to using an extension chain is that it allows us to + * redefine the interface more easily than an ever growing struct of + * increasing complexity, and for large parts of that interface to be + * entirely optional. The downside is more pointer chasing; chasing across + * the __user boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. code-block:: C + * + * struct i915_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct i915_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct i915_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct i915_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + * + */ +struct xe_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct i915_user_extension, or zero if the end. + */ + __u64 next_extension; + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. + * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct i915_user_extension. + */ + __u32 name; + /** + * @flags: MBZ + * + * All undefined bits must be zero. + */ + __u32 pad; +}; + +/* + * i915 specific ioctls. + * + * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie + * [0x40, 0xa0) (a0 is excluded). 
The numbers below are defined as offset + * against DRM_COMMAND_BASE and should be between [0x0, 0x60). + */ +#define DRM_XE_DEVICE_QUERY 0x00 +#define DRM_XE_GEM_CREATE 0x01 +#define DRM_XE_GEM_MMAP_OFFSET 0x02 +#define DRM_XE_VM_CREATE 0x03 +#define DRM_XE_VM_DESTROY 0x04 +#define DRM_XE_VM_BIND 0x05 +#define DRM_XE_ENGINE_CREATE 0x06 +#define DRM_XE_ENGINE_DESTROY 0x07 +#define DRM_XE_EXEC 0x08 +#define DRM_XE_MMIO 0x09 +#define DRM_XE_ENGINE_SET_PROPERTY 0x0a +#define DRM_XE_WAIT_USER_FENCE 0x0b +#define DRM_XE_VM_MADVISE 0x0c + +/* Must be kept compact -- no holes */ +#define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) +#define DRM_IOCTL_XE_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create) +#define DRM_IOCTL_XE_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset) +#define DRM_IOCTL_XE_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create) +#define DRM_IOCTL_XE_VM_DESTROY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy) +#define DRM_IOCTL_XE_VM_BIND DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind) +#define DRM_IOCTL_XE_ENGINE_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create) +#define DRM_IOCTL_XE_ENGINE_DESTROY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy) +#define DRM_IOCTL_XE_EXEC DRM_IOW( DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) +#define DRM_IOCTL_XE_MMIO DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio) +#define DRM_IOCTL_XE_ENGINE_SET_PROPERTY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property) +#define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_VM_MADVISE DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise) + +struct drm_xe_engine_class_instance { + __u16 engine_class; + +#define DRM_XE_ENGINE_CLASS_RENDER 0 +#define DRM_XE_ENGINE_CLASS_COPY 1 +#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2 +#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 +#define DRM_XE_ENGINE_CLASS_COMPUTE 4 + /* + * Kernel only class (not actual hardware engine class). Used for + * creating ordered queues of VM bind operations. + */ +#define DRM_XE_ENGINE_CLASS_VM_BIND 5 + + __u16 engine_instance; + __u16 gt_id; +}; + +#define XE_MEM_REGION_CLASS_SYSMEM 0 +#define XE_MEM_REGION_CLASS_VRAM 1 + +struct drm_xe_query_mem_usage { + __u32 num_regions; + __u32 pad; + + struct drm_xe_query_mem_region { + __u16 mem_class; + __u16 instance; /* unique ID even among different classes */ + __u32 pad; + __u32 min_page_size; + __u32 max_page_size; + __u64 total_size; + __u64 used; + __u64 reserved[8]; + } regions[]; +}; + +struct drm_xe_query_config { + __u32 num_params; + __u32 pad; +#define XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 +#define XE_QUERY_CONFIG_FLAGS 1 + #define XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) + #define XE_QUERY_CONFIG_FLAGS_USE_GUC (0x1 << 1) +#define XE_QUERY_CONFIG_MIN_ALIGNEMENT 2 +#define XE_QUERY_CONFIG_VA_BITS 3 +#define XE_QUERY_CONFIG_GT_COUNT 4 +#define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 +#define XE_QUERY_CONFIG_NUM_PARAM XE_QUERY_CONFIG_MEM_REGION_COUNT + 1 + __u64 info[]; +}; + +struct drm_xe_query_gts { + __u32 num_gt; + __u32 pad; + + /* + * TODO: Perhaps info about every mem region relative to this GT? e.g. + * bandwidth between this GT and remote region? 
+ */ + + struct drm_xe_query_gt { +#define XE_QUERY_GT_TYPE_MAIN 0 +#define XE_QUERY_GT_TYPE_REMOTE 1 +#define XE_QUERY_GT_TYPE_MEDIA 2 + __u16 type; + __u16 instance; + __u32 clock_freq; + __u64 features; + __u64 native_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ + __u64 slow_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ + __u64 inaccessible_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ + __u64 reserved[8]; + } gts[]; +}; + +struct drm_xe_query_topology_mask { + /** @gt_id: GT ID the mask is associated with */ + __u16 gt_id; + + /** @type: type of mask */ + __u16 type; +#define XE_TOPO_DSS_GEOMETRY (1 << 0) +#define XE_TOPO_DSS_COMPUTE (1 << 1) +#define XE_TOPO_EU_PER_DSS (1 << 2) + + /** @num_bytes: number of bytes in requested mask */ + __u32 num_bytes; + + /** @mask: little-endian mask of @num_bytes */ + __u8 mask[]; +}; + +struct drm_xe_device_query { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @query: The type of data to query */ + __u32 query; + +#define DRM_XE_DEVICE_QUERY_ENGINES 0 +#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 +#define DRM_XE_DEVICE_QUERY_CONFIG 2 +#define DRM_XE_DEVICE_QUERY_GTS 3 +#define DRM_XE_DEVICE_QUERY_HWCONFIG 4 +#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 + + /** @size: Size of the queried data */ + __u32 size; + + /** @data: Queried data is placed here */ + __u64 data; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_gem_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** + * @size: Requested size for the object + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + + /** + * @flags: Flags, currently a mask of memory instances of where BO can + * be placed + */ +#define XE_GEM_CREATE_FLAG_DEFER_BACKING (0x1 << 24) +#define XE_GEM_CREATE_FLAG_SCANOUT (0x1 << 25) + __u32 flags; + + /** + * @vm_id: Attached VM, if any + * + * If a VM is specified, this BO must: + * + * 1. Only ever be bound to that VM. + * + * 2. Cannot be exported as a PRIME fd. + */ + __u32 vm_id; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_gem_mmap_offset { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @handle: Handle for the object being mapped. 
*/ + __u32 handle; + + /** @flags: Must be zero */ + __u32 flags; + + /** @offset: The fake offset to use for subsequent mmap call */ + __u64 offset; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture + */ +struct drm_xe_vm_bind_op_error_capture { + /** @error: errno that occured */ + __s32 error; + /** @op: operation that encounter an error */ + __u32 op; + /** @addr: address of bind op */ + __u64 addr; + /** @size: size of bind */ + __u64 size; +}; + +/** struct drm_xe_ext_vm_set_property - VM set property extension */ +struct drm_xe_ext_vm_set_property { + /** @base: base user extension */ + struct xe_user_extension base; + + /** @property: property to set */ +#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS 0 + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_create { + /** @extensions: Pointer to the first extension struct, if any */ +#define XE_VM_EXTENSION_SET_PROPERTY 0 + __u64 extensions; + + /** @flags: Flags */ + __u32 flags; + +#define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0) +#define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1) +#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS (0x1 << 2) +#define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3) + + /** @vm_id: Returned VM ID */ + __u32 vm_id; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_destroy { + /** @vm_id: VM ID */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_bind_op { + /** + * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP + */ + __u32 obj; + + union { + /** + * @obj_offset: Offset into the object, MBZ for CLEAR_RANGE, + * ignored for unbind + */ + __u64 obj_offset; + /** @userptr: user pointer to bind on */ + __u64 userptr; + }; + + /** + * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL + */ + __u64 range; + + /** @addr: Address to operate on, MBZ for UNMAP_ALL */ + __u64 addr; + + /** + * @gt_mask: Mask for which GTs to create binds for, 0 == All GTs, + * only applies to creating new VMAs + */ + __u64 gt_mask; + + /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */ + __u32 op; + + /** @mem_region: Memory region to prefetch VMA to, instance not a mask */ + __u32 region; + +#define XE_VM_BIND_OP_MAP 0x0 +#define XE_VM_BIND_OP_UNMAP 0x1 +#define XE_VM_BIND_OP_MAP_USERPTR 0x2 +#define XE_VM_BIND_OP_RESTART 0x3 +#define XE_VM_BIND_OP_UNMAP_ALL 0x4 +#define XE_VM_BIND_OP_PREFETCH 0x5 + +#define XE_VM_BIND_FLAG_READONLY (0x1 << 16) + /* + * A bind ops completions are always async, hence the support for out + * sync. This flag indicates the allocation of the memory for new page + * tables and the job to program the pages tables is asynchronous + * relative to the IOCTL. That part of a bind operation can fail under + * memory pressure, the job in practice can't fail unless the system is + * totally shot. + * + * If this flag is clear and the IOCTL doesn't return an error, in + * practice the bind op is good and will complete. + * + * If this flag is set and doesn't return return an error, the bind op + * can still fail and recovery is needed. If configured, the bind op that + * caused the error will be captured in drm_xe_vm_bind_op_error_capture. 
+ * Once the user sees the error (via a ufence + + * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory + * via non-async unbinds, and then restart all queue'd async binds op via + * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the + * VM. + * + * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is + * configured in the VM and must be set if the VM is configured with + * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state. + */ +#define XE_VM_BIND_FLAG_ASYNC (0x1 << 17) + /* + * Valid on a faulting VM only, do the MAP operation immediately rather + * than differing the MAP to the page fault handler. + */ +#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 18) + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_bind { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: The ID of the VM to bind to */ + __u32 vm_id; + + /** + * @engine_id: engine_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND + * and engine must have same vm_id. If zero, the default VM bind engine + * is used. + */ + __u32 engine_id; + + /** @num_binds: number of binds in this IOCTL */ + __u32 num_binds; + + union { + /** @bind: used if num_binds == 1 */ + struct drm_xe_vm_bind_op bind; + /** + * @vector_of_binds: userptr to array of struct + * drm_xe_vm_bind_op if num_binds > 1 + */ + __u64 vector_of_binds; + }; + + /** @num_syncs: amount of syncs to wait on */ + __u32 num_syncs; + + /** @syncs: pointer to struct drm_xe_sync array */ + __u64 syncs; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** struct drm_xe_ext_engine_set_property - engine set property extension */ +struct drm_xe_ext_engine_set_property { + /** @base: base user extension */ + struct xe_user_extension base; + + /** @property: property to set */ + __u32 property; + + /** @value: property value */ + __u64 value; +}; + +/** + * struct drm_xe_engine_set_property - engine set property + * + * Same namespace for extensions as drm_xe_engine_create + */ +struct drm_xe_engine_set_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @engine_id: Engine ID */ + __u32 engine_id; + + /** @property: property to set */ +#define XE_ENGINE_PROPERTY_PRIORITY 0 +#define XE_ENGINE_PROPERTY_TIMESLICE 1 +#define XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT 2 + /* + * Long running or ULLS engine mode. DMA fences not allowed in this + * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves + * as a sanity check the UMD knows what it is doing. Can only be set at + * engine create time. 
+ */ +#define XE_ENGINE_PROPERTY_COMPUTE_MODE 3 +#define XE_ENGINE_PROPERTY_PERSISTENCE 4 +#define XE_ENGINE_PROPERTY_JOB_TIMEOUT 5 +#define XE_ENGINE_PROPERTY_ACC_TRIGGER 6 +#define XE_ENGINE_PROPERTY_ACC_NOTIFY 7 +#define XE_ENGINE_PROPERTY_ACC_GRANULARITY 8 + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_engine_create { + /** @extensions: Pointer to the first extension struct, if any */ +#define XE_ENGINE_EXTENSION_SET_PROPERTY 0 + __u64 extensions; + + /** @width: submission width (number BB per exec) for this engine */ + __u16 width; + + /** @num_placements: number of valid placements for this engine */ + __u16 num_placements; + + /** @vm_id: VM to use for this engine */ + __u32 vm_id; + + /** @flags: MBZ */ + __u32 flags; + + /** @engine_id: Returned engine ID */ + __u32 engine_id; + + /** + * @instances: user pointer to a 2-d array of struct + * drm_xe_engine_class_instance + * + * length = width (i) * num_placements (j) + * index = j + i * width + */ + __u64 instances; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_engine_destroy { + /** @vm_id: VM ID */ + __u32 engine_id; + + /** @pad: MBZ */ + __u32 pad; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_sync { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + __u32 flags; + +#define DRM_XE_SYNC_SYNCOBJ 0x0 +#define DRM_XE_SYNC_TIMELINE_SYNCOBJ 0x1 +#define DRM_XE_SYNC_DMA_BUF 0x2 +#define DRM_XE_SYNC_USER_FENCE 0x3 +#define DRM_XE_SYNC_SIGNAL 0x10 + + union { + __u32 handle; + /** + * @addr: Address of user fence. When sync passed in via exec + * IOCTL this a GPU address in the VM. When sync passed in via + * VM bind IOCTL this is a user pointer. In either case, it is + * the users responsibility that this address is present and + * mapped when the user fence is signalled. Must be qword + * aligned. + */ + __u64 addr; + }; + + __u64 timeline_value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_exec { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @engine_id: Engine ID for the batch buffer */ + __u32 engine_id; + + /** @num_syncs: Amount of struct drm_xe_sync in array. */ + __u32 num_syncs; + + /** @syncs: Pointer to struct drm_xe_sync array. */ + __u64 syncs; + + /** + * @address: address of batch buffer if num_batch_buffer == 1 or an + * array of batch buffer addresses + */ + __u64 address; + + /** + * @num_batch_buffer: number of batch buffer in this exec, must match + * the width of the engine + */ + __u16 num_batch_buffer; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_mmio { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + __u32 addr; + + __u32 flags; + +#define DRM_XE_MMIO_8BIT 0x0 +#define DRM_XE_MMIO_16BIT 0x1 +#define DRM_XE_MMIO_32BIT 0x2 +#define DRM_XE_MMIO_64BIT 0x3 +#define DRM_XE_MMIO_BITS_MASK 0x3 +#define DRM_XE_MMIO_READ 0x4 +#define DRM_XE_MMIO_WRITE 0x8 + + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_wait_user_fence - wait user fence + * + * Wait on user fence, XE will wakeup on every HW engine interrupt in the + * instances list and check if user fence is complete: + * (*addr & MASK) OP (VALUE & MASK) + * + * Returns to user on user fence completion or timeout. 
+ */ +struct drm_xe_wait_user_fence { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + union { + /** + * @addr: user pointer address to wait on, must qword aligned + */ + __u64 addr; + /** + * @vm_id: The ID of the VM which encounter an error used with + * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear. + */ + __u64 vm_id; + }; + /** @op: wait operation (type of comparison) */ +#define DRM_XE_UFENCE_WAIT_EQ 0 +#define DRM_XE_UFENCE_WAIT_NEQ 1 +#define DRM_XE_UFENCE_WAIT_GT 2 +#define DRM_XE_UFENCE_WAIT_GTE 3 +#define DRM_XE_UFENCE_WAIT_LT 4 +#define DRM_XE_UFENCE_WAIT_LTE 5 + __u16 op; + /** @flags: wait flags */ +#define DRM_XE_UFENCE_WAIT_SOFT_OP (1 << 0) /* e.g. Wait on VM bind */ +#define DRM_XE_UFENCE_WAIT_ABSTIME (1 << 1) +#define DRM_XE_UFENCE_WAIT_VM_ERROR (1 << 2) + __u16 flags; + /** @value: compare value */ + __u64 value; + /** @mask: comparison mask */ +#define DRM_XE_UFENCE_WAIT_U8 0xffu +#define DRM_XE_UFENCE_WAIT_U16 0xffffu +#define DRM_XE_UFENCE_WAIT_U32 0xffffffffu +#define DRM_XE_UFENCE_WAIT_U64 0xffffffffffffffffu + __u64 mask; + /** @timeout: how long to wait before bailing, value in jiffies */ + __s64 timeout; + /** + * @num_engines: number of engine instances to wait on, must be zero + * when DRM_XE_UFENCE_WAIT_SOFT_OP set + */ + __u64 num_engines; + /** + * @instances: user pointer to array of drm_xe_engine_class_instance to + * wait on, must be NULL when DRM_XE_UFENCE_WAIT_SOFT_OP set + */ + __u64 instances; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +struct drm_xe_vm_madvise { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: The ID VM in which the VMA exists */ + __u32 vm_id; + + /** @range: Number of bytes in the VMA */ + __u64 range; + + /** @addr: Address of the VMA to operation on */ + __u64 addr; + + /* + * Setting the preferred location will trigger a migrate of the VMA + * backing store to new location if the backing store is already + * allocated. + */ +#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS 0 +#define DRM_XE_VM_MADVISE_PREFERRED_GT 1 + /* + * In this case lower 32 bits are mem class, upper 32 are GT. + * Combination provides a single IOCTL plus migrate VMA to preferred + * location. + */ +#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT 2 + /* + * The CPU will do atomic memory operations to this VMA. Must be set on + * some devices for atomics to behave correctly. + */ +#define DRM_XE_VM_MADVISE_CPU_ATOMIC 3 + /* + * The device will do atomic memory operations to this VMA. Must be set + * on some devices for atomics to behave correctly. + */ +#define DRM_XE_VM_MADVISE_DEVICE_ATOMIC 4 + /* + * Priority WRT to eviction (moving from preferred memory location due + * to memory pressure). The lower the priority, the more likely to be + * evicted. 
+ */ +#define DRM_XE_VM_MADVISE_PRIORITY 5 +#define DRM_XE_VMA_PRIORITY_LOW 0 +#define DRM_XE_VMA_PRIORITY_NORMAL 1 /* Default */ +#define DRM_XE_VMA_PRIORITY_HIGH 2 /* Must be elevated user */ + /* Pin the VMA in memory, must be elevated user */ +#define DRM_XE_VM_MADVISE_PIN 6 + + /** @property: property to set */ + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _UAPI_XE_DRM_H_ */ -- cgit v1.2.3 From 0f06dc101972d598d1c6bb356436c3dbf1e4b646 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:03 -0500 Subject: drm/xe: Implement a local xe_mmio_wait32 Then, move the i915_utils.h include to its user. The overall goal is to kill all the usages of the i915_utils stuff. Yes, wait_for also depends on , so they go together to where it is needed. It will be likely needed anyway directly for udelay or usleep_range. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_force_wake.c | 6 ++++++ drivers/gpu/drm/xe/xe_gt_mcr.c | 7 +++++++ drivers/gpu/drm/xe/xe_guc.c | 7 +++++++ drivers/gpu/drm/xe/xe_guc_pc.c | 7 +++++++ drivers/gpu/drm/xe/xe_mmio.h | 29 ++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_pcode.c | 7 +++++++ 6 files changed, 54 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 0320ce7ba3d1..31a33ee9ccb6 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -10,6 +10,12 @@ #include "xe_mmio.h" #include "gt/intel_gt_regs.h" +/* + * FIXME: This header has been deemed evil and we need to kill it. Temporarily + * including so we can use '__mask_next_bit'. + */ +#include "i915_utils.h" + #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 static struct xe_gt * diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index b69c0d6c6b2f..8add5ec9a307 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -11,6 +11,13 @@ #include "gt/intel_gt_regs.h" +#include +/* + * FIXME: This header has been deemed evil and we need to kill it. Temporar + * including so we can use 'wait_for'. + */ +#include "i915_utils.h" + /** * DOC: GT Multicast/Replicated (MCR) Register Support * diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 3c285d849ef6..969a2427b1f2 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -22,6 +22,13 @@ #include "i915_reg_defs.h" #include "gt/intel_gt_regs.h" +#include +/* + * FIXME: This header has been deemed evil and we need to kill it. Temporarily + * including so we can use 'wait_for' and range_overflow_t. + */ +#include "i915_utils.h" + /* TODO: move to common file */ #define GUC_PVC_MOCS_INDEX_MASK REG_GENMASK(25, 24) #define PVC_MOCS_UC_INDEX 1 diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 227e30a482e3..260ccf3fe215 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -16,6 +16,13 @@ #include "i915_reg_defs.h" #include "i915_reg.h" +#include +/* + * FIXME: This header has been deemed evil and we need to kill it. Temporarily + * including so we can use 'wait_for'. 
+ */ +#include "i915_utils.h" + #include "intel_mchbar_regs.h" /* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */ diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 09d24467096f..7352b622ca87 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -10,13 +10,6 @@ #include "xe_gt_types.h" -/* - * FIXME: This header has been deemed evil and we need to kill it. Temporarily - * including so we can use 'wait_for' and unblock initial development. A follow - * should replace 'wait_for' with a sane version and drop including this header. - */ -#include "i915_utils.h" - struct drm_device; struct drm_file; struct xe_device; @@ -93,8 +86,26 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask, u32 timeout_ms) { - return wait_for((xe_mmio_read32(gt, reg) & mask) == val, - timeout_ms); + ktime_t cur = ktime_get_raw(); + const ktime_t end = ktime_add_ms(cur, timeout_ms); + s64 wait = 10; + + for (;;) { + if ((xe_mmio_read32(gt, reg) & mask) == val) + return 0; + + cur = ktime_get_raw(); + if (!ktime_before(cur, end)) + return -ETIMEDOUT; + + if (ktime_after(ktime_add_us(cur, wait), end)) + wait = ktime_us_delta(end, cur); + + usleep_range(wait, wait << 1); + wait <<= 1; + } + + return -ETIMEDOUT; } int xe_mmio_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 236159c8a6c0..313ccd70d1a9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -11,6 +11,13 @@ #include +#include +/* + * FIXME: This header has been deemed evil and we need to kill it. Temporarily + * including so we can use 'wait_for'. + */ +#include "i915_utils.h" + /** * DOC: PCODE * -- cgit v1.2.3 From 86011ae21c15a779dcf25b97d5670371dc14e4c3 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:04 -0500 Subject: drm/xe: Stop using i915's range_overflows_t macro. Let's do it directly. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 969a2427b1f2..9234da06d205 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -25,7 +25,7 @@ #include /* * FIXME: This header has been deemed evil and we need to kill it. Temporarily - * including so we can use 'wait_for' and range_overflow_t. + * including so we can use 'wait_for'. */ #include "i915_utils.h" @@ -55,7 +55,8 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, u32 addr = xe_bo_ggtt_addr(bo); XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc))); - XE_BUG_ON(range_overflows_t(u32, addr, bo->size, GUC_GGTT_TOP)); + XE_BUG_ON(addr >= GUC_GGTT_TOP); + XE_BUG_ON(bo->size > GUC_GGTT_TOP - addr); return addr; } -- cgit v1.2.3 From 7aaec3a623adda324f2435153a105088a8556b9a Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 31 Mar 2023 14:21:34 -0400 Subject: drm/xe: Let's return last value read on xe_mmio_wait32. This is already useful because it avoids some extra reads where registers might have changed after the timeout decision. But also, it will be important to end the kill of i915's wait_for. 
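For illustration only, here is a minimal, self-contained userspace sketch of the
wait-and-report pattern this change enables (the register read is mocked and every
name below is made up; this is not part of the patch). The helper hands back the
last value it observed through an out parameter, so a caller that hits the timeout
can log that value without issuing a further read:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint32_t mock_read32(void)
{
    static uint32_t fake;
    return fake += 0x10;    /* pretend the hardware makes some progress */
}

static int64_t elapsed_us(const struct timespec *start)
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);
    return (now.tv_sec - start->tv_sec) * 1000000LL +
           (now.tv_nsec - start->tv_nsec) / 1000;
}

/* Wait for (read & mask) == val, always reporting the last observed value. */
static int wait32(uint32_t val, uint32_t mask, int64_t timeout_us,
                  uint32_t *out_val)
{
    struct timespec start;
    uint32_t read;
    int ret = -ETIMEDOUT;

    clock_gettime(CLOCK_MONOTONIC, &start);
    for (;;) {
        read = mock_read32();
        if ((read & mask) == val) {
            ret = 0;
            break;
        }
        if (elapsed_us(&start) >= timeout_us)
            break;
    }

    if (out_val)
        *out_val = read;    /* no extra read needed after the timeout decision */

    return ret;
}

int main(void)
{
    uint32_t last;

    if (wait32(0x1, 0xf, 100, &last))
        printf("timed out, last value was 0x%08x\n", last);
    else
        printf("done, value 0x%08x\n", last);
    return 0;
}

The out parameter in this sketch plays the same role as the out_val argument this
patch adds to xe_mmio_wait32().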
Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_force_wake.c | 4 ++-- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_guc.c | 13 ++++++------- drivers/gpu/drm/xe/xe_huc.c | 2 +- drivers/gpu/drm/xe/xe_mmio.h | 20 +++++++++++++++----- drivers/gpu/drm/xe/xe_uc_fw.c | 7 +++---- 6 files changed, 28 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 31a33ee9ccb6..a203eabba4e2 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -129,7 +129,7 @@ static int domain_wake_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, - XE_FORCE_WAKE_ACK_TIMEOUT_MS); + XE_FORCE_WAKE_ACK_TIMEOUT_MS, NULL); } static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) @@ -141,7 +141,7 @@ static int domain_sleep_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val, - XE_FORCE_WAKE_ACK_TIMEOUT_MS); + XE_FORCE_WAKE_ACK_TIMEOUT_MS, NULL); } #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5f8fa9d98d5a..6a84d2a1c7f3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -599,7 +599,7 @@ int do_gt_reset(struct xe_gt *gt) int err; xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL); - err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5); + err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5, NULL); if (err) drm_err(&xe->drm, "GT reset failed to clear GEN11_GRDOM_FULL\n"); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 9234da06d205..52f42bd5cad7 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -324,17 +324,17 @@ int xe_guc_reset(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); - u32 guc_status; + u32 guc_status, gdrst; int ret; xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC); - ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5); + ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5, &gdrst); if (ret) { drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", - xe_mmio_read32(gt, GEN6_GDRST.reg)); + gdrst); goto err_out; } @@ -654,7 +654,7 @@ int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); - u32 header; + u32 header, reply; u32 reply_reg = xe_gt_is_media_type(gt) ? 
MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg; int ret; @@ -691,12 +691,11 @@ retry: ret = xe_mmio_wait32(gt, reply_reg, FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), - GUC_HXG_MSG_0_ORIGIN, - 50); + GUC_HXG_MSG_0_ORIGIN, 50, &reply); if (ret) { timeout: drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n", - request[0], xe_mmio_read32(gt, reply_reg)); + request[0], reply); return ret; } diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 93b22fac6e14..c8c93bdf4760 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -85,7 +85,7 @@ int xe_huc_auth(struct xe_huc *huc) ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg, HUC_LOAD_SUCCESSFUL, - HUC_LOAD_SUCCESSFUL, 100); + HUC_LOAD_SUCCESSFUL, 100, NULL); if (ret) { drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); goto fail; diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 7352b622ca87..ccd97a4a89c1 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -82,21 +82,28 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, return (reg_val & mask) != eval ? -EINVAL : 0; } -static inline int xe_mmio_wait32(struct xe_gt *gt, - u32 reg, u32 val, - u32 mask, u32 timeout_ms) +static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, + u32 mask, u32 timeout_ms, u32 *out_val) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_ms(cur, timeout_ms); + int ret = -ETIMEDOUT; s64 wait = 10; + u32 read; for (;;) { if ((xe_mmio_read32(gt, reg) & mask) == val) return 0; + read = xe_mmio_read32(gt, reg); + if ((read & mask) == val) { + ret = 0; + break; + } + cur = ktime_get_raw(); if (!ktime_before(cur, end)) - return -ETIMEDOUT; + break; if (ktime_after(ktime_add_us(cur, wait), end)) wait = ktime_us_delta(end, cur); @@ -105,7 +112,10 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, wait <<= 1; } - return -ETIMEDOUT; + if (out_val) + *out_val = read; + + return ret; } int xe_mmio_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 86c47b7f0901..edd6a5d2db34 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -326,7 +326,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) { struct xe_device *xe = uc_fw_to_xe(uc_fw); struct xe_gt *gt = uc_fw_to_gt(uc_fw); - u32 src_offset; + u32 src_offset, dma_ctrl; int ret; xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); @@ -352,11 +352,10 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) _MASKED_BIT_ENABLE(dma_flags | START_DMA)); /* Wait for DMA to finish */ - ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100); + ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100, &dma_ctrl); if (ret) drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", - xe_uc_fw_type_repr(uc_fw->type), - xe_mmio_read32(gt, DMA_CTRL.reg)); + xe_uc_fw_type_repr(uc_fw->type), dma_ctrl); /* Disable the bits once DMA is over */ xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags)); -- cgit v1.2.3 From 2e5be5d57dbe5e04a5abbd01417fc098f8925a35 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:06 -0500 Subject: drm/xe: Convert guc_ready to regular xe_mmio_wait32 Possible now that the wait function returns the last read value. So we can remove the users of i915's wait_for one by one... 
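As an aside, a tiny self-contained sketch of the masked-equality form used for the
conversion (the helpers and register layout below are simplified stand-ins, not the
kernel definitions): waiting for a register field to reach a given value is the
same as waiting for (reg & mask) == FIELD_PREP(mask, value), which is exactly the
condition a generic wait routine can poll.

#include <assert.h>
#include <stdint.h>

/* Simplified stand-ins for the kernel's GENMASK()/FIELD_PREP() helpers. */
#define MASK(h, l)           (((~0u) << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP(mask, v)  (((v) << __builtin_ctz(mask)) & (mask))
#define FIELD_GET(mask, r)   (((r) & (mask)) >> __builtin_ctz(mask))

/* Hypothetical layout: the "ukernel status" field lives in bits 7:0. */
#define UKERNEL_MASK   MASK(7, 0)
#define STATUS_READY   0xf0u

int main(void)
{
    uint32_t reg = 0xabcd00f0u;    /* made-up register snapshot */

    /* Open-coded field check, as a helper like guc_ready() would do it ... */
    assert(FIELD_GET(UKERNEL_MASK, reg) == STATUS_READY);

    /* ... and the equivalent masked-equality form a generic wait can poll. */
    assert((reg & UKERNEL_MASK) == FIELD_PREP(UKERNEL_MASK, STATUS_READY));
    return 0;
}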
Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 52f42bd5cad7..7e8451e60d2d 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -403,24 +403,6 @@ static int guc_xfer_rsa(struct xe_guc *guc) return 0; } -/* - * Read the GuC status register (GUC_STATUS) and store it in the - * specified location; then return a boolean indicating whether - * the value matches either of two values representing completion - * of the GuC boot process. - * - * This is used for polling the GuC status in a wait_for() - * loop below. - */ -static bool guc_ready(struct xe_guc *guc, u32 *status) -{ - u32 val = xe_mmio_read32(guc_to_gt(guc), GUC_STATUS.reg); - u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val); - - *status = val; - return uk_val == XE_GUC_LOAD_STATUS_READY; -} - static int guc_wait_ucode(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -444,7 +426,11 @@ static int guc_wait_ucode(struct xe_guc *guc) * 200ms. Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. */ - ret = wait_for(guc_ready(guc, &status), 200); + ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS.reg, + FIELD_PREP(GS_UKERNEL_MASK, + XE_GUC_LOAD_STATUS_READY), + GS_UKERNEL_MASK, 200, &status); + if (ret) { struct drm_device *drm = &xe->drm; struct drm_printer p = drm_info_printer(drm->dev); -- cgit v1.2.3 From eb04985d7211a5fc651f8cca588b2d78d3a36cee Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:07 -0500 Subject: drm/xe: Wait for success on guc done. Rather than a constant check on proto and wait not busy, let's wait for the expected success and then check the protocol afterwards. With this, we can now use the regular xe_mmio_wait32 and kill this local need for the wait_for. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 7e8451e60d2d..6ecf493c26b5 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -22,13 +22,6 @@ #include "i915_reg_defs.h" #include "gt/intel_gt_regs.h" -#include -/* - * FIXME: This header has been deemed evil and we need to kill it. Temporarily - * including so we can use 'wait_for'. 
- */ -#include "i915_utils.h" - /* TODO: move to common file */ #define GUC_PVC_MOCS_INDEX_MASK REG_GENMASK(25, 24) #define PVC_MOCS_UC_INDEX 1 @@ -688,19 +681,17 @@ timeout: header = xe_mmio_read32(gt, reply_reg); if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { -#define done ({ header = xe_mmio_read32(gt, reply_reg); \ - FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != \ - GUC_HXG_ORIGIN_GUC || \ - FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != \ - GUC_HXG_TYPE_NO_RESPONSE_BUSY; }) - ret = wait_for(done, 1000); - if (unlikely(ret)) - goto timeout; + ret = xe_mmio_wait32(gt, reply_reg, + FIELD_PREP(GUC_HXG_MSG_0_TYPE, + GUC_HXG_TYPE_RESPONSE_SUCCESS), + GUC_HXG_MSG_0_TYPE, 1000, &header); + if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != - GUC_HXG_ORIGIN_GUC)) + GUC_HXG_ORIGIN_GUC)) goto proto; -#undef done + if (unlikely(ret)) + goto timeout; } if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == -- cgit v1.2.3 From b6f468b847d09ca1fe5cea2606a323be892f8893 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:08 -0500 Subject: drm/xe: Remove i915_utils dependency from xe_guc_pc. To make it simpler, all of the status checks also waits and times out. Also, no ktime precision is needed in this case, and we can use usleep_range because we are not in atomic paths here. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc_pc.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 260ccf3fe215..d751ee98de11 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -17,11 +17,6 @@ #include "i915_reg.h" #include -/* - * FIXME: This header has been deemed evil and we need to kill it. Temporarily - * including so we can use 'wait_for'. 
- */ -#include "i915_utils.h" #include "intel_mchbar_regs.h" @@ -135,10 +130,26 @@ pc_to_maps(struct xe_guc_pc *pc) (FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) -static bool pc_is_in_state(struct xe_guc_pc *pc, enum slpc_global_state state) +static int wait_for_pc_state(struct xe_guc_pc *pc, + enum slpc_global_state state) { + int timeout_us = 5000; /* rought 5ms, but no need for precision */ + int slept, wait = 10; + xe_device_assert_mem_access(pc_to_xe(pc)); - return slpc_shared_data_read(pc, header.global_state) == state; + + for (slept = 0; slept < timeout_us;) { + if (slpc_shared_data_read(pc, header.global_state) == state) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; } static int pc_action_reset(struct xe_guc_pc *pc) @@ -189,7 +200,7 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) 0, }; - if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) return -EAGAIN; /* Blocking here to ensure the results are ready before reading them */ @@ -212,7 +223,7 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) value, }; - if (!pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); @@ -747,7 +758,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (ret) goto out; - if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_RUNNING), 5)) { + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) { drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n"); ret = -EIO; goto out; @@ -793,7 +804,7 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) if (ret) goto out; - if (wait_for(pc_is_in_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING), 5)) { + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) { drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); ret = -EIO; } -- cgit v1.2.3 From b56d208273bf5be6593d0dcd2d471f771c08a805 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:09 -0500 Subject: drm/xe: Stop using i915_utils in xe_wopcm. We don't need any macro for a simple check we can do explicitly and clear. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_wopcm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index e4a8d4a1899e..8fe182afa06c 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -11,8 +11,6 @@ #include "xe_uc_fw.h" #include "xe_wopcm.h" -#include "i915_utils.h" - /** * DOC: Write Once Protected Content Memory (WOPCM) Layout * @@ -92,7 +90,8 @@ static bool __check_layout(struct xe_device *xe, u32 wopcm_size, u32 size; size = wopcm_size - ctx_rsvd; - if (unlikely(range_overflows(guc_wopcm_base, guc_wopcm_size, size))) { + if (unlikely(guc_wopcm_base >= size || + guc_wopcm_size > size - guc_wopcm_base)) { drm_err(&xe->drm, "WOPCM: invalid GuC region layout: %uK + %uK > %uK\n", guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K, -- cgit v1.2.3 From eeb8019d8c6fba1eae6ef8a238b42ff9b39dbaa4 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:10 -0500 Subject: drm/xe: Let's avoid i915_utils in the xe_force_wake. We can run the bit operation locally without yet another macro. 
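For reference, a small self-contained sketch of the ffs()-based iteration the
reworked macro relies on (the mask value and printout below are made up for
illustration; this is not kernel code):

#include <stdio.h>
#include <strings.h>    /* ffs() */

int main(void)
{
    unsigned int mask = 0x2c;    /* made-up domain mask: bits 2, 3 and 5 set */
    unsigned int tmp, bit = 0;

    /*
     * ffs() returns the 1-based index of the lowest set bit; the loop
     * "increment" clears the bit that was just handled, which is what the
     * reworked for_each_fw_domain_masked() does instead of __mask_next_bit().
     */
    for (tmp = mask; tmp; tmp &= ~(1u << bit)) {
        bit = ffs(tmp) - 1;
        printf("domain index %u\n", bit);
    }
    return 0;
}

The destructive helper is no longer needed because the loop's own increment clears
the bit that was just visited.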
Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_force_wake.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index a203eabba4e2..b87bf3b4cd52 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -10,11 +10,6 @@ #include "xe_mmio.h" #include "gt/intel_gt_regs.h" -/* - * FIXME: This header has been deemed evil and we need to kill it. Temporarily - * including so we can use '__mask_next_bit'. - */ -#include "i915_utils.h" #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 @@ -145,9 +140,9 @@ static int domain_sleep_wait(struct xe_gt *gt, } #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ - for (tmp__ = (mask__); tmp__ ;) \ + for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \ for_each_if((domain__ = ((fw__)->domains + \ - __mask_next_bit(tmp__))) && \ + (ffs(tmp__) - 1))) && \ domain__->reg_ctl) int xe_force_wake_get(struct xe_force_wake *fw, -- cgit v1.2.3 From 81593af6c88d3482997e43f0a85ccd93cc4928df Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:11 -0500 Subject: drm/xe: Convert xe_mmio_wait32 to us so we can stop using wait_for_us. Another clean-up towards killing the usage of i915_utils.h Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_force_wake.c | 6 ++++-- drivers/gpu/drm/xe/xe_gt.c | 3 ++- drivers/gpu/drm/xe/xe_gt_mcr.c | 9 +-------- drivers/gpu/drm/xe/xe_guc.c | 9 +++++---- drivers/gpu/drm/xe/xe_huc.c | 2 +- drivers/gpu/drm/xe/xe_mmio.h | 4 ++-- drivers/gpu/drm/xe/xe_uc_fw.c | 2 +- 7 files changed, 16 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index b87bf3b4cd52..1f7b68f61ec5 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -124,7 +124,8 @@ static int domain_wake_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, - XE_FORCE_WAKE_ACK_TIMEOUT_MS, NULL); + XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, + NULL); } static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) @@ -136,7 +137,8 @@ static int domain_sleep_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val, - XE_FORCE_WAKE_ACK_TIMEOUT_MS, NULL); + XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, + NULL); } #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 6a84d2a1c7f3..bdc64219ed4c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -599,7 +599,8 @@ int do_gt_reset(struct xe_gt *gt) int err; xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL); - err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5, NULL); + err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5000, + NULL); if (err) drm_err(&xe->drm, "GT reset failed to clear GEN11_GRDOM_FULL\n"); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 8add5ec9a307..f4bfff98d5f4 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -11,13 +11,6 @@ #include "gt/intel_gt_regs.h" -#include -/* - * FIXME: This header has been deemed evil and we need to kill it. Temporar - * including so we can use 'wait_for'. 
- */ -#include "i915_utils.h" - /** * DOC: GT Multicast/Replicated (MCR) Register Support * @@ -383,7 +376,7 @@ static void mcr_lock(struct xe_gt *gt) * shares the same steering control register. */ if (GRAPHICS_VERx100(xe) >= 1270) - ret = wait_for_us(xe_mmio_read32(gt, STEER_SEMAPHORE) == 0x1, 10); + ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL); drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); } diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 6ecf493c26b5..2deb1f6544ea 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -324,7 +324,8 @@ int xe_guc_reset(struct xe_guc *guc) xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC); - ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5, &gdrst); + ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5000, + &gdrst); if (ret) { drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", gdrst); @@ -422,7 +423,7 @@ static int guc_wait_ucode(struct xe_guc *guc) ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS.reg, FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), - GS_UKERNEL_MASK, 200, &status); + GS_UKERNEL_MASK, 200000, &status); if (ret) { struct drm_device *drm = &xe->drm; @@ -670,7 +671,7 @@ retry: ret = xe_mmio_wait32(gt, reply_reg, FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), - GUC_HXG_MSG_0_ORIGIN, 50, &reply); + GUC_HXG_MSG_0_ORIGIN, 50000, &reply); if (ret) { timeout: drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n", @@ -685,7 +686,7 @@ timeout: ret = xe_mmio_wait32(gt, reply_reg, FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), - GUC_HXG_MSG_0_TYPE, 1000, &header); + GUC_HXG_MSG_0_TYPE, 1000000, &header); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC)) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index c8c93bdf4760..9cb15bb40a38 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -85,7 +85,7 @@ int xe_huc_auth(struct xe_huc *huc) ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg, HUC_LOAD_SUCCESSFUL, - HUC_LOAD_SUCCESSFUL, 100, NULL); + HUC_LOAD_SUCCESSFUL, 100000, NULL); if (ret) { drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); goto fail; diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index ccd97a4a89c1..f72edfb39cc0 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -83,10 +83,10 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, } static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, - u32 mask, u32 timeout_ms, u32 *out_val) + u32 mask, u32 timeout_us, u32 *out_val) { ktime_t cur = ktime_get_raw(); - const ktime_t end = ktime_add_ms(cur, timeout_ms); + const ktime_t end = ktime_add_us(cur, timeout_us); int ret = -ETIMEDOUT; s64 wait = 10; u32 read; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index edd6a5d2db34..bbb931bc19ce 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -352,7 +352,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) _MASKED_BIT_ENABLE(dma_flags | START_DMA)); /* Wait for DMA to finish */ - ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100, &dma_ctrl); + ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100000, &dma_ctrl); if (ret) drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", xe_uc_fw_type_repr(uc_fw->type), dma_ctrl); -- cgit v1.2.3 From 7dc9b92dcfeff727776bca5ab11b3e0f3445ece2 Mon Sep 17 00:00:00 
2001 From: Rodrigo Vivi Date: Thu, 12 Jan 2023 17:25:12 -0500 Subject: drm/xe: Remove i915_utils dependency from xe_pcode. Expand xe_mmio_wait32 to accept atomic and then use that directly when possible, and create own routine to wait for the pcode status. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_force_wake.c | 4 +- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_gt_mcr.c | 3 +- drivers/gpu/drm/xe/xe_guc.c | 9 ++-- drivers/gpu/drm/xe/xe_huc.c | 2 +- drivers/gpu/drm/xe/xe_mmio.h | 9 ++-- drivers/gpu/drm/xe/xe_pcode.c | 94 +++++++++++++++----------------------- drivers/gpu/drm/xe/xe_uc_fw.c | 3 +- 8 files changed, 57 insertions(+), 69 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 1f7b68f61ec5..d2080e6fbe10 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -125,7 +125,7 @@ static int domain_wake_wait(struct xe_gt *gt, { return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - NULL); + NULL, false); } static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) @@ -138,7 +138,7 @@ static int domain_sleep_wait(struct xe_gt *gt, { return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - NULL); + NULL, false); } #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index bdc64219ed4c..fd8232a4556e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -600,7 +600,7 @@ int do_gt_reset(struct xe_gt *gt) xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL); err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5000, - NULL); + NULL, false); if (err) drm_err(&xe->drm, "GT reset failed to clear GEN11_GRDOM_FULL\n"); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index f4bfff98d5f4..ddce2c41c7f5 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -376,7 +376,8 @@ static void mcr_lock(struct xe_gt *gt) * shares the same steering control register. 
*/ if (GRAPHICS_VERx100(xe) >= 1270) - ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL); + ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL, + false); drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); } diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 2deb1f6544ea..4a7e8f9a14d5 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -325,7 +325,7 @@ int xe_guc_reset(struct xe_guc *guc) xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC); ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5000, - &gdrst); + &gdrst, false); if (ret) { drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", gdrst); @@ -423,7 +423,7 @@ static int guc_wait_ucode(struct xe_guc *guc) ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS.reg, FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), - GS_UKERNEL_MASK, 200000, &status); + GS_UKERNEL_MASK, 200000, &status, false); if (ret) { struct drm_device *drm = &xe->drm; @@ -671,7 +671,7 @@ retry: ret = xe_mmio_wait32(gt, reply_reg, FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), - GUC_HXG_MSG_0_ORIGIN, 50000, &reply); + GUC_HXG_MSG_0_ORIGIN, 50000, &reply, false); if (ret) { timeout: drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n", @@ -686,7 +686,8 @@ timeout: ret = xe_mmio_wait32(gt, reply_reg, FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), - GUC_HXG_MSG_0_TYPE, 1000000, &header); + GUC_HXG_MSG_0_TYPE, 1000000, &header, + false); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC)) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 9cb15bb40a38..82e7fb3a6292 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -85,7 +85,7 @@ int xe_huc_auth(struct xe_huc *huc) ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg, HUC_LOAD_SUCCESSFUL, - HUC_LOAD_SUCCESSFUL, 100000, NULL); + HUC_LOAD_SUCCESSFUL, 100000, NULL, false); if (ret) { drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); goto fail; diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index f72edfb39cc0..adc7d7484afb 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -82,8 +82,8 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, return (reg_val & mask) != eval ? -EINVAL : 0; } -static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, - u32 mask, u32 timeout_us, u32 *out_val) +static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask, + u32 timeout_us, u32 *out_val, bool atomic) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_us(cur, timeout_us); @@ -108,7 +108,10 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, if (ktime_after(ktime_add_us(cur, wait), end)) wait = ktime_us_delta(end, cur); - usleep_range(wait, wait << 1); + if (atomic) + udelay(wait); + else + usleep_range(wait, wait << 1); wait <<= 1; } diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 313ccd70d1a9..39712e843728 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -12,11 +12,6 @@ #include #include -/* - * FIXME: This header has been deemed evil and we need to kill it. Temporarily - * including so we can use 'wait_for'. 
- */ -#include "i915_utils.h" /** * DOC: PCODE @@ -59,28 +54,24 @@ static int pcode_mailbox_status(struct xe_gt *gt) return 0; } -static bool pcode_mailbox_done(struct xe_gt *gt) -{ - lockdep_assert_held(>->pcode.lock); - return (xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) == 0; -} - static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, - unsigned int timeout, bool return_data, bool atomic) + unsigned int timeout_ms, bool return_data, + bool atomic) { + int err; lockdep_assert_held(>->pcode.lock); - if (!pcode_mailbox_done(gt)) + if ((xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) != 0) return -EAGAIN; xe_mmio_write32(gt, PCODE_DATA0.reg, *data0); xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0); xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox); - if (atomic) - _wait_for_atomic(pcode_mailbox_done(gt), timeout * 1000, 1); - else - wait_for(pcode_mailbox_done(gt), timeout); + err = xe_mmio_wait32(gt, PCODE_MAILBOX.reg, 0, PCODE_READY, + timeout_ms * 1000, NULL, atomic); + if (err) + return err; if (return_data) { *data0 = xe_mmio_read32(gt, PCODE_DATA0.reg); @@ -113,13 +104,26 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) return err; } -static bool xe_pcode_try_request(struct xe_gt *gt, u32 mbox, - u32 request, u32 reply_mask, u32 reply, - u32 *status, bool atomic) +static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + u32 *status, bool atomic, int timeout_us) { - *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, atomic); + int slept, wait = 10; + + for (slept = 0; slept < timeout_us; slept += wait) { + *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + atomic); + if ((*status == 0) && ((request & reply_mask) == reply)) + return 0; + + if (atomic) + udelay(wait); + else + usleep_range(wait, wait << 1); + wait <<= 1; + } - return (*status == 0) && ((request & reply_mask) == reply); + return -ETIMEDOUT; } /** @@ -146,25 +150,12 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, { u32 status; int ret; - bool atomic = false; mutex_lock(>->pcode.lock); -#define COND \ - xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, atomic) - - /* - * Prime the PCODE by doing a request first. Normally it guarantees - * that a subsequent request, at most @timeout_base_ms later, succeeds. - * _wait_for() doesn't guarantee when its passed condition is evaluated - * first, so send the first request explicitly. - */ - if (COND) { - ret = 0; - goto out; - } - ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10); - if (!ret) + ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + false, timeout_base_ms * 1000); + if (ret) goto out; /* @@ -181,15 +172,13 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, "PCODE timeout, retrying with preemption disabled\n"); drm_WARN_ON_ONCE(>_to_xe(gt)->drm, timeout_base_ms > 1); preempt_disable(); - atomic = true; - ret = wait_for_atomic(COND, 50); - atomic = false; + ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + true, timeout_base_ms * 1000); preempt_enable(); out: mutex_unlock(>->pcode.lock); return status ? 
status : ret; -#undef COND } /** * xe_pcode_init_min_freq_table - Initialize PCODE's QOS frequency table @@ -243,16 +232,6 @@ unlock: return ret; } -static bool pcode_dgfx_status_complete(struct xe_gt *gt) -{ - u32 data = DGFX_GET_INIT_STATUS; - int status = pcode_mailbox_rw(gt, DGFX_PCODE_STATUS, - &data, NULL, 1, true, false); - - return status == 0 && - (data & DGFX_INIT_STATUS_COMPLETE) == DGFX_INIT_STATUS_COMPLETE; -} - /** * xe_pcode_init - Ensure PCODE is initialized * @gt: gt instance @@ -264,20 +243,23 @@ static bool pcode_dgfx_status_complete(struct xe_gt *gt) */ int xe_pcode_init(struct xe_gt *gt) { - int timeout = 180000; /* 3 min */ + u32 status, request = DGFX_GET_INIT_STATUS; + int timeout_us = 180000000; /* 3 min */ int ret; if (!IS_DGFX(gt_to_xe(gt))) return 0; mutex_lock(>->pcode.lock); - ret = wait_for(pcode_dgfx_status_complete(gt), timeout); + ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request, + DGFX_INIT_STATUS_COMPLETE, + DGFX_INIT_STATUS_COMPLETE, + &status, false, timeout_us); mutex_unlock(>->pcode.lock); if (ret) drm_err(>_to_xe(gt)->drm, - "PCODE initialization timedout after: %d min\n", - timeout / 60000); + "PCODE initialization timedout after: 3 min\n"); return ret; } diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index bbb931bc19ce..cd264cf50d30 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -352,7 +352,8 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) _MASKED_BIT_ENABLE(dma_flags | START_DMA)); /* Wait for DMA to finish */ - ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100000, &dma_ctrl); + ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100000, &dma_ctrl, + false); if (ret) drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", xe_uc_fw_type_repr(uc_fw->type), dma_ctrl); -- cgit v1.2.3 From e9d285ff9d4998d20790395adc8a62f283bdb72b Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 12 Jan 2023 17:25:13 -0500 Subject: drm/xe/migrate: Add kerneldoc for the migrate subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add kerneldoc for structs and external functions. Signed-off-by: Thomas Hellström Cc: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Mauro Carvalho Chehab --- drivers/gpu/drm/xe/xe_migrate.c | 108 +++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_migrate.h | 16 +++++- 2 files changed, 120 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7fc40e8009c3..d3fa7bec78d3 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -27,16 +27,37 @@ #include "gt/intel_gpu_commands.h" +/** + * struct xe_migrate - migrate context. + */ struct xe_migrate { + /** @eng: Default engine used for migration */ struct xe_engine *eng; + /** @gt: Backpointer to the gt this struct xe_migrate belongs to. */ struct xe_gt *gt; + /** @job_mutex: Timeline mutex for @eng. */ struct mutex job_mutex; + /** @pt_bo: Page-table buffer object. */ struct xe_bo *pt_bo; + /** + * @cleared_bo: Zeroed out bo used as a source for CCS metadata clears + */ struct xe_bo *cleared_bo; + /** @batch_base_ofs: VM offset of the migration batch buffer */ u64 batch_base_ofs; + /** @usm_batch_base_ofs: VM offset of the usm batch buffer */ u64 usm_batch_base_ofs; + /** @cleared_vram_ofs: VM offset of @cleared_bo. 
*/ u64 cleared_vram_ofs; + /** + * @fence: dma-fence representing the last migration job batch. + * Protected by @job_mutex. + */ struct dma_fence *fence; + /** + * @vm_update_sa: For integrated, used to suballocate page-tables + * out of the pt_bo. + */ struct drm_suballoc_manager vm_update_sa; }; @@ -45,6 +66,15 @@ struct xe_migrate { #define NUM_PT_SLOTS 32 #define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M) +/** + * xe_gt_migrate_engine() - Get this gt's migrate engine. + * @gt: The gt. + * + * Returns the default migrate engine of this gt. + * TODO: Perhaps this function is slightly misplaced, and even unneeded? + * + * Return: The default migrate engine + */ struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt) { return gt->migrate->eng; @@ -271,6 +301,12 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, return 0; } +/** + * xe_migrate_init() - Initialize a migrate context + * @gt: Back-pointer to the gt we're initializing for. + * + * Return: Pointer to a migrate context on success. Error pointer on error. + */ struct xe_migrate *xe_migrate_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -540,6 +576,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, return flush_flags; } +/** + * xe_migrate_copy() - Copy content of TTM resources. + * @m: The migration context. + * @bo: The buffer object @src is currently bound to. + * @src: The source TTM resource. + * @dst: The dst TTM resource. + * + * Copies the contents of @src to @dst: On flat CCS devices, + * the CCS metadata is copied as well if needed, or if not present, + * the CCS metadata of @dst is cleared for security reasons. + * It's currently not possible to copy between two system resources, + * since that would require two TTM page-vectors. + * TODO: Eliminate the @bo argument and supply two TTM page-vectors. + * + * Return: Pointer to a dma_fence representing the last copy batch, or + * an error pointer on failure. If there is a failure, any copy operation + * started by the function call has been synced. + */ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *bo, struct ttm_resource *src, @@ -683,7 +737,7 @@ err: xe_bb_free(bb, NULL); err_sync: - /* Sync partial copy if any. */ + /* Sync partial copy if any. FIXME: under job_mutex? */ if (fence) { dma_fence_wait(fence, false); dma_fence_put(fence); @@ -733,6 +787,21 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, return 0; } +/** + * xe_migrate_clear() - Copy content of TTM resources. + * @m: The migration context. + * @bo: The buffer object @dst is currently bound to. + * @dst: The dst TTM resource to be cleared. + * @value: Clear value. + * + * Clear the contents of @dst. On flat CCS devices, + * the CCS metadata is cleared to zero as well on VRAM destionations. + * TODO: Eliminate the @bo argument. + * + * Return: Pointer to a dma_fence representing the last clear batch, or + * an error pointer on failure. If there is a failure, any clear operation + * started by the function call has been synced. + */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, struct ttm_resource *dst, @@ -836,7 +905,7 @@ err: mutex_unlock(&m->job_mutex); xe_bb_free(bb, NULL); err_sync: - /* Sync partial copies if any. */ + /* Sync partial copies if any. FIXME: job_mutex? 
*/ if (fence) { dma_fence_wait(m->fence, false); dma_fence_put(fence); @@ -974,6 +1043,33 @@ static bool engine_is_idle(struct xe_engine *e) xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno; } +/** + * xe_migrate_update_pgtables() - Pipelined page-table update + * @m: The migrate context. + * @vm: The vm we'll be updating. + * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr. + * @eng: The engine to be used for the update or NULL if the default + * migration engine is to be used. + * @updates: An array of update descriptors. + * @num_updates: Number of descriptors in @updates. + * @syncs: Array of xe_sync_entry to await before updating. Note that waits + * will block the engine timeline. + * @num_syncs: Number of entries in @syncs. + * @pt_update: Pointer to a struct xe_migrate_pt_update, which contains + * pointers to callback functions and, if subclassed, private arguments to + * those. + * + * Perform a pipelined page-table update. The update descriptors are typically + * built under the same lock critical section as a call to this function. If + * using the default engine for the updates, they will be performed in the + * order they grab the job_mutex. If different engines are used, external + * synchronization is needed for overlapping updates to maintain page-table + * consistency. Note that the meaing of "overlapping" is that the updates + * touch the same page-table, which might be a higher-level page-directory. + * If no pipelining is needed, then updates may be performed by the cpu. + * + * Return: A dma_fence that, when signaled, indicates the update completion. + */ struct dma_fence * xe_migrate_update_pgtables(struct xe_migrate *m, struct xe_vm *vm, @@ -1157,6 +1253,14 @@ err: return ERR_PTR(err); } +/** + * xe_migrate_wait() - Complete all operations using the xe_migrate context + * @m: Migrate context to wait for. + * + * Waits until the GPU no longer uses the migrate context's default engine + * or its page-table objects. FIXME: What about separate page-table update + * engines? + */ void xe_migrate_wait(struct xe_migrate *m) { if (m->fence) diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 267057a3847f..b2d55283252f 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -23,9 +23,13 @@ struct xe_vm; struct xe_vm_pgtable_update; struct xe_vma; +/** + * struct xe_migrate_pt_update_ops - Callbacks for the + * xe_migrate_update_pgtables() function. + */ struct xe_migrate_pt_update_ops { /** - * populate() - Populate a command buffer or page-table with ptes. + * @populate: Populate a command buffer or page-table with ptes. * @pt_update: Embeddable callback argument. * @gt: The gt for the current operation. * @map: struct iosys_map into the memory to be populated. @@ -44,7 +48,7 @@ struct xe_migrate_pt_update_ops { const struct xe_vm_pgtable_update *update); /** - * pre_commit(): Callback to be called just before arming the + * @pre_commit: Callback to be called just before arming the * sched_job. * @pt_update: Pointer to embeddable callback argument. * @@ -53,8 +57,16 @@ struct xe_migrate_pt_update_ops { int (*pre_commit)(struct xe_migrate_pt_update *pt_update); }; +/** + * struct xe_migrate_pt_update - Argument to the + * struct xe_migrate_pt_update_ops callbacks. + * + * Intended to be subclassed to support additional arguments if necessary. 
+ */ struct xe_migrate_pt_update { + /** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */ const struct xe_migrate_pt_update_ops *ops; + /** @vma: The vma we're updating the pagetable for. */ struct xe_vma *vma; }; -- cgit v1.2.3 From 765b65e5bde79a9e8332c58f54a98e20fdb25fc7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Jan 2023 17:25:14 -0500 Subject: drm/xe: Take memory ref on kernel job creation When a job is inflight we may access memory to read the hardware seqno. All user jobs have VM open which has a ref but kernel jobs do not require VM so it is possible to not have memory ref. To avoid this, take a memory ref on kernel job creation. Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sched_job.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index ab81bfe17e8a..d9add0370a98 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -8,7 +8,7 @@ #include #include -#include "xe_device_types.h" +#include "xe_device.h" #include "xe_engine.h" #include "xe_gt.h" #include "xe_hw_engine_types.h" @@ -72,6 +72,11 @@ static void job_free(struct xe_sched_job *job) xe_sched_job_parallel_slab : xe_sched_job_slab, job); } +static struct xe_device *job_to_xe(struct xe_sched_job *job) +{ + return gt_to_xe(job->engine->gt); +} + struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, u64 *batch_addr) { @@ -149,6 +154,11 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, for (i = 0; i < width; ++i) job->batch_addr[i] = batch_addr[i]; + /* All other jobs require a VM to be open which has a ref */ + if (unlikely(e->flags & ENGINE_FLAG_KERNEL)) + xe_device_mem_access_get(job_to_xe(job)); + xe_device_assert_mem_access(job_to_xe(job)); + trace_xe_sched_job_create(job); return job; @@ -178,6 +188,8 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); + if (unlikely(job->engine->flags & ENGINE_FLAG_KERNEL)) + xe_device_mem_access_put(job_to_xe(job)); xe_engine_put(job->engine); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); -- cgit v1.2.3 From d8b52a02cb40fe355374e8b0b89763fefc697b53 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 12 Jan 2023 17:25:17 -0500 Subject: drm/xe: Implement stolen memory. This adds support for stolen memory, with the same allocator as vram_mgr. This allows us to skip a whole lot of copy-paste, by re-using parts of xe_ttm_vram_mgr. The stolen memory may be bound using VM_BIND, so it performs like any other memory region. We should be able to map a stolen BO directly using the physical memory location instead of through GGTT even on old platforms, but I don't know what the effects are on coherency. 
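As an illustration of how the new placement could be requested, a driver-internal allocation might look roughly like the sketch below. This is only a sketch, assuming the helpers and flags added by this series (xe_bo_create_pin_map(), XE_BO_CREATE_STOLEN_BIT, XE_BO_CREATE_PINNED_BIT); error handling and teardown are omitted.

	/*
	 * Minimal sketch: pin a small kernel-internal buffer in stolen memory.
	 * XE_BO_CREATE_PINNED_BIT keeps the placement contiguous so the BO can
	 * be pinned; on platforms where stolen is not CPU-accessible the
	 * helper adds a GGTT mapping by itself.
	 */
	struct xe_bo *bo;

	bo = xe_bo_create_pin_map(xe, gt, NULL /* no VM */, SZ_64K,
				  ttm_bo_type_kernel,
				  XE_BO_CREATE_STOLEN_BIT |
				  XE_BO_CREATE_PINNED_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);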
Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_bo.c | 173 +++++++++++++++++--- drivers/gpu/drm/xe/xe_bo.h | 16 +- drivers/gpu/drm/xe/xe_debugfs.c | 4 + drivers/gpu/drm/xe/xe_device.c | 4 + drivers/gpu/drm/xe/xe_mmio.c | 58 ++++--- drivers/gpu/drm/xe/xe_mmio.h | 1 + drivers/gpu/drm/xe/xe_pt.c | 5 +- drivers/gpu/drm/xe/xe_res_cursor.h | 47 ++++-- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 244 +++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h | 21 +++ drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 91 +++++------ drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 18 +-- drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 2 + 14 files changed, 560 insertions(+), 125 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c create mode 100644 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 228a87f2fe7b..f8da32b550bc 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -93,6 +93,7 @@ xe-y += xe_bb.o \ xe_sync.o \ xe_trace.o \ xe_ttm_gtt_mgr.o \ + xe_ttm_stolen_mgr.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ xe_uc.o \ diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ef2c9196c113..f07d1cd63fdd 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -24,6 +24,7 @@ #include "xe_preempt_fence.h" #include "xe_res_cursor.h" #include "xe_trace.h" +#include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" static const struct ttm_place sys_placement_flags = { @@ -42,7 +43,12 @@ static struct ttm_placement sys_placement = { bool mem_type_is_vram(u32 mem_type) { - return mem_type >= XE_PL_VRAM0; + return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN; +} + +static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res) +{ + return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe); } static bool resource_is_vram(struct ttm_resource *res) @@ -52,7 +58,13 @@ static bool resource_is_vram(struct ttm_resource *res) bool xe_bo_is_vram(struct xe_bo *bo) { - return resource_is_vram(bo->ttm.resource); + return resource_is_vram(bo->ttm.resource) || + resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource); +} + +bool xe_bo_is_stolen(struct xe_bo *bo) +{ + return bo->ttm.resource->mem_type == XE_PL_STOLEN; } static bool xe_bo_is_user(struct xe_bo *bo) @@ -63,9 +75,9 @@ static bool xe_bo_is_user(struct xe_bo *bo) static struct xe_gt * mem_type_to_gt(struct xe_device *xe, u32 mem_type) { - XE_BUG_ON(!mem_type_is_vram(mem_type)); + XE_BUG_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type)); - return xe_device_get_gt(xe, mem_type - XE_PL_VRAM0); + return xe_device_get_gt(xe, mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)); } static void try_add_system(struct xe_bo *bo, struct ttm_place *places, @@ -134,6 +146,20 @@ static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo, } } +static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, + struct ttm_place *places, u32 bo_flags, u32 *c) +{ + if (bo_flags & XE_BO_CREATE_STOLEN_BIT) { + places[*c] = (struct ttm_place) { + .mem_type = XE_PL_STOLEN, + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT) ? 
+ TTM_PL_FLAG_CONTIGUOUS : 0, + }; + *c += 1; + } +} + static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags) { @@ -162,6 +188,7 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, try_add_vram1(xe, bo, places, bo_flags, &c); try_add_system(bo, places, bo_flags, &c); } + try_add_stolen(xe, bo, places, bo_flags, &c); if (!c) return -EINVAL; @@ -209,6 +236,7 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, switch (tbo->resource->mem_type) { case XE_PL_VRAM0: case XE_PL_VRAM1: + case XE_PL_STOLEN: case XE_PL_TT: default: /* for now kick out to system */ @@ -362,11 +390,12 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, #if !defined(CONFIG_X86) mem->bus.caching = ttm_write_combined; #endif - break; + return 0; + case XE_PL_STOLEN: + return xe_ttm_stolen_io_mem_reserve(xe, mem); default: return -EINVAL; } - return 0; } static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, @@ -673,14 +702,18 @@ out: } -static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *bo, +static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo, unsigned long page_offset) { - struct xe_device *xe = ttm_to_xe_device(bo->bdev); - struct xe_gt *gt = mem_type_to_gt(xe, bo->resource->mem_type); + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_gt *gt = mem_type_to_gt(xe, ttm_bo->resource->mem_type); struct xe_res_cursor cursor; - xe_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor); + if (ttm_bo->resource->mem_type == XE_PL_STOLEN) + return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT; + + xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor); return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT; } @@ -945,7 +978,8 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return bo; } - if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT) && + if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT | + XE_BO_CREATE_STOLEN_BIT) && !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { size = ALIGN(size, SZ_64K); @@ -973,9 +1007,11 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, ctx.resv = resv; } - err = __xe_bo_placement_for_flags(xe, bo, bo->flags); - if (WARN_ON(err)) - return ERR_PTR(err); + if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) { + err = __xe_bo_placement_for_flags(xe, bo, bo->flags); + if (WARN_ON(err)) + return ERR_PTR(err); + } /* Defer populating type_sg bos */ placement = (type == ttm_bo_type_sg || @@ -993,16 +1029,73 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return bo; } -struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) +static int __xe_bo_fixed_placement(struct xe_device *xe, + struct xe_bo *bo, + u32 flags, + u64 start, u64 end, u64 size) { - struct xe_bo *bo; + struct ttm_place *place = bo->placements; + + if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT)) + return -EINVAL; + + place->flags = TTM_PL_FLAG_CONTIGUOUS; + place->fpfn = start >> PAGE_SHIFT; + place->lpfn = end >> PAGE_SHIFT; + + switch (flags & (XE_BO_CREATE_STOLEN_BIT | + XE_BO_CREATE_VRAM0_BIT |XE_BO_CREATE_VRAM1_BIT)) { + case XE_BO_CREATE_VRAM0_BIT: + place->mem_type = XE_PL_VRAM0; + break; + case XE_BO_CREATE_VRAM1_BIT: + 
place->mem_type = XE_PL_VRAM1; + break; + case XE_BO_CREATE_STOLEN_BIT: + place->mem_type = XE_PL_STOLEN; + break; + + default: + /* 0 or multiple of the above set */ + return -EINVAL; + } + + bo->placement = (struct ttm_placement) { + .num_placement = 1, + .placement = place, + .num_busy_placement = 1, + .busy_placement = place, + }; + + return 0; +} + +struct xe_bo * +xe_bo_create_locked_range(struct xe_device *xe, + struct xe_gt *gt, struct xe_vm *vm, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags) +{ + struct xe_bo *bo = NULL; int err; if (vm) xe_vm_assert_held(vm); - bo = __xe_bo_create_locked(xe, NULL, gt, vm ? &vm->resv : NULL, size, + + if (start || end != ~0ULL) { + bo = xe_bo_alloc(); + if (IS_ERR(bo)) + return bo; + + flags |= XE_BO_FIXED_PLACEMENT_BIT; + err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size); + if (err) { + xe_bo_free(bo); + return ERR_PTR(err); + } + } + + bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL, size, type, flags); if (IS_ERR(bo)) return bo; @@ -1011,7 +1104,10 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, xe_vm_get(vm); bo->vm = vm; - if (flags & XE_BO_CREATE_GGTT_BIT) { + if (bo->flags & XE_BO_CREATE_GGTT_BIT) { + if (!gt && flags & XE_BO_CREATE_STOLEN_BIT) + gt = xe_device_get_gt(xe, 0); + XE_BUG_ON(!gt); err = xe_ggtt_insert_bo(gt->mem.ggtt, bo); @@ -1027,6 +1123,13 @@ err_unlock_put_bo: return ERR_PTR(err); } +struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + return xe_bo_create_locked_range(xe, gt, vm, size, 0, ~0ULL, type, flags); +} + struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags) @@ -1039,13 +1142,21 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, return bo; } -struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) +struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags) { - struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags); + struct xe_bo *bo; int err; + u64 start = offset == ~0ull ? 0 : offset; + u64 end = offset == ~0ull ? 
offset : start + size; + + if (flags & XE_BO_CREATE_STOLEN_BIT && + xe_ttm_stolen_inaccessible(xe)) + flags |= XE_BO_CREATE_GGTT_BIT; + bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags); if (IS_ERR(bo)) return bo; @@ -1069,6 +1180,13 @@ err_put: return ERR_PTR(err); } +struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, + enum ttm_bo_type type, u32 flags) +{ + return xe_bo_create_pin_map_at(xe, gt, vm, size, ~0ull, type, flags); +} + struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, const void *data, size_t size, enum ttm_bo_type type, u32 flags) @@ -1093,6 +1211,9 @@ static uint64_t vram_region_io_offset(struct xe_bo *bo) struct xe_device *xe = xe_bo_device(bo); struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type); + if (bo->ttm.resource->mem_type == XE_PL_STOLEN) + return xe_ttm_stolen_gpu_offset(xe); + return gt->mem.vram.io_start - xe->mem.vram.io_start; } @@ -1174,7 +1295,7 @@ int xe_bo_pin(struct xe_bo *bo) bool lmem; XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); - XE_BUG_ON(!mem_type_is_vram(place->mem_type)); + XE_BUG_ON(!mem_type_is_vram(place->mem_type) && place->mem_type != XE_PL_STOLEN); place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) - vram_region_io_offset(bo)) >> PAGE_SHIFT; @@ -1305,7 +1426,7 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, *is_lmem = xe_bo_is_vram(bo); - if (!*is_lmem) { + if (!*is_lmem && !xe_bo_is_stolen(bo)) { XE_BUG_ON(!bo->ttm.ttm); xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 1a49c0a3c4c6..8d8a3332dbc8 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -12,8 +12,9 @@ #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ -#define XE_BO_CREATE_USER_BIT BIT(1) -#define XE_BO_CREATE_SYSTEM_BIT BIT(2) +#define XE_BO_CREATE_USER_BIT BIT(0) +#define XE_BO_CREATE_SYSTEM_BIT BIT(1) +#define XE_BO_CREATE_STOLEN_BIT BIT(2) #define XE_BO_CREATE_VRAM0_BIT BIT(3) #define XE_BO_CREATE_VRAM1_BIT BIT(4) #define XE_BO_CREATE_VRAM_IF_DGFX(gt) \ @@ -24,6 +25,7 @@ #define XE_BO_CREATE_PINNED_BIT BIT(7) #define XE_BO_DEFER_BACKING BIT(8) #define XE_BO_SCANOUT_BIT BIT(9) +#define XE_BO_FIXED_PLACEMENT_BIT BIT(10) /* this one is trigger internally only */ #define XE_BO_INTERNAL_TEST BIT(30) #define XE_BO_INTERNAL_64K BIT(31) @@ -64,6 +66,7 @@ #define XE_PL_TT TTM_PL_TT #define XE_PL_VRAM0 TTM_PL_VRAM #define XE_PL_VRAM1 (XE_PL_VRAM0 + 1) +#define XE_PL_STOLEN (TTM_NUM_MEM_TYPES - 1) #define XE_BO_PROPS_INVALID (-1) @@ -76,6 +79,11 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, struct xe_gt *gt, struct dma_resv *resv, size_t size, enum ttm_bo_type type, u32 flags); +struct xe_bo * +xe_bo_create_locked_range(struct xe_device *xe, + struct xe_gt *gt, struct xe_vm *vm, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags); struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); @@ -85,6 +93,9 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, size_t size, u64 offset, + enum ttm_bo_type type, u32 flags); struct xe_bo *xe_bo_create_from_data(struct 
xe_device *xe, struct xe_gt *gt, const void *data, size_t size, enum ttm_bo_type type, u32 flags); @@ -206,6 +217,7 @@ void xe_bo_vunmap(struct xe_bo *bo); bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); +bool xe_bo_is_stolen(struct xe_bo *bo); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 84db7b3f501e..b0f8b157ffa3 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -124,6 +124,10 @@ void xe_debugfs_register(struct xe_device *xe) man = ttm_manager_type(bdev, XE_PL_TT); ttm_resource_manager_create_debugfs(man, root, "gtt_mm"); + man = ttm_manager_type(bdev, XE_PL_STOLEN); + if (man) + ttm_resource_manager_create_debugfs(man, root, "stolen_mm"); + for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 93dea2b9c464..104ab12cc2ed 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -25,6 +25,7 @@ #include "xe_pcode.h" #include "xe_pm.h" #include "xe_query.h" +#include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" #include "xe_vm_madvise.h" #include "xe_wait_user_fence.h" @@ -256,6 +257,9 @@ int xe_device_probe(struct xe_device *xe) goto err_irq_shutdown; } + /* Allocate and map stolen after potential VRAM resize */ + xe_ttm_stolen_mgr_init(xe); + for_each_gt(gt, xe, id) { err = xe_gt_init(gt); if (err) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 42e2405f2f48..54c9362a3050 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -150,6 +150,38 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) return true; } +int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_base) +{ + struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int err; + u32 reg; + + if (!xe->info.has_flat_ccs) { + *vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR); + if (flat_ccs_base) + *flat_ccs_base = *vram_size; + return 0; + } + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); + *vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; + if (flat_ccs_base) { + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + *flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + } + + if (flat_ccs_base) + drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n", + *vram_size, *flat_ccs_base); + + return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + int xe_mmio_probe_vram(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -159,7 +191,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) u64 original_size; u64 current_size; u64 flat_ccs_base; - int resize_result; + int resize_result, err; if (!IS_DGFX(xe)) { xe->mem.vram.mapping = 0; @@ -184,27 +216,9 @@ int xe_mmio_probe_vram(struct xe_device *xe) original_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - if (xe->info.has_flat_ccs) { - int err; - u32 reg; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); - lmem_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); - flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; - - drm_info(&xe->drm, "lmem_size: 0x%llx 
flat_ccs_base: 0x%llx\n", - lmem_size, flat_ccs_base); - - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; - } else { - flat_ccs_base = lmem_size; - } + err = xe_mmio_total_vram_size(xe, &lmem_size, &flat_ccs_base); + if (err) + return err; resize_result = xe_resize_lmem_bar(xe, lmem_size); current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index adc7d7484afb..a3b144553873 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -130,5 +130,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg) } int xe_mmio_probe_vram(struct xe_device *xe); +int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_base); #endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 81193ddd0af7..45850184650c 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -12,6 +12,7 @@ #include "xe_pt_walk.h" #include "xe_vm.h" #include "xe_res_cursor.h" +#include "xe_ttm_stolen_mgr.h" struct xe_pt_dir { struct xe_pt pt; @@ -756,12 +757,14 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, else xe_walk.cache = XE_CACHE_WB; } + if (xe_bo_is_stolen(bo)) + xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); xe_bo_assert_held(bo); if (xe_vma_is_userptr(vma)) xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1, &curs); - else if (xe_bo_is_vram(bo)) + else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) xe_res_first(bo->ttm.resource, vma->bo_offset, vma->end - vma->start + 1, &curs); else diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index f54409850d74..365c8ad7aeb8 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -33,10 +33,11 @@ #include #include "xe_bo.h" +#include "xe_device.h" #include "xe_macros.h" #include "xe_ttm_vram_mgr.h" -/* state back for walking over vram_mgr and gtt_mgr allocations */ +/* state back for walking over vram_mgr, stolen_mgr, and gtt_mgr allocations */ struct xe_res_cursor { u64 start; u64 size; @@ -44,8 +45,23 @@ struct xe_res_cursor { void *node; u32 mem_type; struct scatterlist *sgl; + struct drm_buddy *mm; }; +static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res) +{ + struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); + + if (res->mem_type != XE_PL_STOLEN) { + return &xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0)->mem.vram_mgr->mm; + } else { + struct ttm_resource_manager *mgr = + ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + + return &to_xe_ttm_vram_mgr(mgr)->mm; + } +} + /** * xe_res_first - initialize a xe_res_cursor * @@ -60,9 +76,6 @@ static inline void xe_res_first(struct ttm_resource *res, u64 start, u64 size, struct xe_res_cursor *cur) { - struct drm_buddy_block *block; - struct list_head *head, *next; - cur->sgl = NULL; if (!res) goto fallback; @@ -72,8 +85,13 @@ static inline void xe_res_first(struct ttm_resource *res, cur->mem_type = res->mem_type; switch (cur->mem_type) { + case XE_PL_STOLEN: case XE_PL_VRAM0: - case XE_PL_VRAM1: + case XE_PL_VRAM1: { + struct drm_buddy_block *block; + struct list_head *head, *next; + struct drm_buddy *mm = xe_res_get_buddy(res); + head = &to_xe_ttm_vram_mgr_resource(res)->blocks; block = list_first_entry_or_null(head, @@ -82,8 +100,8 @@ static inline void xe_res_first(struct ttm_resource *res, if (!block) goto fallback; - while (start >= xe_ttm_vram_mgr_block_size(block)) { - start -= 
xe_ttm_vram_mgr_block_size(block); + while (start >= drm_buddy_block_size(mm, block)) { + start -= drm_buddy_block_size(mm, block); next = block->link.next; if (next != head) @@ -91,12 +109,14 @@ static inline void xe_res_first(struct ttm_resource *res, link); } - cur->start = xe_ttm_vram_mgr_block_start(block) + start; - cur->size = min(xe_ttm_vram_mgr_block_size(block) - start, + cur->mm = mm; + cur->start = drm_buddy_block_offset(block) + start; + cur->size = min(drm_buddy_block_size(mm, block) - start, size); cur->remaining = size; cur->node = block; break; + } default: goto fallback; } @@ -188,6 +208,7 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) } switch (cur->mem_type) { + case XE_PL_STOLEN: case XE_PL_VRAM0: case XE_PL_VRAM1: start = size - cur->size; @@ -197,15 +218,15 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) block = list_entry(next, struct drm_buddy_block, link); - while (start >= xe_ttm_vram_mgr_block_size(block)) { - start -= xe_ttm_vram_mgr_block_size(block); + while (start >= drm_buddy_block_size(cur->mm, block)) { + start -= drm_buddy_block_size(cur->mm, block); next = block->link.next; block = list_entry(next, struct drm_buddy_block, link); } - cur->start = xe_ttm_vram_mgr_block_start(block) + start; - cur->size = min(xe_ttm_vram_mgr_block_size(block) - start, + cur->start = drm_buddy_block_offset(block) + start; + cur->size = min(drm_buddy_block_size(cur->mm, block) - start, cur->remaining); cur->node = block; break; diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c new file mode 100644 index 000000000000..21ca7f79e63b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include +#include + +#include +#include +#include + +#include "../i915/i915_reg.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_mmio.h" +#include "xe_res_cursor.h" +#include "xe_ttm_stolen_mgr.h" +#include "xe_ttm_vram_mgr.h" + +bool xe_ttm_stolen_inaccessible(struct xe_device *xe) +{ + return !IS_DGFX(xe) && GRAPHICS_VERx100(xe) < 1270; +} + +struct xe_ttm_stolen_mgr { + struct xe_ttm_vram_mgr base; + + /* PCI base offset */ + resource_size_t io_base; + /* GPU base offset */ + resource_size_t stolen_base; + + void *__iomem mapping; +}; + +static inline struct xe_ttm_stolen_mgr * +to_stolen_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct xe_ttm_stolen_mgr, base.manager); +} + +static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_gt *gt = to_gt(xe); + u64 vram_size, stolen_size; + int err; + + err = xe_mmio_total_vram_size(xe, &vram_size, NULL); + if (err) { + drm_info(&xe->drm, "Querying total vram size failed\n"); + return 0; + } + + /* Use DSM base address instead for stolen memory */ + mgr->stolen_base = xe_mmio_read64(gt, GEN12_DSMBASE.reg) & GEN12_BDSM_MASK; + if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base)) + return 0; + + stolen_size = vram_size - mgr->stolen_base; + if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, 2)) + mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base; + + return stolen_size; +} + +static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u32 stolen_size; + u32 ggc, gms; 
+ + ggc = xe_mmio_read32(to_gt(xe), GGC.reg); + + /* check GGMS, should be fixed 0x3 (8MB) */ + if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK)) + return 0; + + mgr->stolen_base = mgr->io_base = pci_resource_start(pdev, 2) + SZ_8M; + + /* return valid GMS value, -EIO if invalid */ + gms = REG_FIELD_GET(GMS_MASK, ggc); + switch (gms) { + case 0x0 ... 0x04: + stolen_size = gms * 32 * SZ_1M; + break; + case 0xf0 ... 0xfe: + stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M; + break; + default: + return 0; + } + + if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2))) + return 0; + + return stolen_size; +} + +extern struct resource intel_graphics_stolen_res; + +static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +{ +#ifdef CONFIG_X86 + /* Map into GGTT */ + mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2); + + /* Stolen memory is x86 only */ + mgr->stolen_base = intel_graphics_stolen_res.start; + return resource_size(&intel_graphics_stolen_res); +#else + return 0; +#endif +} + +void xe_ttm_stolen_mgr_init(struct xe_device *xe) +{ + struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u64 stolen_size, pgsize; + int err; + + if (IS_DGFX(xe)) + stolen_size = detect_bar2_dgfx(xe, mgr); + else if (!xe_ttm_stolen_inaccessible(xe)) + stolen_size = detect_bar2_integrated(xe, mgr); + else + stolen_size = detect_stolen(xe, mgr); + + if (!stolen_size) { + drm_dbg_kms(&xe->drm, "No stolen memory support\n"); + return; + } + + pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; + if (pgsize < PAGE_SIZE) + pgsize = PAGE_SIZE; + + + err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, pgsize); + if (err) { + drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); + return; + } + + drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", + stolen_size); + + if (!xe_ttm_stolen_inaccessible(xe)) + mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, stolen_size); +} + +u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) +{ + struct xe_device *xe = xe_bo_device(bo); + struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr); + struct xe_res_cursor cur; + + if (!mgr->io_base) + return 0; + + if (!IS_DGFX(xe) && xe_ttm_stolen_inaccessible(xe)) + return mgr->io_base + xe_bo_ggtt_addr(bo) + offset; + + xe_res_first(bo->ttm.resource, offset, 4096, &cur); + return mgr->io_base + cur.start; +} + +static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe, + struct xe_ttm_stolen_mgr *mgr, + struct ttm_resource *mem) +{ + struct xe_res_cursor cur; + + if (!mgr->io_base) + return -EIO; + + xe_res_first(mem, 0, 4096, &cur); + mem->bus.offset = cur.start; + + drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)); + WARN_ON_ONCE(1); + + if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping) + mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset; + + mem->bus.offset += mgr->io_base; + mem->bus.is_iomem = true; + mem->bus.caching = ttm_write_combined; + + return 0; +} + +static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe, + struct xe_ttm_stolen_mgr *mgr, + struct ttm_resource *mem) +{ +#ifdef CONFIG_X86 + struct xe_bo *bo = ttm_to_xe_bo(mem->bo); + + /* XXX: Require BO to be mapped to GGTT? 
*/ + if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT))) + return -EIO; + + /* GGTT is always contiguously mapped */ + mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base; + + mem->bus.is_iomem = true; + mem->bus.caching = ttm_write_combined; + + return 0; +#else + /* How is it even possible to get here without gen12 stolen? */ + drm_WARN_ON(&xe->drm, 1); + return -EIO; +#endif +} + +int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem) +{ + struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL; + + if (!mgr || !mgr->io_base) + return -EIO; + + if (!xe_ttm_stolen_inaccessible(xe)) + return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem); + else + return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem); +} + +u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe) +{ + struct xe_ttm_stolen_mgr *mgr = + to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN)); + + return mgr->stolen_base; +} diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h new file mode 100644 index 000000000000..ade37abb0623 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_TTM_STOLEN_MGR_H_ +#define _XE_TTM_STOLEN_MGR_H_ + +#include + +struct ttm_resource; +struct xe_bo; +struct xe_device; + +void xe_ttm_stolen_mgr_init(struct xe_device *xe); +int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); +bool xe_ttm_stolen_inaccessible(struct xe_device *xe); +u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset); +u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index e391e81d3640..c7e21673b8fd 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -15,25 +15,14 @@ #include "xe_res_cursor.h" #include "xe_ttm_vram_mgr.h" -static inline struct xe_ttm_vram_mgr * -to_vram_mgr(struct ttm_resource_manager *man) -{ - return container_of(man, struct xe_ttm_vram_mgr, manager); -} - -static inline struct xe_gt * -mgr_to_gt(struct xe_ttm_vram_mgr *mgr) -{ - return mgr->gt; -} - static inline struct drm_buddy_block * xe_ttm_vram_mgr_first_block(struct list_head *list) { return list_first_entry_or_null(list, struct drm_buddy_block, link); } -static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head) +static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm, + struct list_head *head) { struct drm_buddy_block *block; u64 start, size; @@ -43,12 +32,12 @@ static inline bool xe_is_vram_mgr_blocks_contiguous(struct list_head *head) return false; while (head != block->link.next) { - start = xe_ttm_vram_mgr_block_start(block); - size = xe_ttm_vram_mgr_block_size(block); + start = drm_buddy_block_offset(block); + size = drm_buddy_block_size(mm, block); block = list_entry(block->link.next, struct drm_buddy_block, link); - if (start + size != xe_ttm_vram_mgr_block_start(block)) + if (start + size != drm_buddy_block_offset(block)) return false; } @@ -61,7 +50,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, struct ttm_resource **res) { u64 max_bytes, cur_size, min_block_size; - struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct xe_ttm_vram_mgr_resource *vres; u64 
size, remaining_size, lpfn, fpfn; struct drm_buddy *mm = &mgr->mm; @@ -70,12 +59,12 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, int r; lpfn = (u64)place->lpfn << PAGE_SHIFT; - if (!lpfn) + if (!lpfn || lpfn > man->size) lpfn = man->size; fpfn = (u64)place->fpfn << PAGE_SHIFT; - max_bytes = mgr->gt->mem.vram.size; + max_bytes = mgr->manager.size; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { pages_per_block = ~0ul; } else { @@ -183,7 +172,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, * Compute the original_size value by subtracting the * last block size with (aligned size - original size) */ - original_size = xe_ttm_vram_mgr_block_size(block) - + original_size = drm_buddy_block_size(mm, block) - (size - cur_size); } @@ -201,8 +190,8 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, list_for_each_entry(block, &vres->blocks, link) { unsigned long start; - start = xe_ttm_vram_mgr_block_start(block) + - xe_ttm_vram_mgr_block_size(block); + start = drm_buddy_block_offset(block) + + drm_buddy_block_size(mm, block); start >>= PAGE_SHIFT; if (start > PFN_UP(vres->base.size)) @@ -212,7 +201,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, vres->base.start = max(vres->base.start, start); } - if (xe_is_vram_mgr_blocks_contiguous(&vres->blocks)) + if (xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks)) vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; *res = &vres->base; @@ -233,7 +222,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, { struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res); - struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct drm_buddy *mm = &mgr->mm; mutex_lock(&mgr->lock); @@ -248,7 +237,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { - struct xe_ttm_vram_mgr *mgr = to_vram_mgr(man); + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct drm_buddy *mm = &mgr->mm; mutex_lock(&mgr->lock); @@ -263,54 +252,54 @@ static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = { .debug = xe_ttm_vram_mgr_debug }; -static void ttm_vram_mgr_fini(struct drm_device *drm, void *arg) +static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg) { + struct xe_device *xe = to_xe_device(dev); struct xe_ttm_vram_mgr *mgr = arg; - struct xe_device *xe = gt_to_xe(mgr->gt); struct ttm_resource_manager *man = &mgr->manager; - int err; ttm_resource_manager_set_used(man, false); - err = ttm_resource_manager_evict_all(&xe->ttm, man); - if (err) + if (ttm_resource_manager_evict_all(&xe->ttm, man)) return; drm_buddy_fini(&mgr->mm); - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + mgr->gt->info.vram_id, - NULL); + ttm_resource_manager_cleanup(&mgr->manager); + + ttm_set_driver_manager(&xe->ttm, mgr->mem_type, NULL); } -int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr) +int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, + u32 mem_type, u64 size, u64 default_page_size) { - struct xe_device *xe = gt_to_xe(gt); struct ttm_resource_manager *man = &mgr->manager; int err; - XE_BUG_ON(xe_gt_is_media_type(gt)); - - mgr->gt = gt; man->func = &xe_ttm_vram_mgr_func; + mgr->mem_type = mem_type; + mutex_init(&mgr->lock); + mgr->default_page_size = default_page_size; - ttm_resource_manager_init(man, &xe->ttm, 
gt->mem.vram.size); - err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); - if (err) - return err; + ttm_resource_manager_init(man, &xe->ttm, size); + err = drm_buddy_init(&mgr->mm, man->size, default_page_size); - mutex_init(&mgr->lock); - mgr->default_page_size = PAGE_SIZE; + ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager); + ttm_resource_manager_set_used(&mgr->manager, true); + + return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); +} + +int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr) +{ + struct xe_device *xe = gt_to_xe(gt); - ttm_set_driver_manager(&xe->ttm, XE_PL_VRAM0 + gt->info.vram_id, - &mgr->manager); - ttm_resource_manager_set_used(man, true); + XE_BUG_ON(xe_gt_is_media_type(gt)); - err = drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); - if (err) - return err; + mgr->gt = gt; - return 0; + return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + gt->info.vram_id, + gt->mem.vram.size, PAGE_SIZE); } int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index 537fccec4318..78f332d26224 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -12,6 +12,8 @@ enum dma_data_direction; struct xe_device; struct xe_gt; +int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, + u32 mem_type, u64 size, u64 default_page_size); int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr); int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, struct ttm_resource *res, @@ -22,20 +24,16 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, struct sg_table *sgt); -static inline u64 xe_ttm_vram_mgr_block_start(struct drm_buddy_block *block) -{ - return drm_buddy_block_offset(block); -} - -static inline u64 xe_ttm_vram_mgr_block_size(struct drm_buddy_block *block) -{ - return PAGE_SIZE << drm_buddy_block_order(block); -} - static inline struct xe_ttm_vram_mgr_resource * to_xe_ttm_vram_mgr_resource(struct ttm_resource *res) { return container_of(res, struct xe_ttm_vram_mgr_resource, base); } +static inline struct xe_ttm_vram_mgr * +to_xe_ttm_vram_mgr(struct ttm_resource_manager *man) +{ + return container_of(man, struct xe_ttm_vram_mgr, manager); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index 39b93c71c21b..cf02c62ff427 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -27,6 +27,8 @@ struct xe_ttm_vram_mgr { u64 default_page_size; /** @lock: protects allocations of VRAM */ struct mutex lock; + + u32 mem_type; }; /** -- cgit v1.2.3 From 60694edf668a5c837d7bf05bd2250388e2ada9a8 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Jan 2023 17:25:28 -0500 Subject: drm/xe: Ensure VMA not userptr before calling xe_bo_is_stolen Fix the below splat: [ 142.510525] [IGT] xe_exec_basic: starting subtest once-userptr [ 142.511339] BUG: kernel NULL pointer dereference, address: 0000000000000228 [ 142.518311] #PF: supervisor read access in kernel mode [ 142.523458] #PF: error_code(0x0000) - not-present page [ 142.528604] PGD 0 P4D 0 [ 142.531153] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 142.535518] CPU: 4 PID: 1199 Comm: kworker/u16:8 Not tainted 6.1.0-rc1-xe+ #1 [ 142.542656] Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS 
TGLSFWI1.R00.3243.A01.2006102133 06/10/2020 [ 142.556033] Workqueue: events_unbound async_op_work_func [xe] [ 142.561810] RIP: 0010:xe_bo_is_stolen+0x0/0x20 [xe] [ 142.566709] Code: 20 c8 75 05 83 fa 07 74 05 c3 cc cc cc cc 48 8b 87 08 02 00 00 0f b6 80 2c ff ff ff c3 cc cc cc cc 66 0f 1f 84 00 00 00 00 00 <48> 8b 87 28 02 00 00 83 78 10 07 0f 94 c0 c3 cc cc cc cc 66 66 2e [ 142.585447] RSP: 0018:ffffc900019eb888 EFLAGS: 00010246 [ 142.590678] RAX: 0000000000000002 RBX: 0000000000000000 RCX: ffff88813f6a2108 [ 142.597821] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 142.604962] RBP: ffffc900019ebbc0 R08: 0000000000000001 R09: 0000000000000000 [ 142.612101] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88814107d600 [ 142.619242] R13: ffffc900019eba20 R14: ffff888140442000 R15: 0000000000000000 [ 142.626378] FS: 0000000000000000(0000) GS:ffff88849fa00000(0000) knlGS:0000000000000000 [ 142.634468] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 142.640219] CR2: 0000000000000228 CR3: 000000010a4c0006 CR4: 0000000000770ee0 [ 142.647361] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 142.654505] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 142.661639] PKRU: 55555554 [ 142.664367] Call Trace: [ 142.666830] [ 142.668947] __xe_pt_bind_vma+0x1a1/0xa50 [xe] [ 142.673417] ? unwind_next_frame+0x187/0x770 [ 142.677699] ? __thaw_task+0xc0/0xc0 [ 142.681293] ? __lock_acquire+0x5e4/0x26e0 [ 142.685409] ? lockdep_hardirqs_on+0xbf/0x140 [ 142.689779] ? lock_acquire+0xd2/0x310 [ 142.693548] ? mark_held_locks+0x49/0x80 [ 142.697485] ? xe_vm_bind_vma+0xf1/0x3d0 [xe] [ 142.701866] xe_vm_bind_vma+0xf1/0x3d0 [xe] [ 142.706082] xe_vm_bind+0x76/0x140 [xe] [ 142.709944] vm_bind_ioctl+0x26f/0xb40 [xe] [ 142.714161] ? async_op_work_func+0x20c/0x450 [xe] [ 142.718974] async_op_work_func+0x20c/0x450 [xe] [ 142.723620] process_one_work+0x263/0x580 [ 142.727645] ? process_one_work+0x580/0x580 [ 142.731839] worker_thread+0x4d/0x3b0 [ 142.735518] ? process_one_work+0x580/0x580 [ 142.739714] kthread+0xeb/0x120 [ 142.742872] ? kthread_complete_and_exit+0x20/0x20 [ 142.747671] ret_from_fork+0x1f/0x30 [ 142.751264] Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 45850184650c..688bc4b56294 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -757,7 +757,7 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, else xe_walk.cache = XE_CACHE_WB; } - if (xe_bo_is_stolen(bo)) + if (!xe_vma_is_userptr(vma) && xe_bo_is_stolen(bo)) xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); xe_bo_assert_held(bo); -- cgit v1.2.3 From da34c2cf85a4739d4e2b1b5515a0fbc8f8e60358 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Jan 2023 17:25:30 -0500 Subject: drm/xe: Fake pulling gt->info.engine_mask from hwconfig blob The blob doesn't fully support this yet, so fake for now to ensure our driver load order is correct. Once the blob supports pulling gt->info.engine_mask from the blob, this patch can be removed. 
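Concretely, the stop-gap is the one-line copy at GT fw-domain init shown below; the second assignment only sketches the assumed end state once hwconfig carries the engine list, and engine_mask_from_hwconfig() is a hypothetical helper name, not something this series adds.

	/* This patch: publish the PCI-ID-derived default unchanged. */
	gt->info.engine_mask = gt->info.__engine_mask;

	/*
	 * Assumed follow-up (hypothetical helper): derive the mask from the
	 * GuC hwconfig blob, after which __engine_mask and this patch can be
	 * dropped.
	 */
	gt->info.engine_mask = engine_mask_from_hwconfig(gt);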
Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 3 +++ drivers/gpu/drm/xe/xe_gt_types.h | 6 ++++++ drivers/gpu/drm/xe/xe_pci.c | 4 ++-- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index fd8232a4556e..96d0f5845d87 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -450,6 +450,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; + /* XXX: Fake that we pull the engine mask from hwconfig blob */ + gt->info.engine_mask = gt->info.__engine_mask; + /* Enables per hw engine IRQs */ xe_gt_irq_postinstall(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index c80a9215098d..2dbc8cedd630 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -93,6 +93,12 @@ struct xe_gt { u32 clock_freq; /** @engine_mask: mask of engines present on GT */ u64 engine_mask; + /** + * @__engine_mask: mask of engines present on GT read from + * xe_pci.c, used to fake reading the engine_mask from the + * hwconfig blob. + */ + u64 __engine_mask; } info; /** diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 55d8a597a068..49f1f0489f1c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -420,13 +420,13 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (id == 0) { gt->info.type = XE_GT_TYPE_MAIN; gt->info.vram_id = id; - gt->info.engine_mask = desc->platform_engine_mask; + gt->info.__engine_mask = desc->platform_engine_mask; gt->mmio.adj_limit = 0; gt->mmio.adj_offset = 0; } else { gt->info.type = desc->extra_gts[id - 1].type; gt->info.vram_id = desc->extra_gts[id - 1].vram_id; - gt->info.engine_mask = + gt->info.__engine_mask = desc->extra_gts[id - 1].engine_mask; gt->mmio.adj_limit = desc->extra_gts[id - 1].mmio_adj_limit; -- cgit v1.2.3 From 99c821b00bf65e76415bf4c8d04d4d92987505cb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Jan 2023 17:25:31 -0500 Subject: drm/xe/guc: Report submission version of GuC firmware Starting in 70.6.* GuC firmware the CSS header includes the submission version, pull this from the CSS header. Prior 70.* versions accidentally omitted this informatio so hard code to the correct values. This information will be used by VFs when communicating with the PF. 
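As a worked example of the decoding added here: the CSS submission_version dword packs major/minor/patch into bytes 2/1/0, extracted with the CSS_SW_VERSION_UC_* masks from xe_uc_fw_abi.h. The raw value below is an assumed illustration, not taken from any released firmware.

	u32 submission_version = 0x00010902;	/* assumed example encoding */
	u32 major, minor, patch;

	major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, submission_version);	/* 1 */
	minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, submission_version);	/* 9 */
	patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, submission_version);	/* 2 */
	/* i.e. firmware reporting 0x00010902 would expose submission v1.9.2 */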
Signed-off-by: Matthew Brost Reviewed-by: Philippe Lecluse Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_types.h | 9 ++++++++ drivers/gpu/drm/xe/xe_uc_fw.c | 46 ++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_uc_fw_abi.h | 6 ++++- 3 files changed, 59 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index ca177853cc12..c2a484282ef2 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -51,6 +51,15 @@ struct xe_guc { /** @seqno: suspend fences seqno */ u32 seqno; } suspend; + /** @version: submission version */ + struct { + /** @major: major version of GuC submission */ + u32 major; + /** @minor: minor version of GuC submission */ + u32 minor; + /** @patch: patch version of GuC submission */ + u32 patch; + } version; } submission_state; /** @hwconfig: Hardware config state */ struct { diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index cd264cf50d30..bd89ac27828e 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -184,6 +184,40 @@ static void uc_fw_fini(struct drm_device *drm, void *arg) xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); } +static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) +{ + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_guc *guc = >->uc.guc; + + XE_BUG_ON(uc_fw->type != XE_UC_FW_TYPE_GUC); + XE_WARN_ON(uc_fw->major_ver_found < 70); + + if (uc_fw->major_ver_found > 70 || uc_fw->minor_ver_found >= 6) { + /* v70.6.0 adds CSS header support */ + guc->submission_state.version.major = + FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->submission_version); + guc->submission_state.version.minor = + FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->submission_version); + guc->submission_state.version.patch = + FIELD_GET(CSS_SW_VERSION_UC_PATCH, + css->submission_version); + } else if (uc_fw->minor_ver_found >= 3) { + /* v70.3.0 introduced v1.1.0 */ + guc->submission_state.version.major = 1; + guc->submission_state.version.minor = 1; + guc->submission_state.version.patch = 0; + } else { + /* v70.0.0 introduced v1.0.0 */ + guc->submission_state.version.major = 1; + guc->submission_state.version.minor = 0; + guc->submission_state.version.patch = 0; + } + + uc_fw->private_data_size = css->private_data_size; +} + int xe_uc_fw_init(struct xe_uc_fw *uc_fw) { struct xe_device *xe = uc_fw_to_xe(uc_fw); @@ -278,7 +312,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) } if (uc_fw->type == XE_UC_FW_TYPE_GUC) - uc_fw->private_data_size = css->private_data_size; + guc_read_css_info(uc_fw, css); obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size, ttm_bo_type_kernel, @@ -403,4 +437,14 @@ void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p) uc_fw->major_ver_found, uc_fw->minor_ver_found); drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); + + if (uc_fw->type == XE_UC_FW_TYPE_GUC) { + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_guc *guc = >->uc.guc; + + drm_printf(p, "\tSubmit version: %u.%u.%u\n", + guc->submission_state.version.major, + guc->submission_state.version.minor, + guc->submission_state.version.patch); + } } diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h index dafd26cb0c41..fc7b1855ee90 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -69,7 +69,11 @@ struct uc_css_header { #define 
CSS_SW_VERSION_UC_MAJOR (0xFF << 16) #define CSS_SW_VERSION_UC_MINOR (0xFF << 8) #define CSS_SW_VERSION_UC_PATCH (0xFF << 0) - u32 reserved0[13]; + union { + u32 submission_version; /* only applies to GuC */ + u32 reserved2; + }; + u32 reserved0[12]; union { u32 private_data_size; /* only applies to GuC */ u32 reserved1; -- cgit v1.2.3 From f900725af8b66ec8484680c693fa4ae93cb7259d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Jan 2023 17:25:32 -0500 Subject: drm/xe/guc: s/xe_guc_send_mmio/xe_guc_mmio_send Now aligns with the xe_guc_ct_send naming. Signed-off-by: Matthew Brost Reviewed-by: Philippe Lecluse Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 6 +++--- drivers/gpu/drm/xe/xe_guc.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 4a7e8f9a14d5..2efa01dfff6d 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -599,7 +599,7 @@ int xe_guc_suspend(struct xe_guc *guc) XE_GUC_ACTION_CLIENT_SOFT_RESET, }; - ret = xe_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); if (ret) { drm_err(&guc_to_xe(guc)->drm, "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret); @@ -630,7 +630,7 @@ int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) #define MEDIA_SOFT_SCRATCH(n) _MMIO(0x190310 + (n) * 4) #define MEDIA_SOFT_SCRATCH_COUNT 4 -int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len) +int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); @@ -747,7 +747,7 @@ static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) XE_BUG_ON(len == 1 && upper_32_bits(val)); /* Self config must go over MMIO */ - ret = xe_guc_send_mmio(guc, request, ARRAY_SIZE(request)); + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); if (unlikely(ret < 0)) return ret; diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 72b71d75566c..923efee90991 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -22,7 +22,7 @@ int xe_guc_enable_communication(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); -int xe_guc_send_mmio(struct xe_guc *guc, const u32 *request, u32 len); +int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len); int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val); int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val); void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 61a424c41779..f48eb01847ef 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -268,7 +268,7 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) enable ? GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE), }; - int ret = xe_guc_send_mmio(ct_to_guc(ct), request, ARRAY_SIZE(request)); + int ret = xe_guc_mmio_send(ct_to_guc(ct), request, ARRAY_SIZE(request)); return ret > 0 ? 
-EPROTO : ret; } diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 8dfd48f71a7c..57640d608787 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -33,7 +33,7 @@ static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size) size, }; - return xe_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + return xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); } static int guc_hwconfig_size(struct xe_guc *guc, u32 *size) -- cgit v1.2.3 From 5e37266307df08f981d929c267bab6bfae8c4d53 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Jan 2023 17:25:33 -0500 Subject: drm/xe/guc: Add support GuC MMIO send / recv SRIOV has a use case of GuC MMIO send / recv, add a function for it. Signed-off-by: Matthew Brost Reviewed-by: Philippe Lecluse Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 18 +++++++++++++++++- drivers/gpu/drm/xe/xe_guc.h | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 2efa01dfff6d..88a3a96da084 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -630,7 +630,8 @@ int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) #define MEDIA_SOFT_SCRATCH(n) _MMIO(0x190310 + (n) * 4) #define MEDIA_SOFT_SCRATCH_COUNT 4 -int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) +int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, + u32 len, u32 *response_buf) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); @@ -640,6 +641,7 @@ int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) int ret; int i; + BUILD_BUG_ON(GEN11_SOFT_SCRATCH_COUNT != MEDIA_SOFT_SCRATCH_COUNT); XE_BUG_ON(guc->ct.enabled); XE_BUG_ON(!len); XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT); @@ -723,10 +725,24 @@ proto: return -EPROTO; } + /* Just copy entire possible message response */ + if (response_buf) { + response_buf[0] = header; + + for (i = 1; i < GEN11_SOFT_SCRATCH_COUNT; i++) + response_buf[i] = + xe_mmio_read32(gt, reply_reg + i * sizeof(u32)); + } + /* Use data from the GuC response as our return value */ return FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); } +int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) +{ + return xe_guc_mmio_send_recv(guc, request, len, NULL); +} + static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) { u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = { diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 923efee90991..7be33458eef6 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -23,6 +23,8 @@ int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len); +int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 len, + u32 *response_buf); int xe_guc_self_cfg32(struct xe_guc *guc, u16 key, u32 val); int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val); void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir); -- cgit v1.2.3 From c343bacfad5db03c4156ff3a44e3a5547afb246f Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 12 Jan 2023 17:25:36 -0500 Subject: drm/xe: Fix hidden gotcha regression with bo create The bo_create ioctl relied on the internal ordering of memory regions to be the same, make sure we don't allocate stolen 
instead of VRAM0. Also remove a debug warning left in. Signed-off-by: Maarten Lankhorst Reviewed-by: Philippe Lecluse Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 8 +++++--- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 8d8a3332dbc8..8c2cdbe51ab5 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -13,10 +13,12 @@ #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define XE_BO_CREATE_USER_BIT BIT(0) +/* The bits below need to be contiguous, or things break */ #define XE_BO_CREATE_SYSTEM_BIT BIT(1) -#define XE_BO_CREATE_STOLEN_BIT BIT(2) -#define XE_BO_CREATE_VRAM0_BIT BIT(3) -#define XE_BO_CREATE_VRAM1_BIT BIT(4) +#define XE_BO_CREATE_VRAM0_BIT BIT(2) +#define XE_BO_CREATE_VRAM1_BIT BIT(3) +/* -- */ +#define XE_BO_CREATE_STOLEN_BIT BIT(4) #define XE_BO_CREATE_VRAM_IF_DGFX(gt) \ (IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \ XE_BO_CREATE_SYSTEM_BIT) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 21ca7f79e63b..b4e9c88644e4 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -184,7 +184,6 @@ static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe, mem->bus.offset = cur.start; drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)); - WARN_ON_ONCE(1); if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping) mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset; -- cgit v1.2.3 From 2c33b49a6e6f8e176735eaca9ec6170478e0a426 Mon Sep 17 00:00:00 2001 From: Philippe Lecluse Date: Thu, 12 Jan 2023 17:25:38 -0500 Subject: drm/xe: enforce GSMBASE for DG1 instead of BAR2 On DG1, BAR2 is not reliable for reporting Vram size, need to use GSMBASE. Simplify xe_mmio_total_vram_size to report vram size and usable size. 
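For illustration, the DG1 special case boils down to roughly the following (same helpers as in the diff below; the note about stolen memory placement is an inference from how GSMBASE is used here, not a claim made by the patch):

    vram_size   = pci_resource_len(pdev, GEN12_LMEM_BAR);   /* full BAR2 length */
    usable_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg);    /* DG1: stolen presumably starts here */
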
Signed-off-by: Philippe Lecluse Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 54c9362a3050..88d475dca6db 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -150,7 +150,7 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) return true; } -int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_base) +int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_size) { struct xe_gt *gt = xe_device_get_gt(xe, 0); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -159,8 +159,12 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_ if (!xe->info.has_flat_ccs) { *vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - if (flat_ccs_base) - *flat_ccs_base = *vram_size; + if (usable_size) { + if (xe->info.platform == XE_DG1) + *usable_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg); + else + *usable_size = *vram_size; + } return 0; } @@ -170,15 +174,13 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_ reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); *vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; - if (flat_ccs_base) { + if (usable_size) { reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); - *flat_ccs_base = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + *usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + drm_info(&xe->drm, "lmem_size: 0x%llx usable_size: 0x%llx\n", + *vram_size, *usable_size); } - if (flat_ccs_base) - drm_info(&xe->drm, "lmem_size: 0x%llx flat_ccs_base: 0x%llx\n", - *vram_size, *flat_ccs_base); - return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } @@ -190,7 +192,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) u64 lmem_size; u64 original_size; u64 current_size; - u64 flat_ccs_base; + u64 usable_size; int resize_result, err; if (!IS_DGFX(xe)) { @@ -212,11 +214,9 @@ int xe_mmio_probe_vram(struct xe_device *xe) } gt = xe_device_get_gt(xe, 0); - lmem_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg); - original_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - err = xe_mmio_total_vram_size(xe, &lmem_size, &flat_ccs_base); + err = xe_mmio_total_vram_size(xe, &lmem_size, &usable_size); if (err) return err; @@ -244,7 +244,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size); #endif - xe->mem.vram.size = min_t(u64, xe->mem.vram.size, flat_ccs_base); + xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size); drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size); -- cgit v1.2.3 From d8731500721d5ae26819de36c63921f4baaafe00 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 12 Jan 2023 16:34:09 +0000 Subject: drm/xe/pcode: fix pcode error check On DG2 we are now getting: [ 104.456607] xe 0000:03:00.0: [drm] *ERROR* PCODE timeout, retrying with preemption disabled Looks like we just need to invert the error check for xe_pcode_try_request(), which returns zero on success. 
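In other words, xe_pcode_try_request() follows the usual kernel convention of returning 0 on success and a negative errno on failure, so the early-out must fire on zero. A minimal sketch of the corrected flow:

    ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply,
                               &status, false, timeout_base_ms * 1000);
    if (!ret)       /* 0 == success: skip the preemption-disabled retry */
        goto out;
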
Signed-off-by: Matthew Auld Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 39712e843728..1a76fe478853 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -155,7 +155,7 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, false, timeout_base_ms * 1000); - if (ret) + if (!ret) goto out; /* -- cgit v1.2.3 From 760f168db30a5c06893e87c88f25cd3578a8453a Mon Sep 17 00:00:00 2001 From: Philippe Lecluse Date: Fri, 13 Jan 2023 14:07:17 +0000 Subject: drm/xe: fix xe_mmio_total_vram_size As also cause issue on PVC, moving back to what we did before stolen was introduced Signed-off-by: Philippe Lecluse Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 88d475dca6db..54d2a94a7519 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -159,12 +159,8 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si if (!xe->info.has_flat_ccs) { *vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - if (usable_size) { - if (xe->info.platform == XE_DG1) - *usable_size = xe_mmio_read64(gt, GEN12_GSMBASE.reg); - else - *usable_size = *vram_size; - } + if (usable_size) + *usable_size = min(*vram_size, xe_mmio_read64(gt, GEN12_GSMBASE.reg)); return 0; } -- cgit v1.2.3 From 4aa18ae44686144c5c5d29113d6e2c5c3ebb349d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Dec 2022 12:11:45 -0800 Subject: drm/xe/ggtt: Use BIT_ULL() for 64bit Make sure it's 64bit value in both 32b and 64b arch. Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index eab74a509f68..0018c8441747 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -20,8 +20,8 @@ #include "gt/intel_gt_regs.h" /* FIXME: Common file, preferably auto-gen */ -#define MTL_GGTT_PTE_PAT0 BIT(52) -#define MTL_GGTT_PTE_PAT1 BIT(53) +#define MTL_GGTT_PTE_PAT0 BIT_ULL(52) +#define MTL_GGTT_PTE_PAT1 BIT_ULL(53) u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) { -- cgit v1.2.3 From 857912c37ea786715e03b5bf25db07e28fc2ba73 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Dec 2022 12:14:27 -0800 Subject: drm/xe: Fix some log messages on 32b Either use the proper format or cast up to 64b depending on the case. 
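As a quick, self-contained illustration of the two rules being applied (plain userspace C, not driver code): size_t takes %zu on every architecture, and a value that may be 32 or 64 bits wide depending on the build is printed consistently by casting it up to the type the format specifier expects:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    int main(void)
    {
        size_t idx = 4;                  /* e.g. i / sizeof(u32) */
        uint64_t addr = 0xdeadbeefULL;

        printf("WQ[%zu]\n", idx);        /* %zu is correct on 32b and 64b */
        printf("addr=0x%llx\n", (unsigned long long)addr);
        return 0;
    }
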
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 6 +++--- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 0f3b819f0a34..03a60d5b42f1 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -263,9 +263,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto free_tiny; } - kunit_info(test, "Starting tests, top level PT addr: %llx, special pagetable base addr: %llx\n", - xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE), - xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE)); + kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n", + (unsigned long)xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE), + (unsigned long)xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE)); /* First part of the test, are we updating our pagetable bo with a new entry? */ xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index e0d424c2b78c..2d4eb527d6e8 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1630,7 +1630,7 @@ static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p) for (i = parallel_read(xe, map, wq_desc.head); i != parallel_read(xe, map, wq_desc.tail); i = (i + sizeof(u32)) % WQ_SIZE) - drm_printf(p, "\tWQ[%ld]: 0x%08x\n", i / sizeof(u32), + drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), parallel_read(xe, map, wq[i / sizeof(u32)])); } } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 54d2a94a7519..7c87be130e02 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -234,7 +234,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) (u64)xe->mem.vram.size >> 20); if (xe->mem.vram.size < lmem_size) drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", - lmem_size, xe->mem.vram.size); + lmem_size, (u64)xe->mem.vram.size); #ifdef CONFIG_64BIT xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size); -- cgit v1.2.3 From 9a6e6c14bfde967fca5a052cbee206d0b6169a1e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Dec 2022 12:15:24 -0800 Subject: drm/xe/mmio: Use non-atomic writeq/readq variant for 32b writeq() and readq() and other functions working on 64 bit variables are not provided by 32b arch. For that it's needed to choose between linux/io-64-nonatomic-hi-lo.h and linux/io-64-nonatomic-lo-hi.h, spliting the read/write in 2 accesses. For xe driver, it doesn't matter much, so just choose one and include in xe_mmio.h. This also removes some ifdef CONFIG_64BIT we had around because of the missing 64bit functions. 
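For context, including linux/io-64-nonatomic-lo-hi.h gives 32-bit builds readq()/writeq() implemented as two 32-bit MMIO accesses, low word first. Roughly what the write-side fallback amounts to (a sketch, not the exact kernel implementation):

    /* sketch of the lo-hi writeq fallback on 32-bit */
    writel((u32)val, addr);               /* low 32 bits first ... */
    writel((u32)(val >> 32), addr + 4);   /* ... then the high 32 bits */
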
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 7 ------- drivers/gpu/drm/xe/xe_mmio.h | 1 + 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7c87be130e02..8a953df2b468 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -48,7 +48,6 @@ mask_err: return err; } -#ifdef CONFIG_64BIT static int _resize_bar(struct xe_device *xe, int resno, resource_size_t size) { @@ -132,9 +131,6 @@ static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); return ret; } -#else -static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) { return 0; } -#endif static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) { @@ -236,10 +232,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", lmem_size, (u64)xe->mem.vram.size); -#ifdef CONFIG_64BIT xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size); -#endif - xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size); drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index a3b144553873..354be6fae0d4 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -7,6 +7,7 @@ #define _XE_MMIO_H_ #include +#include #include "xe_gt_types.h" -- cgit v1.2.3 From ebec269c522fc9bb48d11b65456b01adbdecb97d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Dec 2022 12:18:26 -0800 Subject: drm/xe: Fix tracepoints on 32b Leave the types as u64, but cast the pointers to unsigned long before assigning so the compiler doesn't throw warning about casting a pointer to integer of different size. Also, size_t should use %zu, not %ld. 
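The reason the intermediate unsigned long matters: on a 32-bit build a direct (u64) cast of a pointer is a cast between a pointer and an integer of different size, which the compiler warns about, whereas unsigned long always matches the pointer width and then widens into the u64 trace field without complaint. A tiny standalone illustration (not driver code):

    #include <stdint.h>

    void record(uint64_t *slot, void *fence)
    {
        /* (uint64_t)fence would warn on 32-bit builds (pointer/integer size mismatch); */
        /* unsigned long matches the pointer size and then widens implicitly. */
        *slot = (unsigned long)fence;
    }
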
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_trace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index a5f963f1f6eb..d9f921d46b53 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -33,10 +33,10 @@ DECLARE_EVENT_CLASS(xe_bo, TP_fast_assign( __entry->size = bo->size; __entry->flags = bo->flags; - __entry->vm = (u64)bo->vm; + __entry->vm = (unsigned long)bo->vm; ), - TP_printk("size=%ld, flags=0x%02x, vm=0x%016llx", + TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx", __entry->size, __entry->flags, __entry->vm) ); @@ -186,7 +186,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, atomic_read(&job->engine->guc->state); __entry->flags = job->engine->flags; __entry->error = job->fence->error; - __entry->fence = (u64)job->fence; + __entry->fence = (unsigned long)job->fence; __entry->batch_addr = (u64)job->batch_addr[0]; ), @@ -273,7 +273,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_fast_assign( __entry->ctx = fence->dma.context; __entry->seqno = fence->dma.seqno; - __entry->fence = (u64)fence; + __entry->fence = (unsigned long)fence; ), TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", @@ -313,7 +313,7 @@ DECLARE_EVENT_CLASS(xe_vma, ), TP_fast_assign( - __entry->vma = (u64)vma; + __entry->vma = (unsigned long)vma; __entry->asid = vma->vm->usm.asid; __entry->start = vma->start; __entry->end = vma->end; @@ -410,7 +410,7 @@ DECLARE_EVENT_CLASS(xe_vm, ), TP_fast_assign( - __entry->vm = (u64)vm; + __entry->vm = (unsigned long)vm; __entry->asid = vm->usm.asid; ), -- cgit v1.2.3 From 2c3878820bf0bbd659c2b897add8a011b5e9f2e1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Dec 2022 13:58:12 -0800 Subject: drm/xe/gt: Fix min() with u32 and u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following error while building for 32b: In file included from ../drivers/gpu/drm/xe/xe_gt.c:6: ../drivers/gpu/drm/xe/xe_gt.c: In function ‘gt_ttm_mgr_init’: ../include/linux/minmax.h:20:35: error: comparison of distinct pointer types lacks a cast [-Werror] 20 | (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) | ^~ Cast it to u64 so size of the second operand matches the first one when building it for 32 bits. Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 96d0f5845d87..39df6945e1d9 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -183,7 +183,7 @@ static int gt_ttm_mgr_init(struct xe_gt *gt) if (err) return err; gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20), - gt->mem.vram.size), + (u64)gt->mem.vram.size), gtt_size); xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1; } -- cgit v1.2.3 From baf31a20fa7f3538d68ffa5262a715eb1d699cdd Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 22 Nov 2023 12:16:27 +0530 Subject: drm/i915/display: Get bigjoiner config before dsc config during readout Currently we get bigjoiner config after the dsc get config, during HW readout. Since dsc_get_config now uses bigjoiner flags/pipes to compute DSC PPS parameter pic_width, this results in a state mismatch when Bigjoiner and DSC are used together. So call get bigjoiner config before calling dsc get config function. 
Fixes: 8b70b5691704 ("drm/i915/vdsc: Fill the intel_dsc_get_pps_config function") Cc: Suraj Kandpal Cc: Ankit Nautiyal Cc: Animesh Manna Cc: Jani Nikula Signed-off-by: Ankit Nautiyal Reviewed-by: Suraj Kandpal Link: https://patchwork.freedesktop.org/patch/msgid/20231122064627.905828-1-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 7d48bcdd5797..4e957a8ddfdb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3746,8 +3746,8 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, if (!active) goto out; - intel_dsc_get_config(pipe_config); intel_bigjoiner_get_config(pipe_config); + intel_dsc_get_config(pipe_config); if (!transcoder_is_dsi(pipe_config->cpu_transcoder) || DISPLAY_VER(dev_priv) >= 11) -- cgit v1.2.3 From 3203009fe58d407a150e1116d6900d6ddbbaa542 Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Tue, 12 Dec 2023 13:51:30 +0200 Subject: drm/i915/display: Wait for PHY readiness not needed for disabling sequence When going through the disconnection flow we don't need to wait for PHY readiness and hence we can skip the wait part. For disabling the function returns false as an indicator that the power is not enabled. After all, we are not even using the return value when Type-C is disconnecting. v2: Cleanup for increased readibility (Imre) BSpec: 65380 For VLK-53734 Signed-off-by: Mika Kahola Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231212115130.485911-1-mika.kahola@intel.com --- drivers/gpu/drm/i915/display/intel_tc.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index f64d348a969e..dcf05e00e505 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1030,18 +1030,25 @@ static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enabl __xelpdp_tc_phy_enable_tcss_power(tc, enable); - if ((!tc_phy_wait_for_ready(tc) || - !xelpdp_tc_phy_wait_for_tcss_power(tc, enable)) && - !drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY)) { - if (enable) { - __xelpdp_tc_phy_enable_tcss_power(tc, false); - xelpdp_tc_phy_wait_for_tcss_power(tc, false); - } + if (enable && !tc_phy_wait_for_ready(tc)) + goto out_disable; - return false; - } + if (!xelpdp_tc_phy_wait_for_tcss_power(tc, enable)) + goto out_disable; return true; + +out_disable: + if (drm_WARN_ON(&i915->drm, tc->mode == TC_PORT_LEGACY)) + return false; + + if (!enable) + return false; + + __xelpdp_tc_phy_enable_tcss_power(tc, false); + xelpdp_tc_phy_wait_for_tcss_power(tc, false); + + return false; } static void xelpdp_tc_phy_take_ownership(struct intel_tc_port *tc, bool take) -- cgit v1.2.3 From ac3420d3d428443a08b923f9118121c170192b62 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Mon, 4 Dec 2023 20:18:09 +0530 Subject: drm/i915/hwmon: Fix static analysis tool reported issues Updated i915 hwmon with fixes for issues reported by static analysis tool. Fixed integer overflow with upcasting. v2: - Added Fixes tag (Badal). - Updated commit message as per review comments (Anshuman). 
Fixes: 4c2572fe0ae7 ("drm/i915/hwmon: Expose power1_max_interval") Reviewed-by: Badal Nilawar Reviewed-by: Anshuman Gupta Signed-off-by: Karthik Poosa Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20231204144809.1518704-1-karthik.poosa@intel.com --- drivers/gpu/drm/i915/i915_hwmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 975da8e7f2a9..8c3f443c8347 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -175,7 +175,7 @@ hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr, * tau4 = (4 | x) << y * but add 2 when doing the final right shift to account for units */ - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; /* val in hwmon interface units (millisec) */ out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); @@ -211,7 +211,7 @@ hwm_power1_max_interval_store(struct device *dev, r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); x = REG_FIELD_GET(PKG_MAX_WIN_X, r); y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); if (val > max_win) -- cgit v1.2.3 From 6c9dbee84cd005bed5f9d07b3a2797ae6414b435 Mon Sep 17 00:00:00 2001 From: Farouk Bouabid Date: Wed, 13 Dec 2023 15:50:45 +0100 Subject: drm/panel: ltk050h3146w: Set burst mode for ltk050h3148w The ltk050h3148w variant expects the horizontal component lane byte clock cycle(lbcc) to be calculated using lane_mbps (burst mode) instead of the pixel clock. Using the pixel clock rate by default for this calculation was introduced in commit ac87d23694f4 ("drm/bridge: synopsys: dw-mipi-dsi: Use pixel clock rate to calculate lbcc") and starting from commit 93e82bb4de01 ("drm/bridge: synopsys: dw-mipi-dsi: Fix hcomponent lbcc for burst mode") only panels that support burst mode can keep using the lane_mbps. So add MIPI_DSI_MODE_VIDEO_BURST as part of the mode_flags for the dsi host. 
Fixes: 93e82bb4de01 ("drm/bridge: synopsys: dw-mipi-dsi: Fix hcomponent lbcc for burst mode") Signed-off-by: Farouk Bouabid Reviewed-by: Jessica Zhang Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231213145045.41020-1-farouk.bouabid@theobroma-systems.com --- drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c index 6e3670508e3a..30919c872ac8 100644 --- a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c +++ b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c @@ -326,7 +326,7 @@ static const struct drm_display_mode ltk050h3148w_mode = { static const struct ltk050h3146w_desc ltk050h3148w_data = { .mode = <k050h3148w_mode, .init = ltk050h3148w_init_sequence, - .mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE, + .mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_VIDEO_BURST, }; static int ltk050h3146w_init_sequence(struct ltk050h3146w *ctx) -- cgit v1.2.3 From e93bffc2ac0a833b42841f31fff955549d38ce98 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Dec 2023 10:11:34 +0200 Subject: drm/i915: Reject async flips with bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently async flips are busted when bigjoiner is in use. As a short term fix simply reject async flips in that case. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9769 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211081134.2698-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_display.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4e957a8ddfdb..e8b307ae9319 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5926,6 +5926,17 @@ static int intel_async_flip_check_uapi(struct intel_atomic_state *state, return -EINVAL; } + /* + * FIXME: Bigjoiner+async flip is busted currently. + * Remove this check once the issues are fixed. + */ + if (new_crtc_state->bigjoiner_pipes) { + drm_dbg_kms(&i915->drm, + "[CRTC:%d:%s] async flip disallowed with bigjoiner\n", + crtc->base.base.id, crtc->base.name); + return -EINVAL; + } + for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { if (plane->pipe != crtc->pipe) -- cgit v1.2.3 From e2e1916008aacf706ffa6bba65714c6d6200b196 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 28 Nov 2023 13:51:31 +0200 Subject: drm/i915/cdclk: s/-1/~0/ when dealing with unsigned values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cdclk_pll_is_unknown() used ~0 when checking for the "VCO is unknown" value, but the assignment uses -1. They are the same in the end, but let's use the same ~0 form on both sides for consistency. 
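Both spellings end up as the same all-ones bit pattern in the unsigned vco field; ~0 simply states that intent directly. A one-line sanity check in standalone C:

    #include <assert.h>

    int main(void)
    {
        unsigned int vco = -1;   /* wraps to all ones */
        assert(vco == ~0u);      /* same value, clearer spelling */
        return 0;
    }
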
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-2-ville.syrjala@linux.intel.com Reviewed-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_cdclk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 8bb6bab7c8cd..c774e020efd2 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1180,7 +1180,7 @@ sanitize: /* force cdclk programming */ dev_priv->display.cdclk.hw.cdclk = 0; /* force full PLL disable + enable */ - dev_priv->display.cdclk.hw.vco = -1; + dev_priv->display.cdclk.hw.vco = ~0; } static void skl_cdclk_init_hw(struct drm_i915_private *dev_priv) @@ -2075,7 +2075,7 @@ sanitize: dev_priv->display.cdclk.hw.cdclk = 0; /* force full PLL disable + enable */ - dev_priv->display.cdclk.hw.vco = -1; + dev_priv->display.cdclk.hw.vco = ~0; } static void bxt_cdclk_init_hw(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From 2581547335ff8acd877f1acd4ee57527eaaa0bde Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 28 Nov 2023 13:51:32 +0200 Subject: drm/i915/cdclk: Give the squash waveform length a name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the slightly magic 'size = 16' with a bit more descriptive name. We'll have another user for this value later on. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-3-ville.syrjala@linux.intel.com Reviewed-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_cdclk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index c774e020efd2..a1660c6b86ec 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1800,6 +1800,8 @@ static bool cdclk_pll_is_unknown(unsigned int vco) return vco == ~0; } +static const int cdclk_squash_len = 16; + static int cdclk_squash_divider(u16 waveform) { return hweight16(waveform ?: 0xffff); @@ -1811,7 +1813,6 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91 struct intel_cdclk_config *mid_cdclk_config) { u16 old_waveform, new_waveform, mid_waveform; - int size = 16; int div = 2; /* Return if PLL is in an unknown state, force a complete disable and re-enable. */ @@ -1850,7 +1851,8 @@ static bool cdclk_compute_crawl_and_squash_midpoint(struct drm_i915_private *i91 } mid_cdclk_config->cdclk = DIV_ROUND_CLOSEST(cdclk_squash_divider(mid_waveform) * - mid_cdclk_config->vco, size * div); + mid_cdclk_config->vco, + cdclk_squash_len * div); /* make sure the mid clock came out sane */ -- cgit v1.2.3 From e1a914aef28f39aec5f107f31478d95aff3ae6db Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 12 Dec 2023 00:16:36 +0200 Subject: drm/i915/cdclk: Remove the assumption that cdclk divider==2 when using squashing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we have a hardcoded assumption that the cdclk divider (2*cd2x divider) is always 2 when squashing is used. While that is true for all current platforms it might not hold in the future. So eliminate the assumption and calculate the correct divider from the other parameters. 
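Worked example with made-up numbers, just to show the relationship: for a 16-entry squash waveform with 8 bits set, the squashed clock is half of the unsquashed one, so

    unsquashed_cdclk = cdclk * cdclk_squash_len / cdclk_squash_divider(waveform)
                     = 300000 * 16 / 8 = 600000 kHz

which equals vco/2 when vco = 1200000 kHz, i.e. the old hard-coded divider of 2 falls out as a special case, while the new formula also stays correct should a future platform pick a different cdclk divider.
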
v2: s/cd2x divider/cdclk divider/ (Gustavo) s/clock/unsquashed_cdclk/ (Gustavo) Reviewed-by: Gustavo Sousa Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211221636.29658-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index a1660c6b86ec..a12cf085dbdb 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1880,9 +1880,9 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_config->cdclk; int vco = cdclk_config->vco; - u32 val; + int unsquashed_cdclk; u16 waveform; - int clock; + u32 val; if (HAS_CDCLK_CRAWL(dev_priv) && dev_priv->display.cdclk.hw.vco > 0 && vco > 0 && !cdclk_pll_is_unknown(dev_priv->display.cdclk.hw.vco)) { @@ -1899,15 +1899,13 @@ static void _bxt_set_cdclk(struct drm_i915_private *dev_priv, waveform = cdclk_squash_waveform(dev_priv, cdclk); - if (waveform) - clock = vco / 2; - else - clock = cdclk; + unsquashed_cdclk = DIV_ROUND_CLOSEST(cdclk * cdclk_squash_len, + cdclk_squash_divider(waveform)); if (HAS_CDCLK_SQUASH(dev_priv)) dg2_cdclk_squash_program(dev_priv, waveform); - val = bxt_cdclk_cd2x_div_sel(dev_priv, clock, vco) | + val = bxt_cdclk_cd2x_div_sel(dev_priv, unsquashed_cdclk, vco) | bxt_cdclk_cd2x_pipe(dev_priv, pipe); /* -- cgit v1.2.3 From f23fe4d7d794c6d71dc6b8fdc510da2fc2174369 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 12 Dec 2023 00:17:59 +0200 Subject: drm/i915/cdclk: Rewrite cdclk->voltage_level selection to use tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cdclk->voltage_level if ladders are hard to read, especially as they're written the other way around compared to how bspec lists the limits. Let's rewrite them to use simple arrays that gives us the max cdclk for each voltage level. 
v2: Bump the jsl/ehl max cdclk in the table to 652.8 MHz to accommodate JSL machines in CI that boot with high cdclk Reviewed-by: Gustavo Sousa Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211221759.29725-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 87 +++++++++++++++++++----------- 1 file changed, 57 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index a12cf085dbdb..a1d87db2665b 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1446,50 +1446,77 @@ static u8 bxt_calc_voltage_level(int cdclk) return DIV_ROUND_UP(cdclk, 25000); } +static u8 calc_voltage_level(int cdclk, int num_voltage_levels, + const int voltage_level_max_cdclk[]) +{ + int voltage_level; + + for (voltage_level = 0; voltage_level < num_voltage_levels; voltage_level++) { + if (cdclk <= voltage_level_max_cdclk[voltage_level]) + return voltage_level; + } + + MISSING_CASE(cdclk); + return num_voltage_levels - 1; +} + static u8 icl_calc_voltage_level(int cdclk) { - if (cdclk > 556800) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; + static const int icl_voltage_level_max_cdclk[] = { + [0] = 312000, + [1] = 556800, + [2] = 652800, + }; + + return calc_voltage_level(cdclk, + ARRAY_SIZE(icl_voltage_level_max_cdclk), + icl_voltage_level_max_cdclk); } static u8 ehl_calc_voltage_level(int cdclk) { - if (cdclk > 326400) - return 3; - else if (cdclk > 312000) - return 2; - else if (cdclk > 180000) - return 1; - else - return 0; + static const int ehl_voltage_level_max_cdclk[] = { + [0] = 180000, + [1] = 312000, + [2] = 326400, + /* + * Bspec lists the limit as 556.8 MHz, but some JSL + * development boards (at least) boot with 652.8 MHz + */ + [3] = 652800, + }; + + return calc_voltage_level(cdclk, + ARRAY_SIZE(ehl_voltage_level_max_cdclk), + ehl_voltage_level_max_cdclk); } static u8 tgl_calc_voltage_level(int cdclk) { - if (cdclk > 556800) - return 3; - else if (cdclk > 326400) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; + static const int tgl_voltage_level_max_cdclk[] = { + [0] = 312000, + [1] = 326400, + [2] = 556800, + [3] = 652800, + }; + + return calc_voltage_level(cdclk, + ARRAY_SIZE(tgl_voltage_level_max_cdclk), + tgl_voltage_level_max_cdclk); } static u8 rplu_calc_voltage_level(int cdclk) { - if (cdclk > 556800) - return 3; - else if (cdclk > 480000) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; + static const int rplu_voltage_level_max_cdclk[] = { + [0] = 312000, + [1] = 480000, + [2] = 556800, + [3] = 652800, + }; + + return calc_voltage_level(cdclk, + ARRAY_SIZE(rplu_voltage_level_max_cdclk), + rplu_voltage_level_max_cdclk); } static void icl_readout_refclk(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From 273361f54e5bcaccdd725a9ffac14a9fac672451 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 28 Nov 2023 13:51:35 +0200 Subject: drm/i915/mtl: Fix voltage_level for cdclk==480MHz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow MTL to use voltage level 1 for 480MHz cdclk, instead of the voltage level 2 that it's currently using. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-6-ville.syrjala@linux.intel.com Reviewed-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_cdclk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index a1d87db2665b..c985ebb6831a 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3516,7 +3516,7 @@ static const struct intel_cdclk_funcs mtl_cdclk_funcs = { .get_cdclk = bxt_get_cdclk, .set_cdclk = bxt_set_cdclk, .modeset_calc_cdclk = bxt_modeset_calc_cdclk, - .calc_voltage_level = tgl_calc_voltage_level, + .calc_voltage_level = rplu_calc_voltage_level, }; static const struct intel_cdclk_funcs rplu_cdclk_funcs = { -- cgit v1.2.3 From 46bdb77d8b61e560ebb95c8d3a355be84b5492d2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 28 Nov 2023 13:51:36 +0200 Subject: drm/i915: Split intel_ddi_compute_min_voltage_level() into platform variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mess inside intel_ddi_compute_min_voltage_level() is illegible. Clean it up a bit by splitting the internals into per-platform functions. TODO: make it a vfunc? Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-7-ville.syrjala@linux.intel.com Reviewed-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_ddi.c | 37 ++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 38f28c480b38..bcfcd7e789f0 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3672,16 +3672,39 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, AUDIO_OUTPUT_ENABLE(cpu_transcoder); } +static int tgl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->port_clock > 594000) + return 2; + else + return 0; +} + +static int jsl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->port_clock > 594000) + return 3; + else + return 0; +} + +static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->port_clock > 594000) + return 1; + else + return 0; +} + void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state) { - if (DISPLAY_VER(dev_priv) >= 12 && crtc_state->port_clock > 594000) - crtc_state->min_voltage_level = 2; - else if ((IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) && - crtc_state->port_clock > 594000) - crtc_state->min_voltage_level = 3; - else if (DISPLAY_VER(dev_priv) >= 11 && crtc_state->port_clock > 594000) - crtc_state->min_voltage_level = 1; + if (DISPLAY_VER(dev_priv) >= 12) + crtc_state->min_voltage_level = tgl_ddi_min_voltage_level(crtc_state); + else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) + crtc_state->min_voltage_level = jsl_ddi_min_voltage_level(crtc_state); + else if (DISPLAY_VER(dev_priv) >= 11) + crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state); } static enum transcoder bdw_transcoder_master_readout(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From 0656afab88a6cf0efb3fbef394b68a4451b40365 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: 
Tue, 28 Nov 2023 13:51:37 +0200 Subject: drm/i915/mtl: Calculate the correct voltage level from port_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On MTL we need to bump the voltage level to only 1 (not 2) when port clock exceeds 594MHz. Make it so. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-8-ville.syrjala@linux.intel.com Reviewed-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index bcfcd7e789f0..dd04bd7b348c 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3699,7 +3699,9 @@ static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state) void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state) { - if (DISPLAY_VER(dev_priv) >= 12) + if (DISPLAY_VER(dev_priv) >= 14) + crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state); + else if (DISPLAY_VER(dev_priv) >= 12) crtc_state->min_voltage_level = tgl_ddi_min_voltage_level(crtc_state); else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) crtc_state->min_voltage_level = jsl_ddi_min_voltage_level(crtc_state); -- cgit v1.2.3 From 8cd53c6b200e6a4522524e8cf45adc45a35814e1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 28 Nov 2023 13:51:38 +0200 Subject: drm/i915: Simplify intel_ddi_compute_min_voltage_level() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the redundant dev_priv parameters from intel_ddi_compute_min_voltage_level() to make life easier. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231128115138.13238-9-ville.syrjala@linux.intel.com Reviewed-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_ddi.c | 9 +++++---- drivers/gpu/drm/i915/display/intel_ddi.h | 3 +-- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index dd04bd7b348c..12a29363e5df 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3696,9 +3696,10 @@ static int icl_ddi_min_voltage_level(const struct intel_crtc_state *crtc_state) return 0; } -void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, - struct intel_crtc_state *crtc_state) +void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + if (DISPLAY_VER(dev_priv) >= 14) crtc_state->min_voltage_level = icl_ddi_min_voltage_level(crtc_state); else if (DISPLAY_VER(dev_priv) >= 12) @@ -3920,7 +3921,7 @@ static void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->lane_lat_optim_mask = bxt_ddi_phy_get_lane_lat_optim_mask(encoder); - intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + intel_ddi_compute_min_voltage_level(pipe_config); intel_hdmi_read_gcp_infoframe(encoder, pipe_config); @@ -4200,7 +4201,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); - intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + intel_ddi_compute_min_voltage_level(pipe_config); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 63853a1f6582..434de7196875 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -70,8 +70,7 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, bool state); -void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, - struct intel_crtc_state *crtc_state); +void intel_ddi_compute_min_voltage_level(struct intel_crtc_state *crtc_state); int intel_ddi_toggle_hdcp_bits(struct intel_encoder *intel_encoder, enum transcoder cpu_transcoder, bool enable, u32 hdcp_mask); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index e8940acea8ad..8a9432335030 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -614,7 +614,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, intel_dp_audio_compute_config(encoder, pipe_config, conn_state); - intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + intel_ddi_compute_min_voltage_level(pipe_config); intel_psr_compute_config(intel_dp, pipe_config, conn_state); -- cgit v1.2.3 From 51ea405c47f833e55d19401b35b71100197e6d5d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Dec 2023 11:28:30 -0500 Subject: drm/amdgpu: fix buffer funcs setting order on suspend harder Part of commit c03581986234 ("drm/amdgpu: fix buffer funcs setting order on suspend") got dropped accidently. Add it back. 
Fixes: c03581986234 ("drm/amdgpu: fix buffer funcs setting order on suspend") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 138c7b37af69..f58fb719292d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4589,8 +4589,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_ras_suspend(adev); - amdgpu_ttm_set_buffer_funcs_status(adev, false); - amdgpu_device_ip_suspend_phase1(adev); if (!adev->in_s0ix) -- cgit v1.2.3 From 5d1ff65f80fd8c11476bd10d10aa2b2b639de432 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Dec 2023 10:54:54 +0000 Subject: drm/amd/display: Fix spelling mistake "SMC_MSG_AllowZstatesEntr" -> "SMC_MSG_AllowZstatesEntry" There is a spelling mistake in a smu_print message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c index d6db9d7fced2..6d4a1ffab5ed 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c @@ -361,26 +361,26 @@ void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst case DCN_ZSTATE_SUPPORT_ALLOW: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10) | (1 << 9) | (1 << 8); - smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW, param = %d\n", __func__, param); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_DISALLOW: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = 0; - smu_print("%s: SMC_MSG_AllowZstatesEntr msg_id = DISALLOW, param = %d\n", __func__, param); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg_id = DISALLOW, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10); - smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z10_ONLY, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY: msg_id = VBIOSSMC_MSG_AllowZstatesEntry; param = (1 << 10) | (1 << 8); - smu_print("%s: SMC_MSG_AllowZstatesEntr msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param); + smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_Z10_ONLY, param = %d\n", __func__, param); break; case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY: -- cgit v1.2.3 From 601603105325ad4ec62db95c9bc428202ece2c8f Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:42 -0100 Subject: drm/drm_property: make replace_property_blob_from_id a DRM helper Place it in drm_property where drm_property_replace_blob and drm_property_lookup_blob live. Then we can use the DRM helper for driver-specific KMS properties too. 
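As a hypothetical example of the "driver-specific KMS properties" use case the helper is being exported for (the property, state struct and cast helper below are invented for illustration; the call signature and the -1 "don't check this dimension" convention are the ones added by this patch):

    static int foo_plane_atomic_set_property(struct drm_plane *plane,
                                             struct drm_plane_state *state,
                                             struct drm_property *property,
                                             uint64_t val)
    {
        struct foo_plane_state *foo_state = to_foo_plane_state(state);  /* hypothetical */
        bool replaced = false;

        if (property == foo_lut_property)                               /* hypothetical */
            return drm_property_replace_blob_from_id(plane->dev,
                                                     &foo_state->lut, val,
                                                     -1, /* any total size */
                                                     sizeof(struct drm_color_lut),
                                                     &replaced);

        return -EINVAL;
    }
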
Reviewed-by: Harry Wentland Reviewed-by: Liviu Dudau Signed-off-by: Melissa Wen Acked-by: Maxime Ripard Signed-off-by: Alex Deucher --- drivers/gpu/drm/arm/malidp_crtc.c | 2 +- drivers/gpu/drm/drm_atomic_uapi.c | 52 ++++------------------------------ drivers/gpu/drm/drm_property.c | 59 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_property.h | 6 ++++ 4 files changed, 71 insertions(+), 48 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index dc01c43f6193..d72c22dcf685 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -221,7 +221,7 @@ static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc, /* * The size of the ctm is checked in - * drm_atomic_replace_property_blob_from_id. + * drm_property_replace_blob_from_id. */ ctm = (struct drm_color_ctm *)state->ctm->data; for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) { diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index aee4a65d4959..29d4940188d4 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -362,48 +362,6 @@ static s32 __user *get_out_fence_for_connector(struct drm_atomic_state *state, return fence_ptr; } -static int -drm_atomic_replace_property_blob_from_id(struct drm_device *dev, - struct drm_property_blob **blob, - uint64_t blob_id, - ssize_t expected_size, - ssize_t expected_elem_size, - bool *replaced) -{ - struct drm_property_blob *new_blob = NULL; - - if (blob_id != 0) { - new_blob = drm_property_lookup_blob(dev, blob_id); - if (new_blob == NULL) { - drm_dbg_atomic(dev, - "cannot find blob ID %llu\n", blob_id); - return -EINVAL; - } - - if (expected_size > 0 && - new_blob->length != expected_size) { - drm_dbg_atomic(dev, - "[BLOB:%d] length %zu different from expected %zu\n", - new_blob->base.id, new_blob->length, expected_size); - drm_property_blob_put(new_blob); - return -EINVAL; - } - if (expected_elem_size > 0 && - new_blob->length % expected_elem_size != 0) { - drm_dbg_atomic(dev, - "[BLOB:%d] length %zu not divisible by element size %zu\n", - new_blob->base.id, new_blob->length, expected_elem_size); - drm_property_blob_put(new_blob); - return -EINVAL; - } - } - - *replaced |= drm_property_replace_blob(blob, new_blob); - drm_property_blob_put(new_blob); - - return 0; -} - static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, struct drm_crtc_state *state, struct drm_property *property, uint64_t val) @@ -424,7 +382,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, } else if (property == config->prop_vrr_enabled) { state->vrr_enabled = val; } else if (property == config->degamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->degamma_lut, val, -1, sizeof(struct drm_color_lut), @@ -432,7 +390,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->ctm, val, sizeof(struct drm_color_ctm), -1, @@ -440,7 +398,7 @@ static int drm_atomic_crtc_set_property(struct drm_crtc *crtc, state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->gamma_lut, val, -1, sizeof(struct 
drm_color_lut), @@ -581,7 +539,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (property == plane->color_range_property) { state->color_range = val; } else if (property == config->prop_fb_damage_clips) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->fb_damage_clips, val, -1, @@ -778,7 +736,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, if (state->link_status != DRM_LINK_STATUS_GOOD) state->link_status = val; } else if (property == config->hdr_output_metadata_property) { - ret = drm_atomic_replace_property_blob_from_id(dev, + ret = drm_property_replace_blob_from_id(dev, &state->hdr_output_metadata, val, sizeof(struct hdr_output_metadata), -1, diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index dfec479830e4..596272149a35 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "drm_crtc_internal.h" @@ -751,6 +752,64 @@ bool drm_property_replace_blob(struct drm_property_blob **blob, } EXPORT_SYMBOL(drm_property_replace_blob); +/** + * drm_property_replace_blob_from_id - replace a blob property taking a reference + * @dev: DRM device + * @blob: a pointer to the member blob to be replaced + * @blob_id: the id of the new blob to replace with + * @expected_size: expected size of the blob property + * @expected_elem_size: expected size of an element in the blob property + * @replaced: if the blob was in fact replaced + * + * Look up the new blob from id, take its reference, check expected sizes of + * the blob and its element and replace the old blob by the new one. Advertise + * if the replacement operation was successful. + * + * Return: true if the blob was in fact replaced. -EINVAL if the new blob was + * not found or sizes don't match. 
+ */ +int drm_property_replace_blob_from_id(struct drm_device *dev, + struct drm_property_blob **blob, + uint64_t blob_id, + ssize_t expected_size, + ssize_t expected_elem_size, + bool *replaced) +{ + struct drm_property_blob *new_blob = NULL; + + if (blob_id != 0) { + new_blob = drm_property_lookup_blob(dev, blob_id); + if (new_blob == NULL) { + drm_dbg_atomic(dev, + "cannot find blob ID %llu\n", blob_id); + return -EINVAL; + } + + if (expected_size > 0 && + new_blob->length != expected_size) { + drm_dbg_atomic(dev, + "[BLOB:%d] length %zu different from expected %zu\n", + new_blob->base.id, new_blob->length, expected_size); + drm_property_blob_put(new_blob); + return -EINVAL; + } + if (expected_elem_size > 0 && + new_blob->length % expected_elem_size != 0) { + drm_dbg_atomic(dev, + "[BLOB:%d] length %zu not divisible by element size %zu\n", + new_blob->base.id, new_blob->length, expected_elem_size); + drm_property_blob_put(new_blob); + return -EINVAL; + } + } + + *replaced |= drm_property_replace_blob(blob, new_blob); + drm_property_blob_put(new_blob); + + return 0; +} +EXPORT_SYMBOL(drm_property_replace_blob_from_id); + int drm_mode_getblob_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h index 65bc9710a470..082f29156b3e 100644 --- a/include/drm/drm_property.h +++ b/include/drm/drm_property.h @@ -279,6 +279,12 @@ struct drm_property_blob *drm_property_create_blob(struct drm_device *dev, const void *data); struct drm_property_blob *drm_property_lookup_blob(struct drm_device *dev, uint32_t id); +int drm_property_replace_blob_from_id(struct drm_device *dev, + struct drm_property_blob **blob, + uint64_t blob_id, + ssize_t expected_size, + ssize_t expected_elem_size, + bool *replaced); int drm_property_replace_global_blob(struct drm_device *dev, struct drm_property_blob **replace, size_t length, -- cgit v1.2.3 From 24013b9301349881c9fcd27e7edacc672e0bf6d3 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:43 -0100 Subject: drm/drm_plane: track color mgmt changes per plane We will add color mgmt properties to DRM planes in the next patches and we want to track when one of this properties change to define atomic commit behaviors. Using a similar approach from CRTC color props, we set a color_mgmt_changed boolean whenever a plane color prop changes. 
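A hedged sketch of how a driver is expected to consume the new flag (the function and hardware hook below are invented; the real pieces are the plane-state field added here and drm_atomic_get_new_plane_state()): the property-setting path ORs the flag in when a color property changes, and the commit path can then skip reprogramming the plane color pipeline when nothing changed.

    static void foo_plane_atomic_update(struct drm_plane *plane,
                                        struct drm_atomic_state *state)
    {
        struct drm_plane_state *new_state =
            drm_atomic_get_new_plane_state(state, plane);

        if (new_state->color_mgmt_changed)
            foo_program_plane_luts(plane, new_state);  /* hypothetical hw hook */
    }
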
Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Acked-by: Maxime Ripard Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_atomic.c | 1 + drivers/gpu/drm/drm_atomic_state_helper.c | 1 + include/drm/drm_plane.h | 7 +++++++ 3 files changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c31fc0b48c31..a91737adf8e7 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -733,6 +733,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, drm_get_color_encoding_name(state->color_encoding)); drm_printf(p, "\tcolor-range=%s\n", drm_get_color_range_name(state->color_range)); + drm_printf(p, "\tcolor_mgmt_changed=%d\n", state->color_mgmt_changed); if (plane->funcs->atomic_print_state) plane->funcs->atomic_print_state(p, state); diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 54975de44a0e..519228eb1095 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -352,6 +352,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane, state->fence = NULL; state->commit = NULL; state->fb_damage_clips = NULL; + state->color_mgmt_changed = false; } EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state); diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index c6565a6f9324..641fe298052d 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -251,6 +251,13 @@ struct drm_plane_state { /** @state: backpointer to global drm_atomic_state */ struct drm_atomic_state *state; + + /** + * @color_mgmt_changed: Color management properties have changed. Used + * by the atomic helpers and drivers to steer the atomic commit control + * flow. + */ + bool color_mgmt_changed : 1; }; static inline struct drm_rect -- cgit v1.2.3 From 9342a9ae54ef299ffe5e4ce3d0be6a4da5edba0e Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:44 -0100 Subject: drm/amd/display: add driver-specific property for plane degamma LUT Hook up driver-specific atomic operations for managing AMD color properties. Create AMD driver-specific color management properties and attach them according to HW capabilities defined by `struct dc_color_caps`. First add plane degamma LUT properties that means user-blob and its size. We will add more plane color properties in the next patches. In addition, we define AMD_PRIVATE_COLOR to guard these driver-specific plane properties. Plane degamma can be used to linearize input space for arithmetical operations that are more accurate when applied in linear color. 
v2: - update degamma LUT prop description - move private color operations from amdgpu_display to amdgpu_dm_color v5: - get degamma blob correctly (Joshua) Reviewed-by: Harry Wentland Co-developed-by: Joshua Ashton Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 12 +++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 +++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 25 +++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 85 ++++++++++++++++++++++ 5 files changed, 138 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d8083972e393..a63b01aa230d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -343,6 +343,18 @@ struct amdgpu_mode_info { int disp_priority; const struct amdgpu_display_funcs *funcs; const enum drm_plane_type *plane_type; + + /* Driver-private color mgmt props */ + + /* @plane_degamma_lut_property: Plane property to set a degamma LUT to + * convert encoded values to light linear values before sampling or + * blending. + */ + struct drm_property *plane_degamma_lut_property; + /* @plane_degamma_lut_size_property: Plane property to define the max + * size of degamma LUT as supported by the driver (read-only). + */ + struct drm_property *plane_degamma_lut_size_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8e6fe9860cd6..80346bebb7fc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4067,6 +4067,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } +#ifdef AMD_PRIVATE_COLOR + if (amdgpu_dm_create_color_properties(adev)) + return -ENOMEM; +#endif + r = amdgpu_dm_audio_init(adev); if (r) { dc_release_state(state->context); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 0a9974017eba..446a3962d9c9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -727,6 +727,16 @@ extern const struct amdgpu_ip_block_version dm_ip_block; struct dm_plane_state { struct drm_plane_state base; struct dc_plane_state *dc_state; + + /* Plane color mgmt */ + /** + * @degamma_lut: + * + * 1D LUT for mapping framebuffer/plane pixel data before sampling or + * blending operations. It's usually applied to linearize input space. + * The blob (if not NULL) is an array of &struct drm_color_lut. 
+ */ + struct drm_property_blob *degamma_lut; }; struct dm_crtc_state { @@ -817,6 +827,7 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 void amdgpu_dm_init_color_mod(void); +int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a4cb23d059bd..421af919387f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -84,6 +84,31 @@ void amdgpu_dm_init_color_mod(void) setup_x_points_distribution(); } +#ifdef AMD_PRIVATE_COLOR +int +amdgpu_dm_create_color_properties(struct amdgpu_device *adev) +{ + struct drm_property *prop; + + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_DEGAMMA_LUT", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_degamma_lut_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + DRM_MODE_PROP_IMMUTABLE, + "AMD_PLANE_DEGAMMA_LUT_SIZE", + 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_degamma_lut_size_property = prop; + + return 0; +} +#endif + /** * __extract_blob_lut - Extracts the DRM lut and lut size from a blob. * @blob: DRM color mgmt property blob diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 116121e647ca..b1e50cfa9f83 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1357,6 +1357,10 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct dc_plane_state_retain(dm_plane_state->dc_state); } + if (old_dm_plane_state->degamma_lut) + dm_plane_state->degamma_lut = + drm_property_blob_get(old_dm_plane_state->degamma_lut); + return &dm_plane_state->base; } @@ -1424,12 +1428,86 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, { struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + if (dm_plane_state->degamma_lut) + drm_property_blob_put(dm_plane_state->degamma_lut); + if (dm_plane_state->dc_state) dc_plane_state_release(dm_plane_state->dc_state); drm_atomic_helper_plane_destroy_state(plane, state); } +#ifdef AMD_PRIVATE_COLOR +static void +dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, + struct drm_plane *plane) +{ + struct amdgpu_mode_info mode_info = dm->adev->mode_info; + struct dpp_color_caps dpp_color_caps = dm->dc->caps.color.dpp; + + /* Check HW color pipeline capabilities on DPP block (pre-blending) + * before exposing related properties. 
+ */ + if (dpp_color_caps.dgam_ram || dpp_color_caps.gamma_corr) { + drm_object_attach_property(&plane->base, + mode_info.plane_degamma_lut_property, + 0); + drm_object_attach_property(&plane->base, + mode_info.plane_degamma_lut_size_property, + MAX_COLOR_LUT_ENTRIES); + } +} + +static int +dm_atomic_plane_set_property(struct drm_plane *plane, + struct drm_plane_state *state, + struct drm_property *property, + uint64_t val) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + bool replaced = false; + int ret; + + if (property == adev->mode_info.plane_degamma_lut_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->degamma_lut, + val, -1, + sizeof(struct drm_color_lut), + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; + } else { + drm_dbg_atomic(plane->dev, + "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n", + plane->base.id, plane->name, + property->base.id, property->name); + return -EINVAL; + } + + return 0; +} + +static int +dm_atomic_plane_get_property(struct drm_plane *plane, + const struct drm_plane_state *state, + struct drm_property *property, + uint64_t *val) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + + if (property == adev->mode_info.plane_degamma_lut_property) { + *val = (dm_plane_state->degamma_lut) ? + dm_plane_state->degamma_lut->base.id : 0; + } else { + return -EINVAL; + } + + return 0; +} +#endif + static const struct drm_plane_funcs dm_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -1438,6 +1516,10 @@ static const struct drm_plane_funcs dm_plane_funcs = { .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, .format_mod_supported = amdgpu_dm_plane_format_mod_supported, +#ifdef AMD_PRIVATE_COLOR + .atomic_set_property = dm_atomic_plane_set_property, + .atomic_get_property = dm_atomic_plane_get_property, +#endif }; int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, @@ -1517,6 +1599,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, drm_plane_helper_add(plane, &dm_plane_helper_funcs); +#ifdef AMD_PRIVATE_COLOR + dm_atomic_plane_attach_color_mgmt_properties(dm, plane); +#endif /* Create (reset) the plane state */ if (plane->funcs->reset) plane->funcs->reset(plane); -- cgit v1.2.3 From ed342a2e78c4e4a8d82c2d19c95e8a3eb092c0d0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 1 Dec 2023 17:13:46 +0530 Subject: drm/amdgpu: Use the right method to get IP version Replace direct usage of adev->ip_versions with amdgpu_ip_version. 
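The conversion is mechanical; as a hedged sketch of a call site (the use_new_path() consumer is made up for illustration):

        /* Before: open-coded lookup in the IP version array. */
        if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))
                use_new_path(adev);

        /* After: always go through the accessor so the layout of the
         * version array stays an implementation detail of amdgpu. */
        if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))
                use_new_path(adev);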
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++--- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 6 +++--- 5 files changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f58fb719292d..85ed0d66a029 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1599,7 +1599,7 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) if (adev->mman.keep_stolen_vga_memory) return false; - return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0); + return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0); } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 3a632c3b1a2c..0dcff2889e25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1099,7 +1099,8 @@ bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) { bool xnack_mode = true; - if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + if (amdgpu_sriov_vf(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) xnack_mode = false; return xnack_mode; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2ac5820e9c92..473a774294ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -883,7 +883,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * GRBM interface. */ if ((vmhub == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))) RREG32_NO_KIQ(req); for (j = 0; j < adev->usec_timeout; j++) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 80346bebb7fc..e819d05755b3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1712,7 +1712,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0]; /* Enable DWB for tested platforms only */ - if (adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0)) + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; INIT_LIST_HEAD(&adev->dm.da_list); @@ -8939,7 +8939,7 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, } wb_info->mcif_buf_params.p_vmid = 1; - if (adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0)) { + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) { wb_info->mcif_warmup_params.start_address.quad_part = afb->address; wb_info->mcif_warmup_params.region_size = wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height; @@ -9891,7 +9891,8 @@ static bool should_reset_plane(struct drm_atomic_state *state, * TODO: Remove this hack for all asics once it proves that the * fast updates works fine on DCN3.2+. 
*/ - if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset) + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 2, 0) && + state->allow_modeset) return true; /* Exit early if we know that we're adding or removing the plane. */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index d8f8ad0e7137..4894f7ee737b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -224,7 +224,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) if (smu->is_apu) adev->pm.fw_version = smu_version; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(14, 0, 2): smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_2; break; @@ -235,7 +235,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu) break; default: dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n", - adev->ip_versions[MP1_HWIP][0]); + amdgpu_ip_version(adev, MP1_HWIP, 0)); smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_INV; break; } @@ -733,7 +733,7 @@ int smu_v14_0_gfx_off_control(struct smu_context *smu, bool enable) int ret = 0; struct amdgpu_device *adev = smu->adev; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(14, 0, 2): case IP_VERSION(14, 0, 0): if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) -- cgit v1.2.3 From b70aed8f5d7686c4343f9ae618287404fa5a703e Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Tue, 28 Nov 2023 17:02:06 +0530 Subject: drm/amdgpu/jpeg: configure doorbell for each playback Doorbell is configured during start of each playback. v1 - add comment for the doorbell programming change Signed-off-by: Saleemkhan Jamadar Acked-by: Leo Liu Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 9df011323d4b..6ede85b28cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -155,13 +155,6 @@ static int jpeg_v4_0_5_hw_init(void *handle) struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); - - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); - r = amdgpu_ring_test_helper(ring); if (r) return r; @@ -336,6 +329,14 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, true); + /* doorbell programming is done for every playback */ + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + /* disable power gating */ r = jpeg_v4_0_5_disable_static_power_gating(adev); if (r) -- cgit v1.2.3 From 31e6af1ff77533df2e8e006974a9b57adece0488 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 3 Nov 2023 13:41:42 +0800 Subject: drm/amd/pm: Remove redundant function members of pptable_funcs Remove redundant functions members of pptable_funcs and change the function type as static because 
they are not called by other files. Signed-off-by: Ma Jun Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 4 --- drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 2 -- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 2 -- .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 -- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 41 +++++++++++----------- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 -- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 2 -- 7 files changed, 20 insertions(+), 35 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 48f5926d8153..5e7b8f29fecc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -212,10 +212,6 @@ int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, bool smu_v13_0_baco_is_support(struct smu_context *smu); -enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu); - -int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state); - int smu_v13_0_baco_enter(struct smu_context *smu); int smu_v13_0_baco_exit(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 2cb6b68222ba..4cd43bbec910 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2407,8 +2407,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, .baco_is_support = smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, .baco_enter = smu_v11_0_baco_enter, .baco_exit = smu_v11_0_baco_exit, .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index a38233cc5b7f..8d1d29ffb0f1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -3537,8 +3537,6 @@ static const struct pptable_funcs navi10_ppt_funcs = { .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, .baco_is_support = smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, .baco_enter = navi10_baco_enter, .baco_exit = navi10_baco_exit, .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 1de9f8b5cc5f..21fc033528fa 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -4428,8 +4428,6 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .set_azalia_d3_pme = smu_v11_0_set_azalia_d3_pme, .get_max_sustainable_clocks_by_dc = smu_v11_0_get_max_sustainable_clocks_by_dc, .baco_is_support = smu_v11_0_baco_is_support, - .baco_get_state = smu_v11_0_baco_get_state, - .baco_set_state = smu_v11_0_baco_set_state, .baco_enter = sienna_cichlid_baco_enter, .baco_exit = sienna_cichlid_baco_exit, .mode1_reset_is_support = sienna_cichlid_is_mode1_reset_supported, diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 86fc7273d588..2877e5da5b5a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2221,33 +2221,14 @@ static int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, return 0; } -bool smu_v13_0_baco_is_support(struct smu_context *smu) -{ - struct smu_baco_context *smu_baco = &smu->smu_baco; - - if (amdgpu_sriov_vf(smu->adev) || - !smu_baco->platform_support) - return false; - - /* return true if ASIC is in BACO state already */ - if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) - return true; - - if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && - !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) - return false; - - return true; -} - -enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu) +static enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; return smu_baco->state; } -int smu_v13_0_baco_set_state(struct smu_context *smu, +static int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) { struct smu_baco_context *smu_baco = &smu->smu_baco; @@ -2281,6 +2262,24 @@ int smu_v13_0_baco_set_state(struct smu_context *smu, return ret; } +bool smu_v13_0_baco_is_support(struct smu_context *smu) +{ + struct smu_baco_context *smu_baco = &smu->smu_baco; + + if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) + return false; + + /* return true if ASIC is in BACO state already */ + if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) + return true; + + if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && + !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) + return false; + + return true; +} + int smu_v13_0_baco_enter(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a4debd7ab8bd..2d25a9e47651 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -3003,8 +3003,6 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .deep_sleep_control = smu_v13_0_deep_sleep_control, .gfx_ulv_control = smu_v13_0_gfx_ulv_control, .baco_is_support = smu_v13_0_baco_is_support, - .baco_get_state = smu_v13_0_baco_get_state, - .baco_set_state = smu_v13_0_baco_set_state, .baco_enter = smu_v13_0_baco_enter, .baco_exit = smu_v13_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index f5596f031d00..2909ec06b1cb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2594,8 +2594,6 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, .baco_is_support = smu_v13_0_baco_is_support, - .baco_get_state = smu_v13_0_baco_get_state, - .baco_set_state = smu_v13_0_baco_set_state, .baco_enter = smu_v13_0_baco_enter, .baco_exit = smu_v13_0_baco_exit, .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported, -- cgit v1.2.3 From 9a10bd0df618f500ca526cf99f42504900020c2c Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Wed, 6 Dec 2023 
14:52:22 -0500 Subject: drm/amd/display: Remove minor revision 5 until proper parser is ready Reviewed-by: Dillon Varone Acked-by: Aurabindo Pillai Signed-off-by: Joshua Aberback Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index aef964f1bcbe..875a064bb9a5 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1747,7 +1747,6 @@ static enum bp_result bios_parser_get_firmware_info( result = get_firmware_info_v3_2(bp, info); break; case 4: - case 5: result = get_firmware_info_v3_4(bp, info); break; default: @@ -2398,7 +2397,6 @@ static enum bp_result get_vram_info_v30( return result; } - /* * get_integrated_info_v11 * -- cgit v1.2.3 From 7f9b4fb450a65a46df3d454a53836cad7e1c79c6 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 6 Dec 2023 14:52:23 -0500 Subject: drm/amd/display: Use explicit size for types in DCCG's struct dp_dto_params Reviewed-by: Alvin Lee Acked-by: Aurabindo Pillai Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 6b44557fcb1a..b9a06bf84cc9 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -59,8 +59,8 @@ enum dentist_dispclk_change_mode { struct dp_dto_params { int otg_inst; enum signal_type signal; - long long pixclk_hz; - long long refclk_hz; + uint64_t pixclk_hz; + uint64_t refclk_hz; }; enum pixel_rate_div { -- cgit v1.2.3 From af68153ffe8c4f778ba9cbe1d1725a939ab94576 Mon Sep 17 00:00:00 2001 From: Ran Shi Date: Wed, 6 Dec 2023 14:52:24 -0500 Subject: drm/amd/display: allow DP40 cables to do UHBR13.5 why: With DP2.1a expansion we are allowing DP40 cables to do UHBR13.5 how: Assume UHBR10 means UHBR13.5 also for unknown cable type and passive cable type. 
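Condensed, the cable rate ceiling after this change looks roughly as follows (cable_id stands for link->dpcd_caps.cable_id; CABLE_TYPE values below 2 are treated as unknown or passive, mirroring the hunk below):

        if (cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20)
                cable_max_rate = LINK_RATE_UHBR20;
        else if (cable_id.bits.UHBR13_5_CAPABILITY)
                cable_max_rate = LINK_RATE_UHBR13_5;
        else if (cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10)
                cable_max_rate = (cable_id.bits.CABLE_TYPE < 2) ?
                        LINK_RATE_UHBR13_5 : LINK_RATE_UHBR10;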
Reviewed-by: George Shen Acked-by: Aurabindo Pillai Signed-off-by: Ran Shi Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/protocols/link_dp_capability.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index db87aa7b5c90..3c5334cdb3fb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -412,12 +412,18 @@ static enum dc_link_rate get_cable_max_link_rate(struct dc_link *link) { enum dc_link_rate cable_max_link_rate = LINK_RATE_UNKNOWN; - if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) + if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) { cable_max_link_rate = LINK_RATE_UHBR20; - else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY) + } else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY) { cable_max_link_rate = LINK_RATE_UHBR13_5; - else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10) - cable_max_link_rate = LINK_RATE_UHBR10; + } else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10) { + // allow DP40 cables to do UHBR13.5 for passive or unknown cable type + if (link->dpcd_caps.cable_id.bits.CABLE_TYPE < 2) { + cable_max_link_rate = LINK_RATE_UHBR13_5; + } else { + cable_max_link_rate = LINK_RATE_UHBR10; + } + } return cable_max_link_rate; } -- cgit v1.2.3 From d0f639c5869399bf6dde4d694d5f8c0ab8c0ec46 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Wed, 6 Dec 2023 14:52:25 -0500 Subject: drm/amd/display: Revert "Fix conversions between bytes and KB" [Why & How] HostVMMinPageSize is expected to be in KB according to spec, the checks later down the line reflect this as well. 
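In short, the revert restores the KB convention end to end; a minimal sketch of the difference (structure names shortened from the hunks below):

        /* Being reverted: hands DML a byte count where it expects KB,
         * inflating the minimum page size by a factor of 1024. */
        params->HostVMMinPageSize = soc->hostvm_min_page_size_kbytes * 1024;

        /* Restored: pass the KB value straight through. */
        params->HostVMMinPageSize = soc->hostvm_min_page_size_kbytes;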
Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 4d1336e5afc2..180f8a98a361 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; 
CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], -- cgit v1.2.3 From 11edbb4497504540f5e73a8aabf1254b31cf0a82 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 6 Dec 2023 14:52:26 -0500 Subject: drm/amd/display: trivial comment change FP guard is valid for all recent asics, not just RV, so fix the comment. 
Reviewed-by: Chaitanya Dhere Acked-by: Aurabindo Pillai Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 3e73c4e59d40..7dacb0f82d29 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -368,7 +368,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p } break; -#endif /* CONFIG_DRM_AMD_DC_FP - Family RV */ +#endif /* CONFIG_DRM_AMD_DC_FP */ default: ASSERT(0); /* Unknown Asic */ break; -- cgit v1.2.3 From 2170fb03be28ad7807ea460101a60689c3f383e4 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Wed, 6 Dec 2023 14:52:27 -0500 Subject: drm/amd/display: Revert DP2 MST hub triple display fix [WHY] Introduces a regression with DP2 native displays [HOW] Revert the change Reviewed-by: Charlene Liu Acked-by: Aurabindo Pillai Signed-off-by: Michael Strauss Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 38fa7441df51..814dbdcf9a78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -160,13 +160,6 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream == NULL) return false; - /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ - if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { - return (pipe_ctx->stream_res.hpo_dp_stream_enc && - pipe_ctx->link_res.hpo_dp_link_enc && - dc_is_dp_signal(pipe_ctx->stream->signal) && - (pipe_ctx->stream->link->remote_sinks[0] == pipe_ctx->stream->sink)); - } return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && -- cgit v1.2.3 From c1afbb715e33a2b208c27a989c5f929029ffe7d3 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 6 Dec 2023 14:52:28 -0500 Subject: drm/amd/display: Populate dtbclk from bounding box dtbclk is unavailable from pmfw.
Try to grab the value from bounding box Reviewed-by: Charlene Liu Acked-by: Aurabindo Pillai Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 14 +++++++++----- .../gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 5 +++-- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index e9d88f52717b..3d12dabd39e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 1, @@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 2, @@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 3, @@ -151,7 +151,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 4, @@ -160,7 +160,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, }, .num_states = 5, @@ -367,6 +367,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = @@ -379,6 +381,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + clk_table->num_entries; } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 255af7875c08..279e7605a0a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -425,8 +425,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) { - p->in_states->state_array[i].dtbclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; + if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0) + p->in_states->state_array[i].dtbclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; } for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) { -- cgit v1.2.3 From bbc42960f9b68e548403e57b2cfd6e93e684864f Mon Sep 17 00:00:00 2001 From: Allen Date: Wed, 6 Dec 2023 
14:52:29 -0500 Subject: drm/amd/display: Disable OPTC pg to match DC Hubp/dpp pg [Why] To match the hardware sequence Reviewed-by: Charlene Liu Acked-by: Aurabindo Pillai Signed-off-by: Allen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 13324422ff50..4eb744f1bc9f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -717,6 +717,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dcc = DCC_ENABLE, .disable_dpp_power_gate = true, .disable_hubp_power_gate = true, + .disable_optc_power_gate = true, /*should the same as above two*/ .disable_clock_gate = false, .disable_dsc_power_gate = true, .vsr_support = true, -- cgit v1.2.3 From fdb0ad2ff7c84bda30bfe3b8f90abd1f8d8788a0 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Wed, 6 Dec 2023 14:52:30 -0500 Subject: drm/amd/display: Exit from idle state before accessing HW data [why & how] The user interface cannot guarantee the system is in an idle state, so we need to ensure we exit idle state before accessing any HW data. Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Sung Joon Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index 930fd929e93a..8c5e7f858be3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -64,11 +64,15 @@ static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight) static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm) { + dc_allow_idle_optimizations(abm->ctx->dc, false); + return dmub_abm_get_current_backlight(abm); } static unsigned int dmub_abm_get_target_backlight_ex(struct abm *abm) { + dc_allow_idle_optimizations(abm->ctx->dc, false); + return dmub_abm_get_target_backlight(abm); } -- cgit v1.2.3 From dd4e4bb28843393065eed279e869fac248d03f0f Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 6 Dec 2023 14:52:31 -0500 Subject: drm/amd/display: For prefetch mode > 0, extend prefetch if possible [Description] For mode programming we want to extend the prefetch as much as possible (up to oto, or as long as we can for equ) if we're not already applying the 60us prefetch requirement. This is to avoid intermittent underflow issues during prefetch. The prefetch extension is applied under the following scenarios: 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank) 2. We're using subvp or drr methods of p-state switch, in which case we don't care if prefetch takes up more of the blanking time Mode programming typically chooses the smallest prefetch time possible (i.e. highest bandwidth during prefetch) presumably to create margin between p-states / c-states that happen in vblank and prefetch. Therefore we only apply this prefetch extension when p-state in vblank is not required (UCLK p-states take up the most vblank time).
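Condensed, the new acceptance test added to the prefetch schedule calculation behaves roughly like the sketch below (names shortened for illustration; see the hunks that follow for the exact VBA variables):

        /* extend_prefetch_if_possible is passed in as
         * "prefetch mode > 0 || DRAM clock change not required".
         * The "equ" schedule is only accepted once the 60us floor applies,
         * extension was not requested, or VStartup can grow no further;
         * otherwise the calculation is flagged to retry, which stretches
         * the prefetch. */
        bool accept_equ_schedule = !extend_prefetch_if_possible ||
                                   t_pre_req != 0.0 ||
                                   vstartup >= max_vstartup;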
Reviewed-by: Jun Lei Acked-by: Aurabindo Pillai Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../amd/display/dc/dml/dcn32/display_mode_vba_32.c | 3 ++ .../dc/dml/dcn32/display_mode_vba_util_32.c | 33 ++++++++++++++++++---- .../dc/dml/dcn32/display_mode_vba_util_32.h | 1 + 3 files changed, 31 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index cbdfb762c10c..6c84b0fa40f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -813,6 +813,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman (v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, + /* Output */ &v->DSTXAfterScaler[k], &v->DSTYAfterScaler[k], @@ -3317,6 +3319,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightCThisState[k], v->TWait, (v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, /* Output */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index d940dfa5ae43..80fccd4999a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3423,6 +3423,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, @@ -3892,12 +3893,32 @@ bool dml32_CalculatePrefetchSchedule( /* Clamp to oto for bandwidth calculation */ LinesForPrefetchBandwidth = dst_y_prefetch_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; - /* Clamp to equ for bandwidth calculation */ - LinesForPrefetchBandwidth = dst_y_prefetch_equ; + /* For mode programming we want to extend the prefetch as much as possible + * (up to oto, or as long as we can for equ) if we're not already applying + * the 60us prefetch requirement. This is to avoid intermittent underflow + * issues during prefetch. + * + * The prefetch extension is applied under the following scenarios: + * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank) + * 2. We're using subvp or drr methods of p-state switch, in which case we + * we don't care if prefetch takes up more of the blanking time + * + * Mode programming typically chooses the smallest prefetch time possible + * (i.e. highest bandwidth during prefetch) presumably to create margin between + * p-states / c-states that happen in vblank and prefetch. 
Therefore we only + * apply this prefetch extension when p-state in vblank is not required (UCLK + * p-states take up the most vblank time). + */ + if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) { + MyError = true; + } else { + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + TimeForFetchingMetaPTE = Tvm_equ; + TimeForFetchingRowInVBlank = Tr0_equ; + *PrefetchBandwidth = prefetch_bw_equ; + /* Clamp to equ for bandwidth calculation */ + LinesForPrefetchBandwidth = dst_y_prefetch_equ; + } } *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 592d174df6c6..5d34735df83d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -747,6 +747,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, -- cgit v1.2.3 From 9a902a9073c287353e25913c0761bfed49d75a88 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 6 Dec 2023 14:52:32 -0500 Subject: drm/amd/display: Force p-state disallow if leaving no plane config [Description] - When we're in a no plane config, DCN is always asserting P-State allow - This creates a scenario where the P-State blackout can start just as VUPDATE takes place and transitions the DCN config to a one where one or more HUBP's are active which can result in underflow - To fix this issue, force p-state disallow and unforce after the transition from no planes case -> one or more planes active Reviewed-by: Samson Tam Acked-by: Aurabindo Pillai Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 608221b0dd5d..c3c83178eb1e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1877,6 +1877,8 @@ void dcn20_program_front_end_for_ctx( int i; struct dce_hwseq *hws = dc->hwseq; DC_LOGGER_INIT(dc->ctx->logger); + unsigned int prev_hubp_count = 0; + unsigned int hubp_count = 0; if (resource_is_pipe_topology_changed(dc->current_state, context)) resource_log_pipe_topology_update(dc, context); @@ -1894,6 +1896,20 @@ void dcn20_program_front_end_for_ctx( } } + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (dc->current_state->res_ctx.pipe_ctx[i].plane_state) + prev_hubp_count++; + if (context->res_ctx.pipe_ctx[i].plane_state) + hubp_count++; + } + + if (prev_hubp_count == 0 && hubp_count > 0) { + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, true, false); + udelay(500); + } + /* Set pipe update flags and lock pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i], @@ -2039,6 +2055,10 @@ void dcn20_post_unlock_program_front_end( } } + if (dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, false, false); + for (i = 0; i 
< dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; -- cgit v1.2.3 From 7253c36b1febe7e76be3da26fbf875978b37e92c Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 6 Dec 2023 14:52:33 -0500 Subject: drm/amd/display: fix HW block PG sequence [why] Power up and power down has reverted programming order. also make sure disable root clock last. Reviewed-by: Muhammad Ahmed Acked-by: Aurabindo Pillai Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c | 3 +- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 134 +++++++++++++++------ .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h | 6 +- drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 6 +- 4 files changed, 105 insertions(+), 44 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c index 296bf3a38cb9..d594905eb246 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c @@ -118,7 +118,8 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .update_dsc_pg = dcn32_update_dsc_pg, .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, - .block_power_control = dcn35_block_power_control, + .hw_block_power_up = dcn35_hw_block_power_up, + .hw_block_power_down = dcn35_hw_block_power_down, .root_clock_control = dcn35_root_clock_control, .set_idle_state = dcn35_set_idle_state, .get_idle_state = dcn35_get_idle_state diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 09498aa92096..9262d3336182 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1123,9 +1123,23 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, update_state->pg_res_update[PG_HPO] = true; } - -void dcn35_block_power_control(struct dc *dc, - struct pg_block_update *update_state, bool power_on) +/** + * power down sequence + * ONO Region 3, DCPG 25: hpo - SKIPPED + * ONO Region 4, DCPG 0: dchubp0, dpp0 + * ONO Region 6, DCPG 1: dchubp1, dpp1 + * ONO Region 8, DCPG 2: dchubp2, dpp2 + * ONO Region 10, DCPG 3: dchubp3, dpp3 + * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry + * ONO Region 5, DCPG 16: dsc0 + * ONO Region 7, DCPG 17: dsc1 + * ONO Region 9, DCPG 18: dsc2 + * ONO Region 11, DCPG 19: dsc3 + * ONO Region 2, DCPG 24: mpc opp optc dwb + * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. 
will be pwr dwn after lono timer is armed +*/ +void dcn35_hw_block_power_down(struct dc *dc, + struct pg_block_update *update_state) { int i = 0; struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; @@ -1134,66 +1148,99 @@ void dcn35_block_power_control(struct dc *dc, return; if (dc->debug.ignore_pg) return; + if (update_state->pg_res_update[PG_HPO]) { if (pg_cntl->funcs->hpo_pg_control) - pg_cntl->funcs->hpo_pg_control(pg_cntl, power_on); + pg_cntl->funcs->hpo_pg_control(pg_cntl, false); } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { if (pg_cntl->funcs->hubp_dpp_pg_control) - pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, power_on); + pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, false); } - + } + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) if (update_state->pg_pipe_res_update[PG_DSC][i]) { if (pg_cntl->funcs->dsc_pg_control) - pg_cntl->funcs->dsc_pg_control(pg_cntl, i, power_on); - } - - if (update_state->pg_pipe_res_update[PG_MPCC][i]) { - if (pg_cntl->funcs->mpcc_pg_control) - pg_cntl->funcs->mpcc_pg_control(pg_cntl, i, power_on); - } - - if (update_state->pg_pipe_res_update[PG_OPP][i]) { - if (pg_cntl->funcs->opp_pg_control) - pg_cntl->funcs->opp_pg_control(pg_cntl, i, power_on); - } - - if (update_state->pg_pipe_res_update[PG_OPTC][i]) { - if (pg_cntl->funcs->optc_pg_control) - pg_cntl->funcs->optc_pg_control(pg_cntl, i, power_on); + pg_cntl->funcs->dsc_pg_control(pg_cntl, i, false); } - } - if (update_state->pg_res_update[PG_DWB]) { - if (pg_cntl->funcs->dwb_pg_control) - pg_cntl->funcs->dwb_pg_control(pg_cntl, power_on); - } /*this will need all the clients to unregister optc interruts let dmubfw handle this*/ if (pg_cntl->funcs->plane_otg_pg_control) - pg_cntl->funcs->plane_otg_pg_control(pg_cntl, power_on); + pg_cntl->funcs->plane_otg_pg_control(pg_cntl, false); -} + //domain22, 23, 25 currently always on. -void dcn35_root_clock_control(struct dc *dc, - struct pg_block_update *update_state, bool power_on) +} +/** + * power up sequence + * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED + * ONO Region 2, DCPG 24: mpc opp optc dwb + * ONO Region 5, DCPG 16: dsc0 + * ONO Region 7, DCPG 17: dsc1 + * ONO Region 9, DCPG 18: dsc2 + * ONO Region 11, DCPG 19: dsc3 + * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit + * ONO Region 4, DCPG 0: dchubp0, dpp0 + * ONO Region 6, DCPG 1: dchubp1, dpp1 + * ONO Region 8, DCPG 2: dchubp2, dpp2 + * ONO Region 10, DCPG 3: dchubp3, dpp3 + * ONO Region 3, DCPG 25: hpo - SKIPPED + */ +void dcn35_hw_block_power_up(struct dc *dc, + struct pg_block_update *update_state) { int i = 0; struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; if (!pg_cntl) return; + if (dc->debug.ignore_pg) + return; + //domain22, 23, 25 currently always on. 
+ /*this will need all the clients to unregister optc interruts let dmubfw handle this*/ + if (pg_cntl->funcs->plane_otg_pg_control) + pg_cntl->funcs->plane_otg_pg_control(pg_cntl, true); + + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) + if (update_state->pg_pipe_res_update[PG_DSC][i]) { + if (pg_cntl->funcs->dsc_pg_control) + pg_cntl->funcs->dsc_pg_control(pg_cntl, i, true); + } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { - if (dc->hwseq->funcs.dpp_root_clock_control) - dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on); + if (pg_cntl->funcs->hubp_dpp_pg_control) + pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, true); } + } + if (update_state->pg_res_update[PG_HPO]) { + if (pg_cntl->funcs->hpo_pg_control) + pg_cntl->funcs->hpo_pg_control(pg_cntl, true); + } +} +void dcn35_root_clock_control(struct dc *dc, + struct pg_block_update *update_state, bool power_on) +{ + int i = 0; + struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; + if (!pg_cntl) + return; + /*enable root clock first when power up*/ + if (power_on) + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (update_state->pg_pipe_res_update[PG_HUBP][i] && + update_state->pg_pipe_res_update[PG_DPP][i]) { + if (dc->hwseq->funcs.dpp_root_clock_control) + dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on); + } + } + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { if (update_state->pg_pipe_res_update[PG_DSC][i]) { if (power_on) { if (dc->res_pool->dccg->funcs->enable_dsc) @@ -1204,6 +1251,15 @@ void dcn35_root_clock_control(struct dc *dc, } } } + /*disable root clock first when power down*/ + if (!power_on) + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (update_state->pg_pipe_res_update[PG_HUBP][i] && + update_state->pg_pipe_res_update[PG_DPP][i]) { + if (dc->hwseq->funcs.dpp_root_clock_control) + dc->hwseq->funcs.dpp_root_clock_control(dc->hwseq, i, power_on); + } + } } void dcn35_prepare_bandwidth( @@ -1217,9 +1273,9 @@ void dcn35_prepare_bandwidth( if (dc->hwss.root_clock_control) dc->hwss.root_clock_control(dc, &pg_update_state, true); - - if (dc->hwss.block_power_control) - dc->hwss.block_power_control(dc, &pg_update_state, true); + /*power up required HW block*/ + if (dc->hwss.hw_block_power_up) + dc->hwss.hw_block_power_up(dc, &pg_update_state); } dcn20_prepare_bandwidth(dc, context); @@ -1235,9 +1291,9 @@ void dcn35_optimize_bandwidth( if (dc->hwss.calc_blocks_to_gate) { dc->hwss.calc_blocks_to_gate(dc, context, &pg_update_state); - - if (dc->hwss.block_power_control) - dc->hwss.block_power_control(dc, &pg_update_state, false); + /*try to power down unused block*/ + if (dc->hwss.hw_block_power_down) + dc->hwss.hw_block_power_down(dc, &pg_update_state); if (dc->hwss.root_clock_control) dc->hwss.root_clock_control(dc, &pg_update_state, false); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index 0dff10d179b8..3837038dc4a8 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -63,8 +63,10 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, struct pg_block_update *update_state); void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, struct pg_block_update *update_state); -void dcn35_block_power_control(struct dc *dc, - struct pg_block_update *update_state, bool power_on); +void 
dcn35_hw_block_power_up(struct dc *dc, + struct pg_block_update *update_state); +void dcn35_hw_block_power_down(struct dc *dc, + struct pg_block_update *update_state); void dcn35_root_clock_control(struct dc *dc, struct pg_block_update *update_state, bool power_on); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 452680fe9aab..45dc6d4e9562 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -414,8 +414,10 @@ struct hw_sequencer_funcs { struct pg_block_update *update_state); void (*calc_blocks_to_ungate)(struct dc *dc, struct dc_state *context, struct pg_block_update *update_state); - void (*block_power_control)(struct dc *dc, - struct pg_block_update *update_state, bool power_on); + void (*hw_block_power_up)(struct dc *dc, + struct pg_block_update *update_state); + void (*hw_block_power_down)(struct dc *dc, + struct pg_block_update *update_state); void (*root_clock_control)(struct dc *dc, struct pg_block_update *update_state, bool power_on); void (*set_idle_state)(const struct dc *dc, bool allow_idle); -- cgit v1.2.3 From bcbd0787f8be31b17125d05cfaf71724774b9964 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 6 Dec 2023 14:52:34 -0500 Subject: drm/amd/display: 3.2.264 Summary: Bug fixes for: * DCN35 power gating * P-state change, & prefetch logic * ABM * DP 2.1 Acked-by: Aurabindo Pillai Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 61d08349a0d7..2c85f8ee682f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.263" +#define DC_VER "3.2.264" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 571c2fa26aa654946447c282a09d40a56c7ff128 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 Jun 2023 15:04:24 -0500 Subject: drm/amd/display: Disable PSR-SU on Parade 0803 TCON again When screen brightness is rapidly changed and PSR-SU is enabled the display hangs on panels with this TCON even on the latest DCN 3.1.4 microcode (0x8002a81 at this time). This was disabled previously as commit 072030b17830 ("drm/amd: Disable PSR-SU on Parade 0803 TCON") but reverted as commit 1e66a17ce546 ("Revert "drm/amd: Disable PSR-SU on Parade 0803 TCON"") in favor of testing for a new enough microcode (commit cd2e31a9ab93 ("drm/amd/display: Set minimum requirement for using PSR-SU on Phoenix")). As hangs are still happening specifically with this TCON, disable PSR-SU again for it until it can be root caused. 
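For context, the quirk boils down to one more sink device ID match in is_psr_su_specific_panel() before PSR-SU is granted; a condensed sketch of that check, mirroring the hunk below (the 0x08/0x03 bytes in sink_dev_id_str identify the Parade 08-03 TCON):

	/* Parade 08-03 TCON: keep PSR-SU disabled until the hang is root caused */
	if (dpcd_caps->sink_dev_id_str[1] == 0x08 &&
	    dpcd_caps->sink_dev_id_str[0] == 0x03)
		isPSRSUSupported = false;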
Cc: stable@vger.kernel.org Cc: aaron.ma@canonical.com Cc: binli@gnome.org Cc: Marc Rossi Cc: Hamza Mahfooz Signed-off-by: Mario Limonciello Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2046131 Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index a522a7c02911..1675314a3ff2 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } -- cgit v1.2.3 From d5a348d96e4e2b924fa83e729f8791c03a4f8e24 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:57:45 -0100 Subject: drm/amd/display: add plane degamma TF driver-specific property Allow userspace to tell the kernel driver the input space and, therefore, uses correct predefined transfer function (TF) to go from encoded values to linear values. v2: - rename TF enum prefix from DRM_ to AMDGPU_ (Harry) - remove HLG TF Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 5 +++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 +++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 21 +++++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 19 +++++++++++++++++-- 4 files changed, 62 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index a63b01aa230d..a1b7ae5c2149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -355,6 +355,11 @@ struct amdgpu_mode_info { * size of degamma LUT as supported by the driver (read-only). */ struct drm_property *plane_degamma_lut_size_property; + /** + * @plane_degamma_tf_property: Plane pre-defined transfer function to + * to go from scanout/encoded values to linear values. 
+ */ + struct drm_property *plane_degamma_tf_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 446a3962d9c9..41bf0cf56433 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -724,6 +724,18 @@ struct amdgpu_dm_wb_connector { extern const struct amdgpu_ip_block_version dm_ip_block; +enum amdgpu_transfer_function { + AMDGPU_TRANSFER_FUNCTION_DEFAULT, + AMDGPU_TRANSFER_FUNCTION_SRGB, + AMDGPU_TRANSFER_FUNCTION_BT709, + AMDGPU_TRANSFER_FUNCTION_PQ, + AMDGPU_TRANSFER_FUNCTION_LINEAR, + AMDGPU_TRANSFER_FUNCTION_UNITY, + AMDGPU_TRANSFER_FUNCTION_GAMMA22, + AMDGPU_TRANSFER_FUNCTION_GAMMA24, + AMDGPU_TRANSFER_FUNCTION_GAMMA26, +}; + struct dm_plane_state { struct drm_plane_state base; struct dc_plane_state *dc_state; @@ -737,6 +749,13 @@ struct dm_plane_state { * The blob (if not NULL) is an array of &struct drm_color_lut. */ struct drm_property_blob *degamma_lut; + /** + * @degamma_tf: + * + * Predefined transfer function to tell DC driver the input space to + * linearize. + */ + enum amdgpu_transfer_function degamma_tf; }; struct dm_crtc_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 421af919387f..8e02d2b1249d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -85,6 +85,18 @@ void amdgpu_dm_init_color_mod(void) } #ifdef AMD_PRIVATE_COLOR +static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = { + { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" }, + { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" }, + { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" }, + { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" }, + { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" }, + { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" }, + { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" }, + { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" }, + { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" }, +}; + int amdgpu_dm_create_color_properties(struct amdgpu_device *adev) { @@ -105,6 +117,15 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_degamma_lut_size_property = prop; + prop = drm_property_create_enum(adev_to_drm(adev), + DRM_MODE_PROP_ENUM, + "AMD_PLANE_DEGAMMA_TF", + amdgpu_transfer_function_enum_list, + ARRAY_SIZE(amdgpu_transfer_function_enum_list)); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_degamma_tf_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index b1e50cfa9f83..d5e12e05d161 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1337,8 +1337,11 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane) amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); WARN_ON(amdgpu_state == NULL); - if (amdgpu_state) - __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); + if (!amdgpu_state) + return; + + __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); + amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; } static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane) @@ -1361,6 +1364,8 @@ static struct drm_plane_state 
*amdgpu_dm_plane_drm_plane_duplicate_state(struct dm_plane_state->degamma_lut = drm_property_blob_get(old_dm_plane_state->degamma_lut); + dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; + return &dm_plane_state->base; } @@ -1455,6 +1460,9 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, drm_object_attach_property(&plane->base, mode_info.plane_degamma_lut_size_property, MAX_COLOR_LUT_ENTRIES); + drm_object_attach_property(&plane->base, + dm->adev->mode_info.plane_degamma_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); } } @@ -1477,6 +1485,11 @@ dm_atomic_plane_set_property(struct drm_plane *plane, &replaced); dm_plane_state->base.color_mgmt_changed |= replaced; return ret; + } else if (property == adev->mode_info.plane_degamma_tf_property) { + if (dm_plane_state->degamma_tf != val) { + dm_plane_state->degamma_tf = val; + dm_plane_state->base.color_mgmt_changed = 1; + } } else { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n", @@ -1500,6 +1513,8 @@ dm_atomic_plane_get_property(struct drm_plane *plane, if (property == adev->mode_info.plane_degamma_lut_property) { *val = (dm_plane_state->degamma_lut) ? dm_plane_state->degamma_lut->base.id : 0; + } else if (property == adev->mode_info.plane_degamma_tf_property) { + *val = dm_plane_state->degamma_tf; } else { return -EINVAL; } -- cgit v1.2.3 From 5a3b965b5810bd602d2c7d8ea79ffe8c6e81268d Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:46 -0100 Subject: drm/amd/display: explicitly define EOTF and inverse EOTF Instead of relying on color block names to get the transfer function intention regarding encoding pixel's luminance, define supported Electro-Optical Transfer Functions (EOTFs) and inverse EOTFs, that includes pure gamma or standardized transfer functions. v3: - squash linear and unity TFs to identity (Pekka) - define the right TFs for BT.709 (Pekka and Harry) - add comment about AMD TF coefficients Suggested-by: Harry Wentland Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 27 ++++++--- .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 67 +++++++++++++++++----- 2 files changed, 71 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 41bf0cf56433..baa73a6f47a9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -724,16 +724,27 @@ struct amdgpu_dm_wb_connector { extern const struct amdgpu_ip_block_version dm_ip_block; +/* enum amdgpu_transfer_function: pre-defined transfer function supported by AMD. + * + * It includes standardized transfer functions and pure power functions. 
The + * transfer function coefficients are available at modules/color/color_gamma.c + */ enum amdgpu_transfer_function { AMDGPU_TRANSFER_FUNCTION_DEFAULT, - AMDGPU_TRANSFER_FUNCTION_SRGB, - AMDGPU_TRANSFER_FUNCTION_BT709, - AMDGPU_TRANSFER_FUNCTION_PQ, - AMDGPU_TRANSFER_FUNCTION_LINEAR, - AMDGPU_TRANSFER_FUNCTION_UNITY, - AMDGPU_TRANSFER_FUNCTION_GAMMA22, - AMDGPU_TRANSFER_FUNCTION_GAMMA24, - AMDGPU_TRANSFER_FUNCTION_GAMMA26, + AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF, + AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF, + AMDGPU_TRANSFER_FUNCTION_PQ_EOTF, + AMDGPU_TRANSFER_FUNCTION_IDENTITY, + AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF, + AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_BT709_OETF, + AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF, + AMDGPU_TRANSFER_FUNCTION_COUNT }; struct dm_plane_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 8e02d2b1249d..a32abbd93a61 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -85,18 +85,57 @@ void amdgpu_dm_init_color_mod(void) } #ifdef AMD_PRIVATE_COLOR -static const struct drm_prop_enum_list amdgpu_transfer_function_enum_list[] = { - { AMDGPU_TRANSFER_FUNCTION_DEFAULT, "Default" }, - { AMDGPU_TRANSFER_FUNCTION_SRGB, "sRGB" }, - { AMDGPU_TRANSFER_FUNCTION_BT709, "BT.709" }, - { AMDGPU_TRANSFER_FUNCTION_PQ, "PQ (Perceptual Quantizer)" }, - { AMDGPU_TRANSFER_FUNCTION_LINEAR, "Linear" }, - { AMDGPU_TRANSFER_FUNCTION_UNITY, "Unity" }, - { AMDGPU_TRANSFER_FUNCTION_GAMMA22, "Gamma 2.2" }, - { AMDGPU_TRANSFER_FUNCTION_GAMMA24, "Gamma 2.4" }, - { AMDGPU_TRANSFER_FUNCTION_GAMMA26, "Gamma 2.6" }, +static const char * const +amdgpu_transfer_function_names[] = { + [AMDGPU_TRANSFER_FUNCTION_DEFAULT] = "Default", + [AMDGPU_TRANSFER_FUNCTION_IDENTITY] = "Identity", + [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF] = "sRGB EOTF", + [AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF] = "BT.709 inv_OETF", + [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF] = "PQ EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF", + [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF] = "sRGB inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_BT709_OETF] = "BT.709 OETF", + [AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF] = "PQ inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF] = "Gamma 2.2 inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF] = "Gamma 2.4 inv_EOTF", + [AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF] = "Gamma 2.6 inv_EOTF", }; +static const u32 amdgpu_eotf = + BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) | + BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF); + +static struct drm_property * +amdgpu_create_tf_property(struct drm_device *dev, + const char *name, + u32 supported_tf) +{ + u32 transfer_functions = supported_tf | + BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) | + BIT(AMDGPU_TRANSFER_FUNCTION_IDENTITY); + struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT]; + int i, len; + + len = 0; + for (i = 0; i < 
AMDGPU_TRANSFER_FUNCTION_COUNT; i++) { + if ((transfer_functions & BIT(i)) == 0) + continue; + + enum_list[len].type = i; + enum_list[len].name = amdgpu_transfer_function_names[i]; + len++; + } + + return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM, + name, enum_list, len); +} + int amdgpu_dm_create_color_properties(struct amdgpu_device *adev) { @@ -117,11 +156,9 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_degamma_lut_size_property = prop; - prop = drm_property_create_enum(adev_to_drm(adev), - DRM_MODE_PROP_ENUM, - "AMD_PLANE_DEGAMMA_TF", - amdgpu_transfer_function_enum_list, - ARRAY_SIZE(amdgpu_transfer_function_enum_list)); + prop = amdgpu_create_tf_property(adev_to_drm(adev), + "AMD_PLANE_DEGAMMA_TF", + amdgpu_eotf); if (!prop) return -ENOMEM; adev->mode_info.plane_degamma_tf_property = prop; -- cgit v1.2.3 From e4cddd51bfab2a40529a4af35bd2c912b5a0c239 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:47 -0100 Subject: drm/amd/display: document AMDGPU pre-defined transfer functions Brief documentation about pre-defined transfer function usage on AMD display driver and standardized EOTFs and inverse EOTFs. v3: - Document BT709 OETF (Pekka) - Fix description of sRGB and pure power funcs (Pekka) v4: - Add description of linear and non-linear forms (Harry) Reviewed-by: Harry Wentland Co-developed-by: Harry Wentland Signed-off-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a32abbd93a61..49faaf606cfe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -85,6 +85,68 @@ void amdgpu_dm_init_color_mod(void) } #ifdef AMD_PRIVATE_COLOR +/* Pre-defined Transfer Functions (TF) + * + * AMD driver supports pre-defined mathematical functions for transferring + * between encoded values and optical/linear space. Depending on HW color caps, + * ROMs and curves built by the AMD color module support these transforms. + * + * The driver-specific color implementation exposes properties for pre-blending + * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and + * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma + * supports ROM curves. AMD color module uses pre-defined coefficients to build + * curves for the other blocks. What can be done by each color block is + * described by struct dpp_color_capsand struct mpc_color_caps. + * + * AMD driver-specific color API exposes the following pre-defined transfer + * functions: + * + * - Identity: linear/identity relationship between pixel value and + * luminance value; + * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions; + * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999; + * - BT.709: has a linear segment in the bottom part and then a power function + * with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by + * ITU-R BT.709-6; + * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range + * capability of 0 to 10,000 nits; standardized by SMPTE ST 2084. + * + * The AMD color model is designed with an assumption that SDR (sRGB, BT.709, + * Gamma 2.2, etc.) 
peak white maps (normalized to 1.0 FP) to 80 nits in the PQ + * system. This has the implication that PQ EOTF (non-linear to linear) maps to + * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits. + * + * Non-linear and linear forms are described in the table below: + * + * ┌───────────┬─────────────────────┬──────────────────────┐ + * │ │ Non-linear │ Linear │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0] │ + * ├───────────┼─────────────────────┼──────────────────────┤ + * │ Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │ + * └───────────┴─────────────────────┴──────────────────────┘ + * * CCCS: Windows canonical composition color space + * ** Respectively + * + * In the driver-specific API, color block names attached to TF properties + * suggest the intention regarding non-linear encoding pixel's luminance + * values. As some newer encodings don't use gamma curve, we make encoding and + * decoding explicit by defining an enum list of transfer functions supported + * in terms of EOTF and inverse EOTF, where: + * + * - EOTF (electro-optical transfer function): is the transfer function to go + * from the encoded value to an optical (linear) value. De-gamma functions + * traditionally do this. + * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go + * from an optical/linear space (which might have been used for blending) + * back to the encoded values. Gamma functions traditionally do this. + */ static const char * const amdgpu_transfer_function_names[] = { [AMDGPU_TRANSFER_FUNCTION_DEFAULT] = "Default", -- cgit v1.2.3 From ec7b2a55463ea50401a8146793b61ee590255a45 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:57:48 -0100 Subject: drm/amd/display: add plane HDR multiplier driver-specific property Multiplier to 'gain' the plane. When PQ is decoded using the fixed func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on AMD at least) When sRGB is decoded, 1.0 -> 1.0. Therefore, 1.0 multiplier = 80 nits for SDR content. So if you want, 203 nits for SDR content, pass in (203.0 / 80.0). v4: - comment about the PQ TF need for L-to-NL (from Harry's review) Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 ++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 17 +++++++++++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 6 ++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 13 +++++++++++++ 4 files changed, 40 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index a1b7ae5c2149..218623e1b9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -360,6 +360,10 @@ struct amdgpu_mode_info { * to go from scanout/encoded values to linear values. 
*/ struct drm_property *plane_degamma_tf_property; + /** + * @plane_hdr_mult_property: + */ + struct drm_property *plane_hdr_mult_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index baa73a6f47a9..d36d68d517a8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -55,6 +55,9 @@ #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3 + +#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL) + /* #include "include/amdgpu_dal_power_if.h" #include "amdgpu_dm_irq.h" @@ -767,6 +770,20 @@ struct dm_plane_state { * linearize. */ enum amdgpu_transfer_function degamma_tf; + /** + * @hdr_mult: + * + * Multiplier to 'gain' the plane. When PQ is decoded using the fixed + * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on + * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously. + * Therefore, 1.0 multiplier = 80 nits for SDR content. So if you + * want, 203 nits for SDR content, pass in (203.0 / 80.0). Format is + * S31.32 sign-magnitude. + * + * HDR multiplier can wide range beyond [0.0, 1.0]. This means that PQ + * TF is needed for any subsequent linear-to-non-linear transforms. + */ + __u64 hdr_mult; }; struct dm_crtc_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 49faaf606cfe..b5b34a9209e4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -225,6 +225,12 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_degamma_tf_property = prop; + prop = drm_property_create_range(adev_to_drm(adev), + 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_hdr_mult_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index d5e12e05d161..45a2c9b36630 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1342,6 +1342,7 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane) __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT; } static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane) @@ -1365,6 +1366,7 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_property_blob_get(old_dm_plane_state->degamma_lut); dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; + dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult; return &dm_plane_state->base; } @@ -1464,6 +1466,10 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, dm->adev->mode_info.plane_degamma_tf_property, AMDGPU_TRANSFER_FUNCTION_DEFAULT); } + /* HDR MULT is always available */ + drm_object_attach_property(&plane->base, + dm->adev->mode_info.plane_hdr_mult_property, + AMDGPU_HDR_MULT_DEFAULT); } static int @@ -1490,6 +1496,11 @@ dm_atomic_plane_set_property(struct drm_plane *plane, dm_plane_state->degamma_tf = val; 
dm_plane_state->base.color_mgmt_changed = 1; } + } else if (property == adev->mode_info.plane_hdr_mult_property) { + if (dm_plane_state->hdr_mult != val) { + dm_plane_state->hdr_mult = val; + dm_plane_state->base.color_mgmt_changed = 1; + } } else { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n", @@ -1515,6 +1526,8 @@ dm_atomic_plane_get_property(struct drm_plane *plane, dm_plane_state->degamma_lut->base.id : 0; } else if (property == adev->mode_info.plane_degamma_tf_property) { *val = dm_plane_state->degamma_tf; + } else if (property == adev->mode_info.plane_hdr_mult_property) { + *val = dm_plane_state->hdr_mult; } else { return -EINVAL; } -- cgit v1.2.3 From a9210714d23190b44eed32f8bcadbe3b18d51a1d Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 8 Dec 2023 01:58:24 -0800 Subject: drm/amd/display: Fix memory leak in dm_set_writeback() 'wb_info' needs to be freed on error paths or it would leak the memory. Smatch pointed this out. Fixes: c81e13b929df ("drm/amd/display: Hande writeback request from userspace") Signed-off-by: Harshit Mogalapalli Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e819d05755b3..6efa0c1bba9d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8879,12 +8879,14 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc); if (!acrtc) { DRM_ERROR("no amdgpu_crtc found\n"); + kfree(wb_info); return; } afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb); if (!afb) { DRM_ERROR("No amdgpu_framebuffer found\n"); + kfree(wb_info); return; } -- cgit v1.2.3 From 4e95669ecb03d797355bc23871c5c43b9475d3dc Mon Sep 17 00:00:00 2001 From: Vignesh Chander Date: Fri, 8 Dec 2023 12:00:26 -0600 Subject: drm/amdgpu: xgmi_fill_topology_info 1. Use the mirrored topology info to fill links for VF. The new solution is required to simplify and optimize host driver logic. Only use the new solution for VFs that support full duplex and extended_peer_link_info otherwise the info would be incomplete. 2. avoid calling extended_link_info on VF as its not supported Signed-off-by: Vignesh Chander Reviewed-by: Zhigang Luo Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 58 +++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a21045d018f2..1bf975b8d083 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, get_extended_data) || amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6); - bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & - EXTEND_PEER_LINK_INFO_CMD_FLAG; + bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 : + psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG; /* popluate the shared output buffer rather than the cmd input buffer * with node_ids as the input for GET_PEER_LINKS command execution. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 44d8c1a11e1b..9a95b9f226b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -823,6 +823,28 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf return 0; } +static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info; + struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info; + + for (int i = 0; i < peer_info->num_nodes; i++) { + if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) { + for (int j = 0; j < top_info->num_nodes; j++) { + if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) { + peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops; + peer_info->nodes[i].is_sharing_enabled = + top_info->nodes[j].is_sharing_enabled; + peer_info->nodes[i].num_links = + top_info->nodes[j].num_links; + return; + } + } + } + } +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { struct psp_xgmi_topology_info *top_info; @@ -897,18 +919,38 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit_unlock; } - /* get latest topology info for each device from psp */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, - &tmp_adev->psp.xgmi_context.top_info, false); + if (amdgpu_sriov_vf(adev) && + adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { + /* only get topology for VF being init if it can support full duplex */ + ret = psp_xgmi_get_topology_info(&adev->psp, count, + &adev->psp.xgmi_context.top_info, false); if (ret) { - dev_err(tmp_adev->dev, + dev_err(adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", - tmp_adev->gmc.xgmi.node_id, - tmp_adev->gmc.xgmi.hive_id, ret); - /* To do : continue with some node failed or disable the whole hive */ + adev->gmc.xgmi.node_id, + adev->gmc.xgmi.hive_id, ret); + /* To do: continue with some node failed or disable the whole hive*/ goto exit_unlock; } + + /* fill the topology info for peers instead of getting from PSP */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + amdgpu_xgmi_fill_topology_info(adev, tmp_adev); + } + } else { + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info, false); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + /* To do : continue with some node failed or disable the whole hive */ + goto exit_unlock; + } + } } /* get topology again for hives that support extended data */ -- cgit v1.2.3 From 1819200166ce511ac298dc96b9b17eb655a9edc4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 24 Aug 2023 14:41:30 -0400 Subject: drm/amdkfd: Export DMABufs from KFD using GEM handles Create GEM handles for exporting DMABufs using GEM-Prime APIs. The GEM handles are created in a drm_client_dev context to avoid exposing them in user mode contexts through a DMABuf import. 
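In short, every KFD BO now gets a GEM handle on the KFD DRM client at allocation time, and the export path converts that handle into a DMABuf through the PRIME helper instead of calling amdgpu_gem_prime_export() directly. A condensed sketch of the resulting kfd_mem_export_dmabuf() flow, with error handling trimmed (the full hunk is below):

	r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file,
				       mem->gem_handle,
				       mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
				       DRM_RDWR : 0, &fd);
	if (!r) {
		mem->dmabuf = dma_buf_get(fd);	/* take our own dma_buf reference */
		close_fd(fd);			/* the fd was only a vehicle */
	}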
Signed-off-by: Felix Kuehling Reviewed-by: Ramesh Errabolu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 11 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 5 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 33 +++++++++++++++++++----- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 +-- 4 files changed, 44 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 2d22f7d45512..067690ba7bff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -142,6 +142,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; + int ret; amdgpu_amdkfd_gpuvm_init_mem_limits(); @@ -160,6 +161,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); + return; + } + /* this is going to have a few of the MSBs set that we need to * clear */ @@ -198,6 +205,10 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); + if (adev->kfd.init_complete) + drm_client_register(&adev->kfd.client); + else + drm_client_release(&adev->kfd.client); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 16794c2eea35..02973f5c8caf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -33,6 +33,7 @@ #include #include #include +#include #include "amdgpu_sync.h" #include "amdgpu_vm.h" #include "amdgpu_xcp.h" @@ -83,6 +84,7 @@ struct kgd_mem { struct amdgpu_sync sync; + uint32_t gem_handle; bool aql_queue; bool is_imported; }; @@ -105,6 +107,9 @@ struct amdgpu_kfd_dev { /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ struct dev_pagemap pgmap; + + /* Client for KFD BO GEM handle allocations */ + struct drm_client_dev client; }; enum kgd_engine_type { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 73288f9ccaf8..ae7dfaf59159 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -806,13 +807,22 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, static int kfd_mem_export_dmabuf(struct kgd_mem *mem) { if (!mem->dmabuf) { - struct dma_buf *ret = amdgpu_gem_prime_export( - &mem->bo->tbo.base, + struct amdgpu_device *bo_adev; + struct dma_buf *dmabuf; + int r, fd; + + bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file, + mem->gem_handle, mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? - DRM_RDWR : 0); - if (IS_ERR(ret)) - return PTR_ERR(ret); - mem->dmabuf = ret; + DRM_RDWR : 0, &fd); + if (r) + return r; + dmabuf = dma_buf_get(fd); + close_fd(fd); + if (WARN_ON_ONCE(IS_ERR(dmabuf))) + return PTR_ERR(dmabuf); + mem->dmabuf = dmabuf; } return 0; @@ -1778,6 +1788,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("Failed to allow vma node access. 
ret %d\n", ret); goto err_node_allow; } + ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle); + if (ret) + goto err_gem_handle_create; bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; @@ -1829,6 +1842,8 @@ allocate_init_user_pages_failed: err_pin_bo: err_validate_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle); +err_gem_handle_create: drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: /* Don't unreserve system mem limit twice */ @@ -1941,8 +1956,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the BO*/ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); - if (mem->dmabuf) + if (!mem->is_imported) + drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); + if (mem->dmabuf) { dma_buf_put(mem->dmabuf); + mem->dmabuf = NULL; + } mutex_destroy(&mem->lock); /* If this releases the last reference, it will end up calling diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f6d4748c1980..b4cb907f80c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1855,8 +1855,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p) return num_of_bos; } -static int criu_get_prime_handle(struct kgd_mem *mem, int flags, - u32 *shared_fd) +static int criu_get_prime_handle(struct kgd_mem *mem, + int flags, u32 *shared_fd) { struct dma_buf *dmabuf; int ret; -- cgit v1.2.3 From 0188006d7c797a37c04471a2b4a34a7dfb21f363 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 19 Sep 2023 14:57:02 -0400 Subject: drm/amdkfd: Import DMABufs for interop through DRM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use drm_gem_prime_fd_to_handle to import DMABufs for interop. This ensures that a GEM handle is created on import and that obj->dma_buf will be set and remain set as long as the object is imported into KFD. 
Signed-off-by: Felix Kuehling Reviewed-by: Ramesh Errabolu Reviewed-by: Xiaogang.Chen Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 9 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 64 ++++++++++++++++-------- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 ++---- 3 files changed, 52 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 02973f5c8caf..cf6ed5fce291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -314,11 +314,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, - struct dma_buf *dmabuf, - uint64_t va, void *drm_priv, - struct kgd_mem **mem, uint64_t *size, - uint64_t *mmap_offset); +int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem, struct dma_buf **dmabuf); void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ae7dfaf59159..48697b789342 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1956,8 +1956,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the BO*/ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); - if (!mem->is_imported) - drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); + drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); if (mem->dmabuf) { dma_buf_put(mem->dmabuf); mem->dmabuf = NULL; @@ -2313,34 +2312,26 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, return 0; } -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, - struct dma_buf *dma_buf, - uint64_t va, void *drm_priv, - struct kgd_mem **mem, uint64_t *size, - uint64_t *mmap_offset) +static int import_obj_create(struct amdgpu_device *adev, + struct dma_buf *dma_buf, + struct drm_gem_object *obj, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); - struct drm_gem_object *obj; struct amdgpu_bo *bo; int ret; - obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf); - if (IS_ERR(obj)) - return PTR_ERR(obj); - bo = gem_to_amdgpu_bo(obj); if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT))) { + AMDGPU_GEM_DOMAIN_GTT))) /* Only VRAM and GTT BOs are supported */ - ret = -EINVAL; - goto err_put_obj; - } + return -EINVAL; *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) { - ret = -ENOMEM; - goto err_put_obj; - } + if (!*mem) + return -ENOMEM; ret = drm_vma_node_allow(&obj->vma_node, drm_priv); if (ret) @@ -2390,8 +2381,41 @@ err_remove_mem: drm_vma_node_revoke(&obj->vma_node, drm_priv); err_free_mem: kfree(*mem); + return ret; +} + +int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct drm_gem_object *obj; + uint32_t handle; + int ret; + + ret = 
drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd, + &handle); + if (ret) + return ret; + obj = drm_gem_object_lookup(adev->kfd.client.file, handle); + if (!obj) { + ret = -EINVAL; + goto err_release_handle; + } + + ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size, + mmap_offset); + if (ret) + goto err_put_obj; + + (*mem)->gem_handle = handle; + + return 0; + err_put_obj: drm_gem_object_put(obj); +err_release_handle: + drm_gem_handle_delete(adev->kfd.client.file, handle); return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b4cb907f80c6..ce4c52ec34d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1564,16 +1564,11 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, { struct kfd_ioctl_import_dmabuf_args *args = data; struct kfd_process_device *pdd; - struct dma_buf *dmabuf; int idr_handle; uint64_t size; void *mem; int r; - dmabuf = dma_buf_get(args->dmabuf_fd); - if (IS_ERR(dmabuf)) - return PTR_ERR(dmabuf); - mutex_lock(&p->mutex); pdd = kfd_process_device_data_by_id(p, args->gpu_id); if (!pdd) { @@ -1587,10 +1582,10 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, goto err_unlock; } - r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf, - args->va_addr, pdd->drm_priv, - (struct kgd_mem **)&mem, &size, - NULL); + r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd, + args->va_addr, pdd->drm_priv, + (struct kgd_mem **)&mem, &size, + NULL); if (r) goto err_unlock; @@ -1601,7 +1596,6 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, } mutex_unlock(&p->mutex); - dma_buf_put(dmabuf); args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); @@ -1612,7 +1606,6 @@ err_free: pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); - dma_buf_put(dmabuf); return r; } -- cgit v1.2.3 From 296b29ce8acb5dbb3ca1937f1b537b3f6be0460a Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 11 Dec 2023 18:06:26 +0800 Subject: drm/amd/pm: update driver_if and ppsmc headers for coming wbrf feature Add those data structures to support Wifi RFI mitigation feature. 
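The new WifiBandEntryTable_t is the shared table the driver fills with the wifi frequency bands to be avoided. A minimal illustration of how such a table might be populated, assuming a caller-provided list of (low, high) pairs; the bands/num_bands names are placeholders and not part of the patch:

	WifiBandEntryTable_t tbl = {0};
	uint32_t i;

	for (i = 0; i < num_bands && i < ARRAY_SIZE(tbl.WifiBandEntry); i++) {
		tbl.WifiBandEntry[i].LowFreq  = bands[i].low;
		tbl.WifiBandEntry[i].HighFreq = bands[i].high;
	}
	tbl.WifiBandEntryNum = i;
	/* presumably pushed to PMFW through the new TABLE_WIFIBAND slot */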
Signed-off-by: Evan Quan Reviewed-by: Mario Limonciello Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 11 +++++++++++ .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 3 ++- .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h | 3 ++- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h | 5 ++--- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h | 3 ++- 5 files changed, 19 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index c2265e027ca8..ab36402f85b1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1501,6 +1501,17 @@ enum smu_baco_seq { __dst_size); \ }) +typedef struct { + uint16_t LowFreq; + uint16_t HighFreq; +} WifiOneBand_t; + +typedef struct { + uint32_t WifiBandEntryNum; + WifiOneBand_t WifiBandEntry[11]; + uint32_t MmHubPadding[8]; +} WifiBandEntryTable_t; + #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4) int smu_get_power_limit(void *handle, uint32_t *limit, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index 9dd1ed5b8940..b114d14fc053 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -1615,7 +1615,8 @@ typedef struct { #define TABLE_I2C_COMMANDS 9 #define TABLE_DRIVER_INFO 10 #define TABLE_ECCINFO 11 -#define TABLE_COUNT 12 +#define TABLE_WIFIBAND 12 +#define TABLE_COUNT 13 //IH Interupt ID #define IH_INTERRUPT_ID_TO_DRIVER 0xFE diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 62b7c0daff68..8b1496f8ce58 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -1605,7 +1605,8 @@ typedef struct { #define TABLE_I2C_COMMANDS 9 #define TABLE_DRIVER_INFO 10 #define TABLE_ECCINFO 11 -#define TABLE_COUNT 12 +#define TABLE_WIFIBAND 12 +#define TABLE_COUNT 13 //IH Interupt ID #define IH_INTERRUPT_ID_TO_DRIVER 0xFE diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h index e2ee855c7748..e862d323caab 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h @@ -138,10 +138,9 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A #define PPSMC_MSG_SetPriorityDeltaGain 0x4B #define PPSMC_MSG_AllowIHHostInterrupt 0x4C - #define PPSMC_MSG_DALNotPresent 0x4E - -#define PPSMC_Message_Count 0x4F +#define PPSMC_MSG_EnableUCLKShadow 0x51 +#define PPSMC_Message_Count 0x52 //Debug Dump Message #define DEBUGSMC_MSG_TestMessage 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h index 6aaefca9b595..a6bf9cdd130e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h @@ -134,6 +134,7 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A #define PPSMC_MSG_SetPriorityDeltaGain 0x4B #define PPSMC_MSG_AllowIHHostInterrupt 
0x4C -#define PPSMC_Message_Count 0x4D +#define PPSMC_MSG_EnableUCLKShadow 0x51 +#define PPSMC_Message_Count 0x52 #endif -- cgit v1.2.3 From b8b39de646274366d17a3614fdaf65fa0716ab32 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 11 Dec 2023 18:06:27 +0800 Subject: drm/amd/pm: setup the framework to support Wifi RFI mitigation feature With WBRF feature supported, as a driver responding to the frequencies, amdgpu driver is able to do shadow pstate switching to mitigate possible interference(between its (G-)DDR memory clocks and local radio module frequency bands used by Wifi 6/6e/7). -- v1->v2: - update the prompt for feature support(Lijo) v8->v9: - update parameter document for smu_wbrf_event_handler(Simon) v9->v10: v10->v11: - correct the logics for wbrf range sorting(Lijo) v13: - Fix the format issue (IIpo Jarvinen) Signed-off-by: Evan Quan Reviewed-by: Mario Limonciello Signed-off-by: Ma Jun Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 17 +++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 191 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++ drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 + 5 files changed, 235 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0af8ac81facd..82d294069276 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -252,6 +252,8 @@ extern int amdgpu_seamless; extern int amdgpu_user_partt_mode; extern int amdgpu_agp; +extern int amdgpu_wbrf; + #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8b33b130ea36..a194db09cde6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -208,6 +208,7 @@ int amdgpu_umsch_mm; int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ +int amdgpu_wbrf = -1; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -971,6 +972,22 @@ module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)"); module_param_named(agp, amdgpu_agp, int, 0444); +/** + * DOC: wbrf (int) + * Enable Wifi RFI interference mitigation feature. + * Due to electrical and mechanical constraints there may be likely interference of + * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio + * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference, + * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based + * on active list of frequencies in-use (to be avoided) as part of initial setting or + * P-state transition. However, there may be potential performance impact with this + * feature enabled. + * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported)) + */ +MODULE_PARM_DESC(wbrf, + "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); +module_param_named(wbrf, amdgpu_wbrf, int, 0444); + /* These devices are not supported by amdgpu. 
* They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index f464610a959f..a3c54a053bd5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1322,6 +1322,170 @@ static int smu_get_thermal_temperature_range(struct smu_context *smu) return ret; } +/** + * smu_wbrf_handle_exclusion_ranges - consume the wbrf exclusion ranges + * + * @smu: smu_context pointer + * + * Retrieve the wbrf exclusion ranges and send them to PMFW for proper handling. + * Returns 0 on success, error on failure. + */ +static int smu_wbrf_handle_exclusion_ranges(struct smu_context *smu) +{ + struct wbrf_ranges_in_out wbrf_exclusion = {0}; + struct freq_band_range *wifi_bands = wbrf_exclusion.band_list; + struct amdgpu_device *adev = smu->adev; + uint32_t num_of_wbrf_ranges = MAX_NUM_OF_WBRF_RANGES; + uint64_t start, end; + int ret, i, j; + + ret = amd_wbrf_retrieve_freq_band(adev->dev, &wbrf_exclusion); + if (ret) { + dev_err(adev->dev, "Failed to retrieve exclusion ranges!\n"); + return ret; + } + + /* + * The exclusion ranges array we got might be filled with holes and duplicate + * entries. For example: + * {(2400, 2500), (0, 0), (6882, 6962), (2400, 2500), (0, 0), (6117, 6189), (0, 0)...} + * We need to do some sortups to eliminate those holes and duplicate entries. + * Expected output: {(2400, 2500), (6117, 6189), (6882, 6962), (0, 0)...} + */ + for (i = 0; i < num_of_wbrf_ranges; i++) { + start = wifi_bands[i].start; + end = wifi_bands[i].end; + + /* get the last valid entry to fill the intermediate hole */ + if (!start && !end) { + for (j = num_of_wbrf_ranges - 1; j > i; j--) + if (wifi_bands[j].start && wifi_bands[j].end) + break; + + /* no valid entry left */ + if (j <= i) + break; + + start = wifi_bands[i].start = wifi_bands[j].start; + end = wifi_bands[i].end = wifi_bands[j].end; + wifi_bands[j].start = 0; + wifi_bands[j].end = 0; + num_of_wbrf_ranges = j; + } + + /* eliminate duplicate entries */ + for (j = i + 1; j < num_of_wbrf_ranges; j++) { + if ((wifi_bands[j].start == start) && (wifi_bands[j].end == end)) { + wifi_bands[j].start = 0; + wifi_bands[j].end = 0; + } + } + } + + /* Send the sorted wifi_bands to PMFW */ + ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands); + /* Try to set the wifi_bands again */ + if (unlikely(ret == -EBUSY)) { + mdelay(5); + ret = smu_set_wbrf_exclusion_ranges(smu, wifi_bands); + } + + return ret; +} + +/** + * smu_wbrf_event_handler - handle notify events + * + * @nb: notifier block + * @action: event type + * @_arg: event data + * + * Calls relevant amdgpu function in response to wbrf event + * notification from kernel. + */ +static int smu_wbrf_event_handler(struct notifier_block *nb, + unsigned long action, void *_arg) +{ + struct smu_context *smu = container_of(nb, struct smu_context, wbrf_notifier); + + switch (action) { + case WBRF_CHANGED: + smu_wbrf_handle_exclusion_ranges(smu); + break; + default: + return NOTIFY_DONE; + }; + + return NOTIFY_OK; +} + +/** + * smu_wbrf_support_check - check wbrf support + * + * @smu: smu_context pointer + * + * Verifies the ACPI interface whether wbrf is supported. 
+ */ +static void smu_wbrf_support_check(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->wbrf_supported = smu_is_asic_wbrf_supported(smu) && amdgpu_wbrf && + acpi_amd_wbrf_supported_consumer(adev->dev); + + if (smu->wbrf_supported) + dev_info(adev->dev, "RF interference mitigation is supported\n"); +} + +/** + * smu_wbrf_init - init driver wbrf support + * + * @smu: smu_context pointer + * + * Verifies the AMD ACPI interfaces and registers with the wbrf + * notifier chain if wbrf feature is supported. + * Returns 0 on success, error on failure. + */ +static int smu_wbrf_init(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int ret; + + if (!smu->wbrf_supported) + return 0; + + smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler; + ret = amd_wbrf_register_notifier(&smu->wbrf_notifier); + if (ret) + return ret; + + /* + * Some wifiband exclusion ranges may be already there + * before our driver loaded. To make sure our driver + * is awared of those exclusion ranges. + */ + ret = smu_wbrf_handle_exclusion_ranges(smu); + if (ret) + dev_err(adev->dev, "Failed to handle wbrf exclusion ranges\n"); + + return ret; +} + +/** + * smu_wbrf_fini - tear down driver wbrf support + * + * @smu: smu_context pointer + * + * Unregisters with the wbrf notifier chain. + */ +static void smu_wbrf_fini(struct smu_context *smu) +{ + if (!smu->wbrf_supported) + return; + + amd_wbrf_unregister_notifier(&smu->wbrf_notifier); +} + static int smu_smc_hw_setup(struct smu_context *smu) { struct smu_feature *feature = &smu->smu_feature; @@ -1414,6 +1578,15 @@ static int smu_smc_hw_setup(struct smu_context *smu) if (ret) return ret; + /* Enable UclkShadow on wbrf supported */ + if (smu->wbrf_supported) { + ret = smu_enable_uclk_shadow(smu, true); + if (ret) { + dev_err(adev->dev, "Failed to enable UclkShadow feature to support wbrf!\n"); + return ret; + } + } + /* * With SCPM enabled, these actions(and relevant messages) are * not needed and permitted. @@ -1512,6 +1685,15 @@ static int smu_smc_hw_setup(struct smu_context *smu) */ ret = smu_set_min_dcef_deep_sleep(smu, smu->smu_table.boot_values.dcefclk / 100); + if (ret) { + dev_err(adev->dev, "Error setting min deepsleep dcefclk\n"); + return ret; + } + + /* Init wbrf support. Properly setup the notifier */ + ret = smu_wbrf_init(smu); + if (ret) + dev_err(adev->dev, "Error during wbrf init call\n"); return ret; } @@ -1567,6 +1749,13 @@ static int smu_hw_init(void *handle) return ret; } + /* + * Check whether wbrf is supported. This needs to be done + * before SMU setup starts since part of SMU configuration + * relies on this. 
+ */ + smu_wbrf_support_check(smu); + if (smu->is_apu) { ret = smu_set_gfx_imu_enable(smu); if (ret) @@ -1733,6 +1922,8 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int ret = 0; + smu_wbrf_fini(smu); + cancel_work_sync(&smu->throttling_logging_work); cancel_work_sync(&smu->interrupt_work); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index ab36402f85b1..f2a89a783c58 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -22,6 +22,8 @@ #ifndef __AMDGPU_SMU_H__ #define __AMDGPU_SMU_H__ +#include + #include "amdgpu.h" #include "kgd_pp_interface.h" #include "dm_pp_interface.h" @@ -570,6 +572,10 @@ struct smu_context { struct delayed_work swctf_delayed_work; enum pp_xgmi_plpd_mode plpd_mode; + + /* data structures for wbrf feature support */ + bool wbrf_supported; + struct notifier_block wbrf_notifier; }; struct i2c_adapter; @@ -1375,6 +1381,22 @@ struct pptable_funcs { * @notify_rlc_state: Notify RLC power state to SMU. */ int (*notify_rlc_state)(struct smu_context *smu, bool en); + + /** + * @is_asic_wbrf_supported: check whether PMFW supports the wbrf feature + */ + bool (*is_asic_wbrf_supported)(struct smu_context *smu); + + /** + * @enable_uclk_shadow: Enable the uclk shadow feature on wbrf supported + */ + int (*enable_uclk_shadow)(struct smu_context *smu, bool enable); + + /** + * @set_wbrf_exclusion_ranges: notify SMU the wifi bands occupied + */ + int (*set_wbrf_exclusion_ranges)(struct smu_context *smu, + struct freq_band_range *exclusion_ranges); }; typedef enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index 64766ac69c53..6f4d212607d7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -98,6 +98,9 @@ #define smu_set_config_table(smu, config_table) smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table) #define smu_init_pptable_microcode(smu) smu_ppt_funcs(init_pptable_microcode, 0, smu) #define smu_notify_rlc_state(smu, en) smu_ppt_funcs(notify_rlc_state, 0, smu, en) +#define smu_is_asic_wbrf_supported(smu) smu_ppt_funcs(is_asic_wbrf_supported, false, smu) +#define smu_enable_uclk_shadow(smu, enable) smu_ppt_funcs(enable_uclk_shadow, 0, smu, enable) +#define smu_set_wbrf_exclusion_ranges(smu, freq_band_range) smu_ppt_funcs(set_wbrf_exclusion_ranges, -EOPNOTSUPP, smu, freq_band_range) #endif #endif -- cgit v1.2.3 From 71f69557cb12a4674a05b4c5fb730880f13366b1 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 11 Dec 2023 18:06:28 +0800 Subject: drm/amd/pm: add flood detection for wbrf events To protect PMFW from being overloaded. 
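The mitigation below amounts to debouncing the notifier: bursts of wbrf events collapse into a single delayed-work run that only consumes the latest state. A minimal, hedged sketch of that pattern follows (the example_* names and the 10 ms pace are illustrative placeholders, not part of the patch):

#include <linux/notifier.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define EXAMPLE_EVENT_PACE_MS	10	/* at most one handler run per burst window */

struct example_ctx {
	struct notifier_block nb;
	struct delayed_work dwork;
};

static void example_consume_latest(struct example_ctx *ctx)
{
	/* the expensive part, e.g. re-reading and forwarding the latest ranges */
}

static void example_delayed_work_handler(struct work_struct *work)
{
	struct example_ctx *ctx = container_of(work, struct example_ctx, dwork.work);

	/* the flood is over; act once on the most recent state only */
	example_consume_latest(ctx);
}

static int example_notifier_call(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct example_ctx *ctx = container_of(nb, struct example_ctx, nb);

	/*
	 * schedule_delayed_work() does nothing while the work is already
	 * pending, so a burst of notifications yields one handler run.
	 */
	schedule_delayed_work(&ctx->dwork, msecs_to_jiffies(EXAMPLE_EVENT_PACE_MS));

	return NOTIFY_OK;
}

The actual driver change below follows this shape, with INIT_DELAYED_WORK() done in smu_wbrf_init() and cancel_delayed_work_sync() in smu_wbrf_fini().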
Signed-off-by: Evan Quan Reviewed-by: Mario Limonciello Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 29 +++++++++++++++++++++------ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 7 +++++++ 2 files changed, 30 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index a3c54a053bd5..d409857fd622 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1410,7 +1410,8 @@ static int smu_wbrf_event_handler(struct notifier_block *nb, switch (action) { case WBRF_CHANGED: - smu_wbrf_handle_exclusion_ranges(smu); + schedule_delayed_work(&smu->wbrf_delayed_work, + msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE)); break; default: return NOTIFY_DONE; @@ -1419,6 +1420,20 @@ static int smu_wbrf_event_handler(struct notifier_block *nb, return NOTIFY_OK; } +/** + * smu_wbrf_delayed_work_handler - callback on delayed work timer expired + * + * @work: struct work_struct pointer + * + * Flood is over and driver will consume the latest exclusion ranges. + */ +static void smu_wbrf_delayed_work_handler(struct work_struct *work) +{ + struct smu_context *smu = container_of(work, struct smu_context, wbrf_delayed_work.work); + + smu_wbrf_handle_exclusion_ranges(smu); +} + /** * smu_wbrf_support_check - check wbrf support * @@ -1448,12 +1463,13 @@ static void smu_wbrf_support_check(struct smu_context *smu) */ static int smu_wbrf_init(struct smu_context *smu) { - struct amdgpu_device *adev = smu->adev; int ret; if (!smu->wbrf_supported) return 0; + INIT_DELAYED_WORK(&smu->wbrf_delayed_work, smu_wbrf_delayed_work_handler); + smu->wbrf_notifier.notifier_call = smu_wbrf_event_handler; ret = amd_wbrf_register_notifier(&smu->wbrf_notifier); if (ret) @@ -1464,11 +1480,10 @@ static int smu_wbrf_init(struct smu_context *smu) * before our driver loaded. To make sure our driver * is awared of those exclusion ranges. */ - ret = smu_wbrf_handle_exclusion_ranges(smu); - if (ret) - dev_err(adev->dev, "Failed to handle wbrf exclusion ranges\n"); + schedule_delayed_work(&smu->wbrf_delayed_work, + msecs_to_jiffies(SMU_WBRF_EVENT_HANDLING_PACE)); - return ret; + return 0; } /** @@ -1484,6 +1499,8 @@ static void smu_wbrf_fini(struct smu_context *smu) return; amd_wbrf_unregister_notifier(&smu->wbrf_notifier); + + cancel_delayed_work_sync(&smu->wbrf_delayed_work); } static int smu_smc_hw_setup(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index f2a89a783c58..f97178a602bc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -473,6 +473,12 @@ struct stb_context { #define WORKLOAD_POLICY_MAX 7 +/* + * Configure wbrf event handling pace as there can be only one + * event processed every SMU_WBRF_EVENT_HANDLING_PACE ms. 
+ */ +#define SMU_WBRF_EVENT_HANDLING_PACE 10 + struct smu_context { struct amdgpu_device *adev; struct amdgpu_irq_src irq_source; @@ -576,6 +582,7 @@ struct smu_context { /* data structures for wbrf feature support */ bool wbrf_supported; struct notifier_block wbrf_notifier; + struct delayed_work wbrf_delayed_work; }; struct i2c_adapter; -- cgit v1.2.3 From 18df969b44a0bdc1f24f6ca6b10595dad6f57398 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Mon, 11 Dec 2023 18:06:29 +0800 Subject: drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.0 Fulfill the SMU13.0.0 support for Wifi RFI mitigation feature. -- v10->v11: - downgrade the prompt level on message failure(Lijo) v13: - Fix the format issue (IIpo Jarvinen) - Move function smu_v13_0_0_set_wbrf_exclusion_ranges to smu_v13_0.c as a generic code for later use (IIpo Jarvinen) Co-developed-by: Evan Quan Signed-off-by: Evan Quan Reviewed-by: Mario Limonciello Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 4 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 48 ++++++++++++++++++++++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 22 ++++++++++ 5 files changed, 78 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index f97178a602bc..2aa4fea87314 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -23,6 +23,7 @@ #define __AMDGPU_SMU_H__ #include +#include #include "amdgpu.h" #include "kgd_pp_interface.h" @@ -320,6 +321,7 @@ enum smu_table_id { SMU_TABLE_PACE, SMU_TABLE_ECCINFO, SMU_TABLE_COMBO_PPTABLE, + SMU_TABLE_WIFIBAND, SMU_TABLE_COUNT, }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index ea8ea99b7436..953a767613b1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -260,7 +260,8 @@ __SMU_DUMMY_MAP(PowerDownUmsch), \ __SMU_DUMMY_MAP(SetSoftMaxVpe), \ __SMU_DUMMY_MAP(SetSoftMinVpe), \ - __SMU_DUMMY_MAP(GetMetricsVersion), + __SMU_DUMMY_MAP(GetMetricsVersion), \ + __SMU_DUMMY_MAP(EnableUCLKShadow), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 5e7b8f29fecc..fbd57fa1a004 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -294,5 +294,9 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, int smu_v13_0_disable_pmfw_state(struct smu_context *smu); +int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable); + +int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu, + struct freq_band_range *exclusion_ranges); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 2877e5da5b5a..771a3d457c33 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2507,3 +2507,51 @@ int smu_v13_0_disable_pmfw_state(struct smu_context *smu) return ret == 0 ? 
0 : -EINVAL; } + +int smu_v13_0_enable_uclk_shadow(struct smu_context *smu, bool enable) +{ + return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableUCLKShadow, enable, NULL); +} + +int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu, + struct freq_band_range *exclusion_ranges) +{ + WifiBandEntryTable_t wifi_bands; + int valid_entries = 0; + int ret, i; + + memset(&wifi_bands, 0, sizeof(wifi_bands)); + for (i = 0; i < ARRAY_SIZE(wifi_bands.WifiBandEntry); i++) { + if (!exclusion_ranges[i].start && !exclusion_ranges[i].end) + break; + + /* PMFW expects the inputs to be in Mhz unit */ + wifi_bands.WifiBandEntry[valid_entries].LowFreq = + DIV_ROUND_DOWN_ULL(exclusion_ranges[i].start, HZ_PER_MHZ); + wifi_bands.WifiBandEntry[valid_entries++].HighFreq = + DIV_ROUND_UP_ULL(exclusion_ranges[i].end, HZ_PER_MHZ); + } + wifi_bands.WifiBandEntryNum = valid_entries; + + /* + * Per confirm with PMFW team, WifiBandEntryNum = 0 + * is a valid setting. + * + * Considering the scenarios below: + * - At first the wifi device adds an exclusion range e.g. (2400,2500) to + * BIOS and our driver gets notified. We will set WifiBandEntryNum = 1 + * and pass the WifiBandEntry (2400, 2500) to PMFW. + * + * - Later the wifi device removes the wifiband list added above and + * our driver gets notified again. At this time, driver will set + * WifiBandEntryNum = 0 and pass an empty WifiBandEntry list to PMFW. + * + * - PMFW may still need to do some uclk shadow update(e.g. switching + * from shadow clock back to primary clock) on receiving this. + */ + ret = smu_cmn_update_table(smu, SMU_TABLE_WIFIBAND, 0, &wifi_bands, true); + if (ret) + dev_warn(smu->adev->dev, "Failed to set wifiband!"); + + return ret; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 2d25a9e47651..a24aa886c636 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -169,6 +169,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(AllowIHHostInterrupt, PPSMC_MSG_AllowIHHostInterrupt, 0), MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 0), MSG_MAP(DALNotPresent, PPSMC_MSG_DALNotPresent, 0), + MSG_MAP(EnableUCLKShadow, PPSMC_MSG_EnableUCLKShadow, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -253,6 +254,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { TAB_MAP(I2C_COMMANDS), TAB_MAP(ECCINFO), TAB_MAP(OVERDRIVE), + TAB_MAP(WIFIBAND), }; static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -498,6 +500,9 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND, + sizeof(WifiBandEntryTable_t), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL); if (!smu_table->metrics_table) @@ -2938,6 +2943,20 @@ static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu, return ret; } +static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + return smu->smc_fw_version >= 0x004e6300; + case IP_VERSION(13, 0, 10): + return smu->smc_fw_version >= 0x00503300; + default: + return 
false; + } +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -3016,6 +3035,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .gpo_control = smu_v13_0_gpo_control, .get_ecc_info = smu_v13_0_0_get_ecc_info, .notify_display_change = smu_v13_0_notify_display_change, + .is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check, + .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, + .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) -- cgit v1.2.3 From cca850267d33f1153e16e07dc7c32ce5bc3df1fe Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 11 Dec 2023 18:06:30 +0800 Subject: drm/amd/pm: enable Wifi RFI mitigation feature support for SMU13.0.7 Fulfill the SMU13.0.7 support for Wifi RFI mitigation feature. -- v10->v11: - downgrade the prompt level on message failure(Lijo) v13: - Fix the format issue (IIpo Jarvinen) - Remove duplicate code (IIpo Jarvinen) Signed-off-by: Evan Quan Reviewed-by: Mario Limonciello Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 2909ec06b1cb..59606a19e3d2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -140,6 +140,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), + MSG_MAP(EnableUCLKShadow, PPSMC_MSG_EnableUCLKShadow, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { @@ -222,6 +223,7 @@ static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { TAB_MAP(ACTIVITY_MONITOR_COEFF), [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, TAB_MAP(OVERDRIVE), + TAB_MAP(WIFIBAND), }; static struct cmn2asic_mapping smu_v13_0_7_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -512,6 +514,9 @@ static int smu_v13_0_7_tables_init(struct smu_context *smu) AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_WIFIBAND, + sizeof(WifiBandEntryTable_t), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL); if (!smu_table->metrics_table) @@ -2535,6 +2540,11 @@ static int smu_v13_0_7_set_df_cstate(struct smu_context *smu, NULL); } +static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu) +{ + return smu->smc_fw_version > 0x00524600; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -2601,6 +2611,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .set_mp1_state = smu_v13_0_7_set_mp1_state, .set_df_cstate = smu_v13_0_7_set_df_cstate, .gpo_control = smu_v13_0_gpo_control, + .is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check, + .enable_uclk_shadow = smu_v13_0_enable_uclk_shadow, + .set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges, }; 
void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) -- cgit v1.2.3 From 94b1e028e15c94362420f9f3f711fafbf9d52996 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 7 Dec 2023 10:14:41 -0500 Subject: drm/amdgpu/sdma5.2: add begin/end_use ring callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add begin/end_use ring callbacks to disallow GFXOFF when SDMA work is submitted and allow it again afterward. This should avoid corner cases where GFXOFF is erroneously entered when SDMA is still active. For now just allow/disallow GFXOFF in the begin and end helpers until we root cause the issue. This should not impact power as SDMA usage is pretty minimal and GFXOSS should not be active when SDMA is active anyway, this just makes it explicit. v2: move everything into sdma5.2 code. No reason for this to be generic at this point. v3: Add comments in new code Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220 Reviewed-by: Mario Limonciello (v1) Tested-by: Mario Limonciello (v1) Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.15+ --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 83c240f741b5..0058f3f7cf6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Disallow GFXOFF while SDMA is active. + * We can probably just limit this to 5.2.3, + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. + */ + amdgpu_gfx_off_ctrl(adev, false); +} + +static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Allow GFXOFF when SDMA is complete. + */ + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, @@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .test_ib = sdma_v5_2_ring_test_ib, .insert_nop = sdma_v5_2_ring_insert_nop, .pad_ib = sdma_v5_2_ring_pad_ib, + .begin_use = sdma_v5_2_ring_begin_use, + .end_use = sdma_v5_2_ring_end_use, .emit_wreg = sdma_v5_2_ring_emit_wreg, .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, -- cgit v1.2.3 From 91963397c49aa2907aeafa52d929555dcbc9cd07 Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Sat, 2 Dec 2023 01:17:40 +0100 Subject: drm/amdgpu: Enable tunneling on high-priority compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves latency if the GPU is already busy with other work. This is useful for VR compositors that submit highly latency-sensitive compositing work on high-priority compute queues while the GPU is busy rendering the next frame. 
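For reference, a compositor lands on one of these high-priority compute queues by allocating a high-priority GPU context. Below is a hedged userspace sketch against the raw amdgpu uAPI; it assumes the long-standing AMDGPU_CTX_OP_ALLOC_CTX / AMDGPU_CTX_PRIORITY_HIGH interface and drmCommandWriteRead(), real compositors go through libdrm_amdgpu or the Vulkan driver instead, and priorities above normal may need CAP_SYS_NICE or an amdgpu_sched override:

#include <stdint.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

/* Illustration only, not part of this patch. */
static int example_alloc_high_prio_ctx(int drm_fd, uint32_t *ctx_id)
{
	union drm_amdgpu_ctx args = {0};
	int r;

	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	args.in.priority = AMDGPU_CTX_PRIORITY_HIGH;

	r = drmCommandWriteRead(drm_fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (!r)
		*ctx_id = args.out.alloc.ctx_id;

	return r;
}

The compositor-side plumbing is tracked in the merge request below.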
Userspace merge request: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462 v2: bump driver version (Alex) Reviewed-by: Marek Olšák Signed-off-by: Friedrich Vock Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 10 ++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++- 5 files changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 82d294069276..616b6c911767 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -791,6 +791,7 @@ struct amdgpu_mqd_prop { uint64_t eop_gpu_addr; uint32_t hqd_pipe_priority; uint32_t hqd_queue_priority; + bool allow_tunneling; bool hqd_active; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index a194db09cde6..880137774b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -115,9 +115,10 @@ * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.56.0 - Update IB start address and size alignment for decode and encode + * - 3.57.0 - Compute tunneling on GFX10+ */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 56 +#define KMS_DRIVER_MINOR 57 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6a80d3ec887e..45424ebf9681 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -642,6 +642,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, struct amdgpu_mqd_prop *prop) { struct amdgpu_device *adev = ring->adev; + bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && + amdgpu_gfx_is_high_priority_compute_queue(adev, ring); + bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX && + amdgpu_gfx_is_high_priority_graphics_queue(adev, ring); memset(prop, 0, sizeof(*prop)); @@ -659,10 +663,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, */ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ; - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && - amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) || - (ring->funcs->type == AMDGPU_RING_TYPE_GFX && - amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) { + prop->allow_tunneling = is_high_prio_compute; + if (is_high_prio_compute || is_high_prio_gfx) { prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c8a3bf01743f..73f6d7e72c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, + prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); mqd->cp_hqd_pq_control = tmp; diff --git 
a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c659ef0f47ce..bdcf96df69e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -3847,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, + prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); mqd->cp_hqd_pq_control = tmp; -- cgit v1.2.3 From e747235ef3c253298157b6cd634b9b2695f33d20 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 12 Dec 2023 15:53:38 +0200 Subject: drm/radeon: include drm/drm_edid.h only where needed Including drm_edid.h from radeon_mode.h causes the rebuild of more than a hundred files when drm_edid.h is modified, while there are only a handful of files that actually need to include drm_edid.h. Signed-off-by: Jani Nikula Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios_encoders.c | 1 + drivers/gpu/drm/radeon/dce3_1_afmt.c | 1 + drivers/gpu/drm/radeon/dce6_afmt.c | 1 + drivers/gpu/drm/radeon/evergreen.c | 1 + drivers/gpu/drm/radeon/evergreen_hdmi.c | 1 + drivers/gpu/drm/radeon/radeon_atombios.c | 1 + drivers/gpu/drm/radeon/radeon_audio.c | 1 + drivers/gpu/drm/radeon/radeon_audio.h | 4 +++- drivers/gpu/drm/radeon/radeon_combios.c | 1 + drivers/gpu/drm/radeon/radeon_encoders.c | 1 + drivers/gpu/drm/radeon/radeon_mode.h | 2 +- 11 files changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 4aca09cab4b8..6e537c5bd295 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c index e8fe239b9d79..324e9b765098 100644 --- a/drivers/gpu/drm/radeon/dce3_1_afmt.c +++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c @@ -21,6 +21,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include +#include #include "radeon.h" #include "radeon_asic.h" diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 4a1d5447eac1..4c06f47453fd 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -21,6 +21,7 @@ * */ #include +#include #include "dce6_afmt.h" #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f0ae087be914..a424b86008b8 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 5f3078f8ab95..681119c91d94 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -26,6 +26,7 @@ */ #include +#include #include #include "evergreen_hdmi.h" #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 85c4bb186203..3596ea4a8b60 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -27,6 +27,7 @@ #include #include +#include #include #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 279bf130a18c..91b58fbc2be7 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -27,6 +27,7 @@ #include #include +#include #include "dce6_afmt.h" #include "evergreen_hdmi.h" #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index 05e67867469b..dacaaa007051 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -27,7 +27,9 @@ #include -#define RREG32_ENDPOINT(block, reg) \ +struct cea_sad; + +#define RREG32_ENDPOINT(block, reg) \ radeon_audio_endpoint_rreg(rdev, (block), (reg)) #define WREG32_ENDPOINT(block, reg, v) \ radeon_audio_endpoint_wreg(rdev, (block), (reg), (v)) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 2620efc7c675..6952b1273b0f 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -28,6 +28,7 @@ #include #include +#include #include #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 9cb6401fe97e..3de3dce9e89d 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -26,6 +26,7 @@ #include +#include #include #include diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 1decdcec0264..59c4db13d90a 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -32,13 +32,13 @@ #include #include -#include #include #include #include #include #include +struct edid; struct radeon_bo; struct radeon_device; -- cgit v1.2.3 From 671994e3bf33a414dc6a8c147969dae3a15ba9de Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:49 -0100 Subject: drm/amd/display: add plane 3D LUT driver-specific properties Add 3D LUT property for plane color transformations using a 3D lookup table. 3D LUT allows for highly accurate and complex color transformations and is suitable to adjust the balance between color channels. It's also more complex to manage and require more computational resources. 
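Concretely, the new property stores the LUT as a dim x dim x dim array of RGB triplets, addressed with blue as the outermost index and red as the innermost (see the kerneldoc added below); hardware interpolates between the sampled points, tetrahedrally on AMD. A small illustrative lookup, nearest-sample only and with hypothetical names:

#include <stdint.h>

struct example_rgb_entry {
	uint16_t r, g, b;
};

/* Map a 16-bit channel value onto the nearest of 'dim' grid points. */
static unsigned int example_grid_index(uint16_t v, unsigned int dim)
{
	return ((unsigned int)v * (dim - 1) + 0x7fff) / 0xffff;
}

static struct example_rgb_entry
example_lut3d_sample(const struct example_rgb_entry *lut, unsigned int dim,
		     uint16_t r, uint16_t g, uint16_t b)
{
	unsigned int ri = example_grid_index(r, dim);
	unsigned int gi = example_grid_index(g, dim);
	unsigned int bi = example_grid_index(b, dim);

	/* blue is the outermost dimension, red the innermost */
	return lut[(bi * dim + gi) * dim + ri];
}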
Since a 3D LUT has a limited number of entries in each dimension we want to use them in an optimal fashion. This means using the 3D LUT in a colorspace that is optimized for human vision, such as sRGB, PQ, or another non-linear space. Therefore, userpace may need one 1D LUT (shaper) before it to delinearize content and another 1D LUT after 3D LUT (blend) to linearize content again for blending. The next patches add these 1D LUTs to the plane color mgmt pipeline. v3: - improve commit message about 3D LUT - describe the 3D LUT entries and size (Harry) v4: - advertise 3D LUT max size as the size of a single-dimension v5: - get lut3d blob correctly (Joshua) - fix doc about 3d-lut dimension size (Sebastian) Signed-off-by: Melissa Wen Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 18 ++++++++++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 9 ++++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 14 +++++++++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 24 ++++++++++++++++++++++ 4 files changed, 65 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 218623e1b9aa..7350094f3db6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -364,6 +364,24 @@ struct amdgpu_mode_info { * @plane_hdr_mult_property: */ struct drm_property *plane_hdr_mult_property; + /** + * @plane_lut3d_property: Plane property for color transformation using + * a 3D LUT (pre-blending), a three-dimensional array where each + * element is an RGB triplet. Each dimension has the size of + * lut3d_size. The array contains samples from the approximated + * function. On AMD, values between samples are estimated by + * tetrahedral interpolation. The array is accessed with three indices, + * one for each input dimension (color channel), blue being the + * outermost dimension, red the innermost. + */ + struct drm_property *plane_lut3d_property; + /** + * @plane_degamma_lut_size_property: Plane property to define the max + * size of 3D LUT as supported by the driver (read-only). The max size + * is the max size of one dimension and, therefore, the max number of + * entries for 3D LUT array is the 3D LUT size cubed; + */ + struct drm_property *plane_lut3d_size_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index d36d68d517a8..0f78024ac0e5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -784,6 +784,11 @@ struct dm_plane_state { * TF is needed for any subsequent linear-to-non-linear transforms. */ __u64 hdr_mult; + /** + * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of + * &struct drm_color_lut. 
+ */ + struct drm_property_blob *lut3d; }; struct dm_crtc_state { @@ -869,6 +874,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); +/* 3D LUT max size is 17x17x17 (4913 entries) */ +#define MAX_COLOR_3DLUT_SIZE 17 +#define MAX_COLOR_3DLUT_BITDEPTH 12 +/* 1D LUT size */ #define MAX_COLOR_LUT_ENTRIES 4096 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */ #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index b5b34a9209e4..a3b4f6a4c4a8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -231,6 +231,20 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_hdr_mult_property = prop; + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_LUT3D", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_lut3d_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + DRM_MODE_PROP_IMMUTABLE, + "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_lut3d_size_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 45a2c9b36630..e588cff7d6b0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1364,6 +1364,9 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct if (old_dm_plane_state->degamma_lut) dm_plane_state->degamma_lut = drm_property_blob_get(old_dm_plane_state->degamma_lut); + if (old_dm_plane_state->lut3d) + dm_plane_state->lut3d = + drm_property_blob_get(old_dm_plane_state->lut3d); dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult; @@ -1437,6 +1440,8 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, if (dm_plane_state->degamma_lut) drm_property_blob_put(dm_plane_state->degamma_lut); + if (dm_plane_state->lut3d) + drm_property_blob_put(dm_plane_state->lut3d); if (dm_plane_state->dc_state) dc_plane_state_release(dm_plane_state->dc_state); @@ -1470,6 +1475,14 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, drm_object_attach_property(&plane->base, dm->adev->mode_info.plane_hdr_mult_property, AMDGPU_HDR_MULT_DEFAULT); + + if (dpp_color_caps.hw_3d_lut) { + drm_object_attach_property(&plane->base, + mode_info.plane_lut3d_property, 0); + drm_object_attach_property(&plane->base, + mode_info.plane_lut3d_size_property, + MAX_COLOR_3DLUT_SIZE); + } } static int @@ -1501,6 +1514,14 @@ dm_atomic_plane_set_property(struct drm_plane *plane, dm_plane_state->hdr_mult = val; dm_plane_state->base.color_mgmt_changed = 1; } + } else if (property == adev->mode_info.plane_lut3d_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->lut3d, + val, -1, + sizeof(struct drm_color_lut), + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; } else { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n", @@ -1528,6 +1549,9 @@ dm_atomic_plane_get_property(struct drm_plane *plane, *val = dm_plane_state->degamma_tf; } else if (property == 
adev->mode_info.plane_hdr_mult_property) { *val = dm_plane_state->hdr_mult; + } else if (property == adev->mode_info.plane_lut3d_property) { + *val = (dm_plane_state->lut3d) ? + dm_plane_state->lut3d->base.id : 0; } else { return -EINVAL; } -- cgit v1.2.3 From 058eb51912ca3a5fb121668b30e8e94d976afb27 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Tue, 12 Dec 2023 16:46:30 +0800 Subject: drm/amdgpu: Switch to aca bank for xgmi pcs err cnt Instead of software managed counters. Signed-off-by: Hawking Zhang Reviewed-by: Yang Wang Reviewed-by: Stanley.Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index e51e8918e667..b399f1b62887 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -46,6 +46,8 @@ #define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) #define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) +#define MCA_REG__MISC0__ERRCNT(x) MCA_REG_FIELD(x, 43, 32) + #define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0) enum amdgpu_mca_ip { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index ddd782fbee7a..3998c9b31d07 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2537,13 +2537,15 @@ static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, st uint32_t *count) { u32 ext_error_code; + u32 err_cnt; ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); + err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0) - *count = 1; + *count = err_cnt; else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) - *count = 1; + *count = err_cnt; return 0; } -- cgit v1.2.3 From c01b9be7b20999bfeaacc1e574be0db93c14c478 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 12 Dec 2023 01:09:16 -0600 Subject: drm/amd: Fix a probing order problem on SDMA 2.4 commit 751e293f2c99 ("drm/amd: Move microcode init from sw_init to early_init for SDMA v2.4") made a fateful mistake in `adev->sdma.num_instances` wasn't declared when sdma_v2_4_init_microcode() was run. This caused probing to fail. Move the declaration to right before sdma_v2_4_init_microcode(). 
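Spelled out: the microcode init walks adev->sdma.num_instances to fetch firmware for each engine, so calling it while the count is still unset means it cannot do its job and probing falls over later. A hedged sketch of the corrected ordering (the example_* helpers are placeholders; the real fix is the two-line move in the diff below):

/* illustration only, not the driver code */
static int example_init_microcode(struct amdgpu_device *adev)
{
	int i, r;

	/* zero iterations if num_instances has not been assigned yet */
	for (i = 0; i < adev->sdma.num_instances; i++) {
		r = example_request_sdma_fw(adev, i);
		if (r)
			return r;
	}

	return 0;
}

static int example_early_init(struct amdgpu_device *adev)
{
	adev->sdma.num_instances = SDMA_MAX_INSTANCE;	/* set first... */

	return example_init_microcode(adev);		/* ...then consume it */
}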
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3043 Fixes: 751e293f2c99 ("drm/amd: Move microcode init from sw_init to early_init for SDMA v2.4") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 45377a175250..8d5d86675a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -813,12 +813,12 @@ static int sdma_v2_4_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; + r = sdma_v2_4_init_microcode(adev); if (r) return r; - adev->sdma.num_instances = SDMA_MAX_INSTANCE; - sdma_v2_4_set_ring_funcs(adev); sdma_v2_4_set_buffer_funcs(adev); sdma_v2_4_set_vm_pte_funcs(adev); -- cgit v1.2.3 From bd33bb1409b494558a2935f7bbc7842def957fcd Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 5 Dec 2023 12:22:07 -0500 Subject: drm/amdkfd: fix mes set shader debugger process management MES provides the driver a call to explicitly flush stale process memory within the MES to avoid a race condition that results in a fatal memory violation. When SET_SHADER_DEBUGGER is called, the driver passes a memory address that represents a process context address MES uses to keep track of future per-process calls. Normally, MES will purge its process context list when the last queue has been removed. The driver, however, can call SET_SHADER_DEBUGGER regardless of whether a queue has been added or not. If SET_SHADER_DEBUGGER has been called with no queues as the last call prior to process termination, the passed process context address will still reside within MES. On a new process call to SET_SHADER_DEBUGGER, the driver may end up passing an identical process context address value (based on per-process gpu memory address) to MES but is now pointing to a new allocated buffer object during KFD process creation. Since the MES is unaware of this, access of the passed address points to the stale object within MES and triggers a fatal memory violation. The solution is for KFD to explicitly flush the process context address from MES on process termination. Note that the flush call and the MES debugger calls use the same MES interface but are separated as KFD calls to avoid conflicting with each other. 
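Restated as a timeline, with a hedged sketch of where the new flush sits (the real hook is in the kfd_process_queue_manager change below):

/*
 * Process A: SET_SHADER_DEBUGGER(ctx_addr = X), never adds a queue,
 *            then terminates -> MES still caches X in its process list.
 * Process B: its per-process buffer object happens to land at GPU VA X,
 *            SET_SHADER_DEBUGGER(ctx_addr = X) -> MES consults the stale
 *            entry for X -> fatal memory violation.
 *
 * Illustration of the fix at process teardown (not the exact driver code):
 */
static void example_process_teardown(struct kfd_process_device *pdd)
{
	/* tear down the queues first, then purge MES' cached context address */
	example_process_termination(pdd);
	amdgpu_mes_flush_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr);
}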
Signed-off-by: Jonathan Kim Tested-by: Alice Wong Reviewed-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 31 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 10 ++++--- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1 + drivers/gpu/drm/amd/include/mes_v11_api_def.h | 3 ++- 4 files changed, 40 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e544b823abf6..e98de23250dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -916,6 +916,11 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; op_input.set_shader_debugger.process_context_addr = process_context_addr; op_input.set_shader_debugger.flags.u32all = flags; + + /* use amdgpu mes_flush_shader_debugger instead */ + if (op_input.set_shader_debugger.flags.process_ctx_flush) + return -EINVAL; + op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl; memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); @@ -935,6 +940,32 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, return r; } +int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, + uint64_t process_context_addr) +{ + struct mes_misc_op_input op_input = {0}; + int r; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes flush shader debugger is not supported!\n"); + return -EINVAL; + } + + op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; + op_input.set_shader_debugger.process_context_addr = process_context_addr; + op_input.set_shader_debugger.flags.process_ctx_flush = true; + + amdgpu_mes_lock(&adev->mes); + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to set_shader_debugger\n"); + + amdgpu_mes_unlock(&adev->mes); + + return r; +} + static void amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 894b9b133000..7d4f93fea937 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -296,9 +296,10 @@ struct mes_misc_op_input { uint64_t process_context_addr; union { struct { - uint64_t single_memop : 1; - uint64_t single_alu_op : 1; - uint64_t reserved: 30; + uint32_t single_memop : 1; + uint32_t single_alu_op : 1; + uint32_t reserved: 29; + uint32_t process_ctx_flush: 1; }; uint32_t u32all; } flags; @@ -374,7 +375,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, const uint32_t *tcp_watch_cntl, uint32_t flags, bool trap_en); - +int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, + uint64_t process_context_addr); int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int queue_type, int idx, struct amdgpu_mes_ctx_data *ctx_data, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 77f493262e05..8e55e78fce4e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -87,6 +87,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) return; dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); + amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); pdd->already_dequeued = 
true; } diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 1fbfd1aa987e..ec5b9ab67c5e 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -572,7 +572,8 @@ struct SET_SHADER_DEBUGGER { struct { uint32_t single_memop : 1; /* SQ_DEBUG.single_memop */ uint32_t single_alu_op : 1; /* SQ_DEBUG.single_alu_op */ - uint32_t reserved : 30; + uint32_t reserved : 29; + uint32_t process_ctx_flush : 1; }; uint32_t u32all; } flags; -- cgit v1.2.3 From f545d82479b46368bf00d0bfecf33fa914bd5f8f Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:50 -0100 Subject: drm/amd/display: add plane shaper LUT and TF driver-specific properties On AMD HW, 3D LUT always assumes a preceding shaper 1D LUT used for delinearizing and/or normalizing the color space before applying a 3D LUT. Add pre-defined transfer function to enable delinearizing content with or without shaper LUT, where AMD color module calculates the resulted shaper curve. We apply an inverse EOTF to go from linear values to encoded values. If we are already in a non-linear space and/or don't need to normalize values, we can bypass shaper LUT with a linear transfer function that is also the default TF value. There is no shaper ROM. When setting shaper TF (!= Identity) and LUT at the same time, the color module will combine the pre-defined TF and the custom LUT values into the LUT that's actually programmed. v2: - squash commits for shaper LUT and shaper TF - define inverse EOTF as supported shaper TFs v3: - spell out TF+LUT behavior in the commit and comments (Harry) - replace BT709 EOTF by inv OETF v5: - get shaper blob correctly (Joshua) Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 21 ++++++++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 11 ++++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 29 +++++++++++++++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 33 ++++++++++++++++++++++ 4 files changed, 94 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 7350094f3db6..981a2eb25257 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -364,6 +364,27 @@ struct amdgpu_mode_info { * @plane_hdr_mult_property: */ struct drm_property *plane_hdr_mult_property; + /** + * @shaper_lut_property: Plane property to set pre-blending shaper LUT + * that converts color content before 3D LUT. If + * plane_shaper_tf_property != Identity TF, AMD color module will + * combine the user LUT values with pre-defined TF into the LUT + * parameters to be programmed. + */ + struct drm_property *plane_shaper_lut_property; + /** + * @shaper_lut_size_property: Plane property for the size of + * pre-blending shaper LUT as supported by the driver (read-only). + */ + struct drm_property *plane_shaper_lut_size_property; + /** + * @plane_shaper_tf_property: Plane property to set a predefined + * transfer function for pre-blending shaper (before applying 3D LUT) + * with or without LUT. There is no shaper ROM, but we can use AMD + * color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). 
+ */ + struct drm_property *plane_shaper_tf_property; /** * @plane_lut3d_property: Plane property for color transformation using * a 3D LUT (pre-blending), a three-dimensional array where each diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 0f78024ac0e5..9bee60cbc2b5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -784,6 +784,17 @@ struct dm_plane_state { * TF is needed for any subsequent linear-to-non-linear transforms. */ __u64 hdr_mult; + /** + * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an + * array of &struct drm_color_lut. + */ + struct drm_property_blob *shaper_lut; + /** + * @shaper_tf: + * + * Predefined transfer function to delinearize color space. + */ + enum amdgpu_transfer_function shaper_tf; /** * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of * &struct drm_color_lut. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a3b4f6a4c4a8..866c6f1d3d99 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -173,6 +173,14 @@ static const u32 amdgpu_eotf = BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) | BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF); +static const u32 amdgpu_inv_eotf = + BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) | + BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) | + BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF); + static struct drm_property * amdgpu_create_tf_property(struct drm_device *dev, const char *name, @@ -231,6 +239,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_hdr_mult_property = prop; + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_SHAPER_LUT", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_shaper_lut_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + DRM_MODE_PROP_IMMUTABLE, + "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_shaper_lut_size_property = prop; + + prop = amdgpu_create_tf_property(adev_to_drm(adev), + "AMD_PLANE_SHAPER_TF", + amdgpu_inv_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_shaper_tf_property = prop; + prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_BLOB, "AMD_PLANE_LUT3D", 0); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index e588cff7d6b0..7f0de29889f0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1343,6 +1343,7 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane) __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT; + amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; } static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane) @@ -1364,12 +1365,16 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct if (old_dm_plane_state->degamma_lut) dm_plane_state->degamma_lut = 
drm_property_blob_get(old_dm_plane_state->degamma_lut); + if (old_dm_plane_state->shaper_lut) + dm_plane_state->shaper_lut = + drm_property_blob_get(old_dm_plane_state->shaper_lut); if (old_dm_plane_state->lut3d) dm_plane_state->lut3d = drm_property_blob_get(old_dm_plane_state->lut3d); dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult; + dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf; return &dm_plane_state->base; } @@ -1442,6 +1447,8 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, drm_property_blob_put(dm_plane_state->degamma_lut); if (dm_plane_state->lut3d) drm_property_blob_put(dm_plane_state->lut3d); + if (dm_plane_state->shaper_lut) + drm_property_blob_put(dm_plane_state->shaper_lut); if (dm_plane_state->dc_state) dc_plane_state_release(dm_plane_state->dc_state); @@ -1477,6 +1484,14 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, AMDGPU_HDR_MULT_DEFAULT); if (dpp_color_caps.hw_3d_lut) { + drm_object_attach_property(&plane->base, + mode_info.plane_shaper_lut_property, 0); + drm_object_attach_property(&plane->base, + mode_info.plane_shaper_lut_size_property, + MAX_COLOR_LUT_ENTRIES); + drm_object_attach_property(&plane->base, + mode_info.plane_shaper_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); drm_object_attach_property(&plane->base, mode_info.plane_lut3d_property, 0); drm_object_attach_property(&plane->base, @@ -1514,6 +1529,19 @@ dm_atomic_plane_set_property(struct drm_plane *plane, dm_plane_state->hdr_mult = val; dm_plane_state->base.color_mgmt_changed = 1; } + } else if (property == adev->mode_info.plane_shaper_lut_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->shaper_lut, + val, -1, + sizeof(struct drm_color_lut), + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; + } else if (property == adev->mode_info.plane_shaper_tf_property) { + if (dm_plane_state->shaper_tf != val) { + dm_plane_state->shaper_tf = val; + dm_plane_state->base.color_mgmt_changed = 1; + } } else if (property == adev->mode_info.plane_lut3d_property) { ret = drm_property_replace_blob_from_id(plane->dev, &dm_plane_state->lut3d, @@ -1549,6 +1577,11 @@ dm_atomic_plane_get_property(struct drm_plane *plane, *val = dm_plane_state->degamma_tf; } else if (property == adev->mode_info.plane_hdr_mult_property) { *val = dm_plane_state->hdr_mult; + } else if (property == adev->mode_info.plane_shaper_lut_property) { + *val = (dm_plane_state->shaper_lut) ? + dm_plane_state->shaper_lut->base.id : 0; + } else if (property == adev->mode_info.plane_shaper_tf_property) { + *val = dm_plane_state->shaper_tf; } else if (property == adev->mode_info.plane_lut3d_property) { *val = (dm_plane_state->lut3d) ? dm_plane_state->lut3d->base.id : 0; -- cgit v1.2.3 From 0ef47454dc82358b62a424b37c7520a84f307edb Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:57:51 -0100 Subject: drm/amd/display: add plane blend LUT and TF driver-specific properties Blend 1D LUT or a pre-defined transfer function (TF) can be set to linearize content before blending, so that it's positioned just before blending planes in the AMD color mgmt pipeline, and after 3D LUT (non-linear space). Shaper and Blend LUTs are 1D LUTs that sandwich 3D LUT. Drivers should advertize blend properties according to HW caps. There is no blend ROM for pre-defined TF. 
When setting blend TF (!= Identity) and LUT at the same time, the color module will combine the pre-defined TF and the custom LUT values into the LUT that's actually programmed. v3: - spell out TF+LUT behavior in the commit and comments (Harry) v5: - get blend blob correctly Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 22 +++++++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 12 +++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 21 ++++++++++++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 37 ++++++++++++++++++++++ 4 files changed, 92 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 981a2eb25257..6ff37272ccad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -403,6 +403,28 @@ struct amdgpu_mode_info { * entries for 3D LUT array is the 3D LUT size cubed; */ struct drm_property *plane_lut3d_size_property; + /** + * @plane_blend_lut_property: Plane property for output gamma before + * blending. Userspace set a blend LUT to convert colors after 3D LUT + * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they + * are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property + * != Identity TF, AMD color module will combine the user LUT values + * with pre-defined TF into the LUT parameters to be programmed. + */ + struct drm_property *plane_blend_lut_property; + /** + * @plane_blend_lut_size_property: Plane property to define the max + * size of blend LUT as supported by the driver (read-only). + */ + struct drm_property *plane_blend_lut_size_property; + /** + * @plane_blend_tf_property: Plane property to set a predefined + * transfer function for pre-blending blend/out_gamma (after applying + * 3D LUT) with or without LUT. There is no blend ROM, but we can use + * AMD color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). + */ + struct drm_property *plane_blend_tf_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 9bee60cbc2b5..a9a0bc875b78 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -800,6 +800,18 @@ struct dm_plane_state { * &struct drm_color_lut. */ struct drm_property_blob *lut3d; + /** + * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an + * array of &struct drm_color_lut. + */ + struct drm_property_blob *blend_lut; + /** + * @blend_tf: + * + * Pre-defined transfer function for converting plane pixel data before + * applying blend LUT. 
+ */ + enum amdgpu_transfer_function blend_tf; }; struct dm_crtc_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 866c6f1d3d99..425676f5afb7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -274,6 +274,27 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_lut3d_size_property = prop; + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_BLEND_LUT", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_lut_property = prop; + + prop = drm_property_create_range(adev_to_drm(adev), + DRM_MODE_PROP_IMMUTABLE, + "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_lut_size_property = prop; + + prop = amdgpu_create_tf_property(adev_to_drm(adev), + "AMD_PLANE_BLEND_TF", + amdgpu_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_blend_tf_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 7f0de29889f0..35a4732483ba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1344,6 +1344,7 @@ static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane) amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT; amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; } static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane) @@ -1371,10 +1372,14 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct if (old_dm_plane_state->lut3d) dm_plane_state->lut3d = drm_property_blob_get(old_dm_plane_state->lut3d); + if (old_dm_plane_state->blend_lut) + dm_plane_state->blend_lut = + drm_property_blob_get(old_dm_plane_state->blend_lut); dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf; dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult; dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf; + dm_plane_state->blend_tf = old_dm_plane_state->blend_tf; return &dm_plane_state->base; } @@ -1449,6 +1454,8 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, drm_property_blob_put(dm_plane_state->lut3d); if (dm_plane_state->shaper_lut) drm_property_blob_put(dm_plane_state->shaper_lut); + if (dm_plane_state->blend_lut) + drm_property_blob_put(dm_plane_state->blend_lut); if (dm_plane_state->dc_state) dc_plane_state_release(dm_plane_state->dc_state); @@ -1498,6 +1505,17 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, mode_info.plane_lut3d_size_property, MAX_COLOR_3DLUT_SIZE); } + + if (dpp_color_caps.ogam_ram) { + drm_object_attach_property(&plane->base, + mode_info.plane_blend_lut_property, 0); + drm_object_attach_property(&plane->base, + mode_info.plane_blend_lut_size_property, + MAX_COLOR_LUT_ENTRIES); + drm_object_attach_property(&plane->base, + mode_info.plane_blend_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); + } } static int @@ -1550,6 +1568,19 @@ dm_atomic_plane_set_property(struct drm_plane *plane, &replaced); dm_plane_state->base.color_mgmt_changed |= replaced; return ret; + } else if (property == 
adev->mode_info.plane_blend_lut_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->blend_lut, + val, -1, + sizeof(struct drm_color_lut), + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; + } else if (property == adev->mode_info.plane_blend_tf_property) { + if (dm_plane_state->blend_tf != val) { + dm_plane_state->blend_tf = val; + dm_plane_state->base.color_mgmt_changed = 1; + } } else { drm_dbg_atomic(plane->dev, "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n", @@ -1585,6 +1616,12 @@ dm_atomic_plane_get_property(struct drm_plane *plane, } else if (property == adev->mode_info.plane_lut3d_property) { *val = (dm_plane_state->lut3d) ? dm_plane_state->lut3d->base.id : 0; + } else if (property == adev->mode_info.plane_blend_lut_property) { + *val = (dm_plane_state->blend_lut) ? + dm_plane_state->blend_lut->base.id : 0; + } else if (property == adev->mode_info.plane_blend_tf_property) { + *val = dm_plane_state->blend_tf; + } else { return -EINVAL; } -- cgit v1.2.3 From 0f5afa190b890052cae187496f660699f00067ef Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:52 -0100 Subject: drm/amd/display: add CRTC gamma TF driver-specific property Add AMD pre-defined transfer function property to default DRM CRTC gamma to convert to wire encoding with or without a user gamma LUT. There is no post-blending regamma ROM for pre-defined TF. When setting Gamma TF (!= Identity) and LUT at the same time, the color module will combine the pre-defined TF and the custom LUT values into the LUT that's actually programmed. v2: - enable CRTC prop in the end of driver-specific prop sequence - define inverse EOTFs as supported regamma TFs - reword driver-specific function doc to remove shaper/3D LUT v3: - spell out TF+LUT behavior in the commit and comments (Harry) Reviewed-by: Harry Wentland Co-developed-by: Joshua Ashton Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 7 +++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 8 +++ .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 7 +++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 72 ++++++++++++++++++++++ 4 files changed, 94 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 6ff37272ccad..9fbe09743dde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -425,6 +425,13 @@ struct amdgpu_mode_info { * from a combination of pre-defined TF and the custom 1D LUT). */ struct drm_property *plane_blend_tf_property; + /* @regamma_tf_property: Transfer function for CRTC regamma + * (post-blending). Possible values are defined by `enum + * amdgpu_transfer_function`. There is no regamma ROM, but we can use + * AMD color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). 
+ */ + struct drm_property *regamma_tf_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index a9a0bc875b78..d9923c9f39b0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -836,6 +836,14 @@ struct dm_crtc_state { struct dc_info_packet vrr_infopacket; int abm_level; + + /** + * @regamma_tf: + * + * Pre-defined transfer function for converting internal FB -> wire + * encoding. + */ + enum amdgpu_transfer_function regamma_tf; }; #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 425676f5afb7..4150e9370daf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -295,6 +295,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_blend_tf_property = prop; + prop = amdgpu_create_tf_property(adev_to_drm(adev), + "AMD_CRTC_REGAMMA_TF", + amdgpu_inv_eotf); + if (!prop) + return -ENOMEM; + adev->mode_info.regamma_tf_property = prop; + return 0; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index cb0b48bb2a7d..9224e2d7f32b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -260,6 +260,7 @@ static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *cr state->freesync_config = cur->freesync_config; state->cm_has_degamma = cur->cm_has_degamma; state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; + state->regamma_tf = cur->regamma_tf; state->crc_skip_count = cur->crc_skip_count; state->mpo_requested = cur->mpo_requested; /* TODO Duplicate dc_stream after objects are stream object is flattened */ @@ -296,6 +297,70 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) } #endif +#ifdef AMD_PRIVATE_COLOR +/** + * drm_crtc_additional_color_mgmt - enable additional color properties + * @crtc: DRM CRTC + * + * This function lets the driver enable post-blending CRTC regamma transfer + * function property in addition to DRM CRTC gamma LUT. Default value means + * linear transfer function, which is the default CRTC gamma LUT behaviour + * without this property. 
+ */ +static void +dm_crtc_additional_color_mgmt(struct drm_crtc *crtc) +{ + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + + if(adev->dm.dc->caps.color.mpc.ogam_ram) + drm_object_attach_property(&crtc->base, + adev->mode_info.regamma_tf_property, + AMDGPU_TRANSFER_FUNCTION_DEFAULT); +} + +static int +amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc, + struct drm_crtc_state *state, + struct drm_property *property, + uint64_t val) +{ + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state); + + if (property == adev->mode_info.regamma_tf_property) { + if (acrtc_state->regamma_tf != val) { + acrtc_state->regamma_tf = val; + acrtc_state->base.color_mgmt_changed |= 1; + } + } else { + drm_dbg_atomic(crtc->dev, + "[CRTC:%d:%s] unknown property [PROP:%d:%s]]\n", + crtc->base.id, crtc->name, + property->base.id, property->name); + return -EINVAL; + } + + return 0; +} + +static int +amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc, + const struct drm_crtc_state *state, + struct drm_property *property, + uint64_t *val) +{ + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state); + + if (property == adev->mode_info.regamma_tf_property) + *val = acrtc_state->regamma_tf; + else + return -EINVAL; + + return 0; +} +#endif + /* Implemented only the options currently available for the driver */ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { .reset = amdgpu_dm_crtc_reset_state, @@ -314,6 +379,10 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, #endif +#ifdef AMD_PRIVATE_COLOR + .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, + .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, +#endif }; static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc) @@ -489,6 +558,9 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); +#ifdef AMD_PRIVATE_COLOR + dm_crtc_additional_color_mgmt(&acrtc->base); +#endif return 0; fail: -- cgit v1.2.3 From 98fbb52772063ad2547d6d1b80ff99bc26761e79 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:53 -0100 Subject: drm/amd/display: add comments to describe DM crtc color mgmt behavior Describe some expected behavior of the AMD DM color mgmt programming. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 4150e9370daf..0b92513eab98 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -661,13 +661,25 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) crtc->cm_is_degamma_srgb = true; stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; - + /* + * Note: although we pass has_rom as parameter here, we never + * actually use ROM because the color module only takes the ROM + * path if transfer_func->type == PREDEFINED. 
+ * + * See more in mod_color_calculate_regamma_params() + */ r = __set_legacy_tf(stream->out_transfer_func, regamma_lut, regamma_size, has_rom); if (r) return r; } else if (has_regamma) { - /* If atomic regamma, CRTC RGM goes into RGM LUT. */ + /* + * CRTC RGM goes into RGM LUT. + * + * Note: there is no implicit sRGB regamma here. We are using + * degamma calculation from color module to calculate the curve + * from a linear base. + */ stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; -- cgit v1.2.3 From 8b6b3f668f31a24b5406661388b9a69202e83e9d Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:54 -0100 Subject: drm/amd/display: encapsulate atomic regamma operation We will wire up MPC 3D LUT to DM CRTC color pipeline in the next patch, but so far, only for atomic interface. By checking set_output_transfer_func in DC drivers with MPC 3D LUT support, we can verify that regamma is only programmed when 3D LUT programming fails. As a groundwork to introduce 3D LUT programming and better understand each step, detach atomic regamma programming from the crtc colocr updating code. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 55 ++++++++++++++-------- 1 file changed, 35 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 0b92513eab98..dec734f03832 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -525,6 +525,37 @@ static int __set_output_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; } +static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, + const struct drm_color_lut *regamma_lut, + uint32_t regamma_size, bool has_rom) +{ + struct dc_transfer_func *out_tf = stream->out_transfer_func; + int ret = 0; + + if (regamma_size) { + /* + * CRTC RGM goes into RGM LUT. + * + * Note: there is no implicit sRGB regamma here. We are using + * degamma calculation from color module to calculate the curve + * from a linear base. + */ + out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + out_tf->tf = TRANSFER_FUNCTION_LINEAR; + + ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom); + } else { + /* + * No CRTC RGM means we can just put the block into bypass + * since we don't have any plane level adjustments using it. + */ + out_tf->type = TF_TYPE_BYPASS; + out_tf->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + /** * __set_input_tf - calculates the input transfer function based on expected * input space. @@ -672,28 +703,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) regamma_size, has_rom); if (r) return r; - } else if (has_regamma) { - /* - * CRTC RGM goes into RGM LUT. - * - * Note: there is no implicit sRGB regamma here. We are using - * degamma calculation from color module to calculate the curve - * from a linear base. - */ - stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - - r = __set_output_tf(stream->out_transfer_func, regamma_lut, - regamma_size, has_rom); + } else { + regamma_size = has_regamma ? 
regamma_size : 0; + r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, + regamma_size, has_rom); if (r) return r; - } else { - /* - * No CRTC RGM means we can just put the block into bypass - * since we don't have any plane level adjustments using it. - */ - stream->out_transfer_func->type = TF_TYPE_BYPASS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; } /* -- cgit v1.2.3 From 6bd20f0f165f444c1d8184ebd238dd92966c9dca Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:57:55 -0100 Subject: drm/amd/display: add CRTC gamma TF support Add predefined transfer function programming. There is no post-blending out gamma ROM for hardcoded curves, but we can use AMD color modules to program LUT parameters from pre-defined coefficients and an empty regamma LUT (or bump up LUT parameters with pre-defined TF values). v2: - update crtc color mgmt if regamma TF differs between states (Joshua) - map inverse EOTF to DC transfer function (Melissa) v3: - update AMDGPU TF list v4: - update comment regarding regamma behavior Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 77 +++++++++++++++++----- 2 files changed, 61 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6efa0c1bba9d..22c46be291d1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9860,6 +9860,7 @@ skip_modeset: * when a modeset is needed, to ensure it gets reprogrammed. */ if (dm_new_crtc_state->base.color_mgmt_changed || + dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || drm_atomic_crtc_needs_modeset(new_crtc_state)) { ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); if (ret) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index dec734f03832..9b930e3eb79d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -490,16 +490,18 @@ static int __set_output_tf(struct dc_transfer_func *func, struct calculate_buffer cal_buffer = {0}; bool res; - ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); - cal_buffer.buffer_index = -1; - gamma = dc_create_gamma(); - if (!gamma) - return -ENOMEM; + if (lut_size) { + ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES); - gamma->num_entries = lut_size; - __drm_lut_to_dc_gamma(lut, gamma, false); + gamma = dc_create_gamma(); + if (!gamma) + return -ENOMEM; + + gamma->num_entries = lut_size; + __drm_lut_to_dc_gamma(lut, gamma, false); + } if (func->tf == TRANSFER_FUNCTION_LINEAR) { /* @@ -507,41 +509,49 @@ static int __set_output_tf(struct dc_transfer_func *func, * on top of a linear input. But degamma params can be used * instead to simulate this. */ - gamma->type = GAMMA_CUSTOM; + if (gamma) + gamma->type = GAMMA_CUSTOM; res = mod_color_calculate_degamma_params(NULL, func, - gamma, true); + gamma, gamma != NULL); } else { /* * Assume sRGB. The actual mapping will depend on whether the * input was legacy or not. 
*/ - gamma->type = GAMMA_CS_TFM_1D; - res = mod_color_calculate_regamma_params(func, gamma, false, + if (gamma) + gamma->type = GAMMA_CS_TFM_1D; + res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL, has_rom, NULL, &cal_buffer); } - dc_gamma_release(&gamma); + if (gamma) + dc_gamma_release(&gamma); return res ? 0 : -ENOMEM; } static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, const struct drm_color_lut *regamma_lut, - uint32_t regamma_size, bool has_rom) + uint32_t regamma_size, bool has_rom, + enum dc_transfer_func_predefined tf) { struct dc_transfer_func *out_tf = stream->out_transfer_func; int ret = 0; - if (regamma_size) { + if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) { /* * CRTC RGM goes into RGM LUT. * * Note: there is no implicit sRGB regamma here. We are using * degamma calculation from color module to calculate the curve - * from a linear base. + * from a linear base if gamma TF is not set. However, if gamma + * TF (!= Linear) and LUT are set at the same time, we will use + * regamma calculation, and the color module will combine the + * pre-defined TF and the custom LUT values into the LUT that's + * actually programmed. */ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - out_tf->tf = TRANSFER_FUNCTION_LINEAR; + out_tf->tf = tf; ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom); } else { @@ -587,6 +597,36 @@ static int __set_input_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; } +static enum dc_transfer_func_predefined +amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) +{ + switch (tf) + { + default: + case AMDGPU_TRANSFER_FUNCTION_DEFAULT: + case AMDGPU_TRANSFER_FUNCTION_IDENTITY: + return TRANSFER_FUNCTION_LINEAR; + case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF: + case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF: + return TRANSFER_FUNCTION_SRGB; + case AMDGPU_TRANSFER_FUNCTION_BT709_OETF: + case AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF: + return TRANSFER_FUNCTION_BT709; + case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF: + case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF: + return TRANSFER_FUNCTION_PQ; + case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF: + case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF: + return TRANSFER_FUNCTION_GAMMA22; + case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF: + case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF: + return TRANSFER_FUNCTION_GAMMA24; + case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF: + case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF: + return TRANSFER_FUNCTION_GAMMA26; + } +} + /** * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes * @crtc_state: the DRM CRTC state @@ -654,9 +694,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) const struct drm_color_lut *degamma_lut, *regamma_lut; uint32_t degamma_size, regamma_size; bool has_regamma, has_degamma; + enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR; bool is_legacy; int r; + tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf); + r = amdgpu_dm_verify_lut_sizes(&crtc->base); if (r) return r; @@ -706,7 +749,7 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) } else { regamma_size = has_regamma ? 
regamma_size : 0; r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, - regamma_size, has_rom); + regamma_size, has_rom, tf); if (r) return r; } -- cgit v1.2.3 From d9501844d53897ca7ac04697b8504940c6dfdbb3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 12 Dec 2023 15:53:09 +0200 Subject: drm/amd: include drm/drm_edid.h only where needed Including drm_edid.h from amdgpu_mode.h causes the rebuild of literally hundreds of files when drm_edid.h is modified, while there are only a handful of files that actually need to include drm_edid.h. Signed-off-by: Jani Nikula Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 1 + drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c | 1 + 9 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 9fbe09743dde..790c08b8262d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -32,7 +32,6 @@ #include #include -#include #include #include #include @@ -51,6 +50,7 @@ struct amdgpu_device; struct amdgpu_encoder; struct amdgpu_router; struct amdgpu_hpd; +struct edid; #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base) #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index db6fc0cb18eb..453a4b786cfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ #include +#include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 3ee219aa2891..7672abe6c140 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -28,6 +28,7 @@ #include +#include #include #include "amdgpu.h" #include "amdgpu_connectors.h" diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index bb666cb7522e..587ee632a3b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -21,6 +21,7 @@ * */ +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 7af277f61cca..f22ec27365bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -21,6 +21,7 @@ * */ +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 143efc37a17f..4dbe9b3259b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -23,6 +23,7 @@ #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index adeddfb7ff12..05bcce23385e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -21,6 +21,7 @@ * */ +#include #include #include #include diff --git 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 19ae5587dd5a..941e96f100f4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "dm_services.h" #include "amdgpu.h" #include "amdgpu_dm.h" diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index b3e634b0f712..16e72d623630 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -32,6 +32,7 @@ #include "amdgpu_display.h" #include "dc.h" +#include #include #include -- cgit v1.2.3 From 88d26ea639a8e9d314e6bffef5f382167e7203e2 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:57:56 -0100 Subject: drm/amd/display: set sdr_ref_white_level to 80 for out_transfer_func Otherwise this is just initialized to 0. This needs to actually have a value so that compute_curve can work for PQ EOTF. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 9b930e3eb79d..83c14fb57721 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -72,6 +72,7 @@ */ #define MAX_DRM_LUT_VALUE 0xFFFF +#define SDR_WHITE_LEVEL_INIT_VALUE 80 /** * amdgpu_dm_init_color_mod - Initialize the color module. @@ -552,6 +553,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, */ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; out_tf->tf = tf; + out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom); } else { -- cgit v1.2.3 From 6bed9d550e51534415a56f8de33f5b9d4e728e53 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:57:57 -0100 Subject: drm/amd/display: mark plane as needing reset if color props change We should reset a plane state if at least one of the color management properties differs from old and new state. 
Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 22c46be291d1..0af5d908e169 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9929,6 +9929,10 @@ static bool should_reset_plane(struct drm_atomic_state *state, */ for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) { struct amdgpu_framebuffer *old_afb, *new_afb; + struct dm_plane_state *dm_new_other_state, *dm_old_other_state; + + dm_new_other_state = to_dm_plane_state(new_other_state); + dm_old_other_state = to_dm_plane_state(old_other_state); if (other->type == DRM_PLANE_TYPE_CURSOR) continue; @@ -9965,6 +9969,17 @@ static bool should_reset_plane(struct drm_atomic_state *state, old_other_state->color_encoding != new_other_state->color_encoding) return true; + /* HDR/Transfer Function changes. */ + if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf || + dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut || + dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult || + dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut || + dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf || + dm_old_other_state->lut3d != dm_new_other_state->lut3d || + dm_old_other_state->blend_lut != dm_new_other_state->blend_lut || + dm_old_other_state->blend_tf != dm_new_other_state->blend_tf) + return true; + /* Framebuffer checks fall at the end. */ if (!old_other_state->fb || !new_other_state->fb) continue; -- cgit v1.2.3 From 73e5ea616a9f8c261d07e63b421947949ad6cbce Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:57:58 -0100 Subject: drm/amd/display: decouple steps for mapping CRTC degamma to DC plane The next patch adds pre-blending degamma to AMD color mgmt pipeline, but pre-blending degamma caps (DPP) is currently in use to provide DRM CRTC atomic degamma or implict degamma on legacy gamma. Detach degamma usage regarging CRTC color properties to manage plane and CRTC color correction combinations. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 60 +++++++++++++++------- 1 file changed, 42 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 83c14fb57721..f13d22851fe4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -789,20 +789,9 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) return 0; } -/** - * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. - * @crtc: amdgpu_dm crtc state - * @dc_plane_state: target DC surface - * - * Update the underlying dc_stream_state's input transfer function (ITF) in - * preparation for hardware commit. The transfer function used depends on - * the preparation done on the stream for color management. - * - * Returns: - * 0 on success. -ENOMEM if mem allocation fails. 
- */ -int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, - struct dc_plane_state *dc_plane_state) +static int +map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, + struct dc_plane_state *dc_plane_state) { const struct drm_color_lut *degamma_lut; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; @@ -825,8 +814,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, °amma_size); ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); - dc_plane_state->in_transfer_func->type = - TF_TYPE_DISTRIBUTED_POINTS; + dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; /* * This case isn't fully correct, but also fairly @@ -862,7 +850,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, degamma_lut, degamma_size); if (r) return r; - } else if (crtc->cm_is_degamma_srgb) { + } else { /* * For legacy gamma support we need the regamma input * in linear space. Assume that the input is sRGB. @@ -872,8 +860,44 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, if (tf != TRANSFER_FUNCTION_SRGB && !mod_color_calculate_degamma_params(NULL, - dc_plane_state->in_transfer_func, NULL, false)) + dc_plane_state->in_transfer_func, + NULL, false)) return -ENOMEM; + } + + return 0; +} + +/** + * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. + * @crtc: amdgpu_dm crtc state + * @dc_plane_state: target DC surface + * + * Update the underlying dc_stream_state's input transfer function (ITF) in + * preparation for hardware commit. The transfer function used depends on + * the preparation done on the stream for color management. + * + * Returns: + * 0 on success. -ENOMEM if mem allocation fails. + */ +int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + struct dc_plane_state *dc_plane_state) +{ + bool has_crtc_cm_degamma; + int ret; + + has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); + if (has_crtc_cm_degamma){ + /* + * AMD HW doesn't have post-blending degamma caps. When DRM + * CRTC atomic degamma is set, we maps it to DPP degamma block + * (pre-blending) or, on legacy gamma, we use DPP degamma to + * linearize (implicit degamma) from sRGB/BT709 according to + * the input space. + */ + ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state); + if (ret) + return ret; } else { /* ...Otherwise we can just bypass the DGM block. */ dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; -- cgit v1.2.3 From 683b8c7e7a94fb7445b8d300c7404322ad040bab Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 8 Dec 2023 13:43:09 +0100 Subject: drm/amdgpu: fix tear down order in amdgpu_vm_pt_free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When freeing PD/PT with shadows it can happen that the shadow destruction races with detaching the PD/PT from the VM causing a NULL pointer dereference in the invalidation code. Fix this by detaching the the PD/PT from the VM first and then freeing the shadow instead. 
Signed-off-by: Christian König Fixes: https://gitlab.freedesktop.org/drm/amd/-/issues/2867 Cc: Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index a2287bb25223..a160265ddc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + + entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(&shadow->tbo, NULL); amdgpu_bo_unref(&shadow); } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); -- cgit v1.2.3 From 980f8710075acaeb226a94cde6dda8ffad30123c Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:57:59 -0100 Subject: drm/amd/display: add plane degamma TF and LUT support Set DC plane with user degamma LUT or predefined TF from driver-specific plane color properties. If plane and CRTC degamma are set in the same time, plane degamma has priority. That means, we only set CRTC degamma if we don't have plane degamma LUT or TF to configure. We return -EINVAL if we don't have plane degamma settings, so we can continue and check CRTC degamma. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 + .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 70 ++++++++++++++++++++-- 3 files changed, 69 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0af5d908e169..911db58d0e1c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5169,7 +5169,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, * Always set input transfer function, since plane state is refreshed * every time. 
*/ - ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); + ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, + plane_state, + dc_plane_state); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index d9923c9f39b0..16af549d0b42 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -918,6 +918,7 @@ int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state); void amdgpu_dm_update_connector_after_detect( diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index f13d22851fe4..13117516f2a4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -868,9 +868,58 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, return 0; } +static int +__set_dm_plane_degamma(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + const struct drm_color_lut *degamma_lut; + enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + uint32_t degamma_size; + bool has_degamma_lut; + int ret; + + degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut, + °amma_size); + + has_degamma_lut = degamma_lut && + !__is_lut_linear(degamma_lut, degamma_size); + + tf = dm_plane_state->degamma_tf; + + /* If we don't have plane degamma LUT nor TF to set on DC, we have + * nothing to do here, return. + */ + if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT) + return -EINVAL; + + dc_plane_state->in_transfer_func->tf = amdgpu_tf_to_dc_tf(tf); + + if (has_degamma_lut) { + ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES); + + dc_plane_state->in_transfer_func->type = + TF_TYPE_DISTRIBUTED_POINTS; + + ret = __set_input_tf(dc_plane_state->in_transfer_func, + degamma_lut, degamma_size); + if (ret) + return ret; + } else { + dc_plane_state->in_transfer_func->type = + TF_TYPE_PREDEFINED; + + if (!mod_color_calculate_degamma_params(NULL, + dc_plane_state->in_transfer_func, NULL, false)) + return -ENOMEM; + } + return 0; +} + /** * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. * @crtc: amdgpu_dm crtc state + * @plane_state: DRM plane state * @dc_plane_state: target DC surface * * Update the underlying dc_stream_state's input transfer function (ITF) in @@ -881,13 +930,28 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, * 0 on success. -ENOMEM if mem allocation fails. */ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, + struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) { bool has_crtc_cm_degamma; int ret; + /* Initially, we can just bypass the DGM block. 
*/ + dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; + dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; + + /* After, we start to update values according to color props */ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); - if (has_crtc_cm_degamma){ + + ret = __set_dm_plane_degamma(plane_state, dc_plane_state); + if (ret != -EINVAL) + return ret; + + /* If we are here, it means we don't have plane degamma settings, check + * if we have CRTC degamma waiting for mapping to pre-blending degamma + * block + */ + if (has_crtc_cm_degamma) { /* * AMD HW doesn't have post-blending degamma caps. When DRM * CRTC atomic degamma is set, we maps it to DPP degamma block @@ -898,10 +962,6 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state); if (ret) return ret; - } else { - /* ...Otherwise we can just bypass the DGM block. */ - dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; - dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; } return 0; -- cgit v1.2.3 From ef113a3b1964b40dd87287806865b947d70f7df5 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:58:00 -0100 Subject: drm/amd/display: reject atomic commit if setting both plane and CRTC degamma DC only has pre-blending degamma caps (plane/DPP) that is currently in use for CRTC/post-blending degamma, so that we don't have HW caps to perform plane and CRTC degamma at the same time. Reject atomic updates when serspace sets both plane and CRTC degamma properties. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 13117516f2a4..8fc715fa426c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -944,9 +944,20 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); ret = __set_dm_plane_degamma(plane_state, dc_plane_state); - if (ret != -EINVAL) + if (ret == -ENOMEM) return ret; + /* We only have one degamma block available (pre-blending) for the + * whole color correction pipeline, so that we can't actually perform + * plane and CRTC degamma at the same time. Explicitly reject atomic + * updates when userspace sets both plane and CRTC degamma properties. + */ + if (has_crtc_cm_degamma && ret != -EINVAL){ + drm_dbg_kms(crtc->base.crtc->dev, + "doesn't support plane and CRTC degamma at the same time\n"); + return -EINVAL; + } + /* If we are here, it means we don't have plane degamma settings, check * if we have CRTC degamma waiting for mapping to pre-blending degamma * block -- cgit v1.2.3 From 889044f9e04f0829dd92640c551941bbe77bc0ea Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:58:01 -0100 Subject: drm/amd/display: add dc_fixpt_from_s3132 helper Detach value translation from CTM to reuse it for programming HDR multiplier property. 
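The values this helper consumes are S31.32 fixed point in sign-magnitude form (bit 63 is the sign bit, not a two's complement bit), which is why the sign is stripped before negating. A userspace-side sketch of the matching encoder, the inverse of dc_fixpt_from_s3132(); the helper name s3132_from_double() is illustrative only:

#include <math.h>
#include <stdint.h>

/*
 * Encode a double as the sign-magnitude S31.32 fixed-point value used by
 * the DRM CTM property (and reused by the HDR multiplier): bit 63 is the
 * sign, bits 62:32 the integer part, bits 31:0 the fraction.
 */
static uint64_t s3132_from_double(double v)
{
	uint64_t sign = 0;

	if (v < 0.0) {
		sign = 1ULL << 63;
		v = -v;
	}

	/* Multiplying by 2^32 moves the fraction into the low 32 bits. */
	return sign | (uint64_t)llround(v * 4294967296.0);
}
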
Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 8 +------- drivers/gpu/drm/amd/display/include/fixed31_32.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 8fc715fa426c..c160dbef5224 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -405,7 +405,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, struct fixed31_32 *matrix) { - int64_t val; int i; /* @@ -424,12 +423,7 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ - val = ctm->matrix[i - (i / 4)]; - /* If negative, convert to 2's complement. */ - if (val & (1ULL << 63)) - val = -(val & ~(1ULL << 63)); - - matrix[i].value = val; + matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]); } } diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index d4cf7ead1d87..84da1dd34efd 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -69,6 +69,18 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL }; static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL }; static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL }; +static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x) +{ + struct fixed31_32 val; + + /* If negative, convert to 2's complement. */ + if (x & (1ULL << 63)) + x = -(x & ~(1ULL << 63)); + + val.value = x; + return val; +} + /* * @brief * Initialization routines -- cgit v1.2.3 From 4bc59ddf57c1f68ea035c4f242108f29d91797fd Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:58:02 -0100 Subject: drm/amd/display: add HDR multiplier support With `dc_fixpt_from_s3132()` translation, we can just use it to set hdr_mult. 
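A sketch of how a compositor might drive this, assuming the plane property created earlier in the series is named "AMD_PLANE_HDR_MULT" and reusing the hypothetical find_plane_prop() and s3132_from_double() helpers sketched above; treat the names as illustrative rather than a stable uAPI reference:

/*
 * Illustrative only: scale a plane by 4x before blending, e.g. to map
 * SDR content into an HDR composition space.  Error handling omitted.
 */
static int set_plane_hdr_mult(int fd, drmModeAtomicReq *req, uint32_t plane_id)
{
	uint32_t prop = find_plane_prop(fd, plane_id, "AMD_PLANE_HDR_MULT");

	if (!prop)
		return -1; /* property not exposed by this plane/kernel */

	if (drmModeAtomicAddProperty(req, plane_id, prop,
				     s3132_from_double(4.0)) < 0)
		return -1;

	return 0;
}
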
Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 911db58d0e1c..4a1139b3cbc5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8268,6 +8268,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction; bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix; + bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index c160dbef5224..35f148524827 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -927,6 +927,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) { + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); bool has_crtc_cm_degamma; int ret; @@ -937,6 +938,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, /* After, we start to update values according to color props */ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); + dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); + ret = __set_dm_plane_degamma(plane_state, dc_plane_state); if (ret == -ENOMEM) return ret; -- cgit v1.2.3 From aba8b76baabde681ab4ff686452005d80d949345 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:58:03 -0100 Subject: drm/amd/display: add plane shaper LUT support Map DC shaper LUT to DM plane color management. Shaper LUT can be used to delinearize and/or normalize the color space for computational efficiency and achiving specific visual styles. If a plane degamma is apply to linearize the color space, a custom shaper 1D LUT can be used just before applying 3D LUT. 
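A rough idea of the userspace side, assuming a 4096-entry LUT (matching the MAX_COLOR_LUT_ENTRIES value advertised via AMD_PLANE_SHAPER_LUT_SIZE) and a simple gamma 1/2.2 curve for illustration; a real compositor would more likely sample a PQ-style curve matched to its 3D LUT:

#include <math.h>
#include <stdint.h>
#include <xf86drmMode.h>

#define SHAPER_LUT_ENTRIES 4096	/* assumed AMD_PLANE_SHAPER_LUT_SIZE */

/*
 * Build a gamma 1/2.2 shaper LUT (linear -> non-linear) and wrap it in a
 * DRM property blob suitable for the AMD_PLANE_SHAPER_LUT property.
 */
static int create_shaper_lut_blob(int fd, uint32_t *blob_id)
{
	static struct drm_color_lut lut[SHAPER_LUT_ENTRIES];

	for (int i = 0; i < SHAPER_LUT_ENTRIES; i++) {
		double in = (double)i / (SHAPER_LUT_ENTRIES - 1);
		uint16_t out = (uint16_t)lround(pow(in, 1.0 / 2.2) * 0xffff);

		lut[i].red = lut[i].green = lut[i].blue = out;
	}

	return drmModeCreatePropertyBlob(fd, lut, sizeof(lut), blob_id);
}
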
v2: - use DPP color caps to verify plane 3D LUT support - add debug message if shaper LUT programming fails v4: - remove helper to check 3D LUT color caps (Harry) - update desc of lut3d-setup helper from MPC to DPP v5: - remove color_mgmt_changed check that prevents color updates (Joshua) Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 + .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 93 +++++++++++++++++++++- 3 files changed, 92 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4a1139b3cbc5..73b12ca2fe60 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8269,6 +8269,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func; bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix; bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; + bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 16af549d0b42..7706d412d206 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -908,6 +908,8 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); /* 3D LUT max size is 17x17x17 (4913 entries) */ #define MAX_COLOR_3DLUT_SIZE 17 #define MAX_COLOR_3DLUT_BITDEPTH 12 +int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, + struct drm_plane_state *plane_state); /* 1D LUT size */ #define MAX_COLOR_LUT_ENTRIES 4096 /* Legacy gamm LUT users such as X doesn't like large LUT sizes */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 35f148524827..daa500b04cb5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -623,6 +623,63 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) } } +static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, + uint32_t shaper_size, + struct dc_transfer_func *func_shaper) +{ + int ret = 0; + + if (shaper_size) { + /* + * If user shaper LUT is set, we assume a linear color space + * (linearized by degamma 1D LUT or not). + */ + func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS; + func_shaper->tf = TRANSFER_FUNCTION_LINEAR; + + ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, false); + } else { + func_shaper->type = TF_TYPE_BYPASS; + func_shaper->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + +/** + * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user + * shaper and 3D LUTs match the hw supported size + * @adev: amdgpu device + * @crtc_state: the DRM CRTC state + * + * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or + * newer) and if the user shaper and 3D LUTs match the supported size. + * + * Returns: + * 0 on success. -EINVAL if lut size are invalid. 
+ */ +int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, + struct drm_plane_state *plane_state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + const struct drm_color_lut *shaper = NULL; + uint32_t exp_size, size; + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut; + + /* shaper LUT is only available if 3D LUT color caps */ + exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0; + shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size); + + if (shaper && size != exp_size) { + drm_dbg(&adev->ddev, + "Invalid Shaper LUT size. Should be %u but got %u.\n", + exp_size, size); + return -EINVAL; + } + + return 0; +} + /** * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes * @crtc_state: the DRM CRTC state @@ -910,6 +967,30 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state, return 0; } +static int +amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, + struct dc_plane_state *dc_plane_state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + const struct drm_color_lut *shaper_lut; + uint32_t shaper_size; + int ret; + + dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); + + shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size); + shaper_size = shaper_lut != NULL ? shaper_size : 0; + + ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, shaper_size, + dc_plane_state->in_shaper_func); + if (ret) + drm_dbg_kms(plane_state->plane->dev, + "setting plane %d shaper LUT failed.\n", + plane_state->plane->index); + + return ret; +} + /** * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane. * @crtc: amdgpu_dm crtc state @@ -927,10 +1008,16 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) { - struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); bool has_crtc_cm_degamma; int ret; + ret = amdgpu_dm_verify_lut3d_size(adev, plane_state); + if (ret) { + drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n"); + return ret; + } + /* Initially, we can just bypass the DGM block. */ dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; @@ -938,8 +1025,6 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, /* After, we start to update values according to color props */ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); - dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); - ret = __set_dm_plane_degamma(plane_state, dc_plane_state); if (ret == -ENOMEM) return ret; @@ -972,5 +1057,5 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, return ret; } - return 0; + return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state); } -- cgit v1.2.3 From 99de686115b00e765a5e9345e10c9d7312e4c7ea Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:58:04 -0100 Subject: drm/amd/display: add plane shaper TF support Enable usage of predefined transfer func in addition to shaper 1D LUT. That means we can save some complexity by just setting a predefined curve, instead of programming a custom curve when preparing color space for applying 3D LUT. 
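Since the TF properties are DRM enum properties, the numeric values userspace passes must be read back from the property itself rather than hardcoded. A small sketch that dumps what the kernel advertises for a given TF property (shaper, blend or regamma); the enum names are whatever amdgpu_create_tf_property() registered, so none are assumed here:

#include <stdio.h>
#include <xf86drmMode.h>

/* List the transfer functions a TF enum property advertises; the
 * names and values come from the kernel, not from userspace. */
static void dump_tf_enum(int fd, uint32_t prop_id)
{
	drmModePropertyRes *prop = drmModeGetProperty(fd, prop_id);

	if (!prop || !(prop->flags & DRM_MODE_PROP_ENUM)) {
		drmModeFreeProperty(prop);
		return;
	}

	for (int i = 0; i < prop->count_enums; i++)
		printf("%s = %llu\n", prop->enums[i].name,
		       (unsigned long long)prop->enums[i].value);

	drmModeFreeProperty(prop);
}
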
Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index daa500b04cb5..8239904f9d90 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -624,20 +624,23 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) } static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, + bool has_rom, + enum dc_transfer_func_predefined tf, uint32_t shaper_size, struct dc_transfer_func *func_shaper) { int ret = 0; - if (shaper_size) { + if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) { /* * If user shaper LUT is set, we assume a linear color space * (linearized by degamma 1D LUT or not). */ func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS; - func_shaper->tf = TRANSFER_FUNCTION_LINEAR; + func_shaper->tf = tf; + func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; - ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, false); + ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom); } else { func_shaper->type = TF_TYPE_BYPASS; func_shaper->tf = TRANSFER_FUNCTION_LINEAR; @@ -972,6 +975,7 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state) { struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; const struct drm_color_lut *shaper_lut; uint32_t shaper_size; int ret; @@ -980,8 +984,11 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size); shaper_size = shaper_lut != NULL ? shaper_size : 0; + shaper_tf = dm_plane_state->shaper_tf; - ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, shaper_size, + ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false, + amdgpu_tf_to_dc_tf(shaper_tf), + shaper_size, dc_plane_state->in_shaper_func); if (ret) drm_dbg_kms(plane_state->plane->dev, -- cgit v1.2.3 From 69a83fd3f0a86374b2fcfab1c02363495704e652 Mon Sep 17 00:00:00 2001 From: Woody Suwalski Date: Tue, 12 Dec 2023 18:31:34 -0500 Subject: drm/radeon: Prevent multiple debug error lines on suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix to avoid multiple debug error lines printed on every suspend by Radeon driver's debugfs. radeon_debugfs_init() calls debugfs_create_file() for every ring. This results in printing multiple error lines to the screen and dmesg similar to this: [   92.378726] debugfs: File 'radeon_ring_gfx' in directory '0000:00:01.0' already present! [   92.378732] debugfs: File 'radeon_ring_cp1' in directory '0000:00:01.0' already present! [   92.378734] debugfs: File 'radeon_ring_cp2' in directory '0000:00:01.0' already present! [   92.378737] debugfs: File 'radeon_ring_dma1' in directory '0000:00:01.0' already present! [   92.378739] debugfs: File 'radeon_ring_dma2' in directory '0000:00:01.0' already present! [   92.380775] debugfs: File 'radeon_ring_uvd' in directory '0000:00:01.0' already present! [   92.406620] debugfs: File 'radeon_ring_vce1' in directory '0000:00:01.0' already present! 
[   92.406624] debugfs: File 'radeon_ring_vce2' in directory '0000:00:01.0' already present! Patch v1: The fix was to run lookup() for the file before trying to (re)create that debug file. Patch v2: Call the radeon_debugfs_init() only once when radeon ring is initialized (as suggested by Christian K. - thanks) Reviewed-by: Christian König Signed-off-by: Woody Suwalski Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index e6534fa9f1fb..38048593bb4a 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -413,6 +413,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig dev_err(rdev->dev, "(%d) ring map failed\n", r); return r; } + radeon_debugfs_ring_init(rdev, ring); } ring->ptr_mask = (ring->ring_size / 4) - 1; ring->ring_free_dw = ring->ring_size / 4; @@ -421,7 +422,6 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index; ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4]; } - radeon_debugfs_ring_init(rdev, ring); radeon_ring_lockup_update(rdev, ring); return 0; } -- cgit v1.2.3 From 65d2765d6291a49d5cdfc0fd88ba5689ed27dbe2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 4 Dec 2023 15:51:50 +0100 Subject: drm/amdgpu: warn when there are still mappings when a BO is destroyed v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can only happen when there is a reference counting bug. v2: fix typo Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d79b4ca1ecfc..5ad03f2afdb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1343,6 +1343,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) abo = ttm_to_amdgpu_bo(bo); + WARN_ON(abo->vm_bo); + if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); -- cgit v1.2.3 From ab4750332dbe535243def5dcebc24ca00c1f98ac Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 7 Dec 2023 10:14:41 -0500 Subject: drm/amdgpu/sdma5.2: add begin/end_use ring callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add begin/end_use ring callbacks to disallow GFXOFF when SDMA work is submitted and allow it again afterward. This should avoid corner cases where GFXOFF is erroneously entered when SDMA is still active. For now just allow/disallow GFXOFF in the begin and end helpers until we root cause the issue. This should not impact power as SDMA usage is pretty minimal and GFXOSS should not be active when SDMA is active anyway, this just makes it explicit. v2: move everything into sdma5.2 code. No reason for this to be generic at this point. 
v3: Add comments in new code Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220 Reviewed-by: Mario Limonciello (v1) Tested-by: Mario Limonciello (v1) Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.15+ --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 83c240f741b5..0058f3f7cf6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Disallow GFXOFF while SDMA is active. + * We can probably just limit this to 5.2.3, + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. + */ + amdgpu_gfx_off_ctrl(adev, false); +} + +static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Allow GFXOFF when SDMA is complete. + */ + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, @@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .test_ib = sdma_v5_2_ring_test_ib, .insert_nop = sdma_v5_2_ring_insert_nop, .pad_ib = sdma_v5_2_ring_pad_ib, + .begin_use = sdma_v5_2_ring_begin_use, + .end_use = sdma_v5_2_ring_end_use, .emit_wreg = sdma_v5_2_ring_emit_wreg, .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, -- cgit v1.2.3 From 2c7300d357a213d4a4bda691d1d5c06251e552d0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 12 Dec 2023 01:09:16 -0600 Subject: drm/amd: Fix a probing order problem on SDMA 2.4 commit 751e293f2c99 ("drm/amd: Move microcode init from sw_init to early_init for SDMA v2.4") made a fateful mistake in `adev->sdma.num_instances` wasn't declared when sdma_v2_4_init_microcode() was run. This caused probing to fail. Move the declaration to right before sdma_v2_4_init_microcode(). 
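The ordering constraint is the whole fix: any code that iterates over the SDMA instances must run after the instance count has been set. A minimal illustration follows; the field and function names are taken from the diff below, but the wrapper itself is hypothetical:

    static int sdma_early_init_sketch(struct amdgpu_device *adev)
    {
            /* must be set first: init_microcode() walks num_instances entries */
            adev->sdma.num_instances = SDMA_MAX_INSTANCE;

            /* now the firmware for each instance can be requested */
            return sdma_v2_4_init_microcode(adev);
    }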
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3043 Fixes: 751e293f2c99 ("drm/amd: Move microcode init from sw_init to early_init for SDMA v2.4") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 45377a175250..8d5d86675a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -813,12 +813,12 @@ static int sdma_v2_4_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; + r = sdma_v2_4_init_microcode(adev); if (r) return r; - adev->sdma.num_instances = SDMA_MAX_INSTANCE; - sdma_v2_4_set_ring_funcs(adev); sdma_v2_4_set_buffer_funcs(adev); sdma_v2_4_set_vm_pte_funcs(adev); -- cgit v1.2.3 From ceb9a321e7639700844aa3bf234a4e0884f13b77 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 8 Dec 2023 13:43:09 +0100 Subject: drm/amdgpu: fix tear down order in amdgpu_vm_pt_free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When freeing PD/PT with shadows it can happen that the shadow destruction races with detaching the PD/PT from the VM causing a NULL pointer dereference in the invalidation code. Fix this by detaching the the PD/PT from the VM first and then freeing the shadow instead. Signed-off-by: Christian König Fixes: https://gitlab.freedesktop.org/drm/amd/-/issues/2867 Cc: Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index a2287bb25223..a160265ddc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + + entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(&shadow->tbo, NULL); amdgpu_bo_unref(&shadow); } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); -- cgit v1.2.3 From a4236c4b410857a70647c410e886c8a0455ec4fb Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 4 Dec 2023 15:51:50 +0100 Subject: drm/amdgpu: warn when there are still mappings when a BO is destroyed v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can only happen when there is a reference counting bug. 
v2: fix typo Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d79b4ca1ecfc..5ad03f2afdb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1343,6 +1343,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) abo = ttm_to_amdgpu_bo(bo); + WARN_ON(abo->vm_bo); + if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); -- cgit v1.2.3 From c2949a49dfe960e952400029e14751dceff79d38 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 03:27:42 +0300 Subject: drm/msm/dpu: enable writeback on SM8350 Enable WB2 hardware block, enabling writeback support on this platform. Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570188/ Link: https://lore.kernel.org/r/20231203002743.1291956-3-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index bed87250e68c..194239de393e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -30,6 +30,7 @@ static const struct dpu_mdp_cfg sm8350_mdp = { [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, [DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, }, }; @@ -297,6 +298,21 @@ static const struct dpu_dsc_cfg sm8350_dsc[] = { }, }; +static const struct dpu_wb_cfg sm8350_wb[] = { + { + .name = "wb_2", .id = WB_2, + .base = 0x65000, .len = 0x2c8, + .features = WB_SM8250_MASK, + .format_list = wb2_formats, + .num_formats = ARRAY_SIZE(wb2_formats), + .clk_ctrl = DPU_CLK_CTRL_WB2, + .xin_id = 6, + .vbif_idx = VBIF_RT, + .maxlinewidth = 4096, + .intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4), + }, +}; + static const struct dpu_intf_cfg sm8350_intf[] = { { .name = "intf_0", .id = INTF_0, @@ -392,6 +408,8 @@ const struct dpu_mdss_cfg dpu_sm8350_cfg = { .dsc = sm8350_dsc, .merge_3d_count = ARRAY_SIZE(sm8350_merge_3d), .merge_3d = sm8350_merge_3d, + .wb_count = ARRAY_SIZE(sm8350_wb), + .wb = sm8350_wb, .intf_count = ARRAY_SIZE(sm8350_intf), .intf = sm8350_intf, .vbif_count = ARRAY_SIZE(sdm845_vbif), -- cgit v1.2.3 From eaa647cdbf2e357b4a14903f2f1e47ed9c4f8df3 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 3 Dec 2023 03:27:43 +0300 Subject: drm/msm/dpu: enable writeback on SM8450 Enable WB2 hardware block, enabling writeback support on this platform. 
Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/570187/ Link: https://lore.kernel.org/r/20231203002743.1291956-4-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index 7a647e1f729d..a8a1d65a93af 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -31,6 +31,7 @@ static const struct dpu_mdp_cfg sm8450_mdp = { [DPU_CLK_CTRL_DMA1] = { .reg_off = 0x2b4, .bit_off = 8 }, [DPU_CLK_CTRL_DMA2] = { .reg_off = 0x2bc, .bit_off = 8 }, [DPU_CLK_CTRL_DMA3] = { .reg_off = 0x2c4, .bit_off = 8 }, + [DPU_CLK_CTRL_WB2] = { .reg_off = 0x2bc, .bit_off = 16 }, [DPU_CLK_CTRL_REG_DMA] = { .reg_off = 0x2bc, .bit_off = 20 }, }, }; @@ -315,6 +316,21 @@ static const struct dpu_dsc_cfg sm8450_dsc[] = { }, }; +static const struct dpu_wb_cfg sm8450_wb[] = { + { + .name = "wb_2", .id = WB_2, + .base = 0x65000, .len = 0x2c8, + .features = WB_SM8250_MASK, + .format_list = wb2_formats, + .num_formats = ARRAY_SIZE(wb2_formats), + .clk_ctrl = DPU_CLK_CTRL_WB2, + .xin_id = 6, + .vbif_idx = VBIF_RT, + .maxlinewidth = 4096, + .intr_wb_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 4), + }, +}; + static const struct dpu_intf_cfg sm8450_intf[] = { { .name = "intf_0", .id = INTF_0, @@ -410,6 +426,8 @@ const struct dpu_mdss_cfg dpu_sm8450_cfg = { .dsc = sm8450_dsc, .merge_3d_count = ARRAY_SIZE(sm8450_merge_3d), .merge_3d = sm8450_merge_3d, + .wb_count = ARRAY_SIZE(sm8450_wb), + .wb = sm8450_wb, .intf_count = ARRAY_SIZE(sm8450_intf), .intf = sm8450_intf, .vbif_count = ARRAY_SIZE(sdm845_vbif), -- cgit v1.2.3 From 2b72e50c62de60ad2d6bcd86aa38d4ccbdd633f2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 11 Dec 2023 10:19:55 -0800 Subject: drm/msm/dpu: Ratelimit framedone timeout msgs When we start getting these, we get a *lot*. So ratelimit it to not flood dmesg. Signed-off-by: Rob Clark Reviewed-by: Abhinav Kumar Reviewed-by: Marijn Suijten Patchwork: https://patchwork.freedesktop.org/patch/571584/ Link: https://lore.kernel.org/r/20231211182000.218088-1-robdclark@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 5 ++++- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 250c21a4cfc7..4ceb3421003c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -39,6 +39,9 @@ #define DPU_ERROR_ENC(e, fmt, ...) DPU_ERROR("enc%d " fmt,\ (e) ? (e)->base.base.id : -1, ##__VA_ARGS__) +#define DPU_ERROR_ENC_RATELIMITED(e, fmt, ...) DPU_ERROR_RATELIMITED("enc%d " fmt,\ + (e) ? 
(e)->base.base.id : -1, ##__VA_ARGS__) + /* * Two to anticipate panels that can do cmd/vid dynamic switching * plan is to create all possible physical encoder types, and switch between @@ -2279,7 +2282,7 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) return; } - DPU_ERROR_ENC(dpu_enc, "frame done timeout\n"); + DPU_ERROR_ENC_RATELIMITED(dpu_enc, "frame done timeout\n"); if (atomic_inc_return(&dpu_enc->frame_done_timeout_cnt) == 1) msm_disp_snapshot_state(drm_enc->dev); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index df6271017b80..9112a702a00f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -51,6 +51,7 @@ } while (0) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) +#define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) /** * ktime_compare_safe - compare two ktime structures -- cgit v1.2.3 From c806d59695e1cdd288e11f8dcc8abdd187a3570e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 11 Dec 2023 17:54:40 +0300 Subject: drm/msm/dpu: remove extra drm_encoder_cleanup from the error path The drmm handler will perform drm_encoder_cleanup() for us. Moreover if we call drm_encoder_cleanup() manually, the drmm_encoder_alloc_release() will spawn warnings at drivers/gpu/drm/drm_encoder.c:214. Drop these extra drm_encoder_cleanup() calls. Fixes: cd42c56d9c0b ("drm/msm/dpu: use drmm-managed allocation for dpu_encoder_virt") Signed-off-by: Dmitry Baryshkov Reported-by: Konrad Dybcio Reviewed-by: Abhinav Kumar Tested-by: Abhinav Kumar #sm8250 CI Patchwork: https://patchwork.freedesktop.org/patch/571562/ Link: https://lore.kernel.org/r/20231211145440.3647001-1-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index dc24fe4bb3b0..d60edb93d4f7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -586,7 +586,6 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, rc = msm_dp_modeset_init(priv->dp[i], dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc); - drm_encoder_cleanup(encoder); return rc; } } @@ -619,7 +618,6 @@ static int _dpu_kms_initialize_hdmi(struct drm_device *dev, rc = msm_hdmi_modeset_init(priv->hdmi, dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc); - drm_encoder_cleanup(encoder); return rc; } @@ -651,7 +649,6 @@ static int _dpu_kms_initialize_writeback(struct drm_device *dev, n_formats); if (rc) { DPU_ERROR("dpu_writeback_init, rc = %d\n", rc); - drm_encoder_cleanup(encoder); return rc; } -- cgit v1.2.3 From 043e5b302625634f5590f3da09652cdfa1037851 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:39 -0800 Subject: drm/msm/dpu: add formats check for writeback encoder In preparation for adding more formats to dpu writeback add format validation to it to fail any unsupported formats. 
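In practice the change delegates format validation to the core DRM writeback helper at the end of the encoder's atomic_check. A condensed sketch of the resulting flow, simplified from the diff below and not the verbatim driver code:

    static int wb_atomic_check_sketch(struct dpu_encoder_phys *phys_enc,
                                      struct drm_crtc_state *crtc_state,
                                      struct drm_connector_state *conn_state)
    {
            /* ... existing framebuffer and mode geometry checks ... */

            /*
             * Reject framebuffer formats the writeback connector does not
             * advertise; the helper prints its own debug message on failure.
             */
            return drm_atomic_helper_check_wb_connector_state(conn_state->connector,
                                                              conn_state->state);
    }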
changes in v4: - change the failure message of the API drm_atomic_helper_check_wb_connector_state() to a generic one in case it checks more errors later and moreoever it already has debug message to indicate its failure - change the corresponding DPU_ERROR to DPU_DEBUG in-line with other atomic_check failure messages changes in v3: - rebase on top of msm-next - replace drm_atomic_helper_check_wb_encoder_state() with drm_atomic_helper_check_wb_connector_state() due to the rebase changes in v2: - correct some grammar in the commit text Fixes: d7d0e73f7de3 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for writeback") Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571811/ Link: https://lore.kernel.org/r/20231212205254.12422-2-quic_abhinavk@quicinc.com [DB: removed extra debug message] Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index bb94909caa25..798d33e3207f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -272,6 +272,7 @@ static int dpu_encoder_phys_wb_atomic_check( { struct drm_framebuffer *fb; const struct drm_display_mode *mode = &crtc_state->mode; + int ret; DPU_DEBUG("[atomic_check:%d, \"%s\",%d,%d]\n", phys_enc->hw_wb->idx, mode->name, mode->hdisplay, mode->vdisplay); @@ -308,7 +309,7 @@ static int dpu_encoder_phys_wb_atomic_check( return -EINVAL; } - return 0; + return drm_atomic_helper_check_wb_connector_state(conn_state->connector, conn_state->state); } -- cgit v1.2.3 From ecf594453a6fbc6c06278cf815e3ece4a1b8261b Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:40 -0800 Subject: drm/msm/dpu: rename dpu_encoder_phys_wb_setup_cdp to match its functionality dpu_encoder_phys_wb_setup_cdp() is not programming the chroma down prefetch block. Its setting up the display ctl path for writeback. Hence rename it to dpu_encoder_phys_wb_setup_ctl() to match what its actually doing. 
Fixes: d7d0e73f7de3 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for writeback") Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571812/ Link: https://lore.kernel.org/r/20231212205254.12422-3-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 798d33e3207f..9ca41de5ff7b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -207,10 +207,10 @@ static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc, } /** - * dpu_encoder_phys_wb_setup_cdp - setup chroma down prefetch block + * dpu_encoder_phys_wb_setup_ctl - setup wb pipeline for ctl path * @phys_enc:Pointer to physical encoder */ -static void dpu_encoder_phys_wb_setup_cdp(struct dpu_encoder_phys *phys_enc) +static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc) { struct dpu_hw_wb *hw_wb; struct dpu_hw_ctl *ctl; @@ -376,7 +376,7 @@ static void dpu_encoder_phys_wb_setup( dpu_encoder_phys_wb_setup_fb(phys_enc, fb); - dpu_encoder_phys_wb_setup_cdp(phys_enc); + dpu_encoder_phys_wb_setup_ctl(phys_enc); } -- cgit v1.2.3 From 79caf2f2202b9eaad3a5a726e4b33807f67d0f1b Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:41 -0800 Subject: drm/msm/dpu: fix writeback programming for YUV cases For YUV cases, setting the required format bits was missed out in the register programming. Lets fix it now in preparation of adding YUV formats support for writeback. 
changes in v2: - dropped the fixes tag as its not a fix but adding new functionality Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571814/ Link: https://lore.kernel.org/r/20231212205254.12422-4-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 1 - drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 9ca41de5ff7b..c6e9c11b6971 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -272,7 +272,6 @@ static int dpu_encoder_phys_wb_atomic_check( { struct drm_framebuffer *fb; const struct drm_display_mode *mode = &crtc_state->mode; - int ret; DPU_DEBUG("[atomic_check:%d, \"%s\",%d,%d]\n", phys_enc->hw_wb->idx, mode->name, mode->hdisplay, mode->vdisplay); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c index ed0e80616129..e75995f7fcea 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c @@ -89,6 +89,9 @@ static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx, dst_format |= BIT(14); /* DST_ALPHA_X */ } + if (DPU_FORMAT_IS_YUV(fmt)) + dst_format |= BIT(15); + pattern = (fmt->element[3] << 24) | (fmt->element[2] << 16) | (fmt->element[1] << 8) | -- cgit v1.2.3 From 9c4998efec47a6f92d53bdcfea2b48783e4fedb5 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:42 -0800 Subject: drm/msm/dpu: move csc matrices to dpu_hw_util Since the type and usage of CSC matrices is spanning across DPU lets introduce a helper to the dpu_hw_util to return the CSC corresponding to the request type. This will help to add more supported CSC types such as the RGB to YUV one which is used in the case of CDM. 
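With the tables exported from dpu_hw_util.h, any block that needs a YUV-to-RGB conversion can reference the shared matrices instead of keeping private copies. A trivial, hypothetical helper showing the intended reuse (the two table names come from the diff below; this function does not exist in the driver):

    static const struct dpu_csc_cfg *pick_yuv2rgb_csc(bool use_10bit_csc)
    {
            /* both tables now live in dpu_hw_util.h and are shared across DPU */
            return use_10bit_csc ? &dpu_csc10_YUV2RGB_601L : &dpu_csc_YUV2RGB_601L;
    }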
changes in v3: - drop the extra wrapper and export the matrices directly Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571816/ Link: https://lore.kernel.org/r/20231212205254.12422-5-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 30 ++++++++++++++++++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 31 +---------------------------- 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index fe083b2e5696..aa50005042d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -19,6 +19,36 @@ #define MISR_CTRL_STATUS_CLEAR BIT(10) #define MISR_CTRL_FREE_RUN_MASK BIT(31) +static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = { + { + /* S15.16 format */ + 0x00012A00, 0x00000000, 0x00019880, + 0x00012A00, 0xFFFF9B80, 0xFFFF3000, + 0x00012A00, 0x00020480, 0x00000000, + }, + /* signed bias */ + { 0xfff0, 0xff80, 0xff80,}, + { 0x0, 0x0, 0x0,}, + /* unsigned clamp */ + { 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,}, + { 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,}, +}; + +static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = { + { + /* S15.16 format */ + 0x00012A00, 0x00000000, 0x00019880, + 0x00012A00, 0xFFFF9B80, 0xFFFF3000, + 0x00012A00, 0x00020480, 0x00000000, + }, + /* signed bias */ + { 0xffc0, 0xfe00, 0xfe00,}, + { 0x0, 0x0, 0x0,}, + /* unsigned clamp */ + { 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,}, + { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,}, +}; + /* * This is the common struct maintained by each sub block * for mapping the register offsets in this block to the diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 3235ab132540..ff975ad51145 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -21,6 +21,7 @@ #include "dpu_kms.h" #include "dpu_formats.h" #include "dpu_hw_sspp.h" +#include "dpu_hw_util.h" #include "dpu_trace.h" #include "dpu_crtc.h" #include "dpu_vbif.h" @@ -508,36 +509,6 @@ static void _dpu_plane_setup_pixel_ext(struct dpu_hw_scaler3_cfg *scale_cfg, } } -static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = { - { - /* S15.16 format */ - 0x00012A00, 0x00000000, 0x00019880, - 0x00012A00, 0xFFFF9B80, 0xFFFF3000, - 0x00012A00, 0x00020480, 0x00000000, - }, - /* signed bias */ - { 0xfff0, 0xff80, 0xff80,}, - { 0x0, 0x0, 0x0,}, - /* unsigned clamp */ - { 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,}, - { 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,}, -}; - -static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = { - { - /* S15.16 format */ - 0x00012A00, 0x00000000, 0x00019880, - 0x00012A00, 0xFFFF9B80, 0xFFFF3000, - 0x00012A00, 0x00020480, 0x00000000, - }, - /* signed bias */ - { 0xffc0, 0xfe00, 0xfe00,}, - { 0x0, 0x0, 0x0,}, - /* unsigned clamp */ - { 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,}, - { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,}, -}; - static const struct dpu_csc_cfg *_dpu_plane_get_csc(struct dpu_sw_pipe *pipe, const struct dpu_format *fmt) { -- cgit v1.2.3 From a5ec9a44d8a3ae09fdf6dfc12e89c8663bb631ea Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:43 -0800 Subject: drm/msm/dpu: add cdm blocks to sc7280 dpu_hw_catalog Add CDM blocks to the sc7280 dpu_hw_catalog to support YUV format output from writeback block. 
changes in v3: - change the comment from sub-blk to clk for CDM changes in v2: - remove explicit zero assignment for features - move sc7280_cdm to dpu_hw_catalog from the sc7280 catalog file as its definition can be re-used Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571818/ Link: https://lore.kernel.org/r/20231212205254.12422-6-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 10 ++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 13 +++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h | 5 +++++ 4 files changed, 29 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h index 209675de6742..19c2b7454796 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h @@ -248,6 +248,7 @@ const struct dpu_mdss_cfg dpu_sc7280_cfg = { .mdss_ver = &sc7280_mdss_ver, .caps = &sc7280_dpu_caps, .mdp = &sc7280_mdp, + .cdm = &sc7280_cdm, .ctl_count = ARRAY_SIZE(sc7280_ctl), .ctl = sc7280_ctl, .sspp_count = ARRAY_SIZE(sc7280_sspp), diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index d52aae54bbd5..b304bebedb84 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -426,6 +426,16 @@ static const struct dpu_dsc_sub_blks dsc_sblk_1 = { .ctl = {.name = "ctl", .base = 0xF80, .len = 0x10}, }; +/************************************************************* + * CDM block config + *************************************************************/ +static const struct dpu_cdm_cfg sc7280_cdm = { + .name = "cdm_0", + .id = CDM_0, + .len = 0x228, + .base = 0x79200, +}; + /************************************************************* * VBIF sub blocks config *************************************************************/ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index e3c0d007481b..ba82ef4560a6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -682,6 +682,17 @@ struct dpu_vbif_cfg { u32 memtype[MAX_XIN_COUNT]; }; +/** + * struct dpu_cdm_cfg - information of chroma down blocks + * @name string name for debug purposes + * @id enum identifying this block + * @base register offset of this block + * @features bit mask identifying sub-blocks/features + */ +struct dpu_cdm_cfg { + DPU_HW_BLK_INFO; +}; + /** * Define CDP use cases * @DPU_PERF_CDP_UDAGE_RT: real-time use cases @@ -805,6 +816,8 @@ struct dpu_mdss_cfg { u32 wb_count; const struct dpu_wb_cfg *wb; + const struct dpu_cdm_cfg *cdm; + u32 ad_count; u32 dspp_count; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h index a6702b2bfc68..f319c8232ea5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h @@ -185,6 +185,11 @@ enum dpu_dsc { DSC_MAX }; +enum dpu_cdm { + CDM_0 = 1, + CDM_MAX +}; + enum dpu_pingpong { PINGPONG_NONE, PINGPONG_0, -- cgit v1.2.3 From e1239661c9e9a628e0871f461bafebba324658d9 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:44 -0800 Subject: drm/msm/dpu: add cdm blocks to sm8250 dpu_hw_catalog Add CDM 
blocks to the sm8250 dpu_hw_catalog to support YUV format output from writeback block. changes in v2: - re-use the cdm definition from sc7280 Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571819/ Link: https://lore.kernel.org/r/20231212205254.12422-7-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index 2359c16e9206..58b0f50518c8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -384,6 +384,7 @@ const struct dpu_mdss_cfg dpu_sm8250_cfg = { .mdss_ver = &sm8250_mdss_ver, .caps = &sm8250_dpu_caps, .mdp = &sm8250_mdp, + .cdm = &sc7280_cdm, .ctl_count = ARRAY_SIZE(sm8250_ctl), .ctl = sm8250_ctl, .sspp_count = ARRAY_SIZE(sm8250_sspp), -- cgit v1.2.3 From 0afac0ba60242ff827f4ffe56375c421a4e69a0f Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:45 -0800 Subject: drm/msm/dpu: add dpu_hw_cdm abstraction for CDM block CDM block comes with its own set of registers and operations which can be done. In-line with other hardware blocks, this change adds the dpu_hw_cdm abstraction for the CDM block. changes in v4: - used FIELD_PREP() for dpu_hw_cdm_setup_cdwn() operations - change to lowercase hex in dpu_hw_cdm_bind_pingpong_blk() - move disable assignment inside else in dpu_hw_cdm_bind_pingpong_blk() changes in v3: - fix commit text from sub-blk to blk for CDM - fix kbot issue for missing static for dpu_hw_cdm_enable() - fix kbot issue for incorrect documentation style - add more documentation for enums and struct in dpu_hw_cdm.h - drop "enable" parameter from bind_pingpong_blk() as we can just use PINGPONG_NONE for disable cases - drop unnecessary bit operation for zero value of cdm_cfg changes in v2: - replace bit magic with relevant defines - use drmm_kzalloc instead of kzalloc/free - some formatting fixes - inline _setup_cdm_ops() - protect bind_pingpong_blk with core_rev check - drop setup_csc_data() and setup_cdwn() ops as they are merged into enable() Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312101815.B3ZH7Pfy-lkp@intel.com/ Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571824/ Link: https://lore.kernel.org/r/20231212205254.12422-8-quic_abhinavk@quicinc.com [DB: Added linux/bitfield.h inclusion] Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/Makefile | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c | 247 ++++++++++++++++++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h | 142 ++++++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h | 1 + 4 files changed, 391 insertions(+) create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 49671364fdcf..b1173128b5b9 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -63,6 +63,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \ disp/dpu1/dpu_encoder_phys_wb.o \ disp/dpu1/dpu_formats.o \ disp/dpu1/dpu_hw_catalog.o \ + disp/dpu1/dpu_hw_cdm.o \ disp/dpu1/dpu_hw_ctl.o \ disp/dpu1/dpu_hw_dsc.o \ disp/dpu1/dpu_hw_dsc_1_2.o \ diff --git 
a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c new file mode 100644 index 000000000000..e9cdc7934a49 --- /dev/null +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023, The Linux Foundation. All rights reserved. + */ + +#include + +#include + +#include "dpu_hw_mdss.h" +#include "dpu_hw_util.h" +#include "dpu_hw_catalog.h" +#include "dpu_hw_cdm.h" +#include "dpu_kms.h" + +#define CDM_CSC_10_OPMODE 0x000 +#define CDM_CSC_10_BASE 0x004 + +#define CDM_CDWN2_OP_MODE 0x100 +#define CDM_CDWN2_CLAMP_OUT 0x104 +#define CDM_CDWN2_PARAMS_3D_0 0x108 +#define CDM_CDWN2_PARAMS_3D_1 0x10C +#define CDM_CDWN2_COEFF_COSITE_H_0 0x110 +#define CDM_CDWN2_COEFF_COSITE_H_1 0x114 +#define CDM_CDWN2_COEFF_COSITE_H_2 0x118 +#define CDM_CDWN2_COEFF_OFFSITE_H_0 0x11C +#define CDM_CDWN2_COEFF_OFFSITE_H_1 0x120 +#define CDM_CDWN2_COEFF_OFFSITE_H_2 0x124 +#define CDM_CDWN2_COEFF_COSITE_V 0x128 +#define CDM_CDWN2_COEFF_OFFSITE_V 0x12C +#define CDM_CDWN2_OUT_SIZE 0x130 + +#define CDM_HDMI_PACK_OP_MODE 0x200 +#define CDM_CSC_10_MATRIX_COEFF_0 0x004 + +#define CDM_MUX 0x224 + +/* CDM CDWN2 sub-block bit definitions */ +#define CDM_CDWN2_OP_MODE_EN BIT(0) +#define CDM_CDWN2_OP_MODE_ENABLE_H BIT(1) +#define CDM_CDWN2_OP_MODE_ENABLE_V BIT(2) +#define CDM_CDWN2_OP_MODE_BITS_OUT_8BIT BIT(7) +#define CDM_CDWN2_V_PIXEL_METHOD_MASK GENMASK(6, 5) +#define CDM_CDWN2_H_PIXEL_METHOD_MASK GENMASK(4, 3) + +/* CDM CSC10 sub-block bit definitions */ +#define CDM_CSC10_OP_MODE_EN BIT(0) +#define CDM_CSC10_OP_MODE_SRC_FMT_YUV BIT(1) +#define CDM_CSC10_OP_MODE_DST_FMT_YUV BIT(2) + +/* CDM HDMI pack sub-block bit definitions */ +#define CDM_HDMI_PACK_OP_MODE_EN BIT(0) + +/* + * Horizontal coefficients for cosite chroma downscale + * s13 representation of coefficients + */ +static u32 cosite_h_coeff[] = {0x00000016, 0x000001cc, 0x0100009e}; + +/* + * Horizontal coefficients for offsite chroma downscale + */ +static u32 offsite_h_coeff[] = {0x000b0005, 0x01db01eb, 0x00e40046}; + +/* + * Vertical coefficients for cosite chroma downscale + */ +static u32 cosite_v_coeff[] = {0x00080004}; +/* + * Vertical coefficients for offsite chroma downscale + */ +static u32 offsite_v_coeff[] = {0x00060002}; + +static int dpu_hw_cdm_setup_cdwn(struct dpu_hw_cdm *ctx, struct dpu_hw_cdm_cfg *cfg) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + u32 opmode; + u32 out_size; + + switch (cfg->h_cdwn_type) { + case CDM_CDWN_DISABLE: + opmode = 0; + break; + case CDM_CDWN_PIXEL_DROP: + opmode = CDM_CDWN2_OP_MODE_ENABLE_H | + FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_PIXEL_DROP); + break; + case CDM_CDWN_AVG: + opmode = CDM_CDWN2_OP_MODE_ENABLE_H | + FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_AVG); + break; + case CDM_CDWN_COSITE: + opmode = CDM_CDWN2_OP_MODE_ENABLE_H | + FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_COSITE); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_0, + cosite_h_coeff[0]); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_1, + cosite_h_coeff[1]); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_COSITE_H_2, + cosite_h_coeff[2]); + break; + case CDM_CDWN_OFFSITE: + opmode = CDM_CDWN2_OP_MODE_ENABLE_H | + FIELD_PREP(CDM_CDWN2_H_PIXEL_METHOD_MASK, CDM_CDWN2_METHOD_OFFSITE); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_0, + offsite_h_coeff[0]); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_1, + offsite_h_coeff[1]); + DPU_REG_WRITE(c, CDM_CDWN2_COEFF_OFFSITE_H_2, + offsite_h_coeff[2]); + break; + 
default: + DPU_ERROR("%s invalid horz down sampling type\n", __func__); + return -EINVAL; + } + + switch (cfg->v_cdwn_type) { + case CDM_CDWN_DISABLE: + /* if its only Horizontal downsample, we dont need to do anything here */ + break; + case CDM_CDWN_PIXEL_DROP: + opmode |= CDM_CDWN2_OP_MODE_ENABLE_V | + FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_PIXEL_DROP); + break; + case CDM_CDWN_AVG: + opmode |= CDM_CDWN2_OP_MODE_ENABLE_V | + FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_AVG); + break; + case CDM_CDWN_COSITE: + opmode |= CDM_CDWN2_OP_MODE_ENABLE_V | + FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_COSITE); + DPU_REG_WRITE(c, + CDM_CDWN2_COEFF_COSITE_V, + cosite_v_coeff[0]); + break; + case CDM_CDWN_OFFSITE: + opmode |= CDM_CDWN2_OP_MODE_ENABLE_V | + FIELD_PREP(CDM_CDWN2_V_PIXEL_METHOD_MASK, + CDM_CDWN2_METHOD_OFFSITE); + DPU_REG_WRITE(c, + CDM_CDWN2_COEFF_OFFSITE_V, + offsite_v_coeff[0]); + break; + default: + return -EINVAL; + } + + if (cfg->output_bit_depth != CDM_CDWN_OUTPUT_10BIT) + opmode |= CDM_CDWN2_OP_MODE_BITS_OUT_8BIT; + + if (cfg->v_cdwn_type || cfg->h_cdwn_type) + opmode |= CDM_CDWN2_OP_MODE_EN; /* EN CDWN module */ + else + opmode &= ~CDM_CDWN2_OP_MODE_EN; + + out_size = (cfg->output_width & 0xFFFF) | ((cfg->output_height & 0xFFFF) << 16); + DPU_REG_WRITE(c, CDM_CDWN2_OUT_SIZE, out_size); + DPU_REG_WRITE(c, CDM_CDWN2_OP_MODE, opmode); + DPU_REG_WRITE(c, CDM_CDWN2_CLAMP_OUT, ((0x3FF << 16) | 0x0)); + + return 0; +} + +static int dpu_hw_cdm_enable(struct dpu_hw_cdm *ctx, struct dpu_hw_cdm_cfg *cdm) +{ + struct dpu_hw_blk_reg_map *c = &ctx->hw; + const struct dpu_format *fmt; + u32 opmode = 0; + u32 csc = 0; + + if (!ctx || !cdm) + return -EINVAL; + + fmt = cdm->output_fmt; + + if (!DPU_FORMAT_IS_YUV(fmt)) + return -EINVAL; + + dpu_hw_csc_setup(&ctx->hw, CDM_CSC_10_MATRIX_COEFF_0, cdm->csc_cfg, true); + dpu_hw_cdm_setup_cdwn(ctx, cdm); + + if (cdm->output_type == CDM_CDWN_OUTPUT_HDMI) { + if (fmt->chroma_sample != DPU_CHROMA_H1V2) + return -EINVAL; /*unsupported format */ + opmode = CDM_HDMI_PACK_OP_MODE_EN; + opmode |= (fmt->chroma_sample << 1); + } + + csc |= CDM_CSC10_OP_MODE_DST_FMT_YUV; + csc &= ~CDM_CSC10_OP_MODE_SRC_FMT_YUV; + csc |= CDM_CSC10_OP_MODE_EN; + + if (ctx && ctx->ops.bind_pingpong_blk) + ctx->ops.bind_pingpong_blk(ctx, cdm->pp_id); + + DPU_REG_WRITE(c, CDM_CSC_10_OPMODE, csc); + DPU_REG_WRITE(c, CDM_HDMI_PACK_OP_MODE, opmode); + return 0; +} + +static void dpu_hw_cdm_bind_pingpong_blk(struct dpu_hw_cdm *ctx, const enum dpu_pingpong pp) +{ + struct dpu_hw_blk_reg_map *c; + int mux_cfg; + + c = &ctx->hw; + + mux_cfg = DPU_REG_READ(c, CDM_MUX); + mux_cfg &= ~0xf; + + if (pp) + mux_cfg |= (pp - PINGPONG_0) & 0x7; + else + mux_cfg |= 0xf; + + DPU_REG_WRITE(c, CDM_MUX, mux_cfg); +} + +struct dpu_hw_cdm *dpu_hw_cdm_init(struct drm_device *dev, + const struct dpu_cdm_cfg *cfg, void __iomem *addr, + const struct dpu_mdss_version *mdss_rev) +{ + struct dpu_hw_cdm *c; + + c = drmm_kzalloc(dev, sizeof(*c), GFP_KERNEL); + if (!c) + return ERR_PTR(-ENOMEM); + + c->hw.blk_addr = addr + cfg->base; + c->hw.log_mask = DPU_DBG_MASK_CDM; + + /* Assign ops */ + c->idx = cfg->id; + c->caps = cfg; + + c->ops.enable = dpu_hw_cdm_enable; + if (mdss_rev->core_major_ver >= 5) + c->ops.bind_pingpong_blk = dpu_hw_cdm_bind_pingpong_blk; + + return c; +} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h new file mode 100644 index 000000000000..348424df87c6 --- /dev/null +++ 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023, The Linux Foundation. All rights reserved. + */ + +#ifndef _DPU_HW_CDM_H +#define _DPU_HW_CDM_H + +#include "dpu_hw_mdss.h" +#include "dpu_hw_top.h" + +struct dpu_hw_cdm; + +/** + * struct dpu_hw_cdm_cfg : current configuration of CDM block + * + * @output_width: output ROI width of CDM block + * @output_height: output ROI height of CDM block + * @output_bit_depth: output bit-depth of CDM block + * @h_cdwn_type: downsample type used for horizontal pixels + * @v_cdwn_type: downsample type used for vertical pixels + * @output_fmt: handle to dpu_format of CDM block + * @csc_cfg: handle to CSC matrix programmed for CDM block + * @output_type: interface to which CDM is paired (HDMI/WB) + * @pp_id: ping-pong block to which CDM is bound to + */ +struct dpu_hw_cdm_cfg { + u32 output_width; + u32 output_height; + u32 output_bit_depth; + u32 h_cdwn_type; + u32 v_cdwn_type; + const struct dpu_format *output_fmt; + const struct dpu_csc_cfg *csc_cfg; + u32 output_type; + int pp_id; +}; + +/* + * These values are used indicate which type of downsample is used + * in the horizontal/vertical direction for the CDM block. + */ +enum dpu_hw_cdwn_type { + CDM_CDWN_DISABLE, + CDM_CDWN_PIXEL_DROP, + CDM_CDWN_AVG, + CDM_CDWN_COSITE, + CDM_CDWN_OFFSITE, +}; + +/* + * CDM block can be paired with WB or HDMI block. These values match + * the input with which the CDM block is paired. + */ +enum dpu_hw_cdwn_output_type { + CDM_CDWN_OUTPUT_HDMI, + CDM_CDWN_OUTPUT_WB, +}; + +/* + * CDM block can give an 8-bit or 10-bit output. These values + * are used to indicate the output bit depth of CDM block + */ +enum dpu_hw_cdwn_output_bit_depth { + CDM_CDWN_OUTPUT_8BIT, + CDM_CDWN_OUTPUT_10BIT, +}; + +/* + * CDM block can downsample using different methods. These values + * are used to indicate the downsample method which can be used + * either in the horizontal or vertical direction. + */ +enum dpu_hw_cdwn_op_mode_method_h_v { + CDM_CDWN2_METHOD_PIXEL_DROP, + CDM_CDWN2_METHOD_AVG, + CDM_CDWN2_METHOD_COSITE, + CDM_CDWN2_METHOD_OFFSITE +}; + +/** + * struct dpu_hw_cdm_ops : Interface to the chroma down Hw driver functions + * Assumption is these functions will be called after + * clocks are enabled + * @enable: Enables the output to interface and programs the + * output packer + * @bind_pingpong_blk: enable/disable the connection with pingpong which + * will feed pixels to this cdm + */ +struct dpu_hw_cdm_ops { + /** + * Enable the CDM module + * @cdm Pointer to chroma down context + */ + int (*enable)(struct dpu_hw_cdm *cdm, struct dpu_hw_cdm_cfg *cfg); + + /** + * Enable/disable the connection with pingpong + * @cdm Pointer to chroma down context + * @pp pingpong block id. + */ + void (*bind_pingpong_blk)(struct dpu_hw_cdm *cdm, const enum dpu_pingpong pp); +}; + +/** + * struct dpu_hw_cdm - cdm description + * @base: Hardware block base structure + * @hw: Block hardware details + * @idx: CDM index + * @caps: Pointer to cdm_cfg + * @ops: handle to operations possible for this CDM + */ +struct dpu_hw_cdm { + struct dpu_hw_blk base; + struct dpu_hw_blk_reg_map hw; + + /* chroma down */ + const struct dpu_cdm_cfg *caps; + enum dpu_cdm idx; + + /* ops */ + struct dpu_hw_cdm_ops ops; +}; + +/** + * dpu_hw_cdm_init - initializes the cdm hw driver object. + * should be called once before accessing every cdm. 
+ * @dev: DRM device handle + * @cdm: CDM catalog entry for which driver object is required + * @addr : mapped register io address of MDSS + * @mdss_rev: mdss hw core revision + */ +struct dpu_hw_cdm *dpu_hw_cdm_init(struct drm_device *dev, + const struct dpu_cdm_cfg *cdm, void __iomem *addr, + const struct dpu_mdss_version *mdss_rev); + +static inline struct dpu_hw_cdm *to_dpu_hw_cdm(struct dpu_hw_blk *hw) +{ + return container_of(hw, struct dpu_hw_cdm, base); +} + +#endif /*_DPU_HW_CDM_H */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h index f319c8232ea5..9db4cf61bd29 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h @@ -466,6 +466,7 @@ struct dpu_mdss_color { #define DPU_DBG_MASK_ROT (1 << 9) #define DPU_DBG_MASK_DSPP (1 << 10) #define DPU_DBG_MASK_DSC (1 << 11) +#define DPU_DBG_MASK_CDM (1 << 12) /** * struct dpu_hw_tear_check - Struct contains parameters to configure -- cgit v1.2.3 From f58a6bf404b3cd6127d022350e67560ed323ee52 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:46 -0800 Subject: drm/msm/dpu: add cdm blocks to RM Add the RM APIs necessary to initialize and allocate CDM blocks to be used by the rest of the DPU pipeline. changes in v2: - treat cdm_init() failure as fatal - fixed the commit text Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571822/ Link: https://lore.kernel.org/r/20231212205254.12422-9-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c | 13 +++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 0bb28cf4a6cb..7ed476b96304 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -8,6 +8,7 @@ #include "dpu_kms.h" #include "dpu_hw_lm.h" #include "dpu_hw_ctl.h" +#include "dpu_hw_cdm.h" #include "dpu_hw_pingpong.h" #include "dpu_hw_sspp.h" #include "dpu_hw_intf.h" @@ -176,6 +177,18 @@ int dpu_rm_init(struct drm_device *dev, rm->hw_sspp[sspp->id - SSPP_NONE] = hw; } + if (cat->cdm) { + struct dpu_hw_cdm *hw; + + hw = dpu_hw_cdm_init(dev, cat->cdm, mmio, cat->mdss_ver); + if (IS_ERR(hw)) { + rc = PTR_ERR(hw); + DPU_ERROR("failed cdm object creation: err %d\n", rc); + goto fail; + } + rm->cdm_blk = &hw->base; + } + return 0; fail: diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h index 36752d837be4..e3f83ebc656b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h @@ -22,6 +22,7 @@ struct dpu_global_state; * @hw_wb: array of wb hardware resources * @dspp_blks: array of dspp hardware resources * @hw_sspp: array of sspp hardware resources + * @cdm_blk: cdm hardware resource */ struct dpu_rm { struct dpu_hw_blk *pingpong_blks[PINGPONG_MAX - PINGPONG_0]; @@ -33,6 +34,7 @@ struct dpu_rm { struct dpu_hw_blk *merge_3d_blks[MERGE_3D_MAX - MERGE_3D_0]; struct dpu_hw_blk *dsc_blks[DSC_MAX - DSC_0]; struct dpu_hw_sspp *hw_sspp[SSPP_MAX - SSPP_NONE]; + struct dpu_hw_blk *cdm_blk; }; /** -- cgit v1.2.3 From 5ef42da742e1ddf35f4a417f8117d10936f45d92 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:47 -0800 Subject: drm/msm/dpu: add support to allocate CDM from RM Even though there is usually only one CDM block, it can 
be used by either HDMI, DisplayPort OR Writeback interfaces. Hence its allocation needs to be tracked properly by the resource manager to ensure appropriate availability of the block. changes in v2: - move needs_cdm to topology struct Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571827/ Link: https://lore.kernel.org/r/20231212205254.12422-10-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 1 + drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c | 38 +++++++++++++++++++++++++++-- drivers/gpu/drm/msm/msm_drv.h | 2 ++ 4 files changed, 40 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h index 9db4cf61bd29..5df545904057 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h @@ -98,6 +98,7 @@ enum dpu_hw_blk_type { DPU_HW_BLK_DSPP, DPU_HW_BLK_MERGE_3D, DPU_HW_BLK_DSC, + DPU_HW_BLK_CDM, DPU_HW_BLK_MAX, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 9112a702a00f..d1207f4ec3ae 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -136,6 +136,7 @@ struct dpu_global_state { uint32_t ctl_to_enc_id[CTL_MAX - CTL_0]; uint32_t dspp_to_enc_id[DSPP_MAX - DSPP_0]; uint32_t dsc_to_enc_id[DSC_MAX - DSC_0]; + uint32_t cdm_to_enc_id; }; struct dpu_global_state diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 7ed476b96304..b58a9c2ae326 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -435,6 +435,26 @@ static int _dpu_rm_reserve_dsc(struct dpu_rm *rm, return 0; } +static int _dpu_rm_reserve_cdm(struct dpu_rm *rm, + struct dpu_global_state *global_state, + struct drm_encoder *enc) +{ + /* try allocating only one CDM block */ + if (!rm->cdm_blk) { + DPU_ERROR("CDM block does not exist\n"); + return -EIO; + } + + if (global_state->cdm_to_enc_id) { + DPU_ERROR("CDM_0 is already allocated\n"); + return -EIO; + } + + global_state->cdm_to_enc_id = enc->base.id; + + return 0; +} + static int _dpu_rm_make_reservation( struct dpu_rm *rm, struct dpu_global_state *global_state, @@ -460,6 +480,14 @@ static int _dpu_rm_make_reservation( if (ret) return ret; + if (reqs->topology.needs_cdm) { + ret = _dpu_rm_reserve_cdm(rm, global_state, enc); + if (ret) { + DPU_ERROR("unable to find CDM blk\n"); + return ret; + } + } + return ret; } @@ -470,9 +498,9 @@ static int _dpu_rm_populate_requirements( { reqs->topology = req_topology; - DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d\n", + DRM_DEBUG_KMS("num_lm: %d num_dsc: %d num_intf: %d cdm: %d\n", reqs->topology.num_lm, reqs->topology.num_dsc, - reqs->topology.num_intf); + reqs->topology.num_intf, reqs->topology.needs_cdm); return 0; } @@ -501,6 +529,7 @@ void dpu_rm_release(struct dpu_global_state *global_state, ARRAY_SIZE(global_state->dsc_to_enc_id), enc->base.id); _dpu_rm_clear_mapping(global_state->dspp_to_enc_id, ARRAY_SIZE(global_state->dspp_to_enc_id), enc->base.id); + _dpu_rm_clear_mapping(&global_state->cdm_to_enc_id, 1, enc->base.id); } int dpu_rm_reserve( @@ -574,6 +603,11 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm, hw_to_enc_id = global_state->dsc_to_enc_id; max_blks = ARRAY_SIZE(rm->dsc_blks); break; + case DPU_HW_BLK_CDM: + hw_blks = 
&rm->cdm_blk; + hw_to_enc_id = &global_state->cdm_to_enc_id; + max_blks = 1; + break; default: DPU_ERROR("blk type %d not managed by rm\n", type); return 0; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index c0446fa66b98..16a7cbc0b7dd 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -90,12 +90,14 @@ enum msm_event_wait { * @num_intf: number of interfaces the panel is mounted on * @num_dspp: number of dspp blocks used * @num_dsc: number of Display Stream Compression (DSC) blocks used + * @needs_cdm: indicates whether cdm block is needed for this display topology */ struct msm_display_topology { u32 num_lm; u32 num_intf; u32 num_dspp; u32 num_dsc; + bool needs_cdm; }; /* Commit/Event thread specific structure */ -- cgit v1.2.3 From 53d5abe67e5870142f970064b1371a10d2fe7a15 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:48 -0800 Subject: drm/msm/dpu: add CDM related logic to dpu_hw_ctl layer CDM block will need its own logic to program the flush and active bits in the dpu_hw_ctl layer. Make necessary changes in dpu_hw_ctl to support CDM programming. changes in v3: - drop unused cdm_active as reported by kbot - retained the R-b as its a trivial change changes in v2: - remove unused empty line - pass in cdm_num to update_pending_flush_cdm() Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312102047.S0I69pCs-lkp@intel.com/ Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571829/ Link: https://lore.kernel.org/r/20231212205254.12422-11-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c | 33 ++++++++++++++++++++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h | 12 +++++++++++ 2 files changed, 45 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c index e7b680a151d6..e76565c3e6a4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c @@ -32,11 +32,13 @@ #define CTL_DSC_ACTIVE 0x0E8 #define CTL_WB_ACTIVE 0x0EC #define CTL_INTF_ACTIVE 0x0F4 +#define CTL_CDM_ACTIVE 0x0F8 #define CTL_FETCH_PIPE_ACTIVE 0x0FC #define CTL_MERGE_3D_FLUSH 0x100 #define CTL_DSC_FLUSH 0x104 #define CTL_WB_FLUSH 0x108 #define CTL_INTF_FLUSH 0x110 +#define CTL_CDM_FLUSH 0x114 #define CTL_INTF_MASTER 0x134 #define CTL_DSPP_n_FLUSH(n) ((0x13C) + ((n) * 4)) @@ -46,6 +48,7 @@ #define DPU_REG_RESET_TIMEOUT_US 2000 #define MERGE_3D_IDX 23 #define DSC_IDX 22 +#define CDM_IDX 26 #define INTF_IDX 31 #define WB_IDX 16 #define DSPP_IDX 29 /* From DPU hw rev 7.x.x */ @@ -107,6 +110,7 @@ static inline void dpu_hw_ctl_clear_pending_flush(struct dpu_hw_ctl *ctx) ctx->pending_wb_flush_mask = 0; ctx->pending_merge_3d_flush_mask = 0; ctx->pending_dsc_flush_mask = 0; + ctx->pending_cdm_flush_mask = 0; memset(ctx->pending_dspp_flush_mask, 0, sizeof(ctx->pending_dspp_flush_mask)); @@ -151,6 +155,10 @@ static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx) DPU_REG_WRITE(&ctx->hw, CTL_DSC_FLUSH, ctx->pending_dsc_flush_mask); + if (ctx->pending_flush_mask & BIT(CDM_IDX)) + DPU_REG_WRITE(&ctx->hw, CTL_CDM_FLUSH, + ctx->pending_cdm_flush_mask); + DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask); } @@ -282,6 +290,13 @@ static void dpu_hw_ctl_update_pending_flush_wb(struct dpu_hw_ctl *ctx, } } +static void dpu_hw_ctl_update_pending_flush_cdm(struct 
dpu_hw_ctl *ctx, enum dpu_cdm cdm_num) +{ + /* update pending flush only if CDM_0 is flushed */ + if (cdm_num == CDM_0) + ctx->pending_flush_mask |= BIT(CDM_IDX); +} + static void dpu_hw_ctl_update_pending_flush_wb_v1(struct dpu_hw_ctl *ctx, enum dpu_wb wb) { @@ -310,6 +325,12 @@ static void dpu_hw_ctl_update_pending_flush_dsc_v1(struct dpu_hw_ctl *ctx, ctx->pending_flush_mask |= BIT(DSC_IDX); } +static void dpu_hw_ctl_update_pending_flush_cdm_v1(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num) +{ + ctx->pending_cdm_flush_mask |= BIT(cdm_num - CDM_0); + ctx->pending_flush_mask |= BIT(CDM_IDX); +} + static void dpu_hw_ctl_update_pending_flush_dspp(struct dpu_hw_ctl *ctx, enum dpu_dspp dspp, u32 dspp_sub_blk) { @@ -543,6 +564,9 @@ static void dpu_hw_ctl_intf_cfg_v1(struct dpu_hw_ctl *ctx, if (cfg->dsc) DPU_REG_WRITE(c, CTL_DSC_ACTIVE, cfg->dsc); + + if (cfg->cdm) + DPU_REG_WRITE(c, CTL_CDM_ACTIVE, cfg->cdm); } static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx, @@ -586,6 +610,7 @@ static void dpu_hw_ctl_reset_intf_cfg_v1(struct dpu_hw_ctl *ctx, u32 wb_active = 0; u32 merge3d_active = 0; u32 dsc_active; + u32 cdm_active; /* * This API resets each portion of the CTL path namely, @@ -621,6 +646,12 @@ static void dpu_hw_ctl_reset_intf_cfg_v1(struct dpu_hw_ctl *ctx, dsc_active &= ~cfg->dsc; DPU_REG_WRITE(c, CTL_DSC_ACTIVE, dsc_active); } + + if (cfg->cdm) { + cdm_active = DPU_REG_READ(c, CTL_CDM_ACTIVE); + cdm_active &= ~cfg->cdm; + DPU_REG_WRITE(c, CTL_CDM_ACTIVE, cdm_active); + } } static void dpu_hw_ctl_set_fetch_pipe_active(struct dpu_hw_ctl *ctx, @@ -654,12 +685,14 @@ static void _setup_ctl_ops(struct dpu_hw_ctl_ops *ops, ops->update_pending_flush_wb = dpu_hw_ctl_update_pending_flush_wb_v1; ops->update_pending_flush_dsc = dpu_hw_ctl_update_pending_flush_dsc_v1; + ops->update_pending_flush_cdm = dpu_hw_ctl_update_pending_flush_cdm_v1; } else { ops->trigger_flush = dpu_hw_ctl_trigger_flush; ops->setup_intf_cfg = dpu_hw_ctl_intf_cfg; ops->update_pending_flush_intf = dpu_hw_ctl_update_pending_flush_intf; ops->update_pending_flush_wb = dpu_hw_ctl_update_pending_flush_wb; + ops->update_pending_flush_cdm = dpu_hw_ctl_update_pending_flush_cdm; } ops->clear_pending_flush = dpu_hw_ctl_clear_pending_flush; ops->update_pending_flush = dpu_hw_ctl_update_pending_flush; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h index 279ebd8dfbff..ff85b5ee0acf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h @@ -39,6 +39,7 @@ struct dpu_hw_stage_cfg { * @mode_3d: 3d mux configuration * @merge_3d: 3d merge block used * @intf_mode_sel: Interface mode, cmd / vid + * @cdm: CDM block used * @stream_sel: Stream selection for multi-stream interfaces * @dsc: DSC BIT masks used */ @@ -48,6 +49,7 @@ struct dpu_hw_intf_cfg { enum dpu_3d_blend_mode mode_3d; enum dpu_merge_3d merge_3d; enum dpu_ctl_mode_sel intf_mode_sel; + enum dpu_cdm cdm; int stream_sel; unsigned int dsc; }; @@ -166,6 +168,14 @@ struct dpu_hw_ctl_ops { void (*update_pending_flush_dsc)(struct dpu_hw_ctl *ctx, enum dpu_dsc blk); + /** + * OR in the given flushbits to the cached pending_(cdm_)flush_mask + * No effect on hardware + * @ctx: ctl path ctx pointer + * @cdm_num: idx of cdm to be flushed + */ + void (*update_pending_flush_cdm)(struct dpu_hw_ctl *ctx, enum dpu_cdm cdm_num); + /** * Write the value of the pending_flush_mask to hardware * @ctx : ctl path ctx pointer @@ -239,6 +249,7 @@ struct dpu_hw_ctl_ops { * @pending_intf_flush_mask: pending 
INTF flush * @pending_wb_flush_mask: pending WB flush * @pending_dsc_flush_mask: pending DSC flush + * @pending_cdm_flush_mask: pending CDM flush * @ops: operation list */ struct dpu_hw_ctl { @@ -256,6 +267,7 @@ struct dpu_hw_ctl { u32 pending_merge_3d_flush_mask; u32 pending_dspp_flush_mask[DSPP_MAX - DSPP_0]; u32 pending_dsc_flush_mask; + u32 pending_cdm_flush_mask; /* ops */ struct dpu_hw_ctl_ops ops; -- cgit v1.2.3 From a780a82a58eccb73fda4b0712c5cc189be7c92b3 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:49 -0800 Subject: drm/msm/dpu: add an API to setup the CDM block for writeback Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by the writeback encoder to setup the CDM block. Currently, this is defined and used within the writeback's physical encoder layer however, the function can be modified to be used to setup the CDM block even for non-writeback interfaces. Until those modifications are planned and made, keep it local to writeback. changes in v3: - call bind_pingpong_blk() directly as disable() is dropped - add dpu_csc10_rgb2yuv_601l to dpu_hw_util.h and use it - fix kbot error on the function doc - document that dpu_encoder_helper_phys_setup_cdm() doesn't handle DPU_CHROMA_H1V2 changes in v2: - add the RGB2YUV CSC matrix to dpu util as needed by CDM - use dpu_hw_get_csc_cfg() to get and program CSC - drop usage of setup_csc_data() and setup_cdwn() cdm ops as they both have been merged into enable() - drop reduntant hw_cdm and hw_pp checks Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312102149.qmbCdsg2-lkp@intel.com/ Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571833/ Link: https://lore.kernel.org/r/20231212205254.12422-12-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h | 6 ++ .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 93 +++++++++++++++++++++- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 14 ++++ 3 files changed, 112 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h index e2934a6702d1..bdb89cbbcfb8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h @@ -14,8 +14,10 @@ #include "dpu_hw_intf.h" #include "dpu_hw_wb.h" #include "dpu_hw_pingpong.h" +#include "dpu_hw_cdm.h" #include "dpu_hw_ctl.h" #include "dpu_hw_top.h" +#include "dpu_hw_util.h" #include "dpu_encoder.h" #include "dpu_crtc.h" @@ -150,6 +152,7 @@ enum dpu_intr_idx { * @hw_pp: Hardware interface to the ping pong registers * @hw_intf: Hardware interface to the intf registers * @hw_wb: Hardware interface to the wb registers + * @hw_cdm: Hardware interface to the CDM registers * @dpu_kms: Pointer to the dpu_kms top level * @cached_mode: DRM mode cached at mode_set time, acted on in enable * @enabled: Whether the encoder has enabled and running a mode @@ -178,6 +181,7 @@ struct dpu_encoder_phys { struct dpu_hw_pingpong *hw_pp; struct dpu_hw_intf *hw_intf; struct dpu_hw_wb *hw_wb; + struct dpu_hw_cdm *hw_cdm; struct dpu_kms *dpu_kms; struct drm_display_mode cached_mode; enum dpu_enc_split_role split_role; @@ -207,6 +211,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys) * @wbirq_refcount: Reference count of writeback interrupt * @wb_done_timeout_cnt: number of wb done irq timeout errors * @wb_cfg: 
writeback block config to store fb related details + * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration * @wb_conn: backpointer to writeback connector * @wb_job: backpointer to current writeback job * @dest: dpu buffer layout for current writeback output buffer @@ -216,6 +221,7 @@ struct dpu_encoder_phys_wb { atomic_t wbirq_refcount; int wb_done_timeout_cnt; struct dpu_hw_wb_cfg wb_cfg; + struct dpu_hw_cdm_cfg cdm_cfg; struct drm_writeback_connector *wb_conn; struct drm_writeback_job *wb_job; struct dpu_hw_fmt_layout dest; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index c6e9c11b6971..9c87020c7119 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -259,6 +259,96 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc) } } +/** + * dpu_encoder_helper_phys_setup_cdm - setup chroma down sampling block + * This API does not handle DPU_CHROMA_H1V2. + * @phys_enc:Pointer to physical encoder + */ +static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc) +{ + struct dpu_hw_cdm *hw_cdm; + struct dpu_hw_cdm_cfg *cdm_cfg; + struct dpu_hw_pingpong *hw_pp; + struct dpu_encoder_phys_wb *wb_enc; + const struct msm_format *format; + const struct dpu_format *dpu_fmt; + struct drm_writeback_job *wb_job; + int ret; + + if (!phys_enc) + return; + + wb_enc = to_dpu_encoder_phys_wb(phys_enc); + cdm_cfg = &wb_enc->cdm_cfg; + hw_pp = phys_enc->hw_pp; + hw_cdm = phys_enc->hw_cdm; + wb_job = wb_enc->wb_job; + + format = msm_framebuffer_format(wb_enc->wb_job->fb); + dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier); + + if (!hw_cdm) + return; + + if (!DPU_FORMAT_IS_YUV(dpu_fmt)) { + DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent), + dpu_fmt->base.pixel_format); + if (hw_cdm->ops.bind_pingpong_blk) + hw_cdm->ops.bind_pingpong_blk(hw_cdm, PINGPONG_NONE); + + return; + } + + memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg)); + + cdm_cfg->output_width = wb_job->fb->width; + cdm_cfg->output_height = wb_job->fb->height; + cdm_cfg->output_fmt = dpu_fmt; + cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB; + cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ? 
+ CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT; + cdm_cfg->csc_cfg = &dpu_csc10_rgb2yuv_601l; + + /* enable 10 bit logic */ + switch (cdm_cfg->output_fmt->chroma_sample) { + case DPU_CHROMA_RGB: + cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE; + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE; + break; + case DPU_CHROMA_H2V1: + cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE; + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE; + break; + case DPU_CHROMA_420: + cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE; + cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE; + break; + case DPU_CHROMA_H1V2: + default: + DPU_ERROR("[enc:%d] unsupported chroma sampling type\n", + DRMID(phys_enc->parent)); + cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE; + cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE; + break; + } + + DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n", + DRMID(phys_enc->parent), cdm_cfg->output_width, + cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format, + cdm_cfg->output_type, cdm_cfg->output_bit_depth, + cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type); + + if (hw_cdm->ops.enable) { + cdm_cfg->pp_id = hw_pp->idx; + ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg); + if (ret < 0) { + DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n", + DRMID(phys_enc->parent), ret); + return; + } + } +} + /** * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states * @phys_enc: Pointer to physical encoder @@ -375,8 +465,9 @@ static void dpu_encoder_phys_wb_setup( dpu_encoder_phys_wb_setup_fb(phys_enc, fb); - dpu_encoder_phys_wb_setup_ctl(phys_enc); + dpu_encoder_helper_phys_setup_cdm(phys_enc); + dpu_encoder_phys_wb_setup_ctl(phys_enc); } /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index aa50005042d1..c0aaad2023da 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -19,6 +19,8 @@ #define MISR_CTRL_STATUS_CLEAR BIT(10) #define MISR_CTRL_FREE_RUN_MASK BIT(31) +#define TO_S15D16(_x_)((_x_) << 7) + static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = { { /* S15.16 format */ @@ -49,6 +51,18 @@ static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = { { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,}, }; +static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = { + { + TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032), + TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1), + TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc) + }, + { 0x00, 0x00, 0x00 }, + { 0x0040, 0x0200, 0x0200 }, + { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff }, + { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 }, +}; + /* * This is the common struct maintained by each sub block * for mapping the register offsets in this block to the -- cgit v1.2.3 From f88c0c8fdb6bb49fc969e5adfcf8ee69bdb753df Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:50 -0800 Subject: drm/msm/dpu: plug-in the cdm related bits to writeback setup To setup and enable CDM block for the writeback pipeline, lets add the pieces together to set the active bits and the flush bits for the CDM block. 
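For illustration, the resulting writeback flow can be sketched roughly as follows (a sketch only, not part of the diff below; wb_program_cdm() is a hypothetical helper name, while the fields and ops it touches are the ones added by this series):

/*
 * Sketch only: how the writeback encoder is expected to drive the CDM
 * active and flush bits once a CDM block has been assigned.
 */
static void wb_program_cdm(struct dpu_encoder_phys *phys_enc,
			   struct dpu_hw_intf_cfg *intf_cfg)
{
	struct dpu_hw_ctl *ctl = phys_enc->hw_ctl;
	struct dpu_hw_cdm *hw_cdm = phys_enc->hw_cdm;

	if (!hw_cdm)
		return;

	/* mark the CDM block as active in the CTL path (CTL_CDM_ACTIVE) */
	intf_cfg->cdm = hw_cdm->idx;

	/* queue the CDM flush (CTL_CDM_FLUSH, plus BIT(CDM_IDX) in CTL_FLUSH) */
	if (ctl->ops.update_pending_flush_cdm)
		ctl->ops.update_pending_flush_cdm(ctl, hw_cdm->idx);
}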
changes in v2: - passed the cdm idx to update_pending_flush_cdm() (have retained the R-b as its a minor change) Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571831/ Link: https://lore.kernel.org/r/20231212205254.12422-13-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 9c87020c7119..4cd2d9e3131a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -214,6 +214,7 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc) { struct dpu_hw_wb *hw_wb; struct dpu_hw_ctl *ctl; + struct dpu_hw_cdm *hw_cdm; if (!phys_enc) { DPU_ERROR("invalid encoder\n"); @@ -222,6 +223,7 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc) hw_wb = phys_enc->hw_wb; ctl = phys_enc->hw_ctl; + hw_cdm = phys_enc->hw_cdm; if (test_bit(DPU_CTL_ACTIVE_CFG, &ctl->caps->features) && (phys_enc->hw_ctl && @@ -238,6 +240,9 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc) if (mode_3d && hw_pp && hw_pp->merge_3d) intf_cfg.merge_3d = hw_pp->merge_3d->idx; + if (hw_cdm) + intf_cfg.cdm = hw_cdm->idx; + if (phys_enc->hw_pp->merge_3d && phys_enc->hw_pp->merge_3d->ops.setup_3d_mode) phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, mode_3d); @@ -411,6 +416,7 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc) struct dpu_hw_wb *hw_wb; struct dpu_hw_ctl *hw_ctl; struct dpu_hw_pingpong *hw_pp; + struct dpu_hw_cdm *hw_cdm; u32 pending_flush = 0; if (!phys_enc) @@ -419,6 +425,7 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc) hw_wb = phys_enc->hw_wb; hw_pp = phys_enc->hw_pp; hw_ctl = phys_enc->hw_ctl; + hw_cdm = phys_enc->hw_cdm; DPU_DEBUG("[wb:%d]\n", hw_wb->idx - WB_0); @@ -434,6 +441,9 @@ static void _dpu_encoder_phys_wb_update_flush(struct dpu_encoder_phys *phys_enc) hw_ctl->ops.update_pending_flush_merge_3d(hw_ctl, hw_pp->merge_3d->idx); + if (hw_cdm && hw_ctl->ops.update_pending_flush_cdm) + hw_ctl->ops.update_pending_flush_cdm(hw_ctl, hw_cdm->idx); + if (hw_ctl->ops.get_pending_flush) pending_flush = hw_ctl->ops.get_pending_flush(hw_ctl); -- cgit v1.2.3 From 8b45a26f2ba9a052a8b46163fbc9d0d8739ca15d Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:51 -0800 Subject: drm/msm/dpu: reserve cdm blocks for writeback in case of YUV output Reserve CDM blocks for writeback if the format of the output fb is YUV. At the moment, the reservation is done only for writeback but can easily be extended by relaxing the checks once other interfaces are ready to output YUV. 
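Condensed for illustration, the check added in dpu_encoder_virt_atomic_check() below boils down to roughly the following (the two mode_changed branches of the actual hunk are folded into a single comparison here):

/* does this writeback job need a CDM block, and did that requirement change? */
if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) {
	struct drm_framebuffer *fb = conn_state->writeback_job->fb;

	if (fb && DPU_FORMAT_IS_YUV(to_dpu_format(msm_framebuffer_format(fb))))
		topology.needs_cdm = true;

	/* a gained or lost CDM requirement forces a full modeset */
	if (topology.needs_cdm != !!dpu_enc->cur_master->hw_cdm)
		crtc_state->mode_changed = true;
}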
changes in v3: - squash CDM disable during encoder cleanup into this change changes in v2: - use needs_cdm from topology struct - drop fb related checks from atomic_mode_set() Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571838/ Link: https://lore.kernel.org/r/20231212205254.12422-14-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 4ceb3421003c..54042754f011 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "msm_drv.h" #include "dpu_kms.h" @@ -26,6 +27,7 @@ #include "dpu_hw_dspp.h" #include "dpu_hw_dsc.h" #include "dpu_hw_merge3d.h" +#include "dpu_hw_cdm.h" #include "dpu_formats.h" #include "dpu_encoder_phys.h" #include "dpu_crtc.h" @@ -584,6 +586,7 @@ static int dpu_encoder_virt_atomic_check( struct drm_display_mode *adj_mode; struct msm_display_topology topology; struct dpu_global_state *global_state; + struct drm_framebuffer *fb; struct drm_dsc_config *dsc; int i = 0; int ret = 0; @@ -624,6 +627,22 @@ static int dpu_encoder_virt_atomic_check( topology = dpu_encoder_get_topology(dpu_enc, dpu_kms, adj_mode, crtc_state, dsc); + /* + * Use CDM only for writeback at the moment as other interfaces cannot handle it. + * if writeback itself cannot handle cdm for some reason it will fail in its atomic_check() + * earlier. + */ + if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) { + fb = conn_state->writeback_job->fb; + + if (fb && DPU_FORMAT_IS_YUV(to_dpu_format(msm_framebuffer_format(fb)))) + topology.needs_cdm = true; + if (topology.needs_cdm && !dpu_enc->cur_master->hw_cdm) + crtc_state->mode_changed = true; + else if (!topology.needs_cdm && dpu_enc->cur_master->hw_cdm) + crtc_state->mode_changed = true; + } + /* * Release and Allocate resources on every modeset * Dont allocate when active is false. @@ -1064,6 +1083,15 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, dpu_enc->dsc_mask = dsc_mask; + if (dpu_enc->disp_info.intf_type == INTF_WB && conn_state->writeback_job) { + struct dpu_hw_blk *hw_cdm = NULL; + + dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state, + drm_enc->base.id, DPU_HW_BLK_CDM, + &hw_cdm, 1); + dpu_enc->cur_master->hw_cdm = hw_cdm ? 
to_dpu_hw_cdm(hw_cdm) : NULL; + } + cstate = to_dpu_crtc_state(crtc_state); for (i = 0; i < num_lm; i++) { @@ -2052,6 +2080,15 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) phys_enc->hw_pp->merge_3d->idx); } + if (phys_enc->hw_cdm) { + if (phys_enc->hw_cdm->ops.bind_pingpong_blk && phys_enc->hw_pp) + phys_enc->hw_cdm->ops.bind_pingpong_blk(phys_enc->hw_cdm, + PINGPONG_NONE); + if (phys_enc->hw_ctl->ops.update_pending_flush_cdm) + phys_enc->hw_ctl->ops.update_pending_flush_cdm(phys_enc->hw_ctl, + phys_enc->hw_cdm->idx); + } + if (dpu_enc->dsc) { dpu_encoder_unprep_dsc(dpu_enc); dpu_enc->dsc = NULL; -- cgit v1.2.3 From 8c16b988ba2d3dfc38fc8def0a9ccaefa50debd4 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:52 -0800 Subject: drm/msm/dpu: introduce separate wb2_format arrays for rgb and yuv Lets rename the existing wb2_formats array wb2_formats_rgb to indicate that it has only RGB formats and can be used on any chipset having a WB block. Introduce a new wb2_formats_rgb_yuv array to the catalog to indicate support for YUV formats to writeback in addition to RGB. Chipsets which have support for CDM block will use the newly added wb2_formats_rgb_yuv array. changes in v3: - change type of wb2_formats_rgb/wb2_formats_rgb_yuv to u32 to fix checkpatch warnings Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571837/ Link: https://lore.kernel.org/r/20231212205254.12422-15-quic_abhinavk@quicinc.com [DB: fixed newer catalog entries] Signed-off-by: Dmitry Baryshkov --- .../drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h | 4 +-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h | 4 +-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h | 4 +-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h | 4 +-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h | 4 +-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h | 4 +-- .../gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h | 4 +-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 37 +++++++++++++++++++++- 8 files changed, 50 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h index 04d2a73dd942..eb5dfff2ec4f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h @@ -341,8 +341,8 @@ static const struct dpu_wb_cfg sm8650_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), .xin_id = 6, .vbif_idx = VBIF_RT, .maxlinewidth = 4096, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h index 58b0f50518c8..a57d50b1f028 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h @@ -336,8 +336,8 @@ static const struct dpu_wb_cfg sm8250_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb_yuv, + .num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv), .clk_ctrl = DPU_CLK_CTRL_WB2, .xin_id = 6, .vbif_idx = VBIF_RT, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h 
b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h index bcfedfc8251a..7382ebb6e5b2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h @@ -157,8 +157,8 @@ static const struct dpu_wb_cfg sc7180_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, .xin_id = 6, .vbif_idx = VBIF_RT, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h index 194239de393e..aced16e350da 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h @@ -303,8 +303,8 @@ static const struct dpu_wb_cfg sm8350_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, .xin_id = 6, .vbif_idx = VBIF_RT, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h index 19c2b7454796..2f153e0b5c6a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h @@ -169,8 +169,8 @@ static const struct dpu_wb_cfg sc7280_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb_yuv, + .num_formats = ARRAY_SIZE(wb2_formats_rgb_yuv), .clk_ctrl = DPU_CLK_CTRL_WB2, .xin_id = 6, .vbif_idx = VBIF_RT, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h index a8a1d65a93af..a1779c5597ae 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h @@ -321,8 +321,8 @@ static const struct dpu_wb_cfg sm8450_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, .xin_id = 6, .vbif_idx = VBIF_RT, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h index bf56265967c0..ad48defa154f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h @@ -315,8 +315,8 @@ static const struct dpu_wb_cfg sm8550_wb[] = { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, .features = WB_SM8250_MASK, - .format_list = wb2_formats, - .num_formats = ARRAY_SIZE(wb2_formats), + .format_list = wb2_formats_rgb, + .num_formats = ARRAY_SIZE(wb2_formats_rgb), .xin_id = 6, .vbif_idx = VBIF_RT, .maxlinewidth = 4096, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index b304bebedb84..54e8717403a0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -202,7 +202,7 @@ static const u32 rotation_v2_formats[] = { /* TODO add formats 
after validation */ }; -static const uint32_t wb2_formats[] = { +static const u32 wb2_formats_rgb[] = { DRM_FORMAT_RGB565, DRM_FORMAT_BGR565, DRM_FORMAT_RGB888, @@ -236,6 +236,41 @@ static const uint32_t wb2_formats[] = { DRM_FORMAT_XBGR4444, }; +static const u32 wb2_formats_rgb_yuv[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, + DRM_FORMAT_RGB888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_RGBA4444, + DRM_FORMAT_RGBX4444, + DRM_FORMAT_XRGB4444, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGR888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_ABGR4444, + DRM_FORMAT_BGRA4444, + DRM_FORMAT_BGRX4444, + DRM_FORMAT_XBGR4444, + DRM_FORMAT_NV12, +}; + /************************************************************* * SSPP sub blocks config *************************************************************/ -- cgit v1.2.3 From 341fb24a67663ec0e6b8cd012a3892d92d133349 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Tue, 12 Dec 2023 12:52:53 -0800 Subject: drm/msm/dpu: add cdm blocks to dpu snapshot Now that CDM block support has been added to DPU lets also add its entry to the DPU snapshot to help debugging. Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571835/ Link: https://lore.kernel.org/r/20231212205254.12422-16-quic_abhinavk@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index d60edb93d4f7..723cc1d82143 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -944,6 +944,10 @@ static void dpu_kms_mdp_snapshot(struct msm_disp_state *disp_state, struct msm_k } } + if (cat->cdm) + msm_disp_snapshot_add_block(disp_state, cat->cdm->len, + dpu_kms->mmio + cat->cdm->base, cat->cdm->name); + pm_runtime_put_sync(&dpu_kms->pdev->dev); } -- cgit v1.2.3 From 45284ff733e4caf6c118aae5131eb7e7cf3eea5a Mon Sep 17 00:00:00 2001 From: Paloma Arellano Date: Tue, 12 Dec 2023 15:10:58 -0800 Subject: drm/msm/dpu: Add mutex lock in control vblank irq Add a mutex lock to control vblank irq to synchronize vblank enable/disable operations happening from different threads to prevent race conditions while registering/unregistering the vblank irq callback. v4: -Removed vblank_ctl_lock from dpu_encoder_virt, so it is only a parameter of dpu_encoder_phys. -Switch from atomic refcnt to a simple int counter as mutex has now been added v3: Mistakenly did not change wording in last version. It is done now. 
v2: Slightly changed wording of commit message Signed-off-by: Paloma Arellano Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/571854/ Link: https://lore.kernel.org/r/20231212231101.9240-2-quic_parellan@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 1 - drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h | 4 ++- .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c | 32 ++++++++++++++------- .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 33 ++++++++++++++-------- 4 files changed, 47 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 54042754f011..000d13fad0f7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2464,7 +2464,6 @@ void dpu_encoder_phys_init(struct dpu_encoder_phys *phys_enc, phys_enc->enc_spinlock = p->enc_spinlock; phys_enc->enable_state = DPU_ENC_DISABLED; - atomic_set(&phys_enc->vblank_refcount, 0); atomic_set(&phys_enc->pending_kickoff_cnt, 0); atomic_set(&phys_enc->pending_ctlstart_cnt, 0); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h index bdb89cbbcfb8..993f26343331 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h @@ -155,6 +155,7 @@ enum dpu_intr_idx { * @hw_cdm: Hardware interface to the CDM registers * @dpu_kms: Pointer to the dpu_kms top level * @cached_mode: DRM mode cached at mode_set time, acted on in enable + * @vblank_ctl_lock: Vblank ctl mutex lock to protect vblank_refcount * @enabled: Whether the encoder has enabled and running a mode * @split_role: Role to play in a split-panel configuration * @intf_mode: Interface mode @@ -184,11 +185,12 @@ struct dpu_encoder_phys { struct dpu_hw_cdm *hw_cdm; struct dpu_kms *dpu_kms; struct drm_display_mode cached_mode; + struct mutex vblank_ctl_lock; enum dpu_enc_split_role split_role; enum dpu_intf_mode intf_mode; spinlock_t *enc_spinlock; enum dpu_enc_enable_state enable_state; - atomic_t vblank_refcount; + int vblank_refcount; atomic_t vsync_cnt; atomic_t underrun_cnt; atomic_t pending_ctlstart_cnt; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index 76f1f66e41cd..a301e2833177 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -246,7 +246,8 @@ static int dpu_encoder_phys_cmd_control_vblank_irq( return -EINVAL; } - refcount = atomic_read(&phys_enc->vblank_refcount); + mutex_lock(&phys_enc->vblank_ctl_lock); + refcount = phys_enc->vblank_refcount; /* Slave encoders don't report vblank */ if (!dpu_encoder_phys_cmd_is_master(phys_enc)) @@ -262,16 +263,24 @@ static int dpu_encoder_phys_cmd_control_vblank_irq( phys_enc->hw_pp->idx - PINGPONG_0, enable ? 
"true" : "false", refcount); - if (enable && atomic_inc_return(&phys_enc->vblank_refcount) == 1) - ret = dpu_core_irq_register_callback(phys_enc->dpu_kms, - phys_enc->irq[INTR_IDX_RDPTR], - dpu_encoder_phys_cmd_te_rd_ptr_irq, - phys_enc); - else if (!enable && atomic_dec_return(&phys_enc->vblank_refcount) == 0) - ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms, - phys_enc->irq[INTR_IDX_RDPTR]); + if (enable) { + if (phys_enc->vblank_refcount == 0) + ret = dpu_core_irq_register_callback(phys_enc->dpu_kms, + phys_enc->irq[INTR_IDX_RDPTR], + dpu_encoder_phys_cmd_te_rd_ptr_irq, + phys_enc); + if (!ret) + phys_enc->vblank_refcount++; + } else if (!enable) { + if (phys_enc->vblank_refcount == 1) + ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms, + phys_enc->irq[INTR_IDX_RDPTR]); + if (!ret) + phys_enc->vblank_refcount--; + } end: + mutex_unlock(&phys_enc->vblank_ctl_lock); if (ret) { DRM_ERROR("vblank irq err id:%u pp:%d ret:%d, enable %s/%d\n", DRMID(phys_enc->parent), @@ -287,7 +296,7 @@ static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc, { trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_pp->idx - PINGPONG_0, - enable, atomic_read(&phys_enc->vblank_refcount)); + enable, phys_enc->vblank_refcount); if (enable) { dpu_core_irq_register_callback(phys_enc->dpu_kms, @@ -728,6 +737,9 @@ struct dpu_encoder_phys *dpu_encoder_phys_cmd_init(struct drm_device *dev, dpu_encoder_phys_init(phys_enc, p); + mutex_init(&phys_enc->vblank_ctl_lock); + phys_enc->vblank_refcount = 0; + dpu_encoder_phys_cmd_init_ops(&phys_enc->ops); phys_enc->intf_mode = INTF_MODE_CMD; cmd_enc->stream_sel = 0; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 2b4e5b5eff44..d0f56c5c4cce 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -366,7 +366,8 @@ static int dpu_encoder_phys_vid_control_vblank_irq( int ret = 0; int refcount; - refcount = atomic_read(&phys_enc->vblank_refcount); + mutex_lock(&phys_enc->vblank_ctl_lock); + refcount = phys_enc->vblank_refcount; /* Slave encoders don't report vblank */ if (!dpu_encoder_phys_vid_is_master(phys_enc)) @@ -379,18 +380,26 @@ static int dpu_encoder_phys_vid_control_vblank_irq( } DRM_DEBUG_VBL("id:%u enable=%d/%d\n", DRMID(phys_enc->parent), enable, - atomic_read(&phys_enc->vblank_refcount)); + refcount); - if (enable && atomic_inc_return(&phys_enc->vblank_refcount) == 1) - ret = dpu_core_irq_register_callback(phys_enc->dpu_kms, - phys_enc->irq[INTR_IDX_VSYNC], - dpu_encoder_phys_vid_vblank_irq, - phys_enc); - else if (!enable && atomic_dec_return(&phys_enc->vblank_refcount) == 0) - ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms, - phys_enc->irq[INTR_IDX_VSYNC]); + if (enable) { + if (phys_enc->vblank_refcount == 0) + ret = dpu_core_irq_register_callback(phys_enc->dpu_kms, + phys_enc->irq[INTR_IDX_VSYNC], + dpu_encoder_phys_vid_vblank_irq, + phys_enc); + if (!ret) + phys_enc->vblank_refcount++; + } else if (!enable) { + if (phys_enc->vblank_refcount == 1) + ret = dpu_core_irq_unregister_callback(phys_enc->dpu_kms, + phys_enc->irq[INTR_IDX_VSYNC]); + if (!ret) + phys_enc->vblank_refcount--; + } end: + mutex_unlock(&phys_enc->vblank_ctl_lock); if (ret) { DRM_ERROR("failed: id:%u intf:%d ret:%d enable:%d refcnt:%d\n", DRMID(phys_enc->parent), @@ -614,7 +623,7 @@ static void dpu_encoder_phys_vid_irq_control(struct dpu_encoder_phys *phys_enc, 
trace_dpu_enc_phys_vid_irq_ctrl(DRMID(phys_enc->parent), phys_enc->hw_intf->idx - INTF_0, enable, - atomic_read(&phys_enc->vblank_refcount)); + phys_enc->vblank_refcount); if (enable) { ret = dpu_encoder_phys_vid_control_vblank_irq(phys_enc, true); @@ -707,6 +716,8 @@ struct dpu_encoder_phys *dpu_encoder_phys_vid_init(struct drm_device *dev, DPU_DEBUG_VIDENC(phys_enc, "\n"); dpu_encoder_phys_init(phys_enc, p); + mutex_init(&phys_enc->vblank_ctl_lock); + phys_enc->vblank_refcount = 0; dpu_encoder_phys_vid_init_ops(&phys_enc->ops); phys_enc->intf_mode = INTF_MODE_VIDEO; -- cgit v1.2.3 From aee797df03c69ae39822848ac36e8fd6ed41cf4e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 13 Dec 2023 02:57:28 +0200 Subject: drm/msm/dpu: move CSC tables to dpu_hw_util.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move CSC tables out of the header file to fix following kind of warnings: In file included from drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h:8, from drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c:5: drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h:54:33: warning: ‘dpu_csc10_rgb2yuv_601l’ defined but not used [-Wunused-const-variable=] 54 | static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = { | ^~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h:39:33: warning: ‘dpu_csc10_YUV2RGB_601L’ defined but not used [-Wunused-const-variable=] 39 | static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = { | ^~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h:24:33: warning: ‘dpu_csc_YUV2RGB_601L’ defined but not used [-Wunused-const-variable=] 24 | static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = { | ^~~~~~~~~~~~~~~~~~~~ Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/571869/ Link: https://lore.kernel.org/r/20231213005728.53060-1-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c | 44 +++++++++++++++++++++++++++++ drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 44 ++--------------------------- 2 files changed, 47 insertions(+), 41 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c index 0b05061e3e62..395fdcea28b9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c @@ -561,3 +561,47 @@ bool dpu_hw_clk_force_ctrl(struct dpu_hw_blk_reg_map *c, return clk_forced_on; } + +#define TO_S15D16(_x_)((_x_) << 7) + +const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = { + { + /* S15.16 format */ + 0x00012A00, 0x00000000, 0x00019880, + 0x00012A00, 0xFFFF9B80, 0xFFFF3000, + 0x00012A00, 0x00020480, 0x00000000, + }, + /* signed bias */ + { 0xfff0, 0xff80, 0xff80,}, + { 0x0, 0x0, 0x0,}, + /* unsigned clamp */ + { 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,}, + { 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,}, +}; + +const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = { + { + /* S15.16 format */ + 0x00012A00, 0x00000000, 0x00019880, + 0x00012A00, 0xFFFF9B80, 0xFFFF3000, + 0x00012A00, 0x00020480, 0x00000000, + }, + /* signed bias */ + { 0xffc0, 0xfe00, 0xfe00,}, + { 0x0, 0x0, 0x0,}, + /* unsigned clamp */ + { 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,}, + { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,}, +}; + +const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = { + { + TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032), + TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1), + TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc) + }, + { 
0x00, 0x00, 0x00 }, + { 0x0040, 0x0200, 0x0200 }, + { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff }, + { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 }, +}; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index c0aaad2023da..25cc13b57756 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -21,47 +21,9 @@ #define TO_S15D16(_x_)((_x_) << 7) -static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = { - { - /* S15.16 format */ - 0x00012A00, 0x00000000, 0x00019880, - 0x00012A00, 0xFFFF9B80, 0xFFFF3000, - 0x00012A00, 0x00020480, 0x00000000, - }, - /* signed bias */ - { 0xfff0, 0xff80, 0xff80,}, - { 0x0, 0x0, 0x0,}, - /* unsigned clamp */ - { 0x10, 0xeb, 0x10, 0xf0, 0x10, 0xf0,}, - { 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,}, -}; - -static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = { - { - /* S15.16 format */ - 0x00012A00, 0x00000000, 0x00019880, - 0x00012A00, 0xFFFF9B80, 0xFFFF3000, - 0x00012A00, 0x00020480, 0x00000000, - }, - /* signed bias */ - { 0xffc0, 0xfe00, 0xfe00,}, - { 0x0, 0x0, 0x0,}, - /* unsigned clamp */ - { 0x40, 0x3ac, 0x40, 0x3c0, 0x40, 0x3c0,}, - { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,}, -}; - -static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = { - { - TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032), - TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1), - TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc) - }, - { 0x00, 0x00, 0x00 }, - { 0x0040, 0x0200, 0x0200 }, - { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff }, - { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 }, -}; +extern const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L; +extern const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L; +extern const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l; /* * This is the common struct maintained by each sub block -- cgit v1.2.3 From 980fffd0c69e5df0f67ee089d405899d532aeeab Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Wed, 13 Dec 2023 13:30:17 -0800 Subject: drm/msm/dpu: Set input_sel bit for INTF Set the input_sel bit for encoders as it was missed in the initial implementation. 
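For reference, once input_sel is wired through, the MISR_CTRL value programmed by dpu_hw_setup_misr() in the hunks below is composed as shown here (misr_ctrl_value() is a hypothetical helper spelled out only to illustrate the bit layout; INTF blocks pass input_sel = 0x1, layer mixers pass 0x0):

/* Sketch: the MISR_CTRL register value with input_sel plumbed through */
static u32 misr_ctrl_value(u32 frame_count, u8 input_sel)
{
	return (frame_count & MISR_FRAME_COUNT_MASK) |
	       MISR_CTRL_ENABLE |
	       MISR_CTRL_FREE_RUN_MASK |
	       ((input_sel & 0xF) << 24);
}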
Reported-by: Rob Clark Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/39 Fixes: 91143873a05d ("drm/msm/dpu: Add MISR register support for interface") Signed-off-by: Jessica Zhang Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/572007/ Link: https://lore.kernel.org/r/20231213-encoder-fixup-v4-1-6da6cd1bf118@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c | 9 +++++++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 3 ++- 4 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index 0b6a0a7dcc39..226133af7840 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -322,7 +322,7 @@ static u32 dpu_hw_intf_get_line_count(struct dpu_hw_intf *intf) static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf, bool enable, u32 frame_count) { - dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, enable, frame_count); + dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, enable, frame_count, 0x1); } static int dpu_hw_intf_collect_misr(struct dpu_hw_intf *intf, u32 *misr_value) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c index 25af52ab602f..bbc9756ecde9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c @@ -85,7 +85,7 @@ static void dpu_hw_lm_setup_border_color(struct dpu_hw_mixer *ctx, static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count) { - dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, enable, frame_count); + dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, enable, frame_count, 0x0); } static int dpu_hw_lm_collect_misr(struct dpu_hw_mixer *ctx, u32 *misr_value) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c index 395fdcea28b9..6971ddc8679f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c @@ -475,9 +475,13 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset, cfg->danger_safe_en ? 
QOS_QOS_CTRL_DANGER_SAFE_EN : 0); } +/* + * note: Aside from encoders, input_sel should be set to 0x0 by default + */ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, u32 misr_ctrl_offset, - bool enable, u32 frame_count) + bool enable, u32 frame_count, + u8 input_sel) { u32 config = 0; @@ -488,7 +492,8 @@ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, if (enable) { config = (frame_count & MISR_FRAME_COUNT_MASK) | - MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK; + MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK | + ((input_sel & 0xF) << 24); DPU_REG_WRITE(c, misr_ctrl_offset, config); } else { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index 25cc13b57756..ddb3da883e32 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -363,7 +363,8 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset, void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, u32 misr_ctrl_offset, bool enable, - u32 frame_count); + u32 frame_count, + u8 input_sel); int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c, u32 misr_ctrl_offset, -- cgit v1.2.3 From 3313c23f3eab698bc6b904520ee608fc0f7b03d0 Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Wed, 13 Dec 2023 13:30:18 -0800 Subject: drm/msm/dpu: Drop enable and frame_count parameters from dpu_hw_setup_misr() Drop the enable and frame_count parameters from dpu_hw_setup_misr() as they are always set to the same values. In addition, replace MISR_FRAME_COUNT_MASK with MISR_FRAME_COUNT as frame_count is always set to the same value. Fixes: 7b37523fb1d1 ("drm/msm/dpu: Move MISR methods to dpu_hw_util") Signed-off-by: Jessica Zhang Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/572009/ Link: https://lore.kernel.org/r/20231213-encoder-fixup-v4-2-6da6cd1bf118@quicinc.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 4 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c | 6 +++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h | 4 ++-- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c | 6 +++--- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h | 3 ++- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c | 19 +++++-------------- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h | 9 +++------ 8 files changed, 22 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index a798c10036e1..88c2e51ab166 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2014-2021 The Linux Foundation. All rights reserved. 
* Copyright (C) 2013 Red Hat * Author: Rob Clark @@ -114,7 +114,7 @@ static void dpu_crtc_setup_lm_misr(struct dpu_crtc_state *crtc_state) continue; /* Calculate MISR over 1 frame */ - m->hw_lm->ops.setup_misr(m->hw_lm, true, 1); + m->hw_lm->ops.setup_misr(m->hw_lm); } } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 000d13fad0f7..83380bc92a00 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2,7 +2,7 @@ /* * Copyright (C) 2013 Red Hat * Copyright (c) 2014-2018, 2020-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Author: Rob Clark */ @@ -262,7 +262,7 @@ void dpu_encoder_setup_misr(const struct drm_encoder *drm_enc) if (!phys->hw_intf || !phys->hw_intf->ops.setup_misr) continue; - phys->hw_intf->ops.setup_misr(phys->hw_intf, true, 1); + phys->hw_intf->ops.setup_misr(phys->hw_intf); } } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c index 226133af7840..6bba531d6dc4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ @@ -320,9 +320,9 @@ static u32 dpu_hw_intf_get_line_count(struct dpu_hw_intf *intf) return DPU_REG_READ(c, INTF_LINE_COUNT); } -static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf, bool enable, u32 frame_count) +static void dpu_hw_intf_setup_misr(struct dpu_hw_intf *intf) { - dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, enable, frame_count, 0x1); + dpu_hw_setup_misr(&intf->hw, INTF_MISR_CTRL, 0x1); } static int dpu_hw_intf_collect_misr(struct dpu_hw_intf *intf, u32 *misr_value) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h index 215401bb042e..0bd57a32144a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ @@ -95,7 +95,7 @@ struct dpu_hw_intf_ops { void (*bind_pingpong_blk)(struct dpu_hw_intf *intf, const enum dpu_pingpong pp); - void (*setup_misr)(struct dpu_hw_intf *intf, bool enable, u32 frame_count); + void (*setup_misr)(struct dpu_hw_intf *intf); int (*collect_misr)(struct dpu_hw_intf *intf, u32 *misr_value); // Tearcheck on INTF since DPU 5.0.0 diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c index bbc9756ecde9..1d3ccf3228c6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. 
*/ @@ -83,9 +83,9 @@ static void dpu_hw_lm_setup_border_color(struct dpu_hw_mixer *ctx, } } -static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count) +static void dpu_hw_lm_setup_misr(struct dpu_hw_mixer *ctx) { - dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, enable, frame_count, 0x0); + dpu_hw_setup_misr(&ctx->hw, LM_MISR_CTRL, 0x0); } static int dpu_hw_lm_collect_misr(struct dpu_hw_mixer *ctx, u32 *misr_value) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h index 8835fd106413..0a3381755249 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. */ @@ -57,7 +58,7 @@ struct dpu_hw_lm_ops { /** * setup_misr: Enable/disable MISR */ - void (*setup_misr)(struct dpu_hw_mixer *ctx, bool enable, u32 frame_count); + void (*setup_misr)(struct dpu_hw_mixer *ctx); /** * collect_misr: Read MISR signature diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c index 6971ddc8679f..dd475827314e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. */ #define pr_fmt(fmt) "[drm:%s:%d] " fmt, __func__, __LINE__ @@ -479,9 +479,7 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset, * note: Aside from encoders, input_sel should be set to 0x0 by default */ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, - u32 misr_ctrl_offset, - bool enable, u32 frame_count, - u8 input_sel) + u32 misr_ctrl_offset, u8 input_sel) { u32 config = 0; @@ -490,16 +488,9 @@ void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, /* Clear old MISR value (in case it's read before a new value is calculated)*/ wmb(); - if (enable) { - config = (frame_count & MISR_FRAME_COUNT_MASK) | - MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK | - ((input_sel & 0xF) << 24); - - DPU_REG_WRITE(c, misr_ctrl_offset, config); - } else { - DPU_REG_WRITE(c, misr_ctrl_offset, 0); - } - + config = MISR_FRAME_COUNT | MISR_CTRL_ENABLE | MISR_CTRL_FREE_RUN_MASK | + ((input_sel & 0xF) << 24); + DPU_REG_WRITE(c, misr_ctrl_offset, config); } int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index ddb3da883e32..64ded69fa903 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) 2015-2021, The Linux Foundation. All rights reserved. 
*/ @@ -13,7 +13,7 @@ #include "dpu_hw_catalog.h" #define REG_MASK(n) ((BIT(n)) - 1) -#define MISR_FRAME_COUNT_MASK 0xFF +#define MISR_FRAME_COUNT 0x1 #define MISR_CTRL_ENABLE BIT(8) #define MISR_CTRL_STATUS BIT(9) #define MISR_CTRL_STATUS_CLEAR BIT(10) @@ -361,10 +361,7 @@ void _dpu_hw_setup_qos_lut(struct dpu_hw_blk_reg_map *c, u32 offset, const struct dpu_hw_qos_cfg *cfg); void dpu_hw_setup_misr(struct dpu_hw_blk_reg_map *c, - u32 misr_ctrl_offset, - bool enable, - u32 frame_count, - u8 input_sel); + u32 misr_ctrl_offset, u8 input_sel); int dpu_hw_collect_misr(struct dpu_hw_blk_reg_map *c, u32 misr_ctrl_offset, -- cgit v1.2.3 From d4ca26ac4be0d9aea7005c40df75e6775749671b Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 7 Nov 2023 02:43:33 +0200 Subject: drm/msm/dp: call dp_display_get_next_bridge() during probe The function dp_display_get_next_bridge() can return -EPROBE_DEFER if the next bridge is not (yet) available. However, returning -EPROBE_DEFER from msm_dp_modeset_init() is not ideal. This leads to -EPROBE_DEFER being returned from component_bind, which can easily result in -EPROBE_DEFER loops. Signed-off-by: Dmitry Baryshkov Tested-by: Konrad Dybcio # sc8180x-primus Reviewed-by: Bjorn Andersson Reviewed-by: Kuogee Hsieh Patchwork: https://patchwork.freedesktop.org/patch/566208/ Link: https://lore.kernel.org/r/20231107004424.2112698-1-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/dp/dp_display.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 0405ff08d762..d37d599aec27 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1210,15 +1210,27 @@ static const struct msm_dp_desc *dp_display_get_desc(struct platform_device *pde return NULL; } -static int dp_auxbus_done_probe(struct drm_dp_aux *aux) +static int dp_display_get_next_bridge(struct msm_dp *dp); + +static int dp_display_probe_tail(struct device *dev) { - int rc; + struct msm_dp *dp = dev_get_drvdata(dev); + int ret; - rc = component_add(aux->dev, &dp_display_comp_ops); - if (rc) - DRM_ERROR("eDP component add failed, rc=%d\n", rc); + ret = dp_display_get_next_bridge(dp); + if (ret) + return ret; - return rc; + ret = component_add(dev, &dp_display_comp_ops); + if (ret) + DRM_ERROR("component add failed, rc=%d\n", ret); + + return ret; +} + +static int dp_auxbus_done_probe(struct drm_dp_aux *aux) +{ + return dp_display_probe_tail(aux->dev); } static int dp_display_probe(struct platform_device *pdev) @@ -1293,11 +1305,9 @@ static int dp_display_probe(struct platform_device *pdev) goto err; } } else { - rc = component_add(&pdev->dev, &dp_display_comp_ops); - if (rc) { - DRM_ERROR("component add failed, rc=%d\n", rc); + rc = dp_display_probe_tail(&pdev->dev); + if (rc) goto err; - } } return rc; @@ -1428,7 +1438,7 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) * For DisplayPort interfaces external bridges are optional, so * silently ignore an error if one is not present (-ENODEV). 
*/ - rc = devm_dp_parser_find_next_bridge(dp->drm_dev->dev, dp_priv->parser); + rc = devm_dp_parser_find_next_bridge(&dp->pdev->dev, dp_priv->parser); if (!dp->is_edp && rc == -ENODEV) return 0; @@ -1448,10 +1458,6 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, dp_priv = container_of(dp_display, struct dp_display_private, dp_display); - ret = dp_display_get_next_bridge(dp_display); - if (ret) - return ret; - ret = dp_bridge_init(dp_display, dev, encoder); if (ret) { DRM_DEV_ERROR(dev->dev, -- cgit v1.2.3 From a7430e2bf950394cc44b523184338b092dc6aef6 Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:35 +0800 Subject: drm/mediatek: Rename OVL_ADAPTOR_TYPE_RDMA Rename OVL_ADAPTOR_TYPE_RDMA to OVL_ADAPTOR_TYPE_MDP_RDMA to align the naming rule of mtk_ovl_adaptor_comp_id. Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-12-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c index 6bf6367853fb..114eded8177e 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c @@ -27,7 +27,7 @@ #define MTK_OVL_ADAPTOR_LAYER_NUM 4 enum mtk_ovl_adaptor_comp_type { - OVL_ADAPTOR_TYPE_RDMA = 0, + OVL_ADAPTOR_TYPE_MDP_RDMA = 0, OVL_ADAPTOR_TYPE_MERGE, OVL_ADAPTOR_TYPE_ETHDR, OVL_ADAPTOR_TYPE_NUM, @@ -62,20 +62,20 @@ struct mtk_disp_ovl_adaptor { }; static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = { - [OVL_ADAPTOR_TYPE_RDMA] = "vdo1-rdma", + [OVL_ADAPTOR_TYPE_MDP_RDMA] = "vdo1-rdma", [OVL_ADAPTOR_TYPE_MERGE] = "merge", [OVL_ADAPTOR_TYPE_ETHDR] = "ethdr", }; static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = { - [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_RDMA, 0 }, - [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_RDMA, 1 }, - [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_RDMA, 2 }, - [OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_RDMA, 3 }, - [OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_RDMA, 4 }, - [OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_RDMA, 5 }, - [OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_RDMA, 6 }, - [OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_RDMA, 7 }, + [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 0 }, + [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 1 }, + [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 2 }, + [OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 3 }, + [OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 4 }, + [OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 5 }, + [OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 6 }, + [OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 7 }, [OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, 1 }, [OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, 2 }, [OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, 3 }, @@ -388,7 +388,7 @@ static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node, static const struct of_device_id mtk_ovl_adaptor_comp_dt_ids[] = { { .compatible = "mediatek,mt8195-vdo1-rdma", - .data = (void *)OVL_ADAPTOR_TYPE_RDMA, + .data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA, }, { .compatible = "mediatek,mt8195-disp-merge", .data = (void 
*)OVL_ADAPTOR_TYPE_MERGE, -- cgit v1.2.3 From 8daf02f03ca4cf70b1f985293b8515252f9767aa Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:36 +0800 Subject: drm/mediatek: Add component ID to component match structure Add component ID to component match structure so we can configure them with a for-loop. The main reason we do such code refactoring is that there is a new hardware component called "Padding" since MT8188, while MT8195 doesn't have this module, we can't use the original logic to manage the components. While MT8195 does not define Padding in the device tree, the corresponding components will be NULL and being skipped by the functions. Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-13-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 69 +++++++++++-------------- 1 file changed, 30 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c index 114eded8177e..9f569d0f52b3 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c @@ -52,6 +52,7 @@ enum mtk_ovl_adaptor_comp_id { struct ovl_adaptor_comp_match { enum mtk_ovl_adaptor_comp_type type; + enum mtk_ddp_comp_id comp_id; int alias_id; }; @@ -68,19 +69,19 @@ static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = { }; static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = { - [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 0 }, - [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 1 }, - [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 2 }, - [OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 3 }, - [OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 4 }, - [OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 5 }, - [OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 6 }, - [OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, 7 }, - [OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, 1 }, - [OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, 2 }, - [OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, 3 }, - [OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, 4 }, - [OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, 0 }, + [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0 }, + [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1 }, + [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2 }, + [OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3 }, + [OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4 }, + [OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5 }, + [OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6 }, + [OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7 }, + [OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1 }, + [OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2 }, + [OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3 }, + [OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4 }, + [OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0 }, }; void 
mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx, @@ -314,36 +315,26 @@ size_t mtk_ovl_adaptor_get_num_formats(struct device *dev) void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex) { - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA0); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA1); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA2); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA3); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA4); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA5); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA6); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MDP_RDMA7); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE1); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE2); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE3); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_MERGE4); - mtk_mutex_add_comp(mutex, DDP_COMPONENT_ETHDR_MIXER); + int i; + struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); + + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i]) + continue; + mtk_mutex_add_comp(mutex, comp_matches[i].comp_id); + } } void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex) { - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA0); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA1); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA2); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA3); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA4); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA5); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA6); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MDP_RDMA7); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE1); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE2); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE3); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_MERGE4); - mtk_mutex_remove_comp(mutex, DDP_COMPONENT_ETHDR_MIXER); + int i; + struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); + + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i]) + continue; + mtk_mutex_remove_comp(mutex, comp_matches[i].comp_id); + } } void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next) -- cgit v1.2.3 From 7bacaee4f5d0fdf00b1734c1be67cc415b3822c0 Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:37 +0800 Subject: drm/mediatek: Manage component's clock with function pointers By registering component related functions to the pointers, we can easily manage them within a for-loop and simplify the logic of clock control significantly. 
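As a rough illustration of the pattern (the names below are simplified placeholders, not the exact driver symbols), the per-type callbacks end up in a small ops structure and a single loop drives whichever components are actually present:

struct comp_clk_ops {
	int (*clk_enable)(struct device *dev);
	void (*clk_disable)(struct device *dev);
};

static int clk_enable_all(struct device *const *comp,
			  const struct comp_clk_ops *const *ops, int num)
{
	int i, ret;

	for (i = 0; i < num; i++) {
		/* components absent on this SoC are NULL and simply skipped */
		if (!comp[i] || !ops[i] || !ops[i]->clk_enable)
			continue;
		ret = ops[i]->clk_enable(comp[i]);
		if (ret) {
			/* roll back only the clocks already enabled */
			while (--i >= 0)
				if (comp[i] && ops[i] && ops[i]->clk_disable)
					ops[i]->clk_disable(comp[i]);
			return ret;
		}
	}
	return 0;
}

In the driver itself the callbacks sit next to each comp_matches[] entry, so a new component type becomes one more table line instead of another if/else branch.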
Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-14-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 89 ++++++++++++------------- 1 file changed, 43 insertions(+), 46 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c index 9f569d0f52b3..a815dc8e2110 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c @@ -54,6 +54,7 @@ struct ovl_adaptor_comp_match { enum mtk_ovl_adaptor_comp_type type; enum mtk_ddp_comp_id comp_id; int alias_id; + const struct mtk_ddp_comp_funcs *funcs; }; struct mtk_disp_ovl_adaptor { @@ -68,20 +69,35 @@ static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = { [OVL_ADAPTOR_TYPE_ETHDR] = "ethdr", }; +static const struct mtk_ddp_comp_funcs ethdr = { + .clk_enable = mtk_ethdr_clk_enable, + .clk_disable = mtk_ethdr_clk_disable, +}; + +static const struct mtk_ddp_comp_funcs merge = { + .clk_enable = mtk_merge_clk_enable, + .clk_disable = mtk_merge_clk_disable, +}; + +static const struct mtk_ddp_comp_funcs rdma = { + .clk_enable = mtk_mdp_rdma_clk_enable, + .clk_disable = mtk_mdp_rdma_clk_disable, +}; + static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = { - [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0 }, - [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1 }, - [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2 }, - [OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3 }, - [OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4 }, - [OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5 }, - [OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6 }, - [OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7 }, - [OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1 }, - [OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2 }, - [OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3 }, - [OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4 }, - [OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0 }, + [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0, &rdma }, + [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1, &rdma }, + [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2, &rdma }, + [OVL_ADAPTOR_MDP_RDMA3] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA3, 3, &rdma }, + [OVL_ADAPTOR_MDP_RDMA4] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA4, 4, &rdma }, + [OVL_ADAPTOR_MDP_RDMA5] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA5, 5, &rdma }, + [OVL_ADAPTOR_MDP_RDMA6] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA6, 6, &rdma }, + [OVL_ADAPTOR_MDP_RDMA7] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA7, 7, &rdma }, + [OVL_ADAPTOR_MERGE0] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE1, 1, &merge }, + [OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2, &merge }, + [OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, 
DDP_COMPONENT_MERGE3, 3, &merge }, + [OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4, &merge }, + [OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, ðdr }, }; void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx, @@ -197,40 +213,25 @@ int mtk_ovl_adaptor_clk_enable(struct device *dev) ret = pm_runtime_get_sync(comp); if (ret < 0) { dev_err(dev, "Failed to enable power domain %d, err %d\n", i, ret); - goto pwr_err; + goto error; } } for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { comp = ovl_adaptor->ovl_adaptor_comp[i]; - - if (i < OVL_ADAPTOR_MERGE0) - ret = mtk_mdp_rdma_clk_enable(comp); - else if (i < OVL_ADAPTOR_ETHDR0) - ret = mtk_merge_clk_enable(comp); - else - ret = mtk_ethdr_clk_enable(comp); + if (!comp || !comp_matches[i].funcs->clk_enable) + continue; + ret = comp_matches[i].funcs->clk_enable(comp); if (ret) { dev_err(dev, "Failed to enable clock %d, err %d\n", i, ret); - goto clk_err; + while (--i >= 0) + comp_matches[i].funcs->clk_disable(comp); + i = OVL_ADAPTOR_MERGE0; + goto error; } } - - return ret; - -clk_err: - while (--i >= 0) { - comp = ovl_adaptor->ovl_adaptor_comp[i]; - if (i < OVL_ADAPTOR_MERGE0) - mtk_mdp_rdma_clk_disable(comp); - else if (i < OVL_ADAPTOR_ETHDR0) - mtk_merge_clk_disable(comp); - else - mtk_ethdr_clk_disable(comp); - } - i = OVL_ADAPTOR_MERGE0; - -pwr_err: + return 0; +error: while (--i >= 0) pm_runtime_put(ovl_adaptor->ovl_adaptor_comp[i]); @@ -245,15 +246,11 @@ void mtk_ovl_adaptor_clk_disable(struct device *dev) for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { comp = ovl_adaptor->ovl_adaptor_comp[i]; - - if (i < OVL_ADAPTOR_MERGE0) { - mtk_mdp_rdma_clk_disable(comp); + if (!comp || !comp_matches[i].funcs->clk_disable) + continue; + comp_matches[i].funcs->clk_disable(comp); + if (i < OVL_ADAPTOR_MERGE0) pm_runtime_put(comp); - } else if (i < OVL_ADAPTOR_ETHDR0) { - mtk_merge_clk_disable(comp); - } else { - mtk_ethdr_clk_disable(comp); - } } } -- cgit v1.2.3 From b97fa2f3e19b9bbac934a2cfa9218aa67f12240e Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:38 +0800 Subject: drm/mediatek: Power on/off devices with function pointers Different from OVL, OVL adaptor is a pseudo device so we didn't define it in the device tree, consequently, pm_runtime_resume_and_get() called by .atomic_enable() powers on no device. For this reason, we implement a function to power on the RDMAs in OVL adaptor, and the system will make sure the IOMMUs are powered on as well because of the device link (iommus) in the RDMA nodes in DTS. This patch separates power and clock management process, it would be easier to maintain and add extensions. 
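The CRTC code stops calling runtime PM directly and instead goes through a small dispatcher that prefers a component-specific hook and falls back to runtime PM. A hedged sketch of that dispatch, a simplified form of the mtk_ddp_comp_power_on()/mtk_ddp_comp_power_off() helpers added by this patch:

static inline int comp_power_on(struct mtk_ddp_comp *comp)
{
	/* pseudo-devices such as the OVL adaptor provide their own hook */
	if (comp->funcs && comp->funcs->power_on)
		return comp->funcs->power_on(comp->dev);

	/* ordinary components keep using runtime PM directly */
	return pm_runtime_resume_and_get(comp->dev);
}

static inline void comp_power_off(struct mtk_ddp_comp *comp)
{
	if (comp->funcs && comp->funcs->power_off)
		comp->funcs->power_off(comp->dev);
	else
		pm_runtime_put(comp->dev);
}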
Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-15-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_drv.h | 4 ++ drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 75 ++++++++++++++++++++----- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 10 ++-- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 + drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h | 20 +++++++ drivers/gpu/drm/mediatek/mtk_mdp_rdma.c | 16 ++++++ 6 files changed, 107 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h index 1311562d25cc..2d426775b7ea 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h @@ -110,6 +110,8 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next); void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, unsigned int next); +int mtk_ovl_adaptor_power_on(struct device *dev); +void mtk_ovl_adaptor_power_off(struct device *dev); int mtk_ovl_adaptor_clk_enable(struct device *dev); void mtk_ovl_adaptor_clk_disable(struct device *dev); void mtk_ovl_adaptor_config(struct device *dev, unsigned int w, @@ -151,6 +153,8 @@ void mtk_rdma_disable_vblank(struct device *dev); const u32 *mtk_rdma_get_formats(struct device *dev); size_t mtk_rdma_get_num_formats(struct device *dev); +int mtk_mdp_rdma_power_on(struct device *dev); +void mtk_mdp_rdma_power_off(struct device *dev); int mtk_mdp_rdma_clk_enable(struct device *dev); void mtk_mdp_rdma_clk_disable(struct device *dev); void mtk_mdp_rdma_start(struct device *dev, struct cmdq_pkt *cmdq_pkt); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c index a815dc8e2110..e00d8a395ca0 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c @@ -80,6 +80,8 @@ static const struct mtk_ddp_comp_funcs merge = { }; static const struct mtk_ddp_comp_funcs rdma = { + .power_on = mtk_mdp_rdma_power_on, + .power_off = mtk_mdp_rdma_power_off, .clk_enable = mtk_mdp_rdma_clk_enable, .clk_disable = mtk_mdp_rdma_clk_disable, }; @@ -201,21 +203,72 @@ void mtk_ovl_adaptor_stop(struct device *dev) mtk_ethdr_stop(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]); } -int mtk_ovl_adaptor_clk_enable(struct device *dev) +/** + * power_off - Power off the devices in OVL adaptor + * @dev: Device to be powered off + * @num: Number of the devices to be powered off + * + * Calls the .power_off() ovl_adaptor component callback if it is present. 
+ */ +static inline void power_off(struct device *dev, int num) { struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); - struct device *comp; - int ret; int i; - for (i = 0; i < OVL_ADAPTOR_MERGE0; i++) { - comp = ovl_adaptor->ovl_adaptor_comp[i]; - ret = pm_runtime_get_sync(comp); + if (num > OVL_ADAPTOR_ID_MAX) + num = OVL_ADAPTOR_ID_MAX; + + for (i = num - 1; i >= 0; i--) { + if (!ovl_adaptor->ovl_adaptor_comp[i] || + !comp_matches[i].funcs->power_off) + continue; + + comp_matches[i].funcs->power_off(ovl_adaptor->ovl_adaptor_comp[i]); + } +} + +/** + * mtk_ovl_adaptor_power_on - Power on the devices in OVL adaptor + * @dev: Device to be powered on + * + * Different from OVL, OVL adaptor is a pseudo device so + * we didn't define it in the device tree, pm_runtime_resume_and_get() + * called by .atomic_enable() power on no device in OVL adaptor, + * we have to implement a function to do the job instead. + * + * Return: Zero for success or negative number for failure. + */ +int mtk_ovl_adaptor_power_on(struct device *dev) +{ + struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); + int i, ret; + + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i] || + !comp_matches[i].funcs->power_on) + continue; + + ret = comp_matches[i].funcs->power_on(ovl_adaptor->ovl_adaptor_comp[i]); if (ret < 0) { dev_err(dev, "Failed to enable power domain %d, err %d\n", i, ret); - goto error; + power_off(dev, i); + return ret; } } + return 0; +} + +void mtk_ovl_adaptor_power_off(struct device *dev) +{ + power_off(dev, OVL_ADAPTOR_ID_MAX); +} + +int mtk_ovl_adaptor_clk_enable(struct device *dev) +{ + struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); + struct device *comp; + int ret; + int i; for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { comp = ovl_adaptor->ovl_adaptor_comp[i]; @@ -226,16 +279,10 @@ int mtk_ovl_adaptor_clk_enable(struct device *dev) dev_err(dev, "Failed to enable clock %d, err %d\n", i, ret); while (--i >= 0) comp_matches[i].funcs->clk_disable(comp); - i = OVL_ADAPTOR_MERGE0; - goto error; + return ret; } } return 0; -error: - while (--i >= 0) - pm_runtime_put(ovl_adaptor->ovl_adaptor_comp[i]); - - return ret; } void mtk_ovl_adaptor_clk_disable(struct device *dev) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index c277b9fae950..4d5ff39dc2ef 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -721,7 +721,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id); - ret = pm_runtime_resume_and_get(comp->dev); + ret = mtk_ddp_comp_power_on(comp); if (ret < 0) { DRM_DEV_ERROR(comp->dev, "Failed to enable power domain: %d\n", ret); return; @@ -731,7 +731,7 @@ static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc, ret = mtk_crtc_ddp_hw_init(mtk_crtc); if (ret) { - pm_runtime_put(comp->dev); + mtk_ddp_comp_power_off(comp); return; } @@ -744,7 +744,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; - int i, ret; + int i; DRM_DEBUG_DRIVER("%s %d\n", __func__, crtc->base.id); if (!mtk_crtc->enabled) @@ -774,9 +774,7 @@ static void mtk_drm_crtc_atomic_disable(struct drm_crtc *crtc, drm_crtc_vblank_off(crtc); mtk_crtc_ddp_hw_fini(mtk_crtc); - ret = pm_runtime_put(comp->dev); - if (ret < 0) - DRM_DEV_ERROR(comp->dev, "Failed to disable power domain: %d\n", ret); 
+ mtk_ddp_comp_power_off(comp); mtk_crtc->enabled = false; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 3046c0409353..a9b5a21cde2d 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -398,6 +398,8 @@ static const struct mtk_ddp_comp_funcs ddp_ufoe = { }; static const struct mtk_ddp_comp_funcs ddp_ovl_adaptor = { + .power_on = mtk_ovl_adaptor_power_on, + .power_off = mtk_ovl_adaptor_power_off, .clk_enable = mtk_ovl_adaptor_clk_enable, .clk_disable = mtk_ovl_adaptor_clk_disable, .config = mtk_ovl_adaptor_config, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index 4bae55bdb034..15b2eafff438 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -7,6 +7,7 @@ #define MTK_DRM_DDP_COMP_H #include +#include #include #include #include @@ -46,6 +47,8 @@ enum mtk_ddp_comp_type { struct mtk_ddp_comp; struct cmdq_pkt; struct mtk_ddp_comp_funcs { + int (*power_on)(struct device *dev); + void (*power_off)(struct device *dev); int (*clk_enable)(struct device *dev); void (*clk_disable)(struct device *dev); void (*config)(struct device *dev, unsigned int w, @@ -92,6 +95,23 @@ struct mtk_ddp_comp { const struct mtk_ddp_comp_funcs *funcs; }; +static inline int mtk_ddp_comp_power_on(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->power_on) + return comp->funcs->power_on(comp->dev); + else + return pm_runtime_resume_and_get(comp->dev); + return 0; +} + +static inline void mtk_ddp_comp_power_off(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->power_off) + comp->funcs->power_off(comp->dev); + else + pm_runtime_put(comp->dev); +} + static inline int mtk_ddp_comp_clk_enable(struct mtk_ddp_comp *comp) { if (comp->funcs && comp->funcs->clk_enable) diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c index c3adaeefd551..dc7c9e991990 100644 --- a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c @@ -242,6 +242,22 @@ size_t mtk_mdp_rdma_get_num_formats(struct device *dev) return ARRAY_SIZE(formats); } +int mtk_mdp_rdma_power_on(struct device *dev) +{ + int ret = pm_runtime_resume_and_get(dev); + + if (ret < 0) { + dev_err(dev, "Failed to power on: %d\n", ret); + return ret; + } + return 0; +} + +void mtk_mdp_rdma_power_off(struct device *dev) +{ + pm_runtime_put(dev); +} + int mtk_mdp_rdma_clk_enable(struct device *dev) { struct mtk_mdp_rdma *rdma = dev_get_drvdata(dev); -- cgit v1.2.3 From c90ca391c1e4b7fca1648e5015a57e39080e7ab3 Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:39 +0800 Subject: drm/mediatek: Start/Stop components with function pointers By registering component related functions to the pointers, we can easily manage them within a for-loop and simplify the logic of component start/stop process. 
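With clocks, power, start and stop all reached through the same per-component ops table, hooking up an additional block is mostly a matter of one more table entry. As a hypothetical illustration (not part of this patch), the Padding callbacks introduced later in this series could be registered the same way the ethdr callbacks are in the diff below:

static const struct mtk_ddp_comp_funcs padding = {
	.clk_enable  = mtk_padding_clk_enable,
	.clk_disable = mtk_padding_clk_disable,
	.start       = mtk_padding_start,
	.stop        = mtk_padding_stop,
};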
Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-16-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c index e00d8a395ca0..65c5153bdcd8 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c @@ -72,6 +72,8 @@ static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = { static const struct mtk_ddp_comp_funcs ethdr = { .clk_enable = mtk_ethdr_clk_enable, .clk_disable = mtk_ethdr_clk_disable, + .start = mtk_ethdr_start, + .stop = mtk_ethdr_stop, }; static const struct mtk_ddp_comp_funcs merge = { @@ -191,16 +193,30 @@ void mtk_ovl_adaptor_config(struct device *dev, unsigned int w, void mtk_ovl_adaptor_start(struct device *dev) { + int i; struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); - mtk_ethdr_start(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]); + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i] || + !comp_matches[i].funcs->start) + continue; + + comp_matches[i].funcs->start(ovl_adaptor->ovl_adaptor_comp[i]); + } } void mtk_ovl_adaptor_stop(struct device *dev) { + int i; struct mtk_disp_ovl_adaptor *ovl_adaptor = dev_get_drvdata(dev); - mtk_ethdr_stop(ovl_adaptor->ovl_adaptor_comp[OVL_ADAPTOR_ETHDR0]); + for (i = 0; i < OVL_ADAPTOR_ID_MAX; i++) { + if (!ovl_adaptor->ovl_adaptor_comp[i] || + !comp_matches[i].funcs->stop) + continue; + + comp_matches[i].funcs->stop(ovl_adaptor->ovl_adaptor_comp[i]); + } } /** -- cgit v1.2.3 From 9c5a05fc8fca2d3be919f92816fc8d11ebdfd7be Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:40 +0800 Subject: drm/mediatek: Sort OVL adaptor components Sort OVL adaptor components' names in alphabetical order. 
Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-17-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c index 65c5153bdcd8..30f2475d1c0b 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c @@ -27,13 +27,14 @@ #define MTK_OVL_ADAPTOR_LAYER_NUM 4 enum mtk_ovl_adaptor_comp_type { - OVL_ADAPTOR_TYPE_MDP_RDMA = 0, - OVL_ADAPTOR_TYPE_MERGE, OVL_ADAPTOR_TYPE_ETHDR, + OVL_ADAPTOR_TYPE_MDP_RDMA, + OVL_ADAPTOR_TYPE_MERGE, OVL_ADAPTOR_TYPE_NUM, }; enum mtk_ovl_adaptor_comp_id { + OVL_ADAPTOR_ETHDR0, OVL_ADAPTOR_MDP_RDMA0, OVL_ADAPTOR_MDP_RDMA1, OVL_ADAPTOR_MDP_RDMA2, @@ -46,7 +47,6 @@ enum mtk_ovl_adaptor_comp_id { OVL_ADAPTOR_MERGE1, OVL_ADAPTOR_MERGE2, OVL_ADAPTOR_MERGE3, - OVL_ADAPTOR_ETHDR0, OVL_ADAPTOR_ID_MAX }; @@ -64,9 +64,9 @@ struct mtk_disp_ovl_adaptor { }; static const char * const private_comp_stem[OVL_ADAPTOR_TYPE_NUM] = { + [OVL_ADAPTOR_TYPE_ETHDR] = "ethdr", [OVL_ADAPTOR_TYPE_MDP_RDMA] = "vdo1-rdma", [OVL_ADAPTOR_TYPE_MERGE] = "merge", - [OVL_ADAPTOR_TYPE_ETHDR] = "ethdr", }; static const struct mtk_ddp_comp_funcs ethdr = { @@ -89,6 +89,7 @@ static const struct mtk_ddp_comp_funcs rdma = { }; static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = { + [OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, ðdr }, [OVL_ADAPTOR_MDP_RDMA0] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA0, 0, &rdma }, [OVL_ADAPTOR_MDP_RDMA1] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA1, 1, &rdma }, [OVL_ADAPTOR_MDP_RDMA2] = { OVL_ADAPTOR_TYPE_MDP_RDMA, DDP_COMPONENT_MDP_RDMA2, 2, &rdma }, @@ -101,7 +102,6 @@ static const struct ovl_adaptor_comp_match comp_matches[OVL_ADAPTOR_ID_MAX] = { [OVL_ADAPTOR_MERGE1] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE2, 2, &merge }, [OVL_ADAPTOR_MERGE2] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE3, 3, &merge }, [OVL_ADAPTOR_MERGE3] = { OVL_ADAPTOR_TYPE_MERGE, DDP_COMPONENT_MERGE4, 4, &merge }, - [OVL_ADAPTOR_ETHDR0] = { OVL_ADAPTOR_TYPE_ETHDR, DDP_COMPONENT_ETHDR_MIXER, 0, ðdr }, }; void mtk_ovl_adaptor_layer_config(struct device *dev, unsigned int idx, @@ -399,6 +399,7 @@ void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex) void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsigned int next) { + mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2); @@ -406,11 +407,11 @@ void mtk_ovl_adaptor_connect(struct device *dev, struct device *mmsys_dev, unsig mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER); mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER); - mtk_mmsys_ddp_connect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next); } void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, unsigned int next) { + 
mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA0, DDP_COMPONENT_MERGE1); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA1, DDP_COMPONENT_MERGE1); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MDP_RDMA2, DDP_COMPONENT_MERGE2); @@ -418,7 +419,6 @@ void mtk_ovl_adaptor_disconnect(struct device *dev, struct device *mmsys_dev, un mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE2, DDP_COMPONENT_ETHDR_MIXER); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE3, DDP_COMPONENT_ETHDR_MIXER); mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_MERGE4, DDP_COMPONENT_ETHDR_MIXER); - mtk_mmsys_ddp_disconnect(mmsys_dev, DDP_COMPONENT_ETHDR_MIXER, next); } static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node, -- cgit v1.2.3 From 1168bb692bb9c45a31deda73cc3c87d368c7f490 Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:41 +0800 Subject: drm/mediatek: Refine device table of OVL adaptor - Adjust indentation to align with other files - Sort device table in alphabetical order - Add sentinel to device table Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-18-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c index 30f2475d1c0b..92eeb1005ec3 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c @@ -437,17 +437,10 @@ static int ovl_adaptor_comp_get_id(struct device *dev, struct device_node *node, } static const struct of_device_id mtk_ovl_adaptor_comp_dt_ids[] = { - { - .compatible = "mediatek,mt8195-vdo1-rdma", - .data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA, - }, { - .compatible = "mediatek,mt8195-disp-merge", - .data = (void *)OVL_ADAPTOR_TYPE_MERGE, - }, { - .compatible = "mediatek,mt8195-disp-ethdr", - .data = (void *)OVL_ADAPTOR_TYPE_ETHDR, - }, - {}, + { .compatible = "mediatek,mt8195-disp-ethdr", .data = (void *)OVL_ADAPTOR_TYPE_ETHDR }, + { .compatible = "mediatek,mt8195-disp-merge", .data = (void *)OVL_ADAPTOR_TYPE_MERGE }, + { .compatible = "mediatek,mt8195-vdo1-rdma", .data = (void *)OVL_ADAPTOR_TYPE_MDP_RDMA }, + { /* sentinel */ } }; static int compare_of(struct device *dev, void *data) -- cgit v1.2.3 From ba527e9a11b33cc7315171807add5b2f594b23ac Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:42 +0800 Subject: drm/mediatek: Support MT8188 Padding in display driver Padding is a new display module on MT8188, it provides ability to add pixels to width and height of a layer with specified colors. Due to hardware design, Mixer in VDOSYS1 requires width of a layer to be 2-pixel-align, or 4-pixel-align when ETHDR is enabled, we need Padding to deal with odd width. 
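For concreteness, the alignment rule above amounts to a small piece of arithmetic. A hypothetical helper (not part of the patch) computing how many dummy pixels Padding has to append for a layer of width w:

static unsigned int mixer_padding_pixels(unsigned int w, bool ethdr_enabled)
{
	/* Mixer needs 2-pixel alignment, or 4-pixel when ETHDR is enabled */
	unsigned int align = ethdr_enabled ? 4 : 2;

	return round_up(w, align) - w;
}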
Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-19-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/Makefile | 3 +- drivers/gpu/drm/mediatek/mtk_disp_drv.h | 4 + drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 + drivers/gpu/drm/mediatek/mtk_drm_drv.h | 2 +- drivers/gpu/drm/mediatek/mtk_padding.c | 160 ++++++++++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/mediatek/mtk_padding.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile index d4d193f60271..5e4436403b8d 100644 --- a/drivers/gpu/drm/mediatek/Makefile +++ b/drivers/gpu/drm/mediatek/Makefile @@ -16,7 +16,8 @@ mediatek-drm-y := mtk_disp_aal.o \ mtk_dsi.o \ mtk_dpi.o \ mtk_ethdr.o \ - mtk_mdp_rdma.o + mtk_mdp_rdma.o \ + mtk_padding.o obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h index 2d426775b7ea..74fa56339383 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h @@ -164,4 +164,8 @@ void mtk_mdp_rdma_config(struct device *dev, struct mtk_mdp_rdma_cfg *cfg, const u32 *mtk_mdp_rdma_get_formats(struct device *dev); size_t mtk_mdp_rdma_get_num_formats(struct device *dev); +int mtk_padding_clk_enable(struct device *dev); +void mtk_padding_clk_disable(struct device *dev); +void mtk_padding_start(struct device *dev); +void mtk_padding_stop(struct device *dev); #endif diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 48581da51857..f7504d1edc62 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -996,6 +996,7 @@ static struct platform_driver * const mtk_drm_drivers[] = { &mtk_dsi_driver, &mtk_ethdr_driver, &mtk_mdp_rdma_driver, + &mtk_padding_driver, }; static int __init mtk_drm_init(void) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h index 6f98fff4f1a4..33fadb08dc1c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h @@ -77,5 +77,5 @@ extern struct platform_driver mtk_dpi_driver; extern struct platform_driver mtk_dsi_driver; extern struct platform_driver mtk_ethdr_driver; extern struct platform_driver mtk_mdp_rdma_driver; - +extern struct platform_driver mtk_padding_driver; #endif /* MTK_DRM_DRV_H */ diff --git a/drivers/gpu/drm/mediatek/mtk_padding.c b/drivers/gpu/drm/mediatek/mtk_padding.c new file mode 100644 index 000000000000..0d6451c149b6 --- /dev/null +++ b/drivers/gpu/drm/mediatek/mtk_padding.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 MediaTek Inc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "mtk_disp_drv.h" +#include "mtk_drm_crtc.h" +#include "mtk_drm_ddp_comp.h" + +#define PADDING_CONTROL_REG 0x00 +#define PADDING_BYPASS BIT(0) +#define PADDING_ENABLE BIT(1) +#define PADDING_PIC_SIZE_REG 0x04 +#define PADDING_H_REG 0x08 /* horizontal */ +#define PADDING_V_REG 0x0c /* vertical */ +#define PADDING_COLOR_REG 0x10 + +/** + * struct mtk_padding - Basic information of the Padding + * @clk: Clock of the module + * @reg: Virtual address of the Padding for CPU to access + * @cmdq_reg: CMDQ setting of the Padding + * + * Every Padding should have different clock source, register base, and + * CMDQ settings, we stored these differences all together. + */ +struct mtk_padding { + struct clk *clk; + void __iomem *reg; + struct cmdq_client_reg cmdq_reg; +}; + +int mtk_padding_clk_enable(struct device *dev) +{ + struct mtk_padding *padding = dev_get_drvdata(dev); + + return clk_prepare_enable(padding->clk); +} + +void mtk_padding_clk_disable(struct device *dev) +{ + struct mtk_padding *padding = dev_get_drvdata(dev); + + clk_disable_unprepare(padding->clk); +} + +void mtk_padding_start(struct device *dev) +{ + struct mtk_padding *padding = dev_get_drvdata(dev); + + writel(PADDING_ENABLE | PADDING_BYPASS, + padding->reg + PADDING_CONTROL_REG); + + /* + * Notice that even the padding is in bypass mode, + * all the settings must be cleared to 0 or + * undefined behaviors could happen + */ + writel(0, padding->reg + PADDING_PIC_SIZE_REG); + writel(0, padding->reg + PADDING_H_REG); + writel(0, padding->reg + PADDING_V_REG); + writel(0, padding->reg + PADDING_COLOR_REG); +} + +void mtk_padding_stop(struct device *dev) +{ + struct mtk_padding *padding = dev_get_drvdata(dev); + + writel(0, padding->reg + PADDING_CONTROL_REG); +} + +static int mtk_padding_bind(struct device *dev, struct device *master, void *data) +{ + return 0; +} + +static void mtk_padding_unbind(struct device *dev, struct device *master, void *data) +{ +} + +static const struct component_ops mtk_padding_component_ops = { + .bind = mtk_padding_bind, + .unbind = mtk_padding_unbind, +}; + +static int mtk_padding_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mtk_padding *priv; + struct resource *res; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->clk = devm_clk_get(dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(dev, "failed to get clk\n"); + return PTR_ERR(priv->clk); + } + + priv->reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(priv->reg)) { + dev_err(dev, "failed to do ioremap\n"); + return PTR_ERR(priv->reg); + } + +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + ret = cmdq_dev_get_client_reg(dev, &priv->cmdq_reg, 0); + if (ret) { + dev_err(dev, "failed to get gce client reg\n"); + return ret; + } +#endif + + platform_set_drvdata(pdev, priv); + + ret = devm_pm_runtime_enable(dev); + if (ret) + return ret; + + ret = component_add(dev, &mtk_padding_component_ops); + if (ret) { + pm_runtime_disable(dev); + return dev_err_probe(dev, ret, "failed to add component\n"); + } + + return 0; +} + +static int mtk_padding_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &mtk_padding_component_ops); + return 0; +} + +static const struct of_device_id mtk_padding_driver_dt_match[] = { + { .compatible = "mediatek,mt8188-disp-padding" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mtk_padding_driver_dt_match); + +struct 
platform_driver mtk_padding_driver = { + .probe = mtk_padding_probe, + .remove = mtk_padding_remove, + .driver = { + .name = "mediatek-disp-padding", + .owner = THIS_MODULE, + .of_match_table = mtk_padding_driver_dt_match, + }, +}; -- cgit v1.2.3 From 21b287146adf39304193e4c49198021e06a28ded Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:44 +0800 Subject: drm/mediatek: Return error if MDP RDMA failed to enable the clock Return the result of clk_prepare_enable() instead of always returns 0. Fixes: f8946e2b6bb2 ("drm/mediatek: Add display MDP RDMA support for MT8195") Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-21-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_mdp_rdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c index dc7c9e991990..ee9ce9b6d078 100644 --- a/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_mdp_rdma.c @@ -262,8 +262,7 @@ int mtk_mdp_rdma_clk_enable(struct device *dev) { struct mtk_mdp_rdma *rdma = dev_get_drvdata(dev); - clk_prepare_enable(rdma->clk); - return 0; + return clk_prepare_enable(rdma->clk); } void mtk_mdp_rdma_clk_disable(struct device *dev) -- cgit v1.2.3 From 8ac6935e5689a491f0bec78fec732722b3dad094 Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:45 +0800 Subject: drm/mediatek: Remove the redundant driver data for DPI DPI input is in 1T2P mode on both MT8195 and MT8188. Remove the redundant driver data to align the settings, or the screen will glitch. 
Fixes: 2847cd7e6403 ("drm/mediatek: Add mt8188 dpi compatibles and platform data") Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-22-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dpi.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 4e3d9f7b4d8c..beb7d9d08e97 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -966,20 +966,6 @@ static const struct mtk_dpi_conf mt8186_conf = { .csc_enable_bit = CSC_ENABLE, }; -static const struct mtk_dpi_conf mt8188_dpintf_conf = { - .cal_factor = mt8195_dpintf_calculate_factor, - .max_clock_khz = 600000, - .output_fmts = mt8195_output_fmts, - .num_output_fmts = ARRAY_SIZE(mt8195_output_fmts), - .pixels_per_iter = 4, - .input_2pixel = false, - .dimension_mask = DPINTF_HPW_MASK, - .hvsize_mask = DPINTF_HSIZE_MASK, - .channel_swap_shift = DPINTF_CH_SWAP, - .yuv422_en_bit = DPINTF_YUV422_EN, - .csc_enable_bit = DPINTF_CSC_ENABLE, -}; - static const struct mtk_dpi_conf mt8192_conf = { .cal_factor = mt8183_calculate_factor, .reg_h_fre_con = 0xe0, @@ -1103,7 +1089,7 @@ static const struct of_device_id mtk_dpi_of_ids[] = { { .compatible = "mediatek,mt8173-dpi", .data = &mt8173_conf }, { .compatible = "mediatek,mt8183-dpi", .data = &mt8183_conf }, { .compatible = "mediatek,mt8186-dpi", .data = &mt8186_conf }, - { .compatible = "mediatek,mt8188-dp-intf", .data = &mt8188_dpintf_conf }, + { .compatible = "mediatek,mt8188-dp-intf", .data = &mt8195_dpintf_conf }, { .compatible = "mediatek,mt8192-dpi", .data = &mt8192_conf }, { .compatible = "mediatek,mt8195-dp-intf", .data = &mt8195_dpintf_conf }, { /* sentinel */ }, -- cgit v1.2.3 From 73b5ab27ab2ee616f2709dc212c2b0007894a12e Mon Sep 17 00:00:00 2001 From: Hsiao Chien Sung Date: Thu, 14 Dec 2023 13:58:46 +0800 Subject: drm/mediatek: Fix underrun in VDO1 when switches off the layer Do not reset Merge while using CMDQ because reset API doesn't wait for frame done event as CMDQ does and could lead to underrun when the layer is switching off. Fixes: aaf94f7c3ae6 ("drm/mediatek: Add display merge async reset control") Reviewed-by: CK Hu Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Hsiao Chien Sung Link: https://patchwork.kernel.org/project/dri-devel/patch/20231214055847.4936-23-shawn.sung@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_merge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_disp_merge.c b/drivers/gpu/drm/mediatek/mtk_disp_merge.c index e525a6b9e5b0..22f768d923d5 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_merge.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_merge.c @@ -103,7 +103,7 @@ void mtk_merge_stop_cmdq(struct device *dev, struct cmdq_pkt *cmdq_pkt) mtk_ddp_write(cmdq_pkt, 0, &priv->cmdq_reg, priv->regs, DISP_REG_MERGE_CTRL); - if (priv->async_clk) + if (!cmdq_pkt && priv->async_clk) reset_control_reset(priv->reset_ctl); } -- cgit v1.2.3 From 486c95af5d76047d5cb50727270b1961dacb9380 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:58:05 -0100 Subject: drm/amd/display: add plane 3D LUT support Wire up DC 3D LUT to DM plane color management (pre-blending). 
On AMD display HW, 3D LUT comes after a shaper curve and we always have to program a shaper curve to delinearize or normalize the color space before applying a 3D LUT (since we have a reduced number of LUT entries). In this version, the default values of 3D LUT for size and bit_depth are 17x17x17 and 12-bit, but we already provide here a more generic mechanisms to program other supported values (9x9x9 size and 10-bit). v2: - started with plane 3D LUT instead of CRTC 3D LUT support v4: - lut3d_size is the max dimension size instead of # of entries Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 102 ++++++++++++++++++++- 2 files changed, 99 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 73b12ca2fe60..131174c17cf0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8270,6 +8270,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix; bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; + bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 8239904f9d90..f55e0a69dd32 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -623,6 +623,86 @@ amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) } } +static void __to_dc_lut3d_color(struct dc_rgb *rgb, + const struct drm_color_lut lut, + int bit_precision) +{ + rgb->red = drm_color_lut_extract(lut.red, bit_precision); + rgb->green = drm_color_lut_extract(lut.green, bit_precision); + rgb->blue = drm_color_lut_extract(lut.blue, bit_precision); +} + +static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut, + uint32_t lut3d_size, + struct tetrahedral_params *params, + bool use_tetrahedral_9, + int bit_depth) +{ + struct dc_rgb *lut0; + struct dc_rgb *lut1; + struct dc_rgb *lut2; + struct dc_rgb *lut3; + int lut_i, i; + + + if (use_tetrahedral_9) { + lut0 = params->tetrahedral_9.lut0; + lut1 = params->tetrahedral_9.lut1; + lut2 = params->tetrahedral_9.lut2; + lut3 = params->tetrahedral_9.lut3; + } else { + lut0 = params->tetrahedral_17.lut0; + lut1 = params->tetrahedral_17.lut1; + lut2 = params->tetrahedral_17.lut2; + lut3 = params->tetrahedral_17.lut3; + } + + for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) { + /* + * We should consider the 3D LUT RGB values are distributed + * along four arrays lut0-3 where the first sizes 1229 and the + * other 1228. The bit depth supported for 3dlut channel is + * 12-bit, but DC also supports 10-bit. + * + * TODO: improve color pipeline API to enable the userspace set + * bit depth and 3D LUT size/stride, as specified by VA-API. 
+ */ + __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth); + __to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth); + __to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth); + __to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth); + } + /* lut0 has 1229 points (lut_size/4 + 1) */ + __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth); +} + +/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream + * @drm_lut3d: user 3D LUT + * @drm_lut3d_size: size of 3D LUT + * @lut3d: DC 3D LUT + * + * Map user 3D LUT data to DC 3D LUT and all necessary bits to program it + * on DCN accordingly. + */ +static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d, + uint32_t drm_lut3d_size, + struct dc_3dlut *lut) +{ + if (!drm_lut3d_size) { + lut->state.bits.initialized = 0; + } else { + /* Stride and bit depth are not programmable by API yet. + * Therefore, only supports 17x17x17 3D LUT (12-bit). + */ + lut->lut_3d.use_tetrahedral_9 = false; + lut->lut_3d.use_12bits = true; + lut->state.bits.initialized = 1; + __drm_3dlut_to_dc_3dlut(drm_lut3d, drm_lut3d_size, &lut->lut_3d, + lut->lut_3d.use_tetrahedral_9, + MAX_COLOR_3DLUT_BITDEPTH); + } +} + static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, bool has_rom, enum dc_transfer_func_predefined tf, @@ -665,8 +745,8 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, struct drm_plane_state *plane_state) { struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); - const struct drm_color_lut *shaper = NULL; - uint32_t exp_size, size; + const struct drm_color_lut *shaper = NULL, *lut3d = NULL; + uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE; bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut; /* shaper LUT is only available if 3D LUT color caps */ @@ -680,6 +760,17 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, return -EINVAL; } + /* The number of 3D LUT entries is the dimension size cubed */ + exp_size = has_3dlut ? dim_size * dim_size * dim_size : 0; + lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size); + + if (lut3d && size != exp_size) { + drm_dbg(&adev->ddev, + "Invalid 3D LUT size. Should be %u but got %u.\n", + exp_size, size); + return -EINVAL; + } + return 0; } @@ -976,8 +1067,8 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, { struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; - const struct drm_color_lut *shaper_lut; - uint32_t shaper_size; + const struct drm_color_lut *shaper_lut, *lut3d; + uint32_t shaper_size, lut3d_size; int ret; dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); @@ -985,7 +1076,10 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size); shaper_size = shaper_lut != NULL ? shaper_size : 0; shaper_tf = dm_plane_state->shaper_tf; + lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size); + lut3d_size = lut3d != NULL ? 
lut3d_size : 0; + amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, dc_plane_state->lut3d_func); ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false, amdgpu_tf_to_dc_tf(shaper_tf), shaper_size, -- cgit v1.2.3 From 8d26795ae61a5f64ba7db4f3240dc9ab2138d361 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:58:06 -0100 Subject: drm/amd/display: handle empty LUTs in __set_input_tf Unlike degamma, blend gamma doesn't support hardcoded curve (predefined/ROM), but we can use AMD color module to fill blend gamma parameters when we have non-linear plane gamma TF without plane gamma LUT. The regular degamma path doesn't hit this. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index f55e0a69dd32..738399538432 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -578,17 +578,21 @@ static int __set_input_tf(struct dc_transfer_func *func, struct dc_gamma *gamma = NULL; bool res; - gamma = dc_create_gamma(); - if (!gamma) - return -ENOMEM; + if (lut_size) { + gamma = dc_create_gamma(); + if (!gamma) + return -ENOMEM; - gamma->type = GAMMA_CUSTOM; - gamma->num_entries = lut_size; + gamma->type = GAMMA_CUSTOM; + gamma->num_entries = lut_size; - __drm_lut_to_dc_gamma(lut, gamma, false); + __drm_lut_to_dc_gamma(lut, gamma, false); + } - res = mod_color_calculate_degamma_params(NULL, func, gamma, true); - dc_gamma_release(&gamma); + res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != NULL); + + if (gamma) + dc_gamma_release(&gamma); return res ? 0 : -ENOMEM; } -- cgit v1.2.3 From 783ed4460fe55b01ff32a7c6ad8239974874a16a Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:58:07 -0100 Subject: drm/amd/display: add plane blend LUT and TF support Map plane blend properties to DPP blend gamma. Plane blend is a post-3D LUT curve that linearizes color space for blending. It may be defined by a user-blob LUT and/or predefined transfer function. As hardcoded curve (ROM) is not supported on blend gamma, we use AMD color module to fill parameters when setting non-linear TF with empty LUT. 
v2: - rename DRM TFs to AMDGPU TFs Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 56 ++++++++++++++++++++-- 2 files changed, 53 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 131174c17cf0..7fd85ec7f811 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8271,6 +8271,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult; bundle->surface_updates[planes_count].func_shaper = dc_plane->in_shaper_func; bundle->surface_updates[planes_count].lut3d_func = dc_plane->lut3d_func; + bundle->surface_updates[planes_count].blend_tf = dc_plane->blend_tf; } amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 738399538432..27ddc6576719 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -733,6 +733,35 @@ static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut, return ret; } +static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut, + bool has_rom, + enum dc_transfer_func_predefined tf, + uint32_t blend_size, + struct dc_transfer_func *func_blend) +{ + int ret = 0; + + if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) { + /* + * DRM plane gamma LUT or TF means we are linearizing color + * space before blending (similar to degamma programming). As + * we don't have hardcoded curve support, or we use AMD color + * module to fill the parameters that will be translated to HW + * points. + */ + func_blend->type = TF_TYPE_DISTRIBUTED_POINTS; + func_blend->tf = tf; + func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; + + ret = __set_input_tf(func_blend, blend_lut, blend_size); + } else { + func_blend->type = TF_TYPE_BYPASS; + func_blend->tf = TRANSFER_FUNCTION_LINEAR; + } + + return ret; +} + /** * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user * shaper and 3D LUTs match the hw supported size @@ -1071,8 +1100,9 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, { struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; - const struct drm_color_lut *shaper_lut, *lut3d; - uint32_t shaper_size, lut3d_size; + enum amdgpu_transfer_function blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT; + const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut; + uint32_t shaper_size, lut3d_size, blend_size; int ret; dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); @@ -1088,12 +1118,30 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, amdgpu_tf_to_dc_tf(shaper_tf), shaper_size, dc_plane_state->in_shaper_func); - if (ret) + if (ret) { drm_dbg_kms(plane_state->plane->dev, "setting plane %d shaper LUT failed.\n", plane_state->plane->index); - return ret; + return ret; + } + + blend_tf = dm_plane_state->blend_tf; + blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size); + blend_size = blend_lut != NULL ? 
blend_size : 0; + + ret = amdgpu_dm_atomic_blend_lut(blend_lut, false, + amdgpu_tf_to_dc_tf(blend_tf), + blend_size, dc_plane_state->blend_tf); + if (ret) { + drm_dbg_kms(plane_state->plane->dev, + "setting plane %d gamma lut failed.\n", + plane_state->plane->index); + + return ret; + } + + return 0; } /** -- cgit v1.2.3 From f81996637000a050477d597ef99e832079f99bd2 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:58:08 -0100 Subject: drm/amd/display: allow newer DC hardware to use degamma ROM for PQ/HLG Need to funnel the color caps through to these functions so it can check that the hardware is capable. v2: - remove redundant color caps assignment on plane degamma map (Harry) - pass color caps to degamma params v3: - remove unused color_caps parameter from set_color_properties (Harry) Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 29 ++++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 27ddc6576719..3a1ca13eaee4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -565,6 +565,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, /** * __set_input_tf - calculates the input transfer function based on expected * input space. + * @caps: dc color capabilities * @func: transfer function * @lut: lookup table that defines the color space * @lut_size: size of respective lut. @@ -572,7 +573,7 @@ static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, * Returns: * 0 in case of success. -ENOMEM if fails. 
*/ -static int __set_input_tf(struct dc_transfer_func *func, +static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size) { struct dc_gamma *gamma = NULL; @@ -589,7 +590,7 @@ static int __set_input_tf(struct dc_transfer_func *func, __drm_lut_to_dc_gamma(lut, gamma, false); } - res = mod_color_calculate_degamma_params(NULL, func, gamma, gamma != NULL); + res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL); if (gamma) dc_gamma_release(&gamma); @@ -753,7 +754,7 @@ static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut, func_blend->tf = tf; func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE; - ret = __set_input_tf(func_blend, blend_lut, blend_size); + ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size); } else { func_blend->type = TF_TYPE_BYPASS; func_blend->tf = TRANSFER_FUNCTION_LINEAR; @@ -969,7 +970,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) static int map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, - struct dc_plane_state *dc_plane_state) + struct dc_plane_state *dc_plane_state, + struct dc_color_caps *caps) { const struct drm_color_lut *degamma_lut; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; @@ -1024,7 +1026,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - r = __set_input_tf(dc_plane_state->in_transfer_func, + r = __set_input_tf(caps, dc_plane_state->in_transfer_func, degamma_lut, degamma_size); if (r) return r; @@ -1037,7 +1039,7 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, dc_plane_state->in_transfer_func->tf = tf; if (tf != TRANSFER_FUNCTION_SRGB && - !mod_color_calculate_degamma_params(NULL, + !mod_color_calculate_degamma_params(caps, dc_plane_state->in_transfer_func, NULL, false)) return -ENOMEM; @@ -1048,7 +1050,8 @@ map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc, static int __set_dm_plane_degamma(struct drm_plane_state *plane_state, - struct dc_plane_state *dc_plane_state) + struct dc_plane_state *dc_plane_state, + struct dc_color_caps *color_caps) { struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); const struct drm_color_lut *degamma_lut; @@ -1079,7 +1082,7 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state, dc_plane_state->in_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; - ret = __set_input_tf(dc_plane_state->in_transfer_func, + ret = __set_input_tf(color_caps, dc_plane_state->in_transfer_func, degamma_lut, degamma_size); if (ret) return ret; @@ -1087,7 +1090,7 @@ __set_dm_plane_degamma(struct drm_plane_state *plane_state, dc_plane_state->in_transfer_func->type = TF_TYPE_PREDEFINED; - if (!mod_color_calculate_degamma_params(NULL, + if (!mod_color_calculate_degamma_params(color_caps, dc_plane_state->in_transfer_func, NULL, false)) return -ENOMEM; } @@ -1162,6 +1165,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct dc_plane_state *dc_plane_state) { struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); + struct dc_color_caps *color_caps = NULL; bool has_crtc_cm_degamma; int ret; @@ -1171,6 +1175,9 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, return ret; } + if (dc_plane_state->ctx && dc_plane_state->ctx->dc) + color_caps = &dc_plane_state->ctx->dc->caps.color; + /* Initially, we can just bypass the DGM block. 
*/ dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS; dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; @@ -1178,7 +1185,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, /* After, we start to update values according to color props */ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb); - ret = __set_dm_plane_degamma(plane_state, dc_plane_state); + ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps); if (ret == -ENOMEM) return ret; @@ -1205,7 +1212,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, * linearize (implicit degamma) from sRGB/BT709 according to * the input space. */ - ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state); + ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps); if (ret) return ret; } -- cgit v1.2.3 From 94aeb4117343d072e3a35b9595bcbfc0058ee724 Mon Sep 17 00:00:00 2001 From: "Wang, Beyond" Date: Tue, 12 Dec 2023 21:03:04 +0800 Subject: drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue: during evict or validate happened on amdgpu_bo, the 'from' and 'to' is always same in ftrace event of amdgpu_bo_move where calling the 'trace_amdgpu_bo_move', the comment says move_notify is called before move happens, but actually it is called after move happens, here the new_mem is same as bo->resource Fix: move trace_amdgpu_bo_move from move_notify to amdgpu_bo_move Signed-off-by: Wang, Beyond Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 13 +------------ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 +++-- 3 files changed, 5 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5ad03f2afdb4..425cebcc5cbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1245,19 +1245,15 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space - * @new_mem: new information of the bufer object * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. 
*/ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - struct ttm_resource *old_mem = bo->resource; if (!amdgpu_bo_is_amdgpu_bo(bo)) return; @@ -1274,13 +1270,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, /* remember the eviction */ if (evict) atomic64_inc(&adev->num_evictions); - - /* update statistics */ - if (!new_mem) - return; - - /* move_notify is called before move happens */ - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d28e21baef16..a3ea8a82db23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -344,9 +344,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ab4a762aed5b..75c9fd2c6c2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -545,10 +545,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } + trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); out: /* update statistics */ atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict, new_mem); + amdgpu_bo_move_notify(bo, evict); return 0; } @@ -1553,7 +1554,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false, NULL); + amdgpu_bo_move_notify(bo, false); } static struct ttm_device_funcs amdgpu_bo_driver = { -- cgit v1.2.3 From a2f2f43f74cd050146cd2660bbc3c7e1e7c0da0b Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Tue, 12 Dec 2023 10:12:33 +0800 Subject: drm/amd/pm: support return vpe clock table pm supports return vpe clock table and soc clock table Signed-off-by: Peyton Lee Reviewed-by: Li Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dm_pp_smu.h | 2 ++ drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 10 ++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 1 + drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 20 ++++++++++++++++++++ 4 files changed, 33 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index 4440d08743aa..bd7ba0a25198 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -247,6 +247,7 @@ struct pp_smu_funcs_nv { #define PP_SMU_NUM_MEMCLK_DPM_LEVELS 4 #define PP_SMU_NUM_DCLK_DPM_LEVELS 8 #define PP_SMU_NUM_VCLK_DPM_LEVELS 8 +#define PP_SMU_NUM_VPECLK_DPM_LEVELS 8 struct dpm_clock { uint32_t Freq; // In MHz @@ -262,6 +263,7 @@ struct dpm_clocks { 
struct dpm_clock MemClocks[PP_SMU_NUM_MEMCLK_DPM_LEVELS]; struct dpm_clock VClocks[PP_SMU_NUM_VCLK_DPM_LEVELS]; struct dpm_clock DClocks[PP_SMU_NUM_DCLK_DPM_LEVELS]; + struct dpm_clock VPEClocks[PP_SMU_NUM_VPECLK_DPM_LEVELS]; }; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 97b40c5fa1ff..6627ee07d52d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -616,6 +616,16 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable) enable ? "enable" : "disable", ret); } +void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable) +{ + int ret = 0; + + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable); + if (ret) + DRM_ERROR("Dpm %s vpe failed, ret = %d.\n", + enable ? "enable" : "disable", ret); +} + int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index d76b0a60db44..3047ffe7f244 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -445,6 +445,7 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev); void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable); +void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable); int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version); int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable); int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 94ccdbfd7090..47fdbae4adfc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -1085,6 +1085,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu, 0, NULL); } +static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table) +{ + DpmClocks_t *clk_table = smu->smu_table.clocks_table; + uint8_t idx; + + /* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */ + for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) { + clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0; + clock_table->SocClocks[idx].Vol = 0; + } + + for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) { + clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? 
clk_table->VPEClocks[idx]:0; + clock_table->VPEClocks[idx].Vol = 0; + } + + return 0; +} + static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .check_fw_status = smu_v14_0_check_fw_status, .check_fw_version = smu_v14_0_check_fw_version, @@ -1115,6 +1134,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu, .dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable, .dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable, + .get_dpm_clock_table = smu_14_0_0_get_dpm_table, }; static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu) -- cgit v1.2.3 From cb19dc4a64598ffbfd4354083f809fae082fa4c3 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:58:09 -0100 Subject: drm/amd/display: copy 3D LUT settings from crtc state to stream_update When commiting planes, we copy color mgmt resources to the stream state. Do the same for shaper and 3D LUTs. Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7fd85ec7f811..25ec9129758b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8483,6 +8483,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, &acrtc_state->stream->csc_color_matrix; bundle->stream_update.out_transfer_func = acrtc_state->stream->out_transfer_func; + bundle->stream_update.lut3d_func = + (struct dc_3dlut *) acrtc_state->stream->lut3d_func; + bundle->stream_update.func_shaper = + (struct dc_transfer_func *) acrtc_state->stream->func_shaper; } acrtc_state->stream->abm_level = acrtc_state->abm_level; -- cgit v1.2.3 From b8b92c1bd7788b1f13d547ee2ce8a93baf55b814 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:58:10 -0100 Subject: drm/amd/display: add plane CTM driver-specific property Plane CTM for pre-blending color space conversion. Only enable driver-specific plane CTM property on drivers that support both pre- and post-blending gamut remap matrix, i.e., DCN3+ family. Otherwise it conflits with DRM CRTC CTM property. Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 ++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 +++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 7 +++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 21 +++++++++++++++++++++ 4 files changed, 37 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 790c08b8262d..2e4911050cc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -364,6 +364,8 @@ struct amdgpu_mode_info { * @plane_hdr_mult_property: */ struct drm_property *plane_hdr_mult_property; + + struct drm_property *plane_ctm_property; /** * @shaper_lut_property: Plane property to set pre-blending shaper LUT * that converts color content before 3D LUT. 
If diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 7706d412d206..2d5af83d40b5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -784,6 +784,13 @@ struct dm_plane_state { * TF is needed for any subsequent linear-to-non-linear transforms. */ __u64 hdr_mult; + /** + * @ctm: + * + * Color transformation matrix. The blob (if not NULL) is a &struct + * drm_color_ctm_3x4. + */ + struct drm_property_blob *ctm; /** * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an * array of &struct drm_color_lut. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 3a1ca13eaee4..3eed47736b26 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -240,6 +240,13 @@ amdgpu_dm_create_color_properties(struct amdgpu_device *adev) return -ENOMEM; adev->mode_info.plane_hdr_mult_property = prop; + prop = drm_property_create(adev_to_drm(adev), + DRM_MODE_PROP_BLOB, + "AMD_PLANE_CTM", 0); + if (!prop) + return -ENOMEM; + adev->mode_info.plane_ctm_property = prop; + prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_BLOB, "AMD_PLANE_SHAPER_LUT", 0); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 35a4732483ba..f10c5154d06a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1366,6 +1366,9 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct if (old_dm_plane_state->degamma_lut) dm_plane_state->degamma_lut = drm_property_blob_get(old_dm_plane_state->degamma_lut); + if (old_dm_plane_state->ctm) + dm_plane_state->ctm = + drm_property_blob_get(old_dm_plane_state->ctm); if (old_dm_plane_state->shaper_lut) dm_plane_state->shaper_lut = drm_property_blob_get(old_dm_plane_state->shaper_lut); @@ -1450,6 +1453,8 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, if (dm_plane_state->degamma_lut) drm_property_blob_put(dm_plane_state->degamma_lut); + if (dm_plane_state->ctm) + drm_property_blob_put(dm_plane_state->ctm); if (dm_plane_state->lut3d) drm_property_blob_put(dm_plane_state->lut3d); if (dm_plane_state->shaper_lut) @@ -1490,6 +1495,11 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, dm->adev->mode_info.plane_hdr_mult_property, AMDGPU_HDR_MULT_DEFAULT); + /* Only enable plane CTM if both DPP and MPC gamut remap is available. 
*/ + if (dm->dc->caps.color.mpc.gamut_remap) + drm_object_attach_property(&plane->base, + dm->adev->mode_info.plane_ctm_property, 0); + if (dpp_color_caps.hw_3d_lut) { drm_object_attach_property(&plane->base, mode_info.plane_shaper_lut_property, 0); @@ -1547,6 +1557,14 @@ dm_atomic_plane_set_property(struct drm_plane *plane, dm_plane_state->hdr_mult = val; dm_plane_state->base.color_mgmt_changed = 1; } + } else if (property == adev->mode_info.plane_ctm_property) { + ret = drm_property_replace_blob_from_id(plane->dev, + &dm_plane_state->ctm, + val, + sizeof(struct drm_color_ctm), -1, + &replaced); + dm_plane_state->base.color_mgmt_changed |= replaced; + return ret; } else if (property == adev->mode_info.plane_shaper_lut_property) { ret = drm_property_replace_blob_from_id(plane->dev, &dm_plane_state->shaper_lut, @@ -1608,6 +1626,9 @@ dm_atomic_plane_get_property(struct drm_plane *plane, *val = dm_plane_state->degamma_tf; } else if (property == adev->mode_info.plane_hdr_mult_property) { *val = dm_plane_state->hdr_mult; + } else if (property == adev->mode_info.plane_ctm_property) { + *val = (dm_plane_state->ctm) ? + dm_plane_state->ctm->base.id : 0; } else if (property == adev->mode_info.plane_shaper_lut_property) { *val = (dm_plane_state->shaper_lut) ? dm_plane_state->shaper_lut->base.id : 0; -- cgit v1.2.3 From 5f82a0c90ccaf0d1390b5c1b83a83d38bca526da Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Tue, 12 Dec 2023 11:11:24 +0800 Subject: drm/amdgpu/vpe: enable vpe dpm enable vpe dpm Signed-off-by: Peyton Lee Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 249 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h | 12 ++ drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c | 15 ++ 3 files changed, 276 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index e81579708e96..6f149b54d4d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_vpe.h" +#include "amdgpu_smu.h" #include "soc15_common.h" #include "vpe_v6_1.h" @@ -33,8 +34,186 @@ /* VPE CSA resides in the 4th page of CSA */ #define AMDGPU_CSA_VPE_OFFSET (4096 * 3) +/* 1 second timeout */ +#define VPE_IDLE_TIMEOUT msecs_to_jiffies(1000) + +#define VPE_MAX_DPM_LEVEL 4 +#define FIXED1_8_BITS_PER_FRACTIONAL_PART 8 +#define GET_PRATIO_INTEGER_PART(x) ((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART) + static void vpe_set_ring_funcs(struct amdgpu_device *adev); +static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +static inline uint16_t complete_integer_division_u16( + uint16_t dividend, + uint16_t divisor, + uint16_t *remainder) +{ + return div16_u16_rem(dividend, divisor, (uint16_t *)remainder); +} + +static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator) +{ + bool arg1_negative = numerator < 0; + bool arg2_negative = denominator < 0; + + uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator); + uint16_t arg2_value = (uint16_t)(arg2_negative ? 
-denominator : denominator); + + uint16_t remainder; + + /* determine integer part */ + uint16_t res_value = complete_integer_division_u16( + arg1_value, arg2_value, &remainder); + + if (res_value > 127 /* CHAR_MAX */) + return 0; + + /* determine fractional part */ + { + unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART; + + do { + remainder <<= 1; + + res_value <<= 1; + + if (remainder >= arg2_value) { + res_value |= 1; + remainder -= arg2_value; + } + } while (--i != 0); + } + + /* round up LSB */ + { + uint16_t summand = (remainder << 1) >= arg2_value; + + if ((res_value + summand) > 32767 /* SHRT_MAX */) + return 0; + + res_value += summand; + } + + if (arg1_negative ^ arg2_negative) + res_value = -res_value; + + return res_value; +} + +static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency) +{ + uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency); + + if (GET_PRATIO_INTEGER_PART(pratio) > 1) + pratio = 0; + + return pratio; +} + +/* + * VPE has 4 DPM levels from level 0 (lowerest) to 3 (highest), + * VPE FW will dynamically decide which level should be used according to current loading. + * + * Get VPE and SOC clocks from PM, and select the appropriate four clock values, + * calculate the ratios of adjusting from one clock to another. + * The VPE FW can then request the appropriate frequency from the PMFW. + */ +int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t dpm_ctl; + + if (adev->pm.dpm_enabled) { + struct dpm_clocks clock_table = { 0 }; + struct dpm_clock *VPEClks; + struct dpm_clock *SOCClks; + uint32_t idx; + uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0; + uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0; + + dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable)); + dpm_ctl |= 1; /* DPM enablement */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); + + /* Get VPECLK and SOCCLK */ + if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) { + dev_dbg(adev->dev, "%s: get clock failed!\n", __func__); + goto disable_dpm; + } + + SOCClks = clock_table.SocClocks; + VPEClks = clock_table.VPEClocks; + + /* vpe dpm only cares 4 levels. */ + for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) { + uint32_t soc_dpm_level; + uint32_t min_freq; + + if (idx == 0) + soc_dpm_level = 0; + else + soc_dpm_level = (idx * 2) + 1; + + /* clamp the max level */ + if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1) + soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1; + + min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ? 
+ SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq; + + switch (idx) { + case 0: + pratio_vmin_freq = min_freq; + break; + case 1: + pratio_vmid_freq = min_freq; + break; + case 2: + pratio_vnorm_freq = min_freq; + break; + case 3: + pratio_vmax_freq = min_freq; + break; + default: + break; + } + } + + if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) { + uint32_t pratio_ctl; + + pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq); + pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq); + pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq); + + pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18); + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl); /* PRatio */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000); /* 1ms, unit=1/24MHz */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000); /* 50ms */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */ + dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__); + } else { + dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__); + goto disable_dpm; + } + } + return 0; + +disable_dpm: + dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable)); + dpm_ctl &= 0xfffffffe; /* Disable DPM */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); + dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); + return 0; +} + int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) { struct amdgpu_firmware_info ucode = { @@ -134,6 +313,19 @@ static int vpe_early_init(void *handle) return 0; } +static void vpe_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vpe.idle_work.work); + unsigned int fences = 0; + + fences += amdgpu_fence_count_emitted(&adev->vpe.ring); + + if (fences == 0) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + else + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); +} static int vpe_common_init(struct amdgpu_vpe *vpe) { @@ -150,6 +342,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe) return r; } + vpe->context_started = false; + INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler); + return 0; } @@ -219,6 +414,9 @@ static int vpe_hw_fini(void *handle) vpe_ring_stop(vpe); + /* Power off VPE */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + return 0; } @@ -226,6 +424,8 @@ static int vpe_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cancel_delayed_work_sync(&adev->vpe.idle_work); + return vpe_hw_fini(adev); } @@ -430,6 +630,21 @@ static int vpe_set_clockgating_state(void *handle, static int vpe_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vpe *vpe = &adev->vpe; + + if (!adev->pm.dpm_enabled) + dev_err(adev->dev, "Without PM, cannot support powergating\n"); + + dev_dbg(adev->dev, "%s: %s!\n", __func__, (state == AMD_PG_STATE_GATE) ? 
"GATE":"UNGATE"); + + if (state == AMD_PG_STATE_GATE) { + amdgpu_dpm_enable_vpe(adev, false); + vpe->context_started = false; + } else { + amdgpu_dpm_enable_vpe(adev, true); + } + return 0; } @@ -595,6 +810,38 @@ err0: return ret; } +static void vpe_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vpe *vpe = &adev->vpe; + + cancel_delayed_work_sync(&adev->vpe.idle_work); + + /* Power on VPE and notify VPE of new context */ + if (!vpe->context_started) { + uint32_t context_notify; + + /* Power on VPE */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE); + + /* Indicates that a job from a new context has been submitted. */ + context_notify = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator)); + if ((context_notify & 0x1) == 0) + context_notify |= 0x1; + else + context_notify &= ~(0x1); + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), context_notify); + vpe->context_started = true; + } +} + +static void vpe_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); +} + static const struct amdgpu_ring_funcs vpe_ring_funcs = { .type = AMDGPU_RING_TYPE_VPE, .align_mask = 0xf, @@ -625,6 +872,8 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .init_cond_exec = vpe_ring_init_cond_exec, .patch_cond_exec = vpe_ring_patch_cond_exec, .preempt_ib = vpe_ring_preempt_ib, + .begin_use = vpe_ring_begin_use, + .end_use = vpe_ring_end_use, }; static void vpe_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h index 29d56f7ae4a9..1153ddaea64d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h @@ -47,6 +47,15 @@ struct vpe_regs { uint32_t queue0_rb_wptr_lo; uint32_t queue0_rb_wptr_hi; uint32_t queue0_preempt; + + uint32_t dpm_enable; + uint32_t dpm_pratio; + uint32_t dpm_request_interval; + uint32_t dpm_decision_threshold; + uint32_t dpm_busy_clamp_threshold; + uint32_t dpm_idle_clamp_threshold; + uint32_t dpm_request_lv; + uint32_t context_indicator; }; struct amdgpu_vpe { @@ -63,12 +72,15 @@ struct amdgpu_vpe { struct amdgpu_bo *cmdbuf_obj; uint64_t cmdbuf_gpu_addr; uint32_t *cmdbuf_cpu_addr; + struct delayed_work idle_work; + bool context_started; }; int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev); int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe); int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe); int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe); +int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe); #define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0) #define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? 
(vpe)->funcs->ring_start((vpe)) : 0) diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index 174f13eff575..d20060a51e05 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -96,6 +96,10 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; amdgpu_vpe_psp_update_sram(adev); + + /* Config DPM */ + amdgpu_vpe_configure_dpm(vpe); + return 0; } @@ -128,6 +132,8 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) } vpe_v6_1_halt(vpe, false); + /* Config DPM */ + amdgpu_vpe_configure_dpm(vpe); return 0; } @@ -264,6 +270,15 @@ static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe) vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI; vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT; + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4; + vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3; + vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4; + vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2; + vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3; + vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1; + vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3; + return 0; } -- cgit v1.2.3 From 34dc227bf2f34085313be39d76b12f08bfe8efc0 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 13 Dec 2023 14:58:34 +0800 Subject: drm/amd/pm: add power save mode workload for smu 13.0.10 add power save mode workload for smu 13.0.10, so that in compute mode, pmfw will add margin since some applications requres higher margin. Signed-off-by: Kenneth Feng Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 23 ++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a24aa886c636..231122622a9c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2545,16 +2545,19 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask = 1 << workload_type; - /* Add optimizations for SMU13.0.0. Reuse the power saving profile */ - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && - (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0)) && - ((smu->adev->pm.fw_version == 0x004e6601) || - (smu->adev->pm.fw_version >= 0x004e7400))) { - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - PP_SMC_POWER_PROFILE_POWERSAVING); - if (workload_type >= 0) - workload_mask |= 1 << workload_type; + /* Add optimizations for SMU13.0.0/10. 
Reuse the power saving profile */ + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) { + if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && + ((smu->adev->pm.fw_version == 0x004e6601) || + (smu->adev->pm.fw_version >= 0x004e7300))) || + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + smu->adev->pm.fw_version >= 0x00504500)) { + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_POWERSAVING); + if (workload_type >= 0) + workload_mask |= 1 << workload_type; + } } return smu_cmn_send_smc_msg_with_param(smu, -- cgit v1.2.3 From 3dad69090743c5f4642aeb628b8542a1e335dded Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 16 Nov 2023 18:58:11 -0100 Subject: drm/amd/display: add plane CTM support Map the plane CTM driver-specific property to DC plane, instead of DC stream. The remaining steps to program DPP block are already implemented on DC shared-code. v3: - fix comment about plane and CRTC CTMs priorities (Harry) Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 26 ++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 25ec9129758b..d7ac020bd8af 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9983,6 +9983,7 @@ static bool should_reset_plane(struct drm_atomic_state *state, if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf || dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut || dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult || + dm_old_other_state->ctm != dm_new_other_state->ctm || dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut || dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf || dm_old_other_state->lut3d != dm_new_other_state->lut3d || diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 3eed47736b26..d52c3333ea13 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1172,6 +1172,8 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct dc_plane_state *dc_plane_state) { struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); + struct drm_color_ctm *ctm = NULL; struct dc_color_caps *color_caps = NULL; bool has_crtc_cm_degamma; int ret; @@ -1224,5 +1226,29 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, return ret; } + /* Setup CRTC CTM. */ + if (dm_plane_state->ctm) { + ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data; + /* + * DCN2 and older don't support both pre-blending and + * post-blending gamut remap. For this HW family, if we have + * the plane and CRTC CTMs simultaneously, CRTC CTM takes + * priority, and we discard plane CTM, as implemented in + * dcn10_program_gamut_remap(). However, DCN3+ has DPP + * (pre-blending) and MPC (post-blending) `gamut remap` blocks; + * therefore, we can program plane and CRTC CTMs together by + * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP, + * as it's done by dcn30_program_gamut_remap(). 
+ */ + __drm_ctm_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); + + dc_plane_state->gamut_remap_matrix.enable_remap = true; + dc_plane_state->input_csc_color_matrix.enable_adjustment = false; + } else { + /* Bypass CTM. */ + dc_plane_state->gamut_remap_matrix.enable_remap = false; + dc_plane_state->input_csc_color_matrix.enable_adjustment = false; + } + return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state); } -- cgit v1.2.3 From 6872a189be508b9383bc081d462a5d99cbb8319d Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 16 Nov 2023 18:58:12 -0100 Subject: drm/amd/display: Add 3x4 CTM support for plane CTM Create drm_color_ctm_3x4 to support 3x4-dimension plane CTM matrix and convert DRM CTM to DC CSC float matrix. v3: - rename ctm2 to ctm_3x4 (Harry) Reviewed-by: Harry Wentland Signed-off-by: Joshua Ashton Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 28 +++++++++++++++++++--- .../drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 2 +- include/uapi/drm/drm_mode.h | 8 +++++++ 3 files changed, 34 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index d52c3333ea13..96aecc1a71a3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -434,6 +434,28 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } } +/** + * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix + * @ctm: DRM color transformation matrix with 3x4 dimensions + * @matrix: DC CSC float matrix + * + * The matrix needs to be a 3x4 (12 entry) matrix. + */ +static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm, + struct fixed31_32 *matrix) +{ + int i; + + /* The format provided is S31.32, using signed-magnitude representation. + * Our fixed31_32 is also S31.32, but is using 2's complement. We have + * to convert from signed-magnitude to 2's complement. + */ + for (i = 0; i < 12; i++) { + /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ + matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]); + } +} + /** * __set_legacy_tf - Calculates the legacy transfer function * @func: transfer function @@ -1173,7 +1195,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, { struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); - struct drm_color_ctm *ctm = NULL; + struct drm_color_ctm_3x4 *ctm = NULL; struct dc_color_caps *color_caps = NULL; bool has_crtc_cm_degamma; int ret; @@ -1228,7 +1250,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, /* Setup CRTC CTM. */ if (dm_plane_state->ctm) { - ctm = (struct drm_color_ctm *)dm_plane_state->ctm->data; + ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data; /* * DCN2 and older don't support both pre-blending and * post-blending gamut remap. For this HW family, if we have @@ -1240,7 +1262,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP, * as it's done by dcn30_program_gamut_remap(). 
*/ - __drm_ctm_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); + __drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix); dc_plane_state->gamut_remap_matrix.enable_remap = true; dc_plane_state->input_csc_color_matrix.enable_adjustment = false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index f10c5154d06a..8a4c40b4c27e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1561,7 +1561,7 @@ dm_atomic_plane_set_property(struct drm_plane *plane, ret = drm_property_replace_blob_from_id(plane->dev, &dm_plane_state->ctm, val, - sizeof(struct drm_color_ctm), -1, + sizeof(struct drm_color_ctm_3x4), -1, &replaced); dm_plane_state->base.color_mgmt_changed |= replaced; return ret; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 95630f170110..39d9ac0c0a80 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -846,6 +846,14 @@ struct drm_color_ctm { __u64 matrix[9]; }; +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. + */ + __u64 matrix[12]; +}; + struct drm_color_lut { /* * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and -- cgit v1.2.3 From afe58346d5d3887b3e49ff623d2f2e471f232a8d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Nov 2023 17:26:29 -0500 Subject: drm/amdgpu/debugfs: fix error code when smc register accessors are NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should be -EOPNOTSUPP. Fixes: 5104fdf50d32 ("drm/amdgpu: Fix a null pointer access when the smc_rreg pointer is NULL") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 424bed738296..2cebf2145d9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -755,7 +755,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, int r; if (!adev->smc_rreg) - return -EPERM; + return -EOPNOTSUPP; if (size & 0x3 || *pos & 0x3) return -EINVAL; @@ -814,7 +814,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * int r; if (!adev->smc_wreg) - return -EPERM; + return -EOPNOTSUPP; if (size & 0x3 || *pos & 0x3) return -EINVAL; -- cgit v1.2.3 From 804c49ef30735d70c1df0c58ebec313149a3933c Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 14 Dec 2023 09:01:54 +0800 Subject: drm/amd/pm: Remove unneeded semicolon ./drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c:1418:2-3: Unneeded semicolon Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7743 Signed-off-by: Yang Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index d409857fd622..c16703868e5c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1415,7 +1415,7 @@ static int smu_wbrf_event_handler(struct notifier_block *nb, break; default: return NOTIFY_DONE; - }; + } 
return NOTIFY_OK; } -- cgit v1.2.3 From ac16667237a82e2597e329eb9bc520d1cf9dff30 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 14 Dec 2023 23:24:11 +0800 Subject: drm/amd/pm: fix a double-free in si_dpm_init When the allocation of adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries fails, amdgpu_free_extended_power_table is called to free some fields of adev. However, when the control flow returns to si_dpm_sw_init, it goes to label dpm_failed and calls si_dpm_fini, which calls amdgpu_free_extended_power_table again and free those fields again. Thus a double-free is triggered. Fixes: 841686df9f7d ("drm/amdgpu: add SI DPM support (v4)") Signed-off-by: Zhipeng Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index fc8e4ac6c8e7..df4f20293c16 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7379,10 +7379,9 @@ static int si_dpm_init(struct amdgpu_device *adev) kcalloc(4, sizeof(struct amdgpu_clock_voltage_dependency_entry), GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) return -ENOMEM; - } + adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4; adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0; adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0; -- cgit v1.2.3 From 28dd788382c43b330480f57cd34cde0840896743 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 15 Dec 2023 00:24:58 +0800 Subject: drivers/amd/pm: fix a use-after-free in kv_parse_power_table When ps allocated by kzalloc equals to NULL, kv_parse_power_table frees adev->pm.dpm.ps that allocated before. However, after the control flow goes through the following call chains: kv_parse_power_table |-> kv_dpm_init |-> kv_dpm_sw_init |-> kv_dpm_fini The adev->pm.dpm.ps is used in the for loop of kv_dpm_fini after its first free in kv_parse_power_table and causes a use-after-free bug. Fixes: a2e73f56fa62 ("drm/amdgpu: Add support for CIK parts") Signed-off-by: Zhipeng Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 5d28c951a319..5cb4725c773f 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -2735,10 +2735,8 @@ static int kv_parse_power_table(struct amdgpu_device *adev) non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; ps = kzalloc(sizeof(struct kv_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(adev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } adev->pm.dpm.ps[i].ps_priv = ps; k = 0; idx = (u8 *)&power_state->v2.clockInfoIndex[0]; -- cgit v1.2.3 From c2709b2d6a537ca0fa0f1da36fdaf07e48ef447d Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 15 Dec 2023 00:58:42 +0800 Subject: gpu/drm/radeon: fix two memleaks in radeon_vm_init When radeon_bo_create and radeon_vm_clear_bo fail, the vm->page_tables allocated before need to be freed. However, neither radeon_vm_init itself nor its caller have done such deallocation. 
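The leak being fixed below is the classic partial-initialization pattern: an early allocation succeeds, a later step fails, and the early allocation is never released because the error path returns without unwinding and the caller has no way to free it. A minimal userspace sketch of the pattern and of the unwind the patch adds (names are illustrative only, with calloc()/free() standing in for the kernel allocators):

#include <stdlib.h>

struct vm {
        void **page_tables;
        void *page_directory;
};

static int vm_init(struct vm *vm, int fail_second_step)
{
        vm->page_tables = calloc(16, sizeof(*vm->page_tables));
        if (!vm->page_tables)
                return -1;

        vm->page_directory = fail_second_step ? NULL : malloc(4096);
        if (!vm->page_directory) {
                /* The fix: unwind the earlier allocation before returning,
                 * otherwise page_tables leaks; the caller only sees -1. */
                free(vm->page_tables);
                vm->page_tables = NULL;
                return -1;
        }
        return 0;
}

int main(void)
{
        struct vm vm = { 0 };

        (void)vm_init(&vm, 1);  /* error path leaves nothing allocated */
        return 0;
}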
Fixes: 6d2f2944e95e ("drm/radeon: use normal BOs for the page tables v4") Signed-off-by: Zhipeng Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 987cabbf1318..c38b4d5d6a14 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -1204,13 +1204,17 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) r = radeon_bo_create(rdev, pd_size, align, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &vm->page_directory); - if (r) + if (r) { + kfree(vm->page_tables); + vm->page_tables = NULL; return r; - + } r = radeon_vm_clear_bo(rdev, vm->page_directory); if (r) { radeon_bo_unref(&vm->page_directory); vm->page_directory = NULL; + kfree(vm->page_tables); + vm->page_tables = NULL; return r; } -- cgit v1.2.3 From a6582701178a47c4d0cb2188c965c59c0c0647c8 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 15 Dec 2023 00:59:38 +0800 Subject: drm/amd/pm: fix a double-free in amdgpu_parse_extended_power_table The amdgpu_free_extended_power_table is called in every error-handling path of amdgpu_parse_extended_power_table. However, after the following call chain of returning: amdgpu_parse_extended_power_table |-> kv_dpm_init / si_dpm_init (the only two callers of amdgpu_parse_extended_power_table) |-> kv_dpm_sw_init / si_dpm_sw_init (the only caller of kv_dpm_init / si_dpm_init, accordingly) |-> kv_dpm_fini / si_dpm_fini (goto dpm_failed in xx_dpm_sw_init) |-> amdgpu_free_extended_power_table As above, amdgpu_free_extended_power_table is called twice in this returning chain and thus a double-free is triggered. Similarly, the last kfree in amdgpu_parse_extended_power_table also causes a double free with amdgpu_free_extended_power_table in kv_dpm_fini.
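The same ownership rule explains the fix below: cleanup must have exactly one owner. When the failing callee frees a table and the caller's fini path frees it again, the second kfree() is a double free. A small userspace sketch of the convention the patch moves to, with the callee only reporting the error and the fini routine as the single place that frees (names are illustrative, not the driver's):

#include <stdlib.h>

struct power_tables {
        int *vddc;
        int *mclk;
};

/* Callee: on failure it only reports the error; it no longer frees. */
static int parse_tables(struct power_tables *t)
{
        t->vddc = calloc(4, sizeof(*t->vddc));
        if (!t->vddc)
                return -1;
        t->mclk = calloc(4, sizeof(*t->mclk));
        if (!t->mclk)
                return -1;      /* do NOT free t->vddc here */
        return 0;
}

/* Single owner of cleanup: safe to call once on both success and failure. */
static void free_tables(struct power_tables *t)
{
        free(t->vddc);
        t->vddc = NULL;
        free(t->mclk);
        t->mclk = NULL;
}

int main(void)
{
        struct power_tables t = { 0 };
        int ret = parse_tables(&t);

        free_tables(&t);        /* runs exactly once, success or failure */
        return ret ? 1 : 0;
}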
Fixes: 84176663e70d ("drm/amd/pm: create a new holder for those APIs used only by legacy ASICs(si/kv)") Signed-off-by: Zhipeng Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 52 +++++++------------------- 1 file changed, 13 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index 81fb4e5dd804..60377747bab4 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -272,10 +272,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(power_info->pplib4.usVddcDependencyOnSCLKOffset)); ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_sclk, dep_table); - if (ret) { - amdgpu_free_extended_power_table(adev); + if (ret) return ret; - } } if (power_info->pplib4.usVddciDependencyOnMCLKOffset) { dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *) @@ -283,10 +281,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(power_info->pplib4.usVddciDependencyOnMCLKOffset)); ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddci_dependency_on_mclk, dep_table); - if (ret) { - amdgpu_free_extended_power_table(adev); + if (ret) return ret; - } } if (power_info->pplib4.usVddcDependencyOnMCLKOffset) { dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *) @@ -294,10 +290,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(power_info->pplib4.usVddcDependencyOnMCLKOffset)); ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.vddc_dependency_on_mclk, dep_table); - if (ret) { - amdgpu_free_extended_power_table(adev); + if (ret) return ret; - } } if (power_info->pplib4.usMvddDependencyOnMCLKOffset) { dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *) @@ -305,10 +299,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(power_info->pplib4.usMvddDependencyOnMCLKOffset)); ret = amdgpu_parse_clk_voltage_dep_table(&adev->pm.dpm.dyn_state.mvdd_dependency_on_mclk, dep_table); - if (ret) { - amdgpu_free_extended_power_table(adev); + if (ret) return ret; - } } if (power_info->pplib4.usMaxClockVoltageOnDCOffset) { ATOM_PPLIB_Clock_Voltage_Limit_Table *clk_v = @@ -339,10 +331,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) kcalloc(psl->ucNumEntries, sizeof(struct amdgpu_phase_shedding_limits_entry), GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) return -ENOMEM; - } entry = &psl->entries[0]; for (i = 0; i < psl->ucNumEntries; i++) { @@ -383,10 +373,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) ATOM_PPLIB_CAC_Leakage_Record *entry; u32 size = cac_table->ucNumEntries * sizeof(struct amdgpu_cac_leakage_table); adev->pm.dpm.dyn_state.cac_leakage_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.cac_leakage_table.entries) return -ENOMEM; - } entry = &cac_table->entries[0]; for (i = 0; i < cac_table->ucNumEntries; i++) { if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) { @@ -438,10 +426,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) sizeof(struct amdgpu_vce_clock_voltage_dependency_entry); 
adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) return -ENOMEM; - } adev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; @@ -493,10 +479,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) sizeof(struct amdgpu_uvd_clock_voltage_dependency_entry); adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) return -ENOMEM; - } adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; @@ -525,10 +509,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) sizeof(struct amdgpu_clock_voltage_dependency_entry); adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) return -ENOMEM; - } adev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; @@ -548,10 +530,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) le16_to_cpu(ext_hdr->usPPMTableOffset)); adev->pm.dpm.dyn_state.ppm_table = kzalloc(sizeof(struct amdgpu_ppm_table), GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.ppm_table) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.ppm_table) return -ENOMEM; - } adev->pm.dpm.dyn_state.ppm_table->ppm_design = ppm->ucPpmDesign; adev->pm.dpm.dyn_state.ppm_table->cpu_core_number = le16_to_cpu(ppm->usCpuCoreNumber); @@ -583,10 +563,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) sizeof(struct amdgpu_clock_voltage_dependency_entry); adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries = kzalloc(size, GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) return -ENOMEM; - } adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; @@ -606,10 +584,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) ATOM_PowerTune_Table *pt; adev->pm.dpm.dyn_state.cac_tdp_table = kzalloc(sizeof(struct amdgpu_cac_tdp_table), GFP_KERNEL); - if (!adev->pm.dpm.dyn_state.cac_tdp_table) { - amdgpu_free_extended_power_table(adev); + if (!adev->pm.dpm.dyn_state.cac_tdp_table) return -ENOMEM; - } if (rev > 0) { ATOM_PPLIB_POWERTUNE_Table_V1 *ppt = (ATOM_PPLIB_POWERTUNE_Table_V1 *) (mode_info->atom_context->bios + data_offset + @@ -645,10 +621,8 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev) ret = amdgpu_parse_clk_voltage_dep_table( &adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk, dep_table); - if (ret) { - kfree(adev->pm.dpm.dyn_state.vddgfx_dependency_on_sclk.entries); + if (ret) return ret; - } } } -- cgit v1.2.3 From 8b881b5d6fe9ebb7736097f37103c9b07ea45642 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 14 
Dec 2023 10:41:41 -0500 Subject: drm/amd/display: fix documentation for amdgpu_dm_verify_lut3d_size() It takes the plane state rather than the crtc state. Fixes: aba8b76baabd ("drm/amd/display: add plane shaper LUT support") Reported-by: Stephen Rothwell Reviewed-by: Melissa Wen Signed-off-by: Alex Deucher Cc: Melissa Wen Cc: Harry.Wentland@amd.com --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 96aecc1a71a3..c6ed0d854b01 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -796,7 +796,7 @@ static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut, * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user * shaper and 3D LUTs match the hw supported size * @adev: amdgpu device - * @crtc_state: the DRM CRTC state + * @plane_state: the DRM plane state * * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or * newer) and if the user shaper and 3D LUTs match the supported size. -- cgit v1.2.3 From c6ef0a2265c518aa6699b64d10a7e5a9049ac96a Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 14 Dec 2023 18:45:16 -0100 Subject: drm/amd/display: fix documentation for dm_crtc_additional_color_mgmt() warning: expecting prototype for drm_crtc_additional_color_mgmt(). Prototype was for dm_crtc_additional_color_mgmt() instead Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312141801.o9eBCxt9-lkp@intel.com/ Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 9224e2d7f32b..7545a184e43a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -299,7 +299,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) #ifdef AMD_PRIVATE_COLOR /** - * drm_crtc_additional_color_mgmt - enable additional color properties + * dm_crtc_additional_color_mgmt - enable additional color properties * @crtc: DRM CRTC * * This function lets the driver enable post-blending CRTC regamma transfer -- cgit v1.2.3 From 97bb5e691189d342fc617dc0f1ab3e51a3676602 Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Wed, 13 Dec 2023 12:16:12 +0530 Subject: drm/i915: Add Wa_14019877138 Enable Force Dispatch Ends Collection for DG2. 
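For context on the register write this workaround performs: the wa_mcr_masked_en() helper used in the diff below writes the register in i915's "masked" format, where the upper 16 bits of the written value select which of the lower 16 bits the hardware actually latches. A tiny standalone sketch of that encoding, mirroring what the driver derives via REG_BIT()/_MASKED_BIT_ENABLE() (values shown for illustration only):

#include <stdint.h>
#include <stdio.h>

/* i915 "masked" registers latch a low bit only when the matching bit in the
 * upper 16 bits is also set; this mirrors the driver's _MASKED_BIT_ENABLE(). */
#define MASKED_BIT_ENABLE(b)  (((uint32_t)(b) << 16) | (uint32_t)(b))
#define FD_END_COLLECT        (1u << 5)   /* bit 5 of XEHP_PSS_CHICKEN */

int main(void)
{
        printf("value written for FD_END_COLLECT: 0x%08x\n",
               MASKED_BIT_ENABLE(FD_END_COLLECT));   /* prints 0x00200020 */
        return 0;
}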
BSpec: 46001 Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20231213064612.480032-1-haridhar.kalvala@intel.com --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 9de41703fae5..50962cfd1353 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -469,6 +469,9 @@ #define XEHP_PSS_MODE2 MCR_REG(0x703c) #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) +#define XEHP_PSS_CHICKEN MCR_REG(0x7044) +#define FD_END_COLLECT REG_BIT(5) + #define GEN7_SC_INSTDONE _MMIO(0x7100) #define GEN12_SC_INSTDONE_EXTRA _MMIO(0x7104) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 4cbf9e512645..3eacbc50caf8 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -777,6 +777,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_18019271663:dg2 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); + + /* Wa_14019877138:dg2 */ + wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT); } static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, -- cgit v1.2.3 From 46dec61643d7047c9b5929f98a2b7fa4fa93a7dc Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 8 Dec 2023 11:46:53 +0100 Subject: drm/nouveau: Fixup gk20a instobj hierarchy Commit 12c9b05da918 ("drm/nouveau/imem: support allocations not preserved across suspend") uses container_of() to cast from struct nvkm_memory to struct nvkm_instobj, assuming that all instance objects are derived from struct nvkm_instobj. For the gk20a family that's not the case and they are derived from struct nvkm_memory instead. This causes some subtle data corruption (nvkm_instobj.preserve ends up mapping to gk20a_instobj.vaddr) that causes a NULL pointer dereference in gk20a_instobj_acquire_iommu() (and possibly elsewhere) and also prevents suspend/resume from working. Fix this by making struct gk20a_instobj derive from struct nvkm_instobj instead. 
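The corruption described above follows directly from how container_of() works: it is pure pointer arithmetic over the declared layout, so casting a gk20a object's embedded nvkm_memory to the nvkm_instobj layout makes the instobj fields alias whatever the gk20a object actually stores at those offsets (here, vaddr). A reduced userspace illustration with invented, heavily simplified structs:

#include <stddef.h>
#include <stdio.h>

struct memory { long funcs; };

/* Layout the common code assumes wraps every memory object. */
struct instobj { struct memory memory; int preserve; };

/* Simplified stand-in for the old gk20a object: no instobj in between. */
struct gk20a_obj { struct memory memory; void *vaddr; };

int main(void)
{
        /* container_of(&obj->memory, struct instobj, memory) yields the same
         * base address for both layouts, so instobj.preserve overlays whatever
         * gk20a_obj keeps at that offset, here its vaddr pointer. */
        printf("instobj.preserve offset: %zu\n", offsetof(struct instobj, preserve));
        printf("gk20a_obj.vaddr  offset: %zu\n", offsetof(struct gk20a_obj, vaddr));
        return 0;
}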
Fixes: 12c9b05da918 ("drm/nouveau/imem: support allocations not preserved across suspend") Reported-by: Jonathan Hunter Signed-off-by: Thierry Reding Tested-by: Jon Hunter Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231208104653.1917055-1-thierry.reding@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 1b811d6972a1..201022ae9214 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -49,14 +49,14 @@ #include struct gk20a_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nvkm_mm_node *mn; struct gk20a_instmem *imem; /* CPU mapping */ u32 *vaddr; }; -#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory) +#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, base.memory) /* * Used for objects allocated using the DMA API @@ -148,7 +148,7 @@ gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj) list_del(&obj->vaddr_node); vunmap(obj->base.vaddr); obj->base.vaddr = NULL; - imem->vaddr_use -= nvkm_memory_size(&obj->base.memory); + imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory); nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use, imem->vaddr_max); } @@ -283,7 +283,7 @@ gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, { struct gk20a_instobj *node = gk20a_instobj(memory); struct nvkm_vmm_map map = { - .memory = &node->memory, + .memory = &node->base.memory, .offset = offset, .mem = node->mn, }; @@ -391,8 +391,8 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, return -ENOMEM; *_node = &node->base; - nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, @@ -438,8 +438,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, *_node = &node->base; node->dma_addrs = (void *)(node->pages + npages); - nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; /* Allocate backing memory */ for (i = 0; i < npages; i++) { @@ -533,7 +533,7 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, else ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT, align, &node); - *pmemory = node ? &node->memory : NULL; + *pmemory = node ? &node->base.memory : NULL; if (ret) return ret; -- cgit v1.2.3 From 7ba84cbf18c7a53107c64880d9c90f18fa68b481 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 13 Dec 2023 19:43:57 -0500 Subject: drm/nouveau/kms/nv50-: Don't allow inheritance of headless iors Turns out we made a silly mistake when coming up with OR inheritance on nouveau. On pre-DCB 4.1, iors are statically routed to output paths via the DCB. On later generations iors are only routed to an output path if they're actually being used. 
Unfortunately, it appears with NVIF_OUTP_INHERIT_V0 we make the mistake of assuming the latter is true on all generations, which is currently leading us to return bogus ior -> head assignments through nvif, which causes WARN_ON(). So - fix this by verifying that we actually know that there's a head assigned to an ior before allowing it to be inherited through nvif. This -should- hopefully fix the WARN_ON on GT218 reported by Borislav. Signed-off-by: Lyude Paul Cc: Borislav Petkov Reported-by: Borislav Petkov (AMD) Tested-by: Borislav Petkov (AMD) Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231214004359.1028109-1-lyude@redhat.com --- drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c index e4279f1772a1..377d0e0cef84 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c @@ -385,7 +385,7 @@ nvkm_uoutp_mthd_inherit(struct nvkm_outp *outp, void *argv, u32 argc) /* Ensure an ior is hooked up to this outp already */ ior = outp->func->inherit(outp); - if (!ior) + if (!ior || !ior->arm.head) return -ENODEV; /* With iors, there will be a separate output path for each type of connector - and all of -- cgit v1.2.3 From 937d02cc79c6828fef28a4d80d8d0ad2f7bf2b62 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 14 Dec 2023 00:05:26 +0200 Subject: drm/i915/mtl: Fix HDMI/DP PLL clock selection Select the HDMI specific PLL clock only for HDMI outputs. Fixes: 62618c7f117e ("drm/i915/mtl: C20 PLL programming") Cc: Mika Kahola Cc: Radhakrishna Sripada Reviewed-by: Radhakrishna Sripada Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231213220526.1828827-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 4e6ea71ff629..884a1da36089 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2468,7 +2468,8 @@ static void intel_program_port_clock_ctl(struct intel_encoder *encoder, val |= XELPDP_FORWARD_CLOCK_UNGATE; - if (is_hdmi_frl(crtc_state->port_clock)) + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && + is_hdmi_frl(crtc_state->port_clock)) val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_DIV18CLK); else val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_MAXPCLK); -- cgit v1.2.3 From e6174e8e19e8fd26016c941c7271868326cd861a Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:39 +0800 Subject: drm/i915: Use kmap_local_page() in gem/i915_gem_object.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1], and this patch converts the call from kmap_atomic() to kmap_local_page(). The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption (the preemption is disabled for !PREEMPT_RT case, otherwise it only disables migration). With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults and preemption disables. There're 2 reasons why i915_gem_object_read_from_page_kmap() doesn't need to disable pagefaults and preemption for mapping: 1. The flush operation is safe. 
In drm/i915/gem/i915_gem_object.c, i915_gem_object_read_from_page_kmap() calls drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in drm_clflush_virt_range(), the flush operation is global. 2. Any context switch caused by preemption or page faults (page fault may cause sleep) doesn't affect the validity of local mapping. Therefore, i915_gem_object_read_from_page_kmap() is a function where the use of kmap_local_page() in place of kmap_atomic() is correctly suited. Convert the calls of kmap_atomic() / kunmap_atomic() to kmap_local_page() / kunmap_local(). And remove the redundant variable that stores the address of the mapped page since kunmap_local() can accept any pointer within the page. [1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com Suggested-by: Dave Hansen Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-2-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 25eeeb863209..58e6c680fe0d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -500,17 +500,15 @@ static void i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size) { pgoff_t idx = offset >> PAGE_SHIFT; - void *src_map; void *src_ptr; - src_map = kmap_atomic(i915_gem_object_get_page(obj, idx)); - - src_ptr = src_map + offset_in_page(offset); + src_ptr = kmap_local_page(i915_gem_object_get_page(obj, idx)) + + offset_in_page(offset); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) drm_clflush_virt_range(src_ptr, size); memcpy(dst, src_ptr, size); - kunmap_atomic(src_map); + kunmap_local(src_ptr); } static void -- cgit v1.2.3 From f4d88908cd9a430a7473eea6ff2300a3b728e11c Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:40 +0800 Subject: drm/i915: Use memcpy_[from/to]_page() in gem/i915_gem_pyhs.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1], and this patch converts the call from kmap_atomic() + memcpy() to memcpy_[from/to]_page(), which use kmap_local_page() to build local mapping and then do memcpy(). The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption (the preemption is disabled for !PREEMPT_RT case, otherwise it only disables migration). With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults and preemption disables. In drm/i915/gem/i915_gem_phys.c, the functions i915_gem_object_get_pages_phys() and i915_gem_object_put_pages_phys() don't need to disable pagefaults and preemption for mapping because of 2 reasons: 1. The flush operation is safe. In drm/i915/gem/i915_gem_object.c, i915_gem_object_get_pages_phys() and i915_gem_object_put_pages_phys() calls drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in drm_clflush_virt_range(), the flush operation is global. 2. 
Any context switch caused by preemption or page faults (page fault may cause sleep) doesn't affect the validity of local mapping. Therefore, i915_gem_object_get_pages_phys() and i915_gem_object_put_pages_phys() are two functions where the uses of local mappings in place of atomic mappings are correctly suited. Convert the calls of kmap_atomic() / kunmap_atomic() + memcpy() to memcpy_from_page() and memcpy_to_page(). [1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com Suggested-by: Dave Hansen Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-3-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_phys.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 5df128e2f4dc..ef85c6dc9fd5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -65,16 +65,13 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) dst = vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { struct page *page; - void *src; page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) goto err_st; - src = kmap_atomic(page); - memcpy(dst, src, PAGE_SIZE); + memcpy_from_page(dst, page, 0, PAGE_SIZE); drm_clflush_virt_range(dst, PAGE_SIZE); - kunmap_atomic(src); put_page(page); dst += PAGE_SIZE; @@ -113,16 +110,13 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { struct page *page; - char *dst; page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) continue; - dst = kmap_atomic(page); drm_clflush_virt_range(src, PAGE_SIZE); - memcpy(dst, src, PAGE_SIZE); - kunmap_atomic(dst); + memcpy_to_page(page, 0, src, PAGE_SIZE); set_page_dirty(page); if (obj->mm.madv == I915_MADV_WILLNEED) -- cgit v1.2.3 From 756eed0f2602f73df8d6c5bc8418ecd11cce9803 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:41 +0800 Subject: drm/i915: Use kmap_local_page() in gem/i915_gem_shmem.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1]. The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption (the preemption is disabled for !PREEMPT_RT case, otherwise it only disables migration). With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults or preemption disables. In drm/i915/gem/i915_gem_shmem.c, the function shmem_pwrite() need to disable pagefault to eliminate the potential recursion fault[2]. But here __copy_from_user_inatomic() doesn't need to disable preemption and local mapping is valid for sched in/out. So it can use kmap_local_page() / kunmap_local() with pagefault_disable() / pagefault_enable() to replace atomic mapping. [1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com [2]: https://patchwork.freedesktop.org/patch/295840/ Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. 
De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-4-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 73a4a4eb29e0..38b72d86560f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -485,11 +485,13 @@ shmem_pwrite(struct drm_i915_gem_object *obj, if (err < 0) return err; - vaddr = kmap_atomic(page); + vaddr = kmap_local_page(page); + pagefault_disable(); unwritten = __copy_from_user_inatomic(vaddr + pg, user_data, len); - kunmap_atomic(vaddr); + pagefault_enable(); + kunmap_local(vaddr); err = aops->write_end(obj->base.filp, mapping, offset, len, len - unwritten, page, data); -- cgit v1.2.3 From 1fcb967595a5156da2f081a5ade319c60fc5af72 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:42 +0800 Subject: drm/i915: Use kmap_local_page() in gem/selftests/huge_pages.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1], and this patch converts the call from kmap_atomic() to kmap_local_page(). The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption (the preemption is disabled for !PREEMPT_RT case, otherwise it only disables migration). With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults or preemption disables. In drm/i915/gem/selftests/huge_pages.c, function __cpu_check_shmem() mainly uses mapping to flush cache and check the value. There're 2 reasons why __cpu_check_shmem() doesn't need to disable pagefaults and preemption for mapping: 1. The flush operation is safe. Function __cpu_check_shmem() calls drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in drm_clflush_virt_range(), the flush operation is global. 2. Any context switch caused by preemption or page faults (page fault may cause sleep) doesn't affect the validity of local mapping. Therefore, __cpu_check_shmem() is a function where the use of kmap_local_page() in place of kmap_atomic() is correctly suited. Convert the calls of kmap_atomic() / kunmap_atomic() to kmap_local_page() / kunmap_local(). [1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com Suggested-by: Dave Hansen Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. 
De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-5-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 6b9f6cf50bf6..c9e6d77abab0 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1082,7 +1082,7 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) goto err_unlock; for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { - u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); + u32 *ptr = kmap_local_page(i915_gem_object_get_page(obj, n)); if (needs_flush & CLFLUSH_BEFORE) drm_clflush_virt_range(ptr, PAGE_SIZE); @@ -1090,12 +1090,12 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) if (ptr[dword] != val) { pr_err("n=%lu ptr[%u]=%u, val=%u\n", n, dword, ptr[dword], val); - kunmap_atomic(ptr); + kunmap_local(ptr); err = -EINVAL; break; } - kunmap_atomic(ptr); + kunmap_local(ptr); } i915_gem_object_finish_access(obj); -- cgit v1.2.3 From 40b399000665ee154927a8e0d7b0c7e7505bbaef Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:43 +0800 Subject: drm/i915: Use kmap_local_page() in gem/selftests/i915_gem_coherency.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1], and this patch converts the call from kmap_atomic() to kmap_local_page(). The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption (the preemption is disabled for !PREEMPT_RT case, otherwise it only disables migration).. With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults or preemption disables. In drm/i915/gem/selftests/i915_gem_coherency.c, functions cpu_set() and cpu_get() mainly uses mapping to flush cache and assign the value. There're 2 reasons why cpu_set() and cpu_get() don't need to disable pagefaults and preemption for mapping: 1. The flush operation is safe. cpu_set() and cpu_get() call drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in drm_clflush_virt_range(), the flush operation is global. 2. Any context switch caused by preemption or page faults (page fault may cause sleep) doesn't affect the validity of local mapping. Therefore, cpu_set() and cpu_get() are functions where the use of kmap_local_page() in place of kmap_atomic() is correctly suited. Convert the calls of kmap_atomic() / kunmap_atomic() to kmap_local_page() / kunmap_local(). [1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com Suggested-by: Dave Hansen Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. 
De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-6-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 3fd68a099a85..2a0c0634d446 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -24,7 +24,6 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) { unsigned int needs_clflush; struct page *page; - void *map; u32 *cpu; int err; @@ -34,8 +33,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); - map = kmap_atomic(page); - cpu = map + offset_in_page(offset); + cpu = kmap_local_page(page) + offset_in_page(offset); if (needs_clflush & CLFLUSH_BEFORE) drm_clflush_virt_range(cpu, sizeof(*cpu)); @@ -45,7 +43,7 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) if (needs_clflush & CLFLUSH_AFTER) drm_clflush_virt_range(cpu, sizeof(*cpu)); - kunmap_atomic(map); + kunmap_local(cpu); i915_gem_object_finish_access(ctx->obj); out: @@ -57,7 +55,6 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) { unsigned int needs_clflush; struct page *page; - void *map; u32 *cpu; int err; @@ -67,15 +64,14 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); - map = kmap_atomic(page); - cpu = map + offset_in_page(offset); + cpu = kmap_local_page(page) + offset_in_page(offset); if (needs_clflush & CLFLUSH_BEFORE) drm_clflush_virt_range(cpu, sizeof(*cpu)); *v = *cpu; - kunmap_atomic(map); + kunmap_local(cpu); i915_gem_object_finish_access(ctx->obj); out: -- cgit v1.2.3 From b1c51b0e2e7cb98f643a801c50f8ad76ebc36450 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:44 +0800 Subject: drm/i915: Use kmap_local_page() in gem/selftests/i915_gem_context.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1], and this patch converts the call from kmap_atomic() to kmap_local_page(). The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption. With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults or preemption disables. In drm/i915/gem/selftests/i915_gem_context.c, functions cpu_fill() and cpu_check() mainly uses mapping to flush cache and check/assign the value. There're 2 reasons why cpu_fill() and cpu_check() don't need to disable pagefaults and preemption for mapping: 1. The flush operation is safe. cpu_fill() and cpu_check() call drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in drm_clflush_virt_range(), the flush operation is global. 2. Any context switch caused by preemption or page faults (page fault may cause sleep) doesn't affect the validity of local mapping. Therefore, cpu_fill() and cpu_check() are functions where the use of kmap_local_page() in place of kmap_atomic() is correctly suited. Convert the calls of kmap_atomic() / kunmap_atomic() to kmap_local_page() / kunmap_local(). 
[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com Suggested-by: Dave Hansen Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-7-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 7021b6e9b219..89d4dc8b60c6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -489,12 +489,12 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) for (n = 0; n < real_page_count(obj); n++) { u32 *map; - map = kmap_atomic(i915_gem_object_get_page(obj, n)); + map = kmap_local_page(i915_gem_object_get_page(obj, n)); for (m = 0; m < DW_PER_PAGE; m++) map[m] = value; if (!has_llc) drm_clflush_virt_range(map, PAGE_SIZE); - kunmap_atomic(map); + kunmap_local(map); } i915_gem_object_finish_access(obj); @@ -520,7 +520,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, for (n = 0; n < real_page_count(obj); n++) { u32 *map, m; - map = kmap_atomic(i915_gem_object_get_page(obj, n)); + map = kmap_local_page(i915_gem_object_get_page(obj, n)); if (needs_flush & CLFLUSH_BEFORE) drm_clflush_virt_range(map, PAGE_SIZE); @@ -546,7 +546,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, } out_unmap: - kunmap_atomic(map); + kunmap_local(map); if (err) break; } -- cgit v1.2.3 From 55a6e46180cb8b36fb1076501b569bfd42df1644 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:45 +0800 Subject: drm/i915: Use memcpy_from_page() in gt/uc/intel_uc_fw.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1], and this patch converts the call from kmap_atomic() to kmap_local_page(). The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption (the preemption is disabled for !PREEMPT_RT case, otherwise it only disables migration). With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults or preemption disables. In drm/i915/gt/uc/intel_us_fw.c, the function intel_uc_fw_copy_rsa() just use the mapping to do memory copy so it doesn't need to disable pagefaults and preemption for mapping. Thus the local mapping without atomic context (not disable pagefaults / preemption) is enough. Therefore, intel_uc_fw_copy_rsa() is a function where the use of memcpy_from_page() with kmap_local_page() in place of memcpy() with kmap_atomic() is correctly suited. Convert the calls of memcpy() with kmap_atomic() / kunmap_atomic() to memcpy_from_page() which uses local mapping to copy. [1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com/T/#u Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. 
De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-8-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 362639162ed6..756093eaf2ad 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -1343,16 +1343,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) { u32 len = min_t(u32, size, PAGE_SIZE - offset); - void *vaddr; if (idx > 0) { idx--; continue; } - vaddr = kmap_atomic(page); - memcpy(dst, vaddr + offset, len); - kunmap_atomic(vaddr); + memcpy_from_page(dst, page, offset, len); offset = 0; dst += len; -- cgit v1.2.3 From e4865c60dd6e312e58c85247e48899af7e19041a Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:46 +0800 Subject: drm/i915: Use kmap_local_page() in i915_cmd_parser.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1], and this patch converts the call from kmap_atomic() to kmap_local_page(). The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption (the preemption is disabled for !PREEMPT_RT case, otherwise it only disables migration). With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults and preemption disables. There're 2 reasons why function copy_batch() doesn't need to disable pagefaults and preemption for mapping: 1. The flush operation is safe. In i915_cmd_parser.c, copy_batch() calls drm_clflush_virt_range() to use CLFLUSHOPT or WBINVD to flush. Since CLFLUSHOPT is global on x86 and WBINVD is called on each cpu in drm_clflush_virt_range(), the flush operation is global. 2. Any context switch caused by preemption or page faults (page fault may cause sleep) doesn't affect the validity of local mapping. Therefore, copy_batch() is a function where the use of kmap_local_page() in place of kmap_atomic() is correctly suited. Convert the calls of kmap_atomic() / kunmap_atomic() to kmap_local_page() / kunmap_local(). [1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com Suggested-by: Dave Hansen Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. 
De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-9-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index ddf49c2dbb91..2905df83e180 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1211,11 +1211,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, for (n = offset >> PAGE_SHIFT; remain; n++) { int len = min(remain, PAGE_SIZE - x); - src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + src = kmap_local_page(i915_gem_object_get_page(src_obj, n)); if (src_needs_clflush) drm_clflush_virt_range(src + x, len); memcpy(ptr, src + x, len); - kunmap_atomic(src); + kunmap_local(src); ptr += len; remain -= len; -- cgit v1.2.3 From 31accc37eaee98a90b25809ed58c6ee4956ab642 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Sun, 3 Dec 2023 21:29:47 +0800 Subject: drm/i915: Use kmap_local_page() in gem/i915_gem_execbuffer.c The use of kmap_atomic() is being deprecated in favor of kmap_local_page()[1], and this patch converts the calls from kmap_atomic() to kmap_local_page(). The main difference between atomic and local mappings is that local mappings doesn't disable page faults or preemption (the preemption is disabled for !PREEMPT_RT case, otherwise it only disables migration). With kmap_local_page(), we can avoid the often unwanted side effect of unnecessary page faults and preemption disables. In i915_gem_execbuffer.c, eb->reloc_cache.vaddr is mapped by kmap_atomic() in eb_relocate_entry(), and is unmapped by kunmap_atomic() in reloc_cache_reset(). And this mapping/unmapping occurs in two places: one is in eb_relocate_vma(), and another is in eb_relocate_vma_slow(). The function eb_relocate_vma() or eb_relocate_vma_slow() doesn't need to disable pagefaults and preemption during the above mapping/ unmapping. So it can simply use kmap_local_page() / kunmap_local() that can instead do the mapping / unmapping regardless of the context. Convert the calls of kmap_atomic() / kunmap_atomic() to kmap_local_page() / kunmap_local(). [1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com Suggested-by: Ira Weiny Suggested-by: Fabio M. De Francesco Signed-off-by: Zhao Liu Reviewed-by: Ira Weiny Reviewed-by: Fabio M. 
De Francesco Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231203132947.2328805-10-zhao1.liu@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b1aa62dfb155..12cb2ef8dbd6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1158,7 +1158,7 @@ static void reloc_cache_unmap(struct reloc_cache *cache) vaddr = unmask_page(cache->vaddr); if (cache->vaddr & KMAP) - kunmap_atomic(vaddr); + kunmap_local(vaddr); else io_mapping_unmap_atomic((void __iomem *)vaddr); } @@ -1174,7 +1174,7 @@ static void reloc_cache_remap(struct reloc_cache *cache, if (cache->vaddr & KMAP) { struct page *page = i915_gem_object_get_page(obj, cache->page); - vaddr = kmap_atomic(page); + vaddr = kmap_local_page(page); cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; } else { @@ -1204,7 +1204,7 @@ static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer if (cache->vaddr & CLFLUSH_AFTER) mb(); - kunmap_atomic(vaddr); + kunmap_local(vaddr); i915_gem_object_finish_access(obj); } else { struct i915_ggtt *ggtt = cache_to_ggtt(cache); @@ -1236,7 +1236,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, struct page *page; if (cache->vaddr) { - kunmap_atomic(unmask_page(cache->vaddr)); + kunmap_local(unmask_page(cache->vaddr)); } else { unsigned int flushes; int err; @@ -1258,7 +1258,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, if (!obj->mm.dirty) set_page_dirty(page); - vaddr = kmap_atomic(page); + vaddr = kmap_local_page(page); cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; cache->page = pageno; -- cgit v1.2.3 From b39610c773431ac7991cf6235e26d693ccabd9e9 Mon Sep 17 00:00:00 2001 From: Donald Robson Date: Fri, 8 Dec 2023 16:08:25 +0000 Subject: drm/imagination: Fixed infinite loop in pvr_vm_mips_map() Unwinding loop in error path for this function uses unsigned limit variable, causing the promotion of the signed counter variable. --> 204 for (; pfn >= start_pfn; pfn--) ^^^^^^^^^^^^^^^^ If start_pfn can be zero then this is an endless loop. I've seen this code in other places as well. This loop is slightly off as well. It should decrement pfn on the first iteration. Fix by making the loop limit variables signed. Also fix missing predecrement by modifying to while loop. 
Reported-by: Dan Carpenter Signed-off-by: Donald Robson Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231208160825.92933-1-donald.robson@imgtec.com --- drivers/gpu/drm/imagination/pvr_vm_mips.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c index 2bc7181a4c3e..b7fef3c797e6 100644 --- a/drivers/gpu/drm/imagination/pvr_vm_mips.c +++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c @@ -152,8 +152,8 @@ pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) u64 end; u32 cache_policy; u32 pte_flags; - u32 start_pfn; - u32 end_pfn; + s32 start_pfn; + s32 end_pfn; s32 pfn; int err; @@ -201,7 +201,7 @@ pvr_vm_mips_map(struct pvr_device *pvr_dev, struct pvr_fw_object *fw_obj) return 0; err_unmap_pages: - for (; pfn >= start_pfn; pfn--) + while (--pfn >= start_pfn) WRITE_ONCE(mips_data->pt[pfn], 0); pvr_mmu_flush_request_all(pvr_dev); -- cgit v1.2.3 From f1f55ed3ffe4212f5c96106bf6396c461a2bf223 Mon Sep 17 00:00:00 2001 From: Donald Robson Date: Fri, 8 Dec 2023 16:30:19 +0000 Subject: drm/imagination: Fixed oops when misusing ioctl CREATE_HWRT_DATASET While writing the matching IGT suite I discovered that it's possible to cause a kernel oops when using DRM_IOCTL_PVR_CREATE_HWRT_DATASET when the call to hwrt_init_common_fw_structure() fails. Use an unwind-type error path to avoid cleaning up the object using the release function before it is fully resolved. Signed-off-by: Donald Robson Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231208163019.95913-1-donald.robson@imgtec.com --- drivers/gpu/drm/imagination/pvr_hwrt.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/imagination/pvr_hwrt.c b/drivers/gpu/drm/imagination/pvr_hwrt.c index c4213c18489e..54f88d6c01e5 100644 --- a/drivers/gpu/drm/imagination/pvr_hwrt.c +++ b/drivers/gpu/drm/imagination/pvr_hwrt.c @@ -458,7 +458,7 @@ pvr_hwrt_dataset_create(struct pvr_file *pvr_file, struct drm_pvr_ioctl_create_hwrt_dataset_args *args) { struct pvr_hwrt_dataset *hwrt; - int err; + int err, i = 0; /* Create and fill out the kernel structure */ hwrt = kzalloc(sizeof(*hwrt), GFP_KERNEL); @@ -466,35 +466,36 @@ pvr_hwrt_dataset_create(struct pvr_file *pvr_file, if (!hwrt) return ERR_PTR(-ENOMEM); - kref_init(&hwrt->ref_count); - err = hwrt_init_kernel_structure(pvr_file, args, hwrt); if (err < 0) goto err_free; err = hwrt_init_common_fw_structure(pvr_file, args, hwrt); if (err < 0) - goto err_free; + goto err_fini_kernel_structure; - for (int i = 0; i < ARRAY_SIZE(hwrt->data); i++) { + for (; i < ARRAY_SIZE(hwrt->data); i++) { err = hwrt_data_init_fw_structure(pvr_file, hwrt, args, &args->rt_data_args[i], &hwrt->data[i]); - if (err < 0) { - i--; - /* Destroy already created structures. 
*/ - for (; i >= 0; i--) - hwrt_data_fini_fw_structure(hwrt, i); - goto err_free; - } + if (err < 0) + goto err_fini_data_structures; hwrt->data[i].hwrt_dataset = hwrt; } + kref_init(&hwrt->ref_count); return hwrt; +err_fini_data_structures: + while (--i >= 0) + hwrt_data_fini_fw_structure(hwrt, i); + +err_fini_kernel_structure: + hwrt_fini_kernel_structure(hwrt); + err_free: - pvr_hwrt_dataset_put(hwrt); + kfree(hwrt); return ERR_PTR(err); } -- cgit v1.2.3 From f175498378bdae2ebcf61170a2a866cb96e8a69a Mon Sep 17 00:00:00 2001 From: Donald Robson Date: Wed, 13 Dec 2023 14:44:30 +0000 Subject: drm/imagination: Fix ERR_PTR test on pointer to pointer. drivers/gpu/drm/imagination/pvr_vm.c:631 pvr_vm_create_context() error: 'vm_ctx->mmu_ctx' dereferencing possible ERR_PTR() 612 vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev); 613 err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx); ^ The address is never an error pointer so this will always return 0. Remove the ampersand. Reported-by: Dan Carpenter Signed-off-by: Donald Robson Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231213144431.94956-1-donald.robson@imgtec.com --- drivers/gpu/drm/imagination/pvr_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index 82690cee978c..598d79d7c7d0 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -568,7 +568,7 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context) 0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops); vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev); - err = PTR_ERR_OR_ZERO(&vm_ctx->mmu_ctx); + err = PTR_ERR_OR_ZERO(vm_ctx->mmu_ctx); if (err) { vm_ctx->mmu_ctx = NULL; goto err_put_ctx; -- cgit v1.2.3 From 8a53e29fe05c56f643eaab285f224c09b9c3dd4c Mon Sep 17 00:00:00 2001 From: Donald Robson Date: Wed, 13 Dec 2023 14:44:31 +0000 Subject: drm/imagination: Fix error path in pvr_vm_create_context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible to double free the vm_ctx->mmu_ctx object in this function.     630 err_page_table_destroy: --> 631         pvr_mmu_context_destroy(vm_ctx->mmu_ctx); The pvr_vm_context_put() function does:         kref_put(&vm_ctx->ref_count, pvr_vm_context_release); Here the pvr_vm_context_release() will call:         pvr_mmu_context_destroy(vm_ctx->mmu_ctx); Refactor to an unwind style. Reported-by: Dan Carpenter Signed-off-by: Donald Robson Reviewed-by: Dan Carpenter Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231213144431.94956-2-donald.robson@imgtec.com --- drivers/gpu/drm/imagination/pvr_vm.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index 598d79d7c7d0..e59517ba039e 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -556,23 +556,12 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context) if (!vm_ctx) return ERR_PTR(-ENOMEM); - drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0); - vm_ctx->pvr_dev = pvr_dev; - kref_init(&vm_ctx->ref_count); - mutex_init(&vm_ctx->lock); - - drm_gpuvm_init(&vm_ctx->gpuvm_mgr, - is_userspace_context ? 
"PowerVR-user-VM" : "PowerVR-FW-VM", - 0, &pvr_dev->base, &vm_ctx->dummy_gem, - 0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops); vm_ctx->mmu_ctx = pvr_mmu_context_create(pvr_dev); err = PTR_ERR_OR_ZERO(vm_ctx->mmu_ctx); - if (err) { - vm_ctx->mmu_ctx = NULL; - goto err_put_ctx; - } + if (err) + goto err_free; if (is_userspace_context) { err = pvr_fw_object_create(pvr_dev, sizeof(struct rogue_fwif_fwmemcontext), @@ -583,13 +572,22 @@ pvr_vm_create_context(struct pvr_device *pvr_dev, bool is_userspace_context) goto err_page_table_destroy; } + drm_gem_private_object_init(&pvr_dev->base, &vm_ctx->dummy_gem, 0); + drm_gpuvm_init(&vm_ctx->gpuvm_mgr, + is_userspace_context ? "PowerVR-user-VM" : "PowerVR-FW-VM", + 0, &pvr_dev->base, &vm_ctx->dummy_gem, + 0, 1ULL << device_addr_bits, 0, 0, &pvr_vm_gpuva_ops); + + mutex_init(&vm_ctx->lock); + kref_init(&vm_ctx->ref_count); + return vm_ctx; err_page_table_destroy: pvr_mmu_context_destroy(vm_ctx->mmu_ctx); -err_put_ctx: - pvr_vm_context_put(vm_ctx); +err_free: + kfree(vm_ctx); return ERR_PTR(err); } -- cgit v1.2.3 From 24149412dfc71f7f4a54868702e9145e396263d3 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 13 Dec 2023 22:08:03 -0500 Subject: drm/amdkfd: only flush mes process context if mes support is there Fix up on mes process context flush to prevent non-mes devices from spamming error messages or running into undefined behaviour during process termination. Fixes: bd33bb1409b4 ("drm/amdkfd: fix mes set shader debugger process management") Signed-off-by: Jonathan Kim Reviewed-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 8e55e78fce4e..43eff221eae5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -87,7 +87,8 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) return; dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); - amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); + if (dev->kfd->shared_resources.enable_mes) + amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); pdd->already_dequeued = true; } -- cgit v1.2.3 From 65a618dd73216e111baab144a837f842dbb6a738 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 14 Dec 2023 09:42:03 -0500 Subject: drm/amdkfd: svm range always mapped flag not working on APU On gfx943 APU there is no VRAM and page migration, queue CWSR area, svm range with always mapped flag, is not mapped to GPU correctly. This works fine if retry fault on CWSR area can be recovered, but could cause deadlock if there is another retry fault recover waiting for CWSR to finish. Fix this by mapping svm range with always mapped flag to GPU with ACCESS attribute if XNACK ON. There is side effect, because all GPUs have ACCESS attribute by default on new svm range with XNACK on, the CWSR area will be mapped to all GPUs after this change. This side effect will be fixed with Thunk change to set CWSR svm range with ACCESS_IN_PLACE attribute on the GPU that user queue is created. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 617d20f0380f..fb0011f80277 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1607,18 +1607,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm, if (test_bit(gpuidx, prange->bitmap_access)) bitmap_set(ctx->bitmap, gpuidx, 1); } + + /* + * If prange is already mapped or with always mapped flag, + * update mapping on GPUs with ACCESS attribute + */ + if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { + if (prange->mapped_to_gpu || + prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED) + bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); + } } else { bitmap_or(ctx->bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); } if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { - bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); - if (!prange->mapped_to_gpu || - bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { - r = 0; - goto free_ctx; - } + r = 0; + goto free_ctx; } if (prange->actual_loc && !prange->ttm_res) { -- cgit v1.2.3 From 78b4dfd35999e22b4f589a3e070c4aa5f07ce3a2 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Fri, 8 Dec 2023 17:41:25 -0500 Subject: drm/amdgpu: increase hmm range get pages timeout When an application tries to allocate all system memory and causes memory to swap out, hmm_range_fault needs more time to validate the remaining pages for allocation. To be safe, increase timeout value to 1 second for 64MB range. Signed-off-by: James Zhu Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 081267161d40..b24eb5821fd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -190,8 +190,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, pr_debug("hmm range: start = 0x%lx, end = 0x%lx", hmm_range->start, hmm_range->end); - /* Assuming 128MB takes maximum 1 second to fault page address */ - timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL); + /* Assuming 64MB takes maximum 1 second to fault page address */ + timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL); timeout *= HMM_RANGE_DEFAULT_TIMEOUT; timeout = jiffies + msecs_to_jiffies(timeout); -- cgit v1.2.3 From 0c8c0e7a9eebc2de03d161de4376e0d9158b6817 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Fri, 8 Dec 2023 17:49:54 -0500 Subject: drm/amdgpu: make an improvement on amdgpu_hmm_range_get_pages Only schedule when hmm_range_fault returns error. 
Signed-off-by: James Zhu Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index b24eb5821fd1..55b65fc04b65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -199,6 +199,7 @@ retry: hmm_range->notifier_seq = mmu_interval_read_begin(notifier); r = hmm_range_fault(hmm_range); if (unlikely(r)) { + schedule(); /* * FIXME: This timeout should encompass the retry from * mmu_interval_read_retry() as well. @@ -212,7 +213,6 @@ retry: break; hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; hmm_range->start = hmm_range->end; - schedule(); } while (hmm_range->end < end); hmm_range->start = start; -- cgit v1.2.3 From 7046ca9c1ba64938f1b498026419d47b0993c69f Mon Sep 17 00:00:00 2001 From: Li Ma Date: Fri, 15 Dec 2023 11:37:20 +0800 Subject: drm/amd/swsmu: remove duplicate definition of smu v14_0_0 driver if version There is a repeated define of smu v14_0_0 driver if version, so delete one in driver if header. Signed-off-by: Li Ma Reviewed-by: Kenneth Feng Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h | 5 ----- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index 8f42771e1f0a..5bb7a63c0602 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -24,11 +24,6 @@ #ifndef SMU14_DRIVER_IF_V14_0_0_H #define SMU14_DRIVER_IF_V14_0_0_H -// *** IMPORTANT *** -// SMU TEAM: Always increment the interface version if -// any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 7 - typedef struct { int32_t value; uint32_t numFractionalBits; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index a5b569976f19..3f7463c1c1a9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -26,8 +26,8 @@ #include "amdgpu_smu.h" #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF +#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1 -#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x6 #define FEATURE_MASK(feature) (1ULL << feature) -- cgit v1.2.3 From 0f657938e4345a77be871d906f3e0de3c58a7a49 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Tue, 28 Nov 2023 16:53:12 -0500 Subject: drm/amd/display: do not send commands to DMUB if DMUB is inactive from S3 [Why] On resume from S3, may get apply_idle_optimizations call while DMUB is inactive which will just time out. [How] Set and track power state in dmub_srv and check power state before sending commands to DMUB. 
Add interface in both dmub_srv and dc_dmub_srv Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 14 ++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 2 ++ drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 15 +++++++++++++++ 5 files changed, 55 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d7ac020bd8af..54861136dafd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2687,6 +2687,7 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); + dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); return 0; } @@ -2882,6 +2883,7 @@ static int dm_resume(void *handle) if (r) DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); dc_resume(dm->dc); @@ -2932,6 +2934,7 @@ static int dm_resume(void *handle) } /* power on hardware */ + dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); /* program HPD filter */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 53400cc05b5b..1a4d615ccdec 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1268,3 +1268,17 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ASSERT(0); } +void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState) +{ + struct dmub_srv *dmub; + + if (!dc_dmub_srv) + return; + + dmub = dc_dmub_srv->dmub; + + if (powerState == DC_ACPI_CM_POWER_STATE_D0) + dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D0); + else + dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3); +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index d4a60f53faab..c25ce7546f71 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -102,4 +102,6 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait); void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle); void dc_dmub_srv_exit_low_power_state(const struct dc *dc); + +void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState); #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index df63aa8f01e9..d1a4ed6f5916 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -150,6 +150,13 @@ enum dmub_memory_access_type { DMUB_MEMORY_ACCESS_DMA }; +/* enum dmub_power_state type - to track DC power state in dmub_srv */ +enum dmub_srv_power_state_type { + DMUB_POWER_STATE_UNDEFINED = 0, + DMUB_POWER_STATE_D0 = 1, + DMUB_POWER_STATE_D3 = 8 +}; + /** * struct dmub_region - dmub hw memory 
region * @base: base address for region, must be 256 byte aligned @@ -485,6 +492,8 @@ struct dmub_srv { /* Feature capabilities reported by fw */ struct dmub_feature_caps feature_caps; struct dmub_visual_confirm_color visual_confirm_color; + + enum dmub_srv_power_state_type power_state; }; /** @@ -889,6 +898,18 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); */ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); +/** + * dmub_srv_set_power_state() - Track DC power state in dmub_srv + * @dmub: The dmub service + * @power_state: DC power state setting + * + * Store DC power state in dmub_srv. If dmub_srv is in D3, then don't send messages to DMUB + * + * Return: + * void + */ +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); + #if defined(__cplusplus) } #endif diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index a329a6ecf4dd..53ac1c66dd86 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -713,6 +713,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, dmub->hw_funcs.reset_release(dmub); dmub->hw_init = true; + dmub->power_state = DMUB_POWER_STATE_D0; return DMUB_STATUS_OK; } @@ -766,6 +767,9 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, if (!dmub->hw_init) return DMUB_STATUS_INVALID; + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_INVALID; + if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { return DMUB_STATUS_HW_FAILURE; @@ -784,6 +788,9 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) if (!dmub->hw_init) return DMUB_STATUS_INVALID; + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_INVALID; + /** * Read back all the queued commands to ensure that they've * been flushed to framebuffer memory. 
Otherwise DMCUB might @@ -1100,3 +1107,11 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_ subvp_index); } } + +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) +{ + if (!dmub || !dmub->hw_init) + return; + + dmub->power_state = dmub_srv_power_state; +} -- cgit v1.2.3 From c57a0f50c060b7c58f974306fe103eabb881ccbc Mon Sep 17 00:00:00 2001 From: Muhammad Ahmed Date: Fri, 1 Dec 2023 19:11:50 -0500 Subject: drm/amd/display: remove HPO PG in driver side [why & how] Add config to make HPO PG handled in dmubfw ips entry/exit Reviewed-by: Charlene Liu Acked-by: Wayne Lin Signed-off-by: Muhammad Ahmed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 4eb744f1bc9f..4e1db842b98c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -718,6 +718,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dpp_power_gate = true, .disable_hubp_power_gate = true, .disable_optc_power_gate = true, /*should the same as above two*/ + .disable_hpo_power_gate = true, /*dmubfw force domain25 on*/ .disable_clock_gate = false, .disable_dsc_power_gate = true, .vsr_support = true, -- cgit v1.2.3 From 669080888691c312cc926322a7b24600121c90fb Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 4 Dec 2023 15:19:34 -0500 Subject: drm/amd/display: Revert " drm/amd/display: Use channel_width = 2 for vram table 3.0" [Description] Revert commit fec05adc40c2 ("drm/amd/display: Use channel_width = 2 for vram table 3.0") Because the issue is being fixed from VBIOS side. Reviewed-by: Samson Tam Acked-by: Wayne Lin Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 875a064bb9a5..fcd65a2057ad 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2386,13 +2386,7 @@ static enum bp_result get_vram_info_v30( return BP_RESULT_BADBIOSTABLE; info->num_chans = info_v30->channel_num; - /* As suggested by VBIOS we should always use - * dram_channel_width_bytes = 2 when using VRAM - * table version 3.0. This is because the channel_width - * param in the VRAM info table is changed in 7000 series and - * no longer represents the memory channel width. 
- */ - info->dram_channel_width_bytes = 2; + info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8; return result; } -- cgit v1.2.3 From 86b9357c1bbe993e74a304b3f7783d7d0c79c40c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 15 Dec 2023 20:21:52 -0500 Subject: drivers/gpu/drm/i915/i915_memcpy.c: fix missing includes Signed-off-by: Kent Overstreet --- drivers/gpu/drm/i915/i915_memcpy.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index 1b021a4902de..ba82277254b7 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -23,6 +23,8 @@ */ #include +#include +#include #include #include "i915_memcpy.h" -- cgit v1.2.3 From 648d7be8ecf47b0556e32550145c70db153b16fb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Dec 2023 23:37:47 +0200 Subject: drm/i915/dmc: Don't enable any pipe DMC events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipe DMC seems to be making a mess of things in ADL. Various weird symptoms have been observed such as missing vblank irqs, typicalle happening when using multiple displays. Keep all pipe DMC event handlers disabled until needed (which is never atm). This is also what Windows does on ADL+. We can also drop DG2 from disable_all_flip_queue_events() since on DG2 the pipe DMC is the one that handles the flip queue events. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8685 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211213750.27109-2-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_dmc.c | 43 ++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 63e080e07023..073b85b57679 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -389,7 +389,7 @@ disable_all_flip_queue_events(struct drm_i915_private *i915) enum intel_dmc_id dmc_id; /* TODO: check if the following applies to all D13+ platforms. 
*/ - if (!IS_DG2(i915) && !IS_TIGERLAKE(i915)) + if (!IS_TIGERLAKE(i915)) return; for_each_dmc_id(dmc_id) { @@ -493,6 +493,45 @@ void intel_dmc_disable_pipe(struct drm_i915_private *i915, enum pipe pipe) intel_de_rmw(i915, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0); } +static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915, + enum intel_dmc_id dmc_id, i915_reg_t reg) +{ + u32 offset = i915_mmio_reg_offset(reg); + u32 start = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, 0)); + u32 end = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12)); + + return offset >= start && offset < end; +} + +static bool disable_dmc_evt(struct drm_i915_private *i915, + enum intel_dmc_id dmc_id, + i915_reg_t reg, u32 data) +{ + if (!is_dmc_evt_ctl_reg(i915, dmc_id, reg)) + return false; + + /* keep all pipe DMC events disabled by default */ + if (dmc_id != DMC_FW_MAIN) + return true; + + return false; +} + +static u32 dmc_mmiodata(struct drm_i915_private *i915, + struct intel_dmc *dmc, + enum intel_dmc_id dmc_id, int i) +{ + if (disable_dmc_evt(i915, dmc_id, + dmc->dmc_info[dmc_id].mmioaddr[i], + dmc->dmc_info[dmc_id].mmiodata[i])) + return REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, + DMC_EVT_CTL_TYPE_EDGE_0_1) | + REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, + DMC_EVT_CTL_EVENT_ID_FALSE); + else + return dmc->dmc_info[dmc_id].mmiodata[i]; +} + /** * intel_dmc_load_program() - write the firmware from memory to register. * @i915: i915 drm device. @@ -532,7 +571,7 @@ void intel_dmc_load_program(struct drm_i915_private *i915) for_each_dmc_id(dmc_id) { for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) { intel_de_write(i915, dmc->dmc_info[dmc_id].mmioaddr[i], - dmc->dmc_info[dmc_id].mmiodata[i]); + dmc_mmiodata(i915, dmc, dmc_id, i)); } } -- cgit v1.2.3 From be2fce7891e20bdd1c785dd590c59d0ad6a1525a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Dec 2023 23:37:48 +0200 Subject: drm/i915/dmc: Also disable the flip queue event on TGL main DMC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike later platforms TGL has its flip queue event (CLK_MSEC) on the main DMC (as opposed to the pipe DMC). Currently we're doing a second pass to disable that, but let's just follow the same approach as the later platforms and never even enable the event in the first place. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211213750.27109-3-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_dmc.c | 83 ++------------------------------ 1 file changed, 5 insertions(+), 78 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 073b85b57679..9385898e3aa5 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -335,77 +335,6 @@ static void disable_event_handler(struct drm_i915_private *i915, intel_de_write(i915, htp_reg, 0); } -static void -disable_flip_queue_event(struct drm_i915_private *i915, - i915_reg_t ctl_reg, i915_reg_t htp_reg) -{ - u32 event_ctl; - u32 event_htp; - - event_ctl = intel_de_read(i915, ctl_reg); - event_htp = intel_de_read(i915, htp_reg); - if (event_ctl != (DMC_EVT_CTL_ENABLE | - DMC_EVT_CTL_RECURRING | - REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, - DMC_EVT_CTL_TYPE_EDGE_0_1) | - REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, - DMC_EVT_CTL_EVENT_ID_CLK_MSEC)) || - !event_htp) { - drm_dbg_kms(&i915->drm, - "Unexpected DMC event configuration (control %08x htp %08x)\n", - event_ctl, event_htp); - return; - } - - disable_event_handler(i915, ctl_reg, htp_reg); -} - -static bool -get_flip_queue_event_regs(struct drm_i915_private *i915, enum intel_dmc_id dmc_id, - i915_reg_t *ctl_reg, i915_reg_t *htp_reg) -{ - if (dmc_id == DMC_FW_MAIN) { - if (DISPLAY_VER(i915) == 12) { - *ctl_reg = DMC_EVT_CTL(i915, dmc_id, 3); - *htp_reg = DMC_EVT_HTP(i915, dmc_id, 3); - - return true; - } - } else if (dmc_id >= DMC_FW_PIPEA && dmc_id <= DMC_FW_PIPED) { - if (IS_DG2(i915)) { - *ctl_reg = DMC_EVT_CTL(i915, dmc_id, 2); - *htp_reg = DMC_EVT_HTP(i915, dmc_id, 2); - - return true; - } - } - - return false; -} - -static void -disable_all_flip_queue_events(struct drm_i915_private *i915) -{ - enum intel_dmc_id dmc_id; - - /* TODO: check if the following applies to all D13+ platforms. */ - if (!IS_TIGERLAKE(i915)) - return; - - for_each_dmc_id(dmc_id) { - i915_reg_t ctl_reg; - i915_reg_t htp_reg; - - if (!has_dmc_id_fw(i915, dmc_id)) - continue; - - if (!get_flip_queue_event_regs(i915, dmc_id, &ctl_reg, &htp_reg)) - continue; - - disable_flip_queue_event(i915, ctl_reg, htp_reg); - } -} - static void disable_all_event_handlers(struct drm_i915_private *i915) { enum intel_dmc_id dmc_id; @@ -514,6 +443,11 @@ static bool disable_dmc_evt(struct drm_i915_private *i915, if (dmc_id != DMC_FW_MAIN) return true; + /* also disable the flip queue event on the main DMC on TGL */ + if (IS_TIGERLAKE(i915) && + REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_CLK_MSEC) + return true; + return false; } @@ -579,13 +513,6 @@ void intel_dmc_load_program(struct drm_i915_private *i915) gen9_set_dc_state_debugmask(i915); - /* - * Flip queue events need to be disabled before enabling DC5/6. - * i915 doesn't use the flip queue feature, so disable it already - * here. - */ - disable_all_flip_queue_events(i915); - pipedmc_clock_gating_wa(i915, false); } -- cgit v1.2.3 From e1a4e3cb3ac67ced1fe9e83fea6d8d91f7c4e864 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 13 Dec 2023 17:08:07 +0200 Subject: drm/i915/dmc: Also disable HRR event on TGL/ADLS main DMC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike later platforms TGL/ADLS has the half refresh rate (HRR) event on the main DMC (as opposed to the pipe DMC). 
Since we're disabling that event on all later platforms already let's do the same on TGL/ADLS as well. There is supposedly a bit somewhere (DMC_CHICKEN on TGL) to make the handler not do anything, but we don't currently have code to frob it. Though that bit should be off by default, the ADL+ experience has shown us that trusting any of this isn't a good idea. So seems safer to just disable all event handlers we know that we don't need. Also the TGL/ADLS DMC firmware is apparently using the wrong event (undelayed vblank) here anyway. It should be using the delayed vblank event instead (like ADL+ firmware does), but they didn't release a firmware fix for this and instead just hacked around this in the Windows driver code :/ v2: Also disable the event on ADLS (Imre) Cc: Imre Deak Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231213150807.21331-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_dmc.c | 5 +++++ drivers/gpu/drm/i915/display/intel_dmc_regs.h | 1 + 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 9385898e3aa5..fcc7283b7dcd 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -448,6 +448,11 @@ static bool disable_dmc_evt(struct drm_i915_private *i915, REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_CLK_MSEC) return true; + /* also disable the HRR event on the main DMC on TGL/ADLS */ + if ((IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915)) && + REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_VBLANK_A) + return true; + return false; } diff --git a/drivers/gpu/drm/i915/display/intel_dmc_regs.h b/drivers/gpu/drm/i915/display/intel_dmc_regs.h index cf10094acae3..90d0dbb41cfe 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_dmc_regs.h @@ -60,6 +60,7 @@ #define DMC_EVT_CTL_EVENT_ID_MASK REG_GENMASK(15, 8) #define DMC_EVT_CTL_EVENT_ID_FALSE 0x01 +#define DMC_EVT_CTL_EVENT_ID_VBLANK_A 0x32 /* main DMC */ /* An event handler scheduled to run at a 1 kHz frequency. */ #define DMC_EVT_CTL_EVENT_ID_CLK_MSEC 0xbf -- cgit v1.2.3 From 5f23cea2d9ccc94c5de236312649fe85b89d6f26 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Dec 2023 23:37:50 +0200 Subject: drm/i915/dmc: Print out the DMC mmio register list at fw load time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To help with debugging print out the mmio list contained in the DMC firmware. Also highlight the event registers, and whether we're going to disable them or not. 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211213750.27109-5-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_dmc.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index fcc7283b7dcd..b70502586ab9 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -432,6 +432,16 @@ static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915, return offset >= start && offset < end; } +static bool is_dmc_evt_htp_reg(struct drm_i915_private *i915, + enum intel_dmc_id dmc_id, i915_reg_t reg) +{ + u32 offset = i915_mmio_reg_offset(reg); + u32 start = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, 0)); + u32 end = i915_mmio_reg_offset(DMC_EVT_HTP(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12)); + + return offset >= start && offset < end; +} + static bool disable_dmc_evt(struct drm_i915_private *i915, enum intel_dmc_id dmc_id, i915_reg_t reg, u32 data) @@ -713,9 +723,17 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, return 0; } + drm_dbg_kms(&i915->drm, "DMC %d:\n", dmc_id); for (i = 0; i < mmio_count; i++) { dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]); dmc_info->mmiodata[i] = mmiodata[i]; + + drm_dbg_kms(&i915->drm, " mmio[%d]: 0x%x = 0x%x%s%s\n", + i, mmioaddr[i], mmiodata[i], + is_dmc_evt_ctl_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_CTL)" : + is_dmc_evt_htp_reg(i915, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_HTP)" : "", + disable_dmc_evt(i915, dmc_id, dmc_info->mmioaddr[i], + dmc_info->mmiodata[i]) ? " (disabling)" : ""); } dmc_info->mmio_count = mmio_count; dmc_info->start_mmioaddr = start_mmioaddr; -- cgit v1.2.3 From 716c3cf21784479a1934b670ec67f320cbb5d308 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Tue, 14 Nov 2023 15:41:41 +0200 Subject: drm/i915/display: Remove dead code around intel_atomic_helper->free_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After switching to directly using dma_fence instead of i915_sw_fence we have left some dead code around intel_atomic_helper->free_list. Remove that dead code. 
v2: Remove intel_atomic_state->freed as well Signed-off-by: Jouni Högander Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231114134141.2527694-1-jouni.hogander@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 20 -------------------- drivers/gpu/drm/i915/display/intel_display_core.h | 6 ------ drivers/gpu/drm/i915/display/intel_display_driver.c | 7 ------- drivers/gpu/drm/i915/display/intel_display_types.h | 2 -- 4 files changed, 35 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e8b307ae9319..66b4c3f3ceb1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6974,24 +6974,6 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) drm_WARN_ON(&dev_priv->drm, update_pipes); } -static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) -{ - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->display.atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - -void intel_atomic_helper_free_state_worker(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), display.atomic_helper.free_work); - - intel_atomic_helper_free_state(dev_priv); -} - static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_state) { struct drm_i915_private *i915 = to_i915(intel_state->base.dev); @@ -7027,8 +7009,6 @@ static void intel_atomic_cleanup_work(struct work_struct *work) drm_atomic_helper_cleanup_planes(&i915->drm, &state->base); drm_atomic_helper_commit_cleanup_done(&state->base); drm_atomic_state_put(&state->base); - - intel_atomic_helper_free_state(i915); } static void intel_atomic_prepare_plane_clear_colors(struct intel_atomic_state *state) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 3308f07968ff..7349a2b928c0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -295,12 +295,6 @@ struct intel_display { const struct intel_audio_funcs *audio; } funcs; - /* Grouping using anonymous structs. Keep sorted. 
*/ - struct intel_atomic_helper { - struct llist_head free_list; - struct work_struct free_work; - } atomic_helper; - struct { /* backlight registers and fields in struct intel_panel */ struct mutex lock; diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 62f7b10484be..9df9097a0255 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -259,10 +259,6 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) if (ret) goto cleanup_vga_client_pw_domain_dmc; - init_llist_head(&i915->display.atomic_helper.free_list); - INIT_WORK(&i915->display.atomic_helper.free_work, - intel_atomic_helper_free_state_worker); - intel_init_quirks(i915); intel_fbc_init(i915); @@ -430,9 +426,6 @@ void intel_display_driver_remove(struct drm_i915_private *i915) flush_workqueue(i915->display.wq.flip); flush_workqueue(i915->display.wq.modeset); - flush_work(&i915->display.atomic_helper.free_work); - drm_WARN_ON(&i915->drm, !llist_empty(&i915->display.atomic_helper.free_list)); - /* * MST topology needs to be suspended so we don't have any calls to * fbdev after it's finalized. MST will be destroyed later as part of diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 341d80c2b9de..3fdd8a517983 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -684,8 +684,6 @@ struct intel_atomic_state { bool skip_intermediate_wm; bool rps_interactive; - - struct llist_node freed; }; struct intel_plane_state { -- cgit v1.2.3 From 6bf3549384033102986a3514744e080d3bfca7cf Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 22 Nov 2023 12:16:27 +0530 Subject: drm/i915/display: Get bigjoiner config before dsc config during readout Currently we get bigjoiner config after the dsc get config, during HW readout. Since dsc_get_config now uses bigjoiner flags/pipes to compute DSC PPS parameter pic_width, this results in a state mismatch when Bigjoiner and DSC are used together. So call get bigjoiner config before calling dsc get config function. 
Fixes: 8b70b5691704 ("drm/i915/vdsc: Fill the intel_dsc_get_pps_config function") Cc: Suraj Kandpal Cc: Ankit Nautiyal Cc: Animesh Manna Cc: Jani Nikula Signed-off-by: Ankit Nautiyal Reviewed-by: Suraj Kandpal Link: https://patchwork.freedesktop.org/patch/msgid/20231122064627.905828-1-ankit.k.nautiyal@intel.com (cherry picked from commit baf31a20fa7f3538d68ffa5262a715eb1d699cdd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 63ba4d54a715..db7ada7907f3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3747,8 +3747,8 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, if (!active) goto out; - intel_dsc_get_config(pipe_config); intel_bigjoiner_get_config(pipe_config); + intel_dsc_get_config(pipe_config); if (!transcoder_is_dsi(pipe_config->cpu_transcoder) || DISPLAY_VER(dev_priv) >= 11) -- cgit v1.2.3 From 768f17fd25e4a98bf5166148629ecf6f647d5efc Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Mon, 4 Dec 2023 20:18:09 +0530 Subject: drm/i915/hwmon: Fix static analysis tool reported issues Updated i915 hwmon with fixes for issues reported by static analysis tool. Fixed integer overflow with upcasting. v2: - Added Fixes tag (Badal). - Updated commit message as per review comments (Anshuman). Fixes: 4c2572fe0ae7 ("drm/i915/hwmon: Expose power1_max_interval") Reviewed-by: Badal Nilawar Reviewed-by: Anshuman Gupta Signed-off-by: Karthik Poosa Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20231204144809.1518704-1-karthik.poosa@intel.com (cherry picked from commit ac3420d3d428443a08b923f9118121c170192b62) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_hwmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 975da8e7f2a9..8c3f443c8347 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -175,7 +175,7 @@ hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr, * tau4 = (4 | x) << y * but add 2 when doing the final right shift to account for units */ - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; /* val in hwmon interface units (millisec) */ out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); @@ -211,7 +211,7 @@ hwm_power1_max_interval_store(struct device *dev, r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); x = REG_FIELD_GET(PKG_MAX_WIN_X, r); y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); if (val > max_win) -- cgit v1.2.3 From 88a173e5dd05e788068e8fa20a8c37c44bd8f416 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Dec 2023 10:11:34 +0200 Subject: drm/i915: Reject async flips with bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently async flips are busted when bigjoiner is in use. As a short term fix simply reject async flips in that case. 
Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9769 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211081134.2698-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit e93bffc2ac0a833b42841f31fff955549d38ce98) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index db7ada7907f3..df582ff81b45 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6033,6 +6033,17 @@ static int intel_async_flip_check_uapi(struct intel_atomic_state *state, return -EINVAL; } + /* + * FIXME: Bigjoiner+async flip is busted currently. + * Remove this check once the issues are fixed. + */ + if (new_crtc_state->bigjoiner_pipes) { + drm_dbg_kms(&i915->drm, + "[CRTC:%d:%s] async flip disallowed with bigjoiner\n", + crtc->base.base.id, crtc->base.name); + return -EINVAL; + } + for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { if (plane->pipe != crtc->pipe) -- cgit v1.2.3 From dbcab554f777390d9bb6a808ed0cd90ee59bb44e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 14 Dec 2023 00:05:26 +0200 Subject: drm/i915/mtl: Fix HDMI/DP PLL clock selection Select the HDMI specific PLL clock only for HDMI outputs. Fixes: 62618c7f117e ("drm/i915/mtl: C20 PLL programming") Cc: Mika Kahola Cc: Radhakrishna Sripada Reviewed-by: Radhakrishna Sripada Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231213220526.1828827-1-imre.deak@intel.com (cherry picked from commit 937d02cc79c6828fef28a4d80d8d0ad2f7bf2b62) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index d414f6b7f993..ccf225afeb2a 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2465,7 +2465,8 @@ static void intel_program_port_clock_ctl(struct intel_encoder *encoder, val |= XELPDP_FORWARD_CLOCK_UNGATE; - if (is_hdmi_frl(crtc_state->port_clock)) + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && + is_hdmi_frl(crtc_state->port_clock)) val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_DIV18CLK); else val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_MAXPCLK); -- cgit v1.2.3 From 49e0a85ec3441edc6c77aa40206d6e5ee4597efc Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Dec 2023 23:37:47 +0200 Subject: drm/i915/dmc: Don't enable any pipe DMC events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipe DMC seems to be making a mess of things in ADL. Various weird symptoms have been observed such as missing vblank irqs, typicalle happening when using multiple displays. Keep all pipe DMC event handlers disabled until needed (which is never atm). This is also what Windows does on ADL+. We can also drop DG2 from disable_all_flip_queue_events() since on DG2 the pipe DMC is the one that handles the flip queue events. 
Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8685 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211213750.27109-2-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 648d7be8ecf47b0556e32550145c70db153b16fb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dmc.c | 43 ++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 63e080e07023..073b85b57679 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -389,7 +389,7 @@ disable_all_flip_queue_events(struct drm_i915_private *i915) enum intel_dmc_id dmc_id; /* TODO: check if the following applies to all D13+ platforms. */ - if (!IS_DG2(i915) && !IS_TIGERLAKE(i915)) + if (!IS_TIGERLAKE(i915)) return; for_each_dmc_id(dmc_id) { @@ -493,6 +493,45 @@ void intel_dmc_disable_pipe(struct drm_i915_private *i915, enum pipe pipe) intel_de_rmw(i915, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0); } +static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915, + enum intel_dmc_id dmc_id, i915_reg_t reg) +{ + u32 offset = i915_mmio_reg_offset(reg); + u32 start = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, 0)); + u32 end = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12)); + + return offset >= start && offset < end; +} + +static bool disable_dmc_evt(struct drm_i915_private *i915, + enum intel_dmc_id dmc_id, + i915_reg_t reg, u32 data) +{ + if (!is_dmc_evt_ctl_reg(i915, dmc_id, reg)) + return false; + + /* keep all pipe DMC events disabled by default */ + if (dmc_id != DMC_FW_MAIN) + return true; + + return false; +} + +static u32 dmc_mmiodata(struct drm_i915_private *i915, + struct intel_dmc *dmc, + enum intel_dmc_id dmc_id, int i) +{ + if (disable_dmc_evt(i915, dmc_id, + dmc->dmc_info[dmc_id].mmioaddr[i], + dmc->dmc_info[dmc_id].mmiodata[i])) + return REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, + DMC_EVT_CTL_TYPE_EDGE_0_1) | + REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, + DMC_EVT_CTL_EVENT_ID_FALSE); + else + return dmc->dmc_info[dmc_id].mmiodata[i]; +} + /** * intel_dmc_load_program() - write the firmware from memory to register. * @i915: i915 drm device. @@ -532,7 +571,7 @@ void intel_dmc_load_program(struct drm_i915_private *i915) for_each_dmc_id(dmc_id) { for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) { intel_de_write(i915, dmc->dmc_info[dmc_id].mmioaddr[i], - dmc->dmc_info[dmc_id].mmiodata[i]); + dmc_mmiodata(i915, dmc, dmc_id, i)); } } -- cgit v1.2.3 From c8048dd0b07df68724805254b9e994d99e9a7af4 Mon Sep 17 00:00:00 2001 From: "Nícolas F. R. A. Prado" Date: Tue, 21 Nov 2023 09:29:27 -0500 Subject: drm/mediatek: dp: Add phy_mtk_dp module as pre-dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mtk_dp driver registers a phy device which is handled by the phy_mtk_dp driver and assumes that the phy probe will complete synchronously, proceeding to make use of functionality exposed by that driver right away. This assumption however is false when the phy driver is built as a module, causing the mtk_dp driver to fail probe in this case. 
Add the phy_mtk_dp module as a pre-dependency to the mtk_dp module to ensure the phy module has been loaded before the dp, so that the phy probe happens synchrounously and the mtk_dp driver can probe successfully even with the phy driver built as a module. Suggested-by: AngeloGioacchino Del Regno Fixes: f70ac097a2cf ("drm/mediatek: Add MT8195 Embedded DisplayPort driver") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Guillaume Ranquet Link: https://patchwork.kernel.org/project/dri-devel/patch/20231121142938.460846-1-nfraprado@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index e4c16ba9902d..2136a596efa1 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -2818,3 +2818,4 @@ MODULE_AUTHOR("Markus Schneider-Pargmann "); MODULE_AUTHOR("Bo-Chen Chen "); MODULE_DESCRIPTION("MediaTek DisplayPort Driver"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: phy_mtk_dp"); -- cgit v1.2.3 From 3164c8a70073d43629b4e11e083d3d2798f7750f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 14 Dec 2023 12:37:51 -0800 Subject: drm/bridge: parade-ps8640: Never store more than msg->size bytes in AUX xfer While testing, I happened to notice a random crash that looked like: Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: drm_dp_dpcd_probe+0x120/0x120 Analysis of drm_dp_dpcd_probe() shows that we pass in a 1-byte buffer (allocated on the stack) to the aux->transfer() function. Presumably if the aux->transfer() writes more than one byte to this buffer then we're in a bad shape. Dropping into kgdb, I noticed that "aux->transfer" pointed at ps8640_aux_transfer(). Reading through ps8640_aux_transfer(), I can see that there are cases where it could write more bytes to msg->buffer than were specified by msg->size. This could happen if the hardware reported back something bogus to us. Let's fix this so we never write more than msg->size bytes. We'll still read all the bytes from the hardware just in case the hardware requires it since the aux transfer data comes through an auto-incrementing register. NOTE: I have no actual way to reproduce this issue but it seems likely this is what was happening in the crash I looked at. 
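The shape of the fix is the usual clamp-on-copy pattern: keep draining whatever length the hardware reports (the data register auto-increments, so every byte must be read), but never store past the caller's buffer. A minimal sketch of that pattern, with read_aux_byte() standing in for the real register accessor rather than being the actual ps8640 helper:

    static size_t copy_aux_reply(u8 *buf, size_t buf_size, size_t hw_len,
                                 u8 (*read_aux_byte)(void))
    {
            size_t i;

            for (i = 0; i < hw_len; i++) {
                    u8 data = read_aux_byte();      /* always consume the byte */

                    if (i < buf_size)               /* but never overrun buf */
                            buf[i] = data;
            }

            return min(hw_len, buf_size);           /* report at most buf_size bytes */
    }
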
Fixes: 13afcdd7277e ("drm/bridge: parade-ps8640: Add support for AUX channel") Reviewed-by: Stephen Boyd Reviewed-by: Guenter Roeck Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231214123752.v3.1.I9d1afcaad76a3e2c0ca046dc4adbc2b632c22eda@changeid --- drivers/gpu/drm/bridge/parade-ps8640.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 8161b1a1a4b1..d264b80d909d 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -330,11 +330,12 @@ static ssize_t ps8640_aux_transfer_msg(struct drm_dp_aux *aux, return ret; } - buf[i] = data; + if (i < msg->size) + buf[i] = data; } } - return len; + return min(len, msg->size); } static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, -- cgit v1.2.3 From aca58eac52b88138ab98c814afb389a381725cd7 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 14 Dec 2023 12:37:52 -0800 Subject: drm/bridge: ti-sn65dsi86: Never store more than msg->size bytes in AUX xfer For aux reads, the value `msg->size` indicates the size of the buffer provided by `msg->buffer`. We should never in any circumstances write more bytes to the buffer since it may overflow the buffer. In the ti-sn65dsi86 driver there is one code path that reads the transfer length from hardware. Even though it's never been seen to be a problem, we should make extra sure that the hardware isn't increasing the length since doing so would cause us to overrun the buffer. Fixes: 982f589bde7a ("drm/bridge: ti-sn65dsi86: Update reply on aux failures") Reviewed-by: Stephen Boyd Reviewed-by: Guenter Roeck Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231214123752.v3.2.I7b83c0f31aeedc6b1dc98c7c741d3e1f94f040f8@changeid --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index c45c07840f64..b5464199b633 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -527,6 +527,7 @@ static ssize_t ti_sn_aux_transfer(struct drm_dp_aux *aux, u32 request_val = AUX_CMD_REQ(msg->request); u8 *buf = msg->buffer; unsigned int len = msg->size; + unsigned int short_len; unsigned int val; int ret; u8 addr_len[SN_AUX_LENGTH_REG + 1 - SN_AUX_ADDR_19_16_REG]; @@ -600,7 +601,8 @@ static ssize_t ti_sn_aux_transfer(struct drm_dp_aux *aux, } if (val & AUX_IRQ_STATUS_AUX_SHORT) { - ret = regmap_read(pdata->regmap, SN_AUX_LENGTH_REG, &len); + ret = regmap_read(pdata->regmap, SN_AUX_LENGTH_REG, &short_len); + len = min(len, short_len); if (ret) goto exit; } else if (val & AUX_IRQ_STATUS_NAT_I2C_FAIL) { -- cgit v1.2.3 From e847934bb124b2ad14bf967d6682e43b0b94c78a Mon Sep 17 00:00:00 2001 From: David Gow Date: Tue, 28 Nov 2023 15:24:06 +0800 Subject: drm/tests: Use KUNIT_DEFINE_ACTION_WRAPPER() In order to pass functions to kunit_add_action(), they need to be of the kunit_action_t type. While casting the function pointer can work, it will break control-flow integrity. drm_kunit_helpers already defines wrappers, but we now have a macro which does this automatically. Using this greatly reduces the boilerplate needed. 
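For context, KUNIT_DEFINE_ACTION_WRAPPER() just generates a tiny wrapper with the void-pointer signature that kunit_add_action() expects; it is roughly equivalent to the following (simplified, not the verbatim kunit header):

    #define KUNIT_DEFINE_ACTION_WRAPPER(wrapper, orig, arg_type)        \
            static void wrapper(void *in)                               \
            {                                                           \
                    arg_type arg = (arg_type)in;                        \
                    orig(arg);                                          \
            }

    /* e.g. this generates kunit_action_platform_device_put(void *), which can
     * be handed to kunit_add_action() without casting platform_device_put()
     * and therefore without breaking control-flow integrity:
     */
    KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_platform_device_put,
                                platform_device_put,
                                struct platform_device *);
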
Acked-by: Maxime Ripard Signed-off-by: David Gow Signed-off-by: Shuah Khan --- drivers/gpu/drm/tests/drm_kunit_helpers.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index bccb33b900f3..c251e6b34de0 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -27,27 +27,15 @@ static struct platform_driver fake_platform_driver = { }, }; -static void kunit_action_platform_driver_unregister(void *ptr) -{ - struct platform_driver *drv = ptr; - - platform_driver_unregister(drv); - -} - -static void kunit_action_platform_device_put(void *ptr) -{ - struct platform_device *pdev = ptr; - - platform_device_put(pdev); -} - -static void kunit_action_platform_device_del(void *ptr) -{ - struct platform_device *pdev = ptr; - - platform_device_del(pdev); -} +KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_platform_driver_unregister, + platform_driver_unregister, + struct platform_driver *); +KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_platform_device_put, + platform_device_put, + struct platform_device *); +KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_platform_device_del, + platform_device_del, + struct platform_device *); /** * drm_kunit_helper_alloc_device - Allocate a mock device for a KUnit test -- cgit v1.2.3 From a08d4d6284393d44ef4e076288c31d04fc469a58 Mon Sep 17 00:00:00 2001 From: David Gow Date: Tue, 28 Nov 2023 15:24:07 +0800 Subject: drm/vc4: tests: Use KUNIT_DEFINE_ACTION_WRAPPER In order to pass functions to kunit_add_action(), they need to be of the kunit_action_t type. While casting the function pointer can work, it will break control-flow integrity. vc4_mock already defines such a wrapper for drm_dev_unregister(), but it involves less boilerplate to use the new macro, so replace the manual implementation. Signed-off-by: David Gow Reviewed-by: Maxime Ripard Signed-off-by: Shuah Khan --- drivers/gpu/drm/vc4/tests/vc4_mock.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock.c b/drivers/gpu/drm/vc4/tests/vc4_mock.c index 63ca46f4cb35..becb3dbaa548 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock.c @@ -153,12 +153,9 @@ static int __build_mock(struct kunit *test, struct drm_device *drm, return 0; } -static void kunit_action_drm_dev_unregister(void *ptr) -{ - struct drm_device *drm = ptr; - - drm_dev_unregister(drm); -} +KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_drm_dev_unregister, + drm_dev_unregister, + struct drm_device *); static struct vc4_dev *__mock_device(struct kunit *test, bool is_vc5) { -- cgit v1.2.3 From d393acce7b3f046a1086362317a05f2cac01fa89 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 15 Dec 2023 15:39:12 +0800 Subject: drm/tests: Switch to kunit devices Kunit recently gained helpers to create test managed devices. This means that we no longer have to roll our own helpers in KMS and we can reuse them. 
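The kunit device helpers manage the fake device's lifetime on behalf of the test; a minimal usage sketch (the test function and device name are illustrative, not taken from this series):

    static void example_device_test(struct kunit *test)
    {
            struct device *dev;

            dev = kunit_device_register(test, "example-kunit-device");
            KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);

            /* ... exercise code that only needs a struct device ... */

            /* Explicit teardown is optional: the device is also released
             * automatically when the test finishes.
             */
            kunit_device_unregister(test, dev);
    }
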
Signed-off-by: Maxime Ripard Tested-by: David Gow Signed-off-by: David Gow Signed-off-by: Shuah Khan --- drivers/gpu/drm/tests/drm_kunit_helpers.c | 66 ++----------------------------- 1 file changed, 3 insertions(+), 63 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index c251e6b34de0..ca4f8e4c5d5d 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -15,28 +16,6 @@ static const struct drm_mode_config_funcs drm_mode_config_funcs = { }; -static int fake_probe(struct platform_device *pdev) -{ - return 0; -} - -static struct platform_driver fake_platform_driver = { - .probe = fake_probe, - .driver = { - .name = KUNIT_DEVICE_NAME, - }, -}; - -KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_platform_driver_unregister, - platform_driver_unregister, - struct platform_driver *); -KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_platform_device_put, - platform_device_put, - struct platform_device *); -KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_platform_device_del, - platform_device_del, - struct platform_device *); - /** * drm_kunit_helper_alloc_device - Allocate a mock device for a KUnit test * @test: The test context object @@ -54,34 +33,7 @@ KUNIT_DEFINE_ACTION_WRAPPER(kunit_action_platform_device_del, */ struct device *drm_kunit_helper_alloc_device(struct kunit *test) { - struct platform_device *pdev; - int ret; - - ret = platform_driver_register(&fake_platform_driver); - KUNIT_ASSERT_EQ(test, ret, 0); - - ret = kunit_add_action_or_reset(test, - kunit_action_platform_driver_unregister, - &fake_platform_driver); - KUNIT_ASSERT_EQ(test, ret, 0); - - pdev = platform_device_alloc(KUNIT_DEVICE_NAME, PLATFORM_DEVID_NONE); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pdev); - - ret = kunit_add_action_or_reset(test, - kunit_action_platform_device_put, - pdev); - KUNIT_ASSERT_EQ(test, ret, 0); - - ret = platform_device_add(pdev); - KUNIT_ASSERT_EQ(test, ret, 0); - - ret = kunit_add_action_or_reset(test, - kunit_action_platform_device_del, - pdev); - KUNIT_ASSERT_EQ(test, ret, 0); - - return &pdev->dev; + return kunit_device_register(test, KUNIT_DEVICE_NAME); } EXPORT_SYMBOL_GPL(drm_kunit_helper_alloc_device); @@ -94,19 +46,7 @@ EXPORT_SYMBOL_GPL(drm_kunit_helper_alloc_device); */ void drm_kunit_helper_free_device(struct kunit *test, struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - - kunit_release_action(test, - kunit_action_platform_device_del, - pdev); - - kunit_release_action(test, - kunit_action_platform_device_put, - pdev); - - kunit_release_action(test, - kunit_action_platform_driver_unregister, - &fake_platform_driver); + kunit_device_unregister(test, dev); } EXPORT_SYMBOL_GPL(drm_kunit_helper_free_device); -- cgit v1.2.3 From 6914968a0b52507bf19d85e5fb9e35272e17cd35 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 17 Dec 2023 01:59:10 +0200 Subject: drm/bridge: properly refcount DT nodes in aux bridge drivers The aux-bridge and aux-hpd-bridge drivers didn't call of_node_get() on the device nodes further used for dev->of_node and platform data. When bridge devices are released, the reference counts are decreased, resulting in refcount underflow / use-after-free warnings. Get corresponding refcounts during AUX bridge allocation. 
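The underlying rule is that every stored node pointer must own a reference, because the release path already drops one. A sketch of the balanced pattern, assuming an illustrative bind/release pair rather than the exact driver code:

    /* the allocation side takes a reference when storing the node pointer ... */
    static void example_bind(struct auxiliary_device *adev, struct device *parent)
    {
            adev->dev.of_node = of_node_get(parent->of_node);
    }

    /* ... which balances the of_node_put() the release callback already does,
     * so the refcount no longer underflows when the device goes away.
     */
    static void example_release(struct device *dev)
    {
            struct auxiliary_device *adev = to_auxiliary_dev(dev);

            of_node_put(adev->dev.of_node);
            kfree(adev);
    }
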
Reported-by: Luca Weiss Fixes: 2a04739139b2 ("drm/bridge: add transparent bridge helper") Fixes: 26f4bac3d884 ("drm/bridge: aux-hpd: Replace of_device.h with explicit include") Reviewed-by: Neil Armstrong Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20231216235910.911958-1-dmitry.baryshkov@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/aux-bridge.c | 3 ++- drivers/gpu/drm/bridge/aux-hpd-bridge.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c index 49d7c2ab1ecc..b29980f95379 100644 --- a/drivers/gpu/drm/bridge/aux-bridge.c +++ b/drivers/gpu/drm/bridge/aux-bridge.c @@ -6,6 +6,7 @@ */ #include #include +#include #include #include @@ -57,7 +58,7 @@ int drm_aux_bridge_register(struct device *parent) adev->id = ret; adev->name = "aux_bridge"; adev->dev.parent = parent; - adev->dev.of_node = parent->of_node; + adev->dev.of_node = of_node_get(parent->of_node); adev->dev.release = drm_aux_bridge_release; ret = auxiliary_device_init(adev); diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index 1999a053d59b..bb55f697a181 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -68,9 +68,9 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, adev->id = ret; adev->name = "dp_hpd_bridge"; adev->dev.parent = parent; - adev->dev.of_node = parent->of_node; + adev->dev.of_node = of_node_get(parent->of_node); adev->dev.release = drm_aux_hpd_bridge_release; - adev->dev.platform_data = np; + adev->dev.platform_data = of_node_get(np); ret = auxiliary_device_init(adev); if (ret) { -- cgit v1.2.3 From 35ba6bd582cf926a082296b7e9a876ec81136cb1 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 18 Dec 2023 09:04:54 -0800 Subject: drm/bridge: ps8640: Fix size mismatch warning w/ len After commit 26195af57798 ("drm/bridge: ps8640: Drop the ability of ps8640 to fetch the EDID"), I got an error compiling: error: comparison of distinct pointer types ('typeof (len) *' (aka 'unsigned int *') and 'typeof (msg->size) *' (aka 'unsigned long *')) [-Werror,-Wcompare-distinct-pointer-types] Fix it by declaring the `len` as size_t. The above error only shows up on downstream kernels without commit d03eba99f5bf ("minmax: allow min()/max()/clamp() if the arguments have the same signedness."), but since commit 26195af57798 ("drm/bridge: ps8640: Drop the ability of ps8640 to fetch the EDID") is a "Fix" that will likely be backported it seems nice to make it easy. ...plus it's more correct to declare `len` as size_t anyway. 
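The error itself is just min()'s type checking: on trees without the relaxed same-signedness rule, both arguments must have the same type. Making the declarations match (the fix chosen here) or forcing the comparison type both silence it; a short illustration:

    /* either make the types match (the fix chosen here) ... */
    size_t len = msg->size;
    return min(len, msg->size);         /* both size_t: no warning */

    /* ... or keep 'unsigned int len' and force the comparison type */
    return min_t(size_t, len, msg->size);
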
Fixes: 26195af57798 ("drm/bridge: ps8640: Drop the ability of ps8640 to fetch the EDID") Reviewed-by: Stephen Boyd Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231218090454.1.I5c6eb80b2f746439c4b58efab788e00701d08759@changeid --- drivers/gpu/drm/bridge/parade-ps8640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index d264b80d909d..541e4f5afc4c 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -210,7 +210,7 @@ static ssize_t ps8640_aux_transfer_msg(struct drm_dp_aux *aux, struct ps8640 *ps_bridge = aux_to_ps8640(aux); struct regmap *map = ps_bridge->regmap[PAGE0_DP_CNTL]; struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev; - unsigned int len = msg->size; + size_t len = msg->size; unsigned int data; unsigned int base; int ret; -- cgit v1.2.3 From 6e4337f695c25162f0296934152506ad596fcebf Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Thu, 30 Nov 2023 18:54:48 -0500 Subject: drm/amd/display: Unify optimize_required flags and VRR adjustments [why] There is only a single call to dc_post_update_surfaces_to_stream so there is no need to have two flags to control it. Unifying this to a single flag allows dc_stream_adjust_vmin_vmax to skip actual programming when there is no change required. [how] Remove wm_optimze_required flag and set only optimize_required in its place. Then in dc_stream_adjust_vmin_vmax, check that the stream timing range matches the requested one and skip programming if they are equal. Reviewed-by: Jun Lei Acked-by: Wayne Lin Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 14 +++++--------- drivers/gpu/drm/amd/display/dc/dc.h | 1 - drivers/gpu/drm/amd/display/dc/dc_stream.h | 2 -- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 8 ++++---- 5 files changed, 10 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9d3925603979..0d42074f33a6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -409,9 +409,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * avoid conflicting with firmware updates. 
*/ if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required || dc->wm_optimized_required) + if (dc->optimized_required) return false; + if (!memcmp(&stream->adjust, adjust, sizeof(*adjust))) + return true; + stream->adjust.v_total_max = adjust->v_total_max; stream->adjust.v_total_mid = adjust->v_total_mid; stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num; @@ -2223,7 +2226,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) } dc->optimized_required = false; - dc->wm_optimized_required = false; } static void init_state(struct dc *dc, struct dc_state *context) @@ -2743,8 +2745,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream( } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { dc->optimized_required = true; } - - dc->optimized_required |= dc->wm_optimized_required; } return type; @@ -2952,9 +2952,6 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_active_fixed) stream->vrr_active_fixed = *update->vrr_active_fixed; - if (update->crtc_timing_adjust) - stream->adjust = *update->crtc_timing_adjust; - if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -4401,8 +4398,7 @@ static bool full_update_required(struct dc *dc, stream_update->mst_bw_update || stream_update->func_shaper || stream_update->lut3d_func || - stream_update->pending_test_pattern || - stream_update->crtc_timing_adjust)) + stream_update->pending_test_pattern)) return true; if (stream) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 2c85f8ee682f..c87ec3e0a8cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1026,7 +1026,6 @@ struct dc { /* Require to optimize clocks and bandwidth for added/removed planes */ bool optimized_required; - bool wm_optimized_required; bool idle_optimizations_allowed; bool enable_c20_dtm_b0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 4ac48c346a33..27118e672754 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -130,7 +130,6 @@ union stream_update_flags { uint32_t wb_update:1; uint32_t dsc_changed : 1; uint32_t mst_bw : 1; - uint32_t crtc_timing_adjust : 1; uint32_t fams_changed : 1; } bits; @@ -342,7 +341,6 @@ struct dc_stream_update { struct dc_3dlut *lut3d_func; struct test_pattern *pending_test_pattern; - struct dc_crtc_timing_adjust *crtc_timing_adjust; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index cdb903116eb7..0d9e41315f84 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3068,7 +3068,7 @@ void dcn10_prepare_bandwidth( context, false); - dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index c3c83178eb1e..5bb5e2960276 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2146,10 +2146,10 @@ void dcn20_prepare_bandwidth( } 
/* program dchubbub watermarks: - * For assigning wm_optimized_required, use |= operator since we don't want + * For assigning optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); @@ -2162,10 +2162,10 @@ void dcn20_prepare_bandwidth( if (hubbub->funcs->program_compbuf_size) { if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); -- cgit v1.2.3 From a0d25fcd75d40441712ff210cba2e49fc771a8b3 Mon Sep 17 00:00:00 2001 From: Johnson Chen Date: Mon, 4 Dec 2023 18:48:15 -0500 Subject: drm/amd/display: Add function for dumping clk registers [why] Allow devs to check raw clk register values by dumping them on the log [how] Add clk register dump implementation Reviewed-by: Charlene Liu Acked-by: Wayne Lin Signed-off-by: Johnson Chen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 7 +++---- drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 8d4c0b209872..789091f3bdc8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -50,6 +50,7 @@ #include "dc_dmub_srv.h" #include "link.h" #include "logger_types.h" + #undef DC_LOGGER #define DC_LOGGER \ clk_mgr->base.base.ctx->logger @@ -417,9 +418,8 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a, } static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, - struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) + struct clk_mgr_dcn35 *clk_mgr) { - } static struct clk_bw_params dcn35_bw_params = { @@ -986,7 +986,6 @@ void dcn35_clk_mgr_construct( struct dccg *dccg) { struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 }; - struct clk_log_info log_info = {0}; clk_mgr->base.base.ctx = ctx; clk_mgr->base.base.funcs = &dcn35_funcs; @@ -1039,7 +1038,7 @@ void dcn35_clk_mgr_construct( dcn35_bw_params.wm_table = ddr5_wm_table; } /* Saved clocks configured at boot for debug purposes */ - dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info); + dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index fa9614bcb160..cbba39d251e5 
100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -62,6 +62,25 @@ struct dcn3_clk_internal { uint32_t CLK4_CLK0_CURRENT_CNT; //fclk }; +struct dcn35_clk_internal { + int dummy; + uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk + uint32_t CLK1_CLK1_CURRENT_CNT; //dppclk + uint32_t CLK1_CLK2_CURRENT_CNT; //dprefclk + uint32_t CLK1_CLK3_CURRENT_CNT; //dcfclk + uint32_t CLK1_CLK4_CURRENT_CNT; //dtbclk + //uint32_t CLK1_CLK5_CURRENT_CNT; //dpiaclk + //uint32_t CLK1_CLK6_CURRENT_CNT; //srdbgclk + uint32_t CLK1_CLK3_DS_CNTL; //dcf_deep_sleep_divider + uint32_t CLK1_CLK3_ALLOW_DS; //dcf_deep_sleep_allow + + uint32_t CLK1_CLK0_BYPASS_CNTL; //dispclk bypass + uint32_t CLK1_CLK1_BYPASS_CNTL; //dppclk bypass + uint32_t CLK1_CLK2_BYPASS_CNTL; //dprefclk bypass + uint32_t CLK1_CLK3_BYPASS_CNTL; //dcfclk bypass + uint32_t CLK1_CLK4_BYPASS_CNTL; //dtbclk bypass +}; + struct dcn301_clk_internal { int dummy; uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk -- cgit v1.2.3 From 4e08378b2dc1fbe64c9e1730f3260672b22fac03 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 14 Dec 2023 15:07:32 -0600 Subject: drm/amd/display: Add a new DC debug mask for PSR-SU Some issues have been raised that appear to be tied to PSR-SU. To allow users to confirm they're tied to PSR-SU without turning off PSR entirely introduce a new debug mask: amdgpu.dcdebugmask=0x200 Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 3 +++ drivers/gpu/drm/amd/include/amd_shared.h | 1 + 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 08ce3bb8f640..1f08c6564c3b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -51,6 +51,9 @@ static bool link_supports_psrsu(struct dc_link *link) !link->dpcd_caps.psr_info.psr2_su_y_granularity_cap) return false; + if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU) + return false; + return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub); } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index bf7f258c324a..c739f12fb937 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -258,6 +258,7 @@ enum DC_DEBUG_MASK { DC_DISABLE_REPLAY = 0x50, DC_ENABLE_DPIA_TRACE = 0x80, DC_ENABLE_DML2 = 0x100, + DC_DISABLE_PSR_SU = 0x200, }; enum amd_dpm_forced_level; -- cgit v1.2.3 From 65550a9cc5c371b4027c8e8199293899cb2f5af7 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 15 Dec 2023 10:37:39 -0500 Subject: drm/amd/display: disable FPO and SubVP for older DMUB versions on DCN32x There have recently been changes that break backwards compatibility, that were introduced into DMUB firmware (for DCN32x) concerning FPO and SubVP. So, since those are just power optimization features, we can just disable them unless the user is using a new enough version of DMUB firmware. 
Cc: stable@vger.kernel.org Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2870 Fixes: ed6e2782e974 ("drm/amd/display: For cursor P-State allow for SubVP") Reported-by: Mikhail Gavrilov Closes: https://lore.kernel.org/r/CABXGCsNRb0QbF2pKLJMDhVOKxyGD6-E+8p-4QO6FOWa6zp22_A@mail.gmail.com/ Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 5c323718ec90..0f0972ad441a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -960,6 +960,12 @@ void dcn32_init_hw(struct dc *dc) dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support; dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch; + + if (dc->ctx->dmub_srv->dmub->fw_version < + DMUB_FW_VERSION(7, 0, 35)) { + dc->debug.force_disable_subvp = true; + dc->debug.disable_fpo_optimizations = true; + } } } -- cgit v1.2.3 From 5dd0bd06cb6c02b445d28144a83c561225c2fa5f Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 5 Dec 2023 14:55:31 +0800 Subject: drm/amd/display: Add case for dcn35 to support usb4 dmub hpd event [Why & how] Refactor dc_is_dmub_outbox_supported() a bit and add case for dcn35 to register dmub outbox notification irq to handle usb4 relevant hpd event. Reviewed-by: Roman Li Reviewed-by: Jun Lei Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 0d42074f33a6..445f87fa0eac 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5089,18 +5089,28 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) */ bool dc_is_dmub_outbox_supported(struct dc *dc) { - /* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */ - if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP && - dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 && - !dc->debug.dpia_debug.bits.disable_dpia) - return true; + switch (dc->ctx->asic_id.chip_family) { - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 && - !dc->debug.dpia_debug.bits.disable_dpia) - return true; + case FAMILY_YELLOW_CARP: + /* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */ + if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 && + !dc->debug.dpia_debug.bits.disable_dpia) + return true; + break; + + case AMDGPU_FAMILY_GC_11_0_1: + case AMDGPU_FAMILY_GC_11_5_0: + if (!dc->debug.dpia_debug.bits.disable_dpia) + return true; + break; + + default: + break; + } /* dmub aux needs dmub notifications to be enabled */ return dc->debug.enable_dmub_aux_for_legacy_ddc; + } /** -- cgit v1.2.3 From 6fb12518ca58412dc51054e2a7400afb41328d85 Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Tue, 5 Dec 2023 12:01:05 -0500 Subject: drm/amd/display: make flip_timestamp_in_us a 64-bit variable [Why] This variable currently overflows after about 71 minutes. 
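For reference, the 71-minute figure falls straight out of the counter width:

    2^32 us = 4,294,967,296 us ~= 4,295 s ~= 71.6 minutes

so a 32-bit microsecond timestamp wraps a little over an hour after it starts counting, while a 64-bit one effectively never does.
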
This doesn't cause any known functional issues, but it does make debugging more difficult. [How] Make it a 64-bit variable. Reviewed-by: Aric Cyr Acked-by: Wayne Lin Signed-off-by: Josip Pavic Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index e2a3aa8812df..811474f4419b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -244,7 +244,7 @@ enum pixel_format { #define DC_MAX_DIRTY_RECTS 3 struct dc_flip_addrs { struct dc_plane_address address; - unsigned int flip_timestamp_in_us; + unsigned long long flip_timestamp_in_us; bool flip_immediate; /* TODO: add flip duration for FreeSync */ bool triplebuffer_flips; -- cgit v1.2.3
From e48c8cbeebbd7e2e4d3fe8508b4beb7c00800de4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 15 Dec 2023 14:37:45 -0600 Subject: drm/amd: Add missing definitions for `SMU_MAX_LEVELS_VDDGFX` It is reported that on a Topaz dGPU the kernel emits: amdgpu: can't get the mac of 5 This is because there is no definition for max levels of VDDGFX declared for SMU71 or SMU7. The correct definition is VDDC, so use this. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3049 Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c | 1 + drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 9e4228232f02..ad1fd3150d03 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c @@ -2298,6 +2298,7 @@ static uint32_t ci_get_mac_definition(uint32_t value) case SMU_MAX_ENTRIES_SMIO: return SMU7_MAX_ENTRIES_SMIO; case SMU_MAX_LEVELS_VDDC: + case SMU_MAX_LEVELS_VDDGFX: return SMU7_MAX_LEVELS_VDDC; case SMU_MAX_LEVELS_VDDCI: return SMU7_MAX_LEVELS_VDDCI; diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c index 97d9802fe673..17d2f5bff4a7 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c @@ -2263,6 +2263,7 @@ static uint32_t iceland_get_mac_definition(uint32_t value) case SMU_MAX_ENTRIES_SMIO: return SMU71_MAX_ENTRIES_SMIO; case SMU_MAX_LEVELS_VDDC: + case SMU_MAX_LEVELS_VDDGFX: return SMU71_MAX_LEVELS_VDDC; case SMU_MAX_LEVELS_VDDCI: return SMU71_MAX_LEVELS_VDDCI; -- cgit v1.2.3
From 006ad514a50cc49d904fd004b69c842ddfaabf1f Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Fri, 15 Dec 2023 17:14:07 -0600 Subject: drm/amdkfd: Use partial hmm page walk during buffer validation in SVM SVM uses an hmm page walk to validate a buffer before mapping it to the GPU VM. Now that partial migration/mapping is supported, perform the validation on the same VM range that the migration/map operates on, instead of on the whole SVM range, which can be very large. This change is expected to improve SVM code performance.
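As a rough illustration of the idea (a simplified sketch, not the actual kfd code), the validation now only covers the pages actually touched by the migration/map, clamped to the bounds of the svm range, instead of always walking [prange->start, prange->last]:

        /* Hypothetical helper: clamp the region touched by a migration/map to
         * the bounds of the svm range so only those pages are re-validated. */
        static void clamp_to_prange(unsigned long prange_start, unsigned long prange_last,
                                    unsigned long touched_start, unsigned long touched_last,
                                    unsigned long *map_start, unsigned long *map_last)
        {
                *map_start = touched_start > prange_start ? touched_start : prange_start;
                *map_last = touched_last < prange_last ? touched_last : prange_last;
        }

The real change bases start/end in svm_range_validate_and_map() on such a map_start/map_last pair rather than on prange->start/prange->last, as the hunks below show.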
Signed-off-by: Xiaogang Chen Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 35 +++++--------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 79 +++++++++++++++----------------- 2 files changed, 48 insertions(+), 66 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index b854cbf06dce..d630100b9e91 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr) put_page(page); } -static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) -{ - unsigned long cpages = 0; - unsigned long i; - - for (i = 0; i < migrate->npages; i++) { - if (migrate->src[i] & MIGRATE_PFN_VALID && - migrate->src[i] & MIGRATE_PFN_MIGRATE) - cpages++; - } - return cpages; -} - static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) { unsigned long upages = 0; @@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, struct dma_fence *mfence = NULL; struct migrate_vma migrate = { 0 }; unsigned long cpages = 0; + unsigned long mpages = 0; dma_addr_t *scratch; void *buf; int r = -ENOMEM; @@ -450,12 +438,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); - pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", - svm_migrate_successful_pages(&migrate), cpages, migrate.npages); - svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); + mpages = cpages - svm_migrate_unsuccessful_pages(&migrate); + pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", + mpages, cpages, migrate.npages); + kfd_smi_event_migration_end(node, p->lead_thread->pid, start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, trigger); @@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, out_free: kvfree(buf); out: - if (!r && cpages) { + if (!r && mpages) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) - WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); + WRITE_ONCE(pdd->page_in, pdd->page_in + mpages); - return cpages; + return mpages; } return r; } @@ -498,7 +487,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, struct vm_area_struct *vma; uint64_t ttm_res_offset; struct kfd_node *node; - unsigned long cpages = 0; + unsigned long mpages = 0; long r = 0; if (start_mgr < prange->start || last_mgr > prange->last) { @@ -540,15 +529,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("failed %ld to migrate\n", r); break; } else { - cpages += r; + mpages += r; } ttm_res_offset += next - addr; addr = next; } - if (cpages) { + if (mpages) { prange->actual_loc = best_loc; - prange->vram_pages = prange->vram_pages + cpages; + prange->vram_pages += mpages; } else if (!prange->actual_loc) { /* if no page migrated and all pages from prange are at * sys ram drop svm_bo got from svm_range_vram_node_new diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index fb0011f80277..aacd85a4fce1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr) static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range 
*prange, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages) + unsigned long *hmm_pfns, uint32_t gpuidx) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; dma_addr_t *addr = prange->dma_addr[gpuidx]; struct device *dev = adev->dev; struct page *page; - uint64_t vram_pages_dev; int i, r; if (!addr) { @@ -174,7 +173,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, prange->dma_addr[gpuidx] = addr; } - vram_pages_dev = 0; addr += offset; for (i = 0; i < npages; i++) { if (svm_is_valid_dma_mapping_addr(dev, addr[i])) @@ -184,7 +182,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, if (is_zone_device_page(page)) { struct amdgpu_device *bo_adev = prange->svm_bo->node->adev; - vram_pages_dev++; addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + bo_adev->vm_manager.vram_base_offset - bo_adev->kfd.pgmap.range.start; @@ -201,14 +198,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n", addr[i] >> PAGE_SHIFT, page_to_pfn(page)); } - *vram_pages = vram_pages_dev; + return 0; } static int svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns, uint64_t *vram_pages) + unsigned long *hmm_pfns) { struct kfd_process *p; uint32_t gpuidx; @@ -227,7 +224,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, } r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages, - hmm_pfns, gpuidx, vram_pages); + hmm_pfns, gpuidx); if (r) break; } @@ -885,14 +882,29 @@ static void svm_range_debug_dump(struct svm_range_list *svms) static void * svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements, - uint64_t offset) + uint64_t offset, uint64_t *vram_pages) { + unsigned char *src = (unsigned char *)psrc + offset; unsigned char *dst; + uint64_t i; dst = kvmalloc_array(num_elements, size, GFP_KERNEL); if (!dst) return NULL; - memcpy(dst, (unsigned char *)psrc + offset, num_elements * size); + + if (!vram_pages) { + memcpy(dst, src, num_elements * size); + return (void *)dst; + } + + *vram_pages = 0; + for (i = 0; i < num_elements; i++) { + dma_addr_t *temp; + temp = (dma_addr_t *)dst + i; + *temp = *((dma_addr_t *)src + i); + if (*temp&SVM_RANGE_VRAM_DOMAIN) + (*vram_pages)++; + } return (void *)dst; } @@ -906,7 +918,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src) if (!src->dma_addr[i]) continue; dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i], - sizeof(*src->dma_addr[i]), src->npages, 0); + sizeof(*src->dma_addr[i]), src->npages, 0, NULL); if (!dst->dma_addr[i]) return -ENOMEM; } @@ -917,7 +929,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src) static int svm_range_split_array(void *ppnew, void *ppold, size_t size, uint64_t old_start, uint64_t old_n, - uint64_t new_start, uint64_t new_n) + uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages) { unsigned char *new, *old, *pold; uint64_t d; @@ -929,11 +941,12 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size, return 0; d = (new_start - old_start) * size; - new = svm_range_copy_array(pold, size, new_n, d); + /* get dma addr array for new range and calculte its vram page number */ + new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages); if (!new) return -ENOMEM; d = (new_start == old_start) ? 
new_n * size : 0; - old = svm_range_copy_array(pold, size, old_n, d); + old = svm_range_copy_array(pold, size, old_n, d, NULL); if (!old) { kvfree(new); return -ENOMEM; @@ -955,10 +968,13 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old, for (i = 0; i < MAX_GPU_INSTANCE; i++) { r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i], sizeof(*old->dma_addr[i]), old->start, - npages, new->start, new->npages); + npages, new->start, new->npages, + old->actual_loc ? &new->vram_pages : NULL); if (r) return r; } + if (old->actual_loc) + old->vram_pages -= new->vram_pages; return 0; } @@ -982,11 +998,6 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, new->svm_bo = svm_range_bo_ref(old->svm_bo); new->ttm_res = old->ttm_res; - /* set new's vram_pages as old range's now, the acurate vram_pages - * will be updated during mapping - */ - new->vram_pages = min(old->vram_pages, new->npages); - spin_lock(&new->svm_bo->list_lock); list_add(&new->svm_bo_list, &new->svm_bo->range_list); spin_unlock(&new->svm_bo->list_lock); @@ -1109,7 +1120,7 @@ static int svm_range_split_tail(struct svm_range *prange, uint64_t new_last, struct list_head *insert_list, struct list_head *remap_list) { - struct svm_range *tail; + struct svm_range *tail = NULL; int r = svm_range_split(prange, prange->start, new_last, &tail); if (!r) { @@ -1124,7 +1135,7 @@ static int svm_range_split_head(struct svm_range *prange, uint64_t new_start, struct list_head *insert_list, struct list_head *remap_list) { - struct svm_range *head; + struct svm_range *head = NULL; int r = svm_range_split(prange, new_start, prange->last, &head); if (!r) { @@ -1573,7 +1584,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm, struct svm_validate_context *ctx; unsigned long start, end, addr; struct kfd_process *p; - uint64_t vram_pages; void *owner; int32_t idx; int r = 0; @@ -1648,15 +1658,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } } - vram_pages = 0; - start = prange->start << PAGE_SHIFT; - end = (prange->last + 1) << PAGE_SHIFT; + start = map_start << PAGE_SHIFT; + end = (map_last + 1) << PAGE_SHIFT; for (addr = start; !r && addr < end; ) { struct hmm_range *hmm_range; unsigned long map_start_vma; unsigned long map_last_vma; struct vm_area_struct *vma; - uint64_t vram_pages_vma; unsigned long next = 0; unsigned long offset; unsigned long npages; @@ -1683,13 +1691,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } if (!r) { - offset = (addr - start) >> PAGE_SHIFT; + offset = (addr >> PAGE_SHIFT) - prange->start; r = svm_range_dma_map(prange, ctx->bitmap, offset, npages, - hmm_range->hmm_pfns, &vram_pages_vma); + hmm_range->hmm_pfns); if (r) pr_debug("failed %d to dma map range\n", r); - else - vram_pages += vram_pages_vma; } svm_range_lock(prange); @@ -1722,19 +1728,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm, addr = next; } - if (addr == end) { - prange->vram_pages = vram_pages; - - /* if prange does not include any vram page and it - * has not released svm_bo drop its svm_bo reference - * and set its actaul_loc to sys ram - */ - if (!vram_pages && prange->ttm_res) { - prange->actual_loc = 0; - svm_range_vram_node_free(prange); - } - } - svm_range_unreserve_bos(ctx); if (!r) prange->validate_timestamp = ktime_get_boottime(); -- cgit v1.2.3 From 8b09656b22c052d02e4761eb4cbe611289866245 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Tue, 5 Dec 2023 21:25:36 -0500 Subject: drm/amd/display: skip error logging when DMUB is inactive from S3 
[Why] On resume from S3, while DMUB is inactive, DMUB queue and execute calls will not work. Skip reporting errors in these scenarios [How] Add new return code during DMUB queue and execute calls when DMUB is in S3 state. Skip logging errors in these scenarios Reviewed-by: Alvin Lee Acked-by: Wayne Lin Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 34 +++++++++++++++++-------- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 1 + drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 4 +-- 3 files changed, 27 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 1a4d615ccdec..eb6f5640f19a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -140,7 +140,10 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. */ - dmub_srv_cmd_execute(dmub); + status = dmub_srv_cmd_execute(dmub); + if (status == DMUB_STATUS_POWER_STATE_D3) + return false; + dmub_srv_wait_for_idle(dmub, 100000); /* Requeue the command. */ @@ -148,16 +151,20 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, } if (status != DMUB_STATUS_OK) { - DC_ERROR("Error queueing DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error queueing DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; } } status = dmub_srv_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; } @@ -218,7 +225,10 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. */ - dmub_srv_cmd_execute(dmub); + status = dmub_srv_cmd_execute(dmub); + if (status == DMUB_STATUS_POWER_STATE_D3) + return false; + dmub_srv_wait_for_idle(dmub, 100000); /* Requeue the command. 
*/ @@ -226,16 +236,20 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun if (status != DMUB_STATUS_OK) { - DC_ERROR("Error queueing DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error queueing DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; } } status = dmub_srv_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; } diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index d1a4ed6f5916..c78c9224ab60 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -86,6 +86,7 @@ enum dmub_status { DMUB_STATUS_TIMEOUT, DMUB_STATUS_INVALID, DMUB_STATUS_HW_FAILURE, + DMUB_STATUS_POWER_STATE_D3 }; /* enum dmub_asic - dmub asic identifier */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 53ac1c66dd86..9ad738805320 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -768,7 +768,7 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, return DMUB_STATUS_INVALID; if (dmub->power_state != DMUB_POWER_STATE_D0) - return DMUB_STATUS_INVALID; + return DMUB_STATUS_POWER_STATE_D3; if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { @@ -789,7 +789,7 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) return DMUB_STATUS_INVALID; if (dmub->power_state != DMUB_POWER_STATE_D0) - return DMUB_STATUS_INVALID; + return DMUB_STATUS_POWER_STATE_D3; /** * Read back all the queued commands to ensure that they've -- cgit v1.2.3
From 72eaa723187b87f1793529eaadbcfaa836c17812 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 6 Dec 2023 17:14:48 -0500 Subject: drm/amd/display: get dprefclk ss info from integration info table [why & how] We have two SSC_En sources: the ssc_info from dce_info is used for MPLL_SSC_EN, while the dprefclk SS info (used for DP audio) used to be fetched through the VBIOS command table's smu_info SS percentage, and that smu_info table was itself populated from systemIntegrationInfoTable. Since the DCN35 VBIOS removed smu_info, the driver needs to read the integration info table directly.
Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 19 ++++++++++++++----- .../drm/amd/display/include/grph_object_ctrl_defs.h | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index fcd65a2057ad..960c4b4f6ddf 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1014,13 +1014,20 @@ static enum bp_result get_ss_info_v4_5( DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_DISPLAY_PORT: - ss_info->spread_spectrum_percentage = + if (bp->base.integrated_info) { + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", bp->base.integrated_info->gpuclk_ss_percentage); + ss_info->spread_spectrum_percentage = + bp->base.integrated_info->gpuclk_ss_percentage; + ss_info->type.CENTER_MODE = + bp->base.integrated_info->gpuclk_ss_type; + } else { + ss_info->spread_spectrum_percentage = disp_cntl_tbl->dp_ss_percentage; - ss_info->spread_spectrum_range = + ss_info->spread_spectrum_range = disp_cntl_tbl->dp_ss_rate_10hz * 10; - if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) - ss_info->type.CENTER_MODE = true; - + if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) + ss_info->type.CENTER_MODE = true; + } DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_GPU_PLL: @@ -2813,6 +2820,8 @@ static enum bp_result get_integrated_info_v2_2( info->ma_channel_number = info_v2_2->umachannelnumber; info->dp_ss_control = le16_to_cpu(info_v2_2->reserved1); + info->gpuclk_ss_percentage = info_v2_2->gpuclk_ss_percentage; + info->gpuclk_ss_type = info_v2_2->gpuclk_ss_type; for (i = 0; i < NUMBER_OF_UCHAR_FOR_GUID; ++i) { info->ext_disp_conn_info.gu_id[i] = diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h index bc96d0211360..813463ffe15c 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h @@ -417,6 +417,8 @@ struct integrated_info { /* V2.1 */ struct edp_info edp1_info; struct edp_info edp2_info; + uint32_t gpuclk_ss_percentage; + uint32_t gpuclk_ss_type; }; /* -- cgit v1.2.3 From 3582e0ba8a675d72c3cc6dd1b847e6aa757845da Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Tue, 5 Dec 2023 16:57:27 -0500 Subject: drm/amd/display: dereference variable before checking for zero [Why] Driver incorrectly checks if pointer variable OutBpp is null instead of if the value being pointed to is zero. [How] Dereference OutBpp before checking for a value of zero. 
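Stated generically (a minimal illustration, not the DML code itself), the pitfall is comparing the pointer instead of the value it points to:

        unsigned int bpp = 0;
        unsigned int *OutBpp = &bpp;

        if (OutBpp == 0) { /* compares the pointer itself; false for any valid pointer */ }
        if (*OutBpp == 0) { /* compares the pointed-to value; the intended check */ }

The one-character change in the hunk below is exactly this added dereference.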
Reviewed-by: Chaitanya Dhere Acked-by: Wayne Lin Signed-off-by: Josip Pavic Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 180f8a98a361..b95bf27f2fe2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -5420,7 +5420,7 @@ static void CalculateOutputLink( *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - if (OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { *RequiresDSC = true; LinkDSCEnable = true; *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, -- cgit v1.2.3 From b5882675074086245589daa21c9d2b205810b83f Mon Sep 17 00:00:00 2001 From: George Shen Date: Tue, 5 Dec 2023 18:34:47 -0500 Subject: drm/amd/display: Set test_pattern_changed update flag on pipe enable [Why] In certain cases, ODM pipe split can occur while stream already has test pattern enabled. The new pipe used in the ODM combine config must be configured to output the test pattern in this case. [How] If the stream is configured to output test pattern, then set the test_pattern_changed update flag for the new pipe when it gets enabled. 
Reviewed-by: Alvin Lee Acked-by: Wayne Lin Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 5bb5e2960276..2c8eff815ec1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1400,6 +1400,10 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx new_pipe->update_flags.bits.scaler = 1; new_pipe->update_flags.bits.viewport = 1; new_pipe->update_flags.bits.det_size = 1; + if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE && + new_pipe->stream_res.test_pattern_params.width != 0 && + new_pipe->stream_res.test_pattern_params.height != 0) + new_pipe->update_flags.bits.test_pattern_changed = 1; if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) { new_pipe->update_flags.bits.odm = 1; new_pipe->update_flags.bits.global_sync = 1; -- cgit v1.2.3 From ec39a6d00382dfd23bf74ec28c7cf4b87884ae1b Mon Sep 17 00:00:00 2001 From: Muhammad Ahmed Date: Wed, 6 Dec 2023 18:07:57 -0500 Subject: drm/amd/display: add debug option for ExtendedVBlank DLG adjust [why & how] Add new option for debug usage Reviewed-by: Charlene Liu Acked-by: Wayne Lin Signed-off-by: Muhammad Ahmed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c87ec3e0a8cf..ce00a6eeb164 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -979,6 +979,7 @@ struct dc_debug_options { unsigned int ips2_eval_delay_us; unsigned int ips2_entry_delay_us; bool disable_timeout; + bool disable_extblankadj; }; struct gpu_info_soc_bounding_box_v1_0; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 2c8eff815ec1..72c580ec650c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2221,7 +2221,8 @@ void dcn20_optimize_bandwidth( dc->clk_mgr, context, true); - if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) { + if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW && + !dc->debug.disable_extblankadj) { for (i = 0; i < dc->res_pool->pipe_count; ++i) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; -- cgit v1.2.3 From 0d26644bc57d8737c8e2fb3145366f7d0b941935 Mon Sep 17 00:00:00 2001 From: Allen Pan Date: Thu, 7 Dec 2023 17:26:10 -0500 Subject: drm/amd/display: fix usb-c connector_type [why] BIOS switches to use USB-C connector type 0x18, but VBIOS's objectInfo table not supported yet. driver needs to patch it based on enc_cap from system integration info table. 
Reviewed-by: Charlene Liu Acked-by: Wayne Lin Signed-off-by: Allen Pan Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c index f91e08895275..da94e5309fba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c @@ -256,6 +256,10 @@ void dcn35_link_encoder_construct( enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN; enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN; enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN; + if (bp_cap_info.DP_IS_USB_C) { + /*BIOS not switch to use CONNECTOR_ID_USBC = 24 yet*/ + enc10->base.features.flags.bits.DP_IS_USB_C = 1; + } } else { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", @@ -264,4 +268,5 @@ void dcn35_link_encoder_construct( if (enc10->base.ctx->dc->debug.hdmi20_disable) enc10->base.features.flags.bits.HDMI_6GB_EN = 0; + } -- cgit v1.2.3
From 8e57c06bf4b0f51a4d6958e15e1a99c9520d00fa Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 4 Dec 2023 14:10:05 -0500 Subject: drm/amd/display: Refactor DMCUB enter/exit idle interface [Why] We can hang in place trying to send commands when the DMCUB isn't powered on. [How] We need to exit out of the idle state prior to sending a command, but the process that performs the exit also invokes a command itself. Fixing this issue involves the following: 1. Using a software state to track whether or not we need to start the process to exit idle or notify idle. It's possible for the hardware to have exited an idle state without driver knowledge, but entering one is always restricted to a driver allow - which makes the SW state vs HW state mismatch issue purely one of optimization, which should seldom be hit, if at all. 2. Refactor any instances of exit/notify idle to use a single wrapper that maintains this SW state. This works similarly to dc_allow_idle_optimizations, but works at the DMCUB level and makes sure the state is marked prior to any notify/exit idle so we don't enter an infinite loop. 3. Make sure we exit out of idle prior to sending any commands or waiting for DMCUB idle. This patch takes care of 1/2. A future patch will take care of wrapping DMCUB command submission with calls to this new interface.
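In reduced form, the wrapper described in point 2 looks roughly like the following (a conceptual sketch with stand-in names; the real implementation is dc_dmub_srv_apply_idle_power_optimizations() in the hunks below):

        /* Sketch only: cache the allow/deny decision in software and mark it
         * before acting on it, so a nested call from the exit path bails out
         * instead of looping. */
        struct idle_ctl {
                bool idle_allowed;      /* cached SW state */
        };

        static void notify_idle(struct idle_ctl *c)    { /* driver-side "allow idle" */ }
        static void exit_low_power(struct idle_ctl *c) { /* wake PMFW/DMCUB back up */ }

        static void apply_idle(struct idle_ctl *c, bool allow)
        {
                if (c->idle_allowed == allow)
                        return;                 /* SW state already matches */
                c->idle_allowed = allow;        /* mark first so nested calls short-circuit */
                if (allow)
                        notify_idle(c);
                else
                        exit_low_power(c);
        }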
Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Hansen Dsouza Acked-by: Wayne Lin Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +-- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 37 ++++++++++++++++++++-- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 6 ++-- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 8 ++--- 4 files changed, 43 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 54861136dafd..97776ba1c70a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2856,7 +2856,7 @@ static int dm_resume(void *handle) bool need_hotplug = false; if (dm->dc->caps.ips_support) { - dc_dmub_srv_exit_low_power_state(dm->dc); + dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false); } if (amdgpu_in_reset(adev)) { @@ -9001,7 +9001,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (new_con_state->crtc && new_con_state->crtc->state->active && drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) { - dc_dmub_srv_exit_low_power_state(dm->dc); + dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false); break; } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index eb6f5640f19a..ccfe2b6046fd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1162,6 +1162,9 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) struct dc_context *dc_ctx = dc_dmub_srv->ctx; enum dmub_status status; + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return true; + if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation) return true; @@ -1183,7 +1186,7 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) return true; } -void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) +static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) { union dmub_rb_cmd cmd = {0}; @@ -1207,7 +1210,7 @@ void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } -void dc_dmub_srv_exit_low_power_state(const struct dc *dc) +static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) { const uint32_t max_num_polls = 10000; uint32_t allow_state = 0; @@ -1220,6 +1223,9 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (!dc->idle_optimizations_allowed) return; + if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub) + return; + if (dc->hwss.get_idle_state && dc->hwss.set_idle_state && dc->clk_mgr->funcs->exit_low_power_state) { @@ -1296,3 +1302,30 @@ void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_c else dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3); } + +void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle) +{ + struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return; + + if (dc_dmub_srv->idle_allowed == allow_idle) + return; + + /* + * Entering a low power state requires a driver notification. + * Powering up the hardware requires notifying PMFW and DMCUB. + * Clearing the driver idle allow requires a DMCUB command. + * DMCUB commands requires the DMCUB to be powered up and restored. 
+ * + * Exit out early to prevent an infinite loop of DMCUB commands + * triggering exit low power - use software state to track this. + */ + dc_dmub_srv->idle_allowed = allow_idle; + + if (!allow_idle) + dc_dmub_srv_exit_low_power_state(dc); + else + dc_dmub_srv_notify_idle(dc, allow_idle); +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index c25ce7546f71..b63cba6235fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -50,6 +50,8 @@ struct dc_dmub_srv { struct dc_context *ctx; void *dm; + + bool idle_allowed; }; void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); @@ -100,8 +102,8 @@ void dc_dmub_srv_enable_dpia_trace(const struct dc *dc); void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index); bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait); -void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle); -void dc_dmub_srv_exit_low_power_state(const struct dc *dc); + +void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle); void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState); #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 9262d3336182..f48001317fab 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -687,11 +687,7 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable) } // TODO: review other cases when idle optimization is allowed - - if (!enable) - dc_dmub_srv_exit_low_power_state(dc); - else - dc_dmub_srv_notify_idle(dc, enable); + dc_dmub_srv_apply_idle_power_optimizations(dc, enable); return true; } @@ -701,7 +697,7 @@ void dcn35_z10_restore(const struct dc *dc) if (dc->debug.disable_z10) return; - dc_dmub_srv_exit_low_power_state(dc); + dc_dmub_srv_apply_idle_power_optimizations(dc, false); dcn31_z10_restore(dc); } -- cgit v1.2.3 From 8892780834ae294bc3697c7d0e056d7743900b39 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 4 Dec 2023 16:35:04 -0500 Subject: drm/amd/display: Wake DMCUB before sending a command [Why] We can hang in place trying to send commands when the DMCUB isn't powered on. [How] For functions that execute within a DC context or DC lock we can wrap the direct calls to dm_execute_dmub_cmd/list with code that exits idle power optimizations and reallows once we're done with the command submission on success. For DM direct submissions the DM will need to manage the enter/exit sequencing manually. We cannot invoke a DMCUB command directly within the DM execution helper or we can deadlock. 
Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Hansen Dsouza Acked-by: Wayne Lin Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- .../gpu/drm/amd/display/dc/bios/command_table2.c | 12 ++--- .../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 2 +- .../amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 2 +- .../amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 2 +- .../amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c | 2 +- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 12 ++--- .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 53 +++++++++++++++++++--- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 40 ++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_helper.c | 6 +-- drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c | 14 +++--- .../gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c | 2 +- drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c | 2 +- drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 14 +++--- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 2 +- .../amd/display/dc/dcn31/dcn31_dio_link_encoder.c | 4 +- .../drm/amd/display/dc/dcn31/dcn31_panel_cntl.c | 4 +- .../drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c | 4 +- .../drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 8 ++-- .../drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 4 +- .../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 6 +-- .../display/dc/link/protocols/link_dp_capability.c | 2 +- .../amd/display/dc/link/protocols/link_dp_dpia.c | 3 +- 25 files changed, 143 insertions(+), 63 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 97776ba1c70a..dfab1627890a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10910,7 +10910,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, input->cea_total_length = total_length; memcpy(input->payload, data, length); - res = dm_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); + res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); if (!res) { DRM_ERROR("EDID CEA parser failed\n"); return false; diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index ab0adabf9dd4..293a919d605d 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -123,7 +123,7 @@ static void encoder_control_dmcub( sizeof(cmd.digx_encoder_control.header); cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result encoder_control_digx_v1_5( @@ -259,7 +259,7 @@ static void transmitter_control_dmcub( sizeof(cmd.dig1_transmitter_control.header); cmd.dig1_transmitter_control.transmitter_control.dig = *dig; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result transmitter_control_v1_6( @@ -321,7 +321,7 @@ static void transmitter_control_dmcub_v1_7( sizeof(cmd.dig1_transmitter_control.header); 
cmd.dig1_transmitter_control.transmitter_control.dig_v1_7 = *dig; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result transmitter_control_v1_7( @@ -429,7 +429,7 @@ static void set_pixel_clock_dmcub( sizeof(cmd.set_pixel_clock.header); cmd.set_pixel_clock.pixel_clock.clk = *clk; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result set_pixel_clock_v7( @@ -796,7 +796,7 @@ static void enable_disp_power_gating_dmcub( sizeof(cmd.enable_disp_power_gating.header); cmd.enable_disp_power_gating.power_gating.pwr = *pwr; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result enable_disp_power_gating_v2_1( @@ -1006,7 +1006,7 @@ static void enable_lvtma_control_dmcub( pwrseq_instance; cmd.lvtma_control.data.bypass_panel_control_wait = bypass_panel_control_wait; - dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static enum bp_result enable_lvtma_control( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 3db4ef564b99..ce1386e22576 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -253,7 +253,7 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 7326b7565846..757528256326 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -284,7 +284,7 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index 8776055bbeaa..644da4637320 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -232,7 +232,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static void dcn315_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, diff --git 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 09151cc56ce4..12f3e8aa46d8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -239,7 +239,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static void dcn316_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 789091f3bdc8..9c660d1facc7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -343,7 +343,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz; cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 445f87fa0eac..f2fc7df68b58 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -522,7 +522,7 @@ dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv, cmd.secure_display.roi_info.y_end = rect->y + rect->height; } - dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); } static inline void @@ -3381,7 +3381,7 @@ void dc_dmub_update_dirty_rect(struct dc *dc, update_dirty_rect->panel_inst = panel_inst; update_dirty_rect->pipe_idx = j; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); } } } @@ -5207,7 +5207,7 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc, ); } - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -5261,7 +5261,7 @@ bool dc_process_dmub_set_config_async(struct dc *dc, cmd.set_config_access.set_config_control.cmd_pkt.msg_type = payload->msg_type; cmd.set_config_access.set_config_control.cmd_pkt.msg_data = payload->msg_data; - if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) { + if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) { /* command is not processed by dmub */ notify->sc_status = SET_CONFIG_UNKNOWN_ERROR; return is_cmd_complete; @@ -5304,7 +5304,7 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, cmd.set_mst_alloc_slots.mst_slots_control.instance = dc->links[link_index]->ddc_hw_inst; cmd.set_mst_alloc_slots.mst_slots_control.mst_alloc_slots = mst_alloc_slots; - if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) /* command is not processed by dmub */ return 
DC_ERROR_UNEXPECTED; @@ -5342,7 +5342,7 @@ void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc, cmd.dpia_hpd_int_enable.header.type = DMUB_CMD__DPIA_HPD_INT_ENABLE; cmd.dpia_hpd_int_enable.enable = hpd_int_enable; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); DC_LOG_DEBUG("%s: hpd_int_enable(%d)\n", __func__, hpd_int_enable); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index fe07160932d6..fc18b9dc946f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -724,7 +724,7 @@ void hwss_send_dmcub_cmd(union block_sequence_params *params) union dmub_rb_cmd *cmd = params->send_dmcub_cmd_params.cmd; enum dm_dmub_wait_type wait_type = params->send_dmcub_cmd_params.wait_type; - dm_execute_dmub_cmd(ctx, cmd, wait_type); + dc_wake_and_execute_dmub_cmd(ctx, cmd, wait_type); } void hwss_program_manual_trigger(union block_sequence_params *params) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ccfe2b6046fd..fea13bcd4dc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -360,7 +360,7 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); // Send the command to the DMCUB. - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) @@ -374,7 +374,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); // Send the command to the DMCUB. - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream) @@ -467,7 +467,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru sizeof(cmd.fw_assisted_mclk_switch) - sizeof(cmd.fw_assisted_mclk_switch.header); // Send the command to the DMCUB. 
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -488,7 +488,7 @@ void dc_dmub_srv_query_caps_cmd(struct dc_dmub_srv *dc_dmub_srv) cmd.query_feature_caps.header.payload_bytes = sizeof(struct dmub_cmd_query_feature_caps_data); /* If command was processed, copy feature caps to dmub srv */ - if (dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + if (dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.query_feature_caps.header.ret_status == 0) { memcpy(&dc_dmub_srv->dmub->feature_caps, &cmd.query_feature_caps.query_feature_caps_data, @@ -513,7 +513,7 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi cmd.visual_confirm_color.visual_confirm_color_data.visual_confirm_color.panel_inst = panel_inst; // If command was processed, copy feature caps to dmub srv - if (dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + if (dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.visual_confirm_color.header.ret_status == 0) { memcpy(&dc->ctx->dmub_srv->dmub->visual_confirm_color, &cmd.visual_confirm_color.visual_confirm_color_data, @@ -875,7 +875,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, cmd.fw_assisted_mclk_switch_v2.config_data.watermark_a_cache = wm_val_refclk < 0xFFFF ? wm_val_refclk : 0xFFFF; } - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data) @@ -1112,7 +1112,7 @@ void dc_send_update_cursor_info_to_dmu( pipe_idx, pCtx->plane_res.hubp, pCtx->plane_res.dpp); /* Combine 2nd cmds update_curosr_info to DMU */ - dm_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT); } } @@ -1207,6 +1207,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) dc->hwss.set_idle_state(dc, true); } + /* NOTE: This does not use the "wake" interface since this is part of the wake path. */ dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } @@ -1329,3 +1330,41 @@ void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_ else dc_dmub_srv_notify_idle(dc, allow_idle); } + +bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd, + enum dm_dmub_wait_type wait_type) +{ + return dc_wake_and_execute_dmub_cmd_list(ctx, 1, cmd, wait_type); +} + +bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count, + union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type) +{ + struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv; + bool result = false, reallow_idle = false; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + if (count == 0) + return true; + + if (dc_dmub_srv->idle_allowed) { + dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false); + reallow_idle = true; + } + + /* + * These may have different implementations in DM, so ensure + * that we guide it to the expected helper. 
+ */ + if (count > 1) + result = dm_execute_dmub_cmd_list(ctx, count, cmd, wait_type); + else + result = dm_execute_dmub_cmd(ctx, cmd, wait_type); + + if (result && reallow_idle) + dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true); + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index b63cba6235fc..784ca3e44414 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -106,4 +106,44 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait); void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle); void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState); + +/** + * dc_wake_and_execute_dmub_cmd() - Wrapper for DMUB command execution. + * + * Refer to dc_wake_and_execute_dmub_cmd_list() for usage and limitations, + * This function is a convenience wrapper for a single command execution. + * + * @ctx: DC context + * @cmd: The command to send/receive + * @wait_type: The wait behavior for the execution + * + * Return: true on command submission success, false otherwise + */ +bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd, + enum dm_dmub_wait_type wait_type); + +/** + * dc_wake_and_execute_dmub_cmd_list() - Wrapper for DMUB command list execution. + * + * If the DMCUB hardware was asleep then it wakes the DMUB before + * executing the command and attempts to re-enter if the command + * submission was successful. + * + * This should be the preferred command submission interface provided + * the DC lock is acquired. + * + * Entry/exit out of idle power optimizations would need to be + * manually performed otherwise through dc_allow_idle_optimizations(). 
+ * + * @ctx: DC context + * @count: Number of commands to send/receive + * @cmd: Array of commands to send + * @wait_type: The wait behavior for the execution + * + * Return: true on command submission success, false otherwise + */ +bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count, + union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type); + + #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index cb6eaddab720..8f9a67825615 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -50,7 +50,7 @@ static inline void submit_dmub_read_modify_write( cmd_buf->header.payload_bytes = sizeof(struct dmub_cmd_read_modify_write_sequence) * offload->reg_seq_count; - dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); memset(cmd_buf, 0, sizeof(*cmd_buf)); @@ -67,7 +67,7 @@ static inline void submit_dmub_burst_write( cmd_buf->header.payload_bytes = sizeof(uint32_t) * offload->reg_seq_count; - dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); memset(cmd_buf, 0, sizeof(*cmd_buf)); @@ -80,7 +80,7 @@ static inline void submit_dmub_reg_wait( { struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait; - dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT); memset(cmd_buf, 0, sizeof(*cmd_buf)); offload->reg_seq_count = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index 42c802afc468..4cff36351f40 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -76,7 +76,7 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc) cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask; cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dmub_abm_init(struct abm *abm, uint32_t backlight) @@ -155,7 +155,7 @@ bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask) cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask; cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -186,7 +186,7 @@ void dmub_abm_init_config(struct abm *abm, cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } @@ -203,7 +203,7 @@ bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, un cmd.abm_pause.abm_pause_data.panel_mask = panel_mask; cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_pause_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -246,7 +246,7 @@ bool dmub_abm_save_restore( cmd.abm_save_restore.header.payload_bytes = sizeof(struct 
dmub_rb_cmd_abm_save_restore); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); // Copy iramtable data into local structure memcpy((void *)pData, dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, bytes); @@ -274,7 +274,7 @@ bool dmub_abm_set_pipe(struct abm *abm, cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary; cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -296,7 +296,7 @@ bool dmub_abm_set_backlight_level(struct abm *abm, cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst); cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 2aa0e01a6891..ba1fec3016d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -47,7 +47,7 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv, if (!lock) cmd.lock_hw.lock_hw_data.should_release = 1; - dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c index d8009b2dc56a..98a778996e1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c @@ -48,5 +48,5 @@ void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv) sizeof(cmd.outbox1_enable.header); cmd.outbox1_enable.enable = true; - dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 9d4170a356a2..3d7cef17f881 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -171,7 +171,7 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state * cmd.psr_set_version.psr_set_version_data.panel_inst = panel_inst; cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -199,7 +199,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8 cmd.psr_enable.header.payload_bytes = 0; // Send header only - dm_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); /* Below loops 1000 x 500us = 500 ms. * Exit PSR may need to wait 1-2 frames to power up. 
Timeout after at @@ -248,7 +248,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_ cmd.psr_set_level.psr_set_level_data.psr_level = psr_level; cmd.psr_set_level.psr_set_level_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; cmd.psr_set_level.psr_set_level_data.panel_inst = panel_inst; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } /* @@ -267,7 +267,7 @@ static void dmub_psr_set_sink_vtotal_in_psr_active(struct dmub_psr *dmub, cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_idle = psr_vtotal_idle; cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_su = psr_vtotal_su; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } /* @@ -286,7 +286,7 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt; cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } /* @@ -423,7 +423,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->relock_delay_frame_cnt = 2; copy_settings_data->dsc_slice_height = psr_context->dsc_slice_height; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -444,7 +444,7 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst) cmd.psr_force_static.header.sub_type = DMUB_CMD__PSR_FORCE_STATIC; cmd.psr_enable.header.payload_bytes = 0; - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } /* diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 68cad55c72ab..e13d69a22c1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -691,7 +691,7 @@ static void dmcub_PLAT_54186_wa(struct hubp *hubp, cmd.PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid; PERF_TRACE(); // TODO: remove after performance is stable. - dm_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); PERF_TRACE(); // TODO: remove after performance is stable. 
} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index 4596f3bac1b4..26be5fee7411 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -125,7 +125,7 @@ static bool query_dp_alt_from_dmub(struct link_encoder *enc, cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data); cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter); - if (!dm_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + if (!dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) return false; return true; @@ -436,7 +436,7 @@ static bool link_dpia_control(struct dc_context *dc_ctx, cmd.dig1_dpia_control.dpia_control = *dpia_control; - dm_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index d849b1eaa4a5..03248422d6ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -52,7 +52,7 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data); cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst; - return dm_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); + return dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); } static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl) @@ -85,7 +85,7 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl) panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV; cmd.panel_cntl.data.bl_pwm_ref_div2 = panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2; - if (!dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) return 0; panel_cntl->stored_backlight_registers.BL_PWM_CNTL = cmd.panel_cntl.data.bl_pwm_cntl; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 08783ad097d2..8e88dcaf88f5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -154,7 +154,7 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary; cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } @@ -173,7 +173,7 @@ static void dmub_abm_set_backlight(struct dc_context *dc, uint32_t backlight_pwm cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst); cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data); - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) diff --git 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index fd8a8c10a201..d337578d7e73 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -754,7 +754,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_NO_DF_REQ; cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header); - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); return true; } @@ -876,7 +876,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.mall.cursor_height = cursor_attr.height; cmd.mall.cursor_pitch = cursor_attr.pitch; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); /* Use copied cursor, and it's okay to not switch back */ cursor_attr.address.quad_part = cmd.mall.cursor_copy_dst.quad_part; @@ -892,7 +892,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.mall.tmr_scale = tmr_scale; cmd.mall.debug_bits = dc->debug.mall_error_as_fatal; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); return true; } @@ -909,7 +909,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header); - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 5d62805f3bdf..994250b6f2ef 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -416,7 +416,7 @@ void dcn31_z10_save_init(struct dc *dc) cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT; cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dcn31_z10_restore(const struct dc *dc) @@ -434,7 +434,7 @@ void dcn31_z10_restore(const struct dc *dc) cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT; cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_RESTORE; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 0f0972ad441a..cb9d8389329f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -277,7 +277,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ; cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header); - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); return true; } @@ -311,7 +311,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) 
cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header); cmd.cab.cab_alloc_ways = (uint8_t)ways; - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); return true; } @@ -327,7 +327,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header); - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 3c5334cdb3fb..289f5d133342 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1398,7 +1398,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id) cmd.cable_id.header.payload_bytes = sizeof(cmd.cable_id.data); cmd.cable_id.data.input.phy_inst = resource_transmitter_to_phy_idx( link->dc, link->link_enc->transmitter); - if (dm_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + if (dc_wake_and_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.cable_id.header.ret_status == 1) { cable_id->raw = cmd.cable_id.data.output_raw; DC_LOG_DC("usbc_cable_id = %d.\n", cable_id->raw); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 0bb749133909..982eda3c46f5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -90,7 +90,8 @@ bool dpia_query_hpd_status(struct dc_link *link) cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; /* Return HPD status reported by DMUB if query successfully executed. */ - if (dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.query_hpd.data.status == AUX_RET_SUCCESS) + if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + cmd.query_hpd.data.status == AUX_RET_SUCCESS) is_hpd_high = cmd.query_hpd.data.result; DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n", -- cgit v1.2.3 From e5ffd1263dd5b44929c676171802e7b6af483f21 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 5 Dec 2023 11:22:56 -0500 Subject: drm/amd/display: Wake DMCUB before executing GPINT commands [Why] DMCUB can be in idle when we attempt to interface with the HW through the GPINT mailbox resulting in a system hang. [How] Add dc_wake_and_execute_gpint() to wrap the wake, execute, sleep sequence. If the GPINT executes successfully then DMCUB will be put back into sleep after the optional response is returned. It functions similar to the inbox command interface. 
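For illustration only (not part of the patch), a minimal caller-side sketch of the new interface, modeled on the PSR residency query that is converted later in this series; ctx and panel_inst are assumed to come from the surrounding function:

	uint32_t residency = 0;
	uint16_t param = (uint16_t)(panel_inst << 8);

	/* Wakes DMCUB if it was allowed to idle, sends the GPINT, reads the
	 * optional reply, then re-allows idle only if the command succeeded.
	 */
	if (!dc_wake_and_execute_gpint(ctx, DMUB_GPINT__PSR_RESIDENCY, param,
				       &residency, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
		residency = 0;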
Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Hansen Dsouza Acked-by: Wayne Lin Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 29 ++------- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 72 +++++++++++++++++----- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 11 ++++ drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 19 ++---- 4 files changed, 77 insertions(+), 54 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 98b41ec7288e..68a846323912 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -2976,7 +2976,6 @@ static int dmub_trace_mask_set(void *data, u64 val) struct amdgpu_device *adev = data; struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub; enum dmub_gpint_command cmd; - enum dmub_status status; u64 mask = 0xffff; u8 shift = 0; u32 res; @@ -3003,13 +3002,7 @@ static int dmub_trace_mask_set(void *data, u64 val) break; } - status = dmub_srv_send_gpint_command(srv, cmd, res, 30); - - if (status == DMUB_STATUS_TIMEOUT) - return -ETIMEDOUT; - else if (status == DMUB_STATUS_INVALID) - return -EINVAL; - else if (status != DMUB_STATUS_OK) + if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, res, NULL, DM_DMUB_WAIT_TYPE_WAIT)) return -EIO; usleep_range(100, 1000); @@ -3026,7 +3019,6 @@ static int dmub_trace_mask_show(void *data, u64 *val) enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0; struct amdgpu_device *adev = data; struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub; - enum dmub_status status; u8 shift = 0; u64 raw = 0; u64 res = 0; @@ -3036,23 +3028,12 @@ static int dmub_trace_mask_show(void *data, u64 *val) return -EINVAL; while (i < 4) { - status = dmub_srv_send_gpint_command(srv, cmd, 0, 30); - - if (status == DMUB_STATUS_OK) { - status = dmub_srv_get_gpint_response(srv, (u32 *) &raw); - - if (status == DMUB_STATUS_INVALID) - return -EINVAL; - else if (status != DMUB_STATUS_OK) - return -EIO; - } else if (status == DMUB_STATUS_TIMEOUT) { - return -ETIMEDOUT; - } else if (status == DMUB_STATUS_INVALID) { - return -EINVAL; - } else { + uint32_t response; + + if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, 0, &response, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) return -EIO; - } + raw = response; usleep_range(100, 1000); cmd++; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index fea13bcd4dc7..4b93e7a529d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -301,17 +301,11 @@ bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv, unsigned int stream_mask) { - struct dmub_srv *dmub; - const uint32_t timeout = 30; - if (!dc_dmub_srv || !dc_dmub_srv->dmub) return false; - dmub = dc_dmub_srv->dmub; - - return dmub_srv_send_gpint_command( - dmub, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK, - stream_mask, timeout) == DMUB_STATUS_OK; + return dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK, + stream_mask, NULL, DM_DMUB_WAIT_TYPE_WAIT); } bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv) @@ -1126,25 +1120,20 @@ bool dc_dmub_check_min_version(struct dmub_srv *srv) void 
dc_dmub_srv_enable_dpia_trace(const struct dc *dc) { struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; - struct dmub_srv *dmub; - enum dmub_status status; - static const uint32_t timeout_us = 30; if (!dc_dmub_srv || !dc_dmub_srv->dmub) { DC_LOG_ERROR("%s: invalid parameters.", __func__); return; } - dmub = dc_dmub_srv->dmub; - - status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1, 0x0010, timeout_us); - if (status != DMUB_STATUS_OK) { + if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1, + 0x0010, NULL, DM_DMUB_WAIT_TYPE_WAIT)) { DC_LOG_ERROR("timeout updating trace buffer mask word\n"); return; } - status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK, 0x0000, timeout_us); - if (status != DMUB_STATUS_OK) { + if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK, + 0x0000, NULL, DM_DMUB_WAIT_TYPE_WAIT)) { DC_LOG_ERROR("timeout updating trace buffer mask word\n"); return; } @@ -1368,3 +1357,52 @@ bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned in return result; } + +static bool dc_dmub_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code, + uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type) +{ + struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv; + const uint32_t wait_us = wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT ? 0 : 30; + enum dmub_status status; + + if (response) + *response = 0; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + status = dmub_srv_send_gpint_command(dc_dmub_srv->dmub, command_code, param, wait_us); + if (status != DMUB_STATUS_OK) { + if (status == DMUB_STATUS_TIMEOUT && wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT) + return true; + + return false; + } + + if (response && wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + dmub_srv_get_gpint_response(dc_dmub_srv->dmub, response); + + return true; +} + +bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code, + uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type) +{ + struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv; + bool result = false, reallow_idle = false; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + if (dc_dmub_srv->idle_allowed) { + dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false); + reallow_idle = true; + } + + result = dc_dmub_execute_gpint(ctx, command_code, param, response, wait_type); + + if (result && reallow_idle) + dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true); + + return result; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 784ca3e44414..952bfb368886 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -145,5 +145,16 @@ bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cm bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type); +/** + * dc_wake_and_execute_gpint() + * + * @ctx: DC context + * @command_code: The command ID to send to DMCUB + * @param: The parameter to message DMCUB + * @response: Optional response out value - may be NULL. 
+ * @wait_type: The wait behavior for the execution + */ +bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code, + uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type); #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 3d7cef17f881..3e243e407bb8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -105,23 +105,18 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state) */ static void dmub_psr_get_state(struct dmub_psr *dmub, enum dc_psr_state *state, uint8_t panel_inst) { - struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub; uint32_t raw_state = 0; uint32_t retry_count = 0; - enum dmub_status status; do { // Send gpint command and wait for ack - status = dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_PSR_STATE, panel_inst, 30); - - if (status == DMUB_STATUS_OK) { - // GPINT was executed, get response - dmub_srv_get_gpint_response(srv, &raw_state); + if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_PSR_STATE, panel_inst, &raw_state, + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) { *state = convert_psr_state(raw_state); - } else + } else { // Return invalid state when GPINT times out *state = PSR_STATE_INVALID; - + } } while (++retry_count <= 1000 && *state == PSR_STATE_INVALID); // Assert if max retry hit @@ -452,13 +447,11 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst) */ static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst) { - struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub; uint16_t param = (uint16_t)(panel_inst << 8); /* Send gpint command and wait for ack */ - dmub_srv_send_gpint_command(srv, DMUB_GPINT__PSR_RESIDENCY, param, 30); - - dmub_srv_get_gpint_response(srv, residency); + dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__PSR_RESIDENCY, param, residency, + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); } static const struct dmub_psr_funcs psr_funcs = { -- cgit v1.2.3 From 09a4ec5da92c84952db117f0d576fdd8368c873a Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 17 Nov 2023 16:37:50 -0500 Subject: drm/amd/display: Refactor dc_state interface [WHY?] Part of the dc_state interface that deals with adding streams and planes should remain public, while others that deal with internal status' and subvp should be private to DC. [HOW?] Move and rename the public functions to dc_state.h and private functions to dc_state_priv.h. Also add some additional functions for extracting subvp meta data from the state. 
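For illustration only (not part of the patch), the renamed interface keeps the familiar create/add/release flow; a minimal sketch modeled on the amdgpu_dm stream validation path updated below, with error handling trimmed and dc, stream and plane_state assumed to come from the caller:

	struct dc_state *state = dc_state_create(dc);

	if (state) {
		/* Attach the stream and one plane to the new, refcounted state. */
		if (dc_state_add_stream(dc, state, stream) == DC_OK &&
		    dc_state_add_plane(dc, stream, plane_state, state)) {
			/* ... validate or commit 'state' here ... */
		}

		/* Drop our reference; the state is freed once nothing else holds it. */
		dc_state_release(state);
	}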
Reviewed-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Acked-by: Wayne Lin Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 45 +- drivers/gpu/drm/amd/display/dc/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 3 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 171 ++----- .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 8 +- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 294 ++---------- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 527 +++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 33 +- drivers/gpu/drm/amd/display/dc/core/dc_surface.c | 6 +- drivers/gpu/drm/amd/display/dc/dc.h | 16 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 31 +- drivers/gpu/drm/amd/display/dc/dc_plane.h | 38 ++ drivers/gpu/drm/amd/display/dc/dc_plane_priv.h | 34 ++ drivers/gpu/drm/amd/display/dc/dc_state.h | 74 +++ drivers/gpu/drm/amd/display/dc/dc_state_priv.h | 97 ++++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 44 -- drivers/gpu/drm/amd/display/dc/dc_stream_priv.h | 35 ++ drivers/gpu/drm/amd/display/dc/dc_types.h | 5 + .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 28 +- .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 5 +- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 45 +- .../gpu/drm/amd/display/dc/dml2/dml2_dc_types.h | 1 + .../drm/amd/display/dc/dml2/dml2_mall_phantom.c | 73 +-- .../amd/display/dc/dml2/dml2_translation_helper.c | 10 +- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 6 +- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 2 +- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h | 32 +- .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 +- .../drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 17 +- .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 42 +- .../drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 4 +- .../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 36 +- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 3 +- drivers/gpu/drm/amd/display/dc/inc/resource.h | 1 - .../amd/display/dc/resource/dcn32/dcn32_resource.c | 83 ++-- .../display/dc/resource/dcn321/dcn321_resource.c | 21 +- .../amd/display/dc/resource/dcn35/dcn35_resource.c | 2 + 37 files changed, 1171 insertions(+), 706 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/core/dc_state.c create mode 100644 drivers/gpu/drm/amd/display/dc/dc_plane.h create mode 100644 drivers/gpu/drm/amd/display/dc/dc_plane_priv.h create mode 100644 drivers/gpu/drm/amd/display/dc/dc_state.h create mode 100644 drivers/gpu/drm/amd/display/dc/dc_state_priv.h create mode 100644 drivers/gpu/drm/amd/display/dc/dc_stream_priv.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index dfab1627890a..b3ffb25dc6b1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -37,6 +37,7 @@ #include "dc/dc_dmub_srv.h" #include "dc/dc_edid_parser.h" #include "dc/dc_stat.h" +#include "dc/dc_state.h" #include "amdgpu_dm_trace.h" #include "dpcd_defs.h" #include "link/protocols/link_dpcd.h" @@ -2607,7 +2608,7 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) memset(del_streams, 0, sizeof(del_streams)); - context = dc_create_state(dc); + context = dc_state_create(dc); if (context == NULL) goto context_alloc_fail; @@ -2622,12 +2623,12 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) /* Remove all planes for removed 
streams and then remove the streams */ for (i = 0; i < del_streams_count; i++) { - if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) { + if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) { res = DC_FAIL_DETACH_SURFACES; goto fail; } - res = dc_remove_stream_from_ctx(dc, context, del_streams[i]); + res = dc_state_remove_stream(dc, context, del_streams[i]); if (res != DC_OK) goto fail; } @@ -2635,7 +2636,7 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) res = dc_commit_streams(dc, context->streams, context->stream_count); fail: - dc_release_state(context); + dc_state_release(context); context_alloc_fail: return res; @@ -2662,7 +2663,7 @@ static int dm_suspend(void *handle) dc_allow_idle_optimizations(adev->dm.dc, false); - dm->cached_dc_state = dc_copy_state(dm->dc->current_state); + dm->cached_dc_state = dc_state_create_copy(dm->dc->current_state); dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false); @@ -2909,7 +2910,7 @@ static int dm_resume(void *handle) dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true); - dc_release_state(dm->cached_dc_state); + dc_state_release(dm->cached_dc_state); dm->cached_dc_state = NULL; amdgpu_dm_irq_resume_late(adev); @@ -2919,8 +2920,8 @@ static int dm_resume(void *handle) return 0; } /* Recreate dc_state - DC invalidates it when setting power state to S3. */ - dc_release_state(dm_state->context); - dm_state->context = dc_create_state(dm->dc); + dc_state_release(dm_state->context); + dm_state->context = dc_state_create(dm->dc); /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ dc_resource_state_construct(dm->dc, dm_state->context); @@ -3998,7 +3999,7 @@ dm_atomic_duplicate_state(struct drm_private_obj *obj) old_state = to_dm_atomic_state(obj->state); if (old_state && old_state->context) - new_state->context = dc_copy_state(old_state->context); + new_state->context = dc_state_create_copy(old_state->context); if (!new_state->context) { kfree(new_state); @@ -4014,7 +4015,7 @@ static void dm_atomic_destroy_state(struct drm_private_obj *obj, struct dm_atomic_state *dm_state = to_dm_atomic_state(state); if (dm_state && dm_state->context) - dc_release_state(dm_state->context); + dc_state_release(dm_state->context); kfree(dm_state); } @@ -4050,7 +4051,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) if (!state) return -ENOMEM; - state->context = dc_create_state(adev->dm.dc); + state->context = dc_state_create(adev->dm.dc); if (!state->context) { kfree(state); return -ENOMEM; @@ -4065,7 +4066,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) r = amdgpu_display_modeset_create_props(adev); if (r) { - dc_release_state(state->context); + dc_state_release(state->context); kfree(state); return r; } @@ -4077,7 +4078,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) r = amdgpu_dm_audio_init(adev); if (r) { - dc_release_state(state->context); + dc_state_release(state->context); kfree(state); return r; } @@ -6658,7 +6659,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc, if (!dc_plane_state) goto cleanup; - dc_state = dc_create_state(dc); + dc_state = dc_state_create(dc); if (!dc_state) goto cleanup; @@ -6685,9 +6686,9 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc, dc_result = dc_validate_plane(dc, dc_plane_state); if (dc_result == DC_OK) - dc_result = dc_add_stream_to_ctx(dc, dc_state, stream); + dc_result = dc_state_add_stream(dc, dc_state, stream); - if (dc_result == DC_OK && 
!dc_add_plane_to_context( + if (dc_result == DC_OK && !dc_state_add_plane( dc, stream, dc_plane_state, @@ -6699,7 +6700,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc, cleanup: if (dc_state) - dc_release_state(dc_state); + dc_state_release(dc_state); if (dc_plane_state) dc_plane_state_release(dc_plane_state); @@ -8858,7 +8859,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, dc_stream_get_status(dm_new_crtc_state->stream); if (!status) - status = dc_stream_get_status_from_state(dc_state, + status = dc_state_get_stream_status(dc_state, dm_new_crtc_state->stream); if (!status) drm_err(dev, @@ -9783,7 +9784,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, crtc->base.id); /* i.e. reset mode */ - if (dc_remove_stream_from_ctx( + if (dc_state_remove_stream( dm->dc, dm_state->context, dm_old_crtc_state->stream) != DC_OK) { @@ -9826,7 +9827,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, DRM_DEBUG_ATOMIC("Enabling DRM crtc: %d\n", crtc->base.id); - if (dc_add_stream_to_ctx( + if (dc_state_add_stream( dm->dc, dm_state->context, dm_new_crtc_state->stream) != DC_OK) { @@ -10148,7 +10149,7 @@ static int dm_update_plane_state(struct dc *dc, if (ret) return ret; - if (!dc_remove_plane_from_context( + if (!dc_state_remove_plane( dc, dm_old_crtc_state->stream, dm_old_plane_state->dc_state, @@ -10226,7 +10227,7 @@ static int dm_update_plane_state(struct dc *dc, * state. It'll be released when the atomic state is * cleaned. */ - if (!dc_add_plane_to_context( + if (!dc_state_add_plane( dc, dm_new_crtc_state->stream, dc_new_plane_state, diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 390e7a99be54..bae4fdddbf97 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -62,7 +62,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI include $(AMD_DC) DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \ -dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o +dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o dc_state.o DISPLAY_CORE += dc_vm_helper.o diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 7dacb0f82d29..28a2a837d2f0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -29,6 +29,7 @@ #include "dc_types.h" #include "dccg.h" #include "clk_mgr_internal.h" +#include "dc_state_priv.h" #include "link.h" #include "dce100/dce_clk_mgr.h" @@ -63,7 +64,7 @@ int clk_mgr_helper_get_active_display_cnt( /* Don't count SubVP phantom pipes as part of active * display count */ - if (stream->mall_stream_config.type == SUBVP_PHANTOM) + if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) continue; /* diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f2fc7df68b58..f5250022b98e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -34,6 +34,8 @@ #include "dce/dce_hwseq.h" #include "resource.h" +#include "dc_state.h" +#include "dc_state_priv.h" #include "gpio_service_interface.h" #include "clk_mgr.h" @@ -811,7 +813,7 @@ static void dc_destruct(struct dc *dc) link_enc_cfg_init(dc, dc->current_state); if (dc->current_state) { - dc_release_state(dc->current_state); + 
dc_state_release(dc->current_state); dc->current_state = NULL; } @@ -1028,7 +1030,7 @@ static bool dc_construct(struct dc *dc, * on creation it copies the contents of dc->dml */ - dc->current_state = dc_create_state(dc); + dc->current_state = dc_state_create(dc); if (!dc->current_state) { dm_error("%s: failed to create validate ctx\n", __func__); @@ -1118,7 +1120,7 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte static void disable_dangling_plane(struct dc *dc, struct dc_state *context) { int i, j; - struct dc_state *dangling_context = dc_create_state(dc); + struct dc_state *dangling_context = dc_state_create(dc); struct dc_state *current_ctx; struct pipe_ctx *pipe; struct timing_generator *tg; @@ -1164,6 +1166,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) } if (should_disable && old_stream) { + bool is_phantom = dc_state_get_stream_subvp_type(dc->current_state, old_stream) == SUBVP_PHANTOM; pipe = &dc->current_state->res_ctx.pipe_ctx[i]; tg = pipe->stream_res.tg; /* When disabling plane for a phantom pipe, we must turn on the @@ -1172,18 +1175,19 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) * state that can result in underflow or hang when enabling it * again for different use. */ - if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (is_phantom) { if (tg->funcs->enable_crtc) { int main_pipe_width, main_pipe_height; + struct dc_stream_state *old_paired_stream = dc_state_get_paired_subvp_stream(dc->current_state, old_stream); - main_pipe_width = old_stream->mall_stream_config.paired_stream->dst.width; - main_pipe_height = old_stream->mall_stream_config.paired_stream->dst.height; + main_pipe_width = old_paired_stream->dst.width; + main_pipe_height = old_paired_stream->dst.height; if (dc->hwss.blank_phantom) dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height); tg->funcs->enable_crtc(tg); } } - dc_rem_all_planes_for_stream(dc, old_stream, dangling_context); + dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context); disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); if (pipe->stream && pipe->plane_state) @@ -1206,7 +1210,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) * The OTG is set to disable on falling edge of VUPDATE so the plane disable * will still get it's double buffer update. 
*/ - if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (is_phantom) { if (tg->funcs->disable_phantom_crtc) tg->funcs->disable_phantom_crtc(tg); } @@ -1215,7 +1219,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) current_ctx = dc->current_state; dc->current_state = dangling_context; - dc_release_state(current_ctx); + dc_state_release(current_ctx); } static void disable_vbios_mode_if_required( @@ -1287,7 +1291,7 @@ static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) int count = 0; struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->plane_state || pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) continue; /* Timeout 100 ms */ @@ -1513,7 +1517,7 @@ static void program_timing_sync( } for (k = 0; k < group_size; k++) { - struct dc_stream_status *status = dc_stream_get_status_from_state(ctx, pipe_set[k]->stream); + struct dc_stream_status *status = dc_state_get_stream_status(ctx, pipe_set[k]->stream); status->timing_sync_info.group_id = num_group; status->timing_sync_info.group_size = group_size; @@ -1840,7 +1844,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; /* Check old context for SubVP */ - subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM); + subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM); if (subvp_prev_use) break; } @@ -1998,9 +2002,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c old_state = dc->current_state; dc->current_state = context; - dc_release_state(old_state); + dc_state_release(old_state); - dc_retain_state(dc->current_state); + dc_state_retain(dc->current_state); return result; } @@ -2071,7 +2075,7 @@ enum dc_status dc_commit_streams(struct dc *dc, if (handle_exit_odm2to1) res = commit_minimal_transition_state(dc, dc->current_state); - context = dc_create_state(dc); + context = dc_state_create(dc); if (!context) goto context_alloc_fail; @@ -2091,7 +2095,7 @@ enum dc_status dc_commit_streams(struct dc *dc, streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst; if (dc_is_embedded_signal(streams[i]->signal)) { - struct dc_stream_status *status = dc_stream_get_status_from_state(context, streams[i]); + struct dc_stream_status *status = dc_state_get_stream_status(context, streams[i]); if (dc->hwss.is_abm_supported) status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, streams[i]); @@ -2102,7 +2106,7 @@ enum dc_status dc_commit_streams(struct dc *dc, } fail: - dc_release_state(context); + dc_state_release(context); context_alloc_fail: @@ -2156,7 +2160,7 @@ static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context) pipe = &context->res_ctx.pipe_ctx[i]; // Don't check flip pending on phantom pipes - if (!pipe->plane_state || (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)) + if (!pipe->plane_state || (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM)) continue; /* Must set to false to start with, due to OR in update function */ @@ -2228,103 +2232,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) dc->optimized_required = false; } -static void init_state(struct dc *dc, struct dc_state *context) -{ - /* Each context must have their own instance of VBA and in order to - * 
initialize and obtain IP and SOC the base DML instance from DC is - * initially copied into every context - */ - memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); -} - -struct dc_state *dc_create_state(struct dc *dc) -{ - struct dc_state *context = kvzalloc(sizeof(struct dc_state), - GFP_KERNEL); - - if (!context) - return NULL; - - init_state(dc, context); - -#ifdef CONFIG_DRM_AMD_DC_FP - if (dc->debug.using_dml2) { - dml2_create(dc, &dc->dml2_options, &context->bw_ctx.dml2); - } -#endif - kref_init(&context->refcount); - - return context; -} - -struct dc_state *dc_copy_state(struct dc_state *src_ctx) -{ - int i, j; - struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL); - - if (!new_ctx) - return NULL; - memcpy(new_ctx, src_ctx, sizeof(struct dc_state)); - -#ifdef CONFIG_DRM_AMD_DC_FP - if (new_ctx->bw_ctx.dml2 && !dml2_create_copy(&new_ctx->bw_ctx.dml2, src_ctx->bw_ctx.dml2)) { - dc_release_state(new_ctx); - return NULL; - } -#endif - - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i]; - - if (cur_pipe->top_pipe) - cur_pipe->top_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; - - if (cur_pipe->bottom_pipe) - cur_pipe->bottom_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; - - if (cur_pipe->prev_odm_pipe) - cur_pipe->prev_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx]; - - if (cur_pipe->next_odm_pipe) - cur_pipe->next_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx]; - - } - - for (i = 0; i < new_ctx->stream_count; i++) { - dc_stream_retain(new_ctx->streams[i]); - for (j = 0; j < new_ctx->stream_status[i].plane_count; j++) - dc_plane_state_retain( - new_ctx->stream_status[i].plane_states[j]); - } - - kref_init(&new_ctx->refcount); - - return new_ctx; -} - -void dc_retain_state(struct dc_state *context) -{ - kref_get(&context->refcount); -} - -static void dc_state_free(struct kref *kref) -{ - struct dc_state *context = container_of(kref, struct dc_state, refcount); - dc_resource_state_destruct(context); - -#ifdef CONFIG_DRM_AMD_DC_FP - dml2_destroy(context->bw_ctx.dml2); - context->bw_ctx.dml2 = 0; -#endif - - kvfree(context); -} - -void dc_release_state(struct dc_state *context) -{ - kref_put(&context->refcount, dc_state_free); -} - bool dc_set_generic_gpio_for_stereo(bool enable, struct gpio_service *gpio_service) { @@ -2992,7 +2899,7 @@ static void copy_stream_update_to_stream(struct dc *dc, update->dsc_config->num_slices_v != 0); /* Use temporarry context for validating new DSC config */ - struct dc_state *dsc_validate_context = dc_create_state(dc); + struct dc_state *dsc_validate_context = dc_state_create(dc); if (dsc_validate_context) { dc_resource_state_copy_construct(dc->current_state, dsc_validate_context); @@ -3005,7 +2912,7 @@ static void copy_stream_update_to_stream(struct dc *dc, update->dsc_config = NULL; } - dc_release_state(dsc_validate_context); + dc_state_release(dsc_validate_context); } else { DC_ERROR("Failed to allocate new validate context for DSC change\n"); update->dsc_config = NULL; @@ -3104,7 +3011,7 @@ static bool update_planes_and_stream_state(struct dc *dc, new_planes[i] = srf_updates[i].surface; /* initialize scratch memory for building context */ - context = dc_create_state(dc); + context = dc_state_create(dc); if (context == NULL) { DC_ERROR("Failed to allocate new validate context!\n"); return false; @@ -3120,14 +3027,14 @@ static bool update_planes_and_stream_state(struct dc *dc, 
dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); /*remove old surfaces from context */ - if (!dc_rem_all_planes_for_stream(dc, stream, context)) { + if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) { BREAK_TO_DEBUGGER(); goto fail; } /* add surface to context */ - if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) { + if (!dc_state_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) { BREAK_TO_DEBUGGER(); goto fail; @@ -3185,7 +3092,7 @@ static bool update_planes_and_stream_state(struct dc *dc, return true; fail: - dc_release_state(context); + dc_state_release(context); return false; @@ -3626,7 +3533,7 @@ static void commit_planes_for_stream(struct dc *dc, struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; // Check old context for SubVP - subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM); + subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM); if (subvp_prev_use) break; } @@ -3634,7 +3541,7 @@ static void commit_planes_for_stream(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { subvp_curr_use = true; break; } @@ -4019,7 +3926,7 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) { + if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_NONE) { subvp_active = true; break; } @@ -4056,7 +3963,7 @@ struct pipe_split_policy_backup { static void release_minimal_transition_state(struct dc *dc, struct dc_state *context, struct pipe_split_policy_backup *policy) { - dc_release_state(context); + dc_state_release(context); /* restore previous pipe split and odm policy */ if (!dc->config.is_vmin_only_asic) dc->debug.pipe_split_policy = policy->mpc_policy; @@ -4067,7 +3974,7 @@ static void release_minimal_transition_state(struct dc *dc, static struct dc_state *create_minimal_transition_state(struct dc *dc, struct dc_state *base_context, struct pipe_split_policy_backup *policy) { - struct dc_state *minimal_transition_context = dc_create_state(dc); + struct dc_state *minimal_transition_context = dc_state_create(dc); unsigned int i, j; if (!dc->config.is_vmin_only_asic) { @@ -4211,7 +4118,7 @@ static bool commit_minimal_transition_state(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { subvp_in_use = true; break; } @@ -4523,7 +4430,7 @@ bool dc_update_planes_and_stream(struct dc *dc, if (dc->res_pool->funcs->save_mall_state) dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); if (!commit_minimal_transition_state(dc, context)) { - dc_release_state(context); + dc_state_release(context); return false; } if (dc->res_pool->funcs->restore_mall_state) @@ -4593,7 +4500,7 @@ bool dc_update_planes_and_stream(struct dc *dc, struct dc_state *old = dc->current_state; dc->current_state = context; - dc_release_state(old); + 
dc_state_release(old); // clear any forced full updates for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -4652,7 +4559,7 @@ void dc_commit_updates_for_stream(struct dc *dc, if (update_type >= UPDATE_TYPE_FULL) { /* initialize scratch memory for building context */ - context = dc_create_state(dc); + context = dc_state_create(dc); if (context == NULL) { DC_ERROR("Failed to allocate new validate context!\n"); return; @@ -4698,7 +4605,7 @@ void dc_commit_updates_for_stream(struct dc *dc, if (update_type >= UPDATE_TYPE_FULL) { if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { DC_ERROR("Mode validation failed for stream update!\n"); - dc_release_state(context); + dc_state_release(context); return; } } @@ -4731,7 +4638,7 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_state *old = dc->current_state; dc->current_state = context; - dc_release_state(old); + dc_state_release(old); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index fc18b9dc946f..170bad6f33c7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -31,6 +31,7 @@ #include "basics/dc_common.h" #include "resource.h" #include "dc_dmub_srv.h" +#include "dc_state_priv.h" #define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) @@ -440,8 +441,7 @@ void get_subvp_visual_confirm_color( for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.paired_stream && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { /* SubVP enable - red */ color->color_g_y = 0; color->color_b_cb = 0; @@ -454,7 +454,7 @@ void get_subvp_visual_confirm_color( } } - if (enable_subvp && pipe_ctx->stream->mall_stream_config.type == SUBVP_NONE) { + if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) { color->color_r_cr = 0; if (pipe_ctx->stream->allow_freesync == 1) { /* SubVP enable and DRR on - green */ @@ -529,7 +529,7 @@ void hwss_build_fast_sequence(struct dc *dc, } if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && - current_mpc_pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_MAIN) { block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; block_sequence[*num_steps].params.subvp_save_surf_addr.addr = ¤t_mpc_pipe->plane_state->address; block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4382d9ae4292..4a6a7d7557e0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2993,189 +2993,6 @@ bool resource_update_pipes_for_plane_with_slice_count( return result; } -bool dc_add_plane_to_context( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context) -{ - struct resource_pool *pool = dc->res_pool; - struct pipe_ctx *otg_master_pipe; - struct dc_stream_status *stream_status = NULL; - bool added = 
false; - - stream_status = dc_stream_get_status_from_state(context, stream); - if (stream_status == NULL) { - dm_error("Existing stream not found; failed to attach surface!\n"); - goto out; - } else if (stream_status->plane_count == MAX_SURFACE_NUM) { - dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n", - plane_state, MAX_SURFACE_NUM); - goto out; - } - - otg_master_pipe = resource_get_otg_master_for_stream( - &context->res_ctx, stream); - added = resource_append_dpp_pipes_for_plane_composition(context, - dc->current_state, pool, otg_master_pipe, plane_state); - - if (added) { - stream_status->plane_states[stream_status->plane_count] = - plane_state; - stream_status->plane_count++; - dc_plane_state_retain(plane_state); - } - -out: - return added; -} - -bool dc_remove_plane_from_context( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context) -{ - int i; - struct dc_stream_status *stream_status = NULL; - struct resource_pool *pool = dc->res_pool; - - if (!plane_state) - return true; - - for (i = 0; i < context->stream_count; i++) - if (context->streams[i] == stream) { - stream_status = &context->stream_status[i]; - break; - } - - if (stream_status == NULL) { - dm_error("Existing stream not found; failed to remove plane.\n"); - return false; - } - - resource_remove_dpp_pipes_for_plane_composition( - context, pool, plane_state); - - for (i = 0; i < stream_status->plane_count; i++) { - if (stream_status->plane_states[i] == plane_state) { - dc_plane_state_release(stream_status->plane_states[i]); - break; - } - } - - if (i == stream_status->plane_count) { - dm_error("Existing plane_state not found; failed to detach it!\n"); - return false; - } - - stream_status->plane_count--; - - /* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */ - for (; i < stream_status->plane_count; i++) - stream_status->plane_states[i] = stream_status->plane_states[i + 1]; - - stream_status->plane_states[stream_status->plane_count] = NULL; - - if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm) - /* ODM combine could prevent us from supporting more planes - * we will reset ODM slice count back to 1 when all planes have - * been removed to maximize the amount of planes supported when - * new planes are added. - */ - resource_update_pipes_for_stream_with_slice_count( - context, dc->current_state, dc->res_pool, stream, 1); - - return true; -} - -/** - * dc_rem_all_planes_for_stream - Remove planes attached to the target stream. - * - * @dc: Current dc state. - * @stream: Target stream, which we want to remove the attached plans. - * @context: New context. - * - * Return: - * Return true if DC was able to remove all planes from the target - * stream, otherwise, return false. 
- */ -bool dc_rem_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_state *context) -{ - int i, old_plane_count; - struct dc_stream_status *stream_status = NULL; - struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; - - for (i = 0; i < context->stream_count; i++) - if (context->streams[i] == stream) { - stream_status = &context->stream_status[i]; - break; - } - - if (stream_status == NULL) { - dm_error("Existing stream %p not found!\n", stream); - return false; - } - - old_plane_count = stream_status->plane_count; - - for (i = 0; i < old_plane_count; i++) - del_planes[i] = stream_status->plane_states[i]; - - for (i = 0; i < old_plane_count; i++) - if (!dc_remove_plane_from_context(dc, stream, del_planes[i], context)) - return false; - - return true; -} - -static bool add_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - const struct dc_validation_set set[], - int set_count, - struct dc_state *context) -{ - int i, j; - - for (i = 0; i < set_count; i++) - if (set[i].stream == stream) - break; - - if (i == set_count) { - dm_error("Stream %p not found in set!\n", stream); - return false; - } - - for (j = 0; j < set[i].plane_count; j++) - if (!dc_add_plane_to_context(dc, stream, set[i].plane_states[j], context)) - return false; - - return true; -} - -bool dc_add_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state * const *plane_states, - int plane_count, - struct dc_state *context) -{ - struct dc_validation_set set; - int i; - - set.stream = stream; - set.plane_count = plane_count; - - for (i = 0; i < plane_count; i++) - set.plane_states[i] = plane_states[i]; - - return add_all_planes_for_stream(dc, stream, &set, 1, context); -} - bool dc_is_timing_changed(struct dc_stream_state *cur_stream, struct dc_stream_state *new_stream) { @@ -3327,84 +3144,6 @@ static struct audio *find_first_free_audio( return NULL; } -/* - * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state. - */ -enum dc_status dc_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *stream) -{ - enum dc_status res; - DC_LOGGER_INIT(dc->ctx->logger); - - if (new_ctx->stream_count >= dc->res_pool->timing_generator_count) { - DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream); - return DC_ERROR_UNEXPECTED; - } - - new_ctx->streams[new_ctx->stream_count] = stream; - dc_stream_retain(stream); - new_ctx->stream_count++; - - res = resource_add_otg_master_for_stream_output( - new_ctx, dc->res_pool, stream); - if (res != DC_OK) - DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res); - - return res; -} - -/* - * dc_remove_stream_from_ctx() - Remove a stream from a dc_state. 
- */ -enum dc_status dc_remove_stream_from_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *stream) -{ - int i; - struct dc_context *dc_ctx = dc->ctx; - struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream( - &new_ctx->res_ctx, stream); - - if (!del_pipe) { - DC_ERROR("Pipe not found for stream %p !\n", stream); - return DC_ERROR_UNEXPECTED; - } - - resource_update_pipes_for_stream_with_slice_count(new_ctx, - dc->current_state, dc->res_pool, stream, 1); - resource_remove_otg_master_for_stream_output( - new_ctx, dc->res_pool, stream); - - for (i = 0; i < new_ctx->stream_count; i++) - if (new_ctx->streams[i] == stream) - break; - - if (new_ctx->streams[i] != stream) { - DC_ERROR("Context doesn't have stream %p !\n", stream); - return DC_ERROR_UNEXPECTED; - } - - dc_stream_release(new_ctx->streams[i]); - new_ctx->stream_count--; - - /* Trim back arrays */ - for (; i < new_ctx->stream_count; i++) { - new_ctx->streams[i] = new_ctx->streams[i + 1]; - new_ctx->stream_status[i] = new_ctx->stream_status[i + 1]; - } - - new_ctx->streams[new_ctx->stream_count] = NULL; - memset( - &new_ctx->stream_status[new_ctx->stream_count], - 0, - sizeof(new_ctx->stream_status[0])); - - return DC_OK; -} - static struct dc_stream_state *find_pll_sharable_stream( struct dc_stream_state *stream_needs_pll, struct dc_state *context) @@ -3855,6 +3594,31 @@ static bool planes_changed_for_existing_stream(struct dc_state *context, return false; } +static bool add_all_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *stream, + const struct dc_validation_set set[], + int set_count, + struct dc_state *state) +{ + int i, j; + + for (i = 0; i < set_count; i++) + if (set[i].stream == stream) + break; + + if (i == set_count) { + dm_error("Stream %p not found in set!\n", stream); + return false; + } + + for (j = 0; j < set[i].plane_count; j++) + if (!dc_state_add_plane(dc, stream, set[i].plane_states[j], state)) + return false; + + return true; +} + /** * dc_validate_with_context - Validate and update the potential new stream in the context object * @@ -3960,7 +3724,7 @@ enum dc_status dc_validate_with_context(struct dc *dc, unchanged_streams[i], set, set_count)) { - if (!dc_rem_all_planes_for_stream(dc, + if (!dc_state_rem_all_planes_for_stream(dc, unchanged_streams[i], context)) { res = DC_FAIL_DETACH_SURFACES; @@ -3982,12 +3746,12 @@ enum dc_status dc_validate_with_context(struct dc *dc, } } - if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) { + if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) { res = DC_FAIL_DETACH_SURFACES; goto fail; } - res = dc_remove_stream_from_ctx(dc, context, del_streams[i]); + res = dc_state_remove_stream(dc, context, del_streams[i]); if (res != DC_OK) goto fail; } @@ -4010,7 +3774,7 @@ enum dc_status dc_validate_with_context(struct dc *dc, /* Add new streams and then add all planes for the new stream */ for (i = 0; i < add_streams_count; i++) { calculate_phy_pix_clks(add_streams[i]); - res = dc_add_stream_to_ctx(dc, context, add_streams[i]); + res = dc_state_add_stream(dc, context, add_streams[i]); if (res != DC_OK) goto fail; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c new file mode 100644 index 000000000000..ec5b0d114c7c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -0,0 +1,527 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include "core_types.h" +#include "core_status.h" +#include "dc_state.h" +#include "dc_state_priv.h" +#include "dc_stream_priv.h" +#include "dc_plane_priv.h" + +#include "dm_services.h" +#include "resource.h" + +#include "dml2/dml2_wrapper.h" +#include "dml2/dml2_internal_types.h" + +#define DC_LOGGER \ + dc->ctx->logger +#define DC_LOGGER_INIT(logger) + +/* Public dc_state functions */ +static void init_state(struct dc *dc, struct dc_state *state) +{ + /* Each context must have their own instance of VBA and in order to + * initialize and obtain IP and SOC the base DML instance from DC is + * initially copied into every context + */ + memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); +} + +struct dc_state *dc_state_create(struct dc *dc) +{ + struct dc_state *state = kvzalloc(sizeof(struct dc_state), + GFP_KERNEL); + + if (!state) + return NULL; + + init_state(dc, state); + +#ifdef CONFIG_DRM_AMD_DC_FP + if (dc->debug.using_dml2) + dml2_create(dc, &dc->dml2_options, &state->bw_ctx.dml2); +#endif + + kref_init(&state->refcount); + + return state; +} + +struct dc_state *dc_state_create_copy(struct dc_state *src_state) +{ + int i, j; + struct dc_state *new_state = kvmalloc(sizeof(struct dc_state), GFP_KERNEL); + + if (!new_state) + return NULL; + + memcpy(new_state, src_state, sizeof(struct dc_state)); + +#ifdef CONFIG_DRM_AMD_DC_FP + if (new_state->bw_ctx.dml2 && !dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) { + dc_state_release(new_state); + return NULL; + } +#endif + + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *cur_pipe = &new_state->res_ctx.pipe_ctx[i]; + + if (cur_pipe->top_pipe) + cur_pipe->top_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; + + if (cur_pipe->bottom_pipe) + cur_pipe->bottom_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; + + if (cur_pipe->prev_odm_pipe) + cur_pipe->prev_odm_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx]; + + if (cur_pipe->next_odm_pipe) + cur_pipe->next_odm_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx]; + } + + for (i = 0; i < new_state->stream_count; i++) { + dc_stream_retain(new_state->streams[i]); + for (j = 0; j < new_state->stream_status[i].plane_count; j++) + dc_plane_state_retain( + new_state->stream_status[i].plane_states[j]); + } + + kref_init(&new_state->refcount); + + return new_state; +} + +void 
dc_state_retain(struct dc_state *context) +{ + kref_get(&context->refcount); +} + +static void dc_state_free(struct kref *kref) +{ + struct dc_state *state = container_of(kref, struct dc_state, refcount); + + dc_resource_state_destruct(state); + +#ifdef CONFIG_DRM_AMD_DC_FP + dml2_destroy(state->bw_ctx.dml2); + state->bw_ctx.dml2 = 0; +#endif + + kvfree(state); +} + +void dc_state_release(struct dc_state *state) +{ + kref_put(&state->refcount, dc_state_free); +} +/* + * dc_state_add_stream() - Add a new dc_stream_state to a dc_state. + */ +enum dc_status dc_state_add_stream( + struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream) +{ + enum dc_status res; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (state->stream_count >= dc->res_pool->timing_generator_count) { + DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream); + return DC_ERROR_UNEXPECTED; + } + + state->streams[state->stream_count] = stream; + dc_stream_retain(stream); + state->stream_count++; + + res = resource_add_otg_master_for_stream_output( + state, dc->res_pool, stream); + if (res != DC_OK) + DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res); + + return res; +} + +/* + * dc_state_remove_stream() - Remove a stream from a dc_state. + */ +enum dc_status dc_state_remove_stream( + struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream) +{ + int i; + struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream( + &state->res_ctx, stream); + + if (!del_pipe) { + dm_error("Pipe not found for stream %p !\n", stream); + return DC_ERROR_UNEXPECTED; + } + + resource_update_pipes_for_stream_with_slice_count(state, + dc->current_state, dc->res_pool, stream, 1); + resource_remove_otg_master_for_stream_output( + state, dc->res_pool, stream); + + for (i = 0; i < state->stream_count; i++) + if (state->streams[i] == stream) + break; + + if (state->streams[i] != stream) { + dm_error("Context doesn't have stream %p !\n", stream); + return DC_ERROR_UNEXPECTED; + } + + dc_stream_release(state->streams[i]); + state->stream_count--; + + /* Trim back arrays */ + for (; i < state->stream_count; i++) { + state->streams[i] = state->streams[i + 1]; + state->stream_status[i] = state->stream_status[i + 1]; + } + + state->streams[state->stream_count] = NULL; + memset( + &state->stream_status[state->stream_count], + 0, + sizeof(state->stream_status[0])); + + return DC_OK; +} + +bool dc_state_add_plane( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *state) +{ + struct resource_pool *pool = dc->res_pool; + struct pipe_ctx *otg_master_pipe; + struct dc_stream_status *stream_status = NULL; + bool added = false; + + stream_status = dc_state_get_stream_status(state, stream); + if (stream_status == NULL) { + dm_error("Existing stream not found; failed to attach surface!\n"); + goto out; + } else if (stream_status->plane_count == MAX_SURFACE_NUM) { + dm_error("Surface: can not attach plane_state %p! 
Maximum is: %d\n", + plane_state, MAX_SURFACE_NUM); + goto out; + } + + otg_master_pipe = resource_get_otg_master_for_stream( + &state->res_ctx, stream); + added = resource_append_dpp_pipes_for_plane_composition(state, + dc->current_state, pool, otg_master_pipe, plane_state); + + if (added) { + stream_status->plane_states[stream_status->plane_count] = + plane_state; + stream_status->plane_count++; + dc_plane_state_retain(plane_state); + } + +out: + return added; +} + +bool dc_state_remove_plane( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *state) +{ + int i; + struct dc_stream_status *stream_status = NULL; + struct resource_pool *pool = dc->res_pool; + + if (!plane_state) + return true; + + for (i = 0; i < state->stream_count; i++) + if (state->streams[i] == stream) { + stream_status = &state->stream_status[i]; + break; + } + + if (stream_status == NULL) { + dm_error("Existing stream not found; failed to remove plane.\n"); + return false; + } + + resource_remove_dpp_pipes_for_plane_composition( + state, pool, plane_state); + + for (i = 0; i < stream_status->plane_count; i++) { + if (stream_status->plane_states[i] == plane_state) { + dc_plane_state_release(stream_status->plane_states[i]); + break; + } + } + + if (i == stream_status->plane_count) { + dm_error("Existing plane_state not found; failed to detach it!\n"); + return false; + } + + stream_status->plane_count--; + + /* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */ + for (; i < stream_status->plane_count; i++) + stream_status->plane_states[i] = stream_status->plane_states[i + 1]; + + stream_status->plane_states[stream_status->plane_count] = NULL; + + if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm) + /* ODM combine could prevent us from supporting more planes + * we will reset ODM slice count back to 1 when all planes have + * been removed to maximize the amount of planes supported when + * new planes are added. + */ + resource_update_pipes_for_stream_with_slice_count( + state, dc->current_state, dc->res_pool, stream, 1); + + return true; +} + +/** + * dc_state_rem_all_planes_for_stream - Remove planes attached to the target stream. + * + * @dc: Current dc state. + * @stream: Target stream, which we want to remove the attached plans. + * @context: New context. + * + * Return: + * Return true if DC was able to remove all planes from the target + * stream, otherwise, return false. 
+ */ +bool dc_state_rem_all_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_state *state) +{ + int i, old_plane_count; + struct dc_stream_status *stream_status = NULL; + struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; + + for (i = 0; i < state->stream_count; i++) + if (state->streams[i] == stream) { + stream_status = &state->stream_status[i]; + break; + } + + if (stream_status == NULL) { + dm_error("Existing stream %p not found!\n", stream); + return false; + } + + old_plane_count = stream_status->plane_count; + + for (i = 0; i < old_plane_count; i++) + del_planes[i] = stream_status->plane_states[i]; + + for (i = 0; i < old_plane_count; i++) + if (!dc_state_remove_plane(dc, stream, del_planes[i], state)) + return false; + + return true; +} + +bool dc_state_add_all_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state * const *plane_states, + int plane_count, + struct dc_state *state) +{ + int i; + bool result = true; + + for (i = 0; i < plane_count; i++) + if (!dc_state_add_plane(dc, stream, plane_states[i], state)) { + result = false; + break; + } + + return result; +} + +/* Private dc_state functions */ + +/** + * dc_state_get_stream_status - Get stream status from given dc state + * @state: DC state to find the stream status in + * @stream: The stream to get the stream status for + * + * The given stream is expected to exist in the given dc state. Otherwise, NULL + * will be returned. + */ +struct dc_stream_status *dc_state_get_stream_status( + struct dc_state *state, + struct dc_stream_state *stream) +{ + uint8_t i; + + if (state == NULL) + return NULL; + + for (i = 0; i < state->stream_count; i++) { + if (stream == state->streams[i]) + return &state->stream_status[i]; + } + + return NULL; +} + +enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state, + const struct pipe_ctx *pipe_ctx) +{ + if (pipe_ctx->stream == NULL) + return SUBVP_NONE; + + return pipe_ctx->stream->mall_stream_config.type; +} + +enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state, + const struct dc_stream_state *stream) +{ + return stream->mall_stream_config.type; +} + +struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state, + const struct dc_stream_state *stream) +{ + return stream->mall_stream_config.paired_stream; +} + +struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *main_stream) +{ + struct dc_stream_state *phantom_stream = dc_create_stream_for_sink(main_stream->sink); + + if (phantom_stream != NULL) { + phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; + phantom_stream->dpms_off = true; + } + + return phantom_stream; +} + +void dc_state_release_phantom_stream(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream) +{ + dc_stream_release(phantom_stream); +} + +struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc, + struct dc_state *state, + struct dc_plane_state *main_plane) +{ + struct dc_plane_state *phantom_plane = dc_create_plane_state(dc); + + if (phantom_plane != NULL) + phantom_plane->is_phantom = true; + + return phantom_plane; +} + +void dc_state_release_phantom_plane(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *phantom_plane) +{ + dc_plane_state_release(phantom_plane); +} + +/* add phantom streams to context and generate correct meta inside dc_state */ +enum dc_status 
dc_state_add_phantom_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + struct dc_stream_state *main_stream) +{ + enum dc_status res = dc_state_add_stream(dc, state, phantom_stream); + + /* setup subvp meta */ + phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; + phantom_stream->mall_stream_config.paired_stream = main_stream; + main_stream->mall_stream_config.type = SUBVP_MAIN; + main_stream->mall_stream_config.paired_stream = phantom_stream; + + return res; +} + +enum dc_status dc_state_remove_phantom_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream) +{ + /* reset subvp meta */ + phantom_stream->mall_stream_config.paired_stream->mall_stream_config.type = SUBVP_NONE; + phantom_stream->mall_stream_config.paired_stream->mall_stream_config.paired_stream = NULL; + + /* remove stream from state */ + return dc_state_remove_stream(dc, state, phantom_stream); +} + +bool dc_state_add_phantom_plane( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *phantom_plane, + struct dc_state *state) +{ + return dc_state_add_plane(dc, phantom_stream, phantom_plane, state); +} + +bool dc_state_remove_phantom_plane( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *phantom_plane, + struct dc_state *state) +{ + return dc_state_remove_plane(dc, phantom_stream, phantom_plane, state); +} + +bool dc_state_rem_all_phantom_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_state *state) +{ + return dc_state_rem_all_planes_for_stream(dc, phantom_stream, state); +} + +bool dc_state_add_all_phantom_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state * const *phantom_planes, + int plane_count, + struct dc_state *state) +{ + return dc_state_add_all_planes_for_stream(dc, phantom_stream, phantom_planes, plane_count, state); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 38cd29b210c0..86de4bf2ce13 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -31,6 +31,8 @@ #include "ipp.h" #include "timing_generator.h" #include "dc_dmub_srv.h" +#include "dc_state_priv.h" +#include "dc_stream_priv.h" #define DC_LOGGER dc->ctx->logger @@ -54,7 +56,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink) } } -static bool dc_stream_construct(struct dc_stream_state *stream, +bool dc_stream_construct(struct dc_stream_state *stream, struct dc_sink *dc_sink_data) { uint32_t i = 0; @@ -127,7 +129,7 @@ static bool dc_stream_construct(struct dc_stream_state *stream, return true; } -static void dc_stream_destruct(struct dc_stream_state *stream) +void dc_stream_destruct(struct dc_stream_state *stream) { dc_sink_release(stream->sink); if (stream->out_transfer_func != NULL) { @@ -208,31 +210,6 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream) return new_stream; } -/** - * dc_stream_get_status_from_state - Get stream status from given dc state - * @state: DC state to find the stream status in - * @stream: The stream to get the stream status for - * - * The given stream is expected to exist in the given dc state. Otherwise, NULL - * will be returned. 
- */ -struct dc_stream_status *dc_stream_get_status_from_state( - struct dc_state *state, - struct dc_stream_state *stream) -{ - uint8_t i; - - if (state == NULL) - return NULL; - - for (i = 0; i < state->stream_count; i++) { - if (stream == state->streams[i]) - return &state->stream_status[i]; - } - - return NULL; -} - /** * dc_stream_get_status() - Get current stream status of the given stream state * @stream: The stream to get the stream status for. @@ -244,7 +221,7 @@ struct dc_stream_status *dc_stream_get_status( struct dc_stream_state *stream) { struct dc *dc = stream->ctx->dc; - return dc_stream_get_status_from_state(dc->current_state, stream); + return dc_state_get_stream_status(dc->current_state, stream); } static void program_cursor_attributes( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index a80e45300783..19a2c7140ae8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -32,10 +32,12 @@ #include "transform.h" #include "dpp.h" +#include "dc_plane_priv.h" + /******************************************************************************* * Private functions ******************************************************************************/ -static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state) +void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state) { plane_state->ctx = ctx; @@ -63,7 +65,7 @@ static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *pl } -static void dc_plane_destruct(struct dc_plane_state *plane_state) +void dc_plane_destruct(struct dc_plane_state *plane_state) { if (plane_state->gamma_correction != NULL) { dc_gamma_release(&plane_state->gamma_correction); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index ce00a6eeb164..d638679cf31a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -27,6 +27,8 @@ #define DC_INTERFACE_H_ #include "dc_types.h" +#include "dc_state.h" +#include "dc_plane.h" #include "grph_object_defs.h" #include "logger_types.h" #include "hdcp_msg_types.h" @@ -1386,16 +1388,6 @@ struct dc_surface_update { const struct colorspace_transform *gamut_remap_matrix; }; -/* - * Create a new surface with default parameters; - */ -struct dc_plane_state *dc_create_plane_state(struct dc *dc); -const struct dc_plane_status *dc_plane_get_status( - const struct dc_plane_state *plane_state); - -void dc_plane_state_retain(struct dc_plane_state *plane_state); -void dc_plane_state_release(struct dc_plane_state *plane_state); - void dc_gamma_retain(struct dc_gamma *dc_gamma); void dc_gamma_release(struct dc_gamma **dc_gamma); struct dc_gamma *dc_create_gamma(void); @@ -1486,10 +1478,6 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_stream_state *streams[], uint8_t stream_count); -struct dc_state *dc_create_state(struct dc *dc); -struct dc_state *dc_copy_state(struct dc_state *src_ctx); -void dc_retain_state(struct dc_state *context); -void dc_release_state(struct dc_state *context); struct dc_plane_state *dc_get_surface_for_mpcc(struct dc *dc, struct dc_stream_state *stream, diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 4b93e7a529d5..704d4ff722f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -33,6 +33,7 @@ #include 
"cursor_reg_cache.h" #include "resource.h" #include "clk_mgr.h" +#include "dc_state_priv.h" #define CTX dc_dmub_srv->ctx #define DC_LOGGER CTX->logger @@ -532,12 +533,14 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi * 3. Populate the drr_info with the min and max supported vtotal values */ static void populate_subvp_cmd_drr_info(struct dc *dc, + struct dc_state *context, struct pipe_ctx *subvp_pipe, struct pipe_ctx *vblank_pipe, struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data) { + struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; - struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + struct dc_crtc_timing *phantom_timing = &phantom_stream->timing; struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing; uint16_t drr_frame_us = 0; uint16_t min_drr_supported_us = 0; @@ -625,7 +628,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc, continue; // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) break; } @@ -642,7 +645,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc, if (vblank_pipe->stream->ignore_msa_timing_param && (vblank_pipe->stream->allow_freesync || vblank_pipe->stream->vrr_active_variable || vblank_pipe->stream->vrr_active_fixed)) - populate_subvp_cmd_drr_info(dc, pipe, vblank_pipe, pipe_data); + populate_subvp_cmd_drr_info(dc, context, pipe, vblank_pipe, pipe_data); } /** @@ -667,10 +670,15 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, uint32_t subvp0_prefetch_us = 0; uint32_t subvp1_prefetch_us = 0; uint32_t prefetch_delta_us = 0; - struct dc_crtc_timing *phantom_timing0 = &subvp_pipes[0]->stream->mall_stream_config.paired_stream->timing; - struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing; + struct dc_stream_state *phantom_stream0 = NULL; + struct dc_stream_state *phantom_stream1 = NULL; + struct dc_crtc_timing *phantom_timing0 = &phantom_stream0->timing; + struct dc_crtc_timing *phantom_timing1 = &phantom_stream1->timing; struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL; + phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream); + phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream); + subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) * (uint64_t)phantom_timing0->h_total * 1000000), (((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us)); @@ -720,8 +728,9 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, uint32_t j; struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index]; + struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; - struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + struct dc_crtc_timing *phantom_timing = &phantom_stream->timing; uint32_t out_num_stream, out_den_stream, out_num_plane, out_den_plane, out_num, out_den; pipe_data->mode = SUBVP; @@ -775,7 +784,7 @@ static void 
populate_subvp_cmd_pipe_info(struct dc *dc, for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j]; - if (phantom_pipe->stream == subvp_pipe->stream->mall_stream_config.paired_stream) { + if (phantom_pipe->stream == dc_state_get_paired_subvp_stream(context, subvp_pipe->stream)) { pipe_data->pipe_config.subvp_data.phantom_pipe_index = phantom_pipe->stream_res.tg->inst; if (phantom_pipe->bottom_pipe) { pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->bottom_pipe->plane_res.hubp->inst; @@ -809,6 +818,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, union dmub_rb_cmd cmd; struct pipe_ctx *subvp_pipes[2]; uint32_t wm_val_refclk = 0; + enum mall_stream_type pipe_mall_type; memset(&cmd, 0, sizeof(cmd)); // FW command for SUBVP @@ -824,7 +834,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, */ if (resource_is_pipe_type(pipe, OTG_MASTER) && resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) subvp_pipes[subvp_count++] = pipe; } @@ -832,6 +842,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, // For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (!pipe->stream) continue; @@ -843,11 +854,11 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, if (resource_is_pipe_type(pipe, OTG_MASTER) && resource_is_pipe_type(pipe, DPP_PIPE) && pipe->stream->mall_stream_config.paired_stream && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + pipe_mall_type == SUBVP_MAIN) { populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++); } else if (resource_is_pipe_type(pipe, OTG_MASTER) && resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + pipe_mall_type == SUBVP_NONE) { // Don't need to check for ActiveDRAMClockChangeMargin < 0, not valid in cases where // we run through DML without calculating "natural" P-state support populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++); diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h new file mode 100644 index 000000000000..ef380cae816a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h @@ -0,0 +1,38 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_PLANE_H_ +#define _DC_PLANE_H_ + +#include "dc.h" +#include "dc_hw_types.h" + +struct dc_plane_state *dc_create_plane_state(struct dc *dc); +const struct dc_plane_status *dc_plane_get_status( + const struct dc_plane_state *plane_state); +void dc_plane_state_retain(struct dc_plane_state *plane_state); +void dc_plane_state_release(struct dc_plane_state *plane_state); + +#endif /* _DC_PLANE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h new file mode 100644 index 000000000000..9ee184c1df00 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h @@ -0,0 +1,34 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_PLANE_PRIV_H_ +#define _DC_PLANE_PRIV_H_ + +#include "dc_plane.h" + +void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state); +void dc_plane_destruct(struct dc_plane_state *plane_state); + +#endif /* _DC_PLANE_PRIV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h new file mode 100644 index 000000000000..df1a8b85a652 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_state.h @@ -0,0 +1,74 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_STATE_H_ +#define _DC_STATE_H_ + +#include "dc.h" +#include "inc/core_status.h" + + +struct dc_state *dc_state_create(struct dc *dc); +struct dc_state *dc_state_create_copy(struct dc_state *src_state); +void dc_state_retain(struct dc_state *state); +void dc_state_release(struct dc_state *state); + +enum dc_status dc_state_add_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + +enum dc_status dc_state_remove_stream( + struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + +bool dc_state_add_plane( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *state); + +bool dc_state_remove_plane( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *state); + +bool dc_state_rem_all_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_state *state); + +bool dc_state_add_all_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state * const *plane_states, + int plane_count, + struct dc_state *state); + +struct dc_stream_status *dc_state_get_stream_status( + struct dc_state *state, + struct dc_stream_state *stream); +#endif /* _DC_STATE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h new file mode 100644 index 000000000000..16c09caa1317 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h @@ -0,0 +1,97 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DC_STATE_PRIV_H_ +#define _DC_STATE_PRIV_H_ + +#include "dc_state.h" +#include "dc_stream.h" + +struct dc_stream_status *dc_state_get_stream_status( + struct dc_state *state, + struct dc_stream_state *stream); + +/* Get the type of the provided resource (none, phantom, main) based on the provided + * context. If the context is unavailable, determine only if phantom or not. 
+ */ +enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state, + const struct pipe_ctx *pipe_ctx); +enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state, + const struct dc_stream_state *stream); + +/* Gets the phantom stream if main is provided, gets the main if phantom is provided.*/ +struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state, + const struct dc_stream_state *stream); + +/* allocate's phantom stream or plane and returns pointer to the object */ +struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *main_stream); +struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc, + struct dc_state *state, + struct dc_plane_state *main_plane); + +/* deallocate's phantom stream or plane */ +void dc_state_release_phantom_stream(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream); +void dc_state_release_phantom_plane(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *phantom_plane); + +/* add/remove phantom stream to context and generate subvp meta data */ +enum dc_status dc_state_add_phantom_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + struct dc_stream_state *main_stream); +enum dc_status dc_state_remove_phantom_stream(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream); + +bool dc_state_add_phantom_plane( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *phantom_plane, + struct dc_state *state); + +bool dc_state_remove_phantom_plane( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state *phantom_plane, + struct dc_state *state); + +bool dc_state_rem_all_phantom_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_state *state); + +bool dc_state_add_all_phantom_planes_for_stream( + const struct dc *dc, + struct dc_stream_state *phantom_stream, + struct dc_plane_state * const *phantom_planes, + int plane_count, + struct dc_state *state); + +#endif /* _DC_STATE_PRIV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 27118e672754..f617428c6a57 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -146,12 +146,6 @@ struct test_pattern { #define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR) -enum mall_stream_type { - SUBVP_NONE, // subvp not in use - SUBVP_MAIN, // subvp in use, this stream is main stream - SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream -}; - struct mall_stream_config { /* MALL stream config to indicate if the stream is phantom or not. * We will use a phantom stream to indicate that the pipe is phantom. 
@@ -413,41 +407,6 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream, uint32_t *h_position, uint32_t *v_position); -enum dc_status dc_add_stream_to_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *stream); - -enum dc_status dc_remove_stream_from_ctx( - struct dc *dc, - struct dc_state *new_ctx, - struct dc_stream_state *stream); - - -bool dc_add_plane_to_context( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context); - -bool dc_remove_plane_from_context( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state *plane_state, - struct dc_state *context); - -bool dc_rem_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_state *context); - -bool dc_add_all_planes_for_stream( - const struct dc *dc, - struct dc_stream_state *stream, - struct dc_plane_state * const *plane_states, - int plane_count, - struct dc_state *context); - bool dc_stream_add_writeback(struct dc *dc, struct dc_stream_state *stream, struct dc_writeback_info *wb_info); @@ -516,9 +475,6 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink); void dc_stream_retain(struct dc_stream_state *dc_stream); void dc_stream_release(struct dc_stream_state *dc_stream); -struct dc_stream_status *dc_stream_get_status_from_state( - struct dc_state *state, - struct dc_stream_state *stream); struct dc_stream_status *dc_stream_get_status( struct dc_stream_state *dc_stream); diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h new file mode 100644 index 000000000000..710d3b04c7e8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h @@ -0,0 +1,35 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef _DC_STREAM_PRIV_H_ +#define _DC_STREAM_PRIV_H_ + +#include "dc_stream.h" + +bool dc_stream_construct(struct dc_stream_state *stream, + struct dc_sink *dc_sink_data); +void dc_stream_destruct(struct dc_stream_state *stream); + +#endif // _DC_STREAM_PRIV_H_ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 7313cfe69498..870cd3932015 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1161,4 +1161,9 @@ enum dc_hpd_enable_select { HPD_EN_FOR_SECONDARY_EDP_ONLY, }; +enum mall_stream_type { + SUBVP_NONE, // subvp not in use + SUBVP_MAIN, // subvp in use, this stream is main stream + SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream +}; #endif /* DC_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index e8159a459bce..b3d360741175 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -28,6 +28,7 @@ #include "dcn20/dcn20_resource.h" #include "dml/dcn32/display_mode_vba_util_32.h" #include "dml/dcn32/dcn32_fpu.h" +#include "dc_state_priv.h" static bool is_dual_plane(enum surface_pixel_format format) { @@ -190,7 +191,7 @@ bool dcn32_subvp_in_use(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) + if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) return true; } return false; @@ -264,18 +265,17 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint // Do not override if a stream has multiple planes for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count > 1) { + if (context->stream_status[i].plane_count > 1) return; - } - if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM) { + + if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM) stream_count++; - } } for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { if (dcn32_allow_subvp_high_refresh_rate(dc, context, pipe_ctx)) { if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) { @@ -290,7 +290,7 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (pipe_ctx->stream && pipe_ctx->plane_state && pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx)) { if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) { if (pipe_segments[i] > 4) pipe_segments[i] = 4; @@ -337,14 +337,14 @@ void dcn32_determine_det_override(struct dc *dc, for (i = 0; i < context->stream_count; i++) { /* Don't count SubVP streams for DET allocation */ - if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM) + if 
(dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM) stream_count++; } if (stream_count > 0) { stream_segments = 18 / stream_count; for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) + if (dc_state_get_stream_subvp_type(context, context->streams[i]) == SUBVP_PHANTOM) continue; if (context->stream_status[i].plane_count > 0) @@ -716,10 +716,11 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (resource_is_pipe_type(pipe, OPP_HEAD) && resource_is_pipe_type(pipe, DPP_PIPE)) { - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe_mall_type == SUBVP_MAIN) { subvp_count++; subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); @@ -728,7 +729,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } - if (pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (pipe_mall_type == SUBVP_NONE) { non_subvp_pipes++; drr_psr_capable = (drr_psr_capable || dcn32_is_psr_capable(pipe)); if (pipe->stream->ignore_msa_timing_param && @@ -776,10 +777,11 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (resource_is_pipe_type(pipe, OPP_HEAD) && resource_is_pipe_type(pipe, DPP_PIPE)) { - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe_mall_type == SUBVP_MAIN) { subvp_count++; subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); @@ -788,7 +790,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } - if (pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (pipe_mall_type == SUBVP_NONE) { non_subvp_pipes++; vblank_psr_capable = (vblank_psr_capable || dcn32_is_psr_capable(pipe)); if (pipe->stream->ignore_msa_timing_param && diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index ec77b2b41ba3..1ec8ca1fdb4f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -33,6 +33,7 @@ #include "link.h" #include "dcn20_fpu.h" +#include "dc_state_priv.h" #define DC_LOGGER \ dc->ctx->logger @@ -1074,7 +1075,7 @@ void dcn20_calculate_dlg_params(struct dc *dc, pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; context->res_ctx.pipe_ctx[i].unbounded_req = false; @@ -1424,7 +1425,7 @@ int 
dcn20_populate_dml_pipes_from_context(struct dc *dc, */ if (res_ctx->pipe_ctx[i].plane_state && (res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM)) + dc_state_get_pipe_subvp_type(context, &res_ctx->pipe_ctx[i]) == SUBVP_PHANTOM)) pipes[pipe_cnt].pipe.src.num_cursors = 0; else pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index de209ca0cf8c..50d223fa1e49 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -32,6 +32,7 @@ #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" #include "dcn30/dcn30_resource.h" #include "link.h" +#include "dc_state_priv.h" #define DC_LOGGER_INIT(logger) @@ -341,7 +342,7 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, if (!pipe->stream) continue; - if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->plane_state && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); pipes[pipe_idx].pipe.dest.vupdate_offset = @@ -624,7 +625,7 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc, if (pipe->plane_state && !pipe->top_pipe && !dcn32_is_center_timing(pipe) && !(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) && (!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) && - pipe->stream->mall_stream_config.type == SUBVP_NONE && + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE && (refresh_rate < 120 || dcn32_allow_subvp_high_refresh_rate(dc, context, pipe)) && !pipe->plane_state->address.tmz_surface && (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 || @@ -682,7 +683,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context // Find the minimum pipe split count for non SubVP pipes if (resource_is_pipe_type(pipe, OPP_HEAD) && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE) { split_cnt = 0; while (pipe) { split_cnt++; @@ -735,8 +736,8 @@ static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. 
*/ if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - phantom = pipe->stream->mall_stream_config.paired_stream; + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + phantom = dc_state_get_paired_subvp_stream(context, pipe->stream); microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + phantom->timing.v_addressable; @@ -804,6 +805,7 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) int16_t stretched_drr_us = 0; int16_t drr_stretched_vblank_us = 0; int16_t max_vblank_mallregion = 0; + struct dc_stream_state *phantom_stream; // Find SubVP pipe for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -816,7 +818,7 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) continue; // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) break; } @@ -829,13 +831,14 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) !resource_is_pipe_type(pipe, DPP_PIPE)) continue; - if (drr_pipe->stream->mall_stream_config.type == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param && + if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param && (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) break; } + phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream); main_timing = &pipe->stream->timing; - phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; + phantom_timing = &phantom_stream->timing; drr_timing = &drr_pipe->stream->timing; prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + @@ -895,6 +898,8 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) struct dc_crtc_timing *main_timing = NULL; struct dc_crtc_timing *phantom_timing = NULL; struct dc_crtc_timing *vblank_timing = NULL; + struct dc_stream_state *phantom_stream; + enum mall_stream_type pipe_mall_type; /* For SubVP + VBLANK/DRR cases, we assume there can only be * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK @@ -904,6 +909,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) */ for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); // We check for master pipe, but it shouldn't matter since we only need // the pipe for timing info (stream should be same for any pipe splits) @@ -911,18 +917,19 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) !resource_is_pipe_type(pipe, DPP_PIPE)) continue; - if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (!found && pipe_mall_type == SUBVP_NONE) { // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). 
vblank_index = i; found = true; } - if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } if (found) { + phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; - phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + phantom_timing = &phantom_stream->timing; vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe // Also include the prefetch end to mallstart delay time @@ -977,7 +984,7 @@ static bool subvp_subvp_admissable(struct dc *dc, continue; if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); @@ -1026,23 +1033,23 @@ static bool subvp_validate_static_schedulability(struct dc *dc, for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); if (!pipe->stream) continue; if (pipe->plane_state && !pipe->top_pipe) { - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (pipe_mall_type == SUBVP_MAIN) subvp_count++; - if (pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (pipe_mall_type == SUBVP_NONE) non_subvp_pipes++; - } } // Count how many planes that aren't SubVP/phantom are capable of VACTIVE // switching (SubVP + VACTIVE unsupported). In situations where we force // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. 
if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vlevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + pipe_mall_type == SUBVP_NONE) { vactive_count++; } pipe_idx++; @@ -1078,7 +1085,7 @@ static void assign_subvp_index(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && - pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) { + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { pipe_ctx->subvp_index = index++; } else { pipe_ctx->subvp_index = 0; @@ -1684,7 +1691,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; context->res_ctx.pipe_ctx[i].unbounded_req = false; @@ -1716,7 +1723,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, context->res_ctx.pipe_ctx[i].plane_state != context->res_ctx.pipe_ctx[i].top_pipe->plane_state) && context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { /* SS: all active surfaces stored in MALL */ - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) != SUBVP_PHANTOM) { context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes; if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h index e85866db80ff..7ca7f2a743c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h @@ -38,5 +38,6 @@ #include "core_types.h" #include "dsc.h" #include "clk_mgr.h" +#include "dc_state_priv.h" #endif //__DML2_DC_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c index 32f8a43af3d6..0c146af34d82 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c @@ -51,7 +51,7 @@ unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, // Find the phantom pipes if (pipe->stream && pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; mblk_width = ctx->config.mall_cfg.mblk_width_pixels; mblk_height = bytes_per_pixel == 4 ? mblk_width = ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels; @@ -253,7 +253,7 @@ static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context * to combine this with SubVP can cause issues with the scheduling). 
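The admissibility check above derives an integer refresh rate with an add-then-divide rounding trick, and the DML2 pipe-assignment code just below compares that kind of value against a 120 Hz bound. Here is the same computation as a small standalone program with made-up but realistic 1080p60 timing; the kernel's div_u64() is replaced by plain 64-bit division only so the snippet builds outside the kernel.

  #include <stdint.h>
  #include <stdio.h>

  /* div_u64() from the kernel is just 64-bit division for this sketch. */
  static uint64_t div_u64(uint64_t n, uint32_t d) { return n / d; }

  int main(void)
  {
      /* Illustrative mode: 2200x1125 total, 148.5 MHz pixel clock (1080p60). */
      uint64_t pix_clk_100hz = 1485000;   /* pixel clock in 100 Hz units */
      uint64_t v_total = 1125, h_total = 2200;

      /* Same add-then-divide rounding the admissibility check uses,
       * roughly ceil(pixel_clock / (v_total * h_total)). */
      uint64_t refresh_rate = pix_clk_100hz * (uint64_t)100 +
                              v_total * h_total - (uint64_t)1;
      refresh_rate = div_u64(refresh_rate, v_total);
      refresh_rate = div_u64(refresh_rate, h_total);

      printf("refresh_rate = %llu Hz\n", (unsigned long long)refresh_rate);
      return 0;
  }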
*/ if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120 && + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 && vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { while (pipe) { num_pipes++; @@ -317,7 +317,7 @@ static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *st // Find the minimum pipe split count for non SubVP pipes if (pipe->stream && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) { split_cnt = 0; while (pipe) { split_cnt++; @@ -372,8 +372,8 @@ static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *c * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. */ if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - phantom = pipe->stream->mall_stream_config.paired_stream; + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + phantom->timing.v_addressable; @@ -435,6 +435,7 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context struct pipe_ctx *pipe = NULL; struct dc_crtc_timing *main_timing = NULL; struct dc_crtc_timing *phantom_timing = NULL; + struct dc_stream_state *phantom_stream; int16_t prefetch_us = 0; int16_t mall_region_us = 0; int16_t drr_frame_us = 0; // nominal frame time @@ -453,12 +454,13 @@ bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context continue; // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) break; } + phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); main_timing = &pipe->stream->timing; - phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; + phantom_timing = &phantom_stream->timing; prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us; @@ -519,6 +521,8 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state * struct dc_crtc_timing *main_timing = NULL; struct dc_crtc_timing *phantom_timing = NULL; struct dc_crtc_timing *vblank_timing = NULL; + struct dc_stream_state *phantom_stream; + enum mall_stream_type pipe_mall_type; /* For SubVP + VBLANK/DRR cases, we assume there can only be * a single VBLANK/DRR display. 
If DML outputs SubVP + VBLANK @@ -528,19 +532,20 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state * */ for (i = 0; i < ctx->config.dcn_pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); // We check for master pipe, but it shouldn't matter since we only need // the pipe for timing info (stream should be same for any pipe splits) if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { + if (!found && pipe_mall_type == SUBVP_NONE) { // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). vblank_index = i; found = true; } - if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) + if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } // Use ignore_msa_timing_param flag to identify as DRR @@ -548,8 +553,9 @@ static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state * // SUBVP + DRR case schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing); } else if (found) { + phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; - phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + phantom_timing = &phantom_stream->timing; vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe // Also include the prefetch end to mallstart delay time @@ -602,19 +608,20 @@ bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); if (!pipe->stream) continue; if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) + pipe_mall_type == SUBVP_MAIN) subvp_count++; // Count how many planes that aren't SubVP/phantom are capable of VACTIVE // switching (SubVP + VACTIVE unsupported). In situations where we force // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. 
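Throughout these dml2_mall_phantom.c hunks the pattern is the same: the DML2 wrapper never touches dc_state internals itself, it calls whatever the resource code installed in config.svp_pstate.callbacks. The cut-down sketch below shows that indirection in isolation; the struct and function names are simplified stand-ins for dml2_dc_svp_callbacks and its users, not the real definitions. The patch only swaps which helpers are installed behind the pointers, so call sites keep this shape.

  /* Cut-down model of the DML2 SubVP callback indirection. */
  enum mall_stream_type { SUBVP_NONE, SUBVP_MAIN, SUBVP_PHANTOM };

  struct dc_state;                      /* opaque in this sketch */
  struct pipe_ctx { int pipe_idx; };    /* stand-in */

  struct svp_callbacks {                /* stand-in for dml2_dc_svp_callbacks */
      enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state,
                                                   const struct pipe_ctx *pipe_ctx);
  };

  struct dml2_ctx_sketch {
      struct svp_callbacks callbacks;   /* wired up by the resource code */
  };

  /* DML2-side logic only ever asks the callback, never dc_state internals. */
  static int count_subvp_main_pipes(const struct dml2_ctx_sketch *ctx,
                                    const struct dc_state *state,
                                    const struct pipe_ctx *pipes, int pipe_count)
  {
      int i, count = 0;

      for (i = 0; i < pipe_count; i++)
          if (ctx->callbacks.get_pipe_subvp_type(state, &pipes[i]) == SUBVP_MAIN)
              count++;
      return count;
  }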
if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { + pipe_mall_type == SUBVP_NONE) { vactive_count++; } pipe_idx++; @@ -708,14 +715,10 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup) { struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx]; - struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_stream_for_sink(ref_pipe->stream->sink); - - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; - ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; - ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; + struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream( + ctx->config.svp_pstate.callbacks.dc, + state, + ref_pipe->stream); /* stream has limited viewport and small timing */ memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); @@ -723,7 +726,10 @@ static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, s memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup); - ctx->config.svp_pstate.callbacks.add_stream_to_ctx(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); + ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc, + state, + phantom_stream, + ref_pipe->stream); return phantom_stream; } @@ -740,7 +746,10 @@ static void enable_phantom_plane(struct dml2_context *ctx, if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) { phantom_plane = prev_phantom_plane; } else { - phantom_plane = ctx->config.svp_pstate.callbacks.create_plane(ctx->config.svp_pstate.callbacks.dc); + phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane( + ctx->config.svp_pstate.callbacks.dc, + state, + curr_pipe->plane_state); } memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); @@ -763,9 +772,7 @@ static void enable_phantom_plane(struct dml2_context *ctx, phantom_plane->clip_rect.y = 0; phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; - phantom_plane->is_phantom = true; - - ctx->config.svp_pstate.callbacks.add_plane_to_context(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state); + ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state); curr_pipe = curr_pipe->bottom_pipe; prev_phantom_plane = phantom_plane; @@ -790,7 +797,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_ // We determine which phantom pipes were added by comparing with // the phantom stream. 
if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { pipe->stream->use_dynamic_meta = false; pipe->plane_state->flip_immediate = false; if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) { @@ -822,7 +829,7 @@ static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_str del_planes[i] = stream_status->plane_states[i]; for (i = 0; i < old_plane_count; i++) - if (!ctx->config.svp_pstate.callbacks.remove_plane_from_context(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context)) + if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context)) return false; return true; @@ -838,19 +845,23 @@ bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state for (i = 0; i < ctx->config.dcn_pipe_count; i++) { struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { phantom_plane = pipe->plane_state; phantom_stream = pipe->stream; remove_all_planes_for_stream(ctx, pipe->stream, state); - ctx->config.svp_pstate.callbacks.remove_stream_from_ctx(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream); + ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream); /* Ref count is incremented on allocation and also when added to the context. * Therefore we must call release for the the phantom plane and stream once * they are removed from the ctx to finally decrement the refcount to 0 to free. 
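The comment above about the reference count being taken once on allocation and once when the object is added to the context is the reason a remove call alone never frees a phantom object; the matching release is still required afterwards. The toy below walks through that pairing with a plain counter; it is a generic illustration, not modelled on the DC object code.

  #include <stdio.h>
  #include <stdlib.h>

  struct obj { int refcount; };

  static struct obj *obj_create(void)        /* allocation takes the first ref */
  {
      struct obj *o = malloc(sizeof(*o));
      o->refcount = 1;
      return o;
  }

  static void obj_retain(struct obj *o)  { o->refcount++; }

  static void obj_release(struct obj *o)
  {
      if (--o->refcount == 0) {
          printf("freed\n");
          free(o);
      }
  }

  int main(void)
  {
      struct obj *phantom = obj_create();  /* refcount = 1 (creation)         */
      obj_retain(phantom);                 /* refcount = 2 (added to context) */

      obj_release(phantom);                /* removed from context -> 1       */
      obj_release(phantom);                /* final release -> 0, freed       */
      return 0;
  }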
*/ - ctx->config.svp_pstate.callbacks.plane_state_release(phantom_plane); - ctx->config.svp_pstate.callbacks.stream_release(phantom_stream); + ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, + state, + phantom_plane); + ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, + state, + phantom_stream); removed_pipe = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 279e7605a0a2..bc2959fda5be 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -1051,6 +1051,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat { int i = 0, j = 0; int disp_cfg_stream_location, disp_cfg_plane_location; + enum mall_stream_type stream_mall_type; for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false; @@ -1071,6 +1072,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat for (i = 0; i < context->stream_count; i++) { disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg); + stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]); if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_timings++; @@ -1115,10 +1117,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, context->stream_status[i].plane_states[j], context); - if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN) { + if (stream_mall_type == SUBVP_MAIN) { dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize; - } else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) { + } else if (stream_mall_type == SUBVP_PHANTOM) { dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe; dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable; dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required; @@ -1147,9 +1149,9 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat break; } - if (context->streams[i]->mall_stream_config.type == SUBVP_MAIN) + if (stream_mall_type == SUBVP_MAIN) dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; - else if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM) + else if (stream_mall_type == SUBVP_PHANTOM) dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe; dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 814dbdcf9a78..9156e2ea55f5 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -279,6 +279,7 @@ static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, str void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt) { unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id; + enum mall_stream_type pipe_mall_type; bool unbounded_req_enabled = false; struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch; @@ -326,7 +327,8 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont */ populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx); - if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]); + if (pipe_mall_type == SUBVP_PHANTOM) { // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0; context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false; @@ -353,7 +355,7 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) && context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) { /* SS: all active surfaces stored in MALL */ - if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (pipe_mall_type != SUBVP_PHANTOM) { context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes; } else { /* SUBVP: phantom surfaces only stored in MALL */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 9d354fde6908..638591ed057d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -418,7 +418,7 @@ static int find_drr_eligible_stream(struct dc_state *display_state) int i; for (i = 0; i < display_state->stream_count; i++) { - if (display_state->streams[i]->mall_stream_config.type == SUBVP_NONE + if (dc_state_get_stream_subvp_type(display_state, display_state->streams[i]) == SUBVP_NONE && display_state->streams[i]->ignore_msa_timing_param) { // Use ignore_msa_timing_param flag to identify as DRR return i; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 0de6886969c6..505a5d7ae20d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -93,15 +93,31 @@ struct dml2_dc_callbacks { struct dml2_dc_svp_callbacks { struct dc *dc; bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); - struct dc_stream_state* (*create_stream_for_sink)(struct dc_sink *dc_sink_data); - struct dc_plane_state* (*create_plane)(struct dc *dc); - enum dc_status (*add_stream_to_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); - bool (*add_plane_to_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); - bool 
(*remove_plane_from_context)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); - enum dc_status (*remove_stream_from_ctx)(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *stream); - void (*plane_state_release)(struct dc_plane_state *plane_state); - void (*stream_release)(struct dc_stream_state *stream); + struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *main_stream); + struct dc_plane_state* (*create_phantom_plane)(struct dc *dc, + struct dc_state *state, + struct dc_plane_state *main_plane); + enum dc_status (*add_phantom_stream)(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *phantom_stream, + struct dc_stream_state *main_stream); + bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); + bool (*remove_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); + enum dc_status (*remove_phantom_stream)(struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); + void (*release_phantom_plane)(const struct dc *dc, + struct dc_state *state, + struct dc_plane_state *plane); + void (*release_phantom_stream)(const struct dc *dc, + struct dc_state *state, + struct dc_stream_state *stream); void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc); + enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx); + enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream); + struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream); }; struct dml2_clks_table_entry { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index c73fe5e9b361..968f6ba7ccf6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -55,6 +55,7 @@ #include "audio.h" #include "reg_helper.h" #include "panel_cntl.h" +#include "dc_state_priv.h" #include "dpcd_defs.h" /* include DCE11 register header files */ #include "dce/dce_11_0_d.h" @@ -1596,7 +1597,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( * is constructed with the same sink). Make sure not to override * and link programming on the main. 
*/ - if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false; pipe_ctx->stream->link->replay_settings.replay_feature_enabled = false; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 0d9e41315f84..96fb563b1009 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -56,6 +56,7 @@ #include "dc_trace.h" #include "dce/dmub_outbox.h" #include "link.h" +#include "dc_state_priv.h" #define DC_LOGGER \ dc_logger @@ -115,7 +116,7 @@ void dcn10_lock_all_pipes(struct dc *dc, !pipe_ctx->stream || (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) || !tg->funcs->is_tg_enabled(tg) || - pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) continue; if (lock) @@ -1200,7 +1201,7 @@ void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove); // Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle, // so don't wait for MPCC_IDLE in the programming sequence - if (opp != NULL && !pipe_ctx->plane_state->is_phantom) + if (opp != NULL && dc_state_get_pipe_subvp_type(NULL, pipe_ctx) != SUBVP_PHANTOM) opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; dc->optimized_required = true; @@ -2276,7 +2277,7 @@ void dcn10_enable_timing_synchronization( DC_SYNC_INFO("Setting up OTG reset trigger\n"); for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) continue; opp = grouped_pipes[i]->stream_res.opp; @@ -2296,14 +2297,14 @@ void dcn10_enable_timing_synchronization( if (grouped_pipes[i]->stream == NULL) continue; - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream->vblank_synchronized = false; } for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger( @@ -2317,11 +2318,11 @@ void dcn10_enable_timing_synchronization( * synchronized. Look at last pipe programmed to reset. 
*/ - if (grouped_pipes[1]->stream && grouped_pipes[1]->stream->mall_stream_config.type != SUBVP_PHANTOM) + if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[1]) != SUBVP_PHANTOM) wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg); for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger( @@ -2329,7 +2330,7 @@ void dcn10_enable_timing_synchronization( } for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) continue; opp = grouped_pipes[i]->stream_res.opp; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 72c580ec650c..0c9f4ea109cb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -55,6 +55,7 @@ #include "inc/link_enc_cfg.h" #include "link_hwss.h" #include "link.h" +#include "dc_state_priv.h" #define DC_LOGGER \ dc_logger @@ -625,7 +626,7 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) { - bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom; + bool is_phantom = dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_PHANTOM; struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL; DC_LOGGER_INIT(dc->ctx->logger); @@ -847,7 +848,7 @@ enum dc_status dcn20_enable_stream_timing( /* TODO enable stream if timing changed */ /* TODO unblank stream if DP */ - if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) { if (pipe_ctx->stream_res.tg && pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable) pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable(pipe_ctx->stream_res.tg); } @@ -1370,6 +1371,9 @@ void dcn20_pipe_control_lock( static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe) { + bool old_is_phantom = dc_state_get_pipe_subvp_type(NULL, old_pipe) == SUBVP_PHANTOM; + bool new_is_phantom = dc_state_get_pipe_subvp_type(NULL, new_pipe) == SUBVP_PHANTOM; + new_pipe->update_flags.raw = 0; /* If non-phantom pipe is being transitioned to a phantom pipe, @@ -1379,8 +1383,8 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx * be different). The post_unlock sequence will set the correct * update flags to enable the phantom pipe. */ - if (old_pipe->plane_state && !old_pipe->plane_state->is_phantom && - new_pipe->plane_state && new_pipe->plane_state->is_phantom) { + if (old_pipe->plane_state && !old_is_phantom && + new_pipe->plane_state && new_is_phantom) { new_pipe->update_flags.bits.disable = 1; return; } @@ -1416,14 +1420,14 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx * The remove-add sequence of the phantom pipe always results in the pipe * being blanked in enable_stream_timing (DPG). 
*/ - if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) + if (new_pipe->stream && dc_state_get_pipe_subvp_type(NULL, new_pipe) == SUBVP_PHANTOM) new_pipe->update_flags.bits.enable = 1; /* Phantom pipes are effectively disabled, if the pipe was previously phantom * we have to enable */ - if (old_pipe->plane_state && old_pipe->plane_state->is_phantom && - new_pipe->plane_state && !new_pipe->plane_state->is_phantom) + if (old_pipe->plane_state && old_is_phantom && + new_pipe->plane_state && !new_is_phantom) new_pipe->update_flags.bits.enable = 1; if (old_pipe->plane_state && !new_pipe->plane_state) { @@ -1560,6 +1564,7 @@ static void dcn20_update_dchubp_dpp( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct dccg *dccg = dc->res_pool->dccg; bool viewport_changed = false; + enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe_ctx); if (pipe_ctx->update_flags.bits.dppclk) dpp->funcs->dpp_dppclk_control(dpp, false, true); @@ -1705,7 +1710,7 @@ static void dcn20_update_dchubp_dpp( pipe_ctx->update_flags.bits.plane_changed || plane_state->update_flags.bits.addr_update) { if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && - pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) { + pipe_mall_type == SUBVP_MAIN) { union block_sequence_params params; params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; @@ -1719,7 +1724,7 @@ static void dcn20_update_dchubp_dpp( if (pipe_ctx->update_flags.bits.enable) hubp->funcs->set_blank(hubp, false); /* If the stream paired with this plane is phantom, the plane is also phantom */ - if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM + if (pipe_ctx->stream && pipe_mall_type == SUBVP_PHANTOM && hubp->funcs->phantom_hubp_post_enable) hubp->funcs->phantom_hubp_post_enable(hubp); } @@ -1926,15 +1931,16 @@ void dcn20_program_front_end_for_ctx( struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && - dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; if (tg->funcs->enable_crtc) { if (dc->hwss.blank_phantom) { int main_pipe_width, main_pipe_height; + struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(dc->current_state, dc->current_state->res_ctx.pipe_ctx[i].stream); - main_pipe_width = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.width; - main_pipe_height = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.height; + main_pipe_width = phantom_stream->dst.width; + main_pipe_height = phantom_stream->dst.height; dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height); } tg->funcs->enable_crtc(tg); @@ -1963,7 +1969,7 @@ void dcn20_program_front_end_for_ctx( * DET allocation. 
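dcn20_detect_pipe_changes() above encodes a small rule set for SubVP transitions: a regular pipe becoming phantom is disabled now and brought back in post_unlock, while a pipe that is or was phantom is pushed through the full enable path. The predicate below restates those cases on plain flags so they are easier to scan; it mirrors only the checks visible in this hunk (the middle case keys off the new pipe's stream in the real code) and is not the complete function.

  /* 0 = no flag, 1 = disable now, 2 = full enable; illustration only. */
  int phantom_transition_action(int old_has_plane, int old_is_phantom,
                                int new_has_plane, int new_is_phantom)
  {
      /* regular -> phantom: disable first, the phantom comes up in post_unlock */
      if (old_has_plane && !old_is_phantom && new_has_plane && new_is_phantom)
          return 1;

      /* a pipe that is phantom in the new state runs the full enable sequence */
      if (new_is_phantom)
          return 2;

      /* phantom -> regular: also re-enable from scratch */
      if (old_has_plane && old_is_phantom && new_has_plane && !new_is_phantom)
          return 2;

      return 0;
  }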
*/ if (hubbub->funcs->program_det_size && (context->res_ctx.pipe_ctx[i].update_flags.bits.disable || - (context->res_ctx.pipe_ctx[i].plane_state && context->res_ctx.pipe_ctx[i].plane_state->is_phantom))) + (context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM))) hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]); DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); @@ -1988,7 +1994,7 @@ void dcn20_program_front_end_for_ctx( * but the MPO still exists until the double buffered update of the main pipe so we * will get a frame of underflow if the phantom pipe is programmed here. */ - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) dcn20_program_pipe(dc, pipe, context); } @@ -2050,7 +2056,7 @@ void dcn20_post_unlock_program_front_end( struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // Don't check flip pending on phantom pipes if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable && - pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) { struct hubp *hubp = pipe->plane_res.hubp; int j = 0; for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us @@ -2073,7 +2079,7 @@ void dcn20_post_unlock_program_front_end( * programming sequence). */ while (pipe) { - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { /* When turning on the phantom pipe we want to run through the * entire enable sequence, so apply all the "enable" flags. 
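Several hunks in this area manipulate the per-pipe update flags either wholesale through update_flags.raw or bit by bit through update_flags.bits, which is what lets the post_unlock path above apply the whole set of enable flags in one go. The union below is a minimal stand-in for that layout; the real flag set has many more bits under different names.

  #include <stdint.h>
  #include <stdio.h>

  /* Simplified stand-in for a pipe's update_flags union. */
  union update_flags {
      struct {
          uint32_t enable  : 1;
          uint32_t disable : 1;
          uint32_t mpcc    : 1;
          uint32_t dppclk  : 1;
          /* the real union carries many more bits */
      } bits;
      uint32_t raw;
  };

  static void start_fresh_enable(union update_flags *f)
  {
      f->raw = 0;            /* clear every pending flag in one store         */
      f->bits.enable = 1;    /* then request the full enable sequence         */
      f->bits.mpcc = 1;      /* individual bits are set by name from here on  */
  }

  int main(void)
  {
      union update_flags f;
      start_fresh_enable(&f);
      printf("raw = 0x%x\n", f.raw);   /* low bits now reflect enable + mpcc */
      return 0;
  }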
*/ @@ -2143,7 +2149,7 @@ void dcn20_prepare_bandwidth( struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // At optimize don't restore the original watermark value - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) { context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; break; } @@ -2187,7 +2193,7 @@ void dcn20_optimize_bandwidth( struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // At optimize don't need to restore the original watermark value - if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) { context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index d337578d7e73..327c227a10ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -51,7 +51,7 @@ #include "dcn20/dcn20_hwseq.h" #include "dcn30/dcn30_resource.h" #include "link.h" - +#include "dc_state_priv.h" @@ -966,7 +966,7 @@ void dcn30_hardware_release(struct dc *dc) if (!pipe->stream) continue; - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_MAIN) { subvp_in_use = true; break; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index cb9d8389329f..d3d16c7af38c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -51,6 +51,7 @@ #include "dcn32/dcn32_resource.h" #include "link.h" #include "../dcn20/dcn20_hwseq.h" +#include "dc_state_priv.h" #define DC_LOGGER_INIT(logger) @@ -349,7 +350,7 @@ void dcn32_commit_subvp_config(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.paired_stream && - pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) { + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { // There is at least 1 SubVP pipe, so enable SubVP enable_subvp = true; break; @@ -375,18 +376,20 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, bool subvp_immediate_flip = false; bool subvp_in_use = false; struct pipe_ctx *pipe; + enum mall_stream_type pipe_mall_type = SUBVP_NONE; for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; + pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe); - if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN) { subvp_in_use = true; break; } } if (top_pipe_to_program && top_pipe_to_program->stream && top_pipe_to_program->plane_state) { - if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN && + if (dc_state_get_pipe_subvp_type(context, top_pipe_to_program) == SUBVP_MAIN && top_pipe_to_program->plane_state->flip_immediate) subvp_immediate_flip = true; } @@ -398,7 +401,7 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, if (!lock) { for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->plane_state 
&& pipe->stream->mall_stream_config.type == SUBVP_MAIN && + if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN && should_lock_all_pipes) pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK); } @@ -420,7 +423,7 @@ void dcn32_subvp_pipe_control_lock_fast(union block_sequence_params *params) bool subvp_immediate_flip = false; if (pipe_ctx && pipe_ctx->stream && pipe_ctx->plane_state) { - if (pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN && + if (dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_MAIN && pipe_ctx->plane_state->flip_immediate) subvp_immediate_flip = true; } @@ -609,7 +612,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; - if (!pipe->stream || !(pipe->stream->mall_stream_config.type == SUBVP_MAIN || + if (!pipe->stream || !(dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN || pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, false); @@ -624,7 +627,7 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; - if (pipe->stream && (pipe->stream->mall_stream_config.type == SUBVP_MAIN || + if (pipe->stream && (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN || pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, true); @@ -671,8 +674,8 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) if (cursor_size > 16384) cache_cursor = true; - if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - hubp->funcs->hubp_update_mall_sel(hubp, 1, false); + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + hubp->funcs->hubp_update_mall_sel(hubp, 1, false); } else { // MALL not supported with Stereo3D hubp->funcs->hubp_update_mall_sel(hubp, @@ -714,9 +717,8 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context) * see if CURSOR_REQ_MODE will be back to 1 for SubVP * when it should be 0 for MPO */ - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) hubp->funcs->hubp_prepare_subvp_buffering(hubp, true); - } } } } @@ -1228,7 +1230,7 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_ continue; if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal)) - && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) { + && dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) { pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); otg_disabled[i] = true; @@ -1379,7 +1381,7 @@ void dcn32_update_phantom_vp_position(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_MAIN && + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN && pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) { if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) { @@ -1405,7 +1407,7 @@ void dcn32_update_phantom_vp_position(struct dc *dc, void 
dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe) { phantom_pipe->update_flags.raw = 0; - if (phantom_pipe->stream && phantom_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (dc_state_get_pipe_subvp_type(NULL, phantom_pipe) == SUBVP_PHANTOM) { if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) { phantom_pipe->update_flags.bits.enable = 1; phantom_pipe->update_flags.bits.mpcc = 1; @@ -1491,8 +1493,8 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) * pipe, wait for the double buffer update to complete first before we do * ANY phantom pipe programming. */ - if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM && - old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) { + if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM && + old_pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) != SUBVP_PHANTOM) { old_pipe->stream_res.tg->funcs->wait_for_state( old_pipe->stream_res.tg, CRTC_STATE_VBLANK); @@ -1504,7 +1506,7 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; - if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (new_pipe->stream && dc_state_get_pipe_subvp_type(context, new_pipe) == SUBVP_PHANTOM) { // If old context or new context has phantom pipes, apply // the phantom timings now. We can't change the phantom // pipe configuration safely without driver acquiring diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index f48001317fab..56a4bc476684 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -56,6 +56,7 @@ #include "dcn30/dcn30_cm_common.h" #include "dcn31/dcn31_hwseq.h" #include "dcn20/dcn20_hwseq.h" +#include "dc_state_priv.h" #define DC_LOGGER_INIT(logger) \ struct dal_logger *dc_logger = logger @@ -948,7 +949,7 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; - bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom; + bool is_phantom = dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_PHANTOM; struct timing_generator *tg = is_phantom ? 
pipe_ctx->stream_res.tg : NULL; DC_LOGGER_INIT(dc->ctx->logger); diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 0458d2d749f4..c7a00a28c3b0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -622,5 +622,4 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, struct pipe_ctx *pipe_ctx); bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream); - #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index f6cbcc9b4006..dbcb9c5ea9af 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -89,6 +89,8 @@ #include "dcn20/dcn20_vmid.h" #include "dml/dcn32/dcn32_fpu.h" +#include "dc_state_priv.h" + #include "dml2/dml2_wrapper.h" #define DC_LOGGER_INIT(logger) @@ -1644,7 +1646,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc, if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) phantom_plane = prev_phantom_plane; else - phantom_plane = dc_create_plane_state(dc); + phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, @@ -1665,9 +1667,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc, phantom_plane->clip_rect.y = 0; phantom_plane->clip_rect.height = phantom_stream->src.height; - phantom_plane->is_phantom = true; - - dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); + dc_state_add_phantom_plane(dc, phantom_stream, phantom_plane, context); curr_pipe = curr_pipe->bottom_pipe; prev_phantom_plane = phantom_plane; @@ -1683,13 +1683,7 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, struct dc_stream_state *phantom_stream = NULL; struct pipe_ctx *ref_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; - phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; - ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; - ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; + phantom_stream = dc_state_create_phantom_stream(dc, context, ref_pipe->stream); /* stream has limited viewport and small timing */ memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); @@ -1699,7 +1693,7 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx); DC_FP_END(); - dc_add_stream_to_ctx(dc, context, phantom_stream); + dc_state_add_phantom_stream(dc, context, phantom_stream, ref_pipe->stream); return phantom_stream; } @@ -1714,7 +1708,7 @@ void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context) if (resource_is_pipe_type(pipe, OTG_MASTER) && resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(context, pipe) == 
SUBVP_PHANTOM) { phantom_plane = pipe->plane_state; phantom_stream = pipe->stream; @@ -1735,41 +1729,33 @@ bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fa for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { phantom_plane = pipe->plane_state; phantom_stream = pipe->stream; - dc_rem_all_planes_for_stream(dc, pipe->stream, context); - dc_remove_stream_from_ctx(dc, context, pipe->stream); + dc_state_rem_all_planes_for_stream(dc, pipe->stream, context); + + /* For non-full updates, a shallow copy of the current state + * is created. In this case we don't want to erase the current + * state (there can be 2 HIRQL threads, one in flip, and one in + * checkMPO) that can cause a race condition. + * + * This is just a workaround, needs a proper fix. + */ + if (!fast_update) + dc_state_remove_phantom_stream(dc, context, pipe->stream); + else + dc_state_remove_stream(dc, context, pipe->stream); /* Ref count is incremented on allocation and also when added to the context. * Therefore we must call release for the the phantom plane and stream once * they are removed from the ctx to finally decrement the refcount to 0 to free. */ - dc_plane_state_release(phantom_plane); - dc_stream_release(phantom_stream); + dc_state_release_phantom_plane(dc, context, phantom_plane); + dc_state_release_phantom_stream(dc, context, phantom_stream); removed_pipe = true; } - - /* For non-full updates, a shallow copy of the current state - * is created. In this case we don't want to erase the current - * state (there can be 2 HIRQL threads, one in flip, and one in - * checkMPO) that can cause a race condition. - * - * This is just a workaround, needs a proper fix. - */ - if (!fast_update) { - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } - - if (pipe->plane_state) { - pipe->plane_state->is_phantom = false; - } - } } return removed_pipe; } @@ -1798,7 +1784,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, // We determine which phantom pipes were added by comparing with // the phantom stream. if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { pipe->stream->use_dynamic_meta = false; pipe->plane_state->flip_immediate = false; if (!resource_build_scaling_params(pipe)) { @@ -1933,7 +1919,7 @@ int dcn32_populate_dml_pipes_from_context( * This is just a workaround -- needs a proper fix. 
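The fast_update branch above exists because a non-full update operates on a shallow copy of the current state: the copy owns its top-level arrays, but the stream and plane objects behind the pointers are shared, so clearing SubVP bookkeeping through the copy would also clear it for the thread still using the original. The short program below demonstrates that sharing in plain C; it is a generic illustration, not DC code.

  #include <stdio.h>
  #include <string.h>

  struct stream { int subvp_type; };          /* shared, pointed-to object   */
  struct state  { struct stream *streams[2]; int stream_count; };

  int main(void)
  {
      struct stream s = { .subvp_type = 1 };  /* imagine 1 == SUBVP_MAIN     */
      struct state current = { .streams = { &s }, .stream_count = 1 };
      struct state copy;

      memcpy(&copy, &current, sizeof(copy));  /* shallow copy: pointers only */

      copy.streams[0]->subvp_type = 0;        /* "clear" it via the copy ... */

      /* ... and the original now sees the cleared value too. */
      printf("current sees subvp_type = %d\n", current.streams[0]->subvp_type);
      return 0;
  }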
*/ if (!fast_validate) { - switch (pipe->stream->mall_stream_config.type) { + switch (dc_state_get_pipe_subvp_type(context, pipe)) { case SUBVP_MAIN: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; subvp_in_use = true; @@ -2454,16 +2440,19 @@ static bool dcn32_resource_construct( dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; dc->dml2_options.svp_pstate.callbacks.dc = dc; - dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context; - dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx; + dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream; dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state; - dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context; - dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx; - dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink; - dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release; - dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release; + dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream; + dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream; + dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream; dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; + dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type; + dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type; + dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream; dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 12986fe0b289..8c278c2fd287 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -92,6 +92,8 @@ #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "dc_state_priv.h" + #define DC_LOGGER_INIT(logger) enum dcn321_clk_src_array_id { @@ -2008,16 +2010,19 @@ static bool dcn321_resource_construct( dc->dml2_options.callbacks.get_opp_head = &resource_get_opp_head; dc->dml2_options.svp_pstate.callbacks.dc = dc; - dc->dml2_options.svp_pstate.callbacks.add_plane_to_context = &dc_add_plane_to_context; - dc->dml2_options.svp_pstate.callbacks.add_stream_to_ctx = &dc_add_stream_to_ctx; + dc->dml2_options.svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.add_phantom_stream = 
&dc_state_add_phantom_stream; dc->dml2_options.svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params; - dc->dml2_options.svp_pstate.callbacks.create_plane = &dc_create_plane_state; - dc->dml2_options.svp_pstate.callbacks.remove_plane_from_context = &dc_remove_plane_from_context; - dc->dml2_options.svp_pstate.callbacks.remove_stream_from_ctx = &dc_remove_stream_from_ctx; - dc->dml2_options.svp_pstate.callbacks.create_stream_for_sink = &dc_create_stream_for_sink; - dc->dml2_options.svp_pstate.callbacks.plane_state_release = &dc_plane_state_release; - dc->dml2_options.svp_pstate.callbacks.stream_release = &dc_stream_release; + dc->dml2_options.svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream; + dc->dml2_options.svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream; + dc->dml2_options.svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane; + dc->dml2_options.svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream; dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc; + dc->dml2_options.svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type; + dc->dml2_options.svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type; + dc->dml2_options.svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream; dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us; dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 4e1db842b98c..d0c3c5b47094 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -102,6 +102,8 @@ #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "dc_state_priv.h" + #include "link_enc_cfg.h" #define DC_LOGGER_INIT(logger) -- cgit v1.2.3 From 012a04b1d6af629077bf98e172d946bf893a4726 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Tue, 21 Nov 2023 15:07:01 -0500 Subject: drm/amd/display: Refactor phantom resource allocation [WHY?] Phantom streams and planes were previously not referenced explicitly on creation. [HOW?] To reduce memory management complexity, add an additional phantom streams and planes reference into dc_state, and move mall_stream_config to stream_status inside the state to make it safe to modify in shallow copies. Also consolidates any logic that is affected by this change to dc_state.
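As a rough picture of the [WHY?]/[HOW?] paragraphs: after this change the state itself keeps references to the phantom streams and planes it created, and the mall_stream_config pairing lives in the per-stream status inside the state rather than on dc_stream_state. The layout below is only an orientation sketch; field names, bounds and the surrounding contents are guesses, and the authoritative definitions are in dc_state.h/dc_state_priv.h touched by this patch.

  /* Hypothetical orientation sketch; not the real dc_state layout. */
  #define MAX_SKETCH_STREAMS 6

  enum mall_stream_type { SUBVP_NONE, SUBVP_MAIN, SUBVP_PHANTOM };

  struct dc_stream_state;                     /* opaque here */
  struct dc_plane_state;                      /* opaque here */

  struct mall_stream_config {
      enum mall_stream_type type;
      struct dc_stream_state *paired_stream;  /* main <-> phantom link */
  };

  struct state_sketch {
      int stream_count;
      struct dc_stream_state *streams[MAX_SKETCH_STREAMS];

      struct {
          /* moved here from dc_stream_state, so a state (or a shallow copy
           * of it) can change SubVP roles without touching the shared stream */
          struct mall_stream_config mall_stream_config;
      } stream_status[MAX_SKETCH_STREAMS];

      /* new: explicit references to the phantoms created for this state,
       * taken by the dc_state phantom helpers and dropped on release */
      int phantom_stream_count;
      struct dc_stream_state *phantom_streams[MAX_SKETCH_STREAMS];
      int phantom_plane_count;
      struct dc_plane_state *phantom_planes[MAX_SKETCH_STREAMS];
  };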
Reviewed-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Acked-by: Wayne Lin Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 91 ++--- .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 11 +- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 128 +----- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 428 ++++++++++++++++++--- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 13 +- drivers/gpu/drm/amd/display/dc/dc.h | 18 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 7 +- drivers/gpu/drm/amd/display/dc/dc_state.h | 6 +- drivers/gpu/drm/amd/display/dc/dc_state_priv.h | 15 +- drivers/gpu/drm/amd/display/dc/dc_stream.h | 30 +- drivers/gpu/drm/amd/display/dc/dc_stream_priv.h | 2 + .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 65 ---- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 6 +- .../drm/amd/display/dc/dml2/dml2_mall_phantom.c | 30 +- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h | 5 +- .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 7 +- .../drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 25 +- .../drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h | 7 +- .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 27 +- .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h | 2 +- .../drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c | 8 +- .../drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h | 2 +- .../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 42 +- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 8 +- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h | 2 +- drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 8 +- .../drm/amd/display/dc/hwss/hw_sequencer_private.h | 1 + drivers/gpu/drm/amd/display/dc/inc/core_types.h | 20 +- drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 1 + .../amd/display/dc/resource/dcn32/dcn32_resource.c | 85 ---- .../amd/display/dc/resource/dcn32/dcn32_resource.h | 14 - .../display/dc/resource/dcn321/dcn321_resource.c | 4 - 33 files changed, 578 insertions(+), 549 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b3ffb25dc6b1..fc230ecc7180 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2608,12 +2608,10 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) memset(del_streams, 0, sizeof(del_streams)); - context = dc_state_create(dc); + context = dc_state_create_current_copy(dc); if (context == NULL) goto context_alloc_fail; - dc_resource_state_copy_construct_current(dc, context); - /* First remove from context all streams */ for (i = 0; i < context->stream_count; i++) { struct dc_stream_state *stream = context->streams[i]; @@ -2923,7 +2921,6 @@ static int dm_resume(void *handle) dc_state_release(dm_state->context); dm_state->context = dc_state_create(dm->dc); /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ - dc_resource_state_construct(dm->dc, dm_state->context); /* Before powering on DC we need to re-initialize DMUB. 
*/ dm_dmub_hw_resume(adev); @@ -4051,14 +4048,12 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) if (!state) return -ENOMEM; - state->context = dc_state_create(adev->dm.dc); + state->context = dc_state_create_current_copy(adev->dm.dc); if (!state->context) { kfree(state); return -ENOMEM; } - dc_resource_state_copy_construct_current(adev->dm.dc, state->context); - drm_atomic_private_obj_init(adev_to_drm(adev), &adev->dm.atomic_obj, &state->base, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f5250022b98e..630a55b2c1d4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1046,8 +1046,6 @@ static bool dc_construct(struct dc *dc, if (!create_link_encoders(dc)) goto fail; - dc_resource_state_construct(dc, dc->current_state); - return true; fail: @@ -1120,7 +1118,7 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte static void disable_dangling_plane(struct dc *dc, struct dc_state *context) { int i, j; - struct dc_state *dangling_context = dc_state_create(dc); + struct dc_state *dangling_context = dc_state_create_current_copy(dc); struct dc_state *current_ctx; struct pipe_ctx *pipe; struct timing_generator *tg; @@ -1128,8 +1126,6 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) if (dangling_context == NULL) return; - dc_resource_state_copy_construct(dc->current_state, dangling_context); - for (i = 0; i < dc->res_pool->pipe_count; i++) { struct dc_stream_state *old_stream = dc->current_state->res_ctx.pipe_ctx[i].stream; @@ -1187,7 +1183,11 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) tg->funcs->enable_crtc(tg); } } - dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context); + + if (is_phantom) + dc_state_rem_all_phantom_planes_for_stream(dc, old_stream, dangling_context, true); + else + dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context); disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); if (pipe->stream && pipe->plane_state) @@ -1562,7 +1562,7 @@ static void program_timing_sync( if (group_size > 1) { if (sync_type == TIMING_SYNCHRONIZABLE) { dc->hwss.enable_timing_synchronization( - dc, group_index, group_size, pipe_set); + dc, ctx, group_index, group_size, pipe_set); } else if (sync_type == VBLANK_SYNCHRONIZABLE) { dc->hwss.enable_vblanks_synchronization( @@ -2075,12 +2075,10 @@ enum dc_status dc_commit_streams(struct dc *dc, if (handle_exit_odm2to1) res = commit_minimal_transition_state(dc, dc->current_state); - context = dc_state_create(dc); + context = dc_state_create_current_copy(dc); if (!context) goto context_alloc_fail; - dc_resource_state_copy_construct_current(dc, context); - res = dc_validate_with_context(dc, set, stream_count, context, false); if (res != DC_OK) { BREAK_TO_DEBUGGER(); @@ -2218,7 +2216,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) if (context->res_ctx.pipe_ctx[i].stream == NULL || context->res_ctx.pipe_ctx[i].plane_state == NULL) { context->res_ctx.pipe_ctx[i].pipe_idx = i; - dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]); + dc->hwss.disable_plane(dc, context, &context->res_ctx.pipe_ctx[i]); } process_deferred_updates(dc); @@ -2899,11 +2897,9 @@ static void copy_stream_update_to_stream(struct dc *dc, update->dsc_config->num_slices_v != 0); /* Use temporarry context for validating new DSC config */ - struct dc_state *dsc_validate_context = dc_state_create(dc); + struct 
dc_state *dsc_validate_context = dc_state_create_copy(dc->current_state); if (dsc_validate_context) { - dc_resource_state_copy_construct(dc->current_state, dsc_validate_context); - stream->timing.dsc_cfg = *update->dsc_config; stream->timing.flags.DSC = enable_dsc; if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) { @@ -3011,20 +3007,17 @@ static bool update_planes_and_stream_state(struct dc *dc, new_planes[i] = srf_updates[i].surface; /* initialize scratch memory for building context */ - context = dc_state_create(dc); + context = dc_state_create_copy(dc->current_state); if (context == NULL) { DC_ERROR("Failed to allocate new validate context!\n"); return false; } - dc_resource_state_copy_construct( - dc->current_state, context); - /* For each full update, remove all existing phantom pipes first. * Ensures that we have enough pipes for newly added MPO planes */ - if (dc->res_pool->funcs->remove_phantom_pipes) - dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); + dc_state_remove_phantom_streams_and_planes(dc, context); + dc_state_release_phantom_streams_and_planes(dc, context); /*remove old surfaces from context */ if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) { @@ -3059,19 +3052,6 @@ static bool update_planes_and_stream_state(struct dc *dc, if (update_type == UPDATE_TYPE_FULL) { if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { - /* For phantom pipes we remove and create a new set of phantom pipes - * for each full update (because we don't know if we'll need phantom - * pipes until after the first round of validation). However, if validation - * fails we need to keep the existing phantom pipes (because we don't update - * the dc->current_state). - * - * The phantom stream/plane refcount is decremented for validation because - * we assume it'll be removed (the free comes when the dc_state is freed), - * but if validation fails we have to increment back the refcount so it's - * consistent. 
- */ - if (dc->res_pool->funcs->retain_phantom_pipes) - dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state); BREAK_TO_DEBUGGER(); goto fail; } @@ -3390,6 +3370,7 @@ static void commit_planes_for_stream_fast(struct dc *dc, { int i, j; struct pipe_ctx *top_pipe_to_program = NULL; + struct dc_stream_status *stream_status = NULL; dc_z10_restore(dc); top_pipe_to_program = resource_get_otg_master_for_stream( @@ -3425,6 +3406,8 @@ static void commit_planes_for_stream_fast(struct dc *dc, } } + stream_status = dc_state_get_stream_status(context, stream); + build_dmub_cmd_list(dc, srf_updates, surface_count, @@ -3437,7 +3420,8 @@ static void commit_planes_for_stream_fast(struct dc *dc, context->dmub_cmd_count, context->block_sequence, &(context->block_sequence_steps), - top_pipe_to_program); + top_pipe_to_program, + stream_status); hwss_execute_sequence(dc, context->block_sequence, context->block_sequence_steps); @@ -3974,7 +3958,7 @@ static void release_minimal_transition_state(struct dc *dc, static struct dc_state *create_minimal_transition_state(struct dc *dc, struct dc_state *base_context, struct pipe_split_policy_backup *policy) { - struct dc_state *minimal_transition_context = dc_state_create(dc); + struct dc_state *minimal_transition_context = NULL; unsigned int i, j; if (!dc->config.is_vmin_only_asic) { @@ -3986,7 +3970,7 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc, policy->subvp_policy = dc->debug.force_disable_subvp; dc->debug.force_disable_subvp = true; - dc_resource_state_copy_construct(base_context, minimal_transition_context); + minimal_transition_context = dc_state_create_copy(base_context); /* commit minimal state */ if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) { @@ -4018,7 +4002,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, bool success = false; struct dc_state *minimal_transition_context; struct pipe_split_policy_backup policy; - struct mall_temp_config mall_temp_config; /* commit based on new context */ /* Since all phantom pipes are removed in full validation, @@ -4027,8 +4010,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, * pipe as subvp/phantom will be cleared (dc copy constructor * creates a shallow copy). */ - if (dc->res_pool->funcs->save_mall_state) - dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); minimal_transition_context = create_minimal_transition_state(dc, context, &policy); if (minimal_transition_context) { @@ -4041,16 +4022,6 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, success = dc_commit_state_no_check(dc, minimal_transition_context) == DC_OK; } release_minimal_transition_state(dc, minimal_transition_context, &policy); - if (dc->res_pool->funcs->restore_mall_state) - dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); - /* If we do a minimal transition with plane removal and the context - * has subvp we also have to retain back the phantom stream / planes - * since the refcount is decremented as part of the min transition - * (we commit a state with no subvp, so the phantom streams / planes - * had to be removed). 
- */ - if (dc->res_pool->funcs->retain_phantom_pipes) - dc->res_pool->funcs->retain_phantom_pipes(dc, context); } if (!success) { @@ -4383,7 +4354,6 @@ bool dc_update_planes_and_stream(struct dc *dc, struct dc_state *context; enum surface_update_type update_type; int i; - struct mall_temp_config mall_temp_config; struct dc_fast_update fast_update[MAX_SURFACES] = {0}; /* In cases where MPO and split or ODM are used transitions can @@ -4427,23 +4397,10 @@ bool dc_update_planes_and_stream(struct dc *dc, * pipe as subvp/phantom will be cleared (dc copy constructor * creates a shallow copy). */ - if (dc->res_pool->funcs->save_mall_state) - dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); if (!commit_minimal_transition_state(dc, context)) { dc_state_release(context); return false; } - if (dc->res_pool->funcs->restore_mall_state) - dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); - - /* If we do a minimal transition with plane removal and the context - * has subvp we also have to retain back the phantom stream / planes - * since the refcount is decremented as part of the min transition - * (we commit a state with no subvp, so the phantom streams / planes - * had to be removed). - */ - if (dc->res_pool->funcs->retain_phantom_pipes) - dc->res_pool->funcs->retain_phantom_pipes(dc, context); update_type = UPDATE_TYPE_FULL; } @@ -4559,14 +4516,12 @@ void dc_commit_updates_for_stream(struct dc *dc, if (update_type >= UPDATE_TYPE_FULL) { /* initialize scratch memory for building context */ - context = dc_state_create(dc); + context = dc_state_create_copy(state); if (context == NULL) { DC_ERROR("Failed to allocate new validate context!\n"); return; } - dc_resource_state_copy_construct(state, context); - for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -4711,7 +4666,7 @@ void dc_set_power_state( switch (power_state) { case DC_ACPI_CM_POWER_STATE_D0: - dc_resource_state_construct(dc, dc->current_state); + dc_state_construct(dc, dc->current_state); dc_z10_restore(dc); @@ -4726,7 +4681,7 @@ void dc_set_power_state( default: ASSERT(dc->current_state->stream_count == 0); - dc_resource_state_destruct(dc->current_state); + dc_state_destruct(dc->current_state); break; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 170bad6f33c7..48fdfacc413a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -473,7 +473,8 @@ void hwss_build_fast_sequence(struct dc *dc, unsigned int dmub_cmd_count, struct block_sequence block_sequence[], int *num_steps, - struct pipe_ctx *pipe_ctx) + struct pipe_ctx *pipe_ctx, + struct dc_stream_status *stream_status) { struct dc_plane_state *plane = pipe_ctx->plane_state; struct dc_stream_state *stream = pipe_ctx->stream; @@ -490,7 +491,8 @@ void hwss_build_fast_sequence(struct dc *dc, if (dc->hwss.subvp_pipe_control_lock_fast) { block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc; block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true; - block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx; + block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip = + plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN; 
block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST; (*num_steps)++; } @@ -529,7 +531,7 @@ void hwss_build_fast_sequence(struct dc *dc, } if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && - dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_MAIN) { + stream_status->mall_stream_config.type == SUBVP_MAIN) { block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address; block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; @@ -612,7 +614,7 @@ void hwss_build_fast_sequence(struct dc *dc, if (dc->hwss.subvp_pipe_control_lock_fast) { block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc; block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = false; - block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx; + block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip = + plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN; block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST; (*num_steps)++; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4a6a7d7557e0..716b59bd03b6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -42,6 +42,7 @@ #include "link_enc_cfg.h" #include "link.h" #include "clk_mgr.h" +#include "dc_state_priv.h" #include "virtual/virtual_link_hwss.h" #include "link/hwss/link_hwss_dio.h" #include "link/hwss/link_hwss_dpia.h" @@ -3523,34 +3524,6 @@ enum dc_status resource_map_pool_resources( return DC_ERROR_UNEXPECTED; } -/** - * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state - * - * @dc: copy out of dc->current_state - * @dst_ctx: copy into this - * - * This function makes a shallow copy of the current DC state and increments - * refcounts on existing streams and planes. - */ -void dc_resource_state_copy_construct_current( - const struct dc *dc, - struct dc_state *dst_ctx) -{ - dc_resource_state_copy_construct(dc->current_state, dst_ctx); -} - - -void dc_resource_state_construct( - const struct dc *dc, - struct dc_state *dst_ctx) -{ - dst_ctx->clk_mgr = dc->clk_mgr; - - /* Initialise DIG link encoder resource tracking variables.
*/ - link_enc_cfg_init(dc, dst_ctx); -} - - bool dc_resource_is_dsc_encoding_supported(const struct dc *dc) { if (dc->res_pool == NULL) @@ -3724,6 +3697,7 @@ enum dc_status dc_validate_with_context(struct dc *dc, unchanged_streams[i], set, set_count)) { + if (!dc_state_rem_all_planes_for_stream(dc, unchanged_streams[i], context)) { @@ -3746,12 +3720,24 @@ enum dc_status dc_validate_with_context(struct dc *dc, } } - if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) { - res = DC_FAIL_DETACH_SURFACES; - goto fail; + if (dc_state_get_stream_subvp_type(context, del_streams[i]) == SUBVP_PHANTOM) { + /* remove phantoms specifically */ + if (!dc_state_rem_all_phantom_planes_for_stream(dc, del_streams[i], context, true)) { + res = DC_FAIL_DETACH_SURFACES; + goto fail; + } + + res = dc_state_remove_phantom_stream(dc, context, del_streams[i]); + dc_state_release_phantom_stream(dc, context, del_streams[i]); + } else { + if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) { + res = DC_FAIL_DETACH_SURFACES; + goto fail; + } + + res = dc_state_remove_stream(dc, context, del_streams[i]); } - res = dc_state_remove_stream(dc, context, del_streams[i]); if (res != DC_OK) goto fail; } @@ -4280,84 +4266,6 @@ static void set_vtem_info_packet( *info_packet = stream->vtem_infopacket; } -void dc_resource_state_destruct(struct dc_state *context) -{ - int i, j; - - for (i = 0; i < context->stream_count; i++) { - for (j = 0; j < context->stream_status[i].plane_count; j++) - dc_plane_state_release( - context->stream_status[i].plane_states[j]); - - context->stream_status[i].plane_count = 0; - dc_stream_release(context->streams[i]); - context->streams[i] = NULL; - } - context->stream_count = 0; - context->stream_mask = 0; - memset(&context->res_ctx, 0, sizeof(context->res_ctx)); - memset(&context->pp_display_cfg, 0, sizeof(context->pp_display_cfg)); - memset(&context->dcn_bw_vars, 0, sizeof(context->dcn_bw_vars)); - context->clk_mgr = NULL; - memset(&context->bw_ctx.bw, 0, sizeof(context->bw_ctx.bw)); - memset(context->block_sequence, 0, sizeof(context->block_sequence)); - context->block_sequence_steps = 0; - memset(context->dc_dmub_cmd, 0, sizeof(context->dc_dmub_cmd)); - context->dmub_cmd_count = 0; - memset(&context->perf_params, 0, sizeof(context->perf_params)); - memset(&context->scratch, 0, sizeof(context->scratch)); -} - -void dc_resource_state_copy_construct( - const struct dc_state *src_ctx, - struct dc_state *dst_ctx) -{ - int i, j; - struct kref refcount = dst_ctx->refcount; -#ifdef CONFIG_DRM_AMD_DC_FP - struct dml2_context *dml2 = NULL; - - // Need to preserve allocated dml2 context - if (src_ctx->clk_mgr && src_ctx->clk_mgr->ctx->dc->debug.using_dml2) - dml2 = dst_ctx->bw_ctx.dml2; -#endif - - *dst_ctx = *src_ctx; - -#ifdef CONFIG_DRM_AMD_DC_FP - // Preserve allocated dml2 context - if (src_ctx->clk_mgr && src_ctx->clk_mgr->ctx->dc->debug.using_dml2) - dst_ctx->bw_ctx.dml2 = dml2; -#endif - - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *cur_pipe = &dst_ctx->res_ctx.pipe_ctx[i]; - - if (cur_pipe->top_pipe) - cur_pipe->top_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; - - if (cur_pipe->bottom_pipe) - cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; - - if (cur_pipe->next_odm_pipe) - cur_pipe->next_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx]; - - if (cur_pipe->prev_odm_pipe) - cur_pipe->prev_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx]; - } - - for (i = 
0; i < dst_ctx->stream_count; i++) { - dc_stream_retain(dst_ctx->streams[i]); - for (j = 0; j < dst_ctx->stream_status[i].plane_count; j++) - dc_plane_state_retain( - dst_ctx->stream_status[i].plane_states[j]); - } - - /* context refcount should not be overridden */ - dst_ctx->refcount = refcount; - -} - struct clock_source *dc_resource_find_first_free_pll( struct resource_context *res_ctx, const struct resource_pool *pool) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index ec5b0d114c7c..66a70a60c479 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -31,6 +31,7 @@ #include "dm_services.h" #include "resource.h" +#include "link_enc_cfg.h" #include "dml2/dml2_wrapper.h" #include "dml2/dml2_internal_types.h" @@ -39,7 +40,144 @@ dc->ctx->logger #define DC_LOGGER_INIT(logger) -/* Public dc_state functions */ +/* Private dc_state helper functions */ +static bool dc_state_track_phantom_stream(struct dc_state *state, + struct dc_stream_state *phantom_stream) +{ + if (state->phantom_stream_count >= MAX_PHANTOM_PIPES) + return false; + + state->phantom_streams[state->phantom_stream_count++] = phantom_stream; + + return true; +} + +static bool dc_state_untrack_phantom_stream(struct dc_state *state, struct dc_stream_state *phantom_stream) +{ + bool res = false; + int i; + + /* first find phantom stream in the dc_state */ + for (i = 0; i < state->phantom_stream_count; i++) { + if (state->phantom_streams[i] == phantom_stream) { + state->phantom_streams[i] = NULL; + res = true; + break; + } + } + + /* failed to find stream in state */ + if (!res) + return res; + + /* trim back phantom streams */ + state->phantom_stream_count--; + for (; i < state->phantom_stream_count; i++) + state->phantom_streams[i] = state->phantom_streams[i + 1]; + + return res; +} + +static bool dc_state_is_phantom_stream_tracked(struct dc_state *state, struct dc_stream_state *phantom_stream) +{ + int i; + + for (i = 0; i < state->phantom_stream_count; i++) { + if (state->phantom_streams[i] == phantom_stream) + return true; + } + + return false; +} + +static bool dc_state_track_phantom_plane(struct dc_state *state, + struct dc_plane_state *phantom_plane) +{ + if (state->phantom_plane_count >= MAX_PHANTOM_PIPES) + return false; + + state->phantom_planes[state->phantom_plane_count++] = phantom_plane; + + return true; +} + +static bool dc_state_untrack_phantom_plane(struct dc_state *state, struct dc_plane_state *phantom_plane) +{ + bool res = false; + int i; + + /* first find phantom plane in the dc_state */ + for (i = 0; i < state->phantom_plane_count; i++) { + if (state->phantom_planes[i] == phantom_plane) { + state->phantom_planes[i] = NULL; + res = true; + break; + } + } + + /* failed to find plane in state */ + if (!res) + return res; + + /* trim back phantom planes */ + state->phantom_plane_count--; + for (; i < state->phantom_plane_count; i++) + state->phantom_planes[i] = state->phantom_planes[i + 1]; + + return res; +} + +static bool dc_state_is_phantom_plane_tracked(struct dc_state *state, struct dc_plane_state *phantom_plane) +{ + int i; + + for (i = 0; i < state->phantom_plane_count; i++) { + if (state->phantom_planes[i] == phantom_plane) + return true; + } + + return false; +} + +static void dc_state_copy_internal(struct dc_state *dst_state, struct dc_state *src_state) +{ + int i, j; + + memcpy(dst_state, src_state, sizeof(struct dc_state)); + + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx 
*cur_pipe = &dst_state->res_ctx.pipe_ctx[i]; + + if (cur_pipe->top_pipe) + cur_pipe->top_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; + + if (cur_pipe->bottom_pipe) + cur_pipe->bottom_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; + + if (cur_pipe->prev_odm_pipe) + cur_pipe->prev_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx]; + + if (cur_pipe->next_odm_pipe) + cur_pipe->next_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx]; + } + + /* retain phantoms */ + for (i = 0; i < dst_state->phantom_stream_count; i++) + dc_stream_retain(dst_state->phantom_streams[i]); + + for (i = 0; i < dst_state->phantom_plane_count; i++) + dc_plane_state_retain(dst_state->phantom_planes[i]); + + /* retain streams and planes */ + for (i = 0; i < dst_state->stream_count; i++) { + dc_stream_retain(dst_state->streams[i]); + for (j = 0; j < dst_state->stream_status[i].plane_count; j++) + dc_plane_state_retain( + dst_state->stream_status[i].plane_states[j]); + } + +} + static void init_state(struct dc *dc, struct dc_state *state) { /* Each context must have their own instance of VBA and in order to @@ -49,6 +187,7 @@ static void init_state(struct dc *dc, struct dc_state *state) memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); } +/* Public dc_state functions */ struct dc_state *dc_state_create(struct dc *dc) { struct dc_state *state = kvzalloc(sizeof(struct dc_state), @@ -58,6 +197,7 @@ struct dc_state *dc_state_create(struct dc *dc) return NULL; init_state(dc, state); + dc_state_construct(dc, state); #ifdef CONFIG_DRM_AMD_DC_FP if (dc->debug.using_dml2) @@ -69,61 +209,112 @@ struct dc_state *dc_state_create(struct dc *dc) return state; } +void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state) +{ + struct kref refcount = dst_state->refcount; + + dc_state_copy_internal(dst_state, src_state); + +#ifdef CONFIG_DRM_AMD_DC_FP + if (src_state->bw_ctx.dml2) + dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2); +#endif + + /* context refcount should not be overridden */ + dst_state->refcount = refcount; +} + struct dc_state *dc_state_create_copy(struct dc_state *src_state) { - int i, j; - struct dc_state *new_state = kvmalloc(sizeof(struct dc_state), GFP_KERNEL); + struct dc_state *new_state; + new_state = kvmalloc(sizeof(struct dc_state), + GFP_KERNEL); if (!new_state) return NULL; - memcpy(new_state, src_state, sizeof(struct dc_state)); + dc_state_copy_internal(new_state, src_state); #ifdef CONFIG_DRM_AMD_DC_FP - if (new_state->bw_ctx.dml2 && !dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) { + if (src_state->bw_ctx.dml2 && + !dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) { dc_state_release(new_state); return NULL; } #endif - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *cur_pipe = &new_state->res_ctx.pipe_ctx[i]; + kref_init(&new_state->refcount); - if (cur_pipe->top_pipe) - cur_pipe->top_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx]; + return new_state; +} - if (cur_pipe->bottom_pipe) - cur_pipe->bottom_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; +void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state) +{ + dc_state_copy(dst_state, dc->current_state); +} - if (cur_pipe->prev_odm_pipe) - cur_pipe->prev_odm_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx]; +struct dc_state *dc_state_create_current_copy(struct dc *dc) +{ + return 
dc_state_create_copy(dc->current_state); +} - if (cur_pipe->next_odm_pipe) - cur_pipe->next_odm_pipe = &new_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx]; - } +void dc_state_construct(struct dc *dc, struct dc_state *state) +{ + state->clk_mgr = dc->clk_mgr; - for (i = 0; i < new_state->stream_count; i++) { - dc_stream_retain(new_state->streams[i]); - for (j = 0; j < new_state->stream_status[i].plane_count; j++) - dc_plane_state_retain( - new_state->stream_status[i].plane_states[j]); + /* Initialise DIG link encoder resource tracking variables. */ + link_enc_cfg_init(dc, state); +} + +void dc_state_destruct(struct dc_state *state) +{ + int i, j; + + for (i = 0; i < state->stream_count; i++) { + for (j = 0; j < state->stream_status[i].plane_count; j++) + dc_plane_state_release( + state->stream_status[i].plane_states[j]); + + state->stream_status[i].plane_count = 0; + dc_stream_release(state->streams[i]); + state->streams[i] = NULL; } + state->stream_count = 0; - kref_init(&new_state->refcount); + /* release tracked phantoms */ + for (i = 0; i < state->phantom_stream_count; i++) { + dc_stream_release(state->phantom_streams[i]); + state->phantom_streams[i] = NULL; + } - return new_state; + for (i = 0; i < state->phantom_plane_count; i++) { + dc_plane_state_release(state->phantom_planes[i]); + state->phantom_planes[i] = NULL; + } + state->stream_mask = 0; + memset(&state->res_ctx, 0, sizeof(state->res_ctx)); + memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg)); + memset(&state->dcn_bw_vars, 0, sizeof(state->dcn_bw_vars)); + state->clk_mgr = NULL; + memset(&state->bw_ctx.bw, 0, sizeof(state->bw_ctx.bw)); + memset(state->block_sequence, 0, sizeof(state->block_sequence)); + state->block_sequence_steps = 0; + memset(state->dc_dmub_cmd, 0, sizeof(state->dc_dmub_cmd)); + state->dmub_cmd_count = 0; + memset(&state->perf_params, 0, sizeof(state->perf_params)); + memset(&state->scratch, 0, sizeof(state->scratch)); } -void dc_state_retain(struct dc_state *context) +void dc_state_retain(struct dc_state *state) { - kref_get(&context->refcount); + kref_get(&state->refcount); } static void dc_state_free(struct kref *kref) { struct dc_state *state = container_of(kref, struct dc_state, refcount); - dc_resource_state_destruct(state); + dc_state_destruct(state); #ifdef CONFIG_DRM_AMD_DC_FP dml2_destroy(state->bw_ctx.dml2); @@ -403,35 +594,65 @@ struct dc_stream_status *dc_state_get_stream_status( enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state, const struct pipe_ctx *pipe_ctx) { - if (pipe_ctx->stream == NULL) - return SUBVP_NONE; - - return pipe_ctx->stream->mall_stream_config.type; + return dc_state_get_stream_subvp_type(state, pipe_ctx->stream); } enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state, const struct dc_stream_state *stream) { - return stream->mall_stream_config.type; + int i; + + enum mall_stream_type type = SUBVP_NONE; + + for (i = 0; i < state->stream_count; i++) { + if (state->streams[i] == stream) { + type = state->stream_status[i].mall_stream_config.type; + break; + } + } + + return type; } struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state, const struct dc_stream_state *stream) { - return stream->mall_stream_config.paired_stream; + int i; + + struct dc_stream_state *paired_stream = NULL; + + for (i = 0; i < state->stream_count; i++) { + if (state->streams[i] == stream) { + paired_stream = state->stream_status[i].mall_stream_config.paired_stream; + break; + } + } + + 
return paired_stream; } struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc, struct dc_state *state, struct dc_stream_state *main_stream) { - struct dc_stream_state *phantom_stream = dc_create_stream_for_sink(main_stream->sink); + struct dc_stream_state *phantom_stream; - if (phantom_stream != NULL) { - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; + DC_LOGGER_INIT(dc->ctx->logger); + + phantom_stream = dc_create_stream_for_sink(main_stream->sink); + + if (!phantom_stream) { + DC_LOG_ERROR("Failed to allocate phantom stream.\n"); + return NULL; } + /* track phantom stream in dc_state */ + dc_state_track_phantom_stream(state, phantom_stream); + + phantom_stream->is_phantom = true; + phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; + phantom_stream->dpms_off = true; + return phantom_stream; } @@ -439,6 +660,13 @@ void dc_state_release_phantom_stream(const struct dc *dc, struct dc_state *state, struct dc_stream_state *phantom_stream) { + DC_LOGGER_INIT(dc->ctx->logger); + + if (!dc_state_untrack_phantom_stream(state, phantom_stream)) { + DC_LOG_ERROR("Failed to free phantom stream %p in dc state %p.\n", phantom_stream, state); + return; + } + dc_stream_release(phantom_stream); } @@ -448,8 +676,17 @@ struct dc_plane_state *dc_state_create_phantom_plane(struct dc *dc, { struct dc_plane_state *phantom_plane = dc_create_plane_state(dc); - if (phantom_plane != NULL) - phantom_plane->is_phantom = true; + DC_LOGGER_INIT(dc->ctx->logger); + + if (!phantom_plane) { + DC_LOG_ERROR("Failed to allocate phantom plane.\n"); + return NULL; + } + + /* track phantom inside dc_state */ + dc_state_track_phantom_plane(state, phantom_plane); + + phantom_plane->is_phantom = true; return phantom_plane; } @@ -458,6 +695,13 @@ void dc_state_release_phantom_plane(const struct dc *dc, struct dc_state *state, struct dc_plane_state *phantom_plane) { + DC_LOGGER_INIT(dc->ctx->logger); + + if (!dc_state_untrack_phantom_plane(state, phantom_plane)) { + DC_LOG_ERROR("Failed to free phantom plane %p in dc state %p.\n", phantom_plane, state); + return; + } + dc_plane_state_release(phantom_plane); } @@ -467,13 +711,23 @@ enum dc_status dc_state_add_phantom_stream(struct dc *dc, struct dc_stream_state *phantom_stream, struct dc_stream_state *main_stream) { + struct dc_stream_status *main_stream_status; + struct dc_stream_status *phantom_stream_status; enum dc_status res = dc_state_add_stream(dc, state, phantom_stream); + /* check if stream is tracked */ + if (res == DC_OK && !dc_state_is_phantom_stream_tracked(state, phantom_stream)) { + /* stream must be tracked if added to state */ + dc_state_track_phantom_stream(state, phantom_stream); + } + /* setup subvp meta */ - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = main_stream; - main_stream->mall_stream_config.type = SUBVP_MAIN; - main_stream->mall_stream_config.paired_stream = phantom_stream; + main_stream_status = dc_state_get_stream_status(state, main_stream); + phantom_stream_status = dc_state_get_stream_status(state, phantom_stream); + phantom_stream_status->mall_stream_config.type = SUBVP_PHANTOM; + phantom_stream_status->mall_stream_config.paired_stream = main_stream; + main_stream_status->mall_stream_config.type = SUBVP_MAIN; + main_stream_status->mall_stream_config.paired_stream = phantom_stream; return res; } @@ -482,9 +736,18 @@ enum dc_status dc_state_remove_phantom_stream(struct dc *dc, struct dc_state *state, struct dc_stream_state *phantom_stream) 
{ + struct dc_stream_status *main_stream_status; + struct dc_stream_status *phantom_stream_status; + /* reset subvp meta */ - phantom_stream->mall_stream_config.paired_stream->mall_stream_config.type = SUBVP_NONE; - phantom_stream->mall_stream_config.paired_stream->mall_stream_config.paired_stream = NULL; + phantom_stream_status = dc_state_get_stream_status(state, phantom_stream); + main_stream_status = dc_state_get_stream_status(state, phantom_stream_status->mall_stream_config.paired_stream); + phantom_stream_status->mall_stream_config.type = SUBVP_NONE; + phantom_stream_status->mall_stream_config.paired_stream = NULL; + if (main_stream_status) { + main_stream_status->mall_stream_config.type = SUBVP_NONE; + main_stream_status->mall_stream_config.paired_stream = NULL; + } /* remove stream from state */ return dc_state_remove_stream(dc, state, phantom_stream); @@ -496,7 +759,15 @@ bool dc_state_add_phantom_plane( struct dc_plane_state *phantom_plane, struct dc_state *state) { - return dc_state_add_plane(dc, phantom_stream, phantom_plane, state); + bool res = dc_state_add_plane(dc, phantom_stream, phantom_plane, state); + + /* check if stream is tracked */ + if (res && !dc_state_is_phantom_plane_tracked(state, phantom_plane)) { + /* stream must be tracked if added to state */ + dc_state_track_phantom_plane(state, phantom_plane); + } + + return res; } bool dc_state_remove_phantom_plane( @@ -511,9 +782,37 @@ bool dc_state_remove_phantom_plane( bool dc_state_rem_all_phantom_planes_for_stream( const struct dc *dc, struct dc_stream_state *phantom_stream, - struct dc_state *state) + struct dc_state *state, + bool should_release_planes) { - return dc_state_rem_all_planes_for_stream(dc, phantom_stream, state); + int i, old_plane_count; + struct dc_stream_status *stream_status = NULL; + struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 }; + + for (i = 0; i < state->stream_count; i++) + if (state->streams[i] == phantom_stream) { + stream_status = &state->stream_status[i]; + break; + } + + if (stream_status == NULL) { + dm_error("Existing stream %p not found!\n", phantom_stream); + return false; + } + + old_plane_count = stream_status->plane_count; + + for (i = 0; i < old_plane_count; i++) + del_planes[i] = stream_status->plane_states[i]; + + for (i = 0; i < old_plane_count; i++) { + if (!dc_state_remove_plane(dc, phantom_stream, del_planes[i], state)) + return false; + if (should_release_planes) + dc_state_release_phantom_plane(dc, state, del_planes[i]); + } + + return true; } bool dc_state_add_all_phantom_planes_for_stream( @@ -525,3 +824,38 @@ bool dc_state_add_all_phantom_planes_for_stream( { return dc_state_add_all_planes_for_stream(dc, phantom_stream, phantom_planes, plane_count, state); } + +bool dc_state_remove_phantom_streams_and_planes( + struct dc *dc, + struct dc_state *state) +{ + int i; + bool removed_phantom = false; + struct dc_stream_state *phantom_stream = NULL; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { + phantom_stream = pipe->stream; + + dc_state_rem_all_phantom_planes_for_stream(dc, phantom_stream, state, false); + dc_state_remove_phantom_stream(dc, state, phantom_stream); + removed_phantom = true; + } + } + return removed_phantom; +} + +void dc_state_release_phantom_streams_and_planes( + struct dc *dc, + struct dc_state *state) +{ + int i; + + for (i = 0; i < state->phantom_stream_count; i++) + 
dc_state_release_phantom_stream(dc, state, state->phantom_streams[i]); + + for (i = 0; i < state->phantom_plane_count; i++) + dc_state_release_phantom_plane(dc, state, state->phantom_planes[i]); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 86de4bf2ce13..f2b265ed7fc2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -123,8 +123,7 @@ bool dc_stream_construct(struct dc_stream_state *stream, } stream->out_transfer_func->type = TF_TYPE_BYPASS; - stream->stream_id = stream->ctx->dc_stream_id_count; - stream->ctx->dc_stream_id_count++; + dc_stream_assign_stream_id(stream); return true; } @@ -138,6 +137,13 @@ void dc_stream_destruct(struct dc_stream_state *stream) } } +void dc_stream_assign_stream_id(struct dc_stream_state *stream) +{ + /* MSB is reserved to indicate phantoms */ + stream->stream_id = stream->ctx->dc_stream_id_count; + stream->ctx->dc_stream_id_count++; +} + void dc_stream_retain(struct dc_stream_state *stream) { kref_get(&stream->refcount); @@ -198,8 +204,7 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream) if (new_stream->out_transfer_func) dc_transfer_func_retain(new_stream->out_transfer_func); - new_stream->stream_id = new_stream->ctx->dc_stream_id_count; - new_stream->ctx->dc_stream_id_count++; + dc_stream_assign_stream_id(new_stream); /* If using dynamic encoder assignment, wait till stream committed to assign encoder. */ if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index d638679cf31a..a270b4bf7b95 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1388,6 +1388,9 @@ struct dc_surface_update { const struct colorspace_transform *gamut_remap_matrix; }; +/* + * Create a new surface with default parameters; + */ void dc_gamma_retain(struct dc_gamma *dc_gamma); void dc_gamma_release(struct dc_gamma **dc_gamma); struct dc_gamma *dc_create_gamma(void); @@ -1451,27 +1454,12 @@ enum dc_status dc_validate_global_state( struct dc_state *new_ctx, bool fast_validate); - -void dc_resource_state_construct( - const struct dc *dc, - struct dc_state *dst_ctx); - bool dc_acquire_release_mpc_3dlut( struct dc *dc, bool acquire, struct dc_stream_state *stream, struct dc_3dlut **lut, struct dc_transfer_func **shaper); -void dc_resource_state_copy_construct( - const struct dc_state *src_ctx, - struct dc_state *dst_ctx); - -void dc_resource_state_copy_construct_current( - const struct dc *dc, - struct dc_state *dst_ctx); - -void dc_resource_state_destruct(struct dc_state *context); - bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); enum dc_status dc_commit_streams(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 704d4ff722f0..1d315f7cdce3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -672,12 +672,14 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc, uint32_t prefetch_delta_us = 0; struct dc_stream_state *phantom_stream0 = NULL; struct dc_stream_state *phantom_stream1 = NULL; - struct dc_crtc_timing *phantom_timing0 = &phantom_stream0->timing; - struct dc_crtc_timing *phantom_timing1 = &phantom_stream1->timing; + struct dc_crtc_timing *phantom_timing0 = NULL; + struct dc_crtc_timing *phantom_timing1 = NULL; struct 
dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL; phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream); phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream); + phantom_timing0 = &phantom_stream0->timing; + phantom_timing1 = &phantom_stream1->timing; subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) * (uint64_t)phantom_timing0->h_total * 1000000), @@ -853,7 +855,6 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, */ if (resource_is_pipe_type(pipe, OTG_MASTER) && resource_is_pipe_type(pipe, DPP_PIPE) && - pipe->stream->mall_stream_config.paired_stream && pipe_mall_type == SUBVP_MAIN) { populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++); } else if (resource_is_pipe_type(pipe, OTG_MASTER) && diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h index df1a8b85a652..d167fdbfa8a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_state.h +++ b/drivers/gpu/drm/amd/display/dc/dc_state.h @@ -29,9 +29,13 @@ #include "dc.h" #include "inc/core_status.h" - struct dc_state *dc_state_create(struct dc *dc); +void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state); struct dc_state *dc_state_create_copy(struct dc_state *src_state); +void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state); +struct dc_state *dc_state_create_current_copy(struct dc *dc); +void dc_state_construct(struct dc *dc, struct dc_state *state); +void dc_state_destruct(struct dc_state *state); void dc_state_retain(struct dc_state *state); void dc_state_release(struct dc_state *state); diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h index 16c09caa1317..c1f44e09a6c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h @@ -29,10 +29,6 @@ #include "dc_state.h" #include "dc_stream.h" -struct dc_stream_status *dc_state_get_stream_status( - struct dc_state *state, - struct dc_stream_state *stream); - /* Get the type of the provided resource (none, phantom, main) based on the provided * context. If the context is unavailable, determine only if phantom or not. */ @@ -85,7 +81,8 @@ bool dc_state_remove_phantom_plane( bool dc_state_rem_all_phantom_planes_for_stream( const struct dc *dc, struct dc_stream_state *phantom_stream, - struct dc_state *state); + struct dc_state *state, + bool should_release_planes); bool dc_state_add_all_phantom_planes_for_stream( const struct dc *dc, @@ -94,4 +91,12 @@ bool dc_state_add_all_phantom_planes_for_stream( int plane_count, struct dc_state *state); +bool dc_state_remove_phantom_streams_and_planes( + struct dc *dc, + struct dc_state *state); + +void dc_state_release_phantom_streams_and_planes( + struct dc *dc, + struct dc_state *state); + #endif /* _DC_STATE_PRIV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index f617428c6a57..a23eebd9933b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -38,6 +38,14 @@ struct timing_sync_info { bool master; }; +struct mall_stream_config { + /* MALL stream config to indicate if the stream is phantom or not. + * We will use a phantom stream to indicate that the pipe is phantom. 
+ */ + enum mall_stream_type type; + struct dc_stream_state *paired_stream; // master / slave stream +}; + struct dc_stream_status { int primary_otg_inst; int stream_enc_inst; @@ -50,6 +58,7 @@ struct dc_stream_status { struct timing_sync_info timing_sync_info; struct dc_plane_state *plane_states[MAX_SURFACE_NUM]; bool is_abm_supported; + struct mall_stream_config mall_stream_config; }; enum hubp_dmdata_mode { @@ -146,25 +155,6 @@ struct test_pattern { #define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR) -struct mall_stream_config { - /* MALL stream config to indicate if the stream is phantom or not. - * We will use a phantom stream to indicate that the pipe is phantom. - */ - enum mall_stream_type type; - struct dc_stream_state *paired_stream; // master / slave stream -}; - -/* Temp struct used to save and restore MALL config - * during validation. - * - * TODO: Move MALL config into dc_state instead of stream struct - * to avoid needing to save/restore. - */ -struct mall_temp_config { - struct mall_stream_config mall_stream_config[MAX_PIPES]; - bool is_phantom_plane[MAX_PIPES]; -}; - struct dc_stream_debug_options { char force_odm_combine_segments; }; @@ -294,7 +284,7 @@ struct dc_stream_state { bool has_non_synchronizable_pclk; bool vblank_synchronized; bool fpo_in_use; - struct mall_stream_config mall_stream_config; + bool is_phantom; }; #define ABM_LEVEL_IMMEDIATE_DISABLE 255 diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h index 710d3b04c7e8..7476fd52ce2b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h @@ -32,4 +32,6 @@ bool dc_stream_construct(struct dc_stream_state *stream, struct dc_sink *dc_sink_data); void dc_stream_destruct(struct dc_stream_state *stream); +void dc_stream_assign_stream_id(struct dc_stream_state *stream); + #endif // _DC_STREAM_PRIV_H_ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index b3d360741175..1743ebcd6b2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -430,71 +430,6 @@ void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context, dcn32_determine_det_override(dc, context, pipes); } -/** - * dcn32_save_mall_state(): Save MALL (SubVP) state for fast validation cases - * - * This function saves the MALL (SubVP) case for fast validation cases. For fast validation, - * there are situations where a shallow copy of the dc->current_state is created for the - * validation. In this case we want to save and restore the mall config because we always - * teardown subvp at the beginning of validation (and don't attempt to add it back if it's - * fast validation). If we don't restore the subvp config in cases of fast validation + - * shallow copy of the dc->current_state, the dc->current_state will have a partially - * removed subvp state when we did not intend to remove it. - * - * NOTE: This function ONLY works if the streams are not moved to a different pipe in the - * validation. We don't expect this to happen in fast_validation=1 cases. 
- * - * @dc: Current DC state - * @context: New DC state to be programmed - * @temp_config: struct used to cache the existing MALL state - * - * Return: void - */ -void dcn32_save_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config) -{ - uint32_t i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream) - temp_config->mall_stream_config[i] = pipe->stream->mall_stream_config; - - if (pipe->plane_state) - temp_config->is_phantom_plane[i] = pipe->plane_state->is_phantom; - } -} - -/** - * dcn32_restore_mall_state(): Restore MALL (SubVP) state for fast validation cases - * - * Restore the MALL state based on the previously saved state from dcn32_save_mall_state - * - * @dc: Current DC state - * @context: New DC state to be programmed, restore MALL state into here - * @temp_config: struct that has the cached MALL state - * - * Return: void - */ -void dcn32_restore_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config) -{ - uint32_t i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream) - pipe->stream->mall_stream_config = temp_config->mall_stream_config[i]; - - if (pipe->plane_state) - pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i]; - } -} - #define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW) /* * Scaling factor for v_blank stretch calculations considering timing in diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 50d223fa1e49..10c890a9cb7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1539,7 +1539,8 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) // remove phantom pipes and repopulate dml pipes if (!found_supported_config) { - dc->res_pool->funcs->remove_phantom_pipes(dc, context, false); + dc_state_remove_phantom_streams_and_planes(dc, context); + dc_state_release_phantom_streams_and_planes(dc, context); vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); @@ -1937,7 +1938,8 @@ bool dcn32_internal_validate_bw(struct dc *dc, return false; // For each full update, remove all existing phantom pipes first - dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate); + dc_state_remove_phantom_streams_and_planes(dc, context); + dc_state_release_phantom_streams_and_planes(dc, context); dc->res_pool->funcs->update_soc_for_wm_a(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c index 0c146af34d82..282d70e2b18a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c @@ -807,7 +807,7 @@ static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_ } } -static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context) +static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context) { int i, old_plane_count; struct dc_stream_status 
*stream_status = NULL; @@ -828,9 +828,11 @@ static bool remove_all_planes_for_stream(struct dml2_context *ctx, struct dc_str for (i = 0; i < old_plane_count; i++) del_planes[i] = stream_status->plane_states[i]; - for (i = 0; i < old_plane_count; i++) + for (i = 0; i < old_plane_count; i++) { if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context)) return false; + ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]); + } return true; } @@ -839,39 +841,21 @@ bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state { int i; bool removed_pipe = false; - struct dc_plane_state *phantom_plane = NULL; struct dc_stream_state *phantom_stream = NULL; for (i = 0; i < ctx->config.dcn_pipe_count; i++) { struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; // build scaling params for phantom pipes if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) { - phantom_plane = pipe->plane_state; phantom_stream = pipe->stream; - remove_all_planes_for_stream(ctx, pipe->stream, state); - ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, pipe->stream); - - /* Ref count is incremented on allocation and also when added to the context. - * Therefore we must call release for the the phantom plane and stream once - * they are removed from the ctx to finally decrement the refcount to 0 to free. - */ - ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, - state, - phantom_plane); - ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, - state, - phantom_stream); + remove_all_phantom_planes_for_stream(ctx, phantom_stream, state); + ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); + ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream); removed_pipe = true; } - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } - if (pipe->plane_state) { pipe->plane_state->is_phantom = false; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 505a5d7ae20d..ee0eb184eb6d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -104,7 +104,10 @@ struct dml2_dc_svp_callbacks { struct dc_stream_state *phantom_stream, struct dc_stream_state *main_stream); bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); - bool (*remove_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context); + bool (*remove_phantom_plane)(const struct dc *dc, + struct dc_stream_state *stream, + struct dc_plane_state *plane_state, + struct dc_state *context); enum dc_status (*remove_phantom_stream)(struct dc *dc, struct dc_state *state, struct dc_stream_state *stream); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 968f6ba7ccf6..762e674f9998 100644 --- 
a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1685,7 +1685,7 @@ static void disable_vga_and_power_gate_all_controllers( true); dc->current_state->res_ctx.pipe_ctx[i].pipe_idx = i; - dc->hwss.disable_plane(dc, + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); } } @@ -2134,7 +2134,7 @@ static void dce110_reset_hw_ctx_wrap( old_clk)) old_clk->funcs->cs_power_down(old_clk); - dc->hwss.disable_plane(dc, pipe_ctx_old); + dc->hwss.disable_plane(dc, dc->current_state, pipe_ctx_old); pipe_ctx_old->stream = NULL; } @@ -2498,6 +2498,7 @@ static bool wait_for_reset_trigger_to_occur( /* Enable timing synchronization for a group of Timing Generators. */ static void dce110_enable_timing_synchronization( struct dc *dc, + struct dc_state *state, int group_index, int group_size, struct pipe_ctx *grouped_pipes[]) @@ -2843,7 +2844,7 @@ static void dce110_post_unlock_program_front_end( { } -static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx) +static void dce110_power_down_fe(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; int fe_idx = pipe_ctx->plane_res.mi ? diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 96fb563b1009..d0247cd7833f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1181,7 +1181,9 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc) } /* trigger HW to start disconnect plane from stream on the next vsync */ -void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn10_plane_atomic_disconnect(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; struct hubp *hubp = pipe_ctx->plane_res.hubp; @@ -1201,7 +1203,7 @@ void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove); // Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle, // so don't wait for MPCC_IDLE in the programming sequence - if (opp != NULL && dc_state_get_pipe_subvp_type(NULL, pipe_ctx) != SUBVP_PHANTOM) + if (opp != NULL && dc_state_get_pipe_subvp_type(state, pipe_ctx) != SUBVP_PHANTOM) opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; dc->optimized_required = true; @@ -1291,7 +1293,7 @@ void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->plane_state = NULL; } -void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; DC_LOGGER_INIT(dc->ctx->logger); @@ -1417,12 +1419,12 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; - hws->funcs.plane_atomic_disconnect(dc, pipe_ctx); + hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx); if (tg->funcs->is_tg_enabled(tg)) tg->funcs->unlock(tg); - dc->hwss.disable_plane(dc, pipe_ctx); + dc->hwss.disable_plane(dc, context, pipe_ctx); pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; @@ -2263,6 +2265,7 @@ void dcn10_enable_vblanks_synchronization( void dcn10_enable_timing_synchronization( 
struct dc *dc, + struct dc_state *state, int group_index, int group_size, struct pipe_ctx *grouped_pipes[]) @@ -2277,7 +2280,7 @@ void dcn10_enable_timing_synchronization( DC_SYNC_INFO("Setting up OTG reset trigger\n"); for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; opp = grouped_pipes[i]->stream_res.opp; @@ -2297,14 +2300,14 @@ void dcn10_enable_timing_synchronization( if (grouped_pipes[i]->stream == NULL) continue; - if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream->vblank_synchronized = false; } for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger( @@ -2318,11 +2321,11 @@ void dcn10_enable_timing_synchronization( * synchronized. Look at last pipe programmed to reset. */ - if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[1]) != SUBVP_PHANTOM) + if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[1]) != SUBVP_PHANTOM) wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg); for (i = 1; i < group_size; i++) { - if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) + if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger( @@ -3022,7 +3025,7 @@ void dcn10_post_unlock_program_front_end( for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) - dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]); + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h index ef6d56da417c..bc5dd68a2408 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h @@ -75,7 +75,7 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn10_reset_hw_ctx_wrap( struct dc *dc, struct dc_state *context); -void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void dcn10_lock_all_pipes( struct dc *dc, struct dc_state *context, @@ -108,13 +108,16 @@ void dcn10_power_down_on_boot(struct dc *dc); enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context); -void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn10_plane_atomic_disconnect(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx); void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data); void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx); void 
dce110_power_down(struct dc *dc); void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context); void dcn10_enable_timing_synchronization( struct dc *dc, + struct dc_state *state, int group_index, int group_size, struct pipe_ctx *grouped_pipes[]); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 0c9f4ea109cb..88fe102f8288 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -624,9 +624,9 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) } -void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) { - bool is_phantom = dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_PHANTOM; + bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM; struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL; DC_LOGGER_INIT(dc->ctx->logger); @@ -1369,10 +1369,13 @@ void dcn20_pipe_control_lock( } } -static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe) +static void dcn20_detect_pipe_changes(struct dc_state *old_state, + struct dc_state *new_state, + struct pipe_ctx *old_pipe, + struct pipe_ctx *new_pipe) { - bool old_is_phantom = dc_state_get_pipe_subvp_type(NULL, old_pipe) == SUBVP_PHANTOM; - bool new_is_phantom = dc_state_get_pipe_subvp_type(NULL, new_pipe) == SUBVP_PHANTOM; + bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM; + bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM; new_pipe->update_flags.raw = 0; @@ -1420,7 +1423,7 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx * The remove-add sequence of the phantom pipe always results in the pipe * being blanked in enable_stream_timing (DPG). 
*/ - if (new_pipe->stream && dc_state_get_pipe_subvp_type(NULL, new_pipe) == SUBVP_PHANTOM) + if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM) new_pipe->update_flags.bits.enable = 1; /* Phantom pipes are effectively disabled, if the pipe was previously phantom @@ -1782,7 +1785,7 @@ static void dcn20_program_pipe( pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width); - if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); pipe_ctx->stream_res.tg->funcs->set_vtg_params( @@ -1921,7 +1924,7 @@ void dcn20_program_front_end_for_ctx( /* Set pipe update flags and lock pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) - dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i], + dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i], &context->res_ctx.pipe_ctx[i]); /* When disabling phantom pipes, turn on phantom OTG first (so we can get double @@ -1971,7 +1974,7 @@ void dcn20_program_front_end_for_ctx( if (hubbub->funcs->program_det_size && (context->res_ctx.pipe_ctx[i].update_flags.bits.disable || (context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM))) hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0); - hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]); + hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx); } @@ -2044,7 +2047,7 @@ void dcn20_post_unlock_program_front_end( for (i = 0; i < dc->res_pool->pipe_count; i++) if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) - dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]); + dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); /* * If we are enabling a pipe, we need to wait for pending clear as this is a critical @@ -2954,7 +2957,7 @@ void dcn20_fpga_init_hw(struct dc *dc) dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; /*to do*/ - hws->funcs.plane_atomic_disconnect(dc, pipe_ctx); + hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx); } /* initialize DWB pointer to MCIF_WB */ @@ -2971,7 +2974,7 @@ void dcn20_fpga_init_hw(struct dc *dc) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - dc->hwss.disable_plane(dc, pipe_ctx); + dc->hwss.disable_plane(dc, context, pipe_ctx); pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h index ab02e4e9c8c2..b94c85340abf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h @@ -52,7 +52,7 @@ void dcn20_program_output_csc(struct dc *dc, void dcn20_enable_stream(struct pipe_ctx *pipe_ctx); void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); -void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn20_disable_plane(struct dc *dc, struct dc_state 
*state, struct pipe_ctx *pipe_ctx); void dcn20_disable_pixel_data( struct dc *dc, struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c index d3fe6092f50e..d5769f38874f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c @@ -320,7 +320,7 @@ void dcn201_init_hw(struct dc *dc) res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = res_pool->opps[i]; /*To do: number of MPCC != number of opp*/ - hws->funcs.plane_atomic_disconnect(dc, pipe_ctx); + hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx); } /* initialize DWB pointer to MCIF_WB */ @@ -337,7 +337,7 @@ void dcn201_init_hw(struct dc *dc) for (i = 0; i < res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - dc->hwss.disable_plane(dc, pipe_ctx); + dc->hwss.disable_plane(dc, context, pipe_ctx); pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; @@ -369,7 +369,9 @@ void dcn201_init_hw(struct dc *dc) } /* trigger HW to start disconnect plane from stream on the next vsync */ -void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn201_plane_atomic_disconnect(struct dc *dc, + struct dc_state *state, + struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; struct hubp *hubp = pipe_ctx->plane_res.hubp; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h index 26cd62be6418..6a50a9894be6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h @@ -33,7 +33,7 @@ void dcn201_init_hw(struct dc *dc); void dcn201_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); void dcn201_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx); -void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn201_plane_atomic_disconnect(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void dcn201_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn201_set_cursor_attribute(struct pipe_ctx *pipe_ctx); void dcn201_pipe_control_lock( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index d3d16c7af38c..6f360f008f67 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -349,8 +349,7 @@ void dcn32_commit_subvp_config(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.paired_stream && - dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { // There is at least 1 SubVP pipe, so enable SubVP enable_subvp = true; break; @@ -419,14 +418,7 @@ void dcn32_subvp_pipe_control_lock_fast(union block_sequence_params *params) { struct dc *dc = params->subvp_pipe_control_lock_fast_params.dc; bool lock = params->subvp_pipe_control_lock_fast_params.lock; - struct pipe_ctx *pipe_ctx = params->subvp_pipe_control_lock_fast_params.pipe_ctx; - bool subvp_immediate_flip = false; - - if (pipe_ctx && 
pipe_ctx->stream && pipe_ctx->plane_state) { - if (dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_MAIN && - pipe_ctx->plane_state->flip_immediate) - subvp_immediate_flip = true; - } + bool subvp_immediate_flip = params->subvp_pipe_control_lock_fast_params.subvp_immediate_flip; // Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared. if (subvp_immediate_flip) { @@ -1382,7 +1374,7 @@ void dcn32_update_phantom_vp_position(struct dc *dc, struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN && - pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) { + dc_state_get_paired_subvp_stream(context, pipe->stream) == phantom_pipe->stream) { if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) { phantom_plane->src_rect.x = pipe->plane_state->src_rect.x; @@ -1407,21 +1399,19 @@ void dcn32_update_phantom_vp_position(struct dc *dc, void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe) { phantom_pipe->update_flags.raw = 0; - if (dc_state_get_pipe_subvp_type(NULL, phantom_pipe) == SUBVP_PHANTOM) { - if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) { - phantom_pipe->update_flags.bits.enable = 1; - phantom_pipe->update_flags.bits.mpcc = 1; - phantom_pipe->update_flags.bits.dppclk = 1; - phantom_pipe->update_flags.bits.hubp_interdependent = 1; - phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; - phantom_pipe->update_flags.bits.gamut_remap = 1; - phantom_pipe->update_flags.bits.scaler = 1; - phantom_pipe->update_flags.bits.viewport = 1; - phantom_pipe->update_flags.bits.det_size = 1; - if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) { - phantom_pipe->update_flags.bits.odm = 1; - phantom_pipe->update_flags.bits.global_sync = 1; - } + if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) { + phantom_pipe->update_flags.bits.enable = 1; + phantom_pipe->update_flags.bits.mpcc = 1; + phantom_pipe->update_flags.bits.dppclk = 1; + phantom_pipe->update_flags.bits.hubp_interdependent = 1; + phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1; + phantom_pipe->update_flags.bits.gamut_remap = 1; + phantom_pipe->update_flags.bits.scaler = 1; + phantom_pipe->update_flags.bits.viewport = 1; + phantom_pipe->update_flags.bits.det_size = 1; + if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) { + phantom_pipe->update_flags.bits.odm = 1; + phantom_pipe->update_flags.bits.global_sync = 1; } } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 56a4bc476684..582852ed21fb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -814,12 +814,12 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; - hws->funcs.plane_atomic_disconnect(dc, pipe_ctx); + hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx); if (tg->funcs->is_tg_enabled(tg)) tg->funcs->unlock(tg); - dc->hwss.disable_plane(dc, pipe_ctx); + dc->hwss.disable_plane(dc, context, pipe_ctx); pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; @@ -946,10 +946,10 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->plane_state = NULL; } -void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) +void dcn35_disable_plane(struct dc 
*dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; - bool is_phantom = dc_state_get_pipe_subvp_type(NULL, pipe_ctx) == SUBVP_PHANTOM; + bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM; struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL; DC_LOGGER_INIT(dc->ctx->logger); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index 3837038dc4a8..b7bafe7fe2fd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -57,7 +57,7 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context); void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context); -void dcn35_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, struct pg_block_update *update_state); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 45dc6d4e9562..6ca8c5488d50 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -50,7 +50,7 @@ struct pg_block_update; struct subvp_pipe_control_lock_fast_params { struct dc *dc; bool lock; - struct pipe_ctx *pipe_ctx; + bool subvp_immediate_flip; }; struct pipe_control_lock_params { @@ -200,7 +200,7 @@ struct hw_sequencer_funcs { struct dc_state *context); enum dc_status (*apply_ctx_to_hw)(struct dc *dc, struct dc_state *context); - void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx); + void (*disable_plane)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank); void (*apply_ctx_for_surface)(struct dc *dc, const struct dc_stream_state *stream, @@ -248,6 +248,7 @@ struct hw_sequencer_funcs { void (*enable_per_frame_crtc_position_reset)(struct dc *dc, int group_size, struct pipe_ctx *grouped_pipes[]); void (*enable_timing_synchronization)(struct dc *dc, + struct dc_state *state, int group_index, int group_size, struct pipe_ctx *grouped_pipes[]); void (*enable_vblanks_synchronization)(struct dc *dc, @@ -474,7 +475,8 @@ void hwss_build_fast_sequence(struct dc *dc, unsigned int dmub_cmd_count, struct block_sequence block_sequence[], int *num_steps, - struct pipe_ctx *pipe_ctx); + struct pipe_ctx *pipe_ctx, + struct dc_stream_status *stream_status); void hwss_send_dmcub_cmd(union block_sequence_params *params); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 82c592166875..6137cf09aa54 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -79,6 +79,7 @@ struct hwseq_private_funcs { void (*update_plane_addr)(const struct dc *dc, struct pipe_ctx *pipe_ctx); void (*plane_atomic_disconnect)(struct dc *dc, + struct dc_state *state, struct pipe_ctx *pipe_ctx); void (*update_mpcc)(struct dc *dc, struct pipe_ctx *pipe_ctx); bool (*set_input_transfer_func)(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h 
b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 10397d4dfb07..c3b973abb89a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -200,11 +200,7 @@ struct resource_funcs { unsigned int pipe_cnt, unsigned int index); - bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update); - void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context); void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); - void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); - void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config); void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx); }; @@ -526,6 +522,14 @@ struct dc_state { * @stream_status: Planes status on a given stream */ struct dc_stream_status stream_status[MAX_PIPES]; + /** + * @phantom_streams: Stream state properties for phantoms + */ + struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES]; + /** + * @phantom_planes: Planes state properties for phantoms + */ + struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES]; /** * @stream_count: Total of streams in use @@ -533,6 +537,14 @@ struct dc_state { uint8_t stream_count; uint8_t stream_mask; + /** + * @stream_count: Total phantom streams in use + */ + uint8_t phantom_stream_count; + /** + * @stream_count: Total phantom planes in use + */ + uint8_t phantom_plane_count; /** * @res_ctx: Persistent state of resources */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index b95ae9596c3b..dcae23faeee3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -43,6 +43,7 @@ * to be used inside loops and for determining array sizes. 
*/ #define MAX_PIPES 6 +#define MAX_PHANTOM_PIPES (MAX_PIPES / 2) #define MAX_DIG_LINK_ENCODERS 7 #define MAX_DWB_PIPES 1 #define MAX_HPO_DP2_ENCODERS 4 diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index dbcb9c5ea9af..c4d71e7f18af 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1697,69 +1697,6 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, return phantom_stream; } -void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i; - struct dc_plane_state *phantom_plane = NULL; - struct dc_stream_state *phantom_stream = NULL; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (resource_is_pipe_type(pipe, OTG_MASTER) && - resource_is_pipe_type(pipe, DPP_PIPE) && - dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { - phantom_plane = pipe->plane_state; - phantom_stream = pipe->stream; - - dc_plane_state_retain(phantom_plane); - dc_stream_retain(phantom_stream); - } - } -} - -// return true if removed piped from ctx, false otherwise -bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update) -{ - int i; - bool removed_pipe = false; - struct dc_plane_state *phantom_plane = NULL; - struct dc_stream_state *phantom_stream = NULL; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { - phantom_plane = pipe->plane_state; - phantom_stream = pipe->stream; - - dc_state_rem_all_planes_for_stream(dc, pipe->stream, context); - - /* For non-full updates, a shallow copy of the current state - * is created. In this case we don't want to erase the current - * state (there can be 2 HIRQL threads, one in flip, and one in - * checkMPO) that can cause a race condition. - * - * This is just a workaround, needs a proper fix. - */ - if (!fast_update) - dc_state_remove_phantom_stream(dc, context, pipe->stream); - else - dc_state_remove_stream(dc, context, pipe->stream); - - /* Ref count is incremented on allocation and also when added to the context. - * Therefore we must call release for the the phantom plane and stream once - * they are removed from the ctx to finally decrement the refcount to 0 to free. 
- */ - dc_state_release_phantom_plane(dc, context, phantom_plane); - dc_state_release_phantom_stream(dc, context, phantom_stream); - - removed_pipe = true; - } - } - return removed_pipe; -} - /* TODO: Input to this function should indicate which pipe indexes (or streams) * require a phantom pipe / stream */ @@ -1803,7 +1740,6 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val int vlevel = 0; int pipe_cnt = 0; display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); - struct mall_temp_config mall_temp_config; /* To handle Freesync properly, setting FreeSync DML parameters * to its default state for the first stage of validation @@ -1813,29 +1749,12 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val DC_LOGGER_INIT(dc->ctx->logger); - /* For fast validation, there are situations where a shallow copy of - * of the dc->current_state is created for the validation. In this case - * we want to save and restore the mall config because we always - * teardown subvp at the beginning of validation (and don't attempt - * to add it back if it's fast validation). If we don't restore the - * subvp config in cases of fast validation + shallow copy of the - * dc->current_state, the dc->current_state will have a partially - * removed subvp state when we did not intend to remove it. - */ - if (fast_validate) { - memset(&mall_temp_config, 0, sizeof(mall_temp_config)); - dcn32_save_mall_state(dc, context, &mall_temp_config); - } - BW_VAL_TRACE_COUNT(); DC_FP_START(); out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); DC_FP_END(); - if (fast_validate) - dcn32_restore_mall_state(dc, context, &mall_temp_config); - if (pipe_cnt == 0) goto validate_out; @@ -2023,10 +1942,6 @@ static struct resource_funcs dcn32_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, - .remove_phantom_pipes = dcn32_remove_phantom_pipes, - .retain_phantom_pipes = dcn32_retain_phantom_pipes, - .save_mall_state = dcn32_save_mall_state, - .restore_mall_state = dcn32_restore_mall_state, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 9ca799da1a56..b27fadd452bb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -91,12 +91,6 @@ bool dcn32_release_post_bldn_3dlut( struct dc_3dlut **lut, struct dc_transfer_func **shaper); -bool dcn32_remove_phantom_pipes(struct dc *dc, - struct dc_state *context, bool fast_update); - -void dcn32_retain_phantom_pipes(struct dc *dc, - struct dc_state *context); - void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -169,14 +163,6 @@ void dcn32_determine_det_override(struct dc *dc, void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes); -void dcn32_save_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config); - -void dcn32_restore_mall_state(struct dc *dc, - struct dc_state *context, - struct mall_temp_config *temp_config); - struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const 
struct dc_state *context); bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 8c278c2fd287..74412e5f03fe 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1607,10 +1607,6 @@ static struct resource_funcs dcn321_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, - .remove_phantom_pipes = dcn32_remove_phantom_pipes, - .retain_phantom_pipes = dcn32_retain_phantom_pipes, - .save_mall_state = dcn32_save_mall_state, - .restore_mall_state = dcn32_restore_mall_state, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, }; -- cgit v1.2.3 From b03281e925f996ffc850ad25de10f4586a8c7435 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 11 Dec 2023 16:49:20 -0500 Subject: drm/amd/display: Fix null reference to state when getting subvp type [WHY&HOW] Need to provide valid pointer to dc_state when getting subvp pipe type. Reviewed-by: Alvin Lee Acked-by: Wayne Lin Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index d0247cd7833f..632aa091b6b6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -2333,7 +2333,7 @@ void dcn10_enable_timing_synchronization( } for (i = 1; i < group_size; i++) { - if (dc_state_get_pipe_subvp_type(NULL, grouped_pipes[i]) == SUBVP_PHANTOM) + if (dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM) continue; opp = grouped_pipes[i]->stream_res.opp; -- cgit v1.2.3 From 760ed918fb1f857490868e4bc91265a4d5d37f37 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 13 Dec 2023 15:04:42 -0500 Subject: drm/amd/display: Create dc_state after resource initialization [WHY&HOW] After refactoring dc_state, it is always constructed at the time of its creation. Construction can only happen after dc resources are initialized, so move creation to be after this. Reviewed-by: George Shen Acked-by: Wayne Lin Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 630a55b2c1d4..043913d65b16 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1025,6 +1025,15 @@ static bool dc_construct(struct dc *dc, } #endif + if (!create_links(dc, init_params->num_virtual_links)) + goto fail; + + /* Create additional DIG link encoder objects if fewer than the platform + * supports were created during link construction. 
+ */ + if (!create_link_encoders(dc)) + goto fail; + /* Creation of current_state must occur after dc->dml * is initialized in dc_create_resource_pool because * on creation it copies the contents of dc->dml @@ -1037,15 +1046,6 @@ static bool dc_construct(struct dc *dc, goto fail; } - if (!create_links(dc, init_params->num_virtual_links)) - goto fail; - - /* Create additional DIG link encoder objects if fewer than the platform - * supports were created during link construction. - */ - if (!create_link_encoders(dc)) - goto fail; - return true; fail: -- cgit v1.2.3 From 08daec77fddf23cd246a0662c6dc0d60229caaee Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 13 Dec 2023 16:39:22 -0500 Subject: drm/amd/display: Deep copy dml2_context when copying dc_state [WHY&HOW] dml2_context should be deep copied from src to dst dc_state. Reviewed-by: George Shen Acked-by: Wayne Lin Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 66a70a60c479..dd52cab7ecdf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -212,10 +212,14 @@ struct dc_state *dc_state_create(struct dc *dc) void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state) { struct kref refcount = dst_state->refcount; +#ifdef CONFIG_DRM_AMD_DC_FP + struct dml2_context *dst_dml2 = dst_state->bw_ctx.dml2; +#endif dc_state_copy_internal(dst_state, src_state); #ifdef CONFIG_DRM_AMD_DC_FP + dst_state->bw_ctx.dml2 = dst_dml2; if (src_state->bw_ctx.dml2) dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2); #endif -- cgit v1.2.3 From 5a82b8d6c05f9b30828ede1b103b9ee5cb5c912e Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 30 Nov 2023 10:44:36 -0500 Subject: drm/amd/display: Fix lightup regression with DP2 single display configs [WHY] Previous fix for multiple displays downstream of DP2 MST hub caused regression [HOW] Match sink IDs instead of sink struct addresses Reviewed-by: Nicholas Kazlauskas Reviewed-by: Charlene Liu Acked-by: Wayne Lin Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 9156e2ea55f5..8c0794af5ade 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -161,6 +161,14 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream == NULL) return false; + /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ + if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { + return (pipe_ctx->stream_res.hpo_dp_stream_enc && + pipe_ctx->link_res.hpo_dp_link_enc && + dc_is_dp_signal(pipe_ctx->stream->signal) && + (pipe_ctx->stream->link->remote_sinks[0]->sink_id == pipe_ctx->stream->sink->sink_id)); + } + return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal)); -- cgit v1.2.3 From dff45f03f508c92cd8eb2050e27b726726b8ae0b Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 8 Dec 2023 11:56:56 -0500 Subject: 
drm/amd/display: Only clear symclk otg flag for HDMI [Description] There is a corner case where the symclk otg flag is cleared when disabling the phantom pipe for subvp (because the phantom and main pipe share the same link). This is undesired because we need the maintain the correct symclk otg flag state for the main pipe. For now only clear the flag only for HDMI signal type, since it's only set for HDMI signal type (phantom is virtual). The ideal solution is to not clear it if the stream is phantom but currently there's a bug that doesn't allow us to do this. Once this issue is fixed the proper fix can be implemented. Reviewed-by: Samson Tam Acked-by: Wayne Lin Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 ++- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 3 ++- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 3 ++- drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 762e674f9998..6457099ed588 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2125,7 +2125,8 @@ static void dce110_reset_hw_ctx_wrap( BREAK_TO_DEBUGGER(); } pipe_ctx_old->stream_res.tg->funcs->disable_crtc(pipe_ctx_old->stream_res.tg); - pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0; + if (dc_is_hdmi_tmds_signal(pipe_ctx_old->stream->signal)) + pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0; pipe_ctx_old->plane_res.mi->funcs->free_mem_input( pipe_ctx_old->plane_res.mi, dc->current_state->stream_count); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 632aa091b6b6..1c5e3bb6f0ee 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1058,7 +1058,8 @@ static void dcn10_reset_back_end_for_pipe( if (pipe_ctx->stream_res.tg->funcs->set_drr) pipe_ctx->stream_res.tg->funcs->set_drr( pipe_ctx->stream_res.tg, NULL); - pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; } for (i = 0; i < dc->res_pool->pipe_count; i++) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 88fe102f8288..0dfcb3cdcd20 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2624,7 +2624,8 @@ static void dcn20_reset_back_end_for_pipe( * the case where the same symclk is shared across multiple otg * instances */ - link->phy_state.symclk_ref_cnts.otg = 0; + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) + link->phy_state.symclk_ref_cnts.otg = 0; if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) { link_hwss->disable_link_output(link, &pipe_ctx->link_res, pipe_ctx->stream->signal); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 994250b6f2ef..260860c259f3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -524,7 +524,8 @@ static void dcn31_reset_back_end_for_pipe( if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass) pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; if (pipe_ctx->stream_res.tg->funcs->set_drr) pipe_ctx->stream_res.tg->funcs->set_drr( -- cgit v1.2.3 From e7b2b108cdeab76a7e7324459e50b0c1214c0386 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Fri, 8 Dec 2023 12:19:33 -0500 Subject: drm/amd/display: Fix hang/underflow when transitioning to ODM4:1 [Why] Under some circumstances, disabling an OPTC and attempting to reclaim its OPP(s) for a different OPTC could cause a hang/underflow due to OPPs not being properly disconnected from the disabled OPTC. [How] Ensure that all OPPs are unassigned from an OPTC when it gets disabled. Reviewed-by: Alvin Lee Acked-by: Wayne Lin Signed-off-by: Ilya Bakoulin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c | 7 +++++++ drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c index a2c4db2cebdd..91ea0d4da06a 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -172,6 +172,13 @@ static bool optc32_disable_crtc(struct timing_generator *optc) REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, 0xf, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf, + OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(CONTROL, VTG0_ENABLE, 0); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index a4a39f1638cf..08a59cf449ca 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -144,6 +144,13 @@ static bool optc35_disable_crtc(struct timing_generator *optc) REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, 0xf, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf, + OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(CONTROL, VTG0_ENABLE, 0); -- cgit v1.2.3 From 85fce153995e177ca307786b4ecf190b4daa540c Mon Sep 17 00:00:00 2001 From: Allen Pan Date: Fri, 8 Dec 2023 18:49:19 -0500 Subject: drm/amd/display: change static screen wait frame_count for ips [Why] the original wait for 2 static frames before enter static screen was not good enough for IPS-enabled case since enter/exit takes more time. [How] Changed logic for hardcoded wait frame values. 
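For readers skimming the log, the wait-count selection that the dcn35_set_drr() hunk below introduces boils down to the sketch here. The helper name pick_static_screen_wait_frames() is invented purely for illustration and does not exist in the driver; the field and option names are taken from the patch itself.

/* Illustrative sketch only; the real logic is inlined in dcn35_set_drr() below. */
static unsigned int pick_static_screen_wait_frames(const struct dc *dc,
						   const struct dc_crtc_timing *timing)
{
	unsigned int num_frames = 2; /* legacy default: two static frames */
	unsigned int frame_rate = timing->pix_clk_100hz /
				  (timing->h_total * timing->v_total);

	/* IPS entry/exit takes longer, so high refresh panels wait for
	 * proportionally more frames before static screen is entered. */
	if (dc->debug.static_screen_wait_frames &&
	    frame_rate >= 120 && dc->caps.ips_support &&
	    dc->config.disable_ips != DMUB_IPS_DISABLE_ALL)
		num_frames = 2 * (frame_rate % 60);

	return num_frames;
}

The resulting count is then handed to set_static_screen_control() together with the manual DRR trigger event (OTG_TRIG_A), exactly as in the hunk that follows.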
Reviewed-by: Charlene Liu Acked-by: Wayne Lin Signed-off-by: Allen Pan Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c | 2 +- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 41 ++++++++++++++++++++++ .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h | 4 +++ .../amd/display/dc/resource/dcn35/dcn35_resource.c | 3 +- 5 files changed, 49 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a270b4bf7b95..f622f4f0e1a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -982,6 +982,7 @@ struct dc_debug_options { unsigned int ips2_entry_delay_us; bool disable_timeout; bool disable_extblankadj; + unsigned int static_screen_wait_frames; }; struct gpu_info_soc_bounding_box_v1_0; diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c index d594905eb246..a630aa77dcec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c @@ -68,7 +68,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .prepare_bandwidth = dcn35_prepare_bandwidth, .optimize_bandwidth = dcn35_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, + .set_drr = dcn35_set_drr, .get_position = dcn10_get_position, .set_static_screen_control = dcn30_set_static_screen_control, .setup_stereo = dcn10_setup_stereo, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 582852ed21fb..ad710b4036de 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1312,3 +1312,44 @@ uint32_t dcn35_get_idle_state(const struct dc *dc) return 0; } + +void dcn35_set_drr(struct pipe_ctx **pipe_ctx, + int num_pipes, struct dc_crtc_timing_adjust adjust) +{ + int i = 0; + struct drr_params params = {0}; + // DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow + unsigned int event_triggers = 0x800; + // Note DRR trigger events are generated regardless of whether num frames met. 
+ unsigned int num_frames = 2; + + params.vertical_total_max = adjust.v_total_max; + params.vertical_total_min = adjust.v_total_min; + params.vertical_total_mid = adjust.v_total_mid; + params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num; + + for (i = 0; i < num_pipes; i++) { + if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; + struct dc *dc = pipe_ctx[i]->stream->ctx->dc; + + if (dc->debug.static_screen_wait_frames) { + unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total); + + if (frame_rate >= 120 && dc->caps.ips_support && + dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { + /*ips enable case*/ + num_frames = 2 * (frame_rate % 60); + } + } + if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) + pipe_ctx[i]->stream_res.tg->funcs->set_drr( + pipe_ctx[i]->stream_res.tg, ¶ms); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) + pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( + pipe_ctx[i]->stream_res.tg, + event_triggers, num_frames); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h index b7bafe7fe2fd..fd66316e33de 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h @@ -86,4 +86,8 @@ void dcn35_dsc_pg_control( void dcn35_set_idle_state(const struct dc *dc, bool allow_idle); uint32_t dcn35_get_idle_state(const struct dc *dc); + +void dcn35_set_drr(struct pipe_ctx **pipe_ctx, + int num_pipes, struct dc_crtc_timing_adjust adjust); + #endif /* __DC_HWSS_DCN35_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index d0c3c5b47094..810cfe213632 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -780,7 +780,8 @@ static const struct dc_debug_options debug_defaults_drv = { .ignore_pg = true, .psp_disabled_wa = true, .ips2_eval_delay_us = 200, - .ips2_entry_delay_us = 400 + .ips2_entry_delay_us = 400, + .static_screen_wait_frames = 2, }; static const struct dc_panel_config panel_config_defaults = { -- cgit v1.2.3 From 0061080e5d1982e4dd424c4ba1d6ae20f11eb03d Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 9 Dec 2023 12:35:25 -0500 Subject: drm/amd/display: [FW Promotion] Release 0.0.197.0 - Remove unused dmub_fw_boot_options flag Acked-by: Wayne Lin Signed-off-by: Anthony Koo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 3c092064c72e..c64b6c848ef7 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -653,7 +653,7 @@ union dmub_fw_boot_options { uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/ uint32_t usb4_cm_version: 1; /**< 1 CM support */ uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */ - uint32_t usb4_dpia_bw_alloc_supported: 1; /* 1 if USB4 dpia BW allocation supported */ + uint32_t reserved0: 1; uint32_t disable_clk_ds: 1; /* 1 if disallow 
dispclk_ds and dppclk_ds*/ uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */ uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/ -- cgit v1.2.3 From 731b2f6e6be4a4946724e47c15cba1e40568ad13 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 10 Dec 2023 21:20:56 -0500 Subject: drm/amd/display: 3.2.265 This DC patchset brings improvements in multiple areas. In summary, we highlight: - change static screen wait frame_count for ips - Fix hang/underflow when transitioning to ODM4:1 - Only clear symclk otg flag for HDMI - Fix lightup regression with DP2 single display configs - Refactor phantom resource allocation - Refactor dc_state interface - Wake DMCUB before executing GPINT commands - Wake DMCUB before sending a command - Refactor DMCUB enter/exit idle interface - enable dcn35 idle power optimization - fix usb-c connector_type - add debug option for ExtendedVBlank DLG adjust - Set test_pattern_changed update flag on pipe enable - dereference variable before checking for zero - get dprefclk ss info from integration info table - skip error logging when DMUB is inactive from S3 - make flip_timestamp_in_us a 64-bit variable - Add case for dcn35 to support usb4 dmub hpd event - Add function for dumping clk registers - Unify optimize_required flags and VRR adjustments - Revert using channel_width as 2 for vram table 3.0 - remove HPO PG in driver side - do not send commands to DMUB if DMUB is inactive from S3 Acked-by: Wayne Lin Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f622f4f0e1a0..3ca72c097aa1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -51,7 +51,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.264" +#define DC_VER "3.2.265" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 9f91e983ee82d3b6f6d713e1c84ebb8d53180b3d Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 12 Dec 2023 17:26:58 +0800 Subject: drm/amdgpu: MCA supports recording umc address information MCA supports recording umc address information. V2: Move err_addr variable from struct ras_err_node to struct ras_err_info. 
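Condensed, the interface change in the hunks that follow amounts to the sketch below; the struct and function names are taken from the patch itself, while the caller context shown is only illustrative.

struct ras_err_addr {
	uint64_t err_status;
	uint64_t err_ipid;
	uint64_t err_addr;
};

/* A UMC caller decodes the MCA bank registers into a ras_err_addr and passes
 * it alongside the error count; non-UMC blocks simply pass NULL instead. */
struct ras_err_addr err_addr = {
	.err_status = entry->regs[MCA_REG_IDX_STATUS],
	.err_ipid   = entry->regs[MCA_REG_IDX_IPID],
	.err_addr   = entry->regs[MCA_REG_IDX_ADDR],
};
amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, &err_addr, (uint64_t)count);

Keeping the address fields in struct ras_err_info (rather than struct ras_err_node, per the v2 note) means each aggregated error record carries its own status/IPID/address triple.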
Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 13 +++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 22 +++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 13 +++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 4 ++-- 8 files changed, 46 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 210aea590a52..8911310f98df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -218,6 +218,7 @@ static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, st int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) { struct amdgpu_smuio_mcm_config_info mcm_info; + struct ras_err_addr err_addr = {0}; struct mca_bank_set mca_set; struct mca_bank_node *node; struct mca_bank_entry *entry; @@ -246,10 +247,18 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo mcm_info.socket_id = entry->info.socket_id; mcm_info.die_id = entry->info.aid; + if (blk == AMDGPU_RAS_BLOCK__UMC) { + err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; + err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; + err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; + } + if (type == AMDGPU_MCA_ERROR_TYPE_UE) - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count); + amdgpu_ras_error_statistic_ue_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); else - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count); + amdgpu_ras_error_statistic_ce_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); } out_mca_release: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index bacb59d8b701..bad62141f708 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1156,8 +1156,10 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; - amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count); - amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count); + amdgpu_ras_error_statistic_ce_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->ce_count); + amdgpu_ras_error_statistic_ue_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->ue_count); } } else { /* for legacy asic path which doesn't has error source info */ @@ -3691,7 +3693,8 @@ static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct } static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info) + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr) { struct ras_err_node *err_node; @@ -3705,6 +3708,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); + if (err_addr) + memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr)); + err_data->err_list_count++; 
list_add_tail(&err_node->node, &err_data->err_node_list); list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp); @@ -3713,7 +3719,8 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d } int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) { struct ras_err_info *err_info; @@ -3723,7 +3730,7 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); if (!err_info) return -EINVAL; @@ -3734,7 +3741,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) { struct ras_err_info *err_info; @@ -3744,7 +3752,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); if (!err_info) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 6a941eb8fb8f..76fb85628716 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -452,10 +452,17 @@ struct ras_fs_data { char debugfs_name[32]; }; +struct ras_err_addr { + uint64_t err_status; + uint64_t err_ipid; + uint64_t err_addr; +}; + struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; u64 ue_count; + struct ras_err_addr err_addr; }; struct ras_err_node { @@ -806,8 +813,10 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, int amdgpu_ras_error_data_init(struct ras_err_data *err_data); void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 9a95b9f226b8..a6c88f2fe6e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1313,10 +1313,10 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { case AMDGPU_MCA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); break; case AMDGPU_MCA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 00b21ece081f..131cddbdda0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3828,8 +3828,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, /* the caller should make sure initialize value of * err_data->ue_count and err_data->ce_count */ - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); } static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 9b0146732e13..fb53aacdcba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -652,8 +652,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); } static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 0f24af6f2810..2d688dca26be 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2156,7 +2156,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); } static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index e9c2ff74f0bc..8d60c39ae1c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -166,8 +166,8 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count); umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); return 0; } -- cgit v1.2.3 From a8c77a121ce12d5ce5500f5777e00e5a841ad51a Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Wed, 13 Dec 2023 16:14:40 +0800 Subject: drm/amdgpu: Add poison mode check error condition for umc v12_0 Add poison mode check error condition for umc v12_0. 
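In short, the uncorrectable-error helper ends up looking roughly like this after the change; the snippet is reassembled from the hunks below rather than quoted verbatim from the tree.

bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
{
	/* In poison mode, a valid Deferred error is classified as uncorrectable. */
	if (amdgpu_ras_is_poison_mode_supported(adev) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1))
		return true;

	/* Otherwise fall back to the existing PCC/UC/TCC checks. */
	return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
		(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
		 REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
		 REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
}

The correctable-error helper gains the mirror-image check and returns false for the same condition, so a poisoned (deferred) error is never counted as CE. Passing adev into both helpers is what forces the signature change visible in umc_v12_0.h and smu_v13_0_6_ppt.c.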
Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 20 +++++++++++++++----- drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++-- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 8d60c39ae1c5..8430888760ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -88,16 +88,26 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } -bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { + if (amdgpu_ras_is_poison_mode_supported(adev) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)) + return true; + return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); } -bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { + if (amdgpu_ras_is_poison_mode_supported(adev) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)) + return false; + return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 && @@ -105,7 +115,7 @@ bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) /* Identify data parity error in replay mode */ ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) && - !(umc_v12_0_is_uncorrectable_error(mc_umc_status))))); + !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))))); } static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, @@ -124,7 +134,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - if (umc_v12_0_is_correctable_error(mc_umc_status)) + if (umc_v12_0_is_correctable_error(adev, mc_umc_status)) *error_count += 1; } @@ -142,7 +152,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - if (umc_v12_0_is_uncorrectable_error(mc_umc_status)) + if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) *error_count += 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index b34b1e358f8b..17b4b52d6f13 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -117,8 +117,8 @@ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ } while (0) -bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status); -bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status); 
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); +bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); extern const uint32_t umc_v12_0_channel_idx_tbl[] diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 3998c9b31d07..6cbd335a804b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2524,9 +2524,9 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct return 0; } - if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0)) + if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(adev, status0)) *count = 1; - else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0)) + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(adev, status0)) *count = 1; return 0; -- cgit v1.2.3 From 6fe08f56db798659beca41ab5b1727a31518f794 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Wed, 13 Dec 2023 16:15:30 +0800 Subject: drm/amd/pm: smu v13_0_6 supports ecc info by default smu v13_0_6 supports ecc info by default. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 6cbd335a804b..81b217bbdebb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2864,6 +2864,13 @@ static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, return ret; } +static ssize_t smu_v13_0_6_get_ecc_info(struct smu_context *smu, + void *table) +{ + /* Support ecc info by default */ + return 0; +} + static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { /* init dpm */ .get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask, @@ -2918,6 +2925,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .i2c_init = smu_v13_0_6_i2c_control_init, .i2c_fini = smu_v13_0_6_i2c_control_fini, .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, + .get_ecc_info = smu_v13_0_6_get_ecc_info, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) -- cgit v1.2.3 From 99cab331a4ee621e3604542ca88f9d76f2865aef Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 12 Dec 2023 17:23:23 +0800 Subject: drm/amdgpu: Add umc page retirement for umc v12_0 Add umc page retirement for umc v12_0. V2: 1. Changed umc page retirement check condition to call umc_v12_0_is_uncorrectable_error. 2. Use memset to clear the contents of the umc error address structure. 
Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 56 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 4 +++ 2 files changed, 60 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 8430888760ba..7458a218e89d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "umc/umc_12_0_0_offset.h" #include "umc/umc_12_0_0_sh_mask.h" +#include "mp/mp_13_0_6_sh_mask.h" const uint32_t umc_v12_0_channel_idx_tbl[] @@ -370,6 +371,59 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev, return 0; } +static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_smu_log_ras_error(adev, + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status); + amdgpu_mca_smu_log_ras_error(adev, + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status); +} + +static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_node *err_node; + uint64_t mc_umc_status; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + for_each_ras_error(err_node, err_data) { + mc_umc_status = err_node->err_info.err_addr.err_status; + if (!mc_umc_status) + continue; + + if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) { + uint64_t mca_addr, err_addr, mca_ipid; + uint32_t InstanceIdLo; + struct amdgpu_smuio_mcm_config_info *mcm_info; + + mcm_info = &err_node->err_info.mcm_info; + mca_addr = err_node->err_info.err_addr.err_addr; + mca_ipid = err_node->err_info.err_addr.err_ipid; + + err_addr = REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); + + dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", + mca_ipid, + mcm_info->die_id, + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + err_addr); + + umc_v12_0_convert_error_address(adev, + err_data, err_addr, + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + mcm_info->die_id); + + /* Clear umc error address content */ + memset(&err_node->err_info.err_addr, + 0, sizeof(err_node->err_info.err_addr)); + } + } +} + static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev) { amdgpu_umc_loop_channels(adev, @@ -396,4 +450,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = { }, .err_cnt_init = umc_v12_0_err_cnt_init, .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode, + .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count, + .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 17b4b52d6f13..e8de3a92251a 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -117,6 +117,10 @@ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ } while (0) +#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \ + (((_ipid_lo) >> 12) & 0xF)) +#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) + bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool 
umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); -- cgit v1.2.3 From 87825c860eb8e4b80391c51ea1bb99e5cbac0025 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Tue, 19 Dec 2023 14:39:42 +0800 Subject: drm/amdgpu: re-create idle bo's PTE during VM state machine reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Idle bo's PTE needs to be re-created when resetting VM state machine. Set idle bo's vm_bo as moved to mark it as invalid. Fixes: 55bf196f60df ("drm/amdgpu: reset VM when an error is detected") Signed-off-by: ZhenGuo Yin Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7da71b6a9dc6..b8fcb6c55698 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -285,6 +285,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { struct amdgpu_bo *bo = vm_bo->bo; + vm_bo->moved = true; if (!bo || bo->tbo.type != ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm_bo->vm->moved); else if (bo->parent) -- cgit v1.2.3 From 9dda0c07f00f511c112af135aa1ee349345037fa Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 4 Dec 2023 15:19:34 -0500 Subject: drm/amd/display: Revert " drm/amd/display: Use channel_width = 2 for vram table 3.0" [Description] Revert commit fec05adc40c2 ("drm/amd/display: Use channel_width = 2 for vram table 3.0") Because the issue is being fixed from VBIOS side. Reviewed-by: Samson Tam Acked-by: Wayne Lin Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 6a96810a477e..7cdb1a8a0ba0 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2386,13 +2386,7 @@ static enum bp_result get_vram_info_v30( return BP_RESULT_BADBIOSTABLE; info->num_chans = info_v30->channel_num; - /* As suggested by VBIOS we should always use - * dram_channel_width_bytes = 2 when using VRAM - * table version 3.0. This is because the channel_width - * param in the VRAM info table is changed in 7000 series and - * no longer represents the memory channel width. - */ - info->dram_channel_width_bytes = 2; + info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8; return result; } -- cgit v1.2.3 From ebab8c3eb6a6515dc14cd93fc29dd287709da6d3 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 14 Dec 2023 09:42:03 -0500 Subject: drm/amdkfd: svm range always mapped flag not working on APU On gfx943 APU there is no VRAM and page migration, queue CWSR area, svm range with always mapped flag, is not mapped to GPU correctly. This works fine if retry fault on CWSR area can be recovered, but could cause deadlock if there is another retry fault recover waiting for CWSR to finish. Fix this by mapping svm range with always mapped flag to GPU with ACCESS attribute if XNACK ON. There is side effect, because all GPUs have ACCESS attribute by default on new svm range with XNACK on, the CWSR area will be mapped to all GPUs after this change. 
This side effect will be fixed with Thunk change to set CWSR svm range with ACCESS_IN_PLACE attribute on the GPU that user queue is created. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f2f3c338fd94..a15bfb5223e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1653,18 +1653,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm, if (test_bit(gpuidx, prange->bitmap_access)) bitmap_set(ctx->bitmap, gpuidx, 1); } + + /* + * If prange is already mapped or with always mapped flag, + * update mapping on GPUs with ACCESS attribute + */ + if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { + if (prange->mapped_to_gpu || + prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED) + bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); + } } else { bitmap_or(ctx->bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); } if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { - bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); - if (!prange->mapped_to_gpu || - bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { - r = 0; - goto free_ctx; - } + r = 0; + goto free_ctx; } if (prange->actual_loc && !prange->ttm_res) { -- cgit v1.2.3 From 3248211dd971ed2b614307eb42cecee3e6feecff Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 15 Dec 2023 10:37:39 -0500 Subject: drm/amd/display: disable FPO and SubVP for older DMUB versions on DCN32x There have recently been changes that break backwards compatibility, that were introduced into DMUB firmware (for DCN32x) concerning FPO and SubVP. So, since those are just power optimization features, we can just disable them unless the user is using a new enough version of DMUB firmware. 
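The gate itself is just a packed firmware-version comparison; a minimal sketch follows (assuming a simple major/minor/revision packing, which may not match the real DMUB_FW_VERSION() encoding):

#include <stdbool.h>
#include <stdint.h>

/* Assumed packing, for illustration only. */
#define FW_VERSION(major, minor, rev) \
	((uint32_t)(((major) << 16) | ((minor) << 8) | (rev)))

static bool dmub_fw_supports_fpo_subvp(uint32_t fw_version)
{
	/* Firmware older than 7.0.35 predates the interface change, so the
	 * FPO/SubVP power optimizations have to stay disabled there. */
	return fw_version >= FW_VERSION(7, 0, 35);
}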
Cc: stable@vger.kernel.org Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2870 Fixes: ed6e2782e974 ("drm/amd/display: For cursor P-State allow for SubVP") Reported-by: Mikhail Gavrilov Closes: https://lore.kernel.org/r/CABXGCsNRb0QbF2pKLJMDhVOKxyGD6-E+8p-4QO6FOWa6zp22_A@mail.gmail.com/ Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 5f7f474ef51c..c1a9b746c43f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -960,6 +960,12 @@ void dcn32_init_hw(struct dc *dc) dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support; dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch; + + if (dc->ctx->dmub_srv->dmub->fw_version < + DMUB_FW_VERSION(7, 0, 35)) { + dc->debug.force_disable_subvp = true; + dc->debug.disable_fpo_optimizations = true; + } } } -- cgit v1.2.3 From 989824589f793120833bef13aa4e21f5a836a707 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 5 Dec 2023 14:55:31 +0800 Subject: drm/amd/display: Add case for dcn35 to support usb4 dmub hpd event [Why & how] Refactor dc_is_dmub_outbox_supported() a bit and add case for dcn35 to register dmub outbox notification irq to handle usb4 relevant hpd event. Reviewed-by: Roman Li Reviewed-by: Jun Lei Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 76b47f178127..5c1185206645 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5095,18 +5095,28 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) */ bool dc_is_dmub_outbox_supported(struct dc *dc) { - /* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */ - if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP && - dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 && - !dc->debug.dpia_debug.bits.disable_dpia) - return true; + switch (dc->ctx->asic_id.chip_family) { - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 && - !dc->debug.dpia_debug.bits.disable_dpia) - return true; + case FAMILY_YELLOW_CARP: + /* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */ + if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 && + !dc->debug.dpia_debug.bits.disable_dpia) + return true; + break; + + case AMDGPU_FAMILY_GC_11_0_1: + case AMDGPU_FAMILY_GC_11_5_0: + if (!dc->debug.dpia_debug.bits.disable_dpia) + return true; + break; + + default: + break; + } /* dmub aux needs dmub notifications to be enabled */ return dc->debug.enable_dmub_aux_for_legacy_ddc; + } /** -- cgit v1.2.3 From 51e7b64690776a9981355428b537af9048308a95 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 6 Dec 2023 17:14:48 -0500 Subject: drm/amd/display: get dprefclk ss info from integration info table [why & how] we have two SSC_En: we get ssc_info from dce_info for MPLL_SSC_EN. 
we used to call VBIOS cmdtbl's smu_info's SS persentage for DPRECLK SS info, is used for DP AUDIO and VBIOS' smu_info table was from systemIntegrationInfoTable. since dcn35 VBIOS removed smu_info, driver need to use integrationInfotable directly. Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 19 ++++++++++++++----- .../drm/amd/display/include/grph_object_ctrl_defs.h | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 7cdb1a8a0ba0..2d1f5efa9091 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1014,13 +1014,20 @@ static enum bp_result get_ss_info_v4_5( DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_DISPLAY_PORT: - ss_info->spread_spectrum_percentage = + if (bp->base.integrated_info) { + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", bp->base.integrated_info->gpuclk_ss_percentage); + ss_info->spread_spectrum_percentage = + bp->base.integrated_info->gpuclk_ss_percentage; + ss_info->type.CENTER_MODE = + bp->base.integrated_info->gpuclk_ss_type; + } else { + ss_info->spread_spectrum_percentage = disp_cntl_tbl->dp_ss_percentage; - ss_info->spread_spectrum_range = + ss_info->spread_spectrum_range = disp_cntl_tbl->dp_ss_rate_10hz * 10; - if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) - ss_info->type.CENTER_MODE = true; - + if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE) + ss_info->type.CENTER_MODE = true; + } DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage); break; case AS_SIGNAL_TYPE_GPU_PLL: @@ -2814,6 +2821,8 @@ static enum bp_result get_integrated_info_v2_2( info->ma_channel_number = info_v2_2->umachannelnumber; info->dp_ss_control = le16_to_cpu(info_v2_2->reserved1); + info->gpuclk_ss_percentage = info_v2_2->gpuclk_ss_percentage; + info->gpuclk_ss_type = info_v2_2->gpuclk_ss_type; for (i = 0; i < NUMBER_OF_UCHAR_FOR_GUID; ++i) { info->ext_disp_conn_info.gu_id[i] = diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h index bc96d0211360..813463ffe15c 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h @@ -417,6 +417,8 @@ struct integrated_info { /* V2.1 */ struct edp_info edp1_info; struct edp_info edp2_info; + uint32_t gpuclk_ss_percentage; + uint32_t gpuclk_ss_type; }; /* -- cgit v1.2.3 From 81b9aeb7b995f3870d691ec5ea95518d5b169203 Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Tue, 5 Dec 2023 16:57:27 -0500 Subject: drm/amd/display: dereference variable before checking for zero [Why] Driver incorrectly checks if pointer variable OutBpp is null instead of if the value being pointed to is zero. [How] Dereference OutBpp before checking for a value of zero. 
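A minimal standalone illustration of the bug class (not the dml2 code itself): checking the pointer is almost always true, so the DSC retry path below could never trigger.

#include <stdbool.h>

static bool needs_dsc_retry_buggy(const unsigned int *OutBpp)
{
	return OutBpp == 0;	/* compares the address, which is never NULL here */
}

static bool needs_dsc_retry_fixed(const unsigned int *OutBpp)
{
	return *OutBpp == 0;	/* compares the value: no valid bpp was found */
}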
Reviewed-by: Chaitanya Dhere Acked-by: Wayne Lin Signed-off-by: Josip Pavic Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 180f8a98a361..b95bf27f2fe2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -5420,7 +5420,7 @@ static void CalculateOutputLink( *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); - if (OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { + if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { *RequiresDSC = true; LinkDSCEnable = true; *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, -- cgit v1.2.3 From 4a0057afa35872a5f2e65576785844688dd9fa5e Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Tue, 19 Dec 2023 14:39:42 +0800 Subject: drm/amdgpu: re-create idle bo's PTE during VM state machine reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Idle bo's PTE needs to be re-created when resetting VM state machine. Set idle bo's vm_bo as moved to mark it as invalid. Fixes: 55bf196f60df ("drm/amdgpu: reset VM when an error is detected") Signed-off-by: ZhenGuo Yin Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d1b8afd105c9..5baefb548a29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -285,6 +285,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { struct amdgpu_bo *bo = vm_bo->bo; + vm_bo->moved = true; if (!bo || bo->tbo.type != ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm_bo->vm->moved); else if (bo->parent) -- cgit v1.2.3 From 1ef151d7aa0a36050fab8063ec35b2c7c0f9870c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 13 Jan 2023 15:09:07 -0800 Subject: drm/xe: Add documentation for mem_type mem_type field was added in commit d8b52a02cb40 ("drm/xe: Implement stolen memory.") to designate the TTM memory type for that mgr. Add kernel-doc with its description. 
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index cf02c62ff427..39aa2ec1b968 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -27,7 +27,7 @@ struct xe_ttm_vram_mgr { u64 default_page_size; /** @lock: protects allocations of VRAM */ struct mutex lock; - + /** @mem_type: The TTM memory type */ u32 mem_type; }; -- cgit v1.2.3 From a02a0c6d53099579e3b7aa811e1e254a11681c8a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 5 Dec 2022 21:07:56 -0800 Subject: drm/xe: Add min config for kunit integration ARCH=um Some of the tests may benefit from running with ARCH=um, forgoing any additional setup on the CI build side. Add min config for that. Tested with: ./tools/testing/kunit/kunit.py build \ --kunitconfig drivers/gpu/drm/xe/.kunitconfig \ --jobs $(nproc) \ --build_dir build_kunit Signed-off-by: Lucas De Marchi Cc: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Reviewed-by: Mauro Carvalho Chehab --- drivers/gpu/drm/xe/.kunitconfig | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 drivers/gpu/drm/xe/.kunitconfig (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig new file mode 100644 index 000000000000..ce8a4348669d --- /dev/null +++ b/drivers/gpu/drm/xe/.kunitconfig @@ -0,0 +1,7 @@ +# xe dependencies +CONFIG_KUNIT=y +CONFIG_PCI=y +CONFIG_DRM=y +CONFIG_DRM_XE=y +CONFIG_EXPERT=y +CONFIG_DRM_XE_KUNIT_TEST=y -- cgit v1.2.3 From 1598955dfce242113c4ba2cbdb5d4c7c28695a70 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 19 Jan 2023 21:18:44 +0100 Subject: drm/xe/Kconfig.debug: select DEBUG_FS for KUnit runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KUnit reuquires debugfs, as otherwise, it won't build: $ make ARCH=x86_64 O=.kunit --jobs=8 ERROR:root:../drivers/gpu/drm/xe/display/intel_display_debugfs.c:1612:6: error: redefinition of ‘intel_display_debugfs_register’ 1612 | void intel_display_debugfs_register(struct drm_i915_private *i915) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from ../drivers/gpu/drm/xe/display/intel_display_debugfs.c:18: ../drivers/gpu/drm/xe/display/intel_display_debugfs.h:18:20: note: previous definition of ‘intel_display_debugfs_register’ with type ‘void(struct xe_device *)’ 18 | static inline void intel_display_debugfs_register(struct drm_i915_private *i915) {} | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_display_debugfs.c:1935:6: error: redefinition of ‘intel_connector_debugfs_add’ 1935 | void intel_connector_debugfs_add(struct intel_connector *intel_connector) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_display_debugfs.h:19:20: note: previous definition of ‘intel_connector_debugfs_add’ with type ‘void(struct intel_connector *)’ 19 | static inline void intel_connector_debugfs_add(struct intel_connector *connector) {} | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_display_debugfs.c:1993:6: error: redefinition of ‘intel_crtc_debugfs_add’ 1993 | void intel_crtc_debugfs_add(struct drm_crtc *crtc) | ^~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_display_debugfs.h:20:20: note: previous definition of ‘intel_crtc_debugfs_add’ with type 
‘void(struct drm_crtc *)’ 20 | static inline void intel_crtc_debugfs_add(struct drm_crtc *crtc) {} | ^~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Mauro Carvalho Chehab Cc: Thomas Hellström Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/.kunitconfig | 1 + drivers/gpu/drm/xe/Kconfig.debug | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig index ce8a4348669d..aaf30db71cea 100644 --- a/drivers/gpu/drm/xe/.kunitconfig +++ b/drivers/gpu/drm/xe/.kunitconfig @@ -1,6 +1,7 @@ # xe dependencies CONFIG_KUNIT=y CONFIG_PCI=y +CONFIG_DEBUG_FS=y CONFIG_DRM=y CONFIG_DRM_XE=y CONFIG_EXPERT=y diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index b61fd43a76fe..565be3f6b9b9 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -63,7 +63,7 @@ config DRM_XE_SIMPLE_ERROR_CAPTURE config DRM_XE_KUNIT_TEST tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS - depends on DRM_XE && KUNIT + depends on DRM_XE && KUNIT && DEBUG_FS default KUNIT_ALL_TESTS select DRM_EXPORT_FOR_TESTS if m help -- cgit v1.2.3 From a4c75c0fd613a1cfb7f5ba6b494b80b40adbc78f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 19 Jan 2023 21:26:49 +0100 Subject: drm/xe: KUnit tests depend on CONFIG_DRM_FBDEV_EMULATION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ERROR:root:../drivers/gpu/drm/xe/display/intel_fbdev.c:585:5: error: redefinition of ‘intel_fbdev_init’ 585 | int intel_fbdev_init(struct drm_device *dev) | ^~~~~~~~~~~~~~~~ In file included from ../drivers/gpu/drm/xe/display/intel_fbdev.c:55: ../drivers/gpu/drm/xe/display/intel_fbdev.h:26:19: note: previous definition of ‘intel_fbdev_init’ with type ‘int(struct drm_device *)’ 26 | static inline int intel_fbdev_init(struct drm_device *dev) | ^~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.c:626:6: error: redefinition of ‘intel_fbdev_initial_config_async’ 626 | void intel_fbdev_initial_config_async(struct drm_device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.h:31:20: note: previous definition of ‘intel_fbdev_initial_config_async’ with type ‘void(struct drm_device *)’ 31 | static inline void intel_fbdev_initial_config_async(struct drm_device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.c:646:6: error: redefinition of ‘intel_fbdev_unregister’ 646 | void intel_fbdev_unregister(struct drm_i915_private *dev_priv) | ^~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.h:35:20: note: previous definition of ‘intel_fbdev_unregister’ with type ‘void(struct xe_device *)’ 35 | static inline void intel_fbdev_unregister(struct drm_i915_private *dev_priv) | ^~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.c:661:6: error: redefinition of ‘intel_fbdev_fini’ 661 | void intel_fbdev_fini(struct drm_i915_private *dev_priv) | ^~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.h:39:20: note: previous definition of ‘intel_fbdev_fini’ with type ‘void(struct xe_device *)’ 39 | static inline void intel_fbdev_fini(struct drm_i915_private *dev_priv) | ^~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.c:692:6: error: redefinition of ‘intel_fbdev_set_suspend’ 692 | void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) | ^~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.h:43:20: 
note: previous definition of ‘intel_fbdev_set_suspend’ with type ‘void(struct drm_device *, int, bool)’ {aka ‘void(struct drm_device *, int, _Bool)’} 43 | static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) | ^~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.c:751:6: error: redefinition of ‘intel_fbdev_output_poll_changed’ 751 | void intel_fbdev_output_poll_changed(struct drm_device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.h:47:20: note: previous definition of ‘intel_fbdev_output_poll_changed’ with type ‘void(struct drm_device *)’ 47 | static inline void intel_fbdev_output_poll_changed(struct drm_device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.c:770:6: error: redefinition of ‘intel_fbdev_restore_mode’ 770 | void intel_fbdev_restore_mode(struct drm_device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.h:51:20: note: previous definition of ‘intel_fbdev_restore_mode’ with type ‘void(struct drm_device *)’ 51 | static inline void intel_fbdev_restore_mode(struct drm_device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.c:785:27: error: redefinition of ‘intel_fbdev_framebuffer’ 785 | struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev) | ^~~~~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/display/intel_fbdev.h:54:41: note: previous definition of ‘intel_fbdev_framebuffer’ with type ‘struct intel_framebuffer *(struct intel_fbdev *)’ 54 | static inline struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev) | ^~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Mauro Carvalho Chehab Cc: Thomas Hellström Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/.kunitconfig | 3 +++ drivers/gpu/drm/xe/Kconfig.debug | 1 + 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig index aaf30db71cea..06ed30420a8d 100644 --- a/drivers/gpu/drm/xe/.kunitconfig +++ b/drivers/gpu/drm/xe/.kunitconfig @@ -3,6 +3,9 @@ CONFIG_KUNIT=y CONFIG_PCI=y CONFIG_DEBUG_FS=y CONFIG_DRM=y +CONFIG_DRM_FBDEV_EMULATION=y +CONFIG_DRM_KMS_HELPER=y CONFIG_DRM_XE=y CONFIG_EXPERT=y +CONFIG_FB=y CONFIG_DRM_XE_KUNIT_TEST=y diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 565be3f6b9b9..9c773dd74cbd 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -64,6 +64,7 @@ config DRM_XE_SIMPLE_ERROR_CAPTURE config DRM_XE_KUNIT_TEST tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS depends on DRM_XE && KUNIT && DEBUG_FS + depends on FB && FB = DRM_KMS_HELPER && DRM_FBDEV_EMULATION default KUNIT_ALL_TESTS select DRM_EXPORT_FOR_TESTS if m help -- cgit v1.2.3 From 9484c7dce4e99a38970baebe9ffdd5d76d757f2c Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Fri, 20 Jan 2023 16:43:27 +0530 Subject: drm/xe/gt: Enable interrupt while initializing root gt At present the interrupts are enabled while initializing the last GT. But this is incorrect for a Multi-GT platform, as root GT initialization will fail with interrupt disabled. Interrupts are required for the GuC submission triggered during initialization. Enable the interrupt during the root GT initialization. 
Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index df2e3573201d..a2caa20f2fb3 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -334,7 +334,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); - if (gt->info.id + 1 == xe->info.tile_count) + if (gt->info.id == XE_GT0) dg1_intr_enable(xe, true); } -- cgit v1.2.3 From 6c8c1e74faecb6ca3057f154e911a52cf6a53d32 Mon Sep 17 00:00:00 2001 From: Philippe Lecluse Date: Fri, 20 Jan 2023 16:30:25 +0100 Subject: drm/xe: Fix Meteor Lake rsa issue on guc loading [ 117.901473] xe 0000:00:02.0: [drm] GuC load failed: status = 0x400000A0 [ 117.901506] xe 0000:00:02.0: [drm] GuC load failed: status: Reset = 0, BootROM = 0x50, UKernel = 0x00, MIA = 0x00, Auth = 0x01 Signed-off-by: Philippe Lecluse Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 39df6945e1d9..61a6430cb435 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -436,6 +436,8 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (err) goto err_hw_fence_irq; + setup_private_ppat(gt); + if (!xe_gt_is_media_type(gt)) { err = xe_ggtt_init(gt, gt->mem.ggtt); if (err) @@ -488,8 +490,6 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_hw_fence_irq; - setup_private_ppat(gt); - xe_reg_sr_apply_mmio(>->reg_sr, gt); err = xe_gt_clock_init(gt); -- cgit v1.2.3 From b3ab1b918e59c84ddaf190f75ba93be6cdea1fcb Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 19 Jan 2023 12:41:17 -0500 Subject: drm/xe/guc_pc: Fix Meteor Lake registers. When adding the frequency management, Meteor Lake platform was left behind. Handling it properly now. 
Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost Cc: Francois Dugast Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 70 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index d751ee98de11..3ba0c8a35109 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -38,6 +38,9 @@ #define GEN12_RPSTAT1 _MMIO(0x1381b4) #define GEN12_CAGF_MASK REG_GENMASK(19, 11) +#define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) +#define MTL_CAGF_MASK REG_GENMASK(8, 0) + #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 @@ -312,7 +315,20 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) freq); } -static void pc_update_rp_values(struct xe_guc_pc *pc) +static void mtl_update_rpe_value(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY.reg); + else + reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY.reg); + + pc->rpe_freq = REG_FIELD_GET(MTL_RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static void tgl_update_rpe_value(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); @@ -329,6 +345,17 @@ static void pc_update_rp_values(struct xe_guc_pc *pc) reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg); pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static void pc_update_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.platform == XE_METEORLAKE) + mtl_update_rpe_value(pc); + else + tgl_update_rpe_value(pc); /* * RPe is decided at runtime by PCODE. 
In the rare case where that's @@ -343,6 +370,7 @@ static ssize_t freq_act_show(struct device *dev, { struct kobject *kobj = &dev->kobj; struct xe_gt *gt = kobj_to_gt(kobj); + struct xe_device *xe = gt_to_xe(gt); u32 freq; ssize_t ret; @@ -355,10 +383,17 @@ static ssize_t freq_act_show(struct device *dev, return ret; xe_device_mem_access_get(gt_to_xe(gt)); - freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg); + + if (xe->info.platform == XE_METEORLAKE) { + freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1.reg); + freq = REG_FIELD_GET(MTL_CAGF_MASK, freq); + } else { + freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg); + freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq); + } + xe_device_mem_access_put(gt_to_xe(gt)); - freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq); ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -607,7 +642,24 @@ static const struct attribute *pc_attrs[] = { NULL }; -static void pc_init_fused_rp_values(struct xe_guc_pc *pc) +static void mtl_init_fused_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + xe_device_assert_mem_access(pc_to_xe(pc)); + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP.reg); + else + reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP.reg); + pc->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, reg) * + GT_FREQUENCY_MULTIPLIER; + pc->rpn_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, reg) * + GT_FREQUENCY_MULTIPLIER; +} + +static void tgl_init_fused_rp_values(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); @@ -623,6 +675,16 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc) pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } +static void pc_init_fused_rp_values(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.platform == XE_METEORLAKE) + mtl_init_fused_rp_values(pc); + else + tgl_init_fused_rp_values(pc); +} static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { int ret; -- cgit v1.2.3 From a93bcc3acf1fdf55b1906e37744ebab9be884a5d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 20 Jan 2023 15:43:58 +0100 Subject: drm/xe: skip Kunit tests requiring real hardware when running on UML Some tests are meant to run only on real hardware. Skip those, if no device was found. 
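For reference, a self-contained KUnit test can skip itself in much the same way; this sketch uses a plain counter in place of the driver_for_each_device() walk the patch adds:

#include <kunit/test.h>

static int detected_devices;	/* assumed to be set by a device probe helper */

static void xe_hw_only_test(struct kunit *test)
{
	if (!detected_devices)
		kunit_skip(test, "test runs only on hardware");

	/* ... exercise the real device here ... */
}

static struct kunit_case xe_hw_test_cases[] = {
	KUNIT_CASE(xe_hw_only_test),
	{}
};

static struct kunit_suite xe_hw_test_suite = {
	.name = "xe_hw_only",
	.test_cases = xe_hw_test_cases,
};
kunit_test_suite(xe_hw_test_suite);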
Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 49f1f0489f1c..b61bde17f123 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -615,16 +615,23 @@ void xe_unregister_pci_driver(void) } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) -static int dev_to_xe_device_fn(struct device *dev, void *data) +struct kunit_test_data { + int ndevs; + xe_device_fn xe_fn; +}; + +static int dev_to_xe_device_fn(struct device *dev, void *__data) { struct drm_device *drm = dev_get_drvdata(dev); - int (*xe_fn)(struct xe_device *xe) = data; + struct kunit_test_data *data = __data; int ret = 0; int idx; + data->ndevs++; + if (drm_dev_enter(drm, &idx)) - ret = xe_fn(to_xe_device(dev_get_drvdata(dev))); + ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); drm_dev_exit(idx); return ret; @@ -645,7 +652,18 @@ static int dev_to_xe_device_fn(struct device *dev, void *data) */ int xe_call_for_each_device(xe_device_fn xe_fn) { - return driver_for_each_device(&xe_pci_driver.driver, NULL, - xe_fn, dev_to_xe_device_fn); + int ret; + struct kunit_test_data data = { + .xe_fn = xe_fn, + .ndevs = 0, + }; + + ret = driver_for_each_device(&xe_pci_driver.driver, NULL, + &data, dev_to_xe_device_fn); + + if (!data.ndevs) + kunit_skip(current->kunit_test, "test runs only on hardware\n"); + + return ret; } #endif -- cgit v1.2.3 From 8375e58c3ac96a43603530a6f02fc81a455982e7 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Mon, 23 Jan 2023 18:17:56 +0100 Subject: drm/xe: Use global macros to set PM functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This aligns with other drivers and fixes build failure when CONFIG_PM_SLEEP is not set, such as on RISC-V. Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index b61bde17f123..67fd9c3818f9 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -584,15 +584,8 @@ static int xe_pci_runtime_idle(struct device *dev) } static const struct dev_pm_ops xe_pm_ops = { - .suspend = xe_pci_suspend, - .resume = xe_pci_resume, - .freeze = xe_pci_suspend, - .thaw = xe_pci_resume, - .poweroff = xe_pci_suspend, - .restore = xe_pci_resume, - .runtime_suspend = xe_pci_runtime_suspend, - .runtime_resume = xe_pci_runtime_resume, - .runtime_idle = xe_pci_runtime_idle, + SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume) + SET_RUNTIME_PM_OPS(xe_pci_runtime_suspend, xe_pci_runtime_resume, xe_pci_runtime_idle) }; static struct pci_driver xe_pci_driver = { -- cgit v1.2.3 From 3949d57f1ef62ea00344617fd638ed6c778db8d8 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Mon, 23 Jan 2023 08:43:10 -0800 Subject: drm/xe/uapi: Rename XE_ENGINE_PROPERTY_X to XE_ENGINE_SET_PROPERTY_X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Engine property get uAPI will be added, so to avoid ambiguity here renaming XE_ENGINE_PROPERTY_X to XE_ENGINE_SET_PROPERTY_X. No changes in behavior. 
Cc: Matthew Brost Cc: Maarten Lankhorst Signed-off-by: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 18 +++++++++--------- include/uapi/drm/xe_drm.h | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 63219bd98be7..1b85bf4abe3d 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -314,15 +314,15 @@ typedef int (*xe_engine_set_property_fn)(struct xe_device *xe, u64 value, bool create); static const xe_engine_set_property_fn engine_set_property_funcs[] = { - [XE_ENGINE_PROPERTY_PRIORITY] = engine_set_priority, - [XE_ENGINE_PROPERTY_TIMESLICE] = engine_set_timeslice, - [XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout, - [XE_ENGINE_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode, - [XE_ENGINE_PROPERTY_PERSISTENCE] = engine_set_persistence, - [XE_ENGINE_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout, - [XE_ENGINE_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger, - [XE_ENGINE_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify, - [XE_ENGINE_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity, + [XE_ENGINE_SET_PROPERTY_PRIORITY] = engine_set_priority, + [XE_ENGINE_SET_PROPERTY_TIMESLICE] = engine_set_timeslice, + [XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout, + [XE_ENGINE_SET_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode, + [XE_ENGINE_SET_PROPERTY_PERSISTENCE] = engine_set_persistence, + [XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout, + [XE_ENGINE_SET_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger, + [XE_ENGINE_SET_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify, + [XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity, }; static int engine_user_ext_set_property(struct xe_device *xe, diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index f64b1c785fad..8dc8ebbaf337 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -511,21 +511,21 @@ struct drm_xe_engine_set_property { __u32 engine_id; /** @property: property to set */ -#define XE_ENGINE_PROPERTY_PRIORITY 0 -#define XE_ENGINE_PROPERTY_TIMESLICE 1 -#define XE_ENGINE_PROPERTY_PREEMPTION_TIMEOUT 2 +#define XE_ENGINE_SET_PROPERTY_PRIORITY 0 +#define XE_ENGINE_SET_PROPERTY_TIMESLICE 1 +#define XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 /* * Long running or ULLS engine mode. DMA fences not allowed in this * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves * as a sanity check the UMD knows what it is doing. Can only be set at * engine create time. 
*/ -#define XE_ENGINE_PROPERTY_COMPUTE_MODE 3 -#define XE_ENGINE_PROPERTY_PERSISTENCE 4 -#define XE_ENGINE_PROPERTY_JOB_TIMEOUT 5 -#define XE_ENGINE_PROPERTY_ACC_TRIGGER 6 -#define XE_ENGINE_PROPERTY_ACC_NOTIFY 7 -#define XE_ENGINE_PROPERTY_ACC_GRANULARITY 8 +#define XE_ENGINE_SET_PROPERTY_COMPUTE_MODE 3 +#define XE_ENGINE_SET_PROPERTY_PERSISTENCE 4 +#define XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT 5 +#define XE_ENGINE_SET_PROPERTY_ACC_TRIGGER 6 +#define XE_ENGINE_SET_PROPERTY_ACC_NOTIFY 7 +#define XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY 8 __u32 property; /** @value: property value */ -- cgit v1.2.3 From 19431b029b8b5d095e77767f269cb142c687084e Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Mon, 23 Jan 2023 09:11:32 -0800 Subject: drm/xe/uapi: Add XE_ENGINE_GET_PROPERTY uAPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is intended to get some properties that are of interest of UMDs like the ban state. Cc: Matthew Brost Cc: Maarten Lankhorst Signed-off-by: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_engine.c | 26 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_engine.h | 2 ++ include/uapi/drm/xe_drm.h | 22 +++++++++++++++++++++- 4 files changed, 51 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 104ab12cc2ed..9881b591bfdd 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -89,6 +89,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_ENGINE_GET_PROPERTY, xe_engine_get_property_ioctl, + DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 1b85bf4abe3d..b69dcbef0824 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -639,6 +639,32 @@ put_rpm: return err; } +int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_get_property *args = data; + struct xe_engine *e; + + mutex_lock(&xef->engine.lock); + e = xa_load(&xef->engine.xa, args->engine_id); + mutex_unlock(&xef->engine.lock); + + if (XE_IOCTL_ERR(xe, !e)) + return -ENOENT; + + switch (args->property) { + case XE_ENGINE_GET_PROPERTY_BAN: + args->value = !!(e->flags & ENGINE_FLAG_BANNED); + break; + default: + return -EINVAL; + } + + return 0; +} + static void engine_kill_compute(struct xe_engine *e) { if (!xe_vm_in_compute_mode(e->vm)) diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h index 4d1b609fea7e..a3a44534003f 100644 --- a/drivers/gpu/drm/xe/xe_engine.h +++ b/drivers/gpu/drm/xe/xe_engine.h @@ -50,5 +50,7 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 8dc8ebbaf337..756c5994ae63 100644 --- 
a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -118,6 +118,7 @@ struct xe_user_extension { #define DRM_XE_ENGINE_SET_PROPERTY 0x0a #define DRM_XE_WAIT_USER_FENCE 0x0b #define DRM_XE_VM_MADVISE 0x0c +#define DRM_XE_ENGINE_GET_PROPERTY 0x0d /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -127,6 +128,7 @@ struct xe_user_extension { #define DRM_IOCTL_XE_VM_DESTROY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy) #define DRM_IOCTL_XE_VM_BIND DRM_IOW( DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind) #define DRM_IOCTL_XE_ENGINE_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create) +#define DRM_IOCTL_XE_ENGINE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_GET_PROPERTY, struct drm_xe_engine_get_property) #define DRM_IOCTL_XE_ENGINE_DESTROY DRM_IOW( DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy) #define DRM_IOCTL_XE_EXEC DRM_IOW( DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_MMIO DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio) @@ -568,8 +570,26 @@ struct drm_xe_engine_create { __u64 reserved[2]; }; +struct drm_xe_engine_get_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @engine_id: Engine ID */ + __u32 engine_id; + + /** @property: property to get */ +#define XE_ENGINE_GET_PROPERTY_BAN 0 + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + struct drm_xe_engine_destroy { - /** @vm_id: VM ID */ + /** @engine_id: Engine ID */ __u32 engine_id; /** @pad: MBZ */ -- cgit v1.2.3 From 09a68b4a76e3d870d2fad34099d27cc7e2c9939b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 23 Jan 2023 15:41:58 +0100 Subject: drm/xe: Convert memory device refcount to s32 The comparison with < 0 suggests that the memory device access should be signed to handle underflow. This makes it work more reliably. As a result, the max refcount is now S32_MAX instead. 
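The underlying reasoning is the usual unsigned-underflow pitfall; a standalone sketch in plain C (not the xe locking/warn machinery):

#include <stdint.h>
#include <stdio.h>

static int32_t ref;	/* signed on purpose: an unbalanced put goes negative */

static void mem_access_put(void)
{
	ref--;
	if (ref < 0)	/* detectable only because the counter is signed */
		fprintf(stderr, "unbalanced memory-access put\n");
}

int main(void)
{
	mem_access_put();	/* put without a matching get */
	return 0;
}

With an unsigned counter the same mistake wraps to a huge positive value and a "< 0" check can never fire, which is why the overflow ceiling moves from U32_MAX to S32_MAX.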
Signed-off-by: Maarten Lankhorst Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 9881b591bfdd..98f08cd9d4b0 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -351,7 +351,7 @@ void xe_device_mem_access_get(struct xe_device *xe) if (resumed) xe_pm_runtime_put(xe); - XE_WARN_ON(xe->mem_access.ref == U32_MAX); + XE_WARN_ON(xe->mem_access.ref == S32_MAX); } void xe_device_mem_access_put(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index d62ee85bfcbe..81bc293fb240 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -173,7 +173,7 @@ struct xe_device { /** @lock: protect the ref count */ struct mutex lock; /** @ref: ref count of memory accesses */ - u32 ref; + s32 ref; /** @hold_rpm: need to put rpm ref back at the end */ bool hold_rpm; } mem_access; -- cgit v1.2.3 From 9b6483af3709386fe0e544bfa8cc01f8a92e0d57 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 24 Jan 2023 18:28:03 +0100 Subject: drm/xe: Map initial FB at the same place in GGTT too I saw a flicker when booting xe, and it's very likely that the original FB was not mapped at the same place when inheriting, fix it. Signed-off-by: Maarten Lankhorst Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 7 ++++++- drivers/gpu/drm/xe/xe_ggtt.c | 16 +++++++++++++--- drivers/gpu/drm/xe/xe_ggtt.h | 1 + 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f07d1cd63fdd..1fcde1e93301 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1110,7 +1110,12 @@ xe_bo_create_locked_range(struct xe_device *xe, XE_BUG_ON(!gt); - err = xe_ggtt_insert_bo(gt->mem.ggtt, bo); + if (flags & XE_BO_CREATE_STOLEN_BIT && + flags & XE_BO_FIXED_PLACEMENT_BIT) { + err = xe_ggtt_insert_bo_at(gt->mem.ggtt, bo, start); + } else { + err = xe_ggtt_insert_bo(gt->mem.ggtt, bo); + } if (err) goto err_unlock_put_bo; } diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0018c8441747..b1b9fc57a5db 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -256,7 +256,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) xe_ggtt_invalidate(ggtt->gt); } -int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { int err; @@ -271,12 +271,22 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return err; mutex_lock(&ggtt->lock); - err = drm_mm_insert_node(&ggtt->mm, &bo->ggtt_node, bo->size); + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, 0, 0, start, end, 0); if (!err) xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); - return 0; + return err; +} + +int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs) +{ + return __xe_ggtt_insert_bo_at(ggtt, bo, ofs, ofs + bo->size); +} + +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +{ + return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) diff --git a/drivers/gpu/drm/xe/xe_ggtt.h 
b/drivers/gpu/drm/xe/xe_ggtt.h index 289c6852ad1a..ab9cfdab5cca 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -23,6 +23,7 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); #endif -- cgit v1.2.3 From f3edf6917ca8e4e11a6af39e926558d4609dd9ea Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 20 Jan 2023 11:48:53 +0000 Subject: drm/xe/bo: reduce xe_bo_create_pin_map() restrictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On DGFX this blows up if can call this with a system memory object: XE_BUG_ON(!mem_type_is_vram(place->mem_type) && place->mem_type != XE_PL_STOLEN); If we consider dpt it looks like we can already in theory hit this, if we run out of vram and stolen vram. It at least seems reasonable to allow calling this on any object which supports CPU access. Note this also changes the behaviour with stolen VRAM and suspend, such that we no longer attempt to migrate stolen objects into system memory. However nothing in stolen should ever need to be restored (same on integrated), so should be fine. Also on small-bar systems the stolen portion is pretty much always non-CPU accessible, and currently pinned objects use plain memcpy when being moved, which doesn't play nicely. Signed-off-by: Matthew Auld Reviewed-by: Matthew Brost Cc: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1fcde1e93301..3c9d90dcf125 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1290,25 +1290,26 @@ int xe_bo_pin(struct xe_bo *bo) return err; /* - * For pinned objects in on DGFX, we expect these objects to be in - * contiguous VRAM memory. Required eviction / restore during suspend / - * resume (force restore to same physical address). + * For pinned objects in on DGFX, which are also in vram, we expect + * these to be in contiguous VRAM memory. Required eviction / restore + * during suspend / resume (force restore to same physical address). 
*/ if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && bo->flags & XE_BO_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); bool lmem; - XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); - XE_BUG_ON(!mem_type_is_vram(place->mem_type) && place->mem_type != XE_PL_STOLEN); + if (mem_type_is_vram(place->mem_type)) { + XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); - place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) - - vram_region_io_offset(bo)) >> PAGE_SHIFT; - place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); + place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) - + vram_region_io_offset(bo)) >> PAGE_SHIFT; + place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); - spin_lock(&xe->pinned.lock); - list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); - spin_unlock(&xe->pinned.lock); + spin_lock(&xe->pinned.lock); + list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); + spin_unlock(&xe->pinned.lock); + } } ttm_bo_pin(&bo->ttm); @@ -1364,11 +1365,15 @@ void xe_bo_unpin(struct xe_bo *bo) if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && bo->flags & XE_BO_INTERNAL_TEST)) { - XE_BUG_ON(list_empty(&bo->pinned_link)); + struct ttm_place *place = &(bo->placements[0]); - spin_lock(&xe->pinned.lock); - list_del_init(&bo->pinned_link); - spin_unlock(&xe->pinned.lock); + if (mem_type_is_vram(place->mem_type)) { + XE_BUG_ON(list_empty(&bo->pinned_link)); + + spin_lock(&xe->pinned.lock); + list_del_init(&bo->pinned_link); + spin_unlock(&xe->pinned.lock); + } } ttm_bo_unpin(&bo->ttm); -- cgit v1.2.3 From e63f81adcc4283aed7d4fe5da1219881cc6f67d4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 22 Dec 2022 14:09:02 +0000 Subject: drm/xe/ppgtt: clear the scratch page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to ensure we don't leak the contents to userspace. Signed-off-by: Matthew Auld Reviewed-by: Matthew Brost Cc: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 688bc4b56294..d7fb1ddb8789 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -333,14 +333,16 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, u8 id = gt->info.id; int i; - vm->scratch_bo[id] = xe_bo_create(xe, gt, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | - XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | - XE_BO_CREATE_PINNED_BIT); + vm->scratch_bo[id] = xe_bo_create_pin_map(xe, gt, vm, SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | + XE_BO_CREATE_PINNED_BIT); if (IS_ERR(vm->scratch_bo[id])) return PTR_ERR(vm->scratch_bo[id]); - xe_bo_pin(vm->scratch_bo[id]); + + xe_map_memset(vm->xe, &vm->scratch_bo[id]->vmap, 0, 0, + vm->scratch_bo[id]->size); for (i = 0; i < vm->pt_root[id]->level; i++) { vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i); -- cgit v1.2.3 From b1e52b65712969a74f0ba9ffbf67dde98ce33c2f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 19 Jan 2023 12:16:51 +0000 Subject: drm/xe/ppgtt: fix scratch page usage on DG2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On DG2 when running the xe_vm IGT, the kernel generates loads of CAT errors and GT resets (sometimes at least). 
On small-bar systems seems to trigger a lot more easily (maybe due to difference in allocation strategy). Appears to be related to scratch, since we seem to use the 64K TLB hint on scratch entries, even though the scratch page is a 4K vram page. Bumping the scratch page size and physical alignment seems to fix it. Or at least we no longer hit: [ 148.872683] xe 0000:03:00.0: [drm] Engine memory cat error: guc_id=0 [ 148.872701] xe 0000:03:00.0: [drm] Engine memory cat error: guc_id=0 [ 148.875108] WARNING: CPU: 0 PID: 953 at drivers/gpu/drm/xe/xe_guc_submit.c:797 However to keep things simple, so we don't have to deal with 64K TLB hints, just move the scratch page into system memory on platforms that require 64K VRAM pages. Signed-off-by: Matthew Auld Reviewed-by: Matthew Brost Cc: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d7fb1ddb8789..01673fe96930 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -175,8 +175,6 @@ static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm, if (level == 0) { u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0, XE_CACHE_WB, 0, 0); - if (vm->flags & XE_VM_FLAGS_64K) - empty |= GEN12_PTE_PS64; return empty; } else { @@ -331,13 +329,23 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm) { u8 id = gt->info.id; + unsigned int flags; int i; + /* + * So we don't need to worry about 64K TLB hints when dealing with + * scratch entires, rather keep the scratch page in system memory on + * platforms where 64K pages are needed for VRAM. + */ + flags = XE_BO_CREATE_PINNED_BIT; + if (vm->flags & XE_VM_FLAGS_64K) + flags |= XE_BO_CREATE_SYSTEM_BIT; + else + flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt); + vm->scratch_bo[id] = xe_bo_create_pin_map(xe, gt, vm, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | - XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | - XE_BO_CREATE_PINNED_BIT); + flags); if (IS_ERR(vm->scratch_bo[id])) return PTR_ERR(vm->scratch_bo[id]); -- cgit v1.2.3 From 5e53d1e806aeb2b05c85d24cd75f848631e8a121 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 26 Jan 2023 11:31:34 +0000 Subject: drm/xe/ggtt: fix alignment usage for DG2 Spec says we need to use 64K VRAM pages for GGTT on platforms like DG2. In GGTT this just means aligning the GTT address to 64K and ensuring that we have 16 consecutive entries each pointing to the respective 4K entry. We already ensure we have 64K pages underneath, so it's just a case of forcing the GTT alignment. Signed-off-by: Matthew Auld Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 25 +++++++++++++++++++++---- drivers/gpu/drm/xe/xe_ggtt_types.h | 3 +++ 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index b1b9fc57a5db..e9273b5d2a9f 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -108,6 +108,9 @@ int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt) ggtt->gsm = gt->mmio.regs + SZ_8M; ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE; + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + ggtt->flags |= XE_GGTT_FLAGS_64K; + /* * 8B per entry, each points to a 4KB page. 
* @@ -256,7 +259,8 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) xe_ggtt_invalidate(ggtt->gt); } -static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) +static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, + u64 start, u64 end, u64 alignment) { int err; @@ -271,7 +275,8 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 st return err; mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, 0, 0, start, end, 0); + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, + alignment, 0, start, end, 0); if (!err) xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); @@ -281,12 +286,24 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 st int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs) { - return __xe_ggtt_insert_bo_at(ggtt, bo, ofs, ofs + bo->size); + if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) { + if (XE_WARN_ON(!IS_ALIGNED(ofs, SZ_64K)) || + XE_WARN_ON(!IS_ALIGNED(bo->size, SZ_64K))) + return -EINVAL; + } + + return __xe_ggtt_insert_bo_at(ggtt, bo, ofs, ofs + bo->size, 0); } int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); + u64 alignment; + + alignment = GEN8_PAGE_SIZE; + if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) + alignment = SZ_64K; + + return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, alignment); } void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index e04193001763..ea70aaef4b31 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -16,6 +16,9 @@ struct xe_ggtt { u64 size; +#define XE_GGTT_FLAGS_64K BIT(0) + unsigned int flags; + struct xe_bo *scratch; struct mutex lock; -- cgit v1.2.3 From c5151fa80060a869c0308067e758a271c217ff61 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 16 Jan 2023 10:46:21 +0000 Subject: drm/xe/ggtt: fix GGTT scratch usage for DG2 Scratch page is in VRAM, and therefore requires 64K GTT layout. In GGTT world this just means having 16 consecutive entries, with 64K GTT alignment for the GTT address of the first entry (also matching physical alignment). However to keep things simple just dump it into system memory, like we already do for ppGTT. While we are here, also give it known default value. 
Signed-off-by: Matthew Auld Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index e9273b5d2a9f..baa080cd1133 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -13,6 +13,7 @@ #include "xe_device.h" #include "xe_bo.h" #include "xe_gt.h" +#include "xe_map.h" #include "xe_mmio.h" #include "xe_wopcm.h" @@ -152,23 +153,30 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt) { struct xe_device *xe = gt_to_xe(gt); + unsigned int flags; int err; - ggtt->scratch = xe_bo_create_locked(xe, gt, NULL, GEN8_PAGE_SIZE, - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | - XE_BO_CREATE_PINNED_BIT); + /* + * So we don't need to worry about 64K GGTT layout when dealing with + * scratch entires, rather keep the scratch page in system memory on + * platforms where 64K pages are needed for VRAM. + */ + flags = XE_BO_CREATE_PINNED_BIT; + if (ggtt->flags & XE_GGTT_FLAGS_64K) + flags |= XE_BO_CREATE_SYSTEM_BIT; + else + flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt); + + ggtt->scratch = xe_bo_create_pin_map(xe, gt, NULL, GEN8_PAGE_SIZE, + ttm_bo_type_kernel, + flags); + if (IS_ERR(ggtt->scratch)) { err = PTR_ERR(ggtt->scratch); goto err; } - err = xe_bo_pin(ggtt->scratch); - xe_bo_unlock_no_vm(ggtt->scratch); - if (err) { - xe_bo_put(ggtt->scratch); - goto err; - } + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); xe_ggtt_initial_clear(ggtt); return 0; -- cgit v1.2.3 From e89b384cde622f6f553a740c73870327ee86fcc5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 6 Jan 2023 11:34:57 -0800 Subject: drm/xe/migrate: Update emit_pte to cope with a size level than 4k MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit emit_pte assumes the size argument is 4k aligned, this may not be true for the PTEs emitted for CSS as seen by below call stack: [ 56.734228] xe_migrate_copy:585: size=327680, ccs_start=327680, css_size=1280,4096 [ 56.734250] xe_migrate_copy:643: size=262144 [ 56.734252] emit_pte:404: ptes=64 [ 56.734255] emit_pte:418: chunk=64 [ 56.734257] xe_migrate_copy:650: size=1024 @ CCS emit PTE [ 56.734259] emit_pte:404: ptes=1 [ 56.734261] emit_pte:418: chunk=1 [ 56.734339] xe_migrate_copy:643: size=65536 [ 56.734342] emit_pte:404: ptes=16 [ 56.734344] emit_pte:418: chunk=16 [ 56.734346] xe_migrate_copy:650: size=256 # CCS emit PTE [ 56.734348] emit_pte:404: ptes=1 [ 56.734350] emit_pte:418: chunk=1 [ 56.734352] xe_res_next:174: size=4096, remaining=0 Update emit_pte to handle sizes less than 4k. Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index d3fa7bec78d3..377ab019b4c8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -453,11 +453,8 @@ static void emit_pte(struct xe_migrate *m, while (chunk--) { u64 addr; - XE_BUG_ON(cur->start & (PAGE_SIZE - 1)); - + addr = xe_res_dma(cur) & PAGE_MASK; if (is_vram) { - addr = cur->start; - /* Is this a 64K PTE entry? 
*/ if ((m->eng->vm->flags & XE_VM_FLAGS_64K) && !(cur_ofs & (16 * 8 - 1))) { @@ -466,14 +463,12 @@ static void emit_pte(struct xe_migrate *m, } addr |= GEN12_PPGTT_PTE_LM; - } else { - addr = xe_res_dma(cur); } addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); - xe_res_next(cur, PAGE_SIZE); + xe_res_next(cur, min(size, (u32)PAGE_SIZE)); cur_ofs += 8; } } @@ -615,13 +610,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); if (!src_is_vram) - xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it); + xe_res_first_sg(xe_bo_get_sg(bo), 0, size, &src_it); else - xe_res_first(src, 0, bo->size, &src_it); + xe_res_first(src, 0, size, &src_it); if (!dst_is_vram) - xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &dst_it); + xe_res_first_sg(xe_bo_get_sg(bo), 0, size, &dst_it); else - xe_res_first(dst, 0, bo->size, &dst_it); + xe_res_first(dst, 0, size, &dst_it); if (copy_system_ccs) xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo), -- cgit v1.2.3 From 5b643660875d01c203782a86ac5e3353849bc513 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 20 Jan 2023 09:17:50 -0800 Subject: drm/xe: Don't process TLB invalidation done in CT fast-path We can't currently do this due to TLB invalidation done handler expecting the seqno being received in-order, with the fast-path a TLB invalidation done could pass one being processed in the slow-path in an extreme corner case. Remove TLB invalidation done from the fast-path for now and in a follow up reenable this once the TLB invalidation done handler can deal with out of order seqno. Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index f48eb01847ef..6e25c1d5d43e 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -966,7 +966,14 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) return 0; switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) { - case XE_GUC_ACTION_TLB_INVALIDATION_DONE: + /* + * FIXME: We really should process + * XE_GUC_ACTION_TLB_INVALIDATION_DONE here in the fast-path as + * these critical for page fault performance. We currently can't + * due to TLB invalidation done algorithm expecting the seqno + * returned in-order. With some small changes to the algorithm + * and locking we should be able to support out-of-order seqno. + */ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: break; /* Process these in fast-path */ default: -- cgit v1.2.3 From a9351846d94568d96e7400be343392c58e4f82e6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Jan 2023 20:31:24 -0800 Subject: drm/xe: Break of TLB invalidation into its own file TLB invalidation is used by more than USM (page faults) so break this code out into its own file. 
Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt.c | 5 ++ drivers/gpu/drm/xe/xe_gt_debugfs.c | 1 + drivers/gpu/drm/xe/xe_gt_pagefault.c | 99 +----------------------- drivers/gpu/drm/xe/xe_gt_pagefault.h | 3 - drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 115 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 19 +++++ drivers/gpu/drm/xe/xe_guc_ct.c | 1 + drivers/gpu/drm/xe/xe_vm.c | 1 + 9 files changed, 146 insertions(+), 99 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c create mode 100644 drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f8da32b550bc..998f7044b047 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -57,6 +57,7 @@ xe-y += xe_bb.o \ xe_gt_mcr.o \ xe_gt_pagefault.o \ xe_gt_sysfs.o \ + xe_gt_tlb_invalidation.o \ xe_gt_topology.o \ xe_guc.o \ xe_guc_ads.o \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 61a6430cb435..96136f130eda 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -19,6 +19,7 @@ #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" #include "xe_gt_sysfs.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" #include "xe_hw_fence.h" #include "xe_irq.h" @@ -571,6 +572,10 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(>->fence_irq[i]); } + err = xe_gt_tlb_invalidation_init(gt); + if (err) + return err; + err = xe_gt_pagefault_init(gt); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index cd1888784141..01303bbe073c 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -12,6 +12,7 @@ #include "xe_gt_debugfs.h" #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_macros.h" diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 7125113b7390..93a8efe5d0a0 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -10,9 +10,10 @@ #include "xe_bo.h" #include "xe_gt.h" +#include "xe_gt_pagefault.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_ct.h" -#include "xe_gt_pagefault.h" #include "xe_migrate.h" #include "xe_pt.h" #include "xe_trace.h" @@ -61,40 +62,6 @@ guc_to_gt(struct xe_guc *guc) return container_of(guc, struct xe_gt, uc.guc); } -static int send_tlb_invalidation(struct xe_guc *guc) -{ - struct xe_gt *gt = guc_to_gt(guc); - u32 action[] = { - XE_GUC_ACTION_TLB_INVALIDATION, - 0, - XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT | - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | - XE_GUC_TLB_INVAL_FLUSH_CACHE, - }; - int seqno; - int ret; - - /* - * XXX: The seqno algorithm relies on TLB invalidation being processed - * in order which they currently are, if that changes the algorithm will - * need to be updated. 
- */ - mutex_lock(&guc->ct.lock); - seqno = gt->usm.tlb_invalidation_seqno; - action[1] = seqno; - gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) % - TLB_INVALIDATION_SEQNO_MAX; - if (!gt->usm.tlb_invalidation_seqno) - gt->usm.tlb_invalidation_seqno = 1; - ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_TLB_INVALIDATE, 1); - if (!ret) - ret = seqno; - mutex_unlock(&guc->ct.lock); - - return ret; -} - static bool access_is_atomic(enum access_type access_type) { return access_type == ACCESS_TYPE_ATOMIC; @@ -278,7 +245,7 @@ unlock_vm: * defer TLB invalidate + fault response to a callback of fence * too */ - ret = send_tlb_invalidation(>->uc.guc); + ret = xe_gt_tlb_invalidation(gt); if (ret >= 0) ret = 0; } @@ -433,7 +400,6 @@ int xe_gt_pagefault_init(struct xe_gt *gt) if (!xe->info.supports_usm) return 0; - gt->usm.tlb_invalidation_seqno = 1; for (i = 0; i < NUM_PF_QUEUE; ++i) { gt->usm.pf_queue[i].gt = gt; spin_lock_init(>->usm.pf_queue[i].lock); @@ -482,65 +448,6 @@ void xe_gt_pagefault_reset(struct xe_gt *gt) } } -int xe_gt_tlb_invalidation(struct xe_gt *gt) -{ - return send_tlb_invalidation(>->uc.guc); -} - -static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) -{ - if (gt->usm.tlb_invalidation_seqno_recv >= seqno) - return true; - - if (seqno - gt->usm.tlb_invalidation_seqno_recv > - (TLB_INVALIDATION_SEQNO_MAX / 2)) - return true; - - return false; -} - -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_guc *guc = >->uc.guc; - int ret; - - /* - * XXX: See above, this algorithm only works if seqno are always in - * order - */ - ret = wait_event_timeout(guc->ct.wq, - tlb_invalidation_seqno_past(gt, seqno), - HZ / 5); - if (!ret) { - drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->usm.tlb_invalidation_seqno_recv); - return -ETIME; - } - - return 0; -} - -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - int expected_seqno; - - if (unlikely(len != 1)) - return -EPROTO; - - /* Sanity check on seqno */ - expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) % - TLB_INVALIDATION_SEQNO_MAX; - XE_WARN_ON(expected_seqno != msg[0]); - - gt->usm.tlb_invalidation_seqno_recv = msg[0]; - smp_wmb(); - wake_up_all(&guc->ct.wq); - - return 0; -} - static int granularity_in_byte(int val) { switch (val) { diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h index 35f68027cc9c..839c065a5e4c 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.h +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.h @@ -13,10 +13,7 @@ struct xe_guc; int xe_gt_pagefault_init(struct xe_gt *gt); void xe_gt_pagefault_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation(struct xe_gt *gt); -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len); #endif /* _XE_GT_PAGEFAULT_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c new file mode 100644 index 000000000000..fea7a557d213 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gt.h" +#include 
"xe_gt_tlb_invalidation.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" + +static struct xe_gt * +guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +int xe_gt_tlb_invalidation_init(struct xe_gt *gt) +{ + gt->usm.tlb_invalidation_seqno = 1; + + return 0; +} + +static int send_tlb_invalidation(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + 0, + XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT | + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | + XE_GUC_TLB_INVAL_FLUSH_CACHE, + }; + int seqno; + int ret; + + /* + * XXX: The seqno algorithm relies on TLB invalidation being processed + * in order which they currently are, if that changes the algorithm will + * need to be updated. + */ + mutex_lock(&guc->ct.lock); + seqno = gt->usm.tlb_invalidation_seqno; + action[1] = seqno; + gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) % + TLB_INVALIDATION_SEQNO_MAX; + if (!gt->usm.tlb_invalidation_seqno) + gt->usm.tlb_invalidation_seqno = 1; + ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action), + G2H_LEN_DW_TLB_INVALIDATE, 1); + if (!ret) + ret = seqno; + mutex_unlock(&guc->ct.lock); + + return ret; +} + +int xe_gt_tlb_invalidation(struct xe_gt *gt) +{ + return send_tlb_invalidation(>->uc.guc); +} + +static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) +{ + if (gt->usm.tlb_invalidation_seqno_recv >= seqno) + return true; + + if (seqno - gt->usm.tlb_invalidation_seqno_recv > + (TLB_INVALIDATION_SEQNO_MAX / 2)) + return true; + + return false; +} + +int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_guc *guc = >->uc.guc; + int ret; + + /* + * XXX: See above, this algorithm only works if seqno are always in + * order + */ + ret = wait_event_timeout(guc->ct.wq, + tlb_invalidation_seqno_past(gt, seqno), + HZ / 5); + if (!ret) { + drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n", + seqno, gt->usm.tlb_invalidation_seqno_recv); + return -ETIME; + } + + return 0; +} + +int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + int expected_seqno; + + if (unlikely(len != 1)) + return -EPROTO; + + /* Sanity check on seqno */ + expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) % + TLB_INVALIDATION_SEQNO_MAX; + XE_WARN_ON(expected_seqno != msg[0]); + + gt->usm.tlb_invalidation_seqno_recv = msg[0]; + smp_wmb(); + wake_up_all(&guc->ct.wq); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h new file mode 100644 index 000000000000..f1c3b34b1993 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_TLB_INVALIDATION_H_ +#define _XE_GT_TLB_INVALIDATION_H_ + +#include + +struct xe_gt; +struct xe_guc; + +int xe_gt_tlb_invalidation_init(struct xe_gt *gt); +int xe_gt_tlb_invalidation(struct xe_gt *gt); +int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); +int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 6e25c1d5d43e..84d4302d4e72 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -15,6 +15,7 @@ #include "xe_guc.h" 
#include "xe_guc_ct.h" #include "xe_gt_pagefault.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_guc_submit.h" #include "xe_map.h" #include "xe_trace.h" diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d47a8617c5b6..c548cd04f9cf 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -19,6 +19,7 @@ #include "xe_engine.h" #include "xe_gt.h" #include "xe_gt_pagefault.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_pm.h" #include "xe_preempt_fence.h" -- cgit v1.2.3 From 62ad062150c2ab72b0881c2f24f710e4c0bc4cd7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Jan 2023 20:49:38 -0800 Subject: drm/xe: Move TLB invalidation variable to own sub-structure in GT TLB invalidations no longer just restricted to USM, move the variables to own sub-structure. Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 6 ++++-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 20 ++++++++++---------- drivers/gpu/drm/xe/xe_gt_types.h | 22 +++++++++++----------- 3 files changed, 25 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 01303bbe073c..ea308b123474 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -11,13 +11,15 @@ #include "xe_gt.h" #include "xe_gt_debugfs.h" #include "xe_gt_mcr.h" -#include "xe_gt_pagefault.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_macros.h" #include "xe_uc_debugfs.h" +#ifdef CONFIG_DRM_XE_DEBUG +#include "xe_gt_tlb_invalidation.h" +#endif + static struct xe_gt *node_to_gt(struct drm_info_node *node) { return node->info_ent->data; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index fea7a557d213..a39a2fb163ae 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -16,7 +16,7 @@ guc_to_gt(struct xe_guc *guc) int xe_gt_tlb_invalidation_init(struct xe_gt *gt) { - gt->usm.tlb_invalidation_seqno = 1; + gt->tlb_invalidation.seqno = 1; return 0; } @@ -40,12 +40,12 @@ static int send_tlb_invalidation(struct xe_guc *guc) * need to be updated. 
*/ mutex_lock(&guc->ct.lock); - seqno = gt->usm.tlb_invalidation_seqno; + seqno = gt->tlb_invalidation.seqno; action[1] = seqno; - gt->usm.tlb_invalidation_seqno = (gt->usm.tlb_invalidation_seqno + 1) % + gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % TLB_INVALIDATION_SEQNO_MAX; - if (!gt->usm.tlb_invalidation_seqno) - gt->usm.tlb_invalidation_seqno = 1; + if (!gt->tlb_invalidation.seqno) + gt->tlb_invalidation.seqno = 1; ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action), G2H_LEN_DW_TLB_INVALIDATE, 1); if (!ret) @@ -62,10 +62,10 @@ int xe_gt_tlb_invalidation(struct xe_gt *gt) static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) { - if (gt->usm.tlb_invalidation_seqno_recv >= seqno) + if (gt->tlb_invalidation.seqno_recv >= seqno) return true; - if (seqno - gt->usm.tlb_invalidation_seqno_recv > + if (seqno - gt->tlb_invalidation.seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) return true; @@ -87,7 +87,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) HZ / 5); if (!ret) { drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->usm.tlb_invalidation_seqno_recv); + seqno, gt->tlb_invalidation.seqno_recv); return -ETIME; } @@ -103,11 +103,11 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return -EPROTO; /* Sanity check on seqno */ - expected_seqno = (gt->usm.tlb_invalidation_seqno_recv + 1) % + expected_seqno = (gt->tlb_invalidation.seqno_recv + 1) % TLB_INVALIDATION_SEQNO_MAX; XE_WARN_ON(expected_seqno != msg[0]); - gt->usm.tlb_invalidation_seqno_recv = msg[0]; + gt->tlb_invalidation.seqno_recv = msg[0]; smp_wmb(); wake_up_all(&guc->ct.wq); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 2dbc8cedd630..3bfce7abe857 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -160,6 +160,17 @@ struct xe_gt { struct work_struct worker; } reset; + /** @tlb_invalidation: TLB invalidation state */ + struct { + /** @seqno: TLB invalidation seqno, protected by CT lock */ +#define TLB_INVALIDATION_SEQNO_MAX 0x100000 + int seqno; + /** + * @seqno_recv: last received TLB invalidation seqno, protected by CT lock + */ + int seqno_recv; + } tlb_invalidation; + /** @usm: unified shared memory state */ struct { /** @@ -175,17 +186,6 @@ struct xe_gt { * operations (e.g. mmigrations, fixing page tables) */ u16 reserved_bcs_instance; - /** - * @tlb_invalidation_seqno: TLB invalidation seqno, protected by - * CT lock - */ -#define TLB_INVALIDATION_SEQNO_MAX 0x100000 - int tlb_invalidation_seqno; - /** - * @tlb_invalidation_seqno_recv: last received TLB invalidation - * seqno, protected by CT lock - */ - int tlb_invalidation_seqno_recv; /** @pf_wq: page fault work queue, unbound, high priority */ struct workqueue_struct *pf_wq; /** @acc_wq: access counter work queue, unbound, high priority */ -- cgit v1.2.3 From fc108a8b759f52b879e9a39642ee7988d251e453 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Jan 2023 21:11:43 -0800 Subject: drm/xe: Add TLB invalidation fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fence will be signaled when TLB invalidation completion. 
Signed-off-by: Matthew Brost Suggested-by: Thomas Hellström Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 43 +++++++++++++++++++++-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 6 +++- drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h | 26 ++++++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 5 +++ drivers/gpu/drm/xe/xe_vm.c | 2 +- 8 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 96136f130eda..28bbb3159531 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -669,6 +669,7 @@ static int gt_reset(struct xe_gt *gt) xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); + xe_gt_tlb_invalidation_reset(gt); err = xe_uc_stop(>->uc); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index ea308b123474..946398f08bb5 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -99,7 +99,7 @@ static int invalidate_tlb(struct seq_file *m, void *data) int seqno; int ret = 0; - seqno = xe_gt_tlb_invalidation(gt); + seqno = xe_gt_tlb_invalidation(gt, NULL); XE_WARN_ON(seqno < 0); if (seqno > 0) ret = xe_gt_tlb_invalidation_wait(gt, seqno); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 93a8efe5d0a0..705093cb63d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -245,7 +245,7 @@ unlock_vm: * defer TLB invalidate + fault response to a callback of fence * too */ - ret = xe_gt_tlb_invalidation(gt); + ret = xe_gt_tlb_invalidation(gt, NULL); if (ret >= 0) ret = 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index a39a2fb163ae..0058a155eeb9 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -17,11 +17,27 @@ guc_to_gt(struct xe_guc *guc) int xe_gt_tlb_invalidation_init(struct xe_gt *gt) { gt->tlb_invalidation.seqno = 1; + INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); return 0; } -static int send_tlb_invalidation(struct xe_guc *guc) +void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) +{ + struct xe_gt_tlb_invalidation_fence *fence, *next; + + mutex_lock(>->uc.guc.ct.lock); + list_for_each_entry_safe(fence, next, + >->tlb_invalidation.pending_fences, link) { + list_del(&fence->link); + dma_fence_signal(&fence->base); + dma_fence_put(&fence->base); + } + mutex_unlock(>->uc.guc.ct.lock); +} + +static int send_tlb_invalidation(struct xe_guc *guc, + struct xe_gt_tlb_invalidation_fence *fence) { struct xe_gt *gt = guc_to_gt(guc); u32 action[] = { @@ -41,6 +57,15 @@ static int send_tlb_invalidation(struct xe_guc *guc) */ mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; + if (fence) { + /* + * FIXME: How to deal TLB invalidation timeout, right now we + * just have an endless fence which isn't ideal. 
+ */ + fence->seqno = seqno; + list_add_tail(&fence->link, + >->tlb_invalidation.pending_fences); + } action[1] = seqno; gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % TLB_INVALIDATION_SEQNO_MAX; @@ -55,9 +80,10 @@ static int send_tlb_invalidation(struct xe_guc *guc) return ret; } -int xe_gt_tlb_invalidation(struct xe_gt *gt) +int xe_gt_tlb_invalidation(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) { - return send_tlb_invalidation(>->uc.guc); + return send_tlb_invalidation(>->uc.guc, fence); } static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) @@ -97,8 +123,11 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_gt_tlb_invalidation_fence *fence; int expected_seqno; + lockdep_assert_held(&guc->ct.lock); + if (unlikely(len != 1)) return -EPROTO; @@ -111,5 +140,13 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) smp_wmb(); wake_up_all(&guc->ct.wq); + fence = list_first_entry_or_null(>->tlb_invalidation.pending_fences, + typeof(*fence), link); + if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) { + list_del(&fence->link); + dma_fence_signal(&fence->base); + dma_fence_put(&fence->base); + } + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index f1c3b34b1993..7e6fbf46f0e3 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -8,11 +8,15 @@ #include +#include "xe_gt_tlb_invalidation_types.h" + struct xe_gt; struct xe_guc; int xe_gt_tlb_invalidation_init(struct xe_gt *gt); -int xe_gt_tlb_invalidation(struct xe_gt *gt); +void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); +int xe_gt_tlb_invalidation(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence); int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h new file mode 100644 index 000000000000..ab57c14c6d14 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_ +#define _XE_GT_TLB_INVALIDATION_TYPES_H_ + +#include + +/** + * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence + * + * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB + * invalidation completion. 
+ */ +struct xe_gt_tlb_invalidation_fence { + /** @base: dma fence base */ + struct dma_fence base; + /** @link: link into list of pending tlb fences */ + struct list_head link; + /** @seqno: seqno of TLB invalidation to signal fence one */ + int seqno; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 3bfce7abe857..a755e3a86552 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -169,6 +169,11 @@ struct xe_gt { * @seqno_recv: last received TLB invalidation seqno, protected by CT lock */ int seqno_recv; + /** + * @pending_fences: list of pending fences waiting TLB + * invaliations, protected by CT lock + */ + struct list_head pending_fences; } tlb_invalidation; /** @usm: unified shared memory state */ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c548cd04f9cf..aae9acc7759a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3345,7 +3345,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(gt, vma)) { gt_needs_invalidate |= BIT(id); xe_device_wmb(xe); - seqno[id] = xe_gt_tlb_invalidation(gt); + seqno[id] = xe_gt_tlb_invalidation(gt, NULL); if (seqno[id] < 0) return seqno[id]; } -- cgit v1.2.3 From f4a8add94f2f28bd215b07b72abcbd2fd17d2012 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 18 Jan 2023 14:43:56 -0800 Subject: drm/xe: Invalidate TLB after unbind is complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets tricky as we can't do the TLB invalidation until the unbind operation is done on the hardware and we can't signal the unbind as complete until the TLB invalidation is done. To work around this we create an unbind fence which does a TLB invalidation after unbind is done on the hardware, signals on TLB invalidation completion, and this fence is installed in the BO dma-resv slot and installed in out-syncs for the unbind operation. 
Signed-off-by: Matthew Brost Suggested-by: Niranjana Vishwanathapura Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 + drivers/gpu/drm/xe/xe_gt_types.h | 9 +++ drivers/gpu/drm/xe/xe_pt.c | 96 +++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 0058a155eeb9..23094d364583 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -18,6 +18,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) { gt->tlb_invalidation.seqno = 1; INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); + spin_lock_init(>->tlb_invalidation.lock); + gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index a755e3a86552..3b2d9842add7 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -174,6 +174,15 @@ struct xe_gt { * invaliations, protected by CT lock */ struct list_head pending_fences; + /** @fence_context: context for TLB invalidation fences */ + u64 fence_context; + /** + * @fence_seqno: seqno to TLB invalidation fences, protected by + * tlb_invalidation.lock + */ + u32 fence_seqno; + /** @lock: protects TLB invalidation fences */ + spinlock_t lock; } tlb_invalidation; /** @usm: unified shared memory state */ diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 01673fe96930..65a6f54b22a9 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -6,6 +6,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_pt.h" #include "xe_pt_types.h" @@ -1461,6 +1462,83 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { .pre_commit = xe_pt_userptr_pre_commit, }; +struct invalidation_fence { + struct xe_gt_tlb_invalidation_fence base; + struct xe_gt *gt; + struct dma_fence *fence; + struct dma_fence_cb cb; + struct work_struct work; +}; + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +static void invalidation_fence_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct invalidation_fence *ifence = + container_of(cb, struct invalidation_fence, cb); + + queue_work(system_wq, &ifence->work); + dma_fence_put(ifence->fence); +} + +static void invalidation_fence_work_func(struct work_struct *w) +{ + struct invalidation_fence *ifence = + container_of(w, struct invalidation_fence, work); + + xe_gt_tlb_invalidation(ifence->gt, &ifence->base); +} + +static int invalidation_fence_init(struct xe_gt *gt, + struct invalidation_fence *ifence, + struct dma_fence *fence) +{ + int ret; + + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&ifence->base.base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + gt->tlb_invalidation.fence_context, + ++gt->tlb_invalidation.fence_seqno); + spin_unlock_irq(>->tlb_invalidation.lock); + + INIT_LIST_HEAD(&ifence->base.link); + + dma_fence_get(&ifence->base.base); /* 
Ref for caller */ + ifence->fence = fence; + ifence->gt = gt; + + INIT_WORK(&ifence->work, invalidation_fence_work_func); + ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); + if (ret == -ENOENT) { + dma_fence_put(ifence->fence); /* Usually dropped in CB */ + invalidation_fence_work_func(&ifence->work); + } else if (ret) { + dma_fence_put(&ifence->base.base); /* Caller ref */ + dma_fence_put(&ifence->base.base); /* Creation ref */ + } + + XE_WARN_ON(ret && ret != -ENOENT); + + return ret && ret != -ENOENT ? ret : 0; +} + /** * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma * address range. @@ -1496,6 +1574,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, struct xe_vm *vm = vma->vm; u32 num_entries; struct dma_fence *fence = NULL; + struct invalidation_fence *ifence; LLIST_HEAD(deferred); xe_bo_assert_held(vma->bo); @@ -1511,6 +1590,10 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!ifence) + return ERR_PTR(-ENOMEM); + /* * Even if we were already evicted and unbind to destroy, we need to * clear again here. The eviction may have updated pagetables at a @@ -1523,6 +1606,17 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, syncs, num_syncs, &unbind_pt_update.base); if (!IS_ERR(fence)) { + int err; + + /* TLB invalidation must be done before signaling unbind */ + err = invalidation_fence_init(gt, ifence, fence); + if (err) { + dma_fence_put(fence); + kfree(ifence); + return ERR_PTR(err); + } + fence = &ifence->base.base; + /* add shared fence now for pagetable delayed destroy */ dma_resv_add_fence(&vm->resv, fence, DMA_RESV_USAGE_BOOKKEEP); @@ -1534,6 +1628,8 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, xe_pt_commit_unbind(vma, entries, num_entries, unbind_pt_update.locked ? &deferred : NULL); vma->gt_present &= ~BIT(gt->info.id); + } else { + kfree(ifence); } if (!vma->gt_present) -- cgit v1.2.3 From c6b0948ff8d0842b55f05b794590ffc0a44c0656 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 20 Jan 2023 09:38:03 -0800 Subject: drm/xe: Kernel doc GT TLB invalidations Document all exported functions. Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 52 ++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 23094d364583..1cb4d3a6bc57 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -14,6 +14,15 @@ guc_to_gt(struct xe_guc *guc) return container_of(guc, struct xe_gt, uc.guc); } +/** + * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state + * @gt: graphics tile + * + * Initialize GT TLB invalidation state, purely software initialization, should + * be called once during driver load. + * + * Return: 0 on success, negative error code on error. 
+ */ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) { gt->tlb_invalidation.seqno = 1; @@ -24,7 +33,13 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) return 0; } -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) +/** + * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset + * @gt: graphics tile + * + * Signal any pending invalidation fences, should be called during a GT reset + */ + void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) { struct xe_gt_tlb_invalidation_fence *fence, *next; @@ -82,6 +97,19 @@ static int send_tlb_invalidation(struct xe_guc *guc, return ret; } +/** + * xe_gt_tlb_invalidation - Issue a TLB invalidation on this GT + * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion, can be NULL + * + * Issue a full TLB invalidation on the GT. Completion of TLB is asynchronous + * and caller can either use the invalidation fence or seqno + + * xe_gt_tlb_invalidation_wait to wait for completion. + * + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, + * negative error code on error. + */ int xe_gt_tlb_invalidation(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence) { @@ -100,6 +128,16 @@ static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) return false; } +/** + * xe_gt_tlb_invalidation_wait - Wait for TLB to complete + * @gt: graphics tile + * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation + * + * Wait for 200ms for a TLB invalidation to complete, in practice we always + * should receive the TLB invalidation within 200ms. + * + * Return: 0 on success, -ETIME on TLB invalidation timeout + */ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) { struct xe_device *xe = gt_to_xe(gt); @@ -122,6 +160,18 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) return 0; } +/** + * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler + * @guc: guc + * @msg: message indicating TLB invalidation done + * @len: length of message + * + * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any + * invalidation fences for seqno. Algorithm for this depends on seqno being + * received in-order and asserts this assumption. + * + * Return: 0 on success, -EPROTO for malformed messages. + */ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); -- cgit v1.2.3 From 24b52db6ae00d8e8c4a7af5622890b70d4de51b9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 19 Jan 2023 19:21:35 -0800 Subject: drm/xe: Add TLB invalidation fence ftrace This will help debug issues with TLB invalidation fences. 
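At runtime the new events can be captured through tracefs in the usual way (assuming tracefs is mounted at /sys/kernel/tracing and the trace system is named "xe" as in xe_trace.h):

  cd /sys/kernel/tracing
  echo 1 > events/xe/xe_gt_tlb_invalidation_fence_send/enable
  echo 1 > events/xe/xe_gt_tlb_invalidation_fence_signal/enable
  cat trace_pipe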
Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 5 +++ drivers/gpu/drm/xe/xe_pt.c | 5 +++ drivers/gpu/drm/xe/xe_trace.h | 50 +++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 1cb4d3a6bc57..4d179357ce65 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -7,6 +7,7 @@ #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_trace.h" static struct xe_gt * guc_to_gt(struct xe_guc *guc) @@ -82,6 +83,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, fence->seqno = seqno; list_add_tail(&fence->link, >->tlb_invalidation.pending_fences); + trace_xe_gt_tlb_invalidation_fence_send(fence); } action[1] = seqno; gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % @@ -194,7 +196,10 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) fence = list_first_entry_or_null(>->tlb_invalidation.pending_fences, typeof(*fence), link); + if (fence) + trace_xe_gt_tlb_invalidation_fence_recv(fence); if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) { + trace_xe_gt_tlb_invalidation_fence_signal(fence); list_del(&fence->link); dma_fence_signal(&fence->base); dma_fence_put(&fence->base); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 65a6f54b22a9..b220d1d5cfe3 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -13,6 +13,7 @@ #include "xe_pt_walk.h" #include "xe_vm.h" #include "xe_res_cursor.h" +#include "xe_trace.h" #include "xe_ttm_stolen_mgr.h" struct xe_pt_dir { @@ -1493,6 +1494,7 @@ static void invalidation_fence_cb(struct dma_fence *fence, struct invalidation_fence *ifence = container_of(cb, struct invalidation_fence, cb); + trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base); queue_work(system_wq, &ifence->work); dma_fence_put(ifence->fence); } @@ -1502,6 +1504,7 @@ static void invalidation_fence_work_func(struct work_struct *w) struct invalidation_fence *ifence = container_of(w, struct invalidation_fence, work); + trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); xe_gt_tlb_invalidation(ifence->gt, &ifence->base); } @@ -1511,6 +1514,8 @@ static int invalidation_fence_init(struct xe_gt *gt, { int ret; + trace_xe_gt_tlb_invalidation_fence_create(&ifence->base); + spin_lock_irq(>->tlb_invalidation.lock); dma_fence_init(&ifence->base.base, &invalidation_fence_ops, >->tlb_invalidation.lock, diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index d9f921d46b53..b5b0f1bff7ec 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -16,10 +16,60 @@ #include "xe_engine_types.h" #include "xe_gpu_scheduler_types.h" #include "xe_gt_types.h" +#include "xe_gt_tlb_invalidation_types.h" #include "xe_guc_engine_types.h" #include "xe_sched_job.h" #include "xe_vm_types.h" +DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence), + + TP_STRUCT__entry( + __field(u64, fence) + __field(int, seqno) + ), + + TP_fast_assign( + __entry->fence = (u64)fence; + __entry->seqno = fence->seqno; + ), + + TP_printk("fence=0x%016llx, seqno=%d", + __entry->fence, __entry->seqno) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create, + 
TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, + xe_gt_tlb_invalidation_fence_work_func, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + DECLARE_EVENT_CLASS(xe_bo, TP_PROTO(struct xe_bo *bo), TP_ARGS(bo), -- cgit v1.2.3 From 38224c00d9c284030d60be83571e5f1bd5fc79c6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 24 Jan 2023 10:35:59 -0800 Subject: drm/xe: Add TDR for invalidation fence timeout cleanup Endless fences are not good, add a TDR to cleanup any invalidation fences which have not received an invalidation message within a timeout period. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 58 +++++++++++++++++++++-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h | 2 + drivers/gpu/drm/xe/xe_gt_types.h | 5 ++ drivers/gpu/drm/xe/xe_trace.h | 5 ++ 4 files changed, 65 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 4d179357ce65..9e026fd0a45d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -9,12 +9,45 @@ #include "xe_guc_ct.h" #include "xe_trace.h" +#define TLB_TIMEOUT (HZ / 4) + static struct xe_gt * guc_to_gt(struct xe_guc *guc) { return container_of(guc, struct xe_gt, uc.guc); } +static void xe_gt_tlb_fence_timeout(struct work_struct *work) +{ + struct xe_gt *gt = container_of(work, struct xe_gt, + tlb_invalidation.fence_tdr.work); + struct xe_gt_tlb_invalidation_fence *fence, *next; + + mutex_lock(>->uc.guc.ct.lock); + list_for_each_entry_safe(fence, next, + >->tlb_invalidation.pending_fences, link) { + s64 since_inval_ms = ktime_ms_delta(ktime_get(), + fence->invalidation_time); + + if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT) + break; + + trace_xe_gt_tlb_invalidation_fence_timeout(fence); + drm_err(>_to_xe(gt)->drm, "TLB invalidation fence timeout, seqno=%d", + fence->seqno); + + list_del(&fence->link); + fence->base.error = -ETIME; + dma_fence_signal(&fence->base); + dma_fence_put(&fence->base); + } + if (!list_empty(>->tlb_invalidation.pending_fences)) + queue_delayed_work(system_wq, + >->tlb_invalidation.fence_tdr, + TLB_TIMEOUT); + mutex_unlock(>->uc.guc.ct.lock); +} + /** * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state * @gt: graphics tile @@ -30,6 +63,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); spin_lock_init(>->tlb_invalidation.lock); gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); + INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, + xe_gt_tlb_fence_timeout); return 0; } @@ -44,6 +79,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) { struct 
xe_gt_tlb_invalidation_fence *fence, *next; + cancel_delayed_work(>->tlb_invalidation.fence_tdr); + mutex_lock(>->uc.guc.ct.lock); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { @@ -67,6 +104,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, }; int seqno; int ret; + bool queue_work; /* * XXX: The seqno algorithm relies on TLB invalidation being processed @@ -76,10 +114,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; if (fence) { - /* - * FIXME: How to deal TLB invalidation timeout, right now we - * just have an endless fence which isn't ideal. - */ + queue_work = list_empty(>->tlb_invalidation.pending_fences); fence->seqno = seqno; list_add_tail(&fence->link, >->tlb_invalidation.pending_fences); @@ -92,6 +127,13 @@ static int send_tlb_invalidation(struct xe_guc *guc, gt->tlb_invalidation.seqno = 1; ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action), G2H_LEN_DW_TLB_INVALIDATE, 1); + if (!ret && fence) { + fence->invalidation_time = ktime_get(); + if (queue_work) + queue_delayed_work(system_wq, + >->tlb_invalidation.fence_tdr, + TLB_TIMEOUT); + } if (!ret) ret = seqno; mutex_unlock(&guc->ct.lock); @@ -152,7 +194,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) */ ret = wait_event_timeout(guc->ct.wq, tlb_invalidation_seqno_past(gt, seqno), - HZ / 5); + TLB_TIMEOUT); if (!ret) { drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n", seqno, gt->tlb_invalidation.seqno_recv); @@ -201,6 +243,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) { trace_xe_gt_tlb_invalidation_fence_signal(fence); list_del(&fence->link); + if (!list_empty(>->tlb_invalidation.pending_fences)) + mod_delayed_work(system_wq, + >->tlb_invalidation.fence_tdr, + TLB_TIMEOUT); + else + cancel_delayed_work(>->tlb_invalidation.fence_tdr); dma_fence_signal(&fence->base); dma_fence_put(&fence->base); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h index ab57c14c6d14..934c828efe31 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h @@ -21,6 +21,8 @@ struct xe_gt_tlb_invalidation_fence { struct list_head link; /** @seqno: seqno of TLB invalidation to signal fence one */ int seqno; + /** @invalidation_time: time of TLB invalidation */ + ktime_t invalidation_time; }; #endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 3b2d9842add7..a40fab262ac9 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -174,6 +174,11 @@ struct xe_gt { * invaliations, protected by CT lock */ struct list_head pending_fences; + /** + * @fence_tdr: schedules a delayed call to + * xe_gt_tlb_fence_timeout after the timeut interval is over. 
+ */ + struct delayed_work fence_tdr; /** @fence_context: context for TLB invalidation fences */ u64 fence_context; /** diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index b5b0f1bff7ec..d1cd4b57a974 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -70,6 +70,11 @@ DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, TP_ARGS(fence) ); +DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, + TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), + TP_ARGS(fence) +); + DECLARE_EVENT_CLASS(xe_bo, TP_PROTO(struct xe_bo *bo), TP_ARGS(bo), -- cgit v1.2.3 From a12d9216740c23dc7f526db108b4a82f1e0807e2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 24 Jan 2023 16:14:55 -0800 Subject: drm/xe: Only set VM->asid for platforms that support a ASID This will help with TLB invalidation as the ASID in TLB invalidate should be zero for platforms that do not support a ASID. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_vm.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index aae9acc7759a..83f8c8a186d8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1429,10 +1429,12 @@ static void vm_destroy_work_func(struct work_struct *w) xe_device_mem_access_put(xe); xe_pm_runtime_put(xe); - mutex_lock(&xe->usm.lock); - lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); - XE_WARN_ON(lookup != vm); - mutex_unlock(&xe->usm.lock); + if (xe->info.supports_usm) { + mutex_lock(&xe->usm.lock); + lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); + XE_WARN_ON(lookup != vm); + mutex_unlock(&xe->usm.lock); + } } /* @@ -1917,16 +1919,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, return err; } - mutex_lock(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(0, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - mutex_unlock(&xe->usm.lock); - if (err) { - xe_vm_close_and_put(vm); - return err; + if (xe->info.supports_usm) { + mutex_lock(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(0, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + mutex_unlock(&xe->usm.lock); + if (err) { + xe_vm_close_and_put(vm); + return err; + } + vm->usm.asid = asid; } - vm->usm.asid = asid; args->vm_id = id; -- cgit v1.2.3 From 0335b53cc48cab91bb089ee5c7558cc84da3958d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 24 Jan 2023 16:21:11 -0800 Subject: drm/xe: Delete debugfs entry to issue TLB invalidation Not used, let's remove this. 
Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 946398f08bb5..daae42d3ab3b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -16,10 +16,6 @@ #include "xe_macros.h" #include "xe_uc_debugfs.h" -#ifdef CONFIG_DRM_XE_DEBUG -#include "xe_gt_tlb_invalidation.h" -#endif - static struct xe_gt *node_to_gt(struct drm_info_node *node) { return node->info_ent->data; @@ -92,32 +88,12 @@ static int steering(struct seq_file *m, void *data) return 0; } -#ifdef CONFIG_DRM_XE_DEBUG -static int invalidate_tlb(struct seq_file *m, void *data) -{ - struct xe_gt *gt = node_to_gt(m->private); - int seqno; - int ret = 0; - - seqno = xe_gt_tlb_invalidation(gt, NULL); - XE_WARN_ON(seqno < 0); - if (seqno > 0) - ret = xe_gt_tlb_invalidation_wait(gt, seqno); - XE_WARN_ON(ret < 0); - - return 0; -} -#endif - static const struct drm_info_list debugfs_list[] = { {"hw_engines", hw_engines, 0}, {"force_reset", force_reset, 0}, {"sa_info", sa_info, 0}, {"topology", topology, 0}, {"steering", steering, 0}, -#ifdef CONFIG_DRM_XE_DEBUG - {"invalidate_tlb", invalidate_tlb, 0}, -#endif }; void xe_gt_debugfs_register(struct xe_gt *gt) -- cgit v1.2.3 From 9d25e284ea468930b0310b432784eef45e83e378 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 24 Jan 2023 16:33:09 -0800 Subject: drm/xe: Add has_range_tlb_invalidation device attribute This will help implementing range based TLB invalidations. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 81bc293fb240..ef723b08de89 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -87,6 +87,8 @@ struct xe_device { bool has_flat_ccs; /** @has_4tile: Whether tile-4 tiling is supported */ bool has_4tile; + /** @has_range_tlb_invalidation: Has range based TLB invalidations */ + bool has_range_tlb_invalidation; } info; /** @irq: device interrupt state */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 67fd9c3818f9..2482ce8e3df4 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -70,6 +70,7 @@ struct xe_device_desc { bool supports_usm; bool has_flat_ccs; bool has_4tile; + bool has_range_tlb_invalidation; }; #define PLATFORM(x) \ @@ -139,6 +140,7 @@ static const struct xe_device_desc dg1_desc = { .require_force_probe = true, \ .graphics_ver = 12, \ .graphics_rel = 50, \ + .has_range_tlb_invalidation = true, \ .has_flat_ccs = true, \ .dma_mask_size = 46, \ .max_tiles = 1, \ @@ -255,6 +257,7 @@ static const struct xe_device_desc mtl_desc = { .max_tiles = 2, .vm_max_level = 3, .media_ver = 13, + .has_range_tlb_invalidation = true, PLATFORM(XE_METEORLAKE), .extra_gts = xelpmp_gts, .platform_engine_mask = MTL_MAIN_ENGINES, @@ -407,6 +410,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.supports_usm = desc->supports_usm; xe->info.has_flat_ccs = desc->has_flat_ccs; xe->info.has_4tile = desc->has_4tile; + xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation; 
spd = subplatform_get(xe, desc); xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE; -- cgit v1.2.3 From 332dd0116c82a75df175a459fa69dda3f23491a7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 24 Jan 2023 16:21:58 -0800 Subject: drm/xe: Add range based TLB invalidations If the platform supports range based TLB invalidations use them. Hide these details in the xe_gt_tlb_invalidation layer. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 7 +-- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 87 ++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 4 +- drivers/gpu/drm/xe/xe_pt.c | 9 ++- drivers/gpu/drm/xe/xe_vm.c | 2 +- 5 files changed, 84 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 705093cb63d7..e1a5a3a70c92 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -240,12 +240,7 @@ unlock_vm: goto retry_userptr; if (!ret) { - /* - * FIXME: Doing a full TLB invalidation for now, likely could - * defer TLB invalidate + fault response to a callback of fence - * too - */ - ret = xe_gt_tlb_invalidation(gt, NULL); + ret = xe_gt_tlb_invalidation(gt, NULL, vma); if (ret >= 0) ret = 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 9e026fd0a45d..0b37cd09a59a 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -92,16 +92,10 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) } static int send_tlb_invalidation(struct xe_guc *guc, - struct xe_gt_tlb_invalidation_fence *fence) + struct xe_gt_tlb_invalidation_fence *fence, + u32 *action, int len) { struct xe_gt *gt = guc_to_gt(guc); - u32 action[] = { - XE_GUC_ACTION_TLB_INVALIDATION, - 0, - XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT | - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | - XE_GUC_TLB_INVAL_FLUSH_CACHE, - }; int seqno; int ret; bool queue_work; @@ -125,7 +119,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, TLB_INVALIDATION_SEQNO_MAX; if (!gt->tlb_invalidation.seqno) gt->tlb_invalidation.seqno = 1; - ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action), + ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); if (!ret && fence) { fence->invalidation_time = ktime_get(); @@ -146,18 +140,83 @@ static int send_tlb_invalidation(struct xe_guc *guc, * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL + * @vma: VMA to invalidate * - * Issue a full TLB invalidation on the GT. Completion of TLB is asynchronous - * and caller can either use the invalidation fence or seqno + - * xe_gt_tlb_invalidation_wait to wait for completion. + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can either use + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for + * completion. * * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. 
*/ int xe_gt_tlb_invalidation(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma) { - return send_tlb_invalidation(>->uc.guc, fence); + struct xe_device *xe = gt_to_xe(gt); +#define MAX_TLB_INVALIDATION_LEN 7 + u32 action[MAX_TLB_INVALIDATION_LEN]; + int len = 0; + + XE_BUG_ON(!vma); + + if (!xe->info.has_range_tlb_invalidation) { + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ +#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ + XE_GUC_TLB_INVAL_FLUSH_CACHE) + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); + } else { + u64 start = vma->start; + u64 length = vma->end - vma->start + 1; + u64 align, end; + + if (length < SZ_4K) + length = SZ_4K; + + /* + * We need to invalidate a higher granularity if start address + * is not aligned to length. When start is not aligned with + * length we need to find the length large enough to create an + * address mask covering the required range. + */ + align = roundup_pow_of_two(length); + start = ALIGN_DOWN(vma->start, align); + end = ALIGN(vma->start + length, align); + length = align; + while (start + length < end) { + length <<= 1; + start = ALIGN_DOWN(vma->start, length); + } + + /* + * Minimum invalidation size for a 2MB page that the hardware + * expects is 16MB + */ + if (length >= SZ_2M) { + length = max_t(u64, SZ_16M, length); + start = ALIGN_DOWN(vma->start, length); + } + + XE_BUG_ON(length < SZ_4K); + XE_BUG_ON(!is_power_of_2(length)); + XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)); + XE_BUG_ON(!IS_ALIGNED(start, length)); + + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); + action[len++] = vma->vm->usm.asid; + action[len++] = lower_32_bits(start); + action[len++] = upper_32_bits(start); + action[len++] = ilog2(length) - ilog2(SZ_4K); + } + + XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN); + + return send_tlb_invalidation(>->uc.guc, fence, action, len); } static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 7e6fbf46f0e3..b4c4f717bc8a 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -12,11 +12,13 @@ struct xe_gt; struct xe_guc; +struct xe_vma; int xe_gt_tlb_invalidation_init(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence); + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma); int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index b220d1d5cfe3..cde75708d843 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1466,6 +1466,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; struct xe_gt *gt; + struct xe_vma *vma; struct dma_fence *fence; struct dma_fence_cb cb; struct work_struct work; @@ -1505,12 +1506,13 @@ static void invalidation_fence_work_func(struct work_struct *w) 
container_of(w, struct invalidation_fence, work); trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); - xe_gt_tlb_invalidation(ifence->gt, &ifence->base); + xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma); } static int invalidation_fence_init(struct xe_gt *gt, struct invalidation_fence *ifence, - struct dma_fence *fence) + struct dma_fence *fence, + struct xe_vma *vma) { int ret; @@ -1528,6 +1530,7 @@ static int invalidation_fence_init(struct xe_gt *gt, dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; + ifence->vma = vma; INIT_WORK(&ifence->work, invalidation_fence_work_func); ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); @@ -1614,7 +1617,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, int err; /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(gt, ifence, fence); + err = invalidation_fence_init(gt, ifence, fence, vma); if (err) { dma_fence_put(fence); kfree(ifence); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 83f8c8a186d8..4fc8e24f93ce 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3349,7 +3349,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(gt, vma)) { gt_needs_invalidate |= BIT(id); xe_device_wmb(xe); - seqno[id] = xe_gt_tlb_invalidation(gt, NULL); + seqno[id] = xe_gt_tlb_invalidation(gt, NULL, vma); if (seqno[id] < 0) return seqno[id]; } -- cgit v1.2.3 From 74a8b2c6e2d6f17fcd9977de298eff20a46b0af7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 25 Jan 2023 10:36:05 -0800 Subject: drm/xe: Propagate error from bind operations to async fence If an bind operation fails we need to report it via the async fence. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_vm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4fc8e24f93ce..8ba548e49add 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1641,6 +1641,7 @@ err_fences: struct async_op_fence { struct dma_fence fence; + struct dma_fence *wait_fence; struct dma_fence_cb cb; struct xe_vm *vm; wait_queue_head_t wq; @@ -1668,8 +1669,10 @@ static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) struct async_op_fence *afence = container_of(cb, struct async_op_fence, cb); + afence->fence.error = afence->wait_fence->error; dma_fence_signal(&afence->fence); xe_vm_put(afence->vm); + dma_fence_put(afence->wait_fence); dma_fence_put(&afence->fence); } @@ -1685,13 +1688,17 @@ static void add_async_op_fence_cb(struct xe_vm *vm, wake_up_all(&afence->wq); } + afence->wait_fence = dma_fence_get(fence); afence->vm = xe_vm_get(vm); dma_fence_get(&afence->fence); ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb); - if (ret == -ENOENT) + if (ret == -ENOENT) { + afence->fence.error = afence->wait_fence->error; dma_fence_signal(&afence->fence); + } if (ret) { xe_vm_put(vm); + dma_fence_put(afence->wait_fence); dma_fence_put(&afence->fence); } XE_WARN_ON(ret && ret != -ENOENT); -- cgit v1.2.3 From da3799c975726572066f1c6bc6a6f65cb1f01c84 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 30 Jan 2023 10:55:35 -0800 Subject: drm/xe: Use GuC to do GGTT invalidations for the GuC firmware Only the GuC should be issuing TLB invalidations if it is enabled. 
Part of this patch is sanitize the device on driver unload to ensure we do not send GuC based TLB invalidations during driver unload. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_device.c | 14 ++++++++++ drivers/gpu/drm/xe/xe_ggtt.c | 12 ++++++-- drivers/gpu/drm/xe/xe_gt.c | 13 +++++++++ drivers/gpu/drm/xe/xe_gt.h | 1 + drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 43 +++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 7 +++-- drivers/gpu/drm/xe/xe_guc.c | 2 ++ drivers/gpu/drm/xe/xe_guc_types.h | 2 ++ drivers/gpu/drm/xe/xe_pt.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 9 ++++-- drivers/gpu/drm/xe/xe_uc.h | 1 + drivers/gpu/drm/xe/xe_vm.c | 2 +- 13 files changed, 89 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 98f08cd9d4b0..8fe0324ccef3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -215,6 +215,16 @@ err_put: return ERR_PTR(err); } +static void xe_device_sanitize(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + struct xe_gt *gt; + u8 id; + + for_each_gt(gt, xe, id) + xe_gt_sanitize(gt); +} + int xe_device_probe(struct xe_device *xe) { struct xe_gt *gt; @@ -274,6 +284,10 @@ int xe_device_probe(struct xe_device *xe) xe_debugfs_register(xe); + err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); + if (err) + return err; + return 0; err_irq_shutdown: diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index baa080cd1133..20450ed8400b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -13,6 +13,7 @@ #include "xe_device.h" #include "xe_bo.h" #include "xe_gt.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_wopcm.h" @@ -200,10 +201,17 @@ void xe_ggtt_invalidate(struct xe_gt *gt) * therefore flushing WC buffers. Is that really true here? 
*/ xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN); - if (xe_device_guc_submission_enabled(gt_to_xe(gt))) { + + if (gt->uc.guc.submission_state.enabled) { + int seqno; + + seqno = xe_gt_tlb_invalidation_guc(gt); + XE_WARN_ON(seqno <= 0); + if (seqno > 0) + xe_gt_tlb_invalidation_wait(gt, seqno); + } else if (xe_device_guc_submission_enabled(gt_to_xe(gt))) { struct xe_device *xe = gt_to_xe(gt); - /* TODO: also use vfunc here */ if (xe->info.platform == XE_PVC) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg, PVC_GUC_TLB_INV_DESC1_INVALIDATE); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 28bbb3159531..0e0d5cadb3e7 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -196,6 +196,15 @@ static int gt_ttm_mgr_init(struct xe_gt *gt) return 0; } +void xe_gt_sanitize(struct xe_gt *gt) +{ + /* + * FIXME: if xe_uc_sanitize is called here, on TGL driver will not + * reload + */ + gt->uc.guc.submission_state.enabled = false; +} + static void gt_fini(struct drm_device *drm, void *arg) { struct xe_gt *gt = arg; @@ -662,6 +671,8 @@ static int gt_reset(struct xe_gt *gt) drm_info(&xe->drm, "GT reset started\n"); + xe_gt_sanitize(gt); + xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) @@ -742,6 +753,8 @@ int xe_gt_suspend(struct xe_gt *gt) if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) return -ENODEV; + xe_gt_sanitize(gt); + xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 5dc08a993cfe..5635f2803170 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -26,6 +26,7 @@ int xe_gt_suspend(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_migrate_wait(struct xe_gt *gt); +void xe_gt_sanitize(struct xe_gt *gt); struct xe_gt *xe_find_full_gt(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index e1a5a3a70c92..ce79eb48feb8 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -240,7 +240,7 @@ unlock_vm: goto retry_userptr; if (!ret) { - ret = xe_gt_tlb_invalidation(gt, NULL, vma); + ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma); if (ret >= 0) ret = 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 0b37cd09a59a..f6a2dd26cad4 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -135,8 +135,34 @@ static int send_tlb_invalidation(struct xe_guc *guc, return ret; } +#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ + XE_GUC_TLB_INVAL_FLUSH_CACHE) + /** - * xe_gt_tlb_invalidation - Issue a TLB invalidation on this GT + * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC + * @gt: graphics tile + * + * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and + * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion. + * + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, + * negative error code on error. 
+ */ +int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +{ + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + 0, /* seqno, replaced in send_tlb_invalidation */ + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), + }; + + return send_tlb_invalidation(>->uc.guc, NULL, action, + ARRAY_SIZE(action)); +} + +/** + * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL @@ -150,9 +176,9 @@ static int send_tlb_invalidation(struct xe_guc *guc, * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. */ -int xe_gt_tlb_invalidation(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma) +int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma) { struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 @@ -161,12 +187,9 @@ int xe_gt_tlb_invalidation(struct xe_gt *gt, XE_BUG_ON(!vma); + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ if (!xe->info.has_range_tlb_invalidation) { - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; - action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ -#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ - XE_GUC_TLB_INVAL_FLUSH_CACHE) action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { u64 start = vma->start; @@ -205,8 +228,6 @@ int xe_gt_tlb_invalidation(struct xe_gt *gt, XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)); XE_BUG_ON(!IS_ALIGNED(start, length)); - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; - action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); action[len++] = vma->vm->usm.asid; action[len++] = lower_32_bits(start); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index b4c4f717bc8a..b333c1709397 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -16,9 +16,10 @@ struct xe_vma; int xe_gt_tlb_invalidation_init(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma); +int xe_gt_tlb_invalidation_guc(struct xe_gt *gt); +int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma); int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 88a3a96da084..5cdfdfd0de40 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -309,6 +309,7 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) int xe_guc_post_load_init(struct xe_guc *guc) { xe_guc_ads_populate_post_load(&guc->ads); + guc->submission_state.enabled = true; return 0; } @@ -795,6 +796,7 @@ void xe_guc_sanitize(struct xe_guc *guc) { xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); xe_guc_ct_disable(&guc->ct); + guc->submission_state.enabled = false; } int xe_guc_reset_prepare(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h 
index c2a484282ef2..ac7eec28934d 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -60,6 +60,8 @@ struct xe_guc { /** @patch: patch version of GuC submission */ u32 patch; } version; + /** @enabled: submission is enabled */ + bool enabled; } submission_state; /** @hwconfig: Hardware config state */ struct { diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index cde75708d843..3333b413686e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1506,7 +1506,7 @@ static void invalidation_fence_work_func(struct work_struct *w) container_of(w, struct invalidation_fence, work); trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); - xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma); + xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma); } static int invalidation_fence_init(struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 938d14698003..7886c8b85397 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -88,10 +88,15 @@ static int uc_reset(struct xe_uc *uc) return 0; } -static int uc_sanitize(struct xe_uc *uc) +void xe_uc_sanitize(struct xe_uc *uc) { xe_huc_sanitize(&uc->huc); xe_guc_sanitize(&uc->guc); +} + +static int xe_uc_sanitize_reset(struct xe_uc *uc) +{ + xe_uc_sanitize(uc); return uc_reset(uc); } @@ -129,7 +134,7 @@ int xe_uc_init_hw(struct xe_uc *uc) if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) return 0; - ret = uc_sanitize(uc); + ret = xe_uc_sanitize_reset(uc); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 380e722f95fc..d6efc9ef00d3 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -17,5 +17,6 @@ void xe_uc_stop_prepare(struct xe_uc *uc); int xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); +void xe_uc_sanitize(struct xe_uc *uc); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 8ba548e49add..4bbb0d0b0928 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3356,7 +3356,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(gt, vma)) { gt_needs_invalidate |= BIT(id); xe_device_wmb(xe); - seqno[id] = xe_gt_tlb_invalidation(gt, NULL, vma); + seqno[id] = xe_gt_tlb_invalidation_vma(gt, NULL, vma); if (seqno[id] < 0) return seqno[id]; } -- cgit v1.2.3 From c3ca5465564e7b6459e868b3433fff4e44a7fd64 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 25 Jan 2023 15:27:21 -0800 Subject: drm/xe: Lock GGTT on when restoring kernel BOs Make lockdep happy as we required to hold the GGTT when calling xe_ggtt_map_bo. 
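A minimal sketch of the calling convention this change enforces (illustrative only; struct xe_ggtt, struct xe_bo and xe_ggtt_map_bo() are the driver's own, the helper name here is made up):

	/* xe_ggtt_map_bo() expects ggtt->lock to be held by the caller */
	static void restore_bo_ggtt_mapping(struct xe_ggtt *ggtt, struct xe_bo *bo)
	{
		mutex_lock(&ggtt->lock);
		xe_ggtt_map_bo(ggtt, bo);
		mutex_unlock(&ggtt->lock);
	}
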
Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_bo_evict.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 7046dc203138..3fb3c8c77efa 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -147,8 +147,11 @@ int xe_bo_restore_kernel(struct xe_device *xe) return ret; } - if (bo->flags & XE_BO_CREATE_GGTT_BIT) + if (bo->flags & XE_BO_CREATE_GGTT_BIT) { + mutex_lock(&bo->gt->mem.ggtt->lock); xe_ggtt_map_bo(bo->gt->mem.ggtt, bo); + mutex_unlock(&bo->gt->mem.ggtt->lock); + } /* * We expect validate to trigger a move VRAM and our move code -- cgit v1.2.3 From bae8ddae1881f645d679cd8189de995c26e9d694 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 26 Jan 2023 09:54:20 -0800 Subject: drm/xe: Propagate VM unbind error to invalidation fence If a VM unbind hits an error, do not issue a TLB invalidation and propagate the error the invalidation fence. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_pt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 3333b413686e..4299689fe6a8 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1496,7 +1496,13 @@ static void invalidation_fence_cb(struct dma_fence *fence, container_of(cb, struct invalidation_fence, cb); trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base); - queue_work(system_wq, &ifence->work); + if (!ifence->fence->error) { + queue_work(system_wq, &ifence->work); + } else { + ifence->base.base.error = ifence->fence->error; + dma_fence_signal(&ifence->base.base); + dma_fence_put(&ifence->base.base); + } dma_fence_put(ifence->fence); } -- cgit v1.2.3 From 9f9f09d4071685855d43a77c8799578d26ba3f24 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 26 Jan 2023 10:05:53 -0800 Subject: drm/xe: Signal invalidation fence immediately if CT send fails This means we are in the middle of a GT reset and no need to do TLB invalidation so just signal invalidation fence immediately. 
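The idea, as a rough sketch (hypothetical helper name; the patch itself reuses the new invalidation_fence_signal() helper visible in the diff below): if the CT send fails while a GT reset is in progress, complete the fence on the spot rather than leaving it on the pending list:

	static void fence_signal_on_send_error(struct xe_gt_tlb_invalidation_fence *fence,
					       int ret)
	{
		if (ret < 0) {
			/* No G2H completion will arrive; don't leave waiters hanging */
			list_del(&fence->link);
			dma_fence_signal(&fence->base);
			dma_fence_put(&fence->base);
		}
	}
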
Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index f6a2dd26cad4..2521c8a65690 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -69,6 +69,15 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) return 0; } +static void +invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +{ + trace_xe_gt_tlb_invalidation_fence_signal(fence); + list_del(&fence->link); + dma_fence_signal(&fence->base); + dma_fence_put(&fence->base); +} + /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: graphics tile @@ -83,11 +92,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) mutex_lock(>->uc.guc.ct.lock); list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - list_del(&fence->link); - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); - } + >->tlb_invalidation.pending_fences, link) + invalidation_fence_signal(fence); mutex_unlock(>->uc.guc.ct.lock); } @@ -130,6 +136,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, } if (!ret) ret = seqno; + if (ret < 0 && fence) + invalidation_fence_signal(fence); mutex_unlock(&guc->ct.lock); return ret; @@ -321,16 +329,13 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) if (fence) trace_xe_gt_tlb_invalidation_fence_recv(fence); if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) { - trace_xe_gt_tlb_invalidation_fence_signal(fence); - list_del(&fence->link); + invalidation_fence_signal(fence); if (!list_empty(>->tlb_invalidation.pending_fences)) mod_delayed_work(system_wq, >->tlb_invalidation.fence_tdr, TLB_TIMEOUT); else cancel_delayed_work(>->tlb_invalidation.fence_tdr); - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); } return 0; -- cgit v1.2.3 From 5669899e9b3c3f38252902141483f5a09c8eedd3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 27 Jan 2023 12:53:14 -0800 Subject: drm/xe: Add has_asid to device info Rather than alias supports_usm to ASIS support, add an explicit variable to indicate ASID support. 
Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_lrc.c | 4 ++-- drivers/gpu/drm/xe/xe_pci.c | 3 +++ drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ef723b08de89..4c4a912141a9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -81,6 +81,8 @@ struct xe_device { u8 media_ver; /** @supports_usm: Supports unified shared memory */ bool supports_usm; + /** @has_asid: Has address space ID */ + bool has_asid; /** @enable_guc: GuC submission enabled */ bool enable_guc; /** @has_flat_ccs: Whether flat CCS metadata is used */ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 056c2c5a0b81..347ff9b34494 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -682,14 +682,14 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); - if (xe->info.supports_usm && vm) { + if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, (e->usm.acc_granularity << ACC_GRANULARITY_S) | vm->usm.asid); + if (xe->info.supports_usm && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, (e->usm.acc_notify << ACC_NOTIFY_S) | e->usm.acc_trigger); - } lrc->desc = GEN8_CTX_VALID; lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 2482ce8e3df4..c159306e04cf 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -71,6 +71,7 @@ struct xe_device_desc { bool has_flat_ccs; bool has_4tile; bool has_range_tlb_invalidation; + bool has_asid; }; #define PLATFORM(x) \ @@ -225,6 +226,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .max_tiles = 2, .vm_max_level = 4, .supports_usm = true, + .has_asid = true, }; #define MTL_MEDIA_ENGINES \ @@ -408,6 +410,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.vm_max_level = desc->vm_max_level; xe->info.media_ver = desc->media_ver; xe->info.supports_usm = desc->supports_usm; + xe->info.has_asid = desc->has_asid; xe->info.has_flat_ccs = desc->has_flat_ccs; xe->info.has_4tile = desc->has_4tile; xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4bbb0d0b0928..04481851fa00 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1429,7 +1429,7 @@ static void vm_destroy_work_func(struct work_struct *w) xe_device_mem_access_put(xe); xe_pm_runtime_put(xe); - if (xe->info.supports_usm) { + if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); XE_WARN_ON(lookup != vm); @@ -1926,7 +1926,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, return err; } - if (xe->info.supports_usm) { + if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, XA_LIMIT(0, XE_MAX_ASID - 1), -- cgit v1.2.3 From 5387e865d90e927ba0af9d37855c9bd47cc9d00a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 27 Jan 2023 13:00:28 -0800 Subject: drm/xe: Add TLB invalidation fence after rebinds issued from 
execs If we add an TLB invalidation fence for rebinds issued from execs we should be able to drop the TLB invalidation from the ring operations. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_pt.c | 200 +++++++++++++++++++++++++-------------------- 1 file changed, 110 insertions(+), 90 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 4299689fe6a8..23f308184ba1 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1156,6 +1156,96 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = { .pre_commit = xe_pt_userptr_pre_commit, }; +struct invalidation_fence { + struct xe_gt_tlb_invalidation_fence base; + struct xe_gt *gt; + struct xe_vma *vma; + struct dma_fence *fence; + struct dma_fence_cb cb; + struct work_struct work; +}; + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +static void invalidation_fence_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct invalidation_fence *ifence = + container_of(cb, struct invalidation_fence, cb); + + trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base); + if (!ifence->fence->error) { + queue_work(system_wq, &ifence->work); + } else { + ifence->base.base.error = ifence->fence->error; + dma_fence_signal(&ifence->base.base); + dma_fence_put(&ifence->base.base); + } + dma_fence_put(ifence->fence); +} + +static void invalidation_fence_work_func(struct work_struct *w) +{ + struct invalidation_fence *ifence = + container_of(w, struct invalidation_fence, work); + + trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); + xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma); +} + +static int invalidation_fence_init(struct xe_gt *gt, + struct invalidation_fence *ifence, + struct dma_fence *fence, + struct xe_vma *vma) +{ + int ret; + + trace_xe_gt_tlb_invalidation_fence_create(&ifence->base); + + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&ifence->base.base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + gt->tlb_invalidation.fence_context, + ++gt->tlb_invalidation.fence_seqno); + spin_unlock_irq(>->tlb_invalidation.lock); + + INIT_LIST_HEAD(&ifence->base.link); + + dma_fence_get(&ifence->base.base); /* Ref for caller */ + ifence->fence = fence; + ifence->gt = gt; + ifence->vma = vma; + + INIT_WORK(&ifence->work, invalidation_fence_work_func); + ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); + if (ret == -ENOENT) { + dma_fence_put(ifence->fence); /* Usually dropped in CB */ + invalidation_fence_work_func(&ifence->work); + } else if (ret) { + dma_fence_put(&ifence->base.base); /* Caller ref */ + dma_fence_put(&ifence->base.base); /* Creation ref */ + } + + XE_WARN_ON(ret && ret != -ENOENT); + + return ret && ret != -ENOENT ? ret : 0; +} + /** * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma * address range. 
@@ -1194,6 +1284,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, struct xe_vm *vm = vma->vm; u32 num_entries; struct dma_fence *fence; + struct invalidation_fence *ifence = NULL; int err; bind_pt_update.locked = false; @@ -1212,6 +1303,12 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); + if (rebind && !xe_vm_no_dma_fences(vma->vm)) { + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!ifence) + return ERR_PTR(-ENOMEM); + } + fence = xe_migrate_update_pgtables(gt->migrate, vm, vma->bo, e ? e : vm->eng[gt->info.id], @@ -1221,6 +1318,18 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, if (!IS_ERR(fence)) { LLIST_HEAD(deferred); + /* TLB invalidation must be done before signaling rebind */ + if (rebind && !xe_vm_no_dma_fences(vma->vm)) { + int err = invalidation_fence_init(gt, ifence, fence, + vma); + if (err) { + dma_fence_put(fence); + kfree(ifence); + return ERR_PTR(err); + } + fence = &ifence->base.base; + } + /* add shared fence now for pagetable delayed destroy */ dma_resv_add_fence(&vm->resv, fence, !rebind && vma->last_munmap_rebind ? @@ -1246,6 +1355,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); } else { + kfree(ifence); if (bind_pt_update.locked) up_read(&vm->userptr.notifier_lock); xe_pt_abort_bind(vma, entries, num_entries); @@ -1463,96 +1573,6 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { .pre_commit = xe_pt_userptr_pre_commit, }; -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct xe_vma *vma; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; -}; - -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - - trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - - trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); - xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - struct xe_vma *vma) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(&ifence->base); - - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&ifence->base.base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - gt->tlb_invalidation.fence_context, - ++gt->tlb_invalidation.fence_seqno); - spin_unlock_irq(>->tlb_invalidation.lock); - - 
INIT_LIST_HEAD(&ifence->base.link); - - dma_fence_get(&ifence->base.base); /* Ref for caller */ - ifence->fence = fence; - ifence->gt = gt; - ifence->vma = vma; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - XE_WARN_ON(ret && ret != -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - /** * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma * address range. -- cgit v1.2.3 From 50a48cca608102a53a0961bd95aefb53a8ced3ab Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 26 Jan 2023 10:40:41 -0800 Subject: drm/xe: Drop TLB invalidation from ring operations Now that we issue TLB invalidations on unbinds and rebind from execs we no longer need to issue TLB invalidations from the ring operations. Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Niranjana Vishwanathapura --- drivers/gpu/drm/xe/xe_ring_ops.c | 40 +--------------------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index fda7978a63e0..54db4ca19a36 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -83,31 +83,6 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i) return i; } -static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i) -{ - u32 flags = PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | - PIPE_CONTROL_TLB_INVALIDATE | - PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_VF_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE | - PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_STORE_DATA_INDEX; - - flags &= ~mask_flags; - - dw[i++] = GFX_OP_PIPE_CONTROL(6); - dw[i++] = flags; - dw[i++] = LRC_PPHWSP_SCRATCH_ADDR; - dw[i++] = 0; - dw[i++] = 0; - dw[i++] = 0; - - return i; -} - #define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21)) static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, @@ -148,11 +123,6 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - /* XXX: Conditional flushing possible */ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(0, dw, i); - dw[i++] = preparser_disable(false); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); @@ -181,9 +151,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_device *xe = gt_to_xe(gt); bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE; - /* XXX: Conditional flushing possible */ dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(decode ? 
MI_INVALIDATE_BSD : 0, dw, i); /* Wa_1809175790 */ if (!xe->info.has_flat_ccs) { if (decode) @@ -244,15 +212,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->engine->gt; struct xe_device *xe = gt_to_xe(gt); bool pvc = xe->info.platform == XE_PVC; - u32 mask_flags = 0; - /* XXX: Conditional flushing possible */ dw[i++] = preparser_disable(true); - if (pvc) - mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; - else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) - mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; - i = emit_pipe_invalidate(mask_flags, dw, i); /* Wa_1809175790 */ if (!xe->info.has_flat_ccs) i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i); @@ -287,6 +248,7 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); + /* XXX: Do we need this? Leaving for now. */ dw[i++] = preparser_disable(true); i = emit_flush_invalidate(0, dw, i); dw[i++] = preparser_disable(false); -- cgit v1.2.3 From 77775e24e684c761d44ba2f804581c0c42e0ad38 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 31 Jan 2023 23:36:39 +0100 Subject: drm/xe: Add debugfs for dumping GGTT mappings Adding a debugfs dump of GGTT was useful for some debugging I did, and easy to add. Might be useful for others too. Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 13 +++++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 4 ++++ drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++ 3 files changed, 27 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 20450ed8400b..907a603572b2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -345,3 +345,16 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) xe_ggtt_remove_node(ggtt, &bo->ggtt_node); } + +int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) +{ + int err; + + err = mutex_lock_interruptible(&ggtt->lock); + if (err) + return err; + + drm_mm_print(&ggtt->mm, p); + mutex_unlock(&ggtt->lock); + return err; +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index ab9cfdab5cca..3469aa2b1a02 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -8,6 +8,8 @@ #include "xe_ggtt_types.h" +struct drm_printer; + u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset); void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); void xe_ggtt_invalidate(struct xe_gt *gt); @@ -26,4 +28,6 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index daae42d3ab3b..c320e58810ce 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -8,6 +8,7 @@ #include "xe_device.h" #include "xe_force_wake.h" +#include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_debugfs.h" #include "xe_gt_mcr.h" @@ -88,12 +89,21 @@ static int steering(struct seq_file *m, void *data) return 0; } +static int ggtt(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + return xe_ggtt_dump(gt->mem.ggtt, &p); +} + static const struct drm_info_list debugfs_list[] = { 
{"hw_engines", hw_engines, 0}, {"force_reset", force_reset, 0}, {"sa_info", sa_info, 0}, {"topology", topology, 0}, {"steering", steering, 0}, + {"ggtt", ggtt, 0}, }; void xe_gt_debugfs_register(struct xe_gt *gt) -- cgit v1.2.3 From 62421b45d431dc6f023334800eae1bffb1e77eb2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 20 Jan 2023 16:59:09 -0800 Subject: drm/xe: Fix typo in MCR documentation Add missing "multicast" word and adapt/wrap the rest of the sentence. Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index ddce2c41c7f5..7c97031cd716 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -23,12 +23,12 @@ * * MMIO accesses to MCR registers are controlled according to the settings * programmed in the platform's MCR_SELECTOR register(s). MMIO writes to MCR - * registers can be done in either a (i.e., a single write updates all + * registers can be done in either multicast (a single write updates all * instances of the register to the same value) or unicast (a write updates only - * one specific instance). Reads of MCR registers always operate in a unicast - * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR. - * Selection of a specific MCR instance for unicast operations is referred to - * as "steering." + * one specific instance) form. Reads of MCR registers always operate in a + * unicast manner regardless of how the multicast/unicast bit is set in + * MCR_SELECTOR. Selection of a specific MCR instance for unicast operations is + * referred to as "steering." * * If MCR register operations are steered toward a hardware unit that is * fused off or currently powered down due to power gating, the MMIO operation -- cgit v1.2.3 From b799aa5a04d09c4b3abe79b1c6563d54823410e6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 25 Jan 2023 14:14:38 -0800 Subject: drm/xe: Fix xe_tuning include xe_tuning.c should include xe_tuning.h, not xe_wa.h Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_tuning.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index e043db037368..96b16b8d03cf 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -3,7 +3,7 @@ * Copyright © 2022 Intel Corporation */ -#include "xe_wa.h" +#include "xe_tuning.h" #include "xe_platform_types.h" #include "xe_gt_types.h" -- cgit v1.2.3 From 2679be71f1372e8fac07d1be5443a5ba26b27345 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 23 Jan 2023 09:38:27 -0800 Subject: drm/xe: Remove TODO from rtp infra The function pointer is already present as match_func, inside struct xe_rtp_rule and handled as so instead of inside rtp_regval as originally thought out when this was written. 
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp_types.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index b55b556a2495..630a2ec53fc6 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -24,10 +24,6 @@ enum { struct xe_rtp_regval { /** @reg: Register */ u32 reg; - /* - * TODO: maybe we need a union here with a func pointer for cases - * that are too specific to be generalized - */ /** @clr_bits: bits to clear when updating register */ u32 clr_bits; /** @set_bits: bits to set when updating register */ -- cgit v1.2.3 From 43f98df1f5f0ef94d79ba2ef4f841a3f547f7a04 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 25 Jan 2023 13:10:24 -0800 Subject: drm/xe: Remove TODO from workaround documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LRC workarounds are already implemented: remove leftover TODO. Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index b56141ba7145..699a39fb786a 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -35,9 +35,6 @@ * creation to have a "primed golden context", i.e. a context image that * already contains the changes needed to all the registers. * - * TODO: Although these workarounds are maintained here, they are not - * currently being applied. - * * - Engine workarounds: the list of these WAs is applied whenever the specific * engine is reset. It's also possible that a set of engine classes share a * common power domain and they are reset together. This happens on some -- cgit v1.2.3 From 3319b213d7c8bdeaa001fec7b60aefa2390112d4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 30 Jan 2023 14:14:37 -0800 Subject: drm/xe/mcr: Use designated init for xe_steering_types There is already a BUILD_BUG_ON() check to make sure the size follow the number of steering types. Also make sure the right index is being used for each steering type. 
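The general idiom being applied, sketched here with made-up names rather than the driver's actual enum and table: a designated initializer keyed by the enum keeps each entry attached to its intended index even if entries are reordered, and a BUILD_BUG_ON() on the array size catches a table that no longer matches the number of enum values:

	/* BUILD_BUG_ON()/ARRAY_SIZE() come from <linux/build_bug.h>/<linux/kernel.h> */
	enum demo_steering { DEMO_L3BANK, DEMO_MSLICE, DEMO_NUM_TYPES };

	static const char * const demo_names[] = {
		[DEMO_L3BANK] = "L3BANK",
		[DEMO_MSLICE] = "MSLICE",
	};

	static void demo_init(void)
	{
		/* Compile-time failure if the table and enum fall out of sync */
		BUILD_BUG_ON(ARRAY_SIZE(demo_names) != DEMO_NUM_TYPES);
	}
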
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 7c97031cd716..d7c89f7b56e2 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -249,12 +249,12 @@ static const struct { const char *name; void (*init)(struct xe_gt *); } xe_steering_types[] = { - { "L3BANK", init_steering_l3bank }, - { "MSLICE", init_steering_mslice }, - { "LNCF", NULL }, /* initialized by mslice init */ - { "DSS", init_steering_dss }, - { "OADDRM", init_steering_oaddrm }, - { "INSTANCE 0", init_steering_inst0 }, + [L3BANK] = { "L3BANK", init_steering_l3bank }, + [MSLICE] = { "MSLICE", init_steering_mslice }, + [LNCF] = { "LNCF", NULL }, /* initialized by mslice init */ + [DSS] = { "DSS", init_steering_dss }, + [OADDRM] = { "OADDRM", init_steering_oaddrm }, + [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, }; void xe_gt_mcr_init(struct xe_gt *gt) -- cgit v1.2.3 From 564d64f83de9759c1faa4a64ee4aed8465281ecb Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 30 Jan 2023 17:08:37 -0800 Subject: drm/xe/mcr: Add SQIDI steering for DG2 Like detailed in commit 927dfdd09d8c ("drm/i915/dg2: Add SQIDI steering"), some registers are expected to have the selector initialized just once and never set to anything else. For xe, the registers with SQIDI replication type (SF and MCFG) were missing, resulting in warnings like: [ 410.685565] xe 0000:03:00.0: Did not find MCR register 0x8724 in any MCR steering table While adding these registers, abstract the handling for "dg2_gam_ranges", moving them together with SF/MCFG to a dedicated table. This also avoids that range to be checked for platforms other than DG2. For DG2, this is the new steering output: # cat /sys/kernel/debug/dri/0/gt0/steering ... IMPLICIT steering: group=0x0, instance=0x0 0x000b00 - 0x000bff 0x001000 - 0x001fff 0x004000 - 0x004aff 0x008700 - 0x0087ff 0x00c800 - 0x00cfff 0x00f000 - 0x00ffff Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 2 + drivers/gpu/drm/xe/xe_gt_mcr.c | 80 ++++++++++++++++++++++++---------------- drivers/gpu/drm/xe/xe_gt_mcr.h | 2 + drivers/gpu/drm/xe/xe_gt_types.h | 7 ++++ 4 files changed, 59 insertions(+), 32 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0e0d5cadb3e7..20dbc08d3685 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -500,6 +500,7 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_hw_fence_irq; + xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); err = xe_gt_clock_init(gt); @@ -633,6 +634,7 @@ static int do_gt_restart(struct xe_gt *gt) setup_private_ppat(gt); + xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); err = xe_wopcm_init(>->uc.wopcm); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index d7c89f7b56e2..bb71071c3435 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -155,15 +155,13 @@ static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = { {}, }; -/* - * DG2 GAM registers are a special case; this table is checked directly in - * xe_gt_mcr_get_nonterminated_steering and is not hooked up via - * gt->steering[]. 
- */ -static const struct xe_mmio_range dg2_gam_ranges[] = { - { 0x004000, 0x004AFF }, - { 0x00C800, 0x00CFFF }, - { 0x00F000, 0x00FFFF }, +static const struct xe_mmio_range dg2_implicit_steering_table[] = { + { 0x000B00, 0x000BFF }, /* SF (SQIDI replication) */ + { 0x001000, 0x001FFF }, /* SF (SQIDI replication) */ + { 0x004000, 0x004AFF }, /* GAM (MSLICE replication) */ + { 0x008700, 0x0087FF }, /* MCFG (SQIDI replication) */ + { 0x00C800, 0x00CFFF }, /* GAM (MSLICE replication) */ + { 0x00F000, 0x00FFFF }, /* GAM (MSLICE replication) */ {}, }; @@ -255,12 +253,14 @@ static const struct { [DSS] = { "DSS", init_steering_dss }, [OADDRM] = { "OADDRM", init_steering_oaddrm }, [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, + [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; void xe_gt_mcr_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); spin_lock_init(>->mcr_lock); @@ -280,6 +280,7 @@ void xe_gt_mcr_init(struct xe_gt *gt) gt->steering[MSLICE].ranges = xehp_mslice_steering_table; gt->steering[LNCF].ranges = xehp_lncf_steering_table; gt->steering[DSS].ranges = xehp_dss_steering_table; + gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table; } else { gt->steering[L3BANK].ranges = xelp_l3bank_steering_table; gt->steering[DSS].ranges = xelp_dss_steering_table; @@ -291,6 +292,33 @@ void xe_gt_mcr_init(struct xe_gt *gt) xe_steering_types[i].init(gt); } +/** + * xe_gt_mcr_set_implicit_defaults - Initialize steer control registers + * @gt: GT structure + * + * Some register ranges don't need to have their steering control registers + * changed on each access - it's sufficient to set them once on initialization. + * This function sets those registers for each platform * + */ +void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.platform == XE_DG2) { + u32 steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, 0) | + REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, 2); + + xe_mmio_write32(gt, MCFG_MCR_SELECTOR.reg, steer_val); + xe_mmio_write32(gt, SF_MCR_SELECTOR.reg, steer_val); + /* + * For GAM registers, all reads should be directed to instance 1 + * (unicast reads against other instances are not allowed), + * and instance 1 is already the hardware's default steering + * target, which we never change + */ + } +} + /* * xe_gt_mcr_get_nonterminated_steering - find group/instance values that * will steer a register to a non-terminated instance @@ -305,14 +333,15 @@ void xe_gt_mcr_init(struct xe_gt *gt) * steering. * * Returns true if the caller should steer to the @group/@instance values - * returned. Returns false if the caller need not perform any steering (i.e., - * the DG2 GAM range special case). + * returned. Returns false if the caller need not perform any steering */ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 *group, u8 *instance) { - for (int type = 0; type < NUM_STEERING_TYPES; type++) { + const struct xe_mmio_range *implicit_ranges; + + for (int type = 0; type < IMPLICIT_STEERING; type++) { if (!gt->steering[type].ranges) continue; @@ -325,27 +354,15 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, } } - /* - * All MCR registers should usually be part of one of the steering - * ranges we're tracking. 
However there's one special case: DG2 - * GAM registers are technically multicast registers, but are special - * in a number of ways: - * - they have their own dedicated steering control register (they - * don't share 0xFDC with other MCR classes) - * - all reads should be directed to instance 1 (unicast reads against - * other instances are not allowed), and instance 1 is already the - * the hardware's default steering target, which we never change - * - * Ultimately this means that we can just treat them as if they were - * unicast registers and all operations will work properly. - */ - for (int i = 0; dg2_gam_ranges[i].end > 0; i++) - if (xe_mmio_in_range(&dg2_gam_ranges[i], reg.reg)) - return false; + implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges; + if (implicit_ranges) + for (int i = 0; implicit_ranges[i].end > 0; i++) + if (xe_mmio_in_range(&implicit_ranges[i], reg.reg)) + return false; /* - * Not found in a steering table and not a DG2 GAM register? We'll - * just steer to 0/0 as a guess and raise a warning. + * Not found in a steering table and not a register with implicit + * steering. Just steer to 0/0 as a guess and raise a warning. */ drm_WARN(>_to_xe(gt)->drm, true, "Did not find MCR register %#x in any MCR steering table\n", @@ -467,7 +484,6 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg) group, instance, 0); mcr_unlock(gt); } else { - /* DG2 GAM special case rules; treat as if unicast */ val = xe_mmio_read32(gt, reg.reg); } diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index 62ec6eb654a0..c31987d2177c 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -13,6 +13,8 @@ struct xe_gt; void xe_gt_mcr_init(struct xe_gt *gt); +void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt); + u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg, int group, int instance); u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index a40fab262ac9..b01edd3fdc4d 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -66,6 +66,13 @@ enum xe_steering_type { */ INSTANCE0, + /* + * Register ranges that don't need special steering for each register: + * it's sufficient to keep the HW-default for the selector, or only + * change it once, on GT initialization. This needs to be the last + * steering type. + */ + IMPLICIT_STEERING, NUM_STEERING_TYPES }; -- cgit v1.2.3 From 3747c88428a199620ca626a196781516c6da12e6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 25 Jan 2023 15:03:07 -0800 Subject: drm/xe: Rename xe_rtp_regval to xe_rtp_action It's true that the struct records the register and the value (in form of 2 masks) to restore, but it also records more fields important to the application of workarounds/tuning, etc. One important part is what is the macro used to record these fields: SET/CLR/WR/FIELD_SET/etc. Thinking of the table as a set of rules + actions is more intuitive than rules + regval. 
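To make the new naming concrete: after this patch a helper such as XE_RTP_SET() fills the .action member instead of .regval, i.e. roughly (simplified, with placeholder register/bit names):

	{ XE_RTP_NAME("hypothetical-wa"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  /* XE_RTP_SET(EXAMPLE_REG, EXAMPLE_BIT) expands to: */
	  .action = { .reg = EXAMPLE_REG, .clr_bits = EXAMPLE_BIT,
		      .set_bits = EXAMPLE_BIT, .read_mask = EXAMPLE_BIT },
	},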
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 16 ++++++++-------- drivers/gpu/drm/xe/xe_rtp.h | 26 +++++++++++++------------- drivers/gpu/drm/xe/xe_rtp_types.h | 9 ++++++--- 3 files changed, 27 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 9e8d0e43c643..d3484b906d4a 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -91,13 +91,13 @@ static void rtp_add_sr_entry(const struct xe_rtp_entry *entry, u32 mmio_base, struct xe_reg_sr *sr) { - u32 reg = entry->regval.reg + mmio_base; + u32 reg = entry->action.reg + mmio_base; struct xe_reg_sr_entry sr_entry = { - .clr_bits = entry->regval.clr_bits, - .set_bits = entry->regval.set_bits, - .read_mask = entry->regval.read_mask, - .masked_reg = entry->regval.flags & XE_RTP_FLAG_MASKED_REG, - .reg_type = entry->regval.reg_type, + .clr_bits = entry->action.clr_bits, + .set_bits = entry->action.set_bits, + .read_mask = entry->action.read_mask, + .masked_reg = entry->action.flags & XE_RTP_FLAG_MASKED_REG, + .reg_type = entry->action.reg_type, }; xe_reg_sr_add(sr, reg, &sr_entry); @@ -124,7 +124,7 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, for (entry = entries; entry && entry->name; entry++) { u32 mmio_base = 0; - if (entry->regval.flags & XE_RTP_FLAG_FOREACH_ENGINE) { + if (entry->action.flags & XE_RTP_FLAG_FOREACH_ENGINE) { struct xe_hw_engine *each_hwe; enum xe_hw_engine_id id; @@ -135,7 +135,7 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, rtp_add_sr_entry(entry, gt, mmio_base, sr); } } else if (rule_matches(gt, hwe, entry)) { - if (entry->regval.flags & XE_RTP_FLAG_ENGINE_BASE) + if (entry->action.flags & XE_RTP_FLAG_ENGINE_BASE) mmio_base = hwe->mmio_base; rtp_add_sr_entry(entry, gt, mmio_base, sr); diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index d4e11fdde77f..d86c6ba92b03 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -199,21 +199,21 @@ struct xe_reg_sr; * XE_RTP_WR - Helper to write a value to the register, overriding all the bits * @reg_: Register * @val_: Value to set - * @...: Additional fields to override in the struct xe_rtp_regval entry + * @...: Additional fields to override in the struct xe_rtp_action entry * * The correspondent notation in bspec is: * * REGNAME = VALUE */ #define XE_RTP_WR(reg_, val_, ...) \ - .regval = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_), \ + .action = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_), \ .read_mask = (~0u), ##__VA_ARGS__ } /** * XE_RTP_SET - Set bits from @val_ in the register. * @reg_: Register * @val_: Bits to set in the register - * @...: Additional fields to override in the struct xe_rtp_regval entry + * @...: Additional fields to override in the struct xe_rtp_action entry * * For masked registers this translates to a single write, while for other * registers it's a RMW. The correspondent bspec notation is (example for bits 2 @@ -223,14 +223,14 @@ struct xe_reg_sr; * REGNAME[5] = 1 */ #define XE_RTP_SET(reg_, val_, ...) \ - .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_), \ + .action = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_), \ .read_mask = (val_), ##__VA_ARGS__ } /** * XE_RTP_CLR: Clear bits from @val_ in the register. 
* @reg_: Register * @val_: Bits to clear in the register - * @...: Additional fields to override in the struct xe_rtp_regval entry + * @...: Additional fields to override in the struct xe_rtp_action entry * * For masked registers this translates to a single write, while for other * registers it's a RMW. The correspondent bspec notation is (example for bits 2 @@ -240,7 +240,7 @@ struct xe_reg_sr; * REGNAME[5] = 0 */ #define XE_RTP_CLR(reg_, val_, ...) \ - .regval = { .reg = reg_, .clr_bits = (val_), .set_bits = 0, \ + .action = { .reg = reg_, .clr_bits = (val_), .set_bits = 0, \ .read_mask = (val_), ##__VA_ARGS__ } /** @@ -248,7 +248,7 @@ struct xe_reg_sr; * @reg_: Register * @mask_bits_: Mask of bits to be changed in the register, forming a field * @val_: Value to set in the field denoted by @mask_bits_ - * @...: Additional fields to override in the struct xe_rtp_regval entry + * @...: Additional fields to override in the struct xe_rtp_action entry * * For masked registers this translates to a single write, while for other * registers it's a RMW. The correspondent bspec notation is: @@ -256,25 +256,25 @@ struct xe_reg_sr; * REGNAME[:] = VALUE */ #define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...) \ - .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ + .action = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ .read_mask = (mask_bits_), ##__VA_ARGS__ } #define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...) \ - .regval = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ + .action = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ .read_mask = 0, ##__VA_ARGS__ } /** * XE_WHITELIST_REGISTER - Add register to userspace whitelist * @reg_: Register * @flags_: Whitelist-specific flags to set - * @...: Additional fields to override in the struct xe_rtp_regval entry + * @...: Additional fields to override in the struct xe_rtp_action entry * * Add a register to the whitelist, allowing userspace to modify the ster with * regular user privileges. */ #define XE_WHITELIST_REGISTER(reg_, flags_, ...) \ /* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\ - .regval = { .reg = reg_, .set_bits = (flags_), \ + .action = { .reg = reg_, .set_bits = (flags_), \ .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \ ##__VA_ARGS__ } @@ -287,12 +287,12 @@ struct xe_reg_sr; #define XE_RTP_NAME(s_) .name = (s_) /** - * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_regval entry + * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_action entry * @f1_: Last part of a ``XE_RTP_FLAG_*`` * @...: Additional flags, defined like @f1_ * * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be - * easily used to define struct xe_rtp_regval entries. Example: + * easily used to define struct xe_rtp_action entries. Example: * * .. code-block:: c * diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 630a2ec53fc6..9cd722f310cb 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -19,9 +19,12 @@ enum { }; /** - * struct xe_rtp_regval - register and value for rtp table + * struct xe_rtp_action - action to take for any matching rule + * + * This struct records what action should be taken in a register that has a + * matching rule. Example of actions: set/clear bits. 
*/ -struct xe_rtp_regval { +struct xe_rtp_action { /** @reg: Register */ u32 reg; /** @clr_bits: bits to clear when updating register */ @@ -93,7 +96,7 @@ struct xe_rtp_rule { /** struct xe_rtp_entry - Entry in an rtp table */ struct xe_rtp_entry { const char *name; - const struct xe_rtp_regval regval; + const struct xe_rtp_action action; const struct xe_rtp_rule *rules; unsigned int n_rules; }; -- cgit v1.2.3 From 944a5e993a3e8a54ec56feec3253bb6b6f5c90d7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 25 Jan 2023 16:40:02 -0800 Subject: drm/xe/rtp: Split action and entry flags Entry flags is meant for the whole entry, including the rule evaluation. Action flags are for flags applied to the register or action being taken. Since there's only one action per entry, the distinction was not important and a u8 was spared. However more and more workarounds are needing multiple actions. This prepares for multiple action support. Right now there are these action flags: - XE_RTP_ACTION_FLAG_MASKED_REG: register in the action is a masked register - XE_RTP_ACTION_FLAG_ENGINE_BASE: the engine base should be added to the register in order to form the real address And this entry flag: - XE_RTP_ENTRY_FLAG_FOREACH_ENGINE: the rules should be evaluated for each engine on the gt. It also automatically implies XE_RTP_ACTION_FLAG_ENGINE_BASE. Since there are likely not that many rules, reduce n_rules to u8 so the overall entry size doesn't increase more than needed. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_whitelist.c | 2 +- drivers/gpu/drm/xe/xe_rtp.c | 38 +++++++++++++++++++------------- drivers/gpu/drm/xe/xe_rtp.h | 41 ++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_rtp_types.h | 9 ++++---- drivers/gpu/drm/xe/xe_wa.c | 36 +++++++++++++++--------------- 5 files changed, 80 insertions(+), 46 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 2e0c87b72395..469b274198b1 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -42,7 +42,7 @@ static const struct xe_rtp_entry register_whitelist[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)), XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0), RING_FORCE_TO_NONPRIV_ACCESS_RD, - XE_RTP_FLAG(ENGINE_BASE)) + XE_RTP_ACTION_FLAG(ENGINE_BASE)) }, { XE_RTP_NAME("16014440446_part_1"), XE_RTP_RULES(PLATFORM(PVC)), diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index d3484b906d4a..11135db1a19d 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -96,13 +96,30 @@ static void rtp_add_sr_entry(const struct xe_rtp_entry *entry, .clr_bits = entry->action.clr_bits, .set_bits = entry->action.set_bits, .read_mask = entry->action.read_mask, - .masked_reg = entry->action.flags & XE_RTP_FLAG_MASKED_REG, + .masked_reg = entry->action.flags & XE_RTP_ACTION_FLAG_MASKED_REG, .reg_type = entry->action.reg_type, }; xe_reg_sr_add(sr, reg, &sr_entry); } +static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt, + struct xe_hw_engine *hwe, struct xe_reg_sr *sr) +{ + u32 mmio_base; + + if (!rule_matches(gt, hwe, entry)) + return; + + if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) || + (entry->action.flags & XE_RTP_ACTION_FLAG_ENGINE_BASE)) + mmio_base = hwe->mmio_base; + else + mmio_base = 0; + + rtp_add_sr_entry(entry, gt, mmio_base, sr); +} + /** * xe_rtp_process - Process all 
rtp @entries, adding the matching ones to @sr * @entries: Table with RTP definitions @@ -122,23 +139,14 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, const struct xe_rtp_entry *entry; for (entry = entries; entry && entry->name; entry++) { - u32 mmio_base = 0; - - if (entry->action.flags & XE_RTP_FLAG_FOREACH_ENGINE) { + if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) { struct xe_hw_engine *each_hwe; enum xe_hw_engine_id id; - for_each_hw_engine(each_hwe, gt, id) { - mmio_base = each_hwe->mmio_base; - - if (rule_matches(gt, each_hwe, entry)) - rtp_add_sr_entry(entry, gt, mmio_base, sr); - } - } else if (rule_matches(gt, hwe, entry)) { - if (entry->action.flags & XE_RTP_FLAG_ENGINE_BASE) - mmio_base = hwe->mmio_base; - - rtp_add_sr_entry(entry, gt, mmio_base, sr); + for_each_hw_engine(each_hwe, gt, id) + rtp_process_one(entry, gt, each_hwe, sr); + } else { + rtp_process_one(entry, gt, hwe, sr); } } } diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index d86c6ba92b03..5d9ad31b0048 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -53,7 +53,8 @@ struct xe_reg_sr; * Helper macros for concatenating prefix - do not use them directly outside * this header */ -#define __ADD_XE_RTP_FLAG_PREFIX(x) CONCATENATE(XE_RTP_FLAG_, x) | +#define __ADD_XE_RTP_ENTRY_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ENTRY_FLAG_, x) | +#define __ADD_XE_RTP_ACTION_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ACTION_FLAG_, x) | #define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) , /* @@ -287,26 +288,50 @@ struct xe_reg_sr; #define XE_RTP_NAME(s_) .name = (s_) /** - * XE_RTP_FLAG - Helper to add multiple flags to a struct xe_rtp_action entry - * @f1_: Last part of a ``XE_RTP_FLAG_*`` + * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry + * @f1_: Last part of a ``XE_RTP_ENTRY_FLAG_*`` * @...: Additional flags, defined like @f1_ * - * Helper to automatically add a ``XE_RTP_FLAG_`` prefix to @f1_ so it can be - * easily used to define struct xe_rtp_action entries. Example: + * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to @f1_ so it can + * be easily used to define struct xe_rtp_action entries. Example: * * .. code-block:: c * * const struct xe_rtp_entry wa_entries[] = { * ... * { XE_RTP_NAME("test-entry"), - * XE_RTP_FLAG(FOREACH_ENGINE, MASKED_REG), + * ... + * XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + * ... + * }, + * ... + * }; + */ +#define XE_RTP_ENTRY_FLAG(f1_, ...) \ + .flags = (CALL_FOR_EACH(__ADD_XE_RTP_ENTRY_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0) + +/** + * XE_RTP_ACTION_FLAG - Helper to add multiple flags to a struct xe_rtp_action + * @f1_: Last part of a ``XE_RTP_ENTRY_*`` + * @...: Additional flags, defined like @f1_ + * + * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to @f1_ so it + * can be easily used to define struct xe_rtp_action entries. Example: + * + * .. code-block:: c + * + * const struct xe_rtp_entry wa_entries[] = { + * ... + * { XE_RTP_NAME("test-entry"), + * ... + * XE_RTP_SET(..., XE_RTP_ACTION_FLAG(FOREACH_ENGINE)), * ... * }, * ... * }; */ -#define XE_RTP_FLAG(f1_, ...) \ - .flags = (CALL_FOR_EACH(__ADD_XE_RTP_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0) +#define XE_RTP_ACTION_FLAG(f1_, ...) 
\ + .flags = (CALL_FOR_EACH(__ADD_XE_RTP_ACTION_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0) /** * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 9cd722f310cb..f7efb17d00e5 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -34,9 +34,8 @@ struct xe_rtp_action { #define XE_RTP_NOCHECK .read_mask = 0 /** @read_mask: mask for bits to consider when reading value back */ u32 read_mask; -#define XE_RTP_FLAG_FOREACH_ENGINE BIT(0) -#define XE_RTP_FLAG_MASKED_REG BIT(1) -#define XE_RTP_FLAG_ENGINE_BASE BIT(2) +#define XE_RTP_ACTION_FLAG_MASKED_REG BIT(0) +#define XE_RTP_ACTION_FLAG_ENGINE_BASE BIT(1) /** @flags: flags to apply on rule evaluation or action */ u8 flags; /** @reg_type: register type, see ``XE_RTP_REG_*`` */ @@ -98,7 +97,9 @@ struct xe_rtp_entry { const char *name; const struct xe_rtp_action action; const struct xe_rtp_rule *rules; - unsigned int n_rules; + u8 n_rules; +#define XE_RTP_ENTRY_FLAG_FOREACH_ENGINE BIT(0) + u8 flags; }; #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 699a39fb786a..c1c098994c84 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -103,15 +103,15 @@ static const struct xe_rtp_entry gt_was[] = { XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), ENGINE_CLASS(VIDEO_DECODE), FUNC(match_14011060649)), - XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS, - XE_RTP_FLAG(FOREACH_ENGINE)) + XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, { XE_RTP_NAME("16010515920"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS, - XE_RTP_FLAG(FOREACH_ENGINE)) + XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, { XE_RTP_NAME("22010523718"), XE_RTP_RULES(SUBPLATFORM(DG2, G10)), @@ -191,12 +191,12 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("14015227452"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("1606931601"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), @@ -205,47 +205,47 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("14010826681, 1606700617, 22010271021"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("18019627453"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("1409804808"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("14010229206, 1409085225"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH, - XE_RTP_FLAG(MASKED_REG)) + 
XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG)) + GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_FLAG(MASKED_REG)) + GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("1406941453"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_FLAG(MASKED_REG)) + XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, {} }; @@ -255,13 +255,13 @@ static const struct xe_rtp_entry lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("16011163337"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), @@ -273,13 +273,13 @@ static const struct xe_rtp_entry lrc_was[] = { XE_RTP_RULES(PLATFORM(DG1)), XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3, DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, { XE_RTP_NAME("22010493298"), XE_RTP_RULES(PLATFORM(DG1)), XE_RTP_SET(HIZ_CHICKEN, DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE, - XE_RTP_FLAG(MASKED_REG)) + XE_RTP_ACTION_FLAG(MASKED_REG)) }, {} }; -- cgit v1.2.3 From 844c0700a675a5e30644c867ae7b30cb680d176d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 25 Jan 2023 23:33:38 -0800 Subject: drm/xe/rtp: Support multiple actions per entry Just like there is support for multiple rules per entry in an rtp table, also support multiple actions. This makes it easier to add support for workarounds that need to change multiple registers. It also makes it slightly more readable as now the action part resembles the rule part. 
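With this in place, a workaround that touches more than one register collapses into a single entry, roughly like the following sketch (placeholder register/bit names, not a real workaround):

	{ XE_RTP_NAME("hypothetical-wa"),
	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)),
	  XE_RTP_ACTIONS(SET(EXAMPLE_REG0, EXAMPLE_CLKGATE_DIS),
			 CLR(EXAMPLE_REG1, EXAMPLE_CKN_IN))
	},

which mirrors how the previously split 14011431319/0 and 14011431319/1 entries are merged in the hunk below.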
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_whitelist.c | 31 ++++---- drivers/gpu/drm/xe/xe_rtp.c | 30 ++++--- drivers/gpu/drm/xe/xe_rtp.h | 97 +++++++++++++++------- drivers/gpu/drm/xe/xe_rtp_types.h | 3 +- drivers/gpu/drm/xe/xe_tuning.c | 7 +- drivers/gpu/drm/xe/xe_wa.c | 146 +++++++++++++++++----------------- 6 files changed, 179 insertions(+), 135 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 469b274198b1..a34617a642ec 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -26,35 +26,32 @@ static bool match_not_render(const struct xe_gt *gt, static const struct xe_rtp_entry register_whitelist[] = { { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_WHITELIST_REGISTER(PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4) + XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT, + RING_FORCE_TO_NONPRIV_ACCESS_RD | + RING_FORCE_TO_NONPRIV_RANGE_4)) }, { XE_RTP_NAME("1508744258, 14012131227, 1808121037"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_WHITELIST_REGISTER(GEN7_COMMON_SLICE_CHICKEN1, 0) + XE_RTP_ACTIONS(WHITELIST(GEN7_COMMON_SLICE_CHICKEN1, 0)) }, { XE_RTP_NAME("1806527549"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_WHITELIST_REGISTER(HIZ_CHICKEN, 0) + XE_RTP_ACTIONS(WHITELIST(HIZ_CHICKEN, 0)) }, { XE_RTP_NAME("allow_read_ctx_timestamp"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260), FUNC(match_not_render)), - XE_WHITELIST_REGISTER(RING_CTX_TIMESTAMP(0), + XE_RTP_ACTIONS(WHITELIST(RING_CTX_TIMESTAMP(0), RING_FORCE_TO_NONPRIV_ACCESS_RD, - XE_RTP_ACTION_FLAG(ENGINE_BASE)) + XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - { XE_RTP_NAME("16014440446_part_1"), + { XE_RTP_NAME("16014440446"), XE_RTP_RULES(PLATFORM(PVC)), - XE_WHITELIST_REGISTER(_MMIO(0x4400), - RING_FORCE_TO_NONPRIV_DENY | - RING_FORCE_TO_NONPRIV_RANGE_64) - }, - { XE_RTP_NAME("16014440446_part_2"), - XE_RTP_RULES(PLATFORM(PVC)), - XE_WHITELIST_REGISTER(_MMIO(0x4500), - RING_FORCE_TO_NONPRIV_DENY | - RING_FORCE_TO_NONPRIV_RANGE_64) + XE_RTP_ACTIONS(WHITELIST(_MMIO(0x4400), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64), + WHITELIST(_MMIO(0x4500), + RING_FORCE_TO_NONPRIV_DENY | + RING_FORCE_TO_NONPRIV_RANGE_64)) }, {} }; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 11135db1a19d..5b1316b588d8 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -86,18 +86,18 @@ static bool rule_matches(struct xe_gt *gt, return true; } -static void rtp_add_sr_entry(const struct xe_rtp_entry *entry, +static void rtp_add_sr_entry(const struct xe_rtp_action *action, struct xe_gt *gt, u32 mmio_base, struct xe_reg_sr *sr) { - u32 reg = entry->action.reg + mmio_base; + u32 reg = action->reg + mmio_base; struct xe_reg_sr_entry sr_entry = { - .clr_bits = entry->action.clr_bits, - .set_bits = entry->action.set_bits, - .read_mask = entry->action.read_mask, - .masked_reg = entry->action.flags & XE_RTP_ACTION_FLAG_MASKED_REG, - .reg_type = entry->action.reg_type, + .clr_bits = action->clr_bits, + .set_bits = action->set_bits, + .read_mask = action->read_mask, + .masked_reg = action->flags & XE_RTP_ACTION_FLAG_MASKED_REG, + .reg_type = action->reg_type, }; xe_reg_sr_add(sr, reg, 
&sr_entry); @@ -106,18 +106,22 @@ static void rtp_add_sr_entry(const struct xe_rtp_entry *entry, static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt, struct xe_hw_engine *hwe, struct xe_reg_sr *sr) { + const struct xe_rtp_action *action; u32 mmio_base; + unsigned int i; if (!rule_matches(gt, hwe, entry)) return; - if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) || - (entry->action.flags & XE_RTP_ACTION_FLAG_ENGINE_BASE)) - mmio_base = hwe->mmio_base; - else - mmio_base = 0; + for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) { + if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) || + (action->flags & XE_RTP_ACTION_FLAG_ENGINE_BASE)) + mmio_base = hwe->mmio_base; + else + mmio_base = 0; - rtp_add_sr_entry(entry, gt, mmio_base, sr); + rtp_add_sr_entry(action, gt, mmio_base, sr); + } } /** diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 5d9ad31b0048..1ac3fd1c0734 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -44,10 +44,8 @@ struct xe_reg_sr; #define CALL_FOR_EACH(MACRO_, x, ...) \ _CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__) -#define _XE_RTP_REG(x_) (x_), \ - .reg_type = XE_RTP_REG_REGULAR -#define _XE_RTP_MCR_REG(x_) (x_), \ - .reg_type = XE_RTP_REG_MCR +#define _XE_RTP_REG(x_) (x_), XE_RTP_REG_REGULAR +#define _XE_RTP_MCR_REG(x_) (x_), XE_RTP_REG_MCR /* * Helper macros for concatenating prefix - do not use them directly outside @@ -56,6 +54,7 @@ struct xe_reg_sr; #define __ADD_XE_RTP_ENTRY_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ENTRY_FLAG_, x) | #define __ADD_XE_RTP_ACTION_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ACTION_FLAG_, x) | #define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) , +#define __ADD_XE_RTP_ACTION_PREFIX(x) CONCATENATE(XE_RTP_ACTION_, x) , /* * Macros to encode rules to match against platform, IP version, stepping, etc. @@ -197,8 +196,10 @@ struct xe_reg_sr; { .match_type = XE_RTP_MATCH_DISCRETE } /** - * XE_RTP_WR - Helper to write a value to the register, overriding all the bits + * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all + * the bits * @reg_: Register + * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO * @val_: Value to set * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -206,13 +207,15 @@ struct xe_reg_sr; * * REGNAME = VALUE */ -#define XE_RTP_WR(reg_, val_, ...) \ - .action = { .reg = reg_, .clr_bits = ~0u, .set_bits = (val_), \ - .read_mask = (~0u), ##__VA_ARGS__ } +#define XE_RTP_ACTION_WR(reg_, reg_type_, val_, ...) \ + { .reg = (reg_), .reg_type = (reg_type_), \ + .clr_bits = ~0u, .set_bits = (val_), \ + .read_mask = (~0u), ##__VA_ARGS__ } /** - * XE_RTP_SET - Set bits from @val_ in the register. + * XE_RTP_ACTION_SET - Set bits from @val_ in the register. * @reg_: Register + * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO * @val_: Bits to set in the register * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -223,13 +226,15 @@ struct xe_reg_sr; * REGNAME[2] = 1 * REGNAME[5] = 1 */ -#define XE_RTP_SET(reg_, val_, ...) \ - .action = { .reg = reg_, .clr_bits = (val_), .set_bits = (val_), \ - .read_mask = (val_), ##__VA_ARGS__ } +#define XE_RTP_ACTION_SET(reg_, reg_type_, val_, ...) \ + { .reg = (reg_), .reg_type = (reg_type_), \ + .clr_bits = (val_), .set_bits = (val_), \ + .read_mask = (val_), ##__VA_ARGS__ } /** - * XE_RTP_CLR: Clear bits from @val_ in the register. 
+ * XE_RTP_ACTION_CLR: Clear bits from @val_ in the register. * @reg_: Register + * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO * @val_: Bits to clear in the register * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -240,13 +245,15 @@ struct xe_reg_sr; * REGNAME[2] = 0 * REGNAME[5] = 0 */ -#define XE_RTP_CLR(reg_, val_, ...) \ - .action = { .reg = reg_, .clr_bits = (val_), .set_bits = 0, \ - .read_mask = (val_), ##__VA_ARGS__ } +#define XE_RTP_ACTION_CLR(reg_, reg_type_, val_, ...) \ + { .reg = (reg_), .reg_type = (reg_type_), \ + .clr_bits = (val_), .set_bits = 0, \ + .read_mask = (val_), ##__VA_ARGS__ } /** - * XE_RTP_FIELD_SET: Set a bit range, defined by @mask_bits_, to the value in + * XE_RTP_ACTION_FIELD_SET: Set a bit range * @reg_: Register + * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO * @mask_bits_: Mask of bits to be changed in the register, forming a field * @val_: Value to set in the field denoted by @mask_bits_ * @...: Additional fields to override in the struct xe_rtp_action entry @@ -256,28 +263,31 @@ struct xe_reg_sr; * * REGNAME[:] = VALUE */ -#define XE_RTP_FIELD_SET(reg_, mask_bits_, val_, ...) \ - .action = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ - .read_mask = (mask_bits_), ##__VA_ARGS__ } +#define XE_RTP_ACTION_FIELD_SET(reg_, reg_type_, mask_bits_, val_, ...) \ + { .reg = (reg_), .reg_type = (reg_type_), \ + .clr_bits = (mask_bits_), .set_bits = (val_), \ + .read_mask = (mask_bits_), ##__VA_ARGS__ } -#define XE_RTP_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...) \ - .action = { .reg = reg_, .clr_bits = (mask_bits_), .set_bits = (val_),\ - .read_mask = 0, ##__VA_ARGS__ } +#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, reg_type_, mask_bits_, val_, ...) \ + { .reg = (reg_), .reg_type = (reg_type_), \ + .clr_bits = (mask_bits_), .set_bits = (val_), \ + .read_mask = 0, ##__VA_ARGS__ } /** - * XE_WHITELIST_REGISTER - Add register to userspace whitelist + * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist * @reg_: Register - * @flags_: Whitelist-specific flags to set + * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO + * @val_: Whitelist-specific flags to set * @...: Additional fields to override in the struct xe_rtp_action entry * * Add a register to the whitelist, allowing userspace to modify the ster with * regular user privileges. */ -#define XE_WHITELIST_REGISTER(reg_, flags_, ...) \ +#define XE_RTP_ACTION_WHITELIST(reg_, reg_type_, val_, ...) \ /* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\ - .action = { .reg = reg_, .set_bits = (flags_), \ - .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \ - ##__VA_ARGS__ } + { .reg = (reg_), .reg_type = (reg_type_), .set_bits = (val_), \ + .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \ + ##__VA_ARGS__ } /** * XE_RTP_NAME - Helper to set the name in xe_rtp_entry @@ -324,7 +334,7 @@ struct xe_reg_sr; * ... * { XE_RTP_NAME("test-entry"), * ... - * XE_RTP_SET(..., XE_RTP_ACTION_FLAG(FOREACH_ENGINE)), + * XE_RTP_ACTION_SET(..., XE_RTP_ACTION_FLAG(FOREACH_ENGINE)), * ... * }, * ... @@ -359,6 +369,33 @@ struct xe_reg_sr; CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__) \ } +/** + * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry + * @a1: Action to take. Last part of XE_RTP_ACTION_* + * @...: Additional rules, defined like @r1 + * + * At least one rule is needed and up to 4 are supported. Multiple rules are + * AND'ed together, i.e. 
all the rules must evaluate to true for the entry to + * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: + * + * .. code-block:: c + * + * const struct xe_rtp_entry wa_entries[] = { + * ... + * { XE_RTP_NAME("test-entry"), + * XE_RTP_RULES(...), + * XE_RTP_ACTIONS(SET(..), SET(...), CLR(...)), + * ... + * }, + * ... + * }; + */ +#define XE_RTP_ACTIONS(a1, ...) \ + .n_actions = COUNT_ARGS(a1, ##__VA_ARGS__), \ + .actions = (struct xe_rtp_action[]) { \ + CALL_FOR_EACH(__ADD_XE_RTP_ACTION_PREFIX, a1, ##__VA_ARGS__) \ + } + void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, struct xe_gt *gt, struct xe_hw_engine *hwe); diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index f7efb17d00e5..fac0bd6d5b1e 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -95,9 +95,10 @@ struct xe_rtp_rule { /** struct xe_rtp_entry - Entry in an rtp table */ struct xe_rtp_entry { const char *name; - const struct xe_rtp_action action; + const struct xe_rtp_action *actions; const struct xe_rtp_rule *rules; u8 n_rules; + u8 n_actions; #define XE_RTP_ENTRY_FLAG_FOREACH_ENGINE BIT(0) u8 flags; }; diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 96b16b8d03cf..3cc32e3e7a90 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -19,7 +19,7 @@ static const struct xe_rtp_entry gt_tunings[] = { { XE_RTP_NAME("Tuning: 32B Access Enable"), XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_SET(XEHP_SQCM, EN_32B_ACCESS) + XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS)) }, {} }; @@ -27,8 +27,9 @@ static const struct xe_rtp_entry gt_tunings[] = { static const struct xe_rtp_entry context_tunings[] = { { XE_RTP_NAME("1604555607"), XE_RTP_RULES(GRAPHICS_VERSION(1200)), - XE_RTP_FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128) + XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128)) }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c1c098994c84..9d2e4555091c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -103,86 +103,83 @@ static const struct xe_rtp_entry gt_was[] = { XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), ENGINE_CLASS(VIDEO_DECODE), FUNC(match_14011060649)), - XE_RTP_SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, { XE_RTP_NAME("16010515920"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, { XE_RTP_NAME("22010523718"), XE_RTP_RULES(SUBPLATFORM(DG2, G10)), - XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)) }, { XE_RTP_NAME("14011006942"), XE_RTP_RULES(SUBPLATFORM(DG2, G10)), - XE_RTP_SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)) }, { XE_RTP_NAME("14010948348"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), - XE_RTP_SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)) }, { XE_RTP_NAME("14011037102"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), - XE_RTP_SET(UNSLCGCTL9444, 
LTCDD_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)) }, { XE_RTP_NAME("14011371254"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), - XE_RTP_SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)) }, - { XE_RTP_NAME("14011431319/0"), + { XE_RTP_NAME("14011431319"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), - XE_RTP_SET(UNSLCGCTL9440, - GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS) - }, - { XE_RTP_NAME("14011431319/1"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), - XE_RTP_SET(UNSLCGCTL9444, - GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(UNSLCGCTL9440, + GAMTLBOACS_CLKGATE_DIS | + GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | + GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS | + GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS | + GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS | + GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS | + GAMTLBBLT_CLKGATE_DIS), + SET(UNSLCGCTL9444, + GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS | + GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS | + GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS | + GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS | + GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS | + GAMTLBMERT_CLKGATE_DIS | + GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS | + GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)) }, { XE_RTP_NAME("14010569222"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), - XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)) }, { XE_RTP_NAME("14011028019"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), - XE_RTP_SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)) }, { XE_RTP_NAME("14014830051"), XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_CLR(SARB_CHICKEN1, COMP_CKN_IN) + XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN)) }, { XE_RTP_NAME("14015795083"), XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE) + XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)) }, { XE_RTP_NAME("14011059788"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - XE_RTP_SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE) + XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)) }, { XE_RTP_NAME("1409420604"), XE_RTP_RULES(PLATFORM(DG1)), - XE_RTP_SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS) + XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)) }, { XE_RTP_NAME("1408615072"), XE_RTP_RULES(PLATFORM(DG1)), - XE_RTP_SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL) + XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)) }, {} }; @@ -190,62 +187,67 @@ static const struct xe_rtp_entry gt_was[] 
= { static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("14015227452"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1606931601"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), - XE_RTP_SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE) + XE_RTP_ACTIONS(SET(GEN7_FF_THREAD_MODE, + GEN12_FF_TESSELATION_DOP_GATE_DISABLE)) }, { XE_RTP_NAME("14010826681, 1606700617, 22010271021"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("18019627453"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1409804808"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), - XE_RTP_SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("14010229206, 1409085225"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), - XE_RTP_SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), - XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), - GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), - XE_RTP_SET(RING_PSMI_CTL(RENDER_RING_BASE), - GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1406941453"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), - XE_RTP_SET(GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL, - XE_RTP_ACTION_FLAG(MASKED_REG)) + 
XE_RTP_ACTIONS(SET(GEN7_FF_SLICE_CS_CHICKEN1, + GEN9_FFSC_PERCTX_PREEMPT_CTRL, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, {} }; @@ -253,33 +255,35 @@ static const struct xe_rtp_entry engine_was[] = { static const struct xe_rtp_entry lrc_was[] = { { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - XE_RTP_SET(GEN11_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - XE_RTP_FIELD_SET(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(FIELD_SET(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("16011163337"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), /* read verification is ignored due to 1608008084. */ - XE_RTP_FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK, - FF_MODE2_GS_TIMER_224) + XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, + FF_MODE2_GS_TIMER_MASK, + FF_MODE2_GS_TIMER_224)) }, { XE_RTP_NAME("1409044764"), XE_RTP_RULES(PLATFORM(DG1)), - XE_RTP_CLR(GEN11_COMMON_SLICE_CHICKEN3, - DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(CLR(GEN11_COMMON_SLICE_CHICKEN3, + DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("22010493298"), XE_RTP_RULES(PLATFORM(DG1)), - XE_RTP_SET(HIZ_CHICKEN, - DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG)) + XE_RTP_ACTIONS(SET(HIZ_CHICKEN, + DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) }, {} }; -- cgit v1.2.3 From dc97898e8121878829ee3cf48fa8ce154807f90b Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 16 Feb 2023 06:16:44 -0800 Subject: drm/xe: Initialize ret in mcr_lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ret is not initialized in mcr_lock() when running in platforms with graphics IP version < 1270, this could cause drm_WARN_ON_ONCE() to hit eventually(what just happened to me). Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index bb71071c3435..7617f0340879 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -383,7 +383,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, static void mcr_lock(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - int ret; + int ret = 0; spin_lock(>->mcr_lock); -- cgit v1.2.3 From 780637e28783af505864151da78e713f62ed64ae Mon Sep 17 00:00:00 2001 From: Carlos Santa Date: Wed, 15 Feb 2023 12:34:25 -0800 Subject: drm/xe: Update the list of devices to add even more TGL devices The list of GTs got splitted a while back between GT1 and GT2 on TGL. 
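The xe_pciids.h hunk below also fixes how the helper forwards its variadic arguments; as a general sketch of the pattern (macro names here are illustrative only):

/* Broken: the literal token "..." is passed through instead of the args. */
#define LIST_IDS_BROKEN(MACRO__, ...) \
	GROUP1_IDS(MACRO__, ...), \
	GROUP2_IDS(MACRO__, ...)

/* Fixed: forward the caller's arguments with ## __VA_ARGS__. */
#define LIST_IDS(MACRO__, ...) \
	GROUP1_IDS(MACRO__, ## __VA_ARGS__), \
	GROUP2_IDS(MACRO__, ## __VA_ARGS__)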
References: https://patchwork.freedesktop.org/patch/388414/ CC: Rodrigo Vivi Signed-off-by: Carlos Santa Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 2 +- include/drm/xe_pciids.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c159306e04cf..2b1c80bb6e46 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -279,7 +279,7 @@ static const struct xe_device_desc mtl_desc = { * PCI ID matches, otherwise we'll use the wrong info struct above. */ static const struct pci_device_id pciidlist[] = { - XE_TGL_GT2_IDS(INTEL_VGA_DEVICE, &tgl_desc), + XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index e539594ed939..b9e9f5b2b0ac 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -41,8 +41,8 @@ MACRO__(0x9AF8, ## __VA_ARGS__) #define XE_TGL_IDS(MACRO__, ...) \ - XE_TGL_GT1_IDS(MACRO__, ...), \ - XE_TGL_GT2_IDS(MACRO__, ...) + XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\ + XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__) /* RKL */ #define XE_RKL_IDS(MACRO__, ...) \ -- cgit v1.2.3 From ba00da78ce4d2a7fe7ef245e1168b7946827995d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 17 Feb 2023 12:12:17 -0500 Subject: drm/xe: Remove unseless xe_force_wake_prune. (!(gt->info.engine_mask & BIT(i))) cases are already handled in the init function. And these masks are not modified between the init and the prune. Suggested-by: Matt Roper Signed-off-by: Rodrigo Vivi Reviewed-by: Matt Roper --- drivers/gpu/drm/xe/xe_force_wake.c | 15 --------------- drivers/gpu/drm/xe/xe_force_wake.h | 2 -- drivers/gpu/drm/xe/xe_gt.c | 2 -- 3 files changed, 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index d2080e6fbe10..5bd87118ac73 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -100,21 +100,6 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) } } -void xe_force_wake_prune(struct xe_gt *gt, struct xe_force_wake *fw) -{ - int i, j; - - /* Call after fuses have been read, prune domains that are fused off */ - - for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) - if (!(gt->info.engine_mask & BIT(i))) - fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j].reg_ctl = 0; - - for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) - if (!(gt->info.engine_mask & BIT(i))) - fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j].reg_ctl = 0; -} - static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) { xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val); diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 5adb8daa3b71..7c534cdd5fe9 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -15,8 +15,6 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw); void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw); -void xe_force_wake_prune(struct xe_gt *gt, - struct xe_force_wake *fw); int xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains domains); int xe_force_wake_put(struct xe_force_wake *fw, diff --git a/drivers/gpu/drm/xe/xe_gt.c 
b/drivers/gpu/drm/xe/xe_gt.c index 20dbc08d3685..c17279653561 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -602,8 +602,6 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - xe_force_wake_prune(gt, gt_to_fw(gt)); - err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); if (err) return err; -- cgit v1.2.3 From 0d83be772c1f8e0d3db4a26a5f1308e058a98354 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 15 Feb 2023 10:28:43 +0000 Subject: drm/xe/mmio: fix forcewake ref leak in xe_mmio_ioctl Make sure we properly release the forcewake ref on all error paths. v2(Lucas): - Make it less verbose and just fold the unimplemented options into the default. The exact return value doesn't seem to matter for the corresponding IGT. - Replace the user triggerable WARN() with drm_dbg(). Signed-off-by: Matthew Auld Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 8a953df2b468..ba327b1e8dea 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -425,31 +425,28 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_MMIO_WRITE) { switch (bits_flag) { - case DRM_XE_MMIO_8BIT: - return -EINVAL; /* TODO */ - case DRM_XE_MMIO_16BIT: - return -EINVAL; /* TODO */ case DRM_XE_MMIO_32BIT: - if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) - return -EINVAL; + if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) { + ret = -EINVAL; + goto exit; + } xe_mmio_write32(to_gt(xe), args->addr, args->value); break; case DRM_XE_MMIO_64BIT: xe_mmio_write64(to_gt(xe), args->addr, args->value); break; default: - drm_WARN(&xe->drm, 1, "Invalid MMIO bit size"); - ret = -EINVAL; + drm_dbg(&xe->drm, "Invalid MMIO bit size"); + fallthrough; + case DRM_XE_MMIO_8BIT: /* TODO */ + case DRM_XE_MMIO_16BIT: /* TODO */ + ret = -ENOTSUPP; goto exit; } } if (args->flags & DRM_XE_MMIO_READ) { switch (bits_flag) { - case DRM_XE_MMIO_8BIT: - return -EINVAL; /* TODO */ - case DRM_XE_MMIO_16BIT: - return -EINVAL; /* TODO */ case DRM_XE_MMIO_32BIT: args->value = xe_mmio_read32(to_gt(xe), args->addr); break; @@ -457,8 +454,11 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, args->value = xe_mmio_read64(to_gt(xe), args->addr); break; default: - drm_WARN(&xe->drm, 1, "Invalid MMIO bit size"); - ret = -EINVAL; + drm_dbg(&xe->drm, "Invalid MMIO bit size"); + fallthrough; + case DRM_XE_MMIO_8BIT: /* TODO */ + case DRM_XE_MMIO_16BIT: /* TODO */ + ret = -ENOTSUPP; } } -- cgit v1.2.3 From 6062acc1b8664ade91b4609ea056badd6f1e6802 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 15 Feb 2023 10:28:44 +0000 Subject: drm/xe/stolen: don't map stolen on small-bar The driver should still be functional with small-bar, just that the vram size is clamped to the BAR size (until we add proper support for tiered vram). For stolen vram we shouldn't iomap anything if the BAR size doesn't also contain the stolen portion, since on discrete the stolen portion is always at the end of normal vram. Stolen should still be functional, just that allocating CPU visible io memory will always return an error. v2 (Lucas) - Mention in the commit message that stolen vram is always as the end of normal vram, which is why stolen in not mappable on small-bar systems. - Just make xe_ttm_stolen_inaccessible() return true for such cases. 
Also rename to xe_ttm_stolen_cpu_inaccessible to better describe that we are talking about direct CPU access. Plus add some kernel-doc. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/209 Reported-by: Lucas De Marchi Signed-off-by: Matthew Auld Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 47 +++++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h | 2 +- 3 files changed, 37 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3c9d90dcf125..b4fa856b79d1 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1158,7 +1158,7 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt, u64 end = offset == ~0ull ? offset : start + size; if (flags & XE_BO_CREATE_STOLEN_BIT && - xe_ttm_stolen_inaccessible(xe)) + xe_ttm_stolen_cpu_inaccessible(xe)) flags |= XE_BO_CREATE_GGTT_BIT; bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index b4e9c88644e4..e20c567f276f 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -21,11 +21,6 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" -bool xe_ttm_stolen_inaccessible(struct xe_device *xe) -{ - return !IS_DGFX(xe) && GRAPHICS_VERx100(xe) < 1270; -} - struct xe_ttm_stolen_mgr { struct xe_ttm_vram_mgr base; @@ -43,6 +38,34 @@ to_stolen_mgr(struct ttm_resource_manager *man) return container_of(man, struct xe_ttm_stolen_mgr, base.manager); } +/** + * xe_ttm_stolen_cpu_inaccessible - Can we directly CPU access stolen memory for + * this device. + * + * On some integrated platforms we can't directly access stolen via the CPU + * (like some normal system memory). Also on small-bar systems for discrete, + * since stolen is always as the end of normal VRAM, and the BAR likely doesn't + * stretch that far. However CPU access of stolen is generally rare, and at + * least on discrete should not be needed. + * + * If this is indeed inaccessible then we fallback to using the GGTT mappable + * aperture for CPU access. On discrete platforms we have no such thing, so when + * later attempting to CPU map the memory an error is instead thrown. 
+ */ +bool xe_ttm_stolen_cpu_inaccessible(struct xe_device *xe) +{ + struct ttm_resource_manager *ttm_mgr = + ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + struct xe_ttm_stolen_mgr *mgr; + + if (!ttm_mgr) + return true; + + mgr = to_stolen_mgr(ttm_mgr); + + return !mgr->io_base || GRAPHICS_VERx100(xe) < 1270; +} + static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -126,7 +149,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (IS_DGFX(xe)) stolen_size = detect_bar2_dgfx(xe, mgr); - else if (!xe_ttm_stolen_inaccessible(xe)) + else if (GRAPHICS_VERx100(xe) >= 1270) stolen_size = detect_bar2_integrated(xe, mgr); else stolen_size = detect_stolen(xe, mgr); @@ -140,7 +163,6 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (pgsize < PAGE_SIZE) pgsize = PAGE_SIZE; - err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, pgsize); if (err) { drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); @@ -150,7 +172,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", stolen_size); - if (!xe_ttm_stolen_inaccessible(xe)) + if (!xe_ttm_stolen_cpu_inaccessible(xe)) mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, stolen_size); } @@ -161,10 +183,9 @@ u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr); struct xe_res_cursor cur; - if (!mgr->io_base) - return 0; + XE_BUG_ON(!mgr->io_base); - if (!IS_DGFX(xe) && xe_ttm_stolen_inaccessible(xe)) + if (!IS_DGFX(xe) && xe_ttm_stolen_cpu_inaccessible(xe)) return mgr->io_base + xe_bo_ggtt_addr(bo) + offset; xe_res_first(bo->ttm.resource, offset, 4096, &cur); @@ -202,6 +223,8 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe, #ifdef CONFIG_X86 struct xe_bo *bo = ttm_to_xe_bo(mem->bo); + XE_BUG_ON(IS_DGFX(xe)); + /* XXX: Require BO to be mapped to GGTT? */ if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT))) return -EIO; @@ -228,7 +251,7 @@ int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem) if (!mgr || !mgr->io_base) return -EIO; - if (!xe_ttm_stolen_inaccessible(xe)) + if (!xe_ttm_stolen_cpu_inaccessible(xe)) return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem); else return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h index ade37abb0623..2fda97b97a05 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -14,7 +14,7 @@ struct xe_device; void xe_ttm_stolen_mgr_init(struct xe_device *xe); int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); -bool xe_ttm_stolen_inaccessible(struct xe_device *xe); +bool xe_ttm_stolen_cpu_inaccessible(struct xe_device *xe); u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset); u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe); -- cgit v1.2.3 From b47b0ef1ba34e351228b57ce7ba74efc6d7b2c24 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 15 Feb 2023 10:28:45 +0000 Subject: drm/xe/query: zero the region info There are also some reserved fields in here which are not currently cleared when handing back to userspace. Otherwise we might run into issues if we later wish to use them. 
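As a general sketch of the pattern (hypothetical foo_* names, not the actual xe_query.c code): allocating a structure that will be copied back to userspace with kzalloc() guarantees that reserved/pad fields, and any compiler-inserted padding, read back as zero, instead of relying on every field being cleared by hand.

#include <linux/slab.h>
#include <linux/types.h>

struct foo_region_info {
	u32 mem_class;
	u32 instance;
	u32 reserved[6];	/* must read back as zero in userspace */
};

static struct foo_region_info *foo_alloc_region_info(void)
{
	/* kzalloc() zero-fills, so reserved[] needs no explicit clearing. */
	return kzalloc(sizeof(struct foo_region_info), GFP_KERNEL);
}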
Signed-off-by: Matthew Auld Reviewed-by: Lucas De Marchi lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 6e904e97f456..f9f21bd1bfd7 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -115,16 +115,13 @@ static int query_memory_usage(struct xe_device *xe, return -EINVAL; } - usage = kmalloc(size, GFP_KERNEL); + usage = kzalloc(size, GFP_KERNEL); if (XE_IOCTL_ERR(xe, !usage)) return -ENOMEM; - usage->pad = 0; - man = ttm_manager_type(&xe->ttm, XE_PL_TT); usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM; usage->regions[0].instance = 0; - usage->regions[0].pad = 0; usage->regions[0].min_page_size = PAGE_SIZE; usage->regions[0].max_page_size = PAGE_SIZE; usage->regions[0].total_size = man->size << PAGE_SHIFT; @@ -138,7 +135,6 @@ static int query_memory_usage(struct xe_device *xe, XE_MEM_REGION_CLASS_VRAM; usage->regions[usage->num_regions].instance = usage->num_regions; - usage->regions[usage->num_regions].pad = 0; usage->regions[usage->num_regions].min_page_size = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : PAGE_SIZE; -- cgit v1.2.3 From 671ca05d7c9766407d7d7e4785d52e4a15d56027 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Feb 2023 15:33:43 -0800 Subject: drm/xe: Make local functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few static functions not being declared like that break the build with W=1, like e.g. cc1: all warnings being treated as errors make[2]: *** [../scripts/Makefile.build:250: drivers/gpu/drm/xe/xe_gt.o] Error 1 ../drivers/gpu/drm/xe/xe_guc.c:240:6: error: no previous prototype for ‘guc_write_params’ [-Werror=missing-prototypes] 240 | void guc_write_params(struct xe_guc *guc) | ^~~~~~~~~~~~~~~~ Make them static. Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 6 +++--- drivers/gpu/drm/xe/xe_guc.c | 4 ++-- drivers/gpu/drm/xe/xe_irq.c | 2 +- drivers/gpu/drm/xe/xe_wait_user_fence.c | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index c17279653561..809e9b14c314 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -218,7 +218,7 @@ static void gt_fini(struct drm_device *drm, void *arg) static void gt_reset_worker(struct work_struct *w); -int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) +static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) { struct xe_sched_job *job; struct xe_bb *bb; @@ -252,7 +252,7 @@ int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) return 0; } -int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) +static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) { struct xe_reg_sr *sr = &e->hwe->reg_lrc; struct xe_reg_sr_entry *entry; @@ -609,7 +609,7 @@ int xe_gt_init(struct xe_gt *gt) return 0; } -int do_gt_reset(struct xe_gt *gt) +static int do_gt_reset(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 5cdfdfd0de40..32bcc40463e1 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -237,7 +237,7 @@ static void guc_init_params(struct xe_guc *guc) * transfer. 
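A tiny sketch of the kind of change involved (foo_write_params() is a made-up name): under W=1, -Wmissing-prototypes fires for any external-linkage function with no prototype in scope, so a helper used only within its own .c file is simply marked static.

struct foo;	/* hypothetical */

/*
 * Called only from this .c file: static removes the external linkage, so
 * the compiler no longer expects a declaration in some header.
 */
static void foo_write_params(struct foo *f)
{
	/* ... program hardware state derived from f ... */
}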
These parameters are read by the firmware on startup * and cannot be changed thereafter. */ -void guc_write_params(struct xe_guc *guc) +static void guc_write_params(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); int i; @@ -560,7 +560,7 @@ static void guc_handle_mmio_msg(struct xe_guc *guc) "Received early GuC exception notification!\n"); } -void guc_enable_irq(struct xe_guc *guc) +static void guc_enable_irq(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); u32 events = xe_gt_is_media_type(gt) ? diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index a2caa20f2fb3..ab703f1c8b58 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -448,7 +448,7 @@ static void dg1_irq_reset(struct xe_gt *gt) GEN3_IRQ_RESET(gt, GEN8_PCU_); } -void xe_irq_reset(struct xe_device *xe) +static void xe_irq_reset(struct xe_device *xe) { struct xe_gt *gt; u8 id; diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 8a8d814a0e7a..977c963a8cd0 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -56,9 +56,9 @@ static const enum xe_engine_class user_to_xe_engine_class[] = { [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, }; -int check_hw_engines(struct xe_device *xe, - struct drm_xe_engine_class_instance *eci, - int num_engines) +static int check_hw_engines(struct xe_device *xe, + struct drm_xe_engine_class_instance *eci, + int num_engines) { int i; -- cgit v1.2.3 From 3dbec4703ee7b67a8dba47e5f1e668b7b17aeb1b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Feb 2023 15:33:44 -0800 Subject: drm/xe: Fix application of LRC tunings LRC tunings were added after the gt ones and didn't add the call in xe_gt_record_default_lrcs() to process them like is done for workarounds. Add such a function and call it from xe_gt_record_default_lrcs(). Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_tuning.c | 15 ++++++++++++++- drivers/gpu/drm/xe/xe_tuning.h | 2 ++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 809e9b14c314..5a3c8fd5936a 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -320,6 +320,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe); xe_wa_process_lrc(hwe); + xe_tuning_process_lrc(hwe); default_lrc = drmm_kzalloc(&xe->drm, xe_lrc_size(xe, hwe->class), diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 3cc32e3e7a90..595eb2de90ad 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -24,7 +24,7 @@ static const struct xe_rtp_entry gt_tunings[] = { {} }; -static const struct xe_rtp_entry context_tunings[] = { +static const struct xe_rtp_entry lrc_tunings[] = { { XE_RTP_NAME("1604555607"), XE_RTP_RULES(GRAPHICS_VERSION(1200)), XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, @@ -38,3 +38,16 @@ void xe_tuning_process_gt(struct xe_gt *gt) { xe_rtp_process(gt_tunings, >->reg_sr, gt, NULL); } + +/** + * xe_tuning_process_lrc - process lrc tunings + * @hwe: engine instance to process tunings for + * + * Process LRC table for this platform, saving in @hwe all the tunings that need + * to be applied on context restore. These are tunings touching registers that + * are part of the HW context image. 
+ */ +void xe_tuning_process_lrc(struct xe_hw_engine *hwe) +{ + xe_rtp_process(lrc_tunings, &hwe->reg_lrc, hwe->gt, hwe); +} diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h index 66dbc93192bd..2b95b0c8effc 100644 --- a/drivers/gpu/drm/xe/xe_tuning.h +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -7,7 +7,9 @@ #define _XE_TUNING_ struct xe_gt; +struct xe_hw_engine; void xe_tuning_process_gt(struct xe_gt *gt); +void xe_tuning_process_lrc(struct xe_hw_engine *hwe); #endif -- cgit v1.2.3 From 220d957b5954ee4631fe781adfbfae8592b34811 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Feb 2023 15:33:46 -0800 Subject: drm/xe: Remove unused functions xe_gt_topology_dss_group_mask and xe_gt_topology_count_dss are probably leftover from initial implementation - they are not called from anywhere. Remove those functions. Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_topology.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 8e02e362ba27..3dd7cbbff071 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -94,31 +94,6 @@ xe_gt_topology_init(struct xe_gt *gt) xe_gt_topology_dump(gt, &p); } -unsigned int -xe_gt_topology_count_dss(xe_dss_mask_t mask) -{ - return bitmap_weight(mask, XE_MAX_DSS_FUSE_BITS); -} - -u64 -xe_gt_topology_dss_group_mask(xe_dss_mask_t mask, int grpsize) -{ - xe_dss_mask_t per_dss_mask = {}; - u64 grpmask = 0; - - WARN_ON(DIV_ROUND_UP(XE_MAX_DSS_FUSE_BITS, grpsize) > BITS_PER_TYPE(grpmask)); - - bitmap_fill(per_dss_mask, grpsize); - for (int i = 0; !bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); i++) { - if (bitmap_intersects(mask, per_dss_mask, grpsize)) - grpmask |= BIT(i); - - bitmap_shift_right(mask, mask, grpsize, XE_MAX_DSS_FUSE_BITS); - } - - return grpmask; -} - void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) { -- cgit v1.2.3 From 1d1b9262c5cb3c7c3d2a9f63e207dbb3d17bb3cc Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Feb 2023 15:33:47 -0800 Subject: drm/xe: Add missing doc for xe parameter Fix the following warning: ../drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c:55: warning: Function parameter or member 'xe' not described in 'xe_ttm_stolen_cpu_inaccessible' Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index e20c567f276f..097454f78286 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -41,6 +41,7 @@ to_stolen_mgr(struct ttm_resource_manager *man) /** * xe_ttm_stolen_cpu_inaccessible - Can we directly CPU access stolen memory for * this device. + * @xe: xe device * * On some integrated platforms we can't directly access stolen via the CPU * (like some normal system memory). Also on small-bar systems for discrete, -- cgit v1.2.3 From cb30cfdce50011ea53f5425b8be264f26cef60d8 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Feb 2023 15:33:48 -0800 Subject: drm/xe: Add missing include xe_wait_user_fence.h Make xe_wait_user_fence.c include xe_wait_user_fence.h so it doesn't rely on indirect includes and also doesn't fail the build due to missing prototype for xe_wait_user_fence_ioctl(). 
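A short sketch of the convention being applied (hypothetical xe_foo files): when a .c file includes its own header first, the compiler checks the definition against the exported prototype and -Wmissing-prototypes is satisfied without relying on indirect includes.

/* xe_foo.h (hypothetical) */
struct drm_device;
struct drm_file;

int xe_foo_ioctl(struct drm_device *dev, void *data, struct drm_file *file);

/* xe_foo.c (hypothetical) */
#include "xe_foo.h"	/* own header first: definition checked against prototype */

int xe_foo_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	return 0;
}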
Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 977c963a8cd0..15c2e5aa08d2 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_wait_user_fence.h" + #include #include #include -- cgit v1.2.3 From e50bbbb9baf64dfe77f236636961b1ceb1b4c19d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Feb 2023 16:27:05 -0800 Subject: drm/xe: Remove duplicate media_ver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit media_verx100 supersedes the info from media_ver. Leave media_ver in the struct xe_device_desc, used in xe_pci.c since it's easier to define common parts of the platforms like that. However all the rest of the driver should be using media_verx100 that is more future proof. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/216 Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 -- drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_pci.c | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 4c4a912141a9..c9f74dc4c9fd 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -77,8 +77,6 @@ struct xe_device { u8 tile_count; /** @vm_max_level: Max VM level */ u8 vm_max_level; - /** @media_ver: Media version */ - u8 media_ver; /** @supports_usm: Supports unified shared memory */ bool supports_usm; /** @has_asid: Has address space ID */ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index ba327b1e8dea..c414ece6dfe3 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -287,7 +287,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe) mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg); adj_tile_count = xe->info.tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; - if (xe->info.media_ver >= 13) + if (xe->info.media_verx100 >= 1300) xe->info.tile_count *= 2; drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 2b1c80bb6e46..88e28649c400 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -408,7 +408,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.vram_flags = desc->vram_flags; xe->info.tile_count = desc->max_tiles; xe->info.vm_max_level = desc->vm_max_level; - xe->info.media_ver = desc->media_ver; xe->info.supports_usm = desc->supports_usm; xe->info.has_asid = desc->has_asid; xe->info.has_flat_ccs = desc->has_flat_ccs; -- cgit v1.2.3 From 5a4a8e8b3b0be40c7cdf928ad8b6cfe6e5c465fd Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Feb 2023 11:39:50 -0800 Subject: drm/xe: Remove outdated build workaround Use the more common "call cc-disable-warning" way to disable warnings. 
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 998f7044b047..58ee9e82156d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -13,14 +13,8 @@ # will most likely get a sudden build breakage... Hopefully we will fix # new warnings before CI updates! subdir-ccflags-y := -Wall -Wextra -# making these call cc-disable-warning breaks when trying to build xe.mod.o -# by calling make M=drivers/gpu/drm/xe. This doesn't happen in upstream tree, -# so it was somehow fixed by the changes in the build system. Move it back to -# $(call cc-disable-warning, ...) after rebase. -subdir-ccflags-y += -Wno-unused-parameter -subdir-ccflags-y += -Wno-type-limits -#subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) -#subdir-ccflags-y += $(call cc-disable-warning, type-limits) +subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) +subdir-ccflags-y += $(call cc-disable-warning, type-limits) subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) # clang warnings -- cgit v1.2.3 From 84ff55006578d169b9331014bd34f0da2ca0616b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 21 Feb 2023 11:39:52 -0800 Subject: drm/xe/guc: Remove i915_regs.h include i915_regs.h is not needed, particularly in a header file. What is needed is i915_reg_defs.h for use of _MMIO() and similar macros. Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h index 1e16a9b76ddc..513a7e0c8a5a 100644 --- a/drivers/gpu/drm/xe/xe_guc_reg.h +++ b/drivers/gpu/drm/xe/xe_guc_reg.h @@ -9,7 +9,7 @@ #include #include -#include "i915_reg.h" +#include "i915_reg_defs.h" /* Definitions of GuC H/W registers, bits, etc */ -- cgit v1.2.3 From 5b7e50e2ea1745bd09c3d99a4f7c49d630124825 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 22 Feb 2023 12:18:45 +0000 Subject: drm/xe/pm: fix unbalanced ref handling In local_pci_probe() the core kernel increments the rpm for the device, just before calling into the probe hook. If the driver/device supports runtime pm it is then meant to put this ref during probe (like we do in xe_pm_runtime_init()). However when removing the device we then also need to take the reference back, otherwise the ref that is put in pci_device_remove() will be unbalanced when for example unloading the driver, leading to warnings like: [ 3808.596345] xe 0000:03:00.0: Runtime PM usage count underflow! Fix this by incrementing the rpm ref when removing the device. 
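A simplified sketch of the reference flow described above (the example_* helpers are made up; the real code is xe_pm_runtime_init()/xe_pm_runtime_fini() in the hunks below): the PCI core holds a runtime PM reference across probe, the driver puts it once runtime PM is configured, and remove must take it back before the core's own put in pci_device_remove() runs.

#include <linux/pm_runtime.h>

static void example_probe_rpm_init(struct device *dev)
{
	pm_runtime_use_autosuspend(dev);
	pm_runtime_allow(dev);
	/* Put the reference the PCI core took before calling probe. */
	pm_runtime_put_autosuspend(dev);
}

static void example_remove_rpm_fini(struct device *dev)
{
	/* Take that reference back so the core's put stays balanced. */
	pm_runtime_get_sync(dev);
	pm_runtime_forbid(dev);
}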
v2: - Improve the terminology in the commit message; s/drop/put/ etc (Lucas & Rodrigo) - Also call pm_runtime_forbid(dev) (Rodrigo) Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/193 Signed-off-by: Matthew Auld Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 1 + drivers/gpu/drm/xe/xe_pm.c | 8 ++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 88e28649c400..3474b5c9f174 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -364,6 +364,7 @@ static void xe_pci_remove(struct pci_dev *pdev) return; xe_device_remove(xe); + xe_pm_runtime_fini(xe); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index fb0355530e7b..0ef92b746595 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -117,6 +117,14 @@ void xe_pm_runtime_init(struct xe_device *xe) pm_runtime_put_autosuspend(dev); } +void xe_pm_runtime_fini(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + + pm_runtime_get_sync(dev); + pm_runtime_forbid(dev); +} + int xe_pm_runtime_suspend(struct xe_device *xe) { struct xe_gt *gt; diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index b8c5f9558e26..6a885585f653 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -14,6 +14,7 @@ int xe_pm_suspend(struct xe_device *xe); int xe_pm_resume(struct xe_device *xe); void xe_pm_runtime_init(struct xe_device *xe); +void xe_pm_runtime_fini(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); int xe_pm_runtime_get(struct xe_device *xe); -- cgit v1.2.3 From 353dfaaa31648c4e6f7f3fee5001f047ebf3ed67 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 22 Feb 2023 21:00:35 -0800 Subject: drm/xe: Fix kunit integration due to missing prototypes In order to avoid -Werror=missing-prototypes, add the prototypes in a separate tests/_test.h file that is included by both the implementation (tests/xe_.c, injected in xe.ko) and the kunit module (tests/xe__test.c -> xe--test.ko). 
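Condensed sketch of the resulting layout (hypothetical xe_foo names): the shared header carries the prototype, so both the in-driver test implementation and the separate kunit module see the same declaration and -Werror=missing-prototypes is satisfied.

/* tests/xe_foo_test.h (hypothetical) */
struct kunit;
void xe_foo_sanity_kunit(struct kunit *test);

/* tests/xe_foo.c: built into xe.ko, defines xe_foo_sanity_kunit() */
#include "tests/xe_foo_test.h"

/* tests/xe_foo_test.c: built into the kunit module, registers the case */
#include "xe_foo_test.h"
/* ... KUNIT_CASE(xe_foo_sanity_kunit) in the suite's case table ... */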
v2: Add header and don't add ifdef to files that are already not built when not using kunit (Matt Auld) Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 2 ++ drivers/gpu/drm/xe/tests/xe_bo_test.c | 5 ++--- drivers/gpu/drm/xe/tests/xe_bo_test.h | 14 ++++++++++++++ drivers/gpu/drm/xe/tests/xe_dma_buf.c | 2 ++ drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 4 ++-- drivers/gpu/drm/xe/tests/xe_dma_buf_test.h | 13 +++++++++++++ drivers/gpu/drm/xe/tests/xe_migrate.c | 2 ++ drivers/gpu/drm/xe/tests/xe_migrate_test.c | 4 ++-- drivers/gpu/drm/xe/tests/xe_migrate_test.h | 13 +++++++++++++ 9 files changed, 52 insertions(+), 7 deletions(-) create mode 100644 drivers/gpu/drm/xe/tests/xe_bo_test.h create mode 100644 drivers/gpu/drm/xe/tests/xe_dma_buf_test.h create mode 100644 drivers/gpu/drm/xe/tests/xe_migrate_test.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 87ac21cc8ca9..f03fb907b59a 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -5,6 +5,8 @@ #include +#include "tests/xe_bo_test.h" + #include "xe_bo_evict.h" #include "xe_pci.h" diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c index c8fa29b0b3b2..92dda4fca21b 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo_test.c +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c @@ -3,10 +3,9 @@ * Copyright © 2022 Intel Corporation */ -#include +#include "xe_bo_test.h" -void xe_ccs_migrate_kunit(struct kunit *test); -void xe_bo_evict_kunit(struct kunit *test); +#include static struct kunit_case xe_bo_tests[] = { KUNIT_CASE(xe_ccs_migrate_kunit), diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h new file mode 100644 index 000000000000..d751a618c0c8 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __XE_BO_TEST_H__ +#define __XE_BO_TEST_H__ + +struct kunit; + +void xe_ccs_migrate_kunit(struct kunit *test); +void xe_bo_evict_kunit(struct kunit *test); + +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 615d22e3f731..e66a8361ae1f 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -5,6 +5,8 @@ #include +#include "tests/xe_dma_buf_test.h" + #include "xe_pci.h" static bool p2p_enabled(struct dma_buf_test_params *params) diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c index 7bb292da1193..a1adfd1e1605 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c @@ -3,9 +3,9 @@ * Copyright © 2022 Intel Corporation */ -#include +#include "xe_dma_buf_test.h" -void xe_dma_buf_kunit(struct kunit *test); +#include static struct kunit_case xe_dma_buf_tests[] = { KUNIT_CASE(xe_dma_buf_kunit), diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h new file mode 100644 index 000000000000..4e9a8bef5751 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __XE_DMA_BUF_TEST_H__ +#define __XE_DMA_BUF_TEST_H__ + +struct kunit; + +void xe_dma_buf_kunit(struct kunit *test); + +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c 
b/drivers/gpu/drm/xe/tests/xe_migrate.c index 03a60d5b42f1..0de17e90aba9 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -5,6 +5,8 @@ #include +#include "tests/xe_migrate_test.h" + #include "xe_pci.h" static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence, diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c index ad779e2bd071..d6be360c3b6d 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c @@ -3,9 +3,9 @@ * Copyright © 2022 Intel Corporation */ -#include +#include "xe_migrate_test.h" -void xe_migrate_sanity_kunit(struct kunit *test); +#include static struct kunit_case xe_migrate_tests[] = { KUNIT_CASE(xe_migrate_sanity_kunit), diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h new file mode 100644 index 000000000000..db1f8ef035bb --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __XE_MIGRATE_TEST_H__ +#define __XE_MIGRATE_TEST_H__ + +struct kunit; + +void xe_migrate_sanity_kunit(struct kunit *test); + +#endif -- cgit v1.2.3 From 74f800c7a982db1d10e2c0c0a0164ee1db878652 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Feb 2023 11:08:14 -0800 Subject: drm/xe: Remove gen-based mmio offsets from hw engine init During early generations of Intel GPUs, hardware engines would sometimes move to new MMIO offsets from one platform/generation to the next. These days engines the hardware teams put more effort into ensuring that engines stay at consistent locations; even major design changes (like the introduction of standalone media) keep the MMIO locations of the engines constant. Since all platforms supported by the Xe driver are new enough to have a single MMIO offset for each engine (and since our crystal ball says that these offsets are very unlikely to change again in the foreseeable future), we can simplify the driver's engine definitions and remove the gen-based MMIO bases. 
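For reference, a sketch of the per-generation lookup this change removes (simplified from the old engine_info_mmio_base(); the example_* names are made up): each engine carried a small table sorted in reverse graphics-version order and the first entry not newer than the running IP version won. With a single MMIO base per engine, both the table and the loop go away.

#include <linux/types.h>

struct example_mmio_base {
	unsigned int graphics_ver;
	u32 base;
};

/* Table must be sorted in reverse graphics_ver order; first match wins. */
static u32 example_pick_mmio_base(const struct example_mmio_base *bases,
				  int count, unsigned int graphics_ver)
{
	int i;

	for (i = 0; i < count; i++)
		if (graphics_ver >= bases[i].graphics_ver)
			return bases[i].base;

	return 0;	/* engine not present this far back */
}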
Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 132 ++++++++------------------------------ 1 file changed, 28 insertions(+), 104 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index fd89dd90131c..986f675aaf88 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -31,11 +31,7 @@ struct engine_info { unsigned int class : 8; unsigned int instance : 8; enum xe_force_wake_domains domain; - /* mmio bases table *must* be sorted in reverse graphics_ver order */ - struct engine_mmio_base { - unsigned int graphics_ver : 8; - unsigned int base : 24; - } mmio_bases[MAX_MMIO_BASES]; + u32 mmio_base; }; static const struct engine_info engine_infos[] = { @@ -44,90 +40,70 @@ static const struct engine_info engine_infos[] = { .class = XE_ENGINE_CLASS_RENDER, .instance = 0, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 1, .base = RENDER_RING_BASE } - }, + .mmio_base = RENDER_RING_BASE, }, [XE_HW_ENGINE_BCS0] = { .name = "bcs0", .class = XE_ENGINE_CLASS_COPY, .instance = 0, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 6, .base = BLT_RING_BASE } - }, + .mmio_base = BLT_RING_BASE, }, [XE_HW_ENGINE_BCS1] = { .name = "bcs1", .class = XE_ENGINE_CLASS_COPY, .instance = 1, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE } - }, + .mmio_base = XEHPC_BCS1_RING_BASE, }, [XE_HW_ENGINE_BCS2] = { .name = "bcs2", .class = XE_ENGINE_CLASS_COPY, .instance = 2, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE } - }, + .mmio_base = XEHPC_BCS2_RING_BASE, }, [XE_HW_ENGINE_BCS3] = { .name = "bcs3", .class = XE_ENGINE_CLASS_COPY, .instance = 3, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE } - }, + .mmio_base = XEHPC_BCS3_RING_BASE, }, [XE_HW_ENGINE_BCS4] = { .name = "bcs4", .class = XE_ENGINE_CLASS_COPY, .instance = 4, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE } - }, + .mmio_base = XEHPC_BCS4_RING_BASE, }, [XE_HW_ENGINE_BCS5] = { .name = "bcs5", .class = XE_ENGINE_CLASS_COPY, .instance = 5, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE } - }, + .mmio_base = XEHPC_BCS5_RING_BASE, }, [XE_HW_ENGINE_BCS6] = { .name = "bcs6", .class = XE_ENGINE_CLASS_COPY, .instance = 6, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE } - }, + .mmio_base = XEHPC_BCS6_RING_BASE, }, [XE_HW_ENGINE_BCS7] = { .name = "bcs7", .class = XE_ENGINE_CLASS_COPY, .instance = 7, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE } - }, + .mmio_base = XEHPC_BCS7_RING_BASE, }, [XE_HW_ENGINE_BCS8] = { .name = "bcs8", .class = XE_ENGINE_CLASS_COPY, .instance = 8, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE } - }, + .mmio_base = XEHPC_BCS8_RING_BASE, }, [XE_HW_ENGINE_VCS0] = { @@ -135,166 +111,115 @@ static const struct engine_info engine_infos[] = { .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 0, .domain = XE_FW_MEDIA_VDBOX0, - .mmio_bases = { - { .graphics_ver = 11, .base = GEN11_BSD_RING_BASE }, - { .graphics_ver = 6, .base = GEN6_BSD_RING_BASE }, - { .graphics_ver = 4, .base = BSD_RING_BASE } - }, + .mmio_base = GEN11_BSD_RING_BASE, }, [XE_HW_ENGINE_VCS1] = { .name 
= "vcs1", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 1, .domain = XE_FW_MEDIA_VDBOX1, - .mmio_bases = { - { .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE }, - { .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE } - }, + .mmio_base = GEN11_BSD2_RING_BASE, }, [XE_HW_ENGINE_VCS2] = { .name = "vcs2", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 2, .domain = XE_FW_MEDIA_VDBOX2, - .mmio_bases = { - { .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE } - }, + .mmio_base = GEN11_BSD3_RING_BASE, }, [XE_HW_ENGINE_VCS3] = { .name = "vcs3", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 3, .domain = XE_FW_MEDIA_VDBOX3, - .mmio_bases = { - { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } - }, + .mmio_base = GEN11_BSD4_RING_BASE, }, [XE_HW_ENGINE_VCS4] = { .name = "vcs4", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 4, .domain = XE_FW_MEDIA_VDBOX4, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE } - }, + .mmio_base = XEHP_BSD5_RING_BASE, }, [XE_HW_ENGINE_VCS5] = { .name = "vcs5", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 5, .domain = XE_FW_MEDIA_VDBOX5, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE } - }, + .mmio_base = XEHP_BSD6_RING_BASE, }, [XE_HW_ENGINE_VCS6] = { .name = "vcs6", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 6, .domain = XE_FW_MEDIA_VDBOX6, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE } - }, + .mmio_base = XEHP_BSD7_RING_BASE, }, [XE_HW_ENGINE_VCS7] = { .name = "vcs7", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 7, .domain = XE_FW_MEDIA_VDBOX7, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE } - }, + .mmio_base = XEHP_BSD8_RING_BASE, }, [XE_HW_ENGINE_VECS0] = { .name = "vecs0", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 0, .domain = XE_FW_MEDIA_VEBOX0, - .mmio_bases = { - { .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE }, - { .graphics_ver = 7, .base = VEBOX_RING_BASE } - }, + .mmio_base = GEN11_VEBOX_RING_BASE, }, [XE_HW_ENGINE_VECS1] = { .name = "vecs1", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 1, .domain = XE_FW_MEDIA_VEBOX1, - .mmio_bases = { - { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE } - }, + .mmio_base = GEN11_VEBOX2_RING_BASE, }, [XE_HW_ENGINE_VECS2] = { .name = "vecs2", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 2, .domain = XE_FW_MEDIA_VEBOX2, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE } - }, + .mmio_base = XEHP_VEBOX3_RING_BASE, }, [XE_HW_ENGINE_VECS3] = { .name = "vecs3", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 3, .domain = XE_FW_MEDIA_VEBOX3, - .mmio_bases = { - { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } - }, + .mmio_base = XEHP_VEBOX4_RING_BASE, }, [XE_HW_ENGINE_CCS0] = { .name = "ccs0", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 0, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }, - }, + .mmio_base = GEN12_COMPUTE0_RING_BASE, }, [XE_HW_ENGINE_CCS1] = { .name = "ccs1", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 1, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }, - }, + .mmio_base = GEN12_COMPUTE1_RING_BASE, }, [XE_HW_ENGINE_CCS2] = { .name = "ccs2", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 2, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }, - }, + .mmio_base = GEN12_COMPUTE2_RING_BASE, }, [XE_HW_ENGINE_CCS3] = { .name = "ccs3", .class = XE_ENGINE_CLASS_COMPUTE, .instance 
= 3, .domain = XE_FW_RENDER, - .mmio_bases = { - { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }, - }, + .mmio_base = GEN12_COMPUTE3_RING_BASE, }, }; -static u32 engine_info_mmio_base(const struct engine_info *info, - unsigned int graphics_ver) -{ - int i; - - for (i = 0; i < MAX_MMIO_BASES; i++) - if (graphics_ver >= info->mmio_bases[i].graphics_ver) - break; - - XE_BUG_ON(i == MAX_MMIO_BASES); - XE_BUG_ON(!info->mmio_bases[i].base); - - return info->mmio_bases[i].base; -} - static void hw_engine_fini(struct drm_device *drm, void *arg) { struct xe_hw_engine *hwe = arg; @@ -346,7 +271,6 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, enum xe_hw_engine_id id) { - struct xe_device *xe = gt_to_xe(gt); const struct engine_info *info; if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name)) @@ -362,7 +286,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->gt = gt; hwe->class = info->class; hwe->instance = info->instance; - hwe->mmio_base = engine_info_mmio_base(info, GRAPHICS_VER(xe)); + hwe->mmio_base = info->mmio_base; hwe->domain = info->domain; hwe->name = info->name; hwe->fence_irq = >->fence_irq[info->class]; -- cgit v1.2.3 From ba11f1b7ea5b59fdf58e5dec7b73fa914de65f8d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Feb 2023 14:16:01 -0800 Subject: drm/xe: Assume MTL's forcewake register continues to future platforms Starting with MTL, the GT forcewake ack register moved from 0x130044 to 0xDFC. We expect this change to carry forward to future platforms as well, so forcewake initialization should use an IP version check instead of matching the MTL platform specifically. The (re)definition of FORCEWAKE_ACK_GT_MTL in the forcewake file is also unnecessary; we can take the definition that already exists in the dedicated register header. Bspec: 65031, 64629 Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_force_wake.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 5bd87118ac73..21d04a02847b 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -36,8 +36,6 @@ static void domain_init(struct xe_force_wake_domain *domain, domain->mask = mask; } -#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) - void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) { struct xe_device *xe = gt_to_xe(gt); @@ -48,7 +46,7 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) /* Assuming gen11+ so assert this assumption is correct */ XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); - if (xe->info.platform == XE_METEORLAKE) { + if (xe->info.graphics_verx100 >= 1270) { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, FORCEWAKE_GT_GEN9.reg, -- cgit v1.2.3 From ea9f879d037ff4d7851f35ba91dc774dd9033308 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:38 -0800 Subject: drm/xe: Sort includes Sort includes and split them in blocks: 1) .h corresponding to the .c. Example: xe_bb.c should have a "#include "xe_bb.h" first. 2) #include 3) #include 4) local includes 5) i915 includes This is accomplished by running `clang-format --style=file -i --sort-includes drivers/gpu/drm/xe/*.[ch]` and ignoring all the changes after the includes. There are also some manual tweaks to split the blocks. 
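A hypothetical xe_foo.c illustrating the resulting grouping (the specific headers are placeholders; the <linux/...> block ahead of the <drm/...> block matches the converted files below):

#include "xe_foo.h"

#include <linux/bitops.h>

#include <drm/drm_print.h>

#include "xe_device.h"
#include "xe_gt.h"

#include "gt/intel_gt_regs.h"
#include "i915_reg_defs.h"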
v2: Also sort includes in headers Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bb.c | 3 ++- drivers/gpu/drm/xe/xe_bo.c | 1 - drivers/gpu/drm/xe/xe_bo_evict.c | 3 ++- drivers/gpu/drm/xe/xe_debugfs.c | 3 ++- drivers/gpu/drm/xe/xe_device.c | 8 ++++---- drivers/gpu/drm/xe/xe_device.h | 2 +- drivers/gpu/drm/xe/xe_dma_buf.c | 9 ++++----- drivers/gpu/drm/xe/xe_engine.c | 3 ++- drivers/gpu/drm/xe/xe_exec.c | 3 ++- drivers/gpu/drm/xe/xe_execlist.c | 10 +++++----- drivers/gpu/drm/xe/xe_force_wake.c | 5 +++-- drivers/gpu/drm/xe/xe_ggtt.c | 6 +++--- drivers/gpu/drm/xe/xe_gt.c | 3 ++- drivers/gpu/drm/xe/xe_gt_clock.c | 7 ++++--- drivers/gpu/drm/xe/xe_gt_debugfs.c | 3 ++- drivers/gpu/drm/xe/xe_gt_mcr.c | 3 ++- drivers/gpu/drm/xe/xe_gt_pagefault.c | 3 ++- drivers/gpu/drm/xe/xe_gt_sysfs.c | 5 ++++- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 3 ++- drivers/gpu/drm/xe/xe_gt_topology.c | 3 ++- drivers/gpu/drm/xe/xe_guc.c | 14 ++++++++------ drivers/gpu/drm/xe/xe_guc.h | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 6 ++++-- drivers/gpu/drm/xe/xe_guc_ct.c | 5 +++-- drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 +- drivers/gpu/drm/xe/xe_guc_debugfs.c | 3 ++- drivers/gpu/drm/xe/xe_guc_fwif.h | 4 ++-- drivers/gpu/drm/xe/xe_guc_hwconfig.c | 3 ++- drivers/gpu/drm/xe/xe_guc_log.c | 3 ++- drivers/gpu/drm/xe/xe_guc_pc.c | 13 ++++++++----- drivers/gpu/drm/xe/xe_guc_pc_types.h | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 9 +++++---- drivers/gpu/drm/xe/xe_huc.c | 3 ++- drivers/gpu/drm/xe/xe_huc_debugfs.c | 3 ++- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- drivers/gpu/drm/xe/xe_hw_fence_types.h | 2 +- drivers/gpu/drm/xe/xe_irq.c | 6 ++++-- drivers/gpu/drm/xe/xe_lrc.c | 6 +++--- drivers/gpu/drm/xe/xe_migrate.c | 12 +++++++----- drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_mocs.c | 5 +++-- drivers/gpu/drm/xe/xe_module.c | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 4 ++-- drivers/gpu/drm/xe/xe_pcode.c | 9 ++++----- drivers/gpu/drm/xe/xe_pm.c | 5 +++-- drivers/gpu/drm/xe/xe_preempt_fence.c | 3 ++- drivers/gpu/drm/xe/xe_pt.c | 5 +++-- drivers/gpu/drm/xe/xe_query.c | 12 +++++++----- drivers/gpu/drm/xe/xe_reg_sr.c | 4 ++-- drivers/gpu/drm/xe/xe_reg_sr_types.h | 2 +- drivers/gpu/drm/xe/xe_reg_whitelist.c | 6 +++--- drivers/gpu/drm/xe/xe_ring_ops.c | 5 +++-- drivers/gpu/drm/xe/xe_rtp.h | 2 +- drivers/gpu/drm/xe/xe_sa.c | 4 +++- drivers/gpu/drm/xe/xe_sync.c | 5 +++-- drivers/gpu/drm/xe/xe_trace.h | 4 ++-- drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c | 2 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 2 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 +- drivers/gpu/drm/xe/xe_tuning.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 5 +++-- drivers/gpu/drm/xe/xe_uc_fw.h | 4 ++-- drivers/gpu/drm/xe/xe_uc_fw_abi.h | 2 +- drivers/gpu/drm/xe/xe_vm.c | 2 +- drivers/gpu/drm/xe/xe_vm_madvise.c | 8 +++++--- drivers/gpu/drm/xe/xe_wopcm.c | 3 ++- 66 files changed, 172 insertions(+), 125 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 8b9209571fd0..d10448d1b4d7 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -4,10 +4,11 @@ */ #include "xe_bb.h" -#include "xe_sa.h" + #include "xe_device.h" #include "xe_engine_types.h" #include "xe_hw_fence.h" +#include "xe_sa.h" #include "xe_sched_job.h" #include "xe_vm_types.h" diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index b4fa856b79d1..3e5393e00b43 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -3,7 +3,6 @@ * Copyright © 2021 Intel 
Corporation */ - #include "xe_bo.h" #include diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 3fb3c8c77efa..bbf89a58cdf5 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -3,8 +3,9 @@ * Copyright © 2022 Intel Corporation */ -#include "xe_bo.h" #include "xe_bo_evict.h" + +#include "xe_bo.h" #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index b0f8b157ffa3..7827a785b020 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -3,13 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_debugfs.h" + #include #include #include "xe_bo.h" #include "xe_device.h" -#include "xe_debugfs.h" #include "xe_gt_debugfs.h" #include "xe_step.h" diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8fe0324ccef3..6d7d57d08a99 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -5,12 +5,12 @@ #include "xe_device.h" -#include #include +#include +#include #include -#include #include -#include +#include #include "xe_bo.h" #include "xe_debugfs.h" @@ -20,8 +20,8 @@ #include "xe_exec.h" #include "xe_gt.h" #include "xe_irq.h" -#include "xe_module.h" #include "xe_mmio.h" +#include "xe_module.h" #include "xe_pcode.h" #include "xe_pm.h" #include "xe_query.h" diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 88d55671b068..ed55ef567d18 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -12,8 +12,8 @@ struct xe_file; #include #include "xe_device_types.h" -#include "xe_macros.h" #include "xe_force_wake.h" +#include "xe_macros.h" #include "gt/intel_gpu_commands.h" diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index d09ff25bd940..9b252cc782b7 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -3,20 +3,19 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_dma_buf.h" + +#include #include +#include #include #include - #include -#include -#include - #include "tests/xe_test.h" #include "xe_bo.h" #include "xe_device.h" -#include "xe_dma_buf.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index b69dcbef0824..519fbbcabdb9 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -5,10 +5,11 @@ #include "xe_engine.h" +#include + #include #include #include -#include #include "xe_device.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 00f298acc436..97fd1a311f2d 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_exec.h" + #include #include #include @@ -10,7 +12,6 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_engine.h" -#include "xe_exec.h" #include "xe_macros.h" #include "xe_sched_job.h" #include "xe_sync.h" diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 47587571123a..8ff1f36f89f7 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -3,15 +3,15 @@ * Copyright © 2021 Intel Corporation */ -#include - #include "xe_execlist.h" +#include + #include "xe_bo.h" #include "xe_device.h" #include "xe_engine.h" -#include "xe_hw_fence.h" #include "xe_gt.h" +#include "xe_hw_fence.h" #include "xe_lrc.h" #include 
"xe_macros.h" #include "xe_mmio.h" @@ -19,11 +19,11 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" -#include "i915_reg.h" +#include "gt/intel_engine_regs.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc_reg.h" -#include "gt/intel_engine_regs.h" +#include "i915_reg.h" #define XE_EXECLIST_HANG_LIMIT 1 diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 21d04a02847b..1ead587cd5c9 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -3,13 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_force_wake.h" + #include -#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_mmio.h" -#include "gt/intel_gt_regs.h" +#include "gt/intel_gt_regs.h" #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 907a603572b2..3730bbeb26b2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -6,20 +6,20 @@ #include "xe_ggtt.h" #include -#include #include +#include -#include "xe_device.h" #include "xe_bo.h" +#include "xe_device.h" #include "xe_gt.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_wopcm.h" -#include "i915_reg.h" #include "gt/intel_gt_regs.h" +#include "i915_reg.h" /* FIXME: Common file, preferably auto-gen */ #define MTL_GGTT_PTE_PAT0 BIT_ULL(52) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5a3c8fd5936a..74e9445befe4 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_gt.h" + #include #include @@ -14,7 +16,6 @@ #include "xe_execlist.h" #include "xe_force_wake.h" #include "xe_ggtt.h" -#include "xe_gt.h" #include "xe_gt_clock.h" #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 575433e9718a..2d9f2aa42bad 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -3,15 +3,16 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_reg.h" -#include "gt/intel_gt_regs.h" +#include "xe_gt_clock.h" #include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_clock.h" #include "xe_macros.h" #include "xe_mmio.h" +#include "gt/intel_gt_regs.h" +#include "i915_reg.h" + static u32 read_reference_ts_freq(struct xe_gt *gt) { u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index c320e58810ce..78942e12e76c 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_gt_debugfs.h" + #include #include @@ -10,7 +12,6 @@ #include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" -#include "xe_gt_debugfs.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 7617f0340879..8fa59988d08e 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -3,8 +3,9 @@ * Copyright © 2022 Intel Corporation */ -#include "xe_gt.h" #include "xe_gt_mcr.h" + +#include "xe_gt.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" #include "xe_mmio.h" diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index ce79eb48feb8..0e7047b89a83 100644 --- 
a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_gt_pagefault.h" + #include #include @@ -10,7 +12,6 @@ #include "xe_bo.h" #include "xe_gt.h" -#include "xe_gt_pagefault.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_ct.h" diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index 2d966d935b8e..c01cc689058c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -3,11 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_gt_sysfs.h" + #include #include + #include + #include "xe_gt.h" -#include "xe_gt_sysfs.h" static void xe_gt_sysfs_kobj_release(struct kobject *kobj) { diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 2521c8a65690..f279e21300aa 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -3,8 +3,9 @@ * Copyright © 2023 Intel Corporation */ -#include "xe_gt.h" #include "xe_gt_tlb_invalidation.h" + +#include "xe_gt.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_trace.h" diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 3dd7cbbff071..c76aaea1887c 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -3,10 +3,11 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_gt_topology.h" + #include #include "xe_gt.h" -#include "xe_gt_topology.h" #include "xe_mmio.h" #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 32bcc40463e1..db3d8c947603 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -3,24 +3,26 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_guc.h" + #include "xe_bo.h" #include "xe_device.h" -#include "xe_guc.h" +#include "xe_force_wake.h" +#include "xe_gt.h" #include "xe_guc_ads.h" #include "xe_guc_ct.h" #include "xe_guc_hwconfig.h" #include "xe_guc_log.h" -#include "xe_guc_reg.h" #include "xe_guc_pc.h" +#include "xe_guc_reg.h" #include "xe_guc_submit.h" -#include "xe_gt.h" +#include "xe_mmio.h" #include "xe_platform_types.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" -#include "xe_mmio.h" -#include "xe_force_wake.h" -#include "i915_reg_defs.h" + #include "gt/intel_gt_regs.h" +#include "i915_reg_defs.h" /* TODO: move to common file */ #define GUC_PVC_MOCS_INDEX_MASK REG_GENMASK(25, 24) diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 7be33458eef6..74a74051f354 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -6,8 +6,8 @@ #ifndef _XE_GUC_H_ #define _XE_GUC_H_ -#include "xe_hw_engine_types.h" #include "xe_guc_types.h" +#include "xe_hw_engine_types.h" #include "xe_macros.h" struct drm_printer; diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 0c08cecaca40..a4e947f0c557 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -3,20 +3,22 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_guc_ads.h" + #include #include "xe_bo.h" #include "xe_gt.h" #include "xe_guc.h" -#include "xe_guc_ads.h" #include "xe_guc_reg.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_platform_types.h" -#include "gt/intel_gt_regs.h" + #include "gt/intel_engine_regs.h" +#include "gt/intel_gt_regs.h" /* Slack of 
a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 84d4302d4e72..5e00b75d3ca2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_guc_ct.h" + #include #include #include @@ -12,10 +14,9 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" #include "xe_gt_pagefault.h" #include "xe_gt_tlb_invalidation.h" +#include "xe_guc.h" #include "xe_guc_submit.h" #include "xe_map.h" #include "xe_trace.h" diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 17b148bf3735..e0f9063e9b65 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -6,8 +6,8 @@ #ifndef _XE_GUC_CT_TYPES_H_ #define _XE_GUC_CT_TYPES_H_ -#include #include +#include #include #include #include diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index 916e9633b322..6b72db4d5bb2 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_guc_debugfs.h" + #include #include @@ -10,7 +12,6 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_guc_ct.h" -#include "xe_guc_debugfs.h" #include "xe_guc_log.h" #include "xe_macros.h" diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index f562404a6cf7..20155ba4ef07 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -10,9 +10,9 @@ #include "abi/guc_actions_abi.h" #include "abi/guc_actions_slpc_abi.h" -#include "abi/guc_errors_abi.h" -#include "abi/guc_communication_mmio_abi.h" #include "abi/guc_communication_ctb_abi.h" +#include "abi/guc_communication_mmio_abi.h" +#include "abi/guc_errors_abi.h" #include "abi/guc_klvs_abi.h" #include "abi/guc_messages_abi.h" diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 57640d608787..a6982f323ed1 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -3,13 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_guc_hwconfig.h" + #include #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_guc.h" -#include "xe_guc_hwconfig.h" #include "xe_map.h" static struct xe_gt * diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 7ec1b2bb1f8e..9a7b5d5906c1 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -3,11 +3,12 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_guc_log.h" + #include #include "xe_bo.h" #include "xe_gt.h" -#include "xe_guc_log.h" #include "xe_map.h" #include "xe_module.h" diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 3ba0c8a35109..28b86e8f3f6e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -3,21 +3,24 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_guc_pc.h" + +#include + #include + #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_types.h" #include "xe_gt_sysfs.h" +#include "xe_gt_types.h" #include "xe_guc_ct.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_pcode.h" -#include "i915_reg_defs.h" -#include "i915_reg.h" - -#include +#include "i915_reg.h" +#include "i915_reg_defs.h" #include "intel_mchbar_regs.h" /* For 
GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 39548e03acf4..2afd0dbc3542 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -6,8 +6,8 @@ #ifndef _XE_GUC_PC_TYPES_H_ #define _XE_GUC_PC_TYPES_H_ -#include #include +#include /** * struct xe_guc_pc - GuC Power Conservation (PC) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 2d4eb527d6e8..6469d3cd3beb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_guc_submit.h" + #include #include #include @@ -13,13 +15,12 @@ #include "xe_device.h" #include "xe_engine.h" +#include "xe_force_wake.h" +#include "xe_gpu_scheduler.h" +#include "xe_gt.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_engine_types.h" -#include "xe_guc_submit.h" -#include "xe_gt.h" -#include "xe_force_wake.h" -#include "xe_gpu_scheduler.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" #include "xe_lrc.h" diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 82e7fb3a6292..a9448c6f6418 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -3,13 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_huc.h" + #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_guc.h" #include "xe_guc_reg.h" -#include "xe_huc.h" #include "xe_mmio.h" #include "xe_uc_fw.h" diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c index 268bac36336a..ee3d8315036a 100644 --- a/drivers/gpu/drm/xe/xe_huc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -3,13 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_huc_debugfs.h" + #include #include #include "xe_device.h" #include "xe_gt.h" #include "xe_huc.h" -#include "xe_huc_debugfs.h" #include "xe_macros.h" static struct xe_gt * diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 986f675aaf88..074133d44009 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -22,8 +22,8 @@ #include "xe_wa.h" #include "gt/intel_engine_regs.h" -#include "i915_reg.h" #include "gt/intel_gt_regs.h" +#include "i915_reg.h" #define MAX_MMIO_BASES 3 struct engine_info { diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index a78e50eb3cb8..b33c4956e8ea 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -6,8 +6,8 @@ #ifndef _XE_HW_FENCE_TYPES_H_ #define _XE_HW_FENCE_TYPES_H_ -#include #include +#include #include #include #include diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index ab703f1c8b58..46431f0e4af8 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -3,19 +3,21 @@ * Copyright © 2021 Intel Corporation */ +#include "xe_irq.h" + #include #include #include "xe_device.h" #include "xe_drv.h" -#include "xe_guc.h" #include "xe_gt.h" +#include "xe_guc.h" #include "xe_hw_engine.h" #include "xe_mmio.h" -#include "i915_reg.h" #include "gt/intel_gt_regs.h" +#include "i915_reg.h" static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) { diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 347ff9b34494..ed6fcf7620e1 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ 
b/drivers/gpu/drm/xe/xe_lrc.c @@ -9,15 +9,15 @@ #include "xe_device.h" #include "xe_engine_types.h" #include "xe_gt.h" -#include "xe_map.h" #include "xe_hw_fence.h" +#include "xe_map.h" #include "xe_vm.h" -#include "i915_reg.h" +#include "gt/intel_engine_regs.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc_reg.h" -#include "gt/intel_engine_regs.h" +#include "i915_reg.h" #define GEN8_CTX_VALID (1 << 0) #define GEN8_CTX_L3LLC_COHERENT (1 << 5) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 377ab019b4c8..bbab524dcee6 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2,8 +2,15 @@ /* * Copyright © 2020 Intel Corporation */ + #include "xe_migrate.h" +#include + +#include +#include +#include + #include "xe_bb.h" #include "xe_bo.h" #include "xe_engine.h" @@ -20,11 +27,6 @@ #include "xe_trace.h" #include "xe_vm.h" -#include -#include -#include -#include - #include "gt/intel_gpu_commands.h" /** diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index c414ece6dfe3..a117437f8482 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -14,9 +14,9 @@ #include "xe_macros.h" #include "xe_module.h" -#include "i915_reg.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" +#include "i915_reg.h" #define XEHP_MTCFG_ADDR _MMIO(0x101800) #define TILE_COUNT REG_GENMASK(15, 8) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 86b966fffbe5..d91054c78702 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -3,13 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_mocs.h" + #include "xe_bo.h" #include "xe_device.h" #include "xe_engine.h" #include "xe_gt.h" -#include "xe_platform_types.h" #include "xe_mmio.h" -#include "xe_mocs.h" +#include "xe_platform_types.h" #include "xe_step_types.h" #include "gt/intel_gt_regs.h" diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index cc862553a252..3f5d03a58696 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -3,6 +3,8 @@ * Copyright © 2021 Intel Corporation */ +#include "xe_module.h" + #include #include diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3474b5c9f174..884f9b16c9de 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -10,12 +10,12 @@ #include #include -#include #include +#include #include -#include "xe_drv.h" #include "xe_device.h" +#include "xe_drv.h" #include "xe_macros.h" #include "xe_module.h" #include "xe_pm.h" diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 1a76fe478853..fb1ce2d49bec 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -3,15 +3,14 @@ * Copyright © 2022 Intel Corporation */ -#include "xe_pcode_api.h" #include "xe_pcode.h" -#include "xe_gt.h" -#include "xe_mmio.h" - +#include #include -#include +#include "xe_gt.h" +#include "xe_mmio.h" +#include "xe_pcode_api.h" /** * DOC: PCODE diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 0ef92b746595..9a74d15052c4 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_pm.h" + #include #include @@ -10,9 +12,8 @@ #include "xe_bo.h" #include "xe_bo_evict.h" #include "xe_device.h" -#include "xe_pm.h" -#include "xe_gt.h" #include "xe_ggtt.h" +#include "xe_gt.h" #include "xe_irq.h" 
#include "xe_pcode.h" diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 6ab9ff442766..78ad8c209873 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -3,10 +3,11 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_preempt_fence.h" + #include #include "xe_engine.h" -#include "xe_preempt_fence.h" #include "xe_vm.h" static void preempt_fence_work_func(struct work_struct *w) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 23f308184ba1..00d9fff53828 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,18 +3,19 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_pt.h" + #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" -#include "xe_pt.h" #include "xe_pt_types.h" #include "xe_pt_walk.h" -#include "xe_vm.h" #include "xe_res_cursor.h" #include "xe_trace.h" #include "xe_ttm_stolen_mgr.h" +#include "xe_vm.h" struct xe_pt_dir { struct xe_pt pt; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index f9f21bd1bfd7..0f70945176f6 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -3,17 +3,19 @@ * Copyright © 2022 Intel Corporation */ -#include -#include +#include "xe_query.h" + #include +#include +#include + #include "xe_bo.h" #include "xe_device.h" -#include "xe_gt.h" -#include "xe_macros.h" -#include "xe_query.h" #include "xe_ggtt.h" +#include "xe_gt.h" #include "xe_guc_hwconfig.h" +#include "xe_macros.h" static const enum xe_engine_class xe_to_user_engine_class[] = { [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER, diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 16e025dcf2cc..4d12f8a3043f 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -9,16 +9,16 @@ #include #include -#include #include +#include -#include "xe_rtp_types.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_macros.h" #include "xe_mmio.h" +#include "xe_rtp_types.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h index 2fa7ff3966ba..b234a8673e54 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr_types.h +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -6,8 +6,8 @@ #ifndef _XE_REG_SR_TYPES_ #define _XE_REG_SR_TYPES_ -#include #include +#include #include "i915_reg_defs.h" diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index a34617a642ec..f6ce801215e7 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -5,12 +5,12 @@ #include "xe_reg_whitelist.h" -#include "xe_platform_types.h" #include "xe_gt_types.h" +#include "xe_platform_types.h" #include "xe_rtp.h" -#include "../i915/gt/intel_engine_regs.h" -#include "../i915/gt/intel_gt_regs.h" +#include "gt/intel_engine_regs.h" +#include "gt/intel_gt_regs.h" #undef _MMIO #undef MCR_REG diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 54db4ca19a36..2e118d37b88c 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -3,18 +3,19 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_ring_ops.h" + #include "xe_engine_types.h" #include "xe_gt.h" #include "xe_lrc.h" #include "xe_macros.h" -#include "xe_ring_ops.h" #include "xe_sched_job.h" 
#include "xe_vm_types.h" -#include "i915_reg.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc_reg.h" +#include "i915_reg.h" static u32 preparser_disable(bool state) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 1ac3fd1c0734..d6ba0b7e5042 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -6,8 +6,8 @@ #ifndef _XE_RTP_ #define _XE_RTP_ -#include #include +#include #include "xe_rtp_types.h" diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 7403410cd806..96c4b0ef24fe 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -3,14 +3,16 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_sa.h" + #include + #include #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_map.h" -#include "xe_sa.h" static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) { diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 0fbd8d0978cf..99f1ed87196d 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -8,13 +8,14 @@ #include #include #include -#include + #include #include +#include #include "xe_device_types.h" -#include "xe_sched_job_types.h" #include "xe_macros.h" +#include "xe_sched_job_types.h" #define SYNC_FLAGS_TYPE_MASK 0x3 #define SYNC_FLAGS_FENCE_INSTALLED 0x10000 diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index d1cd4b57a974..878ab4115d91 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -9,14 +9,14 @@ #if !defined(_XE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _XE_TRACE_H_ -#include #include +#include #include "xe_bo_types.h" #include "xe_engine_types.h" #include "xe_gpu_scheduler_types.h" -#include "xe_gt_types.h" #include "xe_gt_tlb_invalidation_types.h" +#include "xe_gt_types.h" #include "xe_guc_engine_types.h" #include "xe_sched_job.h" #include "xe_vm_types.h" diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c index a0ba8bba84d1..8075781070f2 100644 --- a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c @@ -6,8 +6,8 @@ #include -#include #include +#include #include #include "xe_bo.h" diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 097454f78286..fe0f707ad054 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -8,8 +8,8 @@ #include #include -#include #include +#include #include "../i915/i915_reg.h" diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index c7e21673b8fd..643365b18bc7 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -6,8 +6,8 @@ #include -#include #include +#include #include "xe_bo.h" #include "xe_device.h" diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 595eb2de90ad..a3872f0330cb 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -5,8 +5,8 @@ #include "xe_tuning.h" -#include "xe_platform_types.h" #include "xe_gt_types.h" +#include "xe_platform_types.h" #include "xe_rtp.h" #include "gt/intel_gt_regs.h" diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 7886c8b85397..4ccf2b3435e1 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -3,13 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_uc.h" + #include "xe_device.h" 
-#include "xe_huc.h" #include "xe_gt.h" #include "xe_guc.h" #include "xe_guc_pc.h" #include "xe_guc_submit.h" -#include "xe_uc.h" +#include "xe_huc.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index b0df5064b27d..ca64d379bb5e 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -8,9 +8,9 @@ #include -#include "xe_uc_fw_types.h" -#include "xe_uc_fw_abi.h" #include "xe_macros.h" +#include "xe_uc_fw_abi.h" +#include "xe_uc_fw_types.h" struct drm_printer; diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h index fc7b1855ee90..89e994ed4e00 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -6,8 +6,8 @@ #ifndef _XE_UC_FW_ABI_H #define _XE_UC_FW_ABI_H -#include #include +#include /** * DOC: Firmware Layout diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 04481851fa00..fcac31f11706 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -25,8 +25,8 @@ #include "xe_preempt_fence.h" #include "xe_pt.h" #include "xe_res_cursor.h" -#include "xe_trace.h" #include "xe_sync.h" +#include "xe_trace.h" #define TEST_VM_ASYNC_OPS_ERROR diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 4498aa2fbd47..89a02c8e0424 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -3,13 +3,15 @@ * Copyright © 2021 Intel Corporation */ -#include -#include +#include "xe_vm_madvise.h" + #include +#include +#include + #include "xe_bo.h" #include "xe_vm.h" -#include "xe_vm_madvise.h" static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index 8fe182afa06c..7bb880355f6b 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -3,13 +3,14 @@ * Copyright © 2022 Intel Corporation */ +#include "xe_wopcm.h" + #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_guc_reg.h" #include "xe_mmio.h" #include "xe_uc_fw.h" -#include "xe_wopcm.h" /** * DOC: Write Once Protected Content Memory (WOPCM) Layout -- cgit v1.2.3 From b79e8fd954c48fba74b2c3807f6093ce40e9ab7f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:39 -0800 Subject: drm/xe: Remove dependency on intel_engine_regs.h Create regs/xe_engine_regs.h file with all the registers and bit definitions used by the xe driver. Eventually the registers may be defined in a different way and since xe doesn't supported below gen12, the number of registers touched is much smaller, so create a new header. The definitions themselves are direct copy from the gt/intel_engine_regs.h file, just sorting the registers by address. Cleaning those up and adhering to a common coding style is left for later. 
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 98 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 3 +- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_reg_sr.c | 2 +- drivers/gpu/drm/xe/xe_reg_whitelist.c | 2 +- drivers/gpu/drm/xe/xe_wa.c | 2 +- 9 files changed, 107 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_engine_regs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h new file mode 100644 index 000000000000..6dfa3cf2fd43 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_ENGINE_REGS_H_ +#define _XE_ENGINE_REGS_H_ + +#include + +#include "i915_reg_defs.h" + +#define RING_TAIL(base) _MMIO((base) + 0x30) + +#define RING_HEAD(base) _MMIO((base) + 0x34) +#define HEAD_ADDR 0x001FFFFC + +#define RING_START(base) _MMIO((base) + 0x38) + +#define RING_CTL(base) _MMIO((base) + 0x3c) +#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ +#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ + +#define RING_PSMI_CTL(base) _MMIO((base) + 0x50) +#define GEN8_RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) +#define GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) + +#define RING_ACTHD_UDW(base) _MMIO((base) + 0x5c) +#define RING_DMA_FADD_UDW(base) _MMIO((base) + 0x60) +#define RING_IPEIR(base) _MMIO((base) + 0x64) +#define RING_IPEHR(base) _MMIO((base) + 0x68) +#define RING_ACTHD(base) _MMIO((base) + 0x74) +#define RING_DMA_FADD(base) _MMIO((base) + 0x78) +#define RING_HWS_PGA(base) _MMIO((base) + 0x80) +#define IPEIR(base) _MMIO((base) + 0x88) +#define IPEHR(base) _MMIO((base) + 0x8c) +#define RING_HWSTAM(base) _MMIO((base) + 0x98) +#define RING_MI_MODE(base) _MMIO((base) + 0x9c) +#define RING_NOPID(base) _MMIO((base) + 0x94) + +#define RING_IMR(base) _MMIO((base) + 0xa8) +#define RING_MAX_NONPRIV_SLOTS 12 + +#define RING_EIR(base) _MMIO((base) + 0xb0) +#define RING_EMR(base) _MMIO((base) + 0xb4) +#define RING_ESR(base) _MMIO((base) + 0xb8) +#define RING_BBADDR(base) _MMIO((base) + 0x140) +#define RING_BBADDR_UDW(base) _MMIO((base) + 0x168) +#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234) +#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4) + +#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244) +#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) +#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) + +#define RING_MODE_GEN7(base) _MMIO((base) + 0x29c) +#define GEN11_GFX_DISABLE_LEGACY_MODE (1 << 3) + +#define RING_TIMESTAMP(base) _MMIO((base) + 0x358) + +#define RING_TIMESTAMP_UDW(base) _MMIO((base) + 0x358 + 4) +#define RING_VALID_MASK 0x00000001 +#define RING_VALID 0x00000001 +#define STOP_RING REG_BIT(8) +#define TAIL_ADDR 0x001FFFF8 + +#define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) + +#define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4d0) + (i) * 4) +#define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30) +#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) +#define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) +#define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) +#define RING_FORCE_TO_NONPRIV_ACCESS_WR (2 << 28) +#define RING_FORCE_TO_NONPRIV_ACCESS_INVALID (3 << 28) 
+#define RING_FORCE_TO_NONPRIV_ACCESS_MASK (3 << 28) +#define RING_FORCE_TO_NONPRIV_RANGE_1 (0 << 0) +#define RING_FORCE_TO_NONPRIV_RANGE_4 (1 << 0) +#define RING_FORCE_TO_NONPRIV_RANGE_16 (2 << 0) +#define RING_FORCE_TO_NONPRIV_RANGE_64 (3 << 0) +#define RING_FORCE_TO_NONPRIV_RANGE_MASK (3 << 0) +#define RING_FORCE_TO_NONPRIV_MASK_VALID (RING_FORCE_TO_NONPRIV_RANGE_MASK | \ + RING_FORCE_TO_NONPRIV_ACCESS_MASK | \ + RING_FORCE_TO_NONPRIV_DENY) +#define RING_MAX_NONPRIV_SLOTS 12 + +#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) + +#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) +#define EL_CTRL_LOAD REG_BIT(0) + +#define VDBOX_CGCTL3F10(base) _MMIO((base) + 0x3f10) +#define IECPUNIT_CLKGATE_DIS REG_BIT(22) + +#define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18) +#define ALNUNIT_CLKGATE_DIS REG_BIT(13) + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 8ff1f36f89f7..e700737a213d 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -7,6 +7,7 @@ #include +#include "regs/xe_engine_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine.h" @@ -19,7 +20,6 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" -#include "gt/intel_engine_regs.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc_reg.h" diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index a4e947f0c557..6cd07f51b828 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -7,6 +7,7 @@ #include +#include "regs/xe_engine_regs.h" #include "xe_bo.h" #include "xe_gt.h" #include "xe_guc.h" @@ -17,9 +18,9 @@ #include "xe_mmio.h" #include "xe_platform_types.h" -#include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" + /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 074133d44009..f7c5f709b088 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -7,6 +7,7 @@ #include +#include "regs/xe_engine_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_execlist.h" @@ -21,7 +22,6 @@ #include "xe_sched_job.h" #include "xe_wa.h" -#include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index ed6fcf7620e1..887d9189fcec 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,6 +5,7 @@ #include "xe_lrc.h" +#include "regs/xe_engine_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine_types.h" @@ -13,7 +14,6 @@ #include "xe_map.h" #include "xe_vm.h" -#include "gt/intel_engine_regs.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc_reg.h" diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index a117437f8482..07db7912a931 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -8,13 +8,13 @@ #include #include +#include "regs/xe_engine_regs.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_macros.h" #include "xe_module.h" -#include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 4d12f8a3043f..359de4724bb5 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -12,6 +12,7 @@ 
#include #include +#include "regs/xe_engine_regs.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_gt.h" @@ -20,7 +21,6 @@ #include "xe_mmio.h" #include "xe_rtp_types.h" -#include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" #define XE_REG_SR_GROW_STEP_DEFAULT 16 diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index f6ce801215e7..5aa73c1c4097 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -5,11 +5,11 @@ #include "xe_reg_whitelist.h" +#include "regs/xe_engine_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_rtp.h" -#include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" #undef _MMIO diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 9d2e4555091c..92065341c001 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -7,6 +7,7 @@ #include +#include "regs/xe_engine_regs.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_gt.h" @@ -16,7 +17,6 @@ #include "xe_rtp.h" #include "xe_step.h" -#include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" #include "i915_reg.h" -- cgit v1.2.3 From 226bfec858c93797dbd3d47d1418ed68684fa752 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:40 -0800 Subject: drm/xe: Remove dependency on intel_gt_regs.h Create regs/xe_gt_regs.h file with all the registers and bit definitions used by the xe driver. Eventually the registers may be defined in a different way and since xe doesn't supported below gen12, the number of registers touched is much smaller, so create a new header. The definitions themselves are direct copy from the gt/intel_gt_regs.h file, just sorting the registers by address. Cleaning those up and adhering to a common coding style is left for later. 
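
For illustration, the REG_BIT()/REG_GENMASK() fields in the new header compose in the usual i915_reg_defs.h way. A minimal sketch of building a steering value for MTL_MCR_SELECTOR is below; the xe_mmio_write32() helper taking a raw offset and the group/instance arguments are assumptions for the example, only the register and field macros come from regs/xe_gt_regs.h.

#include "regs/xe_gt_regs.h"
#include "xe_gt.h"
#include "xe_mmio.h"

static void example_steer_mcr(struct xe_gt *gt, u32 group, u32 instance)
{
	/* Pack group/instance into the MTL_MCR_GROUPID/INSTANCEID fields */
	u32 steer = REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
		    REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance);

	xe_mmio_write32(gt, MTL_MCR_SELECTOR.reg, steer);
}
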
v2: Make the change to MCR_REG location in a separate patch to go through the i915 branch (Matt Roper / Rodrigo) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 288 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_force_wake.c | 3 +- drivers/gpu/drm/xe/xe_ggtt.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 3 +- drivers/gpu/drm/xe/xe_gt_clock.c | 2 +- drivers/gpu/drm/xe/xe_gt_mcr.c | 3 +- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 4 +- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- drivers/gpu/drm/xe/xe_irq.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_mocs.c | 3 +- drivers/gpu/drm/xe/xe_reg_sr.c | 3 +- drivers/gpu/drm/xe/xe_reg_whitelist.c | 3 +- drivers/gpu/drm/xe/xe_ring_ops.c | 2 +- drivers/gpu/drm/xe/xe_tuning.c | 3 +- drivers/gpu/drm/xe/xe_wa.c | 2 +- 20 files changed, 307 insertions(+), 28 deletions(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_gt_regs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h new file mode 100644 index 000000000000..3eb92c975085 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -0,0 +1,288 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_REGS_H_ +#define _XE_GT_REGS_H_ + +#include "i915_reg_defs.h" + +/* RPM unit config (Gen8+) */ +#define RPM_CONFIG0 _MMIO(0xd00) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) + +#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n) _MMIO(0xd50 + (n) * 4) +#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n) _MMIO(0xd70 + (n) * 4) +#define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0xd84) +#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) + +#define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ +#define LNCFCMOCS_REG_COUNT 32 + +#define MCFG_MCR_SELECTOR _MMIO(0xfd0) +#define MTL_MCR_SELECTOR _MMIO(0xfd4) +#define SF_MCR_SELECTOR _MMIO(0xfd8) +#define GEN8_MCR_SELECTOR _MMIO(0xfdc) +#define GAM_MCR_SELECTOR _MMIO(0xfe0) +#define GEN11_MCR_MULTICAST REG_BIT(31) +#define GEN11_MCR_SLICE(slice) (((slice) & 0xf) << 27) +#define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) +#define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) +#define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) +#define MTL_MCR_GROUPID REG_GENMASK(11, 8) +#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) + +#define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) +#define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14) + +#define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) +#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) + +#define PS_INVOCATION_COUNT _MMIO(0x2348) + +#define GEN8_CS_CHICKEN1 _MMIO(0x2580) +#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0) +#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) +#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0) +#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL 
GEN9_PREEMPT_GPGPU_LEVEL(0, 1) +#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) +#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) + +#define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ +#define GEN12_CCS_AUX_INV _MMIO(0x4208) + +#define GEN12_VD0_AUX_INV _MMIO(0x4218) +#define GEN12_VE0_AUX_INV _MMIO(0x4238) + +#define GEN12_VE1_AUX_INV _MMIO(0x42b8) +#define AUX_INV REG_BIT(0) + +#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) +#define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910) + +#define GEN12_FF_MODE2 _MMIO(0x6604) +#define XEHP_FF_MODE2 MCR_REG(0x6604) +#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) +#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) +#define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) +#define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) + +#define HIZ_CHICKEN _MMIO(0x7018) +#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) + +/* GEN7 chicken */ +#define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) + +#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) +#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304) +#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) +#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) +#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) +#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) + +#define XEHP_SQCM MCR_REG(0x8724) +#define EN_32B_ACCESS REG_BIT(30) + +#define GEN10_MIRROR_FUSE3 _MMIO(0x9118) +#define GEN10_L3BANK_PAIR_COUNT 4 +#define GEN10_L3BANK_MASK 0x0F +/* on Xe_HP the same fuses indicates mslices instead of L3 banks */ +#define GEN12_MAX_MSLICES 4 +#define GEN12_MEML3_EN_MASK 0x0F + +/* Fuse readout registers for GT */ +#define XEHP_FUSE4 _MMIO(0x9114) +#define GT_L3_EXC_MASK REG_GENMASK(6, 4) + +#define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) +#define GEN11_GT_VDBOX_DISABLE_MASK 0xff +#define GEN11_GT_VEBOX_DISABLE_SHIFT 16 +#define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) + +#define GEN6_GDRST _MMIO(0x941c) +#define GEN11_GRDOM_GUC REG_BIT(3) +#define GEN6_GRDOM_FULL (1 << 0) +#define GEN11_GRDOM_FULL GEN6_GRDOM_FULL + +#define GEN7_MISCCPCTL _MMIO(0x9424) +#define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0) +#define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) + +#define UNSLCGCTL9430 _MMIO(0x9430) +#define MSQDUNIT_CLKGATE_DIS REG_BIT(3) + +#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) +#define VFUNIT_CLKGATE_DIS REG_BIT(20) +#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */ +#define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */ +#define GAMEDIA_CLKGATE_DIS REG_BIT(11) +#define HSUNIT_CLKGATE_DIS REG_BIT(8) +#define VSUNIT_CLKGATE_DIS REG_BIT(3) + +#define UNSLCGCTL9440 _MMIO(0x9440) +#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28) +#define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27) +#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26) +#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24) +#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23) +#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22) +#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21) +#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17) +#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16) +#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15) +#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14) +#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6) + +#define UNSLCGCTL9444 _MMIO(0x9444) +#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30) +#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29) +#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28) +#define 
GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27) +#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26) +#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25) +#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24) +#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23) +#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22) +#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21) +#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20) +#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19) +#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18) +#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17) +#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) +#define LTCDD_CLKGATE_DIS REG_BIT(10) + +#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4) +#define SARBUNIT_CLKGATE_DIS (1 << 5) +#define RCCUNIT_CLKGATE_DIS (1 << 7) +#define MSCUNIT_CLKGATE_DIS (1 << 10) +#define NODEDSS_CLKGATE_DIS REG_BIT(12) +#define L3_CLKGATE_DIS REG_BIT(16) +#define L3_CR2X_CLKGATE_DIS REG_BIT(17) + +#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) +#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19) +#define PSDUNIT_CLKGATE_DIS REG_BIT(5) + +#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524) +#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) +#define GWUNIT_CLKGATE_DIS REG_BIT(16) + +#define SUBSLICE_UNIT_LEVEL_CLKGATE2 MCR_REG(0x9528) +#define CPSSUNIT_CLKGATE_DIS REG_BIT(9) + +#define SSMCGCTL9530 MCR_REG(0x9530) +#define RTFUNIT_CLKGATE_DIS REG_BIT(18) + +#define GEN10_DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550) +#define DFR_DISABLE (1 << 9) + +#define GEN6_RPNSWREQ _MMIO(0xa008) +#define GEN6_RC_CONTROL _MMIO(0xa090) +#define GEN6_RC_STATE _MMIO(0xa094) + +#define GEN6_PMINTRMSK _MMIO(0xa168) +#define GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC (1 << 31) +#define ARAT_EXPIRED_INTRMSK (1 << 9) + +#define FORCEWAKE_GT_GEN9 _MMIO(0xa188) + +#define GEN9_PG_ENABLE _MMIO(0xa210) + +/* GPM unit config (Gen9+) */ +#define CTC_MODE _MMIO(0xa26c) +#define CTC_SOURCE_PARAMETER_MASK 1 +#define CTC_SOURCE_CRYSTAL_CLOCK 0 +#define CTC_SOURCE_DIVIDE_LOGIC 1 +#define CTC_SHIFT_PARAMETER_SHIFT 1 +#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) + +#define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) +#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4) +#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4) + +#define GEN10_SAMPLER_MODE MCR_REG(0xe18c) +#define ENABLE_SMALLPL REG_BIT(15) +#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) +#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) + +#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c) +#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) +#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) +#define GEN12_DISABLE_TDL_PUSH REG_BIT(9) +#define GEN11_DIS_PICK_2ND_EU REG_BIT(7) +#define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) +#define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) +#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) + +#define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) +#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) +#define GEN12_DISABLE_EARLY_READ REG_BIT(14) +#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) +#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define GEN12_DISABLE_DOP_GATING REG_BIT(0) + +#define SARB_CHICKEN1 MCR_REG(0xe90c) +#define COMP_CKN_IN REG_GENMASK(30, 29) + +#define GEN12_RCU_MODE _MMIO(0x14800) +#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) + +#define FORCEWAKE_ACK_GT_GEN9 _MMIO(0x130044) +#define FORCEWAKE_KERNEL BIT(0) +#define FORCEWAKE_USER BIT(1) +#define FORCEWAKE_KERNEL_FALLBACK BIT(15) + +#define GEN6_GT_CORE_STATUS _MMIO(0x138060) +#define GEN6_RC0 0 +#define GEN6_RC6 3 + 
+#define GEN6_GT_GFX_RC6_LOCKED _MMIO(0x138104) +#define GEN6_GT_GFX_RC6 _MMIO(0x138108) + +#define GFX_FLSH_CNTL_GEN6 _MMIO(0x101008) +#define GFX_FLSH_CNTL_EN (1 << 0) + +#define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) + +#define GEN11_GUC_SG_INTR_ENABLE _MMIO(0x190038) +#define ENGINE1_MASK REG_GENMASK(31, 16) +#define ENGINE0_MASK REG_GENMASK(15, 0) + +#define GEN11_GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c) + +#define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4)) +#define GEN11_INTR_DATA_VALID (1 << 31) +#define GEN11_INTR_ENGINE_INSTANCE(x) (((x) & GENMASK(25, 20)) >> 20) +#define GEN11_INTR_ENGINE_CLASS(x) (((x) & GENMASK(18, 16)) >> 16) +#define GEN11_INTR_ENGINE_INTR(x) ((x) & 0xffff) +#define OTHER_GUC_INSTANCE 0 + +#define GEN11_RENDER_COPY_INTR_ENABLE _MMIO(0x190030) +#define GEN11_VCS_VECS_INTR_ENABLE _MMIO(0x190034) +#define GEN12_CCS_RSVD_INTR_ENABLE _MMIO(0x190048) +#define GEN11_IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4)) +#define GEN11_RCS0_RSVD_INTR_MASK _MMIO(0x190090) +#define GEN11_BCS_RSVD_INTR_MASK _MMIO(0x1900a0) +#define GEN11_VCS0_VCS1_INTR_MASK _MMIO(0x1900a8) +#define GEN11_VCS2_VCS3_INTR_MASK _MMIO(0x1900ac) +#define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) +#define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8) +#define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) +#define GEN12_CCS0_CCS1_INTR_MASK _MMIO(0x190100) +#define GEN12_CCS2_CCS3_INTR_MASK _MMIO(0x190104) +#define XEHPC_BCS1_BCS2_INTR_MASK _MMIO(0x190110) +#define XEHPC_BCS3_BCS4_INTR_MASK _MMIO(0x190114) +#define XEHPC_BCS5_BCS6_INTR_MASK _MMIO(0x190118) +#define XEHPC_BCS7_BCS8_INTR_MASK _MMIO(0x19011c) + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index e700737a213d..fe20c6128134 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -8,6 +8,7 @@ #include #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine.h" @@ -21,7 +22,6 @@ #include "xe_sched_job.h" #include "gt/intel_gpu_commands.h" -#include "gt/intel_gt_regs.h" #include "gt/intel_lrc_reg.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 1ead587cd5c9..77a210acfac3 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -7,11 +7,10 @@ #include +#include "regs/xe_gt_regs.h" #include "xe_gt.h" #include "xe_mmio.h" -#include "gt/intel_gt_regs.h" - #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 static struct xe_gt * diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 3730bbeb26b2..3bf437321149 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -10,6 +10,7 @@ #include #include +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" @@ -18,7 +19,6 @@ #include "xe_mmio.h" #include "xe_wopcm.h" -#include "gt/intel_gt_regs.h" #include "i915_reg.h" /* FIXME: Common file, preferably auto-gen */ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 74e9445befe4..343370b44506 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -9,6 +9,7 @@ #include +#include "regs/xe_gt_regs.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" @@ -41,8 +42,6 @@ #include "xe_wa.h" #include "xe_wopcm.h" -#include "gt/intel_gt_regs.h" - struct xe_gt *xe_find_full_gt(struct xe_gt *gt) { struct xe_gt *search; diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c 
b/drivers/gpu/drm/xe/xe_gt_clock.c index 2d9f2aa42bad..fd0ca33925cd 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -5,12 +5,12 @@ #include "xe_gt_clock.h" +#include "regs/xe_gt_regs.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_macros.h" #include "xe_mmio.h" -#include "gt/intel_gt_regs.h" #include "i915_reg.h" static u32 read_reference_ts_freq(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 8fa59988d08e..10eff02cc7db 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -5,13 +5,12 @@ #include "xe_gt_mcr.h" +#include "regs/xe_gt_regs.h" #include "xe_gt.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" #include "xe_mmio.h" -#include "gt/intel_gt_regs.h" - /** * DOC: GT Multicast/Replicated (MCR) Register Support * diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index db3d8c947603..661effa9830f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -5,6 +5,7 @@ #include "xe_guc.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" @@ -21,7 +22,6 @@ #include "xe_uc_fw.h" #include "xe_wopcm.h" -#include "gt/intel_gt_regs.h" #include "i915_reg_defs.h" /* TODO: move to common file */ diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 6cd07f51b828..49725093fb47 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -8,6 +8,7 @@ #include #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_gt.h" #include "xe_guc.h" @@ -18,9 +19,6 @@ #include "xe_mmio.h" #include "xe_platform_types.h" -#include "gt/intel_gt_regs.h" - - /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 28b86e8f3f6e..f983f47cefb6 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -9,6 +9,7 @@ #include +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" @@ -31,7 +32,6 @@ #define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) -#include "gt/intel_gt_regs.h" /* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */ #define REQ_RATIO_MASK REG_GENMASK(31, 23) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index f7c5f709b088..5e7f21b319bb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -8,6 +8,7 @@ #include #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_execlist.h" @@ -22,7 +23,6 @@ #include "xe_sched_job.h" #include "xe_wa.h" -#include "gt/intel_gt_regs.h" #include "i915_reg.h" #define MAX_MMIO_BASES 3 diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 46431f0e4af8..04b3801fc0a8 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -9,6 +9,7 @@ #include +#include "regs/xe_gt_regs.h" #include "xe_device.h" #include "xe_drv.h" #include "xe_gt.h" @@ -16,7 +17,6 @@ #include "xe_hw_engine.h" #include "xe_mmio.h" -#include "gt/intel_gt_regs.h" #include "i915_reg.h" static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 887d9189fcec..bf12f71fbe72 100644 --- 
a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -6,6 +6,7 @@ #include "xe_lrc.h" #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine_types.h" @@ -15,7 +16,6 @@ #include "xe_vm.h" #include "gt/intel_gpu_commands.h" -#include "gt/intel_gt_regs.h" #include "gt/intel_lrc_reg.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 07db7912a931..5e6ca0d2076a 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -9,13 +9,13 @@ #include #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_macros.h" #include "xe_module.h" -#include "gt/intel_gt_regs.h" #include "i915_reg.h" #define XEHP_MTCFG_ADDR _MMIO(0x101800) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index d91054c78702..7ff81041d5ce 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -5,6 +5,7 @@ #include "xe_mocs.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine.h" @@ -13,8 +14,6 @@ #include "xe_platform_types.h" #include "xe_step_types.h" -#include "gt/intel_gt_regs.h" - #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define mocs_dbg drm_dbg #else diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 359de4724bb5..d67516469710 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -13,6 +13,7 @@ #include #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_gt.h" @@ -21,8 +22,6 @@ #include "xe_mmio.h" #include "xe_rtp_types.h" -#include "gt/intel_gt_regs.h" - #define XE_REG_SR_GROW_STEP_DEFAULT 16 static void reg_sr_fini(struct drm_device *drm, void *arg) diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 5aa73c1c4097..2dd10e62718f 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -6,12 +6,11 @@ #include "xe_reg_whitelist.h" #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_rtp.h" -#include "gt/intel_gt_regs.h" - #undef _MMIO #undef MCR_REG #define _MMIO(x) _XE_RTP_REG(x) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 2e118d37b88c..1b633222fda6 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -5,6 +5,7 @@ #include "xe_ring_ops.h" +#include "regs/xe_gt_regs.h" #include "xe_engine_types.h" #include "xe_gt.h" #include "xe_lrc.h" @@ -13,7 +14,6 @@ #include "xe_vm_types.h" #include "gt/intel_gpu_commands.h" -#include "gt/intel_gt_regs.h" #include "gt/intel_lrc_reg.h" #include "i915_reg.h" diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index a3872f0330cb..624b257ecfbc 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -5,12 +5,11 @@ #include "xe_tuning.h" +#include "regs/xe_gt_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_rtp.h" -#include "gt/intel_gt_regs.h" - #undef _MMIO #undef MCR_REG #define _MMIO(x) _XE_RTP_REG(x) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 92065341c001..155cfd1dcc50 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -8,6 +8,7 @@ #include #include 
"regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_gt.h" @@ -17,7 +18,6 @@ #include "xe_rtp.h" #include "xe_step.h" -#include "gt/intel_gt_regs.h" #include "i915_reg.h" /** -- cgit v1.2.3 From 0992884d09cc1c91e9c3310a9204eb080db37714 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:41 -0800 Subject: drm/xe: Remove dependency on intel_lrc_reg.h Create regs/xe_lrc_layout.h file with all the offsets used by the xe driver. Eventually the xe driver may use a different way to define them since it doesn't supported below gen12. v2: Rename file to intel_lrc_layout.h since it's not really about registers (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 3 +-- drivers/gpu/drm/xe/xe_lrc.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 2 +- 5 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_lrc_layout.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h new file mode 100644 index 000000000000..4be81abc86ad --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_LRC_LAYOUT_H_ +#define _XE_LRC_LAYOUT_H_ + +#define CTX_CONTEXT_CONTROL (0x02 + 1) +#define CTX_RING_HEAD (0x04 + 1) +#define CTX_RING_TAIL (0x06 + 1) +#define CTX_RING_START (0x08 + 1) +#define CTX_RING_CTL (0x0a + 1) +#define CTX_PDP0_UDW (0x30 + 1) +#define CTX_PDP0_LDW (0x32 + 1) + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index fe20c6128134..d788a6e894a6 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -9,6 +9,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_lrc_layout.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine.h" @@ -22,7 +23,6 @@ #include "xe_sched_job.h" #include "gt/intel_gpu_commands.h" -#include "gt/intel_lrc_reg.h" #include "i915_reg.h" #define XE_EXECLIST_HANG_LIMIT 1 diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6469d3cd3beb..aa21f2bb5cba 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -13,6 +13,7 @@ #include +#include "regs/xe_lrc_layout.h" #include "xe_device.h" #include "xe_engine.h" #include "xe_force_wake.h" @@ -32,8 +33,6 @@ #include "xe_trace.h" #include "xe_vm.h" -#include "gt/intel_lrc_reg.h" - static struct xe_gt * guc_to_gt(struct xe_guc *guc) { diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index bf12f71fbe72..5baa3cf53852 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -7,6 +7,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_lrc_layout.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine_types.h" @@ -16,7 +17,6 @@ #include "xe_vm.h" #include "gt/intel_gpu_commands.h" -#include "gt/intel_lrc_reg.h" #include "i915_reg.h" #define GEN8_CTX_VALID (1 << 0) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 1b633222fda6..6275f8c34878 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -6,6 +6,7 @@ #include 
"xe_ring_ops.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_lrc_layout.h" #include "xe_engine_types.h" #include "xe_gt.h" #include "xe_lrc.h" @@ -14,7 +15,6 @@ #include "xe_vm_types.h" #include "gt/intel_gpu_commands.h" -#include "gt/intel_lrc_reg.h" #include "i915_reg.h" static u32 preparser_disable(bool state) -- cgit v1.2.3 From 63955b3bfa0b69fd86b9e827e0f14f3fa4508826 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:42 -0800 Subject: drm/xe: Remove dependency on intel_gpu_commands.h Copy the macros used by xe in intel_gpu_commands.h to regs/xe_gpu_commands.h. PIPE_CONTROL_3D_ENGINE_FLAGS and PIPE_CONTROL_3D_ARCH_FLAGS were already defined in drivers/gpu/drm/xe/xe_ring_ops.c and only used there. So let that define to be used instead of also adding to the new header. v2: Let PIPE_CONTROL_3D_ENGINE_FLAGS/PIPE_CONTROL_3D_ARCH_FLAGS in the only .c that uses it instead of redefining (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 79 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_bb.c | 3 +- drivers/gpu/drm/xe/xe_device.h | 3 +- drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 3 +- drivers/gpu/drm/xe/xe_ring_ops.c | 49 +++++++++---------- 7 files changed, 109 insertions(+), 32 deletions(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_gpu_commands.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h new file mode 100644 index 000000000000..288576035ce3 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_COMMANDS_H_ +#define _XE_GPU_COMMANDS_H_ + +#define INSTR_CLIENT_SHIFT 29 +#define INSTR_MI_CLIENT 0x0 +#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT) + +#define MI_INSTR(opcode, flags) \ + (__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags)) + +#define MI_NOOP MI_INSTR(0, 0) +#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) + +#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) +#define MI_ARB_ENABLE (1<<0) +#define MI_ARB_DISABLE (0<<0) + +#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) +#define MI_STORE_DATA_IMM MI_INSTR(0x20, 0) + +#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +#define MI_LRI_LRM_CS_MMIO REG_BIT(19) +#define MI_LRI_MMIO_REMAP_EN REG_BIT(17) +#define MI_LRI_FORCE_POSTED (1<<12) + +#define MI_FLUSH_DW MI_INSTR(0x26, 1) +#define MI_FLUSH_DW_STORE_INDEX (1<<21) +#define MI_INVALIDATE_TLB (1<<18) +#define MI_FLUSH_DW_CCS (1<<16) +#define MI_FLUSH_DW_OP_STOREDW (1<<14) +#define MI_FLUSH_DW_USE_GTT (1<<2) + +#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) + +#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) +#define SRC_ACCESS_TYPE_SHIFT 21 +#define DST_ACCESS_TYPE_SHIFT 20 +#define CCS_SIZE_MASK 0x3FF +#define CCS_SIZE_SHIFT 8 +#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 25) +#define NUM_CCS_BYTES_PER_BLOCK 256 +#define NUM_BYTES_PER_CCS_BYTE 256 +#define NUM_CCS_BLKS_PER_XFER 1024 + +#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) +#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) +#define XY_FAST_COLOR_BLT_DW 16 +#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 21) +#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 + +#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) +#define BLT_DEPTH_32 (3<<24) + +#define GFX_OP_PIPE_CONTROL(len) 
((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) +#define PIPE_CONTROL_AMFS_FLUSH (1<<25) +#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) +#define PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) +#define PIPE_CONTROL_PSD_SYNC (1<<17) +#define PIPE_CONTROL_QW_WRITE (1<<14) +#define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) +#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) +#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) +#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) + +#define MI_ARB_CHECK MI_INSTR(0x05, 0) + +#endif diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index d10448d1b4d7..5b24018e2a80 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -5,6 +5,7 @@ #include "xe_bb.h" +#include "regs/xe_gpu_commands.h" #include "xe_device.h" #include "xe_engine_types.h" #include "xe_hw_fence.h" @@ -12,8 +13,6 @@ #include "xe_sched_job.h" #include "xe_vm_types.h" -#include "gt/intel_gpu_commands.h" - struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) { struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index ed55ef567d18..263620953c3b 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -11,12 +11,11 @@ struct xe_file; #include +#include "regs/xe_gpu_commands.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_macros.h" -#include "gt/intel_gpu_commands.h" - static inline struct xe_device *to_xe_device(const struct drm_device *dev) { return container_of(dev, struct xe_device, drm); diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index d788a6e894a6..8441ce24cfcf 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -8,6 +8,7 @@ #include #include "regs/xe_engine_regs.h" +#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_bo.h" @@ -22,7 +23,6 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" -#include "gt/intel_gpu_commands.h" #include "i915_reg.h" #define XE_EXECLIST_HANG_LIMIT 1 diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 5baa3cf53852..4435ec750489 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -6,6 +6,7 @@ #include "xe_lrc.h" #include "regs/xe_engine_regs.h" +#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_bo.h" @@ -16,7 +17,6 @@ #include "xe_map.h" #include "xe_vm.h" -#include "gt/intel_gpu_commands.h" #include "i915_reg.h" #define GEN8_CTX_VALID (1 << 0) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index bbab524dcee6..e2ee51381ac1 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -11,6 +11,7 @@ #include #include +#include "regs/xe_gpu_commands.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_engine.h" @@ -27,8 +28,6 @@ #include "xe_trace.h" #include "xe_vm.h" -#include "gt/intel_gpu_commands.h" - /** * struct xe_migrate - migrate context. 
*/ diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 6275f8c34878..ef8cef20acd6 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -5,6 +5,7 @@ #include "xe_ring_ops.h" +#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_engine_types.h" @@ -14,9 +15,32 @@ #include "xe_sched_job.h" #include "xe_vm_types.h" -#include "gt/intel_gpu_commands.h" #include "i915_reg.h" +/* + * 3D-related flags that can't be set on _engines_ that lack access to the 3D + * pipeline (i.e., CCS engines). + */ +#define PIPE_CONTROL_3D_ENGINE_FLAGS (\ + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ + PIPE_CONTROL_TILE_CACHE_FLUSH | \ + PIPE_CONTROL_DEPTH_STALL | \ + PIPE_CONTROL_STALL_AT_SCOREBOARD | \ + PIPE_CONTROL_PSD_SYNC | \ + PIPE_CONTROL_AMFS_FLUSH | \ + PIPE_CONTROL_VF_CACHE_INVALIDATE | \ + PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) + +/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */ +#define PIPE_CONTROL_3D_ARCH_FLAGS ( \ + PIPE_CONTROL_3D_ENGINE_FLAGS | \ + PIPE_CONTROL_INDIRECT_STATE_DISABLE | \ + PIPE_CONTROL_FLUSH_ENABLE | \ + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ + PIPE_CONTROL_DC_FLUSH_ENABLE) + + static u32 preparser_disable(bool state) { return MI_ARB_CHECK | BIT(8) | state; @@ -181,29 +205,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } -/* - * 3D-related flags that can't be set on _engines_ that lack access to the 3D - * pipeline (i.e., CCS engines). - */ -#define PIPE_CONTROL_3D_ENGINE_FLAGS (\ - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \ - PIPE_CONTROL_DEPTH_CACHE_FLUSH | \ - PIPE_CONTROL_TILE_CACHE_FLUSH | \ - PIPE_CONTROL_DEPTH_STALL | \ - PIPE_CONTROL_STALL_AT_SCOREBOARD | \ - PIPE_CONTROL_PSD_SYNC | \ - PIPE_CONTROL_AMFS_FLUSH | \ - PIPE_CONTROL_VF_CACHE_INVALIDATE | \ - PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET) - -/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */ -#define PIPE_CONTROL_3D_ARCH_FLAGS ( \ - PIPE_CONTROL_3D_ENGINE_FLAGS | \ - PIPE_CONTROL_INDIRECT_STATE_DISABLE | \ - PIPE_CONTROL_FLUSH_ENABLE | \ - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ - PIPE_CONTROL_DC_FLUSH_ENABLE) - static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_lrc *lrc, u64 batch_addr, u32 seqno) -- cgit v1.2.3 From c584148145f73819a5ed968dc64ae10060fcd2c5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:43 -0800 Subject: drm/xe: Remove dependency on i915_reg.h Copy the macros used by xe in i915_reg.h to regs/xe_regs.h. A minimal cleanup is done while copying so they adhere minimally to the coding style. Further reordering and cleaning is left for later. 
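For illustration only (an editor's sketch, not a hunk from this patch): the coding style referred to above is presumably the _MMIO()/REG_BIT()/REG_GENMASK() pattern already used in the new regs/*.h headers, rather than open-coded shifts, e.g.

	/* illustrative names and offsets only, not real registers */
	#define EXAMPLE_CTL			_MMIO(0x1234)
	#define   EXAMPLE_ENABLE		REG_BIT(7)
	#define   EXAMPLE_LEVEL_MASK		REG_GENMASK(15, 8)
	#define   EXAMPLE_LEVEL(x)		REG_FIELD_PREP(EXAMPLE_LEVEL_MASK, (x))

Converting the remaining open-coded definitions to this form is part of the reordering and cleaning left for later.
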
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 108 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_execlist.c | 3 +- drivers/gpu/drm/xe/xe_ggtt.c | 3 +- drivers/gpu/drm/xe/xe_gt_clock.c | 3 +- drivers/gpu/drm/xe/xe_guc_pc.c | 3 +- drivers/gpu/drm/xe/xe_hw_engine.c | 3 +- drivers/gpu/drm/xe/xe_irq.c | 3 +- drivers/gpu/drm/xe/xe_lrc.c | 3 +- drivers/gpu/drm/xe/xe_mmio.c | 3 +- drivers/gpu/drm/xe/xe_pci.c | 3 +- drivers/gpu/drm/xe/xe_ring_ops.c | 4 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 3 +- drivers/gpu/drm/xe/xe_wa.c | 3 +- 13 files changed, 120 insertions(+), 25 deletions(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_regs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h new file mode 100644 index 000000000000..a697162e1a77 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ +#ifndef _XE_REGS_H_ +#define _XE_REGS_H_ + +#include "i915_reg_defs.h" + +#define GU_CNTL _MMIO(0x101010) +#define LMEM_INIT REG_BIT(7) + +#define RENDER_RING_BASE 0x02000 +#define GEN11_BSD_RING_BASE 0x1c0000 +#define GEN11_BSD2_RING_BASE 0x1c4000 +#define GEN11_BSD3_RING_BASE 0x1d0000 +#define GEN11_BSD4_RING_BASE 0x1d4000 +#define XEHP_BSD5_RING_BASE 0x1e0000 +#define XEHP_BSD6_RING_BASE 0x1e4000 +#define XEHP_BSD7_RING_BASE 0x1f0000 +#define XEHP_BSD8_RING_BASE 0x1f4000 +#define VEBOX_RING_BASE 0x1a000 +#define GEN11_VEBOX_RING_BASE 0x1c8000 +#define GEN11_VEBOX2_RING_BASE 0x1d8000 +#define XEHP_VEBOX3_RING_BASE 0x1e8000 +#define XEHP_VEBOX4_RING_BASE 0x1f8000 +#define GEN12_COMPUTE0_RING_BASE 0x1a000 +#define GEN12_COMPUTE1_RING_BASE 0x1c000 +#define GEN12_COMPUTE2_RING_BASE 0x1e000 +#define GEN12_COMPUTE3_RING_BASE 0x26000 +#define BLT_RING_BASE 0x22000 +#define XEHPC_BCS1_RING_BASE 0x3e0000 +#define XEHPC_BCS2_RING_BASE 0x3e2000 +#define XEHPC_BCS3_RING_BASE 0x3e4000 +#define XEHPC_BCS4_RING_BASE 0x3e6000 +#define XEHPC_BCS5_RING_BASE 0x3e8000 +#define XEHPC_BCS6_RING_BASE 0x3ea000 +#define XEHPC_BCS7_RING_BASE 0x3ec000 +#define XEHPC_BCS8_RING_BASE 0x3ee000 +#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) +#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) +#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) +#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) +#define GT_RENDER_USER_INTERRUPT (1 << 0) + +#define GEN7_FF_THREAD_MODE _MMIO(0x20a0) +#define GEN12_FF_TESSELATION_DOP_GATE_DISABLE BIT(19) + +#define PVC_RP_STATE_CAP _MMIO(0x281014) +#define MTL_RP_STATE_CAP _MMIO(0x138000) + +#define MTL_MEDIAP_STATE_CAP _MMIO(0x138020) +#define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) +#define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) + +#define MTL_GT_RPE_FREQUENCY _MMIO(0x13800c) +#define MTL_MPE_FREQUENCY _MMIO(0x13802c) +#define MTL_RPE_MASK REG_GENMASK(8, 0) + +#define TRANSCODER_A_OFFSET 0x60000 +#define TRANSCODER_B_OFFSET 0x61000 +#define TRANSCODER_C_OFFSET 0x62000 +#define TRANSCODER_D_OFFSET 0x63000 +#define TRANSCODER_DSI0_OFFSET 0x6b000 +#define TRANSCODER_DSI1_OFFSET 0x6b800 +#define PIPE_A_OFFSET 0x70000 +#define PIPE_B_OFFSET 0x71000 +#define PIPE_C_OFFSET 0x72000 +#define PIPE_D_OFFSET 0x73000 +#define PIPE_DSI0_OFFSET 0x7b000 +#define PIPE_DSI1_OFFSET 0x7b800 + +#define GEN8_PCU_ISR _MMIO(0x444e0) +#define GEN8_PCU_IMR _MMIO(0x444e4) +#define GEN8_PCU_IIR _MMIO(0x444e8) +#define GEN8_PCU_IER _MMIO(0x444ec) + +#define GEN11_GU_MISC_ISR _MMIO(0x444f0) +#define 
GEN11_GU_MISC_IMR _MMIO(0x444f4) +#define GEN11_GU_MISC_IIR _MMIO(0x444f8) +#define GEN11_GU_MISC_IER _MMIO(0x444fc) +#define GEN11_GU_MISC_GSE (1 << 27) + +#define GEN11_GFX_MSTR_IRQ _MMIO(0x190010) +#define GEN11_MASTER_IRQ (1 << 31) +#define GEN11_GU_MISC_IRQ (1 << 29) +#define GEN11_DISPLAY_IRQ (1 << 16) +#define GEN11_GT_DW_IRQ(x) (1 << (x)) + +#define DG1_MSTR_TILE_INTR _MMIO(0x190008) +#define DG1_MSTR_IRQ REG_BIT(31) +#define DG1_MSTR_TILE(t) REG_BIT(t) + +#define GEN9_TIMESTAMP_OVERRIDE _MMIO(0x44074) +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0 +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3ff +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT 12 +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 12) + +#define GGC _MMIO(0x108040) +#define GMS_MASK REG_GENMASK(15, 8) +#define GGMS_MASK REG_GENMASK(7, 6) + +#define GEN12_GSMBASE _MMIO(0x108100) +#define GEN12_DSMBASE _MMIO(0x1080C0) +#define GEN12_BDSM_MASK REG_GENMASK64(63, 20) + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 8441ce24cfcf..be47d28da4c7 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -11,6 +11,7 @@ #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" +#include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine.h" @@ -23,8 +24,6 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" -#include "i915_reg.h" - #define XE_EXECLIST_HANG_LIMIT 1 #define GEN11_SW_CTX_ID_SHIFT 37 diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 3bf437321149..d6ebc1d77f4d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -11,6 +11,7 @@ #include #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" @@ -19,8 +20,6 @@ #include "xe_mmio.h" #include "xe_wopcm.h" -#include "i915_reg.h" - /* FIXME: Common file, preferably auto-gen */ #define MTL_GGTT_PTE_PAT0 BIT_ULL(52) #define MTL_GGTT_PTE_PAT1 BIT_ULL(53) diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index fd0ca33925cd..60a2966bc1fd 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -6,13 +6,12 @@ #include "xe_gt_clock.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_macros.h" #include "xe_mmio.h" -#include "i915_reg.h" - static u32 read_reference_ts_freq(struct xe_gt *gt) { u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index f983f47cefb6..acaba5c375e5 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -10,6 +10,7 @@ #include #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" @@ -20,8 +21,6 @@ #include "xe_mmio.h" #include "xe_pcode.h" -#include "i915_reg.h" -#include "i915_reg_defs.h" #include "intel_mchbar_regs.h" /* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */ diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 5e7f21b319bb..ae541b5e50f3 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -9,6 +9,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_bo.h" #include 
"xe_device.h" #include "xe_execlist.h" @@ -23,8 +24,6 @@ #include "xe_sched_job.h" #include "xe_wa.h" -#include "i915_reg.h" - #define MAX_MMIO_BASES 3 struct engine_info { const char *name; diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 04b3801fc0a8..071ccc75b71b 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -10,6 +10,7 @@ #include #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_device.h" #include "xe_drv.h" #include "xe_gt.h" @@ -17,8 +18,6 @@ #include "xe_hw_engine.h" #include "xe_mmio.h" -#include "i915_reg.h" - static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) { u32 val = xe_mmio_read32(gt, reg.reg); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 4435ec750489..af4518a82db2 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -9,6 +9,7 @@ #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" +#include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_engine_types.h" @@ -17,8 +18,6 @@ #include "xe_map.h" #include "xe_vm.h" -#include "i915_reg.h" - #define GEN8_CTX_VALID (1 << 0) #define GEN8_CTX_L3LLC_COHERENT (1 << 5) #define GEN8_CTX_PRIVILEGE (1 << 8) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5e6ca0d2076a..65b0df9bb579 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -10,14 +10,13 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_macros.h" #include "xe_module.h" -#include "i915_reg.h" - #define XEHP_MTCFG_ADDR _MMIO(0x101800) #define TILE_COUNT REG_GENMASK(15, 8) #define GEN12_LMEM_BAR 2 diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 884f9b16c9de..6dcefb8cc7c3 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -14,6 +14,7 @@ #include #include +#include "regs/xe_regs.h" #include "xe_device.h" #include "xe_drv.h" #include "xe_macros.h" @@ -21,8 +22,6 @@ #include "xe_pm.h" #include "xe_step.h" -#include "i915_reg.h" - #define DEV_INFO_FOR_EACH_FLAG(func) \ func(require_force_probe); \ func(is_dgfx); \ diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index ef8cef20acd6..7dd886536fbc 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -8,6 +8,7 @@ #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" +#include "regs/xe_regs.h" #include "xe_engine_types.h" #include "xe_gt.h" #include "xe_lrc.h" @@ -15,8 +16,6 @@ #include "xe_sched_job.h" #include "xe_vm_types.h" -#include "i915_reg.h" - /* * 3D-related flags that can't be set on _engines_ that lack access to the 3D * pipeline (i.e., CCS engines). 
@@ -40,7 +39,6 @@ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ PIPE_CONTROL_DC_FLUSH_ENABLE) - static u32 preparser_disable(bool state) { return MI_ARB_CHECK | BIT(8) | state; diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index fe0f707ad054..2e8d07ad42ae 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -11,8 +11,7 @@ #include #include -#include "../i915/i915_reg.h" - +#include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 155cfd1dcc50..df72b15dfeb0 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -9,6 +9,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_gt.h" @@ -18,8 +19,6 @@ #include "xe_rtp.h" #include "xe_step.h" -#include "i915_reg.h" - /** * DOC: Hardware workarounds * -- cgit v1.2.3 From e12ef39272a3690bc779e2d4d812e36c0e7d45f8 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:44 -0800 Subject: drm/xe/guc_pc: Move gt register to the proper place Move a few defines from xe_guc_pc.c to the right register, now that there is one: xe_gt_regs.h. Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++ drivers/gpu/drm/xe/xe_guc_pc.c | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 3eb92c975085..df2677c9c9f6 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -187,6 +187,7 @@ #define DFR_DISABLE (1 << 9) #define GEN6_RPNSWREQ _MMIO(0xa008) +#define REQ_RATIO_MASK REG_GENMASK(31, 23) #define GEN6_RC_CONTROL _MMIO(0xa090) #define GEN6_RC_STATE _MMIO(0xa094) @@ -243,6 +244,7 @@ #define FORCEWAKE_KERNEL_FALLBACK BIT(15) #define GEN6_GT_CORE_STATUS _MMIO(0x138060) +#define RCN_MASK REG_GENMASK(2, 0) #define GEN6_RC0 0 #define GEN6_RC6 3 diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index acaba5c375e5..d91dad8638ef 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -31,12 +31,6 @@ #define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) -/* For GEN6_RPNSWREQ.reg to be merged when the definition moves to Xe */ -#define REQ_RATIO_MASK REG_GENMASK(31, 23) - -/* For GEN6_GT_CORE_STATUS.reg to be merged when the definition moves to Xe */ -#define RCN_MASK REG_GENMASK(2, 0) - #define GEN12_RPSTAT1 _MMIO(0x1381b4) #define GEN12_CAGF_MASK REG_GENMASK(19, 11) -- cgit v1.2.3 From 5ec15f83117f2f89af39109c264c1fb0bbf8b5f0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:45 -0800 Subject: drm/xe: Remove dependency on intel_mchbar_regs.h The only thing really needed is the base offset, MCHBAR_MIRROR_BASE_SNB. Remove the include and just define it inplace. 
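For illustration (editor's sketch, not part of this patch), given a struct xe_gt *gt, a register defined this way is typically read and decoded with the existing xe_mmio_read32() and REG_FIELD_GET() helpers:

	u32 cap = xe_mmio_read32(gt, GEN6_RP_STATE_CAP.reg);
	u32 rp0 = REG_FIELD_GET(RP0_MASK, cap);
	u32 rpn = REG_FIELD_GET(RPN_MASK, cap);

The snippet only shows how the locally defined GEN6_RP_STATE_CAP and its masks are meant to be consumed; it is not code added by this change.
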
Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index d91dad8638ef..5a8d827ba770 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -21,12 +21,12 @@ #include "xe_mmio.h" #include "xe_pcode.h" -#include "intel_mchbar_regs.h" +#define MCHBAR_MIRROR_BASE_SNB 0x140000 -/* For GEN6_RP_STATE_CAP.reg to be merged when the definition moves to Xe */ -#define RP0_MASK REG_GENMASK(7, 0) -#define RP1_MASK REG_GENMASK(15, 8) -#define RPN_MASK REG_GENMASK(23, 16) +#define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998) +#define RP0_MASK REG_GENMASK(7, 0) +#define RP1_MASK REG_GENMASK(15, 8) +#define RPN_MASK REG_GENMASK(23, 16) #define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) -- cgit v1.2.3 From 3457388fcd145d64e6852ca60084e822bec81e9f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:15:48 -0800 Subject: drm/xe: Prefer single underscore for header guards Keep header guards consistent with regard to ifdef used. Prefer the more commonly used in the driver. $ git grep "ifndef __XE_" -- drivers/gpu/drm/xe | wc -l 8 $ git grep "ifndef _XE_" -- drivers/gpu/drm/xe | wc -l 112 Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo_test.h | 4 ++-- drivers/gpu/drm/xe/tests/xe_dma_buf_test.h | 4 ++-- drivers/gpu/drm/xe/tests/xe_migrate_test.h | 4 ++-- drivers/gpu/drm/xe/tests/xe_test.h | 4 ++-- drivers/gpu/drm/xe/xe_gt_topology.h | 6 +++--- drivers/gpu/drm/xe/xe_map.h | 4 ++-- drivers/gpu/drm/xe/xe_migrate.h | 4 ++-- drivers/gpu/drm/xe/xe_res_cursor.h | 4 ++-- 8 files changed, 17 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h index d751a618c0c8..0113ab45066a 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo_test.h +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h @@ -3,8 +3,8 @@ * Copyright © 2023 Intel Corporation */ -#ifndef __XE_BO_TEST_H__ -#define __XE_BO_TEST_H__ +#ifndef _XE_BO_TEST_H_ +#define _XE_BO_TEST_H_ struct kunit; diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h index 4e9a8bef5751..e6b464ddd526 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.h @@ -3,8 +3,8 @@ * Copyright © 2023 Intel Corporation */ -#ifndef __XE_DMA_BUF_TEST_H__ -#define __XE_DMA_BUF_TEST_H__ +#ifndef _XE_DMA_BUF_TEST_H_ +#define _XE_DMA_BUF_TEST_H_ struct kunit; diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.h b/drivers/gpu/drm/xe/tests/xe_migrate_test.h index db1f8ef035bb..7c645c66824f 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.h +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.h @@ -3,8 +3,8 @@ * Copyright © 2023 Intel Corporation */ -#ifndef __XE_MIGRATE_TEST_H__ -#define __XE_MIGRATE_TEST_H__ +#ifndef _XE_MIGRATE_TEST_H_ +#define _XE_MIGRATE_TEST_H_ struct kunit; diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h index 1ec502b5acf3..00c8a3f9af81 100644 --- a/drivers/gpu/drm/xe/tests/xe_test.h +++ b/drivers/gpu/drm/xe/tests/xe_test.h @@ -3,8 +3,8 @@ * Copyright © 2022 Intel Corporation */ -#ifndef __XE_TEST_H__ -#define __XE_TEST_H__ +#ifndef _XE_TEST_H_ +#define 
_XE_TEST_H_ #include diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index 7a0abc64084f..b2540dc266f2 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -3,8 +3,8 @@ * Copyright © 2022 Intel Corporation */ -#ifndef __XE_GT_TOPOLOGY_H__ -#define __XE_GT_TOPOLOGY_H__ +#ifndef _XE_GT_TOPOLOGY_H_ +#define _XE_GT_TOPOLOGY_H_ #include "xe_gt_types.h" @@ -17,4 +17,4 @@ void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); unsigned int xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum); -#endif /* __XE_GT_TOPOLOGY_H__ */ +#endif /* _XE_GT_TOPOLOGY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h index 0bac1f73a80d..032c2e8b5438 100644 --- a/drivers/gpu/drm/xe/xe_map.h +++ b/drivers/gpu/drm/xe/xe_map.h @@ -3,8 +3,8 @@ * Copyright © 2022 Intel Corporation */ -#ifndef __XE_MAP_H__ -#define __XE_MAP_H__ +#ifndef _XE_MAP_H_ +#define _XE_MAP_H_ #include diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index b2d55283252f..a569851db6f7 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -3,8 +3,8 @@ * Copyright © 2020 Intel Corporation */ -#ifndef __XE_MIGRATE__ -#define __XE_MIGRATE__ +#ifndef _XE_MIGRATE_ +#define _XE_MIGRATE_ #include diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 365c8ad7aeb8..4e99fae26b4c 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -21,8 +21,8 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef __XE_RES_CURSOR_H__ -#define __XE_RES_CURSOR_H__ +#ifndef _XE_RES_CURSOR_H_ +#define _XE_RES_CURSOR_H_ #include -- cgit v1.2.3 From 8cb49012ac171698b1253dea45e56c284e997d38 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 25 Feb 2023 12:10:39 -0800 Subject: drm/xe: Do not spread i915_reg_defs.h include Reduce the use of i915_reg_defs.h so it can be encapsulated in a single place. 
1) If it was being included by mistake, remove 2) If it was included for FIELD_GET()/FIELD_PREP()/GENMASK() and the like, just include 3) If it was included to be able to define additional registers, move the registers to the relavant headers (regs/xe_regs.h or regs/xe_gt_regs.h) v2: - Squash commit fixing i915_reg_defs.h include and with the one introducing regs/xe_reg_defs.h - Remove more cases of i915_reg_defs.h being used when all it was needed was linux/bitfield.h (Matt Roper) - Move some registers to the corresponding regs/*.h file (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi [Rodrigo squashed here the removal of the i915 include] --- drivers/gpu/drm/xe/Makefile | 3 --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 +++++++- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 11 +++++++++++ drivers/gpu/drm/xe/regs/xe_regs.h | 4 +++- drivers/gpu/drm/xe/xe_device.c | 3 +-- drivers/gpu/drm/xe/xe_gt_mcr.h | 2 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 1 + drivers/gpu/drm/xe/xe_gt_topology.c | 15 +++++---------- drivers/gpu/drm/xe/xe_guc.c | 8 -------- drivers/gpu/drm/xe/xe_guc_reg.h | 6 +++++- drivers/gpu/drm/xe/xe_migrate.c | 1 + drivers/gpu/drm/xe/xe_pcode_api.h | 2 ++ drivers/gpu/drm/xe/xe_reg_sr_types.h | 2 -- drivers/gpu/drm/xe/xe_rtp.h | 2 -- drivers/gpu/drm/xe/xe_rtp_types.h | 2 -- drivers/gpu/drm/xe/xe_step.c | 2 ++ 17 files changed, 40 insertions(+), 34 deletions(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_reg_defs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 58ee9e82156d..b3426f328d21 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -100,9 +100,6 @@ xe-y += xe_bb.o \ xe_wa.o \ xe_wopcm.o -# XXX: Needed for i915 register definitions. Will be removed after xe-regs. 
-subdir-ccflags-y += -I$(srctree)/drivers/gpu/drm/i915/ - obj-$(CONFIG_DRM_XE) += xe.o obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ \ diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 6dfa3cf2fd43..2aa67d001c34 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -8,7 +8,7 @@ #include -#include "i915_reg_defs.h" +#include "regs/xe_reg_defs.h" #define RING_TAIL(base) _MMIO((base) + 0x30) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index df2677c9c9f6..47377d2167e0 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -6,7 +6,7 @@ #ifndef _XE_GT_REGS_H_ #define _XE_GT_REGS_H_ -#include "i915_reg_defs.h" +#include "regs/xe_reg_defs.h" /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) @@ -108,6 +108,12 @@ #define GEN11_GT_VEBOX_DISABLE_SHIFT 16 #define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) +#define XELP_EU_ENABLE _MMIO(0x9134) /* "_DISABLE" on Xe_LP */ +#define XELP_EU_MASK REG_GENMASK(7, 0) +#define XELP_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913c) +#define XEHP_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) +#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT _MMIO(0x9148) + #define GEN6_GDRST _MMIO(0x941c) #define GEN11_GRDOM_GUC REG_BIT(3) #define GEN6_GRDOM_FULL (1 << 0) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h new file mode 100644 index 000000000000..5f6735697d9c --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_REG_DEFS_H_ +#define _XE_REG_DEFS_H_ + +#include "../../i915/i915_reg_defs.h" + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index a697162e1a77..2e7fbdedb5eb 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -5,7 +5,7 @@ #ifndef _XE_REGS_H_ #define _XE_REGS_H_ -#include "i915_reg_defs.h" +#include "regs/xe_reg_defs.h" #define GU_CNTL _MMIO(0x101010) #define LMEM_INIT REG_BIT(7) @@ -70,6 +70,8 @@ #define PIPE_DSI0_OFFSET 0x7b000 #define PIPE_DSI1_OFFSET 0x7b800 +#define SOFTWARE_FLAGS_SPR33 _MMIO(0x4f084) + #define GEN8_PCU_ISR _MMIO(0x444e0) #define GEN8_PCU_IMR _MMIO(0x444e4) #define GEN8_PCU_IIR _MMIO(0x444e8) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6d7d57d08a99..00e8ed235353 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -12,6 +12,7 @@ #include #include +#include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_debugfs.h" #include "xe_dma_buf.h" @@ -335,8 +336,6 @@ static void device_kill_persitent_engines(struct xe_device *xe, mutex_unlock(&xe->persitent_engines.lock); } -#define SOFTWARE_FLAGS_SPR33 _MMIO(0x4F084) - void xe_device_wmb(struct xe_device *xe) { struct xe_gt *gt = xe_device_get_gt(xe, 0); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index c31987d2177c..2a6cd38c8cb7 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -6,7 +6,7 @@ #ifndef _XE_GT_MCR_H_ #define _XE_GT_MCR_H_ -#include "i915_reg_defs.h" +#include "regs/xe_reg_defs.h" struct drm_printer; struct xe_gt; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 0e7047b89a83..1677640e1075 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ 
b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -5,6 +5,7 @@ #include "xe_gt_pagefault.h" +#include #include #include diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index c76aaea1887c..2123f84be336 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -7,18 +7,13 @@ #include +#include "regs/xe_gt_regs.h" #include "xe_gt.h" #include "xe_mmio.h" #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) #define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) -#define XELP_EU_ENABLE 0x9134 /* "_DISABLE" on Xe_LP */ -#define XELP_EU_MASK REG_GENMASK(7, 0) -#define XELP_GT_GEOMETRY_DSS_ENABLE 0x913c -#define XEHP_GT_COMPUTE_DSS_ENABLE 0x9144 -#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT 0x9148 - static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) { @@ -41,7 +36,7 @@ static void load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) { struct xe_device *xe = gt_to_xe(gt); - u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE); + u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE.reg); u32 val = 0; int i; @@ -86,10 +81,10 @@ xe_gt_topology_init(struct xe_gt *gt) } load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs, - XELP_GT_GEOMETRY_DSS_ENABLE); + XELP_GT_GEOMETRY_DSS_ENABLE.reg); load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, - XEHP_GT_COMPUTE_DSS_ENABLE, - XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); + XEHP_GT_COMPUTE_DSS_ENABLE.reg, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT.reg); load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); xe_gt_topology_dump(gt, &p); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 661effa9830f..58b9841616e4 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -22,14 +22,6 @@ #include "xe_uc_fw.h" #include "xe_wopcm.h" -#include "i915_reg_defs.h" - -/* TODO: move to common file */ -#define GUC_PVC_MOCS_INDEX_MASK REG_GENMASK(25, 24) -#define PVC_MOCS_UC_INDEX 1 -#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(GUC_PVC_MOCS_INDEX_MASK,\ - index) - static struct xe_gt * guc_to_gt(struct xe_guc *guc) { diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h index 513a7e0c8a5a..efd60c186bbc 100644 --- a/drivers/gpu/drm/xe/xe_guc_reg.h +++ b/drivers/gpu/drm/xe/xe_guc_reg.h @@ -9,7 +9,7 @@ #include #include -#include "i915_reg_defs.h" +#include "regs/xe_reg_defs.h" /* Definitions of GuC H/W registers, bits, etc */ @@ -93,6 +93,10 @@ #define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10) #define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) #define GUC_GEN10_SHIM_WC_ENABLE (1<<21) +#define PVC_GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) +#define PVC_MOCS_UC_INDEX 1 +#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK,\ + index) #define GUC_SEND_INTERRUPT _MMIO(0xc4c8) #define GUC_SEND_TRIGGER (1<<0) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index e2ee51381ac1..79aa3508ae3e 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -5,6 +5,7 @@ #include "xe_migrate.h" +#include #include #include diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 0762c8a912c7..4e689cd4b23b 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -5,6 +5,8 @@ /* Internal to xe_pcode */ +#include "regs/xe_reg_defs.h" + #define PCODE_MAILBOX _MMIO(0x138124) #define PCODE_READY REG_BIT(31) #define PCODE_MB_PARAM2 REG_GENMASK(23, 16) diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h 
b/drivers/gpu/drm/xe/xe_reg_sr_types.h index b234a8673e54..0e6d542ff1b4 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr_types.h +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -9,8 +9,6 @@ #include #include -#include "i915_reg_defs.h" - struct xe_reg_sr_entry { u32 clr_bits; u32 set_bits; diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index d6ba0b7e5042..bd44fd8bbe05 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -11,8 +11,6 @@ #include "xe_rtp_types.h" -#include "i915_reg_defs.h" - /* * Register table poke infrastructure */ diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index fac0bd6d5b1e..e87f1b280d96 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -8,8 +8,6 @@ #include -#include "i915_reg_defs.h" - struct xe_hw_engine; struct xe_gt; diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index ca77d0971529..14f482f29ae4 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -5,6 +5,8 @@ #include "xe_step.h" +#include + #include "xe_device.h" #include "xe_platform_types.h" -- cgit v1.2.3 From 7bc08d2f49b065cbabca8caad142df147b96dfff Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 23 Feb 2023 10:57:34 -0800 Subject: drm/xe/mocs: Drop unwanted TGL table TGL/RKL/ADLS/ADLP are all supposed to use the same MOCS table, with values defined in the bspec. Any entries listed in the bspec as reserved/error/undefined should always be initialized to the most cached and least coherent setting possible so that any userspace accidentally referencing those undefined entries will only experience an increase in coherency if spec updates down the road start defining real values. The TGL and gen12 table entries defined in the driver today are identical except that the TGL includes one additional (incorrect) setting for table index 1. Furthermore, the TGL-specific initialization does not define a dedicated value for info->unused_entries_index, so this incorrect table entry 1 also gets used to populate the MOCS registers for all reserved/unused table entries. This incorrect behavior is a holdover from i915 where the platform was enabled with an incorrect setting and by the time we noticed, it was too late to fix the table without breaking ABI compatibility (and on TGL we did indeed have some buggy userspace that was referencing the 'reserved' entry 1). Since the Xe driver starts fresh with a clean slate on ABI, there's no need to repeat the mistakes of i915 here. v2: - Reword/clarify commit message. (Lucas) Bspec: 45101 Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 46 -------------------------------------------- 1 file changed, 46 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 7ff81041d5ce..618b0069bcba 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -247,47 +247,6 @@ struct xe_mocs_info { LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ L3_1_UC) -static const struct xe_mocs_entry tgl_mocs_desc[] = { - /* - * NOTE: - * Reserved and unspecified MOCS indices have been set to (L3 + LCC). - * These reserved entries should never be used, they may be changed - * to low performant variants with better coherency in the future if - * more entries are needed. We are programming index XE_MOCS_PTE(1) - * only, __init_mocs_table() take care to program unused index with - * this entry. 
- */ - MOCS_ENTRY(XE_MOCS_PTE, - LE_0_PAGETABLE | LE_TC_0_PAGETABLE, - L3_1_UC), - GEN11_MOCS_ENTRIES, - - /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ - MOCS_ENTRY(48, - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), - L3_3_WB), - /* Implicitly enable L1 - HDC:L1 + L3 */ - MOCS_ENTRY(49, - LE_1_UC | LE_TC_1_LLC, - L3_3_WB), - /* Implicitly enable L1 - HDC:L1 + LLC */ - MOCS_ENTRY(50, - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), - L3_1_UC), - /* Implicitly enable L1 - HDC:L1 */ - MOCS_ENTRY(51, - LE_1_UC | LE_TC_1_LLC, - L3_1_UC), - /* HW Special Case (CCS) */ - MOCS_ENTRY(60, - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), - L3_1_UC), - /* HW Special Case (Displayable) */ - MOCS_ENTRY(61, - LE_1_UC | LE_TC_1_LLC, - L3_3_WB), -}; - static const struct xe_mocs_entry dg1_mocs_desc[] = { /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), @@ -422,11 +381,6 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 5; break; case XE_TIGERLAKE: - info->size = ARRAY_SIZE(tgl_mocs_desc); - info->table = tgl_mocs_desc; - info->n_entries = GEN9_NUM_MOCS_ENTRIES; - info->uc_index = 3; - break; case XE_ALDERLAKE_S: case XE_ALDERLAKE_P: info->size = ARRAY_SIZE(gen12_mocs_desc); -- cgit v1.2.3 From 579a6546d33c92d810d19e971fd85ee4d0b9a5ce Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 23 Feb 2023 10:57:35 -0800 Subject: drm/xe/mocs: Add missing RKL handling RKL should use the same "gen12" MOCS handling as TGL/ADL-S/ADL-P. Bspec: 45101 Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 618b0069bcba..7f0dd7e7364d 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -381,6 +381,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 5; break; case XE_TIGERLAKE: + case XE_ROCKETLAKE: case XE_ALDERLAKE_S: case XE_ALDERLAKE_P: info->size = ARRAY_SIZE(gen12_mocs_desc); -- cgit v1.2.3 From d1000e3fc9fa6bfb88d37a177542b9b24802081f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 23 Feb 2023 10:57:36 -0800 Subject: drm/xe/mocs: Drop xe_mocs_info_index The values in the xe_mocs_info_index enum only match old pre-gen12 hardware not supported by the Xe driver. The only usage of this enum was to set a default value for info->unused_entries_index, but this is unnecessary since every platform in the subsequent switch statement sets a proper platform-specific value (and the XE_MOCS_PTE default doesn't even make sense since the hardware dropped the "use PAT settings" capability in gen12). v2: - Add a check that unusued_entries_index is non-zero; even for platforms where this is a valid table entry, it's never the one we want this value assigned to. (Lucas) Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 7f0dd7e7364d..65295cd4f4ad 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -23,30 +23,6 @@ static inline void mocs_dbg(const struct drm_device *dev, { /* noop */ } #endif -/* - * MOCS indexes used for GPU surfaces, defining the cacheability of the - * surface data and the coherency for this data wrt. CPU vs. GPU accesses. 
- */ -enum xe_mocs_info_index { - /* - * Not cached anywhere, coherency between CPU and GPU accesses is - * guaranteed. - */ - XE_MOCS_UNCACHED, - /* - * Cacheability and coherency controlled by the kernel automatically - * based on the xxxx IOCTL setting and the current - * usage of the surface (used for display scanout or not). - */ - XE_MOCS_PTE, - /* - * Cached in all GPU caches available on the platform. - * Coherency between CPU and GPU accesses to the surface is not - * guaranteed without extra synchronization. - */ - XE_MOCS_CACHED, -}; - enum { HAS_GLOBAL_MOCS = BIT(0), HAS_RENDER_L3CC = BIT(1), @@ -341,7 +317,6 @@ static unsigned int get_mocs_settings(struct xe_device *xe, memset(info, 0, sizeof(struct xe_mocs_info)); - info->unused_entries_index = XE_MOCS_PTE; switch (xe->info.platform) { case XE_PVC: info->size = ARRAY_SIZE(pvc_mocs_desc); @@ -395,6 +370,16 @@ static unsigned int get_mocs_settings(struct xe_device *xe, return 0; } + /* + * Index 0 is a reserved/unused table entry on most platforms, but + * even on those where it does represent a legitimate MOCS entry, it + * never represents the "most cached, least coherent" behavior we want + * to populate undefined table rows with. So if unused_entries_index + * is still 0 at this point, we'll assume that it was omitted by + * mistake in the switch statement above. + */ + XE_WARN_ON(info->unused_entries_index == 0); + if (XE_WARN_ON(info->size > info->n_entries)) return 0; @@ -406,9 +391,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe, } /* - * Get control_value from MOCS entry taking into account when it's not used - * then if unused_entries_index is non-zero then its value will be returned - * otherwise XE_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry. If the table entry is not defined, the + * settings from unused_entries_index will be returned. */ static u32 get_entry_control(const struct xe_mocs_info *info, unsigned int index) -- cgit v1.2.3 From 6c57023ec42713e6cb91fdfbbd77147979e597e2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 23 Feb 2023 10:57:37 -0800 Subject: drm/xe/mocs: Drop duplicate assignment of uc_index The DG1 branch needlessly assigns uc_index twice. Drop the second instance. Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 65295cd4f4ad..e24d0dbc178e 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -352,7 +352,6 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->table = dg1_mocs_desc; info->uc_index = 1; info->n_entries = GEN9_NUM_MOCS_ENTRIES; - info->uc_index = 1; info->unused_entries_index = 5; break; case XE_TIGERLAKE: -- cgit v1.2.3 From ee17e7f34a5e8a996da0c54e31584c5b089d65ff Mon Sep 17 00:00:00 2001 From: Philippe Lecluse Date: Thu, 23 Feb 2023 10:57:38 -0800 Subject: drm/xe/mocs: add MTL mocs It was incorrectly using dg2_mocs for now. 
v2 (MattR): - Use REG_GENMASK/REG_FIELD_PREP for bitfields - Add bspec references Bspec: 45101, 45410, 63882 Signed-off-by: Philippe Lecluse Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 69 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index e24d0dbc178e..e00fa2155dc4 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -62,6 +62,10 @@ struct xe_mocs_info { #define L3_GLBGO(value) ((value) << 6) #define L3_LKUP(value) ((value) << 7) +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */ +#define _L4_CACHEABILITY REG_GENMASK(3, 2) +#define IG_PAT REG_BIT(8) + /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ #define PVC_NUM_MOCS_ENTRIES 3 @@ -89,6 +93,12 @@ struct xe_mocs_info { #define L3_2_RESERVED _L3_CACHEABILITY(2) #define L3_3_WB _L3_CACHEABILITY(3) +/* L4 caching options */ +#define L4_0_WB REG_FIELD_PREP(_L4_CACHEABILITY, 0) +#define L4_1_WT REG_FIELD_PREP(_L4_CACHEABILITY, 1) +#define L4_2_RESERVED REG_FIELD_PREP(_L4_CACHEABILITY, 2) +#define L4_3_UC REG_FIELD_PREP(_L4_CACHEABILITY, 3) + #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ [__idx] = { \ .control_value = __control_value, \ @@ -310,6 +320,57 @@ static const struct xe_mocs_entry pvc_mocs_desc[] = { MOCS_ENTRY(2, 0, L3_3_WB), }; +static const struct xe_mocs_entry mtl_mocs_desc[] = { + /* Error - Reserved for Non-Use */ + MOCS_ENTRY(0, + 0, + L3_LKUP(1) | L3_3_WB), + /* Cached - L3 + L4 */ + MOCS_ENTRY(1, + IG_PAT, + L3_LKUP(1) | L3_3_WB), + /* L4 - GO:L3 */ + MOCS_ENTRY(2, + IG_PAT, + L3_LKUP(1) | L3_1_UC), + /* Uncached - GO:L3 */ + MOCS_ENTRY(3, + IG_PAT | L4_3_UC, + L3_LKUP(1) | L3_1_UC), + /* L4 - GO:Mem */ + MOCS_ENTRY(4, + IG_PAT, + L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC), + /* Uncached - GO:Mem */ + MOCS_ENTRY(5, + IG_PAT | L4_3_UC, + L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC), + /* L4 - L3:NoLKUP; GO:L3 */ + MOCS_ENTRY(6, + IG_PAT, + L3_1_UC), + /* Uncached - L3:NoLKUP; GO:L3 */ + MOCS_ENTRY(7, + IG_PAT | L4_3_UC, + L3_1_UC), + /* L4 - L3:NoLKUP; GO:Mem */ + MOCS_ENTRY(8, + IG_PAT, + L3_GLBGO(1) | L3_1_UC), + /* Uncached - L3:NoLKUP; GO:Mem */ + MOCS_ENTRY(9, + IG_PAT | L4_3_UC, + L3_GLBGO(1) | L3_1_UC), + /* Display - L3; L4:WT */ + MOCS_ENTRY(14, + IG_PAT | L4_1_WT, + L3_LKUP(1) | L3_3_WB), + /* CCS - Non-Displayable */ + MOCS_ENTRY(15, + IG_PAT, + L3_GLBGO(1) | L3_1_UC), +}; + static unsigned int get_mocs_settings(struct xe_device *xe, struct xe_mocs_info *info) { @@ -327,11 +388,11 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 2; break; case XE_METEORLAKE: - info->size = ARRAY_SIZE(dg2_mocs_desc); - info->table = dg2_mocs_desc; + info->size = ARRAY_SIZE(mtl_mocs_desc); + info->table = mtl_mocs_desc; info->n_entries = MTL_NUM_MOCS_ENTRIES; - info->uc_index = 1; - info->unused_entries_index = 3; + info->uc_index = 9; + info->unused_entries_index = 1; break; case XE_DG2: if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 && -- cgit v1.2.3 From f659ac1564d96b1ba19694db9899d6fb18ffc3e7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 23 Feb 2023 10:57:39 -0800 Subject: drm/xe/mocs: LNCF MOCS settings only need to be restored on pre-Xe_HP Reprogramming the LNCF MOCS registers on render domain reset is not intended to be regular driver programming, but rather the implementation 
of a specific workaround (Wa_1607983814). This workaround no longer applies on Xe_HP any beyond, so we can expect that these registers, like the rest of the LNCF/LBCF registers, will maintain their values through all engine resets. We should only add these registers to the GuC's save/restore list on platforms that need the workaround. Furthermore, xe_mocs_init_engine() appears to be another attempt to satisfy this same workaround. This is unnecessary on the Xe driver since even on platforms where the workaround is necessary, all single-engine resets are initiated by the GuC and thus the GuC will take care of saving/restoring these registers. The only host-initiated resets we have in Xe are full GT resets which will already (re)initialize these registers as part of the regular xe_mocs_init() flow. v2: - Add needs_wa_1607983814() so that calculate_regset_size() doesn't overallocate regset space when the workaround isn't needed. (Lucas) - On platforms affected by Wa_1607983814, only add the LNCF MOCS registers to the render engine's GuC save/restore list; resets of other engines don't need to save/restore these. Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 20 ++++++++++++++++---- drivers/gpu/drm/xe/xe_guc_submit.c | 1 - drivers/gpu/drm/xe/xe_mocs.c | 13 ------------- drivers/gpu/drm/xe/xe_mocs.h | 1 - 5 files changed, 17 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index be47d28da4c7..ae7ee56f1b1b 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -460,7 +460,7 @@ static void execlist_engine_suspend_wait(struct xe_engine *e) static void execlist_engine_resume(struct xe_engine *e) { - xe_mocs_init_engine(e); + /* NIY */ } static const struct xe_engine_ops execlist_engine_ops = { diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 49725093fb47..7a892ff7aba3 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -207,6 +207,11 @@ static void guc_ads_fini(struct drm_device *drm, void *arg) xe_bo_unpin_map_no_vm(ads->bo); } +static bool needs_wa_1607983814(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) < 1250; +} + static size_t calculate_regset_size(struct xe_gt *gt) { struct xe_reg_sr_entry *sr_entry; @@ -219,7 +224,10 @@ static size_t calculate_regset_size(struct xe_gt *gt) xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry) count++; - count += (ADS_REGSET_EXTRA_MAX + LNCFCMOCS_REG_COUNT) * XE_NUM_HW_ENGINES; + count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; + + if (needs_wa_1607983814(gt_to_xe(gt))) + count += LNCFCMOCS_REG_COUNT; return count * sizeof(struct guc_mmio_reg); } @@ -431,6 +439,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, struct iosys_map *regset_map, struct xe_hw_engine *hwe) { + struct xe_device *xe = ads_to_xe(ads); struct xe_hw_engine *hwe_rcs_reset_domain = xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); struct xe_reg_sr_entry *entry; @@ -465,9 +474,12 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, e->reg, e->flags, count++); } - for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { - guc_mmio_regset_write_one(ads, regset_map, - GEN9_LNCFCMOCS(i).reg, 0, count++); + /* Wa_1607983814 */ + if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { + for (i = 0; i < 
LNCFCMOCS_REG_COUNT; i++) { + guc_mmio_regset_write_one(ads, regset_map, + GEN9_LNCFCMOCS(i).reg, 0, count++); + } } XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg))); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index aa21f2bb5cba..0ba6f5dcd029 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1270,7 +1270,6 @@ static void guc_engine_resume(struct xe_engine *e) XE_BUG_ON(e->guc->suspend_pending); - xe_mocs_init_engine(e); guc_engine_add_msg(e, msg, RESUME); } diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index e00fa2155dc4..ef237853fdab 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -517,19 +517,6 @@ static void init_l3cc_table(struct xe_gt *gt, } } -void xe_mocs_init_engine(const struct xe_engine *engine) -{ - struct xe_mocs_info table; - unsigned int flags; - - flags = get_mocs_settings(engine->gt->xe, &table); - if (!flags) - return; - - if (flags & HAS_RENDER_L3CC && engine->class == XE_ENGINE_CLASS_RENDER) - init_l3cc_table(engine->gt, &table); -} - void xe_mocs_init(struct xe_gt *gt) { struct xe_mocs_info table; diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index aba1abe216ab..63500a1d6660 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -11,7 +11,6 @@ struct xe_engine; struct xe_gt; -void xe_mocs_init_engine(const struct xe_engine *engine); void xe_mocs_init(struct xe_gt *gt); /** -- cgit v1.2.3 From 90385dcfc040648e928a883298a19e2afbba41e5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 23 Feb 2023 10:57:40 -0800 Subject: drm/xe/mocs: Drop HAS_RENDER_L3CC flag The HAS_RENDER_L3CC is set unconditionally so there's no need to keep it as a dedicated flag. For error checking purposes, we can just make sure the 'table' field is initialized properly. Cc: Lucas De Marchi Suggested-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index ef237853fdab..e09c6242aafc 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -25,7 +25,6 @@ static inline void mocs_dbg(const struct drm_device *dev, enum { HAS_GLOBAL_MOCS = BIT(0), - HAS_RENDER_L3CC = BIT(1), }; struct xe_mocs_entry { @@ -440,10 +439,11 @@ static unsigned int get_mocs_settings(struct xe_device *xe, */ XE_WARN_ON(info->unused_entries_index == 0); - if (XE_WARN_ON(info->size > info->n_entries)) + if (XE_WARN_ON(info->size > info->n_entries)) { + info->table = NULL; return 0; + } - flags = HAS_RENDER_L3CC; if (!IS_DGFX(xe)) flags |= HAS_GLOBAL_MOCS; @@ -538,6 +538,6 @@ void xe_mocs_init(struct xe_gt *gt) * sure the LNCFCMOCSx registers are programmed for the subsequent * memory transactions including guc transactions */ - if (flags & HAS_RENDER_L3CC) + if (table.table) init_l3cc_table(gt, &table); } -- cgit v1.2.3 From e103c45f501a32eaa9e0a12db1c1e167b06f78cf Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 22 Dec 2022 10:53:59 +0000 Subject: drm/xe: prefer xe_bo_create_pin_map() With small-bar we likely want to annotate all the kernel users that require CPU access with vram. If xe_bo_create_pin_map() is the central place for that then we should have a central place to annotate. 
This also simplifies the code and fixes what appears to be a double xe_bo_put(hwe->hwsp) in the error handling. Signed-off-by: Matthew Auld Cc: Lucas De Marchi Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 23 +++-------------- drivers/gpu/drm/xe/xe_lrc.c | 53 +++++++++++++-------------------------- drivers/gpu/drm/xe/xe_lrc_types.h | 1 - 3 files changed, 22 insertions(+), 55 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index ae541b5e50f3..b035e2fa6744 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -310,24 +310,14 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt); - hwe->hwsp = xe_bo_create_locked(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | - XE_BO_CREATE_GGTT_BIT); + hwe->hwsp = xe_bo_create_pin_map(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_GGTT_BIT); if (IS_ERR(hwe->hwsp)) { err = PTR_ERR(hwe->hwsp); goto err_name; } - err = xe_bo_pin(hwe->hwsp); - if (err) - goto err_unlock_put_hwsp; - - err = xe_bo_vmap(hwe->hwsp); - if (err) - goto err_unpin_hwsp; - - xe_bo_unlock_no_vm(hwe->hwsp); - err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K); if (err) goto err_hwsp; @@ -353,15 +343,10 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, return 0; -err_unpin_hwsp: - xe_bo_unpin(hwe->hwsp); -err_unlock_put_hwsp: - xe_bo_unlock_no_vm(hwe->hwsp); - xe_bo_put(hwe->hwsp); err_kernel_lrc: xe_lrc_finish(&hwe->kernel_lrc); err_hwsp: - xe_bo_put(hwe->hwsp); + xe_bo_unpin_map_no_vm(hwe->hwsp); err_name: hwe->name = NULL; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index af4518a82db2..9140b057a5ba 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -615,7 +615,11 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, lrc->flags = 0; - lrc->bo = xe_bo_create_locked(xe, hwe->gt, vm, + /* + * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address + * via VM bind calls. + */ + lrc->bo = xe_bo_create_pin_map(xe, hwe->gt, vm, ring_size + xe_lrc_size(xe, hwe->class), ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) | @@ -628,21 +632,6 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, else lrc->full_gt = hwe->gt; - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. 
- */ - err = xe_bo_pin(lrc->bo); - if (err) - goto err_unlock_put_bo; - lrc->flags |= XE_LRC_PINNED; - - err = xe_bo_vmap(lrc->bo); - if (err) - goto err_unpin_bo; - - xe_bo_unlock_vm_held(lrc->bo); - lrc->ring.size = ring_size; lrc->ring.tail = 0; @@ -652,8 +641,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (!gt->default_lrc[hwe->class]) { init_data = empty_lrc_data(hwe); if (!init_data) { - xe_lrc_finish(lrc); - return -ENOMEM; + err = -ENOMEM; + goto err_lrc_finish; } } @@ -710,12 +699,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, return 0; -err_unpin_bo: - if (lrc->flags & XE_LRC_PINNED) - xe_bo_unpin(lrc->bo); -err_unlock_put_bo: - xe_bo_unlock_vm_held(lrc->bo); - xe_bo_put(lrc->bo); +err_lrc_finish: + xe_lrc_finish(lrc); return err; } @@ -724,17 +709,15 @@ void xe_lrc_finish(struct xe_lrc *lrc) struct ww_acquire_ctx ww; xe_hw_fence_ctx_finish(&lrc->fence_ctx); - if (lrc->flags & XE_LRC_PINNED) { - if (lrc->bo->vm) - xe_vm_lock(lrc->bo->vm, &ww, 0, false); - else - xe_bo_lock_no_vm(lrc->bo, NULL); - xe_bo_unpin(lrc->bo); - if (lrc->bo->vm) - xe_vm_unlock(lrc->bo->vm, &ww); - else - xe_bo_unlock_no_vm(lrc->bo); - } + if (lrc->bo->vm) + xe_vm_lock(lrc->bo->vm, &ww, 0, false); + else + xe_bo_lock_no_vm(lrc->bo, NULL); + xe_bo_unpin(lrc->bo); + if (lrc->bo->vm) + xe_vm_unlock(lrc->bo->vm, &ww); + else + xe_bo_unlock_no_vm(lrc->bo); xe_bo_put(lrc->bo); } diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 2827efa2091d..8fe08535873d 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,7 +25,6 @@ struct xe_lrc { /** @flags: LRC flags */ u32 flags; -#define XE_LRC_PINNED BIT(1) /** @ring: submission ring state */ struct { -- cgit v1.2.3 From d79bdcdf06a3b421ac386f3513365f0bf2a5649a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 22 Dec 2022 10:36:47 +0000 Subject: drm/xe/bo: explicitly reject zero sized BO In the depths of ttm, when allocating the vma node this should result in -ENOSPC it seems. However we should probably rather reject as part of our own ioctl sanity checking, and then treat as programmer error in the lower levels. Signed-off-by: Matthew Auld Cc: Lucas De Marchi Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3e5393e00b43..09b8db6d7ba3 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -971,6 +971,9 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, /* Only kernel objects should set GT */ XE_BUG_ON(gt && type != ttm_bo_type_kernel); + if (XE_WARN_ON(!size)) + return ERR_PTR(-EINVAL); + if (!bo) { bo = xe_bo_alloc(); if (IS_ERR(bo)) @@ -1524,6 +1527,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, args->handle)) return -EINVAL; + if (XE_IOCTL_ERR(xe, !args->size)) + return -EINVAL; + if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX)) return -EINVAL; -- cgit v1.2.3 From 3ea9f1f1f699c44b3064006b51566ed6accc6a53 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 24 Feb 2023 16:21:37 -0800 Subject: drm/xe/device: Prefer the drm-managed mutex_init There's inconsistent use of mutex_init(), in xe_device_create(), with several of them never calling mutex_destroy() in xe_device_destroy(). Migrate all of them to drmm_mutex_init(), so the destroy part is automatically called. 
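As an illustration of the resulting pattern (a sketch only, not part of this patch; the field names are taken from the hunk below): a drmm-managed mutex is released automatically when the drm_device is torn down, so the explicit destroy call disappears:

	/* before: init and destroy must be kept in sync manually */
	mutex_init(&xe->sb_lock);
	...
	mutex_destroy(&xe->sb_lock);	/* easy to forget in the destroy path */

	/* after: cleanup is tied to the drm_device lifetime */
	drmm_mutex_init(&xe->drm, &xe->sb_lock);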
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230225002138.1759016-2-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 00e8ed235353..8a9f1e5ce34d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -155,7 +155,6 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) struct xe_device *xe = to_xe_device(dev); destroy_workqueue(xe->ordered_wq); - mutex_destroy(&xe->persitent_engines.lock); ttm_device_fini(&xe->ttm); } @@ -187,10 +186,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); - mutex_init(&xe->usm.lock); + drmm_mutex_init(&xe->drm, &xe->usm.lock); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1); - mutex_init(&xe->persitent_engines.lock); + drmm_mutex_init(&xe->drm, &xe->persitent_engines.lock); INIT_LIST_HEAD(&xe->persitent_engines.list); spin_lock_init(&xe->pinned.lock); @@ -200,14 +199,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); - mutex_init(&xe->sb_lock); + drmm_mutex_init(&xe->drm, &xe->sb_lock); xe->enabled_irq_mask = ~0; err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); if (err) goto err_put; - mutex_init(&xe->mem_access.lock); + drmm_mutex_init(&xe->drm, &xe->mem_access.lock); + return xe; err_put: -- cgit v1.2.3 From 541623a406fe1fd516ac9564b2388a3ec31610fe Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 1 Mar 2023 17:34:05 -0800 Subject: drm/xe: Fix typo persitent->persistent Fix typo as noticed by Matt Roper: git grep -l persitent | xargs sed -i 's/persitent/persistent/g' ... and then fix coding style issues. 
Signed-off-by: Lucas De Marchi Reviewed-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20230302013411.3262608-2-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 46 ++++++++++++++++++------------------ drivers/gpu/drm/xe/xe_device.h | 6 ++--- drivers/gpu/drm/xe/xe_device_types.h | 8 +++---- drivers/gpu/drm/xe/xe_engine.c | 6 ++--- drivers/gpu/drm/xe/xe_engine_types.h | 6 ++--- drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 7 files changed, 38 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8a9f1e5ce34d..49ce11fc1174 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -51,8 +51,8 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) return 0; } -static void device_kill_persitent_engines(struct xe_device *xe, - struct xe_file *xef); +static void device_kill_persistent_engines(struct xe_device *xe, + struct xe_file *xef); static void xe_file_close(struct drm_device *dev, struct drm_file *file) { @@ -69,7 +69,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) } mutex_unlock(&xef->engine.lock); mutex_destroy(&xef->engine.lock); - device_kill_persitent_engines(xe, xef); + device_kill_persistent_engines(xe, xef); mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) @@ -189,8 +189,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, drmm_mutex_init(&xe->drm, &xe->usm.lock); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1); - drmm_mutex_init(&xe->drm, &xe->persitent_engines.lock); - INIT_LIST_HEAD(&xe->persitent_engines.list); + drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock); + INIT_LIST_HEAD(&xe->persistent_engines.list); spin_lock_init(&xe->pinned.lock); INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); @@ -305,35 +305,35 @@ void xe_device_shutdown(struct xe_device *xe) { } -void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e) +void xe_device_add_persistent_engines(struct xe_device *xe, struct xe_engine *e) { - mutex_lock(&xe->persitent_engines.lock); - list_add_tail(&e->persitent.link, &xe->persitent_engines.list); - mutex_unlock(&xe->persitent_engines.lock); + mutex_lock(&xe->persistent_engines.lock); + list_add_tail(&e->persistent.link, &xe->persistent_engines.list); + mutex_unlock(&xe->persistent_engines.lock); } -void xe_device_remove_persitent_engines(struct xe_device *xe, - struct xe_engine *e) +void xe_device_remove_persistent_engines(struct xe_device *xe, + struct xe_engine *e) { - mutex_lock(&xe->persitent_engines.lock); - if (!list_empty(&e->persitent.link)) - list_del(&e->persitent.link); - mutex_unlock(&xe->persitent_engines.lock); + mutex_lock(&xe->persistent_engines.lock); + if (!list_empty(&e->persistent.link)) + list_del(&e->persistent.link); + mutex_unlock(&xe->persistent_engines.lock); } -static void device_kill_persitent_engines(struct xe_device *xe, - struct xe_file *xef) +static void device_kill_persistent_engines(struct xe_device *xe, + struct xe_file *xef) { struct xe_engine *e, *next; - mutex_lock(&xe->persitent_engines.lock); - list_for_each_entry_safe(e, next, &xe->persitent_engines.list, - persitent.link) - if (e->persitent.xef == xef) { + mutex_lock(&xe->persistent_engines.lock); + list_for_each_entry_safe(e, next, &xe->persistent_engines.list, + persistent.link) + if (e->persistent.xef == xef) { xe_engine_kill(e); - list_del_init(&e->persitent.link); + 
list_del_init(&e->persistent.link); } - mutex_unlock(&xe->persitent_engines.lock); + mutex_unlock(&xe->persistent_engines.lock); } void xe_device_wmb(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 263620953c3b..25c5087f5aad 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -37,9 +37,9 @@ int xe_device_probe(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); -void xe_device_add_persitent_engines(struct xe_device *xe, struct xe_engine *e); -void xe_device_remove_persitent_engines(struct xe_device *xe, - struct xe_engine *e); +void xe_device_add_persistent_engines(struct xe_device *xe, struct xe_engine *e); +void xe_device_remove_persistent_engines(struct xe_device *xe, + struct xe_engine *e); void xe_device_wmb(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c9f74dc4c9fd..00b1db28a4b4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -138,13 +138,13 @@ struct xe_device { struct mutex lock; } usm; - /** @persitent_engines: engines that are closed but still running */ + /** @persistent_engines: engines that are closed but still running */ struct { - /** @lock: protects persitent engines */ + /** @lock: protects persistent engines */ struct mutex lock; - /** @list: list of persitent engines */ + /** @list: list of persistent engines */ struct list_head list; - } persitent_engines; + } persistent_engines; /** @pinned: pinned BO state */ struct { diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 519fbbcabdb9..3e40fb6d3f98 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -47,7 +47,7 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe, e->fence_irq = >->fence_irq[hwe->class]; e->ring_ops = gt->ring_ops[hwe->class]; e->ops = gt->engine_ops; - INIT_LIST_HEAD(&e->persitent.link); + INIT_LIST_HEAD(&e->persistent.link); INIT_LIST_HEAD(&e->compute.link); INIT_LIST_HEAD(&e->multi_gt_link); @@ -620,7 +620,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, goto put_engine; } - e->persitent.xef = xef; + e->persistent.xef = xef; mutex_lock(&xef->engine.lock); err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL); @@ -716,7 +716,7 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, if (!(e->flags & ENGINE_FLAG_PERSISTENT)) xe_engine_kill(e); else - xe_device_add_persitent_engines(xe, e); + xe_device_add_persistent_engines(xe, e); trace_xe_engine_close(e); xe_engine_put(e); diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h index 3dfa1c14e181..2f6f0f2a0a8b 100644 --- a/drivers/gpu/drm/xe/xe_engine_types.h +++ b/drivers/gpu/drm/xe/xe_engine_types.h @@ -94,14 +94,14 @@ struct xe_engine { }; /** - * @persitent: persitent engine state + * @persistent: persistent engine state */ struct { /** @xef: file which this engine belongs to */ struct xe_file *xef; - /** @link: link in list of persitent engines */ + /** @link: link in list of persistent engines */ struct list_head link; - } persitent; + } persistent; union { /** diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index ae7ee56f1b1b..e540e5d287a0 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -400,7 +400,7 @@ static void execlist_engine_fini_async(struct 
work_struct *w) spin_unlock_irqrestore(&exl->port->lock, flags); if (e->flags & ENGINE_FLAG_PERSISTENT) - xe_device_remove_persitent_engines(gt_to_xe(e->gt), e); + xe_device_remove_persistent_engines(gt_to_xe(e->gt), e); drm_sched_entity_fini(&exl->entity); drm_sched_fini(&exl->sched); kfree(exl); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 0ba6f5dcd029..d0b48c885fda 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -890,7 +890,7 @@ static void __guc_engine_fini_async(struct work_struct *w) trace_xe_engine_destroy(e); if (e->flags & ENGINE_FLAG_PERSISTENT) - xe_device_remove_persitent_engines(gt_to_xe(e->gt), e); + xe_device_remove_persistent_engines(gt_to_xe(e->gt), e); release_guc_id(guc, e); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); -- cgit v1.2.3 From 63239946bc0101c2b10c119c77cd4b132d2c6484 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 2 Mar 2023 08:00:38 -0800 Subject: drm/xe: Fix size of xe_eu_mask_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XE_MAX_DSS_FUSE_REGS was being used to calculate the size of xe_eu_mask_t while it should use XE_MAX_EU_FUSE_REGS. There are no know issues about this but fixing it anyways. Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index b01edd3fdc4d..74b4e6776bf1 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -31,7 +31,7 @@ enum xe_gt_type { #define XE_MAX_EU_FUSE_REGS 1 typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; -typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; +typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)]; struct xe_mmio_range { u32 start; -- cgit v1.2.3 From cedbc0b75790a1ee4f0bad0124c84b6813c2ef8c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 6 Mar 2023 13:24:50 -0800 Subject: drm/xe: Fix duplicated setting for register 0x6604 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following warning shows up for TGL: [drm:xe_reg_sr_add [xe]] *ERROR* Discarding save-restore reg 6604 (clear: 00ff0000, set: 00040000, masked: no): ret=-22 [drm:xe_reg_sr_add [xe]] *ERROR* Discarding save-restore reg 6604 (clear: 00ff0000, set: 00040000, masked: no): ret=-22 That is because the same register is being set both by the WAs and the tunings. Like was done in i915, prefer the tuning over the workaround since that is applicable for more platforms. Also fix the tuning: it was incorrectly using the MCR version of the register, but that only became true in XEHP. 
References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/233 Reported-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230306212450.803557-1-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 - drivers/gpu/drm/xe/xe_tuning.c | 11 ++++++----- drivers/gpu/drm/xe/xe_wa.c | 10 +++------- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 47377d2167e0..6a728d2809c5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -70,7 +70,6 @@ #define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910) #define GEN12_FF_MODE2 _MMIO(0x6604) -#define XEHP_FF_MODE2 MCR_REG(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 624b257ecfbc..2861a014c85c 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -24,11 +24,12 @@ static const struct xe_rtp_entry gt_tunings[] = { }; static const struct xe_rtp_entry lrc_tunings[] = { - { XE_RTP_NAME("1604555607"), - XE_RTP_RULES(GRAPHICS_VERSION(1200)), - XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, - FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128)) + { XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + /* read verification is ignored due to 1608008084. */ + XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, + FF_MODE2_GS_TIMER_MASK, + FF_MODE2_GS_TIMER_224)) }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index df72b15dfeb0..71e9e1a111f8 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -265,13 +265,9 @@ static const struct xe_rtp_entry lrc_was[] = { GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, XE_RTP_ACTION_FLAG(MASKED_REG))) }, - { XE_RTP_NAME("16011163337"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - /* read verification is ignored due to 1608008084. */ - XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, - FF_MODE2_GS_TIMER_MASK, - FF_MODE2_GS_TIMER_224)) - }, + + /* DG1 */ + { XE_RTP_NAME("1409044764"), XE_RTP_RULES(PLATFORM(DG1)), XE_RTP_ACTIONS(CLR(GEN11_COMMON_SLICE_CHICKEN3, -- cgit v1.2.3 From 7dae750dde42459483054384a5d234b54e643cdd Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 6 Mar 2023 08:57:57 -0800 Subject: drm/xe: Fix ROW_CHICKEN2 define When this register was added in xe for some workarounds, it was copied from i915 before the registers got changed to add the MCR annotation. The register 0xe4f4 is MCR since gen8, long before any GPU supported by the xe driver. Replace all occurrences with the right register. 
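To make the distinction concrete (an illustrative sketch, not code from this patch; both helpers appear elsewhere in this series): a register defined with MCR_REG() is read through the steered MCR helpers so that a non-terminated instance is selected, while a plain _MMIO() register is read directly:

	/* non-MCR register: ordinary MMIO read */
	u32 ff_mode2 = xe_mmio_read32(gt, GEN12_FF_MODE2.reg);

	/* MCR register: read is steered to a present, powered-up instance */
	u32 row_chicken2 = xe_gt_mcr_unicast_read_any(gt, GEN8_ROW_CHICKEN2);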
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230306165757.633796-1-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- drivers/gpu/drm/xe/xe_wa.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 6a728d2809c5..d3b862e4cd0d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -230,7 +230,7 @@ #define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) -#define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) +#define GEN8_ROW_CHICKEN2 MCR_REG(0xe4f4) #define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) #define GEN12_DISABLE_EARLY_READ REG_BIT(14) #define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 71e9e1a111f8..03c5b01a14e4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -191,7 +191,7 @@ static const struct xe_rtp_entry engine_was[] = { }, { XE_RTP_NAME("1606931601"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, + XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), @@ -213,7 +213,7 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), - XE_RTP_ACTIONS(SET(GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, + XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("14010229206, 1409085225"), -- cgit v1.2.3 From 282c683a56e9713a3b70c4cffd17cb48bdbacca2 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 2 Mar 2023 09:54:59 +0100 Subject: drm/xe/tests: Remove CONFIG_FB dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently don't have any tests that explicitly depends on this config option, so remove that build dependency. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 9c773dd74cbd..565be3f6b9b9 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -64,7 +64,6 @@ config DRM_XE_SIMPLE_ERROR_CAPTURE config DRM_XE_KUNIT_TEST tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS depends on DRM_XE && KUNIT && DEBUG_FS - depends on FB && FB = DRM_KMS_HELPER && DRM_FBDEV_EMULATION default KUNIT_ALL_TESTS select DRM_EXPORT_FOR_TESTS if m help -- cgit v1.2.3 From 907a319c8c8e125224b088f91f468f549f1e1da7 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 2 Mar 2023 10:01:41 +0100 Subject: drm/xe/tests: Grab a memory access reference around the migrate sanity test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It appears we don't hold a memory access reference for the accesses in this test, which may results in printed warnings and possibly the GT not woken up for the memory accesses. Add a memory access reference around the test. 
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 0de17e90aba9..b7e4a126e8b7 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -366,7 +366,9 @@ static int migrate_test_run_device(struct xe_device *xe) kunit_info(test, "Testing gt id %d.\n", id); xe_vm_lock(m->eng->vm, &ww, 0, true); + xe_device_mem_access_get(xe); xe_migrate_sanity_test(m, test); + xe_device_mem_access_put(xe); xe_vm_unlock(m->eng->vm, &ww); } -- cgit v1.2.3 From b99cb6216bdf350e2d94c547c27f063b4434ae5d Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Mon, 6 Mar 2023 05:34:59 -0800 Subject: drm/xe/migrate: Fix number of PT structs in docbook Update xe_migrate_doc.h with 32 page table structs (not 48) v2: minor typo fix Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Maarten Lankhorst Signed-off-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230306133459.7803-1-niranjana.vishwanathapura@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate_doc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h index 6a68fdff08dc..63c7d67b5b62 100644 --- a/drivers/gpu/drm/xe/xe_migrate_doc.h +++ b/drivers/gpu/drm/xe/xe_migrate_doc.h @@ -21,7 +21,7 @@ * table BOs for updates, and identity map the entire device's VRAM with 1 GB * pages. * - * Currently the page structure consists of 48 phyiscal pages with 16 being + * Currently the page structure consists of 32 physical pages with 16 being * reserved for BO mapping during copies and clear, 1 reserved for kernel binds, * several pages are needed to setup the identity mappings (exact number based * on how many bits of address space the device has), and the rest are reserved -- cgit v1.2.3 From 6db7761bbca649319096431c38670c596107596d Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 8 Mar 2023 17:23:22 +0100 Subject: drm/xe/stolen: Exclude reserved lmem portion The address set by firmware in GEN12_DSMBASE in driver initialization doesn't mean "anything above that and until end of lmem is part of DSM". In fact, there may be a few KB that is not part of DSM on the end of lmem. How large is that space is platform-dependent, but since it's always less than the DSM granularity, it can be simplified by simply aligning the size down. Suggested-by: Lucas De Marchi Signed-off-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 2e8d07ad42ae..1116e217ebc2 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -88,7 +88,13 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, 2)) mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base; - return stolen_size; + /* + * There may be few KB of platform dependent reserved memory at the end + * of lmem which is not part of the DSM. 
Such reserved memory portion is + * always less then DSM granularity so align down the stolen_size to DSM + * granularity to accommodate such reserve lmem portion. + */ + return ALIGN_DOWN(stolen_size, SZ_1M); } static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) -- cgit v1.2.3 From 39fd0b4507c3ba86ef04827208dd3aa85d2d796e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 7 Mar 2023 16:55:08 -0800 Subject: drm/xe/guc: Handle regset overflow check for entire GT Checking whether a single engine's register save/restore entries overflow the expected/pre-allocated GuC ADS regset area isn't terribly useful; we actually want to check whether the combined entries from all engines on the GT overflow the regset space. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230308005509.2975663-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ads.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 7a892ff7aba3..fd9911ffeae4 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -482,8 +482,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, } } - XE_BUG_ON(ads->regset_size < (count * sizeof(struct guc_mmio_reg))); - return count; } @@ -496,6 +494,7 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset; struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), regset_offset); + unsigned int regset_used = 0; for_each_hw_engine(hwe, gt, id) { unsigned int count; @@ -521,7 +520,11 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) addr += count * sizeof(struct guc_mmio_reg); iosys_map_incr(®set_map, count * sizeof(struct guc_mmio_reg)); + + regset_used += count * sizeof(struct guc_mmio_reg); } + + XE_BUG_ON(regset_used > ads->regset_size); } static void guc_um_init_params(struct xe_guc_ads *ads) -- cgit v1.2.3 From 2a8477f7614a62b41b034e3eaf017d41e8a58ce9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 8 Mar 2023 12:30:08 +0000 Subject: drm/xe: s/lmem/vram/ This seems to be the preferred nomenclature in xe. Currently we are intermixing vram and lmem, which is confusing. v2 (Gwan-gyeong Mun & Lucas): - Rather apply to the entire driver Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: Gwan-gyeong Mun Cc: Lucas De Marchi Acked-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig.debug | 2 +- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_bo.c | 10 ++++----- drivers/gpu/drm/xe/xe_bo.h | 6 +++--- drivers/gpu/drm/xe/xe_ggtt.c | 6 +++--- drivers/gpu/drm/xe/xe_migrate.c | 12 +++++------ drivers/gpu/drm/xe/xe_mmio.c | 40 +++++++++++++++++------------------ drivers/gpu/drm/xe/xe_module.c | 6 +++--- drivers/gpu/drm/xe/xe_module.h | 2 +- drivers/gpu/drm/xe/xe_pt.c | 12 +++++------ drivers/gpu/drm/xe/xe_vm.c | 10 ++++----- 11 files changed, 54 insertions(+), 54 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 565be3f6b9b9..93b284cdd0a2 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -41,7 +41,7 @@ config DRM_XE_DEBUG_VM If in doubt, say "N". 
config DRM_XE_DEBUG_MEM - bool "Enable passing SYS/LMEM addresses to user space" + bool "Enable passing SYS/VRAM addresses to user space" default n help Pass object location trough uapi. Intended for extended diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index b7e4a126e8b7..ac659b94e7f5 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -129,7 +129,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } dma_fence_put(fence); - /* Try to copy 0xc0 from sysmem to lmem with 2MB or 64KiB/4KiB pages */ + /* Try to copy 0xc0 from sysmem to vram with 2MB or 64KiB/4KiB pages */ xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size); xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 09b8db6d7ba3..cfb79519b673 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1299,12 +1299,12 @@ int xe_bo_pin(struct xe_bo *bo) if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && bo->flags & XE_BO_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); - bool lmem; + bool vram; if (mem_type_is_vram(place->mem_type)) { XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); - place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &lmem) - + place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &vram) - vram_region_io_offset(bo)) >> PAGE_SHIFT; place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); @@ -1424,7 +1424,7 @@ bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) } dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, - size_t page_size, bool *is_lmem) + size_t page_size, bool *is_vram) { struct xe_res_cursor cur; u64 page; @@ -1436,9 +1436,9 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, page = offset >> PAGE_SHIFT; offset &= (PAGE_SIZE - 1); - *is_lmem = xe_bo_is_vram(bo); + *is_vram = xe_bo_is_vram(bo); - if (!*is_lmem && !xe_bo_is_stolen(bo)) { + if (!*is_vram && !xe_bo_is_stolen(bo)) { XE_BUG_ON(!bo->ttm.ttm); xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 8c2cdbe51ab5..4350845542bf 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -196,14 +196,14 @@ static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo); dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, - size_t page_size, bool *is_lmem); + size_t page_size, bool *is_vram); static inline dma_addr_t xe_bo_main_addr(struct xe_bo *bo, size_t page_size) { - bool is_lmem; + bool is_vram; - return xe_bo_addr(bo, 0, page_size, &is_lmem); + return xe_bo_addr(bo, 0, page_size, &is_vram); } static inline u32 diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index d6ebc1d77f4d..99bc9036c7a0 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -28,12 +28,12 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) { struct xe_device *xe = xe_bo_device(bo); u64 pte; - bool is_lmem; + bool is_vram; - pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem); + pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_vram); pte |= GEN8_PAGE_PRESENT; - if (is_lmem) + if (is_vram) pte |= GEN12_GGTT_PTE_LM; /* FIXME: vfunc + pass in caching rules */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 79aa3508ae3e..4a9fe1f7128d 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -222,15 +222,15 @@ static 
int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, level++; } } else { - bool is_lmem; - u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_lmem); + bool is_vram; + u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_vram); m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); if (xe->info.supports_usm) { batch = gt->usm.bb_pool.bo; batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, - &is_lmem); + &is_vram); m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); } } @@ -933,12 +933,12 @@ static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs, */ XE_BUG_ON(update->qwords > 0x1ff); if (!ppgtt_ofs) { - bool is_lmem; + bool is_vram; ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, GEN8_PAGE_SIZE, - &is_lmem)); - XE_BUG_ON(!is_lmem); + &is_vram)); + XE_BUG_ON(!is_vram); } do { diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 65b0df9bb579..e5bd4609aaee 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -68,7 +68,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) return 1; } -static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) +static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct pci_bus *root = pdev->bus; @@ -78,31 +78,31 @@ static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) u32 pci_cmd; int i; int ret; - u64 force_lmem_bar_size = xe_force_lmem_bar_size; + u64 force_vram_bar_size = xe_force_vram_bar_size; current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR)); - if (force_lmem_bar_size) { + if (force_vram_bar_size) { u32 bar_sizes; - rebar_size = force_lmem_bar_size * (resource_size_t)SZ_1M; + rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M; bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR); if (rebar_size == current_size) return 0; if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || - rebar_size >= roundup_pow_of_two(lmem_size)) { - rebar_size = lmem_size; + rebar_size >= roundup_pow_of_two(vram_size)) { + rebar_size = vram_size; drm_info(&xe->drm, "Given bar size is not within supported size, setting it to default: %llu\n", - (u64)lmem_size >> 20); + (u64)vram_size >> 20); } } else { rebar_size = current_size; - if (rebar_size != roundup_pow_of_two(lmem_size)) - rebar_size = lmem_size; + if (rebar_size != roundup_pow_of_two(vram_size)) + rebar_size = vram_size; else return 0; } @@ -117,7 +117,7 @@ static int xe_resize_lmem_bar(struct xe_device *xe, resource_size_t lmem_size) } if (!root_res) { - drm_info(&xe->drm, "Can't resize LMEM BAR - platform support is missing\n"); + drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing\n"); return -1; } @@ -168,7 +168,7 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si if (usable_size) { reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); *usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; - drm_info(&xe->drm, "lmem_size: 0x%llx usable_size: 0x%llx\n", + drm_info(&xe->drm, "vram_size: 0x%llx usable_size: 0x%llx\n", *vram_size, *usable_size); } @@ -180,7 +180,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct xe_gt *gt; u8 id; - u64 lmem_size; + u64 vram_size; u64 original_size; u64 current_size; u64 usable_size; @@ -207,29 +207,29 @@ int xe_mmio_probe_vram(struct xe_device *xe) gt = 
xe_device_get_gt(xe, 0); original_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - err = xe_mmio_total_vram_size(xe, &lmem_size, &usable_size); + err = xe_mmio_total_vram_size(xe, &vram_size, &usable_size); if (err) return err; - resize_result = xe_resize_lmem_bar(xe, lmem_size); + resize_result = xe_resize_vram_bar(xe, vram_size); current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); - xe->mem.vram.size = min(current_size, lmem_size); + xe->mem.vram.size = min(current_size, vram_size); if (!xe->mem.vram.size) return -EIO; if (resize_result > 0) - drm_info(&xe->drm, "Successfully resize LMEM from %lluMiB to %lluMiB\n", + drm_info(&xe->drm, "Successfully resize VRAM from %lluMiB to %lluMiB\n", (u64)original_size >> 20, (u64)current_size >> 20); - else if (xe->mem.vram.size < lmem_size && !xe_force_lmem_bar_size) + else if (xe->mem.vram.size < vram_size && !xe_force_vram_bar_size) drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n", (u64)xe->mem.vram.size >> 20); - if (xe->mem.vram.size < lmem_size) + if (xe->mem.vram.size < vram_size) drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", - lmem_size, (u64)xe->mem.vram.size); + vram_size, (u64)xe->mem.vram.size); xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size); xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size); @@ -360,7 +360,7 @@ int xe_mmio_init(struct xe_device *xe) * and we should not continue with driver initialization. */ if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) { - drm_err(&xe->drm, "LMEM not initialized by firmware\n"); + drm_err(&xe->drm, "VRAM not initialized by firmware\n"); return -ENODEV; } diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 3f5d03a58696..e8ee7a9b0878 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -18,9 +18,9 @@ bool enable_guc = true; module_param_named_unsafe(enable_guc, enable_guc, bool, 0444); MODULE_PARM_DESC(enable_guc, "Enable GuC submission"); -u32 xe_force_lmem_bar_size; -module_param_named(lmem_bar_size, xe_force_lmem_bar_size, uint, 0600); -MODULE_PARM_DESC(lmem_bar_size, "Set the lmem bar size(in MiB)"); +u32 xe_force_vram_bar_size; +module_param_named(vram_bar_size, xe_force_vram_bar_size, uint, 0600); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); int xe_guc_log_level = 5; module_param_named(guc_log_level, xe_guc_log_level, int, 0600); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 2c6ee46f5595..86916c176382 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -8,6 +8,6 @@ /* Module modprobe variables */ extern bool enable_guc; extern bool enable_display; -extern u32 xe_force_lmem_bar_size; +extern u32 xe_force_vram_bar_size; extern int xe_guc_log_level; extern char *xe_param_force_probe; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 00d9fff53828..64da98152455 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -61,12 +61,12 @@ u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, const enum xe_cache_level level) { u64 pde; - bool is_lmem; + bool is_vram; - pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_lmem); + pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_vram); pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; - XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_lmem); + 
XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_vram); /* FIXME: I don't think the PPAT handling is correct for MTL */ @@ -79,13 +79,13 @@ u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, } static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, - size_t page_size, bool *is_lmem) + size_t page_size, bool *is_vram) { if (xe_vma_is_userptr(vma)) { struct xe_res_cursor cur; u64 page; - *is_lmem = false; + *is_vram = false; page = offset >> PAGE_SHIFT; offset &= (PAGE_SIZE - 1); @@ -93,7 +93,7 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, &cur); return xe_res_dma(&cur) + offset; } else { - return xe_bo_addr(vma->bo, offset, page_size, is_lmem); + return xe_bo_addr(vma->bo, offset, page_size, is_vram); } } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index fcac31f11706..a8254a4148f7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3379,7 +3379,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) { struct rb_node *node; - bool is_lmem; + bool is_vram; uint64_t addr; if (!down_read_trylock(&vm->lock)) { @@ -3387,8 +3387,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) return 0; } if (vm->pt_root[gt_id]) { - addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_lmem); - drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_lmem ? "LMEM" : "SYS"); + addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_vram); + drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS"); } for (node = rb_first(&vm->vmas); node; node = rb_next(node)) { @@ -3401,11 +3401,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur); addr = xe_res_dma(&cur); } else { - addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_lmem); + addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_vram); } drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", vma->start, vma->end, vma->end - vma->start + 1ull, - addr, is_userptr ? "USR" : is_lmem ? "VRAM" : "SYS"); + addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS"); } up_read(&vm->lock); -- cgit v1.2.3 From 7c7225ddaa343a3f380f8b92cd2b30e1b5701cb1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 8 Mar 2023 16:55:29 -0800 Subject: drm/xe: Separate engine fuse handling into dedicated functions The single function to handle fuse registers for all types of engines is becoming a bit long and hard to follow (and we haven't even added the compute engines yet). Let's split it into dedicated functions for each engine class. v2: - Add note about BCS0 always being present. (Bala) - Add forcewake assertion to read_copy_fuses. 
(Bala) Cc: Balasubramani Vivekanandan Reviewed-by: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230309005530.3140173-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b035e2fa6744..abfd35491b47 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -369,29 +369,29 @@ static void hw_engine_setup_logical_mapping(struct xe_gt *gt) } } -static void read_fuses(struct xe_gt *gt) +static void read_media_fuses(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); u32 media_fuse; u16 vdbox_mask; u16 vebox_mask; - u32 bcs_mask; int i, j; xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg); + /* - * FIXME: Hack job, thinking we should have table of vfuncs for each - * class which picks the correct vfunc based on IP version. + * Pre-Xe_HP platforms had register bits representing absent engines, + * whereas Xe_HP and beyond have bits representing present engines. + * Invert the polarity on old platforms so that we can use common + * handling below. */ - - media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg); if (GRAPHICS_VERx100(xe) < 1250) media_fuse = ~media_fuse; - vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; - vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> - GEN11_GT_VEBOX_DISABLE_SHIFT; + vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse); + vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse); for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if (!(gt->info.engine_mask & BIT(i))) @@ -412,28 +412,37 @@ static void read_fuses(struct xe_gt *gt) drm_info(&xe->drm, "vecs%u fused off\n", j); } } +} + +static void read_copy_fuses(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 bcs_mask; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg); bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask); - for (i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { + /* BCS0 is always present; only BCS1-BCS8 may be fused off */ + for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { if (!(gt->info.engine_mask & BIT(i))) continue; - if (!(BIT(j/2) & bcs_mask)) { + if (!(BIT(j / 2) & bcs_mask)) { gt->info.engine_mask &= ~BIT(i); drm_info(&xe->drm, "bcs%u fused off\n", j); } } - - /* TODO: compute engines */ } int xe_hw_engines_init_early(struct xe_gt *gt) { int i; - read_fuses(gt); + read_media_fuses(gt); + read_copy_fuses(gt); + /* TODO: compute engines */ for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++) hw_engine_init_early(gt, >->hw_engines[i], i); -- cgit v1.2.3 From 13fb0c98723f54a884090864983fff4953deb185 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 8 Mar 2023 16:55:30 -0800 Subject: drm/xe: Add support for CCS engine fusing For Xe_HP platforms that can have multiple CCS engines, the presence/absence of each CCS is inferred by the presence/absence of any DSS in the corresponding quadrant of the GT's DSS mask. This handling is only needed on platforms that can have more than one CCS. The CCS is never fused off on platforms like MTL that can only have one. 
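A worked example of that inference, with numbers taken from the DSS fuse layout handled below: when the DSS masks fit in a single 32-bit fuse register, each CCS quadrant covers 32 / 4 = 8 DSS slots, so e.g. CCS2 remains usable only if at least one DSS in slots 16-23 is present; when the masks span two 32-bit registers, each quadrant widens to 16 slots. The exact computation is in the xe_gt_topology_has_dss_in_quadrant() helper added below.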
v2: - Add extra warnings to try to catch mistakes where the register counts in get_num_dss_regs() are updated without corresponding updates to the register parameters passed to load_dss_mask(). (Lucas) - Add kerneldoc for xe_gt_topology_has_dss_in_quadrant() and clarify why we care about quadrants of the DSS space. (Lucas) - Ensure CCS engine counting treats engine mask as 64-bit. (Lucas) Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230309005530.3140173-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_topology.c | 65 ++++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_gt_topology.h | 3 ++ drivers/gpu/drm/xe/xe_hw_engine.c | 29 ++++++++++++++++- 3 files changed, 85 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 2123f84be336..f2cbee53462b 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -62,6 +62,21 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS); } +static void +get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) +{ + if (GRAPHICS_VERx100(xe) == 1260) { + *geometry_regs = 0; + *compute_regs = 2; + } else if (GRAPHICS_VERx100(xe) >= 1250) { + *geometry_regs = 1; + *compute_regs = 1; + } else { + *geometry_regs = 1; + *compute_regs = 0; + } +} + void xe_gt_topology_init(struct xe_gt *gt) { @@ -69,18 +84,17 @@ xe_gt_topology_init(struct xe_gt *gt) struct drm_printer p = drm_debug_printer("GT topology"); int num_geometry_regs, num_compute_regs; - if (GRAPHICS_VERx100(xe) == 1260) { - num_geometry_regs = 0; - num_compute_regs = 2; - } else if (GRAPHICS_VERx100(xe) >= 1250) { - num_geometry_regs = 1; - num_compute_regs = 1; - } else { - num_geometry_regs = 1; - num_compute_regs = 0; - } + get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); - load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs, + /* + * Register counts returned shouldn't exceed the number of registers + * passed as parameters below. + */ + drm_WARN_ON(&xe->drm, num_geometry_regs > 1); + drm_WARN_ON(&xe->drm, num_compute_regs > 2); + + load_dss_mask(gt, gt->fuse_topo.g_dss_mask, + num_geometry_regs, XELP_GT_GEOMETRY_DSS_ENABLE.reg); load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, XEHP_GT_COMPUTE_DSS_ENABLE.reg, @@ -113,3 +127,32 @@ xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum) { return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } + +/** + * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant + * @gt: GT to check + * @quad: Which quadrant of the DSS space to check + * + * Since Xe_HP platforms can have up to four CCS engines, those engines + * are each logically associated with a quarter of the possible DSS. If there + * are no DSS present in one of the four quadrants of the DSS space, the + * corresponding CCS engine is also not available for use. + * + * Returns false if all DSS in a quadrant of the GT are fused off, else true. 
+ */ +bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad) +{ + struct xe_device *xe = gt_to_xe(gt); + xe_dss_mask_t all_dss; + int g_dss_regs, c_dss_regs, dss_per_quad, quad_first; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS); + + get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs); + dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4; + + quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad); + + return quad_first < (quad + 1) * dss_per_quad; +} diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index b2540dc266f2..f47ab1b1269c 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -17,4 +17,7 @@ void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); unsigned int xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum); +bool +xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); + #endif /* _XE_GT_TOPOLOGY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index abfd35491b47..63a4efd5edcc 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -436,13 +436,40 @@ static void read_copy_fuses(struct xe_gt *gt) } } +static void read_compute_fuses(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + /* + * CCS fusing based on DSS masks only applies to platforms that can + * have more than one CCS. + */ + if (hweight64(gt->info.engine_mask & + GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1) + return; + + /* + * CCS availability on Xe_HP is inferred from the presence of DSS in + * each quadrant. + */ + for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "ccs%u fused off\n", j); + } + } +} + int xe_hw_engines_init_early(struct xe_gt *gt) { int i; read_media_fuses(gt); read_copy_fuses(gt); - /* TODO: compute engines */ + read_compute_fuses(gt); for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++) hw_engine_init_early(gt, >->hw_engines[i], i); -- cgit v1.2.3 From 4b1430f77553ca3e4f9033d4d614b193da233a30 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 8 Mar 2023 19:49:22 +0100 Subject: drm/xe/vm: Use the correct vma destroy sequence on userptr failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the below warning by using the correct vma destroy sequence: [ 92.204921] ------------[ cut here ]------------ [ 92.204954] WARNING: CPU: 3 PID: 2449 at drivers/gpu/drm/xe/xe_vm.c:933 xe_vma_destroy+0x280/0x290 [xe] [ 92.205002] Modules linked in: ccm nft_objref cmac nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat ip6table_nat ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_mangle iptable_raw iptable_security ip_set nf_tables nfnetlink ip6table_filter iptable_filter bnep sunrpc vfat fat iwlmvm mac80211 intel_rapl_msr ee1004 ppdev intel_rapl_common snd_hda_codec_realtek libarc4 iTCO_wdt snd_hda_codec_generic intel_pmc_bxt x86_pkg_temp_thermal iTCO_vendor_support intel_powerclamp coretemp intel_cstate iwlwifi btusb btrtl btbcm snd_hda_intel btintel snd_intel_dspcfg eeepc_wmi snd_hda_codec asus_wmi bluetooth 
snd_hwdep snd_seq ledtrig_audio snd_hda_core snd_seq_device sparse_keymap cfg80211 snd_pcm intel_uncore joydev platform_profile mei_me wmi_bmof intel_wmi_thunderbolt snd_timer pcspkr ecdh_generic i2c_i801 snd [ 92.205060] ecc mei rfkill soundcore idma64 i2c_smbus parport_pc parport acpi_pad acpi_tad xe drm_ttm_helper ttm i2c_algo_bit drm_suballoc_helper kunit drm_buddy gpu_sched drm_display_helper drm_kms_helper drm crct10dif_pclmul crc32_pclmul crc32c_intel nvme nvme_core e1000e ghash_clmulni_intel drm_panel_orientation_quirks video wmi pinctrl_tigerlake usb_storage ip6_tables ip_tables fuse [ 92.205242] CPU: 3 PID: 2449 Comm: xe_vm Tainted: G U 6.1.0+ #120 [ 92.205254] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 0403 01/26/2021 [ 92.205266] RIP: 0010:xe_vma_destroy+0x280/0x290 [xe] [ 92.205299] Code: 74 15 48 8b 93 a0 01 00 00 48 8b 83 a8 01 00 00 48 89 42 08 48 89 10 4c 89 ab a0 01 00 00 4c 89 ab a8 01 00 00 e9 1b fe ff ff <0f> 0b e9 a3 fe ff ff 0f 0b e9 82 fe ff ff 66 90 0f 1f 44 00 00 48 [ 92.205322] RSP: 0018:ffffaadd465c3a58 EFLAGS: 00010246 [ 92.205331] RAX: 0000000000000000 RBX: ffff9706d53ed400 RCX: 0000000000000001 [ 92.205341] RDX: ffff9706d53ed480 RSI: ffffffffa756dc2b RDI: ffffffffa760a05e [ 92.205351] RBP: 0000000000000000 R08: 0000000000000000 R09: 000000002c5370a2 [ 92.205361] R10: ffff9706ca520000 R11: 0000000022c5370a R12: ffff9706cad03800 [ 92.205370] R13: 000000000004ffff R14: fffffffffffffff2 R15: 0000000000000000 [ 92.205380] FS: 00007fe98203a940(0000) GS:ffff970dffac0000(0000) knlGS:0000000000000000 [ 92.205392] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 92.205400] CR2: 00007fe982ccb000 CR3: 000000010d6e6003 CR4: 0000000000770ee0 [ 92.205410] PKRU: 55555554 [ 92.205415] Call Trace: [ 92.205419] [ 92.205426] vm_bind_ioctl_lookup_vma+0x9bb/0xbf0 [xe] [ 92.205461] ? lock_is_held_type+0xe3/0x140 [ 92.205472] ? xe_vm_find_overlapping_vma+0x77/0x90 [xe] [ 92.205503] ? __vm_bind_ioctl_lookup_vma.constprop.0+0x9e/0xe0 [xe] [ 92.205533] ? __lock_acquire+0x3a3/0x1fb0 [ 92.205543] ? register_lock_class+0x38/0x480 [ 92.205550] ? __lock_acquire+0x3a3/0x1fb0 [ 92.205558] ? __lock_acquire+0x3a3/0x1fb0 [ 92.205567] ? __lock_acquire+0x3a3/0x1fb0 [ 92.205579] ? lock_acquire+0xbf/0x2b0 [ 92.205586] ? lock_acquire+0xcf/0x2b0 [ 92.205597] xe_vm_bind_ioctl+0x977/0x1c30 [xe] [ 92.205630] ? find_held_lock+0x2b/0x80 [ 92.205640] ? lock_release+0x131/0x2c0 [ 92.205648] ? xe_vm_ttm_bo+0x40/0x40 [xe] [ 92.205677] drm_ioctl_kernel+0xa1/0x150 [drm] [ 92.205706] drm_ioctl+0x221/0x420 [drm] [ 92.205727] ? 
xe_vm_ttm_bo+0x40/0x40 [xe] [ 92.205764] __x64_sys_ioctl+0x8d/0xd0 [ 92.205774] do_syscall_64+0x37/0x90 [ 92.205781] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 92.205790] RIP: 0033:0x7fe982be8d6f [ 92.205797] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00 [ 92.205821] RSP: 002b:00007ffde9f9c560 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 92.205832] RAX: ffffffffffffffda RBX: 00007fadeadbe000 RCX: 00007fe982be8d6f [ 92.205842] RDX: 00007ffde9f9c5f0 RSI: 0000000040786445 RDI: 0000000000000003 [ 92.205851] RBP: 00007ffde9f9c5f0 R08: 00007fadeadbe000 R09: 0000000000040000 [ 92.205861] R10: 0000000000000003 R11: 0000000000000246 R12: 0000000040786445 [ 92.205871] R13: 0000000000000003 R14: 0000000000000003 R15: 00007fe982e02000 [ 92.205888] [ 92.205892] irq event stamp: 82723 [ 92.205897] hardirqs last enabled at (82731): [] __up_console_sem+0x5e/0x70 [ 92.205910] hardirqs last disabled at (82738): [] __up_console_sem+0x43/0x70 [ 92.205922] softirqs last enabled at (82182): [] __irq_exit_rcu+0xed/0x160 [ 92.205935] softirqs last disabled at (82163): [] __irq_exit_rcu+0xed/0x160 [ 92.205947] ---[ end trace 0000000000000000 ]--- Reported-by: Francois Dugast Signed-off-by: Thomas Hellström Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a8254a4148f7..6cc3204adaa8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2835,7 +2835,8 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, err = xe_vma_userptr_pin_pages(vma); if (err) { - xe_vma_destroy(vma, NULL); + prep_vma_destroy(vm, vma); + xe_vma_destroy_unlocked(vma); return ERR_PTR(err); } else { -- cgit v1.2.3 From e84535d86043af8fc9edcbbeb00f2e47e8ccb130 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 6 Mar 2023 16:40:27 -0800 Subject: drm/xe/mcr: Add L3BANK steering for DG2 Some register ranges with replication type L3BANK were missing from the driver table. The following warning was triggering when adding a workaround touching the register 0xb188: xe 0000:03:00.0: Did not find MCR register 0xb188 in any MCR steering table Add the L3BANK ranges according to the spec. 
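As a worked example of the DG2 steering math in the hunk below (the mslice number is assumed for illustration): each mslice contributes 8 L3 banks, so the first valid bank is __ffs(mslice_mask) * 8; if mslice 1 is the first present mslice, that gives bank 8, which steers to group (8 >> 2) & 0x7 = 2 and instance 8 & 0x3 = 0.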
v2: - Fix typo in one of the ranges: s/0x00BCFF/0x008CFF/ (Matt Roper) - Add termination rule in the init function for L3BANK (Matt Roper) Bspec: 66534 Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 10eff02cc7db..ab8fc649ba52 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -47,6 +47,12 @@ static const struct xe_mmio_range xelp_l3bank_steering_table[] = { {}, }; +static const struct xe_mmio_range xehp_l3bank_steering_table[] = { + { 0x008C80, 0x008CFF }, + { 0x00B100, 0x00B3FF }, + {}, +}; + /* * Although the bspec lists more "MSLICE" ranges than shown here, some of those * are of a "GAM" subclass that has special rules and doesn't need to be @@ -180,6 +186,18 @@ static void init_steering_l3bank(struct xe_gt *gt) gt->steering[L3BANK].group_target = __ffs(mslice_mask); gt->steering[L3BANK].instance_target = bank_mask & BIT(0) ? 0 : 2; + } else if (gt_to_xe(gt)->info.platform == XE_DG2) { + u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, + xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + u32 bank = __ffs(mslice_mask) * 8; + + /* + * Like mslice registers, look for a valid mslice and steer to + * the first L3BANK of that quad. Access to the Nth L3 bank is + * split between the first bits of group and instance + */ + gt->steering[L3BANK].group_target = (bank >> 2) & 0x7; + gt->steering[L3BANK].instance_target = bank & 0x3; } else { u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK, ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); @@ -277,6 +295,7 @@ void xe_gt_mcr_init(struct xe_gt *gt) gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table; gt->steering[DSS].ranges = xehpc_dss_steering_table; } else if (xe->info.platform == XE_DG2) { + gt->steering[L3BANK].ranges = xehp_l3bank_steering_table; gt->steering[MSLICE].ranges = xehp_mslice_steering_table; gt->steering[LNCF].ranges = xehp_lncf_steering_table; gt->steering[DSS].ranges = xehp_dss_steering_table; -- cgit v1.2.3 From 6b980aa88d403db3e4cf5b58965dfa9a5f27c740 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 8 Mar 2023 18:18:57 -0800 Subject: drm/xe/mcr: Document how to initialize group/instance Add a sentence about the initialization so it's clear for newcomers how to tweak the init functions for new platforms. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index ab8fc649ba52..909059112179 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -34,7 +34,10 @@ * fused off or currently powered down due to power gating, the MMIO operation * is "terminated" by the hardware. Terminated read operations will return a * value of zero and terminated unicast write operations will be silently - * ignored. + * ignored. During device initialization, the goal of the various + * ``init_steering_*()`` functions is to apply the platform-specific rules for + * each MCR register type to identify a steering target that will select a + * non-terminated instance. 
*/ enum { -- cgit v1.2.3 From 8846ffb457587e5d393a83ce977c3db7c800fe58 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 3 Mar 2023 22:26:55 -0800 Subject: drm/xe: Allow const propagation in gt_to_xe() Replace the inline function with a _Generic() so gt_to_xe() can work with a const struct xe_gt*, which leads to a const struct xe *. This allows a const gt being passed around and when the xe device is needed, compiler won't issue a warning that calling gt_to_xe() would discard the const. Rather, just propagate the const to the xe pointer being returned. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 5635f2803170..086369f7ee6d 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -49,10 +49,10 @@ static inline bool xe_gt_is_media_type(struct xe_gt *gt) return gt->info.type == XE_GT_TYPE_MEDIA; } -static inline struct xe_device * gt_to_xe(struct xe_gt *gt) -{ - return gt->xe; -} +#define gt_to_xe(gt__) \ + _Generic(gt__, \ + const struct xe_gt *: (const struct xe_device *)((gt__)->xe), \ + struct xe_gt *: (gt__)->xe) static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe) { -- cgit v1.2.3 From 1415283befa0e47df1270d10356a074793664757 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 3 Mar 2023 22:30:04 -0800 Subject: drm/xe: Constify xe_dss_mask_group_ffs() Due to how xe_dss_mask_t is implemented, the type is a pointer. Since this is only used for looking up the bits, make it const so it can be used together with a const gt passed around. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_topology.c | 2 +- drivers/gpu/drm/xe/xe_gt_topology.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index f2cbee53462b..14cf135fd648 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -123,7 +123,7 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) * groupsize and groupnum are non-zero. */ unsigned int -xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum) +xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) { return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index f47ab1b1269c..5f35deed9128 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -15,7 +15,7 @@ void xe_gt_topology_init(struct xe_gt *gt); void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); unsigned int -xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum); +xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); -- cgit v1.2.3 From 4c128558fe16b77013a251bcc3af8caa77fb7732 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 1 Mar 2023 01:31:06 -0800 Subject: drm/xe/rtp: Move match function from wa to rtp Match functions are generally useful for other parts of the code (e.g. xe_tuning.c). Move and rename the single one available to create a place where similar match functions can be added. 
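For illustration, after the move a rules table keeps referencing the helper through FUNC(), only under its new name; the xe_wa.c entry touched by the hunk below ends up as:

	{ XE_RTP_NAME("14011060649"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), ENGINE_CLASS(VIDEO_DECODE),
		       FUNC(xe_rtp_match_even_instance)),
	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
	},

so additional match functions placed next to xe_rtp_match_even_instance() can be shared by xe_tuning.c and other rule tables without duplicating the predicate.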
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 6 ++++++ drivers/gpu/drm/xe/xe_rtp.h | 12 ++++++++++++ drivers/gpu/drm/xe/xe_wa.c | 8 +------- 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 5b1316b588d8..c04eca290ef0 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -154,3 +154,9 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, } } } + +bool xe_rtp_match_even_instance(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return hwe->instance % 2 == 0; +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index bd44fd8bbe05..9bd2532442ed 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -397,4 +397,16 @@ struct xe_reg_sr; void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, struct xe_gt *gt, struct xe_hw_engine *hwe); +/* Match functions to be used with XE_RTP_MATCH_FUNC */ + +/** + * xe_rtp_match_even_instance - Match if engine instance is even + * @gt: GT structure + * @hwe: Engine instance + * + * Returns: true if engine instance is even, false otherwise + */ +bool xe_rtp_match_even_instance(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 03c5b01a14e4..67539f9d70b4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -91,17 +91,11 @@ #define _MMIO(x) _XE_RTP_REG(x) #define MCR_REG(x) _XE_RTP_MCR_REG(x) -static bool match_14011060649(const struct xe_gt *gt, - const struct xe_hw_engine *hwe) -{ - return hwe->instance % 2 == 0; -} - static const struct xe_rtp_entry gt_was[] = { { XE_RTP_NAME("14011060649"), XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), ENGINE_CLASS(VIDEO_DECODE), - FUNC(match_14011060649)), + FUNC(xe_rtp_match_even_instance)), XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, -- cgit v1.2.3 From 043790f3edb554f8db3e841fd17a33b622bc2b31 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 1 Mar 2023 01:31:07 -0800 Subject: drm/xe/rtp: Add match for render reset domain This allows to create WA/tuning rules that match the first engine that is either of compute or render class. This matters for platforms that don't have a render engine and that may have arbitrary compute engines fused off: some register programming need to be added to one of those engines. 
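As a usage sketch: a later patch in this series consumes the new match function to steer a PVC workaround to the render reset domain, so the action lands on exactly one render/compute engine even when the render engine is absent:

	{ XE_RTP_NAME("22014226127"),
	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE))
	},

(The entry is quoted from the PVC engine workarounds added further down and is shown here only to illustrate how the helper is wired into a rule.)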
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_types.h | 5 +++++ drivers/gpu/drm/xe/xe_rtp.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_rtp.h | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 05a2fdc381d7..2c40384957da 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -23,6 +23,7 @@ enum xe_engine_class { enum xe_hw_engine_id { XE_HW_ENGINE_RCS0, +#define XE_HW_ENGINE_RCS_MASK GENMASK_ULL(XE_HW_ENGINE_RCS0, XE_HW_ENGINE_RCS0) XE_HW_ENGINE_BCS0, XE_HW_ENGINE_BCS1, XE_HW_ENGINE_BCS2, @@ -32,6 +33,7 @@ enum xe_hw_engine_id { XE_HW_ENGINE_BCS6, XE_HW_ENGINE_BCS7, XE_HW_ENGINE_BCS8, +#define XE_HW_ENGINE_BCS_MASK GENMASK_ULL(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS0) XE_HW_ENGINE_VCS0, XE_HW_ENGINE_VCS1, XE_HW_ENGINE_VCS2, @@ -40,14 +42,17 @@ enum xe_hw_engine_id { XE_HW_ENGINE_VCS5, XE_HW_ENGINE_VCS6, XE_HW_ENGINE_VCS7, +#define XE_HW_ENGINE_VCS_MASK GENMASK_ULL(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) XE_HW_ENGINE_VECS0, XE_HW_ENGINE_VECS1, XE_HW_ENGINE_VECS2, XE_HW_ENGINE_VECS3, +#define XE_HW_ENGINE_VECS_MASK GENMASK_ULL(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) XE_HW_ENGINE_CCS0, XE_HW_ENGINE_CCS1, XE_HW_ENGINE_CCS2, XE_HW_ENGINE_CCS3, +#define XE_HW_ENGINE_CCS_MASK GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) XE_NUM_HW_ENGINES, }; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index c04eca290ef0..0d2f51bb06e8 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -160,3 +160,13 @@ bool xe_rtp_match_even_instance(const struct xe_gt *gt, { return hwe->instance % 2 == 0; } + +bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + u64 render_compute_mask = gt->info.engine_mask & + (XE_HW_ENGINE_CCS_MASK | XE_HW_ENGINE_RCS_MASK); + + return render_compute_mask && + hwe->engine_id == __ffs(render_compute_mask); +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 9bd2532442ed..433f0cbff57f 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -409,4 +409,22 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, bool xe_rtp_match_even_instance(const struct xe_gt *gt, const struct xe_hw_engine *hwe); +/* + * xe_rtp_match_first_render_or_compute - Match if it's first render or compute + * engine in the GT + * + * @gt: GT structure + * @hwe: Engine instance + * + * Registers on the render reset domain need to have their values re-applied + * when any of those engines are reset. Since the engines reset together, a + * programming can be set to just one of them. For simplicity the first engine + * of either render or compute class can be chosen. + * + * Returns: true if engine id is the first to match the render reset domain, + * false otherwise. + */ +bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif -- cgit v1.2.3 From f647eff1725430dd835ac05a9f8f1661e2765f8e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 1 Mar 2023 01:31:08 -0800 Subject: drm/xe: Remove dump function from reg_sr The dump function was originally added with the idea that it could be re-used both for printing the reg-sr data and saving it to pass to GuC via ADS. 
This was not used by the GuC integration, so remove it now to give place to a new debug. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 26 -------------------------- drivers/gpu/drm/xe/xe_reg_sr.h | 2 -- drivers/gpu/drm/xe/xe_reg_sr_types.h | 5 ----- 3 files changed, 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index d67516469710..b2370241ebf9 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -43,32 +43,6 @@ int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); } -int xe_reg_sr_dump_kv(struct xe_reg_sr *sr, - struct xe_reg_sr_kv **dst) -{ - struct xe_reg_sr_kv *iter; - struct xe_reg_sr_entry *entry; - unsigned long idx; - - if (xa_empty(&sr->xa)) { - *dst = NULL; - return 0; - } - - *dst = kmalloc_array(sr->pool.used, sizeof(**dst), GFP_KERNEL); - if (!*dst) - return -ENOMEM; - - iter = *dst; - xa_for_each(&sr->xa, idx, entry) { - iter->k = idx; - iter->v = *entry; - iter++; - } - - return 0; -} - static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) { if (sr->pool.used == sr->pool.allocated) { diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index c3a9db251e92..9f47230c8ddc 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -16,8 +16,6 @@ struct xe_device; struct xe_gt; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); -int xe_reg_sr_dump_kv(struct xe_reg_sr *sr, - struct xe_reg_sr_kv **dst); int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, const struct xe_reg_sr_entry *e); diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h index 0e6d542ff1b4..3d2257891005 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr_types.h +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -23,11 +23,6 @@ struct xe_reg_sr_entry { u8 reg_type; }; -struct xe_reg_sr_kv { - u32 k; - struct xe_reg_sr_entry v; -}; - struct xe_reg_sr { struct { struct xe_reg_sr_entry *arr; -- cgit v1.2.3 From 766849c4accad67f8affa37c580d44f48be193b6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 1 Mar 2023 01:31:09 -0800 Subject: drm/xe: Name LRC wa after the engine it belongs This makes it easier when printing the register-save-restore values to know what is the engine. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 343370b44506..daa433d0f2f5 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -318,7 +318,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) if (gt->default_lrc[hwe->class]) continue; - xe_reg_sr_init(&hwe->reg_lrc, "LRC", xe); + xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe); xe_wa_process_lrc(hwe); xe_tuning_process_lrc(hwe); -- cgit v1.2.3 From 91ed180b419a1b2ccf9cc41999cb87eb9805fa38 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 1 Mar 2023 01:31:12 -0800 Subject: drm/xe/pvc: Remove A* steppings The PVC pre-production A* steppings are not going to be supported in xe driver - the steppings are important for the WAs and since we are not adding the pre-productions ones, there is no need to add the stepping. 
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_step.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index 14f482f29ae4..ee927dfd3eb3 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -79,14 +79,12 @@ static const struct xe_step_info dg2_g12_revid_step_tbl[] = { }; static const struct xe_step_info pvc_revid_step_tbl[] = { - [0x3] = { .graphics = STEP_A0 }, [0x5] = { .graphics = STEP_B0 }, [0x6] = { .graphics = STEP_B1 }, [0x7] = { .graphics = STEP_C0 }, }; static const int pvc_basedie_subids[] = { - [0x0] = STEP_A0, [0x3] = STEP_B0, [0x4] = STEP_B1, [0x5] = STEP_B3, -- cgit v1.2.3 From 6d4f49b7dec3126c6d5491bcea5ae815b025d042 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 9 Mar 2023 14:17:46 +0200 Subject: drm/xe: make compound literal initialization const Be careful about having const in the compound literal initialization to keep the initializers in rodata. Here, the impact is 1.8k of mutable data moved to rodata. add/remove: 0/1 grow/shrink: 0/0 up/down: 0/-1804 (-1804) Data old new delta __compound_literal 1804 - -1804 Total: Before=42425, After=40621, chg -4.25% add/remove: 0/0 grow/shrink: 1/0 up/down: 1804/0 (1804) RO Data old new delta __compound_literal 7696 9500 +1804 Total: Before=138535, After=140339, chg +1.30% Signed-off-by: Jani Nikula Reviewed-by: Rodrigo Vivi Signed-off-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230309121746.479146-1-jani.nikula@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 433f0cbff57f..ac983ce93684 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -363,7 +363,7 @@ struct xe_reg_sr; */ #define XE_RTP_RULES(r1, ...) \ .n_rules = COUNT_ARGS(r1, ##__VA_ARGS__), \ - .rules = (struct xe_rtp_rule[]) { \ + .rules = (const struct xe_rtp_rule[]) { \ CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__) \ } @@ -390,7 +390,7 @@ struct xe_reg_sr; */ #define XE_RTP_ACTIONS(a1, ...) \ .n_actions = COUNT_ARGS(a1, ##__VA_ARGS__), \ - .actions = (struct xe_rtp_action[]) { \ + .actions = (const struct xe_rtp_action[]) { \ CALL_FOR_EACH(__ADD_XE_RTP_ACTION_PREFIX, a1, ##__VA_ARGS__) \ } -- cgit v1.2.3 From 11823d48abce17d45e7e8c9bd525203f0096c6e8 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Thu, 9 Mar 2023 18:48:56 +0530 Subject: drm/xe: Fix overflow in vram manager The overflow caused xe_bo_restore_kernel to return an error Fix overflow in vram manager alloc function. 
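The root cause is a plain C promotion issue: ttm_place's lpfn is a 32-bit page-frame number, so lpfn << PAGE_SHIFT is computed in 32 bits and wraps once the end offset reaches 4 GiB; casting to u64 first performs the shift in 64 bits. A small standalone sketch (not driver code) of the difference:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	int main(void)
	{
		uint32_t lpfn = 0x200000;	/* page frame at the 8 GiB mark */
		uint64_t wrong = lpfn << PAGE_SHIFT;		/* 32-bit shift wraps to 0 */
		uint64_t right = (uint64_t)lpfn << PAGE_SHIFT;	/* 0x200000000 = 8 GiB */

		printf("wrong=%#llx right=%#llx\n",
		       (unsigned long long)wrong, (unsigned long long)right);
		return 0;
	}

With the wrapped value, the "fpfn + size != lpfn << PAGE_SHIFT" comparison in the allocator can take the wrong branch for large placements, which is what the one-character cast below avoids.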
Signed-off-by: Riana Tauro Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 643365b18bc7..159ca7105df1 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -118,7 +118,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, cur_size = size; - if (fpfn + size != place->lpfn << PAGE_SHIFT) { + if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) { /* * Except for actual range allocation, modify the size and * min_block_size conforming to continuous flag enablement -- cgit v1.2.3 From ddad061e8fbcba69bbdd9ee05b1749810c419920 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 14 Mar 2023 08:58:37 +0000 Subject: drm/xe: one more s/lmem/vram/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks to have been introduced in some very recent changes, in-between merging the driver wide s/lmem/vram/. Signed-off-by: Matthew Auld Cc: Gwan-gyeong Mun Cc: Lucas De Marchi Cc: Rodrigo Vivi Reviewed-by: Thomas Hellström Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 1116e217ebc2..27cc31f022a5 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -90,9 +90,9 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) /* * There may be few KB of platform dependent reserved memory at the end - * of lmem which is not part of the DSM. Such reserved memory portion is + * of vram which is not part of the DSM. Such reserved memory portion is * always less then DSM granularity so align down the stolen_size to DSM - * granularity to accommodate such reserve lmem portion. + * granularity to accommodate such reserve vram portion. */ return ALIGN_DOWN(stolen_size, SZ_1M); } -- cgit v1.2.3 From 69db25e447b8a3b9153db8a9004c50b080d0497e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 14 Mar 2023 08:58:38 +0000 Subject: drm/xe: add xe_ttm_stolen_cpu_access_needs_ggtt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_ttm_stolen_cpu_inaccessible() was originally meant to just cover the case where stolen is not directly CPU accessible on some older integrated platforms, and as such a GGTT mapping was also required for CPU access (as per the check in xe_bo_create_pin_map_at()). However with small-bar systems on dgfx we have one more case where stolen is also inaccessible, however here we don't have any fallback GGTT mode for CPU access. Fix the check in xe_bo_create_pin_map_at() to make this distinction clear. In such a case the later vmap() will fail anyway. 
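A rough summary of the cases the renamed helper distinguishes (illustrative, based on the behaviour described above and the checks in the patch):

	/*
	 * - older integrated (graphics version < 12.70): stolen is not directly
	 *   CPU accessible, needs_ggtt == true, so CPU access is routed through
	 *   the mappable GGTT (XE_BO_CREATE_GGTT_BIT is added at BO creation).
	 * - discrete small-BAR where stolen is not CPU visible:
	 *   needs_ggtt == false, there is no GGTT fallback, so a later vmap()
	 *   of such a BO simply fails.
	 * - stolen directly CPU accessible: needs_ggtt == false and stolen is
	 *   mapped directly, with no GGTT indirection needed.
	 */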
v2: fix kernel-doc warning v3: Simplify further and remove cpu_inaccessible() Suggested-by: Maarten Lankhorst Signed-off-by: Matthew Auld Cc: Gwan-gyeong Mun Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 39 +++++++++++----------------------- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h | 2 +- 3 files changed, 14 insertions(+), 29 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index cfb79519b673..5e309b26f75c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1160,7 +1160,7 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt, u64 end = offset == ~0ull ? offset : start + size; if (flags & XE_BO_CREATE_STOLEN_BIT && - xe_ttm_stolen_cpu_inaccessible(xe)) + xe_ttm_stolen_cpu_access_needs_ggtt(xe)) flags |= XE_BO_CREATE_GGTT_BIT; bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 27cc31f022a5..9629b1a677f2 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -38,32 +38,17 @@ to_stolen_mgr(struct ttm_resource_manager *man) } /** - * xe_ttm_stolen_cpu_inaccessible - Can we directly CPU access stolen memory for - * this device. + * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access + * stolen, can we then fallback to mapping through the GGTT. * @xe: xe device * - * On some integrated platforms we can't directly access stolen via the CPU - * (like some normal system memory). Also on small-bar systems for discrete, - * since stolen is always as the end of normal VRAM, and the BAR likely doesn't - * stretch that far. However CPU access of stolen is generally rare, and at - * least on discrete should not be needed. - * - * If this is indeed inaccessible then we fallback to using the GGTT mappable - * aperture for CPU access. On discrete platforms we have no such thing, so when - * later attempting to CPU map the memory an error is instead thrown. + * Some older integrated platforms don't support reliable CPU access for stolen, + * however on such hardware we can always use the mappable part of the GGTT for + * CPU access. Check if that's the case for this device. 
*/ -bool xe_ttm_stolen_cpu_inaccessible(struct xe_device *xe) +bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) { - struct ttm_resource_manager *ttm_mgr = - ttm_manager_type(&xe->ttm, XE_PL_STOLEN); - struct xe_ttm_stolen_mgr *mgr; - - if (!ttm_mgr) - return true; - - mgr = to_stolen_mgr(ttm_mgr); - - return !mgr->io_base || GRAPHICS_VERx100(xe) < 1270; + return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe); } static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) @@ -178,7 +163,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", stolen_size); - if (!xe_ttm_stolen_cpu_inaccessible(xe)) + if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe)) mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, stolen_size); } @@ -191,7 +176,7 @@ u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) XE_BUG_ON(!mgr->io_base); - if (!IS_DGFX(xe) && xe_ttm_stolen_cpu_inaccessible(xe)) + if (xe_ttm_stolen_cpu_access_needs_ggtt(xe)) return mgr->io_base + xe_bo_ggtt_addr(bo) + offset; xe_res_first(bo->ttm.resource, offset, 4096, &cur); @@ -257,10 +242,10 @@ int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem) if (!mgr || !mgr->io_base) return -EIO; - if (!xe_ttm_stolen_cpu_inaccessible(xe)) - return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem); - else + if (xe_ttm_stolen_cpu_access_needs_ggtt(xe)) return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem); + else + return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem); } u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h index 2fda97b97a05..1777245ff810 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -14,7 +14,7 @@ struct xe_device; void xe_ttm_stolen_mgr_init(struct xe_device *xe); int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); -bool xe_ttm_stolen_cpu_inaccessible(struct xe_device *xe); +bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe); u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset); u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe); -- cgit v1.2.3 From 143800547b96dfc56d1f50a135c367fbfd40fd5d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:29:59 -0700 Subject: drm/xe/rtp: Add match helper for gslice fused off Add match helper to detect when the first gslice is fused off, as needed by future workarounds. v2: - Add warning if called on a platform without geometry pipeline (Matt Roper) - Hardcode 4 as the number of gslices, which matches all the currently supported platforms. 
PVC doesn't have geometry pipeline and shouldn't use this function (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-2-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 16 ++++++++++++++++ drivers/gpu/drm/xe/xe_rtp.h | 11 +++++++++++ 2 files changed, 27 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 0d2f51bb06e8..cb9dd894547d 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -8,6 +8,7 @@ #include #include "xe_gt.h" +#include "xe_gt_topology.h" #include "xe_macros.h" #include "xe_reg_sr.h" @@ -170,3 +171,18 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, return render_compute_mask && hwe->engine_id == __ffs(render_compute_mask); } + +bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + unsigned int dss_per_gslice = 4; + unsigned int dss; + + if (drm_WARN(>_to_xe(gt)->drm, !gt->fuse_topo.g_dss_mask, + "Checking gslice for platform without geometry pipeline\n")) + return false; + + dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0); + + return dss >= dss_per_gslice; +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index ac983ce93684..a3be7c77753a 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -427,4 +427,15 @@ bool xe_rtp_match_even_instance(const struct xe_gt *gt, bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, const struct xe_hw_engine *hwe); +/* + * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off + * + * @gt: GT structure + * @hwe: Engine instance + * + * Returns: true if first gslice is fused off, false otherwise. + */ +bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif -- cgit v1.2.3 From 5be84050ddce298503e7290d375b6dcf3ce920d2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:00 -0700 Subject: drm/xe/reg_sr: Tweak verbosity for register printing If there is no register to save-restore or whitelist, just return. This drops some noise from the log, particurlarly for platforms with several engines like PVC: [drm:xe_reg_sr_apply_mmio [xe]] Applying bcs0 save-restore MMIOs [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs0 registers [drm:xe_reg_sr_apply_mmio [xe]] Applying bcs1 save-restore MMIOs [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs1 registers [drm:xe_reg_sr_apply_mmio [xe]] Applying bcs2 save-restore MMIOs [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs2 registers [drm:xe_reg_sr_apply_mmio [xe]] Applying bcs5 save-restore MMIOs [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs5 registers [drm:xe_reg_sr_apply_mmio [xe]] Applying bcs6 save-restore MMIOs [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs6 registers [drm:xe_reg_sr_apply_mmio [xe]] Applying bcs7 save-restore MMIOs [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs7 registers [drm:xe_reg_sr_apply_mmio [xe]] Applying bcs8 save-restore MMIOs [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs8 registers [drm:xe_reg_sr_apply_mmio [xe]] Applying ccs0 save-restore MMIOs [drm:xe_reg_sr_apply_mmio [xe]] REG[0x20e4] = 0x00008000 [drm:xe_reg_sr_apply_mmio [xe]] REG[0xb01c] = 0x00000001 [drm:xe_reg_sr_apply_mmio [xe]] REG[0xe48c] = 0x00000800 [drm:xe_reg_sr_apply_mmio [xe]] REG[0xe7c8] = 0x40000000 ... 
On a PVC system it should show something like below. Whitelist calls are still there since they aren't actually empty - driver just doesn't print each individual entry. This will be fixed in future. [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs0 registers [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs1 registers [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs2 registers [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs5 registers [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs6 registers [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs7 registers [drm:xe_reg_sr_apply_whitelist [xe]] Whitelisting bcs8 registers [drm:xe_reg_sr_apply_mmio [xe]] Applying ccs0 save-restore MMIOs [drm:xe_reg_sr_apply_mmio [xe]] REG[0x20e4] = 0x00008000 [drm:xe_reg_sr_apply_mmio [xe]] REG[0xb01c] = 0x00000001 [drm:xe_reg_sr_apply_mmio [xe]] REG[0xe48c] = 0x00000800 [drm:xe_reg_sr_apply_mmio [xe]] REG[0xe7c8] = 0x40000000 v2: Only tweak log verbosity, leave the whitelist printout for later since decoding the whitelist is more complex. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-3-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index b2370241ebf9..3d041c9330df 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -167,6 +167,9 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) unsigned long reg; int err; + if (xa_empty(&sr->xa)) + return; + drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name); err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); @@ -194,6 +197,9 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, unsigned int slot = 0; int err; + if (xa_empty(&sr->xa)) + return; + drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name); err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); -- cgit v1.2.3 From d855d2246ea6b04cbda372846b21c040fb068575 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:01 -0700 Subject: drm/xe: Print whitelist while applying Besides printing the various register save-restore, it's also useful to know the register being allowed/denied access from unprivileged batch buffers. Print them during device probe. 
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-4-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 4 ++++ drivers/gpu/drm/xe/xe_reg_whitelist.c | 41 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_reg_whitelist.h | 7 ++++++ 3 files changed, 52 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 3d041c9330df..ef12de48ab73 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -20,6 +20,7 @@ #include "xe_gt_mcr.h" #include "xe_macros.h" #include "xe_mmio.h" +#include "xe_reg_whitelist.h" #include "xe_rtp_types.h" #define XE_REG_SR_GROW_STEP_DEFAULT 16 @@ -193,6 +194,7 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, { struct xe_device *xe = gt_to_xe(gt); struct xe_reg_sr_entry *entry; + struct drm_printer p; unsigned long reg; unsigned int slot = 0; int err; @@ -206,7 +208,9 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, if (err) goto err_force_wake; + p = drm_debug_printer(KBUILD_MODNAME); xa_for_each(&sr->xa, reg, entry) { + xe_reg_whitelist_print_entry(&p, 0, reg, entry); xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, reg | entry->set_bits); slot++; diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 2dd10e62718f..6c8577e8dba6 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -67,3 +67,44 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) { xe_rtp_process(register_whitelist, &hwe->reg_whitelist, hwe->gt, hwe); } + +/** + * xe_reg_whitelist_print_entry - print one whitelist entry + * @p: DRM printer + * @indent: indent level + * @reg: register allowed/denied + * @entry: save-restore entry + * + * Print details about the entry added to allow/deny access + */ +void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, + u32 reg, struct xe_reg_sr_entry *entry) +{ + u32 val = entry->set_bits; + const char *access_str = "(invalid)"; + unsigned range_bit = 2; + u32 range_start, range_end; + bool deny; + + deny = val & RING_FORCE_TO_NONPRIV_DENY; + + switch (val & RING_FORCE_TO_NONPRIV_RANGE_MASK) { + case RING_FORCE_TO_NONPRIV_RANGE_4: range_bit = 4; break; + case RING_FORCE_TO_NONPRIV_RANGE_16: range_bit = 6; break; + case RING_FORCE_TO_NONPRIV_RANGE_64: range_bit = 8; break; + } + + range_start = reg & REG_GENMASK(25, range_bit); + range_end = range_start | REG_GENMASK(range_bit, 0); + + switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) { + case RING_FORCE_TO_NONPRIV_ACCESS_RW: access_str = "rw"; break; + case RING_FORCE_TO_NONPRIV_ACCESS_RD: access_str = "read"; break; + case RING_FORCE_TO_NONPRIV_ACCESS_WR: access_str = "write"; break; + } + + drm_printf_indent(p, indent, "REG[0x%x-0x%x]: %s %s access\n", + range_start, range_end, + deny ? 
"deny" : "allow", + access_str); +} diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h index 6e861b1bdb01..c76d81c528da 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.h +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h @@ -6,8 +6,15 @@ #ifndef _XE_REG_WHITELIST_ #define _XE_REG_WHITELIST_ +#include + +struct drm_printer; struct xe_hw_engine; +struct xe_reg_sr_entry; void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); +void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, + u32 reg, struct xe_reg_sr_entry *entry); + #endif -- cgit v1.2.3 From 6647e2fe23f595dc46780b7cc26be872ca168643 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:02 -0700 Subject: drm/xe/debugfs: Dump register save-restore tables Add debugfs entry to dump the final tables with register save-restore information. For the workarounds, this has a format a little bit different than when the values are applied because we don't want to read the values from the HW when dumping via debugfs. For whitelist it just re-uses the print function added for when the whitelist is being built. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-5-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_reg_sr.c | 21 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_reg_sr.h | 2 ++ drivers/gpu/drm/xe/xe_reg_whitelist.c | 18 ++++++++++++++++++ drivers/gpu/drm/xe/xe_reg_whitelist.h | 3 +++ 5 files changed, 74 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 78942e12e76c..9fab8017490f 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -16,6 +16,8 @@ #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_macros.h" +#include "xe_reg_sr.h" +#include "xe_reg_whitelist.h" #include "xe_uc_debugfs.h" static struct xe_gt *node_to_gt(struct drm_info_node *node) @@ -98,6 +100,33 @@ static int ggtt(struct seq_file *m, void *data) return xe_ggtt_dump(gt->mem.ggtt, &p); } +static int register_save_restore(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + xe_reg_sr_dump(>->reg_sr, &p); + drm_printf(&p, "\n"); + + drm_printf(&p, "Engine\n"); + for_each_hw_engine(hwe, gt, id) + xe_reg_sr_dump(&hwe->reg_sr, &p); + drm_printf(&p, "\n"); + + drm_printf(&p, "LRC\n"); + for_each_hw_engine(hwe, gt, id) + xe_reg_sr_dump(&hwe->reg_lrc, &p); + drm_printf(&p, "\n"); + + drm_printf(&p, "Whitelist\n"); + for_each_hw_engine(hwe, gt, id) + xe_reg_whitelist_dump(&hwe->reg_whitelist, &p); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", hw_engines, 0}, {"force_reset", force_reset, 0}, @@ -105,6 +134,7 @@ static const struct drm_info_list debugfs_list[] = { {"topology", topology, 0}, {"steering", steering, 0}, {"ggtt", ggtt, 0}, + {"register-save-restore", register_save_restore, 0}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index ef12de48ab73..37ae8412cb00 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -229,3 +229,24 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, err_force_wake: 
drm_err(&xe->drm, "Failed to apply, err=%d\n", err); } + +/** + * xe_reg_sr_dump - print all save/restore entries + * @sr: Save/restore entries + * @p: DRM printer + */ +void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p) +{ + struct xe_reg_sr_entry *entry; + unsigned long reg; + + if (!sr->name || xa_empty(&sr->xa)) + return; + + drm_printf(p, "%s\n", sr->name); + xa_for_each(&sr->xa, reg, entry) + drm_printf(p, "\tREG[0x%lx] clr=0x%08x set=0x%08x masked=%s mcr=%s\n", + reg, entry->clr_bits, entry->set_bits, + str_yes_no(entry->masked_reg), + str_yes_no(entry->reg_type == XE_RTP_REG_MCR)); +} diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index 9f47230c8ddc..3af369089faa 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -14,8 +14,10 @@ struct xe_device; struct xe_gt; +struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); +void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p); int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, const struct xe_reg_sr_entry *e); diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 6c8577e8dba6..c4b3a2045299 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -108,3 +108,21 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, deny ? "deny" : "allow", access_str); } + +/** + * xe_reg_whitelist_dump - print all whitelist entries + * @sr: Save/restore entries + * @p: DRM printer + */ +void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p) +{ + struct xe_reg_sr_entry *entry; + unsigned long reg; + + if (!sr->name || xa_empty(&sr->xa)) + return; + + drm_printf(p, "%s\n", sr->name); + xa_for_each(&sr->xa, reg, entry) + xe_reg_whitelist_print_entry(p, 1, reg, entry); +} diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.h b/drivers/gpu/drm/xe/xe_reg_whitelist.h index c76d81c528da..69b121d377da 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.h +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.h @@ -10,6 +10,7 @@ struct drm_printer; struct xe_hw_engine; +struct xe_reg_sr; struct xe_reg_sr_entry; void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); @@ -17,4 +18,6 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, u32 reg, struct xe_reg_sr_entry *entry); +void xe_reg_whitelist_dump(struct xe_reg_sr *sr, struct drm_printer *p); + #endif -- cgit v1.2.3 From 6b5ccd6360e29e67a760f82d0b28cf7c058732f7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:03 -0700 Subject: drm/xe: Reorder WAs to consider the platform Now that number of platforms is growing, it's getting hard to know the workarounds for each platform. Split the entries inside the same table so the workarounds checking IP version are listed first, followed by each platform. Next step when it grows too much is to split in smaller tables. 
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-6-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 93 +++++++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 67539f9d70b4..155cabe16e2e 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -99,6 +99,24 @@ static const struct xe_rtp_entry gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("14011059788"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)) + }, + + /* DG1 */ + + { XE_RTP_NAME("1409420604"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)) + }, + { XE_RTP_NAME("1408615072"), + XE_RTP_RULES(PLATFORM(DG1)), + XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)) + }, + + /* DG2 */ + { XE_RTP_NAME("16010515920"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), @@ -162,47 +180,15 @@ static const struct xe_rtp_entry gt_was[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)) }, - { XE_RTP_NAME("14011059788"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)) - }, - { XE_RTP_NAME("1409420604"), - XE_RTP_RULES(PLATFORM(DG1)), - XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS)) - }, - { XE_RTP_NAME("1408615072"), - XE_RTP_RULES(PLATFORM(DG1)), - XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)) - }, {} }; static const struct xe_rtp_entry engine_was[] = { - { XE_RTP_NAME("14015227452"), - XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, - XE_RTP_ACTION_FLAG(MASKED_REG))) - }, - { XE_RTP_NAME("1606931601"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, - XE_RTP_ACTION_FLAG(MASKED_REG))) - }, { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE)) }, - { XE_RTP_NAME("14010826681, 1606700617, 22010271021"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) - }, - { XE_RTP_NAME("18019627453"), - XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) - }, { XE_RTP_NAME("1409804808"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), @@ -217,6 +203,30 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH, XE_RTP_ACTION_FLAG(MASKED_REG))) }, + { XE_RTP_NAME("1606931601"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("14010826681, 1606700617, 22010271021"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + 
XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("1406941453"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN7_FF_SLICE_CS_CHICKEN1, + GEN9_FFSC_PERCTX_PREEMPT_CTRL, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + + /* TGL */ + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), @@ -224,6 +234,9 @@ static const struct xe_rtp_entry engine_was[] = { GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, + + /* RKL */ + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), @@ -231,15 +244,17 @@ static const struct xe_rtp_entry engine_was[] = { GEN8_RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, - { XE_RTP_NAME("1406941453"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, + + /* DG2 */ + + { XE_RTP_NAME("14015227452"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, - { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN7_FF_SLICE_CS_CHICKEN1, - GEN9_FFSC_PERCTX_PREEMPT_CTRL, + { XE_RTP_NAME("18019627453"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, {} -- cgit v1.2.3 From a19220fa5f1a740d98654ee1d6cf11a8e0158018 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:04 -0700 Subject: drm/xe: Add PVC gt workarounds Synchronize with i915 the PVC gt workarounds as of committ commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access order"). 
v2: Add masked flag to XEHPC_LNCFMISCCFGREG0 (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-7-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 9 +++++++++ drivers/gpu/drm/xe/xe_wa.c | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d3b862e4cd0d..411cdbae1894 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -216,6 +216,15 @@ #define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4) #define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4) +#define XEHPC_LNCFMISCCFGREG0 MCR_REG(0xb01c) +#define XEHPC_OVRLSCCC REG_BIT(0) + +#define RENDER_MOD_CTRL MCR_REG(0xcf2c) +#define COMP_MOD_CTRL MCR_REG(0xcf30) +#define XEHP_VDBX_MOD_CTRL MCR_REG(0xcf34) +#define XEHP_VEBX_MOD_CTRL MCR_REG(0xcf38) +#define FORCE_MISS_FTLB REG_BIT(3) + #define GEN10_SAMPLER_MODE MCR_REG(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 155cabe16e2e..e8d523033b87 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -180,6 +180,25 @@ static const struct xe_rtp_entry gt_was[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)) }, + + /* PVC */ + + { XE_RTP_NAME("14015795083"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)) + }, + { XE_RTP_NAME("18018781329"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB), + SET(COMP_MOD_CTRL, FORCE_MISS_FTLB), + SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB), + SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("16016694945"), + XE_RTP_RULES(PLATFORM(PVC)), + XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, {} }; -- cgit v1.2.3 From 4688d9ce2e3d0ad59147970295018cec4c67afa5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:05 -0700 Subject: drm/xe: Add PVC engine workarounds Sync PVC engine workarounds with i915. v2: Remove 16016694945. It was added by mistake. 
It's a GT workaround, already present in the GT table (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-8-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 9 +++++++++ drivers/gpu/drm/xe/xe_wa.c | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 411cdbae1894..e55c2f83b353 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -43,6 +43,9 @@ #define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) #define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14) +#define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) +#define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) + #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) @@ -230,6 +233,9 @@ #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) +#define CACHE_MODE_SS MCR_REG(0xe420) +#define DISABLE_ECC REG_BIT(5) + #define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c) #define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) #define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) @@ -246,6 +252,9 @@ #define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) #define GEN12_DISABLE_DOP_GATING REG_BIT(0) +#define LSC_CHICKEN_BIT_0 MCR_REG(0xe7c8) +#define DISABLE_D8_D16_COASLESCE REG_BIT(30) + #define SARB_CHICKEN1 MCR_REG(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index e8d523033b87..4fe01168f45f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -276,6 +276,28 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, + + /* PVC */ + + { XE_RTP_NAME("22014226127"), + XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE)) + }, + { XE_RTP_NAME("14015227452"), + XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("16015675438"), + XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("14014999345"), + XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), STEP(B0, C0)), + XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, {} }; -- cgit v1.2.3 From 911aeb0f61b8cb9b903105d2e585e80baadb513b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:06 -0700 Subject: drm/xe: Add missing DG2 gt workarounds and tunings Synchronize with i915 the DG2 gt workarounds as of commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access order"). 
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-9-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 17 ++++++++++++++++- drivers/gpu/drm/xe/xe_tuning.c | 4 ++++ drivers/gpu/drm/xe/xe_wa.c | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 53 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index e55c2f83b353..f1a9d065120e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -168,7 +168,6 @@ #define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) #define LTCDD_CLKGATE_DIS REG_BIT(10) -#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) @@ -222,12 +221,28 @@ #define XEHPC_LNCFMISCCFGREG0 MCR_REG(0xb01c) #define XEHPC_OVRLSCCC REG_BIT(0) +#define XEHP_L3NODEARBCFG MCR_REG(0xb0b4) +#define XEHP_LNESPARE REG_BIT(19) + +#define XEHP_L3SCQREG7 MCR_REG(0xb188) +#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) + +#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28) #define RENDER_MOD_CTRL MCR_REG(0xcf2c) #define COMP_MOD_CTRL MCR_REG(0xcf30) #define XEHP_VDBX_MOD_CTRL MCR_REG(0xcf34) #define XEHP_VEBX_MOD_CTRL MCR_REG(0xcf38) #define FORCE_MISS_FTLB REG_BIT(3) +#define XEHP_GAMSTLB_CTRL MCR_REG(0xcf4c) +#define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12) +#define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11) +#define TAG_BLOCK_CLKGATE_DIS REG_BIT(7) + +#define XEHP_GAMCNTRL_CTRL MCR_REG(0xcf54) +#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) +#define GLOBAL_INVALIDATION_MODE REG_BIT(2) + #define GEN10_SAMPLER_MODE MCR_REG(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 2861a014c85c..47b27dccb385 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -16,6 +16,10 @@ #define MCR_REG(x) _XE_RTP_MCR_REG(x) static const struct xe_rtp_entry gt_tunings[] = { + { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) + }, { XE_RTP_NAME("Tuning: 32B Access Enable"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS)) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4fe01168f45f..13c8dbf49cba 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -132,6 +132,14 @@ static const struct xe_rtp_entry gt_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G10)), XE_RTP_ACTIONS(SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)) }, + { XE_RTP_NAME("14012362059"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("14012362059"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) + }, { XE_RTP_NAME("14010948348"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)) @@ -142,7 +150,7 @@ static const struct xe_rtp_entry gt_was[] = { }, { XE_RTP_NAME("14011371254"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), - XE_RTP_ACTIONS(SET(GEN11_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)) + XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, 
NODEDSS_CLKGATE_DIS)) }, { XE_RTP_NAME("14011431319"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), @@ -172,6 +180,13 @@ static const struct xe_rtp_entry gt_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)) }, + { XE_RTP_NAME("14010680813"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL, + CONTROL_BLOCK_CLKGATE_DIS | + EGRESS_BLOCK_CLKGATE_DIS | + TAG_BLOCK_CLKGATE_DIS)) + }, { XE_RTP_NAME("14014830051"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN)) @@ -180,6 +195,23 @@ static const struct xe_rtp_entry gt_was[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)) }, + { XE_RTP_NAME("18018781329"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB), + SET(COMP_MOD_CTRL, FORCE_MISS_FTLB), + SET(XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB), + SET(XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("1509235366"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_GAMCNTRL_CTRL, + INVALIDATION_BROADCAST_MODE_DIS | + GLOBAL_INVALIDATION_MODE)) + }, + { XE_RTP_NAME("14010648519"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE)) + }, /* PVC */ -- cgit v1.2.3 From 4d5ab1216385941fa9336b13cb27c259b149ab43 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:07 -0700 Subject: drm/xe: Add missing DG2 engine workarounds Synchronize with i915 the DG2 gt workarounds as of commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access order"). A few simplifications were done when the WA should be applied to some steps of a subplatform and all the steppings of the other subplatforms. This happened with Wa_1509727124, Wa_22012856258 and a few others. In figure the pre-production steppings will be removed, so this can be already simplified a little bit. 
v2: - Make 1308578152 conditional on first gslice fused off - Add the missing Wa_1608949956/Wa_14010198302 (Matt Roper) v3: - Do not duplicate the implementation of 18019627453 since it's already covered by other WA numbers in graphics versions 1200 and 1210 Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-10-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 27 +++++ drivers/gpu/drm/xe/xe_wa.c | 191 ++++++++++++++++++++++++++++++++++- 2 files changed, 213 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index f1a9d065120e..9320cb01d424 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -48,6 +48,7 @@ #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) +#define GEN12_REPLAY_MODE_GRANULARITY REG_BIT(0) #define PS_INVOCATION_COUNT _MMIO(0x2348) @@ -91,6 +92,9 @@ #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) +#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4) +#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) + #define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESS REG_BIT(30) @@ -248,8 +252,13 @@ #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) +#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194) +#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) + #define CACHE_MODE_SS MCR_REG(0xe420) +#define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) #define DISABLE_ECC REG_BIT(5) +#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) #define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c) #define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) @@ -260,6 +269,10 @@ #define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) +#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0) +#define UGM_BACKUP_MODE REG_BIT(13) +#define MDQ_ARBITRATION_MODE REG_BIT(12) + #define GEN8_ROW_CHICKEN2 MCR_REG(0xe4f4) #define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) #define GEN12_DISABLE_EARLY_READ REG_BIT(14) @@ -267,8 +280,22 @@ #define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) #define GEN12_DISABLE_DOP_GATING REG_BIT(0) +#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) +#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) + +#define RT_CTRL MCR_REG(0xe530) +#define DIS_NULL_QUERY REG_BIT(10) + #define LSC_CHICKEN_BIT_0 MCR_REG(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) +#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) + +#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4) +#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) +#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) +#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) +#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) +#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) #define SARB_CHICKEN1 MCR_REG(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 13c8dbf49cba..306541b229bf 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -91,6 +91,9 @@ #define _MMIO(x) _XE_RTP_REG(x) #define MCR_REG(x) _XE_RTP_MCR_REG(x) +__diag_push(); +__diag_ignore_all("-Woverride-init", "Allow field overrides in table"); + static const struct xe_rtp_entry gt_was[] = { { XE_RTP_NAME("14011060649"), XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 
1255), @@ -259,8 +262,8 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, XE_RTP_ACTION_FLAG(MASKED_REG))) }, - { XE_RTP_NAME("14010826681, 1606700617, 22010271021"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), + { XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, @@ -298,16 +301,192 @@ static const struct xe_rtp_entry engine_was[] = { /* DG2 */ + { XE_RTP_NAME("22013037850"), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW)) + }, + { XE_RTP_NAME("22014226127"), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE)) + }, + { XE_RTP_NAME("18017747507"), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(VFG_PREEMPTION_CHICKEN, + POLYGON_TRIFAN_LINELOOP_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("22012826095, 22013059131"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) + }, + { XE_RTP_NAME("22012826095, 22013059131"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) + }, + { XE_RTP_NAME("22013059131"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) + }, + { XE_RTP_NAME("22013059131"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) + }, + { XE_RTP_NAME("14010918519"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, + FORCE_SLM_FENCE_SCOPE_TO_TILE | + FORCE_UGM_FENCE_SCOPE_TO_TILE, + /* + * Ignore read back as it always returns 0 in these + * steps + */ + .read_mask = 0)) + }, { XE_RTP_NAME("14015227452"), - XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(PLATFORM(DG2), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, - { XE_RTP_NAME("18019627453"), + { XE_RTP_NAME("16015675438"), + XE_RTP_RULES(PLATFORM(DG2), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, + PERF_FIX_BALANCING_CFE_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("16011620976, 22015475538"), + XE_RTP_RULES(PLATFORM(DG2), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8)) + }, + { XE_RTP_NAME("22012654132"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, + XE_RTP_ACTION_FLAG(MASKED_REG), + /* + * Register can't be read back for verification on + * DG2 due to Wa_14012342262 + */ + .read_mask = 0)) + }, + { XE_RTP_NAME("22012654132"), + 
XE_RTP_RULES(SUBPLATFORM(DG2, G11), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, + XE_RTP_ACTION_FLAG(MASKED_REG), + /* + * Register can't be read back for verification on + * DG2 due to Wa_14012342262 + */ + .read_mask = 0)) + }, + { XE_RTP_NAME("1509727124"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("22012856258"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("14013392000"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("14012419201"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, + GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("14012419201"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, + GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("1308578152"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER), + FUNC(xe_rtp_match_first_gslice_fused_off)), + XE_RTP_ACTIONS(CLR(GEN9_CS_DEBUG_MODE1, + GEN12_REPLAY_MODE_GRANULARITY, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("22010960976, 14013347512"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0, + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("1608949956, 14010198302"), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN, + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, + { XE_RTP_NAME("22010430635"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("14013202645"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) + }, + { XE_RTP_NAME("14013202645"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) + }, + { XE_RTP_NAME("22012532006"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN9_HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("22012532006"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(GEN9_HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("22014600077"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(B0, FOREVER), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CACHE_MODE_SS, + ENABLE_EU_COUNT_FOR_TDL_FLUSH, + XE_RTP_ACTION_FLAG(MASKED_REG), + /* + * Wa_14012342262 write-only reg, so skip + * verification + */ + .read_mask = 0)) + }, + { XE_RTP_NAME("22014600077"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), ENGINE_CLASS(RENDER)), + 
XE_RTP_ACTIONS(SET(CACHE_MODE_SS, + ENABLE_EU_COUNT_FOR_TDL_FLUSH, + XE_RTP_ACTION_FLAG(MASKED_REG), + /* + * Wa_14012342262 write-only reg, so skip + * verification + */ + .read_mask = 0)) + }, /* PVC */ @@ -365,6 +544,8 @@ static const struct xe_rtp_entry lrc_was[] = { {} }; +__diag_pop(); + /** * xe_wa_process_gt - process GT workaround table * @gt: GT instance to process workarounds for -- cgit v1.2.3 From 11f78b130835695150ddeae98a90d433e5b02d1e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:08 -0700 Subject: drm/xe: Add missing DG2 lrc tunings Synchronize with i915 the DG2 tunings as of commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access order"). Contrary to the tuning "gang timer" for TGL, there is no quick justification for why the read back is disabled in i915. Keep it with that flag for now. That can be tentatively removed later when the read values are checked. v2: Use XEHP_FF_MODE2 instead of GEN12_FF_MODE2 (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-11-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 7 +++++++ drivers/gpu/drm/xe/xe_tuning.c | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 9320cb01d424..97a9d78e8831 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -73,7 +73,11 @@ #define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) #define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910) +#define CHICKEN_RASTER_2 MCR_REG(0x6208) +#define TBIMR_FAST_CLIP REG_BIT(5) + #define GEN12_FF_MODE2 _MMIO(0x6604) +#define XEHP_FF_MODE2 MCR_REG(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) @@ -228,6 +232,9 @@ #define XEHP_L3NODEARBCFG MCR_REG(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) +#define XEHP_L3SQCREG5 MCR_REG(0xb158) +#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) + #define XEHP_L3SCQREG7 MCR_REG(0xb188) #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 47b27dccb385..7ff5eb762da5 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -35,6 +35,26 @@ static const struct xe_rtp_entry lrc_tunings[] = { FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224)) }, + + /* DG2 */ + + { XE_RTP_NAME("Tuning: L3 cache"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) + }, + { XE_RTP_NAME("Tuning: TDS gang timer"), + XE_RTP_RULES(PLATFORM(DG2)), + /* read verification is ignored as in i915 - need to check enabling */ + XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128)) + }, + { XE_RTP_NAME("Tuning: TBIMR fast clip"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, {} }; -- cgit v1.2.3 From 8cd7e9759766d717cf4c7be53e17acf6dff19283 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:09 -0700 Subject: drm/xe: Add missing DG2 lrc workarounds Synchronize with i915 the DG2 lrc workarounds as of commit 4d14d7717f19 ("drm/i915/selftest: Fix ktime_get() and h/w access 
order"). A few simplifications were done when the WA should be applied to some steps of a subplatform and all the steppings of the other subplatforms. In this case, it was simply applied to all the steppings, which only means applying it to a few more A* steppings. The implementation of the workaround 16011186671 triggers a bug in the RTP infra: it's not possible to set the flag the usual way when having multiple actions in the entry. This may be fixed later, but for now it's sufficient to just set the flag directly without the helper macro. v2: Fix 14014947963 to use FIELD_SET (Matt Roper) Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-12-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 19 ++++++++++++++ drivers/gpu/drm/xe/xe_wa.c | 51 ++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 97a9d78e8831..a079e1aef5a4 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -73,9 +73,16 @@ #define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) #define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910) +#define CHICKEN_RASTER_1 MCR_REG(0x6204) +#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) + #define CHICKEN_RASTER_2 MCR_REG(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) +#define VFLSKPD MCR_REG(0x62a8) +#define DIS_OVER_FETCH_CACHE REG_BIT(1) +#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) + #define GEN12_FF_MODE2 _MMIO(0x6604) #define XEHP_FF_MODE2 MCR_REG(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) @@ -83,6 +90,12 @@ #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) +#define CACHE_MODE_1 _MMIO(0x7004) +#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) + +#define XEHP_PSS_MODE2 MCR_REG(0x703c) +#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) + #define HIZ_CHICKEN _MMIO(0x7018) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) @@ -96,6 +109,12 @@ #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) +#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c) +#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) + +#define VF_PREEMPTION _MMIO(0x83a4) +#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) + #define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4) #define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 306541b229bf..0621706f46e6 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -541,6 +541,57 @@ static const struct xe_rtp_entry lrc_was[] = { DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, + + /* DG2 */ + + { XE_RTP_NAME("16011186671"), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)), + XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH, + .flags = XE_RTP_ACTION_FLAG_MASKED_REG), + SET(VFLSKPD, DIS_OVER_FETCH_CACHE, + .flags = XE_RTP_ACTION_FLAG_MASKED_REG)) + }, + { XE_RTP_NAME("14010469329"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("14010698770, 22010613112, 22010465075"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, + 
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("16013271637"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("14014947963"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(FIELD_SET(VF_PREEMPTION, + PREEMPTION_VERTEX_COUNT, + 0x4000, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("18018764978"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2, + SCOREBOARD_STALL_FLUSH_CONTROL, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("15010599737"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("18019271663"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, {} }; -- cgit v1.2.3 From 95ff48c2e7a6f4968b1f795462e7e3af334c2749 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:10 -0700 Subject: drm/xe: Add missing ADL-P engine workaround Add the one missing workaround for ADL-P when comparing to i915 up to commit 7cdae9e9ee5e ("drm/i915: Move DG2 tuning to the right function"). Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-13-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 0621706f46e6..e21c7ec53b2f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -299,6 +299,16 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_ACTION_FLAG(MASKED_REG))) }, + /* ADL-P */ + + { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), + XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + /* DG2 */ { XE_RTP_NAME("22013037850"), -- cgit v1.2.3 From fd93946d594efc6df3f48c684ce87cbbde82dcb9 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 Mar 2023 17:30:11 -0700 Subject: drm/xe: Add missing LRC workarounds for graphics 1200 Synchronize LRC workarounds for graphics version 1200 with i915 up to commit 7cdae9e9ee5e ("drm/i915: Move DG2 tuning to the right function"). These were probably missed for TGL/RKL before because in i915 it uses a !IS_DG1() condition. Avoid a similar issue by just checking the graphics version 1200 since DG1 is 1210. 
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230314003012.2600353-14-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 ++++ drivers/gpu/drm/xe/xe_wa.c | 10 ++++++++++ 2 files changed, 14 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index a079e1aef5a4..73b0c0bdde5d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -98,10 +98,14 @@ #define HIZ_CHICKEN _MMIO(0x7018) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) +#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) +#define COMMON_SLICE_CHICKEN4 _MMIO(0x7300) +#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) + #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) #define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304) #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index e21c7ec53b2f..59d2daab5929 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -536,6 +536,16 @@ static const struct xe_rtp_entry lrc_was[] = { GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, XE_RTP_ACTION_FLAG(MASKED_REG))) }, + { XE_RTP_NAME("1806527549"), + XE_RTP_RULES(GRAPHICS_VERSION(1200)), + XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + { XE_RTP_NAME("1606376872"), + XE_RTP_RULES(GRAPHICS_VERSION(1200)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC, + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, /* DG1 */ -- cgit v1.2.3 From 5fd92bdd54e2f0e0611e690f3e03d6d3fa9621d8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 9 Mar 2023 14:21:33 +0200 Subject: drm/xe/irq: the irq handler local variable need not be static It's just a local variable. Signed-off-by: Jani Nikula Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 071ccc75b71b..9527e7fb9b6e 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -516,7 +516,7 @@ static void irq_uninstall(struct drm_device *drm, void *arg) int xe_irq_install(struct xe_device *xe) { int irq = to_pci_dev(xe->drm.dev)->irq; - static irq_handler_t irq_handler; + irq_handler_t irq_handler; int err; irq_handler = xe_irq_handler(xe); -- cgit v1.2.3 From 8eb7ad99ae66b4244a1239bfa8723d1a06beddb9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 10 Mar 2023 09:13:39 +0100 Subject: drm/xe/xe_uc_fw: Use firmware files from standard locations The GuC/HuC firmware files used by Xe drivers are the same as used by i915. Use the already-known location to find those firmware files, for a couple of reasons: 1. Avoid having the same firmware placed on two different places on MODULE_FIRMWARE(), if both 915 and xe drivers are compiled; 2. Having firmware files located on different locations may end creating bigger initramfs, as the same files will be copied twice my mkinitrd/dracut/...; 3. this is the place where those firmware files are located at https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git Upstream doesn't expect them to have on other places; 4. 
When built with display support, DMC firmware will be loaded from i915/ directory. It is very confusing to have some firmware files on a different place for the same driver. Cc: Matthew Brost Cc: Lucas de Marchi Cc: Rodrigo Vivi Cc: Thomas Hellstrom Cc: Daniel Vetter Cc: David Airlie Signed-off-by: Mauro Carvalho Chehab [ Mostly agree with the direction of "use the firmware blobs from upstream at their current location for these platforms". Previous directory was not wrong as the plan was to have it handled in the upstream firmware repo. For future platforms the location can be changed if the support is only in xe ] Signed-off-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230310081338.3275583-1-mauro.chehab@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 4 ++-- drivers/gpu/drm/xe/xe_uc_fw.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index bd89ac27828e..47b51ad5b015 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -55,12 +55,12 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) #define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \ - "xe/" \ + "i915/" \ __stringify(prefix_) "_" name_ "_" \ __stringify(major_) ".bin" #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ - "xe/" \ + "i915/" \ __stringify(prefix_) name_ \ __stringify(major_) "." \ __stringify(minor_) "." \ diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index ca64d379bb5e..bf31c3bb0e0f 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -175,6 +175,6 @@ static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) return __xe_uc_fw_get_upload_size(uc_fw); } -#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/xe" +#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" #endif -- cgit v1.2.3 From 7c51050b3b0799f5d74331a7eb81a7066d520731 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 9 Mar 2023 17:20:20 +0100 Subject: drm/xe: Use a define to set initial seqno for fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also for HW fences, write the initial seqno - 1 to the HW completed seqno to initialize. v2: - Use __dma_fence_is_later() to compare hw fence seqnos. 
(Matthew Auld) Signed-off-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 5 +++-- drivers/gpu/drm/xe/xe_hw_fence.c | 4 ++-- drivers/gpu/drm/xe/xe_hw_fence.h | 2 ++ drivers/gpu/drm/xe/xe_lrc.c | 3 +++ 4 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 3e40fb6d3f98..edd1192f6ec5 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -13,6 +13,7 @@ #include "xe_device.h" #include "xe_gt.h" +#include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_migrate.h" @@ -57,11 +58,11 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe, if (xe_engine_is_parallel(e)) { e->parallel.composite_fence_ctx = dma_fence_context_alloc(1); - e->parallel.composite_fence_seqno = 1; + e->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; } if (e->flags & ENGINE_FLAG_VM) { e->bind.fence_ctx = dma_fence_context_alloc(1); - e->bind.fence_seqno = 1; + e->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO; } for (i = 0; i < width; ++i) { diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index e56ca2867545..ffe1a3992ef5 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -129,7 +129,7 @@ void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, ctx->gt = gt; ctx->irq = irq; ctx->dma_fence_ctx = dma_fence_context_alloc(1); - ctx->next_seqno = 1; + ctx->next_seqno = XE_FENCE_INITIAL_SEQNO; sprintf(ctx->name, "%s", name); } @@ -165,7 +165,7 @@ static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || - (s32)fence->dma.seqno <= (s32)seqno; + !__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops); } static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h index 07f202db6526..523c2611ef5d 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.h +++ b/drivers/gpu/drm/xe/xe_hw_fence.h @@ -8,6 +8,8 @@ #include "xe_hw_fence_types.h" +#define XE_FENCE_INITIAL_SEQNO 1 + int xe_hw_fence_module_init(void); void xe_hw_fence_module_exit(void); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 9140b057a5ba..fb8c6f7d6528 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -697,6 +697,9 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); + map = __xe_lrc_seqno_map(lrc); + xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + return 0; err_lrc_finish: -- cgit v1.2.3 From fc1cc680304db1c452156968f4ab95f9c553f746 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 10 Mar 2023 17:56:55 +0100 Subject: drm/xe/migrate: Update cpu page-table updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't wait for GPU to be able to update page-tables using CPU. Putting ourselves to sleep may be more of a problem than using GPU for page-table updates. Also allow the vm to be NULL since the migrate kunit test uses NULL for vm. 
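The diff that follows replaces the 10 ms dma_resv_wait_timeout() calls with non-blocking dma_resv_test_signaled() checks: the CPU path is only taken when the relevant reservation fences have already signaled, and otherwise -ETIME is returned so the caller falls back to a GPU job. The standalone sketch below (hypothetical names, not xe driver code) shows that try-then-fall-back shape, under the assumption that the caller treats -ETIME as "use the GPU instead".

/*
 * Standalone sketch, not xe code: never sleep waiting for fences in the
 * CPU page-table path; just test them and let the caller fall back to a
 * GPU job. All names here are hypothetical stand-ins.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a non-blocking dma_resv_test_signaled()-style query. */
static bool resv_already_signaled(bool pending_work)
{
	return !pending_work;
}

static int try_cpu_update(bool bo_busy, bool vm_busy)
{
	/* No waiting: if anything is still in flight, bail out immediately. */
	if (!resv_already_signaled(bo_busy) || !resv_already_signaled(vm_busy))
		return -ETIME;

	printf("page tables updated with the CPU\n");
	return 0;
}

static void update_pgtables(bool bo_busy, bool vm_busy)
{
	if (try_cpu_update(bo_busy, vm_busy) == -ETIME)
		printf("falling back to a GPU job for the update\n");
}

int main(void)
{
	update_pgtables(false, false);	/* everything idle: CPU path */
	update_pgtables(true, false);	/* busy BO: GPU fallback, no sleeping */
	return 0;
}

The point of the design choice is that blocking the submitting thread for up to 10 ms can hurt more than simply queuing the update on the GPU, so the fast path is only used when it is already known to be free.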
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 4a9fe1f7128d..366892198d11 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -979,25 +979,13 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m, int err; u32 i; - /* Wait on BO moves for 10 ms, then fall back to GPU job */ - if (bo) { - long wait; + if (bo && !dma_resv_test_signaled(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL)) + return ERR_PTR(-ETIME); - wait = dma_resv_wait_timeout(bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL, - true, HZ / 100); - if (wait <= 0) - return ERR_PTR(-ETIME); - } - if (wait_vm) { - long wait; - - wait = dma_resv_wait_timeout(&vm->resv, - DMA_RESV_USAGE_BOOKKEEP, - true, HZ / 100); - if (wait <= 0) - return ERR_PTR(-ETIME); - } + if (wait_vm && !dma_resv_test_signaled(&vm->resv, + DMA_RESV_USAGE_BOOKKEEP)) + return ERR_PTR(-ETIME); if (ops->pre_commit) { err = ops->pre_commit(pt_update); @@ -1011,8 +999,10 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m, update->ofs, update->qwords, update); } - trace_xe_vm_cpu_bind(vm); - xe_device_wmb(vm->xe); + if (vm) { + trace_xe_vm_cpu_bind(vm); + xe_device_wmb(vm->xe); + } fence = dma_fence_get_stub(); -- cgit v1.2.3 From 17a28ea23c4087cf4580744a70105ccc83efc769 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 10 Mar 2023 16:41:08 +0100 Subject: drm/xe/tests: Support CPU page-table updates in the migrate test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migrate test currently supports only GPU pagetable updates and will thus break if we fix the CPU pagetable update selection. Fix the migrate test first. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index ac659b94e7f5..a3bace16282e 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -66,9 +66,16 @@ sanity_populate_cb(struct xe_migrate_pt_update *pt_update, { int i; u64 *ptr = dst; - - for (i = 0; i < num_qwords; i++) - ptr[i] = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; + u64 value; + + for (i = 0; i < num_qwords; i++) { + value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; + if (map) + xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * + sizeof(u64), u64, value); + else + ptr[i] = value; + } } static const struct xe_migrate_pt_update_ops sanity_ops = { -- cgit v1.2.3 From 155c9165542863c97b5284afa37e3d8e385a8815 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 10 Mar 2023 12:03:47 +0100 Subject: drm/xe: Introduce xe_engine_is_idle() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce xe_engine_is_idle, and replace the static function in xe_migrate.c. The latter had two flaws. First the seqno == 1 test might return a false true value each time the seqno counter wrapped, Second, the cur_seqno == next_seqno test would never return true. 
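Because next_seqno holds the value the next fence will be assigned, the last completed seqno can only ever catch up to next_seqno - 1, which is exactly the condition the new xe_engine_is_idle() in the diff below tests; it also explains why the old "cur_seqno == next_seqno" check could never fire and why "seqno == 1" misreads a wrapped counter as freshly initialized. A minimal standalone illustration of that predicate (plain C with a hypothetical struct, not the driver's xe_lrc_seqno() plumbing):

/*
 * Standalone sketch, not xe code: idle means the last completed seqno is
 * exactly one behind the seqno the next submission would use.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_engine {
	uint32_t completed_seqno;	/* what the HW reports as done */
	uint32_t next_seqno;		/* what the next fence would get */
};

static bool engine_is_idle(const struct fake_engine *e)
{
	return e->completed_seqno == e->next_seqno - 1;
}

int main(void)
{
	/* Freshly initialized: completed = initial - 1, next = initial. */
	struct fake_engine fresh = { .completed_seqno = 0, .next_seqno = 1 };
	/* Fence 42 still in flight: HW has not reached next - 1 yet. */
	struct fake_engine busy  = { .completed_seqno = 41, .next_seqno = 43 };

	printf("fresh idle: %d, busy idle: %d\n",
	       engine_is_idle(&fresh), engine_is_idle(&busy));
	return 0;
}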
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_engine.h | 2 ++ drivers/gpu/drm/xe/xe_migrate.c | 8 +------- 3 files changed, 26 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index edd1192f6ec5..8011f5827cbe 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -683,6 +683,29 @@ static void engine_kill_compute(struct xe_engine *e) up_write(&e->vm->lock); } +/** + * xe_engine_is_idle() - Whether an engine is idle. + * @engine: The engine + * + * FIXME: Need to determine what to use as the short-lived + * timeline lock for the engines, so that the return value + * of this function becomes more than just an advisory + * snapshot in time. The timeline lock must protect the + * seqno from racing submissions on the same engine. + * Typically vm->resv, but user-created timeline locks use the migrate vm + * and never grabs the migrate vm->resv so we have a race there. + * + * Return: True if the engine is idle, false otherwise. + */ +bool xe_engine_is_idle(struct xe_engine *engine) +{ + if (XE_WARN_ON(xe_engine_is_parallel(engine))) + return false; + + return xe_lrc_seqno(&engine->lrc[0]) == + engine->lrc[0].fence_ctx.next_seqno - 1; +} + void xe_engine_kill(struct xe_engine *e) { struct xe_engine *engine = e, *next; diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h index a3a44534003f..1cf7f23c4afd 100644 --- a/drivers/gpu/drm/xe/xe_engine.h +++ b/drivers/gpu/drm/xe/xe_engine.h @@ -42,6 +42,8 @@ static inline bool xe_engine_is_parallel(struct xe_engine *engine) return engine->width > 1; } +bool xe_engine_is_idle(struct xe_engine *engine); + void xe_engine_kill(struct xe_engine *e); int xe_engine_create_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 366892198d11..4a600c64b5d0 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1024,12 +1024,6 @@ static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) return true; } -static bool engine_is_idle(struct xe_engine *e) -{ - return !e || e->lrc[0].fence_ctx.next_seqno == 1 || - xe_lrc_seqno(&e->lrc[0]) == e->lrc[0].fence_ctx.next_seqno; -} - /** * xe_migrate_update_pgtables() - Pipelined page-table update * @m: The migrate context. @@ -1082,7 +1076,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, bool first_munmap_rebind = vma && vma->first_munmap_rebind; /* Use the CPU if no in syncs and engine is idle */ - if (no_in_syncs(syncs, num_syncs) && engine_is_idle(eng)) { + if (no_in_syncs(syncs, num_syncs) && (!eng || xe_engine_is_idle(eng))) { fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, num_updates, first_munmap_rebind, -- cgit v1.2.3 From a5dfb471bba18fc38dc623ff1fa4387f48dacba6 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 10 Mar 2023 12:07:12 +0100 Subject: drm/xe: Use a small negative initial seqno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Causes an early 32-bit wrap and may thus help CI catch wrapping errors that may otherwise not show early enough. 
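Starting the 32-bit seqno at -127 (0xffffff81) makes the counter wrap past zero after roughly 127 fences instead of after about four billion, so any comparison that is not wrap-safe misbehaves within the first CI runs. The standalone snippet below illustrates the effect; the wrap-safe check is written here as a signed difference of the two 32-bit values, in the spirit of the __dma_fence_is_later() change earlier in this series, without reproducing the actual dma-fence internals.

/*
 * Standalone demonstration, not xe code: with a small negative initial
 * seqno the counter wraps after ~127 fences. A plain '>' comparison
 * breaks across the wrap; a signed-difference comparison keeps working.
 */
#include <stdint.h>
#include <stdio.h>

#define INITIAL_SEQNO ((uint32_t)-127)	/* 0xffffff81, wraps quickly */

static int is_later(uint32_t a, uint32_t b)
{
	/* wrap-safe "a is later than b" */
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	uint32_t before_wrap = INITIAL_SEQNO + 100;	/* still near UINT32_MAX */
	uint32_t after_wrap  = INITIAL_SEQNO + 200;	/* wrapped, now a small value */

	printf("naive  : %d (wrong across the wrap)\n", after_wrap > before_wrap);
	printf("signed : %d (correct)\n", is_later(after_wrap, before_wrap));
	return 0;
}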
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_fence.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h index 523c2611ef5d..cfe5fd603787 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.h +++ b/drivers/gpu/drm/xe/xe_hw_fence.h @@ -8,7 +8,8 @@ #include "xe_hw_fence_types.h" -#define XE_FENCE_INITIAL_SEQNO 1 +/* Cause an early wrap to catch wrapping errors */ +#define XE_FENCE_INITIAL_SEQNO (-127) int xe_hw_fence_module_init(void); void xe_hw_fence_module_exit(void); -- cgit v1.2.3 From 7cba3396fd7e87a976b8ad1e30d734b72dec7e31 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 13 Mar 2023 20:37:24 +0100 Subject: drm/xe/tests: Test both CPU- and GPU page-table updates with the migrate test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a test parameter to force GPU page-table updates with the migrate test and test both CPU- and GPU updates. Also provide some timing results. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 24 ++++++++++++++++++++++-- drivers/gpu/drm/xe/tests/xe_test.h | 1 + drivers/gpu/drm/xe/xe_migrate.c | 17 +++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index a3bace16282e..e50b6ceb56e6 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -64,6 +64,8 @@ sanity_populate_cb(struct xe_migrate_pt_update *pt_update, u32 qword_ofs, u32 num_qwords, const struct xe_vm_pgtable_update *update) { + struct migrate_test_params *p = + to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE)); int i; u64 *ptr = dst; u64 value; @@ -76,6 +78,10 @@ sanity_populate_cb(struct xe_migrate_pt_update *pt_update, else ptr[i] = value; } + + kunit_info(xe_cur_kunit(), "Used %s.\n", map ? 
"CPU" : "GPU"); + if (p->force_gpu && map) + KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n"); } static const struct xe_migrate_pt_update_ops sanity_ops = { @@ -177,11 +183,12 @@ out_unlock: } static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, - struct kunit *test) + struct kunit *test, bool force_gpu) { struct xe_device *xe = gt_to_xe(m->gt); struct dma_fence *fence; u64 retval, expected; + ktime_t then, now; int i; struct xe_vm_pgtable_update update = { @@ -192,16 +199,26 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, struct xe_migrate_pt_update pt_update = { .ops = &sanity_ops, }; + struct migrate_test_params p = { + .base.id = XE_TEST_LIVE_MIGRATE, + .force_gpu = force_gpu, + }; + test->priv = &p; /* Test xe_migrate_update_pgtables() updates the pagetable as expected */ expected = 0xf0f0f0f0f0f0f0f0ULL; xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); + then = ktime_get(); fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1, NULL, 0, &pt_update); + now = ktime_get(); if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) return; + kunit_info(test, "Updating without syncing took %llu us,\n", + (unsigned long long)ktime_to_us(ktime_sub(now, then))); + dma_fence_put(fence); retval = xe_map_rd(xe, &pt->vmap, 0, u64); check(retval, expected, "PTE[0] must stay untouched", test); @@ -344,7 +361,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) test_copy(m, big, test); } - test_pt_update(m, pt, test); + kunit_info(test, "Testing page table update using CPU if GPU idle.\n"); + test_pt_update(m, pt, test, false); + kunit_info(test, "Testing page table update using GPU\n"); + test_pt_update(m, pt, test, true); out: xe_bb_free(bb, NULL); diff --git a/drivers/gpu/drm/xe/tests/xe_test.h b/drivers/gpu/drm/xe/tests/xe_test.h index 00c8a3f9af81..7a1ae213e750 100644 --- a/drivers/gpu/drm/xe/tests/xe_test.h +++ b/drivers/gpu/drm/xe/tests/xe_test.h @@ -18,6 +18,7 @@ */ enum xe_test_priv_id { XE_TEST_LIVE_DMA_BUF, + XE_TEST_LIVE_MIGRATE, }; /** diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 4a600c64b5d0..3ee3d707a8ca 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -13,6 +13,7 @@ #include #include "regs/xe_gpu_commands.h" +#include "tests/xe_test.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_engine.h" @@ -967,6 +968,16 @@ struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m) return xe_vm_get(m->eng->vm); } +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +struct migrate_test_params { + struct xe_test_priv base; + bool force_gpu; +}; + +#define to_migrate_test_params(_priv) \ + container_of(_priv, struct migrate_test_params, base) +#endif + static struct dma_fence * xe_migrate_update_pgtables_cpu(struct xe_migrate *m, struct xe_vm *vm, struct xe_bo *bo, @@ -974,11 +985,17 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m, u32 num_updates, bool wait_vm, struct xe_migrate_pt_update *pt_update) { + XE_TEST_DECLARE(struct migrate_test_params *test = + to_migrate_test_params + (xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE));) const struct xe_migrate_pt_update_ops *ops = pt_update->ops; struct dma_fence *fence; int err; u32 i; + if (XE_TEST_ONLY(test && test->force_gpu)) + return ERR_PTR(-ETIME); + if (bo && !dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) return ERR_PTR(-ETIME); -- cgit v1.2.3 From 8e41443e1bb7a9aa03263ab9e317ef04927be5aa Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: 
Tue, 14 Mar 2023 15:56:44 +0100 Subject: drm/xe/vm: Defer vm rebind until next exec if nothing to execute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If all compute engines of a vm in compute mode are idle, defer a rebind to the next exec to avoid the VM unnecessarily trying to make memory resident and compete with other VMs for available memory space. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 1 + drivers/gpu/drm/xe/xe_vm.c | 18 ++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.h | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 5 +++++ 4 files changed, 41 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 97fd1a311f2d..ea869f2452ef 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -364,6 +364,7 @@ retry: &job->drm.s_fence->finished); xe_sched_job_push(job); + xe_vm_reactivate_rebind(vm); err_repin: if (!xe_vm_no_dma_fences(vm)) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 6cc3204adaa8..207d20da5c68 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -226,6 +226,19 @@ static int wait_for_existing_preempt_fences(struct xe_vm *vm) return 0; } +static bool xe_vm_is_idle(struct xe_vm *vm) +{ + struct xe_engine *e; + + xe_vm_assert_held(vm); + list_for_each_entry(e, &vm->preempt.engines, compute.link) { + if (!xe_engine_is_idle(e)) + return false; + } + + return true; +} + static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) { struct list_head *link; @@ -548,6 +561,11 @@ retry: if (err) goto out_unlock_outer; + if (xe_vm_is_idle(vm)) { + vm->preempt.rebind_deactivated = true; + goto out_unlock; + } + /* Fresh preempt fences already installed. Everyting is running. */ if (!preempt_fences_waiting(vm)) goto out_unlock; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 3468ed9d0528..748dc16ebed9 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -98,6 +98,23 @@ extern struct ttm_device_funcs xe_ttm_funcs; struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm); +/** + * xe_vm_reactivate_rebind() - Reactivate the rebind functionality on compute + * vms. + * @vm: The vm. + * + * If the rebind functionality on a compute vm was disabled due + * to nothing to execute. Reactivate it and run the rebind worker. + * This function should be called after submitting a batch to a compute vm. + */ +static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) +{ + if (xe_vm_in_compute_mode(vm) && vm->preempt.rebind_deactivated) { + vm->preempt.rebind_deactivated = false; + queue_work(system_unbound_wq, &vm->preempt.rebind_work); + } +} + static inline bool xe_vma_is_userptr(struct xe_vma *vma) { return !vma->bo; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 2a3b911ab358..fada7896867f 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -293,6 +293,11 @@ struct xe_vm { struct list_head engines; /** @num_engines: number user engines attached to this VM */ int num_engines; + /** + * @rebind_deactivated: Whether rebind has been temporarily deactivated + * due to no work available. Protected by the vm resv. 
+ */ + bool rebind_deactivated; /** * @rebind_work: worker to rebind invalidated userptrs / evicted * BOs -- cgit v1.2.3 From 2492f4544e6f81c3bb37abdcbc027bf7934b0310 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 14 Mar 2023 08:58:39 +0000 Subject: drm/xe/vram: start tracking the io_size First step towards supporting small-bar is to track the io_size for vram. We can longer assume that the io_size == vram size. This way we know how much is CPU accessible via the BAR, and how much is not. Effectively giving us a two tiered vram, where in some later patches we can support different allocation strategies depending on if the memory needs to be CPU accessible or not. Note as this stage we still clamp the vram size to the usable vram size. Only in the final patch do we turn this on for real, and allow distinct io_size and vram_size. v2: (Lucas): - Improve the commit message, plus improve the kernel-doc for the io_size to give a better sense of what it actually is. Signed-off-by: Matthew Auld Cc: Gwan-gyeong Mun Cc: Lucas De Marchi Reviewed-by: Maarten Lankhorst Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 14 ++++++++++-- drivers/gpu/drm/xe/xe_gt_types.h | 14 ++++++++++-- drivers/gpu/drm/xe/xe_mmio.c | 44 +++++++++++++++++++++++++----------- 3 files changed, 55 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 00b1db28a4b4..8d99450f0bf4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -115,9 +115,19 @@ struct xe_device { struct { /** @vram: VRAM info for device */ struct { - /** @io_start: start address of VRAM */ + /** @io_start: IO start address of VRAM */ resource_size_t io_start; - /** @size: size of VRAM */ + /** + * @io_size: IO size of VRAM. + * + * This represents how much of VRAM we can access via + * the CPU through the VRAM BAR. This can be smaller + * than @size, in which case only part of VRAM is CPU + * accessible (typically the first 256M). This + * configuration is known as small-bar. + */ + resource_size_t io_size; + /** @size: Total size of VRAM */ resource_size_t size; /** @mapping: pointer to VRAM mappable space */ void *__iomem mapping; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 74b4e6776bf1..8f29aba455e0 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -143,9 +143,19 @@ struct xe_gt { * (virtual split), can be subset of global device VRAM */ struct { - /** @io_start: start address of VRAM */ + /** @io_start: IO start address of this VRAM instance */ resource_size_t io_start; - /** @size: size of VRAM */ + /** + * @io_size: IO size of this VRAM instance + * + * This represents how much of this VRAM we can access + * via the CPU through the VRAM BAR. This can be smaller + * than @size, in which case only part of VRAM is CPU + * accessible (typically the first 256M). This + * configuration is known as small-bar. + */ + resource_size_t io_size; + /** @size: size of VRAM. 
*/ resource_size_t size; /** @mapping: pointer to VRAM mappable space */ void *__iomem mapping; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index e5bd4609aaee..5cacaa05759a 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -182,7 +182,6 @@ int xe_mmio_probe_vram(struct xe_device *xe) u8 id; u64 vram_size; u64 original_size; - u64 current_size; u64 usable_size; int resize_result, err; @@ -190,11 +189,13 @@ int xe_mmio_probe_vram(struct xe_device *xe) xe->mem.vram.mapping = 0; xe->mem.vram.size = 0; xe->mem.vram.io_start = 0; + xe->mem.vram.io_size = 0; for_each_gt(gt, xe, id) { gt->mem.vram.mapping = 0; gt->mem.vram.size = 0; gt->mem.vram.io_start = 0; + gt->mem.vram.io_size = 0; } return 0; } @@ -212,10 +213,10 @@ int xe_mmio_probe_vram(struct xe_device *xe) return err; resize_result = xe_resize_vram_bar(xe, vram_size); - current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); - - xe->mem.vram.size = min(current_size, vram_size); + xe->mem.vram.io_size = min(usable_size, + pci_resource_len(pdev, GEN12_LMEM_BAR)); + xe->mem.vram.size = xe->mem.vram.io_size; if (!xe->mem.vram.size) return -EIO; @@ -223,15 +224,15 @@ int xe_mmio_probe_vram(struct xe_device *xe) if (resize_result > 0) drm_info(&xe->drm, "Successfully resize VRAM from %lluMiB to %lluMiB\n", (u64)original_size >> 20, - (u64)current_size >> 20); - else if (xe->mem.vram.size < vram_size && !xe_force_vram_bar_size) + (u64)xe->mem.vram.io_size >> 20); + else if (xe->mem.vram.io_size < usable_size && !xe_force_vram_bar_size) drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n", (u64)xe->mem.vram.size >> 20); if (xe->mem.vram.size < vram_size) drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", vram_size, (u64)xe->mem.vram.size); - xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size); + xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size); drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size); @@ -239,7 +240,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) /* FIXME: Assuming equally partitioned VRAM, incorrect */ if (xe->info.tile_count > 1) { u8 adj_tile_count = xe->info.tile_count; - resource_size_t size, io_start; + resource_size_t size, io_start, io_size; for_each_gt(gt, xe, id) if (xe_gt_is_media_type(gt)) @@ -249,15 +250,31 @@ int xe_mmio_probe_vram(struct xe_device *xe) size = xe->mem.vram.size / adj_tile_count; io_start = xe->mem.vram.io_start; + io_size = xe->mem.vram.io_size; for_each_gt(gt, xe, id) { - if (id && !xe_gt_is_media_type(gt)) - io_start += size; + if (id && !xe_gt_is_media_type(gt)) { + io_size -= min(io_size, size); + io_start += io_size; + } gt->mem.vram.size = size; - gt->mem.vram.io_start = io_start; - gt->mem.vram.mapping = xe->mem.vram.mapping + - (io_start - xe->mem.vram.io_start); + + /* + * XXX: multi-tile small-bar might be wild. Hopefully + * full tile without any mappable vram is not something + * we care about. + */ + + gt->mem.vram.io_size = min(size, io_size); + if (io_size) { + gt->mem.vram.io_start = io_start; + gt->mem.vram.mapping = xe->mem.vram.mapping + + (io_start - xe->mem.vram.io_start); + } else { + drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); + return -ENODEV; + } drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, gt->info.vram_id, >->mem.vram.io_start, @@ -266,6 +283,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) } else { gt->mem.vram.size = xe->mem.vram.size; gt->mem.vram.io_start = xe->mem.vram.io_start; + gt->mem.vram.io_size = xe->mem.vram.io_size; gt->mem.vram.mapping = xe->mem.vram.mapping; drm_info(&xe->drm, "VRAM: %pa\n", >->mem.vram.size); -- cgit v1.2.3 From 436dbd6bffbf895ea151cf21af410ec1978cc10d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Mar 2023 15:34:41 -0700 Subject: drm/xe/mcr: Separate version from engine type selection In order to improve readability and make it more future proof, split the engine type from the graphics/platform checks. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230317223441.3891073-1-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 909059112179..5412f77bc26f 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -290,22 +290,24 @@ void xe_gt_mcr_init(struct xe_gt *gt) drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table; - } else if (GRAPHICS_VERx100(xe) >= 1270) { - gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table; - gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; - gt->steering[DSS].ranges = xelpg_dss_steering_table; - } else if (xe->info.platform == XE_PVC) { - gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table; - gt->steering[DSS].ranges = xehpc_dss_steering_table; - } else if (xe->info.platform == XE_DG2) { - gt->steering[L3BANK].ranges = xehp_l3bank_steering_table; - gt->steering[MSLICE].ranges = xehp_mslice_steering_table; - gt->steering[LNCF].ranges = xehp_lncf_steering_table; - gt->steering[DSS].ranges = xehp_dss_steering_table; - gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table; } else { - gt->steering[L3BANK].ranges = xelp_l3bank_steering_table; - gt->steering[DSS].ranges = xelp_dss_steering_table; + if (GRAPHICS_VERx100(xe) >= 1270) { + gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table; + gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; + gt->steering[DSS].ranges = xelpg_dss_steering_table; + } else if (xe->info.platform == XE_PVC) { + gt->steering[INSTANCE0].ranges = xehpc_instance0_steering_table; + gt->steering[DSS].ranges = xehpc_dss_steering_table; + } else if (xe->info.platform == XE_DG2) { + gt->steering[L3BANK].ranges = xehp_l3bank_steering_table; + gt->steering[MSLICE].ranges = xehp_mslice_steering_table; + gt->steering[LNCF].ranges = xehp_lncf_steering_table; + gt->steering[DSS].ranges = xehp_dss_steering_table; + gt->steering[IMPLICIT_STEERING].ranges = dg2_implicit_steering_table; + } else { + gt->steering[L3BANK].ranges = xelp_l3bank_steering_table; + gt->steering[DSS].ranges = xelp_dss_steering_table; + } } /* Select non-terminated steering target for each type */ -- cgit v1.2.3 From 1a653b879d6e408813096434ece5fa46c0752343 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 14 Mar 2023 08:58:40 +0000 Subject: drm/xe/buddy: remove the virtualized start Hopefully not needed anymore. We can add a .compatible() hook once we need to differentiate between mappable and non-mappable vram. 
If the allocation is not contiguous then the start value is kind of meaningless, so rather just mark as invalid. In upstream, TTM wants to eventually remove the ttm_resource.start usage. References: 544432703b2f ("drm/ttm: Add new callbacks to ttm res mgr") Signed-off-by: Matthew Auld Cc: Lucas De Marchi Reviewed-by: Maarten Lankhorst Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 6 ++++++ drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 29 ++++++++++++++--------------- 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 5e309b26f75c..3ca28f84dff7 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -672,6 +672,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, void *new_addr = gt->mem.vram.mapping + (new_mem->start << PAGE_SHIFT); + if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { + ret = -EINVAL; + xe_device_mem_access_put(xe); + goto out; + } + XE_BUG_ON(new_mem->start != bo->placements->fpfn); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 159ca7105df1..bafcadaed6b0 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -54,7 +54,6 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, struct xe_ttm_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; unsigned long pages_per_block; int r; @@ -186,24 +185,24 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, list_splice_tail(trim_list, &vres->blocks); } - vres->base.start = 0; - list_for_each_entry(block, &vres->blocks, link) { - unsigned long start; + if (!(vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) && + xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks)) + vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; - start = drm_buddy_block_offset(block) + - drm_buddy_block_size(mm, block); - start >>= PAGE_SHIFT; + /* + * For some kernel objects we still rely on the start when io mapping + * the object. + */ + if (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) { + struct drm_buddy_block *block = list_first_entry(&vres->blocks, + typeof(*block), + link); - if (start > PFN_UP(vres->base.size)) - start -= PFN_UP(vres->base.size); - else - start = 0; - vres->base.start = max(vres->base.start, start); + vres->base.start = drm_buddy_block_offset(block) >> PAGE_SHIFT; + } else { + vres->base.start = XE_BO_INVALID_OFFSET; } - if (xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks)) - vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; - *res = &vres->base; return 0; -- cgit v1.2.3 From eb230dc47dd6f543ae2ff9c85bbe86243502e171 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Fri, 17 Mar 2023 22:23:35 +0530 Subject: drm/xe: Use max wopcm size when validating the preset GuC wopcm size When the GuC wopcm base and size registers are populated by BIOS/IFWI, validate the parameters against the maximum allowed wopcm size. 
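For illustration only, a minimal sketch of the kind of bounds check this implies, assuming the preprogrammed base/size have already been read back from the registers; the helper name is made up here and GEN12_MAX_WOPCM_SIZE is the define added in the hunk below:

	static bool guc_wopcm_fits(u32 guc_wopcm_base, u32 guc_wopcm_size)
	{
		/* Preprogrammed region must lie entirely inside the max WOPCM window */
		return guc_wopcm_base < GEN12_MAX_WOPCM_SIZE &&
		       guc_wopcm_base + guc_wopcm_size <= GEN12_MAX_WOPCM_SIZE;
	}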
Bspec: 44982 Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wopcm.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index 7bb880355f6b..c8cc3f5e6154 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -54,6 +54,8 @@ for MTL, do a proper probe sooner or later */ #define GEN11_WOPCM_SIZE SZ_2M + +#define GEN12_MAX_WOPCM_SIZE SZ_8M /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */ #define WOPCM_RESERVED_SIZE SZ_16K @@ -221,6 +223,13 @@ int xe_wopcm_init(struct xe_wopcm *wopcm) if (locked) { drm_dbg(&xe->drm, "GuC WOPCM is already locked [%uK, %uK)\n", guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K); + /* + * When the GuC wopcm base and size are preprogrammed by + * BIOS/IFWI, check against the max allowed wopcm size to + * validate if the programmed values align to the wopcm layout. + */ + wopcm->size = GEN12_MAX_WOPCM_SIZE; + goto check; } -- cgit v1.2.3 From 11a2407ed5f017edcea436220ebba7c8619924f2 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Fri, 17 Mar 2023 21:05:30 +0530 Subject: drm/xe: Stop accepting value in xe_migrate_clear Although xe_migrate_clear() has a value argument, currently the driver is only passing 0 at all the places this function is invoked, with the exception of the kunit tests, which use the parameter to validate this function with different values. xe_migrate_clear() is failing on platforms with link copy engines because xe_migrate_clear() via emit_clear() is using the blitter instruction XY_FAST_COLOR_BLT to clear the memory. But this instruction is not supported by the link copy engine. So the solution is to use the alternate instruction MEM_SET when the platform contains a link copy engine. But the MEM_SET instruction accepts only an 8-bit value for setting, whereas the value argument of xe_migrate_clear() is 32-bit. So instead of spreading this limitation around all invocations of xe_migrate_clear() and causing more confusion, it was decided to not accept any value at all, as the driver does not really need this currently. All the kunit tests are adapted as per the new function prototype. This will be followed by a patch to add support for link copy engines. 
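A minimal caller-side sketch of what the prototype change means in practice (names as used elsewhere in the driver; the destination is now always cleared to zero):

	/* before this patch */
	fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0);
	/* after this patch */
	fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource);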
Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 9 +++++ drivers/gpu/drm/xe/tests/xe_bo.c | 2 +- drivers/gpu/drm/xe/tests/xe_migrate.c | 18 ++++----- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_migrate.c | 65 +++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_migrate.h | 3 +- drivers/gpu/drm/xe/xe_pci.c | 3 ++ 8 files changed, 80 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 288576035ce3..e60372a82723 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -6,6 +6,8 @@ #ifndef _XE_GPU_COMMANDS_H_ #define _XE_GPU_COMMANDS_H_ +#include "regs/xe_reg_defs.h" + #define INSTR_CLIENT_SHIFT 29 #define INSTR_MI_CLIENT 0x0 #define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT) @@ -56,6 +58,13 @@ #define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) #define BLT_DEPTH_32 (3<<24) +#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) +#define PVC_MEM_SET_CMD_LEN_DW 7 +#define PVC_MS_MATRIX REG_BIT(17) +#define PVC_MS_DATA_FIELD GENMASK(31, 24) +/* Bspec lists field as [6:0], but index alone is from [6:1] */ +#define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1) + #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) #define PIPE_CONTROL_AMFS_FLUSH (1<<25) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index f03fb907b59a..3c60cbdf516c 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -32,7 +32,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. */ if (clear) { - fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource, 0); + fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index e50b6ceb56e6..17829f878757 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -99,7 +99,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, struct kunit *test) { struct xe_device *xe = gt_to_xe(m->gt); - u64 retval, expected = 0xc0c0c0c0c0c0c0c0ULL; + u64 retval, expected = 0; bool big = bo->size >= SZ_2M; struct dma_fence *fence; const char *str = big ? "Copying big bo" : "Copying small bo"; @@ -130,7 +130,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); - fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource, 0xc0c0c0c0); + fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource); if (!sanity_fence_failed(xe, fence, big ? 
"Clearing sysmem big bo" : "Clearing sysmem small bo", test)) { retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); @@ -311,10 +311,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) bb->len = 0; bb->cs[bb->len++] = MI_BATCH_BUFFER_END; xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead); - expected = 0x12345678U; + expected = 0; emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, - expected, IS_DGFX(xe)); + IS_DGFX(xe)); run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", test); @@ -326,8 +326,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); - expected = 0x224488ff; - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, expected); + expected = 0; + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) goto out; @@ -342,11 +342,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) test_copy(m, tiny, test); } - /* Clear a big bo with a fixed value */ + /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); - expected = 0x11223344U; - fence = xe_migrate_clear(m, big, big->ttm.resource, expected); + expected = 0; + fence = xe_migrate_clear(m, big, big->ttm.resource); if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) goto out; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3ca28f84dff7..ba156a85460c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -686,7 +686,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } else { if (move_lacks_source) - fence = xe_migrate_clear(gt->migrate, bo, new_mem, 0); + fence = xe_migrate_clear(gt->migrate, bo, new_mem); else fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem); if (IS_ERR(fence)) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8d99450f0bf4..377a8979bc06 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -89,6 +89,8 @@ struct xe_device { bool has_4tile; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ bool has_range_tlb_invalidation; + /** @has_link_copy_engines: Whether the platform has link copy engines */ + bool has_link_copy_engine; } info; /** @irq: device interrupt state */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 3ee3d707a8ca..9102fa1d8759 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -747,14 +747,35 @@ err_sync: return fence; } -static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, - u32 size, u32 pitch, u32 value, bool is_vram) +static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch) { + u32 *cs = bb->cs + bb->len; + u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + u32 len = PVC_MEM_SET_CMD_LEN_DW; + + *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2); + *cs++ = pitch - 1; + *cs++ = (size / pitch) - 1; + *cs++ = pitch - 1; + *cs++ = lower_32_bits(src_ofs); + *cs++ = upper_32_bits(src_ofs); + *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs); + + XE_BUG_ON(cs - bb->cs != len + bb->len); + + bb->len += len; +} + +static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, + u64 src_ofs, u32 size, 
u32 pitch, bool is_vram) +{ + struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; u32 len = XY_FAST_COLOR_BLT_DW; u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); - if (GRAPHICS_VERx100(gt->xe) < 1250) + if (GRAPHICS_VERx100(xe) < 1250) len = 11; *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | @@ -766,7 +787,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, *cs++ = lower_32_bits(src_ofs); *cs++ = upper_32_bits(src_ofs); *cs++ = (is_vram ? 0x0 : 0x1) << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT; - *cs++ = value; + *cs++ = 0; *cs++ = 0; *cs++ = 0; *cs++ = 0; @@ -780,7 +801,30 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, } XE_BUG_ON(cs - bb->cs != len + bb->len); + bb->len += len; +} + +static u32 emit_clear_cmd_len(struct xe_device *xe) +{ + if (xe->info.has_link_copy_engine) + return PVC_MEM_SET_CMD_LEN_DW; + else + return XY_FAST_COLOR_BLT_DW; +} + +static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch, bool is_vram) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.has_link_copy_engine) { + emit_clear_link_copy(gt, bb, src_ofs, size, pitch); + + } else { + emit_clear_main_copy(gt, bb, src_ofs, size, pitch, + is_vram); + } return 0; } @@ -790,10 +834,9 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, * @m: The migration context. * @bo: The buffer object @dst is currently bound to. * @dst: The dst TTM resource to be cleared. - * @value: Clear value. * - * Clear the contents of @dst. On flat CCS devices, - * the CCS metadata is cleared to zero as well on VRAM destionations. + * Clear the contents of @dst to zero. On flat CCS devices, + * the CCS metadata is cleared to zero as well on VRAM destinations. * TODO: Eliminate the @bo argument. 
* * Return: Pointer to a dma_fence representing the last clear batch, or @@ -802,8 +845,7 @@ static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst, - u32 value) + struct ttm_resource *dst) { bool clear_vram = mem_type_is_vram(dst->mem_type); struct xe_gt *gt = m->gt; @@ -837,7 +879,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, clear_vram, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - XY_FAST_COLOR_BLT_DW, 0, NUM_PT_PER_BLIT); + emit_clear_cmd_len(xe), 0, + NUM_PT_PER_BLIT); if (xe_device_has_flat_ccs(xe) && clear_vram) batch_size += EMIT_COPY_CCS_DW; @@ -868,7 +911,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, update_idx = bb->len; emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE, - value, clear_vram); + clear_vram); if (xe_device_has_flat_ccs(xe) && clear_vram) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_vram_ofs, false, clear_L0); diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index a569851db6f7..1ff6e0a90de5 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -79,8 +79,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst, - u32 value); + struct ttm_resource *dst); struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6dcefb8cc7c3..0a3b61f08d37 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -71,6 +71,7 @@ struct xe_device_desc { bool has_4tile; bool has_range_tlb_invalidation; bool has_asid; + bool has_link_copy_engine; }; #define PLATFORM(x) \ @@ -226,6 +227,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .vm_max_level = 4, .supports_usm = true, .has_asid = true, + .has_link_copy_engine = true, }; #define MTL_MEDIA_ENGINES \ @@ -413,6 +415,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.has_flat_ccs = desc->has_flat_ccs; xe->info.has_4tile = desc->has_4tile; xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation; + xe->info.has_link_copy_engine = desc->has_link_copy_engine; spd = subplatform_get(xe, desc); xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE; -- cgit v1.2.3 From 793e6612deea5cf8117100b1d47754800b24dcfa Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 21 Mar 2023 11:44:06 +0000 Subject: drm/xe/buddy: add visible tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the allocation code with the i915 version. This simplifies the code a little, and importantly we get the accounting at the mgr level, which is useful for debug (and maybe userspace), plus per resource tracking so we can easily check if a resource is using one or pages in the mappable part of vram (useful for eviction), or if the resource is completely within the mappable portion (useful for checking if the resource can be safely CPU mapped). v2: Fix missing PAGE_SHIFT v3: (Gwan-gyeong Mun) - Fix incorrect usage of ilog2(mm.chunk_size). - Fix calculation when checking for impossible allocation sizes, also check much earlier. v4: (Gwan-gyeong Mun) - Fix calculation when extending the [fpfn, lpfn] range due to the roundup_pow_of_two(). 
v5: (Gwan-gyeong Mun) - Move the check for running out of mappable VRAM to before doing any of the roundup_pow_of_two(). v6: (Jani) - Stop abusing BUG_ON(). We can easily just use WARN_ON() here and return a proper error to the caller, which is much nicer if we ever trigger these. Signed-off-by: Matthew Auld Cc: Gwan-gyeong Mun Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Lucas De Marchi Cc: Jani Nikula Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 18 ++- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 211 +++++++++++++++-------------- drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 3 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 6 + 4 files changed, 129 insertions(+), 109 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 9629b1a677f2..31887fec1073 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -135,7 +135,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) { struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - u64 stolen_size, pgsize; + u64 stolen_size, io_size, pgsize; int err; if (IS_DGFX(xe)) @@ -154,7 +154,17 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (pgsize < PAGE_SIZE) pgsize = PAGE_SIZE; - err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, pgsize); + /* + * We don't try to attempt partial visible support for stolen vram, + * since stolen is always at the end of vram, and the BAR size is pretty + * much always 256M, with small-bar. + */ + io_size = 0; + if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe)) + io_size = stolen_size; + + err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, + io_size, pgsize); if (err) { drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); return; @@ -163,8 +173,8 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", stolen_size); - if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe)) - mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, stolen_size); + if (io_size) + mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size); } u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index bafcadaed6b0..78979c0b6024 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -49,45 +49,29 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - u64 max_bytes, cur_size, min_block_size; struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct xe_ttm_vram_mgr_resource *vres; - u64 size, remaining_size, lpfn, fpfn; struct drm_buddy *mm = &mgr->mm; - unsigned long pages_per_block; - int r; - - lpfn = (u64)place->lpfn << PAGE_SHIFT; - if (!lpfn || lpfn > man->size) - lpfn = man->size; + u64 size, remaining_size, min_page_size; + unsigned long lpfn; + int err; - fpfn = (u64)place->fpfn << PAGE_SHIFT; + lpfn = place->lpfn; + if (!lpfn || lpfn > man->size >> PAGE_SHIFT) + lpfn = man->size >> PAGE_SHIFT; - max_bytes = mgr->manager.size; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - pages_per_block = ~0ul; - } else { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - pages_per_block = HPAGE_PMD_NR; -#else - /* default to 2MB */ - pages_per_block = 
2UL << (20UL - PAGE_SHIFT); -#endif - - pages_per_block = max_t(uint32_t, pages_per_block, - tbo->page_alignment); - } + if (tbo->base.size >> PAGE_SHIFT > (lpfn - place->fpfn)) + return -E2BIG; /* don't trigger eviction for the impossible */ vres = kzalloc(sizeof(*vres), GFP_KERNEL); if (!vres) return -ENOMEM; ttm_resource_init(tbo, place, &vres->base); - remaining_size = vres->base.size; /* bail out quickly if there's likely not enough VRAM for this BO */ - if (ttm_resource_manager_usage(man) > max_bytes) { - r = -ENOSPC; + if (ttm_resource_manager_usage(man) > man->size) { + err = -ENOSPC; goto error_fini; } @@ -96,95 +80,100 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; - if (fpfn || lpfn != man->size) - /* Allocate blocks in desired range */ + if (place->fpfn || lpfn != man->size >> PAGE_SHIFT) vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - mutex_lock(&mgr->lock); - while (remaining_size) { - if (tbo->page_alignment) - min_block_size = tbo->page_alignment << PAGE_SHIFT; - else - min_block_size = mgr->default_page_size; - - XE_BUG_ON(min_block_size < mm->chunk_size); - - /* Limit maximum size to 2GiB due to SG table limitations */ - size = min(remaining_size, 2ULL << 30); - - if (size >= pages_per_block << PAGE_SHIFT) - min_block_size = pages_per_block << PAGE_SHIFT; - - cur_size = size; - - if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) { - /* - * Except for actual range allocation, modify the size and - * min_block_size conforming to continuous flag enablement - */ - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - size = roundup_pow_of_two(size); - min_block_size = size; - /* - * Modify the size value if size is not - * aligned with min_block_size - */ - } else if (!IS_ALIGNED(size, min_block_size)) { - size = round_up(size, min_block_size); - } - } + if (WARN_ON(!vres->base.size)) { + err = -EINVAL; + goto error_fini; + } + size = vres->base.size; - r = drm_buddy_alloc_blocks(mm, fpfn, - lpfn, - size, - min_block_size, - &vres->blocks, - vres->flags); - if (unlikely(r)) - goto error_free_blocks; + min_page_size = mgr->default_page_size; + if (tbo->page_alignment) + min_page_size = tbo->page_alignment << PAGE_SHIFT; + + if (WARN_ON(min_page_size < mm->chunk_size)) { + err = -EINVAL; + goto error_fini; + } - if (size > remaining_size) - remaining_size = 0; - else - remaining_size -= size; + if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */ + err = -EINVAL; + goto error_fini; } - mutex_unlock(&mgr->lock); - if (cur_size != size) { - struct drm_buddy_block *block; - struct list_head *trim_list; - u64 original_size; - LIST_HEAD(temp); + if (WARN_ON((size > SZ_2G && + (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) { + err = -EINVAL; + goto error_fini; + } - trim_list = &vres->blocks; - original_size = vres->base.size; + if (WARN_ON(!IS_ALIGNED(size, min_page_size))) { + err = -EINVAL; + goto error_fini; + } + mutex_lock(&mgr->lock); + if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) { + mutex_unlock(&mgr->lock); + err = -ENOSPC; + goto error_fini; + } + + if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn && + place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_page_size = size; + + lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn); + } + + remaining_size = size; + do { /* - * If size value is rounded up to min_block_size, trim the last - * block to the required size + * Limit maximum size to 
2GiB due to SG table limitations. + * FIXME: Should maybe be handled as part of sg construction. */ - if (!list_is_singular(&vres->blocks)) { - block = list_last_entry(&vres->blocks, typeof(*block), link); - list_move_tail(&block->link, &temp); - trim_list = &temp; - /* - * Compute the original_size value by subtracting the - * last block size with (aligned size - original size) - */ - original_size = drm_buddy_block_size(mm, block) - - (size - cur_size); - } + u64 alloc_size = min_t(u64, remaining_size, SZ_2G); + + err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, + (u64)lpfn << PAGE_SHIFT, + alloc_size, + min_page_size, + &vres->blocks, + vres->flags); + if (err) + goto error_free_blocks; - mutex_lock(&mgr->lock); - drm_buddy_block_trim(mm, - original_size, - trim_list); - mutex_unlock(&mgr->lock); + remaining_size -= alloc_size; + } while (remaining_size); - if (!list_empty(&temp)) - list_splice_tail(trim_list, &vres->blocks); + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + size = vres->base.size; } + if (lpfn <= mgr->visible_size >> PAGE_SHIFT) { + vres->used_visible_size = size; + } else { + struct drm_buddy_block *block; + + list_for_each_entry(block, &vres->blocks, link) { + u64 start = drm_buddy_block_offset(block); + + if (start < mgr->visible_size) { + u64 end = start + drm_buddy_block_size(mm, block); + + vres->used_visible_size += + min(end, mgr->visible_size) - start; + } + } + } + + mgr->visible_avail -= vres->used_visible_size; + mutex_unlock(&mgr->lock); + if (!(vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) && xe_is_vram_mgr_blocks_contiguous(mm, &vres->blocks)) vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; @@ -213,7 +202,7 @@ error_fini: ttm_resource_fini(man, &vres->base); kfree(vres); - return r; + return err; } static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, @@ -226,6 +215,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, mutex_lock(&mgr->lock); drm_buddy_free_list(mm, &vres->blocks); + mgr->visible_avail += vres->used_visible_size; mutex_unlock(&mgr->lock); ttm_resource_fini(man, res); @@ -240,6 +230,13 @@ static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_buddy *mm = &mgr->mm; mutex_lock(&mgr->lock); + drm_printf(printer, "default_page_size: %lluKiB\n", + mgr->default_page_size >> 10); + drm_printf(printer, "visible_avail: %lluMiB\n", + (u64)mgr->visible_avail >> 20); + drm_printf(printer, "visible_size: %lluMiB\n", + (u64)mgr->visible_size >> 20); + drm_buddy_print(mm, printer); mutex_unlock(&mgr->lock); drm_printf(printer, "man size:%llu\n", man->size); @@ -262,6 +259,8 @@ static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg) if (ttm_resource_manager_evict_all(&xe->ttm, man)) return; + WARN_ON_ONCE(mgr->visible_avail != mgr->visible_size); + drm_buddy_fini(&mgr->mm); ttm_resource_manager_cleanup(&mgr->manager); @@ -270,7 +269,8 @@ static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg) } int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, - u32 mem_type, u64 size, u64 default_page_size) + u32 mem_type, u64 size, u64 io_size, + u64 default_page_size) { struct ttm_resource_manager *man = &mgr->manager; int err; @@ -279,6 +279,8 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, mgr->mem_type = mem_type; mutex_init(&mgr->lock); mgr->default_page_size = default_page_size; + mgr->visible_size = io_size; + mgr->visible_avail = io_size; 
ttm_resource_manager_init(man, &xe->ttm, size); err = drm_buddy_init(&mgr->mm, man->size, default_page_size); @@ -298,7 +300,8 @@ int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr) mgr->gt = gt; return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + gt->info.vram_id, - gt->mem.vram.size, PAGE_SIZE); + gt->mem.vram.size, gt->mem.vram.io_size, + PAGE_SIZE); } int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index 78f332d26224..35e5367a79fb 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -13,7 +13,8 @@ struct xe_device; struct xe_gt; int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, - u32 mem_type, u64 size, u64 default_page_size); + u32 mem_type, u64 size, u64 io_size, + u64 default_page_size); int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr); int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, struct ttm_resource *res, diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index 39aa2ec1b968..3d9417ff7434 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -23,6 +23,10 @@ struct xe_ttm_vram_mgr { struct ttm_resource_manager manager; /** @mm: DRM buddy allocator which manages the VRAM */ struct drm_buddy mm; + /** @visible_size: Probed size of the CPU visible portion */ + u64 visible_size; + /** @visible_avail: CPU visible portion still unallocated */ + u64 visible_avail; /** @default_page_size: default page size */ u64 default_page_size; /** @lock: protects allocations of VRAM */ @@ -39,6 +43,8 @@ struct xe_ttm_vram_mgr_resource { struct ttm_resource base; /** @blocks: list of DRM buddy blocks */ struct list_head blocks; + /** @used_visible_size: How many CPU visible bytes this resource is using */ + u64 used_visible_size; /** @flags: flags associated with the resource */ unsigned long flags; }; -- cgit v1.2.3 From ce79c6c43af7280c1f26d700959d04a7e62092af Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 21 Mar 2023 11:44:07 +0000 Subject: drm/xe/buddy: add compatible and intersects hooks Copy this from i915. We need .compatible for vram -> vram transfers, so they don't just get nooped by TTM, if we need to move something from mappable to non-mappable or vice versa. The .intersects is needed for eviction, to determine if a victim resource is worth evicting, e.g. if we need mappable space there is no point in evicting a resource that has zero mappable pages. 
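As a rough sketch of how such a constraint is expressed (assuming an mgr pointer in scope and the visible_size field added in the previous patch; field usage mirrors the hunks below), a "CPU-visible VRAM only" request is just a pfn window, and the two hooks answer different questions about it:

	struct ttm_place mappable_only = {
		.fpfn = 0,
		.lpfn = mgr->visible_size >> PAGE_SHIFT,
	};
	/* .intersects(): does the victim have any pages inside this window (worth evicting)? */
	/* .compatible(): does the resource already sit entirely inside it (no move required)? */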
Signed-off-by: Matthew Auld Cc: Lucas De Marchi Reviewed-by: Maarten Lankhorst Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 62 ++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 78979c0b6024..73836b9b7fed 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -242,9 +242,71 @@ static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man, drm_printf(printer, "man size:%llu\n", man->size); } +static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(res); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + + if (!place->fpfn && !place->lpfn) + return true; + + if (!place->fpfn && place->lpfn == mgr->visible_size >> PAGE_SHIFT) + return vres->used_visible_size > 0; + + list_for_each_entry(block, &vres->blocks, link) { + unsigned long fpfn = + drm_buddy_block_offset(block) >> PAGE_SHIFT; + unsigned long lpfn = fpfn + + (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + + if (place->fpfn < lpfn && place->lpfn > fpfn) + return true; + } + + return false; +} + +static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(res); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + + if (!place->fpfn && !place->lpfn) + return true; + + if (!place->fpfn && place->lpfn == mgr->visible_size >> PAGE_SHIFT) + return vres->used_visible_size == size; + + list_for_each_entry(block, &vres->blocks, link) { + unsigned long fpfn = + drm_buddy_block_offset(block) >> PAGE_SHIFT; + unsigned long lpfn = fpfn + + (drm_buddy_block_size(mm, block) >> PAGE_SHIFT); + + if (fpfn < place->fpfn || lpfn > place->lpfn) + return false; + } + + return true; +} + static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = { .alloc = xe_ttm_vram_mgr_new, .free = xe_ttm_vram_mgr_del, + .intersects = xe_ttm_vram_mgr_intersects, + .compatible = xe_ttm_vram_mgr_compatible, .debug = xe_ttm_vram_mgr_debug }; -- cgit v1.2.3 From 044f0cfb19473cd1b60a69c802cac0651066fa21 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 20 Mar 2023 10:46:24 -0700 Subject: drm/xe: Drop zero length arrays Zero-length arrays as fake flexible arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. 
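The practical difference is mainly on the allocation side; a minimal sketch (not taken from this patch) of sizing a trailing flexible array with struct_size() from <linux/overflow.h>, with 'width' assumed to be the number of entries needed:

	/* One xe_engine followed by 'width' logical ring contexts */
	struct xe_engine *e = kzalloc(struct_size(e, lrc, width), GFP_KERNEL);
	if (!e)
		return -ENOMEM;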
Reviewed-by: Matthew Auld Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine_types.h | 2 +- drivers/gpu/drm/xe/xe_sched_job_types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h index 2f6f0f2a0a8b..a0cd80cb9d7b 100644 --- a/drivers/gpu/drm/xe/xe_engine_types.h +++ b/drivers/gpu/drm/xe/xe_engine_types.h @@ -164,7 +164,7 @@ struct xe_engine { /** @entity: DRM sched entity for this engine (1 to 1 relationship) */ struct drm_sched_entity *entity; /** @lrc: logical ring context for this engine */ - struct xe_lrc lrc[0]; + struct xe_lrc lrc[]; }; /** diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index fd1d75996127..5534bfacaa16 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -40,7 +40,7 @@ struct xe_sched_job { /** @migrate_flush_flags: Additional flush flags for migration jobs */ u32 migrate_flush_flags; /** @batch_addr: batch buffer address of job */ - u64 batch_addr[0]; + u64 batch_addr[]; }; #endif -- cgit v1.2.3 From 38c04b47cec861cf4007b3e53cbf584e494e2762 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Feb 2023 11:17:30 +0100 Subject: drm/xe: Use atomic instead of mutex for xe_device_mem_access_ongoing xe_guc_ct_fast_path() is called from an irq context, and cannot lock the mutex used by xe_device_mem_access_ongoing(). Fortunately it is easy to fix, and the atomic guarantees are good enough to ensure xe->mem_access.hold_rpm is set before last ref is dropped. As far as I can tell, the runtime ref in device access should be killable, but don't dare to do it yet. Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Brost Acked-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 18 ++++++++---------- drivers/gpu/drm/xe/xe_device.h | 14 ++++---------- drivers/gpu/drm/xe/xe_device_types.h | 4 +--- 3 files changed, 13 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 49ce11fc1174..ffacf80c8942 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -206,8 +206,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err_put; - drmm_mutex_init(&xe->drm, &xe->mem_access.lock); - return xe; err_put: @@ -354,25 +352,25 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) void xe_device_mem_access_get(struct xe_device *xe) { bool resumed = xe_pm_runtime_resume_if_suspended(xe); + int ref = atomic_inc_return(&xe->mem_access.ref); - mutex_lock(&xe->mem_access.lock); - if (xe->mem_access.ref++ == 0) + if (ref == 1) xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe); - mutex_unlock(&xe->mem_access.lock); /* The usage counter increased if device was immediately resumed */ if (resumed) xe_pm_runtime_put(xe); - XE_WARN_ON(xe->mem_access.ref == S32_MAX); + XE_WARN_ON(ref == S32_MAX); } void xe_device_mem_access_put(struct xe_device *xe) { - mutex_lock(&xe->mem_access.lock); - if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm) + bool hold = xe->mem_access.hold_rpm; + int ref = atomic_dec_return(&xe->mem_access.ref); + + if (!ref && hold) xe_pm_runtime_put(xe); - mutex_unlock(&xe->mem_access.lock); - XE_WARN_ON(xe->mem_access.ref < 0); + XE_WARN_ON(ref < 0); } diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 
25c5087f5aad..d277f8985f7b 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -90,20 +90,14 @@ static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt) void xe_device_mem_access_get(struct xe_device *xe); void xe_device_mem_access_put(struct xe_device *xe); -static inline void xe_device_assert_mem_access(struct xe_device *xe) +static inline bool xe_device_mem_access_ongoing(struct xe_device *xe) { - XE_WARN_ON(!xe->mem_access.ref); + return atomic_read(&xe->mem_access.ref); } -static inline bool xe_device_mem_access_ongoing(struct xe_device *xe) +static inline void xe_device_assert_mem_access(struct xe_device *xe) { - bool ret; - - mutex_lock(&xe->mem_access.lock); - ret = xe->mem_access.ref; - mutex_unlock(&xe->mem_access.lock); - - return ret; + XE_WARN_ON(!xe_device_mem_access_ongoing(xe)); } static inline bool xe_device_in_fault_mode(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 377a8979bc06..3917b9152eb9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -184,10 +184,8 @@ struct xe_device { * triggering additional actions when they occur. */ struct { - /** @lock: protect the ref count */ - struct mutex lock; /** @ref: ref count of memory accesses */ - s32 ref; + atomic_t ref; /** @hold_rpm: need to put rpm ref back at the end */ bool hold_rpm; } mem_access; -- cgit v1.2.3 From 4f1411e2dab7a398c31cebbeedebbe11b239c9d9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 21 Mar 2023 18:16:47 -0700 Subject: drm/xe: Reinstate render / compute cache invalidation in ring ops Render / compute engines have additional caches (not just TLBs) that need to be invalidated each batch, reinstate these invalidations in ring ops. 
Reviewed-by: Matt Roper Suggested-by: Matt Roper Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 5 +++++ drivers/gpu/drm/xe/xe_ring_ops.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index e60372a82723..9d6508d74d62 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -66,20 +66,25 @@ #define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) #define PIPE_CONTROL_AMFS_FLUSH (1<<25) #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) +#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) #define PIPE_CONTROL_PSD_SYNC (1<<17) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) +#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) #define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) +#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) #define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 7dd886536fbc..5480746d40e8 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -106,6 +106,30 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i) return i; } +static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i) +{ + u32 flags = PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | + PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE | + PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_STORE_DATA_INDEX; + + flags &= ~mask_flags; + + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = flags; + dw[i++] = LRC_PPHWSP_SCRATCH_ADDR; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + #define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21)) static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, @@ -212,8 +236,14 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->engine->gt; struct xe_device *xe = gt_to_xe(gt); bool pvc = xe->info.platform == XE_PVC; + u32 mask_flags = 0; dw[i++] = preparser_disable(true); + if (pvc) + mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; + else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) + mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; + i = emit_pipe_invalidate(mask_flags, dw, i); /* Wa_1809175790 */ if (!xe->info.has_flat_ccs) i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i); -- cgit v1.2.3 From 99c5952fe36107ee57fa0ad7115ffa76222a8810 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 22 Mar 2023 10:35:45 +0000 Subject: drm/xe/gt: some error handling fixes Make sure we pass along the correct errors. 
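The underlying bug pattern, sketched for clarity (this mirrors the first two hunks below): once xe_bb_create_wa_job() has returned an ERR_PTR, 'bb' itself is still a valid pointer, so PTR_ERR() must be applied to 'job':

	job = xe_bb_create_wa_job(e, bb, batch_ofs);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);	/* previously PTR_ERR(bb), which is not an error code */
	}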
Signed-off-by: Matthew Auld Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index daa433d0f2f5..07464ba42746 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -234,7 +234,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) job = xe_bb_create_wa_job(e, bb, batch_ofs); if (IS_ERR(job)) { xe_bb_free(bb, NULL); - return PTR_ERR(bb); + return PTR_ERR(job); } xe_sched_job_arm(job); @@ -285,7 +285,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) job = xe_bb_create_wa_job(e, bb, batch_ofs); if (IS_ERR(job)) { xe_bb_free(bb, NULL); - return PTR_ERR(bb); + return PTR_ERR(job); } xe_sched_job_arm(job); @@ -545,8 +545,10 @@ static int all_fw_domain_init(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) { gt->migrate = xe_migrate_init(gt); - if (IS_ERR(gt->migrate)) + if (IS_ERR(gt->migrate)) { + err = PTR_ERR(gt->migrate); goto err_force_wake; + } } else { gt->migrate = xe_find_full_gt(gt)->migrate; } -- cgit v1.2.3 From 59ea53eecb7154a2ac8aa39f21f16a144be3eecc Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 23 Mar 2023 09:25:00 -0700 Subject: drm/xe: Use BO's GT to determine dma_offset when programming PTEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than using the passed in GT, use the BO's GT to determine the dma_offset when programming PTEs, as these two GTs could differ (i.e. mapping a BO from a remote GT). The BO's GT is the correct GT to use as this is where the BO resides, while the passed in GT is where the mapping is created. v2: (Thomas) - Kernel doc, extra new line (CI) - Rebase to tip Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 13 +++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 2 ++ drivers/gpu/drm/xe/xe_pt.c | 4 +++- 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ba156a85460c..42a5978ecc74 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -79,6 +79,19 @@ mem_type_to_gt(struct xe_device *xe, u32 mem_type) return xe_device_get_gt(xe, mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)); } +/** + * xe_bo_to_gt() - Get a GT from a BO's memory location + * @bo: The buffer object + * + * Get a GT from a BO's memory location, should be called on BOs in VRAM only. 
+ * + * Return: xe_gt object which is closest to the BO */ +struct xe_gt *xe_bo_to_gt(struct xe_bo *bo) +{ + return mem_type_to_gt(xe_bo_device(bo), bo->ttm.resource->mem_type); +} + static void try_add_system(struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 *c) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 4350845542bf..e38894c1255d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -105,6 +105,8 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags); +struct xe_gt *xe_bo_to_gt(struct xe_bo *bo); + static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) { return container_of(bo, struct xe_bo, ttm); } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 64da98152455..7aa12f86e55b 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -758,10 +758,12 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, int ret; if (is_vram) { + struct xe_gt *bo_gt = xe_bo_to_gt(bo); + xe_walk.default_pte = GEN12_PPGTT_PTE_LM; if (vma && vma->use_atomic_access_pte_bit) xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE; - xe_walk.dma_offset = gt->mem.vram.io_start - + xe_walk.dma_offset = bo_gt->mem.vram.io_start - gt_to_xe(gt)->mem.vram.io_start; xe_walk.cache = XE_CACHE_WB; } else { -- cgit v1.2.3 From b4eecedc75c1b75eee359c806fc964f70e0fc983 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 20 Mar 2023 13:58:36 -0700 Subject: drm/xe: Fix potential deadlock handling page faults Within a class the GuC will halt scheduling if the head of the queue can't be scheduled, so the queue will block. This can lead to deadlock if BCS0-7 all have faults and another engine on BCS0-7 is at the head of the GuC scheduling queue, as the migration engine used to fix the fault will be blocked. To work around this, set the migration engine to the highest priority when servicing page faults. v2 (Maarten): Set priority to kernel once at creation Signed-off-by: Matthew Brost Reviewed-by: Brian Welty Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 9102fa1d8759..b9b2a64447d8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -360,6 +360,8 @@ struct xe_migrate *xe_migrate_init(struct xe_gt *gt) xe_vm_close_and_put(vm); return ERR_CAST(m->eng); } + if (xe->info.supports_usm) + m->eng->priority = XE_ENGINE_PRIORITY_KERNEL; mutex_init(&m->job_mutex); -- cgit v1.2.3 From 85ea2bd2fd18ec43e2569da3e21c91fc6832b464 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 23 Mar 2023 15:46:50 -0700 Subject: drm/xe/huc: Support for loading unversioned HuC Follow the new direction of firmware and add macro support for loading unversioned HuC. Keep HuC versioned loading support as well for platforms that fall under force_probe support. Add a check to ensure the driver does not do any version check for HuC if going through unversioned load. v2: unversioned firmware to be the default for platforms not under force_probe. Maintain versioned firmware macro support for platforms under force-probe protection. 
v3: Minor style and naming adjustments (Lucas) Cc: Matt Roper Cc: Daniele Ceraolo Spurio Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230323224651.1187366-2-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 53 +++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 47b51ad5b015..f3e4e3774d68 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -50,18 +50,22 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) fw_def(DG1, 0, guc_def(dg1, 70, 5, 2)) \ fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 5, 2)) -#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ - fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \ - fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) +#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \ + fw_def(DG1, 0, huc_def(dg1)) \ + fw_def(TIGERLAKE, 0, huc_def(tgl)) + +#define __MAKE_HUC_FW_PATH(prefix_, name_) \ + "i915/" \ + __stringify(prefix_) "_" name_ ".bin" #define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \ "i915/" \ __stringify(prefix_) "_" name_ "_" \ __stringify(major_) ".bin" -#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ +#define __MAKE_UC_FW_PATH_FULL_VER(prefix_, name_, major_, minor_, patch_) \ "i915/" \ - __stringify(prefix_) name_ \ + __stringify(prefix_) "_" name_ "_" \ __stringify(major_) "." \ __stringify(minor_) "." \ __stringify(patch_) ".bin" @@ -69,15 +73,19 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) #define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ __MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_) -#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ - __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) +#define MAKE_HUC_FW_PATH(prefix_) \ + __MAKE_HUC_FW_PATH(prefix_, "huc") + +#define MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH_FULL_VER(prefix_, "huc", major_, minor_, patch_) + /* All blobs need to be declared via MODULE_FIRMWARE() */ #define XE_UC_MODULE_FW(platform_, revid_, uc_) \ MODULE_FIRMWARE(uc_); XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH) -XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH) +XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH, MAKE_HUC_FW_PATH_FULL_VER) /* The below structs and macros are used to iterate across the list of blobs */ struct __packed uc_fw_blob { @@ -93,9 +101,12 @@ struct __packed uc_fw_blob { UC_FW_BLOB(major_, minor_, \ MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_)) -#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \ +#define HUC_FW_BLOB(prefix_) \ + UC_FW_BLOB(0, 0, MAKE_HUC_FW_PATH(prefix_)) + +#define HUC_FW_VERSION_BLOB(prefix_, major_, minor_, bld_num_) \ UC_FW_BLOB(major_, minor_, \ - MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_)) + MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, bld_num_)) struct __packed uc_fw_platform_requirement { enum xe_platform p; @@ -122,7 +133,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) }; static const struct uc_fw_platform_requirement blobs_huc[] = { - XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) + XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_VERSION_BLOB) }; static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = { [XE_UC_FW_TYPE_GUC] = { blobs_guc, 
ARRAY_SIZE(blobs_guc) }, @@ -299,15 +310,17 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); - if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", - xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - if (!xe_uc_fw_is_overridden(uc_fw)) { - err = -ENOEXEC; - goto fail; + if (uc_fw->major_ver_wanted) { + if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + if (!xe_uc_fw_is_overridden(uc_fw)) { + err = -ENOEXEC; + goto fail; + } } } -- cgit v1.2.3 From 9bddebf1f0f6e7a8a6418dfc14fdaa6233ba0524 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 23 Mar 2023 15:46:51 -0700 Subject: drm/xe: Load HuC on Alderlake S Alderlake S uses TGL HuC. Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230323224651.1187366-3-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index f3e4e3774d68..5c3789f67049 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -51,6 +51,7 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 5, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \ + fw_def(ALDERLAKE_S, 0, huc_def(tgl)) \ fw_def(DG1, 0, huc_def(dg1)) \ fw_def(TIGERLAKE, 0, huc_def(tgl)) -- cgit v1.2.3 From ef5e3c2f703d05c9d296d8f8ad0a0f48f6c1fcc9 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 23 Mar 2023 12:24:59 -0700 Subject: drm/xe: Add max engine priority to xe query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Intel Vulkan driver needs to know the maximum priority in order to fill a device info struct for applications. Right now it gets this information by creating an engine and setting priorities from min to high to find the maximum priority for the running process, but this leads to info messages being printed to dmesg: xe 0000:03:00.0: [drm] Ioctl argument check failed at drivers/gpu/drm/xe/xe_engine.c:178: value == DRM_SCHED_PRIORITY_HIGH && !capable(CAP_SYS_NICE) It does not cause any harm, but when executing a test suite like crucible it causes thousands of those messages to be printed. So add one more property to drm_xe_query_config to fetch the max engine priority. 
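A rough sketch of how userspace might consume the new field once the config query has returned (the query plumbing is omitted and the helper below is hypothetical; only the array index comes from this patch):

	struct drm_xe_query_config *config = xe_query_config(fd); /* hypothetical helper wrapping the device query */
	int max_priority = config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY];
	/* No more probing priorities from min to high just to learn the cap */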
Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 10 ++++++++-- drivers/gpu/drm/xe/xe_engine.h | 1 + drivers/gpu/drm/xe/xe_query.c | 3 +++ include/uapi/drm/xe_drm.h | 3 ++- 4 files changed, 14 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 8011f5827cbe..141cb223ba02 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -169,14 +169,20 @@ struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id) return e; } +enum xe_engine_priority +xe_engine_device_get_max_priority(struct xe_device *xe) +{ + return capable(CAP_SYS_NICE) ? XE_ENGINE_PRIORITY_HIGH : + XE_ENGINE_PRIORITY_NORMAL; +} + static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, u64 value, bool create) { if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH)) return -EINVAL; - if (XE_IOCTL_ERR(xe, value == XE_ENGINE_PRIORITY_HIGH && - !capable(CAP_SYS_NICE))) + if (XE_IOCTL_ERR(xe, value > xe_engine_device_get_max_priority(xe))) return -EPERM; return e->ops->set_priority(e, value); diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h index 1cf7f23c4afd..b95d9b040877 100644 --- a/drivers/gpu/drm/xe/xe_engine.h +++ b/drivers/gpu/drm/xe/xe_engine.h @@ -54,5 +54,6 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +enum xe_engine_priority xe_engine_device_get_max_priority(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 0f70945176f6..dd64ff0d2a57 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -12,6 +12,7 @@ #include "xe_bo.h" #include "xe_device.h" +#include "xe_engine.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc_hwconfig.h" @@ -194,6 +195,8 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count; config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] = hweight_long(xe->info.mem_region_mask); + config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] = + xe_engine_device_get_max_priority(xe); if (copy_to_user(query_ptr, config, size)) { kfree(config); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 32a4265de402..b3bcb7106850 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -184,7 +184,8 @@ struct drm_xe_query_config { #define XE_QUERY_CONFIG_VA_BITS 3 #define XE_QUERY_CONFIG_GT_COUNT 4 #define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 -#define XE_QUERY_CONFIG_NUM_PARAM XE_QUERY_CONFIG_MEM_REGION_COUNT + 1 +#define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY 6 +#define XE_QUERY_CONFIG_NUM_PARAM XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1 __u64 info[]; }; -- cgit v1.2.3 From cf667aec0abeda839937cbd92884799b19df1ab7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 24 Mar 2023 09:33:36 -0700 Subject: drm/xe: Decrement fault mode counts in xe_vm_close_and_put Rather waiting for the VM to be destroyed (all refs to VM go to zero), drop the fault mode counts when the VM is closed in xe_vm_close_and_put. This avoids a window where user space can create a faulting VM, close it, and a subsequent creation of a non-faulting VM fails. 
v2 (Lucas): Drop VLK reference in commit message Reviewed-by: Maarten Lankhorst Suggested-by: Niranjana Vishwanathapura Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 207d20da5c68..49aa4ddedbf2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1427,6 +1427,13 @@ void xe_vm_close_and_put(struct xe_vm *vm) XE_WARN_ON(!list_empty(&vm->extobj.list)); up_write(&vm->lock); + mutex_lock(&xe->usm.lock); + if (vm->flags & XE_VM_FLAG_FAULT_MODE) + xe->usm.num_vm_in_fault_mode--; + else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) + xe->usm.num_vm_in_non_fault_mode--; + mutex_unlock(&xe->usm.lock); + xe_vm_put(vm); } @@ -1469,18 +1476,10 @@ static void vm_destroy_work_func(struct work_struct *w) } xe_vm_unlock(vm, &ww); - mutex_lock(&xe->usm.lock); - if (vm->flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode--; - else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode--; - mutex_unlock(&xe->usm.lock); - trace_xe_vm_free(vm); dma_fence_put(vm->rebind_fence); dma_resv_fini(&vm->resv); kfree(vm); - } void xe_vm_free(struct kref *ref) -- cgit v1.2.3 From 576c6380da47592dc793669c6738742385f1bbf1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Mar 2023 14:04:10 -0700 Subject: drm/xe/pat: Move PAT setup to a dedicated file PAT handling is growing in complexity and will continue to do so in upcoming platforms. Separate it out to a dedicated file to keep things tidy. The code is moved as-is here (aside from a few unused #define's that are just dropped); further changes will come in future patches. 
Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/r/20230324210415.2434992-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 - drivers/gpu/drm/xe/xe_gt.c | 82 ++---------------------------------- drivers/gpu/drm/xe/xe_pat.c | 81 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pat.h | 13 ++++++ 5 files changed, 98 insertions(+), 80 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_pat.c create mode 100644 drivers/gpu/drm/xe/xe_pat.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b3426f328d21..6ef80889fddb 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -71,6 +71,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_pat.o \ xe_pci.o \ xe_pcode.o \ xe_pm.o \ diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 73b0c0bdde5d..e33885f429b5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -69,7 +69,6 @@ #define GEN12_VE1_AUX_INV _MMIO(0x42b8) #define AUX_INV REG_BIT(0) -#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) #define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) #define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 07464ba42746..245117e67e9b 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -29,6 +29,7 @@ #include "xe_map.h" #include "xe_migrate.h" #include "xe_mmio.h" +#include "xe_pat.h" #include "xe_mocs.h" #include "xe_reg_sr.h" #include "xe_ring_ops.h" @@ -92,83 +93,6 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) return 0; } -/* FIXME: These should be in a common file */ -#define CHV_PPAT_SNOOP REG_BIT(6) -#define GEN8_PPAT_AGE(x) ((x)<<4) -#define GEN8_PPAT_LLCeLLC (3<<2) -#define GEN8_PPAT_LLCELLC (2<<2) -#define GEN8_PPAT_LLC (1<<2) -#define GEN8_PPAT_WB (3<<0) -#define GEN8_PPAT_WT (2<<0) -#define GEN8_PPAT_WC (1<<0) -#define GEN8_PPAT_UC (0<<0) -#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) -#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) -#define GEN12_PPAT_CLOS(x) ((x)<<2) - -static void tgl_setup_private_ppat(struct xe_gt *gt) -{ - /* TGL doesn't support LLC or AGE settings */ - xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); - xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); - xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC); - xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB); -} - -static void pvc_setup_private_ppat(struct xe_gt *gt) -{ - xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC); - xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); - xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); - xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, - GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT); - xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, - GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, - GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT); - xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, - GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB); -} - -#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) -#define 
MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) -#define MTL_PPAT_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3) -#define MTL_PPAT_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1) -#define MTL_PPAT_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0) -#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3) -#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) -#define MTL_0_COH_NON REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0) - -static void mtl_setup_private_ppat(struct xe_gt *gt) -{ - xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, - MTL_PPAT_1_WT | MTL_2_COH_1W); - xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, - MTL_PPAT_3_UC | MTL_2_COH_1W); - xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, - MTL_PPAT_0_WB | MTL_2_COH_1W); - xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, - MTL_PPAT_0_WB | MTL_3_COH_2W); -} - -static void setup_private_ppat(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - - if (xe->info.platform == XE_METEORLAKE) - mtl_setup_private_ppat(gt); - else if (xe->info.platform == XE_PVC) - pvc_setup_private_ppat(gt); - else - tgl_setup_private_ppat(gt); -} - static int gt_ttm_mgr_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -447,7 +371,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (err) goto err_hw_fence_irq; - setup_private_ppat(gt); + xe_pat_init(gt); if (!xe_gt_is_media_type(gt)) { err = xe_ggtt_init(gt, gt->mem.ggtt); @@ -633,7 +557,7 @@ static int do_gt_restart(struct xe_gt *gt) enum xe_hw_engine_id id; int err; - setup_private_ppat(gt); + xe_pat_init(gt); xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c new file mode 100644 index 000000000000..1cbb3d6ea949 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_pat.h" + +#include "regs/xe_reg_defs.h" +#include "xe_gt.h" +#include "xe_mmio.h" + +#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) + +#define GEN8_PPAT_WB (3<<0) +#define GEN8_PPAT_WT (2<<0) +#define GEN8_PPAT_WC (1<<0) +#define GEN8_PPAT_UC (0<<0) +#define GEN12_PPAT_CLOS(x) ((x)<<2) + +static void tgl_setup_private_ppat(struct xe_gt *gt) +{ + /* TGL doesn't support LLC or AGE settings */ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB); +} + +static void pvc_setup_private_ppat(struct xe_gt *gt) +{ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, + GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, + GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, + GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT); + xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, + GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB); +} + +#define 
MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) +#define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) +#define MTL_PPAT_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3) +#define MTL_PPAT_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1) +#define MTL_PPAT_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0) +#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3) +#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) +#define MTL_0_COH_NON REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0) + +static void mtl_setup_private_ppat(struct xe_gt *gt) +{ + xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB); + xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, + MTL_PPAT_1_WT | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, + MTL_PPAT_3_UC | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, + MTL_PPAT_0_WB | MTL_2_COH_1W); + xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, + MTL_PPAT_0_WB | MTL_3_COH_2W); +} + +void xe_pat_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe->info.platform == XE_METEORLAKE) + mtl_setup_private_ppat(gt); + else if (xe->info.platform == XE_PVC) + pvc_setup_private_ppat(gt); + else + tgl_setup_private_ppat(gt); +} diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h new file mode 100644 index 000000000000..659de4008131 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_PAT_H_ +#define _XE_PAT_H_ + +struct xe_gt; + +void xe_pat_init(struct xe_gt *gt); + +#endif -- cgit v1.2.3 From 4f843703133970c852cf4661e584bdea55fd1a7a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Mar 2023 14:04:11 -0700 Subject: drm/xe/pat: Use table-based programming of PAT settings Provide per-platform tables of PAT values rather than per-platform functions. This will simplify the handling of unicast vs MCR registers in the upcoming patches. 
Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/r/20230324210415.2434992-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 77 +++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 1cbb3d6ea949..d1a3e170fb33 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -17,34 +17,27 @@ #define GEN8_PPAT_UC (0<<0) #define GEN12_PPAT_CLOS(x) ((x)<<2) -static void tgl_setup_private_ppat(struct xe_gt *gt) -{ - /* TGL doesn't support LLC or AGE settings */ - xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); - xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); - xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_UC); - xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, GEN8_PPAT_WB); -} +const u32 tgl_pat_table[] = { + [0] = GEN8_PPAT_WB, + [1] = GEN8_PPAT_WC, + [2] = GEN8_PPAT_WT, + [3] = GEN8_PPAT_UC, + [4] = GEN8_PPAT_WB, + [5] = GEN8_PPAT_WB, + [6] = GEN8_PPAT_WB, + [7] = GEN8_PPAT_WB, +}; -static void pvc_setup_private_ppat(struct xe_gt *gt) -{ - xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, GEN8_PPAT_UC); - xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, GEN8_PPAT_WC); - xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, GEN8_PPAT_WT); - xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, - GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT); - xe_mmio_write32(gt, GEN12_PAT_INDEX(5).reg, - GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(6).reg, - GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT); - xe_mmio_write32(gt, GEN12_PAT_INDEX(7).reg, - GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB); -} +const u32 pvc_pat_table[] = { + [0] = GEN8_PPAT_UC, + [1] = GEN8_PPAT_WC, + [2] = GEN8_PPAT_WT, + [3] = GEN8_PPAT_WB, + [4] = GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT, + [5] = GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB, + [6] = GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT, + [7] = GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB, +}; #define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) #define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) @@ -55,27 +48,27 @@ static void pvc_setup_private_ppat(struct xe_gt *gt) #define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) #define MTL_0_COH_NON REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0) -static void mtl_setup_private_ppat(struct xe_gt *gt) -{ - xe_mmio_write32(gt, GEN12_PAT_INDEX(0).reg, MTL_PPAT_0_WB); - xe_mmio_write32(gt, GEN12_PAT_INDEX(1).reg, - MTL_PPAT_1_WT | MTL_2_COH_1W); - xe_mmio_write32(gt, GEN12_PAT_INDEX(2).reg, - MTL_PPAT_3_UC | MTL_2_COH_1W); - xe_mmio_write32(gt, GEN12_PAT_INDEX(3).reg, - MTL_PPAT_0_WB | MTL_2_COH_1W); - xe_mmio_write32(gt, GEN12_PAT_INDEX(4).reg, - MTL_PPAT_0_WB | MTL_3_COH_2W); -} +const u32 mtl_pat_table[] = { + [0] = MTL_PPAT_0_WB, + [1] = MTL_PPAT_1_WT | MTL_2_COH_1W, + [2] = MTL_PPAT_3_UC | MTL_2_COH_1W, + [3] = MTL_PPAT_0_WB | MTL_2_COH_1W, + [4] = MTL_PPAT_0_WB | MTL_3_COH_2W, +}; + +#define PROGRAM_PAT_UNICAST(gt, table) do { \ + for (int i = 0; i < ARRAY_SIZE(table); i++) \ + xe_mmio_write32(gt, GEN12_PAT_INDEX(i).reg, table[i]); \ +} while (0) void xe_pat_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); if 
(xe->info.platform == XE_METEORLAKE) - mtl_setup_private_ppat(gt); + PROGRAM_PAT_UNICAST(gt, mtl_pat_table); else if (xe->info.platform == XE_PVC) - pvc_setup_private_ppat(gt); + PROGRAM_PAT_UNICAST(gt, pvc_pat_table); else - tgl_setup_private_ppat(gt); + PROGRAM_PAT_UNICAST(gt, tgl_pat_table); } -- cgit v1.2.3 From 152d7f2db978780f6c7e95711c00dc1e0888535b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Mar 2023 14:04:12 -0700 Subject: drm/xe/pat: Handle unicast vs MCR PAT registers The PAT_INDEX registers are MCR registers on some platforms and unicast on others. On MTL the handling even varies between GTs: the primary GT uses MCR registers while the media GT uses unicast registers. Let's add proper MCR programming on the relevant platforms/GTs. Given that we PAT tables to change pretty regularly on future platforms, we'll make PAT programming an exception to the usual model of assuming new platforms should inherit the previous platform's behavior. Instead we'll raise a warning if the current platform isn't handled in the if/else ladder. This should help prevent subtle cache misbehavior if we forget to add the table for a new platform. Bspec: 66534, 67609, 67788 Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/r/20230324210415.2434992-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index d1a3e170fb33..6e3a74aa46e7 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -7,9 +7,10 @@ #include "regs/xe_reg_defs.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" #include "xe_mmio.h" -#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define _PAT_INDEX(index) (0x4800 + (index) * 4) #define GEN8_PPAT_WB (3<<0) #define GEN8_PPAT_WT (2<<0) @@ -58,17 +59,39 @@ const u32 mtl_pat_table[] = { #define PROGRAM_PAT_UNICAST(gt, table) do { \ for (int i = 0; i < ARRAY_SIZE(table); i++) \ - xe_mmio_write32(gt, GEN12_PAT_INDEX(i).reg, table[i]); \ + xe_mmio_write32(gt, _PAT_INDEX(i), table[i]); \ +} while (0) + +#define PROGRAM_PAT_MCR(gt, table) do { \ + for (int i = 0; i < ARRAY_SIZE(table); i++) \ + xe_gt_mcr_multicast_write(gt, MCR_REG(_PAT_INDEX(i)), table[i]); \ } while (0) void xe_pat_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (xe->info.platform == XE_METEORLAKE) - PROGRAM_PAT_UNICAST(gt, mtl_pat_table); - else if (xe->info.platform == XE_PVC) - PROGRAM_PAT_UNICAST(gt, pvc_pat_table); - else + if (xe->info.platform == XE_METEORLAKE) { + if (xe_gt_is_media_type(gt)) + PROGRAM_PAT_UNICAST(gt, mtl_pat_table); + else + PROGRAM_PAT_MCR(gt, mtl_pat_table); + } else if (xe->info.platform == XE_PVC) { + PROGRAM_PAT_MCR(gt, pvc_pat_table); + } else if (xe->info.platform == XE_DG2) { + PROGRAM_PAT_MCR(gt, pvc_pat_table); + } else if (GRAPHICS_VERx100(xe) <= 1210) { PROGRAM_PAT_UNICAST(gt, tgl_pat_table); + } else { + /* + * Going forward we expect to need new PAT settings for most + * new platforms; failure to provide a new table can easily + * lead to subtle, hard-to-debug problems. If none of the + * conditions above match the platform we're running on we'll + * raise an error rather than trying to silently inherit the + * most recent platform's behavior. 
+ */ + drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%2d!\n", + GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); + } } -- cgit v1.2.3 From 366974e4a69c09a441eca7802028e60b39903386 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Mar 2023 14:04:13 -0700 Subject: drm/xe/pat: Clean up PAT register definitions Replace the deprecated "GEN" terminology in the PAT definitions. Acked-by: Nirmoy Das Link: https://lore.kernel.org/r/20230324210415.2434992-5-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 73 +++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 35 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 6e3a74aa46e7..d2935ef0e274 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -12,49 +12,52 @@ #define _PAT_INDEX(index) (0x4800 + (index) * 4) -#define GEN8_PPAT_WB (3<<0) -#define GEN8_PPAT_WT (2<<0) -#define GEN8_PPAT_WC (1<<0) -#define GEN8_PPAT_UC (0<<0) -#define GEN12_PPAT_CLOS(x) ((x)<<2) +#define MTL_L4_POLICY_MASK REG_GENMASK(3, 2) +#define MTL_PAT_3_UC REG_FIELD_PREP(MTL_L4_POLICY_MASK, 3) +#define MTL_PAT_1_WT REG_FIELD_PREP(MTL_L4_POLICY_MASK, 1) +#define MTL_PAT_0_WB REG_FIELD_PREP(MTL_L4_POLICY_MASK, 0) +#define MTL_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) +#define MTL_3_COH_2W REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 3) +#define MTL_2_COH_1W REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 2) +#define MTL_0_COH_NON REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 0) + +#define PVC_CLOS_LEVEL_MASK REG_GENMASK(3, 2) +#define PVC_PAT_CLOS(x) REG_FIELD_PREP(PVC_CLOS_LEVEL_MASK, x) + +#define TGL_MEM_TYPE_MASK REG_GENMASK(1, 0) +#define TGL_PAT_WB REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 3) +#define TGL_PAT_WT REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 2) +#define TGL_PAT_WC REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 1) +#define TGL_PAT_UC REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 0) const u32 tgl_pat_table[] = { - [0] = GEN8_PPAT_WB, - [1] = GEN8_PPAT_WC, - [2] = GEN8_PPAT_WT, - [3] = GEN8_PPAT_UC, - [4] = GEN8_PPAT_WB, - [5] = GEN8_PPAT_WB, - [6] = GEN8_PPAT_WB, - [7] = GEN8_PPAT_WB, + [0] = TGL_PAT_WB, + [1] = TGL_PAT_WC, + [2] = TGL_PAT_WT, + [3] = TGL_PAT_UC, + [4] = TGL_PAT_WB, + [5] = TGL_PAT_WB, + [6] = TGL_PAT_WB, + [7] = TGL_PAT_WB, }; const u32 pvc_pat_table[] = { - [0] = GEN8_PPAT_UC, - [1] = GEN8_PPAT_WC, - [2] = GEN8_PPAT_WT, - [3] = GEN8_PPAT_WB, - [4] = GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT, - [5] = GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB, - [6] = GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT, - [7] = GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB, + [0] = TGL_PAT_UC, + [1] = TGL_PAT_WC, + [2] = TGL_PAT_WT, + [3] = TGL_PAT_WB, + [4] = PVC_PAT_CLOS(1) | TGL_PAT_WT, + [5] = PVC_PAT_CLOS(1) | TGL_PAT_WB, + [6] = PVC_PAT_CLOS(2) | TGL_PAT_WT, + [7] = PVC_PAT_CLOS(2) | TGL_PAT_WB, }; -#define MTL_PPAT_L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) -#define MTL_PAT_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) -#define MTL_PPAT_3_UC REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3) -#define MTL_PPAT_1_WT REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1) -#define MTL_PPAT_0_WB REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0) -#define MTL_3_COH_2W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3) -#define MTL_2_COH_1W REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2) -#define MTL_0_COH_NON REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0) - const u32 mtl_pat_table[] = { - [0] = MTL_PPAT_0_WB, - [1] = MTL_PPAT_1_WT | MTL_2_COH_1W, - [2] = MTL_PPAT_3_UC | MTL_2_COH_1W, - [3] 
= MTL_PPAT_0_WB | MTL_2_COH_1W, - [4] = MTL_PPAT_0_WB | MTL_3_COH_2W, + [0] = MTL_PAT_0_WB, + [1] = MTL_PAT_1_WT | MTL_2_COH_1W, + [2] = MTL_PAT_3_UC | MTL_2_COH_1W, + [3] = MTL_PAT_0_WB | MTL_2_COH_1W, + [4] = MTL_PAT_0_WB | MTL_3_COH_2W, }; #define PROGRAM_PAT_UNICAST(gt, table) do { \ -- cgit v1.2.3 From f16a3f6335e84c07de4b5dd263f0c26e3a3fa5a4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Mar 2023 14:04:14 -0700 Subject: drm/xe/mtl: Fix PAT table coherency settings Re-sync our MTL PAT table with the bspec. 1-way coherency should only be set on table entry 3. We do not want an incorrect setting here to accidentally paper over other bugs elsewhere in the driver. Bspec: 45101 Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/r/20230324210415.2434992-6-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index d2935ef0e274..e83f7895b853 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -54,8 +54,8 @@ const u32 pvc_pat_table[] = { const u32 mtl_pat_table[] = { [0] = MTL_PAT_0_WB, - [1] = MTL_PAT_1_WT | MTL_2_COH_1W, - [2] = MTL_PAT_3_UC | MTL_2_COH_1W, + [1] = MTL_PAT_1_WT, + [2] = MTL_PAT_3_UC, [3] = MTL_PAT_0_WB | MTL_2_COH_1W, [4] = MTL_PAT_0_WB | MTL_3_COH_2W, }; -- cgit v1.2.3 From 7321a713c6c952d66d5fae8e8478c904b61bb735 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 24 Mar 2023 14:04:15 -0700 Subject: drm/xe/mtl: Handle PAT_INDEX offset jump Starting with MTL, the number of entries in the PAT table increased to 16. The register offset jumped between index 7 and index 8, so a slight adjustment is needed to ensure the PAT_INDEX macros select the proper offset for the upper half of the table. Note that although there are 16 registers in the hardware, the driver is currently only asked to program the first 5, and we leave the rest at their hardware default values. That means we don't actually touch the upper half of the PAT table in the driver today and this patch won't have any functional effect [yet]. Bspec: 44235 Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/r/20230324210415.2434992-7-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index e83f7895b853..b59b6a2347bb 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -10,7 +10,9 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" -#define _PAT_INDEX(index) (0x4800 + (index) * 4) +#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ + 0x4800, 0x4804, \ + 0x4848, 0x484c) #define MTL_L4_POLICY_MASK REG_GENMASK(3, 2) #define MTL_PAT_3_UC REG_FIELD_PREP(MTL_L4_POLICY_MASK, 3) -- cgit v1.2.3 From 8deba79f5deb0a751894a0cf74eff3806e7adfb4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 23 Mar 2023 11:59:21 +0000 Subject: drm/xe: add XE_BO_CREATE_VRAM_MASK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So we don't have to keep repeating VRAM0 | VRAM1. Also if there are ever more instances, then we have less places to update. 
Suggested-by: José Roberto de Souza Signed-off-by: Matthew Auld Reviewed-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 8 +++----- drivers/gpu/drm/xe/xe_bo.h | 2 ++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 42a5978ecc74..448c4305480c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -999,8 +999,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return bo; } - if (flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT | - XE_BO_CREATE_STOLEN_BIT) && + if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) && !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { size = ALIGN(size, SZ_64K); @@ -1064,8 +1063,7 @@ static int __xe_bo_fixed_placement(struct xe_device *xe, place->fpfn = start >> PAGE_SHIFT; place->lpfn = end >> PAGE_SHIFT; - switch (flags & (XE_BO_CREATE_STOLEN_BIT | - XE_BO_CREATE_VRAM0_BIT |XE_BO_CREATE_VRAM1_BIT)) { + switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) { case XE_BO_CREATE_VRAM0_BIT: place->mem_type = XE_PL_VRAM0; break; @@ -1771,7 +1769,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) { return bo->ttm.type == ttm_bo_type_device && !(bo->flags & XE_BO_CREATE_SYSTEM_BIT) && - (bo->flags & (XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_VRAM1_BIT)); + (bo->flags & XE_BO_CREATE_VRAM_MASK); } /** diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index e38894c1255d..7b2104d1bda7 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -17,6 +17,8 @@ #define XE_BO_CREATE_SYSTEM_BIT BIT(1) #define XE_BO_CREATE_VRAM0_BIT BIT(2) #define XE_BO_CREATE_VRAM1_BIT BIT(3) +#define XE_BO_CREATE_VRAM_MASK (XE_BO_CREATE_VRAM0_BIT | \ + XE_BO_CREATE_VRAM1_BIT) /* -- */ #define XE_BO_CREATE_STOLEN_BIT BIT(4) #define XE_BO_CREATE_VRAM_IF_DGFX(gt) \ -- cgit v1.2.3 From e7dc1341f0dab3363baac28044b46237ed251802 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 23 Mar 2023 11:59:22 +0000 Subject: drm/xe/bo: refactor try_add_vram MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Get rid of some of the duplication here. In a future patch we need to also consider [fpfn, lpfn], so better adjust in only one place. 
Suggested-by: José Roberto de Souza Signed-off-by: Matthew Auld Reviewed-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 86 +++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 55 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 448c4305480c..fc8cb96b2cb9 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -106,55 +106,42 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places, } } -static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo, - struct ttm_place *places, u32 bo_flags, u32 *c) +static void add_vram(struct xe_device *xe, struct xe_bo *bo, + struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { - struct xe_gt *gt; + struct xe_gt *gt = mem_type_to_gt(xe, mem_type); - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) { - gt = mem_type_to_gt(xe, XE_PL_VRAM0); - XE_BUG_ON(!gt->mem.vram.size); + XE_BUG_ON(!gt->mem.vram.size); - places[*c] = (struct ttm_place) { - .mem_type = XE_PL_VRAM0, - /* - * For eviction / restore on suspend / resume objects - * pinned in VRAM must be contiguous - */ - .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT) ? - TTM_PL_FLAG_CONTIGUOUS : 0, - }; - *c += 1; + places[*c] = (struct ttm_place) { + .mem_type = mem_type, + /* + * For eviction / restore on suspend / resume objects + * pinned in VRAM must be contiguous + */ + .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT) ? + TTM_PL_FLAG_CONTIGUOUS : 0, + }; + *c += 1; - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = XE_PL_VRAM0; - } + if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) + bo->props.preferred_mem_type = mem_type; } -static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo, - struct ttm_place *places, u32 bo_flags, u32 *c) +static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, + struct ttm_place *places, u32 bo_flags, u32 *c) { - struct xe_gt *gt; - - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) { - gt = mem_type_to_gt(xe, XE_PL_VRAM1); - XE_BUG_ON(!gt->mem.vram.size); - - places[*c] = (struct ttm_place) { - .mem_type = XE_PL_VRAM1, - /* - * For eviction / restore on suspend / resume objects - * pinned in VRAM must be contiguous - */ - .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT) ? 
- TTM_PL_FLAG_CONTIGUOUS : 0, - }; - *c += 1; - - if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) - bo->props.preferred_mem_type = XE_PL_VRAM1; + if (bo->props.preferred_gt == XE_GT1) { + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1, c); + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0, c); + } else { + if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0, c); + if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1, c); } } @@ -184,20 +171,9 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) { try_add_system(bo, places, bo_flags, &c); - if (bo->props.preferred_gt == XE_GT1) { - try_add_vram1(xe, bo, places, bo_flags, &c); - try_add_vram0(xe, bo, places, bo_flags, &c); - } else { - try_add_vram0(xe, bo, places, bo_flags, &c); - try_add_vram1(xe, bo, places, bo_flags, &c); - } - } else if (bo->props.preferred_gt == XE_GT1) { - try_add_vram1(xe, bo, places, bo_flags, &c); - try_add_vram0(xe, bo, places, bo_flags, &c); - try_add_system(bo, places, bo_flags, &c); + try_add_vram(xe, bo, places, bo_flags, &c); } else { - try_add_vram0(xe, bo, places, bo_flags, &c); - try_add_vram1(xe, bo, places, bo_flags, &c); + try_add_vram(xe, bo, places, bo_flags, &c); try_add_system(bo, places, bo_flags, &c); } try_add_stolen(xe, bo, places, bo_flags, &c); -- cgit v1.2.3 From 011d8fa362962424c3f444c1dac3653f86f350b3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 27 Mar 2023 10:58:24 -0700 Subject: drm/xe/pat: Define PAT tables as static The tables are only used within this file; there's no reason for them not to be static. Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/r/20230327175824.2967914-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index b59b6a2347bb..c2faf0931649 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -32,7 +32,7 @@ #define TGL_PAT_WC REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 1) #define TGL_PAT_UC REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 0) -const u32 tgl_pat_table[] = { +static const u32 tgl_pat_table[] = { [0] = TGL_PAT_WB, [1] = TGL_PAT_WC, [2] = TGL_PAT_WT, @@ -43,7 +43,7 @@ const u32 tgl_pat_table[] = { [7] = TGL_PAT_WB, }; -const u32 pvc_pat_table[] = { +static const u32 pvc_pat_table[] = { [0] = TGL_PAT_UC, [1] = TGL_PAT_WC, [2] = TGL_PAT_WT, @@ -54,7 +54,7 @@ const u32 pvc_pat_table[] = { [7] = PVC_PAT_CLOS(2) | TGL_PAT_WB, }; -const u32 mtl_pat_table[] = { +static const u32 mtl_pat_table[] = { [0] = MTL_PAT_0_WB, [1] = MTL_PAT_1_WT, [2] = MTL_PAT_3_UC, -- cgit v1.2.3 From 33de290bd1792b7e60b1379f1eb9185c481e06eb Mon Sep 17 00:00:00 2001 From: "Chang, Bruce" Date: Thu, 23 Mar 2023 19:38:58 +0000 Subject: drm/xe: don't auto fall back to execlist mode if guc failed to init In general, this is due to FW load failure, should just report error and fail the probe so that user can easily retry again. 
Reviewed-by: Matthew Brost Signed-off-by: Bruce Chang Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 ++-- drivers/gpu/drm/xe/xe_uc.c | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 245117e67e9b..6322e0689a9e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -379,9 +379,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) goto err_force_wake; } - /* Allow driver to load if uC init fails (likely missing firmware) */ err = xe_uc_init(>->uc); - XE_WARN_ON(err); + if (err) + goto err_force_wake; err = xe_uc_init_hwconfig(>->uc); if (err) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 4ccf2b3435e1..70eabf567156 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -54,9 +54,6 @@ int xe_uc_init(struct xe_uc *uc) return 0; err: - /* If any uC firmwares not found, fall back to execlists */ - xe_device_guc_submission_disable(uc_to_xe(uc)); - return ret; } -- cgit v1.2.3 From 3d4451d30f36ffe21f8c5eea7db9678330ee83c4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 28 Mar 2023 12:30:39 -0700 Subject: drm/xe: Better error messages for xe_gt_record_default_lrcs Add some error messages describing the problem when xe_gt_record_default_lrcs fails. Signed-off-by: Matthew Brost Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 6322e0689a9e..fd7a5b43ba3e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -23,6 +23,7 @@ #include "xe_gt_sysfs.h" #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" +#include "xe_guc_engine_types.h" #include "xe_hw_fence.h" #include "xe_irq.h" #include "xe_lrc.h" @@ -257,30 +258,43 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) hwe, ENGINE_FLAG_WA); if (IS_ERR(e)) { err = PTR_ERR(e); + drm_err(&xe->drm, "gt%d, hwe %s, xe_engine_create,e failed=%d", + gt->info.id, hwe->name, err); goto put_vm; } /* Prime golden LRC with known good state */ err = emit_wa_job(gt, e); - if (err) + if (err) { + drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_wa_job,e failed=%d", + gt->info.id, hwe->name, e->guc->id, err); goto put_engine; + } nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1, hwe, ENGINE_FLAG_WA); if (IS_ERR(nop_e)) { err = PTR_ERR(nop_e); + drm_err(&xe->drm, "gt%d, hwe %s, xe_engine_create,nop_e failed=%d", + gt->info.id, hwe->name, err); goto put_engine; } /* Switch to different LRC */ err = emit_nop_job(gt, nop_e); - if (err) + if (err) { + drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_nop_job,nop_e failed=%d", + gt->info.id, hwe->name, nop_e->guc->id, err); goto put_nop_e; + } /* Reload golden LRC to record the effect of any indirect W/A */ err = emit_nop_job(gt, e); - if (err) + if (err) { + drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_nop_job,e failed=%d", + gt->info.id, hwe->name, e->guc->id, err); goto put_nop_e; + } xe_map_memcpy_from(xe, default_lrc, &e->lrc[0].bo->vmap, -- cgit v1.2.3 From 681818fdb97de821cc1ee6b81c7a09f3ef8fc96d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 29 Mar 2023 10:33:32 -0700 Subject: drm/xe: Include hardware prefetch buffer in batchbuffer allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware prefetches several 
cachelines of data from batchbuffers before they are parsed. This prefetching only stops when the parser encounters an MI_BATCH_BUFFER_END instruction (or a nested MI_BATCH_BUFFER_START), so we must ensure that there is enough padding at the end of the batchbuffer to prevent the prefetcher from running past the end of the allocation and potentially faulting. Bspec: 45717 Link: https://lore.kernel.org/r/20230329173334.4015124-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bb.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 5b24018e2a80..f326f117ba3b 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -8,11 +8,26 @@ #include "regs/xe_gpu_commands.h" #include "xe_device.h" #include "xe_engine_types.h" +#include "xe_gt.h" #include "xe_hw_fence.h" #include "xe_sa.h" #include "xe_sched_job.h" #include "xe_vm_types.h" +static int bb_prefetch(struct xe_gt *gt) +{ + struct xe_device *xe = gt->xe; + + if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + /* + * RCS and CCS require 1K, although other engines would be + * okay with 512. + */ + return SZ_1K; + else + return SZ_512; +} + struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) { struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); @@ -21,8 +36,14 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) if (!bb) return ERR_PTR(-ENOMEM); - bb->bo = xe_sa_bo_new(!usm ? >->kernel_bb_pool : - >->usm.bb_pool, 4 * dwords + 4); + /* + * We need to allocate space for the requested number of dwords, + * one additional MI_BATCH_BUFFER_END dword, and additional buffer + * space to accomodate the platform-specific hardware prefetch + * requirements. + */ + bb->bo = xe_sa_bo_new(!usm ? >->kernel_bb_pool : >->usm.bb_pool, + 4 * (dwords + 1) + bb_prefetch(gt)); if (IS_ERR(bb->bo)) { err = PTR_ERR(bb->bo); goto err; -- cgit v1.2.3 From 9b36f7af2024ef30866f5fa0b1132ca924fd81fc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 29 Mar 2023 10:33:33 -0700 Subject: drm/xe: Adjust batchbuffer space warning when creating a job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should WARN (not BUG) when creating a job if the batchbuffer does not have sufficient space and padding. The hardware prefetch requirements should also be considered. 
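As a quick sanity check of the arithmetic this and the previous patch rely on, a minimal standalone C sketch follows; it is not driver code (the helper names and standalone form are illustrative), but the constants and formulas mirror bb_prefetch(), the allocation in xe_bb_new() and the job-creation check shown above.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_512 512u
#define SZ_1K  1024u

/* Mirrors bb_prefetch(): RCS/CCS on graphics IP 12.50+ may prefetch up to
 * 1K past the current instruction; other engines/platforms up to 512 bytes. */
static uint32_t bb_prefetch(unsigned int graphics_verx100, bool is_media_gt)
{
	return (graphics_verx100 >= 1250 && !is_media_gt) ? SZ_1K : SZ_512;
}

/* Allocation made by xe_bb_new(): the requested dwords, one extra dword for
 * the MI_BATCH_BUFFER_END appended at job creation, plus the prefetch pad. */
static uint32_t bb_alloc_size(uint32_t dwords, unsigned int graphics_verx100,
			      bool is_media_gt)
{
	return 4 * (dwords + 1) + bb_prefetch(graphics_verx100, is_media_gt);
}

int main(void)
{
	/* Example: a 100-dword batch for an RCS engine on a version 12.55 GT. */
	uint32_t size = bb_alloc_size(100, 1255, false);
	uint32_t len = 101;	/* bb->len after MI_BATCH_BUFFER_END is appended */

	printf("suballocation size: %u bytes\n", size);	/* 4 * 101 + 1024 = 1428 */

	/* The WARN condition at job creation: written dwords plus the prefetch
	 * window must still fit inside the suballocation. */
	assert(!(len * 4 + bb_prefetch(1255, false) > size));
	return 0;
}

For a fully used 100-dword RCS batch this gives 4 * 101 + 1024 = 1428 bytes, exactly the boundary the new WARN tests.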
Link: https://lore.kernel.org/r/20230329173334.4015124-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index f326f117ba3b..7172801ee570 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -63,10 +63,10 @@ __xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr) { u32 size = drm_suballoc_size(bb->bo); - XE_BUG_ON((bb->len * 4 + 1) > size); - bb->cs[bb->len++] = MI_BATCH_BUFFER_END; + WARN_ON(bb->len * 4 + bb_prefetch(kernel_eng->gt) > size); + xe_sa_bo_flush_write(bb->bo); return xe_sched_job_create(kernel_eng, addr); -- cgit v1.2.3 From 1bf1d86f12d4d07108d480878193acd1e4d87668 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 29 Mar 2023 10:33:34 -0700 Subject: drm/xe: Don't emit extra MI_BATCH_BUFFER_END in WA batchbuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MI_BATCH_BUFFER_END is already added automatically by __xe_bb_create_job(); including it in the construction of the workaround batchbuffer results in an unnecessary duplicate. Link: https://lore.kernel.org/r/20230329173334.4015124-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index fd7a5b43ba3e..bc821f431c45 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -203,8 +203,6 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) bb->cs[bb->len++] = entry->set_bits; } } - bb->cs[bb->len++] = MI_NOOP; - bb->cs[bb->len++] = MI_BATCH_BUFFER_END; batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); job = xe_bb_create_wa_job(e, bb, batch_ofs); -- cgit v1.2.3 From 6b8ddaf3721e86bacc0be72bf12fa76233b9becf Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 23 Mar 2023 22:17:52 -0700 Subject: drm/xe: Remove unused revid from firmware name The rev field is always 0 so it ends up never used. In i915 it was introduced because of CML: up to rev 5 it reuses the guc and huc firmware blobs from KBL. After that there is a specific firmware for that platform. This can be reintroduced later if ever needed. With the removal of revid the packed attribute in uc_fw_platform_requirement, which is there only for reducing the space these tables take, can also be removed since it has even more limited usefulness: currently there's only padding of 2 bytes. Remove the attribute to avoid the unaligned access. 
$ pahole -C uc_fw_platform_requirement build64/drivers/gpu/drm/xe/xe_uc_fw.o struct uc_fw_platform_requirement { enum xe_platform p; /* 0 4 */ const struct uc_fw_blob blob; /* 4 10 */ /* size: 16, cachelines: 1, members: 2 */ /* padding: 2 */ /* last cacheline: 16 bytes */ }; Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230324051754.1346390-2-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5c3789f67049..a9107e86b81f 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -39,21 +39,21 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) /* * List of required GuC and HuC binaries per-platform. - * Must be ordered based on platform + revid, from newer to older. + * Must be ordered based on platform, from newer to older. */ #define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ - fw_def(METEORLAKE, 0, guc_def(mtl, 70, 5, 2)) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 5, 2)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 5, 2)) \ - fw_def(PVC, 0, guc_def(pvc, 70, 5, 2)) \ - fw_def(DG2, 0, guc_def(dg2, 70, 5, 2)) \ - fw_def(DG1, 0, guc_def(dg1, 70, 5, 2)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 5, 2)) + fw_def(METEORLAKE, guc_def(mtl, 70, 5, 2)) \ + fw_def(ALDERLAKE_P, guc_def(adlp, 70, 5, 2)) \ + fw_def(ALDERLAKE_S, guc_def(tgl, 70, 5, 2)) \ + fw_def(PVC, guc_def(pvc, 70, 5, 2)) \ + fw_def(DG2, guc_def(dg2, 70, 5, 2)) \ + fw_def(DG1, guc_def(dg1, 70, 5, 2)) \ + fw_def(TIGERLAKE, guc_def(tgl, 70, 5, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \ - fw_def(ALDERLAKE_S, 0, huc_def(tgl)) \ - fw_def(DG1, 0, huc_def(dg1)) \ - fw_def(TIGERLAKE, 0, huc_def(tgl)) + fw_def(ALDERLAKE_S, huc_def(tgl)) \ + fw_def(DG1, huc_def(dg1)) \ + fw_def(TIGERLAKE, huc_def(tgl)) #define __MAKE_HUC_FW_PATH(prefix_, name_) \ "i915/" \ @@ -82,7 +82,7 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) /* All blobs need to be declared via MODULE_FIRMWARE() */ -#define XE_UC_MODULE_FW(platform_, revid_, uc_) \ +#define XE_UC_MODULE_FW(platform_, uc_) \ MODULE_FIRMWARE(uc_); XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH) @@ -109,16 +109,14 @@ struct __packed uc_fw_blob { UC_FW_BLOB(major_, minor_, \ MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, bld_num_)) -struct __packed uc_fw_platform_requirement { +struct uc_fw_platform_requirement { enum xe_platform p; - u8 rev; /* first platform rev using this FW */ const struct uc_fw_blob blob; }; -#define MAKE_FW_LIST(platform_, revid_, uc_) \ +#define MAKE_FW_LIST(platform_, uc_) \ { \ .p = XE_##platform_, \ - .rev = revid_, \ .blob = uc_, \ }, @@ -143,7 +141,6 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) static const struct uc_fw_platform_requirement *fw_blobs; enum xe_platform p = xe->info.platform; u32 fw_count; - u8 rev = xe->info.revid; int i; XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); @@ -151,7 +148,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) fw_count = blobs_all[uc_fw->type].count; for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) { - if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { + if (p == fw_blobs[i].p) { const struct uc_fw_blob *blob = &fw_blobs[i].blob; uc_fw->path = blob->path; -- cgit v1.2.3 From f7339fe79654c2b63634d65eb72c089d45029065 Mon Sep 17 00:00:00 2001 
From: Niranjana Vishwanathapura Date: Thu, 30 Mar 2023 21:41:05 +0000 Subject: drm/xe/tests: Use proper batch base address In xe_migrate_sanity_kunit test, use proper batch base address by considering usm case. Reviewed-by: Matthew Brost Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 17829f878757..90f4e1c4f029 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -35,8 +35,9 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, struct xe_bb *bb, u32 second_idx, const char *str, struct kunit *test) { + u64 batch_base = xe_migrate_batch_base(m, xe->info.supports_usm); struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb, - m->batch_base_ofs, + batch_base, second_idx); struct dma_fence *fence; -- cgit v1.2.3 From 370997d168d64e84c12164bffdd326fd240a9790 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Fri, 31 Mar 2023 16:40:12 +0000 Subject: drm/xe/tests: Set correct expectation In xe_migrate_sanity_kunit test, use correct expected value as the expected value was not only used for the xe_migrate_clear(), but also for the xe_migrate_copy() operation. v2: Add 'Fixes' tag and update commit text Fixes: 11a2407ed5f0 ("drm/xe: Stop accepting value in xe_migrate_clear") Reviewed-by: Matthew Brost Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 90f4e1c4f029..862d11b2210f 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -147,6 +147,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size); xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource, bo->ttm.resource); if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" : -- cgit v1.2.3 From c33a721943f46851f10eb34852a3fd1fedcd3639 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Fri, 31 Mar 2023 16:52:50 +0000 Subject: drm/xe: Use proper vram offset In xe_migrate functions, use proper vram io offset of the tiles while calculating addresses. Reviewed-by: Matthew Brost Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 12 ++++++------ drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_migrate.c | 13 ++++++++----- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index fc8cb96b2cb9..1835f049c21e 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1206,12 +1206,12 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, * XXX: This is in the VM bind data path, likely should calculate this once and * store, with a recalculation if the BO is moved. 
*/ -static uint64_t vram_region_io_offset(struct xe_bo *bo) +uint64_t vram_region_io_offset(struct ttm_resource *res) { - struct xe_device *xe = xe_bo_device(bo); - struct xe_gt *gt = mem_type_to_gt(xe, bo->ttm.resource->mem_type); + struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); + struct xe_gt *gt = mem_type_to_gt(xe, res->mem_type); - if (bo->ttm.resource->mem_type == XE_PL_STOLEN) + if (res->mem_type == XE_PL_STOLEN) return xe_ttm_stolen_gpu_offset(xe); return gt->mem.vram.io_start - xe->mem.vram.io_start; @@ -1298,7 +1298,7 @@ int xe_bo_pin(struct xe_bo *bo) XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &vram) - - vram_region_io_offset(bo)) >> PAGE_SHIFT; + vram_region_io_offset(bo->ttm.resource)) >> PAGE_SHIFT; place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); spin_lock(&xe->pinned.lock); @@ -1442,7 +1442,7 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, xe_res_first(bo->ttm.resource, page << PAGE_SHIFT, page_size, &cur); - return cur.start + offset + vram_region_io_offset(bo); + return cur.start + offset + vram_region_io_offset(bo->ttm.resource); } } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 7b2104d1bda7..dd58edcb9398 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -224,6 +224,7 @@ void xe_bo_vunmap(struct xe_bo *bo); bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); +uint64_t vram_region_io_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index b9b2a64447d8..13cfb7ad2850 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -392,6 +392,7 @@ static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur) static u32 pte_update_size(struct xe_migrate *m, bool is_vram, + struct ttm_resource *res, struct xe_res_cursor *cur, u64 *L0, u64 *L0_ofs, u32 *L0_pt, u32 cmd_size, u32 pt_ofs, u32 avail_pts) @@ -417,7 +418,8 @@ static u32 pte_update_size(struct xe_migrate *m, cmds += cmd_size; } else { /* Offset into identity map. */ - *L0_ofs = xe_migrate_vram_ofs(cur->start); + *L0_ofs = xe_migrate_vram_ofs(cur->start + + vram_region_io_offset(res)); cmds += cmd_size; } @@ -467,6 +469,7 @@ static void emit_pte(struct xe_migrate *m, addr |= GEN12_PTE_PS64; } + addr += vram_region_io_offset(bo->ttm.resource); addr |= GEN12_PPGTT_PTE_LM; } addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; @@ -646,17 +649,17 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, src_L0 = min(src_L0, dst_L0); - batch_size += pte_update_size(m, src_is_vram, &src_it, &src_L0, + batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0, &src_L0_ofs, &src_L0_pt, 0, 0, NUM_PT_PER_BLIT); - batch_size += pte_update_size(m, dst_is_vram, &dst_it, &src_L0, + batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0, &dst_L0_ofs, &dst_L0_pt, 0, NUM_PT_PER_BLIT, NUM_PT_PER_BLIT); if (copy_system_ccs) { ccs_size = xe_device_ccs_bytes(xe, src_L0); - batch_size += pte_update_size(m, false, &ccs_it, &ccs_size, + batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size, &ccs_ofs, &ccs_pt, 0, 2 * NUM_PT_PER_BLIT, NUM_PT_PER_BLIT); @@ -879,7 +882,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, /* Calculate final sizes and batch size.. 
*/ batch_size = 2 + - pte_update_size(m, clear_vram, &src_it, + pte_update_size(m, clear_vram, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, emit_clear_cmd_len(xe), 0, NUM_PT_PER_BLIT); -- cgit v1.2.3 From 96578d106b30dc3a6550624477a092d793052660 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 31 Mar 2023 16:09:02 -0700 Subject: drm/xe: Fix platform order Platform order in enum xe_platform started to be used by some parts of the code, like the GuC/HuC firmware loading logic. The order itself is not very important, but it's better to follow a convention: as was documented in the comment above the enum, reorder the platforms by graphics version. While at it, remove the gen terminology. v2: - Use "graphics version" instead of chronological order (Matt Roper) - Also change pciidlist to follow the same order - Remove "gen" from comments around enum xe_platform Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230331230902.1603294-1-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 4 ++-- drivers/gpu/drm/xe/xe_platform_types.h | 12 +++++++----- drivers/gpu/drm/xe/xe_uc_fw.c | 4 ++-- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 0a3b61f08d37..e7bfcc5f51c2 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -281,11 +281,11 @@ static const struct xe_device_desc mtl_desc = { */ static const struct pci_device_id pciidlist[] = { XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc), + XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), + XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), - XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), - XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), { } }; diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index 72612c832e88..80c19bffe79c 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -6,27 +6,29 @@ #ifndef _XE_PLATFORM_INFO_TYPES_H_ #define _XE_PLATFORM_INFO_TYPES_H_ -/* Keep in gen based order, and chronological order within a gen */ +/* + * Keep this in graphics version based order and chronological order within a + * version + */ enum xe_platform { XE_PLATFORM_UNINITIALIZED = 0, - /* gen12 */ XE_TIGERLAKE, XE_ROCKETLAKE, + XE_ALDERLAKE_S, + XE_ALDERLAKE_P, XE_DG1, XE_DG2, XE_PVC, - XE_ALDERLAKE_S, - XE_ALDERLAKE_P, XE_METEORLAKE, }; enum xe_subplatform { XE_SUBPLATFORM_UNINITIALIZED = 0, XE_SUBPLATFORM_NONE, + XE_SUBPLATFORM_ADLP_RPLU, XE_SUBPLATFORM_DG2_G10, XE_SUBPLATFORM_DG2_G11, XE_SUBPLATFORM_DG2_G12, - XE_SUBPLATFORM_ADLP_RPLU, }; #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index a9107e86b81f..777fa6f523dc 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -43,11 +43,11 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) */ #define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ fw_def(METEORLAKE, guc_def(mtl, 70, 5, 2)) \ - fw_def(ALDERLAKE_P, guc_def(adlp, 70, 5, 2)) \ - fw_def(ALDERLAKE_S, guc_def(tgl, 70, 5, 2)) \ fw_def(PVC, guc_def(pvc, 70, 5, 2)) \ fw_def(DG2, guc_def(dg2, 70, 5, 2)) \ fw_def(DG1, guc_def(dg1, 70, 5, 2)) \ + fw_def(ALDERLAKE_P, guc_def(adlp, 70, 5, 2)) \ + fw_def(ALDERLAKE_S, guc_def(tgl, 70, 5, 2)) \ 
fw_def(TIGERLAKE, guc_def(tgl, 70, 5, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \ -- cgit v1.2.3 From 1a545ed74b33eaf6dee6d4159be07819ad89a569 Mon Sep 17 00:00:00 2001 From: "Chang, Bruce" Date: Mon, 3 Apr 2023 22:20:31 +0000 Subject: drm/xe: fix pvc unload issue Currently, unload pvc driver will generate a null dereference and the call stack is as below. [ 4850.618000] Call Trace: [ 4850.620740] [ 4850.623134] ttm_bo_cleanup_memtype_use+0x3f/0x50 [ttm] [ 4850.628661] ttm_bo_release+0x154/0x2c0 [ttm] [ 4850.633317] ? drm_buddy_fini+0x62/0x80 [drm_buddy] [ 4850.638487] ? __kmem_cache_free+0x27d/0x2c0 [ 4850.643054] ttm_bo_put+0x38/0x60 [ttm] [ 4850.647190] xe_gem_object_free+0x1f/0x30 [xe] [ 4850.651945] drm_gem_object_free+0x1e/0x30 [drm] [ 4850.656904] ggtt_fini_noalloc+0x9d/0xe0 [xe] [ 4850.661574] drm_managed_release+0xb5/0x150 [drm] [ 4850.666617] drm_dev_release+0x30/0x50 [drm] [ 4850.671209] devm_drm_dev_init_release+0x3c/0x60 [drm] There are a couple issues, but the main one is due to TTM has only one TTM_PL_TT region, but since pvc has 2 tiles and tries to setup 1 TTM_PL_TT each tile. The second will overwrite the first one. During unload time, the first tile will reset the TTM_PL_TT manger and when the second tile is trying to free Bo and it will generate the null reference since the TTM manage is already got reset to 0. The fix is to use one global TTM_PL_TT manager. v2: make gtt mgr global and change the name to sys_mgr Cc: Stuart Summers Cc: Matthew Brost Cc: Vivi, Rodrigo Signed-off-by: Bruce Chang Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/xe_device.c | 3 + drivers/gpu/drm/xe/xe_device.h | 1 + drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_gt.c | 18 ----- drivers/gpu/drm/xe/xe_gt_types.h | 2 - drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c | 130 ------------------------------ drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h | 16 ---- drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h | 18 ----- drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 115 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ttm_sys_mgr.h | 13 +++ 11 files changed, 135 insertions(+), 185 deletions(-) delete mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c delete mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h delete mode 100644 drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h create mode 100644 drivers/gpu/drm/xe/xe_ttm_sys_mgr.c create mode 100644 drivers/gpu/drm/xe/xe_ttm_sys_mgr.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 6ef80889fddb..42459727e67a 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -88,7 +88,7 @@ xe-y += xe_bb.o \ xe_step.o \ xe_sync.o \ xe_trace.o \ - xe_ttm_gtt_mgr.o \ + xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ffacf80c8942..b13bbdeeef51 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -27,6 +27,7 @@ #include "xe_pm.h" #include "xe_query.h" #include "xe_ttm_stolen_mgr.h" +#include "xe_ttm_sys_mgr.h" #include "xe_vm.h" #include "xe_vm_madvise.h" #include "xe_wait_user_fence.h" @@ -262,6 +263,8 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_irq_shutdown; + xe_ttm_sys_mgr_init(xe); + for_each_gt(gt, xe, id) { err = xe_gt_init_noalloc(gt); if (err) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index d277f8985f7b..cbae480a2092 100644 --- 
a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -116,4 +116,5 @@ static inline bool xe_device_has_flat_ccs(struct xe_device *xe) } u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3917b9152eb9..74326091bf98 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -134,6 +134,8 @@ struct xe_device { /** @mapping: pointer to VRAM mappable space */ void *__iomem mapping; } vram; + /** @sys_mgr: system TTM manager */ + struct ttm_resource_manager sys_mgr; } mem; /** @usm: unified memory state */ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index bc821f431c45..daaf93e23bbf 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -36,7 +36,6 @@ #include "xe_ring_ops.h" #include "xe_sa.h" #include "xe_sched_job.h" -#include "xe_ttm_gtt_mgr.h" #include "xe_ttm_vram_mgr.h" #include "xe_tuning.h" #include "xe_uc.h" @@ -77,16 +76,11 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) if (!gt->mem.vram_mgr) return -ENOMEM; - gt->mem.gtt_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.gtt_mgr), - GFP_KERNEL); - if (!gt->mem.gtt_mgr) - return -ENOMEM; } else { struct xe_gt *full_gt = xe_find_full_gt(gt); gt->mem.ggtt = full_gt->mem.ggtt; gt->mem.vram_mgr = full_gt->mem.vram_mgr; - gt->mem.gtt_mgr = full_gt->mem.gtt_mgr; } gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); @@ -98,26 +92,14 @@ static int gt_ttm_mgr_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; - struct sysinfo si; - u64 gtt_size; - - si_meminfo(&si); - gtt_size = (u64)si.totalram * si.mem_unit * 3/4; if (gt->mem.vram.size) { err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr); if (err) return err; - gtt_size = min(max((XE_DEFAULT_GTT_SIZE_MB << 20), - (u64)gt->mem.vram.size), - gtt_size); xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1; } - err = xe_ttm_gtt_mgr_init(gt, gt->mem.gtt_mgr, gtt_size); - if (err) - return err; - return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 8f29aba455e0..9d3117fad2e4 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -162,8 +162,6 @@ struct xe_gt { } vram; /** @vram_mgr: VRAM TTM manager */ struct xe_ttm_vram_mgr *vram_mgr; - /** @gtt_mr: GTT TTM manager */ - struct xe_ttm_gtt_mgr *gtt_mgr; /** @ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; } mem; diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c deleted file mode 100644 index 8075781070f2..000000000000 --- a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.c +++ /dev/null @@ -1,130 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021-2022 Intel Corporation - * Copyright (C) 2021-2002 Red Hat - */ - -#include - -#include -#include -#include - -#include "xe_bo.h" -#include "xe_gt.h" -#include "xe_ttm_gtt_mgr.h" - -struct xe_ttm_gtt_node { - struct ttm_buffer_object *tbo; - struct ttm_range_mgr_node base; -}; - -static inline struct xe_ttm_gtt_mgr * -to_gtt_mgr(struct ttm_resource_manager *man) -{ - return container_of(man, struct xe_ttm_gtt_mgr, manager); -} - -static inline struct xe_ttm_gtt_node * -to_xe_ttm_gtt_node(struct ttm_resource *res) -{ - return container_of(res, struct xe_ttm_gtt_node, base.base); -} - -static int xe_ttm_gtt_mgr_new(struct ttm_resource_manager *man, - struct ttm_buffer_object *tbo, - const struct ttm_place *place, - struct ttm_resource 
**res) -{ - struct xe_ttm_gtt_node *node; - int r; - - node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); - if (!node) - return -ENOMEM; - - node->tbo = tbo; - ttm_resource_init(tbo, place, &node->base.base); - - if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && - ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) { - r = -ENOSPC; - goto err_fini; - } - - node->base.mm_nodes[0].start = 0; - node->base.mm_nodes[0].size = PFN_UP(node->base.base.size); - node->base.base.start = XE_BO_INVALID_OFFSET; - - *res = &node->base.base; - - return 0; - -err_fini: - ttm_resource_fini(man, &node->base.base); - kfree(node); - return r; -} - -static void xe_ttm_gtt_mgr_del(struct ttm_resource_manager *man, - struct ttm_resource *res) -{ - struct xe_ttm_gtt_node *node = to_xe_ttm_gtt_node(res); - - ttm_resource_fini(man, res); - kfree(node); -} - -static void xe_ttm_gtt_mgr_debug(struct ttm_resource_manager *man, - struct drm_printer *printer) -{ - -} - -static const struct ttm_resource_manager_func xe_ttm_gtt_mgr_func = { - .alloc = xe_ttm_gtt_mgr_new, - .free = xe_ttm_gtt_mgr_del, - .debug = xe_ttm_gtt_mgr_debug -}; - -static void ttm_gtt_mgr_fini(struct drm_device *drm, void *arg) -{ - struct xe_ttm_gtt_mgr *mgr = arg; - struct xe_device *xe = gt_to_xe(mgr->gt); - struct ttm_resource_manager *man = &mgr->manager; - int err; - - ttm_resource_manager_set_used(man, false); - - err = ttm_resource_manager_evict_all(&xe->ttm, man); - if (err) - return; - - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL); -} - -int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr, - u64 gtt_size) -{ - struct xe_device *xe = gt_to_xe(gt); - struct ttm_resource_manager *man = &mgr->manager; - int err; - - XE_BUG_ON(xe_gt_is_media_type(gt)); - - mgr->gt = gt; - man->use_tt = true; - man->func = &xe_ttm_gtt_mgr_func; - - ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); - - ttm_set_driver_manager(&xe->ttm, XE_PL_TT, &mgr->manager); - ttm_resource_manager_set_used(man, true); - - err = drmm_add_action_or_reset(&xe->drm, ttm_gtt_mgr_fini, mgr); - if (err) - return err; - - return 0; -} diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h deleted file mode 100644 index d1d57cb9c2b8..000000000000 --- a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_TTGM_GTT_MGR_H_ -#define _XE_TTGM_GTT_MGR_H_ - -#include "xe_ttm_gtt_mgr_types.h" - -struct xe_gt; - -int xe_ttm_gtt_mgr_init(struct xe_gt *gt, struct xe_ttm_gtt_mgr *mgr, - u64 gtt_size); - -#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h deleted file mode 100644 index c66737488326..000000000000 --- a/drivers/gpu/drm/xe/xe_ttm_gtt_mgr_types.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_TTM_GTT_MGR_TYPES_H_ -#define _XE_TTM_GTT_MGR_TYPES_H_ - -#include - -struct xe_gt; - -struct xe_ttm_gtt_mgr { - struct xe_gt *gt; - struct ttm_resource_manager manager; -}; - -#endif diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c new file mode 100644 index 000000000000..5b0674bbb8ed --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + * Copyright (C) 2021-2002 Red Hat + */ + +#include 
"xe_ttm_sys_mgr.h" + +#include + +#include +#include +#include + +#include "xe_bo.h" +#include "xe_gt.h" + +struct xe_ttm_sys_node { + struct ttm_buffer_object *tbo; + struct ttm_range_mgr_node base; +}; + +static inline struct xe_ttm_sys_node * +to_xe_ttm_sys_node(struct ttm_resource *res) +{ + return container_of(res, struct xe_ttm_sys_node, base.base); +} + +static int xe_ttm_sys_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct xe_ttm_sys_node *node; + int r; + + node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->tbo = tbo; + ttm_resource_init(tbo, place, &node->base.base); + + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && + ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) { + r = -ENOSPC; + goto err_fini; + } + + node->base.mm_nodes[0].start = 0; + node->base.mm_nodes[0].size = PFN_UP(node->base.base.size); + node->base.base.start = XE_BO_INVALID_OFFSET; + + *res = &node->base.base; + + return 0; + +err_fini: + ttm_resource_fini(man, &node->base.base); + kfree(node); + return r; +} + +static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct xe_ttm_sys_node *node = to_xe_ttm_sys_node(res); + + ttm_resource_fini(man, res); + kfree(node); +} + +static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + +} + +static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = { + .alloc = xe_ttm_sys_mgr_new, + .free = xe_ttm_sys_mgr_del, + .debug = xe_ttm_sys_mgr_debug +}; + +static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = (struct xe_device *)arg; + struct ttm_resource_manager *man = &xe->mem.sys_mgr; + int err; + + ttm_resource_manager_set_used(man, false); + + err = ttm_resource_manager_evict_all(&xe->ttm, man); + if (err) + return; + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL); +} + +int xe_ttm_sys_mgr_init(struct xe_device *xe) +{ + struct ttm_resource_manager *man = &xe->mem.sys_mgr; + struct sysinfo si; + u64 gtt_size; + + si_meminfo(&si); + gtt_size = (u64)si.totalram * si.mem_unit * 3/4; + man->use_tt = true; + man->func = &xe_ttm_sys_mgr_func; + ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); + ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); + ttm_resource_manager_set_used(man, true); + return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); +} diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h new file mode 100644 index 000000000000..e8f5cd395b28 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TTM_SYS_MGR_H_ +#define _XE_TTM_SYS_MGR_H_ + +struct xe_device; + +int xe_ttm_sys_mgr_init(struct xe_device *xe); + +#endif -- cgit v1.2.3 From 06d06064f725c207a4d14b7410f5498d68c1fb86 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 31 Mar 2023 17:20:59 -0700 Subject: drm/xe/irq: Drop gen3_ prefixes "Gen" terminology should be avoided in the Xe driver and "gen3" refers to platforms that are 9 (!!) graphics generations earlier than the oldest supported by the Xe driver, so this prefix really doesn't make sense. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230401002106.588656-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 9527e7fb9b6e..afaebc0c589e 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,7 +18,7 @@ #include "xe_hw_engine.h" #include "xe_mmio.h" -static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) +static void assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) { u32 val = xe_mmio_read32(gt, reg.reg); @@ -34,24 +34,24 @@ static void gen3_assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) xe_mmio_read32(gt, reg.reg); } -static void gen3_irq_init(struct xe_gt *gt, - i915_reg_t imr, u32 imr_val, - i915_reg_t ier, u32 ier_val, - i915_reg_t iir) +static void irq_init(struct xe_gt *gt, + i915_reg_t imr, u32 imr_val, + i915_reg_t ier, u32 ier_val, + i915_reg_t iir) { - gen3_assert_iir_is_zero(gt, iir); + assert_iir_is_zero(gt, iir); xe_mmio_write32(gt, ier.reg, ier_val); xe_mmio_write32(gt, imr.reg, imr_val); xe_mmio_read32(gt, imr.reg); } -#define GEN3_IRQ_INIT(gt, type, imr_val, ier_val) \ - gen3_irq_init((gt), \ - type##IMR, imr_val, \ - type##IER, ier_val, \ - type##IIR) +#define IRQ_INIT(gt, type, imr_val, ier_val) \ + irq_init((gt), \ + type##IMR, imr_val, \ + type##IER, ier_val, \ + type##IIR) -static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir, +static void irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir, i915_reg_t ier) { xe_mmio_write32(gt, imr.reg, 0xffffffff); @@ -65,8 +65,8 @@ static void gen3_irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir, xe_mmio_write32(gt, iir.reg, 0xffffffff); xe_mmio_read32(gt, iir.reg); } -#define GEN3_IRQ_RESET(gt, type) \ - gen3_irq_reset((gt), type##IMR, type##IIR, type##IER) +#define IRQ_RESET(gt, type) \ + irq_reset((gt), type##IMR, type##IIR, type##IER) static u32 gen11_intr_disable(struct xe_gt *gt) { @@ -172,8 +172,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) gen11_gt_irq_postinstall(xe, gt); - GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, - GEN11_GU_MISC_GSE); + IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); gen11_intr_enable(gt, true); } @@ -332,8 +331,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { gen11_gt_irq_postinstall(xe, gt); - GEN3_IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, - GEN11_GU_MISC_GSE); + IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); if (gt->info.id == XE_GT0) dg1_intr_enable(xe, true); @@ -434,8 +432,8 @@ static void gen11_irq_reset(struct xe_gt *gt) gen11_gt_irq_reset(gt); - GEN3_IRQ_RESET(gt, GEN11_GU_MISC_); - GEN3_IRQ_RESET(gt, GEN8_PCU_); + IRQ_RESET(gt, GEN11_GU_MISC_); + IRQ_RESET(gt, GEN8_PCU_); } static void dg1_irq_reset(struct xe_gt *gt) @@ -445,8 +443,8 @@ static void dg1_irq_reset(struct xe_gt *gt) gen11_gt_irq_reset(gt); - GEN3_IRQ_RESET(gt, GEN11_GU_MISC_); - GEN3_IRQ_RESET(gt, GEN8_PCU_); + IRQ_RESET(gt, GEN11_GU_MISC_); + IRQ_RESET(gt, GEN8_PCU_); } static void xe_irq_reset(struct xe_device *xe) -- cgit v1.2.3 From 9293b67de6602bcf0415da0f3ae3dbf98396183c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 31 Mar 2023 17:21:00 -0700 Subject: drm/xe/irq: Add helpers to find ISR/IIR/IMR/IER registers For cases 
where IRQ_INIT and IRQ_RESET are used, the relevant interrupt registers are always consecutive and ordered ISR, IMR, IIR, IER. Adding helpers to look these up from a base offset will let us eliminate some of the CPP pasting and simplify other upcoming patches. v2: - s/_REGS/_OFFSET/ for consistency. (Lucas) - Move IMR/IIR/IER helpers into xe_irq.c; they aren't needed anywhere else. (Lucas) Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230401002106.588656-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 11 ++--------- drivers/gpu/drm/xe/xe_irq.c | 32 ++++++++++++++++++++------------ 2 files changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 2e7fbdedb5eb..61b6b356c90e 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -72,15 +72,8 @@ #define SOFTWARE_FLAGS_SPR33 _MMIO(0x4f084) -#define GEN8_PCU_ISR _MMIO(0x444e0) -#define GEN8_PCU_IMR _MMIO(0x444e4) -#define GEN8_PCU_IIR _MMIO(0x444e8) -#define GEN8_PCU_IER _MMIO(0x444ec) - -#define GEN11_GU_MISC_ISR _MMIO(0x444f0) -#define GEN11_GU_MISC_IMR _MMIO(0x444f4) -#define GEN11_GU_MISC_IIR _MMIO(0x444f8) -#define GEN11_GU_MISC_IER _MMIO(0x444fc) +#define PCU_IRQ_OFFSET 0x444e0 +#define GU_MISC_IRQ_OFFSET 0x444f0 #define GEN11_GU_MISC_GSE (1 << 27) #define GEN11_GFX_MSTR_IRQ _MMIO(0x190010) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index afaebc0c589e..64e0e74f66a2 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,6 +18,14 @@ #include "xe_hw_engine.h" #include "xe_mmio.h" +/* + * Interrupt registers for a unit are always consecutive and ordered + * ISR, IMR, IIR, IER. 
+ */ +#define IMR(offset) _MMIO(offset + 0x4) +#define IIR(offset) _MMIO(offset + 0x8) +#define IER(offset) _MMIO(offset + 0xc) + static void assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) { u32 val = xe_mmio_read32(gt, reg.reg); @@ -47,9 +55,9 @@ static void irq_init(struct xe_gt *gt, } #define IRQ_INIT(gt, type, imr_val, ier_val) \ irq_init((gt), \ - type##IMR, imr_val, \ - type##IER, ier_val, \ - type##IIR) + IMR(type), imr_val, \ + IER(type), ier_val, \ + IIR(type)) static void irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir, i915_reg_t ier) @@ -66,7 +74,7 @@ static void irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir, xe_mmio_read32(gt, iir.reg); } #define IRQ_RESET(gt, type) \ - irq_reset((gt), type##IMR, type##IIR, type##IER) + irq_reset((gt), IMR(type), IIR(type), IER(type)) static u32 gen11_intr_disable(struct xe_gt *gt) { @@ -89,9 +97,9 @@ gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) if (!(master_ctl & GEN11_GU_MISC_IRQ)) return 0; - iir = xe_mmio_read32(gt, GEN11_GU_MISC_IIR.reg); + iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET).reg); if (likely(iir)) - xe_mmio_write32(gt, GEN11_GU_MISC_IIR.reg, iir); + xe_mmio_write32(gt, IIR(GU_MISC_IRQ_OFFSET).reg, iir); return iir; } @@ -172,7 +180,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) gen11_gt_irq_postinstall(xe, gt); - IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); + IRQ_INIT(gt, GU_MISC_IRQ_OFFSET, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); gen11_intr_enable(gt, true); } @@ -331,7 +339,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { gen11_gt_irq_postinstall(xe, gt); - IRQ_INIT(gt, GEN11_GU_MISC_, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); + IRQ_INIT(gt, GU_MISC_IRQ_OFFSET, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); if (gt->info.id == XE_GT0) dg1_intr_enable(xe, true); @@ -432,8 +440,8 @@ static void gen11_irq_reset(struct xe_gt *gt) gen11_gt_irq_reset(gt); - IRQ_RESET(gt, GEN11_GU_MISC_); - IRQ_RESET(gt, GEN8_PCU_); + IRQ_RESET(gt, GU_MISC_IRQ_OFFSET); + IRQ_RESET(gt, PCU_IRQ_OFFSET); } static void dg1_irq_reset(struct xe_gt *gt) @@ -443,8 +451,8 @@ static void dg1_irq_reset(struct xe_gt *gt) gen11_gt_irq_reset(gt); - IRQ_RESET(gt, GEN11_GU_MISC_); - IRQ_RESET(gt, GEN8_PCU_); + IRQ_RESET(gt, GU_MISC_IRQ_OFFSET); + IRQ_RESET(gt, PCU_IRQ_OFFSET); } static void xe_irq_reset(struct xe_device *xe) -- cgit v1.2.3 From ca14d553434ed1e1522afb8f37ed7b6fb2b9f043 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 31 Mar 2023 17:21:01 -0700 Subject: drm/xe/irq: Drop IRQ_INIT and IRQ_RESET macros It's no longer necessary to wrap these operations in macros; a simple function will suffice. Also switch to function names that more clearly describe what operation is being performed: unmask_and_enable() and mask_and_disable(). 
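A quick worked example of how the offset helpers from the previous patch combine with these new functions, using the GU_MISC block whose addresses appear in the xe_regs.h hunk earlier (values taken from that hunk; this snippet is illustrative and not part of either patch): with GU_MISC_IRQ_OFFSET defined as 0x444f0, the helpers resolve to

	IMR(GU_MISC_IRQ_OFFSET)	/* _MMIO(0x444f4), formerly GEN11_GU_MISC_IMR */
	IIR(GU_MISC_IRQ_OFFSET)	/* _MMIO(0x444f8), formerly GEN11_GU_MISC_IIR */
	IER(GU_MISC_IRQ_OFFSET)	/* _MMIO(0x444fc), formerly GEN11_GU_MISC_IER */

so a call such as unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE) in the hunks below programs exactly the registers the removed IRQ_INIT() wrapper used to touch.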
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230401002106.588656-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 63 +++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 31 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 64e0e74f66a2..202d96182411 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -42,39 +42,40 @@ static void assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) xe_mmio_read32(gt, reg.reg); } -static void irq_init(struct xe_gt *gt, - i915_reg_t imr, u32 imr_val, - i915_reg_t ier, u32 ier_val, - i915_reg_t iir) +/* + * Unmask and enable the specified interrupts. Does not check current state, + * so any bits not specified here will become masked and disabled. + */ +static void unmask_and_enable(struct xe_gt *gt, u32 irqregs, u32 bits) { - assert_iir_is_zero(gt, iir); + /* + * If we're just enabling an interrupt now, it shouldn't already + * be raised in the IIR. + */ + assert_iir_is_zero(gt, IIR(irqregs)); + + xe_mmio_write32(gt, IER(irqregs).reg, bits); + xe_mmio_write32(gt, IMR(irqregs).reg, ~bits); - xe_mmio_write32(gt, ier.reg, ier_val); - xe_mmio_write32(gt, imr.reg, imr_val); - xe_mmio_read32(gt, imr.reg); + /* Posting read */ + xe_mmio_read32(gt, IMR(irqregs).reg); } -#define IRQ_INIT(gt, type, imr_val, ier_val) \ - irq_init((gt), \ - IMR(type), imr_val, \ - IER(type), ier_val, \ - IIR(type)) - -static void irq_reset(struct xe_gt *gt, i915_reg_t imr, i915_reg_t iir, - i915_reg_t ier) + +/* Mask and disable all interrupts. */ +static void mask_and_disable(struct xe_gt *gt, u32 irqregs) { - xe_mmio_write32(gt, imr.reg, 0xffffffff); - xe_mmio_read32(gt, imr.reg); + xe_mmio_write32(gt, IMR(irqregs).reg, ~0); + /* Posting read */ + xe_mmio_read32(gt, IMR(irqregs).reg); - xe_mmio_write32(gt, ier.reg, 0); + xe_mmio_write32(gt, IER(irqregs).reg, 0); /* IIR can theoretically queue up two events. Be paranoid. 
*/ - xe_mmio_write32(gt, iir.reg, 0xffffffff); - xe_mmio_read32(gt, iir.reg); - xe_mmio_write32(gt, iir.reg, 0xffffffff); - xe_mmio_read32(gt, iir.reg); + xe_mmio_write32(gt, IIR(irqregs).reg, ~0); + xe_mmio_read32(gt, IIR(irqregs).reg); + xe_mmio_write32(gt, IIR(irqregs).reg, ~0); + xe_mmio_read32(gt, IIR(irqregs).reg); } -#define IRQ_RESET(gt, type) \ - irq_reset((gt), IMR(type), IIR(type), IER(type)) static u32 gen11_intr_disable(struct xe_gt *gt) { @@ -180,7 +181,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) gen11_gt_irq_postinstall(xe, gt); - IRQ_INIT(gt, GU_MISC_IRQ_OFFSET, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); + unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE); gen11_intr_enable(gt, true); } @@ -339,7 +340,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { gen11_gt_irq_postinstall(xe, gt); - IRQ_INIT(gt, GU_MISC_IRQ_OFFSET, ~GEN11_GU_MISC_GSE, GEN11_GU_MISC_GSE); + unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE); if (gt->info.id == XE_GT0) dg1_intr_enable(xe, true); @@ -440,8 +441,8 @@ static void gen11_irq_reset(struct xe_gt *gt) gen11_gt_irq_reset(gt); - IRQ_RESET(gt, GU_MISC_IRQ_OFFSET); - IRQ_RESET(gt, PCU_IRQ_OFFSET); + mask_and_disable(gt, GU_MISC_IRQ_OFFSET); + mask_and_disable(gt, PCU_IRQ_OFFSET); } static void dg1_irq_reset(struct xe_gt *gt) @@ -451,8 +452,8 @@ static void dg1_irq_reset(struct xe_gt *gt) gen11_gt_irq_reset(gt); - IRQ_RESET(gt, GU_MISC_IRQ_OFFSET); - IRQ_RESET(gt, PCU_IRQ_OFFSET); + mask_and_disable(gt, GU_MISC_IRQ_OFFSET); + mask_and_disable(gt, PCU_IRQ_OFFSET); } static void xe_irq_reset(struct xe_device *xe) -- cgit v1.2.3 From 6b7ece97dd21d2b80a41f6192f89f8848c3b1d76 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 31 Mar 2023 17:21:02 -0700 Subject: drm/xe/irq: Drop unnecessary GEN11_ and GEN12_ register prefixes Any interrupt registers that were introduced by platforms i915 considered to be "gen11" or "gen12" are present on all platforms that the Xe driver supports; drop the unnecessary prefixes. While working in the area, also convert a few open-coded bit manipulations over to REG_BIT and REG_FIELD_GET notation. Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230401002106.588656-5-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi [Rodrigo: removed display. 
That was later squashed to the xe Display patch] --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 42 +++++++------- drivers/gpu/drm/xe/regs/xe_regs.h | 12 ++-- drivers/gpu/drm/xe/xe_guc.c | 6 +- drivers/gpu/drm/xe/xe_irq.c | 109 +++++++++++++++++------------------ 4 files changed, 84 insertions(+), 85 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index e33885f429b5..23d3b8f7e349 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -348,34 +348,34 @@ #define GFX_FLSH_CNTL_GEN6 _MMIO(0x101008) #define GFX_FLSH_CNTL_EN (1 << 0) -#define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) +#define GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) -#define GEN11_GUC_SG_INTR_ENABLE _MMIO(0x190038) +#define GUC_SG_INTR_ENABLE _MMIO(0x190038) #define ENGINE1_MASK REG_GENMASK(31, 16) #define ENGINE0_MASK REG_GENMASK(15, 0) -#define GEN11_GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c) +#define GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c) -#define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4)) -#define GEN11_INTR_DATA_VALID (1 << 31) -#define GEN11_INTR_ENGINE_INSTANCE(x) (((x) & GENMASK(25, 20)) >> 20) -#define GEN11_INTR_ENGINE_CLASS(x) (((x) & GENMASK(18, 16)) >> 16) -#define GEN11_INTR_ENGINE_INTR(x) ((x) & 0xffff) +#define INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4)) +#define INTR_DATA_VALID REG_BIT(31) +#define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x) +#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) +#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) #define OTHER_GUC_INSTANCE 0 -#define GEN11_RENDER_COPY_INTR_ENABLE _MMIO(0x190030) -#define GEN11_VCS_VECS_INTR_ENABLE _MMIO(0x190034) -#define GEN12_CCS_RSVD_INTR_ENABLE _MMIO(0x190048) -#define GEN11_IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4)) -#define GEN11_RCS0_RSVD_INTR_MASK _MMIO(0x190090) -#define GEN11_BCS_RSVD_INTR_MASK _MMIO(0x1900a0) -#define GEN11_VCS0_VCS1_INTR_MASK _MMIO(0x1900a8) -#define GEN11_VCS2_VCS3_INTR_MASK _MMIO(0x1900ac) -#define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) -#define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8) -#define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) -#define GEN12_CCS0_CCS1_INTR_MASK _MMIO(0x190100) -#define GEN12_CCS2_CCS3_INTR_MASK _MMIO(0x190104) +#define RENDER_COPY_INTR_ENABLE _MMIO(0x190030) +#define VCS_VECS_INTR_ENABLE _MMIO(0x190034) +#define CCS_RSVD_INTR_ENABLE _MMIO(0x190048) +#define IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4)) +#define RCS0_RSVD_INTR_MASK _MMIO(0x190090) +#define BCS_RSVD_INTR_MASK _MMIO(0x1900a0) +#define VCS0_VCS1_INTR_MASK _MMIO(0x1900a8) +#define VCS2_VCS3_INTR_MASK _MMIO(0x1900ac) +#define VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) +#define GUC_SG_INTR_MASK _MMIO(0x1900e8) +#define GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) +#define CCS0_CCS1_INTR_MASK _MMIO(0x190100) +#define CCS2_CCS3_INTR_MASK _MMIO(0x190104) #define XEHPC_BCS1_BCS2_INTR_MASK _MMIO(0x190110) #define XEHPC_BCS3_BCS4_INTR_MASK _MMIO(0x190114) #define XEHPC_BCS5_BCS6_INTR_MASK _MMIO(0x190118) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 61b6b356c90e..c2a278b25fc9 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -74,13 +74,13 @@ #define PCU_IRQ_OFFSET 0x444e0 #define GU_MISC_IRQ_OFFSET 0x444f0 -#define GEN11_GU_MISC_GSE (1 << 27) +#define GU_MISC_GSE REG_BIT(27) -#define GEN11_GFX_MSTR_IRQ _MMIO(0x190010) -#define GEN11_MASTER_IRQ (1 << 31) -#define 
GEN11_GU_MISC_IRQ (1 << 29) -#define GEN11_DISPLAY_IRQ (1 << 16) -#define GEN11_GT_DW_IRQ(x) (1 << (x)) +#define GFX_MSTR_IRQ _MMIO(0x190010) +#define MASTER_IRQ REG_BIT(31) +#define GU_MISC_IRQ REG_BIT(29) +#define DISPLAY_IRQ REG_BIT(16) +#define GT_DW_IRQ(x) REG_BIT(x) #define DG1_MSTR_TILE_INTR _MMIO(0x190008) #define DG1_MSTR_IRQ REG_BIT(31) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 58b9841616e4..ee71b969bcbf 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -561,12 +561,12 @@ static void guc_enable_irq(struct xe_guc *guc) REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST) : REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); - xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, + xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg, REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST)); if (xe_gt_is_media_type(gt)) - xe_mmio_rmw32(gt, GEN11_GUC_SG_INTR_MASK.reg, events, 0); + xe_mmio_rmw32(gt, GUC_SG_INTR_MASK.reg, events, 0); else - xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~events); + xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~events); } int xe_guc_enable_communication(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 202d96182411..69963f36dbc7 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -79,7 +79,7 @@ static void mask_and_disable(struct xe_gt *gt, u32 irqregs) static u32 gen11_intr_disable(struct xe_gt *gt) { - xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, 0); + xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, 0); /* * Now with master disabled, get a sample of level indications @@ -87,7 +87,7 @@ static u32 gen11_intr_disable(struct xe_gt *gt) * New indications can and will light up during processing, * and will generate new interrupt after enabling master. */ - return xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); + return xe_mmio_read32(gt, GFX_MSTR_IRQ.reg); } static u32 @@ -95,7 +95,7 @@ gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) { u32 iir; - if (!(master_ctl & GEN11_GU_MISC_IRQ)) + if (!(master_ctl & GU_MISC_IRQ)) return 0; iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET).reg); @@ -107,9 +107,9 @@ gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) static inline void gen11_intr_enable(struct xe_gt *gt, bool stall) { - xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, GEN11_MASTER_IRQ); + xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, MASTER_IRQ); if (stall) - xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); + xe_mmio_read32(gt, GFX_MSTR_IRQ.reg); } static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) @@ -132,14 +132,14 @@ static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) smask = irqs << 16; /* Enable RCS, BCS, VCS and VECS class interrupts. */ - xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, dmask); - xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, dmask); + xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE.reg, dmask); + xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE.reg, dmask); if (ccs_mask) - xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, smask); + xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE.reg, smask); /* Unmask irqs on RCS, BCS, VCS and VECS engines. 
*/ - xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~smask); - xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~smask); + xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK.reg, ~smask); + xe_mmio_write32(gt, BCS_RSVD_INTR_MASK.reg, ~smask); if (bcs_mask & (BIT(1)|BIT(2))) xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask); if (bcs_mask & (BIT(3)|BIT(4))) @@ -148,31 +148,31 @@ static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask); if (bcs_mask & (BIT(7)|BIT(8))) xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask); - xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~dmask); - xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~dmask); //if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) - // intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask); + // intel_uncore_write(uncore, VCS4_VCS5_INTR_MASK, ~dmask); //if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) - // intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask); - xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~dmask); + // intel_uncore_write(uncore, VCS6_VCS7_INTR_MASK, ~dmask); + xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~dmask); //if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) - // intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask); + // intel_uncore_write(uncore, VECS2_VECS3_INTR_MASK, ~dmask); if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~dmask); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK.reg, ~dmask); /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. 
*/ /* TODO: gt->pm_ier, gt->pm_imr */ - xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK.reg, ~0); /* Same thing for GuC interrupts */ - xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~0); } static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) @@ -181,7 +181,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) gen11_gt_irq_postinstall(xe, gt); - unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE); + unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); gen11_intr_enable(gt, true); } @@ -197,7 +197,7 @@ gen11_gt_engine_identity(struct xe_device *xe, lockdep_assert_held(&xe->irq.lock); - xe_mmio_write32(gt, GEN11_IIR_REG_SELECTOR(bank).reg, BIT(bit)); + xe_mmio_write32(gt, IIR_REG_SELECTOR(bank).reg, BIT(bit)); /* * NB: Specs do not specify how long to spin wait, @@ -205,18 +205,17 @@ gen11_gt_engine_identity(struct xe_device *xe, */ timeout_ts = (local_clock() >> 10) + 100; do { - ident = xe_mmio_read32(gt, GEN11_INTR_IDENTITY_REG(bank).reg); - } while (!(ident & GEN11_INTR_DATA_VALID) && + ident = xe_mmio_read32(gt, INTR_IDENTITY_REG(bank).reg); + } while (!(ident & INTR_DATA_VALID) && !time_after32(local_clock() >> 10, timeout_ts)); - if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) { + if (unlikely(!(ident & INTR_DATA_VALID))) { drm_err(&xe->drm, "INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n", bank, bit, ident); return 0; } - xe_mmio_write32(gt, GEN11_INTR_IDENTITY_REG(bank).reg, - GEN11_INTR_DATA_VALID); + xe_mmio_write32(gt, INTR_IDENTITY_REG(bank).reg, INTR_DATA_VALID); return ident; } @@ -250,24 +249,24 @@ static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, spin_lock(&xe->irq.lock); for (bank = 0; bank < 2; bank++) { - if (!(master_ctl & GEN11_GT_DW_IRQ(bank))) + if (!(master_ctl & GT_DW_IRQ(bank))) continue; if (!xe_gt_is_media_type(gt)) { intr_dw[bank] = - xe_mmio_read32(gt, GEN11_GT_INTR_DW(bank).reg); + xe_mmio_read32(gt, GT_INTR_DW(bank).reg); for_each_set_bit(bit, intr_dw + bank, 32) identity[bit] = gen11_gt_engine_identity(xe, gt, bank, bit); - xe_mmio_write32(gt, GEN11_GT_INTR_DW(bank).reg, + xe_mmio_write32(gt, GT_INTR_DW(bank).reg, intr_dw[bank]); } for_each_set_bit(bit, intr_dw + bank, 32) { - class = GEN11_INTR_ENGINE_CLASS(identity[bit]); - instance = GEN11_INTR_ENGINE_INSTANCE(identity[bit]); - intr_vec = GEN11_INTR_ENGINE_INTR(identity[bit]); + class = INTR_ENGINE_CLASS(identity[bit]); + instance = INTR_ENGINE_INSTANCE(identity[bit]); + intr_vec = INTR_ENGINE_INTR(identity[bit]); if (class == XE_ENGINE_CLASS_OTHER) { gen11_gt_other_irq_handler(gt, instance, @@ -340,7 +339,7 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { gen11_gt_irq_postinstall(xe, gt); - unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GEN11_GU_MISC_GSE); + unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); if (gt->info.id == XE_GT0) dg1_intr_enable(xe, true); @@ -368,7 +367,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) continue; if (!xe_gt_is_media_type(gt)) - master_ctl = xe_mmio_read32(gt, GEN11_GFX_MSTR_IRQ.reg); + master_ctl = xe_mmio_read32(gt, GFX_MSTR_IRQ.reg); /* * We might be in irq handler just when PCIe DPC is initiated @@ -382,7 +381,7 @@ static 
irqreturn_t dg1_irq_handler(int irq, void *arg) } if (!xe_gt_is_media_type(gt)) - xe_mmio_write32(gt, GEN11_GFX_MSTR_IRQ.reg, master_ctl); + xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, master_ctl); gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); } @@ -399,14 +398,14 @@ static void gen11_gt_irq_reset(struct xe_gt *gt) u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); /* Disable RCS, BCS, VCS and VECS class engines. */ - xe_mmio_write32(gt, GEN11_RENDER_COPY_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GEN11_VCS_VECS_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE.reg, 0); if (ccs_mask) - xe_mmio_write32(gt, GEN12_CCS_RSVD_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE.reg, 0); /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ - xe_mmio_write32(gt, GEN11_RCS0_RSVD_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, GEN11_BCS_RSVD_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, BCS_RSVD_INTR_MASK.reg, ~0); if (bcs_mask & (BIT(1)|BIT(2))) xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0); if (bcs_mask & (BIT(3)|BIT(4))) @@ -415,24 +414,24 @@ static void gen11_gt_irq_reset(struct xe_gt *gt) xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0); if (bcs_mask & (BIT(7)|BIT(8))) xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, GEN11_VCS0_VCS1_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, GEN11_VCS2_VCS3_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~0); // if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) -// xe_mmio_write32(xe, GEN12_VCS4_VCS5_INTR_MASK.reg, ~0); +// xe_mmio_write32(xe, VCS4_VCS5_INTR_MASK.reg, ~0); // if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) -// xe_mmio_write32(xe, GEN12_VCS6_VCS7_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, GEN11_VECS0_VECS1_INTR_MASK.reg, ~0); +// xe_mmio_write32(xe, VCS6_VCS7_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~0); // if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) -// xe_mmio_write32(xe, GEN12_VECS2_VECS3_INTR_MASK.reg, ~0); +// xe_mmio_write32(xe, VECS2_VECS3_INTR_MASK.reg, ~0); if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(gt, GEN12_CCS0_CCS1_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~0); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, GEN12_CCS2_CCS3_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GEN11_GPM_WGBOXPERF_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, GEN11_GUC_SG_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GEN11_GUC_SG_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~0); } static void gen11_irq_reset(struct xe_gt *gt) -- cgit v1.2.3 From dd12b0ff2cf29904194bc8a5f0a8bc7a2b7041fa Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 31 Mar 2023 17:21:03 -0700 Subject: drm/xe/irq: Rename and clarify top-level interrupt handling routines Platforms supported by the Xe driver handle top-level interrupts in one of two ways: - Xe_LP platforms only have a "graphics master" register and lack a "master tile" register, so top-level interrupt detection and enable/disable happens in the graphics master. 
- Xe_LP+ (aka DG1) and beyond have a "master tile" interrupt register that controls the enable/disable of top-level interrupts and must also be consulted to determine which tiles have received interrupts before the driver moves on the process the graphics master register. For functions that are only relevant to the first set of platforms, rename the function prefix to Xe_LP since "gen11" doesn't make sense in the Xe driver. Also add some comments briefly describing the two top-level handlers. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230401002106.588656-6-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 46 +++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 69963f36dbc7..4bdcccda7169 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -77,7 +77,7 @@ static void mask_and_disable(struct xe_gt *gt, u32 irqregs) xe_mmio_read32(gt, IIR(irqregs).reg); } -static u32 gen11_intr_disable(struct xe_gt *gt) +static u32 xelp_intr_disable(struct xe_gt *gt) { xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, 0); @@ -105,7 +105,7 @@ gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) return iir; } -static inline void gen11_intr_enable(struct xe_gt *gt, bool stall) +static inline void xelp_intr_enable(struct xe_gt *gt, bool stall) { xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, MASTER_IRQ); if (stall) @@ -175,7 +175,7 @@ static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~0); } -static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { /* TODO: PCH */ @@ -183,7 +183,7 @@ static void gen11_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); - gen11_intr_enable(gt, true); + xelp_intr_enable(gt, true); } static u32 @@ -285,7 +285,11 @@ static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, spin_unlock(&xe->irq.lock); } -static irqreturn_t gen11_irq_handler(int irq, void *arg) +/* + * Top-level interrupt handler for Xe_LP platforms (which did not have + * a "master tile" interrupt register. + */ +static irqreturn_t xelp_irq_handler(int irq, void *arg) { struct xe_device *xe = arg; struct xe_gt *gt = xe_device_get_gt(xe, 0); /* Only 1 GT here */ @@ -293,9 +297,9 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) long unsigned int intr_dw[2]; u32 identity[32]; - master_ctl = gen11_intr_disable(gt); + master_ctl = xelp_intr_disable(gt); if (!master_ctl) { - gen11_intr_enable(gt, false); + xelp_intr_enable(gt, false); return IRQ_NONE; } @@ -303,7 +307,7 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); - gen11_intr_enable(gt, false); + xelp_intr_enable(gt, false); return IRQ_HANDLED; } @@ -345,6 +349,11 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) dg1_intr_enable(xe, true); } +/* + * Top-level interrupt handler for Xe_LP+ and beyond. These platforms have + * a "master tile" interrupt register which must be consulted before the + * "graphics master" interrupt register. 
+ */ static irqreturn_t dg1_irq_handler(int irq, void *arg) { struct xe_device *xe = arg; @@ -434,9 +443,9 @@ static void gen11_gt_irq_reset(struct xe_gt *gt) xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~0); } -static void gen11_irq_reset(struct xe_gt *gt) +static void xelp_irq_reset(struct xe_gt *gt) { - gen11_intr_disable(gt); + xelp_intr_disable(gt); gen11_gt_irq_reset(gt); @@ -461,13 +470,10 @@ static void xe_irq_reset(struct xe_device *xe) u8 id; for_each_gt(gt, xe, id) { - if (GRAPHICS_VERx100(xe) >= 1210) { + if (GRAPHICS_VERx100(xe) >= 1210) dg1_irq_reset(gt); - } else if (GRAPHICS_VER(xe) >= 11) { - gen11_irq_reset(gt); - } else { - drm_err(&xe->drm, "No interrupt reset hook"); - } + else + xelp_irq_reset(gt); } } @@ -477,10 +483,8 @@ void xe_gt_irq_postinstall(struct xe_gt *gt) if (GRAPHICS_VERx100(xe) >= 1210) dg1_irq_postinstall(xe, gt); - else if (GRAPHICS_VER(xe) >= 11) - gen11_irq_postinstall(xe, gt); else - drm_err(&xe->drm, "No interrupt postinstall hook"); + xelp_irq_postinstall(xe, gt); } static void xe_irq_postinstall(struct xe_device *xe) @@ -496,10 +500,8 @@ static irq_handler_t xe_irq_handler(struct xe_device *xe) { if (GRAPHICS_VERx100(xe) >= 1210) { return dg1_irq_handler; - } else if (GRAPHICS_VER(xe) >= 11) { - return gen11_irq_handler; } else { - return NULL; + return xelp_irq_handler; } } -- cgit v1.2.3 From c94cd8f2d2784dff57581389f59d3051bc312fc2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 31 Mar 2023 17:21:04 -0700 Subject: drm/xe/irq: Drop remaining "gen11_" prefix from IRQ functions The remaining "gen11_*" IRQ functions are common to all platforms supported by the Xe driver. Drop the unnecessary prefix. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230401002106.588656-7-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 46 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 4bdcccda7169..f5b038cb1860 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -91,7 +91,7 @@ static u32 xelp_intr_disable(struct xe_gt *gt) } static u32 -gen11_gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) +gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) { u32 iir; @@ -112,7 +112,7 @@ static inline void xelp_intr_enable(struct xe_gt *gt, bool stall) xe_mmio_read32(gt, GFX_MSTR_IRQ.reg); } -static void gen11_gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { u32 irqs, dmask, smask; u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); @@ -179,7 +179,7 @@ static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { /* TODO: PCH */ - gen11_gt_irq_postinstall(xe, gt); + gt_irq_postinstall(xe, gt); unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); @@ -187,10 +187,10 @@ static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) } static u32 -gen11_gt_engine_identity(struct xe_device *xe, - struct xe_gt *gt, - const unsigned int bank, - const unsigned int bit) +gt_engine_identity(struct xe_device *xe, + struct xe_gt *gt, + const unsigned int bank, + const unsigned int bit) { u32 timeout_ts; u32 ident; @@ -223,7 +223,7 @@ gen11_gt_engine_identity(struct xe_device *xe, #define OTHER_MEDIA_GUC_INSTANCE 16 static void -gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const 
u16 iir) +gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) { if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) return xe_guc_irq_handler(>->uc.guc, iir); @@ -237,9 +237,9 @@ gen11_gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) } } -static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, - u32 master_ctl, long unsigned int *intr_dw, - u32 *identity) +static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, + u32 master_ctl, long unsigned int *intr_dw, + u32 *identity) { unsigned int bank, bit; u16 instance, intr_vec; @@ -256,9 +256,8 @@ static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, intr_dw[bank] = xe_mmio_read32(gt, GT_INTR_DW(bank).reg); for_each_set_bit(bit, intr_dw + bank, 32) - identity[bit] = gen11_gt_engine_identity(xe, gt, - bank, - bit); + identity[bit] = gt_engine_identity(xe, gt, + bank, bit); xe_mmio_write32(gt, GT_INTR_DW(bank).reg, intr_dw[bank]); } @@ -269,8 +268,7 @@ static void gen11_gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, intr_vec = INTR_ENGINE_INTR(identity[bit]); if (class == XE_ENGINE_CLASS_OTHER) { - gen11_gt_other_irq_handler(gt, instance, - intr_vec); + gt_other_irq_handler(gt, instance, intr_vec); continue; } @@ -303,9 +301,9 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) return IRQ_NONE; } - gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); - gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); + gu_misc_iir = gu_misc_irq_ack(gt, master_ctl); xelp_intr_enable(gt, false); @@ -341,7 +339,7 @@ static void dg1_intr_enable(struct xe_device *xe, bool stall) static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { - gen11_gt_irq_postinstall(xe, gt); + gt_irq_postinstall(xe, gt); unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); @@ -391,17 +389,17 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) if (!xe_gt_is_media_type(gt)) xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, master_ctl); - gen11_gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); } - gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); + gu_misc_iir = gu_misc_irq_ack(gt, master_ctl); dg1_intr_enable(xe, false); return IRQ_HANDLED; } -static void gen11_gt_irq_reset(struct xe_gt *gt) +static void gt_irq_reset(struct xe_gt *gt) { u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); @@ -447,7 +445,7 @@ static void xelp_irq_reset(struct xe_gt *gt) { xelp_intr_disable(gt); - gen11_gt_irq_reset(gt); + gt_irq_reset(gt); mask_and_disable(gt, GU_MISC_IRQ_OFFSET); mask_and_disable(gt, PCU_IRQ_OFFSET); @@ -458,7 +456,7 @@ static void dg1_irq_reset(struct xe_gt *gt) if (gt->info.id == 0) dg1_intr_disable(gt_to_xe(gt)); - gen11_gt_irq_reset(gt); + gt_irq_reset(gt); mask_and_disable(gt, GU_MISC_IRQ_OFFSET); mask_and_disable(gt, PCU_IRQ_OFFSET); -- cgit v1.2.3 From bf26d6984c28f319eeca22bc8b76399e93613dea Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 31 Mar 2023 17:21:05 -0700 Subject: drm/xe/irq: Drop commented-out code for non-existent media engines Although the hardware team has set aside some register bits for extra media engines, no platform supported by the Xe driver today has VCS4-7 or VECS2-3. 
Drop the corresponding code (which was already commented out); we can bring it back easily enough if such engines show up on a future platform. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230401002106.588656-8-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index f5b038cb1860..62ecd71be063 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -150,13 +150,7 @@ static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask); xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~dmask); xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~dmask); - //if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) - // intel_uncore_write(uncore, VCS4_VCS5_INTR_MASK, ~dmask); - //if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) - // intel_uncore_write(uncore, VCS6_VCS7_INTR_MASK, ~dmask); xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~dmask); - //if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) - // intel_uncore_write(uncore, VECS2_VECS3_INTR_MASK, ~dmask); if (ccs_mask & (BIT(0)|BIT(1))) xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~dmask); if (ccs_mask & (BIT(2)|BIT(3))) @@ -423,13 +417,7 @@ static void gt_irq_reset(struct xe_gt *gt) xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0); xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~0); xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~0); -// if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) -// xe_mmio_write32(xe, VCS4_VCS5_INTR_MASK.reg, ~0); -// if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) -// xe_mmio_write32(xe, VCS6_VCS7_INTR_MASK.reg, ~0); xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~0); -// if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) -// xe_mmio_write32(xe, VECS2_VECS3_INTR_MASK.reg, ~0); if (ccs_mask & (BIT(0)|BIT(1))) xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~0); if (ccs_mask & (BIT(2)|BIT(3))) -- cgit v1.2.3 From b73d520b3d0ff559da7e15a49ef12a591c61105a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 31 Mar 2023 17:21:06 -0700 Subject: drm/xe/irq: Don't clobber display interrupts on multi-tile platforms Although our only multi-tile platform today (PVC) doesn't support display, it's possible that some future multi-tile platform will. If/when this happens, display interrupts (both traditional display and ASLE backlight interrupts raised as a Gunit interrupt) should be delivered to the primary tile. Save away tile0's master_ctl value so that it can still be used for display interrupt handling after the GT loop. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230401002106.588656-9-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 62ecd71be063..e812a5b66a6b 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -350,7 +350,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) { struct xe_device *xe = arg; struct xe_gt *gt; - u32 master_tile_ctl, master_ctl = 0, gu_misc_iir; + u32 master_tile_ctl, master_ctl = 0, tile0_master_ctl = 0, gu_misc_iir; long unsigned int intr_dw[2]; u32 identity[32]; u8 id; @@ -384,9 +384,17 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) if (!xe_gt_is_media_type(gt)) xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, master_ctl); gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + + /* + * Save primary tile's master interrupt register for display + * processing below. + */ + if (id == 0) + tile0_master_ctl = master_ctl; } - gu_misc_iir = gu_misc_irq_ack(gt, master_ctl); + /* Gunit GSE interrupts can trigger display backlight operations */ + gu_misc_iir = gu_misc_irq_ack(gt, tile0_master_ctl); dg1_intr_enable(xe, false); -- cgit v1.2.3 From d19ad0e80ebe3da48dc8122d6beca9d3d35df454 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 1 Apr 2023 01:51:45 -0700 Subject: drm/xe: Extract function to initialize xe->info Extract the part setting up from xe->info from xe_pci_probe() into its own function. This pairs nicely with the display counterpart, avoids info initialization to be placed elsewhere and helps future improvements to build fake devices for tests. While at it, normalize the names a little bit: the _get() suffix may be mistaken by lock-related operation, so rename the function to "find_subplatform()". Also rename the variable to subplatform_desc to make it easier to understand, even if longer. 
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230401085151.1786204-2-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 98 +++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 44 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e7bfcc5f51c2..ebd27b917c59 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -343,7 +343,7 @@ static bool id_blocked(u16 device_id) } static const struct xe_subplatform_desc * -subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc) +find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc) { const struct xe_subplatform_desc *sp; const u16 *id; @@ -356,49 +356,12 @@ subplatform_get(const struct xe_device *xe, const struct xe_device_desc *desc) return NULL; } -static void xe_pci_remove(struct pci_dev *pdev) +static void xe_info_init(struct xe_device *xe, + const struct xe_device_desc *desc, + const struct xe_subplatform_desc *subplatform_desc) { - struct xe_device *xe; - - xe = pci_get_drvdata(pdev); - if (!xe) /* driver load aborted, nothing to cleanup */ - return; - - xe_device_remove(xe); - xe_pm_runtime_fini(xe); - pci_set_drvdata(pdev, NULL); -} - -static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - const struct xe_device_desc *desc = (void *)ent->driver_data; - const struct xe_subplatform_desc *spd; - struct xe_device *xe; struct xe_gt *gt; u8 id; - int err; - - if (desc->require_force_probe && !id_forced(pdev->device)) { - dev_info(&pdev->dev, - "Your graphics device %04x is not officially supported\n" - "by xe driver in this kernel version. To force Xe probe,\n" - "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n" - "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n" - "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n", - pdev->device, pdev->device, pdev->device, - pdev->device, pdev->device); - return -ENODEV; - } - - if (id_blocked(pdev->device)) { - dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n", - pdev->vendor, pdev->device); - return -ENODEV; - } - - xe = xe_device_create(pdev, ent); - if (IS_ERR(xe)) - return PTR_ERR(xe); xe->info.graphics_verx100 = desc->graphics_ver * 100 + desc->graphics_rel; @@ -417,8 +380,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation; xe->info.has_link_copy_engine = desc->has_link_copy_engine; - spd = subplatform_get(xe, desc); - xe->info.subplatform = spd ? spd->subplatform : XE_SUBPLATFORM_NONE; + xe->info.subplatform = subplatform_desc ? 
+ subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; xe->info.step = xe_step_get(xe); for (id = 0; id < xe->info.tile_count; ++id) { @@ -443,9 +406,56 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) desc->extra_gts[id - 1].mmio_adj_offset; } } +} + +static void xe_pci_remove(struct pci_dev *pdev) +{ + struct xe_device *xe; + + xe = pci_get_drvdata(pdev); + if (!xe) /* driver load aborted, nothing to cleanup */ + return; + + xe_device_remove(xe); + xe_pm_runtime_fini(xe); + pci_set_drvdata(pdev, NULL); +} + +static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + const struct xe_device_desc *desc = (void *)ent->driver_data; + const struct xe_subplatform_desc *subplatform_desc; + struct xe_device *xe; + int err; + + if (desc->require_force_probe && !id_forced(pdev->device)) { + dev_info(&pdev->dev, + "Your graphics device %04x is not officially supported\n" + "by xe driver in this kernel version. To force Xe probe,\n" + "use xe.force_probe='%04x' and i915.force_probe='!%04x'\n" + "module parameters or CONFIG_DRM_XE_FORCE_PROBE='%04x' and\n" + "CONFIG_DRM_I915_FORCE_PROBE='!%04x' configuration options.\n", + pdev->device, pdev->device, pdev->device, + pdev->device, pdev->device); + return -ENODEV; + } + + if (id_blocked(pdev->device)) { + dev_info(&pdev->dev, "Probe blocked for device [%04x:%04x].\n", + pdev->vendor, pdev->device); + return -ENODEV; + } + + xe = xe_device_create(pdev, ent); + if (IS_ERR(xe)) + return PTR_ERR(xe); + + subplatform_desc = find_subplatform(xe, desc); + xe_info_init(xe, desc, subplatform_desc); drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d", - desc->platform_name, spd ? spd->name : "", + desc->platform_name, + subplatform_desc ? subplatform_desc->name : "", xe->info.devid, xe->info.revid, xe->info.is_dgfx, xe->info.graphics_verx100, xe->info.media_verx100, -- cgit v1.2.3 From af049be5a33e12fb993028eb378fd61545e72f5e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 1 Apr 2023 01:51:46 -0700 Subject: drm/xe: Move test infra out of xe_pci.[ch] Move code out of xe_pci.[ch] into tests/*.[ch], like is done in other similar compilation units. Even if this is not part of "tests for xe_pci.c", they are functions exported and required by other tests. It's better not to clutter the module headers and sources with them. 
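For context, a minimal sketch of how a KUnit test consumes the moved helper; xe_call_for_each_device() and the xe_device_fn typedef are reproduced in the tests/xe_pci.c and tests/xe_pci_test.h hunks below, while the names sanity_check_device() and xe_sanity_kunit() are made up here purely for illustration:

	#include <kunit/test.h>

	#include "tests/xe_pci_test.h"

	static int sanity_check_device(struct xe_device *xe)
	{
		/* exercise the device; return 0 on success, -errno on failure */
		return 0;
	}

	static void xe_sanity_kunit(struct kunit *test)
	{
		/* runs the check on every device bound to the driver, or
		 * skips the test when no hardware is present */
		KUNIT_ASSERT_EQ(test, 0, xe_call_for_each_device(sanity_check_device));
	}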
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230401085151.1786204-3-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 2 ++ drivers/gpu/drm/xe/tests/xe_dma_buf.c | 1 + drivers/gpu/drm/xe/tests/xe_migrate.c | 1 + drivers/gpu/drm/xe/tests/xe_pci.c | 62 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/tests/xe_pci_test.h | 15 ++++++++ drivers/gpu/drm/xe/xe_pci.c | 52 +--------------------------- drivers/gpu/drm/xe/xe_pci.h | 9 ----- 7 files changed, 82 insertions(+), 60 deletions(-) create mode 100644 drivers/gpu/drm/xe/tests/xe_pci.c create mode 100644 drivers/gpu/drm/xe/tests/xe_pci_test.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 3c60cbdf516c..aa433a7b59b7 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -6,6 +6,8 @@ #include #include "tests/xe_bo_test.h" +#include "tests/xe_pci_test.h" +#include "tests/xe_test.h" #include "xe_bo_evict.h" #include "xe_pci.h" diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index e66a8361ae1f..cf9dddf1a8d7 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -6,6 +6,7 @@ #include #include "tests/xe_dma_buf_test.h" +#include "tests/xe_pci_test.h" #include "xe_pci.h" diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 862d11b2210f..eeb6c3be2e37 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -6,6 +6,7 @@ #include #include "tests/xe_migrate_test.h" +#include "tests/xe_pci_test.h" #include "xe_pci.h" diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c new file mode 100644 index 000000000000..643bddb35214 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "tests/xe_pci_test.h" + +#include "tests/xe_test.h" + +#include + +struct kunit_test_data { + int ndevs; + xe_device_fn xe_fn; +}; + +static int dev_to_xe_device_fn(struct device *dev, void *__data) + +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct kunit_test_data *data = __data; + int ret = 0; + int idx; + + data->ndevs++; + + if (drm_dev_enter(drm, &idx)) + ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); + drm_dev_exit(idx); + + return ret; +} + +/** + * xe_call_for_each_device - Iterate over all devices this driver binds to + * @xe_fn: Function to call for each device. + * + * This function iterated over all devices this driver binds to, and calls + * @xe_fn: for each one of them. If the called function returns anything else + * than 0, iteration is stopped and the return value is returned by this + * function. Across each function call, drm_dev_enter() / drm_dev_exit() is + * called for the corresponding drm device. + * + * Return: Zero or the error code of a call to @xe_fn returning an error + * code. 
+ */ +int xe_call_for_each_device(xe_device_fn xe_fn) +{ + int ret; + struct kunit_test_data data = { + .xe_fn = xe_fn, + .ndevs = 0, + }; + + ret = driver_for_each_device(&xe_pci_driver.driver, NULL, + &data, dev_to_xe_device_fn); + + if (!data.ndevs) + kunit_skip(current->kunit_test, "test runs only on hardware\n"); + + return ret; +} diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h new file mode 100644 index 000000000000..de65d8c9ccb5 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_PCI_TEST_H_ +#define _XE_PCI_TEST_H_ + +struct xe_device; + +typedef int (*xe_device_fn)(struct xe_device *); + +int xe_call_for_each_device(xe_device_fn xe_fn); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ebd27b917c59..c567436afcdc 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -627,55 +627,5 @@ void xe_unregister_pci_driver(void) } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) -struct kunit_test_data { - int ndevs; - xe_device_fn xe_fn; -}; - -static int dev_to_xe_device_fn(struct device *dev, void *__data) - -{ - struct drm_device *drm = dev_get_drvdata(dev); - struct kunit_test_data *data = __data; - int ret = 0; - int idx; - - data->ndevs++; - - if (drm_dev_enter(drm, &idx)) - ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); - drm_dev_exit(idx); - - return ret; -} - -/** - * xe_call_for_each_device - Iterate over all devices this driver binds to - * @xe_fn: Function to call for each device. - * - * This function iterated over all devices this driver binds to, and calls - * @xe_fn: for each one of them. If the called function returns anything else - * than 0, iteration is stopped and the return value is returned by this - * function. Across each function call, drm_dev_enter() / drm_dev_exit() is - * called for the corresponding drm device. - * - * Return: Zero or the error code of a call to @xe_fn returning an error - * code. - */ -int xe_call_for_each_device(xe_device_fn xe_fn) -{ - int ret; - struct kunit_test_data data = { - .xe_fn = xe_fn, - .ndevs = 0, - }; - - ret = driver_for_each_device(&xe_pci_driver.driver, NULL, - &data, dev_to_xe_device_fn); - - if (!data.ndevs) - kunit_skip(current->kunit_test, "test runs only on hardware\n"); - - return ret; -} +#include "tests/xe_pci.c" #endif diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h index 9e3089549d5f..611c1209b14c 100644 --- a/drivers/gpu/drm/xe/xe_pci.h +++ b/drivers/gpu/drm/xe/xe_pci.h @@ -6,16 +6,7 @@ #ifndef _XE_PCI_H_ #define _XE_PCI_H_ -#include "tests/xe_test.h" - int xe_register_pci_driver(void); void xe_unregister_pci_driver(void); -#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) -struct xe_device; - -typedef int (*xe_device_fn)(struct xe_device *); - -int xe_call_for_each_device(xe_device_fn xe_fn); -#endif #endif -- cgit v1.2.3 From 60d5c6abc289cc5d561758e71fb2c392c1ec2161 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 1 Apr 2023 01:51:47 -0700 Subject: drm/xe: Use symbol namespace for kunit tests Instead of simply using EXPORT_SYMBOL() to export the functions needed in xe.ko to be be called across modules, use EXPORT_SYMBOL_IF_KUNIT() which will export the symbol under the EXPORTED_FOR_KUNIT_TESTING namespace. This avoids accidentally "leaking" these functions and letting them be called from outside the kunit tests. 
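Concretely, the pairing added here looks like the following; the kunit header name is assumed (the include lines in the diff below have it elided), everything else is taken from the patch:

  /* in code built into xe.ko */
  #include <kunit/visibility.h>

  void xe_ccs_migrate_kunit(struct kunit *test)
  {
          xe_call_for_each_device(ccs_test_run_device);
  }
  EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);

  /* and in the separate test module, e.g. xe_bo_test.ko */
  MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);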
If these functiosn are accidentally called from another module, they receive a modpost error like below: ERROR: modpost: module XXXXXXX uses symbol xe_ccs_migrate_kunit from namespace EXPORTED_FOR_KUNIT_TESTING, but does not import it. Signed-off-by: Lucas De Marchi Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20230401085151.1786204-4-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 5 +++-- drivers/gpu/drm/xe/tests/xe_bo_test.c | 1 + drivers/gpu/drm/xe/tests/xe_dma_buf.c | 3 ++- drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 1 + drivers/gpu/drm/xe/tests/xe_migrate.c | 3 ++- drivers/gpu/drm/xe/tests/xe_migrate_test.c | 1 + 6 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index aa433a7b59b7..9bd381e5b7a6 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -4,6 +4,7 @@ */ #include +#include #include "tests/xe_bo_test.h" #include "tests/xe_pci_test.h" @@ -166,7 +167,7 @@ void xe_ccs_migrate_kunit(struct kunit *test) { xe_call_for_each_device(ccs_test_run_device); } -EXPORT_SYMBOL(xe_ccs_migrate_kunit); +EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test) { @@ -304,4 +305,4 @@ void xe_bo_evict_kunit(struct kunit *test) { xe_call_for_each_device(evict_test_run_device); } -EXPORT_SYMBOL(xe_bo_evict_kunit); +EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c index 92dda4fca21b..1c868e3635bc 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo_test.c +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c @@ -22,3 +22,4 @@ kunit_test_suite(xe_bo_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index cf9dddf1a8d7..513a3b3362e9 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -4,6 +4,7 @@ */ #include +#include #include "tests/xe_dma_buf_test.h" #include "tests/xe_pci_test.h" @@ -259,4 +260,4 @@ void xe_dma_buf_kunit(struct kunit *test) { xe_call_for_each_device(dma_buf_run_device); } -EXPORT_SYMBOL(xe_dma_buf_kunit); +EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c index a1adfd1e1605..35312bfd5fb7 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c @@ -21,3 +21,4 @@ kunit_test_suite(xe_dma_buf_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index eeb6c3be2e37..cdcecf8d5eef 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -4,6 +4,7 @@ */ #include +#include #include "tests/xe_migrate_test.h" #include "tests/xe_pci_test.h" @@ -409,4 +410,4 @@ void xe_migrate_sanity_kunit(struct kunit *test) { xe_call_for_each_device(migrate_test_run_device); } -EXPORT_SYMBOL(xe_migrate_sanity_kunit); +EXPORT_SYMBOL_IF_KUNIT(xe_migrate_sanity_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c index d6be360c3b6d..39179eae890b 100644 --- 
a/drivers/gpu/drm/xe/tests/xe_migrate_test.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c @@ -21,3 +21,4 @@ kunit_test_suite(xe_migrate_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); -- cgit v1.2.3 From e460410023d95b0845aa99f2d9c0625b143ca593 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 1 Apr 2023 01:51:48 -0700 Subject: drm/xe: Generalize fake device creation Instead of requiring tests to initialize a fake device an keep it in sync with xe_pci.c when it's platform-dependent, export a function from xe_pci.c to be used and piggy back on the device info creation. For simpler tests that don't need any specific platform and just need a fake xe device to pass around, xe_pci_fake_device_init_any() can be used. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230401085151.1786204-5-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_pci.c | 47 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/tests/xe_pci_test.h | 16 ++++++++++++ drivers/gpu/drm/xe/xe_pci.c | 2 +- 3 files changed, 64 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 643bddb35214..cc65ac5657b3 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -8,6 +8,7 @@ #include "tests/xe_test.h" #include +#include struct kunit_test_data { int ndevs; @@ -60,3 +61,49 @@ int xe_call_for_each_device(xe_device_fn xe_fn) return ret; } + +int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, + enum xe_subplatform subplatform) +{ + const struct pci_device_id *ent = pciidlist; + const struct xe_device_desc *desc; + const struct xe_subplatform_desc *subplatform_desc; + + if (platform == XE_TEST_PLATFORM_ANY) { + desc = (const void *)ent->driver_data; + subplatform_desc = NULL; + goto done; + } + + for (ent = pciidlist; ent->device; ent++) { + desc = (const void *)ent->driver_data; + if (desc->platform == platform) + break; + } + + if (!ent->device) + return -ENODEV; + + if (subplatform == XE_TEST_SUBPLATFORM_ANY) { + subplatform_desc = desc->subplatforms; + goto done; + } + + for (subplatform_desc = desc->subplatforms; + subplatform_desc && subplatform_desc->subplatform; + subplatform_desc++) + if (subplatform_desc->subplatform == subplatform) + break; + + if (subplatform == XE_SUBPLATFORM_NONE && subplatform_desc) + return -ENODEV; + + if (subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc) + return -ENODEV; + +done: + xe_info_init(xe, desc, subplatform_desc); + + return 0; +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index de65d8c9ccb5..43294e8c62bb 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -6,10 +6,26 @@ #ifndef _XE_PCI_TEST_H_ #define _XE_PCI_TEST_H_ +#include "xe_platform_types.h" + struct xe_device; +/* + * Some defines just for clarity: these mean the test doesn't care about what + * platform it will get since it doesn't depend on any platform-specific bits + */ +#define XE_TEST_PLATFORM_ANY XE_PLATFORM_UNINITIALIZED +#define XE_TEST_SUBPLATFORM_ANY XE_SUBPLATFORM_UNINITIALIZED + typedef int (*xe_device_fn)(struct xe_device *); int xe_call_for_each_device(xe_device_fn xe_fn); +int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, + enum 
xe_subplatform subplatform); + +#define xe_pci_fake_device_init_any(xe__) \ + xe_pci_fake_device_init(xe__, XE_TEST_PLATFORM_ANY, \ + XE_TEST_SUBPLATFORM_ANY) + #endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c567436afcdc..f6050a17c950 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -423,7 +423,7 @@ static void xe_pci_remove(struct pci_dev *pdev) static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - const struct xe_device_desc *desc = (void *)ent->driver_data; + const struct xe_device_desc *desc = (const void *)ent->driver_data; const struct xe_subplatform_desc *subplatform_desc; struct xe_device *xe; int err; -- cgit v1.2.3 From 7bf350ecb240c9db63031e3a1b6c99acd73c90ed Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 1 Apr 2023 01:51:49 -0700 Subject: drm/xe/reg_sr: Save errors for kunit integration When there's an entry that is dropped when xe_reg_sr_add(), there's not much we can do other than reporting the error - it's for certain a driver issue or conflicting workarounds/tunings. Save the number of errors to be used later by kunit to report where it happens. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230401085151.1786204-6-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 8 ++++++++ drivers/gpu/drm/xe/xe_reg_sr_types.h | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 37ae8412cb00..f97673be2e62 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -82,6 +82,13 @@ static bool compatible_entries(const struct xe_reg_sr_entry *e1, return true; } +static void reg_sr_inc_error(struct xe_reg_sr *sr) +{ +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + sr->errors++; +#endif +} + int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, const struct xe_reg_sr_entry *e) { @@ -119,6 +126,7 @@ fail: DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n", idx, e->clr_bits, e->set_bits, str_yes_no(e->masked_reg), ret); + reg_sr_inc_error(sr); return ret; } diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h index 3d2257891005..91469784fd90 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr_types.h +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -32,6 +32,10 @@ struct xe_reg_sr { } pool; struct xarray xa; const char *name; + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + unsigned int errors; +#endif }; #endif -- cgit v1.2.3 From 4cc0440229c61dca680f5acaf2e529e67f9bde72 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 1 Apr 2023 01:51:50 -0700 Subject: drm/xe: Add basic unit tests for rtp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some basic unit tests for rtp. This is intended to prove the functionality of the rtp itself, like coalescing entries, rejecting non-disjoint values, etc. Contrary to the other tests in xe, this is a unit test to test the sw-side only, so it can be executed on any machine - it doesn't interact with the real hardware. Running it produces the following output: $ ./tools/testing/kunit/kunit.py run --raw_output-kunit \ --kunitconfig drivers/gpu/drm/xe/.kunitconfig xe_rtp ... [01:26:27] Starting KUnit Kernel (1/1)... 
KTAP version 1 1..1 KTAP version 1 # Subtest: xe_rtp 1..1 KTAP version 1 # Subtest: xe_rtp_process_tests ok 1 coalesce-same-reg ok 2 no-match-no-add ok 3 no-match-no-add-multiple-rules ok 4 two-regs-two-entries ok 5 clr-one-set-other ok 6 set-field [drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 0001 (clear: 00000001, set: 00000001, masked: no): ret=-22 ok 7 conflict-duplicate [drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 0001 (clear: 00000003, set: 00000000, masked: no): ret=-22 ok 8 conflict-not-disjoint [drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 0001 (clear: 00000002, set: 00000002, masked: no): ret=-22 [drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 0001 (clear: 00000001, set: 00000001, masked: yes): ret=-22 ok 9 conflict-reg-type # xe_rtp_process_tests: pass:9 fail:0 skip:0 total:9 ok 1 xe_rtp_process_tests # Totals: pass:9 fail:0 skip:0 total:9 ok 1 xe_rtp ... Note that the ERRORs in the kernel log are expected since it's testing incompatible entries. v2: - Use parameterized table for tests (Michał Winiarski) - Move everything to the xe_rtp_test.ko and only add a few exports to the right namespace - Add more tests to cover FIELD_SET, CLR, partially true rules, etc Signed-off-by: Lucas De Marchi Reviewed-by: Maarten Lankhorst # v1 Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20230401085151.1786204-7-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig.debug | 1 + drivers/gpu/drm/xe/tests/Makefile | 7 +- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 318 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_reg_sr.c | 2 + drivers/gpu/drm/xe/xe_rtp.c | 3 + 5 files changed, 329 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/xe/tests/xe_rtp_test.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 93b284cdd0a2..11bb13c73e7b 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -66,6 +66,7 @@ config DRM_XE_KUNIT_TEST depends on DRM_XE && KUNIT && DEBUG_FS default KUNIT_ALL_TESTS select DRM_EXPORT_FOR_TESTS if m + select DRM_KUNIT_TEST_HELPERS help Choose this option to allow the driver to perform selftests under the kunit framework diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 47056b6459e3..c5c2f108d017 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_bo_test.o xe_dma_buf_test.o \ - xe_migrate_test.o +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \ + xe_bo_test.o \ + xe_dma_buf_test.o \ + xe_migrate_test.o \ + xe_rtp_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c new file mode 100644 index 000000000000..29e112c108c6 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include + +#include +#include + +#include + +#include "regs/xe_gt_regs.h" +#include "regs/xe_reg_defs.h" +#include "xe_device_types.h" +#include "xe_pci_test.h" +#include "xe_reg_sr.h" +#include "xe_rtp.h" + +#undef _MMIO +#undef MCR_REG +#define _MMIO(x) _XE_RTP_REG(x) +#define MCR_REG(x) _XE_RTP_MCR_REG(x) + +#define REGULAR_REG1 _MMIO(1) +#define REGULAR_REG2 _MMIO(2) +#define REGULAR_REG3 _MMIO(3) +#define MCR_REG1 MCR_REG(1) +#define MCR_REG2 MCR_REG(2) +#define MCR_REG3 MCR_REG(3) + +struct 
rtp_test_case { + const char *name; + struct { + u32 offset; + u32 type; + } expected_reg; + u32 expected_set_bits; + u32 expected_clr_bits; + unsigned long expected_count; + unsigned int expected_sr_errors; + const struct xe_rtp_entry *entries; +}; + +static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe) +{ + return true; +} + +static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe) +{ + return false; +} + +static const struct rtp_test_case cases[] = { + { + .name = "coalesce-same-reg", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = REG_BIT(0) | REG_BIT(1), + .expected_clr_bits = REG_BIT(0) | REG_BIT(1), + .expected_count = 1, + /* Different bits on the same register: create a single entry */ + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + {} + }, + }, + { + .name = "no-match-no-add", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + /* Don't coalesce second entry since rules don't match */ + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + {} + }, + }, + { + .name = "no-match-no-add-multiple-rules", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + /* Don't coalesce second entry due to one of the rules */ + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes), FUNC(match_no)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) + }, + {} + }, + }, + { + .name = "two-regs-two-entries", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 2, + /* Same bits on different registers are not coalesced */ + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG2, REG_BIT(0))) + }, + {} + }, + }, + { + .name = "clr-one-set-other", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(1) | REG_BIT(0), + .expected_count = 1, + /* Check clr vs set actions on different bits */ + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_BIT(1))) + }, + {} + }, + }, + { +#define TEMP_MASK REG_GENMASK(10, 8) +#define TEMP_FIELD REG_FIELD_PREP(TEMP_MASK, 2) + .name = "set-field", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = TEMP_FIELD, + .expected_clr_bits = TEMP_MASK, + .expected_count = 1, + /* Check FIELD_SET works */ + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1, + 
TEMP_MASK, TEMP_FIELD)) + }, + {} + }, +#undef TEMP_MASK +#undef TEMP_FIELD + }, + { + .name = "conflict-duplicate", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + .expected_sr_errors = 1, + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + /* drop: setting same values twice */ + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + {} + }, + }, + { + .name = "conflict-not-disjoint", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + .expected_sr_errors = 1, + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + /* drop: bits are not disjoint with previous entries */ + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(CLR(REGULAR_REG1, REG_GENMASK(1, 0))) + }, + {} + }, + }, + { + .name = "conflict-reg-type", + .expected_reg = { REGULAR_REG1 }, + .expected_set_bits = REG_BIT(0), + .expected_clr_bits = REG_BIT(0), + .expected_count = 1, + .expected_sr_errors = 2, + .entries = (const struct xe_rtp_entry[]) { + { XE_RTP_NAME("basic-1"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) + }, + /* drop: regular vs MCR */ + { XE_RTP_NAME("basic-2"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(MCR_REG1, REG_BIT(1))) + }, + /* drop: regular vs masked */ + { XE_RTP_NAME("basic-3"), + XE_RTP_RULES(FUNC(match_yes)), + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0), + XE_RTP_ACTION_FLAG(MASKED_REG))) + }, + {} + }, + }, +}; + +static void xe_rtp_process_tests(struct kunit *test) +{ + const struct rtp_test_case *param = test->param_value; + struct xe_device *xe = test->priv; + struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr; + const struct xe_reg_sr_entry *sre, *sr_entry = NULL; + unsigned long idx, count = 0; + + xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe); + xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL); + + xa_for_each(®_sr->xa, idx, sre) { + if (idx == param->expected_reg.offset) + sr_entry = sre; + + count++; + } + + KUNIT_EXPECT_EQ(test, count, param->expected_count); + KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); + KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); + KUNIT_EXPECT_EQ(test, sr_entry->reg_type, param->expected_reg.type); + KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors); +} + +static void rtp_desc(const struct rtp_test_case *t, char *desc) +{ + strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(rtp, cases, rtp_desc); + +static int xe_rtp_test_init(struct kunit *test) +{ + struct xe_device *xe; + struct device *dev; + int ret; + + dev = drm_kunit_helper_alloc_device(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + xe = drm_kunit_helper_alloc_drm_device(test, dev, + struct xe_device, + drm, DRIVER_GEM); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); + + ret = xe_pci_fake_device_init_any(xe); + KUNIT_ASSERT_EQ(test, ret, 0); + + xe->drm.dev = dev; + test->priv = xe; + + return 0; +} + +static void xe_rtp_test_exit(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + drm_kunit_helper_free_device(test, xe->drm.dev); +} + +static struct kunit_case xe_rtp_tests[] = { + 
KUNIT_CASE_PARAM(xe_rtp_process_tests, rtp_gen_params), + {} +}; + +static struct kunit_suite xe_rtp_test_suite = { + .name = "xe_rtp", + .init = xe_rtp_test_init, + .exit = xe_rtp_test_exit, + .test_cases = xe_rtp_tests, +}; + +kunit_test_suite(xe_rtp_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index f97673be2e62..ff83da4cf4a7 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -5,6 +5,7 @@ #include "xe_reg_sr.h" +#include #include #include #include @@ -43,6 +44,7 @@ int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); } +EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init); static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) { diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index cb9dd894547d..20acd43cb60b 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -5,6 +5,8 @@ #include "xe_rtp.h" +#include + #include #include "xe_gt.h" @@ -155,6 +157,7 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, } } } +EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process); bool xe_rtp_match_even_instance(const struct xe_gt *gt, const struct xe_hw_engine *hwe) -- cgit v1.2.3 From b9d773fc515a2d57ca96a6a368ac6e8845b2b3c5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 1 Apr 2023 01:51:51 -0700 Subject: drm/xe: Add test for GT workarounds and tunings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to avoid mistakes when populating the workarounds, it's good to be able to test if the entries added are all compatible for a certain platform. The platform itself is not needed as long as we create fake devices with enough configuration for the RTP helpers to process the tables. Common mistakes that can be avoided: - Entries clashing the bitfields being updated - Register type being mixed (MCR vs regular / masked vs regular) - Unexpected errors while adding the reg_sr entry To test, inject a duplicate entry in gt_was, but with platform == tigerlake rather than the currenct graphics version check: { XE_RTP_NAME("14011059788"), XE_RTP_RULES(PLATFORM(TIGERLAKE)), XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)) }, This produces the following result: $ ./tools/testing/kunit/kunit.py run \ --kunitconfig drivers/gpu/drm/xe/.kunitconfig xe_wa [14:18:02] Starting KUnit Kernel (1/1)... [14:18:02] ============================================================ [14:18:02] ==================== xe_wa (1 subtest) ===================== [14:18:02] ======================== xe_wa_gt ========================= [14:18:02] [drm:xe_reg_sr_add] *ERROR* Discarding save-restore reg 9550 (clear: 00000200, set: 00000200, masked: no): ret=-22 [14:18:02] # xe_wa_gt: ASSERTION FAILED at drivers/gpu/drm/xe/tests/xe_wa_test.c:116 [14:18:02] Expected gt->reg_sr.errors == 0, but [14:18:02] gt->reg_sr.errors == 1 (0x1) [14:18:02] [FAILED] TIGERLAKE (B0) [14:18:02] [PASSED] DG1 (A0) [14:18:02] [PASSED] DG1 (B0) ... 
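The assertion driving that failure ties back to the error counter introduced earlier in this series: xe_reg_sr_add() bumps gt->reg_sr.errors whenever an entry is discarded, so the per-GT check in the test (taken from the diff below) is simply:

  xe_reg_sr_init(&gt->reg_sr, "GT", xe);
  xe_wa_process_gt(gt);
  xe_tuning_process_gt(gt);
  KUNIT_ASSERT_EQ(test, gt->reg_sr.errors, 0);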
Signed-off-by: Lucas De Marchi Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20230401085151.1786204-8-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/Makefile | 3 +- drivers/gpu/drm/xe/tests/xe_wa_test.c | 136 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_tuning.c | 3 + drivers/gpu/drm/xe/xe_wa.c | 2 + 4 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/tests/xe_wa_test.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index c5c2f108d017..56919abb3f2a 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -4,4 +4,5 @@ obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \ xe_bo_test.o \ xe_dma_buf_test.o \ xe_migrate_test.o \ - xe_rtp_test.o + xe_rtp_test.o \ + xe_wa_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c new file mode 100644 index 000000000000..7a86be830b93 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include + +#include + +#include "xe_device.h" +#include "xe_pci_test.h" +#include "xe_reg_sr.h" +#include "xe_tuning.h" +#include "xe_wa.h" + +struct platform_test_case { + const char *name; + enum xe_platform platform; + enum xe_subplatform subplatform; + struct xe_step_info step; +}; + +#define PLATFORM_CASE(platform__, graphics_step__) \ + { \ + .name = #platform__ " (" #graphics_step__ ")", \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + + +#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ + { \ + .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + +static const struct platform_test_case cases[] = { + PLATFORM_CASE(TIGERLAKE, B0), + PLATFORM_CASE(DG1, A0), + PLATFORM_CASE(DG1, B0), + PLATFORM_CASE(ALDERLAKE_S, A0), + PLATFORM_CASE(ALDERLAKE_S, B0), + PLATFORM_CASE(ALDERLAKE_S, C0), + PLATFORM_CASE(ALDERLAKE_S, D0), + SUBPLATFORM_CASE(DG2, G10, A0), + SUBPLATFORM_CASE(DG2, G10, A1), + SUBPLATFORM_CASE(DG2, G10, B0), + SUBPLATFORM_CASE(DG2, G10, C0), + SUBPLATFORM_CASE(DG2, G11, A0), + SUBPLATFORM_CASE(DG2, G11, B0), + SUBPLATFORM_CASE(DG2, G11, B1), + SUBPLATFORM_CASE(DG2, G12, A0), + SUBPLATFORM_CASE(DG2, G12, A1), + PLATFORM_CASE(PVC, B0), + PLATFORM_CASE(PVC, B1), + PLATFORM_CASE(PVC, C0), +}; + +static void platform_desc(const struct platform_test_case *t, char *desc) +{ + strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(platform, cases, platform_desc); + +static int xe_wa_test_init(struct kunit *test) +{ + const struct platform_test_case *param = test->param_value; + struct xe_device *xe; + struct device *dev; + int ret; + + dev = drm_kunit_helper_alloc_device(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + xe = drm_kunit_helper_alloc_drm_device(test, dev, + struct xe_device, + drm, DRIVER_GEM); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); + + ret = xe_pci_fake_device_init(xe, param->platform, param->subplatform); + KUNIT_ASSERT_EQ(test, ret, 0); + + xe->info.step = param->step; + + /* TODO: init hw engines for engine/LRC WAs */ + xe->drm.dev = dev; + test->priv = xe; + + return 0; +} + +static void 
xe_wa_test_exit(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + drm_kunit_helper_free_device(test, xe->drm.dev); +} + +static void xe_wa_gt(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_gt *gt; + int id; + + for_each_gt(gt, xe, id) { + xe_reg_sr_init(>->reg_sr, "GT", xe); + + xe_wa_process_gt(gt); + xe_tuning_process_gt(gt); + + KUNIT_ASSERT_EQ(test, gt->reg_sr.errors, 0); + } +} + +static struct kunit_case xe_wa_tests[] = { + KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params), + {} +}; + +static struct kunit_suite xe_rtp_test_suite = { + .name = "xe_wa", + .init = xe_wa_test_init, + .exit = xe_wa_test_exit, + .test_cases = xe_wa_tests, +}; + +kunit_test_suite(xe_rtp_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 7ff5eb762da5..27cf1330facd 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -5,6 +5,8 @@ #include "xe_tuning.h" +#include + #include "regs/xe_gt_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" @@ -62,6 +64,7 @@ void xe_tuning_process_gt(struct xe_gt *gt) { xe_rtp_process(gt_tunings, >->reg_sr, gt, NULL); } +EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); /** * xe_tuning_process_lrc - process lrc tunings diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 59d2daab5929..a7d681b7538d 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -5,6 +5,7 @@ #include "xe_wa.h" +#include #include #include "regs/xe_engine_regs.h" @@ -628,6 +629,7 @@ void xe_wa_process_gt(struct xe_gt *gt) { xe_rtp_process(gt_was, >->reg_sr, gt, NULL); } +EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); /** * xe_wa_process_engine - process engine workaround table -- cgit v1.2.3 From ad55ead7f3c7b041dbf058a9c4b954be5929bb5e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 23 Mar 2023 22:17:54 -0700 Subject: drm/xe: Update GuC/HuC firmware autoselect logic Update the logic to autoselect GuC/HuC for the platforms with the following improvements: - Document what is the firmware file that is expected to be loaded and what is checked from blob headers - When the platform is under force-probe it's desired to enforce the full-version requirement so the correct firmware is used before widespread adoption and backward-compatibility commitments - Directory from which we expect firmware blobs to be available in upstream linux-firmware repository depends on the platform: for the ones supported by i915 it uses the i915/ directory, but the ones expected to be supported by xe, it's on the xe/ directory. This means that for platforms in the intersection, the firmware is loaded from a different directory, but that is not much important in the firmware repo and it avoids firmware duplication. - Make the table with the firmware definitions clearly state the versions being expected. Now with macros to select the version it's possible to choose between full-version/major-version for GuC and full-version/no-version for HuC. These are similar to the macros used in i915, but implemented in a slightly different way to avoid duplicating the macros for each firmware/type and functionality, besides adding the support for different directories. - There is no check added regarding force-probe since xe should reuse the same firmware files published for i915 for past platforms. 
This can be improved later with additional kunit checking against a hardcoded list of platforms that falls in this category. - As mentioned in the TODO, the major version fallback was not implemented before as currently each platform only supports one major. That can be easily added later. - GuC version for MTL and PVC were updated to 70.6.4, using the exact full version, while the After this the GuC firmware used by PVC changes to pvc_guc_70.5.2.bin since it's using a file not published yet. Signed-off-by: Lucas De Marchi Reviewed-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20230324051754.1346390-4-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 315 ++++++++++++++++++++++-------------- drivers/gpu/drm/xe/xe_uc_fw.h | 2 +- drivers/gpu/drm/xe/xe_uc_fw_types.h | 7 + 3 files changed, 204 insertions(+), 120 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 777fa6f523dc..2beee7f8eff7 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -17,6 +17,137 @@ #include "xe_mmio.h" #include "xe_uc_fw.h" +/* + * List of required GuC and HuC binaries per-platform. They must be ordered + * based on platform, from newer to older. + * + * Versioning follows the guidelines from + * Documentation/driver-api/firmware/firmware-usage-guidelines.rst. There is a + * distinction for platforms being officially supported by the driver or not. + * Platforms not available publicly or not yet officially supported by the + * driver (under force-probe), use the mmp_ver(): the firmware autoselect logic + * will select the firmware from disk with filename that matches the full + * "mpp version", i.e. major.minor.patch. mmp_ver() should only be used for + * this case. + * + * For platforms officially supported by the driver, the filename always only + * ever contains the major version (GuC) or no version at all (HuC). + * + * After loading the file, the driver parses the versions embedded in the blob. + * The major version needs to match a major version supported by the driver (if + * any). The minor version is also checked and a notice emitted to the log if + * the version found is smaller than the version wanted. This is done only for + * informational purposes so users may have a chance to upgrade, but the driver + * still loads and use the older firmware. + * + * Examples: + * + * 1) Platform officially supported by i915 - using Tigerlake as example. + * Driver loads the following firmware blobs from disk: + * + * - i915/tgl_guc_.bin + * - i915/tgl_huc.bin + * + * number for GuC is checked that it matches the version inside + * the blob. version is checked and if smaller than the expected + * an info message is emitted about that. + * + * 1) XE_, still under require_force_probe. Using + * "wipplat" as a short-name. Driver loads the following firmware blobs + * from disk: + * + * - xe/wipplat_guc_...bin + * - xe/wipplat_huc_...bin + * + * and are checked that they match the version inside + * the blob. Both of them need to match exactly what the driver is + * expecting, otherwise it fails. + * + * 3) Platform officially supported by xe and out of force-probe. Using + * "plat" as a short-name. Except for the different directory, the + * behavior is the same as (1). Driver loads the following firmware + * blobs from disk: + * + * - xe/plat_guc_.bin + * - xe/plat_huc.bin + * + * number for GuC is checked that it matches the version inside + * the blob. 
version is checked and if smaller than the expected + * an info message is emitted about that. + * + * For the platforms already released with a major version, they should never be + * removed from the table. Instead new entries with newer versions may be added + * before them, so they take precedence. + * + * TODO: Currently there's no fallback on major version. That's because xe + * driver only supports the one major version of each firmware in the table. + * This needs to be fixed when the major version of GuC is updated. + */ + +struct uc_fw_entry { + enum xe_platform platform; + struct { + const char *path; + u16 major; + u16 minor; + bool full_ver_required; + }; +}; + +struct fw_blobs_by_type { + const struct uc_fw_entry *entries; + u32 count; +}; + +#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(METEORLAKE, mmp_ver( i915, guc, mtl, 70, 6, 4)) \ + fw_def(PVC, mmp_ver( xe, guc, pvc, 70, 6, 4)) \ + fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ + fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \ + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 5)) \ + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 5)) \ + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 5)) + +#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ + fw_def(DG1, no_ver(i915, huc, dg1)) \ + fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) + +#define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \ + __stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin" + +#define fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c) \ + MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a ## . ## b ## . ## c)) +#define fw_filename_major_ver(dir_, uc_, shortname_, a, b) \ + MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a)) +#define fw_filename_no_ver(dir_, uc_, shortname_) \ + MAKE_FW_PATH(dir_, uc_, shortname_, "") + +#define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c) \ + { fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c), \ + a, b, true } +#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b) \ + { fw_filename_major_ver(dir_, uc_, shortname_, a, b), \ + a, b } +#define uc_fw_entry_no_ver(dir_, uc_, shortname_) \ + { fw_filename_no_ver(dir_, uc_, shortname_), \ + 0, 0 } + +/* All blobs need to be declared via MODULE_FIRMWARE() */ +#define XE_UC_MODULE_FIRMWARE(platform__, fw_filename) \ + MODULE_FIRMWARE(fw_filename); + +#define XE_UC_FW_ENTRY(platform__, entry__) \ + { \ + .platform = XE_ ## platform__, \ + entry__, \ + }, + +XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, \ + fw_filename_mmp_ver, fw_filename_major_ver) +XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, \ + fw_filename_mmp_ver, fw_filename_no_ver) + static struct xe_gt * __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type) { @@ -37,123 +168,38 @@ static struct xe_device *uc_fw_to_xe(struct xe_uc_fw *uc_fw) return gt_to_xe(uc_fw_to_gt(uc_fw)); } -/* - * List of required GuC and HuC binaries per-platform. - * Must be ordered based on platform, from newer to older. 
- */ -#define XE_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ - fw_def(METEORLAKE, guc_def(mtl, 70, 5, 2)) \ - fw_def(PVC, guc_def(pvc, 70, 5, 2)) \ - fw_def(DG2, guc_def(dg2, 70, 5, 2)) \ - fw_def(DG1, guc_def(dg1, 70, 5, 2)) \ - fw_def(ALDERLAKE_P, guc_def(adlp, 70, 5, 2)) \ - fw_def(ALDERLAKE_S, guc_def(tgl, 70, 5, 2)) \ - fw_def(TIGERLAKE, guc_def(tgl, 70, 5, 2)) - -#define XE_HUC_FIRMWARE_DEFS(fw_def, huc_def, huc_ver) \ - fw_def(ALDERLAKE_S, huc_def(tgl)) \ - fw_def(DG1, huc_def(dg1)) \ - fw_def(TIGERLAKE, huc_def(tgl)) - -#define __MAKE_HUC_FW_PATH(prefix_, name_) \ - "i915/" \ - __stringify(prefix_) "_" name_ ".bin" - -#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \ - "i915/" \ - __stringify(prefix_) "_" name_ "_" \ - __stringify(major_) ".bin" - -#define __MAKE_UC_FW_PATH_FULL_VER(prefix_, name_, major_, minor_, patch_) \ - "i915/" \ - __stringify(prefix_) "_" name_ "_" \ - __stringify(major_) "." \ - __stringify(minor_) "." \ - __stringify(patch_) ".bin" - -#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH_MAJOR(prefix_, "guc", major_) - -#define MAKE_HUC_FW_PATH(prefix_) \ - __MAKE_HUC_FW_PATH(prefix_, "huc") - -#define MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH_FULL_VER(prefix_, "huc", major_, minor_, patch_) - - -/* All blobs need to be declared via MODULE_FIRMWARE() */ -#define XE_UC_MODULE_FW(platform_, uc_) \ - MODULE_FIRMWARE(uc_); - -XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_GUC_FW_PATH) -XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FW, MAKE_HUC_FW_PATH, MAKE_HUC_FW_PATH_FULL_VER) - -/* The below structs and macros are used to iterate across the list of blobs */ -struct __packed uc_fw_blob { - u8 major; - u8 minor; - const char *path; -}; - -#define UC_FW_BLOB(major_, minor_, path_) \ - { .major = major_, .minor = minor_, .path = path_ } - -#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \ - UC_FW_BLOB(major_, minor_, \ - MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_)) - -#define HUC_FW_BLOB(prefix_) \ - UC_FW_BLOB(0, 0, MAKE_HUC_FW_PATH(prefix_)) - -#define HUC_FW_VERSION_BLOB(prefix_, major_, minor_, bld_num_) \ - UC_FW_BLOB(major_, minor_, \ - MAKE_HUC_FW_PATH_FULL_VER(prefix_, major_, minor_, bld_num_)) - -struct uc_fw_platform_requirement { - enum xe_platform p; - const struct uc_fw_blob blob; -}; - -#define MAKE_FW_LIST(platform_, uc_) \ -{ \ - .p = XE_##platform_, \ - .blob = uc_, \ -}, - -struct fw_blobs_by_type { - const struct uc_fw_platform_requirement *blobs; - u32 count; -}; - static void uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) { - static const struct uc_fw_platform_requirement blobs_guc[] = { - XE_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) + static const struct uc_fw_entry entries_guc[] = { + XE_GUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, + uc_fw_entry_mmp_ver, + uc_fw_entry_major_ver) }; - static const struct uc_fw_platform_requirement blobs_huc[] = { - XE_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_VERSION_BLOB) + static const struct uc_fw_entry entries_huc[] = { + XE_HUC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, + uc_fw_entry_mmp_ver, + uc_fw_entry_no_ver) }; static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = { - [XE_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, - [XE_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, + [XE_UC_FW_TYPE_GUC] = { entries_guc, ARRAY_SIZE(entries_guc) }, + [XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) }, }; - static const struct uc_fw_platform_requirement *fw_blobs; + static const struct uc_fw_entry 
*entries; enum xe_platform p = xe->info.platform; - u32 fw_count; + u32 count; int i; XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); - fw_blobs = blobs_all[uc_fw->type].blobs; - fw_count = blobs_all[uc_fw->type].count; - - for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) { - if (p == fw_blobs[i].p) { - const struct uc_fw_blob *blob = &fw_blobs[i].blob; - - uc_fw->path = blob->path; - uc_fw->major_ver_wanted = blob->major; - uc_fw->minor_ver_wanted = blob->minor; + entries = blobs_all[uc_fw->type].entries; + count = blobs_all[uc_fw->type].count; + + for (i = 0; i < count && p <= entries[i].platform; i++) { + if (p == entries[i].platform) { + uc_fw->path = entries[i].path; + uc_fw->major_ver_wanted = entries[i].major; + uc_fw->minor_ver_wanted = entries[i].minor; + uc_fw->full_ver_required = entries[i].full_ver_required; break; } } @@ -227,6 +273,47 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) uc_fw->private_data_size = css->private_data_size; } +static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + + /* Driver has no requirement on any version, any is good. */ + if (!uc_fw->major_ver_wanted) + return 0; + + /* + * If full version is required, both major and minor should match. + * Otherwise, at least the major version. + */ + if (uc_fw->major_ver_wanted != uc_fw->major_ver_found || + (uc_fw->full_ver_required && + uc_fw->minor_ver_wanted != uc_fw->minor_ver_found)) { + drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + goto fail; + } + + if (uc_fw->minor_ver_wanted > uc_fw->minor_ver_found) { + drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n", + xe_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->path); + drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n", + XE_UC_FIRMWARE_URL); + } + + return 0; + +fail: + if (xe_uc_fw_is_overridden(uc_fw)) + return 0; + + return -ENOEXEC; +} + int xe_uc_fw_init(struct xe_uc_fw *uc_fw) { struct xe_device *xe = uc_fw_to_xe(uc_fw); @@ -308,19 +395,9 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); - if (uc_fw->major_ver_wanted) { - if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", - xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - if (!xe_uc_fw_is_overridden(uc_fw)) { - err = -ENOEXEC; - goto fail; - } - } - } + err = uc_fw_check_version_requirements(uc_fw); + if (err) + goto fail; if (uc_fw->type == XE_UC_FW_TYPE_GUC) guc_read_css_info(uc_fw, css); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index bf31c3bb0e0f..e16267e71280 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -175,6 +175,6 @@ static inline u32 xe_uc_fw_get_upload_size(struct xe_uc_fw *uc_fw) return __xe_uc_fw_get_upload_size(uc_fw); } -#define XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" +#define 
XE_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git" #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 1cfd30a655df..837f49a2347e 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -78,6 +78,13 @@ struct xe_uc_fw { const char *path; /** @user_overridden: user provided path to uC firmware via modparam */ bool user_overridden; + /** + * @full_ver_required: driver still under development and not ready + * for backward-compatible firmware. To be used only for **new** + * platforms, i.e. still under require_force_probe protection and not + * supported by i915. + */ + bool full_ver_required; /** @size: size of uC firmware including css header */ size_t size; -- cgit v1.2.3 From 1c060057ec29e0305aa314c19a80090c21524faa Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 5 Apr 2023 16:20:03 -0700 Subject: drm/xe: Always write GEN12_RCU_MODE.GEN12_RCU_MODE_CCS_ENABLE for CCS engines If CCS0 was fused we did not write this register thus CCS engine were not enabled resulting in driver load failures. Signed-off-by: Matthew Brost Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 63a4efd5edcc..4b56c35b988d 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -253,7 +253,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) u32 ccs_mask = xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); - if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask & BIT(0)) + if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg, _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); -- cgit v1.2.3 From 61e72e77b66259945fca89dcbfea32f7cbfc3b07 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 5 Apr 2023 15:47:25 -0700 Subject: drm/xe: Always log GuC/HuC firmware versions When debugging issues related to GuC/HuC, it's important to know what is the firmware version being used. The version from the filename can't be relied upon, also because it normally only contains the major version (except for the ones under experimental support). Log the version from the blob after reading the CSS header. 
Example: xe 0000:03:00.0: [drm] Using GuC firmware (70.5) from i915/dg2_guc_70.bin Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230405224725.1993719-1-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2beee7f8eff7..7a410c106df4 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -395,6 +395,11 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); + drm_info(&xe->drm, "Using %s firmware (%u.%u) from %s\n", + xe_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->path); + err = uc_fw_check_version_requirements(uc_fw); if (err) goto fail; -- cgit v1.2.3 From c8d72dfb288740a59afaf135da15db598fae0475 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:13 -0700 Subject: drm/xe: Start splitting xe_device_desc into graphics/media structures Rather than storing all characteristics for an entire platform in the xe_device_desc structure, create secondary graphics and media structures to hold traits and feature flags specific to those IPs. This will eventually allow us to assign the graphics and media characteristics at runtime based on the contents of the relevant GMD_ID registers. For now, just move the IP versions into the new structures to keep things simple. Other IP-specific fields will migrate to these structures in future patches. Note that there's one functional change introduced by this: previously PVC was recognized as media version 12.60. That's technically true, but in practice the media engines are fused off on all production hardware. By simply not assigning a media IP structure to PVC it will effectively be treated as IP version 0.0 now (which the rest of the driver should treat as non-existent media). v2: - Split the new structures out to their own header. This will ease the addition of KUnit tests later. 
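For reference, the effect of the new structures on the version fields is straightforward; with the values added in this patch:

  static const struct xe_graphics_desc graphics_xelpg = { .ver = 12, .rel = 70 };
  /* xe_info_init(): xe->info.graphics_verx100 = 12 * 100 + 70 = 1270 */

  /* A NULL desc->media (PVC after this patch) leaves media_verx100 at 0,
   * which the rest of the driver should treat as non-existent media. */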
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 94 +++++++++++++++++++++++++++------------ drivers/gpu/drm/xe/xe_pci_types.h | 21 +++++++++ 2 files changed, 86 insertions(+), 29 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_pci_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f6050a17c950..ef5a668be449 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -19,6 +19,7 @@ #include "xe_drv.h" #include "xe_macros.h" #include "xe_module.h" +#include "xe_pci_types.h" #include "xe_pm.h" #include "xe_step.h" @@ -42,10 +43,8 @@ struct xe_gt_desc { }; struct xe_device_desc { - u8 graphics_ver; - u8 graphics_rel; - u8 media_ver; - u8 media_rel; + const struct xe_graphics_desc *graphics; + const struct xe_media_desc *media; u64 platform_engine_mask; /* Engines supported by the HW */ @@ -80,17 +79,57 @@ struct xe_device_desc { #define NOP(x) x +static const struct xe_graphics_desc graphics_xelp = { + .ver = 12, + .rel = 0, +}; + +static const struct xe_graphics_desc graphics_xelpp = { + .ver = 12, + .rel = 10, +}; + +static const struct xe_graphics_desc graphics_xehpg = { + .ver = 12, + .rel = 55, +}; + +static const struct xe_graphics_desc graphics_xehpc = { + .ver = 12, + .rel = 60, +}; + +static const struct xe_graphics_desc graphics_xelpg = { + .ver = 12, + .rel = 70, +}; + +static const struct xe_media_desc media_xem = { + .ver = 12, + .rel = 0, +}; + +static const struct xe_media_desc media_xehpm = { + .ver = 12, + .rel = 55, +}; + +static const struct xe_media_desc media_xelpmp = { + .ver = 13, + .rel = 0, +}; + /* Keep in gen based order, and chronological order within a gen */ #define GEN12_FEATURES \ .require_force_probe = true, \ - .graphics_ver = 12, \ - .media_ver = 12, \ .dma_mask_size = 39, \ .max_tiles = 1, \ .vm_max_level = 3, \ .vram_flags = 0 static const struct xe_device_desc tgl_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, GEN12_FEATURES, PLATFORM(XE_TIGERLAKE), .platform_engine_mask = @@ -100,6 +139,8 @@ static const struct xe_device_desc tgl_desc = { }; static const struct xe_device_desc adl_s_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, GEN12_FEATURES, PLATFORM(XE_ALDERLAKE_S), .platform_engine_mask = @@ -111,6 +152,8 @@ static const struct xe_device_desc adl_s_desc = { static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 }; static const struct xe_device_desc adl_p_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, GEN12_FEATURES, PLATFORM(XE_ALDERLAKE_P), .platform_engine_mask = @@ -127,9 +170,10 @@ static const struct xe_device_desc adl_p_desc = { .is_dgfx = 1 static const struct xe_device_desc dg1_desc = { + .graphics = &graphics_xelpp, + .media = &media_xem, GEN12_FEATURES, DGFX_FEATURES, - .graphics_rel = 10, PLATFORM(XE_DG1), .platform_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | @@ -139,26 +183,18 @@ static const struct xe_device_desc dg1_desc = { #define XE_HP_FEATURES \ .require_force_probe = true, \ - .graphics_ver = 12, \ - .graphics_rel = 50, \ .has_range_tlb_invalidation = true, \ .has_flat_ccs = true, \ .dma_mask_size = 46, \ .max_tiles = 1, \ .vm_max_level = 3 -#define XE_HPM_FEATURES \ - .media_ver = 12, \ - .media_rel = 50 - static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 }; static const u16 dg2_g11_ids[] = { 
XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 }; static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; #define DG2_FEATURES \ DGFX_FEATURES, \ - .graphics_rel = 55, \ - .media_rel = 55, \ PLATFORM(XE_DG2), \ .subplatforms = (const struct xe_subplatform_desc[]) { \ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ @@ -177,15 +213,17 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; .has_4tile = 1 static const struct xe_device_desc ats_m_desc = { + .graphics = &graphics_xehpg, + .media = &media_xehpm, XE_HP_FEATURES, - XE_HPM_FEATURES, DG2_FEATURES, }; static const struct xe_device_desc dg2_desc = { + .graphics = &graphics_xehpg, + .media = &media_xehpm, XE_HP_FEATURES, - XE_HPM_FEATURES, DG2_FEATURES, }; @@ -212,14 +250,12 @@ static const struct xe_gt_desc pvc_gts[] = { }; static const __maybe_unused struct xe_device_desc pvc_desc = { + .graphics = &graphics_xehpc, XE_HP_FEATURES, - XE_HPM_FEATURES, DGFX_FEATURES, PLATFORM(XE_PVC), .extra_gts = pvc_gts, - .graphics_rel = 60, .has_flat_ccs = 0, - .media_rel = 60, .platform_engine_mask = PVC_ENGINES, .vram_flags = XE_VRAM_FLAGS_NEED64K, .dma_mask_size = 52, @@ -250,16 +286,15 @@ static const struct xe_gt_desc xelpmp_gts[] = { static const struct xe_device_desc mtl_desc = { /* - * Real graphics IP version will be obtained from hardware GMD_ID - * register. Value provided here is just for sanity checking. + * FIXME: Real graphics/media IP will be mapped from hardware + * GMD_ID register. Hardcoded assignments here will go away soon. */ + .graphics = &graphics_xelpg, + .media = &media_xelpmp, .require_force_probe = true, - .graphics_ver = 12, - .graphics_rel = 70, .dma_mask_size = 46, .max_tiles = 2, .vm_max_level = 3, - .media_ver = 13, .has_range_tlb_invalidation = true, PLATFORM(XE_METEORLAKE), .extra_gts = xelpmp_gts, @@ -363,10 +398,11 @@ static void xe_info_init(struct xe_device *xe, struct xe_gt *gt; u8 id; - xe->info.graphics_verx100 = desc->graphics_ver * 100 + - desc->graphics_rel; - xe->info.media_verx100 = desc->media_ver * 100 + - desc->media_rel; + xe->info.graphics_verx100 = desc->graphics->ver * 100 + + desc->graphics->rel; + if (desc->media) + xe->info.media_verx100 = desc->media->ver * 100 + + desc->media->rel; xe->info.is_dgfx = desc->is_dgfx; xe->info.platform = desc->platform; xe->info.dma_mask_size = desc->dma_mask_size; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h new file mode 100644 index 000000000000..cc372694ecca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_PCI_TYPES_H_ +#define _XE_PCI_TYPES_H_ + +#include + +struct xe_graphics_desc { + u8 ver; + u8 rel; +}; + +struct xe_media_desc { + u8 ver; + u8 rel; +}; + +#endif -- cgit v1.2.3 From c94f32e4f5453a55c1c83a81481784f617f96df8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:14 -0700 Subject: drm/xe: Set require_force_probe in each platform's description Set require_force_probe explicitly in each platform's description structure rather than embedding it within the FOO_FEATURES macros. Even though we expect all platforms currently supported by the Xe driver to be under force_probe protection, this will help prepare for some other upcoming restructuring. 
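For readers not familiar with this flag: require_force_probe marks a platform as not yet production-ready, so the driver only binds when the user explicitly opts in through the force_probe module parameter. A very rough, hypothetical sketch of that gate (the helper names are invented here; the real parameter parsing lives elsewhere in the driver):

#include <stdbool.h>
#include <stdio.h>

/* Rough model only: probe proceeds unless the platform demands an opt-in. */
static bool probe_allowed(bool require_force_probe, bool user_forced_this_id)
{
	if (!require_force_probe)
		return true;		/* platform treated as fully supported */
	return user_forced_this_id;	/* otherwise the user must opt in */
}

int main(void)
{
	printf("forced device:   %d\n", probe_allowed(true, true));	/* 1 */
	printf("unforced device: %d\n", probe_allowed(true, false));	/* 0 */
	return 0;
}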
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ef5a668be449..06c194ab56dd 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -121,7 +121,6 @@ static const struct xe_media_desc media_xelpmp = { /* Keep in gen based order, and chronological order within a gen */ #define GEN12_FEATURES \ - .require_force_probe = true, \ .dma_mask_size = 39, \ .max_tiles = 1, \ .vm_max_level = 3, \ @@ -132,6 +131,7 @@ static const struct xe_device_desc tgl_desc = { .media = &media_xem, GEN12_FEATURES, PLATFORM(XE_TIGERLAKE), + .require_force_probe = true, .platform_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | @@ -143,6 +143,7 @@ static const struct xe_device_desc adl_s_desc = { .media = &media_xem, GEN12_FEATURES, PLATFORM(XE_ALDERLAKE_S), + .require_force_probe = true, .platform_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | @@ -156,6 +157,7 @@ static const struct xe_device_desc adl_p_desc = { .media = &media_xem, GEN12_FEATURES, PLATFORM(XE_ALDERLAKE_P), + .require_force_probe = true, .platform_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | @@ -175,6 +177,7 @@ static const struct xe_device_desc dg1_desc = { GEN12_FEATURES, DGFX_FEATURES, PLATFORM(XE_DG1), + .require_force_probe = true, .platform_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | @@ -182,7 +185,6 @@ static const struct xe_device_desc dg1_desc = { }; #define XE_HP_FEATURES \ - .require_force_probe = true, \ .has_range_tlb_invalidation = true, \ .has_flat_ccs = true, \ .dma_mask_size = 46, \ @@ -208,13 +210,13 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \ - .require_force_probe = true, \ .vram_flags = XE_VRAM_FLAGS_NEED64K, \ .has_4tile = 1 static const struct xe_device_desc ats_m_desc = { .graphics = &graphics_xehpg, .media = &media_xehpm, + .require_force_probe = true, XE_HP_FEATURES, DG2_FEATURES, @@ -223,6 +225,7 @@ static const struct xe_device_desc ats_m_desc = { static const struct xe_device_desc dg2_desc = { .graphics = &graphics_xehpg, .media = &media_xehpm, + .require_force_probe = true, XE_HP_FEATURES, DG2_FEATURES, @@ -254,6 +257,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { XE_HP_FEATURES, DGFX_FEATURES, PLATFORM(XE_PVC), + .require_force_probe = true, .extra_gts = pvc_gts, .has_flat_ccs = 0, .platform_engine_mask = PVC_ENGINES, -- cgit v1.2.3 From ce22dece001d6dfedbff0b63596e9aaa5b5ae78b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:15 -0700 Subject: drm/xe: Move most platform traits to graphics IP Most of the traits currently in the device descriptor structures are either tied to the graphics IP or should be inferred from the graphics IP. This becomes important on MTL and beyond where IP versions are supposed to be detected from the hardware's GMD_ID registers rather than mapped from PCI devid. 
Engine masks are left where they are for now; they'll be dealt with separately in a future patch. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 99 ++++++++++++++++++--------------------- drivers/gpu/drm/xe/xe_pci_types.h | 11 +++++ 2 files changed, 56 insertions(+), 54 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 06c194ab56dd..b981b1e62bfa 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -53,24 +53,18 @@ struct xe_device_desc { const struct xe_subplatform_desc *subplatforms; const struct xe_gt_desc *extra_gts; - u8 dma_mask_size; /* available DMA address bits */ - u8 gt; /* GT number, 0 if undefined */ #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG - u8 vram_flags; - u8 max_tiles; - u8 vm_max_level; - - bool supports_usm; - bool has_flat_ccs; + /* + * FIXME: Xe doesn't care about presence/lack of 4tile since we can + * already determine that from the graphics IP version. This flag + * should eventually move entirely into the display code's own logic. + */ bool has_4tile; - bool has_range_tlb_invalidation; - bool has_asid; - bool has_link_copy_engine; }; #define PLATFORM(x) \ @@ -82,26 +76,57 @@ struct xe_device_desc { static const struct xe_graphics_desc graphics_xelp = { .ver = 12, .rel = 0, + + .dma_mask_size = 39, + .vm_max_level = 3, }; static const struct xe_graphics_desc graphics_xelpp = { .ver = 12, .rel = 10, + + .dma_mask_size = 39, + .vm_max_level = 3, }; +#define XE_HP_FEATURES \ + .has_range_tlb_invalidation = true, \ + .has_flat_ccs = true, \ + .dma_mask_size = 46, \ + .vm_max_level = 3 + static const struct xe_graphics_desc graphics_xehpg = { .ver = 12, .rel = 55, + + XE_HP_FEATURES, + .vram_flags = XE_VRAM_FLAGS_NEED64K, }; static const struct xe_graphics_desc graphics_xehpc = { .ver = 12, .rel = 60, + + XE_HP_FEATURES, + .dma_mask_size = 52, + .max_tiles = 2, + .vm_max_level = 4, + .vram_flags = XE_VRAM_FLAGS_NEED64K, + + .has_asid = 1, + .has_flat_ccs = 0, + .has_link_copy_engine = 1, + .supports_usm = 1, }; static const struct xe_graphics_desc graphics_xelpg = { .ver = 12, .rel = 70, + + XE_HP_FEATURES, + .max_tiles = 2, + + .has_flat_ccs = 0, }; static const struct xe_media_desc media_xem = { @@ -119,17 +144,9 @@ static const struct xe_media_desc media_xelpmp = { .rel = 0, }; -/* Keep in gen based order, and chronological order within a gen */ -#define GEN12_FEATURES \ - .dma_mask_size = 39, \ - .max_tiles = 1, \ - .vm_max_level = 3, \ - .vram_flags = 0 - static const struct xe_device_desc tgl_desc = { .graphics = &graphics_xelp, .media = &media_xem, - GEN12_FEATURES, PLATFORM(XE_TIGERLAKE), .require_force_probe = true, .platform_engine_mask = @@ -141,7 +158,6 @@ static const struct xe_device_desc tgl_desc = { static const struct xe_device_desc adl_s_desc = { .graphics = &graphics_xelp, .media = &media_xem, - GEN12_FEATURES, PLATFORM(XE_ALDERLAKE_S), .require_force_probe = true, .platform_engine_mask = @@ -155,7 +171,6 @@ static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 }; static const struct xe_device_desc adl_p_desc = { .graphics = &graphics_xelp, .media = &media_xem, - GEN12_FEATURES, PLATFORM(XE_ALDERLAKE_P), .require_force_probe = true, .platform_engine_mask = @@ -174,7 +189,6 @@ static const struct xe_device_desc adl_p_desc = { static const struct xe_device_desc 
dg1_desc = { .graphics = &graphics_xelpp, .media = &media_xem, - GEN12_FEATURES, DGFX_FEATURES, PLATFORM(XE_DG1), .require_force_probe = true, @@ -184,13 +198,6 @@ static const struct xe_device_desc dg1_desc = { BIT(XE_HW_ENGINE_VCS2), }; -#define XE_HP_FEATURES \ - .has_range_tlb_invalidation = true, \ - .has_flat_ccs = true, \ - .dma_mask_size = 46, \ - .max_tiles = 1, \ - .vm_max_level = 3 - static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 }; static const u16 dg2_g11_ids[] = { XE_DG2_G11_IDS(NOP), XE_ATS_M75_IDS(NOP), 0 }; static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; @@ -210,14 +217,12 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \ - .vram_flags = XE_VRAM_FLAGS_NEED64K, \ .has_4tile = 1 static const struct xe_device_desc ats_m_desc = { .graphics = &graphics_xehpg, .media = &media_xehpm, .require_force_probe = true, - XE_HP_FEATURES, DG2_FEATURES, }; @@ -226,7 +231,6 @@ static const struct xe_device_desc dg2_desc = { .graphics = &graphics_xehpg, .media = &media_xehpm, .require_force_probe = true, - XE_HP_FEATURES, DG2_FEATURES, }; @@ -254,20 +258,11 @@ static const struct xe_gt_desc pvc_gts[] = { static const __maybe_unused struct xe_device_desc pvc_desc = { .graphics = &graphics_xehpc, - XE_HP_FEATURES, DGFX_FEATURES, PLATFORM(XE_PVC), .require_force_probe = true, .extra_gts = pvc_gts, - .has_flat_ccs = 0, .platform_engine_mask = PVC_ENGINES, - .vram_flags = XE_VRAM_FLAGS_NEED64K, - .dma_mask_size = 52, - .max_tiles = 2, - .vm_max_level = 4, - .supports_usm = true, - .has_asid = true, - .has_link_copy_engine = true, }; #define MTL_MEDIA_ENGINES \ @@ -296,10 +291,6 @@ static const struct xe_device_desc mtl_desc = { .graphics = &graphics_xelpg, .media = &media_xelpmp, .require_force_probe = true, - .dma_mask_size = 46, - .max_tiles = 2, - .vm_max_level = 3, - .has_range_tlb_invalidation = true, PLATFORM(XE_METEORLAKE), .extra_gts = xelpmp_gts, .platform_engine_mask = MTL_MAIN_ENGINES, @@ -409,16 +400,16 @@ static void xe_info_init(struct xe_device *xe, desc->media->rel; xe->info.is_dgfx = desc->is_dgfx; xe->info.platform = desc->platform; - xe->info.dma_mask_size = desc->dma_mask_size; - xe->info.vram_flags = desc->vram_flags; - xe->info.tile_count = desc->max_tiles; - xe->info.vm_max_level = desc->vm_max_level; - xe->info.supports_usm = desc->supports_usm; - xe->info.has_asid = desc->has_asid; - xe->info.has_flat_ccs = desc->has_flat_ccs; + xe->info.dma_mask_size = desc->graphics->dma_mask_size; + xe->info.vram_flags = desc->graphics->vram_flags; + xe->info.tile_count = desc->graphics->max_tiles ?: 1; + xe->info.vm_max_level = desc->graphics->vm_max_level; + xe->info.supports_usm = desc->graphics->supports_usm; + xe->info.has_asid = desc->graphics->has_asid; + xe->info.has_flat_ccs = desc->graphics->has_flat_ccs; xe->info.has_4tile = desc->has_4tile; - xe->info.has_range_tlb_invalidation = desc->has_range_tlb_invalidation; - xe->info.has_link_copy_engine = desc->has_link_copy_engine; + xe->info.has_range_tlb_invalidation = desc->graphics->has_range_tlb_invalidation; + xe->info.has_link_copy_engine = desc->graphics->has_link_copy_engine; xe->info.subplatform = subplatform_desc ? 
subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index cc372694ecca..e1749ceee9e0 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -11,6 +11,17 @@ struct xe_graphics_desc { u8 ver; u8 rel; + + u8 dma_mask_size; /* available DMA address bits */ + u8 max_tiles; /* defaults to 1 if unset */ + u8 vm_max_level; + u8 vram_flags; + + u8 has_asid:1; + u8 has_flat_ccs:1; + u8 has_link_copy_engine:1; + u8 has_range_tlb_invalidation:1; + u8 supports_usm:1; }; struct xe_media_desc { -- cgit v1.2.3 From 33b270d9392825874c4e484e8652dad2cf901c97 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:16 -0700 Subject: drm/xe: Move engine masks into IP descriptor structures Break the top-level platform_engine_mask field into separate hw_engine_mask fields in the graphics and media structures. Since hardware has more flexibility to mix-and-match IP versions going forward, this allows each IP to list exactly which engines it provides; the final per-GT engine list can then be constructured from those: * On platforms without a standalone media GT (i.e., media IP versions prior to 13), the primary GT's engine list is the union of the graphics IP's engine list and the media IP's engine list. * Otherwise, GT0's engine list is the graphics IP's engine list. * For GT1 and beyond, the type of GT determines which IP's engine list is used. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-5-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 93 ++++++++++++++++++--------------------- drivers/gpu/drm/xe/xe_pci_types.h | 4 ++ 2 files changed, 46 insertions(+), 51 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index b981b1e62bfa..198cae9c5116 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -37,7 +37,6 @@ struct xe_subplatform_desc { struct xe_gt_desc { enum xe_gt_type type; u8 vram_id; - u64 engine_mask; u32 mmio_adj_limit; u32 mmio_adj_offset; }; @@ -46,8 +45,6 @@ struct xe_device_desc { const struct xe_graphics_desc *graphics; const struct xe_media_desc *media; - u64 platform_engine_mask; /* Engines supported by the HW */ - enum xe_platform platform; const char *platform_name; const struct xe_subplatform_desc *subplatforms; @@ -77,6 +74,8 @@ static const struct xe_graphics_desc graphics_xelp = { .ver = 12, .rel = 0, + .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), + .dma_mask_size = 39, .vm_max_level = 3, }; @@ -85,6 +84,8 @@ static const struct xe_graphics_desc graphics_xelpp = { .ver = 12, .rel = 10, + .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), + .dma_mask_size = 39, .vm_max_level = 3, }; @@ -99,6 +100,11 @@ static const struct xe_graphics_desc graphics_xehpg = { .ver = 12, .rel = 55, + .hw_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), + XE_HP_FEATURES, .vram_flags = XE_VRAM_FLAGS_NEED64K, }; @@ -107,6 +113,15 @@ static const struct xe_graphics_desc graphics_xehpc = { .ver = 12, .rel = 60, + .hw_engine_mask = + BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | + BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | + BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | + BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | + 
BIT(XE_HW_ENGINE_BCS8) | + BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | + BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), + XE_HP_FEATURES, .dma_mask_size = 52, .max_tiles = 2, @@ -123,6 +138,10 @@ static const struct xe_graphics_desc graphics_xelpg = { .ver = 12, .rel = 70, + .hw_engine_mask = + BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | + BIT(XE_HW_ENGINE_CCS0), + XE_HP_FEATURES, .max_tiles = 2, @@ -132,16 +151,28 @@ static const struct xe_graphics_desc graphics_xelpg = { static const struct xe_media_desc media_xem = { .ver = 12, .rel = 0, + + .hw_engine_mask = + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | + BIT(XE_HW_ENGINE_VECS0), }; static const struct xe_media_desc media_xehpm = { .ver = 12, .rel = 55, + + .hw_engine_mask = + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1), }; static const struct xe_media_desc media_xelpmp = { .ver = 13, .rel = 0, + + .hw_engine_mask = + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | + BIT(XE_HW_ENGINE_VECS0), /* TODO: add GSC0 */ }; static const struct xe_device_desc tgl_desc = { @@ -149,10 +180,6 @@ static const struct xe_device_desc tgl_desc = { .media = &media_xem, PLATFORM(XE_TIGERLAKE), .require_force_probe = true, - .platform_engine_mask = - BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | - BIT(XE_HW_ENGINE_VCS2), }; static const struct xe_device_desc adl_s_desc = { @@ -160,10 +187,6 @@ static const struct xe_device_desc adl_s_desc = { .media = &media_xem, PLATFORM(XE_ALDERLAKE_S), .require_force_probe = true, - .platform_engine_mask = - BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | - BIT(XE_HW_ENGINE_VCS2), }; static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 }; @@ -173,10 +196,6 @@ static const struct xe_device_desc adl_p_desc = { .media = &media_xem, PLATFORM(XE_ALDERLAKE_P), .require_force_probe = true, - .platform_engine_mask = - BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | - BIT(XE_HW_ENGINE_VCS2), .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids }, {}, @@ -192,10 +211,6 @@ static const struct xe_device_desc dg1_desc = { DGFX_FEATURES, PLATFORM(XE_DG1), .require_force_probe = true, - .platform_engine_mask = - BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VCS0) | - BIT(XE_HW_ENGINE_VCS2), }; static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 }; @@ -211,12 +226,6 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ { } \ }, \ - .platform_engine_mask = \ - BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \ - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1) | \ - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ - BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ - BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), \ .has_4tile = 1 static const struct xe_device_desc ats_m_desc = { @@ -235,22 +244,10 @@ static const struct xe_device_desc dg2_desc = { DG2_FEATURES, }; -#define PVC_ENGINES \ - BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_BCS1) | \ - BIT(XE_HW_ENGINE_BCS2) | BIT(XE_HW_ENGINE_BCS3) | \ - BIT(XE_HW_ENGINE_BCS4) | BIT(XE_HW_ENGINE_BCS5) | \ - BIT(XE_HW_ENGINE_BCS6) | BIT(XE_HW_ENGINE_BCS7) | \ - BIT(XE_HW_ENGINE_BCS8) | \ - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | \ - 
BIT(XE_HW_ENGINE_VCS2) | \ - BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | \ - BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3) - static const struct xe_gt_desc pvc_gts[] = { { .type = XE_GT_TYPE_REMOTE, .vram_id = 1, - .engine_mask = PVC_ENGINES, .mmio_adj_limit = 0, .mmio_adj_offset = 0, }, @@ -262,27 +259,17 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { PLATFORM(XE_PVC), .require_force_probe = true, .extra_gts = pvc_gts, - .platform_engine_mask = PVC_ENGINES, }; -#define MTL_MEDIA_ENGINES \ - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | \ - BIT(XE_HW_ENGINE_VECS0) /* TODO: GSC0 */ - static const struct xe_gt_desc xelpmp_gts[] = { { .type = XE_GT_TYPE_MEDIA, .vram_id = 0, - .engine_mask = MTL_MEDIA_ENGINES, .mmio_adj_limit = 0x40000, .mmio_adj_offset = 0x380000, }, }; -#define MTL_MAIN_ENGINES \ - BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | \ - BIT(XE_HW_ENGINE_CCS0) - static const struct xe_device_desc mtl_desc = { /* * FIXME: Real graphics/media IP will be mapped from hardware @@ -293,7 +280,6 @@ static const struct xe_device_desc mtl_desc = { .require_force_probe = true, PLATFORM(XE_METEORLAKE), .extra_gts = xelpmp_gts, - .platform_engine_mask = MTL_MAIN_ENGINES, }; #undef PLATFORM @@ -423,14 +409,19 @@ static void xe_info_init(struct xe_device *xe, if (id == 0) { gt->info.type = XE_GT_TYPE_MAIN; gt->info.vram_id = id; - gt->info.__engine_mask = desc->platform_engine_mask; + + gt->info.__engine_mask = desc->graphics->hw_engine_mask; + if (MEDIA_VER(xe) < 13 && desc->media) + gt->info.__engine_mask |= desc->media->hw_engine_mask; + gt->mmio.adj_limit = 0; gt->mmio.adj_offset = 0; } else { gt->info.type = desc->extra_gts[id - 1].type; gt->info.vram_id = desc->extra_gts[id - 1].vram_id; - gt->info.__engine_mask = - desc->extra_gts[id - 1].engine_mask; + gt->info.__engine_mask = (gt->info.type == XE_GT_TYPE_MEDIA) ? + desc->media->hw_engine_mask : + desc->graphics->hw_engine_mask; gt->mmio.adj_limit = desc->extra_gts[id - 1].mmio_adj_limit; gt->mmio.adj_offset = diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e1749ceee9e0..1b3dff8886ad 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -17,6 +17,8 @@ struct xe_graphics_desc { u8 vm_max_level; u8 vram_flags; + u64 hw_engine_mask; /* hardware engines provided by graphics IP */ + u8 has_asid:1; u8 has_flat_ccs:1; u8 has_link_copy_engine:1; @@ -27,6 +29,8 @@ struct xe_graphics_desc { struct xe_media_desc { u8 ver; u8 rel; + + u64 hw_engine_mask; /* hardware engines provided by media IP */ }; #endif -- cgit v1.2.3 From bd75664b9c3ff1829bc5acfd6789c0094e7bd617 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:17 -0700 Subject: drm/xe: Clarify GT counting logic The total number of GTs supported by a platform should be one primary GT, one media GT (if media version >= 13), and a number of remote tile GTs dependent on the graphics IP present. Express this more clearly in the device setup. Note that xe->info.tile_count is inaccurately named; the rest of the driver treats this as the GT count, not just the tile count. This will need to be cleaned up at some point down the road. 
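A tiny standalone sketch of that counting rule (the helper name is made up; the per-platform numbers below come from the descriptors introduced earlier in this series):

#include <stdio.h>

/* 1 primary GT + remote-tile GTs from the graphics IP + a media GT on media 13+. */
static unsigned int gt_count(unsigned int max_remote_tiles, unsigned int media_ver)
{
	unsigned int count = 1 + max_remote_tiles;

	if (media_ver >= 13)
		count++;
	return count;
}

int main(void)
{
	printf("DG2: %u\n", gt_count(0, 12));	/* 1: primary GT only */
	printf("PVC: %u\n", gt_count(1, 0));	/* 2: primary + remote tile */
	printf("MTL: %u\n", gt_count(0, 13));	/* 2: primary + media GT */
	return 0;
}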
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-6-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 18 ++++++++++++++---- drivers/gpu/drm/xe/xe_pci_types.h | 3 ++- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 198cae9c5116..0697496c26d0 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -124,7 +124,7 @@ static const struct xe_graphics_desc graphics_xehpc = { XE_HP_FEATURES, .dma_mask_size = 52, - .max_tiles = 2, + .max_remote_tiles = 1, .vm_max_level = 4, .vram_flags = XE_VRAM_FLAGS_NEED64K, @@ -143,8 +143,6 @@ static const struct xe_graphics_desc graphics_xelpg = { BIT(XE_HW_ENGINE_CCS0), XE_HP_FEATURES, - .max_tiles = 2, - .has_flat_ccs = 0, }; @@ -388,7 +386,6 @@ static void xe_info_init(struct xe_device *xe, xe->info.platform = desc->platform; xe->info.dma_mask_size = desc->graphics->dma_mask_size; xe->info.vram_flags = desc->graphics->vram_flags; - xe->info.tile_count = desc->graphics->max_tiles ?: 1; xe->info.vm_max_level = desc->graphics->vm_max_level; xe->info.supports_usm = desc->graphics->supports_usm; xe->info.has_asid = desc->graphics->has_asid; @@ -397,6 +394,19 @@ static void xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_invalidation = desc->graphics->has_range_tlb_invalidation; xe->info.has_link_copy_engine = desc->graphics->has_link_copy_engine; + /* + * All platforms have at least one primary GT. Any platform with media + * version 13 or higher has an additional dedicated media GT. And + * depending on the graphics IP there may be additional "remote tiles." + * All of these together determine the overall GT count. + * + * FIXME: 'tile_count' here is misnamed since the rest of the driver + * treats it as the number of GTs rather than just the number of tiles. + */ + xe->info.tile_count = 1 + desc->graphics->max_remote_tiles; + if (MEDIA_VER(xe) >= 13) + xe->info.tile_count++; + xe->info.subplatform = subplatform_desc ? subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; xe->info.step = xe_step_get(xe); diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 1b3dff8886ad..f4bc5ef1bce3 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -13,12 +13,13 @@ struct xe_graphics_desc { u8 rel; u8 dma_mask_size; /* available DMA address bits */ - u8 max_tiles; /* defaults to 1 if unset */ u8 vm_max_level; u8 vram_flags; u64 hw_engine_mask; /* hardware engines provided by graphics IP */ + u8 max_remote_tiles:2; + u8 has_asid:1; u8 has_flat_ccs:1; u8 has_link_copy_engine:1; -- cgit v1.2.3 From 9a08b2b935cedec1c563b03999cb37bfbeeb8b22 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:18 -0700 Subject: drm/xe: Add printable name to IP descriptors Printing the name, along with the IP version number, will help reduce confusion about which IP is present on a platform. 
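Purely to illustrate the resulting message layout (all values below are hypothetical; only the format mirrors the drm_dbg() change in this patch), the verx100 numbers are now printed as major.minor next to the IP names:

#include <stdio.h>

int main(void)
{
	/* Hypothetical MTL-like values, formatted the way the probe debug line does it. */
	const char *graphics_name = "Xe_LPG", *media_name = "Xe_LPM+";
	int graphics_verx100 = 1270, media_verx100 = 1300;

	printf("gfx:%s (%d.%02d) media:%s (%d.%02d)\n",
	       graphics_name, graphics_verx100 / 100, graphics_verx100 % 100,
	       media_name, media_verx100 / 100, media_verx100 % 100);
	/* -> gfx:Xe_LPG (12.70) media:Xe_LPM+ (13.00) */
	return 0;
}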
Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-7-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 4 ++++ drivers/gpu/drm/xe/xe_pci.c | 21 ++++++++++++++++++--- drivers/gpu/drm/xe/xe_pci_types.h | 2 ++ 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 74326091bf98..87b92f5f078d 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -51,6 +51,10 @@ struct xe_device { /** @info: device info */ struct intel_device_info { + /** @graphics_name: graphics IP name */ + const char *graphics_name; + /** @media_name: media IP name */ + const char *media_name; /** @graphics_verx100: graphics IP version */ u32 graphics_verx100; /** @media_verx100: media IP version */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 0697496c26d0..a9233d3a8ff1 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -71,6 +71,7 @@ struct xe_device_desc { #define NOP(x) x static const struct xe_graphics_desc graphics_xelp = { + .name = "Xe_LP", .ver = 12, .rel = 0, @@ -81,6 +82,7 @@ static const struct xe_graphics_desc graphics_xelp = { }; static const struct xe_graphics_desc graphics_xelpp = { + .name = "Xe_LP+", .ver = 12, .rel = 10, @@ -97,6 +99,7 @@ static const struct xe_graphics_desc graphics_xelpp = { .vm_max_level = 3 static const struct xe_graphics_desc graphics_xehpg = { + .name = "Xe_HPG", .ver = 12, .rel = 55, @@ -110,6 +113,7 @@ static const struct xe_graphics_desc graphics_xehpg = { }; static const struct xe_graphics_desc graphics_xehpc = { + .name = "Xe_HPC", .ver = 12, .rel = 60, @@ -135,6 +139,7 @@ static const struct xe_graphics_desc graphics_xehpc = { }; static const struct xe_graphics_desc graphics_xelpg = { + .name = "Xe_LPG", .ver = 12, .rel = 70, @@ -147,6 +152,7 @@ static const struct xe_graphics_desc graphics_xelpg = { }; static const struct xe_media_desc media_xem = { + .name = "Xe_M", .ver = 12, .rel = 0, @@ -156,6 +162,7 @@ static const struct xe_media_desc media_xem = { }; static const struct xe_media_desc media_xehpm = { + .name = "Xe_HPM", .ver = 12, .rel = 55, @@ -165,6 +172,7 @@ static const struct xe_media_desc media_xehpm = { }; static const struct xe_media_desc media_xelpmp = { + .name = "Xe_LPM+", .ver = 13, .rel = 0, @@ -384,6 +392,8 @@ static void xe_info_init(struct xe_device *xe, desc->media->rel; xe->info.is_dgfx = desc->is_dgfx; xe->info.platform = desc->platform; + xe->info.graphics_name = desc->graphics->name; + xe->info.media_name = desc->media ? desc->media->name : "none"; xe->info.dma_mask_size = desc->graphics->dma_mask_size; xe->info.vram_flags = desc->graphics->vram_flags; xe->info.vm_max_level = desc->graphics->vm_max_level; @@ -485,12 +495,17 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) subplatform_desc = find_subplatform(xe, desc); xe_info_init(xe, desc, subplatform_desc); - drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx100:%d media100:%d dma_m_s:%d tc:%d", + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d", desc->platform_name, subplatform_desc ? 
subplatform_desc->name : "", xe->info.devid, xe->info.revid, - xe->info.is_dgfx, xe->info.graphics_verx100, - xe->info.media_verx100, + xe->info.is_dgfx, + xe->info.graphics_name, + xe->info.graphics_verx100 / 100, + xe->info.graphics_verx100 % 100, + xe->info.media_name, + xe->info.media_verx100 / 100, + xe->info.media_verx100 % 100, xe->info.dma_mask_size, xe->info.tile_count); drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n", diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index f4bc5ef1bce3..e479c1c4ed30 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -9,6 +9,7 @@ #include struct xe_graphics_desc { + const char *name; u8 ver; u8 rel; @@ -28,6 +29,7 @@ struct xe_graphics_desc { }; struct xe_media_desc { + const char *name; u8 ver; u8 rel; -- cgit v1.2.3 From 5822bba943ad2ecb386e8a27614e753ad7e285fa Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:19 -0700 Subject: drm/xe: Select graphics/media descriptors from GMD_ID If graphics_desc and media_desc are not specified in a platform's xe_device_desc, treat this as an indication that the IP version should be determined from the hardware's GMD_ID register. Note that leaving media_desc unset for a platform that simply doesn't have the IP (e.g., PVC) is also okay --- a read of the GMD_ID register offset will be attempted, but since there's no register at that location a value of '0' will be returned, effectively disabling media support. Mapping of version -> IP description is done via a table lookup; this table will be re-used in future patches for some KUnit testing. v2: - Drop dummy structures. NULL can be safely used for both the GMD_ID cases and the "media not present case." - Use a table-based lookup of GMD_ID versions rather than a simple switch statement; the table will allow us to easily perform kunit testing of all the IP descriptors. 
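A small standalone sketch of the version decode behind the new GMD_ID handling (the raw register value below is fabricated; the bit layout follows the GMD_ID field definitions added by this patch, and a result of 0 is how an absent IP shows up):

#include <stdio.h>

#define GMDID_ARCH_SHIFT	22	/* GMD_ID_ARCH_MASK:    bits 31:22 */
#define GMDID_RELEASE_SHIFT	14	/* GMD_ID_RELEASE_MASK: bits 21:14 */

static unsigned int gmdid_to_verx100(unsigned int raw)
{
	unsigned int arch = (raw >> GMDID_ARCH_SHIFT) & 0x3ff;
	unsigned int release = (raw >> GMDID_RELEASE_SHIFT) & 0xff;

	return arch * 100 + release;	/* e.g. arch 12, release 70 -> 1270 */
}

int main(void)
{
	/* Fabricated register value encoding arch=12, release=70 (Xe_LPG). */
	unsigned int raw = (12u << GMDID_ARCH_SHIFT) | (70u << GMDID_RELEASE_SHIFT);

	printf("GMD_ID 0x%08x -> verx100 %u\n", raw, gmdid_to_verx100(raw));
	return 0;
}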
Cc: Lucas De Marchi Cc: Balasubramani Vivekanandan Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-8-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 6 ++ drivers/gpu/drm/xe/xe_pci.c | 184 ++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_pci_types.h | 5 + 3 files changed, 159 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 23d3b8f7e349..8fc7677e2d13 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -22,6 +22,12 @@ #define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n) _MMIO(0xd50 + (n) * 4) #define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n) _MMIO(0xd70 + (n) * 4) #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0xd84) + +#define GMD_ID _MMIO(0xd8c) +#define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) +#define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) +#define GMD_ID_STEP REG_GENMASK(5, 0) + #define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) #define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index a9233d3a8ff1..7dab489cb5e8 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -15,6 +15,7 @@ #include #include "regs/xe_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_device.h" #include "xe_drv.h" #include "xe_macros.h" @@ -140,9 +141,6 @@ static const struct xe_graphics_desc graphics_xehpc = { static const struct xe_graphics_desc graphics_xelpg = { .name = "Xe_LPG", - .ver = 12, - .rel = 70, - .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_CCS0), @@ -173,9 +171,6 @@ static const struct xe_media_desc media_xehpm = { static const struct xe_media_desc media_xelpmp = { .name = "Xe_LPM+", - .ver = 13, - .rel = 0, - .hw_engine_mask = BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | BIT(XE_HW_ENGINE_VECS0), /* TODO: add GSC0 */ @@ -277,12 +272,7 @@ static const struct xe_gt_desc xelpmp_gts[] = { }; static const struct xe_device_desc mtl_desc = { - /* - * FIXME: Real graphics/media IP will be mapped from hardware - * GMD_ID register. Hardcoded assignments here will go away soon. 
- */ - .graphics = &graphics_xelpg, - .media = &media_xelpmp, + /* .graphics and .media determined via GMD_ID */ .require_force_probe = true, PLATFORM(XE_METEORLAKE), .extra_gts = xelpmp_gts, @@ -290,6 +280,17 @@ static const struct xe_device_desc mtl_desc = { #undef PLATFORM +/* Map of GMD_ID values to graphics IP */ +static struct gmdid_map graphics_ip_map[] = { + { 1270, &graphics_xelpg }, + { 1271, &graphics_xelpg }, +}; + +/* Map of GMD_ID values to media IP */ +static struct gmdid_map media_ip_map[] = { + { 1300, &media_xelpmp }, +}; + #define INTEL_VGA_DEVICE(id, info) { \ PCI_DEVICE(PCI_VENDOR_ID_INTEL, id), \ PCI_BASE_CLASS_DISPLAY << 16, 0xff << 16, \ @@ -378,31 +379,135 @@ find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc) return NULL; } -static void xe_info_init(struct xe_device *xe, +static u32 peek_gmdid(struct xe_device *xe, u32 gmdid_offset) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + void __iomem *map = pci_iomap_range(pdev, 0, gmdid_offset, sizeof(u32)); + u32 ver; + + if (!map) { + drm_err(&xe->drm, "Failed to read GMD_ID (%#x) from PCI BAR.\n", + gmdid_offset); + return 0; + } + + ver = ioread32(map); + pci_iounmap(pdev, map); + + return REG_FIELD_GET(GMD_ID_ARCH_MASK, ver) * 100 + + REG_FIELD_GET(GMD_ID_RELEASE_MASK, ver); +} + +static void handle_gmdid(struct xe_device *xe, const struct xe_device_desc *desc, - const struct xe_subplatform_desc *subplatform_desc) + const struct xe_graphics_desc **graphics, + const struct xe_media_desc **media) +{ + u32 ver; + + if (desc->graphics) { + /* + * Pre-GMD_ID platform; device descriptor already points to + * the appropriate graphics descriptor. + */ + *graphics = desc->graphics; + xe->info.graphics_verx100 = (*graphics)->ver * 100 + (*graphics)->rel; + } else { + /* + * GMD_ID platform; read IP version from hardware and select + * graphics descriptor based on the result. + */ + ver = peek_gmdid(xe, GMD_ID.reg); + for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { + if (ver == graphics_ip_map[i].ver) { + xe->info.graphics_verx100 = ver; + *graphics = graphics_ip_map[i].ip; + + break; + } + } + + if (!xe->info.graphics_verx100) { + drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n", + ver / 100, ver % 100); + } + } + + if (desc->media) { + /* + * Pre-GMD_ID platform; device descriptor already points to + * the appropriate media descriptor. + */ + *media = desc->media; + xe->info.media_verx100 = (*media)->ver * 100 + (*media)->rel; + } else { + /* + * GMD_ID platform; read IP version from hardware and select + * media descriptor based on the result. + * + * desc->media can also be NULL for a pre-GMD_ID platform that + * simply doesn't have media (e.g., PVC); in that case the + * attempt to read GMD_ID will return 0 (since there's no + * register at that location). 
+ */ + ver = peek_gmdid(xe, GMD_ID.reg + 0x380000); + if (ver == 0) + return; + + for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { + if (ver == media_ip_map[i].ver) { + xe->info.media_verx100 = ver; + *media = media_ip_map[i].ip; + + break; + } + } + + if (!xe->info.media_verx100) { + drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n", + ver / 100, ver % 100); + } + } +} + + +static int xe_info_init(struct xe_device *xe, + const struct xe_device_desc *desc, + const struct xe_subplatform_desc *subplatform_desc) { + const struct xe_graphics_desc *graphics_desc = NULL; + const struct xe_media_desc *media_desc = NULL; struct xe_gt *gt; u8 id; - xe->info.graphics_verx100 = desc->graphics->ver * 100 + - desc->graphics->rel; - if (desc->media) - xe->info.media_verx100 = desc->media->ver * 100 + - desc->media->rel; + /* + * If this platform supports GMD_ID, we'll detect the proper IP + * descriptor to use from hardware registers. + */ + handle_gmdid(xe, desc, &graphics_desc, &media_desc); + + /* + * If we couldn't detect the graphics IP, that's considered a fatal + * error and we should abort driver load. Failing to detect media + * IP is non-fatal; we'll just proceed without enabling media support. + */ + if (!graphics_desc) + return -ENODEV; + xe->info.is_dgfx = desc->is_dgfx; xe->info.platform = desc->platform; - xe->info.graphics_name = desc->graphics->name; - xe->info.media_name = desc->media ? desc->media->name : "none"; - xe->info.dma_mask_size = desc->graphics->dma_mask_size; - xe->info.vram_flags = desc->graphics->vram_flags; - xe->info.vm_max_level = desc->graphics->vm_max_level; - xe->info.supports_usm = desc->graphics->supports_usm; - xe->info.has_asid = desc->graphics->has_asid; - xe->info.has_flat_ccs = desc->graphics->has_flat_ccs; + xe->info.graphics_name = graphics_desc->name; + xe->info.media_name = media_desc ? media_desc->name : "none"; xe->info.has_4tile = desc->has_4tile; - xe->info.has_range_tlb_invalidation = desc->graphics->has_range_tlb_invalidation; - xe->info.has_link_copy_engine = desc->graphics->has_link_copy_engine; + + xe->info.dma_mask_size = graphics_desc->dma_mask_size; + xe->info.vram_flags = graphics_desc->vram_flags; + xe->info.vm_max_level = graphics_desc->vm_max_level; + xe->info.supports_usm = graphics_desc->supports_usm; + xe->info.has_asid = graphics_desc->has_asid; + xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; + xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; + xe->info.has_link_copy_engine = graphics_desc->has_link_copy_engine; /* * All platforms have at least one primary GT. Any platform with media @@ -413,7 +518,7 @@ static void xe_info_init(struct xe_device *xe, * FIXME: 'tile_count' here is misnamed since the rest of the driver * treats it as the number of GTs rather than just the number of tiles. 
*/ - xe->info.tile_count = 1 + desc->graphics->max_remote_tiles; + xe->info.tile_count = 1 + graphics_desc->max_remote_tiles; if (MEDIA_VER(xe) >= 13) xe->info.tile_count++; @@ -430,9 +535,9 @@ static void xe_info_init(struct xe_device *xe, gt->info.type = XE_GT_TYPE_MAIN; gt->info.vram_id = id; - gt->info.__engine_mask = desc->graphics->hw_engine_mask; - if (MEDIA_VER(xe) < 13 && desc->media) - gt->info.__engine_mask |= desc->media->hw_engine_mask; + gt->info.__engine_mask = graphics_desc->hw_engine_mask; + if (MEDIA_VER(xe) < 13 && media_desc) + gt->info.__engine_mask |= media_desc->hw_engine_mask; gt->mmio.adj_limit = 0; gt->mmio.adj_offset = 0; @@ -440,14 +545,16 @@ static void xe_info_init(struct xe_device *xe, gt->info.type = desc->extra_gts[id - 1].type; gt->info.vram_id = desc->extra_gts[id - 1].vram_id; gt->info.__engine_mask = (gt->info.type == XE_GT_TYPE_MEDIA) ? - desc->media->hw_engine_mask : - desc->graphics->hw_engine_mask; + media_desc->hw_engine_mask : + graphics_desc->hw_engine_mask; gt->mmio.adj_limit = desc->extra_gts[id - 1].mmio_adj_limit; gt->mmio.adj_offset = desc->extra_gts[id - 1].mmio_adj_offset; } } + + return 0; } static void xe_pci_remove(struct pci_dev *pdev) @@ -494,7 +601,12 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) subplatform_desc = find_subplatform(xe, desc); - xe_info_init(xe, desc, subplatform_desc); + err = xe_info_init(xe, desc, subplatform_desc); + if (err) { + drm_dev_put(&xe->drm); + return err; + } + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d", desc->platform_name, subplatform_desc ? subplatform_desc->name : "", diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e479c1c4ed30..ba31b933eb8e 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -36,4 +36,9 @@ struct xe_media_desc { u64 hw_engine_mask; /* hardware engines provided by media IP */ }; +struct gmdid_map { + unsigned int ver; + const void *ip; +}; + #endif -- cgit v1.2.3 From 3713ed52ef2bc9272afdd195fe24b011a4dcd44d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:20 -0700 Subject: drm/xe: Add KUnit test for xe_pci.c IP engine lists Add a simple KUnit test to ensure that the hardware engine lists for GMD_ID IP definitions are sensible (i.e., no graphics engines defined for the media IP and vice versa). Only the IP descriptors for GMD_ID platforms are checked for now. Presumably the engine lists on older pre-GMD_ID platforms shouldn't be changing. We can extend the KUnit testing in the future if we decide we want to check those as well. v2: - Add missing 'const' in xe_call_for_each_media_ip to avoid compiler warning. 
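One quirk worth spelling out (illustrative sketch only, with the table trimmed to the entries added earlier in this series): consecutive gmdid_map entries may point at the same descriptor, e.g. 1270 and 1271 both map to Xe_LPG, so the iterators added by this patch skip adjacent repeats and invoke the check once per distinct IP:

#include <stdio.h>

struct ip_sketch { const char *name; };

static const struct ip_sketch xelpg = { "Xe_LPG" };

static const struct { unsigned int ver; const struct ip_sketch *ip; } map[] = {
	{ 1270, &xelpg },
	{ 1271, &xelpg },	/* same descriptor as the 1270 entry */
};

int main(void)
{
	const struct ip_sketch *last = NULL;

	for (unsigned int i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
		if (map[i].ip == last)
			continue;	/* adjacent duplicate: already checked */
		printf("checking %s\n", map[i].ip->name);	/* printed once */
		last = map[i].ip;
	}
	return 0;
}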
Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-9-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/Makefile | 1 + drivers/gpu/drm/xe/tests/xe_pci.c | 44 ++++++++++++++++++++ drivers/gpu/drm/xe/tests/xe_pci_test.c | 74 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/tests/xe_pci_test.h | 6 +++ 4 files changed, 125 insertions(+) create mode 100644 drivers/gpu/drm/xe/tests/xe_pci_test.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 56919abb3f2a..51f1a7f017d4 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \ xe_bo_test.o \ xe_dma_buf_test.o \ xe_migrate_test.o \ + xe_pci_test.o \ xe_rtp_test.o \ xe_wa_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index cc65ac5657b3..2178ad71c0da 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -62,6 +62,50 @@ int xe_call_for_each_device(xe_device_fn xe_fn) return ret; } +/** + * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs + * @xe_fn: Function to call for each device. + * + * This function iterates over the descriptors for all graphics IPs recognized + * by the driver and calls @xe_fn: for each one of them. + */ +void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn) +{ + const struct xe_graphics_desc *ip, *last = NULL; + + for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { + ip = graphics_ip_map[i].ip; + if (ip == last) + continue; + + xe_fn(ip); + last = ip; + } +} +EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip); + +/** + * xe_call_for_each_media_ip - Iterate over all recognized media IPs + * @xe_fn: Function to call for each device. + * + * This function iterates over the descriptors for all media IPs recognized + * by the driver and calls @xe_fn: for each one of them. 
+ */ +void xe_call_for_each_media_ip(xe_media_fn xe_fn) +{ + const struct xe_media_desc *ip, *last = NULL; + + for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { + ip = media_ip_map[i].ip; + if (ip == last) + continue; + + xe_fn(ip); + last = ip; + } +} +EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip); + int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, enum xe_subplatform subplatform) { diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c new file mode 100644 index 000000000000..9c6f6c2c6c6e --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include + +#include + +#include "tests/xe_test.h" + +#include "xe_device.h" +#include "xe_pci_test.h" +#include "xe_pci_types.h" + +static void check_graphics_ip(const struct xe_graphics_desc *graphics) +{ + struct kunit *test = xe_cur_kunit(); + u64 mask = graphics->hw_engine_mask; + + /* RCS, CCS, and BCS engines are allowed on the graphics IP */ + mask &= ~(XE_HW_ENGINE_RCS_MASK | + XE_HW_ENGINE_CCS_MASK | + XE_HW_ENGINE_BCS_MASK); + + /* Any remaining engines are an error */ + KUNIT_ASSERT_EQ(test, mask, 0); +} + +static void check_media_ip(const struct xe_media_desc *media) +{ + struct kunit *test = xe_cur_kunit(); + u64 mask = media->hw_engine_mask; + + /* + * VCS and VECS engines are allowed on the media IP + * + * TODO: Add GSCCS once support is added to the driver. + */ + mask &= ~(XE_HW_ENGINE_VCS_MASK | + XE_HW_ENGINE_VECS_MASK); + + /* Any remaining engines are an error */ + KUNIT_ASSERT_EQ(test, mask, 0); +} + +static void xe_gmdid_graphics_ip(struct kunit *test) +{ + xe_call_for_each_graphics_ip(check_graphics_ip); +} + +static void xe_gmdid_media_ip(struct kunit *test) +{ + xe_call_for_each_media_ip(check_media_ip); +} + +static struct kunit_case xe_pci_tests[] = { + KUNIT_CASE(xe_gmdid_graphics_ip), + KUNIT_CASE(xe_gmdid_media_ip), + {} +}; + +static struct kunit_suite xe_pci_test_suite = { + .name = "xe_pci", + .test_cases = xe_pci_tests, +}; + +kunit_test_suite(xe_pci_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); + diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 43294e8c62bb..cc0f1d141a4d 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -9,6 +9,8 @@ #include "xe_platform_types.h" struct xe_device; +struct xe_graphics_desc; +struct xe_media_desc; /* * Some defines just for clarity: these mean the test doesn't care about what @@ -18,8 +20,12 @@ struct xe_device; #define XE_TEST_SUBPLATFORM_ANY XE_SUBPLATFORM_UNINITIALIZED typedef int (*xe_device_fn)(struct xe_device *); +typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); +typedef void (*xe_media_fn)(const struct xe_media_desc *); int xe_call_for_each_device(xe_device_fn xe_fn); +void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); +void xe_call_for_each_media_ip(xe_media_fn xe_fn); int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, enum xe_subplatform subplatform); -- cgit v1.2.3 From 21cc8aadddf9feca921389beafaad40224f8d219 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Apr 2023 16:56:21 -0700 Subject: drm/xe: Clean up xe_device_desc Now that most of the characteristics of a device are associated with the graphics and media IPs, the remaining contents of 
xe_device_desc can be cleaned up a bit: * 'gt' is unused; drop it * DEV_INFO_FOR_EACH_FLAG only covers two flags and is only used in this one file; drop the unnecessary macro complexity * Convert .has_4tile to a single bitfield bit so that it can be packed with the other feature flags * Move 'platform' lower in the structure for better packing Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230406235621.1914492-10-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 7dab489cb5e8..2524ee1c73e3 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -24,11 +24,6 @@ #include "xe_pm.h" #include "xe_step.h" -#define DEV_INFO_FOR_EACH_FLAG(func) \ - func(require_force_probe); \ - func(is_dgfx); \ - /* Keep has_* in alphabetical order */ \ - struct xe_subplatform_desc { enum xe_subplatform subplatform; const char *name; @@ -46,23 +41,20 @@ struct xe_device_desc { const struct xe_graphics_desc *graphics; const struct xe_media_desc *media; - enum xe_platform platform; const char *platform_name; const struct xe_subplatform_desc *subplatforms; const struct xe_gt_desc *extra_gts; - u8 gt; /* GT number, 0 if undefined */ - -#define DEFINE_FLAG(name) u8 name:1 - DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); -#undef DEFINE_FLAG + enum xe_platform platform; + u8 require_force_probe:1; + u8 is_dgfx:1; /* * FIXME: Xe doesn't care about presence/lack of 4tile since we can * already determine that from the graphics IP version. This flag * should eventually move entirely into the display code's own logic. */ - bool has_4tile; + u8 has_4tile:1; }; #define PLATFORM(x) \ -- cgit v1.2.3 From 36919ebeaacab3409c8266248221f392ee7ea9d8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 6 Apr 2023 16:18:45 +0100 Subject: drm/xe: fix suspend-resume for dgfx This stopped working now that TTM treats moving a pinned object through ttm_bo_validate() as an error, for the general case. Add some new routines to handle the new special casing needed for suspend-resume. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/244 Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Rodrigo Vivi Tested-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 129 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 3 + drivers/gpu/drm/xe/xe_bo_evict.c | 8 +-- 3 files changed, 136 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1835f049c21e..9a565203deac 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -696,6 +696,135 @@ out: } +/** + * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory + * @bo: The buffer object to move. + * + * On successful completion, the object memory will be moved to sytem memory. + * This function blocks until the object has been fully moved. + * + * This is needed to for special handling of pinned VRAM object during + * suspend-resume. + * + * Return: 0 on success. Negative error code on failure. 
+ */ +int xe_bo_evict_pinned(struct xe_bo *bo) +{ + struct ttm_place place = { + .mem_type = XE_PL_TT, + }; + struct ttm_placement placement = { + .placement = &place, + .num_placement = 1, + }; + struct ttm_operation_ctx ctx = { + .interruptible = false, + }; + struct ttm_resource *new_mem; + int ret; + + xe_bo_assert_held(bo); + + if (WARN_ON(!bo->ttm.resource)) + return -EINVAL; + + if (WARN_ON(!xe_bo_is_pinned(bo))) + return -EINVAL; + + if (WARN_ON(!xe_bo_is_vram(bo))) + return -EINVAL; + + ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx); + if (ret) + return ret; + + if (!bo->ttm.ttm) { + bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0); + if (!bo->ttm.ttm) { + ret = -ENOMEM; + goto err_res_free; + } + } + + ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx); + if (ret) + goto err_res_free; + + ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); + if (ret) + goto err_res_free; + + ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL); + if (ret) + goto err_res_free; + + dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + + return 0; + +err_res_free: + ttm_resource_free(&bo->ttm, &new_mem); + return ret; +} + +/** + * xe_bo_restore_pinned() - Restore a pinned VRAM object + * @bo: The buffer object to move. + * + * On successful completion, the object memory will be moved back to VRAM. + * This function blocks until the object has been fully moved. + * + * This is needed to for special handling of pinned VRAM object during + * suspend-resume. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_bo_restore_pinned(struct xe_bo *bo) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false, + }; + struct ttm_resource *new_mem; + int ret; + + xe_bo_assert_held(bo); + + if (WARN_ON(!bo->ttm.resource)) + return -EINVAL; + + if (WARN_ON(!xe_bo_is_pinned(bo))) + return -EINVAL; + + if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm)) + return -EINVAL; + + ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx); + if (ret) + return ret; + + ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx); + if (ret) + goto err_res_free; + + ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); + if (ret) + goto err_res_free; + + ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL); + if (ret) + goto err_res_free; + + dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + + return 0; + +err_res_free: + ttm_resource_free(&bo->ttm, &new_mem); + return ret; +} + static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo, unsigned long page_offset) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index dd58edcb9398..effa9d0cf0f6 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -231,6 +231,9 @@ bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); int xe_bo_evict(struct xe_bo *bo, bool force_alloc); +int xe_bo_evict_pinned(struct xe_bo *bo); +int xe_bo_restore_pinned(struct xe_bo *bo); + extern struct ttm_device_funcs xe_ttm_funcs; int xe_gem_create_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index bbf89a58cdf5..6642c5f52009 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -63,7 +63,7 @@ int xe_bo_evict_all(struct xe_device *xe) spin_unlock(&xe->pinned.lock); xe_bo_lock(bo, &ww, 0, false); - ret = xe_bo_evict(bo, true); + ret = 
xe_bo_evict_pinned(bo); xe_bo_unlock(bo, &ww); xe_bo_put(bo); if (ret) { @@ -97,7 +97,7 @@ int xe_bo_evict_all(struct xe_device *xe) spin_unlock(&xe->pinned.lock); xe_bo_lock(bo, &ww, 0, false); - ret = xe_bo_evict(bo, true); + ret = xe_bo_evict_pinned(bo); xe_bo_unlock(bo, &ww); xe_bo_put(bo); if (ret) @@ -141,7 +141,7 @@ int xe_bo_restore_kernel(struct xe_device *xe) spin_unlock(&xe->pinned.lock); xe_bo_lock(bo, &ww, 0, false); - ret = xe_bo_validate(bo, NULL, false); + ret = xe_bo_restore_pinned(bo); xe_bo_unlock(bo, &ww); if (ret) { xe_bo_put(bo); @@ -205,7 +205,7 @@ int xe_bo_restore_user(struct xe_device *xe) spin_unlock(&xe->pinned.lock); xe_bo_lock(bo, &ww, 0, false); - ret = xe_bo_validate(bo, NULL, false); + ret = xe_bo_restore_pinned(bo); xe_bo_unlock(bo, &ww); xe_bo_put(bo); if (ret) { -- cgit v1.2.3 From 2988cf02ee303a96052a6c486b9bbb6e4fd5c030 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Fri, 7 Apr 2023 13:55:22 -0700 Subject: drm/xe: Fix memory use after free The wait_event_timeout() on g2h_fence.wq which is declared on stack can return before the wake_up() gets called, resulting in a stack out of bound access when wake_up() accesses the g2h_fene.wq. Do not declare g2h_fence related wait_queue_head_t on stack. Fixes the below KASAN BUG and associated kernel crashes. BUG: KASAN: stack-out-of-bounds in do_raw_spin_lock+0x6f/0x1e0 Read of size 4 at addr ffff88826252f4ac by task kworker/u128:5/467 CPU: 25 PID: 467 Comm: kworker/u128:5 Tainted: G U 6.3.0-rc4-xe #1 Workqueue: events_unbound g2h_worker_func [xe] Call Trace: dump_stack_lvl+0x64/0xb0 print_report+0xc2/0x600 kasan_report+0x96/0xc0 do_raw_spin_lock+0x6f/0x1e0 _raw_spin_lock_irqsave+0x47/0x60 __wake_up_common_lock+0xc0/0x150 dequeue_one_g2h+0x20f/0x6a0 [xe] g2h_worker_func+0xa9/0x180 [xe] process_one_work+0x527/0x990 worker_thread+0x2d1/0x640 kthread+0x174/0x1b0 ret_from_fork+0x29/0x50 Tested-by: Matt Roper Reviewed-by: Bruce Chang Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 7 +++---- drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 ++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 5e00b75d3ca2..9055ff133a7c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -23,7 +23,6 @@ /* Used when a CT send wants to block and / or receive data */ struct g2h_fence { - wait_queue_head_t wq; u32 *response_buffer; u32 seqno; u16 response_len; @@ -142,6 +141,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) ct->fence_context = dma_fence_context_alloc(1); INIT_WORK(&ct->g2h_worker, g2h_worker_func); init_waitqueue_head(&ct->wq); + init_waitqueue_head(&ct->g2h_fence_wq); primelockdep(ct); @@ -484,7 +484,6 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, void *ptr; g2h_fence->seqno = (ct->fence_seqno++ & 0xffff); - init_waitqueue_head(&g2h_fence->wq); ptr = xa_store(&ct->fence_lookup, g2h_fence->seqno, g2h_fence, GFP_ATOMIC); @@ -709,7 +708,7 @@ retry_same_fence: return ret; } - ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ); + ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); if (!ret) { drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x", g2h_fence.seqno, action[0]); @@ -801,7 +800,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) g2h_fence->done = true; smp_mb(); - wake_up(&g2h_fence->wq); + wake_up_all(&ct->g2h_fence_wq); return 0; } 
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index e0f9063e9b65..fd27dacf00c5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -74,6 +74,8 @@ struct xe_guc_ct { struct xarray fence_lookup; /** @wq: wait queue used for reliable CT sends and freeing G2H credits */ wait_queue_head_t wq; + /** @g2h_fence_wq: wait queue used for G2H fencing */ + wait_queue_head_t g2h_fence_wq; #ifdef XE_GUC_CT_SELFTEST /** @suppress_irq_handler: force flow control to sender */ bool suppress_irq_handler; -- cgit v1.2.3 From 0a12a612c870231172d30196e6245ea471fabaed Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 10 Apr 2023 13:02:29 -0700 Subject: drm/xe: Let primary and media GT share a kernel_bb_pool The media GT requires a valid gt->kernel_bb_pool during driver probe to allocate the WA and NOOP batchbuffers used to record default context images. Dynamically allocate the bb_pools so that the primary and media GT can use the same pool during driver init. The media GT still shouldn't be need the USM pool, so only hook up the kernel_bb_pool for now. Cc: Maarten Lankhorst Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230410200229.2726648-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bb.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 32 +++++++++++++++++++------------- drivers/gpu/drm/xe/xe_gt_debugfs.c | 4 ++-- drivers/gpu/drm/xe/xe_gt_types.h | 10 +++++++--- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- drivers/gpu/drm/xe/xe_sa.c | 23 ++++++++++++++++------- drivers/gpu/drm/xe/xe_sa.h | 4 +--- 7 files changed, 48 insertions(+), 31 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 7172801ee570..3deb2d55f421 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -42,7 +42,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) * space to accomodate the platform-specific hardware prefetch * requirements. */ - bb->bo = xe_sa_bo_new(!usm ? >->kernel_bb_pool : >->usm.bb_pool, + bb->bo = xe_sa_bo_new(!usm ? gt->kernel_bb_pool : gt->usm.bb_pool, 4 * (dwords + 1) + bb_prefetch(gt)); if (IS_ERR(bb->bo)) { err = PTR_ERR(bb->bo); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index daaf93e23bbf..4186f7f0d42f 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -137,7 +137,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) if (IS_ERR(bb)) return PTR_ERR(bb); - batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); + batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool->bo); job = xe_bb_create_wa_job(e, bb, batch_ofs); if (IS_ERR(job)) { xe_bb_free(bb, NULL); @@ -186,7 +186,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) } } - batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool.bo); + batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool->bo); job = xe_bb_create_wa_job(e, bb, batch_ofs); if (IS_ERR(job)) { xe_bb_free(bb, NULL); @@ -439,26 +439,32 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; - /* - * FIXME: This should be ok as SA should only be used by gt->migrate and - * vm->gt->migrate and both should be pointing to a non-media GT. But to - * realy safe, convert gt->kernel_bb_pool to a pointer and point a media - * GT to the kernel_bb_pool on a real tile. 
- */ if (!xe_gt_is_media_type(gt)) { - err = xe_sa_bo_manager_init(gt, >->kernel_bb_pool, SZ_1M, 16); - if (err) + gt->kernel_bb_pool = xe_sa_bo_manager_init(gt, SZ_1M, 16); + if (IS_ERR(gt->kernel_bb_pool)) { + err = PTR_ERR(gt->kernel_bb_pool); goto err_force_wake; + } /* * USM has its only SA pool to non-block behind user operations */ if (gt_to_xe(gt)->info.supports_usm) { - err = xe_sa_bo_manager_init(gt, >->usm.bb_pool, - SZ_1M, 16); - if (err) + gt->usm.bb_pool = xe_sa_bo_manager_init(gt, SZ_1M, 16); + if (IS_ERR(gt->usm.bb_pool)) { + err = PTR_ERR(gt->usm.bb_pool); goto err_force_wake; + } } + } else { + struct xe_gt *full_gt = xe_find_full_gt(gt); + + /* + * Media GT's kernel_bb_pool is only used while recording the + * default context during GT init. The USM pool should never + * be needed on the media GT. + */ + gt->kernel_bb_pool = full_gt->kernel_bb_pool; } if (!xe_gt_is_media_type(gt)) { diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 9fab8017490f..c45486c2015a 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -66,8 +66,8 @@ static int sa_info(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); - drm_suballoc_dump_debug_info(>->kernel_bb_pool.base, &p, - gt->kernel_bb_pool.gpu_addr); + drm_suballoc_dump_debug_info(>->kernel_bb_pool->base, &p, + gt->kernel_bb_pool->gpu_addr); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 9d3117fad2e4..7c47d67aa8be 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -214,7 +214,7 @@ struct xe_gt { * behind any user operations which may have resulted in a * fault. */ - struct xe_sa_manager bb_pool; + struct xe_sa_manager *bb_pool; /** * @reserved_bcs_instance: reserved BCS instance used for USM * operations (e.g. mmigrations, fixing page tables) @@ -304,8 +304,12 @@ struct xe_gt { /** @hw_engines: hardware engines on the GT */ struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES]; - /** @kernel_bb_pool: Pool from which batchbuffers are allocated */ - struct xe_sa_manager kernel_bb_pool; + /** + * @kernel_bb_pool: Pool from which batchbuffers are allocated. + * + * Media GT shares a pool with its primary GT. 
+ */ + struct xe_sa_manager *kernel_bb_pool; /** @migrate: Migration helper for vram blits and clearing */ struct xe_migrate *migrate; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 13cfb7ad2850..2169d687ba3f 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -161,7 +161,7 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; u32 map_ofs, level, i; struct xe_device *xe = gt_to_xe(m->gt); - struct xe_bo *bo, *batch = gt->kernel_bb_pool.bo; + struct xe_bo *bo, *batch = gt->kernel_bb_pool->bo; u64 entry; int ret; @@ -229,7 +229,7 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); if (xe->info.supports_usm) { - batch = gt->usm.bb_pool.bo; + batch = gt->usm.bb_pool->bo; batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_vram); m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 96c4b0ef24fe..c16f7c14ff52 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -33,13 +33,18 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) sa_manager->bo = NULL; } -int xe_sa_bo_manager_init(struct xe_gt *gt, - struct xe_sa_manager *sa_manager, - u32 size, u32 align) +struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 align) { struct xe_device *xe = gt_to_xe(gt); u32 managed_size = size - SZ_4K; struct xe_bo *bo; + int ret; + + struct xe_sa_manager *sa_manager = drmm_kzalloc(>_to_xe(gt)->drm, + sizeof(*sa_manager), + GFP_KERNEL); + if (!sa_manager) + return ERR_PTR(-ENOMEM); sa_manager->bo = NULL; @@ -49,7 +54,7 @@ int xe_sa_bo_manager_init(struct xe_gt *gt, if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); - return PTR_ERR(bo); + return (struct xe_sa_manager *)bo; } sa_manager->bo = bo; @@ -61,15 +66,19 @@ int xe_sa_bo_manager_init(struct xe_gt *gt, if (!sa_manager->cpu_ptr) { xe_bo_unpin_map_no_vm(sa_manager->bo); sa_manager->bo = NULL; - return -ENOMEM; + return ERR_PTR(-ENOMEM); } } else { sa_manager->cpu_ptr = bo->vmap.vaddr; memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); } - return drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, - sa_manager); + ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, + sa_manager); + if (ret) + return ERR_PTR(ret); + + return sa_manager; } struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 742282ef7179..3063fb34c720 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -11,9 +11,7 @@ struct dma_fence; struct xe_bo; struct xe_gt; -int xe_sa_bo_manager_init(struct xe_gt *gt, - struct xe_sa_manager *sa_manager, - u32 size, u32 align); +struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 align); struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size); -- cgit v1.2.3 From 67f2f0d7371709cb91d46d4c557aaa28b902674c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 10 Apr 2023 14:26:58 -0700 Subject: drm/xe: Don't grab runtime PM ref in engine create IOCTL A VM had a runtime PM ref, a engine can't be created without a VM, and the engine holds a ref to the VM thus this is unnecessary. 
Beyond that taking a ref in the engine create IOCTL and dropping it in the destroy IOCTL is wrong as a user doesn't have to call the destroy IOCTL (e.g. they can just kill the process or close the driver FD). If a user does this PM refs are leaked. Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 43 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 141cb223ba02..5666c8e00c97 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -539,8 +539,6 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count)) return -EINVAL; - xe_pm_runtime_get(xe); - if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { for_each_gt(gt, xe, id) { struct xe_engine *new; @@ -552,16 +550,12 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, logical_mask = bind_engine_logical_mask(xe, gt, eci, args->width, args->num_placements); - if (XE_IOCTL_ERR(xe, !logical_mask)) { - err = -EINVAL; - goto put_rpm; - } + if (XE_IOCTL_ERR(xe, !logical_mask)) + return -EINVAL; hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_ERR(xe, !hwe)) { - err = -EINVAL; - goto put_rpm; - } + if (XE_IOCTL_ERR(xe, !hwe)) + return -EINVAL; migrate_vm = xe_migrate_get_vm(gt->migrate); new = xe_engine_create(xe, migrate_vm, logical_mask, @@ -576,7 +570,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, err = PTR_ERR(new); if (e) goto put_engine; - goto put_rpm; + return err; } if (id == 0) e = new; @@ -589,30 +583,22 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, logical_mask = calc_validate_logical_mask(xe, gt, eci, args->width, args->num_placements); - if (XE_IOCTL_ERR(xe, !logical_mask)) { - err = -EINVAL; - goto put_rpm; - } + if (XE_IOCTL_ERR(xe, !logical_mask)) + return -EINVAL; hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_ERR(xe, !hwe)) { - err = -EINVAL; - goto put_rpm; - } + if (XE_IOCTL_ERR(xe, !hwe)) + return -EINVAL; vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) { - err = -ENOENT; - goto put_rpm; - } + if (XE_IOCTL_ERR(xe, !vm)) + return -ENOENT; e = xe_engine_create(xe, vm, logical_mask, args->width, hwe, ENGINE_FLAG_PERSISTENT); xe_vm_put(vm); - if (IS_ERR(e)) { - err = PTR_ERR(e); - goto put_rpm; - } + if (IS_ERR(e)) + return PTR_ERR(e); } if (args->extensions) { @@ -642,8 +628,6 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, put_engine: xe_engine_kill(e); xe_engine_put(e); -put_rpm: - xe_pm_runtime_put(xe); return err; } @@ -750,7 +734,6 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, trace_xe_engine_close(e); xe_engine_put(e); - xe_pm_runtime_put(xe); return 0; } -- cgit v1.2.3 From 689f40f520b6434db29f7b3d7c64b3305b310992 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 10 Apr 2023 11:39:08 -0700 Subject: drm/xe: Use packed bitfields for xe->info feature flags Replace 'bool' fields with single bits to allow the various device feature flags to pack more tightly. 
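For a rough picture of the saving, a simplified sketch (hypothetical struct names, not the actual xe->info layout):

  struct flags_as_bools {
  	bool is_dgfx;
  	bool supports_usm;
  	bool has_asid;
  	bool enable_guc;	/* one byte per flag: 4 bytes total */
  };

  struct flags_as_bitfields {
  	u8 is_dgfx:1;
  	u8 supports_usm:1;
  	u8 has_asid:1;
  	u8 enable_guc:1;	/* all four flags share a single byte */
  };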
Reviewed-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230410183910.2696628-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 87b92f5f078d..f1011ddb5850 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -61,8 +61,6 @@ struct xe_device { u32 media_verx100; /** @mem_region_mask: mask of valid memory regions */ u32 mem_region_mask; - /** @is_dgfx: is discrete device */ - bool is_dgfx; /** @platform: XE platform enum */ enum xe_platform platform; /** @subplatform: XE subplatform enum */ @@ -81,20 +79,23 @@ struct xe_device { u8 tile_count; /** @vm_max_level: Max VM level */ u8 vm_max_level; + + /** @is_dgfx: is discrete device */ + u8 is_dgfx:1; /** @supports_usm: Supports unified shared memory */ - bool supports_usm; + u8 supports_usm:1; /** @has_asid: Has address space ID */ - bool has_asid; + u8 has_asid:1; /** @enable_guc: GuC submission enabled */ - bool enable_guc; + u8 enable_guc:1; /** @has_flat_ccs: Whether flat CCS metadata is used */ - bool has_flat_ccs; + u8 has_flat_ccs:1; /** @has_4tile: Whether tile-4 tiling is supported */ - bool has_4tile; + u8 has_4tile:1; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ - bool has_range_tlb_invalidation; + u8 has_range_tlb_invalidation:1; /** @has_link_copy_engines: Whether the platform has link copy engines */ - bool has_link_copy_engine; + u8 has_link_copy_engine:1; } info; /** @irq: device interrupt state */ -- cgit v1.2.3 From bf08dd47d1567cb922d60a669e5a8a0c40253840 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 10 Apr 2023 11:39:09 -0700 Subject: drm/xe: Track whether platform has LLC Some driver initialization is conditional on the presence of an LLC. Add an extra feature flag to support this. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230410183910.2696628-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index f1011ddb5850..f3cf5a4e5ab2 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -92,6 +92,8 @@ struct xe_device { u8 has_flat_ccs:1; /** @has_4tile: Whether tile-4 tiling is supported */ u8 has_4tile:1; + /** @has_llc: Device has a shared CPU+GPU last level cache */ + u8 has_llc:1; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ u8 has_range_tlb_invalidation:1; /** @has_link_copy_engines: Whether the platform has link copy engines */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 2524ee1c73e3..1ea175d6a7d3 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -55,6 +55,7 @@ struct xe_device_desc { * should eventually move entirely into the display code's own logic. 
*/ u8 has_4tile:1; + u8 has_llc:1; }; #define PLATFORM(x) \ @@ -172,6 +173,7 @@ static const struct xe_device_desc tgl_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_TIGERLAKE), + .has_llc = 1, .require_force_probe = true, }; @@ -179,6 +181,7 @@ static const struct xe_device_desc adl_s_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_ALDERLAKE_S), + .has_llc = 1, .require_force_probe = true, }; @@ -188,6 +191,7 @@ static const struct xe_device_desc adl_p_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_ALDERLAKE_P), + .has_llc = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids }, @@ -491,6 +495,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? media_desc->name : "none"; xe->info.has_4tile = desc->has_4tile; + xe->info.has_llc = desc->has_llc; xe->info.dma_mask_size = graphics_desc->dma_mask_size; xe->info.vram_flags = graphics_desc->vram_flags; -- cgit v1.2.3 From 3c6be2542e353268b27ca4d3cc433c9e6a49bd26 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 10 Apr 2023 11:39:10 -0700 Subject: drm/xe: Only request PCODE_WRITE_MIN_FREQ_TABLE on LLC platforms PCODE_WRITE_MIN_FREQ_TABLE is only applicable to platforms with an LLC. Change the discrete GPU check to an LLC check instead; this take care of skipping not only the discrete platforms, but also integrated platforms like MTL that do not have an LLC. Fixes MTL dmesg error: xe 0000:00:02.0: [drm] *ERROR* PCODE Mailbox failed: 1 Illegal Command Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230410183910.2696628-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index fb1ce2d49bec..99bb730684ed 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -210,7 +210,7 @@ int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, int ret; u32 freq; - if (IS_DGFX(gt_to_xe(gt))) + if (!gt_to_xe(gt)->info.has_llc) return 0; if (max_gt_freq <= min_gt_freq) -- cgit v1.2.3 From 94324e6bed4b5d973c0df5d2d7d0f50503306a28 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 6 Apr 2023 10:58:11 -0700 Subject: drm/xe: GuC and HuC loading support for RKL Rocketlake uses TGL GuC and HuC Cc: Lucas De Marchi Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 7a410c106df4..2c2080928a82 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -106,11 +106,13 @@ struct fw_blobs_by_type { fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \ fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 5)) \ fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 5)) \ + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 5)) \ fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 5)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ fw_def(DG1, no_ver(i915, huc, dg1)) \ + fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) #define MAKE_FW_PATH(dir__, uc__, shortname__, 
version__) \ -- cgit v1.2.3 From 221896e54a30282e7dce2f7f228d4f49b2b970c2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 11 Apr 2023 11:04:58 +0100 Subject: drm/xe/mmio: stop incorrectly triggering drm_warn CI keeps triggering: xe 0000:03:00.0: [drm] Restricting VRAM size to PCI resource size (0x400000000->0x3fa000000) Due to usable_size vs vram_size differences. However, we only want to trigger the drm_warn() to let developers know that the system they are using is going clamp the VRAM size to match the IO size, where they can likely only use 256M of VRAM. Once we properly support small-bar we can revisit this. v2 (Lucas): Drop the TODO for now Signed-off-by: Matthew Auld Cc: Gwan-gyeong Mun Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5cacaa05759a..98357c1f109f 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -228,9 +228,9 @@ int xe_mmio_probe_vram(struct xe_device *xe) else if (xe->mem.vram.io_size < usable_size && !xe_force_vram_bar_size) drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n", (u64)xe->mem.vram.size >> 20); - if (xe->mem.vram.size < vram_size) + if (usable_size > xe->mem.vram.io_size) drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", - vram_size, (u64)xe->mem.vram.size); + usable_size, xe->mem.vram.io_size); xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size); -- cgit v1.2.3 From a8a39c15b011b8ed986f55c6e52e015b0d81da8a Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 11 Apr 2023 15:53:51 -0700 Subject: drm/xe: Add Rocketlake device info Add missing device info for Rocketlake. While at it, also set the value for IS_ROCKETLAKE macro which is right now set to 0. 
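The macro presumably ends up as a plain platform check along these lines (hypothetical shape, inferred from the PLATFORM(XE_ROCKETLAKE) entry below; the real definition is not part of this diff):

  #define IS_ROCKETLAKE(xe)	((xe)->info.platform == XE_ROCKETLAKE)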
v2: Also add abox_mask to the device info(Lucas) v3: rebase v4: Set IS_ROCKETLAKE (Anusha) Cc: Matt Roper Signed-off-by: Anusha Srivatsa Tested-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi (v2) Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 1ea175d6a7d3..df7590b7dc2c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -177,6 +177,13 @@ static const struct xe_device_desc tgl_desc = { .require_force_probe = true, }; +static const struct xe_device_desc rkl_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, + PLATFORM(XE_ROCKETLAKE), + .require_force_probe = true, +}; + static const struct xe_device_desc adl_s_desc = { .graphics = &graphics_xelp, .media = &media_xem, @@ -300,6 +307,7 @@ static struct gmdid_map media_ip_map[] = { */ static const struct pci_device_id pciidlist[] = { XE_TGL_IDS(INTEL_VGA_DEVICE, &tgl_desc), + XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc), XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), -- cgit v1.2.3 From fa4fe0db0885b089200cc336207e40f6902ebbb2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 18 Apr 2023 13:41:47 +0100 Subject: drm/xe/tlb: fix expected_seqno calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like when tlb_invalidation.seqno overflows TLB_INVALIDATION_SEQNO_MAX, we start counting again from one, as per send_tlb_invalidation(). This is also inline with initial value we give it in xe_gt_tlb_invalidation_init(). When calculating the expected_seqno we should also take this into account. While we are here also print out the values if we ever trigger the warning. v2 (José): - drm_WARN_ON() is preferred over plain WARN_ON(), since it gives information on the originating device. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/248 Signed-off-by: Matthew Auld Cc: José Roberto de Souza Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index f279e21300aa..604f189dbd70 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -319,7 +319,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) /* Sanity check on seqno */ expected_seqno = (gt->tlb_invalidation.seqno_recv + 1) % TLB_INVALIDATION_SEQNO_MAX; - XE_WARN_ON(expected_seqno != msg[0]); + if (!expected_seqno) + expected_seqno = 1; + if (drm_WARN_ON(>->xe->drm, expected_seqno != msg[0])) { + drm_err(>->xe->drm, "TLB expected_seqno(%d) != msg(%u)\n", + expected_seqno, msg[0]); + } gt->tlb_invalidation.seqno_recv = msg[0]; smp_wmb(); -- cgit v1.2.3 From 79f2432e3138a3240a99441fc077181e2e8c8fb9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 19 Apr 2023 15:49:09 -0700 Subject: drm/xe/sr: Apply masked registers properly The 'clear' field for register save/restore entries was being placed in the value bits of the register rather than the mask bits; make sure it gets shifted into the mask bits. 
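For background, a masked register keeps a write-enable mask in bits [31:16] and the data in bits [15:0], roughly (simplified helpers in the spirit of i915's _MASKED_FIELD(); not copied from the tree):

  #define MASKED_FIELD(mask, value)	(((mask) << 16) | (value))
  #define MASKED_BIT_ENABLE(bit)	MASKED_FIELD((bit), (bit))
  #define MASKED_BIT_DISABLE(bit)	MASKED_FIELD((bit), 0)

So both the clr_bits and the set_bits of a save/restore entry have to land in the upper half, which is what shifting (entry->clr_bits ?: entry->set_bits) by 16 achieves below.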
Cc: Lucas De Marchi Cc: Matt Atwood Reviewed-by: Matt Atwood Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230419224909.4000920-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index ff83da4cf4a7..e38397fc771a 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -148,7 +148,7 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg, * supposed to set all bits. */ if (entry->masked_reg) - val = (entry->clr_bits ?: entry->set_bits << 16); + val = (entry->clr_bits ?: entry->set_bits) << 16; else if (entry->clr_bits + 1) val = (entry->reg_type == XE_RTP_REG_MCR ? xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) : -- cgit v1.2.3 From 1a9d163c4243c679e7a8d4c4abd787e40249485f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 6 Apr 2023 17:26:24 +0100 Subject: drm/xe/sched_job: prefer dma_fence_is_later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doesn't look like we are accounting for seqno wrap. Just use __dma_fence_is_later() like we already do for xe_hw_fence_signaled(). Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sched_job.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index d9add0370a98..795146dfd663 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -229,7 +229,9 @@ bool xe_sched_job_started(struct xe_sched_job *job) { struct xe_lrc *lrc = job->engine->lrc; - return xe_lrc_start_seqno(lrc) >= xe_sched_job_seqno(job); + return !__dma_fence_is_later(xe_sched_job_seqno(job), + xe_lrc_start_seqno(lrc), + job->fence->ops); } bool xe_sched_job_completed(struct xe_sched_job *job) @@ -241,7 +243,8 @@ bool xe_sched_job_completed(struct xe_sched_job *job) * parallel handshake is done. */ - return xe_lrc_seqno(lrc) >= xe_sched_job_seqno(job); + return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc), + job->fence->ops); } void xe_sched_job_arm(struct xe_sched_job *job) -- cgit v1.2.3 From 7500477ded53343921b24e7ec5770197af710d94 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 6 Apr 2023 17:26:25 +0100 Subject: drm/xe/lrc: give start_seqno a better default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If looking at the initial engine dump we should expect this to match XE_FENCE_INITIAL_SEQNO - 1. 
Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_lrc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index fb8c6f7d6528..ae605e7805de 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -700,6 +700,9 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + map = __xe_lrc_start_seqno_map(lrc); + xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + return 0; err_lrc_finish: -- cgit v1.2.3 From d33dc1dc29cab7871f9b0adee7b94b4dc5de5cb1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 21 Apr 2023 07:50:05 -0700 Subject: drm/xe: Fix xe_mmio_rmw32 operation xe_mmio_rmw32 was failing to invert the passed in mask, resulting in a register update that wasn't the expected RMW operation. Fortunately the impact of this mistake was limited, since this function isn't heavily used in Xe right now; this will mostly fix some GuC PM interrupt unmasking. v2: - Rename parameters as 'clr' and 'set' to clarify semantics. (Lucas) Cc: Lucas De Marchi Cc: Maarten Lankhorst Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230421145006.10940-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 354be6fae0d4..be7ba2813d58 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -42,13 +42,13 @@ static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg) return readl(gt->mmio.regs + reg); } -static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 mask, - u32 val) +static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 clr, + u32 set) { u32 old, reg_val; old = xe_mmio_read32(gt, reg); - reg_val = (old & mask) | val; + reg_val = (old & ~clr) | set; xe_mmio_write32(gt, reg, reg_val); return old; -- cgit v1.2.3 From e881b1292f1791826476f1a2eaf80cc85e2677c5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 18 Apr 2023 16:02:47 -0700 Subject: drm/xe: Drop GFX_FLSH_CNTL_GEN6 write during GGTT invalidation The write of GFX_FLSH_CNTL_GEN6 was inherited from the i915 codebase where it was used to force a flush of the write-combine buffer in cases where the GSM/GGTT were mapped as WC. Since Xe never uses WC mappings of the GGTT, this register write is unnecessary. Furthermore, this register was removed on Xe_HP-based platforms, so this write winds up clobbering an unrelated register. v2: - Also drop GFX_FLSH_CNTL_GEN6 from the register file now that it's no longer used. 
(Lucas) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230418230247.3802438-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 --- drivers/gpu/drm/xe/xe_ggtt.c | 5 ----- 2 files changed, 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 8fc7677e2d13..c1d73f3e7bc3 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -351,9 +351,6 @@ #define GEN6_GT_GFX_RC6_LOCKED _MMIO(0x138104) #define GEN6_GT_GFX_RC6 _MMIO(0x138108) -#define GFX_FLSH_CNTL_GEN6 _MMIO(0x101008) -#define GFX_FLSH_CNTL_EN (1 << 0) - #define GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) #define GUC_SG_INTR_ENABLE _MMIO(0x190038) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 99bc9036c7a0..0fda9a18049b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -196,11 +196,6 @@ void xe_ggtt_invalidate(struct xe_gt *gt) { /* TODO: vfunc for GuC vs. non-GuC */ - /* TODO: i915 makes comments about this being uncached and - * therefore flushing WC buffers. Is that really true here? - */ - xe_mmio_write32(gt, GFX_FLSH_CNTL_GEN6.reg, GFX_FLSH_CNTL_EN); - if (gt->uc.guc.submission_state.enabled) { int seqno; -- cgit v1.2.3 From 96cb46df567e04bcc569ffde9c426b078c5601b1 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Tue, 25 Apr 2023 16:31:07 +0530 Subject: drm/xe: Keep all resize bar related prints inside xe_resize_vram_bar xe_resize_vram_bar() function is already printing the status of bar resizing. It has prints covering both success and failure. There is no need of additional prints in the caller which were not so easily to follow. Modified all BAR size prints to consistently print the size in MiB. Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 98357c1f109f..5536f84682c0 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -59,7 +59,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) ret = pci_resize_resource(pdev, resno, bar_size); if (ret) { - drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe)\n", + drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). 
Consider enabling 'Resizable BAR' support in your BIOS\n", resno, 1 << bar_size, ERR_PTR(ret)); return -1; } @@ -95,7 +95,7 @@ static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size) rebar_size >= roundup_pow_of_two(vram_size)) { rebar_size = vram_size; drm_info(&xe->drm, - "Given bar size is not within supported size, setting it to default: %llu\n", + "Given bar size is not within supported size, setting it to default: %lluMiB\n", (u64)vram_size >> 20); } } else { @@ -107,6 +107,9 @@ static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size) return 0; } + drm_info(&xe->drm, "Resizing bar from %lluMiB -> %lluMiB\n", + (u64)current_size >> 20, (u64)rebar_size >> 20); + while (root->parent) root = root->parent; @@ -117,7 +120,7 @@ static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size) } if (!root_res) { - drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing\n"); + drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n"); return -1; } @@ -183,7 +186,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) u64 vram_size; u64 original_size; u64 usable_size; - int resize_result, err; + int err; if (!IS_DGFX(xe)) { xe->mem.vram.mapping = 0; @@ -212,7 +215,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) if (err) return err; - resize_result = xe_resize_vram_bar(xe, vram_size); + xe_resize_vram_bar(xe, vram_size); xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); xe->mem.vram.io_size = min(usable_size, pci_resource_len(pdev, GEN12_LMEM_BAR)); @@ -221,16 +224,9 @@ int xe_mmio_probe_vram(struct xe_device *xe) if (!xe->mem.vram.size) return -EIO; - if (resize_result > 0) - drm_info(&xe->drm, "Successfully resize VRAM from %lluMiB to %lluMiB\n", - (u64)original_size >> 20, - (u64)xe->mem.vram.io_size >> 20); - else if (xe->mem.vram.io_size < usable_size && !xe_force_vram_bar_size) - drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n", - (u64)xe->mem.vram.size >> 20); if (usable_size > xe->mem.vram.io_size) - drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", - usable_size, xe->mem.vram.io_size); + drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (%lluMiB->%lluMiB)\n", + (u64)usable_size >> 20, (u64)xe->mem.vram.io_size >> 20); xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size); -- cgit v1.2.3 From a180f4e13c4473f4e66e5666dbb6157d56d83dcf Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Thu, 20 Apr 2023 11:26:48 +0530 Subject: drm/xe/guc_pc: Reorder forcewake and xe_pm_runtime calls When the device is runtime suspended, reading some of the sysfs entries under device/gt#/ causes a resume error This is due to the ordering of pm_runtime and forcewake calls. 
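The ordering being moved to looks roughly like this (minimal sketch around a hypothetical example_show(); the helpers are the same ones used in the hunks below):

  static ssize_t example_show(struct xe_gt *gt, char *buf)
  {
  	ssize_t ret;

  	xe_device_mem_access_get(gt_to_xe(gt));		/* runtime PM ref first */
  	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
  	if (ret)
  		goto out;

  	/* ... read registers and sysfs_emit() into buf ... */

  	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
  out:
  	xe_device_mem_access_put(gt_to_xe(gt));		/* PM ref dropped last */
  	return ret;
  }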
Reorder to wake up using xe_pm_runtime_get and then forcewake v2: add goto statements (Rodrigo) Signed-off-by: Riana Tauro Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 5a8d827ba770..b853831b342b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -370,15 +370,14 @@ static ssize_t freq_act_show(struct device *dev, u32 freq; ssize_t ret; + xe_device_mem_access_get(gt_to_xe(gt)); /* * When in RC6, actual frequency is 0. Let's block RC6 so we are able * to verify that our freq requests are really happening. */ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - return ret; - - xe_device_mem_access_get(gt_to_xe(gt)); + goto out; if (xe->info.platform == XE_METEORLAKE) { freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1.reg); @@ -388,11 +387,11 @@ static ssize_t freq_act_show(struct device *dev, freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq); } - xe_device_mem_access_put(gt_to_xe(gt)); - ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out: + xe_device_mem_access_put(gt_to_xe(gt)); return ret; } static DEVICE_ATTR_RO(freq_act); @@ -405,22 +404,23 @@ static ssize_t freq_cur_show(struct device *dev, u32 freq; ssize_t ret; + xe_device_mem_access_get(gt_to_xe(gt)); /* * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. */ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - return ret; + goto out; - xe_device_mem_access_get(gt_to_xe(gt)); freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg); - xe_device_mem_access_put(gt_to_xe(gt)); freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out: + xe_device_mem_access_put(gt_to_xe(gt)); return ret; } static DEVICE_ATTR_RO(freq_cur); @@ -610,17 +610,17 @@ static ssize_t rc6_residency_show(struct device *dev, u32 reg; ssize_t ret; + xe_device_mem_access_get(pc_to_xe(pc)); ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - return ret; + goto out; - xe_device_mem_access_get(pc_to_xe(pc)); reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg); - xe_device_mem_access_put(pc_to_xe(pc)); - ret = sysfs_emit(buff, "%u\n", reg); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out: + xe_device_mem_access_put(pc_to_xe(pc)); return ret; } static DEVICE_ATTR_RO(rc6_residency); -- cgit v1.2.3 From fdb3abcebba5d4a647739bb79a3818bd81956f64 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 25 Apr 2023 10:51:16 +0200 Subject: drm/xe: Fix build without CONFIG_PM_SLEEP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build without CONFIG_PM_SLEEP (such as for riscv) was failing due to unused xe_pci_runtime_* functions. 
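Concretely, the fix just extends the existing #ifdef CONFIG_PM_SLEEP region so the runtime PM callbacks are only built when they can be referenced; an abridged sketch of the resulting layout (function bodies elided, xe_pci_runtime_resume assumed by symmetry with the other callbacks):

  #ifdef CONFIG_PM_SLEEP
  static int xe_pci_suspend(struct device *dev) { /* ... */ return 0; }
  static int xe_pci_resume(struct device *dev) { /* ... */ return 0; }

  static int xe_pci_runtime_suspend(struct device *dev) { /* ... */ return 0; }
  static int xe_pci_runtime_resume(struct device *dev) { /* ... */ return 0; }
  static int xe_pci_runtime_idle(struct device *dev) { /* ... */ return 0; }
  #endif	/* the #endif now closes after the runtime callbacks */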
Signed-off-by: Francois Dugast Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index df7590b7dc2c..5f750edce542 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -702,7 +702,6 @@ static int xe_pci_resume(struct device *dev) return 0; } -#endif static int xe_pci_runtime_suspend(struct device *dev) { @@ -765,6 +764,7 @@ static int xe_pci_runtime_idle(struct device *dev) return 0; } +#endif static const struct dev_pm_ops xe_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume) -- cgit v1.2.3 From a121594006813eff7864a63e14573f3f5523e29c Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 26 Apr 2023 09:20:05 -0700 Subject: drm/xe: Limit the system memory size to half of the system memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ttm_global_init() imposes this limitation. Cc: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index 5b0674bbb8ed..3e1fa0c832ca 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -105,7 +105,10 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe) u64 gtt_size; si_meminfo(&si); - gtt_size = (u64)si.totalram * si.mem_unit * 3/4; + gtt_size = (u64)si.totalram * si.mem_unit; + /* TTM limits allocation of all TTM devices by 50% of system memory */ + gtt_size /= 2; + man->use_tt = true; man->func = &xe_ttm_sys_mgr_func; ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); -- cgit v1.2.3 From 052df73b9e90305487ad9349d0fc8b59ddb6007b Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 26 Apr 2023 09:09:40 -0400 Subject: drm/xe: Update comment on why d3cold is still blocked. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main issue with buddy allocator was fixed, but then we ended up on other issues, so we need to step back and rethink our strategy with D3cold. So, let's update the comment with a todo list so we don't get tempted in removing it before we are really ready. Cc: Matthew Auld Cc: Thomas Hellström Cc: Riana Tauro Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Auld --- drivers/gpu/drm/xe/xe_pci.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 5f750edce542..c1f2f63548d3 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -755,10 +755,15 @@ static int xe_pci_runtime_idle(struct device *dev) struct xe_device *xe = pdev_to_xe_device(pdev); /* - * FIXME: d3cold should be allowed (true) if + * TODO: d3cold should be allowed (true) if * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe)) - * however the change to the buddy allocator broke the - * xe_bo_restore_kernel when the pci device is disabled + * but maybe include some other conditions. So, before + * we can re-enable the D3cold, we need to: + * 1. rewrite the VRAM save / restore to avoid buffer object locks + * 2. block D3cold if we have a big amount of device memory in use + * in order to reduce the latency. + * 3. 
at resume, detect if we really lost power and avoid memory + * restoration if we were only up to d3cold */ xe->d3cold_allowed = false; -- cgit v1.2.3 From 9d3c8fb98ba31873c0ebbc42c5d8133fa59f7ac7 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 26 Apr 2023 12:07:20 -0400 Subject: drm/xe: Fix print of RING_EXECLIST_SQ_CONTENTS_HI On xe_hw_engine_print_state we were printing: value_of(0x510) + 4 instead of value_of(0x514) as desired. So, let's properly define a RING_EXECLIST_SQ_CONTENTS_HI register to fix the issue and also to avoid other issues like that. Signed-off-by: Rodrigo Vivi Reviewed-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3 ++- drivers/gpu/drm/xe/xe_execlist.c | 4 ++-- drivers/gpu/drm/xe/xe_hw_engine.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 2aa67d001c34..a1e1d1c206fa 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -84,7 +84,8 @@ RING_FORCE_TO_NONPRIV_DENY) #define RING_MAX_NONPRIV_SLOTS 12 -#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) +#define RING_EXECLIST_SQ_CONTENTS_LO(base) _MMIO((base) + 0x510) +#define RING_EXECLIST_SQ_CONTENTS_HI(base) _MMIO((base) + 0x510 + 4) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) #define EL_CTRL_LOAD REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index e540e5d287a0..64b520ddca9c 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -84,9 +84,9 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg, _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); - xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 0, + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base).reg, lower_32_bits(lrc_desc)); - xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS(hwe->mmio_base).reg + 4, + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base).reg, upper_32_bits(lrc_desc)); xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg, EL_CTRL_LOAD); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 4b56c35b988d..23b9f120c258 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -528,10 +528,10 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p) hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg)); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n", hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS(0).reg)); + RING_EXECLIST_SQ_CONTENTS_LO(0).reg)); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS(0).reg) + 4); + RING_EXECLIST_SQ_CONTENTS_HI(0).reg)); drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n", hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg)); -- cgit v1.2.3 From 58e19acf0cdf3f18c1c868165f45d3ea626b9c3f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 12 Apr 2023 16:28:41 -0700 Subject: drm/xe: Cleanup page-related defines Rename the following defines to lose the GEN* prefixes since they don't make sense for xe: GEN8_PTE_SHIFT -> XE_PTE_SHIFT GEN8_PAGE_SIZE -> XE_PAGE_SIZE GEN8_PTE_MASK -> XE_PTE_MASK GEN8_PDE_SHIFT -> XE_PDE_SHIFT GEN8_PDES -> XE_PDES GEN8_PDE_MASK -> XE_PDE_MASK GEN8_64K_PTE_SHIFT -> XE_64K_PTE_SHIFT 
GEN8_64K_PAGE_SIZE -> XE_64K_PAGE_SIZE GEN8_64K_PTE_MASK -> XE_64K_PTE_MASK GEN8_64K_PDE_MASK -> XE_64K_PDE_MASK GEN8_PDE_PS_2M -> XE_PDE_PS_2M GEN8_PDPE_PS_1G -> XE_PDPE_PS_1G GEN8_PDE_IPS_64K -> XE_PDE_IPS_64K GEN12_GGTT_PTE_LM -> XE_GGTT_PTE_LM GEN12_USM_PPGTT_PTE_AE -> XE_USM_PPGTT_PTE_AE GEN12_PPGTT_PTE_LM -> XE_PPGTT_PTE_LM GEN12_PDE_64K -> XE_PDE_64K GEN12_PTE_PS64 -> XE_PTE_PS64 GEN8_PAGE_PRESENT -> XE_PAGE_PRESENT GEN8_PAGE_RW -> XE_PAGE_RW PTE_READ_ONLY -> XE_PTE_READ_ONLY Keep an XE_ prefix to make sure we don't mix the defines for the CPU (e.g. PAGE_SIZE) with the ones fro the GPU). Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 15 ++++---- drivers/gpu/drm/xe/xe_bo.h | 52 +++++++++++++------------- drivers/gpu/drm/xe/xe_ggtt.c | 24 ++++++------ drivers/gpu/drm/xe/xe_migrate.c | 69 ++++++++++++++++++----------------- drivers/gpu/drm/xe/xe_pt.c | 40 ++++++++++---------- drivers/gpu/drm/xe/xe_vm.c | 20 +++++----- 6 files changed, 112 insertions(+), 108 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index cdcecf8d5eef..0f4371ad1fd9 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -265,7 +265,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto vunmap; } - pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, GEN8_PAGE_SIZE, + pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | XE_BO_CREATE_PINNED_BIT); @@ -294,20 +294,21 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) } kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n", - (unsigned long)xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, GEN8_PAGE_SIZE), - (unsigned long)xe_bo_main_addr(m->pt_bo, GEN8_PAGE_SIZE)); + (unsigned long)xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, XE_PAGE_SIZE), + (unsigned long)xe_bo_main_addr(m->pt_bo, XE_PAGE_SIZE)); /* First part of the test, are we updating our pagetable bo with a new entry? 
*/ - xe_map_wr(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); + xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, + 0xdeaddeadbeefbeef); expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0); if (m->eng->vm->flags & XE_VM_FLAGS_64K) - expected |= GEN12_PTE_PS64; + expected |= XE_PTE_PS64; xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), - &src_it, GEN8_PAGE_SIZE, pt); + &src_it, XE_PAGE_SIZE, pt); run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); - retval = xe_map_rd(xe, &bo->vmap, GEN8_PAGE_SIZE * (NUM_KERNEL_PDE - 1), + retval = xe_map_rd(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64); check(retval, expected, "PTE entry write", test); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index effa9d0cf0f6..8354d05ccdf3 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -39,32 +39,32 @@ #define PPAT_CACHED BIT_ULL(7) #define PPAT_DISPLAY_ELLC BIT_ULL(4) -#define GEN8_PTE_SHIFT 12 -#define GEN8_PAGE_SIZE (1 << GEN8_PTE_SHIFT) -#define GEN8_PTE_MASK (GEN8_PAGE_SIZE - 1) -#define GEN8_PDE_SHIFT (GEN8_PTE_SHIFT - 3) -#define GEN8_PDES (1 << GEN8_PDE_SHIFT) -#define GEN8_PDE_MASK (GEN8_PDES - 1) - -#define GEN8_64K_PTE_SHIFT 16 -#define GEN8_64K_PAGE_SIZE (1 << GEN8_64K_PTE_SHIFT) -#define GEN8_64K_PTE_MASK (GEN8_64K_PAGE_SIZE - 1) -#define GEN8_64K_PDE_MASK (GEN8_PDE_MASK >> 4) - -#define GEN8_PDE_PS_2M BIT_ULL(7) -#define GEN8_PDPE_PS_1G BIT_ULL(7) -#define GEN8_PDE_IPS_64K BIT_ULL(11) - -#define GEN12_GGTT_PTE_LM BIT_ULL(1) -#define GEN12_USM_PPGTT_PTE_AE BIT_ULL(10) -#define GEN12_PPGTT_PTE_LM BIT_ULL(11) -#define GEN12_PDE_64K BIT_ULL(6) -#define GEN12_PTE_PS64 BIT_ULL(8) - -#define GEN8_PAGE_PRESENT BIT_ULL(0) -#define GEN8_PAGE_RW BIT_ULL(1) - -#define PTE_READ_ONLY BIT(0) +#define XE_PTE_SHIFT 12 +#define XE_PAGE_SIZE (1 << XE_PTE_SHIFT) +#define XE_PTE_MASK (XE_PAGE_SIZE - 1) +#define XE_PDE_SHIFT (XE_PTE_SHIFT - 3) +#define XE_PDES (1 << XE_PDE_SHIFT) +#define XE_PDE_MASK (XE_PDES - 1) + +#define XE_64K_PTE_SHIFT 16 +#define XE_64K_PAGE_SIZE (1 << XE_64K_PTE_SHIFT) +#define XE_64K_PTE_MASK (XE_64K_PAGE_SIZE - 1) +#define XE_64K_PDE_MASK (XE_PDE_MASK >> 4) + +#define XE_PDE_PS_2M BIT_ULL(7) +#define XE_PDPE_PS_1G BIT_ULL(7) +#define XE_PDE_IPS_64K BIT_ULL(11) + +#define XE_GGTT_PTE_LM BIT_ULL(1) +#define XE_USM_PPGTT_PTE_AE BIT_ULL(10) +#define XE_PPGTT_PTE_LM BIT_ULL(11) +#define XE_PDE_64K BIT_ULL(6) +#define XE_PTE_PS64 BIT_ULL(8) + +#define XE_PAGE_PRESENT BIT_ULL(0) +#define XE_PAGE_RW BIT_ULL(1) + +#define XE_PTE_READ_ONLY BIT(0) #define XE_PL_SYSTEM TTM_PL_SYSTEM #define XE_PL_TT TTM_PL_TT diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0fda9a18049b..dbc45ef084b4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -30,11 +30,11 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) u64 pte; bool is_vram; - pte = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_vram); - pte |= GEN8_PAGE_PRESENT; + pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram); + pte |= XE_PAGE_PRESENT; if (is_vram) - pte |= GEN12_GGTT_PTE_LM; + pte |= XE_GGTT_PTE_LM; /* FIXME: vfunc + pass in caching rules */ if (xe->info.platform == XE_METEORLAKE) { @@ -56,10 +56,10 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) { - XE_BUG_ON(addr & GEN8_PTE_MASK); + XE_BUG_ON(addr & XE_PTE_MASK); 
XE_BUG_ON(addr >= ggtt->size); - writeq(pte, &ggtt->gsm[addr >> GEN8_PTE_SHIFT]); + writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]); } static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) @@ -76,7 +76,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) while (start < end) { xe_ggtt_set_pte(ggtt, start, scratch_pte); - start += GEN8_PAGE_SIZE; + start += XE_PAGE_SIZE; } } @@ -107,7 +107,7 @@ int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt) } ggtt->gsm = gt->mmio.regs + SZ_8M; - ggtt->size = (gsm_size / 8) * (u64)GEN8_PAGE_SIZE; + ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE; if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ggtt->flags |= XE_GGTT_FLAGS_64K; @@ -167,7 +167,7 @@ int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt) else flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt); - ggtt->scratch = xe_bo_create_pin_map(xe, gt, NULL, GEN8_PAGE_SIZE, + ggtt->scratch = xe_bo_create_pin_map(xe, gt, NULL, XE_PAGE_SIZE, ttm_bo_type_kernel, flags); @@ -224,8 +224,8 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); printk("%sGlobal GTT:", prefix); - for (addr = 0; addr < ggtt->size; addr += GEN8_PAGE_SIZE) { - unsigned int i = addr / GEN8_PAGE_SIZE; + for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { + unsigned int i = addr / XE_PAGE_SIZE; XE_BUG_ON(addr > U32_MAX); if (ggtt->gsm[i] == scratch_pte) @@ -261,7 +261,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) u64 start = bo->ggtt_node.start; u64 offset, pte; - for (offset = 0; offset < bo->size; offset += GEN8_PAGE_SIZE) { + for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = xe_ggtt_pte_encode(bo, offset); xe_ggtt_set_pte(ggtt, start + offset, pte); } @@ -309,7 +309,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u64 alignment; - alignment = GEN8_PAGE_SIZE; + alignment = XE_PAGE_SIZE; if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2169d687ba3f..a8e66b84dc63 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -147,7 +147,7 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) return PTR_ERR(m->cleared_bo); xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size); - vram_addr = xe_bo_addr(m->cleared_bo, 0, GEN8_PAGE_SIZE, &is_vram); + vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE, &is_vram); XE_BUG_ON(!is_vram); m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr); @@ -166,9 +166,9 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, int ret; /* Can't bump NUM_PT_SLOTS too high */ - BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/GEN8_PAGE_SIZE); + BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE); /* Must be a multiple of 64K to support all platforms */ - BUILD_BUG_ON(NUM_PT_SLOTS * GEN8_PAGE_SIZE % SZ_64K); + BUILD_BUG_ON(NUM_PT_SLOTS * XE_PAGE_SIZE % SZ_64K); /* And one slot reserved for the 4KiB page table updates */ BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); @@ -176,7 +176,7 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M); bo = xe_bo_create_pin_map(vm->xe, m->gt, vm, - num_entries * GEN8_PAGE_SIZE, + num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | XE_BO_CREATE_PINNED_BIT); @@ -189,14 +189,14 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, 
struct xe_migrate *m, return ret; } - entry = gen8_pde_encode(bo, bo->size - GEN8_PAGE_SIZE, XE_CACHE_WB); + entry = gen8_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); - map_ofs = (num_entries - num_level) * GEN8_PAGE_SIZE; + map_ofs = (num_entries - num_level) * XE_PAGE_SIZE; /* Map the entire BO in our level 0 pt */ for (i = 0, level = 0; i < num_entries; level++) { - entry = gen8_pte_encode(NULL, bo, i * GEN8_PAGE_SIZE, + entry = gen8_pte_encode(NULL, bo, i * XE_PAGE_SIZE, XE_CACHE_WB, 0, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -211,10 +211,10 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, XE_BUG_ON(xe->info.supports_usm); /* Write out batch too */ - m->batch_base_ofs = NUM_PT_SLOTS * GEN8_PAGE_SIZE; + m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; for (i = 0; i < batch->size; - i += vm->flags & XE_VM_FLAGS_64K ? GEN8_64K_PAGE_SIZE : - GEN8_PAGE_SIZE) { + i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE : + XE_PAGE_SIZE) { entry = gen8_pte_encode(NULL, batch, i, XE_CACHE_WB, 0, 0); @@ -224,13 +224,13 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, } } else { bool is_vram; - u64 batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, &is_vram); + u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE, &is_vram); m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); if (xe->info.supports_usm) { batch = gt->usm.bb_pool->bo; - batch_addr = xe_bo_addr(batch, 0, GEN8_PAGE_SIZE, + batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE, &is_vram); m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); } @@ -240,20 +240,20 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, u32 flags = 0; if (vm->flags & XE_VM_FLAGS_64K && level == 1) - flags = GEN12_PDE_64K; + flags = XE_PDE_64K; entry = gen8_pde_encode(bo, map_ofs + (level - 1) * - GEN8_PAGE_SIZE, XE_CACHE_WB); - xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE * level, u64, + XE_PAGE_SIZE, XE_CACHE_WB); + xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); } /* Write PDE's that point to our BO. */ for (i = 0; i < num_entries - num_level; i++) { - entry = gen8_pde_encode(bo, i * GEN8_PAGE_SIZE, + entry = gen8_pde_encode(bo, i * XE_PAGE_SIZE, XE_CACHE_WB); - xe_map_wr(xe, &bo->vmap, map_ofs + GEN8_PAGE_SIZE + + xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + (i + 1) * 8, u64, entry); } @@ -262,9 +262,9 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, u64 pos, ofs, flags; level = 2; - ofs = map_ofs + GEN8_PAGE_SIZE * level + 256 * 8; - flags = GEN8_PAGE_RW | GEN8_PAGE_PRESENT | PPAT_CACHED | - GEN12_PPGTT_PTE_LM | GEN8_PDPE_PS_1G; + ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8; + flags = XE_PAGE_RW | XE_PAGE_PRESENT | PPAT_CACHED | + XE_PPGTT_PTE_LM | XE_PDPE_PS_1G; /* * Use 1GB pages, it shouldn't matter the physical amount of @@ -294,10 +294,10 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, * the different addresses in VM. 
*/ #define NUM_VMUSA_UNIT_PER_PAGE 32 -#define VM_SA_UPDATE_UNIT_SIZE (GEN8_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) +#define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE) #define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64)) drm_suballoc_manager_init(&m->vm_update_sa, - (map_ofs / GEN8_PAGE_SIZE - NUM_KERNEL_PDE) * + (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) * NUM_VMUSA_UNIT_PER_PAGE, 0); m->pt_bo = bo; @@ -403,7 +403,7 @@ static u32 pte_update_size(struct xe_migrate *m, if (!is_vram) { /* Clip L0 to available size */ u64 size = min(*L0, (u64)avail_pts * SZ_2M); - u64 num_4k_pages = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); + u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); *L0 = size; *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); @@ -433,7 +433,7 @@ static void emit_pte(struct xe_migrate *m, u32 size, struct xe_bo *bo) { u32 ptes; - u64 ofs = at_pt * GEN8_PAGE_SIZE; + u64 ofs = at_pt * XE_PAGE_SIZE; u64 cur_ofs; /* @@ -443,7 +443,7 @@ static void emit_pte(struct xe_migrate *m, * on running tests. */ - ptes = DIV_ROUND_UP(size, GEN8_PAGE_SIZE); + ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { u32 chunk = min(0x1ffU, ptes); @@ -466,13 +466,13 @@ static void emit_pte(struct xe_migrate *m, if ((m->eng->vm->flags & XE_VM_FLAGS_64K) && !(cur_ofs & (16 * 8 - 1))) { XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K)); - addr |= GEN12_PTE_PS64; + addr |= XE_PTE_PS64; } addr += vram_region_io_offset(bo->ttm.resource); - addr |= GEN12_PPGTT_PTE_LM; + addr |= XE_PPGTT_PTE_LM; } - addr |= PPAT_CACHED | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW; bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); @@ -697,7 +697,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, GEN8_PAGE_SIZE); + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, + XE_PAGE_SIZE); flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram, dst_L0_ofs, dst_is_vram, src_L0, ccs_ofs, copy_ccs); @@ -915,7 +916,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_clear(gt, bb, clear_L0_ofs, clear_L0, GEN8_PAGE_SIZE, + emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); if (xe_device_has_flat_ccs(xe) && clear_vram) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, @@ -985,7 +986,7 @@ static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs, bool is_vram; ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, - GEN8_PAGE_SIZE, + XE_PAGE_SIZE, &is_vram)); XE_BUG_ON(!is_vram); } @@ -1202,7 +1203,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, /* Map our PT's to gtt */ bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | (num_updates * 2 + 1); - bb->cs[bb->len++] = ppgtt_ofs * GEN8_PAGE_SIZE + page_ofs; + bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs; bb->cs[bb->len++] = 0; /* upper_32_bits */ for (i = 0; i < num_updates; i++) { @@ -1220,9 +1221,9 @@ xe_migrate_update_pgtables(struct xe_migrate *m, update_idx = bb->len; addr = xe_migrate_vm_addr(ppgtt_ofs, 0) + - (page_ofs / sizeof(u64)) * GEN8_PAGE_SIZE; + (page_ofs / sizeof(u64)) * XE_PAGE_SIZE; for (i = 0; i < num_updates; i++) - write_pgtable(m->gt, bb, addr + i * GEN8_PAGE_SIZE, + write_pgtable(m->gt, bb, addr + i * XE_PAGE_SIZE, &updates[i], pt_update); } else { /* phys pages, no preamble required */ diff --git a/drivers/gpu/drm/xe/xe_pt.c 
b/drivers/gpu/drm/xe/xe_pt.c index 7aa12f86e55b..f15282996c3b 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -63,8 +63,8 @@ u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, u64 pde; bool is_vram; - pde = xe_bo_addr(bo, bo_offset, GEN8_PAGE_SIZE, &is_vram); - pde |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram); + pde |= XE_PAGE_PRESENT | XE_PAGE_RW; XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_vram); @@ -100,10 +100,10 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, u32 pt_level) { - pte |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; + pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - if (unlikely(flags & PTE_READ_ONLY)) - pte &= ~GEN8_PAGE_RW; + if (unlikely(flags & XE_PTE_READ_ONLY)) + pte &= ~XE_PAGE_RW; /* FIXME: I don't think the PPAT handling is correct for MTL */ @@ -120,9 +120,9 @@ static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, } if (pt_level == 1) - pte |= GEN8_PDE_PS_2M; + pte |= XE_PDE_PS_2M; else if (pt_level == 2) - pte |= GEN8_PDPE_PS_1G; + pte |= XE_PDPE_PS_1G; /* XXX: Does hw support 1 GiB pages? */ XE_BUG_ON(pt_level > 2); @@ -152,14 +152,14 @@ u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, bool is_vram; if (vma) - pte = vma_addr(vma, offset, GEN8_PAGE_SIZE, &is_vram); + pte = vma_addr(vma, offset, XE_PAGE_SIZE, &is_vram); else - pte = xe_bo_addr(bo, offset, GEN8_PAGE_SIZE, &is_vram); + pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram); if (is_vram) { - pte |= GEN12_PPGTT_PTE_LM; + pte |= XE_PPGTT_PTE_LM; if (vma && vma->use_atomic_access_pte_bit) - pte |= GEN12_USM_PPGTT_PTE_AE; + pte |= XE_USM_PPGTT_PTE_AE; } return __gen8_pte_encode(pte, cache, flags, pt_level); @@ -210,7 +210,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, int err; size = !level ? 
sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) + - GEN8_PDES * sizeof(struct xe_ptw *); + XE_PDES * sizeof(struct xe_ptw *); pt = kzalloc(size, GFP_KERNEL); if (!pt) return ERR_PTR(-ENOMEM); @@ -264,7 +264,7 @@ void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, xe_map_memset(vm->xe, map, 0, 0, SZ_4K); } else { empty = __xe_pt_empty_pte(gt, vm, pt->level); - for (i = 0; i < GEN8_PDES; i++) + for (i = 0; i < XE_PDES; i++) xe_pt_write(vm->xe, map, i, empty); } } @@ -279,7 +279,7 @@ void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, */ unsigned int xe_pt_shift(unsigned int level) { - return GEN8_PTE_SHIFT + GEN8_PDE_SHIFT * level; + return XE_PTE_SHIFT + XE_PDE_SHIFT * level; } /** @@ -306,7 +306,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) if (pt->level > 0 && pt->num_live) { struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - for (i = 0; i < GEN8_PDES; i++) { + for (i = 0; i < XE_PDES; i++) { if (xe_pt_entry(pt_dir, i)) xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, deferred); @@ -488,7 +488,7 @@ xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, entry->qwords = 0; if (alloc_entries) { - entry->pt_entries = kmalloc_array(GEN8_PDES, + entry->pt_entries = kmalloc_array(XE_PDES, sizeof(*entry->pt_entries), GFP_KERNEL); if (!entry->pt_entries) @@ -648,7 +648,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, */ if (level == 0 && !xe_parent->is_compact) { if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) - pte |= GEN12_PTE_PS64; + pte |= XE_PTE_PS64; else if (XE_WARN_ON(xe_walk->needs_64K)) return -EINVAL; } @@ -698,7 +698,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 && covers && xe_pt_scan_64K(addr, next, xe_walk)) { walk->shifts = xe_compact_pt_shifts; - flags |= GEN12_PDE_64K; + flags |= XE_PDE_64K; xe_child->is_compact = true; } @@ -760,9 +760,9 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, if (is_vram) { struct xe_gt *bo_gt = xe_bo_to_gt(bo); - xe_walk.default_pte = GEN12_PPGTT_PTE_LM; + xe_walk.default_pte = XE_PPGTT_PTE_LM; if (vma && vma->use_atomic_access_pte_bit) - xe_walk.default_pte |= GEN12_USM_PPGTT_PTE_AE; + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; xe_walk.dma_offset = bo_gt->mem.vram.io_start - gt_to_xe(gt)->mem.vram.io_start; xe_walk.cache = XE_CACHE_WB; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 49aa4ddedbf2..e634bb96f9cc 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -59,7 +59,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma) bool in_kthread = !current->mm; unsigned long notifier_seq; int pinned, ret, i; - bool read_only = vma->pte_flags & PTE_READ_ONLY; + bool read_only = vma->pte_flags & XE_PTE_READ_ONLY; lockdep_assert_held(&vm->lock); XE_BUG_ON(!xe_vma_is_userptr(vma)); @@ -844,7 +844,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->start = start; vma->end = end; if (read_only) - vma->pte_flags = PTE_READ_ONLY; + vma->pte_flags = XE_PTE_READ_ONLY; if (gt_mask) { vma->gt_mask = gt_mask; @@ -899,7 +899,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) { struct xe_vm *vm = vma->vm; struct xe_device *xe = vm->xe; - bool read_only = vma->pte_flags & PTE_READ_ONLY; + bool read_only = vma->pte_flags & XE_PTE_READ_ONLY; if (xe_vma_is_userptr(vma)) { if (vma->userptr.sg) { @@ -1960,7 +1960,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - 
never enable by default */ - args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, GEN8_PAGE_SIZE); + args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); #endif return 0; @@ -2617,7 +2617,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, first->userptr.ptr, first->start, lookup->start - 1, - (first->pte_flags & PTE_READ_ONLY), + (first->pte_flags & XE_PTE_READ_ONLY), first->gt_mask); if (first->bo) xe_bo_unlock(first->bo, &ww); @@ -2648,7 +2648,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, last->userptr.ptr + chunk, last->start + chunk, last->end, - (last->pte_flags & PTE_READ_ONLY), + (last->pte_flags & XE_PTE_READ_ONLY), last->gt_mask); if (last->bo) xe_bo_unlock(last->bo, &ww); @@ -3405,7 +3405,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) return 0; } if (vm->pt_root[gt_id]) { - addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_vram); + addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE, + &is_vram); drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS"); } @@ -3416,10 +3417,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) if (is_userptr) { struct xe_res_cursor cur; - xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur); + xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, + &cur); addr = xe_res_dma(&cur); } else { - addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_vram); + addr = xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram); } drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", vma->start, vma->end, vma->end - vma->start + 1ull, -- cgit v1.2.3 From bb36f4b4ed279c7deed936957f733b2af0d3d78f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 12 Apr 2023 16:28:44 -0700 Subject: drm/xe: Rename RC0/RC6 macros Follow up commits will mass-remove the gen prefix/suffix. For GEN6_RC0 and GEN6_RC6 that would make the variable too short and easy to conflict. So, add "GT_" prefix that is also part of the register name. 
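For reference, the only consumer of these values is the RC state decode in rc_status_show(); after the rename it reads roughly as below. This is a sketch based on the xe_guc_pc.c hunk that follows -- the GEN6_GT_CORE_STATUS read is assumed from the surrounding code and is not part of this diff:

	reg = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg);

	switch (REG_FIELD_GET(RCN_MASK, reg)) {
	case GT_RC6:
		return sysfs_emit(buff, "rc6\n");
	case GT_RC0:
		return sysfs_emit(buff, "rc0\n");
	default:
		return -ENOENT;
	}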
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 ++-- drivers/gpu/drm/xe/xe_guc_pc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index c1d73f3e7bc3..fb91f04c3c6c 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -345,8 +345,8 @@ #define GEN6_GT_CORE_STATUS _MMIO(0x138060) #define RCN_MASK REG_GENMASK(2, 0) -#define GEN6_RC0 0 -#define GEN6_RC6 3 +#define GT_RC0 0 +#define GT_RC6 3 #define GEN6_GT_GFX_RC6_LOCKED _MMIO(0x138104) #define GEN6_GT_GFX_RC6 _MMIO(0x138108) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index b853831b342b..0b6d0577a8a7 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -592,9 +592,9 @@ static ssize_t rc_status_show(struct device *dev, xe_device_mem_access_put(gt_to_xe(gt)); switch (REG_FIELD_GET(RCN_MASK, reg)) { - case GEN6_RC6: + case GT_RC6: return sysfs_emit(buff, "rc6\n"); - case GEN6_RC0: + case GT_RC0: return sysfs_emit(buff, "rc0\n"); default: return -ENOENT; -- cgit v1.2.3 From 56492dacee943dd8241e29fe6a2d698d0029035c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 12 Apr 2023 16:28:45 -0700 Subject: drm/xe: Rename instruction field to avoid confusion There was both BLT_DEPTH_32 and XY_FAST_COLOR_BLT_DEPTH_32 - also add the prefix to the first to make it clear this is about the FAST_**COPY** operation. While at it, remove the GEN9_ prefix. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 4 ++-- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 9d6508d74d62..05531d43514f 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -55,8 +55,8 @@ #define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 21) #define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 -#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) -#define BLT_DEPTH_32 (3<<24) +#define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) +#define XY_FAST_COPY_BLT_DEPTH_32 (3<<24) #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) #define PVC_MEM_SET_CMD_LEN_DW 7 diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a8e66b84dc63..d7da5bf2d984 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -518,8 +518,8 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, XE_BUG_ON(pitch / 4 > S16_MAX); XE_BUG_ON(pitch > U16_MAX); - bb->cs[bb->len++] = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); - bb->cs[bb->len++] = BLT_DEPTH_32 | pitch; + bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); + bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch; bb->cs[bb->len++] = 0; bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; bb->cs[bb->len++] = lower_32_bits(dst_ofs); -- cgit v1.2.3 From e8178f8076dedf8526f8dc78f8fb9b3017991641 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sun, 16 Apr 2023 23:54:14 -0700 Subject: drm/xe/guc: Rename GEN11_SOFT_SCRATCH for clarity That register is a completely different register, it's not the same as SOFT_SCRATCH for GEN11 and beyond. Rename to to the same name as the bspec uses, including the new variant for media. 
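For reference, the resulting definitions (taken from the hunks below, with the old names noted in comments) are:

	#define VF_SW_FLAG(n)		_MMIO(0x190240 + (n) * 4)	/* was GEN11_SOFT_SCRATCH(n) */
	#define VF_SW_FLAG_COUNT	4
	#define MED_VF_SW_FLAG(n)	_MMIO(0x190310 + (n) * 4)	/* was MEDIA_SOFT_SCRATCH(n) */
	#define MED_VF_SW_FLAG_COUNT	4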
Also, move the definitions to the guc header. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 26 +++++++++++--------------- drivers/gpu/drm/xe/xe_guc_reg.h | 9 ++++++--- 2 files changed, 17 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index ee71b969bcbf..ff2df4f30e97 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -622,9 +622,6 @@ int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); } -#define MEDIA_SOFT_SCRATCH(n) _MMIO(0x190310 + (n) * 4) -#define MEDIA_SOFT_SCRATCH_COUNT 4 - int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 len, u32 *response_buf) { @@ -632,15 +629,17 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, struct xe_gt *gt = guc_to_gt(guc); u32 header, reply; u32 reply_reg = xe_gt_is_media_type(gt) ? - MEDIA_SOFT_SCRATCH(0).reg : GEN11_SOFT_SCRATCH(0).reg; + MED_VF_SW_FLAG(0).reg : VF_SW_FLAG(0).reg; + const u32 LAST_INDEX = VF_SW_FLAG_COUNT; int ret; int i; - BUILD_BUG_ON(GEN11_SOFT_SCRATCH_COUNT != MEDIA_SOFT_SCRATCH_COUNT); + BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); + XE_BUG_ON(guc->ct.enabled); XE_BUG_ON(!len); - XE_BUG_ON(len > GEN11_SOFT_SCRATCH_COUNT); - XE_BUG_ON(len > MEDIA_SOFT_SCRATCH_COUNT); + XE_BUG_ON(len > VF_SW_FLAG_COUNT); + XE_BUG_ON(len > MED_VF_SW_FLAG_COUNT); XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != GUC_HXG_ORIGIN_HOST); XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != @@ -650,17 +649,14 @@ retry: /* Not in critical data-path, just do if else for GT type */ if (xe_gt_is_media_type(gt)) { for (i = 0; i < len; ++i) - xe_mmio_write32(gt, MEDIA_SOFT_SCRATCH(i).reg, + xe_mmio_write32(gt, MED_VF_SW_FLAG(i).reg, request[i]); -#define LAST_INDEX MEDIA_SOFT_SCRATCH_COUNT - 1 - xe_mmio_read32(gt, MEDIA_SOFT_SCRATCH(LAST_INDEX).reg); + xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX).reg); } else { for (i = 0; i < len; ++i) - xe_mmio_write32(gt, GEN11_SOFT_SCRATCH(i).reg, + xe_mmio_write32(gt, VF_SW_FLAG(i).reg, request[i]); -#undef LAST_INDEX -#define LAST_INDEX GEN11_SOFT_SCRATCH_COUNT - 1 - xe_mmio_read32(gt, GEN11_SOFT_SCRATCH(LAST_INDEX).reg); + xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX).reg); } xe_guc_notify(guc); @@ -724,7 +720,7 @@ proto: if (response_buf) { response_buf[0] = header; - for (i = 1; i < GEN11_SOFT_SCRATCH_COUNT; i++) + for (i = 1; i < VF_SW_FLAG_COUNT; i++) response_buf[i] = xe_mmio_read32(gt, reply_reg + i * sizeof(u32)); } diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h index efd60c186bbc..0cd38d51cc60 100644 --- a/drivers/gpu/drm/xe/xe_guc_reg.h +++ b/drivers/gpu/drm/xe/xe_guc_reg.h @@ -35,9 +35,6 @@ #define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) #define SOFT_SCRATCH_COUNT 16 -#define GEN11_SOFT_SCRATCH(n) _MMIO(0x190240 + (n) * 4) -#define GEN11_SOFT_SCRATCH_COUNT 4 - #define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) #define UOS_RSA_SCRATCH_COUNT 64 @@ -130,6 +127,12 @@ struct guc_doorbell_info { #define GUC_WD_VECS_IER _MMIO(0xC558) #define GUC_PM_P24C_IER _MMIO(0xC55C) +#define VF_SW_FLAG(n) _MMIO(0x190240 + (n) * 4) +#define VF_SW_FLAG_COUNT 4 + +#define MED_VF_SW_FLAG(n) _MMIO(0x190310 + (n) * 4) +#define MED_VF_SW_FLAG_COUNT 4 + /* GuC Interrupt Vector */ #define GUC_INTR_GUC2HOST BIT(15) #define GUC_INTR_EXEC_ERROR BIT(14) -- cgit v1.2.3 From a9b1a1361472f9094a6a3d6216d46d14b5bcc6f5 
Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 15 Apr 2023 23:37:12 -0700 Subject: drm/xe/guc: Move GuC registers to regs/ There's no good reason to keep the GuC registers outside the regs/ directory: move the header with GuC registers under that. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 154 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- drivers/gpu/drm/xe/xe_guc_reg.h | 154 ---------------------------------- drivers/gpu/drm/xe/xe_huc.c | 2 +- drivers/gpu/drm/xe/xe_uc_fw.c | 2 +- drivers/gpu/drm/xe/xe_wopcm.c | 2 +- 7 files changed, 159 insertions(+), 159 deletions(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_guc_regs.h delete mode 100644 drivers/gpu/drm/xe/xe_guc_reg.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h new file mode 100644 index 000000000000..011868ff38aa --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_REGS_H_ +#define _XE_GUC_REGS_H_ + +#include +#include + +#include "regs/xe_reg_defs.h" + +/* Definitions of GuC H/W registers, bits, etc */ + +#define GUC_STATUS _MMIO(0xc000) +#define GS_RESET_SHIFT 0 +#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) +#define GS_BOOTROM_SHIFT 1 +#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) +#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) +#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) +#define GS_UKERNEL_SHIFT 8 +#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) +#define GS_MIA_SHIFT 16 +#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) +#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) +#define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT) +#define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT) +#define GS_AUTH_STATUS_SHIFT 30 +#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) + +#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) +#define SOFT_SCRATCH_COUNT 16 + +#define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) +#define UOS_RSA_SCRATCH_COUNT 64 + +#define DMA_ADDR_0_LOW _MMIO(0xc300) +#define DMA_ADDR_0_HIGH _MMIO(0xc304) +#define DMA_ADDR_1_LOW _MMIO(0xc308) +#define DMA_ADDR_1_HIGH _MMIO(0xc30c) +#define DMA_ADDRESS_SPACE_WOPCM (7 << 16) +#define DMA_ADDRESS_SPACE_GTT (8 << 16) +#define DMA_COPY_SIZE _MMIO(0xc310) +#define DMA_CTRL _MMIO(0xc314) +#define HUC_UKERNEL (1<<9) +#define UOS_MOVE (1<<4) +#define START_DMA (1<<0) +#define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define GUC_WOPCM_OFFSET_VALID (1<<0) +#define HUC_LOADING_AGENT_VCR (0<<1) +#define HUC_LOADING_AGENT_GUC (1<<1) +#define GUC_WOPCM_OFFSET_SHIFT 14 +#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) +#define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) + +#define HUC_STATUS2 _MMIO(0xD3B0) +#define HUC_FW_VERIFIED (1<<7) + +#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC) +#define HUC_LOAD_SUCCESSFUL (1 << 0) + +#define GUC_WOPCM_SIZE _MMIO(0xc050) +#define GUC_WOPCM_SIZE_LOCKED (1<<0) +#define GUC_WOPCM_SIZE_SHIFT 12 +#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) + +#define GEN8_GT_PM_CONFIG _MMIO(0x138140) +#define GEN9LP_GT_PM_CONFIG _MMIO(0x138140) +#define GEN9_GT_PM_CONFIG _MMIO(0x13816c) +#define GT_DOORBELL_ENABLE (1<<0) + +#define GEN8_GTCR _MMIO(0x4274) 
+#define GEN8_GTCR_INVALIDATE (1<<0) + +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) + +#define GUC_ARAT_C6DIS _MMIO(0xA178) + +#define GUC_SHIM_CONTROL _MMIO(0xc064) +#define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0) +#define GUC_ENABLE_READ_CACHE_LOGIC (1<<1) +#define GUC_ENABLE_MIA_CACHING (1<<2) +#define GUC_GEN10_MSGCH_ENABLE (1<<4) +#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9) +#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10) +#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) +#define GUC_GEN10_SHIM_WC_ENABLE (1<<21) +#define PVC_GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) +#define PVC_MOCS_UC_INDEX 1 +#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK,\ + index) + +#define GUC_SEND_INTERRUPT _MMIO(0xc4c8) +#define GUC_SEND_TRIGGER (1<<0) +#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) + +#define GUC_NUM_DOORBELLS 256 + +/* format of the HW-monitored doorbell cacheline */ +struct guc_doorbell_info { + u32 db_status; +#define GUC_DOORBELL_DISABLED 0 +#define GUC_DOORBELL_ENABLED 1 + + u32 cookie; + u32 reserved[14]; +} __packed; + +#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) +#define GEN8_DRB_VALID (1<<0) +#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) + +#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08) +#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16 +#define GEN12_DOORBELLS_PER_SQIDI (0xff) +#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff) + +#define DE_GUCRMR _MMIO(0x44054) + +#define GUC_BCS_RCS_IER _MMIO(0xC550) +#define GUC_VCS2_VCS1_IER _MMIO(0xC554) +#define GUC_WD_VECS_IER _MMIO(0xC558) +#define GUC_PM_P24C_IER _MMIO(0xC55C) + +#define VF_SW_FLAG(n) _MMIO(0x190240 + (n) * 4) +#define VF_SW_FLAG_COUNT 4 + +#define MED_VF_SW_FLAG(n) _MMIO(0x190310 + (n) * 4) +#define MED_VF_SW_FLAG_COUNT 4 + +/* GuC Interrupt Vector */ +#define GUC_INTR_GUC2HOST BIT(15) +#define GUC_INTR_EXEC_ERROR BIT(14) +#define GUC_INTR_DISPLAY_EVENT BIT(13) +#define GUC_INTR_SEM_SIG BIT(12) +#define GUC_INTR_IOMMU2GUC BIT(11) +#define GUC_INTR_DOORBELL_RANG BIT(10) +#define GUC_INTR_DMA_DONE BIT(9) +#define GUC_INTR_FATAL_ERROR BIT(8) +#define GUC_INTR_NOTIF_ERROR BIT(7) +#define GUC_INTR_SW_INT_6 BIT(6) +#define GUC_INTR_SW_INT_5 BIT(5) +#define GUC_INTR_SW_INT_4 BIT(4) +#define GUC_INTR_SW_INT_3 BIT(3) +#define GUC_INTR_SW_INT_2 BIT(2) +#define GUC_INTR_SW_INT_1 BIT(1) +#define GUC_INTR_SW_INT_0 BIT(0) + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index ff2df4f30e97..e00177f4d294 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -6,6 +6,7 @@ #include "xe_guc.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" @@ -15,7 +16,6 @@ #include "xe_guc_hwconfig.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" -#include "xe_guc_reg.h" #include "xe_guc_submit.h" #include "xe_mmio.h" #include "xe_platform_types.h" diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index fd9911ffeae4..fe1d5be1241e 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -9,10 +9,10 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" #include "xe_bo.h" #include "xe_gt.h" #include "xe_guc.h" -#include "xe_guc_reg.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" diff --git a/drivers/gpu/drm/xe/xe_guc_reg.h b/drivers/gpu/drm/xe/xe_guc_reg.h deleted file mode 100644 index 0cd38d51cc60..000000000000 --- 
a/drivers/gpu/drm/xe/xe_guc_reg.h +++ /dev/null @@ -1,154 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_GUC_REG_H_ -#define _XE_GUC_REG_H_ - -#include -#include - -#include "regs/xe_reg_defs.h" - -/* Definitions of GuC H/W registers, bits, etc */ - -#define GUC_STATUS _MMIO(0xc000) -#define GS_RESET_SHIFT 0 -#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) -#define GS_BOOTROM_SHIFT 1 -#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) -#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) -#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) -#define GS_UKERNEL_SHIFT 8 -#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) -#define GS_MIA_SHIFT 16 -#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) -#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) -#define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT) -#define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT) -#define GS_AUTH_STATUS_SHIFT 30 -#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT) -#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) -#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) - -#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) -#define SOFT_SCRATCH_COUNT 16 - -#define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) -#define UOS_RSA_SCRATCH_COUNT 64 - -#define DMA_ADDR_0_LOW _MMIO(0xc300) -#define DMA_ADDR_0_HIGH _MMIO(0xc304) -#define DMA_ADDR_1_LOW _MMIO(0xc308) -#define DMA_ADDR_1_HIGH _MMIO(0xc30c) -#define DMA_ADDRESS_SPACE_WOPCM (7 << 16) -#define DMA_ADDRESS_SPACE_GTT (8 << 16) -#define DMA_COPY_SIZE _MMIO(0xc310) -#define DMA_CTRL _MMIO(0xc314) -#define HUC_UKERNEL (1<<9) -#define UOS_MOVE (1<<4) -#define START_DMA (1<<0) -#define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) -#define GUC_WOPCM_OFFSET_VALID (1<<0) -#define HUC_LOADING_AGENT_VCR (0<<1) -#define HUC_LOADING_AGENT_GUC (1<<1) -#define GUC_WOPCM_OFFSET_SHIFT 14 -#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) -#define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) - -#define HUC_STATUS2 _MMIO(0xD3B0) -#define HUC_FW_VERIFIED (1<<7) - -#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC) -#define HUC_LOAD_SUCCESSFUL (1 << 0) - -#define GUC_WOPCM_SIZE _MMIO(0xc050) -#define GUC_WOPCM_SIZE_LOCKED (1<<0) -#define GUC_WOPCM_SIZE_SHIFT 12 -#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) - -#define GEN8_GT_PM_CONFIG _MMIO(0x138140) -#define GEN9LP_GT_PM_CONFIG _MMIO(0x138140) -#define GEN9_GT_PM_CONFIG _MMIO(0x13816c) -#define GT_DOORBELL_ENABLE (1<<0) - -#define GEN8_GTCR _MMIO(0x4274) -#define GEN8_GTCR_INVALIDATE (1<<0) - -#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) -#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) - -#define GUC_ARAT_C6DIS _MMIO(0xA178) - -#define GUC_SHIM_CONTROL _MMIO(0xc064) -#define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0) -#define GUC_ENABLE_READ_CACHE_LOGIC (1<<1) -#define GUC_ENABLE_MIA_CACHING (1<<2) -#define GUC_GEN10_MSGCH_ENABLE (1<<4) -#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9) -#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10) -#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) -#define GUC_GEN10_SHIM_WC_ENABLE (1<<21) -#define PVC_GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) -#define PVC_MOCS_UC_INDEX 1 -#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK,\ - index) - -#define GUC_SEND_INTERRUPT _MMIO(0xc4c8) -#define GUC_SEND_TRIGGER (1<<0) -#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) - -#define GUC_NUM_DOORBELLS 256 - -/* format of the HW-monitored doorbell cacheline */ -struct guc_doorbell_info { - u32 db_status; -#define 
GUC_DOORBELL_DISABLED 0 -#define GUC_DOORBELL_ENABLED 1 - - u32 cookie; - u32 reserved[14]; -} __packed; - -#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) -#define GEN8_DRB_VALID (1<<0) -#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) - -#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08) -#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16 -#define GEN12_DOORBELLS_PER_SQIDI (0xff) -#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff) - -#define DE_GUCRMR _MMIO(0x44054) - -#define GUC_BCS_RCS_IER _MMIO(0xC550) -#define GUC_VCS2_VCS1_IER _MMIO(0xC554) -#define GUC_WD_VECS_IER _MMIO(0xC558) -#define GUC_PM_P24C_IER _MMIO(0xC55C) - -#define VF_SW_FLAG(n) _MMIO(0x190240 + (n) * 4) -#define VF_SW_FLAG_COUNT 4 - -#define MED_VF_SW_FLAG(n) _MMIO(0x190310 + (n) * 4) -#define MED_VF_SW_FLAG_COUNT 4 - -/* GuC Interrupt Vector */ -#define GUC_INTR_GUC2HOST BIT(15) -#define GUC_INTR_EXEC_ERROR BIT(14) -#define GUC_INTR_DISPLAY_EVENT BIT(13) -#define GUC_INTR_SEM_SIG BIT(12) -#define GUC_INTR_IOMMU2GUC BIT(11) -#define GUC_INTR_DOORBELL_RANG BIT(10) -#define GUC_INTR_DMA_DONE BIT(9) -#define GUC_INTR_FATAL_ERROR BIT(8) -#define GUC_INTR_NOTIF_ERROR BIT(7) -#define GUC_INTR_SW_INT_6 BIT(6) -#define GUC_INTR_SW_INT_5 BIT(5) -#define GUC_INTR_SW_INT_4 BIT(4) -#define GUC_INTR_SW_INT_3 BIT(3) -#define GUC_INTR_SW_INT_2 BIT(2) -#define GUC_INTR_SW_INT_1 BIT(1) -#define GUC_INTR_SW_INT_0 BIT(0) - -#endif diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index a9448c6f6418..a1c3e54faa6e 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -5,12 +5,12 @@ #include "xe_huc.h" +#include "regs/xe_guc_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_guc.h" -#include "xe_guc_reg.h" #include "xe_mmio.h" #include "xe_uc_fw.h" diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2c2080928a82..bb8d98645332 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -8,11 +8,11 @@ #include +#include "regs/xe_guc_regs.h" #include "xe_bo.h" #include "xe_device_types.h" #include "xe_force_wake.h" #include "xe_gt.h" -#include "xe_guc_reg.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_uc_fw.h" diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index c8cc3f5e6154..7b5014aea9c8 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -5,10 +5,10 @@ #include "xe_wopcm.h" +#include "regs/xe_guc_regs.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" -#include "xe_guc_reg.h" #include "xe_mmio.h" #include "xe_uc_fw.h" -- cgit v1.2.3 From 1bd4db39dee51161c48e8669e410fff0a0f69be1 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 27 Apr 2023 13:44:46 +0200 Subject: drm/xe: Remove extra xe_mmio_read32 from xe_mmio_wait32 Commit 7aaec3a623ad ("drm/xe: Let's return last value read on xe_mmio_wait32.") mentions that we should return the last value read, but we never actually return it. This breaks display which depends on the value being actually returned where needed. 
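With the duplicated read dropped, the poll loop is expected to look roughly like this (a sketch pieced together from the xe_mmio.h context in the hunk below; timeout bookkeeping and delays are elided):

	for (;;) {
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) == val) {
			ret = 0;
			break;
		}
		/* timeout check and delay elided */
	}
	/* "read" now holds the last value sampled, i.e. the value the loop
	 * actually tested, which is what gets handed back to the caller.
	 */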
Signed-off-by: Maarten Lankhorst Cc: Rodrigo Vivi Fixes: 7aaec3a623ad ("drm/xe: Let's return last value read on xe_mmio_wait32.") Reviewed-by: Lucas De Marchi Reviewed-by: Mika Kuoppala Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/257 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index be7ba2813d58..b72a0a75259f 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -93,9 +93,6 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask, u32 read; for (;;) { - if ((xe_mmio_read32(gt, reg) & mask) == val) - return 0; - read = xe_mmio_read32(gt, reg); if ((read & mask) == val) { ret = 0; -- cgit v1.2.3 From 7b829f6dd638c2cb45c7710bc7cd1d0395ea9bc1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:47 -0700 Subject: drm/xe/guc: Convert GuC registers to REG_FIELD/REG_BIT Cleanup GuC register declarations by converting them to use REG_FIELD, REG_BIT and REG_GENMASK. While converting, also reorder the bitfields so they follow the convention of declaring the higher bits first. v2: - Drop unused HUC_LOADING_AGENT_VCR and DMA_ADDRESS_SPACE_GTT (Matt Roper) - Simplify HUC_LOADING_AGENT_GUC define (Matt Roper) Reviewed-by: Matt Atwood Link: https://lore.kernel.org/r/20230427223256.1432787-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 165 ++++++++++++++++------------------ drivers/gpu/drm/xe/xe_ggtt.c | 6 +- drivers/gpu/drm/xe/xe_guc.c | 8 +- drivers/gpu/drm/xe/xe_guc_ads.c | 3 +- 4 files changed, 86 insertions(+), 96 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 011868ff38aa..1960f9e78ec4 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -14,23 +14,18 @@ /* Definitions of GuC H/W registers, bits, etc */ #define GUC_STATUS _MMIO(0xc000) -#define GS_RESET_SHIFT 0 -#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT) -#define GS_BOOTROM_SHIFT 1 -#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT) -#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT) -#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT) -#define GS_UKERNEL_SHIFT 8 -#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT) -#define GS_MIA_SHIFT 16 -#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT) -#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT) -#define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT) -#define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT) -#define GS_AUTH_STATUS_SHIFT 30 -#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT) -#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT) -#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT) +#define GS_AUTH_STATUS_MASK REG_GENMASK(31, 30) +#define GS_AUTH_STATUS_BAD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1) +#define GS_AUTH_STATUS_GOOD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2) +#define GS_MIA_MASK REG_GENMASK(18, 16) +#define GS_MIA_CORE_STATE REG_FIELD_PREP(GS_MIA_MASK, 0x1) +#define GS_MIA_HALT_REQUESTED REG_FIELD_PREP(GS_MIA_MASK, 0x2) +#define GS_MIA_ISR_ENTRY REG_FIELD_PREP(GS_MIA_MASK, 0x4) +#define GS_UKERNEL_MASK REG_GENMASK(15, 8) +#define GS_BOOTROM_MASK REG_GENMASK(7, 1) +#define GS_BOOTROM_RSA_FAILED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50) +#define GS_BOOTROM_JUMP_PASSED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76) +#define 
GS_MIA_IN_RESET REG_BIT(0) #define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) #define SOFT_SCRATCH_COUNT 16 @@ -42,90 +37,86 @@ #define DMA_ADDR_0_HIGH _MMIO(0xc304) #define DMA_ADDR_1_LOW _MMIO(0xc308) #define DMA_ADDR_1_HIGH _MMIO(0xc30c) -#define DMA_ADDRESS_SPACE_WOPCM (7 << 16) -#define DMA_ADDRESS_SPACE_GTT (8 << 16) +#define DMA_ADDR_SPACE_MASK REG_GENMASK(20, 16) +#define DMA_ADDRESS_SPACE_WOPCM REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7) #define DMA_COPY_SIZE _MMIO(0xc310) #define DMA_CTRL _MMIO(0xc314) -#define HUC_UKERNEL (1<<9) -#define UOS_MOVE (1<<4) -#define START_DMA (1<<0) +#define HUC_UKERNEL REG_BIT(9) +#define UOS_MOVE REG_BIT(4) +#define START_DMA REG_BIT(0) #define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) -#define GUC_WOPCM_OFFSET_VALID (1<<0) -#define HUC_LOADING_AGENT_VCR (0<<1) -#define HUC_LOADING_AGENT_GUC (1<<1) #define GUC_WOPCM_OFFSET_SHIFT 14 -#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) -#define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) +#define GUC_WOPCM_OFFSET_MASK REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT) +#define HUC_LOADING_AGENT_GUC REG_BIT(1) +#define GUC_WOPCM_OFFSET_VALID REG_BIT(0) +#define GUC_MAX_IDLE_COUNT _MMIO(0xc3e4) -#define HUC_STATUS2 _MMIO(0xD3B0) -#define HUC_FW_VERIFIED (1<<7) +#define HUC_STATUS2 _MMIO(0xd3b0) +#define HUC_FW_VERIFIED REG_BIT(7) -#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC) -#define HUC_LOAD_SUCCESSFUL (1 << 0) +#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xc1dc) +#define HUC_LOAD_SUCCESSFUL REG_BIT(0) #define GUC_WOPCM_SIZE _MMIO(0xc050) -#define GUC_WOPCM_SIZE_LOCKED (1<<0) -#define GUC_WOPCM_SIZE_SHIFT 12 -#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) +#define GUC_WOPCM_SIZE_MASK REG_GENMASK(31, 12) +#define GUC_WOPCM_SIZE_LOCKED REG_BIT(0) #define GEN8_GT_PM_CONFIG _MMIO(0x138140) #define GEN9LP_GT_PM_CONFIG _MMIO(0x138140) #define GEN9_GT_PM_CONFIG _MMIO(0x13816c) -#define GT_DOORBELL_ENABLE (1<<0) +#define GT_DOORBELL_ENABLE REG_BIT(0) #define GEN8_GTCR _MMIO(0x4274) -#define GEN8_GTCR_INVALIDATE (1<<0) - -#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) -#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) - -#define GUC_ARAT_C6DIS _MMIO(0xA178) - -#define GUC_SHIM_CONTROL _MMIO(0xc064) -#define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0) -#define GUC_ENABLE_READ_CACHE_LOGIC (1<<1) -#define GUC_ENABLE_MIA_CACHING (1<<2) -#define GUC_GEN10_MSGCH_ENABLE (1<<4) -#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9) -#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10) -#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) -#define GUC_GEN10_SHIM_WC_ENABLE (1<<21) +#define GEN8_GTCR_INVALIDATE REG_BIT(0) + +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) + +#define GUC_ARAT_C6DIS _MMIO(0xa178) + +#define GUC_SHIM_CONTROL _MMIO(0xc064) #define PVC_GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) -#define PVC_MOCS_UC_INDEX 1 -#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK,\ +#define PVC_GUC_MOCS_UC_INDEX 1 +#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK, \ index) +#define GUC_GEN10_SHIM_WC_ENABLE REG_BIT(21) +#define GUC_ENABLE_MIA_CLOCK_GATING REG_BIT(15) +#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA REG_BIT(10) +#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA REG_BIT(9) +#define GUC_GEN10_MSGCH_ENABLE REG_BIT(4) +#define GUC_ENABLE_MIA_CACHING REG_BIT(2) +#define GUC_ENABLE_READ_CACHE_LOGIC REG_BIT(1) +#define GUC_DISABLE_SRAM_INIT_TO_ZEROES REG_BIT(0) -#define GUC_SEND_INTERRUPT _MMIO(0xc4c8) -#define GUC_SEND_TRIGGER (1<<0) -#define 
GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) -#define GUC_NUM_DOORBELLS 256 +#define GUC_SEND_INTERRUPT _MMIO(0xc4c8) +#define GUC_SEND_TRIGGER REG_BIT(0) +#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) + +#define GUC_NUM_DOORBELLS 256 /* format of the HW-monitored doorbell cacheline */ struct guc_doorbell_info { u32 db_status; -#define GUC_DOORBELL_DISABLED 0 -#define GUC_DOORBELL_ENABLED 1 +#define GUC_DOORBELL_DISABLED 0 +#define GUC_DOORBELL_ENABLED 1 u32 cookie; u32 reserved[14]; } __packed; -#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) -#define GEN8_DRB_VALID (1<<0) -#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) +#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) +#define GEN8_DRB_VALID REG_BIT(0) +#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) #define GEN12_DIST_DBS_POPULATED _MMIO(0xd08) -#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16 -#define GEN12_DOORBELLS_PER_SQIDI (0xff) -#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff) - -#define DE_GUCRMR _MMIO(0x44054) +#define GEN12_DOORBELLS_PER_SQIDI_MASK REG_GENMASK(23, 16) +#define GEN12_SQIDIS_DOORBELL_EXIST_MASK REG_GENMASK(15, 0) -#define GUC_BCS_RCS_IER _MMIO(0xC550) -#define GUC_VCS2_VCS1_IER _MMIO(0xC554) -#define GUC_WD_VECS_IER _MMIO(0xC558) -#define GUC_PM_P24C_IER _MMIO(0xC55C) +#define GUC_BCS_RCS_IER _MMIO(0xC550) +#define GUC_VCS2_VCS1_IER _MMIO(0xC554) +#define GUC_WD_VECS_IER _MMIO(0xC558) +#define GUC_PM_P24C_IER _MMIO(0xC55C) #define VF_SW_FLAG(n) _MMIO(0x190240 + (n) * 4) #define VF_SW_FLAG_COUNT 4 @@ -134,21 +125,21 @@ struct guc_doorbell_info { #define MED_VF_SW_FLAG_COUNT 4 /* GuC Interrupt Vector */ -#define GUC_INTR_GUC2HOST BIT(15) -#define GUC_INTR_EXEC_ERROR BIT(14) -#define GUC_INTR_DISPLAY_EVENT BIT(13) -#define GUC_INTR_SEM_SIG BIT(12) -#define GUC_INTR_IOMMU2GUC BIT(11) -#define GUC_INTR_DOORBELL_RANG BIT(10) -#define GUC_INTR_DMA_DONE BIT(9) -#define GUC_INTR_FATAL_ERROR BIT(8) -#define GUC_INTR_NOTIF_ERROR BIT(7) -#define GUC_INTR_SW_INT_6 BIT(6) -#define GUC_INTR_SW_INT_5 BIT(5) -#define GUC_INTR_SW_INT_4 BIT(4) -#define GUC_INTR_SW_INT_3 BIT(3) -#define GUC_INTR_SW_INT_2 BIT(2) -#define GUC_INTR_SW_INT_1 BIT(1) -#define GUC_INTR_SW_INT_0 BIT(0) +#define GUC_INTR_GUC2HOST BIT(15) +#define GUC_INTR_EXEC_ERROR BIT(14) +#define GUC_INTR_DISPLAY_EVENT BIT(13) +#define GUC_INTR_SEM_SIG BIT(12) +#define GUC_INTR_IOMMU2GUC BIT(11) +#define GUC_INTR_DOORBELL_RANG BIT(10) +#define GUC_INTR_DMA_DONE BIT(9) +#define GUC_INTR_FATAL_ERROR BIT(8) +#define GUC_INTR_NOTIF_ERROR BIT(7) +#define GUC_INTR_SW_INT_6 BIT(6) +#define GUC_INTR_SW_INT_5 BIT(5) +#define GUC_INTR_SW_INT_4 BIT(4) +#define GUC_INTR_SW_INT_3 BIT(3) +#define GUC_INTR_SW_INT_2 BIT(2) +#define GUC_INTR_SW_INT_1 BIT(1) +#define GUC_INTR_SW_INT_0 BIT(0) #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index dbc45ef084b4..10a262a0c4cd 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -186,11 +186,11 @@ err: } #define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) -#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) #define PVC_GUC_TLB_INV_DESC0 _MMIO(0xcf7c) -#define PVC_GUC_TLB_INV_DESC0_VALID (1 << 0) +#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) #define PVC_GUC_TLB_INV_DESC1 _MMIO(0xcf80) -#define PVC_GUC_TLB_INV_DESC1_INVALIDATE (1 << 6) +#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) void xe_ggtt_invalidate(struct xe_gt *gt) { diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index e00177f4d294..d18f2e25ce56 100644 --- 
a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -357,7 +357,7 @@ static void guc_prepare_xfer(struct xe_guc *guc) GUC_ENABLE_MIA_CACHING; if (xe->info.platform == XE_PVC) - shim_flags |= PVC_GUC_MOCS_INDEX(PVC_MOCS_UC_INDEX); + shim_flags |= PVC_GUC_MOCS_INDEX(PVC_GUC_MOCS_UC_INDEX); /* Must program this register before loading the ucode with DMA */ xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags); @@ -848,11 +848,11 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) drm_printf(p, "\nGuC status 0x%08x:\n", status); drm_printf(p, "\tBootrom status = 0x%x\n", - (status & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT); + REG_FIELD_GET(GS_BOOTROM_MASK, status)); drm_printf(p, "\tuKernel status = 0x%x\n", - (status & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT); + REG_FIELD_GET(GS_UKERNEL_MASK, status)); drm_printf(p, "\tMIA Core status = 0x%x\n", - (status & GS_MIA_MASK) >> GS_MIA_SHIFT); + REG_FIELD_GET(GS_MIA_MASK, status)); drm_printf(p, "\tLog level = %d\n", xe_guc_log_get_level(&guc->log)); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index fe1d5be1241e..d4fc2d357a78 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -561,8 +561,7 @@ static void guc_doorbell_init(struct xe_guc_ads *ads) ads_blob_write(ads, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], - ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) - & GEN12_DOORBELLS_PER_SQIDI) + 1); + REG_FIELD_GET(GEN12_DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1); } } -- cgit v1.2.3 From d9b79ad275e7a98c566b3ac4b32950142d6bf9ad Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:48 -0700 Subject: drm/xe: Drop gen afixes from registers The defines for the registers were brought over from i915 while bootstrapping the driver. As xe supports TGL and later only, it doesn't make sense to keep the GEN* prefixes and suffixes in the registers: TGL is graphics version 12, previously called "GEN12". So drop the prefix everywhere. v2: - Also drop _TGL suffix and reword commit message as suggested by Matt Roper. 
While at it, rename VSUNIT_CLKGATE_DIS_TGL to VSUNIT_CLKGATE2_DIS with the additional "2", so it doesn't clash with the define for the other register Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230427223256.1432787-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 8 +- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 2 +- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 191 +++++++++++++++--------------- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 32 +++-- drivers/gpu/drm/xe/regs/xe_regs.h | 41 ++++--- drivers/gpu/drm/xe/xe_execlist.c | 8 +- drivers/gpu/drm/xe/xe_force_wake.c | 18 +-- drivers/gpu/drm/xe/xe_ggtt.c | 12 +- drivers/gpu/drm/xe/xe_gt.c | 4 +- drivers/gpu/drm/xe/xe_gt_clock.c | 26 ++-- drivers/gpu/drm/xe/xe_gt_mcr.c | 30 ++--- drivers/gpu/drm/xe/xe_guc.c | 10 +- drivers/gpu/drm/xe/xe_guc_ads.c | 11 +- drivers/gpu/drm/xe/xe_guc_pc.c | 12 +- drivers/gpu/drm/xe/xe_huc.c | 4 +- drivers/gpu/drm/xe/xe_hw_engine.c | 44 +++---- drivers/gpu/drm/xe/xe_mmio.c | 3 +- drivers/gpu/drm/xe/xe_mocs.c | 7 +- drivers/gpu/drm/xe/xe_reg_whitelist.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 8 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 2 +- drivers/gpu/drm/xe/xe_tuning.c | 2 +- drivers/gpu/drm/xe/xe_wa.c | 86 +++++++------- 23 files changed, 280 insertions(+), 283 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index a1e1d1c206fa..9d61f5941289 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -22,8 +22,8 @@ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_PSMI_CTL(base) _MMIO((base) + 0x50) -#define GEN8_RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) -#define GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) +#define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) +#define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) #define RING_ACTHD_UDW(base) _MMIO((base) + 0x5c) #define RING_DMA_FADD_UDW(base) _MMIO((base) + 0x60) @@ -53,8 +53,8 @@ #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) -#define RING_MODE_GEN7(base) _MMIO((base) + 0x29c) -#define GEN11_GFX_DISABLE_LEGACY_MODE (1 << 3) +#define RING_MODE(base) _MMIO((base) + 0x29c) +#define GFX_DISABLE_LEGACY_MODE (1 << 3) #define RING_TIMESTAMP(base) _MMIO((base) + 0x358) diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 05531d43514f..0f9c5b0b8a3b 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -37,7 +37,7 @@ #define MI_FLUSH_DW_OP_STOREDW (1<<14) #define MI_FLUSH_DW_USE_GTT (1<<2) -#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 1) #define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) #define SRC_ACCESS_TYPE_SHIFT 21 diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index fb91f04c3c6c..5a0a08c84f3d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -10,18 +10,18 @@ /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 -#define 
GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 -#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 -#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 -#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) - -#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n) _MMIO(0xd50 + (n) * 4) -#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n) _MMIO(0xd70 + (n) * 4) -#define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0xd84) +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 +#define RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 +#define RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) + +#define FORCEWAKE_ACK_MEDIA_VDBOX(n) _MMIO(0xd50 + (n) * 4) +#define FORCEWAKE_ACK_MEDIA_VEBOX(n) _MMIO(0xd70 + (n) * 4) +#define FORCEWAKE_ACK_RENDER _MMIO(0xd84) #define GMD_ID _MMIO(0xd8c) #define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) @@ -30,49 +30,49 @@ #define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) -#define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ +#define LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ #define LNCFCMOCS_REG_COUNT 32 #define MCFG_MCR_SELECTOR _MMIO(0xfd0) #define MTL_MCR_SELECTOR _MMIO(0xfd4) #define SF_MCR_SELECTOR _MMIO(0xfd8) -#define GEN8_MCR_SELECTOR _MMIO(0xfdc) +#define MCR_SELECTOR _MMIO(0xfdc) #define GAM_MCR_SELECTOR _MMIO(0xfe0) -#define GEN11_MCR_MULTICAST REG_BIT(31) -#define GEN11_MCR_SLICE(slice) (((slice) & 0xf) << 27) -#define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) -#define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) -#define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) +#define MCR_MULTICAST REG_BIT(31) +#define MCR_SLICE(slice) (((slice) & 0xf) << 27) +#define MCR_SLICE_MASK MCR_SLICE(0xf) +#define MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) +#define MCR_SUBSLICE_MASK MCR_SUBSLICE(0x7) #define MTL_MCR_GROUPID REG_GENMASK(11, 8) #define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) -#define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) -#define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14) +#define FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) +#define FFSC_PERCTX_PREEMPT_CTRL (1 << 14) #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) -#define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) +#define CS_DEBUG_MODE1 _MMIO(0x20ec) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) -#define GEN12_REPLAY_MODE_GRANULARITY REG_BIT(0) +#define REPLAY_MODE_GRANULARITY REG_BIT(0) #define PS_INVOCATION_COUNT _MMIO(0x2348) -#define GEN8_CS_CHICKEN1 _MMIO(0x2580) -#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0) -#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) -#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0) -#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 1) -#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) -#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) +#define CS_CHICKEN1 _MMIO(0x2580) +#define PREEMPT_3D_OBJECT_LEVEL (1 << 0) +#define PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) +#define PREEMPT_GPGPU_MID_THREAD_LEVEL PREEMPT_GPGPU_LEVEL(0, 0) +#define PREEMPT_GPGPU_THREAD_GROUP_LEVEL PREEMPT_GPGPU_LEVEL(0, 1) +#define PREEMPT_GPGPU_COMMAND_LEVEL PREEMPT_GPGPU_LEVEL(1, 0) 
+#define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) -#define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ -#define GEN12_CCS_AUX_INV _MMIO(0x4208) +#define GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ +#define CCS_AUX_INV _MMIO(0x4208) -#define GEN12_VD0_AUX_INV _MMIO(0x4218) -#define GEN12_VE0_AUX_INV _MMIO(0x4238) +#define VD0_AUX_INV _MMIO(0x4218) +#define VE0_AUX_INV _MMIO(0x4238) -#define GEN12_VE1_AUX_INV _MMIO(0x42b8) +#define VE1_AUX_INV _MMIO(0x42b8) #define AUX_INV REG_BIT(0) #define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) @@ -88,7 +88,7 @@ #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) -#define GEN12_FF_MODE2 _MMIO(0x6604) +#define FF_MODE2 _MMIO(0x6604) #define XEHP_FF_MODE2 MCR_REG(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) @@ -101,22 +101,21 @@ #define XEHP_PSS_MODE2 MCR_REG(0x703c) #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) -#define HIZ_CHICKEN _MMIO(0x7018) +#define HIZ_CHICKEN _MMIO(0x7018) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) -#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) +#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) -/* GEN7 chicken */ -#define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) +#define COMMON_SLICE_CHICKEN1 _MMIO(0x7010) #define COMMON_SLICE_CHICKEN4 _MMIO(0x7300) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) -#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) -#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304) +#define COMMON_SLICE_CHICKEN3 _MMIO(0x7304) +#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304) #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) -#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) -#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) -#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) +#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) +#define BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) +#define DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) #define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) @@ -130,21 +129,21 @@ #define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESS REG_BIT(30) -#define GEN10_MIRROR_FUSE3 _MMIO(0x9118) -#define GEN10_L3BANK_PAIR_COUNT 4 -#define GEN10_L3BANK_MASK 0x0F +#define MIRROR_FUSE3 _MMIO(0x9118) +#define L3BANK_PAIR_COUNT 4 +#define L3BANK_MASK 0x0F /* on Xe_HP the same fuses indicates mslices instead of L3 banks */ -#define GEN12_MAX_MSLICES 4 -#define GEN12_MEML3_EN_MASK 0x0F +#define MAX_MSLICES 4 +#define MEML3_EN_MASK 0x0F /* Fuse readout registers for GT */ #define XEHP_FUSE4 _MMIO(0x9114) #define GT_L3_EXC_MASK REG_GENMASK(6, 4) -#define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) -#define GEN11_GT_VDBOX_DISABLE_MASK 0xff -#define GEN11_GT_VEBOX_DISABLE_SHIFT 16 -#define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) +#define GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) +#define GT_VDBOX_DISABLE_MASK 0xff +#define GT_VEBOX_DISABLE_SHIFT 16 +#define GT_VEBOX_DISABLE_MASK (0x0f << GT_VEBOX_DISABLE_SHIFT) #define XELP_EU_ENABLE _MMIO(0x9134) /* "_DISABLE" on Xe_LP */ #define XELP_EU_MASK REG_GENMASK(7, 0) @@ -152,14 +151,13 @@ #define XEHP_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) #define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT _MMIO(0x9148) -#define GEN6_GDRST _MMIO(0x941c) -#define GEN11_GRDOM_GUC REG_BIT(3) -#define GEN6_GRDOM_FULL (1 << 0) -#define GEN11_GRDOM_FULL GEN6_GRDOM_FULL +#define GDRST _MMIO(0x941c) 
+#define GRDOM_GUC REG_BIT(3) +#define GRDOM_FULL REG_BIT(0) -#define GEN7_MISCCPCTL _MMIO(0x9424) -#define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0) -#define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) +#define MISCCPCTL _MMIO(0x9424) +#define DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) +#define DOP_CLOCK_GATE_ENABLE REG_BIT((0) #define UNSLCGCTL9430 _MMIO(0x9430) #define MSQDUNIT_CLKGATE_DIS REG_BIT(3) @@ -213,10 +211,9 @@ #define L3_CR2X_CLKGATE_DIS REG_BIT(17) #define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) -#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19) -#define PSDUNIT_CLKGATE_DIS REG_BIT(5) +#define VSUNIT_CLKGATE2_DIS REG_BIT(19) -#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524) +#define SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524) #define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) #define GWUNIT_CLKGATE_DIS REG_BIT(16) @@ -226,21 +223,21 @@ #define SSMCGCTL9530 MCR_REG(0x9530) #define RTFUNIT_CLKGATE_DIS REG_BIT(18) -#define GEN10_DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550) +#define DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550) #define DFR_DISABLE (1 << 9) -#define GEN6_RPNSWREQ _MMIO(0xa008) +#define RPNSWREQ _MMIO(0xa008) #define REQ_RATIO_MASK REG_GENMASK(31, 23) -#define GEN6_RC_CONTROL _MMIO(0xa090) -#define GEN6_RC_STATE _MMIO(0xa094) +#define RC_CONTROL _MMIO(0xa090) +#define RC_STATE _MMIO(0xa094) -#define GEN6_PMINTRMSK _MMIO(0xa168) -#define GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC (1 << 31) +#define PMINTRMSK _MMIO(0xa168) +#define PMINTR_DISABLE_REDIRECT_TO_GUC (1 << 31) #define ARAT_EXPIRED_INTRMSK (1 << 9) -#define FORCEWAKE_GT_GEN9 _MMIO(0xa188) +#define FORCEWAKE_GT _MMIO(0xa188) -#define GEN9_PG_ENABLE _MMIO(0xa210) +#define PG_ENABLE _MMIO(0xa210) /* GPM unit config (Gen9+) */ #define CTC_MODE _MMIO(0xa26c) @@ -250,9 +247,9 @@ #define CTC_SHIFT_PARAMETER_SHIFT 1 #define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) -#define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) -#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4) -#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4) +#define FORCEWAKE_RENDER _MMIO(0xa278) +#define FORCEWAKE_MEDIA_VDBOX(n) _MMIO(0xa540 + (n) * 4) +#define FORCEWAKE_MEDIA_VEBOX(n) _MMIO(0xa560 + (n) * 4) #define XEHPC_LNCFMISCCFGREG0 MCR_REG(0xb01c) #define XEHPC_OVRLSCCC REG_BIT(0) @@ -282,12 +279,12 @@ #define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) #define GLOBAL_INVALIDATION_MODE REG_BIT(2) -#define GEN10_SAMPLER_MODE MCR_REG(0xe18c) +#define SAMPLER_MODE MCR_REG(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) -#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) +#define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) -#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194) +#define HALF_SLICE_CHICKEN7 MCR_REG(0xe194) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) #define CACHE_MODE_SS MCR_REG(0xe420) @@ -295,27 +292,27 @@ #define DISABLE_ECC REG_BIT(5) #define ENABLE_PREFETCH_INTO_IC REG_BIT(3) -#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c) -#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) +#define ROW_CHICKEN4 MCR_REG(0xe48c) +#define DISABLE_GRF_CLEAR REG_BIT(13) #define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) -#define GEN12_DISABLE_TDL_PUSH REG_BIT(9) -#define GEN11_DIS_PICK_2ND_EU REG_BIT(7) -#define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) +#define DISABLE_TDL_PUSH REG_BIT(9) +#define DIS_PICK_2ND_EU REG_BIT(7) +#define DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) #define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 
0x2) -#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0) +#define ROW_CHICKEN MCR_REG(0xe4f0) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) -#define GEN8_ROW_CHICKEN2 MCR_REG(0xe4f4) -#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) -#define GEN12_DISABLE_EARLY_READ REG_BIT(14) -#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) -#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) -#define GEN12_DISABLE_DOP_GATING REG_BIT(0) +#define ROW_CHICKEN2 MCR_REG(0xe4f4) +#define DISABLE_READ_SUPPRESSION REG_BIT(15) +#define DISABLE_EARLY_READ REG_BIT(14) +#define ENABLE_LARGE_GRF_MODE REG_BIT(12) +#define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define DISABLE_DOP_GATING REG_BIT(0) -#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) +#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define RT_CTRL MCR_REG(0xe530) @@ -335,21 +332,21 @@ #define SARB_CHICKEN1 MCR_REG(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) -#define GEN12_RCU_MODE _MMIO(0x14800) -#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) +#define RCU_MODE _MMIO(0x14800) +#define RCU_MODE_CCS_ENABLE REG_BIT(0) -#define FORCEWAKE_ACK_GT_GEN9 _MMIO(0x130044) +#define FORCEWAKE_ACK_GT _MMIO(0x130044) #define FORCEWAKE_KERNEL BIT(0) #define FORCEWAKE_USER BIT(1) #define FORCEWAKE_KERNEL_FALLBACK BIT(15) -#define GEN6_GT_CORE_STATUS _MMIO(0x138060) +#define GT_CORE_STATUS _MMIO(0x138060) #define RCN_MASK REG_GENMASK(2, 0) #define GT_RC0 0 #define GT_RC6 3 -#define GEN6_GT_GFX_RC6_LOCKED _MMIO(0x138104) -#define GEN6_GT_GFX_RC6 _MMIO(0x138108) +#define GT_GFX_RC6_LOCKED _MMIO(0x138104) +#define GT_GFX_RC6 _MMIO(0x138108) #define GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 1960f9e78ec4..bc9b42b38795 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -54,23 +54,21 @@ #define HUC_STATUS2 _MMIO(0xd3b0) #define HUC_FW_VERIFIED REG_BIT(7) -#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xc1dc) +#define HUC_KERNEL_LOAD_INFO _MMIO(0xc1dc) #define HUC_LOAD_SUCCESSFUL REG_BIT(0) #define GUC_WOPCM_SIZE _MMIO(0xc050) #define GUC_WOPCM_SIZE_MASK REG_GENMASK(31, 12) #define GUC_WOPCM_SIZE_LOCKED REG_BIT(0) -#define GEN8_GT_PM_CONFIG _MMIO(0x138140) -#define GEN9LP_GT_PM_CONFIG _MMIO(0x138140) -#define GEN9_GT_PM_CONFIG _MMIO(0x13816c) +#define GT_PM_CONFIG _MMIO(0x13816c) #define GT_DOORBELL_ENABLE REG_BIT(0) -#define GEN8_GTCR _MMIO(0x4274) -#define GEN8_GTCR_INVALIDATE REG_BIT(0) +#define GTCR _MMIO(0x4274) +#define GTCR_INVALIDATE REG_BIT(0) -#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) -#define GEN12_GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) +#define GUC_TLB_INV_CR _MMIO(0xcee8) +#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) #define GUC_ARAT_C6DIS _MMIO(0xa178) @@ -79,11 +77,11 @@ #define PVC_GUC_MOCS_UC_INDEX 1 #define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK, \ index) -#define GUC_GEN10_SHIM_WC_ENABLE REG_BIT(21) +#define GUC_SHIM_WC_ENABLE REG_BIT(21) #define GUC_ENABLE_MIA_CLOCK_GATING REG_BIT(15) #define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA REG_BIT(10) #define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA REG_BIT(9) -#define GUC_GEN10_MSGCH_ENABLE REG_BIT(4) +#define GUC_MSGCH_ENABLE REG_BIT(4) #define GUC_ENABLE_MIA_CACHING REG_BIT(2) #define GUC_ENABLE_READ_CACHE_LOGIC REG_BIT(1) #define GUC_DISABLE_SRAM_INIT_TO_ZEROES REG_BIT(0) @@ -91,7 +89,7 @@ #define GUC_SEND_INTERRUPT _MMIO(0xc4c8) #define GUC_SEND_TRIGGER REG_BIT(0) -#define 
GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) +#define GUC_HOST_INTERRUPT _MMIO(0x1901f0) #define GUC_NUM_DOORBELLS 256 @@ -105,13 +103,13 @@ struct guc_doorbell_info { u32 reserved[14]; } __packed; -#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8) -#define GEN8_DRB_VALID REG_BIT(0) -#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) +#define DRBREGL(x) _MMIO(0x1000 + (x) * 8) +#define DRB_VALID REG_BIT(0) +#define DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) -#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08) -#define GEN12_DOORBELLS_PER_SQIDI_MASK REG_GENMASK(23, 16) -#define GEN12_SQIDIS_DOORBELL_EXIST_MASK REG_GENMASK(15, 0) +#define DIST_DBS_POPULATED _MMIO(0xd08) +#define DOORBELLS_PER_SQIDI_MASK REG_GENMASK(23, 16) +#define SQIDIS_DOORBELL_EXIST_MASK REG_GENMASK(15, 0) #define GUC_BCS_RCS_IER _MMIO(0xC550) #define GUC_VCS2_VCS1_IER _MMIO(0xC554) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index c2a278b25fc9..50fc3c469086 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -11,23 +11,22 @@ #define LMEM_INIT REG_BIT(7) #define RENDER_RING_BASE 0x02000 -#define GEN11_BSD_RING_BASE 0x1c0000 -#define GEN11_BSD2_RING_BASE 0x1c4000 -#define GEN11_BSD3_RING_BASE 0x1d0000 -#define GEN11_BSD4_RING_BASE 0x1d4000 +#define BSD_RING_BASE 0x1c0000 +#define BSD2_RING_BASE 0x1c4000 +#define BSD3_RING_BASE 0x1d0000 +#define BSD4_RING_BASE 0x1d4000 #define XEHP_BSD5_RING_BASE 0x1e0000 #define XEHP_BSD6_RING_BASE 0x1e4000 #define XEHP_BSD7_RING_BASE 0x1f0000 #define XEHP_BSD8_RING_BASE 0x1f4000 -#define VEBOX_RING_BASE 0x1a000 -#define GEN11_VEBOX_RING_BASE 0x1c8000 -#define GEN11_VEBOX2_RING_BASE 0x1d8000 +#define VEBOX_RING_BASE 0x1c8000 +#define VEBOX2_RING_BASE 0x1d8000 #define XEHP_VEBOX3_RING_BASE 0x1e8000 #define XEHP_VEBOX4_RING_BASE 0x1f8000 -#define GEN12_COMPUTE0_RING_BASE 0x1a000 -#define GEN12_COMPUTE1_RING_BASE 0x1c000 -#define GEN12_COMPUTE2_RING_BASE 0x1e000 -#define GEN12_COMPUTE3_RING_BASE 0x26000 +#define COMPUTE0_RING_BASE 0x1a000 +#define COMPUTE1_RING_BASE 0x1c000 +#define COMPUTE2_RING_BASE 0x1e000 +#define COMPUTE3_RING_BASE 0x26000 #define BLT_RING_BASE 0x22000 #define XEHPC_BCS1_RING_BASE 0x3e0000 #define XEHPC_BCS2_RING_BASE 0x3e2000 @@ -43,8 +42,8 @@ #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) #define GT_RENDER_USER_INTERRUPT (1 << 0) -#define GEN7_FF_THREAD_MODE _MMIO(0x20a0) -#define GEN12_FF_TESSELATION_DOP_GATE_DISABLE BIT(19) +#define FF_THREAD_MODE _MMIO(0x20a0) +#define FF_TESSELATION_DOP_GATE_DISABLE BIT(19) #define PVC_RP_STATE_CAP _MMIO(0x281014) #define MTL_RP_STATE_CAP _MMIO(0x138000) @@ -86,18 +85,18 @@ #define DG1_MSTR_IRQ REG_BIT(31) #define DG1_MSTR_TILE(t) REG_BIT(t) -#define GEN9_TIMESTAMP_OVERRIDE _MMIO(0x44074) -#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0 -#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3ff -#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT 12 -#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 12) +#define TIMESTAMP_OVERRIDE _MMIO(0x44074) +#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0 +#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3ff +#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT 12 +#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 12) #define GGC _MMIO(0x108040) #define GMS_MASK REG_GENMASK(15, 8) #define GGMS_MASK REG_GENMASK(7, 6) -#define GEN12_GSMBASE _MMIO(0x108100) -#define GEN12_DSMBASE _MMIO(0x1080C0) -#define GEN12_BDSM_MASK REG_GENMASK64(63, 20) +#define GSMBASE 
_MMIO(0x108100) +#define DSMBASE _MMIO(0x1080C0) +#define BDSM_MASK REG_GENMASK64(63, 20) #endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 64b520ddca9c..d524ac5c7b57 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -60,8 +60,8 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, } if (hwe->class == XE_ENGINE_CLASS_COMPUTE) - xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg, - _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); + xe_mmio_write32(hwe->gt, RCU_MODE.reg, + _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); lrc->ring.old_tail = lrc->ring.tail; @@ -81,8 +81,8 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg, xe_bo_ggtt_addr(hwe->hwsp)); xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg); - xe_mmio_write32(gt, RING_MODE_GEN7(hwe->mmio_base).reg, - _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + xe_mmio_write32(gt, RING_MODE(hwe->mmio_base).reg, + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base).reg, lower_32_bits(lrc_desc)); diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 77a210acfac3..53d73f36a121 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -49,14 +49,14 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) if (xe->info.graphics_verx100 >= 1270) { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, - FORCEWAKE_GT_GEN9.reg, + FORCEWAKE_GT.reg, FORCEWAKE_ACK_GT_MTL.reg, BIT(0), BIT(16)); } else { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, - FORCEWAKE_GT_GEN9.reg, - FORCEWAKE_ACK_GT_GEN9.reg, + FORCEWAKE_GT.reg, + FORCEWAKE_ACK_GT.reg, BIT(0), BIT(16)); } } @@ -71,8 +71,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) if (!xe_gt_is_media_type(gt)) domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], XE_FW_DOMAIN_ID_RENDER, - FORCEWAKE_RENDER_GEN9.reg, - FORCEWAKE_ACK_RENDER_GEN9.reg, + FORCEWAKE_RENDER.reg, + FORCEWAKE_ACK_RENDER.reg, BIT(0), BIT(16)); for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { @@ -81,8 +81,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, - FORCEWAKE_MEDIA_VDBOX_GEN11(j).reg, - FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(j).reg, + FORCEWAKE_MEDIA_VDBOX(j).reg, + FORCEWAKE_ACK_MEDIA_VDBOX(j).reg, BIT(0), BIT(16)); } @@ -92,8 +92,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, - FORCEWAKE_MEDIA_VEBOX_GEN11(j).reg, - FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(j).reg, + FORCEWAKE_MEDIA_VEBOX(j).reg, + FORCEWAKE_ACK_MEDIA_VEBOX(j).reg, BIT(0), BIT(16)); } } diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 10a262a0c4cd..fc580d961dbb 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -185,12 +185,12 @@ err: return err; } -#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) -#define GEN12_GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) +#define GUC_TLB_INV_CR _MMIO(0xcee8) +#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) #define PVC_GUC_TLB_INV_DESC0 _MMIO(0xcf7c) -#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) +#define 
PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) #define PVC_GUC_TLB_INV_DESC1 _MMIO(0xcf80) -#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) +#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) void xe_ggtt_invalidate(struct xe_gt *gt) { @@ -212,8 +212,8 @@ void xe_ggtt_invalidate(struct xe_gt *gt) xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg, PVC_GUC_TLB_INV_DESC0_VALID); } else - xe_mmio_write32(gt, GEN12_GUC_TLB_INV_CR.reg, - GEN12_GUC_TLB_INV_CR_INVALIDATE); + xe_mmio_write32(gt, GUC_TLB_INV_CR.reg, + GUC_TLB_INV_CR_INVALIDATE); } } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 4186f7f0d42f..0d4664e344da 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -541,8 +541,8 @@ static int do_gt_reset(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int err; - xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_FULL); - err = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_FULL, 5000, + xe_mmio_write32(gt, GDRST.reg, GRDOM_FULL); + err = xe_mmio_wait32(gt, GDRST.reg, 0, GRDOM_FULL, 5000, NULL, false); if (err) drm_err(&xe->drm, diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 60a2966bc1fd..1b7d00284535 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -14,16 +14,16 @@ static u32 read_reference_ts_freq(struct xe_gt *gt) { - u32 ts_override = xe_mmio_read32(gt, GEN9_TIMESTAMP_OVERRIDE.reg); + u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE.reg); u32 base_freq, frac_freq; - base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> - GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; + base_freq = ((ts_override & TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> + TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; base_freq *= 1000000; frac_freq = ((ts_override & - GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> - GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); + TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> + TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); frac_freq = 1000000 / (frac_freq + 1); return base_freq + frac_freq; @@ -36,17 +36,17 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg) const u32 f25_mhz = 25000000; const u32 f38_4_mhz = 38400000; u32 crystal_clock = - (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + (rpm_config_reg & RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; switch (crystal_clock) { - case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ : return f24_mhz; - case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ : return f19_2_mhz; - case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ : return f38_4_mhz; - case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ : return f25_mhz; default: XE_BUG_ON("NOT_POSSIBLE"); @@ -74,8 +74,8 @@ int xe_gt_clock_init(struct xe_gt *gt) * register increments from this frequency (it might * increment only every few clock cycle). 
*/ - freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + freq >>= 3 - ((c0 & RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); } gt->info.clock_freq = freq; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 5412f77bc26f..aa04ba5a6dbe 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -177,8 +177,8 @@ static const struct xe_mmio_range dg2_implicit_steering_table[] = { static void init_steering_l3bank(struct xe_gt *gt) { if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, - xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(gt, MIRROR_FUSE3.reg)); u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, xe_mmio_read32(gt, XEHP_FUSE4.reg)); @@ -190,8 +190,8 @@ static void init_steering_l3bank(struct xe_gt *gt) gt->steering[L3BANK].instance_target = bank_mask & BIT(0) ? 0 : 2; } else if (gt_to_xe(gt)->info.platform == XE_DG2) { - u32 mslice_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, - xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(gt, MIRROR_FUSE3.reg)); u32 bank = __ffs(mslice_mask) * 8; /* @@ -202,8 +202,8 @@ static void init_steering_l3bank(struct xe_gt *gt) gt->steering[L3BANK].group_target = (bank >> 2) & 0x7; gt->steering[L3BANK].instance_target = bank & 0x3; } else { - u32 fuse = REG_FIELD_GET(GEN10_L3BANK_MASK, - ~xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + u32 fuse = REG_FIELD_GET(L3BANK_MASK, + ~xe_mmio_read32(gt, MIRROR_FUSE3.reg)); gt->steering[L3BANK].group_target = 0; /* unused */ gt->steering[L3BANK].instance_target = __ffs(fuse); @@ -212,8 +212,8 @@ static void init_steering_l3bank(struct xe_gt *gt) static void init_steering_mslice(struct xe_gt *gt) { - u32 mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, - xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg)); + u32 mask = REG_FIELD_GET(MEML3_EN_MASK, + xe_mmio_read32(gt, MIRROR_FUSE3.reg)); /* * mslice registers are valid (not terminated) if either the meml3 @@ -329,8 +329,8 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); if (xe->info.platform == XE_DG2) { - u32 steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, 0) | - REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, 2); + u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) | + REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2); xe_mmio_write32(gt, MCFG_MCR_SELECTOR.reg, steer_val); xe_mmio_write32(gt, SF_MCR_SELECTOR.reg, steer_val); @@ -448,9 +448,9 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) | REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance); } else { - steer_reg = GEN8_MCR_SELECTOR.reg; - steer_val = REG_FIELD_PREP(GEN11_MCR_SLICE_MASK, group) | - REG_FIELD_PREP(GEN11_MCR_SUBSLICE_MASK, instance); + steer_reg = MCR_SELECTOR.reg; + steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, group) | + REG_FIELD_PREP(MCR_SUBSLICE_MASK, instance); } /* @@ -461,7 +461,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag * No need to save old steering reg value. */ if (rw_flag == MCR_OP_READ) - steer_val |= GEN11_MCR_MULTICAST; + steer_val |= MCR_MULTICAST; xe_mmio_write32(gt, steer_reg, steer_val); @@ -477,7 +477,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag * operation. 
*/ if (rw_flag == MCR_OP_WRITE) - xe_mmio_write32(gt, steer_reg, GEN11_MCR_MULTICAST); + xe_mmio_write32(gt, steer_reg, MCR_MULTICAST); return val; } diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index d18f2e25ce56..4e9e9b1aad02 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -278,7 +278,7 @@ int xe_guc_init(struct xe_guc *guc) if (xe_gt_is_media_type(gt)) guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg; else - guc->notify_reg = GEN11_GUC_HOST_INTERRUPT.reg; + guc->notify_reg = GUC_HOST_INTERRUPT.reg; xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); @@ -317,9 +317,9 @@ int xe_guc_reset(struct xe_guc *guc) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - xe_mmio_write32(gt, GEN6_GDRST.reg, GEN11_GRDOM_GUC); + xe_mmio_write32(gt, GDRST.reg, GRDOM_GUC); - ret = xe_mmio_wait32(gt, GEN6_GDRST.reg, 0, GEN11_GRDOM_GUC, 5000, + ret = xe_mmio_wait32(gt, GDRST.reg, 0, GRDOM_GUC, 5000, &gdrst, false); if (ret) { drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", @@ -362,7 +362,7 @@ static void guc_prepare_xfer(struct xe_guc *guc) /* Must program this register before loading the ucode with DMA */ xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags); - xe_mmio_write32(gt, GEN9_GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE); + xe_mmio_write32(gt, GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE); } /* @@ -575,7 +575,7 @@ int xe_guc_enable_communication(struct xe_guc *guc) guc_enable_irq(guc); - xe_mmio_rmw32(guc_to_gt(guc), GEN6_PMINTRMSK.reg, + xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK.reg, ARAT_EXPIRED_INTRMSK, 0); err = xe_guc_ct_enable(&guc->ct); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index d4fc2d357a78..6a723bda2aa9 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -450,10 +450,10 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, u32 flags; bool skip; } *e, extra_regs[] = { - { .reg = RING_MODE_GEN7(hwe->mmio_base).reg, }, + { .reg = RING_MODE(hwe->mmio_base).reg, }, { .reg = RING_HWS_PGA(hwe->mmio_base).reg, }, { .reg = RING_IMR(hwe->mmio_base).reg, }, - { .reg = GEN12_RCU_MODE.reg, .flags = 0x3, + { .reg = RCU_MODE.reg, .flags = 0x3, .skip = hwe != hwe_rcs_reset_domain }, }; u32 i; @@ -478,7 +478,8 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, - GEN9_LNCFCMOCS(i).reg, 0, count++); + LNCFCMOCS(i).reg, 0, + count++); } } @@ -557,11 +558,11 @@ static void guc_doorbell_init(struct xe_guc_ads *ads) if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { u32 distdbreg = - xe_mmio_read32(gt, GEN12_DIST_DBS_POPULATED.reg); + xe_mmio_read32(gt, DIST_DBS_POPULATED.reg); ads_blob_write(ads, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], - REG_FIELD_GET(GEN12_DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1); + REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1); } } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 0b6d0577a8a7..6d59e36b6e5c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -413,7 +413,7 @@ static ssize_t freq_cur_show(struct device *dev, if (ret) goto out; - freq = xe_mmio_read32(gt, GEN6_RPNSWREQ.reg); + freq = xe_mmio_read32(gt, RPNSWREQ.reg); freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); @@ -588,7 +588,7 @@ static ssize_t 
rc_status_show(struct device *dev, u32 reg; xe_device_mem_access_get(gt_to_xe(gt)); - reg = xe_mmio_read32(gt, GEN6_GT_CORE_STATUS.reg); + reg = xe_mmio_read32(gt, GT_CORE_STATUS.reg); xe_device_mem_access_put(gt_to_xe(gt)); switch (REG_FIELD_GET(RCN_MASK, reg)) { @@ -615,7 +615,7 @@ static ssize_t rc6_residency_show(struct device *dev, if (ret) goto out; - reg = xe_mmio_read32(gt, GEN6_GT_GFX_RC6.reg); + reg = xe_mmio_read32(gt, GT_GFX_RC6.reg); ret = sysfs_emit(buff, "%u\n", reg); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -745,9 +745,9 @@ static int pc_gucrc_disable(struct xe_guc_pc *pc) if (ret) return ret; - xe_mmio_write32(gt, GEN9_PG_ENABLE.reg, 0); - xe_mmio_write32(gt, GEN6_RC_CONTROL.reg, 0); - xe_mmio_write32(gt, GEN6_RC_STATE.reg, 0); + xe_mmio_write32(gt, PG_ENABLE.reg, 0); + xe_mmio_write32(gt, RC_CONTROL.reg, 0); + xe_mmio_write32(gt, RC_STATE.reg, 0); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); return 0; diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index a1c3e54faa6e..55dcaab34ea4 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -84,7 +84,7 @@ int xe_huc_auth(struct xe_huc *huc) goto fail; } - ret = xe_mmio_wait32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg, + ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO.reg, HUC_LOAD_SUCCESSFUL, HUC_LOAD_SUCCESSFUL, 100000, NULL, false); if (ret) { @@ -126,7 +126,7 @@ void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) return; drm_printf(p, "\nHuC status: 0x%08x\n", - xe_mmio_read32(gt, GEN11_HUC_KERNEL_LOAD_INFO.reg)); + xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO.reg)); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 23b9f120c258..795302bcd3ae 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -110,28 +110,28 @@ static const struct engine_info engine_infos[] = { .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 0, .domain = XE_FW_MEDIA_VDBOX0, - .mmio_base = GEN11_BSD_RING_BASE, + .mmio_base = BSD_RING_BASE, }, [XE_HW_ENGINE_VCS1] = { .name = "vcs1", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 1, .domain = XE_FW_MEDIA_VDBOX1, - .mmio_base = GEN11_BSD2_RING_BASE, + .mmio_base = BSD2_RING_BASE, }, [XE_HW_ENGINE_VCS2] = { .name = "vcs2", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 2, .domain = XE_FW_MEDIA_VDBOX2, - .mmio_base = GEN11_BSD3_RING_BASE, + .mmio_base = BSD3_RING_BASE, }, [XE_HW_ENGINE_VCS3] = { .name = "vcs3", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 3, .domain = XE_FW_MEDIA_VDBOX3, - .mmio_base = GEN11_BSD4_RING_BASE, + .mmio_base = BSD4_RING_BASE, }, [XE_HW_ENGINE_VCS4] = { .name = "vcs4", @@ -166,14 +166,14 @@ static const struct engine_info engine_infos[] = { .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 0, .domain = XE_FW_MEDIA_VEBOX0, - .mmio_base = GEN11_VEBOX_RING_BASE, + .mmio_base = VEBOX_RING_BASE, }, [XE_HW_ENGINE_VECS1] = { .name = "vecs1", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 1, .domain = XE_FW_MEDIA_VEBOX1, - .mmio_base = GEN11_VEBOX2_RING_BASE, + .mmio_base = VEBOX2_RING_BASE, }, [XE_HW_ENGINE_VECS2] = { .name = "vecs2", @@ -194,28 +194,28 @@ static const struct engine_info engine_infos[] = { .class = XE_ENGINE_CLASS_COMPUTE, .instance = 0, .domain = XE_FW_RENDER, - .mmio_base = GEN12_COMPUTE0_RING_BASE, + .mmio_base = COMPUTE0_RING_BASE, }, [XE_HW_ENGINE_CCS1] = { .name = "ccs1", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 1, .domain = 
XE_FW_RENDER, - .mmio_base = GEN12_COMPUTE1_RING_BASE, + .mmio_base = COMPUTE1_RING_BASE, }, [XE_HW_ENGINE_CCS2] = { .name = "ccs2", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 2, .domain = XE_FW_RENDER, - .mmio_base = GEN12_COMPUTE2_RING_BASE, + .mmio_base = COMPUTE2_RING_BASE, }, [XE_HW_ENGINE_CCS3] = { .name = "ccs3", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 3, .domain = XE_FW_RENDER, - .mmio_base = GEN12_COMPUTE3_RING_BASE, + .mmio_base = COMPUTE3_RING_BASE, }, }; @@ -254,14 +254,14 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) - xe_mmio_write32(hwe->gt, GEN12_RCU_MODE.reg, - _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); + xe_mmio_write32(hwe->gt, RCU_MODE.reg, + _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0); hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg, xe_bo_ggtt_addr(hwe->hwsp)); - hw_engine_mmio_write32(hwe, RING_MODE_GEN7(0).reg, - _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + hw_engine_mmio_write32(hwe, RING_MODE(0).reg, + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg, _MASKED_BIT_DISABLE(STOP_RING)); hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg); @@ -379,7 +379,7 @@ static void read_media_fuses(struct xe_gt *gt) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - media_fuse = xe_mmio_read32(gt, GEN11_GT_VEBOX_VDBOX_DISABLE.reg); + media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE.reg); /* * Pre-Xe_HP platforms had register bits representing absent engines, @@ -390,8 +390,8 @@ static void read_media_fuses(struct xe_gt *gt) if (GRAPHICS_VERx100(xe) < 1250) media_fuse = ~media_fuse; - vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse); - vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse); + vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse); + vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse); for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if (!(gt->info.engine_mask & BIT(i))) @@ -421,8 +421,8 @@ static void read_copy_fuses(struct xe_gt *gt) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - bcs_mask = xe_mmio_read32(gt, GEN10_MIRROR_FUSE3.reg); - bcs_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, bcs_mask); + bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3.reg); + bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask); /* BCS0 is always present; only BCS1-BCS8 may be fused off */ for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) { @@ -546,7 +546,7 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p) drm_printf(p, "\tRING_MODE: 0x%08x\n", hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg)); drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_MODE_GEN7(0).reg)); + hw_engine_mmio_read32(hwe, RING_MODE(0).reg)); drm_printf(p, "\tRING_IMR: 0x%08x\n", hw_engine_mmio_read32(hwe, RING_IMR(0).reg)); @@ -573,8 +573,8 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p) hw_engine_mmio_read32(hwe, IPEHR(0).reg)); if (hwe->class == XE_ENGINE_CLASS_COMPUTE) - drm_printf(p, "\tGEN12_RCU_MODE: 0x%08x\n", - xe_mmio_read32(hwe->gt, GEN12_RCU_MODE.reg)); + drm_printf(p, "\tRCU_MODE: 0x%08x\n", + xe_mmio_read32(hwe->gt, RCU_MODE.reg)); } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5536f84682c0..9b466803c68e 100644 --- 
a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -158,7 +158,8 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si if (!xe->info.has_flat_ccs) { *vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR); if (usable_size) - *usable_size = min(*vram_size, xe_mmio_read64(gt, GEN12_GSMBASE.reg)); + *usable_size = min(*vram_size, + xe_mmio_read64(gt, GSMBASE.reg)); return 0; } diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index e09c6242aafc..67c63facdbf9 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -512,8 +512,9 @@ static void init_l3cc_table(struct xe_gt *gt, (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), get_entry_l3cc(info, 2 * i + 1))), 1 : 0; i++) { - mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, GEN9_LNCFCMOCS(i).reg, l3cc); - xe_mmio_write32(gt, GEN9_LNCFCMOCS(i).reg, l3cc); + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).reg, + l3cc); + xe_mmio_write32(gt, LNCFCMOCS(i).reg, l3cc); } } @@ -531,7 +532,7 @@ void xe_mocs_init(struct xe_gt *gt) gt->mocs.wb_index = table.wb_index; if (flags & HAS_GLOBAL_MOCS) - __init_mocs_table(gt, &table, GEN12_GLOBAL_MOCS(0).reg); + __init_mocs_table(gt, &table, GLOBAL_MOCS(0).reg); /* * Initialize the L3CC table as part of mocs initalization to make diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index c4b3a2045299..5a2665706912 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -31,7 +31,7 @@ static const struct xe_rtp_entry register_whitelist[] = { }, { XE_RTP_NAME("1508744258, 14012131227, 1808121037"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(WHITELIST(GEN7_COMMON_SLICE_CHICKEN1, 0)) + XE_RTP_ACTIONS(WHITELIST(COMMON_SLICE_CHICKEN1, 0)) }, { XE_RTP_NAME("1806527549"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 5480746d40e8..4c5f46f89241 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -85,7 +85,7 @@ static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) { - dw[i++] = MI_BATCH_BUFFER_START_GEN8 | ppgtt_flag; + dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag; dw[i++] = lower_32_bits(batch_addr); dw[i++] = upper_32_bits(batch_addr); @@ -202,9 +202,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, /* Wa_1809175790 */ if (!xe->info.has_flat_ccs) { if (decode) - i = emit_aux_table_inv(gt, GEN12_VD0_AUX_INV.reg, dw, i); + i = emit_aux_table_inv(gt, VD0_AUX_INV.reg, dw, i); else - i = emit_aux_table_inv(gt, GEN12_VE0_AUX_INV.reg, dw, i); + i = emit_aux_table_inv(gt, VE0_AUX_INV.reg, dw, i); } dw[i++] = preparser_disable(false); @@ -246,7 +246,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_pipe_invalidate(mask_flags, dw, i); /* Wa_1809175790 */ if (!xe->info.has_flat_ccs) - i = emit_aux_table_inv(gt, GEN12_CCS_AUX_INV.reg, dw, i); + i = emit_aux_table_inv(gt, CCS_AUX_INV.reg, dw, i); dw[i++] = preparser_disable(false); i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 31887fec1073..9ce0a0585539 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -65,7 
+65,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) } /* Use DSM base address instead for stolen memory */ - mgr->stolen_base = xe_mmio_read64(gt, GEN12_DSMBASE.reg) & GEN12_BDSM_MASK; + mgr->stolen_base = xe_mmio_read64(gt, DSMBASE.reg) & BDSM_MASK; if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base)) return 0; diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 27cf1330facd..43912312cfba 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -33,7 +33,7 @@ static const struct xe_rtp_entry lrc_tunings[] = { { XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), /* read verification is ignored due to 1608008084. */ - XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(GEN12_FF_MODE2, + XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224)) }, diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a7d681b7538d..7a9bf588301e 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -105,7 +105,7 @@ static const struct xe_rtp_entry gt_was[] = { }, { XE_RTP_NAME("14011059788"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - XE_RTP_ACTIONS(SET(GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)) + XE_RTP_ACTIONS(SET(DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)) }, /* DG1 */ @@ -116,7 +116,7 @@ static const struct xe_rtp_entry gt_was[] = { }, { XE_RTP_NAME("1408615072"), XE_RTP_RULES(PLATFORM(DG1)), - XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL)) + XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE2_DIS)) }, /* DG2 */ @@ -134,7 +134,7 @@ static const struct xe_rtp_entry gt_was[] = { }, { XE_RTP_NAME("14011006942"), XE_RTP_RULES(SUBPLATFORM(DG2, G10)), - XE_RTP_ACTIONS(SET(GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)) + XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)) }, { XE_RTP_NAME("14012362059"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), @@ -197,7 +197,7 @@ static const struct xe_rtp_entry gt_was[] = { }, { XE_RTP_NAME("14015795083"), XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)) + XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) }, { XE_RTP_NAME("18018781329"), XE_RTP_RULES(PLATFORM(DG2)), @@ -221,7 +221,7 @@ static const struct xe_rtp_entry gt_was[] = { { XE_RTP_NAME("14015795083"), XE_RTP_RULES(PLATFORM(PVC)), - XE_RTP_ACTIONS(CLR(GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE)) + XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) }, { XE_RTP_NAME("18018781329"), XE_RTP_RULES(PLATFORM(PVC)), @@ -241,42 +241,42 @@ static const struct xe_rtp_entry gt_was[] = { static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN7_FF_THREAD_MODE, - GEN12_FF_TESSELATION_DOP_GATE_DISABLE)) + XE_RTP_ACTIONS(SET(FF_THREAD_MODE, + FF_TESSELATION_DOP_GATE_DISABLE)) }, { XE_RTP_NAME("1409804808"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), - XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS, + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("14010229206, 1409085225"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), - XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, 
GEN12_DISABLE_TDL_PUSH, + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1606931601"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ, + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, + XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1406941453"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, ENABLE_SMALLPL, + XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN7_FF_SLICE_CS_CHICKEN1, - GEN9_FFSC_PERCTX_PREEMPT_CTRL, + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1, + FFSC_PERCTX_PREEMPT_CTRL, XE_RTP_ACTION_FLAG(MASKED_REG))) }, @@ -285,8 +285,8 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), - GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE, + WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, @@ -295,8 +295,8 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), - GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE, + WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, @@ -305,8 +305,8 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("1607297627, 1607030317, 1607186500"), XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), - GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE, + WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, @@ -366,7 +366,7 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("14015227452"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("16015675438"), @@ -405,36 +405,36 @@ static const struct xe_rtp_entry engine_was[] = { }, { XE_RTP_NAME("1509727124"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB, + XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("22012856258"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION, + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("14013392000"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), - 
XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE, + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("14012419201"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX, + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, + DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("14012419201"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX, + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, + DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1308578152"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER), FUNC(xe_rtp_match_first_gslice_fused_off)), - XE_RTP_ACTIONS(CLR(GEN9_CS_DEBUG_MODE1, - GEN12_REPLAY_MODE_GRANULARITY, + XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1, + REPLAY_MODE_GRANULARITY, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("22010960976, 14013347512"), @@ -445,14 +445,14 @@ static const struct xe_rtp_entry engine_was[] = { }, { XE_RTP_NAME("1608949956, 14010198302"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN8_ROW_CHICKEN, + XE_RTP_ACTIONS(SET(ROW_CHICKEN, MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("22010430635"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR, + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, + DISABLE_GRF_CLEAR, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("14013202645"), @@ -465,13 +465,13 @@ static const struct xe_rtp_entry engine_was[] = { }, { XE_RTP_NAME("22012532006"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_HALF_SLICE_CHICKEN7, + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("22012532006"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(GEN9_HALF_SLICE_CHICKEN7, + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA, XE_RTP_ACTION_FLAG(MASKED_REG))) }, @@ -507,7 +507,7 @@ static const struct xe_rtp_entry engine_was[] = { }, { XE_RTP_NAME("14015227452"), XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("16015675438"), @@ -526,15 +526,15 @@ static const struct xe_rtp_entry engine_was[] = { static const struct xe_rtp_entry lrc_was[] = { { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - XE_RTP_ACTIONS(SET(GEN11_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3, + DISABLE_CPS_AWARE_COLOR_PIPE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - XE_RTP_ACTIONS(FIELD_SET(GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL, + XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1, + PREEMPT_GPGPU_LEVEL_MASK, + PREEMPT_GPGPU_THREAD_GROUP_LEVEL, 
XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("1806527549"), @@ -552,7 +552,7 @@ static const struct xe_rtp_entry lrc_was[] = { { XE_RTP_NAME("1409044764"), XE_RTP_RULES(PLATFORM(DG1)), - XE_RTP_ACTIONS(CLR(GEN11_COMMON_SLICE_CHICKEN3, + XE_RTP_ACTIONS(CLR(COMMON_SLICE_CHICKEN3, DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN, XE_RTP_ACTION_FLAG(MASKED_REG))) }, @@ -581,7 +581,7 @@ static const struct xe_rtp_entry lrc_was[] = { { XE_RTP_NAME("14010698770, 22010613112, 22010465075"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE, + DISABLE_CPS_AWARE_COLOR_PIPE, XE_RTP_ACTION_FLAG(MASKED_REG))) }, { XE_RTP_NAME("16013271637"), -- cgit v1.2.3 From 5f230a144a33d9a33448063a23d65c53b6d84cea Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:49 -0700 Subject: drm/xe: Use REG_FIELD/REG_BIT for all regs/*.h Convert the macro declarations to the equivalent GENMASK and and bitfield prep for all registers. v2 (Matt Roper): - Fix wrong conversion of RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK - Reorder fields of XEHP_SLICE_UNIT_LEVEL_CLKGATE for consistency - Simplify CTC_SOURCE_* by only defining CTC_SOURCE_DIVIDE_LOGIC as REG_BIT(0) v3: Also remove DOP_CLOCK_GATE_ENABLE that is unused and wrongly defined Reviewed-by: Matt Atwood Link: https://lore.kernel.org/r/20230427223256.1432787-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 24 ++++++------- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 58 ++++++++++++++------------------ drivers/gpu/drm/xe/regs/xe_regs.h | 12 +++---- drivers/gpu/drm/xe/xe_gt_clock.c | 19 +++++------ 4 files changed, 50 insertions(+), 63 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 9d61f5941289..f1e75703e4bc 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -22,7 +22,7 @@ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_PSMI_CTL(base) _MMIO((base) + 0x50) -#define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) +#define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) #define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) #define RING_ACTHD_UDW(base) _MMIO((base) + 0x5c) @@ -54,7 +54,7 @@ #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) #define RING_MODE(base) _MMIO((base) + 0x29c) -#define GFX_DISABLE_LEGACY_MODE (1 << 3) +#define GFX_DISABLE_LEGACY_MODE REG_BIT(3) #define RING_TIMESTAMP(base) _MMIO((base) + 0x358) @@ -68,17 +68,17 @@ #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4d0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30) +#define RING_FORCE_TO_NONPRIV_ACCESS_MASK REG_GENMASK(29, 28) +#define RING_FORCE_TO_NONPRIV_ACCESS_RW REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 0) +#define RING_FORCE_TO_NONPRIV_ACCESS_RD REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 1) +#define RING_FORCE_TO_NONPRIV_ACCESS_WR REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 2) +#define RING_FORCE_TO_NONPRIV_ACCESS_INVALID REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 3) #define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) -#define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) -#define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) -#define RING_FORCE_TO_NONPRIV_ACCESS_WR (2 << 28) -#define RING_FORCE_TO_NONPRIV_ACCESS_INVALID (3 << 28) -#define RING_FORCE_TO_NONPRIV_ACCESS_MASK (3 << 28) -#define 
RING_FORCE_TO_NONPRIV_RANGE_1 (0 << 0) -#define RING_FORCE_TO_NONPRIV_RANGE_4 (1 << 0) -#define RING_FORCE_TO_NONPRIV_RANGE_16 (2 << 0) -#define RING_FORCE_TO_NONPRIV_RANGE_64 (3 << 0) -#define RING_FORCE_TO_NONPRIV_RANGE_MASK (3 << 0) +#define RING_FORCE_TO_NONPRIV_RANGE_MASK REG_GENMASK(1, 0) +#define RING_FORCE_TO_NONPRIV_RANGE_1 REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 0) +#define RING_FORCE_TO_NONPRIV_RANGE_4 REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 1) +#define RING_FORCE_TO_NONPRIV_RANGE_16 REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 2) +#define RING_FORCE_TO_NONPRIV_RANGE_64 REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_RANGE_MASK, 3) #define RING_FORCE_TO_NONPRIV_MASK_VALID (RING_FORCE_TO_NONPRIV_RANGE_MASK | \ RING_FORCE_TO_NONPRIV_ACCESS_MASK | \ RING_FORCE_TO_NONPRIV_DENY) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5a0a08c84f3d..657b8cc961bb 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -9,15 +9,13 @@ #include "regs/xe_reg_defs.h" /* RPM unit config (Gen8+) */ -#define RPM_CONFIG0 _MMIO(0xd00) -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 +#define RPM_CONFIG0 _MMIO(0xd00) +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3) +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 #define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 #define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 -#define RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 -#define RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) +#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 +#define RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) #define FORCEWAKE_ACK_MEDIA_VDBOX(n) _MMIO(0xd50 + (n) * 4) #define FORCEWAKE_ACK_MEDIA_VEBOX(n) _MMIO(0xd70 + (n) * 4) @@ -39,15 +37,15 @@ #define MCR_SELECTOR _MMIO(0xfdc) #define GAM_MCR_SELECTOR _MMIO(0xfe0) #define MCR_MULTICAST REG_BIT(31) -#define MCR_SLICE(slice) (((slice) & 0xf) << 27) -#define MCR_SLICE_MASK MCR_SLICE(0xf) -#define MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) -#define MCR_SUBSLICE_MASK MCR_SUBSLICE(0x7) +#define MCR_SLICE_MASK REG_GENMASK(30, 27) +#define MCR_SLICE(slice) REG_FIELD_PREP(MCR_SLICE_MASK, slice) +#define MCR_SUBSLICE_MASK REG_GENMASK(26, 24) +#define MCR_SUBSLICE(subslice) REG_FIELD_PREP(MCR_SUBSLICE_MASK, subslice) #define MTL_MCR_GROUPID REG_GENMASK(11, 8) #define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) #define FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) -#define FFSC_PERCTX_PREEMPT_CTRL (1 << 14) +#define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) #define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) #define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) @@ -59,12 +57,12 @@ #define PS_INVOCATION_COUNT _MMIO(0x2348) #define CS_CHICKEN1 _MMIO(0x2580) -#define PREEMPT_3D_OBJECT_LEVEL (1 << 0) #define PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) #define PREEMPT_GPGPU_MID_THREAD_LEVEL PREEMPT_GPGPU_LEVEL(0, 0) #define PREEMPT_GPGPU_THREAD_GROUP_LEVEL PREEMPT_GPGPU_LEVEL(0, 1) #define PREEMPT_GPGPU_COMMAND_LEVEL PREEMPT_GPGPU_LEVEL(1, 0) #define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) +#define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) #define GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define CCS_AUX_INV _MMIO(0x4208) @@ -131,19 +129,18 @@ #define MIRROR_FUSE3 _MMIO(0x9118) #define L3BANK_PAIR_COUNT 4 -#define 
L3BANK_MASK 0x0F +#define L3BANK_MASK REG_GENMASK(3, 0) /* on Xe_HP the same fuses indicates mslices instead of L3 banks */ #define MAX_MSLICES 4 -#define MEML3_EN_MASK 0x0F +#define MEML3_EN_MASK REG_GENMASK(3, 0) /* Fuse readout registers for GT */ #define XEHP_FUSE4 _MMIO(0x9114) #define GT_L3_EXC_MASK REG_GENMASK(6, 4) #define GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) -#define GT_VDBOX_DISABLE_MASK 0xff -#define GT_VEBOX_DISABLE_SHIFT 16 -#define GT_VEBOX_DISABLE_MASK (0x0f << GT_VEBOX_DISABLE_SHIFT) +#define GT_VEBOX_DISABLE_MASK REG_GENMASK(19, 16) +#define GT_VDBOX_DISABLE_MASK REG_GENMASK(7, 0) #define XELP_EU_ENABLE _MMIO(0x9134) /* "_DISABLE" on Xe_LP */ #define XELP_EU_MASK REG_GENMASK(7, 0) @@ -157,7 +154,6 @@ #define MISCCPCTL _MMIO(0x9424) #define DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) -#define DOP_CLOCK_GATE_ENABLE REG_BIT((0) #define UNSLCGCTL9430 _MMIO(0x9430) #define MSQDUNIT_CLKGATE_DIS REG_BIT(3) @@ -203,12 +199,12 @@ #define LTCDD_CLKGATE_DIS REG_BIT(10) #define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4) -#define SARBUNIT_CLKGATE_DIS (1 << 5) -#define RCCUNIT_CLKGATE_DIS (1 << 7) -#define MSCUNIT_CLKGATE_DIS (1 << 10) -#define NODEDSS_CLKGATE_DIS REG_BIT(12) -#define L3_CLKGATE_DIS REG_BIT(16) #define L3_CR2X_CLKGATE_DIS REG_BIT(17) +#define L3_CLKGATE_DIS REG_BIT(16) +#define NODEDSS_CLKGATE_DIS REG_BIT(12) +#define MSCUNIT_CLKGATE_DIS REG_BIT(10) +#define RCCUNIT_CLKGATE_DIS REG_BIT(7) +#define SARBUNIT_CLKGATE_DIS REG_BIT(5) #define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) #define VSUNIT_CLKGATE2_DIS REG_BIT(19) @@ -224,7 +220,7 @@ #define RTFUNIT_CLKGATE_DIS REG_BIT(18) #define DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550) -#define DFR_DISABLE (1 << 9) +#define DFR_DISABLE REG_BIT(9) #define RPNSWREQ _MMIO(0xa008) #define REQ_RATIO_MASK REG_GENMASK(31, 23) @@ -232,20 +228,16 @@ #define RC_STATE _MMIO(0xa094) #define PMINTRMSK _MMIO(0xa168) -#define PMINTR_DISABLE_REDIRECT_TO_GUC (1 << 31) -#define ARAT_EXPIRED_INTRMSK (1 << 9) +#define PMINTR_DISABLE_REDIRECT_TO_GUC REG_BIT(31) +#define ARAT_EXPIRED_INTRMSK REG_BIT(9) #define FORCEWAKE_GT _MMIO(0xa188) #define PG_ENABLE _MMIO(0xa210) -/* GPM unit config (Gen9+) */ #define CTC_MODE _MMIO(0xa26c) -#define CTC_SOURCE_PARAMETER_MASK 1 -#define CTC_SOURCE_CRYSTAL_CLOCK 0 -#define CTC_SOURCE_DIVIDE_LOGIC 1 -#define CTC_SHIFT_PARAMETER_SHIFT 1 -#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) +#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) +#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0) #define FORCEWAKE_RENDER _MMIO(0xa278) #define FORCEWAKE_MEDIA_VDBOX(n) _MMIO(0xa540 + (n) * 4) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 50fc3c469086..9d18430fd225 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -37,10 +37,10 @@ #define XEHPC_BCS7_RING_BASE 0x3ec000 #define XEHPC_BCS8_RING_BASE 0x3ee000 #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) -#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) -#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) +#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) +#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) -#define GT_RENDER_USER_INTERRUPT (1 << 0) +#define GT_RENDER_USER_INTERRUPT REG_BIT(0) #define FF_THREAD_MODE _MMIO(0x20a0) #define FF_TESSELATION_DOP_GATE_DISABLE BIT(19) @@ -86,10 +86,8 @@ #define DG1_MSTR_TILE(t) REG_BIT(t) #define TIMESTAMP_OVERRIDE _MMIO(0x44074) -#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0 -#define 
TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3ff -#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT 12 -#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 12) +#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) +#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) #define GGC _MMIO(0x108040) #define GMS_MASK REG_GENMASK(15, 8) diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 1b7d00284535..49625d49bdcc 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -17,13 +17,12 @@ static u32 read_reference_ts_freq(struct xe_gt *gt) u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE.reg); u32 base_freq, frac_freq; - base_freq = ((ts_override & TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> - TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; + base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK, + ts_override) + 1; base_freq *= 1000000; - frac_freq = ((ts_override & - TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> - TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); + frac_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK, + ts_override); frac_freq = 1000000 / (frac_freq + 1); return base_freq + frac_freq; @@ -35,9 +34,8 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg) const u32 f24_mhz = 24000000; const u32 f25_mhz = 25000000; const u32 f38_4_mhz = 38400000; - u32 crystal_clock = - (rpm_config_reg & RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK, + rpm_config_reg); switch (crystal_clock) { case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ : @@ -62,7 +60,7 @@ int xe_gt_clock_init(struct xe_gt *gt) /* Assuming gen11+ so assert this assumption is correct */ XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); - if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) { freq = read_reference_ts_freq(gt); } else { u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg); @@ -74,8 +72,7 @@ int xe_gt_clock_init(struct xe_gt *gt) * register increments from this frequency (it might * increment only every few clock cycle). */ - freq >>= 3 - ((c0 & RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0); } gt->info.clock_freq = freq; -- cgit v1.2.3 From 143e3bc7832f85676d0e4235d4238f0c9b0682da Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:50 -0700 Subject: drm/xe: Clarify register types on PAT programming Clarify a few things related to the PAT programming, particularly on MTL: - The register type doesn't change depending on the GT - what happens is that media GT writes to other set of registers that are not MCR - Remove "UNICAST": otherwise it's confusing why it's not using MCR registers with the unicast function variant Also, there isn't much reason to keep those parts as macros: promote them to proper functions and let the compiler inline if it sees fit. 
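For reference, a minimal before/after sketch of what this conversion looks like at a call site (based on the xe_pat.c hunk below; the surrounding xe_pat_init() context is elided and assumed):

	/* Before: a do/while macro that expands ARRAY_SIZE() and the loop at
	 * every call site */
	PROGRAM_PAT_MCR(gt, mtl_pat_table);

	/* After: an ordinary function; the entry count is an explicit,
	 * type-checked parameter and the compiler is still free to inline */
	program_pat_mcr(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table));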
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230427223256.1432787-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index c2faf0931649..fcf6ae2c92cc 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -62,31 +62,36 @@ static const u32 mtl_pat_table[] = { [4] = MTL_PAT_0_WB | MTL_3_COH_2W, }; -#define PROGRAM_PAT_UNICAST(gt, table) do { \ - for (int i = 0; i < ARRAY_SIZE(table); i++) \ - xe_mmio_write32(gt, _PAT_INDEX(i), table[i]); \ -} while (0) +static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries) +{ + for (int i = 0; i < n_entries; i++) + xe_mmio_write32(gt, _PAT_INDEX(i), table[i]); +} -#define PROGRAM_PAT_MCR(gt, table) do { \ - for (int i = 0; i < ARRAY_SIZE(table); i++) \ - xe_gt_mcr_multicast_write(gt, MCR_REG(_PAT_INDEX(i)), table[i]); \ -} while (0) +static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries) +{ + for (int i = 0; i < n_entries; i++) + xe_gt_mcr_multicast_write(gt, MCR_REG(_PAT_INDEX(i)), table[i]); +} void xe_pat_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); if (xe->info.platform == XE_METEORLAKE) { + /* + * SAMedia register offsets are adjusted by the write methods + * and they target registers that are not MCR, while for normal + * GT they are MCR + */ if (xe_gt_is_media_type(gt)) - PROGRAM_PAT_UNICAST(gt, mtl_pat_table); + program_pat(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table)); else - PROGRAM_PAT_MCR(gt, mtl_pat_table); - } else if (xe->info.platform == XE_PVC) { - PROGRAM_PAT_MCR(gt, pvc_pat_table); - } else if (xe->info.platform == XE_DG2) { - PROGRAM_PAT_MCR(gt, pvc_pat_table); + program_pat_mcr(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table)); + } else if (xe->info.platform == XE_PVC || xe->info.platform == XE_DG2) { + program_pat_mcr(gt, pvc_pat_table, ARRAY_SIZE(pvc_pat_table)); } else if (GRAPHICS_VERx100(xe) <= 1210) { - PROGRAM_PAT_UNICAST(gt, tgl_pat_table); + program_pat(gt, tgl_pat_table, ARRAY_SIZE(tgl_pat_table)); } else { /* * Going forward we expect to need new PAT settings for most -- cgit v1.2.3 From 36e22be498fb8361ef411ac7d8cf9404338f6fc2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:52 -0700 Subject: drm/xe: Introduce xe_reg/xe_reg_mcr Stop using i915 types for registers. Use our own types. Differently from i915, this will keep under the register definition the knowledge for the different types of registers. For now, the "flags"/"options" are mcr and masked, although only the former is being used. Additionally MCR registers have their own type. The only place that should really look inside a xe_mcr_reg_t is the code dealing with the steering; using other APIs when the register is MCR has been a source of problems in the past. Most of the driver is agnostic to the register differences since they either use the definition from the header or already call the correct MCR_REG()/_MMIO() macros. By embedding the struct xe_reg inside the struct it's also possible to guarantee the compiler will break if using RANDOM_MCR_REG.reg is attempted, since now the u32 is inside the inner struct.
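To make the compile-time guarantee above concrete, a small hypothetical example (FAKE_MCR_REG and its offset are invented for illustration; XE_REG_MCR, struct xe_reg_mcr and xe_gt_mcr_unicast_read_any() come from the patch below, and gt is assumed to be a valid struct xe_gt pointer):

	/* An invented MCR register declared with the dedicated MCR type */
	#define FAKE_MCR_REG	XE_REG_MCR(0x1234)

	/* u32 addr = FAKE_MCR_REG.reg; -- build error: struct xe_reg_mcr has no
	 * 'reg' member, the u32 now lives in the inner __reg */

	/* Steering-aware accessor that takes the MCR type directly */
	u32 val = xe_gt_mcr_unicast_read_any(gt, FAKE_MCR_REG);

Only the steering code in xe_gt_mcr.c reaches into the inner struct (via its to_xe_reg() helper), which is what keeps the plain MMIO accessors from being used on MCR registers by accident.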
v2: - Deep a dedicated type for MCR registers to avoid misuse (Matt Roper, Jani) - Drop the typedef and just use a struct since it's not an opaque type (Jani) - Add more kernel-doc v3: - Use only 22 bits for the register address since all the platforms supported so far have only 4MB of MMIO per tile (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230427223256.1432787-7-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 95 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_mcr.c | 44 ++++++++++------ drivers/gpu/drm/xe/xe_gt_mcr.h | 11 ++-- drivers/gpu/drm/xe/xe_irq.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 2 +- 5 files changed, 131 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 5f6735697d9c..e31137e2c42f 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -8,4 +8,99 @@ #include "../../i915/i915_reg_defs.h" +/** + * struct xe_reg - Register definition + * + * Register defintion to be used by the individual register. Although the same + * definition is used for xe_reg and xe_reg_mcr, they use different internal + * APIs for accesses. + */ +struct xe_reg { + union { + struct { + /** @reg: address */ + u32 reg:22; + /** + * @masked: register is "masked", with upper 16bits used + * to identify the bits that are updated on the lower + * bits + */ + u32 masked:1; + /** + * @mcr: register is multicast/replicated in the + * hardware and needs special handling. Any register + * with this set should also use a type of xe_reg_mcr_t. + * It's only here so the few places that deal with MCR + * registers specially (xe_sr.c) and tests using the raw + * value can inspect it. + */ + u32 mcr:1; + }; + /** @raw: Raw value with both address and options */ + u32 raw; + }; +}; + +/** + * struct xe_reg_mcr - MCR register definition + * + * MCR register is the same as a regular register, but uses another type since + * the internal API used for accessing them is different: it's never correct to + * use regular MMIO access. + */ +struct xe_reg_mcr { + /** @__reg: The register */ + struct xe_reg __reg; +}; + + +/** + * XE_REG_OPTION_MASKED - Register is "masked", with upper 16 bits marking the + * read/written bits on the lower 16 bits. + * + * To be used with XE_REG(). XE_REG_MCR() and XE_REG_INITIALIZER() + */ +#define XE_REG_OPTION_MASKED .masked = 1 + +/** + * XE_REG_INITIALIZER - Initializer for xe_reg_t. + * @r_: Register offset + * @...: Additional options like access mode. See struct xe_reg for available + * options. + * + * Register field is mandatory, and additional options may be passed as + * arguments. Usually ``XE_REG()`` should be preferred since it creates an + * object of the right type. However when initializing static const storage, + * where a compound statement is not allowed, this can be used instead. + */ +#define XE_REG_INITIALIZER(r_, ...) { .reg = r_, __VA_ARGS__ } + + +/** + * XE_REG - Create a struct xe_reg from offset and additional flags + * @r_: Register offset + * @...: Additional options like access mode. See struct xe_reg for available + * options. + */ +#define XE_REG(r_, ...) ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__)) + +/** + * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags + * @r_: Register offset + * @...: Additional options like access mode. 
See struct xe_reg for available + * options. + */ +#define XE_REG_MCR(r_, ...) ((const struct xe_reg_mcr){ \ + .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ + }) + +/* + * TODO: remove these once the register declarations are not using them anymore + */ +#undef _MMIO +#undef MCR_REG +#define _MMIO(r_) ((const struct xe_reg){ .reg = r_ }) +#define MCR_REG(r_) ((const struct xe_reg_mcr){ .__reg.reg = r_, \ + .__reg.mcr = 1 }) + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index aa04ba5a6dbe..55b240a5eaa7 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -40,6 +40,11 @@ * non-terminated instance. */ +static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr) +{ + return reg_mcr.__reg; +} + enum { MCR_OP_READ, MCR_OP_WRITE @@ -360,9 +365,10 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) * returned. Returns false if the caller need not perform any steering */ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, - i915_mcr_reg_t reg, + struct xe_reg_mcr reg_mcr, u8 *group, u8 *instance) { + const struct xe_reg reg = to_xe_reg(reg_mcr); const struct xe_mmio_range *implicit_ranges; for (int type = 0; type < IMPLICIT_STEERING; type++) { @@ -436,9 +442,10 @@ static void mcr_unlock(struct xe_gt *gt) { * * Caller needs to make sure the relevant forcewake wells are up. */ -static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag, - int group, int instance, u32 value) +static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, + u8 rw_flag, int group, int instance, u32 value) { + const struct xe_reg reg = to_xe_reg(reg_mcr); u32 steer_reg, steer_val, val = 0; lockdep_assert_held(>->mcr_lock); @@ -485,7 +492,7 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag /** * xe_gt_mcr_unicast_read_any - reads a non-terminated instance of an MCR register * @gt: GT structure - * @reg: register to read + * @reg_mcr: register to read * * Reads a GT MCR register. The read will be steered to a non-terminated * instance (i.e., one that isn't fused off or powered down by power gating). @@ -494,17 +501,19 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, i915_mcr_reg_t reg, u8 rw_flag * * Returns the value from a non-terminated instance of @reg. */ -u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg) +u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr) { + const struct xe_reg reg = to_xe_reg(reg_mcr); u8 group, instance; u32 val; bool steer; - steer = xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); + steer = xe_gt_mcr_get_nonterminated_steering(gt, reg_mcr, + &group, &instance); if (steer) { mcr_lock(gt); - val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, + val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, group, instance, 0); mcr_unlock(gt); } else { @@ -517,7 +526,7 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg) /** * xe_gt_mcr_unicast_read - read a specific instance of an MCR register * @gt: GT structure - * @reg: the MCR register to read + * @reg_mcr: the MCR register to read * @group: the MCR group * @instance: the MCR instance * @@ -525,13 +534,13 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg) * group/instance. 
*/ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, - i915_mcr_reg_t reg, + struct xe_reg_mcr reg_mcr, int group, int instance) { u32 val; mcr_lock(gt); - val = rw_with_mcr_steering(gt, reg, MCR_OP_READ, group, instance, 0); + val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, group, instance, 0); mcr_unlock(gt); return val; @@ -540,7 +549,7 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, /** * xe_gt_mcr_unicast_write - write a specific instance of an MCR register * @gt: GT structure - * @reg: the MCR register to write + * @reg_mcr: the MCR register to write * @value: value to write * @group: the MCR group * @instance: the MCR instance @@ -548,24 +557,27 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, * Write an MCR register in unicast mode after steering toward a specific * group/instance. */ -void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value, - int group, int instance) +void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, + u32 value, int group, int instance) { mcr_lock(gt); - rw_with_mcr_steering(gt, reg, MCR_OP_WRITE, group, instance, value); + rw_with_mcr_steering(gt, reg_mcr, MCR_OP_WRITE, group, instance, value); mcr_unlock(gt); } /** * xe_gt_mcr_multicast_write - write a value to all instances of an MCR register * @gt: GT structure - * @reg: the MCR register to write + * @reg_mcr: the MCR register to write * @value: value to write * * Write an MCR register in multicast mode to update all instances. */ -void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value) +void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, + u32 value) { + struct xe_reg reg = to_xe_reg(reg_mcr); + /* * Synchronize with any unicast operations. Once we have exclusive * access, the MULTICAST bit should already be set, so there's no need diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index 2a6cd38c8cb7..27ca1bc880a0 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -15,13 +15,14 @@ void xe_gt_mcr_init(struct xe_gt *gt); void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt); -u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, i915_mcr_reg_t reg, +u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, int group, int instance); -u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, i915_mcr_reg_t reg); +u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr mcr_reg); -void xe_gt_mcr_unicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value, - int group, int instance); -void xe_gt_mcr_multicast_write(struct xe_gt *gt, i915_mcr_reg_t reg, u32 value); +void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, + u32 value, int group, int instance); +void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, + u32 value); void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index e812a5b66a6b..9dd730d707e5 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -26,7 +26,7 @@ #define IIR(offset) _MMIO(offset + 0x8) #define IER(offset) _MMIO(offset + 0xc) -static void assert_iir_is_zero(struct xe_gt *gt, i915_reg_t reg) +static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg) { u32 val = xe_mmio_read32(gt, reg.reg); diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 9b466803c68e..24a3c1842144 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ 
b/drivers/gpu/drm/xe/xe_mmio.c @@ -393,7 +393,7 @@ int xe_mmio_init(struct xe_device *xe) DRM_XE_MMIO_READ |\ DRM_XE_MMIO_WRITE) -static const i915_reg_t mmio_read_whitelist[] = { +static const struct xe_reg mmio_read_whitelist[] = { RING_TIMESTAMP(RENDER_RING_BASE), }; -- cgit v1.2.3 From 3512a78a3cefcd9ec0177771f637de0fe4a64ea2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:53 -0700 Subject: drm/xe: Use XE_REG/XE_REG_MCR These should replace the _MMIO() and MCR_REG() from i915, with the goal of being more extensible, allowing to pass the additional fields for struct xe_reg and struct xe_reg_mcr. Replace all uses of _MMIO() and MCR_REG() in xe. Since the RTP, reg-save-restore and WA infra are not ready to use the new type, just undef the macro like was done for the i915 types previously. That conversion will come later. v2: Remove MEDIA_SOFT_SCRATCH_COUNT/MEDIA_SOFT_SCRATCH re-added by mistake (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230427223256.1432787-8-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 74 +++++----- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 235 ++++++++++++++++--------------- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 64 ++++----- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 9 -- drivers/gpu/drm/xe/regs/xe_regs.h | 28 ++-- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 20 +-- drivers/gpu/drm/xe/xe_ggtt.c | 6 +- drivers/gpu/drm/xe/xe_guc.c | 4 +- drivers/gpu/drm/xe/xe_guc_pc.c | 14 +- drivers/gpu/drm/xe/xe_irq.c | 6 +- drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_mocs.c | 4 +- drivers/gpu/drm/xe/xe_pat.c | 2 +- drivers/gpu/drm/xe/xe_pcode_api.h | 6 +- drivers/gpu/drm/xe/xe_reg_sr.c | 4 +- drivers/gpu/drm/xe/xe_reg_whitelist.c | 12 +- drivers/gpu/drm/xe/xe_rtp.h | 10 +- drivers/gpu/drm/xe/xe_tuning.c | 8 +- drivers/gpu/drm/xe/xe_wa.c | 8 +- 19 files changed, 254 insertions(+), 262 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index f1e75703e4bc..80b66844a8ec 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -10,63 +10,63 @@ #include "regs/xe_reg_defs.h" -#define RING_TAIL(base) _MMIO((base) + 0x30) +#define RING_TAIL(base) XE_REG((base) + 0x30) -#define RING_HEAD(base) _MMIO((base) + 0x34) +#define RING_HEAD(base) XE_REG((base) + 0x34) #define HEAD_ADDR 0x001FFFFC -#define RING_START(base) _MMIO((base) + 0x38) +#define RING_START(base) XE_REG((base) + 0x38) -#define RING_CTL(base) _MMIO((base) + 0x3c) +#define RING_CTL(base) XE_REG((base) + 0x3c) #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ -#define RING_PSMI_CTL(base) _MMIO((base) + 0x50) +#define RING_PSMI_CTL(base) XE_REG((base) + 0x50) #define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) #define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) -#define RING_ACTHD_UDW(base) _MMIO((base) + 0x5c) -#define RING_DMA_FADD_UDW(base) _MMIO((base) + 0x60) -#define RING_IPEIR(base) _MMIO((base) + 0x64) -#define RING_IPEHR(base) _MMIO((base) + 0x68) -#define RING_ACTHD(base) _MMIO((base) + 0x74) -#define RING_DMA_FADD(base) _MMIO((base) + 0x78) -#define RING_HWS_PGA(base) _MMIO((base) + 0x80) -#define IPEIR(base) _MMIO((base) + 0x88) -#define IPEHR(base) _MMIO((base) + 0x8c) -#define RING_HWSTAM(base) _MMIO((base) + 0x98) -#define RING_MI_MODE(base) _MMIO((base) + 
0x9c) -#define RING_NOPID(base) _MMIO((base) + 0x94) - -#define RING_IMR(base) _MMIO((base) + 0xa8) +#define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c) +#define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60) +#define RING_IPEIR(base) XE_REG((base) + 0x64) +#define RING_IPEHR(base) XE_REG((base) + 0x68) +#define RING_ACTHD(base) XE_REG((base) + 0x74) +#define RING_DMA_FADD(base) XE_REG((base) + 0x78) +#define RING_HWS_PGA(base) XE_REG((base) + 0x80) +#define IPEIR(base) XE_REG((base) + 0x88) +#define IPEHR(base) XE_REG((base) + 0x8c) +#define RING_HWSTAM(base) XE_REG((base) + 0x98) +#define RING_MI_MODE(base) XE_REG((base) + 0x9c) +#define RING_NOPID(base) XE_REG((base) + 0x94) + +#define RING_IMR(base) XE_REG((base) + 0xa8) #define RING_MAX_NONPRIV_SLOTS 12 -#define RING_EIR(base) _MMIO((base) + 0xb0) -#define RING_EMR(base) _MMIO((base) + 0xb4) -#define RING_ESR(base) _MMIO((base) + 0xb8) -#define RING_BBADDR(base) _MMIO((base) + 0x140) -#define RING_BBADDR_UDW(base) _MMIO((base) + 0x168) -#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234) -#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4) +#define RING_EIR(base) XE_REG((base) + 0xb0) +#define RING_EMR(base) XE_REG((base) + 0xb4) +#define RING_ESR(base) XE_REG((base) + 0xb8) +#define RING_BBADDR(base) XE_REG((base) + 0x140) +#define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) +#define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234) +#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) -#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244) +#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) -#define RING_MODE(base) _MMIO((base) + 0x29c) +#define RING_MODE(base) XE_REG((base) + 0x29c) #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) -#define RING_TIMESTAMP(base) _MMIO((base) + 0x358) +#define RING_TIMESTAMP(base) XE_REG((base) + 0x358) -#define RING_TIMESTAMP_UDW(base) _MMIO((base) + 0x358 + 4) +#define RING_TIMESTAMP_UDW(base) XE_REG((base) + 0x358 + 4) #define RING_VALID_MASK 0x00000001 #define RING_VALID 0x00000001 #define STOP_RING REG_BIT(8) #define TAIL_ADDR 0x001FFFF8 -#define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) +#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) -#define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4d0) + (i) * 4) +#define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30) #define RING_FORCE_TO_NONPRIV_ACCESS_MASK REG_GENMASK(29, 28) #define RING_FORCE_TO_NONPRIV_ACCESS_RW REG_FIELD_PREP(RING_FORCE_TO_NONPRIV_ACCESS_MASK, 0) @@ -84,16 +84,16 @@ RING_FORCE_TO_NONPRIV_DENY) #define RING_MAX_NONPRIV_SLOTS 12 -#define RING_EXECLIST_SQ_CONTENTS_LO(base) _MMIO((base) + 0x510) -#define RING_EXECLIST_SQ_CONTENTS_HI(base) _MMIO((base) + 0x510 + 4) +#define RING_EXECLIST_SQ_CONTENTS_LO(base) XE_REG((base) + 0x510) +#define RING_EXECLIST_SQ_CONTENTS_HI(base) XE_REG((base) + 0x510 + 4) -#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) +#define RING_EXECLIST_CONTROL(base) XE_REG((base) + 0x550) #define EL_CTRL_LOAD REG_BIT(0) -#define VDBOX_CGCTL3F10(base) _MMIO((base) + 0x3f10) +#define VDBOX_CGCTL3F10(base) XE_REG((base) + 0x3f10) #define IECPUNIT_CLKGATE_DIS REG_BIT(22) -#define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18) +#define VDBOX_CGCTL3F18(base) XE_REG((base) + 0x3f18) #define ALNUNIT_CLKGATE_DIS REG_BIT(13) #endif diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h 
b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 657b8cc961bb..5648305a8f5a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -9,7 +9,7 @@ #include "regs/xe_reg_defs.h" /* RPM unit config (Gen8+) */ -#define RPM_CONFIG0 _MMIO(0xd00) +#define RPM_CONFIG0 XE_REG(0xd00) #define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3) #define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 #define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 @@ -17,25 +17,26 @@ #define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 #define RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) -#define FORCEWAKE_ACK_MEDIA_VDBOX(n) _MMIO(0xd50 + (n) * 4) -#define FORCEWAKE_ACK_MEDIA_VEBOX(n) _MMIO(0xd70 + (n) * 4) -#define FORCEWAKE_ACK_RENDER _MMIO(0xd84) +#define FORCEWAKE_ACK_MEDIA_VDBOX(n) XE_REG(0xd50 + (n) * 4) +#define FORCEWAKE_ACK_MEDIA_VEBOX(n) XE_REG(0xd70 + (n) * 4) +#define FORCEWAKE_ACK_RENDER XE_REG(0xd84) -#define GMD_ID _MMIO(0xd8c) +#define GMD_ID XE_REG(0xd8c) #define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) #define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) #define GMD_ID_STEP REG_GENMASK(5, 0) -#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) +#define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) -#define LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ +/* L3 Cache Control */ +#define LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) #define LNCFCMOCS_REG_COUNT 32 -#define MCFG_MCR_SELECTOR _MMIO(0xfd0) -#define MTL_MCR_SELECTOR _MMIO(0xfd4) -#define SF_MCR_SELECTOR _MMIO(0xfd8) -#define MCR_SELECTOR _MMIO(0xfdc) -#define GAM_MCR_SELECTOR _MMIO(0xfe0) +#define MCFG_MCR_SELECTOR XE_REG(0xfd0) +#define MTL_MCR_SELECTOR XE_REG(0xfd4) +#define SF_MCR_SELECTOR XE_REG(0xfd8) +#define MCR_SELECTOR XE_REG(0xfdc) +#define GAM_MCR_SELECTOR XE_REG(0xfe0) #define MCR_MULTICAST REG_BIT(31) #define MCR_SLICE_MASK REG_GENMASK(30, 27) #define MCR_SLICE(slice) REG_FIELD_PREP(MCR_SLICE_MASK, slice) @@ -44,19 +45,19 @@ #define MTL_MCR_GROUPID REG_GENMASK(11, 8) #define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) -#define FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) +#define FF_SLICE_CS_CHICKEN1 XE_REG(0x20e0) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) -#define FF_SLICE_CS_CHICKEN2 _MMIO(0x20e4) +#define FF_SLICE_CS_CHICKEN2 XE_REG(0x20e4) #define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) -#define CS_DEBUG_MODE1 _MMIO(0x20ec) +#define CS_DEBUG_MODE1 XE_REG(0x20ec) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define REPLAY_MODE_GRANULARITY REG_BIT(0) -#define PS_INVOCATION_COUNT _MMIO(0x2348) +#define PS_INVOCATION_COUNT XE_REG(0x2348) -#define CS_CHICKEN1 _MMIO(0x2580) +#define CS_CHICKEN1 XE_REG(0x2580) #define PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) #define PREEMPT_GPGPU_MID_THREAD_LEVEL PREEMPT_GPGPU_LEVEL(0, 0) #define PREEMPT_GPGPU_THREAD_GROUP_LEVEL PREEMPT_GPGPU_LEVEL(0, 1) @@ -64,70 +65,70 @@ #define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) #define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) -#define GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ -#define CCS_AUX_INV _MMIO(0x4208) +#define GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) /* Global MOCS regs */ +#define CCS_AUX_INV XE_REG(0x4208) -#define VD0_AUX_INV _MMIO(0x4218) -#define VE0_AUX_INV _MMIO(0x4238) +#define VD0_AUX_INV XE_REG(0x4218) +#define VE0_AUX_INV XE_REG(0x4238) -#define VE1_AUX_INV _MMIO(0x42b8) +#define VE1_AUX_INV XE_REG(0x42b8) #define AUX_INV REG_BIT(0) -#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) -#define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910) +#define XEHP_TILE0_ADDR_RANGE XE_REG_MCR(0x4900) +#define 
XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) -#define CHICKEN_RASTER_1 MCR_REG(0x6204) +#define CHICKEN_RASTER_1 XE_REG_MCR(0x6204) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) -#define CHICKEN_RASTER_2 MCR_REG(0x6208) +#define CHICKEN_RASTER_2 XE_REG_MCR(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) -#define VFLSKPD MCR_REG(0x62a8) +#define VFLSKPD XE_REG_MCR(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) -#define FF_MODE2 _MMIO(0x6604) -#define XEHP_FF_MODE2 MCR_REG(0x6604) +#define FF_MODE2 XE_REG(0x6604) +#define XEHP_FF_MODE2 XE_REG_MCR(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) -#define CACHE_MODE_1 _MMIO(0x7004) +#define CACHE_MODE_1 XE_REG(0x7004) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) -#define XEHP_PSS_MODE2 MCR_REG(0x703c) +#define XEHP_PSS_MODE2 XE_REG_MCR(0x703c) #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) -#define HIZ_CHICKEN _MMIO(0x7018) +#define HIZ_CHICKEN XE_REG(0x7018) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) #define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) -#define COMMON_SLICE_CHICKEN1 _MMIO(0x7010) +#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010) -#define COMMON_SLICE_CHICKEN4 _MMIO(0x7300) +#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) -#define COMMON_SLICE_CHICKEN3 _MMIO(0x7304) -#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304) +#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304) +#define XEHP_COMMON_SLICE_CHICKEN3 XE_REG_MCR(0x7304) #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) #define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) #define BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) #define DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) -#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c) +#define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) -#define VF_PREEMPTION _MMIO(0x83a4) +#define VF_PREEMPTION XE_REG(0x83a4) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) -#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4) +#define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4) #define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) -#define XEHP_SQCM MCR_REG(0x8724) +#define XEHP_SQCM XE_REG_MCR(0x8724) #define EN_32B_ACCESS REG_BIT(30) -#define MIRROR_FUSE3 _MMIO(0x9118) +#define MIRROR_FUSE3 XE_REG(0x9118) #define L3BANK_PAIR_COUNT 4 #define L3BANK_MASK REG_GENMASK(3, 0) /* on Xe_HP the same fuses indicates mslices instead of L3 banks */ @@ -135,30 +136,30 @@ #define MEML3_EN_MASK REG_GENMASK(3, 0) /* Fuse readout registers for GT */ -#define XEHP_FUSE4 _MMIO(0x9114) +#define XEHP_FUSE4 XE_REG(0x9114) #define GT_L3_EXC_MASK REG_GENMASK(6, 4) -#define GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) +#define GT_VEBOX_VDBOX_DISABLE XE_REG(0x9140) #define GT_VEBOX_DISABLE_MASK REG_GENMASK(19, 16) #define GT_VDBOX_DISABLE_MASK REG_GENMASK(7, 0) -#define XELP_EU_ENABLE _MMIO(0x9134) /* "_DISABLE" on Xe_LP */ +#define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ #define XELP_EU_MASK REG_GENMASK(7, 0) -#define XELP_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913c) -#define XEHP_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) -#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT _MMIO(0x9148) +#define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c) +#define XEHP_GT_COMPUTE_DSS_ENABLE XE_REG(0x9144) +#define 
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT XE_REG(0x9148) -#define GDRST _MMIO(0x941c) +#define GDRST XE_REG(0x941c) #define GRDOM_GUC REG_BIT(3) #define GRDOM_FULL REG_BIT(0) -#define MISCCPCTL _MMIO(0x9424) +#define MISCCPCTL XE_REG(0x9424) #define DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) -#define UNSLCGCTL9430 _MMIO(0x9430) +#define UNSLCGCTL9430 XE_REG(0x9430) #define MSQDUNIT_CLKGATE_DIS REG_BIT(3) -#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) +#define UNSLICE_UNIT_LEVEL_CLKGATE XE_REG(0x9434) #define VFUNIT_CLKGATE_DIS REG_BIT(20) #define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */ #define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */ @@ -166,7 +167,7 @@ #define HSUNIT_CLKGATE_DIS REG_BIT(8) #define VSUNIT_CLKGATE_DIS REG_BIT(3) -#define UNSLCGCTL9440 _MMIO(0x9440) +#define UNSLCGCTL9440 XE_REG(0x9440) #define GAMTLBOACS_CLKGATE_DIS REG_BIT(28) #define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27) #define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26) @@ -180,7 +181,7 @@ #define GAMTLBBLT_CLKGATE_DIS REG_BIT(14) #define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6) -#define UNSLCGCTL9444 _MMIO(0x9444) +#define UNSLCGCTL9444 XE_REG(0x9444) #define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30) #define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29) #define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28) @@ -198,7 +199,7 @@ #define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) #define LTCDD_CLKGATE_DIS REG_BIT(10) -#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4) +#define XEHP_SLICE_UNIT_LEVEL_CLKGATE XE_REG_MCR(0x94d4) #define L3_CR2X_CLKGATE_DIS REG_BIT(17) #define L3_CLKGATE_DIS REG_BIT(16) #define NODEDSS_CLKGATE_DIS REG_BIT(12) @@ -206,85 +207,85 @@ #define RCCUNIT_CLKGATE_DIS REG_BIT(7) #define SARBUNIT_CLKGATE_DIS REG_BIT(5) -#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) +#define UNSLICE_UNIT_LEVEL_CLKGATE2 XE_REG(0x94e4) #define VSUNIT_CLKGATE2_DIS REG_BIT(19) -#define SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524) +#define SUBSLICE_UNIT_LEVEL_CLKGATE XE_REG_MCR(0x9524) #define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) #define GWUNIT_CLKGATE_DIS REG_BIT(16) -#define SUBSLICE_UNIT_LEVEL_CLKGATE2 MCR_REG(0x9528) +#define SUBSLICE_UNIT_LEVEL_CLKGATE2 XE_REG_MCR(0x9528) #define CPSSUNIT_CLKGATE_DIS REG_BIT(9) -#define SSMCGCTL9530 MCR_REG(0x9530) +#define SSMCGCTL9530 XE_REG_MCR(0x9530) #define RTFUNIT_CLKGATE_DIS REG_BIT(18) -#define DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550) +#define DFR_RATIO_EN_AND_CHICKEN XE_REG_MCR(0x9550) #define DFR_DISABLE REG_BIT(9) -#define RPNSWREQ _MMIO(0xa008) +#define RPNSWREQ XE_REG(0xa008) #define REQ_RATIO_MASK REG_GENMASK(31, 23) -#define RC_CONTROL _MMIO(0xa090) -#define RC_STATE _MMIO(0xa094) +#define RC_CONTROL XE_REG(0xa090) +#define RC_STATE XE_REG(0xa094) -#define PMINTRMSK _MMIO(0xa168) +#define PMINTRMSK XE_REG(0xa168) #define PMINTR_DISABLE_REDIRECT_TO_GUC REG_BIT(31) #define ARAT_EXPIRED_INTRMSK REG_BIT(9) -#define FORCEWAKE_GT _MMIO(0xa188) +#define FORCEWAKE_GT XE_REG(0xa188) -#define PG_ENABLE _MMIO(0xa210) +#define PG_ENABLE XE_REG(0xa210) -#define CTC_MODE _MMIO(0xa26c) +#define CTC_MODE XE_REG(0xa26c) #define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) #define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0) -#define FORCEWAKE_RENDER _MMIO(0xa278) -#define FORCEWAKE_MEDIA_VDBOX(n) _MMIO(0xa540 + (n) * 4) -#define FORCEWAKE_MEDIA_VEBOX(n) _MMIO(0xa560 + (n) * 4) +#define FORCEWAKE_RENDER XE_REG(0xa278) +#define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4) +#define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) -#define XEHPC_LNCFMISCCFGREG0 MCR_REG(0xb01c) +#define XEHPC_LNCFMISCCFGREG0 XE_REG_MCR(0xb01c) #define 
XEHPC_OVRLSCCC REG_BIT(0) -#define XEHP_L3NODEARBCFG MCR_REG(0xb0b4) +#define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) -#define XEHP_L3SQCREG5 MCR_REG(0xb158) +#define XEHP_L3SQCREG5 XE_REG_MCR(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) -#define XEHP_L3SCQREG7 MCR_REG(0xb188) +#define XEHP_L3SCQREG7 XE_REG_MCR(0xb188) #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) -#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28) -#define RENDER_MOD_CTRL MCR_REG(0xcf2c) -#define COMP_MOD_CTRL MCR_REG(0xcf30) -#define XEHP_VDBX_MOD_CTRL MCR_REG(0xcf34) -#define XEHP_VEBX_MOD_CTRL MCR_REG(0xcf38) +#define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28) +#define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c) +#define COMP_MOD_CTRL XE_REG_MCR(0xcf30) +#define XEHP_VDBX_MOD_CTRL XE_REG_MCR(0xcf34) +#define XEHP_VEBX_MOD_CTRL XE_REG_MCR(0xcf38) #define FORCE_MISS_FTLB REG_BIT(3) -#define XEHP_GAMSTLB_CTRL MCR_REG(0xcf4c) +#define XEHP_GAMSTLB_CTRL XE_REG_MCR(0xcf4c) #define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12) #define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11) #define TAG_BLOCK_CLKGATE_DIS REG_BIT(7) -#define XEHP_GAMCNTRL_CTRL MCR_REG(0xcf54) +#define XEHP_GAMCNTRL_CTRL XE_REG_MCR(0xcf54) #define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) #define GLOBAL_INVALIDATION_MODE REG_BIT(2) -#define SAMPLER_MODE MCR_REG(0xe18c) +#define SAMPLER_MODE XE_REG_MCR(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) -#define HALF_SLICE_CHICKEN7 MCR_REG(0xe194) +#define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) -#define CACHE_MODE_SS MCR_REG(0xe420) +#define CACHE_MODE_SS XE_REG_MCR(0xe420) #define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) #define DISABLE_ECC REG_BIT(5) #define ENABLE_PREFETCH_INTO_IC REG_BIT(3) -#define ROW_CHICKEN4 MCR_REG(0xe48c) +#define ROW_CHICKEN4 XE_REG_MCR(0xe48c) #define DISABLE_GRF_CLEAR REG_BIT(13) #define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) #define DISABLE_TDL_PUSH REG_BIT(9) @@ -293,84 +294,84 @@ #define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) -#define ROW_CHICKEN MCR_REG(0xe4f0) +#define ROW_CHICKEN XE_REG_MCR(0xe4f0) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) -#define ROW_CHICKEN2 MCR_REG(0xe4f4) +#define ROW_CHICKEN2 XE_REG_MCR(0xe4f4) #define DISABLE_READ_SUPPRESSION REG_BIT(15) #define DISABLE_EARLY_READ REG_BIT(14) #define ENABLE_LARGE_GRF_MODE REG_BIT(12) #define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) #define DISABLE_DOP_GATING REG_BIT(0) -#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) +#define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) -#define RT_CTRL MCR_REG(0xe530) +#define RT_CTRL XE_REG_MCR(0xe530) #define DIS_NULL_QUERY REG_BIT(10) -#define LSC_CHICKEN_BIT_0 MCR_REG(0xe7c8) +#define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) -#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4) +#define LSC_CHICKEN_BIT_0_UDW XE_REG_MCR(0xe7c8 + 4) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) -#define SARB_CHICKEN1 MCR_REG(0xe90c) +#define SARB_CHICKEN1 
XE_REG_MCR(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) -#define RCU_MODE _MMIO(0x14800) +#define RCU_MODE XE_REG(0x14800) #define RCU_MODE_CCS_ENABLE REG_BIT(0) -#define FORCEWAKE_ACK_GT _MMIO(0x130044) +#define FORCEWAKE_ACK_GT XE_REG(0x130044) #define FORCEWAKE_KERNEL BIT(0) #define FORCEWAKE_USER BIT(1) #define FORCEWAKE_KERNEL_FALLBACK BIT(15) -#define GT_CORE_STATUS _MMIO(0x138060) +#define GT_CORE_STATUS XE_REG(0x138060) #define RCN_MASK REG_GENMASK(2, 0) #define GT_RC0 0 #define GT_RC6 3 -#define GT_GFX_RC6_LOCKED _MMIO(0x138104) -#define GT_GFX_RC6 _MMIO(0x138108) +#define GT_GFX_RC6_LOCKED XE_REG(0x138104) +#define GT_GFX_RC6 XE_REG(0x138108) -#define GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) +#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4)) -#define GUC_SG_INTR_ENABLE _MMIO(0x190038) +#define GUC_SG_INTR_ENABLE XE_REG(0x190038) #define ENGINE1_MASK REG_GENMASK(31, 16) #define ENGINE0_MASK REG_GENMASK(15, 0) -#define GPM_WGBOXPERF_INTR_ENABLE _MMIO(0x19003c) +#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c) -#define INTR_IDENTITY_REG(x) _MMIO(0x190060 + ((x) * 4)) +#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4)) #define INTR_DATA_VALID REG_BIT(31) #define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x) #define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) #define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) #define OTHER_GUC_INSTANCE 0 -#define RENDER_COPY_INTR_ENABLE _MMIO(0x190030) -#define VCS_VECS_INTR_ENABLE _MMIO(0x190034) -#define CCS_RSVD_INTR_ENABLE _MMIO(0x190048) -#define IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4)) -#define RCS0_RSVD_INTR_MASK _MMIO(0x190090) -#define BCS_RSVD_INTR_MASK _MMIO(0x1900a0) -#define VCS0_VCS1_INTR_MASK _MMIO(0x1900a8) -#define VCS2_VCS3_INTR_MASK _MMIO(0x1900ac) -#define VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) -#define GUC_SG_INTR_MASK _MMIO(0x1900e8) -#define GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) -#define CCS0_CCS1_INTR_MASK _MMIO(0x190100) -#define CCS2_CCS3_INTR_MASK _MMIO(0x190104) -#define XEHPC_BCS1_BCS2_INTR_MASK _MMIO(0x190110) -#define XEHPC_BCS3_BCS4_INTR_MASK _MMIO(0x190114) -#define XEHPC_BCS5_BCS6_INTR_MASK _MMIO(0x190118) -#define XEHPC_BCS7_BCS8_INTR_MASK _MMIO(0x19011c) +#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) +#define VCS_VECS_INTR_ENABLE XE_REG(0x190034) +#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048) +#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4)) +#define RCS0_RSVD_INTR_MASK XE_REG(0x190090) +#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0) +#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8) +#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac) +#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0) +#define GUC_SG_INTR_MASK XE_REG(0x1900e8) +#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec) +#define CCS0_CCS1_INTR_MASK XE_REG(0x190100) +#define CCS2_CCS3_INTR_MASK XE_REG(0x190104) +#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110) +#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114) +#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) +#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) #endif diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index bc9b42b38795..37e0ac550931 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -13,7 +13,7 @@ /* Definitions of GuC H/W registers, bits, etc */ -#define GUC_STATUS _MMIO(0xc000) +#define GUC_STATUS XE_REG(0xc000) #define GS_AUTH_STATUS_MASK REG_GENMASK(31, 30) #define GS_AUTH_STATUS_BAD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1) #define GS_AUTH_STATUS_GOOD 
REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2) @@ -27,52 +27,52 @@ #define GS_BOOTROM_JUMP_PASSED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76) #define GS_MIA_IN_RESET REG_BIT(0) -#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4) +#define SOFT_SCRATCH(n) XE_REG(0xc180 + (n) * 4) #define SOFT_SCRATCH_COUNT 16 -#define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) +#define UOS_RSA_SCRATCH(i) XE_REG(0xc200 + (i) * 4) #define UOS_RSA_SCRATCH_COUNT 64 -#define DMA_ADDR_0_LOW _MMIO(0xc300) -#define DMA_ADDR_0_HIGH _MMIO(0xc304) -#define DMA_ADDR_1_LOW _MMIO(0xc308) -#define DMA_ADDR_1_HIGH _MMIO(0xc30c) +#define DMA_ADDR_0_LOW XE_REG(0xc300) +#define DMA_ADDR_0_HIGH XE_REG(0xc304) +#define DMA_ADDR_1_LOW XE_REG(0xc308) +#define DMA_ADDR_1_HIGH XE_REG(0xc30c) #define DMA_ADDR_SPACE_MASK REG_GENMASK(20, 16) #define DMA_ADDRESS_SPACE_WOPCM REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7) -#define DMA_COPY_SIZE _MMIO(0xc310) -#define DMA_CTRL _MMIO(0xc314) +#define DMA_COPY_SIZE XE_REG(0xc310) +#define DMA_CTRL XE_REG(0xc314) #define HUC_UKERNEL REG_BIT(9) #define UOS_MOVE REG_BIT(4) #define START_DMA REG_BIT(0) -#define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define DMA_GUC_WOPCM_OFFSET XE_REG(0xc340) #define GUC_WOPCM_OFFSET_SHIFT 14 #define GUC_WOPCM_OFFSET_MASK REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT) #define HUC_LOADING_AGENT_GUC REG_BIT(1) #define GUC_WOPCM_OFFSET_VALID REG_BIT(0) -#define GUC_MAX_IDLE_COUNT _MMIO(0xc3e4) +#define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4) -#define HUC_STATUS2 _MMIO(0xd3b0) +#define HUC_STATUS2 XE_REG(0xd3b0) #define HUC_FW_VERIFIED REG_BIT(7) -#define HUC_KERNEL_LOAD_INFO _MMIO(0xc1dc) +#define HUC_KERNEL_LOAD_INFO XE_REG(0xc1dc) #define HUC_LOAD_SUCCESSFUL REG_BIT(0) -#define GUC_WOPCM_SIZE _MMIO(0xc050) +#define GUC_WOPCM_SIZE XE_REG(0xc050) #define GUC_WOPCM_SIZE_MASK REG_GENMASK(31, 12) #define GUC_WOPCM_SIZE_LOCKED REG_BIT(0) -#define GT_PM_CONFIG _MMIO(0x13816c) +#define GT_PM_CONFIG XE_REG(0x13816c) #define GT_DOORBELL_ENABLE REG_BIT(0) -#define GTCR _MMIO(0x4274) +#define GTCR XE_REG(0x4274) #define GTCR_INVALIDATE REG_BIT(0) -#define GUC_TLB_INV_CR _MMIO(0xcee8) +#define GUC_TLB_INV_CR XE_REG(0xcee8) #define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) -#define GUC_ARAT_C6DIS _MMIO(0xa178) +#define GUC_ARAT_C6DIS XE_REG(0xa178) -#define GUC_SHIM_CONTROL _MMIO(0xc064) +#define GUC_SHIM_CONTROL XE_REG(0xc064) #define PVC_GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) #define PVC_GUC_MOCS_UC_INDEX 1 #define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK, \ @@ -87,9 +87,9 @@ #define GUC_DISABLE_SRAM_INIT_TO_ZEROES REG_BIT(0) -#define GUC_SEND_INTERRUPT _MMIO(0xc4c8) +#define GUC_SEND_INTERRUPT XE_REG(0xc4c8) #define GUC_SEND_TRIGGER REG_BIT(0) -#define GUC_HOST_INTERRUPT _MMIO(0x1901f0) +#define GUC_HOST_INTERRUPT XE_REG(0x1901f0) #define GUC_NUM_DOORBELLS 256 @@ -103,24 +103,24 @@ struct guc_doorbell_info { u32 reserved[14]; } __packed; -#define DRBREGL(x) _MMIO(0x1000 + (x) * 8) +#define DRBREGL(x) XE_REG(0x1000 + (x) * 8) #define DRB_VALID REG_BIT(0) -#define DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) +#define DRBREGU(x) XE_REG(0x1000 + (x) * 8 + 4) -#define DIST_DBS_POPULATED _MMIO(0xd08) +#define DIST_DBS_POPULATED XE_REG(0xd08) #define DOORBELLS_PER_SQIDI_MASK REG_GENMASK(23, 16) #define SQIDIS_DOORBELL_EXIST_MASK REG_GENMASK(15, 0) -#define GUC_BCS_RCS_IER _MMIO(0xC550) -#define GUC_VCS2_VCS1_IER _MMIO(0xC554) -#define GUC_WD_VECS_IER _MMIO(0xC558) -#define GUC_PM_P24C_IER _MMIO(0xC55C) +#define GUC_BCS_RCS_IER XE_REG(0xC550) +#define GUC_VCS2_VCS1_IER XE_REG(0xC554) +#define 
GUC_WD_VECS_IER XE_REG(0xC558) +#define GUC_PM_P24C_IER XE_REG(0xC55C) -#define VF_SW_FLAG(n) _MMIO(0x190240 + (n) * 4) -#define VF_SW_FLAG_COUNT 4 +#define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4) +#define VF_SW_FLAG_COUNT 4 -#define MED_VF_SW_FLAG(n) _MMIO(0x190310 + (n) * 4) -#define MED_VF_SW_FLAG_COUNT 4 +#define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4) +#define MED_VF_SW_FLAG_COUNT 4 /* GuC Interrupt Vector */ #define GUC_INTR_GUC2HOST BIT(15) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index e31137e2c42f..787f223bc727 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -94,13 +94,4 @@ struct xe_reg_mcr { .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ }) -/* - * TODO: remove these once the register declarations are not using them anymore - */ -#undef _MMIO -#undef MCR_REG -#define _MMIO(r_) ((const struct xe_reg){ .reg = r_ }) -#define MCR_REG(r_) ((const struct xe_reg_mcr){ .__reg.reg = r_, \ - .__reg.mcr = 1 }) - #endif diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 9d18430fd225..e0734c8f922c 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,7 +7,7 @@ #include "regs/xe_reg_defs.h" -#define GU_CNTL _MMIO(0x101010) +#define GU_CNTL XE_REG(0x101010) #define LMEM_INIT REG_BIT(7) #define RENDER_RING_BASE 0x02000 @@ -42,18 +42,18 @@ #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) #define GT_RENDER_USER_INTERRUPT REG_BIT(0) -#define FF_THREAD_MODE _MMIO(0x20a0) +#define FF_THREAD_MODE XE_REG(0x20a0) #define FF_TESSELATION_DOP_GATE_DISABLE BIT(19) -#define PVC_RP_STATE_CAP _MMIO(0x281014) -#define MTL_RP_STATE_CAP _MMIO(0x138000) +#define PVC_RP_STATE_CAP XE_REG(0x281014) +#define MTL_RP_STATE_CAP XE_REG(0x138000) -#define MTL_MEDIAP_STATE_CAP _MMIO(0x138020) +#define MTL_MEDIAP_STATE_CAP XE_REG(0x138020) #define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) #define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) -#define MTL_GT_RPE_FREQUENCY _MMIO(0x13800c) -#define MTL_MPE_FREQUENCY _MMIO(0x13802c) +#define MTL_GT_RPE_FREQUENCY XE_REG(0x13800c) +#define MTL_MPE_FREQUENCY XE_REG(0x13802c) #define MTL_RPE_MASK REG_GENMASK(8, 0) #define TRANSCODER_A_OFFSET 0x60000 @@ -69,32 +69,32 @@ #define PIPE_DSI0_OFFSET 0x7b000 #define PIPE_DSI1_OFFSET 0x7b800 -#define SOFTWARE_FLAGS_SPR33 _MMIO(0x4f084) +#define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084) #define PCU_IRQ_OFFSET 0x444e0 #define GU_MISC_IRQ_OFFSET 0x444f0 #define GU_MISC_GSE REG_BIT(27) -#define GFX_MSTR_IRQ _MMIO(0x190010) +#define GFX_MSTR_IRQ XE_REG(0x190010) #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) #define DISPLAY_IRQ REG_BIT(16) #define GT_DW_IRQ(x) REG_BIT(x) -#define DG1_MSTR_TILE_INTR _MMIO(0x190008) +#define DG1_MSTR_TILE_INTR XE_REG(0x190008) #define DG1_MSTR_IRQ REG_BIT(31) #define DG1_MSTR_TILE(t) REG_BIT(t) -#define TIMESTAMP_OVERRIDE _MMIO(0x44074) +#define TIMESTAMP_OVERRIDE XE_REG(0x44074) #define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) #define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) -#define GGC _MMIO(0x108040) +#define GGC XE_REG(0x108040) #define GMS_MASK REG_GENMASK(15, 8) #define GGMS_MASK REG_GENMASK(7, 6) -#define GSMBASE _MMIO(0x108100) -#define DSMBASE _MMIO(0x1080C0) +#define GSMBASE XE_REG(0x108100) +#define DSMBASE XE_REG(0x1080C0) #define BDSM_MASK REG_GENMASK64(63, 20) #endif diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 
29e112c108c6..51d215f08113 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -18,17 +18,17 @@ #include "xe_reg_sr.h" #include "xe_rtp.h" -#undef _MMIO -#undef MCR_REG -#define _MMIO(x) _XE_RTP_REG(x) -#define MCR_REG(x) _XE_RTP_MCR_REG(x) +#undef XE_REG +#undef XE_REG_MCR +#define XE_REG(x, ...) _XE_RTP_REG(x) +#define XE_REG_MCR(x, ...) _XE_RTP_MCR_REG(x) -#define REGULAR_REG1 _MMIO(1) -#define REGULAR_REG2 _MMIO(2) -#define REGULAR_REG3 _MMIO(3) -#define MCR_REG1 MCR_REG(1) -#define MCR_REG2 MCR_REG(2) -#define MCR_REG3 MCR_REG(3) +#define REGULAR_REG1 XE_REG(1) +#define REGULAR_REG2 XE_REG(2) +#define REGULAR_REG3 XE_REG(3) +#define MCR_REG1 XE_REG_MCR(1) +#define MCR_REG2 XE_REG_MCR(2) +#define MCR_REG3 XE_REG_MCR(3) struct rtp_test_case { const char *name; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index fc580d961dbb..4e5ad616063d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -185,11 +185,11 @@ err: return err; } -#define GUC_TLB_INV_CR _MMIO(0xcee8) +#define GUC_TLB_INV_CR XE_REG(0xcee8) #define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) -#define PVC_GUC_TLB_INV_DESC0 _MMIO(0xcf7c) +#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c) #define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) -#define PVC_GUC_TLB_INV_DESC1 _MMIO(0xcf80) +#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) #define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) void xe_ggtt_invalidate(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 4e9e9b1aad02..89d20faced19 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -22,6 +22,8 @@ #include "xe_uc_fw.h" #include "xe_wopcm.h" +#define MEDIA_GUC_HOST_INTERRUPT XE_REG(0x190304) + static struct xe_gt * guc_to_gt(struct xe_guc *guc) { @@ -244,8 +246,6 @@ static void guc_write_params(struct xe_guc *guc) xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]); } -#define MEDIA_GUC_HOST_INTERRUPT _MMIO(0x190304) - int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 6d59e36b6e5c..72d460d5323b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -23,18 +23,18 @@ #define MCHBAR_MIRROR_BASE_SNB 0x140000 -#define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998) -#define RP0_MASK REG_GENMASK(7, 0) -#define RP1_MASK REG_GENMASK(15, 8) -#define RPN_MASK REG_GENMASK(23, 16) +#define GEN6_RP_STATE_CAP XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998) +#define RP0_MASK REG_GENMASK(7, 0) +#define RP1_MASK REG_GENMASK(15, 8) +#define RPN_MASK REG_GENMASK(23, 16) -#define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) +#define GEN10_FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) -#define GEN12_RPSTAT1 _MMIO(0x1381b4) +#define GEN12_RPSTAT1 XE_REG(0x1381b4) #define GEN12_CAGF_MASK REG_GENMASK(19, 11) -#define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) +#define MTL_MIRROR_TARGET_WP1 XE_REG(0xc60) #define MTL_CAGF_MASK REG_GENMASK(8, 0) #define GT_FREQUENCY_MULTIPLIER 50 diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 9dd730d707e5..2fffb2865cab 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -22,9 +22,9 @@ * Interrupt registers for a unit are always consecutive and ordered * ISR, IMR, IIR, IER. 
*/ -#define IMR(offset) _MMIO(offset + 0x4) -#define IIR(offset) _MMIO(offset + 0x8) -#define IER(offset) _MMIO(offset + 0xc) +#define IMR(offset) XE_REG(offset + 0x4) +#define IIR(offset) XE_REG(offset + 0x8) +#define IER(offset) XE_REG(offset + 0xc) static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg) { diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 24a3c1842144..3b719c774efa 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -17,7 +17,7 @@ #include "xe_macros.h" #include "xe_module.h" -#define XEHP_MTCFG_ADDR _MMIO(0x101800) +#define XEHP_MTCFG_ADDR XE_REG(0x101800) #define TILE_COUNT REG_GENMASK(15, 8) #define GEN12_LMEM_BAR 2 diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 67c63facdbf9..f2ceecd536ed 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -477,8 +477,8 @@ static void __init_mocs_table(struct xe_gt *gt, for (i = 0; i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; i++) { - mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, _MMIO(addr + i * 4).reg, mocs); - xe_mmio_write32(gt, _MMIO(addr + i * 4).reg, mocs); + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, XE_REG(addr + i * 4).reg, mocs); + xe_mmio_write32(gt, XE_REG(addr + i * 4).reg, mocs); } } diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index fcf6ae2c92cc..abee41fa3cb9 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -71,7 +71,7 @@ static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries) static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries) { for (int i = 0; i < n_entries; i++) - xe_gt_mcr_multicast_write(gt, MCR_REG(_PAT_INDEX(i)), table[i]); + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_INDEX(i)), table[i]); } void xe_pat_init(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 4e689cd4b23b..837ff7c71280 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -7,7 +7,7 @@ #include "regs/xe_reg_defs.h" -#define PCODE_MAILBOX _MMIO(0x138124) +#define PCODE_MAILBOX XE_REG(0x138124) #define PCODE_READY REG_BIT(31) #define PCODE_MB_PARAM2 REG_GENMASK(23, 16) #define PCODE_MB_PARAM1 REG_GENMASK(15, 8) @@ -22,8 +22,8 @@ #define PCODE_GT_RATIO_OUT_OF_RANGE 0x10 #define PCODE_REJECTED 0x11 -#define PCODE_DATA0 _MMIO(0x138128) -#define PCODE_DATA1 _MMIO(0x13812C) +#define PCODE_DATA0 XE_REG(0x138128) +#define PCODE_DATA1 XE_REG(0x13812C) /* Min Freq QOS Table */ #define PCODE_WRITE_MIN_FREQ_TABLE 0x8 diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index e38397fc771a..b7bbba929170 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -151,7 +151,7 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg, val = (entry->clr_bits ?: entry->set_bits) << 16; else if (entry->clr_bits + 1) val = (entry->reg_type == XE_RTP_REG_MCR ? 
- xe_gt_mcr_unicast_read_any(gt, MCR_REG(reg)) : + xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(reg)) : xe_mmio_read32(gt, reg)) & (~entry->clr_bits); else val = 0; @@ -166,7 +166,7 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg, drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val); if (entry->reg_type == XE_RTP_REG_MCR) - xe_gt_mcr_multicast_write(gt, MCR_REG(reg), val); + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(reg), val); else xe_mmio_write32(gt, reg, val); } diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 5a2665706912..310d5dfe30d5 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -11,10 +11,10 @@ #include "xe_platform_types.h" #include "xe_rtp.h" -#undef _MMIO -#undef MCR_REG -#define _MMIO(x) _XE_RTP_REG(x) -#define MCR_REG(x) _XE_RTP_MCR_REG(x) +#undef XE_REG +#undef XE_REG_MCR +#define XE_REG(x, ...) _XE_RTP_REG(x) +#define XE_REG_MCR(x, ...) _XE_RTP_MCR_REG(x) static bool match_not_render(const struct xe_gt *gt, const struct xe_hw_engine *hwe) @@ -45,10 +45,10 @@ static const struct xe_rtp_entry register_whitelist[] = { }, { XE_RTP_NAME("16014440446"), XE_RTP_RULES(PLATFORM(PVC)), - XE_RTP_ACTIONS(WHITELIST(_MMIO(0x4400), + XE_RTP_ACTIONS(WHITELIST(XE_REG(0x4400), RING_FORCE_TO_NONPRIV_DENY | RING_FORCE_TO_NONPRIV_RANGE_64), - WHITELIST(_MMIO(0x4500), + WHITELIST(XE_REG(0x4500), RING_FORCE_TO_NONPRIV_DENY | RING_FORCE_TO_NONPRIV_RANGE_64)) }, diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index a3be7c77753a..c0c587a73980 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -197,7 +197,7 @@ struct xe_reg_sr; * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all * the bits * @reg_: Register - * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO + * @reg_type_: Register type - automatically expanded by XE_REG * @val_: Value to set * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -213,7 +213,7 @@ struct xe_reg_sr; /** * XE_RTP_ACTION_SET - Set bits from @val_ in the register. * @reg_: Register - * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO + * @reg_type_: Register type - automatically expanded by XE_REG * @val_: Bits to set in the register * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -232,7 +232,7 @@ struct xe_reg_sr; /** * XE_RTP_ACTION_CLR: Clear bits from @val_ in the register. 
* @reg_: Register - * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO + * @reg_type_: Register type - automatically expanded by XE_REG * @val_: Bits to clear in the register * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -251,7 +251,7 @@ struct xe_reg_sr; /** * XE_RTP_ACTION_FIELD_SET: Set a bit range * @reg_: Register - * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO + * @reg_type_: Register type - automatically expanded by XE_REG * @mask_bits_: Mask of bits to be changed in the register, forming a field * @val_: Value to set in the field denoted by @mask_bits_ * @...: Additional fields to override in the struct xe_rtp_action entry @@ -274,7 +274,7 @@ struct xe_reg_sr; /** * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist * @reg_: Register - * @reg_type_: Register type - automatically expanded by MCR_REG/_MMIO + * @reg_type_: Register type - automatically expanded by XE_REG * @val_: Whitelist-specific flags to set * @...: Additional fields to override in the struct xe_rtp_action entry * diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 43912312cfba..f6eefa951175 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -12,10 +12,10 @@ #include "xe_platform_types.h" #include "xe_rtp.h" -#undef _MMIO -#undef MCR_REG -#define _MMIO(x) _XE_RTP_REG(x) -#define MCR_REG(x) _XE_RTP_MCR_REG(x) +#undef XE_REG +#undef XE_REG_MCR +#define XE_REG(x, ...) _XE_RTP_REG(x) +#define XE_REG_MCR(x, ...) _XE_RTP_MCR_REG(x) static const struct xe_rtp_entry gt_tunings[] = { { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 7a9bf588301e..ed3fa51ccd24 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -87,10 +87,10 @@ * a more declarative approach rather than procedural. */ -#undef _MMIO -#undef MCR_REG -#define _MMIO(x) _XE_RTP_REG(x) -#define MCR_REG(x) _XE_RTP_MCR_REG(x) +#undef XE_REG +#undef XE_REG_MCR +#define XE_REG(x, ...) _XE_RTP_REG(x) +#define XE_REG_MCR(x, ...) _XE_RTP_MCR_REG(x) __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); -- cgit v1.2.3 From ca2acce76d81fda9520b8b797119deddbe660968 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:54 -0700 Subject: drm/xe: Annotate masked registers used by RTP Go over all registers used in xe_rtp tables and mark the registers as masked if they were passed a XE_RTP_ACTION_FLAG(MASKED_REG) flag. This will allow the flag to be removed in future when xe_rtp starts using the real xe_reg_t type. 
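For context, a "masked" register is one where the hardware interprets the upper 16 bits of the written value as a write-enable mask for the lower 16 bits, so individual bits can be updated without a read-modify-write. A minimal sketch of how such a write value is typically composed (the bit position is chosen arbitrarily for illustration):

    /* update only bit 5 of a masked register: bit 21 (5 + 16) enables the write */
    u32 val = (REG_BIT(5) << 16) | REG_BIT(5);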
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230427223256.1432787-9-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 48 ++++++++++++++++---------------- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 80b66844a8ec..f6b3b99a562a 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -21,7 +21,7 @@ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ -#define RING_PSMI_CTL(base) XE_REG((base) + 0x50) +#define RING_PSMI_CTL(base) XE_REG((base) + 0x50, XE_REG_OPTION_MASKED) #define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) #define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5648305a8f5a..8dd3bf2f6377 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -45,19 +45,19 @@ #define MTL_MCR_GROUPID REG_GENMASK(11, 8) #define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) -#define FF_SLICE_CS_CHICKEN1 XE_REG(0x20e0) +#define FF_SLICE_CS_CHICKEN1 XE_REG(0x20e0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) -#define FF_SLICE_CS_CHICKEN2 XE_REG(0x20e4) +#define FF_SLICE_CS_CHICKEN2 XE_REG(0x20e4, XE_REG_OPTION_MASKED) #define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) -#define CS_DEBUG_MODE1 XE_REG(0x20ec) +#define CS_DEBUG_MODE1 XE_REG(0x20ec, XE_REG_OPTION_MASKED) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define REPLAY_MODE_GRANULARITY REG_BIT(0) #define PS_INVOCATION_COUNT XE_REG(0x2348) -#define CS_CHICKEN1 XE_REG(0x2580) +#define CS_CHICKEN1 XE_REG(0x2580, XE_REG_OPTION_MASKED) #define PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) #define PREEMPT_GPGPU_MID_THREAD_LEVEL PREEMPT_GPGPU_LEVEL(0, 0) #define PREEMPT_GPGPU_THREAD_GROUP_LEVEL PREEMPT_GPGPU_LEVEL(0, 1) @@ -77,13 +77,13 @@ #define XEHP_TILE0_ADDR_RANGE XE_REG_MCR(0x4900) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) -#define CHICKEN_RASTER_1 XE_REG_MCR(0x6204) +#define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) -#define CHICKEN_RASTER_2 XE_REG_MCR(0x6208) +#define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) -#define VFLSKPD XE_REG_MCR(0x62a8) +#define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -94,35 +94,35 @@ #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) -#define CACHE_MODE_1 XE_REG(0x7004) +#define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) -#define XEHP_PSS_MODE2 XE_REG_MCR(0x703c) +#define XEHP_PSS_MODE2 XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED) #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) -#define HIZ_CHICKEN XE_REG(0x7018) +#define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) #define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) #define COMMON_SLICE_CHICKEN1 XE_REG(0x7010) -#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300) +#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, 
XE_REG_OPTION_MASKED) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) -#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304) -#define XEHP_COMMON_SLICE_CHICKEN3 XE_REG_MCR(0x7304) +#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) +#define XEHP_COMMON_SLICE_CHICKEN3 XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED) #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) #define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) #define BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) #define DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) -#define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c) +#define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) -#define VF_PREEMPTION XE_REG(0x83a4) +#define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) -#define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4) +#define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4, XE_REG_OPTION_MASKED) #define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) #define XEHP_SQCM XE_REG_MCR(0x8724) @@ -244,7 +244,7 @@ #define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4) #define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) -#define XEHPC_LNCFMISCCFGREG0 XE_REG_MCR(0xb01c) +#define XEHPC_LNCFMISCCFGREG0 XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED) #define XEHPC_OVRLSCCC REG_BIT(0) #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) @@ -272,20 +272,20 @@ #define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) #define GLOBAL_INVALIDATION_MODE REG_BIT(2) -#define SAMPLER_MODE XE_REG_MCR(0xe18c) +#define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) -#define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194) +#define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) -#define CACHE_MODE_SS XE_REG_MCR(0xe420) +#define CACHE_MODE_SS XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED) #define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) #define DISABLE_ECC REG_BIT(5) #define ENABLE_PREFETCH_INTO_IC REG_BIT(3) -#define ROW_CHICKEN4 XE_REG_MCR(0xe48c) +#define ROW_CHICKEN4 XE_REG_MCR(0xe48c, XE_REG_OPTION_MASKED) #define DISABLE_GRF_CLEAR REG_BIT(13) #define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) #define DISABLE_TDL_PUSH REG_BIT(9) @@ -294,18 +294,18 @@ #define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) -#define ROW_CHICKEN XE_REG_MCR(0xe4f0) +#define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) -#define ROW_CHICKEN2 XE_REG_MCR(0xe4f4) +#define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED) #define DISABLE_READ_SUPPRESSION REG_BIT(15) #define DISABLE_EARLY_READ REG_BIT(14) #define ENABLE_LARGE_GRF_MODE REG_BIT(12) #define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) #define DISABLE_DOP_GATING REG_BIT(0) -#define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0) +#define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define RT_CTRL XE_REG_MCR(0xe530) -- cgit v1.2.3 From 07fbd1f85df18a9a33556de76499fd3693639a7d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:55 -0700 Subject: drm/xe: Plumb xe_reg into WAs, rtp, etc Now that struct xe_reg and struct xe_reg_mcr are types that can be used by xe, convert more of 
the driver to use them. Some notes about the conversions: - The RTP tables don't need the MASKED flags anymore in the actions as that information now comes from the register definition - There is no need for the _XE_RTP_REG/_XE_RTP_REG_MCR macros and the register types on RTP infra: that comes from the register definitions. - When declaring the RTP entries, there is no need anymore to undef XE_REG and friends: the RTP macros deal with removing the cast where needed due to not being able to use a compound statement for initialization in the tables - The index in the reg-sr xarray is the register offset only. Otherwise we wouldn't catch mistakes about adding both a MCR-style and normal-style registers. For that, the register is now also part of the entry, so the options can be compared to check for compatible entries. In order to be able to accomplish this, some improvements are needed on the RTP macros. Change its implementation to concentrate on "pasting a prefix to each argument" rather than the more general "call any macro for each argument". Hopefully this will avoid trying to extend this infra and making it more complex. With the use of tuples for building the arguments, it's not possible to pass additional register fields and using xe_reg in the RTP tables. xe_mmio_* still need to be converted, from u32 to xe_reg, but that is left for another change. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230427223256.1432787-10-lucas.demarchi@intel.com Link: https://lore.kernel.org/r/20230427223256.1432787-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 41 ++++---- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- drivers/gpu/drm/xe/xe_reg_sr.c | 51 +++++----- drivers/gpu/drm/xe/xe_reg_sr.h | 3 +- drivers/gpu/drm/xe/xe_reg_sr_types.h | 10 +- drivers/gpu/drm/xe/xe_reg_whitelist.c | 4 +- drivers/gpu/drm/xe/xe_rtp.c | 7 +- drivers/gpu/drm/xe/xe_rtp.h | 166 +++++++++++++++++++-------------- drivers/gpu/drm/xe/xe_rtp_types.h | 22 ++--- drivers/gpu/drm/xe/xe_tuning.c | 7 +- drivers/gpu/drm/xe/xe_wa.c | 134 +++++++++----------------- 11 files changed, 206 insertions(+), 241 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 51d215f08113..ad2fe8a39a78 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -18,25 +18,21 @@ #include "xe_reg_sr.h" #include "xe_rtp.h" -#undef XE_REG -#undef XE_REG_MCR -#define XE_REG(x, ...) _XE_RTP_REG(x) -#define XE_REG_MCR(x, ...) _XE_RTP_MCR_REG(x) - #define REGULAR_REG1 XE_REG(1) #define REGULAR_REG2 XE_REG(2) #define REGULAR_REG3 XE_REG(3) #define MCR_REG1 XE_REG_MCR(1) #define MCR_REG2 XE_REG_MCR(2) #define MCR_REG3 XE_REG_MCR(3) +#define MASKED_REG1 XE_REG(1, XE_REG_OPTION_MASKED) + +#undef XE_REG_MCR +#define XE_REG_MCR(...) 
XE_REG(__VA_ARGS__, .mcr = 1) struct rtp_test_case { const char *name; - struct { - u32 offset; - u32 type; - } expected_reg; - u32 expected_set_bits; + struct xe_reg expected_reg; + u32 expected_set_bits; u32 expected_clr_bits; unsigned long expected_count; unsigned int expected_sr_errors; @@ -56,7 +52,7 @@ static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe) static const struct rtp_test_case cases[] = { { .name = "coalesce-same-reg", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0) | REG_BIT(1), .expected_clr_bits = REG_BIT(0) | REG_BIT(1), .expected_count = 1, @@ -75,7 +71,7 @@ static const struct rtp_test_case cases[] = { }, { .name = "no-match-no-add", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), .expected_count = 1, @@ -94,7 +90,7 @@ static const struct rtp_test_case cases[] = { }, { .name = "no-match-no-add-multiple-rules", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), .expected_count = 1, @@ -113,7 +109,7 @@ static const struct rtp_test_case cases[] = { }, { .name = "two-regs-two-entries", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), .expected_count = 2, @@ -132,7 +128,7 @@ static const struct rtp_test_case cases[] = { }, { .name = "clr-one-set-other", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(1) | REG_BIT(0), .expected_count = 1, @@ -153,7 +149,7 @@ static const struct rtp_test_case cases[] = { #define TEMP_MASK REG_GENMASK(10, 8) #define TEMP_FIELD REG_FIELD_PREP(TEMP_MASK, 2) .name = "set-field", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = TEMP_FIELD, .expected_clr_bits = TEMP_MASK, .expected_count = 1, @@ -171,7 +167,7 @@ static const struct rtp_test_case cases[] = { }, { .name = "conflict-duplicate", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), .expected_count = 1, @@ -191,7 +187,7 @@ static const struct rtp_test_case cases[] = { }, { .name = "conflict-not-disjoint", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), .expected_count = 1, @@ -211,7 +207,7 @@ static const struct rtp_test_case cases[] = { }, { .name = "conflict-reg-type", - .expected_reg = { REGULAR_REG1 }, + .expected_reg = REGULAR_REG1, .expected_set_bits = REG_BIT(0), .expected_clr_bits = REG_BIT(0), .expected_count = 1, @@ -229,8 +225,7 @@ static const struct rtp_test_case cases[] = { /* drop: regular vs masked */ { XE_RTP_NAME("basic-3"), XE_RTP_RULES(FUNC(match_yes)), - XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0), - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(MASKED_REG1, REG_BIT(0))) }, {} }, @@ -249,7 +244,7 @@ static void xe_rtp_process_tests(struct kunit *test) xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL); xa_for_each(®_sr->xa, idx, sre) { - if (idx == param->expected_reg.offset) + if (idx == param->expected_reg.reg) sr_entry = sre; count++; @@ -258,7 +253,7 @@ static void xe_rtp_process_tests(struct kunit *test) KUNIT_EXPECT_EQ(test, count, param->expected_count); KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); 
KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); - KUNIT_EXPECT_EQ(test, sr_entry->reg_type, param->expected_reg.type); + KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors); } diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 6a723bda2aa9..676137dcb510 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -461,7 +461,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); xa_for_each(&hwe->reg_sr.xa, idx, entry) { - u32 flags = entry->masked_reg ? GUC_REGSET_MASKED : 0; + u32 flags = entry->reg.masked ? GUC_REGSET_MASKED : 0; guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++); } diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index b7bbba929170..d129e6d7cb1f 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -75,10 +75,7 @@ static bool compatible_entries(const struct xe_reg_sr_entry *e1, e1->clr_bits & e2->set_bits || e1->set_bits & e2->clr_bits) return false; - if (e1->masked_reg != e2->masked_reg) - return false; - - if (e1->reg_type != e2->reg_type) + if (e1->reg.raw != e2->reg.raw) return false; return true; @@ -91,10 +88,10 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr) #endif } -int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, +int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e) { - unsigned long idx = reg; + unsigned long idx = e->reg.reg; struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx); int ret; @@ -125,18 +122,30 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, return 0; fail: - DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s): ret=%d\n", + DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n", idx, e->clr_bits, e->set_bits, - str_yes_no(e->masked_reg), ret); + str_yes_no(e->reg.masked), + str_yes_no(e->reg.mcr), + ret); reg_sr_inc_error(sr); return ret; } -static void apply_one_mmio(struct xe_gt *gt, u32 reg, - struct xe_reg_sr_entry *entry) +/* + * Convert back from encoded value to type-safe, only to be used when reg.mcr + * is true + */ +static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) +{ + return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; +} + +static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) { struct xe_device *xe = gt_to_xe(gt); + struct xe_reg reg = entry->reg; + struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); u32 val; /* @@ -147,12 +156,12 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg, * When it's not masked, we have to read it from hardware, unless we are * supposed to set all bits. */ - if (entry->masked_reg) + if (reg.masked) val = (entry->clr_bits ?: entry->set_bits) << 16; else if (entry->clr_bits + 1) - val = (entry->reg_type == XE_RTP_REG_MCR ? - xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(reg)) : - xe_mmio_read32(gt, reg)) & (~entry->clr_bits); + val = (reg.mcr ? 
+ xe_gt_mcr_unicast_read_any(gt, reg_mcr) : + xe_mmio_read32(gt, reg.reg)) & (~entry->clr_bits); else val = 0; @@ -163,12 +172,12 @@ static void apply_one_mmio(struct xe_gt *gt, u32 reg, */ val |= entry->set_bits; - drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg, val); + drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg.reg, val); - if (entry->reg_type == XE_RTP_REG_MCR) - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(reg), val); + if (entry->reg.mcr) + xe_gt_mcr_multicast_write(gt, reg_mcr, val); else - xe_mmio_write32(gt, reg, val); + xe_mmio_write32(gt, reg.reg, val); } void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) @@ -188,7 +197,7 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) goto err_force_wake; xa_for_each(&sr->xa, reg, entry) - apply_one_mmio(gt, reg, entry); + apply_one_mmio(gt, entry); err = xe_force_wake_put(>->mmio.fw, XE_FORCEWAKE_ALL); XE_WARN_ON(err); @@ -257,6 +266,6 @@ void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p) xa_for_each(&sr->xa, reg, entry) drm_printf(p, "\tREG[0x%lx] clr=0x%08x set=0x%08x masked=%s mcr=%s\n", reg, entry->clr_bits, entry->set_bits, - str_yes_no(entry->masked_reg), - str_yes_no(entry->reg_type == XE_RTP_REG_MCR)); + str_yes_no(entry->reg.masked), + str_yes_no(entry->reg.mcr)); } diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index 3af369089faa..0bfe66ea29bf 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -19,8 +19,7 @@ struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p); -int xe_reg_sr_add(struct xe_reg_sr *sr, u32 reg, - const struct xe_reg_sr_entry *e); +int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e); void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt); void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h index 91469784fd90..ad48a52b824a 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr_types.h +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -9,18 +9,14 @@ #include #include +#include "regs/xe_reg_defs.h" + struct xe_reg_sr_entry { + struct xe_reg reg; u32 clr_bits; u32 set_bits; /* Mask for bits to consider when reading value back */ u32 read_mask; - /* - * "Masked registers" are marked in spec as register with the upper 16 - * bits as a mask for the bits that is being updated on the lower 16 - * bits when writing to it. - */ - u8 masked_reg; - u8 reg_type; }; struct xe_reg_sr { diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 310d5dfe30d5..7a2bb60ebd85 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -11,10 +11,8 @@ #include "xe_platform_types.h" #include "xe_rtp.h" -#undef XE_REG #undef XE_REG_MCR -#define XE_REG(x, ...) _XE_RTP_REG(x) -#define XE_REG_MCR(x, ...) _XE_RTP_MCR_REG(x) +#define XE_REG_MCR(...) 
XE_REG(__VA_ARGS__, .mcr = 1) static bool match_not_render(const struct xe_gt *gt, const struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 20acd43cb60b..f2a0e8eb4936 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -94,16 +94,15 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action, u32 mmio_base, struct xe_reg_sr *sr) { - u32 reg = action->reg + mmio_base; struct xe_reg_sr_entry sr_entry = { + .reg = action->reg, .clr_bits = action->clr_bits, .set_bits = action->set_bits, .read_mask = action->read_mask, - .masked_reg = action->flags & XE_RTP_ACTION_FLAG_MASKED_REG, - .reg_type = action->reg_type, }; - xe_reg_sr_add(sr, reg, &sr_entry); + sr_entry.reg.reg += mmio_base; + xe_reg_sr_add(sr, &sr_entry); } static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index c0c587a73980..afbf5a2674f4 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -22,43 +22,77 @@ struct xe_reg_sr; /* * Helper macros - not to be used outside this header. */ -/* This counts to 12. Any more, it will return 13th argument. */ -#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define _XE_ESC(...) __VA_ARGS__ +#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,)) +#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_ -#define __CONCAT(a, b) a ## b -#define CONCATENATE(a, b) __CONCAT(a, b) +#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,)) +#define __XE_FIRST(x_,...) x_ +#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__)) +#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__) -#define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x) -#define __CALL_FOR_EACH_2(MACRO_, x, ...) \ - MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__) -#define __CALL_FOR_EACH_3(MACRO_, x, ...) \ - MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__) -#define __CALL_FOR_EACH_4(MACRO_, x, ...) \ - MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__) +#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__ -#define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...) \ - CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__) -#define CALL_FOR_EACH(MACRO_, x, ...) \ - _CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x, ##__VA_ARGS__) +#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b) +#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b -#define _XE_RTP_REG(x_) (x_), XE_RTP_REG_REGULAR -#define _XE_RTP_MCR_REG(x_) (x_), XE_RTP_REG_MCR +#define __XE_RTP_PASTE_SEP_COMMA , +#define __XE_RTP_PASTE_SEP_BITWISE_OR | /* - * Helper macros for concatenating prefix - do not use them directly outside - * this header + * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple + * @args, with the end result separated by @sep_. @sep must be one of the + * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such + * prefix. 
+ * + * Examples: + * + * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR)) + * expands to: + * + * XE_RTP_TEST_FOO , XE_RTP_TEST_BAR + * + * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO)) + * expands to: + * + * XE_RTP_TEST2_FOO + * + * 3) XE_RTP_PASTE_FOREACH(TEST3, BITWISE_OR, (FOO, BAR)) + * expands to: + * + * XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR + * + * 4) #define __XE_RTP_PASTE_SEP_MY_SEP BANANA + * XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR)) + * expands to: + * + * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR */ -#define __ADD_XE_RTP_ENTRY_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ENTRY_FLAG_, x) | -#define __ADD_XE_RTP_ACTION_FLAG_PREFIX(x) CONCATENATE(XE_RTP_ACTION_FLAG_, x) | -#define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) , -#define __ADD_XE_RTP_ACTION_PREFIX(x) CONCATENATE(XE_RTP_ACTION_, x) , +#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_)) +#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) +#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) + + +/* + * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer + * + * Example: + * + * #define foo(a_) ((struct foo){ .a = a_ }) + * XE_RTP_DROP_CAST(foo(10)) + * expands to: + * + * { .a = 10 } + */ +#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__) + /* * Macros to encode rules to match against platform, IP version, stepping, etc. * Shouldn't be used directly - see XE_RTP_RULES() */ - #define _XE_RTP_RULE_PLATFORM(plat__) \ { .match_type = XE_RTP_MATCH_PLATFORM, .platform = plat__ } @@ -197,7 +231,6 @@ struct xe_reg_sr; * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all * the bits * @reg_: Register - * @reg_type_: Register type - automatically expanded by XE_REG * @val_: Value to set * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -205,15 +238,14 @@ struct xe_reg_sr; * * REGNAME = VALUE */ -#define XE_RTP_ACTION_WR(reg_, reg_type_, val_, ...) \ - { .reg = (reg_), .reg_type = (reg_type_), \ +#define XE_RTP_ACTION_WR(reg_, val_, ...) \ + { .reg = XE_RTP_DROP_CAST(reg_), \ .clr_bits = ~0u, .set_bits = (val_), \ .read_mask = (~0u), ##__VA_ARGS__ } /** * XE_RTP_ACTION_SET - Set bits from @val_ in the register. * @reg_: Register - * @reg_type_: Register type - automatically expanded by XE_REG * @val_: Bits to set in the register * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -224,15 +256,14 @@ struct xe_reg_sr; * REGNAME[2] = 1 * REGNAME[5] = 1 */ -#define XE_RTP_ACTION_SET(reg_, reg_type_, val_, ...) \ - { .reg = (reg_), .reg_type = (reg_type_), \ - .clr_bits = (val_), .set_bits = (val_), \ - .read_mask = (val_), ##__VA_ARGS__ } +#define XE_RTP_ACTION_SET(reg_, val_, ...) \ + { .reg = XE_RTP_DROP_CAST(reg_), \ + .clr_bits = val_, .set_bits = val_, \ + .read_mask = val_, ##__VA_ARGS__ } /** * XE_RTP_ACTION_CLR: Clear bits from @val_ in the register. 
* @reg_: Register - * @reg_type_: Register type - automatically expanded by XE_REG * @val_: Bits to clear in the register * @...: Additional fields to override in the struct xe_rtp_action entry * @@ -243,15 +274,14 @@ struct xe_reg_sr; * REGNAME[2] = 0 * REGNAME[5] = 0 */ -#define XE_RTP_ACTION_CLR(reg_, reg_type_, val_, ...) \ - { .reg = (reg_), .reg_type = (reg_type_), \ - .clr_bits = (val_), .set_bits = 0, \ - .read_mask = (val_), ##__VA_ARGS__ } +#define XE_RTP_ACTION_CLR(reg_, val_, ...) \ + { .reg = XE_RTP_DROP_CAST(reg_), \ + .clr_bits = val_, .set_bits = 0, \ + .read_mask = val_, ##__VA_ARGS__ } /** * XE_RTP_ACTION_FIELD_SET: Set a bit range * @reg_: Register - * @reg_type_: Register type - automatically expanded by XE_REG * @mask_bits_: Mask of bits to be changed in the register, forming a field * @val_: Value to set in the field denoted by @mask_bits_ * @...: Additional fields to override in the struct xe_rtp_action entry @@ -261,29 +291,29 @@ struct xe_reg_sr; * * REGNAME[:] = VALUE */ -#define XE_RTP_ACTION_FIELD_SET(reg_, reg_type_, mask_bits_, val_, ...) \ - { .reg = (reg_), .reg_type = (reg_type_), \ - .clr_bits = (mask_bits_), .set_bits = (val_), \ - .read_mask = (mask_bits_), ##__VA_ARGS__ } +#define XE_RTP_ACTION_FIELD_SET(reg_, mask_bits_, val_, ...) \ + { .reg = XE_RTP_DROP_CAST(reg_), \ + .clr_bits = mask_bits_, .set_bits = val_, \ + .read_mask = mask_bits_, ##__VA_ARGS__ } -#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, reg_type_, mask_bits_, val_, ...) \ - { .reg = (reg_), .reg_type = (reg_type_), \ +#define XE_RTP_ACTION_FIELD_SET_NO_READ_MASK(reg_, mask_bits_, val_, ...) \ + { .reg = XE_RTP_DROP_CAST(reg_), \ .clr_bits = (mask_bits_), .set_bits = (val_), \ .read_mask = 0, ##__VA_ARGS__ } /** * XE_RTP_ACTION_WHITELIST - Add register to userspace whitelist * @reg_: Register - * @reg_type_: Register type - automatically expanded by XE_REG * @val_: Whitelist-specific flags to set * @...: Additional fields to override in the struct xe_rtp_action entry * * Add a register to the whitelist, allowing userspace to modify the ster with * regular user privileges. */ -#define XE_RTP_ACTION_WHITELIST(reg_, reg_type_, val_, ...) \ +#define XE_RTP_ACTION_WHITELIST(reg_, val_, ...) \ /* TODO fail build if ((flags) & ~(RING_FORCE_TO_NONPRIV_MASK_VALID)) */\ - { .reg = (reg_), .reg_type = (reg_type_), .set_bits = (val_), \ + { .reg = XE_RTP_DROP_CAST(reg_), \ + .set_bits = val_, \ .clr_bits = RING_FORCE_TO_NONPRIV_MASK_VALID, \ ##__VA_ARGS__ } @@ -297,11 +327,10 @@ struct xe_reg_sr; /** * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry - * @f1_: Last part of a ``XE_RTP_ENTRY_FLAG_*`` - * @...: Additional flags, defined like @f1_ + * @...: Entry flags, without the ``XE_RTP_ENTRY_FLAG_`` prefix * - * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to @f1_ so it can - * be easily used to define struct xe_rtp_action entries. Example: + * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to the flags + * when defining struct xe_rtp_entry entries. Example: * * .. code-block:: c * @@ -315,16 +344,15 @@ struct xe_reg_sr; * ... * }; */ -#define XE_RTP_ENTRY_FLAG(f1_, ...) \ - .flags = (CALL_FOR_EACH(__ADD_XE_RTP_ENTRY_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0) +#define XE_RTP_ENTRY_FLAG(...) 
\ + .flags = (XE_RTP_PASTE_FOREACH(ENTRY_FLAG_, BITWISE_OR, (__VA_ARGS__))) /** * XE_RTP_ACTION_FLAG - Helper to add multiple flags to a struct xe_rtp_action - * @f1_: Last part of a ``XE_RTP_ENTRY_*`` - * @...: Additional flags, defined like @f1_ + * @...: Action flags, without the ``XE_RTP_ACTION_FLAG_`` prefix * - * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to @f1_ so it - * can be easily used to define struct xe_rtp_action entries. Example: + * Helper to automatically add a ``XE_RTP_ACTION_FLAG_`` prefix to the flags + * when defining struct xe_rtp_action entries. Example: * * .. code-block:: c * @@ -338,13 +366,12 @@ struct xe_reg_sr; * ... * }; */ -#define XE_RTP_ACTION_FLAG(f1_, ...) \ - .flags = (CALL_FOR_EACH(__ADD_XE_RTP_ACTION_FLAG_PREFIX, f1_, ##__VA_ARGS__) 0) +#define XE_RTP_ACTION_FLAG(...) \ + .flags = (XE_RTP_PASTE_FOREACH(ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__))) /** * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry - * @r1: Last part of XE_RTP_MATCH_* - * @...: Additional rules, defined like @r1 + * @...: Rules * * At least one rule is needed and up to 4 are supported. Multiple rules are * AND'ed together, i.e. all the rules must evaluate to true for the entry to @@ -361,16 +388,15 @@ struct xe_reg_sr; * ... * }; */ -#define XE_RTP_RULES(r1, ...) \ - .n_rules = COUNT_ARGS(r1, ##__VA_ARGS__), \ +#define XE_RTP_RULES(...) \ + .n_rules = _XE_COUNT_ARGS(__VA_ARGS__), \ .rules = (const struct xe_rtp_rule[]) { \ - CALL_FOR_EACH(__ADD_XE_RTP_RULE_PREFIX, r1, ##__VA_ARGS__) \ + XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__)) \ } /** * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry - * @a1: Action to take. Last part of XE_RTP_ACTION_* - * @...: Additional rules, defined like @r1 + * @...: Actions to be taken * * At least one rule is needed and up to 4 are supported. Multiple rules are * AND'ed together, i.e. all the rules must evaluate to true for the entry to @@ -388,10 +414,10 @@ struct xe_reg_sr; * ... * }; */ -#define XE_RTP_ACTIONS(a1, ...) \ - .n_actions = COUNT_ARGS(a1, ##__VA_ARGS__), \ +#define XE_RTP_ACTIONS(...) 
\ + .n_actions = _XE_COUNT_ARGS(__VA_ARGS__), \ .actions = (const struct xe_rtp_action[]) { \ - CALL_FOR_EACH(__ADD_XE_RTP_ACTION_PREFIX, a1, ##__VA_ARGS__) \ + XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \ } void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index e87f1b280d96..12df8a9e9c45 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -8,14 +8,11 @@ #include +#include "regs/xe_reg_defs.h" + struct xe_hw_engine; struct xe_gt; -enum { - XE_RTP_REG_REGULAR, - XE_RTP_REG_MCR, -}; - /** * struct xe_rtp_action - action to take for any matching rule * @@ -24,20 +21,17 @@ enum { */ struct xe_rtp_action { /** @reg: Register */ - u32 reg; + struct xe_reg reg; /** @clr_bits: bits to clear when updating register */ - u32 clr_bits; + u32 clr_bits; /** @set_bits: bits to set when updating register */ - u32 set_bits; + u32 set_bits; #define XE_RTP_NOCHECK .read_mask = 0 /** @read_mask: mask for bits to consider when reading value back */ - u32 read_mask; -#define XE_RTP_ACTION_FLAG_MASKED_REG BIT(0) -#define XE_RTP_ACTION_FLAG_ENGINE_BASE BIT(1) + u32 read_mask; +#define XE_RTP_ACTION_FLAG_ENGINE_BASE BIT(0) /** @flags: flags to apply on rule evaluation or action */ - u8 flags; - /** @reg_type: register type, see ``XE_RTP_REG_*`` */ - u8 reg_type; + u8 flags; }; enum { diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index f6eefa951175..5fc6a408429b 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -12,10 +12,8 @@ #include "xe_platform_types.h" #include "xe_rtp.h" -#undef XE_REG #undef XE_REG_MCR -#define XE_REG(x, ...) _XE_RTP_REG(x) -#define XE_REG_MCR(x, ...) _XE_RTP_MCR_REG(x) +#define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) static const struct xe_rtp_entry gt_tunings[] = { { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), @@ -54,8 +52,7 @@ static const struct xe_rtp_entry lrc_tunings[] = { }, { XE_RTP_NAME("Tuning: TBIMR fast clip"), XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index ed3fa51ccd24..b0bb2f4438f4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -87,10 +87,8 @@ * a more declarative approach rather than procedural. */ -#undef XE_REG #undef XE_REG_MCR -#define XE_REG(x, ...) _XE_RTP_REG(x) -#define XE_REG_MCR(x, ...) _XE_RTP_MCR_REG(x) +#define XE_REG_MCR(...) 
XE_REG(__VA_ARGS__, .mcr = 1) __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); @@ -232,8 +230,7 @@ static const struct xe_rtp_entry gt_was[] = { }, { XE_RTP_NAME("16016694945"), XE_RTP_RULES(PLATFORM(PVC)), - XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC)) }, {} }; @@ -248,36 +245,30 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), - XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, PUSH_CONST_DEREF_HOLD_DIS)) }, { XE_RTP_NAME("14010229206, 1409085225"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER), IS_INTEGRATED), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, { XE_RTP_NAME("1606931601"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ)) }, { XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE)) }, { XE_RTP_NAME("1406941453"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(SAMPLER_MODE, ENABLE_SMALLPL)) }, { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1, - FFSC_PERCTX_PREEMPT_CTRL, - XE_RTP_ACTION_FLAG(MASKED_REG))) + FFSC_PERCTX_PREEMPT_CTRL)) }, /* TGL */ @@ -286,8 +277,7 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_RULES(PLATFORM(TIGERLAKE), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - RC_SEMA_IDLE_MSG_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + RC_SEMA_IDLE_MSG_DISABLE)) }, /* RKL */ @@ -296,8 +286,7 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_RULES(PLATFORM(ROCKETLAKE), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - RC_SEMA_IDLE_MSG_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + RC_SEMA_IDLE_MSG_DISABLE)) }, /* ADL-P */ @@ -306,8 +295,7 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_RULES(PLATFORM(ALDERLAKE_P), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RING_PSMI_CTL(RENDER_RING_BASE), WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - RC_SEMA_IDLE_MSG_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + RC_SEMA_IDLE_MSG_DISABLE)) }, /* DG2 */ @@ -324,8 +312,7 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("18017747507"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(VFG_PREEMPTION_CHICKEN, - POLYGON_TRIFAN_LINELOOP_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + POLYGON_TRIFAN_LINELOOP_DISABLE)) }, { XE_RTP_NAME("22012826095, 22013059131"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), @@ -366,15 +353,13 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("14015227452"), 
XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) }, { XE_RTP_NAME("16015675438"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, - PERF_FIX_BALANCING_CFE_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + PERF_FIX_BALANCING_CFE_DISABLE)) }, { XE_RTP_NAME("16011620976, 22015475538"), XE_RTP_RULES(PLATFORM(DG2), @@ -385,7 +370,6 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, - XE_RTP_ACTION_FLAG(MASKED_REG), /* * Register can't be read back for verification on * DG2 due to Wa_14012342262 @@ -396,7 +380,6 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G11), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, - XE_RTP_ACTION_FLAG(MASKED_REG), /* * Register can't be read back for verification on * DG2 due to Wa_14012342262 @@ -405,55 +388,46 @@ static const struct xe_rtp_entry engine_was[] = { }, { XE_RTP_NAME("1509727124"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB)) }, { XE_RTP_NAME("22012856258"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION)) }, { XE_RTP_NAME("14013392000"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE)) }, { XE_RTP_NAME("14012419201"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX, - XE_RTP_ACTION_FLAG(MASKED_REG))) + DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) }, { XE_RTP_NAME("14012419201"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX, - XE_RTP_ACTION_FLAG(MASKED_REG))) + DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) }, { XE_RTP_NAME("1308578152"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER), FUNC(xe_rtp_match_first_gslice_fused_off)), XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1, - REPLAY_MODE_GRANULARITY, - XE_RTP_ACTION_FLAG(MASKED_REG))) + REPLAY_MODE_GRANULARITY)) }, { XE_RTP_NAME("22010960976, 14013347512"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0, - LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK, - XE_RTP_ACTION_FLAG(MASKED_REG))) + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK)) }, { XE_RTP_NAME("1608949956, 14010198302"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE)) }, { XE_RTP_NAME("22010430635"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_GRF_CLEAR, - XE_RTP_ACTION_FLAG(MASKED_REG))) + DISABLE_GRF_CLEAR)) }, { 
XE_RTP_NAME("14013202645"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER)), @@ -466,21 +440,18 @@ static const struct xe_rtp_entry engine_was[] = { { XE_RTP_NAME("22012532006"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA, - XE_RTP_ACTION_FLAG(MASKED_REG))) + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) }, { XE_RTP_NAME("22012532006"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA, - XE_RTP_ACTION_FLAG(MASKED_REG))) + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) }, { XE_RTP_NAME("22014600077"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(B0, FOREVER), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH, - XE_RTP_ACTION_FLAG(MASKED_REG), /* * Wa_14012342262 write-only reg, so skip * verification @@ -491,7 +462,6 @@ static const struct xe_rtp_entry engine_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G10), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH, - XE_RTP_ACTION_FLAG(MASKED_REG), /* * Wa_14012342262 write-only reg, so skip * verification @@ -507,18 +477,15 @@ static const struct xe_rtp_entry engine_was[] = { }, { XE_RTP_NAME("14015227452"), XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) }, { XE_RTP_NAME("16015675438"), XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE)) }, { XE_RTP_NAME("14014999345"), XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), STEP(B0, C0)), - XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC)) }, {} }; @@ -527,25 +494,21 @@ static const struct xe_rtp_entry lrc_was[] = { { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3, - DISABLE_CPS_AWARE_COLOR_PIPE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + DISABLE_CPS_AWARE_COLOR_PIPE)) }, { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1, PREEMPT_GPGPU_LEVEL_MASK, - PREEMPT_GPGPU_THREAD_GROUP_LEVEL, - XE_RTP_ACTION_FLAG(MASKED_REG))) + PREEMPT_GPGPU_THREAD_GROUP_LEVEL)) }, { XE_RTP_NAME("1806527549"), XE_RTP_RULES(GRAPHICS_VERSION(1200)), - XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE)) }, { XE_RTP_NAME("1606376872"), XE_RTP_RULES(GRAPHICS_VERSION(1200)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC)) }, /* DG1 */ @@ -553,65 +516,54 @@ static const struct xe_rtp_entry lrc_was[] = { { XE_RTP_NAME("1409044764"), XE_RTP_RULES(PLATFORM(DG1)), XE_RTP_ACTIONS(CLR(COMMON_SLICE_CHICKEN3, - DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN, - 
XE_RTP_ACTION_FLAG(MASKED_REG))) + DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN)) }, { XE_RTP_NAME("22010493298"), XE_RTP_RULES(PLATFORM(DG1)), XE_RTP_ACTIONS(SET(HIZ_CHICKEN, - DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE)) }, /* DG2 */ { XE_RTP_NAME("16011186671"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)), - XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH, - .flags = XE_RTP_ACTION_FLAG_MASKED_REG), - SET(VFLSKPD, DIS_OVER_FETCH_CACHE, - .flags = XE_RTP_ACTION_FLAG_MASKED_REG)) + XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH), + SET(VFLSKPD, DIS_OVER_FETCH_CACHE)) }, { XE_RTP_NAME("14010469329"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE)) }, { XE_RTP_NAME("14010698770, 22010613112, 22010465075"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, - DISABLE_CPS_AWARE_COLOR_PIPE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + DISABLE_CPS_AWARE_COLOR_PIPE)) }, { XE_RTP_NAME("16013271637"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, - MSC_MSAA_REODER_BUF_BYPASS_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + MSC_MSAA_REODER_BUF_BYPASS_DISABLE)) }, { XE_RTP_NAME("14014947963"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(FIELD_SET(VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, - 0x4000, - XE_RTP_ACTION_FLAG(MASKED_REG))) + 0x4000)) }, { XE_RTP_NAME("18018764978"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2, - SCOREBOARD_STALL_FLUSH_CONTROL, - XE_RTP_ACTION_FLAG(MASKED_REG))) + SCOREBOARD_STALL_FLUSH_CONTROL)) }, { XE_RTP_NAME("15010599737"), XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) }, { XE_RTP_NAME("18019271663"), XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE, - XE_RTP_ACTION_FLAG(MASKED_REG))) + XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, {} }; -- cgit v1.2.3 From 9a56502fe1815f0032eea07ce3584acf17173ce1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 15:32:56 -0700 Subject: drm/xe: Move helper macros to separate header The macros to handle the RTP tables are very scary, but shouldn't be used outside of the header adding the infra. Move it to a separate header and make sure it's only included when it can be. 
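The only legitimate consumer is xe_rtp.h itself, which defines a
sentinel before pulling the helpers in; any other inclusion trips the
#error guard. Sketch of the pattern, using the names from this patch:

	/* in xe_rtp.h */
	#define _XE_RTP_INCLUDE_PRIVATE_HELPERS
	#include "xe_rtp_helpers.h"
	#undef _XE_RTP_INCLUDE_PRIVATE_HELPERS

	/* in xe_rtp_helpers.h */
	#ifndef _XE_RTP_INCLUDE_PRIVATE_HELPERS
	#error "This header is supposed to be included by xe_rtp.h only"
	#endif

The header is also excluded from the hdrtest target in the Makefile,
since it is not meant to compile standalone.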
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230427223256.1432787-11-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 6 ++- drivers/gpu/drm/xe/xe_rtp.h | 75 +++------------------------------- drivers/gpu/drm/xe/xe_rtp_helpers.h | 81 +++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 72 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_rtp_helpers.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 42459727e67a..71c604ecff53 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -103,10 +103,12 @@ xe-y += xe_bb.o \ obj-$(CONFIG_DRM_XE) += xe.o obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ -\ + # header test +hdrtest_find_args := -not -path xe_rtp_helpers.h + always-$(CONFIG_DRM_XE_WERROR) += \ - $(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h')) + $(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args))) quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@ diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index afbf5a2674f4..8a89ad45589a 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -9,8 +9,13 @@ #include #include +#define _XE_RTP_INCLUDE_PRIVATE_HELPERS + +#include "xe_rtp_helpers.h" #include "xe_rtp_types.h" +#undef _XE_RTP_INCLUDE_PRIVATE_HELPERS + /* * Register table poke infrastructure */ @@ -19,76 +24,6 @@ struct xe_hw_engine; struct xe_gt; struct xe_reg_sr; -/* - * Helper macros - not to be used outside this header. - */ -#define _XE_ESC(...) __VA_ARGS__ -#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,)) -#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_ - -#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,)) -#define __XE_FIRST(x_,...) x_ -#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__)) -#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__) - -#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__ - -#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b) -#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b - -#define __XE_RTP_PASTE_SEP_COMMA , -#define __XE_RTP_PASTE_SEP_BITWISE_OR | - -/* - * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple - * @args, with the end result separated by @sep_. @sep must be one of the - * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such - * prefix. 
- * - * Examples: - * - * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR)) - * expands to: - * - * XE_RTP_TEST_FOO , XE_RTP_TEST_BAR - * - * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO)) - * expands to: - * - * XE_RTP_TEST2_FOO - * - * 3) XE_RTP_PASTE_FOREACH(TEST3, BITWISE_OR, (FOO, BAR)) - * expands to: - * - * XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR - * - * 4) #define __XE_RTP_PASTE_SEP_MY_SEP BANANA - * XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR)) - * expands to: - * - * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR - */ -#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_)) -#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) -#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) - - -/* - * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer - * - * Example: - * - * #define foo(a_) ((struct foo){ .a = a_ }) - * XE_RTP_DROP_CAST(foo(10)) - * expands to: - * - * { .a = 10 } - */ -#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__) - - /* * Macros to encode rules to match against platform, IP version, stepping, etc. * Shouldn't be used directly - see XE_RTP_RULES() diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h new file mode 100644 index 000000000000..1beea434d52d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_RTP_HELPERS_ +#define _XE_RTP_HELPERS_ + +#ifndef _XE_RTP_INCLUDE_PRIVATE_HELPERS +#error "This header is supposed to be included by xe_rtp.h only" +#endif + +/* + * Helper macros - not to be used outside this header. + */ +#define _XE_ESC(...) __VA_ARGS__ +#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,)) +#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_ + +#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,)) +#define __XE_FIRST(x_,...) x_ +#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__)) +#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__) + +#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__ + +#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b) +#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b + +#define __XE_RTP_PASTE_SEP_COMMA , +#define __XE_RTP_PASTE_SEP_BITWISE_OR | + +/* + * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple + * @args, with the end result separated by @sep_. @sep must be one of the + * previously declared macros __XE_RTP_PASTE_SEP_*, or declared with such + * prefix. 
+ * + * Examples: + * + * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR)) + * expands to: + * + * XE_RTP_TEST_FOO , XE_RTP_TEST_BAR + * + * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO)) + * expands to: + * + * XE_RTP_TEST2_FOO + * + * 3) XE_RTP_PASTE_FOREACH(TEST3, BITWISE_OR, (FOO, BAR)) + * expands to: + * + * XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR + * + * 4) #define __XE_RTP_PASTE_SEP_MY_SEP BANANA + * XE_RTP_PASTE_FOREACH(TEST_, MY_SEP, (FOO, BAR)) + * expands to: + * + * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR + */ +#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_)) +#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) +#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) + +/* + * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer + * + * Example: + * + * #define foo(a_) ((struct foo){ .a = a_ }) + * XE_RTP_DROP_CAST(foo(10)) + * expands to: + * + * { .a = 10 } + */ +#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__) + +#endif -- cgit v1.2.3 From ad799e4ace0dd8b81ff698dc92d6f1419fc49d4f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 27 Apr 2023 11:44:09 -0700 Subject: drm/xe: Fix media detection for pre-GMD_ID platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading the GMD_ID register on platforms before that register became available is not reliable. The assumption was that since the register was not allocated, it would return 0. But on PVC for example it returns garbage (or a very specific number), triggering the following error: xe 0000:8c:00.0: [drm] *ERROR* Hardware reports unknown media version 1025.55 Fix it by stop relying on the value returned by that registers on platforms before GMD_ID. Instead this relies on the graphics description struct being already pre-set on the device: this can only ever be true for platforms before the GMD_ID support. In that case, GMD_ID is skipped and the hardcoded values are used. This should also help on early bring-up in case the GMD_ID returns something not expected and we need to temporarily hardcode values. 
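The resulting selection logic is straightforward (condensed from the
xe_info_init() hunk below):

	/*
	 * desc->graphics is only ever pre-set for platforms without
	 * GMD_ID; everything newer reads the IP version from hardware.
	 */
	if (desc->graphics)
		handle_pre_gmdid(xe, desc, &graphics_desc, &media_desc);
	else
		handle_gmdid(xe, desc, &graphics_desc, &media_desc);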
With this, PVC doesn't trigger the error and goes straight to: xe 0000:8c:00.0: [drm:xe_display_info_init [xe]] No display IP, skipping xe 0000:8c:00.0: [drm:xe_pci_probe [xe]] XE_PVC 0bd5:002f dgfx:1 gfx:Xe_HPC (12.60) media:none (0.00) dma_m_s:52 tc:2 xe 0000:8c:00.0: [drm:xe_pci_probe [xe]] Stepping = (G:C0, M:**, D:**, B:B3) Fixes: 5822bba943ad ("drm/xe: Select graphics/media descriptors from GMD_ID") Reviewed-by: José Roberto de Souza Link: https://lore.kernel.org/r/20230427184408.1340988-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 113 +++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 58 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c1f2f63548d3..473ee8df2db2 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -38,7 +38,9 @@ struct xe_gt_desc { }; struct xe_device_desc { + /* Should only ever be set for platforms without GMD_ID */ const struct xe_graphics_desc *graphics; + /* Should only ever be set for platforms without GMD_ID */ const struct xe_media_desc *media; const char *platform_name; @@ -402,6 +404,30 @@ static u32 peek_gmdid(struct xe_device *xe, u32 gmdid_offset) REG_FIELD_GET(GMD_ID_RELEASE_MASK, ver); } +/* + * Pre-GMD_ID platform: device descriptor already points to the appropriate + * graphics descriptor. Simply forward the description and calculate the version + * appropriately. "graphics" should be present in all such platforms, while + * media is optional. + */ +static void handle_pre_gmdid(struct xe_device *xe, + const struct xe_device_desc *desc, + const struct xe_graphics_desc **graphics, + const struct xe_media_desc **media) +{ + *graphics = desc->graphics; + xe->info.graphics_verx100 = (*graphics)->ver * 100 + (*graphics)->rel; + + *media = desc->media; + if (*media) + xe->info.media_verx100 = (*media)->ver * 100 + (*media)->rel; + +} + +/* + * GMD_ID platform: read IP version from hardware and select graphics descriptor + * based on the result. + */ static void handle_gmdid(struct xe_device *xe, const struct xe_device_desc *desc, const struct xe_graphics_desc **graphics, @@ -409,69 +435,35 @@ static void handle_gmdid(struct xe_device *xe, { u32 ver; - if (desc->graphics) { - /* - * Pre-GMD_ID platform; device descriptor already points to - * the appropriate graphics descriptor. - */ - *graphics = desc->graphics; - xe->info.graphics_verx100 = (*graphics)->ver * 100 + (*graphics)->rel; - } else { - /* - * GMD_ID platform; read IP version from hardware and select - * graphics descriptor based on the result. - */ - ver = peek_gmdid(xe, GMD_ID.reg); - for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { - if (ver == graphics_ip_map[i].ver) { - xe->info.graphics_verx100 = ver; - *graphics = graphics_ip_map[i].ip; - - break; - } - } + ver = peek_gmdid(xe, GMD_ID.reg); + for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { + if (ver == graphics_ip_map[i].ver) { + xe->info.graphics_verx100 = ver; + *graphics = graphics_ip_map[i].ip; - if (!xe->info.graphics_verx100) { - drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n", - ver / 100, ver % 100); + break; } } - if (desc->media) { - /* - * Pre-GMD_ID platform; device descriptor already points to - * the appropriate media descriptor. 
- */ - *media = desc->media; - xe->info.media_verx100 = (*media)->ver * 100 + (*media)->rel; - } else { - /* - * GMD_ID platform; read IP version from hardware and select - * media descriptor based on the result. - * - * desc->media can also be NULL for a pre-GMD_ID platform that - * simply doesn't have media (e.g., PVC); in that case the - * attempt to read GMD_ID will return 0 (since there's no - * register at that location). - */ - ver = peek_gmdid(xe, GMD_ID.reg + 0x380000); - if (ver == 0) - return; - - for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { - if (ver == media_ip_map[i].ver) { - xe->info.media_verx100 = ver; - *media = media_ip_map[i].ip; - - break; - } - } + if (!xe->info.graphics_verx100) { + drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n", + ver / 100, ver % 100); + } + + ver = peek_gmdid(xe, GMD_ID.reg + 0x380000); + for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { + if (ver == media_ip_map[i].ver) { + xe->info.media_verx100 = ver; + *media = media_ip_map[i].ip; - if (!xe->info.media_verx100) { - drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n", - ver / 100, ver % 100); + break; } } + + if (!xe->info.media_verx100) { + drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n", + ver / 100, ver % 100); + } } @@ -486,9 +478,14 @@ static int xe_info_init(struct xe_device *xe, /* * If this platform supports GMD_ID, we'll detect the proper IP - * descriptor to use from hardware registers. + * descriptor to use from hardware registers. desc->graphics will only + * ever be set at this point for platforms before GMD_ID. In that case + * the IP descriptions and versions are simply derived from that. */ - handle_gmdid(xe, desc, &graphics_desc, &media_desc); + if (desc->graphics) + handle_pre_gmdid(xe, desc, &graphics_desc, &media_desc); + else + handle_gmdid(xe, desc, &graphics_desc, &media_desc); /* * If we couldn't detect the graphics IP, that's considered a fatal -- cgit v1.2.3 From 4c69e4b4c60a855e6726034e68d0f23029c19301 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 25 Apr 2023 12:26:24 -0700 Subject: drm/xe: Enable Raptorlake-P MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Raptorlake-P was tested and it is working as the same as Alderlake-P. Reviewed-by: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 473ee8df2db2..13a5ce18ee05 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -312,6 +312,7 @@ static const struct pci_device_id pciidlist[] = { XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc), XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), + XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), -- cgit v1.2.3 From 9bc252522dbb0e6c34e9e0e26a599fa28555d907 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Wed, 3 May 2023 14:49:22 -0300 Subject: drm/xe: Include only relevant header in xe_module.h Things defined in are not really used by that header. Replace that with , to have bool and u32 available. 
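The header then needs nothing beyond the fixed-width and boolean types.
A sketch of the result, assuming the swapped includes are <linux/init.h>
(dropped) and <linux/types.h> (added), the latter being where bool and
u32 are defined:

	/* xe_module.h - sketch, include names assumed */
	#include <linux/types.h>	/* bool, u32; replaces <linux/init.h> */

	/* Module modprobe variables */
	extern bool enable_guc;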
Signed-off-by: Gustavo Sousa Reviewed-by: Matthew Brost Signed-off-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230503174922.252111-1-gustavo.sousa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 86916c176382..7169907c3365 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -3,7 +3,7 @@ * Copyright © 2023 Intel Corporation */ -#include +#include /* Module modprobe variables */ extern bool enable_guc; -- cgit v1.2.3 From e3ec5e75911b04b5e9ce67907024d7c5d9a6cb99 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 14 Apr 2023 15:08:33 -0700 Subject: drm/xe: Set default MOCS value for cs instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CS instructions that dont have a explicit MOCS field will use this default MOCS value. To do this, it was necessary to initialize part of the mocs earlier and add new function that loads another array of rtp entries set during run-time. This is still missing to handle of mocs read for platforms with HAS_L3_CCS_READ(aka PVC). v2: - move to xe_hw_engine.c - remove CMD_CCTL auxiliary macros v3: - rebased Bspec: 45826 Cc: Matt Roper Signed-off-by: José Roberto de Souza Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 12 +++++++++++ drivers/gpu/drm/xe/xe_gt.c | 2 ++ drivers/gpu/drm/xe/xe_hw_engine.c | 35 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_mocs.c | 11 ++++++++-- drivers/gpu/drm/xe/xe_mocs.h | 1 + 5 files changed, 59 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index f6b3b99a562a..717d560626ce 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -44,6 +44,18 @@ #define RING_EIR(base) XE_REG((base) + 0xb0) #define RING_EMR(base) XE_REG((base) + 0xb4) #define RING_ESR(base) XE_REG((base) + 0xb8) + +#define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) +/* + * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. + * The lsb of each can be considered a separate enabling bit for encryption. + * 6:0 == default MOCS value for reads => 6:1 == table index for reads. + * 13:7 == default MOCS value for writes => 13:8 == table index for writes. + * 15:14 == Reserved => 31:30 are set to 0. 
+ */ +#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 8) +#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 1) + #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) #define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0d4664e344da..603bb3ae3e37 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -390,6 +390,8 @@ static int gt_fw_domain_init(struct xe_gt *gt) /* Rerun MCR init as we now have hw engine list */ xe_gt_mcr_init(gt); + xe_mocs_init_early(gt); + err = xe_hw_engines_init_early(gt); if (err) goto err_force_wake; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 795302bcd3ae..04ec276cfcf5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -21,6 +21,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_reg_sr.h" +#include "xe_rtp.h" #include "xe_sched_job.h" #include "xe_wa.h" @@ -267,6 +268,39 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg); } +static void +hw_engine_setup_default_state(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + const u8 mocs_write_idx = gt->mocs.uc_index; + /* TODO: missing handling of HAS_L3_CCS_READ platforms */ + const u8 mocs_read_idx = gt->mocs.uc_index; + u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | + REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); + const struct xe_rtp_entry engine_was[] = { + /* + * RING_CMD_CCTL specifies the default MOCS entry that will be + * used by the command streamer when executing commands that + * don't have a way to explicitly specify a MOCS setting. + * The default should usually reference whichever MOCS entry + * corresponds to uncached behavior, although use of a WB cached + * entry is recommended by the spec in certain circumstances on + * specific platforms. 
+ */ + { XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0), + CMD_CCTL_WRITE_OVERRIDE_MASK | + CMD_CCTL_READ_OVERRIDE_MASK, + ring_cmd_cctl_val, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + {} + }; + + xe_rtp_process(engine_was, &hwe->reg_sr, gt, hwe); +} + static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, enum xe_hw_engine_id id) { @@ -293,6 +327,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt)); xe_wa_process_engine(hwe); + hw_engine_setup_default_state(hwe); xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt)); xe_reg_whitelist_process_engine(hwe); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index f2ceecd536ed..0d07811a573f 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -518,6 +518,15 @@ static void init_l3cc_table(struct xe_gt *gt, } } +void xe_mocs_init_early(struct xe_gt *gt) +{ + struct xe_mocs_info table; + + get_mocs_settings(gt->xe, &table); + gt->mocs.uc_index = table.uc_index; + gt->mocs.wb_index = table.wb_index; +} + void xe_mocs_init(struct xe_gt *gt) { struct xe_mocs_info table; @@ -528,8 +537,6 @@ void xe_mocs_init(struct xe_gt *gt) */ flags = get_mocs_settings(gt->xe, &table); mocs_dbg(>->xe->drm, "flag:0x%x\n", flags); - gt->mocs.uc_index = table.uc_index; - gt->mocs.wb_index = table.wb_index; if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt, &table, GLOBAL_MOCS(0).reg); diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index 63500a1d6660..25f7b35a76da 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -11,6 +11,7 @@ struct xe_engine; struct xe_gt; +void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); /** -- cgit v1.2.3 From bb95a4f9f5c2e9b0a43590958ba1430519592909 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 14 Apr 2023 20:50:33 -0700 Subject: drm/xe: Set default MOCS value for copy cs instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit copy cs instructions that dont have a explict MOCS field will use this default MOCS value. 
v2: - move to xe_hw_engine.c - remove BLIT_CCTL auxiliary macros - removed MASKED_REG v3: - rebased v4: - process workaround in hwe->reg_lrc v5: - add a new function and call it from xe_gt_record_default_lrcs() because hwe->reg_lrc is initialized later BSpec: 45807 Cc: Matt Roper Signed-off-by: José Roberto de Souza Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 6 ++++++ drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_hw_engine.c | 29 +++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine.h | 1 + 4 files changed, 37 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 717d560626ce..79873bf64e8d 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -58,6 +58,12 @@ #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) + +/* Handling MOCS value in BLIT_CCTL like it was done CMD_CCTL */ +#define BLIT_CCTL(base) XE_REG((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 9) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 1) + #define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234) #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 603bb3ae3e37..3afca3dd9657 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -225,6 +225,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe); xe_wa_process_lrc(hwe); + xe_hw_engine_setup_default_lrc_state(hwe); xe_tuning_process_lrc(hwe); default_lrc = drmm_kzalloc(&xe->drm, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 04ec276cfcf5..a9adac0624f6 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -268,6 +268,35 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg); } +void +xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + const u8 mocs_write_idx = gt->mocs.uc_index; + const u8 mocs_read_idx = gt->mocs.uc_index; + u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) | + REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx); + const struct xe_rtp_entry lrc_was[] = { + /* + * Some blitter commands do not have a field for MOCS, those + * commands will use MOCS index pointed by BLIT_CCTL. + * BLIT_CCTL registers are needed to be programmed to un-cached. 
+ */ + { XE_RTP_NAME("BLIT_CCTL_default_MOCS"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(COPY)), + XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0), + BLIT_CCTL_DST_MOCS_MASK | + BLIT_CCTL_SRC_MOCS_MASK, + blit_cctl_val, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + {} + }; + + xe_rtp_process(lrc_was, &hwe->reg_lrc, gt, hwe); +} + static void hw_engine_setup_default_state(struct xe_hw_engine *hwe) { diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index ceab65397256..013efcd6d8c5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -17,6 +17,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p); u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, enum xe_engine_class engine_class); +void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) -- cgit v1.2.3 From 116d32515214910d8a34538dbd09ef26a878d5ae Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 4 May 2023 22:29:43 +0200 Subject: drm/xe: Fix splat during error dump Allow xe_bo_addr without lock to print debug information, such as from xe_analyze_vm. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 19 +++++++++++++++---- drivers/gpu/drm/xe/xe_bo.h | 2 ++ drivers/gpu/drm/xe/xe_vm.c | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 9a565203deac..4693372ec82e 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1545,15 +1545,18 @@ bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) return false; } -dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, +/* + * Resolve a BO address. There is no assert to check if the proper lock is held + * so it should only be used in cases where it is not fatal to get the wrong + * address, such as printing debug information, but not in cases where memory is + * written based on this result. 
+ */ +dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size, bool *is_vram) { struct xe_res_cursor cur; u64 page; - if (!READ_ONCE(bo->ttm.pin_count)) - xe_bo_assert_held(bo); - XE_BUG_ON(page_size > PAGE_SIZE); page = offset >> PAGE_SHIFT; offset &= (PAGE_SIZE - 1); @@ -1575,6 +1578,14 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, } } +dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, + size_t page_size, bool *is_vram) +{ + if (!READ_ONCE(bo->ttm.pin_count)) + xe_bo_assert_held(bo); + return __xe_bo_addr(bo, offset, page_size, is_vram); +} + int xe_bo_vmap(struct xe_bo *bo) { void *virtual; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 8354d05ccdf3..7e111332c35a 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -199,6 +199,8 @@ static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) } bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo); +dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, + size_t page_size, bool *is_vram); dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size, bool *is_vram); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e634bb96f9cc..06b559ff80bf 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3421,7 +3421,7 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) &cur); addr = xe_res_dma(&cur); } else { - addr = xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram); + addr = __xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram); } drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", vma->start, vma->end, vma->end - vma->start + 1ull, -- cgit v1.2.3 From 14dac5a5748cc477f5d8887a45ca32011b9ffea3 Mon Sep 17 00:00:00 2001 From: Christopher Snowhill Date: Mon, 24 Apr 2023 19:19:21 -0700 Subject: drm/xe: Enable the compat ioctl functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required at the minimum for the DRM UAPI to function from 32-bit userspace with a 64-bit kernel. Signed-off-by: Christopher Snowhill Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b13bbdeeef51..2f8777f365a4 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -112,7 +112,7 @@ static const struct file_operations xe_driver_fops = { .mmap = drm_gem_mmap, .poll = drm_poll, .read = drm_read, -// .compat_ioctl = i915_ioc32_compat_ioctl, + .compat_ioctl = drm_compat_ioctl, .llseek = noop_llseek, }; -- cgit v1.2.3 From dbeb2bd25350c7e771547638e266ce16030ba91c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 4 May 2023 00:32:44 -0700 Subject: drm/xe: Do not mark 1809175790 as a WA Additional programming annotated with Wa_ should be reserved to those that have a official workaround. Just pointing to a bug or additional reference can be done with something else. Copy what i915 does and refer to it as "hsdes: ....". 
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230504073250.1436293-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 4c5f46f89241..c1b738e033c7 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -199,7 +199,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE; dw[i++] = preparser_disable(true); - /* Wa_1809175790 */ + + /* hsdes: 1809175790 */ if (!xe->info.has_flat_ccs) { if (decode) i = emit_aux_table_inv(gt, VD0_AUX_INV.reg, dw, i); @@ -244,9 +245,11 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; i = emit_pipe_invalidate(mask_flags, dw, i); - /* Wa_1809175790 */ + + /* hsdes: 1809175790 */ if (!xe->info.has_flat_ccs) i = emit_aux_table_inv(gt, CCS_AUX_INV.reg, dw, i); + dw[i++] = preparser_disable(false); i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), -- cgit v1.2.3 From 215bb2ce605bb182939e4dee445b6d95e0d1b843 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 4 May 2023 00:32:45 -0700 Subject: drm/xe: Fix comment on Wa_22013088509 On i915 the "see comment about Wa_22013088509" referred to the comment in the graphics version >= 11 branch, where there were more details about it. From the platforms supported by xe, only PVC needs Wa_22013088509, but as the comment says, it's simpler to do it for all platforms as there is no downside. Bring the missing comment over from i915 and reword it to fit xe better. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230504073250.1436293-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 55b240a5eaa7..02afb313bfea 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -461,9 +461,16 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, } /* - * Always leave the hardware in multicast mode when doing reads - * (see comment about Wa_22013088509 below) and only change it - * to unicast mode when doing writes of a specific instance. + * Always leave the hardware in multicast mode when doing reads and only + * change it to unicast mode when doing writes of a specific instance. + * + * The setting of the multicast/unicast bit usually wouldn't matter for + * read operations (which always return the value from a single register + * instance regardless of how that bit is set), but some platforms may + * have workarounds requiring us to remain in multicast mode for reads, + * e.g. Wa_22013088509 on PVC. There's no real downside to this, so + * we'll just go ahead and do so on all platforms; we'll only clear the + * multicast bit from the mask when explicitly doing a write operation. * * No need to save old steering reg value. 
*/ -- cgit v1.2.3 From a31153fcb1dc2baaf13e520f71f332d4eae28b52 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 4 May 2023 00:32:46 -0700 Subject: drm/xe/guc: Remove special handling for PVC A* The rest of the driver doesn't really support PVC before B0 stepping. Drop the special handling in xe_guc.c. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230504073250.1436293-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 89d20faced19..62b4fcf84acf 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -143,8 +143,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) /* Wa_16011759253 */ /* Wa_22011383443 */ - if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0) || - IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0)) + if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0)) flags |= GUC_WA_GAM_CREDITS; /* Wa_14014475959 */ @@ -164,11 +163,8 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) /* * Wa_2201180203 - * GUC_WA_PRE_PARSER causes media workload hang for PVC A0 and PCIe - * errors. Disable this for PVC A0 steppings. */ - if (GRAPHICS_VER(xe) <= 12 && - !IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_B0)) + if (GRAPHICS_VER(xe) <= 12) flags |= GUC_WA_PRE_PARSER; /* Wa_16011777198 */ @@ -180,9 +176,6 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) /* * Wa_22012727170 * Wa_22012727685 - * - * This WA is applicable to PVC CT A0, but causes media regressions. - * Drop the WA for PVC. */ if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, @@ -194,10 +187,9 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; /* Wa_1509372804 */ - if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_A0, STEP_C0)) + if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_B0, STEP_C0)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; - return flags; } -- cgit v1.2.3 From 98ce59e9ba5cd513bd57e0f4558a33833e07f7e8 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 28 Apr 2023 23:23:27 -0700 Subject: drm/xe/guc: Handle RCU_MODE as masked from definition guc_mmio_regset_write() had a flags for the registers to be added to the GuC's regset list. The only register actually using that was RCU_MODE, but it was setting the flags to a bogus value. From struct xe_guc_fwif.h, #define GUC_REGSET_MASKED BIT(0) #define GUC_REGSET_MASKED_WITH_VALUE BIT(2) #define GUC_REGSET_RESTORE_ONLY BIT(3) Cross checking with i915, the only flag to set in RCU_MODE is GUC_REGSET_MASKED. That can be done automatically from the register, as long as the definition is correct. Add the XE_REG_OPTION_MASKED annotation to RCU_MODE and kill the "flags" field in guc_mmio_regset_write(): guc_mmio_regset_write_one() can decide that based on the register being passed. 
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230429062332.354139-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 31 ++++++++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 8dd3bf2f6377..4a38f78277b5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -325,7 +325,7 @@ #define SARB_CHICKEN1 XE_REG_MCR(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) -#define RCU_MODE XE_REG(0x14800) +#define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED) #define RCU_MODE_CCS_ENABLE REG_BIT(0) #define FORCEWAKE_ACK_GT XE_REG(0x130044) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 676137dcb510..84c2d7c624c6 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -422,12 +422,12 @@ static void guc_capture_list_init(struct xe_guc_ads *ads) static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, struct iosys_map *regset_map, - u32 reg, u32 flags, + struct xe_reg reg, unsigned int n_entry) { struct guc_mmio_reg entry = { - .offset = reg, - .flags = flags, + .offset = reg.reg, + .flags = reg.masked ? GUC_REGSET_MASKED : 0, /* TODO: steering */ }; @@ -446,40 +446,33 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, unsigned long idx; unsigned count = 0; const struct { - u32 reg; - u32 flags; + struct xe_reg reg; bool skip; } *e, extra_regs[] = { - { .reg = RING_MODE(hwe->mmio_base).reg, }, - { .reg = RING_HWS_PGA(hwe->mmio_base).reg, }, - { .reg = RING_IMR(hwe->mmio_base).reg, }, - { .reg = RCU_MODE.reg, .flags = 0x3, - .skip = hwe != hwe_rcs_reset_domain }, + { .reg = RING_MODE(hwe->mmio_base), }, + { .reg = RING_HWS_PGA(hwe->mmio_base), }, + { .reg = RING_IMR(hwe->mmio_base), }, + { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain }, }; u32 i; BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX); - xa_for_each(&hwe->reg_sr.xa, idx, entry) { - u32 flags = entry->reg.masked ? GUC_REGSET_MASKED : 0; - - guc_mmio_regset_write_one(ads, regset_map, idx, flags, count++); - } + xa_for_each(&hwe->reg_sr.xa, idx, entry) + guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++); for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) { if (e->skip) continue; - guc_mmio_regset_write_one(ads, regset_map, - e->reg, e->flags, count++); + guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); } /* Wa_1607983814 */ if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, - LNCFCMOCS(i).reg, 0, - count++); + LNCFCMOCS(i), count++); } } -- cgit v1.2.3 From a56d8dabf134e30ed898128aae6ca830c03b6abb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 5 May 2023 14:40:19 +0200 Subject: drm/xe: Do not sleep in atomic Set atomic in xe_mmio_wait32() otherwise we would be scheduling in atomic context. 
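The rule behind the fix: a register-poll helper may only sleep between reads when its caller is allowed to sleep; from atomic context it has to busy-wait instead. A minimal sketch of that split, assuming a hypothetical helper rather than the actual xe_mmio_wait32() implementation:

  #include <linux/delay.h>
  #include <linux/errno.h>
  #include <linux/ktime.h>
  #include <linux/types.h>

  /* Poll until (read_fn() & mask) == val or timeout_us expires.  With
   * atomic == true we must not schedule, so busy-wait with udelay();
   * otherwise usleep_range() yields the CPU between reads.
   */
  static int example_wait32(u32 (*read_fn)(void), u32 val, u32 mask,
                            u32 timeout_us, bool atomic)
  {
          ktime_t end = ktime_add_us(ktime_get(), timeout_us);

          for (;;) {
                  if ((read_fn() & mask) == val)
                          return 0;
                  if (ktime_after(ktime_get(), end))
                          return -ETIMEDOUT;
                  if (atomic)
                          udelay(5);
                  else
                          usleep_range(10, 20);
          }
  }

Passing atomic = true is therefore the conservative choice whenever the wait can be reached from atomic context, as the MCR steering path here evidently can.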
Fixes: 7dc9b92dcfef ("drm/xe: Remove i915_utils dependency from xe_pcode.") Cc: Rodrigo Vivi Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 02afb313bfea..125c63bdc9b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -424,7 +424,7 @@ static void mcr_lock(struct xe_gt *gt) */ if (GRAPHICS_VERx100(xe) >= 1270) ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL, - false); + true); drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); } -- cgit v1.2.3 From a5cecbac92d5a50dd2f70a01dc53e19312f4081f Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 5 May 2023 15:34:33 +0200 Subject: drm/xe: Print GT info on TLB inv failure Print GT info on TLB inv failure for better debugbility. Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 604f189dbd70..9e7fe8d9bca4 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -34,8 +34,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) break; trace_xe_gt_tlb_invalidation_fence_timeout(fence); - drm_err(>_to_xe(gt)->drm, "TLB invalidation fence timeout, seqno=%d", - fence->seqno); + drm_err(>_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d", + gt->info.id, fence->seqno); list_del(&fence->link); fence->base.error = -ETIME; @@ -285,8 +285,8 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) tlb_invalidation_seqno_past(gt, seqno), TLB_TIMEOUT); if (!ret) { - drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->tlb_invalidation.seqno_recv); + drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n", + gt->info.id, seqno, gt->tlb_invalidation.seqno_recv); return -ETIME; } -- cgit v1.2.3 From e3e4964d335c73e931ea21c8f318d419d3cdb4cc Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 12 Apr 2023 14:09:23 +0300 Subject: drm/xe: destroy clients engine and vm xarrays on close xe_file_close cleanups the xarrays but forgets to destroy them causing a memleak in xarray internals. Found with kmemleak. 
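The teardown pattern at issue — release the entries, then destroy the xarray itself so its internal nodes are freed — looks roughly like this (example_file and its members are hypothetical, mirroring the xe_file_close() flow rather than reproducing it):

  #include <linux/mutex.h>
  #include <linux/slab.h>
  #include <linux/xarray.h>

  struct example_file {
          struct xarray objects;
          struct mutex lock;
  };

  static void example_file_close(struct example_file *ef)
  {
          unsigned long idx;
          void *obj;

          mutex_lock(&ef->lock);
          xa_for_each(&ef->objects, idx, obj)
                  kfree(obj);        /* the real code drops a reference here */
          mutex_unlock(&ef->lock);

          /* Without xa_destroy() the xarray's internal nodes are leaked,
           * which is exactly what kmemleak flagged.
           */
          xa_destroy(&ef->objects);
          mutex_destroy(&ef->lock);
  }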
Signed-off-by: Mika Kuoppala Reviewed-by: Christoph Manszewski Reviewed-by: Lucas De Marchi Signed-off-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2f8777f365a4..e686c25a0ad1 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -69,6 +69,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_engine_put(e); } mutex_unlock(&xef->engine.lock); + xa_destroy(&xef->engine.xa); mutex_destroy(&xef->engine.lock); device_kill_persistent_engines(xe, xef); @@ -76,6 +77,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); mutex_unlock(&xef->vm.lock); + xa_destroy(&xef->vm.xa); mutex_destroy(&xef->vm.lock); kfree(xef); -- cgit v1.2.3 From 9ca14f94d294862d6f5ee30a6b73f295cfaa5d08 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Mon, 8 May 2023 05:22:23 +0000 Subject: drm/xe: Handle -EDEADLK case in preempt worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With multiple active VMs, under memory pressure, it is possible that ttm_bo_validate() run into -EDEADLK in ttm_mem_evict_wait_busy() and return -ENOMEM. Until ttm properly handles locking in such scenarios, best thing the driver can do is unwind the lock and retry. Update preempt worker to retry validating BOs with a timeout upon -ENOMEM. v2: revert retry timeout upon -EAGAIN (Thomas) Reviewed-by: Thomas Hellström Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 06b559ff80bf..d9579bf5002d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -508,6 +509,8 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm, kvfree(tv); } +#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 + static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); @@ -519,6 +522,7 @@ static void preempt_rebind_work_func(struct work_struct *w) struct dma_fence *rebind_fence; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); + ktime_t end = 0; int err; long wait; int __maybe_unused tries = 0; @@ -637,6 +641,24 @@ out_unlock_outer: trace_xe_vm_rebind_worker_retry(vm); goto retry; } + + /* + * With multiple active VMs, under memory pressure, it is possible that + * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. + * Until ttm properly handles locking in such scenarios, best thing the + * driver can do is retry with a timeout. Killing the VM or putting it + * in error state after timeout or other error scenarios is still TBD. + */ + if (err == -ENOMEM) { + ktime_t cur = ktime_get(); + + end = end ? 
: ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); + if (ktime_before(cur, end)) { + msleep(20); + trace_xe_vm_rebind_worker_retry(vm); + goto retry; + } + } up_write(&vm->lock); free_preempt_fences(&preempt_fences); -- cgit v1.2.3 From 34f89ac8e66cd5121fb05c765acc3c67ddbef7a0 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 9 May 2023 05:08:24 +0000 Subject: drm/xe: Handle -EDEADLK case in exec ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With multiple active VMs, under memory pressure, it is possible that ttm_bo_validate() run into -EDEADLK in ttm_mem_evict_wait_busy() and return -ENOMEM. Until ttm properly handles locking in such scenarios, best thing the driver can do is unwind the lock and retry. Update xe_exec_begin to retry validating BOs with a timeout upon -ENOMEM. Reviewed-by: Thomas Hellström Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index ea869f2452ef..3db1b159586e 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "xe_bo.h" #include "xe_device.h" @@ -91,6 +92,8 @@ * Unlock all */ +#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000 + static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, struct ttm_validate_buffer tv_onstack[], struct ttm_validate_buffer **tv, @@ -99,12 +102,14 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, struct xe_vm *vm = e->vm; struct xe_vma *vma; LIST_HEAD(dups); - int err; + ktime_t end = 0; + int err = 0; *tv = NULL; if (xe_vm_no_dma_fences(e->vm)) return 0; +retry: err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1); if (err) return err; @@ -122,11 +127,27 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, if (err) { xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs); *tv = NULL; - return err; + break; + } + } + + /* + * With multiple active VMs, under memory pressure, it is possible that + * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. + * Until ttm properly handles locking in such scenarios, best thing the + * driver can do is retry with a timeout. + */ + if (err == -ENOMEM) { + ktime_t cur = ktime_get(); + + end = end ? : ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS); + if (ktime_before(cur, end)) { + msleep(20); + goto retry; } } - return 0; + return err; } static void xe_exec_end(struct xe_engine *e, -- cgit v1.2.3 From ce8bf5bd059542431230eac216693a579dc09dba Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 8 May 2023 15:53:19 -0700 Subject: drm/xe/mmio: Use struct xe_reg Convert all the callers to deal with xe_mmio_*() using struct xe_reg instead of plain u32. In a few places there was also a rename s/reg/reg_val/ when dealing with the value returned so it doesn't get mixed up with the register address. 
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230508225322.2692066-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_execlist.c | 18 ++-- drivers/gpu/drm/xe/xe_force_wake.c | 25 +++--- drivers/gpu/drm/xe/xe_force_wake_types.h | 6 +- drivers/gpu/drm/xe/xe_ggtt.c | 6 +- drivers/gpu/drm/xe/xe_gt.c | 4 +- drivers/gpu/drm/xe/xe_gt_clock.c | 6 +- drivers/gpu/drm/xe/xe_gt_mcr.c | 37 +++++---- drivers/gpu/drm/xe/xe_gt_topology.c | 18 ++-- drivers/gpu/drm/xe/xe_guc.c | 61 +++++++------- drivers/gpu/drm/xe/xe_guc_ads.c | 3 +- drivers/gpu/drm/xe/xe_guc_pc.c | 32 +++---- drivers/gpu/drm/xe/xe_guc_types.h | 3 +- drivers/gpu/drm/xe/xe_huc.c | 4 +- drivers/gpu/drm/xe/xe_hw_engine.c | 85 ++++++++++--------- drivers/gpu/drm/xe/xe_irq.c | 138 +++++++++++++++---------------- drivers/gpu/drm/xe/xe_mmio.c | 31 ++++--- drivers/gpu/drm/xe/xe_mmio.h | 57 +++++++------ drivers/gpu/drm/xe/xe_mocs.c | 8 +- drivers/gpu/drm/xe/xe_pat.c | 14 +++- drivers/gpu/drm/xe/xe_pcode.c | 16 ++-- drivers/gpu/drm/xe/xe_reg_sr.c | 14 ++-- drivers/gpu/drm/xe/xe_ring_ops.c | 11 +-- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 4 +- drivers/gpu/drm/xe/xe_uc_fw.c | 16 ++-- drivers/gpu/drm/xe/xe_wopcm.c | 12 +-- 26 files changed, 331 insertions(+), 300 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index e686c25a0ad1..8039142ae1a1 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -345,7 +345,7 @@ void xe_device_wmb(struct xe_device *xe) wmb(); if (IS_DGFX(xe)) - xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33.reg, 0); + xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0); } u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index d524ac5c7b57..b0ccc4ff8461 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -60,7 +60,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, } if (hwe->class == XE_ENGINE_CLASS_COMPUTE) - xe_mmio_write32(hwe->gt, RCU_MODE.reg, + xe_mmio_write32(hwe->gt, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); @@ -78,17 +78,17 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, */ wmb(); - xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base).reg, + xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base), xe_bo_ggtt_addr(hwe->hwsp)); - xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base).reg); - xe_mmio_write32(gt, RING_MODE(hwe->mmio_base).reg, + xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base)); + xe_mmio_write32(gt, RING_MODE(hwe->mmio_base), _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); - xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base).reg, + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), lower_32_bits(lrc_desc)); - xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base).reg, + xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base), upper_32_bits(lrc_desc)); - xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base).reg, + xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base), EL_CTRL_LOAD); } @@ -173,8 +173,8 @@ static u64 read_execlist_status(struct xe_hw_engine *hwe) struct xe_gt *gt = hwe->gt; u32 hi, lo; - lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base).reg); - hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base).reg); + lo = 
xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base)); + hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base)); printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class, hwe->instance, hi, lo); diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 53d73f36a121..363b81c3d746 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -8,6 +8,7 @@ #include #include "regs/xe_gt_regs.h" +#include "regs/xe_reg_defs.h" #include "xe_gt.h" #include "xe_mmio.h" @@ -27,7 +28,7 @@ fw_to_xe(struct xe_force_wake *fw) static void domain_init(struct xe_force_wake_domain *domain, enum xe_force_wake_domain_id id, - u32 reg, u32 ack, u32 val, u32 mask) + struct xe_reg reg, struct xe_reg ack, u32 val, u32 mask) { domain->id = id; domain->reg_ctl = reg; @@ -49,14 +50,14 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) if (xe->info.graphics_verx100 >= 1270) { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, - FORCEWAKE_GT.reg, - FORCEWAKE_ACK_GT_MTL.reg, + FORCEWAKE_GT, + FORCEWAKE_ACK_GT_MTL, BIT(0), BIT(16)); } else { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], XE_FW_DOMAIN_ID_GT, - FORCEWAKE_GT.reg, - FORCEWAKE_ACK_GT.reg, + FORCEWAKE_GT, + FORCEWAKE_ACK_GT, BIT(0), BIT(16)); } } @@ -71,8 +72,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) if (!xe_gt_is_media_type(gt)) domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], XE_FW_DOMAIN_ID_RENDER, - FORCEWAKE_RENDER.reg, - FORCEWAKE_ACK_RENDER.reg, + FORCEWAKE_RENDER, + FORCEWAKE_ACK_RENDER, BIT(0), BIT(16)); for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { @@ -81,8 +82,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, - FORCEWAKE_MEDIA_VDBOX(j).reg, - FORCEWAKE_ACK_MEDIA_VDBOX(j).reg, + FORCEWAKE_MEDIA_VDBOX(j), + FORCEWAKE_ACK_MEDIA_VDBOX(j), BIT(0), BIT(16)); } @@ -92,8 +93,8 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, - FORCEWAKE_MEDIA_VEBOX(j).reg, - FORCEWAKE_ACK_MEDIA_VEBOX(j).reg, + FORCEWAKE_MEDIA_VEBOX(j), + FORCEWAKE_ACK_MEDIA_VEBOX(j), BIT(0), BIT(16)); } } @@ -128,7 +129,7 @@ static int domain_sleep_wait(struct xe_gt *gt, for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \ for_each_if((domain__ = ((fw__)->domains + \ (ffs(tmp__) - 1))) && \ - domain__->reg_ctl) + domain__->reg_ctl.reg) int xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains domains) diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h index 208dd629d7b1..cb782696855b 100644 --- a/drivers/gpu/drm/xe/xe_force_wake_types.h +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -9,6 +9,8 @@ #include #include +#include "regs/xe_reg_defs.h" + enum xe_force_wake_domain_id { XE_FW_DOMAIN_ID_GT = 0, XE_FW_DOMAIN_ID_RENDER, @@ -56,9 +58,9 @@ struct xe_force_wake_domain { /** @id: domain force wake id */ enum xe_force_wake_domain_id id; /** @reg_ctl: domain wake control register address */ - u32 reg_ctl; + struct xe_reg reg_ctl; /** @reg_ack: domain ack register address */ - u32 reg_ack; + struct xe_reg reg_ack; /** @val: domain wake write value */ u32 val; /** @mask: domain mask */ diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 
4e5ad616063d..98903354b436 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -207,12 +207,12 @@ void xe_ggtt_invalidate(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); if (xe->info.platform == XE_PVC) { - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1.reg, + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0.reg, + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, PVC_GUC_TLB_INV_DESC0_VALID); } else - xe_mmio_write32(gt, GUC_TLB_INV_CR.reg, + xe_mmio_write32(gt, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE); } } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3afca3dd9657..cbe063a40aca 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -544,8 +544,8 @@ static int do_gt_reset(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int err; - xe_mmio_write32(gt, GDRST.reg, GRDOM_FULL); - err = xe_mmio_wait32(gt, GDRST.reg, 0, GRDOM_FULL, 5000, + xe_mmio_write32(gt, GDRST, GRDOM_FULL); + err = xe_mmio_wait32(gt, GDRST, 0, GRDOM_FULL, 5000, NULL, false); if (err) drm_err(&xe->drm, diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 49625d49bdcc..7cf11078ff57 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -14,7 +14,7 @@ static u32 read_reference_ts_freq(struct xe_gt *gt) { - u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE.reg); + u32 ts_override = xe_mmio_read32(gt, TIMESTAMP_OVERRIDE); u32 base_freq, frac_freq; base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK, @@ -54,7 +54,7 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg) int xe_gt_clock_init(struct xe_gt *gt) { - u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE.reg); + u32 ctc_reg = xe_mmio_read32(gt, CTC_MODE); u32 freq = 0; /* Assuming gen11+ so assert this assumption is correct */ @@ -63,7 +63,7 @@ int xe_gt_clock_init(struct xe_gt *gt) if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) { freq = read_reference_ts_freq(gt); } else { - u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0.reg); + u32 c0 = xe_mmio_read32(gt, RPM_CONFIG0); freq = get_crystal_clock_freq(c0); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 125c63bdc9b5..c6b9e9869fee 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -40,6 +40,8 @@ * non-terminated instance. */ +#define STEER_SEMAPHORE XE_REG(0xFD0) + static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr) { return reg_mcr.__reg; @@ -183,9 +185,9 @@ static void init_steering_l3bank(struct xe_gt *gt) { if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, - xe_mmio_read32(gt, MIRROR_FUSE3.reg)); + xe_mmio_read32(gt, MIRROR_FUSE3)); u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK, - xe_mmio_read32(gt, XEHP_FUSE4.reg)); + xe_mmio_read32(gt, XEHP_FUSE4)); /* * Group selects mslice, instance selects bank within mslice. @@ -196,7 +198,7 @@ static void init_steering_l3bank(struct xe_gt *gt) bank_mask & BIT(0) ? 
0 : 2; } else if (gt_to_xe(gt)->info.platform == XE_DG2) { u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK, - xe_mmio_read32(gt, MIRROR_FUSE3.reg)); + xe_mmio_read32(gt, MIRROR_FUSE3)); u32 bank = __ffs(mslice_mask) * 8; /* @@ -208,7 +210,7 @@ static void init_steering_l3bank(struct xe_gt *gt) gt->steering[L3BANK].instance_target = bank & 0x3; } else { u32 fuse = REG_FIELD_GET(L3BANK_MASK, - ~xe_mmio_read32(gt, MIRROR_FUSE3.reg)); + ~xe_mmio_read32(gt, MIRROR_FUSE3)); gt->steering[L3BANK].group_target = 0; /* unused */ gt->steering[L3BANK].instance_target = __ffs(fuse); @@ -218,7 +220,7 @@ static void init_steering_l3bank(struct xe_gt *gt) static void init_steering_mslice(struct xe_gt *gt) { u32 mask = REG_FIELD_GET(MEML3_EN_MASK, - xe_mmio_read32(gt, MIRROR_FUSE3.reg)); + xe_mmio_read32(gt, MIRROR_FUSE3)); /* * mslice registers are valid (not terminated) if either the meml3 @@ -337,8 +339,8 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) | REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2); - xe_mmio_write32(gt, MCFG_MCR_SELECTOR.reg, steer_val); - xe_mmio_write32(gt, SF_MCR_SELECTOR.reg, steer_val); + xe_mmio_write32(gt, MCFG_MCR_SELECTOR, steer_val); + xe_mmio_write32(gt, SF_MCR_SELECTOR, steer_val); /* * For GAM registers, all reads should be directed to instance 1 * (unicast reads against other instances are not allowed), @@ -376,7 +378,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, continue; for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { - if (xe_mmio_in_range(>->steering[type].ranges[i], reg.reg)) { + if (xe_mmio_in_range(>->steering[type].ranges[i], reg)) { *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; @@ -387,7 +389,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges; if (implicit_ranges) for (int i = 0; implicit_ranges[i].end > 0; i++) - if (xe_mmio_in_range(&implicit_ranges[i], reg.reg)) + if (xe_mmio_in_range(&implicit_ranges[i], reg)) return false; /* @@ -403,8 +405,6 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, return true; } -#define STEER_SEMAPHORE 0xFD0 - /* * Obtain exclusive access to MCR steering. 
On MTL and beyond we also need * to synchronize with external clients (e.g., firmware), so a semaphore @@ -446,16 +446,17 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, u8 rw_flag, int group, int instance, u32 value) { const struct xe_reg reg = to_xe_reg(reg_mcr); - u32 steer_reg, steer_val, val = 0; + struct xe_reg steer_reg; + u32 steer_val, val = 0; lockdep_assert_held(>->mcr_lock); if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - steer_reg = MTL_MCR_SELECTOR.reg; + steer_reg = MTL_MCR_SELECTOR; steer_val = REG_FIELD_PREP(MTL_MCR_GROUPID, group) | REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance); } else { - steer_reg = MCR_SELECTOR.reg; + steer_reg = MCR_SELECTOR; steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, group) | REG_FIELD_PREP(MCR_SUBSLICE_MASK, instance); } @@ -480,9 +481,9 @@ static u32 rw_with_mcr_steering(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, xe_mmio_write32(gt, steer_reg, steer_val); if (rw_flag == MCR_OP_READ) - val = xe_mmio_read32(gt, reg.reg); + val = xe_mmio_read32(gt, reg); else - xe_mmio_write32(gt, reg.reg, value); + xe_mmio_write32(gt, reg, value); /* * If we turned off the multicast bit (during a write) we're required @@ -524,7 +525,7 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr) group, instance, 0); mcr_unlock(gt); } else { - val = xe_mmio_read32(gt, reg.reg); + val = xe_mmio_read32(gt, reg); } return val; @@ -591,7 +592,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, * to touch the steering register. */ mcr_lock(gt); - xe_mmio_write32(gt, reg.reg, value); + xe_mmio_write32(gt, reg, value); mcr_unlock(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 14cf135fd648..7c3e347e4d74 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -26,7 +26,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) va_start(argp, numregs); for (i = 0; i < numregs; i++) - fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, u32)); + fuse_val[i] = xe_mmio_read32(gt, va_arg(argp, struct xe_reg)); va_end(argp); bitmap_from_arr32(mask, fuse_val, numregs * 32); @@ -36,7 +36,7 @@ static void load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) { struct xe_device *xe = gt_to_xe(gt); - u32 reg = xe_mmio_read32(gt, XELP_EU_ENABLE.reg); + u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE); u32 val = 0; int i; @@ -47,15 +47,15 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) * of enable). 
*/ if (GRAPHICS_VERx100(xe) < 1250) - reg = ~reg & XELP_EU_MASK; + reg_val = ~reg_val & XELP_EU_MASK; /* On PVC, one bit = one EU */ if (GRAPHICS_VERx100(xe) == 1260) { - val = reg; + val = reg_val; } else { /* All other platforms, one bit = 2 EU */ - for (i = 0; i < fls(reg); i++) - if (reg & BIT(i)) + for (i = 0; i < fls(reg_val); i++) + if (reg_val & BIT(i)) val |= 0x3 << 2 * i; } @@ -95,10 +95,10 @@ xe_gt_topology_init(struct xe_gt *gt) load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs, - XELP_GT_GEOMETRY_DSS_ENABLE.reg); + XELP_GT_GEOMETRY_DSS_ENABLE); load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, - XEHP_GT_COMPUTE_DSS_ENABLE.reg, - XEHPC_GT_COMPUTE_DSS_ENABLE_EXT.reg); + XEHP_GT_COMPUTE_DSS_ENABLE, + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); xe_gt_topology_dump(gt, &p); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 62b4fcf84acf..e8a126ad400f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -232,10 +232,10 @@ static void guc_write_params(struct xe_guc *guc) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - xe_mmio_write32(gt, SOFT_SCRATCH(0).reg, 0); + xe_mmio_write32(gt, SOFT_SCRATCH(0), 0); for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - xe_mmio_write32(gt, SOFT_SCRATCH(1 + i).reg, guc->params[i]); + xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]); } int xe_guc_init(struct xe_guc *guc) @@ -268,9 +268,9 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); if (xe_gt_is_media_type(gt)) - guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT.reg; + guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT; else - guc->notify_reg = GUC_HOST_INTERRUPT.reg; + guc->notify_reg = GUC_HOST_INTERRUPT; xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); @@ -309,9 +309,9 @@ int xe_guc_reset(struct xe_guc *guc) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - xe_mmio_write32(gt, GDRST.reg, GRDOM_GUC); + xe_mmio_write32(gt, GDRST, GRDOM_GUC); - ret = xe_mmio_wait32(gt, GDRST.reg, 0, GRDOM_GUC, 5000, + ret = xe_mmio_wait32(gt, GDRST, 0, GRDOM_GUC, 5000, &gdrst, false); if (ret) { drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", @@ -319,7 +319,7 @@ int xe_guc_reset(struct xe_guc *guc) goto err_out; } - guc_status = xe_mmio_read32(gt, GUC_STATUS.reg); + guc_status = xe_mmio_read32(gt, GUC_STATUS); if (!(guc_status & GS_MIA_IN_RESET)) { drm_err(&xe->drm, "GuC status: 0x%x, MIA core expected to be in reset\n", @@ -352,9 +352,9 @@ static void guc_prepare_xfer(struct xe_guc *guc) shim_flags |= PVC_GUC_MOCS_INDEX(PVC_GUC_MOCS_UC_INDEX); /* Must program this register before loading the ucode with DMA */ - xe_mmio_write32(gt, GUC_SHIM_CONTROL.reg, shim_flags); + xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags); - xe_mmio_write32(gt, GT_PM_CONFIG.reg, GT_DOORBELL_ENABLE); + xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE); } /* @@ -370,7 +370,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) if (guc->fw.rsa_size > 256) { u32 rsa_ggtt_addr = xe_bo_ggtt_addr(guc->fw.bo) + xe_uc_fw_rsa_offset(&guc->fw); - xe_mmio_write32(gt, UOS_RSA_SCRATCH(0).reg, rsa_ggtt_addr); + xe_mmio_write32(gt, UOS_RSA_SCRATCH(0), rsa_ggtt_addr); return 0; } @@ -379,7 +379,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) return -ENOMEM; for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) - xe_mmio_write32(gt, UOS_RSA_SCRATCH(i).reg, rsa[i]); + xe_mmio_write32(gt, UOS_RSA_SCRATCH(i), rsa[i]); return 0; } @@ -407,7 +407,7 @@ static int guc_wait_ucode(struct xe_guc *guc) * 200ms. 
Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. */ - ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS.reg, + ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), GS_UKERNEL_MASK, 200000, &status, false); @@ -435,7 +435,7 @@ static int guc_wait_ucode(struct xe_guc *guc) XE_GUC_LOAD_STATUS_EXCEPTION) { drm_info(drm, "GuC firmware exception. EIP: %#x\n", xe_mmio_read32(guc_to_gt(guc), - SOFT_SCRATCH(13).reg)); + SOFT_SCRATCH(13))); ret = -ENXIO; } @@ -532,10 +532,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - msg = xe_mmio_read32(gt, SOFT_SCRATCH(15).reg); + msg = xe_mmio_read32(gt, SOFT_SCRATCH(15)); msg &= XE_GUC_RECV_MSG_EXCEPTION | XE_GUC_RECV_MSG_CRASH_DUMP_POSTED; - xe_mmio_write32(gt, SOFT_SCRATCH(15).reg, 0); + xe_mmio_write32(gt, SOFT_SCRATCH(15), 0); if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED) drm_err(&guc_to_xe(guc)->drm, @@ -553,12 +553,12 @@ static void guc_enable_irq(struct xe_guc *guc) REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST) : REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); - xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg, + xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST)); if (xe_gt_is_media_type(gt)) - xe_mmio_rmw32(gt, GUC_SG_INTR_MASK.reg, events, 0); + xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0); else - xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~events); + xe_mmio_write32(gt, GUC_SG_INTR_MASK, ~events); } int xe_guc_enable_communication(struct xe_guc *guc) @@ -567,7 +567,7 @@ int xe_guc_enable_communication(struct xe_guc *guc) guc_enable_irq(guc); - xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK.reg, + xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0); err = xe_guc_ct_enable(&guc->ct); @@ -620,8 +620,8 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); u32 header, reply; - u32 reply_reg = xe_gt_is_media_type(gt) ? - MED_VF_SW_FLAG(0).reg : VF_SW_FLAG(0).reg; + struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? 
+ MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); const u32 LAST_INDEX = VF_SW_FLAG_COUNT; int ret; int i; @@ -641,14 +641,14 @@ retry: /* Not in critical data-path, just do if else for GT type */ if (xe_gt_is_media_type(gt)) { for (i = 0; i < len; ++i) - xe_mmio_write32(gt, MED_VF_SW_FLAG(i).reg, + xe_mmio_write32(gt, MED_VF_SW_FLAG(i), request[i]); - xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX).reg); + xe_mmio_read32(gt, MED_VF_SW_FLAG(LAST_INDEX)); } else { for (i = 0; i < len; ++i) - xe_mmio_write32(gt, VF_SW_FLAG(i).reg, + xe_mmio_write32(gt, VF_SW_FLAG(i), request[i]); - xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX).reg); + xe_mmio_read32(gt, VF_SW_FLAG(LAST_INDEX)); } xe_guc_notify(guc); @@ -712,9 +712,10 @@ proto: if (response_buf) { response_buf[0] = header; - for (i = 1; i < VF_SW_FLAG_COUNT; i++) - response_buf[i] = - xe_mmio_read32(gt, reply_reg + i * sizeof(u32)); + for (i = 1; i < VF_SW_FLAG_COUNT; i++) { + reply_reg.reg += i * sizeof(u32); + response_buf[i] = xe_mmio_read32(gt, reply_reg); + } } /* Use data from the GuC response as our return value */ @@ -836,7 +837,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) if (err) return; - status = xe_mmio_read32(gt, GUC_STATUS.reg); + status = xe_mmio_read32(gt, GUC_STATUS); drm_printf(p, "\nGuC status 0x%08x:\n", status); drm_printf(p, "\tBootrom status = 0x%x\n", @@ -851,7 +852,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) drm_puts(p, "\nScratch registers:\n"); for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { drm_printf(p, "\t%2d: \t0x%x\n", - i, xe_mmio_read32(gt, SOFT_SCRATCH(i).reg)); + i, xe_mmio_read32(gt, SOFT_SCRATCH(i))); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 84c2d7c624c6..683f2df09c49 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -428,7 +428,6 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, struct guc_mmio_reg entry = { .offset = reg.reg, .flags = reg.masked ? 
GUC_REGSET_MASKED : 0, - /* TODO: steering */ }; xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry), @@ -551,7 +550,7 @@ static void guc_doorbell_init(struct xe_guc_ads *ads) if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) { u32 distdbreg = - xe_mmio_read32(gt, DIST_DBS_POPULATED.reg); + xe_mmio_read32(gt, DIST_DBS_POPULATED); ads_blob_write(ads, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI], diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 72d460d5323b..e799faa1c6b8 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -317,9 +317,9 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc) u32 reg; if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY.reg); + reg = xe_mmio_read32(gt, MTL_MPE_FREQUENCY); else - reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY.reg); + reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY); pc->rpe_freq = REG_FIELD_GET(MTL_RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } @@ -336,9 +336,9 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc) * PCODE at a different register */ if (xe->info.platform == XE_PVC) - reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg); + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP); else - reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC.reg); + reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC); pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } @@ -380,10 +380,10 @@ static ssize_t freq_act_show(struct device *dev, goto out; if (xe->info.platform == XE_METEORLAKE) { - freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1.reg); + freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); freq = REG_FIELD_GET(MTL_CAGF_MASK, freq); } else { - freq = xe_mmio_read32(gt, GEN12_RPSTAT1.reg); + freq = xe_mmio_read32(gt, GEN12_RPSTAT1); freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq); } @@ -413,7 +413,7 @@ static ssize_t freq_cur_show(struct device *dev, if (ret) goto out; - freq = xe_mmio_read32(gt, RPNSWREQ.reg); + freq = xe_mmio_read32(gt, RPNSWREQ); freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); @@ -588,7 +588,7 @@ static ssize_t rc_status_show(struct device *dev, u32 reg; xe_device_mem_access_get(gt_to_xe(gt)); - reg = xe_mmio_read32(gt, GT_CORE_STATUS.reg); + reg = xe_mmio_read32(gt, GT_CORE_STATUS); xe_device_mem_access_put(gt_to_xe(gt)); switch (REG_FIELD_GET(RCN_MASK, reg)) { @@ -615,7 +615,7 @@ static ssize_t rc6_residency_show(struct device *dev, if (ret) goto out; - reg = xe_mmio_read32(gt, GT_GFX_RC6.reg); + reg = xe_mmio_read32(gt, GT_GFX_RC6); ret = sysfs_emit(buff, "%u\n", reg); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -646,9 +646,9 @@ static void mtl_init_fused_rp_values(struct xe_guc_pc *pc) xe_device_assert_mem_access(pc_to_xe(pc)); if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP.reg); + reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP); else - reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP.reg); + reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP); pc->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, reg) * GT_FREQUENCY_MULTIPLIER; pc->rpn_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, reg) * @@ -664,9 +664,9 @@ static void tgl_init_fused_rp_values(struct xe_guc_pc *pc) xe_device_assert_mem_access(pc_to_xe(pc)); if (xe->info.platform == XE_PVC) - reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP.reg); + reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP); else - reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP.reg); + reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP); 
pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } @@ -745,9 +745,9 @@ static int pc_gucrc_disable(struct xe_guc_pc *pc) if (ret) return ret; - xe_mmio_write32(gt, PG_ENABLE.reg, 0); - xe_mmio_write32(gt, RC_CONTROL.reg, 0); - xe_mmio_write32(gt, RC_STATE.reg, 0); + xe_mmio_write32(gt, PG_ENABLE, 0); + xe_mmio_write32(gt, RC_CONTROL, 0); + xe_mmio_write32(gt, RC_STATE, 0); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); return 0; diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index ac7eec28934d..a304dce4e9f4 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -9,6 +9,7 @@ #include #include +#include "regs/xe_reg_defs.h" #include "xe_guc_ads_types.h" #include "xe_guc_ct_types.h" #include "xe_guc_fwif.h" @@ -74,7 +75,7 @@ struct xe_guc { /** * @notify_reg: Register which is written to notify GuC of H2G messages */ - u32 notify_reg; + struct xe_reg notify_reg; /** @params: Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; }; diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 55dcaab34ea4..e0377083d1f2 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -84,7 +84,7 @@ int xe_huc_auth(struct xe_huc *huc) goto fail; } - ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO.reg, + ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO, HUC_LOAD_SUCCESSFUL, HUC_LOAD_SUCCESSFUL, 100000, NULL, false); if (ret) { @@ -126,7 +126,7 @@ void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) return; drm_printf(p, "\nHuC status: 0x%08x\n", - xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO.reg)); + xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO)); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index a9adac0624f6..5e275aff8974 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -233,20 +233,25 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) hwe->gt = NULL; } -static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, u32 reg, u32 val) +static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, + u32 val) { - XE_BUG_ON(reg & hwe->mmio_base); + XE_BUG_ON(reg.reg & hwe->mmio_base); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); - xe_mmio_write32(hwe->gt, reg + hwe->mmio_base, val); + reg.reg += hwe->mmio_base; + + xe_mmio_write32(hwe->gt, reg, val); } -static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, u32 reg) +static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { - XE_BUG_ON(reg & hwe->mmio_base); + XE_BUG_ON(reg.reg & hwe->mmio_base); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); - return xe_mmio_read32(hwe->gt, reg + hwe->mmio_base); + reg.reg += hwe->mmio_base; + + return xe_mmio_read32(hwe->gt, reg); } void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) @@ -255,17 +260,17 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) - xe_mmio_write32(hwe->gt, RCU_MODE.reg, + xe_mmio_write32(hwe->gt, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); - hw_engine_mmio_write32(hwe, RING_HWSTAM(0).reg, ~0x0); - hw_engine_mmio_write32(hwe, RING_HWS_PGA(0).reg, + hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); + hw_engine_mmio_write32(hwe, 
RING_HWS_PGA(0), xe_bo_ggtt_addr(hwe->hwsp)); - hw_engine_mmio_write32(hwe, RING_MODE(0).reg, + hw_engine_mmio_write32(hwe, RING_MODE(0), _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); - hw_engine_mmio_write32(hwe, RING_MI_MODE(0).reg, + hw_engine_mmio_write32(hwe, RING_MI_MODE(0), _MASKED_BIT_DISABLE(STOP_RING)); - hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg); + hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } void @@ -443,7 +448,7 @@ static void read_media_fuses(struct xe_gt *gt) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE.reg); + media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE); /* * Pre-Xe_HP platforms had register bits representing absent engines, @@ -485,7 +490,7 @@ static void read_copy_fuses(struct xe_gt *gt) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3.reg); + bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3); bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask); /* BCS0 is always present; only BCS1-BCS8 may be fused off */ @@ -582,63 +587,63 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p) drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base); drm_printf(p, "\tHWSTAM: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HWSTAM(0).reg)); + hw_engine_mmio_read32(hwe, RING_HWSTAM(0))); drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HWS_PGA(0).reg)); + hw_engine_mmio_read32(hwe, RING_HWS_PGA(0))); drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0).reg)); + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0))); drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0).reg)); + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0))); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n", hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS_LO(0).reg)); + RING_EXECLIST_SQ_CONTENTS_LO(0))); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS_HI(0).reg)); + RING_EXECLIST_SQ_CONTENTS_HI(0))); drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0).reg)); + hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0))); drm_printf(p, "\tRING_START: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_START(0).reg)); + hw_engine_mmio_read32(hwe, RING_START(0))); drm_printf(p, "\tRING_HEAD: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HEAD(0).reg) & HEAD_ADDR); + hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR); drm_printf(p, "\tRING_TAIL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_TAIL(0).reg) & TAIL_ADDR); + hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR); drm_printf(p, "\tRING_CTL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_CTL(0).reg)); + hw_engine_mmio_read32(hwe, RING_CTL(0))); drm_printf(p, "\tRING_MODE: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_MI_MODE(0).reg)); + hw_engine_mmio_read32(hwe, RING_MI_MODE(0))); drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_MODE(0).reg)); + hw_engine_mmio_read32(hwe, RING_MODE(0))); drm_printf(p, "\tRING_IMR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_IMR(0).reg)); + hw_engine_mmio_read32(hwe, RING_IMR(0))); drm_printf(p, "\tRING_ESR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_ESR(0).reg)); + hw_engine_mmio_read32(hwe, RING_ESR(0))); drm_printf(p, "\tRING_EMR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EMR(0).reg)); + 
hw_engine_mmio_read32(hwe, RING_EMR(0))); drm_printf(p, "\tRING_EIR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EIR(0).reg)); + hw_engine_mmio_read32(hwe, RING_EIR(0))); drm_printf(p, "\tACTHD: 0x%08x_%08x\n", - hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0).reg), - hw_engine_mmio_read32(hwe, RING_ACTHD(0).reg)); + hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)), + hw_engine_mmio_read32(hwe, RING_ACTHD(0))); drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", - hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0).reg), - hw_engine_mmio_read32(hwe, RING_BBADDR(0).reg)); + hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)), + hw_engine_mmio_read32(hwe, RING_BBADDR(0))); drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", - hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0).reg), - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0).reg)); + hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)), + hw_engine_mmio_read32(hwe, RING_DMA_FADD(0))); drm_printf(p, "\tIPEIR: 0x%08x\n", - hw_engine_mmio_read32(hwe, IPEIR(0).reg)); + hw_engine_mmio_read32(hwe, IPEIR(0))); drm_printf(p, "\tIPEHR: 0x%08x\n\n", - hw_engine_mmio_read32(hwe, IPEHR(0).reg)); + hw_engine_mmio_read32(hwe, IPEHR(0))); if (hwe->class == XE_ENGINE_CLASS_COMPUTE) drm_printf(p, "\tRCU_MODE: 0x%08x\n", - xe_mmio_read32(hwe->gt, RCU_MODE.reg)); + xe_mmio_read32(hwe->gt, RCU_MODE)); } diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 2fffb2865cab..7c58cf526951 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -28,7 +28,7 @@ static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg) { - u32 val = xe_mmio_read32(gt, reg.reg); + u32 val = xe_mmio_read32(gt, reg); if (val == 0) return; @@ -36,10 +36,10 @@ static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg) drm_WARN(>_to_xe(gt)->drm, 1, "Interrupt register 0x%x is not zero: 0x%08x\n", reg.reg, val); - xe_mmio_write32(gt, reg.reg, 0xffffffff); - xe_mmio_read32(gt, reg.reg); - xe_mmio_write32(gt, reg.reg, 0xffffffff); - xe_mmio_read32(gt, reg.reg); + xe_mmio_write32(gt, reg, 0xffffffff); + xe_mmio_read32(gt, reg); + xe_mmio_write32(gt, reg, 0xffffffff); + xe_mmio_read32(gt, reg); } /* @@ -54,32 +54,32 @@ static void unmask_and_enable(struct xe_gt *gt, u32 irqregs, u32 bits) */ assert_iir_is_zero(gt, IIR(irqregs)); - xe_mmio_write32(gt, IER(irqregs).reg, bits); - xe_mmio_write32(gt, IMR(irqregs).reg, ~bits); + xe_mmio_write32(gt, IER(irqregs), bits); + xe_mmio_write32(gt, IMR(irqregs), ~bits); /* Posting read */ - xe_mmio_read32(gt, IMR(irqregs).reg); + xe_mmio_read32(gt, IMR(irqregs)); } /* Mask and disable all interrupts. */ static void mask_and_disable(struct xe_gt *gt, u32 irqregs) { - xe_mmio_write32(gt, IMR(irqregs).reg, ~0); + xe_mmio_write32(gt, IMR(irqregs), ~0); /* Posting read */ - xe_mmio_read32(gt, IMR(irqregs).reg); + xe_mmio_read32(gt, IMR(irqregs)); - xe_mmio_write32(gt, IER(irqregs).reg, 0); + xe_mmio_write32(gt, IER(irqregs), 0); /* IIR can theoretically queue up two events. Be paranoid. 
*/ - xe_mmio_write32(gt, IIR(irqregs).reg, ~0); - xe_mmio_read32(gt, IIR(irqregs).reg); - xe_mmio_write32(gt, IIR(irqregs).reg, ~0); - xe_mmio_read32(gt, IIR(irqregs).reg); + xe_mmio_write32(gt, IIR(irqregs), ~0); + xe_mmio_read32(gt, IIR(irqregs)); + xe_mmio_write32(gt, IIR(irqregs), ~0); + xe_mmio_read32(gt, IIR(irqregs)); } static u32 xelp_intr_disable(struct xe_gt *gt) { - xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, 0); + xe_mmio_write32(gt, GFX_MSTR_IRQ, 0); /* * Now with master disabled, get a sample of level indications @@ -87,7 +87,7 @@ static u32 xelp_intr_disable(struct xe_gt *gt) * New indications can and will light up during processing, * and will generate new interrupt after enabling master. */ - return xe_mmio_read32(gt, GFX_MSTR_IRQ.reg); + return xe_mmio_read32(gt, GFX_MSTR_IRQ); } static u32 @@ -98,18 +98,18 @@ gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) if (!(master_ctl & GU_MISC_IRQ)) return 0; - iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET).reg); + iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET)); if (likely(iir)) - xe_mmio_write32(gt, IIR(GU_MISC_IRQ_OFFSET).reg, iir); + xe_mmio_write32(gt, IIR(GU_MISC_IRQ_OFFSET), iir); return iir; } static inline void xelp_intr_enable(struct xe_gt *gt, bool stall) { - xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, MASTER_IRQ); + xe_mmio_write32(gt, GFX_MSTR_IRQ, MASTER_IRQ); if (stall) - xe_mmio_read32(gt, GFX_MSTR_IRQ.reg); + xe_mmio_read32(gt, GFX_MSTR_IRQ); } static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) @@ -132,41 +132,41 @@ static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) smask = irqs << 16; /* Enable RCS, BCS, VCS and VECS class interrupts. */ - xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE.reg, dmask); - xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE.reg, dmask); + xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask); + xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask); if (ccs_mask) - xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE.reg, smask); + xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask); /* Unmask irqs on RCS, BCS, VCS and VECS engines. 
*/ - xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK.reg, ~smask); - xe_mmio_write32(gt, BCS_RSVD_INTR_MASK.reg, ~smask); + xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask); + xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask); if (bcs_mask & (BIT(1)|BIT(2))) - xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); if (bcs_mask & (BIT(3)|BIT(4))) - xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); if (bcs_mask & (BIT(5)|BIT(6))) - xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); if (bcs_mask & (BIT(7)|BIT(8))) - xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~dmask); - xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~dmask); - xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~dmask); - xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); + xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask); + xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); + xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK.reg, ~dmask); + xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. */ /* TODO: gt->pm_ier, gt->pm_imr */ - xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE, 0); + xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK, ~0); /* Same thing for GuC interrupts */ - xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, 0); + xe_mmio_write32(gt, GUC_SG_INTR_MASK, ~0); } static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) @@ -191,7 +191,7 @@ gt_engine_identity(struct xe_device *xe, lockdep_assert_held(&xe->irq.lock); - xe_mmio_write32(gt, IIR_REG_SELECTOR(bank).reg, BIT(bit)); + xe_mmio_write32(gt, IIR_REG_SELECTOR(bank), BIT(bit)); /* * NB: Specs do not specify how long to spin wait, @@ -199,7 +199,7 @@ gt_engine_identity(struct xe_device *xe, */ timeout_ts = (local_clock() >> 10) + 100; do { - ident = xe_mmio_read32(gt, INTR_IDENTITY_REG(bank).reg); + ident = xe_mmio_read32(gt, INTR_IDENTITY_REG(bank)); } while (!(ident & INTR_DATA_VALID) && !time_after32(local_clock() >> 10, timeout_ts)); @@ -209,7 +209,7 @@ gt_engine_identity(struct xe_device *xe, return 0; } - xe_mmio_write32(gt, INTR_IDENTITY_REG(bank).reg, INTR_DATA_VALID); + xe_mmio_write32(gt, INTR_IDENTITY_REG(bank), INTR_DATA_VALID); return ident; } @@ -248,11 +248,11 @@ static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, if (!xe_gt_is_media_type(gt)) { intr_dw[bank] = - xe_mmio_read32(gt, GT_INTR_DW(bank).reg); + xe_mmio_read32(gt, GT_INTR_DW(bank)); for_each_set_bit(bit, intr_dw + bank, 32) identity[bit] = gt_engine_identity(xe, gt, bank, bit); - xe_mmio_write32(gt, GT_INTR_DW(bank).reg, + xe_mmio_write32(gt, GT_INTR_DW(bank), intr_dw[bank]); } @@ -310,14 +310,14 @@ static u32 dg1_intr_disable(struct xe_device *xe) u32 val; /* First disable interrupts */ - xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, 0); + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, 0); /* Get the indication levels and ack 
the master unit */ - val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg); + val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR); if (unlikely(!val)) return 0; - xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, val); + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, val); return val; } @@ -326,9 +326,9 @@ static void dg1_intr_enable(struct xe_device *xe, bool stall) { struct xe_gt *gt = xe_device_get_gt(xe, 0); - xe_mmio_write32(gt, DG1_MSTR_TILE_INTR.reg, DG1_MSTR_IRQ); + xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ); if (stall) - xe_mmio_read32(gt, DG1_MSTR_TILE_INTR.reg); + xe_mmio_read32(gt, DG1_MSTR_TILE_INTR); } static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) @@ -368,7 +368,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) continue; if (!xe_gt_is_media_type(gt)) - master_ctl = xe_mmio_read32(gt, GFX_MSTR_IRQ.reg); + master_ctl = xe_mmio_read32(gt, GFX_MSTR_IRQ); /* * We might be in irq handler just when PCIe DPC is initiated @@ -382,7 +382,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) } if (!xe_gt_is_media_type(gt)) - xe_mmio_write32(gt, GFX_MSTR_IRQ.reg, master_ctl); + xe_mmio_write32(gt, GFX_MSTR_IRQ, master_ctl); gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); /* @@ -407,34 +407,34 @@ static void gt_irq_reset(struct xe_gt *gt) u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); /* Disable RCS, BCS, VCS and VECS class engines. */ - xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, 0); + xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, 0); if (ccs_mask) - xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE.reg, 0); + xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, 0); /* Restore masks irqs on RCS, BCS, VCS and VECS engines. 
*/ - xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, BCS_RSVD_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~0); + xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~0); if (bcs_mask & (BIT(1)|BIT(2))) - xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~0); if (bcs_mask & (BIT(3)|BIT(4))) - xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~0); if (bcs_mask & (BIT(5)|BIT(6))) - xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~0); if (bcs_mask & (BIT(7)|BIT(8))) - xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~0); + xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~0); + xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~0); + xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~0); if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~0); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~0); - xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK.reg, ~0); - xe_mmio_write32(gt, GUC_SG_INTR_ENABLE.reg, 0); - xe_mmio_write32(gt, GUC_SG_INTR_MASK.reg, ~0); + xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE, 0); + xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK, ~0); + xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, 0); + xe_mmio_write32(gt, GUC_SG_INTR_MASK, ~0); } static void xelp_irq_reset(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 3b719c774efa..0e91004fa06d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -153,13 +153,13 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si struct xe_gt *gt = xe_device_get_gt(xe, 0); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int err; - u32 reg; + u32 reg_val; if (!xe->info.has_flat_ccs) { *vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR); if (usable_size) *usable_size = min(*vram_size, - xe_mmio_read64(gt, GSMBASE.reg)); + xe_mmio_read64(gt, GSMBASE)); return 0; } @@ -167,11 +167,11 @@ int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_si if (err) return err; - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); - *vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); + *vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg_val) * SZ_1G; if (usable_size) { - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); - *usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + *usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg_val) * SZ_64K; drm_info(&xe->drm, "vram_size: 0x%llx usable_size: 0x%llx\n", *vram_size, *usable_size); } @@ -298,7 +298,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe) if (xe->info.tile_count == 1) return; - mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR.reg); + mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR); adj_tile_count = xe->info.tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; if (xe->info.media_verx100 >= 1300) @@ -374,7 +374,7 @@ int 
xe_mmio_init(struct xe_device *xe) * keep the GT powered down; we won't be able to communicate with it * and we should not continue with driver initialization. */ - if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL.reg) & LMEM_INIT)) { + if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { drm_err(&xe->drm, "VRAM not initialized by firmware\n"); return -ENODEV; } @@ -403,6 +403,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct drm_xe_mmio *args = data; unsigned int bits_flag, bytes; + struct xe_reg reg; bool allowed; int ret = 0; @@ -435,6 +436,12 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size)) return -EINVAL; + /* + * TODO: migrate to xe_gt_mcr to lookup the mmio range and handle + * multicast registers. Steering would need uapi extension. + */ + reg = XE_REG(args->addr); + xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); if (args->flags & DRM_XE_MMIO_WRITE) { @@ -444,10 +451,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, ret = -EINVAL; goto exit; } - xe_mmio_write32(to_gt(xe), args->addr, args->value); + xe_mmio_write32(to_gt(xe), reg, args->value); break; case DRM_XE_MMIO_64BIT: - xe_mmio_write64(to_gt(xe), args->addr, args->value); + xe_mmio_write64(to_gt(xe), reg, args->value); break; default: drm_dbg(&xe->drm, "Invalid MMIO bit size"); @@ -462,10 +469,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_MMIO_READ) { switch (bits_flag) { case DRM_XE_MMIO_32BIT: - args->value = xe_mmio_read32(to_gt(xe), args->addr); + args->value = xe_mmio_read32(to_gt(xe), reg); break; case DRM_XE_MMIO_64BIT: - args->value = xe_mmio_read64(to_gt(xe), args->addr); + args->value = xe_mmio_read64(to_gt(xe), reg); break; default: drm_dbg(&xe->drm, "Invalid MMIO bit size"); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index b72a0a75259f..f9a23b4ef77d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -9,6 +9,7 @@ #include #include +#include "regs/xe_reg_defs.h" #include "xe_gt_types.h" struct drm_device; @@ -17,33 +18,33 @@ struct xe_device; int xe_mmio_init(struct xe_device *xe); -static inline u8 xe_mmio_read8(struct xe_gt *gt, u32 reg) +static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { - if (reg < gt->mmio.adj_limit) - reg += gt->mmio.adj_offset; + if (reg.reg < gt->mmio.adj_limit) + reg.reg += gt->mmio.adj_offset; - return readb(gt->mmio.regs + reg); + return readb(gt->mmio.regs + reg.reg); } static inline void xe_mmio_write32(struct xe_gt *gt, - u32 reg, u32 val) + struct xe_reg reg, u32 val) { - if (reg < gt->mmio.adj_limit) - reg += gt->mmio.adj_offset; + if (reg.reg < gt->mmio.adj_limit) + reg.reg += gt->mmio.adj_offset; - writel(val, gt->mmio.regs + reg); + writel(val, gt->mmio.regs + reg.reg); } -static inline u32 xe_mmio_read32(struct xe_gt *gt, u32 reg) +static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) { - if (reg < gt->mmio.adj_limit) - reg += gt->mmio.adj_offset; + if (reg.reg < gt->mmio.adj_limit) + reg.reg += gt->mmio.adj_offset; - return readl(gt->mmio.regs + reg); + return readl(gt->mmio.regs + reg.reg); } -static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 clr, - u32 set) +static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, + u32 set) { u32 old, reg_val; @@ -55,24 +56,24 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, u32 reg, u32 clr, } static inline void 
xe_mmio_write64(struct xe_gt *gt, - u32 reg, u64 val) + struct xe_reg reg, u64 val) { - if (reg < gt->mmio.adj_limit) - reg += gt->mmio.adj_offset; + if (reg.reg < gt->mmio.adj_limit) + reg.reg += gt->mmio.adj_offset; - writeq(val, gt->mmio.regs + reg); + writeq(val, gt->mmio.regs + reg.reg); } -static inline u64 xe_mmio_read64(struct xe_gt *gt, u32 reg) +static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg) { - if (reg < gt->mmio.adj_limit) - reg += gt->mmio.adj_offset; + if (reg.reg < gt->mmio.adj_limit) + reg.reg += gt->mmio.adj_offset; - return readq(gt->mmio.regs + reg); + return readq(gt->mmio.regs + reg.reg); } static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, - u32 reg, u32 val, + struct xe_reg reg, u32 val, u32 mask, u32 eval) { u32 reg_val; @@ -83,8 +84,9 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, return (reg_val & mask) != eval ? -EINVAL : 0; } -static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask, - u32 timeout_us, u32 *out_val, bool atomic) +static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 val, + u32 mask, u32 timeout_us, u32 *out_val, + bool atomic) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_us(cur, timeout_us); @@ -122,9 +124,10 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, u32 reg, u32 val, u32 mask, int xe_mmio_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, u32 reg) +static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, + struct xe_reg reg) { - return range && reg >= range->start && reg <= range->end; + return range && reg.reg >= range->start && reg.reg <= range->end; } int xe_mmio_probe_vram(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 0d07811a573f..f30e1a0ce5dc 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -477,8 +477,10 @@ static void __init_mocs_table(struct xe_gt *gt, for (i = 0; i < info->n_entries ? 
(mocs = get_entry_control(info, i)), 1 : 0; i++) { - mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, XE_REG(addr + i * 4).reg, mocs); - xe_mmio_write32(gt, XE_REG(addr + i * 4).reg, mocs); + struct xe_reg reg = XE_REG(addr + i * 4); + + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, reg.reg, mocs); + xe_mmio_write32(gt, reg, mocs); } } @@ -514,7 +516,7 @@ static void init_l3cc_table(struct xe_gt *gt, i++) { mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).reg, l3cc); - xe_mmio_write32(gt, LNCFCMOCS(i).reg, l3cc); + xe_mmio_write32(gt, LNCFCMOCS(i), l3cc); } } diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index abee41fa3cb9..b56a65779d26 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -64,14 +64,20 @@ static const u32 mtl_pat_table[] = { static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries) { - for (int i = 0; i < n_entries; i++) - xe_mmio_write32(gt, _PAT_INDEX(i), table[i]); + for (int i = 0; i < n_entries; i++) { + struct xe_reg reg = XE_REG(_PAT_INDEX(i)); + + xe_mmio_write32(gt, reg, table[i]); + } } static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries) { - for (int i = 0; i < n_entries; i++) - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_INDEX(i)), table[i]); + for (int i = 0; i < n_entries; i++) { + struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i)); + + xe_gt_mcr_multicast_write(gt, reg_mcr, table[i]); + } } void xe_pat_init(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 99bb730684ed..7ab70a83f88d 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -43,7 +43,7 @@ static int pcode_mailbox_status(struct xe_gt *gt) lockdep_assert_held(>->pcode.lock); - err = xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_ERROR_MASK; + err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; if (err) { drm_err(>_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, err_decode[err].str ?: "Unknown"); @@ -60,22 +60,22 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, int err; lockdep_assert_held(>->pcode.lock); - if ((xe_mmio_read32(gt, PCODE_MAILBOX.reg) & PCODE_READY) != 0) + if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) return -EAGAIN; - xe_mmio_write32(gt, PCODE_DATA0.reg, *data0); - xe_mmio_write32(gt, PCODE_DATA1.reg, data1 ? *data1 : 0); - xe_mmio_write32(gt, PCODE_MAILBOX.reg, PCODE_READY | mbox); + xe_mmio_write32(gt, PCODE_DATA0, *data0); + xe_mmio_write32(gt, PCODE_DATA1, data1 ? *data1 : 0); + xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox); - err = xe_mmio_wait32(gt, PCODE_MAILBOX.reg, 0, PCODE_READY, + err = xe_mmio_wait32(gt, PCODE_MAILBOX, 0, PCODE_READY, timeout_ms * 1000, NULL, atomic); if (err) return err; if (return_data) { - *data0 = xe_mmio_read32(gt, PCODE_DATA0.reg); + *data0 = xe_mmio_read32(gt, PCODE_DATA0); if (data1) - *data1 = xe_mmio_read32(gt, PCODE_DATA1.reg); + *data1 = xe_mmio_read32(gt, PCODE_DATA1); } return pcode_mailbox_status(gt); diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index d129e6d7cb1f..f75ef8d7500a 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -161,7 +161,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) else if (entry->clr_bits + 1) val = (reg.mcr ? 
xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(gt, reg.reg)) & (~entry->clr_bits); + xe_mmio_read32(gt, reg)) & (~entry->clr_bits); else val = 0; @@ -177,7 +177,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) if (entry->reg.mcr) xe_gt_mcr_multicast_write(gt, reg_mcr, val); else - xe_mmio_write32(gt, reg.reg, val); + xe_mmio_write32(gt, reg, val); } void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) @@ -230,15 +230,17 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, p = drm_debug_printer(KBUILD_MODNAME); xa_for_each(&sr->xa, reg, entry) { xe_reg_whitelist_print_entry(&p, 0, reg, entry); - xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, + xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), reg | entry->set_bits); slot++; } /* And clear the rest just in case of garbage */ - for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) - xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot).reg, - RING_NOPID(mmio_base).reg); + for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) { + u32 addr = RING_NOPID(mmio_base).reg; + + xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); + } err = xe_force_wake_put(>->mmio.fw, XE_FORCEWAKE_ALL); XE_WARN_ON(err); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index c1b738e033c7..ce829bd48825 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -44,10 +44,11 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | BIT(8) | state; } -static int emit_aux_table_inv(struct xe_gt *gt, u32 addr, u32 *dw, int i) +static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg, + u32 *dw, int i) { dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; - dw[i++] = addr + gt->mmio.adj_offset; + dw[i++] = reg.reg + gt->mmio.adj_offset; dw[i++] = AUX_INV; dw[i++] = MI_NOOP; @@ -203,9 +204,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, /* hsdes: 1809175790 */ if (!xe->info.has_flat_ccs) { if (decode) - i = emit_aux_table_inv(gt, VD0_AUX_INV.reg, dw, i); + i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i); else - i = emit_aux_table_inv(gt, VE0_AUX_INV.reg, dw, i); + i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); } dw[i++] = preparser_disable(false); @@ -248,7 +249,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, /* hsdes: 1809175790 */ if (!xe->info.has_flat_ccs) - i = emit_aux_table_inv(gt, CCS_AUX_INV.reg, dw, i); + i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i); dw[i++] = preparser_disable(false); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 9ce0a0585539..a3855870321f 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -65,7 +65,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) } /* Use DSM base address instead for stolen memory */ - mgr->stolen_base = xe_mmio_read64(gt, DSMBASE.reg) & BDSM_MASK; + mgr->stolen_base = xe_mmio_read64(gt, DSMBASE) & BDSM_MASK; if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base)) return 0; @@ -88,7 +88,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr u32 stolen_size; u32 ggc, gms; - ggc = xe_mmio_read32(to_gt(xe), GGC.reg); + ggc = xe_mmio_read32(to_gt(xe), GGC); /* check GGMS, should be fixed 0x3 (8MB) */ if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK)) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c 
b/drivers/gpu/drm/xe/xe_uc_fw.c index bb8d98645332..ed37437600f0 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -462,33 +462,33 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) /* Set the source address for the uCode */ src_offset = uc_fw_ggtt_offset(uc_fw); - xe_mmio_write32(gt, DMA_ADDR_0_LOW.reg, lower_32_bits(src_offset)); - xe_mmio_write32(gt, DMA_ADDR_0_HIGH.reg, upper_32_bits(src_offset)); + xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset)); + xe_mmio_write32(gt, DMA_ADDR_0_HIGH, upper_32_bits(src_offset)); /* Set the DMA destination */ - xe_mmio_write32(gt, DMA_ADDR_1_LOW.reg, offset); - xe_mmio_write32(gt, DMA_ADDR_1_HIGH.reg, DMA_ADDRESS_SPACE_WOPCM); + xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset); + xe_mmio_write32(gt, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); /* * Set the transfer size. The header plus uCode will be copied to WOPCM * via DMA, excluding any other components */ - xe_mmio_write32(gt, DMA_COPY_SIZE.reg, + xe_mmio_write32(gt, DMA_COPY_SIZE, sizeof(struct uc_css_header) + uc_fw->ucode_size); /* Start the DMA */ - xe_mmio_write32(gt, DMA_CTRL.reg, + xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_ENABLE(dma_flags | START_DMA)); /* Wait for DMA to finish */ - ret = xe_mmio_wait32(gt, DMA_CTRL.reg, 0, START_DMA, 100000, &dma_ctrl, + ret = xe_mmio_wait32(gt, DMA_CTRL, 0, START_DMA, 100000, &dma_ctrl, false); if (ret) drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", xe_uc_fw_type_repr(uc_fw->type), dma_ctrl); /* Disable the bits once DMA is over */ - xe_mmio_write32(gt, DMA_CTRL.reg, _MASKED_BIT_DISABLE(dma_flags)); + xe_mmio_write32(gt, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags)); return ret; } diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index 7b5014aea9c8..11eea970c207 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -124,8 +124,8 @@ static bool __check_layout(struct xe_device *xe, u32 wopcm_size, static bool __wopcm_regs_locked(struct xe_gt *gt, u32 *guc_wopcm_base, u32 *guc_wopcm_size) { - u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg); - u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg); + u32 reg_base = xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET); + u32 reg_size = xe_mmio_read32(gt, GUC_WOPCM_SIZE); if (!(reg_size & GUC_WOPCM_SIZE_LOCKED) || !(reg_base & GUC_WOPCM_OFFSET_VALID)) @@ -152,13 +152,13 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt, XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; - err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE.reg, size, mask, + err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask, size | GUC_WOPCM_SIZE_LOCKED); if (err) goto err_out; mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; - err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET.reg, + err = xe_mmio_write32_and_verify(gt, DMA_GUC_WOPCM_OFFSET, base | huc_agent, mask, base | huc_agent | GUC_WOPCM_OFFSET_VALID); @@ -171,10 +171,10 @@ err_out: drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n"); drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET", DMA_GUC_WOPCM_OFFSET.reg, - xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET.reg)); + xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET)); drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE", GUC_WOPCM_SIZE.reg, - xe_mmio_read32(gt, GUC_WOPCM_SIZE.reg)); + xe_mmio_read32(gt, GUC_WOPCM_SIZE)); return err; } -- cgit v1.2.3 From ee21379acc1a5c0de612097de74213aa7015471b Mon Sep 17 
00:00:00 2001 From: Lucas De Marchi Date: Mon, 8 May 2023 15:53:21 -0700 Subject: drm/xe: Rename reg field to addr Rename the address field to "addr" rather than "reg" so it's easier to understand what it is. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230508225322.2692066-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 6 +++--- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 2 +- drivers/gpu/drm/xe/xe_force_wake.c | 2 +- drivers/gpu/drm/xe/xe_gt_mcr.c | 2 +- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- drivers/gpu/drm/xe/xe_hw_engine.c | 8 ++++---- drivers/gpu/drm/xe/xe_irq.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_mmio.h | 32 ++++++++++++++++---------------- drivers/gpu/drm/xe/xe_mocs.c | 6 +++--- drivers/gpu/drm/xe/xe_pci.c | 4 ++-- drivers/gpu/drm/xe/xe_reg_sr.c | 6 +++--- drivers/gpu/drm/xe/xe_ring_ops.c | 2 +- drivers/gpu/drm/xe/xe_rtp.c | 2 +- drivers/gpu/drm/xe/xe_wopcm.c | 4 ++-- 16 files changed, 42 insertions(+), 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 787f223bc727..478787c75e29 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -18,8 +18,8 @@ struct xe_reg { union { struct { - /** @reg: address */ - u32 reg:22; + /** @addr: address */ + u32 addr:22; /** * @masked: register is "masked", with upper 16bits used * to identify the bits that are updated on the lower @@ -73,7 +73,7 @@ struct xe_reg_mcr { * object of the right type. However when initializing static const storage, * where a compound statement is not allowed, this can be used instead. */ -#define XE_REG_INITIALIZER(r_, ...) { .reg = r_, __VA_ARGS__ } +#define XE_REG_INITIALIZER(r_, ...) 
{ .addr = r_, __VA_ARGS__ } /** diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index ad2fe8a39a78..4b2aac5ccf28 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -244,7 +244,7 @@ static void xe_rtp_process_tests(struct kunit *test) xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL); xa_for_each(®_sr->xa, idx, sre) { - if (idx == param->expected_reg.reg) + if (idx == param->expected_reg.addr) sr_entry = sre; count++; diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 363b81c3d746..f0f0592fc598 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -129,7 +129,7 @@ static int domain_sleep_wait(struct xe_gt *gt, for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \ for_each_if((domain__ = ((fw__)->domains + \ (ffs(tmp__) - 1))) && \ - domain__->reg_ctl.reg) + domain__->reg_ctl.addr) int xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains domains) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index c6b9e9869fee..3db550c85e32 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -398,7 +398,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, */ drm_WARN(>_to_xe(gt)->drm, true, "Did not find MCR register %#x in any MCR steering table\n", - reg.reg); + reg.addr); *group = 0; *instance = 0; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index e8a126ad400f..eb4af4c71124 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -713,7 +713,7 @@ proto: response_buf[0] = header; for (i = 1; i < VF_SW_FLAG_COUNT; i++) { - reply_reg.reg += i * sizeof(u32); + reply_reg.addr += i * sizeof(u32); response_buf[i] = xe_mmio_read32(gt, reply_reg); } } diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 683f2df09c49..6d550d746909 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -426,7 +426,7 @@ static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, unsigned int n_entry) { struct guc_mmio_reg entry = { - .offset = reg.reg, + .offset = reg.addr, .flags = reg.masked ? 
GUC_REGSET_MASKED : 0, }; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 5e275aff8974..696b9d949163 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -236,20 +236,20 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val) { - XE_BUG_ON(reg.reg & hwe->mmio_base); + XE_BUG_ON(reg.addr & hwe->mmio_base); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); - reg.reg += hwe->mmio_base; + reg.addr += hwe->mmio_base; xe_mmio_write32(hwe->gt, reg, val); } static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { - XE_BUG_ON(reg.reg & hwe->mmio_base); + XE_BUG_ON(reg.addr & hwe->mmio_base); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); - reg.reg += hwe->mmio_base; + reg.addr += hwe->mmio_base; return xe_mmio_read32(hwe->gt, reg); } diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 7c58cf526951..1c26ec5ab4f0 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -35,7 +35,7 @@ static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg) drm_WARN(>_to_xe(gt)->drm, 1, "Interrupt register 0x%x is not zero: 0x%08x\n", - reg.reg, val); + reg.addr, val); xe_mmio_write32(gt, reg, 0xffffffff); xe_mmio_read32(gt, reg); xe_mmio_write32(gt, reg, 0xffffffff); diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 0e91004fa06d..c7fbb1cc1f64 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -421,7 +421,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, unsigned int i; for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) { - if (mmio_read_whitelist[i].reg == args->addr) { + if (mmio_read_whitelist[i].addr == args->addr) { allowed = true; break; } diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index f9a23b4ef77d..1407f1189b0d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -20,27 +20,27 @@ int xe_mmio_init(struct xe_device *xe); static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { - if (reg.reg < gt->mmio.adj_limit) - reg.reg += gt->mmio.adj_offset; + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; - return readb(gt->mmio.regs + reg.reg); + return readb(gt->mmio.regs + reg.addr); } static inline void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) { - if (reg.reg < gt->mmio.adj_limit) - reg.reg += gt->mmio.adj_offset; + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; - writel(val, gt->mmio.regs + reg.reg); + writel(val, gt->mmio.regs + reg.addr); } static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) { - if (reg.reg < gt->mmio.adj_limit) - reg.reg += gt->mmio.adj_offset; + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; - return readl(gt->mmio.regs + reg.reg); + return readl(gt->mmio.regs + reg.addr); } static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, @@ -58,18 +58,18 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, static inline void xe_mmio_write64(struct xe_gt *gt, struct xe_reg reg, u64 val) { - if (reg.reg < gt->mmio.adj_limit) - reg.reg += gt->mmio.adj_offset; + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; - writeq(val, gt->mmio.regs + reg.reg); + writeq(val, gt->mmio.regs + reg.addr); } static inline u64 
xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg) { - if (reg.reg < gt->mmio.adj_limit) - reg.reg += gt->mmio.adj_offset; + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; - return readq(gt->mmio.regs + reg.reg); + return readq(gt->mmio.regs + reg.addr); } static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, @@ -127,7 +127,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, struct xe_reg reg) { - return range && reg.reg >= range->start && reg.reg <= range->end; + return range && reg.addr >= range->start && reg.addr <= range->end; } int xe_mmio_probe_vram(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index f30e1a0ce5dc..817afd301d52 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -479,7 +479,7 @@ static void __init_mocs_table(struct xe_gt *gt, i++) { struct xe_reg reg = XE_REG(addr + i * 4); - mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, reg.reg, mocs); + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs); xe_mmio_write32(gt, reg, mocs); } } @@ -514,7 +514,7 @@ static void init_l3cc_table(struct xe_gt *gt, (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), get_entry_l3cc(info, 2 * i + 1))), 1 : 0; i++) { - mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).reg, + mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr, l3cc); xe_mmio_write32(gt, LNCFCMOCS(i), l3cc); } @@ -541,7 +541,7 @@ void xe_mocs_init(struct xe_gt *gt) mocs_dbg(>->xe->drm, "flag:0x%x\n", flags); if (flags & HAS_GLOBAL_MOCS) - __init_mocs_table(gt, &table, GLOBAL_MOCS(0).reg); + __init_mocs_table(gt, &table, GLOBAL_MOCS(0).addr); /* * Initialize the L3CC table as part of mocs initalization to make diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 13a5ce18ee05..2ad3ad275e8a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -436,7 +436,7 @@ static void handle_gmdid(struct xe_device *xe, { u32 ver; - ver = peek_gmdid(xe, GMD_ID.reg); + ver = peek_gmdid(xe, GMD_ID.addr); for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { if (ver == graphics_ip_map[i].ver) { xe->info.graphics_verx100 = ver; @@ -451,7 +451,7 @@ static void handle_gmdid(struct xe_device *xe, ver / 100, ver % 100); } - ver = peek_gmdid(xe, GMD_ID.reg + 0x380000); + ver = peek_gmdid(xe, GMD_ID.addr + 0x380000); for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { if (ver == media_ip_map[i].ver) { xe->info.media_verx100 = ver; diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index f75ef8d7500a..434133444d74 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -91,7 +91,7 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr) int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e) { - unsigned long idx = e->reg.reg; + unsigned long idx = e->reg.addr; struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx); int ret; @@ -172,7 +172,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) */ val |= entry->set_bits; - drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg.reg, val); + drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg.addr, val); if (entry->reg.mcr) xe_gt_mcr_multicast_write(gt, reg_mcr, val); @@ -237,7 +237,7 @@ void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, /* And clear the rest just in case of garbage */ for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) { - u32 addr = RING_NOPID(mmio_base).reg; 
+ u32 addr = RING_NOPID(mmio_base).addr; xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index ce829bd48825..06364bb2e95b 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -48,7 +48,7 @@ static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg, u32 *dw, int i) { dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; - dw[i++] = reg.reg + gt->mmio.adj_offset; + dw[i++] = reg.addr + gt->mmio.adj_offset; dw[i++] = AUX_INV; dw[i++] = MI_NOOP; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index f2a0e8eb4936..0c6a23e14a71 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -101,7 +101,7 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action, .read_mask = action->read_mask, }; - sr_entry.reg.reg += mmio_base; + sr_entry.reg.addr += mmio_base; xe_reg_sr_add(sr, &sr_entry); } diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index 11eea970c207..35fde8965bca 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -170,10 +170,10 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt, err_out: drm_notice(&xe->drm, "Failed to init uC WOPCM registers!\n"); drm_notice(&xe->drm, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET", - DMA_GUC_WOPCM_OFFSET.reg, + DMA_GUC_WOPCM_OFFSET.addr, xe_mmio_read32(gt, DMA_GUC_WOPCM_OFFSET)); drm_notice(&xe->drm, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE", - GUC_WOPCM_SIZE.reg, + GUC_WOPCM_SIZE.addr, xe_mmio_read32(gt, GUC_WOPCM_SIZE)); return err; -- cgit v1.2.3 From 50f1f0591638ec43eb041e27ab5e4eae47882cbc Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 8 May 2023 15:53:22 -0700 Subject: drm/xe: Fix indent in xe_hw_engine_print_state() Fix the indent to align with open parenthesis, following the coding style. 
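For reference, the rule being applied is the kernel's continuation-line convention: when a call is wrapped, the follow-on lines are indented with tabs plus spaces so the arguments line up just past the opening parenthesis. A minimal illustration using one of the calls touched below (only the whitespace differs; the "before" indentation is representative, not an exact copy):

	/* before: continuation indented by a fixed number of tabs */
	drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name,
			hwe->logical_instance);

	/* after: continuation aligned with the open parenthesis */
	drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name,
		   hwe->logical_instance);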
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230508225322.2692066-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 66 +++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 696b9d949163..751f6c3bba17 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -580,70 +580,70 @@ void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p) return; drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name, - hwe->logical_instance); + hwe->logical_instance); drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", - hwe->domain, - xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain)); + hwe->domain, + xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain)); drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base); drm_printf(p, "\tHWSTAM: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HWSTAM(0))); + hw_engine_mmio_read32(hwe, RING_HWSTAM(0))); drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HWS_PGA(0))); + hw_engine_mmio_read32(hwe, RING_HWS_PGA(0))); drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0))); + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0))); drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0))); + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0))); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n", - hw_engine_mmio_read32(hwe, + hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0))); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", - hw_engine_mmio_read32(hwe, + hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0))); drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0))); + hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0))); drm_printf(p, "\tRING_START: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_START(0))); + hw_engine_mmio_read32(hwe, RING_START(0))); drm_printf(p, "\tRING_HEAD: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR); + hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR); drm_printf(p, "\tRING_TAIL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR); + hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR); drm_printf(p, "\tRING_CTL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_CTL(0))); + hw_engine_mmio_read32(hwe, RING_CTL(0))); drm_printf(p, "\tRING_MODE: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_MI_MODE(0))); + hw_engine_mmio_read32(hwe, RING_MI_MODE(0))); drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_MODE(0))); + hw_engine_mmio_read32(hwe, RING_MODE(0))); drm_printf(p, "\tRING_IMR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_IMR(0))); + hw_engine_mmio_read32(hwe, RING_IMR(0))); drm_printf(p, "\tRING_ESR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_ESR(0))); + hw_engine_mmio_read32(hwe, RING_ESR(0))); drm_printf(p, "\tRING_EMR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EMR(0))); + hw_engine_mmio_read32(hwe, RING_EMR(0))); drm_printf(p, "\tRING_EIR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EIR(0))); - - drm_printf(p, "\tACTHD: 0x%08x_%08x\n", - hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)), - hw_engine_mmio_read32(hwe, RING_ACTHD(0))); - drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", 
- hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)), - hw_engine_mmio_read32(hwe, RING_BBADDR(0))); - drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", - hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)), - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0))); + hw_engine_mmio_read32(hwe, RING_EIR(0))); + + drm_printf(p, "\tACTHD: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)), + hw_engine_mmio_read32(hwe, RING_ACTHD(0))); + drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)), + hw_engine_mmio_read32(hwe, RING_BBADDR(0))); + drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", + hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)), + hw_engine_mmio_read32(hwe, RING_DMA_FADD(0))); drm_printf(p, "\tIPEIR: 0x%08x\n", - hw_engine_mmio_read32(hwe, IPEIR(0))); + hw_engine_mmio_read32(hwe, IPEIR(0))); drm_printf(p, "\tIPEHR: 0x%08x\n\n", - hw_engine_mmio_read32(hwe, IPEHR(0))); + hw_engine_mmio_read32(hwe, IPEHR(0))); if (hwe->class == XE_ENGINE_CLASS_COMPUTE) drm_printf(p, "\tRCU_MODE: 0x%08x\n", - xe_mmio_read32(hwe->gt, RCU_MODE)); + xe_mmio_read32(hwe->gt, RCU_MODE)); } -- cgit v1.2.3 From a2db3192115d8cafa3dcae024873957929a4eae0 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 5 May 2023 15:49:10 +0100 Subject: drm/xe: fix tlb_invalidation_seqno_past() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking seqno_recv >= seqno looks like it will incorrectly report true when the seqno has wrapped (not unlikely given TLB_INVALIDATION_SEQNO_MAX). Calling xe_gt_tlb_invalidation_wait() might then return before the flush has been completed by the GuC. Fix this by treating a large negative delta as an indication that the seqno has wrapped around. Similar to how we treat a large positive delta as an indication that the seqno_recv must have wrapped around, but in that case the seqno has likely also signalled. It looks like we could also potentially make the seqno use the full 32bits as supported by the GuC. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 9e7fe8d9bca4..c815a42e2cdb 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -251,14 +251,15 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) { - if (gt->tlb_invalidation.seqno_recv >= seqno) - return true; + if (seqno - gt->tlb_invalidation.seqno_recv < + -(TLB_INVALIDATION_SEQNO_MAX / 2)) + return false; if (seqno - gt->tlb_invalidation.seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) return true; - return false; + return gt->tlb_invalidation.seqno_recv >= seqno; } /** -- cgit v1.2.3 From 5737f74e294775b9fa7fb07f80212c5bdffd5476 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 19 Apr 2023 14:37:02 -0700 Subject: drm/xe/adlp: Add revid => step mapping Setup the mapping from PCI revid to IP stepping for ADL-P (and its RPL-P subplatform) in case this information becomes important for implementing workarounds. 
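The tables added below are consumed by xe_step_get(), which first picks the array matching the detected platform/subplatform and then indexes it by PCI revision ID. A simplified sketch of that lookup, reusing the xe_step_info type from this file; the real function also copes with gaps in the sparse table and with unknown revids, so the fallback here is only illustrative:

	static struct xe_step_info lookup_step(const struct xe_step_info *revids,
					       int size, int revid)
	{
		/* e.g. ADL-P revid 0x4 -> GT/media step B0, display B0 */
		if (revid < size)
			return revids[revid];

		/* unknown (newer) revid: assume the last known stepping */
		return revids[size - 1];
	}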
Bspec: 55376 Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230419213703.3993439-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_step.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index ee927dfd3eb3..bcdb4601c2f6 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -60,6 +60,17 @@ static const struct xe_step_info adls_revids[] = { [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, }; +static const struct xe_step_info adlp_revids[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, + [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, + [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_D0 }, +}; + +static const struct xe_step_info adlp_rpl_revids[] = { + [0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 }, +}; + static const struct xe_step_info dg2_g10_revid_step_tbl[] = { [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, @@ -118,6 +129,12 @@ struct xe_step_info xe_step_get(struct xe_device *xe) } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) { revids = dg2_g12_revid_step_tbl; size = ARRAY_SIZE(dg2_g12_revid_step_tbl); + } else if (xe->info.subplatform == XE_SUBPLATFORM_ADLP_RPLU) { + revids = adlp_rpl_revids; + size = ARRAY_SIZE(adlp_rpl_revids); + } else if (xe->info.platform == XE_ALDERLAKE_P) { + revids = adlp_revids; + size = ARRAY_SIZE(adlp_revids); } else if (xe->info.platform == XE_ALDERLAKE_S) { revids = adls_revids; size = ARRAY_SIZE(adls_revids); -- cgit v1.2.3 From 500f90620cce13e8fd9e7dfc19701d753c4b3625 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 19 Apr 2023 14:37:03 -0700 Subject: drm/xe/adln: Enable ADL-N ADL-N is pretty much the same as ADL-P (i.e., Xe_LP graphics + Xe_M media + Xe_LPD display). However unlike ADL-P, there's no GuC hwconfig support so the "tgl" GuC firmware should be loaded (i.e., the same situation as ADL-S). 
Acked-by: Nirmoy Das Reviewed-by: Ravi Kumar Vodapalli Link: https://lore.kernel.org/r/20230419213703.3993439-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 1 + drivers/gpu/drm/xe/xe_pci.c | 9 +++++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + drivers/gpu/drm/xe/xe_step.c | 7 +++++++ drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 5 files changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 817afd301d52..c7a9e733ef3b 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -418,6 +418,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, case XE_ROCKETLAKE: case XE_ALDERLAKE_S: case XE_ALDERLAKE_P: + case XE_ALDERLAKE_N: info->size = ARRAY_SIZE(gen12_mocs_desc); info->table = gen12_mocs_desc; info->n_entries = GEN9_NUM_MOCS_ENTRIES; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 2ad3ad275e8a..f0d0e999aa56 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -208,6 +208,14 @@ static const struct xe_device_desc adl_p_desc = { }, }; +static const struct xe_device_desc adl_n_desc = { + .graphics = &graphics_xelp, + .media = &media_xem, + PLATFORM(XE_ALDERLAKE_N), + .has_llc = 1, + .require_force_probe = true, +}; + #define DGFX_FEATURES \ .is_dgfx = 1 @@ -312,6 +320,7 @@ static const struct pci_device_id pciidlist[] = { XE_RKL_IDS(INTEL_VGA_DEVICE, &rkl_desc), XE_ADLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), + XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc), XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index 80c19bffe79c..abbb8a1f29a8 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -16,6 +16,7 @@ enum xe_platform { XE_ROCKETLAKE, XE_ALDERLAKE_S, XE_ALDERLAKE_P, + XE_ALDERLAKE_N, XE_DG1, XE_DG2, XE_PVC, diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index bcdb4601c2f6..a443d9bd7bbb 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -71,6 +71,10 @@ static const struct xe_step_info adlp_rpl_revids[] = { [0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 }, }; +static const struct xe_step_info adln_revids[] = { + [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_D0 }, +}; + static const struct xe_step_info dg2_g10_revid_step_tbl[] = { [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, @@ -129,6 +133,9 @@ struct xe_step_info xe_step_get(struct xe_device *xe) } else if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G12) { revids = dg2_g12_revid_step_tbl; size = ARRAY_SIZE(dg2_g12_revid_step_tbl); + } else if (xe->info.platform == XE_ALDERLAKE_N) { + revids = adln_revids; + size = ARRAY_SIZE(adln_revids); } else if (xe->info.subplatform == XE_SUBPLATFORM_ADLP_RPLU) { revids = adlp_rpl_revids; size = ARRAY_SIZE(adlp_rpl_revids); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index ed37437600f0..609ca3f2ffa4 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -104,6 +104,7 @@ struct fw_blobs_by_type { fw_def(PVC, mmp_ver( xe, guc, pvc, 70, 6, 4)) \ fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) 
\ + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 5)) \ fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 5)) \ fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 5)) \ fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 5)) \ -- cgit v1.2.3 From 85635f5d47d7304a44bc45b419f8f31423712ef8 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 12 May 2023 16:36:49 -0700 Subject: drm/xe: Load HuC on Alderlake P Alderlake P uses TGL HuC and it was not added together with ADL-S, because it was failing for unrelated reasons. Now that those are fixed, allow it to load HuC. # cat /sys/kernel/debug/dri/0/gt0/uc/huc_info HuC firmware: i915/tgl_huc.bin status: RUNNING version: wanted 0.0, found 7.9 uCode: 589504 bytes RSA: 256 bytes HuC status: 0x00090001 Reviewed-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20230512233649.3218736-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 609ca3f2ffa4..5703213bdf1b 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -111,6 +111,7 @@ struct fw_blobs_by_type { fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 5)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \ fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ fw_def(DG1, no_ver(i915, huc, dg1)) \ fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ -- cgit v1.2.3 From d0e96f3d5255f62bc9721392b198acc4d302de32 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 12 May 2023 14:10:04 +0200 Subject: drm/xe: Remove unused define Signed-off-by: Francois Dugast Reviewed-by: Matt Atwood Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_drv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index 0377e5e4e35f..d45b71426cc8 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -11,7 +11,6 @@ #define DRIVER_NAME "xe" #define DRIVER_DESC "Intel Xe Graphics" #define DRIVER_DATE "20201103" -#define DRIVER_TIMESTAMP 1604406085 /* Interface history: * -- cgit v1.2.3 From a029aecaa42018a9ebc90fbf6e2920acfc4c6b3f Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 11 May 2023 16:48:21 -0300 Subject: drm/xe: Get rid of MAKE_INIT_EXIT_FUNCS There is not much of a benefit from using that macro as of now and it hurts grepability or other ways of cross-referencing. 
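To make the grepability point concrete: because the macro builds the symbol names by token pasting, the full identifiers never appear at the use site, so a plain-text search for e.g. xe_hw_fence_module_init will not match the table entry. The sketch below reproduces the macro being removed:

	/* The macro being removed -- ## pasting hides the full symbol names: */
	#define MAKE_INIT_EXIT_FUNCS(name) \
		{ .init = xe_##name##_module_init, \
		  .exit = xe_##name##_module_exit, }

	static const struct init_funcs init_funcs[] = {
		/*
		 * Expands to xe_hw_fence_module_init/_exit, but neither
		 * identifier is visible to grep or cscope on this line.
		 */
		MAKE_INIT_EXIT_FUNCS(hw_fence),
	};

With the open-coded initializers the references are spelled out directly in the table, at the cost of a few extra lines.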
Cc: Jani Nikula Reviewed-by: Matt Atwood Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_module.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e8ee7a9b0878..e2a61aaf50b6 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -35,12 +35,16 @@ struct init_funcs { int (*init)(void); void (*exit)(void); }; -#define MAKE_INIT_EXIT_FUNCS(name) \ - { .init = xe_##name##_module_init, \ - .exit = xe_##name##_module_exit, } + static const struct init_funcs init_funcs[] = { - MAKE_INIT_EXIT_FUNCS(hw_fence), - MAKE_INIT_EXIT_FUNCS(sched_job), + { + .init = xe_hw_fence_module_init, + .exit = xe_hw_fence_module_exit, + }, + { + .init = xe_sched_job_module_init, + .exit = xe_sched_job_module_exit, + }, }; static int __init xe_init(void) -- cgit v1.2.3 From 9afd4b2d2a8df9023849ddd25d5e064b6555ee34 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 11 May 2023 16:48:22 -0300 Subject: drm/xe: Call exit functions when xe_register_pci_driver() fails Move xe_register_pci_driver() and xe_unregister_pci_driver() to init_funcs to make sure that exit functions are also called when xe_register_pci_driver() fails. Note that this also allows adding init functions to be run after xe_register_pci_driver(). v2: - Move functions to init_funcs instead of having a special case for xe_register_pci_driver(). (Jani) Cc: Jani Nikula Reviewed-by: Matt Atwood Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_module.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e2a61aaf50b6..496a9001dc3e 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -45,6 +45,10 @@ static const struct init_funcs init_funcs[] = { .init = xe_sched_job_module_init, .exit = xe_sched_job_module_exit, }, + { + .init = xe_register_pci_driver, + .exit = xe_unregister_pci_driver, + }, }; static int __init xe_init(void) @@ -60,15 +64,13 @@ static int __init xe_init(void) } } - return xe_register_pci_driver(); + return 0; } static void __exit xe_exit(void) { int i; - xe_unregister_pci_driver(); - for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) init_funcs[i].exit(); } -- cgit v1.2.3 From ed1df9897434a1da3f86c868825450fef47def23 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sun, 16 Apr 2023 16:17:43 -0700 Subject: drm/xe: Allow compute VMs to output dma-fences on binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binds are not long running jobs thus we can export dma-fences even if a VM is in compute mode. 
Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d9579bf5002d..62496a4008d2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3166,7 +3166,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], false, - xe_vm_no_dma_fences(vm)); + xe_vm_in_fault_mode(vm)); if (err) goto free_syncs; } -- cgit v1.2.3 From 7cabe5580cb9dc16dcda0a163dc718e069c4c199 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sun, 16 Apr 2023 16:14:26 -0700 Subject: drm/xe: Allow dma-fences as in-syncs for compute / faulting VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is allowed and encouraged by the dma-fencing rules. This along with allowing compute VMs to export dma-fences on binds will result in a simpler compute UMD. Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 99f1ed87196d..1e4e4acb2c4a 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -105,6 +105,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, { struct drm_xe_sync sync_in; int err; + bool signal; if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) return -EFAULT; @@ -113,9 +114,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL))) return -EINVAL; + signal = sync_in.flags & DRM_XE_SYNC_SIGNAL; switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { case DRM_XE_SYNC_SYNCOBJ: - if (XE_IOCTL_ERR(xe, no_dma_fences)) + if (XE_IOCTL_ERR(xe, no_dma_fences && signal)) return -ENOTSUPP; if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) @@ -125,7 +127,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (XE_IOCTL_ERR(xe, !sync->syncobj)) return -ENOENT; - if (!(sync_in.flags & DRM_XE_SYNC_SIGNAL)) { + if (!signal) { sync->fence = drm_syncobj_fence_get(sync->syncobj); if (XE_IOCTL_ERR(xe, !sync->fence)) return -EINVAL; @@ -133,7 +135,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, break; case DRM_XE_SYNC_TIMELINE_SYNCOBJ: - if (XE_IOCTL_ERR(xe, no_dma_fences)) + if (XE_IOCTL_ERR(xe, no_dma_fences && signal)) return -ENOTSUPP; if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) @@ -146,7 +148,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (XE_IOCTL_ERR(xe, !sync->syncobj)) return -ENOENT; - if (sync_in.flags & DRM_XE_SYNC_SIGNAL) { + if (signal) { sync->chain_fence = dma_fence_chain_alloc(); if (!sync->chain_fence) return -ENOMEM; @@ -168,7 +170,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, break; case DRM_XE_SYNC_USER_FENCE: - if (XE_IOCTL_ERR(xe, !(sync_in.flags & DRM_XE_SYNC_SIGNAL))) + if (XE_IOCTL_ERR(xe, !signal)) return -ENOTSUPP; if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7)) -- cgit v1.2.3 From 75a6aadb9ae71a046534fb781b7c832c6586131b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 16 Jan 2023 20:52:57 +0100 
Subject: drm/xe: Introduce GT oriented log messages While debugging GT related problems, it's good to know which GT was reporting problems. Introduce helper macros to allow prefix GT logs with GT identifier. We will use them in upcoming patches. v2: use xe_ prefix (Lucas) v3: use correct include Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Cc: Jani Nikula Cc: Rodrigo Vivi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_printk.h | 46 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_printk.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h new file mode 100644 index 000000000000..0b801429cf1a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_PRINTK_H_ +#define _XE_GT_PRINTK_H_ + +#include + +#include "xe_device_types.h" + +#define xe_gt_printk(_gt, _level, _fmt, ...) \ + drm_##_level(&(_gt)->xe->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define xe_gt_err(_gt, _fmt, ...) \ + xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) + +#define xe_gt_warn(_gt, _fmt, ...) \ + xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__) + +#define xe_gt_notice(_gt, _fmt, ...) \ + xe_gt_printk((_gt), notice, _fmt, ##__VA_ARGS__) + +#define xe_gt_info(_gt, _fmt, ...) \ + xe_gt_printk((_gt), info, _fmt, ##__VA_ARGS__) + +#define xe_gt_dbg(_gt, _fmt, ...) \ + xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__) + +#define xe_gt_err_ratelimited(_gt, _fmt, ...) \ + xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) + +#define xe_gt_WARN(_gt, _condition, _fmt, ...) \ + drm_WARN(&(_gt)->xe->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \ + drm_WARN_ONCE(&(_gt)->xe->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + +#define xe_gt_WARN_ON(_gt, _condition) \ + xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition)) + +#define xe_gt_WARN_ON_ONCE(_gt, _condition) \ + xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition)) + +#endif -- cgit v1.2.3 From 3e535bd504057bab1970b2dd1b594908ca3de74d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 12 Mar 2023 15:48:21 +0100 Subject: drm/xe: Use GT oriented log messages in xe_gt.c Replace generic log messages with ones dedicated for the GT. While around replace errno logs from plain %d to pretty %pe. 
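A short usage sketch of the new helpers (the conversion shown is taken from the next patch; do_gt_reset() is the existing reset helper in xe_gt.c):

	int err = do_gt_reset(gt);

	if (err)
		/* Old: drm_err(&xe->drm, "GT reset failed to clear GEN11_GRDOM_FULL\n"); */
		xe_gt_err(gt, "failed to clear GEN11_GRDOM_FULL (%pe)\n",
			  ERR_PTR(err));

The macros prepend "GT%u: " using gt->info.id, so the message identifies which GT reported the problem, and %pe together with ERR_PTR() lets printk emit a symbolic error name (e.g. -EIO) rather than a bare integer on kernels built with symbolic errname support.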
v2: rebased v3: unify errno logs Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Matt Roper Reviewed-by: Rodrigo Vivi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 49 +++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index cbe063a40aca..80d42c7c7cfa 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -20,6 +20,7 @@ #include "xe_gt_clock.h" #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" +#include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" @@ -239,16 +240,16 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) hwe, ENGINE_FLAG_WA); if (IS_ERR(e)) { err = PTR_ERR(e); - drm_err(&xe->drm, "gt%d, hwe %s, xe_engine_create,e failed=%d", - gt->info.id, hwe->name, err); + xe_gt_err(gt, "hwe %s: xe_engine_create failed (%pe)\n", + hwe->name, e); goto put_vm; } /* Prime golden LRC with known good state */ err = emit_wa_job(gt, e); if (err) { - drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_wa_job,e failed=%d", - gt->info.id, hwe->name, e->guc->id, err); + xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n", + hwe->name, ERR_PTR(err), e->guc->id); goto put_engine; } @@ -256,24 +257,24 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) 1, hwe, ENGINE_FLAG_WA); if (IS_ERR(nop_e)) { err = PTR_ERR(nop_e); - drm_err(&xe->drm, "gt%d, hwe %s, xe_engine_create,nop_e failed=%d", - gt->info.id, hwe->name, err); + xe_gt_err(gt, "hwe %s: nop xe_engine_create failed (%pe)\n", + hwe->name, nop_e); goto put_engine; } /* Switch to different LRC */ err = emit_nop_job(gt, nop_e); if (err) { - drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_nop_job,nop_e failed=%d", - gt->info.id, hwe->name, nop_e->guc->id, err); + xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n", + hwe->name, ERR_PTR(err), nop_e->guc->id); goto put_nop_e; } /* Reload golden LRC to record the effect of any indirect W/A */ err = emit_nop_job(gt, e); if (err) { - drm_err(&xe->drm, "gt%d, hwe %s, guc_id=%d, emit_nop_job,e failed=%d", - gt->info.id, hwe->name, e->guc->id, err); + xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", + hwe->name, ERR_PTR(err), e->guc->id); goto put_nop_e; } @@ -541,15 +542,14 @@ int xe_gt_init(struct xe_gt *gt) static int do_gt_reset(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); int err; xe_mmio_write32(gt, GDRST, GRDOM_FULL); err = xe_mmio_wait32(gt, GDRST, 0, GRDOM_FULL, 5000, NULL, false); if (err) - drm_err(&xe->drm, - "GT reset failed to clear GEN11_GRDOM_FULL\n"); + xe_gt_err(gt, "failed to clear GEN11_GRDOM_FULL (%pe)\n", + ERR_PTR(err)); return err; } @@ -592,14 +592,13 @@ static int do_gt_restart(struct xe_gt *gt) static int gt_reset(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); int err; /* We only support GT resets with GuC submission */ if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) return -ENODEV; - drm_info(&xe->drm, "GT reset started\n"); + xe_gt_info(gt, "reset started\n"); xe_gt_sanitize(gt); @@ -628,7 +627,7 @@ static int gt_reset(struct xe_gt *gt) err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); - drm_info(&xe->drm, "GT reset done\n"); + xe_gt_info(gt, "reset done\n"); return 0; @@ -637,7 +636,7 @@ err_out: err_msg: XE_WARN_ON(xe_uc_start(>->uc)); xe_device_mem_access_put(gt_to_xe(gt)); - drm_err(&xe->drm, "GT reset failed, err=%d\n", err); + 
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); return err; } @@ -651,15 +650,13 @@ static void gt_reset_worker(struct work_struct *w) void xe_gt_reset_async(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); - - drm_info(&xe->drm, "Try GT reset\n"); + xe_gt_info(gt, "trying reset\n"); /* Don't do a reset while one is already in flight */ if (xe_uc_reset_prepare(>->uc)) return; - drm_info(&xe->drm, "Doing GT reset\n"); + xe_gt_info(gt, "reset queued\n"); queue_work(gt->ordered_wq, >->reset.worker); } @@ -676,7 +673,6 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) int xe_gt_suspend(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); int err; /* For now suspend/resume is only allowed with GuC */ @@ -696,7 +692,7 @@ int xe_gt_suspend(struct xe_gt *gt) xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - drm_info(&xe->drm, "GT suspended\n"); + xe_gt_info(gt, "suspended\n"); return 0; @@ -704,14 +700,13 @@ err_force_wake: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: xe_device_mem_access_put(gt_to_xe(gt)); - drm_err(&xe->drm, "GT suspend failed: %d\n", err); + xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); return err; } int xe_gt_resume(struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); int err; xe_device_mem_access_get(gt_to_xe(gt)); @@ -725,7 +720,7 @@ int xe_gt_resume(struct xe_gt *gt) xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - drm_info(&xe->drm, "GT resumed\n"); + xe_gt_info(gt, "resumed\n"); return 0; @@ -733,7 +728,7 @@ err_force_wake: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: xe_device_mem_access_put(gt_to_xe(gt)); - drm_err(&xe->drm, "GT resume failed: %d\n", err); + xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); return err; } -- cgit v1.2.3 From e799485044cb3c0019a226ff3a92a532ca2a4e7e Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 18 May 2023 17:12:39 -0400 Subject: drm/xe: Introduce the dev_coredump infrastructure. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The goal is to use devcoredump infrastructure to report error states captured at the crash time. The error state will contain useful information for GPU hang debug, such as INSTDONE registers and the current buffers getting executed, as well as any other information that helps user space and allow later replays of the error. The proposal here is to avoid a Xe only error_state like i915 and use a standard dev_coredump infrastructure to expose the error state. For our own case, the data is only useful if it is a snapshot of the time when the GPU crash has happened, since we reset the GPU immediately after and the registers might have changed. So the proposal here is to have an internal snapshot to be printed out later. Also, usually a subsequent GPU hang can be only a cause of the initial one. So we only save the 'first' hang. The dev_coredump has a delayed work queue where it remove the coredump and free all the data within a few moments of the error. When that happens we also reset our capture state and allow further snapshots. Right now this infra only print out the time of the hang. More information will be migrated here on subsequent work. Also, in order to organize the dump better, the goal is to propose dev_coredump changes itself to allow multiple files and different controls. 
But for now we start Xe usage of it without any dependency on dev_coredump core changes. v2: Add dma_fence annotation for capture that might happen during long running. (Thomas and Matt) Use xe->drm.primary->index on drm_info msg. (Jani) v3: checkpatch fixes v4: Fix building and locking issues found by Francois. Actually let's kill all of the locking in here. gt_reset serialization already guarantee that there will be only one capture at the same time. Also, the devcoredump has its own locking to protect the free and reads and drivers don't need to duplicate it. Besides this, the dma_fence locking was pushed to a following patch since it is not needed in this one. Fix a use after free identified by KASAN: Do not stash the faulty_engine since that will be freed somewhere else. v5: Fix Uptime - ktime_get_boottime actually returns the Uptime. (Francois) Cc: Thomas Hellström Cc: Matthew Brost Cc: Jani Nikula Cc: Daniel Vetter Cc: Francois Dugast Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/Kconfig | 1 + drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_devcoredump.c | 126 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_devcoredump.h | 20 +++++ drivers/gpu/drm/xe/xe_devcoredump_types.h | 45 +++++++++++ drivers/gpu/drm/xe/xe_device_types.h | 4 + drivers/gpu/drm/xe/xe_guc_submit.c | 2 + 7 files changed, 199 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_devcoredump.c create mode 100644 drivers/gpu/drm/xe/xe_devcoredump.h create mode 100644 drivers/gpu/drm/xe/xe_devcoredump_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 62f54e6d62d9..0a4854a59c90 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -23,6 +23,7 @@ config DRM_XE select DRM_TTM_HELPER select DRM_SCHED select MMU_NOTIFIER + select WANT_DEV_COREDUMP help Experimental driver for Intel Xe series GPUs diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 71c604ecff53..5d277d060eba 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -37,6 +37,7 @@ xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ xe_debugfs.o \ + xe_devcoredump.o \ xe_device.o \ xe_dma_buf.o \ xe_engine.o \ diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c new file mode 100644 index 000000000000..561db73a3c8c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_devcoredump.h" +#include "xe_devcoredump_types.h" + +#include +#include + +#include "xe_engine.h" +#include "xe_gt.h" + +/** + * DOC: Xe device coredump + * + * Devices overview: + * Xe uses dev_coredump infrastructure for exposing the crash errors in a + * standardized way. + * devcoredump exposes a temporary device under /sys/class/devcoredump/ + * which is linked with our card device directly. + * The core dump can be accessed either from + * /sys/class/drm/card/device/devcoredump/ or from + * /sys/class/devcoredump/devcd where + * /sys/class/devcoredump/devcd/failing_device is a link to + * /sys/class/drm/card/device/. + * + * Snapshot at hang: + * The 'data' file is printed with a drm_printer pointer at devcoredump read + * time. For this reason, we need to take snapshots from when the hang has + * happened, and not only when the user is reading the file. Otherwise the + * information is outdated since the resets might have happened in between. 
+ * + * 'First' failure snapshot: + * In general, the first hang is the most critical one since the following hangs + * can be a consequence of the initial hang. For this reason we only take the + * snapshot of the 'first' failure and ignore subsequent calls of this function, + * at least while the coredump device is alive. Dev_coredump has a delayed work + * queue that will eventually delete the device and free all the dump + * information. + */ + +#ifdef CONFIG_DEV_COREDUMP + +static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) +{ + return container_of(coredump, struct xe_device, devcoredump); +} + +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct xe_devcoredump *coredump = data; + struct xe_devcoredump_snapshot *ss; + struct drm_printer p; + struct drm_print_iterator iter; + struct timespec64 ts; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + ss = &coredump->snapshot; + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** Xe Device Coredump ****\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + + ts = ktime_to_timespec64(ss->snapshot_time); + drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); + ts = ktime_to_timespec64(ss->boot_time); + drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); + + return count - iter.remain; +} + +static void xe_devcoredump_free(void *data) +{ + struct xe_devcoredump *coredump = data; + + coredump->captured = false; + drm_info(&coredump_to_xe(coredump)->drm, + "Xe device coredump has been deleted.\n"); +} + +static void devcoredump_snapshot(struct xe_devcoredump *coredump, + struct xe_engine *e) +{ + struct xe_devcoredump_snapshot *ss = &coredump->snapshot; + + ss->snapshot_time = ktime_get_real(); + ss->boot_time = ktime_get_boottime(); +} + +/** + * xe_devcoredump - Take the required snapshots and initialize coredump device. + * @e: The faulty xe_engine, where the issue was detected. + * + * This function should be called at the crash time within the serialized + * gt_reset. It is skipped if we still have the core dump device available + * with the information of the 'first' snapshot. 
+ */ +void xe_devcoredump(struct xe_engine *e) +{ + struct xe_device *xe = gt_to_xe(e->gt); + struct xe_devcoredump *coredump = &xe->devcoredump; + + if (coredump->captured) { + drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n"); + return; + } + + coredump->captured = true; + devcoredump_snapshot(coredump, e); + + drm_info(&xe->drm, "Xe device coredump has been created\n"); + drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", + xe->drm.primary->index); + + dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, + xe_devcoredump_read, xe_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h new file mode 100644 index 000000000000..854882129227 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DEVCOREDUMP_H_ +#define _XE_DEVCOREDUMP_H_ + +struct xe_device; +struct xe_engine; + +#ifdef CONFIG_DEV_COREDUMP +void xe_devcoredump(struct xe_engine *e); +#else +static inline void xe_devcoredump(struct xe_engine *e) +{ +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h new file mode 100644 index 000000000000..52bd27ca1036 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DEVCOREDUMP_TYPES_H_ +#define _XE_DEVCOREDUMP_TYPES_H_ + +#include +#include + +struct xe_device; + +/** + * struct xe_devcoredump_snapshot - Crash snapshot + * + * This struct contains all the useful information quickly captured at the time + * of the crash. So, any subsequent reads of the coredump points to a data that + * shows the state of the GPU of when the issue has happened. + */ +struct xe_devcoredump_snapshot { + /** @snapshot_time: Time of this capture. */ + ktime_t snapshot_time; + /** @boot_time: Relative boot time so the uptime can be calculated. */ + ktime_t boot_time; +}; + +/** + * struct xe_devcoredump - Xe devcoredump main structure + * + * This struct represents the live and active dev_coredump node. + * It is created/populated at the time of a crash/error. Then it + * is read later when user access the device coredump data file + * for reading the information. + */ +struct xe_devcoredump { + /** @xe: Xe device. */ + struct xe_device *xe; + /** @captured: The snapshot of the first hang has already been taken. 
*/ + bool captured; + /** @snapshot: Snapshot is captured at time of the first crash */ + struct xe_devcoredump_snapshot snapshot; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index f3cf5a4e5ab2..91edbe4a3730 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -12,6 +12,7 @@ #include #include +#include "xe_devcoredump_types.h" #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_step_types.h" @@ -49,6 +50,9 @@ struct xe_device { /** @drm: drm device */ struct drm_device drm; + /** @devcoredump: device coredump */ + struct xe_devcoredump devcoredump; + /** @info: device info */ struct intel_device_info { /** @graphics_name: graphics IP name */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index d0b48c885fda..55b51ff791b8 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -14,6 +14,7 @@ #include #include "regs/xe_lrc_layout.h" +#include "xe_devcoredump.h" #include "xe_device.h" #include "xe_engine.h" #include "xe_force_wake.h" @@ -800,6 +801,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job) drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", xe_sched_job_seqno(job), e->guc->id, e->flags); simple_error_capture(e); + xe_devcoredump(e); } else { drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", xe_sched_job_seqno(job), e->guc->id, e->flags); -- cgit v1.2.3 From 656d29506ca89b4af1d2380ff4cab15f40ae9e19 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:07 -0400 Subject: drm/xe: Do not take any action if our device was removed. Unfortunately devcoredump infrastructure does not provide and interface for us to force the device removal upon the pci_remove time of our device. The devcoredump is linked at the device level, so when in use it will prevent the module removal, but it doesn't prevent the call of the pci_remove callback. This callback cannot fail anyway and we end up clearing and freeing the entire pci device. Hence, after we removed the pci device, we shouldn't allow any read or free operations to avoid segmentation fault. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_devcoredump.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 561db73a3c8c..00b9cc44c773 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -57,6 +57,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct drm_print_iterator iter; struct timespec64 ts; + /* Our device is gone already... */ + if (!data || !coredump_to_xe(coredump)) + return -ENODEV; + iter.data = buffer; iter.offset = 0; iter.start = offset; @@ -81,6 +85,10 @@ static void xe_devcoredump_free(void *data) { struct xe_devcoredump *coredump = data; + /* Our device is gone. Nothing to do... */ + if (!data || !coredump_to_xe(coredump)) + return; + coredump->captured = false; drm_info(&coredump_to_xe(coredump)->drm, "Xe device coredump has been deleted.\n"); -- cgit v1.2.3 From a7ca8157ec7b59b597ba47cb98eaa82cb0b1d4af Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:08 -0400 Subject: drm/xe: Extract non mapped regions out of GuC CTB into its own struct. No functional change here. 
The goal is to have a clear split between the mapped portions of the CTB and the static information, so we can easily capture snapshots that will be used for later read out with the devcoredump infrastructure. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc_ct.c | 155 ++++++++++++++++++----------------- drivers/gpu/drm/xe/xe_guc_ct_types.h | 20 +++-- 2 files changed, 95 insertions(+), 80 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 9055ff133a7c..e16e5fe37ed4 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -172,13 +172,14 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, struct iosys_map *map) { - h2g->size = CTB_H2G_BUFFER_SIZE / sizeof(u32); - h2g->resv_space = 0; - h2g->tail = 0; - h2g->head = 0; - h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) - - h2g->resv_space; - h2g->broken = false; + h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32); + h2g->info.resv_space = 0; + h2g->info.tail = 0; + h2g->info.head = 0; + h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head, + h2g->info.size) - + h2g->info.resv_space; + h2g->info.broken = false; h2g->desc = *map; xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); @@ -189,13 +190,14 @@ static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g, static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, struct iosys_map *map) { - g2h->size = CTB_G2H_BUFFER_SIZE / sizeof(u32); - g2h->resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32); - g2h->head = 0; - g2h->tail = 0; - g2h->space = CIRC_SPACE(g2h->tail, g2h->head, g2h->size) - - g2h->resv_space; - g2h->broken = false; + g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32); + g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32); + g2h->info.head = 0; + g2h->info.tail = 0; + g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head, + g2h->info.size) - + g2h->info.resv_space; + g2h->info.broken = false; g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); @@ -212,7 +214,7 @@ static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) desc_addr = xe_bo_ggtt_addr(ct->bo); ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2; - size = ct->ctbs.h2g.size * sizeof(u32); + size = ct->ctbs.h2g.info.size * sizeof(u32); err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY, @@ -240,7 +242,7 @@ static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct) desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 + CTB_H2G_BUFFER_SIZE; - size = ct->ctbs.g2h.size * sizeof(u32); + size = ct->ctbs.g2h.info.size * sizeof(u32); err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY, @@ -329,11 +331,12 @@ static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) lockdep_assert_held(&ct->lock); - if (cmd_len > h2g->space) { - h2g->head = desc_read(ct_to_xe(ct), h2g, head); - h2g->space = CIRC_SPACE(h2g->tail, h2g->head, h2g->size) - - h2g->resv_space; - if (cmd_len > h2g->space) + if (cmd_len > h2g->info.space) { + h2g->info.head = desc_read(ct_to_xe(ct), h2g, head); + h2g->info.space = CIRC_SPACE(h2g->info.tail, h2g->info.head, + h2g->info.size) - + h2g->info.resv_space; + if (cmd_len > h2g->info.space) return false; } @@ -344,7 +347,7 @@ static bool g2h_has_room(struct 
xe_guc_ct *ct, u32 g2h_len) { lockdep_assert_held(&ct->lock); - return ct->ctbs.g2h.space > g2h_len; + return ct->ctbs.g2h.info.space > g2h_len; } static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len) @@ -360,16 +363,16 @@ static int has_room(struct xe_guc_ct *ct, u32 cmd_len, u32 g2h_len) static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) { lockdep_assert_held(&ct->lock); - ct->ctbs.h2g.space -= cmd_len; + ct->ctbs.h2g.info.space -= cmd_len; } static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { - XE_BUG_ON(g2h_len > ct->ctbs.g2h.space); + XE_BUG_ON(g2h_len > ct->ctbs.g2h.info.space); if (g2h_len) { spin_lock_irq(&ct->fast_lock); - ct->ctbs.g2h.space -= g2h_len; + ct->ctbs.g2h.info.space -= g2h_len; ct->g2h_outstanding += num_g2h; spin_unlock_irq(&ct->fast_lock); } @@ -378,10 +381,10 @@ static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) { lockdep_assert_held(&ct->fast_lock); - XE_WARN_ON(ct->ctbs.g2h.space + g2h_len > - ct->ctbs.g2h.size - ct->ctbs.g2h.resv_space); + XE_WARN_ON(ct->ctbs.g2h.info.space + g2h_len > + ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); - ct->ctbs.g2h.space += g2h_len; + ct->ctbs.g2h.info.space += g2h_len; --ct->g2h_outstanding; } @@ -400,20 +403,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; u32 cmd_len = len + GUC_CTB_HDR_LEN; u32 cmd_idx = 0, i; - u32 tail = h2g->tail; + u32 tail = h2g->info.tail; struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, tail * sizeof(u32)); lockdep_assert_held(&ct->lock); XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN); - XE_BUG_ON(tail > h2g->size); + XE_BUG_ON(tail > h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ - if (tail + cmd_len > h2g->size) { - xe_map_memset(xe, &map, 0, 0, (h2g->size - tail) * sizeof(u32)); - h2g_reserve_space(ct, (h2g->size - tail)); - h2g->tail = 0; - desc_write(xe, h2g, tail, h2g->tail); + if (tail + cmd_len > h2g->info.size) { + xe_map_memset(xe, &map, 0, 0, + (h2g->info.size - tail) * sizeof(u32)); + h2g_reserve_space(ct, (h2g->info.size - tail)); + h2g->info.tail = 0; + desc_write(xe, h2g, tail, h2g->info.tail); return -EAGAIN; } @@ -445,11 +449,11 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, xe_device_wmb(ct_to_xe(ct)); /* Update local copies */ - h2g->tail = (tail + cmd_len) % h2g->size; + h2g->info.tail = (tail + cmd_len) % h2g->info.size; h2g_reserve_space(ct, cmd_len); /* Update descriptor */ - desc_write(xe, h2g, tail, h2g->tail); + desc_write(xe, h2g, tail, h2g->info.tail); return 0; } @@ -466,7 +470,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, XE_BUG_ON(!g2h_len && num_g2h); lockdep_assert_held(&ct->lock); - if (unlikely(ct->ctbs.h2g.broken)) { + if (unlikely(ct->ctbs.h2g.info.broken)) { ret = -EPIPE; goto out; } @@ -554,8 +558,9 @@ try_again: if (sleep_period_ms == 1024) goto broken; - trace_xe_guc_ct_h2g_flow_control(h2g->head, h2g->tail, - h2g->size, h2g->space, + trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail, + h2g->info.size, + h2g->info.space, len + GUC_CTB_HDR_LEN); msleep(sleep_period_ms); sleep_period_ms <<= 1; @@ -565,15 +570,16 @@ try_again: struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; - trace_xe_guc_ct_g2h_flow_control(g2h->head, + trace_xe_guc_ct_g2h_flow_control(g2h->info.head, desc_read(xe, 
g2h, tail), - g2h->size, g2h->space, + g2h->info.size, + g2h->info.space, g2h_fence ? GUC_CTB_HXG_MSG_MAX_LEN : g2h_len); #define g2h_avail(ct) \ - (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.head) + (desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head) if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding || g2h_avail(ct), HZ)) goto broken; @@ -590,7 +596,7 @@ try_again: broken: drm_err(drm, "No forward process on H2G, reset required"); xe_guc_ct_print(ct, &p); - ct->ctbs.h2g.broken = true; + ct->ctbs.h2g.info.broken = true; return -EDEADLK; } @@ -656,7 +662,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) return false; #define ct_alive(ct) \ - (ct->enabled && !ct->ctbs.h2g.broken && !ct->ctbs.g2h.broken) + (ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken) if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) return false; #undef ct_alive @@ -821,7 +827,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) drm_err(&xe->drm, "G2H channel broken on read, origin=%d, reset required\n", origin); - ct->ctbs.g2h.broken = true; + ct->ctbs.g2h.info.broken = true; return -EPROTO; } @@ -840,7 +846,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) drm_err(&xe->drm, "G2H channel broken on read, type=%d, reset required\n", type); - ct->ctbs.g2h.broken = true; + ct->ctbs.g2h.info.broken = true; ret = -EOPNOTSUPP; } @@ -919,36 +925,37 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) if (!ct->enabled) return -ENODEV; - if (g2h->broken) + if (g2h->info.broken) return -EPIPE; /* Calculate DW available to read */ tail = desc_read(xe, g2h, tail); - avail = tail - g2h->head; + avail = tail - g2h->info.head; if (unlikely(avail == 0)) return 0; if (avail < 0) - avail += g2h->size; + avail += g2h->info.size; /* Read header */ - xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->head, sizeof(u32)); + xe_map_memcpy_from(xe, msg, &g2h->cmds, sizeof(u32) * g2h->info.head, + sizeof(u32)); len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; if (len > avail) { drm_err(&xe->drm, "G2H channel broken on read, avail=%d, len=%d, reset required\n", avail, len); - g2h->broken = true; + g2h->info.broken = true; return -EPROTO; } - head = (g2h->head + 1) % g2h->size; + head = (g2h->info.head + 1) % g2h->info.size; avail = len - 1; /* Read G2H message */ - if (avail + head > g2h->size) { - u32 avail_til_wrap = g2h->size - head; + if (avail + head > g2h->info.size) { + u32 avail_til_wrap = g2h->info.size - head; xe_map_memcpy_from(xe, msg + 1, &g2h->cmds, sizeof(u32) * head, @@ -983,8 +990,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) } /* Update local / descriptor header */ - g2h->head = (head + avail) % g2h->size; - desc_write(xe, g2h, head, g2h->head); + g2h->info.head = (head + avail) % g2h->info.size; + desc_write(xe, g2h, head, g2h->info.head); return len; } @@ -1093,12 +1100,12 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, { u32 head, tail; - drm_printf(p, "\tsize: %d\n", ctb->size); - drm_printf(p, "\tresv_space: %d\n", ctb->resv_space); - drm_printf(p, "\thead: %d\n", ctb->head); - drm_printf(p, "\ttail: %d\n", ctb->tail); - drm_printf(p, "\tspace: %d\n", ctb->space); - drm_printf(p, "\tbroken: %d\n", ctb->broken); + drm_printf(p, "\tsize: %d\n", ctb->info.size); + drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space); + drm_printf(p, "\thead: %d\n", ctb->info.head); + drm_printf(p, 
"\ttail: %d\n", ctb->info.tail); + drm_printf(p, "\tspace: %d\n", ctb->info.space); + drm_printf(p, "\tbroken: %d\n", ctb->info.broken); head = desc_read(xe, ctb, head); tail = desc_read(xe, ctb, tail); @@ -1114,7 +1121,7 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, xe_map_rd(xe, &map, 0, u32)); ++head; - if (head == ctb->size) { + if (head == ctb->info.size) { head = 0; map = ctb->cmds; } else { @@ -1168,12 +1175,12 @@ void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p) if (!ret) { xe_guc_ct_irq_handler(ct); msleep(200); - if (g2h->space != - CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) { + if (g2h->info.space != + CIRC_SPACE(0, 0, g2h->info.size) - g2h->info.resv_space) { drm_printf(p, "Mismatch on space %d, %d\n", - g2h->space, - CIRC_SPACE(0, 0, g2h->size) - - g2h->resv_space); + g2h->info.space, + CIRC_SPACE(0, 0, g2h->info.size) - + g2h->info.resv_space); ret = -EIO; } if (ct->g2h_outstanding) { @@ -1185,12 +1192,12 @@ void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p) /* Check failure path for blocking CTs too */ xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action)); - if (g2h->space != - CIRC_SPACE(0, 0, g2h->size) - g2h->resv_space) { + if (g2h->info.space != + CIRC_SPACE(0, 0, g2h->info.size) - g2h->info.resv_space) { drm_printf(p, "Mismatch on space %d, %d\n", - g2h->space, - CIRC_SPACE(0, 0, g2h->size) - - g2h->resv_space); + g2h->info.space, + CIRC_SPACE(0, 0, g2h->info.size) - + g2h->info.resv_space); ret = -EIO; } if (ct->g2h_outstanding) { diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index fd27dacf00c5..64e3dd14d4b2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -19,13 +19,9 @@ struct xe_bo; /** - * struct guc_ctb - GuC command transport buffer (CTB) + * struct guc_ctb_info - GuC command transport buffer (CTB) info */ -struct guc_ctb { - /** @desc: dma buffer map for CTB descriptor */ - struct iosys_map desc; - /** @cmds: dma buffer map for CTB commands */ - struct iosys_map cmds; +struct guc_ctb_info { /** @size: size of CTB commands (DW) */ u32 size; /** @resv_space: reserved space of CTB commands (DW) */ @@ -40,6 +36,18 @@ struct guc_ctb { bool broken; }; +/** + * struct guc_ctb - GuC command transport buffer (CTB) + */ +struct guc_ctb { + /** @desc: dma buffer map for CTB descriptor */ + struct iosys_map desc; + /** @cmds: dma buffer map for CTB commands */ + struct iosys_map cmds; + /** @info: CTB info */ + struct guc_ctb_info info; +}; + /** * struct xe_guc_ct - GuC command transport (CT) layer * -- cgit v1.2.3 From 513260dfd150a49ad117f1b7c50097a1d74c0085 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:09 -0400 Subject: drm/xe: Convert GuC CT print to snapshot capture and print. The goal is to allow for a snapshot capture to be taken at the time of the crash, while the print out can happen at a later time through the exposed devcoredump virtual device. v2: Handle memory allocation failures. (Matthew) Do not use GFP_ATOMIC on cases like debugfs prints. 
(Matthew) v3: checkpatch fixes v4: Do not use atomic in the g2h_worker_func (Matthew) Signed-off-by: Rodrigo Vivi Cc: Matthew Brost Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 166 ++++++++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_guc_ct.h | 8 +- drivers/gpu/drm/xe/xe_guc_ct_types.h | 26 ++++++ drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 5 files changed, 179 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index eb4af4c71124..b72407e24d09 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -857,6 +857,6 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - xe_guc_ct_print(&guc->ct, p); + xe_guc_ct_print(&guc->ct, p, false); xe_guc_submit_print(guc, p); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index e16e5fe37ed4..e8c2edb1359d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -595,7 +595,7 @@ try_again: broken: drm_err(drm, "No forward process on H2G, reset required"); - xe_guc_ct_print(ct, &p); + xe_guc_ct_print(ct, &p, true); ct->ctbs.h2g.info.broken = true; return -EDEADLK; @@ -1088,38 +1088,40 @@ static void g2h_worker_func(struct work_struct *w) struct drm_device *drm = &ct_to_xe(ct)->drm; struct drm_printer p = drm_info_printer(drm->dev); - xe_guc_ct_print(ct, &p); + xe_guc_ct_print(ct, &p, false); kick_reset(ct); } } while (ret == 1); xe_device_mem_access_put(ct_to_xe(ct)); } -static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, - struct drm_printer *p) +static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, + struct guc_ctb_snapshot *snapshot, + bool atomic) { u32 head, tail; - drm_printf(p, "\tsize: %d\n", ctb->info.size); - drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space); - drm_printf(p, "\thead: %d\n", ctb->info.head); - drm_printf(p, "\ttail: %d\n", ctb->info.tail); - drm_printf(p, "\tspace: %d\n", ctb->info.space); - drm_printf(p, "\tbroken: %d\n", ctb->info.broken); + xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0, + sizeof(struct guc_ct_buffer_desc)); + memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info)); - head = desc_read(xe, ctb, head); - tail = desc_read(xe, ctb, tail); - drm_printf(p, "\thead (memory): %d\n", head); - drm_printf(p, "\ttail (memory): %d\n", tail); - drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status)); + snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32), + atomic ? GFP_ATOMIC : GFP_KERNEL); + + if (!snapshot->cmds) { + drm_err(&xe->drm, "Skipping CTB commands snapshot. 
Only CTB info will be available.\n"); + return; + } + + head = snapshot->desc.head; + tail = snapshot->desc.tail; if (head != tail) { struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32)); while (head != tail) { - drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, - xe_map_rd(xe, &map, 0, u32)); + snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32); ++head; if (head == ctb->info.size) { head = 0; @@ -1131,20 +1133,140 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb, } } -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p) +static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot, + struct drm_printer *p) +{ + u32 head, tail; + + drm_printf(p, "\tsize: %d\n", snapshot->info.size); + drm_printf(p, "\tresv_space: %d\n", snapshot->info.space); + drm_printf(p, "\thead: %d\n", snapshot->info.head); + drm_printf(p, "\ttail: %d\n", snapshot->info.tail); + drm_printf(p, "\tspace: %d\n", snapshot->info.space); + drm_printf(p, "\tbroken: %d\n", snapshot->info.broken); + drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head); + drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail); + drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status); + + if (!snapshot->cmds) + return; + + head = snapshot->desc.head; + tail = snapshot->desc.tail; + + while (head != tail) { + drm_printf(p, "\tcmd[%d]: 0x%08x\n", head, + snapshot->cmds[head]); + ++head; + if (head == snapshot->info.size) + head = 0; + } +} + +static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot) { + kfree(snapshot->cmds); +} + +/** + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state. + * @ct: GuC CT object. + * @atomic: Boolean to indicate if this is called from atomic context like + * reset or CTB handler or from some regular path like debugfs. + * + * This can be printed out in a later stage like during dev_coredump + * analysis. + * + * Returns: a GuC CT snapshot object that must be freed by the caller + * by using `xe_guc_ct_snapshot_free`. + */ +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, + bool atomic) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_guc_ct_snapshot *snapshot; + + snapshot = kzalloc(sizeof(*snapshot), + atomic ? GFP_ATOMIC : GFP_KERNEL); + + if (!snapshot) { + drm_err(&xe->drm, "Skipping CTB snapshot entirely.\n"); + return NULL; + } + if (ct->enabled) { + snapshot->ct_enabled = true; + guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, + &snapshot->h2g, atomic); + guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, + &snapshot->g2h, atomic); + } + + return snapshot; +} + +/** + * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot. + * @snapshot: GuC CT snapshot object. + * @p: drm_printer where it will be printed out. + * + * This function prints out a given GuC CT snapshot object. 
+ */ +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, + struct drm_printer *p) +{ + if (!snapshot) + return; + + if (snapshot->ct_enabled) { drm_puts(p, "\nH2G CTB (all sizes in DW):\n"); - guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p); + guc_ctb_snapshot_print(&snapshot->h2g, p); drm_puts(p, "\nG2H CTB (all sizes in DW):\n"); - guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p); - drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding); + guc_ctb_snapshot_print(&snapshot->g2h, p); + + drm_printf(p, "\tg2h outstanding: %d\n", + snapshot->g2h_outstanding); } else { drm_puts(p, "\nCT disabled\n"); } } +/** + * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot. + * @snapshot: GuC CT snapshot object. + * + * This function free all the memory that needed to be allocated at capture + * time. + */ +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot) +{ + if (!snapshot) + return; + + guc_ctb_snapshot_free(&snapshot->h2g); + guc_ctb_snapshot_free(&snapshot->g2h); + kfree(snapshot); +} + +/** + * xe_guc_ct_print - GuC CT Print. + * @ct: GuC CT. + * @p: drm_printer where it will be printed out. + * @atomic: Boolean to indicate if this is called from atomic context like + * reset or CTB handler or from some regular path like debugfs. + * + * This function quickly capture a snapshot and immediately print it out. + */ +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic) +{ + struct xe_guc_ct_snapshot *snapshot; + + snapshot = xe_guc_ct_snapshot_capture(ct, atomic); + xe_guc_ct_snapshot_print(snapshot, p); + xe_guc_ct_snapshot_free(snapshot); +} + #ifdef XE_GUC_CT_SELFTEST /* * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow @@ -1166,7 +1288,7 @@ void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p) ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1); if (ret) { drm_printf(p, "Aborted pass %d, ret %d\n", i, ret); - xe_guc_ct_print(ct, p); + xe_guc_ct_print(ct, p, true); break; } } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 49fb74f91e4d..3e04ee64652c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -13,9 +13,15 @@ struct drm_printer; int xe_guc_ct_init(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p); void xe_guc_ct_fast_path(struct xe_guc_ct *ct); +struct xe_guc_ct_snapshot * +xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, bool atomic); +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, + struct drm_printer *p); +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic); + static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) { wake_up_all(&ct->wq); diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 64e3dd14d4b2..93046d95b009 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -48,6 +48,32 @@ struct guc_ctb { struct guc_ctb_info info; }; +/** + * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot + */ +struct guc_ctb_snapshot { + /** @desc: snapshot of the CTB descriptor */ + struct guc_ct_buffer_desc desc; + /** @cmds: snapshot of the CTB commands */ + u32 *cmds; + /** @info: snapshot of the CTB info */ + struct guc_ctb_info info; +}; + +/** + 
* struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot + */ +struct xe_guc_ct_snapshot { + /** @ct_enabled: CT enabled info at capture time. */ + bool ct_enabled; + /** @g2h_outstanding: G2H outstanding info at the capture time */ + u32 g2h_outstanding; + /** @g2h: G2H CTB snapshot */ + struct guc_ctb_snapshot g2h; + /** @h2g: H2G CTB snapshot */ + struct guc_ctb_snapshot h2g; +}; + /** * struct xe_guc_ct - GuC command transport (CT) layer * diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 55b51ff791b8..f587aa48c5bd 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -764,7 +764,7 @@ static void simple_error_capture(struct xe_engine *e) } xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); - xe_guc_ct_print(&guc->ct, &p); + xe_guc_ct_print(&guc->ct, &p, true); guc_engine_print(e, &p); for_each_hw_engine(hwe, guc_to_gt(guc), id) { if (hwe->class != e->hwe->class || -- cgit v1.2.3 From 5ed53446325475514b78f9072a2f85ca24fc9548 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:10 -0400 Subject: drm/xe: Add GuC CT snapshot to xe_devcoredump. Let's start to move our existent logs to devcoredump one by one. Any format change should come on follow-up work. v2: Rebase and add the dma_fence locking annotation here. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_devcoredump.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_devcoredump_types.h | 4 ++++ 2 files changed, 21 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 00b9cc44c773..b0e3db148ce2 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -11,6 +11,7 @@ #include "xe_engine.h" #include "xe_gt.h" +#include "xe_guc_ct.h" /** * DOC: Xe device coredump @@ -48,6 +49,11 @@ static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) return container_of(coredump, struct xe_device, devcoredump); } +static struct xe_guc *engine_to_guc(struct xe_engine *e) +{ + return &e->gt->uc.guc; +} + static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { @@ -78,6 +84,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ts = ktime_to_timespec64(ss->boot_time); drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); + drm_printf(&p, "\n**** GuC CT ****\n"); + xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p); + return count - iter.remain; } @@ -89,6 +98,8 @@ static void xe_devcoredump_free(void *data) if (!data || !coredump_to_xe(coredump)) return; + xe_guc_ct_snapshot_free(coredump->snapshot.ct); + coredump->captured = false; drm_info(&coredump_to_xe(coredump)->drm, "Xe device coredump has been deleted.\n"); @@ -98,9 +109,15 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, struct xe_engine *e) { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; + struct xe_guc *guc = engine_to_guc(e); + bool cookie; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); + + cookie = dma_fence_begin_signalling(); + coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); + dma_fence_end_signalling(cookie); } /** diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 52bd27ca1036..4e3371c7b9c5 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -23,6 
+23,10 @@ struct xe_devcoredump_snapshot { ktime_t snapshot_time; /** @boot_time: Relative boot time so the uptime can be calculated. */ ktime_t boot_time; + + /* GuC snapshots */ + /** @ct_snapshot: GuC CT snapshot */ + struct xe_guc_ct_snapshot *ct; }; /** -- cgit v1.2.3 From 1825c492daafc39e2eaeacc0f05372aca4ab6f7f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:11 -0400 Subject: drm/xe: Introduce guc_submit_types.h with relevant structs. These structs and definitions are only used for the guc_submit and they were added specifically for the parallel submission. While doing that also delete the unused struct guc_wq_item. v2: checkpatch fixes. Cc: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc_fwif.h | 29 --------------- drivers/gpu/drm/xe/xe_guc_submit.c | 42 ++++++--------------- drivers/gpu/drm/xe/xe_guc_submit_types.h | 64 ++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 59 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_guc_submit_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 20155ba4ef07..27d132ce2087 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -46,35 +46,6 @@ #define GUC_MAX_ENGINE_CLASSES 16 #define GUC_MAX_INSTANCES_PER_CLASS 32 -/* Work item for submitting workloads into work queue of GuC. */ -#define WQ_STATUS_ACTIVE 1 -#define WQ_STATUS_SUSPENDED 2 -#define WQ_STATUS_CMD_ERROR 3 -#define WQ_STATUS_ENGINE_ID_NOT_USED 4 -#define WQ_STATUS_SUSPENDED_FROM_RESET 5 -#define WQ_TYPE_NOOP 0x4 -#define WQ_TYPE_MULTI_LRC 0x5 -#define WQ_TYPE_MASK GENMASK(7, 0) -#define WQ_LEN_MASK GENMASK(26, 16) - -#define WQ_GUC_ID_MASK GENMASK(15, 0) -#define WQ_RING_TAIL_MASK GENMASK(28, 18) - -struct guc_wq_item { - u32 header; - u32 context_desc; - u32 submit_element_info; - u32 fence_id; -} __packed; - -struct guc_sched_wq_desc { - u32 head; - u32 tail; - u32 error_offset; - u32 wq_status; - u32 reserved[28]; -} __packed; - /* Helper for context registration H2G */ struct guc_ctxt_registration_info { u32 flags; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index f587aa48c5bd..914e9ffef43f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -23,6 +23,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_engine_types.h" +#include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" #include "xe_lrc.h" @@ -379,32 +380,12 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) __guc_engine_policy_action_size(&policy), 0, 0); } -#define PARALLEL_SCRATCH_SIZE 2048 -#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) -#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) -#define CACHELINE_BYTES 64 - -struct sync_semaphore { - u32 semaphore; - u8 unused[CACHELINE_BYTES - sizeof(u32)]; -}; - -struct parallel_scratch { - struct guc_sched_wq_desc wq_desc; - - struct sync_semaphore go; - struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; - - u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - - sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)]; - - u32 wq[WQ_SIZE / sizeof(u32)]; -}; - #define parallel_read(xe_, map_, field_) \ - xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_) + xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ + field_) #define parallel_write(xe_, map_, field_, val_) \ - 
xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_) + xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ + field_, val_) static void __register_mlrc_engine(struct xe_guc *guc, struct xe_engine *e, @@ -487,13 +468,13 @@ static void register_engine(struct xe_engine *e) struct iosys_map map = xe_lrc_parallel_map(lrc); info.wq_desc_lo = lower_32_bits(ggtt_addr + - offsetof(struct parallel_scratch, wq_desc)); + offsetof(struct guc_submit_parallel_scratch, wq_desc)); info.wq_desc_hi = upper_32_bits(ggtt_addr + - offsetof(struct parallel_scratch, wq_desc)); + offsetof(struct guc_submit_parallel_scratch, wq_desc)); info.wq_base_lo = lower_32_bits(ggtt_addr + - offsetof(struct parallel_scratch, wq[0])); + offsetof(struct guc_submit_parallel_scratch, wq[0])); info.wq_base_hi = upper_32_bits(ggtt_addr + - offsetof(struct parallel_scratch, wq[0])); + offsetof(struct guc_submit_parallel_scratch, wq[0])); info.wq_size = WQ_SIZE; e->guc->wqi_head = 0; @@ -595,8 +576,8 @@ static void wq_item_append(struct xe_engine *e) XE_BUG_ON(i != wqi_size / sizeof(u32)); - iosys_map_incr(&map, offsetof(struct parallel_scratch, - wq[e->guc->wqi_tail / sizeof(u32)])); + iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, + wq[e->guc->wqi_tail / sizeof(u32)])); xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); e->guc->wqi_tail += wqi_size; XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE); @@ -1672,6 +1653,7 @@ static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) guc_engine_wq_print(e, p); spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", xe_sched_job_seqno(job), diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h new file mode 100644 index 000000000000..d23759959be9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_SUBMIT_TYPES_H_ +#define _XE_GUC_SUBMIT_TYPES_H_ + +#include "xe_hw_engine_types.h" + +/* Work item for submitting workloads into work queue of GuC. */ +#define WQ_STATUS_ACTIVE 1 +#define WQ_STATUS_SUSPENDED 2 +#define WQ_STATUS_CMD_ERROR 3 +#define WQ_STATUS_ENGINE_ID_NOT_USED 4 +#define WQ_STATUS_SUSPENDED_FROM_RESET 5 +#define WQ_TYPE_NOOP 0x4 +#define WQ_TYPE_MULTI_LRC 0x5 +#define WQ_TYPE_MASK GENMASK(7, 0) +#define WQ_LEN_MASK GENMASK(26, 16) + +#define WQ_GUC_ID_MASK GENMASK(15, 0) +#define WQ_RING_TAIL_MASK GENMASK(28, 18) + +#define PARALLEL_SCRATCH_SIZE 2048 +#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) +#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) +#define CACHELINE_BYTES 64 + +struct guc_sched_wq_desc { + u32 head; + u32 tail; + u32 error_offset; + u32 wq_status; + u32 reserved[28]; +} __packed; + +struct sync_semaphore { + u32 semaphore; + u8 unused[CACHELINE_BYTES - sizeof(u32)]; +}; + +/** + * Struct guc_submit_parallel_scratch - A scratch shared mapped buffer. 
+ */ +struct guc_submit_parallel_scratch { + /** @wq_desc: Guc scheduler workqueue descriptor */ + struct guc_sched_wq_desc wq_desc; + + /** @go: Go Semaphore */ + struct sync_semaphore go; + /** @join: Joined semaphore for the relevant hw engine instances */ + struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; + + /** @unused: Unused/Reserved memory space */ + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - + sizeof(struct sync_semaphore) * + (XE_HW_ENGINE_MAX_INSTANCE + 1)]; + + /** @wq: Workqueue info */ + u32 wq[WQ_SIZE / sizeof(u32)]; +}; + +#endif -- cgit v1.2.3 From bbdf97c140064975552bedb70b2b4329ab758f0b Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:12 -0400 Subject: drm/xe: Convert GuC Engine print to snapshot capture and print. The goal is to allow for a snapshot capture to be taken at the time of the crash, while the print out can happen at a later time through the exposed devcoredump virtual device. v2: Handle memory allocation failures. (Matthew) Do not use GFP_ATOMIC on cases like debugfs prints. (Matthew) v3: checkpatch v4: pending_list allocation needs to be atomic because of the spin_lock. (Matthew) get back to GFP_ATOMIC only. (lockdep). Cc: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc_submit.c | 237 ++++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_guc_submit.h | 10 +- drivers/gpu/drm/xe/xe_guc_submit_types.h | 91 ++++++++++++ 3 files changed, 298 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 914e9ffef43f..3ff133e8463c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1594,75 +1594,234 @@ int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } -static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p) +static void +guc_engine_wq_snapshot_capture(struct xe_engine *e, + struct xe_guc_submit_engine_snapshot *snapshot) { struct xe_guc *guc = engine_to_guc(e); struct xe_device *xe = guc_to_xe(guc); struct iosys_map map = xe_lrc_parallel_map(e->lrc); int i; + snapshot->guc.wqi_head = e->guc->wqi_head; + snapshot->guc.wqi_tail = e->guc->wqi_tail; + snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); + snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); + snapshot->parallel.wq_desc.status = parallel_read(xe, map, + wq_desc.wq_status); + + if (snapshot->parallel.wq_desc.head != + snapshot->parallel.wq_desc.tail) { + for (i = snapshot->parallel.wq_desc.head; + i != snapshot->parallel.wq_desc.tail; + i = (i + sizeof(u32)) % WQ_SIZE) + snapshot->parallel.wq[i / sizeof(u32)] = + parallel_read(xe, map, wq[i / sizeof(u32)]); + } +} + +static void +guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, + struct drm_printer *p) +{ + int i; + drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", - e->guc->wqi_head, parallel_read(xe, map, wq_desc.head)); + snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", - e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail)); - drm_printf(p, "\tWQ status: %u\n", - parallel_read(xe, map, wq_desc.wq_status)); - if (parallel_read(xe, map, wq_desc.head) != - parallel_read(xe, map, wq_desc.tail)) { - for (i = parallel_read(xe, map, wq_desc.head); - i != parallel_read(xe, map, wq_desc.tail); + snapshot->guc.wqi_tail, 
snapshot->parallel.wq_desc.tail); + drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); + + if (snapshot->parallel.wq_desc.head != + snapshot->parallel.wq_desc.tail) { + for (i = snapshot->parallel.wq_desc.head; + i != snapshot->parallel.wq_desc.tail; i = (i + sizeof(u32)) % WQ_SIZE) drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), - parallel_read(xe, map, wq[i / sizeof(u32)])); + snapshot->parallel.wq[i / sizeof(u32)]); } } -static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) +/** + * xe_guc_engine_snapshot_capture - Take a quick snapshot of the GuC Engine. + * @e: Xe Engine. + * + * This can be printed out in a later stage like during dev_coredump + * analysis. + * + * Returns: a GuC Submit Engine snapshot object that must be freed by the + * caller, using `xe_guc_engine_snapshot_free`. + */ +struct xe_guc_submit_engine_snapshot * +xe_guc_engine_snapshot_capture(struct xe_engine *e) { + struct xe_guc *guc = engine_to_guc(e); + struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &e->guc->sched; struct xe_sched_job *job; + struct xe_guc_submit_engine_snapshot *snapshot; + int i; + + snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); + + if (!snapshot) { + drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n"); + return NULL; + } + + snapshot->guc.id = e->guc->id; + memcpy(&snapshot->name, &e->name, sizeof(snapshot->name)); + snapshot->class = e->class; + snapshot->logical_mask = e->logical_mask; + snapshot->width = e->width; + snapshot->refcount = kref_read(&e->refcount); + snapshot->sched_timeout = sched->base.timeout; + snapshot->sched_props.timeslice_us = e->sched_props.timeslice_us; + snapshot->sched_props.preempt_timeout_us = + e->sched_props.preempt_timeout_us; + + snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot), + GFP_ATOMIC); + + if (!snapshot->lrc) { + drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n"); + } else { + for (i = 0; i < e->width; ++i) { + struct xe_lrc *lrc = e->lrc + i; + + snapshot->lrc[i].context_desc = + lower_32_bits(xe_lrc_ggtt_addr(lrc)); + snapshot->lrc[i].head = xe_lrc_ring_head(lrc); + snapshot->lrc[i].tail.internal = lrc->ring.tail; + snapshot->lrc[i].tail.memory = + xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); + snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); + snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); + } + } + + snapshot->schedule_state = atomic_read(&e->guc->state); + snapshot->engine_flags = e->flags; + + snapshot->parallel_execution = xe_engine_is_parallel(e); + if (snapshot->parallel_execution) + guc_engine_wq_snapshot_capture(e, snapshot); + + spin_lock(&sched->base.job_list_lock); + snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); + snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, + sizeof(struct pending_list_snapshot), + GFP_ATOMIC); + + if (!snapshot->pending_list) { + drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n"); + } else { + i = 0; + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + snapshot->pending_list[i].seqno = + xe_sched_job_seqno(job); + snapshot->pending_list[i].fence = + dma_fence_is_signaled(job->fence) ? 1 : 0; + snapshot->pending_list[i].finished = + dma_fence_is_signaled(&job->drm.s_fence->finished) + ? 1 : 0; + i++; + } + } + + spin_unlock(&sched->base.job_list_lock); + + return snapshot; +} + +/** + * xe_guc_engine_snapshot_print - Print out a given GuC Engine snapshot. + * @snapshot: GuC Submit Engine snapshot object. 
+ * @p: drm_printer where it will be printed out. + * + * This function prints out a given GuC Submit Engine snapshot object. + */ +void +xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, + struct drm_printer *p) +{ int i; - drm_printf(p, "\nGuC ID: %d\n", e->guc->id); - drm_printf(p, "\tName: %s\n", e->name); - drm_printf(p, "\tClass: %d\n", e->class); - drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask); - drm_printf(p, "\tWidth: %d\n", e->width); - drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount)); - drm_printf(p, "\tTimeout: %ld (ms)\n", sched->base.timeout); - drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us); + if (!snapshot) + return; + + drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id); + drm_printf(p, "\tName: %s\n", snapshot->name); + drm_printf(p, "\tClass: %d\n", snapshot->class); + drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); + drm_printf(p, "\tWidth: %d\n", snapshot->width); + drm_printf(p, "\tRef: %d\n", snapshot->refcount); + drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); + drm_printf(p, "\tTimeslice: %u (us)\n", + snapshot->sched_props.timeslice_us); drm_printf(p, "\tPreempt timeout: %u (us)\n", - e->sched_props.preempt_timeout_us); - for (i = 0; i < e->width; ++i ) { - struct xe_lrc *lrc = e->lrc + i; + snapshot->sched_props.preempt_timeout_us); + for (i = 0; snapshot->lrc && i < snapshot->width; ++i) { drm_printf(p, "\tHW Context Desc: 0x%08x\n", - lower_32_bits(xe_lrc_ggtt_addr(lrc))); + snapshot->lrc[i].context_desc); drm_printf(p, "\tLRC Head: (memory) %u\n", - xe_lrc_ring_head(lrc)); + snapshot->lrc[i].head); drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", - lrc->ring.tail, - xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL)); + snapshot->lrc[i].tail.internal, + snapshot->lrc[i].tail.memory); drm_printf(p, "\tStart seqno: (memory) %d\n", - xe_lrc_start_seqno(lrc)); - drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc)); + snapshot->lrc[i].start_seqno); + drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno); } - drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state)); - drm_printf(p, "\tFlags: 0x%lx\n", e->flags); - if (xe_engine_is_parallel(e)) - guc_engine_wq_print(e, p); + drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); + drm_printf(p, "\tFlags: 0x%lx\n", snapshot->engine_flags); - spin_lock(&sched->base.job_list_lock); + if (snapshot->parallel_execution) + guc_engine_wq_snapshot_print(snapshot, p); - list_for_each_entry(job, &sched->base.pending_list, drm.list) + for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size; + i++) drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", - xe_sched_job_seqno(job), - dma_fence_is_signaled(job->fence) ? 1 : 0, - dma_fence_is_signaled(&job->drm.s_fence->finished) ? - 1 : 0); - spin_unlock(&sched->base.job_list_lock); + snapshot->pending_list[i].seqno, + snapshot->pending_list[i].fence, + snapshot->pending_list[i].finished); +} + +/** + * xe_guc_engine_snapshot_free - Free all allocated objects for a given + * snapshot. + * @snapshot: GuC Submit Engine snapshot object. + * + * This function free all the memory that needed to be allocated at capture + * time. 
+ */ +void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot) +{ + if (!snapshot) + return; + + kfree(snapshot->lrc); + kfree(snapshot->pending_list); + kfree(snapshot); } +static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) +{ + struct xe_guc_submit_engine_snapshot *snapshot; + + snapshot = xe_guc_engine_snapshot_capture(e); + xe_guc_engine_snapshot_print(snapshot, p); + xe_guc_engine_snapshot_free(snapshot); +} + +/** + * xe_guc_submit_print - GuC Submit Print. + * @guc: GuC. + * @p: drm_printer where it will be printed out. + * + * This function capture and prints snapshots of **all** GuC Engines. + */ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) { struct xe_engine *e; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 8002734d6f24..4153c2d22013 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -13,7 +13,6 @@ struct xe_engine; struct xe_guc; int xe_guc_submit_init(struct xe_guc *guc); -void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); @@ -27,4 +26,13 @@ int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); +struct xe_guc_submit_engine_snapshot * +xe_guc_engine_snapshot_capture(struct xe_engine *e); +void +xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, + struct drm_printer *p); +void +xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot); +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h index d23759959be9..88e855dae056 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -61,4 +61,95 @@ struct guc_submit_parallel_scratch { u32 wq[WQ_SIZE / sizeof(u32)]; }; +struct lrc_snapshot { + u32 context_desc; + u32 head; + struct { + u32 internal; + u32 memory; + } tail; + u32 start_seqno; + u32 seqno; +}; + +struct pending_list_snapshot { + u32 seqno; + bool fence; + bool finished; +}; + +/** + * struct xe_guc_submit_engine_snapshot - Snapshot for devcoredump + */ +struct xe_guc_submit_engine_snapshot { + /** @name: name of this engine */ + char name[MAX_FENCE_NAME_LEN]; + /** @class: class of this engine */ + enum xe_engine_class class; + /** + * @logical_mask: logical mask of where job submitted to engine can run + */ + u32 logical_mask; + /** @width: width (number BB submitted per exec) of this engine */ + u16 width; + /** @refcount: ref count of this engine */ + u32 refcount; + /** + * @sched_timeout: the time after which a job is removed from the + * scheduler. 
+ */ + long sched_timeout; + + /** @sched_props: scheduling properties */ + struct { + /** @timeslice_us: timeslice period in micro-seconds */ + u32 timeslice_us; + /** @preempt_timeout_us: preemption timeout in micro-seconds */ + u32 preempt_timeout_us; + } sched_props; + + /** @lrc: LRC Snapshot */ + struct lrc_snapshot *lrc; + + /** @schedule_state: Schedule State at the moment of Crash */ + u32 schedule_state; + /** @engine_flags: Flags of the faulty engine */ + unsigned long engine_flags; + + /** @guc: GuC Engine Snapshot */ + struct { + /** @wqi_head: work queue item head */ + u32 wqi_head; + /** @wqi_tail: work queue item tail */ + u32 wqi_tail; + /** @id: GuC id for this xe_engine */ + u16 id; + } guc; + + /** + * @parallel_execution: Indication if the failure was during parallel + * execution + */ + bool parallel_execution; + /** @parallel: snapshot of the useful parallel scratch */ + struct { + /** @wq_desc: Workqueue description */ + struct { + /** @head: Workqueue Head */ + u32 head; + /** @tail: Workqueue Tail */ + u32 tail; + /** @status: Workqueue Status */ + u32 status; + } wq_desc; + /** @wq: Workqueue Items */ + u32 wq[WQ_SIZE / sizeof(u32)]; + } parallel; + + /** @pending_list_size: Size of the pending list snapshot array */ + int pending_list_size; + /** @pending_list: snapshot of the pending list info */ + struct pending_list_snapshot *pending_list; +}; + #endif -- cgit v1.2.3 From 3847ec03ddd4b688cd02929356ee979acddfa03f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:13 -0400 Subject: drm/xe: Add GuC Submit Engine snapshot to xe_devcoredump. Let's start to move our existent logs to devcoredump one by one. Any format change should come on follow-up work. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_devcoredump.c | 4 ++++ drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index b0e3db148ce2..7296c0137b47 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -12,6 +12,7 @@ #include "xe_engine.h" #include "xe_gt.h" #include "xe_guc_ct.h" +#include "xe_guc_submit.h" /** * DOC: Xe device coredump @@ -86,6 +87,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, drm_printf(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p); + xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p); return count - iter.remain; } @@ -99,6 +101,7 @@ static void xe_devcoredump_free(void *data) return; xe_guc_ct_snapshot_free(coredump->snapshot.ct); + xe_guc_engine_snapshot_free(coredump->snapshot.ge); coredump->captured = false; drm_info(&coredump_to_xe(coredump)->drm, @@ -117,6 +120,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, cookie = dma_fence_begin_signalling(); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); + coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e); dma_fence_end_signalling(cookie); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 4e3371c7b9c5..7c6453224139 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -27,6 +27,8 @@ struct xe_devcoredump_snapshot { /* GuC snapshots */ /** @ct_snapshot: GuC CT snapshot */ struct xe_guc_ct_snapshot *ct; + /** @ge: Guc Engine snapshot */ + struct xe_guc_submit_engine_snapshot *ge; }; /** -- 
cgit v1.2.3 From a4db55558785191a9ff0d295ccf181f18856cb58 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:14 -0400 Subject: drm/xe: Convert Xe HW Engine print to snapshot capture and print. The goal is to allow for a snapshot capture to be taken at the time of the crash, while the print out can happen at a later time through the exposed devcoredump virtual device. v2: Addressing these Matthew comments: - Handle memory allocation failures. - Do not use GFP_ATOMIC on cases like debugfs prints. - placement of @reg doc. - identation issues. v3: checkpatch v4: Rebase and get back to GFP_ATOMIC only. Signed-off-by: Rodrigo Vivi Cc: Matthew Brost Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_hw_engine.c | 209 +++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_hw_engine.h | 8 +- drivers/gpu/drm/xe/xe_hw_engine_types.h | 78 ++++++++++++ 5 files changed, 240 insertions(+), 59 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index c45486c2015a..8bf441e850a0 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -42,7 +42,7 @@ static int hw_engines(struct seq_file *m, void *data) } for_each_hw_engine(hwe, gt, id) - xe_hw_engine_print_state(hwe, &p); + xe_hw_engine_print(hwe, &p); xe_device_mem_access_put(xe); err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 3ff133e8463c..7be06320dbd7 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -751,7 +751,7 @@ static void simple_error_capture(struct xe_engine *e) if (hwe->class != e->hwe->class || !(BIT(hwe->logical_instance) & adj_logical_mask)) continue; - xe_hw_engine_print_state(hwe, &p); + xe_hw_engine_print(hwe, &p); } xe_analyze_vm(&p, e->vm, e->gt->info.id); xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 751f6c3bba17..71ac4defb947 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -574,77 +574,174 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) xe_hw_fence_irq_run(hwe->fence_irq); } -void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p) +/** + * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. + * @hwe: Xe HW Engine. + * + * This can be printed out in a later stage like during dev_coredump + * analysis. + * + * Returns: a Xe HW Engine snapshot object that must be freed by the + * caller, using `xe_hw_engine_snapshot_free`. 
+ */ +struct xe_hw_engine_snapshot * +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) { + struct xe_hw_engine_snapshot *snapshot; + int len; + if (!xe_hw_engine_is_valid(hwe)) + return NULL; + + snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); + + if (!snapshot) + return NULL; + + len = strlen(hwe->name) + 1; + snapshot->name = kzalloc(len, GFP_ATOMIC); + if (snapshot->name) + strscpy(snapshot->name, hwe->name, len); + + snapshot->class = hwe->class; + snapshot->logical_instance = hwe->logical_instance; + snapshot->forcewake.domain = hwe->domain; + snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), + hwe->domain); + snapshot->mmio_base = hwe->mmio_base; + + snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, + RING_HWS_PGA(0)); + snapshot->reg.ring_execlist_status_lo = + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); + snapshot->reg.ring_execlist_status_hi = + hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + snapshot->reg.ring_execlist_sq_contents_lo = + hw_engine_mmio_read32(hwe, + RING_EXECLIST_SQ_CONTENTS_LO(0)); + snapshot->reg.ring_execlist_sq_contents_hi = + hw_engine_mmio_read32(hwe, + RING_EXECLIST_SQ_CONTENTS_HI(0)); + snapshot->reg.ring_execlist_control = + hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0)); + snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + snapshot->reg.ring_head = + hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; + snapshot->reg.ring_tail = + hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; + snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0)); + snapshot->reg.ring_mi_mode = + hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0)); + snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0)); + snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); + snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); + snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); + snapshot->reg.ring_acthd_udw = + hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + snapshot->reg.ring_bbaddr_udw = + hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + snapshot->reg.ring_dma_fadd_udw = + hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + snapshot->reg.ring_dma_fadd = + hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + snapshot->reg.ipeir = hw_engine_mmio_read32(hwe, IPEIR(0)); + snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, IPEHR(0)); + + if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); + + return snapshot; +} + +/** + * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot. + * @snapshot: Xe HW Engine snapshot object. + * @p: drm_printer where it will be printed out. + * + * This function prints out a given Xe HW Engine snapshot object. + */ +void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, + struct drm_printer *p) +{ + if (!snapshot) return; - drm_printf(p, "%s (physical), logical instance=%d\n", hwe->name, - hwe->logical_instance); + drm_printf(p, "%s (physical), logical instance=%d\n", + snapshot->name ? 
snapshot->name : "", + snapshot->logical_instance); drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", - hwe->domain, - xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain)); - drm_printf(p, "\tMMIO base: 0x%08x\n", hwe->mmio_base); - - drm_printf(p, "\tHWSTAM: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HWSTAM(0))); - drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HWS_PGA(0))); - + snapshot->forcewake.domain, snapshot->forcewake.ref); + drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam); + drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga); drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0))); + snapshot->reg.ring_execlist_status_lo); drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0))); + snapshot->reg.ring_execlist_status_hi); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n", - hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS_LO(0))); + snapshot->reg.ring_execlist_sq_contents_lo); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", - hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS_HI(0))); + snapshot->reg.ring_execlist_sq_contents_hi); drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0))); - - drm_printf(p, "\tRING_START: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_START(0))); - drm_printf(p, "\tRING_HEAD: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR); - drm_printf(p, "\tRING_TAIL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR); - drm_printf(p, "\tRING_CTL: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_CTL(0))); + snapshot->reg.ring_execlist_control); + drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); + drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); + drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); + drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); + drm_printf(p, "\tRING_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode); drm_printf(p, "\tRING_MODE: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_MI_MODE(0))); - drm_printf(p, "\tRING_MODE_GEN7: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_MODE(0))); - - drm_printf(p, "\tRING_IMR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_IMR(0))); - drm_printf(p, "\tRING_ESR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_ESR(0))); - drm_printf(p, "\tRING_EMR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EMR(0))); - drm_printf(p, "\tRING_EIR: 0x%08x\n", - hw_engine_mmio_read32(hwe, RING_EIR(0))); - - drm_printf(p, "\tACTHD: 0x%08x_%08x\n", - hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)), - hw_engine_mmio_read32(hwe, RING_ACTHD(0))); - drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", - hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)), - hw_engine_mmio_read32(hwe, RING_BBADDR(0))); + snapshot->reg.ring_mode); + drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr); + drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr); + drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr); + drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir); + drm_printf(p, "\tACTHD: 0x%08x_%08x\n", snapshot->reg.ring_acthd_udw, + snapshot->reg.ring_acthd); + drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw, + snapshot->reg.ring_bbaddr); drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", - hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)), - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0))); + 
snapshot->reg.ring_dma_fadd_udw, + snapshot->reg.ring_dma_fadd); + drm_printf(p, "\tIPEIR: 0x%08x\n", snapshot->reg.ipeir); + drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr); + if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + drm_printf(p, "\tRCU_MODE: 0x%08x\n", + snapshot->reg.rcu_mode); +} - drm_printf(p, "\tIPEIR: 0x%08x\n", - hw_engine_mmio_read32(hwe, IPEIR(0))); - drm_printf(p, "\tIPEHR: 0x%08x\n\n", - hw_engine_mmio_read32(hwe, IPEHR(0))); +/** + * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot. + * @snapshot: Xe HW Engine snapshot object. + * + * This function free all the memory that needed to be allocated at capture + * time. + */ +void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot) +{ + if (!snapshot) + return; - if (hwe->class == XE_ENGINE_CLASS_COMPUTE) - drm_printf(p, "\tRCU_MODE: 0x%08x\n", - xe_mmio_read32(hwe->gt, RCU_MODE)); + kfree(snapshot->name); + kfree(snapshot); +} + +/** + * xe_hw_engine_print - Xe HW Engine Print. + * @hwe: Hardware Engine. + * @p: drm_printer. + * + * This function quickly capture a snapshot and immediately print it out. + */ +void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p) +{ + struct xe_hw_engine_snapshot *snapshot; + snapshot = xe_hw_engine_snapshot_capture(hwe); + xe_hw_engine_snapshot_print(snapshot, p); + xe_hw_engine_snapshot_free(snapshot); } u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 013efcd6d8c5..7eca9d53c7b1 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -14,9 +14,15 @@ int xe_hw_engines_init_early(struct xe_gt *gt); int xe_hw_engines_init(struct xe_gt *gt); void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec); void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); -void xe_hw_engine_print_state(struct xe_hw_engine *hwe, struct drm_printer *p); u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, enum xe_engine_class engine_class); + +struct xe_hw_engine_snapshot * +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe); +void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot); +void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, + struct drm_printer *p); +void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 2c40384957da..d788e67312b9 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -109,4 +109,82 @@ struct xe_hw_engine { enum xe_hw_engine_id engine_id; }; +/** + * struct xe_hw_engine_snapshot - Hardware engine snapshot + * + * Contains the snapshot of useful hardware engine info and registers. 
+ */ +struct xe_hw_engine_snapshot { + /** @name: name of the hw engine */ + char *name; + /** @class: class of this hw engine */ + enum xe_engine_class class; + /** @logical_instance: logical instance of this hw engine */ + u16 logical_instance; + /** @forcewake: Force Wake information snapshot */ + struct { + /** @domain: force wake domain of this hw engine */ + enum xe_force_wake_domains domain; + /** @ref: Forcewake ref for the above domain */ + int ref; + } forcewake; + /** @mmio_base: MMIO base address of this hw engine*/ + u32 mmio_base; + /** @reg: Useful MMIO register snapshot */ + struct { + /** @ring_hwstam: RING_HWSTAM */ + u32 ring_hwstam; + /** @ring_hws_pga: RING_HWS_PGA */ + u32 ring_hws_pga; + /** @ring_execlist_status_lo: RING_EXECLIST_STATUS_LO */ + u32 ring_execlist_status_lo; + /** @ring_execlist_status_hi: RING_EXECLIST_STATUS_HI */ + u32 ring_execlist_status_hi; + /** @ring_execlist_sq_contents_lo: RING_EXECLIST_SQ_CONTENTS */ + u32 ring_execlist_sq_contents_lo; + /** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */ + u32 ring_execlist_sq_contents_hi; + /** @ring_execlist_control: RING_EXECLIST_CONTROL */ + u32 ring_execlist_control; + /** @ring_start: RING_START */ + u32 ring_start; + /** @ring_head: RING_HEAD */ + u32 ring_head; + /** @ring_tail: RING_TAIL */ + u32 ring_tail; + /** @ring_ctl: RING_CTL */ + u32 ring_ctl; + /** @ring_mi_mode: RING_MI_MODE */ + u32 ring_mi_mode; + /** @ring_mode: RING_MODE */ + u32 ring_mode; + /** @ring_imr: RING_IMR */ + u32 ring_imr; + /** @ring_esr: RING_ESR */ + u32 ring_esr; + /** @ring_emr: RING_EMR */ + u32 ring_emr; + /** @ring_eir: RING_EIR */ + u32 ring_eir; + /** @ring_acthd_udw: RING_ACTHD_UDW */ + u32 ring_acthd_udw; + /** @ring_acthd: RING_ACTHD */ + u32 ring_acthd; + /** @ring_bbaddr_udw: RING_BBADDR_UDW */ + u32 ring_bbaddr_udw; + /** @ring_bbaddr: RING_BBADDR */ + u32 ring_bbaddr; + /** @ring_dma_fadd_udw: RING_DMA_FADD_UDW */ + u32 ring_dma_fadd_udw; + /** @ring_dma_fadd: RING_DMA_FADD */ + u32 ring_dma_fadd; + /** @ipeir: IPEIR */ + u32 ipeir; + /** @ipehr: IPEHR */ + u32 ipehr; + /** @rcu_mode: RCU_MODE */ + u32 rcu_mode; + } reg; +}; + #endif -- cgit v1.2.3 From 01a87f3181caab1b5eca8ae5a7436c1031b6f5a8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:15 -0400 Subject: drm/xe: Add HW Engine snapshot to xe_devcoredump. Let's continue to add our existent simple logs to devcoredump one by one. Any format change should come on follow-up work. v2: remove unnecessary, and now duplicated, dma_fence annotation. (Matthew) v3: avoid for_each with faulty_engine since that can be already freed at the time of the read/free. Instead, iterate in the full array of hw_engines. 
(Kasan) Cc: Francois Dugast Cc: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost Reviewed-by: Francois Dugast --- drivers/gpu/drm/xe/xe_devcoredump.c | 41 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_devcoredump_types.h | 4 +++ 2 files changed, 45 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7296c0137b47..f53f4b51233a 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -9,10 +9,13 @@ #include #include +#include "xe_device.h" #include "xe_engine.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_guc_ct.h" #include "xe_guc_submit.h" +#include "xe_hw_engine.h" /** * DOC: Xe device coredump @@ -63,6 +66,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct drm_printer p; struct drm_print_iterator iter; struct timespec64 ts; + int i; /* Our device is gone already... */ if (!data || !coredump_to_xe(coredump)) @@ -89,12 +93,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p); xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p); + drm_printf(&p, "\n**** HW Engines ****\n"); + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (coredump->snapshot.hwe[i]) + xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i], + &p); + return count - iter.remain; } static void xe_devcoredump_free(void *data) { struct xe_devcoredump *coredump = data; + int i; /* Our device is gone. Nothing to do... */ if (!data || !coredump_to_xe(coredump)) @@ -102,6 +113,9 @@ static void xe_devcoredump_free(void *data) xe_guc_ct_snapshot_free(coredump->snapshot.ct); xe_guc_engine_snapshot_free(coredump->snapshot.ge); + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (coredump->snapshot.hwe[i]) + xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); coredump->captured = false; drm_info(&coredump_to_xe(coredump)->drm, @@ -113,14 +127,41 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = engine_to_guc(e); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 adj_logical_mask = e->logical_mask; + u32 width_mask = (0x1 << e->width) - 1; + int i; bool cookie; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); cookie = dma_fence_begin_signalling(); + for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { + if (adj_logical_mask & BIT(i)) { + adj_logical_mask |= width_mask << i; + i += e->width; + } else { + ++i; + } + } + + xe_force_wake_get(gt_to_fw(e->gt), XE_FORCEWAKE_ALL); + coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e); + + for_each_hw_engine(hwe, e->gt, id) { + if (hwe->class != e->hwe->class || + !(BIT(hwe->logical_instance) & adj_logical_mask)) { + coredump->snapshot.hwe[id] = NULL; + continue; + } + coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe); + } + + xe_force_wake_put(gt_to_fw(e->gt), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 7c6453224139..350b905d1797 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -9,6 +9,8 @@ #include #include +#include "xe_hw_engine_types.h" + struct xe_device; /** @@ -29,6 +31,8 @@ struct xe_devcoredump_snapshot { struct xe_guc_ct_snapshot *ct; 
/** @ge: Guc Engine snapshot */ struct xe_guc_submit_engine_snapshot *ge; + /** @hwe: HW Engine snapshot array */ + struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES]; }; /** -- cgit v1.2.3 From 328f3414b13c06a85e447d6f2d5abd70b547c3ee Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 16 May 2023 10:54:16 -0400 Subject: drm/xe: Limit CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE to itself. There are multiple kind of config prints and with the upcoming devcoredump there will be another layer. Let's limit the config to the top level functions and leave the clean-up work for the compilers so we don't create a spider-web of configs. No functional change. Just a preparation for the devcoredump. Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_vm.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 62496a4008d2..40295beea3a2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3415,7 +3415,6 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) return 0; } -#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) { struct rb_node *node; @@ -3453,9 +3452,3 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) return 0; } -#else -int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) -{ - return 0; -} -#endif -- cgit v1.2.3 From 5013ad8dd75fdc035ff068980c91cf2ea821d142 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 11 May 2023 20:19:09 +0200 Subject: drm/xe: Move Media GuC register definition to regs/ This GuC register can be moved together with the rest of the GuC register definitions and be named in a similar way. v2: fix placement Bspec: 63363 Signed-off-by: Michal Wajdeczko Reviewed-by: Matt Atwood #v1 Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 2 ++ drivers/gpu/drm/xe/xe_guc.c | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 37e0ac550931..b4f27cadb68f 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -119,6 +119,8 @@ struct guc_doorbell_info { #define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4) #define VF_SW_FLAG_COUNT 4 +#define MED_GUC_HOST_INTERRUPT XE_REG(0x190304) + #define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4) #define MED_VF_SW_FLAG_COUNT 4 diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index b72407e24d09..92d732690252 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -22,8 +22,6 @@ #include "xe_uc_fw.h" #include "xe_wopcm.h" -#define MEDIA_GUC_HOST_INTERRUPT XE_REG(0x190304) - static struct xe_gt * guc_to_gt(struct xe_guc *guc) { @@ -268,7 +266,7 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); if (xe_gt_is_media_type(gt)) - guc->notify_reg = MEDIA_GUC_HOST_INTERRUPT; + guc->notify_reg = MED_GUC_HOST_INTERRUPT; else guc->notify_reg = GUC_HOST_INTERRUPT; -- cgit v1.2.3 From 915757a6cbf1d77877374627a284cafe9c0de7cd Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 May 2023 11:19:02 +0200 Subject: drm/xe: Change GuC interrupt data Both GUC_HOST_INTERRUPT and MED_GUC_HOST_INTERRUPT can pass additional payload data to the GuC but this capability is not used by the firmware yet. 
Stop using value mandated by legacy GuC interrupt register and use default notify value (zero) instead. Bspec: 49813, 63363 Signed-off-by: Michal Wajdeczko Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 92d732690252..71f18b32d09b 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -598,8 +598,14 @@ int xe_guc_suspend(struct xe_guc *guc) void xe_guc_notify(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); + const u32 default_notify_data = 0; - xe_mmio_write32(gt, guc->notify_reg, GUC_SEND_TRIGGER); + /* + * Both GUC_HOST_INTERRUPT and MED_GUC_HOST_INTERRUPT can pass + * additional payload data to the GuC but this capability is not + * used by the firmware yet. Use default value in the meantime. + */ + xe_mmio_write32(gt, guc->notify_reg, default_notify_data); } int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) -- cgit v1.2.3 From 82f428b627607cd4ae0355c09b3164961b041505 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 22 May 2023 11:52:52 +0100 Subject: drm/xe: fix kernel-doc issues drivers/gpu/drm/xe/xe_guc_submit_types.h:47: warning: cannot understand function prototype: 'struct guc_submit_parallel_scratch ' drivers/gpu/drm/xe/xe_devcoredump_types.h:38: warning: Function parameter or member 'ct' not described in 'xe_devcoredump_snapshot' CI doesn't appear to be running BAT anymore, assuming this is caused by the CI.Hooks now failing due to above warnings. Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Reviewed-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 +- drivers/gpu/drm/xe/xe_guc_submit_types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 350b905d1797..c0d711eb6ab3 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -27,7 +27,7 @@ struct xe_devcoredump_snapshot { ktime_t boot_time; /* GuC snapshots */ - /** @ct_snapshot: GuC CT snapshot */ + /** @ct: GuC CT snapshot */ struct xe_guc_ct_snapshot *ct; /** @ge: Guc Engine snapshot */ struct xe_guc_submit_engine_snapshot *ge; diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h index 88e855dae056..6765b2c6eab1 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -41,7 +41,7 @@ struct sync_semaphore { }; /** - * Struct guc_submit_parallel_scratch - A scratch shared mapped buffer. + * struct guc_submit_parallel_scratch - A scratch shared mapped buffer. */ struct guc_submit_parallel_scratch { /** @wq_desc: Guc scheduler workqueue descriptor */ -- cgit v1.2.3 From 6fedf8426d377ea9b57c91870d495006a683605e Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 19 May 2023 16:48:02 -0300 Subject: drm/xe: Do not forget to drm_dev_put() in xe_pci_probe() The function drm_dev_put() should also be called if xe_device_probe() fails. v2: - Improve commit message. 
(Lucas) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230519194802.578182-1-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f0d0e999aa56..c7184e49b10b 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -614,10 +614,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) subplatform_desc = find_subplatform(xe, desc); err = xe_info_init(xe, desc, subplatform_desc); - if (err) { - drm_dev_put(&xe->drm); - return err; - } + if (err) + goto err_drm_put; drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d", desc->platform_name, @@ -640,10 +638,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, xe); err = pci_enable_device(pdev); - if (err) { - drm_dev_put(&xe->drm); - return err; - } + if (err) + goto err_drm_put; pci_set_master(pdev); @@ -651,14 +647,20 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) drm_dbg(&xe->drm, "can't enable MSI"); err = xe_device_probe(xe); - if (err) { - pci_disable_device(pdev); - return err; - } + if (err) + goto err_pci_disable; xe_pm_runtime_init(xe); return 0; + +err_pci_disable: + pci_disable_device(pdev); + +err_drm_put: + drm_dev_put(&xe->drm); + + return err; } static void xe_pci_shutdown(struct pci_dev *pdev) -- cgit v1.2.3 From b67ece5b173375451de5c3a562c43aaf410001c5 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 18 May 2023 18:56:50 -0300 Subject: drm/xe: Call drmm_add_action_or_reset() early in xe_device_create() Otherwise no cleanup is actually done if we branch to err_put. This works for now: currently we do know that, once inside xe_device_destroy(), ttm_device_init() was successful so we can safely call ttm_device_fini(); and, for xe->ordered_wq, there is an upcoming commit to check its value before calling destroy_workqueue(). However, we might need change this in the future if we have more initializers called that can fail in a way that we can not know which one was it once inside xe_device_destroy(). 
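A minimal sketch of the intended ordering (simplified, not the exact Xe init sequence; some_later_init() is a hypothetical stand-in for any fallible initializer added after this point):

	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
	if (err)
		goto err_put;

	/*
	 * Every initializer that runs after the action is registered can
	 * simply bail out; the drm_dev_put() below drops the last reference
	 * and runs xe_device_destroy(), tearing down whatever was already
	 * set up.
	 */
	err = some_later_init(xe);	/* hypothetical */
	if (err)
		goto err_put;

	return xe;

err_put:
	drm_dev_put(&xe->drm);
	return ERR_PTR(err);
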
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230518215651.502159-2-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8039142ae1a1..42456d044827 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -181,6 +181,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (WARN_ON(err)) goto err_put; + err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); + if (err) + goto err_put; + xe->info.devid = pdev->device; xe->info.revid = pdev->revision; xe->info.enable_guc = enable_guc; @@ -205,10 +209,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, drmm_mutex_init(&xe->drm, &xe->sb_lock); xe->enabled_irq_mask = ~0; - err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); - if (err) - goto err_put; - return xe; err_put: -- cgit v1.2.3 From c93b6de7cc7610a269afe0e84a0b3e2b81a746cd Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 18 May 2023 18:56:51 -0300 Subject: drm/xe: Fail xe_device_create() if wq allocation fails Let's make sure we give the driver a valid workqueue. While at it, also make sure to call destroy_workqueue() only if the workqueue is a valid one. That is necessary because xe_device_destroy() is indirectly called as part of the cleanup process of a failed xe_device_create(). Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230518215651.502159-3-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 42456d044827..2c65eb84e6e9 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "regs/xe_regs.h" @@ -157,7 +158,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); - destroy_workqueue(xe->ordered_wq); + if (xe->ordered_wq) + destroy_workqueue(xe->ordered_wq); + ttm_device_fini(&xe->ttm); } @@ -205,6 +208,11 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, INIT_LIST_HEAD(&xe->pinned.evicted); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); + if (!xe->ordered_wq) { + drm_err(&xe->drm, "Failed to allocate xe-ordered-wq\n"); + err = -ENOMEM; + goto err_put; + } drmm_mutex_init(&xe->drm, &xe->sb_lock); xe->enabled_irq_mask = ~0; -- cgit v1.2.3 From 6ed6ba32dba14ef851ecb7190597d6bac77618e2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 24 May 2023 11:59:52 -0700 Subject: drm/xe: Add stepping support for GMD_ID platforms For platforms with GMD_ID registers, the IP stepping should be determined from the 'revid' field of those registers rather than from the PCI revid. The hardware teams have indicated that they plan to keep the revid => stepping mapping consistent across all GMD_ID platforms, with major steppings (A0, B0, C0, etc.) having revids that are multiples of 4, and minor steppings (A1, A2, A3, etc.) taking the intermediate values. 
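As an illustration of that scheme (a reading of the rule above, not a table lifted from the spec): revid 0 would be A0, revid 1 A1, revid 4 B0, revid 8 C0, and so on; conceptually, major stepping = revid / 4 and minor stepping = revid % 4.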
For now we'll trust that hardware follows through on this plan; if they have to change direction in the future (e.g., they wind up needing something like an "A4" that doesn't fit this scheme), we can add a GMD_ID-based lookup table when the time comes. v2: - Set xe->info.platform before finding stepping; the pre-GMD_ID code relies on this value to pick a lookup table. v3: - Also set xe->info.subplatform before picking the stepping for pre-GMD_ID lookup. Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20230524185952.666158-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- drivers/gpu/drm/xe/xe_pci.c | 28 ++++++++++++++-------- drivers/gpu/drm/xe/xe_step.c | 45 +++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_step.h | 5 +++- drivers/gpu/drm/xe/xe_step_types.h | 13 +++++------ 5 files changed, 74 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 4a38f78277b5..5c239989608f 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -24,7 +24,7 @@ #define GMD_ID XE_REG(0xd8c) #define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) #define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) -#define GMD_ID_STEP REG_GENMASK(5, 0) +#define GMD_ID_REVID REG_GENMASK(5, 0) #define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c7184e49b10b..50027eb642ea 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -441,11 +441,14 @@ static void handle_pre_gmdid(struct xe_device *xe, static void handle_gmdid(struct xe_device *xe, const struct xe_device_desc *desc, const struct xe_graphics_desc **graphics, - const struct xe_media_desc **media) + const struct xe_media_desc **media, + u32 *graphics_revid, + u32 *media_revid) { u32 ver; ver = peek_gmdid(xe, GMD_ID.addr); + *graphics_revid = REG_FIELD_GET(GMD_ID_REVID, ver); for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { if (ver == graphics_ip_map[i].ver) { xe->info.graphics_verx100 = ver; @@ -461,6 +464,7 @@ static void handle_gmdid(struct xe_device *xe, } ver = peek_gmdid(xe, GMD_ID.addr + 0x380000); + *media_revid = REG_FIELD_GET(GMD_ID_REVID, ver); for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { if (ver == media_ip_map[i].ver) { xe->info.media_verx100 = ver; @@ -483,19 +487,30 @@ static int xe_info_init(struct xe_device *xe, { const struct xe_graphics_desc *graphics_desc = NULL; const struct xe_media_desc *media_desc = NULL; + u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0; struct xe_gt *gt; u8 id; + xe->info.platform = desc->platform; + xe->info.subplatform = subplatform_desc ? + subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; + /* * If this platform supports GMD_ID, we'll detect the proper IP * descriptor to use from hardware registers. desc->graphics will only * ever be set at this point for platforms before GMD_ID. In that case * the IP descriptions and versions are simply derived from that. 
*/ - if (desc->graphics) + if (desc->graphics) { handle_pre_gmdid(xe, desc, &graphics_desc, &media_desc); - else - handle_gmdid(xe, desc, &graphics_desc, &media_desc); + xe->info.step = xe_step_pre_gmdid_get(xe); + } else { + handle_gmdid(xe, desc, &graphics_desc, &media_desc, + &graphics_gmdid_revid, &media_gmdid_revid); + xe->info.step = xe_step_gmdid_get(xe, + graphics_gmdid_revid, + media_gmdid_revid); + } /* * If we couldn't detect the graphics IP, that's considered a fatal @@ -506,7 +521,6 @@ static int xe_info_init(struct xe_device *xe, return -ENODEV; xe->info.is_dgfx = desc->is_dgfx; - xe->info.platform = desc->platform; xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? media_desc->name : "none"; xe->info.has_4tile = desc->has_4tile; @@ -534,10 +548,6 @@ static int xe_info_init(struct xe_device *xe, if (MEDIA_VER(xe) >= 13) xe->info.tile_count++; - xe->info.subplatform = subplatform_desc ? - subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; - xe->info.step = xe_step_get(xe); - for (id = 0; id < xe->info.tile_count; ++id) { gt = xe->gt + id; gt->info.id = id; diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index a443d9bd7bbb..1baf79ba02ad 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -107,7 +107,14 @@ static const int pvc_basedie_subids[] = { __diag_pop(); -struct xe_step_info xe_step_get(struct xe_device *xe) +/** + * xe_step_pre_gmdid_get - Determine IP steppings from PCI revid + * @xe: Xe device + * + * Convert the PCI revid into proper IP steppings. This should only be + * used on platforms that do not have GMD_ID support. + */ +struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe) { const struct xe_step_info *revids = NULL; struct xe_step_info step = {}; @@ -198,6 +205,42 @@ struct xe_step_info xe_step_get(struct xe_device *xe) return step; } +/** + * xe_step_gmdid_get - Determine IP steppings from GMD_ID revid fields + * @xe: Xe device + * @graphics_gmdid_revid: value of graphics GMD_ID register's revid field + * @media_gmdid_revid: value of media GMD_ID register's revid field + * + * Convert the revid fields of the GMD_ID registers into proper IP steppings. + * + * GMD_ID revid values are currently expected to have consistent meanings on + * all platforms: major steppings (A0, B0, etc.) are 4 apart, with minor + * steppings (A1, A2, etc.) taking the values in between. 
+ */ +struct xe_step_info xe_step_gmdid_get(struct xe_device *xe, + u32 graphics_gmdid_revid, + u32 media_gmdid_revid) +{ + struct xe_step_info step = { + .graphics = STEP_A0 + graphics_gmdid_revid, + .media = STEP_A0 + media_gmdid_revid, + }; + + if (step.graphics >= STEP_FUTURE) { + step.graphics = STEP_FUTURE; + drm_dbg(&xe->drm, "Graphics GMD_ID revid value %d treated as future stepping\n", + graphics_gmdid_revid); + } + + if (step.media >= STEP_FUTURE) { + step.media = STEP_FUTURE; + drm_dbg(&xe->drm, "Media GMD_ID revid value %d treated as future stepping\n", + graphics_gmdid_revid); + } + + return step; +} + #define STEP_NAME_CASE(name) \ case STEP_##name: \ return #name; diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h index 0c596c8579fb..a384b640f2af 100644 --- a/drivers/gpu/drm/xe/xe_step.h +++ b/drivers/gpu/drm/xe/xe_step.h @@ -12,7 +12,10 @@ struct xe_device; -struct xe_step_info xe_step_get(struct xe_device *xe); +struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe); +struct xe_step_info xe_step_gmdid_get(struct xe_device *xe, + u32 graphics_gmdid_revid, + u32 media_gmdid_revid); const char *xe_step_name(enum xe_step step); #endif diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h index b7859f9647ca..ccc9b4795e95 100644 --- a/drivers/gpu/drm/xe/xe_step_types.h +++ b/drivers/gpu/drm/xe/xe_step_types.h @@ -21,21 +21,20 @@ struct xe_step_info { func(A0) \ func(A1) \ func(A2) \ + func(A3) \ func(B0) \ func(B1) \ func(B2) \ func(B3) \ func(C0) \ func(C1) \ + func(C2) \ + func(C3) \ func(D0) \ func(D1) \ - func(E0) \ - func(F0) \ - func(G0) \ - func(H0) \ - func(I0) \ - func(I1) \ - func(J0) + func(D2) \ + func(D3) \ + func(E0) /* * Symbolic steppings that do not match the hardware. These are valid both as gt -- cgit v1.2.3 From 1b1d3710380d5f0517dcaabe1b96b6401f68ec37 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 16 May 2023 03:26:53 +0000 Subject: drm/xe: Apply upper limit to sg element size The iommu_dma_map_sg() function ensures iova allocation doesn't cross dma segment boundary. It does so by padding some sg elements. This can cause overflow, ending up with sg->length being set to 0. Avoid this by halving the maximum segment size (rounded down to PAGE_SIZE). Specify maximum segment size for sg elements by using sg_alloc_table_from_pages_segment() to allocate sg_table. 
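A minimal sketch of the capped-segment allocation described above; the device and page array are assumed to come from the caller, and only the kernel helpers shown are real APIs:

static int map_user_pages_sketch(struct device *dev, struct sg_table *sgt,
				 struct page **pages, unsigned int n_pages)
{
	/* Half of the DMA mapping limit, page aligned, so IOMMU padding cannot overflow. */
	unsigned int max_seg = round_down(dma_max_mapping_size(dev) / 2, PAGE_SIZE);

	return sg_alloc_table_from_pages_segment(sgt, pages, n_pages, 0,
						 (u64)n_pages << PAGE_SHIFT,
						 max_seg, GFP_KERNEL);
}

The driver-side helper in the diff below additionally caps the value to what the scatterlist length field can represent before halving it.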
v2: Use correct max segment size in dma_set_max_seg_size() call Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Bruce Chang Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 8 +++++--- drivers/gpu/drm/xe/xe_bo.h | 24 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_mmio.c | 7 ++----- drivers/gpu/drm/xe/xe_vm.c | 8 +++++--- 4 files changed, 36 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4693372ec82e..7a5118bf4dc0 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -251,9 +251,11 @@ static int xe_tt_map_sg(struct ttm_tt *tt) if (xe_tt->sg) return 0; - ret = sg_alloc_table_from_pages(&xe_tt->sgt, tt->pages, num_pages, - 0, (u64)num_pages << PAGE_SHIFT, - GFP_KERNEL); + ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages, + num_pages, 0, + (u64)num_pages << PAGE_SHIFT, + xe_sg_segment_size(xe_tt->dev), + GFP_KERNEL); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 7e111332c35a..2d08622f58a7 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -296,6 +296,30 @@ void xe_bo_put_commit(struct llist_head *deferred); struct sg_table *xe_bo_get_sg(struct xe_bo *bo); +/* + * xe_sg_segment_size() - Provides upper limit for sg segment size. + * @dev: device pointer + * + * Returns the maximum segment size for the 'struct scatterlist' + * elements. + */ +static inline unsigned int xe_sg_segment_size(struct device *dev) +{ + struct scatterlist __maybe_unused sg; + size_t max = BIT_ULL(sizeof(sg.length) * 8) - 1; + + max = min_t(size_t, max, dma_max_mapping_size(dev)); + + /* + * The iommu_dma_map_sg() function ensures iova allocation doesn't + * cross dma segment boundary. It does so by padding some sg elements. + * This can cause overflow, ending up with sg->length being set to 0. + * Avoid this by ensuring maximum segment size is half of 'max' + * rounded down to PAGE_SIZE. 
+ */ + return round_down(max / 2, PAGE_SIZE); +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) /** * xe_bo_is_mem_type - Whether the bo currently resides in the given diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index c7fbb1cc1f64..4c270a07136e 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -11,6 +11,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" +#include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_mcr.h" @@ -26,11 +27,7 @@ static int xe_set_dma_info(struct xe_device *xe) unsigned int mask_size = xe->info.dma_mask_size; int err; - /* - * We don't have a max segment size, so set it to the max so sg's - * debugging layer doesn't complain - */ - dma_set_max_seg_size(xe->drm.dev, UINT_MAX); + dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev)); err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); if (err) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 40295beea3a2..25a61735aac8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -117,9 +117,11 @@ mm_closed: if (ret) goto out; - ret = sg_alloc_table_from_pages(&vma->userptr.sgt, pages, pinned, - 0, (u64)pinned << PAGE_SHIFT, - GFP_KERNEL); + ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages, + pinned, 0, + (u64)pinned << PAGE_SHIFT, + xe_sg_segment_size(xe->drm.dev), + GFP_KERNEL); if (ret) { vma->userptr.sg = NULL; goto out; -- cgit v1.2.3 From 1799c761c48059366f081adeef718fa13d4bb133 Mon Sep 17 00:00:00 2001 From: Christopher Snowhill Date: Wed, 24 May 2023 18:56:07 -0700 Subject: drm/xe: Validate uAPI padding and reserved fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Padding and reserved fields are declared such that they must be zeroed, so verify that they're all zero in the respective ioctl functions. Derived from original patch by mlankhorst. v2: Removed extensions checks where there were none originally. (José) Moved extraneous parentheses to the correct places. 
(Lucas) Signed-off-by: Maarten Lankhorst Signed-off-by: Christopher Snowhill Reviewed-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 6 ++++-- drivers/gpu/drm/xe/xe_engine.c | 18 ++++++++++++++---- drivers/gpu/drm/xe/xe_exec.c | 4 +++- drivers/gpu/drm/xe/xe_mmio.c | 3 ++- drivers/gpu/drm/xe/xe_query.c | 3 ++- drivers/gpu/drm/xe/xe_sync.c | 4 +++- drivers/gpu/drm/xe/xe_vm.c | 22 +++++++++++++++++++--- drivers/gpu/drm/xe/xe_vm_madvise.c | 4 +++- drivers/gpu/drm/xe/xe_wait_user_fence.c | 3 ++- 9 files changed, 52 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 7a5118bf4dc0..798b9938e534 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1646,7 +1646,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, u32 handle; int err; - if (XE_IOCTL_ERR(xe, args->extensions)) + if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; if (XE_IOCTL_ERR(xe, args->flags & @@ -1716,7 +1717,8 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, struct drm_xe_gem_mmap_offset *args = data; struct drm_gem_object *gem_obj; - if (XE_IOCTL_ERR(xe, args->extensions)) + if (XE_IOCTL_ERR(xe, args->extensions) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; if (XE_IOCTL_ERR(xe, args->flags)) diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 5666c8e00c97..4fca422e9e7b 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -348,7 +348,8 @@ static int engine_user_ext_set_property(struct xe_device *xe, return -EFAULT; if (XE_IOCTL_ERR(xe, ext.property >= - ARRAY_SIZE(engine_set_property_funcs))) + ARRAY_SIZE(engine_set_property_funcs)) || + XE_IOCTL_ERR(xe, ext.pad)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs)); @@ -380,7 +381,8 @@ static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, if (XE_IOCTL_ERR(xe, err)) return -EFAULT; - if (XE_IOCTL_ERR(xe, ext.name >= + if (XE_IOCTL_ERR(xe, ext.pad) || + XE_IOCTL_ERR(xe, ext.name >= ARRAY_SIZE(engine_user_extension_funcs))) return -EINVAL; @@ -523,7 +525,8 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, int len; int err; - if (XE_IOCTL_ERR(xe, args->flags)) + if (XE_IOCTL_ERR(xe, args->flags) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; len = args->width * args->num_placements; @@ -639,6 +642,9 @@ int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_get_property *args = data; struct xe_engine *e; + if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + mutex_lock(&xef->engine.lock); e = xa_load(&xef->engine.xa, args->engine_id); mutex_unlock(&xef->engine.lock); @@ -718,7 +724,8 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_destroy *args = data; struct xe_engine *e; - if (XE_IOCTL_ERR(xe, args->pad)) + if (XE_IOCTL_ERR(xe, args->pad) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; mutex_lock(&xef->engine.lock); @@ -748,6 +755,9 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, int ret; u32 idx; + if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + e = xe_engine_lookup(xef, args->engine_id); 
if (XE_IOCTL_ERR(xe, !e)) return -ENOENT; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 3db1b159586e..e44076ee2e11 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -181,7 +181,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bool write_locked; int err = 0; - if (XE_IOCTL_ERR(xe, args->extensions)) + if (XE_IOCTL_ERR(xe, args->extensions) || + XE_IOCTL_ERR(xe, args->pad[0] || args->pad[1] || args->pad[2]) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; engine = xe_engine_lookup(xef, args->engine_id); diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 4c270a07136e..87dd417e3f08 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -404,7 +404,8 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, bool allowed; int ret = 0; - if (XE_IOCTL_ERR(xe, args->extensions)) + if (XE_IOCTL_ERR(xe, args->extensions) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS)) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index dd64ff0d2a57..b10959fde43b 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -374,7 +374,8 @@ int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_xe_device_query *query = data; u32 idx; - if (XE_IOCTL_ERR(xe, query->extensions != 0)) + if (XE_IOCTL_ERR(xe, query->extensions) || + XE_IOCTL_ERR(xe, query->reserved[0] || query->reserved[1])) return -EINVAL; if (XE_IOCTL_ERR(xe, query->query > ARRAY_SIZE(xe_query_funcs))) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 1e4e4acb2c4a..5acb37a8b2ab 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -111,7 +111,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, return -EFAULT; if (XE_IOCTL_ERR(xe, sync_in.flags & - ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL))) + ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)) || + XE_IOCTL_ERR(xe, sync_in.pad) || + XE_IOCTL_ERR(xe, sync_in.reserved[0] || sync_in.reserved[1])) return -EINVAL; signal = sync_in.flags & DRM_XE_SYNC_SIGNAL; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 25a61735aac8..ffa102870d1f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1856,7 +1856,9 @@ static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm, return -EFAULT; if (XE_IOCTL_ERR(xe, ext.property >= - ARRAY_SIZE(vm_set_property_funcs))) + ARRAY_SIZE(vm_set_property_funcs)) || + XE_IOCTL_ERR(xe, ext.pad) || + XE_IOCTL_ERR(xe, ext.reserved[0] || ext.reserved[1])) return -EINVAL; return vm_set_property_funcs[ext.property](xe, vm, ext.value); @@ -1884,7 +1886,8 @@ static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm, if (XE_IOCTL_ERR(xe, err)) return -EFAULT; - if (XE_IOCTL_ERR(xe, ext.name >= + if (XE_IOCTL_ERR(xe, ext.pad) || + XE_IOCTL_ERR(xe, ext.name >= ARRAY_SIZE(vm_user_extension_funcs))) return -EINVAL; @@ -1915,6 +1918,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, int err; u32 flags = 0; + if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) return -EINVAL; @@ -1998,7 +2004,8 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, struct drm_xe_vm_destroy *args = data; struct xe_vm 
*vm; - if (XE_IOCTL_ERR(xe, args->pad)) + if (XE_IOCTL_ERR(xe, args->pad) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; vm = xe_vm_lookup(xef, args->vm_id); @@ -2914,6 +2921,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, int i; if (XE_IOCTL_ERR(xe, args->extensions) || + XE_IOCTL_ERR(xe, args->pad || args->pad2) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]) || XE_IOCTL_ERR(xe, !args->num_binds) || XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS)) return -EINVAL; @@ -2946,6 +2955,13 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u64 obj_offset = (*bind_ops)[i].obj_offset; u32 region = (*bind_ops)[i].region; + if (XE_IOCTL_ERR(xe, (*bind_ops)[i].pad) || + XE_IOCTL_ERR(xe, (*bind_ops)[i].reserved[0] || + (*bind_ops)[i].reserved[1])) { + err = -EINVAL; + goto free_bind_ops; + } + if (i == 0) { *async = !!(op & XE_VM_BIND_FLAG_ASYNC); } else if (XE_IOCTL_ERR(xe, !*async) || diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 89a02c8e0424..6c196431a60e 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -301,7 +301,9 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct xe_vma **vmas = NULL; int num_vmas = 0, err = 0, idx; - if (XE_IOCTL_ERR(xe, args->extensions)) + if (XE_IOCTL_ERR(xe, args->extensions) || + XE_IOCTL_ERR(xe, args->pad || args->pad2) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; if (XE_IOCTL_ERR(xe, args->property > ARRAY_SIZE(madvise_funcs))) diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 15c2e5aa08d2..6c8a60c60087 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -100,7 +100,8 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR; unsigned long timeout = args->timeout; - if (XE_IOCTL_ERR(xe, args->extensions)) + if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) || + XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS)) -- cgit v1.2.3 From 58e30342c75d38606e30e02ef125252b10829450 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 24 May 2023 12:26:35 -0700 Subject: drm/xe/pvc: Don't try to invalidate AuxCCS TLB Generally !has_flatccs implies that a platform has AuxCCS compression and thus needs to invalidate the AuxCCS TLB. However PVC is a special case because it has no compression of either type (FlatCCS or AuxCCS) so we should avoid writing to non-existent AuxCCS registers. Reviewed-by: Haridhar Kalvala Link: https://lore.kernel.org/r/20230524192635.673293-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 06364bb2e95b..a09ee8c736b5 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -190,6 +190,18 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } +static bool has_aux_ccs(struct xe_device *xe) +{ + /* + * PVC is a special case that has no compression of either type + * (FlatCCS or AuxCCS). 
+ */ + if (xe->info.platform == XE_PVC) + return false; + + return !xe->info.has_flat_ccs; +} + static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, u64 batch_addr, u32 seqno) { @@ -202,7 +214,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, dw[i++] = preparser_disable(true); /* hsdes: 1809175790 */ - if (!xe->info.has_flat_ccs) { + if (has_aux_ccs(xe)) { if (decode) i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i); else @@ -248,7 +260,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_pipe_invalidate(mask_flags, dw, i); /* hsdes: 1809175790 */ - if (!xe->info.has_flat_ccs) + if (has_aux_ccs(xe)) i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i); dw[i++] = preparser_disable(false); -- cgit v1.2.3 From a9bd807eb16be11e11f6c6d3921119381cc43135 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:38 -0700 Subject: drm/xe: Fix Wa_22011802037 annotation It was missing one digit, so not showing up as a proper WA number. Add the missing number and annotate it with a FIXME as there are more to be implemented to consider this WA done: ensure CS is stop before doing a reset, wait for pending. Also, this WA applies to platforms up to graphics version 1270 (with the exception of MTL A*, that are not supported in xe). Fix platform check. Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/284 Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 71f18b32d09b..a8e249205bff 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -160,9 +160,11 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) flags |= GUC_WA_DUAL_QUEUE; /* - * Wa_2201180203 + * Wa_22011802037: FIXME - there's more to be done than simply setting + * this flag: make sure each CS is stopped when preparing for GT reset + * and wait for pending MI_FW. */ - if (GRAPHICS_VER(xe) <= 12) + if (GRAPHICS_VERx100(xe) < 1270) flags |= GUC_WA_PRE_PARSER; /* Wa_16011777198 */ -- cgit v1.2.3 From 72906d340b60f3dae545deef77376a0f598bece7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:39 -0700 Subject: drm/xe/rtp: Split rtp process initialization The selection between hwe and gt is exposed to the outside of rtp, by the xe_rtp_process() function. However it doesn't make seense from the caller point of view to pass a hwe and a gt as argument since the gt should always be the one containing the hwe. This clarifies the interface by separating the context creation into an initializer. The initializer then passes the correct value and there should never be a case with hwe and gt set: when hwe is passed, the gt is the one containing it. Internally the functions continue receiving the argument separately. 
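The caller-side change is a two-step pattern: build a context from whichever object you have, then process the table. A usage sketch, assuming valid gt and hwe pointers and the table names from xe_wa.c:

/* GT-level processing: no engine is involved. */
struct xe_rtp_process_ctx gt_ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
xe_rtp_process(&gt_ctx, gt_was, &gt->reg_sr);

/* Engine-level processing: the GT is derived from the engine itself. */
struct xe_rtp_process_ctx hwe_ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
xe_rtp_process(&hwe_ctx, engine_was, &hwe->reg_sr);

The initializer dispatches on the pointer type, so passing both a gt and an hwe is no longer possible at the call site.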
v2: Leave the device-only context to a separate patch if they are indeed needed later Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 3 +- drivers/gpu/drm/xe/xe_hw_engine.c | 8 +++-- drivers/gpu/drm/xe/xe_reg_whitelist.c | 4 ++- drivers/gpu/drm/xe/xe_rtp.c | 63 +++++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_rtp.h | 9 +++-- drivers/gpu/drm/xe/xe_rtp_types.h | 13 +++++++ drivers/gpu/drm/xe/xe_tuning.c | 8 +++-- drivers/gpu/drm/xe/xe_wa.c | 12 +++++-- 8 files changed, 91 insertions(+), 29 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 4b2aac5ccf28..f96ef1987719 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -238,10 +238,11 @@ static void xe_rtp_process_tests(struct kunit *test) struct xe_device *xe = test->priv; struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr; const struct xe_reg_sr_entry *sre, *sr_entry = NULL; + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(&xe->gt[0]); unsigned long idx, count = 0; xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe); - xe_rtp_process(param->entries, reg_sr, &xe->gt[0], NULL); + xe_rtp_process(&ctx, param->entries, reg_sr); xa_for_each(®_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 71ac4defb947..25b96f40d5a7 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -281,6 +281,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) const u8 mocs_read_idx = gt->mocs.uc_index; u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) | REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); const struct xe_rtp_entry lrc_was[] = { /* * Some blitter commands do not have a field for MOCS, those @@ -299,7 +300,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) {} }; - xe_rtp_process(lrc_was, &hwe->reg_lrc, gt, hwe); + xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc); } static void @@ -311,7 +312,8 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) const u8 mocs_read_idx = gt->mocs.uc_index; u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); - const struct xe_rtp_entry engine_was[] = { + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + const struct xe_rtp_entry engine_entries[] = { /* * RING_CMD_CCTL specifies the default MOCS entry that will be * used by the command streamer when executing commands that @@ -332,7 +334,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) {} }; - xe_rtp_process(engine_was, &hwe->reg_sr, gt, hwe); + xe_rtp_process(&ctx, engine_entries, &hwe->reg_sr); } static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 7a2bb60ebd85..98f678d74445 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -63,7 +63,9 @@ static const struct xe_rtp_entry register_whitelist[] = { */ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) { - xe_rtp_process(register_whitelist, &hwe->reg_whitelist, 
hwe->gt, hwe); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + + xe_rtp_process(&ctx, register_whitelist, &hwe->reg_whitelist); } /** diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 0c6a23e14a71..5be25fe0e8e4 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -23,11 +23,11 @@ * the values to the registers that have matching rules. */ -static bool rule_matches(struct xe_gt *gt, +static bool rule_matches(const struct xe_device *xe, + struct xe_gt *gt, struct xe_hw_engine *hwe, const struct xe_rtp_entry *entry) { - const struct xe_device *xe = gt_to_xe(gt); const struct xe_rtp_rule *r; unsigned int i; bool match; @@ -62,22 +62,27 @@ static bool rule_matches(struct xe_gt *gt, match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end; break; + case XE_RTP_MATCH_INTEGRATED: + match = !xe->info.is_dgfx; + break; + case XE_RTP_MATCH_DISCRETE: + match = xe->info.is_dgfx; + break; case XE_RTP_MATCH_ENGINE_CLASS: + if (drm_WARN_ON(&xe->drm, !hwe)) + return false; + match = hwe->class == r->engine_class; break; case XE_RTP_MATCH_NOT_ENGINE_CLASS: + if (drm_WARN_ON(&xe->drm, !hwe)) + return false; + match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: match = r->match_func(gt, hwe); break; - case XE_RTP_MATCH_INTEGRATED: - match = !xe->info.is_dgfx; - break; - case XE_RTP_MATCH_DISCRETE: - match = xe->info.is_dgfx; - break; - default: XE_WARN_ON(r->match_type); } @@ -105,14 +110,15 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action, xe_reg_sr_add(sr, &sr_entry); } -static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt, +static void rtp_process_one(const struct xe_rtp_entry *entry, + struct xe_device *xe, struct xe_gt *gt, struct xe_hw_engine *hwe, struct xe_reg_sr *sr) { const struct xe_rtp_action *action; u32 mmio_base; unsigned int i; - if (!rule_matches(gt, hwe, entry)) + if (!rule_matches(xe, gt, hwe, entry)) return; for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) { @@ -126,23 +132,46 @@ static void rtp_process_one(const struct xe_rtp_entry *entry, struct xe_gt *gt, } } +static void rtp_get_context(struct xe_rtp_process_ctx *ctx, + struct xe_hw_engine **hwe, + struct xe_gt **gt, + struct xe_device **xe) +{ + switch (ctx->type) { + case XE_RTP_PROCESS_TYPE_GT: + *hwe = NULL; + *gt = ctx->gt; + *xe = gt_to_xe(*gt); + break; + case XE_RTP_PROCESS_TYPE_ENGINE: + *hwe = ctx->hwe; + *gt = (*hwe)->gt; + *xe = gt_to_xe(*gt); + break; + }; +} + /** * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr + * @ctx: The context for processing the table, with one of device, gt or hwe * @entries: Table with RTP definitions * @sr: Where to add an entry to with the values for matching. This can be * viewed as the "coalesced view" of multiple the tables. The bits for each * register set are expected not to collide with previously added entries - * @gt: The GT to be used for matching rules - * @hwe: Engine instance to use for matching rules and as mmio base * * Walk the table pointed by @entries (with an empty sentinel) and add all * entries with matching rules to @sr. 
If @hwe is not NULL, its mmio_base is * used to calculate the right register offset */ -void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, - struct xe_gt *gt, struct xe_hw_engine *hwe) +void xe_rtp_process(struct xe_rtp_process_ctx *ctx, + const struct xe_rtp_entry *entries, struct xe_reg_sr *sr) { const struct xe_rtp_entry *entry; + struct xe_hw_engine *hwe = NULL; + struct xe_gt *gt = NULL; + struct xe_device *xe = NULL; + + rtp_get_context(ctx, &hwe, >, &xe); for (entry = entries; entry && entry->name; entry++) { if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) { @@ -150,9 +179,9 @@ void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, enum xe_hw_engine_id id; for_each_hw_engine(each_hwe, gt, id) - rtp_process_one(entry, gt, each_hwe, sr); + rtp_process_one(entry, xe, gt, each_hwe, sr); } else { - rtp_process_one(entry, gt, hwe, sr); + rtp_process_one(entry, xe, gt, hwe, sr); } } } diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 8a89ad45589a..c4b718b9632e 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -355,8 +355,13 @@ struct xe_reg_sr; XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \ } -void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr, - struct xe_gt *gt, struct xe_hw_engine *hwe); +#define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ + struct xe_hw_engine *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ + struct xe_gt *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) + +void xe_rtp_process(struct xe_rtp_process_ctx *ctx, + const struct xe_rtp_entry *entries, + struct xe_reg_sr *sr); /* Match functions to be used with XE_RTP_MATCH_FUNC */ diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 12df8a9e9c45..5afacbd9083d 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -95,4 +95,17 @@ struct xe_rtp_entry { u8 flags; }; +enum xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_GT, + XE_RTP_PROCESS_TYPE_ENGINE, +}; + +struct xe_rtp_process_ctx { + union { + struct xe_gt *gt; + struct xe_hw_engine *hwe; + }; + enum xe_rtp_process_type type; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 5fc6a408429b..c2810ede3a65 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -59,7 +59,9 @@ static const struct xe_rtp_entry lrc_tunings[] = { void xe_tuning_process_gt(struct xe_gt *gt) { - xe_rtp_process(gt_tunings, >->reg_sr, gt, NULL); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); + + xe_rtp_process(&ctx, gt_tunings, >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -73,5 +75,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); */ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) { - xe_rtp_process(lrc_tunings, &hwe->reg_lrc, hwe->gt, hwe); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + + xe_rtp_process(&ctx, lrc_tunings, &hwe->reg_lrc); } diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index b0bb2f4438f4..4b236b6f4c8e 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -579,7 +579,9 @@ __diag_pop(); */ void xe_wa_process_gt(struct xe_gt *gt) { - xe_rtp_process(gt_was, >->reg_sr, gt, NULL); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); + + xe_rtp_process(&ctx, gt_was, >->reg_sr); } 
EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -593,7 +595,9 @@ EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); */ void xe_wa_process_engine(struct xe_hw_engine *hwe) { - xe_rtp_process(engine_was, &hwe->reg_sr, hwe->gt, hwe); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + + xe_rtp_process(&ctx, engine_was, &hwe->reg_sr); } /** @@ -606,5 +610,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) */ void xe_wa_process_lrc(struct xe_hw_engine *hwe) { - xe_rtp_process(lrc_was, &hwe->reg_lrc, hwe->gt, hwe); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + + xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc); } -- cgit v1.2.3 From cc982f0c168149def829f204b575fad546e9d043 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:40 -0700 Subject: drm/xe/rtp: Replace XE_WARN_ON Now that rule_matches() always has an xe pointer, replace the XE_WARN_ON with the more appropriate drm_warn(). Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 5be25fe0e8e4..5dcdfe45f0cb 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -84,7 +84,9 @@ static bool rule_matches(const struct xe_device *xe, match = r->match_func(gt, hwe); break; default: - XE_WARN_ON(r->match_type); + drm_warn(&xe->drm, "Invalid RTP match %u\n", + r->match_type); + match = false; } if (!match) -- cgit v1.2.3 From 91042671d9f3102c7e100d2e9275cae13eb63462 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:41 -0700 Subject: drm/xe/rtp: Add "_sr" to entry/function names The xe_rtp_process() function and xe_rtp_entry depend on the save-restore struct. In future it will be desired to process rtp rules, regardless of adding them to a save-restore. Rename the struct and function so the intent is clear and the name is freed for future uses. 
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 22 +++++++++++----------- drivers/gpu/drm/xe/xe_hw_engine.c | 8 ++++---- drivers/gpu/drm/xe/xe_reg_whitelist.c | 4 ++-- drivers/gpu/drm/xe/xe_rtp.c | 26 ++++++++++++++------------ drivers/gpu/drm/xe/xe_rtp.h | 20 ++++++++++---------- drivers/gpu/drm/xe/xe_rtp_types.h | 4 ++-- drivers/gpu/drm/xe/xe_tuning.c | 8 ++++---- drivers/gpu/drm/xe/xe_wa.c | 12 ++++++------ 8 files changed, 53 insertions(+), 51 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index f96ef1987719..ab6f7a47db50 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -36,7 +36,7 @@ struct rtp_test_case { u32 expected_clr_bits; unsigned long expected_count; unsigned int expected_sr_errors; - const struct xe_rtp_entry *entries; + const struct xe_rtp_entry_sr *entries; }; static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe) @@ -57,7 +57,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = REG_BIT(0) | REG_BIT(1), .expected_count = 1, /* Different bits on the same register: create a single entry */ - .entries = (const struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -76,7 +76,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = REG_BIT(0), .expected_count = 1, /* Don't coalesce second entry since rules don't match */ - .entries = (const struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -95,7 +95,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = REG_BIT(0), .expected_count = 1, /* Don't coalesce second entry due to one of the rules */ - .entries = (const struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -114,7 +114,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = REG_BIT(0), .expected_count = 2, /* Same bits on different registers are not coalesced */ - .entries = (const struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -133,7 +133,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = REG_BIT(1) | REG_BIT(0), .expected_count = 1, /* Check clr vs set actions on different bits */ - .entries = (const struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -154,7 +154,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = TEMP_MASK, .expected_count = 1, /* Check FIELD_SET works */ - .entries = (const struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(FIELD_SET(REGULAR_REG1, @@ -172,7 +172,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = REG_BIT(0), .expected_count = 1, .expected_sr_errors = 1, - .entries = (const 
struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -192,7 +192,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = REG_BIT(0), .expected_count = 1, .expected_sr_errors = 1, - .entries = (const struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -212,7 +212,7 @@ static const struct rtp_test_case cases[] = { .expected_clr_bits = REG_BIT(0), .expected_count = 1, .expected_sr_errors = 2, - .entries = (const struct xe_rtp_entry[]) { + .entries = (const struct xe_rtp_entry_sr[]) { { XE_RTP_NAME("basic-1"), XE_RTP_RULES(FUNC(match_yes)), XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) @@ -242,7 +242,7 @@ static void xe_rtp_process_tests(struct kunit *test) unsigned long idx, count = 0; xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe); - xe_rtp_process(&ctx, param->entries, reg_sr); + xe_rtp_process_to_sr(&ctx, param->entries, reg_sr); xa_for_each(®_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 25b96f40d5a7..7e4b0b465244 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -282,7 +282,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) | REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx); struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - const struct xe_rtp_entry lrc_was[] = { + const struct xe_rtp_entry_sr lrc_was[] = { /* * Some blitter commands do not have a field for MOCS, those * commands will use MOCS index pointed by BLIT_CCTL. 
@@ -300,7 +300,7 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) {} }; - xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc); } static void @@ -313,7 +313,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - const struct xe_rtp_entry engine_entries[] = { + const struct xe_rtp_entry_sr engine_entries[] = { /* * RING_CMD_CCTL specifies the default MOCS entry that will be * used by the command streamer when executing commands that @@ -334,7 +334,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) {} }; - xe_rtp_process(&ctx, engine_entries, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr); } static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 98f678d74445..70892f134718 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -20,7 +20,7 @@ static bool match_not_render(const struct xe_gt *gt, return hwe->class != XE_ENGINE_CLASS_RENDER; } -static const struct xe_rtp_entry register_whitelist[] = { +static const struct xe_rtp_entry_sr register_whitelist[] = { { XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(PS_INVOCATION_COUNT, @@ -65,7 +65,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process(&ctx, register_whitelist, &hwe->reg_whitelist); + xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist); } /** diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 5dcdfe45f0cb..0be1f4cfc4d5 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -26,7 +26,7 @@ static bool rule_matches(const struct xe_device *xe, struct xe_gt *gt, struct xe_hw_engine *hwe, - const struct xe_rtp_entry *entry) + const struct xe_rtp_entry_sr *entry) { const struct xe_rtp_rule *r; unsigned int i; @@ -112,9 +112,9 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action, xe_reg_sr_add(sr, &sr_entry); } -static void rtp_process_one(const struct xe_rtp_entry *entry, - struct xe_device *xe, struct xe_gt *gt, - struct xe_hw_engine *hwe, struct xe_reg_sr *sr) +static void rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, + struct xe_device *xe, struct xe_gt *gt, + struct xe_hw_engine *hwe, struct xe_reg_sr *sr) { const struct xe_rtp_action *action; u32 mmio_base; @@ -154,10 +154,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, } /** - * xe_rtp_process - Process all rtp @entries, adding the matching ones to @sr + * xe_rtp_process_to_sr - Process all rtp @entries, adding the matching ones to + * the save-restore argument. * @ctx: The context for processing the table, with one of device, gt or hwe * @entries: Table with RTP definitions - * @sr: Where to add an entry to with the values for matching. This can be + * @sr: Save-restore struct where matching rules execute the action. This can be * viewed as the "coalesced view" of multiple the tables. 
The bits for each * register set are expected not to collide with previously added entries * @@ -165,10 +166,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, * entries with matching rules to @sr. If @hwe is not NULL, its mmio_base is * used to calculate the right register offset */ -void xe_rtp_process(struct xe_rtp_process_ctx *ctx, - const struct xe_rtp_entry *entries, struct xe_reg_sr *sr) +void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, + const struct xe_rtp_entry_sr *entries, + struct xe_reg_sr *sr) { - const struct xe_rtp_entry *entry; + const struct xe_rtp_entry_sr *entry; struct xe_hw_engine *hwe = NULL; struct xe_gt *gt = NULL; struct xe_device *xe = NULL; @@ -181,13 +183,13 @@ void xe_rtp_process(struct xe_rtp_process_ctx *ctx, enum xe_hw_engine_id id; for_each_hw_engine(each_hwe, gt, id) - rtp_process_one(entry, xe, gt, each_hwe, sr); + rtp_process_one_sr(entry, xe, gt, each_hwe, sr); } else { - rtp_process_one(entry, xe, gt, hwe, sr); + rtp_process_one_sr(entry, xe, gt, hwe, sr); } } } -EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process); +EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr); bool xe_rtp_match_even_instance(const struct xe_gt *gt, const struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index c4b718b9632e..179b497186d2 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -261,7 +261,7 @@ struct xe_reg_sr; #define XE_RTP_NAME(s_) .name = (s_) /** - * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry + * XE_RTP_ENTRY_FLAG - Helper to add multiple flags to a struct xe_rtp_entry_sr * @...: Entry flags, without the ``XE_RTP_ENTRY_FLAG_`` prefix * * Helper to automatically add a ``XE_RTP_ENTRY_FLAG_`` prefix to the flags @@ -269,7 +269,7 @@ struct xe_reg_sr; * * .. code-block:: c * - * const struct xe_rtp_entry wa_entries[] = { + * const struct xe_rtp_entry_sr wa_entries[] = { * ... * { XE_RTP_NAME("test-entry"), * ... @@ -291,7 +291,7 @@ struct xe_reg_sr; * * .. code-block:: c * - * const struct xe_rtp_entry wa_entries[] = { + * const struct xe_rtp_entry_sr wa_entries[] = { * ... * { XE_RTP_NAME("test-entry"), * ... @@ -305,7 +305,7 @@ struct xe_reg_sr; .flags = (XE_RTP_PASTE_FOREACH(ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__))) /** - * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry + * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry * @...: Rules * * At least one rule is needed and up to 4 are supported. Multiple rules are @@ -314,7 +314,7 @@ struct xe_reg_sr; * * .. code-block:: c * - * const struct xe_rtp_entry wa_entries[] = { + * const struct xe_rtp_entry_sr wa_entries[] = { * ... * { XE_RTP_NAME("test-entry"), * XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), @@ -330,7 +330,7 @@ struct xe_reg_sr; } /** - * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry + * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr * @...: Actions to be taken * * At least one rule is needed and up to 4 are supported. Multiple rules are @@ -339,7 +339,7 @@ struct xe_reg_sr; * * .. code-block:: c * - * const struct xe_rtp_entry wa_entries[] = { + * const struct xe_rtp_entry_sr wa_entries[] = { * ... 
* { XE_RTP_NAME("test-entry"), * XE_RTP_RULES(...), @@ -359,9 +359,9 @@ struct xe_reg_sr; struct xe_hw_engine *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ struct xe_gt *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) -void xe_rtp_process(struct xe_rtp_process_ctx *ctx, - const struct xe_rtp_entry *entries, - struct xe_reg_sr *sr); +void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, + const struct xe_rtp_entry_sr *entries, + struct xe_reg_sr *sr); /* Match functions to be used with XE_RTP_MATCH_FUNC */ diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 5afacbd9083d..66d1cb0aaa08 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -84,8 +84,8 @@ struct xe_rtp_rule { }; }; -/** struct xe_rtp_entry - Entry in an rtp table */ -struct xe_rtp_entry { +/** struct xe_rtp_entry_sr - Entry in an rtp table */ +struct xe_rtp_entry_sr { const char *name; const struct xe_rtp_action *actions; const struct xe_rtp_rule *rules; diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index c2810ede3a65..412e59de9842 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -15,7 +15,7 @@ #undef XE_REG_MCR #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) -static const struct xe_rtp_entry gt_tunings[] = { +static const struct xe_rtp_entry_sr gt_tunings[] = { { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) @@ -27,7 +27,7 @@ static const struct xe_rtp_entry gt_tunings[] = { {} }; -static const struct xe_rtp_entry lrc_tunings[] = { +static const struct xe_rtp_entry_sr lrc_tunings[] = { { XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), /* read verification is ignored due to 1608008084. 
*/ @@ -61,7 +61,7 @@ void xe_tuning_process_gt(struct xe_gt *gt) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); - xe_rtp_process(&ctx, gt_tunings, >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_tunings, >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -77,5 +77,5 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process(&ctx, lrc_tunings, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc); } diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4b236b6f4c8e..557e90d79f0b 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -93,7 +93,7 @@ __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); -static const struct xe_rtp_entry gt_was[] = { +static const struct xe_rtp_entry_sr gt_was[] = { { XE_RTP_NAME("14011060649"), XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), ENGINE_CLASS(VIDEO_DECODE), @@ -235,7 +235,7 @@ static const struct xe_rtp_entry gt_was[] = { {} }; -static const struct xe_rtp_entry engine_was[] = { +static const struct xe_rtp_entry_sr engine_was[] = { { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_THREAD_MODE, @@ -490,7 +490,7 @@ static const struct xe_rtp_entry engine_was[] = { {} }; -static const struct xe_rtp_entry lrc_was[] = { +static const struct xe_rtp_entry_sr lrc_was[] = { { XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3, @@ -581,7 +581,7 @@ void xe_wa_process_gt(struct xe_gt *gt) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); - xe_rtp_process(&ctx, gt_was, >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_was, >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -597,7 +597,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process(&ctx, engine_was, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr); } /** @@ -612,5 +612,5 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process(&ctx, lrc_was, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc); } -- cgit v1.2.3 From cefeb7634136b7273dff7fe20cedc95e01e51209 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:42 -0700 Subject: drm/xe/rtp: Allow to track active workarounds Add the metadata in struct xe_rtp_process_ctx, to be set by xe_rtp_process_ctx_enable_active_tracking(), so rtp knows how to mark the active entries while processing the table. This can be used by the WA infra to record what are the active workarounds. 
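A consumer might wire the tracking up roughly as follows; the local bitmap and the debug print are illustrative, while the table name and the rtp/WA helpers are the ones from this series:

/* Assuming gt_was[] is the table being processed for a given GT. */
DECLARE_BITMAP(active, ARRAY_SIZE(gt_was));
struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
unsigned int bit;

bitmap_zero(active, ARRAY_SIZE(gt_was));
xe_rtp_process_ctx_enable_active_tracking(&ctx, active, ARRAY_SIZE(gt_was));
xe_rtp_process_to_sr(&ctx, gt_was, &gt->reg_sr);

/* Entries whose rules matched now have their bit set. */
for_each_set_bit(bit, active, ARRAY_SIZE(gt_was))
	drm_dbg(&gt_to_xe(gt)->drm, "active WA entry: %s\n", gt_was[bit].name);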
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 48 +++++++++++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_rtp.h | 4 ++++ drivers/gpu/drm/xe/xe_rtp_types.h | 2 ++ 3 files changed, 50 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 0be1f4cfc4d5..29cf92f9b7b3 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -112,7 +112,7 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action, xe_reg_sr_add(sr, &sr_entry); } -static void rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, +static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, struct xe_device *xe, struct xe_gt *gt, struct xe_hw_engine *hwe, struct xe_reg_sr *sr) { @@ -121,7 +121,7 @@ static void rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, unsigned int i; if (!rule_matches(xe, gt, hwe, entry)) - return; + return false; for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) { if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) || @@ -132,6 +132,8 @@ static void rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, rtp_add_sr_entry(action, gt, mmio_base, sr); } + + return true; } static void rtp_get_context(struct xe_rtp_process_ctx *ctx, @@ -153,6 +155,38 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, }; } +/** + * xe_rtp_process_ctx_enable_active_tracking - Enable tracking of active entries + * + * Set additional metadata to track what entries are considered "active", i.e. + * their rules match the condition. Bits are never cleared: entries with + * matching rules set the corresponding bit in the bitmap. + * + * @ctx: The context for processing the table + * @active_entries: bitmap to store the active entries + * @n_entries: number of entries to be processed + */ +void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, + unsigned long *active_entries, + size_t n_entries) +{ + ctx->active_entries = active_entries; + ctx->n_entries = n_entries; +} + +static void rtp_mark_active(struct xe_device *xe, + struct xe_rtp_process_ctx *ctx, + unsigned int bit) +{ + if (!ctx->active_entries) + return; + + if (drm_WARN_ON(&xe->drm, bit > ctx->n_entries)) + return; + + bitmap_set(ctx->active_entries, bit, 1); +} + /** * xe_rtp_process_to_sr - Process all rtp @entries, adding the matching ones to * the save-restore argument. 
@@ -178,15 +212,21 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, rtp_get_context(ctx, &hwe, >, &xe); for (entry = entries; entry && entry->name; entry++) { + bool match = false; + if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) { struct xe_hw_engine *each_hwe; enum xe_hw_engine_id id; for_each_hw_engine(each_hwe, gt, id) - rtp_process_one_sr(entry, xe, gt, each_hwe, sr); + match |= rtp_process_one_sr(entry, xe, gt, + each_hwe, sr); } else { - rtp_process_one_sr(entry, xe, gt, hwe, sr); + match = rtp_process_one_sr(entry, xe, gt, hwe, sr); } + + if (match) + rtp_mark_active(xe, ctx, entry - entries); } } EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr); diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 179b497186d2..e69f514ee6c4 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -359,6 +359,10 @@ struct xe_reg_sr; struct xe_hw_engine *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ struct xe_gt *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) +void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, + unsigned long *active_entries, + size_t n_entries); + void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, struct xe_reg_sr *sr); diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 66d1cb0aaa08..03d97b666c64 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -106,6 +106,8 @@ struct xe_rtp_process_ctx { struct xe_hw_engine *hwe; }; enum xe_rtp_process_type type; + unsigned long *active_entries; + size_t n_entries; }; #endif -- cgit v1.2.3 From 49d329a0824df79bb04d720ccdc9dbc257ec7e6b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:43 -0700 Subject: drm/xe/wa: Track gt/engine/lrc active workarounds Allocate the data to track workarounds on each gt of the device, and pass that to RTP so the active workarounds are tracked. Even if the workarounds available until now are mostly device or platform centric, with the different IP versions for media and graphics starting with MTL, it's possible that some workarounds need to be applied only on select GTs. Also, given the workaround database is per IP block, for tracking purposes there is no need to differentiate the workarounds per engine class. Hence the bitmask to track active workarounds can be tracked per GT. 
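Once a table has been processed, whether a given entry matched reduces to a plain bitmap lookup on that GT; a minimal sketch (gt_wa_is_active is a made-up helper, idx an index into gt_was):

    /*
     * Sketch only: the bitmaps are per-GT, so the same table index can be
     * active on one GT and inactive on another (e.g. primary vs. media GT).
     */
    static bool gt_wa_is_active(struct xe_gt *gt, unsigned int idx)
    {
            return test_bit(idx, gt->wa_active.gt);
    }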
v2: Move the tracking from per-device to per-GT basis (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-7-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 5 +++++ drivers/gpu/drm/xe/xe_gt_types.h | 10 ++++++++++ drivers/gpu/drm/xe/xe_wa.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa.h | 1 + 4 files changed, 53 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 80d42c7c7cfa..d139554316d4 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -315,6 +315,11 @@ int xe_gt_init_early(struct xe_gt *gt) return err; xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); + + err = xe_wa_init(gt); + if (err) + return err; + xe_wa_process_gt(gt); xe_tuning_process_gt(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7c47d67aa8be..017ab60f2498 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -359,6 +359,16 @@ struct xe_gt { * of a steered operation */ spinlock_t mcr_lock; + + /** @wa_active: keep track of active workarounds */ + struct { + /** @gt: bitmap with active GT workarounds */ + unsigned long *gt; + /** @engine: bitmap with active engine workarounds */ + unsigned long *engine; + /** @lrc: bitmap with active LRC workarounds */ + unsigned long *lrc; + } wa_active; }; #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 557e90d79f0b..665714abc5f0 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -5,6 +5,7 @@ #include "xe_wa.h" +#include #include #include @@ -581,6 +582,8 @@ void xe_wa_process_gt(struct xe_gt *gt) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); + xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt, + ARRAY_SIZE(gt_was)); xe_rtp_process_to_sr(&ctx, gt_was, >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -597,6 +600,8 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine, + ARRAY_SIZE(engine_was)); xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr); } @@ -612,5 +617,37 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc, + ARRAY_SIZE(lrc_was)); xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc); } + +/** + * xe_wa_init - initialize gt with workaround bookkeeping + * @gt: GT instance to initialize + * + * Returns 0 for success, negative error code otherwise. 
+ */ +int xe_wa_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + size_t n_lrc, n_engine, n_gt, total; + unsigned long *p; + + n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was)); + n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was)); + n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_was)); + total = n_gt + n_engine + n_lrc; + + p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); + if (!p) + return -ENOMEM; + + gt->wa_active.gt = p; + p += n_gt; + gt->wa_active.engine = p; + p += n_engine; + gt->wa_active.lrc = p; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index cd2307d58795..eae05bcecc68 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -9,6 +9,7 @@ struct xe_gt; struct xe_hw_engine; +int xe_wa_init(struct xe_gt *gt); void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); -- cgit v1.2.3 From 40a627cafe02d44d24fa800b1d93c5d17b4649a5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:44 -0700 Subject: drm/xe/debugfs: Dump active workarounds Add a "workarounds" node in debugfs that can dump all the active workarounds using the information recorded by rtp infra when those workarounds were processed. v2: move workarounds to be reported per-GT Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-8-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_wa.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_wa.h | 2 ++ 3 files changed, 31 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 8bf441e850a0..339ecd5fad9b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -19,6 +19,7 @@ #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" #include "xe_uc_debugfs.h" +#include "xe_wa.h" static struct xe_gt *node_to_gt(struct drm_info_node *node) { @@ -127,6 +128,16 @@ static int register_save_restore(struct seq_file *m, void *data) return 0; } +static int workarounds(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_wa_dump(gt, &p); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", hw_engines, 0}, {"force_reset", force_reset, 0}, @@ -135,6 +146,7 @@ static const struct drm_info_list debugfs_list[] = { {"steering", steering, 0}, {"ggtt", ggtt, 0}, {"register-save-restore", register_save_restore, 0}, + {"workarounds", workarounds, 0}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 665714abc5f0..910579453316 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -651,3 +651,20 @@ int xe_wa_init(struct xe_gt *gt) return 0; } + +void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "GT Workarounds\n"); + for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was)) + drm_printf_indent(p, 1, "%s\n", gt_was[idx].name); + + drm_printf(p, "\nEngine Workarounds\n"); + for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was)) + drm_printf_indent(p, 1, "%s\n", engine_was[idx].name); + + drm_printf(p, "\nLRC Workarounds\n"); + for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was)) + drm_printf_indent(p, 1, "%s\n", 
lrc_was[idx].name); +} diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index eae05bcecc68..defefa5d9611 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -6,6 +6,7 @@ #ifndef _XE_WA_ #define _XE_WA_ +struct drm_printer; struct xe_gt; struct xe_hw_engine; @@ -15,5 +16,6 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); +void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); #endif -- cgit v1.2.3 From 00a5912c020df0bd4b752db714cb7256a83c0701 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:45 -0700 Subject: drm/xe/rtp: Rename STEP to GRAPHICS_STEP Rename the RTP match in order to prepare the code base to check for the media version. Up until MTL, the graphics vs media distinction wrt to stepping was not ver relevant as they were the same GT. However, with MTL this is no longer true. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-9-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 2 +- drivers/gpu/drm/xe/xe_rtp.h | 16 +++++----- drivers/gpu/drm/xe/xe_rtp_types.h | 2 +- drivers/gpu/drm/xe/xe_wa.c | 64 ++++++++++++++++++++++----------------- 4 files changed, 47 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 29cf92f9b7b3..2ac7b47942fe 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -57,7 +57,7 @@ static bool rule_matches(const struct xe_device *xe, match = xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end; break; - case XE_RTP_MATCH_STEP: + case XE_RTP_MATCH_GRAPHICS_STEP: /* TODO: match media/display */ match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end; diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index e69f514ee6c4..7ba9d2ecab92 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -35,8 +35,8 @@ struct xe_reg_sr; { .match_type = XE_RTP_MATCH_SUBPLATFORM, \ .platform = plat__, .subplatform = sub__ } -#define _XE_RTP_RULE_STEP(start__, end__) \ - { .match_type = XE_RTP_MATCH_STEP, \ +#define _XE_RTP_RULE_GRAPHICS_STEP(start__, end__) \ + { .match_type = XE_RTP_MATCH_GRAPHICS_STEP, \ .step_start = start__, .step_end = end__ } #define _XE_RTP_RULE_ENGINE_CLASS(cls__) \ @@ -63,17 +63,17 @@ struct xe_reg_sr; _XE_RTP_RULE_SUBPLATFORM(XE_##plat_, XE_SUBPLATFORM_##plat_##_##sub_) /** - * XE_RTP_RULE_STEP - Create rule matching platform stepping + * XE_RTP_RULE_GRAPHICS_STEP - Create rule matching graphics stepping * @start_: First stepping matching the rule * @end_: First stepping that does not match the rule * - * Note that the range matching this rule [ @start_, @end_ ), i.e. inclusive on - * the left, exclusive on the right. + * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive + * on the left, exclusive on the right. * * Refer to XE_RTP_RULES() for expected usage. */ -#define XE_RTP_RULE_STEP(start_, end_) \ - _XE_RTP_RULE_STEP(STEP_##start_, STEP_##end_) +#define XE_RTP_RULE_GRAPHICS_STEP(start_, end_) \ + _XE_RTP_RULE_GRAPHICS_STEP(STEP_##start_, STEP_##end_) /** * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class @@ -317,7 +317,7 @@ struct xe_reg_sr; * const struct xe_rtp_entry_sr wa_entries[] = { * ... 
* { XE_RTP_NAME("test-entry"), - * XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + * XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), * ... * }, * ... diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 03d97b666c64..52adbf7de752 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -39,11 +39,11 @@ enum { XE_RTP_MATCH_SUBPLATFORM, XE_RTP_MATCH_GRAPHICS_VERSION, XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, + XE_RTP_MATCH_GRAPHICS_STEP, XE_RTP_MATCH_MEDIA_VERSION, XE_RTP_MATCH_MEDIA_VERSION_RANGE, XE_RTP_MATCH_INTEGRATED, XE_RTP_MATCH_DISCRETE, - XE_RTP_MATCH_STEP, XE_RTP_MATCH_ENGINE_CLASS, XE_RTP_MATCH_NOT_ENGINE_CLASS, XE_RTP_MATCH_FUNC, diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 910579453316..d703dc0f7b21 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -122,7 +122,7 @@ static const struct xe_rtp_entry_sr gt_was[] = { { XE_RTP_NAME("16010515920"), XE_RTP_RULES(SUBPLATFORM(DG2, G10), - STEP(A0, B0), + GRAPHICS_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), @@ -136,27 +136,27 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)) }, { XE_RTP_NAME("14012362059"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) }, { XE_RTP_NAME("14012362059"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) }, { XE_RTP_NAME("14010948348"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)) }, { XE_RTP_NAME("14011037102"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)) }, { XE_RTP_NAME("14011371254"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)) }, { XE_RTP_NAME("14011431319"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | @@ -176,15 +176,15 @@ static const struct xe_rtp_entry_sr gt_was[] = { GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)) }, { XE_RTP_NAME("14010569222"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)) }, { XE_RTP_NAME("14011028019"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)) }, { XE_RTP_NAME("14010680813"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS | EGRESS_BLOCK_CLKGATE_DIS | @@ -316,7 +316,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { POLYGON_TRIFAN_LINELOOP_DISABLE)) }, { 
XE_RTP_NAME("22012826095, 22013059131"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, MAXREQS_PER_BANK, @@ -330,7 +330,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) }, { XE_RTP_NAME("22013059131"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) }, @@ -368,7 +368,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8)) }, { XE_RTP_NAME("22012654132"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, /* @@ -396,21 +396,25 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION)) }, { XE_RTP_NAME("14013392000"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE)) }, { XE_RTP_NAME("14012419201"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) }, { XE_RTP_NAME("14012419201"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) }, { XE_RTP_NAME("1308578152"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), + ENGINE_CLASS(RENDER), FUNC(xe_rtp_match_first_gslice_fused_off)), XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1, REPLAY_MODE_GRANULARITY)) @@ -426,30 +430,35 @@ static const struct xe_rtp_entry_sr engine_was[] = { MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE)) }, { XE_RTP_NAME("22010430635"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_GRF_CLEAR)) }, { XE_RTP_NAME("14013202645"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(B0, C0), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), + ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) }, { XE_RTP_NAME("14013202645"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) }, { XE_RTP_NAME("22012532006"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, C0), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0), + ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) }, { XE_RTP_NAME("22012532006"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), 
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) }, { XE_RTP_NAME("22014600077"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(B0, FOREVER), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(B0, FOREVER), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH, @@ -485,7 +494,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE)) }, { XE_RTP_NAME("14014999345"), - XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), STEP(B0, C0)), + XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), + GRAPHICS_STEP(B0, C0)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC)) }, {} @@ -528,17 +538,17 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* DG2 */ { XE_RTP_NAME("16011186671"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH), SET(VFLSKPD, DIS_OVER_FETCH_CACHE)) }, { XE_RTP_NAME("14010469329"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE)) }, { XE_RTP_NAME("14010698770, 22010613112, 22010465075"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), STEP(A0, B0)), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, DISABLE_CPS_AWARE_COLOR_PIPE)) }, -- cgit v1.2.3 From ed73d03c0803bdb70d7e56c7d8a2518fb9376047 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:46 -0700 Subject: drm/xe/rtp: Add check for media stepping Start differentiating the media and graphics stepping as it will be important for MTL. Note that RTP is still not prepared to handle the different types of GT, i.e. checking for graphics version/range/stepping on a media gt or vice versa still matches regardless of the gt being passed as parameter. Changing it to accommodate MTL is left for a future patch. 
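As an example of what this enables, a rule can now be written against the media stepping. The entry below is purely illustrative: the workaround number is hypothetical, the action is borrowed from the existing 14011060649 entry, and it assumes the MEDIA_VERSION rule macro that accompanies XE_RTP_MATCH_MEDIA_VERSION:

    { XE_RTP_NAME("<hypothetical-media-wa>"),
      XE_RTP_RULES(MEDIA_VERSION(1300), MEDIA_STEP(A0, B0),
                   ENGINE_CLASS(VIDEO_DECODE)),
      XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)),
      XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
    },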
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-10-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 12 +++++++----- drivers/gpu/drm/xe/xe_rtp.h | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_rtp_types.h | 1 + 3 files changed, 25 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 2ac7b47942fe..70769852a93d 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -43,13 +43,16 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: - /* TODO: match display */ match = xe->info.graphics_verx100 == r->ver_start; break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: match = xe->info.graphics_verx100 >= r->ver_start && xe->info.graphics_verx100 <= r->ver_end; break; + case XE_RTP_MATCH_GRAPHICS_STEP: + match = xe->info.step.graphics >= r->step_start && + xe->info.step.graphics < r->step_end; + break; case XE_RTP_MATCH_MEDIA_VERSION: match = xe->info.media_verx100 == r->ver_start; break; @@ -57,10 +60,9 @@ static bool rule_matches(const struct xe_device *xe, match = xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end; break; - case XE_RTP_MATCH_GRAPHICS_STEP: - /* TODO: match media/display */ - match = xe->info.step.graphics >= r->step_start && - xe->info.step.graphics < r->step_end; + case XE_RTP_MATCH_MEDIA_STEP: + match = xe->info.step.media >= r->step_start && + xe->info.step.media < r->step_end; break; case XE_RTP_MATCH_INTEGRATED: match = !xe->info.is_dgfx; diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 7ba9d2ecab92..d55701d2f39b 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -39,6 +39,10 @@ struct xe_reg_sr; { .match_type = XE_RTP_MATCH_GRAPHICS_STEP, \ .step_start = start__, .step_end = end__ } +#define _XE_RTP_RULE_MEDIA_STEP(start__, end__) \ + { .match_type = XE_RTP_MATCH_MEDIA_STEP, \ + .step_start = start__, .step_end = end__ } + #define _XE_RTP_RULE_ENGINE_CLASS(cls__) \ { .match_type = XE_RTP_MATCH_ENGINE_CLASS, \ .engine_class = (cls__) } @@ -75,6 +79,19 @@ struct xe_reg_sr; #define XE_RTP_RULE_GRAPHICS_STEP(start_, end_) \ _XE_RTP_RULE_GRAPHICS_STEP(STEP_##start_, STEP_##end_) +/** + * XE_RTP_RULE_MEDIA_STEP - Create rule matching media stepping + * @start_: First stepping matching the rule + * @end_: First stepping that does not match the rule + * + * Note that the range matching this rule is [ @start_, @end_ ), i.e. inclusive + * on the left, exclusive on the right. + * + * Refer to XE_RTP_RULES() for expected usage. 
+ */ +#define XE_RTP_RULE_MEDIA_STEP(start_, end_) \ + _XE_RTP_RULE_MEDIA_STEP(STEP_##start_, STEP_##end_) + /** * XE_RTP_RULE_ENGINE_CLASS - Create rule matching an engine class * @cls_: Engine class to match diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 52adbf7de752..af49cbf98407 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -42,6 +42,7 @@ enum { XE_RTP_MATCH_GRAPHICS_STEP, XE_RTP_MATCH_MEDIA_VERSION, XE_RTP_MATCH_MEDIA_VERSION_RANGE, + XE_RTP_MATCH_MEDIA_STEP, XE_RTP_MATCH_INTEGRATED, XE_RTP_MATCH_DISCRETE, XE_RTP_MATCH_ENGINE_CLASS, -- cgit v1.2.3 From fe19328b900cc2c92054259e16d99023111c57f3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:47 -0700 Subject: drm/xe/rtp: Add support for entries with no action Add a separate struct to hold entries in a table that has no action associated with each of them. The goal is that the caller in future can set a per-context callback, or just use the active entry marking feature. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-11-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 65 ++++++++++++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_rtp.h | 3 ++ drivers/gpu/drm/xe/xe_rtp_types.h | 7 +++++ 3 files changed, 67 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 70769852a93d..ebcfb04c391a 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -26,14 +26,14 @@ static bool rule_matches(const struct xe_device *xe, struct xe_gt *gt, struct xe_hw_engine *hwe, - const struct xe_rtp_entry_sr *entry) + const struct xe_rtp_rule *rules, + unsigned int n_rules) { const struct xe_rtp_rule *r; unsigned int i; bool match; - for (r = entry->rules, i = 0; i < entry->n_rules; - r = &entry->rules[++i]) { + for (r = rules, i = 0; i < n_rules; r = &rules[++i]) { switch (r->match_type) { case XE_RTP_MATCH_PLATFORM: match = xe->info.platform == r->platform; @@ -122,7 +122,7 @@ static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, u32 mmio_base; unsigned int i; - if (!rule_matches(xe, gt, hwe, entry)) + if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules)) return false; for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) { @@ -178,15 +178,18 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, static void rtp_mark_active(struct xe_device *xe, struct xe_rtp_process_ctx *ctx, - unsigned int bit) + unsigned int first, unsigned int last) { if (!ctx->active_entries) return; - if (drm_WARN_ON(&xe->drm, bit > ctx->n_entries)) + if (drm_WARN_ON(&xe->drm, last > ctx->n_entries)) return; - bitmap_set(ctx->active_entries, bit, 1); + if (first == last) + bitmap_set(ctx->active_entries, first, 1); + else + bitmap_set(ctx->active_entries, first, last - first + 2); } /** @@ -228,11 +231,57 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, } if (match) - rtp_mark_active(xe, ctx, entry - entries); + rtp_mark_active(xe, ctx, entry - entries, + entry - entries); } } EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr); +/** + * xe_rtp_process - Process all rtp @entries, without running any action + * @ctx: The context for processing the table, with one of device, gt or hwe + * @entries: Table with RTP definitions + * + * Walk the table pointed by @entries (with an empty sentinel), executing the + * 
rules. A few differences from xe_rtp_process_to_sr(): + * + * 1. There is no action associated with each entry since this uses + * struct xe_rtp_entry. Its main use is for marking active workarounds via + * xe_rtp_process_ctx_enable_active_tracking(). + * 2. There is support for OR operations by having entries with no name. + */ +void xe_rtp_process(struct xe_rtp_process_ctx *ctx, + const struct xe_rtp_entry *entries) +{ + const struct xe_rtp_entry *entry, *first_entry; + struct xe_hw_engine *hwe; + struct xe_gt *gt; + struct xe_device *xe; + + rtp_get_context(ctx, &hwe, >, &xe); + + first_entry = entries; + if (drm_WARN_ON(&xe->drm, !first_entry->name)) + return; + + for (entry = entries; entry && entry->rules; entry++) { + if (entry->name) + first_entry = entry; + + if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules)) + continue; + + /* Fast-forward entry, eliminating the OR'ed entries */ + for (entry++; entry && entry->rules; entry++) + if (entry->name) + break; + entry--; + + rtp_mark_active(xe, ctx, first_entry - entries, + entry - entries); + } +} + bool xe_rtp_match_even_instance(const struct xe_gt *gt, const struct xe_hw_engine *hwe) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index d55701d2f39b..8581bd9b1426 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -384,6 +384,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, struct xe_reg_sr *sr); +void xe_rtp_process(struct xe_rtp_process_ctx *ctx, + const struct xe_rtp_entry *entries); + /* Match functions to be used with XE_RTP_MATCH_FUNC */ /** diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index af49cbf98407..d170532a98a5 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -96,6 +96,13 @@ struct xe_rtp_entry_sr { u8 flags; }; +/** struct xe_rtp_entry - Entry in an rtp table, with no action associated */ +struct xe_rtp_entry { + const char *name; + const struct xe_rtp_rule *rules; + u8 n_rules; +}; + enum xe_rtp_process_type { XE_RTP_PROCESS_TYPE_GT, XE_RTP_PROCESS_TYPE_ENGINE, -- cgit v1.2.3 From 464f2243c1fb139d8200e96648131197bf50fb27 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:48 -0700 Subject: drm/xe: Include build directory When doing out-of-tree builds with O= or KBUILD_OUTPUT=, it's important to also add the directory where the target is saved. Otherwise any file generated by the build system may not be available for other targets depending on it. The $(obj) is added automatically when building the entire kernel, but it's not added when M=drivers/gpu/drm/xe is added. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-12-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 5d277d060eba..db88c9d84569 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -27,7 +27,7 @@ subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror # Fine grained warnings disable CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init) -subdir-ccflags-y += -I$(srctree)/$(src) +subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src) # Please keep these build lists sorted! 
-- cgit v1.2.3 From 9616e74b796c752ec29c3c83f3e33277d2b25b8e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:49 -0700 Subject: drm/xe: Add support for OOB workarounds There are WAs that, due to their nature, cannot be applied from a central place like xe_wa.c. Those are peppered around the rest of the code, as needed. Now they have a new name: "out-of-band workarounds". These workarounds have their names and rules still grouped in xe_wa.c, inside the xe_wa_oob array, which is generated at compile time by xe_wa_oob.rules and the hostprog xe_gen_wa_oob. The code generation guarantees that the header xe_wa_oob.h contains the IDs for the workarounds that match the index in the table. This way the runtime checks that are spread throughout the code are simple tests against the bitmap saved during initialization. v2: Fix prev_name tracking not working when it's empty, i.e. when there is more than 1 continuation rule. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-13-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/.gitignore | 2 + drivers/gpu/drm/xe/Makefile | 13 +++ drivers/gpu/drm/xe/xe_gen_wa_oob.c | 165 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_gt_types.h | 2 + drivers/gpu/drm/xe/xe_wa.c | 40 ++++++++- drivers/gpu/drm/xe/xe_wa.h | 9 ++ drivers/gpu/drm/xe/xe_wa_oob.rules | 0 8 files changed, 228 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gen_wa_oob.c create mode 100644 drivers/gpu/drm/xe/xe_wa_oob.rules (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/.gitignore b/drivers/gpu/drm/xe/.gitignore index 81972dce1aff..8778bf132674 100644 --- a/drivers/gpu/drm/xe/.gitignore +++ b/drivers/gpu/drm/xe/.gitignore @@ -1,2 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only *.hdrtest +/generated +/xe_gen_wa_oob diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index db88c9d84569..b9d3553ab476 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -29,6 +29,19 @@ CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init) subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src) +# generated sources +hostprogs := xe_gen_wa_oob + +generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h + +quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) + cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob) + +$(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules + $(call cmd,wa_oob) + +$(obj)/xe_wa.o: $(generated_oob) + # Please keep these build lists sorted! 
# core driver code diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c new file mode 100644 index 000000000000..106ee2b027f0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#define HEADER \ + "// SPDX-License-Identifier: MIT\n" \ + "\n" \ + "/*\n" \ + " * DO NOT MODIFY.\n" \ + " *\n" \ + " * This file was generated from rules: %s\n" \ + " */\n" \ + "#ifndef _GENERATED_XE_WA_OOB_\n" \ + "#define _GENERATED_XE_WA_OOB_\n" \ + "\n" \ + "enum {\n" + +#define FOOTER \ + "};\n" \ + "\n" \ + "#endif\n" + +static void print_usage(FILE *f) +{ + fprintf(f, "usage: %s \n", + program_invocation_short_name); +} + +static void print_parse_error(const char *err_msg, const char *line, + unsigned int lineno) +{ + fprintf(stderr, "ERROR: %s\nERROR: %u: %.60s\n", + err_msg, lineno, line); +} + +static char *strip(char *line, size_t linelen) +{ + while (isspace(*(line + linelen))) + linelen--; + + line[linelen - 1] = '\0'; + + return line + strspn(line, " \f\n\r\t\v"); +} + +#define MAX_LINE_LEN 4096 +static int parse(FILE *input, FILE *csource, FILE *cheader) +{ + char line[MAX_LINE_LEN + 1]; + char *name, *prev_name = NULL, *rules; + unsigned int lineno = 0, idx = 0; + + while (fgets(line, sizeof(line), input)) { + size_t linelen; + bool is_continuation; + + if (line[0] == '\0' || line[0] == '#' || line[0] == '\n') { + lineno++; + continue; + } + + linelen = strlen(line); + if (linelen == MAX_LINE_LEN) { + print_parse_error("line too long", line, lineno); + return -EINVAL; + } + + is_continuation = isspace(line[0]); + name = strip(line, linelen); + + if (!is_continuation) { + name = strtok(name, " \t"); + rules = strtok(NULL, ""); + } else { + if (!prev_name) { + print_parse_error("invalid rule continuation", + line, lineno); + return -EINVAL; + } + + rules = name; + name = NULL; + } + + if (rules[0] == '\0') { + print_parse_error("invalid empty rule\n", line, lineno); + return -EINVAL; + } + + if (name) { + fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); + fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n", + name, rules); + } else { + fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n", + rules); + } + + idx++; + lineno++; + if (!is_continuation) + prev_name = name; + } + + fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); + + return 0; +} + +int main(int argc, const char *argv[]) +{ + enum { + ARGS_INPUT, + ARGS_CSOURCE, + ARGS_CHEADER, + _ARGS_COUNT + }; + struct { + const char *fn; + const char *mode; + FILE *f; + } args[] = { + [ARGS_INPUT] = { .fn = argv[1], .mode = "r" }, + [ARGS_CSOURCE] = { .fn = argv[2], .mode = "w" }, + [ARGS_CHEADER] = { .fn = argv[3], .mode = "w" }, + }; + int ret = 1; + + if (argc < 3) { + fprintf(stderr, "ERROR: wrong arguments\n"); + print_usage(stderr); + return 1; + } + + for (int i = 0; i < _ARGS_COUNT; i++) { + args[i].f = fopen(args[i].fn, args[i].mode); + if (!args[i].f) { + fprintf(stderr, "ERROR: Can't open %s: %m\n", + args[i].fn); + goto err; + } + } + + fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn); + ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, + args[ARGS_CHEADER].f); + if (!ret) + fprintf(args[ARGS_CHEADER].f, FOOTER); + +err: + for (int i = 0; i < _ARGS_COUNT; i++) { + if (args[i].f) + fclose(args[i].f); + } + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c 
index d139554316d4..18eda5b1377f 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -321,6 +321,7 @@ int xe_gt_init_early(struct xe_gt *gt) return err; xe_wa_process_gt(gt); + xe_wa_process_oob(gt); xe_tuning_process_gt(gt); return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 017ab60f2498..b83c834e7ced 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -368,6 +368,8 @@ struct xe_gt { unsigned long *engine; /** @lrc: bitmap with active LRC workarounds */ unsigned long *lrc; + /** @oob: bitmap with active OOB workaroudns */ + unsigned long *oob; } wa_active; }; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d703dc0f7b21..d9906f326d38 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -9,6 +9,7 @@ #include #include +#include "generated/xe_wa_oob.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" @@ -73,8 +74,8 @@ * engine registers are restored in a context restore sequence. This is * currently not used in the driver. * - * - Other: There are WAs that, due to their nature, cannot be applied from a - * central place. Those are peppered around the rest of the code, as needed. + * - Other/OOB: There are WAs that, due to their nature, cannot be applied from + * a central place. Those are peppered around the rest of the code, as needed. * Workarounds related to the display IP are the main example. * * .. [1] Technically, some registers are powercontext saved & restored, so they @@ -579,8 +580,31 @@ static const struct xe_rtp_entry_sr lrc_was[] = { {} }; +static __maybe_unused const struct xe_rtp_entry oob_was[] = { +#include + {} +}; + +static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); + __diag_pop(); +/** + * xe_wa_process_oob - process OOB workaround table + * @gt: GT instance to process workarounds for + * + * Process OOB workaround table for this platform, marking in @gt the + * workarounds that are active. 
+ */ +void xe_wa_process_oob(struct xe_gt *gt) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); + + xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob, + ARRAY_SIZE(oob_was)); + xe_rtp_process(&ctx, oob_was); +} + /** * xe_wa_process_gt - process GT workaround table * @gt: GT instance to process workarounds for @@ -641,13 +665,14 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) int xe_wa_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - size_t n_lrc, n_engine, n_gt, total; + size_t n_oob, n_lrc, n_engine, n_gt, total; unsigned long *p; n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was)); n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was)); n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_was)); - total = n_gt + n_engine + n_lrc; + n_oob = BITS_TO_LONGS(ARRAY_SIZE(oob_was)); + total = n_gt + n_engine + n_lrc + n_oob; p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); if (!p) @@ -658,6 +683,8 @@ int xe_wa_init(struct xe_gt *gt) gt->wa_active.engine = p; p += n_engine; gt->wa_active.lrc = p; + p += n_lrc; + gt->wa_active.oob = p; return 0; } @@ -677,4 +704,9 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "\nLRC Workarounds\n"); for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was)) drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name); + + drm_printf(p, "\nOOB Workarounds\n"); + for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was)) + if (oob_was[idx].name) + drm_printf_indent(p, 1, "%s\n", oob_was[idx].name); } diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index defefa5d9611..cfe685989524 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -11,6 +11,7 @@ struct xe_gt; struct xe_hw_engine; int xe_wa_init(struct xe_gt *gt); +void xe_wa_process_oob(struct xe_gt *gt); void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); @@ -18,4 +19,12 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); +/** + * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any + * other more specific type + * @gt__: gt instance + * @id__: XE_OOB_, as generated by build system in generated/xe_wa_oob.h + */ +#define XE_WA(gt__, id__) test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob) + #endif diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules new file mode 100644 index 000000000000..e69de29bb2d1 -- cgit v1.2.3 From 7d356b25b32eec2a33bf2bc67974ef56f0778a7c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:50 -0700 Subject: drm/xe/guc: Port Wa_22012773006 to xe_wa Let xe_guc.c start using XE_WA() for workarounds, starting from a simple one: Wa_22012773006. It's also changed to start with graphics version 12, since that is the first supported by xe. 
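Putting the pieces together, the flow for this first OOB entry is roughly as follows (the generated names follow the scheme emitted by xe_gen_wa_oob):

    # xe_wa_oob.rules (added by this patch)
    22012773006     GRAPHICS_VERSION_RANGE(1200, 1250)

    /* generated/xe_wa_oob.h, produced at build time */
    enum {
            XE_WA_OOB_22012773006 = 0,
            _XE_WA_OOB_COUNT = 1
    };

    /* runtime check, evaluated per GT */
    if (XE_WA(gt, 22012773006))
            flags |= GUC_WA_POLLCS;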
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-14-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/xe_guc.c | 6 +++--- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b9d3553ab476..a685e39d6b44 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules $(call cmd,wa_oob) -$(obj)/xe_wa.o: $(generated_oob) +$(obj)/xe_guc.o $(obj)/xe_wa.o: $(generated_oob) # Please keep these build lists sorted! diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index a8e249205bff..08362db6a886 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -5,6 +5,7 @@ #include "xe_guc.h" +#include "generated/xe_wa_oob.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" #include "xe_bo.h" @@ -20,6 +21,7 @@ #include "xe_mmio.h" #include "xe_platform_types.h" #include "xe_uc_fw.h" +#include "xe_wa.h" #include "xe_wopcm.h" static struct xe_gt * @@ -134,9 +136,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); u32 flags = 0; - /* Wa_22012773006:gen11,gen12 < XeHP */ - if (GRAPHICS_VER(xe) >= 11 && - GRAPHICS_VERx100(xe) < 1250) + if (XE_WA(gt, 22012773006)) flags |= GUC_WA_POLLCS; /* Wa_16011759253 */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index e69de29bb2d1..b54f70eeccf4 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -0,0 +1 @@ +22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) -- cgit v1.2.3 From fb395db74b91dc60d928d7bd3f1c4b845efd950a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:51 -0700 Subject: drm/xe/guc: Port Wa_16011759253 to xe_wa Port Wa_16011759253 to oob. Wa_22011383443, that has the same action, doesn't need to be ported as it targets early PVC steppings. 
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-15-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 4 +--- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 08362db6a886..1b3fbbd74923 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -139,9 +139,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 22012773006)) flags |= GUC_WA_POLLCS; - /* Wa_16011759253 */ - /* Wa_22011383443 */ - if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_B0)) + if (XE_WA(gt, 16011759253)) flags |= GUC_WA_GAM_CREDITS; /* Wa_14014475959 */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index b54f70eeccf4..9b29a0dd0934 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1 +1,2 @@ 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) +16011759253 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0) -- cgit v1.2.3 From 57a148d63d0b67822c44ba7253625c8dd3c13531 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:52 -0700 Subject: drm/xe/guc: Port Wa_14012197797/Wa_22011391025 to xe_wa Wa_14012197797 and Wa_22011391025 apply to DG2 using the same action. They apply to slightly different conditions. Add both to the oob rules so they are both reported as active. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-16-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 9 +-------- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 1b3fbbd74923..54aaf6e6b577 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -147,14 +147,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) xe->info.platform == XE_DG2) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; - /* - * Wa_14012197797 - * Wa_22011391025 - * - * The same WA bit is used for both and 22011391025 is applicable to - * all DG2. - */ - if (xe->info.platform == XE_DG2) + if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797)) flags |= GUC_WA_DUAL_QUEUE; /* diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 9b29a0dd0934..77ac4a4a3296 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,2 +1,4 @@ 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) 16011759253 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0) +22011391025 PLATFORM(DG2) +14012197797 PLATFORM(DG2), GRAPHICS_STEP(A0, B0) -- cgit v1.2.3 From bb0f2e05ad6c5a9f1fa325f847ea5a82002ede1d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:53 -0700 Subject: drm/xe/guc: Port Wa_16011777198 to xe_wa Port Wa_16011777198 to xe_wa so it's reported as active. 
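The second, indented line added to xe_wa_oob.rules below is a continuation: the workaround is considered active if either set of rules matches. For such an entry the generator emits something like:

    { XE_RTP_NAME("16011777198"),
      XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)) },
    { XE_RTP_NAME(NULL),
      XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)) },

and xe_rtp_process() treats the unnamed entry as OR'ed with the named one preceding it, so XE_WA(gt, 16011777198) reports the workaround as active if either the G10 or the G11 condition matches.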
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-17-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 5 +---- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 54aaf6e6b577..5eccc4b67381 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -158,10 +158,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (GRAPHICS_VERx100(xe) < 1270) flags |= GUC_WA_PRE_PARSER; - /* Wa_16011777198 */ - if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || - IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, - STEP_B0)) + if (XE_WA(gt, 16011777198)) flags |= GUC_WA_RCS_RESET_BEFORE_RC6; /* diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 77ac4a4a3296..3d4304b7111e 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -2,3 +2,5 @@ 16011759253 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0) 22011391025 PLATFORM(DG2) 14012197797 PLATFORM(DG2), GRAPHICS_STEP(A0, B0) +16011777198 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) + SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0) -- cgit v1.2.3 From 63bbd800ff013d2e6053ce94524e3219cabd8315 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:54 -0700 Subject: drm/xe/guc: Port Wa_22012727170/Wa_22012727685 to xe_wa Wa_22012727170 and Wa_22012727685 apply to DG2 using the same action and conditions. Add both to the oob rules so they are both reported as active. Do not Wa_22012727170 to PVC and MTL since only early A* steppings are affected. 
v2: Remove DG2_G10 from Wa_22012727685 to match current WA database (Matt Roper) v3: GRAPHICS_STEP(A0, FOREVER) can be left alone for DG2 as this means all steppings Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-18-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 8 +------- drivers/gpu/drm/xe/xe_wa_oob.rules | 3 +++ 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 5eccc4b67381..1291f71348db 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -161,13 +161,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 16011777198)) flags |= GUC_WA_RCS_RESET_BEFORE_RC6; - /* - * Wa_22012727170 - * Wa_22012727685 - */ - if (IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G10, STEP_A0, STEP_C0) || - IS_SUBPLATFORM_STEP(xe, XE_DG2, XE_SUBPLATFORM_DG2_G11, STEP_A0, - STEP_FOREVER)) + if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; /* Wa_16015675438, Wa_18020744125 */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 3d4304b7111e..5b1beb2cf19f 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -4,3 +4,6 @@ 14012197797 PLATFORM(DG2), GRAPHICS_STEP(A0, B0) 16011777198 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0) +22012727170 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) + SUBPLATFORM(DG2, G11) +22012727685 SUBPLATFORM(DG2, G11) -- cgit v1.2.3 From 2b48b0df30cea3a617a69e44ca69bec7f01ed276 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:55 -0700 Subject: drm/xe/guc: Port Wa_16015675438/Wa_18020744125 to xe_wa Wa_16015675438 and Wa_18020744125 apply to DG2 using the same action and conditions. Add both to the oob rules so they are both reported as active. Note that previously they were not checking by platform or IP version, hence making them not future-proof. Those workarounds should only be active in PVC and DG2, besides the check for "no render engine". v2: From current WA database, Wa_16015675438 applies to all DG2 subplatforms except G11. 
Migrate condition to use subplatform and remove G11 from the match (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-19-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 4 ++-- drivers/gpu/drm/xe/xe_wa_oob.rules | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 1291f71348db..cc58a2092236 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -164,8 +164,8 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; - /* Wa_16015675438, Wa_18020744125 */ - if (!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) + if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) && + !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; /* Wa_1509372804 */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 5b1beb2cf19f..ebb576f27b3e 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -7,3 +7,7 @@ 22012727170 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) SUBPLATFORM(DG2, G11) 22012727685 SUBPLATFORM(DG2, G11) +16015675438 PLATFORM(PVC) + SUBPLATFORM(DG2, G10) + SUBPLATFORM(DG2, G12) +18020744125 PLATFORM(PVC) -- cgit v1.2.3 From 5e782507f67ab378046f6fcb9de03fd25693fdc4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:56 -0700 Subject: drm/xe/guc: Port Wa_1509372804 to xe_wa Port Wa_1509372804 to xe_wa so it's reported as active. v2: Match workaround database, starting from A0 stepping (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-20-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 3 +-- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index cc58a2092236..3ed460d3b6ca 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -168,8 +168,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; - /* Wa_1509372804 */ - if (IS_PLATFORM_STEP(xe, XE_PVC, STEP_B0, STEP_C0)) + if (XE_WA(gt, 1509372804)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; return flags; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index ebb576f27b3e..f6c4a0e055e0 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -11,3 +11,4 @@ SUBPLATFORM(DG2, G10) SUBPLATFORM(DG2, G12) 18020744125 PLATFORM(PVC) +1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) -- cgit v1.2.3 From 3e488e98fb9eb4cd9220417e69e75c8271294a02 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:57 -0700 Subject: drm/xe/rtp: Also check gt type When running rules on MTL and beyond that have media as a standalone GT, the rule should only match if the gt passed as parameter match the version/range/stepping that the rule is checking. This allows workarounds affecting only the media GT to be applied only on that GT and vice-versa. For platforms before MTL, the GT will not be of media type, even if it includes media engines. 
Make sure to cover that case by checking if the platforma has standalone media. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-21-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index ebcfb04c391a..43a86358efb6 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -23,6 +23,11 @@ * the values to the registers that have matching rules. */ +static bool has_samedia(const struct xe_device *xe) +{ + return xe->info.media_verx100 >= 1300; +} + static bool rule_matches(const struct xe_device *xe, struct xe_gt *gt, struct xe_hw_engine *hwe, @@ -43,26 +48,32 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: - match = xe->info.graphics_verx100 == r->ver_start; + match = xe->info.graphics_verx100 == r->ver_start && + (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: match = xe->info.graphics_verx100 >= r->ver_start && - xe->info.graphics_verx100 <= r->ver_end; + xe->info.graphics_verx100 <= r->ver_end && + (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_STEP: match = xe->info.step.graphics >= r->step_start && - xe->info.step.graphics < r->step_end; + xe->info.step.graphics < r->step_end && + (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION: - match = xe->info.media_verx100 == r->ver_start; + match = xe->info.media_verx100 == r->ver_start && + (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_RANGE: match = xe->info.media_verx100 >= r->ver_start && - xe->info.media_verx100 <= r->ver_end; + xe->info.media_verx100 <= r->ver_end && + (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_STEP: match = xe->info.step.media >= r->step_start && - xe->info.step.media < r->step_end; + xe->info.step.media < r->step_end && + (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_INTEGRATED: match = !xe->info.is_dgfx; -- cgit v1.2.3 From 87c299fa3a97740ddc0fa9b19ee4054004686f76 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 26 May 2023 09:43:58 -0700 Subject: drm/xe/guc: Port Wa_14014475959 to xe_wa and fix it Port Wa_14014475959 to xe_wa fixing its condition. The workaround should only be applied on the primary GT, not on media. So just checking by MTL platform is not enough: checking GT is of the right type is also needed. Since the GRAPHICS_STEP() does checks the GT type, we could leave the first check as a platform one: it'd would be easier to understand and not go out of sync with the graphics_ip_map[] in drivers/gpu/drm/xe/xe_pci.c. However it also means that new platforms using the same IP wouldn't match. Prefer using the IP version. 
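The net effect is easiest to see at the call site; a sketch of the check below with an explanatory comment (behavior on MTL assumes the GT-type aware matching from the previous patch):

    /*
     * guc_ctl_wa_flags() runs once per GuC, i.e. once per GT. With GT-type
     * aware matching, on MTL only the primary GT has this bit set in
     * wa_active.oob, so the media GuC does not get GUC_WA_HOLD_CCS_SWITCHOUT.
     */
    if (XE_WA(gt, 14014475959))
            flags |= GUC_WA_HOLD_CCS_SWITCHOUT;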
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230526164358.86393-22-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 4 +--- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 3ed460d3b6ca..ecc843d91f62 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -142,9 +142,7 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 16011759253)) flags |= GUC_WA_GAM_CREDITS; - /* Wa_14014475959 */ - if (IS_PLATFORM_STEP(xe, XE_METEORLAKE, STEP_A0, STEP_B0) || - xe->info.platform == XE_DG2) + if (XE_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797)) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f6c4a0e055e0..1ecb10390b28 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,5 +1,7 @@ 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) 16011759253 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0) +14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) + PLATFORM(DG2) 22011391025 PLATFORM(DG2) 14012197797 PLATFORM(DG2), GRAPHICS_STEP(A0, B0) 16011777198 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) -- cgit v1.2.3 From 9922bb40e2ef98c17fb142d22843c0c70ba35e5b Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 24 May 2023 16:51:42 +0000 Subject: drm/xe: Fix the migrate selftest for integrated GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TTM resource cursor was set up incorrectly. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 0f4371ad1fd9..f8ee9b9fca99 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -303,9 +303,14 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0); if (m->eng->vm->flags & XE_VM_FLAGS_64K) expected |= XE_PTE_PS64; - xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + if (xe_bo_is_vram(pt)) + xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + else + xe_res_first_sg(xe_bo_get_sg(pt), 0, pt->size, &src_it); + emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), &src_it, XE_PAGE_SIZE, pt); + run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); retval = xe_map_rd(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), -- cgit v1.2.3 From 3690a01ba926e3f1314d805d1af500fcf3edef7e Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 24 May 2023 16:52:29 +0000 Subject: drm/xe: Support copying of data between system memory bos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify the xe_migrate_copy() function somewhat to explicitly allow copying of data between two buffer objects including system memory buffer objects. Update the migrate test accordingly. 
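As a rough sketch of the new calling convention (illustrative only; xe_copy_between_bos() is an invented helper, while xe_migrate_copy() and its src_bo/dst_bo arguments come from the diff below): a copy within a single buffer object keeps working by passing the same bo twice, and a copy between two buffer objects passes both so each side's page tables can be built from the right backing store.

/* Sketch: copy the full contents of @src into @dst (sizes must match). */
static int xe_copy_between_bos(struct xe_migrate *m,
			       struct xe_bo *src, struct xe_bo *dst)
{
	struct dma_fence *fence;

	if (src->size != dst->size)
		return -EINVAL;	/* xe_migrate_copy() also rejects this */

	fence = xe_migrate_copy(m, src, dst,
				src->ttm.resource, dst->ttm.resource);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Wait for the last copy batch before declaring the data valid. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}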
v2: - Check that buffer object sizes match when copying (Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 16 ++++++-------- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 40 +++++++++++++++++++++++------------ drivers/gpu/drm/xe/xe_migrate.h | 3 ++- 4 files changed, 35 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index f8ee9b9fca99..4a3ca2960fd5 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -150,7 +150,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); expected = 0xc0c0c0c0c0c0c0c0; - fence = xe_migrate_copy(m, sysmem, sysmem->ttm.resource, + fence = xe_migrate_copy(m, sysmem, bo, sysmem->ttm.resource, bo->ttm.resource); if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" : "Copying small bo sysmem -> vram", test)) { @@ -167,7 +167,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); - fence = xe_migrate_copy(m, sysmem, bo->ttm.resource, + fence = xe_migrate_copy(m, bo, sysmem, bo->ttm.resource, sysmem->ttm.resource); if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" : "Copying small bo vram -> sysmem", test)) { @@ -347,10 +347,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); check(retval, expected, "Command clear small last value", test); - if (IS_DGFX(xe)) { - kunit_info(test, "Copying small buffer object to system\n"); - test_copy(m, tiny, test); - } + kunit_info(test, "Copying small buffer object to system\n"); + test_copy(m, tiny, test); /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); @@ -366,10 +364,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); check(retval, expected, "Command clear big last value", test); - if (IS_DGFX(xe)) { - kunit_info(test, "Copying big buffer object to system\n"); - test_copy(m, big, test); - } + kunit_info(test, "Copying big buffer object to system\n"); + test_copy(m, big, test); kunit_info(test, "Testing page table update using CPU if GPU idle.\n"); test_pt_update(m, pt, test, false); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 798b9938e534..71864ef95328 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -679,7 +679,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (move_lacks_source) fence = xe_migrate_clear(gt->migrate, bo, new_mem); else - fence = xe_migrate_copy(gt->migrate, bo, old_mem, new_mem); + fence = xe_migrate_copy(gt->migrate, bo, bo, old_mem, new_mem); if (IS_ERR(fence)) { ret = PTR_ERR(fence); xe_device_mem_access_put(xe); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index d7da5bf2d984..7d0a23577d36 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -582,30 +582,31 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, /** * xe_migrate_copy() - Copy content of TTM resources. * @m: The migration context. - * @bo: The buffer object @src is currently bound to. 
+ * @src_bo: The buffer object @src is currently bound to. + * @dst_bo: If copying between resources created for the same bo, set this to + * the same value as @src_bo. If copying between buffer objects, set it to + * the buffer object @dst is currently bound to. * @src: The source TTM resource. * @dst: The dst TTM resource. * * Copies the contents of @src to @dst: On flat CCS devices, * the CCS metadata is copied as well if needed, or if not present, * the CCS metadata of @dst is cleared for security reasons. - * It's currently not possible to copy between two system resources, - * since that would require two TTM page-vectors. - * TODO: Eliminate the @bo argument and supply two TTM page-vectors. * * Return: Pointer to a dma_fence representing the last copy batch, or * an error pointer on failure. If there is a failure, any copy operation * started by the function call has been synced. */ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, - struct xe_bo *bo, + struct xe_bo *src_bo, + struct xe_bo *dst_bo, struct ttm_resource *src, struct ttm_resource *dst) { struct xe_gt *gt = m->gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = bo->size; + u64 size = src_bo->size; struct xe_res_cursor src_it, dst_it, ccs_it; u64 src_L0_ofs, dst_L0_ofs; u32 src_L0_pt, dst_L0_pt; @@ -614,20 +615,28 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, int err; bool src_is_vram = mem_type_is_vram(src->mem_type); bool dst_is_vram = mem_type_is_vram(dst->mem_type); - bool copy_ccs = xe_device_has_flat_ccs(xe) && xe_bo_needs_ccs_pages(bo); + bool copy_ccs = xe_device_has_flat_ccs(xe) && + xe_bo_needs_ccs_pages(src_bo) && xe_bo_needs_ccs_pages(dst_bo); bool copy_system_ccs = copy_ccs && (!src_is_vram || !dst_is_vram); + /* Copying CCS between two different BOs is not supported yet. 
*/ + if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) + return ERR_PTR(-EINVAL); + + if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) + return ERR_PTR(-EINVAL); + if (!src_is_vram) - xe_res_first_sg(xe_bo_get_sg(bo), 0, size, &src_it); + xe_res_first_sg(xe_bo_get_sg(src_bo), 0, size, &src_it); else xe_res_first(src, 0, size, &src_it); if (!dst_is_vram) - xe_res_first_sg(xe_bo_get_sg(bo), 0, size, &dst_it); + xe_res_first_sg(xe_bo_get_sg(dst_bo), 0, size, &dst_it); else xe_res_first(dst, 0, size, &dst_it); if (copy_system_ccs) - xe_res_first_sg(xe_bo_get_sg(bo), xe_bo_ccs_pages_start(bo), + xe_res_first_sg(xe_bo_get_sg(src_bo), xe_bo_ccs_pages_start(src_bo), PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), &ccs_it); @@ -681,18 +690,18 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (!src_is_vram) emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0, - bo); + src_bo); else xe_res_next(&src_it, src_L0); if (!dst_is_vram) emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0, - bo); + dst_bo); else xe_res_next(&dst_it, src_L0); if (copy_system_ccs) - emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, bo); + emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, src_bo); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -714,8 +723,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, xe_sched_job_add_migrate_flush(job, flush_flags); if (!fence) { - err = job_add_deps(job, bo->ttm.base.resv, + err = job_add_deps(job, src_bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); + if (!err && src_bo != dst_bo) + err = job_add_deps(job, dst_bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP); if (err) goto err_job; } diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 1ff6e0a90de5..c283b626c21c 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -73,7 +73,8 @@ struct xe_migrate_pt_update { struct xe_migrate *xe_migrate_init(struct xe_gt *gt); struct dma_fence *xe_migrate_copy(struct xe_migrate *m, - struct xe_bo *bo, + struct xe_bo *src_bo, + struct xe_bo *dst_bo, struct ttm_resource *src, struct ttm_resource *dst); -- cgit v1.2.3 From 38453f826db89045d505c2122fd8e25cd6099007 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 25 May 2023 12:45:42 +0100 Subject: drm/xe/bo: further limit where CCS pages are needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to allocate extra pages for this if we know flat-ccs AUX state is not even possible, like for normal system memory objects. 
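The saving is easy to quantify: assuming the usual 1:256 flat-CCS ratio, a 64 MiB object that may be placed in VRAM needs DIV_ROUND_UP(64 MiB / 256, 4 KiB) = 64 extra tail pages for AUX state, whereas a system-memory-only object now gets none. A simplified sketch of the sizing decision, mirroring the hunk below:

	unsigned long extra_pages = 0;

	/* Reserve CCS backing pages only when the bo can actually carry
	 * flat-CCS AUX state (e.g. it may end up in VRAM on a flat-CCS
	 * device); plain system-memory objects skip the extra tail. */
	if (xe_bo_needs_ccs_pages(bo))
		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
					   PAGE_SIZE);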
Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 71864ef95328..394e4bfae6e1 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -285,6 +285,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); struct xe_device *xe = xe_bo_device(bo); struct xe_ttm_tt *tt; + unsigned long extra_pages; int err; tt = kzalloc(sizeof(*tt), GFP_KERNEL); @@ -293,12 +294,15 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, tt->dev = xe->drm.dev; + extra_pages = 0; + if (xe_bo_needs_ccs_pages(bo)) + extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), + PAGE_SIZE); + /* TODO: Select caching mode */ err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached, - DIV_ROUND_UP(xe_device_ccs_bytes(xe_bo_device(bo), - bo->ttm.base.size), - PAGE_SIZE)); + extra_pages); if (err) { kfree(tt); return NULL; -- cgit v1.2.3 From a2f9f4ff07aac81e80ff1e0913fdbfdde6ba6665 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 25 May 2023 12:45:43 +0100 Subject: drm/xe/migrate: retain CCS aux state for vram -> vram MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no mention that migrate_copy() will skip copying the CCS aux state for all types of vram -> vram transfers. Currently we don't need such a facility but might be surprising if we ever do. v2: (Lucas): - s/lmem/vram/ in the commit message - Tidy up the code a bit; use one emit_copy_ccs() v3: - Reword the commit message Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Lucas De Marchi Acked-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7d0a23577d36..9a676287e741 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -552,11 +552,19 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) { /* - * If the bo doesn't have any CCS metadata attached, we still - * need to clear it for security reasons. + * If the src is already in vram, then it should already + * have been cleared by us, or has been populated by the + * user. Make sure we copy the CCS aux state as-is. + * + * Otherwise if the bo doesn't have any CCS metadata attached, + * we still need to clear it for security reasons. */ - emit_copy_ccs(gt, bb, dst_ofs, true, m->cleared_vram_ofs, false, - dst_size); + u64 ccs_src_ofs = src_is_vram ? 
src_ofs : m->cleared_vram_ofs; + + emit_copy_ccs(gt, bb, + dst_ofs, true, + ccs_src_ofs, src_is_vram, dst_size); + flush_flags = MI_FLUSH_DW_CCS; } else if (copy_ccs) { if (!src_is_vram) -- cgit v1.2.3 From 565ce72e1c2d540d36ade02e6a7479c4c6a7f2d4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 24 May 2023 18:56:53 +0100 Subject: drm/xe: don't allocate under ct->lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seems to be a sensitive lock, where ct->lock looks to be primed with fs_reclaim, so holding that and then allocating memory will cause lockdep to complain. We need to change the ordering wrt to grabbing the ct->lock and potentially grabbing the runtime_pm, since some of the runtime_pm routines can allocate memory (or at least that's what lockdep seems to suggest). Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 4 ++++ drivers/gpu/drm/xe/xe_guc_ct.c | 13 +++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index c815a42e2cdb..20f8f0aae6b4 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -5,6 +5,7 @@ #include "xe_gt_tlb_invalidation.h" +#include "xe_device.h" #include "xe_gt.h" #include "xe_guc.h" #include "xe_guc_ct.h" @@ -112,6 +113,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, * in order which they currently are, if that changes the algorithm will * need to be updated. */ + + xe_device_mem_access_get(gt->xe); mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; if (fence) { @@ -140,6 +143,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, if (ret < 0 && fence) invalidation_fence_signal(fence); mutex_unlock(&guc->ct.lock); + xe_device_mem_access_put(gt->xe); return ret; } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index e8c2edb1359d..9dc906f2651a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -498,26 +498,22 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, } } - xe_device_mem_access_get(ct_to_xe(ct)); retry: ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len); if (unlikely(ret)) - goto put_wa; + goto out; ret = h2g_write(ct, action, len, g2h_fence ? 
g2h_fence->seqno : 0, !!g2h_fence); if (unlikely(ret)) { if (ret == -EAGAIN) goto retry; - goto put_wa; + goto out; } g2h_reserve_space(ct, g2h_len, num_g2h); xe_guc_notify(ct_to_guc(ct)); -put_wa: - xe_device_mem_access_put(ct_to_xe(ct)); out: - return ret; } @@ -539,6 +535,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, XE_BUG_ON(g2h_len && g2h_fence); lockdep_assert_held(&ct->lock); + xe_device_assert_mem_access(ct_to_xe(ct)); try_again: ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, @@ -608,10 +605,14 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, XE_BUG_ON(g2h_len && g2h_fence); + xe_device_mem_access_get(ct_to_xe(ct)); + mutex_lock(&ct->lock); ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); mutex_unlock(&ct->lock); + xe_device_mem_access_put(ct_to_xe(ct)); + return ret; } -- cgit v1.2.3 From 3af4365003971946fdd2cca44858d6d16929f2d3 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 24 May 2023 18:56:54 +0100 Subject: drm/xe: keep pulling mem_access_get further back MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lockdep is unhappy about ggtt->lock -> runtime_pm, where it seems to think this can somehow get inverted. The ggtt->lock looks like a potentially sensitive driver lock, so likely a sensible move to never call the runtime_pm routines while holding it. Actually it looks like d3cold wants to grab this, so perhaps this can indeed deadlock. v2: - Don't forget about xe_gt_tlb_invalidation_vma(), which now needs explicit access_get. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 6 ++++++ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 10 ++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 98903354b436..1ed22b5f89ad 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -142,12 +142,14 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) u64 start, end; /* Display may have allocated inside ggtt, so be careful with clearing here */ + xe_device_mem_access_get(ggtt->gt->xe); mutex_lock(&ggtt->lock); drm_mm_for_each_hole(hole, &ggtt->mm, start, end) xe_ggtt_clear(ggtt, start, end - start); xe_ggtt_invalidate(ggtt->gt); mutex_unlock(&ggtt->lock); + xe_device_mem_access_put(ggtt->gt->xe); } int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt) @@ -284,12 +286,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (err) return err; + xe_device_mem_access_get(ggtt->gt->xe); mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, alignment, 0, start, end, 0); if (!err) xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); + xe_device_mem_access_put(ggtt->gt->xe); return err; } @@ -318,6 +322,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) { + xe_device_mem_access_get(ggtt->gt->xe); mutex_lock(&ggtt->lock); xe_ggtt_clear(ggtt, node->start, node->size); @@ -327,6 +332,7 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) xe_ggtt_invalidate(ggtt->gt); mutex_unlock(&ggtt->lock); + xe_device_mem_access_put(ggtt->gt->xe); } void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) diff --git 
a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 20f8f0aae6b4..44e442bf306c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -114,7 +114,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, * need to be updated. */ - xe_device_mem_access_get(gt->xe); mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; if (fence) { @@ -143,7 +142,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, if (ret < 0 && fence) invalidation_fence_signal(fence); mutex_unlock(&guc->ct.lock); - xe_device_mem_access_put(gt->xe); return ret; } @@ -196,7 +194,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; - int len = 0; + int len = 0, ret; XE_BUG_ON(!vma); @@ -250,7 +248,11 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN); - return send_tlb_invalidation(>->uc.guc, fence, action, len); + xe_device_mem_access_get(gt->xe); + ret = send_tlb_invalidation(>->uc.guc, fence, action, len); + xe_device_mem_access_put(gt->xe); + + return ret; } static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) -- cgit v1.2.3 From 094d739f4dbb6322ae21b3dab8e6a7d272347dc7 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 25 May 2023 21:14:29 +0200 Subject: drm/xe: Prevent evicting for page tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating page tables from xe_exec_ioctl, we may end up freeing memory we just validated. To be certain this does not happen, do not allow the current reservation to be evicted from the ioctl. Callchain: [ 109.008522] xe_bo_move_notify+0x5c/0xf0 [xe] [ 109.008548] xe_bo_move+0x90/0x510 [xe] [ 109.008573] ttm_bo_handle_move_mem+0xb7/0x170 [ttm] [ 109.008581] ttm_bo_swapout+0x15e/0x360 [ttm] [ 109.008586] ttm_device_swapout+0xc2/0x110 [ttm] [ 109.008592] ttm_global_swapout+0x47/0xc0 [ttm] [ 109.008598] ttm_tt_populate+0x7a/0x130 [ttm] [ 109.008603] ttm_bo_handle_move_mem+0x160/0x170 [ttm] [ 109.008609] ttm_bo_validate+0xe5/0x1d0 [ttm] [ 109.008614] ttm_bo_init_reserved+0xac/0x190 [ttm] [ 109.008620] __xe_bo_create_locked+0x153/0x260 [xe] [ 109.008645] xe_bo_create_locked_range+0x77/0x360 [xe] [ 109.008671] xe_bo_create_pin_map_at+0x33/0x1f0 [xe] [ 109.008695] xe_bo_create_pin_map+0x11/0x20 [xe] [ 109.008721] xe_pt_create+0x69/0xf0 [xe] [ 109.008749] xe_pt_stage_bind_entry+0x208/0x430 [xe] [ 109.008776] xe_pt_walk_range+0xe9/0x2a0 [xe] [ 109.008802] xe_pt_walk_range+0x223/0x2a0 [xe] [ 109.008828] xe_pt_walk_range+0x223/0x2a0 [xe] [ 109.008853] __xe_pt_bind_vma+0x28d/0xbd0 [xe] [ 109.008878] xe_vm_bind_vma+0xc7/0x2f0 [xe] [ 109.008904] xe_vm_rebind+0x72/0x160 [xe] [ 109.008930] xe_exec_ioctl+0x22b/0xa70 [xe] [ 109.008955] drm_ioctl_kernel+0xb9/0x150 [drm] [ 109.008972] drm_ioctl+0x210/0x430 [drm] [ 109.008988] __x64_sys_ioctl+0x85/0xb0 [ 109.008990] do_syscall_64+0x38/0x90 [ 109.008991] entry_SYSCALL_64_after_hwframe+0x72/0xdc Original warning: [ 5613.149126] WARNING: CPU: 3 PID: 45883 at drivers/gpu/drm/xe/xe_vm.c:504 xe_vm_unlock_dma_resv+0x43/0x50 [xe] ... [ 5613.226398] RIP: 0010:xe_vm_unlock_dma_resv+0x43/0x50 [xe] [ 5613.316098] Call Trace: [ 5613.318595] [ 5613.320743] xe_exec_ioctl+0x383/0x8a0 [xe] [ 5613.325278] ? __is_insn_slot_addr+0x8e/0x110 [ 5613.329719] ? __is_insn_slot_addr+0x8e/0x110 [ 5613.334116] ? 
kernel_text_address+0x75/0xf0 [ 5613.338429] ? __pfx_stack_trace_consume_entry+0x10/0x10 [ 5613.343778] ? __kernel_text_address+0x9/0x40 [ 5613.348181] ? unwind_get_return_address+0x1a/0x30 [ 5613.353013] ? __pfx_stack_trace_consume_entry+0x10/0x10 [ 5613.358362] ? arch_stack_walk+0x99/0xf0 [ 5613.362329] ? rcu_read_lock_sched_held+0xb/0x70 [ 5613.366996] ? lock_acquire+0x287/0x2f0 [ 5613.370873] ? rcu_read_lock_sched_held+0xb/0x70 [ 5613.375530] ? rcu_read_lock_sched_held+0xb/0x70 [ 5613.380181] ? lock_release+0x225/0x2e0 [ 5613.384059] ? __pfx_xe_exec_ioctl+0x10/0x10 [xe] [ 5613.389092] drm_ioctl_kernel+0xc0/0x170 [ 5613.393068] drm_ioctl+0x1b7/0x490 [ 5613.396519] ? __pfx_xe_exec_ioctl+0x10/0x10 [xe] [ 5613.401547] ? lock_release+0x225/0x2e0 [ 5613.405432] __x64_sys_ioctl+0x8a/0xb0 [ 5613.409232] do_syscall_64+0x37/0x90 Signed-off-by: Maarten Lankhorst Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/239 Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_bo.h | 7 ++++--- drivers/gpu/drm/xe/xe_pt.c | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 394e4bfae6e1..39b3b9aa7c27 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1134,7 +1134,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); if (resv) { - ctx.allow_res_evict = true; + ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT); ctx.resv = resv; } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 2d08622f58a7..7ede50f2cbf3 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -27,9 +27,10 @@ #define XE_BO_CREATE_GGTT_BIT BIT(5) #define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6) #define XE_BO_CREATE_PINNED_BIT BIT(7) -#define XE_BO_DEFER_BACKING BIT(8) -#define XE_BO_SCANOUT_BIT BIT(9) -#define XE_BO_FIXED_PLACEMENT_BIT BIT(10) +#define XE_BO_CREATE_NO_RESV_EVICT BIT(8) +#define XE_BO_DEFER_BACKING BIT(9) +#define XE_BO_SCANOUT_BIT BIT(10) +#define XE_BO_FIXED_PLACEMENT_BIT BIT(11) /* this one is trigger internally only */ #define XE_BO_INTERNAL_TEST BIT(30) #define XE_BO_INTERNAL_64K BIT(31) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index f15282996c3b..30de6e902a8e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -219,7 +219,8 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(gt) | XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | - XE_BO_CREATE_PINNED_BIT); + XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_NO_RESV_EVICT); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; -- cgit v1.2.3 From 61f288a8972253f4168f37331e26b6b0f7c9bc9d Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 25 May 2023 15:43:23 -0400 Subject: drm/xe: Rework size helper to be a little more correct The _total_vram_size helper is device based and is not complete. Teach the helper to be tile aware and add the ability to size DG1 correctly. Reviewed-by: Matthew Auld Signed-off-by: Michael J. 
Ruhl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 80 ++++++++++++++++++++++------------ drivers/gpu/drm/xe/xe_mmio.h | 6 ++- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 34 ++++++++------- 4 files changed, 74 insertions(+), 48 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5c239989608f..d8b480f69c5f 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -74,7 +74,7 @@ #define VE1_AUX_INV XE_REG(0x42b8) #define AUX_INV REG_BIT(0) -#define XEHP_TILE0_ADDR_RANGE XE_REG_MCR(0x4900) +#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) #define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 87dd417e3f08..5e8791bd0b16 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: MIT /* - * Copyright © 2021 Intel Corporation + * Copyright © 2021-2023 Intel Corporation */ #include "xe_mmio.h" @@ -20,7 +20,6 @@ #define XEHP_MTCFG_ADDR XE_REG(0x101800) #define TILE_COUNT REG_GENMASK(15, 8) -#define GEN12_LMEM_BAR 2 static int xe_set_dma_info(struct xe_device *xe) { @@ -145,34 +144,56 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) return true; } -int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *usable_size) +/** + * xe_mmio_tile_vram_size() - Collect vram size and offset information + * @gt: tile to get info for + * @vram_size: available vram (size - device reserved portions) + * @tile_size: actual vram size + * @tile_offset: physical start point in the vram address space + * + * There are 4 places for size information: + * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1) + * - TILEx size (actual vram size) + * - GSMBASE offset (TILEx - "stolen") + * - CSSBASE offset (TILEx - CSS space necessary) + * + * CSSBASE is always a lower/smaller offset then GSMBASE. + * + * The actual available size of memory is to the CCS or GSM base. + * NOTE: multi-tile bases will include the tile offset. 
+ * + */ +int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 *tile_offset) { - struct xe_gt *gt = xe_device_get_gt(xe, 0); - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u64 offset; int err; - u32 reg_val; - - if (!xe->info.has_flat_ccs) { - *vram_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - if (usable_size) - *usable_size = min(*vram_size, - xe_mmio_read64(gt, GSMBASE)); - return 0; - } + u32 reg; err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) return err; - reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE0_ADDR_RANGE); - *vram_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg_val) * SZ_1G; - if (usable_size) { - reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); - *usable_size = (u64)REG_FIELD_GET(GENMASK(31, 8), reg_val) * SZ_64K; - drm_info(&xe->drm, "vram_size: 0x%llx usable_size: 0x%llx\n", - *vram_size, *usable_size); + /* actual size */ + if (unlikely(gt->xe->info.platform == XE_DG1)) { + *tile_size = pci_resource_len(to_pci_dev(gt->xe->drm.dev), GEN12_LMEM_BAR); + *tile_offset = 0; + } else { + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); + *tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; + *tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G; } + /* minus device usage */ + if (gt->xe->info.has_flat_ccs) { + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + } else { + offset = xe_mmio_read64(gt, GSMBASE); + } + + /* remove the tile offset so we have just the available size */ + *vram_size = offset - *tile_offset; + return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } @@ -180,11 +201,12 @@ int xe_mmio_probe_vram(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct xe_gt *gt; - u8 id; - u64 vram_size; u64 original_size; - u64 usable_size; + u64 tile_offset; + u64 tile_size; + u64 vram_size; int err; + u8 id; if (!IS_DGFX(xe)) { xe->mem.vram.mapping = 0; @@ -209,25 +231,25 @@ int xe_mmio_probe_vram(struct xe_device *xe) gt = xe_device_get_gt(xe, 0); original_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - err = xe_mmio_total_vram_size(xe, &vram_size, &usable_size); + err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset); if (err) return err; xe_resize_vram_bar(xe, vram_size); xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); - xe->mem.vram.io_size = min(usable_size, + xe->mem.vram.io_size = min(vram_size, pci_resource_len(pdev, GEN12_LMEM_BAR)); xe->mem.vram.size = xe->mem.vram.io_size; if (!xe->mem.vram.size) return -EIO; - if (usable_size > xe->mem.vram.io_size) + if (vram_size > xe->mem.vram.io_size) drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (%lluMiB->%lluMiB)\n", - (u64)usable_size >> 20, (u64)xe->mem.vram.io_size >> 20); + (u64)vram_size >> 20, (u64)xe->mem.vram.io_size >> 20); xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); - xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size); + xe->mem.vram.size = min_t(u64, xe->mem.vram.size, vram_size); drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 1407f1189b0d..da91729a3854 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright © 2021 Intel Corporation + * Copyright © 2021-2023 Intel Corporation */ #ifndef _XE_MMIO_H_ @@ 
-16,6 +16,8 @@ struct drm_device; struct drm_file; struct xe_device; +#define GEN12_LMEM_BAR 2 + int xe_mmio_init(struct xe_device *xe); static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) @@ -131,6 +133,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, } int xe_mmio_probe_vram(struct xe_device *xe); -int xe_mmio_total_vram_size(struct xe_device *xe, u64 *vram_size, u64 *flat_ccs_base); +int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 *tile_base); #endif diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index a3855870321f..d49b2cfeba92 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: MIT /* - * Copyright © 2021-2022 Intel Corporation + * Copyright © 2021-2023 Intel Corporation * Copyright (C) 2021-2002 Red Hat */ @@ -51,27 +51,29 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe); } -static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) +static s64 detect_bar2_dgfx(struct xe_gt *gt, struct xe_ttm_stolen_mgr *mgr) { - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - struct xe_gt *gt = to_gt(xe); - u64 vram_size, stolen_size; - int err; - - err = xe_mmio_total_vram_size(xe, &vram_size, NULL); - if (err) { - drm_info(&xe->drm, "Querying total vram size failed\n"); + struct pci_dev *pdev = to_pci_dev(gt->xe->drm.dev); + u64 stolen_size; + u64 tile_offset; + u64 tile_size; + u64 vram_size; + + if (xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset)) { + drm_err(>->xe->drm, "Querying total vram size failed\n"); return 0; } /* Use DSM base address instead for stolen memory */ - mgr->stolen_base = xe_mmio_read64(gt, DSMBASE) & BDSM_MASK; - if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base)) + mgr->stolen_base = (xe_mmio_read64(gt, DSMBASE) & BDSM_MASK) - tile_offset; + if (drm_WARN_ON(>->xe->drm, tile_size < mgr->stolen_base)) return 0; - stolen_size = vram_size - mgr->stolen_base; - if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, 2)) - mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base; + stolen_size = tile_size - mgr->stolen_base; + + /* Verify usage fits in the actual resource available */ + if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, GEN12_LMEM_BAR)) + mgr->io_base = gt->mem.vram.io_start + mgr->stolen_base; /* * There may be few KB of platform dependent reserved memory at the end @@ -139,7 +141,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) int err; if (IS_DGFX(xe)) - stolen_size = detect_bar2_dgfx(xe, mgr); + stolen_size = detect_bar2_dgfx(to_gt(xe), mgr); else if (GRAPHICS_VERx100(xe) >= 1270) stolen_size = detect_bar2_integrated(xe, mgr); else -- cgit v1.2.3 From 7f075300a31829a6a5a388313f1a67e31eba012e Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 25 May 2023 15:43:24 -0400 Subject: drm/xe: Simplify rebar sizing "Right sizing" the PCI BAR is not necessary. If rebar is needed size to the maximum available. Preserve the force_vram_bar_size sizing. Reviewed-by: Matthew Auld Signed-off-by: Michael J. 
Ruhl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 14 ++-- drivers/gpu/drm/xe/xe_gt_types.h | 12 ++-- drivers/gpu/drm/xe/xe_mmio.c | 131 ++++++++++++++++++++--------------- 3 files changed, 89 insertions(+), 68 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 91edbe4a3730..0c31b341162a 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright © 2022 Intel Corporation + * Copyright © 2022-2023 Intel Corporation */ #ifndef _XE_DEVICE_TYPES_H_ @@ -133,11 +133,13 @@ struct xe_device { /** * @io_size: IO size of VRAM. * - * This represents how much of VRAM we can access via - * the CPU through the VRAM BAR. This can be smaller - * than @size, in which case only part of VRAM is CPU - * accessible (typically the first 256M). This - * configuration is known as small-bar. + * This represents how much of VRAM the CPU can access + * via the VRAM BAR. + * On systems that do not support large BAR IO space, + * this can be smaller than the actual memory size, in + * which case only part of VRAM is CPU accessible + * (typically the first 256M). This configuration is + * known as small-bar. */ resource_size_t io_size; /** @size: Total size of VRAM */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index b83c834e7ced..993f855025fd 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright © 2022 Intel Corporation + * Copyright © 2022-2023 Intel Corporation */ #ifndef _XE_GT_TYPES_H_ @@ -148,11 +148,11 @@ struct xe_gt { /** * @io_size: IO size of this VRAM instance * - * This represents how much of this VRAM we can access - * via the CPU through the VRAM BAR. This can be smaller - * than @size, in which case only part of VRAM is CPU - * accessible (typically the first 256M). This - * configuration is known as small-bar. + * This represents how much of the VRAM the CPU can access + * via the VRAM BAR. + * This can be smaller than the actual @size, in which + * case only part of VRAM is CPU accessible (typically + * the first 256M). This configuration is known as small-bar. */ resource_size_t io_size; /** @size: size of VRAM. */ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5e8791bd0b16..665fcb23bbbb 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -3,6 +3,8 @@ * Copyright © 2021-2023 Intel Corporation */ +#include + #include "xe_mmio.h" #include @@ -21,6 +23,8 @@ #define XEHP_MTCFG_ADDR XE_REG(0x101800) #define TILE_COUNT REG_GENMASK(15, 8) +#define BAR_SIZE_SHIFT 20 + static int xe_set_dma_info(struct xe_device *xe) { unsigned int mask_size = xe->info.dma_mask_size; @@ -57,49 +61,61 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) if (ret) { drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). 
Consider enabling 'Resizable BAR' support in your BIOS\n", resno, 1 << bar_size, ERR_PTR(ret)); - return -1; + return ret; } drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); - return 1; + return ret; } -static int xe_resize_vram_bar(struct xe_device *xe, resource_size_t vram_size) +/* + * if force_vram_bar_size is set, attempt to set to the requested size + * else set to maximum possible size + */ +static int xe_resize_vram_bar(struct xe_device *xe) { + u64 force_vram_bar_size = xe_force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct pci_bus *root = pdev->bus; - struct resource *root_res; - resource_size_t rebar_size; resource_size_t current_size; + resource_size_t rebar_size; + struct resource *root_res; + u32 bar_size_mask; u32 pci_cmd; int i; int ret; - u64 force_vram_bar_size = xe_force_vram_bar_size; - current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR)); + /* gather some relevant info */ + current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); + bar_size_mask = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR); + if (!bar_size_mask) + return 0; + + /* set to a specific size? */ if (force_vram_bar_size) { - u32 bar_sizes; + u32 bar_size_bit; rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M; - bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR); - if (rebar_size == current_size) - return 0; + bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size)); - if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || - rebar_size >= roundup_pow_of_two(vram_size)) { - rebar_size = vram_size; + if (!bar_size_bit) { drm_info(&xe->drm, - "Given bar size is not within supported size, setting it to default: %lluMiB\n", - (u64)vram_size >> 20); + "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n", + (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20); + return 0; } + + rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT); + + if (rebar_size == current_size) + return 0; } else { - rebar_size = current_size; + rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT); - if (rebar_size != roundup_pow_of_two(vram_size)) - rebar_size = vram_size; - else + /* only resize if larger than current */ + if (rebar_size <= current_size) return 0; } @@ -144,6 +160,31 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) return true; } +static int xe_determine_lmem_bar_size(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int err; + + if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) { + drm_err(&xe->drm, "pci resource is not valid\n"); + return -ENXIO; + } + + err = xe_resize_vram_bar(xe); + if (err) + return err; + + xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); + xe->mem.vram.io_size = pci_resource_len(pdev, GEN12_LMEM_BAR); + if (!xe->mem.vram.io_size) + return -EIO; + + /* set up a map to the total memory area. 
*/ + xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); + + return 0; +} + /** * xe_mmio_tile_vram_size() - Collect vram size and offset information * @gt: tile to get info for @@ -199,59 +240,37 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 int xe_mmio_probe_vram(struct xe_device *xe) { - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct xe_gt *gt; - u64 original_size; u64 tile_offset; u64 tile_size; u64 vram_size; int err; u8 id; - if (!IS_DGFX(xe)) { - xe->mem.vram.mapping = 0; - xe->mem.vram.size = 0; - xe->mem.vram.io_start = 0; - xe->mem.vram.io_size = 0; - - for_each_gt(gt, xe, id) { - gt->mem.vram.mapping = 0; - gt->mem.vram.size = 0; - gt->mem.vram.io_start = 0; - gt->mem.vram.io_size = 0; - } + if (!IS_DGFX(xe)) return 0; - } - - if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) { - drm_err(&xe->drm, "pci resource is not valid\n"); - return -ENXIO; - } + /* Get the size of the gt0 vram for later accessibility comparison */ gt = xe_device_get_gt(xe, 0); - original_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset); if (err) return err; - xe_resize_vram_bar(xe, vram_size); - xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); - xe->mem.vram.io_size = min(vram_size, - pci_resource_len(pdev, GEN12_LMEM_BAR)); - xe->mem.vram.size = xe->mem.vram.io_size; - - if (!xe->mem.vram.size) - return -EIO; + err = xe_determine_lmem_bar_size(xe); + if (err) + return err; - if (vram_size > xe->mem.vram.io_size) - drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (%lluMiB->%lluMiB)\n", - (u64)vram_size >> 20, (u64)xe->mem.vram.io_size >> 20); + /* small bar issues will only cover gt0 sizes */ + if (xe->mem.vram.io_size < vram_size) + drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", + vram_size, (u64)xe->mem.vram.io_size); - xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); - xe->mem.vram.size = min_t(u64, xe->mem.vram.size, vram_size); + /* Limit size to available memory to account for the current memory algorithm */ + xe->mem.vram.io_size = min_t(u64, xe->mem.vram.io_size, vram_size); + xe->mem.vram.size = xe->mem.vram.io_size; - drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size); + drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, + &xe->mem.vram.io_size); /* FIXME: Assuming equally partitioned VRAM, incorrect */ if (xe->info.tile_count > 1) { -- cgit v1.2.3 From 2d830096e41403ba67c9d066de2fb818f81d9591 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 25 May 2023 15:43:25 -0400 Subject: drm/xe: Size GT device memory correctly The current method of sizing GT device memory is not quite right. Update the algorithm to use the relevant HW information and offsets to set up the sizing correctly. Update the stolen memory sizing to reflect the changes, and to be GT specific. Reviewed-by: Matthew Auld Signed-off-by: Michael J. 
Ruhl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_gt_types.h | 2 + drivers/gpu/drm/xe/xe_mmio.c | 93 ++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 46 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0c31b341162a..5b3f270bf790 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -144,6 +144,8 @@ struct xe_device { resource_size_t io_size; /** @size: Total size of VRAM */ resource_size_t size; + /** @base: Offset to apply for Device Physical Address control */ + resource_size_t base; /** @mapping: pointer to VRAM mappable space */ void *__iomem mapping; } vram; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 993f855025fd..093d650c35f4 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -155,6 +155,8 @@ struct xe_gt { * the first 256M). This configuration is known as small-bar. */ resource_size_t io_size; + /** @base: offset of VRAM starting base */ + resource_size_t base; /** @size: size of VRAM. */ resource_size_t size; /** @mapping: pointer to VRAM mappable space */ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 665fcb23bbbb..d3b57669c9a7 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -179,6 +179,8 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) if (!xe->mem.vram.io_size) return -EIO; + xe->mem.vram.base = 0; /* DPA offset */ + /* set up a map to the total memory area. */ xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); @@ -240,6 +242,9 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 int xe_mmio_probe_vram(struct xe_device *xe) { + resource_size_t io_size; + u64 available_size = 0; + u64 total_size = 0; struct xe_gt *gt; u64 tile_offset; u64 tile_size; @@ -265,64 +270,60 @@ int xe_mmio_probe_vram(struct xe_device *xe) drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", vram_size, (u64)xe->mem.vram.io_size); - /* Limit size to available memory to account for the current memory algorithm */ - xe->mem.vram.io_size = min_t(u64, xe->mem.vram.io_size, vram_size); - xe->mem.vram.size = xe->mem.vram.io_size; - drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.io_size); - /* FIXME: Assuming equally partitioned VRAM, incorrect */ - if (xe->info.tile_count > 1) { - u8 adj_tile_count = xe->info.tile_count; - resource_size_t size, io_start, io_size; + io_size = xe->mem.vram.io_size; - for_each_gt(gt, xe, id) - if (xe_gt_is_media_type(gt)) - --adj_tile_count; + /* gt specific ranges */ + for_each_gt(gt, xe, id) { + if (xe_gt_is_media_type(gt)) + continue; - XE_BUG_ON(!adj_tile_count); + err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset); + if (err) + return err; - size = xe->mem.vram.size / adj_tile_count; - io_start = xe->mem.vram.io_start; - io_size = xe->mem.vram.io_size; + gt->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; + gt->mem.vram.io_size = min_t(u64, vram_size, io_size); - for_each_gt(gt, xe, id) { - if (id && !xe_gt_is_media_type(gt)) { - io_size -= min(io_size, size); - io_start += io_size; - } + if (!gt->mem.vram.io_size) { + drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); + return -ENODEV; + } - gt->mem.vram.size = size; - - /* - * XXX: multi-tile small-bar might be wild. Hopefully - * full tile without any mappable vram is not something - * we care about. - */ - - gt->mem.vram.io_size = min(size, io_size); - if (io_size) { - gt->mem.vram.io_start = io_start; - gt->mem.vram.mapping = xe->mem.vram.mapping + - (io_start - xe->mem.vram.io_start); - } else { - drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); - return -ENODEV; - } + gt->mem.vram.base = tile_offset; + + /* small bar can limit the visible size. size accordingly */ + gt->mem.vram.size = min_t(u64, vram_size, io_size); + gt->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; - drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", - id, gt->info.vram_id, >->mem.vram.io_start, - >->mem.vram.size); + drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, gt->info.vram_id, + >->mem.vram.io_start, >->mem.vram.size); + + if (gt->mem.vram.io_size < gt->mem.vram.size) + drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id, + gt->info.vram_id, >->mem.vram.io_size); + + /* calculate total size using tile size to get the correct HW sizing */ + total_size += tile_size; + available_size += vram_size; + + if (total_size > xe->mem.vram.io_size) { + drm_warn(&xe->drm, "VRAM: %pa is larger than resource %pa\n", + &total_size, &xe->mem.vram.io_size); } - } else { - gt->mem.vram.size = xe->mem.vram.size; - gt->mem.vram.io_start = xe->mem.vram.io_start; - gt->mem.vram.io_size = xe->mem.vram.io_size; - gt->mem.vram.mapping = xe->mem.vram.mapping; - drm_info(&xe->drm, "VRAM: %pa\n", >->mem.vram.size); + io_size -= min_t(u64, tile_size, io_size); } + + xe->mem.vram.size = total_size; + + drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, + &xe->mem.vram.size); + drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, + &available_size); + return 0; } -- cgit v1.2.3 From fb31517cd712f9a29608bc24fbcaf45d14e9c40e Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 25 May 2023 15:43:26 -0400 Subject: drm/xe: Rename GPU offset helper to reflect true usage The _io_offset helper function is returning an offset into the GPU address space. Using the CPU address offset (io_) is not correct. Rename to reflect usage. Update to use GPU offset information. Update PT dma_offset to use the helper Reviewed-by: Matthew Auld Signed-off-by: Michael J. Ruhl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 8 ++++---- drivers/gpu/drm/xe/xe_bo.h | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- drivers/gpu/drm/xe/xe_pt.c | 5 +---- 4 files changed, 8 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 39b3b9aa7c27..e766f8955718 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1341,7 +1341,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, * XXX: This is in the VM bind data path, likely should calculate this once and * store, with a recalculation if the BO is moved. 
*/ -uint64_t vram_region_io_offset(struct ttm_resource *res) +uint64_t vram_region_gpu_offset(struct ttm_resource *res) { struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); struct xe_gt *gt = mem_type_to_gt(xe, res->mem_type); @@ -1349,7 +1349,7 @@ uint64_t vram_region_io_offset(struct ttm_resource *res) if (res->mem_type == XE_PL_STOLEN) return xe_ttm_stolen_gpu_offset(xe); - return gt->mem.vram.io_start - xe->mem.vram.io_start; + return xe->mem.vram.base + gt->mem.vram.base; } /** @@ -1433,7 +1433,7 @@ int xe_bo_pin(struct xe_bo *bo) XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &vram) - - vram_region_io_offset(bo->ttm.resource)) >> PAGE_SHIFT; + vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); spin_lock(&xe->pinned.lock); @@ -1580,7 +1580,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, xe_res_first(bo->ttm.resource, page << PAGE_SHIFT, page_size, &cur); - return cur.start + offset + vram_region_io_offset(bo->ttm.resource); + return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource); } } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 7ede50f2cbf3..e6d08fa9c992 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -227,7 +227,7 @@ void xe_bo_vunmap(struct xe_bo *bo); bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); -uint64_t vram_region_io_offset(struct ttm_resource *res); +uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 9a676287e741..7a2188f02a86 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -419,7 +419,7 @@ static u32 pte_update_size(struct xe_migrate *m, } else { /* Offset into identity map. */ *L0_ofs = xe_migrate_vram_ofs(cur->start + - vram_region_io_offset(res)); + vram_region_gpu_offset(res)); cmds += cmd_size; } @@ -469,7 +469,7 @@ static void emit_pte(struct xe_migrate *m, addr |= XE_PTE_PS64; } - addr += vram_region_io_offset(bo->ttm.resource); + addr += vram_region_gpu_offset(bo->ttm.resource); addr |= XE_PPGTT_PTE_LM; } addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 30de6e902a8e..2a5481111a5f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -759,13 +759,10 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, int ret; if (is_vram) { - struct xe_gt *bo_gt = xe_bo_to_gt(bo); - xe_walk.default_pte = XE_PPGTT_PTE_LM; if (vma && vma->use_atomic_access_pte_bit) xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - xe_walk.dma_offset = bo_gt->mem.vram.io_start - - gt_to_xe(gt)->mem.vram.io_start; + xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); xe_walk.cache = XE_CACHE_WB; } else { if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT) -- cgit v1.2.3 From 4e40483644098ef75ea1344e5cdc9285e30c28ae Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 23 May 2023 13:14:45 -0700 Subject: drm/xe: Replace PVC check by engine type check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __emit_job_gen12_render_compute() masks some PIPE_CONTROL bits that do not exist in platforms without render engine. 
So here replacing the PVC check by something more generic that will support any future platforms without render engine. Reviewed-by: Thomas Hellström Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index a09ee8c736b5..a70fa6d9ae60 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -249,11 +249,11 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->engine->gt; struct xe_device *xe = gt_to_xe(gt); - bool pvc = xe->info.platform == XE_PVC; + bool lacks_render = !(xe->gt[0].info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 mask_flags = 0; dw[i++] = preparser_disable(true); - if (pvc) + if (lacks_render) mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; @@ -275,7 +275,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, job->user_fence.value, dw, i); - i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, pvc, dw, i); + i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i); i = emit_user_interrupt(dw, i); -- cgit v1.2.3 From dbd6c64c99a8eb5ed85adec5a24e30a62ace7b91 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 1 Jun 2023 13:35:05 +0100 Subject: drm/xe/vm: fix double list add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like the driver only wants to track one vma for each external object per vm. However it looks like bo_has_vm_references_locked() will ignore any vma that is marked as vma->destroyed (not actually destroyed yet). If we then mark our externally tracked vma as destroyed and then create a new vma for the same object and vm, we can have two externally tracked vma for the same object and vm. When the destroy actually happens it tries to move the external tracking to a different vma, but in this case it is already being tracked, leading to double list add errors. It should be safe to simply drop the destroyed check in bo_has_vm_references(), since the actual destroy will switch the external tracking to the next available vma. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/290 Cc: Maarten Lankhorst Cc: Thomas Hellström Signed-off-by: Matthew Auld Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ffa102870d1f..5af370640fb1 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -964,7 +964,7 @@ bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm, struct xe_vma *vma; list_for_each_entry(vma, &bo->vmas, bo_link) { - if (vma != ignore && vma->vm == vm && !vma->destroyed) + if (vma != ignore && vma->vm == vm) return vma; } -- cgit v1.2.3 From dbc4f5d15a8eecf0f5e7ba1a8e563c31237f6adb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:14 -0700 Subject: drm/xe/mtl: Disable media GT Xe incorrectly conflates the concept of 'tile' and 'GT.' Since MTL's media support is not yet functioning properly, let's just disable it completely for now while we fix the fundamental driver design. 
Support for media GTs on platforms like MTL will be re-added later. v2: - Drop some unrelated code cleanup that didn't belong in this patch. (Lucas) v3: - Drop unnecessary xe_gt.h include. (Gustavo) Cc: Lucas De Marchi Cc: Gustavo Sousa Reviewed-by: Matt Atwood Reviewed-by: Lucas De Marchi Acked-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230601215244.678611-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 2 -- drivers/gpu/drm/xe/xe_pci.c | 12 ------------ 2 files changed, 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index d3b57669c9a7..ef2353eef6fe 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -340,8 +340,6 @@ static void xe_mmio_probe_tiles(struct xe_device *xe) mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR); adj_tile_count = xe->info.tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; - if (xe->info.media_verx100 >= 1300) - xe->info.tile_count *= 2; drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", xe->info.tile_count, adj_tile_count); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 50027eb642ea..c8784f41506e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -275,20 +275,10 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .extra_gts = pvc_gts, }; -static const struct xe_gt_desc xelpmp_gts[] = { - { - .type = XE_GT_TYPE_MEDIA, - .vram_id = 0, - .mmio_adj_limit = 0x40000, - .mmio_adj_offset = 0x380000, - }, -}; - static const struct xe_device_desc mtl_desc = { /* .graphics and .media determined via GMD_ID */ .require_force_probe = true, PLATFORM(XE_METEORLAKE), - .extra_gts = xelpmp_gts, }; #undef PLATFORM @@ -545,8 +535,6 @@ static int xe_info_init(struct xe_device *xe, * treats it as the number of GTs rather than just the number of tiles. */ xe->info.tile_count = 1 + graphics_desc->max_remote_tiles; - if (MEDIA_VER(xe) >= 13) - xe->info.tile_count++; for (id = 0; id < xe->info.tile_count; ++id) { gt = xe->gt + id; -- cgit v1.2.3 From a5edc7cdb3875115d1798f4d2057569cf257e7d2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:15 -0700 Subject: drm/xe: Introduce xe_tile Create a new xe_tile structure to begin separating the concept of "tile" from "GT." A tile is effectively a complete GPU, and a GT is just one part of that. On platforms like MTL, there's only a single full GPU (tile) which has its IP blocks provided by two GTs. In contrast, a "multi-tile" platform like PVC is basically multiple complete GPUs packed behind a single PCI device. For now, just create xe_tile as a simple wrapper around xe_gt. The items in xe_gt that are truly tied to the tile rather than the GT will be moved in future patches. Support for multiple GTs per tile (i.e., the MTL standalone media case) will also be re-introduced in a future patch. v2: - Fix kunit test build - Move hunk from next patch to use local tile variable rather than direct xe->tiles[id] accesses. (Lucas) - Mention compute in kerneldoc. 
(Rodrigo) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 2 +- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 6 +++-- drivers/gpu/drm/xe/xe_device.h | 11 +++++++--- drivers/gpu/drm/xe/xe_device_types.h | 40 +++++++++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_gt_types.h | 15 ++++++++----- drivers/gpu/drm/xe/xe_mmio.c | 13 ++++++----- drivers/gpu/drm/xe/xe_pci.c | 7 +++++- drivers/gpu/drm/xe/xe_ring_ops.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 2 +- drivers/gpu/drm/xe/xe_vm_types.h | 8 +++---- 10 files changed, 79 insertions(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 9bd381e5b7a6..6075f12a1962 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -174,7 +174,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni struct xe_bo *bo, *external; unsigned int bo_flags = XE_BO_CREATE_USER_BIT | XE_BO_CREATE_VRAM_IF_DGFX(gt); - struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate); + struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->primary_gt.migrate); struct ww_acquire_ctx ww; int err, i; diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index ab6f7a47db50..45f2614f91ec 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -13,6 +13,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_reg_defs.h" +#include "xe_device.h" #include "xe_device_types.h" #include "xe_pci_test.h" #include "xe_reg_sr.h" @@ -236,9 +237,10 @@ static void xe_rtp_process_tests(struct kunit *test) { const struct rtp_test_case *param = test->param_value; struct xe_device *xe = test->priv; - struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr; + struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt; + struct xe_reg_sr *reg_sr = >->reg_sr; const struct xe_reg_sr_entry *sre, *sr_entry = NULL; - struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(&xe->gt[0]); + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); unsigned long idx, count = 0; xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index cbae480a2092..f7acaf51a1fc 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -48,12 +48,17 @@ static inline struct xe_file *to_xe_file(const struct drm_file *file) return file->driver_priv; } +static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) +{ + return &xe->tiles[0]; +} + static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { struct xe_gt *gt; - XE_BUG_ON(gt_id > XE_MAX_GT); - gt = xe->gt + gt_id; + XE_BUG_ON(gt_id > XE_MAX_TILES_PER_DEVICE); + gt = &xe->tiles[gt_id].primary_gt; XE_BUG_ON(gt->info.id != gt_id); XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); @@ -65,7 +70,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) */ static inline struct xe_gt *to_gt(struct xe_device *xe) { - return xe->gt; + return &xe_device_get_root_tile(xe)->primary_gt; } static inline bool xe_device_guc_submission_enabled(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5b3f270bf790..b76344a9c33b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ 
b/drivers/gpu/drm/xe/xe_device_types.h @@ -29,7 +29,7 @@ #define XE_GT0 0 #define XE_GT1 1 -#define XE_MAX_GT (XE_GT1 + 1) +#define XE_MAX_TILES_PER_DEVICE (XE_GT1 + 1) #define XE_MAX_ASID (BIT(20)) @@ -43,6 +43,40 @@ (_xe)->info.step.graphics >= (min_step) && \ (_xe)->info.step.graphics < (max_step)) +#define tile_to_xe(tile__) \ + _Generic(tile__, \ + const struct xe_tile *: (const struct xe_device *)((tile__)->xe), \ + struct xe_tile *: (tile__)->xe) + +/** + * struct xe_tile - hardware tile structure + * + * From a driver perspective, a "tile" is effectively a complete GPU, containing + * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM. + * + * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI + * device and designate one "root" tile as being responsible for external PCI + * communication. PCI BAR0 exposes the GGTT and MMIO register space for each + * tile in a stacked layout, and PCI BAR2 exposes the local memory associated + * with each tile similarly. Device-wide interrupts can be enabled/disabled + * at the root tile, and the MSTR_TILE_INTR register will report which tiles + * have interrupts that need servicing. + */ +struct xe_tile { + /** @xe: Backpointer to tile's PCI device */ + struct xe_device *xe; + + /** @id: ID of the tile */ + u8 id; + + /** + * @primary_gt: Primary GT + */ + struct xe_gt primary_gt; + + /* TODO: Add media GT here */ +}; + /** * struct xe_device - Top level struct of XE device */ @@ -193,8 +227,8 @@ struct xe_device { /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; - /** @gt: graphics tile */ - struct xe_gt gt[XE_MAX_GT]; + /** @tiles: device tiles */ + struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; /** * @mem_access: keep track of memory access in the device, possibly diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 093d650c35f4..456e3e447a2e 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -77,12 +77,17 @@ enum xe_steering_type { }; /** - * struct xe_gt - Top level struct of a graphics tile + * struct xe_gt - A "Graphics Technology" unit of the GPU * - * A graphics tile may be a physical split (duplicate pieces of silicon, - * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). Either way - * this structure encapsulates of everything a GT is (MMIO, VRAM, memory - * management, microcontrols, and a hardware set of engines). + * A GT ("Graphics Technology") is the subset of a GPU primarily responsible + * for implementing the graphics, compute, and/or media IP. It encapsulates + * the hardware engines, programmable execution units, and GuC. Each GT has + * its own handling of power management (RC6+forcewake) and multicast register + * steering. + * + * A GPU/tile may have a single GT that supplies all graphics, compute, and + * media functionality, or the graphics/compute and media may be split into + * separate GTs within a tile. 
*/ struct xe_gt { /** @xe: backpointer to XE device */ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index ef2353eef6fe..9bc5715e9ebe 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -438,6 +438,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); + struct xe_gt *gt = xe_device_get_gt(xe, 0); struct drm_xe_mmio *args = data; unsigned int bits_flag, bytes; struct xe_reg reg; @@ -480,7 +481,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, */ reg = XE_REG(args->addr); - xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); + xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (args->flags & DRM_XE_MMIO_WRITE) { switch (bits_flag) { @@ -489,10 +490,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, ret = -EINVAL; goto exit; } - xe_mmio_write32(to_gt(xe), reg, args->value); + xe_mmio_write32(gt, reg, args->value); break; case DRM_XE_MMIO_64BIT: - xe_mmio_write64(to_gt(xe), reg, args->value); + xe_mmio_write64(gt, reg, args->value); break; default: drm_dbg(&xe->drm, "Invalid MMIO bit size"); @@ -507,10 +508,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_MMIO_READ) { switch (bits_flag) { case DRM_XE_MMIO_32BIT: - args->value = xe_mmio_read32(to_gt(xe), reg); + args->value = xe_mmio_read32(gt, reg); break; case DRM_XE_MMIO_64BIT: - args->value = xe_mmio_read64(to_gt(xe), reg); + args->value = xe_mmio_read64(gt, reg); break; default: drm_dbg(&xe->drm, "Invalid MMIO bit size"); @@ -522,7 +523,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, } exit: - xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); return ret; } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c8784f41506e..d56b1c566d81 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -478,6 +478,7 @@ static int xe_info_init(struct xe_device *xe, const struct xe_graphics_desc *graphics_desc = NULL; const struct xe_media_desc *media_desc = NULL; u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0; + struct xe_tile *tile; struct xe_gt *gt; u8 id; @@ -537,7 +538,11 @@ static int xe_info_init(struct xe_device *xe, xe->info.tile_count = 1 + graphics_desc->max_remote_tiles; for (id = 0; id < xe->info.tile_count; ++id) { - gt = xe->gt + id; + tile = &xe->tiles[id]; + tile->xe = xe; + tile->id = id; + + gt = &tile->primary_gt; gt->info.id = id; gt->xe = xe; diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index a70fa6d9ae60..45117a2ab1a0 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -249,7 +249,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->engine->gt; struct xe_device *xe = gt_to_xe(gt); - bool lacks_render = !(xe->gt[0].info.engine_mask & XE_HW_ENGINE_RCS_MASK); + bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 mask_flags = 0; dw[i++] = preparser_disable(true); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5af370640fb1..798cba1bda6b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3389,7 +3389,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) struct xe_device *xe = vma->vm->xe; struct xe_gt *gt; u32 gt_needs_invalidate = 0; - int seqno[XE_MAX_GT]; + int seqno[XE_MAX_TILES_PER_DEVICE]; u8 id; int 
ret; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index fada7896867f..203ba9d946b8 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -159,7 +159,7 @@ struct xe_vm { struct kref refcount; /* engine used for (un)binding vma's */ - struct xe_engine *eng[XE_MAX_GT]; + struct xe_engine *eng[XE_MAX_TILES_PER_DEVICE]; /** Protects @rebind_list and the page-table structures */ struct dma_resv resv; @@ -167,9 +167,9 @@ struct xe_vm { u64 size; struct rb_root vmas; - struct xe_pt *pt_root[XE_MAX_GT]; - struct xe_bo *scratch_bo[XE_MAX_GT]; - struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL]; + struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE]; + struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE]; + struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL]; /** @flags: flags for this VM, statically setup a creation time */ #define XE_VM_FLAGS_64K BIT(0) -- cgit v1.2.3 From f79ee3013ad57021f4557cd3aa964a14b5c94bd4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:16 -0700 Subject: drm/xe: Add backpointer from gt to tile Rather than a backpointer to the xe_device, a GT should have a backpointer to its tile (which can then be used to lookup the device if necessary). The gt_to_xe() helper macro (which moves from xe_gt.h to xe_gt_types.h) can and should still be used to jump directly from an xe_gt to xe_device. v2: - Fix kunit test build - Move a couple changes to the previous patch. (Lucas) Reviewed-by: Matt Atwood Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_bb.c | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 12 ++++++------ drivers/gpu/drm/xe/xe_gt.h | 5 ----- drivers/gpu/drm/xe/xe_gt_printk.h | 6 +++--- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8 ++++---- drivers/gpu/drm/xe/xe_gt_types.h | 14 ++++++++++++-- drivers/gpu/drm/xe/xe_mmio.c | 6 +++--- drivers/gpu/drm/xe/xe_mocs.c | 14 +++++++------- drivers/gpu/drm/xe/xe_pci.c | 2 +- drivers/gpu/drm/xe/xe_pt.c | 2 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 6 +++--- 12 files changed, 42 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 6075f12a1962..8f3afdc6cca6 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -90,7 +90,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, } /* Check last CCS value, or at least last value in page. 
*/ - offset = xe_device_ccs_bytes(gt->xe, bo->size); + offset = xe_device_ccs_bytes(gt_to_xe(gt), bo->size); offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; if (cpu_map[offset] != get_val) { KUNIT_FAIL(test, diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 3deb2d55f421..bf7c94b769d7 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -16,7 +16,7 @@ static int bb_prefetch(struct xe_gt *gt) { - struct xe_device *xe = gt->xe; + struct xe_device *xe = gt_to_xe(gt); if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) /* diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 1ed22b5f89ad..4eefb2b3166c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -142,14 +142,14 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) u64 start, end; /* Display may have allocated inside ggtt, so be careful with clearing here */ - xe_device_mem_access_get(ggtt->gt->xe); + xe_device_mem_access_get(gt_to_xe(ggtt->gt)); mutex_lock(&ggtt->lock); drm_mm_for_each_hole(hole, &ggtt->mm, start, end) xe_ggtt_clear(ggtt, start, end - start); xe_ggtt_invalidate(ggtt->gt); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(ggtt->gt->xe); + xe_device_mem_access_put(gt_to_xe(ggtt->gt)); } int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt) @@ -286,14 +286,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (err) return err; - xe_device_mem_access_get(ggtt->gt->xe); + xe_device_mem_access_get(gt_to_xe(ggtt->gt)); mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, alignment, 0, start, end, 0); if (!err) xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(ggtt->gt->xe); + xe_device_mem_access_put(gt_to_xe(ggtt->gt)); return err; } @@ -322,7 +322,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) { - xe_device_mem_access_get(ggtt->gt->xe); + xe_device_mem_access_get(gt_to_xe(ggtt->gt)); mutex_lock(&ggtt->lock); xe_ggtt_clear(ggtt, node->start, node->size); @@ -332,7 +332,7 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) xe_ggtt_invalidate(ggtt->gt); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(ggtt->gt->xe); + xe_device_mem_access_put(gt_to_xe(ggtt->gt)); } void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 086369f7ee6d..f4e98f499b36 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -49,11 +49,6 @@ static inline bool xe_gt_is_media_type(struct xe_gt *gt) return gt->info.type == XE_GT_TYPE_MEDIA; } -#define gt_to_xe(gt__) \ - _Generic(gt__, \ - const struct xe_gt *: (const struct xe_device *)((gt__)->xe), \ - struct xe_gt *: (gt__)->xe) - static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe) { struct xe_device *xe = gt_to_xe(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index 0b801429cf1a..5991bcadd47e 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -11,7 +11,7 @@ #include "xe_device_types.h" #define xe_gt_printk(_gt, _level, _fmt, ...) \ - drm_##_level(&(_gt)->xe->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + drm_##_level(>_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) #define xe_gt_err(_gt, _fmt, ...) 
\ xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) @@ -32,10 +32,10 @@ xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) #define xe_gt_WARN(_gt, _condition, _fmt, ...) \ - drm_WARN(&(_gt)->xe->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + drm_WARN(>_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) #define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \ - drm_WARN_ONCE(&(_gt)->xe->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + drm_WARN_ONCE(>_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) #define xe_gt_WARN_ON(_gt, _condition) \ xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition)) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 44e442bf306c..2fcb477604e2 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -248,9 +248,9 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN); - xe_device_mem_access_get(gt->xe); + xe_device_mem_access_get(xe); ret = send_tlb_invalidation(>->uc.guc, fence, action, len); - xe_device_mem_access_put(gt->xe); + xe_device_mem_access_put(xe); return ret; } @@ -328,8 +328,8 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) TLB_INVALIDATION_SEQNO_MAX; if (!expected_seqno) expected_seqno = 1; - if (drm_WARN_ON(>->xe->drm, expected_seqno != msg[0])) { - drm_err(>->xe->drm, "TLB expected_seqno(%d) != msg(%u)\n", + if (drm_WARN_ON(>_to_xe(gt)->drm, expected_seqno != msg[0])) { + drm_err(>_to_xe(gt)->drm, "TLB expected_seqno(%d) != msg(%u)\n", expected_seqno, msg[0]); } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 456e3e447a2e..11605a99ad66 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -76,6 +76,16 @@ enum xe_steering_type { NUM_STEERING_TYPES }; +#define gt_to_tile(gt__) \ + _Generic(gt__, \ + const struct xe_gt *: (const struct xe_tile *)((gt__)->tile), \ + struct xe_gt *: (gt__)->tile) + +#define gt_to_xe(gt__) \ + _Generic(gt__, \ + const struct xe_gt *: (const struct xe_device *)(gt_to_tile(gt__)->xe), \ + struct xe_gt *: gt_to_tile(gt__)->xe) + /** * struct xe_gt - A "Graphics Technology" unit of the GPU * @@ -90,8 +100,8 @@ enum xe_steering_type { * separate GTs within a tile. 
*/ struct xe_gt { - /** @xe: backpointer to XE device */ - struct xe_device *xe; + /** @tile: Backpointer to GT's tile */ + struct xe_tile *tile; /** @info: GT info */ struct { diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 9bc5715e9ebe..79f902d2faea 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -217,8 +217,8 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 return err; /* actual size */ - if (unlikely(gt->xe->info.platform == XE_DG1)) { - *tile_size = pci_resource_len(to_pci_dev(gt->xe->drm.dev), GEN12_LMEM_BAR); + if (unlikely(gt_to_xe(gt)->info.platform == XE_DG1)) { + *tile_size = pci_resource_len(to_pci_dev(gt_to_xe(gt)->drm.dev), GEN12_LMEM_BAR); *tile_offset = 0; } else { reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); @@ -227,7 +227,7 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 } /* minus device usage */ - if (gt->xe->info.has_flat_ccs) { + if (gt_to_xe(gt)->info.has_flat_ccs) { reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; } else { diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index c7a9e733ef3b..86277ecb749b 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -472,7 +472,7 @@ static void __init_mocs_table(struct xe_gt *gt, unsigned int i; u32 mocs; - mocs_dbg(>->xe->drm, "entries:%d\n", info->n_entries); + mocs_dbg(>_to_xe(gt)->drm, "entries:%d\n", info->n_entries); drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, "Unused entries index should have been defined\n"); for (i = 0; @@ -480,7 +480,7 @@ static void __init_mocs_table(struct xe_gt *gt, i++) { struct xe_reg reg = XE_REG(addr + i * 4); - mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs); + mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs); xe_mmio_write32(gt, reg, mocs); } } @@ -509,13 +509,13 @@ static void init_l3cc_table(struct xe_gt *gt, unsigned int i; u32 l3cc; - mocs_dbg(>->xe->drm, "entries:%d\n", info->n_entries); + mocs_dbg(>_to_xe(gt)->drm, "entries:%d\n", info->n_entries); for (i = 0; i < (info->n_entries + 1) / 2 ? 
(l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), get_entry_l3cc(info, 2 * i + 1))), 1 : 0; i++) { - mocs_dbg(>->xe->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr, + mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr, l3cc); xe_mmio_write32(gt, LNCFCMOCS(i), l3cc); } @@ -525,7 +525,7 @@ void xe_mocs_init_early(struct xe_gt *gt) { struct xe_mocs_info table; - get_mocs_settings(gt->xe, &table); + get_mocs_settings(gt_to_xe(gt), &table); gt->mocs.uc_index = table.uc_index; gt->mocs.wb_index = table.wb_index; } @@ -538,8 +538,8 @@ void xe_mocs_init(struct xe_gt *gt) /* * LLC and eDRAM control values are not applicable to dgfx */ - flags = get_mocs_settings(gt->xe, &table); - mocs_dbg(>->xe->drm, "flag:0x%x\n", flags); + flags = get_mocs_settings(gt_to_xe(gt), &table); + mocs_dbg(>_to_xe(gt)->drm, "flag:0x%x\n", flags); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt, &table, GLOBAL_MOCS(0).addr); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index d56b1c566d81..f0db422def9d 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -544,7 +544,7 @@ static int xe_info_init(struct xe_device *xe, gt = &tile->primary_gt; gt->info.id = id; - gt->xe = xe; + gt->tile = tile; if (id == 0) { gt->info.type = XE_GT_TYPE_MAIN; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 2a5481111a5f..e2cd1946af5a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -696,7 +696,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, * TODO: Suballocate the pt bo to avoid wasting a lot of * memory. */ - if (GRAPHICS_VERx100(xe_walk->gt->xe) >= 1250 && level == 1 && + if (GRAPHICS_VERx100(gt_to_xe(xe_walk->gt)) >= 1250 && level == 1 && covers && xe_pt_scan_64K(addr, next, xe_walk)) { walk->shifts = xe_compact_pt_shifts; flags |= XE_PDE_64K; diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d49b2cfeba92..49470f0722bd 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -53,20 +53,20 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) static s64 detect_bar2_dgfx(struct xe_gt *gt, struct xe_ttm_stolen_mgr *mgr) { - struct pci_dev *pdev = to_pci_dev(gt->xe->drm.dev); + struct pci_dev *pdev = to_pci_dev(gt_to_xe(gt)->drm.dev); u64 stolen_size; u64 tile_offset; u64 tile_size; u64 vram_size; if (xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset)) { - drm_err(>->xe->drm, "Querying total vram size failed\n"); + drm_err(>_to_xe(gt)->drm, "Querying total vram size failed\n"); return 0; } /* Use DSM base address instead for stolen memory */ mgr->stolen_base = (xe_mmio_read64(gt, DSMBASE) & BDSM_MASK) - tile_offset; - if (drm_WARN_ON(>->xe->drm, tile_size < mgr->stolen_base)) + if (drm_WARN_ON(>_to_xe(gt)->drm, tile_size < mgr->stolen_base)) return 0; stolen_size = tile_size - mgr->stolen_base; -- cgit v1.2.3 From 3643e6371542cc4782d3700f07130c9d250666d8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:18 -0700 Subject: drm/xe: Add for_each_tile iterator As we start splitting tile handling out from GT handling, we'll need to be able to iterate over tiles separately from GTs. This iterator will be used in upcoming patches. 
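As a quick illustration of how the new iterator is meant to be read (an editor's sketch only, not part of this patch; the helper name and log message are made up, and it assumes xe_device.h and drm/drm_print.h are in scope):

	/* Editor's sketch: walk every tile of a device with for_each_tile() */
	static void example_log_tiles(struct xe_device *xe)
	{
		struct xe_tile *tile;
		u8 id;

		/* visits &xe->tiles[0] .. &xe->tiles[xe->info.tile_count - 1] */
		for_each_tile(tile, xe, id)
			drm_info(&xe->drm, "found tile %u\n", tile->id);
	}

The macro binds both the tile pointer and its index on each pass, mirroring the existing for_each_gt() convention.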
v2: - s/(id__++)/(id__)++/ (Gustavo) Cc: Gustavo Sousa Reviewed-by: Lucas De Marchi Acked-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230601215244.678611-6-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.h | 4 ++++ drivers/gpu/drm/xe/xe_pci.c | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index f7acaf51a1fc..3516ac1dcbc4 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -83,6 +83,10 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe) xe->info.enable_guc = false; } +#define for_each_tile(tile__, xe__, id__) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \ + for_each_if ((tile__) = &(xe__)->tiles[(id__)]) + #define for_each_gt(gt__, xe__, id__) \ for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__++)) \ for_each_if ((gt__) = xe_device_get_gt((xe__), (id__))) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f0db422def9d..e8931661c004 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -537,8 +537,7 @@ static int xe_info_init(struct xe_device *xe, */ xe->info.tile_count = 1 + graphics_desc->max_remote_tiles; - for (id = 0; id < xe->info.tile_count; ++id) { - tile = &xe->tiles[id]; + for_each_tile(tile, xe, id) { tile->xe = xe; tile->id = id; -- cgit v1.2.3 From 3b0d4a5579968f1c42044142a4997bab9fe7ffed Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:19 -0700 Subject: drm/xe: Move register MMIO into xe_tile Each tile has its own register region in the BAR, containing instances of all registers for the platform. In contrast, the multiple GTs within a tile share the same MMIO space; there's just a small subset of registers (the GSI registers) which have multiple copies at different offsets (0x0 for primary GT, 0x380000 for media GT). Move the register MMIO region size/pointers to the tile structure, leaving just the GSI offset information in the GT structure. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-7-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 16 ++++++++++++++++ drivers/gpu/drm/xe/xe_ggtt.c | 3 ++- drivers/gpu/drm/xe/xe_gt_types.h | 9 +++------ drivers/gpu/drm/xe/xe_mmio.c | 26 ++++++++++++++------------ drivers/gpu/drm/xe/xe_mmio.h | 21 ++++++++++++++++----- 5 files changed, 51 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index b76344a9c33b..107a947a7361 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -75,6 +75,22 @@ struct xe_tile { struct xe_gt primary_gt; /* TODO: Add media GT here */ + + /** + * @mmio: MMIO info for a tile. 
+ * + * Each tile has its own 16MB space in BAR0, laid out as: + * * 0-4MB: registers + * * 4MB-8MB: reserved + * * 8MB-16MB: global GTT + */ + struct { + /** @size: size of tile's MMIO space */ + size_t size; + + /** @regs: pointer to tile's MMIO space (starting with registers) */ + void *regs; + } mmio; }; /** diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 4eefb2b3166c..cd8ada94e688 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -93,6 +93,7 @@ static void ggtt_fini_noalloc(struct drm_device *drm, void *arg) int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt) { struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); unsigned int gsm_size; @@ -106,7 +107,7 @@ int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt) return -ENOMEM; } - ggtt->gsm = gt->mmio.regs + SZ_8M; + ggtt->gsm = tile->mmio.regs + SZ_8M; ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE; if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 11605a99ad66..81e6ab0c77e0 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -124,14 +124,11 @@ struct xe_gt { } info; /** - * @mmio: mmio info for GT, can be subset of the global device mmio - * space + * @mmio: mmio info for GT. All GTs within a tile share the same + * register space, but have their own copy of GSI registers at a + * specific offset, as well as their own forcewake handling. */ struct { - /** @size: size of MMIO space on GT */ - size_t size; - /** @regs: pointer to MMIO space on GT */ - void *regs; /** @fw: force wake for GT */ struct xe_force_wake fw; /** diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 79f902d2faea..b27103080ca9 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -346,6 +346,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe) if (xe->info.tile_count > 1) { const int mmio_bar = 0; + struct xe_tile *tile; size_t size; void *regs; @@ -359,11 +360,11 @@ static void xe_mmio_probe_tiles(struct xe_device *xe) size = xe->mmio.size / adj_tile_count; regs = xe->mmio.regs; - for_each_gt(gt, xe, id) { - if (id && !xe_gt_is_media_type(gt)) - regs += size; - gt->mmio.size = size; - gt->mmio.regs = regs; + for_each_tile(tile, xe, id) { + tile->mmio.size = size; + tile->mmio.regs = regs; + + regs += size; } } } @@ -379,15 +380,16 @@ static void mmio_fini(struct drm_device *drm, void *arg) int xe_mmio_init(struct xe_device *xe) { + struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct xe_gt *gt = xe_device_get_gt(xe, 0); const int mmio_bar = 0; int err; /* - * Map the entire BAR, which includes registers (0-4MB), reserved space - * (4MB-8MB), and GGTT (8MB-16MB). Other parts of the driver (GTs, - * GGTTs) will derive the pointers they need from the mapping in the - * device structure. + * Map the first 16MB of th BAR, which includes the registers (0-4MB), + * reserved space (4MB-8MB), and GGTT (8MB-16MB) for a single tile. + * This will get remapped later if we determine that we're running + * on a multi-tile system. 
*/ xe->mmio.size = SZ_16M; xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, @@ -401,9 +403,9 @@ int xe_mmio_init(struct xe_device *xe) if (err) return err; - /* 1 GT for now, 1 to 1 mapping, may change on multi-GT devices */ - gt->mmio.size = xe->mmio.size; - gt->mmio.regs = xe->mmio.regs; + /* Setup first tile; other tiles (if present) will be setup later. */ + root_tile->mmio.size = xe->mmio.size; + root_tile->mmio.regs = xe->mmio.regs; /* * The boot firmware initializes local memory and assesses its health. diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index da91729a3854..0ba7aa790f0b 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -10,6 +10,7 @@ #include #include "regs/xe_reg_defs.h" +#include "xe_device_types.h" #include "xe_gt_types.h" struct drm_device; @@ -22,27 +23,33 @@ int xe_mmio_init(struct xe_device *xe); static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { + struct xe_tile *tile = gt_to_tile(gt); + if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - return readb(gt->mmio.regs + reg.addr); + return readb(tile->mmio.regs + reg.addr); } static inline void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) { + struct xe_tile *tile = gt_to_tile(gt); + if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - writel(val, gt->mmio.regs + reg.addr); + writel(val, tile->mmio.regs + reg.addr); } static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) { + struct xe_tile *tile = gt_to_tile(gt); + if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - return readl(gt->mmio.regs + reg.addr); + return readl(tile->mmio.regs + reg.addr); } static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, @@ -60,18 +67,22 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, static inline void xe_mmio_write64(struct xe_gt *gt, struct xe_reg reg, u64 val) { + struct xe_tile *tile = gt_to_tile(gt); + if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - writeq(val, gt->mmio.regs + reg.addr); + writeq(val, tile->mmio.regs + reg.addr); } static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg) { + struct xe_tile *tile = gt_to_tile(gt); + if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - return readq(gt->mmio.regs + reg.addr); + return readq(tile->mmio.regs + reg.addr); } static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, -- cgit v1.2.3 From ad703e06376d5d71acf61cac0c136b53959506bc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:21 -0700 Subject: drm/xe: Move GGTT from GT to tile The GGTT exists at the tile level. When a tile contains multiple GTs, they share the same GGTT. v2: - Include some changes that were mis-squashed into the VRAM patch. 
(Gustavo) Cc: Gustavo Sousa Reviewed-by: Lucas De Marchi Acked-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230601215244.678611-9-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_bo.c | 6 ++--- drivers/gpu/drm/xe/xe_bo_evict.c | 8 +++--- drivers/gpu/drm/xe/xe_device.c | 18 ++++++++++--- drivers/gpu/drm/xe/xe_device_types.h | 8 ++++++ drivers/gpu/drm/xe/xe_ggtt.c | 42 +++++++++++++++-------------- drivers/gpu/drm/xe/xe_ggtt.h | 6 ++--- drivers/gpu/drm/xe/xe_ggtt_types.h | 2 +- drivers/gpu/drm/xe/xe_gt.c | 10 +------ drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_gt_types.h | 3 --- drivers/gpu/drm/xe/xe_tile.c | 52 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_tile.h | 14 ++++++++++ 13 files changed, 125 insertions(+), 47 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_tile.c create mode 100644 drivers/gpu/drm/xe/xe_tile.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index a685e39d6b44..c914d02d8a8c 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -101,6 +101,7 @@ xe-y += xe_bb.o \ xe_sched_job.o \ xe_step.o \ xe_sync.o \ + xe_tile.o \ xe_trace.o \ xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index e766f8955718..915d4c4b15c4 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -964,7 +964,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) WARN_ON(!list_empty(&bo->vmas)); if (bo->ggtt_node.size) - xe_ggtt_remove_bo(bo->gt->mem.ggtt, bo); + xe_ggtt_remove_bo(gt_to_tile(bo->gt)->mem.ggtt, bo); if (bo->vm && xe_bo_is_user(bo)) xe_vm_put(bo->vm); @@ -1242,9 +1242,9 @@ xe_bo_create_locked_range(struct xe_device *xe, if (flags & XE_BO_CREATE_STOLEN_BIT && flags & XE_BO_FIXED_PLACEMENT_BIT) { - err = xe_ggtt_insert_bo_at(gt->mem.ggtt, bo, start); + err = xe_ggtt_insert_bo_at(gt_to_tile(gt)->mem.ggtt, bo, start); } else { - err = xe_ggtt_insert_bo(gt->mem.ggtt, bo); + err = xe_ggtt_insert_bo(gt_to_tile(gt)->mem.ggtt, bo); } if (err) goto err_unlock_put_bo; diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 6642c5f52009..a72963c54bf3 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -149,9 +149,11 @@ int xe_bo_restore_kernel(struct xe_device *xe) } if (bo->flags & XE_BO_CREATE_GGTT_BIT) { - mutex_lock(&bo->gt->mem.ggtt->lock); - xe_ggtt_map_bo(bo->gt->mem.ggtt, bo); - mutex_unlock(&bo->gt->mem.ggtt->lock); + struct xe_tile *tile = gt_to_tile(bo->gt); + + mutex_lock(&tile->mem.ggtt->lock); + xe_ggtt_map_bo(tile->mem.ggtt, bo); + mutex_unlock(&tile->mem.ggtt->lock); } /* diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2c65eb84e6e9..0657842d8db2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -27,6 +27,7 @@ #include "xe_pcode.h" #include "xe_pm.h" #include "xe_query.h" +#include "xe_tile.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" #include "xe_vm.h" @@ -237,14 +238,19 @@ static void xe_device_sanitize(struct drm_device *drm, void *arg) int xe_device_probe(struct xe_device *xe) { + struct xe_tile *tile; struct xe_gt *gt; int err; u8 id; xe->info.mem_region_mask = 1; - for_each_gt(gt, xe, id) { - err = xe_gt_alloc(xe, gt); + for_each_tile(tile, xe, id) { + err = xe_tile_alloc(tile); + if (err) + return err; + + err = 
xe_gt_alloc(xe, &tile->primary_gt); if (err) return err; } @@ -275,8 +281,12 @@ int xe_device_probe(struct xe_device *xe) xe_ttm_sys_mgr_init(xe); - for_each_gt(gt, xe, id) { - err = xe_gt_init_noalloc(gt); + for_each_tile(tile, xe, id) { + err = xe_tile_init_noalloc(tile); + if (err) + goto err_irq_shutdown; + + err = xe_gt_init_noalloc(&tile->primary_gt); if (err) goto err_irq_shutdown; } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 107a947a7361..358b70ae888d 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -17,6 +17,8 @@ #include "xe_platform_types.h" #include "xe_step_types.h" +struct xe_ggtt; + #define XE_BO_INVALID_OFFSET LONG_MAX #define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100) @@ -91,6 +93,12 @@ struct xe_tile { /** @regs: pointer to tile's MMIO space (starting with registers) */ void *regs; } mmio; + + /** @mem: memory management info for tile */ + struct { + /** @ggtt: Global graphics translation table */ + struct xe_ggtt *ggtt; + } mem; }; /** diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index cd8ada94e688..ff70a01f1591 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -90,24 +90,19 @@ static void ggtt_fini_noalloc(struct drm_device *drm, void *arg) xe_bo_unpin_map_no_vm(ggtt->scratch); } -int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt) +int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) { - struct xe_device *xe = gt_to_xe(gt); - struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(ggtt->tile); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); unsigned int gsm_size; - XE_BUG_ON(xe_gt_is_media_type(gt)); - - ggtt->gt = gt; - gsm_size = probe_gsm_size(pdev); if (gsm_size == 0) { drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); return -ENOMEM; } - ggtt->gsm = tile->mmio.regs + SZ_8M; + ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M; ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE; if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) @@ -143,19 +138,20 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) u64 start, end; /* Display may have allocated inside ggtt, so be careful with clearing here */ - xe_device_mem_access_get(gt_to_xe(ggtt->gt)); + xe_device_mem_access_get(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); drm_mm_for_each_hole(hole, &ggtt->mm, start, end) xe_ggtt_clear(ggtt, start, end - start); - xe_ggtt_invalidate(ggtt->gt); + xe_ggtt_invalidate(ggtt); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(gt_to_xe(ggtt->gt)); + xe_device_mem_access_put(tile_to_xe(ggtt->tile)); } -int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt) +int xe_ggtt_init(struct xe_ggtt *ggtt) { - struct xe_device *xe = gt_to_xe(gt); + struct xe_device *xe = tile_to_xe(ggtt->tile); + struct xe_gt *gt = &ggtt->tile->primary_gt; unsigned int flags; int err; @@ -195,8 +191,14 @@ err: #define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) #define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) -void xe_ggtt_invalidate(struct xe_gt *gt) +void xe_ggtt_invalidate(struct xe_ggtt *ggtt) { + /* + * TODO: Loop over each GT in tile once media GT support is + * re-added + */ + struct xe_gt *gt = &ggtt->tile->primary_gt; + /* TODO: vfunc for GuC vs. 
non-GuC */ if (gt->uc.guc.submission_state.enabled) { @@ -269,7 +271,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) xe_ggtt_set_pte(ggtt, start + offset, pte); } - xe_ggtt_invalidate(ggtt->gt); + xe_ggtt_invalidate(ggtt); } static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, @@ -287,14 +289,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (err) return err; - xe_device_mem_access_get(gt_to_xe(ggtt->gt)); + xe_device_mem_access_get(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, alignment, 0, start, end, 0); if (!err) xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(gt_to_xe(ggtt->gt)); + xe_device_mem_access_put(tile_to_xe(ggtt->tile)); return err; } @@ -323,17 +325,17 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) { - xe_device_mem_access_get(gt_to_xe(ggtt->gt)); + xe_device_mem_access_get(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); xe_ggtt_clear(ggtt, node->start, node->size); drm_mm_remove_node(node); node->size = 0; - xe_ggtt_invalidate(ggtt->gt); + xe_ggtt_invalidate(ggtt); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(gt_to_xe(ggtt->gt)); + xe_device_mem_access_put(tile_to_xe(ggtt->tile)); } void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 3469aa2b1a02..8e7360926bea 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -12,9 +12,9 @@ struct drm_printer; u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset); void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); -void xe_ggtt_invalidate(struct xe_gt *gt); -int xe_ggtt_init_noalloc(struct xe_gt *gt, struct xe_ggtt *ggtt); -int xe_ggtt_init(struct xe_gt *gt, struct xe_ggtt *ggtt); +void xe_ggtt_invalidate(struct xe_ggtt *ggtt); +int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt); +int xe_ggtt_init(struct xe_ggtt *ggtt); void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index ea70aaef4b31..d34b3e733945 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -12,7 +12,7 @@ struct xe_bo; struct xe_gt; struct xe_ggtt { - struct xe_gt *gt; + struct xe_tile *tile; u64 size; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 18eda5b1377f..0f07f810bb1f 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -67,11 +67,6 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); if (!xe_gt_is_media_type(gt)) { - gt->mem.ggtt = drmm_kzalloc(drm, sizeof(*gt->mem.ggtt), - GFP_KERNEL); - if (!gt->mem.ggtt) - return -ENOMEM; - gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr), GFP_KERNEL); if (!gt->mem.vram_mgr) @@ -80,7 +75,6 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) } else { struct xe_gt *full_gt = xe_find_full_gt(gt); - gt->mem.ggtt = full_gt->mem.ggtt; gt->mem.vram_mgr = full_gt->mem.vram_mgr; } @@ -354,8 +348,6 @@ int xe_gt_init_noalloc(struct xe_gt *gt) if (err) goto err_force_wake; - err = xe_ggtt_init_noalloc(gt, gt->mem.ggtt); - err_force_wake: err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 
XE_WARN_ON(err2); @@ -376,7 +368,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_pat_init(gt); if (!xe_gt_is_media_type(gt)) { - err = xe_ggtt_init(gt, gt->mem.ggtt); + err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 339ecd5fad9b..1114254bc519 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -98,7 +98,7 @@ static int ggtt(struct seq_file *m, void *data) struct xe_gt *gt = node_to_gt(m->private); struct drm_printer p = drm_seq_file_printer(m); - return xe_ggtt_dump(gt->mem.ggtt, &p); + return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p); } static int register_save_restore(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 81e6ab0c77e0..c06a0b27d6fc 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -14,7 +14,6 @@ #include "xe_uc_types.h" struct xe_engine_ops; -struct xe_ggtt; struct xe_migrate; struct xe_ring_ops; struct xe_ttm_gtt_mgr; @@ -176,8 +175,6 @@ struct xe_gt { } vram; /** @vram_mgr: VRAM TTM manager */ struct xe_ttm_vram_mgr *vram_mgr; - /** @ggtt: Global graphics translation table */ - struct xe_ggtt *ggtt; } mem; /** @reset: state for GT resets */ diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c new file mode 100644 index 000000000000..7ef594f301ca --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_tile.h" +#include "xe_ttm_vram_mgr.h" + +/** + * xe_tile_alloc - Perform per-tile memory allocation + * @tile: Tile to perform allocations for + * + * Allocates various per-tile data structures using DRM-managed allocations. + * Does not touch the hardware. + * + * Returns -ENOMEM if allocations fail, otherwise 0. + */ +int xe_tile_alloc(struct xe_tile *tile) +{ + struct drm_device *drm = &tile_to_xe(tile)->drm; + + tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt), + GFP_KERNEL); + if (!tile->mem.ggtt) + return -ENOMEM; + tile->mem.ggtt->tile = tile; + + return 0; +} + +/** + * xe_tile_init_noalloc - Init tile up to the point where allocations can happen. + * @tile: The tile to initialize. + * + * This function prepares the tile to allow memory allocations to VRAM, but is + * not allowed to allocate memory itself. This state is useful for display + * readout, because the inherited display framebuffer will otherwise be + * overwritten as it is usually put at the start of VRAM. + * + * Note that since this is tile initialization, it should not perform any + * GT-specific operations, and thus does not need to hold GT forcewake. + * + * Returns: 0 on success, negative error code on error. 
+ */ +int xe_tile_init_noalloc(struct xe_tile *tile) +{ + return xe_ggtt_init_noalloc(tile->mem.ggtt); +} diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h new file mode 100644 index 000000000000..77529ea136a6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TILE_H_ +#define _XE_TILE_H_ + +struct xe_tile; + +int xe_tile_alloc(struct xe_tile *tile); +int xe_tile_init_noalloc(struct xe_tile *tile); + +#endif -- cgit v1.2.3 From ebd288cba7db7097ad50a4736ded94cb0d92fadf Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:23 -0700 Subject: drm/xe: Move VRAM from GT to tile On platforms with VRAM, the VRAM is associated with the tile, not the GT. v2: - Unsquash the GGTT handling back into its own patch. - Fix kunit test build v3: - Tweak the "FIXME" comment to clarify that this function will be completely gone by the end of the series. (Lucas) v4: - Move a few changes that were supposed to be part of the GGTT patch back to that commit. (Gustavo) v5: - Kerneldoc parameter name fix. Cc: Gustavo Sousa Reviewed-by: Lucas De Marchi Acked-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230601215244.678611-11-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 6 +-- drivers/gpu/drm/xe/xe_bo.c | 44 ++++++++-------- drivers/gpu/drm/xe/xe_bo.h | 4 +- drivers/gpu/drm/xe/xe_device.c | 4 -- drivers/gpu/drm/xe/xe_device_types.h | 30 +++++++++++ drivers/gpu/drm/xe/xe_gt.c | 82 ++---------------------------- drivers/gpu/drm/xe/xe_gt.h | 1 - drivers/gpu/drm/xe/xe_gt_pagefault.c | 6 ++- drivers/gpu/drm/xe/xe_gt_types.h | 37 -------------- drivers/gpu/drm/xe/xe_irq.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 53 ++++++++++--------- drivers/gpu/drm/xe/xe_mmio.h | 2 +- drivers/gpu/drm/xe/xe_pci.c | 2 - drivers/gpu/drm/xe/xe_query.c | 4 +- drivers/gpu/drm/xe/xe_res_cursor.h | 2 +- drivers/gpu/drm/xe/xe_tile.c | 33 +++++++++++- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 18 ++++--- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 16 +++--- drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 4 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 6 +-- 20 files changed, 151 insertions(+), 205 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 8f3afdc6cca6..6235a6c73a06 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -115,9 +115,9 @@ static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt, int ret; /* TODO: Sanity check */ - vram_bit = XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id; + vram_bit = XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id; kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id, - gt->info.vram_id); + gt_to_tile(gt)->id); bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device, vram_bit); @@ -179,7 +179,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni int err, i; kunit_info(test, "Testing device %s gt id %u vram id %u\n", - dev_name(xe->drm.dev), gt->info.id, gt->info.vram_id); + dev_name(xe->drm.dev), gt->info.id, gt_to_tile(gt)->id); for (i = 0; i < 2; ++i) { xe_vm_lock(vm, &ww, 0, false); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 915d4c4b15c4..8ee6bad59a75 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -71,25 +71,25 @@ static bool xe_bo_is_user(struct xe_bo *bo) return 
bo->flags & XE_BO_CREATE_USER_BIT; } -static struct xe_gt * -mem_type_to_gt(struct xe_device *xe, u32 mem_type) +static struct xe_tile * +mem_type_to_tile(struct xe_device *xe, u32 mem_type) { XE_BUG_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type)); - return xe_device_get_gt(xe, mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)); + return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)]; } /** - * xe_bo_to_gt() - Get a GT from a BO's memory location + * xe_bo_to_tile() - Get a tile from a BO's memory location * @bo: The buffer object * - * Get a GT from a BO's memory location, should be called on BOs in VRAM only. + * Get a tile from a BO's memory location, should be called on BOs in VRAM only. * - * Return: xe_gt object which is closest to the BO + * Return: xe_tile object which is closest to the BO */ -struct xe_gt *xe_bo_to_gt(struct xe_bo *bo) +struct xe_tile *xe_bo_to_tile(struct xe_bo *bo) { - return mem_type_to_gt(xe_bo_device(bo), bo->ttm.resource->mem_type); + return mem_type_to_tile(xe_bo_device(bo), bo->ttm.resource->mem_type); } static void try_add_system(struct xe_bo *bo, struct ttm_place *places, @@ -109,9 +109,9 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places, static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { - struct xe_gt *gt = mem_type_to_gt(xe, mem_type); + struct xe_tile *tile = mem_type_to_tile(xe, mem_type); - XE_BUG_ON(!gt->mem.vram.size); + XE_BUG_ON(!tile->mem.vram.size); places[*c] = (struct ttm_place) { .mem_type = mem_type, @@ -362,7 +362,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct xe_device *xe = ttm_to_xe_device(bdev); - struct xe_gt *gt; + struct xe_tile *tile; switch (mem->mem_type) { case XE_PL_SYSTEM: @@ -370,15 +370,15 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, return 0; case XE_PL_VRAM0: case XE_PL_VRAM1: - gt = mem_type_to_gt(xe, mem->mem_type); + tile = mem_type_to_tile(xe, mem->mem_type); mem->bus.offset = mem->start << PAGE_SHIFT; - if (gt->mem.vram.mapping && + if (tile->mem.vram.mapping && mem->placement & TTM_PL_FLAG_CONTIGUOUS) - mem->bus.addr = (u8 *)gt->mem.vram.mapping + + mem->bus.addr = (u8 *)tile->mem.vram.mapping + mem->bus.offset; - mem->bus.offset += gt->mem.vram.io_start; + mem->bus.offset += tile->mem.vram.io_start; mem->bus.is_iomem = true; #if !defined(CONFIG_X86) @@ -638,9 +638,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (bo->gt) gt = bo->gt; else if (resource_is_vram(new_mem)) - gt = mem_type_to_gt(xe, new_mem->mem_type); + gt = &mem_type_to_tile(xe, new_mem->mem_type)->primary_gt; else if (resource_is_vram(old_mem)) - gt = mem_type_to_gt(xe, old_mem->mem_type); + gt = &mem_type_to_tile(xe, old_mem->mem_type)->primary_gt; XE_BUG_ON(!gt); XE_BUG_ON(!gt->migrate); @@ -664,7 +664,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Create a new VMAP once kernel BO back in VRAM */ if (!ret && resource_is_vram(new_mem)) { - void *new_addr = gt->mem.vram.mapping + + void *new_addr = gt_to_tile(gt)->mem.vram.mapping + (new_mem->start << PAGE_SHIFT); if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { @@ -836,14 +836,14 @@ static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo, { struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); - struct xe_gt *gt = mem_type_to_gt(xe, ttm_bo->resource->mem_type); + struct xe_tile 
*tile = mem_type_to_tile(xe, ttm_bo->resource->mem_type); struct xe_res_cursor cursor; if (ttm_bo->resource->mem_type == XE_PL_STOLEN) return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT; xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor); - return (gt->mem.vram.io_start + cursor.start) >> PAGE_SHIFT; + return (tile->mem.vram.io_start + cursor.start) >> PAGE_SHIFT; } static void __xe_bo_vunmap(struct xe_bo *bo); @@ -1344,12 +1344,12 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, uint64_t vram_region_gpu_offset(struct ttm_resource *res) { struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); - struct xe_gt *gt = mem_type_to_gt(xe, res->mem_type); + struct xe_tile *tile = mem_type_to_tile(xe, res->mem_type); if (res->mem_type == XE_PL_STOLEN) return xe_ttm_stolen_gpu_offset(xe); - return xe->mem.vram.base + gt->mem.vram.base; + return xe->mem.vram.base + tile->mem.vram.base; } /** diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index e6d08fa9c992..6e29e45a90f2 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -22,7 +22,7 @@ /* -- */ #define XE_BO_CREATE_STOLEN_BIT BIT(4) #define XE_BO_CREATE_VRAM_IF_DGFX(gt) \ - (IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt->info.vram_id : \ + (IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id : \ XE_BO_CREATE_SYSTEM_BIT) #define XE_BO_CREATE_GGTT_BIT BIT(5) #define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6) @@ -108,7 +108,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags); -struct xe_gt *xe_bo_to_gt(struct xe_bo *bo); +struct xe_tile *xe_bo_to_tile(struct xe_bo *bo); static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 0657842d8db2..50ce4e97299e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -285,10 +285,6 @@ int xe_device_probe(struct xe_device *xe) err = xe_tile_init_noalloc(tile); if (err) goto err_irq_shutdown; - - err = xe_gt_init_noalloc(&tile->primary_gt); - if (err) - goto err_irq_shutdown; } /* Allocate and map stolen after potential VRAM resize */ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 358b70ae888d..9382d7f62f03 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -96,6 +96,36 @@ struct xe_tile { /** @mem: memory management info for tile */ struct { + /** + * @vram: VRAM info for tile. + * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. + */ + struct { + /** @io_start: IO start address of this VRAM instance */ + resource_size_t io_start; + /** + * @io_size: IO size of this VRAM instance + * + * This represents how much of this VRAM we can access + * via the CPU through the VRAM BAR. This can be smaller + * than @size, in which case only part of VRAM is CPU + * accessible (typically the first 256M). This + * configuration is known as small-bar. + */ + resource_size_t io_size; + /** @base: offset of VRAM starting base */ + resource_size_t base; + /** @size: size of VRAM. 
*/ + resource_size_t size; + /** @mapping: pointer to VRAM mappable space */ + void *__iomem mapping; + } vram; + + /** @vram_mgr: VRAM TTM manager */ + struct xe_ttm_vram_mgr *vram_mgr; + /** @ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; } mem; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0f07f810bb1f..419fc471053c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -37,7 +37,6 @@ #include "xe_ring_ops.h" #include "xe_sa.h" #include "xe_sched_job.h" -#include "xe_ttm_vram_mgr.h" #include "xe_tuning.h" #include "xe_uc.h" #include "xe_vm.h" @@ -46,58 +45,22 @@ struct xe_gt *xe_find_full_gt(struct xe_gt *gt) { - struct xe_gt *search; - u8 id; - - XE_BUG_ON(!xe_gt_is_media_type(gt)); - - for_each_gt(search, gt_to_xe(gt), id) { - if (search->info.vram_id == gt->info.vram_id) - return search; - } - - XE_BUG_ON("NOT POSSIBLE"); - return NULL; + /* + * FIXME: Once the code is prepared for re-enabling, this function will + * be gone. Just return the only possible gt for now. + */ + return gt; } int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) { - struct drm_device *drm = &xe->drm; - XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); - if (!xe_gt_is_media_type(gt)) { - gt->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*gt->mem.vram_mgr), - GFP_KERNEL); - if (!gt->mem.vram_mgr) - return -ENOMEM; - - } else { - struct xe_gt *full_gt = xe_find_full_gt(gt); - - gt->mem.vram_mgr = full_gt->mem.vram_mgr; - } - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); return 0; } -static int gt_ttm_mgr_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - int err; - - if (gt->mem.vram.size) { - err = xe_ttm_vram_mgr_init(gt, gt->mem.vram_mgr); - if (err) - return err; - xe->info.mem_region_mask |= BIT(gt->info.vram_id) << 1; - } - - return 0; -} - void xe_gt_sanitize(struct xe_gt *gt) { /* @@ -321,41 +284,6 @@ int xe_gt_init_early(struct xe_gt *gt) return 0; } -/** - * xe_gt_init_noalloc - Init GT up to the point where allocations can happen. - * @gt: The GT to initialize. - * - * This function prepares the GT to allow memory allocations to VRAM, but is not - * allowed to allocate memory itself. This state is useful for display readout, - * because the inherited display framebuffer will otherwise be overwritten as it - * is usually put at the start of VRAM. - * - * Returns: 0 on success, negative error code on error. 
- */ -int xe_gt_init_noalloc(struct xe_gt *gt) -{ - int err, err2; - - if (xe_gt_is_media_type(gt)) - return 0; - - xe_device_mem_access_get(gt_to_xe(gt)); - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err; - - err = gt_ttm_mgr_init(gt); - if (err) - goto err_force_wake; - -err_force_wake: - err2 = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - XE_WARN_ON(err2); - xe_device_mem_access_put(gt_to_xe(gt)); -err: - return err; -} - static int gt_fw_domain_init(struct xe_gt *gt) { int err, i; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index f4e98f499b36..27e913e9a43a 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -18,7 +18,6 @@ int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); -int xe_gt_init_noalloc(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); int xe_gt_record_default_lrcs(struct xe_gt *gt); void xe_gt_suspend_prepare(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 1677640e1075..f4f3d95ae6b1 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -107,6 +107,7 @@ static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) { struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); struct xe_vm *vm; struct xe_vma *vma = NULL; struct xe_bo *bo; @@ -195,7 +196,7 @@ retry_userptr: } /* Migrate to VRAM, move should invalidate the VMA first */ - ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id); + ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id); if (ret) goto unlock_dma_resv; } else if (bo) { @@ -498,6 +499,7 @@ static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) static int handle_acc(struct xe_gt *gt, struct acc *acc) { struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); struct xe_vm *vm; struct xe_vma *vma; struct xe_bo *bo; @@ -553,7 +555,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) goto unlock_vm; /* Migrate to VRAM, move should invalidate the VMA first */ - ret = xe_bo_migrate(bo, XE_PL_VRAM0 + gt->info.vram_id); + ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id); if (only_needs_bo_lock(bo)) xe_bo_unlock(bo, &ww); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index c06a0b27d6fc..a040ec896e70 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -16,8 +16,6 @@ struct xe_engine_ops; struct xe_migrate; struct xe_ring_ops; -struct xe_ttm_gtt_mgr; -struct xe_ttm_vram_mgr; enum xe_gt_type { XE_GT_TYPE_UNINITIALIZED, @@ -108,8 +106,6 @@ struct xe_gt { enum xe_gt_type type; /** @id: id of GT */ u8 id; - /** @vram: id of the VRAM for this GT */ - u8 vram_id; /** @clock_freq: clock frequency */ u32 clock_freq; /** @engine_mask: mask of engines present on GT */ @@ -144,39 +140,6 @@ struct xe_gt { */ struct xe_reg_sr reg_sr; - /** - * @mem: memory management info for GT, multiple GTs can point to same - * objects (virtual split) - */ - struct { - /** - * @vram: VRAM info for GT, multiple GTs can point to same info - * (virtual split), can be subset of global device VRAM - */ - struct { - /** @io_start: IO start address of this VRAM instance */ - resource_size_t io_start; - /** - * @io_size: IO size of this VRAM instance - * - * This represents how much of the VRAM the CPU can access - * via the VRAM BAR. 
- * This can be smaller than the actual @size, in which - * case only part of VRAM is CPU accessible (typically - * the first 256M). This configuration is known as small-bar. - */ - resource_size_t io_size; - /** @base: offset of VRAM starting base */ - resource_size_t base; - /** @size: size of VRAM. */ - resource_size_t size; - /** @mapping: pointer to VRAM mappable space */ - void *__iomem mapping; - } vram; - /** @vram_mgr: VRAM TTM manager */ - struct xe_ttm_vram_mgr *vram_mgr; - } mem; - /** @reset: state for GT resets */ struct { /** diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 1c26ec5ab4f0..e9614e90efaf 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -364,7 +364,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) } for_each_gt(gt, xe, id) { - if ((master_tile_ctl & DG1_MSTR_TILE(gt->info.vram_id)) == 0) + if ((master_tile_ctl & DG1_MSTR_TILE(gt_to_tile(gt)->id)) == 0) continue; if (!xe_gt_is_media_type(gt)) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index b27103080ca9..86f010ac9ccf 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -189,7 +189,7 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) /** * xe_mmio_tile_vram_size() - Collect vram size and offset information - * @gt: tile to get info for + * @tile: tile to get info for * @vram_size: available vram (size - device reserved portions) * @tile_size: actual vram size * @tile_offset: physical start point in the vram address space @@ -206,8 +206,10 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) * NOTE: multi-tile bases will include the tile offset. * */ -int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 *tile_offset) +int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_offset) { + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *gt = &tile->primary_gt; u64 offset; int err; u32 reg; @@ -217,8 +219,8 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 return err; /* actual size */ - if (unlikely(gt_to_xe(gt)->info.platform == XE_DG1)) { - *tile_size = pci_resource_len(to_pci_dev(gt_to_xe(gt)->drm.dev), GEN12_LMEM_BAR); + if (unlikely(xe->info.platform == XE_DG1)) { + *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), GEN12_LMEM_BAR); *tile_offset = 0; } else { reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); @@ -227,7 +229,7 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 } /* minus device usage */ - if (gt_to_xe(gt)->info.has_flat_ccs) { + if (xe->info.has_flat_ccs) { reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; } else { @@ -242,10 +244,10 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 int xe_mmio_probe_vram(struct xe_device *xe) { + struct xe_tile *tile; resource_size_t io_size; u64 available_size = 0; u64 total_size = 0; - struct xe_gt *gt; u64 tile_offset; u64 tile_size; u64 vram_size; @@ -255,9 +257,9 @@ int xe_mmio_probe_vram(struct xe_device *xe) if (!IS_DGFX(xe)) return 0; - /* Get the size of the gt0 vram for later accessibility comparison */ - gt = xe_device_get_gt(xe, 0); - err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset); + /* Get the size of the root tile's vram for later accessibility comparison */ + tile = xe_device_get_root_tile(xe); + err = 
xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); if (err) return err; @@ -265,7 +267,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) if (err) return err; - /* small bar issues will only cover gt0 sizes */ + /* small bar issues will only cover root tile sizes */ if (xe->mem.vram.io_size < vram_size) drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", vram_size, (u64)xe->mem.vram.io_size); @@ -275,35 +277,32 @@ int xe_mmio_probe_vram(struct xe_device *xe) io_size = xe->mem.vram.io_size; - /* gt specific ranges */ - for_each_gt(gt, xe, id) { - if (xe_gt_is_media_type(gt)) - continue; - - err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset); + /* tile specific ranges */ + for_each_tile(tile, xe, id) { + err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); if (err) return err; - gt->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; - gt->mem.vram.io_size = min_t(u64, vram_size, io_size); + tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; + tile->mem.vram.io_size = min_t(u64, vram_size, io_size); - if (!gt->mem.vram.io_size) { + if (!tile->mem.vram.io_size) { drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); return -ENODEV; } - gt->mem.vram.base = tile_offset; + tile->mem.vram.base = tile_offset; /* small bar can limit the visible size. size accordingly */ - gt->mem.vram.size = min_t(u64, vram_size, io_size); - gt->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; + tile->mem.vram.size = min_t(u64, vram_size, io_size); + tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; - drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, gt->info.vram_id, - >->mem.vram.io_start, >->mem.vram.size); + drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, tile->id, + &tile->mem.vram.io_start, &tile->mem.vram.size); - if (gt->mem.vram.io_size < gt->mem.vram.size) + if (tile->mem.vram.io_size < tile->mem.vram.size) drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id, - gt->info.vram_id, >->mem.vram.io_size); + tile->id, &tile->mem.vram.io_size); /* calculate total size using tile size to get the correct HW sizing */ total_size += tile_size; @@ -329,7 +328,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) static void xe_mmio_probe_tiles(struct xe_device *xe) { - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt; u32 mtcfg; u8 adj_tile_count; u8 id; diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 0ba7aa790f0b..3c547d78afba 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -144,6 +144,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, } int xe_mmio_probe_vram(struct xe_device *xe); -int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64 *tile_base); +int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base); #endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e8931661c004..b91d52205feb 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -547,7 +547,6 @@ static int xe_info_init(struct xe_device *xe, if (id == 0) { gt->info.type = XE_GT_TYPE_MAIN; - gt->info.vram_id = id; gt->info.__engine_mask = graphics_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 && media_desc) @@ -557,7 +556,6 @@ static int xe_info_init(struct xe_device *xe, gt->mmio.adj_offset = 0; } else { gt->info.type = desc->extra_gts[id - 
1].type; - gt->info.vram_id = desc->extra_gts[id - 1].vram_id; gt->info.__engine_mask = (gt->info.type == XE_GT_TYPE_MEDIA) ? media_desc->hw_engine_mask : graphics_desc->hw_engine_mask; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index b10959fde43b..799bf68800e7 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -182,7 +182,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->num_params = num_params; config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = xe->info.devid | (xe->info.revid << 16); - if (to_gt(xe)->mem.vram.size) + if (xe_device_get_root_tile(xe)->mem.vram.size) config->info[XE_QUERY_CONFIG_FLAGS] = XE_QUERY_CONFIG_FLAGS_HAS_VRAM; if (xe->info.enable_guc) @@ -242,7 +242,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) gts->gts[id].native_mem_regions = 0x1; else gts->gts[id].native_mem_regions = - BIT(gt->info.vram_id) << 1; + BIT(gt_to_tile(gt)->id) << 1; gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^ gts->gts[id].native_mem_regions; } diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 4e99fae26b4c..f2ba609712d3 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -53,7 +53,7 @@ static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res) struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); if (res->mem_type != XE_PL_STOLEN) { - return &xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0)->mem.vram_mgr->mm; + return &xe->tiles[res->mem_type - XE_PL_VRAM0].mem.vram_mgr->mm; } else { struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 7ef594f301ca..5530a6b6ef31 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -29,6 +29,25 @@ int xe_tile_alloc(struct xe_tile *tile) return -ENOMEM; tile->mem.ggtt->tile = tile; + tile->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*tile->mem.vram_mgr), GFP_KERNEL); + if (!tile->mem.vram_mgr) + return -ENOMEM; + + return 0; +} + +static int tile_ttm_mgr_init(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + int err; + + if (tile->mem.vram.size) { + err = xe_ttm_vram_mgr_init(tile, tile->mem.vram_mgr); + if (err) + return err; + xe->info.mem_region_mask |= BIT(tile->id) << 1; + } + return 0; } @@ -48,5 +67,17 @@ int xe_tile_alloc(struct xe_tile *tile) */ int xe_tile_init_noalloc(struct xe_tile *tile) { - return xe_ggtt_init_noalloc(tile->mem.ggtt); + int err; + + xe_device_mem_access_get(tile_to_xe(tile)); + + err = tile_ttm_mgr_init(tile); + if (err) + goto err_mem_access; + + err = xe_ggtt_init_noalloc(tile->mem.ggtt); + +err_mem_access: + xe_device_mem_access_put(tile_to_xe(tile)); + return err; } diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 49470f0722bd..c68325161c19 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -51,29 +51,31 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe); } -static s64 detect_bar2_dgfx(struct xe_gt *gt, struct xe_ttm_stolen_mgr *mgr) +static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { - struct pci_dev *pdev = to_pci_dev(gt_to_xe(gt)->drm.dev); + struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_gt *mmio = &tile->primary_gt; + struct 
pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size; u64 tile_offset; u64 tile_size; u64 vram_size; - if (xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset)) { - drm_err(>_to_xe(gt)->drm, "Querying total vram size failed\n"); + if (xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset)) { + drm_err(&xe->drm, "Querying total vram size failed\n"); return 0; } /* Use DSM base address instead for stolen memory */ - mgr->stolen_base = (xe_mmio_read64(gt, DSMBASE) & BDSM_MASK) - tile_offset; - if (drm_WARN_ON(>_to_xe(gt)->drm, tile_size < mgr->stolen_base)) + mgr->stolen_base = (xe_mmio_read64(mmio, DSMBASE) & BDSM_MASK) - tile_offset; + if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base)) return 0; stolen_size = tile_size - mgr->stolen_base; /* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, GEN12_LMEM_BAR)) - mgr->io_base = gt->mem.vram.io_start + mgr->stolen_base; + mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; /* * There may be few KB of platform dependent reserved memory at the end @@ -141,7 +143,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) int err; if (IS_DGFX(xe)) - stolen_size = detect_bar2_dgfx(to_gt(xe), mgr); + stolen_size = detect_bar2_dgfx(xe, mgr); else if (GRAPHICS_VERx100(xe) >= 1270) stolen_size = detect_bar2_integrated(xe, mgr); else diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 73836b9b7fed..1a84abd35fcf 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -353,16 +353,14 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); } -int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr) +int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) { - struct xe_device *xe = gt_to_xe(gt); + struct xe_device *xe = tile_to_xe(tile); - XE_BUG_ON(xe_gt_is_media_type(gt)); + mgr->tile = tile; - mgr->gt = gt; - - return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + gt->info.vram_id, - gt->mem.vram.size, gt->mem.vram.io_size, + return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, + tile->mem.vram.size, tile->mem.vram.io_size, PAGE_SIZE); } @@ -373,7 +371,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, enum dma_data_direction dir, struct sg_table **sgt) { - struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0); + struct xe_tile *tile = &xe->tiles[res->mem_type - XE_PL_VRAM0]; struct xe_res_cursor cursor; struct scatterlist *sg; int num_entries = 0; @@ -406,7 +404,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, */ xe_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { - phys_addr_t phys = cursor.start + gt->mem.vram.io_start; + phys_addr_t phys = cursor.start + tile->mem.vram.io_start; size_t size = cursor.size; dma_addr_t addr; diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index 35e5367a79fb..6e1d6033d739 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -10,12 +10,12 @@ enum dma_data_direction; struct xe_device; -struct xe_gt; +struct xe_tile; int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, u32 mem_type, u64 size, u64 io_size, u64 default_page_size); -int xe_ttm_vram_mgr_init(struct xe_gt *gt, struct xe_ttm_vram_mgr *mgr); +int xe_ttm_vram_mgr_init(struct xe_tile 
*tile, struct xe_ttm_vram_mgr *mgr); int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, struct ttm_resource *res, u64 offset, u64 length, diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index 3d9417ff7434..48bb991c14a5 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -9,7 +9,7 @@ #include #include -struct xe_gt; +struct xe_tile; /** * struct xe_ttm_vram_mgr - XE TTM VRAM manager @@ -17,8 +17,8 @@ struct xe_gt; * Manages placement of TTM resource in VRAM. */ struct xe_ttm_vram_mgr { - /** @gt: Graphics tile which the VRAM belongs to */ - struct xe_gt *gt; + /** @tile: Tile which the VRAM belongs to */ + struct xe_tile *tile; /** @manager: Base TTM resource manager */ struct ttm_resource_manager manager; /** @mm: DRM buddy allocator which manages the VRAM */ -- cgit v1.2.3 From 876611c2b75689c6bea43bdbbbef9b358f71526a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:25 -0700 Subject: drm/xe: Memory allocations are tile-based, not GT-based Since memory and address spaces are a tile concept rather than a GT concept, we need to plumb tile-based handling through lots of memory-related code. Note that one remaining shortcoming here that will need to be addressed before media GT support can be re-enabled is that although the address space is shared between a tile's GTs, each GT caches the PTEs independently in their own TLB and thus TLB invalidation should be handled at the GT level. v2: - Fix kunit test build. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-13-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 2 +- drivers/gpu/drm/xe/tests/xe_migrate.c | 21 ++--- drivers/gpu/drm/xe/xe_bb.c | 3 +- drivers/gpu/drm/xe/xe_bo.c | 67 +++++++-------- drivers/gpu/drm/xe/xe_bo.h | 18 ++-- drivers/gpu/drm/xe/xe_bo_evict.c | 2 +- drivers/gpu/drm/xe/xe_bo_types.h | 4 +- drivers/gpu/drm/xe/xe_device_types.h | 7 ++ drivers/gpu/drm/xe/xe_ggtt.c | 5 +- drivers/gpu/drm/xe/xe_gt.c | 21 +---- drivers/gpu/drm/xe/xe_gt_debugfs.c | 6 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 14 ++-- drivers/gpu/drm/xe/xe_gt_types.h | 7 -- drivers/gpu/drm/xe/xe_guc_ads.c | 5 +- drivers/gpu/drm/xe/xe_guc_ct.c | 5 +- drivers/gpu/drm/xe/xe_guc_hwconfig.c | 5 +- drivers/gpu/drm/xe/xe_guc_log.c | 6 +- drivers/gpu/drm/xe/xe_guc_pc.c | 5 +- drivers/gpu/drm/xe/xe_hw_engine.c | 5 +- drivers/gpu/drm/xe/xe_lrc.c | 13 ++- drivers/gpu/drm/xe/xe_lrc_types.h | 4 +- drivers/gpu/drm/xe/xe_migrate.c | 23 ++--- drivers/gpu/drm/xe/xe_migrate.h | 5 +- drivers/gpu/drm/xe/xe_pt.c | 144 +++++++++++++++----------------- drivers/gpu/drm/xe/xe_pt.h | 14 ++-- drivers/gpu/drm/xe/xe_sa.c | 13 ++- drivers/gpu/drm/xe/xe_sa.h | 4 +- drivers/gpu/drm/xe/xe_tile.c | 7 ++ drivers/gpu/drm/xe/xe_uc_fw.c | 5 +- drivers/gpu/drm/xe/xe_vm.c | 152 +++++++++++++++++----------------- drivers/gpu/drm/xe/xe_vm.h | 2 +- drivers/gpu/drm/xe/xe_vm_types.h | 12 +-- include/uapi/drm/xe_drm.h | 4 +- 33 files changed, 298 insertions(+), 312 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 6235a6c73a06..f933e5df6c12 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -173,7 +173,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni { struct xe_bo *bo, *external; unsigned int bo_flags = XE_BO_CREATE_USER_BIT | - 
XE_BO_CREATE_VRAM_IF_DGFX(gt); + XE_BO_CREATE_VRAM_IF_DGFX(gt_to_tile(gt)); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->primary_gt.migrate); struct ww_acquire_ctx ww; int err, i; diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4a3ca2960fd5..85ef9bacfe52 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -63,7 +63,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, static void sanity_populate_cb(struct xe_migrate_pt_update *pt_update, - struct xe_gt *gt, struct iosys_map *map, void *dst, + struct xe_tile *tile, struct iosys_map *map, void *dst, u32 qword_ofs, u32 num_qwords, const struct xe_vm_pgtable_update *update) { @@ -76,7 +76,7 @@ sanity_populate_cb(struct xe_migrate_pt_update *pt_update, for (i = 0; i < num_qwords; i++) { value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; if (map) - xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * + xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * sizeof(u64), u64, value); else ptr[i] = value; @@ -108,7 +108,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, const char *str = big ? "Copying big bo" : "Copying small bo"; int err; - struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL, + struct xe_bo *sysmem = xe_bo_create_locked(xe, gt_to_tile(m->gt), NULL, bo->size, ttm_bo_type_kernel, XE_BO_CREATE_SYSTEM_BIT); @@ -240,6 +240,7 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) { struct xe_gt *gt = m->gt; + struct xe_tile *tile = gt_to_tile(m->gt); struct xe_device *xe = gt_to_xe(gt); struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny; struct xe_res_cursor src_it; @@ -256,18 +257,18 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) return; } - big = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, SZ_4M, + big = xe_bo_create_pin_map(xe, tile, m->eng->vm, SZ_4M, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_PINNED_BIT); if (IS_ERR(big)) { KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); goto vunmap; } - pt = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, XE_PAGE_SIZE, + pt = xe_bo_create_pin_map(xe, tile, m->eng->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_PINNED_BIT); if (IS_ERR(pt)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", @@ -275,10 +276,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto free_big; } - tiny = xe_bo_create_pin_map(xe, m->gt, m->eng->vm, + tiny = xe_bo_create_pin_map(xe, tile, m->eng->vm, 2 * SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_PINNED_BIT); if (IS_ERR(tiny)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", @@ -286,7 +287,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto free_pt; } - bb = xe_bb_new(m->gt, 32, xe->info.supports_usm); + bb = xe_bb_new(gt, 32, xe->info.supports_usm); if (IS_ERR(bb)) { KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", PTR_ERR(bb)); diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index bf7c94b769d7..f9b6b7adf99f 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -30,6 +30,7 @@ static int bb_prefetch(struct xe_gt *gt) struct xe_bb *xe_bb_new(struct 
xe_gt *gt, u32 dwords, bool usm) { + struct xe_tile *tile = gt_to_tile(gt); struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); int err; @@ -42,7 +43,7 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) * space to accomodate the platform-specific hardware prefetch * requirements. */ - bb->bo = xe_sa_bo_new(!usm ? gt->kernel_bb_pool : gt->usm.bb_pool, + bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool, 4 * (dwords + 1) + bb_prefetch(gt)); if (IS_ERR(bb->bo)) { err = PTR_ERR(bb->bo); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 8ee6bad59a75..7c59487af86a 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -458,7 +458,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, } xe_vm_assert_held(vm); - if (list_empty(&vma->rebind_link) && vma->gt_present) + if (list_empty(&vma->rebind_link) && vma->tile_present) list_add_tail(&vma->rebind_link, &vm->rebind_list); if (vm_resv_locked) @@ -565,7 +565,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); struct ttm_resource *old_mem = ttm_bo->resource; struct ttm_tt *ttm = ttm_bo->ttm; - struct xe_gt *gt = NULL; + struct xe_tile *tile = NULL; struct dma_fence *fence; bool move_lacks_source; bool needs_clear; @@ -635,15 +635,15 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - if (bo->gt) - gt = bo->gt; + if (bo->tile) + tile = bo->tile; else if (resource_is_vram(new_mem)) - gt = &mem_type_to_tile(xe, new_mem->mem_type)->primary_gt; + tile = mem_type_to_tile(xe, new_mem->mem_type); else if (resource_is_vram(old_mem)) - gt = &mem_type_to_tile(xe, old_mem->mem_type)->primary_gt; + tile = mem_type_to_tile(xe, old_mem->mem_type); - XE_BUG_ON(!gt); - XE_BUG_ON(!gt->migrate); + XE_BUG_ON(!tile); + XE_BUG_ON(!tile->primary_gt.migrate); trace_xe_bo_move(bo); xe_device_mem_access_get(xe); @@ -664,7 +664,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Create a new VMAP once kernel BO back in VRAM */ if (!ret && resource_is_vram(new_mem)) { - void *new_addr = gt_to_tile(gt)->mem.vram.mapping + + void *new_addr = tile->mem.vram.mapping + (new_mem->start << PAGE_SHIFT); if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { @@ -681,9 +681,10 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } else { if (move_lacks_source) - fence = xe_migrate_clear(gt->migrate, bo, new_mem); + fence = xe_migrate_clear(tile->primary_gt.migrate, bo, new_mem); else - fence = xe_migrate_copy(gt->migrate, bo, bo, old_mem, new_mem); + fence = xe_migrate_copy(tile->primary_gt.migrate, + bo, bo, old_mem, new_mem); if (IS_ERR(fence)) { ret = PTR_ERR(fence); xe_device_mem_access_put(xe); @@ -964,7 +965,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) WARN_ON(!list_empty(&bo->vmas)); if (bo->ggtt_node.size) - xe_ggtt_remove_bo(gt_to_tile(bo->gt)->mem.ggtt, bo); + xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); if (bo->vm && xe_bo_is_user(bo)) xe_vm_put(bo->vm); @@ -1086,7 +1087,7 @@ void xe_bo_free(struct xe_bo *bo) } struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_gt *gt, struct dma_resv *resv, + struct xe_tile *tile, struct dma_resv *resv, size_t size, enum ttm_bo_type type, u32 flags) { @@ -1099,7 +1100,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, int err; /* Only kernel objects should set GT */ - XE_BUG_ON(gt && type != 
ttm_bo_type_kernel); + XE_BUG_ON(tile && type != ttm_bo_type_kernel); if (XE_WARN_ON(!size)) return ERR_PTR(-EINVAL); @@ -1120,7 +1121,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, alignment = SZ_4K >> PAGE_SHIFT; } - bo->gt = gt; + bo->tile = tile; bo->size = size; bo->flags = flags; bo->ttm.base.funcs = &xe_gem_object_funcs; @@ -1202,7 +1203,7 @@ static int __xe_bo_fixed_placement(struct xe_device *xe, struct xe_bo * xe_bo_create_locked_range(struct xe_device *xe, - struct xe_gt *gt, struct xe_vm *vm, + struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, enum ttm_bo_type type, u32 flags) { @@ -1225,7 +1226,7 @@ xe_bo_create_locked_range(struct xe_device *xe, } } - bo = __xe_bo_create_locked(xe, bo, gt, vm ? &vm->resv : NULL, size, + bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL, size, type, flags); if (IS_ERR(bo)) return bo; @@ -1235,16 +1236,16 @@ xe_bo_create_locked_range(struct xe_device *xe, bo->vm = vm; if (bo->flags & XE_BO_CREATE_GGTT_BIT) { - if (!gt && flags & XE_BO_CREATE_STOLEN_BIT) - gt = xe_device_get_gt(xe, 0); + if (!tile && flags & XE_BO_CREATE_STOLEN_BIT) + tile = xe_device_get_root_tile(xe); - XE_BUG_ON(!gt); + XE_BUG_ON(!tile); if (flags & XE_BO_CREATE_STOLEN_BIT && flags & XE_BO_FIXED_PLACEMENT_BIT) { - err = xe_ggtt_insert_bo_at(gt_to_tile(gt)->mem.ggtt, bo, start); + err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, start); } else { - err = xe_ggtt_insert_bo(gt_to_tile(gt)->mem.ggtt, bo); + err = xe_ggtt_insert_bo(tile->mem.ggtt, bo); } if (err) goto err_unlock_put_bo; @@ -1258,18 +1259,18 @@ err_unlock_put_bo: return ERR_PTR(err); } -struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags) { - return xe_bo_create_locked_range(xe, gt, vm, size, 0, ~0ULL, type, flags); + return xe_bo_create_locked_range(xe, tile, vm, size, 0, ~0ULL, type, flags); } -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags) { - struct xe_bo *bo = xe_bo_create_locked(xe, gt, vm, size, type, flags); + struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -1277,7 +1278,7 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, return bo; } -struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 offset, enum ttm_bo_type type, u32 flags) @@ -1291,7 +1292,7 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt, xe_ttm_stolen_cpu_access_needs_ggtt(xe)) flags |= XE_BO_CREATE_GGTT_BIT; - bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags); + bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, flags); if (IS_ERR(bo)) return bo; @@ -1315,18 +1316,18 @@ err_put: return ERR_PTR(err); } -struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags) { - return xe_bo_create_pin_map_at(xe, gt, vm, size, ~0ull, type, flags); + return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); } 
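As a side note on the API shift in the hunks above: after this series, placement is chosen per tile rather than per GT, so GT-level code resolves its tile with gt_to_tile() and hands that to the BO creation helpers. The following minimal sketch is illustrative only and is not part of the patch; the helper name alloc_tile_kernel_bo() is made up, while xe_bo_create_pin_map(), XE_BO_CREATE_VRAM_IF_DGFX() and XE_BO_CREATE_GGTT_BIT follow the signatures shown in the diff.

/* Illustrative sketch only -- not part of the patch series. */
static struct xe_bo *alloc_tile_kernel_bo(struct xe_gt *gt, size_t size)
{
	struct xe_tile *tile = gt_to_tile(gt);	/* memory is a tile concept */

	/* Tile VRAM on discrete parts, system memory otherwise; GGTT mapped. */
	return xe_bo_create_pin_map(tile_to_xe(tile), tile, NULL, size,
				    ttm_bo_type_kernel,
				    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				    XE_BO_CREATE_GGTT_BIT);
}

Callers would check the result with IS_ERR()/PTR_ERR(), as the converted call sites above do.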
-struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, enum ttm_bo_type type, u32 flags) { - struct xe_bo *bo = xe_bo_create_pin_map(xe, gt, NULL, + struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL, ALIGN(size, PAGE_SIZE), type, flags); if (IS_ERR(bo)) @@ -1957,7 +1958,7 @@ int xe_bo_dumb_create(struct drm_file *file_priv, page_size); bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device, - XE_BO_CREATE_VRAM_IF_DGFX(to_gt(xe)) | + XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 6e29e45a90f2..29eb7474f018 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -21,8 +21,8 @@ XE_BO_CREATE_VRAM1_BIT) /* -- */ #define XE_BO_CREATE_STOLEN_BIT BIT(4) -#define XE_BO_CREATE_VRAM_IF_DGFX(gt) \ - (IS_DGFX(gt_to_xe(gt)) ? XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id : \ +#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \ + (IS_DGFX(tile_to_xe(tile)) ? XE_BO_CREATE_VRAM0_BIT << (tile)->id : \ XE_BO_CREATE_SYSTEM_BIT) #define XE_BO_CREATE_GGTT_BIT BIT(5) #define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6) @@ -81,27 +81,27 @@ struct xe_bo *xe_bo_alloc(void); void xe_bo_free(struct xe_bo *bo); struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_gt *gt, struct dma_resv *resv, + struct xe_tile *tile, struct dma_resv *resv, size_t size, enum ttm_bo_type type, u32 flags); struct xe_bo * xe_bo_create_locked_range(struct xe_device *xe, - struct xe_gt *gt, struct xe_vm *vm, + struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 offset, enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_gt *gt, +struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, enum ttm_bo_type type, u32 flags); diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index a72963c54bf3..9226195bd560 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -149,7 +149,7 @@ int xe_bo_restore_kernel(struct xe_device *xe) } if (bo->flags & XE_BO_CREATE_GGTT_BIT) { - struct xe_tile *tile = gt_to_tile(bo->gt); + struct xe_tile *tile = bo->tile; mutex_lock(&tile->mem.ggtt->lock); xe_ggtt_map_bo(tile->mem.ggtt, bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 06de3330211d..f6ee920303af 100644 --- 
a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -29,8 +29,8 @@ struct xe_bo { u32 flags; /** @vm: VM this BO is attached to, for extobj this will be NULL */ struct xe_vm *vm; - /** @gt: GT this BO is attached to (kernel BO only) */ - struct xe_gt *gt; + /** @tile: Tile this BO is attached to (kernel BO only) */ + struct xe_tile *tile; /** @vmas: List of VMAs for this BO */ struct list_head vmas; /** @placements: valid placements for this BO */ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9382d7f62f03..ee050b4b4d77 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -128,6 +128,13 @@ struct xe_tile { /** @ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; + + /** + * @kernel_bb_pool: Pool from which batchbuffers are allocated. + * + * Media GT shares a pool with its primary GT. + */ + struct xe_sa_manager *kernel_bb_pool; } mem; }; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ff70a01f1591..d395d6fc1af6 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -151,7 +151,6 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) int xe_ggtt_init(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); - struct xe_gt *gt = &ggtt->tile->primary_gt; unsigned int flags; int err; @@ -164,9 +163,9 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) if (ggtt->flags & XE_GGTT_FLAGS_64K) flags |= XE_BO_CREATE_SYSTEM_BIT; else - flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt); + flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile); - ggtt->scratch = xe_bo_create_pin_map(xe, gt, NULL, XE_PAGE_SIZE, + ggtt->scratch = xe_bo_create_pin_map(xe, ggtt->tile, NULL, XE_PAGE_SIZE, ttm_bo_type_kernel, flags); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 419fc471053c..74023a5dc8b2 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -95,7 +95,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) if (IS_ERR(bb)) return PTR_ERR(bb); - batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool->bo); + batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo); job = xe_bb_create_wa_job(e, bb, batch_ofs); if (IS_ERR(job)) { xe_bb_free(bb, NULL); @@ -144,7 +144,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) } } - batch_ofs = xe_bo_ggtt_addr(gt->kernel_bb_pool->bo); + batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo); job = xe_bb_create_wa_job(e, bb, batch_ofs); if (IS_ERR(job)) { xe_bb_free(bb, NULL); @@ -370,31 +370,16 @@ static int all_fw_domain_init(struct xe_gt *gt) goto err_force_wake; if (!xe_gt_is_media_type(gt)) { - gt->kernel_bb_pool = xe_sa_bo_manager_init(gt, SZ_1M, 16); - if (IS_ERR(gt->kernel_bb_pool)) { - err = PTR_ERR(gt->kernel_bb_pool); - goto err_force_wake; - } - /* * USM has its only SA pool to non-block behind user operations */ if (gt_to_xe(gt)->info.supports_usm) { - gt->usm.bb_pool = xe_sa_bo_manager_init(gt, SZ_1M, 16); + gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16); if (IS_ERR(gt->usm.bb_pool)) { err = PTR_ERR(gt->usm.bb_pool); goto err_force_wake; } } - } else { - struct xe_gt *full_gt = xe_find_full_gt(gt); - - /* - * Media GT's kernel_bb_pool is only used while recording the - * default context during GT init. The USM pool should never - * be needed on the media GT. 
- */ - gt->kernel_bb_pool = full_gt->kernel_bb_pool; } if (!xe_gt_is_media_type(gt)) { diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 1114254bc519..b5a5538ae630 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -64,11 +64,11 @@ static int force_reset(struct seq_file *m, void *data) static int sa_info(struct seq_file *m, void *data) { - struct xe_gt *gt = node_to_gt(m->private); + struct xe_tile *tile = gt_to_tile(node_to_gt(m->private)); struct drm_printer p = drm_seq_file_printer(m); - drm_suballoc_dump_debug_info(>->kernel_bb_pool->base, &p, - gt->kernel_bb_pool->gpu_addr); + drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p, + tile->mem.kernel_bb_pool->gpu_addr); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index f4f3d95ae6b1..1ec140aaf2a7 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -69,10 +69,10 @@ static bool access_is_atomic(enum access_type access_type) return access_type == ACCESS_TYPE_ATOMIC; } -static bool vma_is_valid(struct xe_gt *gt, struct xe_vma *vma) +static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) { - return BIT(gt->info.id) & vma->gt_present && - !(BIT(gt->info.id) & vma->usm.gt_invalidated); + return BIT(tile->id) & vma->tile_present && + !(BIT(tile->id) & vma->usm.tile_invalidated); } static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup) @@ -152,7 +152,7 @@ retry_userptr: atomic = access_is_atomic(pf->access_type); /* Check if VMA is valid */ - if (vma_is_valid(gt, vma) && !atomic) + if (vma_is_valid(tile, vma) && !atomic) goto unlock_vm; /* TODO: Validate fault */ @@ -208,8 +208,8 @@ retry_userptr: /* Bind VMA only to the GT that has faulted */ trace_xe_vma_pf_bind(vma); - fence = __xe_pt_bind_vma(gt, vma, xe_gt_migrate_engine(gt), NULL, 0, - vma->gt_present & BIT(gt->info.id)); + fence = __xe_pt_bind_vma(tile, vma, xe_gt_migrate_engine(gt), NULL, 0, + vma->tile_present & BIT(tile->id)); if (IS_ERR(fence)) { ret = PTR_ERR(fence); goto unlock_dma_resv; @@ -225,7 +225,7 @@ retry_userptr: if (xe_vma_is_userptr(vma)) ret = xe_vma_userptr_check_repin(vma); - vma->usm.gt_invalidated &= ~BIT(gt->info.id); + vma->usm.tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: if (only_needs_bo_lock(bo)) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index a040ec896e70..c44560b6dc71 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -278,13 +278,6 @@ struct xe_gt { /** @hw_engines: hardware engines on the GT */ struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES]; - /** - * @kernel_bb_pool: Pool from which batchbuffers are allocated. - * - * Media GT shares a pool with its primary GT. 
- */ - struct xe_sa_manager *kernel_bb_pool; - /** @migrate: Migration helper for vram blits and clearing */ struct xe_migrate *migrate; diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 6d550d746909..dd69d097b920 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -273,16 +273,17 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) { struct xe_device *xe = ads_to_xe(ads); struct xe_gt *gt = ads_to_gt(ads); + struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo; int err; ads->golden_lrc_size = calculate_golden_lrc_size(ads); ads->regset_size = calculate_regset_size(gt); - bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ads_size(ads) + + bo = xe_bo_create_pin_map(xe, tile, NULL, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 9dc906f2651a..137c184df487 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -130,6 +130,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo; int err; @@ -145,9 +146,9 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); - bo = xe_bo_create_pin_map(xe, gt, NULL, guc_ct_size(), + bo = xe_bo_create_pin_map(xe, tile, NULL, guc_ct_size(), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index a6982f323ed1..c8f875e970ab 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -70,6 +70,7 @@ int xe_guc_hwconfig_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); + struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo; u32 size; int err; @@ -94,9 +95,9 @@ int xe_guc_hwconfig_init(struct xe_guc *guc) if (!size) return -EINVAL; - bo = xe_bo_create_pin_map(xe, gt, NULL, PAGE_ALIGN(size), + bo = xe_bo_create_pin_map(xe, tile, NULL, PAGE_ALIGN(size), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 9a7b5d5906c1..403aaafcaba6 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -87,13 +87,13 @@ static void guc_log_fini(struct drm_device *drm, void *arg) int xe_guc_log_init(struct xe_guc_log *log) { struct xe_device *xe = log_to_xe(log); - struct xe_gt *gt = log_to_gt(log); + struct xe_tile *tile = gt_to_tile(log_to_gt(log)); struct xe_bo *bo; int err; - bo = xe_bo_create_pin_map(xe, gt, NULL, guc_log_size(), + bo = xe_bo_create_pin_map(xe, tile, NULL, guc_log_size(), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index e799faa1c6b8..67faa9ee0006 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -888,6 +888,7 @@ static void pc_fini(struct drm_device *drm, void *arg) int xe_guc_pc_init(struct xe_guc_pc *pc) { struct xe_gt *gt = 
pc_to_gt(pc); + struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct xe_bo *bo; u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); @@ -895,9 +896,9 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) mutex_init(&pc->freq_lock); - bo = xe_bo_create_pin_map(xe, gt, NULL, size, + bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 7e4b0b465244..b12f65a2bab3 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -373,6 +373,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, enum xe_hw_engine_id id) { struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); int err; XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name); @@ -381,8 +382,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt); - hwe->hwsp = xe_bo_create_pin_map(xe, gt, NULL, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + hwe->hwsp = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(hwe->hwsp)) { err = PTR_ERR(hwe->hwsp); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index ae605e7805de..8f25a38f36a5 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -592,7 +592,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->full_gt); + u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); @@ -607,6 +607,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_engine *e, struct xe_vm *vm, u32 ring_size) { struct xe_gt *gt = hwe->gt; + struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; void *init_data = NULL; @@ -619,19 +620,15 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. 
*/ - lrc->bo = xe_bo_create_pin_map(xe, hwe->gt, vm, + lrc->bo = xe_bo_create_pin_map(xe, tile, vm, ring_size + xe_lrc_size(xe, hwe->class), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(hwe->gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - if (xe_gt_is_media_type(hwe->gt)) - lrc->full_gt = xe_find_full_gt(hwe->gt); - else - lrc->full_gt = hwe->gt; - + lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 8fe08535873d..78220336062c 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -20,8 +20,8 @@ struct xe_lrc { */ struct xe_bo *bo; - /** @full_gt: full GT which this LRC belongs to */ - struct xe_gt *full_gt; + /** @tile: tile which this LRC belongs to */ + struct xe_tile *tile; /** @flags: LRC flags */ u32 flags; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7a2188f02a86..3031a45db490 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -129,6 +129,7 @@ static u64 xe_migrate_vram_ofs(u64 addr) static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) { struct xe_gt *gt = m->gt; + struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = vm->xe; size_t cleared_size; u64 vram_addr; @@ -139,9 +140,9 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER); cleared_size = PAGE_ALIGN(cleared_size); - m->cleared_bo = xe_bo_create_pin_map(xe, gt, vm, cleared_size, + m->cleared_bo = xe_bo_create_pin_map(xe, tile, vm, cleared_size, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_PINNED_BIT); if (IS_ERR(m->cleared_bo)) return PTR_ERR(m->cleared_bo); @@ -161,7 +162,8 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; u32 map_ofs, level, i; struct xe_device *xe = gt_to_xe(m->gt); - struct xe_bo *bo, *batch = gt->kernel_bb_pool->bo; + struct xe_tile *tile = gt_to_tile(m->gt); + struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo; u64 entry; int ret; @@ -175,10 +177,10 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, /* Need to be sure everything fits in the first PT, or create more */ XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M); - bo = xe_bo_create_pin_map(vm->xe, m->gt, vm, + bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(m->gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_PINNED_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -984,7 +986,7 @@ err_sync: return fence; } -static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs, +static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, const struct xe_vm_pgtable_update *update, struct xe_migrate_pt_update *pt_update) { @@ -1023,7 +1025,7 @@ static void write_pgtable(struct xe_gt *gt, struct xe_bb *bb, u64 ppgtt_ofs, (chunk * 2 + 1); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); - ops->populate(pt_update, gt, NULL, bb->cs + bb->len, ofs, chunk, + ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk, update); bb->len += chunk * 2; @@ -1081,7 +1083,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate 
*m, for (i = 0; i < num_updates; i++) { const struct xe_vm_pgtable_update *update = &updates[i]; - ops->populate(pt_update, m->gt, &update->pt_bo->vmap, NULL, + ops->populate(pt_update, gt_to_tile(m->gt), &update->pt_bo->vmap, NULL, update->ofs, update->qwords, update); } @@ -1149,6 +1151,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, { const struct xe_migrate_pt_update_ops *ops = pt_update->ops; struct xe_gt *gt = m->gt; + struct xe_tile *tile = gt_to_tile(m->gt); struct xe_device *xe = gt_to_xe(gt); struct xe_sched_job *job; struct dma_fence *fence; @@ -1243,7 +1246,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, addr = xe_migrate_vm_addr(ppgtt_ofs, 0) + (page_ofs / sizeof(u64)) * XE_PAGE_SIZE; for (i = 0; i < num_updates; i++) - write_pgtable(m->gt, bb, addr + i * XE_PAGE_SIZE, + write_pgtable(tile, bb, addr + i * XE_PAGE_SIZE, &updates[i], pt_update); } else { /* phys pages, no preamble required */ @@ -1253,7 +1256,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, /* Preemption is enabled again by the ring ops. */ emit_arb_clear(bb); for (i = 0; i < num_updates; i++) - write_pgtable(m->gt, bb, 0, &updates[i], pt_update); + write_pgtable(tile, bb, 0, &updates[i], pt_update); } if (!eng) diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index c283b626c21c..e627f4023d5a 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -19,6 +19,7 @@ struct xe_migrate; struct xe_migrate_pt_update; struct xe_sync_entry; struct xe_pt; +struct xe_tile; struct xe_vm; struct xe_vm_pgtable_update; struct xe_vma; @@ -31,7 +32,7 @@ struct xe_migrate_pt_update_ops { /** * @populate: Populate a command buffer or page-table with ptes. * @pt_update: Embeddable callback argument. - * @gt: The gt for the current operation. + * @tile: The tile for the current operation. * @map: struct iosys_map into the memory to be populated. * @pos: If @map is NULL, map into the memory to be populated. * @ofs: qword offset into @map, unused if @map is NULL. @@ -43,7 +44,7 @@ struct xe_migrate_pt_update_ops { * page-tables with PTEs. */ void (*populate)(struct xe_migrate_pt_update *pt_update, - struct xe_gt *gt, struct iosys_map *map, + struct xe_tile *tile, struct iosys_map *map, void *pos, u32 ofs, u32 num_qwords, const struct xe_vm_pgtable_update *update); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index e2cd1946af5a..094058cb5f93 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -165,12 +165,10 @@ u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, return __gen8_pte_encode(pte, cache, flags, pt_level); } -static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm, +static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, unsigned int level) { - u8 id = gt->info.id; - - XE_BUG_ON(xe_gt_is_media_type(gt)); + u8 id = tile->id; if (!vm->scratch_bo[id]) return 0; @@ -189,7 +187,7 @@ static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm, /** * xe_pt_create() - Create a page-table. * @vm: The vm to create for. - * @gt: The gt to create for. + * @tile: The tile to create for. * @level: The page-table level. * * Allocate and initialize a single struct xe_pt metadata structure. Also @@ -201,7 +199,7 @@ static u64 __xe_pt_empty_pte(struct xe_gt *gt, struct xe_vm *vm, * Return: A valid struct xe_pt pointer on success, Pointer error code on * error. 
*/ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, +struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, unsigned int level) { struct xe_pt *pt; @@ -215,9 +213,9 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, if (!pt) return ERR_PTR(-ENOMEM); - bo = xe_bo_create_pin_map(vm->xe, gt, vm, SZ_4K, + bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_NO_RESV_EVICT); @@ -241,30 +239,28 @@ err_kfree: /** * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero * entries. - * @gt: The gt the scratch pagetable of which to use. + * @tile: The tile the scratch pagetable of which to use. * @vm: The vm we populate for. * @pt: The pagetable the bo of which to initialize. * - * Populate the page-table bo of @pt with entries pointing into the gt's + * Populate the page-table bo of @pt with entries pointing into the tile's * scratch page-table tree if any. Otherwise populate with zeros. */ -void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, +void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt) { struct iosys_map *map = &pt->bo->vmap; u64 empty; int i; - XE_BUG_ON(xe_gt_is_media_type(gt)); - - if (!vm->scratch_bo[gt->info.id]) { + if (!vm->scratch_bo[tile->id]) { /* * FIXME: Some memory is allocated already allocated to zero? * Find out which memory that is and avoid this memset... */ xe_map_memset(vm->xe, map, 0, 0, SZ_4K); } else { - empty = __xe_pt_empty_pte(gt, vm, pt->level); + empty = __xe_pt_empty_pte(tile, vm, pt->level); for (i = 0; i < XE_PDES; i++) xe_pt_write(vm->xe, map, i, empty); } @@ -318,9 +314,9 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) /** * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the - * given gt and vm. + * given tile and vm. * @xe: xe device. - * @gt: gt to set up for. + * @tile: tile to set up for. * @vm: vm to set up for. * * Sets up a pagetable tree with one page-table per level and a single @@ -329,10 +325,10 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) * * Return: 0 on success, negative error code on error. 
*/ -int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, +int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm) { - u8 id = gt->info.id; + u8 id = tile->id; unsigned int flags; int i; @@ -345,9 +341,9 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, if (vm->flags & XE_VM_FLAGS_64K) flags |= XE_BO_CREATE_SYSTEM_BIT; else - flags |= XE_BO_CREATE_VRAM_IF_DGFX(gt); + flags |= XE_BO_CREATE_VRAM_IF_DGFX(tile); - vm->scratch_bo[id] = xe_bo_create_pin_map(xe, gt, vm, SZ_4K, + vm->scratch_bo[id] = xe_bo_create_pin_map(xe, tile, vm, SZ_4K, ttm_bo_type_kernel, flags); if (IS_ERR(vm->scratch_bo[id])) @@ -357,11 +353,11 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, vm->scratch_bo[id]->size); for (i = 0; i < vm->pt_root[id]->level; i++) { - vm->scratch_pt[id][i] = xe_pt_create(vm, gt, i); + vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); if (IS_ERR(vm->scratch_pt[id][i])) return PTR_ERR(vm->scratch_pt[id][i]); - xe_pt_populate_empty(gt, vm, vm->scratch_pt[id][i]); + xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); } return 0; @@ -410,8 +406,8 @@ struct xe_pt_stage_bind_walk { /* Input parameters for the walk */ /** @vm: The vm we're building for. */ struct xe_vm *vm; - /** @gt: The gt we're building for. */ - struct xe_gt *gt; + /** @tile: The tile we're building for. */ + struct xe_tile *tile; /** @cache: Desired cache level for the ptes */ enum xe_cache_level cache; /** @default_pte: PTE flag only template. No address is associated */ @@ -679,7 +675,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (covers || !*child) { u64 flags = 0; - xe_child = xe_pt_create(xe_walk->vm, xe_walk->gt, level - 1); + xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); if (IS_ERR(xe_child)) return PTR_ERR(xe_child); @@ -687,7 +683,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, round_down(addr, 1ull << walk->shifts[level])); if (!covers) - xe_pt_populate_empty(xe_walk->gt, xe_walk->vm, xe_child); + xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); *child = &xe_child->base; @@ -696,7 +692,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, * TODO: Suballocate the pt bo to avoid wasting a lot of * memory. */ - if (GRAPHICS_VERx100(gt_to_xe(xe_walk->gt)) >= 1250 && level == 1 && + if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && covers && xe_pt_scan_64K(addr, next, xe_walk)) { walk->shifts = xe_compact_pt_shifts; flags |= XE_PDE_64K; @@ -719,7 +715,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { /** * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address * range. - * @gt: The gt we're building for. + * @tile: The tile we're building for. * @vma: The vma indicating the address range. * @entries: Storage for the update entries used for connecting the tree to * the main tree at commit time. @@ -735,7 +731,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { * Return 0 on success, negative error code on error. 
*/ static int -xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, +xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 *num_entries) { struct xe_bo *bo = vma->bo; @@ -748,14 +744,14 @@ xe_pt_stage_bind(struct xe_gt *gt, struct xe_vma *vma, .max_level = XE_PT_HIGHEST_LEVEL, }, .vm = vma->vm, - .gt = gt, + .tile = tile, .curs = &curs, .va_curs_start = vma->start, .pte_flags = vma->pte_flags, .wupd.entries = entries, .needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram, }; - struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + struct xe_pt *pt = vma->vm->pt_root[tile->id]; int ret; if (is_vram) { @@ -849,8 +845,8 @@ struct xe_pt_zap_ptes_walk { struct xe_pt_walk base; /* Input parameters for the walk */ - /** @gt: The gt we're building for */ - struct xe_gt *gt; + /** @tile: The tile we're building for */ + struct xe_tile *tile; /* Output */ /** @needs_invalidate: Whether we need to invalidate TLB*/ @@ -878,7 +874,7 @@ static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, */ if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, &end_offset)) { - xe_map_memset(gt_to_xe(xe_walk->gt), &xe_child->bo->vmap, + xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, offset * sizeof(u64), 0, (end_offset - offset) * sizeof(u64)); xe_walk->needs_invalidate = true; @@ -893,7 +889,7 @@ static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { /** * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @gt: The gt we're zapping for. + * @tile: The tile we're zapping for. * @vma: GPU VMA detailing address range. * * Eviction and Userptr invalidation needs to be able to zap the @@ -907,7 +903,7 @@ static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { * Return: Whether ptes were actually updated and a TLB invalidation is * required. 
*/ -bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma) +bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) { struct xe_pt_zap_ptes_walk xe_walk = { .base = { @@ -915,11 +911,11 @@ bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma) .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, }, - .gt = gt, + .tile = tile, }; - struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + struct xe_pt *pt = vma->vm->pt_root[tile->id]; - if (!(vma->gt_present & BIT(gt->info.id))) + if (!(vma->tile_present & BIT(tile->id))) return false; (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, @@ -929,7 +925,7 @@ bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma) } static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt, +xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, struct iosys_map *map, void *data, u32 qword_ofs, u32 num_qwords, const struct xe_vm_pgtable_update *update) @@ -938,11 +934,9 @@ xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_gt *gt, u64 *ptr = data; u32 i; - XE_BUG_ON(xe_gt_is_media_type(gt)); - for (i = 0; i < num_qwords; i++) { if (map) - xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * + xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * sizeof(u64), u64, ptes[i].pte); else ptr[i] = ptes[i].pte; @@ -1016,14 +1010,14 @@ static void xe_pt_commit_bind(struct xe_vma *vma, } static int -xe_pt_prepare_bind(struct xe_gt *gt, struct xe_vma *vma, +xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 *num_entries, bool rebind) { int err; *num_entries = 0; - err = xe_pt_stage_bind(gt, vma, entries, num_entries); + err = xe_pt_stage_bind(tile, vma, entries, num_entries); if (!err) BUG_ON(!*num_entries); else /* abort! */ @@ -1250,7 +1244,7 @@ static int invalidation_fence_init(struct xe_gt *gt, /** * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma * address range. - * @gt: The gt to bind for. + * @tile: The tile to bind for. * @vma: The vma to bind. * @e: The engine with which to do pipelined page-table updates. * @syncs: Entries to sync on before binding the built tree to the live vm tree. @@ -1270,7 +1264,7 @@ static int invalidation_fence_init(struct xe_gt *gt, * on success, an error pointer on error. 
*/ struct dma_fence * -__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, +__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, struct xe_sync_entry *syncs, u32 num_syncs, bool rebind) { @@ -1291,18 +1285,17 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, bind_pt_update.locked = false; xe_bo_assert_held(vma->bo); xe_vm_assert_held(vm); - XE_BUG_ON(xe_gt_is_media_type(gt)); vm_dbg(&vma->vm->xe->drm, "Preparing bind, with range [%llx...%llx) engine %p.\n", vma->start, vma->end, e); - err = xe_pt_prepare_bind(gt, vma, entries, &num_entries, rebind); + err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind); if (err) goto err; XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); - xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); + xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); if (rebind && !xe_vm_no_dma_fences(vma->vm)) { ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); @@ -1310,9 +1303,9 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, return ERR_PTR(-ENOMEM); } - fence = xe_migrate_update_pgtables(gt->migrate, + fence = xe_migrate_update_pgtables(tile->primary_gt.migrate, vm, vma->bo, - e ? e : vm->eng[gt->info.id], + e ? e : vm->eng[tile->id], entries, num_entries, syncs, num_syncs, &bind_pt_update.base); @@ -1321,7 +1314,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, /* TLB invalidation must be done before signaling rebind */ if (rebind && !xe_vm_no_dma_fences(vma->vm)) { - int err = invalidation_fence_init(gt, ifence, fence, + int err = invalidation_fence_init(&tile->primary_gt, ifence, fence, vma); if (err) { dma_fence_put(fence); @@ -1344,7 +1337,7 @@ __xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, bind_pt_update.locked ? &deferred : NULL); /* This vma is live (again?) now */ - vma->gt_present |= BIT(gt->info.id); + vma->tile_present |= BIT(tile->id); if (bind_pt_update.locked) { vma->userptr.initial_bind = true; @@ -1373,8 +1366,8 @@ struct xe_pt_stage_unbind_walk { struct xe_pt_walk base; /* Input parameters for the walk */ - /** @gt: The gt we're unbinding from. */ - struct xe_gt *gt; + /** @tile: The tile we're unbinding from. */ + struct xe_tile *tile; /** * @modified_start: Walk range start, modified to include any @@ -1479,7 +1472,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { /** * xe_pt_stage_unbind() - Build page-table update structures for an unbind * operation - * @gt: The gt we're unbinding for. + * @tile: The tile we're unbinding for. * @vma: The vma we're unbinding. * @entries: Caller-provided storage for the update structures. * @@ -1490,7 +1483,7 @@ static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { * * Return: The number of entries used. 
*/ -static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma, +static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, struct xe_vm_pgtable_update *entries) { struct xe_pt_stage_unbind_walk xe_walk = { @@ -1499,12 +1492,12 @@ static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, }, - .gt = gt, + .tile = tile, .modified_start = vma->start, .modified_end = vma->end + 1, .wupd.entries = entries, }; - struct xe_pt *pt = vma->vm->pt_root[gt->info.id]; + struct xe_pt *pt = vma->vm->pt_root[tile->id]; (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, &xe_walk.base); @@ -1514,19 +1507,17 @@ static unsigned int xe_pt_stage_unbind(struct xe_gt *gt, struct xe_vma *vma, static void xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_gt *gt, struct iosys_map *map, + struct xe_tile *tile, struct iosys_map *map, void *ptr, u32 qword_ofs, u32 num_qwords, const struct xe_vm_pgtable_update *update) { struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(gt, vma->vm, update->pt->level); + u64 empty = __xe_pt_empty_pte(tile, vma->vm, update->pt->level); int i; - XE_BUG_ON(xe_gt_is_media_type(gt)); - if (map && map->is_iomem) for (i = 0; i < num_qwords; ++i) - xe_map_wr(gt_to_xe(gt), map, (qword_ofs + i) * + xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * sizeof(u64), u64, empty); else if (map) memset64(map->vaddr + qword_ofs * sizeof(u64), empty, @@ -1577,7 +1568,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { /** * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma * address range. - * @gt: The gt to unbind for. + * @tile: The tile to unbind for. * @vma: The vma to unbind. * @e: The engine with which to do pipelined page-table updates. * @syncs: Entries to sync on before disconnecting the tree to be destroyed. @@ -1595,7 +1586,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { * on success, an error pointer on error. */ struct dma_fence * -__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, +__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, struct xe_sync_entry *syncs, u32 num_syncs) { struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; @@ -1614,16 +1605,15 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, xe_bo_assert_held(vma->bo); xe_vm_assert_held(vm); - XE_BUG_ON(xe_gt_is_media_type(gt)); vm_dbg(&vma->vm->xe->drm, "Preparing unbind, with range [%llx...%llx) engine %p.\n", vma->start, vma->end, e); - num_entries = xe_pt_stage_unbind(gt, vma, entries); + num_entries = xe_pt_stage_unbind(tile, vma, entries); XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); - xe_vm_dbg_print_entries(gt_to_xe(gt), entries, num_entries); + xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) @@ -1634,9 +1624,9 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, * clear again here. The eviction may have updated pagetables at a * lower level, because it needs to be more conservative. */ - fence = xe_migrate_update_pgtables(gt->migrate, + fence = xe_migrate_update_pgtables(tile->primary_gt.migrate, vm, NULL, e ? 
e : - vm->eng[gt->info.id], + vm->eng[tile->id], entries, num_entries, syncs, num_syncs, &unbind_pt_update.base); @@ -1644,7 +1634,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, int err; /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(gt, ifence, fence, vma); + err = invalidation_fence_init(&tile->primary_gt, ifence, fence, vma); if (err) { dma_fence_put(fence); kfree(ifence); @@ -1662,18 +1652,18 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, DMA_RESV_USAGE_BOOKKEEP); xe_pt_commit_unbind(vma, entries, num_entries, unbind_pt_update.locked ? &deferred : NULL); - vma->gt_present &= ~BIT(gt->info.id); + vma->tile_present &= ~BIT(tile->id); } else { kfree(ifence); } - if (!vma->gt_present) + if (!vma->tile_present) list_del_init(&vma->rebind_link); if (unbind_pt_update.locked) { XE_WARN_ON(!xe_vma_is_userptr(vma)); - if (!vma->gt_present) { + if (!vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); list_del_init(&vma->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 1152043e5c63..10f334b9c004 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -13,8 +13,8 @@ struct dma_fence; struct xe_bo; struct xe_device; struct xe_engine; -struct xe_gt; struct xe_sync_entry; +struct xe_tile; struct xe_vm; struct xe_vma; @@ -23,27 +23,27 @@ struct xe_vma; unsigned int xe_pt_shift(unsigned int level); -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_gt *gt, +struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, unsigned int level); -int xe_pt_create_scratch(struct xe_device *xe, struct xe_gt *gt, +int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm); -void xe_pt_populate_empty(struct xe_gt *gt, struct xe_vm *vm, +void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt); void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred); struct dma_fence * -__xe_pt_bind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, +__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, struct xe_sync_entry *syncs, u32 num_syncs, bool rebind); struct dma_fence * -__xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e, +__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, struct xe_sync_entry *syncs, u32 num_syncs); -bool xe_pt_zap_ptes(struct xe_gt *gt, struct xe_vma *vma); +bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma); u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, const enum xe_cache_level level); diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index c16f7c14ff52..fee71080bd31 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -11,7 +11,6 @@ #include "xe_bo.h" #include "xe_device.h" -#include "xe_gt.h" #include "xe_map.h" static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) @@ -33,14 +32,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) sa_manager->bo = NULL; } -struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 align) +struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align) { - struct xe_device *xe = gt_to_xe(gt); + struct xe_device *xe = tile_to_xe(tile); u32 managed_size = size - SZ_4K; struct xe_bo *bo; int ret; - struct xe_sa_manager 
*sa_manager = drmm_kzalloc(>_to_xe(gt)->drm, + struct xe_sa_manager *sa_manager = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*sa_manager), GFP_KERNEL); if (!sa_manager) @@ -48,8 +47,8 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 alig sa_manager->bo = NULL; - bo = xe_bo_create_pin_map(xe, gt, NULL, size, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", @@ -90,7 +89,7 @@ struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) { struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager); - struct xe_device *xe = gt_to_xe(sa_manager->bo->gt); + struct xe_device *xe = tile_to_xe(sa_manager->bo->tile); if (!sa_manager->bo->vmap.is_iomem) return; diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 3063fb34c720..4e96483057d7 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -9,9 +9,9 @@ struct dma_fence; struct xe_bo; -struct xe_gt; +struct xe_tile; -struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_gt *gt, u32 size, u32 align); +struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align); struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size); diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 5530a6b6ef31..59d3e25ea550 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -7,6 +7,7 @@ #include "xe_device.h" #include "xe_ggtt.h" +#include "xe_sa.h" #include "xe_tile.h" #include "xe_ttm_vram_mgr.h" @@ -76,6 +77,12 @@ int xe_tile_init_noalloc(struct xe_tile *tile) goto err_mem_access; err = xe_ggtt_init_noalloc(tile->mem.ggtt); + if (err) + goto err_mem_access; + + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); + if (IS_ERR(tile->mem.kernel_bb_pool)) + err = PTR_ERR(tile->mem.kernel_bb_pool); err_mem_access: xe_device_mem_access_put(tile_to_xe(tile)); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5703213bdf1b..2b9b9b4a6711 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -322,6 +322,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) { struct xe_device *xe = uc_fw_to_xe(uc_fw); struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_tile *tile = gt_to_tile(gt); struct device *dev = xe->drm.dev; const struct firmware *fw = NULL; struct uc_css_header *css; @@ -411,9 +412,9 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) if (uc_fw->type == XE_UC_FW_TYPE_GUC) guc_read_css_info(uc_fw, css); - obj = xe_bo_create_from_data(xe, gt, fw->data, fw->size, + obj = xe_bo_create_from_data(xe, tile, fw->data, fw->size, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(gt) | + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(obj)) { drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo", diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 798cba1bda6b..ecfff4ffd00e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -465,7 +465,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, xe_bo_assert_held(vma->bo); list_del_init(&vma->notifier.rebind_link); - if (vma->gt_present && !vma->destroyed) + if (vma->tile_present && !vma->destroyed) list_move_tail(&vma->rebind_link, 
&vm->rebind_list); } spin_unlock(&vm->notifier.list_lock); @@ -703,7 +703,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, * Tell exec and rebind worker they need to repin and rebind this * userptr. */ - if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->gt_present) { + if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); list_move_tail(&vma->userptr.invalidate_link, &vm->userptr.invalidated); @@ -821,7 +821,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) xe_vm_assert_held(vm); list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) { - XE_WARN_ON(!vma->gt_present); + XE_WARN_ON(!vma->tile_present); list_del_init(&vma->rebind_link); dma_fence_put(fence); @@ -842,10 +842,10 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, u64 bo_offset_or_userptr, u64 start, u64 end, bool read_only, - u64 gt_mask) + u64 tile_mask) { struct xe_vma *vma; - struct xe_gt *gt; + struct xe_tile *tile; u8 id; XE_BUG_ON(start >= end); @@ -870,12 +870,11 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (read_only) vma->pte_flags = XE_PTE_READ_ONLY; - if (gt_mask) { - vma->gt_mask = gt_mask; + if (tile_mask) { + vma->tile_mask = tile_mask; } else { - for_each_gt(gt, vm->xe, id) - if (!xe_gt_is_media_type(gt)) - vma->gt_mask |= 0x1 << id; + for_each_tile(tile, vm->xe, id) + vma->tile_mask |= 0x1 << id; } if (vm->xe->info.platform == XE_PVC) @@ -1162,8 +1161,8 @@ static void vm_destroy_work_func(struct work_struct *w); struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) { struct xe_vm *vm; - int err, i = 0, number_gts = 0; - struct xe_gt *gt; + int err, i = 0, number_tiles = 0; + struct xe_tile *tile; u8 id; vm = kzalloc(sizeof(*vm), GFP_KERNEL); @@ -1215,15 +1214,12 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) vm->flags |= XE_VM_FLAGS_64K; - for_each_gt(gt, xe, id) { - if (xe_gt_is_media_type(gt)) - continue; - + for_each_tile(tile, xe, id) { if (flags & XE_VM_FLAG_MIGRATION && - gt->info.id != XE_VM_FLAG_GT_ID(flags)) + tile->id != XE_VM_FLAG_GT_ID(flags)) continue; - vm->pt_root[id] = xe_pt_create(vm, gt, xe->info.vm_max_level); + vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); if (IS_ERR(vm->pt_root[id])) { err = PTR_ERR(vm->pt_root[id]); vm->pt_root[id] = NULL; @@ -1232,11 +1228,11 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) } if (flags & XE_VM_FLAG_SCRATCH_PAGE) { - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { if (!vm->pt_root[id]) continue; - err = xe_pt_create_scratch(xe, gt, vm); + err = xe_pt_create_scratch(xe, tile, vm); if (err) goto err_scratch_pt; } @@ -1253,17 +1249,18 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) } /* Fill pt_root after allocating scratch tables */ - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { if (!vm->pt_root[id]) continue; - xe_pt_populate_empty(gt, vm, vm->pt_root[id]); + xe_pt_populate_empty(tile, vm, vm->pt_root[id]); } dma_resv_unlock(&vm->resv); /* Kernel migration VM shouldn't have a circular loop.. 
*/ if (!(flags & XE_VM_FLAG_MIGRATION)) { - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { + struct xe_gt *gt = &tile->primary_gt; struct xe_vm *migrate_vm; struct xe_engine *eng; @@ -1280,11 +1277,11 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) return ERR_CAST(eng); } vm->eng[id] = eng; - number_gts++; + number_tiles++; } } - if (number_gts > 1) + if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); mutex_lock(&xe->usm.lock); @@ -1299,7 +1296,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) return vm; err_scratch_pt: - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { if (!vm->pt_root[id]) continue; @@ -1312,7 +1309,7 @@ err_scratch_pt: xe_bo_put(vm->scratch_bo[id]); } err_destroy_root: - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { if (vm->pt_root[id]) xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); } @@ -1369,7 +1366,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) struct rb_root contested = RB_ROOT; struct ww_acquire_ctx ww; struct xe_device *xe = vm->xe; - struct xe_gt *gt; + struct xe_tile *tile; u8 id; XE_BUG_ON(vm->preempt.num_engines); @@ -1380,7 +1377,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) if (xe_vm_in_compute_mode(vm)) flush_work(&vm->preempt.rebind_work); - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { if (vm->eng[id]) { xe_engine_kill(vm->eng[id]); xe_engine_put(vm->eng[id]); @@ -1417,7 +1414,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) * install a fence to resv. Hence it's safe to * destroy the pagetables immediately. */ - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { if (vm->scratch_bo[id]) { u32 i; @@ -1467,7 +1464,7 @@ static void vm_destroy_work_func(struct work_struct *w) container_of(w, struct xe_vm, destroy_work); struct ww_acquire_ctx ww; struct xe_device *xe = vm->xe; - struct xe_gt *gt; + struct xe_tile *tile; u8 id; void *lookup; @@ -1492,7 +1489,7 @@ static void vm_destroy_work_func(struct work_struct *w) * can be moved to xe_vm_close_and_put. 
*/ xe_vm_lock(vm, &ww, 0, false); - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { if (vm->pt_root[id]) { xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); vm->pt_root[id] = NULL; @@ -1528,11 +1525,9 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) return vm; } -u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt) +u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) { - XE_BUG_ON(xe_gt_is_media_type(full_gt)); - - return gen8_pde_encode(vm->pt_root[full_gt->info.id]->bo, 0, + return gen8_pde_encode(vm->pt_root[tile->id]->bo, 0, XE_CACHE_WB); } @@ -1540,32 +1535,30 @@ static struct dma_fence * xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, struct xe_sync_entry *syncs, u32 num_syncs) { - struct xe_gt *gt; + struct xe_tile *tile; struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct xe_vm *vm = vma->vm; int cur_fence = 0, i; - int number_gts = hweight_long(vma->gt_present); + int number_tiles = hweight_long(vma->tile_present); int err; u8 id; trace_xe_vma_unbind(vma); - if (number_gts > 1) { - fences = kmalloc_array(number_gts, sizeof(*fences), + if (number_tiles > 1) { + fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); if (!fences) return ERR_PTR(-ENOMEM); } - for_each_gt(gt, vm->xe, id) { - if (!(vma->gt_present & BIT(id))) + for_each_tile(tile, vm->xe, id) { + if (!(vma->tile_present & BIT(id))) goto next; - XE_BUG_ON(xe_gt_is_media_type(gt)); - - fence = __xe_pt_unbind_vma(gt, vma, e, syncs, num_syncs); + fence = __xe_pt_unbind_vma(tile, vma, e, syncs, num_syncs); if (IS_ERR(fence)) { err = PTR_ERR(fence); goto err_fences; @@ -1580,7 +1573,7 @@ next: } if (fences) { - cf = dma_fence_array_create(number_gts, fences, + cf = dma_fence_array_create(number_tiles, fences, vm->composite_fence_ctx, vm->composite_fence_seqno++, false); @@ -1612,32 +1605,31 @@ static struct dma_fence * xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, struct xe_sync_entry *syncs, u32 num_syncs) { - struct xe_gt *gt; + struct xe_tile *tile; struct dma_fence *fence; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct xe_vm *vm = vma->vm; int cur_fence = 0, i; - int number_gts = hweight_long(vma->gt_mask); + int number_tiles = hweight_long(vma->tile_mask); int err; u8 id; trace_xe_vma_bind(vma); - if (number_gts > 1) { - fences = kmalloc_array(number_gts, sizeof(*fences), + if (number_tiles > 1) { + fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); if (!fences) return ERR_PTR(-ENOMEM); } - for_each_gt(gt, vm->xe, id) { - if (!(vma->gt_mask & BIT(id))) + for_each_tile(tile, vm->xe, id) { + if (!(vma->tile_mask & BIT(id))) goto next; - XE_BUG_ON(xe_gt_is_media_type(gt)); - fence = __xe_pt_bind_vma(gt, vma, e, syncs, num_syncs, - vma->gt_present & BIT(id)); + fence = __xe_pt_bind_vma(tile, vma, e, syncs, num_syncs, + vma->tile_present & BIT(id)); if (IS_ERR(fence)) { err = PTR_ERR(fence); goto err_fences; @@ -1652,7 +1644,7 @@ next: } if (fences) { - cf = dma_fence_array_create(number_gts, fences, + cf = dma_fence_array_create(number_tiles, fences, vm->composite_fence_ctx, vm->composite_fence_seqno++, false); @@ -2047,7 +2039,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, return err; } - if (vma->gt_mask != (vma->gt_present & ~vma->usm.gt_invalidated)) { + if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) { return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs, afence); } else { @@ -2649,7 +2641,7 @@ 
static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, first->start, lookup->start - 1, (first->pte_flags & XE_PTE_READ_ONLY), - first->gt_mask); + first->tile_mask); if (first->bo) xe_bo_unlock(first->bo, &ww); if (!new_first) { @@ -2680,7 +2672,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, last->start + chunk, last->end, (last->pte_flags & XE_PTE_READ_ONLY), - last->gt_mask); + last->tile_mask); if (last->bo) xe_bo_unlock(last->bo, &ww); if (!new_last) { @@ -2816,7 +2808,7 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 addr, u64 range, u32 op, - u64 gt_mask, u32 region) + u64 tile_mask, u32 region) { struct ww_acquire_ctx ww; struct xe_vma *vma, lookup; @@ -2837,7 +2829,7 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr, addr + range - 1, op & XE_VM_BIND_FLAG_READONLY, - gt_mask); + tile_mask); xe_bo_unlock(bo, &ww); if (!vma) return ERR_PTR(-ENOMEM); @@ -2877,7 +2869,7 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr, addr + range - 1, op & XE_VM_BIND_FLAG_READONLY, - gt_mask); + tile_mask); if (!vma) return ERR_PTR(-ENOMEM); @@ -3114,11 +3106,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto put_engine; } - if (bind_ops[i].gt_mask) { - u64 valid_gts = BIT(xe->info.tile_count) - 1; + if (bind_ops[i].tile_mask) { + u64 valid_tiles = BIT(xe->info.tile_count) - 1; - if (XE_IOCTL_ERR(xe, bind_ops[i].gt_mask & - ~valid_gts)) { + if (XE_IOCTL_ERR(xe, bind_ops[i].tile_mask & + ~valid_tiles)) { err = -EINVAL; goto put_engine; } @@ -3209,11 +3201,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addr = bind_ops[i].addr; u32 op = bind_ops[i].op; u64 obj_offset = bind_ops[i].obj_offset; - u64 gt_mask = bind_ops[i].gt_mask; + u64 tile_mask = bind_ops[i].tile_mask; u32 region = bind_ops[i].region; vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset, - addr, range, op, gt_mask, + addr, range, op, tile_mask, region); if (IS_ERR(vmas[i])) { err = PTR_ERR(vmas[i]); @@ -3387,8 +3379,8 @@ void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww) int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = vma->vm->xe; - struct xe_gt *gt; - u32 gt_needs_invalidate = 0; + struct xe_tile *tile; + u32 tile_needs_invalidate = 0; int seqno[XE_MAX_TILES_PER_DEVICE]; u8 id; int ret; @@ -3410,25 +3402,29 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) } } - for_each_gt(gt, xe, id) { - if (xe_pt_zap_ptes(gt, vma)) { - gt_needs_invalidate |= BIT(id); + for_each_tile(tile, xe, id) { + if (xe_pt_zap_ptes(tile, vma)) { + tile_needs_invalidate |= BIT(id); xe_device_wmb(xe); - seqno[id] = xe_gt_tlb_invalidation_vma(gt, NULL, vma); + /* + * FIXME: We potentially need to invalidate multiple + * GTs within the tile + */ + seqno[id] = xe_gt_tlb_invalidation_vma(&tile->primary_gt, NULL, vma); if (seqno[id] < 0) return seqno[id]; } } - for_each_gt(gt, xe, id) { - if (gt_needs_invalidate & BIT(id)) { - ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]); + for_each_tile(tile, xe, id) { + if (tile_needs_invalidate & BIT(id)) { + ret = xe_gt_tlb_invalidation_wait(&tile->primary_gt, seqno[id]); if (ret < 0) return ret; } } - vma->usm.gt_invalidated = vma->gt_mask; + vma->usm.tile_invalidated = vma->tile_mask; return 0; } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h 
index 748dc16ebed9..372f26153209 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -54,7 +54,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma); #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) -u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_gt *full_gt); +u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile); int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 203ba9d946b8..c45c5daeeaa7 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -37,17 +37,17 @@ struct xe_vma { /** @bo_offset: offset into BO if not a userptr, unused for userptr */ u64 bo_offset; - /** @gt_mask: GT mask of where to create binding for this VMA */ - u64 gt_mask; + /** @tile_mask: Tile mask of where to create binding for this VMA */ + u64 tile_mask; /** - * @gt_present: GT mask of binding are present for this VMA. + * @tile_present: GT mask of binding are present for this VMA. * protected by vm->lock, vm->resv and for userptrs, * vm->userptr.notifier_lock for writing. Needs either for reading, * but if reading is done under the vm->lock only, it needs to be held * in write mode. */ - u64 gt_present; + u64 tile_present; /** * @destroyed: VMA is destroyed, in the sense that it shouldn't be @@ -132,8 +132,8 @@ struct xe_vma { /** @usm: unified shared memory state */ struct { - /** @gt_invalidated: VMA has been invalidated */ - u64 gt_invalidated; + /** @tile_invalidated: VMA has been invalidated */ + u64 tile_invalidated; } usm; struct { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 34aff9e15fe6..edd29e7f39eb 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -407,10 +407,10 @@ struct drm_xe_vm_bind_op { __u64 addr; /** - * @gt_mask: Mask for which GTs to create binds for, 0 == All GTs, + * @tile_mask: Mask for which tiles to create binds for, 0 == All tiles, * only applies to creating new VMAs */ - __u64 gt_mask; + __u64 tile_mask; /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */ __u32 op; -- cgit v1.2.3 From 08dea7674533cfd49764bcd09ba84de7143361ab Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:27 -0700 Subject: drm/xe: Move migration from GT to tile Migration primarily focuses on the memory associated with a tile, so it makes more sense to track this at the tile level (especially since the driver was already skipping migration operations on media GTs). Note that the blitter engine used to perform the migration always lives in the tile's primary GT today. In theory that could change if media GTs ever start including blitter engines in the future, but we can extend the design if/when that happens in the future. v2: - Fix kunit test build - Kerneldoc parameter name update v3: - Removed leftover prototype for removed function. (Gustavo) - Remove unrelated / unwanted error handling change. 
(Gustavo) Cc: Gustavo Sousa Reviewed-by: Lucas De Marchi Acked-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230601215244.678611-15-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 4 +-- drivers/gpu/drm/xe/tests/xe_migrate.c | 25 +++++++------- drivers/gpu/drm/xe/xe_bo.c | 6 ++-- drivers/gpu/drm/xe/xe_bo_evict.c | 14 ++++---- drivers/gpu/drm/xe/xe_device_types.h | 3 ++ drivers/gpu/drm/xe/xe_engine.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 27 ++++------------ drivers/gpu/drm/xe/xe_gt.h | 3 -- drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +- drivers/gpu/drm/xe/xe_gt_types.h | 3 -- drivers/gpu/drm/xe/xe_migrate.c | 61 +++++++++++++++++------------------ drivers/gpu/drm/xe/xe_migrate.h | 4 +-- drivers/gpu/drm/xe/xe_pt.c | 4 +-- drivers/gpu/drm/xe/xe_tile.c | 6 ++++ drivers/gpu/drm/xe/xe_tile.h | 2 ++ drivers/gpu/drm/xe/xe_vm.c | 2 +- drivers/gpu/drm/xe/xe_vm_types.h | 2 +- 17 files changed, 79 insertions(+), 91 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index f933e5df6c12..5309204d8d1b 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -35,7 +35,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. */ if (clear) { - fence = xe_migrate_clear(gt->migrate, bo, bo->ttm.resource); + fence = xe_migrate_clear(gt_to_tile(gt)->migrate, bo, bo->ttm.resource); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); @@ -174,7 +174,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni struct xe_bo *bo, *external; unsigned int bo_flags = XE_BO_CREATE_USER_BIT | XE_BO_CREATE_VRAM_IF_DGFX(gt_to_tile(gt)); - struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->primary_gt.migrate); + struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); struct ww_acquire_ctx ww; int err, i; diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 85ef9bacfe52..d9f1f31c92d2 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -101,14 +101,14 @@ static const struct xe_migrate_pt_update_ops sanity_ops = { static void test_copy(struct xe_migrate *m, struct xe_bo *bo, struct kunit *test) { - struct xe_device *xe = gt_to_xe(m->gt); + struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; bool big = bo->size >= SZ_2M; struct dma_fence *fence; const char *str = big ? 
"Copying big bo" : "Copying small bo"; int err; - struct xe_bo *sysmem = xe_bo_create_locked(xe, gt_to_tile(m->gt), NULL, + struct xe_bo *sysmem = xe_bo_create_locked(xe, m->tile, NULL, bo->size, ttm_bo_type_kernel, XE_BO_CREATE_SYSTEM_BIT); @@ -189,7 +189,7 @@ out_unlock: static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, struct kunit *test, bool force_gpu) { - struct xe_device *xe = gt_to_xe(m->gt); + struct xe_device *xe = tile_to_xe(m->tile); struct dma_fence *fence; u64 retval, expected; ktime_t then, now; @@ -239,16 +239,15 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) { - struct xe_gt *gt = m->gt; - struct xe_tile *tile = gt_to_tile(m->gt); - struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = m->tile; + struct xe_device *xe = tile_to_xe(tile); struct xe_bo *pt, *bo = m->pt_bo, *big, *tiny; struct xe_res_cursor src_it; struct dma_fence *fence; u64 retval, expected; struct xe_bb *bb; int err; - u8 id = gt->info.id; + u8 id = tile->id; err = xe_bo_vmap(bo); if (err) { @@ -287,7 +286,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto free_pt; } - bb = xe_bb_new(gt, 32, xe->info.supports_usm); + bb = xe_bb_new(&tile->primary_gt, 32, xe->info.supports_usm); if (IS_ERR(bb)) { KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", PTR_ERR(bb)); @@ -324,7 +323,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead); expected = 0; - emit_clear(m->gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, + emit_clear(&tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, IS_DGFX(xe)); run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", test); @@ -391,14 +390,14 @@ vunmap: static int migrate_test_run_device(struct xe_device *xe) { struct kunit *test = xe_cur_kunit(); - struct xe_gt *gt; + struct xe_tile *tile; int id; - for_each_gt(gt, xe, id) { - struct xe_migrate *m = gt->migrate; + for_each_tile(tile, xe, id) { + struct xe_migrate *m = tile->migrate; struct ww_acquire_ctx ww; - kunit_info(test, "Testing gt id %d.\n", id); + kunit_info(test, "Testing tile id %d.\n", id); xe_vm_lock(m->eng->vm, &ww, 0, true); xe_device_mem_access_get(xe); xe_migrate_sanity_test(m, test); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 7c59487af86a..8bac1717ca78 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -643,7 +643,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tile = mem_type_to_tile(xe, old_mem->mem_type); XE_BUG_ON(!tile); - XE_BUG_ON(!tile->primary_gt.migrate); + XE_BUG_ON(!tile->migrate); trace_xe_bo_move(bo); xe_device_mem_access_get(xe); @@ -681,9 +681,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } else { if (move_lacks_source) - fence = xe_migrate_clear(tile->primary_gt.migrate, bo, new_mem); + fence = xe_migrate_clear(tile->migrate, bo, new_mem); else - fence = xe_migrate_copy(tile->primary_gt.migrate, + fence = xe_migrate_copy(tile->migrate, bo, bo, old_mem, new_mem); if (IS_ERR(fence)) { ret = PTR_ERR(fence); diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 9226195bd560..f559a7f3eb3e 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -8,7 +8,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_ggtt.h" -#include "xe_gt.h" +#include 
"xe_tile.h" /** * xe_bo_evict_all - evict all BOs from VRAM @@ -29,7 +29,7 @@ int xe_bo_evict_all(struct xe_device *xe) struct ttm_device *bdev = &xe->ttm; struct ww_acquire_ctx ww; struct xe_bo *bo; - struct xe_gt *gt; + struct xe_tile *tile; struct list_head still_in_list; u32 mem_type; u8 id; @@ -83,8 +83,8 @@ int xe_bo_evict_all(struct xe_device *xe) * Wait for all user BO to be evicted as those evictions depend on the * memory moved below. */ - for_each_gt(gt, xe, id) - xe_gt_migrate_wait(gt); + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); spin_lock(&xe->pinned.lock); for (;;) { @@ -186,7 +186,7 @@ int xe_bo_restore_user(struct xe_device *xe) { struct ww_acquire_ctx ww; struct xe_bo *bo; - struct xe_gt *gt; + struct xe_tile *tile; struct list_head still_in_list; u8 id; int ret; @@ -224,8 +224,8 @@ int xe_bo_restore_user(struct xe_device *xe) spin_unlock(&xe->pinned.lock); /* Wait for validate to complete */ - for_each_gt(gt, xe, id) - xe_gt_migrate_wait(gt); + for_each_tile(tile, xe, id) + xe_tile_migrate_wait(tile); return 0; } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ee050b4b4d77..dfcf0a787e01 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -136,6 +136,9 @@ struct xe_tile { */ struct xe_sa_manager *kernel_bb_pool; } mem; + + /** @migrate: Migration helper for vram blits and clearing */ + struct xe_migrate *migrate; }; /** diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 4fca422e9e7b..fd39da859442 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -560,7 +560,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, !hwe)) return -EINVAL; - migrate_vm = xe_migrate_get_vm(gt->migrate); + migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); new = xe_engine_create(xe, migrate_vm, logical_mask, args->width, hwe, ENGINE_FLAG_PERSISTENT | diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 74023a5dc8b2..aa047db4c937 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -43,15 +43,6 @@ #include "xe_wa.h" #include "xe_wopcm.h" -struct xe_gt *xe_find_full_gt(struct xe_gt *gt) -{ - /* - * FIXME: Once the code is prepared for re-enabling, this function will - * be gone. Just return the only possible gt for now. 
- */ - return gt; -} - int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) { XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); @@ -169,6 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) int xe_gt_record_default_lrcs(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; int err = 0; @@ -192,7 +184,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) if (!default_lrc) return -ENOMEM; - vm = xe_migrate_get_vm(gt->migrate); + vm = xe_migrate_get_vm(tile->migrate); e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1, hwe, ENGINE_FLAG_WA); if (IS_ERR(e)) { @@ -383,13 +375,13 @@ static int all_fw_domain_init(struct xe_gt *gt) } if (!xe_gt_is_media_type(gt)) { - gt->migrate = xe_migrate_init(gt); - if (IS_ERR(gt->migrate)) { - err = PTR_ERR(gt->migrate); + struct xe_tile *tile = gt_to_tile(gt); + + tile->migrate = xe_migrate_init(tile); + if (IS_ERR(tile->migrate)) { + err = PTR_ERR(tile->migrate); goto err_force_wake; } - } else { - gt->migrate = xe_find_full_gt(gt)->migrate; } err = xe_uc_init_hw(>->uc); @@ -644,11 +636,6 @@ err_msg: return err; } -void xe_gt_migrate_wait(struct xe_gt *gt) -{ - xe_migrate_wait(gt->migrate); -} - struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, enum xe_engine_class class, u16 instance, bool logical) diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 27e913e9a43a..01c1d226faeb 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -24,11 +24,8 @@ void xe_gt_suspend_prepare(struct xe_gt *gt); int xe_gt_suspend(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); -void xe_gt_migrate_wait(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); -struct xe_gt *xe_find_full_gt(struct xe_gt *gt); - /** * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the * first that matches the same reset domain as @class diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 1ec140aaf2a7..5436667ba82b 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -208,7 +208,7 @@ retry_userptr: /* Bind VMA only to the GT that has faulted */ trace_xe_vma_pf_bind(vma); - fence = __xe_pt_bind_vma(tile, vma, xe_gt_migrate_engine(gt), NULL, 0, + fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0, vma->tile_present & BIT(tile->id)); if (IS_ERR(fence)) { ret = PTR_ERR(fence); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index c44560b6dc71..34d5dd98885e 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -278,9 +278,6 @@ struct xe_gt { /** @hw_engines: hardware engines on the GT */ struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES]; - /** @migrate: Migration helper for vram blits and clearing */ - struct xe_migrate *migrate; - /** @pcode: GT's PCODE */ struct { /** @lock: protecting GT's PCODE mailbox data */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 3031a45db490..794c5c68589d 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -36,8 +36,8 @@ struct xe_migrate { /** @eng: Default engine used for migration */ struct xe_engine *eng; - /** @gt: Backpointer to the gt this struct xe_migrate belongs to. */ - struct xe_gt *gt; + /** @tile: Backpointer to the tile this struct xe_migrate belongs to. 
*/ + struct xe_tile *tile; /** @job_mutex: Timeline mutex for @eng. */ struct mutex job_mutex; /** @pt_bo: Page-table buffer object. */ @@ -70,17 +70,17 @@ struct xe_migrate { #define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M) /** - * xe_gt_migrate_engine() - Get this gt's migrate engine. - * @gt: The gt. + * xe_tile_migrate_engine() - Get this tile's migrate engine. + * @tile: The tile. * - * Returns the default migrate engine of this gt. + * Returns the default migrate engine of this tile. * TODO: Perhaps this function is slightly misplaced, and even unneeded? * * Return: The default migrate engine */ -struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt) +struct xe_engine *xe_tile_migrate_engine(struct xe_tile *tile) { - return gt->migrate->eng; + return tile->migrate->eng; } static void xe_migrate_fini(struct drm_device *dev, void *arg) @@ -128,8 +128,7 @@ static u64 xe_migrate_vram_ofs(u64 addr) */ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) { - struct xe_gt *gt = m->gt; - struct xe_tile *tile = gt_to_tile(gt); + struct xe_tile *tile = m->tile; struct xe_device *xe = vm->xe; size_t cleared_size; u64 vram_addr; @@ -155,14 +154,13 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) return 0; } -static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, +static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm) { - u8 id = gt->info.id; + struct xe_device *xe = tile_to_xe(tile); + u8 id = tile->id; u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; u32 map_ofs, level, i; - struct xe_device *xe = gt_to_xe(m->gt); - struct xe_tile *tile = gt_to_tile(m->gt); struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo; u64 entry; int ret; @@ -231,7 +229,7 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); if (xe->info.supports_usm) { - batch = gt->usm.bb_pool->bo; + batch = tile->primary_gt.usm.bb_pool->bo; batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE, &is_vram); m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); @@ -308,34 +306,33 @@ static int xe_migrate_prepare_vm(struct xe_gt *gt, struct xe_migrate *m, /** * xe_migrate_init() - Initialize a migrate context - * @gt: Back-pointer to the gt we're initializing for. + * @tile: Back-pointer to the tile we're initializing for. * * Return: Pointer to a migrate context on success. Error pointer on error. */ -struct xe_migrate *xe_migrate_init(struct xe_gt *gt) +struct xe_migrate *xe_migrate_init(struct xe_tile *tile) { - struct xe_device *xe = gt_to_xe(gt); + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *primary_gt = &tile->primary_gt; struct xe_migrate *m; struct xe_vm *vm; struct ww_acquire_ctx ww; int err; - XE_BUG_ON(xe_gt_is_media_type(gt)); - m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); - m->gt = gt; + m->tile = tile; /* Special layout, prepared below.. 
*/ vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | - XE_VM_FLAG_SET_GT_ID(gt)); + XE_VM_FLAG_SET_TILE_ID(tile)); if (IS_ERR(vm)) return ERR_CAST(vm); xe_vm_lock(vm, &ww, 0, false); - err = xe_migrate_prepare_vm(gt, m, vm); + err = xe_migrate_prepare_vm(tile, m, vm); xe_vm_unlock(vm, &ww); if (err) { xe_vm_close_and_put(vm); @@ -343,9 +340,9 @@ struct xe_migrate *xe_migrate_init(struct xe_gt *gt) } if (xe->info.supports_usm) { - struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, XE_ENGINE_CLASS_COPY, - gt->usm.reserved_bcs_instance, + primary_gt->usm.reserved_bcs_instance, false); if (!hwe) return ERR_PTR(-EINVAL); @@ -354,7 +351,7 @@ struct xe_migrate *xe_migrate_init(struct xe_gt *gt) BIT(hwe->logical_instance), 1, hwe, ENGINE_FLAG_KERNEL); } else { - m->eng = xe_engine_create_class(xe, gt, vm, + m->eng = xe_engine_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, ENGINE_FLAG_KERNEL); } @@ -549,7 +546,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, u64 dst_ofs, bool dst_is_vram, u32 dst_size, u64 ccs_ofs, bool copy_ccs) { - struct xe_gt *gt = m->gt; + struct xe_gt *gt = &m->tile->primary_gt; u32 flush_flags = 0; if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) { @@ -613,7 +610,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *src, struct ttm_resource *dst) { - struct xe_gt *gt = m->gt; + struct xe_gt *gt = &m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; u64 size = src_bo->size; @@ -876,7 +873,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct ttm_resource *dst) { bool clear_vram = mem_type_is_vram(dst->mem_type); - struct xe_gt *gt = m->gt; + struct xe_gt *gt = &m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; u64 size = bo->size; @@ -1083,7 +1080,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m, for (i = 0; i < num_updates; i++) { const struct xe_vm_pgtable_update *update = &updates[i]; - ops->populate(pt_update, gt_to_tile(m->gt), &update->pt_bo->vmap, NULL, + ops->populate(pt_update, m->tile, &update->pt_bo->vmap, NULL, update->ofs, update->qwords, update); } @@ -1150,9 +1147,9 @@ xe_migrate_update_pgtables(struct xe_migrate *m, struct xe_migrate_pt_update *pt_update) { const struct xe_migrate_pt_update_ops *ops = pt_update->ops; - struct xe_gt *gt = m->gt; - struct xe_tile *tile = gt_to_tile(m->gt); - struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = m->tile; + struct xe_gt *gt = &tile->primary_gt; + struct xe_device *xe = tile_to_xe(tile); struct xe_sched_job *job; struct dma_fence *fence; struct drm_suballoc *sa_bo = NULL; diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index e627f4023d5a..204337ea3b4e 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -71,7 +71,7 @@ struct xe_migrate_pt_update { struct xe_vma *vma; }; -struct xe_migrate *xe_migrate_init(struct xe_gt *gt); +struct xe_migrate *xe_migrate_init(struct xe_tile *tile); struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, @@ -97,5 +97,5 @@ xe_migrate_update_pgtables(struct xe_migrate *m, void xe_migrate_wait(struct xe_migrate *m); -struct xe_engine *xe_gt_migrate_engine(struct xe_gt *gt); +struct xe_engine *xe_tile_migrate_engine(struct xe_tile *tile); #endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 094058cb5f93..41b2be028b8a 100644 --- 
a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1303,7 +1303,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, return ERR_PTR(-ENOMEM); } - fence = xe_migrate_update_pgtables(tile->primary_gt.migrate, + fence = xe_migrate_update_pgtables(tile->migrate, vm, vma->bo, e ? e : vm->eng[tile->id], entries, num_entries, @@ -1624,7 +1624,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e * clear again here. The eviction may have updated pagetables at a * lower level, because it needs to be more conservative. */ - fence = xe_migrate_update_pgtables(tile->primary_gt.migrate, + fence = xe_migrate_update_pgtables(tile->migrate, vm, NULL, e ? e : vm->eng[tile->id], entries, num_entries, diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 59d3e25ea550..fa56323aa988 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -7,6 +7,7 @@ #include "xe_device.h" #include "xe_ggtt.h" +#include "xe_migrate.h" #include "xe_sa.h" #include "xe_tile.h" #include "xe_ttm_vram_mgr.h" @@ -88,3 +89,8 @@ err_mem_access: xe_device_mem_access_put(tile_to_xe(tile)); return err; } + +void xe_tile_migrate_wait(struct xe_tile *tile) +{ + xe_migrate_wait(tile->migrate); +} diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 77529ea136a6..33bf41292195 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -11,4 +11,6 @@ struct xe_tile; int xe_tile_alloc(struct xe_tile *tile); int xe_tile_init_noalloc(struct xe_tile *tile); +void xe_tile_migrate_wait(struct xe_tile *tile); + #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ecfff4ffd00e..7d4c7a66a35f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1267,7 +1267,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (!vm->pt_root[id]) continue; - migrate_vm = xe_migrate_get_vm(gt->migrate); + migrate_vm = xe_migrate_get_vm(tile->migrate); eng = xe_engine_create_class(xe, gt, migrate_vm, XE_ENGINE_CLASS_COPY, ENGINE_FLAG_VM); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index c45c5daeeaa7..76af6ac0fa84 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -179,7 +179,7 @@ struct xe_vm { #define XE_VM_FLAG_SCRATCH_PAGE BIT(4) #define XE_VM_FLAG_FAULT_MODE BIT(5) #define XE_VM_FLAG_GT_ID(flags) (((flags) >> 6) & 0x3) -#define XE_VM_FLAG_SET_GT_ID(gt) ((gt)->info.id << 6) +#define XE_VM_FLAG_SET_TILE_ID(tile) ((tile)->id << 6) unsigned long flags; /** @composite_fence_ctx: context composite fence */ -- cgit v1.2.3 From ed006ba5e6e8334deb86fbc1e35d2411a4870281 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:28 -0700 Subject: drm/xe: Clarify 'gt' retrieval for primary tile There are a bunch of places in the driver where we need to perform non-GT MMIO against the platform's primary tile (display code, top-level interrupt enable/disable, driver initialization, etc.). Rename 'to_gt()' to 'xe_primary_mmio_gt()' to clarify that we're trying to get a primary MMIO handle for these top-level operations. In the future we need to move away from xe_gt as the target for MMIO operations (most of which are completely unrelated to GT). v2: - s/xe_primary_mmio_gt/xe_root_mmio_gt/ for more consistency with how we refer to tile 0. (Lucas) v3: - Tweak comment on xe_root_mmio_gt(). 
(Lucas) Acked-by: Nirmoy Das Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-16-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_device.h | 13 +++++++++++-- drivers/gpu/drm/xe/xe_irq.c | 6 +++--- drivers/gpu/drm/xe/xe_mmio.c | 6 +++--- drivers/gpu/drm/xe/xe_query.c | 2 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 4 ++-- 6 files changed, 21 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 50ce4e97299e..0ff3b94bd662 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -355,7 +355,7 @@ static void device_kill_persistent_engines(struct xe_device *xe, void xe_device_wmb(struct xe_device *xe) { - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt = xe_root_mmio_gt(xe); wmb(); if (IS_DGFX(xe)) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 3516ac1dcbc4..e88f685f3f21 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -66,9 +66,18 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) } /* - * FIXME: Placeholder until multi-gt lands. Once that lands, kill this function. + * Provide a GT structure suitable for performing non-GT MMIO operations against + * the primary tile. Primarily intended for early tile initialization, display + * handling, top-most interrupt enable/disable, etc. Since anything using the + * MMIO handle returned by this function doesn't need GSI offset translation, + * we'll return the primary GT from the root tile. + * + * FIXME: Fix the driver design so that 'gt' isn't the target of all MMIO + * operations. + * + * Returns the primary gt of the root tile. 
*/ -static inline struct xe_gt *to_gt(struct xe_device *xe) +static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe) { return &xe_device_get_root_tile(xe)->primary_gt; } diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index e9614e90efaf..6d9545664386 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -284,7 +284,7 @@ static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, static irqreturn_t xelp_irq_handler(int irq, void *arg) { struct xe_device *xe = arg; - struct xe_gt *gt = xe_device_get_gt(xe, 0); /* Only 1 GT here */ + struct xe_gt *gt = xe_root_mmio_gt(xe); u32 master_ctl, gu_misc_iir; long unsigned int intr_dw[2]; u32 identity[32]; @@ -306,7 +306,7 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) static u32 dg1_intr_disable(struct xe_device *xe) { - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt = xe_root_mmio_gt(xe); u32 val; /* First disable interrupts */ @@ -324,7 +324,7 @@ static u32 dg1_intr_disable(struct xe_device *xe) static void dg1_intr_enable(struct xe_device *xe, bool stall) { - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt = xe_root_mmio_gt(xe); xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ); if (stall) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 86f010ac9ccf..7739282d364d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -328,7 +328,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) static void xe_mmio_probe_tiles(struct xe_device *xe) { - struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt; + struct xe_gt *gt = xe_root_mmio_gt(xe); u32 mtcfg; u8 adj_tile_count; u8 id; @@ -380,7 +380,7 @@ static void mmio_fini(struct drm_device *drm, void *arg) int xe_mmio_init(struct xe_device *xe) { struct xe_tile *root_tile = xe_device_get_root_tile(xe); - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt = xe_root_mmio_gt(xe); const int mmio_bar = 0; int err; @@ -439,7 +439,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt = xe_root_mmio_gt(xe); struct drm_xe_mmio *args = data; unsigned int bits_flag, bytes; struct xe_reg reg; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 799bf68800e7..8087c94dd782 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -259,7 +259,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) static int query_hwconfig(struct xe_device *xe, struct drm_xe_device_query *query) { - struct xe_gt *gt = xe_device_get_gt(xe, 0); + struct xe_gt *gt = xe_root_mmio_gt(xe); size_t size = xe_guc_hwconfig_size(>->uc.guc); void __user *query_ptr = u64_to_user_ptr(query->data); void *hwconfig; diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index c68325161c19..21ecc734f10a 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -54,7 +54,7 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct xe_tile *tile = xe_device_get_root_tile(xe); - struct xe_gt *mmio = &tile->primary_gt; + struct xe_gt *mmio = xe_root_mmio_gt(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size; u64 tile_offset; @@ -92,7 +92,7 @@ static u32 
detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr u32 stolen_size; u32 ggc, gms; - ggc = xe_mmio_read32(to_gt(xe), GGC); + ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC); /* check GGMS, should be fixed 0x3 (8MB) */ if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK)) -- cgit v1.2.3 From 68ccb9b2f71b5834b703b982a2a29d5bb3fabbe9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:29 -0700 Subject: drm/xe: Drop vram_id The VRAM ID is always the tile ID; there's no need to track it separately within a GT. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-17-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index b91d52205feb..d896d9fa2556 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -32,7 +32,6 @@ struct xe_subplatform_desc { struct xe_gt_desc { enum xe_gt_type type; - u8 vram_id; u32 mmio_adj_limit; u32 mmio_adj_offset; }; @@ -261,7 +260,6 @@ static const struct xe_device_desc dg2_desc = { static const struct xe_gt_desc pvc_gts[] = { { .type = XE_GT_TYPE_REMOTE, - .vram_id = 1, .mmio_adj_limit = 0, .mmio_adj_offset = 0, }, -- cgit v1.2.3 From 1e6c20be6c83817cf68637eb334dafac3a4b2512 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:30 -0700 Subject: drm/xe: Drop extra_gts[] declarations and XE_GT_TYPE_REMOTE Now that tiles and GTs are handled separately, extra_gts[] doesn't really provide any useful information that we can't just infer directly. The primary GT of the root tile and of the remote tiles behave the same way and don't need independent handling. When we re-add support for media GTs in a future patch, the presence of media can be determined from MEDIA_VER() (i.e., >= 13) and media's GSI offset handling is expected to remain constant for all foreseeable future platforms, so it won't need to be provided in a definition structure either. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-18-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_types.h | 1 - drivers/gpu/drm/xe/xe_pci.c | 36 ++++++------------------------------ 2 files changed, 6 insertions(+), 31 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 34d5dd98885e..99ab7ec99ccd 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -20,7 +20,6 @@ struct xe_ring_ops; enum xe_gt_type { XE_GT_TYPE_UNINITIALIZED, XE_GT_TYPE_MAIN, - XE_GT_TYPE_REMOTE, XE_GT_TYPE_MEDIA, }; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index d896d9fa2556..4fbcbfb8a93a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -44,7 +44,6 @@ struct xe_device_desc { const char *platform_name; const struct xe_subplatform_desc *subplatforms; - const struct xe_gt_desc *extra_gts; enum xe_platform platform; @@ -257,20 +256,11 @@ static const struct xe_device_desc dg2_desc = { DG2_FEATURES, }; -static const struct xe_gt_desc pvc_gts[] = { - { - .type = XE_GT_TYPE_REMOTE, - .mmio_adj_limit = 0, - .mmio_adj_offset = 0, - }, -}; - static const __maybe_unused struct xe_device_desc pvc_desc = { .graphics = &graphics_xehpc, DGFX_FEATURES, PLATFORM(XE_PVC), .require_force_probe = true, - .extra_gts = pvc_gts, }; static const struct xe_device_desc mtl_desc = { @@ -540,28 +530,14 @@ static int xe_info_init(struct xe_device *xe, tile->id = id; gt = &tile->primary_gt; - gt->info.id = id; + gt->info.id = id; /* FIXME: Determine sensible numbering */ gt->tile = tile; + gt->info.type = XE_GT_TYPE_MAIN; + gt->info.__engine_mask = graphics_desc->hw_engine_mask; + if (MEDIA_VER(xe) < 13 && media_desc) + gt->info.__engine_mask |= media_desc->hw_engine_mask; - if (id == 0) { - gt->info.type = XE_GT_TYPE_MAIN; - - gt->info.__engine_mask = graphics_desc->hw_engine_mask; - if (MEDIA_VER(xe) < 13 && media_desc) - gt->info.__engine_mask |= media_desc->hw_engine_mask; - - gt->mmio.adj_limit = 0; - gt->mmio.adj_offset = 0; - } else { - gt->info.type = desc->extra_gts[id - 1].type; - gt->info.__engine_mask = (gt->info.type == XE_GT_TYPE_MEDIA) ? - media_desc->hw_engine_mask : - graphics_desc->hw_engine_mask; - gt->mmio.adj_limit = - desc->extra_gts[id - 1].mmio_adj_limit; - gt->mmio.adj_offset = - desc->extra_gts[id - 1].mmio_adj_offset; - } + /* TODO: Init media GT, if present */ } return 0; -- cgit v1.2.3 From f6929e80cdf540d7106764bda38c4ce0601fee7b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:31 -0700 Subject: drm/xe: Allocate GT dynamically In preparation for re-adding media GT support, switch the primary GT within the tile to a dynamic allocation. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-19-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 4 ++-- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 2 +- drivers/gpu/drm/xe/xe_device.c | 4 ---- drivers/gpu/drm/xe/xe_device.h | 8 ++++++-- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 11 ++++++++--- drivers/gpu/drm/xe/xe_gt.h | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 12 ++++++------ drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_pci.c | 8 ++++++-- drivers/gpu/drm/xe/xe_pt.c | 4 ++-- drivers/gpu/drm/xe/xe_vm.c | 6 +++--- 13 files changed, 38 insertions(+), 29 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index d9f1f31c92d2..60266fea7faa 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -286,7 +286,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto free_pt; } - bb = xe_bb_new(&tile->primary_gt, 32, xe->info.supports_usm); + bb = xe_bb_new(tile->primary_gt, 32, xe->info.supports_usm); if (IS_ERR(bb)) { KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", PTR_ERR(bb)); @@ -323,7 +323,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) xe_map_wr(xe, &pt->vmap, 0, u32, 0xdeaddead); expected = 0; - emit_clear(&tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, + emit_clear(tile->primary_gt, bb, xe_migrate_vm_addr(NUM_KERNEL_PDE - 1, 0), 4, 4, IS_DGFX(xe)); run_sanity_job(m, xe, bb, 1, "Writing to our newly mapped pagetable", test); diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 45f2614f91ec..b2beba0019cd 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -237,7 +237,7 @@ static void xe_rtp_process_tests(struct kunit *test) { const struct rtp_test_case *param = test->param_value; struct xe_device *xe = test->priv; - struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt; + struct xe_gt *gt = xe_device_get_root_tile(xe)->primary_gt; struct xe_reg_sr *reg_sr = >->reg_sr; const struct xe_reg_sr_entry *sre, *sr_entry = NULL; struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 0ff3b94bd662..a4fc5bc54d02 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -249,10 +249,6 @@ int xe_device_probe(struct xe_device *xe) err = xe_tile_alloc(tile); if (err) return err; - - err = xe_gt_alloc(xe, &tile->primary_gt); - if (err) - return err; } err = xe_mmio_init(xe); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index e88f685f3f21..f2d8479f6ff6 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -58,7 +58,11 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) struct xe_gt *gt; XE_BUG_ON(gt_id > XE_MAX_TILES_PER_DEVICE); - gt = &xe->tiles[gt_id].primary_gt; + + gt = xe->tiles[gt_id].primary_gt; + if (drm_WARN_ON(&xe->drm, !gt)) + return NULL; + XE_BUG_ON(gt->info.id != gt_id); XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); @@ -79,7 +83,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) */ static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe) { - 
return &xe_device_get_root_tile(xe)->primary_gt; + return xe_device_get_root_tile(xe)->primary_gt; } static inline bool xe_device_guc_submission_enabled(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index dfcf0a787e01..5cb3fa9e8086 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -74,7 +74,7 @@ struct xe_tile { /** * @primary_gt: Primary GT */ - struct xe_gt primary_gt; + struct xe_gt *primary_gt; /* TODO: Add media GT here */ diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index d395d6fc1af6..8d3638826860 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -196,7 +196,7 @@ void xe_ggtt_invalidate(struct xe_ggtt *ggtt) * TODO: Loop over each GT in tile once media GT support is * re-added */ - struct xe_gt *gt = &ggtt->tile->primary_gt; + struct xe_gt *gt = ggtt->tile->primary_gt; /* TODO: vfunc for GuC vs. non-GuC */ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index aa047db4c937..f00b82e90106 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -43,13 +43,18 @@ #include "xe_wa.h" #include "xe_wopcm.h" -int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt) +struct xe_gt *xe_gt_alloc(struct xe_tile *tile) { - XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); + struct xe_gt *gt; + gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL); + if (!gt) + return ERR_PTR(-ENOMEM); + + gt->tile = tile; gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); - return 0; + return gt; } void xe_gt_sanitize(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 01c1d226faeb..21d9044088de 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -16,7 +16,7 @@ for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \ xe_hw_engine_is_valid((hwe__))) -int xe_gt_alloc(struct xe_device *xe, struct xe_gt *gt); +struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); int xe_gt_record_default_lrcs(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 794c5c68589d..f50484759866 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -229,7 +229,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); if (xe->info.supports_usm) { - batch = tile->primary_gt.usm.bb_pool->bo; + batch = tile->primary_gt->usm.bb_pool->bo; batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE, &is_vram); m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); @@ -313,7 +313,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_migrate *xe_migrate_init(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); - struct xe_gt *primary_gt = &tile->primary_gt; + struct xe_gt *primary_gt = tile->primary_gt; struct xe_migrate *m; struct xe_vm *vm; struct ww_acquire_ctx ww; @@ -546,7 +546,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, u64 dst_ofs, bool dst_is_vram, u32 dst_size, u64 ccs_ofs, bool copy_ccs) { - struct xe_gt *gt = &m->tile->primary_gt; + struct xe_gt *gt = m->tile->primary_gt; u32 flush_flags = 0; if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) { @@ -610,7 +610,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *src, 
struct ttm_resource *dst) { - struct xe_gt *gt = &m->tile->primary_gt; + struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; u64 size = src_bo->size; @@ -873,7 +873,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct ttm_resource *dst) { bool clear_vram = mem_type_is_vram(dst->mem_type); - struct xe_gt *gt = &m->tile->primary_gt; + struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; u64 size = bo->size; @@ -1148,7 +1148,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, { const struct xe_migrate_pt_update_ops *ops = pt_update->ops; struct xe_tile *tile = m->tile; - struct xe_gt *gt = &tile->primary_gt; + struct xe_gt *gt = tile->primary_gt; struct xe_device *xe = tile_to_xe(tile); struct xe_sched_job *job; struct dma_fence *fence; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7739282d364d..475b14fe4356 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -209,7 +209,7 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_offset) { struct xe_device *xe = tile_to_xe(tile); - struct xe_gt *gt = &tile->primary_gt; + struct xe_gt *gt = tile->primary_gt; u64 offset; int err; u32 reg; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 4fbcbfb8a93a..be51c9e97a79 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -18,6 +18,7 @@ #include "regs/xe_gt_regs.h" #include "xe_device.h" #include "xe_drv.h" +#include "xe_gt.h" #include "xe_macros.h" #include "xe_module.h" #include "xe_pci_types.h" @@ -529,9 +530,12 @@ static int xe_info_init(struct xe_device *xe, tile->xe = xe; tile->id = id; - gt = &tile->primary_gt; + tile->primary_gt = xe_gt_alloc(tile); + if (IS_ERR(tile->primary_gt)) + return PTR_ERR(tile->primary_gt); + + gt = tile->primary_gt; gt->info.id = id; /* FIXME: Determine sensible numbering */ - gt->tile = tile; gt->info.type = XE_GT_TYPE_MAIN; gt->info.__engine_mask = graphics_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 && media_desc) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 41b2be028b8a..bef265715000 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1314,7 +1314,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, /* TLB invalidation must be done before signaling rebind */ if (rebind && !xe_vm_no_dma_fences(vma->vm)) { - int err = invalidation_fence_init(&tile->primary_gt, ifence, fence, + int err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); if (err) { dma_fence_put(fence); @@ -1634,7 +1634,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e int err; /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(&tile->primary_gt, ifence, fence, vma); + err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); if (err) { dma_fence_put(fence); kfree(ifence); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7d4c7a66a35f..44ad45776141 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1260,7 +1260,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) /* Kernel migration VM shouldn't have a circular loop.. 
*/ if (!(flags & XE_VM_FLAG_MIGRATION)) { for_each_tile(tile, xe, id) { - struct xe_gt *gt = &tile->primary_gt; + struct xe_gt *gt = tile->primary_gt; struct xe_vm *migrate_vm; struct xe_engine *eng; @@ -3410,7 +3410,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) * FIXME: We potentially need to invalidate multiple * GTs within the tile */ - seqno[id] = xe_gt_tlb_invalidation_vma(&tile->primary_gt, NULL, vma); + seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); if (seqno[id] < 0) return seqno[id]; } @@ -3418,7 +3418,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) for_each_tile(tile, xe, id) { if (tile_needs_invalidate & BIT(id)) { - ret = xe_gt_tlb_invalidation_wait(&tile->primary_gt, seqno[id]); + ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); if (ret < 0) return ret; } -- cgit v1.2.3 From e2682f616b91c0000a02019047605956c85dcca1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:32 -0700 Subject: drm/xe: Add media GT to tile This media_gt pointer isn't actually allocated yet. Future patches will start hooking it up at appropriate places in the code, and then creation of the media GT will be added once those infrastructure changes are in place. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230601215244.678611-20-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5cb3fa9e8086..16a77703d429 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -76,7 +76,12 @@ struct xe_tile { */ struct xe_gt *primary_gt; - /* TODO: Add media GT here */ + /** + * @media_gt: Media GT + * + * Only present on devices with media version >= 13. + */ + struct xe_gt *media_gt; /** * @mmio: MMIO info for a tile. -- cgit v1.2.3 From 7e485d9816c134c6b54707143ee84f0adcd6c1d7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:34 -0700 Subject: drm/xe: Interrupts are delivered per-tile, not per-GT IRQ delivery and handling needs to be handled on a per-tile basis. Note that this is true even for the "GT interrupts" relating to engines and GuCs --- the interrupts relating to both GTs get raised through a single set of registers in the tile's sgunit range. On true multi-tile platforms, interrupts on remote tiles are internally forwarded to the root tile; the first thing the top-level interrupt handler should do is consult the root tile's instance of DG1_MSTR_TILE_INTR to determine which tile(s) had interrupts. This register is also responsible for enabling/disabling top-level reporting of any interrupts to the OS. Although this register technically exists on all tiles, it should only be used on the root tile. The (mis)use of struct xe_gt as a target for MMIO operations in the driver makes the code somewhat confusing since we wind up needing a GT pointer to handle programming that's unrelated to the GT. To mitigate this confusion, all of the xe_gt structures used solely as an MMIO target in interrupt code are renamed to 'mmio' so that it's clear that the structure being passed does not necessarily relate to any specific GT (primary or media) that we might be dealing with interrupts for. Reworking the driver's MMIO handling to not be dependent on xe_gt is planned as a future patch series. 
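As a rough, self-contained sketch of the dispatch model described above (plain C for illustration only, not the driver code in the diff below; MAX_TILES, the register-read stub and the per-tile service stub are invented here, while MSTR_TILE() merely mimics the DG1_MSTR_TILE() bit layout):

#include <stdint.h>
#include <stdio.h>

#define MAX_TILES    2                      /* illustrative device with two tiles */
#define MSTR_TILE(i) (1u << (i))            /* per-tile bit, modeled on DG1_MSTR_TILE(i) */

/* Stand-in for reading the root tile's DG1_MSTR_TILE_INTR register. */
static uint32_t read_root_master_tile_intr(void)
{
	return MSTR_TILE(0) | MSTR_TILE(1);   /* pretend both tiles raised interrupts */
}

/* Stand-in for servicing a tile's interrupts through that tile's MMIO range. */
static void service_tile(int tile_id)
{
	printf("servicing interrupts forwarded to the root tile from tile %d\n", tile_id);
}

int main(void)
{
	/* Remote-tile interrupts are forwarded to the root tile, so the handler
	 * consults the root tile's master register first and then walks only
	 * the tiles that are flagged in it. */
	uint32_t master_tile_ctl = read_root_master_tile_intr();

	for (int id = 0; id < MAX_TILES; id++)
		if (master_tile_ctl & MSTR_TILE(id))
			service_tile(id);

	return 0;
}
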
Note that GT initialization code currently calls xe_gt_irq_postinstall() in an attempt to enable the HWE interrupts for the GT being initialized. Unfortunately xe_gt_irq_postinstall() doesn't really match its name and does a bunch of other stuff unrelated to the GT interrupts (such as enabling the top-level device interrupts). That will be addressed in future patches. v2: - Clarify commit message with explanation of why DG1_MSTR_TILE_INTR is only used on the root tile, even though it's an sgunit register that is technically present in each tile's MMIO space. (Aravind) - Also clarify that the xe_gt used as a target for MMIO operations may or may not relate to the GT we're dealing with for interrupts. (Lucas) Cc: Aravind Iddamsetty Reviewed-by: Lucas De Marchi Reviewed-by: Aravind Iddamsetty Link: https://lore.kernel.org/r/20230601215244.678611-22-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_irq.c | 327 ++++++++++++++++++++++++-------------------- drivers/gpu/drm/xe/xe_irq.h | 4 +- 3 files changed, 184 insertions(+), 149 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index f00b82e90106..071d4fbd3efc 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -310,7 +310,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) gt->info.engine_mask = gt->info.__engine_mask; /* Enables per hw engine IRQs */ - xe_gt_irq_postinstall(gt); + xe_gt_irq_postinstall(gt_to_tile(gt)); /* Rerun MCR init as we now have hw engine list */ xe_gt_mcr_init(gt); diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 6d9545664386..4d3ea3b66a7b 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -26,60 +26,66 @@ #define IIR(offset) XE_REG(offset + 0x8) #define IER(offset) XE_REG(offset + 0xc) -static void assert_iir_is_zero(struct xe_gt *gt, struct xe_reg reg) +static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg) { - u32 val = xe_mmio_read32(gt, reg); + u32 val = xe_mmio_read32(mmio, reg); if (val == 0) return; - drm_WARN(>_to_xe(gt)->drm, 1, + drm_WARN(>_to_xe(mmio)->drm, 1, "Interrupt register 0x%x is not zero: 0x%08x\n", reg.addr, val); - xe_mmio_write32(gt, reg, 0xffffffff); - xe_mmio_read32(gt, reg); - xe_mmio_write32(gt, reg, 0xffffffff); - xe_mmio_read32(gt, reg); + xe_mmio_write32(mmio, reg, 0xffffffff); + xe_mmio_read32(mmio, reg); + xe_mmio_write32(mmio, reg, 0xffffffff); + xe_mmio_read32(mmio, reg); } /* * Unmask and enable the specified interrupts. Does not check current state, * so any bits not specified here will become masked and disabled. */ -static void unmask_and_enable(struct xe_gt *gt, u32 irqregs, u32 bits) +static void unmask_and_enable(struct xe_tile *tile, u32 irqregs, u32 bits) { + struct xe_gt *mmio = tile->primary_gt; + /* * If we're just enabling an interrupt now, it shouldn't already * be raised in the IIR. */ - assert_iir_is_zero(gt, IIR(irqregs)); + assert_iir_is_zero(mmio, IIR(irqregs)); - xe_mmio_write32(gt, IER(irqregs), bits); - xe_mmio_write32(gt, IMR(irqregs), ~bits); + xe_mmio_write32(mmio, IER(irqregs), bits); + xe_mmio_write32(mmio, IMR(irqregs), ~bits); /* Posting read */ - xe_mmio_read32(gt, IMR(irqregs)); + xe_mmio_read32(mmio, IMR(irqregs)); } /* Mask and disable all interrupts. 
*/ -static void mask_and_disable(struct xe_gt *gt, u32 irqregs) +static void mask_and_disable(struct xe_tile *tile, u32 irqregs) { - xe_mmio_write32(gt, IMR(irqregs), ~0); + struct xe_gt *mmio = tile->primary_gt; + + xe_mmio_write32(mmio, IMR(irqregs), ~0); /* Posting read */ - xe_mmio_read32(gt, IMR(irqregs)); + xe_mmio_read32(mmio, IMR(irqregs)); - xe_mmio_write32(gt, IER(irqregs), 0); + xe_mmio_write32(mmio, IER(irqregs), 0); /* IIR can theoretically queue up two events. Be paranoid. */ - xe_mmio_write32(gt, IIR(irqregs), ~0); - xe_mmio_read32(gt, IIR(irqregs)); - xe_mmio_write32(gt, IIR(irqregs), ~0); - xe_mmio_read32(gt, IIR(irqregs)); + xe_mmio_write32(mmio, IIR(irqregs), ~0); + xe_mmio_read32(mmio, IIR(irqregs)); + xe_mmio_write32(mmio, IIR(irqregs), ~0); + xe_mmio_read32(mmio, IIR(irqregs)); } -static u32 xelp_intr_disable(struct xe_gt *gt) +static u32 xelp_intr_disable(struct xe_device *xe) { - xe_mmio_write32(gt, GFX_MSTR_IRQ, 0); + struct xe_gt *mmio = xe_root_mmio_gt(xe); + + xe_mmio_write32(mmio, GFX_MSTR_IRQ, 0); /* * Now with master disabled, get a sample of level indications @@ -87,36 +93,41 @@ static u32 xelp_intr_disable(struct xe_gt *gt) * New indications can and will light up during processing, * and will generate new interrupt after enabling master. */ - return xe_mmio_read32(gt, GFX_MSTR_IRQ); + return xe_mmio_read32(mmio, GFX_MSTR_IRQ); } static u32 -gu_misc_irq_ack(struct xe_gt *gt, const u32 master_ctl) +gu_misc_irq_ack(struct xe_device *xe, const u32 master_ctl) { + struct xe_gt *mmio = xe_root_mmio_gt(xe); u32 iir; if (!(master_ctl & GU_MISC_IRQ)) return 0; - iir = xe_mmio_read32(gt, IIR(GU_MISC_IRQ_OFFSET)); + iir = xe_mmio_read32(mmio, IIR(GU_MISC_IRQ_OFFSET)); if (likely(iir)) - xe_mmio_write32(gt, IIR(GU_MISC_IRQ_OFFSET), iir); + xe_mmio_write32(mmio, IIR(GU_MISC_IRQ_OFFSET), iir); return iir; } -static inline void xelp_intr_enable(struct xe_gt *gt, bool stall) +static inline void xelp_intr_enable(struct xe_device *xe, bool stall) { - xe_mmio_write32(gt, GFX_MSTR_IRQ, MASTER_IRQ); + struct xe_gt *mmio = xe_root_mmio_gt(xe); + + xe_mmio_write32(mmio, GFX_MSTR_IRQ, MASTER_IRQ); if (stall) - xe_mmio_read32(gt, GFX_MSTR_IRQ); + xe_mmio_read32(mmio, GFX_MSTR_IRQ); } -static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +static void gt_irq_postinstall(struct xe_tile *tile) { + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *mmio = tile->primary_gt; u32 irqs, dmask, smask; - u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); - u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COMPUTE); + u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COPY); if (xe_device_guc_submission_enabled(xe)) { irqs = GT_RENDER_USER_INTERRUPT | @@ -132,57 +143,57 @@ static void gt_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) smask = irqs << 16; /* Enable RCS, BCS, VCS and VECS class interrupts. */ - xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask); - xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask); + xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); + xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask); if (ccs_mask) - xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask); + xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask); /* Unmask irqs on RCS, BCS, VCS and VECS engines. 
*/ - xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask); - xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask); + xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask); + xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask); if (bcs_mask & (BIT(1)|BIT(2))) - xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); if (bcs_mask & (BIT(3)|BIT(4))) - xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); if (bcs_mask & (BIT(5)|BIT(6))) - xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); if (bcs_mask & (BIT(7)|BIT(8))) - xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); - xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask); - xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); - xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask); if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask); /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. */ /* TODO: gt->pm_ier, gt->pm_imr */ - xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE, 0); - xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK, ~0); + xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0); + xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK, ~0); /* Same thing for GuC interrupts */ - xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, 0); - xe_mmio_write32(gt, GUC_SG_INTR_MASK, ~0); + xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE, 0); + xe_mmio_write32(mmio, GUC_SG_INTR_MASK, ~0); } -static void xelp_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +static void xelp_irq_postinstall(struct xe_device *xe, struct xe_tile *tile) { /* TODO: PCH */ - gt_irq_postinstall(xe, gt); + gt_irq_postinstall(tile); - unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); + unmask_and_enable(tile, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); - xelp_intr_enable(gt, true); + xelp_intr_enable(xe, true); } static u32 gt_engine_identity(struct xe_device *xe, - struct xe_gt *gt, + struct xe_gt *mmio, const unsigned int bank, const unsigned int bit) { @@ -191,7 +202,7 @@ gt_engine_identity(struct xe_device *xe, lockdep_assert_held(&xe->irq.lock); - xe_mmio_write32(gt, IIR_REG_SELECTOR(bank), BIT(bit)); + xe_mmio_write32(mmio, IIR_REG_SELECTOR(bank), BIT(bit)); /* * NB: Specs do not specify how long to spin wait, @@ -199,7 +210,7 @@ gt_engine_identity(struct xe_device *xe, */ timeout_ts = (local_clock() >> 10) + 100; do { - ident = xe_mmio_read32(gt, INTR_IDENTITY_REG(bank)); + ident = xe_mmio_read32(mmio, INTR_IDENTITY_REG(bank)); } while (!(ident & INTR_DATA_VALID) && !time_after32(local_clock() >> 10, timeout_ts)); @@ -209,7 +220,7 @@ gt_engine_identity(struct xe_device *xe, return 0; } - xe_mmio_write32(gt, INTR_IDENTITY_REG(bank), INTR_DATA_VALID); + xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), INTR_DATA_VALID); return ident; } @@ -231,10 +242,32 @@ gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) } } -static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, +static struct xe_gt *pick_engine_gt(struct xe_tile 
*tile, + enum xe_engine_class class, + unsigned int instance) +{ + struct xe_device *xe = tile_to_xe(tile); + + if (MEDIA_VER(xe) < 13) + return tile->primary_gt; + + if (class == XE_ENGINE_CLASS_VIDEO_DECODE || + class == XE_ENGINE_CLASS_VIDEO_ENHANCE) + return tile->media_gt; + + if (class == XE_ENGINE_CLASS_OTHER && + instance == OTHER_MEDIA_GUC_INSTANCE) + return tile->media_gt; + + return tile->primary_gt; +} + +static void gt_irq_handler(struct xe_tile *tile, u32 master_ctl, long unsigned int *intr_dw, u32 *identity) { + struct xe_device *xe = tile_to_xe(tile); + struct xe_gt *mmio = tile->primary_gt; unsigned int bank, bit; u16 instance, intr_vec; enum xe_engine_class class; @@ -246,27 +279,26 @@ static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, if (!(master_ctl & GT_DW_IRQ(bank))) continue; - if (!xe_gt_is_media_type(gt)) { - intr_dw[bank] = - xe_mmio_read32(gt, GT_INTR_DW(bank)); - for_each_set_bit(bit, intr_dw + bank, 32) - identity[bit] = gt_engine_identity(xe, gt, - bank, bit); - xe_mmio_write32(gt, GT_INTR_DW(bank), - intr_dw[bank]); - } + intr_dw[bank] = xe_mmio_read32(mmio, GT_INTR_DW(bank)); + for_each_set_bit(bit, intr_dw + bank, 32) + identity[bit] = gt_engine_identity(xe, mmio, bank, bit); + xe_mmio_write32(mmio, GT_INTR_DW(bank), intr_dw[bank]); for_each_set_bit(bit, intr_dw + bank, 32) { + struct xe_gt *engine_gt; + class = INTR_ENGINE_CLASS(identity[bit]); instance = INTR_ENGINE_INSTANCE(identity[bit]); intr_vec = INTR_ENGINE_INTR(identity[bit]); + engine_gt = pick_engine_gt(tile, class, instance); + if (class == XE_ENGINE_CLASS_OTHER) { - gt_other_irq_handler(gt, instance, intr_vec); + gt_other_irq_handler(engine_gt, instance, intr_vec); continue; } - hwe = xe_gt_hw_engine(gt, class, instance, false); + hwe = xe_gt_hw_engine(engine_gt, class, instance, false); if (!hwe) continue; @@ -284,60 +316,60 @@ static void gt_irq_handler(struct xe_device *xe, struct xe_gt *gt, static irqreturn_t xelp_irq_handler(int irq, void *arg) { struct xe_device *xe = arg; - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_tile *tile = xe_device_get_root_tile(xe); u32 master_ctl, gu_misc_iir; long unsigned int intr_dw[2]; u32 identity[32]; - master_ctl = xelp_intr_disable(gt); + master_ctl = xelp_intr_disable(xe); if (!master_ctl) { - xelp_intr_enable(gt, false); + xelp_intr_enable(xe, false); return IRQ_NONE; } - gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + gt_irq_handler(tile, master_ctl, intr_dw, identity); - gu_misc_iir = gu_misc_irq_ack(gt, master_ctl); + gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); - xelp_intr_enable(gt, false); + xelp_intr_enable(xe, false); return IRQ_HANDLED; } static u32 dg1_intr_disable(struct xe_device *xe) { - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_gt *mmio = xe_root_mmio_gt(xe); u32 val; /* First disable interrupts */ - xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, 0); + xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, 0); /* Get the indication levels and ack the master unit */ - val = xe_mmio_read32(gt, DG1_MSTR_TILE_INTR); + val = xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR); if (unlikely(!val)) return 0; - xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, val); + xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, val); return val; } static void dg1_intr_enable(struct xe_device *xe, bool stall) { - struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_gt *mmio = xe_root_mmio_gt(xe); - xe_mmio_write32(gt, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ); + xe_mmio_write32(mmio, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ); if (stall) - xe_mmio_read32(gt, 
DG1_MSTR_TILE_INTR); + xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR); } -static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +static void dg1_irq_postinstall(struct xe_device *xe, struct xe_tile *tile) { - gt_irq_postinstall(xe, gt); + gt_irq_postinstall(tile); - unmask_and_enable(gt, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); + unmask_and_enable(tile, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); - if (gt->info.id == XE_GT0) + if (tile->id == 0) dg1_intr_enable(xe, true); } @@ -349,8 +381,8 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) static irqreturn_t dg1_irq_handler(int irq, void *arg) { struct xe_device *xe = arg; - struct xe_gt *gt; - u32 master_tile_ctl, master_ctl = 0, tile0_master_ctl = 0, gu_misc_iir; + struct xe_tile *tile; + u32 master_tile_ctl, master_ctl = 0, gu_misc_iir = 0; long unsigned int intr_dw[2]; u32 identity[32]; u8 id; @@ -363,12 +395,13 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) return IRQ_NONE; } - for_each_gt(gt, xe, id) { - if ((master_tile_ctl & DG1_MSTR_TILE(gt_to_tile(gt)->id)) == 0) + for_each_tile(tile, xe, id) { + struct xe_gt *mmio = tile->primary_gt; + + if ((master_tile_ctl & DG1_MSTR_TILE(tile->id)) == 0) continue; - if (!xe_gt_is_media_type(gt)) - master_ctl = xe_mmio_read32(gt, GFX_MSTR_IRQ); + master_ctl = xe_mmio_read32(mmio, GFX_MSTR_IRQ); /* * We might be in irq handler just when PCIe DPC is initiated @@ -376,118 +409,120 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) * irq as device is inaccessible. */ if (master_ctl == REG_GENMASK(31, 0)) { - dev_dbg(gt_to_xe(gt)->drm.dev, + dev_dbg(tile_to_xe(tile)->drm.dev, "Ignore this IRQ as device might be in DPC containment.\n"); return IRQ_HANDLED; } - if (!xe_gt_is_media_type(gt)) - xe_mmio_write32(gt, GFX_MSTR_IRQ, master_ctl); - gt_irq_handler(xe, gt, master_ctl, intr_dw, identity); + xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl); + + gt_irq_handler(tile, master_ctl, intr_dw, identity); /* - * Save primary tile's master interrupt register for display - * processing below. + * Display interrupts (including display backlight operations + * that get reported as Gunit GSE) would only be hooked up to + * the primary tile. */ if (id == 0) - tile0_master_ctl = master_ctl; + gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } - /* Gunit GSE interrupts can trigger display backlight operations */ - gu_misc_iir = gu_misc_irq_ack(gt, tile0_master_ctl); - dg1_intr_enable(xe, false); return IRQ_HANDLED; } -static void gt_irq_reset(struct xe_gt *gt) +static void gt_irq_reset(struct xe_tile *tile) { - u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); - u32 bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + struct xe_gt *mmio = tile->primary_gt; + + u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, + XE_ENGINE_CLASS_COMPUTE); + u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, + XE_ENGINE_CLASS_COPY); /* Disable RCS, BCS, VCS and VECS class engines. */ - xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, 0); - xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, 0); + xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, 0); + xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, 0); if (ccs_mask) - xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, 0); + xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, 0); /* Restore masks irqs on RCS, BCS, VCS and VECS engines. 
*/ - xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~0); - xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~0); + xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~0); + xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~0); if (bcs_mask & (BIT(1)|BIT(2))) - xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~0); + xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~0); if (bcs_mask & (BIT(3)|BIT(4))) - xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~0); + xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~0); if (bcs_mask & (BIT(5)|BIT(6))) - xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~0); + xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~0); if (bcs_mask & (BIT(7)|BIT(8))) - xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~0); - xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~0); - xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~0); - xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~0); + xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~0); + xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~0); + xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~0); + xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~0); if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~0); + xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~0); if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~0); + xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); - xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_ENABLE, 0); - xe_mmio_write32(gt, GPM_WGBOXPERF_INTR_MASK, ~0); - xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, 0); - xe_mmio_write32(gt, GUC_SG_INTR_MASK, ~0); + xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0); + xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK, ~0); + xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE, 0); + xe_mmio_write32(mmio, GUC_SG_INTR_MASK, ~0); } -static void xelp_irq_reset(struct xe_gt *gt) +static void xelp_irq_reset(struct xe_tile *tile) { - xelp_intr_disable(gt); + xelp_intr_disable(tile_to_xe(tile)); - gt_irq_reset(gt); + gt_irq_reset(tile); - mask_and_disable(gt, GU_MISC_IRQ_OFFSET); - mask_and_disable(gt, PCU_IRQ_OFFSET); + mask_and_disable(tile, GU_MISC_IRQ_OFFSET); + mask_and_disable(tile, PCU_IRQ_OFFSET); } -static void dg1_irq_reset(struct xe_gt *gt) +static void dg1_irq_reset(struct xe_tile *tile) { - if (gt->info.id == 0) - dg1_intr_disable(gt_to_xe(gt)); + if (tile->id == 0) + dg1_intr_disable(tile_to_xe(tile)); - gt_irq_reset(gt); + gt_irq_reset(tile); - mask_and_disable(gt, GU_MISC_IRQ_OFFSET); - mask_and_disable(gt, PCU_IRQ_OFFSET); + mask_and_disable(tile, GU_MISC_IRQ_OFFSET); + mask_and_disable(tile, PCU_IRQ_OFFSET); } static void xe_irq_reset(struct xe_device *xe) { - struct xe_gt *gt; + struct xe_tile *tile; u8 id; - for_each_gt(gt, xe, id) { + for_each_tile(tile, xe, id) { if (GRAPHICS_VERx100(xe) >= 1210) - dg1_irq_reset(gt); + dg1_irq_reset(tile); else - xelp_irq_reset(gt); + xelp_irq_reset(tile); } } -void xe_gt_irq_postinstall(struct xe_gt *gt) +void xe_gt_irq_postinstall(struct xe_tile *tile) { - struct xe_device *xe = gt_to_xe(gt); + struct xe_device *xe = tile_to_xe(tile); if (GRAPHICS_VERx100(xe) >= 1210) - dg1_irq_postinstall(xe, gt); + dg1_irq_postinstall(xe, tile); else - xelp_irq_postinstall(xe, gt); + xelp_irq_postinstall(xe, tile); } static void xe_irq_postinstall(struct xe_device *xe) { - struct xe_gt *gt; + struct xe_tile *tile; u8 id; - for_each_gt(gt, xe, id) - xe_gt_irq_postinstall(gt); + for_each_tile(tile, xe, id) + xe_gt_irq_postinstall(tile); } static irq_handler_t xe_irq_handler(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h index 
34ecf22b32d3..69113c21e1cd 100644 --- a/drivers/gpu/drm/xe/xe_irq.h +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -7,10 +7,10 @@ #define _XE_IRQ_H_ struct xe_device; -struct xe_gt; +struct xe_tile; int xe_irq_install(struct xe_device *xe); -void xe_gt_irq_postinstall(struct xe_gt *gt); +void xe_gt_irq_postinstall(struct xe_tile *tile); void xe_irq_shutdown(struct xe_device *xe); void xe_irq_suspend(struct xe_device *xe); void xe_irq_resume(struct xe_device *xe); -- cgit v1.2.3 From 8e758225e52ec1acb5a0645b3750ea85cad82bbc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:35 -0700 Subject: drm/xe/irq: Move ASLE backlight interrupt logic Our only use of GUnit interrupts is to handle ASLE backlight operations that are reported as GUnit GSE interrupts. Move the enable/disable of these interrupts to a more sensible place, in the same area where we expect display interrupt code to be added by future patches. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-23-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 4d3ea3b66a7b..601a54c60aef 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -186,8 +186,6 @@ static void xelp_irq_postinstall(struct xe_device *xe, struct xe_tile *tile) gt_irq_postinstall(tile); - unmask_and_enable(tile, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); - xelp_intr_enable(xe, true); } @@ -367,8 +365,6 @@ static void dg1_irq_postinstall(struct xe_device *xe, struct xe_tile *tile) { gt_irq_postinstall(tile); - unmask_and_enable(tile, GU_MISC_IRQ_OFFSET, GU_MISC_GSE); - if (tile->id == 0) dg1_intr_enable(xe, true); } @@ -478,7 +474,6 @@ static void xelp_irq_reset(struct xe_tile *tile) gt_irq_reset(tile); - mask_and_disable(tile, GU_MISC_IRQ_OFFSET); mask_and_disable(tile, PCU_IRQ_OFFSET); } @@ -489,7 +484,6 @@ static void dg1_irq_reset(struct xe_tile *tile) gt_irq_reset(tile); - mask_and_disable(tile, GU_MISC_IRQ_OFFSET); mask_and_disable(tile, PCU_IRQ_OFFSET); } @@ -504,6 +498,9 @@ static void xe_irq_reset(struct xe_device *xe) else xelp_irq_reset(tile); } + + tile = xe_device_get_root_tile(xe); + mask_and_disable(tile, GU_MISC_IRQ_OFFSET); } void xe_gt_irq_postinstall(struct xe_tile *tile) @@ -523,6 +520,13 @@ static void xe_irq_postinstall(struct xe_device *xe) for_each_tile(tile, xe, id) xe_gt_irq_postinstall(tile); + + /* + * ASLE backlight operations are reported via GUnit GSE interrupts + * on the root tile. + */ + unmask_and_enable(xe_device_get_root_tile(xe), + GU_MISC_IRQ_OFFSET, GU_MISC_GSE); } static irq_handler_t xe_irq_handler(struct xe_device *xe) -- cgit v1.2.3 From 80d6e5874af2bb4a2fdc59029be64aa1d89a196b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:36 -0700 Subject: drm/xe/irq: Ensure primary GuC won't clobber media GuC's interrupt mask Although primary and media GuC share a single interrupt enable bit, they each have distinct bits in the mask register. Although we always enable interrupts for the primary GuC before the media GuC today (and never disable either of them), this might not always be the case in the future, so use a RMW when updating the mask register to ensure the other GuC's mask doesn't get clobbered. 
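To make the clobbering hazard concrete, a small standalone sketch (plain C, not the driver code; the two bit positions below are made up for the example, the real fields live in the GuC interrupt register definitions):

#include <assert.h>
#include <stdint.h>

/* Hypothetical mask-bit positions, one per GuC, for illustration only. */
#define PRIMARY_GUC2HOST  (1u << 0)
#define MEDIA_GUC2HOST    (1u << 16)

/* Mask-register semantics: a set bit means "interrupt masked (disabled)". */

static uint32_t blind_write_unmask(uint32_t reg, uint32_t events)
{
	(void)reg;
	/* Plain write of ~events: every bit outside 'events' is forced back to
	 * "masked", silently re-masking whatever the other GuC had unmasked. */
	return ~events;
}

static uint32_t rmw_unmask(uint32_t reg, uint32_t events)
{
	/* Read-modify-write: clear only the bits we own; the other GuC's bits survive. */
	return reg & ~events;
}

int main(void)
{
	uint32_t reg = ~0u;                       /* everything masked after reset */

	reg = rmw_unmask(reg, MEDIA_GUC2HOST);    /* media GuC unmasks its bit ...    */
	reg = rmw_unmask(reg, PRIMARY_GUC2HOST);  /* ... and the primary GuC keeps it */
	assert(!(reg & MEDIA_GUC2HOST) && !(reg & PRIMARY_GUC2HOST));

	reg = ~0u;
	reg = rmw_unmask(reg, MEDIA_GUC2HOST);
	reg = blind_write_unmask(reg, PRIMARY_GUC2HOST); /* clobbers the media bit */
	assert(reg & MEDIA_GUC2HOST);             /* media GuC interrupts masked again */

	return 0;
}
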
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-24-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index ecc843d91f62..04a57af85d9e 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -532,12 +532,15 @@ static void guc_enable_irq(struct xe_guc *guc) REG_FIELD_PREP(ENGINE0_MASK, GUC_INTR_GUC2HOST) : REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); + /* Primary GuC and media GuC share a single enable bit */ xe_mmio_write32(gt, GUC_SG_INTR_ENABLE, REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST)); - if (xe_gt_is_media_type(gt)) - xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0); - else - xe_mmio_write32(gt, GUC_SG_INTR_MASK, ~events); + + /* + * There are separate mask bits for primary and media GuCs, so use + * a RMW operation to avoid clobbering the other GuC's setting. + */ + xe_mmio_rmw32(gt, GUC_SG_INTR_MASK, events, 0); } int xe_guc_enable_communication(struct xe_guc *guc) -- cgit v1.2.3 From 22a22236017631d98c8780cf03734e4383ae69d9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:37 -0700 Subject: drm/xe/irq: Untangle postinstall functions The xe_irq_postinstall() never actually gets called after installing the interrupt handler. This oversight seems to get papered over due to the fact that the (misnamed) xe_gt_irq_postinstall does more than it really should and gets called in the middle of the GT initialization. The callstack for postinstall is also a bit muddled with top-level device interrupt enablement happening within platform-specific functions called from the per-tile xe_gt_irq_postinstall() function. Clean this all up by adding the missing call to xe_irq_postinstall() after installing the interrupt handler and pull top-level irq enablement up to xe_irq_postinstall where we'd expect it to be. The xe_gt_irq_postinstall() function is still a bit misnamed here; an upcoming patch will refocus its purpose and rename it. v2: - Squash in patch to actually call xe_irq_postinstall() after installing the interrupt handler. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-25-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 601a54c60aef..09447d521a9f 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -121,7 +121,7 @@ static inline void xelp_intr_enable(struct xe_device *xe, bool stall) xe_mmio_read32(mmio, GFX_MSTR_IRQ); } -static void gt_irq_postinstall(struct xe_tile *tile) +void xe_gt_irq_postinstall(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); struct xe_gt *mmio = tile->primary_gt; @@ -180,15 +180,6 @@ static void gt_irq_postinstall(struct xe_tile *tile) xe_mmio_write32(mmio, GUC_SG_INTR_MASK, ~0); } -static void xelp_irq_postinstall(struct xe_device *xe, struct xe_tile *tile) -{ - /* TODO: PCH */ - - gt_irq_postinstall(tile); - - xelp_intr_enable(xe, true); -} - static u32 gt_engine_identity(struct xe_device *xe, struct xe_gt *mmio, @@ -361,14 +352,6 @@ static void dg1_intr_enable(struct xe_device *xe, bool stall) xe_mmio_read32(mmio, DG1_MSTR_TILE_INTR); } -static void dg1_irq_postinstall(struct xe_device *xe, struct xe_tile *tile) -{ - gt_irq_postinstall(tile); - - if (tile->id == 0) - dg1_intr_enable(xe, true); -} - /* * Top-level interrupt handler for Xe_LP+ and beyond. These platforms have * a "master tile" interrupt register which must be consulted before the @@ -503,16 +486,6 @@ static void xe_irq_reset(struct xe_device *xe) mask_and_disable(tile, GU_MISC_IRQ_OFFSET); } -void xe_gt_irq_postinstall(struct xe_tile *tile) -{ - struct xe_device *xe = tile_to_xe(tile); - - if (GRAPHICS_VERx100(xe) >= 1210) - dg1_irq_postinstall(xe, tile); - else - xelp_irq_postinstall(xe, tile); -} - static void xe_irq_postinstall(struct xe_device *xe) { struct xe_tile *tile; @@ -527,6 +500,12 @@ static void xe_irq_postinstall(struct xe_device *xe) */ unmask_and_enable(xe_device_get_root_tile(xe), GU_MISC_IRQ_OFFSET, GU_MISC_GSE); + + /* Enable top-level interrupts */ + if (GRAPHICS_VERx100(xe) >= 1210) + dg1_intr_enable(xe, true); + else + xelp_intr_enable(xe, true); } static irq_handler_t xe_irq_handler(struct xe_device *xe) @@ -577,6 +556,8 @@ int xe_irq_install(struct xe_device *xe) return err; } + xe_irq_postinstall(xe); + err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe); if (err) return err; -- cgit v1.2.3 From 3e29c149b3d813c25925636135c08bf5d51372b2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:38 -0700 Subject: drm/xe: Replace xe_gt_irq_postinstall with xe_irq_enable_hwe The majority of xe_gt_irq_postinstall() is really focused on the hardware engine interrupts; other GT-related interrupts such as the GuC are enabled/disabled independently. Renaming the function and making it truly GT-specific will make it more clear what the intended focus is. Disabling/masking of other interrupts (such as GuC interrupts) is unnecessary since that has already happened during the irq_reset stage, and doing so will become harmful once the media GT is re-enabled since calls to xe_gt_irq_postinstall during media GT initialization would incorrectly disable the primary GT's GuC interrupts. 
Also, since this function is called from gt_fw_domain_init(), it's not necessary to also call it earlier during xe_irq_postinstall; just xe_irq_resume to handle runtime resume should be sufficient. v2: - Drop unnecessary !gt check. (Lucas) - Reword some comments about enable/unmask for clarity. (Lucas) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-26-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 +- drivers/gpu/drm/xe/xe_hw_engine.c | 1 + drivers/gpu/drm/xe/xe_irq.c | 89 +++++++++++++++++++-------------------- drivers/gpu/drm/xe/xe_irq.h | 3 +- 4 files changed, 49 insertions(+), 48 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 071d4fbd3efc..335148f1cd39 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -309,8 +309,8 @@ static int gt_fw_domain_init(struct xe_gt *gt) /* XXX: Fake that we pull the engine mask from hwconfig blob */ gt->info.engine_mask = gt->info.__engine_mask; - /* Enables per hw engine IRQs */ - xe_gt_irq_postinstall(gt_to_tile(gt)); + /* Enable per hw engine IRQs */ + xe_irq_enable_hwe(gt); /* Rerun MCR init as we now have hw engine list */ xe_gt_mcr_init(gt); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b12f65a2bab3..b42a0cb50159 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -17,6 +17,7 @@ #include "xe_gt.h" #include "xe_gt_topology.h" #include "xe_hw_fence.h" +#include "xe_irq.h" #include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 09447d521a9f..d92f03870e59 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -121,13 +121,12 @@ static inline void xelp_intr_enable(struct xe_device *xe, bool stall) xe_mmio_read32(mmio, GFX_MSTR_IRQ); } -void xe_gt_irq_postinstall(struct xe_tile *tile) +/* Enable/unmask the HWE interrupts for a specific GT's engines. */ +void xe_irq_enable_hwe(struct xe_gt *gt) { - struct xe_device *xe = tile_to_xe(tile); - struct xe_gt *mmio = tile->primary_gt; + struct xe_device *xe = gt_to_xe(gt); + u32 ccs_mask, bcs_mask; u32 irqs, dmask, smask; - u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COMPUTE); - u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt, XE_ENGINE_CLASS_COPY); if (xe_device_guc_submission_enabled(xe)) { irqs = GT_RENDER_USER_INTERRUPT | @@ -139,45 +138,44 @@ void xe_gt_irq_postinstall(struct xe_tile *tile) GT_WAIT_SEMAPHORE_INTERRUPT; } + ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE); + bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY); + dmask = irqs << 16 | irqs; smask = irqs << 16; - /* Enable RCS, BCS, VCS and VECS class interrupts. */ - xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); - xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask); - if (ccs_mask) - xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask); - - /* Unmask irqs on RCS, BCS, VCS and VECS engines. 
*/ - xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask); - xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask); - if (bcs_mask & (BIT(1)|BIT(2))) - xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(3)|BIT(4))) - xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(5)|BIT(6))) - xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); - if (bcs_mask & (BIT(7)|BIT(8))) - xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); - xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask); - xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask); - xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask); - if (ccs_mask & (BIT(0)|BIT(1))) - xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask); - if (ccs_mask & (BIT(2)|BIT(3))) - xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask); + if (!xe_gt_is_media_type(gt)) { + /* Enable interrupts for each engine class */ + xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask); + if (ccs_mask) + xe_mmio_write32(gt, CCS_RSVD_INTR_ENABLE, smask); + + /* Unmask interrupts for each engine instance */ + xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~smask); + xe_mmio_write32(gt, BCS_RSVD_INTR_MASK, ~smask); + if (bcs_mask & (BIT(1)|BIT(2))) + xe_mmio_write32(gt, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask); + if (bcs_mask & (BIT(3)|BIT(4))) + xe_mmio_write32(gt, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask); + if (bcs_mask & (BIT(5)|BIT(6))) + xe_mmio_write32(gt, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask); + if (bcs_mask & (BIT(7)|BIT(8))) + xe_mmio_write32(gt, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask); + if (ccs_mask & (BIT(0)|BIT(1))) + xe_mmio_write32(gt, CCS0_CCS1_INTR_MASK, ~dmask); + if (ccs_mask & (BIT(2)|BIT(3))) + xe_mmio_write32(gt, CCS2_CCS3_INTR_MASK, ~dmask); + } - /* - * RPS interrupts will get enabled/disabled on demand when RPS itself - * is enabled/disabled. - */ - /* TODO: gt->pm_ier, gt->pm_imr */ - xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0); - xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK, ~0); + if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) { + /* Enable interrupts for each engine class */ + xe_mmio_write32(gt, VCS_VECS_INTR_ENABLE, dmask); - /* Same thing for GuC interrupts */ - xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE, 0); - xe_mmio_write32(mmio, GUC_SG_INTR_MASK, ~0); + /* Unmask interrupts for each engine instance */ + xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask); + xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); + xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); + } } static u32 @@ -488,12 +486,6 @@ static void xe_irq_reset(struct xe_device *xe) static void xe_irq_postinstall(struct xe_device *xe) { - struct xe_tile *tile; - u8 id; - - for_each_tile(tile, xe, id) - xe_gt_irq_postinstall(tile); - /* * ASLE backlight operations are reported via GUnit GSE interrupts * on the root tile. 
@@ -580,9 +572,16 @@ void xe_irq_suspend(struct xe_device *xe) void xe_irq_resume(struct xe_device *xe) { + struct xe_gt *gt; + int id; + spin_lock_irq(&xe->irq.lock); xe->irq.enabled = true; xe_irq_reset(xe); xe_irq_postinstall(xe); + + for_each_gt(gt, xe, id) + xe_irq_enable_hwe(gt); + spin_unlock_irq(&xe->irq.lock); } diff --git a/drivers/gpu/drm/xe/xe_irq.h b/drivers/gpu/drm/xe/xe_irq.h index 69113c21e1cd..bc42bc90d967 100644 --- a/drivers/gpu/drm/xe/xe_irq.h +++ b/drivers/gpu/drm/xe/xe_irq.h @@ -8,11 +8,12 @@ struct xe_device; struct xe_tile; +struct xe_gt; int xe_irq_install(struct xe_device *xe); -void xe_gt_irq_postinstall(struct xe_tile *tile); void xe_irq_shutdown(struct xe_device *xe); void xe_irq_suspend(struct xe_device *xe); void xe_irq_resume(struct xe_device *xe); +void xe_irq_enable_hwe(struct xe_gt *gt); #endif -- cgit v1.2.3 From d78a4778195079e0b2820550efeecb7b25fa764a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:39 -0700 Subject: drm/xe: Invalidate TLB on all affected GTs during GGTT updates The GGTT is part of the tile and is shared by the primary and media GTs on platforms with a standalone media architecture. However each of these GTs has its own TLBs caching the page table lookups, and each needs to be invalidated separately. Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/r/20230601215244.678611-27-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 8d3638826860..d67249496113 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -190,13 +190,10 @@ err: #define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) #define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) -void xe_ggtt_invalidate(struct xe_ggtt *ggtt) +static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) { - /* - * TODO: Loop over each GT in tile once media GT support is - * re-added - */ - struct xe_gt *gt = ggtt->tile->primary_gt; + if (!gt) + return; /* TODO: vfunc for GuC vs. non-GuC */ @@ -221,6 +218,13 @@ void xe_ggtt_invalidate(struct xe_ggtt *ggtt) } } +void xe_ggtt_invalidate(struct xe_ggtt *ggtt) +{ + /* Each GT in a tile has its own TLB to cache GGTT lookups */ + ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt); + ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); +} + void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) { u64 addr, scratch_pte; -- cgit v1.2.3 From 933b78d678213f5c045c52cbc42bbee6653af250 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:40 -0700 Subject: drm/xe/tlb: Obtain forcewake when doing GGTT TLB invalidations Updates to the GGTT can happen when there are no in-flight jobs keeping the hardware awake. If the GT is powered down when invalidation is requested, we will not be able to communicate with the GuC (or MMIO) and the invalidation request will go missing. Explicitly grab GT forcewake to ensure the GT and GuC are powered up during the TLB invalidation. 
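A toy model of the failure mode described above, purely illustrative; in the driver the bracket is xe_force_wake_get()/xe_force_wake_put() around the GuC/MMIO invalidation:

/* fw_bracket.c - illustrative only: a request issued while the GT is
 * power-gated is simply lost; holding forcewake prevents that. */
#include <stdbool.h>
#include <stdio.h>

static bool gt_awake;
static bool invalidation_applied;

static void force_wake_get(void) { gt_awake = true; }
static void force_wake_put(void) { gt_awake = false; }

static void write_tlb_invalidate(void)
{
        if (gt_awake)
                invalidation_applied = true;   /* otherwise the request goes missing */
}

int main(void)
{
        /* without the bracket: no in-flight jobs, GT parked, request lost */
        write_tlb_invalidate();
        printf("without forcewake: %s\n", invalidation_applied ? "applied" : "lost");

        /* with the bracket added by this patch */
        force_wake_get();
        write_tlb_invalidate();
        force_wake_put();
        printf("with forcewake:    %s\n", invalidation_applied ? "applied" : "lost");
        return 0;
}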
Reviewed-by: Lucas De Marchi Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/r/20230601215244.678611-28-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index d67249496113..14b6d68a6324 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -195,6 +195,13 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) if (!gt) return; + /* + * Invalidation can happen when there's no in-flight work keeping the + * GT awake. We need to explicitly grab forcewake to ensure the GT + * and GuC are accessible. + */ + xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + /* TODO: vfunc for GuC vs. non-GuC */ if (gt->uc.guc.submission_state.enabled) { @@ -216,6 +223,8 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) xe_mmio_write32(gt, GUC_TLB_INV_CR, GUC_TLB_INV_CR_INVALIDATE); } + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } void xe_ggtt_invalidate(struct xe_ggtt *ggtt) -- cgit v1.2.3 From 37efea9ca2583990fbd706af0364ce9feb16bb1a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:41 -0700 Subject: drm/xe: Allow GT looping and lookup on standalone media Allow xe_device_get_gt() and for_each_gt() to operate as expected on platforms with standalone media. FIXME: We need to figure out a consistent ID scheme for GTs. This patch keeps the pre-existing behavior of 0/1 being the GT IDs for both PVC (multi-tile) and MTL (multi-GT), but depending on the direction we decide to go with uapi, we may change this in the future (e.g., to return 0/1 on PVC and 0/2 on MTL). Or if we decide we only need to expose tiles to userspace and not GTs, we may not even need ID numbers for the GTs anymore. v2: - Restructure a bit to make the assertions more clear. - Clarify in commit message that the goal here is to preserve existing behavior; UAPI-visible changes may be introduced in the future once we settle on what we really want. v3: - Store total GT count in xe_device for ease of lookup. (Brian) - s/(id__++)/(id__)++/ (Gustavo) Cc: Lucas De Marchi Cc: Gustavo Sousa Cc: Brian Welty Acked-by: Gustavo Sousa Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-29-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.h | 42 ++++++++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 6 +++++- 3 files changed, 42 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index f2d8479f6ff6..779f71d066e6 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -53,18 +53,42 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } +#define XE_MAX_GT_PER_TILE 2 + +static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) +{ + if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE)) + gt_id = 0; + + return gt_id ? 
tile->media_gt : tile->primary_gt; +} + static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { + struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct xe_gt *gt; - XE_BUG_ON(gt_id > XE_MAX_TILES_PER_DEVICE); - - gt = xe->tiles[gt_id].primary_gt; - if (drm_WARN_ON(&xe->drm, !gt)) + /* + * FIXME: This only works for now because multi-tile and standalone + * media are mutually exclusive on the platforms we have today. + * + * id => GT mapping may change once we settle on how we want to handle + * our UAPI. + */ + if (MEDIA_VER(xe) >= 13) { + gt = xe_tile_get_gt(root_tile, gt_id); + } else { + if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE)) + gt_id = 0; + + gt = xe->tiles[gt_id].primary_gt; + } + + if (!gt) return NULL; - XE_BUG_ON(gt->info.id != gt_id); - XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED); + drm_WARN_ON(&xe->drm, gt->info.id != gt_id); + drm_WARN_ON(&xe->drm, gt->info.type == XE_GT_TYPE_UNINITIALIZED); return gt; } @@ -100,8 +124,12 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe) for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \ for_each_if ((tile__) = &(xe__)->tiles[(id__)]) +/* + * FIXME: This only works for now since multi-tile and standalone media + * happen to be mutually exclusive. Future platforms may change this... + */ #define for_each_gt(gt__, xe__, id__) \ - for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__++)) \ + for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ for_each_if ((gt__) = xe_device_get_gt((xe__), (id__))) static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 16a77703d429..3b50134cdcc0 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -184,6 +184,8 @@ struct xe_device { u8 vram_flags; /** @tile_count: Number of tiles */ u8 tile_count; + /** @gt_count: Total number of GTs for entire device */ + u8 gt_count; /** @vm_max_level: Max VM level */ u8 vm_max_level; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index be51c9e97a79..abb2f1326007 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -535,7 +535,11 @@ static int xe_info_init(struct xe_device *xe, return PTR_ERR(tile->primary_gt); gt = tile->primary_gt; - gt->info.id = id; /* FIXME: Determine sensible numbering */ + /* + * FIXME: GT numbering scheme may change depending on UAPI + * decisions. + */ + gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; gt->info.__engine_mask = graphics_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 && media_desc) -- cgit v1.2.3 From 1bc728dcb8adc9f9e88f34940a94bfa314d4f7c3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:42 -0700 Subject: drm/xe: Update query uapi to support standalone media Now that a higher GT count can result from either multiple tiles (with one GT each) or an extra media GT within the root tile, we need to update the query code slightly to stop looking at tile_count. FIXME: As noted previously, we need to decide on a formal direction for exposing tiles and/or GTs to userspace. v2: - Drop num_gt() function in favor of stored xe->info.gt_count. (Brian) v3: - Keep XE_QUERY_GT_TYPE_REMOTE around for now. Userspace probably doesn't actually need this, and we may remove it in the future, but for now let's avoid changing uapi. 
(Brian) Cc: Brian Welty Reviewed-by: Brian Welty Link: https://lore.kernel.org/r/20230601215244.678611-30-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 8087c94dd782..c4165fa3428e 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -192,7 +192,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; config->info[XE_QUERY_CONFIG_VA_BITS] = 12 + (9 * (xe->info.vm_max_level + 1)); - config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.tile_count; + config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count; config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] = hweight_long(xe->info.mem_region_mask); config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] = @@ -211,7 +211,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) { struct xe_gt *gt; size_t size = sizeof(struct drm_xe_query_gts) + - xe->info.tile_count * sizeof(struct drm_xe_query_gt); + xe->info.gt_count * sizeof(struct drm_xe_query_gt); struct drm_xe_query_gts __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gts *gts; @@ -228,14 +228,14 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) if (XE_IOCTL_ERR(xe, !gts)) return -ENOMEM; - gts->num_gt = xe->info.tile_count; + gts->num_gt = xe->info.gt_count; for_each_gt(gt, xe, id) { - if (id == 0) - gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN; - else if (xe_gt_is_media_type(gt)) + if (xe_gt_is_media_type(gt)) gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA; - else + else if (gt_to_tile(gt)->id > 0) gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE; + else + gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN; gts->gts[id].instance = id; gts->gts[id].clock_freq = gt->info.clock_freq; if (!IS_DGFX(xe)) @@ -290,7 +290,7 @@ static int query_hwconfig(struct xe_device *xe, static size_t calc_topo_query_size(struct xe_device *xe) { - return xe->info.tile_count * + return xe->info.gt_count * (3 * sizeof(struct drm_xe_query_topology_mask) + sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) + sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) + -- cgit v1.2.3 From 7bfbad97d38f1de4ffbc7d9dce6ee0128459293c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:43 -0700 Subject: drm/xe: Reinstate media GT support Now that tiles and GTs are handled separately and other prerequisite changes are in place, we're ready to re-enable the media GT. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-31-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 ++++++++ drivers/gpu/drm/xe/xe_pci.c | 26 +++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d8b480f69c5f..76c09526690e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -8,6 +8,14 @@ #include "regs/xe_reg_defs.h" +/* + * The GSI register range [0x0 - 0x40000) is replicated at a higher offset + * for the media GT. xe_mmio and xe_gt_mcr functions will automatically + * translate offsets by MEDIA_GT_GSI_OFFSET when operating on the media GT. 
+ */ +#define MEDIA_GT_GSI_OFFSET 0x380000 +#define MEDIA_GT_GSI_LENGTH 0x40000 + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 XE_REG(0xd00) #define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index abb2f1326007..208dc7a63f88 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -545,7 +545,31 @@ static int xe_info_init(struct xe_device *xe, if (MEDIA_VER(xe) < 13 && media_desc) gt->info.__engine_mask |= media_desc->hw_engine_mask; - /* TODO: Init media GT, if present */ + if (MEDIA_VER(xe) < 13 || !media_desc) + continue; + + /* + * Allocate and setup media GT for platforms with standalone + * media. + */ + tile->media_gt = xe_gt_alloc(tile); + if (IS_ERR(tile->media_gt)) + return PTR_ERR(tile->media_gt); + + gt = tile->media_gt; + gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.__engine_mask = media_desc->hw_engine_mask; + gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; + gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; + + /* + * FIXME: At the moment multi-tile and standalone media are + * mutually exclusive on current platforms. We'll need to + * come up with a better way to number GTs if we ever wind + * up with platforms that support both together. + */ + drm_WARN_ON(&xe->drm, id != 0); + gt->info.id = 1; } return 0; -- cgit v1.2.3 From 08516de501fae647fb29bf3b62718de56cc24014 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 1 Jun 2023 14:52:44 -0700 Subject: drm/xe: Add kerneldoc description of multi-tile devices v2: - Fix doubled word. (Lucas) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230601215244.678611-32-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- Documentation/gpu/xe/index.rst | 1 + Documentation/gpu/xe/xe_tile.rst | 14 ++++++++++ drivers/gpu/drm/xe/xe_tile.c | 57 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 Documentation/gpu/xe/xe_tile.rst (limited to 'drivers/gpu') diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst index 2fddf9ed251e..5c4d6bb370f3 100644 --- a/Documentation/gpu/xe/index.rst +++ b/Documentation/gpu/xe/index.rst @@ -21,3 +21,4 @@ DG2, etc is provided to prototype the driver. xe_wa xe_rtp xe_firmware + xe_tile diff --git a/Documentation/gpu/xe/xe_tile.rst b/Documentation/gpu/xe/xe_tile.rst new file mode 100644 index 000000000000..c33f68dd95b6 --- /dev/null +++ b/Documentation/gpu/xe/xe_tile.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +================== +Multi-tile Devices +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c + :doc: Multi-tile Design + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c + :internal: diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index fa56323aa988..6414aa810355 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -12,6 +12,63 @@ #include "xe_tile.h" #include "xe_ttm_vram_mgr.h" +/** + * DOC: Multi-tile Design + * + * Different vendors use the term "tile" a bit differently, but in the Intel + * world, a 'tile' is pretty close to what most people would think of as being + * a complete GPU. When multiple GPUs are placed behind a single PCI device, + * that's what is referred to as a "multi-tile device." In such cases, pretty + * much all hardware is replicated per-tile, although certain responsibilities + * like PCI communication, reporting of interrupts to the OS, etc. 
are handled + * solely by the "root tile." A multi-tile platform takes care of tying the + * tiles together in a way such that interrupt notifications from remote tiles + * are forwarded to the root tile, the per-tile vram is combined into a single + * address space, etc. + * + * In contrast, a "GT" (which officially stands for "Graphics Technology") is + * the subset of a GPU/tile that is responsible for implementing graphics + * and/or media operations. The GT is where a lot of the driver implementation + * happens since it's where the hardware engines, the execution units, and the + * GuC all reside. + * + * Historically most Intel devices were single-tile devices that contained a + * single GT. PVC is an example of an Intel platform built on a multi-tile + * design (i.e., multiple GPUs behind a single PCI device); each PVC tile only + * has a single GT. In contrast, platforms like MTL that have separate chips + * for render and media IP are still only a single logical GPU, but the + * graphics and media IP blocks are each exposed as a separate GT within that + * single GPU. This is important from a software perspective because multi-GT + * platforms like MTL only replicate a subset of the GPU hardware and behave + * differently than multi-tile platforms like PVC where nearly everything is + * replicated. + * + * Per-tile functionality (shared by all GTs within the tile): + * - Complete 4MB MMIO space (containing SGunit/SoC registers, GT + * registers, display registers, etc.) + * - Global GTT + * - VRAM (if discrete) + * - Interrupt flows + * - Migration context + * - kernel batchbuffer pool + * - Primary GT + * - Media GT (if media version >= 13) + * + * Per-GT functionality: + * - GuC + * - Hardware engines + * - Programmable hardware units (subslices, EUs) + * - GSI subset of registers (multiple copies of these registers reside + * within the complete MMIO space provided by the tile, but at different + * offsets --- 0 for render, 0x380000 for media) + * - Multicast register steering + * - TLBs to cache page table translations + * - Reset capability + * - Low-level power management (e.g., C6) + * - Clock frequency + * - MOCS and PAT programming + */ + /** * xe_tile_alloc - Perform per-tile memory allocation * @tile: Tile to perform allocations for -- cgit v1.2.3 From 437bcbab1023e06edd8dbca99f5c44e5d2b30133 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 1 Jun 2023 16:44:19 -0300 Subject: drm/xe: Replace deprecated DRM_ERROR() DRM_ERROR() has been deprecated in favor of pr_err(). However, we should prefer to use xe_gt_err() or drm_err() whenever possible so we get gt- or device-specific output with the error message. v2: - Prefer drm_err() over pr_err(). (Matt, Jani) v3: - Prefer xe_gt_err() over drm_err() when possible. (Matt) v4: - Use the already available dev variable instead of xe->drm as parameter to drm_err(). 
(Matt) Cc: Jani Nikula Cc: Lucas De Marchi Cc: Haridhar Kalvala Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230601194419.1179609-1-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 7 +++++-- drivers/gpu/drm/xe/xe_reg_sr.h | 3 ++- drivers/gpu/drm/xe/xe_rtp.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 3 ++- 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 434133444d74..8580ff38b82c 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -19,6 +19,7 @@ #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_mcr.h" +#include "xe_gt_printk.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_reg_whitelist.h" @@ -89,7 +90,8 @@ static void reg_sr_inc_error(struct xe_reg_sr *sr) } int xe_reg_sr_add(struct xe_reg_sr *sr, - const struct xe_reg_sr_entry *e) + const struct xe_reg_sr_entry *e, + struct xe_gt *gt) { unsigned long idx = e->reg.addr; struct xe_reg_sr_entry *pentry = xa_load(&sr->xa, idx); @@ -122,7 +124,8 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, return 0; fail: - DRM_ERROR("Discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n", + xe_gt_err(gt, + "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n", idx, e->clr_bits, e->set_bits, str_yes_no(e->reg.masked), str_yes_no(e->reg.mcr), diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index 0bfe66ea29bf..c3001798d9e8 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -19,7 +19,8 @@ struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p); -int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e); +int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e, + struct xe_gt *gt); void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt); void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 43a86358efb6..956bd39fe1a0 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -122,7 +122,7 @@ static void rtp_add_sr_entry(const struct xe_rtp_action *action, }; sr_entry.reg.addr += mmio_base; - xe_reg_sr_add(sr, &sr_entry); + xe_reg_sr_add(sr, &sr_entry, gt); } static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 44ad45776141..17b7d543ae49 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -7,6 +7,7 @@ #include +#include #include #include #include @@ -3048,7 +3049,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { - DRM_ERROR("VM closed while we began looking up?\n"); + drm_err(dev, "VM closed while we began looking up?\n"); err = -ENOENT; goto put_vm; } -- cgit v1.2.3 From 066d0952489b6ea269823dbbbb85d580ee6d23e0 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 2 Jun 2023 16:52:08 -0700 Subject: drm/xe: Reformat xe_guc_regs.h Reformat the GuC register header according to the same rules used by other register headers: - Register definitions are ordered by offset - Value of #define's start on column 49 - Lowercase used for hex 
values No functional change. This header has some things that aren't directly related to register definitions (e.g., number of doorbells, doorbell info structure, GuC interrupt vector layout, etc. These items have been moved to the bottom of the header. Cc: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230602235210.1314028-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 186 +++++++++++++++++----------------- 1 file changed, 93 insertions(+), 93 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index b4f27cadb68f..ea8118f16722 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -13,65 +13,37 @@ /* Definitions of GuC H/W registers, bits, etc */ -#define GUC_STATUS XE_REG(0xc000) -#define GS_AUTH_STATUS_MASK REG_GENMASK(31, 30) -#define GS_AUTH_STATUS_BAD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1) -#define GS_AUTH_STATUS_GOOD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2) -#define GS_MIA_MASK REG_GENMASK(18, 16) -#define GS_MIA_CORE_STATE REG_FIELD_PREP(GS_MIA_MASK, 0x1) -#define GS_MIA_HALT_REQUESTED REG_FIELD_PREP(GS_MIA_MASK, 0x2) -#define GS_MIA_ISR_ENTRY REG_FIELD_PREP(GS_MIA_MASK, 0x4) -#define GS_UKERNEL_MASK REG_GENMASK(15, 8) -#define GS_BOOTROM_MASK REG_GENMASK(7, 1) -#define GS_BOOTROM_RSA_FAILED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50) -#define GS_BOOTROM_JUMP_PASSED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76) -#define GS_MIA_IN_RESET REG_BIT(0) - -#define SOFT_SCRATCH(n) XE_REG(0xc180 + (n) * 4) -#define SOFT_SCRATCH_COUNT 16 - -#define UOS_RSA_SCRATCH(i) XE_REG(0xc200 + (i) * 4) -#define UOS_RSA_SCRATCH_COUNT 64 - -#define DMA_ADDR_0_LOW XE_REG(0xc300) -#define DMA_ADDR_0_HIGH XE_REG(0xc304) -#define DMA_ADDR_1_LOW XE_REG(0xc308) -#define DMA_ADDR_1_HIGH XE_REG(0xc30c) -#define DMA_ADDR_SPACE_MASK REG_GENMASK(20, 16) -#define DMA_ADDRESS_SPACE_WOPCM REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7) -#define DMA_COPY_SIZE XE_REG(0xc310) -#define DMA_CTRL XE_REG(0xc314) -#define HUC_UKERNEL REG_BIT(9) -#define UOS_MOVE REG_BIT(4) -#define START_DMA REG_BIT(0) -#define DMA_GUC_WOPCM_OFFSET XE_REG(0xc340) -#define GUC_WOPCM_OFFSET_SHIFT 14 -#define GUC_WOPCM_OFFSET_MASK REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT) -#define HUC_LOADING_AGENT_GUC REG_BIT(1) -#define GUC_WOPCM_OFFSET_VALID REG_BIT(0) -#define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4) - -#define HUC_STATUS2 XE_REG(0xd3b0) -#define HUC_FW_VERIFIED REG_BIT(7) - -#define HUC_KERNEL_LOAD_INFO XE_REG(0xc1dc) -#define HUC_LOAD_SUCCESSFUL REG_BIT(0) - -#define GUC_WOPCM_SIZE XE_REG(0xc050) -#define GUC_WOPCM_SIZE_MASK REG_GENMASK(31, 12) -#define GUC_WOPCM_SIZE_LOCKED REG_BIT(0) - -#define GT_PM_CONFIG XE_REG(0x13816c) -#define GT_DOORBELL_ENABLE REG_BIT(0) - -#define GTCR XE_REG(0x4274) -#define GTCR_INVALIDATE REG_BIT(0) - -#define GUC_TLB_INV_CR XE_REG(0xcee8) -#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) +#define DIST_DBS_POPULATED XE_REG(0xd08) +#define DOORBELLS_PER_SQIDI_MASK REG_GENMASK(23, 16) +#define SQIDIS_DOORBELL_EXIST_MASK REG_GENMASK(15, 0) + +#define DRBREGL(x) XE_REG(0x1000 + (x) * 8) +#define DRB_VALID REG_BIT(0) +#define DRBREGU(x) XE_REG(0x1000 + (x) * 8 + 4) + +#define GTCR XE_REG(0x4274) +#define GTCR_INVALIDATE REG_BIT(0) #define GUC_ARAT_C6DIS XE_REG(0xa178) +#define GUC_STATUS XE_REG(0xc000) +#define GS_AUTH_STATUS_MASK REG_GENMASK(31, 30) +#define GS_AUTH_STATUS_BAD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x1) 
+#define GS_AUTH_STATUS_GOOD REG_FIELD_PREP(GS_AUTH_STATUS_MASK, 0x2) +#define GS_MIA_MASK REG_GENMASK(18, 16) +#define GS_MIA_CORE_STATE REG_FIELD_PREP(GS_MIA_MASK, 0x1) +#define GS_MIA_HALT_REQUESTED REG_FIELD_PREP(GS_MIA_MASK, 0x2) +#define GS_MIA_ISR_ENTRY REG_FIELD_PREP(GS_MIA_MASK, 0x4) +#define GS_UKERNEL_MASK REG_GENMASK(15, 8) +#define GS_BOOTROM_MASK REG_GENMASK(7, 1) +#define GS_BOOTROM_RSA_FAILED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x50) +#define GS_BOOTROM_JUMP_PASSED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76) +#define GS_MIA_IN_RESET REG_BIT(0) + +#define GUC_WOPCM_SIZE XE_REG(0xc050) +#define GUC_WOPCM_SIZE_MASK REG_GENMASK(31, 12) +#define GUC_WOPCM_SIZE_LOCKED REG_BIT(0) + #define GUC_SHIM_CONTROL XE_REG(0xc064) #define PVC_GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) #define PVC_GUC_MOCS_UC_INDEX 1 @@ -86,35 +58,51 @@ #define GUC_ENABLE_READ_CACHE_LOGIC REG_BIT(1) #define GUC_DISABLE_SRAM_INIT_TO_ZEROES REG_BIT(0) +#define SOFT_SCRATCH(n) XE_REG(0xc180 + (n) * 4) +#define SOFT_SCRATCH_COUNT 16 + +#define HUC_KERNEL_LOAD_INFO XE_REG(0xc1dc) +#define HUC_LOAD_SUCCESSFUL REG_BIT(0) + +#define UOS_RSA_SCRATCH(i) XE_REG(0xc200 + (i) * 4) +#define UOS_RSA_SCRATCH_COUNT 64 + +#define DMA_ADDR_0_LOW XE_REG(0xc300) +#define DMA_ADDR_0_HIGH XE_REG(0xc304) +#define DMA_ADDR_1_LOW XE_REG(0xc308) +#define DMA_ADDR_1_HIGH XE_REG(0xc30c) +#define DMA_ADDR_SPACE_MASK REG_GENMASK(20, 16) +#define DMA_ADDRESS_SPACE_WOPCM REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7) +#define DMA_COPY_SIZE XE_REG(0xc310) +#define DMA_CTRL XE_REG(0xc314) +#define HUC_UKERNEL REG_BIT(9) +#define UOS_MOVE REG_BIT(4) +#define START_DMA REG_BIT(0) +#define DMA_GUC_WOPCM_OFFSET XE_REG(0xc340) +#define GUC_WOPCM_OFFSET_SHIFT 14 +#define GUC_WOPCM_OFFSET_MASK REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT) +#define HUC_LOADING_AGENT_GUC REG_BIT(1) +#define GUC_WOPCM_OFFSET_VALID REG_BIT(0) +#define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4) #define GUC_SEND_INTERRUPT XE_REG(0xc4c8) #define GUC_SEND_TRIGGER REG_BIT(0) -#define GUC_HOST_INTERRUPT XE_REG(0x1901f0) -#define GUC_NUM_DOORBELLS 256 +#define GUC_BCS_RCS_IER XE_REG(0xc550) +#define GUC_VCS2_VCS1_IER XE_REG(0xc554) +#define GUC_WD_VECS_IER XE_REG(0xc558) +#define GUC_PM_P24C_IER XE_REG(0xc55c) -/* format of the HW-monitored doorbell cacheline */ -struct guc_doorbell_info { - u32 db_status; -#define GUC_DOORBELL_DISABLED 0 -#define GUC_DOORBELL_ENABLED 1 +#define GUC_TLB_INV_CR XE_REG(0xcee8) +#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) - u32 cookie; - u32 reserved[14]; -} __packed; +#define HUC_STATUS2 XE_REG(0xd3b0) +#define HUC_FW_VERIFIED REG_BIT(7) -#define DRBREGL(x) XE_REG(0x1000 + (x) * 8) -#define DRB_VALID REG_BIT(0) -#define DRBREGU(x) XE_REG(0x1000 + (x) * 8 + 4) - -#define DIST_DBS_POPULATED XE_REG(0xd08) -#define DOORBELLS_PER_SQIDI_MASK REG_GENMASK(23, 16) -#define SQIDIS_DOORBELL_EXIST_MASK REG_GENMASK(15, 0) +#define GT_PM_CONFIG XE_REG(0x13816c) +#define GT_DOORBELL_ENABLE REG_BIT(0) -#define GUC_BCS_RCS_IER XE_REG(0xC550) -#define GUC_VCS2_VCS1_IER XE_REG(0xC554) -#define GUC_WD_VECS_IER XE_REG(0xC558) -#define GUC_PM_P24C_IER XE_REG(0xC55C) +#define GUC_HOST_INTERRUPT XE_REG(0x1901f0) #define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4) #define VF_SW_FLAG_COUNT 4 @@ -125,21 +113,33 @@ struct guc_doorbell_info { #define MED_VF_SW_FLAG_COUNT 4 /* GuC Interrupt Vector */ -#define GUC_INTR_GUC2HOST BIT(15) -#define GUC_INTR_EXEC_ERROR BIT(14) -#define GUC_INTR_DISPLAY_EVENT BIT(13) -#define GUC_INTR_SEM_SIG BIT(12) -#define GUC_INTR_IOMMU2GUC BIT(11) -#define GUC_INTR_DOORBELL_RANG 
BIT(10) -#define GUC_INTR_DMA_DONE BIT(9) -#define GUC_INTR_FATAL_ERROR BIT(8) -#define GUC_INTR_NOTIF_ERROR BIT(7) -#define GUC_INTR_SW_INT_6 BIT(6) -#define GUC_INTR_SW_INT_5 BIT(5) -#define GUC_INTR_SW_INT_4 BIT(4) -#define GUC_INTR_SW_INT_3 BIT(3) -#define GUC_INTR_SW_INT_2 BIT(2) -#define GUC_INTR_SW_INT_1 BIT(1) -#define GUC_INTR_SW_INT_0 BIT(0) +#define GUC_INTR_GUC2HOST REG_BIT(15) +#define GUC_INTR_EXEC_ERROR REG_BIT(14) +#define GUC_INTR_DISPLAY_EVENT REG_BIT(13) +#define GUC_INTR_SEM_SIG REG_BIT(12) +#define GUC_INTR_IOMMU2GUC REG_BIT(11) +#define GUC_INTR_DOORBELL_RANG REG_BIT(10) +#define GUC_INTR_DMA_DONE REG_BIT(9) +#define GUC_INTR_FATAL_ERROR REG_BIT(8) +#define GUC_INTR_NOTIF_ERROR REG_BIT(7) +#define GUC_INTR_SW_INT_6 REG_BIT(6) +#define GUC_INTR_SW_INT_5 REG_BIT(5) +#define GUC_INTR_SW_INT_4 REG_BIT(4) +#define GUC_INTR_SW_INT_3 REG_BIT(3) +#define GUC_INTR_SW_INT_2 REG_BIT(2) +#define GUC_INTR_SW_INT_1 REG_BIT(1) +#define GUC_INTR_SW_INT_0 REG_BIT(0) + +#define GUC_NUM_DOORBELLS 256 + +/* format of the HW-monitored doorbell cacheline */ +struct guc_doorbell_info { + u32 db_status; +#define GUC_DOORBELL_DISABLED 0 +#define GUC_DOORBELL_ENABLED 1 + + u32 cookie; + u32 reserved[14]; +} __packed; #endif -- cgit v1.2.3 From 17a6726c3d3040c0a47d7ec5bd8cc4056a379017 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 2 Jun 2023 16:52:09 -0700 Subject: drm/xe: Initialize MOCS earlier xe_mocs_init_early doesn't touch the hardware, it just sets up internal software state. There's no need to perform this step in the "forcewake held" region. Moving the init earlier will also make the uc_index values available earlier which will be important for an upcoming GuC init patch. Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230602235210.1314028-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 335148f1cd39..3799e663bad3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -315,8 +315,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) /* Rerun MCR init as we now have hw engine list */ xe_gt_mcr_init(gt); - xe_mocs_init_early(gt); - err = xe_hw_engines_init_early(gt); if (err) goto err_force_wake; @@ -429,6 +427,8 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; + xe_mocs_init_early(gt); + xe_gt_sysfs_init(gt); err = gt_fw_domain_init(gt); -- cgit v1.2.3 From 1fce9a6f69f57318842bd2771f761f203db6f49c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 2 Jun 2023 16:52:10 -0700 Subject: drm/xe: Don't hardcode GuC's MOCS index in register header Although PVC is currently the only platform that needs us to program a GuC register with the index of an uncached MOCS entry, it's likely other platforms will need this in the future. Rather than hardcoding PVC's index into the register header, we should just pull the appropriate index from gt->mocs.uc_index to future-proof the code. 
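As a standalone sanity check of the field math (illustrative C only; the driver uses REG_GENMASK()/REG_FIELD_PREP() and reads the index from gt->mocs.uc_index), packing an uncached-MOCS index of 1 into bits 25:24 of GUC_SHIM_CONTROL reproduces the value the old PVC-specific macro hardcoded:

/* mocs_index_pack.c - illustrative only; GENMASK/FIELD_PREP are
 * re-implemented minimally here (GCC/Clang __builtin_ctz for the shift). */
#include <stdio.h>
#include <stdint.h>

#define GENMASK32(h, l)       ((~0u << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP32(mask, v) (((uint32_t)(v) << __builtin_ctz(mask)) & (mask))

#define GUC_MOCS_INDEX_MASK   GENMASK32(25, 24)   /* bits 25:24 of GUC_SHIM_CONTROL */

int main(void)
{
        unsigned int uc_index = 1;      /* PVC's uncached MOCS entry */
        unsigned int shim_flags = FIELD_PREP32(GUC_MOCS_INDEX_MASK, uc_index);

        /* prints 0x01000000 - the same value the old hardcoded macro produced */
        printf("GUC_SHIM_CONTROL MOCS field: 0x%08x\n", shim_flags);
        return 0;
}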
Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230602235210.1314028-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 5 +---- drivers/gpu/drm/xe/xe_guc.c | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index ea8118f16722..fcb747201bc1 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -45,10 +45,7 @@ #define GUC_WOPCM_SIZE_LOCKED REG_BIT(0) #define GUC_SHIM_CONTROL XE_REG(0xc064) -#define PVC_GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) -#define PVC_GUC_MOCS_UC_INDEX 1 -#define PVC_GUC_MOCS_INDEX(index) REG_FIELD_PREP(PVC_GUC_MOCS_INDEX_MASK, \ - index) +#define GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) #define GUC_SHIM_WC_ENABLE REG_BIT(21) #define GUC_ENABLE_MIA_CLOCK_GATING REG_BIT(15) #define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA REG_BIT(10) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 04a57af85d9e..e51d8fb4a354 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -328,7 +328,7 @@ static void guc_prepare_xfer(struct xe_guc *guc) GUC_ENABLE_MIA_CACHING; if (xe->info.platform == XE_PVC) - shim_flags |= PVC_GUC_MOCS_INDEX(PVC_GUC_MOCS_UC_INDEX); + shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index); /* Must program this register before loading the ucode with DMA */ xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags); -- cgit v1.2.3 From 433002ca3670769270a2f8f3a5073e9f370b0562 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 5 Jun 2023 12:08:56 -0400 Subject: drm/xe: REBAR resize should be best effort The resizing of the PCI BAR is a best effort feature. If it is not available, it should not fail the driver probe. Rework the resize to not exit on failure. Fixes: 7f075300a318 ("drm/xe: Simplify rebar sizing") Acked-by: Lucas De Marchi Reviewed-by: Matthew Auld Signed-off-by: Michael J. Ruhl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 475b14fe4356..f7a7f996b37f 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -47,7 +47,7 @@ mask_err: return err; } -static int +static void _resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -61,18 +61,17 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) if (ret) { drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). 
Consider enabling 'Resizable BAR' support in your BIOS\n", resno, 1 << bar_size, ERR_PTR(ret)); - return ret; + return; } drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); - return ret; } /* * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static int xe_resize_vram_bar(struct xe_device *xe) +static void xe_resize_vram_bar(struct xe_device *xe) { u64 force_vram_bar_size = xe_force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -83,14 +82,13 @@ static int xe_resize_vram_bar(struct xe_device *xe) u32 bar_size_mask; u32 pci_cmd; int i; - int ret; /* gather some relevant info */ current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); bar_size_mask = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR); if (!bar_size_mask) - return 0; + return; /* set to a specific size? */ if (force_vram_bar_size) { @@ -104,22 +102,22 @@ static int xe_resize_vram_bar(struct xe_device *xe) drm_info(&xe->drm, "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n", (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20); - return 0; + return; } rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT); if (rebar_size == current_size) - return 0; + return; } else { rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT); /* only resize if larger than current */ if (rebar_size <= current_size) - return 0; + return; } - drm_info(&xe->drm, "Resizing bar from %lluMiB -> %lluMiB\n", + drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n", (u64)current_size >> 20, (u64)rebar_size >> 20); while (root->parent) @@ -133,17 +131,16 @@ static int xe_resize_vram_bar(struct xe_device *xe) if (!root_res) { drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n"); - return -1; + return; } pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - ret = _resize_bar(xe, GEN12_LMEM_BAR, rebar_size); + _resize_bar(xe, GEN12_LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); - return ret; } static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) @@ -163,16 +160,13 @@ static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) static int xe_determine_lmem_bar_size(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - int err; if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) { drm_err(&xe->drm, "pci resource is not valid\n"); return -ENXIO; } - err = xe_resize_vram_bar(xe); - if (err) - return err; + xe_resize_vram_bar(xe); xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); xe->mem.vram.io_size = pci_resource_len(pdev, GEN12_LMEM_BAR); -- cgit v1.2.3 From 882b5d00f96a3a02874da2ffee24508df6d6b860 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 2 Jun 2023 16:10:54 -0700 Subject: drm/xe/wa: Extend scope of Wa_14015795083 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wa_14015795083 was already implemented for DG2 and PVC, but the workaround database has been updated to extend it to more platforms. It should now apply to all platforms with graphics versions 12.00 - 12.60, as well as A-step of Xe_LPG (12.70 / 12.71). 
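The extended scope can be restated as a small predicate; the sketch below is illustrative C that simply mirrors the version ranges and stepping from the commit message, not the xe_rtp rule machinery:

/* wa_14015795083_scope.c - illustrative restatement of the applicability rule */
#include <stdbool.h>
#include <stdio.h>

/* verx100: graphics IP version times 100 (e.g. 1255 for 12.55);
 * a_step: true on A-step hardware. */
static bool needs_wa_14015795083(int verx100, bool a_step)
{
        if (verx100 >= 1200 && verx100 <= 1260)
                return true;                            /* covers DG2 and PVC */
        if ((verx100 == 1270 || verx100 == 1271) && a_step)
                return true;                            /* Xe_LPG, A-step only */
        return false;
}

int main(void)
{
        printf("12.55 (DG2):  %d\n", needs_wa_14015795083(1255, false));
        printf("12.70 A-step: %d\n", needs_wa_14015795083(1270, true));
        printf("12.70 B-step: %d\n", needs_wa_14015795083(1270, false));
        return 0;
}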
Reviewed-by: José Roberto de Souza Link: https://lore.kernel.org/r/20230602231054.1306865-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d9906f326d38..e5b3ff669465 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -107,6 +107,10 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), XE_RTP_ACTIONS(SET(DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE)) }, + { XE_RTP_NAME("14015795083"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260)), + XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) + }, /* DG1 */ @@ -195,10 +199,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN)) }, - { XE_RTP_NAME("14015795083"), - XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) - }, { XE_RTP_NAME("18018781329"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB), @@ -219,10 +219,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* PVC */ - { XE_RTP_NAME("14015795083"), - XE_RTP_RULES(PLATFORM(PVC)), - XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) - }, { XE_RTP_NAME("18018781329"), XE_RTP_RULES(PLATFORM(PVC)), XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB), @@ -234,6 +230,13 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(PLATFORM(PVC)), XE_RTP_ACTIONS(SET(XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC)) }, + + /* Xe_LPG */ + { XE_RTP_NAME("14015795083"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) + }, + {} }; -- cgit v1.2.3 From f1a5a9bf14182ae659cb3b5331021662c1ee1d9a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Jan 2023 18:34:34 -0800 Subject: drm/xe/guc: Read HXG fields from DW1 of G2H response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HXG fields are DW1 not DW0, fix this. 
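For illustration, a small standalone snippet of the layout being fixed: dword 0 of a G2H CTB message is the CTB header, and the HXG header, which carries the failure error (bits 15:0) and hint (bits 27:16) fields, is the first payload dword, msg[1]. The mask values and message contents below are illustrative; the driver uses the GUC_HXG_* ABI definitions.

/* hxg_dw1.c - illustrative only */
#include <stdio.h>
#include <stdint.h>

#define HXG_FAILURE_ERROR_MASK 0x0000ffffu   /* bits 15:0  */
#define HXG_FAILURE_HINT_MASK  0x0fff0000u   /* bits 27:16 */

int main(void)
{
        uint32_t msg[2] = {
                0x12340002u,    /* dword 0: CTB header (fence, length, ...) */
                0x00420060u,    /* dword 1: HXG header: hint=0x042, error=0x0060 */
        };

        /* correct after this patch: read the HXG fields from DW1 */
        printf("error=0x%04x hint=0x%03x\n",
               (unsigned)(msg[1] & HXG_FAILURE_ERROR_MASK),
               (unsigned)((msg[1] & HXG_FAILURE_HINT_MASK) >> 16));

        /* the old code applied the same masks to msg[0] and extracted
         * CTB header bits instead, yielding bogus error/hint values */
        return 0;
}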
Reviewed-by: Rodrigo Vivi Acked-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 137c184df487..615cc4d4ad69 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -790,13 +790,13 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { g2h_fence->fail = true; g2h_fence->error = - FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]); + FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]); g2h_fence->hint = - FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]); + FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]); } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { g2h_fence->retry = true; g2h_fence->reason = - FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[0]); + FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]); } else if (g2h_fence->response_buffer) { g2h_fence->response_len = response_len; memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN, -- cgit v1.2.3 From 1011812c642c664b254986fb34264c2ee8d2bb50 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 9 Jun 2023 11:38:14 -0300 Subject: drm/xe/reg_sr: Use a single parameter for xe_reg_sr_apply_whitelist() All other parameters can be extracted from a single struct xe_hw_engine reference. This removes redundancy and simplifies the code. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230609143815.302540-2-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 3 +-- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- drivers/gpu/drm/xe/xe_reg_sr.c | 7 +++++-- drivers/gpu/drm/xe/xe_reg_sr.h | 4 ++-- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3799e663bad3..2458397ce8af 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -491,8 +491,7 @@ static int do_gt_restart(struct xe_gt *gt) for_each_hw_engine(hwe, gt, id) { xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, - hwe->mmio_base, gt); + xe_reg_sr_apply_whitelist(hwe); } return 0; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b42a0cb50159..68cd793cdfb5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -381,7 +381,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, XE_BUG_ON(!(gt->info.engine_mask & BIT(id))); xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(&hwe->reg_whitelist, hwe->mmio_base, gt); + xe_reg_sr_apply_whitelist(hwe); hwe->hwsp = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 8580ff38b82c..65e6ad1906c6 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -20,6 +20,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_printk.h" +#include "xe_hw_engine_types.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_reg_whitelist.h" @@ -211,12 +212,14 @@ err_force_wake: drm_err(&xe->drm, "Failed to apply, err=%d\n", err); } -void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, - struct xe_gt *gt) +void xe_reg_sr_apply_whitelist(struct 
xe_hw_engine *hwe) { + struct xe_reg_sr *sr = &hwe->reg_whitelist; + struct xe_gt *gt = hwe->gt; struct xe_device *xe = gt_to_xe(gt); struct xe_reg_sr_entry *entry; struct drm_printer p; + u32 mmio_base = hwe->mmio_base; unsigned long reg; unsigned int slot = 0; int err; diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index c3001798d9e8..e3197c33afe2 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -14,6 +14,7 @@ struct xe_device; struct xe_gt; +struct xe_hw_engine; struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); @@ -22,7 +23,6 @@ void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p); int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e, struct xe_gt *gt); void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt); -void xe_reg_sr_apply_whitelist(struct xe_reg_sr *sr, u32 mmio_base, - struct xe_gt *gt); +void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe); #endif -- cgit v1.2.3 From 5eeb8b443875f2a6f751ed2c77cc410fad6b2e61 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 9 Jun 2023 11:38:15 -0300 Subject: drm/xe/reg_sr: Apply limit to register whitelisting If RING_MAX_NONPRIV_SLOTS denotes the maximum number of whitelisting slots, then it makes sense to refuse going above it. v2: - Use xe_gt_err() instead of drm_err() for more detailed info in the error message. (Matt) Cc: Matt Roper Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230609143815.302540-3-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 65e6ad1906c6..7c88352636d2 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -235,6 +235,13 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) p = drm_debug_printer(KBUILD_MODNAME); xa_for_each(&sr->xa, reg, entry) { + if (slot == RING_MAX_NONPRIV_SLOTS) { + xe_gt_err(gt, + "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n", + hwe->name, RING_MAX_NONPRIV_SLOTS); + break; + } + xe_reg_whitelist_print_entry(&p, 0, reg, entry); xe_mmio_write32(gt, RING_FORCE_TO_NONPRIV(mmio_base, slot), reg | entry->set_bits); -- cgit v1.2.3 From 85dbfe47d07cddeac959ccc9352c4b0f1683225b Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 5 Jun 2023 15:04:54 +0200 Subject: drm/xe: Invalidate TLB also on bind if in scratch page mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For scratch table mode we need to cover the case where a scratch PTE might have been pre-fetched and cached and used instead of that of the newly bound vma. For compute vms, invalidate TLB globally using GuC before signalling bind complete. For !long-running vms, invalidate TLB at batch start. Also document how TLB invalidation works. v2: - Fix a pointer to the comment about TLB invalidation (Jose Souza). - Add a bool to the vm whether we want to invalidate TLB at batch start. - Invalidate TLB also on BCS- and video engines at batch start where needed. - Use BIT() macro instead of explicit shift. 
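The new condition in __xe_pt_bind_vma() can be restated as a pure function; the sketch below is illustrative C with simplified field names (in the driver these come from struct xe_vm and the per-tile scratch state), not the actual implementation:

/* bind_tlb_inval.c - illustrative restatement of when a bind must
 * trigger a TLB invalidation under this patch */
#include <stdbool.h>
#include <stdio.h>

struct vm_state {
        bool long_running;              /* compute/no-dma-fence VM */
        bool compute_mode;              /* DRM_XE_VM_CREATE_COMPUTE_MODE */
        bool has_scratch;               /* scratch page tables for this tile */
        bool batch_invalidate_tlb;      /* ring ops invalidate TLB at batch start */
};

static bool bind_needs_tlb_invalidation(const struct vm_state *vm, bool rebind)
{
        if (rebind)
                /* stale PTEs may be cached; LR VMs handle this when the
                 * context is re-enabled (or on PTE zap in fault mode), and
                 * ring ops may already invalidate at batch start */
                return !vm->long_running && !vm->batch_invalidate_tlb;

        /* first bind: a prefetched scratch PTE may still be cached */
        return vm->has_scratch && vm->compute_mode;
}

int main(void)
{
        struct vm_state vm = { .long_running = true, .compute_mode = true,
                               .has_scratch = true, .batch_invalidate_tlb = false };

        printf("compute VM with scratch, first bind: %d\n",
               bind_needs_tlb_invalidation(&vm, false));
        return 0;
}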
Signed-off-by: Thomas Hellström Tested-by: José Roberto de Souza #v1 Reported-by: José Roberto de Souza #v1 Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291 Acked-by: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 1 + drivers/gpu/drm/xe/xe_pt.c | 17 +++++++++-- drivers/gpu/drm/xe/xe_ring_ops.c | 47 +++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_vm.c | 2 ++ drivers/gpu/drm/xe/xe_vm_types.h | 3 ++ 5 files changed, 57 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 0f9c5b0b8a3b..1a744c508174 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -73,6 +73,7 @@ #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) +#define PIPE_CONTROL_TLB_INVALIDATE BIT(18) #define PIPE_CONTROL_PSD_SYNC (1<<17) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index bef265715000..2c472fafc811 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1297,7 +1297,20 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - if (rebind && !xe_vm_no_dma_fences(vma->vm)) { + /* + * If rebind, we have to invalidate TLB on !LR vms to invalidate + * cached PTEs point to freed memory. on LR vms this is done + * automatically when the context is re-enabled by the rebind worker, + * or in fault mode it was invalidated on PTE zapping. + * + * If !rebind, and scratch enabled VMs, there is a chance the scratch + * PTE is already cached in the TLB so it needs to be invalidated. + * on !LR VMs this is done in the ring ops preceding a batch, but on + * non-faulting LR, in particular on user-space batch buffer chaining, + * it needs to be done here. + */ + if ((rebind && !xe_vm_no_dma_fences(vm) && !vm->batch_invalidate_tlb) || + (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_compute_mode(vm))) { ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); @@ -1313,7 +1326,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, LLIST_HEAD(deferred); /* TLB invalidation must be done before signaling rebind */ - if (rebind && !xe_vm_no_dma_fences(vma->vm)) { + if (ifence) { int err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); if (err) { diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 45117a2ab1a0..215606b5fae0 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -15,6 +15,7 @@ #include "xe_macros.h" #include "xe_sched_job.h" #include "xe_vm_types.h" +#include "xe_vm.h" /* * 3D-related flags that can't be set on _engines_ that lack access to the 3D @@ -74,9 +75,11 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) return i; } -static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) +static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, + u32 *dw, int i) { - dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | + (invalidate_tlb ? 
MI_INVALIDATE_TLB : 0); dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; dw[i++] = value; @@ -107,7 +110,8 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i) return i; } -static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i) +static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, + int i) { u32 flags = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | @@ -119,6 +123,9 @@ static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX; + if (invalidate_tlb) + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags &= ~mask_flags; dw[i++] = GFX_OP_PIPE_CONTROL(6); @@ -170,9 +177,17 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + struct xe_vm *vm = job->engine->vm; + + if (vm->batch_invalidate_tlb) { + dw[i++] = preparser_disable(true); + i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, true, dw, i); + dw[i++] = preparser_disable(false); + } else { + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); + } i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); @@ -181,7 +196,7 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, job->user_fence.value, dw, i); - i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i); + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); i = emit_user_interrupt(dw, i); @@ -210,6 +225,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, struct xe_gt *gt = job->engine->gt; struct xe_device *xe = gt_to_xe(gt); bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE; + struct xe_vm *vm = job->engine->vm; dw[i++] = preparser_disable(true); @@ -220,10 +236,16 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, else i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); } + + if (vm->batch_invalidate_tlb) + i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, true, dw, i); + dw[i++] = preparser_disable(false); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + if (!vm->batch_invalidate_tlb) + i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), + seqno, dw, i); i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); @@ -232,7 +254,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, job->user_fence.value, dw, i); - i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i); + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); i = emit_user_interrupt(dw, i); @@ -250,6 +272,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->engine->gt; struct xe_device *xe = gt_to_xe(gt); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); + struct xe_vm *vm = job->engine->vm; u32 mask_flags = 0; dw[i++] = preparser_disable(true); @@ -257,7 +280,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; - i = emit_pipe_invalidate(mask_flags, dw, i); + + /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. 
*/ + i = emit_pipe_invalidate(mask_flags, vm->batch_invalidate_tlb, dw, i); /* hsdes: 1809175790 */ if (has_aux_ccs(xe)) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 17b7d543ae49..ea205244fcf9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1237,11 +1237,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (err) goto err_scratch_pt; } + vm->batch_invalidate_tlb = true; } if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) { INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); vm->flags |= XE_VM_FLAG_COMPUTE_MODE; + vm->batch_invalidate_tlb = false; } if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) { diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 76af6ac0fa84..5242236b4b0e 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -337,6 +337,9 @@ struct xe_vm { /** @capture_once: capture only one error per VM */ bool capture_once; } error_capture; + + /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ + bool batch_invalidate_tlb; }; #endif -- cgit v1.2.3 From 9f8f93bee3efdba3bf7853befe2219e3a300c305 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 2 Jun 2023 14:44:23 +0200 Subject: drm/xe: Emit a render cache flush after each rcs/ccs batch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to flush render caches before fence signalling, where we might release the memory for reuse. We can't rely on userspace doing this, so flush render caches after the batch, but before user fence- and dma_fence signalling. Copy the cache flush from i915, but omit PIPE_CONTROL_FLUSH_L3, since it should be implied by the other flushes. Also omit PIPE_CONTROL_TLB_INVALIDATE since there should be no apparent need to invalidate TLB after batch completion. v2: - Update Makefile for OOB WA. Signed-off-by: Thomas Hellström Tested-by: José Roberto de Souza Reviewed-by: José Roberto de Souza #1 Reported-by: José Roberto de Souza Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 3 +++ drivers/gpu/drm/xe/xe_ring_ops.c | 35 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 4 files changed, 40 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index c914d02d8a8c..73100c246a74 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules $(call cmd,wa_oob) -$(obj)/xe_guc.o $(obj)/xe_wa.o: $(generated_oob) +$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o: $(generated_oob) # Please keep these build lists sorted! 
diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 1a744c508174..12120dd37aa2 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -66,6 +66,9 @@ #define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) + +#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ + #define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) #define PIPE_CONTROL_AMFS_FLUSH (1<<25) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 215606b5fae0..4cfd78e1ffa5 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -5,6 +5,7 @@ #include "xe_ring_ops.h" +#include "generated/xe_wa_oob.h" #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" @@ -16,6 +17,7 @@ #include "xe_sched_job.h" #include "xe_vm_types.h" #include "xe_vm.h" +#include "xe_wa.h" /* * 3D-related flags that can't be set on _engines_ that lack access to the 3D @@ -152,6 +154,37 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, return i; } +static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) +{ + struct xe_gt *gt = job->engine->gt; + bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); + u32 flags; + + flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + if (XE_WA(gt, 1409600907)) + flags |= PIPE_CONTROL_DEPTH_STALL; + + if (lacks_render) + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + + dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH; + dw[i++] = flags; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, int i) { @@ -295,6 +328,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + i = emit_render_cache_flush(job, dw, i); + if (job->user_fence.used) i = emit_store_imm_ppgtt_posted(job->user_fence.addr, job->user_fence.value, diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 1ecb10390b28..15c23813398a 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -14,3 +14,4 @@ SUBPLATFORM(DG2, G12) 18020744125 PLATFORM(PVC) 1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) +1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) -- cgit v1.2.3 From 790bdc7cb2e7dafbac0aafc016dcb7493c925bac Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Jun 2023 11:09:37 -0700 Subject: drm/xe: Handle unmapped userptr in analyze VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A corner exists where a userptr may have no mapping when analyze VM is called, handle this case. 
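The handling amounts to a simple guard before walking the userptr's sg table. A minimal sketch of the pattern, mirroring the xe_vm.c hunk below (vma is the VMA being dumped, assumed from the surrounding xe_analyze_vm() code):

	struct xe_res_cursor cur;
	u64 addr;

	if (vma->userptr.sg) {
		/* Pages are pinned: report the DMA address of the first page. */
		xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, &cur);
		addr = xe_res_dma(&cur);
	} else {
		/* Userptr has no mapping yet: report 0 rather than walking a NULL sg. */
		addr = 0;
	}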
Reviewed-by: José Roberto de Souza Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ea205244fcf9..fa4778bfd063 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3455,9 +3455,13 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) if (is_userptr) { struct xe_res_cursor cur; - xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, - &cur); - addr = xe_res_dma(&cur); + if (vma->userptr.sg) { + xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, + &cur); + addr = xe_res_dma(&cur); + } else { + addr = 0; + } } else { addr = __xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram); } -- cgit v1.2.3 From 5e3220de6c72349f77977c62a991748d4e0fea26 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Jun 2023 11:19:30 -0700 Subject: drm/xe: Use Xe ordered workqueue for rebind worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A mix of the system unbound wq and Xe ordered wq was used for the rebind, only use the Xe ordered wq. This will ensure only 1 rebind is occuring at a time providing a somewhat clunky work around for short comings in TTM wrt to memory contention. Once the TTM memory contention is resolved we should be able to use a dedicated non-ordered workqueue. Also add helper to queue rebind worker to avoid using wrong workqueue going forward. Reviewed-by: José Roberto de Souza Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_preempt_fence.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 3 +-- drivers/gpu/drm/xe/xe_vm.h | 6 ++++++ 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 78ad8c209873..219eefeb90ff 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -25,7 +25,7 @@ static void preempt_fence_work_func(struct work_struct *w) dma_fence_signal(&pfence->base); dma_fence_end_signalling(cookie); - queue_work(system_unbound_wq, &e->vm->preempt.rebind_work); + xe_vm_queue_rebind_worker(e->vm); xe_engine_put(e); } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index fa4778bfd063..be629783050f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3086,8 +3086,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) /* Rebinds may have been blocked, give worker a kick */ if (xe_vm_in_compute_mode(vm)) - queue_work(vm->xe->ordered_wq, - &vm->preempt.rebind_work); + xe_vm_queue_rebind_worker(vm); } goto put_engine; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 372f26153209..bb2996856841 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -124,6 +124,12 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma); int xe_vma_userptr_check_repin(struct xe_vma *vma); +static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) +{ + XE_WARN_ON(!xe_vm_in_compute_mode(vm)); + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); +} + /* * XE_ONSTACK_TV is used to size the tv_onstack array that is input * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv(). 
-- cgit v1.2.3 From 3534b18c360525b4cff67b90db45d7b9e365bdf2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 7 Jun 2023 11:43:52 -0700 Subject: drm/xe: s/XE_PTE_READ_ONLY/XE_PTE_FLAG_READ_ONLY This define is for internal PTE flags rather than fields in the hardware PTEs, rename as such. This will help in an upcoming patch to avoid further confusion. Reviewed-by: Francois Dugast Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 2 +- drivers/gpu/drm/xe/xe_pt.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 12 +++++++----- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 29eb7474f018..552fe073e9c5 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -65,7 +65,7 @@ #define XE_PAGE_PRESENT BIT_ULL(0) #define XE_PAGE_RW BIT_ULL(1) -#define XE_PTE_READ_ONLY BIT(0) +#define XE_PTE_FLAG_READ_ONLY BIT(0) #define XE_PL_SYSTEM TTM_PL_SYSTEM #define XE_PL_TT TTM_PL_TT diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 2c472fafc811..1ba93c2861ab 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -102,7 +102,7 @@ static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, { pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - if (unlikely(flags & XE_PTE_READ_ONLY)) + if (unlikely(flags & XE_PTE_FLAG_READ_ONLY)) pte &= ~XE_PAGE_RW; /* FIXME: I don't think the PPAT handling is correct for MTL */ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index be629783050f..51daa5fd7821 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -61,7 +61,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma) bool in_kthread = !current->mm; unsigned long notifier_seq; int pinned, ret, i; - bool read_only = vma->pte_flags & XE_PTE_READ_ONLY; + bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY; lockdep_assert_held(&vm->lock); XE_BUG_ON(!xe_vma_is_userptr(vma)); @@ -869,7 +869,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->start = start; vma->end = end; if (read_only) - vma->pte_flags = XE_PTE_READ_ONLY; + vma->pte_flags = XE_PTE_FLAG_READ_ONLY; if (tile_mask) { vma->tile_mask = tile_mask; @@ -923,7 +923,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) { struct xe_vm *vm = vma->vm; struct xe_device *xe = vm->xe; - bool read_only = vma->pte_flags & XE_PTE_READ_ONLY; + bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY; if (xe_vma_is_userptr(vma)) { if (vma->userptr.sg) { @@ -2643,7 +2643,8 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, first->userptr.ptr, first->start, lookup->start - 1, - (first->pte_flags & XE_PTE_READ_ONLY), + (first->pte_flags & + XE_PTE_FLAG_READ_ONLY), first->tile_mask); if (first->bo) xe_bo_unlock(first->bo, &ww); @@ -2674,7 +2675,8 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, last->userptr.ptr + chunk, last->start + chunk, last->end, - (last->pte_flags & XE_PTE_READ_ONLY), + (last->pte_flags & + XE_PTE_FLAG_READ_ONLY), last->tile_mask); if (last->bo) xe_bo_unlock(last->bo, &ww); -- cgit v1.2.3 From 11f9eb899ecc8c02b769cf8d2532ba12786a7af7 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 14 Dec 2023 17:38:06 +0100 Subject: drm/mgag200: Fix gamma lut not initialized for G200ER, G200EV, G200SE When mgag200 switched from simple KMS to regular atomic helpers, the initialization of the gamma settings was lost. 
This leads to a black screen, if the bios/uefi doesn't use the same pixel color depth. This has been fixed with commit ad81e23426a6 ("drm/mgag200: Fix gamma lut not initialized.") for most G200, but G200ER, G200EV, G200SE use their own version of crtc_helper_atomic_enable() and need to be fixed too. Fixes: 1baf9127c482 ("drm/mgag200: Replace simple-KMS with regular atomic helpers") Cc: #v6.1+ Reported-by: Roger Sewell Suggested-by: Roger Sewell Signed-off-by: Jocelyn Falempe Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20231214163849.359691-1-jfalempe@redhat.com --- drivers/gpu/drm/mgag200/mgag200_drv.h | 5 +++++ drivers/gpu/drm/mgag200/mgag200_g200er.c | 5 +++++ drivers/gpu/drm/mgag200/mgag200_g200ev.c | 5 +++++ drivers/gpu/drm/mgag200/mgag200_g200se.c | 5 +++++ drivers/gpu/drm/mgag200/mgag200_mode.c | 10 +++++----- 5 files changed, 25 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 57c7edcab602..765e49fd8911 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -392,6 +392,11 @@ void mgag200_primary_plane_helper_atomic_disable(struct drm_plane *plane, .destroy = drm_plane_cleanup, \ DRM_GEM_SHADOW_PLANE_FUNCS +void mgag200_crtc_set_gamma_linear(struct mga_device *mdev, const struct drm_format_info *format); +void mgag200_crtc_set_gamma(struct mga_device *mdev, + const struct drm_format_info *format, + struct drm_color_lut *lut); + enum drm_mode_status mgag200_crtc_helper_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode); int mgag200_crtc_helper_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *new_state); diff --git a/drivers/gpu/drm/mgag200/mgag200_g200er.c b/drivers/gpu/drm/mgag200/mgag200_g200er.c index bce267e0f7de..8d4538b71047 100644 --- a/drivers/gpu/drm/mgag200/mgag200_g200er.c +++ b/drivers/gpu/drm/mgag200/mgag200_g200er.c @@ -202,6 +202,11 @@ static void mgag200_g200er_crtc_helper_atomic_enable(struct drm_crtc *crtc, mgag200_g200er_reset_tagfifo(mdev); + if (crtc_state->gamma_lut) + mgag200_crtc_set_gamma(mdev, format, crtc_state->gamma_lut->data); + else + mgag200_crtc_set_gamma_linear(mdev, format); + mgag200_enable_display(mdev); if (funcs->enable_vidrst) diff --git a/drivers/gpu/drm/mgag200/mgag200_g200ev.c b/drivers/gpu/drm/mgag200/mgag200_g200ev.c index ac957f42abe1..56e6f986bff3 100644 --- a/drivers/gpu/drm/mgag200/mgag200_g200ev.c +++ b/drivers/gpu/drm/mgag200/mgag200_g200ev.c @@ -203,6 +203,11 @@ static void mgag200_g200ev_crtc_helper_atomic_enable(struct drm_crtc *crtc, mgag200_g200ev_set_hiprilvl(mdev); + if (crtc_state->gamma_lut) + mgag200_crtc_set_gamma(mdev, format, crtc_state->gamma_lut->data); + else + mgag200_crtc_set_gamma_linear(mdev, format); + mgag200_enable_display(mdev); if (funcs->enable_vidrst) diff --git a/drivers/gpu/drm/mgag200/mgag200_g200se.c b/drivers/gpu/drm/mgag200/mgag200_g200se.c index bd6e573c9a1a..ff2b3c6622e7 100644 --- a/drivers/gpu/drm/mgag200/mgag200_g200se.c +++ b/drivers/gpu/drm/mgag200/mgag200_g200se.c @@ -334,6 +334,11 @@ static void mgag200_g200se_crtc_helper_atomic_enable(struct drm_crtc *crtc, mgag200_g200se_set_hiprilvl(mdev, adjusted_mode, format); + if (crtc_state->gamma_lut) + mgag200_crtc_set_gamma(mdev, format, crtc_state->gamma_lut->data); + else + mgag200_crtc_set_gamma_linear(mdev, format); + mgag200_enable_display(mdev); if (funcs->enable_vidrst) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c 
b/drivers/gpu/drm/mgag200/mgag200_mode.c index af3ce5a6a636..0f0d59938c3a 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -28,8 +28,8 @@ * This file contains setup code for the CRTC. */ -static void mgag200_crtc_set_gamma_linear(struct mga_device *mdev, - const struct drm_format_info *format) +void mgag200_crtc_set_gamma_linear(struct mga_device *mdev, + const struct drm_format_info *format) { int i; @@ -65,9 +65,9 @@ static void mgag200_crtc_set_gamma_linear(struct mga_device *mdev, } } -static void mgag200_crtc_set_gamma(struct mga_device *mdev, - const struct drm_format_info *format, - struct drm_color_lut *lut) +void mgag200_crtc_set_gamma(struct mga_device *mdev, + const struct drm_format_info *format, + struct drm_color_lut *lut) { int i; -- cgit v1.2.3 From c748a6d77c06a78651030e17da6beb278a1c9470 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 19 Dec 2023 16:30:24 +0000 Subject: pwm: Rename pwm_apply_state() to pwm_apply_might_sleep() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to introduce a pwm api which can be used from atomic context, we will need two functions for applying pwm changes: int pwm_apply_might_sleep(struct pwm *, struct pwm_state *); int pwm_apply_atomic(struct pwm *, struct pwm_state *); This commit just deals with renaming pwm_apply_state(), a following commit will introduce the pwm_apply_atomic() function. Acked-by: Uwe Kleine-König Acked-by: Guenter Roeck Acked-by: Mark Brown Acked-by: Dmitry Torokhov # for input Acked-by: Hans de Goede Acked-by: Jani Nikula Acked-by: Lee Jones Signed-off-by: Sean Young Signed-off-by: Thierry Reding --- Documentation/driver-api/pwm.rst | 8 ++++---- MAINTAINERS | 2 +- drivers/gpu/drm/i915/display/intel_backlight.c | 6 +++--- drivers/gpu/drm/solomon/ssd130x.c | 2 +- drivers/hwmon/pwm-fan.c | 8 ++++---- drivers/input/misc/da7280.c | 4 ++-- drivers/input/misc/pwm-beeper.c | 4 ++-- drivers/input/misc/pwm-vibra.c | 8 ++++---- drivers/leds/leds-pwm.c | 2 +- drivers/leds/rgb/leds-pwm-multicolor.c | 4 ++-- drivers/media/rc/pwm-ir-tx.c | 4 ++-- drivers/platform/x86/lenovo-yogabook.c | 2 +- drivers/pwm/core.c | 18 ++++++++--------- drivers/pwm/pwm-twl-led.c | 2 +- drivers/pwm/pwm-vt8500.c | 2 +- drivers/pwm/sysfs.c | 10 ++++----- drivers/regulator/pwm-regulator.c | 4 ++-- drivers/video/backlight/lm3630a_bl.c | 2 +- drivers/video/backlight/lp855x_bl.c | 2 +- drivers/video/backlight/pwm_bl.c | 12 +++++------ drivers/video/fbdev/ssd1307fb.c | 2 +- include/linux/pwm.h | 28 +++++++++++++------------- 22 files changed, 68 insertions(+), 68 deletions(-) (limited to 'drivers/gpu') diff --git a/Documentation/driver-api/pwm.rst b/Documentation/driver-api/pwm.rst index bb264490a87a..f1d8197c8c43 100644 --- a/Documentation/driver-api/pwm.rst +++ b/Documentation/driver-api/pwm.rst @@ -41,7 +41,7 @@ the getter, devm_pwm_get() and devm_fwnode_pwm_get(), also exist. After being requested, a PWM has to be configured using:: - int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state); + int pwm_apply_might_sleep(struct pwm_device *pwm, struct pwm_state *state); This API controls both the PWM period/duty_cycle config and the enable/disable state. @@ -57,13 +57,13 @@ If supported by the driver, the signal can be optimized, for example to improve EMI by phase shifting the individual channels of a chip. 
The pwm_config(), pwm_enable() and pwm_disable() functions are just wrappers -around pwm_apply_state() and should not be used if the user wants to change +around pwm_apply_might_sleep() and should not be used if the user wants to change several parameter at once. For example, if you see pwm_config() and pwm_{enable,disable}() calls in the same function, this probably means you -should switch to pwm_apply_state(). +should switch to pwm_apply_might_sleep(). The PWM user API also allows one to query the PWM state that was passed to the -last invocation of pwm_apply_state() using pwm_get_state(). Note this is +last invocation of pwm_apply_might_sleep() using pwm_get_state(). Note this is different to what the driver has actually implemented if the request cannot be satisfied exactly with the hardware in use. There is currently no way for consumers to get the actually implemented settings. diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cf..c58480595220 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17576,7 +17576,7 @@ F: drivers/video/backlight/pwm_bl.c F: include/dt-bindings/pwm/ F: include/linux/pwm.h F: include/linux/pwm_backlight.h -K: pwm_(config|apply_state|ops) +K: pwm_(config|apply_might_sleep|ops) PXA GPIO DRIVER M: Robert Jarzmik diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 2e8f17c04522..ff9b9918b0a1 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -274,7 +274,7 @@ static void ext_pwm_set_backlight(const struct drm_connector_state *conn_state, struct intel_panel *panel = &to_intel_connector(conn_state->connector)->panel; pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100); - pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); + pwm_apply_might_sleep(panel->backlight.pwm, &panel->backlight.pwm_state); } static void @@ -427,7 +427,7 @@ static void ext_pwm_disable_backlight(const struct drm_connector_state *old_conn intel_backlight_set_pwm_level(old_conn_state, level); panel->backlight.pwm_state.enabled = false; - pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); + pwm_apply_might_sleep(panel->backlight.pwm, &panel->backlight.pwm_state); } void intel_backlight_disable(const struct drm_connector_state *old_conn_state) @@ -749,7 +749,7 @@ static void ext_pwm_enable_backlight(const struct intel_crtc_state *crtc_state, pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100); panel->backlight.pwm_state.enabled = true; - pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); + pwm_apply_might_sleep(panel->backlight.pwm, &panel->backlight.pwm_state); } static void __intel_backlight_enable(const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index e0174f82e353..cce043a4a1dc 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -319,7 +319,7 @@ static int ssd130x_pwm_enable(struct ssd130x_device *ssd130x) pwm_init_state(ssd130x->pwm, &pwmstate); pwm_set_relative_duty_cycle(&pwmstate, 50, 100); - pwm_apply_state(ssd130x->pwm, &pwmstate); + pwm_apply_might_sleep(ssd130x->pwm, &pwmstate); /* Enable the PWM */ pwm_enable(ssd130x->pwm); diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 6e4516c2ab89..b67bc9e833c0 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -151,7 +151,7 @@ static int pwm_fan_power_on(struct pwm_fan_ctx *ctx) } 
state->enabled = true; - ret = pwm_apply_state(ctx->pwm, state); + ret = pwm_apply_might_sleep(ctx->pwm, state); if (ret) { dev_err(ctx->dev, "failed to enable PWM\n"); goto disable_regulator; @@ -181,7 +181,7 @@ static int pwm_fan_power_off(struct pwm_fan_ctx *ctx) state->enabled = false; state->duty_cycle = 0; - ret = pwm_apply_state(ctx->pwm, state); + ret = pwm_apply_might_sleep(ctx->pwm, state); if (ret) { dev_err(ctx->dev, "failed to disable PWM\n"); return ret; @@ -207,7 +207,7 @@ static int __set_pwm(struct pwm_fan_ctx *ctx, unsigned long pwm) period = state->period; state->duty_cycle = DIV_ROUND_UP(pwm * (period - 1), MAX_PWM); - ret = pwm_apply_state(ctx->pwm, state); + ret = pwm_apply_might_sleep(ctx->pwm, state); if (ret) return ret; ret = pwm_fan_power_on(ctx); @@ -278,7 +278,7 @@ static int pwm_fan_update_enable(struct pwm_fan_ctx *ctx, long val) state, &enable_regulator); - pwm_apply_state(ctx->pwm, state); + pwm_apply_might_sleep(ctx->pwm, state); pwm_fan_switch_power(ctx, enable_regulator); pwm_fan_update_state(ctx, 0); } diff --git a/drivers/input/misc/da7280.c b/drivers/input/misc/da7280.c index ce82548916bb..c1fa75c0f970 100644 --- a/drivers/input/misc/da7280.c +++ b/drivers/input/misc/da7280.c @@ -352,7 +352,7 @@ static int da7280_haptic_set_pwm(struct da7280_haptic *haptics, bool enabled) state.duty_cycle = period_mag_multi; } - error = pwm_apply_state(haptics->pwm_dev, &state); + error = pwm_apply_might_sleep(haptics->pwm_dev, &state); if (error) dev_err(haptics->dev, "Failed to apply pwm state: %d\n", error); @@ -1175,7 +1175,7 @@ static int da7280_probe(struct i2c_client *client) /* Sync up PWM state and ensure it is off. */ pwm_init_state(haptics->pwm_dev, &state); state.enabled = false; - error = pwm_apply_state(haptics->pwm_dev, &state); + error = pwm_apply_might_sleep(haptics->pwm_dev, &state); if (error) { dev_err(dev, "Failed to apply PWM state: %d\n", error); return error; diff --git a/drivers/input/misc/pwm-beeper.c b/drivers/input/misc/pwm-beeper.c index 1e731d8397c6..5b9aedf4362f 100644 --- a/drivers/input/misc/pwm-beeper.c +++ b/drivers/input/misc/pwm-beeper.c @@ -39,7 +39,7 @@ static int pwm_beeper_on(struct pwm_beeper *beeper, unsigned long period) state.period = period; pwm_set_relative_duty_cycle(&state, 50, 100); - error = pwm_apply_state(beeper->pwm, &state); + error = pwm_apply_might_sleep(beeper->pwm, &state); if (error) return error; @@ -138,7 +138,7 @@ static int pwm_beeper_probe(struct platform_device *pdev) /* Sync up PWM state and ensure it is off. 
*/ pwm_init_state(beeper->pwm, &state); state.enabled = false; - error = pwm_apply_state(beeper->pwm, &state); + error = pwm_apply_might_sleep(beeper->pwm, &state); if (error) { dev_err(dev, "failed to apply initial PWM state: %d\n", error); diff --git a/drivers/input/misc/pwm-vibra.c b/drivers/input/misc/pwm-vibra.c index acac79c488aa..3e5ed685ed8f 100644 --- a/drivers/input/misc/pwm-vibra.c +++ b/drivers/input/misc/pwm-vibra.c @@ -56,7 +56,7 @@ static int pwm_vibrator_start(struct pwm_vibrator *vibrator) pwm_set_relative_duty_cycle(&state, vibrator->level, 0xffff); state.enabled = true; - err = pwm_apply_state(vibrator->pwm, &state); + err = pwm_apply_might_sleep(vibrator->pwm, &state); if (err) { dev_err(pdev, "failed to apply pwm state: %d\n", err); return err; @@ -67,7 +67,7 @@ static int pwm_vibrator_start(struct pwm_vibrator *vibrator) state.duty_cycle = vibrator->direction_duty_cycle; state.enabled = true; - err = pwm_apply_state(vibrator->pwm_dir, &state); + err = pwm_apply_might_sleep(vibrator->pwm_dir, &state); if (err) { dev_err(pdev, "failed to apply dir-pwm state: %d\n", err); pwm_disable(vibrator->pwm); @@ -160,7 +160,7 @@ static int pwm_vibrator_probe(struct platform_device *pdev) /* Sync up PWM state and ensure it is off. */ pwm_init_state(vibrator->pwm, &state); state.enabled = false; - err = pwm_apply_state(vibrator->pwm, &state); + err = pwm_apply_might_sleep(vibrator->pwm, &state); if (err) { dev_err(&pdev->dev, "failed to apply initial PWM state: %d\n", err); @@ -174,7 +174,7 @@ static int pwm_vibrator_probe(struct platform_device *pdev) /* Sync up PWM state and ensure it is off. */ pwm_init_state(vibrator->pwm_dir, &state); state.enabled = false; - err = pwm_apply_state(vibrator->pwm_dir, &state); + err = pwm_apply_might_sleep(vibrator->pwm_dir, &state); if (err) { dev_err(&pdev->dev, "failed to apply initial PWM state: %d\n", err); diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c index 2b3bf1353b70..4e3936a39d0e 100644 --- a/drivers/leds/leds-pwm.c +++ b/drivers/leds/leds-pwm.c @@ -54,7 +54,7 @@ static int led_pwm_set(struct led_classdev *led_cdev, led_dat->pwmstate.duty_cycle = duty; led_dat->pwmstate.enabled = true; - return pwm_apply_state(led_dat->pwm, &led_dat->pwmstate); + return pwm_apply_might_sleep(led_dat->pwm, &led_dat->pwmstate); } __attribute__((nonnull)) diff --git a/drivers/leds/rgb/leds-pwm-multicolor.c b/drivers/leds/rgb/leds-pwm-multicolor.c index 46cd062b8b24..e1a81e0109e8 100644 --- a/drivers/leds/rgb/leds-pwm-multicolor.c +++ b/drivers/leds/rgb/leds-pwm-multicolor.c @@ -51,8 +51,8 @@ static int led_pwm_mc_set(struct led_classdev *cdev, priv->leds[i].state.duty_cycle = duty; priv->leds[i].state.enabled = duty > 0; - ret = pwm_apply_state(priv->leds[i].pwm, - &priv->leds[i].state); + ret = pwm_apply_might_sleep(priv->leds[i].pwm, + &priv->leds[i].state); if (ret) break; } diff --git a/drivers/media/rc/pwm-ir-tx.c b/drivers/media/rc/pwm-ir-tx.c index c5f37c03af9c..cf51e2760975 100644 --- a/drivers/media/rc/pwm-ir-tx.c +++ b/drivers/media/rc/pwm-ir-tx.c @@ -68,7 +68,7 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf, for (i = 0; i < count; i++) { state.enabled = !(i % 2); - pwm_apply_state(pwm, &state); + pwm_apply_might_sleep(pwm, &state); edge = ktime_add_us(edge, txbuf[i]); delta = ktime_us_delta(edge, ktime_get()); @@ -77,7 +77,7 @@ static int pwm_ir_tx(struct rc_dev *dev, unsigned int *txbuf, } state.enabled = false; - pwm_apply_state(pwm, &state); + pwm_apply_might_sleep(pwm, &state); return count; } diff --git 
a/drivers/platform/x86/lenovo-yogabook.c b/drivers/platform/x86/lenovo-yogabook.c index b8d0239192cb..fd62bf746ebd 100644 --- a/drivers/platform/x86/lenovo-yogabook.c +++ b/drivers/platform/x86/lenovo-yogabook.c @@ -435,7 +435,7 @@ static int yogabook_pdev_set_kbd_backlight(struct yogabook_data *data, u8 level) .enabled = level, }; - pwm_apply_state(data->kbd_bl_pwm, &state); + pwm_apply_might_sleep(data->kbd_bl_pwm, &state); gpiod_set_value(data->kbd_bl_led_enable, level ? 1 : 0); return 0; } diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index b0e50ca9398a..c6228843a1a7 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -326,8 +326,8 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, } EXPORT_SYMBOL_GPL(pwm_request_from_chip); -static void pwm_apply_state_debug(struct pwm_device *pwm, - const struct pwm_state *state) +static void pwm_apply_debug(struct pwm_device *pwm, + const struct pwm_state *state) { struct pwm_state *last = &pwm->last; struct pwm_chip *chip = pwm->chip; @@ -433,11 +433,11 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, } /** - * pwm_apply_state() - atomically apply a new state to a PWM device + * pwm_apply_might_sleep() - atomically apply a new state to a PWM device * @pwm: PWM device * @state: new state to apply */ -int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) +int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state) { struct pwm_chip *chip; int err; @@ -445,7 +445,7 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) /* * Some lowlevel driver's implementations of .apply() make use of * mutexes, also with some drivers only returning when the new - * configuration is active calling pwm_apply_state() from atomic context + * configuration is active calling pwm_apply_might_sleep() from atomic context * is a bad idea. So make it explicit that calling this function might * sleep. */ @@ -475,11 +475,11 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) * only do this after pwm->state was applied as some * implementations of .get_state depend on this */ - pwm_apply_state_debug(pwm, state); + pwm_apply_debug(pwm, state); return 0; } -EXPORT_SYMBOL_GPL(pwm_apply_state); +EXPORT_SYMBOL_GPL(pwm_apply_might_sleep); /** * pwm_capture() - capture and report a PWM signal @@ -537,7 +537,7 @@ int pwm_adjust_config(struct pwm_device *pwm) state.period = pargs.period; state.polarity = pargs.polarity; - return pwm_apply_state(pwm, &state); + return pwm_apply_might_sleep(pwm, &state); } /* @@ -560,7 +560,7 @@ int pwm_adjust_config(struct pwm_device *pwm) state.duty_cycle = state.period - state.duty_cycle; } - return pwm_apply_state(pwm, &state); + return pwm_apply_might_sleep(pwm, &state); } EXPORT_SYMBOL_GPL(pwm_adjust_config); diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c index 8a870d0db3c6..c670ccb81653 100644 --- a/drivers/pwm/pwm-twl-led.c +++ b/drivers/pwm/pwm-twl-led.c @@ -172,7 +172,7 @@ static int twl4030_pwmled_apply(struct pwm_chip *chip, struct pwm_device *pwm, * We cannot skip calling ->config even if state->period == * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle * because we might have exited early in the last call to - * pwm_apply_state because of !state->enabled and so the two values in + * pwm_apply_might_sleep because of !state->enabled and so the two values in * pwm->state might not be configured in hardware. 
*/ ret = twl4030_pwmled_config(chip, pwm, diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c index bdea60389487..7bfeacee05d0 100644 --- a/drivers/pwm/pwm-vt8500.c +++ b/drivers/pwm/pwm-vt8500.c @@ -206,7 +206,7 @@ static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, * We cannot skip calling ->config even if state->period == * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle * because we might have exited early in the last call to - * pwm_apply_state because of !state->enabled and so the two values in + * pwm_apply_might_sleep because of !state->enabled and so the two values in * pwm->state might not be configured in hardware. */ err = vt8500_pwm_config(chip, pwm, state->duty_cycle, state->period); diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index 4edb994fa2e1..1698609d91c8 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -62,7 +62,7 @@ static ssize_t period_store(struct device *child, mutex_lock(&export->lock); pwm_get_state(pwm, &state); state.period = val; - ret = pwm_apply_state(pwm, &state); + ret = pwm_apply_might_sleep(pwm, &state); mutex_unlock(&export->lock); return ret ? : size; @@ -97,7 +97,7 @@ static ssize_t duty_cycle_store(struct device *child, mutex_lock(&export->lock); pwm_get_state(pwm, &state); state.duty_cycle = val; - ret = pwm_apply_state(pwm, &state); + ret = pwm_apply_might_sleep(pwm, &state); mutex_unlock(&export->lock); return ret ? : size; @@ -144,7 +144,7 @@ static ssize_t enable_store(struct device *child, goto unlock; } - ret = pwm_apply_state(pwm, &state); + ret = pwm_apply_might_sleep(pwm, &state); unlock: mutex_unlock(&export->lock); @@ -194,7 +194,7 @@ static ssize_t polarity_store(struct device *child, mutex_lock(&export->lock); pwm_get_state(pwm, &state); state.polarity = polarity; - ret = pwm_apply_state(pwm, &state); + ret = pwm_apply_might_sleep(pwm, &state); mutex_unlock(&export->lock); return ret ? : size; @@ -401,7 +401,7 @@ static int pwm_class_apply_state(struct pwm_export *export, struct pwm_device *pwm, struct pwm_state *state) { - int ret = pwm_apply_state(pwm, state); + int ret = pwm_apply_might_sleep(pwm, state); /* release lock taken in pwm_class_get_state */ mutex_unlock(&export->lock); diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 2aff6db748e2..698c420e0869 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -90,7 +90,7 @@ static int pwm_regulator_set_voltage_sel(struct regulator_dev *rdev, pwm_set_relative_duty_cycle(&pstate, drvdata->duty_cycle_table[selector].dutycycle, 100); - ret = pwm_apply_state(drvdata->pwm, &pstate); + ret = pwm_apply_might_sleep(drvdata->pwm, &pstate); if (ret) { dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret); return ret; @@ -216,7 +216,7 @@ static int pwm_regulator_set_voltage(struct regulator_dev *rdev, pwm_set_relative_duty_cycle(&pstate, dutycycle, duty_unit); - ret = pwm_apply_state(drvdata->pwm, &pstate); + ret = pwm_apply_might_sleep(drvdata->pwm, &pstate); if (ret) { dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret); return ret; diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c index 8fcb62be597b..a3412c936ca2 100644 --- a/drivers/video/backlight/lm3630a_bl.c +++ b/drivers/video/backlight/lm3630a_bl.c @@ -180,7 +180,7 @@ static int lm3630a_pwm_ctrl(struct lm3630a_chip *pchip, int br, int br_max) pchip->pwmd_state.enabled = pchip->pwmd_state.duty_cycle ? 
true : false; - return pwm_apply_state(pchip->pwmd, &pchip->pwmd_state); + return pwm_apply_might_sleep(pchip->pwmd, &pchip->pwmd_state); } /* update and get brightness */ diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c index da1f124db69c..7075bfab59c4 100644 --- a/drivers/video/backlight/lp855x_bl.c +++ b/drivers/video/backlight/lp855x_bl.c @@ -234,7 +234,7 @@ static int lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br) state.duty_cycle = div_u64(br * state.period, max_br); state.enabled = state.duty_cycle; - return pwm_apply_state(lp->pwm, &state); + return pwm_apply_might_sleep(lp->pwm, &state); } static int lp855x_bl_update_status(struct backlight_device *bl) diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index 289bd9ce4d36..35c716e9043c 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -103,7 +103,7 @@ static int pwm_backlight_update_status(struct backlight_device *bl) pwm_get_state(pb->pwm, &state); state.duty_cycle = compute_duty_cycle(pb, brightness, &state); state.enabled = true; - pwm_apply_state(pb->pwm, &state); + pwm_apply_might_sleep(pb->pwm, &state); pwm_backlight_power_on(pb); } else { @@ -120,7 +120,7 @@ static int pwm_backlight_update_status(struct backlight_device *bl) * inactive output. */ state.enabled = !pb->power_supply && !pb->enable_gpio; - pwm_apply_state(pb->pwm, &state); + pwm_apply_might_sleep(pb->pwm, &state); } if (pb->notify_after) @@ -528,7 +528,7 @@ static int pwm_backlight_probe(struct platform_device *pdev) if (!state.period && (data->pwm_period_ns > 0)) state.period = data->pwm_period_ns; - ret = pwm_apply_state(pb->pwm, &state); + ret = pwm_apply_might_sleep(pb->pwm, &state); if (ret) { dev_err(&pdev->dev, "failed to apply initial PWM state: %d\n", ret); @@ -633,7 +633,7 @@ static void pwm_backlight_remove(struct platform_device *pdev) pwm_get_state(pb->pwm, &state); state.duty_cycle = 0; state.enabled = false; - pwm_apply_state(pb->pwm, &state); + pwm_apply_might_sleep(pb->pwm, &state); if (pb->exit) pb->exit(&pdev->dev); @@ -649,7 +649,7 @@ static void pwm_backlight_shutdown(struct platform_device *pdev) pwm_get_state(pb->pwm, &state); state.duty_cycle = 0; state.enabled = false; - pwm_apply_state(pb->pwm, &state); + pwm_apply_might_sleep(pb->pwm, &state); } #ifdef CONFIG_PM_SLEEP @@ -673,7 +673,7 @@ static int pwm_backlight_suspend(struct device *dev) pwm_get_state(pb->pwm, &state); state.duty_cycle = 0; state.enabled = false; - pwm_apply_state(pb->pwm, &state); + pwm_apply_might_sleep(pb->pwm, &state); if (pb->notify_after) pb->notify_after(pb->dev, 0); diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c index 5ae48e36fccb..1a4f90ea7d5a 100644 --- a/drivers/video/fbdev/ssd1307fb.c +++ b/drivers/video/fbdev/ssd1307fb.c @@ -347,7 +347,7 @@ static int ssd1307fb_init(struct ssd1307fb_par *par) pwm_init_state(par->pwm, &pwmstate); pwm_set_relative_duty_cycle(&pwmstate, 50, 100); - pwm_apply_state(par->pwm, &pwmstate); + pwm_apply_might_sleep(par->pwm, &pwmstate); /* Enable the PWM */ pwm_enable(par->pwm); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index f87655c06c82..b64b8a82415c 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -92,8 +92,8 @@ struct pwm_device { * @state: state to fill with the current PWM state * * The returned PWM state represents the state that was applied by a previous call to - * pwm_apply_state(). 
Drivers may have to slightly tweak that state before programming it to - * hardware. If pwm_apply_state() was never called, this returns either the current hardware + * pwm_apply_might_sleep(). Drivers may have to slightly tweak that state before programming it to + * hardware. If pwm_apply_might_sleep() was never called, this returns either the current hardware * state (if supported) or the default settings. */ static inline void pwm_get_state(const struct pwm_device *pwm, @@ -157,20 +157,20 @@ static inline void pwm_get_args(const struct pwm_device *pwm, } /** - * pwm_init_state() - prepare a new state to be applied with pwm_apply_state() + * pwm_init_state() - prepare a new state to be applied with pwm_apply_might_sleep() * @pwm: PWM device * @state: state to fill with the prepared PWM state * * This functions prepares a state that can later be tweaked and applied - * to the PWM device with pwm_apply_state(). This is a convenient function + * to the PWM device with pwm_apply_might_sleep(). This is a convenient function * that first retrieves the current PWM state and the replaces the period * and polarity fields with the reference values defined in pwm->args. * Once the function returns, you can adjust the ->enabled and ->duty_cycle - * fields according to your needs before calling pwm_apply_state(). + * fields according to your needs before calling pwm_apply_might_sleep(). * * ->duty_cycle is initially set to zero to avoid cases where the current * ->duty_cycle value exceed the pwm_args->period one, which would trigger - * an error if the user calls pwm_apply_state() without adjusting ->duty_cycle + * an error if the user calls pwm_apply_might_sleep() without adjusting ->duty_cycle * first. */ static inline void pwm_init_state(const struct pwm_device *pwm, @@ -226,7 +226,7 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) * * pwm_init_state(pwm, &state); * pwm_set_relative_duty_cycle(&state, 50, 100); - * pwm_apply_state(pwm, &state); + * pwm_apply_might_sleep(pwm, &state); * * This functions returns -EINVAL if @duty_cycle and/or @scale are * inconsistent (@scale == 0 or @duty_cycle > @scale). 
@@ -304,7 +304,7 @@ struct pwm_chip { #if IS_ENABLED(CONFIG_PWM) /* PWM user APIs */ -int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state); +int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); /** @@ -332,7 +332,7 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns, state.duty_cycle = duty_ns; state.period = period_ns; - return pwm_apply_state(pwm, &state); + return pwm_apply_might_sleep(pwm, &state); } /** @@ -353,7 +353,7 @@ static inline int pwm_enable(struct pwm_device *pwm) return 0; state.enabled = true; - return pwm_apply_state(pwm, &state); + return pwm_apply_might_sleep(pwm, &state); } /** @@ -372,7 +372,7 @@ static inline void pwm_disable(struct pwm_device *pwm) return; state.enabled = false; - pwm_apply_state(pwm, &state); + pwm_apply_might_sleep(pwm, &state); } /* PWM provider APIs */ @@ -403,8 +403,8 @@ struct pwm_device *devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode, const char *con_id); #else -static inline int pwm_apply_state(struct pwm_device *pwm, - const struct pwm_state *state) +static inline int pwm_apply_might_sleep(struct pwm_device *pwm, + const struct pwm_state *state) { might_sleep(); return -ENOTSUPP; @@ -521,7 +521,7 @@ static inline void pwm_apply_args(struct pwm_device *pwm) state.period = pwm->args.period; state.usage_power = false; - pwm_apply_state(pwm, &state); + pwm_apply_might_sleep(pwm, &state); } struct pwm_lookup { -- cgit v1.2.3 From f551103cb964e9e6f5c03b3b8723424723731e76 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 15 Dec 2023 17:49:24 -0500 Subject: sched.h: move pid helpers to pid.h This is needed for killing the sched.h dependency on rcupdate.h, and pid.h is a better place for this code anyways. 
Signed-off-by: Kent Overstreet --- arch/x86/um/sysrq_64.c | 1 + drivers/gpu/drm/lima/lima_ctx.c | 1 + drivers/irqchip/irq-gic-v4.c | 1 + include/linux/pid.h | 125 ++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 122 --------------------------------------- include/linux/sched/signal.h | 1 + ipc/util.h | 1 + kernel/async.c | 5 +- kernel/locking/spinlock_debug.c | 1 + 9 files changed, 134 insertions(+), 124 deletions(-) (limited to 'drivers/gpu') diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c index ef1eb4f4f612..0bf6de40abff 100644 --- a/arch/x86/um/sysrq_64.c +++ b/arch/x86/um/sysrq_64.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c index 891d5cd5019a..8389f2d7d021 100644 --- a/drivers/gpu/drm/lima/lima_ctx.c +++ b/drivers/gpu/drm/lima/lima_ctx.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /* Copyright 2018-2019 Qiang Yu */ +#include #include #include "lima_device.h" diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 94d56a03b175..ca32ac19d284 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/pid.h b/include/linux/pid.h index f254c3a45b9b..395cacce1179 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -4,7 +4,9 @@ #include #include +#include #include +#include #include /* @@ -204,4 +206,127 @@ pid_t pid_vnr(struct pid *pid); } \ task = tg___; \ } while_each_pid_task(pid, type, task) + +static inline struct pid *task_pid(struct task_struct *task) +{ + return task->thread_pid; +} + +/* + * the helpers to get the task's different pids as they are seen + * from various namespaces + * + * task_xid_nr() : global id, i.e. the id seen from the init namespace; + * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of + * current. + * task_xid_nr_ns() : id seen from the ns specified; + * + * see also pid_nr() etc in include/linux/pid.h + */ +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); + +static inline pid_t task_pid_nr(struct task_struct *tsk) +{ + return tsk->pid; +} + +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); +} + +static inline pid_t task_pid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); +} + + +static inline pid_t task_tgid_nr(struct task_struct *tsk) +{ + return tsk->tgid; +} + +/** + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. + * + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. 
+ */ +static inline int pid_alive(const struct task_struct *p) +{ + return p->thread_pid != NULL; +} + +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); +} + +static inline pid_t task_pgrp_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); +} + + +static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); +} + +static inline pid_t task_session_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); +} + +static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); +} + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); +} + +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + +/* Obsolete, do not use: */ +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return task_pgrp_nr_ns(tsk, &init_pid_ns); +} + +/** + * is_global_init - check if a task structure is init. Since init + * is free to have sub-threads we need to check tgid. + * @tsk: Task structure to be checked. + * + * Check if a task structure is the first user space task the kernel created. + * + * Return: 1 if the task structure is init. 0 otherwise. + */ +static inline int is_global_init(struct task_struct *tsk) +{ + return task_tgid_nr(tsk) == 1; +} + #endif /* _LINUX_PID_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 37cc9d257073..9e2708c2cfa6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1561,114 +1561,6 @@ struct task_struct { */ }; -static inline struct pid *task_pid(struct task_struct *task) -{ - return task->thread_pid; -} - -/* - * the helpers to get the task's different pids as they are seen - * from various namespaces - * - * task_xid_nr() : global id, i.e. the id seen from the init namespace; - * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of - * current. - * task_xid_nr_ns() : id seen from the ns specified; - * - * see also pid_nr() etc in include/linux/pid.h - */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); - -static inline pid_t task_pid_nr(struct task_struct *tsk) -{ - return tsk->pid; -} - -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); -} - -static inline pid_t task_pid_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); -} - - -static inline pid_t task_tgid_nr(struct task_struct *tsk) -{ - return tsk->tgid; -} - -/** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. - * - * Return: 1 if the process is alive. 0 otherwise. 
- */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->thread_pid != NULL; -} - -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); -} - -static inline pid_t task_pgrp_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); -} - - -static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); -} - -static inline pid_t task_session_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); -} - -static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); -} - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); -} - -static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) -{ - pid_t pid = 0; - - rcu_read_lock(); - if (pid_alive(tsk)) - pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); - rcu_read_unlock(); - - return pid; -} - -static inline pid_t task_ppid_nr(const struct task_struct *tsk) -{ - return task_ppid_nr_ns(tsk, &init_pid_ns); -} - -/* Obsolete, do not use: */ -static inline pid_t task_pgrp_nr(struct task_struct *tsk) -{ - return task_pgrp_nr_ns(tsk, &init_pid_ns); -} - #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) @@ -1712,20 +1604,6 @@ static inline char task_state_to_char(struct task_struct *tsk) return task_index_to_char(task_state_index(tsk)); } -/** - * is_global_init - check if a task structure is init. Since init - * is free to have sub-threads we need to check tgid. - * @tsk: Task structure to be checked. - * - * Check if a task structure is the first user space task the kernel created. - * - * Return: 1 if the task structure is init. 0 otherwise. - */ -static inline int is_global_init(struct task_struct *tsk) -{ - return task_tgid_nr(tsk) == 1; -} - extern struct pid *cad_pid; /* diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 3499c1a8b929..b847d8fa75a9 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/ipc/util.h b/ipc/util.h index 67bdd2aa2c28..a55d6cebe6d3 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -14,6 +14,7 @@ #include #include #include +#include /* * The IPC ID contains 2 separate numbers - index and sequence number. diff --git a/kernel/async.c b/kernel/async.c index b2c4ba5686ee..79f6a3034b1f 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -46,11 +46,12 @@ asynchronous and synchronous parts of the kernel. 
#include #include -#include #include -#include +#include +#include #include #include +#include #include #include "workqueue_internal.h" diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 14235671a1a7..87b03d2e41db 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -12,6 +12,7 @@ #include #include #include +#include void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, struct lock_class_key *key, short inner) -- cgit v1.2.3 From 6713ee6ca19e3cd43798b4b40f8b13489c724a89 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 7 Jun 2023 11:51:36 -0700 Subject: drm/xe: Move XE_PTE_FLAG_READ_ONLY to xe_vm_types.h XE_PTE_FLAG_READ_ONLY is specific to struct xe_vma, move it from xe_bo.h to xe_vm_types.h to reflect that. Reviewed-by: Francois Dugast Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 2 -- drivers/gpu/drm/xe/xe_vm_types.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 552fe073e9c5..dd3d448fee0b 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -65,8 +65,6 @@ #define XE_PAGE_PRESENT BIT_ULL(0) #define XE_PAGE_RW BIT_ULL(1) -#define XE_PTE_FLAG_READ_ONLY BIT(0) - #define XE_PL_SYSTEM TTM_PL_SYSTEM #define XE_PL_TT TTM_PL_TT #define XE_PL_VRAM0 TTM_PL_VRAM diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 5242236b4b0e..a51e84e584b4 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -30,6 +30,7 @@ struct xe_vma { /** @end: end address of this VMA within its address domain */ u64 end; /** @pte_flags: pte flags for this VMA */ +#define XE_PTE_FLAG_READ_ONLY BIT(0) u32 pte_flags; /** @bo: BO if not a userptr, must be NULL is userptr */ -- cgit v1.2.3 From a0ea91db616c386a9b5689dbbb7f57073f993368 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sun, 11 Jun 2023 15:24:43 -0700 Subject: drm/xe: Rename pte/pde encoding functions Remove the leftover TODO by renameing the functions to use xe prefix. Since the static __gen8_pte_encode() already has a double score, just remove the prefix. Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230611222447.2837573-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 20 +++++++++--------- drivers/gpu/drm/xe/xe_pt.c | 38 +++++++++++++++++------------------ drivers/gpu/drm/xe/xe_pt.h | 10 ++++----- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 5 files changed, 36 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 60266fea7faa..4c79c1dfa772 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -300,7 +300,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* First part of the test, are we updating our pagetable bo with a new entry? 
*/ xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); - expected = gen8_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0); + expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0); if (m->eng->vm->flags & XE_VM_FLAGS_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index f50484759866..a62bd7ec8a42 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -189,15 +189,15 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, return ret; } - entry = gen8_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB); + entry = xe_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); map_ofs = (num_entries - num_level) * XE_PAGE_SIZE; /* Map the entire BO in our level 0 pt */ for (i = 0, level = 0; i < num_entries; level++) { - entry = gen8_pte_encode(NULL, bo, i * XE_PAGE_SIZE, - XE_CACHE_WB, 0, 0); + entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE, + XE_CACHE_WB, 0, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -215,8 +215,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, for (i = 0; i < batch->size; i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { - entry = gen8_pte_encode(NULL, batch, i, - XE_CACHE_WB, 0, 0); + entry = xe_pte_encode(NULL, batch, i, + XE_CACHE_WB, 0, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -242,7 +242,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (vm->flags & XE_VM_FLAGS_64K && level == 1) flags = XE_PDE_64K; - entry = gen8_pde_encode(bo, map_ofs + (level - 1) * + entry = xe_pde_encode(bo, map_ofs + (level - 1) * XE_PAGE_SIZE, XE_CACHE_WB); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); @@ -250,8 +250,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Write PDE's that point to our BO. */ for (i = 0; i < num_entries - num_level; i++) { - entry = gen8_pde_encode(bo, i * XE_PAGE_SIZE, - XE_CACHE_WB); + entry = xe_pde_encode(bo, i * XE_PAGE_SIZE, + XE_CACHE_WB); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + (i + 1) * 8, u64, entry); @@ -1231,8 +1231,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m, BUG_ON(pt_bo->size != SZ_4K); - addr = gen8_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, - 0, 0); + addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, + 0, 0); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 1ba93c2861ab..29c1b1f0bd7c 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -47,7 +47,7 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) } /** - * gen8_pde_encode() - Encode a page-table directory entry pointing to + * xe_pde_encode() - Encode a page-table directory entry pointing to * another page-table. * @bo: The page-table bo of the page-table to point to. * @bo_offset: Offset in the page-table bo to point to. @@ -57,8 +57,8 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) * * Return: An encoded page directory entry. No errors. 
*/ -u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, - const enum xe_cache_level level) +u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level level) { u64 pde; bool is_vram; @@ -97,8 +97,8 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, } } -static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, - u32 pt_level) +static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, + u32 pt_level) { pte |= XE_PAGE_PRESENT | XE_PAGE_RW; @@ -131,7 +131,7 @@ static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, } /** - * gen8_pte_encode() - Encode a page-table entry pointing to memory. + * xe_pte_encode() - Encode a page-table entry pointing to memory. * @vma: The vma representing the memory to point to. * @bo: If @vma is NULL, representing the memory to point to. * @offset: The offset into @vma or @bo. @@ -140,13 +140,11 @@ static u64 __gen8_pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, * @pt_level: The page-table level of the page-table into which the entry * is to be inserted. * - * TODO: Rename. - * * Return: An encoded page-table entry. No errors. */ -u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, - u64 offset, enum xe_cache_level cache, - u32 flags, u32 pt_level) +u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo, + u64 offset, enum xe_cache_level cache, + u32 flags, u32 pt_level) { u64 pte; bool is_vram; @@ -162,7 +160,7 @@ u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, pte |= XE_USM_PPGTT_PTE_AE; } - return __gen8_pte_encode(pte, cache, flags, pt_level); + return __pte_encode(pte, cache, flags, pt_level); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -174,13 +172,13 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, return 0; if (level == 0) { - u64 empty = gen8_pte_encode(NULL, vm->scratch_bo[id], 0, - XE_CACHE_WB, 0, 0); + u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id], 0, + XE_CACHE_WB, 0, 0); return empty; } else { - return gen8_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0, - XE_CACHE_WB); + return xe_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0, + XE_CACHE_WB); } } @@ -634,9 +632,9 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, XE_WARN_ON(xe_walk->va_curs_start != addr); - pte = __gen8_pte_encode(xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->cache, xe_walk->pte_flags, - level); + pte = __pte_encode(xe_res_dma(curs) + xe_walk->dma_offset, + xe_walk->cache, xe_walk->pte_flags, + level); pte |= xe_walk->default_pte; /* @@ -699,7 +697,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, xe_child->is_compact = true; } - pte = gen8_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags; + pte = xe_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags; ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, pte); } diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 10f334b9c004..54e8a043d353 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -45,10 +45,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma); -u64 gen8_pde_encode(struct xe_bo *bo, u64 bo_offset, - const enum xe_cache_level level); +u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level level); -u64 gen8_pte_encode(struct xe_vma *vma, struct xe_bo *bo, - u64 offset, enum xe_cache_level cache, - u32 flags, u32 pt_level); +u64 
xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo, + u64 offset, enum xe_cache_level cache, + u32 flags, u32 pt_level); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 51daa5fd7821..eb2209d2d1cd 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1530,8 +1530,8 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) { - return gen8_pde_encode(vm->pt_root[tile->id]->bo, 0, - XE_CACHE_WB); + return xe_pde_encode(vm->pt_root[tile->id]->bo, 0, + XE_CACHE_WB); } static struct dma_fence * -- cgit v1.2.3 From 90738d86650729cafb6d92191e6568d4b425b20a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sun, 11 Jun 2023 15:24:44 -0700 Subject: drm/xe/guc: Fix typo s/enabled/enable/ Fix the log message when it fails to enable CT. Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230611222447.2837573-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 615cc4d4ad69..22bc9ce846db 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -312,7 +312,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) return 0; err_out: - drm_err(&xe->drm, "Failed to enabled CT (%d)\n", err); + drm_err(&xe->drm, "Failed to enable CT (%d)\n", err); return err; } -- cgit v1.2.3 From 6dc3a12fb8185f98b525dbdb02fa5b810c4ff0bc Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sun, 11 Jun 2023 15:24:45 -0700 Subject: drm/xe/guc: Normalize error messages with %#x One of the messages was printed without 0x prefix, so it was not clear if it was decimal or hex: make sure to add the prefix by using %#x. While at it, normalize the other messages in the same function to follow the same pattern. Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230611222447.2837573-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index e51d8fb4a354..43f862aaacbe 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -647,7 +647,7 @@ retry: GUC_HXG_MSG_0_ORIGIN, 50000, &reply, false); if (ret) { timeout: - drm_err(&xe->drm, "mmio request 0x%08x: no reply 0x%08x\n", + drm_err(&xe->drm, "mmio request %#x: no reply %#x\n", request[0], reply); return ret; } @@ -673,7 +673,7 @@ timeout: GUC_HXG_TYPE_NO_RESPONSE_RETRY) { u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); - drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %u\n", + drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n", request[0], reason); goto retry; } @@ -683,7 +683,7 @@ timeout: u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); - drm_err(&xe->drm, "mmio request %#x: failure %x/%u\n", + drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n", request[0], error, hint); return -ENXIO; } -- cgit v1.2.3 From 2846d10339a2cc304a1ae55ce75e61eb7f55eb0b Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Fri, 9 Jun 2023 08:19:54 +0530 Subject: drm/xe: Donot apply forcewake while reading actual frequency RPSTAT1 is an sgunit register and thus doesn't need forcewake. 
MTL_MIRROR_TARGET_WP1 is within an "always on" power domain and thus doesn't require any forcewake to ensure the register is powered up and usable. When GT is RC6 the actual frequency reported will be 0. v2: - Add bspec index (Anshuman) - %s/GEN12_RPSTAT1/GT_PERF_STATUS as per bspec v3: Update Fixes tag Bspec: 51837, 67651 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Badal Nilawar Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230609024954.987039-1-badal.nilawar@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 67faa9ee0006..5d5cf4b0d508 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -31,7 +31,7 @@ #define GEN10_FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) -#define GEN12_RPSTAT1 XE_REG(0x1381b4) +#define GT_PERF_STATUS XE_REG(0x1381b4) #define GEN12_CAGF_MASK REG_GENMASK(19, 11) #define MTL_MIRROR_TARGET_WP1 XE_REG(0xc60) @@ -371,26 +371,18 @@ static ssize_t freq_act_show(struct device *dev, ssize_t ret; xe_device_mem_access_get(gt_to_xe(gt)); - /* - * When in RC6, actual frequency is 0. Let's block RC6 so we are able - * to verify that our freq requests are really happening. - */ - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (ret) - goto out; + /* When in RC6, actual frequency reported will be 0. */ if (xe->info.platform == XE_METEORLAKE) { freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); freq = REG_FIELD_GET(MTL_CAGF_MASK, freq); } else { - freq = xe_mmio_read32(gt, GEN12_RPSTAT1); + freq = xe_mmio_read32(gt, GT_PERF_STATUS); freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq); } ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out: xe_device_mem_access_put(gt_to_xe(gt)); return ret; } -- cgit v1.2.3 From 1e80d0c3c44806e6ff885102a937ea838a01f560 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Tue, 13 Jun 2023 15:12:32 +0530 Subject: drm/xe: Fix GT looping for standalone media gt_count is only being incremented when initializing the primary GT; since the media GT sets the ID directly, gt_count is not incremented again, resulting in an incorrect count on MTL. Use autoincrement while assigning the media GTs ID to ensure gt_count is correct on MTL and other future platforms with standalone media. Signed-off-by: Riana Tauro Link: https://lore.kernel.org/r/20230613094232.3703549-1-riana.tauro@intel.com [mattrope: Tweaked commit message to focus on gt_count importance] Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 208dc7a63f88..71be80274683 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -569,7 +569,7 @@ static int xe_info_init(struct xe_device *xe, * up with platforms that support both together. 
*/ drm_WARN_ON(&xe->drm, id != 0); - gt->info.id = 1; + gt->info.id = xe->info.gt_count++; } return 0; -- cgit v1.2.3 From 35cbfe561912874a1f0d4b2ceb5fe890f0f58e46 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 23 May 2023 15:50:19 +0200 Subject: drm/xe: Fix uninitialized variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using uninitialized variables leads to undefined behavior. Moreover, it causes the compiler to complain with: ../drivers/gpu/drm/xe/xe_vm.c:3265:40: error: variable 'vma' is uninitialized when used here [-Werror,-Wuninitialized] ../drivers/gpu/drm/xe/xe_rtp.c:118:36: error: variable 'i' is uninitialized when used here [-Werror,-Wuninitialized] ../drivers/gpu/drm/xe/xe_mocs.c:449:3: error: variable 'flags' is uninitialized when used here [-Werror,-Wuninitialized] Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230523135020.345596-1-michal@hardline.pl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 2 +- drivers/gpu/drm/xe/xe_rtp.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 86277ecb749b..ccc852500eda 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -373,7 +373,7 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = { static unsigned int get_mocs_settings(struct xe_device *xe, struct xe_mocs_info *info) { - unsigned int flags; + unsigned int flags = 0; memset(info, 0, sizeof(struct xe_mocs_info)); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 956bd39fe1a0..8aae34df3801 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -136,7 +136,7 @@ static bool rtp_process_one_sr(const struct xe_rtp_entry_sr *entry, if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules)) return false; - for (action = &entry->actions[0]; i < entry->n_actions; action++, i++) { + for (i = 0, action = &entry->actions[0]; i < entry->n_actions; action++, i++) { if ((entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) || (action->flags & XE_RTP_ACTION_FLAG_ENGINE_BASE)) mmio_base = hwe->mmio_base; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index eb2209d2d1cd..b8a0fe24d1d0 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3297,7 +3297,7 @@ destroy_vmas: if (!vmas[i]) break; - list_for_each_entry_safe(vma, next, &vma->unbind_link, + list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link, unbind_link) { list_del_init(&vma->unbind_link); if (!vma->destroyed) { -- cgit v1.2.3 From d0e2dd764a6d55cff35e9f609b724fcc62469ba6 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 23 May 2023 15:50:20 +0200 Subject: drm/xe: Fix check for platform without geometry pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not possible for the condition checking if we're running on platform without geometry pipeline to ever be true, since gt->fuse_topo.g_dss_mask is an array. 
It also breaks the build: ../drivers/gpu/drm/xe/xe_rtp.c:183:50: error: address of array 'gt->fuse_topo.g_dss_mask' will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion] Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230523135020.345596-2-michal@hardline.pl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_topology.c | 5 +++++ drivers/gpu/drm/xe/xe_gt_topology.h | 2 ++ drivers/gpu/drm/xe/xe_rtp.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 7c3e347e4d74..d4bbd0a835c2 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -128,6 +128,11 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } +bool xe_dss_mask_empty(const xe_dss_mask_t mask) +{ + return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); +} + /** * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant * @gt: GT to check diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index 5f35deed9128..d1b54fb52ea6 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -17,6 +17,8 @@ void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); +bool xe_dss_mask_empty(const xe_dss_mask_t mask); + bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 8aae34df3801..fb44cc7521d8 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -315,7 +315,7 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, unsigned int dss_per_gslice = 4; unsigned int dss; - if (drm_WARN(>_to_xe(gt)->drm, !gt->fuse_topo.g_dss_mask, + if (drm_WARN(>_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask), "Checking gslice for platform without geometry pipeline\n")) return false; -- cgit v1.2.3 From ff063430caa810f2195d2390e79a990eb101c527 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 8 Jun 2023 11:12:17 -0700 Subject: drm/xe/mtl: Add some initial MTL workarounds This adds a handful of workarounds that apply to production steppings of MTL: - Wa_14018575942 - Wa_22016670082 - Wa_14017856879 - Wa_18019271663 Wa_22016670082 is currently only applied to the primary GT at the moment, but may need to be extended to the media GT in the future if a pending update to the workaround database gets finalized. OOB workarounds will need to be implemented separately in future patches for Wa_14016712196, Wa_16018063123, and Wa_18013179988. 
Reviewed-by: Radhakrishna Sripada Link: https://lore.kernel.org/r/20230608181217.2385932-1-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 ++++++++ drivers/gpu/drm/xe/xe_wa.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 76c09526690e..d2a0a5c8b02a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -133,6 +133,9 @@ #define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4, XE_REG_OPTION_MASKED) #define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) +#define SQCNT1 XE_REG_MCR(0x8718) +#define ENFORCE_RAR REG_BIT(23) + #define XEHP_SQCM XE_REG_MCR(0x8724) #define EN_32B_ACCESS REG_BIT(30) @@ -268,7 +271,9 @@ #define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c) #define COMP_MOD_CTRL XE_REG_MCR(0xcf30) #define XEHP_VDBX_MOD_CTRL XE_REG_MCR(0xcf34) +#define XELPMP_VDBX_MOD_CTRL XE_REG(0xcf34) #define XEHP_VEBX_MOD_CTRL XE_REG_MCR(0xcf38) +#define XELPMP_VEBX_MOD_CTRL XE_REG(0xcf38) #define FORCE_MISS_FTLB REG_BIT(3) #define XEHP_GAMSTLB_CTRL XE_REG_MCR(0xcf4c) @@ -302,6 +307,9 @@ #define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) +#define ROW_CHICKEN3 XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED) +#define DIS_FIX_EOT1_FLUSH REG_BIT(9) + #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index e5b3ff669465..5eaa9bed9d12 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -236,6 +236,22 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) }, + { XE_RTP_NAME("14018575942"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB), + SET(COMP_MOD_CTRL, FORCE_MISS_FTLB)) + }, + { XE_RTP_NAME("22016670082"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR)) + }, + + /* Xe_LPM+ */ + { XE_RTP_NAME("14018575942"), + XE_RTP_RULES(MEDIA_VERSION(1300)), + XE_RTP_ACTIONS(SET(XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB), + SET(XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB)) + }, {} }; @@ -502,6 +518,14 @@ static const struct xe_rtp_entry_sr engine_was[] = { GRAPHICS_STEP(B0, C0)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC)) }, + + /* Xe_LPG */ + { XE_RTP_NAME("14017856879"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH)) + }, + {} }; @@ -580,6 +604,13 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, + + /* Xe_LPG */ + { XE_RTP_NAME("18019271663"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) + }, + {} }; -- cgit v1.2.3 From ab10e976fbda8349163ceee2ce99b2bfc97031b8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 14 Jun 2023 10:47:54 -0700 Subject: drm/xe: limit GGTT size to GUC_GGTT_TOP The GuC can't access addresses above GUC_GGTT_TOP, so any GuC-accessible objects can't be mapped above that offset. 
Instead of checking each object to see if GuC may access it or not before mapping it, we just limit the GGTT size to GUC_GGTT_TOP. This wastes a bit of address space (about ~18 MBs, which is in addition to what already removed at the bottom of the GGTT), but it is a good tradeoff to keep the code simple. The in-code comment has also been updated to explain the limitation. Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20230615002521.2587250-1-daniele.ceraolospurio@intel.com/ Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 14b6d68a6324..0722c49585a0 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -24,6 +24,9 @@ #define MTL_GGTT_PTE_PAT0 BIT_ULL(52) #define MTL_GGTT_PTE_PAT1 BIT_ULL(53) +/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ +#define GUC_GGTT_TOP 0xFEE00000 + u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) { struct xe_device *xe = xe_bo_device(bo); @@ -111,12 +114,18 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) /* * 8B per entry, each points to a 4KB page. * - * The GuC owns the WOPCM space, thus we can't allocate GGTT address in - * this area. Even though we likely configure the WOPCM to less than the - * maximum value, to simplify the driver load (no need to fetch HuC + - * GuC firmwares and determine there sizes before initializing the GGTT) - * just start the GGTT allocation above the max WOPCM size. This might - * waste space in the GGTT (WOPCM is 2MB on modern platforms) but we can + * The GuC address space is limited on both ends of the GGTT, because + * the GuC shim HW redirects accesses to those addresses to other HW + * areas instead of going through the GGTT. On the bottom end, the GuC + * can't access offsets below the WOPCM size, while on the top side the + * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of + * checking each object to see if they are accessed by GuC or not, we + * just exclude those areas from the allocator. Additionally, to + * simplify the driver load, we use the maximum WOPCM size in this logic + * instead of the programmed one, so we don't need to wait until the + * actual size to be programmed is determined (which requires FW fetch) + * before initializing the GGTT. These simplifications might waste space + * in the GGTT (about 20-25 MBs depending on the platform) but we can * live with this. * * Another benifit of this is the GuC bootrom can't access anything @@ -125,6 +134,9 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) * Starting the GGTT allocations above the WOPCM max give us the correct * placement for free. */ + if (ggtt->size > GUC_GGTT_TOP) + ggtt->size = GUC_GGTT_TOP; + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); mutex_init(&ggtt->lock); -- cgit v1.2.3 From ee6ad13705286b19f5ffc19000b1d1574208efc9 Mon Sep 17 00:00:00 2001 From: Janga Rahul Kumar Date: Tue, 13 Jun 2023 15:07:40 +0530 Subject: drm/Xe: Use EOPNOTSUPP instead of ENOTSUPP ENOTSUPP is not a standard Unix error should use EOPNOTSUPP instead. 
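As an illustrative sketch (hypothetical check, not taken from the hunks below), an ioctl handler rejecting an unsupported request now returns the standard errno:

        /* hypothetical example: reject a request the driver cannot honor */
        if (XE_IOCTL_ERR(xe, !supported))
                return -EOPNOTSUPP;     /* errno 95, "Operation not supported" */

EOPNOTSUPP has a defined errno string visible to userspace, whereas ENOTSUPP (524) is kernel-internal and shows up as "Unknown error 524".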
v2: Update commit description (Aravind) Reviewed-by: Aravind Iddamsetty Signed-off-by: Janga Rahul Kumar Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 4 ++-- drivers/gpu/drm/xe/xe_sync.c | 6 +++--- drivers/gpu/drm/xe/xe_vm.c | 8 ++++---- drivers/gpu/drm/xe/xe_wait_user_fence.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index fd39da859442..49d92f089242 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -612,7 +612,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) != !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) { - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto put_engine; } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f7a7f996b37f..f1336803b915 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -495,7 +495,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, fallthrough; case DRM_XE_MMIO_8BIT: /* TODO */ case DRM_XE_MMIO_16BIT: /* TODO */ - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; goto exit; } } @@ -513,7 +513,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, fallthrough; case DRM_XE_MMIO_8BIT: /* TODO */ case DRM_XE_MMIO_16BIT: /* TODO */ - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; } } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 5acb37a8b2ab..7786b908a3fd 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -120,7 +120,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { case DRM_XE_SYNC_SYNCOBJ: if (XE_IOCTL_ERR(xe, no_dma_fences && signal)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) return -EINVAL; @@ -138,7 +138,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, case DRM_XE_SYNC_TIMELINE_SYNCOBJ: if (XE_IOCTL_ERR(xe, no_dma_fences && signal)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) return -EINVAL; @@ -173,7 +173,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, case DRM_XE_SYNC_USER_FENCE: if (XE_IOCTL_ERR(xe, !signal)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7)) return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b8a0fe24d1d0..6edac7d4af87 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1819,10 +1819,10 @@ static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm, return -EINVAL; if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) - return -ENOTSUPP; + return -EOPNOTSUPP; if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr)) - return -ENOTSUPP; + return -EOPNOTSUPP; vm->async_ops.error_capture.mm = current->mm; vm->async_ops.error_capture.addr = value; @@ -3072,7 +3072,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) { if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) - err = -ENOTSUPP; + err = -EOPNOTSUPP; if (XE_IOCTL_ERR(xe, !err && args->num_syncs)) err = EINVAL; if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error)) @@ -3096,7 +3096,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if 
(XE_IOCTL_ERR(xe, !vm->async_ops.error && async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) { - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto put_engine; } diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 6c8a60c60087..3122374341d6 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -143,7 +143,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) { xe_vm_put(vm); - return -ENOTSUPP; + return -EOPNOTSUPP; } addr = vm->async_ops.error_capture.addr; -- cgit v1.2.3 From 37430402618db90b53aa782a6c49f66ab0efced0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Jun 2023 11:22:36 -0700 Subject: drm/xe: NULL binding implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add uAPI and implementation for NULL bindings. A NULL binding is defined as writes dropped and read zero. A single bit in the uAPI has been added which results in a single bit in the PTEs being set. NULL bindings are intendedd to be used to implement VK sparse bindings, in particular residencyNonResidentStrict property. v2: Fix BUG_ON shown in VK testing, fix check patch warning, fix xe_pt_scan_64K, update __gen8_pte_encode to understand NULL bindings, remove else if vma_addr Reviewed-by: Thomas Hellström Suggested-by: Paulo Zanoni Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_exec.c | 2 + drivers/gpu/drm/xe/xe_gt_pagefault.c | 4 +- drivers/gpu/drm/xe/xe_pt.c | 54 ++++++++++++++------ drivers/gpu/drm/xe/xe_vm.c | 99 +++++++++++++++++++++++------------- drivers/gpu/drm/xe/xe_vm.h | 12 ++++- drivers/gpu/drm/xe/xe_vm_types.h | 1 + include/uapi/drm/xe_drm.h | 8 +++ 8 files changed, 126 insertions(+), 55 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index dd3d448fee0b..3a148cc6e811 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -61,6 +61,7 @@ #define XE_PPGTT_PTE_LM BIT_ULL(11) #define XE_PDE_64K BIT_ULL(6) #define XE_PTE_PS64 BIT_ULL(8) +#define XE_PTE_NULL BIT_ULL(9) #define XE_PAGE_PRESENT BIT_ULL(0) #define XE_PAGE_RW BIT_ULL(1) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index e44076ee2e11..4f7694a29348 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -120,6 +120,8 @@ retry: * to a location where the GPU can access it). 
*/ list_for_each_entry(vma, &vm->rebind_list, rebind_link) { + XE_WARN_ON(xe_vma_is_null(vma)); + if (xe_vma_is_userptr(vma)) continue; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 5436667ba82b..9dd8e5097e65 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -533,8 +533,8 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) trace_xe_vma_acc(vma); - /* Userptr can't be migrated, nothing to do */ - if (xe_vma_is_userptr(vma)) + /* Userptr or null can't be migrated, nothing to do */ + if (xe_vma_has_no_bo(vma)) goto unlock_vm; /* Lock VM and BOs dma-resv */ diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 29c1b1f0bd7c..fe1c77b139e4 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -81,6 +81,9 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, size_t page_size, bool *is_vram) { + if (xe_vma_is_null(vma)) + return 0; + if (xe_vma_is_userptr(vma)) { struct xe_res_cursor cur; u64 page; @@ -105,6 +108,9 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, if (unlikely(flags & XE_PTE_FLAG_READ_ONLY)) pte &= ~XE_PAGE_RW; + if (unlikely(flags & XE_PTE_FLAG_NULL)) + pte |= XE_PTE_NULL; + /* FIXME: I don't think the PPAT handling is correct for MTL */ switch (cache) { @@ -557,6 +563,10 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, if (next - xe_walk->va_curs_start > xe_walk->curs->size) return false; + /* null VMA's do not have dma addresses */ + if (xe_walk->pte_flags & XE_PTE_FLAG_NULL) + return true; + /* Is the DMA address huge PTE size aligned? */ size = next - addr; dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); @@ -579,6 +589,10 @@ xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) if (next > xe_walk->l0_end_addr) return false; + /* null VMA's do not have dma addresses */ + if (xe_walk->pte_flags & XE_PTE_FLAG_NULL) + return true; + xe_res_next(&curs, addr - xe_walk->va_curs_start); for (; addr < next; addr += SZ_64K) { if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) @@ -629,10 +643,12 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, /* Is this a leaf entry ?*/ if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { struct xe_res_cursor *curs = xe_walk->curs; + bool is_null = xe_walk->pte_flags & XE_PTE_FLAG_NULL; XE_WARN_ON(xe_walk->va_curs_start != addr); - pte = __pte_encode(xe_res_dma(curs) + xe_walk->dma_offset, + pte = __pte_encode(is_null ? 
0 : + xe_res_dma(curs) + xe_walk->dma_offset, xe_walk->cache, xe_walk->pte_flags, level); pte |= xe_walk->default_pte; @@ -652,7 +668,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (unlikely(ret)) return ret; - xe_res_next(curs, next - addr); + if (!is_null) + xe_res_next(curs, next - addr); xe_walk->va_curs_start = next; *action = ACTION_CONTINUE; @@ -759,24 +776,29 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); xe_walk.cache = XE_CACHE_WB; } else { - if (!xe_vma_is_userptr(vma) && bo->flags & XE_BO_SCANOUT_BIT) + if (!xe_vma_has_no_bo(vma) && bo->flags & XE_BO_SCANOUT_BIT) xe_walk.cache = XE_CACHE_WT; else xe_walk.cache = XE_CACHE_WB; } - if (!xe_vma_is_userptr(vma) && xe_bo_is_stolen(bo)) + if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); xe_bo_assert_held(bo); - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(vma->userptr.sg, 0, vma->end - vma->start + 1, - &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, vma->bo_offset, - vma->end - vma->start + 1, &curs); - else - xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset, - vma->end - vma->start + 1, &curs); + + if (!xe_vma_is_null(vma)) { + if (xe_vma_is_userptr(vma)) + xe_res_first_sg(vma->userptr.sg, 0, + vma->end - vma->start + 1, &curs); + else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) + xe_res_first(bo->ttm.resource, vma->bo_offset, + vma->end - vma->start + 1, &curs); + else + xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset, + vma->end - vma->start + 1, &curs); + } else { + curs.size = vma->end - vma->start + 1; + } ret = xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1, &xe_walk.base); @@ -965,7 +987,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) if (xe_vma_is_userptr(vma)) lockdep_assert_held_read(&vm->userptr.notifier_lock); - else + else if (!xe_vma_is_null(vma)) dma_resv_assert_held(vma->bo->ttm.base.resv); dma_resv_assert_held(&vm->resv); @@ -1341,7 +1363,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - if (!xe_vma_is_userptr(vma) && !vma->bo->vm) + if (!xe_vma_has_no_bo(vma) && !vma->bo->vm) dma_resv_add_fence(vma->bo->ttm.base.resv, fence, DMA_RESV_USAGE_BOOKKEEP); xe_pt_commit_bind(vma, entries, num_entries, rebind, @@ -1658,7 +1680,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e DMA_RESV_USAGE_BOOKKEEP); /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_is_userptr(vma) && !vma->bo->vm) + if (!xe_vma_has_no_bo(vma) && !vma->bo->vm) dma_resv_add_fence(vma->bo->ttm.base.resv, fence, DMA_RESV_USAGE_BOOKKEEP); xe_pt_commit_unbind(vma, entries, num_entries, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 6edac7d4af87..5ac819a65cf1 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -590,7 +590,7 @@ retry: goto out_unlock; list_for_each_entry(vma, &vm->rebind_list, rebind_link) { - if (xe_vma_is_userptr(vma) || vma->destroyed) + if (xe_vma_has_no_bo(vma) || vma->destroyed) continue; err = xe_bo_validate(vma->bo, vm, false); @@ -843,6 +843,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, u64 bo_offset_or_userptr, u64 start, u64 end, bool read_only, + bool is_null, u64 tile_mask) { struct xe_vma *vma; @@ -868,8 +869,11 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, 
vma->vm = vm; vma->start = start; vma->end = end; + vma->pte_flags = 0; if (read_only) - vma->pte_flags = XE_PTE_FLAG_READ_ONLY; + vma->pte_flags |= XE_PTE_FLAG_READ_ONLY; + if (is_null) + vma->pte_flags |= XE_PTE_FLAG_NULL; if (tile_mask) { vma->tile_mask = tile_mask; @@ -886,23 +890,26 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->bo_offset = bo_offset_or_userptr; vma->bo = xe_bo_get(bo); list_add_tail(&vma->bo_link, &bo->vmas); - } else /* userptr */ { - u64 size = end - start + 1; - int err; + } else /* userptr or null */ { + if (!is_null) { + u64 size = end - start + 1; + int err; - vma->userptr.ptr = bo_offset_or_userptr; + vma->userptr.ptr = bo_offset_or_userptr; - err = mmu_interval_notifier_insert(&vma->userptr.notifier, - current->mm, - vma->userptr.ptr, size, - &vma_userptr_notifier_ops); - if (err) { - kfree(vma); - vma = ERR_PTR(err); - return vma; + err = mmu_interval_notifier_insert(&vma->userptr.notifier, + current->mm, + vma->userptr.ptr, size, + &vma_userptr_notifier_ops); + if (err) { + kfree(vma); + vma = ERR_PTR(err); + return vma; + } + + vma->userptr.notifier_seq = LONG_MAX; } - vma->userptr.notifier_seq = LONG_MAX; xe_vm_get(vm); } @@ -942,6 +949,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma) */ mmu_interval_notifier_remove(&vma->userptr.notifier); xe_vm_put(vm); + } else if (xe_vma_is_null(vma)) { + xe_vm_put(vm); } else { xe_bo_put(vma->bo); } @@ -1024,7 +1033,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) list_del_init(&vma->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); list_del(&vma->userptr_link); - } else { + } else if (!xe_vma_is_null(vma)) { xe_bo_assert_held(vma->bo); list_del(&vma->bo_link); @@ -1393,7 +1402,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) while (vm->vmas.rb_node) { struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node); - if (xe_vma_is_userptr(vma)) { + if (xe_vma_has_no_bo(vma)) { down_read(&vm->userptr.notifier_lock); vma->destroyed = true; up_read(&vm->userptr.notifier_lock); @@ -1402,7 +1411,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) rb_erase(&vma->vm_node, &vm->vmas); /* easy case, remove from VMA? 
*/ - if (xe_vma_is_userptr(vma) || vma->bo->vm) { + if (xe_vma_has_no_bo(vma) || vma->bo->vm) { xe_vma_destroy(vma, NULL); continue; } @@ -2036,7 +2045,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type)); - if (!xe_vma_is_userptr(vma)) { + if (!xe_vma_has_no_bo(vma)) { err = xe_bo_migrate(vma->bo, region_to_mem_type[region]); if (err) return err; @@ -2645,6 +2654,8 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, lookup->start - 1, (first->pte_flags & XE_PTE_FLAG_READ_ONLY), + (first->pte_flags & + XE_PTE_FLAG_NULL), first->tile_mask); if (first->bo) xe_bo_unlock(first->bo, &ww); @@ -2652,7 +2663,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, err = -ENOMEM; goto unwind; } - if (!first->bo) { + if (xe_vma_is_userptr(first)) { err = xe_vma_userptr_pin_pages(new_first); if (err) goto unwind; @@ -2677,6 +2688,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, last->end, (last->pte_flags & XE_PTE_FLAG_READ_ONLY), + (last->pte_flags & XE_PTE_FLAG_NULL), last->tile_mask); if (last->bo) xe_bo_unlock(last->bo, &ww); @@ -2684,7 +2696,7 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, err = -ENOMEM; goto unwind; } - if (!last->bo) { + if (xe_vma_is_userptr(last)) { err = xe_vma_userptr_pin_pages(new_last); if (err) goto unwind; @@ -2744,7 +2756,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, *next; struct rb_node *node; - if (!xe_vma_is_userptr(vma)) { + if (!xe_vma_has_no_bo(vma)) { if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region])) return ERR_PTR(-EINVAL); } @@ -2753,7 +2765,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, while ((node = rb_next(node))) { if (!xe_vma_cmp_vma_cb(lookup, node)) { __vma = to_xe_vma(node); - if (!xe_vma_is_userptr(__vma)) { + if (!xe_vma_has_no_bo(__vma)) { if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) goto flush_list; } @@ -2767,7 +2779,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, while ((node = rb_prev(node))) { if (!xe_vma_cmp_vma_cb(lookup, node)) { __vma = to_xe_vma(node); - if (!xe_vma_is_userptr(__vma)) { + if (!xe_vma_has_no_bo(__vma)) { if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) goto flush_list; } @@ -2826,21 +2838,23 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, switch (VM_BIND_OP(op)) { case XE_VM_BIND_OP_MAP: - XE_BUG_ON(!bo); - - err = xe_bo_lock(bo, &ww, 0, true); - if (err) - return ERR_PTR(err); + if (bo) { + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return ERR_PTR(err); + } vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr, addr + range - 1, op & XE_VM_BIND_FLAG_READONLY, + op & XE_VM_BIND_FLAG_NULL, tile_mask); - xe_bo_unlock(bo, &ww); + if (bo) + xe_bo_unlock(bo, &ww); if (!vma) return ERR_PTR(-ENOMEM); xe_vm_insert_vma(vm, vma); - if (!bo->vm) { + if (bo && !bo->vm) { vm_insert_extobj(vm, vma); err = add_preempt_fences(vm, bo); if (err) { @@ -2874,6 +2888,7 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr, addr + range - 1, op & XE_VM_BIND_FLAG_READONLY, + op & XE_VM_BIND_FLAG_NULL, tile_mask); if (!vma) return ERR_PTR(-ENOMEM); @@ -2899,11 +2914,12 @@ static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, #ifdef TEST_VM_ASYNC_OPS_ERROR #define SUPPORTED_FLAGS \ (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \ - XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | 0xffff) + 
XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \ + XE_VM_BIND_FLAG_NULL | 0xffff) #else #define SUPPORTED_FLAGS \ (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \ - XE_VM_BIND_FLAG_IMMEDIATE | 0xffff) + XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff) #endif #define XE_64K_PAGE_MASK 0xffffull @@ -2951,6 +2967,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u32 obj = (*bind_ops)[i].obj; u64 obj_offset = (*bind_ops)[i].obj_offset; u32 region = (*bind_ops)[i].region; + bool is_null = op & XE_VM_BIND_FLAG_NULL; if (XE_IOCTL_ERR(xe, (*bind_ops)[i].pad) || XE_IOCTL_ERR(xe, (*bind_ops)[i].reserved[0] || @@ -2984,8 +3001,13 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) > XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) || + XE_IOCTL_ERR(xe, obj && is_null) || + XE_IOCTL_ERR(xe, obj_offset && is_null) || + XE_IOCTL_ERR(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP && + is_null) || XE_IOCTL_ERR(xe, !obj && - VM_BIND_OP(op) == XE_VM_BIND_OP_MAP) || + VM_BIND_OP(op) == XE_VM_BIND_OP_MAP && + !is_null) || XE_IOCTL_ERR(xe, !obj && VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || XE_IOCTL_ERR(xe, addr && @@ -3390,6 +3412,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) int ret; XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm)); + XE_WARN_ON(xe_vma_is_null(vma)); trace_xe_vma_usm_invalidate(vma); /* Check that we don't race with page-table updates */ @@ -3452,8 +3475,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) for (node = rb_first(&vm->vmas); node; node = rb_next(node)) { struct xe_vma *vma = to_xe_vma(node); bool is_userptr = xe_vma_is_userptr(vma); + bool is_null = xe_vma_is_null(vma); - if (is_userptr) { + if (is_null) { + addr = 0; + } else if (is_userptr) { struct xe_res_cursor cur; if (vma->userptr.sg) { @@ -3468,7 +3494,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) } drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", vma->start, vma->end, vma->end - vma->start + 1ull, - addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS"); + addr, is_null ? "NULL" : is_userptr ? "USR" : + is_vram ? 
"VRAM" : "SYS"); } up_read(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index bb2996856841..5edb7771629c 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -115,11 +115,21 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) } } -static inline bool xe_vma_is_userptr(struct xe_vma *vma) +static inline bool xe_vma_is_null(struct xe_vma *vma) +{ + return vma->pte_flags & XE_PTE_FLAG_NULL; +} + +static inline bool xe_vma_has_no_bo(struct xe_vma *vma) { return !vma->bo; } +static inline bool xe_vma_is_userptr(struct xe_vma *vma) +{ + return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma); +} + int xe_vma_userptr_pin_pages(struct xe_vma *vma); int xe_vma_userptr_check_repin(struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a51e84e584b4..9b39c5f64afa 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -31,6 +31,7 @@ struct xe_vma { u64 end; /** @pte_flags: pte flags for this VMA */ #define XE_PTE_FLAG_READ_ONLY BIT(0) +#define XE_PTE_FLAG_NULL BIT(1) u32 pte_flags; /** @bo: BO if not a userptr, must be NULL is userptr */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 83868af45984..6a991afc563d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -418,6 +418,14 @@ struct drm_xe_vm_bind_op { * than differing the MAP to the page fault handler. */ #define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 18) + /* + * When the NULL flag is set, the page tables are setup with a special + * bit which indicates writes are dropped and all reads return zero. In + * the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP + * operations, the BO handle MBZ, and the BO offset MBZ. This flag is + * intended to implement VK sparse bindings. + */ +#define XE_VM_BIND_FLAG_NULL (0x1 << 19) /** @reserved: Reserved */ __u64 reserved[2]; -- cgit v1.2.3 From 8ae8a2e8dd21bd8bc94c9817874a97239aa867a2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sun, 21 May 2023 18:24:20 -0700 Subject: drm/xe: Long running job update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For long running (LR) jobs with the DRM scheduler we must return NULL in run_job which results in signaling the job's finished fence immediately. This prevents LR jobs from creating infinite dma-fences. Signaling job's finished fence immediately breaks flow controlling ring with the DRM scheduler. To work around this, the ring is flow controlled and written in the exec IOCTL. Signaling job's finished fence immediately also breaks the TDR which is used in reset / cleanup entity paths so write a new path for LR entities. 
v2: Better commit, white space, remove rmb(), better comment next to emit_job() v3 (Thomas): Change LR reference counting, fix working in commit Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 32 +++++++++++ drivers/gpu/drm/xe/xe_engine.h | 4 ++ drivers/gpu/drm/xe/xe_exec.c | 8 +++ drivers/gpu/drm/xe/xe_guc_engine_types.h | 2 + drivers/gpu/drm/xe/xe_guc_submit.c | 96 +++++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_trace.h | 5 ++ 6 files changed, 134 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 49d92f089242..1843e886a405 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -18,6 +18,7 @@ #include "xe_macros.h" #include "xe_migrate.h" #include "xe_pm.h" +#include "xe_ring_ops_types.h" #include "xe_trace.h" #include "xe_vm.h" @@ -679,6 +680,37 @@ static void engine_kill_compute(struct xe_engine *e) up_write(&e->vm->lock); } +/** + * xe_engine_is_lr() - Whether an engine is long-running + * @e: The engine + * + * Return: True if the engine is long-running, false otherwise. + */ +bool xe_engine_is_lr(struct xe_engine *e) +{ + return e->vm && xe_vm_no_dma_fences(e->vm) && + !(e->flags & ENGINE_FLAG_VM); +} + +static s32 xe_engine_num_job_inflight(struct xe_engine *e) +{ + return e->lrc->fence_ctx.next_seqno - xe_lrc_seqno(e->lrc) - 1; +} + +/** + * xe_engine_ring_full() - Whether an engine's ring is full + * @e: The engine + * + * Return: True if the engine's ring is full, false otherwise. + */ +bool xe_engine_ring_full(struct xe_engine *e) +{ + struct xe_lrc *lrc = e->lrc; + s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; + + return xe_engine_num_job_inflight(e) >= max_job; +} + /** * xe_engine_is_idle() - Whether an engine is idle. * @engine: The engine diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h index b95d9b040877..3017e4fe308d 100644 --- a/drivers/gpu/drm/xe/xe_engine.h +++ b/drivers/gpu/drm/xe/xe_engine.h @@ -42,6 +42,10 @@ static inline bool xe_engine_is_parallel(struct xe_engine *engine) return engine->width > 1; } +bool xe_engine_is_lr(struct xe_engine *e); + +bool xe_engine_ring_full(struct xe_engine *e); + bool xe_engine_is_idle(struct xe_engine *engine); void xe_engine_kill(struct xe_engine *e); diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 4f7694a29348..700f65b66d40 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_engine.h" #include "xe_macros.h" +#include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_sync.h" #include "xe_vm.h" @@ -302,6 +303,11 @@ retry: goto err_engine_end; } + if (xe_engine_is_lr(engine) && xe_engine_ring_full(engine)) { + err = -EWOULDBLOCK; + goto err_engine_end; + } + job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ? 
addresses : &args->address); if (IS_ERR(job)) { @@ -388,6 +394,8 @@ retry: xe_sync_entry_signal(&syncs[i], job, &job->drm.s_fence->finished); + if (xe_engine_is_lr(engine)) + engine->ring_ops->emit_job(job); xe_sched_job_push(job); xe_vm_reactivate_rebind(vm); diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_engine_types.h index 512615d1ce8c..5565412fe7f1 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_types.h +++ b/drivers/gpu/drm/xe/xe_guc_engine_types.h @@ -31,6 +31,8 @@ struct xe_guc_engine { */ #define MAX_STATIC_MSG_TYPE 3 struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; + /** @lr_tdr: long running TDR worker */ + struct work_struct lr_tdr; /** @fini_async: do final fini async from this worker */ struct work_struct fini_async; /** @resume_time: time of last resume */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 7be06320dbd7..9c0fd1368b77 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -483,6 +483,14 @@ static void register_engine(struct xe_engine *e) parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); } + /* + * We must keep a reference for LR engines if engine is registered with + * the GuC as jobs signal immediately and can't destroy an engine if the + * GuC has a reference to it. + */ + if (xe_engine_is_lr(e)) + xe_engine_get(e); + set_engine_registered(e); trace_xe_engine_register(e); if (xe_engine_is_parallel(e)) @@ -645,6 +653,7 @@ guc_engine_run_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_engine *e = job->engine; + bool lr = xe_engine_is_lr(e); XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) && !engine_banned(e) && !engine_suspended(e)); @@ -654,14 +663,19 @@ guc_engine_run_job(struct drm_sched_job *drm_job) if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) { if (!engine_registered(e)) register_engine(e); - e->ring_ops->emit_job(job); + if (!lr) /* LR jobs are emitted in the exec IOCTL */ + e->ring_ops->emit_job(job); submit_engine(e); } - if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) + if (lr) { + xe_sched_job_set_error(job, -EOPNOTSUPP); + return NULL; + } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { return job->fence; - else + } else { return dma_fence_get(job->fence); + } } static void guc_engine_free_job(struct drm_sched_job *drm_job) @@ -764,6 +778,55 @@ static void simple_error_capture(struct xe_engine *e) } #endif +static void xe_guc_engine_trigger_cleanup(struct xe_engine *e) +{ + struct xe_guc *guc = engine_to_guc(e); + + if (xe_engine_is_lr(e)) + queue_work(guc_to_gt(guc)->ordered_wq, &e->guc->lr_tdr); + else + xe_sched_tdr_queue_imm(&e->guc->sched); +} + +static void xe_guc_engine_lr_cleanup(struct work_struct *w) +{ + struct xe_guc_engine *ge = + container_of(w, struct xe_guc_engine, lr_tdr); + struct xe_engine *e = ge->engine; + struct xe_gpu_scheduler *sched = &ge->sched; + + XE_WARN_ON(!xe_engine_is_lr(e)); + trace_xe_engine_lr_cleanup(e); + + /* Kill the run_job / process_msg entry points */ + xe_sched_submission_stop(sched); + + /* Engine state now stable, disable scheduling / deregister if needed */ + if (engine_registered(e)) { + struct xe_guc *guc = engine_to_guc(e); + int ret; + + set_engine_banned(e); + disable_scheduling_deregister(guc, e); + + /* + * Must wait for scheduling to be disabled before signalling + * any fences, if GT broken the GT reset code should signal us. 
+ */ + ret = wait_event_timeout(guc->ct.wq, + !engine_pending_disable(e) || + guc_read_stopped(guc), HZ * 5); + if (!ret) { + XE_WARN_ON("Schedule disable failed to respond"); + xe_sched_submission_start(sched); + xe_gt_reset_async(e->gt); + return; + } + } + + xe_sched_submission_start(sched); +} + static enum drm_gpu_sched_stat guc_engine_timedout_job(struct drm_sched_job *drm_job) { @@ -815,7 +878,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job) err = -EIO; set_engine_banned(e); xe_engine_get(e); - disable_scheduling_deregister(engine_to_guc(e), e); + disable_scheduling_deregister(guc, e); /* * Must wait for scheduling to be disabled before signalling @@ -848,7 +911,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job) */ xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - xe_sched_tdr_queue_imm(&e->guc->sched); + xe_guc_engine_trigger_cleanup(e); /* Mark all outstanding jobs as bad, thus completing them */ spin_lock(&sched->base.job_list_lock); @@ -872,6 +935,8 @@ static void __guc_engine_fini_async(struct work_struct *w) trace_xe_engine_destroy(e); + if (xe_engine_is_lr(e)) + cancel_work_sync(&ge->lr_tdr); if (e->flags & ENGINE_FLAG_PERSISTENT) xe_device_remove_persistent_engines(gt_to_xe(e->gt), e); release_guc_id(guc, e); @@ -889,7 +954,7 @@ static void guc_engine_fini_async(struct xe_engine *e) bool kernel = e->flags & ENGINE_FLAG_KERNEL; INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async); - queue_work(system_unbound_wq, &e->guc->fini_async); + queue_work(system_wq, &e->guc->fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (kernel) { @@ -1080,6 +1145,9 @@ static int guc_engine_init(struct xe_engine *e) goto err_sched; e->priority = XE_ENGINE_PRIORITY_NORMAL; + if (xe_engine_is_lr(e)) + INIT_WORK(&e->guc->lr_tdr, xe_guc_engine_lr_cleanup); + mutex_lock(&guc->submission_state.lock); err = alloc_guc_id(guc, e); @@ -1131,7 +1199,7 @@ static void guc_engine_kill(struct xe_engine *e) { trace_xe_engine_kill(e); set_engine_killed(e); - xe_sched_tdr_queue_imm(&e->guc->sched); + xe_guc_engine_trigger_cleanup(e); } static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg, @@ -1283,10 +1351,11 @@ static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e) xe_sched_submission_stop(sched); /* Clean up lost G2H + reset engine state */ - if (engine_destroyed(e) && engine_registered(e)) { - if (engine_banned(e)) + if (engine_registered(e)) { + if ((engine_banned(e) && engine_destroyed(e)) || + xe_engine_is_lr(e)) xe_engine_put(e); - else + else if (engine_destroyed(e)) __guc_engine_fini(guc, e); } if (e->guc->suspend_pending) { @@ -1501,7 +1570,8 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) trace_xe_engine_deregister_done(e); clear_engine_registered(e); - if (engine_banned(e)) + + if (engine_banned(e) || xe_engine_is_lr(e)) xe_engine_put(e); else __guc_engine_fini(guc, e); @@ -1538,7 +1608,7 @@ int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) */ set_engine_reset(e); if (!engine_banned(e)) - xe_sched_tdr_queue_imm(&e->guc->sched); + xe_guc_engine_trigger_cleanup(e); return 0; } @@ -1565,7 +1635,7 @@ int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, /* Treat the same as engine reset */ set_engine_reset(e); if (!engine_banned(e)) - xe_sched_tdr_queue_imm(&e->guc->sched); + xe_guc_engine_trigger_cleanup(e); return 0; } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 
878ab4115d91..8a5d35f15791 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -220,6 +220,11 @@ DEFINE_EVENT(xe_engine, xe_engine_resubmit, TP_ARGS(e) ); +DEFINE_EVENT(xe_engine, xe_engine_lr_cleanup, + TP_PROTO(struct xe_engine *e), + TP_ARGS(e) +); + DECLARE_EVENT_CLASS(xe_sched_job, TP_PROTO(struct xe_sched_job *job), TP_ARGS(job), -- cgit v1.2.3 From 911cd9b3b4e1d10250987864fa19315c772edf9d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 12 Apr 2023 18:48:41 -0700 Subject: drm/xe: Ensure LR engines are not persistent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With our ref counting scheme long running (LR) engines only close properly if not persistent, ensure that LR engines are non-persistent. v2: spell out LR Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 1843e886a405..e72a94a944d0 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -599,7 +599,9 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, return -ENOENT; e = xe_engine_create(xe, vm, logical_mask, - args->width, hwe, ENGINE_FLAG_PERSISTENT); + args->width, hwe, + xe_vm_no_dma_fences(vm) ? 0 : + ENGINE_FLAG_PERSISTENT); xe_vm_put(vm); if (IS_ERR(e)) return PTR_ERR(e); -- cgit v1.2.3 From 73c09901b0240bb6acdd957330e456e808ec52e6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 27 Mar 2023 18:34:49 -0700 Subject: drm/xe: Only try to lock external BOs in VM bind We only need to try to lock a BO if it's external as non-external BOs share the dma-resv with the already locked VM. Trying to lock non-external BOs caused an issue (list corruption) in an uncoming patch which adds bulk LRU move. Since this code isn't needed, remove it. v2: New commit message, s/mattthew/matthew/ Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5ac819a65cf1..c98801ee3f55 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2157,9 +2157,11 @@ static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, */ xe_bo_get(vbo); - tv_bo.bo = &vbo->ttm; - tv_bo.num_shared = 1; - list_add(&tv_bo.head, &objs); + if (!vbo->vm) { + tv_bo.bo = &vbo->ttm; + tv_bo.num_shared = 1; + list_add(&tv_bo.head, &objs); + } } again: -- cgit v1.2.3 From 7ba4c5f02763cc423bfa0c6a87a8dd5501dc3417 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 7 Jun 2023 08:45:20 -0700 Subject: drm/xe: VM LRU bulk move MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the TTM LRU bulk move for BOs tied to a VM. Update the bulk moves LRU position on every exec. 
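For reference, the TTM bulk-move pattern this builds on, as a minimal sketch; the ttm_* calls are the existing TTM helpers, while the my_* wrappers and surrounding context are purely illustrative:

#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_resource.h>

struct my_vm {
	/* initialized once with ttm_lru_bulk_move_init() at VM creation */
	struct ttm_lru_bulk_move lru_bulk_move;
};

/* At BO creation, with the BO's reservation held: group the BO into the
 * VM's bulk move instead of bumping it on the LRU individually. */
static void my_bo_join_vm_lru(struct ttm_buffer_object *bo, struct my_vm *vm)
{
	ttm_bo_set_bulk_move(bo, &vm->lru_bulk_move);
}

/* On every exec: move the whole group to the LRU tail in one operation. */
static void my_vm_touch_lru(struct ttm_device *bdev, struct my_vm *vm)
{
	spin_lock(&bdev->lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&bdev->lru_lock);
}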
v2: Bulk move for compute VMs, use WARN rather than BUG Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 31 +++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_bo.h | 4 ++-- drivers/gpu/drm/xe/xe_dma_buf.c | 2 +- drivers/gpu/drm/xe/xe_exec.c | 6 ++++++ drivers/gpu/drm/xe/xe_vm.c | 4 ++++ drivers/gpu/drm/xe/xe_vm_types.h | 3 +++ 6 files changed, 43 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 8bac1717ca78..43801994f069 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -992,6 +992,22 @@ static void xe_gem_object_free(struct drm_gem_object *obj) ttm_bo_put(container_of(obj, struct ttm_buffer_object, base)); } +static void xe_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv) +{ + struct xe_bo *bo = gem_to_xe_bo(obj); + + if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) { + struct ww_acquire_ctx ww; + + XE_WARN_ON(!xe_bo_is_user(bo)); + + xe_bo_lock(bo, &ww, 0, false); + ttm_bo_set_bulk_move(&bo->ttm, NULL); + xe_bo_unlock(bo, &ww); + } +} + static bool should_migrate_to_system(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); @@ -1047,6 +1063,7 @@ static const struct vm_operations_struct xe_gem_vm_ops = { static const struct drm_gem_object_funcs xe_gem_object_funcs = { .free = xe_gem_object_free, + .close = xe_gem_object_close, .mmap = drm_gem_ttm_mmap, .export = xe_gem_prime_export, .vm_ops = &xe_gem_vm_ops, @@ -1088,8 +1105,8 @@ void xe_bo_free(struct xe_bo *bo) struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, struct xe_tile *tile, struct dma_resv *resv, - size_t size, enum ttm_bo_type type, - u32 flags) + struct ttm_lru_bulk_move *bulk, size_t size, + enum ttm_bo_type type, u32 flags) { struct ttm_operation_ctx ctx = { .interruptible = true, @@ -1156,7 +1173,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return ERR_PTR(err); bo->created = true; - ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); + if (bulk) + ttm_bo_set_bulk_move(&bo->ttm, bulk); + else + ttm_bo_move_to_lru_tail_unlocked(&bo->ttm); return bo; } @@ -1226,7 +1246,10 @@ xe_bo_create_locked_range(struct xe_device *xe, } } - bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL, size, + bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL, + vm && !xe_vm_in_fault_mode(vm) && + flags & XE_BO_CREATE_USER_BIT ? 
+ &vm->lru_bulk_move : NULL, size, type, flags); if (IS_ERR(bo)) return bo; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 3a148cc6e811..08ca1d06bf77 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -81,8 +81,8 @@ void xe_bo_free(struct xe_bo *bo); struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, struct xe_tile *tile, struct dma_resv *resv, - size_t size, enum ttm_bo_type type, - u32 flags); + struct ttm_lru_bulk_move *bulk, size_t size, + enum ttm_bo_type type, u32 flags); struct xe_bo * xe_bo_create_locked_range(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 9b252cc782b7..975dee1f770f 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -199,7 +199,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, int ret; dma_resv_lock(resv, NULL); - bo = __xe_bo_create_locked(xe, storage, NULL, resv, dma_buf->size, + bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT); if (IS_ERR(bo)) { ret = PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 700f65b66d40..c52edff9a358 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -399,6 +399,12 @@ retry: xe_sched_job_push(job); xe_vm_reactivate_rebind(vm); + if (!err && !xe_vm_no_dma_fences(vm)) { + spin_lock(&xe->ttm.lru_lock); + ttm_lru_bulk_move_tail(&vm->lru_bulk_move); + spin_unlock(&xe->ttm.lru_lock); + } + err_repin: if (!xe_vm_no_dma_fences(vm)) up_read(&vm->userptr.notifier_lock); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c98801ee3f55..06ebc1cfc4f7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -632,6 +632,10 @@ retry: #undef retry_required + spin_lock(&vm->xe->ttm.lru_lock); + ttm_lru_bulk_move_tail(&vm->lru_bulk_move); + spin_unlock(&vm->xe->ttm.lru_lock); + /* Point of no return. */ arm_preempt_fences(vm, &preempt_fences); resume_and_reinstall_preempt_fences(vm); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 9b39c5f64afa..c148dd49a6ca 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -166,6 +166,9 @@ struct xe_vm { /** Protects @rebind_list and the page-table structures */ struct dma_resv resv; + /** @lru_bulk_move: Bulk LRU move list for this VM's BOs */ + struct ttm_lru_bulk_move lru_bulk_move; + u64 size; struct rb_root vmas; -- cgit v1.2.3 From 8489f30e0c8e47d2d654cfb31825ff37de7e5574 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jun 2023 18:20:52 +0100 Subject: drm/xe/bo: handle PL_TT -> PL_TT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When moving between PL_VRAM <-> PL_SYSTEM we have to have use PL_TT in the middle as a temporary resource for the actual copy. In some GL workloads it can be seen that once the resource has been moved to the PL_TT we might have to bail out of the ttm_bo_validate(), before finishing the final hop. If this happens the resource is left as TTM_PL_FLAG_TEMPORARY, and when the ttm_bo_validate() is restarted the current placement is always seen as incompatible, requiring us to complete the move. 
However if the BO allows PL_TT as a possible placement we can end up attempting a PL_TT -> PL_TT move (like when running out of VRAM) which leads to explosions in xe_bo_move(), like triggering the XE_BUG_ON(!tile). Going from TTM_PL_FLAG_TEMPORARY with PL_TT -> PL_VRAM should already work as-is, so it looks like we only need to worry about PL_TT -> PL_TT and it looks like we can just treat it as a dummy move, since no real move is needed. Reported-by: José Roberto de Souza Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 43801994f069..ad0ea3014bfe 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -603,6 +603,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } + /* + * Failed multi-hop where the old_mem is still marked as + * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. + */ + if (old_mem->mem_type == XE_PL_TT && + new_mem->mem_type == XE_PL_TT) { + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + if (!move_lacks_source && !xe_bo_is_pinned(bo)) { ret = xe_bo_move_notify(bo, ctx); if (ret) -- cgit v1.2.3 From 898f86c23c600c8f70bf1a03e81a7be97038a72d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 13 Jun 2023 11:03:55 -0700 Subject: drm/xe: Skip applying copy engine fuses Like commit 69a3738ba57f ("drm/i915: Skip applying copy engine fuses"), do not apply copy engine fuses for platforms where MEML3_EN is not relevant for determining the presence of the copy engines. Acked-by: Gustavo Sousa Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230613180356.2906441-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 68cd793cdfb5..b7b02c96e998 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -492,6 +492,9 @@ static void read_copy_fuses(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); u32 bcs_mask; + if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270) + return; + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3); -- cgit v1.2.3 From 5db4afe1db56c10b6edef41ad9309bf0bed87f34 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 2 Jun 2023 20:27:32 +0300 Subject: drm/xe: Fix unreffed ptr leak on engine lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The engine xarray holds a ref to engine, guarded by the lock. While we do lookup for engine, we need to take the ref inside the lock to prevent unreffed pointer escaping and causing potential use-after-free after. 
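In pattern form, a minimal sketch with generic names (struct obj and obj_lookup() are illustrative, not xe identifiers):

#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/xarray.h>

struct obj {
	struct kref refcount;
};

static struct obj *obj_lookup(struct xarray *xa, struct mutex *lock, unsigned long id)
{
	struct obj *o;

	mutex_lock(lock);
	o = xa_load(xa, id);
	if (o)
		kref_get(&o->refcount);	/* take the ref while the lock still protects the entry */
	mutex_unlock(lock);

	return o;	/* caller now owns a reference, or got NULL */
}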
v2: remove branch prediction hint (Thomas) Cc: Thomas Hellström Signed-off-by: Mika Kuoppala Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20230602172732.1001057-1-mika.kuoppala@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index e72a94a944d0..097a1ea06002 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -162,10 +162,9 @@ struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id) mutex_lock(&xef->engine.lock); e = xa_load(&xef->engine.xa, id); - mutex_unlock(&xef->engine.lock); - if (e) xe_engine_get(e); + mutex_unlock(&xef->engine.lock); return e; } @@ -644,26 +643,27 @@ int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, struct xe_file *xef = to_xe_file(file); struct drm_xe_engine_get_property *args = data; struct xe_engine *e; + int ret; if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - mutex_lock(&xef->engine.lock); - e = xa_load(&xef->engine.xa, args->engine_id); - mutex_unlock(&xef->engine.lock); - + e = xe_engine_lookup(xef, args->engine_id); if (XE_IOCTL_ERR(xe, !e)) return -ENOENT; switch (args->property) { case XE_ENGINE_GET_PROPERTY_BAN: args->value = !!(e->flags & ENGINE_FLAG_BANNED); + ret = 0; break; default: - return -EINVAL; + ret = -EINVAL; } - return 0; + xe_engine_put(e); + + return ret; } static void engine_kill_compute(struct xe_engine *e) -- cgit v1.2.3 From 1105ac15d2a151bc87c3fe0e79f95c5cde90f1eb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 31 Mar 2023 09:46:24 +0100 Subject: drm/xe/uapi: restrict system wide accounting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since this is considered an info leak (system wide accounting), rather hide behind perfmon_capable(). v2: - Without perfmon_capable() it likely makes more sense to report as zero, instead of reporting as used == total size. This should give similar behaviour as i915 which rather tracks free instead of used. 
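As a sketch of the gating pattern (report_used() is illustrative; perfmon_capable() and ttm_resource_manager_usage() are the existing helpers):

#include <linux/capability.h>
#include <drm/ttm/ttm_resource.h>

static u64 report_used(struct ttm_resource_manager *man)
{
	/* unprivileged callers see 0 rather than system-wide accounting */
	return perfmon_capable() ? ttm_resource_manager_usage(man) : 0;
}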
Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: Thomas Hellström Cc: Gwan-gyeong Mun Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Filip Hazubski Cc: Carl Zhang Cc: Effie Yu Cc: Gwan-gyeong Mun Reviewed-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index c4165fa3428e..15e171ca7e62 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -128,7 +128,8 @@ static int query_memory_usage(struct xe_device *xe, usage->regions[0].min_page_size = PAGE_SIZE; usage->regions[0].max_page_size = PAGE_SIZE; usage->regions[0].total_size = man->size << PAGE_SHIFT; - usage->regions[0].used = ttm_resource_manager_usage(man); + if (perfmon_capable()) + usage->regions[0].used = ttm_resource_manager_usage(man); usage->num_regions = 1; for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { @@ -145,8 +146,13 @@ static int query_memory_usage(struct xe_device *xe, SZ_1G; usage->regions[usage->num_regions].total_size = man->size; - usage->regions[usage->num_regions++].used = - ttm_resource_manager_usage(man); + + if (perfmon_capable()) { + usage->regions[usage->num_regions].used = + ttm_resource_manager_usage(man); + } + + usage->num_regions++; } } -- cgit v1.2.3 From 513e82627931d0ac6b74b9c2595008b3573a5158 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Jun 2023 12:00:20 +0100 Subject: drm/xe/bo: consider bo->flags in xe_bo_migrate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For VRAM allocations the bo->flags can control some characteristics of the underlying memory, like whether it needs to be contiguous, and in the future whether it needs to be in the CPU visible portion. Rather use add_vram() in xe_bo_migrate() which should take care of such things for us. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: José Roberto de Souza Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ad0ea3014bfe..f54fb7bd184a 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1852,6 +1852,7 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) */ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, @@ -1876,6 +1877,18 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) placement.placement = &requested; placement.busy_placement = &requested; + /* + * Stolen needs to be handled like below VRAM handling if we ever need + * to support it. 
+ */ + drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN); + + if (mem_type_is_vram(mem_type)) { + u32 c = 0; + + add_vram(xe, bo, &requested, bo->flags, mem_type, &c); + } + return ttm_bo_validate(&bo->ttm, &placement, &ctx); } -- cgit v1.2.3 From 1c2097bbde107effe2183891f92c060aa64bfa8b Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Fri, 23 Jun 2023 10:54:30 +0530 Subject: drm/xe: add a new sysfs directory for gtidle properties 1) Add a new sysfs directory under devices/gt#/ called gtidle to contain idle properties of GT such as name, idle_status, idle_residency_ms 2) Remove forcewake calls for residency counter v2: - abstract using function pointers (Anshuman) - remove forcewake calls for residency counter - use device_attr (Badal) - move rc functions to guc_pc - change name to gt_idle (Rodrigo) v3: - return error for drmm_add_action_or_reset - replace file and functions with gt_idle prefix to gt_idle_sysfs (Himal) - use enum for gt idle state - move multiplier to gt idle and initialize (Anshuman) - correct doc annotation (Rodrigo) - remove return variable - use kobj_gt instead of new gtidle kobj - move residency_ms to gtidle file - retain xe_guc_pc prefix for functions in guc_rc file (Michal) v4: - fix doc errors in xe_guc_pc file - change u64 to u32 for reading residency counter - keep gtidle states generic GT_IDLE_C[0/6] (Anshuman) v5: - update commit message to include removal of forcewake calls (Anshuman) - return void from sysfs initialization function and add warnings (Andi) v6: - remove extra lines (Anshuman) Signed-off-by: Riana Tauro Acked-by: Rodrigo Vivi Reviewed-by: Anshuman Gupta Reviewed-by: Andi Shyti Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt.c | 3 + drivers/gpu/drm/xe/xe_gt_idle_sysfs.c | 162 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_idle_sysfs.h | 13 +++ drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h | 38 +++++++ drivers/gpu/drm/xe/xe_gt_types.h | 4 + drivers/gpu/drm/xe/xe_guc_pc.c | 45 +++----- drivers/gpu/drm/xe/xe_guc_pc.h | 2 + 8 files changed, 239 insertions(+), 29 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs.h create mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 73100c246a74..8d6d3c070fc8 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -62,6 +62,7 @@ xe-y += xe_bb.o \ xe_gt.o \ xe_gt_clock.o \ xe_gt_debugfs.o \ + xe_gt_idle_sysfs.o \ xe_gt_mcr.o \ xe_gt_pagefault.o \ xe_gt_sysfs.o \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 2458397ce8af..bc76678a8276 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -18,6 +18,7 @@ #include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt_clock.h" +#include "xe_gt_idle_sysfs.h" #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" @@ -306,6 +307,8 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; + xe_gt_idle_sysfs_init(>->gtidle); + /* XXX: Fake that we pull the engine mask from hwconfig blob */ gt->info.engine_mask = gt->info.__engine_mask; diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c new file mode 100644 index 000000000000..ec77349dea76 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ 
+ +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_idle_sysfs.h" +#include "xe_gt_sysfs.h" +#include "xe_guc_pc.h" + +/** + * DOC: Xe GT Idle + * + * Provides sysfs entries for idle properties of GT + * + * device/gt#/gtidle/name - name of the state + * device/gt#/gtidle/idle_residency_ms - Provides residency of the idle state in ms + * device/gt#/gtidle/idle_status - Provides current idle state + */ + +static struct xe_gt_idle *dev_to_gtidle(struct device *dev) +{ + struct kobject *kobj = &dev->kobj; + + return &kobj_to_gt(kobj->parent)->gtidle; +} + +static struct xe_gt *gtidle_to_gt(struct xe_gt_idle *gtidle) +{ + return container_of(gtidle, struct xe_gt, gtidle); +} + +static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle) +{ + return >idle_to_gt(gtidle)->uc.guc.pc; +} + +static const char *gt_idle_state_to_string(enum xe_gt_idle_state state) +{ + switch (state) { + case GT_IDLE_C0: + return "gt-c0"; + case GT_IDLE_C6: + return "gt-c6"; + default: + return "unknown"; + } +} + +static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency) +{ + u64 delta, overflow_residency, prev_residency; + + overflow_residency = BIT_ULL(32); + + /* + * Counter wrap handling + * Store previous hw counter values for counter wrap-around handling + * Relying on sufficient frequency of queries otherwise counters can still wrap. + */ + prev_residency = gtidle->prev_residency; + gtidle->prev_residency = cur_residency; + + /* delta */ + if (cur_residency >= prev_residency) + delta = cur_residency - prev_residency; + else + delta = cur_residency + (overflow_residency - prev_residency); + + /* Add delta to extended raw driver copy of idle residency */ + cur_residency = gtidle->cur_residency + delta; + gtidle->cur_residency = cur_residency; + + /* residency multiplier in ns, convert to ms */ + cur_residency = mul_u64_u32_div(cur_residency, gtidle->residency_multiplier, 1e6); + + return cur_residency; +} + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + + return sysfs_emit(buff, gtidle->name); +} +static DEVICE_ATTR_RO(name); + +static ssize_t idle_status_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + enum xe_gt_idle_state state; + + state = gtidle->idle_status(pc); + + return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); +} +static DEVICE_ATTR_RO(idle_status); + +static ssize_t idle_residency_ms_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + u64 residency; + + residency = gtidle->idle_residency(pc); + return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); +} +static DEVICE_ATTR_RO(idle_residency_ms); + +static const struct attribute *gt_idle_attrs[] = { + &dev_attr_name.attr, + &dev_attr_idle_status.attr, + &dev_attr_idle_residency_ms.attr, + NULL, +}; + +static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_files(kobj, gt_idle_attrs); + kobject_put(kobj); +} + +void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) +{ + struct xe_gt *gt = gtidle_to_gt(gtidle); + struct xe_device *xe = gt_to_xe(gt); + struct kobject *kobj; + int err; + + kobj = kobject_create_and_add("gtidle", gt->sysfs); + if (!kobj) { + drm_warn(&xe->drm, "%s failed, 
err: %d\n", __func__, -ENOMEM); + return; + } + + sprintf(gtidle->name, "gt%d-rc\n", gt->info.id); + /* Multiplier for RC6 Residency counter in units of 1.28us */ + gtidle->residency_multiplier = 1280; + gtidle->idle_residency = xe_guc_pc_rc6_residency; + gtidle->idle_status = xe_guc_pc_rc_status; + + err = sysfs_create_files(kobj, gt_idle_attrs); + if (err) { + kobject_put(kobj); + drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj); + if (err) + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); +} diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h new file mode 100644 index 000000000000..b0973f96c7ab --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_IDLE_SYSFS_H_ +#define _XE_GT_IDLE_SYSFS_H_ + +#include "xe_gt_idle_sysfs_types.h" + +void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle); + +#endif /* _XE_GT_IDLE_SYSFS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h b/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h new file mode 100644 index 000000000000..f99b447534f3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_ +#define _XE_GT_IDLE_SYSFS_TYPES_H_ + +#include + +struct xe_guc_pc; + +/* States of GT Idle */ +enum xe_gt_idle_state { + GT_IDLE_C0, + GT_IDLE_C6, + GT_IDLE_UNKNOWN, +}; + +/** + * struct xe_gt_idle - A struct that contains idle properties based of gt + */ +struct xe_gt_idle { + /** @name: name */ + char name[16]; + /** @residency_multiplier: residency multiplier in ns */ + u32 residency_multiplier; + /** @cur_residency: raw driver copy of idle residency */ + u64 cur_residency; + /** @prev_residency: previous residency counter */ + u64 prev_residency; + /** @idle_status: get the current idle state */ + enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc); + /** @idle_residency: get idle residency counter */ + u64 (*idle_residency)(struct xe_guc_pc *pc); +}; + +#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 99ab7ec99ccd..7d4de019f9a5 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -7,6 +7,7 @@ #define _XE_GT_TYPES_H_ #include "xe_force_wake_types.h" +#include "xe_gt_idle_sysfs_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_reg_sr_types.h" @@ -260,6 +261,9 @@ struct xe_gt { /** @uc: micro controllers on the GT */ struct xe_uc uc; + /** @gtidle: idle properties of GT */ + struct xe_gt_idle gtidle; + /** @engine_ops: submission backend engine operations */ const struct xe_engine_ops *engine_ops; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 5d5cf4b0d508..f02bf1641380 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -76,12 +76,7 @@ * * Render-C states is also a GuC PC feature that is now enabled in Xe for * all platforms. - * Xe's GuC PC provides a sysfs API for Render-C States: * - * device/gt#/rc* *read-only* files: - * - rc_status: Provide the actual immediate status of Render-C: (rc0 or rc6) - * - rc6_residency: Provide the rc6_residency counter in units of 1.28 uSec. 
- * Prone to overflows. */ static struct xe_guc * @@ -572,10 +567,12 @@ out: } static DEVICE_ATTR_RW(freq_max); -static ssize_t rc_status_show(struct device *dev, - struct device_attribute *attr, char *buff) +/** + * xe_guc_pc_rc_status - get the current Render C state + * @pc: XE_GuC_PC instance + */ +enum xe_gt_idle_state xe_guc_pc_rc_status(struct xe_guc_pc *pc) { - struct xe_guc_pc *pc = dev_to_pc(dev); struct xe_gt *gt = pc_to_gt(pc); u32 reg; @@ -585,37 +582,29 @@ static ssize_t rc_status_show(struct device *dev, switch (REG_FIELD_GET(RCN_MASK, reg)) { case GT_RC6: - return sysfs_emit(buff, "rc6\n"); + return GT_IDLE_C6; case GT_RC0: - return sysfs_emit(buff, "rc0\n"); + return GT_IDLE_C0; default: - return -ENOENT; + return GT_IDLE_UNKNOWN; } } -static DEVICE_ATTR_RO(rc_status); -static ssize_t rc6_residency_show(struct device *dev, - struct device_attribute *attr, char *buff) +/** + * xe_guc_pc_rc6_residency - rc6 residency counter + * @pc: Xe_GuC_PC instance + */ +u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc) { - struct xe_guc_pc *pc = dev_to_pc(dev); struct xe_gt *gt = pc_to_gt(pc); u32 reg; - ssize_t ret; - - xe_device_mem_access_get(pc_to_xe(pc)); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (ret) - goto out; + xe_device_mem_access_get(gt_to_xe(gt)); reg = xe_mmio_read32(gt, GT_GFX_RC6); - ret = sysfs_emit(buff, "%u\n", reg); + xe_device_mem_access_put(gt_to_xe(gt)); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out: - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; + return reg; } -static DEVICE_ATTR_RO(rc6_residency); static const struct attribute *pc_attrs[] = { &dev_attr_freq_act.attr, @@ -625,8 +614,6 @@ static const struct attribute *pc_attrs[] = { &dev_attr_freq_rpn.attr, &dev_attr_freq_min.attr, &dev_attr_freq_max.attr, - &dev_attr_rc_status.attr, - &dev_attr_rc6_residency.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index da29e4934868..976179dbc9a8 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -12,4 +12,6 @@ int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); +enum xe_gt_idle_state xe_guc_pc_rc_status(struct xe_guc_pc *pc); +u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ -- cgit v1.2.3 From 7b076d14f21a48de572e5191614b3e6b2d6ab823 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Fri, 23 Jun 2023 10:54:31 +0530 Subject: drm/xe/mtl: Add support to get C6 residency/status of MTL Add the registers to get C6 residency of MTL SAMedia and C6 status of MTL gts v2: - move register definitions to regs header (Anshuman) - correct reg definition for mtl rc status - make idle_status function common (Badal) v3: - remove extra line in commit message - use only media type check in initialization - use graphics ver check (Anshuman) v4: - remove extra lines (Anshuman) Bspec: 66300 Signed-off-by: Badal Nilawar Signed-off-by: Riana Tauro Reviewed-by: Andi Shyti Reviewed-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 11 ++++++++-- drivers/gpu/drm/xe/xe_gt_idle_sysfs.c | 14 ++++++++---- drivers/gpu/drm/xe/xe_guc_pc.c | 41 ++++++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_guc_pc.h | 3 ++- 4 files changed, 52 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d2a0a5c8b02a..55b0f70e1904 100644 --- 
a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -16,6 +16,11 @@ #define MEDIA_GT_GSI_OFFSET 0x380000 #define MEDIA_GT_GSI_LENGTH 0x40000 +/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ +#define MTL_MIRROR_TARGET_WP1 XE_REG(0xc60) +#define MTL_CAGF_MASK REG_GENMASK(8, 0) +#define MTL_CC_MASK REG_GENMASK(12, 9) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 XE_REG(0xd00) #define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3) @@ -349,10 +354,12 @@ #define FORCEWAKE_USER BIT(1) #define FORCEWAKE_KERNEL_FALLBACK BIT(15) +#define MTL_MEDIA_MC6 XE_REG(0x138048) + #define GT_CORE_STATUS XE_REG(0x138060) #define RCN_MASK REG_GENMASK(2, 0) -#define GT_RC0 0 -#define GT_RC6 3 +#define GT_C0 0 +#define GT_C6 3 #define GT_GFX_RC6_LOCKED XE_REG(0x138104) #define GT_GFX_RC6 XE_REG(0x138108) diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c index ec77349dea76..7238e96a116c 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c @@ -142,11 +142,17 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) return; } - sprintf(gtidle->name, "gt%d-rc\n", gt->info.id); - /* Multiplier for RC6 Residency counter in units of 1.28us */ + if (xe_gt_is_media_type(gt)) { + sprintf(gtidle->name, "gt%d-mc\n", gt->info.id); + gtidle->idle_residency = xe_guc_pc_mc6_residency; + } else { + sprintf(gtidle->name, "gt%d-rc\n", gt->info.id); + gtidle->idle_residency = xe_guc_pc_rc6_residency; + } + + /* Multiplier for Residency counter in units of 1.28us */ gtidle->residency_multiplier = 1280; - gtidle->idle_residency = xe_guc_pc_rc6_residency; - gtidle->idle_status = xe_guc_pc_rc_status; + gtidle->idle_status = xe_guc_pc_c_status; err = sysfs_create_files(kobj, gt_idle_attrs); if (err) { diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index f02bf1641380..3093cfeff0c2 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -34,9 +34,6 @@ #define GT_PERF_STATUS XE_REG(0x1381b4) #define GEN12_CAGF_MASK REG_GENMASK(19, 11) -#define MTL_MIRROR_TARGET_WP1 XE_REG(0xc60) -#define MTL_CAGF_MASK REG_GENMASK(8, 0) - #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 @@ -568,22 +565,30 @@ out: static DEVICE_ATTR_RW(freq_max); /** - * xe_guc_pc_rc_status - get the current Render C state + * xe_guc_pc_c_status - get the current GT C state * @pc: XE_GuC_PC instance */ -enum xe_gt_idle_state xe_guc_pc_rc_status(struct xe_guc_pc *pc) +enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - u32 reg; + u32 reg, gt_c_state; xe_device_mem_access_get(gt_to_xe(gt)); - reg = xe_mmio_read32(gt, GT_CORE_STATUS); + + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); + gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg); + } else { + reg = xe_mmio_read32(gt, GT_CORE_STATUS); + gt_c_state = REG_FIELD_GET(RCN_MASK, reg); + } + xe_device_mem_access_put(gt_to_xe(gt)); - switch (REG_FIELD_GET(RCN_MASK, reg)) { - case GT_RC6: + switch (gt_c_state) { + case GT_C6: return GT_IDLE_C6; - case GT_RC0: + case GT_C0: return GT_IDLE_C0; default: return GT_IDLE_UNKNOWN; @@ -606,6 +611,22 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc) return reg; } +/** + * xe_guc_pc_mc6_residency - mc6 residency counter + * @pc: Xe_GuC_PC instance + */ +u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u64 reg; + + 
xe_device_mem_access_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, MTL_MEDIA_MC6); + xe_device_mem_access_put(gt_to_xe(gt)); + + return reg; +} + static const struct attribute *pc_attrs[] = { &dev_attr_freq_act.attr, &dev_attr_freq_cur.attr, diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 976179dbc9a8..370353a40a17 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -12,6 +12,7 @@ int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); -enum xe_gt_idle_state xe_guc_pc_rc_status(struct xe_guc_pc *pc); +enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); +u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ -- cgit v1.2.3 From bc2e0215deeaa88dec44ff07e3a2b19283d53cdb Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 26 Jun 2023 20:17:38 +0200 Subject: drm/xe/bo: Fix swapin when moving to VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a source system resource had been swapped out, we incorrectly assumed that we were lacking source data for a move and therefore cleared the destination instead of swapping in and copying the swapped-out data. Fix this. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230626181741.32820-2-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f54fb7bd184a..bdeef3bf40fc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -568,6 +568,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, struct xe_tile *tile = NULL; struct dma_fence *fence; bool move_lacks_source; + bool tt_has_data; bool needs_clear; int ret = 0; @@ -590,8 +591,10 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - move_lacks_source = !resource_is_vram(old_mem) && - (!ttm || !ttm_tt_is_populated(ttm)); + tt_has_data = ttm && (ttm_tt_is_populated(ttm) || + (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); + + move_lacks_source = !resource_is_vram(old_mem) && !tt_has_data; needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); -- cgit v1.2.3 From 3439cc46619a3f31780cbd4f820384f9586d5ee1 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 26 Jun 2023 20:17:39 +0200 Subject: drm/xe/bo: Avoid creating a system resource when allocating a fresh VRAM bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating a new bo, on the first move the bo->resource is typically NULL. Our move callback rejected that instructing TTM to create a system resource. In addition a struct ttm_tt with a page-vector was created, although not populated with pages. Similarly when the clearing of VRAM was complete, the system resource was put on a ghost object and freed using the TTM delayed destroy mechanism. This is a lot of pointless work. So avoid creating the system resource and instead change the code to cope with a NULL bo->resource. v2: - Add some code comments (Matthew Brost) v3: - Fix a dereference of old_mem which might be NULL. 
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230626181741.32820-3-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 48 ++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index bdeef3bf40fc..8920405b0182 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -479,7 +479,6 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, * to unconditionally call unmap_attachment() when moving out to system. */ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, - struct ttm_resource *old_res, struct ttm_resource *new_res) { struct dma_buf_attachment *attach = ttm_bo->base.import_attach; @@ -564,6 +563,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); struct ttm_resource *old_mem = ttm_bo->resource; + u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM; struct ttm_tt *ttm = ttm_bo->ttm; struct xe_tile *tile = NULL; struct dma_fence *fence; @@ -572,35 +572,29 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool needs_clear; int ret = 0; - if (!old_mem) { - if (new_mem->mem_type != TTM_PL_SYSTEM) { - hop->mem_type = TTM_PL_SYSTEM; - hop->flags = TTM_PL_FLAG_TEMPORARY; - ret = -EMULTIHOP; - goto out; - } - + /* Bo creation path, moving to system or TT. No clearing required. */ + if (!old_mem && ttm) { ttm_bo_move_null(ttm_bo, new_mem); - goto out; + return 0; } if (ttm_bo->type == ttm_bo_type_sg) { ret = xe_bo_move_notify(bo, ctx); if (!ret) - ret = xe_bo_move_dmabuf(ttm_bo, old_mem, new_mem); + ret = xe_bo_move_dmabuf(ttm_bo, new_mem); goto out; } tt_has_data = ttm && (ttm_tt_is_populated(ttm) || (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - move_lacks_source = !resource_is_vram(old_mem) && !tt_has_data; + move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data; needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); if ((move_lacks_source && !needs_clear) || - (old_mem->mem_type == XE_PL_SYSTEM && + (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) { ttm_bo_move_null(ttm_bo, new_mem); goto out; @@ -610,7 +604,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. 
*/ - if (old_mem->mem_type == XE_PL_TT && + if (old_mem_type == XE_PL_TT && new_mem->mem_type == XE_PL_TT) { ttm_bo_move_null(ttm_bo, new_mem); goto out; @@ -622,7 +616,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - if (old_mem->mem_type == XE_PL_TT && + if (old_mem_type == XE_PL_TT && new_mem->mem_type == XE_PL_SYSTEM) { long timeout = dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, @@ -637,8 +631,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } if (!move_lacks_source && - ((old_mem->mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) || - (resource_is_vram(old_mem) && + ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) || + (mem_type_is_vram(old_mem_type) && new_mem->mem_type == XE_PL_SYSTEM))) { hop->fpfn = 0; hop->lpfn = 0; @@ -652,8 +646,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tile = bo->tile; else if (resource_is_vram(new_mem)) tile = mem_type_to_tile(xe, new_mem->mem_type); - else if (resource_is_vram(old_mem)) - tile = mem_type_to_tile(xe, old_mem->mem_type); + else if (mem_type_is_vram(old_mem_type)) + tile = mem_type_to_tile(xe, old_mem_type); XE_BUG_ON(!tile); XE_BUG_ON(!tile->migrate); @@ -703,8 +697,20 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_device_mem_access_put(xe); goto out; } - ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true, - new_mem); + if (!move_lacks_source) { + ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, + true, new_mem); + } else { + /* + * ttm_bo_move_accel_cleanup() may blow up if + * bo->resource == NULL, so just attach the + * fence and set the new resource. + */ + dma_resv_add_fence(ttm_bo->base.resv, fence, + DMA_RESV_USAGE_KERNEL); + ttm_bo_move_null(ttm_bo, new_mem); + } + dma_fence_put(fence); } -- cgit v1.2.3 From 70ff6a999d7cae52b6b418c3110b6245dde9271c Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 26 Jun 2023 20:17:40 +0200 Subject: drm/xe/bo: Gracefully handle errors from ttm_bo_move_accel_cleanup(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function ttm_bo_move_accel_cleanup() attempts to help pipeline a move, and in doing so, needs memory allocations which may fail. Rather than failing in a state where the new resource may freed while accessed by the copy engine, sync uninterruptible and do a failsafe cleanup. v2: - Don't try to attach the signaled fence on ttm_bo_move_accel_cleanup() error. 
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230626181741.32820-4-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 8920405b0182..1f4d1790d57c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -700,6 +700,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (!move_lacks_source) { ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true, new_mem); + if (ret) { + dma_fence_wait(fence, false); + ttm_bo_move_null(ttm_bo, new_mem); + ret = 0; + } } else { /* * ttm_bo_move_accel_cleanup() may blow up if -- cgit v1.2.3 From a201c6ee37d63e7c0a2973fb7790e94211b7fa83 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 26 Jun 2023 20:17:41 +0200 Subject: drm/xe/bo: Evict VRAM to TT rather than to system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main difference is that we don't bounce and sync on eviction, allowing for pipelined eviction. Moving forward we also need to be careful with dma mappings which can be released in SYSTEM but may remain in TT. v2: - Remove a stale comment (Matthew Brost) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230626181741.32820-5-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 5 ++--- drivers/gpu/drm/xe/xe_bo.c | 17 ++++++++++++++++- drivers/gpu/drm/xe/xe_dma_buf.c | 11 ++++------- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 513a3b3362e9..810a035bf720 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -72,7 +72,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, } /* Verify that also importer has been evicted to SYSTEM */ - if (!xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) { + if (exported != imported && !xe_bo_is_mem_type(imported, XE_PL_SYSTEM)) { KUNIT_FAIL(test, "Importer wasn't properly evicted.\n"); return; } @@ -91,8 +91,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, * possible, saving a migration step as the transfer is just * likely as fast from system memory. 
*/ - if (params->force_different_devices && - params->mem_mask & XE_BO_CREATE_SYSTEM_BIT) + if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT) KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); else KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1f4d1790d57c..17c0c6c2ae65 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -40,6 +40,20 @@ static struct ttm_placement sys_placement = { .busy_placement = &sys_placement_flags, }; +static const struct ttm_place tt_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = XE_PL_TT, + .flags = 0, +}; + +static struct ttm_placement tt_placement = { + .num_placement = 1, + .placement = &tt_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags, +}; + bool mem_type_is_vram(u32 mem_type) { return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN; @@ -225,9 +239,10 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, case XE_PL_VRAM0: case XE_PL_VRAM1: case XE_PL_STOLEN: + *placement = tt_placement; + break; case XE_PL_TT: default: - /* for now kick out to system */ *placement = sys_placement; break; } diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 975dee1f770f..b9bf4b4dd8a5 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -81,13 +81,10 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, return ERR_PTR(-EOPNOTSUPP); if (!xe_bo_is_pinned(bo)) { - if (!attach->peer2peer || - bo->ttm.resource->mem_type == XE_PL_SYSTEM) { - if (xe_bo_can_migrate(bo, XE_PL_TT)) - r = xe_bo_migrate(bo, XE_PL_TT); - else - r = xe_bo_validate(bo, NULL, false); - } + if (!attach->peer2peer) + r = xe_bo_migrate(bo, XE_PL_TT); + else + r = xe_bo_validate(bo, NULL, false); if (r) return ERR_PTR(r); } -- cgit v1.2.3 From 5835dc7fa6e419627e23015c7dbde120a77ce738 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 25 May 2023 09:41:44 +0200 Subject: drm/xe: Fix vm refcount races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a race in xe_vm_lookup() where the vm could disappear after the lookup mutex unlock but before the get. The xe_vm_get() call must be inside the lookup mutex. Also fix a vm close race where multiple callers could potentially succeed in calling xe_vm_close_and_put(). 
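The close race fix amounts to making the lookup and removal atomic, so only one caller can reach the final close; a simplified sketch using the identifiers from the diff below (the -EBUSY check is omitted here):

	mutex_lock(&xef->vm.lock);
	vm = xa_load(&xef->vm.xa, args->vm_id);
	if (vm)
		xa_erase(&xef->vm.xa, args->vm_id);	/* winner removes the entry under the lock */
	mutex_unlock(&xef->vm.lock);

	if (vm)
		xe_vm_close_and_put(vm);		/* later callers see NULL and return -ENOENT */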
Reported-by: Oded Gabbay Link: https://lists.freedesktop.org/archives/intel-xe/2023-May/004704.html Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230525074144.178961-1-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 06ebc1cfc4f7..bd143acbde0e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1533,10 +1533,9 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) mutex_lock(&xef->vm.lock); vm = xa_load(&xef->vm.xa, id); - mutex_unlock(&xef->vm.lock); - if (vm) xe_vm_get(vm); + mutex_unlock(&xef->vm.lock); return vm; } @@ -2011,27 +2010,26 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_destroy *args = data; struct xe_vm *vm; + int err = 0; if (XE_IOCTL_ERR(xe, args->pad) || XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) - return -ENOENT; - xe_vm_put(vm); - - /* FIXME: Extend this check to non-compute mode VMs */ - if (XE_IOCTL_ERR(xe, vm->preempt.num_engines)) - return -EBUSY; - mutex_lock(&xef->vm.lock); - xa_erase(&xef->vm.xa, args->vm_id); + vm = xa_load(&xef->vm.xa, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) + err = -ENOENT; + else if (XE_IOCTL_ERR(xe, vm->preempt.num_engines)) + err = -EBUSY; + else + xa_erase(&xef->vm.xa, args->vm_id); mutex_unlock(&xef->vm.lock); - xe_vm_close_and_put(vm); + if (!err) + xe_vm_close_and_put(vm); - return 0; + return err; } static const u32 region_to_mem_type[] = { -- cgit v1.2.3 From c8a740775dfff4467c9dd9f1cad22d8bdc7cccfa Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Wed, 24 May 2023 14:36:53 +0530 Subject: drm/xe/pm: Disable PM on unbounded pcie parent bridge Intel Discrete GFX cards gfx may have multiple PCIe endpoints, they connects to root port via pcie upstream switch port(USP) and virtual pcie switch port(VSP), sometimes VSP pcie devices doesn't bind to pcieport driver. Without pcieport driver, pcie PM comes without any warranty and with unbounded VSP gfx card won't transition to low power pcie Device and Link states therefore assert drm_warn on unbounded VSP and disable xe driver PM support. v2: - Disable Xe PCI PM support. [Rodrigo] v3: - Changed subject and Rebase. v4: - %s/xe_pci_unbounded_bridge_disable_pm/xe_assert_on_unbounded_bridge. [Rodrigo] - Use device_set_pm_not_required() instead of dev_pm_ops NULL assignment. 
Cc: Rodrigo Vivi Signed-off-by: Anshuman Gupta Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230524090653.1192566-1-anshuman.gupta@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 1 + drivers/gpu/drm/xe/xe_pm.c | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 16 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 71be80274683..96f1ee1ea17f 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -617,6 +617,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (IS_ERR(xe)) return PTR_ERR(xe); + xe_pm_assert_unbounded_bridge(xe); subplatform_desc = find_subplatform(xe, desc); err = xe_info_init(xe, desc, subplatform_desc); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 9a74d15052c4..20e9e522ab80 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -214,3 +214,17 @@ int xe_pm_runtime_get_if_active(struct xe_device *xe) WARN_ON(pm_runtime_suspended(xe->drm.dev)); return pm_runtime_get_if_active(xe->drm.dev, true); } + +void xe_pm_assert_unbounded_bridge(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct pci_dev *bridge = pci_upstream_bridge(pdev); + + if (!bridge) + return; + + if (!bridge->driver) { + drm_warn(&xe->drm, "unbounded parent pci bridge, device won't support any PM support.\n"); + device_set_pm_not_required(&pdev->dev); + } +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 6a885585f653..8418ee6faac5 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -21,5 +21,6 @@ int xe_pm_runtime_get(struct xe_device *xe); int xe_pm_runtime_put(struct xe_device *xe); bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); +void xe_pm_assert_unbounded_bridge(struct xe_device *xe); #endif -- cgit v1.2.3 From 64c9ae213d2ab1cce824841518e9539f597ee91e Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 13 Jun 2023 10:47:39 -0700 Subject: drm/xe/kunit: Handle fake device creation for all platform/subplatform cases For platform like Alderlake P there are subplatforms and just Alderlake P. Unlike DG2 in which every flavour is either a G10,G11 or G12 variant. In this case(Alderlake P/S), the Kunit test evaluates the subplatform to NONE and is unable to create a fake device. Removing the condition in xe_pci_fake_device_init() to support this corner case so driver can proceed with the unit testing. 
Cc: Lucas De Marchi Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230613174740.786041-1-anusha.srivatsa@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_pci.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 2178ad71c0da..a40879da2fbe 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -139,9 +139,6 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, if (subplatform_desc->subplatform == subplatform) break; - if (subplatform == XE_SUBPLATFORM_NONE && subplatform_desc) - return -ENODEV; - if (subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc) return -ENODEV; -- cgit v1.2.3 From 807e7cee6981d9c570f986bebc07829094acb3cb Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 13 Jun 2023 10:47:40 -0700 Subject: drm/xe: Add missing ADL entries to xe_test_wa With the fake device creation fix in the previous patch, adding Alderlake P platform in xe_wa_test. With this, driver is able to run the kunit test for ADLP properly. Cc: Lucas De Marchi Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230613174740.786041-2-anusha.srivatsa@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_wa_test.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 7a86be830b93..16f7f157c875 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -46,6 +46,9 @@ static const struct platform_test_case cases[] = { PLATFORM_CASE(ALDERLAKE_S, B0), PLATFORM_CASE(ALDERLAKE_S, C0), PLATFORM_CASE(ALDERLAKE_S, D0), + PLATFORM_CASE(ALDERLAKE_P, A0), + PLATFORM_CASE(ALDERLAKE_P, B0), + PLATFORM_CASE(ALDERLAKE_P, C0), SUBPLATFORM_CASE(DG2, G10, A0), SUBPLATFORM_CASE(DG2, G10, A1), SUBPLATFORM_CASE(DG2, G10, B0), -- cgit v1.2.3 From 420c6a6f65f4856f77dba278ae32e2701d8838f3 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 27 Jun 2023 15:28:56 -0700 Subject: drm/xe: fix HuC FW ordering for DG1 The firmware definitions must be ordered based on platform, from newer to older, which means that the DG1 FW must come before the ADL one. 
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8699 Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230627222856.3165647-1-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2b9b9b4a6711..bc63c0d3e33a 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -111,9 +111,9 @@ struct fw_blobs_by_type { fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 5)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(DG1, no_ver(i915, huc, dg1)) \ fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \ fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ - fw_def(DG1, no_ver(i915, huc, dg1)) \ fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) -- cgit v1.2.3 From f07d9a615b7b257bf2c2197262769286ddc75109 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 27 Jun 2023 17:16:42 -0700 Subject: drm/xe/slpc: Start SLPC before GuC submission on reset The SLPC code has a strict 5ms timeout from when the start command is queued to when we expect the reply to appear in memory. This works if the CT channel is empty, but if the channel is busy there might be an extra delay that causes the process to exceeded the timeout. We see this issue when a reset occurs while userspace keeps submitting, because the submission code is re-enabled first and it will start using the channel to service those submissions. To fix this, we can simply start SLPC before re-enabling submission. This has also the benefit of not allowing submissions to go through with an uninitialized SLPC. Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/375 Signed-off-by: Daniele Ceraolo Spurio Cc: Vinay Belgaumkar Cc: Matthew Brost Reviewed-by: Vinay Belgaumkar Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230628001642.3170070-1-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 43f862aaacbe..8245bbc58770 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -802,14 +802,10 @@ int xe_guc_start(struct xe_guc *guc) { int ret; - ret = xe_guc_submit_start(guc); - if (ret) - return ret; - ret = xe_guc_pc_start(&guc->pc); XE_WARN_ON(ret); - return 0; + return xe_guc_submit_start(guc); } void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) -- cgit v1.2.3 From 7f38e1e1063e1b9b2c8368c741ff5e679091e9f8 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 26 Jun 2023 14:22:20 -0700 Subject: drm/xe: fix bounds checking for 'len' in xe_engine_create_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's this shared machine running xe.ko and I often log in to see my tmux corrupted by messages such as: usercopy: Kernel memory overwrite attempt detected to wrapped address (offset 0, size 18446660151965198754)! I also sometimes see: kernel BUG at mm/usercopy.c:102! Someone is running a program that's definitely submitting random numbers to this ioctl. If you pass width=65535 and num_placements=32769 then you get a negative 'len', which avoids the EINVAL check, leading to the bug. Switch 'len' to u32. 
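A standalone userspace illustration of the wraparound (demonstration only, not driver code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t width = 65535, num_placements = 32769;
	uint32_t product = (uint32_t)width * num_placements;	/* 2147516415 */

	printf("as u32: %u\n", product);	/* stays large, trips an upper-bound check */
	printf("as int: %d\n", (int)product);	/* typically -2147450881, sneaks past it */
	return 0;
}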
It is the result of the multiplication of two u16 numbers, so it cannot overflow once stored as a u32: the largest possible product, 65535 * 65535 = 4,294,836,225, still fits in 32 bits, whereas 65535 * 32769 = 2,147,516,415 already exceeds INT_MAX and went negative in the old signed 'int len'. v2: Make len u32 instead of checking for <=0 (José). Signed-off-by: Paulo Zanoni Reviewed-by: José Roberto de Souza Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230626212221.136640-1-paulo.r.zanoni@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 097a1ea06002..f1b8b22e0216 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -522,7 +522,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, struct xe_engine *e = NULL; u32 logical_mask; u32 id; - int len; + u32 len; int err; if (XE_IOCTL_ERR(xe, args->flags) || -- cgit v1.2.3
From 2e60442a4fef935c76cd70858775b92f565642cc Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 26 Jun 2023 14:22:21 -0700 Subject: drm/xe: properly check bounds for xe_wait_user_fence_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If !no_engines, then we use copy_from_user to copy to the 'eci' array, which has XE_HW_ENGINE_MAX_INSTANCE members. The number of members copied is given by the user in args->num_engines, so add code to check that args->num_engines does not exceed XE_HW_ENGINE_MAX_INSTANCE. It's an unsigned value, so there's no need to check for negative values. Fixes error messages such as: Buffer overflow detected (54 < 18446744073709551520)! Reviewed-by: José Roberto de Souza Signed-off-by: Paulo Zanoni Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230626212221.136640-2-paulo.r.zanoni@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 3122374341d6..098e2a4cff3f 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -121,6 +121,9 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, addr & 0x7)) return -EINVAL; + if (XE_IOCTL_ERR(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE)) + return -EINVAL; + if (!no_engines) { err = copy_from_user(eci, user_eci, sizeof(struct drm_xe_engine_class_instance) * -- cgit v1.2.3
From 5572a004685770f8daad7661c5494b65148ede9f Mon Sep 17 00:00:00 2001 From: Zbigniew Kempczyński Date: Wed, 28 Jun 2023 07:51:41 +0200 Subject: drm/xe: Use nanoseconds instead of jiffies in uapi for user fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using jiffies as a timeout from userspace is awkward: even though the kernel tick rate can in theory be obtained via getconf, that method is unreliable and the returned value may differ from what the kernel was actually configured with. The timeout is now expressed in nanoseconds and its interpretation depends on the DRM_XE_UFENCE_WAIT_ABSTIME flag. A relative timeout (flag not set) means the fence expires at now() + timeout. An absolute timeout (flag set) means the fence expires at exactly that point in time. Passing a negative timeout means we wait "forever" by setting the wait time to MAX_SCHEDULE_TIMEOUT.
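As a rough usage sketch of the new semantics (illustrative only: the address/op/value setup and the DRM_IOCTL_XE_WAIT_USER_FENCE ioctl name are assumed here; only the timeout/flags handling reflects this patch):

    struct drm_xe_wait_user_fence wait = {
            /* addr, op, value filled in as before; unchanged by this patch */
            .mask = DRM_XE_UFENCE_WAIT_U64,
            .timeout = 500 * 1000 * 1000,   /* relative: expires ~500 ms from now */
    };
    /* ioctl(fd, DRM_IOCTL_XE_WAIT_USER_FENCE, &wait);  (assumed ioctl name) */

    /* Absolute deadline instead: a MONOTONIC timestamp in nanoseconds. */
    wait.flags |= DRM_XE_UFENCE_WAIT_ABSTIME;
    wait.timeout = deadline_ns;

    /* Any negative timeout waits "forever" (MAX_SCHEDULE_TIMEOUT internally). */
    wait.timeout = -1;

On a relative wait the kernel writes the remaining time back into timeout before returning, so a signal-interrupted call can simply be restarted; an absolute timeout is left untouched.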
Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20230628055141.398036-2-zbigniew.kempczynski@intel.com Signed-off-by: Zbigniew Kempczyński Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 47 ++++++++++++++++++++++++++------- include/uapi/drm/xe_drm.h | 16 +++++++++-- 2 files changed, 51 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 098e2a4cff3f..c4420c0dbf9c 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -7,6 +7,7 @@ #include #include +#include #include #include "xe_device.h" @@ -84,6 +85,21 @@ static int check_hw_engines(struct xe_device *xe, DRM_XE_UFENCE_WAIT_VM_ERROR) #define MAX_OP DRM_XE_UFENCE_WAIT_LTE +static unsigned long to_jiffies_timeout(struct drm_xe_wait_user_fence *args) +{ + unsigned long timeout; + + if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME) + return drm_timeout_abs_to_jiffies(args->timeout); + + if (args->timeout == MAX_SCHEDULE_TIMEOUT || args->timeout == 0) + return args->timeout; + + timeout = nsecs_to_jiffies(args->timeout); + + return timeout ?: 1; +} + int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -98,7 +114,8 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, int err; bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP || args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR; - unsigned long timeout = args->timeout; + unsigned long timeout; + ktime_t start; if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) || XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) @@ -152,8 +169,18 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, addr = vm->async_ops.error_capture.addr; } - if (XE_IOCTL_ERR(xe, timeout > MAX_SCHEDULE_TIMEOUT)) - return -EINVAL; + /* + * For negative timeout we want to wait "forever" by setting + * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also + * to args->timeout to avoid being zeroed on the signal delivery + * (see arithmetics after wait). + */ + if (args->timeout < 0) + args->timeout = MAX_SCHEDULE_TIMEOUT; + + timeout = to_jiffies_timeout(args); + + start = ktime_get(); /* * FIXME: Very simple implementation at the moment, single wait queue @@ -192,17 +219,17 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, } else { remove_wait_queue(&xe->ufence_wq, &w_wait); } + + if (!(args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)) { + args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start)); + if (args->timeout < 0) + args->timeout = 0; + } + if (XE_IOCTL_ERR(xe, err < 0)) return err; else if (XE_IOCTL_ERR(xe, !timeout)) return -ETIME; - /* - * Again very simple, return the time in jiffies that has past, may need - * a more precision - */ - if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME) - args->timeout = args->timeout - timeout; - return 0; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 8e7be1551333..347351a8f618 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -904,8 +904,20 @@ struct drm_xe_wait_user_fence { #define DRM_XE_UFENCE_WAIT_U64 0xffffffffffffffffu /** @mask: comparison mask */ __u64 mask; - - /** @timeout: how long to wait before bailing, value in jiffies */ + /** + * @timeout: how long to wait before bailing, value in nanoseconds. 
+ * Without DRM_XE_UFENCE_WAIT_ABSTIME flag set (relative timeout) + * it contains timeout expressed in nanoseconds to wait (fence will + * expire at now() + timeout). + * When DRM_XE_UFENCE_WAIT_ABSTIME flat is set (absolute timeout) wait + * will end at timeout (uses system MONOTONIC_CLOCK). + * Passing negative timeout leads to neverending wait. + * + * On relative timeout this value is updated with timeout left + * (for restarting the call in case of signal delivery). + * On absolute timeout this value stays intact (restarted call still + * expire at the same point of time). + */ __s64 timeout; /** -- cgit v1.2.3 From e5a845fd8fa4ce61a99c87f37b63530fa4995750 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 28 Jun 2023 11:08:35 +0530 Subject: drm/xe: Add sysfs entry for tile We have recently introduced tile for each gpu, so lets add sysfs entry per tile for userspace to provide required information specific to tile. V5: - define ktype as const V4: - Reorder headers - Aravind V3: - Make API to return void and add drm_warn - Aravind V2: - Add logs in failure path Reviewed-by: Aravind Iddamsetty Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device_types.h | 3 ++ drivers/gpu/drm/xe/xe_tile.c | 3 ++ drivers/gpu/drm/xe/xe_tile.h | 2 ++ drivers/gpu/drm/xe/xe_tile_sysfs.c | 59 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_tile_sysfs.h | 19 ++++++++++ drivers/gpu/drm/xe/xe_tile_sysfs_types.h | 27 +++++++++++++++ 7 files changed, 114 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_tile_sysfs.c create mode 100644 drivers/gpu/drm/xe/xe_tile_sysfs.h create mode 100644 drivers/gpu/drm/xe/xe_tile_sysfs_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 8d6d3c070fc8..3ade82cf244e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -103,6 +103,7 @@ xe-y += xe_bb.o \ xe_step.o \ xe_sync.o \ xe_tile.o \ + xe_tile_sysfs.o \ xe_trace.o \ xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3b50134cdcc0..5517229bb505 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -144,6 +144,9 @@ struct xe_tile { /** @migrate: Migration helper for vram blits and clearing */ struct xe_migrate *migrate; + + /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ + struct kobject *sysfs; }; /** diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 6414aa810355..ac70486d09c3 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -10,6 +10,7 @@ #include "xe_migrate.h" #include "xe_sa.h" #include "xe_tile.h" +#include "xe_tile_sysfs.h" #include "xe_ttm_vram_mgr.h" /** @@ -142,6 +143,8 @@ int xe_tile_init_noalloc(struct xe_tile *tile) if (IS_ERR(tile->mem.kernel_bb_pool)) err = PTR_ERR(tile->mem.kernel_bb_pool); + xe_tile_sysfs_init(tile); + err_mem_access: xe_device_mem_access_put(tile_to_xe(tile)); return err; diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 33bf41292195..782c47f8bd45 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -6,6 +6,8 @@ #ifndef _XE_TILE_H_ #define _XE_TILE_H_ +#include "xe_device_types.h" + struct xe_tile; int xe_tile_alloc(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c new file mode 100644 index 000000000000..2d64fa54b5a8 --- 
/dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include +#include + +#include "xe_tile.h" +#include "xe_tile_sysfs.h" + +static void xe_tile_sysfs_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct kobj_type xe_tile_sysfs_kobj_type = { + .release = xe_tile_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static void tile_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_tile *tile = arg; + + kobject_put(tile->sysfs); +} + +void xe_tile_sysfs_init(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct device *dev = xe->drm.dev; + struct kobj_tile *kt; + int err; + + kt = kzalloc(sizeof(*kt), GFP_KERNEL); + if (!kt) + return; + + kobject_init(&kt->base, &xe_tile_sysfs_kobj_type); + kt->tile = tile; + + err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); + if (err) { + kobject_put(&kt->base); + drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", err); + return; + } + + tile->sysfs = &kt->base; + + err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); + if (err) { + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + return; + } +} diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h new file mode 100644 index 000000000000..e4f065039eba --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TILE_SYSFS_H_ +#define _XE_TILE_SYSFS_H_ + +#include "xe_tile_sysfs_types.h" + +void xe_tile_sysfs_init(struct xe_tile *tile); + +static inline struct xe_tile * +kobj_to_tile(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_tile, base)->tile; +} + +#endif /* _XE_TILE_SYSFS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs_types.h b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h new file mode 100644 index 000000000000..75906ba11a9e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_sysfs_types.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TILE_SYSFS_TYPES_H_ +#define _XE_TILE_SYSFS_TYPES_H_ + +#include + +struct xe_tile; + +/** + * struct kobj_tile - A tile's kobject struct that connects the kobject + * and the TILE + * + * When dealing with multiple TILEs, this struct helps to understand which + * TILE needs to be addressed on a given sysfs call. + */ +struct kobj_tile { + /** @base: The actual kobject */ + struct kobject base; + /** @tile: A pointer to the tile itself */ + struct xe_tile *tile; +}; + +#endif /* _XE_TILE_SYSFS_TYPES_H_ */ -- cgit v1.2.3 From 8c82f914a302e394e2a037241d84ca3af6577f97 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 6 Jun 2023 15:48:38 +0530 Subject: drm/xe: Add GTs under respective tile sysfs With the separation of xe_tile and xe_gt, We now consider a PCI device (xe_device) to contain one or more tiles (struct xe_tile). Each tile will contain one or more GTs (struct xe_gt). So lets align sysfs paths accordingly. TODO: Currently we have gt0 under tile0 and gt1 under tile1 on multi-tile. This GT indexing still under discussion, when it is concluded we need to revisit this change. 
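For orientation, the resulting layout (hypothetical paths; assuming one GT per tile on a two-tile part, per the TODO above) looks like:

    /sys/devices/.../<pci device>/tile0/gt0/...
    /sys/devices/.../<pci device>/tile1/gt1/...

i.e. the gt%d kobjects created by xe_gt_sysfs_init() are now parented to the tile%d kobject added in the previous patch rather than sitting directly under the PCI device.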
Reviewed-by: Aravind Iddamsetty Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index c01cc689058c..13570987a756 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -31,7 +31,7 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg) int xe_gt_sysfs_init(struct xe_gt *gt) { - struct device *dev = gt_to_xe(gt)->drm.dev; + struct xe_tile *tile = gt_to_tile(gt); struct kobj_gt *kg; int err; @@ -42,7 +42,7 @@ int xe_gt_sysfs_init(struct xe_gt *gt) kobject_init(&kg->base, &xe_gt_sysfs_kobj_type); kg->gt = gt; - err = kobject_add(&kg->base, &dev->kobj, "gt%d", gt->info.id); + err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id); if (err) { kobject_put(&kg->base); return err; -- cgit v1.2.3 From 9641df819772662429721f4b14141308fcf2d667 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 28 Jun 2023 11:36:16 +0530 Subject: drm/xe: Add sysfs entry to report per tile memory size Add sysfs entry to read per tile physical memory including stolen memory. V5: - rename var name and make it part of vram struct - Lucas V4: - %s/addr_range/physical_vram_size_byes, make it user readable name - Joonas/Aravind - Display in bytes - Joonas/Aravind V3: - Exclude DG1, replace sysfs_create_file/files - Aravind V2: - Use DEVICE_ATTR_RO - Aravind - Dont put kobj on sysfs_file_create fail - Himal - Skip addr_range sysfs create for non dgfx - Himal Reviewed-by: Aravind Iddamsetty Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 5 +++++ drivers/gpu/drm/xe/xe_mmio.c | 1 + drivers/gpu/drm/xe/xe_tile_sysfs.c | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5517229bb505..c404d250e453 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -124,6 +124,11 @@ struct xe_tile { resource_size_t base; /** @size: size of VRAM. 
*/ resource_size_t size; + /** + * @actual_physical_size: Actual VRAM size + * including stolen mem for tile + */ + resource_size_t actual_physical_size; /** @mapping: pointer to VRAM mappable space */ void *__iomem mapping; } vram; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f1336803b915..7d92258cd35d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -277,6 +277,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) if (err) return err; + tile->mem.vram.actual_physical_size = tile_size; tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; tile->mem.vram.io_size = min_t(u64, vram_size, io_size); diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 2d64fa54b5a8..16376607c68f 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -20,6 +20,20 @@ static const struct kobj_type xe_tile_sysfs_kobj_type = { .sysfs_ops = &kobj_sysfs_ops, }; +static ssize_t +physical_vram_size_bytes_show(struct device *kdev, struct device_attribute *attr, + char *buf) +{ + struct xe_tile *tile = kobj_to_tile(&kdev->kobj); + + return sysfs_emit(buf, "%llu\n", tile->mem.vram.actual_physical_size); +} + +static DEVICE_ATTR_RO(physical_vram_size_bytes); + +static const struct attribute *physical_memsize_attr = + &dev_attr_physical_vram_size_bytes.attr; + static void tile_sysfs_fini(struct drm_device *drm, void *arg) { struct xe_tile *tile = arg; @@ -50,6 +64,11 @@ void xe_tile_sysfs_init(struct xe_tile *tile) tile->sysfs = &kt->base; + if (IS_DGFX(xe) && xe->info.platform != XE_DG1 && + sysfs_create_file(tile->sysfs, physical_memsize_attr)) + drm_warn(&xe->drm, + "Sysfs creation to read addr_range per tile failed\n"); + err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); if (err) { drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", -- cgit v1.2.3 From e4b2893c17048aecb195553b60631fcb07360c4e Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 28 Jun 2023 11:53:16 +0530 Subject: drm/xe: Make usable size of VRAM readable Current size member of vram struct does not give complete information as what "size" contains. Does it contain reserved portions or not. Name it usable size and accordingly describe other size members as well. 
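As a hypothetical illustration of the distinction (ignoring the small-BAR clamp visible in the xe_mmio.c hunk below): a tile with 8 GiB of physical VRAM, of which 64 MiB is reserved as stolen memory, would expose

    actual_physical_size = 8 GiB            (what the sysfs attribute added earlier reports)
    usable_size          = 8 GiB - 64 MiB   (what the TTM VRAM manager gets to hand out)

so the rename makes it explicit which of those two questions each member answers.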
Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 13 ++++++++++--- drivers/gpu/drm/xe/xe_mmio.c | 6 +++--- drivers/gpu/drm/xe/xe_query.c | 2 +- drivers/gpu/drm/xe/xe_tile.c | 2 +- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 3 ++- 6 files changed, 18 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 17c0c6c2ae65..86947a6fcc7c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -125,7 +125,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, { struct xe_tile *tile = mem_type_to_tile(xe, mem_type); - XE_BUG_ON(!tile->mem.vram.size); + XE_BUG_ON(!tile->mem.vram.usable_size); places[*c] = (struct ttm_place) { .mem_type = mem_type, diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c404d250e453..db08d64abce1 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -122,11 +122,18 @@ struct xe_tile { resource_size_t io_size; /** @base: offset of VRAM starting base */ resource_size_t base; - /** @size: size of VRAM. */ - resource_size_t size; + /** + * @usable_size: usable size of VRAM + * + * Usable size of VRAM excluding reserved portions + * (e.g stolen mem) + */ + resource_size_t usable_size; /** * @actual_physical_size: Actual VRAM size - * including stolen mem for tile + * + * Actual VRAM size including reserved portions + * (e.g stolen mem) */ resource_size_t actual_physical_size; /** @mapping: pointer to VRAM mappable space */ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7d92258cd35d..5effb21db9d4 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -289,13 +289,13 @@ int xe_mmio_probe_vram(struct xe_device *xe) tile->mem.vram.base = tile_offset; /* small bar can limit the visible size. 
size accordingly */ - tile->mem.vram.size = min_t(u64, vram_size, io_size); + tile->mem.vram.usable_size = min_t(u64, vram_size, io_size); tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, tile->id, - &tile->mem.vram.io_start, &tile->mem.vram.size); + &tile->mem.vram.io_start, &tile->mem.vram.usable_size); - if (tile->mem.vram.io_size < tile->mem.vram.size) + if (tile->mem.vram.io_size < tile->mem.vram.usable_size) drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id, tile->id, &tile->mem.vram.io_size); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 15e171ca7e62..9acbb27dfcab 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -188,7 +188,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->num_params = num_params; config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = xe->info.devid | (xe->info.revid << 16); - if (xe_device_get_root_tile(xe)->mem.vram.size) + if (xe_device_get_root_tile(xe)->mem.vram.usable_size) config->info[XE_QUERY_CONFIG_FLAGS] = XE_QUERY_CONFIG_FLAGS_HAS_VRAM; if (xe->info.enable_guc) diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index ac70486d09c3..e0bc2b60ab09 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -101,7 +101,7 @@ static int tile_ttm_mgr_init(struct xe_tile *tile) struct xe_device *xe = tile_to_xe(tile); int err; - if (tile->mem.vram.size) { + if (tile->mem.vram.usable_size) { err = xe_ttm_vram_mgr_init(tile, tile->mem.vram_mgr); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 1a84abd35fcf..a10fd0366da3 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -360,7 +360,8 @@ int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) mgr->tile = tile; return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, - tile->mem.vram.size, tile->mem.vram.io_size, + tile->mem.vram.usable_size, + tile->mem.vram.io_size, PAGE_SIZE); } -- cgit v1.2.3 From 413343584725f1fab9c4c676504cf6478dc3281b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 14 Jun 2023 13:51:59 -0700 Subject: drm/xe: Return GMD_ID revid properly peek_gmdid() returns the IP version, not the raw value of the GMD_ID register. Make sure we extract and return the rev_id field as well so that it can be used to determine the IP steppings properly. 
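To make the change concrete (hypothetical register contents; only names from the hunk below are used):

    u32 ver, revid;

    peek_gmdid(xe, GMD_ID.addr, &ver, &revid);
    /* e.g. arch field = 12, release field = 70:
     *   ver   = 12 * 100 + 70 = 1270, reported as "12.70"
     *   revid = REG_FIELD_GET(GMD_ID_REVID, <raw GMD_ID value>)
     * Previously the revid was extracted from 'ver', i.e. from the already
     * combined version number rather than from the raw register value. */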
Reviewed-by: Lucas De Marchi Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230614205202.3376752-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 96f1ee1ea17f..5c0c2e9b901e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -374,23 +374,27 @@ find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc) return NULL; } -static u32 peek_gmdid(struct xe_device *xe, u32 gmdid_offset) +static void peek_gmdid(struct xe_device *xe, u32 gmdid_offset, u32 *ver, u32 *revid) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); void __iomem *map = pci_iomap_range(pdev, 0, gmdid_offset, sizeof(u32)); - u32 ver; + u32 val; if (!map) { drm_err(&xe->drm, "Failed to read GMD_ID (%#x) from PCI BAR.\n", gmdid_offset); - return 0; + *ver = 0; + *revid = 0; + + return; } - ver = ioread32(map); + val = ioread32(map); pci_iounmap(pdev, map); - return REG_FIELD_GET(GMD_ID_ARCH_MASK, ver) * 100 + - REG_FIELD_GET(GMD_ID_RELEASE_MASK, ver); + *ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val); + *revid = REG_FIELD_GET(GMD_ID_REVID, val); } /* @@ -426,8 +430,7 @@ static void handle_gmdid(struct xe_device *xe, { u32 ver; - ver = peek_gmdid(xe, GMD_ID.addr); - *graphics_revid = REG_FIELD_GET(GMD_ID_REVID, ver); + peek_gmdid(xe, GMD_ID.addr, &ver, graphics_revid); for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { if (ver == graphics_ip_map[i].ver) { xe->info.graphics_verx100 = ver; @@ -442,8 +445,8 @@ static void handle_gmdid(struct xe_device *xe, ver / 100, ver % 100); } - ver = peek_gmdid(xe, GMD_ID.addr + 0x380000); - *media_revid = REG_FIELD_GET(GMD_ID_REVID, ver); + peek_gmdid(xe, GMD_ID.addr + 0x380000, &ver, media_revid); + for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { if (ver == media_ip_map[i].ver) { xe->info.media_verx100 = ver; -- cgit v1.2.3 From 54c5b74a06939bec61aa59421aa1073c0b666c2c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 14 Jun 2023 13:52:00 -0700 Subject: drm/xe: Don't raise error on fused-off media It's legitimate for the media GMD_ID register to read back as 0x0 if media functionality is fused off or otherwise not present on the platform. Avoid printing an "unknown media version" error message for this case. 
Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230614205202.3376752-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 5c0c2e9b901e..749826548b4a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -447,6 +447,10 @@ static void handle_gmdid(struct xe_device *xe, peek_gmdid(xe, GMD_ID.addr + 0x380000, &ver, media_revid); + /* Media may legitimately be fused off / not present */ + if (ver == 0) + return; + for (int i = 0; i < ARRAY_SIZE(media_ip_map); i++) { if (ver == media_ip_map[i].ver) { xe->info.media_verx100 = ver; -- cgit v1.2.3 From 98b6d092341128f753cff64b1bceda69c718b6af Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 14 Jun 2023 13:52:01 -0700 Subject: drm/xe: Print proper revid value for unknown media revision If the GMD_ID register reports a higher media revision ID than we're expecting, print the media revid, not the graphics revid, in the debug message. Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230614205202.3376752-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_step.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index 1baf79ba02ad..371cac951e0f 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -235,7 +235,7 @@ struct xe_step_info xe_step_gmdid_get(struct xe_device *xe, if (step.media >= STEP_FUTURE) { step.media = STEP_FUTURE; drm_dbg(&xe->drm, "Media GMD_ID revid value %d treated as future stepping\n", - graphics_gmdid_revid); + media_gmdid_revid); } return step; -- cgit v1.2.3 From c0ab10ee2ee6a2c423f95154e0842a1b19a4c13b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 14 Jun 2023 13:52:02 -0700 Subject: drm/xe: Enable PCI device earlier Newer Intel platforms require that inspect the contents of the GMD_ID registers very early in the driver initialization process to determine the IP version (and proper init sequences), of the platform. Move the general PCI device setup and enablement slightly earlier, before we start trying to peek at the GMD_ID registers. Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230614205202.3376752-5-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 749826548b4a..e130ffe3ab55 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -627,10 +627,20 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe_pm_assert_unbounded_bridge(xe); subplatform_desc = find_subplatform(xe, desc); - err = xe_info_init(xe, desc, subplatform_desc); + pci_set_drvdata(pdev, xe); + err = pci_enable_device(pdev); if (err) goto err_drm_put; + pci_set_master(pdev); + + if (pci_enable_msi(pdev) < 0) + drm_dbg(&xe->drm, "can't enable MSI"); + + err = xe_info_init(xe, desc, subplatform_desc); + if (err) + goto err_pci_disable; + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d", desc->platform_name, subplatform_desc ? 
subplatform_desc->name : "", @@ -650,16 +660,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe_step_name(xe->info.step.display), xe_step_name(xe->info.step.basedie)); - pci_set_drvdata(pdev, xe); - err = pci_enable_device(pdev); - if (err) - goto err_drm_put; - - pci_set_master(pdev); - - if (pci_enable_msi(pdev) < 0) - drm_dbg(&xe->drm, "can't enable MSI"); - err = xe_device_probe(xe); if (err) goto err_pci_disable; -- cgit v1.2.3 From b747411964cd9011e05f4b9f5624be9ed71532c4 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 29 Jun 2023 22:51:33 +0200 Subject: drm/xe: Make page-table updates using the default engine happen in order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the default engine m->eng was used, there is no check for idle and a cpu page-table update may thus happen in parallel with a gpu one. Don't allow CPU page-table updates with the default engine until the engine is idle. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230629205134.111849-2-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a62bd7ec8a42..be98690f2bc9 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1160,9 +1160,10 @@ xe_migrate_update_pgtables(struct xe_migrate *m, int err = 0; bool usm = !eng && xe->info.supports_usm; bool first_munmap_rebind = vma && vma->first_munmap_rebind; + struct xe_engine *eng_override = !eng ? m->eng : eng; /* Use the CPU if no in syncs and engine is idle */ - if (no_in_syncs(syncs, num_syncs) && (!eng || xe_engine_is_idle(eng))) { + if (no_in_syncs(syncs, num_syncs) && xe_engine_is_idle(eng_override)) { fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, num_updates, first_munmap_rebind, -- cgit v1.2.3 From 44869c72e847e015649ffd4366df88fe529826bb Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 Jul 2023 18:32:41 +0300 Subject: drm/xe/mmio: add xe_mmio_read16() Little by little, make stuff feature complete. Signed-off-by: Jani Nikula Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 3c547d78afba..4953a9a3f1fb 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -31,6 +31,16 @@ static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) return readb(tile->mmio.regs + reg.addr); } +static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readw(tile->mmio.regs + reg.addr); +} + static inline void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) { -- cgit v1.2.3 From c7fac450dd865d2ad3400a1df0e8655df75a465f Mon Sep 17 00:00:00 2001 From: Alan Previn Date: Fri, 2 Jun 2023 11:16:50 -0700 Subject: drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification where all command formats are defined in units of dwords so that '1' is a dword. 
Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255 dwords). However, h2g_write was incorrectly assuming that GUC_CTB_MSG_MAX_LEN was in bytes. Fix this. v3: Fix nit on #define location.(Matt) v2: By correctly treating GUC_CTB_MSG_MAX_LEN as dwords, it causes a local array to consume 4x the stack size. Rework the function to avoid consuming stack even if the action size is large. (Matt) Signed-off-by: Alan Previn Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 22bc9ce846db..e71d069158dc 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -396,24 +396,27 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) spin_unlock_irq(&ct->fast_lock); } +#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ + static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 ct_fence_value, bool want_response) { struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *h2g = &ct->ctbs.h2g; - u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; - u32 cmd_len = len + GUC_CTB_HDR_LEN; - u32 cmd_idx = 0, i; + u32 cmd[H2G_CT_HEADERS]; u32 tail = h2g->info.tail; + u32 full_len; struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, tail * sizeof(u32)); + full_len = len + GUC_CTB_HDR_LEN; + lockdep_assert_held(&ct->lock); - XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN); + XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); XE_BUG_ON(tail > h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ - if (tail + cmd_len > h2g->info.size) { + if (tail + full_len > h2g->info.size) { xe_map_memset(xe, &map, 0, 0, (h2g->info.size - tail) * sizeof(u32)); h2g_reserve_space(ct, (h2g->info.size - tail)); @@ -428,30 +431,33 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, * dw1: HXG header (including action code) * dw2+: action data */ - cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | + cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); if (want_response) { - cmd[cmd_idx++] = + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } else { - cmd[cmd_idx++] = + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } - for (i = 1; i < len; ++i) - cmd[cmd_idx++] = action[i]; + + /* H2G header in cmd[1] replaces action[0] so: */ + --len; + ++action; /* Write H2G ensuring visable before descriptor update */ - xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32)); + xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); + xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32)); xe_device_wmb(ct_to_xe(ct)); /* Update local copies */ - h2g->info.tail = (tail + cmd_len) % h2g->info.size; - h2g_reserve_space(ct, cmd_len); + h2g->info.tail = (tail + full_len) % h2g->info.size; + h2g_reserve_space(ct, full_len); /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); -- cgit v1.2.3 From 43e82fb9ecf0009aeb95e284067a9a24a55a93ed Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 5 Jul 2023 
14:06:33 +0530 Subject: drm/xe: make GT sysfs init return void Currently return from xe_gt_sysfs_init() is ignored and also a failure in xe_gt_sysfs_init() isn't fatal so make it return void. V2 : - add drm_warn in error paths - Himal - Edit commit message - Nirmoy Acked-by: Ashutosh Dixit Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Nirmoy Das Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sysfs.c | 23 ++++++++++++++--------- drivers/gpu/drm/xe/xe_gt_sysfs.h | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index 13570987a756..cdfe5995259b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -29,30 +29,35 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(gt->sysfs); } -int xe_gt_sysfs_init(struct xe_gt *gt) +void xe_gt_sysfs_init(struct xe_gt *gt) { struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); struct kobj_gt *kg; int err; kg = kzalloc(sizeof(*kg), GFP_KERNEL); - if (!kg) - return -ENOMEM; + if (!kg) { + drm_warn(&xe->drm, "Allocating kobject failed.\n"); + return; + } kobject_init(&kg->base, &xe_gt_sysfs_kobj_type); kg->gt = gt; err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id); if (err) { + drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err); kobject_put(&kg->base); - return err; + return; } gt->sysfs = &kg->base; - err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_sysfs_fini, gt); - if (err) - return err; - - return 0; + err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt); + if (err) { + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + return; + } } diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h index ecbfcc5c7d42..e3ec278ca0be 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.h +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h @@ -8,7 +8,7 @@ #include "xe_gt_sysfs_types.h" -int xe_gt_sysfs_init(struct xe_gt *gt); +void xe_gt_sysfs_init(struct xe_gt *gt); static inline struct xe_gt * kobj_to_gt(struct kobject *kobj) -- cgit v1.2.3 From 55d8ac9631aaa8ae3794341c52009f635a0d3188 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 3 Jul 2023 14:36:10 +0530 Subject: drm/xe: make kobject type struct as constant Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.") the driver core allows the usage of const struct kobj_type. Take advantage of this to constify the structure definition to prevent modification at runtime. 
Reviewed-by: Nirmoy Das Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index cdfe5995259b..b955940e8dc6 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -17,7 +17,7 @@ static void xe_gt_sysfs_kobj_release(struct kobject *kobj) kfree(kobj); } -static struct kobj_type xe_gt_sysfs_kobj_type = { +static const struct kobj_type xe_gt_sysfs_kobj_type = { .release = xe_gt_sysfs_kobj_release, .sysfs_ops = &kobj_sysfs_ops, }; -- cgit v1.2.3 From 54c9fb7e64fd3f0da1570e3d1c5446605e83210e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 10 Jul 2023 07:41:21 -0700 Subject: drm/xe: Use internal VM flags in xe_vm_create xe_vm_create used the IOCTL create flags in a few places rather than the internal VM flags and this just happened to work as these values matched. This is risky (and incorrect) as the internal flag values are free to change. Fix this and use the internal VM flag values. Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index bd143acbde0e..a9cf62f7aac6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1253,13 +1253,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->batch_invalidate_tlb = true; } - if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) { + if (flags & XE_VM_FLAG_COMPUTE_MODE) { INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); vm->flags |= XE_VM_FLAG_COMPUTE_MODE; vm->batch_invalidate_tlb = false; } - if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) { + if (flags & XE_VM_FLAG_ASYNC_BIND_OPS) { vm->async_ops.fence.context = dma_fence_context_alloc(1); vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS; } -- cgit v1.2.3 From 9d858b69b0cfb56dd67943138c10d84eeb73380f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Jun 2023 12:39:48 -0700 Subject: drm/xe: Ban a VM if rebind worker hits an error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We cannot recover a VM if a rebind worker hits an error, ban the VM if happens to ensure we do not attempt to place this VM on the hardware again. A follow up will inform the user if this happens. v2: Return -ECANCELED in exec VM closed or banned, check for closed or banned within VM lock. 
v3: Fix lockdep splat by looking engine outside of vm->lock v4: Fix error path when engine lookup fails v5: Add debug message in rebind worker on error, update comments wrt locking, add xe_vm_close helper Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 13 +++++ drivers/gpu/drm/xe/xe_exec.c | 6 +-- drivers/gpu/drm/xe/xe_trace.h | 5 ++ drivers/gpu/drm/xe/xe_vm.c | 104 +++++++++++++++++++++++-------------- drivers/gpu/drm/xe/xe_vm.h | 13 ++++- drivers/gpu/drm/xe/xe_vm_madvise.c | 2 +- drivers/gpu/drm/xe/xe_vm_types.h | 10 ++-- 7 files changed, 107 insertions(+), 46 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index f1b8b22e0216..af75c9a0ea7b 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -597,10 +597,23 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, !vm)) return -ENOENT; + err = down_read_interruptible(&vm->lock); + if (err) { + xe_vm_put(vm); + return err; + } + + if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) { + up_read(&vm->lock); + xe_vm_put(vm); + return -ENOENT; + } + e = xe_engine_create(xe, vm, logical_mask, args->width, hwe, xe_vm_no_dma_fences(vm) ? 0 : ENGINE_FLAG_PERSISTENT); + up_read(&vm->lock); xe_vm_put(vm); if (IS_ERR(e)) return PTR_ERR(e); diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index c52edff9a358..bdf00e59e7a4 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -297,9 +297,9 @@ retry: if (err) goto err_unlock_list; - if (xe_vm_is_closed(engine->vm)) { - drm_warn(&xe->drm, "Trying to schedule after vm is closed\n"); - err = -EIO; + if (xe_vm_is_closed_or_banned(engine->vm)) { + drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n"); + err = -ECANCELED; goto err_engine_end; } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 8a5d35f15791..d5894570f196 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -478,6 +478,11 @@ DECLARE_EVENT_CLASS(xe_vm, __entry->asid) ); +DEFINE_EVENT(xe_vm, xe_vm_kill, + TP_PROTO(struct xe_vm *vm), + TP_ARGS(vm) +); + DEFINE_EVENT(xe_vm, xe_vm_create, TP_PROTO(struct xe_vm *vm), TP_ARGS(vm) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a9cf62f7aac6..47e3d37b757c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -514,6 +514,24 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm, #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 +static void xe_vm_kill(struct xe_vm *vm) +{ + struct ww_acquire_ctx ww; + struct xe_engine *e; + + lockdep_assert_held(&vm->lock); + + xe_vm_lock(vm, &ww, 0, false); + vm->flags |= XE_VM_FLAG_BANNED; + trace_xe_vm_kill(vm); + + list_for_each_entry(e, &vm->preempt.engines, compute.link) + e->ops->kill(e); + xe_vm_unlock(vm, &ww); + + /* TODO: Inform user the VM is banned */ +} + static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); @@ -533,13 +551,14 @@ static void preempt_rebind_work_func(struct work_struct *w) XE_BUG_ON(!xe_vm_in_compute_mode(vm)); trace_xe_vm_rebind_worker_enter(vm); - if (xe_vm_is_closed(vm)) { + down_write(&vm->lock); + + if (xe_vm_is_closed_or_banned(vm)) { + up_write(&vm->lock); trace_xe_vm_rebind_worker_exit(vm); return; } - down_write(&vm->lock); - retry: if (vm->async_ops.error) goto out_unlock_outer; @@ -666,11 
+685,14 @@ out_unlock_outer: goto retry; } } + if (err) { + drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); + xe_vm_kill(vm); + } up_write(&vm->lock); free_preempt_fences(&preempt_fences); - XE_WARN_ON(err < 0); /* TODO: Kill VM or put in error state */ trace_xe_vm_rebind_worker_exit(vm); } @@ -1140,11 +1162,12 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma) { struct rb_node *node; - if (xe_vm_is_closed(vm)) + lockdep_assert_held(&vm->lock); + + if (xe_vm_is_closed_or_banned(vm)) return NULL; XE_BUG_ON(vma->end >= vm->size); - lockdep_assert_held(&vm->lock); node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb); @@ -1377,6 +1400,13 @@ mm_closed: wake_up_all(&vm->async_ops.error_capture.wq); } +static void xe_vm_close(struct xe_vm *vm) +{ + down_write(&vm->lock); + vm->size = 0; + up_write(&vm->lock); +} + void xe_vm_close_and_put(struct xe_vm *vm) { struct rb_root contested = RB_ROOT; @@ -1387,8 +1417,8 @@ void xe_vm_close_and_put(struct xe_vm *vm) XE_BUG_ON(vm->preempt.num_engines); - vm->size = 0; - smp_mb(); + xe_vm_close(vm); + flush_async_ops(vm); if (xe_vm_in_compute_mode(vm)) flush_work(&vm->preempt.rebind_work); @@ -3072,30 +3102,34 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (err) return err; - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) { - err = -EINVAL; - goto free_objs; - } - - if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { - drm_err(dev, "VM closed while we began looking up?\n"); - err = -ENOENT; - goto put_vm; - } - if (args->engine_id) { e = xe_engine_lookup(xef, args->engine_id); if (XE_IOCTL_ERR(xe, !e)) { err = -ENOENT; - goto put_vm; + goto free_objs; } + if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) { err = -EINVAL; goto put_engine; } } + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_ERR(xe, !vm)) { + err = -EINVAL; + goto put_engine; + } + + err = down_write_killable(&vm->lock); + if (err) + goto put_vm; + + if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) { + err = -ENOENT; + goto release_vm_lock; + } + if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) { if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) err = -EOPNOTSUPP; @@ -3105,10 +3139,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) err = -EPROTO; if (!err) { - down_write(&vm->lock); trace_xe_vm_restart(vm); vm_set_async_error(vm, 0); - up_write(&vm->lock); queue_work(system_unbound_wq, &vm->async_ops.work); @@ -3117,13 +3149,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) xe_vm_queue_rebind_worker(vm); } - goto put_engine; + goto release_vm_lock; } if (XE_IOCTL_ERR(xe, !vm->async_ops.error && async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) { err = -EOPNOTSUPP; - goto put_engine; + goto release_vm_lock; } for (i = 0; i < args->num_binds; ++i) { @@ -3133,7 +3165,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_ERR(xe, range > vm->size) || XE_IOCTL_ERR(xe, addr > vm->size - range)) { err = -EINVAL; - goto put_engine; + goto release_vm_lock; } if (bind_ops[i].tile_mask) { @@ -3142,7 +3174,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_ERR(xe, bind_ops[i].tile_mask & ~valid_tiles)) { err = -EINVAL; - goto put_engine; + goto release_vm_lock; } } } @@ -3150,13 +3182,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL); if (!bos) 
{ err = -ENOMEM; - goto put_engine; + goto release_vm_lock; } vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL); if (!vmas) { err = -ENOMEM; - goto put_engine; + goto release_vm_lock; } for (i = 0; i < args->num_binds; ++i) { @@ -3211,10 +3243,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_syncs; } - err = down_write_killable(&vm->lock); - if (err) - goto free_syncs; - /* Do some error checking first to make the unwind easier */ for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; @@ -3223,7 +3251,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op); if (err) - goto release_vm_lock; + goto free_syncs; } for (i = 0; i < args->num_binds; ++i) { @@ -3343,8 +3371,6 @@ destroy_vmas: break; } } -release_vm_lock: - up_write(&vm->lock); free_syncs: while (num_syncs--) { if (async && j && @@ -3357,11 +3383,13 @@ free_syncs: put_obj: for (i = j; i < args->num_binds; ++i) xe_bo_put(bos[i]); +release_vm_lock: + up_write(&vm->lock); +put_vm: + xe_vm_put(vm); put_engine: if (e) xe_engine_put(e); -put_vm: - xe_vm_put(vm); free_objs: kfree(bos); kfree(vmas); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 5edb7771629c..02b409dd77d5 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -45,10 +45,21 @@ void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww); static inline bool xe_vm_is_closed(struct xe_vm *vm) { - /* Only guaranteed not to change when vm->resv is held */ + /* Only guaranteed not to change when vm->lock is held */ return !vm->size; } +static inline bool xe_vm_is_banned(struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_BANNED; +} + +static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm) +{ + lockdep_assert_held(&vm->lock); + return xe_vm_is_closed(vm) || xe_vm_is_banned(vm); +} + struct xe_vma * xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 6c196431a60e..670c80c1f0a3 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -313,7 +313,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, !vm)) return -EINVAL; - if (XE_IOCTL_ERR(xe, xe_vm_is_closed(vm))) { + if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) { err = -ENOENT; goto put_vm; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index c148dd49a6ca..3c885211a8d1 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -176,15 +176,19 @@ struct xe_vm { struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE]; struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL]; - /** @flags: flags for this VM, statically setup a creation time */ + /** + * @flags: flags for this VM, statically setup a creation time aside + * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely + */ #define XE_VM_FLAGS_64K BIT(0) #define XE_VM_FLAG_COMPUTE_MODE BIT(1) #define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2) #define XE_VM_FLAG_MIGRATION BIT(3) #define XE_VM_FLAG_SCRATCH_PAGE BIT(4) #define XE_VM_FLAG_FAULT_MODE BIT(5) -#define XE_VM_FLAG_GT_ID(flags) (((flags) >> 6) & 0x3) -#define XE_VM_FLAG_SET_TILE_ID(tile) ((tile)->id << 6) +#define XE_VM_FLAG_BANNED BIT(6) +#define XE_VM_FLAG_GT_ID(flags) (((flags) >> 7) & 0x3) +#define XE_VM_FLAG_SET_TILE_ID(tile) ((tile)->id << 7) 
unsigned long flags; /** @composite_fence_ctx: context composite fence */ -- cgit v1.2.3 From 21ed3327e388c24ddbdc3b2e8533f0c3ab99953b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Jun 2023 13:03:04 -0700 Subject: drm/xe: Add helpers to hide struct xe_vma internals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will help with the GPUVA port as the internals of struct xe_vma will change. v2: Update comment around helpers Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 4 +- drivers/gpu/drm/xe/xe_exec.c | 2 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 7 +- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 14 +-- drivers/gpu/drm/xe/xe_pt.c | 88 +++++++-------- drivers/gpu/drm/xe/xe_trace.h | 10 +- drivers/gpu/drm/xe/xe_vm.c | 163 ++++++++++++++-------------- drivers/gpu/drm/xe/xe_vm.h | 76 ++++++++++--- drivers/gpu/drm/xe/xe_vm_madvise.c | 12 +- 9 files changed, 211 insertions(+), 165 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 86947a6fcc7c..fb351c36cdc2 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -426,7 +426,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, } list_for_each_entry(vma, &bo->vmas, bo_link) { - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); trace_xe_vma_evict(vma); @@ -454,7 +454,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, } else { bool vm_resv_locked = false; - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); /* * We need to put the vma on the vm's rebind_list, diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index bdf00e59e7a4..ba13d20ed348 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -126,7 +126,7 @@ retry: if (xe_vma_is_userptr(vma)) continue; - err = xe_bo_validate(vma->bo, vm, false); + err = xe_bo_validate(xe_vma_bo(vma), vm, false); if (err) { xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs); *tv = NULL; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9dd8e5097e65..4d0f402cc630 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -77,7 +77,8 @@ static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup) { - if (lookup->start > vma->end || lookup->end < vma->start) + if (xe_vma_start(lookup) > xe_vma_end(vma) - 1 || + xe_vma_end(lookup) - 1 < xe_vma_start(vma)) return false; return true; @@ -171,7 +172,7 @@ retry_userptr: } /* Lock VM and BOs dma-resv */ - bo = vma->bo; + bo = xe_vma_bo(vma); if (only_needs_bo_lock(bo)) { /* This path ensures the BO's LRU is updated */ ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); @@ -538,7 +539,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) goto unlock_vm; /* Lock VM and BOs dma-resv */ - bo = vma->bo; + bo = xe_vma_bo(vma); if (only_needs_bo_lock(bo)) { /* This path ensures the BO's LRU is updated */ ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 2fcb477604e2..f77368a16409 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -203,8 +203,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, if 
(!xe->info.has_range_tlb_invalidation) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { - u64 start = vma->start; - u64 length = vma->end - vma->start + 1; + u64 start = xe_vma_start(vma); + u64 length = xe_vma_size(vma); u64 align, end; if (length < SZ_4K) @@ -217,12 +217,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, * address mask covering the required range. */ align = roundup_pow_of_two(length); - start = ALIGN_DOWN(vma->start, align); - end = ALIGN(vma->start + length, align); + start = ALIGN_DOWN(xe_vma_start(vma), align); + end = ALIGN(xe_vma_end(vma), align); length = align; while (start + length < end) { length <<= 1; - start = ALIGN_DOWN(vma->start, length); + start = ALIGN_DOWN(xe_vma_start(vma), length); } /* @@ -231,7 +231,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, */ if (length >= SZ_2M) { length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(vma->start, length); + start = ALIGN_DOWN(xe_vma_start(vma), length); } XE_BUG_ON(length < SZ_4K); @@ -240,7 +240,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, XE_BUG_ON(!IS_ALIGNED(start, length)); action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = vma->vm->usm.asid; + action[len++] = xe_vma_vm(vma)->usm.asid; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); action[len++] = ilog2(length) - ilog2(SZ_4K); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index fe1c77b139e4..a697d43ec293 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -96,7 +96,7 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, &cur); return xe_res_dma(&cur) + offset; } else { - return xe_bo_addr(vma->bo, offset, page_size, is_vram); + return xe_bo_addr(xe_vma_bo(vma), offset, page_size, is_vram); } } @@ -749,7 +749,7 @@ static int xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 *num_entries) { - struct xe_bo *bo = vma->bo; + struct xe_bo *bo = xe_vma_bo(vma); bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo); struct xe_res_cursor curs; struct xe_pt_stage_bind_walk xe_walk = { @@ -758,15 +758,15 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, }, - .vm = vma->vm, + .vm = xe_vma_vm(vma), .tile = tile, .curs = &curs, - .va_curs_start = vma->start, + .va_curs_start = xe_vma_start(vma), .pte_flags = vma->pte_flags, .wupd.entries = entries, - .needs_64K = (vma->vm->flags & XE_VM_FLAGS_64K) && is_vram, + .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) && is_vram, }; - struct xe_pt *pt = vma->vm->pt_root[tile->id]; + struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; if (is_vram) { @@ -788,20 +788,20 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (!xe_vma_is_null(vma)) { if (xe_vma_is_userptr(vma)) - xe_res_first_sg(vma->userptr.sg, 0, - vma->end - vma->start + 1, &curs); + xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma), + &curs); else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, vma->bo_offset, - vma->end - vma->start + 1, &curs); + xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), + xe_vma_size(vma), &curs); else - xe_res_first_sg(xe_bo_get_sg(bo), vma->bo_offset, - vma->end - vma->start + 1, &curs); + xe_res_first_sg(xe_bo_get_sg(bo), xe_vma_bo_offset(vma), + xe_vma_size(vma), &curs); } else { - curs.size = vma->end - vma->start + 1; + curs.size = xe_vma_size(vma); } - ret = 
xe_pt_walk_range(&pt->base, pt->level, vma->start, vma->end + 1, - &xe_walk.base); + ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), + xe_vma_end(vma), &xe_walk.base); *num_entries = xe_walk.wupd.num_used_entries; return ret; @@ -933,13 +933,13 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) }, .tile = tile, }; - struct xe_pt *pt = vma->vm->pt_root[tile->id]; + struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; if (!(vma->tile_present & BIT(tile->id))) return false; - (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, - &xe_walk.base); + (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), + xe_vma_end(vma), &xe_walk.base); return xe_walk.needs_invalidate; } @@ -974,21 +974,21 @@ static void xe_pt_abort_bind(struct xe_vma *vma, continue; for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, vma->vm->flags, NULL); + xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); kfree(entries[i].pt_entries); } } static void xe_pt_commit_locks_assert(struct xe_vma *vma) { - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); lockdep_assert_held(&vm->lock); if (xe_vma_is_userptr(vma)) lockdep_assert_held_read(&vm->userptr.notifier_lock); else if (!xe_vma_is_null(vma)) - dma_resv_assert_held(vma->bo->ttm.base.resv); + dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); dma_resv_assert_held(&vm->resv); } @@ -1021,7 +1021,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma, if (xe_pt_entry(pt_dir, j_)) xe_pt_destroy(xe_pt_entry(pt_dir, j_), - vma->vm->flags, deferred); + xe_vma_vm(vma)->flags, deferred); pt_dir->dir.entries[j_] = &newpte->base; } @@ -1082,7 +1082,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) static u32 count; if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); vma->userptr.divisor = divisor << 1; spin_lock(&vm->userptr.invalidated_lock); @@ -1125,7 +1125,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) container_of(pt_update, typeof(*userptr_update), base); struct xe_vma *vma = pt_update->vma; unsigned long notifier_seq = vma->userptr.notifier_seq; - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); userptr_update->locked = false; @@ -1296,19 +1296,19 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, }, .bind = true, }; - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); u32 num_entries; struct dma_fence *fence; struct invalidation_fence *ifence = NULL; int err; bind_pt_update.locked = false; - xe_bo_assert_held(vma->bo); + xe_bo_assert_held(xe_vma_bo(vma)); xe_vm_assert_held(vm); - vm_dbg(&vma->vm->xe->drm, + vm_dbg(&xe_vma_vm(vma)->xe->drm, "Preparing bind, with range [%llx...%llx) engine %p.\n", - vma->start, vma->end, e); + xe_vma_start(vma), xe_vma_end(vma) - 1, e); err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind); if (err) @@ -1337,7 +1337,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, } fence = xe_migrate_update_pgtables(tile->migrate, - vm, vma->bo, + vm, xe_vma_bo(vma), e ? 
e : vm->eng[tile->id], entries, num_entries, syncs, num_syncs, @@ -1363,8 +1363,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - if (!xe_vma_has_no_bo(vma) && !vma->bo->vm) - dma_resv_add_fence(vma->bo->ttm.base.resv, fence, + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, DMA_RESV_USAGE_BOOKKEEP); xe_pt_commit_bind(vma, entries, num_entries, rebind, bind_pt_update.locked ? &deferred : NULL); @@ -1526,14 +1526,14 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, .max_level = XE_PT_HIGHEST_LEVEL, }, .tile = tile, - .modified_start = vma->start, - .modified_end = vma->end + 1, + .modified_start = xe_vma_start(vma), + .modified_end = xe_vma_end(vma), .wupd.entries = entries, }; - struct xe_pt *pt = vma->vm->pt_root[tile->id]; + struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - (void)xe_pt_walk_shared(&pt->base, pt->level, vma->start, vma->end + 1, - &xe_walk.base); + (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), + xe_vma_end(vma), &xe_walk.base); return xe_walk.wupd.num_used_entries; } @@ -1545,7 +1545,7 @@ xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, const struct xe_vm_pgtable_update *update) { struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, vma->vm, update->pt->level); + u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); int i; if (map && map->is_iomem) @@ -1581,7 +1581,7 @@ xe_pt_commit_unbind(struct xe_vma *vma, i++) { if (xe_pt_entry(pt_dir, i)) xe_pt_destroy(xe_pt_entry(pt_dir, i), - vma->vm->flags, deferred); + xe_vma_vm(vma)->flags, deferred); pt_dir->dir.entries[i] = NULL; } @@ -1630,18 +1630,18 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e .vma = vma, }, }; - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); u32 num_entries; struct dma_fence *fence = NULL; struct invalidation_fence *ifence; LLIST_HEAD(deferred); - xe_bo_assert_held(vma->bo); + xe_bo_assert_held(xe_vma_bo(vma)); xe_vm_assert_held(vm); - vm_dbg(&vma->vm->xe->drm, + vm_dbg(&xe_vma_vm(vma)->xe->drm, "Preparing unbind, with range [%llx...%llx) engine %p.\n", - vma->start, vma->end, e); + xe_vma_start(vma), xe_vma_end(vma) - 1, e); num_entries = xe_pt_stage_unbind(tile, vma, entries); XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); @@ -1680,8 +1680,8 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e DMA_RESV_USAGE_BOOKKEEP); /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !vma->bo->vm) - dma_resv_add_fence(vma->bo->ttm.base.resv, fence, + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, DMA_RESV_USAGE_BOOKKEEP); xe_pt_commit_unbind(vma, entries, num_entries, unbind_pt_update.locked ? 
&deferred : NULL); diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index d5894570f196..82ca25d8d017 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -19,7 +19,7 @@ #include "xe_gt_types.h" #include "xe_guc_engine_types.h" #include "xe_sched_job.h" -#include "xe_vm_types.h" +#include "xe_vm.h" DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), @@ -374,10 +374,10 @@ DECLARE_EVENT_CLASS(xe_vma, TP_fast_assign( __entry->vma = (unsigned long)vma; - __entry->asid = vma->vm->usm.asid; - __entry->start = vma->start; - __entry->end = vma->end; - __entry->ptr = (u64)vma->userptr.ptr; + __entry->asid = xe_vma_vm(vma)->usm.asid; + __entry->start = xe_vma_start(vma); + __entry->end = xe_vma_end(vma) - 1; + __entry->ptr = xe_vma_userptr(vma); ), TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 47e3d37b757c..cec96beef334 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -53,15 +53,14 @@ int xe_vma_userptr_check_repin(struct xe_vma *vma) int xe_vma_userptr_pin_pages(struct xe_vma *vma) { - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); struct xe_device *xe = vm->xe; - const unsigned long num_pages = - (vma->end - vma->start + 1) >> PAGE_SHIFT; + const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT; struct page **pages; bool in_kthread = !current->mm; unsigned long notifier_seq; int pinned, ret, i; - bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY; + bool read_only = xe_vma_read_only(vma); lockdep_assert_held(&vm->lock); XE_BUG_ON(!xe_vma_is_userptr(vma)); @@ -96,7 +95,8 @@ retry: } while (pinned < num_pages) { - ret = get_user_pages_fast(vma->userptr.ptr + pinned * PAGE_SIZE, + ret = get_user_pages_fast(xe_vma_userptr(vma) + + pinned * PAGE_SIZE, num_pages - pinned, read_only ? 
0 : FOLL_WRITE, &pages[pinned]); @@ -299,7 +299,7 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, struct xe_vma *vma; list_for_each_entry(vma, &vm->extobj.list, extobj.link) - dma_resv_add_fence(vma->bo->ttm.base.resv, fence, usage); + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, usage); } static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) @@ -448,7 +448,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, INIT_LIST_HEAD(objs); list_for_each_entry(vma, &vm->extobj.list, extobj.link) { tv_bo->num_shared = num_shared; - tv_bo->bo = &vma->bo->ttm; + tv_bo->bo = &xe_vma_bo(vma)->ttm; list_add_tail(&tv_bo->head, objs); tv_bo++; @@ -463,7 +463,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, spin_lock(&vm->notifier.list_lock); list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list, notifier.rebind_link) { - xe_bo_assert_held(vma->bo); + xe_bo_assert_held(xe_vma_bo(vma)); list_del_init(&vma->notifier.rebind_link); if (vma->tile_present && !vma->destroyed) @@ -612,7 +612,7 @@ retry: if (xe_vma_has_no_bo(vma) || vma->destroyed) continue; - err = xe_bo_validate(vma->bo, vm, false); + err = xe_bo_validate(xe_vma_bo(vma), vm, false); if (err) goto out_unlock; } @@ -706,7 +706,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, unsigned long cur_seq) { struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier); - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; long err; @@ -925,7 +925,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, err = mmu_interval_notifier_insert(&vma->userptr.notifier, current->mm, - vma->userptr.ptr, size, + xe_vma_userptr(vma), size, &vma_userptr_notifier_ops); if (err) { kfree(vma); @@ -945,7 +945,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, static bool vm_remove_extobj(struct xe_vma *vma) { if (!list_empty(&vma->extobj.link)) { - vma->vm->extobj.entries--; + xe_vma_vm(vma)->extobj.entries--; list_del_init(&vma->extobj.link); return true; } @@ -954,9 +954,9 @@ static bool vm_remove_extobj(struct xe_vma *vma) static void xe_vma_destroy_late(struct xe_vma *vma) { - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); struct xe_device *xe = vm->xe; - bool read_only = vma->pte_flags & XE_PTE_FLAG_READ_ONLY; + bool read_only = xe_vma_read_only(vma); if (xe_vma_is_userptr(vma)) { if (vma->userptr.sg) { @@ -978,7 +978,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); } else { - xe_bo_put(vma->bo); + xe_bo_put(xe_vma_bo(vma)); } kfree(vma); @@ -999,7 +999,7 @@ bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm, struct xe_vma *vma; list_for_each_entry(vma, &bo->vmas, bo_link) { - if (vma != ignore && vma->vm == vm) + if (vma != ignore && xe_vma_vm(vma) == vm) return vma; } @@ -1027,7 +1027,7 @@ static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) { - struct xe_bo *bo = vma->bo; + struct xe_bo *bo = xe_vma_bo(vma); lockdep_assert_held_write(&vm->lock); @@ -1048,7 +1048,7 @@ static void vma_destroy_cb(struct dma_fence *fence, static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) { - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); lockdep_assert_held_write(&vm->lock); XE_BUG_ON(!list_empty(&vma->unbind_link)); @@ -1060,17 +1060,17 @@ static void 
xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) spin_unlock(&vm->userptr.invalidated_lock); list_del(&vma->userptr_link); } else if (!xe_vma_is_null(vma)) { - xe_bo_assert_held(vma->bo); + xe_bo_assert_held(xe_vma_bo(vma)); list_del(&vma->bo_link); spin_lock(&vm->notifier.list_lock); list_del(&vma->notifier.rebind_link); spin_unlock(&vm->notifier.list_lock); - if (!vma->bo->vm && vm_remove_extobj(vma)) { + if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) { struct xe_vma *other; - other = bo_has_vm_references_locked(vma->bo, vm, NULL); + other = bo_has_vm_references_locked(xe_vma_bo(vma), vm, NULL); if (other) __vm_insert_extobj(vm, other); @@ -1098,13 +1098,13 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma) { struct ttm_validate_buffer tv[2]; struct ww_acquire_ctx ww; - struct xe_bo *bo = vma->bo; + struct xe_bo *bo = xe_vma_bo(vma); LIST_HEAD(objs); LIST_HEAD(dups); int err; memset(tv, 0, sizeof(tv)); - tv[0].bo = xe_vm_ttm_bo(vma->vm); + tv[0].bo = xe_vm_ttm_bo(xe_vma_vm(vma)); list_add(&tv[0].head, &objs); if (bo) { @@ -1127,11 +1127,11 @@ static struct xe_vma *to_xe_vma(const struct rb_node *node) return (struct xe_vma *)node; } -static int xe_vma_cmp(const struct xe_vma *a, const struct xe_vma *b) +static int xe_vma_cmp(struct xe_vma *a, struct xe_vma *b) { - if (a->end < b->start) { + if (xe_vma_end(a) - 1 < xe_vma_start(b)) { return -1; - } else if (b->end < a->start) { + } else if (xe_vma_end(b) - 1 < xe_vma_start(a)) { return 1; } else { return 0; @@ -1146,19 +1146,19 @@ static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b) int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node) { struct xe_vma *cmp = to_xe_vma(node); - const struct xe_vma *own = key; + struct xe_vma *own = (struct xe_vma *)key; - if (own->start > cmp->end) + if (xe_vma_start(own) > xe_vma_end(cmp) - 1) return 1; - if (own->end < cmp->start) + if (xe_vma_end(own) - 1 < xe_vma_start(cmp)) return -1; return 0; } struct xe_vma * -xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma) +xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma) { struct rb_node *node; @@ -1167,7 +1167,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma) if (xe_vm_is_closed_or_banned(vm)) return NULL; - XE_BUG_ON(vma->end >= vm->size); + XE_BUG_ON(xe_vma_end(vma) > vm->size); node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb); @@ -1176,7 +1176,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma) static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) { - XE_BUG_ON(vma->vm != vm); + XE_BUG_ON(xe_vma_vm(vma) != vm); lockdep_assert_held(&vm->lock); rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb); @@ -1184,7 +1184,7 @@ static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) { - XE_BUG_ON(vma->vm != vm); + XE_BUG_ON(xe_vma_vm(vma) != vm); lockdep_assert_held(&vm->lock); rb_erase(&vma->vm_node, &vm->vmas); @@ -1445,7 +1445,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) rb_erase(&vma->vm_node, &vm->vmas); /* easy case, remove from VMA? 
*/ - if (xe_vma_has_no_bo(vma) || vma->bo->vm) { + if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { xe_vma_destroy(vma, NULL); continue; } @@ -1584,7 +1584,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); int cur_fence = 0, i; int number_tiles = hweight_long(vma->tile_present); int err; @@ -1654,7 +1654,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, struct dma_fence *fence; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - struct xe_vm *vm = vma->vm; + struct xe_vm *vm = xe_vma_vm(vma); int cur_fence = 0, i; int number_tiles = hweight_long(vma->tile_mask); int err; @@ -1840,7 +1840,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, struct dma_fence *fence; xe_vm_assert_held(vm); - xe_bo_assert_held(vma->bo); + xe_bo_assert_held(xe_vma_bo(vma)); fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs); if (IS_ERR(fence)) @@ -2078,13 +2078,13 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type)); if (!xe_vma_has_no_bo(vma)) { - err = xe_bo_migrate(vma->bo, region_to_mem_type[region]); + err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); if (err) return err; } if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) { - return xe_vm_bind(vm, vma, e, vma->bo, syncs, num_syncs, + return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs, afence); } else { int i; @@ -2180,7 +2180,7 @@ static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, xe_vm_tv_populate(vm, &tv_vm); list_add_tail(&tv_vm.head, &objs); - vbo = vma->bo; + vbo = xe_vma_bo(vma); if (vbo) { /* * An unbind can drop the last reference to the BO and @@ -2540,7 +2540,7 @@ static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, } else { bind_op->op = XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_OP_MAP; - xe_bo_get(__vma->bo); + xe_bo_get(xe_vma_bo(__vma)); } if (!last) { @@ -2550,7 +2550,7 @@ static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, } err = __vm_bind_ioctl_async(vm, __vma, e, - __vma->bo, bind_op, last ? + xe_vma_bo(__vma), bind_op, last ? out_syncs : NULL, last ? num_out_syncs : 0); if (err) { @@ -2597,8 +2597,8 @@ static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, case XE_VM_BIND_OP_PREFETCH: vma = xe_vm_find_overlapping_vma(vm, &lookup); if (XE_IOCTL_ERR(xe, !vma) || - XE_IOCTL_ERR(xe, (vma->start != addr || - vma->end != addr + range - 1) && !async)) + XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr || + xe_vma_end(vma) != addr + range) && !async)) return -EINVAL; break; case XE_VM_BIND_OP_UNMAP_ALL: @@ -2623,9 +2623,9 @@ static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma) { int err; - if (vma->bo && !vma->bo->vm) { + if (xe_vma_bo(vma) && !xe_vma_bo(vma)->vm) { vm_insert_extobj(vm, vma); - err = add_preempt_fences(vm, vma->bo); + err = add_preempt_fences(vm, xe_vma_bo(vma)); if (err) return err; } @@ -2674,25 +2674,25 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, } } - if (first->start != lookup->start) { + if (xe_vma_start(first) != xe_vma_start(lookup)) { struct ww_acquire_ctx ww; - if (first->bo) - err = xe_bo_lock(first->bo, &ww, 0, true); + if (xe_vma_bo(first)) + err = xe_bo_lock(xe_vma_bo(first), &ww, 0, true); if (err) goto unwind; - new_first = xe_vma_create(first->vm, first->bo, - first->bo ? 
first->bo_offset : - first->userptr.ptr, - first->start, - lookup->start - 1, - (first->pte_flags & - XE_PTE_FLAG_READ_ONLY), + new_first = xe_vma_create(xe_vma_vm(first), xe_vma_bo(first), + xe_vma_bo(first) ? + xe_vma_bo_offset(first) : + xe_vma_userptr(first), + xe_vma_start(first), + xe_vma_start(lookup) - 1, + xe_vma_read_only(first), (first->pte_flags & XE_PTE_FLAG_NULL), first->tile_mask); - if (first->bo) - xe_bo_unlock(first->bo, &ww); + if (xe_vma_bo(first)) + xe_bo_unlock(xe_vma_bo(first), &ww); if (!new_first) { err = -ENOMEM; goto unwind; @@ -2707,25 +2707,25 @@ static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, goto unwind; } - if (last->end != lookup->end) { + if (xe_vma_end(last) != xe_vma_end(lookup)) { struct ww_acquire_ctx ww; - u64 chunk = lookup->end + 1 - last->start; + u64 chunk = xe_vma_end(lookup) - xe_vma_start(last); - if (last->bo) - err = xe_bo_lock(last->bo, &ww, 0, true); + if (xe_vma_bo(last)) + err = xe_bo_lock(xe_vma_bo(last), &ww, 0, true); if (err) goto unwind; - new_last = xe_vma_create(last->vm, last->bo, - last->bo ? last->bo_offset + chunk : - last->userptr.ptr + chunk, - last->start + chunk, - last->end, - (last->pte_flags & - XE_PTE_FLAG_READ_ONLY), + new_last = xe_vma_create(xe_vma_vm(last), xe_vma_bo(last), + xe_vma_bo(last) ? + xe_vma_bo_offset(last) + chunk : + xe_vma_userptr(last) + chunk, + xe_vma_start(last) + chunk, + xe_vma_end(last) - 1, + xe_vma_read_only(last), (last->pte_flags & XE_PTE_FLAG_NULL), last->tile_mask); - if (last->bo) - xe_bo_unlock(last->bo, &ww); + if (xe_vma_bo(last)) + xe_bo_unlock(xe_vma_bo(last), &ww); if (!new_last) { err = -ENOMEM; goto unwind; @@ -2791,7 +2791,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, struct rb_node *node; if (!xe_vma_has_no_bo(vma)) { - if (!xe_bo_can_migrate(vma->bo, region_to_mem_type[region])) + if (!xe_bo_can_migrate(xe_vma_bo(vma), region_to_mem_type[region])) return ERR_PTR(-EINVAL); } @@ -2800,7 +2800,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, if (!xe_vma_cmp_vma_cb(lookup, node)) { __vma = to_xe_vma(node); if (!xe_vma_has_no_bo(__vma)) { - if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) + if (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region])) goto flush_list; } list_add_tail(&__vma->unbind_link, &vma->unbind_link); @@ -2814,7 +2814,7 @@ static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, if (!xe_vma_cmp_vma_cb(lookup, node)) { __vma = to_xe_vma(node); if (!xe_vma_has_no_bo(__vma)) { - if (!xe_bo_can_migrate(__vma->bo, region_to_mem_type[region])) + if (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region])) goto flush_list; } list_add(&__vma->unbind_link, &vma->unbind_link); @@ -2842,7 +2842,7 @@ static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm, xe_bo_assert_held(bo); list_for_each_entry(vma, &bo->vmas, bo_link) { - if (vma->vm != vm) + if (xe_vma_vm(vma) != vm) continue; prep_vma_destroy(vm, vma); @@ -3436,14 +3436,14 @@ void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww) */ int xe_vm_invalidate_vma(struct xe_vma *vma) { - struct xe_device *xe = vma->vm->xe; + struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; u32 tile_needs_invalidate = 0; int seqno[XE_MAX_TILES_PER_DEVICE]; u8 id; int ret; - XE_BUG_ON(!xe_vm_in_fault_mode(vma->vm)); + XE_BUG_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma))); XE_WARN_ON(xe_vma_is_null(vma)); trace_xe_vma_usm_invalidate(vma); @@ -3453,11 +3453,11 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) 
WARN_ON_ONCE(!mmu_interval_check_retry (&vma->userptr.notifier, vma->userptr.notifier_seq)); - WARN_ON_ONCE(!dma_resv_test_signaled(&vma->vm->resv, + WARN_ON_ONCE(!dma_resv_test_signaled(&xe_vma_vm(vma)->resv, DMA_RESV_USAGE_BOOKKEEP)); } else { - xe_bo_assert_held(vma->bo); + xe_bo_assert_held(xe_vma_bo(vma)); } } @@ -3522,10 +3522,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) addr = 0; } } else { - addr = __xe_bo_addr(vma->bo, 0, XE_PAGE_SIZE, &is_vram); + addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE, &is_vram); } drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", - vma->start, vma->end, vma->end - vma->start + 1ull, + xe_vma_start(vma), xe_vma_end(vma) - 1, + xe_vma_size(vma), addr, is_null ? "NULL" : is_userptr ? "USR" : is_vram ? "VRAM" : "SYS"); } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 02b409dd77d5..644a8aa604e8 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -61,7 +61,66 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm) } struct xe_vma * -xe_vm_find_overlapping_vma(struct xe_vm *vm, const struct xe_vma *vma); +xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma); + +/** + * DOC: Provide accessors for vma members to facilitate easy change of + * implementation. + */ +static inline u64 xe_vma_start(struct xe_vma *vma) +{ + return vma->start; +} + +static inline u64 xe_vma_size(struct xe_vma *vma) +{ + return vma->end - vma->start + 1; +} + +static inline u64 xe_vma_end(struct xe_vma *vma) +{ + return xe_vma_start(vma) + xe_vma_size(vma); +} + +static inline u64 xe_vma_bo_offset(struct xe_vma *vma) +{ + return vma->bo_offset; +} + +static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma) +{ + return vma->bo; +} + +static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma) +{ + return vma->vm; +} + +static inline bool xe_vma_read_only(struct xe_vma *vma) +{ + return vma->pte_flags & XE_PTE_FLAG_READ_ONLY; +} + +static inline u64 xe_vma_userptr(struct xe_vma *vma) +{ + return vma->userptr.ptr; +} + +static inline bool xe_vma_is_null(struct xe_vma *vma) +{ + return vma->pte_flags & XE_PTE_FLAG_NULL; +} + +static inline bool xe_vma_has_no_bo(struct xe_vma *vma) +{ + return !xe_vma_bo(vma); +} + +static inline bool xe_vma_is_userptr(struct xe_vma *vma) +{ + return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma); +} #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) @@ -126,21 +185,6 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) } } -static inline bool xe_vma_is_null(struct xe_vma *vma) -{ - return vma->pte_flags & XE_PTE_FLAG_NULL; -} - -static inline bool xe_vma_has_no_bo(struct xe_vma *vma) -{ - return !vma->bo; -} - -static inline bool xe_vma_is_userptr(struct xe_vma *vma) -{ - return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma); -} - int xe_vma_userptr_pin_pages(struct xe_vma *vma); int xe_vma_userptr_check_repin(struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 670c80c1f0a3..0e8d0c513ee9 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -30,7 +30,7 @@ static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo; struct ww_acquire_ctx ww; - bo = vmas[i]->bo; + bo = xe_vma_bo(vmas[i]); err = xe_bo_lock(bo, &ww, 0, true); if (err) @@ -55,7 +55,7 @@ static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo; struct ww_acquire_ctx ww; - bo = vmas[i]->bo; + 
bo = xe_vma_bo(vmas[i]); err = xe_bo_lock(bo, &ww, 0, true); if (err) @@ -91,7 +91,7 @@ static int madvise_preferred_mem_class_gt(struct xe_device *xe, struct xe_bo *bo; struct ww_acquire_ctx ww; - bo = vmas[i]->bo; + bo = xe_vma_bo(vmas[i]); err = xe_bo_lock(bo, &ww, 0, true); if (err) @@ -114,7 +114,7 @@ static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo; struct ww_acquire_ctx ww; - bo = vmas[i]->bo; + bo = xe_vma_bo(vmas[i]); if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT))) return -EINVAL; @@ -145,7 +145,7 @@ static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo; struct ww_acquire_ctx ww; - bo = vmas[i]->bo; + bo = xe_vma_bo(vmas[i]); if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) && !(bo->flags & XE_BO_CREATE_VRAM1_BIT))) return -EINVAL; @@ -176,7 +176,7 @@ static int madvise_priority(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo; struct ww_acquire_ctx ww; - bo = vmas[i]->bo; + bo = xe_vma_bo(vmas[i]); err = xe_bo_lock(bo, &ww, 0, true); if (err) -- cgit v1.2.3 From 5cecdd0bb6bf4b8979b7d071017560daecfc9200 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jun 2023 14:55:37 -0700 Subject: drm/xe: Remove __xe_vm_bind forward declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed, so remove it. Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index cec96beef334..825abc3c5a45 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -696,11 +696,6 @@ out_unlock_outer: trace_xe_vm_rebind_worker_exit(vm); } -struct async_op_fence; -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_engine *e, struct xe_sync_entry *syncs, - u32 num_syncs, struct async_op_fence *afence); - static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq) -- cgit v1.2.3 From b06d47be7c83165d3b3e45e1d5f9520b79c7f5cc Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 7 Jul 2023 22:23:57 -0700 Subject: drm/xe: Port Xe to GPUVA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than open coding VM binds and VMA tracking, use the GPUVA library. GPUVA provides a common infrastructure for VM binds to use mmap / munmap semantics and support for VK sparse bindings. The concepts are: 1) xe_vm inherits from drm_gpuva_manager 2) xe_vma inherits from drm_gpuva 3) xe_vma_op inherits from drm_gpuva_op 4) VM bind operations (MAP, UNMAP, PREFETCH, UNMAP_ALL) call into the GPUVA code to generate a VMA operations list which is parsed, committed, and executed. v2 (CI): Add break after default in case statement.
v3: Rebase v4: Fix some error handling v5: Use unlocked version VMA in error paths v6: Rebase, address some review feedback mainly Thomas H v7: Fix compile error in xe_vma_op_unwind, address checkpatch Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig | 1 + drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_bo.c | 31 +- drivers/gpu/drm/xe/xe_bo.h | 11 +- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_exec.c | 4 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 20 +- drivers/gpu/drm/xe/xe_migrate.c | 14 +- drivers/gpu/drm/xe/xe_pt.c | 46 +- drivers/gpu/drm/xe/xe_pt.h | 2 +- drivers/gpu/drm/xe/xe_vm.c | 1905 ++++++++++++++++----------------- drivers/gpu/drm/xe/xe_vm.h | 59 +- drivers/gpu/drm/xe/xe_vm_madvise.c | 76 +- drivers/gpu/drm/xe/xe_vm_types.h | 179 ++-- 14 files changed, 1175 insertions(+), 1177 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 0a4854a59c90..2a595bc92ca4 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -21,6 +21,7 @@ config DRM_XE select VMAP_PFN select DRM_TTM select DRM_TTM_HELPER + select DRM_GPUVM select DRM_SCHED select MMU_NOTIFIER select WANT_DEV_COREDUMP diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4c79c1dfa772..aedfb3dd559e 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -300,7 +300,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* First part of the test, are we updating our pagetable bo with a new entry? */ xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); - expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0); + expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0); if (m->eng->vm->flags & XE_VM_FLAGS_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index fb351c36cdc2..00b8b5e7f197 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -412,7 +412,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, { struct dma_resv_iter cursor; struct dma_fence *fence; - struct xe_vma *vma; + struct drm_gpuva *gpuva; + struct drm_gem_object *obj = &bo->ttm.base; + struct drm_gpuvm_bo *vm_bo; int ret = 0; dma_resv_assert_held(bo->ttm.base.resv); @@ -425,10 +427,12 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, dma_resv_iter_end(&cursor); } - list_for_each_entry(vma, &bo->vmas, bo_link) { - struct xe_vm *vm = xe_vma_vm(vma); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + struct xe_vm *vm = xe_vma_vm(vma); - trace_xe_vma_evict(vma); + trace_xe_vma_evict(vma); if (xe_vm_in_fault_mode(vm)) { /* Wait for pending binds / unbinds. */ @@ -454,7 +458,6 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, } else { bool vm_resv_locked = false; - struct xe_vm *vm = xe_vma_vm(vma); /* * We need to put the vma on the vm's rebind_list, @@ -462,9 +465,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, * that we indeed have it locked, put the vma an the * vm's notifier.rebind_list instead and scoop later. 
*/ - if (dma_resv_trylock(&vm->resv)) + if (dma_resv_trylock(xe_vm_resv(vm))) vm_resv_locked = true; - else if (ctx->resv != &vm->resv) { + else if (ctx->resv != xe_vm_resv(vm)) { spin_lock(&vm->notifier.list_lock); list_move_tail(&vma->notifier.rebind_link, &vm->notifier.rebind_list); @@ -477,7 +480,8 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, list_add_tail(&vma->rebind_link, &vm->rebind_list); if (vm_resv_locked) - dma_resv_unlock(&vm->resv); + dma_resv_unlock(xe_vm_resv(vm)); + } } } @@ -1285,7 +1289,7 @@ xe_bo_create_locked_range(struct xe_device *xe, } } - bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL, + bo = __xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, vm && !xe_vm_in_fault_mode(vm) && flags & XE_BO_CREATE_USER_BIT ? &vm->lru_bulk_move : NULL, size, @@ -1293,6 +1297,13 @@ xe_bo_create_locked_range(struct xe_device *xe, if (IS_ERR(bo)) return bo; + /* + * Note that instead of taking a reference no the drm_gpuvm_resv_bo(), + * to ensure the shared resv doesn't disappear under the bo, the bo + * will keep a reference to the vm, and avoid circular references + * by having all the vm's bo refereferences released at vm close + * time. + */ if (vm && xe_bo_is_user(bo)) xe_vm_get(vm); bo->vm = vm; @@ -1600,7 +1611,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) xe_vm_assert_held(vm); ctx.allow_res_evict = allow_res_evict; - ctx.resv = &vm->resv; + ctx.resv = xe_vm_resv(vm); } return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 08ca1d06bf77..53a82ff7bce2 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -9,6 +9,15 @@ #include "xe_bo_types.h" #include "xe_macros.h" #include "xe_vm_types.h" +#include "xe_vm.h" + +/** + * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. 
+ * @vm: The vm + */ +#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) + + #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ @@ -149,7 +158,7 @@ void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww); static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) { if (bo) { - XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv); + XE_BUG_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm)); if (bo->vm) xe_vm_assert_held(bo->vm); else diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a4fc5bc54d02..6249eef752c5 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -134,7 +134,7 @@ static struct drm_driver driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ | - DRIVER_SYNCOBJ_TIMELINE, + DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, .open = xe_file_open, .postclose = xe_file_close, diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index ba13d20ed348..07f4b2e8df16 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -350,7 +350,7 @@ retry: /* Wait behind munmap style rebinds */ if (!xe_vm_no_dma_fences(vm)) { err = drm_sched_job_add_resv_dependencies(&job->drm, - &vm->resv, + xe_vm_resv(vm), DMA_RESV_USAGE_KERNEL); if (err) goto err_put_job; @@ -378,7 +378,7 @@ retry: xe_sched_job_arm(job); if (!xe_vm_no_dma_fences(vm)) { /* Block userptr invalidations / BO eviction */ - dma_resv_add_fence(&vm->resv, + dma_resv_add_fence(xe_vm_resv(vm), &job->drm.s_fence->finished, DMA_RESV_USAGE_BOOKKEEP); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 4d0f402cc630..d8ff05e25eda 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -75,10 +75,10 @@ static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) !(BIT(tile->id) & vma->usm.tile_invalidated); } -static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup) +static bool vma_matches(struct xe_vma *vma, u64 page_addr) { - if (xe_vma_start(lookup) > xe_vma_end(vma) - 1 || - xe_vma_end(lookup) - 1 < xe_vma_start(vma)) + if (page_addr > xe_vma_end(vma) - 1 || + page_addr + SZ_4K - 1 < xe_vma_start(vma)) return false; return true; @@ -91,16 +91,14 @@ static bool only_needs_bo_lock(struct xe_bo *bo) static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) { - struct xe_vma *vma = NULL, lookup; + struct xe_vma *vma = NULL; - lookup.start = page_addr; - lookup.end = lookup.start + SZ_4K - 1; if (vm->usm.last_fault_vma) { /* Fast lookup */ - if (vma_matches(vm->usm.last_fault_vma, &lookup)) + if (vma_matches(vm->usm.last_fault_vma, page_addr)) vma = vm->usm.last_fault_vma; } if (!vma) - vma = xe_vm_find_overlapping_vma(vm, &lookup); + vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); return vma; } @@ -489,12 +487,8 @@ static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) { u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * sub_granularity_in_byte(acc->granularity); - struct xe_vma lookup; - - lookup.start = page_va; - lookup.end = lookup.start + SZ_4K - 1; - return xe_vm_find_overlapping_vma(vm, &lookup); + return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K); } static int handle_acc(struct xe_gt *gt, struct acc *acc) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index be98690f2bc9..f05335b16a1a 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -197,7 +197,7 @@ static int 
xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Map the entire BO in our level 0 pt */ for (i = 0, level = 0; i < num_entries; level++) { entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE, - XE_CACHE_WB, 0, 0); + XE_CACHE_WB, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -216,7 +216,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = xe_pte_encode(NULL, batch, i, - XE_CACHE_WB, 0, 0); + XE_CACHE_WB, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -1068,7 +1068,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m, DMA_RESV_USAGE_KERNEL)) return ERR_PTR(-ETIME); - if (wait_vm && !dma_resv_test_signaled(&vm->resv, + if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP)) return ERR_PTR(-ETIME); @@ -1159,7 +1159,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m, u64 addr; int err = 0; bool usm = !eng && xe->info.supports_usm; - bool first_munmap_rebind = vma && vma->first_munmap_rebind; + bool first_munmap_rebind = vma && + vma->gpuva.flags & XE_VMA_FIRST_REBIND; struct xe_engine *eng_override = !eng ? m->eng : eng; /* Use the CPU if no in syncs and engine is idle */ @@ -1232,8 +1233,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, BUG_ON(pt_bo->size != SZ_4K); - addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, - 0, 0); + addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, 0); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); } @@ -1281,7 +1281,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, * trigger preempts before moving forward */ if (first_munmap_rebind) { - err = job_add_deps(job, &vm->resv, + err = job_add_deps(job, xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP); if (err) goto err_job; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index a697d43ec293..030fd911d189 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -100,15 +100,15 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, } } -static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, - u32 pt_level) +static u64 __pte_encode(u64 pte, enum xe_cache_level cache, + struct xe_vma *vma, u32 pt_level) { pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - if (unlikely(flags & XE_PTE_FLAG_READ_ONLY)) + if (unlikely(vma && xe_vma_read_only(vma))) pte &= ~XE_PAGE_RW; - if (unlikely(flags & XE_PTE_FLAG_NULL)) + if (unlikely(vma && xe_vma_is_null(vma))) pte |= XE_PTE_NULL; /* FIXME: I don't think the PPAT handling is correct for MTL */ @@ -142,7 +142,6 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, * @bo: If @vma is NULL, representing the memory to point to. * @offset: The offset into @vma or @bo. * @cache: The cache level indicating - * @flags: Currently only supports PTE_READ_ONLY for read-only access. * @pt_level: The page-table level of the page-table into which the entry * is to be inserted. 
* @@ -150,7 +149,7 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags, */ u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo, u64 offset, enum xe_cache_level cache, - u32 flags, u32 pt_level) + u32 pt_level) { u64 pte; bool is_vram; @@ -162,11 +161,11 @@ u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo, if (is_vram) { pte |= XE_PPGTT_PTE_LM; - if (vma && vma->use_atomic_access_pte_bit) + if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) pte |= XE_USM_PPGTT_PTE_AE; } - return __pte_encode(pte, cache, flags, pt_level); + return __pte_encode(pte, cache, vma, pt_level); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -179,7 +178,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, if (level == 0) { u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id], 0, - XE_CACHE_WB, 0, 0); + XE_CACHE_WB, 0); return empty; } else { @@ -424,10 +423,9 @@ struct xe_pt_stage_bind_walk { */ bool needs_64K; /** - * @pte_flags: Flags determining PTE setup. These are not flags - * encoded directly in the PTE. See @default_pte for those. + * @vma: VMA being mapped */ - u32 pte_flags; + struct xe_vma *vma; /* Also input, but is updated during the walk*/ /** @curs: The DMA address cursor. */ @@ -564,7 +562,7 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, return false; /* null VMA's do not have dma addresses */ - if (xe_walk->pte_flags & XE_PTE_FLAG_NULL) + if (xe_vma_is_null(xe_walk->vma)) return true; /* Is the DMA address huge PTE size aligned? */ @@ -590,7 +588,7 @@ xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) return false; /* null VMA's do not have dma addresses */ - if (xe_walk->pte_flags & XE_PTE_FLAG_NULL) + if (xe_vma_is_null(xe_walk->vma)) return true; xe_res_next(&curs, addr - xe_walk->va_curs_start); @@ -643,14 +641,13 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, /* Is this a leaf entry ?*/ if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_walk->pte_flags & XE_PTE_FLAG_NULL; + bool is_null = xe_vma_is_null(xe_walk->vma); XE_WARN_ON(xe_walk->va_curs_start != addr); pte = __pte_encode(is_null ? 
0 : xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->cache, xe_walk->pte_flags, - level); + xe_walk->cache, xe_walk->vma, level); pte |= xe_walk->default_pte; /* @@ -762,7 +759,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .tile = tile, .curs = &curs, .va_curs_start = xe_vma_start(vma), - .pte_flags = vma->pte_flags, + .vma = vma, .wupd.entries = entries, .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) && is_vram, }; @@ -771,7 +768,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (is_vram) { xe_walk.default_pte = XE_PPGTT_PTE_LM; - if (vma && vma->use_atomic_access_pte_bit) + if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); xe_walk.cache = XE_CACHE_WB; @@ -990,7 +987,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) else if (!xe_vma_is_null(vma)) dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - dma_resv_assert_held(&vm->resv); + xe_vm_assert_held(vm); } static void xe_pt_commit_bind(struct xe_vma *vma, @@ -1343,6 +1340,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, syncs, num_syncs, &bind_pt_update.base); if (!IS_ERR(fence)) { + bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; LLIST_HEAD(deferred); /* TLB invalidation must be done before signaling rebind */ @@ -1358,8 +1356,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, } /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(&vm->resv, fence, !rebind && - vma->last_munmap_rebind ? + dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind && + last_munmap_rebind ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); @@ -1377,7 +1375,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, up_read(&vm->userptr.notifier_lock); xe_bo_put_commit(&deferred); } - if (!rebind && vma->last_munmap_rebind && + if (!rebind && last_munmap_rebind && xe_vm_in_compute_mode(vm)) queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); @@ -1676,7 +1674,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e fence = &ifence->base.base; /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(&vm->resv, fence, + dma_resv_add_fence(xe_vm_resv(vm), fence, DMA_RESV_USAGE_BOOKKEEP); /* This fence will be installed by caller when doing eviction */ diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 54e8a043d353..aaf4b7b851e2 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -50,5 +50,5 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo, u64 offset, enum xe_cache_level cache, - u32 flags, u32 pt_level); + u32 pt_level); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 825abc3c5a45..297b7977ed87 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -65,7 +65,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma) lockdep_assert_held(&vm->lock); XE_BUG_ON(!xe_vma_is_userptr(vma)); retry: - if (vma->destroyed) + if (vma->gpuva.flags & XE_VMA_DESTROYED) return 0; notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); @@ -312,7 +312,7 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) list_for_each_entry(e, &vm->preempt.engines, compute.link) { e->ops->resume(e); - dma_resv_add_fence(&vm->resv, e->compute.pfence, + 
dma_resv_add_fence(xe_vm_resv(vm), e->compute.pfence, DMA_RESV_USAGE_BOOKKEEP); xe_vm_fence_all_extobjs(vm, e->compute.pfence, DMA_RESV_USAGE_BOOKKEEP); @@ -350,7 +350,7 @@ int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e) down_read(&vm->userptr.notifier_lock); - dma_resv_add_fence(&vm->resv, pfence, + dma_resv_add_fence(xe_vm_resv(vm), pfence, DMA_RESV_USAGE_BOOKKEEP); xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP); @@ -466,7 +466,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, xe_bo_assert_held(xe_vma_bo(vma)); list_del_init(&vma->notifier.rebind_link); - if (vma->tile_present && !vma->destroyed) + if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED)) list_move_tail(&vma->rebind_link, &vm->rebind_list); } spin_unlock(&vm->notifier.list_lock); @@ -609,7 +609,8 @@ retry: goto out_unlock; list_for_each_entry(vma, &vm->rebind_list, rebind_link) { - if (xe_vma_has_no_bo(vma) || vma->destroyed) + if (xe_vma_has_no_bo(vma) || + vma->gpuva.flags & XE_VMA_DESTROYED) continue; err = xe_bo_validate(xe_vma_bo(vma), vm, false); @@ -629,7 +630,7 @@ retry: } /* Wait on munmap style VM unbinds */ - wait = dma_resv_wait_timeout(&vm->resv, + wait = dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT); if (wait <= 0) { @@ -725,7 +726,8 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, * Tell exec and rebind worker they need to repin and rebind this * userptr. */ - if (!xe_vm_in_fault_mode(vm) && !vma->destroyed && vma->tile_present) { + if (!xe_vm_in_fault_mode(vm) && + !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); list_move_tail(&vma->userptr.invalidate_link, &vm->userptr.invalidated); @@ -740,13 +742,13 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, * unbinds to complete, and those are attached as BOOKMARK fences * to the vm. */ - dma_resv_iter_begin(&cursor, &vm->resv, + dma_resv_iter_begin(&cursor, xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, fence) dma_fence_enable_sw_signaling(fence); dma_resv_iter_end(&cursor); - err = dma_resv_wait_timeout(&vm->resv, + err = dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); XE_WARN_ON(err <= 0); @@ -792,7 +794,7 @@ int xe_vm_userptr_pin(struct xe_vm *vm) } /* Take lock and move to rebind_list for rebinding. 
*/ - err = dma_resv_lock_interruptible(&vm->resv, NULL); + err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); if (err) goto out_err; @@ -801,7 +803,7 @@ int xe_vm_userptr_pin(struct xe_vm *vm) list_move_tail(&vma->rebind_link, &vm->rebind_list); } - dma_resv_unlock(&vm->resv); + dma_resv_unlock(xe_vm_resv(vm)); return 0; @@ -830,7 +832,8 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm) static struct dma_fence * xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, - struct xe_sync_entry *syncs, u32 num_syncs); + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op); struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { @@ -851,7 +854,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); - fence = xe_vm_bind_vma(vma, NULL, NULL, 0); + fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); if (IS_ERR(fence)) return fence; } @@ -887,14 +890,14 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, INIT_LIST_HEAD(&vma->notifier.rebind_link); INIT_LIST_HEAD(&vma->extobj.link); - vma->vm = vm; - vma->start = start; - vma->end = end; - vma->pte_flags = 0; + INIT_LIST_HEAD(&vma->gpuva.gem.entry); + vma->gpuva.vm = &vm->gpuvm; + vma->gpuva.va.addr = start; + vma->gpuva.va.range = end - start + 1; if (read_only) - vma->pte_flags |= XE_PTE_FLAG_READ_ONLY; + vma->gpuva.flags |= XE_VMA_READ_ONLY; if (is_null) - vma->pte_flags |= XE_PTE_FLAG_NULL; + vma->gpuva.flags |= DRM_GPUVA_SPARSE; if (tile_mask) { vma->tile_mask = tile_mask; @@ -904,19 +907,30 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, } if (vm->xe->info.platform == XE_PVC) - vma->use_atomic_access_pte_bit = true; + vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; if (bo) { + struct drm_gpuvm_bo *vm_bo; + xe_bo_assert_held(bo); - vma->bo_offset = bo_offset_or_userptr; - vma->bo = xe_bo_get(bo); - list_add_tail(&vma->bo_link, &bo->vmas); + + vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); + if (IS_ERR(vm_bo)) { + kfree(vma); + return ERR_CAST(vm_bo); + } + + drm_gem_object_get(&bo->ttm.base); + vma->gpuva.gem.obj = &bo->ttm.base; + vma->gpuva.gem.offset = bo_offset_or_userptr; + drm_gpuva_link(&vma->gpuva, vm_bo); + drm_gpuvm_bo_put(vm_bo); } else /* userptr or null */ { if (!is_null) { u64 size = end - start + 1; int err; - vma->userptr.ptr = bo_offset_or_userptr; + vma->gpuva.gem.offset = bo_offset_or_userptr; err = mmu_interval_notifier_insert(&vma->userptr.notifier, current->mm, @@ -991,11 +1005,19 @@ static struct xe_vma * bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm, struct xe_vma *ignore) { - struct xe_vma *vma; + struct drm_gpuvm_bo *vm_bo; + struct drm_gpuva *va; + struct drm_gem_object *obj = &bo->ttm.base; + + xe_bo_assert_held(bo); + + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct xe_vma *vma = gpuva_to_vma(va); - list_for_each_entry(vma, &bo->vmas, bo_link) { - if (vma != ignore && xe_vma_vm(vma) == vm) - return vma; + if (vma != ignore && xe_vma_vm(vma) == vm) + return vma; + } } return NULL; @@ -1016,6 +1038,8 @@ static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm, static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) { + lockdep_assert_held_write(&vm->lock); + list_add(&vma->extobj.link, &vm->extobj.list); vm->extobj.entries++; } @@ -1049,19 +1073,21 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) XE_BUG_ON(!list_empty(&vma->unbind_link)); if 
(xe_vma_is_userptr(vma)) { - XE_WARN_ON(!vma->destroyed); + XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED)); + spin_lock(&vm->userptr.invalidated_lock); list_del_init(&vma->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); list_del(&vma->userptr_link); } else if (!xe_vma_is_null(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); - list_del(&vma->bo_link); spin_lock(&vm->notifier.list_lock); list_del(&vma->notifier.rebind_link); spin_unlock(&vm->notifier.list_lock); + drm_gpuva_unlink(&vma->gpuva); + if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) { struct xe_vma *other; @@ -1116,65 +1142,34 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma) xe_bo_put(bo); } -static struct xe_vma *to_xe_vma(const struct rb_node *node) -{ - BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0); - return (struct xe_vma *)node; -} - -static int xe_vma_cmp(struct xe_vma *a, struct xe_vma *b) -{ - if (xe_vma_end(a) - 1 < xe_vma_start(b)) { - return -1; - } else if (xe_vma_end(b) - 1 < xe_vma_start(a)) { - return 1; - } else { - return 0; - } -} - -static bool xe_vma_less_cb(struct rb_node *a, const struct rb_node *b) -{ - return xe_vma_cmp(to_xe_vma(a), to_xe_vma(b)) < 0; -} - -int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node) -{ - struct xe_vma *cmp = to_xe_vma(node); - struct xe_vma *own = (struct xe_vma *)key; - - if (xe_vma_start(own) > xe_vma_end(cmp) - 1) - return 1; - - if (xe_vma_end(own) - 1 < xe_vma_start(cmp)) - return -1; - - return 0; -} - struct xe_vma * -xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma) +xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) { - struct rb_node *node; + struct drm_gpuva *gpuva; lockdep_assert_held(&vm->lock); if (xe_vm_is_closed_or_banned(vm)) return NULL; - XE_BUG_ON(xe_vma_end(vma) > vm->size); + XE_BUG_ON(start + range > vm->size); - node = rb_find(vma, &vm->vmas, xe_vma_cmp_vma_cb); + gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); - return node ? to_xe_vma(node) : NULL; + return gpuva ? 
gpuva_to_vma(gpuva) : NULL; } -static void xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) +static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) { + int err; + XE_BUG_ON(xe_vma_vm(vma) != vm); lockdep_assert_held(&vm->lock); - rb_add(&vma->vm_node, &vm->vmas, xe_vma_less_cb); + err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); + XE_WARN_ON(err); /* Shouldn't be possible */ + + return err; } static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) @@ -1182,18 +1177,38 @@ static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) XE_BUG_ON(xe_vma_vm(vma) != vm); lockdep_assert_held(&vm->lock); - rb_erase(&vma->vm_node, &vm->vmas); + drm_gpuva_remove(&vma->gpuva); if (vm->usm.last_fault_vma == vma) vm->usm.last_fault_vma = NULL; } -static void async_op_work_func(struct work_struct *w); +static struct drm_gpuva_op *xe_vm_op_alloc(void) +{ + struct xe_vma_op *op; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + + if (unlikely(!op)) + return NULL; + + return &op->base; +} + +static void xe_vm_free(struct drm_gpuvm *gpuvm); + +static struct drm_gpuvm_ops gpuvm_ops = { + .op_alloc = xe_vm_op_alloc, + .vm_free = xe_vm_free, +}; + +static void xe_vma_op_work_func(struct work_struct *w); static void vm_destroy_work_func(struct work_struct *w); struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) { + struct drm_gem_object *vm_resv_obj; struct xe_vm *vm; - int err, i = 0, number_tiles = 0; + int err, number_tiles = 0; struct xe_tile *tile; u8 id; @@ -1202,12 +1217,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) return ERR_PTR(-ENOMEM); vm->xe = xe; - kref_init(&vm->refcount); - dma_resv_init(&vm->resv); vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1); - vm->vmas = RB_ROOT; vm->flags = flags; init_rwsem(&vm->lock); @@ -1223,7 +1235,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) spin_lock_init(&vm->notifier.list_lock); INIT_LIST_HEAD(&vm->async_ops.pending); - INIT_WORK(&vm->async_ops.work, async_op_work_func); + INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func); spin_lock_init(&vm->async_ops.lock); INIT_WORK(&vm->destroy_work, vm_destroy_work_func); @@ -1239,9 +1251,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) xe_device_mem_access_get(xe); } - err = dma_resv_lock_interruptible(&vm->resv, NULL); + vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); + if (!vm_resv_obj) { + err = -ENOMEM; + goto err_no_resv; + } + + drm_gpuvm_init(&vm->gpuvm, "Xe VM", 0, &xe->drm, vm_resv_obj, + 0, vm->size, 0, 0, &gpuvm_ops); + + drm_gem_object_put(vm_resv_obj); + + err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); if (err) - goto err_put; + goto err_close; if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) vm->flags |= XE_VM_FLAGS_64K; @@ -1255,7 +1278,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (IS_ERR(vm->pt_root[id])) { err = PTR_ERR(vm->pt_root[id]); vm->pt_root[id] = NULL; - goto err_destroy_root; + goto err_unlock_close; } } @@ -1266,7 +1289,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) err = xe_pt_create_scratch(xe, tile, vm); if (err) - goto err_scratch_pt; + goto err_unlock_close; } vm->batch_invalidate_tlb = true; } @@ -1289,7 +1312,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) xe_pt_populate_empty(tile, vm, vm->pt_root[id]); } - dma_resv_unlock(&vm->resv); + dma_resv_unlock(xe_vm_resv(vm)); /* Kernel migration VM shouldn't have a circular loop.. 
*/ if (!(flags & XE_VM_FLAG_MIGRATION)) { @@ -1307,8 +1330,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) ENGINE_FLAG_VM); xe_vm_put(migrate_vm); if (IS_ERR(eng)) { - xe_vm_close_and_put(vm); - return ERR_CAST(eng); + err = PTR_ERR(eng); + goto err_close; } vm->eng[id] = eng; number_tiles++; @@ -1329,27 +1352,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) return vm; -err_scratch_pt: - for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) - continue; +err_unlock_close: + dma_resv_unlock(xe_vm_resv(vm)); +err_close: + xe_vm_close_and_put(vm); + return ERR_PTR(err); - i = vm->pt_root[id]->level; - while (i) - if (vm->scratch_pt[id][--i]) - xe_pt_destroy(vm->scratch_pt[id][i], - vm->flags, NULL); - xe_bo_unpin(vm->scratch_bo[id]); - xe_bo_put(vm->scratch_bo[id]); - } -err_destroy_root: - for_each_tile(tile, xe, id) { - if (vm->pt_root[id]) - xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); - } - dma_resv_unlock(&vm->resv); -err_put: - dma_resv_fini(&vm->resv); +err_no_resv: kfree(vm); if (!(flags & XE_VM_FLAG_MIGRATION)) { xe_device_mem_access_put(xe); @@ -1404,16 +1413,17 @@ static void xe_vm_close(struct xe_vm *vm) void xe_vm_close_and_put(struct xe_vm *vm) { - struct rb_root contested = RB_ROOT; + LIST_HEAD(contested); struct ww_acquire_ctx ww; struct xe_device *xe = vm->xe; struct xe_tile *tile; + struct xe_vma *vma, *next_vma; + struct drm_gpuva *gpuva, *next; u8 id; XE_BUG_ON(vm->preempt.num_engines); xe_vm_close(vm); - flush_async_ops(vm); if (xe_vm_in_compute_mode(vm)) flush_work(&vm->preempt.rebind_work); @@ -1428,16 +1438,16 @@ void xe_vm_close_and_put(struct xe_vm *vm) down_write(&vm->lock); xe_vm_lock(vm, &ww, 0, false); - while (vm->vmas.rb_node) { - struct xe_vma *vma = to_xe_vma(vm->vmas.rb_node); + drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { + vma = gpuva_to_vma(gpuva); if (xe_vma_has_no_bo(vma)) { down_read(&vm->userptr.notifier_lock); - vma->destroyed = true; + vma->gpuva.flags |= XE_VMA_DESTROYED; up_read(&vm->userptr.notifier_lock); } - rb_erase(&vma->vm_node, &vm->vmas); + xe_vm_remove_vma(vm, vma); /* easy case, remove from VMA? */ if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { @@ -1445,7 +1455,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) continue; } - rb_add(&vma->vm_node, &contested, xe_vma_less_cb); + list_add_tail(&vma->unbind_link, &contested); } /* @@ -1465,22 +1475,21 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); } + if (vm->pt_root[id]) { + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + vm->pt_root[id] = NULL; + } } xe_vm_unlock(vm, &ww); - if (contested.rb_node) { - - /* - * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL - * Since we hold a refcount to the bo, we can remove and free - * the members safely without locking. - */ - while (contested.rb_node) { - struct xe_vma *vma = to_xe_vma(contested.rb_node); - - rb_erase(&vma->vm_node, &contested); - xe_vma_destroy_unlocked(vma); - } + /* + * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL + * Since we hold a refcount to the bo, we can remove and free + * the members safely without locking. 
+ */ + list_for_each_entry_safe(vma, next_vma, &contested, unbind_link) { + list_del_init(&vma->unbind_link); + xe_vma_destroy_unlocked(vma); } if (vm->async_ops.error_capture.addr) @@ -1503,7 +1512,6 @@ static void vm_destroy_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, destroy_work); - struct ww_acquire_ctx ww; struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; @@ -1524,29 +1532,17 @@ static void vm_destroy_work_func(struct work_struct *w) } } - /* - * XXX: We delay destroying the PT root until the VM if freed as PT root - * is needed for xe_vm_lock to work. If we remove that dependency this - * can be moved to xe_vm_close_and_put. - */ - xe_vm_lock(vm, &ww, 0, false); - for_each_tile(tile, xe, id) { - if (vm->pt_root[id]) { - xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); - vm->pt_root[id] = NULL; - } - } - xe_vm_unlock(vm, &ww); + for_each_tile(tile, xe, id) + XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); dma_fence_put(vm->rebind_fence); - dma_resv_fini(&vm->resv); kfree(vm); } -void xe_vm_free(struct kref *ref) +static void xe_vm_free(struct drm_gpuvm *gpuvm) { - struct xe_vm *vm = container_of(ref, struct xe_vm, refcount); + struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); /* To destroy the VM we need to be able to sleep */ queue_work(system_unbound_wq, &vm->destroy_work); @@ -1573,7 +1569,8 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) static struct dma_fence * xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, - struct xe_sync_entry *syncs, u32 num_syncs) + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op) { struct xe_tile *tile; struct dma_fence *fence = NULL; @@ -1598,7 +1595,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, if (!(vma->tile_present & BIT(id))) goto next; - fence = __xe_pt_unbind_vma(tile, vma, e, syncs, num_syncs); + fence = __xe_pt_unbind_vma(tile, vma, e, first_op ? syncs : NULL, + first_op ? num_syncs : 0); if (IS_ERR(fence)) { err = PTR_ERR(fence); goto err_fences; @@ -1624,8 +1622,11 @@ next: } } - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence); + if (last_op) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, + cf ? &cf->base : fence); + } return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence; @@ -1643,7 +1644,8 @@ err_fences: static struct dma_fence * xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, - struct xe_sync_entry *syncs, u32 num_syncs) + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op) { struct xe_tile *tile; struct dma_fence *fence; @@ -1668,7 +1670,8 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, if (!(vma->tile_mask & BIT(id))) goto next; - fence = __xe_pt_bind_vma(tile, vma, e, syncs, num_syncs, + fence = __xe_pt_bind_vma(tile, vma, e, first_op ? syncs : NULL, + first_op ? num_syncs : 0, vma->tile_present & BIT(id)); if (IS_ERR(fence)) { err = PTR_ERR(fence); @@ -1695,8 +1698,11 @@ next: } } - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, cf ? &cf->base : fence); + if (last_op) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, + cf ? &cf->base : fence); + } return cf ? 
&cf->base : fence; @@ -1794,15 +1800,29 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence) static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e, struct xe_sync_entry *syncs, - u32 num_syncs, struct async_op_fence *afence) + u32 num_syncs, struct async_op_fence *afence, + bool immediate, bool first_op, bool last_op) { struct dma_fence *fence; xe_vm_assert_held(vm); - fence = xe_vm_bind_vma(vma, e, syncs, num_syncs); - if (IS_ERR(fence)) - return PTR_ERR(fence); + if (immediate) { + fence = xe_vm_bind_vma(vma, e, syncs, num_syncs, first_op, + last_op); + if (IS_ERR(fence)) + return PTR_ERR(fence); + } else { + int i; + + XE_BUG_ON(!xe_vm_in_fault_mode(vm)); + + fence = dma_fence_get_stub(); + if (last_op) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, fence); + } + } if (afence) add_async_op_fence_cb(vm, fence, afence); @@ -1812,32 +1832,35 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e, struct xe_bo *bo, struct xe_sync_entry *syncs, - u32 num_syncs, struct async_op_fence *afence) + u32 num_syncs, struct async_op_fence *afence, + bool immediate, bool first_op, bool last_op) { int err; xe_vm_assert_held(vm); xe_bo_assert_held(bo); - if (bo) { + if (bo && immediate) { err = xe_bo_validate(bo, vm, true); if (err) return err; } - return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence); + return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence, immediate, + first_op, last_op); } static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e, struct xe_sync_entry *syncs, - u32 num_syncs, struct async_op_fence *afence) + u32 num_syncs, struct async_op_fence *afence, + bool first_op, bool last_op) { struct dma_fence *fence; xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); - fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs); + fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) return PTR_ERR(fence); if (afence) @@ -2066,7 +2089,8 @@ static const u32 region_to_mem_type[] = { static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e, u32 region, struct xe_sync_entry *syncs, u32 num_syncs, - struct async_op_fence *afence) + struct async_op_fence *afence, bool first_op, + bool last_op) { int err; @@ -2080,14 +2104,16 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) { return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs, - afence); + afence, true, first_op, last_op); } else { int i; /* Nothing to do, signal fences now */ - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, - dma_fence_get_stub()); + if (last_op) { + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, + dma_fence_get_stub()); + } if (afence) dma_fence_signal(&afence->fence); return 0; @@ -2096,29 +2122,6 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, #define VM_BIND_OP(op) (op & 0xffff) -static int __vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, - struct xe_engine *e, struct xe_bo *bo, u32 op, - u32 region, struct xe_sync_entry *syncs, - u32 num_syncs, struct async_op_fence *afence) -{ - switch (VM_BIND_OP(op)) { - case XE_VM_BIND_OP_MAP: - return xe_vm_bind(vm, vma, e, bo, syncs, num_syncs, afence); - case XE_VM_BIND_OP_UNMAP: - case XE_VM_BIND_OP_UNMAP_ALL: - return xe_vm_unbind(vm, vma, e, syncs, num_syncs, 
afence); - case XE_VM_BIND_OP_MAP_USERPTR: - return xe_vm_bind(vm, vma, e, NULL, syncs, num_syncs, afence); - case XE_VM_BIND_OP_PREFETCH: - return xe_vm_prefetch(vm, vma, e, region, syncs, num_syncs, - afence); - break; - default: - XE_BUG_ON("NOT POSSIBLE"); - return -EINVAL; - } -} - struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm) { int idx = vm->flags & XE_VM_FLAG_MIGRATION ? @@ -2134,810 +2137,847 @@ static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv) tv->bo = xe_vm_ttm_bo(vm); } -static bool is_map_op(u32 op) -{ - return VM_BIND_OP(op) == XE_VM_BIND_OP_MAP || - VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR; -} - -static bool is_unmap_op(u32 op) +static void vm_set_async_error(struct xe_vm *vm, int err) { - return VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP || - VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL; + lockdep_assert_held(&vm->lock); + vm->async_ops.error = err; } -static int vm_bind_ioctl(struct xe_vm *vm, struct xe_vma *vma, - struct xe_engine *e, struct xe_bo *bo, - struct drm_xe_vm_bind_op *bind_op, - struct xe_sync_entry *syncs, u32 num_syncs, - struct async_op_fence *afence) +static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, + u64 addr, u64 range, u32 op) { - LIST_HEAD(objs); - LIST_HEAD(dups); - struct ttm_validate_buffer tv_bo, tv_vm; - struct ww_acquire_ctx ww; - struct xe_bo *vbo; - int err, i; + struct xe_device *xe = vm->xe; + struct xe_vma *vma; + bool async = !!(op & XE_VM_BIND_FLAG_ASYNC); lockdep_assert_held(&vm->lock); - XE_BUG_ON(!list_empty(&vma->unbind_link)); - - /* Binds deferred to faults, signal fences now */ - if (xe_vm_in_fault_mode(vm) && is_map_op(bind_op->op) && - !(bind_op->op & XE_VM_BIND_FLAG_IMMEDIATE)) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, - dma_fence_get_stub()); - if (afence) - dma_fence_signal(&afence->fence); - return 0; - } - - xe_vm_tv_populate(vm, &tv_vm); - list_add_tail(&tv_vm.head, &objs); - vbo = xe_vma_bo(vma); - if (vbo) { - /* - * An unbind can drop the last reference to the BO and - * the BO is needed for ttm_eu_backoff_reservation so - * take a reference here. 
- */ - xe_bo_get(vbo); - - if (!vbo->vm) { - tv_bo.bo = &vbo->ttm; - tv_bo.num_shared = 1; - list_add(&tv_bo.head, &objs); - } - } -again: - err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups); - if (!err) { - err = __vm_bind_ioctl(vm, vma, e, bo, - bind_op->op, bind_op->region, syncs, - num_syncs, afence); - ttm_eu_backoff_reservation(&ww, &objs); - if (err == -EAGAIN && xe_vma_is_userptr(vma)) { - lockdep_assert_held_write(&vm->lock); - err = xe_vma_userptr_pin_pages(vma); - if (!err) - goto again; - } + switch (VM_BIND_OP(op)) { + case XE_VM_BIND_OP_MAP: + case XE_VM_BIND_OP_MAP_USERPTR: + vma = xe_vm_find_overlapping_vma(vm, addr, range); + if (XE_IOCTL_ERR(xe, vma && !async)) + return -EBUSY; + break; + case XE_VM_BIND_OP_UNMAP: + case XE_VM_BIND_OP_PREFETCH: + vma = xe_vm_find_overlapping_vma(vm, addr, range); + if (XE_IOCTL_ERR(xe, !vma)) + return -ENODATA; /* Not an actual error, IOCTL + cleans up returns and 0 */ + if (XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr || + xe_vma_end(vma) != addr + range) && !async)) + return -EINVAL; + break; + case XE_VM_BIND_OP_UNMAP_ALL: + if (XE_IOCTL_ERR(xe, list_empty(&bo->ttm.base.gpuva.list))) + return -ENODATA; /* Not an actual error, IOCTL + cleans up returns and 0 */ + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + return -EINVAL; } - xe_bo_put(vbo); - return err; + return 0; } -struct async_op { - struct xe_vma *vma; - struct xe_engine *engine; - struct xe_bo *bo; - struct drm_xe_vm_bind_op bind_op; - struct xe_sync_entry *syncs; - u32 num_syncs; - struct list_head link; - struct async_op_fence *fence; -}; - -static void async_op_cleanup(struct xe_vm *vm, struct async_op *op) +static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, + bool post_commit) { - while (op->num_syncs--) - xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); - kfree(op->syncs); - xe_bo_put(op->bo); - if (op->engine) - xe_engine_put(op->engine); - xe_vm_put(vm); - if (op->fence) - dma_fence_put(&op->fence->fence); - kfree(op); + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags |= XE_VMA_DESTROYED; + up_read(&vm->userptr.notifier_lock); + if (post_commit) + xe_vm_remove_vma(vm, vma); } -static struct async_op *next_async_op(struct xe_vm *vm) +#undef ULL +#define ULL unsigned long long + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) +static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) { - return list_first_entry_or_null(&vm->async_ops.pending, - struct async_op, link); -} + struct xe_vma *vma; -static void vm_set_async_error(struct xe_vm *vm, int err) + switch (op->op) { + case DRM_GPUVA_OP_MAP: + vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", + (ULL)op->map.va.addr, (ULL)op->map.va.range); + break; + case DRM_GPUVA_OP_REMAP: + vma = gpuva_to_vma(op->remap.unmap->va); + vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", + (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), + op->unmap.keep ? 1 : 0); + if (op->remap.prev) + vm_dbg(&xe->drm, + "REMAP:PREV: addr=0x%016llx, range=0x%016llx", + (ULL)op->remap.prev->va.addr, + (ULL)op->remap.prev->va.range); + if (op->remap.next) + vm_dbg(&xe->drm, + "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", + (ULL)op->remap.next->va.addr, + (ULL)op->remap.next->va.range); + break; + case DRM_GPUVA_OP_UNMAP: + vma = gpuva_to_vma(op->unmap.va); + vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", + (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), + op->unmap.keep ? 
1 : 0); + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + } +} +#else +static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) { - lockdep_assert_held(&vm->lock); - vm->async_ops.error = err; } +#endif -static void async_op_work_func(struct work_struct *w) +/* + * Create operations list from IOCTL arguments, setup operations fields so parse + * and commit steps are decoupled from IOCTL arguments. This step can fail. + */ +static struct drm_gpuva_ops * +vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, + u64 bo_offset_or_userptr, u64 addr, u64 range, + u32 operation, u64 tile_mask, u32 region) { - struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work); + struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; + struct ww_acquire_ctx ww; + struct drm_gpuva_ops *ops; + struct drm_gpuva_op *__op; + struct xe_vma_op *op; + struct drm_gpuvm_bo *vm_bo; + int err; - for (;;) { - struct async_op *op; - int err; + lockdep_assert_held_write(&vm->lock); - if (vm->async_ops.error && !xe_vm_is_closed(vm)) - break; + vm_dbg(&vm->xe->drm, + "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", + VM_BIND_OP(operation), (ULL)addr, (ULL)range, + (ULL)bo_offset_or_userptr); - spin_lock_irq(&vm->async_ops.lock); - op = next_async_op(vm); - if (op) - list_del_init(&op->link); - spin_unlock_irq(&vm->async_ops.lock); + switch (VM_BIND_OP(operation)) { + case XE_VM_BIND_OP_MAP: + case XE_VM_BIND_OP_MAP_USERPTR: + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, + obj, bo_offset_or_userptr); + if (IS_ERR(ops)) + return ops; - if (!op) - break; + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - if (!xe_vm_is_closed(vm)) { - bool first, last; + op->tile_mask = tile_mask; + op->map.immediate = + operation & XE_VM_BIND_FLAG_IMMEDIATE; + op->map.read_only = + operation & XE_VM_BIND_FLAG_READONLY; + op->map.is_null = operation & XE_VM_BIND_FLAG_NULL; + } + break; + case XE_VM_BIND_OP_UNMAP: + ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); + if (IS_ERR(ops)) + return ops; - down_write(&vm->lock); -again: - first = op->vma->first_munmap_rebind; - last = op->vma->last_munmap_rebind; -#ifdef TEST_VM_ASYNC_OPS_ERROR -#define FORCE_ASYNC_OP_ERROR BIT(31) - if (!(op->bind_op.op & FORCE_ASYNC_OP_ERROR)) { - err = vm_bind_ioctl(vm, op->vma, op->engine, - op->bo, &op->bind_op, - op->syncs, op->num_syncs, - op->fence); - } else { - err = -ENOMEM; - op->bind_op.op &= ~FORCE_ASYNC_OP_ERROR; - } -#else - err = vm_bind_ioctl(vm, op->vma, op->engine, op->bo, - &op->bind_op, op->syncs, - op->num_syncs, op->fence); -#endif - /* - * In order for the fencing to work (stall behind - * existing jobs / prevent new jobs from running) all - * the dma-resv slots need to be programmed in a batch - * relative to execs / the rebind worker. The vm->lock - * ensure this. 
- */ - if (!err && ((first && VM_BIND_OP(op->bind_op.op) == - XE_VM_BIND_OP_UNMAP) || - vm->async_ops.munmap_rebind_inflight)) { - if (last) { - op->vma->last_munmap_rebind = false; - vm->async_ops.munmap_rebind_inflight = - false; - } else { - vm->async_ops.munmap_rebind_inflight = - true; - - async_op_cleanup(vm, op); - - spin_lock_irq(&vm->async_ops.lock); - op = next_async_op(vm); - XE_BUG_ON(!op); - list_del_init(&op->link); - spin_unlock_irq(&vm->async_ops.lock); - - goto again; - } - } - if (err) { - trace_xe_vma_fail(op->vma); - drm_warn(&vm->xe->drm, "Async VM op(%d) failed with %d", - VM_BIND_OP(op->bind_op.op), - err); + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - spin_lock_irq(&vm->async_ops.lock); - list_add(&op->link, &vm->async_ops.pending); - spin_unlock_irq(&vm->async_ops.lock); + op->tile_mask = tile_mask; + } + break; + case XE_VM_BIND_OP_PREFETCH: + ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); + if (IS_ERR(ops)) + return ops; - vm_set_async_error(vm, err); - up_write(&vm->lock); + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - if (vm->async_ops.error_capture.addr) - vm_error_capture(vm, err, - op->bind_op.op, - op->bind_op.addr, - op->bind_op.range); - break; - } - up_write(&vm->lock); - } else { - trace_xe_vma_flush(op->vma); + op->tile_mask = tile_mask; + op->prefetch.region = region; + } + break; + case XE_VM_BIND_OP_UNMAP_ALL: + XE_BUG_ON(!bo); - if (is_unmap_op(op->bind_op.op)) { - down_write(&vm->lock); - xe_vma_destroy_unlocked(op->vma); - up_write(&vm->lock); - } + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return ERR_PTR(err); - if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &op->fence->fence.flags)) { - if (!xe_vm_no_dma_fences(vm)) { - op->fence->started = true; - smp_wmb(); - wake_up_all(&op->fence->wq); - } - dma_fence_signal(&op->fence->fence); - } + vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj); + if (!vm_bo) + break; + + ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); + drm_gpuvm_bo_put(vm_bo); + xe_bo_unlock(bo, &ww); + if (IS_ERR(ops)) + return ops; + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + + op->tile_mask = tile_mask; } + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + ops = ERR_PTR(-EINVAL); + } - async_op_cleanup(vm, op); +#ifdef TEST_VM_ASYNC_OPS_ERROR + if (operation & FORCE_ASYNC_OP_ERROR) { + op = list_first_entry_or_null(&ops->list, struct xe_vma_op, + base.entry); + if (op) + op->inject_error = true; } +#endif + + if (!IS_ERR(ops)) + drm_gpuva_for_each_op(__op, ops) + print_op(vm->xe, __op); + + return ops; } -static int __vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, - struct xe_engine *e, struct xe_bo *bo, - struct drm_xe_vm_bind_op *bind_op, - struct xe_sync_entry *syncs, u32 num_syncs) +static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, + u64 tile_mask, bool read_only, bool is_null) { - struct async_op *op; - bool installed = false; - u64 seqno; - int i; + struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; + struct xe_vma *vma; + struct ww_acquire_ctx ww; + int err; - lockdep_assert_held(&vm->lock); + lockdep_assert_held_write(&vm->lock); - op = kmalloc(sizeof(*op), GFP_KERNEL); - if (!op) { - return -ENOMEM; + if (bo) { + err = xe_bo_lock(bo, &ww, 0, true); + if (err) + return ERR_PTR(err); } + vma = xe_vma_create(vm, bo, op->gem.offset, + op->va.addr, op->va.addr + + op->va.range - 1, read_only, is_null, + tile_mask); + if (bo) + xe_bo_unlock(bo, &ww); - if (num_syncs) { - op->fence = kmalloc(sizeof(*op->fence), GFP_KERNEL); - if (!op->fence) { - kfree(op); - return -ENOMEM; + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(vma); + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy_unlocked(vma); + return ERR_PTR(err); } + } else if (!xe_vma_has_no_bo(vma) && !bo->vm) { + vm_insert_extobj(vm, vma); + err = add_preempt_fences(vm, bo); + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy_unlocked(vma); + return ERR_PTR(err); + } + } + + return vma; +} + +/* + * Parse operations list and create any resources needed for the operations + * prior to fully committing to the operations. This setup can fail. + */ +static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, + struct drm_gpuva_ops **ops, int num_ops_list, + struct xe_sync_entry *syncs, u32 num_syncs, + struct list_head *ops_list, bool async) +{ + struct xe_vma_op *last_op = NULL; + struct list_head *async_list = NULL; + struct async_op_fence *fence = NULL; + int err, i; + + lockdep_assert_held_write(&vm->lock); + XE_BUG_ON(num_ops_list > 1 && !async); + + if (num_syncs && async) { + u64 seqno; + + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return -ENOMEM; seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno; - dma_fence_init(&op->fence->fence, &async_op_fence_ops, + dma_fence_init(&fence->fence, &async_op_fence_ops, &vm->async_ops.lock, e ? 
e->bind.fence_ctx : vm->async_ops.fence.context, seqno); if (!xe_vm_no_dma_fences(vm)) { - op->fence->vm = vm; - op->fence->started = false; - init_waitqueue_head(&op->fence->wq); + fence->vm = vm; + fence->started = false; + init_waitqueue_head(&fence->wq); } - } else { - op->fence = NULL; } - op->vma = vma; - op->engine = e; - op->bo = bo; - op->bind_op = *bind_op; - op->syncs = syncs; - op->num_syncs = num_syncs; - INIT_LIST_HEAD(&op->link); - for (i = 0; i < num_syncs; i++) - installed |= xe_sync_entry_signal(&syncs[i], NULL, - &op->fence->fence); + for (i = 0; i < num_ops_list; ++i) { + struct drm_gpuva_ops *__ops = ops[i]; + struct drm_gpuva_op *__op; - if (!installed && op->fence) - dma_fence_signal(&op->fence->fence); + drm_gpuva_for_each_op(__op, __ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + bool first = !async_list; - spin_lock_irq(&vm->async_ops.lock); - list_add_tail(&op->link, &vm->async_ops.pending); - spin_unlock_irq(&vm->async_ops.lock); + XE_BUG_ON(!first && !async); - if (!vm->async_ops.error) - queue_work(system_unbound_wq, &vm->async_ops.work); + INIT_LIST_HEAD(&op->link); + if (first) + async_list = ops_list; + list_add_tail(&op->link, async_list); - return 0; -} - -static int vm_bind_ioctl_async(struct xe_vm *vm, struct xe_vma *vma, - struct xe_engine *e, struct xe_bo *bo, - struct drm_xe_vm_bind_op *bind_op, - struct xe_sync_entry *syncs, u32 num_syncs) -{ - struct xe_vma *__vma, *next; - struct list_head rebind_list; - struct xe_sync_entry *in_syncs = NULL, *out_syncs = NULL; - u32 num_in_syncs = 0, num_out_syncs = 0; - bool first = true, last; - int err; - int i; + if (first) { + op->flags |= XE_VMA_OP_FIRST; + op->num_syncs = num_syncs; + op->syncs = syncs; + } - lockdep_assert_held(&vm->lock); + op->engine = e; - /* Not a linked list of unbinds + rebinds, easy */ - if (list_empty(&vma->unbind_link)) - return __vm_bind_ioctl_async(vm, vma, e, bo, bind_op, - syncs, num_syncs); + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + { + struct xe_vma *vma; - /* - * Linked list of unbinds + rebinds, decompose syncs into 'in / out' - * passing the 'in' to the first operation and 'out' to the last. Also - * the reference counting is a little tricky, increment the VM / bind - * engine ref count on all but the last operation and increment the BOs - * ref count on each rebind. 
- */ + vma = new_vma(vm, &op->base.map, + op->tile_mask, op->map.read_only, + op->map.is_null); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto free_fence; + } - XE_BUG_ON(VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP && - VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_UNMAP_ALL && - VM_BIND_OP(bind_op->op) != XE_VM_BIND_OP_PREFETCH); + op->map.vma = vma; + break; + } + case DRM_GPUVA_OP_REMAP: + if (op->base.remap.prev) { + struct xe_vma *vma; + bool read_only = + op->base.remap.unmap->va->flags & + XE_VMA_READ_ONLY; + bool is_null = + op->base.remap.unmap->va->flags & + DRM_GPUVA_SPARSE; + + vma = new_vma(vm, op->base.remap.prev, + op->tile_mask, read_only, + is_null); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto free_fence; + } + + op->remap.prev = vma; + } - /* Decompose syncs */ - if (num_syncs) { - in_syncs = kmalloc(sizeof(*in_syncs) * num_syncs, GFP_KERNEL); - out_syncs = kmalloc(sizeof(*out_syncs) * num_syncs, GFP_KERNEL); - if (!in_syncs || !out_syncs) { - err = -ENOMEM; - goto out_error; - } + if (op->base.remap.next) { + struct xe_vma *vma; + bool read_only = + op->base.remap.unmap->va->flags & + XE_VMA_READ_ONLY; + + bool is_null = + op->base.remap.unmap->va->flags & + DRM_GPUVA_SPARSE; + + vma = new_vma(vm, op->base.remap.next, + op->tile_mask, read_only, + is_null); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto free_fence; + } + + op->remap.next = vma; + } - for (i = 0; i < num_syncs; ++i) { - bool signal = syncs[i].flags & DRM_XE_SYNC_SIGNAL; + /* XXX: Support no doing remaps */ + op->remap.start = + xe_vma_start(gpuva_to_vma(op->base.remap.unmap->va)); + op->remap.range = + xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va)); + break; + case DRM_GPUVA_OP_UNMAP: + op->unmap.start = + xe_vma_start(gpuva_to_vma(op->base.unmap.va)); + op->unmap.range = + xe_vma_size(gpuva_to_vma(op->base.unmap.va)); + break; + case DRM_GPUVA_OP_PREFETCH: + /* Nothing to do */ + break; + default: + XE_BUG_ON("NOT POSSIBLE"); + } - if (signal) - out_syncs[num_out_syncs++] = syncs[i]; - else - in_syncs[num_in_syncs++] = syncs[i]; + last_op = op; } - } - /* Do unbinds + move rebinds to new list */ - INIT_LIST_HEAD(&rebind_list); - list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) { - if (__vma->destroyed || - VM_BIND_OP(bind_op->op) == XE_VM_BIND_OP_PREFETCH) { - list_del_init(&__vma->unbind_link); - xe_bo_get(bo); - err = __vm_bind_ioctl_async(xe_vm_get(vm), __vma, - e ? xe_engine_get(e) : NULL, - bo, bind_op, first ? - in_syncs : NULL, - first ? num_in_syncs : 0); - if (err) { - xe_bo_put(bo); - xe_vm_put(vm); - if (e) - xe_engine_put(e); - goto out_error; - } - in_syncs = NULL; - first = false; - } else { - list_move_tail(&__vma->unbind_link, &rebind_list); - } + last_op->ops = __ops; } - last = list_empty(&rebind_list); - if (!last) { - xe_vm_get(vm); - if (e) - xe_engine_get(e); - } - err = __vm_bind_ioctl_async(vm, vma, e, - bo, bind_op, - first ? in_syncs : - last ? out_syncs : NULL, - first ? num_in_syncs : - last ? 
num_out_syncs : 0); - if (err) { - if (!last) { - xe_vm_put(vm); - if (e) - xe_engine_put(e); - } - goto out_error; - } - in_syncs = NULL; - - /* Do rebinds */ - list_for_each_entry_safe(__vma, next, &rebind_list, unbind_link) { - list_del_init(&__vma->unbind_link); - last = list_empty(&rebind_list); - - if (xe_vma_is_userptr(__vma)) { - bind_op->op = XE_VM_BIND_FLAG_ASYNC | - XE_VM_BIND_OP_MAP_USERPTR; - } else { - bind_op->op = XE_VM_BIND_FLAG_ASYNC | - XE_VM_BIND_OP_MAP; - xe_bo_get(xe_vma_bo(__vma)); - } - if (!last) { - xe_vm_get(vm); - if (e) - xe_engine_get(e); - } + if (!last_op) + return -ENODATA; - err = __vm_bind_ioctl_async(vm, __vma, e, - xe_vma_bo(__vma), bind_op, last ? - out_syncs : NULL, - last ? num_out_syncs : 0); - if (err) { - if (!last) { - xe_vm_put(vm); - if (e) - xe_engine_put(e); - } - goto out_error; - } - } + last_op->flags |= XE_VMA_OP_LAST; + last_op->num_syncs = num_syncs; + last_op->syncs = syncs; + last_op->fence = fence; - kfree(syncs); return 0; -out_error: - kfree(in_syncs); - kfree(out_syncs); - kfree(syncs); - +free_fence: + kfree(fence); return err; } -static int __vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, - u64 addr, u64 range, u32 op) +static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) { - struct xe_device *xe = vm->xe; - struct xe_vma *vma, lookup; - bool async = !!(op & XE_VM_BIND_FLAG_ASYNC); - - lockdep_assert_held(&vm->lock); + int err = 0; - lookup.start = addr; - lookup.end = addr + range - 1; + lockdep_assert_held_write(&vm->lock); - switch (VM_BIND_OP(op)) { - case XE_VM_BIND_OP_MAP: - case XE_VM_BIND_OP_MAP_USERPTR: - vma = xe_vm_find_overlapping_vma(vm, &lookup); - if (XE_IOCTL_ERR(xe, vma)) - return -EBUSY; + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err |= xe_vm_insert_vma(vm, op->map.vma); break; - case XE_VM_BIND_OP_UNMAP: - case XE_VM_BIND_OP_PREFETCH: - vma = xe_vm_find_overlapping_vma(vm, &lookup); - if (XE_IOCTL_ERR(xe, !vma) || - XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr || - xe_vma_end(vma) != addr + range) && !async)) - return -EINVAL; + case DRM_GPUVA_OP_REMAP: + prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), + true); + if (op->remap.prev) + err |= xe_vm_insert_vma(vm, op->remap.prev); + if (op->remap.next) + err |= xe_vm_insert_vma(vm, op->remap.next); break; - case XE_VM_BIND_OP_UNMAP_ALL: + case DRM_GPUVA_OP_UNMAP: + prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); + break; + case DRM_GPUVA_OP_PREFETCH: + /* Nothing to do */ break; default: XE_BUG_ON("NOT POSSIBLE"); - return -EINVAL; } - return 0; -} - -static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma) -{ - down_read(&vm->userptr.notifier_lock); - vma->destroyed = true; - up_read(&vm->userptr.notifier_lock); - xe_vm_remove_vma(vm, vma); + op->flags |= XE_VMA_OP_COMMITTED; + return err; } -static int prep_replacement_vma(struct xe_vm *vm, struct xe_vma *vma) +static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { + LIST_HEAD(objs); + LIST_HEAD(dups); + struct ttm_validate_buffer tv_bo, tv_vm; + struct ww_acquire_ctx ww; + struct xe_bo *vbo; int err; - if (xe_vma_bo(vma) && !xe_vma_bo(vma)->vm) { - vm_insert_extobj(vm, vma); - err = add_preempt_fences(vm, xe_vma_bo(vma)); - if (err) - return err; - } - - return 0; -} + lockdep_assert_held_write(&vm->lock); -/* - * Find all overlapping VMAs in lookup range and add to a list in the returned - * VMA, all of VMAs found will be unbound. 
Also possibly add 2 new VMAs that - * need to be bound if first / last VMAs are not fully unbound. This is akin to - * how munmap works. - */ -static struct xe_vma *vm_unbind_lookup_vmas(struct xe_vm *vm, - struct xe_vma *lookup) -{ - struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup); - struct rb_node *node; - struct xe_vma *first = vma, *last = vma, *new_first = NULL, - *new_last = NULL, *__vma, *next; - int err = 0; - bool first_munmap_rebind = false; + xe_vm_tv_populate(vm, &tv_vm); + list_add_tail(&tv_vm.head, &objs); + vbo = xe_vma_bo(vma); + if (vbo) { + /* + * An unbind can drop the last reference to the BO and + * the BO is needed for ttm_eu_backoff_reservation so + * take a reference here. + */ + xe_bo_get(vbo); - lockdep_assert_held(&vm->lock); - XE_BUG_ON(!vma); - - node = &vma->vm_node; - while ((node = rb_next(node))) { - if (!xe_vma_cmp_vma_cb(lookup, node)) { - __vma = to_xe_vma(node); - list_add_tail(&__vma->unbind_link, &vma->unbind_link); - last = __vma; - } else { - break; + if (!vbo->vm) { + tv_bo.bo = &vbo->ttm; + tv_bo.num_shared = 1; + list_add(&tv_bo.head, &objs); } } - node = &vma->vm_node; - while ((node = rb_prev(node))) { - if (!xe_vma_cmp_vma_cb(lookup, node)) { - __vma = to_xe_vma(node); - list_add(&__vma->unbind_link, &vma->unbind_link); - first = __vma; - } else { - break; - } +again: + err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups); + if (err) { + xe_bo_put(vbo); + return err; } - if (xe_vma_start(first) != xe_vma_start(lookup)) { - struct ww_acquire_ctx ww; + xe_vm_assert_held(vm); + xe_bo_assert_held(xe_vma_bo(vma)); - if (xe_vma_bo(first)) - err = xe_bo_lock(xe_vma_bo(first), &ww, 0, true); - if (err) - goto unwind; - new_first = xe_vma_create(xe_vma_vm(first), xe_vma_bo(first), - xe_vma_bo(first) ? - xe_vma_bo_offset(first) : - xe_vma_userptr(first), - xe_vma_start(first), - xe_vma_start(lookup) - 1, - xe_vma_read_only(first), - (first->pte_flags & - XE_PTE_FLAG_NULL), - first->tile_mask); - if (xe_vma_bo(first)) - xe_bo_unlock(xe_vma_bo(first), &ww); - if (!new_first) { - err = -ENOMEM; - goto unwind; - } - if (xe_vma_is_userptr(first)) { - err = xe_vma_userptr_pin_pages(new_first); + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err = xe_vm_bind(vm, vma, op->engine, xe_vma_bo(vma), + op->syncs, op->num_syncs, op->fence, + op->map.immediate || !xe_vm_in_fault_mode(vm), + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); + break; + case DRM_GPUVA_OP_REMAP: + { + bool prev = !!op->remap.prev; + bool next = !!op->remap.next; + + if (!op->remap.unmap_done) { + vm->async_ops.munmap_rebind_inflight = true; + if (prev || next) + vma->gpuva.flags |= XE_VMA_FIRST_REBIND; + err = xe_vm_unbind(vm, vma, op->engine, op->syncs, + op->num_syncs, + !prev && !next ? op->fence : NULL, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST && !prev && + !next); if (err) - goto unwind; + break; + op->remap.unmap_done = true; } - err = prep_replacement_vma(vm, new_first); - if (err) - goto unwind; - } - - if (xe_vma_end(last) != xe_vma_end(lookup)) { - struct ww_acquire_ctx ww; - u64 chunk = xe_vma_end(lookup) - xe_vma_start(last); - if (xe_vma_bo(last)) - err = xe_bo_lock(xe_vma_bo(last), &ww, 0, true); - if (err) - goto unwind; - new_last = xe_vma_create(xe_vma_vm(last), xe_vma_bo(last), - xe_vma_bo(last) ? 
- xe_vma_bo_offset(last) + chunk : - xe_vma_userptr(last) + chunk, - xe_vma_start(last) + chunk, - xe_vma_end(last) - 1, - xe_vma_read_only(last), - (last->pte_flags & XE_PTE_FLAG_NULL), - last->tile_mask); - if (xe_vma_bo(last)) - xe_bo_unlock(xe_vma_bo(last), &ww); - if (!new_last) { - err = -ENOMEM; - goto unwind; - } - if (xe_vma_is_userptr(last)) { - err = xe_vma_userptr_pin_pages(new_last); + if (prev) { + op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; + err = xe_vm_bind(vm, op->remap.prev, op->engine, + xe_vma_bo(op->remap.prev), op->syncs, + op->num_syncs, + !next ? op->fence : NULL, true, false, + op->flags & XE_VMA_OP_LAST && !next); + op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; if (err) - goto unwind; + break; + op->remap.prev = NULL; } - err = prep_replacement_vma(vm, new_last); - if (err) - goto unwind; - } - prep_vma_destroy(vm, vma); - if (list_empty(&vma->unbind_link) && (new_first || new_last)) - vma->first_munmap_rebind = true; - list_for_each_entry(__vma, &vma->unbind_link, unbind_link) { - if ((new_first || new_last) && !first_munmap_rebind) { - __vma->first_munmap_rebind = true; - first_munmap_rebind = true; + if (next) { + op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND; + err = xe_vm_bind(vm, op->remap.next, op->engine, + xe_vma_bo(op->remap.next), + op->syncs, op->num_syncs, + op->fence, true, false, + op->flags & XE_VMA_OP_LAST); + op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; + if (err) + break; + op->remap.next = NULL; } - prep_vma_destroy(vm, __vma); - } - if (new_first) { - xe_vm_insert_vma(vm, new_first); - list_add_tail(&new_first->unbind_link, &vma->unbind_link); - if (!new_last) - new_first->last_munmap_rebind = true; + vm->async_ops.munmap_rebind_inflight = false; + + break; } - if (new_last) { - xe_vm_insert_vma(vm, new_last); - list_add_tail(&new_last->unbind_link, &vma->unbind_link); - new_last->last_munmap_rebind = true; + case DRM_GPUVA_OP_UNMAP: + err = xe_vm_unbind(vm, vma, op->engine, op->syncs, + op->num_syncs, op->fence, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); + break; + case DRM_GPUVA_OP_PREFETCH: + err = xe_vm_prefetch(vm, vma, op->engine, op->prefetch.region, + op->syncs, op->num_syncs, op->fence, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); + break; + default: + XE_BUG_ON("NOT POSSIBLE"); } - return vma; - -unwind: - list_for_each_entry_safe(__vma, next, &vma->unbind_link, unbind_link) - list_del_init(&__vma->unbind_link); - if (new_last) { - prep_vma_destroy(vm, new_last); - xe_vma_destroy_unlocked(new_last); - } - if (new_first) { - prep_vma_destroy(vm, new_first); - xe_vma_destroy_unlocked(new_first); + ttm_eu_backoff_reservation(&ww, &objs); + if (err == -EAGAIN && xe_vma_is_userptr(vma)) { + lockdep_assert_held_write(&vm->lock); + err = xe_vma_userptr_pin_pages(vma); + if (!err) + goto again; } + xe_bo_put(vbo); - return ERR_PTR(err); + if (err) + trace_xe_vma_fail(vma); + + return err; } -/* - * Similar to vm_unbind_lookup_vmas, find all VMAs in lookup range to prefetch - */ -static struct xe_vma *vm_prefetch_lookup_vmas(struct xe_vm *vm, - struct xe_vma *lookup, - u32 region) +static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) { - struct xe_vma *vma = xe_vm_find_overlapping_vma(vm, lookup), *__vma, - *next; - struct rb_node *node; + int ret = 0; - if (!xe_vma_has_no_bo(vma)) { - if (!xe_bo_can_migrate(xe_vma_bo(vma), region_to_mem_type[region])) - return ERR_PTR(-EINVAL); - } + lockdep_assert_held_write(&vm->lock); - node = &vma->vm_node; - while ((node = 
rb_next(node))) { - if (!xe_vma_cmp_vma_cb(lookup, node)) { - __vma = to_xe_vma(node); - if (!xe_vma_has_no_bo(__vma)) { - if (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region])) - goto flush_list; - } - list_add_tail(&__vma->unbind_link, &vma->unbind_link); - } else { - break; - } +#ifdef TEST_VM_ASYNC_OPS_ERROR + if (op->inject_error) { + op->inject_error = false; + return -ENOMEM; } +#endif - node = &vma->vm_node; - while ((node = rb_prev(node))) { - if (!xe_vma_cmp_vma_cb(lookup, node)) { - __vma = to_xe_vma(node); - if (!xe_vma_has_no_bo(__vma)) { - if (!xe_bo_can_migrate(xe_vma_bo(__vma), region_to_mem_type[region])) - goto flush_list; - } - list_add(&__vma->unbind_link, &vma->unbind_link); - } else { - break; - } + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + ret = __xe_vma_op_execute(vm, op->map.vma, op); + break; + case DRM_GPUVA_OP_REMAP: + { + struct xe_vma *vma; + + if (!op->remap.unmap_done) + vma = gpuva_to_vma(op->base.remap.unmap->va); + else if (op->remap.prev) + vma = op->remap.prev; + else + vma = op->remap.next; + + ret = __xe_vma_op_execute(vm, vma, op); + break; + } + case DRM_GPUVA_OP_UNMAP: + ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), + op); + break; + case DRM_GPUVA_OP_PREFETCH: + ret = __xe_vma_op_execute(vm, + gpuva_to_vma(op->base.prefetch.va), + op); + break; + default: + XE_BUG_ON("NOT POSSIBLE"); } - return vma; + return ret; +} -flush_list: - list_for_each_entry_safe(__vma, next, &vma->unbind_link, - unbind_link) - list_del_init(&__vma->unbind_link); +static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) +{ + bool last = op->flags & XE_VMA_OP_LAST; - return ERR_PTR(-EINVAL); + if (last) { + while (op->num_syncs--) + xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); + kfree(op->syncs); + if (op->engine) + xe_engine_put(op->engine); + if (op->fence) + dma_fence_put(&op->fence->fence); + } + if (!list_empty(&op->link)) { + spin_lock_irq(&vm->async_ops.lock); + list_del(&op->link); + spin_unlock_irq(&vm->async_ops.lock); + } + if (op->ops) + drm_gpuva_ops_free(&vm->gpuvm, op->ops); + if (last) + xe_vm_put(vm); } -static struct xe_vma *vm_unbind_all_lookup_vmas(struct xe_vm *vm, - struct xe_bo *bo) +static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, + bool post_commit) { - struct xe_vma *first = NULL, *vma; + lockdep_assert_held_write(&vm->lock); - lockdep_assert_held(&vm->lock); - xe_bo_assert_held(bo); + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + if (op->map.vma) { + prep_vma_destroy(vm, op->map.vma, post_commit); + xe_vma_destroy_unlocked(op->map.vma); + } + break; + case DRM_GPUVA_OP_UNMAP: + { + struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); - list_for_each_entry(vma, &bo->vmas, bo_link) { - if (xe_vma_vm(vma) != vm) - continue; + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags &= ~XE_VMA_DESTROYED; + up_read(&vm->userptr.notifier_lock); + if (post_commit) + xe_vm_insert_vma(vm, vma); + break; + } + case DRM_GPUVA_OP_REMAP: + { + struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); - prep_vma_destroy(vm, vma); - if (!first) - first = vma; - else - list_add_tail(&vma->unbind_link, &first->unbind_link); + if (op->remap.prev) { + prep_vma_destroy(vm, op->remap.prev, post_commit); + xe_vma_destroy_unlocked(op->remap.prev); + } + if (op->remap.next) { + prep_vma_destroy(vm, op->remap.next, post_commit); + xe_vma_destroy_unlocked(op->remap.next); + } + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags &= ~XE_VMA_DESTROYED; + 
up_read(&vm->userptr.notifier_lock); + if (post_commit) + xe_vm_insert_vma(vm, vma); + break; + } + case DRM_GPUVA_OP_PREFETCH: + /* Nothing to do */ + break; + default: + XE_BUG_ON("NOT POSSIBLE"); } +} - return first; +static struct xe_vma_op *next_vma_op(struct xe_vm *vm) +{ + return list_first_entry_or_null(&vm->async_ops.pending, + struct xe_vma_op, link); } -static struct xe_vma *vm_bind_ioctl_lookup_vma(struct xe_vm *vm, - struct xe_bo *bo, - u64 bo_offset_or_userptr, - u64 addr, u64 range, u32 op, - u64 tile_mask, u32 region) +static void xe_vma_op_work_func(struct work_struct *w) { - struct ww_acquire_ctx ww; - struct xe_vma *vma, lookup; - int err; + struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work); - lockdep_assert_held(&vm->lock); + for (;;) { + struct xe_vma_op *op; + int err; - lookup.start = addr; - lookup.end = addr + range - 1; + if (vm->async_ops.error && !xe_vm_is_closed(vm)) + break; - switch (VM_BIND_OP(op)) { - case XE_VM_BIND_OP_MAP: - if (bo) { - err = xe_bo_lock(bo, &ww, 0, true); - if (err) - return ERR_PTR(err); - } - vma = xe_vma_create(vm, bo, bo_offset_or_userptr, addr, - addr + range - 1, - op & XE_VM_BIND_FLAG_READONLY, - op & XE_VM_BIND_FLAG_NULL, - tile_mask); - if (bo) - xe_bo_unlock(bo, &ww); - if (!vma) - return ERR_PTR(-ENOMEM); + spin_lock_irq(&vm->async_ops.lock); + op = next_vma_op(vm); + spin_unlock_irq(&vm->async_ops.lock); + + if (!op) + break; - xe_vm_insert_vma(vm, vma); - if (bo && !bo->vm) { - vm_insert_extobj(vm, vma); - err = add_preempt_fences(vm, bo); + if (!xe_vm_is_closed(vm)) { + down_write(&vm->lock); + err = xe_vma_op_execute(vm, op); if (err) { - prep_vma_destroy(vm, vma); + drm_warn(&vm->xe->drm, + "Async VM op(%d) failed with %d", + op->base.op, err); + vm_set_async_error(vm, err); + up_write(&vm->lock); + + if (vm->async_ops.error_capture.addr) + vm_error_capture(vm, err, 0, 0, 0); + break; + } + up_write(&vm->lock); + } else { + struct xe_vma *vma; + + switch (op->base.op) { + case DRM_GPUVA_OP_REMAP: + vma = gpuva_to_vma(op->base.remap.unmap->va); + trace_xe_vma_flush(vma); + + down_write(&vm->lock); xe_vma_destroy_unlocked(vma); + up_write(&vm->lock); + break; + case DRM_GPUVA_OP_UNMAP: + vma = gpuva_to_vma(op->base.unmap.va); + trace_xe_vma_flush(vma); + + down_write(&vm->lock); + xe_vma_destroy_unlocked(vma); + up_write(&vm->lock); + break; + default: + /* Nothing to do */ + break; + } - return ERR_PTR(err); + if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &op->fence->fence.flags)) { + if (!xe_vm_no_dma_fences(vm)) { + op->fence->started = true; + wake_up_all(&op->fence->wq); + } + dma_fence_signal(&op->fence->fence); } } - break; - case XE_VM_BIND_OP_UNMAP: - vma = vm_unbind_lookup_vmas(vm, &lookup); - break; - case XE_VM_BIND_OP_PREFETCH: - vma = vm_prefetch_lookup_vmas(vm, &lookup, region); - break; - case XE_VM_BIND_OP_UNMAP_ALL: - XE_BUG_ON(!bo); - err = xe_bo_lock(bo, &ww, 0, true); + xe_vma_op_cleanup(vm, op); + } +} + +static int vm_bind_ioctl_ops_commit(struct xe_vm *vm, + struct list_head *ops_list, bool async) +{ + struct xe_vma_op *op, *last_op, *next; + int err; + + lockdep_assert_held_write(&vm->lock); + + list_for_each_entry(op, ops_list, link) { + last_op = op; + err = xe_vma_op_commit(vm, op); if (err) - return ERR_PTR(err); - vma = vm_unbind_all_lookup_vmas(vm, bo); - if (!vma) - vma = ERR_PTR(-EINVAL); - xe_bo_unlock(bo, &ww); - break; - case XE_VM_BIND_OP_MAP_USERPTR: - XE_BUG_ON(bo); - - vma = xe_vma_create(vm, NULL, bo_offset_or_userptr, addr, - addr + range - 1, - op & 
XE_VM_BIND_FLAG_READONLY, - op & XE_VM_BIND_FLAG_NULL, - tile_mask); - if (!vma) - return ERR_PTR(-ENOMEM); + goto unwind; + } - err = xe_vma_userptr_pin_pages(vma); - if (err) { - prep_vma_destroy(vm, vma); - xe_vma_destroy_unlocked(vma); + if (!async) { + err = xe_vma_op_execute(vm, last_op); + if (err) + goto unwind; + xe_vma_op_cleanup(vm, last_op); + } else { + int i; + bool installed = false; - return ERR_PTR(err); - } else { - xe_vm_insert_vma(vm, vma); - } - break; - default: - XE_BUG_ON("NOT POSSIBLE"); - vma = ERR_PTR(-EINVAL); + for (i = 0; i < last_op->num_syncs; i++) + installed |= xe_sync_entry_signal(&last_op->syncs[i], + NULL, + &last_op->fence->fence); + if (!installed && last_op->fence) + dma_fence_signal(&last_op->fence->fence); + + spin_lock_irq(&vm->async_ops.lock); + list_splice_tail(ops_list, &vm->async_ops.pending); + spin_unlock_irq(&vm->async_ops.lock); + + if (!vm->async_ops.error) + queue_work(system_unbound_wq, &vm->async_ops.work); } - return vma; + return 0; + +unwind: + list_for_each_entry_reverse(op, ops_list, link) + xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED); + list_for_each_entry_safe(op, next, ops_list, link) + xe_vma_op_cleanup(vm, op); + + return err; +} + +/* + * Unwind operations list, called after a failure of vm_bind_ioctl_ops_create or + * vm_bind_ioctl_ops_parse. + */ +static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, + struct drm_gpuva_ops **ops, + int num_ops_list) +{ + int i; + + for (i = 0; i < num_ops_list; ++i) { + struct drm_gpuva_ops *__ops = ops[i]; + struct drm_gpuva_op *__op; + + if (!__ops) + continue; + + drm_gpuva_for_each_op(__op, __ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + + xe_vma_op_unwind(vm, op, false); + } + } } #ifdef TEST_VM_ASYNC_OPS_ERROR @@ -2963,8 +3003,6 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, int i; if (XE_IOCTL_ERR(xe, args->extensions) || - XE_IOCTL_ERR(xe, args->pad || args->pad2) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1]) || XE_IOCTL_ERR(xe, !args->num_binds) || XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS)) return -EINVAL; @@ -2996,14 +3034,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u32 obj = (*bind_ops)[i].obj; u64 obj_offset = (*bind_ops)[i].obj_offset; u32 region = (*bind_ops)[i].region; - bool is_null = op & XE_VM_BIND_FLAG_NULL; - - if (XE_IOCTL_ERR(xe, (*bind_ops)[i].pad) || - XE_IOCTL_ERR(xe, (*bind_ops)[i].reserved[0] || - (*bind_ops)[i].reserved[1])) { - err = -EINVAL; - goto free_bind_ops; - } + bool is_null = op & XE_VM_BIND_FLAG_NULL; if (i == 0) { *async = !!(op & XE_VM_BIND_FLAG_ASYNC); @@ -3083,15 +3114,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_xe_vm_bind *args = data; struct drm_xe_sync __user *syncs_user; struct xe_bo **bos = NULL; - struct xe_vma **vmas = NULL; + struct drm_gpuva_ops **ops = NULL; struct xe_vm *vm; struct xe_engine *e = NULL; u32 num_syncs; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; + LIST_HEAD(ops_list); bool async; int err; - int i, j = 0; + int i; err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async); if (err) @@ -3180,8 +3212,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto release_vm_lock; } - vmas = kzalloc(sizeof(*vmas) * args->num_binds, GFP_KERNEL); - if (!vmas) { + ops = kzalloc(sizeof(*ops) * args->num_binds, GFP_KERNEL); + if (!ops) { err = -ENOMEM; goto release_vm_lock; } @@ -3233,7 +3265,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, 
void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], false, - xe_vm_in_fault_mode(vm)); + xe_vm_no_dma_fences(vm)); if (err) goto free_syncs; } @@ -3244,7 +3276,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addr = bind_ops[i].addr; u32 op = bind_ops[i].op; - err = __vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op); + err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op); if (err) goto free_syncs; } @@ -3257,126 +3289,43 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 tile_mask = bind_ops[i].tile_mask; u32 region = bind_ops[i].region; - vmas[i] = vm_bind_ioctl_lookup_vma(vm, bos[i], obj_offset, - addr, range, op, tile_mask, - region); - if (IS_ERR(vmas[i])) { - err = PTR_ERR(vmas[i]); - vmas[i] = NULL; - goto destroy_vmas; + ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, + addr, range, op, tile_mask, + region); + if (IS_ERR(ops[i])) { + err = PTR_ERR(ops[i]); + ops[i] = NULL; + goto unwind_ops; } } - for (j = 0; j < args->num_binds; ++j) { - struct xe_sync_entry *__syncs; - u32 __num_syncs = 0; - bool first_or_last = j == 0 || j == args->num_binds - 1; - - if (args->num_binds == 1) { - __num_syncs = num_syncs; - __syncs = syncs; - } else if (first_or_last && num_syncs) { - bool first = j == 0; - - __syncs = kmalloc(sizeof(*__syncs) * num_syncs, - GFP_KERNEL); - if (!__syncs) { - err = ENOMEM; - break; - } - - /* in-syncs on first bind, out-syncs on last bind */ - for (i = 0; i < num_syncs; ++i) { - bool signal = syncs[i].flags & - DRM_XE_SYNC_SIGNAL; - - if ((first && !signal) || (!first && signal)) - __syncs[__num_syncs++] = syncs[i]; - } - } else { - __num_syncs = 0; - __syncs = NULL; - } - - if (async) { - bool last = j == args->num_binds - 1; - - /* - * Each pass of async worker drops the ref, take a ref - * here, 1 set of refs taken above - */ - if (!last) { - if (e) - xe_engine_get(e); - xe_vm_get(vm); - } - - err = vm_bind_ioctl_async(vm, vmas[j], e, bos[j], - bind_ops + j, __syncs, - __num_syncs); - if (err && !last) { - if (e) - xe_engine_put(e); - xe_vm_put(vm); - } - if (err) - break; - } else { - XE_BUG_ON(j != 0); /* Not supported */ - err = vm_bind_ioctl(vm, vmas[j], e, bos[j], - bind_ops + j, __syncs, - __num_syncs, NULL); - break; /* Needed so cleanup loops work */ - } - } + err = vm_bind_ioctl_ops_parse(vm, e, ops, args->num_binds, + syncs, num_syncs, &ops_list, async); + if (err) + goto unwind_ops; - /* Most of cleanup owned by the async bind worker */ - if (async && !err) { - up_write(&vm->lock); - if (args->num_binds > 1) - kfree(syncs); - goto free_objs; - } + err = vm_bind_ioctl_ops_commit(vm, &ops_list, async); + up_write(&vm->lock); -destroy_vmas: - for (i = j; err && i < args->num_binds; ++i) { - u32 op = bind_ops[i].op; - struct xe_vma *vma, *next; + for (i = 0; i < args->num_binds; ++i) + xe_bo_put(bos[i]); - if (!vmas[i]) - break; + kfree(bos); + kfree(ops); + if (args->num_binds > 1) + kfree(bind_ops); - list_for_each_entry_safe(vma, next, &vmas[i]->unbind_link, - unbind_link) { - list_del_init(&vma->unbind_link); - if (!vma->destroyed) { - prep_vma_destroy(vm, vma); - xe_vma_destroy_unlocked(vma); - } - } + return err; - switch (VM_BIND_OP(op)) { - case XE_VM_BIND_OP_MAP: - prep_vma_destroy(vm, vmas[i]); - xe_vma_destroy_unlocked(vmas[i]); - break; - case XE_VM_BIND_OP_MAP_USERPTR: - prep_vma_destroy(vm, vmas[i]); - 
xe_vma_destroy_unlocked(vmas[i]); - break; - } - } +unwind_ops: + vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); free_syncs: - while (num_syncs--) { - if (async && j && - !(syncs[num_syncs].flags & DRM_XE_SYNC_SIGNAL)) - continue; /* Still in async worker */ + while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); - } kfree(syncs); put_obj: - for (i = j; i < args->num_binds; ++i) + for (i = 0; i < args->num_binds; ++i) xe_bo_put(bos[i]); release_vm_lock: up_write(&vm->lock); @@ -3387,10 +3336,10 @@ put_engine: xe_engine_put(e); free_objs: kfree(bos); - kfree(vmas); + kfree(ops); if (args->num_binds > 1) kfree(bind_ops); - return err; + return err == -ENODATA ? 0 : err; } /* @@ -3415,7 +3364,7 @@ int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww) { - dma_resv_unlock(&vm->resv); + dma_resv_unlock(xe_vm_resv(vm)); ww_acquire_fini(ww); } @@ -3448,7 +3397,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) WARN_ON_ONCE(!mmu_interval_check_retry (&vma->userptr.notifier, vma->userptr.notifier_seq)); - WARN_ON_ONCE(!dma_resv_test_signaled(&xe_vma_vm(vma)->resv, + WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), DMA_RESV_USAGE_BOOKKEEP)); } else { @@ -3485,7 +3434,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) { - struct rb_node *node; + struct drm_gpuva *gpuva; bool is_vram; uint64_t addr; @@ -3499,8 +3448,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS"); } - for (node = rb_first(&vm->vmas); node; node = rb_next(node)) { - struct xe_vma *vma = to_xe_vma(node); + drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { + struct xe_vma *vma = gpuva_to_vma(gpuva); bool is_userptr = xe_vma_is_userptr(vma); bool is_null = xe_vma_is_null(vma); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 644a8aa604e8..d386e72cb974 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -6,6 +6,7 @@ #ifndef _XE_VM_H_ #define _XE_VM_H_ +#include "xe_bo_types.h" #include "xe_macros.h" #include "xe_map.h" #include "xe_vm_types.h" @@ -22,20 +23,19 @@ struct xe_file; struct xe_sync_entry; struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); -void xe_vm_free(struct kref *ref); struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); static inline struct xe_vm *xe_vm_get(struct xe_vm *vm) { - kref_get(&vm->refcount); + drm_gpuvm_get(&vm->gpuvm); return vm; } static inline void xe_vm_put(struct xe_vm *vm) { - kref_put(&vm->refcount, xe_vm_free); + drm_gpuvm_put(&vm->gpuvm); } int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, @@ -61,7 +61,22 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm) } struct xe_vma * -xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma); +xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range); + +static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva) +{ + return container_of(gpuva->vm, struct xe_vm, gpuvm); +} + +static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva) +{ + return container_of(gpuva, struct xe_vma, gpuva); +} + +static inline struct xe_vma_op *gpuva_op_to_vma_op(struct drm_gpuva_op *op) +{ + return container_of(op, struct xe_vma_op, base); +} /** * DOC: Provide accessors for vma members to facilitate easy change of @@ -69,12 +84,12 @@ 
xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma); */ static inline u64 xe_vma_start(struct xe_vma *vma) { - return vma->start; + return vma->gpuva.va.addr; } static inline u64 xe_vma_size(struct xe_vma *vma) { - return vma->end - vma->start + 1; + return vma->gpuva.va.range; } static inline u64 xe_vma_end(struct xe_vma *vma) @@ -84,32 +99,33 @@ static inline u64 xe_vma_end(struct xe_vma *vma) static inline u64 xe_vma_bo_offset(struct xe_vma *vma) { - return vma->bo_offset; + return vma->gpuva.gem.offset; } static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma) { - return vma->bo; + return !vma->gpuva.gem.obj ? NULL : + container_of(vma->gpuva.gem.obj, struct xe_bo, ttm.base); } static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma) { - return vma->vm; + return container_of(vma->gpuva.vm, struct xe_vm, gpuvm); } static inline bool xe_vma_read_only(struct xe_vma *vma) { - return vma->pte_flags & XE_PTE_FLAG_READ_ONLY; + return vma->gpuva.flags & XE_VMA_READ_ONLY; } static inline u64 xe_vma_userptr(struct xe_vma *vma) { - return vma->userptr.ptr; + return vma->gpuva.gem.offset; } static inline bool xe_vma_is_null(struct xe_vma *vma) { - return vma->pte_flags & XE_PTE_FLAG_NULL; + return vma->gpuva.flags & DRM_GPUVA_SPARSE; } static inline bool xe_vma_has_no_bo(struct xe_vma *vma) @@ -122,8 +138,6 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma); } -#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) - u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile); int xe_vm_create_ioctl(struct drm_device *dev, void *data, @@ -218,6 +232,23 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); +/** + * xe_vm_resv() - Return's the vm's reservation object + * @vm: The vm + * + * Return: Pointer to the vm's reservation object. + */ +static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm) +{ + return drm_gpuvm_resv(&vm->gpuvm); +} + +/** + * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. 
+ * @vm: The vm + */ +#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) #define vm_dbg drm_dbg #else diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 0e8d0c513ee9..32f92743d851 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -210,19 +210,12 @@ static const madvise_func madvise_funcs[] = { [DRM_XE_VM_MADVISE_PIN] = madvise_pin, }; -static struct xe_vma *node_to_vma(const struct rb_node *node) -{ - BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0); - return (struct xe_vma *)node; -} - static struct xe_vma ** get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range) { - struct xe_vma **vmas; - struct xe_vma *vma, *__vma, lookup; + struct xe_vma **vmas, **__vmas; + struct drm_gpuva *gpuva; int max_vmas = 8; - struct rb_node *node; lockdep_assert_held(&vm->lock); @@ -230,62 +223,23 @@ get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range) if (!vmas) return NULL; - lookup.start = addr; - lookup.end = addr + range - 1; + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) { + struct xe_vma *vma = gpuva_to_vma(gpuva); - vma = xe_vm_find_overlapping_vma(vm, &lookup); - if (!vma) - return vmas; + if (xe_vma_is_userptr(vma)) + continue; - if (!xe_vma_is_userptr(vma)) { - vmas[*num_vmas] = vma; - *num_vmas += 1; - } - - node = &vma->vm_node; - while ((node = rb_next(node))) { - if (!xe_vma_cmp_vma_cb(&lookup, node)) { - __vma = node_to_vma(node); - if (xe_vma_is_userptr(__vma)) - continue; - - if (*num_vmas == max_vmas) { - struct xe_vma **__vmas = - krealloc(vmas, max_vmas * sizeof(*vmas), - GFP_KERNEL); - - if (!__vmas) - return NULL; - vmas = __vmas; - } - vmas[*num_vmas] = __vma; - *num_vmas += 1; - } else { - break; + if (*num_vmas == max_vmas) { + max_vmas <<= 1; + __vmas = krealloc(vmas, max_vmas * sizeof(*vmas), + GFP_KERNEL); + if (!__vmas) + return NULL; + vmas = __vmas; } - } - node = &vma->vm_node; - while ((node = rb_prev(node))) { - if (!xe_vma_cmp_vma_cb(&lookup, node)) { - __vma = node_to_vma(node); - if (xe_vma_is_userptr(__vma)) - continue; - - if (*num_vmas == max_vmas) { - struct xe_vma **__vmas = - krealloc(vmas, max_vmas * sizeof(*vmas), - GFP_KERNEL); - - if (!__vmas) - return NULL; - vmas = __vmas; - } - vmas[*num_vmas] = __vma; - *num_vmas += 1; - } else { - break; - } + vmas[*num_vmas] = vma; + *num_vmas += 1; } return vmas; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 3c885211a8d1..8aca079006ba 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -6,6 +6,8 @@ #ifndef _XE_VM_TYPES_H_ #define _XE_VM_TYPES_H_ +#include + #include #include #include @@ -14,30 +16,23 @@ #include "xe_device_types.h" #include "xe_pt_types.h" +struct async_op_fence; struct xe_bo; +struct xe_sync_entry; struct xe_vm; -struct xe_vma { - struct rb_node vm_node; - /** @vm: VM which this VMA belongs to */ - struct xe_vm *vm; +#define TEST_VM_ASYNC_OPS_ERROR +#define FORCE_ASYNC_OP_ERROR BIT(31) - /** - * @start: start address of this VMA within its address domain, end - - * start + 1 == VMA size - */ - u64 start; - /** @end: end address of this VMA within its address domain */ - u64 end; - /** @pte_flags: pte flags for this VMA */ -#define XE_PTE_FLAG_READ_ONLY BIT(0) -#define XE_PTE_FLAG_NULL BIT(1) - u32 pte_flags; - - /** @bo: BO if not a userptr, must be NULL is userptr */ - struct xe_bo *bo; - /** @bo_offset: offset into BO if not a userptr, unused for 
userptr */ - u64 bo_offset; +#define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS +#define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1) +#define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2) +#define XE_VMA_FIRST_REBIND (DRM_GPUVA_USERBITS << 3) +#define XE_VMA_LAST_REBIND (DRM_GPUVA_USERBITS << 4) + +struct xe_vma { + /** @gpuva: Base GPUVA object */ + struct drm_gpuva gpuva; /** @tile_mask: Tile mask of where to create binding for this VMA */ u64 tile_mask; @@ -51,40 +46,8 @@ struct xe_vma { */ u64 tile_present; - /** - * @destroyed: VMA is destroyed, in the sense that it shouldn't be - * subject to rebind anymore. This field must be written under - * the vm lock in write mode and the userptr.notifier_lock in - * either mode. Read under the vm lock or the userptr.notifier_lock in - * write mode. - */ - bool destroyed; - - /** - * @first_munmap_rebind: VMA is first in a sequence of ops that triggers - * a rebind (munmap style VM unbinds). This indicates the operation - * using this VMA must wait on all dma-resv slots (wait for pending jobs - * / trigger preempt fences). - */ - bool first_munmap_rebind; - - /** - * @last_munmap_rebind: VMA is first in a sequence of ops that triggers - * a rebind (munmap style VM unbinds). This indicates the operation - * using this VMA must install itself into kernel dma-resv slot (blocks - * future jobs) and kick the rebind work in compute mode. - */ - bool last_munmap_rebind; - - /** @use_atomic_access_pte_bit: Set atomic access bit in PTE */ - bool use_atomic_access_pte_bit; - - union { - /** @bo_link: link into BO if not a userptr */ - struct list_head bo_link; - /** @userptr_link: link into VM repin list if userptr */ - struct list_head userptr_link; - }; + /** @userptr_link: link into VM repin list if userptr */ + struct list_head userptr_link; /** * @rebind_link: link into VM if this VMA needs rebinding, and @@ -107,8 +70,6 @@ struct xe_vma { /** @userptr: user pointer state */ struct { - /** @ptr: user pointer */ - uintptr_t ptr; /** @invalidate_link: Link for the vm::userptr.invalidated list */ struct list_head invalidate_link; /** @@ -153,24 +114,19 @@ struct xe_vma { struct xe_device; -#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) - struct xe_vm { - struct xe_device *xe; + /** @gpuvm: base GPUVM used to track VMAs */ + struct drm_gpuvm gpuvm; - struct kref refcount; + struct xe_device *xe; /* engine used for (un)binding vma's */ struct xe_engine *eng[XE_MAX_TILES_PER_DEVICE]; - /** Protects @rebind_list and the page-table structures */ - struct dma_resv resv; - /** @lru_bulk_move: Bulk LRU move list for this VM's BOs */ struct ttm_lru_bulk_move lru_bulk_move; u64 size; - struct rb_root vmas; struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE]; struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE]; @@ -351,4 +307,99 @@ struct xe_vm { bool batch_invalidate_tlb; }; +/** struct xe_vma_op_map - VMA map operation */ +struct xe_vma_op_map { + /** @vma: VMA to map */ + struct xe_vma *vma; + /** @immediate: Immediate bind */ + bool immediate; + /** @read_only: Read only */ + bool read_only; + /** @is_null: is NULL binding */ + bool is_null; +}; + +/** struct xe_vma_op_unmap - VMA unmap operation */ +struct xe_vma_op_unmap { + /** @start: start of the VMA unmap */ + u64 start; + /** @range: range of the VMA unmap */ + u64 range; +}; + +/** struct xe_vma_op_remap - VMA remap operation */ +struct xe_vma_op_remap { + /** @prev: VMA preceding part of a split mapping */ + struct xe_vma *prev; + /** @next: VMA subsequent part of a split mapping */ + struct 
xe_vma *next; + /** @start: start of the VMA unmap */ + u64 start; + /** @range: range of the VMA unmap */ + u64 range; + /** @unmap_done: unmap operation in done */ + bool unmap_done; +}; + +/** struct xe_vma_op_prefetch - VMA prefetch operation */ +struct xe_vma_op_prefetch { + /** @region: memory region to prefetch to */ + u32 region; +}; + +/** enum xe_vma_op_flags - flags for VMA operation */ +enum xe_vma_op_flags { + /** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */ + XE_VMA_OP_FIRST = (0x1 << 0), + /** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */ + XE_VMA_OP_LAST = (0x1 << 1), + /** @XE_VMA_OP_COMMITTED: VMA operation committed */ + XE_VMA_OP_COMMITTED = (0x1 << 2), +}; + +/** struct xe_vma_op - VMA operation */ +struct xe_vma_op { + /** @base: GPUVA base operation */ + struct drm_gpuva_op base; + /** + * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this + * operations is processed + */ + struct drm_gpuva_ops *ops; + /** @engine: engine for this operation */ + struct xe_engine *engine; + /** + * @syncs: syncs for this operation, only used on first and last + * operation + */ + struct xe_sync_entry *syncs; + /** @num_syncs: number of syncs */ + u32 num_syncs; + /** @link: async operation link */ + struct list_head link; + /** + * @fence: async operation fence, signaled on last operation complete + */ + struct async_op_fence *fence; + /** @tile_mask: gt mask for this operation */ + u64 tile_mask; + /** @flags: operation flags */ + enum xe_vma_op_flags flags; + +#ifdef TEST_VM_ASYNC_OPS_ERROR + /** @inject_error: inject error to test async op error handling */ + bool inject_error; +#endif + + union { + /** @map: VMA map operation specific data */ + struct xe_vma_op_map map; + /** @unmap: VMA unmap operation specific data */ + struct xe_vma_op_unmap unmap; + /** @remap: VMA remap operation specific data */ + struct xe_vma_op_remap remap; + /** @prefetch: VMA prefetch operation specific data */ + struct xe_vma_op_prefetch prefetch; + }; +}; #endif -- cgit v1.2.3 From 38fa29dc2b73b54299e973d292ec7fd507d3b8c0 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:39 +0100 Subject: drm/xe/tlb: drop unnecessary smp_wmb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wake_up_all() and wait_event_timeout() already have the correct barriers as per https://www.kernel.org/doc/Documentation/memory-barriers.txt. This should ensure that the seqno_recv write can't be re-ordered wrt to the actual wake_up_all() i.e we get woken up but there is no write. The reader side with wait_event_timeout() also has the correct barriers. With that drop the hand rolled smp_wmb(), which is anyway missing some kind of matching barrier on the reader side. Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index f77368a16409..48eb05f763e9 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -333,8 +333,11 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) expected_seqno, msg[0]); } + /* + * wake_up_all() and wait_event_timeout() already have the correct + * barriers. 
+ */ gt->tlb_invalidation.seqno_recv = msg[0]; - smp_wmb(); wake_up_all(&guc->ct.wq); fence = list_first_entry_or_null(>->tlb_invalidation.pending_fences, -- cgit v1.2.3 From 86ed09250e068faa840dadcd175d3cd8d174f998 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:40 +0100 Subject: drm/xe/tlb: ensure we access seqno_recv once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure we load gt->tlb_invalidation.seqno_recv once, and use that for our seqno checking. The gt->tlb_invalidation_seqno_past is a shared global variable and can potentially change at any point here. However the checks here need to operate on a stable version of seqno_recv for this to make any sense. Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 48eb05f763e9..2b60251ea1c0 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -257,15 +257,15 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) { - if (seqno - gt->tlb_invalidation.seqno_recv < - -(TLB_INVALIDATION_SEQNO_MAX / 2)) + int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv); + + if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) return false; - if (seqno - gt->tlb_invalidation.seqno_recv > - (TLB_INVALIDATION_SEQNO_MAX / 2)) + if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) return true; - return gt->tlb_invalidation.seqno_recv >= seqno; + return seqno_recv >= seqno; } /** @@ -337,7 +337,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) * wake_up_all() and wait_event_timeout() already have the correct * barriers. */ - gt->tlb_invalidation.seqno_recv = msg[0]; + WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); wake_up_all(&guc->ct.wq); fence = list_first_entry_or_null(>->tlb_invalidation.pending_fences, -- cgit v1.2.3 From c4bbc32e09ab9f74c725a8719df2b509c8ad8780 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:41 +0100 Subject: drm/xe: hold mem_access.ref for CT fast-path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just checking xe_device_mem_access_ongoing() is not enough, we also need to hold the reference otherwise the ref can transition from 1 -> 0 as we enter g2h_read(), leading to warnings. While we can't do a full rpm sync in the IRQ, we can keep the device awake if the ref is non-zero. Introduce a new helper for this and set it to work in for the CT fast-path. 
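A minimal sketch of the get-if-nonzero pattern this relies on; the names here are illustrative, not the driver's actual API. The racy variant only observes the counter, so it can drop to zero right after the check; the safe variant succeeds only if the counter was already non-zero and pins it so it cannot reach zero while the fast path runs.

	#include <linux/atomic.h>

	struct dev_ctx {
		atomic_t access_ref;
	};

	/* Racy from IRQ context: the ref can hit 0 right after this returns true. */
	static bool access_ongoing(struct dev_ctx *d)
	{
		return atomic_read(&d->access_ref) != 0;
	}

	/* Safe: succeed only if the ref was non-zero, and take our own reference. */
	static bool access_get_if_ongoing(struct dev_ctx *d)
	{
		return atomic_inc_not_zero(&d->access_ref);
	}

	static void access_put(struct dev_ctx *d)
	{
		atomic_dec(&d->access_ref);
	}

In the fast path the caller simply bails out when the helper fails, and drops the reference it took once the g2h processing is done, which is what the diff below does with xe_device_mem_access_get_if_ongoing() and xe_device_mem_access_put().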
Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 5 +++++ drivers/gpu/drm/xe/xe_device.h | 1 + drivers/gpu/drm/xe/xe_guc_ct.c | 5 ++++- 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6249eef752c5..bd2e10952989 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -364,6 +364,11 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0; } +bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) +{ + return atomic_inc_not_zero(&xe->mem_access.ref); +} + void xe_device_mem_access_get(struct xe_device *xe) { bool resumed = xe_pm_runtime_resume_if_suspended(xe); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 779f71d066e6..8e01bbadb149 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -138,6 +138,7 @@ static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt) } void xe_device_mem_access_get(struct xe_device *xe); +bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe); void xe_device_mem_access_put(struct xe_device *xe); static inline bool xe_device_mem_access_ongoing(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index e71d069158dc..dcce6ed34370 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1044,7 +1044,8 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct) struct xe_device *xe = ct_to_xe(ct); int len; - if (!xe_device_in_fault_mode(xe) || !xe_device_mem_access_ongoing(xe)) + if (!xe_device_in_fault_mode(xe) || + !xe_device_mem_access_get_if_ongoing(xe)) return; spin_lock(&ct->fast_lock); @@ -1054,6 +1055,8 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct) g2h_fast_path(ct, ct->fast_msg, len); } while (len > 0); spin_unlock(&ct->fast_lock); + + xe_device_mem_access_put(xe); } /* Returns less than zero on error, 0 on done, 1 on more available */ -- cgit v1.2.3 From dad33831d8d137ee28b21c3c2296463a01aa5b78 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:42 +0100 Subject: drm/xe/ct: hold fast_lock when reserving space for g2h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reserving and checking for space on the g2h side relies on the fast_lock, and not the CT lock since we need to release space from the fast CT path. Make sure we hold it when checking for space and reserving it. The main concern is calling __g2h_release_space() as we are reserving something and since the info.space and info.g2h_outstanding operations are not atomic we can get some nonsense values back. 
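A short sketch of the locking rule being enforced, with made-up names rather than the actual CT structures: the space check and the reservation must form one critical section under the same lock the release path takes, otherwise a concurrent release can interleave between them and the accounting ends up with nonsense values.

	#include <linux/spinlock.h>
	#include <linux/lockdep.h>
	#include <linux/types.h>

	struct g2h_state {
		spinlock_t fast_lock;	/* also taken by the fast-path release */
		u32 space;
		u32 outstanding;
	};

	static int g2h_check_and_reserve(struct g2h_state *g2h, u32 len, u32 num)
	{
		int ret = 0;

		spin_lock_irq(&g2h->fast_lock);
		if (g2h->space < len) {
			ret = -EBUSY;		/* check ... */
		} else {
			g2h->space -= len;	/* ... and reserve, atomic vs. release */
			g2h->outstanding += num;
		}
		spin_unlock_irq(&g2h->fast_lock);

		return ret;
	}

	/* Fast-path side, already running under fast_lock. */
	static void __g2h_release(struct g2h_state *g2h, u32 len, u32 num)
	{
		lockdep_assert_held(&g2h->fast_lock);
		g2h->space += len;
		g2h->outstanding -= num;
	}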
Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index dcce6ed34370..ba89db1dcfdb 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -346,7 +346,10 @@ static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) static bool g2h_has_room(struct xe_guc_ct *ct, u32 g2h_len) { - lockdep_assert_held(&ct->lock); + if (!g2h_len) + return true; + + lockdep_assert_held(&ct->fast_lock); return ct->ctbs.g2h.info.space > g2h_len; } @@ -367,15 +370,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) ct->ctbs.h2g.info.space -= cmd_len; } -static void g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) +static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { XE_BUG_ON(g2h_len > ct->ctbs.g2h.info.space); if (g2h_len) { - spin_lock_irq(&ct->fast_lock); + lockdep_assert_held(&ct->fast_lock); + ct->ctbs.g2h.info.space -= g2h_len; ct->g2h_outstanding += num_g2h; - spin_unlock_irq(&ct->fast_lock); } } @@ -505,21 +508,26 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, } } + if (g2h_len) + spin_lock_irq(&ct->fast_lock); retry: ret = has_room(ct, len + GUC_CTB_HDR_LEN, g2h_len); if (unlikely(ret)) - goto out; + goto out_unlock; ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0, !!g2h_fence); if (unlikely(ret)) { if (ret == -EAGAIN) goto retry; - goto out; + goto out_unlock; } - g2h_reserve_space(ct, g2h_len, num_g2h); + __g2h_reserve_space(ct, g2h_len, num_g2h); xe_guc_notify(ct_to_guc(ct)); +out_unlock: + if (g2h_len) + spin_unlock_irq(&ct->fast_lock); out: return ret; } -- cgit v1.2.3 From 4803f6e26f1678b8b5af2924199bc137e7ec5fad Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:43 +0100 Subject: drm/xe/tlb: increment next seqno after successful CT send MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we are in the middle of a GT reset or similar the CT might be disabled, such that the CT send fails. 
However we already incremented gt->tlb_invalidation.seqno which might lead to warnings, since we effectively just skipped a seqno: 0000:00:02.0: drm_WARN_ON(expected_seqno != msg[0]) Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 2b60251ea1c0..b4d024bf10be 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -124,10 +124,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, trace_xe_gt_tlb_invalidation_fence_send(fence); } action[1] = seqno; - gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % - TLB_INVALIDATION_SEQNO_MAX; - if (!gt->tlb_invalidation.seqno) - gt->tlb_invalidation.seqno = 1; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); if (!ret && fence) { @@ -137,8 +133,13 @@ static int send_tlb_invalidation(struct xe_guc *guc, >->tlb_invalidation.fence_tdr, TLB_TIMEOUT); } - if (!ret) + if (!ret) { + gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % + TLB_INVALIDATION_SEQNO_MAX; + if (!gt->tlb_invalidation.seqno) + gt->tlb_invalidation.seqno = 1; ret = seqno; + } if (ret < 0 && fence) invalidation_fence_signal(fence); mutex_unlock(&guc->ct.lock); -- cgit v1.2.3 From a4d362bbed8c86a632b5e22bf64d9c5564e3766e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:44 +0100 Subject: drm/xe/ct: serialise fast_lock during CT disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fast-path CT could be running as we enter a runtime-suspend or potentially a GT reset, however here we only use the ct->fast_lock and not the full ct->lock. Before disabling the CT, also serialise against the fast_lock to ensure any in-progress work finishes before we start nuking the CT related stuff. Once we disable ct->enabled and drop the lock, any new work should fail gracefully, and anything that was in progress should be finished. 
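The resulting shutdown ordering, reduced to a sketch with illustrative names: taking both locks before clearing the enabled flag means neither a sender holding the CT mutex nor the IRQ fast path holding the spinlock can still be mid-operation when the flag flips, and anything that starts afterwards sees it cleared and backs off.

	#include <linux/mutex.h>
	#include <linux/spinlock.h>

	struct ct_chan {
		struct mutex lock;	/* normal send / g2h processing path */
		spinlock_t fast_lock;	/* IRQ fast path */
		bool enabled;
	};

	static void ct_chan_disable(struct ct_chan *ct)
	{
		mutex_lock(&ct->lock);		/* serialise the slow g2h path */
		spin_lock_irq(&ct->fast_lock);	/* serialise the CT fast path */
		ct->enabled = false;		/* finally stop new work */
		spin_unlock_irq(&ct->fast_lock);
		mutex_unlock(&ct->lock);
	}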
Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index ba89db1dcfdb..acf488b00b66 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -301,8 +301,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) goto err_out; mutex_lock(&ct->lock); + spin_lock_irq(&ct->fast_lock); ct->g2h_outstanding = 0; ct->enabled = true; + spin_unlock_irq(&ct->fast_lock); mutex_unlock(&ct->lock); smp_mb(); @@ -319,8 +321,10 @@ err_out: void xe_guc_ct_disable(struct xe_guc_ct *ct) { - mutex_lock(&ct->lock); - ct->enabled = false; + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ + spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ + ct->enabled = false; /* Finally disable CT communication */ + spin_unlock_irq(&ct->fast_lock); mutex_unlock(&ct->lock); xa_destroy(&ct->fence_lookup); -- cgit v1.2.3 From 7b24cc3e309f31ad77b2ed136ce7606e0b3f67bb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:45 +0100 Subject: drm/xe/gt: tweak placement for signalling TLB fences after GT reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assumption here is that submission is disabled along with CT, and full GT reset will also nuke TLBs, so should be safe to signal all in-flight TLB fences, but only after the actual reset so move the placement slightly. Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index bc76678a8276..a21d44bfe9e8 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -519,7 +519,6 @@ static int gt_reset(struct xe_gt *gt) xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); - xe_gt_tlb_invalidation_reset(gt); err = xe_uc_stop(>->uc); if (err) @@ -529,6 +528,8 @@ static int gt_reset(struct xe_gt *gt) if (err) goto err_out; + xe_gt_tlb_invalidation_reset(gt); + err = do_gt_restart(gt); if (err) goto err_out; -- cgit v1.2.3 From 2ca01fe31b68bab12ccccef91196ea21cd93e065 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:46 +0100 Subject: drm/xe/tlb: also update seqno_recv during reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We might have various kworkers waiting for TLB flushes to complete which are not tracked with an explicit TLB fence, however at this stage that will never happen since the CT is already disabled, so make sure we signal them here under the assumption that we have completed a full GT reset. v2: - We need to use seqno - 1 here. After acquiring ct->lock the seqno is actually the next users seqno and not the pending one. 
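The v2 point in isolation: because the counter is advanced only after a successful send, gt->tlb_invalidation.seqno holds the value the next request will use, so the most recently issued seqno is one behind it, wrapping back to the top of the range when the counter has just rolled over to 1 (0 is skipped and never used as a seqno). A small sketch of that computation, with an illustrative constant standing in for TLB_INVALIDATION_SEQNO_MAX:

	#define SEQNO_MAX	0x100000	/* stand-in value for the example */

	static int last_issued_seqno(int next_seqno)
	{
		/* next_seqno == 1 means we just wrapped: the previous one was MAX - 1 */
		return next_seqno == 1 ? SEQNO_MAX - 1 : next_seqno - 1;
	}

Writing that value into seqno_recv and waking the waitqueue is what releases any xe_gt_tlb_invalidation_wait() callers that were not tracked by an explicit fence.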
Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index b4d024bf10be..bf23d97d2fef 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -86,13 +86,33 @@ invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) * * Signal any pending invalidation fences, should be called during a GT reset */ - void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) +void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) { struct xe_gt_tlb_invalidation_fence *fence, *next; + struct xe_guc *guc = >->uc.guc; + int pending_seqno; - cancel_delayed_work(>->tlb_invalidation.fence_tdr); + /* + * CT channel is already disabled at this point. No new TLB requests can + * appear. + */ mutex_lock(>->uc.guc.ct.lock); + cancel_delayed_work(>->tlb_invalidation.fence_tdr); + /* + * We might have various kworkers waiting for TLB flushes to complete + * which are not tracked with an explicit TLB fence, however at this + * stage that will never happen since the CT is already disabled, so + * make sure we signal them here under the assumption that we have + * completed a full GT reset. + */ + if (gt->tlb_invalidation.seqno == 1) + pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; + else + pending_seqno = gt->tlb_invalidation.seqno - 1; + WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); + wake_up_all(&guc->ct.wq); + list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) invalidation_fence_signal(fence); -- cgit v1.2.3 From 4aa5e3594f649d1bc202db302a8d5030d03c02fb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:47 +0100 Subject: drm/xe/tlb: print seqno_recv on fence TLB timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To help debugging, sample the current seqno_recv and dump it out if we encounter a TLB timeout for the fences path. Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index bf23d97d2fef..29819dc820c5 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -35,8 +35,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) break; trace_xe_gt_tlb_invalidation_fence_timeout(fence); - drm_err(>_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d", - gt->info.id, fence->seqno); + drm_err(>_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d", + gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv); list_del(&fence->link); fence->base.error = -ETIME; -- cgit v1.2.3 From 0b688f9b2880c655a8b161ec46932a6fe8da9ea9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:48 +0100 Subject: drm/xe/ct: update g2h outstanding for CTB capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks to always to be zero when inspecting the CTB dump. 
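For the capture side this is just a matter of sampling the counter once; a tiny sketch of the idea with illustrative names, where READ_ONCE() gives a single, untorn load of a value that other contexts update under ct->fast_lock, which is all a debug snapshot needs:

	#include <linux/compiler.h>
	#include <linux/types.h>

	struct ct_counters { u32 g2h_outstanding; };	/* updated under fast_lock elsewhere */
	struct ct_snapshot { u32 g2h_outstanding; };

	static void snapshot_g2h(struct ct_counters *ct, struct ct_snapshot *snap)
	{
		/* sample once; no need to synchronise against in-flight updates */
		snap->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
	}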
Signed-off-by: Matthew Auld Cc: José Roberto de Souza Cc: Rodrigo Vivi Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index acf488b00b66..0b086d17c083 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1219,6 +1219,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, if (ct->enabled) { snapshot->ct_enabled = true; + snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding); guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, &snapshot->h2g, atomic); guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, -- cgit v1.2.3 From 35c8a964398e1c57968cc94cd6f4e3a64c796357 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 10 Jul 2023 10:40:49 +0100 Subject: drm/xe: handle TLB invalidations from CT fast-path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In various test cases that put the system under a heavy load, we can sometimes see errors with missed TLB invalidations. In such cases we see the interrupt arrive for the invalidation from the GuC, however the actual processing of the completion is pushed onto a workqueue and handled with all the other CT stuff, which might take longer than expected. Since we expect TLB invalidations to complete within a reasonable amount of time (at most ~250ms), and they do seem pretty critical, allow handling directly from the CT fast-path. v2 (José): - Actually use the correct spinlock/unlock_irq, since pending_lock is grabbed from IRQ. v3: - Don't publish the TLB fence on the list until after we fully initialize it and successfully do the CT send. The list is now only protected by the spin_lock pending_lock and we can't hold that across the entire TLB send operation. v4 (Matt Brost): - Be careful with racing against fast CT path writing the seqno, before we have actually published the fence. 
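The heart of making out-of-order processing safe is the wrap-aware "has this seqno already completed?" test used by the interrupt fast path, the send path and the wait path. Shown standalone below, with SEQNO_MAX standing in for TLB_INVALIDATION_SEQNO_MAX and kept deliberately tiny so the wraparound is easy to follow: any difference larger than half the seqno space is treated as a wrap, so with SEQNO_MAX = 8 and seqno_recv = 2, a query for seqno 7 returns true (7 is read as an old, pre-wrap seqno rather than a future one), while with seqno_recv = 7 a query for seqno 2 returns false, since 2 is a post-wrap seqno that has not been acknowledged yet.

	#define SEQNO_MAX	8	/* tiny stand-in so the wrap is visible */

	static bool seqno_past(int seqno_recv, int seqno)
	{
		if (seqno - seqno_recv < -(SEQNO_MAX / 2))
			return false;	/* seqno is ahead of us, across the wrap */

		if (seqno - seqno_recv > (SEQNO_MAX / 2))
			return true;	/* seqno is behind us, across the wrap */

		return seqno_recv >= seqno;
	}

The same test is what lets the send side detect, under pending_lock, that its seqno was already acknowledged before the fence was ever published, in which case the fence is signalled immediately instead of being queued.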
References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/297 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/320 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/449 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: José Roberto de Souza Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 134 +++++++++++++++++----------- drivers/gpu/drm/xe/xe_gt_types.h | 5 ++ drivers/gpu/drm/xe/xe_guc_ct.c | 12 +-- 3 files changed, 91 insertions(+), 60 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 29819dc820c5..e2b85559257c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -25,7 +25,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) tlb_invalidation.fence_tdr.work); struct xe_gt_tlb_invalidation_fence *fence, *next; - mutex_lock(>->uc.guc.ct.lock); + spin_lock_irq(>->tlb_invalidation.pending_lock); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { s64 since_inval_ms = ktime_ms_delta(ktime_get(), @@ -47,7 +47,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) queue_delayed_work(system_wq, >->tlb_invalidation.fence_tdr, TLB_TIMEOUT); - mutex_unlock(>->uc.guc.ct.lock); + spin_unlock_irq(>->tlb_invalidation.pending_lock); } /** @@ -63,6 +63,7 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) { gt->tlb_invalidation.seqno = 1; INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); + spin_lock_init(>->tlb_invalidation.pending_lock); spin_lock_init(>->tlb_invalidation.lock); gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, @@ -72,14 +73,20 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) } static void -invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +__invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) { trace_xe_gt_tlb_invalidation_fence_signal(fence); - list_del(&fence->link); dma_fence_signal(&fence->base); dma_fence_put(&fence->base); } +static void +invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +{ + list_del(&fence->link); + __invalidation_fence_signal(fence); +} + /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: graphics tile @@ -98,6 +105,7 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) */ mutex_lock(>->uc.guc.ct.lock); + spin_lock_irq(>->tlb_invalidation.pending_lock); cancel_delayed_work(>->tlb_invalidation.fence_tdr); /* * We might have various kworkers waiting for TLB flushes to complete @@ -116,9 +124,23 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) invalidation_fence_signal(fence); + spin_unlock_irq(>->tlb_invalidation.pending_lock); mutex_unlock(>->uc.guc.ct.lock); } +static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) +{ + int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv); + + if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) + return false; + + if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) + return true; + + return seqno_recv >= seqno; +} + static int send_tlb_invalidation(struct xe_guc *guc, struct xe_gt_tlb_invalidation_fence *fence, u32 *action, int len) @@ -126,7 +148,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, struct xe_gt *gt = guc_to_gt(guc); int 
seqno; int ret; - bool queue_work; /* * XXX: The seqno algorithm relies on TLB invalidation being processed @@ -137,21 +158,35 @@ static int send_tlb_invalidation(struct xe_guc *guc, mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; if (fence) { - queue_work = list_empty(>->tlb_invalidation.pending_fences); fence->seqno = seqno; - list_add_tail(&fence->link, - >->tlb_invalidation.pending_fences); trace_xe_gt_tlb_invalidation_fence_send(fence); } action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); if (!ret && fence) { - fence->invalidation_time = ktime_get(); - if (queue_work) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - TLB_TIMEOUT); + spin_lock_irq(>->tlb_invalidation.pending_lock); + /* + * We haven't actually published the TLB fence as per + * pending_fences, but in theory our seqno could have already + * been written as we acquired the pending_lock. In such a case + * we can just go ahead and signal the fence here. + */ + if (tlb_invalidation_seqno_past(gt, seqno)) { + __invalidation_fence_signal(fence); + } else { + fence->invalidation_time = ktime_get(); + list_add_tail(&fence->link, + >->tlb_invalidation.pending_fences); + + if (list_is_singular(>->tlb_invalidation.pending_fences)) + queue_delayed_work(system_wq, + >->tlb_invalidation.fence_tdr, + TLB_TIMEOUT); + } + spin_unlock_irq(>->tlb_invalidation.pending_lock); + } else if (ret < 0 && fence) { + __invalidation_fence_signal(fence); } if (!ret) { gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % @@ -160,8 +195,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, gt->tlb_invalidation.seqno = 1; ret = seqno; } - if (ret < 0 && fence) - invalidation_fence_signal(fence); mutex_unlock(&guc->ct.lock); return ret; @@ -276,19 +309,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, return ret; } -static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) -{ - int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv); - - if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) - return false; - - if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) - return true; - - return seqno_recv >= seqno; -} - /** * xe_gt_tlb_invalidation_wait - Wait for TLB to complete * @gt: graphics tile @@ -336,22 +356,31 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_gt_tlb_invalidation_fence *fence; - int expected_seqno; - - lockdep_assert_held(&guc->ct.lock); + struct xe_gt_tlb_invalidation_fence *fence, *next; + unsigned long flags; if (unlikely(len != 1)) return -EPROTO; - /* Sanity check on seqno */ - expected_seqno = (gt->tlb_invalidation.seqno_recv + 1) % - TLB_INVALIDATION_SEQNO_MAX; - if (!expected_seqno) - expected_seqno = 1; - if (drm_WARN_ON(>_to_xe(gt)->drm, expected_seqno != msg[0])) { - drm_err(>_to_xe(gt)->drm, "TLB expected_seqno(%d) != msg(%u)\n", - expected_seqno, msg[0]); + /* + * This can also be run both directly from the IRQ handler and also in + * process_g2h_msg(). Only one may process any individual CT message, + * however the order they are processed here could result in skipping a + * seqno. To handle that we just process all the seqnos from the last + * seqno_recv up to and including the one in msg[0]. The delta should be + * very small so there shouldn't be much of pending_fences we actually + * need to iterate over here. 
+ * + * From GuC POV we expect the seqnos to always appear in-order, so if we + * see something later in the timeline we can be sure that anything + * appearing earlier has already signalled, just that we have yet to + * officially process the CT message like if racing against + * process_g2h_msg(). + */ + spin_lock_irqsave(>->tlb_invalidation.pending_lock, flags); + if (tlb_invalidation_seqno_past(gt, msg[0])) { + spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); + return 0; } /* @@ -361,19 +390,24 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); wake_up_all(&guc->ct.wq); - fence = list_first_entry_or_null(>->tlb_invalidation.pending_fences, - typeof(*fence), link); - if (fence) + list_for_each_entry_safe(fence, next, + >->tlb_invalidation.pending_fences, link) { trace_xe_gt_tlb_invalidation_fence_recv(fence); - if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) { + + if (!tlb_invalidation_seqno_past(gt, fence->seqno)) + break; + invalidation_fence_signal(fence); - if (!list_empty(>->tlb_invalidation.pending_fences)) - mod_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - TLB_TIMEOUT); - else - cancel_delayed_work(>->tlb_invalidation.fence_tdr); } + if (!list_empty(>->tlb_invalidation.pending_fences)) + mod_delayed_work(system_wq, + >->tlb_invalidation.fence_tdr, + TLB_TIMEOUT); + else + cancel_delayed_work(>->tlb_invalidation.fence_tdr); + + spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7d4de019f9a5..28b8e8a86fc9 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -163,6 +163,11 @@ struct xe_gt { * invaliations, protected by CT lock */ struct list_head pending_fences; + /** + * @pending_lock: protects @pending_fences and updating + * @seqno_recv. + */ + spinlock_t pending_lock; /** * @fence_tdr: schedules a delayed call to * xe_gt_tlb_fence_timeout after the timeut interval is over. diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 0b086d17c083..9fb5fd4391d2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -994,15 +994,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) return 0; switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) { - /* - * FIXME: We really should process - * XE_GUC_ACTION_TLB_INVALIDATION_DONE here in the fast-path as - * these critical for page fault performance. We currently can't - * due to TLB invalidation done algorithm expecting the seqno - * returned in-order. With some small changes to the algorithm - * and locking we should be able to support out-of-order seqno. - */ case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: + case XE_GUC_ACTION_TLB_INVALIDATION_DONE: break; /* Process these in fast-path */ default: return 0; @@ -1056,8 +1049,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct) struct xe_device *xe = ct_to_xe(ct); int len; - if (!xe_device_in_fault_mode(xe) || - !xe_device_mem_access_get_if_ongoing(xe)) + if (!xe_device_mem_access_get_if_ongoing(xe)) return; spin_lock(&ct->fast_lock); -- cgit v1.2.3 From 356010a1a0c9fbe55d6c7e5dbd273a0fd224469e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 26 Jun 2023 18:20:40 +0100 Subject: drm/xe/mmio: update gt_count when probing multi-tile It looks like the single-tile PVC in CI dies during module load when doing the pcode init. 
From the logs we try to access the address 0000000000138124 which doesn't map to anything, however 0x138124 also looks to be the PCODE_MAILBOX register. So looks like the per-tile mmio register mapping is NULL. During probe the tile count is potentially trimmed, since we don't know the real count until we actually probe the device. This seems to be the case for single-tile PVC or similar devices. However it looks like the gt_count is never adjusted to respect this updated tile count. As a result when later doing some for_each_gt() loop, like we do for the pcode, we can get back some GT that maps to some non-existent tile which hasn't been properly set up, leading to crashes. Try to fix this by adjusting the gt_count after probing the tiles for real. v2: Fix typo so it actually builds References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/383 Signed-off-by: Matthew Auld Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Ofir Bitton Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5effb21db9d4..779f0a18a815 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -335,6 +335,12 @@ static void xe_mmio_probe_tiles(struct xe_device *xe) adj_tile_count = xe->info.tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + /* + * FIXME: Needs some work for standalone media, but should be impossible + * with multi-tile for now. + */ + xe->info.gt_count = xe->info.tile_count; + drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", xe->info.tile_count, adj_tile_count); -- cgit v1.2.3 From b1f8f4b5eec62173955c04d98723a75f2cfd8f42 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Wed, 12 Jul 2023 18:25:21 -0700 Subject: drm/xe: Fix BUG_ON during bind with prefetch It was missed that print_op needs to include DRM_GPUVA_OP_PREFETCH. Else we hit the impossible BUG_ON: [ 886.371040] ------------[ cut here ]------------ [ 886.371047] kernel BUG at drivers/gpu/drm/xe/xe_vm.c:2234! [ 886.371216] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 886.371229] CPU: 1 PID: 3132 Comm: xe_exec_fault_m [ 886.371257] RIP: 0010:vm_bind_ioctl_ops_create+0x45f/0x470 [xe] ... [ 886.371517] Call Trace: [ 886.371525] [ 886.371531] ? __die_body+0x1a/0x60 [ 886.371546] ? die+0x38/0x60 [ 886.371557] ? do_trap+0x10a/0x120 [ 886.371568] ? vm_bind_ioctl_ops_create+0x45f/0x470 [xe] v2: add debug print for PREFETCH in print_op Fixes: b06d47be7c83 ("drm/xe: Port Xe to GPUVA") Reviewed-by: Matthew Brost Signed-off-by: Brian Welty Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 297b7977ed87..ee67b4fd7320 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2227,6 +2227,11 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), op->unmap.keep ? 
1 : 0); break; + case DRM_GPUVA_OP_PREFETCH: + vma = gpuva_to_vma(op->prefetch.va); + vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", + (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); + break; default: XE_BUG_ON("NOT POSSIBLE"); } -- cgit v1.2.3 From 04194a4f780895799cf83c86d5bb8bc11560a536 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Wed, 12 Jul 2023 18:25:42 -0700 Subject: drm/xe: Fix lockdep warning from xe_vm_madvise We need to hold vm->lock before the xe_vm_is_closed_or_banned(). Else we get this splat: [ 802.555227] ------------[ cut here ]------------ [ 802.555234] WARNING: CPU: 33 PID: 3122 at drivers/gpu/drm/xe/xe_vm.h:60 [ 802.555515] CPU: 33 PID: 3122 Comm: xe_exec_fault_m Tainted: ... [ 802.555709] Call Trace: [ 802.555714] [ 802.555720] ? __warn+0x81/0x170 [ 802.555737] ? xe_vm_madvise_ioctl+0x2de/0x440 [xe] Fixes: 9d858b69b0cf ("drm/xe: Ban a VM if rebind worker hits an error") Reviewed-by: Matthew Brost Signed-off-by: Brian Welty Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm_madvise.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 32f92743d851..5b775f742233 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -267,11 +267,6 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_ERR(xe, !vm)) return -EINVAL; - if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) { - err = -ENOENT; - goto put_vm; - } - if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) { err = -EINVAL; goto put_vm; @@ -279,6 +274,11 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, down_read(&vm->lock); + if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) { + err = -ENOENT; + goto unlock_vm; + } + vmas = get_vmas(vm, &num_vmas, args->addr, args->range); if (XE_IOCTL_ERR(xe, err)) goto unlock_vm; -- cgit v1.2.3 From 4cd6d492595fdcbb158def8b175ca1558363e742 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 11 Jul 2023 16:24:30 +0200 Subject: drm/xe: Cleanup SPACING style issues Remove almost all existing style issues of type SPACING reported by checkpatch. 
Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.h | 4 ++-- drivers/gpu/drm/xe/xe_device_types.h | 4 ++-- drivers/gpu/drm/xe/xe_force_wake.c | 2 +- drivers/gpu/drm/xe/xe_gt.h | 2 +- drivers/gpu/drm/xe/xe_gt_clock.c | 8 ++++---- drivers/gpu/drm/xe/xe_gt_types.h | 8 ++++---- drivers/gpu/drm/xe/xe_rtp.h | 4 ++-- drivers/gpu/drm/xe/xe_rtp_helpers.h | 10 +++++----- drivers/gpu/drm/xe/xe_sched_job.c | 2 +- drivers/gpu/drm/xe/xe_uc_fw.c | 4 ++-- 10 files changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 8e01bbadb149..5c827beaea28 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -122,7 +122,7 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe) #define for_each_tile(tile__, xe__, id__) \ for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \ - for_each_if ((tile__) = &(xe__)->tiles[(id__)]) + for_each_if((tile__) = &(xe__)->tiles[(id__)]) /* * FIXME: This only works for now since multi-tile and standalone media @@ -130,7 +130,7 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe) */ #define for_each_gt(gt__, xe__, id__) \ for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ - for_each_if ((gt__) = xe_device_get_gt((xe__), (id__))) + for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index db08d64abce1..fb2329ccce06 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -47,8 +47,8 @@ struct xe_ggtt; #define tile_to_xe(tile__) \ _Generic(tile__, \ - const struct xe_tile *: (const struct xe_device *)((tile__)->xe), \ - struct xe_tile *: (tile__)->xe) + const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ + struct xe_tile * : (tile__)->xe) /** * struct xe_tile - hardware tile structure diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index f0f0592fc598..7403673d532d 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -87,7 +87,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) BIT(0), BIT(16)); } - for (i = XE_HW_ENGINE_VECS0, j =0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { + for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { if (!(gt->info.engine_mask & BIT(i))) continue; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 21d9044088de..a523d7941afe 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -13,7 +13,7 @@ #define for_each_hw_engine(hwe__, gt__, id__) \ for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \ - for_each_if (((hwe__) = (gt__)->hw_engines + (id__)) && \ + for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ xe_hw_engine_is_valid((hwe__))) struct xe_gt *xe_gt_alloc(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 7cf11078ff57..932b61e0cf67 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -38,13 +38,13 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg) rpm_config_reg); switch (crystal_clock) { - case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ : + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: 
return f24_mhz; - case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ : + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: return f19_2_mhz; - case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ : + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: return f38_4_mhz; - case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ : + case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: return f25_mhz; default: XE_BUG_ON("NOT_POSSIBLE"); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 28b8e8a86fc9..78a9fe9f0bd3 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -75,13 +75,13 @@ enum xe_steering_type { #define gt_to_tile(gt__) \ _Generic(gt__, \ - const struct xe_gt *: (const struct xe_tile *)((gt__)->tile), \ - struct xe_gt *: (gt__)->tile) + const struct xe_gt * : (const struct xe_tile *)((gt__)->tile), \ + struct xe_gt * : (gt__)->tile) #define gt_to_xe(gt__) \ _Generic(gt__, \ - const struct xe_gt *: (const struct xe_device *)(gt_to_tile(gt__)->xe), \ - struct xe_gt *: gt_to_tile(gt__)->xe) + const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe), \ + struct xe_gt * : gt_to_tile(gt__)->xe) /** * struct xe_gt - A "Graphics Technology" unit of the GPU diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 8581bd9b1426..04ccb26452ad 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -373,8 +373,8 @@ struct xe_reg_sr; } #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ - struct xe_hw_engine *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ - struct xe_gt *: (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) + struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ + struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, unsigned long *active_entries, diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index 1beea434d52d..181b6290fac3 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -14,13 +14,13 @@ * Helper macros - not to be used outside this header. */ #define _XE_ESC(...) __VA_ARGS__ -#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,)) -#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_ +#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,)) +#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_ #define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,)) -#define __XE_FIRST(x_,...) x_ +#define __XE_FIRST(x_, ...) x_ #define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__)) -#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__) +#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__) #define _XE_DROP_FIRST(x_, ...) 
__VA_ARGS__ @@ -59,7 +59,7 @@ * * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR */ -#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_)) +#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_)) #define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) #define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) #define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 795146dfd663..c87f65c98b3d 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -35,7 +35,7 @@ int __init xe_sched_job_module_init(void) kmem_cache_create("xe_sched_job_parallel", sizeof(struct xe_sched_job) + sizeof(u64) * - XE_HW_ENGINE_MAX_INSTANCE , 0, + XE_HW_ENGINE_MAX_INSTANCE, 0, SLAB_HWCACHE_ALIGN, NULL); if (!xe_sched_job_parallel_slab) { kmem_cache_destroy(xe_sched_job_slab); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index bc63c0d3e33a..84df4ce45e03 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -100,8 +100,8 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(METEORLAKE, mmp_ver( i915, guc, mtl, 70, 6, 4)) \ - fw_def(PVC, mmp_ver( xe, guc, pvc, 70, 6, 4)) \ + fw_def(METEORLAKE, mmp_ver(i915, guc, mtl, 70, 6, 4)) \ + fw_def(PVC, mmp_ver(xe, guc, pvc, 70, 6, 4)) \ fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \ fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 5)) \ -- cgit v1.2.3 From fb1d55efdfcbfd8711f7b8db65267f370fa0e49b Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 11 Jul 2023 16:58:20 +0200 Subject: drm/xe: Cleanup OPEN_BRACE style issues Remove almost all existing style issues of type OPEN_BRACE reported by checkpatch. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 3db550c85e32..ff4075387564 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -429,7 +429,8 @@ static void mcr_lock(struct xe_gt *gt) drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); } -static void mcr_unlock(struct xe_gt *gt) { +static void mcr_unlock(struct xe_gt *gt) +{ /* Release hardware semaphore */ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1); -- cgit v1.2.3 From 4ab5901cc0ed8951ae58b01740d0037dbbca8558 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 11 Jul 2023 17:33:55 +0200 Subject: drm/xe: Cleanup POINTER_LOCATION style issues Remove all existing style issues of type POINTER_LOCATION reported by checkpatch. 
Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.h | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 5c827beaea28..a64828bc6ad2 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -132,7 +132,7 @@ static inline void xe_device_guc_submission_disable(struct xe_device *xe) for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) -static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt) +static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) { return >->mmio.fw; } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 8f25a38f36a5..ddb1b1d6d00d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -441,7 +441,7 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) } } -static void set_context_control(u32 * regs, struct xe_hw_engine *hwe) +static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) { regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) | _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | -- cgit v1.2.3 From 763931d25c7f40226c5e5edd8dcf90f2f2dfcddf Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 11 Jul 2023 17:35:57 +0200 Subject: drm/xe: Cleanup CODE_INDENT style issues Remove all existing style issues of type CODE_INDENT reported by checkpatch. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_fwif.h | 12 ++++++------ drivers/gpu/drm/xe/xe_guc_submit.c | 14 +++++++------- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 10 +++++----- 5 files changed, 21 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index af75c9a0ea7b..bd800eaa37a6 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -173,7 +173,7 @@ enum xe_engine_priority xe_engine_device_get_max_priority(struct xe_device *xe) { return capable(CAP_SYS_NICE) ? 
XE_ENGINE_PRIORITY_HIGH : - XE_ENGINE_PRIORITY_NORMAL; + XE_ENGINE_PRIORITY_NORMAL; } static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, @@ -540,7 +540,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, return -EFAULT; if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count)) - return -EINVAL; + return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { for_each_gt(gt, xe, id) { diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 27d132ce2087..e215e8b2c17a 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -64,19 +64,19 @@ struct guc_ctxt_registration_info { /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { - u32 kl; - u32 value; + u32 kl; + u32 value; } __packed; /* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */ struct guc_update_engine_policy_header { - u32 action; - u32 guc_id; + u32 action; + u32 guc_id; } __packed; struct guc_update_engine_policy { - struct guc_update_engine_policy_header header; - struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; + struct guc_update_engine_policy_header header; + struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; } __packed; /* GUC_CTL_* - Parameters for loading the GuC */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9c0fd1368b77..0c07cd4ad204 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -353,17 +353,17 @@ static const int xe_engine_prio_to_guc[] = { static void init_policies(struct xe_guc *guc, struct xe_engine *e) { - struct engine_policy policy; + struct engine_policy policy; enum xe_engine_priority prio = e->priority; u32 timeslice_us = e->sched_props.timeslice_us; u32 preempt_timeout_us = e->sched_props.preempt_timeout_us; XE_BUG_ON(!engine_registered(e)); - __guc_engine_policy_start_klv(&policy, e->guc->id); - __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]); - __guc_engine_policy_add_execution_quantum(&policy, timeslice_us); - __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us); + __guc_engine_policy_start_klv(&policy, e->guc->id); + __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]); + __guc_engine_policy_add_execution_quantum(&policy, timeslice_us); + __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us); xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, __guc_engine_policy_action_size(&policy), 0, 0); @@ -373,8 +373,8 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) { struct engine_policy policy; - __guc_engine_policy_start_klv(&policy, e->guc->id); - __guc_engine_policy_add_preemption_timeout(&policy, 1); + __guc_engine_policy_start_klv(&policy, e->guc->id); + __guc_engine_policy_add_preemption_timeout(&policy, 1); xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, __guc_engine_policy_action_size(&policy), 0, 0); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b7b02c96e998..1af5cccd1142 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -312,7 +312,7 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) /* TODO: missing handling of HAS_L3_CCS_READ platforms */ const u8 mocs_read_idx = gt->mocs.uc_index; u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | - REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); + 
REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); const struct xe_rtp_entry_sr engine_entries[] = { /* diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 70eabf567156..e244d27b55d5 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -201,14 +201,14 @@ int xe_uc_start(struct xe_uc *uc) static void uc_reset_wait(struct xe_uc *uc) { - int ret; + int ret; again: - xe_guc_reset_wait(&uc->guc); + xe_guc_reset_wait(&uc->guc); - ret = xe_uc_reset_prepare(uc); - if (ret) - goto again; + ret = xe_uc_reset_prepare(uc); + if (ret) + goto again; } int xe_uc_suspend(struct xe_uc *uc) -- cgit v1.2.3 From 80c58bdf0ea28ccb2e78647d53524ef86486e3ec Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 13 Jul 2023 15:38:48 +0200 Subject: drm/xe: Cleanup TRAILING_WHITESPACE style issues Remove all existing style issues of type TRAILING_WHITESPACE reported by checkpatch. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 5eaa9bed9d12..21087f7a4609 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -482,7 +482,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH, - /* + /* * Wa_14012342262 write-only reg, so skip * verification */ @@ -492,7 +492,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G10), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH, - /* + /* * Wa_14012342262 write-only reg, so skip * verification */ -- cgit v1.2.3 From f5b85ab62b0ae0e6b5817312eeb252effaea2453 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 13 Jul 2023 16:20:20 +0200 Subject: drm/xe: Cleanup COMPLEX_MACRO style issues Remove some style issues of type COMPLEX_MACRO reported by checkpatch. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_guc_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_huc_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index b5a5538ae630..d0092d714ffe 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -171,7 +171,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) * entry and drm_debugfs_create_files just references the drm_info_list * passed in (e.g. can't define this on the stack). 
*/ -#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) +#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) local = drmm_kmalloc(>_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL); if (!local) { XE_WARN_ON("Couldn't allocate memory"); diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index 6b72db4d5bb2..0178b1a2d367 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -87,7 +87,7 @@ void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) struct drm_info_list *local; int i; -#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) +#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); if (!local) { XE_WARN_ON("Couldn't allocate memory"); diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c index ee3d8315036a..ae3c21315d59 100644 --- a/drivers/gpu/drm/xe/xe_huc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -53,7 +53,7 @@ void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent) struct drm_info_list *local; int i; -#define DEBUGFS_SIZE ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list) +#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL); if (!local) { XE_WARN_ON("Couldn't allocate memory"); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index ddb1b1d6d00d..d5f782f8d2a6 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -483,7 +483,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset #define LRC_SEQNO_PPHWSP_OFFSET 512 -#define LRC_START_SEQNO_PPHWSP_OFFSET LRC_SEQNO_PPHWSP_OFFSET + 8 +#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 #define LRC_PPHWSP_SIZE SZ_4K -- cgit v1.2.3 From 5ce58303440b7efb21c554cb0b6614482aab8fe9 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 13 Jul 2023 16:50:35 +0200 Subject: drm/xe: Fix typos Fix minor issues: remove extra ';' and s/Initialise/Initialize/. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 8245bbc58770..ce8b35dcbc51 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -200,7 +200,7 @@ static void guc_init_params(struct xe_guc *guc) } /* - * Initialise the GuC parameter block before starting the firmware + * Initialize the GuC parameter block before starting the firmware * transfer. These parameters are read by the firmware on startup * and cannot be changed thereafter. 
*/ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ee67b4fd7320..2052f1edc1ea 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3361,7 +3361,7 @@ int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, XE_BUG_ON(!ww); tv_vm.num_shared = num_resv; - tv_vm.bo = xe_vm_ttm_bo(vm);; + tv_vm.bo = xe_vm_ttm_bo(vm); list_add_tail(&tv_vm.head, &objs); return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); -- cgit v1.2.3 From b8c1ba831e675005ff871cd4a4e04ff90326b4ae Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Mon, 17 Jul 2023 10:20:18 +0200 Subject: drm/xe: Prevent flooding the kernel log with XE_IOCTL_ERR Lower log level of XE_IOCTL_ERR macro to debug in order to prevent flooding kernel log. v2: Rename XE_IOCTL_ERR to XE_IOCTL_DBG (Rodrigo Vivi) v3: Rebase v4: Fix style, remove unrelated change about __FILE__ and __LINE__ Link: https://lists.freedesktop.org/archives/intel-xe/2023-May/004704.html Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 26 +++--- drivers/gpu/drm/xe/xe_engine.c | 114 ++++++++++++------------- drivers/gpu/drm/xe/xe_exec.c | 14 +-- drivers/gpu/drm/xe/xe_macros.h | 4 +- drivers/gpu/drm/xe/xe_mmio.c | 14 +-- drivers/gpu/drm/xe/xe_query.c | 30 +++---- drivers/gpu/drm/xe/xe_sync.c | 32 +++---- drivers/gpu/drm/xe/xe_vm.c | 146 ++++++++++++++++---------------- drivers/gpu/drm/xe/xe_vm_madvise.c | 40 ++++----- drivers/gpu/drm/xe/xe_wait_user_fence.c | 30 +++---- 10 files changed, 225 insertions(+), 225 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 00b8b5e7f197..1031cb69219d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1724,35 +1724,35 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, u32 handle; int err; - if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->flags & + if (XE_IOCTL_DBG(xe, args->flags & ~(XE_GEM_CREATE_FLAG_DEFER_BACKING | XE_GEM_CREATE_FLAG_SCANOUT | xe->info.mem_region_mask))) return -EINVAL; /* at least one memory type must be specified */ - if (XE_IOCTL_ERR(xe, !(args->flags & xe->info.mem_region_mask))) + if (XE_IOCTL_DBG(xe, !(args->flags & xe->info.mem_region_mask))) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->handle)) + if (XE_IOCTL_DBG(xe, args->handle)) return -EINVAL; - if (XE_IOCTL_ERR(xe, !args->size)) + if (XE_IOCTL_DBG(xe, !args->size)) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->size > SIZE_MAX)) + if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX)) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->size & ~PAGE_MASK)) + if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK)) return -EINVAL; if (args->vm_id) { vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) + if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; err = xe_vm_lock(vm, &ww, 0, true); if (err) { @@ -1795,15 +1795,15 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, struct drm_xe_gem_mmap_offset *args = data; struct drm_gem_object *gem_obj; - if (XE_IOCTL_ERR(xe, args->extensions) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_ERR(xe, 
args->flags)) + if (XE_IOCTL_DBG(xe, args->flags)) return -EINVAL; gem_obj = drm_gem_object_lookup(file, args->handle); - if (XE_IOCTL_ERR(xe, !gem_obj)) + if (XE_IOCTL_DBG(xe, !gem_obj)) return -ENOENT; /* The mmap offset was set up at BO allocation time. */ diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index bd800eaa37a6..c31e55c10a33 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -179,10 +179,10 @@ xe_engine_device_get_max_priority(struct xe_device *xe) static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, u64 value, bool create) { - if (XE_IOCTL_ERR(xe, value > XE_ENGINE_PRIORITY_HIGH)) + if (XE_IOCTL_DBG(xe, value > XE_ENGINE_PRIORITY_HIGH)) return -EINVAL; - if (XE_IOCTL_ERR(xe, value > xe_engine_device_get_max_priority(xe))) + if (XE_IOCTL_DBG(xe, value > xe_engine_device_get_max_priority(xe))) return -EPERM; return e->ops->set_priority(e, value); @@ -210,33 +210,33 @@ static int engine_set_preemption_timeout(struct xe_device *xe, static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, u64 value, bool create) { - if (XE_IOCTL_ERR(xe, !create)) + if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) return -EINVAL; - if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_VM)) + if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_VM)) return -EINVAL; if (value) { struct xe_vm *vm = e->vm; int err; - if (XE_IOCTL_ERR(xe, xe_vm_in_fault_mode(vm))) + if (XE_IOCTL_DBG(xe, xe_vm_in_fault_mode(vm))) return -EOPNOTSUPP; - if (XE_IOCTL_ERR(xe, !xe_vm_in_compute_mode(vm))) + if (XE_IOCTL_DBG(xe, !xe_vm_in_compute_mode(vm))) return -EOPNOTSUPP; - if (XE_IOCTL_ERR(xe, e->width != 1)) + if (XE_IOCTL_DBG(xe, e->width != 1)) return -EINVAL; e->compute.context = dma_fence_context_alloc(1); spin_lock_init(&e->compute.lock); err = xe_vm_add_compute_engine(vm, e); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return err; e->flags |= ENGINE_FLAG_COMPUTE_MODE; @@ -249,10 +249,10 @@ static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e, u64 value, bool create) { - if (XE_IOCTL_ERR(xe, !create)) + if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_ERR(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) return -EINVAL; if (value) @@ -266,7 +266,7 @@ static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e, static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, u64 value, bool create) { - if (XE_IOCTL_ERR(xe, !create)) + if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; if (!capable(CAP_SYS_NICE)) @@ -278,10 +278,10 @@ static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, u64 value, bool create) { - if (XE_IOCTL_ERR(xe, !create)) + if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) return -EINVAL; e->usm.acc_trigger = value; @@ -292,10 +292,10 @@ static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, u64 value, bool create) { - if (XE_IOCTL_ERR(xe, !create)) + if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if 
(XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) return -EINVAL; e->usm.acc_notify = value; @@ -306,10 +306,10 @@ static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e, u64 value, bool create) { - if (XE_IOCTL_ERR(xe, !create)) + if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_ERR(xe, !xe->info.supports_usm)) + if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) return -EINVAL; e->usm.acc_granularity = value; @@ -344,12 +344,12 @@ static int engine_user_ext_set_property(struct xe_device *xe, u32 idx; err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_ERR(xe, ext.property >= + if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(engine_set_property_funcs)) || - XE_IOCTL_ERR(xe, ext.pad)) + XE_IOCTL_DBG(xe, ext.pad)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs)); @@ -374,22 +374,22 @@ static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, int err; u32 idx; - if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS)) + if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_ERR(xe, ext.pad) || - XE_IOCTL_ERR(xe, ext.name >= + if (XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(engine_user_extension_funcs))) return -EINVAL; idx = array_index_nospec(ext.name, ARRAY_SIZE(engine_user_extension_funcs)); err = engine_user_extension_funcs[idx](xe, e, extensions, create); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return err; if (ext.next_extension) @@ -435,11 +435,11 @@ static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt, enum xe_hw_engine_id id; u32 logical_mask = 0; - if (XE_IOCTL_ERR(xe, width != 1)) + if (XE_IOCTL_DBG(xe, width != 1)) return 0; - if (XE_IOCTL_ERR(xe, num_placements != 1)) + if (XE_IOCTL_DBG(xe, num_placements != 1)) return 0; - if (XE_IOCTL_ERR(xe, eci[0].engine_instance != 0)) + if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) return 0; eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; @@ -466,7 +466,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, u16 gt_id; u32 return_mask = 0, prev_mask; - if (XE_IOCTL_ERR(xe, !xe_device_guc_submission_enabled(xe) && + if (XE_IOCTL_DBG(xe, !xe_device_guc_submission_enabled(xe) && len > 1)) return 0; @@ -479,14 +479,14 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, n = j * width + i; hwe = find_hw_engine(xe, eci[n]); - if (XE_IOCTL_ERR(xe, !hwe)) + if (XE_IOCTL_DBG(xe, !hwe)) return 0; - if (XE_IOCTL_ERR(xe, xe_hw_engine_is_reserved(hwe))) + if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe))) return 0; - if (XE_IOCTL_ERR(xe, n && eci[n].gt_id != gt_id) || - XE_IOCTL_ERR(xe, n && eci[n].engine_class != class)) + if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) || + XE_IOCTL_DBG(xe, n && eci[n].engine_class != class)) return 0; class = eci[n].engine_class; @@ -498,7 +498,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, } /* Parallel submissions must be logically contiguous */ - if (i && XE_IOCTL_ERR(xe, current_mask != prev_mask << 1)) + if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1)) return 0; prev_mask = current_mask; 
@@ -525,21 +525,21 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, u32 len; int err; - if (XE_IOCTL_ERR(xe, args->flags) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->flags) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; len = args->width * args->num_placements; - if (XE_IOCTL_ERR(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) + if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) return -EINVAL; err = __copy_from_user(eci, user_eci, sizeof(struct drm_xe_engine_class_instance) * len); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_ERR(xe, eci[0].gt_id >= xe->info.tile_count)) + if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.tile_count)) return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { @@ -553,11 +553,11 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, logical_mask = bind_engine_logical_mask(xe, gt, eci, args->width, args->num_placements); - if (XE_IOCTL_ERR(xe, !logical_mask)) + if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_ERR(xe, !hwe)) + if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); @@ -586,15 +586,15 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, logical_mask = calc_validate_logical_mask(xe, gt, eci, args->width, args->num_placements); - if (XE_IOCTL_ERR(xe, !logical_mask)) + if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_ERR(xe, !hwe)) + if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) + if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; err = down_read_interruptible(&vm->lock); @@ -603,7 +603,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, return err; } - if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) { + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { up_read(&vm->lock); xe_vm_put(vm); return -ENOENT; @@ -621,11 +621,11 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, if (args->extensions) { err = engine_user_extensions(xe, e, args->extensions, 0, true); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) goto put_engine; } - if (XE_IOCTL_ERR(xe, e->vm && xe_vm_in_compute_mode(e->vm) != + if (XE_IOCTL_DBG(xe, e->vm && xe_vm_in_compute_mode(e->vm) != !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) { err = -EOPNOTSUPP; goto put_engine; @@ -658,11 +658,11 @@ int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, struct xe_engine *e; int ret; - if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; e = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_ERR(xe, !e)) + if (XE_IOCTL_DBG(xe, !e)) return -ENOENT; switch (args->property) { @@ -771,14 +771,14 @@ int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_destroy *args = data; struct xe_engine *e; - if (XE_IOCTL_ERR(xe, args->pad) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->pad) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; mutex_lock(&xef->engine.lock); e = xa_erase(&xef->engine.xa, args->engine_id); mutex_unlock(&xef->engine.lock); - if (XE_IOCTL_ERR(xe, !e)) + if (XE_IOCTL_DBG(xe, !e)) return -ENOENT; if (!(e->flags & ENGINE_FLAG_PERSISTENT)) @@ 
-802,14 +802,14 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, int ret; u32 idx; - if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; e = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_ERR(xe, !e)) + if (XE_IOCTL_DBG(xe, !e)) return -ENOENT; - if (XE_IOCTL_ERR(xe, args->property >= + if (XE_IOCTL_DBG(xe, args->property >= ARRAY_SIZE(engine_set_property_funcs))) { ret = -EINVAL; goto out; @@ -818,7 +818,7 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, idx = array_index_nospec(args->property, ARRAY_SIZE(engine_set_property_funcs)); ret = engine_set_property_funcs[idx](xe, e, args->value, false); - if (XE_IOCTL_ERR(xe, ret)) + if (XE_IOCTL_DBG(xe, ret)) goto out; if (args->extensions) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 07f4b2e8df16..ff9fa02b5395 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -184,22 +184,22 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bool write_locked; int err = 0; - if (XE_IOCTL_ERR(xe, args->extensions) || - XE_IOCTL_ERR(xe, args->pad[0] || args->pad[1] || args->pad[2]) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; engine = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_ERR(xe, !engine)) + if (XE_IOCTL_DBG(xe, !engine)) return -ENOENT; - if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_VM)) + if (XE_IOCTL_DBG(xe, engine->flags & ENGINE_FLAG_VM)) return -EINVAL; - if (XE_IOCTL_ERR(xe, engine->width != args->num_batch_buffer)) + if (XE_IOCTL_DBG(xe, engine->width != args->num_batch_buffer)) return -EINVAL; - if (XE_IOCTL_ERR(xe, engine->flags & ENGINE_FLAG_BANNED)) { + if (XE_IOCTL_DBG(xe, engine->flags & ENGINE_FLAG_BANNED)) { err = -ECANCELED; goto err_engine; } diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h index 0d24c124d202..038cf28604ad 100644 --- a/drivers/gpu/drm/xe/xe_macros.h +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -12,8 +12,8 @@ #define XE_WARN_ON WARN_ON #define XE_BUG_ON BUG_ON -#define XE_IOCTL_ERR(xe, cond) \ - ((cond) && (drm_info(&(xe)->drm, \ +#define XE_IOCTL_DBG(xe, cond) \ + ((cond) && (drm_dbg(&(xe)->drm, \ "Ioctl argument check failed at %s:%d: %s", \ __FILE__, __LINE__, #cond), 1)) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 779f0a18a815..448b874c7a3c 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -447,14 +447,14 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, bool allowed; int ret = 0; - if (XE_IOCTL_ERR(xe, args->extensions) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->flags & ~VALID_MMIO_FLAGS)) + if (XE_IOCTL_DBG(xe, args->flags & ~VALID_MMIO_FLAGS)) return -EINVAL; - if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value)) + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value)) return -EINVAL; allowed = capable(CAP_SYS_ADMIN); @@ -469,12 +469,12 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, } } - if (XE_IOCTL_ERR(xe, !allowed)) + if (XE_IOCTL_DBG(xe, !allowed)) return 
-EPERM; bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK; bytes = 1 << bits_flag; - if (XE_IOCTL_ERR(xe, args->addr + bytes > xe->mmio.size)) + if (XE_IOCTL_DBG(xe, args->addr + bytes > xe->mmio.size)) return -EINVAL; /* @@ -488,7 +488,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_MMIO_WRITE) { switch (bits_flag) { case DRM_XE_MMIO_32BIT: - if (XE_IOCTL_ERR(xe, args->value > U32_MAX)) { + if (XE_IOCTL_DBG(xe, args->value > U32_MAX)) { ret = -EINVAL; goto exit; } diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 9acbb27dfcab..4b7869596ba8 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -60,12 +60,12 @@ static int query_engines(struct xe_device *xe, if (query->size == 0) { query->size = size; return 0; - } else if (XE_IOCTL_ERR(xe, query->size != size)) { + } else if (XE_IOCTL_DBG(xe, query->size != size)) { return -EINVAL; } hw_engine_info = kmalloc(size, GFP_KERNEL); - if (XE_IOCTL_ERR(xe, !hw_engine_info)) + if (XE_IOCTL_DBG(xe, !hw_engine_info)) return -ENOMEM; for_each_gt(gt, xe, gt_id) @@ -114,12 +114,12 @@ static int query_memory_usage(struct xe_device *xe, if (query->size == 0) { query->size = size; return 0; - } else if (XE_IOCTL_ERR(xe, query->size != size)) { + } else if (XE_IOCTL_DBG(xe, query->size != size)) { return -EINVAL; } usage = kzalloc(size, GFP_KERNEL); - if (XE_IOCTL_ERR(xe, !usage)) + if (XE_IOCTL_DBG(xe, !usage)) return -ENOMEM; man = ttm_manager_type(&xe->ttm, XE_PL_TT); @@ -177,12 +177,12 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (query->size == 0) { query->size = size; return 0; - } else if (XE_IOCTL_ERR(xe, query->size != size)) { + } else if (XE_IOCTL_DBG(xe, query->size != size)) { return -EINVAL; } config = kzalloc(size, GFP_KERNEL); - if (XE_IOCTL_ERR(xe, !config)) + if (XE_IOCTL_DBG(xe, !config)) return -ENOMEM; config->num_params = num_params; @@ -226,12 +226,12 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) if (query->size == 0) { query->size = size; return 0; - } else if (XE_IOCTL_ERR(xe, query->size != size)) { + } else if (XE_IOCTL_DBG(xe, query->size != size)) { return -EINVAL; } gts = kzalloc(size, GFP_KERNEL); - if (XE_IOCTL_ERR(xe, !gts)) + if (XE_IOCTL_DBG(xe, !gts)) return -ENOMEM; gts->num_gt = xe->info.gt_count; @@ -273,12 +273,12 @@ static int query_hwconfig(struct xe_device *xe, if (query->size == 0) { query->size = size; return 0; - } else if (XE_IOCTL_ERR(xe, query->size != size)) { + } else if (XE_IOCTL_DBG(xe, query->size != size)) { return -EINVAL; } hwconfig = kzalloc(size, GFP_KERNEL); - if (XE_IOCTL_ERR(xe, !hwconfig)) + if (XE_IOCTL_DBG(xe, !hwconfig)) return -ENOMEM; xe_device_mem_access_get(xe); @@ -332,7 +332,7 @@ static int query_gt_topology(struct xe_device *xe, if (query->size == 0) { query->size = size; return 0; - } else if (XE_IOCTL_ERR(xe, query->size != size)) { + } else if (XE_IOCTL_DBG(xe, query->size != size)) { return -EINVAL; } @@ -380,15 +380,15 @@ int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_xe_device_query *query = data; u32 idx; - if (XE_IOCTL_ERR(xe, query->extensions) || - XE_IOCTL_ERR(xe, query->reserved[0] || query->reserved[1])) + if (XE_IOCTL_DBG(xe, query->extensions) || + XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1])) return -EINVAL; - if (XE_IOCTL_ERR(xe, query->query > ARRAY_SIZE(xe_query_funcs))) + if (XE_IOCTL_DBG(xe, query->query > ARRAY_SIZE(xe_query_funcs))) return 
-EINVAL; idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs)); - if (XE_IOCTL_ERR(xe, !xe_query_funcs[idx])) + if (XE_IOCTL_DBG(xe, !xe_query_funcs[idx])) return -EINVAL; return xe_query_funcs[idx](xe, query); diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 7786b908a3fd..9fcd7802ba30 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -110,44 +110,44 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) return -EFAULT; - if (XE_IOCTL_ERR(xe, sync_in.flags & + if (XE_IOCTL_DBG(xe, sync_in.flags & ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)) || - XE_IOCTL_ERR(xe, sync_in.pad) || - XE_IOCTL_ERR(xe, sync_in.reserved[0] || sync_in.reserved[1])) + XE_IOCTL_DBG(xe, sync_in.pad) || + XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1])) return -EINVAL; signal = sync_in.flags & DRM_XE_SYNC_SIGNAL; switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { case DRM_XE_SYNC_SYNCOBJ: - if (XE_IOCTL_ERR(xe, no_dma_fences && signal)) + if (XE_IOCTL_DBG(xe, no_dma_fences && signal)) return -EOPNOTSUPP; - if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) + if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr))) return -EINVAL; sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); - if (XE_IOCTL_ERR(xe, !sync->syncobj)) + if (XE_IOCTL_DBG(xe, !sync->syncobj)) return -ENOENT; if (!signal) { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_ERR(xe, !sync->fence)) + if (XE_IOCTL_DBG(xe, !sync->fence)) return -EINVAL; } break; case DRM_XE_SYNC_TIMELINE_SYNCOBJ: - if (XE_IOCTL_ERR(xe, no_dma_fences && signal)) + if (XE_IOCTL_DBG(xe, no_dma_fences && signal)) return -EOPNOTSUPP; - if (XE_IOCTL_ERR(xe, upper_32_bits(sync_in.addr))) + if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr))) return -EINVAL; - if (XE_IOCTL_ERR(xe, sync_in.timeline_value == 0)) + if (XE_IOCTL_DBG(xe, sync_in.timeline_value == 0)) return -EINVAL; sync->syncobj = drm_syncobj_find(xef->drm, sync_in.handle); - if (XE_IOCTL_ERR(xe, !sync->syncobj)) + if (XE_IOCTL_DBG(xe, !sync->syncobj)) return -ENOENT; if (signal) { @@ -156,7 +156,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, return -ENOMEM; } else { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_ERR(xe, !sync->fence)) + if (XE_IOCTL_DBG(xe, !sync->fence)) return -EINVAL; err = dma_fence_chain_find_seqno(&sync->fence, @@ -167,15 +167,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, break; case DRM_XE_SYNC_DMA_BUF: - if (XE_IOCTL_ERR(xe, "TODO")) + if (XE_IOCTL_DBG(xe, "TODO")) return -EINVAL; break; case DRM_XE_SYNC_USER_FENCE: - if (XE_IOCTL_ERR(xe, !signal)) + if (XE_IOCTL_DBG(xe, !signal)) return -EOPNOTSUPP; - if (XE_IOCTL_ERR(xe, sync_in.addr & 0x7)) + if (XE_IOCTL_DBG(xe, sync_in.addr & 0x7)) return -EINVAL; if (exec) { @@ -183,7 +183,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } else { sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); - if (XE_IOCTL_ERR(xe, !sync->ufence)) + if (XE_IOCTL_DBG(xe, !sync->ufence)) return -ENOMEM; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2052f1edc1ea..7f2f17c3b86e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1875,13 +1875,13 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm, u64 value) { - if (XE_IOCTL_ERR(xe, 
!value)) + if (XE_IOCTL_DBG(xe, !value)) return -EINVAL; - if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) + if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) return -EOPNOTSUPP; - if (XE_IOCTL_ERR(xe, vm->async_ops.error_capture.addr)) + if (XE_IOCTL_DBG(xe, vm->async_ops.error_capture.addr)) return -EOPNOTSUPP; vm->async_ops.error_capture.mm = current->mm; @@ -1907,13 +1907,13 @@ static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm, int err; err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_ERR(xe, ext.property >= + if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(vm_set_property_funcs)) || - XE_IOCTL_ERR(xe, ext.pad) || - XE_IOCTL_ERR(xe, ext.reserved[0] || ext.reserved[1])) + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1])) return -EINVAL; return vm_set_property_funcs[ext.property](xe, vm, ext.value); @@ -1934,20 +1934,20 @@ static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm, struct xe_user_extension ext; int err; - if (XE_IOCTL_ERR(xe, ext_number >= MAX_USER_EXTENSIONS)) + if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_ERR(xe, ext.pad) || - XE_IOCTL_ERR(xe, ext.name >= + if (XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(vm_user_extension_funcs))) return -EINVAL; err = vm_user_extension_funcs[ext.name](xe, vm, extensions); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return err; if (ext.next_extension) @@ -1973,29 +1973,29 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, int err; u32 flags = 0; - if (XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) + if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE && + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE && args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE && + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE && args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && xe_device_in_non_fault_mode(xe))) return -EINVAL; - if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) && + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) && xe_device_in_fault_mode(xe))) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && !xe->info.supports_usm)) return -EINVAL; @@ -2014,7 +2014,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->extensions) { err = vm_user_extensions(xe, vm, args->extensions, 0); - if (XE_IOCTL_ERR(xe, err)) { + if (XE_IOCTL_DBG(xe, err)) { xe_vm_close_and_put(vm); return err; } @@ -2060,15 +2060,15 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, struct xe_vm *vm; int err = 0; - if (XE_IOCTL_ERR(xe, args->pad) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + 
if (XE_IOCTL_DBG(xe, args->pad) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; mutex_lock(&xef->vm.lock); vm = xa_load(&xef->vm.xa, args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) + if (XE_IOCTL_DBG(xe, !vm)) err = -ENOENT; - else if (XE_IOCTL_ERR(xe, vm->preempt.num_engines)) + else if (XE_IOCTL_DBG(xe, vm->preempt.num_engines)) err = -EBUSY; else xa_erase(&xef->vm.xa, args->vm_id); @@ -2156,21 +2156,21 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, case XE_VM_BIND_OP_MAP: case XE_VM_BIND_OP_MAP_USERPTR: vma = xe_vm_find_overlapping_vma(vm, addr, range); - if (XE_IOCTL_ERR(xe, vma && !async)) + if (XE_IOCTL_DBG(xe, vma && !async)) return -EBUSY; break; case XE_VM_BIND_OP_UNMAP: case XE_VM_BIND_OP_PREFETCH: vma = xe_vm_find_overlapping_vma(vm, addr, range); - if (XE_IOCTL_ERR(xe, !vma)) + if (XE_IOCTL_DBG(xe, !vma)) return -ENODATA; /* Not an actual error, IOCTL cleans up returns and 0 */ - if (XE_IOCTL_ERR(xe, (xe_vma_start(vma) != addr || - xe_vma_end(vma) != addr + range) && !async)) + if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr || + xe_vma_end(vma) != addr + range) && !async)) return -EINVAL; break; case XE_VM_BIND_OP_UNMAP_ALL: - if (XE_IOCTL_ERR(xe, list_empty(&bo->ttm.base.gpuva.list))) + if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list))) return -ENODATA; /* Not an actual error, IOCTL cleans up returns and 0 */ break; @@ -3007,9 +3007,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, int err; int i; - if (XE_IOCTL_ERR(xe, args->extensions) || - XE_IOCTL_ERR(xe, !args->num_binds) || - XE_IOCTL_ERR(xe, args->num_binds > MAX_BINDS)) + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, !args->num_binds) || + XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) return -EINVAL; if (args->num_binds > 1) { @@ -3024,7 +3024,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, err = __copy_from_user(*bind_ops, bind_user, sizeof(struct drm_xe_vm_bind_op) * args->num_binds); - if (XE_IOCTL_ERR(xe, err)) { + if (XE_IOCTL_DBG(xe, err)) { err = -EFAULT; goto free_bind_ops; } @@ -3043,60 +3043,60 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, if (i == 0) { *async = !!(op & XE_VM_BIND_FLAG_ASYNC); - } else if (XE_IOCTL_ERR(xe, !*async) || - XE_IOCTL_ERR(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) || - XE_IOCTL_ERR(xe, VM_BIND_OP(op) == + } else if (XE_IOCTL_DBG(xe, !*async) || + XE_IOCTL_DBG(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) || + XE_IOCTL_DBG(xe, VM_BIND_OP(op) == XE_VM_BIND_OP_RESTART)) { err = -EINVAL; goto free_bind_ops; } - if (XE_IOCTL_ERR(xe, !*async && + if (XE_IOCTL_DBG(xe, !*async && VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) { err = -EINVAL; goto free_bind_ops; } - if (XE_IOCTL_ERR(xe, !*async && + if (XE_IOCTL_DBG(xe, !*async && VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) { err = -EINVAL; goto free_bind_ops; } - if (XE_IOCTL_ERR(xe, VM_BIND_OP(op) > + if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) > XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_ERR(xe, op & ~SUPPORTED_FLAGS) || - XE_IOCTL_ERR(xe, obj && is_null) || - XE_IOCTL_ERR(xe, obj_offset && is_null) || - XE_IOCTL_ERR(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP && + XE_IOCTL_DBG(xe, op & ~SUPPORTED_FLAGS) || + XE_IOCTL_DBG(xe, obj && is_null) || + XE_IOCTL_DBG(xe, obj_offset && is_null) || + XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP && is_null) || - XE_IOCTL_ERR(xe, !obj && + XE_IOCTL_DBG(xe, !obj && VM_BIND_OP(op) == XE_VM_BIND_OP_MAP && !is_null) || - XE_IOCTL_ERR(xe, !obj && + XE_IOCTL_DBG(xe, !obj && VM_BIND_OP(op) == 
XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_ERR(xe, addr && + XE_IOCTL_DBG(xe, addr && VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_ERR(xe, range && + XE_IOCTL_DBG(xe, range && VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_ERR(xe, obj && + XE_IOCTL_DBG(xe, obj && VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) || - XE_IOCTL_ERR(xe, obj && + XE_IOCTL_DBG(xe, obj && VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_ERR(xe, region && + XE_IOCTL_DBG(xe, region && VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_ERR(xe, !(BIT(region) & + XE_IOCTL_DBG(xe, !(BIT(region) & xe->info.mem_region_mask)) || - XE_IOCTL_ERR(xe, obj && + XE_IOCTL_DBG(xe, obj && VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) { err = -EINVAL; goto free_bind_ops; } - if (XE_IOCTL_ERR(xe, obj_offset & ~PAGE_MASK) || - XE_IOCTL_ERR(xe, addr & ~PAGE_MASK) || - XE_IOCTL_ERR(xe, range & ~PAGE_MASK) || - XE_IOCTL_ERR(xe, !range && VM_BIND_OP(op) != + if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || + XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || + XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || + XE_IOCTL_DBG(xe, !range && VM_BIND_OP(op) != XE_VM_BIND_OP_RESTART && VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) { err = -EINVAL; @@ -3136,19 +3136,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (args->engine_id) { e = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_ERR(xe, !e)) { + if (XE_IOCTL_DBG(xe, !e)) { err = -ENOENT; goto free_objs; } - if (XE_IOCTL_ERR(xe, !(e->flags & ENGINE_FLAG_VM))) { + if (XE_IOCTL_DBG(xe, !(e->flags & ENGINE_FLAG_VM))) { err = -EINVAL; goto put_engine; } } vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) { + if (XE_IOCTL_DBG(xe, !vm)) { err = -EINVAL; goto put_engine; } @@ -3157,17 +3157,17 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (err) goto put_vm; - if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) { + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { err = -ENOENT; goto release_vm_lock; } if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) { - if (XE_IOCTL_ERR(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) + if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) err = -EOPNOTSUPP; - if (XE_IOCTL_ERR(xe, !err && args->num_syncs)) + if (XE_IOCTL_DBG(xe, !err && args->num_syncs)) err = EINVAL; - if (XE_IOCTL_ERR(xe, !err && !vm->async_ops.error)) + if (XE_IOCTL_DBG(xe, !err && !vm->async_ops.error)) err = -EPROTO; if (!err) { @@ -3184,7 +3184,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto release_vm_lock; } - if (XE_IOCTL_ERR(xe, !vm->async_ops.error && + if (XE_IOCTL_DBG(xe, !vm->async_ops.error && async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) { err = -EOPNOTSUPP; goto release_vm_lock; @@ -3194,8 +3194,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; - if (XE_IOCTL_ERR(xe, range > vm->size) || - XE_IOCTL_ERR(xe, addr > vm->size - range)) { + if (XE_IOCTL_DBG(xe, range > vm->size) || + XE_IOCTL_DBG(xe, addr > vm->size - range)) { err = -EINVAL; goto release_vm_lock; } @@ -3203,7 +3203,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (bind_ops[i].tile_mask) { u64 valid_tiles = BIT(xe->info.tile_count) - 1; - if (XE_IOCTL_ERR(xe, bind_ops[i].tile_mask & + if (XE_IOCTL_DBG(xe, bind_ops[i].tile_mask & ~valid_tiles)) { err = -EINVAL; goto release_vm_lock; @@ -3234,24 +3234,24 @@ int 
xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) continue; gem_obj = drm_gem_object_lookup(file, obj); - if (XE_IOCTL_ERR(xe, !gem_obj)) { + if (XE_IOCTL_DBG(xe, !gem_obj)) { err = -ENOENT; goto put_obj; } bos[i] = gem_to_xe_bo(gem_obj); - if (XE_IOCTL_ERR(xe, range > bos[i]->size) || - XE_IOCTL_ERR(xe, obj_offset > + if (XE_IOCTL_DBG(xe, range > bos[i]->size) || + XE_IOCTL_DBG(xe, obj_offset > bos[i]->size - range)) { err = -EINVAL; goto put_obj; } if (bos[i]->flags & XE_BO_INTERNAL_64K) { - if (XE_IOCTL_ERR(xe, obj_offset & + if (XE_IOCTL_DBG(xe, obj_offset & XE_64K_PAGE_MASK) || - XE_IOCTL_ERR(xe, addr & XE_64K_PAGE_MASK) || - XE_IOCTL_ERR(xe, range & XE_64K_PAGE_MASK)) { + XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { err = -EINVAL; goto put_obj; } diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 5b775f742233..9abcd742c833 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -19,10 +19,10 @@ static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm, { int i, err; - if (XE_IOCTL_ERR(xe, value > XE_MEM_REGION_CLASS_VRAM)) + if (XE_IOCTL_DBG(xe, value > XE_MEM_REGION_CLASS_VRAM)) return -EINVAL; - if (XE_IOCTL_ERR(xe, value == XE_MEM_REGION_CLASS_VRAM && + if (XE_IOCTL_DBG(xe, value == XE_MEM_REGION_CLASS_VRAM && !xe->info.is_dgfx)) return -EINVAL; @@ -48,7 +48,7 @@ static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm, { int i, err; - if (XE_IOCTL_ERR(xe, value > xe->info.tile_count)) + if (XE_IOCTL_DBG(xe, value > xe->info.tile_count)) return -EINVAL; for (i = 0; i < num_vmas; ++i) { @@ -77,14 +77,14 @@ static int madvise_preferred_mem_class_gt(struct xe_device *xe, u32 gt_id = upper_32_bits(value); u32 mem_class = lower_32_bits(value); - if (XE_IOCTL_ERR(xe, mem_class > XE_MEM_REGION_CLASS_VRAM)) + if (XE_IOCTL_DBG(xe, mem_class > XE_MEM_REGION_CLASS_VRAM)) return -EINVAL; - if (XE_IOCTL_ERR(xe, mem_class == XE_MEM_REGION_CLASS_VRAM && + if (XE_IOCTL_DBG(xe, mem_class == XE_MEM_REGION_CLASS_VRAM && !xe->info.is_dgfx)) return -EINVAL; - if (XE_IOCTL_ERR(xe, gt_id > xe->info.tile_count)) + if (XE_IOCTL_DBG(xe, gt_id > xe->info.tile_count)) return -EINVAL; for (i = 0; i < num_vmas; ++i) { @@ -115,7 +115,7 @@ static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm, struct ww_acquire_ctx ww; bo = xe_vma_bo(vmas[i]); - if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT))) + if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT))) return -EINVAL; err = xe_bo_lock(bo, &ww, 0, true); @@ -146,7 +146,7 @@ static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm, struct ww_acquire_ctx ww; bo = xe_vma_bo(vmas[i]); - if (XE_IOCTL_ERR(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) && + if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) && !(bo->flags & XE_BO_CREATE_VRAM1_BIT))) return -EINVAL; @@ -165,10 +165,10 @@ static int madvise_priority(struct xe_device *xe, struct xe_vm *vm, { int i, err; - if (XE_IOCTL_ERR(xe, value > DRM_XE_VMA_PRIORITY_HIGH)) + if (XE_IOCTL_DBG(xe, value > DRM_XE_VMA_PRIORITY_HIGH)) return -EINVAL; - if (XE_IOCTL_ERR(xe, value == DRM_XE_VMA_PRIORITY_HIGH && + if (XE_IOCTL_DBG(xe, value == DRM_XE_VMA_PRIORITY_HIGH && !capable(CAP_SYS_NICE))) return -EPERM; @@ -255,40 +255,40 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct xe_vma **vmas = NULL; int num_vmas = 0, err = 0, idx; - if (XE_IOCTL_ERR(xe, args->extensions) || - 
XE_IOCTL_ERR(xe, args->pad || args->pad2) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->pad || args->pad2) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->property > ARRAY_SIZE(madvise_funcs))) + if (XE_IOCTL_DBG(xe, args->property > ARRAY_SIZE(madvise_funcs))) return -EINVAL; vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) + if (XE_IOCTL_DBG(xe, !vm)) return -EINVAL; - if (XE_IOCTL_ERR(xe, !xe_vm_in_fault_mode(vm))) { + if (XE_IOCTL_DBG(xe, !xe_vm_in_fault_mode(vm))) { err = -EINVAL; goto put_vm; } down_read(&vm->lock); - if (XE_IOCTL_ERR(xe, xe_vm_is_closed_or_banned(vm))) { + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { err = -ENOENT; goto unlock_vm; } vmas = get_vmas(vm, &num_vmas, args->addr, args->range); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) goto unlock_vm; - if (XE_IOCTL_ERR(xe, !vmas)) { + if (XE_IOCTL_DBG(xe, !vmas)) { err = -ENOMEM; goto unlock_vm; } - if (XE_IOCTL_ERR(xe, !num_vmas)) { + if (XE_IOCTL_DBG(xe, !num_vmas)) { err = -EINVAL; goto unlock_vm; } diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index c4420c0dbf9c..c4202df1d4f0 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -117,51 +117,51 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, unsigned long timeout; ktime_t start; - if (XE_IOCTL_ERR(xe, args->extensions) || XE_IOCTL_ERR(xe, args->pad) || - XE_IOCTL_ERR(xe, args->reserved[0] || args->reserved[1])) + if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->flags & ~VALID_FLAGS)) + if (XE_IOCTL_DBG(xe, args->flags & ~VALID_FLAGS)) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->op > MAX_OP)) + if (XE_IOCTL_DBG(xe, args->op > MAX_OP)) return -EINVAL; - if (XE_IOCTL_ERR(xe, no_engines && + if (XE_IOCTL_DBG(xe, no_engines && (args->num_engines || args->instances))) return -EINVAL; - if (XE_IOCTL_ERR(xe, !no_engines && !args->num_engines)) + if (XE_IOCTL_DBG(xe, !no_engines && !args->num_engines)) return -EINVAL; - if (XE_IOCTL_ERR(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) && + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) && addr & 0x7)) return -EINVAL; - if (XE_IOCTL_ERR(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE)) + if (XE_IOCTL_DBG(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE)) return -EINVAL; if (!no_engines) { err = copy_from_user(eci, user_eci, sizeof(struct drm_xe_engine_class_instance) * args->num_engines); - if (XE_IOCTL_ERR(xe, err)) + if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_ERR(xe, check_hw_engines(xe, eci, + if (XE_IOCTL_DBG(xe, check_hw_engines(xe, eci, args->num_engines))) return -EINVAL; } if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) { - if (XE_IOCTL_ERR(xe, args->vm_id >> 32)) + if (XE_IOCTL_DBG(xe, args->vm_id >> 32)) return -EINVAL; vm = xe_vm_lookup(to_xe_file(file), args->vm_id); - if (XE_IOCTL_ERR(xe, !vm)) + if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; - if (XE_IOCTL_ERR(xe, !vm->async_ops.error_capture.addr)) { + if (XE_IOCTL_DBG(xe, !vm->async_ops.error_capture.addr)) { xe_vm_put(vm); return -EOPNOTSUPP; } @@ -226,9 +226,9 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, args->timeout = 0; } - if (XE_IOCTL_ERR(xe, err < 0)) + if 
(XE_IOCTL_DBG(xe, err < 0)) return err; - else if (XE_IOCTL_ERR(xe, !timeout)) + else if (XE_IOCTL_DBG(xe, !timeout)) return -ETIME; return 0; -- cgit v1.2.3 From 3e8e7ee6a375217c4f6a9a96d50e3ae711832d37 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Mon, 17 Jul 2023 16:53:55 +0200 Subject: drm/xe: Cleanup style warnings Reduce the number of warnings reported by checkpatch.pl from 118 to 48 by addressing those warnings types: LEADING_SPACE LINE_SPACING BRACES TRAILING_SEMICOLON CONSTANT_COMPARISON BLOCK_COMMENT_STYLE RETURN_VOID ONE_SEMICOLON SUSPECT_CODE_INDENT LINE_CONTINUATIONS UNNECESSARY_ELSE UNSPECIFIED_INT UNNECESSARY_INT MISORDERED_TYPE Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_exec.c | 1 + drivers/gpu/drm/xe/xe_execlist.h | 2 +- drivers/gpu/drm/xe/xe_gt.h | 2 +- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 3 +- drivers/gpu/drm/xe/xe_guc_fwif.h | 12 ++++-- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_huc.c | 1 + drivers/gpu/drm/xe/xe_irq.c | 11 +++-- drivers/gpu/drm/xe/xe_lrc.c | 80 +++++++++++++++++------------------ drivers/gpu/drm/xe/xe_migrate.c | 3 +- drivers/gpu/drm/xe/xe_pcode.c | 1 + drivers/gpu/drm/xe/xe_reg_whitelist.c | 2 +- drivers/gpu/drm/xe/xe_res_cursor.h | 1 - drivers/gpu/drm/xe/xe_sa.c | 2 +- drivers/gpu/drm/xe/xe_uc_fw.c | 4 +- drivers/gpu/drm/xe/xe_vm.c | 8 ++-- drivers/gpu/drm/xe/xe_vm_doc.h | 4 +- 20 files changed, 75 insertions(+), 70 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1031cb69219d..9ad5cf3e2463 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1720,7 +1720,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct ww_acquire_ctx ww; struct xe_vm *vm = NULL; struct xe_bo *bo; - unsigned bo_flags = XE_BO_CREATE_USER_BIT; + unsigned int bo_flags = XE_BO_CREATE_USER_BIT; u32 handle; int err; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index ff9fa02b5395..0209f325dda0 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -243,6 +243,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) { for (i = 0; i < args->num_syncs; i++) { struct dma_fence *fence = syncs[i].fence; + if (fence) { err = xe_vm_async_fence_wait_start(fence); if (err) diff --git a/drivers/gpu/drm/xe/xe_execlist.h b/drivers/gpu/drm/xe/xe_execlist.h index 6a0442a6eff6..26f600ac8552 100644 --- a/drivers/gpu/drm/xe/xe_execlist.h +++ b/drivers/gpu/drm/xe/xe_execlist.h @@ -11,7 +11,7 @@ struct xe_device; struct xe_gt; -#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock); +#define xe_execlist_port_assert_held(port) lockdep_assert_held(&(port)->lock) int xe_execlist_init(struct xe_gt *gt); struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index a523d7941afe..7298653a73de 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -13,7 +13,7 @@ #define for_each_hw_engine(hwe__, gt__, id__) \ for ((id__) = 0; (id__) < ARRAY_SIZE((gt__)->hw_engines); (id__)++) \ - for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ + for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ xe_hw_engine_is_valid((hwe__))) struct xe_gt *xe_gt_alloc(struct xe_tile 
*tile); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index ce8b35dcbc51..d44537abf7da 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -186,7 +186,7 @@ static void guc_init_params(struct xe_guc *guc) int i; BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); - BUILD_BUG_ON(SOFT_SCRATCH_COUNT != GUC_CTL_MAX_DWORDS + 2); + BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index dd69d097b920..d4c3a5ce3252 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -444,7 +444,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); struct xe_reg_sr_entry *entry; unsigned long idx; - unsigned count = 0; + unsigned int count = 0; const struct { struct xe_reg reg; bool skip; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 9fb5fd4391d2..c7992a8667e5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -716,9 +716,8 @@ retry_same_fence: ptr = xa_store(&ct->fence_lookup, g2h_fence.seqno, &g2h_fence, GFP_KERNEL); - if (IS_ERR(ptr)) { + if (IS_ERR(ptr)) return PTR_ERR(ptr); - } goto retry_same_fence; } else if (unlikely(ret)) { diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index e215e8b2c17a..7515d7fbb723 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -140,16 +140,20 @@ struct guc_update_engine_policy { struct guc_policies { u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; - /* In micro seconds. How much time to allow before DPC processing is + /* + * In micro seconds. How much time to allow before DPC processing is * called back via interrupt (to prevent DPC queue drain starving). - * Typically 1000s of micro seconds (example only, not granularity). */ + * Typically 1000s of micro seconds (example only, not granularity). + */ u32 dpc_promote_time; /* Must be set to take these new values. */ u32 is_valid; - /* Max number of WIs to process per call. A large value may keep CS - * idle. */ + /* + * Max number of WIs to process per call. A large value may keep CS + * idle. 
+ */ u32 max_num_work_items; u32 global_flags; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 0c07cd4ad204..99c9b7139195 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -330,7 +330,7 @@ static void __guc_engine_policy_add_##func(struct engine_policy *policy, \ u32 data) \ { \ XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ - \ +\ policy->h2g.klv[policy->count].kl = \ FIELD_PREP(GUC_KLV_0_KEY, \ GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index e0377083d1f2..373a65c77946 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -68,6 +68,7 @@ int xe_huc_auth(struct xe_huc *huc) struct xe_gt *gt = huc_to_gt(huc); struct xe_guc *guc = huc_to_guc(huc); int ret; + if (xe_uc_fw_is_disabled(&huc->fw)) return 0; diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index d92f03870e59..ca6353243326 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -250,7 +250,7 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile, } static void gt_irq_handler(struct xe_tile *tile, - u32 master_ctl, long unsigned int *intr_dw, + u32 master_ctl, unsigned long *intr_dw, u32 *identity) { struct xe_device *xe = tile_to_xe(tile); @@ -305,7 +305,7 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) struct xe_device *xe = arg; struct xe_tile *tile = xe_device_get_root_tile(xe); u32 master_ctl, gu_misc_iir; - long unsigned int intr_dw[2]; + unsigned long intr_dw[2]; u32 identity[32]; master_ctl = xelp_intr_disable(xe); @@ -360,7 +360,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) struct xe_device *xe = arg; struct xe_tile *tile; u32 master_tile_ctl, master_ctl = 0, gu_misc_iir = 0; - long unsigned int intr_dw[2]; + unsigned long intr_dw[2]; u32 identity[32]; u8 id; @@ -502,11 +502,10 @@ static void xe_irq_postinstall(struct xe_device *xe) static irq_handler_t xe_irq_handler(struct xe_device *xe) { - if (GRAPHICS_VERx100(xe) >= 1210) { + if (GRAPHICS_VERx100(xe) >= 1210) return dg1_irq_handler; - } else { + else return xelp_irq_handler; - } } static void irq_uninstall(struct drm_device *drm, void *arg) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index d5f782f8d2a6..b726599f6228 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -374,46 +374,46 @@ static const u8 dg2_rcs_offsets[] = { }; static const u8 mtl_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(2), - LRI(2, POSTED), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - END + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(2), + LRI(2, POSTED), + REG16(0x5a8), + REG16(0x5ac), + + 
NOP(6), + LRI(1, 0), + REG(0x0c8), + + END }; #undef END diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index f05335b16a1a..0c233380d4f2 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -511,7 +511,7 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, #define EMIT_COPY_DW 10 static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u64 dst_ofs, unsigned int size, - unsigned pitch) + unsigned int pitch) { XE_BUG_ON(size / pitch > S16_MAX); XE_BUG_ON(pitch / 4 > S16_MAX); @@ -1012,6 +1012,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, do { u64 addr = ppgtt_ofs + ofs * 8; + chunk = min(update->qwords, 0x1ffU); /* Ensure populatefn can do memset64 by aligning bb->cs */ diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 7ab70a83f88d..e3ab1d3a367f 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -58,6 +58,7 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, bool atomic) { int err; + lockdep_assert_held(>->pcode.lock); if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 70892f134718..ea6dd7d71b59 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -82,7 +82,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, { u32 val = entry->set_bits; const char *access_str = "(invalid)"; - unsigned range_bit = 2; + unsigned int range_bit = 2; u32 range_start, range_end; bool deny; diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index f2ba609712d3..2a6fdd284395 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -130,7 +130,6 @@ fallback: cur->node = NULL; cur->mem_type = XE_PL_TT; XE_WARN_ON(res && start + size > res->size); - return; } static inline void __xe_res_sg_next(struct xe_res_cursor *cur) diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index fee71080bd31..2c4632259edd 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -81,7 +81,7 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 } struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, - unsigned size) + unsigned int size) { return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0); } diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 84df4ce45e03..75f7a4cf6cbe 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -147,9 +147,9 @@ struct fw_blobs_by_type { entry__, \ }, -XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, \ +XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_mmp_ver, fw_filename_major_ver) -XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, \ +XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_mmp_ver, fw_filename_no_ver) static struct xe_gt * diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7f2f17c3b86e..2b9a7618b169 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2163,16 +2163,16 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, case XE_VM_BIND_OP_PREFETCH: vma = xe_vm_find_overlapping_vma(vm, addr, range); if (XE_IOCTL_DBG(xe, !vma)) - return -ENODATA; /* Not an actual error, IOCTL - cleans up returns and 0 */ + /* Not 
an actual error, IOCTL cleans up returns and 0 */ + return -ENODATA; if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr || xe_vma_end(vma) != addr + range) && !async)) return -EINVAL; break; case XE_VM_BIND_OP_UNMAP_ALL: if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list))) - return -ENODATA; /* Not an actual error, IOCTL - cleans up returns and 0 */ + /* Not an actual error, IOCTL cleans up returns and 0 */ + return -ENODATA; break; default: XE_BUG_ON("NOT POSSIBLE"); diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index 5b6216964c45..b1b2dc4a6089 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -428,8 +428,8 @@ * the list of userptrs mapped in the VM, the list of engines using this VM, and * the array of external BOs mapped in the VM. When adding or removing any of the * aforemented state from the VM should acquire this lock in write mode. The VM - * bind path also acquires this lock in write while while the exec / compute - * mode rebind worker acquire this lock in read mode. + * bind path also acquires this lock in write while the exec / compute mode + * rebind worker acquire this lock in read mode. * * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv * slots which is shared with any private BO in the VM. Expected to be acquired -- cgit v1.2.3 From 1737785ae5313e4941181025858fc90ed4acd314 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Mon, 17 Jul 2023 15:29:00 +0530 Subject: drm/xe: remove gucrc disable from suspend path Currently GuCRC is disabled in suspend path for xe. Rc6 is a prerequiste to enable s0ix and should not be disabled for s2idle. There is no requirement to disable GuCRC for S3+. Remove it from xe_guc_pc_stop, thus removing from suspend path. Retain the call in other places where xe_guc_pc_stop is called. v2: add description and return statement to kernel-doc (Rodrigo) v3: update commit message (Rodrigo) v4: add mem_access_get to the gucrc disable function Signed-off-by: Riana Tauro Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_guc_pc.c | 20 +++++++++++++------- drivers/gpu/drm/xe/xe_guc_pc.h | 1 + drivers/gpu/drm/xe/xe_uc.c | 5 +++++ drivers/gpu/drm/xe/xe_uc.h | 1 + 5 files changed, 21 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index a21d44bfe9e8..b31ef2a8ff17 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -517,6 +517,7 @@ static int gt_reset(struct xe_gt *gt) if (err) goto err_msg; + xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 3093cfeff0c2..03dfbde29fe5 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -730,12 +730,20 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_gucrc_disable(struct xe_guc_pc *pc) +/** + * xe_guc_pc_gucrc_disable - Disable GuC RC + * @pc: Xe_GuC_PC instance + * + * Disables GuC RC by taking control of RC6 back from GuC. + * + * Return: 0 on success, negative error code on error. 
+ */ +int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + xe_device_mem_access_get(pc_to_xe(pc)); ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); if (ret) @@ -750,6 +758,7 @@ static int pc_gucrc_disable(struct xe_guc_pc *pc) xe_mmio_write32(gt, RC_STATE, 0); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(pc_to_xe(pc)); return 0; } @@ -827,7 +836,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; if (xe->info.platform == XE_PVC) { - pc_gucrc_disable(pc); + xe_guc_pc_gucrc_disable(pc); ret = 0; goto out; } @@ -850,10 +859,6 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) xe_device_mem_access_get(pc_to_xe(pc)); - ret = pc_gucrc_disable(pc); - if (ret) - goto out; - mutex_lock(&pc->freq_lock); pc->freq_ready = false; mutex_unlock(&pc->freq_lock); @@ -876,6 +881,7 @@ static void pc_fini(struct drm_device *drm, void *arg) { struct xe_guc_pc *pc = arg; + XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); xe_bo_unpin_map_no_vm(pc->bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 370353a40a17..81833a53b3c9 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -11,6 +11,7 @@ int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); +int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index e244d27b55d5..addd6f2681b9 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -176,6 +176,11 @@ int xe_uc_reset_prepare(struct xe_uc *uc) return xe_guc_reset_prepare(&uc->guc); } +void xe_uc_gucrc_disable(struct xe_uc *uc) +{ + XE_WARN_ON(xe_guc_pc_gucrc_disable(&uc->guc.pc)); +} + void xe_uc_stop_prepare(struct xe_uc *uc) { xe_guc_stop_prepare(&uc->guc); diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index d6efc9ef00d3..42219b361df5 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -12,6 +12,7 @@ int xe_uc_init(struct xe_uc *uc); int xe_uc_init_hwconfig(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); int xe_uc_init_hw(struct xe_uc *uc); +void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); int xe_uc_stop(struct xe_uc *uc); -- cgit v1.2.3 From ac0be3b5b28ecf4890b3fc3ebaec18e7ce5fcc86 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Tue, 18 Jul 2023 13:36:59 +0530 Subject: drm/xe/pm: Add pci d3cold_capable support Adding pci d3cold_capable check in order to initialize d3cold_allowed as false statically. It avoids vram save/restore latency during runtime suspend/resume v2: - Added else block to xe_pci_runtime_idle. 
[Rodrigo] Cc: Rodrigo Vivi Signed-off-by: Anshuman Gupta Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-2-anshuman.gupta@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_pci.c | 29 +++++++++++++++++------------ drivers/gpu/drm/xe/xe_pm.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 4 files changed, 43 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index fb2329ccce06..7a62c54939a9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -327,6 +327,9 @@ struct xe_device { bool hold_rpm; } mem_access; + /** d3cold_capable: Indicates if root port is d3cold capable */ + bool d3cold_capable; + /** @d3cold_allowed: Indicates if d3cold is a valid device state */ bool d3cold_allowed; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e130ffe3ab55..4ff7be058e4e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -665,6 +665,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_pci_disable; xe_pm_runtime_init(xe); + xe_pm_init(xe); return 0; @@ -777,18 +778,22 @@ static int xe_pci_runtime_idle(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct xe_device *xe = pdev_to_xe_device(pdev); - /* - * TODO: d3cold should be allowed (true) if - * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe)) - * but maybe include some other conditions. So, before - * we can re-enable the D3cold, we need to: - * 1. rewrite the VRAM save / restore to avoid buffer object locks - * 2. block D3cold if we have a big amount of device memory in use - * in order to reduce the latency. - * 3. at resume, detect if we really lost power and avoid memory - * restoration if we were only up to d3cold - */ - xe->d3cold_allowed = false; + if (!xe->d3cold_capable) { + xe->d3cold_allowed = false; + } else { + /* + * TODO: d3cold should be allowed (true) if + * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe)) + * but maybe include some other conditions. So, before + * we can re-enable the D3cold, we need to: + * 1. rewrite the VRAM save / restore to avoid buffer object locks + * 2. block D3cold if we have a big amount of device memory in use + * in order to reduce the latency. + * 3. 
at resume, detect if we really lost power and avoid memory + * restoration if we were only up to d3cold + */ + xe->d3cold_allowed = false; + } return 0; } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 20e9e522ab80..2f553dcd6139 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -106,6 +106,21 @@ int xe_pm_resume(struct xe_device *xe) return 0; } +static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev) +{ + struct pci_dev *root_pdev; + + root_pdev = pcie_find_root_port(pdev); + if (!root_pdev) + return false; + + /* D3Cold requires PME capability and _PR3 power resource */ + if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev)) + return false; + + return true; +} + void xe_pm_runtime_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -118,6 +133,13 @@ void xe_pm_runtime_init(struct xe_device *xe) pm_runtime_put_autosuspend(dev); } +void xe_pm_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + xe->d3cold_capable = xe_pm_pci_d3cold_capable(pdev); +} + void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 8418ee6faac5..864cd0be014a 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -14,6 +14,7 @@ int xe_pm_suspend(struct xe_device *xe); int xe_pm_resume(struct xe_device *xe); void xe_pm_runtime_init(struct xe_device *xe); +void xe_pm_init(struct xe_device *xe); void xe_pm_runtime_fini(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); -- cgit v1.2.3 From fddebcbf7a47d661f3eb475de0b75be11c7c3bb8 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Tue, 18 Jul 2023 13:37:00 +0530 Subject: drm/xe/pm: Refactor xe_pm_runtime_init Wrap xe_pm_runtime_init inside xe_pm_init. 
Cc: Rodrigo Vivi Signed-off-by: Anshuman Gupta Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-3-anshuman.gupta@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 1 - drivers/gpu/drm/xe/xe_pm.c | 3 ++- drivers/gpu/drm/xe/xe_pm.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 4ff7be058e4e..bc894b3546bf 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -664,7 +664,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_pci_disable; - xe_pm_runtime_init(xe); xe_pm_init(xe); return 0; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 2f553dcd6139..03d71dcf2393 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -121,7 +121,7 @@ static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev) return true; } -void xe_pm_runtime_init(struct xe_device *xe) +static void xe_pm_runtime_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -137,6 +137,7 @@ void xe_pm_init(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + xe_pm_runtime_init(xe); xe->d3cold_capable = xe_pm_pci_d3cold_capable(pdev); } diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 864cd0be014a..193e5d687353 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -13,7 +13,6 @@ struct xe_device; int xe_pm_suspend(struct xe_device *xe); int xe_pm_resume(struct xe_device *xe); -void xe_pm_runtime_init(struct xe_device *xe); void xe_pm_init(struct xe_device *xe); void xe_pm_runtime_fini(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); -- cgit v1.2.3 From b2d756199be822f4de8dd18fe4e3a939e4a06e7a Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Tue, 18 Jul 2023 13:37:01 +0530 Subject: drm/xe/pm: Add vram_d3cold_threshold Sysfs Add per pci device vram_d3cold_threshold Sysfs to control the d3cold allowed knob. Adding a d3cold structure embedded in xe_device to encapsulate d3cold related stuff. v2: - Check total vram before initializing default threshold. [Riana] - Add static scope to vram_d3cold_threshold DEVICE_ATTR. [Riana] v3: - Fixed cosmetics review comment. [Riana] - Fixed CI Hook failures. - Used drmm_mutex_init(). v4: - Fixed kernel-doc warnings. v5: - Added doc explaining need for the device sysfs. [Rodrigo] - Removed TODO comment. 
Cc: Rodrigo Vivi Signed-off-by: Anshuman Gupta Reviewed-by: Riana Tauro Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-4-anshuman.gupta@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device_sysfs.c | 89 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_device_sysfs.h | 13 ++++++ drivers/gpu/drm/xe/xe_device_types.h | 24 ++++++++-- drivers/gpu/drm/xe/xe_pci.c | 10 ++-- drivers/gpu/drm/xe/xe_pm.c | 37 +++++++++++++-- drivers/gpu/drm/xe/xe_pm.h | 3 ++ 7 files changed, 164 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_device_sysfs.c create mode 100644 drivers/gpu/drm/xe/xe_device_sysfs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 3ade82cf244e..e5fb874a7aaf 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -52,6 +52,7 @@ xe-y += xe_bb.o \ xe_debugfs.o \ xe_devcoredump.o \ xe_device.o \ + xe_device_sysfs.o \ xe_dma_buf.o \ xe_engine.o \ xe_exec.o \ diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c new file mode 100644 index 000000000000..99113a5a2b84 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include +#include + +#include + +#include "xe_device.h" +#include "xe_device_sysfs.h" +#include "xe_pm.h" + +/** + * DOC: Xe device sysfs + * Xe driver requires exposing certain tunable knobs controlled by user space for + * each graphics device. Considering this, we need to add sysfs attributes at device + * level granularity. + * These sysfs attributes will be available under pci device kobj directory. + * + * vram_d3cold_threshold - Report/change vram used threshold(in MB) below + * which vram save/restore is permissible during runtime D3cold entry/exit. 
+ */ + +static ssize_t +vram_d3cold_threshold_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + int ret; + + if (!xe) + return -EINVAL; + + ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); + + return ret; +} + +static ssize_t +vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + u32 vram_d3cold_threshold; + int ret; + + if (!xe) + return -EINVAL; + + ret = kstrtou32(buff, 0, &vram_d3cold_threshold); + if (ret) + return ret; + + drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold); + + ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold); + + return ret ?: count; +} + +static DEVICE_ATTR_RW(vram_d3cold_threshold); + +static void xe_device_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); +} + +void xe_device_sysfs_init(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + int ret; + + ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + if (ret) { + drm_warn(&xe->drm, "Failed to create sysfs file\n"); + return; + } + + ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe); + if (ret) + drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n"); +} diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h new file mode 100644 index 000000000000..38b240684bee --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DEVICE_SYSFS_H_ +#define _XE_DEVICE_SYSFS_H_ + +struct xe_device; + +void xe_device_sysfs_init(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 7a62c54939a9..14b15ecc5617 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -327,11 +327,27 @@ struct xe_device { bool hold_rpm; } mem_access; - /** d3cold_capable: Indicates if root port is d3cold capable */ - bool d3cold_capable; + /** @d3cold: Encapsulate d3cold related stuff */ + struct { + /** capable: Indicates if root port is d3cold capable */ + bool capable; + + /** @allowed: Indicates if d3cold is a valid device state */ + bool allowed; - /** @d3cold_allowed: Indicates if d3cold is a valid device state */ - bool d3cold_allowed; + /** + * @vram_threshold: + * + * This represents the permissible threshold(in megabytes) + * for vram save/restore. d3cold will be disallowed, + * when vram_usages is above or equals the threshold value + * to avoid the vram save/restore latency. + * Default threshold value is 300mb. 
+ */ + u32 vram_threshold; + /** @lock: protect vram_threshold */ + struct mutex lock; + } d3cold; /* For pcode */ struct mutex sb_lock; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index bc894b3546bf..74aba4f09f7d 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -738,7 +738,7 @@ static int xe_pci_runtime_suspend(struct device *dev) pci_save_state(pdev); - if (xe->d3cold_allowed) { + if (xe->d3cold.allowed) { pci_disable_device(pdev); pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); @@ -761,7 +761,7 @@ static int xe_pci_runtime_resume(struct device *dev) pci_restore_state(pdev); - if (xe->d3cold_allowed) { + if (xe->d3cold.allowed) { err = pci_enable_device(pdev); if (err) return err; @@ -777,8 +777,8 @@ static int xe_pci_runtime_idle(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct xe_device *xe = pdev_to_xe_device(pdev); - if (!xe->d3cold_capable) { - xe->d3cold_allowed = false; + if (!xe->d3cold.capable) { + xe->d3cold.allowed = false; } else { /* * TODO: d3cold should be allowed (true) if @@ -791,7 +791,7 @@ static int xe_pci_runtime_idle(struct device *dev) * 3. at resume, detect if we really lost power and avoid memory * restoration if we were only up to d3cold */ - xe->d3cold_allowed = false; + xe->d3cold.allowed = false; } return 0; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 03d71dcf2393..261c0ad57b63 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -7,11 +7,13 @@ #include +#include #include #include "xe_bo.h" #include "xe_bo_evict.h" #include "xe_device.h" +#include "xe_device_sysfs.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_irq.h" @@ -137,8 +139,11 @@ void xe_pm_init(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + drmm_mutex_init(&xe->drm, &xe->d3cold.lock); xe_pm_runtime_init(xe); - xe->d3cold_capable = xe_pm_pci_d3cold_capable(pdev); + xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev); + xe_device_sysfs_init(xe); + xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); } void xe_pm_runtime_fini(struct xe_device *xe) @@ -155,7 +160,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) u8 id; int err; - if (xe->d3cold_allowed) { + if (xe->d3cold.allowed) { if (xe_device_mem_access_ongoing(xe)) return -EBUSY; @@ -181,7 +186,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) u8 id; int err; - if (xe->d3cold_allowed) { + if (xe->d3cold.allowed) { for_each_gt(gt, xe, id) { err = xe_pcode_init(gt); if (err) @@ -202,7 +207,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); - if (xe->d3cold_allowed) { + if (xe->d3cold.allowed) { err = xe_bo_restore_user(xe); if (err) return err; @@ -251,3 +256,27 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) device_set_pm_not_required(&pdev->dev); } } + +int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) +{ + struct ttm_resource_manager *man; + u32 vram_total_mb = 0; + int i; + + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { + man = ttm_manager_type(&xe->ttm, i); + if (man) + vram_total_mb += DIV_ROUND_UP_ULL(man->size, 1024 * 1024); + } + + drm_dbg(&xe->drm, "Total vram %u mb\n", vram_total_mb); + + if (threshold > vram_total_mb) + return -EINVAL; + + mutex_lock(&xe->d3cold.lock); + xe->d3cold.vram_threshold = threshold; + mutex_unlock(&xe->d3cold.lock); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 193e5d687353..bbd91a5855cd 100644 --- 
a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -8,6 +8,8 @@ #include +#define DEFAULT_VRAM_THRESHOLD 300 /* in MB */ + struct xe_device; int xe_pm_suspend(struct xe_device *xe); @@ -22,5 +24,6 @@ int xe_pm_runtime_put(struct xe_device *xe); bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); +int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); #endif -- cgit v1.2.3 From 2ef08b98025bd09b74f68d1801995b0b068afbe7 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Tue, 18 Jul 2023 13:37:02 +0530 Subject: drm/xe/pm: Toggle d3cold_allowed using vram_usages Adding support to control d3cold by using vram_usages metric from ttm resource manager. When root port is capable of d3cold but xe has disallowed d3cold due to vram_usages above vram_d3ccold_threshol. It is required to disable d3cold to avoid any resume failure because root port can still transition to d3cold when all of pcie endpoints and {upstream, virtual} switch ports will transition to d3hot. Also cleaning up the TODO code comment. v2: - Modify d3cold.allowed in xe_pm_d3cold_allowed_toggle. [Riana] - Cond changed (total_vram_used_mb < xe->d3cold.vram_threshold) according to doc comment. v3: - Added enum instead of true/false argument in d3cold_toggle(). [Rodrigo] - Removed TODO comment. [Rodrigo] Cc: Rodrigo Vivi Signed-off-by: Anshuman Gupta Reviewed-by: Badal Nilawar Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-5-anshuman.gupta@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 36 +++++++++++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_pm.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 59 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 74aba4f09f7d..6d04e570735a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -25,6 +25,11 @@ #include "xe_pm.h" #include "xe_step.h" +enum toggle_d3cold { + D3COLD_DISABLE, + D3COLD_ENABLE, +}; + struct xe_subplatform_desc { enum xe_subplatform subplatform; const char *name; @@ -726,6 +731,28 @@ static int xe_pci_resume(struct device *dev) return 0; } +static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + struct pci_dev *root_pdev; + + if (!xe->d3cold.capable) + return; + + root_pdev = pcie_find_root_port(pdev); + if (!root_pdev) + return; + + switch (toggle) { + case D3COLD_DISABLE: + pci_d3cold_disable(root_pdev); + break; + case D3COLD_ENABLE: + pci_d3cold_enable(root_pdev); + break; + } +} + static int xe_pci_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -743,6 +770,7 @@ static int xe_pci_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); } else { + d3cold_toggle(pdev, D3COLD_DISABLE); pci_set_power_state(pdev, PCI_D3hot); } @@ -767,6 +795,8 @@ static int xe_pci_runtime_resume(struct device *dev) return err; pci_set_master(pdev); + } else { + d3cold_toggle(pdev, D3COLD_ENABLE); } return xe_pm_runtime_resume(xe); @@ -780,15 +810,15 @@ static int xe_pci_runtime_idle(struct device *dev) if (!xe->d3cold.capable) { xe->d3cold.allowed = false; } else { + xe_pm_d3cold_allowed_toggle(xe); + /* * TODO: d3cold should be allowed (true) if * (IS_DGFX(xe) && 
!xe_device_mem_access_ongoing(xe)) * but maybe include some other conditions. So, before * we can re-enable the D3cold, we need to: * 1. rewrite the VRAM save / restore to avoid buffer object locks - * 2. block D3cold if we have a big amount of device memory in use - * in order to reduce the latency. - * 3. at resume, detect if we really lost power and avoid memory + * 2. at resume, detect if we really lost power and avoid memory * restoration if we were only up to d3cold */ xe->d3cold.allowed = false; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 261c0ad57b63..21964e4d09f8 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -280,3 +280,28 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) return 0; } + +void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) +{ + struct ttm_resource_manager *man; + u32 total_vram_used_mb = 0; + u64 vram_used; + int i; + + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { + man = ttm_manager_type(&xe->ttm, i); + if (man) { + vram_used = ttm_resource_manager_usage(man); + total_vram_used_mb += DIV_ROUND_UP_ULL(vram_used, 1024 * 1024); + } + } + + mutex_lock(&xe->d3cold.lock); + + if (total_vram_used_mb < xe->d3cold.vram_threshold) + xe->d3cold.allowed = true; + else + xe->d3cold.allowed = false; + + mutex_unlock(&xe->d3cold.lock); +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index bbd91a5855cd..ee30cf025f64 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -25,5 +25,6 @@ bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); +void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); #endif -- cgit v1.2.3 From 09d88e3beb64b8d2e3043fef72dda0df62487e44 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Tue, 18 Jul 2023 13:37:03 +0530 Subject: drm/xe/pm: Init pcode and restore vram on power lost Don't init pcode and restore VRAM objects in vain. We can rely on primary GT GUC_STATUS to detect whether card has really lost power even when d3cold is allowed by xe. Adding d3cold.lost_power flag to avoid pcode init and vram restoration. Also cleaning up the TODO code comment. v2: - %s/xe_guc_has_lost_power()/xe_guc_in_reset(). - Used existing gt instead of new variable. [Rodrigo] - Added kernel-doc function comment. [Rodrigo] - xe_guc_in_reset() return true if failed to get fw. Cc: Rodrigo Vivi Signed-off-by: Anshuman Gupta Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230718080703.239343-6-anshuman.gupta@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_guc.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc.h | 1 + drivers/gpu/drm/xe/xe_pci.c | 2 -- drivers/gpu/drm/xe/xe_pm.c | 13 +++++++++++-- 5 files changed, 42 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 14b15ecc5617..0897719751e9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -335,6 +335,9 @@ struct xe_device { /** @allowed: Indicates if d3cold is a valid device state */ bool allowed; + /** @power_lost: Indicates if card has really lost power. 
*/ + bool power_lost; + /** * @vram_threshold: * diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index d44537abf7da..ed90d738d673 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -844,3 +844,30 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_guc_ct_print(&guc->ct, p, false); xe_guc_submit_print(guc, p); } + +/** + * xe_guc_in_reset() - Detect if GuC MIA is in reset. + * @guc: The GuC object + * + * This function detects runtime resume from d3cold by leveraging + * GUC_STATUS, GUC doesn't get reset during d3hot, + * it strictly to be called from RPM resume handler. + * + * Return: true if failed to get forcewake or GuC MIA is in Reset, + * otherwise false. + */ +bool xe_guc_in_reset(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 status; + int err; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return true; + + status = xe_mmio_read32(gt, GUC_STATUS); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + + return status & GS_MIA_IN_RESET; +} diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 74a74051f354..f64f22e97169 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -35,6 +35,7 @@ void xe_guc_reset_wait(struct xe_guc *guc); void xe_guc_stop_prepare(struct xe_guc *guc); int xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); +bool xe_guc_in_reset(struct xe_guc *guc); static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6d04e570735a..ae6e1394ff31 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -818,8 +818,6 @@ static int xe_pci_runtime_idle(struct device *dev) * but maybe include some other conditions. So, before * we can re-enable the D3cold, we need to: * 1. rewrite the VRAM save / restore to avoid buffer object locks - * 2. at resume, detect if we really lost power and avoid memory - * restoration if we were only up to d3cold */ xe->d3cold.allowed = false; } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 21964e4d09f8..f336aec7085d 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -16,6 +16,7 @@ #include "xe_device_sysfs.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_guc.h" #include "xe_irq.h" #include "xe_pcode.h" @@ -186,7 +187,15 @@ int xe_pm_runtime_resume(struct xe_device *xe) u8 id; int err; - if (xe->d3cold.allowed) { + /* + * It can be possible that xe has allowed d3cold but other pcie devices + * in gfx card soc would have blocked d3cold, therefore card has not + * really lost power. Detecting primary Gt power is sufficient. 
+ */ + gt = xe_device_get_gt(xe, 0); + xe->d3cold.power_lost = xe_guc_in_reset(>->uc.guc); + + if (xe->d3cold.allowed && xe->d3cold.power_lost) { for_each_gt(gt, xe, id) { err = xe_pcode_init(gt); if (err) @@ -207,7 +216,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); - if (xe->d3cold.allowed) { + if (xe->d3cold.allowed && xe->d3cold.power_lost) { err = xe_bo_restore_user(xe); if (err) return err; -- cgit v1.2.3 From a00b8f1aae43c46658de0f7f55d8a65acb002159 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:03 +0100 Subject: drm/xe: fix xe_device_mem_access_get() races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like there is at least one race here, given that the pm_runtime_suspended() check looks to return false if we are in the process of suspending the device (RPM_SUSPENDING vs RPM_SUSPENDED). We later also do xe_pm_runtime_get_if_active(), but since the device is suspending or has now suspended, this doesn't do anything either. Following from this we can potentially return from xe_device_mem_access_get() with the device suspended or about to be, leading to broken behaviour. Attempt to fix this by always grabbing the runtime ref when our internal ref transitions from 0 -> 1. The hard part is then dealing with the runtime_pm callbacks also calling xe_device_mem_access_get() and deadlocking, which the pm_runtime_suspended() check prevented. v2: - ct->lock looks to be primed with fs_reclaim, so holding that and then allocating memory will cause lockdep to complain. Now that we unconditionally grab the mem_access.lock around mem_access_{get,put}, we need to change the ordering wrt to grabbing the ct->lock, since some of the runtime_pm routines can allocate memory (or at least that's what lockdep seems to suggest). Hopefully not a big deal. It might be that there were already issues with this, just that the atomics where "hiding" the potential issues. v3: - Use Thomas Hellström' idea with tracking the active task that is executing in the resume or suspend callback, in order to avoid recursive resume/suspend calls deadlocking on itself. - Split the ct->lock change. v4: - Add smb_mb() around accessing the pm_callback_task for extra safety. (Thomas Hellström) v5: - Clarify the kernel-doc for the mem_access.lock, given that it is quite strange in what it protects (data vs code). The real motivation is to aid lockdep. (Rodrigo Vivi) v6: - Split out the lock change. We still want this as a lockdep aid but only for the xe_device_mem_access_get() path. Sticking a lock on the put() looks be a no-go, also the runtime_put() there is always async. - Now that the lock is gone move to atomics and rely on the pm code serialising multiple callers on the 0 -> 1 transition. - g2h_worker_func() looks to be the next issue, given that suspend-resume callbacks are using CT, so try to handle that. v7: - Add xe_device_mem_access_get_if_ongoing(), and use it in g2h_worker_func(). v8 (Anshuman): - Just always grab the rpm, instead of just on the 0 -> 1 transition, which is a lot clearer and simplifies the code quite a bit. v9: - Make sure we also adjust the CT fast-path with if-active. 
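The mechanics of the recursion guard are simple: the device records which task is currently running the runtime_suspend()/runtime_resume() callback, and the mem_access helpers return early when called from that same task, so the callbacks themselves can use paths that would otherwise try to take the reference again. Stripped to its essence (the memory barriers and overflow warnings are in the diff below):

void xe_device_mem_access_get(struct xe_device *xe)
{
	/* Re-entry from our own runtime_suspend()/resume() callback: no-op. */
	if (xe_pm_read_callback_task(xe) == current)
		return;

	xe_pm_runtime_get(xe);			/* may synchronously resume the device */
	atomic_inc(&xe->mem_access.ref);	/* record the ongoing access */
}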
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/258 Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Matthew Brost Cc: Anshuman Gupta Acked-by: Anshuman Gupta Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 60 ++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_device.h | 11 ++---- drivers/gpu/drm/xe/xe_device_types.h | 8 +++-- drivers/gpu/drm/xe/xe_guc_ct.c | 41 +++++++++++++++++++--- drivers/gpu/drm/xe/xe_pm.c | 68 +++++++++++++++++++++++------------- drivers/gpu/drm/xe/xe_pm.h | 2 +- 6 files changed, 136 insertions(+), 54 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index bd2e10952989..47401bb49958 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -364,33 +364,67 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0; } +bool xe_device_mem_access_ongoing(struct xe_device *xe) +{ + if (xe_pm_read_callback_task(xe) != NULL) + return true; + + return atomic_read(&xe->mem_access.ref); +} + +void xe_device_assert_mem_access(struct xe_device *xe) +{ + XE_WARN_ON(!xe_device_mem_access_ongoing(xe)); +} + bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) { - return atomic_inc_not_zero(&xe->mem_access.ref); + bool active; + + if (xe_pm_read_callback_task(xe) == current) + return true; + + active = xe_pm_runtime_get_if_active(xe); + if (active) { + int ref = atomic_inc_return(&xe->mem_access.ref); + + XE_WARN_ON(ref == S32_MAX); + } + + return active; } void xe_device_mem_access_get(struct xe_device *xe) { - bool resumed = xe_pm_runtime_resume_if_suspended(xe); - int ref = atomic_inc_return(&xe->mem_access.ref); - - if (ref == 1) - xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe); + int ref; + + /* + * This looks racy, but should be fine since the pm_callback_task only + * transitions from NULL -> current (and back to NULL again), during the + * runtime_resume() or runtime_suspend() callbacks, for which there can + * only be a single one running for our device. We only need to prevent + * recursively calling the runtime_get or runtime_put from those + * callbacks, as well as preventing triggering any access_ongoing + * asserts. 
+ */ + if (xe_pm_read_callback_task(xe) == current) + return; - /* The usage counter increased if device was immediately resumed */ - if (resumed) - xe_pm_runtime_put(xe); + xe_pm_runtime_get(xe); + ref = atomic_inc_return(&xe->mem_access.ref); XE_WARN_ON(ref == S32_MAX); } void xe_device_mem_access_put(struct xe_device *xe) { - bool hold = xe->mem_access.hold_rpm; - int ref = atomic_dec_return(&xe->mem_access.ref); + int ref; + + if (xe_pm_read_callback_task(xe) == current) + return; - if (!ref && hold) - xe_pm_runtime_put(xe); + ref = atomic_dec_return(&xe->mem_access.ref); + xe_pm_runtime_put(xe); XE_WARN_ON(ref < 0); } diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index a64828bc6ad2..8b085ffdc5f8 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -141,15 +141,8 @@ void xe_device_mem_access_get(struct xe_device *xe); bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe); void xe_device_mem_access_put(struct xe_device *xe); -static inline bool xe_device_mem_access_ongoing(struct xe_device *xe) -{ - return atomic_read(&xe->mem_access.ref); -} - -static inline void xe_device_assert_mem_access(struct xe_device *xe) -{ - XE_WARN_ON(!xe_device_mem_access_ongoing(xe)); -} +void xe_device_assert_mem_access(struct xe_device *xe); +bool xe_device_mem_access_ongoing(struct xe_device *xe); static inline bool xe_device_in_fault_mode(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0897719751e9..52c5f7ded7ce 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -323,8 +323,6 @@ struct xe_device { struct { /** @ref: ref count of memory accesses */ atomic_t ref; - /** @hold_rpm: need to put rpm ref back at the end */ - bool hold_rpm; } mem_access; /** @d3cold: Encapsulate d3cold related stuff */ @@ -352,6 +350,12 @@ struct xe_device { struct mutex lock; } d3cold; + /** + * @pm_callback_task: Track the active task that is running in either + * the runtime_suspend or runtime_resume callbacks. 
+ */ + struct task_struct *pm_callback_task; + /* For pcode */ struct mutex sb_lock; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index c7992a8667e5..5d9ed5de5dbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -19,6 +19,7 @@ #include "xe_guc.h" #include "xe_guc_submit.h" #include "xe_map.h" +#include "xe_pm.h" #include "xe_trace.h" /* Used when a CT send wants to block and / or receive data */ @@ -1046,9 +1047,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) void xe_guc_ct_fast_path(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); + bool ongoing; int len; - if (!xe_device_mem_access_get_if_ongoing(xe)) + ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct)); + if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) return; spin_lock(&ct->fast_lock); @@ -1059,7 +1062,8 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct) } while (len > 0); spin_unlock(&ct->fast_lock); - xe_device_mem_access_put(xe); + if (ongoing) + xe_device_mem_access_put(xe); } /* Returns less than zero on error, 0 on done, 1 on more available */ @@ -1090,9 +1094,36 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct) static void g2h_worker_func(struct work_struct *w) { struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); + bool ongoing; int ret; - xe_device_mem_access_get(ct_to_xe(ct)); + /* + * Normal users must always hold mem_access.ref around CT calls. However + * during the runtime pm callbacks we rely on CT to talk to the GuC, but + * at this stage we can't rely on mem_access.ref and even the + * callback_task will be different than current. For such cases we just + * need to ensure we always process the responses from any blocking + * ct_send requests or where we otherwise expect some response when + * initiated from those callbacks (which will need to wait for the below + * dequeue_one_g2h()). The dequeue_one_g2h() will gracefully fail if + * the device has suspended to the point that the CT communication has + * been disabled. + * + * If we are inside the runtime pm callback, we can be the only task + * still issuing CT requests (since that requires having the + * mem_access.ref). It seems like it might in theory be possible to + * receive unsolicited events from the GuC just as we are + * suspending-resuming, but those will currently anyway be lost when + * eventually exiting from suspend, hence no need to wake up the device + * here. If we ever need something stronger than get_if_ongoing() then + * we need to be careful with blocking the pm callbacks from getting CT + * responses, if the worker here is blocked on those callbacks + * completing, creating a deadlock. 
+ */ + ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct)); + if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) + return; + do { mutex_lock(&ct->lock); ret = dequeue_one_g2h(ct); @@ -1106,7 +1137,9 @@ static void g2h_worker_func(struct work_struct *w) kick_reset(ct); } } while (ret == 1); - xe_device_mem_access_put(ct_to_xe(ct)); + + if (ongoing) + xe_device_mem_access_put(ct_to_xe(ct)); } static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index f336aec7085d..04b995aa848f 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -155,37 +155,65 @@ void xe_pm_runtime_fini(struct xe_device *xe) pm_runtime_forbid(dev); } +static void xe_pm_write_callback_task(struct xe_device *xe, + struct task_struct *task) +{ + WRITE_ONCE(xe->pm_callback_task, task); + + /* + * Just in case it's somehow possible for our writes to be reordered to + * the extent that something else re-uses the task written in + * pm_callback_task. For example after returning from the callback, but + * before the reordered write that resets pm_callback_task back to NULL. + */ + smp_mb(); /* pairs with xe_pm_read_callback_task */ +} + +struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) +{ + smp_mb(); /* pairs with xe_pm_write_callback_task */ + + return READ_ONCE(xe->pm_callback_task); +} + int xe_pm_runtime_suspend(struct xe_device *xe) { struct xe_gt *gt; u8 id; - int err; + int err = 0; - if (xe->d3cold.allowed) { - if (xe_device_mem_access_ongoing(xe)) - return -EBUSY; + if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe)) + return -EBUSY; + + /* Disable access_ongoing asserts and prevent recursive pm calls */ + xe_pm_write_callback_task(xe, current); + if (xe->d3cold.allowed) { err = xe_bo_evict_all(xe); if (err) - return err; + goto out; } for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) - return err; + goto out; } xe_irq_suspend(xe); - - return 0; +out: + xe_pm_write_callback_task(xe, NULL); + return err; } int xe_pm_runtime_resume(struct xe_device *xe) { struct xe_gt *gt; u8 id; - int err; + int err = 0; + + /* Disable access_ongoing asserts and prevent recursive pm calls */ + xe_pm_write_callback_task(xe, current); /* * It can be possible that xe has allowed d3cold but other pcie devices @@ -199,7 +227,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) { err = xe_pcode_init(gt); if (err) - return err; + goto out; } /* @@ -208,7 +236,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) */ err = xe_bo_restore_kernel(xe); if (err) - return err; + goto out; } xe_irq_resume(xe); @@ -219,10 +247,11 @@ int xe_pm_runtime_resume(struct xe_device *xe) if (xe->d3cold.allowed && xe->d3cold.power_lost) { err = xe_bo_restore_user(xe); if (err) - return err; + goto out; } - - return 0; +out: + xe_pm_write_callback_task(xe, NULL); + return err; } int xe_pm_runtime_get(struct xe_device *xe) @@ -236,19 +265,8 @@ int xe_pm_runtime_put(struct xe_device *xe) return pm_runtime_put_autosuspend(xe->drm.dev); } -/* Return true if resume operation happened and usage count was increased */ -bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe) -{ - /* In case we are suspended we need to immediately wake up */ - if (pm_runtime_suspended(xe->drm.dev)) - return !pm_runtime_resume_and_get(xe->drm.dev); - - return false; -} - int xe_pm_runtime_get_if_active(struct xe_device *xe) { - WARN_ON(pm_runtime_suspended(xe->drm.dev)); return 
pm_runtime_get_if_active(xe->drm.dev, true); } diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index ee30cf025f64..08a633ce5145 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -21,10 +21,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); int xe_pm_runtime_get(struct xe_device *xe); int xe_pm_runtime_put(struct xe_device *xe); -bool xe_pm_runtime_resume_if_suspended(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); +struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); #endif -- cgit v1.2.3 From 2d30332a5ec004effe24d669003bf94e7f167387 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:04 +0100 Subject: drm/xe/vm: tidy up xe_runtime_pm usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe_device_mem_access_get() should be all that's needed here and should now work as expected, without any strange races. In theory should be no functional changes here. Reported-by: Oded Gabbay Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Cc: Thomas Hellström Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2b9a7618b169..d4c6ddfce1ea 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1245,11 +1245,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) INIT_LIST_HEAD(&vm->extobj.list); - if (!(flags & XE_VM_FLAG_MIGRATION)) { - /* We need to immeditatelly exit from any D3 state */ - xe_pm_runtime_get(xe); + if (!(flags & XE_VM_FLAG_MIGRATION)) xe_device_mem_access_get(xe); - } vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { @@ -1360,10 +1357,8 @@ err_close: err_no_resv: kfree(vm); - if (!(flags & XE_VM_FLAG_MIGRATION)) { + if (!(flags & XE_VM_FLAG_MIGRATION)) xe_device_mem_access_put(xe); - xe_pm_runtime_put(xe); - } return ERR_PTR(err); } @@ -1522,7 +1517,6 @@ static void vm_destroy_work_func(struct work_struct *w) if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { xe_device_mem_access_put(xe); - xe_pm_runtime_put(xe); if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); -- cgit v1.2.3 From 6bfbd0c589bb89581bb89d2776924c3853296cfc Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:05 +0100 Subject: drm/xe/debugfs: grab mem_access around forcewake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need keep the device awake when performing any kind of mmio operation. 
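Concretely, the debugfs forcewake handlers now bracket the forcewake acquisition with a mem_access reference so the device cannot runtime-suspend while the forcewake file is held open; the pairing across open/release looks like this (condensed from the hunks below):

	/* forcewake_open(): wake the device first, then grab forcewake */
	xe_device_mem_access_get(xe);
	for_each_gt(gt, xe, id)
		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));

	/* forcewake_release(): drop forcewake, then allow the device to idle */
	for_each_gt(gt, xe, id)
		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	xe_device_mem_access_put(xe);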
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/279 Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Cc: Thomas Hellström Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_debugfs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 7827a785b020..047341d5689a 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -70,6 +70,8 @@ static int forcewake_open(struct inode *inode, struct file *file) struct xe_gt *gt; u8 id; + xe_device_mem_access_get(xe); + for_each_gt(gt, xe, id) XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -85,6 +87,8 @@ static int forcewake_release(struct inode *inode, struct file *file) for_each_gt(gt, xe, id) XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(xe); + return 0; } -- cgit v1.2.3 From 2d3ab1fa3195d2b0291625fcd0062796aaf15794 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:06 +0100 Subject: drm/xe/guc_pc: add missing mem_access for freq_rpe_show The mem_access is meant to cover any kind of device level memory access, mmio included. Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: Rodrigo Vivi Cc: Anshuman Gupta Reviewed-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 03dfbde29fe5..0927cb669603 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -422,8 +422,12 @@ static ssize_t freq_rpe_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + xe_device_mem_access_get(xe); pc_update_rp_values(pc); + xe_device_mem_access_put(xe); return sysfs_emit(buf, "%d\n", pc->rpe_freq); } static DEVICE_ATTR_RO(freq_rpe); -- cgit v1.2.3 From 7da1d76ff647cc08d9400562a75a92e41ba6d7bc Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:07 +0100 Subject: drm/xe/mmio: grab mem_access in xe_mmio_ioctl Any kind of device memory access should first ensure the device is not suspended, mmio included. Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 448b874c7a3c..8d0f07261bfd 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -483,6 +483,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, */ reg = XE_REG(args->addr); + xe_device_mem_access_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (args->flags & DRM_XE_MMIO_WRITE) { @@ -526,6 +527,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, exit: xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_device_mem_access_put(xe); return ret; } -- cgit v1.2.3 From 03af26c9c9767b096cf4b69544f0140898530531 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:08 +0100 Subject: drm/xe: ensure correct access_put ordering Only call access_put after dropping the forcewake. In theory the device could suspend, but really we want to start asserting that we have a mem_access.ref when touching mmio. 
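[Editor's note] A minimal sketch of the acquire/release nesting this patch enforces, built from the helpers visible in the surrounding diffs; this is an illustration, not part of the patch:

	/* Take the mem_access reference outermost so the device cannot
	 * runtime-suspend while forcewake is still held, and drop it last. */
	xe_device_mem_access_get(gt_to_xe(gt));
	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);

	/* ... mmio accesses ... */

	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	xe_device_mem_access_put(gt_to_xe(gt));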
Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 8 ++++---- drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index b31ef2a8ff17..3e32d38aeeea 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -535,8 +535,8 @@ static int gt_reset(struct xe_gt *gt) if (err) goto err_out; - xe_device_mem_access_put(gt_to_xe(gt)); err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(err); xe_gt_info(gt, "reset done\n"); @@ -579,8 +579,8 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) xe_uc_stop_prepare(>->uc); - xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(gt_to_xe(gt)); } int xe_gt_suspend(struct xe_gt *gt) @@ -602,8 +602,8 @@ int xe_gt_suspend(struct xe_gt *gt) if (err) goto err_force_wake; - xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_info(gt, "suspended\n"); return 0; @@ -630,8 +630,8 @@ int xe_gt_resume(struct xe_gt *gt) if (err) goto err_force_wake; - xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_info(gt, "resumed\n"); return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index d0092d714ffe..f9f653243f20 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -45,8 +45,8 @@ static int hw_engines(struct seq_file *m, void *data) for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, &p); - xe_device_mem_access_put(xe); err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_device_mem_access_put(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 0927cb669603..91a3967fd799 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -848,8 +848,8 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL); out: - xe_device_mem_access_put(pc_to_xe(pc)); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_device_mem_access_put(pc_to_xe(pc)); return ret; } -- cgit v1.2.3 From 7eed01a926838d4f6b8c655801e6af5366ccec46 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:09 +0100 Subject: drm/xe: drop xe_device_mem_access_get() from guc_ct_send MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The callers should already be holding the mem_access reference, before calling into this. 
Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 5d9ed5de5dbb..cb75db30800c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -625,14 +625,10 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, XE_BUG_ON(g2h_len && g2h_fence); - xe_device_mem_access_get(ct_to_xe(ct)); - mutex_lock(&ct->lock); ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); mutex_unlock(&ct->lock); - xe_device_mem_access_put(ct_to_xe(ct)); - return ret; } -- cgit v1.2.3 From e018f44b29ed2de0a09186c728f173d0daaac448 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:10 +0100 Subject: drm/xe/ggtt: prime ggtt->lock against FS_RECLAIM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase the sensitivity of the ggtt->lock by priming it against FS_RECLAIM, such that allocating memory while holding will result in lockdep splats. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0722c49585a0..4468c0ae0f6f 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -93,6 +93,16 @@ static void ggtt_fini_noalloc(struct drm_device *drm, void *arg) xe_bo_unpin_map_no_vm(ggtt->scratch); } +static void primelockdep(struct xe_ggtt *ggtt) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&ggtt->lock); + fs_reclaim_release(GFP_KERNEL); +} + int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -140,6 +150,7 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); mutex_init(&ggtt->lock); + primelockdep(ggtt); return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt); } -- cgit v1.2.3 From 7d623575a34539c0302a3ed3ec7321efcb281e37 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:11 +0100 Subject: drm/xe: drop xe_device_mem_access_get() from invalidation_vma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lockdep gives the following splat: [ 594.158863] ffff888140da53f0 (&vm->userptr.notifier_lock){++++}-{3:3}, at: vma_userptr_invalidate+0xeb/0x330 [xe] [ 594.158921] but task is already holding lock: [ 594.158926] ffffffff82761940 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}, at: unmap_vmas+0x0/0x1c0 [ 594.158941] which lock already depends on the new lock. 
[ 594.158947] the existing dependency chain (in reverse order) is: [ 594.158953] -> #5 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}: [ 594.158961] fs_reclaim_acquire+0x68/0xd0 [ 594.158969] __kmem_cache_alloc_node+0x2c/0x1b0 [ 594.158975] kmalloc_node_trace+0x1d/0xb0 [ 594.158983] alloc_worker+0x18/0x50 [ 594.158989] init_rescuer.part.0+0x13/0xa0 [ 594.158995] workqueue_init+0xdf/0x210 [ 594.159001] kernel_init_freeable+0x5c/0x2f0 [ 594.159009] kernel_init+0x11/0x1a0 [ 594.159017] ret_from_fork+0x29/0x50 [ 594.159023] -> #4 (fs_reclaim){+.+.}-{0:0}: [ 594.159031] fs_reclaim_acquire+0xa0/0xd0 [ 594.159037] __kmem_cache_alloc_node+0x2c/0x1b0 [ 594.159042] kmalloc_trace+0x20/0xb0 [ 594.159048] acpi_device_add+0x25a/0x3f0 [ 594.159056] acpi_add_single_object+0x387/0x750 [ 594.159063] acpi_bus_check_add+0x108/0x280 [ 594.159069] acpi_bus_scan+0x34/0xf0 [ 594.159075] acpi_scan_init+0xed/0x2b0 [ 594.159082] acpi_init+0x21e/0x520 [ 594.159087] do_one_initcall+0x53/0x260 [ 594.159092] kernel_init_freeable+0x18a/0x2f0 [ 594.159099] kernel_init+0x11/0x1a0 [ 594.159105] ret_from_fork+0x29/0x50 [ 594.159110] -> #3 (acpi_device_lock){+.+.}-{3:3}: [ 594.159117] __mutex_lock+0x95/0xd10 [ 594.159122] acpi_enable_wakeup_device_power+0x30/0x120 [ 594.159130] __acpi_device_wakeup_enable+0x34/0x110 [ 594.159138] acpi_pm_set_device_wakeup+0x55/0x140 [ 594.159143] __pci_enable_wake+0x56/0xb0 [ 594.159150] pci_finish_runtime_suspend+0x35/0x80 [ 594.159157] pci_pm_runtime_suspend+0xb5/0x1a0 [ 594.159162] __rpm_callback+0x3c/0x110 [ 594.159170] rpm_callback+0x58/0x70 [ 594.159176] rpm_suspend+0x15c/0x6f0 [ 594.159182] pm_runtime_work+0x9b/0xb0 [ 594.159188] process_one_work+0x263/0x520 [ 594.159195] worker_thread+0x4d/0x3b0 [ 594.159200] kthread+0xeb/0x120 [ 594.159206] ret_from_fork+0x29/0x50 [ 594.159211] -> #2 (acpi_wakeup_lock){+.+.}-{3:3}: [ 594.159218] __mutex_lock+0x95/0xd10 [ 594.159223] acpi_pm_set_device_wakeup+0x7a/0x140 [ 594.159228] __pci_enable_wake+0x77/0xb0 [ 594.159234] pci_pm_runtime_resume+0x70/0xd0 [ 594.159240] __rpm_callback+0x3c/0x110 [ 594.159246] rpm_callback+0x58/0x70 [ 594.159252] rpm_resume+0x50d/0x7a0 [ 594.159258] rpm_resume+0x267/0x7a0 [ 594.159264] __pm_runtime_resume+0x45/0x90 [ 594.159270] xe_pm_runtime_resume_and_get+0x12/0x50 [xe] [ 594.159314] xe_device_mem_access_get+0x97/0xc0 [xe] [ 594.159346] hw_engines+0x65/0xf0 [xe] [ 594.159380] seq_read_iter+0x10d/0x4b0 [ 594.159385] seq_read+0x9e/0xd0 [ 594.159390] full_proxy_read+0x4e/0x80 [ 594.159396] vfs_read+0xb6/0x310 [ 594.159401] ksys_read+0x60/0xe0 [ 594.159406] do_syscall_64+0x38/0x90 [ 594.159413] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 594.159419] -> #1 (&xe->mem_access.lock){+.+.}-{3:3}: [ 594.159427] xe_device_mem_access_get+0x43/0xc0 [xe] [ 594.159457] xe_gt_tlb_invalidation_vma+0x53/0x190 [xe] [ 594.159490] invalidation_fence_init+0x1d2/0x2c0 [xe] [ 594.159529] __xe_pt_unbind_vma+0x151/0x4e0 [xe] [ 594.159564] vm_bind_ioctl+0x48a/0xae0 [xe] [ 594.159602] async_op_work_func+0x20c/0x530 [xe] [ 594.159634] process_one_work+0x263/0x520 [ 594.159640] worker_thread+0x4d/0x3b0 [ 594.159646] kthread+0xeb/0x120 [ 594.159650] ret_from_fork+0x29/0x50 [ 594.159655] -> #0 (&vm->userptr.notifier_lock){++++}-{3:3}: [ 594.159663] __lock_acquire+0x16fa/0x2850 [ 594.159670] lock_acquire+0xd2/0x2e0 [ 594.159676] down_write+0x36/0xd0 [ 594.159681] vma_userptr_invalidate+0xeb/0x330 [xe] [ 594.159714] __mmu_notifier_invalidate_range_start+0x239/0x2a0 [ 594.159722] unmap_vmas+0x1ac/0x1c0 [ 594.159727] unmap_region+0xb5/0x120 [ 
594.159732] do_vmi_align_munmap+0x2be/0x430 [ 594.159739] do_vmi_munmap+0xea/0x120 [ 594.159744] __vm_munmap+0x9c/0x160 [ 594.159750] __x64_sys_munmap+0x12/0x20 [ 594.159756] do_syscall_64+0x38/0x90 [ 594.159761] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 594.159768] other info that might help us debug this: [ 594.159773] Chain exists of: &vm->userptr.notifier_lock --> fs_reclaim --> mmu_notifier_invalidate_range_start [ 594.159785] Possible unsafe locking scenario: [ 594.159790] CPU0 CPU1 [ 594.159794] ---- ---- [ 594.159797] lock(mmu_notifier_invalidate_range_start); [ 594.159802] lock(fs_reclaim); [ 594.159808] lock(mmu_notifier_invalidate_range_start); [ 594.159814] lock(&vm->userptr.notifier_lock); [ 594.159819] The VM should be holding a mem_access.ref so this looks like it should be a false positive and we can just drop the explicit mem_access in xe_gt_tlb_invalidation(). The GGTT invalidation path also takes care to hold mem_access.ref so should be fine there also, and we already assert that we hold access.ref for the GuC communication underneath. Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Cc: Thomas Hellström Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index e2b85559257c..cad0ade595ec 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -248,7 +248,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; - int len = 0, ret; + int len = 0; XE_BUG_ON(!vma); @@ -302,11 +302,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN); - xe_device_mem_access_get(xe); - ret = send_tlb_invalidation(>->uc.guc, fence, action, len); - xe_device_mem_access_put(xe); - - return ret; + return send_tlb_invalidation(>->uc.guc, fence, action, len); } /** -- cgit v1.2.3 From e3d2309250d49e4558b0abe95924b18f74995607 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 19 Jul 2023 09:38:12 +0100 Subject: drm/xe: add lockdep annotation for xe_device_mem_access_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The atomics here might hide potential issues, also rpm core is not holding any lock when calling our rpm resume callback, so add a dummy lock with the idea that xe_pm_runtime_resume() is eventually going to be called when we are holding it. This only needs to happen once and then lockdep can validate all callers and their locks. v2: (Thomas Hellström) - Prefer static lockdep_map instead of full blown mutex. 
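[Editor's note] A sketch of the class of deadlock the dummy lockdep map below is meant to expose; "foo->lock" is a hypothetical caller lock, not something from the patch:

	mutex_lock(&foo->lock);
	/* acquires xe_device_mem_access_lockdep_map and may run the
	 * runtime-resume callback synchronously */
	xe_device_mem_access_get(xe);
	/* ... */
	xe_device_mem_access_put(xe);
	mutex_unlock(&foo->lock);

If xe_pm_runtime_resume() (or anything it calls) ever takes foo->lock itself, lockdep now sees foo->lock -> xe_device_mem_access_lockdep_map -> foo->lock and reports the inversion, instead of the deadlock only ever showing up as a rare runtime hang.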
Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Cc: Thomas Hellström Acked-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 47401bb49958..b1f36c986f0d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -34,6 +34,12 @@ #include "xe_vm_madvise.h" #include "xe_wait_user_fence.h" +#ifdef CONFIG_LOCKDEP +static struct lockdep_map xe_device_mem_access_lockdep_map = { + .name = "xe_device_mem_access_lockdep_map" +}; +#endif + static int xe_file_open(struct drm_device *dev, struct drm_file *file) { struct xe_file *xef; @@ -410,10 +416,28 @@ void xe_device_mem_access_get(struct xe_device *xe) if (xe_pm_read_callback_task(xe) == current) return; + /* + * Since the resume here is synchronous it can be quite easy to deadlock + * if we are not careful. Also in practice it might be quite timing + * sensitive to ever see the 0 -> 1 transition with the callers locks + * held, so deadlocks might exist but are hard for lockdep to ever see. + * With this in mind, help lockdep learn about the potentially scary + * stuff that can happen inside the runtime_resume callback by acquiring + * a dummy lock (it doesn't protect anything and gets compiled out on + * non-debug builds). Lockdep then only needs to see the + * mem_access_lockdep_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> mem_access_lockdep_map. + * For example if the (callers_locks) are ever grabbed in the + * runtime_resume callback, lockdep should give us a nice splat. + */ + lock_map_acquire(&xe_device_mem_access_lockdep_map); + xe_pm_runtime_get(xe); ref = atomic_inc_return(&xe->mem_access.ref); XE_WARN_ON(ref == S32_MAX); + + lock_map_release(&xe_device_mem_access_lockdep_map); } void xe_device_mem_access_put(struct xe_device *xe) -- cgit v1.2.3 From 6a0612aeabcce6c951788384b94d503b99eefaca Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 12 Jul 2023 17:28:39 +0100 Subject: drm/xe/selftests: hold rpm for evict_test_run_device() We are calling fairly low level things like xe_bo_restore_kernel() which expect caller to be holding mem_access.ref. Since we are doing stuff like evict_all we likely don't want to race with rpm suspend, since that potentially wants to do the same thing, so just wrap the whole test. 
Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 5309204d8d1b..a63f7a447ca4 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -12,6 +12,7 @@ #include "xe_bo_evict.h" #include "xe_pci.h" +#include "xe_pm.h" static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, bool clear, u64 get_val, u64 assign_val, @@ -295,9 +296,13 @@ static int evict_test_run_device(struct xe_device *xe) return 0; } + xe_device_mem_access_get(xe); + for_each_gt(gt, xe, id) evict_test_run_gt(xe, gt, test); + xe_device_mem_access_put(xe); + return 0; } -- cgit v1.2.3 From 939902913a25a0feaa9ca34969dd7e5b43fc2502 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 12 Jul 2023 16:27:21 +0100 Subject: drm/xe/selftests: hold rpm for ccs_test_migrate() The GPU job will keep the device awake, however assumption here is that caller of xe_migrate_clear() is also holding mem_access.ref otherwise we hit the asserts in xe_sa_bo_flush_write() prior to the job construction. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index a63f7a447ca4..16e92400e510 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -158,9 +158,13 @@ static int ccs_test_run_device(struct xe_device *xe) return 0; } + xe_device_mem_access_get(xe); + for_each_gt(gt, xe, id) ccs_test_run_gt(xe, gt, test); + xe_device_mem_access_put(xe); + return 0; } -- cgit v1.2.3 From 5a142f9c675ab524a5f18457859ed2002507ea74 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 13 Jul 2023 10:13:33 +0100 Subject: drm/xe/selftests: restart GT after xe_bo_restore_kernel() Test seems to be failing badly after calling xe_bo_restore_kernel(). Taking a snapshot of the CTB and copying back a potentially old version seems risky, depending on what might have been inflight. Also it seems snapshotting the ADS object and copying back results in serious breakage. Normally when calling xe_bo_restore_kernel() we always fully restart the GT, which re-intializes such things. We could potentially skip saving and restoring such objects in xe_bo_evict_all() however seems quite fragile not to also restart the GT. Try to do that here by triggering a GT reset. Signed-off-by: Matthew Auld Cc: Matthew Brost Acked-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 16e92400e510..5d60dc6bfe71 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -218,7 +218,21 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni goto cleanup_all; } + xe_gt_sanitize(gt); err = xe_bo_restore_kernel(xe); + /* + * Snapshotting the CTB and copying back a potentially old + * version seems risky, depending on what might have been + * inflight. Also it seems snapshotting the ADS object and + * copying back results in serious breakage. 
Normally when + * calling xe_bo_restore_kernel() we always fully restart the + * GT, which re-intializes such things. We could potentially + * skip saving and restoring such objects in xe_bo_evict_all() + * however seems quite fragile not to also restart the GT. Try + * to do that here by triggering a GT reset. + */ + xe_gt_reset_async(gt); + flush_work(>->reset.worker); if (err) { KUNIT_FAIL(test, "restore kernel err=%pe\n", ERR_PTR(err)); -- cgit v1.2.3 From ee82d2da9c8ac13486550b2c86068e1d6edddf51 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 13 Jul 2023 10:00:49 +0100 Subject: drm/xe: add missing bulk_move reset It looks like bulk_move is set during object construction, but is only removed on object close, however in various places we might not yet have an actual fd to close, like on the error paths for the gem_create ioctl, and also one internal user for the evict_test_run_gt() selftest. Try to handle those cases by manually resetting the bulk_move. This should prevent triggering: WARNING: CPU: 7 PID: 8252 at drivers/gpu/drm/ttm/ttm_bo.c:327 ttm_bo_release+0x25e/0x2a0 [ttm] v2 (Nirmoy): - It should be safe to just unconditionally call __xe_bo_unset_bulk_move() in most places. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 7 +++++++ drivers/gpu/drm/xe/xe_bo.c | 26 +++++++++++++++++--------- drivers/gpu/drm/xe/xe_bo.h | 6 ++++++ 3 files changed, 30 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 5d60dc6bfe71..b32a9068d76c 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -283,6 +283,10 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni xe_bo_unlock(external, &ww); xe_bo_put(external); + + xe_bo_lock(bo, &ww, 0, false); + __xe_bo_unset_bulk_move(bo); + xe_bo_unlock(bo, &ww); xe_bo_put(bo); continue; @@ -293,6 +297,9 @@ cleanup_all: cleanup_external: xe_bo_put(external); cleanup_bo: + xe_bo_lock(bo, &ww, 0, false); + __xe_bo_unset_bulk_move(bo); + xe_bo_unlock(bo, &ww); xe_bo_put(bo); break; } diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 9ad5cf3e2463..a3bb14aa2234 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1327,6 +1327,7 @@ xe_bo_create_locked_range(struct xe_device *xe, return bo; err_unlock_put_bo: + __xe_bo_unset_bulk_move(bo); xe_bo_unlock_vm_held(bo); xe_bo_put(bo); return ERR_PTR(err); @@ -1770,22 +1771,29 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device, bo_flags); - if (vm) { - xe_vm_unlock(vm, &ww); - xe_vm_put(vm); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto out_vm; } - if (IS_ERR(bo)) - return PTR_ERR(bo); - err = drm_gem_handle_create(file, &bo->ttm.base, &handle); - xe_bo_put(bo); if (err) - return err; + goto out_bulk; args->handle = handle; + goto out_put; - return 0; +out_bulk: + if (vm && !xe_vm_in_fault_mode(vm)) + __xe_bo_unset_bulk_move(bo); +out_put: + xe_bo_put(bo); +out_vm: + if (vm) { + xe_vm_unlock(vm, &ww); + xe_vm_put(vm); + } + return err; } int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 53a82ff7bce2..3e98f3c0b85e 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -144,6 +144,12 
@@ static inline void xe_bo_put(struct xe_bo *bo) drm_gem_object_put(&bo->ttm.base); } +static inline void __xe_bo_unset_bulk_move(struct xe_bo *bo) +{ + if (bo) + ttm_bo_set_bulk_move(&bo->ttm, NULL); +} + static inline void xe_bo_assert_held(struct xe_bo *bo) { if (bo) -- cgit v1.2.3 From 0d39b6daa5455354c485cb4d521b08740456758e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 18 Jul 2023 12:39:23 -0700 Subject: drm/xe: Normalize XE_VM_FLAG* names Rename XE_VM_FLAGS_64K to XE_VM_FLAG_64K to follow the other names and s/GT/TILE/ that got missed in commit 08dea7674533 ("drm/xe: Move migration from GT to tile"). Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230718193924.3084759-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 8 ++++---- drivers/gpu/drm/xe/xe_pt.c | 4 ++-- drivers/gpu/drm/xe/xe_vm.c | 6 +++--- drivers/gpu/drm/xe/xe_vm_types.h | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index aedfb3dd559e..30e5fdf6ca63 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0); - if (m->eng->vm->flags & XE_VM_FLAGS_64K) + if (m->eng->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 0c233380d4f2..0515fbef8eec 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -201,7 +201,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); - if (vm->flags & XE_VM_FLAGS_64K) + if (vm->flags & XE_VM_FLAG_64K) i += 16; else i += 1; @@ -213,7 +213,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; for (i = 0; i < batch->size; - i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE : + i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = xe_pte_encode(NULL, batch, i, XE_CACHE_WB, 0); @@ -239,7 +239,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, for (level = 1; level < num_level; level++) { u32 flags = 0; - if (vm->flags & XE_VM_FLAGS_64K && level == 1) + if (vm->flags & XE_VM_FLAG_64K && level == 1) flags = XE_PDE_64K; entry = xe_pde_encode(bo, map_ofs + (level - 1) * @@ -462,7 +462,7 @@ static void emit_pte(struct xe_migrate *m, addr = xe_res_dma(cur) & PAGE_MASK; if (is_vram) { /* Is this a 64K PTE entry? */ - if ((m->eng->vm->flags & XE_VM_FLAGS_64K) && + if ((m->eng->vm->flags & XE_VM_FLAG_64K) && !(cur_ofs & (16 * 8 - 1))) { XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K)); addr |= XE_PTE_PS64; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 030fd911d189..134c74545e8b 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -341,7 +341,7 @@ int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile, * platforms where 64K pages are needed for VRAM. 
*/ flags = XE_BO_CREATE_PINNED_BIT; - if (vm->flags & XE_VM_FLAGS_64K) + if (vm->flags & XE_VM_FLAG_64K) flags |= XE_BO_CREATE_SYSTEM_BIT; else flags |= XE_BO_CREATE_VRAM_IF_DGFX(tile); @@ -761,7 +761,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .va_curs_start = xe_vma_start(vma), .vma = vma, .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) && is_vram, + .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_vram, }; struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d4c6ddfce1ea..668774081be7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1264,11 +1264,11 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) goto err_close; if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) - vm->flags |= XE_VM_FLAGS_64K; + vm->flags |= XE_VM_FLAG_64K; for_each_tile(tile, xe, id) { if (flags & XE_VM_FLAG_MIGRATION && - tile->id != XE_VM_FLAG_GT_ID(flags)) + tile->id != XE_VM_FLAG_TILE_ID(flags)) continue; vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); @@ -2119,7 +2119,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm) { int idx = vm->flags & XE_VM_FLAG_MIGRATION ? - XE_VM_FLAG_GT_ID(vm->flags) : 0; + XE_VM_FLAG_TILE_ID(vm->flags) : 0; /* Safe to use index 0 as all BO in the VM share a single dma-resv lock */ return &vm->pt_root[idx]->bo->ttm; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 8aca079006ba..c1af0cd6aced 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -136,14 +136,14 @@ struct xe_vm { * @flags: flags for this VM, statically setup a creation time aside * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely */ -#define XE_VM_FLAGS_64K BIT(0) +#define XE_VM_FLAG_64K BIT(0) #define XE_VM_FLAG_COMPUTE_MODE BIT(1) #define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2) #define XE_VM_FLAG_MIGRATION BIT(3) #define XE_VM_FLAG_SCRATCH_PAGE BIT(4) #define XE_VM_FLAG_FAULT_MODE BIT(5) #define XE_VM_FLAG_BANNED BIT(6) -#define XE_VM_FLAG_GT_ID(flags) (((flags) >> 7) & 0x3) +#define XE_VM_FLAG_TILE_ID(flags) (((flags) >> 7) & 0x3) #define XE_VM_FLAG_SET_TILE_ID(tile) ((tile)->id << 7) unsigned long flags; -- cgit v1.2.3 From 4d18eac03212fc2d8c3d9715e2261ac50e989403 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 18 Jul 2023 12:39:24 -0700 Subject: drm/xe: Use FIELD_PREP/FIELD_GET for tile id encoding Use FIELD_PREP()/FIELD_GET() to encode the tile id into flags. Besides protecting for eventual overflow it also makes it easier to see a new flag can't be added as BIT(7). 
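[Editor's note] Illustrative encode/decode with the new macros, using the mask from the hunk below; an editor's sketch, not part of the patch:

	/* bits 8:7 of vm->flags carry the tile id */
	vm->flags |= FIELD_PREP(GENMASK(8, 7), tile->id);	/* encode */
	u8 id = FIELD_GET(GENMASK(8, 7), vm->flags);		/* decode */

FIELD_PREP() confines the value to GENMASK(8, 7), so an oversized tile id can no longer spill into neighbouring flag bits, and the explicit mask documents that BIT(7) and BIT(8) are already taken.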
Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230718193924.3084759-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index c1af0cd6aced..1e3c7b98d775 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -143,8 +143,8 @@ struct xe_vm { #define XE_VM_FLAG_SCRATCH_PAGE BIT(4) #define XE_VM_FLAG_FAULT_MODE BIT(5) #define XE_VM_FLAG_BANNED BIT(6) -#define XE_VM_FLAG_TILE_ID(flags) (((flags) >> 7) & 0x3) -#define XE_VM_FLAG_SET_TILE_ID(tile) ((tile)->id << 7) +#define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(8, 7), flags) +#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(8, 7), (tile)->id) unsigned long flags; /** @composite_fence_ctx: context composite fence */ -- cgit v1.2.3 From 955c09e2cc4894b5997f548de1bd3bdfa18e60e4 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 19 Jul 2023 13:20:59 +0000 Subject: drm/xe: Rely on kmalloc/kzalloc log message Those messages are unnecessary because a generic message is already produced in case of allocation failure. Besides, this also removes a misuse of the XE_IOCTL_DBG macro. Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 4 +--- drivers/gpu/drm/xe/xe_guc_debugfs.c | 4 +--- drivers/gpu/drm/xe/xe_huc_debugfs.c | 4 +--- drivers/gpu/drm/xe/xe_query.c | 8 ++++---- 4 files changed, 7 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index f9f653243f20..e622174a866d 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -173,10 +173,8 @@ void xe_gt_debugfs_register(struct xe_gt *gt) */ #define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) local = drmm_kmalloc(>_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) { - XE_WARN_ON("Couldn't allocate memory"); + if (!local) return; - } memcpy(local, debugfs_list, DEBUGFS_SIZE); #undef DEBUGFS_SIZE diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index 0178b1a2d367..b43c70de7e37 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -89,10 +89,8 @@ void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) #define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) { - XE_WARN_ON("Couldn't allocate memory"); + if (!local) return; - } memcpy(local, debugfs_list, DEBUGFS_SIZE); #undef DEBUGFS_SIZE diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c index ae3c21315d59..18585a7eeb9d 100644 --- a/drivers/gpu/drm/xe/xe_huc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -55,10 +55,8 @@ void xe_huc_debugfs_register(struct xe_huc *huc, struct dentry *parent) #define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) local = drmm_kmalloc(&huc_to_xe(huc)->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) { - XE_WARN_ON("Couldn't allocate memory"); + if (!local) return; - } memcpy(local, debugfs_list, DEBUGFS_SIZE); #undef DEBUGFS_SIZE diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 4b7869596ba8..f880c9af1651 
100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -65,7 +65,7 @@ static int query_engines(struct xe_device *xe, } hw_engine_info = kmalloc(size, GFP_KERNEL); - if (XE_IOCTL_DBG(xe, !hw_engine_info)) + if (!hw_engine_info) return -ENOMEM; for_each_gt(gt, xe, gt_id) @@ -182,7 +182,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) } config = kzalloc(size, GFP_KERNEL); - if (XE_IOCTL_DBG(xe, !config)) + if (!config) return -ENOMEM; config->num_params = num_params; @@ -231,7 +231,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) } gts = kzalloc(size, GFP_KERNEL); - if (XE_IOCTL_DBG(xe, !gts)) + if (!gts) return -ENOMEM; gts->num_gt = xe->info.gt_count; @@ -278,7 +278,7 @@ static int query_hwconfig(struct xe_device *xe, } hwconfig = kzalloc(size, GFP_KERNEL); - if (XE_IOCTL_DBG(xe, !hwconfig)) + if (!hwconfig) return -ENOMEM; xe_device_mem_access_get(xe); -- cgit v1.2.3 From ea82d5aab53f8f13fa0834d0b4341ca0788c2a8f Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 19 Jul 2023 13:51:07 +0000 Subject: drm/xe/execlist: Remove leftover printk messages Those look like leftover debug and are not even being used. If they were real debug/info, they should be using the drm helpers. Signed-off-by: Francois Dugast Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_execlist.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index b0ccc4ff8461..b45594a45fe2 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -47,8 +47,6 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, struct xe_device *xe = gt_to_xe(gt); u64 lrc_desc; - printk(KERN_INFO "__start_lrc(%s, 0x%p, %u)\n", hwe->name, lrc, ctx_id); - lrc_desc = xe_lrc_descriptor(lrc); if (GRAPHICS_VERx100(xe) >= 1250) { @@ -125,9 +123,6 @@ static void __xe_execlist_port_idle(struct xe_execlist_port *port) if (!port->running_exl) return; - printk(KERN_INFO "__xe_execlist_port_idle(%d:%d)\n", port->hwe->class, - port->hwe->instance); - xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop)); __start_lrc(port->hwe, &port->hwe->kernel_lrc, 0); port->running_exl = NULL; @@ -176,9 +171,6 @@ static u64 read_execlist_status(struct xe_hw_engine *hwe) lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base)); hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base)); - printk(KERN_INFO "EXECLIST_STATUS %d:%d = 0x%08x %08x\n", hwe->class, - hwe->instance, hi, lo); - return lo | (u64)hi << 32; } -- cgit v1.2.3 From 72e8d73b712d2232019b33d2331099d3071ea94a Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 19 Jul 2023 13:51:08 +0000 Subject: drm/xe: Cleanup style warnings and errors Fix 6 errors and 20 warnings reported by checkpatch.pl. 
Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_gt_mcr.c | 2 +- drivers/gpu/drm/xe/xe_gt_sysfs.c | 4 +--- drivers/gpu/drm/xe/xe_guc.c | 14 ++++++-------- drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +- drivers/gpu/drm/xe/xe_map.h | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_reg_whitelist.c | 24 ++++++++++++++++++------ drivers/gpu/drm/xe/xe_res_cursor.h | 11 +++++------ drivers/gpu/drm/xe/xe_wopcm.c | 10 ++++------ 10 files changed, 39 insertions(+), 34 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index a3bb14aa2234..49c80e95222b 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1831,7 +1831,7 @@ int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, XE_BUG_ON(!ww); tv_bo.num_shared = num_resv; - tv_bo.bo = &bo->ttm;; + tv_bo.bo = &bo->ttm; list_add_tail(&tv_bo.head, &objs); return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index ff4075387564..c56815af0686 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -273,7 +273,7 @@ static void init_steering_inst0(struct xe_gt *gt) static const struct { const char *name; - void (*init)(struct xe_gt *); + void (*init)(struct xe_gt *gt); } xe_steering_types[] = { [L3BANK] = { "L3BANK", init_steering_l3bank }, [MSLICE] = { "MSLICE", init_steering_mslice }, diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index b955940e8dc6..c69d2e8a0fe1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -37,10 +37,8 @@ void xe_gt_sysfs_init(struct xe_gt *gt) int err; kg = kzalloc(sizeof(*kg), GFP_KERNEL); - if (!kg) { - drm_warn(&xe->drm, "Allocating kobject failed.\n"); + if (!kg) return; - } kobject_init(&kg->base, &xe_gt_sysfs_kobj_type); kg->gt = gt; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index ed90d738d673..8ae026838702 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -396,14 +396,12 @@ static int guc_wait_ucode(struct xe_guc *guc) struct drm_printer p = drm_info_printer(drm->dev); drm_info(drm, "GuC load failed: status = 0x%08X\n", status); - drm_info(drm, "GuC load failed: status: Reset = %d, " - "BootROM = 0x%02X, UKernel = 0x%02X, " - "MIA = 0x%02X, Auth = 0x%02X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status), - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { drm_info(drm, "GuC firmware signature verification failed\n"); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index d788e67312b9..803d557cf5aa 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -104,7 +104,7 @@ struct xe_hw_engine { /** @fence_irq: fence IRQ to run when a hw engine IRQ is received */ struct xe_hw_fence_irq *fence_irq; /** @irq_handler: IRQ 
handler to run when hw engine IRQ is received */ - void (*irq_handler)(struct xe_hw_engine *, u16); + void (*irq_handler)(struct xe_hw_engine *hwe, u16 intr_vec); /** @engine_id: id for this hw engine */ enum xe_hw_engine_id engine_id; }; diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h index 032c2e8b5438..f62e0c8b67ab 100644 --- a/drivers/gpu/drm/xe/xe_map.h +++ b/drivers/gpu/drm/xe/xe_map.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: MIT +/* SPDX-License-Identifier: MIT */ /* * Copyright © 2022 Intel Corporation */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 0515fbef8eec..827938b666c5 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -475,7 +475,7 @@ static void emit_pte(struct xe_migrate *m, bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); - xe_res_next(cur, min(size, (u32)PAGE_SIZE)); + xe_res_next(cur, min_t(u32, size, PAGE_SIZE)); cur_ofs += 8; } } diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index ea6dd7d71b59..e83781f9a516 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -89,18 +89,30 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, deny = val & RING_FORCE_TO_NONPRIV_DENY; switch (val & RING_FORCE_TO_NONPRIV_RANGE_MASK) { - case RING_FORCE_TO_NONPRIV_RANGE_4: range_bit = 4; break; - case RING_FORCE_TO_NONPRIV_RANGE_16: range_bit = 6; break; - case RING_FORCE_TO_NONPRIV_RANGE_64: range_bit = 8; break; + case RING_FORCE_TO_NONPRIV_RANGE_4: + range_bit = 4; + break; + case RING_FORCE_TO_NONPRIV_RANGE_16: + range_bit = 6; + break; + case RING_FORCE_TO_NONPRIV_RANGE_64: + range_bit = 8; + break; } range_start = reg & REG_GENMASK(25, range_bit); range_end = range_start | REG_GENMASK(range_bit, 0); switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) { - case RING_FORCE_TO_NONPRIV_ACCESS_RW: access_str = "rw"; break; - case RING_FORCE_TO_NONPRIV_ACCESS_RD: access_str = "read"; break; - case RING_FORCE_TO_NONPRIV_ACCESS_WR: access_str = "write"; break; + case RING_FORCE_TO_NONPRIV_ACCESS_RW: + access_str = "rw"; + break; + case RING_FORCE_TO_NONPRIV_ACCESS_RD: + access_str = "read"; + break; + case RING_FORCE_TO_NONPRIV_ACCESS_WR: + access_str = "write"; + break; } drm_printf_indent(p, indent, "REG[0x%x-0x%x]: %s %s access\n", diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 2a6fdd284395..dda963fe3300 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -51,15 +51,14 @@ struct xe_res_cursor { static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res) { struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); + struct ttm_resource_manager *mgr; - if (res->mem_type != XE_PL_STOLEN) { + if (res->mem_type != XE_PL_STOLEN) return &xe->tiles[res->mem_type - XE_PL_VRAM0].mem.vram_mgr->mm; - } else { - struct ttm_resource_manager *mgr = - ttm_manager_type(&xe->ttm, XE_PL_STOLEN); - return &to_xe_ttm_vram_mgr(mgr)->mm; - } + mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + + return &to_xe_ttm_vram_mgr(mgr)->mm; } /** diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index 35fde8965bca..d9acf8783b83 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -47,12 +47,10 @@ */ /* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */ -#define DGFX_WOPCM_SIZE SZ_4M /* FIXME: Larger size require - for 2 
tile PVC, do a proper - probe sooner or later */ -#define MTL_WOPCM_SIZE SZ_4M /* FIXME: Larger size require - for MTL, do a proper probe - sooner or later */ +/* FIXME: Larger size require for 2 tile PVC, do a proper probe sooner or later */ +#define DGFX_WOPCM_SIZE SZ_4M +/* FIXME: Larger size require for MTL, do a proper probe sooner or later */ +#define MTL_WOPCM_SIZE SZ_4M #define GEN11_WOPCM_SIZE SZ_2M #define GEN12_MAX_WOPCM_SIZE SZ_8M -- cgit v1.2.3 From 0043a3e8a1f57e3aca91d4a99ff49031416119b6 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 19 Jul 2023 18:57:07 +0000 Subject: drm/xe/execlist: Log when using execlist submission Make explicit in the log that execlist submission is used to prevent from silently using it over GuC submission. Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_execlist.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index b45594a45fe2..b15d095b395b 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -318,9 +318,12 @@ static int execlist_engine_init(struct xe_engine *e) { struct drm_gpu_scheduler *sched; struct xe_execlist_engine *exl; + struct xe_device *xe = gt_to_xe(e->gt); int err; - XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); + XE_BUG_ON(xe_device_guc_submission_enabled(xe)); + + drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n"); exl = kzalloc(sizeof(*exl), GFP_KERNEL); if (!exl) -- cgit v1.2.3 From 845f64bdbfc96cefd7070621b18ff8f50c7857fb Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Sun, 9 Jul 2023 09:54:59 -0700 Subject: drm/xe: Introduce a range-fence utility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add generic utility to track range conflicts signaled by a dma-fence. Tracking implemented via an interval tree. An example use case being tracking conflicts for pending (un)binds from multiple bind engines. By being generic ths idea would this could moved to the DRM level and used in multiple drivers for similar problems. 
v2: Make interval tree functions static (CI) v3: Remove non-static cleanup function (CI) Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Signed-off-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_range_fence.c | 156 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_range_fence.h | 75 +++++++++++++++++ 3 files changed, 232 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_range_fence.c create mode 100644 drivers/gpu/drm/xe/xe_range_fence.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e5fb874a7aaf..312e643397ba 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -95,6 +95,7 @@ xe-y += xe_bb.o \ xe_pt.o \ xe_pt_walk.o \ xe_query.o \ + xe_range_fence.o \ xe_reg_sr.o \ xe_reg_whitelist.o \ xe_rtp.o \ diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c new file mode 100644 index 000000000000..d35d9ec58e86 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_range_fence.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include +#include + +#include "xe_macros.h" +#include "xe_range_fence.h" + +#define XE_RANGE_TREE_START(_node) ((_node)->start) +#define XE_RANGE_TREE_LAST(_node) ((_node)->last) + +INTERVAL_TREE_DEFINE(struct xe_range_fence, rb, u64, __subtree_last, + XE_RANGE_TREE_START, XE_RANGE_TREE_LAST, static, + xe_range_fence_tree); + +static void +xe_range_fence_signal_notify(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct xe_range_fence *rfence = container_of(cb, typeof(*rfence), cb); + struct xe_range_fence_tree *tree = rfence->tree; + + llist_add(&rfence->link, &tree->list); +} + +static bool __xe_range_fence_tree_cleanup(struct xe_range_fence_tree *tree) +{ + struct llist_node *node = llist_del_all(&tree->list); + struct xe_range_fence *rfence, *next; + + llist_for_each_entry_safe(rfence, next, node, link) { + xe_range_fence_tree_remove(rfence, &tree->root); + dma_fence_put(rfence->fence); + kfree(rfence); + } + + return !!node; +} + +/** + * xe_range_fence_insert() - range fence insert + * @tree: range fence tree to insert intoi + * @rfence: range fence + * @ops: range fence ops + * @start: start address of range fence + * @last: last address of range fence + * @fence: dma fence which signals range fence can be removed + freed + * + * Return: 0 on success, non-zero on failure + */ +int xe_range_fence_insert(struct xe_range_fence_tree *tree, + struct xe_range_fence *rfence, + const struct xe_range_fence_ops *ops, + u64 start, u64 last, struct dma_fence *fence) +{ + int err = 0; + + __xe_range_fence_tree_cleanup(tree); + + if (dma_fence_is_signaled(fence)) + goto free; + + rfence->ops = ops; + rfence->start = start; + rfence->last = last; + rfence->tree = tree; + rfence->fence = dma_fence_get(fence); + err = dma_fence_add_callback(fence, &rfence->cb, + xe_range_fence_signal_notify); + if (err == -ENOENT) { + dma_fence_put(fence); + err = 0; + goto free; + } else if (err == 0) { + xe_range_fence_tree_insert(rfence, &tree->root); + return 0; + } + +free: + if (ops->free) + ops->free(rfence); + + return err; +} + +static void xe_range_fence_tree_remove_all(struct xe_range_fence_tree *tree) +{ + struct xe_range_fence *rfence; + bool retry = true; + + rfence = xe_range_fence_tree_iter_first(&tree->root, 0, U64_MAX); + while (rfence) { + /* Should be ok with the minimalistic callback */ + if 
(dma_fence_remove_callback(rfence->fence, &rfence->cb)) + llist_add(&rfence->link, &tree->list); + rfence = xe_range_fence_tree_iter_next(rfence, 0, U64_MAX); + } + + while (retry) + retry = __xe_range_fence_tree_cleanup(tree); +} + +/** + * xe_range_fence_tree_init() - Init range fence tree + * @tree: range fence tree + */ +void xe_range_fence_tree_init(struct xe_range_fence_tree *tree) +{ + memset(tree, 0, sizeof(*tree)); +} + +/** + * xe_range_fence_tree_fini() - Fini range fence tree + * @tree: range fence tree + */ +void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree) +{ + xe_range_fence_tree_remove_all(tree); + XE_WARN_ON(!RB_EMPTY_ROOT(&tree->root.rb_root)); +} + +/** + * xe_range_fence_tree_first() - range fence tree iterator first + * @tree: range fence tree + * @start: start address of range fence + * @last: last address of range fence + * + * Return: first range fence found in range or NULL + */ +struct xe_range_fence * +xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start, + u64 last) +{ + return xe_range_fence_tree_iter_first(&tree->root, start, last); +} + +/** + * xe_range_fence_tree_next() - range fence tree iterator next + * @rfence: current range fence + * @start: start address of range fence + * @last: last address of range fence + * + * Return: next range fence found in range or NULL + */ +struct xe_range_fence * +xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last) +{ + return xe_range_fence_tree_iter_next(rfence, start, last); +} + +const struct xe_range_fence_ops xe_range_fence_kfree_ops = { + .free = (void (*)(struct xe_range_fence *rfence)) kfree, +}; diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h new file mode 100644 index 000000000000..edd58b34f5c0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_range_fence.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_RANGE_FENCE_H_ +#define _XE_RANGE_FENCE_H_ + +#include +#include +#include + +struct xe_range_fence_tree; +struct xe_range_fence; + +/** struct xe_range_fence_ops - XE range fence ops */ +struct xe_range_fence_ops { + /** @free: free range fence op */ + void (*free)(struct xe_range_fence *rfence); +}; + +/** struct xe_range_fence - XE range fence (address conflict tracking) */ +struct xe_range_fence { + /** @rb: RB tree node inserted into interval tree */ + struct rb_node rb; + /** @start: start address of range fence is interval tree */ + u64 start; + /** @last: last address (inclusive) of range fence is interval tree */ + u64 last; + /** @__subtree_last: interval tree internal usage */ + u64 __subtree_last; + /** + * @fence: fence signals address in range fence no longer has conflict + */ + struct dma_fence *fence; + /** @tree: interval tree which range fence belongs to */ + struct xe_range_fence_tree *tree; + /** + * @cb: callback when fence signals to remove range fence free from interval tree + */ + struct dma_fence_cb cb; + /** @link: used to defer free of range fence to non-irq context */ + struct llist_node link; + /** @ops: range fence ops */ + const struct xe_range_fence_ops *ops; +}; + +/** struct xe_range_fence_tree - interval tree to store range fences */ +struct xe_range_fence_tree { + /** @root: interval tree root */ + struct rb_root_cached root; + /** @list: list of pending range fences to be freed */ + struct llist_head list; +}; + +extern const struct xe_range_fence_ops xe_range_fence_kfree_ops; + +struct xe_range_fence * 
+xe_range_fence_tree_first(struct xe_range_fence_tree *tree, u64 start, + u64 last); + +struct xe_range_fence * +xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last); + +void xe_range_fence_tree_init(struct xe_range_fence_tree *tree); + +void xe_range_fence_tree_fini(struct xe_range_fence_tree *tree); + +int xe_range_fence_insert(struct xe_range_fence_tree *tree, + struct xe_range_fence *rfence, + const struct xe_range_fence_ops *ops, + u64 start, u64 end, + struct dma_fence *fence); + +#endif -- cgit v1.2.3 From fd84041d094ce8feb730911ca9c7fdfff1d4fb94 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 19 Jul 2023 14:10:11 -0700 Subject: drm/xe: Make bind engines safe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently have a race between bind engines which can result in corrupted page tables leading to faults. A simple example: bind A 0x0000-0x1000, engine A, has unsatisfied in-fence bind B 0x1000-0x2000, engine B, no in-fences exec A uses 0x1000-0x2000 Bind B will pass bind A and exec A will fault. This occurs as bind A programs the root of the page table in a bind job which is held up by an in-fence. Bind B in this case just programs a leaf entry of the structure. To fix use range-fence utility to track cross bind engine conflicts. In the above example bind A would insert an dependency into the range-fence tree with a key of 0x0-0x7fffffffff, bind B would find that dependency and its bind job would scheduled behind the unsatisfied in-fence and bind A's job. Reviewed-by: Maarten Lankhorst Co-developed-by: Thomas Hellström Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 2 + drivers/gpu/drm/xe/xe_migrate.h | 8 +++ drivers/gpu/drm/xe/xe_pt.c | 115 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.c | 8 +++ drivers/gpu/drm/xe/xe_vm_types.h | 7 +++ 5 files changed, 140 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 827938b666c5..d64567403068 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1074,6 +1074,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m, return ERR_PTR(-ETIME); if (ops->pre_commit) { + pt_update->job = NULL; err = ops->pre_commit(pt_update); if (err) return ERR_PTR(err); @@ -1295,6 +1296,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, goto err_job; if (ops->pre_commit) { + pt_update->job = job; err = ops->pre_commit(pt_update); if (err) goto err_job; diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 204337ea3b4e..0d62aff6421c 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -69,6 +69,14 @@ struct xe_migrate_pt_update { const struct xe_migrate_pt_update_ops *ops; /** @vma: The vma we're updating the pagetable for. */ struct xe_vma *vma; + /** @job: The job if a GPU page-table update. 
NULL otherwise */ + struct xe_sched_job *job; + /** @start: Start of update for the range fence */ + u64 start; + /** @last: Last of update for the range fence */ + u64 last; + /** @tile_id: Tile ID of the update */ + u8 tile_id; }; struct xe_migrate *xe_migrate_init(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 134c74545e8b..0ca7853fc70a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1116,6 +1116,53 @@ struct xe_pt_migrate_pt_update { bool locked; }; +/* + * This function adds the needed dependencies to a page-table update job + * to make sure racing jobs for separate bind engines don't race writing + * to the same page-table range, wreaking havoc. Initially use a single + * fence for the entire VM. An optimization would use smaller granularity. + */ +static int xe_pt_vm_dependencies(struct xe_sched_job *job, + struct xe_range_fence_tree *rftree, + u64 start, u64 last) +{ + struct xe_range_fence *rtfence; + struct dma_fence *fence; + int err; + + rtfence = xe_range_fence_tree_first(rftree, start, last); + while (rtfence) { + fence = rtfence->fence; + + if (!dma_fence_is_signaled(fence)) { + /* + * Is this a CPU update? GPU is busy updating, so return + * an error + */ + if (!job) + return -ETIME; + + dma_fence_get(fence); + err = drm_sched_job_add_dependency(&job->drm, fence); + if (err) + return err; + } + + rtfence = xe_range_fence_tree_next(rtfence, start, last); + } + + return 0; +} + +static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) +{ + struct xe_range_fence_tree *rftree = + &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; + + return xe_pt_vm_dependencies(pt_update->job, rftree, + pt_update->start, pt_update->last); +} + static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_pt_migrate_pt_update *userptr_update = @@ -1123,6 +1170,13 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) struct xe_vma *vma = pt_update->vma; unsigned long notifier_seq = vma->userptr.notifier_seq; struct xe_vm *vm = xe_vma_vm(vma); + int err = xe_pt_vm_dependencies(pt_update->job, + &vm->rftree[pt_update->tile_id], + pt_update->start, + pt_update->last); + + if (err) + return err; userptr_update->locked = false; @@ -1161,6 +1215,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) static const struct xe_migrate_pt_update_ops bind_ops = { .populate = xe_vm_populate_pgtable, + .pre_commit = xe_pt_pre_commit, }; static const struct xe_migrate_pt_update_ops userptr_bind_ops = { @@ -1258,6 +1313,27 @@ static int invalidation_fence_init(struct xe_gt *gt, return ret && ret != -ENOENT ? ret : 0; } +static void xe_pt_calc_rfence_interval(struct xe_vma *vma, + struct xe_pt_migrate_pt_update *update, + struct xe_vm_pgtable_update *entries, + u32 num_entries) +{ + int i, level = 0; + + for (i = 0; i < num_entries; i++) { + const struct xe_vm_pgtable_update *entry = &entries[i]; + + if (entry->pt->level > level) + level = entry->pt->level; + } + + /* Greedy (non-optimal) calculation but simple */ + update->base.start = ALIGN_DOWN(xe_vma_start(vma), + 0x1ull << xe_pt_shift(level)); + update->base.last = ALIGN(xe_vma_end(vma), + 0x1ull << xe_pt_shift(level)) - 1; +} + /** * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma * address range. @@ -1290,6 +1366,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, .base = { .ops = xe_vma_is_userptr(vma) ? 
&userptr_bind_ops : &bind_ops, .vma = vma, + .tile_id = tile->id, }, .bind = true, }; @@ -1297,6 +1374,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, u32 num_entries; struct dma_fence *fence; struct invalidation_fence *ifence = NULL; + struct xe_range_fence *rfence; int err; bind_pt_update.locked = false; @@ -1313,6 +1391,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); + xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, + num_entries); /* * If rebind, we have to invalidate TLB on !LR vms to invalidate @@ -1333,6 +1413,12 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, return ERR_PTR(-ENOMEM); } + rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); + if (!rfence) { + kfree(ifence); + return ERR_PTR(-ENOMEM); + } + fence = xe_migrate_update_pgtables(tile->migrate, vm, xe_vma_bo(vma), e ? e : vm->eng[tile->id], @@ -1342,6 +1428,14 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, if (!IS_ERR(fence)) { bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; LLIST_HEAD(deferred); + int err; + + err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, + &xe_range_fence_kfree_ops, + bind_pt_update.base.start, + bind_pt_update.base.last, fence); + if (err) + dma_fence_wait(fence, false); /* TLB invalidation must be done before signaling rebind */ if (ifence) { @@ -1380,6 +1474,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); } else { + kfree(rfence); kfree(ifence); if (bind_pt_update.locked) up_read(&vm->userptr.notifier_lock); @@ -1589,6 +1684,7 @@ xe_pt_commit_unbind(struct xe_vma *vma, static const struct xe_migrate_pt_update_ops unbind_ops = { .populate = xe_migrate_clear_pgtable_callback, + .pre_commit = xe_pt_pre_commit, }; static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { @@ -1626,12 +1722,15 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops : &unbind_ops, .vma = vma, + .tile_id = tile->id, }, }; struct xe_vm *vm = xe_vma_vm(vma); u32 num_entries; struct dma_fence *fence = NULL; struct invalidation_fence *ifence; + struct xe_range_fence *rfence; + LLIST_HEAD(deferred); xe_bo_assert_held(xe_vma_bo(vma)); @@ -1645,11 +1744,19 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); + xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, + num_entries); ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); + rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); + if (!rfence) { + kfree(ifence); + return ERR_PTR(-ENOMEM); + } + /* * Even if we were already evicted and unbind to destroy, we need to * clear again here. 
The eviction may have updated pagetables at a @@ -1664,6 +1771,13 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e if (!IS_ERR(fence)) { int err; + err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, + &xe_range_fence_kfree_ops, + unbind_pt_update.base.start, + unbind_pt_update.base.last, fence); + if (err) + dma_fence_wait(fence, false); + /* TLB invalidation must be done before signaling unbind */ err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); if (err) { @@ -1685,6 +1799,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e unbind_pt_update.locked ? &deferred : NULL); vma->tile_present &= ~BIT(tile->id); } else { + kfree(rfence); kfree(ifence); } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 668774081be7..8f1ad906d4b0 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1243,6 +1243,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) INIT_LIST_HEAD(&vm->preempt.engines); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ + for_each_tile(tile, xe, id) + xe_range_fence_tree_init(&vm->rftree[id]); + INIT_LIST_HEAD(&vm->extobj.list); if (!(flags & XE_VM_FLAG_MIGRATION)) @@ -1356,6 +1359,8 @@ err_close: return ERR_PTR(err); err_no_resv: + for_each_tile(tile, xe, id) + xe_range_fence_tree_fini(&vm->rftree[id]); kfree(vm); if (!(flags & XE_VM_FLAG_MIGRATION)) xe_device_mem_access_put(xe); @@ -1500,6 +1505,9 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe->usm.num_vm_in_non_fault_mode--; mutex_unlock(&xe->usm.lock); + for_each_tile(tile, xe, id) + xe_range_fence_tree_fini(&vm->rftree[id]); + xe_vm_put(vm); } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1e3c7b98d775..f571571d966a 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -15,6 +15,7 @@ #include "xe_device_types.h" #include "xe_pt_types.h" +#include "xe_range_fence.h" struct async_op_fence; struct xe_bo; @@ -175,6 +176,12 @@ struct xe_vm { */ struct work_struct destroy_work; + /** + * @rftree: range fence tree to track updates to page table structure. + * Used to implement conflict tracking between independent bind engines. + */ + struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE]; + /** @extobj: bookkeeping for external objects. Protected by the vm lock */ struct { /** @enties: number of external BOs attached this VM */ -- cgit v1.2.3 From 3188c0f4c893ce1b232cdf8a3e26ff6139079908 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 19 Jul 2023 14:31:21 -0700 Subject: drm/xe: Remove xe_vma_op_unmap xe_vma_op_unmap isn't used, remove it. 
Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 5 ----- drivers/gpu/drm/xe/xe_vm_types.h | 10 ---------- 2 files changed, 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 8f1ad906d4b0..c2e8d231b9eb 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2520,11 +2520,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va)); break; case DRM_GPUVA_OP_UNMAP: - op->unmap.start = - xe_vma_start(gpuva_to_vma(op->base.unmap.va)); - op->unmap.range = - xe_vma_size(gpuva_to_vma(op->base.unmap.va)); - break; case DRM_GPUVA_OP_PREFETCH: /* Nothing to do */ break; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index f571571d966a..21dd0433a6a0 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -326,14 +326,6 @@ struct xe_vma_op_map { bool is_null; }; -/** struct xe_vma_op_unmap - VMA unmap operation */ -struct xe_vma_op_unmap { - /** @start: start of the VMA unmap */ - u64 start; - /** @range: range of the VMA unmap */ - u64 range; -}; - /** struct xe_vma_op_remap - VMA remap operation */ struct xe_vma_op_remap { /** @prev: VMA preceding part of a split mapping */ @@ -401,8 +393,6 @@ struct xe_vma_op { union { /** @map: VMA map operation specific data */ struct xe_vma_op_map map; - /** @unmap: VMA unmap operation specific data */ - struct xe_vma_op_unmap unmap; /** @remap: VMA remap operation specific data */ struct xe_vma_op_remap remap; /** @prefetch: VMA prefetch operation specific data */ -- cgit v1.2.3 From 8f33b4f054fc29a4774d8d10116ef460faeb84a8 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 19 Jul 2023 14:46:01 -0700 Subject: drm/xe: Avoid doing rebinds If we don't change page sizes, we can avoid doing rebinds and instead just do a partial unbind. The algorithm to determine the page size is greedy: we assume all pages in the removed VMA use the largest page size used in the VMA. 
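As a rough illustration of that greedy decision (a sketch only, with simplified signatures; the real logic lives in xe_vma_max_pte_size() and the remap handling added to xe_vm.c below), the rebind of an untouched half of a split mapping can be skipped only when the split point is aligned to the VMA's largest page size and the VMA is not a userptr:

static u64 example_max_pte_size(struct xe_vma *vma)
{
	/* Greedy: assume the whole VMA uses its largest page size. */
	if (vma->gpuva.flags & XE_VMA_PTE_1G)
		return SZ_1G;
	else if (vma->gpuva.flags & XE_VMA_PTE_2M)
		return SZ_2M;
	return SZ_4K;
}

static bool example_can_skip_rebind(struct xe_vma *old, u64 split_addr)
{
	/*
	 * Userptr VMAs create a new SG mapping on remap, so they must
	 * always rebind.  Otherwise the untouched part can keep its
	 * page tables as long as no huge PTE straddles the split point.
	 */
	return !xe_vma_is_userptr(old) &&
	       IS_ALIGNED(split_addr, example_max_pte_size(old));
}

When the check passes, the unbind range is simply shrunk so it no longer covers the surviving prev/next VMA.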
v2: Don't exceed 100 lines v3: struct xe_vma_op_unmap remove in different patch, remove XXX comment Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 1 + drivers/gpu/drm/xe/xe_vm.c | 72 ++++++++++++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_vm_types.h | 7 ++++ 3 files changed, 70 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 0ca7853fc70a..bc7fa5a850e2 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -668,6 +668,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (!is_null) xe_res_next(curs, next - addr); xe_walk->va_curs_start = next; + xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); *action = ACTION_CONTINUE; return ret; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c2e8d231b9eb..bdff1999058f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2396,6 +2396,16 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, return vma; } +static u64 xe_vma_max_pte_size(struct xe_vma *vma) +{ + if (vma->gpuva.flags & XE_VMA_PTE_1G) + return SZ_1G; + else if (vma->gpuva.flags & XE_VMA_PTE_2M) + return SZ_2M; + + return SZ_4K; +} + /* * Parse operations list and create any resources needed for the operations * prior to fully committing to the operations. This setup can fail. @@ -2472,6 +2482,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, break; } case DRM_GPUVA_OP_REMAP: + { + struct xe_vma *old = + gpuva_to_vma(op->base.remap.unmap->va); + + op->remap.start = xe_vma_start(old); + op->remap.range = xe_vma_size(old); + if (op->base.remap.prev) { struct xe_vma *vma; bool read_only = @@ -2490,6 +2507,20 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, } op->remap.prev = vma; + + /* + * Userptr creates a new SG mapping so + * we must also rebind. + */ + op->remap.skip_prev = !xe_vma_is_userptr(old) && + IS_ALIGNED(xe_vma_end(vma), + xe_vma_max_pte_size(old)); + if (op->remap.skip_prev) { + op->remap.range -= + xe_vma_end(vma) - + xe_vma_start(old); + op->remap.start = xe_vma_end(vma); + } } if (op->base.remap.next) { @@ -2511,14 +2542,21 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, } op->remap.next = vma; - } - /* XXX: Support no doing remaps */ - op->remap.start = - xe_vma_start(gpuva_to_vma(op->base.remap.unmap->va)); - op->remap.range = - xe_vma_size(gpuva_to_vma(op->base.remap.unmap->va)); + /* + * Userptr creates a new SG mapping so + * we must also rebind. 
+ */ + op->remap.skip_next = !xe_vma_is_userptr(old) && + IS_ALIGNED(xe_vma_start(vma), + xe_vma_max_pte_size(old)); + if (op->remap.skip_next) + op->remap.range -= + xe_vma_end(old) - + xe_vma_start(vma); + } break; + } case DRM_GPUVA_OP_UNMAP: case DRM_GPUVA_OP_PREFETCH: /* Nothing to do */ @@ -2561,10 +2599,23 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) case DRM_GPUVA_OP_REMAP: prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), true); - if (op->remap.prev) + + if (op->remap.prev) { err |= xe_vm_insert_vma(vm, op->remap.prev); - if (op->remap.next) + if (!err && op->remap.skip_prev) + op->remap.prev = NULL; + } + if (op->remap.next) { err |= xe_vm_insert_vma(vm, op->remap.next); + if (!err && op->remap.skip_next) + op->remap.next = NULL; + } + + /* Adjust for partial unbind after removin VMA from VM */ + if (!err) { + op->base.remap.unmap->va->va.addr = op->remap.start; + op->base.remap.unmap->va->va.range = op->remap.range; + } break; case DRM_GPUVA_OP_UNMAP: prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); @@ -2634,9 +2685,10 @@ again: bool next = !!op->remap.next; if (!op->remap.unmap_done) { - vm->async_ops.munmap_rebind_inflight = true; - if (prev || next) + if (prev || next) { + vm->async_ops.munmap_rebind_inflight = true; vma->gpuva.flags |= XE_VMA_FIRST_REBIND; + } err = xe_vm_unbind(vm, vma, op->engine, op->syncs, op->num_syncs, !prev && !next ? op->fence : NULL, diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 21dd0433a6a0..a7386e8c2870 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -30,6 +30,9 @@ struct xe_vm; #define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2) #define XE_VMA_FIRST_REBIND (DRM_GPUVA_USERBITS << 3) #define XE_VMA_LAST_REBIND (DRM_GPUVA_USERBITS << 4) +#define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5) +#define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6) +#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7) struct xe_vma { /** @gpuva: Base GPUVA object */ @@ -336,6 +339,10 @@ struct xe_vma_op_remap { u64 start; /** @range: range of the VMA unmap */ u64 range; + /** @skip_prev: skip prev rebind */ + bool skip_prev; + /** @skip_next: skip next rebind */ + bool skip_next; /** @unmap_done: unmap operation in done */ bool unmap_done; }; -- cgit v1.2.3 From 1655c893af08997175e3404039e79f384c925ee3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 19 Jul 2023 20:44:25 -0700 Subject: drm/xe: Reduce the number list links in xe_vma Combine the userptr, rebind, and destroy links into a union as the lists these links belong to are mutually exclusive. 
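A minimal sketch of the pattern (struct and function names here are made up for illustration): because a VMA only ever sits on one of these lists at a time, the three list heads can alias the same storage, and initializing one member initializes them all.

struct example_vma {
	/* ... other fields ... */
	union {
		struct list_head userptr;	/* on vm->userptr.repin_list */
		struct list_head rebind;	/* on vm->rebind_list */
		struct list_head destroy;	/* on the contested list at VM close */
	} combined_links;
};

static void example_vma_init(struct example_vma *vma)
{
	/*
	 * All union members share the same bytes, so a single
	 * INIT_LIST_HEAD() covers every list the VMA might join.
	 */
	INIT_LIST_HEAD(&vma->combined_links.rebind);
}

The safety argument is purely about mutual exclusion: if any code path could put the VMA on two of these lists at once, the union would corrupt both.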
v2: Adjust which lists are combined (Thomas H) v3: Add kernel doc why this is safe (Thomas H), remove related change of list_del_init -> list_del (Rodrigo) Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 6 ++++-- drivers/gpu/drm/xe/xe_exec.c | 2 +- drivers/gpu/drm/xe/xe_pt.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 43 ++++++++++++++++++++-------------------- drivers/gpu/drm/xe/xe_vm_types.h | 37 ++++++++++++++++++++-------------- 5 files changed, 49 insertions(+), 41 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 49c80e95222b..a78ac158e967 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -476,8 +476,10 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, } xe_vm_assert_held(vm); - if (list_empty(&vma->rebind_link) && vma->tile_present) - list_add_tail(&vma->rebind_link, &vm->rebind_list); + if (list_empty(&vma->combined_links.rebind) && + vma->tile_present) + list_add_tail(&vma->combined_links.rebind, + &vm->rebind_list); if (vm_resv_locked) dma_resv_unlock(xe_vm_resv(vm)); diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 0209f325dda0..89d167a432f6 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -120,7 +120,7 @@ retry: * BOs have valid placements possibly moving an evicted BO back * to a location where the GPU can access it). */ - list_for_each_entry(vma, &vm->rebind_list, rebind_link) { + list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { XE_WARN_ON(xe_vma_is_null(vma)); if (xe_vma_is_userptr(vma)) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index bc7fa5a850e2..b67144768af0 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1805,7 +1805,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e } if (!vma->tile_present) - list_del_init(&vma->rebind_link); + list_del_init(&vma->combined_links.rebind); if (unbind_pt_update.locked) { XE_WARN_ON(!xe_vma_is_userptr(vma)); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index bdff1999058f..29a950a02a36 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -467,7 +467,8 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, list_del_init(&vma->notifier.rebind_link); if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED)) - list_move_tail(&vma->rebind_link, &vm->rebind_list); + list_move_tail(&vma->combined_links.rebind, + &vm->rebind_list); } spin_unlock(&vm->notifier.list_lock); @@ -608,7 +609,7 @@ retry: if (err) goto out_unlock; - list_for_each_entry(vma, &vm->rebind_list, rebind_link) { + list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { if (xe_vma_has_no_bo(vma) || vma->gpuva.flags & XE_VMA_DESTROYED) continue; @@ -780,17 +781,20 @@ int xe_vm_userptr_pin(struct xe_vm *vm) list_for_each_entry_safe(vma, next, &vm->userptr.invalidated, userptr.invalidate_link) { list_del_init(&vma->userptr.invalidate_link); - list_move_tail(&vma->userptr_link, &vm->userptr.repin_list); + if (list_empty(&vma->combined_links.userptr)) + list_move_tail(&vma->combined_links.userptr, + &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); /* Pin and move to temporary list */ - list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) { + list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, + 
combined_links.userptr) { err = xe_vma_userptr_pin_pages(vma); if (err < 0) goto out_err; - list_move_tail(&vma->userptr_link, &tmp_evict); + list_move_tail(&vma->combined_links.userptr, &tmp_evict); } /* Take lock and move to rebind_list for rebinding. */ @@ -798,10 +802,8 @@ int xe_vm_userptr_pin(struct xe_vm *vm) if (err) goto out_err; - list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) { - list_del_init(&vma->userptr_link); - list_move_tail(&vma->rebind_link, &vm->rebind_list); - } + list_for_each_entry_safe(vma, next, &tmp_evict, combined_links.userptr) + list_move_tail(&vma->combined_links.rebind, &vm->rebind_list); dma_resv_unlock(xe_vm_resv(vm)); @@ -845,10 +847,11 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) return NULL; xe_vm_assert_held(vm); - list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) { + list_for_each_entry_safe(vma, next, &vm->rebind_list, + combined_links.rebind) { XE_WARN_ON(!vma->tile_present); - list_del_init(&vma->rebind_link); + list_del_init(&vma->combined_links.rebind); dma_fence_put(fence); if (rebind_worker) trace_xe_vma_rebind_worker(vma); @@ -883,9 +886,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, return vma; } - INIT_LIST_HEAD(&vma->rebind_link); - INIT_LIST_HEAD(&vma->unbind_link); - INIT_LIST_HEAD(&vma->userptr_link); + INIT_LIST_HEAD(&vma->combined_links.rebind); INIT_LIST_HEAD(&vma->userptr.invalidate_link); INIT_LIST_HEAD(&vma->notifier.rebind_link); INIT_LIST_HEAD(&vma->extobj.link); @@ -1070,7 +1071,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) struct xe_vm *vm = xe_vma_vm(vma); lockdep_assert_held_write(&vm->lock); - XE_BUG_ON(!list_empty(&vma->unbind_link)); + XE_BUG_ON(!list_empty(&vma->combined_links.destroy)); if (xe_vma_is_userptr(vma)) { XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED)); @@ -1078,7 +1079,6 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) spin_lock(&vm->userptr.invalidated_lock); list_del_init(&vma->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); - list_del(&vma->userptr_link); } else if (!xe_vma_is_null(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); @@ -1099,9 +1099,6 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) } xe_vm_assert_held(vm); - if (!list_empty(&vma->rebind_link)) - list_del(&vma->rebind_link); - if (fence) { int ret = dma_fence_add_callback(fence, &vma->destroy_cb, vma_destroy_cb); @@ -1451,11 +1448,12 @@ void xe_vm_close_and_put(struct xe_vm *vm) /* easy case, remove from VMA? */ if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { + list_del_init(&vma->combined_links.rebind); xe_vma_destroy(vma, NULL); continue; } - list_add_tail(&vma->unbind_link, &contested); + list_move_tail(&vma->combined_links.destroy, &contested); } /* @@ -1487,8 +1485,9 @@ void xe_vm_close_and_put(struct xe_vm *vm) * Since we hold a refcount to the bo, we can remove and free * the members safely without locking. 
*/ - list_for_each_entry_safe(vma, next_vma, &contested, unbind_link) { - list_del_init(&vma->unbind_link); + list_for_each_entry_safe(vma, next_vma, &contested, + combined_links.destroy) { + list_del_init(&vma->combined_links.destroy); xe_vma_destroy_unlocked(vma); } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a7386e8c2870..00bf0065d514 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -50,21 +50,28 @@ struct xe_vma { */ u64 tile_present; - /** @userptr_link: link into VM repin list if userptr */ - struct list_head userptr_link; - - /** - * @rebind_link: link into VM if this VMA needs rebinding, and - * if it's a bo (not userptr) needs validation after a possible - * eviction. Protected by the vm's resv lock. - */ - struct list_head rebind_link; - - /** - * @unbind_link: link or list head if an unbind of multiple VMAs, in - * single unbind op, is being done. - */ - struct list_head unbind_link; + /** @combined_links: links into lists which are mutually exclusive */ + union { + /** + * @userptr: link into VM repin list if userptr. Protected by + * vm->lock in write mode. + */ + struct list_head userptr; + /** + * @rebind: link into VM if this VMA needs rebinding, and + * if it's a bo (not userptr) needs validation after a possible + * eviction. Protected by the vm's resv lock and typically + * vm->lock is also held in write mode. The only place where + * vm->lock isn't held is the BO eviction path which has + * mutually exclusive execution with userptr. + */ + struct list_head rebind; + /** + * @destroy: link to contested list when VM is being closed. + * Protected by vm->lock in write mode and vm's resv lock. + */ + struct list_head destroy; + } combined_links; /** @destroy_cb: callback to destroy VMA when unbind job is done */ struct dma_fence_cb destroy_cb; -- cgit v1.2.3 From 3daf694ccf8afb936e3508c98738d52b13941397 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 19 Jul 2023 20:50:24 -0700 Subject: drm/xe: Replace list_del_init with list_del for userptr.invalidate_link cleanup This list isn't used again, list_del is the proper call. Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 29a950a02a36..1ea4f20ac723 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1077,7 +1077,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED)); spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&vma->userptr.invalidate_link); + list_del(&vma->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); -- cgit v1.2.3 From 63412a5a6718771214900aec51fc9253b36efcc5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 19 Jul 2023 21:00:51 -0700 Subject: drm/xe: Change tile masks from u64 to u8 This will save us a few bytes in the xe_vma structure. 
v2: Use hweight8 rather than hweight_long (Rodrigo) Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 12 ++++++------ drivers/gpu/drm/xe/xe_vm_types.h | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1ea4f20ac723..a01504ecd2af 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -871,7 +871,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, u64 start, u64 end, bool read_only, bool is_null, - u64 tile_mask) + u8 tile_mask) { struct xe_vma *vma; struct xe_tile *tile; @@ -1579,7 +1579,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, struct dma_fence_array *cf = NULL; struct xe_vm *vm = xe_vma_vm(vma); int cur_fence = 0, i; - int number_tiles = hweight_long(vma->tile_present); + int number_tiles = hweight8(vma->tile_present); int err; u8 id; @@ -1654,7 +1654,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, struct dma_fence_array *cf = NULL; struct xe_vm *vm = xe_vma_vm(vma); int cur_fence = 0, i; - int number_tiles = hweight_long(vma->tile_mask); + int number_tiles = hweight8(vma->tile_mask); int err; u8 id; @@ -2250,7 +2250,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) static struct drm_gpuva_ops * vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 addr, u64 range, - u32 operation, u64 tile_mask, u32 region) + u32 operation, u8 tile_mask, u32 region) { struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; struct ww_acquire_ctx ww; @@ -2354,7 +2354,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - u64 tile_mask, bool read_only, bool is_null) + u8 tile_mask, bool read_only, bool is_null) { struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; struct xe_vma *vma; @@ -3339,7 +3339,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addr = bind_ops[i].addr; u32 op = bind_ops[i].op; u64 obj_offset = bind_ops[i].obj_offset; - u64 tile_mask = bind_ops[i].tile_mask; + u8 tile_mask = bind_ops[i].tile_mask; u32 region = bind_ops[i].region; ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 00bf0065d514..4783a460d671 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -38,18 +38,6 @@ struct xe_vma { /** @gpuva: Base GPUVA object */ struct drm_gpuva gpuva; - /** @tile_mask: Tile mask of where to create binding for this VMA */ - u64 tile_mask; - - /** - * @tile_present: GT mask of binding are present for this VMA. - * protected by vm->lock, vm->resv and for userptrs, - * vm->userptr.notifier_lock for writing. Needs either for reading, - * but if reading is done under the vm->lock only, it needs to be held - * in write mode. - */ - u64 tile_present; - /** @combined_links: links into lists which are mutually exclusive */ union { /** @@ -107,9 +95,21 @@ struct xe_vma { /** @usm: unified shared memory state */ struct { /** @tile_invalidated: VMA has been invalidated */ - u64 tile_invalidated; + u8 tile_invalidated; } usm; + /** @tile_mask: Tile mask of where to create binding for this VMA */ + u8 tile_mask; + + /** + * @tile_present: GT mask of binding are present for this VMA. 
+ * protected by vm->lock, vm->resv and for userptrs, + * vm->userptr.notifier_lock for writing. Needs either for reading, + * but if reading is done under the vm->lock only, it needs to be held + * in write mode. + */ + u8 tile_present; + struct { struct list_head rebind_link; } notifier; @@ -395,7 +395,7 @@ struct xe_vma_op { */ struct async_op_fence *fence; /** @tile_mask: gt mask for this operation */ - u64 tile_mask; + u8 tile_mask; /** @flags: operation flags */ enum xe_vma_op_flags flags; -- cgit v1.2.3 From eae553cbe03a7918f2b5dc9bda0dc35f7a7a308d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 19 Jul 2023 21:04:01 -0700 Subject: drm/xe: Combine destroy_cb and destroy_work in xe_vma into union The callback kicks the worker thus mutually exclusive execution, combining saves a bit of space in xe_vma. Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm_types.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 4783a460d671..b6657b6feb3c 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -61,11 +61,12 @@ struct xe_vma { struct list_head destroy; } combined_links; - /** @destroy_cb: callback to destroy VMA when unbind job is done */ - struct dma_fence_cb destroy_cb; - - /** @destroy_work: worker to destroy this BO */ - struct work_struct destroy_work; + union { + /** @destroy_cb: callback to destroy VMA when unbind job is done */ + struct dma_fence_cb destroy_cb; + /** @destroy_work: worker to destroy this BO */ + struct work_struct destroy_work; + }; /** @userptr: user pointer state */ struct { -- cgit v1.2.3 From a4cc60a55fd9a6bb8b50375d404f317ac2030941 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 19 Jul 2023 21:05:42 -0700 Subject: drm/xe: Only alloc userptr part of xe_vma for userptrs Only alloc userptr part of xe_vma for userptrs, this will save on space in the common BO case. 
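The space saving relies on the userptr state being the last member of the VMA structure, so the tail can simply be left off the allocation for BO-backed and NULL VMAs. A simplified sketch of the idea (illustrative names, not the driver code):

struct example_vma {
	/* ... fields shared by all VMA types ... */
	struct xe_userptr userptr;	/* must remain the final member */
};

static struct example_vma *example_vma_alloc(bool is_userptr)
{
	size_t size = sizeof(struct example_vma);

	if (!is_userptr)
		size -= sizeof(struct xe_userptr);	/* drop the unused tail */

	return kzalloc(size, GFP_KERNEL);	/* shorter alloc in the common BO case */
}

This is only safe as long as nothing dereferences vma->userptr for a non-userptr VMA, which the existing xe_vma_is_userptr() checks are meant to guarantee.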
Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 8 ++++-- drivers/gpu/drm/xe/xe_vm_types.h | 56 ++++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a01504ecd2af..a07dc4f846b1 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -880,14 +880,17 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, XE_BUG_ON(start >= end); XE_BUG_ON(end >= vm->size); - vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (!bo && !is_null) /* userptr */ + vma = kzalloc(sizeof(*vma), GFP_KERNEL); + else + vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr), + GFP_KERNEL); if (!vma) { vma = ERR_PTR(-ENOMEM); return vma; } INIT_LIST_HEAD(&vma->combined_links.rebind); - INIT_LIST_HEAD(&vma->userptr.invalidate_link); INIT_LIST_HEAD(&vma->notifier.rebind_link); INIT_LIST_HEAD(&vma->extobj.link); @@ -931,6 +934,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, u64 size = end - start + 1; int err; + INIT_LIST_HEAD(&vma->userptr.invalidate_link); vma->gpuva.gem.offset = bo_offset_or_userptr; err = mmu_interval_notifier_insert(&vma->userptr.notifier, diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index b6657b6feb3c..f7522f9ca40e 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -34,6 +34,31 @@ struct xe_vm; #define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6) #define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7) +/** struct xe_userptr - User pointer */ +struct xe_userptr { + /** @invalidate_link: Link for the vm::userptr.invalidated list */ + struct list_head invalidate_link; + /** + * @notifier: MMU notifier for user pointer (invalidation call back) + */ + struct mmu_interval_notifier notifier; + /** @sgt: storage for a scatter gather table */ + struct sg_table sgt; + /** @sg: allocated scatter gather table */ + struct sg_table *sg; + /** @notifier_seq: notifier sequence number */ + unsigned long notifier_seq; + /** + * @initial_bind: user pointer has been bound at least once. + * write: vm->userptr.notifier_lock in read mode and vm->resv held. + * read: vm->userptr.notifier_lock in write mode or vm->resv held. + */ + bool initial_bind; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) + u32 divisor; +#endif +}; + struct xe_vma { /** @gpuva: Base GPUVA object */ struct drm_gpuva gpuva; @@ -68,31 +93,6 @@ struct xe_vma { struct work_struct destroy_work; }; - /** @userptr: user pointer state */ - struct { - /** @invalidate_link: Link for the vm::userptr.invalidated list */ - struct list_head invalidate_link; - /** - * @notifier: MMU notifier for user pointer (invalidation call back) - */ - struct mmu_interval_notifier notifier; - /** @sgt: storage for a scatter gather table */ - struct sg_table sgt; - /** @sg: allocated scatter gather table */ - struct sg_table *sg; - /** @notifier_seq: notifier sequence number */ - unsigned long notifier_seq; - /** - * @initial_bind: user pointer has been bound at least once. - * write: vm->userptr.notifier_lock in read mode and vm->resv held. - * read: vm->userptr.notifier_lock in write mode or vm->resv held. 
- */ - bool initial_bind; -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) - u32 divisor; -#endif - } userptr; - /** @usm: unified shared memory state */ struct { /** @tile_invalidated: VMA has been invalidated */ @@ -122,6 +122,12 @@ struct xe_vma { */ struct list_head link; } extobj; + + /** + * @userptr: user pointer state, only allocated for VMAs that are + * user pointers + */ + struct xe_userptr userptr; }; struct xe_device; -- cgit v1.2.3 From 7ead33156483f5e7a699002f2480757aaa34ab08 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 21 Jul 2023 12:16:13 -0700 Subject: drm/xe: Use migrate engine for page fault binds We must use migrate engine for page fault binds in order to avoid a deadlock as the migrate engine has a reserved BCS instance which cannot be stuck on a fault. To use the migrate engine the engine argument to xe_migrate_update_pgtables must be NULL, this was incorrectly wired up so vm->eng[tile_id] was always being used. Fix this. Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 3 +-- drivers/gpu/drm/xe/xe_vm.c | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index b67144768af0..d2df51910010 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1421,8 +1421,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, } fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), - e ? e : vm->eng[tile->id], + vm, xe_vma_bo(vma), e, entries, num_entries, syncs, num_syncs, &bind_pt_update.base); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a07dc4f846b1..f3f6f01046a9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1675,7 +1675,8 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, if (!(vma->tile_mask & BIT(id))) goto next; - fence = __xe_pt_bind_vma(tile, vma, e, first_op ? syncs : NULL, + fence = __xe_pt_bind_vma(tile, vma, e ? e : vm->eng[id], + first_op ? syncs : NULL, first_op ? num_syncs : 0, vma->tile_present & BIT(id)); if (IS_ERR(fence)) { -- cgit v1.2.3 From 9700a1df0a5568a3eb8483de103d4078e273b36b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 24 Jul 2023 11:47:44 +0100 Subject: drm/xe: add lockdep annotation for xe_device_mem_access_put() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main motivation is with d3cold which will make the suspend and resume callbacks even more scary, but is useful regardless. We already have the needed annotation on the acquire side with xe_device_mem_access_get(), and by adding the annotation on the release side we should have a lot more confidence that our locking hierarchy is correct. v2: - Move the annotation into both callbacks for better symmetry. Also don't hold over the entire mem_access_get(); we only need to lockep to understand what is being held upon entering mem_access_get(), and how that matches up with locks in the callbacks. 
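The general shape of the annotation, reduced to a sketch with made-up names (the real code below uses xe_device_mem_access_lockdep_map and the xe PM callbacks): a dummy lockdep map stands in for the implicit "waits for the runtime-PM callback" dependency, so lockdep can connect locks held by callers with locks taken inside suspend/resume.

#ifdef CONFIG_LOCKDEP
static struct lockdep_map example_pm_lockdep_map = {
	.name = "example_pm_lockdep_map"
};
#endif

static void example_mem_access_get(void)
{
	/* Caller side: record what is held when we may wait for resume. */
	lock_map_acquire(&example_pm_lockdep_map);
	lock_map_release(&example_pm_lockdep_map);
	/* ... actual runtime-PM get ... */
}

static int example_runtime_suspend(void)
{
	/* Callback side: order everything taken here against the map. */
	lock_map_acquire(&example_pm_lockdep_map);
	/* ... evict buffers, save state, quiesce interrupts ... */
	lock_map_release(&example_pm_lockdep_map);
	return 0;
}

If a caller holds lock A around the get while the suspend callback also takes A, the two acquire orders conflict and lockdep reports the inversion instead of the system deadlocking at runtime.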
Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Anshuman Gupta Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 4 ++-- drivers/gpu/drm/xe/xe_device.h | 4 ++++ drivers/gpu/drm/xe/xe_pm.c | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b1f36c986f0d..f948a358f53e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -35,7 +35,7 @@ #include "xe_wait_user_fence.h" #ifdef CONFIG_LOCKDEP -static struct lockdep_map xe_device_mem_access_lockdep_map = { +struct lockdep_map xe_device_mem_access_lockdep_map = { .name = "xe_device_mem_access_lockdep_map" }; #endif @@ -431,13 +431,13 @@ void xe_device_mem_access_get(struct xe_device *xe) * runtime_resume callback, lockdep should give us a nice splat. */ lock_map_acquire(&xe_device_mem_access_lockdep_map); + lock_map_release(&xe_device_mem_access_lockdep_map); xe_pm_runtime_get(xe); ref = atomic_inc_return(&xe->mem_access.ref); XE_WARN_ON(ref == S32_MAX); - lock_map_release(&xe_device_mem_access_lockdep_map); } void xe_device_mem_access_put(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 8b085ffdc5f8..593accb68281 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -16,6 +16,10 @@ struct xe_file; #include "xe_force_wake.h" #include "xe_macros.h" +#ifdef CONFIG_LOCKDEP +extern struct lockdep_map xe_device_mem_access_lockdep_map; +#endif + static inline struct xe_device *to_xe_device(const struct drm_device *dev) { return container_of(dev, struct xe_device, drm); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 04b995aa848f..cb2a00ea28e3 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -188,6 +188,29 @@ int xe_pm_runtime_suspend(struct xe_device *xe) /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); + /* + * The actual xe_device_mem_access_put() is always async underneath, so + * exactly where that is called should makes no difference to us. However + * we still need to be very careful with the locks that this callback + * acquires and the locks that are acquired and held by any callers of + * xe_device_mem_access_get(). We already have the matching annotation + * on that side, but we also need it here. For example lockdep should be + * able to tell us if the following scenario is in theory possible: + * + * CPU0 | CPU1 (kworker) + * lock(A) | + * | xe_pm_runtime_suspend() + * | lock(A) + * xe_device_mem_access_get() | + * + * This will clearly deadlock since rpm core needs to wait for + * xe_pm_runtime_suspend() to complete, but here we are holding lock(A) + * on CPU0 which prevents CPU1 making forward progress. With the + * annotation here and in xe_device_mem_access_get() lockdep will see + * the potential lock inversion and give us a nice splat. 
+ */ + lock_map_acquire(&xe_device_mem_access_lockdep_map); + if (xe->d3cold.allowed) { err = xe_bo_evict_all(xe); if (err) @@ -202,6 +225,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_irq_suspend(xe); out: + lock_map_release(&xe_device_mem_access_lockdep_map); xe_pm_write_callback_task(xe, NULL); return err; } @@ -215,6 +239,8 @@ int xe_pm_runtime_resume(struct xe_device *xe) /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); + lock_map_acquire(&xe_device_mem_access_lockdep_map); + /* * It can be possible that xe has allowed d3cold but other pcie devices * in gfx card soc would have blocked d3cold, therefore card has not @@ -250,6 +276,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } out: + lock_map_release(&xe_device_mem_access_lockdep_map); xe_pm_write_callback_task(xe, NULL); return err; } -- cgit v1.2.3 From 7a060d786cc1d75ffa04256826d805686b8f1043 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 24 Jul 2023 17:34:34 -0700 Subject: drm/xe/mtl: Map PPGTT as CPU:WC On MTL and beyond, the GPU performs non-coherent accesses to the PPGTT page tables. These page tables should be mapped as CPU:WC. Removes CAT errors triggered by xe_exec_basic@once-basic on MTL: xe 0000:00:02.0: [drm:__xe_pt_bind_vma [xe]] Preparing bind, with range [1a0000...1a0fff) engine 0000000000000000. xe 0000:00:02.0: [drm:xe_vm_dbg_print_entries [xe]] 1 entries to update xe 0000:00:02.0: [drm:xe_vm_dbg_print_entries [xe]] 0: Update level 3 at (0 + 1) [0...8000000000) f:0 xe 0000:00:02.0: [drm] Engine memory cat error: guc_id=2 xe 0000:00:02.0: [drm] Engine memory cat error: guc_id=2 xe 0000:00:02.0: [drm] Timedout job: seqno=4294967169, guc_id=2, flags=0x4 v2: - Rename to XE_BO_PAGETABLE to make it more clear that this BO is the pagetable itself, rather than just being bound in the PPGTT. (Lucas) Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Acked-by: Nirmoy Das Link: https://lore.kernel.org/r/20230725003433.1992137-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 16 ++++++++++++---- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_pt.c | 3 ++- 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index a78ac158e967..4b7678db88f3 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -301,6 +301,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, struct xe_device *xe = xe_bo_device(bo); struct xe_ttm_tt *tt; unsigned long extra_pages; + enum ttm_caching caching = ttm_cached; int err; tt = kzalloc(sizeof(*tt), GFP_KERNEL); @@ -314,10 +315,17 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), PAGE_SIZE); - /* TODO: Select caching mode */ - err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, - bo->flags & XE_BO_SCANOUT_BIT ? ttm_write_combined : ttm_cached, - extra_pages); + /* + * Display scanout is always non-coherent with the CPU cache. + * + * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and + * require a CPU:WC mapping. 
+ */ + if (bo->flags & XE_BO_SCANOUT_BIT || + (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE)) + caching = ttm_write_combined; + + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { kfree(tt); return NULL; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 3e98f3c0b85e..12a291925fa9 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -40,6 +40,7 @@ #define XE_BO_DEFER_BACKING BIT(9) #define XE_BO_SCANOUT_BIT BIT(10) #define XE_BO_FIXED_PLACEMENT_BIT BIT(11) +#define XE_BO_PAGETABLE BIT(12) /* this one is trigger internally only */ #define XE_BO_INTERNAL_TEST BIT(30) #define XE_BO_INTERNAL_64K BIT(31) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d2df51910010..48a87b50a040 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -221,7 +221,8 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_NO_RESV_EVICT); + XE_BO_CREATE_NO_RESV_EVICT | + XE_BO_PAGETABLE); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; -- cgit v1.2.3 From 2a6d871bd97722e899780a8e429b0fb5f11dadc6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 24 Jul 2023 17:34:35 -0700 Subject: drm/xe: xe_engine_create_ioctl should check gt_count, not tile_count Platforms like MTL only have a single tile, but multiple GTs. Ensure XE_ENGINE_CREATE accepts engine creation on gt1 on such platforms. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230725003433.1992137-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index c31e55c10a33..71f778785226 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -416,7 +416,7 @@ find_hw_engine(struct xe_device *xe, if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) return NULL; - if (eci.gt_id >= xe->info.tile_count) + if (eci.gt_id >= xe->info.gt_count) return NULL; idx = array_index_nospec(eci.engine_class, @@ -539,7 +539,7 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.tile_count)) + if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { -- cgit v1.2.3 From 6a024f1bfdfe3b535786780f67c38429df17e857 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 31 Mar 2023 09:46:26 +0100 Subject: drm/xe/bo: support tiered vram allocation for small-bar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the new flag XE_BO_NEEDS_CPU_ACCESS, to force allocating in the mappable part of vram. If no flag is specified we do a topdown allocation, to limit the chances of stealing the precious mappable part, if we don't need it. If this is a full-bar system, then this all gets nooped. For kernel users, it looks like xe_bo_create_pin_map() is the central place which users should call if they want CPU access to the object, so add the flag there. We still need to plumb this through for userspace allocations. Also it looks like page-tables are using pin_map(), which is less than ideal. 
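Conceptually the placement policy reduces to the sketch below (an illustrative helper, not the literal add_vram() code); on a full-BAR device the whole of VRAM is CPU visible and the extra handling disappears:

static void example_setup_vram_place(struct ttm_place *place,
				     u64 io_size, u64 vram_size,
				     bool needs_cpu_access)
{
	if (io_size >= vram_size)
		return;	/* full BAR: everything is already mappable */

	if (needs_cpu_access) {
		/* Pin the allocation to the CPU-visible window. */
		place->fpfn = 0;
		place->lpfn = io_size >> PAGE_SHIFT;
	} else {
		/* Stay out of the precious mappable range if possible. */
		place->flags |= TTM_PL_FLAG_TOPDOWN;
	}
}

Kernel users that need a CPU mapping then simply pass XE_BO_NEEDS_CPU_ACCESS, which is what the xe_bo_create_pin_map() change below does.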
If we can already use the GPU to do page-table management, then maybe we should just force that for small-bar. Signed-off-by: Matthew Auld Cc: Gwan-gyeong Mun Cc: Thomas Hellström Cc: Lucas De Marchi Reviewed-by: Maarten Lankhorst Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 3 ++- drivers/gpu/drm/xe/xe_bo.c | 48 ++++++++++++++++++++++++----------- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 4 +++ 4 files changed, 40 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 30e5fdf6ca63..c332dc54cb70 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -111,7 +111,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, struct xe_bo *sysmem = xe_bo_create_locked(xe, m->tile, NULL, bo->size, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT); + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_NEEDS_CPU_ACCESS); if (IS_ERR(sysmem)) { KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n", str, PTR_ERR(sysmem)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4b7678db88f3..fa3fc825b730 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -124,19 +124,28 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { struct xe_tile *tile = mem_type_to_tile(xe, mem_type); + struct ttm_place place = { .mem_type = mem_type }; + u64 io_size = tile->mem.vram.io_size; XE_BUG_ON(!tile->mem.vram.usable_size); - places[*c] = (struct ttm_place) { - .mem_type = mem_type, - /* - * For eviction / restore on suspend / resume objects - * pinned in VRAM must be contiguous - */ - .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT) ? 
- TTM_PL_FLAG_CONTIGUOUS : 0, - }; + /* + * For eviction / restore on suspend / resume objects + * pinned in VRAM must be contiguous + */ + if (bo_flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT)) + place.flags |= TTM_PL_FLAG_CONTIGUOUS; + + if (io_size < tile->mem.vram.usable_size) { + if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) { + place.fpfn = 0; + place.lpfn = io_size >> PAGE_SHIFT; + } else { + place.flags |= TTM_PL_FLAG_TOPDOWN; + } + } + places[*c] = place; *c += 1; if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID) @@ -385,15 +394,20 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct xe_device *xe = ttm_to_xe_device(bdev); - struct xe_tile *tile; switch (mem->mem_type) { case XE_PL_SYSTEM: case XE_PL_TT: return 0; case XE_PL_VRAM0: - case XE_PL_VRAM1: - tile = mem_type_to_tile(xe, mem->mem_type); + case XE_PL_VRAM1: { + struct xe_tile *tile = mem_type_to_tile(xe, mem->mem_type); + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(mem); + + if (vres->used_visible_size < mem->size) + return -EINVAL; + mem->bus.offset = mem->start << PAGE_SHIFT; if (tile->mem.vram.mapping && @@ -408,7 +422,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.caching = ttm_write_combined; #endif return 0; - case XE_PL_STOLEN: + } case XE_PL_STOLEN: return xe_ttm_stolen_io_mem_reserve(xe, mem); default: return -EINVAL; @@ -1376,7 +1390,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile xe_ttm_stolen_cpu_access_needs_ggtt(xe)) flags |= XE_BO_CREATE_GGTT_BIT; - bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, flags); + bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, + flags | XE_BO_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) return bo; @@ -1685,6 +1700,9 @@ int xe_bo_vmap(struct xe_bo *bo) xe_bo_assert_held(bo); + if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) + return -EINVAL; + if (!iosys_map_is_null(&bo->vmap)) return 0; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 12a291925fa9..e231b2829bef 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -41,6 +41,7 @@ #define XE_BO_SCANOUT_BIT BIT(10) #define XE_BO_FIXED_PLACEMENT_BIT BIT(11) #define XE_BO_PAGETABLE BIT(12) +#define XE_BO_NEEDS_CPU_ACCESS BIT(13) /* this one is trigger internally only */ #define XE_BO_INTERNAL_TEST BIT(30) #define XE_BO_INTERNAL_64K BIT(31) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index a10fd0366da3..27e0d40daca8 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -373,11 +373,15 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, struct sg_table **sgt) { struct xe_tile *tile = &xe->tiles[res->mem_type - XE_PL_VRAM0]; + struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res); struct xe_res_cursor cursor; struct scatterlist *sg; int num_entries = 0; int i, r; + if (vres->used_visible_size < res->size) + return -EOPNOTSUPP; + *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL); if (!*sgt) return -ENOMEM; -- cgit v1.2.3 From cd928fced9968558f1c7d724c23b1f8868c39774 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 31 Mar 2023 09:46:27 +0100 Subject: drm/xe/uapi: add the userspace bits for small-bar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mostly the same as i915. We add a new hint for userspace to force an object into the mappable part of vram. 
We also need to tell userspace how large the mappable part is. In Vulkan for example, there will be two vram heaps for small-bar systems. And here the size of each heap needs to be known. Likewise the used/avail tracking needs to account for the mappable part. We also limit the available tracking going forward, such that we limit to privileged users only, since these values are system wide and are technically considered an info leak. v2 (Maarten): - s/NEEDS_CPU_ACCESS/NEEDS_VISIBLE_VRAM/ in the uapi. We also no longer require smem as an extra placement. This is more flexible, and lets us use this for clear-color surfaces, since we need CPU access there but we don't want to attach smem, since that effectively disables CCS from kernel pov. - Reject clear-color CCS buffers where NEEDS_VISIBLE_VRAM is not set, instead of migrating it behind the scenes. v3 (José): - Split the changes that limit the accounting for perfmon_capable() into a separate patch. - Use XE_BO_CREATE_VRAM_MASK. v4 (Gwan-gyeong Mun): - Add some kernel-doc for the query bits. v5: - One small kernel-doc correction. The cpu_visible_size and corresponding used tracking are always zero for non XE_MEM_REGION_CLASS_VRAM. v6: - Without perfmon_capable() it likely makes more sense to report as zero, instead of reporting as used == total size. This should give similar behaviour as i915 which rather tracks free instead of used. - Only enforce NEEDS_VISIBLE_VRAM on rc_ccs_cc_plane surfaces when the device is actually small-bar. Testcase: igt/tests/xe_query Testcase: igt/tests/xe_mmap@small-bar Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: Thomas Hellström Cc: Gwan-gyeong Mun Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Filip Hazubski Cc: Carl Zhang Cc: Effie Yu Reviewed-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 13 ++++++++-- drivers/gpu/drm/xe/xe_query.c | 8 ++++-- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 18 ++++++++++++++ drivers/gpu/drm/xe/xe_ttm_vram_mgr.h | 4 +++ include/uapi/drm/xe_drm.h | 47 +++++++++++++++++++++++++++++++++++- 5 files changed, 85 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index fa3fc825b730..d89cf93acb61 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1109,7 +1109,6 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, TTM_BO_VM_NUM_PREFAULT); - drm_dev_exit(idx); } else { ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); @@ -1760,6 +1759,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->flags & ~(XE_GEM_CREATE_FLAG_DEFER_BACKING | XE_GEM_CREATE_FLAG_SCANOUT | + XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM | xe->info.mem_region_mask))) return -EINVAL; @@ -1797,6 +1797,14 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags |= XE_BO_SCANOUT_BIT; bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); + + if (args->flags & XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { + if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK))) + return -EINVAL; + + bo_flags |= XE_BO_NEEDS_CPU_ACCESS; + } + bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device, bo_flags); if (IS_ERR(bo)) { @@ -2081,7 +2089,8 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device, XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_CREATE_USER_BIT 
| XE_BO_SCANOUT_BIT); + XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT | + XE_BO_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index f880c9af1651..3997c644f8fc 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -17,6 +17,7 @@ #include "xe_gt.h" #include "xe_guc_hwconfig.h" #include "xe_macros.h" +#include "xe_ttm_vram_mgr.h" static const enum xe_engine_class xe_to_user_engine_class[] = { [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER, @@ -148,10 +149,13 @@ static int query_memory_usage(struct xe_device *xe, man->size; if (perfmon_capable()) { - usage->regions[usage->num_regions].used = - ttm_resource_manager_usage(man); + xe_ttm_vram_get_used(man, + &usage->regions[usage->num_regions].used, + &usage->regions[usage->num_regions].cpu_visible_used); } + usage->regions[usage->num_regions].cpu_visible_size = + xe_ttm_vram_get_cpu_visible_size(man); usage->num_regions++; } } diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 27e0d40daca8..06a54c8bd46f 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -457,3 +457,21 @@ void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, sg_free_table(sgt); kfree(sgt); } + +u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + + return mgr->visible_size; +} + +void xe_ttm_vram_get_used(struct ttm_resource_manager *man, + u64 *used, u64 *used_visible) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + + mutex_lock(&mgr->lock); + *used = mgr->mm.size - mgr->mm.avail; + *used_visible = mgr->visible_size - mgr->visible_avail; + mutex_unlock(&mgr->lock); +} diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index 6e1d6033d739..d184e19a9230 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -25,6 +25,10 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, struct sg_table *sgt); +u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man); +void xe_ttm_vram_get_used(struct ttm_resource_manager *man, + u64 *used, u64 *used_visible); + static inline struct xe_ttm_vram_mgr_resource * to_xe_ttm_vram_mgr_resource(struct ttm_resource *res) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 347351a8f618..7f29c58f87a3 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -180,8 +180,37 @@ struct drm_xe_query_mem_region { * zero. */ __u64 used; + /** + * @cpu_visible_size: How much of this region can be CPU + * accessed, in bytes. + * + * This will always be <= @total_size, and the remainder (if + * any) will not be CPU accessible. If the CPU accessible part + * is smaller than @total_size then this is referred to as a + * small BAR system. + * + * On systems without small BAR (full BAR), the probed_size will + * always equal the @total_size, since all of it will be CPU + * accessible. + * + * Note this is only tracked for XE_MEM_REGION_CLASS_VRAM + * regions (for other types the value here will always equal + * zero). + */ + __u64 cpu_visible_size; + /** + * @cpu_visible_used: Estimate of CPU visible memory used, in + * bytes. + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable + * accounting. 
Without this the value here will always equal + * zero. Note this is only currently tracked for + * XE_MEM_REGION_CLASS_VRAM regions (for other types the value + * here will always be zero). + */ + __u64 cpu_visible_used; /** @reserved: MBZ */ - __u64 reserved[8]; + __u64 reserved[6]; }; /** @@ -383,6 +412,22 @@ struct drm_xe_gem_create { #define XE_GEM_CREATE_FLAG_DEFER_BACKING (0x1 << 24) #define XE_GEM_CREATE_FLAG_SCANOUT (0x1 << 25) +/* + * When using VRAM as a possible placement, ensure that the corresponding VRAM + * allocation will always use the CPU accessible part of VRAM. This is important + * for small-bar systems (on full-bar systems this gets turned into a noop). + * + * Note: System memory can be used as an extra placement if the kernel should + * spill the allocation to system memory, if space can't be made available in + * the CPU accessible part of VRAM (giving the same behaviour as the i915 + * interface, see I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS). + * + * Note: For clear-color CCS surfaces the kernel needs to read the clear-color + * value stored in the buffer, and on discrete platforms we need to use VRAM for + * display surfaces, therefore the kernel requires setting this flag for such + * objects, otherwise an error is thrown on small-bar systems. + */ +#define XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (0x1 << 26) /** * @flags: Flags, currently a mask of memory instances of where BO can * be placed -- cgit v1.2.3 From c00ce7f22317006a3f14465637093ae3d2e53463 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 31 Mar 2023 09:46:28 +0100 Subject: drm/xe: fully turn on small-bar support This allows vram_size > io_size, instead of just clamping the vram size to the BAR size, now that the driver supports it. Signed-off-by: Matthew Auld Cc: Gwan-gyeong Mun Cc: Lucas De Marchi Cc: Michael J. Ruhl Reviewed-by: Maarten Lankhorst Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 8d0f07261bfd..aa9c573b1243 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -261,11 +261,6 @@ int xe_mmio_probe_vram(struct xe_device *xe) if (err) return err; - /* small bar issues will only cover root tile sizes */ - if (xe->mem.vram.io_size < vram_size) - drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n", - vram_size, (u64)xe->mem.vram.io_size); - drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.io_size); @@ -287,9 +282,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) } tile->mem.vram.base = tile_offset; - - /* small bar can limit the visible size. 
size accordingly */ - tile->mem.vram.usable_size = min_t(u64, vram_size, io_size); + tile->mem.vram.usable_size = vram_size; tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, tile->id, @@ -304,7 +297,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) available_size += vram_size; if (total_size > xe->mem.vram.io_size) { - drm_warn(&xe->drm, "VRAM: %pa is larger than resource %pa\n", + drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", &total_size, &xe->mem.vram.io_size); } -- cgit v1.2.3 From c856cc138bf39aa38f1b97def8927c71b2a057c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 21 Jul 2023 15:44:50 -0400 Subject: drm/xe/uapi: Remove XE_QUERY_CONFIG_FLAGS_USE_GUC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This config is the only real one. If execlist remains in the code it will forever be experimental and we shouldn't maintain an uapi like that for that experimental piece of code that should never be used by real users. Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_query.c | 3 --- include/uapi/drm/xe_drm.h | 1 - 2 files changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 3997c644f8fc..6ba7baf7c777 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -195,9 +195,6 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe_device_get_root_tile(xe)->mem.vram.usable_size) config->info[XE_QUERY_CONFIG_FLAGS] = XE_QUERY_CONFIG_FLAGS_HAS_VRAM; - if (xe->info.enable_guc) - config->info[XE_QUERY_CONFIG_FLAGS] |= - XE_QUERY_CONFIG_FLAGS_USE_GUC; config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; config->info[XE_QUERY_CONFIG_VA_BITS] = 12 + diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 7f29c58f87a3..259de80376b4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -246,7 +246,6 @@ struct drm_xe_query_config { #define XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 #define XE_QUERY_CONFIG_FLAGS 1 #define XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) - #define XE_QUERY_CONFIG_FLAGS_USE_GUC (0x1 << 1) #define XE_QUERY_CONFIG_MIN_ALIGNEMENT 2 #define XE_QUERY_CONFIG_VA_BITS 3 #define XE_QUERY_CONFIG_GT_COUNT 4 -- cgit v1.2.3 From c8dc15464880d725a18593bdfe6651bd235574c3 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 21 Jul 2023 15:56:36 -0400 Subject: drm/xe: Invert guc vs execlists parameters and info. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The module parameter should reflect the name of the optional, experimental and unsafe option, rather than the default one. 
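For illustration only (editorial note, not part of the original commit message): the parameter name matches the diff that follows, but the exact load commands are an assumed example of how the renamed, opt-in option might be exercised.

    # default: GuC submission, no module option needed
    modprobe xe

    # explicitly opt in to the experimental/unsafe execlist backend
    modprobe xe force_execlist=1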
Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_device.h | 7 +------ drivers/gpu/drm/xe/xe_device_types.h | 4 ++-- drivers/gpu/drm/xe/xe_module.c | 6 +++--- drivers/gpu/drm/xe/xe_module.h | 2 +- 6 files changed, 9 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 047341d5689a..491506a1e12e 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -47,7 +47,7 @@ static int info(struct seq_file *m, void *data) drm_printf(&p, "revid %d\n", xe->info.revid); drm_printf(&p, "tile_count %d\n", xe->info.tile_count); drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level); - drm_printf(&p, "enable_guc %s\n", str_yes_no(xe->info.enable_guc)); + drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist)); drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm)); drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); for_each_gt(gt, xe, id) { diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f948a358f53e..63ed59c61c84 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -197,7 +197,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.devid = pdev->device; xe->info.revid = pdev->revision; - xe->info.enable_guc = enable_guc; + xe->info.force_execlist = force_execlist; spin_lock_init(&xe->irq.lock); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 593accb68281..61a5cf1f7300 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -116,12 +116,7 @@ static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe) static inline bool xe_device_guc_submission_enabled(struct xe_device *xe) { - return xe->info.enable_guc; -} - -static inline void xe_device_guc_submission_disable(struct xe_device *xe) -{ - xe->info.enable_guc = false; + return !xe->info.force_execlist; } #define for_each_tile(tile__, xe__, id__) \ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 52c5f7ded7ce..c521ffaf3871 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -210,8 +210,8 @@ struct xe_device { u8 supports_usm:1; /** @has_asid: Has address space ID */ u8 has_asid:1; - /** @enable_guc: GuC submission enabled */ - u8 enable_guc:1; + /** @force_execlist: Forced execlist submission */ + u8 force_execlist:1; /** @has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; /** @has_4tile: Whether tile-4 tiling is supported */ diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 496a9001dc3e..ed3772a69762 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -14,9 +14,9 @@ #include "xe_pci.h" #include "xe_sched_job.h" -bool enable_guc = true; -module_param_named_unsafe(enable_guc, enable_guc, bool, 0444); -MODULE_PARM_DESC(enable_guc, "Enable GuC submission"); +bool force_execlist = false; +module_param_named_unsafe(force_execlist, force_execlist, bool, 0444); +MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); u32 xe_force_vram_bar_size; module_param_named(vram_bar_size, xe_force_vram_bar_size, uint, 0600); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 7169907c3365..2c1f9199f909 100644 --- 
a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -6,7 +6,7 @@ #include /* Module modprobe variables */ -extern bool enable_guc; +extern bool force_execlist; extern bool enable_display; extern u32 xe_force_vram_bar_size; extern int xe_guc_log_level; -- cgit v1.2.3 From 342206b7cc064b8b004474c0baab2c67ced646d0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 26 Jul 2023 09:33:48 -0700 Subject: drm/xe: Always use xe_vm_queue_rebind_worker helper Do not queue the rebind worker directly, rather use the helper xe_vm_queue_rebind_worker. This ensures we use the correct work queue. Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 3 +-- drivers/gpu/drm/xe/xe_vm.h | 14 +++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 48a87b50a040..8b75a6145f9b 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1472,8 +1472,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, } if (!rebind && last_munmap_rebind && xe_vm_in_compute_mode(vm)) - queue_work(vm->xe->ordered_wq, - &vm->preempt.rebind_work); + xe_vm_queue_rebind_worker(vm); } else { kfree(rfence); kfree(ifence); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index d386e72cb974..a1d30de37d20 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -182,6 +182,12 @@ extern struct ttm_device_funcs xe_ttm_funcs; struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm); +static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) +{ + XE_WARN_ON(!xe_vm_in_compute_mode(vm)); + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); +} + /** * xe_vm_reactivate_rebind() - Reactivate the rebind functionality on compute * vms. @@ -195,7 +201,7 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) { if (xe_vm_in_compute_mode(vm) && vm->preempt.rebind_deactivated) { vm->preempt.rebind_deactivated = false; - queue_work(system_unbound_wq, &vm->preempt.rebind_work); + xe_vm_queue_rebind_worker(vm); } } @@ -203,12 +209,6 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma); int xe_vma_userptr_check_repin(struct xe_vma *vma); -static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) -{ - XE_WARN_ON(!xe_vm_in_compute_mode(vm)); - queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); -} - /* * XE_ONSTACK_TV is used to size the tv_onstack array that is input * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv(). -- cgit v1.2.3 From 70748acb7fb4c9bba5364de0d6fe0801f2addebb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 26 Jul 2023 09:41:43 -0700 Subject: drm/xe: Signal out-syncs on VM binds if no operations If no operations are generated for VM binds the out-syncs must still be signaled. 
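For illustration (editorial note): the two-line fix from the hunk below is reproduced here with descriptive comments added; the helper names and the -ENODATA error value are taken directly from that hunk.

    /* The bind generated no operations (err == -ENODATA), so no job will
     * ever signal the user-supplied out-syncs; complete them now with an
     * already-signalled stub fence before the entries are cleaned up.
     */
    for (i = 0; err == -ENODATA && i < num_syncs; i++)
        xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub());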
Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f3f6f01046a9..787008bf85e1 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3378,6 +3378,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) unwind_ops: vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); free_syncs: + for (i = 0; err == -ENODATA && i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub()); while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); -- cgit v1.2.3 From 6aa26f6eb829fb208c569b92837a13e889891db4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 26 Jul 2023 10:23:49 +0100 Subject: drm/xe/engine: add missing rpm for bind engines Bind engines need to use the migration vm, however we don't have any rpm for such a vm, otherwise the kernel would prevent rpm suspend-resume. There are two issues here, first is the actual engine create which needs to touch the lrc, but since that is in VRAM we trigger loads of missing mem_access asserts. The second issue is when destroying the actual engine, which requires GuC CT to deregister the context. v2 (Rodrigo): - Just use ENGINE_FLAG_VM as the indicator that we need to hold an rpm ref. This also handles the case in xe_vm_create() where we create default bind engines. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/499 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/504 Cc: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: Matthew Auld Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_engine.c | 19 +++++++++++++++++++ drivers/gpu/drm/xe/xe_engine_types.h | 1 + 2 files changed, 20 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c index 71f778785226..f60d29b2b506 100644 --- a/drivers/gpu/drm/xe/xe_engine.c +++ b/drivers/gpu/drm/xe/xe_engine.c @@ -76,6 +76,17 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe, if (err) goto err_lrc; + /* + * Normally the user vm holds an rpm ref to keep the device + * awake, and the context holds a ref for the vm, however for + * some engines we use the kernels migrate vm underneath which + * offers no such rpm ref. Make sure we keep a ref here, so we + * can perform GuC CT actions when needed. Caller is expected to + * have already grabbed the rpm ref outside any sensitive locks. + */ + if (e->flags & ENGINE_FLAG_VM) + drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); + return e; err_lrc: @@ -152,6 +163,8 @@ void xe_engine_fini(struct xe_engine *e) xe_lrc_finish(e->lrc + i); if (e->vm) xe_vm_put(e->vm); + if (e->flags & ENGINE_FLAG_VM) + xe_device_mem_access_put(gt_to_xe(e->gt)); kfree(e); } @@ -560,6 +573,9 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; + /* The migration vm doesn't hold rpm ref */ + xe_device_mem_access_get(xe); + migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); new = xe_engine_create(xe, migrate_vm, logical_mask, args->width, hwe, @@ -568,6 +584,9 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, (id ? 
ENGINE_FLAG_BIND_ENGINE_CHILD : 0)); + + xe_device_mem_access_put(xe); /* now held by engine */ + xe_vm_put(migrate_vm); if (IS_ERR(new)) { err = PTR_ERR(new); diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h index a0cd80cb9d7b..f1d531735f6d 100644 --- a/drivers/gpu/drm/xe/xe_engine_types.h +++ b/drivers/gpu/drm/xe/xe_engine_types.h @@ -69,6 +69,7 @@ struct xe_engine { #define ENGINE_FLAG_KERNEL BIT(1) #define ENGINE_FLAG_PERSISTENT BIT(2) #define ENGINE_FLAG_COMPUTE_MODE BIT(3) +/* Caller needs to hold rpm ref when creating engine with ENGINE_FLAG_VM */ #define ENGINE_FLAG_VM BIT(4) #define ENGINE_FLAG_BIND_ENGINE_CHILD BIT(5) #define ENGINE_FLAG_WA BIT(6) -- cgit v1.2.3 From 0e34fdb4a01a3e615c109694b5adc53590ccda19 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jul 2023 09:07:01 -0700 Subject: drm/xe: Fix checking for unset value Commit 37430402618d ("drm/xe: NULL binding implementation") introduced the NULL binding implementation, but left a case in which the out value is_vram is not set and the caller will use whatever was on stack. Eventually the is_vram out could be removed, but this should at least fix the current bug. Fixes: 37430402618d ("drm/xe: NULL binding implementation") Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230726160708.3967790-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 8b75a6145f9b..612de787c19e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -81,8 +81,10 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, size_t page_size, bool *is_vram) { - if (xe_vma_is_null(vma)) + if (xe_vma_is_null(vma)) { + *is_vram = 0; return 0; + } if (xe_vma_is_userptr(vma)) { struct xe_res_cursor cur; -- cgit v1.2.3 From 43b5d81e04773d08df1ed3ff8a40936dca726fda Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 26 Jul 2023 15:25:28 -0700 Subject: drm/xe: fix mcr semaphore locking for MTL in commit 81593af6c88d ("drm/xe: Convert xe_mmio_wait32 to us so we can stop using wait_for_us.") the mcr semaphore register read was accidentally switched from waiting for the register to go to 1 to waiting for the register to go to 0, so we need to flip it back. Signed-off-by: Daniele Ceraolo Spurio Cc: Rodrigo Vivi Cc: Matthew Brost Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index c56815af0686..9eb7a6a1348d 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -420,10 +420,11 @@ static void mcr_lock(struct xe_gt *gt) /* * Starting with MTL we also need to grab a semaphore register * to synchronize with external agents (e.g., firmware) that now - * shares the same steering control register. + * shares the same steering control register. The semaphore is obtained + * when a read to the relevant register returns 1. 
*/ if (GRAPHICS_VERx100(xe) >= 1270) - ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0, 0x1, 10, NULL, + ret = xe_mmio_wait32(gt, STEER_SEMAPHORE, 0x1, 0x1, 10, NULL, true); drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); @@ -431,7 +432,7 @@ static void mcr_lock(struct xe_gt *gt) static void mcr_unlock(struct xe_gt *gt) { - /* Release hardware semaphore */ + /* Release hardware semaphore - this is done by writing 1 to the register */ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) xe_mmio_write32(gt, STEER_SEMAPHORE, 0x1); -- cgit v1.2.3 From 621c1fbd9b83fb6a731e0063ad4ea2d89ec20a9c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jul 2023 09:07:02 -0700 Subject: drm/xe: Remove vma arg from xe_pte_encode() All the callers pass a NULL vma, so the buffer is always the BO. Remove the argument and the side effects of dealing with it. Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230726160708.3967790-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 8 +++--- drivers/gpu/drm/xe/xe_pt.c | 47 ++++++----------------------------- drivers/gpu/drm/xe/xe_pt.h | 4 +-- 4 files changed, 13 insertions(+), 48 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index c332dc54cb70..9e9b228fe315 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* First part of the test, are we updating our pagetable bo with a new entry? */ xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); - expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0); + expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0); if (m->eng->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index d64567403068..0e369a05a5e7 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -196,8 +196,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Map the entire BO in our level 0 pt */ for (i = 0, level = 0; i < num_entries; level++) { - entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE, - XE_CACHE_WB, 0); + entry = xe_pte_encode(bo, i * XE_PAGE_SIZE, XE_CACHE_WB, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -215,8 +214,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, for (i = 0; i < batch->size; i += vm->flags & XE_VM_FLAG_64K ? 
XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { - entry = xe_pte_encode(NULL, batch, i, - XE_CACHE_WB, 0); + entry = xe_pte_encode(batch, i, XE_CACHE_WB, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -1235,7 +1233,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, BUG_ON(pt_bo->size != SZ_4K); - addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, 0); + addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 612de787c19e..ef7c258bdd90 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -78,30 +78,6 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, return pde; } -static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset, - size_t page_size, bool *is_vram) -{ - if (xe_vma_is_null(vma)) { - *is_vram = 0; - return 0; - } - - if (xe_vma_is_userptr(vma)) { - struct xe_res_cursor cur; - u64 page; - - *is_vram = false; - page = offset >> PAGE_SHIFT; - offset &= (PAGE_SIZE - 1); - - xe_res_first_sg(vma->userptr.sg, page << PAGE_SHIFT, page_size, - &cur); - return xe_res_dma(&cur) + offset; - } else { - return xe_bo_addr(xe_vma_bo(vma), offset, page_size, is_vram); - } -} - static u64 __pte_encode(u64 pte, enum xe_cache_level cache, struct xe_vma *vma, u32 pt_level) { @@ -140,34 +116,25 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, /** * xe_pte_encode() - Encode a page-table entry pointing to memory. - * @vma: The vma representing the memory to point to. - * @bo: If @vma is NULL, representing the memory to point to. - * @offset: The offset into @vma or @bo. + * @bo: The BO representing the memory to point to. + * @offset: The offset into @bo. * @cache: The cache level indicating * @pt_level: The page-table level of the page-table into which the entry * is to be inserted. * * Return: An encoded page-table entry. No errors. 
*/ -u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo, - u64 offset, enum xe_cache_level cache, +u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache, u32 pt_level) { u64 pte; bool is_vram; - if (vma) - pte = vma_addr(vma, offset, XE_PAGE_SIZE, &is_vram); - else - pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram); - - if (is_vram) { + pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram); + if (is_vram) pte |= XE_PPGTT_PTE_LM; - if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) - pte |= XE_USM_PPGTT_PTE_AE; - } - return __pte_encode(pte, cache, vma, pt_level); + return __pte_encode(pte, cache, NULL, pt_level); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -179,7 +146,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, return 0; if (level == 0) { - u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id], 0, + u64 empty = xe_pte_encode(vm->scratch_bo[id], 0, XE_CACHE_WB, 0); return empty; diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index aaf4b7b851e2..bbb00d6461ff 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -48,7 +48,7 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma); u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, const enum xe_cache_level level); -u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo, - u64 offset, enum xe_cache_level cache, +u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache, u32 pt_level); + #endif -- cgit v1.2.3 From 937b4be72baaba00fa71a02adac3716332876fa3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jul 2023 09:07:03 -0700 Subject: drm/xe: Decouple vram check from xe_bo_addr() The output arg is_vram in xe_bo_addr() is unused by several callers. It's also not what the function is mainly doing. Remove the argument and let the interested callers to call xe_bo_is_vram(). Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230726160708.3967790-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 15 +++++---------- drivers/gpu/drm/xe/xe_bo.h | 10 +++------- drivers/gpu/drm/xe/xe_ggtt.c | 5 ++--- drivers/gpu/drm/xe/xe_migrate.c | 16 ++++------------ drivers/gpu/drm/xe/xe_pt.c | 10 +++------- drivers/gpu/drm/xe/xe_vm.c | 10 ++++++---- 6 files changed, 23 insertions(+), 43 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d89cf93acb61..d4e60a96ed64 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1526,12 +1526,11 @@ int xe_bo_pin(struct xe_bo *bo) if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && bo->flags & XE_BO_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); - bool vram; if (mem_type_is_vram(place->mem_type)) { XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); - place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE, &vram) - + place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) - vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT); @@ -1656,8 +1655,7 @@ bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) * address, such as printing debug information, but not in cases where memory is * written based on this result. 
*/ -dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, - size_t page_size, bool *is_vram) +dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size) { struct xe_res_cursor cur; u64 page; @@ -1666,9 +1664,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, page = offset >> PAGE_SHIFT; offset &= (PAGE_SIZE - 1); - *is_vram = xe_bo_is_vram(bo); - - if (!*is_vram && !xe_bo_is_stolen(bo)) { + if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { XE_BUG_ON(!bo->ttm.ttm); xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, @@ -1683,12 +1679,11 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, } } -dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, - size_t page_size, bool *is_vram) +dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size) { if (!READ_ONCE(bo->ttm.pin_count)) xe_bo_assert_held(bo); - return __xe_bo_addr(bo, offset, page_size, is_vram); + return __xe_bo_addr(bo, offset, page_size); } int xe_bo_vmap(struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index e231b2829bef..b8817e13aeeb 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -216,17 +216,13 @@ static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) } bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo); -dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, - size_t page_size, bool *is_vram); -dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, - size_t page_size, bool *is_vram); +dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size); +dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size); static inline dma_addr_t xe_bo_main_addr(struct xe_bo *bo, size_t page_size) { - bool is_vram; - - return xe_bo_addr(bo, 0, page_size, &is_vram); + return xe_bo_addr(bo, 0, page_size); } static inline u32 diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 4468c0ae0f6f..3eea65bd1bcd 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -31,12 +31,11 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) { struct xe_device *xe = xe_bo_device(bo); u64 pte; - bool is_vram; - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram); + pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT; - if (is_vram) + if (xe_bo_is_vram(bo)) pte |= XE_GGTT_PTE_LM; /* FIXME: vfunc + pass in caching rules */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 0e369a05a5e7..03f50a14c5c2 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -132,7 +132,6 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) struct xe_device *xe = vm->xe; size_t cleared_size; u64 vram_addr; - bool is_vram; if (!xe_device_has_flat_ccs(xe)) return 0; @@ -147,8 +146,7 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) return PTR_ERR(m->cleared_bo); xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size); - vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE, &is_vram); - XE_BUG_ON(!is_vram); + vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE); m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr); return 0; @@ -221,15 +219,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level++; } } else { - bool is_vram; - u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE, &is_vram); + u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); if (xe->info.supports_usm) { 
batch = tile->primary_gt->usm.bb_pool->bo; - batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE, - &is_vram); + batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); } } @@ -1000,12 +996,8 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, */ XE_BUG_ON(update->qwords > 0x1ff); if (!ppgtt_ofs) { - bool is_vram; - ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, - XE_PAGE_SIZE, - &is_vram)); - XE_BUG_ON(!is_vram); + XE_PAGE_SIZE)); } do { diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ef7c258bdd90..f69f7dbaca55 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -61,13 +61,10 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, const enum xe_cache_level level) { u64 pde; - bool is_vram; - pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram); + pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - XE_WARN_ON(IS_DGFX(xe_bo_device(bo)) && !is_vram); - /* FIXME: I don't think the PPAT handling is correct for MTL */ if (level != XE_CACHE_NONE) @@ -128,10 +125,9 @@ u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache, u32 pt_level) { u64 pte; - bool is_vram; - pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE, &is_vram); - if (is_vram) + pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE); + if (xe_bo_is_vram(bo)) pte |= XE_PPGTT_PTE_LM; return __pte_encode(pte, cache, NULL, pt_level); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 787008bf85e1..205795823555 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3503,9 +3503,10 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) return 0; } if (vm->pt_root[gt_id]) { - addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE, - &is_vram); - drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS"); + addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE); + is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo); + drm_printf(p, " VM root: A:0x%llx %s\n", addr, + is_vram ? "VRAM" : "SYS"); } drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { @@ -3526,7 +3527,8 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) addr = 0; } } else { - addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE, &is_vram); + addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE); + is_vram = xe_bo_is_vram(xe_vma_bo(vma)); } drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", xe_vma_start(vma), xe_vma_end(vma) - 1, -- cgit v1.2.3 From b23ebae7ab4142ffa53a3d80ba1189d0631994e8 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jul 2023 09:07:04 -0700 Subject: drm/xe: Set PTE_DM bit for stolen on MTL Integrated graphics 1270 and beyond should set the PTE_LM bit in the PTE when it's stolen memory. Add a new function, xe_bo_is_stolen_devmem(), and use it when encoding the PTE. In some places in the spec the PTE bit is called "Local Memory", abbreviated as LM, and in others it's called "Device Memory" (DM). Since we moved away from "Local Memory" and preferred the "vram" terminology, also rename the macros as DM to follow the name of the new function. 
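For illustration (editorial sketch; the helper and macro names are taken from the hunks below): on integrated parts with graphics IP 12.70 or newer, the PTE encode path now treats BAR-backed stolen memory the same as VRAM.

    /* Stolen memory on graphics IP >= 12.70 is reached through the PCI BAR,
     * so its PTEs need the device-memory (DM) bit, just like real VRAM on
     * discrete parts.
     */
    if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
        pte |= XE_PPGTT_PTE_DM;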
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230726160708.3967790-7-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 15 +++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 5 +++-- drivers/gpu/drm/xe/xe_ggtt.c | 4 ++-- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- drivers/gpu/drm/xe/xe_pt.c | 13 +++++++------ 5 files changed, 29 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index d4e60a96ed64..65b56e7a2fde 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -80,6 +80,21 @@ bool xe_bo_is_stolen(struct xe_bo *bo) return bo->ttm.resource->mem_type == XE_PL_STOLEN; } +/** + * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR + * @bo: The BO + * + * The stolen memory is accessed through the PCI BAR for both DGFX and some + * integrated platforms that have a dedicated bit in the PTE for devmem (DM). + * + * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise. + */ +bool xe_bo_is_stolen_devmem(struct xe_bo *bo) +{ + return xe_bo_is_stolen(bo) && + GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270; +} + static bool xe_bo_is_user(struct xe_bo *bo) { return bo->flags & XE_BO_CREATE_USER_BIT; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index b8817e13aeeb..a9a32d680208 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -67,9 +67,9 @@ #define XE_PDPE_PS_1G BIT_ULL(7) #define XE_PDE_IPS_64K BIT_ULL(11) -#define XE_GGTT_PTE_LM BIT_ULL(1) +#define XE_GGTT_PTE_DM BIT_ULL(1) #define XE_USM_PPGTT_PTE_AE BIT_ULL(10) -#define XE_PPGTT_PTE_LM BIT_ULL(11) +#define XE_PPGTT_PTE_DM BIT_ULL(11) #define XE_PDE_64K BIT_ULL(6) #define XE_PTE_PS64 BIT_ULL(8) #define XE_PTE_NULL BIT_ULL(9) @@ -239,6 +239,7 @@ void xe_bo_vunmap(struct xe_bo *bo); bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); +bool xe_bo_is_stolen_devmem(struct xe_bo *bo); uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 3eea65bd1bcd..bf46b90a76ad 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -35,8 +35,8 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT; - if (xe_bo_is_vram(bo)) - pte |= XE_GGTT_PTE_LM; + if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) + pte |= XE_GGTT_PTE_DM; /* FIXME: vfunc + pass in caching rules */ if (xe->info.platform == XE_METEORLAKE) { diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 03f50a14c5c2..0405136bc0b1 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -258,7 +258,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level = 2; ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8; flags = XE_PAGE_RW | XE_PAGE_PRESENT | PPAT_CACHED | - XE_PPGTT_PTE_LM | XE_PDPE_PS_1G; + XE_PPGTT_PTE_DM | XE_PDPE_PS_1G; /* * Use 1GB pages, it shouldn't matter the physical amount of @@ -463,7 +463,7 @@ static void emit_pte(struct xe_migrate *m, } addr += vram_region_gpu_offset(bo->ttm.resource); - addr |= XE_PPGTT_PTE_LM; + addr |= XE_PPGTT_PTE_DM; } addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW; bb->cs[bb->len++] = lower_32_bits(addr); diff --git 
a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index f69f7dbaca55..d9192bf50362 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -127,8 +127,8 @@ u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache, u64 pte; pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE); - if (xe_bo_is_vram(bo)) - pte |= XE_PPGTT_PTE_LM; + if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) + pte |= XE_PPGTT_PTE_DM; return __pte_encode(pte, cache, NULL, pt_level); } @@ -714,7 +714,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 *num_entries) { struct xe_bo *bo = xe_vma_bo(vma); - bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo); + bool is_devmem = !xe_vma_is_userptr(vma) && bo && + (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); struct xe_res_cursor curs; struct xe_pt_stage_bind_walk xe_walk = { .base = { @@ -728,13 +729,13 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .va_curs_start = xe_vma_start(vma), .vma = vma, .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_vram, + .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, }; struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; - if (is_vram) { - xe_walk.default_pte = XE_PPGTT_PTE_LM; + if (is_devmem) { + xe_walk.default_pte = XE_PPGTT_PTE_DM; if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); -- cgit v1.2.3 From 58052eb70cdeaaa2a48ec4369e702d097fee13f6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jul 2023 09:07:06 -0700 Subject: drm/xe: Fix MTL+ stolen memory mapping Based on commit 8d8d062be6b9 ("drm/i915/mtl: Fix MTL stolen memory GGTT mapping"). For stolen on MTL and beyond, the address in the PTE is the offset from DSM base. While at it, update the comments explaining each part of the calculation. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230726160708.3967790-9-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 21ecc734f10a..271b3fba4129 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -94,11 +94,22 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC); - /* check GGMS, should be fixed 0x3 (8MB) */ + /* + * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the + * GTT size + */ if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK)) return 0; - mgr->stolen_base = mgr->io_base = pci_resource_start(pdev, 2) + SZ_8M; + /* + * Graphics >= 1270 uses the offset to the GSMBASE as address in the + * PTEs, together with the DM flag being set. Previously there was no + * such flag so the address was the io_base. 
+ * + * DSMBASE = GSMBASE + 8MB + */ + mgr->stolen_base = SZ_8M; + mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base; /* return valid GMS value, -EIO if invalid */ gms = REG_FIELD_GET(GMS_MASK, ggc); -- cgit v1.2.3 From c0d6b6163fd99c5e73eca3b747e704877e070acc Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jul 2023 09:07:07 -0700 Subject: drm/xe: Carve out top of DSM as reserved Top of DSM contains the WOPCM where kernel driver shouldn't access as it contains data from other HW agents. Carve it out from the stolen memory. On a MTL system, the output now matches the expected values: Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230726160708.3967790-10-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 3 +++ drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index e0734c8f922c..b344796bb868 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -97,4 +97,7 @@ #define DSMBASE XE_REG(0x1080C0) #define BDSM_MASK REG_GENMASK64(63, 20) +#define STOLEN_RESERVED XE_REG(0x1082c0) +#define WOPCM_SIZE_MASK REG_GENMASK64(8, 7) + #endif diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 271b3fba4129..7bba8fff5a5d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -89,7 +89,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - u32 stolen_size; + u32 stolen_size, wopcm_size; u32 ggc, gms; ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC); @@ -124,6 +124,12 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr return 0; } + /* Carve out the top of DSM as it contains the reserved WOPCM region */ + wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK, + xe_mmio_read64(xe_root_mmio_gt(xe), + STOLEN_RESERVED)); + stolen_size -= (1U << wopcm_size) * SZ_1M; + if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2))) return 0; -- cgit v1.2.3 From fda48d15a4eade29a41d46d5a6f0bfa7556ccb72 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 26 Jul 2023 09:07:08 -0700 Subject: drm/xe: Sort xe_regs.h Sort it by register address to make it easy to update when needed. v2: Do not create exception for registers with same functionality. Always sort it. 
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230726160708.3967790-11-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 63 ++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index b344796bb868..25275a36b280 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,9 +7,6 @@ #include "regs/xe_reg_defs.h" -#define GU_CNTL XE_REG(0x101010) -#define LMEM_INIT REG_BIT(7) - #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x1c0000 #define BSD2_RING_BASE 0x1c4000 @@ -45,16 +42,13 @@ #define FF_THREAD_MODE XE_REG(0x20a0) #define FF_TESSELATION_DOP_GATE_DISABLE BIT(19) -#define PVC_RP_STATE_CAP XE_REG(0x281014) -#define MTL_RP_STATE_CAP XE_REG(0x138000) - -#define MTL_MEDIAP_STATE_CAP XE_REG(0x138020) -#define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) -#define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) +#define TIMESTAMP_OVERRIDE XE_REG(0x44074) +#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) +#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) -#define MTL_GT_RPE_FREQUENCY XE_REG(0x13800c) -#define MTL_MPE_FREQUENCY XE_REG(0x13802c) -#define MTL_RPE_MASK REG_GENMASK(8, 0) +#define PCU_IRQ_OFFSET 0x444e0 +#define GU_MISC_IRQ_OFFSET 0x444f0 +#define GU_MISC_GSE REG_BIT(27) #define TRANSCODER_A_OFFSET 0x60000 #define TRANSCODER_B_OFFSET 0x61000 @@ -71,33 +65,42 @@ #define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084) -#define PCU_IRQ_OFFSET 0x444e0 -#define GU_MISC_IRQ_OFFSET 0x444f0 -#define GU_MISC_GSE REG_BIT(27) - -#define GFX_MSTR_IRQ XE_REG(0x190010) -#define MASTER_IRQ REG_BIT(31) -#define GU_MISC_IRQ REG_BIT(29) -#define DISPLAY_IRQ REG_BIT(16) -#define GT_DW_IRQ(x) REG_BIT(x) - -#define DG1_MSTR_TILE_INTR XE_REG(0x190008) -#define DG1_MSTR_IRQ REG_BIT(31) -#define DG1_MSTR_TILE(t) REG_BIT(t) - -#define TIMESTAMP_OVERRIDE XE_REG(0x44074) -#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) -#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) +#define GU_CNTL XE_REG(0x101010) +#define LMEM_INIT REG_BIT(7) #define GGC XE_REG(0x108040) #define GMS_MASK REG_GENMASK(15, 8) #define GGMS_MASK REG_GENMASK(7, 6) -#define GSMBASE XE_REG(0x108100) #define DSMBASE XE_REG(0x1080C0) #define BDSM_MASK REG_GENMASK64(63, 20) +#define GSMBASE XE_REG(0x108100) + #define STOLEN_RESERVED XE_REG(0x1082c0) #define WOPCM_SIZE_MASK REG_GENMASK64(8, 7) +#define MTL_RP_STATE_CAP XE_REG(0x138000) + +#define MTL_GT_RPE_FREQUENCY XE_REG(0x13800c) + +#define MTL_MEDIAP_STATE_CAP XE_REG(0x138020) +#define MTL_RPN_CAP_MASK REG_GENMASK(24, 16) +#define MTL_RP0_CAP_MASK REG_GENMASK(8, 0) + +#define MTL_MPE_FREQUENCY XE_REG(0x13802c) +#define MTL_RPE_MASK REG_GENMASK(8, 0) + +#define DG1_MSTR_TILE_INTR XE_REG(0x190008) +#define DG1_MSTR_IRQ REG_BIT(31) +#define DG1_MSTR_TILE(t) REG_BIT(t) + +#define GFX_MSTR_IRQ XE_REG(0x190010) +#define MASTER_IRQ REG_BIT(31) +#define GU_MISC_IRQ REG_BIT(29) +#define DISPLAY_IRQ REG_BIT(16) +#define GT_DW_IRQ(x) REG_BIT(x) + +#define PVC_RP_STATE_CAP XE_REG(0x281014) + #endif -- cgit v1.2.3 From f83a30f466ebbd56355b1f65ec9bcd5087840ffc Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 26 Jul 2023 17:30:42 -0400 Subject: drm/xe: Fix an invalid locking wait context bug We cannot have spin locks around xe_irq_reset, since it will call the 
intel_display_power_is_enabled() function, and that needs a mutex lock. Hence causing the undesired "[ BUG: Invalid wait context ]" We cannot convert i915's power domain lock to spin lock due to the nested dependency of non-atomic context waits. So, let's move the xe_irq_reset functions from the critical area, while still ensuring that we are protecting the irq.enabled and ensuring the right serialization in the irq handlers. v2: On the first version, I had missed the fact that irq.enabled is checked on the xe/display glue layer, and that i915 display code is actually using the irq spin lock properly. So, this got changed to a version suggested by Matthew Auld. v3: do not use lockdep_assert for display glue. do not save restore irq from inside IRQ or we can get bogus irq restore warnings Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/463 Suggested-by: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index ca6353243326..69629be07de2 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -308,6 +308,13 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) unsigned long intr_dw[2]; u32 identity[32]; + spin_lock(&xe->irq.lock); + if (!xe->irq.enabled) { + spin_unlock(&xe->irq.lock); + return IRQ_NONE; + } + spin_unlock(&xe->irq.lock); + master_ctl = xelp_intr_disable(xe); if (!master_ctl) { xelp_intr_enable(xe, false); @@ -366,6 +373,13 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) /* TODO: This really shouldn't be copied+pasted */ + spin_lock(&xe->irq.lock); + if (!xe->irq.enabled) { + spin_unlock(&xe->irq.lock); + return IRQ_NONE; + } + spin_unlock(&xe->irq.lock); + master_tile_ctl = dg1_intr_disable(xe); if (!master_tile_ctl) { dg1_intr_enable(xe, false); @@ -563,10 +577,14 @@ void xe_irq_shutdown(struct xe_device *xe) void xe_irq_suspend(struct xe_device *xe) { + int irq = to_pci_dev(xe->drm.dev)->irq; + spin_lock_irq(&xe->irq.lock); - xe->irq.enabled = false; - xe_irq_reset(xe); + xe->irq.enabled = false; /* no new irqs */ spin_unlock_irq(&xe->irq.lock); + + synchronize_irq(irq); /* flush irqs */ + xe_irq_reset(xe); /* turn irqs off */ } void xe_irq_resume(struct xe_device *xe) @@ -574,13 +592,15 @@ void xe_irq_resume(struct xe_device *xe) struct xe_gt *gt; int id; - spin_lock_irq(&xe->irq.lock); + /* + * lock not needed: + * 1. no irq will arrive before the postinstall + * 2. display is not yet resumed + */ xe->irq.enabled = true; xe_irq_reset(xe); - xe_irq_postinstall(xe); + xe_irq_postinstall(xe); /* turn irqs on */ for_each_gt(gt, xe, id) xe_irq_enable_hwe(gt); - - spin_unlock_irq(&xe->irq.lock); } -- cgit v1.2.3 From 063e09af6e1d9a4f26cdd0eb896c19526cb0afd3 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 26 Jul 2023 17:03:52 -0400 Subject: drm/xe: Invert mask and val in xe_mmio_wait32. The order: 'offset, mask, val'; is more common in other drivers and in special in i915, where any dev could copy a sequence and end up with unexpected behavior. Done with coccinelle: @rule1@ expression gt, reg, val, mask, timeout, out, atomic; @@ - xe_mmio_wait32(gt, reg, val, mask, timeout, out, atomic) + xe_mmio_wait32(gt, reg, mask, val, timeout, out, atomic) spatch -sp_file mmio.cocci *.c *.h compat-i915-headers/intel_uncore.h \ --in-place v2: Rebased after changes on xe_guc_mcr usage of xe_mmio_wait32. 
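For illustration (editorial note; the call is lifted from the xe_gt.c hunk below): a typical caller changes from passing the expected value before the mask to the more conventional mask-then-value order.

    /* Before: expected value (0) first, then mask (GRDOM_FULL) */
    err = xe_mmio_wait32(gt, GDRST, 0, GRDOM_FULL, 5000, NULL, false);

    /* After: mask first, then expected value, matching i915 and other drivers */
    err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);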
Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_force_wake.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 3 +-- drivers/gpu/drm/xe/xe_guc.c | 25 ++++++++++--------------- drivers/gpu/drm/xe/xe_huc.c | 3 +-- drivers/gpu/drm/xe/xe_mmio.h | 4 ++-- drivers/gpu/drm/xe/xe_pcode.c | 2 +- drivers/gpu/drm/xe/xe_uc_fw.c | 2 +- 7 files changed, 17 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 7403673d532d..aba0784b608e 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -120,7 +120,7 @@ static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) static int domain_sleep_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain) { - return xe_mmio_wait32(gt, domain->reg_ack, 0, domain->val, + return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, NULL, false); } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3e32d38aeeea..bb7794cf2c1a 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -456,8 +456,7 @@ static int do_gt_reset(struct xe_gt *gt) int err; xe_mmio_write32(gt, GDRST, GRDOM_FULL); - err = xe_mmio_wait32(gt, GDRST, 0, GRDOM_FULL, 5000, - NULL, false); + err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); if (err) xe_gt_err(gt, "failed to clear GEN11_GRDOM_FULL (%pe)\n", ERR_PTR(err)); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 8ae026838702..2530b6243661 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -290,8 +290,7 @@ int xe_guc_reset(struct xe_guc *guc) xe_mmio_write32(gt, GDRST, GRDOM_GUC); - ret = xe_mmio_wait32(gt, GDRST, 0, GRDOM_GUC, 5000, - &gdrst, false); + ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false); if (ret) { drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", gdrst); @@ -386,10 +385,9 @@ static int guc_wait_ucode(struct xe_guc *guc) * 200ms. Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. 
*/ - ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, - FIELD_PREP(GS_UKERNEL_MASK, - XE_GUC_LOAD_STATUS_READY), - GS_UKERNEL_MASK, 200000, &status, false); + ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK, + FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), + 200000, &status, false); if (ret) { struct drm_device *drm = &xe->drm; @@ -639,10 +637,9 @@ retry: xe_guc_notify(guc); - ret = xe_mmio_wait32(gt, reply_reg, - FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, - GUC_HXG_ORIGIN_GUC), - GUC_HXG_MSG_0_ORIGIN, 50000, &reply, false); + ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_ORIGIN, + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), + 50000, &reply, false); if (ret) { timeout: drm_err(&xe->drm, "mmio request %#x: no reply %#x\n", @@ -654,11 +651,9 @@ timeout: if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { - ret = xe_mmio_wait32(gt, reply_reg, - FIELD_PREP(GUC_HXG_MSG_0_TYPE, - GUC_HXG_TYPE_RESPONSE_SUCCESS), - GUC_HXG_MSG_0_TYPE, 1000000, &header, - false); + ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_TYPE, + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), + 1000000, &header, false); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC)) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 373a65c77946..dc1708b4e94a 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -85,8 +85,7 @@ int xe_huc_auth(struct xe_huc *huc) goto fail; } - ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO, - HUC_LOAD_SUCCESSFUL, + ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO, HUC_LOAD_SUCCESSFUL, HUC_LOAD_SUCCESSFUL, 100000, NULL, false); if (ret) { drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 4953a9a3f1fb..d24badca8677 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -107,8 +107,8 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, return (reg_val & mask) != eval ? -EINVAL : 0; } -static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 val, - u32 mask, u32 timeout_us, u32 *out_val, +static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, + u32 val, u32 timeout_us, u32 *out_val, bool atomic) { ktime_t cur = ktime_get_raw(); diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index e3ab1d3a367f..7f1bf2297f51 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -68,7 +68,7 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, xe_mmio_write32(gt, PCODE_DATA1, data1 ? 
*data1 : 0); xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox); - err = xe_mmio_wait32(gt, PCODE_MAILBOX, 0, PCODE_READY, + err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0, timeout_ms * 1000, NULL, atomic); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 75f7a4cf6cbe..5801c10f3ccc 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -484,7 +484,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) _MASKED_BIT_ENABLE(dma_flags | START_DMA)); /* Wait for DMA to finish */ - ret = xe_mmio_wait32(gt, DMA_CTRL, 0, START_DMA, 100000, &dma_ctrl, + ret = xe_mmio_wait32(gt, DMA_CTRL, START_DMA, 0, 100000, &dma_ctrl, false); if (ret) drm_err(&xe->drm, "DMA for %s fw failed, DMA_CTRL=%u\n", -- cgit v1.2.3 From 4f027e304a6c7ae77150965d10b8a1edee0398a2 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 27 Jul 2023 04:56:49 +0530 Subject: drm/xe: Notify Userspace when gt reset fails Send uevent in case of gt reset failure. This intimation can be used by userspace monitoring tool to do the device level reset/reboot when GT reset fails. udevadm can be used to monitor the uevents. v2: - Support only gt failure notification (Rodrigo) v3 - Rectify the comments in header file. v4 - Use pci kobj instead of drm kobj for notification.(Rodrigo) - Cleanup (Badal) v5 - Add tile id and gt id as additional info provided by uevent. - Provide code documentation for the uevent. (Rodrigo) Cc: Aravind Iddamsetty Cc: Tejas Upadhyay Cc: Rodrigo Vivi Reviewed-by: Badal Nilawar Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 19 +++++++++++++++++++ include/uapi/drm/xe_drm.h | 10 ++++++++++ 2 files changed, 29 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index bb7794cf2c1a..82b987404070 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -8,6 +8,7 @@ #include #include +#include #include "regs/xe_gt_regs.h" #include "xe_bb.h" @@ -499,6 +500,20 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } +static void xe_uevent_gt_reset_failure(struct pci_dev *pdev, u8 tile_id, u8 gt_id) +{ + char *reset_event[4]; + + reset_event[0] = XE_RESET_FAILED_UEVENT "=NEEDS_RESET"; + reset_event[1] = kasprintf(GFP_KERNEL, "TILE_ID=%d", tile_id); + reset_event[2] = kasprintf(GFP_KERNEL, "GT_ID=%d", gt_id); + reset_event[3] = NULL; + kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, reset_event); + + kfree(reset_event[1]); + kfree(reset_event[2]); +} + static int gt_reset(struct xe_gt *gt) { int err; @@ -549,6 +564,10 @@ err_msg: xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); + /* Notify userspace about gt reset failure */ + xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev), + gt_to_tile(gt)->id, gt->info.id); + return err; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 259de80376b4..3d09e9e9267b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -16,6 +16,16 @@ extern "C" { * subject to backwards-compatibility constraints. */ +/** + * DOC: uevent generated by xe on it's pci node. + * + * XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt + * fails. The value supplied with the event is always "NEEDS_RESET". + * Additional information supplied is tile id and gt id of the gt unit for + * which reset has failed. 
+ */ +#define XE_RESET_FAILED_UEVENT "DEVICE_STATUS" + /** * struct xe_user_extension - Base class for defining a chain of extensions * -- cgit v1.2.3 From 8f3013e0b22206b27f37dcf1b96ce68df3393040 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 27 Jul 2023 04:56:50 +0530 Subject: drm/xe: Introduce fault injection for gt reset To trigger gt reset failure: echo 100 > /sys/kernel/debug/dri//fail_gt_reset/probability echo 2 > /sys/kernel/debug/dri//fail_gt_reset/times Cc: Rodrigo Vivi Cc: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_debugfs.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_gt.c | 8 +++++++- drivers/gpu/drm/xe/xe_gt.h | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 491506a1e12e..2de8a0b9da18 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -5,6 +5,7 @@ #include "xe_debugfs.h" +#include #include #include @@ -20,6 +21,10 @@ #include "xe_vm.h" #endif +#ifdef CONFIG_FAULT_INJECTION +DECLARE_FAULT_ATTR(gt_reset_failure); +#endif + static struct xe_device *node_to_xe(struct drm_info_node *node) { return to_xe_device(node->minor->dev); @@ -135,4 +140,9 @@ void xe_debugfs_register(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); + +#ifdef CONFIG_FAULT_INJECTION + fault_create_debugfs_attr("fail_gt_reset", root, >_reset_failure); +#endif + } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 82b987404070..28bf577c7bf2 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -524,6 +524,11 @@ static int gt_reset(struct xe_gt *gt) xe_gt_info(gt, "reset started\n"); + if (xe_fault_inject_gt_reset()) { + err = -ECANCELED; + goto err_fail; + } + xe_gt_sanitize(gt); xe_device_mem_access_get(gt_to_xe(gt)); @@ -562,6 +567,7 @@ err_out: err_msg: XE_WARN_ON(xe_uc_start(>->uc)); xe_device_mem_access_put(gt_to_xe(gt)); +err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); /* Notify userspace about gt reset failure */ @@ -583,7 +589,7 @@ void xe_gt_reset_async(struct xe_gt *gt) xe_gt_info(gt, "trying reset\n"); /* Don't do a reset while one is already in flight */ - if (xe_uc_reset_prepare(>->uc)) + if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(>->uc)) return; xe_gt_info(gt, "reset queued\n"); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 7298653a73de..caded203a8a0 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -7,6 +7,7 @@ #define _XE_GT_H_ #include +#include #include "xe_device_types.h" #include "xe_hw_engine.h" @@ -16,6 +17,19 @@ for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ xe_hw_engine_is_valid((hwe__))) +#ifdef CONFIG_FAULT_INJECTION +extern struct fault_attr gt_reset_failure; +static inline bool xe_fault_inject_gt_reset(void) +{ + return should_fail(>_reset_failure, 1); +} +#else +static inline bool xe_fault_inject_gt_reset(void) +{ + return false; +} +#endif + struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); -- cgit v1.2.3 From f026520367be5f7e05531d6e601c822596ebe65f Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 25 Jul 2023 18:11:55 -0400 Subject: drm/xe: Only set PCI d3cold_allowed when we are really allowing. First of all it was strange to see: if (allowed) { ... 
} else { D3COLD_ENABLE } But besides this misalignment, let's also use the pci d3cold_allowed useful to us and know that we are not really allowing d3cold. Cc: Anshuman Gupta Reviewed-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ae6e1394ff31..1a79c6a7dd5e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -766,6 +766,7 @@ static int xe_pci_runtime_suspend(struct device *dev) pci_save_state(pdev); if (xe->d3cold.allowed) { + d3cold_toggle(pdev, D3COLD_ENABLE); pci_disable_device(pdev); pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); @@ -795,8 +796,6 @@ static int xe_pci_runtime_resume(struct device *dev) return err; pci_set_master(pdev); - } else { - d3cold_toggle(pdev, D3COLD_ENABLE); } return xe_pm_runtime_resume(xe); -- cgit v1.2.3 From e07aa913161b0338708887a5e78bf57ffdfe67fa Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 25 Jul 2023 18:11:56 -0400 Subject: drm/xe: Move d3cold_allowed decision all together. And let's use the VRAM threshold to keep d3cold temporarily disabled. With this we have the ability to run D3Cold experiments just by touching the vram_d3cold_threshold sysfs entry. Cc: Anshuman Gupta Reviewed-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 15 +-------------- drivers/gpu/drm/xe/xe_pm.c | 5 +++++ drivers/gpu/drm/xe/xe_pm.h | 7 ++++++- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 1a79c6a7dd5e..4cbacc80594b 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -806,20 +806,7 @@ static int xe_pci_runtime_idle(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct xe_device *xe = pdev_to_xe_device(pdev); - if (!xe->d3cold.capable) { - xe->d3cold.allowed = false; - } else { - xe_pm_d3cold_allowed_toggle(xe); - - /* - * TODO: d3cold should be allowed (true) if - * (IS_DGFX(xe) && !xe_device_mem_access_ongoing(xe)) - * but maybe include some other conditions. So, before - * we can re-enable the D3cold, we need to: - * 1. rewrite the VRAM save / restore to avoid buffer object locks - */ - xe->d3cold.allowed = false; - } + xe_pm_d3cold_allowed_toggle(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index cb2a00ea28e3..1c62900d29d8 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -342,6 +342,11 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) u64 vram_used; int i; + if (!xe->d3cold.capable) { + xe->d3cold.allowed = false; + return; + } + for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { man = ttm_manager_type(&xe->ttm, i); if (man) { diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 08a633ce5145..6b9031f7af24 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -8,7 +8,12 @@ #include -#define DEFAULT_VRAM_THRESHOLD 300 /* in MB */ +/* + * TODO: Threshold = 0 will block D3Cold. + * Before we can move this to a higher value (like 300), we need to: + * 1. 
rewrite the VRAM save / restore to avoid buffer object locks + */ +#define DEFAULT_VRAM_THRESHOLD 0 /* in MB */ struct xe_device; -- cgit v1.2.3 From bba2ec4144f5a7683d9a26cafffca6031361ee66 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 25 Jul 2023 18:11:57 -0400 Subject: drm/xe: Fix the runtime_idle call and d3cold.allowed decision. According to Documentation/power/runtime_pm.txt: int pm_runtime_put(struct device *dev); - decrement the device's usage counter; if the result is 0 then run pm_request_idle(dev) and return its result int pm_runtime_put_autosuspend(struct device *dev); - decrement the device's usage counter; if the result is 0 then run pm_request_autosuspend(dev) and return its result We need to ensure that the idle function is called before suspending so we take the right d3cold.allowed decision and respect the values set on vram_d3cold_threshold sysfs. So we need pm_runtime_put() instead of pm_runtime_put_autosuspend(). Cc: Anshuman Gupta Reviewed-by: Anshuman Gupta Tested-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 1c62900d29d8..310e413c91a4 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -133,7 +133,7 @@ static void xe_pm_runtime_init(struct xe_device *xe) pm_runtime_set_active(dev); pm_runtime_allow(dev); pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); + pm_runtime_put(dev); } void xe_pm_init(struct xe_device *xe) @@ -289,7 +289,7 @@ int xe_pm_runtime_get(struct xe_device *xe) int xe_pm_runtime_put(struct xe_device *xe) { pm_runtime_mark_last_busy(xe->drm.dev); - return pm_runtime_put_autosuspend(xe->drm.dev); + return pm_runtime_put(xe->drm.dev); } int xe_pm_runtime_get_if_active(struct xe_device *xe) -- cgit v1.2.3 From a32d82b4cfd63a9bc198bd9faa54844b8d04c5d3 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 25 Jul 2023 18:11:58 -0400 Subject: drm/xe: Only init runtime PM after all d3cold config is in place. We cannot allow runtime pm suspend after we configured the d3cold capable and threshold. Cc: Anshuman Gupta Reviewed-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 310e413c91a4..a20a2fb34a7d 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -141,10 +141,12 @@ void xe_pm_init(struct xe_device *xe) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); drmm_mutex_init(&xe->drm, &xe->d3cold.lock); - xe_pm_runtime_init(xe); + xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev); xe_device_sysfs_init(xe); xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + + xe_pm_runtime_init(xe); } void xe_pm_runtime_fini(struct xe_device *xe) -- cgit v1.2.3 From d87c424afaf62f11ded6e66b4bdfbd5f5da8b330 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 25 Jul 2023 18:11:59 -0400 Subject: drm/xe: Ensure memory eviction on s2idle. On discrete cards we cannot allow the pci subsystem to skip the regular suspend and we need to unblock the d3cold. 
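Concretely, that means opting the device out of the PM core's direct-complete optimization so the regular suspend/resume callbacks always run and local memory can be evicted. A minimal sketch of that mechanism, using the same flag as the xe_pm.c hunk below (the helper name here is made up for illustration and is not the exact xe code):

    #include <linux/device.h>
    #include <linux/pm.h>

    /* Called once at probe time for discrete devices. */
    static void example_force_full_suspend_path(struct device *dev)
    {
            /*
             * Without this, the PM core may leave a runtime-suspended
             * device as-is across system sleep and skip the driver's
             * suspend/resume callbacks, so VRAM would never be evicted.
             */
            dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE);
    }
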
Cc: Anshuman Gupta Reviewed-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 54 +++++++++++++++++++++++++++------------------ drivers/gpu/drm/xe/xe_pm.c | 11 +++++++++ 2 files changed, 43 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 4cbacc80594b..6e31b596683e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -30,6 +30,28 @@ enum toggle_d3cold { D3COLD_ENABLE, }; +static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + struct pci_dev *root_pdev; + + if (!xe->d3cold.capable) + return; + + root_pdev = pcie_find_root_port(pdev); + if (!root_pdev) + return; + + switch (toggle) { + case D3COLD_DISABLE: + pci_d3cold_disable(root_pdev); + break; + case D3COLD_ENABLE: + pci_d3cold_enable(root_pdev); + break; + } +} + struct xe_subplatform_desc { enum xe_subplatform subplatform; const char *name; @@ -697,6 +719,13 @@ static int xe_pci_suspend(struct device *dev) if (err) return err; + /* + * Enabling D3Cold is needed for S2Idle/S0ix. + * It is save to allow here since xe_pm_suspend has evicted + * the local memory and the direct complete optimization is disabled. + */ + d3cold_toggle(pdev, D3COLD_ENABLE); + pci_save_state(pdev); pci_disable_device(pdev); @@ -712,6 +741,9 @@ static int xe_pci_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); int err; + /* Give back the D3Cold decision to the runtime P M*/ + d3cold_toggle(pdev, D3COLD_DISABLE); + err = pci_set_power_state(pdev, PCI_D0); if (err) return err; @@ -731,28 +763,6 @@ static int xe_pci_resume(struct device *dev) return 0; } -static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle) -{ - struct xe_device *xe = pdev_to_xe_device(pdev); - struct pci_dev *root_pdev; - - if (!xe->d3cold.capable) - return; - - root_pdev = pcie_find_root_port(pdev); - if (!root_pdev) - return; - - switch (toggle) { - case D3COLD_DISABLE: - pci_d3cold_disable(root_pdev); - break; - case D3COLD_ENABLE: - pci_d3cold_enable(root_pdev); - break; - } -} - static int xe_pci_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index a20a2fb34a7d..cdde0d87fd9f 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -128,6 +128,17 @@ static void xe_pm_runtime_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; + /* + * Disable the system suspend direct complete optimization. + * We need to ensure that the regular device suspend/resume functions + * are called since our runtime_pm cannot guarantee local memory + * eviction for d3cold. + * TODO: Check HDA audio dependencies claimed by i915, and then enforce + * this option to integrated graphics as well. + */ + if (IS_DGFX(xe)) + dev_pm_set_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); + pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_set_active(dev); -- cgit v1.2.3 From fe58a2432b0d07cf56704ecf1ca5e52e6c1e8fff Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 28 Jul 2023 10:56:02 -0700 Subject: drm/xe/mtl: Reduce Wa_14018575942 scope to the CCS engine The MTL version of Wa_14018575942 has been updated to suggest only applying the register change on the CCS engine. 
Note that DG2 and PVC have a functionally equivalent workaround with Wa_18018781329; for now that one is still applying to all engines, although we'll keep an eye on it in case it changes to be CCS-specific too. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20230728175601.2343755-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 21087f7a4609..36c80e9fb758 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -238,21 +238,13 @@ static const struct xe_rtp_entry_sr gt_was[] = { }, { XE_RTP_NAME("14018575942"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), - XE_RTP_ACTIONS(SET(RENDER_MOD_CTRL, FORCE_MISS_FTLB), - SET(COMP_MOD_CTRL, FORCE_MISS_FTLB)) + XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB)) }, { XE_RTP_NAME("22016670082"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR)) }, - /* Xe_LPM+ */ - { XE_RTP_NAME("14018575942"), - XE_RTP_RULES(MEDIA_VERSION(1300)), - XE_RTP_ACTIONS(SET(XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB), - SET(XELPMP_VEBX_MOD_CTRL, FORCE_MISS_FTLB)) - }, - {} }; -- cgit v1.2.3 From 757d9fdfe3db4de6ed5ef9961a301e5be7b2cd74 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 27 Jul 2023 19:00:14 -0700 Subject: drm/xe: Remove XE_GUC_CT_SELFTEST XE_GUC_CT_SELFTEST enabled a debugfs entry to which ran a very simple selftest ensuring the GuC CT code worked. This was added before the kunit framework was available and before submissions were working too. This test isn't worth porting over to the kunit frame as if the GuC CT didn't work, literally almost nothing would work so just remove this. Suggested-by: Oded Gabbay Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 65 ------------------------------------ drivers/gpu/drm/xe/xe_guc_ct.h | 9 ----- drivers/gpu/drm/xe/xe_guc_ct_types.h | 6 ---- drivers/gpu/drm/xe/xe_guc_debugfs.c | 18 ---------- 4 files changed, 98 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index cb75db30800c..3f5084c6ffc8 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1310,68 +1310,3 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic) xe_guc_ct_snapshot_print(snapshot, p); xe_guc_ct_snapshot_free(snapshot); } - -#ifdef XE_GUC_CT_SELFTEST -/* - * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow - * control if enough sent, 8k sends is enough. Verify forward process, verify - * credits expected values on exit. 
- */ -void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p) -{ - struct guc_ctb *g2h = &ct->ctbs.g2h; - u32 action[] = { XE_GUC_ACTION_SCHED_ENGINE_MODE_SET, 0, 0, 1, }; - u32 bad_action[] = { XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, 0, 0, }; - int ret; - int i; - - ct->suppress_irq_handler = true; - drm_puts(p, "Starting GuC CT selftest\n"); - - for (i = 0; i < 8192; ++i) { - ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 4, 1); - if (ret) { - drm_printf(p, "Aborted pass %d, ret %d\n", i, ret); - xe_guc_ct_print(ct, p, true); - break; - } - } - - ct->suppress_irq_handler = false; - if (!ret) { - xe_guc_ct_irq_handler(ct); - msleep(200); - if (g2h->info.space != - CIRC_SPACE(0, 0, g2h->info.size) - g2h->info.resv_space) { - drm_printf(p, "Mismatch on space %d, %d\n", - g2h->info.space, - CIRC_SPACE(0, 0, g2h->info.size) - - g2h->info.resv_space); - ret = -EIO; - } - if (ct->g2h_outstanding) { - drm_printf(p, "Outstanding G2H, %d\n", - ct->g2h_outstanding); - ret = -EIO; - } - } - - /* Check failure path for blocking CTs too */ - xe_guc_ct_send_block(ct, bad_action, ARRAY_SIZE(bad_action)); - if (g2h->info.space != - CIRC_SPACE(0, 0, g2h->info.size) - g2h->info.resv_space) { - drm_printf(p, "Mismatch on space %d, %d\n", - g2h->info.space, - CIRC_SPACE(0, 0, g2h->info.size) - - g2h->info.resv_space); - ret = -EIO; - } - if (ct->g2h_outstanding) { - drm_printf(p, "Outstanding G2H, %d\n", - ct->g2h_outstanding); - ret = -EIO; - } - - drm_printf(p, "GuC CT selftest done - %s\n", ret ? "FAIL" : "PASS"); -} -#endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 3e04ee64652c..f15f8a4857e0 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -25,13 +25,8 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic); static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) { wake_up_all(&ct->wq); -#ifdef XE_GUC_CT_SELFTEST - if (!ct->suppress_irq_handler && ct->enabled) - queue_work(system_unbound_wq, &ct->g2h_worker); -#else if (ct->enabled) queue_work(system_unbound_wq, &ct->g2h_worker); -#endif xe_guc_ct_fast_path(ct); } @@ -61,8 +56,4 @@ xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len) return xe_guc_ct_send_recv_no_fail(ct, action, len, NULL); } -#ifdef XE_GUC_CT_SELFTEST -void xe_guc_ct_selftest(struct xe_guc_ct *ct, struct drm_printer *p); -#endif - #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 93046d95b009..dedbd686428b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -14,8 +14,6 @@ #include "abi/guc_communication_ctb_abi.h" -#define XE_GUC_CT_SELFTEST - struct xe_bo; /** @@ -110,10 +108,6 @@ struct xe_guc_ct { wait_queue_head_t wq; /** @g2h_fence_wq: wait queue used for G2H fencing */ wait_queue_head_t g2h_fence_wq; -#ifdef XE_GUC_CT_SELFTEST - /** @suppress_irq_handler: force flow control to sender */ - bool suppress_irq_handler; -#endif /** @msg: Message buffer */ u32 msg[GUC_CTB_MSG_MAX_LEN]; /** @fast_msg: Message buffer */ diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index b43c70de7e37..167eb5593e03 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -58,27 +58,9 @@ static int guc_log(struct seq_file *m, void *data) return 0; } -#ifdef XE_GUC_CT_SELFTEST -static int guc_ct_selftest(struct seq_file *m, void *data) -{ - struct xe_guc *guc = node_to_guc(m->private); - 
struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_device_mem_access_get(xe); - xe_guc_ct_selftest(&guc->ct, &p); - xe_device_mem_access_put(xe); - - return 0; -} -#endif - static const struct drm_info_list debugfs_list[] = { {"guc_info", guc_info, 0}, {"guc_log", guc_log, 0}, -#ifdef XE_GUC_CT_SELFTEST - {"guc_ct_selftest", guc_ct_selftest, 0}, -#endif }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) -- cgit v1.2.3 From 8d7a91fe58c982a7709fabb53a51d87dbf94f6e9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 27 Jul 2023 19:10:51 -0700 Subject: drm/xe: Remove ct->fence_context This is unused, remove it. Suggested-by: Oded Gabbay Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 1 - drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 3f5084c6ffc8..d322eadbe75a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -140,7 +140,6 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) mutex_init(&ct->lock); spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); - ct->fence_context = dma_fence_context_alloc(1); INIT_WORK(&ct->g2h_worker, g2h_worker_func); init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index dedbd686428b..d814d4ee3fc6 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -100,8 +100,6 @@ struct xe_guc_ct { bool enabled; /** @fence_seqno: G2H fence seqno - 16 bits used by CT */ u32 fence_seqno; - /** @fence_context: context for G2H fence */ - u64 fence_context; /** @fence_lookup: G2H fence lookup */ struct xarray fence_lookup; /** @wq: wait queue used for reliable CT sends and freeing G2H credits */ -- cgit v1.2.3 From e3828ebf6cde583b76143e283f8c4a4e8a252145 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 27 Jul 2023 19:36:00 -0700 Subject: drm/xe: Add define WQ_HEADER_SIZE Previously used a a magic '+ 3', use define instead. Suggested-by: Oded Gabbay Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 99c9b7139195..a2eeb3ffe548 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -558,8 +558,9 @@ static void wq_item_append(struct xe_engine *e) struct xe_guc *guc = engine_to_guc(e); struct xe_device *xe = guc_to_xe(guc); struct iosys_map map = xe_lrc_parallel_map(e->lrc); - u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3]; - u32 wqi_size = (e->width + 3) * sizeof(u32); +#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ + u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; + u32 wqi_size = (e->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); u32 len_dw = (wqi_size / sizeof(u32)) - 1; int i = 0, j; -- cgit v1.2.3 From 3207a32163cdf7b3345a44e255aae614859ea0d6 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 27 Jul 2023 14:55:28 +0000 Subject: drm/xe/macro: Remove unused constant Remove XE_EXTRA_DEBUG for cleanup as it is not used. 
Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_macros.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h index 038cf28604ad..a7105050bce0 100644 --- a/drivers/gpu/drm/xe/xe_macros.h +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -8,7 +8,6 @@ #include -#define XE_EXTRA_DEBUG 1 #define XE_WARN_ON WARN_ON #define XE_BUG_ON BUG_ON -- cgit v1.2.3 From 99fea6828879381405dba598627aea79fa6edd78 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 27 Jul 2023 14:55:29 +0000 Subject: drm/xe: Prefer WARN() over BUG() to avoid crashing the kernel Replace calls to XE_BUG_ON() with calls XE_WARN_ON() which in turn calls WARN() instead of BUG(). BUG() crashes the kernel and should only be used when it is absolutely unavoidable in case of catastrophic and unrecoverable failures, which is not the case here. Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bb.c | 8 ++--- drivers/gpu/drm/xe/xe_bo.c | 52 +++++++++++++-------------- drivers/gpu/drm/xe/xe_bo.h | 14 ++++---- drivers/gpu/drm/xe/xe_bo_evict.c | 4 +-- drivers/gpu/drm/xe/xe_execlist.c | 14 ++++---- drivers/gpu/drm/xe/xe_force_wake.c | 4 +-- drivers/gpu/drm/xe/xe_force_wake.h | 4 +-- drivers/gpu/drm/xe/xe_ggtt.c | 12 +++---- drivers/gpu/drm/xe/xe_gt_clock.c | 4 +-- drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 +++---- drivers/gpu/drm/xe/xe_guc.c | 22 ++++++------ drivers/gpu/drm/xe/xe_guc_ads.c | 14 ++++---- drivers/gpu/drm/xe/xe_guc_ct.c | 22 ++++++------ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2 +- drivers/gpu/drm/xe/xe_guc_log.c | 4 +-- drivers/gpu/drm/xe/xe_guc_submit.c | 48 ++++++++++++------------- drivers/gpu/drm/xe/xe_huc.c | 2 +- drivers/gpu/drm/xe/xe_hw_engine.c | 10 +++--- drivers/gpu/drm/xe/xe_hw_fence.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 8 ++--- drivers/gpu/drm/xe/xe_macros.h | 1 - drivers/gpu/drm/xe/xe_migrate.c | 30 ++++++++-------- drivers/gpu/drm/xe/xe_pt.c | 22 ++++++------ drivers/gpu/drm/xe/xe_res_cursor.h | 12 +++---- drivers/gpu/drm/xe/xe_ring_ops.c | 8 ++--- drivers/gpu/drm/xe/xe_sched_job.c | 2 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 4 +-- drivers/gpu/drm/xe/xe_uc_fw.c | 16 ++++----- drivers/gpu/drm/xe/xe_uc_fw.h | 2 +- drivers/gpu/drm/xe/xe_vm.c | 56 ++++++++++++++--------------- drivers/gpu/drm/xe/xe_wait_user_fence.c | 2 +- drivers/gpu/drm/xe/xe_wopcm.c | 18 +++++----- 33 files changed, 218 insertions(+), 219 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index f9b6b7adf99f..b15a7cb7db4c 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -78,7 +78,7 @@ struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng, { u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo); - XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION)); + XE_WARN_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION)); return __xe_bb_create_job(wa_eng, bb, &addr); } @@ -94,8 +94,8 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng, 4 * second_idx, }; - BUG_ON(second_idx > bb->len); - BUG_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION)); + XE_WARN_ON(second_idx > bb->len); + XE_WARN_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION)); return __xe_bb_create_job(kernel_eng, bb, addr); } @@ -105,7 +105,7 @@ struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng, { 
u64 addr = xe_sa_bo_gpu_addr(bb->bo); - BUG_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION); + XE_WARN_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION); return __xe_bb_create_job(kernel_eng, bb, &addr); } diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 65b56e7a2fde..cf0faaefd03d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -103,7 +103,7 @@ static bool xe_bo_is_user(struct xe_bo *bo) static struct xe_tile * mem_type_to_tile(struct xe_device *xe, u32 mem_type) { - XE_BUG_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type)); + XE_WARN_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type)); return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)]; } @@ -142,7 +142,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place place = { .mem_type = mem_type }; u64 io_size = tile->mem.vram.io_size; - XE_BUG_ON(!tile->mem.vram.usable_size); + XE_WARN_ON(!tile->mem.vram.usable_size); /* * For eviction / restore on suspend / resume objects @@ -285,7 +285,7 @@ static int xe_tt_map_sg(struct ttm_tt *tt) unsigned long num_pages = tt->num_pages; int ret; - XE_BUG_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL); + XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL); if (xe_tt->sg) return 0; @@ -544,8 +544,8 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, ttm); struct sg_table *sg; - XE_BUG_ON(!attach); - XE_BUG_ON(!ttm_bo->ttm); + XE_WARN_ON(!attach); + XE_WARN_ON(!ttm_bo->ttm); if (new_res->mem_type == XE_PL_SYSTEM) goto out; @@ -707,8 +707,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, else if (mem_type_is_vram(old_mem_type)) tile = mem_type_to_tile(xe, old_mem_type); - XE_BUG_ON(!tile); - XE_BUG_ON(!tile->migrate); + XE_WARN_ON(!tile); + XE_WARN_ON(!tile->migrate); trace_xe_bo_move(bo); xe_device_mem_access_get(xe); @@ -738,7 +738,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - XE_BUG_ON(new_mem->start != + XE_WARN_ON(new_mem->start != bo->placements->fpfn); iosys_map_set_vaddr_iomem(&bo->vmap, new_addr); @@ -1198,7 +1198,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, int err; /* Only kernel objects should set GT */ - XE_BUG_ON(tile && type != ttm_bo_type_kernel); + XE_WARN_ON(tile && type != ttm_bo_type_kernel); if (XE_WARN_ON(!size)) return ERR_PTR(-EINVAL); @@ -1350,7 +1350,7 @@ xe_bo_create_locked_range(struct xe_device *xe, if (!tile && flags & XE_BO_CREATE_STOLEN_BIT) tile = xe_device_get_root_tile(xe); - XE_BUG_ON(!tile); + XE_WARN_ON(!tile); if (flags & XE_BO_CREATE_STOLEN_BIT && flags & XE_BO_FIXED_PLACEMENT_BIT) { @@ -1481,8 +1481,8 @@ int xe_bo_pin_external(struct xe_bo *bo) struct xe_device *xe = xe_bo_device(bo); int err; - XE_BUG_ON(bo->vm); - XE_BUG_ON(!xe_bo_is_user(bo)); + XE_WARN_ON(bo->vm); + XE_WARN_ON(!xe_bo_is_user(bo)); if (!xe_bo_is_pinned(bo)) { err = xe_bo_validate(bo, NULL, false); @@ -1514,20 +1514,20 @@ int xe_bo_pin(struct xe_bo *bo) int err; /* We currently don't expect user BO to be pinned */ - XE_BUG_ON(xe_bo_is_user(bo)); + XE_WARN_ON(xe_bo_is_user(bo)); /* Pinned object must be in GGTT or have pinned flag */ - XE_BUG_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT | + XE_WARN_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_GGTT_BIT))); /* * No reason we can't support pinning imported dma-bufs we just don't * expect to pin an imported dma-buf. 
*/ - XE_BUG_ON(bo->ttm.base.import_attach); + XE_WARN_ON(bo->ttm.base.import_attach); /* We only expect at most 1 pin */ - XE_BUG_ON(xe_bo_is_pinned(bo)); + XE_WARN_ON(xe_bo_is_pinned(bo)); err = xe_bo_validate(bo, NULL, false); if (err) @@ -1543,7 +1543,7 @@ int xe_bo_pin(struct xe_bo *bo) struct ttm_place *place = &(bo->placements[0]); if (mem_type_is_vram(place->mem_type)) { - XE_BUG_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); + XE_WARN_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) - vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; @@ -1580,9 +1580,9 @@ void xe_bo_unpin_external(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); - XE_BUG_ON(bo->vm); - XE_BUG_ON(!xe_bo_is_pinned(bo)); - XE_BUG_ON(!xe_bo_is_user(bo)); + XE_WARN_ON(bo->vm); + XE_WARN_ON(!xe_bo_is_pinned(bo)); + XE_WARN_ON(!xe_bo_is_user(bo)); if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) { spin_lock(&xe->pinned.lock); @@ -1603,15 +1603,15 @@ void xe_bo_unpin(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); - XE_BUG_ON(bo->ttm.base.import_attach); - XE_BUG_ON(!xe_bo_is_pinned(bo)); + XE_WARN_ON(bo->ttm.base.import_attach); + XE_WARN_ON(!xe_bo_is_pinned(bo)); if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && bo->flags & XE_BO_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); if (mem_type_is_vram(place->mem_type)) { - XE_BUG_ON(list_empty(&bo->pinned_link)); + XE_WARN_ON(list_empty(&bo->pinned_link)); spin_lock(&xe->pinned.lock); list_del_init(&bo->pinned_link); @@ -1675,12 +1675,12 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size) struct xe_res_cursor cur; u64 page; - XE_BUG_ON(page_size > PAGE_SIZE); + XE_WARN_ON(page_size > PAGE_SIZE); page = offset >> PAGE_SHIFT; offset &= (PAGE_SIZE - 1); if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { - XE_BUG_ON(!bo->ttm.ttm); + XE_WARN_ON(!bo->ttm.ttm); xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, page_size, &cur); @@ -1874,7 +1874,7 @@ int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, LIST_HEAD(objs); LIST_HEAD(dups); - XE_BUG_ON(!ww); + XE_WARN_ON(!ww); tv_bo.num_shared = num_resv; tv_bo.bo = &bo->ttm; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index a9a32d680208..76b8c836deb7 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -166,7 +166,7 @@ void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww); static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) { if (bo) { - XE_BUG_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm)); + XE_WARN_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm)); if (bo->vm) xe_vm_assert_held(bo->vm); else @@ -178,8 +178,8 @@ static inline void xe_bo_lock_no_vm(struct xe_bo *bo, struct ww_acquire_ctx *ctx) { if (bo) { - XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && - bo->ttm.base.resv != &bo->ttm.base._resv)); + XE_WARN_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && + bo->ttm.base.resv != &bo->ttm.base._resv)); dma_resv_lock(bo->ttm.base.resv, ctx); } } @@ -187,8 +187,8 @@ static inline void xe_bo_lock_no_vm(struct xe_bo *bo, static inline void xe_bo_unlock_no_vm(struct xe_bo *bo) { if (bo) { - XE_BUG_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && - bo->ttm.base.resv != &bo->ttm.base._resv)); + XE_WARN_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && + bo->ttm.base.resv != &bo->ttm.base._resv)); dma_resv_unlock(bo->ttm.base.resv); } } @@ -228,8 +228,8 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) static inline u32 
xe_bo_ggtt_addr(struct xe_bo *bo) { - XE_BUG_ON(bo->ggtt_node.size > bo->size); - XE_BUG_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); + XE_WARN_ON(bo->ggtt_node.size > bo->size); + XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); return bo->ggtt_node.start; } diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index f559a7f3eb3e..0d5c3a208ab4 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -160,8 +160,8 @@ int xe_bo_restore_kernel(struct xe_device *xe) * We expect validate to trigger a move VRAM and our move code * should setup the iosys map. */ - XE_BUG_ON(iosys_map_is_null(&bo->vmap)); - XE_BUG_ON(!xe_bo_is_vram(bo)); + XE_WARN_ON(iosys_map_is_null(&bo->vmap)); + XE_WARN_ON(!xe_bo_is_vram(bo)); xe_bo_put(bo); diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index b15d095b395b..a4d9531e3516 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -50,10 +50,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, lrc_desc = xe_lrc_descriptor(lrc); if (GRAPHICS_VERx100(xe) >= 1250) { - XE_BUG_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); + XE_WARN_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id); } else { - XE_BUG_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id)); + XE_WARN_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id)); lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id); } @@ -213,9 +213,9 @@ static void xe_execlist_make_active(struct xe_execlist_engine *exl) struct xe_execlist_port *port = exl->port; enum xe_engine_priority priority = exl->active_priority; - XE_BUG_ON(priority == XE_ENGINE_PRIORITY_UNSET); - XE_BUG_ON(priority < 0); - XE_BUG_ON(priority >= ARRAY_SIZE(exl->port->active)); + XE_WARN_ON(priority == XE_ENGINE_PRIORITY_UNSET); + XE_WARN_ON(priority < 0); + XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active)); spin_lock_irq(&port->lock); @@ -321,7 +321,7 @@ static int execlist_engine_init(struct xe_engine *e) struct xe_device *xe = gt_to_xe(e->gt); int err; - XE_BUG_ON(xe_device_guc_submission_enabled(xe)); + XE_WARN_ON(xe_device_guc_submission_enabled(xe)); drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n"); @@ -387,7 +387,7 @@ static void execlist_engine_fini_async(struct work_struct *w) struct xe_execlist_engine *exl = e->execlist; unsigned long flags; - XE_BUG_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); + XE_WARN_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); spin_lock_irqsave(&exl->port->lock, flags); if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET)) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index aba0784b608e..e563de862581 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -45,7 +45,7 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) mutex_init(&fw->lock); /* Assuming gen11+ so assert this assumption is correct */ - XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); if (xe->info.graphics_verx100 >= 1270) { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], @@ -67,7 +67,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) int i, j; /* Assuming gen11+ so assert this assumption is correct */ - XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); if (!xe_gt_is_media_type(gt)) 
domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 7c534cdd5fe9..7f304704190e 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -24,7 +24,7 @@ static inline int xe_force_wake_ref(struct xe_force_wake *fw, enum xe_force_wake_domains domain) { - XE_BUG_ON(!domain); + XE_WARN_ON(!domain); return fw->domains[ffs(domain) - 1].ref; } @@ -32,7 +32,7 @@ static inline void xe_force_wake_assert_held(struct xe_force_wake *fw, enum xe_force_wake_domains domain) { - XE_BUG_ON(!(fw->awake_domains & domain)); + XE_WARN_ON(!(fw->awake_domains & domain)); } #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index bf46b90a76ad..286f36b9e229 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -58,8 +58,8 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) { - XE_BUG_ON(addr & XE_PTE_MASK); - XE_BUG_ON(addr >= ggtt->size); + XE_WARN_ON(addr & XE_PTE_MASK); + XE_WARN_ON(addr >= ggtt->size); writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]); } @@ -69,7 +69,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) u64 end = start + size - 1; u64 scratch_pte; - XE_BUG_ON(start >= end); + XE_WARN_ON(start >= end); if (ggtt->scratch) scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); @@ -266,7 +266,7 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { unsigned int i = addr / XE_PAGE_SIZE; - XE_BUG_ON(addr > U32_MAX); + XE_WARN_ON(addr > U32_MAX); if (ggtt->gsm[i] == scratch_pte) continue; @@ -315,7 +315,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (XE_WARN_ON(bo->ggtt_node.size)) { /* Someone's already inserted this BO in the GGTT */ - XE_BUG_ON(bo->ggtt_node.size != bo->size); + XE_WARN_ON(bo->ggtt_node.size != bo->size); return 0; } @@ -378,7 +378,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return; /* This BO is not currently in the GGTT */ - XE_BUG_ON(bo->ggtt_node.size != bo->size); + XE_WARN_ON(bo->ggtt_node.size != bo->size); xe_ggtt_remove_node(ggtt, &bo->ggtt_node); } diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 932b61e0cf67..2f77b8bbcf53 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -47,7 +47,7 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg) case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: return f25_mhz; default: - XE_BUG_ON("NOT_POSSIBLE"); + XE_WARN_ON("NOT_POSSIBLE"); return 0; } } @@ -58,7 +58,7 @@ int xe_gt_clock_init(struct xe_gt *gt) u32 freq = 0; /* Assuming gen11+ so assert this assumption is correct */ - XE_BUG_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) { freq = read_reference_ts_freq(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index e622174a866d..b871e45af813 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -157,7 +157,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) char name[8]; int i; - XE_BUG_ON(!minor->debugfs_root); + XE_WARN_ON(!minor->debugfs_root); sprintf(name, "gt%d", gt->info.id); root = debugfs_create_dir(name, minor->debugfs_root); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c 
b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index cad0ade595ec..bcbeea62d510 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -250,7 +250,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; - XE_BUG_ON(!vma); + XE_WARN_ON(!vma); action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ @@ -288,10 +288,10 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, start = ALIGN_DOWN(xe_vma_start(vma), length); } - XE_BUG_ON(length < SZ_4K); - XE_BUG_ON(!is_power_of_2(length)); - XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)); - XE_BUG_ON(!IS_ALIGNED(start, length)); + XE_WARN_ON(length < SZ_4K); + XE_WARN_ON(!is_power_of_2(length)); + XE_WARN_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)); + XE_WARN_ON(!IS_ALIGNED(start, length)); action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); action[len++] = xe_vma_vm(vma)->usm.asid; @@ -300,7 +300,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, action[len++] = ilog2(length) - ilog2(SZ_4K); } - XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN); + XE_WARN_ON(len > MAX_TLB_INVALIDATION_LEN); return send_tlb_invalidation(>->uc.guc, fence, action, len); } diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 2530b6243661..2493c5859948 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -43,9 +43,9 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, { u32 addr = xe_bo_ggtt_addr(bo); - XE_BUG_ON(addr < xe_wopcm_size(guc_to_xe(guc))); - XE_BUG_ON(addr >= GUC_GGTT_TOP); - XE_BUG_ON(bo->size > GUC_GGTT_TOP - addr); + XE_WARN_ON(addr < xe_wopcm_size(guc_to_xe(guc))); + XE_WARN_ON(addr >= GUC_GGTT_TOP); + XE_WARN_ON(bo->size > GUC_GGTT_TOP - addr); return addr; } @@ -612,13 +612,13 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); - XE_BUG_ON(guc->ct.enabled); - XE_BUG_ON(!len); - XE_BUG_ON(len > VF_SW_FLAG_COUNT); - XE_BUG_ON(len > MED_VF_SW_FLAG_COUNT); - XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != + XE_WARN_ON(guc->ct.enabled); + XE_WARN_ON(!len); + XE_WARN_ON(len > VF_SW_FLAG_COUNT); + XE_WARN_ON(len > MED_VF_SW_FLAG_COUNT); + XE_WARN_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != GUC_HXG_ORIGIN_HOST); - XE_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != + XE_WARN_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != GUC_HXG_TYPE_REQUEST); retry: @@ -724,8 +724,8 @@ static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) }; int ret; - XE_BUG_ON(len > 2); - XE_BUG_ON(len == 1 && upper_32_bits(val)); + XE_WARN_ON(len > 2); + XE_WARN_ON(len == 1 && upper_32_bits(val)); /* Self config must go over MMIO */ ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index d4c3a5ce3252..a7da29be2e51 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -118,7 +118,7 @@ struct __guc_ads_blob { static size_t guc_ads_regset_size(struct xe_guc_ads *ads) { - XE_BUG_ON(!ads->regset_size); + XE_WARN_ON(!ads->regset_size); return ads->regset_size; } @@ -312,7 +312,7 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) struct xe_gt *gt = ads_to_gt(ads); u32 prev_regset_size = ads->regset_size; - XE_BUG_ON(!ads->bo); + XE_WARN_ON(!ads->bo); ads->golden_lrc_size = calculate_golden_lrc_size(ads); ads->regset_size 
= calculate_regset_size(gt); @@ -518,7 +518,7 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) regset_used += count * sizeof(struct guc_mmio_reg); } - XE_BUG_ON(regset_used > ads->regset_size); + XE_WARN_ON(regset_used > ads->regset_size); } static void guc_um_init_params(struct xe_guc_ads *ads) @@ -573,7 +573,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) offsetof(struct __guc_ads_blob, system_info)); u32 base = xe_bo_ggtt_addr(ads->bo); - XE_BUG_ON(!ads->bo); + XE_WARN_ON(!ads->bo); xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); guc_policies_init(ads); @@ -597,7 +597,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) offsetof(struct __guc_ads_blob, system_info)); u32 base = xe_bo_ggtt_addr(ads->bo); - XE_BUG_ON(!ads->bo); + XE_WARN_ON(!ads->bo); xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); guc_policies_init(ads); @@ -647,7 +647,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) engine_enabled_masks[guc_class])) continue; - XE_BUG_ON(!gt->default_lrc[class]); + XE_WARN_ON(!gt->default_lrc[class]); real_size = xe_lrc_size(xe, class); alloc_size = PAGE_ALIGN(real_size); @@ -676,7 +676,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) offset += alloc_size; } - XE_BUG_ON(total_size != ads->golden_lrc_size); + XE_WARN_ON(total_size != ads->golden_lrc_size); } void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d322eadbe75a..7fb2690425f8 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -135,7 +135,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) struct xe_bo *bo; int err; - XE_BUG_ON(guc_ct_size() % PAGE_SIZE); + XE_WARN_ON(guc_ct_size() % PAGE_SIZE); mutex_init(&ct->lock); spin_lock_init(&ct->fast_lock); @@ -283,7 +283,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) struct xe_device *xe = ct_to_xe(ct); int err; - XE_BUG_ON(ct->enabled); + XE_WARN_ON(ct->enabled); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -376,7 +376,7 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { - XE_BUG_ON(g2h_len > ct->ctbs.g2h.info.space); + XE_WARN_ON(g2h_len > ct->ctbs.g2h.info.space); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); @@ -419,8 +419,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, full_len = len + GUC_CTB_HDR_LEN; lockdep_assert_held(&ct->lock); - XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); - XE_BUG_ON(tail > h2g->info.size); + XE_WARN_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); + XE_WARN_ON(tail > h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ if (tail + full_len > h2g->info.size) { @@ -478,10 +478,10 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, { int ret; - XE_BUG_ON(g2h_len && g2h_fence); - XE_BUG_ON(num_g2h && g2h_fence); - XE_BUG_ON(g2h_len && !num_g2h); - XE_BUG_ON(!g2h_len && num_g2h); + XE_WARN_ON(g2h_len && g2h_fence); + XE_WARN_ON(num_g2h && g2h_fence); + XE_WARN_ON(g2h_len && !num_g2h); + XE_WARN_ON(!g2h_len && num_g2h); lockdep_assert_held(&ct->lock); if (unlikely(ct->ctbs.h2g.info.broken)) { @@ -552,7 +552,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, unsigned int sleep_period_ms = 1; int ret; - XE_BUG_ON(g2h_len && 
g2h_fence); + XE_WARN_ON(g2h_len && g2h_fence); lockdep_assert_held(&ct->lock); xe_device_assert_mem_access(ct_to_xe(ct)); @@ -622,7 +622,7 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, { int ret; - XE_BUG_ON(g2h_len && g2h_fence); + XE_WARN_ON(g2h_len && g2h_fence); mutex_lock(&ct->lock); ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index c8f875e970ab..76aed9c348ab 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -120,7 +120,7 @@ void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) { struct xe_device *xe = guc_to_xe(guc); - XE_BUG_ON(!guc->hwconfig.bo); + XE_WARN_ON(!guc->hwconfig.bo); xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, guc->hwconfig.size); diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 403aaafcaba6..63904007af0a 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -55,12 +55,12 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p) size_t size; int i, j; - XE_BUG_ON(!log->bo); + XE_WARN_ON(!log->bo); size = log->bo->size; #define DW_PER_READ 128 - XE_BUG_ON(size % (DW_PER_READ * sizeof(u32))); + XE_WARN_ON(size % (DW_PER_READ * sizeof(u32))); for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) { u32 read[DW_PER_READ]; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index a2eeb3ffe548..9a4c96cb3f42 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -329,7 +329,7 @@ static void __guc_engine_policy_start_klv(struct engine_policy *policy, static void __guc_engine_policy_add_##func(struct engine_policy *policy, \ u32 data) \ { \ - XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ + XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ \ policy->h2g.klv[policy->count].kl = \ FIELD_PREP(GUC_KLV_0_KEY, \ @@ -358,7 +358,7 @@ static void init_policies(struct xe_guc *guc, struct xe_engine *e) u32 timeslice_us = e->sched_props.timeslice_us; u32 preempt_timeout_us = e->sched_props.preempt_timeout_us; - XE_BUG_ON(!engine_registered(e)); + XE_WARN_ON(!engine_registered(e)); __guc_engine_policy_start_klv(&policy, e->guc->id); __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]); @@ -396,7 +396,7 @@ static void __register_mlrc_engine(struct xe_guc *guc, int len = 0; int i; - XE_BUG_ON(!xe_engine_is_parallel(e)); + XE_WARN_ON(!xe_engine_is_parallel(e)); action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; action[len++] = info->flags; @@ -419,7 +419,7 @@ static void __register_mlrc_engine(struct xe_guc *guc, action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } - XE_BUG_ON(len > MAX_MLRC_REG_SIZE); + XE_WARN_ON(len > MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE xe_guc_ct_send(&guc->ct, action, len, 0, 0); @@ -453,7 +453,7 @@ static void register_engine(struct xe_engine *e) struct xe_lrc *lrc = e->lrc; struct guc_ctxt_registration_info info; - XE_BUG_ON(engine_registered(e)); + XE_WARN_ON(engine_registered(e)); memset(&info, 0, sizeof(info)); info.context_idx = e->guc->id; @@ -543,7 +543,7 @@ static int wq_noop_append(struct xe_engine *e) if (wq_wait_for_space(e, wq_space_until_wrap(e))) return -ENODEV; - XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); + XE_WARN_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)], FIELD_PREP(WQ_TYPE_MASK, 
WQ_TYPE_NOOP) | @@ -583,13 +583,13 @@ static void wq_item_append(struct xe_engine *e) wqi[i++] = lrc->ring.tail / sizeof(u64); } - XE_BUG_ON(i != wqi_size / sizeof(u32)); + XE_WARN_ON(i != wqi_size / sizeof(u32)); iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[e->guc->wqi_tail / sizeof(u32)])); xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); e->guc->wqi_tail += wqi_size; - XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE); + XE_WARN_ON(e->guc->wqi_tail > WQ_SIZE); xe_device_wmb(xe); @@ -608,7 +608,7 @@ static void submit_engine(struct xe_engine *e) int len = 0; bool extra_submit = false; - XE_BUG_ON(!engine_registered(e)); + XE_WARN_ON(!engine_registered(e)); if (xe_engine_is_parallel(e)) wq_item_append(e); @@ -656,8 +656,8 @@ guc_engine_run_job(struct drm_sched_job *drm_job) struct xe_engine *e = job->engine; bool lr = xe_engine_is_lr(e); - XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) && - !engine_banned(e) && !engine_suspended(e)); + XE_WARN_ON((engine_destroyed(e) || engine_pending_disable(e)) && + !engine_banned(e) && !engine_suspended(e)); trace_xe_sched_job_run(job); @@ -984,7 +984,7 @@ static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg) struct xe_engine *e = msg->private_data; struct xe_guc *guc = engine_to_guc(e); - XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL); + XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL); trace_xe_engine_cleanup_entity(e); if (engine_registered(e)) @@ -1012,9 +1012,9 @@ static void suspend_fence_signal(struct xe_engine *e) { struct xe_guc *guc = engine_to_guc(e); - XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) && - !guc_read_stopped(guc)); - XE_BUG_ON(!e->guc->suspend_pending); + XE_WARN_ON(!engine_suspended(e) && !engine_killed(e) && + !guc_read_stopped(guc)); + XE_WARN_ON(!e->guc->suspend_pending); e->guc->suspend_pending = false; smp_wmb(); @@ -1100,7 +1100,7 @@ static void guc_engine_process_msg(struct xe_sched_msg *msg) __guc_engine_process_msg_resume(msg); break; default: - XE_BUG_ON("Unknown message type"); + XE_WARN_ON("Unknown message type"); } } @@ -1122,7 +1122,7 @@ static int guc_engine_init(struct xe_engine *e) long timeout; int err; - XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc))); + XE_WARN_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc))); ge = kzalloc(sizeof(*ge), GFP_KERNEL); if (!ge) @@ -1286,9 +1286,9 @@ static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms) { struct xe_gpu_scheduler *sched = &e->guc->sched; - XE_BUG_ON(engine_registered(e)); - XE_BUG_ON(engine_banned(e)); - XE_BUG_ON(engine_killed(e)); + XE_WARN_ON(engine_registered(e)); + XE_WARN_ON(engine_banned(e)); + XE_WARN_ON(engine_killed(e)); sched->base.timeout = job_timeout_ms; @@ -1320,7 +1320,7 @@ static void guc_engine_resume(struct xe_engine *e) { struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME; - XE_BUG_ON(e->guc->suspend_pending); + XE_WARN_ON(e->guc->suspend_pending); guc_engine_add_msg(e, msg, RESUME); } @@ -1416,7 +1416,7 @@ int xe_guc_submit_stop(struct xe_guc *guc) struct xe_engine *e; unsigned long index; - XE_BUG_ON(guc_read_stopped(guc) != 1); + XE_WARN_ON(guc_read_stopped(guc) != 1); mutex_lock(&guc->submission_state.lock); @@ -1454,7 +1454,7 @@ int xe_guc_submit_start(struct xe_guc *guc) struct xe_engine *e; unsigned long index; - XE_BUG_ON(guc_read_stopped(guc) != 1); + XE_WARN_ON(guc_read_stopped(guc) != 1); mutex_lock(&guc->submission_state.lock); atomic_dec(&guc->submission_state.stopped); @@ -1484,7 +1484,7 @@ g2h_engine_lookup(struct xe_guc *guc, u32 guc_id) 
return NULL; } - XE_BUG_ON(e->guc->id != guc_id); + XE_WARN_ON(e->guc->id != guc_id); return e; } diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index dc1708b4e94a..177cda14864e 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -72,7 +72,7 @@ int xe_huc_auth(struct xe_huc *huc) if (xe_uc_fw_is_disabled(&huc->fw)) return 0; - XE_BUG_ON(xe_uc_fw_is_running(&huc->fw)); + XE_WARN_ON(xe_uc_fw_is_running(&huc->fw)); if (!xe_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1af5cccd1142..ead5aa285619 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -237,7 +237,7 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val) { - XE_BUG_ON(reg.addr & hwe->mmio_base); + XE_WARN_ON(reg.addr & hwe->mmio_base); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); reg.addr += hwe->mmio_base; @@ -247,7 +247,7 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { - XE_BUG_ON(reg.addr & hwe->mmio_base); + XE_WARN_ON(reg.addr & hwe->mmio_base); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); reg.addr += hwe->mmio_base; @@ -351,7 +351,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, info = &engine_infos[id]; - XE_BUG_ON(hwe->gt); + XE_WARN_ON(hwe->gt); hwe->gt = gt; hwe->class = info->class; @@ -377,8 +377,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, struct xe_tile *tile = gt_to_tile(gt); int err; - XE_BUG_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name); - XE_BUG_ON(!(gt->info.engine_mask & BIT(id))); + XE_WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name); + XE_WARN_ON(!(gt->info.engine_mask & BIT(id))); xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); xe_reg_sr_apply_whitelist(hwe); diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index ffe1a3992ef5..a6094c81f2ad 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -188,7 +188,7 @@ static void xe_hw_fence_release(struct dma_fence *dma_fence) struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); trace_xe_hw_fence_free(fence); - XE_BUG_ON(!list_empty(&fence->irq_link)); + XE_WARN_ON(!list_empty(&fence->irq_link)); call_rcu(&dma_fence->rcu, fence_free); } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b726599f6228..05f3d8d68379 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -108,7 +108,7 @@ static void set_offsets(u32 *regs, *regs |= MI_LRI_LRM_CS_MMIO; regs++; - XE_BUG_ON(!count); + XE_WARN_ON(!count); do { u32 offset = 0; u8 v; @@ -528,7 +528,7 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ { \ struct iosys_map map = lrc->bo->vmap; \ \ - XE_BUG_ON(iosys_map_is_null(&map)); \ + XE_WARN_ON(iosys_map_is_null(&map)); \ iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ return map; \ } \ @@ -759,12 +759,12 @@ void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) u32 rhs; size_t aligned_size; - XE_BUG_ON(!IS_ALIGNED(size, 4)); + XE_WARN_ON(!IS_ALIGNED(size, 4)); aligned_size = ALIGN(size, 8); ring = __xe_lrc_ring_map(lrc); - XE_BUG_ON(lrc->ring.tail >= lrc->ring.size); + XE_WARN_ON(lrc->ring.tail >= lrc->ring.size); 
rhs = lrc->ring.size - lrc->ring.tail; if (size > rhs) { __xe_lrc_write_ring(lrc, ring, data, rhs); diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h index a7105050bce0..daf56c846d03 100644 --- a/drivers/gpu/drm/xe/xe_macros.h +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -9,7 +9,6 @@ #include #define XE_WARN_ON WARN_ON -#define XE_BUG_ON BUG_ON #define XE_IOCTL_DBG(xe, cond) \ ((cond) && (drm_dbg(&(xe)->drm, \ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 0405136bc0b1..9c4b432d496f 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -106,7 +106,7 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg) static u64 xe_migrate_vm_addr(u64 slot, u32 level) { - XE_BUG_ON(slot >= NUM_PT_SLOTS); + XE_WARN_ON(slot >= NUM_PT_SLOTS); /* First slot is reserved for mapping of PT bo and bb, start from 1 */ return (slot + 1ULL) << xe_pt_shift(level + 1); @@ -171,7 +171,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); /* Need to be sure everything fits in the first PT, or create more */ - XE_BUG_ON(m->batch_base_ofs + batch->size >= SZ_2M); + XE_WARN_ON(m->batch_base_ofs + batch->size >= SZ_2M); bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, @@ -205,7 +205,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } if (!IS_DGFX(xe)) { - XE_BUG_ON(xe->info.supports_usm); + XE_WARN_ON(xe->info.supports_usm); /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; @@ -487,7 +487,7 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), NUM_CCS_BYTES_PER_BLOCK); - XE_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER); + XE_WARN_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER); *cs++ = XY_CTRL_SURF_COPY_BLT | (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | @@ -507,9 +507,9 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u64 dst_ofs, unsigned int size, unsigned int pitch) { - XE_BUG_ON(size / pitch > S16_MAX); - XE_BUG_ON(pitch / 4 > S16_MAX); - XE_BUG_ON(pitch > U16_MAX); + XE_WARN_ON(size / pitch > S16_MAX); + XE_WARN_ON(pitch / 4 > S16_MAX); + XE_WARN_ON(pitch > U16_MAX); bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch; @@ -569,7 +569,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, * At the moment, we don't support copying CCS metadata from * system to system. */ - XE_BUG_ON(!src_is_vram && !dst_is_vram); + XE_WARN_ON(!src_is_vram && !dst_is_vram); emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs, src_is_vram, dst_size); @@ -781,7 +781,7 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs *cs++ = upper_32_bits(src_ofs); *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs); - XE_BUG_ON(cs - bb->cs != len + bb->len); + XE_WARN_ON(cs - bb->cs != len + bb->len); bb->len += len; } @@ -819,7 +819,7 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, *cs++ = 0; } - XE_BUG_ON(cs - bb->cs != len + bb->len); + XE_WARN_ON(cs - bb->cs != len + bb->len); bb->len += len; } @@ -992,9 +992,9 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, * PDE. This requires a BO that is almost vm->size big. * * This shouldn't be possible in practice.. might change when 16K - * pages are used. 
Hence the BUG_ON. + * pages are used. Hence the XE_WARN_ON. */ - XE_BUG_ON(update->qwords > 0x1ff); + XE_WARN_ON(update->qwords > 0x1ff); if (!ppgtt_ofs) { ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, XE_PAGE_SIZE)); @@ -1184,7 +1184,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, * Worst case: Sum(2 * (each lower level page size) + (top level page size)) * Should be reasonably bound.. */ - XE_BUG_ON(batch_size >= SZ_128K); + XE_WARN_ON(batch_size >= SZ_128K); bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm); if (IS_ERR(bb)) @@ -1194,7 +1194,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, if (!IS_DGFX(xe)) { ppgtt_ofs = NUM_KERNEL_PDE - 1; if (eng) { - XE_BUG_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT); + XE_WARN_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT); sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, GFP_KERNEL, true, 0); @@ -1223,7 +1223,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, for (i = 0; i < num_updates; i++) { struct xe_bo *pt_bo = updates[i].pt_bo; - BUG_ON(pt_bo->size != SZ_4K); + XE_WARN_ON(pt_bo->size != SZ_4K); addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0); bb->cs[bb->len++] = lower_32_bits(addr); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d9192bf50362..b82ce01cc4cb 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -106,7 +106,7 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, pte |= XE_PDPE_PS_1G; /* XXX: Does hw support 1 GiB pages? */ - XE_BUG_ON(pt_level > 2); + XE_WARN_ON(pt_level > 2); return pte; } @@ -196,7 +196,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, pt->level = level; pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL; - XE_BUG_ON(level > XE_VM_MAX_LEVEL); + XE_WARN_ON(level > XE_VM_MAX_LEVEL); return pt; @@ -265,7 +265,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) if (!pt) return; - XE_BUG_ON(!list_empty(&pt->bo->vmas)); + XE_WARN_ON(!list_empty(&pt->bo->vmas)); xe_bo_unpin(pt->bo); xe_bo_put_deferred(pt->bo, deferred); @@ -849,8 +849,8 @@ static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); pgoff_t end_offset; - XE_BUG_ON(!*child); - XE_BUG_ON(!level && xe_child->is_compact); + XE_WARN_ON(!*child); + XE_WARN_ON(!level && xe_child->is_compact); /* * Note that we're called from an entry callback, and we're dealing @@ -1004,7 +1004,7 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, *num_entries = 0; err = xe_pt_stage_bind(tile, vma, entries, num_entries); if (!err) - BUG_ON(!*num_entries); + XE_WARN_ON(!*num_entries); else /* abort! 
*/ xe_pt_abort_bind(vma, entries, *num_entries); @@ -1026,7 +1026,7 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe, u64 end; u64 start; - XE_BUG_ON(entry->pt->is_compact); + XE_WARN_ON(entry->pt->is_compact); start = entry->ofs * page_size; end = start + page_size * entry->qwords; vm_dbg(&xe->drm, @@ -1356,7 +1356,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind); if (err) goto err; - XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); + XE_WARN_ON(num_entries > ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, @@ -1515,8 +1515,8 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, { struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - XE_BUG_ON(!*child); - XE_BUG_ON(!level && xe_child->is_compact); + XE_WARN_ON(!*child); + XE_WARN_ON(!level && xe_child->is_compact); xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); @@ -1707,7 +1707,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e xe_vma_start(vma), xe_vma_end(vma) - 1, e); num_entries = xe_pt_stage_unbind(tile, vma, entries); - XE_BUG_ON(num_entries > ARRAY_SIZE(entries)); + XE_WARN_ON(num_entries > ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index dda963fe3300..5cb4b66a5d74 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -79,7 +79,7 @@ static inline void xe_res_first(struct ttm_resource *res, if (!res) goto fallback; - XE_BUG_ON(start + size > res->size); + XE_WARN_ON(start + size > res->size); cur->mem_type = res->mem_type; @@ -139,7 +139,7 @@ static inline void __xe_res_sg_next(struct xe_res_cursor *cur) while (start >= sg_dma_len(sgl)) { start -= sg_dma_len(sgl); sgl = sg_next(sgl); - XE_BUG_ON(!sgl); + XE_WARN_ON(!sgl); } cur->start = start; @@ -161,9 +161,9 @@ static inline void xe_res_first_sg(const struct sg_table *sg, u64 start, u64 size, struct xe_res_cursor *cur) { - XE_BUG_ON(!sg); - XE_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE) || - !IS_ALIGNED(size, PAGE_SIZE)); + XE_WARN_ON(!sg); + XE_WARN_ON(!IS_ALIGNED(start, PAGE_SIZE) || + !IS_ALIGNED(size, PAGE_SIZE)); cur->node = NULL; cur->start = start; cur->remaining = size; @@ -187,7 +187,7 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size) struct list_head *next; u64 start; - XE_BUG_ON(size > cur->remaining); + XE_WARN_ON(size > cur->remaining); cur->remaining -= size; if (!cur->remaining) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 4cfd78e1ffa5..2d0d392cd691 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -233,7 +233,7 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_user_interrupt(dw, i); - XE_BUG_ON(i > MAX_JOB_SIZE_DW); + XE_WARN_ON(i > MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } @@ -291,7 +291,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_user_interrupt(dw, i); - XE_BUG_ON(i > MAX_JOB_SIZE_DW); + XE_WARN_ON(i > MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } @@ -339,7 +339,7 @@ static void __emit_job_gen12_render_compute(struct 
xe_sched_job *job, i = emit_user_interrupt(dw, i); - XE_BUG_ON(i > MAX_JOB_SIZE_DW); + XE_WARN_ON(i > MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } @@ -369,7 +369,7 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, i = emit_user_interrupt(dw, i); - XE_BUG_ON(i > MAX_JOB_SIZE_DW); + XE_WARN_ON(i > MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index c87f65c98b3d..85fd5980191c 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -142,7 +142,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, /* Sanity check */ for (j = 0; j < e->width; ++j) - XE_BUG_ON(cf->base.seqno != fences[j]->seqno); + XE_WARN_ON(cf->base.seqno != fences[j]->seqno); job->fence = &cf->base; } diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 7bba8fff5a5d..be0a25e23929 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -205,7 +205,7 @@ u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr); struct xe_res_cursor cur; - XE_BUG_ON(!mgr->io_base); + XE_WARN_ON(!mgr->io_base); if (xe_ttm_stolen_cpu_access_needs_ggtt(xe)) return mgr->io_base + xe_bo_ggtt_addr(bo) + offset; @@ -245,7 +245,7 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe, #ifdef CONFIG_X86 struct xe_bo *bo = ttm_to_xe_bo(mem->bo); - XE_BUG_ON(IS_DGFX(xe)); + XE_WARN_ON(IS_DGFX(xe)); /* XXX: Require BO to be mapped to GGTT? */ if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT))) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5801c10f3ccc..4b04f6e5388d 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -158,7 +158,7 @@ __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type) if (type == XE_UC_FW_TYPE_GUC) return container_of(uc_fw, struct xe_gt, uc.guc.fw); - XE_BUG_ON(type != XE_UC_FW_TYPE_HUC); + XE_WARN_ON(type != XE_UC_FW_TYPE_HUC); return container_of(uc_fw, struct xe_gt, uc.huc.fw); } @@ -194,7 +194,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) u32 count; int i; - XE_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); + XE_WARN_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); entries = blobs_all[uc_fw->type].entries; count = blobs_all[uc_fw->type].count; @@ -223,8 +223,8 @@ size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len) struct xe_device *xe = uc_fw_to_xe(uc_fw); u32 size = min_t(u32, uc_fw->rsa_size, max_len); - XE_BUG_ON(size % 4); - XE_BUG_ON(!xe_uc_fw_is_available(uc_fw)); + XE_WARN_ON(size % 4); + XE_WARN_ON(!xe_uc_fw_is_available(uc_fw)); xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap, xe_uc_fw_rsa_offset(uc_fw), size); @@ -248,7 +248,7 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_guc *guc = >->uc.guc; - XE_BUG_ON(uc_fw->type != XE_UC_FW_TYPE_GUC); + XE_WARN_ON(uc_fw->type != XE_UC_FW_TYPE_GUC); XE_WARN_ON(uc_fw->major_ver_found < 70); if (uc_fw->major_ver_found > 70 || uc_fw->minor_ver_found >= 6) { @@ -335,8 +335,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) * before we're looked at the HW caps to see if we have uc support */ BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); - XE_BUG_ON(uc_fw->status); - XE_BUG_ON(uc_fw->path); + XE_WARN_ON(uc_fw->status); + XE_WARN_ON(uc_fw->path); 
uc_fw_auto_select(xe, uc_fw); xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? @@ -502,7 +502,7 @@ int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) int err; /* make sure the status was cleared the last time we reset the uc */ - XE_BUG_ON(xe_uc_fw_is_loaded(uc_fw)); + XE_WARN_ON(xe_uc_fw_is_loaded(uc_fw)); if (!xe_uc_fw_is_loadable(uc_fw)) return -ENOEXEC; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index e16267e71280..a519c77d4962 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -104,7 +104,7 @@ static inline enum xe_uc_fw_status __xe_uc_fw_status(struct xe_uc_fw *uc_fw) { /* shouldn't call this before checking hw/blob availability */ - XE_BUG_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED); + XE_WARN_ON(uc_fw->status == XE_UC_FIRMWARE_UNINITIALIZED); return uc_fw->status; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 205795823555..0bebdac2287c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -63,7 +63,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma) bool read_only = xe_vma_read_only(vma); lockdep_assert_held(&vm->lock); - XE_BUG_ON(!xe_vma_is_userptr(vma)); + XE_WARN_ON(!xe_vma_is_userptr(vma)); retry: if (vma->gpuva.flags & XE_VMA_DESTROYED) return 0; @@ -252,7 +252,7 @@ static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) struct dma_fence *fence; link = list->next; - XE_BUG_ON(link == list); + XE_WARN_ON(link == list); fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), e, e->compute.context, @@ -329,7 +329,7 @@ int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e) int err; bool wait; - XE_BUG_ON(!xe_vm_in_compute_mode(vm)); + XE_WARN_ON(!xe_vm_in_compute_mode(vm)); down_write(&vm->lock); @@ -549,7 +549,7 @@ static void preempt_rebind_work_func(struct work_struct *w) long wait; int __maybe_unused tries = 0; - XE_BUG_ON(!xe_vm_in_compute_mode(vm)); + XE_WARN_ON(!xe_vm_in_compute_mode(vm)); trace_xe_vm_rebind_worker_enter(vm); down_write(&vm->lock); @@ -708,7 +708,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, struct dma_fence *fence; long err; - XE_BUG_ON(!xe_vma_is_userptr(vma)); + XE_WARN_ON(!xe_vma_is_userptr(vma)); trace_xe_vma_userptr_invalidate(vma); if (!mmu_notifier_range_blockable(range)) @@ -877,8 +877,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_tile *tile; u8 id; - XE_BUG_ON(start >= end); - XE_BUG_ON(end >= vm->size); + XE_WARN_ON(start >= end); + XE_WARN_ON(end >= vm->size); if (!bo && !is_null) /* userptr */ vma = kzalloc(sizeof(*vma), GFP_KERNEL); @@ -1075,7 +1075,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) struct xe_vm *vm = xe_vma_vm(vma); lockdep_assert_held_write(&vm->lock); - XE_BUG_ON(!list_empty(&vma->combined_links.destroy)); + XE_WARN_ON(!list_empty(&vma->combined_links.destroy)); if (xe_vma_is_userptr(vma)) { XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED)); @@ -1153,7 +1153,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) if (xe_vm_is_closed_or_banned(vm)) return NULL; - XE_BUG_ON(start + range > vm->size); + XE_WARN_ON(start + range > vm->size); gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); @@ -1164,7 +1164,7 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) { int err; - XE_BUG_ON(xe_vma_vm(vma) != vm); + XE_WARN_ON(xe_vma_vm(vma) != vm); lockdep_assert_held(&vm->lock); err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); @@ 
-1175,7 +1175,7 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) { - XE_BUG_ON(xe_vma_vm(vma) != vm); + XE_WARN_ON(xe_vma_vm(vma) != vm); lockdep_assert_held(&vm->lock); drm_gpuva_remove(&vma->gpuva); @@ -1422,7 +1422,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) struct drm_gpuva *gpuva, *next; u8 id; - XE_BUG_ON(vm->preempt.num_engines); + XE_WARN_ON(vm->preempt.num_engines); xe_vm_close(vm); flush_async_ops(vm); @@ -1795,7 +1795,7 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence) struct async_op_fence *afence = container_of(fence, struct async_op_fence, fence); - XE_BUG_ON(xe_vm_no_dma_fences(afence->vm)); + XE_WARN_ON(xe_vm_no_dma_fences(afence->vm)); smp_rmb(); return wait_event_interruptible(afence->wq, afence->started); @@ -1821,7 +1821,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, } else { int i; - XE_BUG_ON(!xe_vm_in_fault_mode(vm)); + XE_WARN_ON(!xe_vm_in_fault_mode(vm)); fence = dma_fence_get_stub(); if (last_op) { @@ -2100,7 +2100,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, { int err; - XE_BUG_ON(region > ARRAY_SIZE(region_to_mem_type)); + XE_WARN_ON(region > ARRAY_SIZE(region_to_mem_type)); if (!xe_vma_has_no_bo(vma)) { err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); @@ -2181,7 +2181,7 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, return -ENODATA; break; default: - XE_BUG_ON("NOT POSSIBLE"); + XE_WARN_ON("NOT POSSIBLE"); return -EINVAL; } @@ -2239,7 +2239,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); break; default: - XE_BUG_ON("NOT POSSIBLE"); + XE_WARN_ON("NOT POSSIBLE"); } } #else @@ -2315,7 +2315,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } break; case XE_VM_BIND_OP_UNMAP_ALL: - XE_BUG_ON(!bo); + XE_WARN_ON(!bo); err = xe_bo_lock(bo, &ww, 0, true); if (err) @@ -2338,7 +2338,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } break; default: - XE_BUG_ON("NOT POSSIBLE"); + XE_WARN_ON("NOT POSSIBLE"); ops = ERR_PTR(-EINVAL); } @@ -2425,7 +2425,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, int err, i; lockdep_assert_held_write(&vm->lock); - XE_BUG_ON(num_ops_list > 1 && !async); + XE_WARN_ON(num_ops_list > 1 && !async); if (num_syncs && async) { u64 seqno; @@ -2454,7 +2454,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); bool first = !async_list; - XE_BUG_ON(!first && !async); + XE_WARN_ON(!first && !async); INIT_LIST_HEAD(&op->link); if (first) @@ -2566,7 +2566,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, /* Nothing to do */ break; default: - XE_BUG_ON("NOT POSSIBLE"); + XE_WARN_ON("NOT POSSIBLE"); } last_op = op; @@ -2628,7 +2628,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) /* Nothing to do */ break; default: - XE_BUG_ON("NOT POSSIBLE"); + XE_WARN_ON("NOT POSSIBLE"); } op->flags |= XE_VMA_OP_COMMITTED; @@ -2746,7 +2746,7 @@ again: op->flags & XE_VMA_OP_LAST); break; default: - XE_BUG_ON("NOT POSSIBLE"); + XE_WARN_ON("NOT POSSIBLE"); } ttm_eu_backoff_reservation(&ww, &objs); @@ -2805,7 +2805,7 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) op); break; default: - XE_BUG_ON("NOT POSSIBLE"); + XE_WARN_ON("NOT POSSIBLE"); } return ret; @@ -2881,7 +2881,7 @@ static void 
xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, /* Nothing to do */ break; default: - XE_BUG_ON("NOT POSSIBLE"); + XE_WARN_ON("NOT POSSIBLE"); } } @@ -3413,7 +3413,7 @@ int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, LIST_HEAD(objs); LIST_HEAD(dups); - XE_BUG_ON(!ww); + XE_WARN_ON(!ww); tv_vm.num_shared = num_resv; tv_vm.bo = xe_vm_ttm_bo(vm); @@ -3447,7 +3447,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) u8 id; int ret; - XE_BUG_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma))); + XE_WARN_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma))); XE_WARN_ON(xe_vma_is_null(vma)); trace_xe_vma_usm_invalidate(vma); diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index c4202df1d4f0..761eed3a022f 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -45,7 +45,7 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op) passed = (rvalue & mask) <= (value & mask); break; default: - XE_BUG_ON("Not possible"); + XE_WARN_ON("Not possible"); } return passed ? 0 : 1; diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index d9acf8783b83..9a85bcc18830 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -144,10 +144,10 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt, u32 mask; int err; - XE_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK)); - XE_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK); - XE_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); - XE_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); + XE_WARN_ON(!(base & GUC_WOPCM_OFFSET_MASK)); + XE_WARN_ON(base & ~GUC_WOPCM_OFFSET_MASK); + XE_WARN_ON(!(size & GUC_WOPCM_SIZE_MASK)); + XE_WARN_ON(size & ~GUC_WOPCM_SIZE_MASK); mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; err = xe_mmio_write32_and_verify(gt, GUC_WOPCM_SIZE, size, mask, @@ -213,9 +213,9 @@ int xe_wopcm_init(struct xe_wopcm *wopcm) drm_dbg(&xe->drm, "WOPCM: %uK\n", wopcm->size / SZ_1K); xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); - XE_BUG_ON(guc_fw_size >= wopcm->size); - XE_BUG_ON(huc_fw_size >= wopcm->size); - XE_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size); + XE_WARN_ON(guc_fw_size >= wopcm->size); + XE_WARN_ON(huc_fw_size >= wopcm->size); + XE_WARN_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size); locked = __wopcm_regs_locked(gt, &guc_wopcm_base, &guc_wopcm_size); if (locked) { @@ -256,8 +256,8 @@ check: guc_fw_size, huc_fw_size)) { wopcm->guc.base = guc_wopcm_base; wopcm->guc.size = guc_wopcm_size; - XE_BUG_ON(!wopcm->guc.base); - XE_BUG_ON(!wopcm->guc.size); + XE_WARN_ON(!wopcm->guc.base); + XE_WARN_ON(!wopcm->guc.size); } else { drm_notice(&xe->drm, "Unsuccessful WOPCM partitioning\n"); return -E2BIG; -- cgit v1.2.3 From f82686ef74b96a51ba6c38f3ce119ba7f7995210 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 28 Jul 2023 20:53:41 -0700 Subject: drm/xe: remove header variable from parse_g2h_msg The header variable is unused, remove it. 
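The routing decision in the hunk below is made entirely from the HXG header in msg[1]; msg[0] is the outer CT header and is not consulted here, which is why the local read of it can go. A minimal standalone sketch of this style of mask-and-shift field extraction follows; the mask names and values are placeholders for illustration only, not the real GuC ABI definitions, which live in the driver's ABI headers.

  #include <stdint.h>
  #include <stdio.h>

  /* Placeholder masks, illustration only -- not the GuC HXG layout. */
  #define EXAMPLE_HXG_ORIGIN_MASK 0x80000000u
  #define EXAMPLE_HXG_TYPE_MASK   0x70000000u

  /* Open-coded FIELD_GET(): isolate the field, shift it down by the
   * mask's lowest set bit. */
  static uint32_t field_get(uint32_t mask, uint32_t value)
  {
          return (value & mask) / (mask & -mask);
  }

  int main(void)
  {
          /* Fake two-dword G2H message: msg[0] (CT header) is never
           * needed for routing, only msg[1] (HXG header) is. */
          uint32_t msg[2] = { 0x0, 0xD0000000u };
          uint32_t hxg = msg[1];

          printf("origin=%u type=%u\n",
                 field_get(EXAMPLE_HXG_ORIGIN_MASK, hxg),
                 field_get(EXAMPLE_HXG_TYPE_MASK, hxg));
          return 0;
  }

With the placeholder values above this prints "origin=1 type=5"; the real driver does the same thing via FIELD_GET() and the GUC_HXG_MSG_0_* masks, as the diff shows.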
Reviewed-by: Rodrigo Vivi Suggested-by: Oded Gabbay Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7fb2690425f8..fb1d63ffaee4 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -829,12 +829,11 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_device *xe = ct_to_xe(ct); - u32 header, hxg, origin, type; + u32 hxg, origin, type; int ret; lockdep_assert_held(&ct->lock); - header = msg[0]; hxg = msg[1]; origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg); -- cgit v1.2.3 From 2a368a09ae1c3f7aebe6210927a1335186d3c6f7 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 25 Jul 2023 17:12:39 +0200 Subject: drm/xe: Fix error paths of __xe_bo_create_locked ttm_bo_init_reserved() calls the destroy() callback if it fails. Because of this, __xe_bo_create_locked is required to be responsible for freeing the bo even when it's passed in as argument. Additionally, if the placement check fails, the bo was kept alive. Fix it too. Reported-by: Oded Gabbay Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index cf0faaefd03d..a12613002766 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1200,8 +1200,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, /* Only kernel objects should set GT */ XE_WARN_ON(tile && type != ttm_bo_type_kernel); - if (XE_WARN_ON(!size)) + if (XE_WARN_ON(!size)) { + xe_bo_free(bo); return ERR_PTR(-EINVAL); + } if (!bo) { bo = xe_bo_alloc(); @@ -1239,8 +1241,10 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) { err = __xe_bo_placement_for_flags(xe, bo, bo->flags); - if (WARN_ON(err)) + if (WARN_ON(err)) { + xe_ttm_bo_destroy(&bo->ttm); return ERR_PTR(err); + } } /* Defer populating type_sg bos */ -- cgit v1.2.3 From c22a4ed0c325cd29d7baf07d4cf2c127550b8859 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 1 Aug 2023 12:28:14 +0200 Subject: drm/xe: Rename xe_engine.[ch] to xe_exec_queue.[ch] This is a preparation commit for a larger renaming of engine to exec queue. 
Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/xe_devcoredump.c | 2 +- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_engine.c | 850 ---------------------------------- drivers/gpu/drm/xe/xe_engine.h | 63 --- drivers/gpu/drm/xe/xe_exec.c | 2 +- drivers/gpu/drm/xe/xe_exec_queue.c | 850 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_exec_queue.h | 63 +++ drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_mocs.c | 2 +- drivers/gpu/drm/xe/xe_preempt_fence.c | 2 +- drivers/gpu/drm/xe/xe_query.c | 2 +- drivers/gpu/drm/xe/xe_sched_job.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 2 +- 17 files changed, 926 insertions(+), 926 deletions(-) delete mode 100644 drivers/gpu/drm/xe/xe_engine.c delete mode 100644 drivers/gpu/drm/xe/xe_engine.h create mode 100644 drivers/gpu/drm/xe/xe_exec_queue.c create mode 100644 drivers/gpu/drm/xe/xe_exec_queue.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 312e643397ba..f8d63c9b97d5 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -54,9 +54,9 @@ xe-y += xe_bb.o \ xe_device.o \ xe_device_sysfs.o \ xe_dma_buf.o \ - xe_engine.o \ xe_exec.o \ xe_execlist.o \ + xe_exec_queue.o \ xe_force_wake.o \ xe_ggtt.o \ xe_gpu_scheduler.o \ diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index f53f4b51233a..61ff97ea7659 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -10,7 +10,7 @@ #include #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_guc_ct.h" diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 63ed59c61c84..a8ab86379ed6 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -18,7 +18,7 @@ #include "xe_debugfs.h" #include "xe_dma_buf.h" #include "xe_drv.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_exec.h" #include "xe_gt.h" #include "xe_irq.h" diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c deleted file mode 100644 index f60d29b2b506..000000000000 --- a/drivers/gpu/drm/xe/xe_engine.c +++ /dev/null @@ -1,850 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_engine.h" - -#include - -#include -#include -#include - -#include "xe_device.h" -#include "xe_gt.h" -#include "xe_hw_fence.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_migrate.h" -#include "xe_pm.h" -#include "xe_ring_ops_types.h" -#include "xe_trace.h" -#include "xe_vm.h" - -static struct xe_engine *__xe_engine_create(struct xe_device *xe, - struct xe_vm *vm, - u32 logical_mask, - u16 width, struct xe_hw_engine *hwe, - u32 flags) -{ - struct xe_engine *e; - struct xe_gt *gt = hwe->gt; - int err; - int i; - - e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - - kref_init(&e->refcount); - e->flags = flags; - e->hwe = hwe; - e->gt = gt; - if (vm) - e->vm = xe_vm_get(vm); - e->class = hwe->class; - e->width = width; - e->logical_mask = logical_mask; - e->fence_irq = >->fence_irq[hwe->class]; - e->ring_ops = gt->ring_ops[hwe->class]; - e->ops = gt->engine_ops; - INIT_LIST_HEAD(&e->persistent.link); - 
INIT_LIST_HEAD(&e->compute.link); - INIT_LIST_HEAD(&e->multi_gt_link); - - /* FIXME: Wire up to configurable default value */ - e->sched_props.timeslice_us = 1 * 1000; - e->sched_props.preempt_timeout_us = 640 * 1000; - - if (xe_engine_is_parallel(e)) { - e->parallel.composite_fence_ctx = dma_fence_context_alloc(1); - e->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; - } - if (e->flags & ENGINE_FLAG_VM) { - e->bind.fence_ctx = dma_fence_context_alloc(1); - e->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO; - } - - for (i = 0; i < width; ++i) { - err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K); - if (err) - goto err_lrc; - } - - err = e->ops->init(e); - if (err) - goto err_lrc; - - /* - * Normally the user vm holds an rpm ref to keep the device - * awake, and the context holds a ref for the vm, however for - * some engines we use the kernels migrate vm underneath which - * offers no such rpm ref. Make sure we keep a ref here, so we - * can perform GuC CT actions when needed. Caller is expected to - * have already grabbed the rpm ref outside any sensitive locks. - */ - if (e->flags & ENGINE_FLAG_VM) - drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); - - return e; - -err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_finish(e->lrc + i); - kfree(e); - return ERR_PTR(err); -} - -struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, - u32 logical_mask, u16 width, - struct xe_hw_engine *hwe, u32 flags) -{ - struct ww_acquire_ctx ww; - struct xe_engine *e; - int err; - - if (vm) { - err = xe_vm_lock(vm, &ww, 0, true); - if (err) - return ERR_PTR(err); - } - e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags); - if (vm) - xe_vm_unlock(vm, &ww); - - return e; -} - -struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, - enum xe_engine_class class, u32 flags) -{ - struct xe_hw_engine *hwe, *hwe0 = NULL; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == class) { - logical_mask |= BIT(hwe->logical_instance); - if (!hwe0) - hwe0 = hwe; - } - } - - if (!logical_mask) - return ERR_PTR(-ENODEV); - - return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags); -} - -void xe_engine_destroy(struct kref *ref) -{ - struct xe_engine *e = container_of(ref, struct xe_engine, refcount); - struct xe_engine *engine, *next; - - if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) { - list_for_each_entry_safe(engine, next, &e->multi_gt_list, - multi_gt_link) - xe_engine_put(engine); - } - - e->ops->fini(e); -} - -void xe_engine_fini(struct xe_engine *e) -{ - int i; - - for (i = 0; i < e->width; ++i) - xe_lrc_finish(e->lrc + i); - if (e->vm) - xe_vm_put(e->vm); - if (e->flags & ENGINE_FLAG_VM) - xe_device_mem_access_put(gt_to_xe(e->gt)); - - kfree(e); -} - -struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id) -{ - struct xe_engine *e; - - mutex_lock(&xef->engine.lock); - e = xa_load(&xef->engine.xa, id); - if (e) - xe_engine_get(e); - mutex_unlock(&xef->engine.lock); - - return e; -} - -enum xe_engine_priority -xe_engine_device_get_max_priority(struct xe_device *xe) -{ - return capable(CAP_SYS_NICE) ? 
XE_ENGINE_PRIORITY_HIGH : - XE_ENGINE_PRIORITY_NORMAL; -} - -static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, value > XE_ENGINE_PRIORITY_HIGH)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, value > xe_engine_device_get_max_priority(xe))) - return -EPERM; - - return e->ops->set_priority(e, value); -} - -static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) -{ - if (!capable(CAP_SYS_NICE)) - return -EPERM; - - return e->ops->set_timeslice(e, value); -} - -static int engine_set_preemption_timeout(struct xe_device *xe, - struct xe_engine *e, u64 value, - bool create) -{ - if (!capable(CAP_SYS_NICE)) - return -EPERM; - - return e->ops->set_preempt_timeout(e, value); -} - -static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_VM)) - return -EINVAL; - - if (value) { - struct xe_vm *vm = e->vm; - int err; - - if (XE_IOCTL_DBG(xe, xe_vm_in_fault_mode(vm))) - return -EOPNOTSUPP; - - if (XE_IOCTL_DBG(xe, !xe_vm_in_compute_mode(vm))) - return -EOPNOTSUPP; - - if (XE_IOCTL_DBG(xe, e->width != 1)) - return -EINVAL; - - e->compute.context = dma_fence_context_alloc(1); - spin_lock_init(&e->compute.lock); - - err = xe_vm_add_compute_engine(vm, e); - if (XE_IOCTL_DBG(xe, err)) - return err; - - e->flags |= ENGINE_FLAG_COMPUTE_MODE; - e->flags &= ~ENGINE_FLAG_PERSISTENT; - } - - return 0; -} - -static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) - return -EINVAL; - - if (value) - e->flags |= ENGINE_FLAG_PERSISTENT; - else - e->flags &= ~ENGINE_FLAG_PERSISTENT; - - return 0; -} - -static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (!capable(CAP_SYS_NICE)) - return -EPERM; - - return e->ops->set_job_timeout(e, value); -} - -static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) - return -EINVAL; - - e->usm.acc_trigger = value; - - return 0; -} - -static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) - return -EINVAL; - - e->usm.acc_notify = value; - - return 0; -} - -static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) - return -EINVAL; - - e->usm.acc_granularity = value; - - return 0; -} - -typedef int (*xe_engine_set_property_fn)(struct xe_device *xe, - struct xe_engine *e, - u64 value, bool create); - -static const xe_engine_set_property_fn engine_set_property_funcs[] = { - [XE_ENGINE_SET_PROPERTY_PRIORITY] = engine_set_priority, - [XE_ENGINE_SET_PROPERTY_TIMESLICE] = engine_set_timeslice, - [XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout, - [XE_ENGINE_SET_PROPERTY_COMPUTE_MODE] = 
engine_set_compute_mode, - [XE_ENGINE_SET_PROPERTY_PERSISTENCE] = engine_set_persistence, - [XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout, - [XE_ENGINE_SET_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger, - [XE_ENGINE_SET_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify, - [XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity, -}; - -static int engine_user_ext_set_property(struct xe_device *xe, - struct xe_engine *e, - u64 extension, - bool create) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_engine_set_property ext; - int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.property >= - ARRAY_SIZE(engine_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs)); - return engine_set_property_funcs[idx](xe, e, ext.value, create); -} - -typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe, - struct xe_engine *e, - u64 extension, - bool create); - -static const xe_engine_set_property_fn engine_user_extension_funcs[] = { - [XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, - u64 extensions, int ext_number, bool create) -{ - u64 __user *address = u64_to_user_ptr(extensions); - struct xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.name >= - ARRAY_SIZE(engine_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, - ARRAY_SIZE(engine_user_extension_funcs)); - err = engine_user_extension_funcs[idx](xe, e, extensions, create); - if (XE_IOCTL_DBG(xe, err)) - return err; - - if (ext.next_extension) - return engine_user_extensions(xe, e, ext.next_extension, - ++ext_number, create); - - return 0; -} - -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - -static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if 
(xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - -static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - int len = width * num_placements; - int i, j, n; - u16 class; - u16 gt_id; - u32 return_mask = 0, prev_mask; - - if (XE_IOCTL_DBG(xe, !xe_device_guc_submission_enabled(xe) && - len > 1)) - return 0; - - for (i = 0; i < width; ++i) { - u32 current_mask = 0; - - for (j = 0; j < num_placements; ++j) { - struct xe_hw_engine *hwe; - - n = j * width + i; - - hwe = find_hw_engine(xe, eci[n]); - if (XE_IOCTL_DBG(xe, !hwe)) - return 0; - - if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe))) - return 0; - - if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) || - XE_IOCTL_DBG(xe, n && eci[n].engine_class != class)) - return 0; - - class = eci[n].engine_class; - gt_id = eci[n].gt_id; - - if (width == 1 || !i) - return_mask |= BIT(eci[n].engine_instance); - current_mask |= BIT(eci[n].engine_instance); - } - - /* Parallel submissions must be logically contiguous */ - if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1)) - return 0; - - prev_mask = current_mask; - } - - return return_mask; -} - -int xe_engine_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_engine_create *args = data; - struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; - struct drm_xe_engine_class_instance __user *user_eci = - u64_to_user_ptr(args->instances); - struct xe_hw_engine *hwe; - struct xe_vm *vm, *migrate_vm; - struct xe_gt *gt; - struct xe_engine *e = NULL; - u32 logical_mask; - u32 id; - u32 len; - int err; - - if (XE_IOCTL_DBG(xe, args->flags) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - len = args->width * args->num_placements; - if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) - return -EINVAL; - - err = __copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - len); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) - return -EINVAL; - - if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - for_each_gt(gt, xe, id) { - struct xe_engine *new; - - if (xe_gt_is_media_type(gt)) - continue; - - eci[0].gt_id = gt->info.id; - logical_mask = bind_engine_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_device_mem_access_get(xe); - - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_engine_create(xe, migrate_vm, logical_mask, - args->width, hwe, - ENGINE_FLAG_PERSISTENT | - ENGINE_FLAG_VM | - (id ? 
- ENGINE_FLAG_BIND_ENGINE_CHILD : - 0)); - - xe_device_mem_access_put(xe); /* now held by engine */ - - xe_vm_put(migrate_vm); - if (IS_ERR(new)) { - err = PTR_ERR(new); - if (e) - goto put_engine; - return err; - } - if (id == 0) - e = new; - else - list_add_tail(&new->multi_gt_list, - &e->multi_gt_link); - } - } else { - gt = xe_device_get_gt(xe, eci[0].gt_id); - logical_mask = calc_validate_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - return -ENOENT; - - err = down_read_interruptible(&vm->lock); - if (err) { - xe_vm_put(vm); - return err; - } - - if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { - up_read(&vm->lock); - xe_vm_put(vm); - return -ENOENT; - } - - e = xe_engine_create(xe, vm, logical_mask, - args->width, hwe, - xe_vm_no_dma_fences(vm) ? 0 : - ENGINE_FLAG_PERSISTENT); - up_read(&vm->lock); - xe_vm_put(vm); - if (IS_ERR(e)) - return PTR_ERR(e); - } - - if (args->extensions) { - err = engine_user_extensions(xe, e, args->extensions, 0, true); - if (XE_IOCTL_DBG(xe, err)) - goto put_engine; - } - - if (XE_IOCTL_DBG(xe, e->vm && xe_vm_in_compute_mode(e->vm) != - !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) { - err = -EOPNOTSUPP; - goto put_engine; - } - - e->persistent.xef = xef; - - mutex_lock(&xef->engine.lock); - err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->engine.lock); - if (err) - goto put_engine; - - args->engine_id = id; - - return 0; - -put_engine: - xe_engine_kill(e); - xe_engine_put(e); - return err; -} - -int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_engine_get_property *args = data; - struct xe_engine *e; - int ret; - - if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - e = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_DBG(xe, !e)) - return -ENOENT; - - switch (args->property) { - case XE_ENGINE_GET_PROPERTY_BAN: - args->value = !!(e->flags & ENGINE_FLAG_BANNED); - ret = 0; - break; - default: - ret = -EINVAL; - } - - xe_engine_put(e); - - return ret; -} - -static void engine_kill_compute(struct xe_engine *e) -{ - if (!xe_vm_in_compute_mode(e->vm)) - return; - - down_write(&e->vm->lock); - list_del(&e->compute.link); - --e->vm->preempt.num_engines; - if (e->compute.pfence) { - dma_fence_enable_sw_signaling(e->compute.pfence); - dma_fence_put(e->compute.pfence); - e->compute.pfence = NULL; - } - up_write(&e->vm->lock); -} - -/** - * xe_engine_is_lr() - Whether an engine is long-running - * @e: The engine - * - * Return: True if the engine is long-running, false otherwise. - */ -bool xe_engine_is_lr(struct xe_engine *e) -{ - return e->vm && xe_vm_no_dma_fences(e->vm) && - !(e->flags & ENGINE_FLAG_VM); -} - -static s32 xe_engine_num_job_inflight(struct xe_engine *e) -{ - return e->lrc->fence_ctx.next_seqno - xe_lrc_seqno(e->lrc) - 1; -} - -/** - * xe_engine_ring_full() - Whether an engine's ring is full - * @e: The engine - * - * Return: True if the engine's ring is full, false otherwise. 
- */ -bool xe_engine_ring_full(struct xe_engine *e) -{ - struct xe_lrc *lrc = e->lrc; - s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; - - return xe_engine_num_job_inflight(e) >= max_job; -} - -/** - * xe_engine_is_idle() - Whether an engine is idle. - * @engine: The engine - * - * FIXME: Need to determine what to use as the short-lived - * timeline lock for the engines, so that the return value - * of this function becomes more than just an advisory - * snapshot in time. The timeline lock must protect the - * seqno from racing submissions on the same engine. - * Typically vm->resv, but user-created timeline locks use the migrate vm - * and never grabs the migrate vm->resv so we have a race there. - * - * Return: True if the engine is idle, false otherwise. - */ -bool xe_engine_is_idle(struct xe_engine *engine) -{ - if (XE_WARN_ON(xe_engine_is_parallel(engine))) - return false; - - return xe_lrc_seqno(&engine->lrc[0]) == - engine->lrc[0].fence_ctx.next_seqno - 1; -} - -void xe_engine_kill(struct xe_engine *e) -{ - struct xe_engine *engine = e, *next; - - list_for_each_entry_safe(engine, next, &engine->multi_gt_list, - multi_gt_link) { - e->ops->kill(engine); - engine_kill_compute(engine); - } - - e->ops->kill(e); - engine_kill_compute(e); -} - -int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_engine_destroy *args = data; - struct xe_engine *e; - - if (XE_IOCTL_DBG(xe, args->pad) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - mutex_lock(&xef->engine.lock); - e = xa_erase(&xef->engine.xa, args->engine_id); - mutex_unlock(&xef->engine.lock); - if (XE_IOCTL_DBG(xe, !e)) - return -ENOENT; - - if (!(e->flags & ENGINE_FLAG_PERSISTENT)) - xe_engine_kill(e); - else - xe_device_add_persistent_engines(xe, e); - - trace_xe_engine_close(e); - xe_engine_put(e); - - return 0; -} - -int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_engine_set_property *args = data; - struct xe_engine *e; - int ret; - u32 idx; - - if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - e = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_DBG(xe, !e)) - return -ENOENT; - - if (XE_IOCTL_DBG(xe, args->property >= - ARRAY_SIZE(engine_set_property_funcs))) { - ret = -EINVAL; - goto out; - } - - idx = array_index_nospec(args->property, - ARRAY_SIZE(engine_set_property_funcs)); - ret = engine_set_property_funcs[idx](xe, e, args->value, false); - if (XE_IOCTL_DBG(xe, ret)) - goto out; - - if (args->extensions) - ret = engine_user_extensions(xe, e, args->extensions, 0, - false); -out: - xe_engine_put(e); - - return ret; -} diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h deleted file mode 100644 index 3017e4fe308d..000000000000 --- a/drivers/gpu/drm/xe/xe_engine.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2021 Intel Corporation - */ - -#ifndef _XE_ENGINE_H_ -#define _XE_ENGINE_H_ - -#include "xe_engine_types.h" -#include "xe_vm_types.h" - -struct drm_device; -struct drm_file; -struct xe_device; -struct xe_file; - -struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, - u32 logical_mask, u16 width, - struct xe_hw_engine *hw_engine, u32 flags); -struct xe_engine 
*xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, - enum xe_engine_class class, u32 flags); - -void xe_engine_fini(struct xe_engine *e); -void xe_engine_destroy(struct kref *ref); - -struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id); - -static inline struct xe_engine *xe_engine_get(struct xe_engine *engine) -{ - kref_get(&engine->refcount); - return engine; -} - -static inline void xe_engine_put(struct xe_engine *engine) -{ - kref_put(&engine->refcount, xe_engine_destroy); -} - -static inline bool xe_engine_is_parallel(struct xe_engine *engine) -{ - return engine->width > 1; -} - -bool xe_engine_is_lr(struct xe_engine *e); - -bool xe_engine_ring_full(struct xe_engine *e); - -bool xe_engine_is_idle(struct xe_engine *engine); - -void xe_engine_kill(struct xe_engine *e); - -int xe_engine_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -enum xe_engine_priority xe_engine_device_get_max_priority(struct xe_device *xe); - -#endif diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 89d167a432f6..a043c649249b 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -12,7 +12,7 @@ #include "xe_bo.h" #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_macros.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c new file mode 100644 index 000000000000..f1cfc4b604d4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -0,0 +1,850 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "xe_exec_queue.h" + +#include + +#include +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hw_fence.h" +#include "xe_lrc.h" +#include "xe_macros.h" +#include "xe_migrate.h" +#include "xe_pm.h" +#include "xe_ring_ops_types.h" +#include "xe_trace.h" +#include "xe_vm.h" + +static struct xe_engine *__xe_engine_create(struct xe_device *xe, + struct xe_vm *vm, + u32 logical_mask, + u16 width, struct xe_hw_engine *hwe, + u32 flags) +{ + struct xe_engine *e; + struct xe_gt *gt = hwe->gt; + int err; + int i; + + e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + kref_init(&e->refcount); + e->flags = flags; + e->hwe = hwe; + e->gt = gt; + if (vm) + e->vm = xe_vm_get(vm); + e->class = hwe->class; + e->width = width; + e->logical_mask = logical_mask; + e->fence_irq = >->fence_irq[hwe->class]; + e->ring_ops = gt->ring_ops[hwe->class]; + e->ops = gt->engine_ops; + INIT_LIST_HEAD(&e->persistent.link); + INIT_LIST_HEAD(&e->compute.link); + INIT_LIST_HEAD(&e->multi_gt_link); + + /* FIXME: Wire up to configurable default value */ + e->sched_props.timeslice_us = 1 * 1000; + e->sched_props.preempt_timeout_us = 640 * 1000; + + if (xe_engine_is_parallel(e)) { + e->parallel.composite_fence_ctx = dma_fence_context_alloc(1); + e->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; + } + if (e->flags & ENGINE_FLAG_VM) { + e->bind.fence_ctx = dma_fence_context_alloc(1); + e->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO; + } + + for (i = 0; i < width; ++i) { + err = xe_lrc_init(e->lrc + i, 
hwe, e, vm, SZ_16K); + if (err) + goto err_lrc; + } + + err = e->ops->init(e); + if (err) + goto err_lrc; + + /* + * Normally the user vm holds an rpm ref to keep the device + * awake, and the context holds a ref for the vm, however for + * some engines we use the kernels migrate vm underneath which + * offers no such rpm ref. Make sure we keep a ref here, so we + * can perform GuC CT actions when needed. Caller is expected to + * have already grabbed the rpm ref outside any sensitive locks. + */ + if (e->flags & ENGINE_FLAG_VM) + drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); + + return e; + +err_lrc: + for (i = i - 1; i >= 0; --i) + xe_lrc_finish(e->lrc + i); + kfree(e); + return ERR_PTR(err); +} + +struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hwe, u32 flags) +{ + struct ww_acquire_ctx ww; + struct xe_engine *e; + int err; + + if (vm) { + err = xe_vm_lock(vm, &ww, 0, true); + if (err) + return ERR_PTR(err); + } + e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags); + if (vm) + xe_vm_unlock(vm, &ww); + + return e; +} + +struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags) +{ + struct xe_hw_engine *hwe, *hwe0 = NULL; + enum xe_hw_engine_id id; + u32 logical_mask = 0; + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + if (hwe->class == class) { + logical_mask |= BIT(hwe->logical_instance); + if (!hwe0) + hwe0 = hwe; + } + } + + if (!logical_mask) + return ERR_PTR(-ENODEV); + + return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags); +} + +void xe_engine_destroy(struct kref *ref) +{ + struct xe_engine *e = container_of(ref, struct xe_engine, refcount); + struct xe_engine *engine, *next; + + if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) { + list_for_each_entry_safe(engine, next, &e->multi_gt_list, + multi_gt_link) + xe_engine_put(engine); + } + + e->ops->fini(e); +} + +void xe_engine_fini(struct xe_engine *e) +{ + int i; + + for (i = 0; i < e->width; ++i) + xe_lrc_finish(e->lrc + i); + if (e->vm) + xe_vm_put(e->vm); + if (e->flags & ENGINE_FLAG_VM) + xe_device_mem_access_put(gt_to_xe(e->gt)); + + kfree(e); +} + +struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id) +{ + struct xe_engine *e; + + mutex_lock(&xef->engine.lock); + e = xa_load(&xef->engine.xa, id); + if (e) + xe_engine_get(e); + mutex_unlock(&xef->engine.lock); + + return e; +} + +enum xe_engine_priority +xe_engine_device_get_max_priority(struct xe_device *xe) +{ + return capable(CAP_SYS_NICE) ? 
XE_ENGINE_PRIORITY_HIGH : + XE_ENGINE_PRIORITY_NORMAL; +} + +static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, value > XE_ENGINE_PRIORITY_HIGH)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, value > xe_engine_device_get_max_priority(xe))) + return -EPERM; + + return e->ops->set_priority(e, value); +} + +static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_timeslice(e, value); +} + +static int engine_set_preemption_timeout(struct xe_device *xe, + struct xe_engine *e, u64 value, + bool create) +{ + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_preempt_timeout(e, value); +} + +static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_VM)) + return -EINVAL; + + if (value) { + struct xe_vm *vm = e->vm; + int err; + + if (XE_IOCTL_DBG(xe, xe_vm_in_fault_mode(vm))) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, !xe_vm_in_compute_mode(vm))) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, e->width != 1)) + return -EINVAL; + + e->compute.context = dma_fence_context_alloc(1); + spin_lock_init(&e->compute.lock); + + err = xe_vm_add_compute_engine(vm, e); + if (XE_IOCTL_DBG(xe, err)) + return err; + + e->flags |= ENGINE_FLAG_COMPUTE_MODE; + e->flags &= ~ENGINE_FLAG_PERSISTENT; + } + + return 0; +} + +static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + return -EINVAL; + + if (value) + e->flags |= ENGINE_FLAG_PERSISTENT; + else + e->flags &= ~ENGINE_FLAG_PERSISTENT; + + return 0; +} + +static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return e->ops->set_job_timeout(e, value); +} + +static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_trigger = value; + + return 0; +} + +static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_notify = value; + + return 0; +} + +static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e, + u64 value, bool create) +{ + if (XE_IOCTL_DBG(xe, !create)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) + return -EINVAL; + + e->usm.acc_granularity = value; + + return 0; +} + +typedef int (*xe_engine_set_property_fn)(struct xe_device *xe, + struct xe_engine *e, + u64 value, bool create); + +static const xe_engine_set_property_fn engine_set_property_funcs[] = { + [XE_ENGINE_SET_PROPERTY_PRIORITY] = engine_set_priority, + [XE_ENGINE_SET_PROPERTY_TIMESLICE] = engine_set_timeslice, + [XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout, + [XE_ENGINE_SET_PROPERTY_COMPUTE_MODE] = 
engine_set_compute_mode, + [XE_ENGINE_SET_PROPERTY_PERSISTENCE] = engine_set_persistence, + [XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout, + [XE_ENGINE_SET_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger, + [XE_ENGINE_SET_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify, + [XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity, +}; + +static int engine_user_ext_set_property(struct xe_device *xe, + struct xe_engine *e, + u64 extension, + bool create) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_engine_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, ext.property >= + ARRAY_SIZE(engine_set_property_funcs)) || + XE_IOCTL_DBG(xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs)); + return engine_set_property_funcs[idx](xe, e, ext.value, create); +} + +typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe, + struct xe_engine *e, + u64 extension, + bool create); + +static const xe_engine_set_property_fn engine_user_extension_funcs[] = { + [XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, + u64 extensions, int ext_number, bool create) +{ + u64 __user *address = u64_to_user_ptr(extensions); + struct xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.name >= + ARRAY_SIZE(engine_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, + ARRAY_SIZE(engine_user_extension_funcs)); + err = engine_user_extension_funcs[idx](xe, e, extensions, create); + if (XE_IOCTL_DBG(xe, err)) + return err; + + if (ext.next_extension) + return engine_user_extensions(xe, e, ext.next_extension, + ++ext_number, create); + + return 0; +} + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +static struct xe_hw_engine * +find_hw_engine(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + u32 idx; + + if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.gt_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +} + +static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 logical_mask = 0; + + if (XE_IOCTL_DBG(xe, width != 1)) + return 0; + if (XE_IOCTL_DBG(xe, num_placements != 1)) + return 0; + if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) + return 0; + + eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; + + for_each_hw_engine(hwe, gt, id) { + if 
(xe_hw_engine_is_reserved(hwe)) + continue; + + if (hwe->class == + user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) + logical_mask |= BIT(hwe->logical_instance); + } + + return logical_mask; +} + +static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) +{ + int len = width * num_placements; + int i, j, n; + u16 class; + u16 gt_id; + u32 return_mask = 0, prev_mask; + + if (XE_IOCTL_DBG(xe, !xe_device_guc_submission_enabled(xe) && + len > 1)) + return 0; + + for (i = 0; i < width; ++i) { + u32 current_mask = 0; + + for (j = 0; j < num_placements; ++j) { + struct xe_hw_engine *hwe; + + n = j * width + i; + + hwe = find_hw_engine(xe, eci[n]); + if (XE_IOCTL_DBG(xe, !hwe)) + return 0; + + if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe))) + return 0; + + if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) || + XE_IOCTL_DBG(xe, n && eci[n].engine_class != class)) + return 0; + + class = eci[n].engine_class; + gt_id = eci[n].gt_id; + + if (width == 1 || !i) + return_mask |= BIT(eci[n].engine_instance); + current_mask |= BIT(eci[n].engine_instance); + } + + /* Parallel submissions must be logically contiguous */ + if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1)) + return 0; + + prev_mask = current_mask; + } + + return return_mask; +} + +int xe_engine_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_create *args = data; + struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; + struct drm_xe_engine_class_instance __user *user_eci = + u64_to_user_ptr(args->instances); + struct xe_hw_engine *hwe; + struct xe_vm *vm, *migrate_vm; + struct xe_gt *gt; + struct xe_engine *e = NULL; + u32 logical_mask; + u32 id; + u32 len; + int err; + + if (XE_IOCTL_DBG(xe, args->flags) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + len = args->width * args->num_placements; + if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) + return -EINVAL; + + err = __copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * + len); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + return -EINVAL; + + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { + for_each_gt(gt, xe, id) { + struct xe_engine *new; + + if (xe_gt_is_media_type(gt)) + continue; + + eci[0].gt_id = gt->info.id; + logical_mask = bind_engine_logical_mask(xe, gt, eci, + args->width, + args->num_placements); + if (XE_IOCTL_DBG(xe, !logical_mask)) + return -EINVAL; + + hwe = find_hw_engine(xe, eci[0]); + if (XE_IOCTL_DBG(xe, !hwe)) + return -EINVAL; + + /* The migration vm doesn't hold rpm ref */ + xe_device_mem_access_get(xe); + + migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); + new = xe_engine_create(xe, migrate_vm, logical_mask, + args->width, hwe, + ENGINE_FLAG_PERSISTENT | + ENGINE_FLAG_VM | + (id ? 
+ ENGINE_FLAG_BIND_ENGINE_CHILD : + 0)); + + xe_device_mem_access_put(xe); /* now held by engine */ + + xe_vm_put(migrate_vm); + if (IS_ERR(new)) { + err = PTR_ERR(new); + if (e) + goto put_engine; + return err; + } + if (id == 0) + e = new; + else + list_add_tail(&new->multi_gt_list, + &e->multi_gt_link); + } + } else { + gt = xe_device_get_gt(xe, eci[0].gt_id); + logical_mask = calc_validate_logical_mask(xe, gt, eci, + args->width, + args->num_placements); + if (XE_IOCTL_DBG(xe, !logical_mask)) + return -EINVAL; + + hwe = find_hw_engine(xe, eci[0]); + if (XE_IOCTL_DBG(xe, !hwe)) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -ENOENT; + + err = down_read_interruptible(&vm->lock); + if (err) { + xe_vm_put(vm); + return err; + } + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + up_read(&vm->lock); + xe_vm_put(vm); + return -ENOENT; + } + + e = xe_engine_create(xe, vm, logical_mask, + args->width, hwe, + xe_vm_no_dma_fences(vm) ? 0 : + ENGINE_FLAG_PERSISTENT); + up_read(&vm->lock); + xe_vm_put(vm); + if (IS_ERR(e)) + return PTR_ERR(e); + } + + if (args->extensions) { + err = engine_user_extensions(xe, e, args->extensions, 0, true); + if (XE_IOCTL_DBG(xe, err)) + goto put_engine; + } + + if (XE_IOCTL_DBG(xe, e->vm && xe_vm_in_compute_mode(e->vm) != + !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) { + err = -EOPNOTSUPP; + goto put_engine; + } + + e->persistent.xef = xef; + + mutex_lock(&xef->engine.lock); + err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&xef->engine.lock); + if (err) + goto put_engine; + + args->engine_id = id; + + return 0; + +put_engine: + xe_engine_kill(e); + xe_engine_put(e); + return err; +} + +int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_get_property *args = data; + struct xe_engine *e; + int ret; + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + e = xe_engine_lookup(xef, args->engine_id); + if (XE_IOCTL_DBG(xe, !e)) + return -ENOENT; + + switch (args->property) { + case XE_ENGINE_GET_PROPERTY_BAN: + args->value = !!(e->flags & ENGINE_FLAG_BANNED); + ret = 0; + break; + default: + ret = -EINVAL; + } + + xe_engine_put(e); + + return ret; +} + +static void engine_kill_compute(struct xe_engine *e) +{ + if (!xe_vm_in_compute_mode(e->vm)) + return; + + down_write(&e->vm->lock); + list_del(&e->compute.link); + --e->vm->preempt.num_engines; + if (e->compute.pfence) { + dma_fence_enable_sw_signaling(e->compute.pfence); + dma_fence_put(e->compute.pfence); + e->compute.pfence = NULL; + } + up_write(&e->vm->lock); +} + +/** + * xe_engine_is_lr() - Whether an engine is long-running + * @e: The engine + * + * Return: True if the engine is long-running, false otherwise. + */ +bool xe_engine_is_lr(struct xe_engine *e) +{ + return e->vm && xe_vm_no_dma_fences(e->vm) && + !(e->flags & ENGINE_FLAG_VM); +} + +static s32 xe_engine_num_job_inflight(struct xe_engine *e) +{ + return e->lrc->fence_ctx.next_seqno - xe_lrc_seqno(e->lrc) - 1; +} + +/** + * xe_engine_ring_full() - Whether an engine's ring is full + * @e: The engine + * + * Return: True if the engine's ring is full, false otherwise. 
+ */ +bool xe_engine_ring_full(struct xe_engine *e) +{ + struct xe_lrc *lrc = e->lrc; + s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; + + return xe_engine_num_job_inflight(e) >= max_job; +} + +/** + * xe_engine_is_idle() - Whether an engine is idle. + * @engine: The engine + * + * FIXME: Need to determine what to use as the short-lived + * timeline lock for the engines, so that the return value + * of this function becomes more than just an advisory + * snapshot in time. The timeline lock must protect the + * seqno from racing submissions on the same engine. + * Typically vm->resv, but user-created timeline locks use the migrate vm + * and never grabs the migrate vm->resv so we have a race there. + * + * Return: True if the engine is idle, false otherwise. + */ +bool xe_engine_is_idle(struct xe_engine *engine) +{ + if (XE_WARN_ON(xe_engine_is_parallel(engine))) + return false; + + return xe_lrc_seqno(&engine->lrc[0]) == + engine->lrc[0].fence_ctx.next_seqno - 1; +} + +void xe_engine_kill(struct xe_engine *e) +{ + struct xe_engine *engine = e, *next; + + list_for_each_entry_safe(engine, next, &engine->multi_gt_list, + multi_gt_link) { + e->ops->kill(engine); + engine_kill_compute(engine); + } + + e->ops->kill(e); + engine_kill_compute(e); +} + +int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_destroy *args = data; + struct xe_engine *e; + + if (XE_IOCTL_DBG(xe, args->pad) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + mutex_lock(&xef->engine.lock); + e = xa_erase(&xef->engine.xa, args->engine_id); + mutex_unlock(&xef->engine.lock); + if (XE_IOCTL_DBG(xe, !e)) + return -ENOENT; + + if (!(e->flags & ENGINE_FLAG_PERSISTENT)) + xe_engine_kill(e); + else + xe_device_add_persistent_engines(xe, e); + + trace_xe_engine_close(e); + xe_engine_put(e); + + return 0; +} + +int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_engine_set_property *args = data; + struct xe_engine *e; + int ret; + u32 idx; + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + + e = xe_engine_lookup(xef, args->engine_id); + if (XE_IOCTL_DBG(xe, !e)) + return -ENOENT; + + if (XE_IOCTL_DBG(xe, args->property >= + ARRAY_SIZE(engine_set_property_funcs))) { + ret = -EINVAL; + goto out; + } + + idx = array_index_nospec(args->property, + ARRAY_SIZE(engine_set_property_funcs)); + ret = engine_set_property_funcs[idx](xe, e, args->value, false); + if (XE_IOCTL_DBG(xe, ret)) + goto out; + + if (args->extensions) + ret = engine_user_extensions(xe, e, args->extensions, 0, + false); +out: + xe_engine_put(e); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h new file mode 100644 index 000000000000..3017e4fe308d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _XE_ENGINE_H_ +#define _XE_ENGINE_H_ + +#include "xe_engine_types.h" +#include "xe_vm_types.h" + +struct drm_device; +struct drm_file; +struct xe_device; +struct xe_file; + +struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hw_engine, u32 flags); +struct xe_engine 
*xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags); + +void xe_engine_fini(struct xe_engine *e); +void xe_engine_destroy(struct kref *ref); + +struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id); + +static inline struct xe_engine *xe_engine_get(struct xe_engine *engine) +{ + kref_get(&engine->refcount); + return engine; +} + +static inline void xe_engine_put(struct xe_engine *engine) +{ + kref_put(&engine->refcount, xe_engine_destroy); +} + +static inline bool xe_engine_is_parallel(struct xe_engine *engine) +{ + return engine->width > 1; +} + +bool xe_engine_is_lr(struct xe_engine *e); + +bool xe_engine_ring_full(struct xe_engine *e); + +bool xe_engine_is_idle(struct xe_engine *engine); + +void xe_engine_kill(struct xe_engine *e); + +int xe_engine_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +enum xe_engine_priority xe_engine_device_get_max_priority(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index a4d9531e3516..5b6748e1a37f 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -14,7 +14,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_hw_fence.h" #include "xe_lrc.h" diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 28bf577c7bf2..543b085723c5 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -14,7 +14,7 @@ #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_execlist.h" #include "xe_force_wake.h" #include "xe_ggtt.h" diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 9a4c96cb3f42..5198e91eeefb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -16,7 +16,7 @@ #include "regs/xe_lrc_layout.h" #include "xe_devcoredump.h" #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_force_wake.h" #include "xe_gpu_scheduler.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 9c4b432d496f..60f7226c92ff 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -16,7 +16,7 @@ #include "tests/xe_test.h" #include "xe_bb.h" #include "xe_bo.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_hw_engine.h" diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index ccc852500eda..c9653978fc9f 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -8,7 +8,7 @@ #include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_mmio.h" #include "xe_platform_types.h" diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 219eefeb90ff..e86604e0174d 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -7,7 +7,7 @@ #include -#include 
"xe_engine.h" +#include "xe_exec_queue.h" #include "xe_vm.h" static void preempt_fence_work_func(struct work_struct *w) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 6ba7baf7c777..382851f436b7 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -12,7 +12,7 @@ #include "xe_bo.h" #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc_hwconfig.h" diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 85fd5980191c..9944858de4d2 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -9,7 +9,7 @@ #include #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence.h" diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 0bebdac2287c..d3e82c4aed42 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -18,7 +18,7 @@ #include "xe_bo.h" #include "xe_device.h" -#include "xe_engine.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_gt_pagefault.h" #include "xe_gt_tlb_invalidation.h" -- cgit v1.2.3 From 9b9529ce379a08e68d65231497dd6bad94281902 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Mon, 31 Jul 2023 17:30:02 +0200 Subject: drm/xe: Rename engine to exec_queue Engine was inappropriately used to refer to execution queues and it also created some confusion with hardware engines. Where it applies the exec_queue variable name is changed to q and comments are also updated. Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/162 Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 18 +- drivers/gpu/drm/xe/xe_bb.c | 26 +- drivers/gpu/drm/xe/xe_bb.h | 8 +- drivers/gpu/drm/xe/xe_devcoredump.c | 38 +- drivers/gpu/drm/xe/xe_devcoredump.h | 6 +- drivers/gpu/drm/xe/xe_devcoredump_types.h | 2 +- drivers/gpu/drm/xe/xe_device.c | 60 +- drivers/gpu/drm/xe/xe_device.h | 8 +- drivers/gpu/drm/xe/xe_device_types.h | 4 +- drivers/gpu/drm/xe/xe_engine_types.h | 209 ----- drivers/gpu/drm/xe/xe_exec.c | 60 +- drivers/gpu/drm/xe/xe_exec_queue.c | 526 ++++++------- drivers/gpu/drm/xe/xe_exec_queue.h | 64 +- drivers/gpu/drm/xe/xe_exec_queue_types.h | 209 +++++ drivers/gpu/drm/xe/xe_execlist.c | 142 ++-- drivers/gpu/drm/xe/xe_execlist_types.h | 14 +- drivers/gpu/drm/xe/xe_gt.c | 70 +- drivers/gpu/drm/xe/xe_gt_types.h | 6 +- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 10 +- drivers/gpu/drm/xe/xe_guc_engine_types.h | 54 -- drivers/gpu/drm/xe/xe_guc_exec_queue_types.h | 54 ++ drivers/gpu/drm/xe/xe_guc_fwif.h | 6 +- drivers/gpu/drm/xe/xe_guc_submit.c | 1088 +++++++++++++------------- drivers/gpu/drm/xe/xe_guc_submit.h | 20 +- drivers/gpu/drm/xe/xe_guc_submit_types.h | 20 +- drivers/gpu/drm/xe/xe_guc_types.h | 4 +- drivers/gpu/drm/xe/xe_lrc.c | 10 +- drivers/gpu/drm/xe/xe_lrc.h | 4 +- drivers/gpu/drm/xe/xe_migrate.c | 64 +- drivers/gpu/drm/xe/xe_migrate.h | 6 +- drivers/gpu/drm/xe/xe_mocs.h | 2 +- drivers/gpu/drm/xe/xe_preempt_fence.c | 30 +- drivers/gpu/drm/xe/xe_preempt_fence.h | 4 +- drivers/gpu/drm/xe/xe_preempt_fence_types.h | 7 +- drivers/gpu/drm/xe/xe_pt.c | 18 +- drivers/gpu/drm/xe/xe_pt.h | 6 +- drivers/gpu/drm/xe/xe_query.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 38 +- drivers/gpu/drm/xe/xe_sched_job.c | 74 +- drivers/gpu/drm/xe/xe_sched_job.h | 4 +- 
drivers/gpu/drm/xe/xe_sched_job_types.h | 6 +- drivers/gpu/drm/xe/xe_trace.h | 140 ++-- drivers/gpu/drm/xe/xe_vm.c | 192 ++--- drivers/gpu/drm/xe/xe_vm.h | 4 +- drivers/gpu/drm/xe/xe_vm_types.h | 16 +- include/uapi/drm/xe_drm.h | 86 +- 47 files changed, 1720 insertions(+), 1721 deletions(-) delete mode 100644 drivers/gpu/drm/xe/xe_engine_types.h create mode 100644 drivers/gpu/drm/xe/xe_exec_queue_types.h delete mode 100644 drivers/gpu/drm/xe/xe_guc_engine_types.h create mode 100644 drivers/gpu/drm/xe/xe_guc_exec_queue_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 9e9b228fe315..5c8d5e78d9bc 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -38,7 +38,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, struct kunit *test) { u64 batch_base = xe_migrate_batch_base(m, xe->info.supports_usm); - struct xe_sched_job *job = xe_bb_create_migration_job(m->eng, bb, + struct xe_sched_job *job = xe_bb_create_migration_job(m->q, bb, batch_base, second_idx); struct dma_fence *fence; @@ -215,7 +215,7 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); then = ktime_get(); - fence = xe_migrate_update_pgtables(m, NULL, NULL, m->eng, &update, 1, + fence = xe_migrate_update_pgtables(m, NULL, NULL, m->q, &update, 1, NULL, 0, &pt_update); now = ktime_get(); if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) @@ -257,7 +257,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) return; } - big = xe_bo_create_pin_map(xe, tile, m->eng->vm, SZ_4M, + big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_PINNED_BIT); @@ -266,7 +266,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto vunmap; } - pt = xe_bo_create_pin_map(xe, tile, m->eng->vm, XE_PAGE_SIZE, + pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_PINNED_BIT); @@ -276,7 +276,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto free_big; } - tiny = xe_bo_create_pin_map(xe, tile, m->eng->vm, + tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | @@ -295,14 +295,14 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) } kunit_info(test, "Starting tests, top level PT addr: %lx, special pagetable base addr: %lx\n", - (unsigned long)xe_bo_main_addr(m->eng->vm->pt_root[id]->bo, XE_PAGE_SIZE), + (unsigned long)xe_bo_main_addr(m->q->vm->pt_root[id]->bo, XE_PAGE_SIZE), (unsigned long)xe_bo_main_addr(m->pt_bo, XE_PAGE_SIZE)); /* First part of the test, are we updating our pagetable bo with a new entry? 
*/ xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0); - if (m->eng->vm->flags & XE_VM_FLAG_64K) + if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); @@ -399,11 +399,11 @@ static int migrate_test_run_device(struct xe_device *xe) struct ww_acquire_ctx ww; kunit_info(test, "Testing tile id %d.\n", id); - xe_vm_lock(m->eng->vm, &ww, 0, true); + xe_vm_lock(m->q->vm, &ww, 0, true); xe_device_mem_access_get(xe); xe_migrate_sanity_test(m, test); xe_device_mem_access_put(xe); - xe_vm_unlock(m->eng->vm, &ww); + xe_vm_unlock(m->q->vm, &ww); } return 0; diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index b15a7cb7db4c..38f4ce83a207 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -7,7 +7,7 @@ #include "regs/xe_gpu_commands.h" #include "xe_device.h" -#include "xe_engine_types.h" +#include "xe_exec_queue_types.h" #include "xe_gt.h" #include "xe_hw_fence.h" #include "xe_sa.h" @@ -60,30 +60,30 @@ err: } static struct xe_sched_job * -__xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr) +__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { u32 size = drm_suballoc_size(bb->bo); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; - WARN_ON(bb->len * 4 + bb_prefetch(kernel_eng->gt) > size); + WARN_ON(bb->len * 4 + bb_prefetch(q->gt) > size); xe_sa_bo_flush_write(bb->bo); - return xe_sched_job_create(kernel_eng, addr); + return xe_sched_job_create(q, addr); } -struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng, +struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 batch_base_ofs) { u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo); - XE_WARN_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION)); + XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION)); - return __xe_bb_create_job(wa_eng, bb, &addr); + return __xe_bb_create_job(q, bb, &addr); } -struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng, +struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 batch_base_ofs, u32 second_idx) @@ -95,18 +95,18 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng, }; XE_WARN_ON(second_idx > bb->len); - XE_WARN_ON(!(kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION)); + XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION)); - return __xe_bb_create_job(kernel_eng, bb, addr); + return __xe_bb_create_job(q, bb, addr); } -struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng, +struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb) { u64 addr = xe_sa_bo_gpu_addr(bb->bo); - XE_WARN_ON(kernel_eng->vm && kernel_eng->vm->flags & XE_VM_FLAG_MIGRATION); - return __xe_bb_create_job(kernel_eng, bb, &addr); + XE_WARN_ON(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION); + return __xe_bb_create_job(q, bb, &addr); } void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index 0cc9260c9634..c5ae0770bab5 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -11,16 +11,16 @@ struct dma_fence; struct xe_gt; -struct xe_engine; +struct xe_exec_queue; struct xe_sched_job; struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); -struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng, +struct xe_sched_job 
*xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); -struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng, +struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 batch_ofs, u32 second_idx); -struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng, +struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 batch_ofs); void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence); diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 61ff97ea7659..68abc0b195be 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -53,9 +53,9 @@ static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) return container_of(coredump, struct xe_device, devcoredump); } -static struct xe_guc *engine_to_guc(struct xe_engine *e) +static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) { - return &e->gt->uc.guc; + return &q->gt->uc.guc; } static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, @@ -91,7 +91,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, drm_printf(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p); - xe_guc_engine_snapshot_print(coredump->snapshot.ge, &p); + xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p); drm_printf(&p, "\n**** HW Engines ****\n"); for (i = 0; i < XE_NUM_HW_ENGINES; i++) @@ -112,7 +112,7 @@ static void xe_devcoredump_free(void *data) return; xe_guc_ct_snapshot_free(coredump->snapshot.ct); - xe_guc_engine_snapshot_free(coredump->snapshot.ge); + xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); for (i = 0; i < XE_NUM_HW_ENGINES; i++) if (coredump->snapshot.hwe[i]) xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); @@ -123,14 +123,14 @@ static void xe_devcoredump_free(void *data) } static void devcoredump_snapshot(struct xe_devcoredump *coredump, - struct xe_engine *e) + struct xe_exec_queue *q) { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - u32 adj_logical_mask = e->logical_mask; - u32 width_mask = (0x1 << e->width) - 1; + u32 adj_logical_mask = q->logical_mask; + u32 width_mask = (0x1 << q->width) - 1; int i; bool cookie; @@ -138,22 +138,22 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->boot_time = ktime_get_boottime(); cookie = dma_fence_begin_signalling(); - for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { + for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { if (adj_logical_mask & BIT(i)) { adj_logical_mask |= width_mask << i; - i += e->width; + i += q->width; } else { ++i; } } - xe_force_wake_get(gt_to_fw(e->gt), XE_FORCEWAKE_ALL); + xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); - coredump->snapshot.ge = xe_guc_engine_snapshot_capture(e); + coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q); - for_each_hw_engine(hwe, e->gt, id) { - if (hwe->class != e->hwe->class || + for_each_hw_engine(hwe, q->gt, id) { + if (hwe->class != q->hwe->class || !(BIT(hwe->logical_instance) & adj_logical_mask)) { coredump->snapshot.hwe[id] = NULL; continue; @@ -161,21 +161,21 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe); } - 
xe_force_wake_put(gt_to_fw(e->gt), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); } /** * xe_devcoredump - Take the required snapshots and initialize coredump device. - * @e: The faulty xe_engine, where the issue was detected. + * @q: The faulty xe_exec_queue, where the issue was detected. * * This function should be called at the crash time within the serialized * gt_reset. It is skipped if we still have the core dump device available * with the information of the 'first' snapshot. */ -void xe_devcoredump(struct xe_engine *e) +void xe_devcoredump(struct xe_exec_queue *q) { - struct xe_device *xe = gt_to_xe(e->gt); + struct xe_device *xe = gt_to_xe(q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; if (coredump->captured) { @@ -184,7 +184,7 @@ void xe_devcoredump(struct xe_engine *e) } coredump->captured = true; - devcoredump_snapshot(coredump, e); + devcoredump_snapshot(coredump, q); drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 854882129227..6ac218a5c194 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -7,12 +7,12 @@ #define _XE_DEVCOREDUMP_H_ struct xe_device; -struct xe_engine; +struct xe_exec_queue; #ifdef CONFIG_DEV_COREDUMP -void xe_devcoredump(struct xe_engine *e); +void xe_devcoredump(struct xe_exec_queue *q); #else -static inline void xe_devcoredump(struct xe_engine *e) +static inline void xe_devcoredump(struct xe_exec_queue *q) { } #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index c0d711eb6ab3..7fdad9c3d3dd 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -30,7 +30,7 @@ struct xe_devcoredump_snapshot { /** @ct: GuC CT snapshot */ struct xe_guc_ct_snapshot *ct; /** @ge: Guc Engine snapshot */ - struct xe_guc_submit_engine_snapshot *ge; + struct xe_guc_submit_exec_queue_snapshot *ge; /** @hwe: HW Engine snapshot array */ struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES]; }; diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a8ab86379ed6..df1953759c67 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -53,33 +53,33 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) mutex_init(&xef->vm.lock); xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1); - mutex_init(&xef->engine.lock); - xa_init_flags(&xef->engine.xa, XA_FLAGS_ALLOC1); + mutex_init(&xef->exec_queue.lock); + xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1); file->driver_priv = xef; return 0; } -static void device_kill_persistent_engines(struct xe_device *xe, - struct xe_file *xef); +static void device_kill_persistent_exec_queues(struct xe_device *xe, + struct xe_file *xef); static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = file->driver_priv; struct xe_vm *vm; - struct xe_engine *e; + struct xe_exec_queue *q; unsigned long idx; - mutex_lock(&xef->engine.lock); - xa_for_each(&xef->engine.xa, idx, e) { - xe_engine_kill(e); - xe_engine_put(e); + mutex_lock(&xef->exec_queue.lock); + xa_for_each(&xef->exec_queue.xa, idx, q) { + xe_exec_queue_kill(q); + xe_exec_queue_put(q); } - mutex_unlock(&xef->engine.lock); - xa_destroy(&xef->engine.xa); - 
mutex_destroy(&xef->engine.lock); - device_kill_persistent_engines(xe, xef); + mutex_unlock(&xef->exec_queue.lock); + xa_destroy(&xef->exec_queue.xa); + mutex_destroy(&xef->exec_queue.lock); + device_kill_persistent_exec_queues(xe, xef); mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) @@ -99,15 +99,15 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_ENGINE_CREATE, xe_engine_create_ioctl, + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_ENGINE_GET_PROPERTY, xe_engine_get_property_ioctl, + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_ENGINE_DESTROY, xe_engine_destroy_ioctl, + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_ENGINE_SET_PROPERTY, xe_engine_set_property_ioctl, + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), @@ -324,33 +324,33 @@ void xe_device_shutdown(struct xe_device *xe) { } -void xe_device_add_persistent_engines(struct xe_device *xe, struct xe_engine *e) +void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q) { mutex_lock(&xe->persistent_engines.lock); - list_add_tail(&e->persistent.link, &xe->persistent_engines.list); + list_add_tail(&q->persistent.link, &xe->persistent_engines.list); mutex_unlock(&xe->persistent_engines.lock); } -void xe_device_remove_persistent_engines(struct xe_device *xe, - struct xe_engine *e) +void xe_device_remove_persistent_exec_queues(struct xe_device *xe, + struct xe_exec_queue *q) { mutex_lock(&xe->persistent_engines.lock); - if (!list_empty(&e->persistent.link)) - list_del(&e->persistent.link); + if (!list_empty(&q->persistent.link)) + list_del(&q->persistent.link); mutex_unlock(&xe->persistent_engines.lock); } -static void device_kill_persistent_engines(struct xe_device *xe, - struct xe_file *xef) +static void device_kill_persistent_exec_queues(struct xe_device *xe, + struct xe_file *xef) { - struct xe_engine *e, *next; + struct xe_exec_queue *q, *next; mutex_lock(&xe->persistent_engines.lock); - list_for_each_entry_safe(e, next, &xe->persistent_engines.list, + list_for_each_entry_safe(q, next, &xe->persistent_engines.list, persistent.link) - if (e->persistent.xef == xef) { - xe_engine_kill(e); - list_del_init(&e->persistent.link); + if (q->persistent.xef == xef) { + xe_exec_queue_kill(q); + list_del_init(&q->persistent.link); } mutex_unlock(&xe->persistent_engines.lock); } diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 61a5cf1f7300..71582094834c 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -6,7 +6,7 @@ #ifndef _XE_DEVICE_H_ #define _XE_DEVICE_H_ -struct xe_engine; +struct xe_exec_queue; struct xe_file; #include @@ -41,9 +41,9 @@ int xe_device_probe(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); -void xe_device_add_persistent_engines(struct xe_device *xe, struct 
xe_engine *e); -void xe_device_remove_persistent_engines(struct xe_device *xe, - struct xe_engine *e); +void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q); +void xe_device_remove_persistent_exec_queues(struct xe_device *xe, + struct xe_exec_queue *q); void xe_device_wmb(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c521ffaf3871..128e0a953692 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -377,13 +377,13 @@ struct xe_file { struct mutex lock; } vm; - /** @engine: Submission engine state for file */ + /** @exec_queue: Submission exec queue state for file */ struct { /** @xe: xarray to store engines */ struct xarray xa; /** @lock: protects file engine state */ struct mutex lock; - } engine; + } exec_queue; }; #endif diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h deleted file mode 100644 index f1d531735f6d..000000000000 --- a/drivers/gpu/drm/xe/xe_engine_types.h +++ /dev/null @@ -1,209 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_ENGINE_TYPES_H_ -#define _XE_ENGINE_TYPES_H_ - -#include - -#include - -#include "xe_gpu_scheduler_types.h" -#include "xe_hw_engine_types.h" -#include "xe_hw_fence_types.h" -#include "xe_lrc_types.h" - -struct xe_execlist_engine; -struct xe_gt; -struct xe_guc_engine; -struct xe_hw_engine; -struct xe_vm; - -enum xe_engine_priority { - XE_ENGINE_PRIORITY_UNSET = -2, /* For execlist usage only */ - XE_ENGINE_PRIORITY_LOW = 0, - XE_ENGINE_PRIORITY_NORMAL, - XE_ENGINE_PRIORITY_HIGH, - XE_ENGINE_PRIORITY_KERNEL, - - XE_ENGINE_PRIORITY_COUNT -}; - -/** - * struct xe_engine - Submission engine - * - * Contains all state necessary for submissions. Can either be a user object or - * a kernel object. - */ -struct xe_engine { - /** @gt: graphics tile this engine can submit to */ - struct xe_gt *gt; - /** - * @hwe: A hardware of the same class. May (physical engine) or may not - * (virtual engine) be where jobs actual engine up running. Should never - * really be used for submissions. 
- */ - struct xe_hw_engine *hwe; - /** @refcount: ref count of this engine */ - struct kref refcount; - /** @vm: VM (address space) for this engine */ - struct xe_vm *vm; - /** @class: class of this engine */ - enum xe_engine_class class; - /** @priority: priority of this exec queue */ - enum xe_engine_priority priority; - /** - * @logical_mask: logical mask of where job submitted to engine can run - */ - u32 logical_mask; - /** @name: name of this engine */ - char name[MAX_FENCE_NAME_LEN]; - /** @width: width (number BB submitted per exec) of this engine */ - u16 width; - /** @fence_irq: fence IRQ used to signal job completion */ - struct xe_hw_fence_irq *fence_irq; - -#define ENGINE_FLAG_BANNED BIT(0) -#define ENGINE_FLAG_KERNEL BIT(1) -#define ENGINE_FLAG_PERSISTENT BIT(2) -#define ENGINE_FLAG_COMPUTE_MODE BIT(3) -/* Caller needs to hold rpm ref when creating engine with ENGINE_FLAG_VM */ -#define ENGINE_FLAG_VM BIT(4) -#define ENGINE_FLAG_BIND_ENGINE_CHILD BIT(5) -#define ENGINE_FLAG_WA BIT(6) - - /** - * @flags: flags for this engine, should statically setup aside from ban - * bit - */ - unsigned long flags; - - union { - /** @multi_gt_list: list head for VM bind engines if multi-GT */ - struct list_head multi_gt_list; - /** @multi_gt_link: link for VM bind engines if multi-GT */ - struct list_head multi_gt_link; - }; - - union { - /** @execlist: execlist backend specific state for engine */ - struct xe_execlist_engine *execlist; - /** @guc: GuC backend specific state for engine */ - struct xe_guc_engine *guc; - }; - - /** - * @persistent: persistent engine state - */ - struct { - /** @xef: file which this engine belongs to */ - struct xe_file *xef; - /** @link: link in list of persistent engines */ - struct list_head link; - } persistent; - - union { - /** - * @parallel: parallel submission state - */ - struct { - /** @composite_fence_ctx: context composite fence */ - u64 composite_fence_ctx; - /** @composite_fence_seqno: seqno for composite fence */ - u32 composite_fence_seqno; - } parallel; - /** - * @bind: bind submission state - */ - struct { - /** @fence_ctx: context bind fence */ - u64 fence_ctx; - /** @fence_seqno: seqno for bind fence */ - u32 fence_seqno; - } bind; - }; - - /** @sched_props: scheduling properties */ - struct { - /** @timeslice_us: timeslice period in micro-seconds */ - u32 timeslice_us; - /** @preempt_timeout_us: preemption timeout in micro-seconds */ - u32 preempt_timeout_us; - } sched_props; - - /** @compute: compute engine state */ - struct { - /** @pfence: preemption fence */ - struct dma_fence *pfence; - /** @context: preemption fence context */ - u64 context; - /** @seqno: preemption fence seqno */ - u32 seqno; - /** @link: link into VM's list of engines */ - struct list_head link; - /** @lock: preemption fences lock */ - spinlock_t lock; - } compute; - - /** @usm: unified shared memory state */ - struct { - /** @acc_trigger: access counter trigger */ - u32 acc_trigger; - /** @acc_notify: access counter notify */ - u32 acc_notify; - /** @acc_granularity: access counter granularity */ - u32 acc_granularity; - } usm; - - /** @ops: submission backend engine operations */ - const struct xe_engine_ops *ops; - - /** @ring_ops: ring operations for this engine */ - const struct xe_ring_ops *ring_ops; - /** @entity: DRM sched entity for this engine (1 to 1 relationship) */ - struct drm_sched_entity *entity; - /** @lrc: logical ring context for this engine */ - struct xe_lrc lrc[]; -}; - -/** - * struct xe_engine_ops - Submission backend engine operations - */ 
-struct xe_engine_ops { - /** @init: Initialize engine for submission backend */ - int (*init)(struct xe_engine *e); - /** @kill: Kill inflight submissions for backend */ - void (*kill)(struct xe_engine *e); - /** @fini: Fini engine for submission backend */ - void (*fini)(struct xe_engine *e); - /** @set_priority: Set priority for engine */ - int (*set_priority)(struct xe_engine *e, - enum xe_engine_priority priority); - /** @set_timeslice: Set timeslice for engine */ - int (*set_timeslice)(struct xe_engine *e, u32 timeslice_us); - /** @set_preempt_timeout: Set preemption timeout for engine */ - int (*set_preempt_timeout)(struct xe_engine *e, u32 preempt_timeout_us); - /** @set_job_timeout: Set job timeout for engine */ - int (*set_job_timeout)(struct xe_engine *e, u32 job_timeout_ms); - /** - * @suspend: Suspend engine from executing, allowed to be called - * multiple times in a row before resume with the caveat that - * suspend_wait returns before calling suspend again. - */ - int (*suspend)(struct xe_engine *e); - /** - * @suspend_wait: Wait for an engine to suspend executing, should be - * call after suspend. - */ - void (*suspend_wait)(struct xe_engine *e); - /** - * @resume: Resume engine execution, engine must be in a suspended - * state and dma fence returned from most recent suspend call must be - * signalled when this function is called. - */ - void (*resume)(struct xe_engine *e); -}; - -#endif diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index a043c649249b..629d81a789e7 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -95,19 +95,19 @@ #define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000 -static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww, +static int xe_exec_begin(struct xe_exec_queue *q, struct ww_acquire_ctx *ww, struct ttm_validate_buffer tv_onstack[], struct ttm_validate_buffer **tv, struct list_head *objs) { - struct xe_vm *vm = e->vm; + struct xe_vm *vm = q->vm; struct xe_vma *vma; LIST_HEAD(dups); ktime_t end = 0; int err = 0; *tv = NULL; - if (xe_vm_no_dma_fences(e->vm)) + if (xe_vm_no_dma_fences(q->vm)) return 0; retry: @@ -153,14 +153,14 @@ retry: return err; } -static void xe_exec_end(struct xe_engine *e, +static void xe_exec_end(struct xe_exec_queue *q, struct ttm_validate_buffer *tv_onstack, struct ttm_validate_buffer *tv, struct ww_acquire_ctx *ww, struct list_head *objs) { - if (!xe_vm_no_dma_fences(e->vm)) - xe_vm_unlock_dma_resv(e->vm, tv_onstack, tv, ww, objs); + if (!xe_vm_no_dma_fences(q->vm)) + xe_vm_unlock_dma_resv(q->vm, tv_onstack, tv, ww, objs); } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -170,7 +170,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_xe_exec *args = data; struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs); u64 __user *addresses_user = u64_to_user_ptr(args->address); - struct xe_engine *engine; + struct xe_exec_queue *q; struct xe_sync_entry *syncs = NULL; u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; @@ -189,30 +189,30 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - engine = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_DBG(xe, !engine)) + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (XE_IOCTL_DBG(xe, engine->flags & ENGINE_FLAG_VM)) + if 
(XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) return -EINVAL; - if (XE_IOCTL_DBG(xe, engine->width != args->num_batch_buffer)) + if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer)) return -EINVAL; - if (XE_IOCTL_DBG(xe, engine->flags & ENGINE_FLAG_BANNED)) { + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) { err = -ECANCELED; - goto err_engine; + goto err_exec_queue; } if (args->num_syncs) { syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); if (!syncs) { err = -ENOMEM; - goto err_engine; + goto err_exec_queue; } } - vm = engine->vm; + vm = q->vm; for (i = 0; i < args->num_syncs; i++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], @@ -222,9 +222,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_syncs; } - if (xe_engine_is_parallel(engine)) { + if (xe_exec_queue_is_parallel(q)) { err = __copy_from_user(addresses, addresses_user, sizeof(u64) * - engine->width); + q->width); if (err) { err = -EFAULT; goto err_syncs; @@ -294,26 +294,26 @@ retry: goto err_unlock_list; } - err = xe_exec_begin(engine, &ww, tv_onstack, &tv, &objs); + err = xe_exec_begin(q, &ww, tv_onstack, &tv, &objs); if (err) goto err_unlock_list; - if (xe_vm_is_closed_or_banned(engine->vm)) { + if (xe_vm_is_closed_or_banned(q->vm)) { drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n"); err = -ECANCELED; - goto err_engine_end; + goto err_exec_queue_end; } - if (xe_engine_is_lr(engine) && xe_engine_ring_full(engine)) { + if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { err = -EWOULDBLOCK; - goto err_engine_end; + goto err_exec_queue_end; } - job = xe_sched_job_create(engine, xe_engine_is_parallel(engine) ? + job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ? addresses : &args->address); if (IS_ERR(job)) { err = PTR_ERR(job); - goto err_engine_end; + goto err_exec_queue_end; } /* @@ -395,8 +395,8 @@ retry: xe_sync_entry_signal(&syncs[i], job, &job->drm.s_fence->finished); - if (xe_engine_is_lr(engine)) - engine->ring_ops->emit_job(job); + if (xe_exec_queue_is_lr(q)) + q->ring_ops->emit_job(job); xe_sched_job_push(job); xe_vm_reactivate_rebind(vm); @@ -412,8 +412,8 @@ err_repin: err_put_job: if (err) xe_sched_job_put(job); -err_engine_end: - xe_exec_end(engine, tv_onstack, tv, &ww, &objs); +err_exec_queue_end: + xe_exec_end(q, tv_onstack, tv, &ww, &objs); err_unlock_list: if (write_locked) up_write(&vm->lock); @@ -425,8 +425,8 @@ err_syncs: for (i = 0; i < num_syncs; i++) xe_sync_entry_cleanup(&syncs[i]); kfree(syncs); -err_engine: - xe_engine_put(engine); +err_exec_queue: + xe_exec_queue_put(q); return err; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index f1cfc4b604d4..1371829b9e35 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -22,57 +22,57 @@ #include "xe_trace.h" #include "xe_vm.h" -static struct xe_engine *__xe_engine_create(struct xe_device *xe, - struct xe_vm *vm, - u32 logical_mask, - u16 width, struct xe_hw_engine *hwe, - u32 flags) +static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, + struct xe_vm *vm, + u32 logical_mask, + u16 width, struct xe_hw_engine *hwe, + u32 flags) { - struct xe_engine *e; + struct xe_exec_queue *q; struct xe_gt *gt = hwe->gt; int err; int i; - e = kzalloc(sizeof(*e) + sizeof(struct xe_lrc) * width, GFP_KERNEL); - if (!e) + q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL); + if (!q) return ERR_PTR(-ENOMEM); - kref_init(&e->refcount); - e->flags = 
flags; - e->hwe = hwe; - e->gt = gt; + kref_init(&q->refcount); + q->flags = flags; + q->hwe = hwe; + q->gt = gt; if (vm) - e->vm = xe_vm_get(vm); - e->class = hwe->class; - e->width = width; - e->logical_mask = logical_mask; - e->fence_irq = >->fence_irq[hwe->class]; - e->ring_ops = gt->ring_ops[hwe->class]; - e->ops = gt->engine_ops; - INIT_LIST_HEAD(&e->persistent.link); - INIT_LIST_HEAD(&e->compute.link); - INIT_LIST_HEAD(&e->multi_gt_link); + q->vm = xe_vm_get(vm); + q->class = hwe->class; + q->width = width; + q->logical_mask = logical_mask; + q->fence_irq = >->fence_irq[hwe->class]; + q->ring_ops = gt->ring_ops[hwe->class]; + q->ops = gt->exec_queue_ops; + INIT_LIST_HEAD(&q->persistent.link); + INIT_LIST_HEAD(&q->compute.link); + INIT_LIST_HEAD(&q->multi_gt_link); /* FIXME: Wire up to configurable default value */ - e->sched_props.timeslice_us = 1 * 1000; - e->sched_props.preempt_timeout_us = 640 * 1000; + q->sched_props.timeslice_us = 1 * 1000; + q->sched_props.preempt_timeout_us = 640 * 1000; - if (xe_engine_is_parallel(e)) { - e->parallel.composite_fence_ctx = dma_fence_context_alloc(1); - e->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; + if (xe_exec_queue_is_parallel(q)) { + q->parallel.composite_fence_ctx = dma_fence_context_alloc(1); + q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; } - if (e->flags & ENGINE_FLAG_VM) { - e->bind.fence_ctx = dma_fence_context_alloc(1); - e->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO; + if (q->flags & EXEC_QUEUE_FLAG_VM) { + q->bind.fence_ctx = dma_fence_context_alloc(1); + q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO; } for (i = 0; i < width; ++i) { - err = xe_lrc_init(e->lrc + i, hwe, e, vm, SZ_16K); + err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K); if (err) goto err_lrc; } - err = e->ops->init(e); + err = q->ops->init(q); if (err) goto err_lrc; @@ -84,24 +84,24 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe, * can perform GuC CT actions when needed. Caller is expected to * have already grabbed the rpm ref outside any sensitive locks. 
*/ - if (e->flags & ENGINE_FLAG_VM) + if (q->flags & EXEC_QUEUE_FLAG_VM) drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); - return e; + return q; err_lrc: for (i = i - 1; i >= 0; --i) - xe_lrc_finish(e->lrc + i); - kfree(e); + xe_lrc_finish(q->lrc + i); + kfree(q); return ERR_PTR(err); } -struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, - u32 logical_mask, u16 width, - struct xe_hw_engine *hwe, u32 flags) +struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hwe, u32 flags) { struct ww_acquire_ctx ww; - struct xe_engine *e; + struct xe_exec_queue *q; int err; if (vm) { @@ -109,16 +109,16 @@ struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, if (err) return ERR_PTR(err); } - e = __xe_engine_create(xe, vm, logical_mask, width, hwe, flags); + q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags); if (vm) xe_vm_unlock(vm, &ww); - return e; + return q; } -struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, - enum xe_engine_class class, u32 flags) +struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags) { struct xe_hw_engine *hwe, *hwe0 = NULL; enum xe_hw_engine_id id; @@ -138,102 +138,102 @@ struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_engine_create(xe, vm, logical_mask, 1, hwe0, flags); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags); } -void xe_engine_destroy(struct kref *ref) +void xe_exec_queue_destroy(struct kref *ref) { - struct xe_engine *e = container_of(ref, struct xe_engine, refcount); - struct xe_engine *engine, *next; + struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); + struct xe_exec_queue *eq, *next; - if (!(e->flags & ENGINE_FLAG_BIND_ENGINE_CHILD)) { - list_for_each_entry_safe(engine, next, &e->multi_gt_list, + if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { + list_for_each_entry_safe(eq, next, &q->multi_gt_list, multi_gt_link) - xe_engine_put(engine); + xe_exec_queue_put(eq); } - e->ops->fini(e); + q->ops->fini(q); } -void xe_engine_fini(struct xe_engine *e) +void xe_exec_queue_fini(struct xe_exec_queue *q) { int i; - for (i = 0; i < e->width; ++i) - xe_lrc_finish(e->lrc + i); - if (e->vm) - xe_vm_put(e->vm); - if (e->flags & ENGINE_FLAG_VM) - xe_device_mem_access_put(gt_to_xe(e->gt)); + for (i = 0; i < q->width; ++i) + xe_lrc_finish(q->lrc + i); + if (q->vm) + xe_vm_put(q->vm); + if (q->flags & EXEC_QUEUE_FLAG_VM) + xe_device_mem_access_put(gt_to_xe(q->gt)); - kfree(e); + kfree(q); } -struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id) +struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) { - struct xe_engine *e; + struct xe_exec_queue *q; - mutex_lock(&xef->engine.lock); - e = xa_load(&xef->engine.xa, id); - if (e) - xe_engine_get(e); - mutex_unlock(&xef->engine.lock); + mutex_lock(&xef->exec_queue.lock); + q = xa_load(&xef->exec_queue.xa, id); + if (q) + xe_exec_queue_get(q); + mutex_unlock(&xef->exec_queue.lock); - return e; + return q; } -enum xe_engine_priority -xe_engine_device_get_max_priority(struct xe_device *xe) +enum xe_exec_queue_priority +xe_exec_queue_device_get_max_priority(struct xe_device *xe) { - return capable(CAP_SYS_NICE) ? 
XE_ENGINE_PRIORITY_HIGH : - XE_ENGINE_PRIORITY_NORMAL; + return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH : + XE_EXEC_QUEUE_PRIORITY_NORMAL; } -static int engine_set_priority(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) +static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) { - if (XE_IOCTL_DBG(xe, value > XE_ENGINE_PRIORITY_HIGH)) + if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH)) return -EINVAL; - if (XE_IOCTL_DBG(xe, value > xe_engine_device_get_max_priority(xe))) + if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) return -EPERM; - return e->ops->set_priority(e, value); + return q->ops->set_priority(q, value); } -static int engine_set_timeslice(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) +static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) { if (!capable(CAP_SYS_NICE)) return -EPERM; - return e->ops->set_timeslice(e, value); + return q->ops->set_timeslice(q, value); } -static int engine_set_preemption_timeout(struct xe_device *xe, - struct xe_engine *e, u64 value, - bool create) +static int exec_queue_set_preemption_timeout(struct xe_device *xe, + struct xe_exec_queue *q, u64 value, + bool create) { if (!capable(CAP_SYS_NICE)) return -EPERM; - return e->ops->set_preempt_timeout(e, value); + return q->ops->set_preempt_timeout(q, value); } -static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) +static int exec_queue_set_compute_mode(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) { if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_VM)) + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) return -EINVAL; if (value) { - struct xe_vm *vm = e->vm; + struct xe_vm *vm = q->vm; int err; if (XE_IOCTL_DBG(xe, xe_vm_in_fault_mode(vm))) @@ -242,42 +242,42 @@ static int engine_set_compute_mode(struct xe_device *xe, struct xe_engine *e, if (XE_IOCTL_DBG(xe, !xe_vm_in_compute_mode(vm))) return -EOPNOTSUPP; - if (XE_IOCTL_DBG(xe, e->width != 1)) + if (XE_IOCTL_DBG(xe, q->width != 1)) return -EINVAL; - e->compute.context = dma_fence_context_alloc(1); - spin_lock_init(&e->compute.lock); + q->compute.context = dma_fence_context_alloc(1); + spin_lock_init(&q->compute.lock); - err = xe_vm_add_compute_engine(vm, e); + err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) return err; - e->flags |= ENGINE_FLAG_COMPUTE_MODE; - e->flags &= ~ENGINE_FLAG_PERSISTENT; + q->flags |= EXEC_QUEUE_FLAG_COMPUTE_MODE; + q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT; } return 0; } -static int engine_set_persistence(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) +static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) { if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_DBG(xe, e->flags & ENGINE_FLAG_COMPUTE_MODE)) + if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE)) return -EINVAL; if (value) - e->flags |= ENGINE_FLAG_PERSISTENT; + q->flags |= EXEC_QUEUE_FLAG_PERSISTENT; else - e->flags &= ~ENGINE_FLAG_PERSISTENT; + q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT; return 0; } -static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, - u64 value, 
bool create) +static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) { if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; @@ -285,11 +285,11 @@ static int engine_set_job_timeout(struct xe_device *xe, struct xe_engine *e, if (!capable(CAP_SYS_NICE)) return -EPERM; - return e->ops->set_job_timeout(e, value); + return q->ops->set_job_timeout(q, value); } -static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) +static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) { if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; @@ -297,13 +297,13 @@ static int engine_set_acc_trigger(struct xe_device *xe, struct xe_engine *e, if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) return -EINVAL; - e->usm.acc_trigger = value; + q->usm.acc_trigger = value; return 0; } -static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) +static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) { if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; @@ -311,13 +311,13 @@ static int engine_set_acc_notify(struct xe_device *xe, struct xe_engine *e, if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) return -EINVAL; - e->usm.acc_notify = value; + q->usm.acc_notify = value; return 0; } -static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e, - u64 value, bool create) +static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, + u64 value, bool create) { if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; @@ -325,34 +325,34 @@ static int engine_set_acc_granularity(struct xe_device *xe, struct xe_engine *e, if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) return -EINVAL; - e->usm.acc_granularity = value; + q->usm.acc_granularity = value; return 0; } -typedef int (*xe_engine_set_property_fn)(struct xe_device *xe, - struct xe_engine *e, - u64 value, bool create); - -static const xe_engine_set_property_fn engine_set_property_funcs[] = { - [XE_ENGINE_SET_PROPERTY_PRIORITY] = engine_set_priority, - [XE_ENGINE_SET_PROPERTY_TIMESLICE] = engine_set_timeslice, - [XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT] = engine_set_preemption_timeout, - [XE_ENGINE_SET_PROPERTY_COMPUTE_MODE] = engine_set_compute_mode, - [XE_ENGINE_SET_PROPERTY_PERSISTENCE] = engine_set_persistence, - [XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT] = engine_set_job_timeout, - [XE_ENGINE_SET_PROPERTY_ACC_TRIGGER] = engine_set_acc_trigger, - [XE_ENGINE_SET_PROPERTY_ACC_NOTIFY] = engine_set_acc_notify, - [XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY] = engine_set_acc_granularity, +typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, + struct xe_exec_queue *q, + u64 value, bool create); + +static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { + [XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, + [XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, + [XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, + [XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE] = exec_queue_set_compute_mode, + [XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence, + [XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, + [XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, + [XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, + [XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = 
exec_queue_set_acc_granularity, }; -static int engine_user_ext_set_property(struct xe_device *xe, - struct xe_engine *e, - u64 extension, - bool create) +static int exec_queue_user_ext_set_property(struct xe_device *xe, + struct xe_exec_queue *q, + u64 extension, + bool create) { u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_engine_set_property ext; + struct drm_xe_ext_exec_queue_set_property ext; int err; u32 idx; @@ -361,26 +361,26 @@ static int engine_user_ext_set_property(struct xe_device *xe, return -EFAULT; if (XE_IOCTL_DBG(xe, ext.property >= - ARRAY_SIZE(engine_set_property_funcs)) || + ARRAY_SIZE(exec_queue_set_property_funcs)) || XE_IOCTL_DBG(xe, ext.pad)) return -EINVAL; - idx = array_index_nospec(ext.property, ARRAY_SIZE(engine_set_property_funcs)); - return engine_set_property_funcs[idx](xe, e, ext.value, create); + idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); + return exec_queue_set_property_funcs[idx](xe, q, ext.value, create); } -typedef int (*xe_engine_user_extension_fn)(struct xe_device *xe, - struct xe_engine *e, - u64 extension, - bool create); +typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, + struct xe_exec_queue *q, + u64 extension, + bool create); -static const xe_engine_set_property_fn engine_user_extension_funcs[] = { - [XE_ENGINE_EXTENSION_SET_PROPERTY] = engine_user_ext_set_property, +static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = { + [XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, }; #define MAX_USER_EXTENSIONS 16 -static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, - u64 extensions, int ext_number, bool create) +static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, + u64 extensions, int ext_number, bool create) { u64 __user *address = u64_to_user_ptr(extensions); struct xe_user_extension ext; @@ -396,17 +396,17 @@ static int engine_user_extensions(struct xe_device *xe, struct xe_engine *e, if (XE_IOCTL_DBG(xe, ext.pad) || XE_IOCTL_DBG(xe, ext.name >= - ARRAY_SIZE(engine_user_extension_funcs))) + ARRAY_SIZE(exec_queue_user_extension_funcs))) return -EINVAL; idx = array_index_nospec(ext.name, - ARRAY_SIZE(engine_user_extension_funcs)); - err = engine_user_extension_funcs[idx](xe, e, extensions, create); + ARRAY_SIZE(exec_queue_user_extension_funcs)); + err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create); if (XE_IOCTL_DBG(xe, err)) return err; if (ext.next_extension) - return engine_user_extensions(xe, e, ext.next_extension, + return exec_queue_user_extensions(xe, q, ext.next_extension, ++ext_number, create); return 0; @@ -440,9 +440,9 @@ find_hw_engine(struct xe_device *xe, eci.engine_instance, true); } -static u32 bind_engine_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) +static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, + struct drm_xe_engine_class_instance *eci, + u16 width, u16 num_placements) { struct xe_hw_engine *hwe; enum xe_hw_engine_id id; @@ -520,19 +520,19 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, return return_mask; } -int xe_engine_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) +int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); - struct 
drm_xe_engine_create *args = data; + struct drm_xe_exec_queue_create *args = data; struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_xe_engine_class_instance __user *user_eci = u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; struct xe_vm *vm, *migrate_vm; struct xe_gt *gt; - struct xe_engine *e = NULL; + struct xe_exec_queue *q = NULL; u32 logical_mask; u32 id; u32 len; @@ -557,15 +557,15 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { for_each_gt(gt, xe, id) { - struct xe_engine *new; + struct xe_exec_queue *new; if (xe_gt_is_media_type(gt)) continue; eci[0].gt_id = gt->info.id; - logical_mask = bind_engine_logical_mask(xe, gt, eci, - args->width, - args->num_placements); + logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, + args->width, + args->num_placements); if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; @@ -577,28 +577,28 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, xe_device_mem_access_get(xe); migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_engine_create(xe, migrate_vm, logical_mask, - args->width, hwe, - ENGINE_FLAG_PERSISTENT | - ENGINE_FLAG_VM | - (id ? - ENGINE_FLAG_BIND_ENGINE_CHILD : - 0)); + new = xe_exec_queue_create(xe, migrate_vm, logical_mask, + args->width, hwe, + EXEC_QUEUE_FLAG_PERSISTENT | + EXEC_QUEUE_FLAG_VM | + (id ? + EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : + 0)); xe_device_mem_access_put(xe); /* now held by engine */ xe_vm_put(migrate_vm); if (IS_ERR(new)) { err = PTR_ERR(new); - if (e) - goto put_engine; + if (q) + goto put_exec_queue; return err; } if (id == 0) - e = new; + q = new; else list_add_tail(&new->multi_gt_list, - &e->multi_gt_link); + &q->multi_gt_link); } } else { gt = xe_device_get_gt(xe, eci[0].gt_id); @@ -628,223 +628,223 @@ int xe_engine_create_ioctl(struct drm_device *dev, void *data, return -ENOENT; } - e = xe_engine_create(xe, vm, logical_mask, - args->width, hwe, - xe_vm_no_dma_fences(vm) ? 0 : - ENGINE_FLAG_PERSISTENT); + q = xe_exec_queue_create(xe, vm, logical_mask, + args->width, hwe, + xe_vm_no_dma_fences(vm) ? 
0 : + EXEC_QUEUE_FLAG_PERSISTENT); up_read(&vm->lock); xe_vm_put(vm); - if (IS_ERR(e)) - return PTR_ERR(e); + if (IS_ERR(q)) + return PTR_ERR(q); } if (args->extensions) { - err = engine_user_extensions(xe, e, args->extensions, 0, true); + err = exec_queue_user_extensions(xe, q, args->extensions, 0, true); if (XE_IOCTL_DBG(xe, err)) - goto put_engine; + goto put_exec_queue; } - if (XE_IOCTL_DBG(xe, e->vm && xe_vm_in_compute_mode(e->vm) != - !!(e->flags & ENGINE_FLAG_COMPUTE_MODE))) { + if (XE_IOCTL_DBG(xe, q->vm && xe_vm_in_compute_mode(q->vm) != + !!(q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE))) { err = -EOPNOTSUPP; - goto put_engine; + goto put_exec_queue; } - e->persistent.xef = xef; + q->persistent.xef = xef; - mutex_lock(&xef->engine.lock); - err = xa_alloc(&xef->engine.xa, &id, e, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->engine.lock); + mutex_lock(&xef->exec_queue.lock); + err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&xef->exec_queue.lock); if (err) - goto put_engine; + goto put_exec_queue; - args->engine_id = id; + args->exec_queue_id = id; return 0; -put_engine: - xe_engine_kill(e); - xe_engine_put(e); +put_exec_queue: + xe_exec_queue_kill(q); + xe_exec_queue_put(q); return err; } -int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) +int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); - struct drm_xe_engine_get_property *args = data; - struct xe_engine *e; + struct drm_xe_exec_queue_get_property *args = data; + struct xe_exec_queue *q; int ret; if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - e = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_DBG(xe, !e)) + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; switch (args->property) { - case XE_ENGINE_GET_PROPERTY_BAN: - args->value = !!(e->flags & ENGINE_FLAG_BANNED); + case XE_EXEC_QUEUE_GET_PROPERTY_BAN: + args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED); ret = 0; break; default: ret = -EINVAL; } - xe_engine_put(e); + xe_exec_queue_put(q); return ret; } -static void engine_kill_compute(struct xe_engine *e) +static void exec_queue_kill_compute(struct xe_exec_queue *q) { - if (!xe_vm_in_compute_mode(e->vm)) + if (!xe_vm_in_compute_mode(q->vm)) return; - down_write(&e->vm->lock); - list_del(&e->compute.link); - --e->vm->preempt.num_engines; - if (e->compute.pfence) { - dma_fence_enable_sw_signaling(e->compute.pfence); - dma_fence_put(e->compute.pfence); - e->compute.pfence = NULL; + down_write(&q->vm->lock); + list_del(&q->compute.link); + --q->vm->preempt.num_exec_queues; + if (q->compute.pfence) { + dma_fence_enable_sw_signaling(q->compute.pfence); + dma_fence_put(q->compute.pfence); + q->compute.pfence = NULL; } - up_write(&e->vm->lock); + up_write(&q->vm->lock); } /** - * xe_engine_is_lr() - Whether an engine is long-running - * @e: The engine + * xe_exec_queue_is_lr() - Whether an exec_queue is long-running + * @q: The exec_queue * - * Return: True if the engine is long-running, false otherwise. + * Return: True if the exec_queue is long-running, false otherwise. 
*/ -bool xe_engine_is_lr(struct xe_engine *e) +bool xe_exec_queue_is_lr(struct xe_exec_queue *q) { - return e->vm && xe_vm_no_dma_fences(e->vm) && - !(e->flags & ENGINE_FLAG_VM); + return q->vm && xe_vm_no_dma_fences(q->vm) && + !(q->flags & EXEC_QUEUE_FLAG_VM); } -static s32 xe_engine_num_job_inflight(struct xe_engine *e) +static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q) { - return e->lrc->fence_ctx.next_seqno - xe_lrc_seqno(e->lrc) - 1; + return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1; } /** - * xe_engine_ring_full() - Whether an engine's ring is full - * @e: The engine + * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full + * @q: The exec_queue * - * Return: True if the engine's ring is full, false otherwise. + * Return: True if the exec_queue's ring is full, false otherwise. */ -bool xe_engine_ring_full(struct xe_engine *e) +bool xe_exec_queue_ring_full(struct xe_exec_queue *q) { - struct xe_lrc *lrc = e->lrc; + struct xe_lrc *lrc = q->lrc; s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; - return xe_engine_num_job_inflight(e) >= max_job; + return xe_exec_queue_num_job_inflight(q) >= max_job; } /** - * xe_engine_is_idle() - Whether an engine is idle. - * @engine: The engine + * xe_exec_queue_is_idle() - Whether an exec_queue is idle. + * @q: The exec_queue * * FIXME: Need to determine what to use as the short-lived - * timeline lock for the engines, so that the return value + * timeline lock for the exec_queues, so that the return value * of this function becomes more than just an advisory * snapshot in time. The timeline lock must protect the - * seqno from racing submissions on the same engine. + * seqno from racing submissions on the same exec_queue. * Typically vm->resv, but user-created timeline locks use the migrate vm * and never grabs the migrate vm->resv so we have a race there. * - * Return: True if the engine is idle, false otherwise. + * Return: True if the exec_queue is idle, false otherwise. 
*/ -bool xe_engine_is_idle(struct xe_engine *engine) +bool xe_exec_queue_is_idle(struct xe_exec_queue *q) { - if (XE_WARN_ON(xe_engine_is_parallel(engine))) + if (XE_WARN_ON(xe_exec_queue_is_parallel(q))) return false; - return xe_lrc_seqno(&engine->lrc[0]) == - engine->lrc[0].fence_ctx.next_seqno - 1; + return xe_lrc_seqno(&q->lrc[0]) == + q->lrc[0].fence_ctx.next_seqno - 1; } -void xe_engine_kill(struct xe_engine *e) +void xe_exec_queue_kill(struct xe_exec_queue *q) { - struct xe_engine *engine = e, *next; + struct xe_exec_queue *eq = q, *next; - list_for_each_entry_safe(engine, next, &engine->multi_gt_list, + list_for_each_entry_safe(eq, next, &eq->multi_gt_list, multi_gt_link) { - e->ops->kill(engine); - engine_kill_compute(engine); + q->ops->kill(eq); + exec_queue_kill_compute(eq); } - e->ops->kill(e); - engine_kill_compute(e); + q->ops->kill(q); + exec_queue_kill_compute(q); } -int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) +int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); - struct drm_xe_engine_destroy *args = data; - struct xe_engine *e; + struct drm_xe_exec_queue_destroy *args = data; + struct xe_exec_queue *q; if (XE_IOCTL_DBG(xe, args->pad) || XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - mutex_lock(&xef->engine.lock); - e = xa_erase(&xef->engine.xa, args->engine_id); - mutex_unlock(&xef->engine.lock); - if (XE_IOCTL_DBG(xe, !e)) + mutex_lock(&xef->exec_queue.lock); + q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); + mutex_unlock(&xef->exec_queue.lock); + if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (!(e->flags & ENGINE_FLAG_PERSISTENT)) - xe_engine_kill(e); + if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT)) + xe_exec_queue_kill(q); else - xe_device_add_persistent_engines(xe, e); + xe_device_add_persistent_exec_queues(xe, q); - trace_xe_engine_close(e); - xe_engine_put(e); + trace_xe_exec_queue_close(q); + xe_exec_queue_put(q); return 0; } -int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); - struct drm_xe_engine_set_property *args = data; - struct xe_engine *e; + struct drm_xe_exec_queue_set_property *args = data; + struct xe_exec_queue *q; int ret; u32 idx; if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - e = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_DBG(xe, !e)) + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; if (XE_IOCTL_DBG(xe, args->property >= - ARRAY_SIZE(engine_set_property_funcs))) { + ARRAY_SIZE(exec_queue_set_property_funcs))) { ret = -EINVAL; goto out; } idx = array_index_nospec(args->property, - ARRAY_SIZE(engine_set_property_funcs)); - ret = engine_set_property_funcs[idx](xe, e, args->value, false); + ARRAY_SIZE(exec_queue_set_property_funcs)); + ret = exec_queue_set_property_funcs[idx](xe, q, args->value, false); if (XE_IOCTL_DBG(xe, ret)) goto out; if (args->extensions) - ret = engine_user_extensions(xe, e, args->extensions, 0, - false); + ret = exec_queue_user_extensions(xe, q, args->extensions, 0, + false); out: - xe_engine_put(e); + xe_exec_queue_put(q); return ret; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h 
b/drivers/gpu/drm/xe/xe_exec_queue.h index 3017e4fe308d..94a6abee38a6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -3,10 +3,10 @@ * Copyright © 2021 Intel Corporation */ -#ifndef _XE_ENGINE_H_ -#define _XE_ENGINE_H_ +#ifndef _XE_EXEC_QUEUE_H_ +#define _XE_EXEC_QUEUE_H_ -#include "xe_engine_types.h" +#include "xe_exec_queue_types.h" #include "xe_vm_types.h" struct drm_device; @@ -14,50 +14,50 @@ struct drm_file; struct xe_device; struct xe_file; -struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm, - u32 logical_mask, u16 width, - struct xe_hw_engine *hw_engine, u32 flags); -struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, - enum xe_engine_class class, u32 flags); +struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, + u32 logical_mask, u16 width, + struct xe_hw_engine *hw_engine, u32 flags); +struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, + struct xe_vm *vm, + enum xe_engine_class class, u32 flags); -void xe_engine_fini(struct xe_engine *e); -void xe_engine_destroy(struct kref *ref); +void xe_exec_queue_fini(struct xe_exec_queue *q); +void xe_exec_queue_destroy(struct kref *ref); -struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id); +struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id); -static inline struct xe_engine *xe_engine_get(struct xe_engine *engine) +static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q) { - kref_get(&engine->refcount); - return engine; + kref_get(&q->refcount); + return q; } -static inline void xe_engine_put(struct xe_engine *engine) +static inline void xe_exec_queue_put(struct xe_exec_queue *q) { - kref_put(&engine->refcount, xe_engine_destroy); + kref_put(&q->refcount, xe_exec_queue_destroy); } -static inline bool xe_engine_is_parallel(struct xe_engine *engine) +static inline bool xe_exec_queue_is_parallel(struct xe_exec_queue *q) { - return engine->width > 1; + return q->width > 1; } -bool xe_engine_is_lr(struct xe_engine *e); +bool xe_exec_queue_is_lr(struct xe_exec_queue *q); -bool xe_engine_ring_full(struct xe_engine *e); +bool xe_exec_queue_ring_full(struct xe_exec_queue *q); -bool xe_engine_is_idle(struct xe_engine *engine); +bool xe_exec_queue_is_idle(struct xe_exec_queue *q); -void xe_engine_kill(struct xe_engine *e); +void xe_exec_queue_kill(struct xe_exec_queue *q); -int xe_engine_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int xe_engine_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int xe_engine_set_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int xe_engine_get_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -enum xe_engine_priority xe_engine_device_get_max_priority(struct xe_device *xe); +int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); +enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h new file mode 100644 
index 000000000000..4506289b8b7b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_EXEC_QUEUE_TYPES_H_ +#define _XE_EXEC_QUEUE_TYPES_H_ + +#include + +#include + +#include "xe_gpu_scheduler_types.h" +#include "xe_hw_engine_types.h" +#include "xe_hw_fence_types.h" +#include "xe_lrc_types.h" + +struct xe_execlist_exec_queue; +struct xe_gt; +struct xe_guc_exec_queue; +struct xe_hw_engine; +struct xe_vm; + +enum xe_exec_queue_priority { + XE_EXEC_QUEUE_PRIORITY_UNSET = -2, /* For execlist usage only */ + XE_EXEC_QUEUE_PRIORITY_LOW = 0, + XE_EXEC_QUEUE_PRIORITY_NORMAL, + XE_EXEC_QUEUE_PRIORITY_HIGH, + XE_EXEC_QUEUE_PRIORITY_KERNEL, + + XE_EXEC_QUEUE_PRIORITY_COUNT +}; + +/** + * struct xe_exec_queue - Execution queue + * + * Contains all state necessary for submissions. Can either be a user object or + * a kernel object. + */ +struct xe_exec_queue { + /** @gt: graphics tile this exec queue can submit to */ + struct xe_gt *gt; + /** + * @hwe: A hardware of the same class. May (physical engine) or may not + * (virtual engine) be where jobs actual engine up running. Should never + * really be used for submissions. + */ + struct xe_hw_engine *hwe; + /** @refcount: ref count of this exec queue */ + struct kref refcount; + /** @vm: VM (address space) for this exec queue */ + struct xe_vm *vm; + /** @class: class of this exec queue */ + enum xe_engine_class class; + /** @priority: priority of this exec queue */ + enum xe_exec_queue_priority priority; + /** + * @logical_mask: logical mask of where job submitted to exec queue can run + */ + u32 logical_mask; + /** @name: name of this exec queue */ + char name[MAX_FENCE_NAME_LEN]; + /** @width: width (number BB submitted per exec) of this exec queue */ + u16 width; + /** @fence_irq: fence IRQ used to signal job completion */ + struct xe_hw_fence_irq *fence_irq; + +#define EXEC_QUEUE_FLAG_BANNED BIT(0) +#define EXEC_QUEUE_FLAG_KERNEL BIT(1) +#define EXEC_QUEUE_FLAG_PERSISTENT BIT(2) +#define EXEC_QUEUE_FLAG_COMPUTE_MODE BIT(3) +/* Caller needs to hold rpm ref when creating engine with EXEC_QUEUE_FLAG_VM */ +#define EXEC_QUEUE_FLAG_VM BIT(4) +#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) +#define EXEC_QUEUE_FLAG_WA BIT(6) + + /** + * @flags: flags for this exec queue, should statically setup aside from ban + * bit + */ + unsigned long flags; + + union { + /** @multi_gt_list: list head for VM bind engines if multi-GT */ + struct list_head multi_gt_list; + /** @multi_gt_link: link for VM bind engines if multi-GT */ + struct list_head multi_gt_link; + }; + + union { + /** @execlist: execlist backend specific state for exec queue */ + struct xe_execlist_exec_queue *execlist; + /** @guc: GuC backend specific state for exec queue */ + struct xe_guc_exec_queue *guc; + }; + + /** + * @persistent: persistent exec queue state + */ + struct { + /** @xef: file which this exec queue belongs to */ + struct xe_file *xef; + /** @link: link in list of persistent exec queues */ + struct list_head link; + } persistent; + + union { + /** + * @parallel: parallel submission state + */ + struct { + /** @composite_fence_ctx: context composite fence */ + u64 composite_fence_ctx; + /** @composite_fence_seqno: seqno for composite fence */ + u32 composite_fence_seqno; + } parallel; + /** + * @bind: bind submission state + */ + struct { + /** @fence_ctx: context bind fence */ + u64 fence_ctx; + /** @fence_seqno: seqno for bind fence */ + u32 fence_seqno; + 
} bind; + }; + + /** @sched_props: scheduling properties */ + struct { + /** @timeslice_us: timeslice period in micro-seconds */ + u32 timeslice_us; + /** @preempt_timeout_us: preemption timeout in micro-seconds */ + u32 preempt_timeout_us; + } sched_props; + + /** @compute: compute exec queue state */ + struct { + /** @pfence: preemption fence */ + struct dma_fence *pfence; + /** @context: preemption fence context */ + u64 context; + /** @seqno: preemption fence seqno */ + u32 seqno; + /** @link: link into VM's list of exec queues */ + struct list_head link; + /** @lock: preemption fences lock */ + spinlock_t lock; + } compute; + + /** @usm: unified shared memory state */ + struct { + /** @acc_trigger: access counter trigger */ + u32 acc_trigger; + /** @acc_notify: access counter notify */ + u32 acc_notify; + /** @acc_granularity: access counter granularity */ + u32 acc_granularity; + } usm; + + /** @ops: submission backend exec queue operations */ + const struct xe_exec_queue_ops *ops; + + /** @ring_ops: ring operations for this exec queue */ + const struct xe_ring_ops *ring_ops; + /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ + struct drm_sched_entity *entity; + /** @lrc: logical ring context for this exec queue */ + struct xe_lrc lrc[]; +}; + +/** + * struct xe_exec_queue_ops - Submission backend exec queue operations + */ +struct xe_exec_queue_ops { + /** @init: Initialize exec queue for submission backend */ + int (*init)(struct xe_exec_queue *q); + /** @kill: Kill inflight submissions for backend */ + void (*kill)(struct xe_exec_queue *q); + /** @fini: Fini exec queue for submission backend */ + void (*fini)(struct xe_exec_queue *q); + /** @set_priority: Set priority for exec queue */ + int (*set_priority)(struct xe_exec_queue *q, + enum xe_exec_queue_priority priority); + /** @set_timeslice: Set timeslice for exec queue */ + int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us); + /** @set_preempt_timeout: Set preemption timeout for exec queue */ + int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us); + /** @set_job_timeout: Set job timeout for exec queue */ + int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms); + /** + * @suspend: Suspend exec queue from executing, allowed to be called + * multiple times in a row before resume with the caveat that + * suspend_wait returns before calling suspend again. + */ + int (*suspend)(struct xe_exec_queue *q); + /** + * @suspend_wait: Wait for an exec queue to suspend executing, should be + * call after suspend. + */ + void (*suspend_wait)(struct xe_exec_queue *q); + /** + * @resume: Resume exec queue execution, exec queue must be in a suspended + * state and dma fence returned from most recent suspend call must be + * signalled when this function is called. 
+ */ + void (*resume)(struct xe_exec_queue *q); +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 5b6748e1a37f..3b8be55fe19c 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -91,7 +91,7 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, } static void __xe_execlist_port_start(struct xe_execlist_port *port, - struct xe_execlist_engine *exl) + struct xe_execlist_exec_queue *exl) { struct xe_device *xe = gt_to_xe(port->hwe->gt); int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID); @@ -109,7 +109,7 @@ static void __xe_execlist_port_start(struct xe_execlist_port *port, port->last_ctx_id = 1; } - __start_lrc(port->hwe, exl->engine->lrc, port->last_ctx_id); + __start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id); port->running_exl = exl; exl->has_run = true; } @@ -128,16 +128,16 @@ static void __xe_execlist_port_idle(struct xe_execlist_port *port) port->running_exl = NULL; } -static bool xe_execlist_is_idle(struct xe_execlist_engine *exl) +static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl) { - struct xe_lrc *lrc = exl->engine->lrc; + struct xe_lrc *lrc = exl->q->lrc; return lrc->ring.tail == lrc->ring.old_tail; } static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port) { - struct xe_execlist_engine *exl = NULL; + struct xe_execlist_exec_queue *exl = NULL; int i; xe_execlist_port_assert_held(port); @@ -145,12 +145,12 @@ static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port) for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) { while (!list_empty(&port->active[i])) { exl = list_first_entry(&port->active[i], - struct xe_execlist_engine, + struct xe_execlist_exec_queue, active_link); list_del(&exl->active_link); if (xe_execlist_is_idle(exl)) { - exl->active_priority = XE_ENGINE_PRIORITY_UNSET; + exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; continue; } @@ -198,7 +198,7 @@ static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe, } static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, - enum xe_engine_priority priority) + enum xe_exec_queue_priority priority) { xe_execlist_port_assert_held(port); @@ -208,25 +208,25 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, __xe_execlist_port_start_next_active(port); } -static void xe_execlist_make_active(struct xe_execlist_engine *exl) +static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) { struct xe_execlist_port *port = exl->port; - enum xe_engine_priority priority = exl->active_priority; + enum xe_exec_queue_priority priority = exl->active_priority; - XE_WARN_ON(priority == XE_ENGINE_PRIORITY_UNSET); + XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); XE_WARN_ON(priority < 0); XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active)); spin_lock_irq(&port->lock); if (exl->active_priority != priority && - exl->active_priority != XE_ENGINE_PRIORITY_UNSET) { + exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) { /* Priority changed, move it to the right list */ list_del(&exl->active_link); - exl->active_priority = XE_ENGINE_PRIORITY_UNSET; + exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; } - if (exl->active_priority == XE_ENGINE_PRIORITY_UNSET) { + if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) { exl->active_priority = priority; list_add_tail(&exl->active_link, &port->active[priority]); } @@ -293,10 +293,10 @@ static struct dma_fence * execlist_run_job(struct drm_sched_job *drm_job) { struct 
xe_sched_job *job = to_xe_sched_job(drm_job); - struct xe_engine *e = job->engine; - struct xe_execlist_engine *exl = job->engine->execlist; + struct xe_exec_queue *q = job->q; + struct xe_execlist_exec_queue *exl = job->q->execlist; - e->ring_ops->emit_job(job); + q->ring_ops->emit_job(job); xe_execlist_make_active(exl); return dma_fence_get(job->fence); @@ -314,11 +314,11 @@ static const struct drm_sched_backend_ops drm_sched_ops = { .free_job = execlist_job_free, }; -static int execlist_engine_init(struct xe_engine *e) +static int execlist_exec_queue_init(struct xe_exec_queue *q) { struct drm_gpu_scheduler *sched; - struct xe_execlist_engine *exl; - struct xe_device *xe = gt_to_xe(e->gt); + struct xe_execlist_exec_queue *exl; + struct xe_device *xe = gt_to_xe(q->gt); int err; XE_WARN_ON(xe_device_guc_submission_enabled(xe)); @@ -329,13 +329,13 @@ static int execlist_engine_init(struct xe_engine *e) if (!exl) return -ENOMEM; - exl->engine = e; + exl->q = q; err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1, - e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, + q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT, - NULL, NULL, e->hwe->name, - gt_to_xe(e->gt)->drm.dev); + NULL, NULL, q->hwe->name, + gt_to_xe(q->gt)->drm.dev); if (err) goto err_free; @@ -344,30 +344,30 @@ static int execlist_engine_init(struct xe_engine *e) if (err) goto err_sched; - exl->port = e->hwe->exl_port; + exl->port = q->hwe->exl_port; exl->has_run = false; - exl->active_priority = XE_ENGINE_PRIORITY_UNSET; - e->execlist = exl; - e->entity = &exl->entity; + exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; + q->execlist = exl; + q->entity = &exl->entity; - switch (e->class) { + switch (q->class) { case XE_ENGINE_CLASS_RENDER: - sprintf(e->name, "rcs%d", ffs(e->logical_mask) - 1); + sprintf(q->name, "rcs%d", ffs(q->logical_mask) - 1); break; case XE_ENGINE_CLASS_VIDEO_DECODE: - sprintf(e->name, "vcs%d", ffs(e->logical_mask) - 1); + sprintf(q->name, "vcs%d", ffs(q->logical_mask) - 1); break; case XE_ENGINE_CLASS_VIDEO_ENHANCE: - sprintf(e->name, "vecs%d", ffs(e->logical_mask) - 1); + sprintf(q->name, "vecs%d", ffs(q->logical_mask) - 1); break; case XE_ENGINE_CLASS_COPY: - sprintf(e->name, "bcs%d", ffs(e->logical_mask) - 1); + sprintf(q->name, "bcs%d", ffs(q->logical_mask) - 1); break; case XE_ENGINE_CLASS_COMPUTE: - sprintf(e->name, "ccs%d", ffs(e->logical_mask) - 1); + sprintf(q->name, "ccs%d", ffs(q->logical_mask) - 1); break; default: - XE_WARN_ON(e->class); + XE_WARN_ON(q->class); } return 0; @@ -379,96 +379,96 @@ err_free: return err; } -static void execlist_engine_fini_async(struct work_struct *w) +static void execlist_exec_queue_fini_async(struct work_struct *w) { - struct xe_execlist_engine *ee = - container_of(w, struct xe_execlist_engine, fini_async); - struct xe_engine *e = ee->engine; - struct xe_execlist_engine *exl = e->execlist; + struct xe_execlist_exec_queue *ee = + container_of(w, struct xe_execlist_exec_queue, fini_async); + struct xe_exec_queue *q = ee->q; + struct xe_execlist_exec_queue *exl = q->execlist; unsigned long flags; - XE_WARN_ON(xe_device_guc_submission_enabled(gt_to_xe(e->gt))); + XE_WARN_ON(xe_device_guc_submission_enabled(gt_to_xe(q->gt))); spin_lock_irqsave(&exl->port->lock, flags); - if (WARN_ON(exl->active_priority != XE_ENGINE_PRIORITY_UNSET)) + if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET)) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - if (e->flags & ENGINE_FLAG_PERSISTENT) - 
xe_device_remove_persistent_engines(gt_to_xe(e->gt), e); + if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) + xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q); drm_sched_entity_fini(&exl->entity); drm_sched_fini(&exl->sched); kfree(exl); - xe_engine_fini(e); + xe_exec_queue_fini(q); } -static void execlist_engine_kill(struct xe_engine *e) +static void execlist_exec_queue_kill(struct xe_exec_queue *q) { /* NIY */ } -static void execlist_engine_fini(struct xe_engine *e) +static void execlist_exec_queue_fini(struct xe_exec_queue *q) { - INIT_WORK(&e->execlist->fini_async, execlist_engine_fini_async); - queue_work(system_unbound_wq, &e->execlist->fini_async); + INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); + queue_work(system_unbound_wq, &q->execlist->fini_async); } -static int execlist_engine_set_priority(struct xe_engine *e, - enum xe_engine_priority priority) +static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, + enum xe_exec_queue_priority priority) { /* NIY */ return 0; } -static int execlist_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) +static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) { /* NIY */ return 0; } -static int execlist_engine_set_preempt_timeout(struct xe_engine *e, - u32 preempt_timeout_us) +static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, + u32 preempt_timeout_us) { /* NIY */ return 0; } -static int execlist_engine_set_job_timeout(struct xe_engine *e, - u32 job_timeout_ms) +static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q, + u32 job_timeout_ms) { /* NIY */ return 0; } -static int execlist_engine_suspend(struct xe_engine *e) +static int execlist_exec_queue_suspend(struct xe_exec_queue *q) { /* NIY */ return 0; } -static void execlist_engine_suspend_wait(struct xe_engine *e) +static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q) { /* NIY */ } -static void execlist_engine_resume(struct xe_engine *e) +static void execlist_exec_queue_resume(struct xe_exec_queue *q) { /* NIY */ } -static const struct xe_engine_ops execlist_engine_ops = { - .init = execlist_engine_init, - .kill = execlist_engine_kill, - .fini = execlist_engine_fini, - .set_priority = execlist_engine_set_priority, - .set_timeslice = execlist_engine_set_timeslice, - .set_preempt_timeout = execlist_engine_set_preempt_timeout, - .set_job_timeout = execlist_engine_set_job_timeout, - .suspend = execlist_engine_suspend, - .suspend_wait = execlist_engine_suspend_wait, - .resume = execlist_engine_resume, +static const struct xe_exec_queue_ops execlist_exec_queue_ops = { + .init = execlist_exec_queue_init, + .kill = execlist_exec_queue_kill, + .fini = execlist_exec_queue_fini, + .set_priority = execlist_exec_queue_set_priority, + .set_timeslice = execlist_exec_queue_set_timeslice, + .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, + .set_job_timeout = execlist_exec_queue_set_job_timeout, + .suspend = execlist_exec_queue_suspend, + .suspend_wait = execlist_exec_queue_suspend_wait, + .resume = execlist_exec_queue_resume, }; int xe_execlist_init(struct xe_gt *gt) @@ -477,7 +477,7 @@ int xe_execlist_init(struct xe_gt *gt) if (xe_device_guc_submission_enabled(gt_to_xe(gt))) return 0; - gt->engine_ops = &execlist_engine_ops; + gt->exec_queue_ops = &execlist_exec_queue_ops; return 0; } diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h index 9b1239b47292..f94bbf4c53e4 100644 --- 
a/drivers/gpu/drm/xe/xe_execlist_types.h +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -10,27 +10,27 @@ #include #include -#include "xe_engine_types.h" +#include "xe_exec_queue_types.h" struct xe_hw_engine; -struct xe_execlist_engine; +struct xe_execlist_exec_queue; struct xe_execlist_port { struct xe_hw_engine *hwe; spinlock_t lock; - struct list_head active[XE_ENGINE_PRIORITY_COUNT]; + struct list_head active[XE_EXEC_QUEUE_PRIORITY_COUNT]; u32 last_ctx_id; - struct xe_execlist_engine *running_exl; + struct xe_execlist_exec_queue *running_exl; struct timer_list irq_fail; }; -struct xe_execlist_engine { - struct xe_engine *engine; +struct xe_execlist_exec_queue { + struct xe_exec_queue *q; struct drm_gpu_scheduler sched; @@ -42,7 +42,7 @@ struct xe_execlist_engine { struct work_struct fini_async; - enum xe_engine_priority active_priority; + enum xe_exec_queue_priority active_priority; struct list_head active_link; }; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 543b085723c5..3077faa1e792 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -26,7 +26,7 @@ #include "xe_gt_sysfs.h" #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" -#include "xe_guc_engine_types.h" +#include "xe_guc_exec_queue_types.h" #include "xe_hw_fence.h" #include "xe_irq.h" #include "xe_lrc.h" @@ -81,7 +81,7 @@ static void gt_fini(struct drm_device *drm, void *arg) static void gt_reset_worker(struct work_struct *w); -static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_sched_job *job; struct xe_bb *bb; @@ -94,7 +94,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) return PTR_ERR(bb); batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo); - job = xe_bb_create_wa_job(e, bb, batch_ofs); + job = xe_bb_create_wa_job(q, bb, batch_ofs); if (IS_ERR(job)) { xe_bb_free(bb, NULL); return PTR_ERR(job); @@ -115,9 +115,9 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e) return 0; } -static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) +static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { - struct xe_reg_sr *sr = &e->hwe->reg_lrc; + struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; unsigned long reg; struct xe_sched_job *job; @@ -143,7 +143,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e) } batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo); - job = xe_bb_create_wa_job(e, bb, batch_ofs); + job = xe_bb_create_wa_job(q, bb, batch_ofs); if (IS_ERR(job)) { xe_bb_free(bb, NULL); return PTR_ERR(job); @@ -173,7 +173,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) int err = 0; for_each_hw_engine(hwe, gt, id) { - struct xe_engine *e, *nop_e; + struct xe_exec_queue *q, *nop_q; struct xe_vm *vm; void *default_lrc; @@ -192,58 +192,58 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) return -ENOMEM; vm = xe_migrate_get_vm(tile->migrate); - e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1, - hwe, ENGINE_FLAG_WA); - if (IS_ERR(e)) { - err = PTR_ERR(e); - xe_gt_err(gt, "hwe %s: xe_engine_create failed (%pe)\n", - hwe->name, e); + q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, + hwe, EXEC_QUEUE_FLAG_WA); + if (IS_ERR(q)) { + err = PTR_ERR(q); + xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n", + hwe->name, q); goto put_vm; } /* Prime golden LRC with known good state */ - err = emit_wa_job(gt, e); + err = emit_wa_job(gt, q); if 
(err) { xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), e->guc->id); - goto put_engine; + hwe->name, ERR_PTR(err), q->guc->id); + goto put_exec_queue; } - nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), - 1, hwe, ENGINE_FLAG_WA); - if (IS_ERR(nop_e)) { - err = PTR_ERR(nop_e); - xe_gt_err(gt, "hwe %s: nop xe_engine_create failed (%pe)\n", - hwe->name, nop_e); - goto put_engine; + nop_q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), + 1, hwe, EXEC_QUEUE_FLAG_WA); + if (IS_ERR(nop_q)) { + err = PTR_ERR(nop_q); + xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n", + hwe->name, nop_q); + goto put_exec_queue; } /* Switch to different LRC */ - err = emit_nop_job(gt, nop_e); + err = emit_nop_job(gt, nop_q); if (err) { xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), nop_e->guc->id); - goto put_nop_e; + hwe->name, ERR_PTR(err), nop_q->guc->id); + goto put_nop_q; } /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, e); + err = emit_nop_job(gt, q); if (err) { xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), e->guc->id); - goto put_nop_e; + hwe->name, ERR_PTR(err), q->guc->id); + goto put_nop_q; } xe_map_memcpy_from(xe, default_lrc, - &e->lrc[0].bo->vmap, - xe_lrc_pphwsp_offset(&e->lrc[0]), + &q->lrc[0].bo->vmap, + xe_lrc_pphwsp_offset(&q->lrc[0]), xe_lrc_size(xe, hwe->class)); gt->default_lrc[hwe->class] = default_lrc; -put_nop_e: - xe_engine_put(nop_e); -put_engine: - xe_engine_put(e); +put_nop_q: + xe_exec_queue_put(nop_q); +put_exec_queue: + xe_exec_queue_put(q); put_vm: xe_vm_put(vm); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 78a9fe9f0bd3..c326932e53d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -14,7 +14,7 @@ #include "xe_sa_types.h" #include "xe_uc_types.h" -struct xe_engine_ops; +struct xe_exec_queue_ops; struct xe_migrate; struct xe_ring_ops; @@ -269,8 +269,8 @@ struct xe_gt { /** @gtidle: idle properties of GT */ struct xe_gt_idle gtidle; - /** @engine_ops: submission backend engine operations */ - const struct xe_engine_ops *engine_ops; + /** @exec_queue_ops: submission backend exec queue operations */ + const struct xe_exec_queue_ops *exec_queue_ops; /** * @ring_ops: ring operations for this hw engine (1 per engine class) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index a7da29be2e51..7d1244df959d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -495,7 +495,7 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) u8 gc; /* - * 1. Write all MMIO entries for this engine to the table. No + * 1. Write all MMIO entries for this exec queue to the table. 
No * need to worry about fused-off engines and when there are * entries in the regset: the reg_state_list has been zero'ed * by xe_guc_ads_populate() diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index fb1d63ffaee4..59136b6a7c6f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -888,11 +888,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_deregister_done_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: - ret = xe_guc_engine_reset_handler(guc, payload, adj_len); + ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: - ret = xe_guc_engine_reset_failure_handler(guc, payload, - adj_len); + ret = xe_guc_exec_queue_reset_failure_handler(guc, payload, + adj_len); break; case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: /* Selftest only at the moment */ @@ -902,8 +902,8 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) /* FIXME: Handle this */ break; case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR: - ret = xe_guc_engine_memory_cat_error_handler(guc, payload, - adj_len); + ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload, + adj_len); break; case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: ret = xe_guc_pagefault_handler(guc, payload, adj_len); diff --git a/drivers/gpu/drm/xe/xe_guc_engine_types.h b/drivers/gpu/drm/xe/xe_guc_engine_types.h deleted file mode 100644 index 5565412fe7f1..000000000000 --- a/drivers/gpu/drm/xe/xe_guc_engine_types.h +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_GUC_ENGINE_TYPES_H_ -#define _XE_GUC_ENGINE_TYPES_H_ - -#include -#include - -#include "xe_gpu_scheduler_types.h" - -struct dma_fence; -struct xe_engine; - -/** - * struct xe_guc_engine - GuC specific state for an xe_engine - */ -struct xe_guc_engine { - /** @engine: Backpointer to parent xe_engine */ - struct xe_engine *engine; - /** @sched: GPU scheduler for this xe_engine */ - struct xe_gpu_scheduler sched; - /** @entity: Scheduler entity for this xe_engine */ - struct xe_sched_entity entity; - /** - * @static_msgs: Static messages for this xe_engine, used when a message - * needs to sent through the GPU scheduler but memory allocations are - * not allowed. 
- */ -#define MAX_STATIC_MSG_TYPE 3 - struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; - /** @lr_tdr: long running TDR worker */ - struct work_struct lr_tdr; - /** @fini_async: do final fini async from this worker */ - struct work_struct fini_async; - /** @resume_time: time of last resume */ - u64 resume_time; - /** @state: GuC specific state for this xe_engine */ - atomic_t state; - /** @wqi_head: work queue item tail */ - u32 wqi_head; - /** @wqi_tail: work queue item tail */ - u32 wqi_tail; - /** @id: GuC id for this xe_engine */ - u16 id; - /** @suspend_wait: wait queue used to wait on pending suspends */ - wait_queue_head_t suspend_wait; - /** @suspend_pending: a suspend of the engine is pending */ - bool suspend_pending; -}; - -#endif diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h new file mode 100644 index 000000000000..4c39f01e4f52 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _XE_GUC_ENGINE_TYPES_H_ +#define _XE_GUC_ENGINE_TYPES_H_ + +#include +#include + +#include "xe_gpu_scheduler_types.h" + +struct dma_fence; +struct xe_exec_queue; + +/** + * struct xe_guc_exec_queue - GuC specific state for an xe_exec_queue + */ +struct xe_guc_exec_queue { + /** @q: Backpointer to parent xe_exec_queue */ + struct xe_exec_queue *q; + /** @sched: GPU scheduler for this xe_exec_queue */ + struct xe_gpu_scheduler sched; + /** @entity: Scheduler entity for this xe_exec_queue */ + struct xe_sched_entity entity; + /** + * @static_msgs: Static messages for this xe_exec_queue, used when + * a message needs to sent through the GPU scheduler but memory + * allocations are not allowed. 
+ */ +#define MAX_STATIC_MSG_TYPE 3 + struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; + /** @lr_tdr: long running TDR worker */ + struct work_struct lr_tdr; + /** @fini_async: do final fini async from this worker */ + struct work_struct fini_async; + /** @resume_time: time of last resume */ + u64 resume_time; + /** @state: GuC specific state for this xe_exec_queue */ + atomic_t state; + /** @wqi_head: work queue item tail */ + u32 wqi_head; + /** @wqi_tail: work queue item tail */ + u32 wqi_tail; + /** @id: GuC id for this exec_queue */ + u16 id; + /** @suspend_wait: wait queue used to wait on pending suspends */ + wait_queue_head_t suspend_wait; + /** @suspend_pending: a suspend of the exec_queue is pending */ + bool suspend_pending; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 7515d7fbb723..4216a6d9e478 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -69,13 +69,13 @@ struct guc_klv_generic_dw_t { } __packed; /* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */ -struct guc_update_engine_policy_header { +struct guc_update_exec_queue_policy_header { u32 action; u32 guc_id; } __packed; -struct guc_update_engine_policy { - struct guc_update_engine_policy_header header; +struct guc_update_exec_queue_policy { + struct guc_update_exec_queue_policy_header header; struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; } __packed; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 5198e91eeefb..42454c12efb3 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -22,7 +22,7 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_guc_ct.h" -#include "xe_guc_engine_types.h" +#include "xe_guc_exec_queue_types.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -48,9 +48,9 @@ guc_to_xe(struct xe_guc *guc) } static struct xe_guc * -engine_to_guc(struct xe_engine *e) +exec_queue_to_guc(struct xe_exec_queue *q) { - return &e->gt->uc.guc; + return &q->gt->uc.guc; } /* @@ -58,140 +58,140 @@ engine_to_guc(struct xe_engine *e) * as the same time (e.g. a suspend can be happning at the same time as schedule * engine done being processed). 
*/ -#define ENGINE_STATE_REGISTERED (1 << 0) +#define EXEC_QUEUE_STATE_REGISTERED (1 << 0) #define ENGINE_STATE_ENABLED (1 << 1) -#define ENGINE_STATE_PENDING_ENABLE (1 << 2) -#define ENGINE_STATE_PENDING_DISABLE (1 << 3) -#define ENGINE_STATE_DESTROYED (1 << 4) +#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) +#define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) +#define EXEC_QUEUE_STATE_DESTROYED (1 << 4) #define ENGINE_STATE_SUSPENDED (1 << 5) -#define ENGINE_STATE_RESET (1 << 6) +#define EXEC_QUEUE_STATE_RESET (1 << 6) #define ENGINE_STATE_KILLED (1 << 7) -static bool engine_registered(struct xe_engine *e) +static bool exec_queue_registered(struct xe_exec_queue *q) { - return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; } -static void set_engine_registered(struct xe_engine *e) +static void set_exec_queue_registered(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state); + atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); } -static void clear_engine_registered(struct xe_engine *e) +static void clear_exec_queue_registered(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state); + atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); } -static bool engine_enabled(struct xe_engine *e) +static bool exec_queue_enabled(struct xe_exec_queue *q) { - return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED; + return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED; } -static void set_engine_enabled(struct xe_engine *e) +static void set_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_ENABLED, &e->guc->state); + atomic_or(ENGINE_STATE_ENABLED, &q->guc->state); } -static void clear_engine_enabled(struct xe_engine *e) +static void clear_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state); + atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state); } -static bool engine_pending_enable(struct xe_engine *e) +static bool exec_queue_pending_enable(struct xe_exec_queue *q) { - return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; } -static void set_engine_pending_enable(struct xe_engine *e) +static void set_exec_queue_pending_enable(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state); + atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); } -static void clear_engine_pending_enable(struct xe_engine *e) +static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state); + atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); } -static bool engine_pending_disable(struct xe_engine *e) +static bool exec_queue_pending_disable(struct xe_exec_queue *q) { - return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; } -static void set_engine_pending_disable(struct xe_engine *e) +static void set_exec_queue_pending_disable(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state); + atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); } -static void clear_engine_pending_disable(struct xe_engine *e) +static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state); + atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); } -static bool 
engine_destroyed(struct xe_engine *e) +static bool exec_queue_destroyed(struct xe_exec_queue *q) { - return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; } -static void set_engine_destroyed(struct xe_engine *e) +static void set_exec_queue_destroyed(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state); + atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); } -static bool engine_banned(struct xe_engine *e) +static bool exec_queue_banned(struct xe_exec_queue *q) { - return (e->flags & ENGINE_FLAG_BANNED); + return (q->flags & EXEC_QUEUE_FLAG_BANNED); } -static void set_engine_banned(struct xe_engine *e) +static void set_exec_queue_banned(struct xe_exec_queue *q) { - e->flags |= ENGINE_FLAG_BANNED; + q->flags |= EXEC_QUEUE_FLAG_BANNED; } -static bool engine_suspended(struct xe_engine *e) +static bool exec_queue_suspended(struct xe_exec_queue *q) { - return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED; + return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED; } -static void set_engine_suspended(struct xe_engine *e) +static void set_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state); + atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state); } -static void clear_engine_suspended(struct xe_engine *e) +static void clear_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state); + atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state); } -static bool engine_reset(struct xe_engine *e) +static bool exec_queue_reset(struct xe_exec_queue *q) { - return atomic_read(&e->guc->state) & ENGINE_STATE_RESET; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; } -static void set_engine_reset(struct xe_engine *e) +static void set_exec_queue_reset(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_RESET, &e->guc->state); + atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); } -static bool engine_killed(struct xe_engine *e) +static bool exec_queue_killed(struct xe_exec_queue *q) { - return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED; + return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED; } -static void set_engine_killed(struct xe_engine *e) +static void set_exec_queue_killed(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_KILLED, &e->guc->state); + atomic_or(ENGINE_STATE_KILLED, &q->guc->state); } -static bool engine_killed_or_banned(struct xe_engine *e) +static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) { - return engine_killed(e) || engine_banned(e); + return exec_queue_killed(q) || exec_queue_banned(q); } static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; - xa_destroy(&guc->submission_state.engine_lookup); + xa_destroy(&guc->submission_state.exec_queue_lookup); ida_destroy(&guc->submission_state.guc_ids); bitmap_free(guc->submission_state.guc_ids_bitmap); } @@ -201,7 +201,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) #define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) #define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC -static const struct xe_engine_ops guc_engine_ops; +static const struct xe_exec_queue_ops guc_exec_queue_ops; static void primelockdep(struct xe_guc *guc) { @@ -228,10 +228,10 @@ int xe_guc_submit_init(struct xe_guc *guc) if (!guc->submission_state.guc_ids_bitmap) return -ENOMEM; - gt->engine_ops = &guc_engine_ops; + gt->exec_queue_ops = &guc_exec_queue_ops; 
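The exec_queue state helpers renamed above all follow one lockless pattern: a single atomic_t word holds one bit per state, set with atomic_or(), cleared with atomic_and() and tested with atomic_read(), so the submission path, the TDR workers and the G2H handlers can update it concurrently without taking a lock. A minimal self-contained sketch of that pattern (illustrative names only, not part of this patch):

#include <linux/atomic.h>
#include <linux/types.h>

#define DEMO_STATE_REGISTERED	(1 << 0)
#define DEMO_STATE_ENABLED	(1 << 1)

struct demo_queue {
	atomic_t state;	/* bitmask of DEMO_STATE_* flags */
};

static void demo_set_registered(struct demo_queue *dq)
{
	atomic_or(DEMO_STATE_REGISTERED, &dq->state);	/* set one bit */
}

static void demo_clear_registered(struct demo_queue *dq)
{
	atomic_and(~DEMO_STATE_REGISTERED, &dq->state);	/* clear one bit */
}

static bool demo_registered(struct demo_queue *dq)
{
	return atomic_read(&dq->state) & DEMO_STATE_REGISTERED;
}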
mutex_init(&guc->submission_state.lock); - xa_init(&guc->submission_state.engine_lookup); + xa_init(&guc->submission_state.exec_queue_lookup); ida_init(&guc->submission_state.guc_ids); spin_lock_init(&guc->submission_state.suspend.lock); @@ -246,7 +246,7 @@ int xe_guc_submit_init(struct xe_guc *guc) return 0; } -static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e) +static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) { int ret; void *ptr; @@ -260,11 +260,11 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e) */ lockdep_assert_held(&guc->submission_state.lock); - if (xe_engine_is_parallel(e)) { + if (xe_exec_queue_is_parallel(q)) { void *bitmap = guc->submission_state.guc_ids_bitmap; ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, - order_base_2(e->width)); + order_base_2(q->width)); } else { ret = ida_simple_get(&guc->submission_state.guc_ids, 0, GUC_ID_NUMBER_SLRC, GFP_NOWAIT); @@ -272,12 +272,12 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e) if (ret < 0) return ret; - e->guc->id = ret; - if (xe_engine_is_parallel(e)) - e->guc->id += GUC_ID_START_MLRC; + q->guc->id = ret; + if (xe_exec_queue_is_parallel(q)) + q->guc->id += GUC_ID_START_MLRC; - ptr = xa_store(&guc->submission_state.engine_lookup, - e->guc->id, e, GFP_NOWAIT); + ptr = xa_store(&guc->submission_state.exec_queue_lookup, + q->guc->id, q, GFP_NOWAIT); if (IS_ERR(ptr)) { ret = PTR_ERR(ptr); goto err_release; @@ -286,29 +286,29 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e) return 0; err_release: - ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); + ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id); return ret; } -static void release_guc_id(struct xe_guc *guc, struct xe_engine *e) +static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) { mutex_lock(&guc->submission_state.lock); - xa_erase(&guc->submission_state.engine_lookup, e->guc->id); - if (xe_engine_is_parallel(e)) + xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id); + if (xe_exec_queue_is_parallel(q)) bitmap_release_region(guc->submission_state.guc_ids_bitmap, - e->guc->id - GUC_ID_START_MLRC, - order_base_2(e->width)); + q->guc->id - GUC_ID_START_MLRC, + order_base_2(q->width)); else - ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id); + ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id); mutex_unlock(&guc->submission_state.lock); } -struct engine_policy { +struct exec_queue_policy { u32 count; - struct guc_update_engine_policy h2g; + struct guc_update_exec_queue_policy h2g; }; -static u32 __guc_engine_policy_action_size(struct engine_policy *policy) +static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) { size_t bytes = sizeof(policy->h2g.header) + (sizeof(policy->h2g.klv[0]) * policy->count); @@ -316,8 +316,8 @@ static u32 __guc_engine_policy_action_size(struct engine_policy *policy) return bytes / sizeof(u32); } -static void __guc_engine_policy_start_klv(struct engine_policy *policy, - u16 guc_id) +static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, + u16 guc_id) { policy->h2g.header.action = XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; @@ -325,8 +325,8 @@ static void __guc_engine_policy_start_klv(struct engine_policy *policy, policy->count = 0; } -#define MAKE_ENGINE_POLICY_ADD(func, id) \ -static void __guc_engine_policy_add_##func(struct engine_policy *policy, \ +#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ +static void 
__guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ u32 data) \ { \ XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ @@ -339,45 +339,45 @@ static void __guc_engine_policy_add_##func(struct engine_policy *policy, \ policy->count++; \ } -MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) -MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) -MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY) -#undef MAKE_ENGINE_POLICY_ADD +MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) +MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) +MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) +#undef MAKE_EXEC_QUEUE_POLICY_ADD -static const int xe_engine_prio_to_guc[] = { - [XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, - [XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, - [XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, - [XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, +static const int xe_exec_queue_prio_to_guc[] = { + [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, + [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, + [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, + [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, }; -static void init_policies(struct xe_guc *guc, struct xe_engine *e) +static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) { - struct engine_policy policy; - enum xe_engine_priority prio = e->priority; - u32 timeslice_us = e->sched_props.timeslice_us; - u32 preempt_timeout_us = e->sched_props.preempt_timeout_us; + struct exec_queue_policy policy; + enum xe_exec_queue_priority prio = q->priority; + u32 timeslice_us = q->sched_props.timeslice_us; + u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - XE_WARN_ON(!engine_registered(e)); + XE_WARN_ON(!exec_queue_registered(q)); - __guc_engine_policy_start_klv(&policy, e->guc->id); - __guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]); - __guc_engine_policy_add_execution_quantum(&policy, timeslice_us); - __guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us); + __guc_exec_queue_policy_start_klv(&policy, q->guc->id); + __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); + __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); + __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, - __guc_engine_policy_action_size(&policy), 0, 0); + __guc_exec_queue_policy_action_size(&policy), 0, 0); } -static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) +static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) { - struct engine_policy policy; + struct exec_queue_policy policy; - __guc_engine_policy_start_klv(&policy, e->guc->id); - __guc_engine_policy_add_preemption_timeout(&policy, 1); + __guc_exec_queue_policy_start_klv(&policy, q->guc->id); + __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, - __guc_engine_policy_action_size(&policy), 0, 0); + __guc_exec_queue_policy_action_size(&policy), 0, 0); } #define parallel_read(xe_, map_, field_) \ @@ -388,7 +388,7 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) field_, val_) static void __register_mlrc_engine(struct xe_guc *guc, - struct xe_engine *e, + struct xe_exec_queue *q, struct guc_ctxt_registration_info *info) { 
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) @@ -396,7 +396,7 @@ static void __register_mlrc_engine(struct xe_guc *guc, int len = 0; int i; - XE_WARN_ON(!xe_engine_is_parallel(e)); + XE_WARN_ON(!xe_exec_queue_is_parallel(q)); action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; action[len++] = info->flags; @@ -408,12 +408,12 @@ static void __register_mlrc_engine(struct xe_guc *guc, action[len++] = info->wq_base_lo; action[len++] = info->wq_base_hi; action[len++] = info->wq_size; - action[len++] = e->width; + action[len++] = q->width; action[len++] = info->hwlrca_lo; action[len++] = info->hwlrca_hi; - for (i = 1; i < e->width; ++i) { - struct xe_lrc *lrc = e->lrc + i; + for (i = 1; i < q->width; ++i) { + struct xe_lrc *lrc = q->lrc + i; action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); @@ -446,24 +446,24 @@ static void __register_engine(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void register_engine(struct xe_engine *e) +static void register_engine(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct xe_lrc *lrc = e->lrc; + struct xe_lrc *lrc = q->lrc; struct guc_ctxt_registration_info info; - XE_WARN_ON(engine_registered(e)); + XE_WARN_ON(exec_queue_registered(q)); memset(&info, 0, sizeof(info)); - info.context_idx = e->guc->id; - info.engine_class = xe_engine_class_to_guc_class(e->class); - info.engine_submit_mask = e->logical_mask; + info.context_idx = q->guc->id; + info.engine_class = xe_engine_class_to_guc_class(q->class); + info.engine_submit_mask = q->logical_mask; info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); info.flags = CONTEXT_REGISTRATION_FLAG_KMD; - if (xe_engine_is_parallel(e)) { + if (xe_exec_queue_is_parallel(q)) { u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); struct iosys_map map = xe_lrc_parallel_map(lrc); @@ -477,8 +477,8 @@ static void register_engine(struct xe_engine *e) offsetof(struct guc_submit_parallel_scratch, wq[0])); info.wq_size = WQ_SIZE; - e->guc->wqi_head = 0; - e->guc->wqi_tail = 0; + q->guc->wqi_head = 0; + q->guc->wqi_tail = 0; xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); } @@ -488,38 +488,38 @@ static void register_engine(struct xe_engine *e) * the GuC as jobs signal immediately and can't destroy an engine if the * GuC has a reference to it. 
*/ - if (xe_engine_is_lr(e)) - xe_engine_get(e); + if (xe_exec_queue_is_lr(q)) + xe_exec_queue_get(q); - set_engine_registered(e); - trace_xe_engine_register(e); - if (xe_engine_is_parallel(e)) - __register_mlrc_engine(guc, e, &info); + set_exec_queue_registered(q); + trace_xe_exec_queue_register(q); + if (xe_exec_queue_is_parallel(q)) + __register_mlrc_engine(guc, q, &info); else __register_engine(guc, &info); - init_policies(guc, e); + init_policies(guc, q); } -static u32 wq_space_until_wrap(struct xe_engine *e) +static u32 wq_space_until_wrap(struct xe_exec_queue *q) { - return (WQ_SIZE - e->guc->wqi_tail); + return (WQ_SIZE - q->guc->wqi_tail); } -static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size) +static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(e->lrc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc); unsigned int sleep_period_ms = 1; #define AVAILABLE_SPACE \ - CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE) + CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) if (wqi_size > AVAILABLE_SPACE) { try_again: - e->guc->wqi_head = parallel_read(xe, map, wq_desc.head); + q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); if (wqi_size > AVAILABLE_SPACE) { if (sleep_period_ms == 1024) { - xe_gt_reset_async(e->gt); + xe_gt_reset_async(q->gt); return -ENODEV; } @@ -533,52 +533,52 @@ try_again: return 0; } -static int wq_noop_append(struct xe_engine *e) +static int wq_noop_append(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(e->lrc); - u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1; + struct iosys_map map = xe_lrc_parallel_map(q->lrc); + u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; - if (wq_wait_for_space(e, wq_space_until_wrap(e))) + if (wq_wait_for_space(q, wq_space_until_wrap(q))) return -ENODEV; XE_WARN_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); - parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)], + parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | FIELD_PREP(WQ_LEN_MASK, len_dw)); - e->guc->wqi_tail = 0; + q->guc->wqi_tail = 0; return 0; } -static void wq_item_append(struct xe_engine *e) +static void wq_item_append(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(e->lrc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc); #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; - u32 wqi_size = (e->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); + u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); u32 len_dw = (wqi_size / sizeof(u32)) - 1; int i = 0, j; - if (wqi_size > wq_space_until_wrap(e)) { - if (wq_noop_append(e)) + if (wqi_size > wq_space_until_wrap(q)) { + if (wq_noop_append(q)) return; } - if (wq_wait_for_space(e, wqi_size)) + if (wq_wait_for_space(q, wqi_size)) return; wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | FIELD_PREP(WQ_LEN_MASK, len_dw); - wqi[i++] = xe_lrc_descriptor(e->lrc); - wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) | - FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64)); + 
wqi[i++] = xe_lrc_descriptor(q->lrc); + wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | + FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64)); wqi[i++] = 0; - for (j = 1; j < e->width; ++j) { - struct xe_lrc *lrc = e->lrc + j; + for (j = 1; j < q->width; ++j) { + struct xe_lrc *lrc = q->lrc + j; wqi[i++] = lrc->ring.tail / sizeof(u64); } @@ -586,55 +586,55 @@ static void wq_item_append(struct xe_engine *e) XE_WARN_ON(i != wqi_size / sizeof(u32)); iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, - wq[e->guc->wqi_tail / sizeof(u32)])); + wq[q->guc->wqi_tail / sizeof(u32)])); xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); - e->guc->wqi_tail += wqi_size; - XE_WARN_ON(e->guc->wqi_tail > WQ_SIZE); + q->guc->wqi_tail += wqi_size; + XE_WARN_ON(q->guc->wqi_tail > WQ_SIZE); xe_device_wmb(xe); - map = xe_lrc_parallel_map(e->lrc); - parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail); + map = xe_lrc_parallel_map(q->lrc); + parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); } #define RESUME_PENDING ~0x0ull -static void submit_engine(struct xe_engine *e) +static void submit_exec_queue(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); - struct xe_lrc *lrc = e->lrc; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_lrc *lrc = q->lrc; u32 action[3]; u32 g2h_len = 0; u32 num_g2h = 0; int len = 0; bool extra_submit = false; - XE_WARN_ON(!engine_registered(e)); + XE_WARN_ON(!exec_queue_registered(q)); - if (xe_engine_is_parallel(e)) - wq_item_append(e); + if (xe_exec_queue_is_parallel(q)) + wq_item_append(q); else xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); - if (engine_suspended(e) && !xe_engine_is_parallel(e)) + if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) return; - if (!engine_enabled(e) && !engine_suspended(e)) { + if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; - action[len++] = e->guc->id; + action[len++] = q->guc->id; action[len++] = GUC_CONTEXT_ENABLE; g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; num_g2h = 1; - if (xe_engine_is_parallel(e)) + if (xe_exec_queue_is_parallel(q)) extra_submit = true; - e->guc->resume_time = RESUME_PENDING; - set_engine_pending_enable(e); - set_engine_enabled(e); - trace_xe_engine_scheduling_enable(e); + q->guc->resume_time = RESUME_PENDING; + set_exec_queue_pending_enable(q); + set_exec_queue_enabled(q); + trace_xe_exec_queue_scheduling_enable(q); } else { action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; - action[len++] = e->guc->id; - trace_xe_engine_submit(e); + action[len++] = q->guc->id; + trace_xe_exec_queue_submit(q); } xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); @@ -642,31 +642,31 @@ static void submit_engine(struct xe_engine *e) if (extra_submit) { len = 0; action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; - action[len++] = e->guc->id; - trace_xe_engine_submit(e); + action[len++] = q->guc->id; + trace_xe_exec_queue_submit(q); xe_guc_ct_send(&guc->ct, action, len, 0, 0); } } static struct dma_fence * -guc_engine_run_job(struct drm_sched_job *drm_job) +guc_exec_queue_run_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); - struct xe_engine *e = job->engine; - bool lr = xe_engine_is_lr(e); + struct xe_exec_queue *q = job->q; + bool lr = xe_exec_queue_is_lr(q); - XE_WARN_ON((engine_destroyed(e) || engine_pending_disable(e)) && - !engine_banned(e) && !engine_suspended(e)); + XE_WARN_ON((exec_queue_destroyed(q) || exec_queue_pending_disable(q)) && + 
!exec_queue_banned(q) && !exec_queue_suspended(q)); trace_xe_sched_job_run(job); - if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) { - if (!engine_registered(e)) - register_engine(e); + if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { + if (!exec_queue_registered(q)) + register_engine(q); if (!lr) /* LR jobs are emitted in the exec IOCTL */ - e->ring_ops->emit_job(job); - submit_engine(e); + q->ring_ops->emit_job(job); + submit_exec_queue(q); } if (lr) { @@ -679,7 +679,7 @@ guc_engine_run_job(struct drm_sched_job *drm_job) } } -static void guc_engine_free_job(struct drm_sched_job *drm_job) +static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); @@ -692,37 +692,37 @@ static int guc_read_stopped(struct xe_guc *guc) return atomic_read(&guc->submission_state.stopped); } -#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable) \ +#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ u32 action[] = { \ XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ - e->guc->id, \ + q->guc->id, \ GUC_CONTEXT_##enable_disable, \ } static void disable_scheduling_deregister(struct xe_guc *guc, - struct xe_engine *e) + struct xe_exec_queue *q) { - MAKE_SCHED_CONTEXT_ACTION(e, DISABLE); + MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); int ret; - set_min_preemption_timeout(guc, e); + set_min_preemption_timeout(guc, q); smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) || + ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || guc_read_stopped(guc), HZ * 5); if (!ret) { - struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_gpu_scheduler *sched = &q->guc->sched; XE_WARN_ON("Pending enable failed to respond"); xe_sched_submission_start(sched); - xe_gt_reset_async(e->gt); + xe_gt_reset_async(q->gt); xe_sched_tdr_queue_imm(sched); return; } - clear_engine_enabled(e); - set_engine_pending_disable(e); - set_engine_destroyed(e); - trace_xe_engine_scheduling_disable(e); + clear_exec_queue_enabled(q); + set_exec_queue_pending_disable(q); + set_exec_queue_destroyed(q); + trace_xe_exec_queue_scheduling_disable(q); /* * Reserve space for both G2H here as the 2nd G2H is sent from a G2H @@ -733,27 +733,27 @@ static void disable_scheduling_deregister(struct xe_guc *guc, G2H_LEN_DW_DEREGISTER_CONTEXT, 2); } -static void guc_engine_print(struct xe_engine *e, struct drm_printer *p); +static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p); #if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE) -static void simple_error_capture(struct xe_engine *e) +static void simple_error_capture(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); struct drm_printer p = drm_err_printer(""); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - u32 adj_logical_mask = e->logical_mask; - u32 width_mask = (0x1 << e->width) - 1; + u32 adj_logical_mask = q->logical_mask; + u32 width_mask = (0x1 << q->width) - 1; int i; bool cookie; - if (e->vm && !e->vm->error_capture.capture_once) { - e->vm->error_capture.capture_once = true; + if (q->vm && !q->vm->error_capture.capture_once) { + q->vm->error_capture.capture_once = true; cookie = dma_fence_begin_signalling(); - for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { + for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { if (adj_logical_mask & BIT(i)) { adj_logical_mask |= width_mask << i; - i += e->width; + i += q->width; } else { ++i; } @@ -761,66 +761,66 @@ static void 
simple_error_capture(struct xe_engine *e) xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); xe_guc_ct_print(&guc->ct, &p, true); - guc_engine_print(e, &p); + guc_exec_queue_print(q, &p); for_each_hw_engine(hwe, guc_to_gt(guc), id) { - if (hwe->class != e->hwe->class || + if (hwe->class != q->hwe->class || !(BIT(hwe->logical_instance) & adj_logical_mask)) continue; xe_hw_engine_print(hwe, &p); } - xe_analyze_vm(&p, e->vm, e->gt->info.id); + xe_analyze_vm(&p, q->vm, q->gt->info.id); xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); } } #else -static void simple_error_capture(struct xe_engine *e) +static void simple_error_capture(struct xe_exec_queue *q) { } #endif -static void xe_guc_engine_trigger_cleanup(struct xe_engine *e) +static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); - if (xe_engine_is_lr(e)) - queue_work(guc_to_gt(guc)->ordered_wq, &e->guc->lr_tdr); + if (xe_exec_queue_is_lr(q)) + queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); else - xe_sched_tdr_queue_imm(&e->guc->sched); + xe_sched_tdr_queue_imm(&q->guc->sched); } -static void xe_guc_engine_lr_cleanup(struct work_struct *w) +static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) { - struct xe_guc_engine *ge = - container_of(w, struct xe_guc_engine, lr_tdr); - struct xe_engine *e = ge->engine; + struct xe_guc_exec_queue *ge = + container_of(w, struct xe_guc_exec_queue, lr_tdr); + struct xe_exec_queue *q = ge->q; struct xe_gpu_scheduler *sched = &ge->sched; - XE_WARN_ON(!xe_engine_is_lr(e)); - trace_xe_engine_lr_cleanup(e); + XE_WARN_ON(!xe_exec_queue_is_lr(q)); + trace_xe_exec_queue_lr_cleanup(q); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); /* Engine state now stable, disable scheduling / deregister if needed */ - if (engine_registered(e)) { - struct xe_guc *guc = engine_to_guc(e); + if (exec_queue_registered(q)) { + struct xe_guc *guc = exec_queue_to_guc(q); int ret; - set_engine_banned(e); - disable_scheduling_deregister(guc, e); + set_exec_queue_banned(q); + disable_scheduling_deregister(guc, q); /* * Must wait for scheduling to be disabled before signalling * any fences, if GT broken the GT reset code should signal us. 
*/ ret = wait_event_timeout(guc->ct.wq, - !engine_pending_disable(e) || + !exec_queue_pending_disable(q) || guc_read_stopped(guc), HZ * 5); if (!ret) { XE_WARN_ON("Schedule disable failed to respond"); xe_sched_submission_start(sched); - xe_gt_reset_async(e->gt); + xe_gt_reset_async(q->gt); return; } } @@ -829,27 +829,27 @@ static void xe_guc_engine_lr_cleanup(struct work_struct *w) } static enum drm_gpu_sched_stat -guc_engine_timedout_job(struct drm_sched_job *drm_job) +guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_sched_job *tmp_job; - struct xe_engine *e = job->engine; - struct xe_gpu_scheduler *sched = &e->guc->sched; - struct xe_device *xe = guc_to_xe(engine_to_guc(e)); + struct xe_exec_queue *q = job->q; + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); int err = -ETIME; int i = 0; if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL); - XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e)); + XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_KERNEL); + XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)); drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), e->guc->id, e->flags); - simple_error_capture(e); - xe_devcoredump(e); + xe_sched_job_seqno(job), q->guc->id, q->flags); + simple_error_capture(q); + xe_devcoredump(q); } else { drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), e->guc->id, e->flags); + xe_sched_job_seqno(job), q->guc->id, q->flags); } trace_xe_sched_job_timedout(job); @@ -860,26 +860,26 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job) * Kernel jobs should never fail, nor should VM jobs if they do * somethings has gone wrong and the GT needs a reset */ - if (e->flags & ENGINE_FLAG_KERNEL || - (e->flags & ENGINE_FLAG_VM && !engine_killed(e))) { + if (q->flags & EXEC_QUEUE_FLAG_KERNEL || + (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) { if (!xe_sched_invalidate_job(job, 2)) { xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - xe_gt_reset_async(e->gt); + xe_gt_reset_async(q->gt); goto out; } } /* Engine state now stable, disable scheduling if needed */ - if (engine_enabled(e)) { - struct xe_guc *guc = engine_to_guc(e); + if (exec_queue_enabled(q)) { + struct xe_guc *guc = exec_queue_to_guc(q); int ret; - if (engine_reset(e)) + if (exec_queue_reset(q)) err = -EIO; - set_engine_banned(e); - xe_engine_get(e); - disable_scheduling_deregister(guc, e); + set_exec_queue_banned(q); + xe_exec_queue_get(q); + disable_scheduling_deregister(guc, q); /* * Must wait for scheduling to be disabled before signalling @@ -891,20 +891,20 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job) */ smp_rmb(); ret = wait_event_timeout(guc->ct.wq, - !engine_pending_disable(e) || + !exec_queue_pending_disable(q) || guc_read_stopped(guc), HZ * 5); if (!ret) { XE_WARN_ON("Schedule disable failed to respond"); xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - xe_gt_reset_async(e->gt); + xe_gt_reset_async(q->gt); xe_sched_tdr_queue_imm(sched); goto out; } } /* Stop fence signaling */ - xe_hw_fence_irq_stop(e->fence_irq); + xe_hw_fence_irq_stop(q->fence_irq); /* * Fence state now stable, stop / start scheduler which cleans up any @@ -912,7 +912,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job) */ 
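Both the LR cleanup worker and the timedout-job handler above rely on the same handshake: send a scheduling-disable over CT, then sleep on guc->ct.wq until the G2H ack clears the pending-disable bit or the GuC is marked stopped, escalating to a GT reset if no ack arrives within five seconds. A self-contained sketch of that wait (illustrative names, not part of this patch):

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/wait.h>

#define DEMO_STATE_PENDING_DISABLE	(1 << 3)

/* Returns 0 once the ack arrived, -ETIMEDOUT when the caller must escalate. */
static int demo_wait_sched_disable_ack(wait_queue_head_t *wq,
				       atomic_t *state, atomic_t *stopped)
{
	long ret;

	ret = wait_event_timeout(*wq,
				 !(atomic_read(state) & DEMO_STATE_PENDING_DISABLE) ||
				 atomic_read(stopped),
				 HZ * 5);

	return ret ? 0 : -ETIMEDOUT;
}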
xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - xe_guc_engine_trigger_cleanup(e); + xe_guc_exec_queue_trigger_cleanup(q); /* Mark all outstanding jobs as bad, thus completing them */ spin_lock(&sched->base.job_list_lock); @@ -921,53 +921,53 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job) spin_unlock(&sched->base.job_list_lock); /* Start fence signaling */ - xe_hw_fence_irq_start(e->fence_irq); + xe_hw_fence_irq_start(q->fence_irq); out: return DRM_GPU_SCHED_STAT_NOMINAL; } -static void __guc_engine_fini_async(struct work_struct *w) +static void __guc_exec_queue_fini_async(struct work_struct *w) { - struct xe_guc_engine *ge = - container_of(w, struct xe_guc_engine, fini_async); - struct xe_engine *e = ge->engine; - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc_exec_queue *ge = + container_of(w, struct xe_guc_exec_queue, fini_async); + struct xe_exec_queue *q = ge->q; + struct xe_guc *guc = exec_queue_to_guc(q); - trace_xe_engine_destroy(e); + trace_xe_exec_queue_destroy(q); - if (xe_engine_is_lr(e)) + if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); - if (e->flags & ENGINE_FLAG_PERSISTENT) - xe_device_remove_persistent_engines(gt_to_xe(e->gt), e); - release_guc_id(guc, e); + if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) + xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q); + release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); - if (!(e->flags & ENGINE_FLAG_KERNEL)) { + if (!(q->flags & EXEC_QUEUE_FLAG_KERNEL)) { kfree(ge); - xe_engine_fini(e); + xe_exec_queue_fini(q); } } -static void guc_engine_fini_async(struct xe_engine *e) +static void guc_exec_queue_fini_async(struct xe_exec_queue *q) { - bool kernel = e->flags & ENGINE_FLAG_KERNEL; + bool kernel = q->flags & EXEC_QUEUE_FLAG_KERNEL; - INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async); - queue_work(system_wq, &e->guc->fini_async); + INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); + queue_work(system_wq, &q->guc->fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (kernel) { - struct xe_guc_engine *ge = e->guc; + struct xe_guc_exec_queue *ge = q->guc; flush_work(&ge->fini_async); kfree(ge); - xe_engine_fini(e); + xe_exec_queue_fini(q); } } -static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e) +static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) { /* * Might be done from within the GPU scheduler, need to do async as we @@ -976,104 +976,104 @@ static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e) * this we and don't really care when everything is fini'd, just that it * is. 
*/ - guc_engine_fini_async(e); + guc_exec_queue_fini_async(q); } -static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg) +static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) { - struct xe_engine *e = msg->private_data; - struct xe_guc *guc = engine_to_guc(e); + struct xe_exec_queue *q = msg->private_data; + struct xe_guc *guc = exec_queue_to_guc(q); - XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL); - trace_xe_engine_cleanup_entity(e); + XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_KERNEL); + trace_xe_exec_queue_cleanup_entity(q); - if (engine_registered(e)) - disable_scheduling_deregister(guc, e); + if (exec_queue_registered(q)) + disable_scheduling_deregister(guc, q); else - __guc_engine_fini(guc, e); + __guc_exec_queue_fini(guc, q); } -static bool guc_engine_allowed_to_change_state(struct xe_engine *e) +static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) { - return !engine_killed_or_banned(e) && engine_registered(e); + return !exec_queue_killed_or_banned(q) && exec_queue_registered(q); } -static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg) +static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) { - struct xe_engine *e = msg->private_data; - struct xe_guc *guc = engine_to_guc(e); + struct xe_exec_queue *q = msg->private_data; + struct xe_guc *guc = exec_queue_to_guc(q); - if (guc_engine_allowed_to_change_state(e)) - init_policies(guc, e); + if (guc_exec_queue_allowed_to_change_state(q)) + init_policies(guc, q); kfree(msg); } -static void suspend_fence_signal(struct xe_engine *e) +static void suspend_fence_signal(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); - XE_WARN_ON(!engine_suspended(e) && !engine_killed(e) && + XE_WARN_ON(!exec_queue_suspended(q) && !exec_queue_killed(q) && !guc_read_stopped(guc)); - XE_WARN_ON(!e->guc->suspend_pending); + XE_WARN_ON(!q->guc->suspend_pending); - e->guc->suspend_pending = false; + q->guc->suspend_pending = false; smp_wmb(); - wake_up(&e->guc->suspend_wait); + wake_up(&q->guc->suspend_wait); } -static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg) +static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) { - struct xe_engine *e = msg->private_data; - struct xe_guc *guc = engine_to_guc(e); + struct xe_exec_queue *q = msg->private_data; + struct xe_guc *guc = exec_queue_to_guc(q); - if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) && - engine_enabled(e)) { - wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING || + if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && + exec_queue_enabled(q)) { + wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || guc_read_stopped(guc)); if (!guc_read_stopped(guc)) { - MAKE_SCHED_CONTEXT_ACTION(e, DISABLE); + MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); s64 since_resume_ms = ktime_ms_delta(ktime_get(), - e->guc->resume_time); - s64 wait_ms = e->vm->preempt.min_run_period_ms - + q->guc->resume_time); + s64 wait_ms = q->vm->preempt.min_run_period_ms - since_resume_ms; - if (wait_ms > 0 && e->guc->resume_time) + if (wait_ms > 0 && q->guc->resume_time) msleep(wait_ms); - set_engine_suspended(e); - clear_engine_enabled(e); - set_engine_pending_disable(e); - trace_xe_engine_scheduling_disable(e); + set_exec_queue_suspended(q); + clear_exec_queue_enabled(q); + set_exec_queue_pending_disable(q); + trace_xe_exec_queue_scheduling_disable(q); xe_guc_ct_send(&guc->ct, action, 
ARRAY_SIZE(action), G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); } - } else if (e->guc->suspend_pending) { - set_engine_suspended(e); - suspend_fence_signal(e); + } else if (q->guc->suspend_pending) { + set_exec_queue_suspended(q); + suspend_fence_signal(q); } } -static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg) +static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) { - struct xe_engine *e = msg->private_data; - struct xe_guc *guc = engine_to_guc(e); + struct xe_exec_queue *q = msg->private_data; + struct xe_guc *guc = exec_queue_to_guc(q); - if (guc_engine_allowed_to_change_state(e)) { - MAKE_SCHED_CONTEXT_ACTION(e, ENABLE); + if (guc_exec_queue_allowed_to_change_state(q)) { + MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); - e->guc->resume_time = RESUME_PENDING; - clear_engine_suspended(e); - set_engine_pending_enable(e); - set_engine_enabled(e); - trace_xe_engine_scheduling_enable(e); + q->guc->resume_time = RESUME_PENDING; + clear_exec_queue_suspended(q); + set_exec_queue_pending_enable(q); + set_exec_queue_enabled(q); + trace_xe_exec_queue_scheduling_enable(q); xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); } else { - clear_engine_suspended(e); + clear_exec_queue_suspended(q); } } @@ -1082,22 +1082,22 @@ static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg) #define SUSPEND 3 #define RESUME 4 -static void guc_engine_process_msg(struct xe_sched_msg *msg) +static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { trace_xe_sched_msg_recv(msg); switch (msg->opcode) { case CLEANUP: - __guc_engine_process_msg_cleanup(msg); + __guc_exec_queue_process_msg_cleanup(msg); break; case SET_SCHED_PROPS: - __guc_engine_process_msg_set_sched_props(msg); + __guc_exec_queue_process_msg_set_sched_props(msg); break; case SUSPEND: - __guc_engine_process_msg_suspend(msg); + __guc_exec_queue_process_msg_suspend(msg); break; case RESUME: - __guc_engine_process_msg_resume(msg); + __guc_exec_queue_process_msg_resume(msg); break; default: XE_WARN_ON("Unknown message type"); @@ -1105,20 +1105,20 @@ static void guc_engine_process_msg(struct xe_sched_msg *msg) } static const struct drm_sched_backend_ops drm_sched_ops = { - .run_job = guc_engine_run_job, - .free_job = guc_engine_free_job, - .timedout_job = guc_engine_timedout_job, + .run_job = guc_exec_queue_run_job, + .free_job = guc_exec_queue_free_job, + .timedout_job = guc_exec_queue_timedout_job, }; static const struct xe_sched_backend_ops xe_sched_ops = { - .process_msg = guc_engine_process_msg, + .process_msg = guc_exec_queue_process_msg, }; -static int guc_engine_init(struct xe_engine *e) +static int guc_exec_queue_init(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched; - struct xe_guc *guc = engine_to_guc(e); - struct xe_guc_engine *ge; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_guc_exec_queue *ge; long timeout; int err; @@ -1128,15 +1128,15 @@ static int guc_engine_init(struct xe_engine *e) if (!ge) return -ENOMEM; - e->guc = ge; - ge->engine = e; + q->guc = ge; + ge->q = q; init_waitqueue_head(&ge->suspend_wait); - timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5; + timeout = xe_vm_no_dma_fences(q->vm) ? 
MAX_SCHEDULE_TIMEOUT : HZ * 5; err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL, - e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, + q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, - e->name, gt_to_xe(e->gt)->drm.dev); + q->name, gt_to_xe(q->gt)->drm.dev); if (err) goto err_free; @@ -1144,45 +1144,45 @@ static int guc_engine_init(struct xe_engine *e) err = xe_sched_entity_init(&ge->entity, sched); if (err) goto err_sched; - e->priority = XE_ENGINE_PRIORITY_NORMAL; + q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; - if (xe_engine_is_lr(e)) - INIT_WORK(&e->guc->lr_tdr, xe_guc_engine_lr_cleanup); + if (xe_exec_queue_is_lr(q)) + INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); mutex_lock(&guc->submission_state.lock); - err = alloc_guc_id(guc, e); + err = alloc_guc_id(guc, q); if (err) goto err_entity; - e->entity = &ge->entity; + q->entity = &ge->entity; if (guc_read_stopped(guc)) xe_sched_stop(sched); mutex_unlock(&guc->submission_state.lock); - switch (e->class) { + switch (q->class) { case XE_ENGINE_CLASS_RENDER: - sprintf(e->name, "rcs%d", e->guc->id); + sprintf(q->name, "rcs%d", q->guc->id); break; case XE_ENGINE_CLASS_VIDEO_DECODE: - sprintf(e->name, "vcs%d", e->guc->id); + sprintf(q->name, "vcs%d", q->guc->id); break; case XE_ENGINE_CLASS_VIDEO_ENHANCE: - sprintf(e->name, "vecs%d", e->guc->id); + sprintf(q->name, "vecs%d", q->guc->id); break; case XE_ENGINE_CLASS_COPY: - sprintf(e->name, "bcs%d", e->guc->id); + sprintf(q->name, "bcs%d", q->guc->id); break; case XE_ENGINE_CLASS_COMPUTE: - sprintf(e->name, "ccs%d", e->guc->id); + sprintf(q->name, "ccs%d", q->guc->id); break; default: - XE_WARN_ON(e->class); + XE_WARN_ON(q->class); } - trace_xe_engine_create(e); + trace_xe_exec_queue_create(q); return 0; @@ -1196,133 +1196,133 @@ err_free: return err; } -static void guc_engine_kill(struct xe_engine *e) +static void guc_exec_queue_kill(struct xe_exec_queue *q) { - trace_xe_engine_kill(e); - set_engine_killed(e); - xe_guc_engine_trigger_cleanup(e); + trace_xe_exec_queue_kill(q); + set_exec_queue_killed(q); + xe_guc_exec_queue_trigger_cleanup(q); } -static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg, - u32 opcode) +static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, + u32 opcode) { INIT_LIST_HEAD(&msg->link); msg->opcode = opcode; - msg->private_data = e; + msg->private_data = q; trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&e->guc->sched, msg); + xe_sched_add_msg(&q->guc->sched, msg); } #define STATIC_MSG_CLEANUP 0 #define STATIC_MSG_SUSPEND 1 #define STATIC_MSG_RESUME 2 -static void guc_engine_fini(struct xe_engine *e) +static void guc_exec_queue_fini(struct xe_exec_queue *q) { - struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP; + struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; - if (!(e->flags & ENGINE_FLAG_KERNEL)) - guc_engine_add_msg(e, msg, CLEANUP); + if (!(q->flags & EXEC_QUEUE_FLAG_KERNEL)) + guc_exec_queue_add_msg(q, msg, CLEANUP); else - __guc_engine_fini(engine_to_guc(e), e); + __guc_exec_queue_fini(exec_queue_to_guc(q), q); } -static int guc_engine_set_priority(struct xe_engine *e, - enum xe_engine_priority priority) +static int guc_exec_queue_set_priority(struct xe_exec_queue *q, + enum xe_exec_queue_priority priority) { struct xe_sched_msg *msg; - if (e->priority == priority || engine_killed_or_banned(e)) + if (q->priority == priority || exec_queue_killed_or_banned(q)) return 0; msg = kmalloc(sizeof(*msg), 
GFP_KERNEL); if (!msg) return -ENOMEM; - guc_engine_add_msg(e, msg, SET_SCHED_PROPS); - e->priority = priority; + guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); + q->priority = priority; return 0; } -static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us) +static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) { struct xe_sched_msg *msg; - if (e->sched_props.timeslice_us == timeslice_us || - engine_killed_or_banned(e)) + if (q->sched_props.timeslice_us == timeslice_us || + exec_queue_killed_or_banned(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; - e->sched_props.timeslice_us = timeslice_us; - guc_engine_add_msg(e, msg, SET_SCHED_PROPS); + q->sched_props.timeslice_us = timeslice_us; + guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); return 0; } -static int guc_engine_set_preempt_timeout(struct xe_engine *e, - u32 preempt_timeout_us) +static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, + u32 preempt_timeout_us) { struct xe_sched_msg *msg; - if (e->sched_props.preempt_timeout_us == preempt_timeout_us || - engine_killed_or_banned(e)) + if (q->sched_props.preempt_timeout_us == preempt_timeout_us || + exec_queue_killed_or_banned(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; - e->sched_props.preempt_timeout_us = preempt_timeout_us; - guc_engine_add_msg(e, msg, SET_SCHED_PROPS); + q->sched_props.preempt_timeout_us = preempt_timeout_us; + guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); return 0; } -static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms) +static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms) { - struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_gpu_scheduler *sched = &q->guc->sched; - XE_WARN_ON(engine_registered(e)); - XE_WARN_ON(engine_banned(e)); - XE_WARN_ON(engine_killed(e)); + XE_WARN_ON(exec_queue_registered(q)); + XE_WARN_ON(exec_queue_banned(q)); + XE_WARN_ON(exec_queue_killed(q)); sched->base.timeout = job_timeout_ms; return 0; } -static int guc_engine_suspend(struct xe_engine *e) +static int guc_exec_queue_suspend(struct xe_exec_queue *q) { - struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND; + struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (engine_killed_or_banned(e) || e->guc->suspend_pending) + if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) return -EINVAL; - e->guc->suspend_pending = true; - guc_engine_add_msg(e, msg, SUSPEND); + q->guc->suspend_pending = true; + guc_exec_queue_add_msg(q, msg, SUSPEND); return 0; } -static void guc_engine_suspend_wait(struct xe_engine *e) +static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); - wait_event(e->guc->suspend_wait, !e->guc->suspend_pending || + wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || guc_read_stopped(guc)); } -static void guc_engine_resume(struct xe_engine *e) +static void guc_exec_queue_resume(struct xe_exec_queue *q) { - struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME; + struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; - XE_WARN_ON(e->guc->suspend_pending); + XE_WARN_ON(q->guc->suspend_pending); - guc_engine_add_msg(e, msg, RESUME); + guc_exec_queue_add_msg(q, msg, RESUME); } /* @@ -1331,49 +1331,49 @@ static void guc_engine_resume(struct xe_engine *e) * really shouldn't do much other than trap into the 
DRM scheduler which * synchronizes these operations. */ -static const struct xe_engine_ops guc_engine_ops = { - .init = guc_engine_init, - .kill = guc_engine_kill, - .fini = guc_engine_fini, - .set_priority = guc_engine_set_priority, - .set_timeslice = guc_engine_set_timeslice, - .set_preempt_timeout = guc_engine_set_preempt_timeout, - .set_job_timeout = guc_engine_set_job_timeout, - .suspend = guc_engine_suspend, - .suspend_wait = guc_engine_suspend_wait, - .resume = guc_engine_resume, +static const struct xe_exec_queue_ops guc_exec_queue_ops = { + .init = guc_exec_queue_init, + .kill = guc_exec_queue_kill, + .fini = guc_exec_queue_fini, + .set_priority = guc_exec_queue_set_priority, + .set_timeslice = guc_exec_queue_set_timeslice, + .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, + .set_job_timeout = guc_exec_queue_set_job_timeout, + .suspend = guc_exec_queue_suspend, + .suspend_wait = guc_exec_queue_suspend_wait, + .resume = guc_exec_queue_resume, }; -static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e) +static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) { - struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_gpu_scheduler *sched = &q->guc->sched; /* Stop scheduling + flush any DRM scheduler operations */ xe_sched_submission_stop(sched); /* Clean up lost G2H + reset engine state */ - if (engine_registered(e)) { - if ((engine_banned(e) && engine_destroyed(e)) || - xe_engine_is_lr(e)) - xe_engine_put(e); - else if (engine_destroyed(e)) - __guc_engine_fini(guc, e); + if (exec_queue_registered(q)) { + if ((exec_queue_banned(q) && exec_queue_destroyed(q)) || + xe_exec_queue_is_lr(q)) + xe_exec_queue_put(q); + else if (exec_queue_destroyed(q)) + __guc_exec_queue_fini(guc, q); } - if (e->guc->suspend_pending) { - set_engine_suspended(e); - suspend_fence_signal(e); + if (q->guc->suspend_pending) { + set_exec_queue_suspended(q); + suspend_fence_signal(q); } - atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, - &e->guc->state); - e->guc->resume_time = 0; - trace_xe_engine_stop(e); + atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, + &q->guc->state); + q->guc->resume_time = 0; + trace_xe_exec_queue_stop(q); /* * Ban any engine (aside from kernel and engines used for VM ops) with a * started but not complete job or if a job has gone through a GT reset * more than twice. 
*/ - if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) { + if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { struct xe_sched_job *job = xe_sched_first_pending_job(sched); if (job) { @@ -1381,8 +1381,8 @@ static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e) !xe_sched_job_completed(job)) || xe_sched_invalidate_job(job, 2)) { trace_xe_sched_job_ban(job); - xe_sched_tdr_queue_imm(&e->guc->sched); - set_engine_banned(e); + xe_sched_tdr_queue_imm(&q->guc->sched); + set_exec_queue_banned(q); } } } @@ -1413,15 +1413,15 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc) int xe_guc_submit_stop(struct xe_guc *guc) { - struct xe_engine *e; + struct xe_exec_queue *q; unsigned long index; XE_WARN_ON(guc_read_stopped(guc) != 1); mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.engine_lookup, index, e) - guc_engine_stop(guc, e); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_stop(guc, q); mutex_unlock(&guc->submission_state.lock); @@ -1433,16 +1433,16 @@ int xe_guc_submit_stop(struct xe_guc *guc) return 0; } -static void guc_engine_start(struct xe_engine *e) +static void guc_exec_queue_start(struct xe_exec_queue *q) { - struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_gpu_scheduler *sched = &q->guc->sched; - if (!engine_killed_or_banned(e)) { + if (!exec_queue_killed_or_banned(q)) { int i; - trace_xe_engine_resubmit(e); - for (i = 0; i < e->width; ++i) - xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail); + trace_xe_exec_queue_resubmit(q); + for (i = 0; i < q->width; ++i) + xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail); xe_sched_resubmit_jobs(sched); } @@ -1451,15 +1451,15 @@ static void guc_engine_start(struct xe_engine *e) int xe_guc_submit_start(struct xe_guc *guc) { - struct xe_engine *e; + struct xe_exec_queue *q; unsigned long index; XE_WARN_ON(guc_read_stopped(guc) != 1); mutex_lock(&guc->submission_state.lock); atomic_dec(&guc->submission_state.stopped); - xa_for_each(&guc->submission_state.engine_lookup, index, e) - guc_engine_start(e); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_start(q); mutex_unlock(&guc->submission_state.lock); wake_up_all(&guc->ct.wq); @@ -1467,36 +1467,36 @@ int xe_guc_submit_start(struct xe_guc *guc) return 0; } -static struct xe_engine * -g2h_engine_lookup(struct xe_guc *guc, u32 guc_id) +static struct xe_exec_queue * +g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { struct xe_device *xe = guc_to_xe(guc); - struct xe_engine *e; + struct xe_exec_queue *q; if (unlikely(guc_id >= GUC_ID_MAX)) { drm_err(&xe->drm, "Invalid guc_id %u", guc_id); return NULL; } - e = xa_load(&guc->submission_state.engine_lookup, guc_id); - if (unlikely(!e)) { + q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); + if (unlikely(!q)) { drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); return NULL; } - XE_WARN_ON(e->guc->id != guc_id); + XE_WARN_ON(q->guc->id != guc_id); - return e; + return q; } -static void deregister_engine(struct xe_guc *guc, struct xe_engine *e) +static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) { u32 action[] = { XE_GUC_ACTION_DEREGISTER_CONTEXT, - e->guc->id, + q->guc->id, }; - trace_xe_engine_deregister(e); + trace_xe_exec_queue_deregister(q); xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); } @@ -1504,7 +1504,7 @@ static void deregister_engine(struct xe_guc *guc, struct xe_engine *e) int xe_guc_sched_done_handler(struct xe_guc *guc, 
u32 *msg, u32 len) { struct xe_device *xe = guc_to_xe(guc); - struct xe_engine *e; + struct xe_exec_queue *q; u32 guc_id = msg[0]; if (unlikely(len < 2)) { @@ -1512,34 +1512,34 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return -EPROTO; } - e = g2h_engine_lookup(guc, guc_id); - if (unlikely(!e)) + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) return -EPROTO; - if (unlikely(!engine_pending_enable(e) && - !engine_pending_disable(e))) { + if (unlikely(!exec_queue_pending_enable(q) && + !exec_queue_pending_disable(q))) { drm_err(&xe->drm, "Unexpected engine state 0x%04x", - atomic_read(&e->guc->state)); + atomic_read(&q->guc->state)); return -EPROTO; } - trace_xe_engine_scheduling_done(e); + trace_xe_exec_queue_scheduling_done(q); - if (engine_pending_enable(e)) { - e->guc->resume_time = ktime_get(); - clear_engine_pending_enable(e); + if (exec_queue_pending_enable(q)) { + q->guc->resume_time = ktime_get(); + clear_exec_queue_pending_enable(q); smp_wmb(); wake_up_all(&guc->ct.wq); } else { - clear_engine_pending_disable(e); - if (e->guc->suspend_pending) { - suspend_fence_signal(e); + clear_exec_queue_pending_disable(q); + if (q->guc->suspend_pending) { + suspend_fence_signal(q); } else { - if (engine_banned(e)) { + if (exec_queue_banned(q)) { smp_wmb(); wake_up_all(&guc->ct.wq); } - deregister_engine(guc, e); + deregister_exec_queue(guc, q); } } @@ -1549,7 +1549,7 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_device *xe = guc_to_xe(guc); - struct xe_engine *e; + struct xe_exec_queue *q; u32 guc_id = msg[0]; if (unlikely(len < 1)) { @@ -1557,33 +1557,33 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return -EPROTO; } - e = g2h_engine_lookup(guc, guc_id); - if (unlikely(!e)) + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) return -EPROTO; - if (!engine_destroyed(e) || engine_pending_disable(e) || - engine_pending_enable(e) || engine_enabled(e)) { + if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || + exec_queue_pending_enable(q) || exec_queue_enabled(q)) { drm_err(&xe->drm, "Unexpected engine state 0x%04x", - atomic_read(&e->guc->state)); + atomic_read(&q->guc->state)); return -EPROTO; } - trace_xe_engine_deregister_done(e); + trace_xe_exec_queue_deregister_done(q); - clear_engine_registered(e); + clear_exec_queue_registered(q); - if (engine_banned(e) || xe_engine_is_lr(e)) - xe_engine_put(e); + if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) + xe_exec_queue_put(q); else - __guc_engine_fini(guc, e); + __guc_exec_queue_fini(guc, q); return 0; } -int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) +int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_device *xe = guc_to_xe(guc); - struct xe_engine *e; + struct xe_exec_queue *q; u32 guc_id = msg[0]; if (unlikely(len < 1)) { @@ -1591,34 +1591,34 @@ int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) return -EPROTO; } - e = g2h_engine_lookup(guc, guc_id); - if (unlikely(!e)) + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) return -EPROTO; drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id); /* FIXME: Do error capture, most likely async */ - trace_xe_engine_reset(e); + trace_xe_exec_queue_reset(q); /* * A banned engine is a NOP at this point (came from - * guc_engine_timedout_job). Otherwise, kick drm scheduler to cancel + * guc_exec_queue_timedout_job). 
Otherwise, kick drm scheduler to cancel * jobs by setting timeout of the job to the minimum value kicking - * guc_engine_timedout_job. + * guc_exec_queue_timedout_job. */ - set_engine_reset(e); - if (!engine_banned(e)) - xe_guc_engine_trigger_cleanup(e); + set_exec_queue_reset(q); + if (!exec_queue_banned(q)) + xe_guc_exec_queue_trigger_cleanup(q); return 0; } -int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, - u32 len) +int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, + u32 len) { struct xe_device *xe = guc_to_xe(guc); - struct xe_engine *e; + struct xe_exec_queue *q; u32 guc_id = msg[0]; if (unlikely(len < 1)) { @@ -1626,22 +1626,22 @@ int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, return -EPROTO; } - e = g2h_engine_lookup(guc, guc_id); - if (unlikely(!e)) + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) return -EPROTO; drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); - trace_xe_engine_memory_cat_error(e); + trace_xe_exec_queue_memory_cat_error(q); /* Treat the same as engine reset */ - set_engine_reset(e); - if (!engine_banned(e)) - xe_guc_engine_trigger_cleanup(e); + set_exec_queue_reset(q); + if (!exec_queue_banned(q)) + xe_guc_exec_queue_trigger_cleanup(q); return 0; } -int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) +int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_device *xe = guc_to_xe(guc); u8 guc_class, instance; @@ -1666,16 +1666,16 @@ int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) } static void -guc_engine_wq_snapshot_capture(struct xe_engine *e, - struct xe_guc_submit_engine_snapshot *snapshot) +guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, + struct xe_guc_submit_exec_queue_snapshot *snapshot) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(e->lrc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc); int i; - snapshot->guc.wqi_head = e->guc->wqi_head; - snapshot->guc.wqi_tail = e->guc->wqi_tail; + snapshot->guc.wqi_head = q->guc->wqi_head; + snapshot->guc.wqi_tail = q->guc->wqi_tail; snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); snapshot->parallel.wq_desc.status = parallel_read(xe, map, @@ -1692,8 +1692,8 @@ guc_engine_wq_snapshot_capture(struct xe_engine *e, } static void -guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, - struct drm_printer *p) +guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, + struct drm_printer *p) { int i; @@ -1714,23 +1714,23 @@ guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, } /** - * xe_guc_engine_snapshot_capture - Take a quick snapshot of the GuC Engine. - * @e: Xe Engine. + * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. + * @q: Xe exec queue. * * This can be printed out in a later stage like during dev_coredump * analysis. * * Returns: a GuC Submit Engine snapshot object that must be freed by the - * caller, using `xe_guc_engine_snapshot_free`. + * caller, using `xe_guc_exec_queue_snapshot_free`. 
*/ -struct xe_guc_submit_engine_snapshot * -xe_guc_engine_snapshot_capture(struct xe_engine *e) +struct xe_guc_submit_exec_queue_snapshot * +xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) { - struct xe_guc *guc = engine_to_guc(e); + struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - struct xe_gpu_scheduler *sched = &e->guc->sched; + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_job *job; - struct xe_guc_submit_engine_snapshot *snapshot; + struct xe_guc_submit_exec_queue_snapshot *snapshot; int i; snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); @@ -1740,25 +1740,25 @@ xe_guc_engine_snapshot_capture(struct xe_engine *e) return NULL; } - snapshot->guc.id = e->guc->id; - memcpy(&snapshot->name, &e->name, sizeof(snapshot->name)); - snapshot->class = e->class; - snapshot->logical_mask = e->logical_mask; - snapshot->width = e->width; - snapshot->refcount = kref_read(&e->refcount); + snapshot->guc.id = q->guc->id; + memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); + snapshot->class = q->class; + snapshot->logical_mask = q->logical_mask; + snapshot->width = q->width; + snapshot->refcount = kref_read(&q->refcount); snapshot->sched_timeout = sched->base.timeout; - snapshot->sched_props.timeslice_us = e->sched_props.timeslice_us; + snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; snapshot->sched_props.preempt_timeout_us = - e->sched_props.preempt_timeout_us; + q->sched_props.preempt_timeout_us; - snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot), + snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), GFP_ATOMIC); if (!snapshot->lrc) { drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n"); } else { - for (i = 0; i < e->width; ++i) { - struct xe_lrc *lrc = e->lrc + i; + for (i = 0; i < q->width; ++i) { + struct xe_lrc *lrc = q->lrc + i; snapshot->lrc[i].context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc)); @@ -1771,12 +1771,12 @@ xe_guc_engine_snapshot_capture(struct xe_engine *e) } } - snapshot->schedule_state = atomic_read(&e->guc->state); - snapshot->engine_flags = e->flags; + snapshot->schedule_state = atomic_read(&q->guc->state); + snapshot->exec_queue_flags = q->flags; - snapshot->parallel_execution = xe_engine_is_parallel(e); + snapshot->parallel_execution = xe_exec_queue_is_parallel(q); if (snapshot->parallel_execution) - guc_engine_wq_snapshot_capture(e, snapshot); + guc_exec_queue_wq_snapshot_capture(q, snapshot); spin_lock(&sched->base.job_list_lock); snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); @@ -1806,15 +1806,15 @@ xe_guc_engine_snapshot_capture(struct xe_engine *e) } /** - * xe_guc_engine_snapshot_print - Print out a given GuC Engine snapshot. + * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. * @snapshot: GuC Submit Engine snapshot object. * @p: drm_printer where it will be printed out. * * This function prints out a given GuC Submit Engine snapshot object. 
*/ void -xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, - struct drm_printer *p) +xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, + struct drm_printer *p) { int i; @@ -1846,10 +1846,10 @@ xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno); } drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); - drm_printf(p, "\tFlags: 0x%lx\n", snapshot->engine_flags); + drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); if (snapshot->parallel_execution) - guc_engine_wq_snapshot_print(snapshot, p); + guc_exec_queue_wq_snapshot_print(snapshot, p); for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size; i++) @@ -1860,14 +1860,14 @@ xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, } /** - * xe_guc_engine_snapshot_free - Free all allocated objects for a given + * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given * snapshot. * @snapshot: GuC Submit Engine snapshot object. * * This function free all the memory that needed to be allocated at capture * time. */ -void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot) +void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) { if (!snapshot) return; @@ -1877,13 +1877,13 @@ void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot) kfree(snapshot); } -static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) +static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) { - struct xe_guc_submit_engine_snapshot *snapshot; + struct xe_guc_submit_exec_queue_snapshot *snapshot; - snapshot = xe_guc_engine_snapshot_capture(e); - xe_guc_engine_snapshot_print(snapshot, p); - xe_guc_engine_snapshot_free(snapshot); + snapshot = xe_guc_exec_queue_snapshot_capture(q); + xe_guc_exec_queue_snapshot_print(snapshot, p); + xe_guc_exec_queue_snapshot_free(snapshot); } /** @@ -1895,14 +1895,14 @@ static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) */ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) { - struct xe_engine *e; + struct xe_exec_queue *q; unsigned long index; if (!xe_device_guc_submission_enabled(guc_to_xe(guc))) return; mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.engine_lookup, index, e) - guc_engine_print(e, p); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_print(q, p); mutex_unlock(&guc->submission_state.lock); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 4153c2d22013..fc97869c5b86 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -9,7 +9,7 @@ #include struct drm_printer; -struct xe_engine; +struct xe_exec_queue; struct xe_guc; int xe_guc_submit_init(struct xe_guc *guc); @@ -21,18 +21,18 @@ int xe_guc_submit_start(struct xe_guc *guc); int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len); -int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len); -int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, - u32 len); -int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); +int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len); +int 
xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, + u32 len); +int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); -struct xe_guc_submit_engine_snapshot * -xe_guc_engine_snapshot_capture(struct xe_engine *e); +struct xe_guc_submit_exec_queue_snapshot * +xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); void -xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, - struct drm_printer *p); +xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, + struct drm_printer *p); void -xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot); +xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot); void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h index 6765b2c6eab1..649b0a852692 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -79,20 +79,20 @@ struct pending_list_snapshot { }; /** - * struct xe_guc_submit_engine_snapshot - Snapshot for devcoredump + * struct xe_guc_submit_exec_queue_snapshot - Snapshot for devcoredump */ -struct xe_guc_submit_engine_snapshot { - /** @name: name of this engine */ +struct xe_guc_submit_exec_queue_snapshot { + /** @name: name of this exec queue */ char name[MAX_FENCE_NAME_LEN]; - /** @class: class of this engine */ + /** @class: class of this exec queue */ enum xe_engine_class class; /** - * @logical_mask: logical mask of where job submitted to engine can run + * @logical_mask: logical mask of where job submitted to exec queue can run */ u32 logical_mask; - /** @width: width (number BB submitted per exec) of this engine */ + /** @width: width (number BB submitted per exec) of this exec queue */ u16 width; - /** @refcount: ref count of this engine */ + /** @refcount: ref count of this exec queue */ u32 refcount; /** * @sched_timeout: the time after which a job is removed from the @@ -113,8 +113,8 @@ struct xe_guc_submit_engine_snapshot { /** @schedule_state: Schedule State at the moment of Crash */ u32 schedule_state; - /** @engine_flags: Flags of the faulty engine */ - unsigned long engine_flags; + /** @exec_queue_flags: Flags of the faulty exec_queue */ + unsigned long exec_queue_flags; /** @guc: GuC Engine Snapshot */ struct { @@ -122,7 +122,7 @@ struct xe_guc_submit_engine_snapshot { u32 wqi_head; /** @wqi_tail: work queue item tail */ u32 wqi_tail; - /** @id: GuC id for this xe_engine */ + /** @id: GuC id for this exec_queue */ u16 id; } guc; diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index a304dce4e9f4..a5e58917a499 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -33,8 +33,8 @@ struct xe_guc { struct xe_guc_pc pc; /** @submission_state: GuC submission state */ struct { - /** @engine_lookup: Lookup an xe_engine from guc_id */ - struct xarray engine_lookup; + /** @exec_queue_lookup: Lookup an xe_engine from guc_id */ + struct xarray exec_queue_lookup; /** @guc_ids: used to allocate new guc_ids, single-lrc */ struct ida guc_ids; /** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 05f3d8d68379..09db8da261a3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -12,7 +12,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include 
"xe_device.h" -#include "xe_engine_types.h" +#include "xe_exec_queue_types.h" #include "xe_gt.h" #include "xe_hw_fence.h" #include "xe_map.h" @@ -604,7 +604,7 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) #define ACC_NOTIFY_S 16 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_engine *e, struct xe_vm *vm, u32 ring_size) + struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) { struct xe_gt *gt = hwe->gt; struct xe_tile *tile = gt_to_tile(gt); @@ -669,12 +669,12 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, - (e->usm.acc_granularity << + (q->usm.acc_granularity << ACC_GRANULARITY_S) | vm->usm.asid); if (xe->info.supports_usm && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, - (e->usm.acc_notify << ACC_NOTIFY_S) | - e->usm.acc_trigger); + (q->usm.acc_notify << ACC_NOTIFY_S) | + q->usm.acc_trigger); lrc->desc = GEN8_CTX_VALID; lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e37f89e75ef8..3a6e8fc5a837 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -8,7 +8,7 @@ #include "xe_lrc_types.h" struct xe_device; -struct xe_engine; +struct xe_exec_queue; enum xe_engine_class; struct xe_hw_engine; struct xe_vm; @@ -16,7 +16,7 @@ struct xe_vm; #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_engine *e, struct xe_vm *vm, u32 ring_size); + struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size); void xe_lrc_finish(struct xe_lrc *lrc); size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 60f7226c92ff..d0816d2090f0 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -34,8 +34,8 @@ * struct xe_migrate - migrate context. */ struct xe_migrate { - /** @eng: Default engine used for migration */ - struct xe_engine *eng; + /** @q: Default exec queue used for migration */ + struct xe_exec_queue *q; /** @tile: Backpointer to the tile this struct xe_migrate belongs to. */ struct xe_tile *tile; /** @job_mutex: Timeline mutex for @eng. 
*/ @@ -78,9 +78,9 @@ struct xe_migrate { * * Return: The default migrate engine */ -struct xe_engine *xe_tile_migrate_engine(struct xe_tile *tile) +struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile) { - return tile->migrate->eng; + return tile->migrate->q; } static void xe_migrate_fini(struct drm_device *dev, void *arg) @@ -88,11 +88,11 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg) struct xe_migrate *m = arg; struct ww_acquire_ctx ww; - xe_vm_lock(m->eng->vm, &ww, 0, false); + xe_vm_lock(m->q->vm, &ww, 0, false); xe_bo_unpin(m->pt_bo); if (m->cleared_bo) xe_bo_unpin(m->cleared_bo); - xe_vm_unlock(m->eng->vm, &ww); + xe_vm_unlock(m->q->vm, &ww); dma_fence_put(m->fence); if (m->cleared_bo) @@ -100,8 +100,8 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg) xe_bo_put(m->pt_bo); drm_suballoc_manager_fini(&m->vm_update_sa); mutex_destroy(&m->job_mutex); - xe_vm_close_and_put(m->eng->vm); - xe_engine_put(m->eng); + xe_vm_close_and_put(m->q->vm); + xe_exec_queue_put(m->q); } static u64 xe_migrate_vm_addr(u64 slot, u32 level) @@ -341,20 +341,20 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) if (!hwe) return ERR_PTR(-EINVAL); - m->eng = xe_engine_create(xe, vm, - BIT(hwe->logical_instance), 1, - hwe, ENGINE_FLAG_KERNEL); + m->q = xe_exec_queue_create(xe, vm, + BIT(hwe->logical_instance), 1, + hwe, EXEC_QUEUE_FLAG_KERNEL); } else { - m->eng = xe_engine_create_class(xe, primary_gt, vm, - XE_ENGINE_CLASS_COPY, - ENGINE_FLAG_KERNEL); + m->q = xe_exec_queue_create_class(xe, primary_gt, vm, + XE_ENGINE_CLASS_COPY, + EXEC_QUEUE_FLAG_KERNEL); } - if (IS_ERR(m->eng)) { + if (IS_ERR(m->q)) { xe_vm_close_and_put(vm); - return ERR_CAST(m->eng); + return ERR_CAST(m->q); } if (xe->info.supports_usm) - m->eng->priority = XE_ENGINE_PRIORITY_KERNEL; + m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; mutex_init(&m->job_mutex); @@ -456,7 +456,7 @@ static void emit_pte(struct xe_migrate *m, addr = xe_res_dma(cur) & PAGE_MASK; if (is_vram) { /* Is this a 64K PTE entry? */ - if ((m->eng->vm->flags & XE_VM_FLAG_64K) && + if ((m->q->vm->flags & XE_VM_FLAG_64K) && !(cur_ofs & (16 * 8 - 1))) { XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K)); addr |= XE_PTE_PS64; @@ -714,7 +714,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, src_L0, ccs_ofs, copy_ccs); mutex_lock(&m->job_mutex); - job = xe_bb_create_migration_job(m->eng, bb, + job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm), update_idx); if (IS_ERR(job)) { @@ -938,7 +938,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, } mutex_lock(&m->job_mutex); - job = xe_bb_create_migration_job(m->eng, bb, + job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm), update_idx); if (IS_ERR(job)) { @@ -1024,7 +1024,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m) { - return xe_vm_get(m->eng->vm); + return xe_vm_get(m->q->vm); } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) @@ -1106,7 +1106,7 @@ static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) * @m: The migrate context. * @vm: The vm we'll be updating. * @bo: The bo whose dma-resv we will await before updating, or NULL if userptr. - * @eng: The engine to be used for the update or NULL if the default + * @q: The exec queue to be used for the update or NULL if the default * migration engine is to be used. * @updates: An array of update descriptors. * @num_updates: Number of descriptors in @updates. 
@@ -1132,7 +1132,7 @@ struct dma_fence * xe_migrate_update_pgtables(struct xe_migrate *m, struct xe_vm *vm, struct xe_bo *bo, - struct xe_engine *eng, + struct xe_exec_queue *q, const struct xe_vm_pgtable_update *updates, u32 num_updates, struct xe_sync_entry *syncs, u32 num_syncs, @@ -1150,13 +1150,13 @@ xe_migrate_update_pgtables(struct xe_migrate *m, u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0; u64 addr; int err = 0; - bool usm = !eng && xe->info.supports_usm; + bool usm = !q && xe->info.supports_usm; bool first_munmap_rebind = vma && vma->gpuva.flags & XE_VMA_FIRST_REBIND; - struct xe_engine *eng_override = !eng ? m->eng : eng; + struct xe_exec_queue *q_override = !q ? m->q : q; /* Use the CPU if no in syncs and engine is idle */ - if (no_in_syncs(syncs, num_syncs) && xe_engine_is_idle(eng_override)) { + if (no_in_syncs(syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) { fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, num_updates, first_munmap_rebind, @@ -1186,14 +1186,14 @@ xe_migrate_update_pgtables(struct xe_migrate *m, */ XE_WARN_ON(batch_size >= SZ_128K); - bb = xe_bb_new(gt, batch_size, !eng && xe->info.supports_usm); + bb = xe_bb_new(gt, batch_size, !q && xe->info.supports_usm); if (IS_ERR(bb)) return ERR_CAST(bb); /* For sysmem PTE's, need to map them in our hole.. */ if (!IS_DGFX(xe)) { ppgtt_ofs = NUM_KERNEL_PDE - 1; - if (eng) { + if (q) { XE_WARN_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT); sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, @@ -1249,10 +1249,10 @@ xe_migrate_update_pgtables(struct xe_migrate *m, write_pgtable(tile, bb, 0, &updates[i], pt_update); } - if (!eng) + if (!q) mutex_lock(&m->job_mutex); - job = xe_bb_create_migration_job(eng ?: m->eng, bb, + job = xe_bb_create_migration_job(q ?: m->q, bb, xe_migrate_batch_base(m, usm), update_idx); if (IS_ERR(job)) { @@ -1295,7 +1295,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - if (!eng) + if (!q) mutex_unlock(&m->job_mutex); xe_bb_free(bb, fence); @@ -1306,7 +1306,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, err_job: xe_sched_job_put(job); err_bb: - if (!eng) + if (!q) mutex_unlock(&m->job_mutex); xe_bb_free(bb, NULL); err: diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 0d62aff6421c..c729241776ad 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -14,7 +14,7 @@ struct ttm_resource; struct xe_bo; struct xe_gt; -struct xe_engine; +struct xe_exec_queue; struct xe_migrate; struct xe_migrate_pt_update; struct xe_sync_entry; @@ -97,7 +97,7 @@ struct dma_fence * xe_migrate_update_pgtables(struct xe_migrate *m, struct xe_vm *vm, struct xe_bo *bo, - struct xe_engine *eng, + struct xe_exec_queue *q, const struct xe_vm_pgtable_update *updates, u32 num_updates, struct xe_sync_entry *syncs, u32 num_syncs, @@ -105,5 +105,5 @@ xe_migrate_update_pgtables(struct xe_migrate *m, void xe_migrate_wait(struct xe_migrate *m); -struct xe_engine *xe_tile_migrate_engine(struct xe_tile *tile); +struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile); #endif diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index 25f7b35a76da..d0f1ec4b0336 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -8,7 +8,7 @@ #include -struct xe_engine; +struct xe_exec_queue; struct xe_gt; void xe_mocs_init_early(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c 
b/drivers/gpu/drm/xe/xe_preempt_fence.c index e86604e0174d..7bce2a332603 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -15,19 +15,19 @@ static void preempt_fence_work_func(struct work_struct *w) bool cookie = dma_fence_begin_signalling(); struct xe_preempt_fence *pfence = container_of(w, typeof(*pfence), preempt_work); - struct xe_engine *e = pfence->engine; + struct xe_exec_queue *q = pfence->q; if (pfence->error) dma_fence_set_error(&pfence->base, pfence->error); else - e->ops->suspend_wait(e); + q->ops->suspend_wait(q); dma_fence_signal(&pfence->base); dma_fence_end_signalling(cookie); - xe_vm_queue_rebind_worker(e->vm); + xe_vm_queue_rebind_worker(q->vm); - xe_engine_put(e); + xe_exec_queue_put(q); } static const char * @@ -46,9 +46,9 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence) { struct xe_preempt_fence *pfence = container_of(fence, typeof(*pfence), base); - struct xe_engine *e = pfence->engine; + struct xe_exec_queue *q = pfence->q; - pfence->error = e->ops->suspend(e); + pfence->error = q->ops->suspend(q); queue_work(system_unbound_wq, &pfence->preempt_work); return true; } @@ -104,43 +104,43 @@ void xe_preempt_fence_free(struct xe_preempt_fence *pfence) * xe_preempt_fence_alloc(). * @pfence: The struct xe_preempt_fence pointer returned from * xe_preempt_fence_alloc(). - * @e: The struct xe_engine used for arming. + * @q: The struct xe_exec_queue used for arming. * @context: The dma-fence context used for arming. * @seqno: The dma-fence seqno used for arming. * * Inserts the preempt fence into @context's timeline, takes @link off any - * list, and registers the struct xe_engine as the xe_engine to be preempted. + * list, and registers the struct xe_exec_queue as the xe_engine to be preempted. * * Return: A pointer to a struct dma_fence embedded into the preempt fence. * This function doesn't error. */ struct dma_fence * -xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e, +xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, u64 context, u32 seqno) { list_del_init(&pfence->link); - pfence->engine = xe_engine_get(e); + pfence->q = xe_exec_queue_get(q); dma_fence_init(&pfence->base, &preempt_fence_ops, - &e->compute.lock, context, seqno); + &q->compute.lock, context, seqno); return &pfence->base; } /** * xe_preempt_fence_create() - Helper to create and arm a preempt fence. - * @e: The struct xe_engine used for arming. + * @q: The struct xe_exec_queue used for arming. * @context: The dma-fence context used for arming. * @seqno: The dma-fence seqno used for arming. * * Allocates and inserts the preempt fence into @context's timeline, - * and registers @e as the struct xe_engine to be preempted. + * and registers @e as the struct xe_exec_queue to be preempted. * * Return: A pointer to the resulting struct dma_fence on success. An error * pointer on error. 
In particular if allocation fails it returns * ERR_PTR(-ENOMEM); */ struct dma_fence * -xe_preempt_fence_create(struct xe_engine *e, +xe_preempt_fence_create(struct xe_exec_queue *q, u64 context, u32 seqno) { struct xe_preempt_fence *pfence; @@ -149,7 +149,7 @@ xe_preempt_fence_create(struct xe_engine *e, if (IS_ERR(pfence)) return ERR_CAST(pfence); - return xe_preempt_fence_arm(pfence, e, context, seqno); + return xe_preempt_fence_arm(pfence, q, context, seqno); } bool xe_fence_is_xe_preempt(const struct dma_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.h b/drivers/gpu/drm/xe/xe_preempt_fence.h index 4f3966103203..9406c6fea525 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence.h @@ -11,7 +11,7 @@ struct list_head; struct dma_fence * -xe_preempt_fence_create(struct xe_engine *e, +xe_preempt_fence_create(struct xe_exec_queue *q, u64 context, u32 seqno); struct xe_preempt_fence *xe_preempt_fence_alloc(void); @@ -19,7 +19,7 @@ struct xe_preempt_fence *xe_preempt_fence_alloc(void); void xe_preempt_fence_free(struct xe_preempt_fence *pfence); struct dma_fence * -xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_engine *e, +xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, u64 context, u32 seqno); static inline struct xe_preempt_fence * diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index 9d9efd8ff0ed..b54b5c29b533 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -9,12 +9,11 @@ #include #include -struct xe_engine; +struct xe_exec_queue; /** * struct xe_preempt_fence - XE preempt fence * - * A preemption fence which suspends the execution of an xe_engine on the * hardware and triggers a callback once the xe_engine is complete. */ struct xe_preempt_fence { @@ -22,8 +21,8 @@ struct xe_preempt_fence { struct dma_fence base; /** @link: link into list of pending preempt fences */ struct list_head link; - /** @engine: xe engine for this preempt fence */ - struct xe_engine *engine; + /** @q: exec queue for this preempt fence */ + struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; /** @error: preempt fence is in error state */ diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index b82ce01cc4cb..c21d2681b419 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1307,7 +1307,7 @@ static void xe_pt_calc_rfence_interval(struct xe_vma *vma, * address range. * @tile: The tile to bind for. * @vma: The vma to bind. - * @e: The engine with which to do pipelined page-table updates. + * @q: The exec_queue with which to do pipelined page-table updates. * @syncs: Entries to sync on before binding the built tree to the live vm tree. * @num_syncs: Number of @sync entries. * @rebind: Whether we're rebinding this vma to the same address range without @@ -1325,7 +1325,7 @@ static void xe_pt_calc_rfence_interval(struct xe_vma *vma, * on success, an error pointer on error. 
*/ struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, +__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool rebind) { @@ -1351,7 +1351,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, vm_dbg(&xe_vma_vm(vma)->xe->drm, "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma) - 1, e); + xe_vma_start(vma), xe_vma_end(vma) - 1, q); err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind); if (err) @@ -1388,7 +1388,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, } fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), e, + vm, xe_vma_bo(vma), q, entries, num_entries, syncs, num_syncs, &bind_pt_update.base); @@ -1663,7 +1663,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { * address range. * @tile: The tile to unbind for. * @vma: The vma to unbind. - * @e: The engine with which to do pipelined page-table updates. + * @q: The exec_queue with which to do pipelined page-table updates. * @syncs: Entries to sync on before disconnecting the tree to be destroyed. * @num_syncs: Number of @sync entries. * @@ -1679,7 +1679,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { * on success, an error pointer on error. */ struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, +__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs) { struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; @@ -1704,7 +1704,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e vm_dbg(&xe_vma_vm(vma)->xe->drm, "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma) - 1, e); + xe_vma_start(vma), xe_vma_end(vma) - 1, q); num_entries = xe_pt_stage_unbind(tile, vma, entries); XE_WARN_ON(num_entries > ARRAY_SIZE(entries)); @@ -1729,8 +1729,8 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e * lower level, because it needs to be more conservative. */ fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, e ? e : - vm->eng[tile->id], + vm, NULL, q ? 
q : + vm->q[tile->id], entries, num_entries, syncs, num_syncs, &unbind_pt_update.base); diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index bbb00d6461ff..01be7ab08f87 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -12,7 +12,7 @@ struct dma_fence; struct xe_bo; struct xe_device; -struct xe_engine; +struct xe_exec_queue; struct xe_sync_entry; struct xe_tile; struct xe_vm; @@ -35,12 +35,12 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred); struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, +__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool rebind); struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e, +__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs); bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 382851f436b7..7ea235c71385 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -203,7 +203,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] = hweight_long(xe->info.mem_region_mask); config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] = - xe_engine_device_get_max_priority(xe); + xe_exec_queue_device_get_max_priority(xe); if (copy_to_user(query_ptr, config, size)) { kfree(config); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 2d0d392cd691..6346ed24e279 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -10,7 +10,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "regs/xe_regs.h" -#include "xe_engine_types.h" +#include "xe_exec_queue_types.h" #include "xe_gt.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -156,7 +156,7 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) { - struct xe_gt *gt = job->engine->gt; + struct xe_gt *gt = job->q->gt; bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 flags; @@ -172,7 +172,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) if (lacks_render) flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; - else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) + else if (job->q->class == XE_ENGINE_CLASS_COMPUTE) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH; @@ -202,7 +202,7 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, static u32 get_ppgtt_flag(struct xe_sched_job *job) { - return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0; + return !(job->q->flags & EXEC_QUEUE_FLAG_WA) ? 
BIT(8) : 0; } static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, @@ -210,7 +210,7 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - struct xe_vm *vm = job->engine->vm; + struct xe_vm *vm = job->q->vm; if (vm->batch_invalidate_tlb) { dw[i++] = preparser_disable(true); @@ -255,10 +255,10 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - struct xe_gt *gt = job->engine->gt; + struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); - bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE; - struct xe_vm *vm = job->engine->vm; + bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; + struct xe_vm *vm = job->q->vm; dw[i++] = preparser_disable(true); @@ -302,16 +302,16 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); - struct xe_gt *gt = job->engine->gt; + struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); - struct xe_vm *vm = job->engine->vm; + struct xe_vm *vm = job->q->vm; u32 mask_flags = 0; dw[i++] = preparser_disable(true); if (lacks_render) mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; - else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE) + else if (job->q->class == XE_ENGINE_CLASS_COMPUTE) mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */ @@ -378,14 +378,14 @@ static void emit_job_gen12_copy(struct xe_sched_job *job) { int i; - if (xe_sched_job_is_migration(job->engine)) { - emit_migration_job_gen12(job, job->engine->lrc, + if (xe_sched_job_is_migration(job->q)) { + emit_migration_job_gen12(job, job->q->lrc, xe_sched_job_seqno(job)); return; } - for (i = 0; i < job->engine->width; ++i) - __emit_job_gen12_copy(job, job->engine->lrc + i, + for (i = 0; i < job->q->width; ++i) + __emit_job_gen12_copy(job, job->q->lrc + i, job->batch_addr[i], xe_sched_job_seqno(job)); } @@ -395,8 +395,8 @@ static void emit_job_gen12_video(struct xe_sched_job *job) int i; /* FIXME: Not doing parallel handshake for now */ - for (i = 0; i < job->engine->width; ++i) - __emit_job_gen12_video(job, job->engine->lrc + i, + for (i = 0; i < job->q->width; ++i) + __emit_job_gen12_video(job, job->q->lrc + i, job->batch_addr[i], xe_sched_job_seqno(job)); } @@ -405,8 +405,8 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job) { int i; - for (i = 0; i < job->engine->width; ++i) - __emit_job_gen12_render_compute(job, job->engine->lrc + i, + for (i = 0; i < job->q->width; ++i) + __emit_job_gen12_render_compute(job, job->q->lrc + i, job->batch_addr[i], xe_sched_job_seqno(job)); } diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 9944858de4d2..de2851d24c96 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -57,58 +57,58 @@ static struct xe_sched_job *job_alloc(bool parallel) xe_sched_job_slab, GFP_KERNEL); } -bool xe_sched_job_is_migration(struct xe_engine *e) +bool xe_sched_job_is_migration(struct xe_exec_queue *q) { - return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION) && - !(e->flags & ENGINE_FLAG_WA); + return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION) && + !(q->flags & EXEC_QUEUE_FLAG_WA); } static void job_free(struct 
xe_sched_job *job) { - struct xe_engine *e = job->engine; - bool is_migration = xe_sched_job_is_migration(e); + struct xe_exec_queue *q = job->q; + bool is_migration = xe_sched_job_is_migration(q); - kmem_cache_free(xe_engine_is_parallel(job->engine) || is_migration ? + kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ? xe_sched_job_parallel_slab : xe_sched_job_slab, job); } static struct xe_device *job_to_xe(struct xe_sched_job *job) { - return gt_to_xe(job->engine->gt); + return gt_to_xe(job->q->gt); } -struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, +struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, u64 *batch_addr) { struct xe_sched_job *job; struct dma_fence **fences; - bool is_migration = xe_sched_job_is_migration(e); + bool is_migration = xe_sched_job_is_migration(q); int err; int i, j; u32 width; /* Migration and kernel engines have their own locking */ - if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM | - ENGINE_FLAG_WA))) { - lockdep_assert_held(&e->vm->lock); - if (!xe_vm_no_dma_fences(e->vm)) - xe_vm_assert_held(e->vm); + if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_WA))) { + lockdep_assert_held(&q->vm->lock); + if (!xe_vm_no_dma_fences(q->vm)) + xe_vm_assert_held(q->vm); } - job = job_alloc(xe_engine_is_parallel(e) || is_migration); + job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration); if (!job) return ERR_PTR(-ENOMEM); - job->engine = e; + job->q = q; kref_init(&job->refcount); - xe_engine_get(job->engine); + xe_exec_queue_get(job->q); - err = drm_sched_job_init(&job->drm, e->entity, 1, NULL); + err = drm_sched_job_init(&job->drm, q->entity, 1, NULL); if (err) goto err_free; - if (!xe_engine_is_parallel(e)) { - job->fence = xe_lrc_create_seqno_fence(e->lrc); + if (!xe_exec_queue_is_parallel(q)) { + job->fence = xe_lrc_create_seqno_fence(q->lrc); if (IS_ERR(job->fence)) { err = PTR_ERR(job->fence); goto err_sched_job; @@ -116,38 +116,38 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, } else { struct dma_fence_array *cf; - fences = kmalloc_array(e->width, sizeof(*fences), GFP_KERNEL); + fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL); if (!fences) { err = -ENOMEM; goto err_sched_job; } - for (j = 0; j < e->width; ++j) { - fences[j] = xe_lrc_create_seqno_fence(e->lrc + j); + for (j = 0; j < q->width; ++j) { + fences[j] = xe_lrc_create_seqno_fence(q->lrc + j); if (IS_ERR(fences[j])) { err = PTR_ERR(fences[j]); goto err_fences; } } - cf = dma_fence_array_create(e->width, fences, - e->parallel.composite_fence_ctx, - e->parallel.composite_fence_seqno++, + cf = dma_fence_array_create(q->width, fences, + q->parallel.composite_fence_ctx, + q->parallel.composite_fence_seqno++, false); if (!cf) { - --e->parallel.composite_fence_seqno; + --q->parallel.composite_fence_seqno; err = -ENOMEM; goto err_fences; } /* Sanity check */ - for (j = 0; j < e->width; ++j) + for (j = 0; j < q->width; ++j) XE_WARN_ON(cf->base.seqno != fences[j]->seqno); job->fence = &cf->base; } - width = e->width; + width = q->width; if (is_migration) width = 2; @@ -155,7 +155,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, job->batch_addr[i] = batch_addr[i]; /* All other jobs require a VM to be open which has a ref */ - if (unlikely(e->flags & ENGINE_FLAG_KERNEL)) + if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL)) xe_device_mem_access_get(job_to_xe(job)); xe_device_assert_mem_access(job_to_xe(job)); @@ -164,14 +164,14 @@ struct xe_sched_job *xe_sched_job_create(struct 
xe_engine *e, err_fences: for (j = j - 1; j >= 0; --j) { - --e->lrc[j].fence_ctx.next_seqno; + --q->lrc[j].fence_ctx.next_seqno; dma_fence_put(fences[j]); } kfree(fences); err_sched_job: drm_sched_job_cleanup(&job->drm); err_free: - xe_engine_put(e); + xe_exec_queue_put(q); job_free(job); return ERR_PTR(err); } @@ -188,9 +188,9 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); - if (unlikely(job->engine->flags & ENGINE_FLAG_KERNEL)) + if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL)) xe_device_mem_access_put(job_to_xe(job)); - xe_engine_put(job->engine); + xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); @@ -222,12 +222,12 @@ void xe_sched_job_set_error(struct xe_sched_job *job, int error) trace_xe_sched_job_set_error(job); dma_fence_enable_sw_signaling(job->fence); - xe_hw_fence_irq_run(job->engine->fence_irq); + xe_hw_fence_irq_run(job->q->fence_irq); } bool xe_sched_job_started(struct xe_sched_job *job) { - struct xe_lrc *lrc = job->engine->lrc; + struct xe_lrc *lrc = job->q->lrc; return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_start_seqno(lrc), @@ -236,7 +236,7 @@ bool xe_sched_job_started(struct xe_sched_job *job) bool xe_sched_job_completed(struct xe_sched_job *job) { - struct xe_lrc *lrc = job->engine->lrc; + struct xe_lrc *lrc = job->q->lrc; /* * Can safely check just LRC[0] seqno as that is last seqno written when diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index 5315ad8656c2..6ca1d426c036 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -14,7 +14,7 @@ int xe_sched_job_module_init(void); void xe_sched_job_module_exit(void); -struct xe_sched_job *xe_sched_job_create(struct xe_engine *e, +struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, u64 *batch_addr); void xe_sched_job_destroy(struct kref *ref); @@ -71,6 +71,6 @@ xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags) job->migrate_flush_flags = flags; } -bool xe_sched_job_is_migration(struct xe_engine *e); +bool xe_sched_job_is_migration(struct xe_exec_queue *q); #endif diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index 5534bfacaa16..71213ba9735b 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -10,7 +10,7 @@ #include -struct xe_engine; +struct xe_exec_queue; /** * struct xe_sched_job - XE schedule job (batch buffer tracking) @@ -18,8 +18,8 @@ struct xe_engine; struct xe_sched_job { /** @drm: base DRM scheduler job */ struct drm_sched_job drm; - /** @engine: XE submission engine */ - struct xe_engine *engine; + /** @q: Exec queue */ + struct xe_exec_queue *q; /** @refcount: ref count of this job */ struct kref refcount; /** diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 82ca25d8d017..5ea458dadf69 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -13,11 +13,11 @@ #include #include "xe_bo_types.h" -#include "xe_engine_types.h" +#include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" #include "xe_gt_tlb_invalidation_types.h" #include "xe_gt_types.h" -#include "xe_guc_engine_types.h" +#include "xe_guc_exec_queue_types.h" #include "xe_sched_job.h" #include "xe_vm.h" @@ -105,9 +105,9 @@ DEFINE_EVENT(xe_bo, xe_bo_move, TP_ARGS(bo) ); -DECLARE_EVENT_CLASS(xe_engine, - TP_PROTO(struct xe_engine *e), - 
TP_ARGS(e), +DECLARE_EVENT_CLASS(xe_exec_queue, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q), TP_STRUCT__entry( __field(enum xe_engine_class, class) @@ -120,13 +120,13 @@ DECLARE_EVENT_CLASS(xe_engine, ), TP_fast_assign( - __entry->class = e->class; - __entry->logical_mask = e->logical_mask; - __entry->gt_id = e->gt->info.id; - __entry->width = e->width; - __entry->guc_id = e->guc->id; - __entry->guc_state = atomic_read(&e->guc->state); - __entry->flags = e->flags; + __entry->class = q->class; + __entry->logical_mask = q->logical_mask; + __entry->gt_id = q->gt->info.id; + __entry->width = q->width; + __entry->guc_id = q->guc->id; + __entry->guc_state = atomic_read(&q->guc->state); + __entry->flags = q->flags; ), TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", @@ -135,94 +135,94 @@ DECLARE_EVENT_CLASS(xe_engine, __entry->guc_state, __entry->flags) ); -DEFINE_EVENT(xe_engine, xe_engine_create, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_create, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_supress_resume, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_supress_resume, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_submit, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_submit, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_scheduling_enable, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_enable, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_scheduling_disable, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_disable, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_scheduling_done, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_scheduling_done, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_register, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_register, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_deregister, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_deregister_done, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_deregister_done, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_close, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_close, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_kill, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_kill, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_cleanup_entity, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cleanup_entity, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_destroy, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_destroy, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, 
xe_engine_reset, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_reset, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_memory_cat_error, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_stop, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_resubmit, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_resubmit, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); -DEFINE_EVENT(xe_engine, xe_engine_lr_cleanup, - TP_PROTO(struct xe_engine *e), - TP_ARGS(e) +DEFINE_EVENT(xe_exec_queue, xe_exec_queue_lr_cleanup, + TP_PROTO(struct xe_exec_queue *q), + TP_ARGS(q) ); DECLARE_EVENT_CLASS(xe_sched_job, @@ -241,10 +241,10 @@ DECLARE_EVENT_CLASS(xe_sched_job, TP_fast_assign( __entry->seqno = xe_sched_job_seqno(job); - __entry->guc_id = job->engine->guc->id; + __entry->guc_id = job->q->guc->id; __entry->guc_state = - atomic_read(&job->engine->guc->state); - __entry->flags = job->engine->flags; + atomic_read(&job->q->guc->state); + __entry->flags = job->q->flags; __entry->error = job->fence->error; __entry->fence = (unsigned long)job->fence; __entry->batch_addr = (u64)job->batch_addr[0]; @@ -303,7 +303,7 @@ DECLARE_EVENT_CLASS(xe_sched_msg, TP_fast_assign( __entry->opcode = msg->opcode; __entry->guc_id = - ((struct xe_engine *)msg->private_data)->guc->id; + ((struct xe_exec_queue *)msg->private_data)->guc->id; ), TP_printk("guc_id=%d, opcode=%u", __entry->guc_id, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d3e82c4aed42..374f111eea9c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -165,15 +165,15 @@ out: static bool preempt_fences_waiting(struct xe_vm *vm) { - struct xe_engine *e; + struct xe_exec_queue *q; lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); - list_for_each_entry(e, &vm->preempt.engines, compute.link) { - if (!e->compute.pfence || (e->compute.pfence && - test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &e->compute.pfence->flags))) { + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + if (!q->compute.pfence || + (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &q->compute.pfence->flags))) { return true; } } @@ -195,10 +195,10 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); - if (*count >= vm->preempt.num_engines) + if (*count >= vm->preempt.num_exec_queues) return 0; - for (; *count < vm->preempt.num_engines; ++(*count)) { + for (; *count < vm->preempt.num_exec_queues; ++(*count)) { struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); if (IS_ERR(pfence)) @@ -212,18 +212,18 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, static int wait_for_existing_preempt_fences(struct xe_vm *vm) { - struct xe_engine *e; + struct xe_exec_queue *q; xe_vm_assert_held(vm); - list_for_each_entry(e, &vm->preempt.engines, compute.link) { - if (e->compute.pfence) { - long timeout = dma_fence_wait(e->compute.pfence, false); + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + if (q->compute.pfence) { + long timeout = dma_fence_wait(q->compute.pfence, false); if (timeout < 0) return -ETIME; - 
dma_fence_put(e->compute.pfence); - e->compute.pfence = NULL; + dma_fence_put(q->compute.pfence); + q->compute.pfence = NULL; } } @@ -232,11 +232,11 @@ static int wait_for_existing_preempt_fences(struct xe_vm *vm) static bool xe_vm_is_idle(struct xe_vm *vm) { - struct xe_engine *e; + struct xe_exec_queue *q; xe_vm_assert_held(vm); - list_for_each_entry(e, &vm->preempt.engines, compute.link) { - if (!xe_engine_is_idle(e)) + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + if (!xe_exec_queue_is_idle(q)) return false; } @@ -246,36 +246,36 @@ static bool xe_vm_is_idle(struct xe_vm *vm) static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) { struct list_head *link; - struct xe_engine *e; + struct xe_exec_queue *q; - list_for_each_entry(e, &vm->preempt.engines, compute.link) { + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { struct dma_fence *fence; link = list->next; XE_WARN_ON(link == list); fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), - e, e->compute.context, - ++e->compute.seqno); - dma_fence_put(e->compute.pfence); - e->compute.pfence = fence; + q, q->compute.context, + ++q->compute.seqno); + dma_fence_put(q->compute.pfence); + q->compute.pfence = fence; } } static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) { - struct xe_engine *e; + struct xe_exec_queue *q; struct ww_acquire_ctx ww; int err; - err = xe_bo_lock(bo, &ww, vm->preempt.num_engines, true); + err = xe_bo_lock(bo, &ww, vm->preempt.num_exec_queues, true); if (err) return err; - list_for_each_entry(e, &vm->preempt.engines, compute.link) - if (e->compute.pfence) { + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) + if (q->compute.pfence) { dma_resv_add_fence(bo->ttm.base.resv, - e->compute.pfence, + q->compute.pfence, DMA_RESV_USAGE_BOOKKEEP); } @@ -304,22 +304,22 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) { - struct xe_engine *e; + struct xe_exec_queue *q; lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); - list_for_each_entry(e, &vm->preempt.engines, compute.link) { - e->ops->resume(e); + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { + q->ops->resume(q); - dma_resv_add_fence(xe_vm_resv(vm), e->compute.pfence, + dma_resv_add_fence(xe_vm_resv(vm), q->compute.pfence, DMA_RESV_USAGE_BOOKKEEP); - xe_vm_fence_all_extobjs(vm, e->compute.pfence, + xe_vm_fence_all_extobjs(vm, q->compute.pfence, DMA_RESV_USAGE_BOOKKEEP); } } -int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e) +int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) { struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; struct ttm_validate_buffer *tv; @@ -337,16 +337,16 @@ int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e) if (err) goto out_unlock_outer; - pfence = xe_preempt_fence_create(e, e->compute.context, - ++e->compute.seqno); + pfence = xe_preempt_fence_create(q, q->compute.context, + ++q->compute.seqno); if (!pfence) { err = -ENOMEM; goto out_unlock; } - list_add(&e->compute.link, &vm->preempt.engines); - ++vm->preempt.num_engines; - e->compute.pfence = pfence; + list_add(&q->compute.link, &vm->preempt.exec_queues); + ++vm->preempt.num_exec_queues; + q->compute.pfence = pfence; down_read(&vm->userptr.notifier_lock); @@ -518,7 +518,7 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm, static void xe_vm_kill(struct xe_vm *vm) { struct ww_acquire_ctx ww; - struct xe_engine *e; + struct 
xe_exec_queue *q; lockdep_assert_held(&vm->lock); @@ -526,8 +526,8 @@ static void xe_vm_kill(struct xe_vm *vm) vm->flags |= XE_VM_FLAG_BANNED; trace_xe_vm_kill(vm); - list_for_each_entry(e, &vm->preempt.engines, compute.link) - e->ops->kill(e); + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) + q->ops->kill(q); xe_vm_unlock(vm, &ww); /* TODO: Inform user the VM is banned */ @@ -584,7 +584,7 @@ retry: } err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, - false, vm->preempt.num_engines); + false, vm->preempt.num_exec_queues); if (err) goto out_unlock_outer; @@ -833,7 +833,7 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm) } static struct dma_fence * -xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, +xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op); @@ -1241,7 +1241,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) INIT_WORK(&vm->destroy_work, vm_destroy_work_func); - INIT_LIST_HEAD(&vm->preempt.engines); + INIT_LIST_HEAD(&vm->preempt.exec_queues); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ for_each_tile(tile, xe, id) @@ -1320,21 +1320,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) for_each_tile(tile, xe, id) { struct xe_gt *gt = tile->primary_gt; struct xe_vm *migrate_vm; - struct xe_engine *eng; + struct xe_exec_queue *q; if (!vm->pt_root[id]) continue; migrate_vm = xe_migrate_get_vm(tile->migrate); - eng = xe_engine_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, - ENGINE_FLAG_VM); + q = xe_exec_queue_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, + EXEC_QUEUE_FLAG_VM); xe_vm_put(migrate_vm); - if (IS_ERR(eng)) { - err = PTR_ERR(eng); + if (IS_ERR(q)) { + err = PTR_ERR(q); goto err_close; } - vm->eng[id] = eng; + vm->q[id] = q; number_tiles++; } } @@ -1422,7 +1422,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) struct drm_gpuva *gpuva, *next; u8 id; - XE_WARN_ON(vm->preempt.num_engines); + XE_WARN_ON(vm->preempt.num_exec_queues); xe_vm_close(vm); flush_async_ops(vm); @@ -1430,10 +1430,10 @@ void xe_vm_close_and_put(struct xe_vm *vm) flush_work(&vm->preempt.rebind_work); for_each_tile(tile, xe, id) { - if (vm->eng[id]) { - xe_engine_kill(vm->eng[id]); - xe_engine_put(vm->eng[id]); - vm->eng[id] = NULL; + if (vm->q[id]) { + xe_exec_queue_kill(vm->q[id]); + xe_exec_queue_put(vm->q[id]); + vm->q[id] = NULL; } } @@ -1573,7 +1573,7 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) } static struct dma_fence * -xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, +xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op) { @@ -1600,7 +1600,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, if (!(vma->tile_present & BIT(id))) goto next; - fence = __xe_pt_unbind_vma(tile, vma, e, first_op ? syncs : NULL, + fence = __xe_pt_unbind_vma(tile, vma, q, first_op ? syncs : NULL, first_op ? 
num_syncs : 0); if (IS_ERR(fence)) { err = PTR_ERR(fence); @@ -1611,8 +1611,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_engine *e, fences[cur_fence++] = fence; next: - if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list)) - e = list_next_entry(e, multi_gt_list); + if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list)) + q = list_next_entry(q, multi_gt_list); } if (fences) { @@ -1648,7 +1648,7 @@ err_fences: } static struct dma_fence * -xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, +xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op) { @@ -1675,7 +1675,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, if (!(vma->tile_mask & BIT(id))) goto next; - fence = __xe_pt_bind_vma(tile, vma, e ? e : vm->eng[id], + fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id], first_op ? syncs : NULL, first_op ? num_syncs : 0, vma->tile_present & BIT(id)); @@ -1688,8 +1688,8 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_engine *e, fences[cur_fence++] = fence; next: - if (e && vm->pt_root[id] && !list_empty(&e->multi_gt_list)) - e = list_next_entry(e, multi_gt_list); + if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list)) + q = list_next_entry(q, multi_gt_list); } if (fences) { @@ -1805,7 +1805,7 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence) } static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_engine *e, struct xe_sync_entry *syncs, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, struct async_op_fence *afence, bool immediate, bool first_op, bool last_op) { @@ -1814,7 +1814,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, xe_vm_assert_held(vm); if (immediate) { - fence = xe_vm_bind_vma(vma, e, syncs, num_syncs, first_op, + fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) return PTR_ERR(fence); @@ -1836,7 +1836,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, return 0; } -static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e, +static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs, struct async_op_fence *afence, bool immediate, bool first_op, bool last_op) @@ -1852,12 +1852,12 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_engine *e, return err; } - return __xe_vm_bind(vm, vma, e, syncs, num_syncs, afence, immediate, + return __xe_vm_bind(vm, vma, q, syncs, num_syncs, afence, immediate, first_op, last_op); } static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_engine *e, struct xe_sync_entry *syncs, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, struct async_op_fence *afence, bool first_op, bool last_op) { @@ -1866,7 +1866,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); - fence = xe_vm_unbind_vma(vma, e, syncs, num_syncs, first_op, last_op); + fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) return PTR_ERR(fence); if (afence) @@ -2074,7 +2074,7 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, vm = xa_load(&xef->vm.xa, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) err = -ENOENT; - else if (XE_IOCTL_DBG(xe, vm->preempt.num_engines)) + else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) err = -EBUSY; else xa_erase(&xef->vm.xa, args->vm_id); 
@@ -2093,7 +2093,7 @@ static const u32 region_to_mem_type[] = { }; static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, - struct xe_engine *e, u32 region, + struct xe_exec_queue *q, u32 region, struct xe_sync_entry *syncs, u32 num_syncs, struct async_op_fence *afence, bool first_op, bool last_op) @@ -2109,7 +2109,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, } if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) { - return xe_vm_bind(vm, vma, e, xe_vma_bo(vma), syncs, num_syncs, + return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, afence, true, first_op, last_op); } else { int i; @@ -2414,7 +2414,7 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma) * Parse operations list and create any resources needed for the operations * prior to fully committing to the operations. This setup can fail. */ -static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, +static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct drm_gpuva_ops **ops, int num_ops_list, struct xe_sync_entry *syncs, u32 num_syncs, struct list_head *ops_list, bool async) @@ -2434,9 +2434,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, if (!fence) return -ENOMEM; - seqno = e ? ++e->bind.fence_seqno : ++vm->async_ops.fence.seqno; + seqno = q ? ++q->bind.fence_seqno : ++vm->async_ops.fence.seqno; dma_fence_init(&fence->fence, &async_op_fence_ops, - &vm->async_ops.lock, e ? e->bind.fence_ctx : + &vm->async_ops.lock, q ? q->bind.fence_ctx : vm->async_ops.fence.context, seqno); if (!xe_vm_no_dma_fences(vm)) { @@ -2467,7 +2467,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_engine *e, op->syncs = syncs; } - op->engine = e; + op->q = q; switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -2677,7 +2677,7 @@ again: switch (op->base.op) { case DRM_GPUVA_OP_MAP: - err = xe_vm_bind(vm, vma, op->engine, xe_vma_bo(vma), + err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, op->fence, op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, @@ -2693,7 +2693,7 @@ again: vm->async_ops.munmap_rebind_inflight = true; vma->gpuva.flags |= XE_VMA_FIRST_REBIND; } - err = xe_vm_unbind(vm, vma, op->engine, op->syncs, + err = xe_vm_unbind(vm, vma, op->q, op->syncs, op->num_syncs, !prev && !next ? op->fence : NULL, op->flags & XE_VMA_OP_FIRST, @@ -2706,7 +2706,7 @@ again: if (prev) { op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.prev, op->engine, + err = xe_vm_bind(vm, op->remap.prev, op->q, xe_vma_bo(op->remap.prev), op->syncs, op->num_syncs, !next ? 
op->fence : NULL, true, false, @@ -2719,7 +2719,7 @@ again: if (next) { op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.next, op->engine, + err = xe_vm_bind(vm, op->remap.next, op->q, xe_vma_bo(op->remap.next), op->syncs, op->num_syncs, op->fence, true, false, @@ -2734,13 +2734,13 @@ again: break; } case DRM_GPUVA_OP_UNMAP: - err = xe_vm_unbind(vm, vma, op->engine, op->syncs, + err = xe_vm_unbind(vm, vma, op->q, op->syncs, op->num_syncs, op->fence, op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_PREFETCH: - err = xe_vm_prefetch(vm, vma, op->engine, op->prefetch.region, + err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, op->syncs, op->num_syncs, op->fence, op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); @@ -2819,8 +2819,8 @@ static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) while (op->num_syncs--) xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); kfree(op->syncs); - if (op->engine) - xe_engine_put(op->engine); + if (op->q) + xe_exec_queue_put(op->q); if (op->fence) dma_fence_put(&op->fence->fence); } @@ -3174,7 +3174,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_bo **bos = NULL; struct drm_gpuva_ops **ops = NULL; struct xe_vm *vm; - struct xe_engine *e = NULL; + struct xe_exec_queue *q = NULL; u32 num_syncs; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; @@ -3187,23 +3187,23 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (err) return err; - if (args->engine_id) { - e = xe_engine_lookup(xef, args->engine_id); - if (XE_IOCTL_DBG(xe, !e)) { + if (args->exec_queue_id) { + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) { err = -ENOENT; goto free_objs; } - if (XE_IOCTL_DBG(xe, !(e->flags & ENGINE_FLAG_VM))) { + if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { err = -EINVAL; - goto put_engine; + goto put_exec_queue; } } vm = xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) { err = -EINVAL; - goto put_engine; + goto put_exec_queue; } err = down_write_killable(&vm->lock); @@ -3357,7 +3357,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } - err = vm_bind_ioctl_ops_parse(vm, e, ops, args->num_binds, + err = vm_bind_ioctl_ops_parse(vm, q, ops, args->num_binds, syncs, num_syncs, &ops_list, async); if (err) goto unwind_ops; @@ -3391,9 +3391,9 @@ release_vm_lock: up_write(&vm->lock); put_vm: xe_vm_put(vm); -put_engine: - if (e) - xe_engine_put(e); +put_exec_queue: + if (q) + xe_exec_queue_put(q); free_objs: kfree(bos); kfree(ops); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index a1d30de37d20..805236578140 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -18,7 +18,7 @@ struct drm_file; struct ttm_buffer_object; struct ttm_validate_buffer; -struct xe_engine; +struct xe_exec_queue; struct xe_file; struct xe_sync_entry; @@ -164,7 +164,7 @@ static inline bool xe_vm_no_dma_fences(struct xe_vm *vm) return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm); } -int xe_vm_add_compute_engine(struct xe_vm *vm, struct xe_engine *e); +int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); int xe_vm_userptr_pin(struct xe_vm *vm); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index f7522f9ca40e..f8675c3da3b1 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -138,8 
+138,8 @@ struct xe_vm { struct xe_device *xe; - /* engine used for (un)binding vma's */ - struct xe_engine *eng[XE_MAX_TILES_PER_DEVICE]; + /* exec queue used for (un)binding vma's */ + struct xe_exec_queue *q[XE_MAX_TILES_PER_DEVICE]; /** @lru_bulk_move: Bulk LRU move list for this VM's BOs */ struct ttm_lru_bulk_move lru_bulk_move; @@ -278,10 +278,10 @@ struct xe_vm { * an engine again */ s64 min_run_period_ms; - /** @engines: list of engines attached to this VM */ - struct list_head engines; - /** @num_engines: number user engines attached to this VM */ - int num_engines; + /** @exec_queues: list of exec queues attached to this VM */ + struct list_head exec_queues; + /** @num_exec_queues: number exec queues attached to this VM */ + int num_exec_queues; /** * @rebind_deactivated: Whether rebind has been temporarily deactivated * due to no work available. Protected by the vm resv. @@ -386,8 +386,8 @@ struct xe_vma_op { * operations is processed */ struct drm_gpuva_ops *ops; - /** @engine: engine for this operation */ - struct xe_engine *engine; + /** @q: exec queue for this operation */ + struct xe_exec_queue *q; /** * @syncs: syncs for this operation, only used on first and last * operation diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 3d09e9e9267b..86f16d50e9cc 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -103,14 +103,14 @@ struct xe_user_extension { #define DRM_XE_VM_CREATE 0x03 #define DRM_XE_VM_DESTROY 0x04 #define DRM_XE_VM_BIND 0x05 -#define DRM_XE_ENGINE_CREATE 0x06 -#define DRM_XE_ENGINE_DESTROY 0x07 +#define DRM_XE_EXEC_QUEUE_CREATE 0x06 +#define DRM_XE_EXEC_QUEUE_DESTROY 0x07 #define DRM_XE_EXEC 0x08 #define DRM_XE_MMIO 0x09 -#define DRM_XE_ENGINE_SET_PROPERTY 0x0a +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0a #define DRM_XE_WAIT_USER_FENCE 0x0b #define DRM_XE_VM_MADVISE 0x0c -#define DRM_XE_ENGINE_GET_PROPERTY 0x0d +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0d /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -119,12 +119,12 @@ struct xe_user_extension { #define DRM_IOCTL_XE_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create) #define DRM_IOCTL_XE_VM_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy) #define DRM_IOCTL_XE_VM_BIND DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind) -#define DRM_IOCTL_XE_ENGINE_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_CREATE, struct drm_xe_engine_create) -#define DRM_IOCTL_XE_ENGINE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_ENGINE_GET_PROPERTY, struct drm_xe_engine_get_property) -#define DRM_IOCTL_XE_ENGINE_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_ENGINE_DESTROY, struct drm_xe_engine_destroy) +#define DRM_IOCTL_XE_EXEC_QUEUE_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create) +#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) +#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_MMIO DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio) -#define DRM_IOCTL_XE_ENGINE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_ENGINE_SET_PROPERTY, struct drm_xe_engine_set_property) +#define 
DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) #define DRM_IOCTL_XE_VM_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise) @@ -649,11 +649,11 @@ struct drm_xe_vm_bind { __u32 vm_id; /** - * @engine_id: engine_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND - * and engine must have same vm_id. If zero, the default VM bind engine + * @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND + * and exec queue must have same vm_id. If zero, the default VM bind engine * is used. */ - __u32 engine_id; + __u32 exec_queue_id; /** @num_binds: number of binds in this IOCTL */ __u32 num_binds; @@ -685,8 +685,8 @@ struct drm_xe_vm_bind { __u64 reserved[2]; }; -/** struct drm_xe_ext_engine_set_property - engine set property extension */ -struct drm_xe_ext_engine_set_property { +/** struct drm_xe_ext_exec_queue_set_property - exec queue set property extension */ +struct drm_xe_ext_exec_queue_set_property { /** @base: base user extension */ struct xe_user_extension base; @@ -701,32 +701,32 @@ struct drm_xe_ext_engine_set_property { }; /** - * struct drm_xe_engine_set_property - engine set property + * struct drm_xe_exec_queue_set_property - exec queue set property * - * Same namespace for extensions as drm_xe_engine_create + * Same namespace for extensions as drm_xe_exec_queue_create */ -struct drm_xe_engine_set_property { +struct drm_xe_exec_queue_set_property { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @engine_id: Engine ID */ - __u32 engine_id; + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; -#define XE_ENGINE_SET_PROPERTY_PRIORITY 0 -#define XE_ENGINE_SET_PROPERTY_TIMESLICE 1 -#define XE_ENGINE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 +#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 +#define XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 +#define XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 /* * Long running or ULLS engine mode. DMA fences not allowed in this * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves * as a sanity check the UMD knows what it is doing. Can only be set at * engine create time. 
*/ -#define XE_ENGINE_SET_PROPERTY_COMPUTE_MODE 3 -#define XE_ENGINE_SET_PROPERTY_PERSISTENCE 4 -#define XE_ENGINE_SET_PROPERTY_JOB_TIMEOUT 5 -#define XE_ENGINE_SET_PROPERTY_ACC_TRIGGER 6 -#define XE_ENGINE_SET_PROPERTY_ACC_NOTIFY 7 -#define XE_ENGINE_SET_PROPERTY_ACC_GRANULARITY 8 +#define XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE 3 +#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 4 +#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 5 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 6 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 7 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 8 /** @property: property to set */ __u32 property; @@ -755,25 +755,25 @@ struct drm_xe_engine_class_instance { __u16 gt_id; }; -struct drm_xe_engine_create { -#define XE_ENGINE_EXTENSION_SET_PROPERTY 0 +struct drm_xe_exec_queue_create { +#define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @width: submission width (number BB per exec) for this engine */ + /** @width: submission width (number BB per exec) for this exec queue */ __u16 width; - /** @num_placements: number of valid placements for this engine */ + /** @num_placements: number of valid placements for this exec queue */ __u16 num_placements; - /** @vm_id: VM to use for this engine */ + /** @vm_id: VM to use for this exec queue */ __u32 vm_id; /** @flags: MBZ */ __u32 flags; - /** @engine_id: Returned engine ID */ - __u32 engine_id; + /** @exec_queue_id: Returned exec queue ID */ + __u32 exec_queue_id; /** * @instances: user pointer to a 2-d array of struct @@ -788,14 +788,14 @@ struct drm_xe_engine_create { __u64 reserved[2]; }; -struct drm_xe_engine_get_property { +struct drm_xe_exec_queue_get_property { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @engine_id: Engine ID */ - __u32 engine_id; + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; -#define XE_ENGINE_GET_PROPERTY_BAN 0 +#define XE_EXEC_QUEUE_GET_PROPERTY_BAN 0 /** @property: property to get */ __u32 property; @@ -806,9 +806,9 @@ struct drm_xe_engine_get_property { __u64 reserved[2]; }; -struct drm_xe_engine_destroy { - /** @engine_id: Engine ID */ - __u32 engine_id; +struct drm_xe_exec_queue_destroy { + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; /** @pad: MBZ */ __u32 pad; @@ -855,8 +855,8 @@ struct drm_xe_exec { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @engine_id: Engine ID for the batch buffer */ - __u32 engine_id; + /** @exec_queue_id: Exec queue ID for the batch buffer */ + __u32 exec_queue_id; /** @num_syncs: Amount of struct drm_xe_sync in array. */ __u32 num_syncs; -- cgit v1.2.3 From 038ff941afe2b05273d5f07b12e976dae195d8b8 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 4 Aug 2023 17:17:56 +0530 Subject: drm/xe: Add sysfs entries for engines under its GT Add engines sysfs directory under its GT and create sub directory for all engine class (note its not per instance) present on GT. 
For example, DUT# cat /sys/class/drm/cardX/device/tileN/gtN/engines/ bcs/ ccs/ V9 : - Add missing drmm_add_action_or_reset V8 : - Rebase V7 : - Remove xe_gt.h from .h and include in .c - Matt V6 : - Add kernel doc and arrange file in make file by alphabet - Matt V5 : - replace xe_engine with xe_hw_engine - Matt V4 : - Rebase to resolve conflicts - CI V3 : - Move code in its own file - Rename API name V2 : - Correct class mask logic - Himal - Remove extra parenthesis Reviewed-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt.c | 7 ++ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 153 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 13 +++ 4 files changed, 174 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f8d63c9b97d5..2373832f932e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -78,6 +78,7 @@ xe-y += xe_bb.o \ xe_guc_pc.o \ xe_guc_submit.o \ xe_hw_engine.o \ + xe_hw_engine_class_sysfs.o \ xe_hw_fence.o \ xe_huc.o \ xe_huc_debugfs.o \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3077faa1e792..13320af4ddd3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -28,6 +28,7 @@ #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" #include "xe_hw_fence.h" +#include "xe_hw_engine_class_sysfs.h" #include "xe_irq.h" #include "xe_lrc.h" #include "xe_map.h" @@ -323,6 +324,12 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; + err = xe_hw_engine_class_sysfs_init(gt); + if (err) + drm_warn(>_to_xe(gt)->drm, + "failed to register engines sysfs directory, err: %d\n", + err); + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); XE_WARN_ON(err); xe_device_mem_access_put(gt_to_xe(gt)); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c new file mode 100644 index 000000000000..470a8c356abd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include +#include + +#include "xe_gt.h" +#include "xe_hw_engine_class_sysfs.h" + +#define MAX_ENGINE_CLASS_NAME_LEN 16 +static void kobj_xe_hw_engine_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct kobj_type kobj_xe_hw_engine_type = { + .release = kobj_xe_hw_engine_release, + .sysfs_ops = &kobj_sysfs_ops +}; + +static void kobj_xe_hw_engine_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + kobject_put(kobj); +} + + static struct kobject * +kobj_xe_hw_engine(struct xe_device *xe, struct kobject *parent, char *name) +{ + struct kobject *kobj; + int err = 0; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return NULL; + + kobject_init(kobj, &kobj_xe_hw_engine_type); + if (kobject_add(kobj, parent, "%s", name)) { + kobject_put(kobj); + return NULL; + } + + err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_fini, + kobj); + if (err) + drm_warn(&xe->drm, + "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + + return kobj; +} + +static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct 
kobj_type xe_hw_engine_sysfs_kobj_type = { + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + kobject_put(kobj); +} + +/** + * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT. + * @gt: Xe GT. + * + * This routine creates sysfs for HW engine classes and adds methods + * to get/set different scheduling properties for HW engines class. + * + * Returns: Returns error value for failure and 0 for success. + */ +int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct kobject *kobj; + u16 class_mask = 0; + int err = 0; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return -ENOMEM; + + kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type); + + err = kobject_add(kobj, gt->sysfs, "engines"); + if (err) { + kobject_put(kobj); + return err; + } + + for_each_hw_engine(hwe, gt, id) { + char name[MAX_ENGINE_CLASS_NAME_LEN]; + struct kobject *khwe; + + if (hwe->class == XE_ENGINE_CLASS_OTHER || + hwe->class == XE_ENGINE_CLASS_MAX) + continue; + + if ((class_mask >> hwe->class) & 1) + continue; + + class_mask |= 1 << hwe->class; + + switch (hwe->class) { + case XE_ENGINE_CLASS_RENDER: + strcpy(name, "rcs"); + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + strcpy(name, "vcs"); + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + strcpy(name, "vecs"); + break; + case XE_ENGINE_CLASS_COPY: + strcpy(name, "bcs"); + break; + case XE_ENGINE_CLASS_COMPUTE: + strcpy(name, "ccs"); + break; + default: + kobject_put(kobj); + return -EINVAL; + } + + khwe = kobj_xe_hw_engine(xe, kobj, name); + if (!khwe) { + kobject_put(kobj); + return -EINVAL; + } + } + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, + kobj); + if (err) + drm_warn(&xe->drm, + "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h new file mode 100644 index 000000000000..b3916c3cf5b3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_ENGINE_CLASS_SYSFS_H_ +#define _XE_ENGINE_CLASS_SYSFS_H__ + +struct xe_gt; + +int xe_hw_engine_class_sysfs_init(struct xe_gt *gt); + +#endif -- cgit v1.2.3 From eef55700f302b9af3228f74997e82eaca8635d14 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 4 Aug 2023 17:36:25 +0530 Subject: drm/xe: Add sysfs for default engine scheduler properties For each HW engine under GT we are adding defaults sysfs entry to list all engine scheduler properties and its default values. So that it will be easier for user to fetch default values of these properties anytime to go back to default. For example, DUT# cat /sys/class/drm/card1/device/tileN/gtN/engines/bcs/.defaults/ job_timeout_ms preempt_timeout_us timeslice_duration_us where, @job_timeout_ms: The time after which a job is removed from the scheduler. @preempt_timeout_us: How long to wait (in microseconds) for a preemption event to occur when submitting a new context. @timeslice_duration_us: Each context is scheduled for execution for the timeslice duration, before switching to the next context. 
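For illustration only (not part of the patch; the card/tile/gt indices depend on the system, and the values shown are simply the defaults recorded in hw_engine_init_early()), reading a default back could look like:

DUT# cat /sys/class/drm/card1/device/tile0/gt0/engines/ccs/.defaults/timeslice_duration_us
1000
DUT# cat /sys/class/drm/card1/device/tile0/gt0/engines/ccs/.defaults/preempt_timeout_us
640000

The .defaults entries are created read-only (mode 0444) and report the values captured at init time, so they remain available as a reference even after the tunable entries added in the follow-up patches have been changed.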
V12: - Add missing drmm_add_action_or_reset and remove sysfs files V11: - Rebase V10: - Remove xe_gt.h inclusion from .h - Matt V9 : - Remove jiffies for job_timeout_ms - Matt V8 : - replace xe_engine with xe_hw_engine - Matt V7 : - Push all errors to one error path at every places - Niranjana - Describe struct member to resolve kernel doc err - CI hooks V6 : - Use engine class interface instead of hw engine in sysfs for better interfacing readability - Niranjana V5 : - Scheduling props should apply per class engine not per hardware engine - Matt - Do not record value of job_timeout_ms if changed based on dma_fence - Matt V4 : - Resolve merge conflicts - CI V3 : - Rearrange code in its own file - Rebase - Update commit message to reflect tile addition V2 : - Use sysfs_create_files in this patch - Niranjana - Handle prototype error for xe_add_engine_defaults - CI hooks - Remove unused member sysfs_hwe - Niranjana Reviewed-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 6 +- drivers/gpu/drm/xe/xe_gt_types.h | 3 + drivers/gpu/drm/xe/xe_guc_submit.c | 3 +- drivers/gpu/drm/xe/xe_hw_engine.c | 9 ++ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 136 +++++++++++++++++++++----- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 22 +++++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 37 +++++++ 7 files changed, 190 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 1371829b9e35..41a7ae1d1a53 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -53,9 +53,9 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, INIT_LIST_HEAD(&q->compute.link); INIT_LIST_HEAD(&q->multi_gt_link); - /* FIXME: Wire up to configurable default value */ - q->sched_props.timeslice_us = 1 * 1000; - q->sched_props.preempt_timeout_us = 640 * 1000; + q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; + q->sched_props.preempt_timeout_us = + hwe->eclass->sched_props.preempt_timeout_us; if (xe_exec_queue_is_parallel(q)) { q->parallel.composite_fence_ctx = dma_fence_context_alloc(1); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index c326932e53d7..35b8c19fa8bf 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -286,6 +286,9 @@ struct xe_gt { /** @hw_engines: hardware engines on the GT */ struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES]; + /** @eclass: per hardware engine class interface on the GT */ + struct xe_hw_engine_class_intf eclass[XE_ENGINE_CLASS_MAX]; + /** @pcode: GT's PCODE */ struct { /** @lock: protecting GT's PCODE mailbox data */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 42454c12efb3..e12cd4285e5d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1132,7 +1132,8 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ge->q = q; init_waitqueue_head(&ge->suspend_wait); - timeout = xe_vm_no_dma_fences(q->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5; + timeout = xe_vm_no_dma_fences(q->vm) ? 
MAX_SCHEDULE_TIMEOUT : + q->hwe->eclass->sched_props.job_timeout_ms; err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL, q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index ead5aa285619..b7be7b0acb35 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -362,6 +362,15 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->fence_irq = >->fence_irq[info->class]; hwe->engine_id = id; + if (!gt->eclass[hwe->class].sched_props.job_timeout_ms) { + gt->eclass[hwe->class].sched_props.job_timeout_ms = 5 * 1000; + gt->eclass[hwe->class].sched_props.timeslice_us = 1 * 1000; + gt->eclass[hwe->class].sched_props.preempt_timeout_us = 640 * 1000; + /* Record default props */ + gt->eclass[hwe->class].defaults = gt->eclass[hwe->class].sched_props; + } + hwe->eclass = >->eclass[hwe->class]; + xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt)); xe_wa_process_engine(hwe); hw_engine_setup_default_state(hwe); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 470a8c356abd..99a8197765bd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -11,6 +11,9 @@ #include "xe_hw_engine_class_sysfs.h" #define MAX_ENGINE_CLASS_NAME_LEN 16 +static int xe_add_hw_engine_class_defaults(struct xe_device *xe, + struct kobject *parent); + static void kobj_xe_hw_engine_release(struct kobject *kobj) { kfree(kobj); @@ -21,37 +24,116 @@ static const struct kobj_type kobj_xe_hw_engine_type = { .sysfs_ops = &kobj_sysfs_ops }; -static void kobj_xe_hw_engine_fini(struct drm_device *drm, void *arg) +static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg) { struct kobject *kobj = arg; kobject_put(kobj); } - static struct kobject * -kobj_xe_hw_engine(struct xe_device *xe, struct kobject *parent, char *name) + static struct kobj_eclass * +kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name) +{ + struct kobj_eclass *keclass; + int err = 0; + + keclass = kzalloc(sizeof(*keclass), GFP_KERNEL); + if (!keclass) + return NULL; + + kobject_init(&keclass->base, &kobj_xe_hw_engine_type); + if (kobject_add(&keclass->base, parent, "%s", name)) { + kobject_put(&keclass->base); + return NULL; + } + + err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini, + &keclass->base); + if (err) + drm_warn(&xe->drm, + "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + return keclass; +} + +static ssize_t job_timeout_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms); +} + +static struct kobj_attribute job_timeout_def = +__ATTR(job_timeout_ms, 0444, job_timeout_default, NULL); + +static ssize_t timeslice_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.timeslice_us); +} + +static struct kobj_attribute timeslice_duration_def = +__ATTR(timeslice_duration_us, 0444, timeslice_default, NULL); + +static ssize_t preempt_timeout_default(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = 
kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us); +} + +static struct kobj_attribute preempt_timeout_def = +__ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL); + +static const struct attribute *defaults[] = { + &job_timeout_def.attr, + ×lice_duration_def.attr, + &preempt_timeout_def.attr, + NULL +}; + +static void hw_engine_class_defaults_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_files(kobj, defaults); + kobject_put(kobj); +} + +static int xe_add_hw_engine_class_defaults(struct xe_device *xe, + struct kobject *parent) { struct kobject *kobj; int err = 0; kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); if (!kobj) - return NULL; + return -ENOMEM; kobject_init(kobj, &kobj_xe_hw_engine_type); - if (kobject_add(kobj, parent, "%s", name)) { - kobject_put(kobj); - return NULL; - } + err = kobject_add(kobj, parent, "%s", ".defaults"); + if (err) + goto err_object; + + err = sysfs_create_files(kobj, defaults); + if (err) + goto err_object; - err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_fini, + err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, kobj); if (err) drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", __func__, err); - - return kobj; + return err; +err_object: + kobject_put(kobj); + return err; } static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) @@ -96,14 +178,12 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type); err = kobject_add(kobj, gt->sysfs, "engines"); - if (err) { - kobject_put(kobj); - return err; - } + if (err) + goto err_object; for_each_hw_engine(hwe, gt, id) { char name[MAX_ENGINE_CLASS_NAME_LEN]; - struct kobject *khwe; + struct kobj_eclass *keclass; if (hwe->class == XE_ENGINE_CLASS_OTHER || hwe->class == XE_ENGINE_CLASS_MAX) @@ -131,14 +211,23 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) strcpy(name, "ccs"); break; default: - kobject_put(kobj); - return -EINVAL; + err = -EINVAL; + goto err_object; + } + + keclass = kobj_xe_hw_engine_class(xe, kobj, name); + if (!keclass) { + err = -EINVAL; + goto err_object; } - khwe = kobj_xe_hw_engine(xe, kobj, name); - if (!khwe) { - kobject_put(kobj); - return -EINVAL; + keclass->eclass = hwe->eclass; + err = xe_add_hw_engine_class_defaults(xe, &keclass->base); + if (err) { + drm_warn(&xe->drm, + "Add .defaults to engines failed!, err: %d\n", + err); + goto err_object; } } @@ -150,4 +239,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) __func__, err); return err; +err_object: + kobject_put(kobj); + return err; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h index b3916c3cf5b3..c093f381abbe 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -6,8 +6,30 @@ #ifndef _XE_ENGINE_CLASS_SYSFS_H_ #define _XE_ENGINE_CLASS_SYSFS_H__ +#include + struct xe_gt; +struct xe_hw_engine_class_intf; int xe_hw_engine_class_sysfs_init(struct xe_gt *gt); +/** + * struct kobj_eclass - A eclass's kobject struct that connects the kobject and the + * eclass. + * + * When dealing with multiple eclass, this struct helps to understand which eclass + * needs to be addressed on a given sysfs call. 
+ */ +struct kobj_eclass { + /** @base: The actual kobject */ + struct kobject base; + /** @eclass: A pointer to the hw engine class interface */ + struct xe_hw_engine_class_intf *eclass; +}; + +static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_eclass, base)->eclass; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 803d557cf5aa..97d9ba31b5fc 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -63,6 +63,41 @@ struct xe_bo; struct xe_execlist_port; struct xe_gt; +/** + * struct xe_hw_engine_class_intf - per hw engine class struct interface + * + * Contains all the hw engine properties per engine class. + * + * @sched_props: scheduling properties + * @defaults: default scheduling properties + */ +struct xe_hw_engine_class_intf { + /** + * @sched_props: scheduling properties + * @defaults: default scheduling properties + */ + struct { + /** @set_job_timeout: Set job timeout in ms for engine */ + u32 job_timeout_ms; + /** @job_timeout_min: Min job timeout in ms for engine */ + u32 job_timeout_min; + /** @job_timeout_max: Max job timeout in ms for engine */ + u32 job_timeout_max; + /** @timeslice_us: timeslice period in micro-seconds */ + u32 timeslice_us; + /** @timeslice_min: min timeslice period in micro-seconds */ + u32 timeslice_min; + /** @timeslice_max: max timeslice period in micro-seconds */ + u32 timeslice_max; + /** @preempt_timeout_us: preemption timeout in micro-seconds */ + u32 preempt_timeout_us; + /** @preempt_timeout_min: min preemption timeout in micro-seconds */ + u32 preempt_timeout_min; + /** @preempt_timeout_max: max preemption timeout in micro-seconds */ + u32 preempt_timeout_max; + } sched_props, defaults; +}; + /** * struct xe_hw_engine - Hardware engine * @@ -107,6 +142,8 @@ struct xe_hw_engine { void (*irq_handler)(struct xe_hw_engine *hwe, u16 intr_vec); /** @engine_id: id for this hw engine */ enum xe_hw_engine_id engine_id; + /** @eclass: pointer to per hw engine class interface */ + struct xe_hw_engine_class_intf *eclass; }; /** -- cgit v1.2.3 From e91a989ce151f022a7977c1ae4f21ac6d814d632 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 4 Aug 2023 18:08:25 +0530 Subject: drm/xe: Add job timeout engine property to sysfs The time after which a job is removed from the scheduler. Add sysfs entry to provide user defined job timeout to scheduler. 
The job timeout can be adjusted per-engine class using, /sys/class/drm/cardX/device/tileN/gtN/engines/ccs/job_timeout_ms V8: - Rebase V7: - Rebase to use s/xe_engine/xe_hw_engine/ - Matt V6: - Remove timeout validation, not relevant - Niranjana - Rebase to use common error path V5: - Rebase to use engine class interface instead of hw engine V4: - Rebase to per class engine props interface V3: - Rebase - Update commit message to reflect tile update V2: - Use sysfs_create_files as part of this patch Reviewed-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 86 +++++++++++++++++++-------- 1 file changed, 62 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 99a8197765bd..03e0c29445b7 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -24,38 +24,34 @@ static const struct kobj_type kobj_xe_hw_engine_type = { .sysfs_ops = &kobj_sysfs_ops }; -static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg) +static ssize_t job_timeout_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) { - struct kobject *kobj = arg; + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; - kobject_put(kobj); -} + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; - static struct kobj_eclass * -kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name) -{ - struct kobj_eclass *keclass; - int err = 0; + WRITE_ONCE(eclass->sched_props.job_timeout_ms, timeout); - keclass = kzalloc(sizeof(*keclass), GFP_KERNEL); - if (!keclass) - return NULL; + return count; +} - kobject_init(&keclass->base, &kobj_xe_hw_engine_type); - if (kobject_add(&keclass->base, parent, "%s", name)) { - kobject_put(&keclass->base); - return NULL; - } +static ssize_t job_timeout_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini, - &keclass->base); - if (err) - drm_warn(&xe->drm, - "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - return keclass; + return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms); } +static struct kobj_attribute job_timeout_attr = +__ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store); + static ssize_t job_timeout_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -97,6 +93,44 @@ static const struct attribute *defaults[] = { NULL }; +static const struct attribute *files[] = { + &job_timeout_attr.attr, + NULL +}; + +static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_files(kobj, files); + kobject_put(kobj); +} + + static struct kobj_eclass * +kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name) +{ + struct kobj_eclass *keclass; + int err = 0; + + keclass = kzalloc(sizeof(*keclass), GFP_KERNEL); + if (!keclass) + return NULL; + + kobject_init(&keclass->base, &kobj_xe_hw_engine_type); + if (kobject_add(&keclass->base, parent, "%s", name)) { + kobject_put(&keclass->base); + return NULL; + } + + err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini, + &keclass->base); + if (err) + 
drm_warn(&xe->drm, + "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + return keclass; +} + static void hw_engine_class_defaults_fini(struct drm_device *drm, void *arg) { struct kobject *kobj = arg; @@ -229,6 +263,10 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) err); goto err_object; } + + err = sysfs_create_files(&keclass->base, files); + if (err) + goto err_object; } err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, -- cgit v1.2.3 From bc3a06ead1cd49d3a5e0f707cbd6c8e173307388 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 3 Aug 2023 18:14:04 +0530 Subject: drm/xe: Add timeslice duration engine property to sysfs Timeslices between multiple context is supported via guc scheduling. Add sysfs entry to provide user defined timeslice duration to guc scheduling. The timeslice duration can be adjusted per-engine class using, /sys/class/drm/cardX/device/tileN/gtN/engines/ccs/timeslice_duration_us V8: - Rebase V7: - Rebase to use s/xe_engine/xe_hw_engine/ - Matt V6: - Remove duration validation, not relevant - Niranjana V5: - Rebase to replace hw engine with eclass interface V4: - Rebase to per class engine props interface V3: - Rebase - Update commit messge to add tile V2: - Rebase Reviewed-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 03e0c29445b7..01d5282d2b24 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -63,6 +63,35 @@ static ssize_t job_timeout_default(struct kobject *kobj, static struct kobj_attribute job_timeout_def = __ATTR(job_timeout_ms, 0444, job_timeout_default, NULL); +static ssize_t timeslice_duration_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 duration; + int err; + + err = kstrtou32(buf, 0, &duration); + if (err) + return err; + + WRITE_ONCE(eclass->sched_props.timeslice_us, duration); + + return count; +} + +static ssize_t timeslice_duration_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us); +} + +static struct kobj_attribute timeslice_duration_attr = + __ATTR(timeslice_duration_us, 0644, timeslice_duration_show, + timeslice_duration_store); + static ssize_t timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -95,6 +124,7 @@ static const struct attribute *defaults[] = { static const struct attribute *files[] = { &job_timeout_attr.attr, + ×lice_duration_attr.attr, NULL }; -- cgit v1.2.3 From 69838d6330a7cc11de4f06f55122bfdb60693e70 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 3 Aug 2023 18:18:03 +0530 Subject: drm/xe: Add sysfs for preempt reset timeout The preemption request and timeout is used for higher priority context or kill hung context and reset hardware engine. The preempt timeout can be adjusted per-engine class using, /sys/class/drm/cardX/device/tileN/gtN/engines/ccs/preempt_timeout_us and can be disabled by setting it to 0. 
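As a usage sketch (illustrative only; the card/tile/gt numbering is an assumption, not taken from this series):

DUT# cat /sys/class/drm/card1/device/tile0/gt0/engines/ccs/preempt_timeout_us
640000
DUT# echo 0 > /sys/class/drm/card1/device/tile0/gt0/engines/ccs/preempt_timeout_us

The updated value is read from hwe->eclass->sched_props when an exec queue is created, so it applies to exec queues created afterwards; queues that already exist keep the value they copied at creation time.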
V7: - Rebase V6: - Rebase to use s/xe_engine/xe_hw_engine/ - Matt V5: - Remove timeout validation, not relevant - Niranjana V4: - Rebase to replace hw engine with eclass interface V3: - Rebase to per class engine props interface V2: - Rebase - Update commit message to add tile Reviewed-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 01d5282d2b24..adbaaee1deca 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -103,6 +103,34 @@ static ssize_t timeslice_default(struct kobject *kobj, static struct kobj_attribute timeslice_duration_def = __ATTR(timeslice_duration_us, 0444, timeslice_default, NULL); +static ssize_t preempt_timeout_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + WRITE_ONCE(eclass->sched_props.preempt_timeout_us, timeout); + + return count; +} + +static ssize_t preempt_timeout_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us); +} + +static struct kobj_attribute preempt_timeout_attr = +__ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store); + static ssize_t preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -125,6 +153,7 @@ static const struct attribute *defaults[] = { static const struct attribute *files[] = { &job_timeout_attr.attr, ×lice_duration_attr.attr, + &preempt_timeout_attr.attr, NULL }; -- cgit v1.2.3 From d2776564729739f459e108b5ac83bcea57c44bca Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 4 Aug 2023 11:54:23 +0530 Subject: drm/xe: Add min/max cap for engine scheduler properties Add sysfs entries for the min, max, and defaults for each of engine scheduler controls for every hardware engine class. Non-elevated user IOCTLs to set these controls must be within the min-max ranges of the sysfs entries, elevated user can set these controls to any value. However, introduced compile time CONFIG min-max values which restricts elevated user to be in compile time min-max range if at all sysfs min/max are violated. 
Sysfs entries examples are, DUT# cat /sys/class/drm/cardX/device/tileN/gtN/engines/ccs/.defaults/ job_timeout_max job_timeout_ms preempt_timeout_min timeslice_duration_max timeslice_duration_us job_timeout_min preempt_timeout_max preempt_timeout_us timeslice_duration_min DUT# cat /sys/class/drm/card1/device/tileN/gtN/engines/ccs/ .defaults/ job_timeout_min preempt_timeout_max preempt_timeout_us timeslice_duration_min job_timeout_max job_timeout_ms preempt_timeout_min timeslice_duration_max timeslice_duration_us V12: - Rebase V11: - Make engine_get_prop_minmax and enforce_sched_limit static - Matt - use enum in place of string in engine_get_prop_minmax - Matt - no need to use enforce_sched_limit or no need to filter min/max per user type in sysfs - Matt V10: - Add kernel doc for non-static func - Make helper to get min/max for range validation - Matt - Filter min/max per user type V9 : - Rebase to use s/xe_engine/xe_hw_engine/ - Matt V8 : - fix enforce_sched_limit and avoid code duplication - Niranjana - Make sure min < max - Niranjana V7 : - Rebase to replace hw engine with eclass interface - return EINVAL in place of EPERM - Use some APIs to avoid code duplication V6 : - Rebase changes to reflect per engine class props interface - MattB - Use #if ENABLED - MattB - Remove MAX_SCHED_TIMEOUT check as range validation is enough V5 : - Rebase to resolve conflicts - CI V4 : - Rebase - Update commit to reflect tile addition - Use XE_HW macro directly as they are already filtered for CONFIG checks - Niranjana - Add CONFIG for enable/disable min/max limitation on elevated user. Default is enable - Matt/Joonas V3 : - Resolve CI hooks warning for kernel-doc V2 : - Restric min/max setting to #define default min/max for elevated user - Himal - Remove unrelated changes from patch - Niranjana Reviewed-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig | 6 + drivers/gpu/drm/xe/Kconfig.profile | 46 ++++ drivers/gpu/drm/xe/xe_exec_queue.c | 90 ++++++- drivers/gpu/drm/xe/xe_hw_engine.c | 8 + drivers/gpu/drm/xe/xe_hw_engine.h | 31 +++ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 333 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 1 + 7 files changed, 509 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/xe/Kconfig.profile (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 2a595bc92ca4..6742ed4feecd 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -63,3 +63,9 @@ depends on DRM_XE depends on EXPERT source "drivers/gpu/drm/xe/Kconfig.debug" endmenu + +menu "drm/xe Profile Guided Optimisation" + visible if EXPERT + depends on DRM_XE + source "drivers/gpu/drm/xe/Kconfig.profile" +endmenu diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile new file mode 100644 index 000000000000..51951c8149a1 --- /dev/null +++ b/drivers/gpu/drm/xe/Kconfig.profile @@ -0,0 +1,46 @@ +config DRM_XE_JOB_TIMEOUT_MAX + int "Default max job timeout (ms)" + default 10000 # milliseconds + help + Configures the default max job timeout after which job will + be forcefully taken away from scheduler. +config DRM_XE_JOB_TIMEOUT_MIN + int "Default min job timeout (ms)" + default 1 # milliseconds + help + Configures the default min job timeout after which job will + be forcefully taken away from scheduler. 
+config DRM_XE_TIMESLICE_MAX + int "Default max timeslice duration (us)" + default 10000000 # microseconds + help + Configures the default max timeslice duration between multiple + contexts by guc scheduling. +config DRM_XE_TIMESLICE_MIN + int "Default min timeslice duration (us)" + default 1 # microseconds + help + Configures the default min timeslice duration between multiple + contexts by guc scheduling. +config DRM_XE_PREEMPT_TIMEOUT_MAX + int "Default max preempt timeout (us)" + default 10000000 # microseconds + help + Configures the default max preempt timeout after which context + will be forcefully taken away and higher priority context will + run. +config DRM_XE_PREEMPT_TIMEOUT_MIN + int "Default min preempt timeout (us)" + default 1 # microseconds + help + Configures the default min preempt timeout after which context + will be forcefully taken away and higher priority context will + run. +config DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT + bool "Default configuration of limitation on scheduler timeout" + default y + help + Configures the enablement of limitation on scheduler timeout + to apply to applicable user. For elevated user, all above MIN + and MAX values will apply when this configuration is enable to + apply limitation. By default limitation is applied. diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 41a7ae1d1a53..901c609a657e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -13,6 +13,7 @@ #include "xe_device.h" #include "xe_gt.h" +#include "xe_hw_engine_class_sysfs.h" #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -22,6 +23,13 @@ #include "xe_trace.h" #include "xe_vm.h" +enum xe_exec_queue_sched_prop { + XE_EXEC_QUEUE_JOB_TIMEOUT = 0, + XE_EXEC_QUEUE_TIMESLICE = 1, + XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2, + XE_EXEC_QUEUE_SCHED_PROP_MAX = 3, +}; + static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, @@ -201,11 +209,69 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q return q->ops->set_priority(q, value); } +static bool xe_exec_queue_enforce_schedule_limit(void) +{ +#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) + return true; +#else + return !capable(CAP_SYS_NICE); +#endif +} + +static void +xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass, + enum xe_exec_queue_sched_prop prop, + u32 *min, u32 *max) +{ + switch (prop) { + case XE_EXEC_QUEUE_JOB_TIMEOUT: + *min = eclass->sched_props.job_timeout_min; + *max = eclass->sched_props.job_timeout_max; + break; + case XE_EXEC_QUEUE_TIMESLICE: + *min = eclass->sched_props.timeslice_min; + *max = eclass->sched_props.timeslice_max; + break; + case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: + *min = eclass->sched_props.preempt_timeout_min; + *max = eclass->sched_props.preempt_timeout_max; + break; + default: + break; + } +#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) + if (capable(CAP_SYS_NICE)) { + switch (prop) { + case XE_EXEC_QUEUE_JOB_TIMEOUT: + *min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; + *max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; + break; + case XE_EXEC_QUEUE_TIMESLICE: + *min = XE_HW_ENGINE_TIMESLICE_MIN; + *max = XE_HW_ENGINE_TIMESLICE_MAX; + break; + case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: + *min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; + *max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; + break; + default: + break; + } + } +#endif +} + static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create) 
{ - if (!capable(CAP_SYS_NICE)) - return -EPERM; + u32 min = 0, max = 0; + + xe_exec_queue_get_prop_minmax(q->hwe->eclass, + XE_EXEC_QUEUE_TIMESLICE, &min, &max); + + if (xe_exec_queue_enforce_schedule_limit() && + !xe_hw_engine_timeout_in_range(value, min, max)) + return -EINVAL; return q->ops->set_timeslice(q, value); } @@ -214,8 +280,14 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create) { - if (!capable(CAP_SYS_NICE)) - return -EPERM; + u32 min = 0, max = 0; + + xe_exec_queue_get_prop_minmax(q->hwe->eclass, + XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max); + + if (xe_exec_queue_enforce_schedule_limit() && + !xe_hw_engine_timeout_in_range(value, min, max)) + return -EINVAL; return q->ops->set_preempt_timeout(q, value); } @@ -279,11 +351,17 @@ static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create) { + u32 min = 0, max = 0; + if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (!capable(CAP_SYS_NICE)) - return -EPERM; + xe_exec_queue_get_prop_minmax(q->hwe->eclass, + XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); + + if (xe_exec_queue_enforce_schedule_limit() && + !xe_hw_engine_timeout_in_range(value, min, max)) + return -EINVAL; return q->ops->set_job_timeout(q, value); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b7be7b0acb35..b8fcc6e985cf 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -364,8 +364,16 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, if (!gt->eclass[hwe->class].sched_props.job_timeout_ms) { gt->eclass[hwe->class].sched_props.job_timeout_ms = 5 * 1000; + gt->eclass[hwe->class].sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; + gt->eclass[hwe->class].sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; gt->eclass[hwe->class].sched_props.timeslice_us = 1 * 1000; + gt->eclass[hwe->class].sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN; + gt->eclass[hwe->class].sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX; gt->eclass[hwe->class].sched_props.preempt_timeout_us = 640 * 1000; + gt->eclass[hwe->class].sched_props.preempt_timeout_min = + XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; + gt->eclass[hwe->class].sched_props.preempt_timeout_max = + XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; /* Record default props */ gt->eclass[hwe->class].defaults = gt->eclass[hwe->class].sched_props; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 7eca9d53c7b1..3d37d6d44261 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -10,6 +10,37 @@ struct drm_printer; +#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN +#define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN +#else +#define XE_HW_ENGINE_JOB_TIMEOUT_MIN 1 +#endif +#ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MAX +#define XE_HW_ENGINE_JOB_TIMEOUT_MAX CONFIG_DRM_XE_JOB_TIMEOUT_MAX +#else +#define XE_HW_ENGINE_JOB_TIMEOUT_MAX (10 * 1000) +#endif +#ifdef CONFIG_DRM_XE_TIMESLICE_MIN +#define XE_HW_ENGINE_TIMESLICE_MIN CONFIG_DRM_XE_TIMESLICE_MIN +#else +#define XE_HW_ENGINE_TIMESLICE_MIN 1 +#endif +#ifdef CONFIG_DRM_XE_TIMESLICE_MAX +#define XE_HW_ENGINE_TIMESLICE_MAX CONFIG_DRM_XE_TIMESLICE_MAX +#else +#define XE_HW_ENGINE_TIMESLICE_MAX (10 * 1000 * 1000) +#endif +#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN +#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN 
CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN +#else +#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN 1 +#endif +#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX +#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX CONFIG_DRM_XE_PREEMPT_TIMEOUT_MAX +#else +#define XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX (10 * 1000 * 1000) +#endif + int xe_hw_engines_init_early(struct xe_gt *gt); int xe_hw_engines_init(struct xe_gt *gt); void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index adbaaee1deca..e49bc14f0ecf 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -14,6 +14,22 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, struct kobject *parent); +/** + * xe_hw_engine_timeout_in_range - Helper to check if timeout is in range + * @timeout: timeout to validate + * @min: min value of valid range + * @max: max value of valid range + * + * This helper helps to validate if timeout is in min-max range of HW engine + * scheduler. + * + * Returns: Returns false value for failure and true for success. + */ +bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max) +{ + return timeout >= min && timeout <= max; +} + static void kobj_xe_hw_engine_release(struct kobject *kobj) { kfree(kobj); @@ -24,11 +40,85 @@ static const struct kobj_type kobj_xe_hw_engine_type = { .sysfs_ops = &kobj_sysfs_ops }; +static ssize_t job_timeout_max_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (timeout < eclass->sched_props.job_timeout_min) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(timeout, + XE_HW_ENGINE_JOB_TIMEOUT_MIN, + XE_HW_ENGINE_JOB_TIMEOUT_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.job_timeout_max, timeout); + + return count; +} + +static ssize_t job_timeout_max_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max); +} + +static struct kobj_attribute job_timeout_max_attr = +__ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store); + +static ssize_t job_timeout_min_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (timeout > eclass->sched_props.job_timeout_max) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(timeout, + XE_HW_ENGINE_JOB_TIMEOUT_MIN, + XE_HW_ENGINE_JOB_TIMEOUT_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.job_timeout_min, timeout); + + return count; +} + +static ssize_t job_timeout_min_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min); +} + +static struct kobj_attribute job_timeout_min_attr = +__ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store); + static ssize_t job_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 min = 
eclass->sched_props.job_timeout_min; + u32 max = eclass->sched_props.job_timeout_max; u32 timeout; int err; @@ -36,6 +126,9 @@ static ssize_t job_timeout_store(struct kobject *kobj, if (err) return err; + if (!xe_hw_engine_timeout_in_range(timeout, min, max)) + return -EINVAL; + WRITE_ONCE(eclass->sched_props.job_timeout_ms, timeout); return count; @@ -63,11 +156,35 @@ static ssize_t job_timeout_default(struct kobject *kobj, static struct kobj_attribute job_timeout_def = __ATTR(job_timeout_ms, 0444, job_timeout_default, NULL); +static ssize_t job_timeout_min_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min); +} + +static struct kobj_attribute job_timeout_min_def = +__ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL); + +static ssize_t job_timeout_max_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max); +} + +static struct kobj_attribute job_timeout_max_def = +__ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL); + static ssize_t timeslice_duration_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 min = eclass->sched_props.timeslice_min; + u32 max = eclass->sched_props.timeslice_max; u32 duration; int err; @@ -75,11 +192,90 @@ static ssize_t timeslice_duration_store(struct kobject *kobj, if (err) return err; + if (!xe_hw_engine_timeout_in_range(duration, min, max)) + return -EINVAL; + WRITE_ONCE(eclass->sched_props.timeslice_us, duration); return count; } +static ssize_t timeslice_duration_max_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 duration; + int err; + + err = kstrtou32(buf, 0, &duration); + if (err) + return err; + + if (duration < eclass->sched_props.timeslice_min) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(duration, + XE_HW_ENGINE_TIMESLICE_MIN, + XE_HW_ENGINE_TIMESLICE_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.timeslice_max, duration); + + return count; +} + +static ssize_t timeslice_duration_max_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max); +} + +static struct kobj_attribute timeslice_duration_max_attr = + __ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show, + timeslice_duration_max_store); + +static ssize_t timeslice_duration_min_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 duration; + int err; + + err = kstrtou32(buf, 0, &duration); + if (err) + return err; + + if (duration > eclass->sched_props.timeslice_max) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(duration, + XE_HW_ENGINE_TIMESLICE_MIN, + XE_HW_ENGINE_TIMESLICE_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.timeslice_min, duration); + + return count; +} + +static ssize_t timeslice_duration_min_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = 
kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min); +} + +static struct kobj_attribute timeslice_duration_min_attr = + __ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show, + timeslice_duration_min_store); + static ssize_t timeslice_duration_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -103,11 +299,35 @@ static ssize_t timeslice_default(struct kobject *kobj, static struct kobj_attribute timeslice_duration_def = __ATTR(timeslice_duration_us, 0444, timeslice_default, NULL); +static ssize_t timeslice_min_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.timeslice_min); +} + +static struct kobj_attribute timeslice_duration_min_def = +__ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL); + +static ssize_t timeslice_max_default(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.timeslice_max); +} + +static struct kobj_attribute timeslice_duration_max_def = +__ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL); + static ssize_t preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 min = eclass->sched_props.preempt_timeout_min; + u32 max = eclass->sched_props.preempt_timeout_max; u32 timeout; int err; @@ -115,6 +335,9 @@ static ssize_t preempt_timeout_store(struct kobject *kobj, if (err) return err; + if (!xe_hw_engine_timeout_in_range(timeout, min, max)) + return -EINVAL; + WRITE_ONCE(eclass->sched_props.preempt_timeout_us, timeout); return count; @@ -143,17 +366,127 @@ static ssize_t preempt_timeout_default(struct kobject *kobj, static struct kobj_attribute preempt_timeout_def = __ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL); +static ssize_t preempt_timeout_min_default(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min); +} + +static struct kobj_attribute preempt_timeout_min_def = +__ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL); + +static ssize_t preempt_timeout_max_default(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); + + return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max); +} + +static struct kobj_attribute preempt_timeout_max_def = +__ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL); + +static ssize_t preempt_timeout_max_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (timeout < eclass->sched_props.preempt_timeout_min) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(timeout, + XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN, + XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.preempt_timeout_max, timeout); + + return count; +} + +static ssize_t preempt_timeout_max_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + 
struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max); +} + +static struct kobj_attribute preempt_timeout_max_attr = + __ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show, + preempt_timeout_max_store); + +static ssize_t preempt_timeout_min_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + u32 timeout; + int err; + + err = kstrtou32(buf, 0, &timeout); + if (err) + return err; + + if (timeout > eclass->sched_props.preempt_timeout_max) + return -EINVAL; + + if (!xe_hw_engine_timeout_in_range(timeout, + XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN, + XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX)) + return -EINVAL; + + WRITE_ONCE(eclass->sched_props.preempt_timeout_min, timeout); + + return count; +} + +static ssize_t preempt_timeout_min_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); + + return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min); +} + +static struct kobj_attribute preempt_timeout_min_attr = + __ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show, + preempt_timeout_min_store); + static const struct attribute *defaults[] = { &job_timeout_def.attr, + &job_timeout_min_def.attr, + &job_timeout_max_def.attr, ×lice_duration_def.attr, + ×lice_duration_min_def.attr, + ×lice_duration_max_def.attr, &preempt_timeout_def.attr, + &preempt_timeout_min_def.attr, + &preempt_timeout_max_def.attr, NULL }; static const struct attribute *files[] = { &job_timeout_attr.attr, + &job_timeout_min_attr.attr, + &job_timeout_max_attr.attr, ×lice_duration_attr.attr, + ×lice_duration_min_attr.attr, + ×lice_duration_max_attr.attr, &preempt_timeout_attr.attr, + &preempt_timeout_min_attr.attr, + &preempt_timeout_max_attr.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h index c093f381abbe..60469fde4147 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -12,6 +12,7 @@ struct xe_gt; struct xe_hw_engine_class_intf; int xe_hw_engine_class_sysfs_init(struct xe_gt *gt); +bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max); /** * struct kobj_eclass - A eclass's kobject struct that connects the kobject and the -- cgit v1.2.3 From 0c005429005228d7a82e4e8d5d8f24b6192e7aa6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 3 Aug 2023 16:42:08 -0700 Subject: drm/xe: Fix error path in xe_guc_pc_gucrc_disable() Make sure to always call xe_device_mem_access_put(), even on error. 
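The fix below relies on the usual single-exit cleanup pattern: once the mem_access reference is taken, every failure path must funnel through a label that drops it. A minimal standalone sketch of that pattern (the function names here are stand-ins, not the driver's real helpers):

#include <stdio.h>

static int ref;                         /* stands in for the mem_access refcount */

static void mem_access_get(void) { ref++; }
static void mem_access_put(void) { ref--; }

static int do_setup(int fail)           /* hypothetical step that may fail */
{
    return fail ? -1 : 0;
}

static int gucrc_disable(int fail)
{
    int ret = 0;

    mem_access_get();

    ret = do_setup(fail);
    if (ret)
        goto out;   /* an early "return ret" here would leak the reference */

    /* ... register programming would go here ... */

out:
    mem_access_put();                   /* runs on both success and failure */
    return ret;
}

int main(void)
{
    gucrc_disable(1);
    printf("refcount after failed call: %d\n", ref);    /* 0: balanced */
    return 0;
}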
Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230803234209.881924-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 91a3967fd799..19d743f92f43 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -745,25 +745,27 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - int ret; + int ret = 0; xe_device_mem_access_get(pc_to_xe(pc)); ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); if (ret) - return ret; + goto out; ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - return ret; + goto out; xe_mmio_write32(gt, PG_ENABLE, 0); xe_mmio_write32(gt, RC_CONTROL, 0); xe_mmio_write32(gt, RC_STATE, 0); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + +out: xe_device_mem_access_put(pc_to_xe(pc)); - return 0; + return ret; } static void pc_init_pcode_freq(struct xe_guc_pc *pc) -- cgit v1.2.3 From d8b4494bf184d43295b89156d7656d69f931e418 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 3 Aug 2023 16:42:09 -0700 Subject: drm/xe: Fix error path in xe_guc_pc_start() If the forcewake failed, put xe_device_mem_access. Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230803234209.881924-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 19d743f92f43..c03bb58e7049 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -825,7 +825,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - return ret; + goto out_fail_force_wake; ret = pc_action_reset(pc); if (ret) @@ -851,6 +851,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); +out_fail_force_wake: xe_device_mem_access_put(pc_to_xe(pc)); return ret; } -- cgit v1.2.3 From 31b57683de2c98ac6a3de7223ef0afd47731265c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 3 Aug 2023 18:38:50 +0100 Subject: drm/xe/guc_submit: prevent repeated unregister It seems that various things can trigger the lr cleanup worker, including CAT error, engine reset and destroying the actual engine, so seems plausible to end up triggering the worker more than once in some cases. If that does happen we can race with an ongoing engine deregister before it has completed, thus triggering it again and also changing the state back into pending_disable. Checking if the engine has been marked as destroyed looks like it should prevent this. 
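The guard added below boils down to making the cleanup idempotent: the first pass marks the queue destroyed, so any later invocation of the worker sees the mark and backs off instead of re-entering the disable/deregister state machine. A toy standalone sketch of that idea (the flag names are illustrative, not the driver's):

#include <stdio.h>

#define Q_REGISTERED (1u << 0)
#define Q_DESTROYED  (1u << 1)

static void lr_cleanup(unsigned int *state)
{
    /* Only kick off a deregister if one isn't already in flight. */
    if ((*state & Q_REGISTERED) && !(*state & Q_DESTROYED)) {
        *state |= Q_DESTROYED;
        printf("deregister issued\n");
        return;
    }
    printf("already destroyed or unregistered, nothing to do\n");
}

int main(void)
{
    unsigned int state = Q_REGISTERED;

    lr_cleanup(&state);     /* first trigger: issues the deregister */
    lr_cleanup(&state);     /* repeated trigger: safely skipped */
    return 0;
}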
Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index e12cd4285e5d..19df4b67bfbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -802,8 +802,18 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); - /* Engine state now stable, disable scheduling / deregister if needed */ - if (exec_queue_registered(q)) { + /* + * Engine state now mostly stable, disable scheduling / deregister if + * needed. This cleanup routine might be called multiple times, where + * the actual async engine deregister drops the final engine ref. + * Calling disable_scheduling_deregister will mark the engine as + * destroyed and fire off the CT requests to disable scheduling / + * deregister, which we only want to do once. We also don't want to mark + * the engine as pending_disable again as this may race with the + * xe_guc_deregister_done_handler() which treats it as an unexpected + * state. + */ + if (exec_queue_registered(q) && !exec_queue_destroyed(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; -- cgit v1.2.3 From c47794bdd63d8304fa5d410039e81c6387388340 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 2 Aug 2023 20:15:38 -0700 Subject: drm/xe: Set max pte size when skipping rebinds When a rebind is skipped, we must set the max pte size of the newly created vma to value of the old vma as we do not pte walk for the new vma. Without this future rebinds may be incorrectly skipped due to the wrong max pte size. Null binds are more likely to expose this bug as larger ptes are more frequently used compared to normal bindings. Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Testcase: dEQP-VK.sparse_resources.buffer.ssbo.sparse_residency.buffer_size_2_24 Reported-by: Paulo Zanoni Fixes: 8f33b4f054fc ("drm/xe: Avoid doing rebinds") Reference: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23045 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 374f111eea9c..ff7fafe1315b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2410,6 +2410,20 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma) return SZ_4K; } +static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) +{ + switch (size) { + case SZ_1G: + vma->gpuva.flags |= XE_VMA_PTE_1G; + break; + case SZ_2M: + vma->gpuva.flags |= XE_VMA_PTE_2M; + break; + } + + return SZ_4K; +} + /* * Parse operations list and create any resources needed for the operations * prior to fully committing to the operations. This setup can fail. 
@@ -2520,6 +2534,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, IS_ALIGNED(xe_vma_end(vma), xe_vma_max_pte_size(old)); if (op->remap.skip_prev) { + xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); op->remap.range -= xe_vma_end(vma) - xe_vma_start(old); @@ -2554,10 +2569,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, op->remap.skip_next = !xe_vma_is_userptr(old) && IS_ALIGNED(xe_vma_start(vma), xe_vma_max_pte_size(old)); - if (op->remap.skip_next) + if (op->remap.skip_next) { + xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); op->remap.range -= xe_vma_end(old) - xe_vma_start(vma); + } } break; } -- cgit v1.2.3 From 7f6c6e5085bd4e02f0fd555be76cf7f105c201e7 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 3 Aug 2023 14:44:04 -0400 Subject: drm/xe: Implement HW workaround 14016763929 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To workaround a HW bug on DG2, driver is required to map the whole ppgtt virtual address space before GPU workload submission. Thus set the XE_VM_FLAG_SCRATCH_PAGE flag during vm create so the whole address space is mapped to point to scratch page. v1: - Move the workaround implementation from xe_vm_create to xe_vm_create_ioctl - Brian - Reorder error checking in xe_vm_create_ioctl - Jose - Implement WA only for DG2-G10 and DG2-G12 Signed-off-by: Oak Zeng Reviewed-by: Brian Welty Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/xe_vm.c | 13 +++++++++---- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 2373832f932e..b470c2394476 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules $(call cmd,wa_oob) -$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o: $(generated_oob) +$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o: $(generated_oob) # Please keep these build lists sorted! 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ff7fafe1315b..8b75595b39a2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -29,6 +29,8 @@ #include "xe_res_cursor.h" #include "xe_sync.h" #include "xe_trace.h" +#include "generated/xe_wa_oob.h" +#include "xe_wa.h" #define TEST_VM_ASYNC_OPS_ERROR @@ -1979,6 +1981,13 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, int err; u32 flags = 0; + if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) + args->flags |= DRM_XE_VM_CREATE_SCRATCH_PAGE; + + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + !xe->info.supports_usm)) + return -EINVAL; + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; @@ -2001,10 +2010,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, xe_device_in_fault_mode(xe))) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && - !xe->info.supports_usm)) - return -EINVAL; - if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 15c23813398a..ea90dcc933b5 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -15,3 +15,5 @@ 18020744125 PLATFORM(PVC) 1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) +14016763929 SUBPLATFORM(DG2, G10) + SUBPLATFORM(DG2, G12) -- cgit v1.2.3 From 17d28aa8bdb11ba77d86a7ff228b1963afb7941d Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 9 Aug 2023 09:44:24 +0100 Subject: drm/xe: don't warn for bogus pagefaults This appears to be easily user triggerable so warning is perhaps too much. Rather just make it debug print. 
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/534 Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index d8ff05e25eda..21e0e9c7b634 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -265,7 +265,7 @@ static int send_pagefault_reply(struct xe_guc *guc, static void print_pagefault(struct xe_device *xe, struct pagefault *pf) { - drm_warn(&xe->drm, "\n\tASID: %d\n" + drm_dbg(&xe->drm, "\n\tASID: %d\n" "\tVFID: %d\n" "\tPDATA: 0x%04x\n" "\tFaulted Address: 0x%08x%08x\n" @@ -370,7 +370,7 @@ static void pf_queue_work_func(struct work_struct *w) if (unlikely(ret)) { print_pagefault(xe, &pf); pf.fault_unsuccessful = 1; - drm_warn(&xe->drm, "Fault response: Unsuccessful %d\n", ret); + drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); } reply.dw0 = FIELD_PREP(PFR_VALID, 1) | diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 19df4b67bfbb..b89964d6562e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1641,7 +1641,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, if (unlikely(!q)) return -EPROTO; - drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); + drm_dbg(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id); trace_xe_exec_queue_memory_cat_error(q); /* Treat the same as engine reset */ -- cgit v1.2.3 From c7e4a611f35c064ed7bf3f1614647941b0228334 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 27 Jul 2023 15:09:21 -0700 Subject: drm/xe: Add Wa_14015150844 for DG2 and Xe_LPG The workaround database tells us to set this bit, even though the bspec indicates the bit doesn't exist on these platforms. Since this is a write-only register, we also can't read back its value to verify whether it's actually working or not. For now we'll trust that the workaround database knows what it's talking about; if not, the hardware will just ignore the attempt to write to a non-existent bit and it shouldn't cause any problems. 
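One reason a blind write is tolerable here is that HDC_CHICKEN0 is a masked register (note the XE_REG_OPTION_MASKED declaration in the diff below): the upper 16 bits select which of the lower 16 bits take effect, so a single bit can be set without a read-modify-write. A standalone sketch of that encoding (the helper name is illustrative):

#include <stdint.h>
#include <stdio.h>

static uint32_t masked_set(uint32_t bit)
{
    return (bit << 16) | bit;   /* "this bit is valid" + "set it" in one write */
}

int main(void)
{
    uint32_t dis_atomic_chaining = 1u << 3;   /* REG_BIT(3), per the diff below */

    printf("value written to HDC_CHICKEN0: 0x%08x\n",
           (unsigned int)masked_set(dis_atomic_chaining));
    return 0;
}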
Reviewed-by: Matt Atwood Link: https://lore.kernel.org/r/20230727220920.2291913-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 55b0f70e1904..12cfcf918b87 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -328,6 +328,7 @@ #define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) +#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) #define RT_CTRL XE_REG_MCR(0xe530) #define DIS_NULL_QUERY REG_BIT(10) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 36c80e9fb758..5cf84a096302 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -490,6 +490,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { */ .read_mask = 0)) }, + { XE_RTP_NAME("14015150844"), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, + XE_RTP_NOCHECK)) + }, /* PVC */ @@ -517,6 +522,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH)) }, + { XE_RTP_NAME("14015150844"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, + XE_RTP_NOCHECK)) + }, {} }; -- cgit v1.2.3 From 3d4b0bfcd97fbb43d4848bafbf605f6d95afa7c8 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Wed, 2 Aug 2023 12:34:49 +0530 Subject: drm/xe/pm: Add vram_d3cold_threshold for d3cold capable device Do not register vram_d3cold_threshold device sysfs universally for each gfx device, only register sysfs and set the threshold value for d3cold capable devices. Cc: Rodrigo Vivi Signed-off-by: Anshuman Gupta Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/all/20230802070449.2426563-1-anshuman.gupta@intel.com/ Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index cdde0d87fd9f..5e992e62d0fb 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -154,8 +154,11 @@ void xe_pm_init(struct xe_device *xe) drmm_mutex_init(&xe->drm, &xe->d3cold.lock); xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev); - xe_device_sysfs_init(xe); - xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + + if (xe->d3cold.capable) { + xe_device_sysfs_init(xe); + xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + } xe_pm_runtime_init(xe); } -- cgit v1.2.3 From ef6ea97228e1a742be64a76991686b7e98592c02 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 9 Aug 2023 09:16:18 +0100 Subject: drm/xe/guc_submit: fixup deregister in job timeout Rather check if the engine is still registered before proceeding with deregister steps. Also the engine being marked as disabled doesn't mean the engine has been disabled or deregistered from GuC pov, and here we are signalling fences so we need to be sure GuC is not still using this context. v2: - Drop the read_stopped() for this path. 
Since we are signalling fences on error here, best play it safe and wait for the GT reset to mark the engine as disabled, rather than it just being queued. v3 (Matt Brost): - Keep the read_stopped() on the wait event, since there is no need to wait for an already scheduled GT reset. If it is set we can then just bail without signalling anything. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b89964d6562e..b3d765ee47f1 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -881,15 +881,17 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) } /* Engine state now stable, disable scheduling if needed */ - if (exec_queue_enabled(q)) { + if (exec_queue_registered(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; if (exec_queue_reset(q)) err = -EIO; set_exec_queue_banned(q); - xe_exec_queue_get(q); - disable_scheduling_deregister(guc, q); + if (!exec_queue_destroyed(q)) { + xe_exec_queue_get(q); + disable_scheduling_deregister(guc, q); + } /* * Must wait for scheduling to be disabled before signalling @@ -903,7 +905,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_disable(q) || guc_read_stopped(guc), HZ * 5); - if (!ret) { + if (!ret || guc_read_stopped(guc)) { XE_WARN_ON("Schedule disable failed to respond"); xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); -- cgit v1.2.3 From ca8656a2eb0930b991151588fd04e60c75465543 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 8 Aug 2023 10:12:09 +0100 Subject: drm/xe: skip rebind_list if vma destroyed If we are closing a vm, mark each vma as XE_VMA_DESTROYED and skip touching the rebind_list if this is seen on the eviction path. That way we can safely drop the vm dma-resv lock on the close path without needing to worry about racing with the eviction path trying to add stuff to the rebind_list which can corrupt our contended list, since the destroy and rebind links are the same list entry underneath. 
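Since the destroy and rebind links share one list_head, the eviction path has to treat a VMA that is being torn down as off-limits; the check is just a flag test before the list move. A minimal sketch of that decision (names are illustrative; the real code also holds the notifier list lock):

#include <stdbool.h>
#include <stdio.h>

#define VMA_DESTROYED (1u << 0)

static bool may_queue_for_rebind(unsigned int flags)
{
    if (flags & VMA_DESTROYED)
        return false;   /* VM close owns the link now; touching it would corrupt its list */
    return true;
}

int main(void)
{
    printf("live vma:      %d\n", may_queue_for_rebind(0));
    printf("destroyed vma: %d\n", may_queue_for_rebind(VMA_DESTROYED));
    return 0;
}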
References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/514 Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 10 ++++++---- drivers/gpu/drm/xe/xe_vm.c | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index a12613002766..81870522a394 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -506,15 +506,17 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, vm_resv_locked = true; else if (ctx->resv != xe_vm_resv(vm)) { spin_lock(&vm->notifier.list_lock); - list_move_tail(&vma->notifier.rebind_link, - &vm->notifier.rebind_list); + if (!(vma->gpuva.flags & XE_VMA_DESTROYED)) + list_move_tail(&vma->notifier.rebind_link, + &vm->notifier.rebind_list); spin_unlock(&vm->notifier.list_lock); continue; } xe_vm_assert_held(vm); - if (list_empty(&vma->combined_links.rebind) && - vma->tile_present) + if (vma->tile_present && + !(vma->gpuva.flags & XE_VMA_DESTROYED) && + list_empty(&vma->combined_links.rebind)) list_add_tail(&vma->combined_links.rebind, &vm->rebind_list); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 8b75595b39a2..d683418b817d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1460,6 +1460,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) } list_move_tail(&vma->combined_links.destroy, &contested); + vma->gpuva.flags |= XE_VMA_DESTROYED; } /* -- cgit v1.2.3 From a20c75dba192af6ba63d618514a064268dbbe7db Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 11 Aug 2023 06:27:34 -0700 Subject: drm/xe: Call __guc_exec_queue_fini_async direct for KERNEL exec_queues Usually we call __guc_exec_queue_fini_async via a worker as the exec_queue fini can be done from within the GPU scheduler which creates a circular dependency without a worker. Kernel exec_queues are fini'd at driver unload (not from within the GPU scheduler) so it is safe to directly call __guc_exec_queue_fini_async. 
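The change below is essentially a dispatch on ownership: kernel-owned queues run the teardown body synchronously, everything else still defers to a worker to break the scheduler-to-fini cycle. A standalone sketch of that split (the flag and function names are stand-ins):

#include <stdio.h>

#define QUEUE_FLAG_KERNEL (1u << 0)

static void fini_async_body(void)
{
    printf("final teardown\n");
}

static void fini_async(unsigned int flags)
{
    if (flags & QUEUE_FLAG_KERNEL)
        fini_async_body();                  /* direct call: safe at driver unload */
    else
        printf("deferred to worker\n");     /* stands in for queue_work() */
}

int main(void)
{
    fini_async(QUEUE_FLAG_KERNEL);
    fini_async(0);
    return 0;
}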
Suggested-by: Oded Gabbay Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b3d765ee47f1..8ecfe2b15e28 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -956,27 +956,19 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); - if (!(q->flags & EXEC_QUEUE_FLAG_KERNEL)) { - kfree(ge); - xe_exec_queue_fini(q); - } + kfree(ge); + xe_exec_queue_fini(q); } static void guc_exec_queue_fini_async(struct xe_exec_queue *q) { - bool kernel = q->flags & EXEC_QUEUE_FLAG_KERNEL; - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); - queue_work(system_wq, &q->guc->fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ - if (kernel) { - struct xe_guc_exec_queue *ge = q->guc; - - flush_work(&ge->fini_async); - kfree(ge); - xe_exec_queue_fini(q); - } + if (q->flags & EXEC_QUEUE_FLAG_KERNEL) + __guc_exec_queue_fini_async(&q->guc->fini_async); + else + queue_work(system_wq, &q->guc->fini_async); } static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) -- cgit v1.2.3 From 0887a2e7ab620510093d55f4587c407362363b6d Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Tue, 11 Jul 2023 17:46:09 -0400 Subject: drm/xe: Make xe_mem_region struct Make a xe_mem_region structure which will be used in the coming patches. The new structure is used in both xe device level (xe->mem.vram) and xe_tile level (tile->vram). Make the definition of xe_mem_region.dpa_base to be the DPA base of this memory region and change codes according to this new definition. v1: - rename xe_mem_region.base to dpa_base per conversation with Mike Ruhl Signed-off-by: Oak Zeng Reviewed-by: Michael J. Ruhl Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 94 +++++++++++++++--------------------- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 9 ++-- 4 files changed, 47 insertions(+), 60 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 81870522a394..80c5d1a7d41a 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1469,7 +1469,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) if (res->mem_type == XE_PL_STOLEN) return xe_ttm_stolen_gpu_offset(xe); - return xe->mem.vram.base + tile->mem.vram.base; + return tile->mem.vram.dpa_base; } /** diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 128e0a953692..5575d13395fe 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -50,6 +50,44 @@ struct xe_ggtt; const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ struct xe_tile * : (tile__)->xe) +/** + * struct xe_mem_region - memory region structure + * This is used to describe a memory region in xe + * device, such as HBM memory or CXL extension memory. + */ +struct xe_mem_region { + /** @io_start: IO start address of this VRAM instance */ + resource_size_t io_start; + /** + * @io_size: IO size of this VRAM instance + * + * This represents how much of this VRAM we can access + * via the CPU through the VRAM BAR. 
This can be smaller + * than @usable_size, in which case only part of VRAM is CPU + * accessible (typically the first 256M). This + * configuration is known as small-bar. + */ + resource_size_t io_size; + /** @dpa_base: This memory regions's DPA (device physical address) base */ + resource_size_t dpa_base; + /** + * @usable_size: usable size of VRAM + * + * Usable size of VRAM excluding reserved portions + * (e.g stolen mem) + */ + resource_size_t usable_size; + /** + * @actual_physical_size: Actual VRAM size + * + * Actual VRAM size including reserved portions + * (e.g stolen mem) + */ + resource_size_t actual_physical_size; + /** @mapping: pointer to VRAM mappable space */ + void *__iomem mapping; +}; + /** * struct xe_tile - hardware tile structure * @@ -107,38 +145,7 @@ struct xe_tile { * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. */ - struct { - /** @io_start: IO start address of this VRAM instance */ - resource_size_t io_start; - /** - * @io_size: IO size of this VRAM instance - * - * This represents how much of this VRAM we can access - * via the CPU through the VRAM BAR. This can be smaller - * than @size, in which case only part of VRAM is CPU - * accessible (typically the first 256M). This - * configuration is known as small-bar. - */ - resource_size_t io_size; - /** @base: offset of VRAM starting base */ - resource_size_t base; - /** - * @usable_size: usable size of VRAM - * - * Usable size of VRAM excluding reserved portions - * (e.g stolen mem) - */ - resource_size_t usable_size; - /** - * @actual_physical_size: Actual VRAM size - * - * Actual VRAM size including reserved portions - * (e.g stolen mem) - */ - resource_size_t actual_physical_size; - /** @mapping: pointer to VRAM mappable space */ - void *__iomem mapping; - } vram; + struct xe_mem_region vram; /** @vram_mgr: VRAM TTM manager */ struct xe_ttm_vram_mgr *vram_mgr; @@ -247,28 +254,7 @@ struct xe_device { /** @mem: memory info for device */ struct { /** @vram: VRAM info for device */ - struct { - /** @io_start: IO start address of VRAM */ - resource_size_t io_start; - /** - * @io_size: IO size of VRAM. - * - * This represents how much of VRAM the CPU can access - * via the VRAM BAR. - * On systems that do not support large BAR IO space, - * this can be smaller than the actual memory size, in - * which case only part of VRAM is CPU accessible - * (typically the first 256M). This configuration is - * known as small-bar. - */ - resource_size_t io_size; - /** @size: Total size of VRAM */ - resource_size_t size; - /** @base: Offset to apply for Device Physical Address control */ - resource_size_t base; - /** @mapping: pointer to VRAM mappable space */ - void *__iomem mapping; - } vram; + struct xe_mem_region vram; /** @sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; } mem; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index d0816d2090f0..06e85f7162d4 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -264,7 +264,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, * Use 1GB pages, it shouldn't matter the physical amount of * vram is less, when we don't access it. 
*/ - for (pos = 0; pos < xe->mem.vram.size; pos += SZ_1G, ofs += 8) + for (pos = 0; pos < xe->mem.vram.actual_physical_size; pos += SZ_1G, ofs += 8) xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index aa9c573b1243..41ee89247ddb 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -173,7 +173,8 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) if (!xe->mem.vram.io_size) return -EIO; - xe->mem.vram.base = 0; /* DPA offset */ + /* XXX: Need to change when xe link code is ready */ + xe->mem.vram.dpa_base = 0; /* set up a map to the total memory area. */ xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); @@ -281,7 +282,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) return -ENODEV; } - tile->mem.vram.base = tile_offset; + tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; tile->mem.vram.usable_size = vram_size; tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; @@ -304,10 +305,10 @@ int xe_mmio_probe_vram(struct xe_device *xe) io_size -= min_t(u64, tile_size, io_size); } - xe->mem.vram.size = total_size; + xe->mem.vram.actual_physical_size = total_size; drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &xe->mem.vram.size); + &xe->mem.vram.actual_physical_size); drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &available_size); -- cgit v1.2.3 From 286089ce692907c48a375676a0c828ac912856c9 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Fri, 14 Jul 2023 10:42:07 -0400 Subject: drm/xe: Improve vram info debug printing Print both device physical address range and CPU io range of vram. Also print vram's actual size, usable size excluding stolen memory, and CPU io accessible size. V1: - Add back small BAR device info (Matt) Signed-off-by: Oak Zeng Reviewed-by: Michael J. 
Ruhl Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 41ee89247ddb..bb6823db14d4 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -286,12 +286,13 @@ int xe_mmio_probe_vram(struct xe_device *xe) tile->mem.vram.usable_size = vram_size; tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; - drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, tile->id, - &tile->mem.vram.io_start, &tile->mem.vram.usable_size); - if (tile->mem.vram.io_size < tile->mem.vram.usable_size) - drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id, - tile->id, &tile->mem.vram.io_size); + drm_info(&xe->drm, "Small BAR device\n"); + drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, + tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); + drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, + &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size, + &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size); /* calculate total size using tile size to get the correct HW sizing */ total_size += tile_size; -- cgit v1.2.3 From 0955d3be8b53971e4e72667918092674a233e329 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 14 Aug 2023 20:33:23 +0530 Subject: drm/xe/dg2: Remove Wa_15010599737 Since this is specific to DirectX, we don't need it on Linux. Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230814150323.874033-1-shekhar.chauhan@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 --- drivers/gpu/drm/xe/xe_wa.c | 4 ---- 2 files changed, 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 12cfcf918b87..1179bbd16a97 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -90,9 +90,6 @@ #define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) -#define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) -#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) - #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 5cf84a096302..e2b6e17d7ec4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -599,10 +599,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL)) }, - { XE_RTP_NAME("15010599737"), - XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) - }, { XE_RTP_NAME("18019271663"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) -- cgit v1.2.3 From 50b099030bb493604601a985b5fb3a8c5962aab9 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Mon, 7 Aug 2023 15:43:35 +0000 Subject: drm/xe: Simplify engine class sched_props setting Shortens the too long code lines. 
Reviewed-by: Tejas Upadhyay Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b8fcc6e985cf..c44540684462 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -362,22 +362,20 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->fence_irq = >->fence_irq[info->class]; hwe->engine_id = id; - if (!gt->eclass[hwe->class].sched_props.job_timeout_ms) { - gt->eclass[hwe->class].sched_props.job_timeout_ms = 5 * 1000; - gt->eclass[hwe->class].sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; - gt->eclass[hwe->class].sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; - gt->eclass[hwe->class].sched_props.timeslice_us = 1 * 1000; - gt->eclass[hwe->class].sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN; - gt->eclass[hwe->class].sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX; - gt->eclass[hwe->class].sched_props.preempt_timeout_us = 640 * 1000; - gt->eclass[hwe->class].sched_props.preempt_timeout_min = - XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; - gt->eclass[hwe->class].sched_props.preempt_timeout_max = - XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; + hwe->eclass = >->eclass[hwe->class]; + if (!hwe->eclass->sched_props.job_timeout_ms) { + hwe->eclass->sched_props.job_timeout_ms = 5 * 1000; + hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; + hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; + hwe->eclass->sched_props.timeslice_us = 1 * 1000; + hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN; + hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX; + hwe->eclass->sched_props.preempt_timeout_us = 640 * 1000; + hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; + hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; /* Record default props */ - gt->eclass[hwe->class].defaults = gt->eclass[hwe->class].sched_props; + hwe->eclass->defaults = hwe->eclass->sched_props; } - hwe->eclass = >->eclass[hwe->class]; xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt)); xe_wa_process_engine(hwe); -- cgit v1.2.3 From a863b4163ab9d3f173aef0f1191a0c0b8ea41634 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Mon, 7 Aug 2023 14:58:38 +0000 Subject: drm/xe: Add CONFIG_DRM_XE_PREEMPT_TIMEOUT Allow preemption timeout to be specified as a config option. v2: Change unit to microseconds (Tejas) v3: Remove get_default_preempt_timeout() Reviewed-by: Tejas Upadhyay Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig.profile | 8 ++++++++ drivers/gpu/drm/xe/xe_hw_engine.c | 5 ++++- drivers/gpu/drm/xe/xe_hw_engine.h | 5 +++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile index 51951c8149a1..ba17a25e8db3 100644 --- a/drivers/gpu/drm/xe/Kconfig.profile +++ b/drivers/gpu/drm/xe/Kconfig.profile @@ -22,6 +22,14 @@ config DRM_XE_TIMESLICE_MIN help Configures the default min timeslice duration between multiple contexts by guc scheduling. 
+config DRM_XE_PREEMPT_TIMEOUT + int "Preempt timeout (us, jiffy granularity)" + default 640000 # microseconds + help + How long to wait (in microseconds) for a preemption event to occur + when submitting a new context. If the current context does not hit + an arbitration point and yield to HW before the timer expires, the + HW will be reset to allow the more important context to execute. config DRM_XE_PREEMPT_TIMEOUT_MAX int "Default max preempt timeout (us)" default 10000000 # microseconds diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c44540684462..4c812d04e182 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -370,7 +370,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->eclass->sched_props.timeslice_us = 1 * 1000; hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN; hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX; - hwe->eclass->sched_props.preempt_timeout_us = 640 * 1000; + hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT; hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; /* Record default props */ @@ -562,6 +562,9 @@ int xe_hw_engines_init_early(struct xe_gt *gt) read_copy_fuses(gt); read_compute_fuses(gt); + BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); + BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX); + for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++) hw_engine_init_early(gt, >->hw_engines[i], i); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 3d37d6d44261..71968ee2f600 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -30,6 +30,11 @@ struct drm_printer; #else #define XE_HW_ENGINE_TIMESLICE_MAX (10 * 1000 * 1000) #endif +#ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT +#define XE_HW_ENGINE_PREEMPT_TIMEOUT CONFIG_DRM_XE_PREEMPT_TIMEOUT +#else +#define XE_HW_ENGINE_PREEMPT_TIMEOUT (640 * 1000) +#endif #ifdef CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN #define XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN CONFIG_DRM_XE_PREEMPT_TIMEOUT_MIN #else -- cgit v1.2.3 From 0b1d1473b355ff3a1447048db24822eb7016c1c2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 17 Aug 2023 13:18:25 -0700 Subject: drm/xe: common function to assign queue name The queue name assignment is identical in both GuC and execlists backends, so we can move it to a common function. This will make adding a new entry in the next patch slightly cleaner. 
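The helper introduced below is just a class-to-prefix mapping plus an instance number; factoring it out keeps the GuC and execlist backends from drifting apart, and the next patch only has to extend one switch. A standalone sketch of the mapping (subset of classes, illustrative buffer handling):

#include <stdio.h>

enum engine_class { CLS_RENDER, CLS_COPY, CLS_COMPUTE };

static void assign_name(char *buf, size_t len,
                        enum engine_class cls, unsigned int instance)
{
    static const char *prefix[] = { "rcs", "bcs", "ccs" };

    snprintf(buf, len, "%s%u", prefix[cls], instance);
}

int main(void)
{
    char name[16];

    assign_name(name, sizeof(name), CLS_COMPUTE, 2);
    printf("%s\n", name);   /* prints "ccs2" */
    return 0;
}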
Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230817201831.1583172-2-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_exec_queue.h | 1 + drivers/gpu/drm/xe/xe_execlist.c | 20 +------------------- drivers/gpu/drm/xe/xe_guc_submit.c | 20 +------------------- 4 files changed, 26 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 901c609a657e..c2adff770614 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -177,6 +177,29 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) kfree(q); } +void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) +{ + switch (q->class) { + case XE_ENGINE_CLASS_RENDER: + sprintf(q->name, "rcs%d", instance); + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + sprintf(q->name, "vcs%d", instance); + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + sprintf(q->name, "vecs%d", instance); + break; + case XE_ENGINE_CLASS_COPY: + sprintf(q->name, "bcs%d", instance); + break; + case XE_ENGINE_CLASS_COMPUTE: + sprintf(q->name, "ccs%d", instance); + break; + default: + XE_WARN_ON(q->class); + } +} + struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) { struct xe_exec_queue *q; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 94a6abee38a6..22499a2f522b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -23,6 +23,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); +void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance); struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id); diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 3b8be55fe19c..df91780d8b9f 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -350,25 +350,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q) q->execlist = exl; q->entity = &exl->entity; - switch (q->class) { - case XE_ENGINE_CLASS_RENDER: - sprintf(q->name, "rcs%d", ffs(q->logical_mask) - 1); - break; - case XE_ENGINE_CLASS_VIDEO_DECODE: - sprintf(q->name, "vcs%d", ffs(q->logical_mask) - 1); - break; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - sprintf(q->name, "vecs%d", ffs(q->logical_mask) - 1); - break; - case XE_ENGINE_CLASS_COPY: - sprintf(q->name, "bcs%d", ffs(q->logical_mask) - 1); - break; - case XE_ENGINE_CLASS_COMPUTE: - sprintf(q->name, "ccs%d", ffs(q->logical_mask) - 1); - break; - default: - XE_WARN_ON(q->class); - } + xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1); return 0; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8ecfe2b15e28..55c7b13d15ec 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1167,25 +1167,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) mutex_unlock(&guc->submission_state.lock); - switch (q->class) { - case XE_ENGINE_CLASS_RENDER: - sprintf(q->name, "rcs%d", q->guc->id); - break; - case XE_ENGINE_CLASS_VIDEO_DECODE: - sprintf(q->name, "vcs%d", q->guc->id); - break; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - sprintf(q->name, "vecs%d", q->guc->id); - break; - case XE_ENGINE_CLASS_COPY: - 
sprintf(q->name, "bcs%d", q->guc->id); - break; - case XE_ENGINE_CLASS_COMPUTE: - sprintf(q->name, "ccs%d", q->guc->id); - break; - default: - XE_WARN_ON(q->class); - } + xe_exec_queue_assign_name(q, q->guc->id); trace_xe_exec_queue_create(q); -- cgit v1.2.3 From 296549107e4766bb927debd016527c71fb6faf36 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 17 Aug 2023 13:18:26 -0700 Subject: drm/xe: base definitions for the GSCCS The first step in introducing the GSCCS is to add all the basic defs for it (name, mmio base, class/instance, lrc size etc). Bspec: 60149, 60421, 63752 Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230817201831.1583172-3-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/regs/xe_regs.h | 1 + drivers/gpu/drm/xe/xe_exec_queue.c | 3 +++ drivers/gpu/drm/xe/xe_guc.h | 1 + drivers/gpu/drm/xe/xe_guc_ads.c | 8 ++------ drivers/gpu/drm/xe/xe_hw_engine.c | 7 +++++++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 ++ drivers/gpu/drm/xe/xe_lrc.c | 1 + 8 files changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 1179bbd16a97..5f635682ce5a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -376,6 +376,7 @@ #define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) #define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) #define OTHER_GUC_INSTANCE 0 +#define OTHER_GSC_INSTANCE 6 #define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) #define VCS_VECS_INTR_ENABLE XE_REG(0x190034) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 25275a36b280..be496a3946d8 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -33,6 +33,7 @@ #define XEHPC_BCS6_RING_BASE 0x3ea000 #define XEHPC_BCS7_RING_BASE 0x3ec000 #define XEHPC_BCS8_RING_BASE 0x3ee000 +#define GSCCS_RING_BASE 0x11a000 #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) #define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index c2adff770614..f6619688f92f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -195,6 +195,9 @@ void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) case XE_ENGINE_CLASS_COMPUTE: sprintf(q->name, "ccs%d", instance); break; + case XE_ENGINE_CLASS_OTHER: + sprintf(q->name, "gsccs%d", instance); + break; default: XE_WARN_ON(q->class); } diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index f64f22e97169..3addd8fc674a 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -51,6 +51,7 @@ static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) case XE_ENGINE_CLASS_COMPUTE: return GUC_COMPUTE_CLASS; case XE_ENGINE_CLASS_OTHER: + return GUC_GSC_OTHER_CLASS; default: XE_WARN_ON(class); return -1; diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 7d1244df959d..5edee24b97c9 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -253,9 +253,6 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) int class; for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { - if (class == XE_ENGINE_CLASS_OTHER) - continue; - if (!engine_enable_mask(gt, 
class)) continue; @@ -350,6 +347,8 @@ static void fill_engine_enable_masks(struct xe_gt *gt, engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE)); info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE)); + info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS], + engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); } static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) @@ -638,9 +637,6 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { u8 guc_class; - if (class == XE_ENGINE_CLASS_OTHER) - continue; - guc_class = xe_engine_class_to_guc_class(class); if (!info_map_read(xe, &info_map, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 4c812d04e182..32a5bd9d8e6b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -219,6 +219,13 @@ static const struct engine_info engine_infos[] = { .domain = XE_FW_RENDER, .mmio_base = COMPUTE3_RING_BASE, }, + [XE_HW_ENGINE_GSCCS0] = { + .name = "gsccs0", + .class = XE_ENGINE_CLASS_OTHER, + .instance = OTHER_GSC_INSTANCE, + .domain = XE_FW_GSC, + .mmio_base = GSCCS_RING_BASE, + }, }; static void hw_engine_fini(struct drm_device *drm, void *arg) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 97d9ba31b5fc..cd4bc1412a3f 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -53,6 +53,8 @@ enum xe_hw_engine_id { XE_HW_ENGINE_CCS2, XE_HW_ENGINE_CCS3, #define XE_HW_ENGINE_CCS_MASK GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) + XE_HW_ENGINE_GSCCS0, +#define XE_HW_ENGINE_GSCCS_MASK GENMASK_ULL(XE_HW_ENGINE_GSCCS0, XE_HW_ENGINE_GSCCS0) XE_NUM_HW_ENGINES, }; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 09db8da261a3..7a7fdcdadf37 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -46,6 +46,7 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) case XE_ENGINE_CLASS_COPY: case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: + case XE_ENGINE_CLASS_OTHER: return 2 * SZ_4K; } } -- cgit v1.2.3 From 3d2b5d4e28d9c58ea97704fe1eb663aee2556449 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 17 Aug 2023 13:18:27 -0700 Subject: drm/xe: add GSCCS irq support The GSCCS has its own enable and mask registers. The interrupt identity for the GSCCS shows OTHER_CLASS instance 6. 
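As a stand-alone illustration (not part of this patch), the identity decode can be sketched as below. The class and vector field positions follow the INTR_ENGINE_* helpers defined earlier; the instance field at bits 25:20 and the numeric encoding of the "other" class are assumptions made only for this sketch.

#include <stdint.h>
#include <stdio.h>

/* REG_FIELD_GET()-style extraction (illustration only). */
#define FIELD(hi, lo, v)  (((v) >> (lo)) & ((1u << ((hi) - (lo) + 1)) - 1))

#define ASSUMED_OTHER_CLASS   4u   /* assumed HW encoding of the "other" class */
#define OTHER_GSC_INSTANCE    6u

int main(void)
{
	/* Example identity dword: class "other", instance 6, vector bit 0 set. */
	uint32_t identity = (ASSUMED_OTHER_CLASS << 16) |
			    (OTHER_GSC_INSTANCE << 20) | 0x1;

	uint32_t klass    = FIELD(18, 16, identity);
	uint32_t instance = FIELD(25, 20, identity); /* assumed field position */
	uint32_t vector   = FIELD(15,  0, identity);

	if (klass == ASSUMED_OTHER_CLASS && instance == OTHER_GSC_INSTANCE)
		printf("GSCCS interrupt, vector 0x%x\n", vector);
	return 0;
}
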
Bspec: 54029, 54030 Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230817201831.1583172-4-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++ drivers/gpu/drm/xe/xe_irq.c | 25 ++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5f635682ce5a..b6e870302cc7 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -380,6 +380,7 @@ #define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) #define VCS_VECS_INTR_ENABLE XE_REG(0x190034) +#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044) #define CCS_RSVD_INTR_ENABLE XE_REG(0x190048) #define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4)) #define RCS0_RSVD_INTR_MASK XE_REG(0x190090) @@ -389,6 +390,7 @@ #define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0) #define GUC_SG_INTR_MASK XE_REG(0x1900e8) #define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec) +#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4) #define CCS0_CCS1_INTR_MASK XE_REG(0x190100) #define CCS2_CCS3_INTR_MASK XE_REG(0x190104) #define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 69629be07de2..ef434142bcd9 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -175,6 +175,11 @@ void xe_irq_enable_hwe(struct xe_gt *gt) xe_mmio_write32(gt, VCS0_VCS1_INTR_MASK, ~dmask); xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); + + if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { + xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, irqs); + xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~irqs); + } } } @@ -243,7 +248,7 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile, return tile->media_gt; if (class == XE_ENGINE_CLASS_OTHER && - instance == OTHER_MEDIA_GUC_INSTANCE) + (instance == OTHER_MEDIA_GUC_INSTANCE || instance == OTHER_GSC_INSTANCE)) return tile->media_gt; return tile->primary_gt; @@ -280,16 +285,16 @@ static void gt_irq_handler(struct xe_tile *tile, engine_gt = pick_engine_gt(tile, class, instance); - if (class == XE_ENGINE_CLASS_OTHER) { - gt_other_irq_handler(engine_gt, instance, intr_vec); + hwe = xe_gt_hw_engine(engine_gt, class, instance, false); + if (hwe) { + xe_hw_engine_handle_irq(hwe, intr_vec); continue; } - hwe = xe_gt_hw_engine(engine_gt, class, instance, false); - if (!hwe) + if (class == XE_ENGINE_CLASS_OTHER) { + gt_other_irq_handler(engine_gt, instance, intr_vec); continue; - - xe_hw_engine_handle_irq(hwe, intr_vec); + } } } @@ -457,6 +462,12 @@ static void gt_irq_reset(struct xe_tile *tile) if (ccs_mask & (BIT(2)|BIT(3))) xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); + if (tile->media_gt && + xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) { + xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); + xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); + } + xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0); xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_MASK, ~0); xe_mmio_write32(mmio, GUC_SG_INTR_ENABLE, 0); -- cgit v1.2.3 From aef61349ef1bf01badfa3ea955ba84048467f691 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 17 Aug 2023 13:18:28 -0700 Subject: drm/xe: add GSCCS ring ops Like the BCS, the GSCCS doesn't have any special HW that needs handling when emitting commands, so we can re-use the same emit_job code. 
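A rough stand-alone model of that shared emission path is shown below (the names are illustrative only, not the driver's): several engine classes simply point their ops table at one common emit helper.

#include <stdio.h>

/* Illustrative sketch of a shared "simple" emission path (not driver code). */
enum engine_class { CLASS_COPY, CLASS_OTHER /* GSCCS */, CLASS_RENDER };

struct ring_ops {
	void (*emit_job)(int batch);
};

/* One low-level helper shared by every class with no special HW handling. */
static void emit_job_simple(int batch)
{
	printf("emit batch %d via the common path\n", batch);
}

static const struct ring_ops copy_ops = { .emit_job = emit_job_simple };
static const struct ring_ops gsc_ops  = { .emit_job = emit_job_simple };

static const struct ring_ops *ring_ops_get(enum engine_class class)
{
	switch (class) {
	case CLASS_COPY:  return &copy_ops;
	case CLASS_OTHER: return &gsc_ops;
	default:          return NULL; /* render/compute need extra handling */
	}
}

int main(void)
{
	ring_ops_get(CLASS_OTHER)->emit_job(42);
	return 0;
}
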
To make it clear that this is now a shared low-level function, it has been renamed to use the "simple" postfix, instead of "copy", to indicate that it applies to all engines that don't need any additional engine-specific handling. Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230817201831.1583172-5-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 6346ed24e279..36058600e231 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -205,8 +205,9 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) return !(job->q->flags & EXEC_QUEUE_FLAG_WA) ? BIT(8) : 0; } -static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc, - u64 batch_addr, u32 seqno) +/* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ +static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, + u64 batch_addr, u32 seqno) { u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); @@ -374,6 +375,15 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } +static void emit_job_gen12_gsc(struct xe_sched_job *job) +{ + XE_WARN_ON(job->q->width > 1); /* no parallel submission for GSCCS */ + + __emit_job_gen12_simple(job, job->q->lrc, + job->batch_addr[0], + xe_sched_job_seqno(job)); +} + static void emit_job_gen12_copy(struct xe_sched_job *job) { int i; @@ -385,9 +395,9 @@ static void emit_job_gen12_copy(struct xe_sched_job *job) } for (i = 0; i < job->q->width; ++i) - __emit_job_gen12_copy(job, job->q->lrc + i, - job->batch_addr[i], - xe_sched_job_seqno(job)); + __emit_job_gen12_simple(job, job->q->lrc + i, + job->batch_addr[i], + xe_sched_job_seqno(job)); } static void emit_job_gen12_video(struct xe_sched_job *job) @@ -411,6 +421,10 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job) xe_sched_job_seqno(job)); } +static const struct xe_ring_ops ring_ops_gen12_gsc = { + .emit_job = emit_job_gen12_gsc, +}; + static const struct xe_ring_ops ring_ops_gen12_copy = { .emit_job = emit_job_gen12_copy, }; @@ -427,6 +441,8 @@ const struct xe_ring_ops * xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class) { switch (class) { + case XE_ENGINE_CLASS_OTHER: + return &ring_ops_gen12_gsc; case XE_ENGINE_CLASS_COPY: return &ring_ops_gen12_copy; case XE_ENGINE_CLASS_VIDEO_DECODE: -- cgit v1.2.3 From f4c33ae8eca2fa459d0d58baa1a26234598e6b32 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 17 Aug 2023 13:18:29 -0700 Subject: drm/xe: GSC forcewake support The ID for the GSC forcewake domain already exists, but we're missing the register definitions and the domain intialization, so add that in. 
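For illustration only, the request/ack handshake that domain_init() wires up can be modelled in a few lines; the register storage below is simulated and the bit assignments (BIT(0) as the wake bit, BIT(16) as the masked write enable) mirror the values passed to domain_init() in the patch.

#include <stdint.h>
#include <stdio.h>

/* Toy model of the forcewake request/ack handshake (not driver code). */
static uint32_t fw_gsc, fw_ack_gsc;

static void     reg_write(uint32_t *reg, uint32_t val) { *reg = val; }
static uint32_t reg_read(const uint32_t *reg)          { return *reg; }

static void forcewake_get(void)
{
	/* Masked register: the upper half selects which low bits the write touches. */
	reg_write(&fw_gsc, (1u << 16) | (1u << 0));

	fw_ack_gsc |= 1u << 0;          /* simulate hardware raising the ack */
	while (!(reg_read(&fw_ack_gsc) & 1u))
		;                       /* real code polls with a timeout */
}

int main(void)
{
	forcewake_get();
	printf("GSC domain awake, ack=0x%x\n", fw_ack_gsc);
	return 0;
}
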
v2: move reg definition to be in address order (Matt) Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230817201831.1583172-6-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++ drivers/gpu/drm/xe/xe_force_wake.c | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index b6e870302cc7..51d59e1229be 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -39,6 +39,7 @@ #define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) #define GMD_ID_REVID REG_GENMASK(5, 0) +#define FORCEWAKE_ACK_GSC XE_REG(0xdf8) #define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) /* L3 Cache Control */ @@ -256,6 +257,7 @@ #define FORCEWAKE_RENDER XE_REG(0xa278) #define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4) #define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) +#define FORCEWAKE_GSC XE_REG(0xa618) #define XEHPC_LNCFMISCCFGREG0 XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED) #define XEHPC_OVRLSCCC REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index e563de862581..ef7279e0b006 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -97,6 +97,13 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) FORCEWAKE_ACK_MEDIA_VEBOX(j), BIT(0), BIT(16)); } + + if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) + domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC], + XE_FW_DOMAIN_ID_GSC, + FORCEWAKE_GSC, + FORCEWAKE_ACK_GSC, + BIT(0), BIT(16)); } static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) -- cgit v1.2.3 From 92939935f478c5a0cc43f87652360ac5c70063b9 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 17 Aug 2023 13:18:30 -0700 Subject: drm/xe: don't expose the GSCCS to users The kernel is the only expected user of the GSCCS, so we don't want to expose it to userspace. Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230817201831.1583172-7-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 32a5bd9d8e6b..81281e9c02eb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -795,6 +795,9 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) struct xe_gt *gt = hwe->gt; struct xe_device *xe = gt_to_xe(gt); + if (hwe->class == XE_ENGINE_CLASS_OTHER) + return true; + return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY && hwe->instance == gt->usm.reserved_bcs_instance; } -- cgit v1.2.3 From 07d7ba13d80aa9a047ac4fa83f59f161ca5f0453 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 17 Aug 2023 15:17:07 -0700 Subject: drm/xe: enable idle msg and set hysteresis for GSCCS On MTL (and only on MTL) the GSCCS defaults with idle messaging disabled. This means that, once awoken, the GSCCS will never signal its idleness to the GT. To allow the GT to enter the proper low-power state, we need therefore to turn idle messaging on. As part of this, we also need to set a proper hysteresis value for the engine. 
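The two register values involved can be sketched as below. This is only a model: it assumes the usual masked-register convention for RING_PSMI_CTL (upper 16 bits act as write enables), and the real RTP FIELD_SET action performs a read-modify-write of RING_PWRCTX_MAXCNT rather than writing the field in isolation.

#include <stdint.h>
#include <stdio.h>

/* REG_FIELD_PREP()-style insertion (illustration only). */
#define PREP(hi, lo, v)  (((uint32_t)(v) & ((1u << ((hi) - (lo) + 1)) - 1)) << (lo))

int main(void)
{
	/* Masked write: enable bit 0 for writing (bit 16) and clear IDLE_MSG_DISABLE. */
	uint32_t psmi_ctl = PREP(16, 16, 1) | PREP(0, 0, 0);

	/* IDLE_WAIT_TIME occupies bits 19:0; 0xA encodes the ~5us hysteresis. */
	uint32_t pwrctx_maxcnt = PREP(19, 0, 0xA);

	printf("RING_PSMI_CTL write   = 0x%08x\n", psmi_ctl);
	printf("RING_PWRCTX_MAXCNT    = 0x%08x\n", pwrctx_maxcnt);
	return 0;
}
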
v2: use MEDIA_VERSION() and CLR() for the RTP rule and action, add reg bit define in descending order (Matt) Bspec: 71496 Signed-off-by: Daniele Ceraolo Spurio Cc: Vinay Belgaumkar Cc: Matt Roper Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230817221707.1602873-1-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 ++++ drivers/gpu/drm/xe/xe_hw_engine.c | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 79873bf64e8d..d57fd855086a 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -24,6 +24,10 @@ #define RING_PSMI_CTL(base) XE_REG((base) + 0x50, XE_REG_OPTION_MASKED) #define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) #define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) +#define IDLE_MSG_DISABLE REG_BIT(0) + +#define RING_PWRCTX_MAXCNT(base) XE_REG((base) + 0x54) +#define IDLE_WAIT_TIME REG_GENMASK(19, 0) #define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c) #define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 81281e9c02eb..24b5226f1433 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -339,6 +339,22 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) ring_cmd_cctl_val, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* + * To allow the GSC engine to go idle on MTL we need to enable + * idle messaging and set the hysteresis value (we use 0xA=5us + * as recommended in spec). On platforms after MTL this is + * enabled by default. + */ + { XE_RTP_NAME("MTL GSCCS IDLE MSG enable"), + XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)), + XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0), + IDLE_MSG_DISABLE, + XE_RTP_ACTION_FLAG(ENGINE_BASE)), + FIELD_SET(RING_PWRCTX_MAXCNT(0), + IDLE_WAIT_TIME, + 0xA, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, {} }; -- cgit v1.2.3 From 0aef9ff75204485ae6bcc9f7a54f16b3a3536b49 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 17 Aug 2023 16:04:12 -0700 Subject: drm/xe: Stop tracking 4-tile support The choice of Y-major tiling format (either the legacy "TileY" or the newer "Tile4") is based on graphics IP version (12.50 and beyond have Tile4, earlier platforms have TileY). The tracking in xe was originally added to allow re-using display from i915. However as of i915 commit 4ebf43d0488f ("drm/i915: Eliminate has_4tile feature flag"), the display code determines TileY vs Tile4 itself, so this can be removed from xe. 
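The version-based selection rule described above is trivial enough to state as a helper; the sketch below is illustrative only and simply encodes "graphics IP 12.50 and later use Tile4, everything earlier uses TileY".

#include <stdio.h>

static const char *y_major_tiling(int graphics_verx100)
{
	return graphics_verx100 >= 1250 ? "Tile4" : "TileY";
}

int main(void)
{
	printf("12.55 -> %s\n", y_major_tiling(1255)); /* e.g. DG2: Tile4 */
	printf("12.00 -> %s\n", y_major_tiling(1200)); /* e.g. TGL: TileY */
	return 0;
}
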
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230817230407.909816-10-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 -- drivers/gpu/drm/xe/xe_pci.c | 10 +--------- 2 files changed, 1 insertion(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5575d13395fe..6e852809d3ac 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -221,8 +221,6 @@ struct xe_device { u8 force_execlist:1; /** @has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; - /** @has_4tile: Whether tile-4 tiling is supported */ - u8 has_4tile:1; /** @has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6e31b596683e..32adeda3520c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -77,12 +77,6 @@ struct xe_device_desc { u8 require_force_probe:1; u8 is_dgfx:1; - /* - * FIXME: Xe doesn't care about presence/lack of 4tile since we can - * already determine that from the graphics IP version. This flag - * should eventually move entirely into the display code's own logic. - */ - u8 has_4tile:1; u8 has_llc:1; }; @@ -265,8 +259,7 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \ { XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \ { } \ - }, \ - .has_4tile = 1 + } static const struct xe_device_desc ats_m_desc = { .graphics = &graphics_xehpg, @@ -537,7 +530,6 @@ static int xe_info_init(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? media_desc->name : "none"; - xe->info.has_4tile = desc->has_4tile; xe->info.has_llc = desc->has_llc; xe->info.dma_mask_size = graphics_desc->dma_mask_size; -- cgit v1.2.3 From 13a3398b927b1578440740f7684bc20883a08521 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:04 -0700 Subject: drm/xe/xe2: Update render/compute context image sizes The render and compute context are significantly smaller on Xe2 than on previous platforms. Registers: - Render: 3008 dwords -> 12032 bytes -> round to 3 pages - Compute: 1424 dwords -> 5696 bytes -> round to 2 pages We also allocate one additional page for the HWSP, so the total allocation sizes for render and compute are 4 and 3 pages respectively. 
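The page counts follow directly from that arithmetic; a minimal check of the numbers quoted above:

#include <stdio.h>

#define SZ_4K 4096

/* Round a register-state size in dwords up to 4K pages, plus one HWSP page. */
static int lrc_pages(int state_dwords)
{
	int bytes = state_dwords * 4;
	int pages = (bytes + SZ_4K - 1) / SZ_4K;

	return pages + 1; /* one extra page for the HWSP */
}

int main(void)
{
	printf("render : %d pages\n", lrc_pages(3008)); /* 12032 B -> 3 + 1 = 4 */
	printf("compute: %d pages\n", lrc_pages(1424)); /*  5696 B -> 2 + 1 = 3 */
	return 0;
}
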
Bspec: 65182, 56578, 55793 Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_lrc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 7a7fdcdadf37..7c15c55964a8 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -37,9 +37,16 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) { switch (class) { case XE_ENGINE_CLASS_RENDER: + if (GRAPHICS_VER(xe) >= 20) + return 4 * SZ_4K; + else + return 14 * SZ_4K; case XE_ENGINE_CLASS_COMPUTE: /* 14 pages since graphics_ver == 11 */ - return 14 * SZ_4K; + if (GRAPHICS_VER(xe) >= 20) + return 3 * SZ_4K; + else + return 14 * SZ_4K; default: WARN(1, "Unknown engine class: %d", class); fallthrough; -- cgit v1.2.3 From 015906fff123a3d0c6a44b69663d3041bfaca928 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:05 -0700 Subject: drm/xe/xe2: Add GT topology readout Xe2 platforms have three DSS fuse registers for both geometry and compute. Bspec: 67171, 67537, 67401, 67536 Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ drivers/gpu/drm/xe/xe_gt_topology.c | 16 +++++++++++----- drivers/gpu/drm/xe/xe_gt_types.h | 2 +- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 51d59e1229be..c4458671893e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -162,6 +162,9 @@ #define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c) #define XEHP_GT_COMPUTE_DSS_ENABLE XE_REG(0x9144) #define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT XE_REG(0x9148) +#define XE2_GT_COMPUTE_DSS_2 XE_REG(0x914c) +#define XE2_GT_GEOMETRY_DSS_1 XE_REG(0x9150) +#define XE2_GT_GEOMETRY_DSS_2 XE_REG(0x9154) #define GDRST XE_REG(0x941c) #define GRDOM_GUC REG_BIT(3) diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index d4bbd0a835c2..a8d7f272c30a 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -65,7 +65,10 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) static void get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) { - if (GRAPHICS_VERx100(xe) == 1260) { + if (GRAPHICS_VER(xe) > 20) { + *geometry_regs = 3; + *compute_regs = 3; + } else if (GRAPHICS_VERx100(xe) == 1260) { *geometry_regs = 0; *compute_regs = 2; } else if (GRAPHICS_VERx100(xe) >= 1250) { @@ -90,15 +93,18 @@ xe_gt_topology_init(struct xe_gt *gt) * Register counts returned shouldn't exceed the number of registers * passed as parameters below. 
*/ - drm_WARN_ON(&xe->drm, num_geometry_regs > 1); - drm_WARN_ON(&xe->drm, num_compute_regs > 2); + drm_WARN_ON(&xe->drm, num_geometry_regs > 3); + drm_WARN_ON(&xe->drm, num_compute_regs > 3); load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs, - XELP_GT_GEOMETRY_DSS_ENABLE); + XELP_GT_GEOMETRY_DSS_ENABLE, + XE2_GT_GEOMETRY_DSS_1, + XE2_GT_GEOMETRY_DSS_2); load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs, XEHP_GT_COMPUTE_DSS_ENABLE, - XEHPC_GT_COMPUTE_DSS_ENABLE_EXT); + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, + XE2_GT_COMPUTE_DSS_2); load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); xe_gt_topology_dump(gt, &p); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 35b8c19fa8bf..48fd698ff62a 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -24,7 +24,7 @@ enum xe_gt_type { XE_GT_TYPE_MEDIA, }; -#define XE_MAX_DSS_FUSE_REGS 2 +#define XE_MAX_DSS_FUSE_REGS 3 #define XE_MAX_EU_FUSE_REGS 1 typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; -- cgit v1.2.3 From 5c82000f54716685791f54330098dc93512d1716 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:06 -0700 Subject: drm/xe/xe2: Add MCR register steering for primary GT Xe2 uses the same steering control register and steering semaphore register as MTL. As with recent platforms, group/instance 0,0 is sufficient to target a non-terminated instance for most classes of MCR registers; the only types of ranges that need to consider platform fusing to find a non-terminated instance are SLICE/DSS ranges and a new SQIDI_PSMI type of range. Note that the range of valid bits in XE2_NODE_ENABLE_MASK may be reduced for some Xe2 SKUs. However the lowest bits are always valid and only the lowest instance is obtained via __ffs(), so there's no need to complicate the masking with extra platform/subplatform checks. Also note that Wa_14017387313 suggests skipping MCR lock acquisition around GAM and GAMWKR registers to prevent MCR register accesses in an interrupt handler from deadlocking when the steering semaphore is already held outside the interrupt context. At this time Xe never issues MCR accesses from within an interrupt handler so the workaround is not currently needed. 
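The SQIDI_PSMI steering target derivation mirrors init_steering_sqidi_psmi() in the patch below and can be modelled in user space as follows; the fuse value used here is made up purely for the example.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	/* Pretend MIRROR_FUSE3 read back with node-enable bits 31:16 = 0x0004. */
	uint32_t fuse3 = 0x00040000;

	uint32_t mask   = (fuse3 >> 16) & 0xffff;   /* XE2_NODE_ENABLE_MASK */
	int      select = ffs(mask) - 1;            /* lowest enabled instance */

	printf("group=%d instance=%d\n", select >> 1, select & 1);
	return 0;
}
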
v2: - [0x008700-0x0087FF] range to extend up to 0x887F (Matt Attwood) - [0x00EF00-0x00F4FF] -> [0x00F000, 0xFFFF] to follow latest bspec version (Bala) Bspec: 71185 Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Balasubramani Vivekanandan Reviewed-by: Matt Atwood Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_gt_mcr.c | 50 +++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_gt_types.h | 1 + 3 files changed, 51 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index c4458671893e..33830cd0df66 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -143,6 +143,7 @@ #define EN_32B_ACCESS REG_BIT(30) #define MIRROR_FUSE3 XE_REG(0x9118) +#define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16) #define L3BANK_PAIR_COUNT 4 #define L3BANK_MASK REG_GENMASK(3, 0) /* on Xe_HP the same fuses indicates mslices instead of L3 banks */ diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 9eb7a6a1348d..26f69e52b120 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -181,6 +181,39 @@ static const struct xe_mmio_range dg2_implicit_steering_table[] = { {}, }; +static const struct xe_mmio_range xe2lpg_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* SLICE */ + { 0x005500, 0x007FFF }, /* SLICE */ + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00DCFF }, /* SLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + { 0x00E980, 0x00E9FF }, /* SLICE */ + { 0x013000, 0x0133FF }, /* DSS (0x13000-0x131FF), SLICE (0x13200-0x133FF) */ + {}, +}; + +static const struct xe_mmio_range xe2lpg_sqidi_psmi_steering_table[] = { + { 0x000B00, 0x000BFF }, + { 0x001000, 0x001FFF }, + {}, +}; + +static const struct xe_mmio_range xe2lpg_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* GAM, rsvd, GAMWKR */ + { 0x008700, 0x00887F }, /* SQIDI, MEMPIPE */ + { 0x00B000, 0x00B3FF }, /* NODE, L3BANK */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DDFF }, /* MEMPIPE */ + { 0x00E900, 0x00E97F }, /* MEMPIPE */ + { 0x00F000, 0x00FFFF }, /* GAM, GAMWKR */ + { 0x013400, 0x0135FF }, /* MEMPIPE */ + {}, +}; + static void init_steering_l3bank(struct xe_gt *gt) { if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { @@ -265,6 +298,16 @@ static void init_steering_oaddrm(struct xe_gt *gt) gt->steering[DSS].instance_target = 0; /* unused */ } +static void init_steering_sqidi_psmi(struct xe_gt *gt) +{ + u32 mask = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, + xe_mmio_read32(gt, MIRROR_FUSE3)); + u32 select = __ffs(mask); + + gt->steering[SQIDI_PSMI].group_target = select >> 1; + gt->steering[SQIDI_PSMI].instance_target = select & 0x1; +} + static void init_steering_inst0(struct xe_gt *gt) { gt->steering[DSS].group_target = 0; /* unused */ @@ -280,6 +323,7 @@ static const struct { [LNCF] = { "LNCF", NULL }, /* initialized by mslice init */ [DSS] = { "DSS", init_steering_dss }, [OADDRM] = { "OADDRM", init_steering_oaddrm }, + [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; @@ -298,7 +342,11 @@ void xe_gt_mcr_init(struct xe_gt *gt) 
gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table; } else { - if (GRAPHICS_VERx100(xe) >= 1270) { + if (GRAPHICS_VER(xe) >= 20) { + gt->steering[DSS].ranges = xe2lpg_dss_steering_table; + gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table; + gt->steering[INSTANCE0].ranges = xe2lpg_instance0_steering_table; + } else if (GRAPHICS_VERx100(xe) >= 1270) { gt->steering[INSTANCE0].ranges = xelpg_instance0_steering_table; gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; gt->steering[DSS].ranges = xelpg_dss_steering_table; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 48fd698ff62a..d4310be3e1e7 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -55,6 +55,7 @@ enum xe_steering_type { LNCF, DSS, OADDRM, + SQIDI_PSMI, /* * On some platforms there are multiple types of MCR registers that -- cgit v1.2.3 From 8e99b54508d6fb1a8d1c8d04128ea6634c00cb19 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:07 -0700 Subject: drm/xe/xe2: Add MCR register steering for media GT Xe2 media has a few types of MCR registers, but all except for "GPMXMT" can safely steer to instance 0,0. GPMXMT follows the same rules that MTL's OADDRM ranges did, so it can re-use the same enum value. Bspec: 71186 Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 26f69e52b120..e74d3c5743c8 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -214,6 +214,23 @@ static const struct xe_mmio_range xe2lpg_instance0_steering_table[] = { {}, }; +static const struct xe_mmio_range xe2lpm_gpmxmt_steering_table[] = { + { 0x388160, 0x38817F }, + { 0x389480, 0x3894CF }, + {}, +}; + +static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = { + { 0x384000, 0x3847DF }, /* GAM, rsvd, GAM */ + { 0x384900, 0x384AFF }, /* GAM */ + { 0x389560, 0x3895FF }, /* MEDIAINF */ + { 0x38B600, 0x38B8FF }, /* L3BANK */ + { 0x38C800, 0x38D07F }, /* GAM, MEDIAINF */ + { 0x38F000, 0x38F0FF }, /* GAM */ + { 0x393C00, 0x393C7F }, /* MEDIAINF */ + {}, +}; + static void init_steering_l3bank(struct xe_gt *gt) { if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { @@ -322,7 +339,7 @@ static const struct { [MSLICE] = { "MSLICE", init_steering_mslice }, [LNCF] = { "LNCF", NULL }, /* initialized by mslice init */ [DSS] = { "DSS", init_steering_dss }, - [OADDRM] = { "OADDRM", init_steering_oaddrm }, + [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, @@ -340,7 +357,12 @@ void xe_gt_mcr_init(struct xe_gt *gt) if (gt->info.type == XE_GT_TYPE_MEDIA) { drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); - gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table; + if (MEDIA_VER(xe) >= 20) { + gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table; + gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table; + } else { + gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table; + } } else { if (GRAPHICS_VER(xe) >= 20) { gt->steering[DSS].ranges = xe2lpg_dss_steering_table; -- cgit v1.2.3 From c5fa58146ee0e55ef3e8b28c1aed705c97968336 Mon Sep 17 00:00:00 2001 From: Matt Roper 
Date: Fri, 11 Aug 2023 09:06:08 -0700 Subject: drm/xe/xe2: Update context image layouts Engine register state layout has changed a bit on Xe2. We'll also explicitly define a BCS layout to ensure BLIT_SWCTL and BLIT_CCTL are included. Bspec: 65182, 60184, 55793 Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_lrc.c | 76 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 7c15c55964a8..2b4219c38359 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -424,6 +424,69 @@ static const u8 mtl_rcs_offsets[] = { END }; +#define XE2_CTX_COMMON \ + NOP(1), /* [0x00] */ \ + LRI(15, POSTED), /* [0x01] */ \ + REG16(0x244), /* [0x02] CTXT_SR_CTL */ \ + REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \ + REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \ + REG(0x038), /* [0x08] RING_BUFFER_START */ \ + REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \ + REG(0x168), /* [0x0c] BB_ADDR_UDW */ \ + REG(0x140), /* [0x0e] BB_ADDR */ \ + REG(0x110), /* [0x10] BB_STATE */ \ + REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \ + REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \ + REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \ + REG(0x180), /* [0x18] CCID */ \ + REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \ + REG(0x120), /* [0x1c] PRT_BB_STATE */ \ + REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \ + \ + NOP(1), /* [0x20] */ \ + LRI(9, POSTED), /* [0x21] */ \ + REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \ + REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \ + REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \ + REG16(0x284), /* [0x28] dummy reg */ \ + REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \ + REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \ + REG16(0x278), /* [0x2e] CS_CTX_ASID */ \ + REG16(0x274), /* [0x30] PTBP_UDW */ \ + REG16(0x270) /* [0x32] PTBP_LDW */ + +static const u8 xe2_rcs_offsets[] = { + XE2_CTX_COMMON, + + NOP(2), /* [0x34] */ + LRI(2, POSTED), /* [0x36] */ + REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */ + REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */ + + NOP(6), /* [0x41] */ + LRI(1, 0), /* [0x47] */ + REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ + + END +}; + +static const u8 xe2_bcs_offsets[] = { + XE2_CTX_COMMON, + + NOP(4 + 8 + 1), /* [0x34] */ + LRI(2, POSTED), /* [0x41] */ + REG16(0x200), /* [0x42] BCS_SWCTRL */ + REG16(0x204), /* [0x44] BLIT_CCTL */ + + END +}; + +static const u8 xe2_xcs_offsets[] = { + XE2_CTX_COMMON, + + END +}; + #undef END #undef REG16 #undef REG @@ -433,7 +496,9 @@ static const u8 mtl_rcs_offsets[] = { static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) { if (class == XE_ENGINE_CLASS_RENDER) { - if (GRAPHICS_VERx100(xe) >= 1270) + if (GRAPHICS_VER(xe) >= 20) + return xe2_rcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1270) return mtl_rcs_offsets; else if (GRAPHICS_VERx100(xe) >= 1255) return dg2_rcs_offsets; @@ -441,8 +506,15 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) return xehp_rcs_offsets; else return gen12_rcs_offsets; + } else if (class == XE_ENGINE_CLASS_COPY) { + if (GRAPHICS_VER(xe) >= 20) + return xe2_bcs_offsets; + else + return gen12_xcs_offsets; } else { - if (GRAPHICS_VERx100(xe) >= 1255) + if (GRAPHICS_VER(xe) >= 20) + return xe2_xcs_offsets; + else if (GRAPHICS_VERx100(xe) >= 1255) return dg2_xcs_offsets; else return gen12_xcs_offsets; -- cgit v1.2.3 From 
53497182ddf7a98fc33049d51ac3692c2f8097da Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:09 -0700 Subject: drm/xe/xe2: Handle fused-off CCS engines On Xe2 platforms, availability of the CCS engines is reflected in the FUSE4 register. Bspec: 62483 Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Matt Atwood Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_hw_engine.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 33830cd0df66..271ed0cdbe21 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -152,6 +152,7 @@ /* Fuse readout registers for GT */ #define XEHP_FUSE4 XE_REG(0x9114) +#define CCS_EN_MASK REG_GENMASK(19, 16) #define GT_L3_EXC_MASK REG_GENMASK(6, 4) #define GT_VEBOX_VDBOX_DISABLE XE_REG(0x9140) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 24b5226f1433..dd673a684b70 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -550,7 +550,7 @@ static void read_copy_fuses(struct xe_gt *gt) } } -static void read_compute_fuses(struct xe_gt *gt) +static void read_compute_fuses_from_dss(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -577,6 +577,33 @@ static void read_compute_fuses(struct xe_gt *gt) } } +static void read_compute_fuses_from_reg(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 ccs_mask; + + ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4); + ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask); + + for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) { + if (!(gt->info.engine_mask & BIT(i))) + continue; + + if ((ccs_mask & BIT(j)) == 0) { + gt->info.engine_mask &= ~BIT(i); + drm_info(&xe->drm, "ccs%u fused off\n", j); + } + } +} + +static void read_compute_fuses(struct xe_gt *gt) +{ + if (GRAPHICS_VER(gt_to_xe(gt)) >= 20) + read_compute_fuses_from_reg(gt); + else + read_compute_fuses_from_dss(gt); +} + int xe_hw_engines_init_early(struct xe_gt *gt) { int i; -- cgit v1.2.3 From be6dd3c8e884f7b1a9f76c3ad1efd068b981f7d5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:10 -0700 Subject: drm/xe/xe2: AuxCCS is no longer used Starting with Xe2, all platforms (including igpu platforms) use FlatCCS compression rather than AuxCCS. Similar to PVC, any future platforms that don't support FlatCCS should not attempt to fall back to AuxCCS programming. Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 36058600e231..9e23293ec4d3 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -243,9 +243,11 @@ static bool has_aux_ccs(struct xe_device *xe) { /* * PVC is a special case that has no compression of either type - * (FlatCCS or AuxCCS). + * (FlatCCS or AuxCCS). Also, AuxCCS is no longer used from Xe2 + * onward, so any future platforms with no FlatCCS will not have + * AuxCCS either. 
*/ - if (xe->info.platform == XE_PVC) + if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) return false; return !xe->info.has_flat_ccs; -- cgit v1.2.3 From 2985bedc1c59441f4b0d4724a1c2211e0b6b4a19 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:11 -0700 Subject: drm/xe/xe2: Define Xe2_LPG IP features Define a common set of Xe2 graphics feature flags and definitions that will be used for all platforms in this family. Several of the feature flags are inherited unchanged from Xe_HP and/or Xe_HPC platforms: - dma_mask_size remains 46 (Bspec 70817) - supports_usm=1 (Bspec 59651) - has_flatccs=1 (Bspec 58797) - has_asid=1 (Bspec 59654, 59265, 60288) - has_range_tlb_invalidate=1 (Bspec 71126) However some of them still need proper implementation in the driver to be used, so they are disabled. Notable Xe2-specific changes: - All Xe2 platforms use a five-level page table, regardless of the virtual address space for the platform. (Bspec 59505) The graphics engine mask represents the Xe2 architecture engines (Bspec 60149), but individual platforms may have a reduced set of usable engines, as reflected by their fusing. Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 32adeda3520c..46145340ae90 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -164,6 +164,24 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_flat_ccs = 0, }; +#define XE2_GFX_FEATURES \ + .dma_mask_size = 46, \ + .has_asid = 1, \ + .has_flat_ccs = 0 /* FIXME: implementation missing */, \ + .has_range_tlb_invalidation = 1, \ + .supports_usm = 0 /* FIXME: implementation missing */, \ + .vm_max_level = 4, \ + .hw_engine_mask = \ + BIT(XE_HW_ENGINE_RCS0) | \ + BIT(XE_HW_ENGINE_BCS8) | BIT(XE_HW_ENGINE_BCS0) | \ + GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) + +static const struct xe_graphics_desc graphics_xe2 = { + .name = "Xe2_LPG", + + XE2_GFX_FEATURES, +}; + static const struct xe_media_desc media_xem = { .name = "Xe_M", .ver = 12, @@ -296,6 +314,7 @@ static const struct xe_device_desc mtl_desc = { static struct gmdid_map graphics_ip_map[] = { { 1270, &graphics_xelpg }, { 1271, &graphics_xelpg }, + { 2004, &graphics_xe2 }, }; /* Map of GMD_ID values to media IP */ -- cgit v1.2.3 From 595e4a3aade359f8e3bc84bd30746cb5826c4e67 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:12 -0700 Subject: drm/xe/xe2: Define Xe2_LPM IP features Xe2_LPM media is represented by GMD_ID value 20.00. It provides 1 VD + 1 VE + 1 SFC. 
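For illustration, the GMD_ID-to-IP selection added by these patches amounts to a small lookup keyed on major*100 + minor; the sketch below models the media map only, with descriptor names used as plain strings rather than the driver's structures.

#include <stdio.h>

struct gmdid_map { int ver; const char *desc; };

static const struct gmdid_map media_ip_map[] = {
	{ 1300, "media_xelpmp" },
	{ 2000, "media_xe2"    },
};

static const char *media_ip_for(int gmdid_ver)
{
	for (unsigned int i = 0; i < sizeof(media_ip_map) / sizeof(media_ip_map[0]); i++)
		if (media_ip_map[i].ver == gmdid_ver)
			return media_ip_map[i].desc;
	return "unknown";
}

int main(void)
{
	printf("GMD_ID 20.00 -> %s\n", media_ip_for(2000));
	return 0;
}
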
Bspec: 70821, 70819 Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 46145340ae90..467b0cef6d21 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -209,6 +209,12 @@ static const struct xe_media_desc media_xelpmp = { BIT(XE_HW_ENGINE_VECS0), /* TODO: add GSC0 */ }; +static const struct xe_media_desc media_xe2 = { + .name = "Xe2_LPM", + .hw_engine_mask = + BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ +}; + static const struct xe_device_desc tgl_desc = { .graphics = &graphics_xelp, .media = &media_xem, @@ -320,6 +326,7 @@ static struct gmdid_map graphics_ip_map[] = { /* Map of GMD_ID values to media IP */ static struct gmdid_map media_ip_map[] = { { 1300, &media_xelpmp }, + { 2000, &media_xe2 }, }; #define INTEL_VGA_DEVICE(id, info) { \ -- cgit v1.2.3 From e9bb0891e69055cdfc1053f297b1b8b033372975 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:13 -0700 Subject: drm/xe/xe2: Track VA bits independently of max page table level Starting with Xe2, a 5-level page table is always used, regardless of the actual virtual address range supported by the platform. The two values need to be tracked separately in the device descriptor since Xe2 platforms only have a 48 bit virtual address range. Bspec: 59505, 65637, 70817 Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 6 ++++++ drivers/gpu/drm/xe/xe_pci_types.h | 1 + drivers/gpu/drm/xe/xe_query.c | 3 +-- drivers/gpu/drm/xe/xe_vm.c | 2 +- 5 files changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 6e852809d3ac..552e8a343d8f 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -210,6 +210,8 @@ struct xe_device { u8 gt_count; /** @vm_max_level: Max VM level */ u8 vm_max_level; + /** @va_bits: Maximum bits of a virtual address */ + u8 va_bits; /** @is_dgfx: is discrete device */ u8 is_dgfx:1; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 467b0cef6d21..8512cd451887 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -94,6 +94,7 @@ static const struct xe_graphics_desc graphics_xelp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), .dma_mask_size = 39, + .va_bits = 48, .vm_max_level = 3, }; @@ -105,6 +106,7 @@ static const struct xe_graphics_desc graphics_xelpp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), .dma_mask_size = 39, + .va_bits = 48, .vm_max_level = 3, }; @@ -112,6 +114,7 @@ static const struct xe_graphics_desc graphics_xelpp = { .has_range_tlb_invalidation = true, \ .has_flat_ccs = true, \ .dma_mask_size = 46, \ + .va_bits = 48, \ .vm_max_level = 3 static const struct xe_graphics_desc graphics_xehpg = { @@ -145,6 +148,7 @@ static const struct xe_graphics_desc graphics_xehpc = { XE_HP_FEATURES, .dma_mask_size = 52, .max_remote_tiles = 1, + .va_bits = 57, .vm_max_level = 4, .vram_flags = XE_VRAM_FLAGS_NEED64K, @@ -170,6 +174,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_flat_ccs 
= 0 /* FIXME: implementation missing */, \ .has_range_tlb_invalidation = 1, \ .supports_usm = 0 /* FIXME: implementation missing */, \ + .va_bits = 48, \ .vm_max_level = 4, \ .hw_engine_mask = \ BIT(XE_HW_ENGINE_RCS0) | \ @@ -560,6 +565,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.dma_mask_size = graphics_desc->dma_mask_size; xe->info.vram_flags = graphics_desc->vram_flags; + xe->info.va_bits = graphics_desc->va_bits; xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.supports_usm = graphics_desc->supports_usm; xe->info.has_asid = graphics_desc->has_asid; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index ba31b933eb8e..df6ddbc52d7f 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -14,6 +14,7 @@ struct xe_graphics_desc { u8 rel; u8 dma_mask_size; /* available DMA address bits */ + u8 va_bits; u8 vm_max_level; u8 vram_flags; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 7ea235c71385..1db77a7c9039 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -197,8 +197,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) XE_QUERY_CONFIG_FLAGS_HAS_VRAM; config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; - config->info[XE_QUERY_CONFIG_VA_BITS] = 12 + - (9 * (xe->info.vm_max_level + 1)); + config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count; config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] = hweight_long(xe->info.mem_region_mask); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d683418b817d..a774f9632dda 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1221,7 +1221,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->xe = xe; - vm->size = 1ull << xe_pt_shift(xe->info.vm_max_level + 1); + vm->size = 1ull << xe->info.va_bits; vm->flags = flags; -- cgit v1.2.3 From e4751ab5d2fef45d666e64a8766e08e9d60eccfd Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Fri, 11 Aug 2023 09:06:14 -0700 Subject: drm/xe/xe2: Add MOCS table Additional minor change to remove L4_2_RESERVED, which will never be required. v2: Make L3/L4 names consistent for GLOB_MOCS defines (Matt Roper) Bspec: 71582 Cc: Matt Roper Signed-off-by: Balasubramani Vivekanandan Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index c9653978fc9f..c120090ef9b4 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -62,13 +62,15 @@ struct xe_mocs_info { #define L3_LKUP(value) ((value) << 7) /* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */ -#define _L4_CACHEABILITY REG_GENMASK(3, 2) -#define IG_PAT REG_BIT(8) +#define IG_PAT REG_BIT(8) +#define L3_CACHE_POLICY_MASK REG_GENMASK(5, 4) +#define L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. 
*/ #define PVC_NUM_MOCS_ENTRIES 3 #define MTL_NUM_MOCS_ENTRIES 16 +#define XE2_NUM_MOCS_ENTRIES 16 /* (e)LLC caching options */ /* @@ -93,10 +95,14 @@ struct xe_mocs_info { #define L3_3_WB _L3_CACHEABILITY(3) /* L4 caching options */ -#define L4_0_WB REG_FIELD_PREP(_L4_CACHEABILITY, 0) -#define L4_1_WT REG_FIELD_PREP(_L4_CACHEABILITY, 1) -#define L4_2_RESERVED REG_FIELD_PREP(_L4_CACHEABILITY, 2) -#define L4_3_UC REG_FIELD_PREP(_L4_CACHEABILITY, 3) +#define L4_0_WB REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0) +#define L4_1_WT REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 1) +#define L4_3_UC REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 3) + +#define XE2_L3_0_WB REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 0) +/* XD: WB Transient Display */ +#define XE2_L3_1_XD REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1) +#define XE2_L3_3_UC REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3) #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ [__idx] = { \ @@ -370,6 +376,17 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = { L3_GLBGO(1) | L3_1_UC), }; +static const struct xe_mocs_entry xe2_mocs_table[] = { + /* Defer to PAT */ + MOCS_ENTRY(0, XE2_L3_0_WB | L4_0_WB, 0), + /* Cached L3 + L4 */ + MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), + /* Uncached L3, Cached L4 */ + MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0), + /* Uncached L3 + L4 */ + MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0), +}; + static unsigned int get_mocs_settings(struct xe_device *xe, struct xe_mocs_info *info) { -- cgit v1.2.3 From 0993b22f93f867b4ed1c1fc3f077fa7e736353d6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:15 -0700 Subject: drm/xe/xe2: Program GuC's MOCS on Xe2 and beyond As with PVC, Xe2 platforms require that the index of an uncached MOCS entry be programmed into the GUC_SHIM_CONTROL register. This will likely be needed on future platforms as well. Xe2 also extends the size of the MOCS index register field from two bits to four bits. Since these extra bits were unused on PVC, it should be safe to just increase the size of the mask. 
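The effect of widening the field can be seen with a quick field-prep sketch; index 5 below is purely hypothetical (the indices actually programmed by the driver are small), it just shows that a value needing more than two bits would be truncated by the old mask.

#include <stdint.h>
#include <stdio.h>

/* REG_FIELD_PREP()-style insertion (illustration only). */
#define PREP(hi, lo, v)  (((uint32_t)(v) & ((1u << ((hi) - (lo) + 1)) - 1)) << (lo))

int main(void)
{
	uint32_t idx = 5; /* hypothetical MOCS index wider than 2 bits */

	printf("old 2-bit field (25:24): 0x%08x (truncated)\n", PREP(25, 24, idx));
	printf("new 4-bit field (27:24): 0x%08x\n",             PREP(27, 24, idx));
	return 0;
}
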
Bspec: 60592 Cc: Haridhar Kalvala Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 2 +- drivers/gpu/drm/xe/xe_guc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index fcb747201bc1..ba375fc51a87 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -45,7 +45,7 @@ #define GUC_WOPCM_SIZE_LOCKED REG_BIT(0) #define GUC_SHIM_CONTROL XE_REG(0xc064) -#define GUC_MOCS_INDEX_MASK REG_GENMASK(25, 24) +#define GUC_MOCS_INDEX_MASK REG_GENMASK(27, 24) #define GUC_SHIM_WC_ENABLE REG_BIT(21) #define GUC_ENABLE_MIA_CLOCK_GATING REG_BIT(15) #define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA REG_BIT(10) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 2493c5859948..e102637c0695 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -326,7 +326,7 @@ static void guc_prepare_xfer(struct xe_guc *guc) shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES | GUC_ENABLE_MIA_CACHING; - if (xe->info.platform == XE_PVC) + if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) shim_flags |= REG_FIELD_PREP(GUC_MOCS_INDEX_MASK, gt->mocs.uc_index); /* Must program this register before loading the ucode with DMA */ -- cgit v1.2.3 From 3330361543fca2a60b71ebf02cd5e56bb417b159 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:16 -0700 Subject: drm/xe/lnl: Add LNL platform definition LNL is an integrated GPU based on the Xe2 architecture. Bspec: 70821 Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Balasubramani Vivekanandan Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 6 ++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + include/drm/xe_pciids.h | 5 +++++ 3 files changed, 12 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 8512cd451887..6c2c6723b1b2 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -319,6 +319,11 @@ static const struct xe_device_desc mtl_desc = { PLATFORM(XE_METEORLAKE), }; +static const struct xe_device_desc lnl_desc = { + PLATFORM(XE_LUNARLAKE), + .require_force_probe = true, +}; + #undef PLATFORM /* Map of GMD_ID values to graphics IP */ @@ -356,6 +361,7 @@ static const struct pci_device_id pciidlist[] = { XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), + XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index abbb8a1f29a8..e378a64a0f86 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -21,6 +21,7 @@ enum xe_platform { XE_DG2, XE_PVC, XE_METEORLAKE, + XE_LUNARLAKE, }; enum xe_subplatform { diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index 0d0cf80eb0ba..f6ac6d9772ee 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -191,4 +191,9 @@ XE_MTL_S_IDS(MACRO__, ## __VA_ARGS__), \ XE_ARL_IDS(MACRO__, ## __VA_ARGS__) +#define XE_LNL_IDS(MACRO__, ...) 
\ + MACRO__(0x6420, ## __VA_ARGS__), \ + MACRO__(0x64A0, ## __VA_ARGS__), \ + MACRO__(0x64B0, ## __VA_ARGS__) + #endif -- cgit v1.2.3 From 943c01b72f3e9332d7a52ecffa35ef7152e18c5c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 11 Aug 2023 09:06:17 -0700 Subject: drm/xe/lnl: Add GuC firmware definition Define the GuC firmware to load on the platform. Cc: Balasubramani Vivekanandan Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 4b04f6e5388d..1802b280cd8c 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -100,6 +100,7 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(LUNARLAKE, mmp_ver(xe, guc, lnl, 70, 6, 8)) \ fw_def(METEORLAKE, mmp_ver(i915, guc, mtl, 70, 6, 4)) \ fw_def(PVC, mmp_ver(xe, guc, pvc, 70, 6, 4)) \ fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ -- cgit v1.2.3 From 770576f1e1c001ba069e552e08893d56a64015c4 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Fri, 11 Aug 2023 09:06:18 -0700 Subject: drm/xe/lnl: Hook up MOCS table LNL uses the Xe2 MOCS table introduced in an earlier patch. Bspec: 71582 Cc: Matt Roper Signed-off-by: Balasubramani Vivekanandan Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index c120090ef9b4..75d025c54eb8 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -395,6 +395,14 @@ static unsigned int get_mocs_settings(struct xe_device *xe, memset(info, 0, sizeof(struct xe_mocs_info)); switch (xe->info.platform) { + case XE_LUNARLAKE: + info->size = ARRAY_SIZE(xe2_mocs_table); + info->table = xe2_mocs_table; + info->n_entries = XE2_NUM_MOCS_ENTRIES; + info->uc_index = 3; + info->wb_index = 1; + info->unused_entries_index = 1; + break; case XE_PVC: info->size = ARRAY_SIZE(pvc_mocs_desc); info->table = pvc_mocs_desc; -- cgit v1.2.3 From 07431945d8ae805746bbd01b052eeefb919911db Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 22 Aug 2023 17:33:13 -0700 Subject: drm/xe: Avoid 64-bit register reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel hardware officially only supports GTTMMADR register accesses of 32-bits or less (although 64-bit accesses to device memory and PTEs in the GSM are fine). Even though we do usually seem to get back reasonable values when performing readq() operations on registers in BAR0, we shouldn't rely on this violation of the spec working consistently. It's likely that even when we do get proper register values back the hardware is internally satisfying the request via a non-atomic sequence of two 32-bit reads, which can be problematic for timestamps and counters if rollover of the lower bits is not considered. Replace xe_mmio_read64() with xe_mmio_read64_2x32() that implements 64-bit register reads as two 32-bit reads and attempts to ensure that the upper dword has stabilized to avoid problematic rollovers for counter and timestamp registers. v2: - Move function from xe_mmio.h to xe_mmio.c. 
(Lucas) - Convert comment to kerneldoc and note that it shouldn't be used on registers where reads may trigger side effects. (Lucas) Bspec: 60027 Reviewed-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://lore.kernel.org/r/20230823003312.1356779-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 56 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_mmio.h | 12 ++------ drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 6 ++-- 3 files changed, 58 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index bb6823db14d4..c2ec52eefb2e 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -228,7 +228,7 @@ int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; } else { - offset = xe_mmio_read64(gt, GSMBASE); + offset = xe_mmio_read64_2x32(gt, GSMBASE); } /* remove the tile offset so we have just the available size */ @@ -326,7 +326,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe) if (xe->info.tile_count == 1) return; - mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR); + mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); adj_tile_count = xe->info.tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; @@ -509,7 +509,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, args->value = xe_mmio_read32(gt, reg); break; case DRM_XE_MMIO_64BIT: - args->value = xe_mmio_read64(gt, reg); + args->value = xe_mmio_read64_2x32(gt, reg); break; default: drm_dbg(&xe->drm, "Invalid MMIO bit size"); @@ -526,3 +526,53 @@ exit: return ret; } + +/** + * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads + * @gt: MMIO target GT + * @reg: register to read value from + * + * Although Intel GPUs have some 64-bit registers, the hardware officially + * only supports GTTMMADR register reads of 32 bits or smaller. Even if + * a readq operation may return a reasonable value, that violation of the + * spec shouldn't be relied upon and all 64-bit register reads should be + * performed as two 32-bit reads of the upper and lower dwords. + * + * When reading registers that may be changing (such as + * counters), a rollover of the lower dword between the two 32-bit reads + * can be problematic. This function attempts to ensure the upper dword has + * stabilized before returning the 64-bit value. + * + * Note that because this function may re-read the register multiple times + * while waiting for the value to stabilize it should not be used to read + * any registers where read operations have side effects. + * + * Returns the value of the 64-bit register. 
+ */ +u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; + u32 ldw, udw, oldudw, retries; + + if (reg.addr < gt->mmio.adj_limit) { + reg.addr += gt->mmio.adj_offset; + reg_udw.addr += gt->mmio.adj_offset; + } + + oldudw = xe_mmio_read32(gt, reg_udw); + for (retries = 5; retries; --retries) { + ldw = xe_mmio_read32(gt, reg); + udw = xe_mmio_read32(gt, reg_udw); + + if (udw == oldudw) + break; + + oldudw = udw; + } + + xe_gt_WARN(gt, retries == 0, + "64-bit read of %#x did not stabilize\n", reg.addr); + + return (u64)udw << 32 | ldw; +} + diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index d24badca8677..f72c34c7d1d0 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -11,6 +11,7 @@ #include "regs/xe_reg_defs.h" #include "xe_device_types.h" +#include "xe_gt_printk.h" #include "xe_gt_types.h" struct drm_device; @@ -85,16 +86,6 @@ static inline void xe_mmio_write64(struct xe_gt *gt, writeq(val, tile->mmio.regs + reg.addr); } -static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readq(tile->mmio.regs + reg.addr); -} - static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval) @@ -155,5 +146,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, int xe_mmio_probe_vram(struct xe_device *xe); int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base); +u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); #endif diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index be0a25e23929..6ba6b1b7f34b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -67,7 +67,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) } /* Use DSM base address instead for stolen memory */ - mgr->stolen_base = (xe_mmio_read64(mmio, DSMBASE) & BDSM_MASK) - tile_offset; + mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base)) return 0; @@ -126,8 +126,8 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr /* Carve out the top of DSM as it contains the reserved WOPCM region */ wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK, - xe_mmio_read64(xe_root_mmio_gt(xe), - STOLEN_RESERVED)); + xe_mmio_read64_2x32(xe_root_mmio_gt(xe), + STOLEN_RESERVED)); stolen_size -= (1U << wopcm_size) * SZ_1M; if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2))) -- cgit v1.2.3 From 486b2ef2768222bb4210709ccf5443c3e381346e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 22 Aug 2023 17:33:14 -0700 Subject: drm/xe: Drop xe_mmio_write64() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only possible 64-bit register writes in the driver come from the highly questionable MMIO ioctl. That ioctl's register write support only operates for userspace running as root and cannot be used by any real userspace; it exists solely to support the "xe_reg" debug tool in IGT. Since the spec indicates that hardware does not officially support 64-bit register accesses, there's no reason to allow such 64-bit writes, even for debugging. 
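[Editor's note] The stabilization loop added by the previous patch can be illustrated outside the driver. A minimal userspace sketch follows, using a plain volatile array as a stand-in for the MMIO accessors (nothing here is xe API):

  #include <stdint.h>
  #include <stdio.h>

  static volatile uint32_t fake_reg[2]; /* [0] = low dword, [1] = high dword */

  static uint64_t read64_2x32(void)
  {
          uint32_t ldw, udw, oldudw;
          int retries;

          oldudw = fake_reg[1];
          for (retries = 5; retries; --retries) {
                  ldw = fake_reg[0];
                  udw = fake_reg[1];
                  /* upper dword unchanged: no rollover between the two reads */
                  if (udw == oldudw)
                          break;
                  oldudw = udw;
          }
          return (uint64_t)udw << 32 | ldw;
  }

  int main(void)
  {
          /*
           * A counter crossing 0x0_FFFFFFFF -> 0x1_00000000 between the two
           * halves of a naive read pair can be observed as 0x1_FFFFFFFF or
           * 0x0_00000000; re-reading until the upper dword is stable rejects
           * such torn samples.
           */
          fake_reg[0] = 0x0;
          fake_reg[1] = 0x1;
          printf("%#llx\n", (unsigned long long)read64_2x32());
          return 0;
  }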
Bspec: 60027 Reviewed-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://lore.kernel.org/r/20230823003312.1356779-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 3 --- drivers/gpu/drm/xe/xe_mmio.h | 11 ----------- 2 files changed, 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index c2ec52eefb2e..3ccc0af4430b 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -490,9 +490,6 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data, } xe_mmio_write32(gt, reg, args->value); break; - case DRM_XE_MMIO_64BIT: - xe_mmio_write64(gt, reg, args->value); - break; default: drm_dbg(&xe->drm, "Invalid MMIO bit size"); fallthrough; diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index f72c34c7d1d0..cd9fe08ccf4a 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -75,17 +75,6 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, return old; } -static inline void xe_mmio_write64(struct xe_gt *gt, - struct xe_reg reg, u64 val) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - writeq(val, tile->mmio.regs + reg.addr); -} - static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval) -- cgit v1.2.3 From 1c66c0f391da32534cf143e6a0f6391776aa9bf8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 22 Aug 2023 10:33:32 -0700 Subject: drm/xe: fix submissions without vm Kernel queues can submit privileged batches directly in GGTT, so they don't always need a vm. The submission front-end already supports creating and submitting jobs without a vm, but some parts of the back-end assume the vm is always there. Fix this by handling a lack of vm in the back-end as well. v2: s/XE_BUG_ON/XE_WARN_ON, s/engine/exec_queue Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230822173334.1664332-2-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 8 ++++---- drivers/gpu/drm/xe/xe_sched_job.c | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 55c7b13d15ec..87f2972b7c20 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1136,7 +1136,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ge->q = q; init_waitqueue_head(&ge->suspend_wait); - timeout = xe_vm_no_dma_fences(q->vm) ? MAX_SCHEDULE_TIMEOUT : + timeout = (q->vm && xe_vm_no_dma_fences(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : q->hwe->eclass->sched_props.job_timeout_ms; err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL, q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 9e23293ec4d3..2b4127ea1eab 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -213,7 +213,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_vm *vm = job->q->vm; - if (vm->batch_invalidate_tlb) { + if (vm && vm->batch_invalidate_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); @@ -273,13 +273,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); } - if (vm->batch_invalidate_tlb) + if (vm && vm->batch_invalidate_tlb) i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, true, dw, i); dw[i++] = preparser_disable(false); - if (!vm->batch_invalidate_tlb) + if (!vm || !vm->batch_invalidate_tlb) i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); @@ -318,7 +318,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS; /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */ - i = emit_pipe_invalidate(mask_flags, vm->batch_invalidate_tlb, dw, i); + i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i); /* hsdes: 1809175790 */ if (has_aux_ccs(xe)) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index de2851d24c96..0479d059dc77 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -87,6 +87,9 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, int i, j; u32 width; + /* only a kernel context can submit a vm-less job */ + XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); + /* Migration and kernel engines have their own locking */ if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM | EXEC_QUEUE_FLAG_WA))) { -- cgit v1.2.3 From 923e42381745f55ba27a8805a055b51139af6830 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 22 Aug 2023 10:33:33 -0700 Subject: drm/xe: split kernel vs permanent engine flags If an engine is only destroyed on driver unload, we can skip its clean-up steps with the GuC because the GuC is going to be tuned off as well, so it doesn't matter if we're in sync with it or not. Currently, we apply this optimization to all engines marked as kernel, but this stops us to supporting kernel engines that don't stick around until unload. To remove this limitation, add a separate flag to indicate if the engine is expected to only be destryed on driver unload and use that to trigger the optimzation. While at it, add a small comment to explain what each engine flag represents. 
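[Editor's note] The KERNEL vs PERMANENT distinction can be sketched in isolation. The flag names mirror the patch, but this is only an illustration of the intended invariants, not driver code:

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>

  #define EXEC_QUEUE_FLAG_KERNEL    (1u << 1)
  #define EXEC_QUEUE_FLAG_PERMANENT (1u << 2)

  /*
   * Only queues that live until driver unload may skip the GuC cleanup
   * handshake: the GuC itself is torn down at unload, so being out of sync
   * with it no longer matters.
   */
  static bool can_skip_guc_cleanup(uint32_t flags)
  {
          return flags & EXEC_QUEUE_FLAG_PERMANENT;
  }

  int main(void)
  {
          uint32_t migrate_q = EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT;
          uint32_t short_lived_q = EXEC_QUEUE_FLAG_KERNEL;

          /* only kernel queues can be permanent */
          assert(!(migrate_q & EXEC_QUEUE_FLAG_PERMANENT) ||
                 (migrate_q & EXEC_QUEUE_FLAG_KERNEL));

          assert(can_skip_guc_cleanup(migrate_q));
          assert(!can_skip_guc_cleanup(short_lived_q));
          return 0;
  }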
v2: s/XE_BUG_ON/XE_WARN_ON, s/ENGINE/EXEC_QUEUE v3: rebased Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230822173334.1664332-3-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 3 +++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 24 ++++++++++++++++-------- drivers/gpu/drm/xe/xe_guc_submit.c | 6 +++--- drivers/gpu/drm/xe/xe_migrate.c | 7 +++++-- 4 files changed, 27 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index f6619688f92f..867465b0c57b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -41,6 +41,9 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, int err; int i; + /* only kernel queues can be permanent */ + XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); + q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL); if (!q) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 4506289b8b7b..1f0051a91dae 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -65,14 +65,22 @@ struct xe_exec_queue { /** @fence_irq: fence IRQ used to signal job completion */ struct xe_hw_fence_irq *fence_irq; -#define EXEC_QUEUE_FLAG_BANNED BIT(0) -#define EXEC_QUEUE_FLAG_KERNEL BIT(1) -#define EXEC_QUEUE_FLAG_PERSISTENT BIT(2) -#define EXEC_QUEUE_FLAG_COMPUTE_MODE BIT(3) -/* Caller needs to hold rpm ref when creating engine with EXEC_QUEUE_FLAG_VM */ -#define EXEC_QUEUE_FLAG_VM BIT(4) -#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) -#define EXEC_QUEUE_FLAG_WA BIT(6) +/* queue no longer allowed to submit */ +#define EXEC_QUEUE_FLAG_BANNED BIT(0) +/* queue used for kernel submission only */ +#define EXEC_QUEUE_FLAG_KERNEL BIT(1) +/* kernel engine only destroyed at driver unload */ +#define EXEC_QUEUE_FLAG_PERMANENT BIT(2) +/* queue keeps running pending jobs after destroy ioctl */ +#define EXEC_QUEUE_FLAG_PERSISTENT BIT(3) +/* queue for use with compute VMs */ +#define EXEC_QUEUE_FLAG_COMPUTE_MODE BIT(4) +/* for VM jobs. 
Caller needs to hold rpm ref when creating queue with this flag */ +#define EXEC_QUEUE_FLAG_VM BIT(5) +/* child of VM queue for multi-tile VM jobs */ +#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(6) +/* queue used for WA setup */ +#define EXEC_QUEUE_FLAG_WA BIT(7) /** * @flags: flags for this exec queue, should statically setup aside from ban diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 87f2972b7c20..832e79fb0a02 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -965,7 +965,7 @@ static void guc_exec_queue_fini_async(struct xe_exec_queue *q) INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ - if (q->flags & EXEC_QUEUE_FLAG_KERNEL) + if (q->flags & EXEC_QUEUE_FLAG_PERMANENT) __guc_exec_queue_fini_async(&q->guc->fini_async); else queue_work(system_wq, &q->guc->fini_async); @@ -988,7 +988,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) struct xe_exec_queue *q = msg->private_data; struct xe_guc *guc = exec_queue_to_guc(q); - XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_KERNEL); + XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_PERMANENT); trace_xe_exec_queue_cleanup_entity(q); if (exec_queue_registered(q)) @@ -1208,7 +1208,7 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; - if (!(q->flags & EXEC_QUEUE_FLAG_KERNEL)) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) guc_exec_queue_add_msg(q, msg, CLEANUP); else __guc_exec_queue_fini(exec_queue_to_guc(q), q); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 06e85f7162d4..6e0d4e2c497a 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -343,11 +343,14 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, - hwe, EXEC_QUEUE_FLAG_KERNEL); + hwe, + EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, - EXEC_QUEUE_FLAG_KERNEL); + EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT); } if (IS_ERR(m->q)) { xe_vm_close_and_put(vm); -- cgit v1.2.3 From 9e9526352d6f7f94a4348cebce9859dfebed1dea Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 22 Aug 2023 10:33:34 -0700 Subject: drm/xe: standardize vm-less kernel submissions The current only submission in the driver that doesn't use a vm is the WA setup. We still pass a vm structure (the migration one), but we don't actually use it at submission time and we instead have an hack to use GGTT for this particular engine. Instead of special-casing the WA engine, we can skip providing a VM and use that as selector for whether to use GGTT or PPGTT. As part of this change, we can drop the special engine flag for the WA engine and switch the WA submission to use the standard job functions instead of dedicated ones. 
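[Editor's note] The "no VM means GGTT" selection that replaces the dedicated WA flag is easy to model. A hedged sketch with a toy queue type (not the xe structures):

  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  struct toy_queue {
          void *vm;       /* NULL for VM-less kernel submissions */
  };

  /* PPGTT addressing is requested only when the queue carries a VM. */
  static uint32_t get_ppgtt_flag(const struct toy_queue *q)
  {
          return q->vm ? (1u << 8) : 0;
  }

  int main(void)
  {
          struct toy_queue wa_setup = { .vm = NULL };
          struct toy_queue user_queue = { .vm = (void *)0x1 };

          printf("WA setup: %#x, user queue: %#x\n",
                 get_ppgtt_flag(&wa_setup), get_ppgtt_flag(&user_queue));
          return 0;
  }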
v2: rebased on s/engine/exec_queue Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20230822173334.1664332-4-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bb.c | 10 ---------- drivers/gpu/drm/xe/xe_bb.h | 2 -- drivers/gpu/drm/xe/xe_exec_queue.c | 4 ++-- drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 -- drivers/gpu/drm/xe/xe_gt.c | 23 +++++++---------------- drivers/gpu/drm/xe/xe_ring_ops.c | 2 +- drivers/gpu/drm/xe/xe_sched_job.c | 6 ++---- 7 files changed, 12 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 38f4ce83a207..1fbc2fcddc96 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -73,16 +73,6 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) return xe_sched_job_create(q, addr); } -struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q, - struct xe_bb *bb, u64 batch_base_ofs) -{ - u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo); - - XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION)); - - return __xe_bb_create_job(q, bb, &addr); -} - struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 batch_base_ofs, diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index c5ae0770bab5..fafacd73dcc3 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -20,8 +20,6 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 batch_ofs, u32 second_idx); -struct xe_sched_job *xe_bb_create_wa_job(struct xe_exec_queue *q, - struct xe_bb *bb, u64 batch_ofs); void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence); #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 867465b0c57b..f28bceceb99a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -95,7 +95,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, * can perform GuC CT actions when needed. Caller is expected to * have already grabbed the rpm ref outside any sensitive locks. 
*/ - if (q->flags & EXEC_QUEUE_FLAG_VM) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM)) drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); return q; @@ -174,7 +174,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) xe_lrc_finish(q->lrc + i); if (q->vm) xe_vm_put(q->vm); - if (q->flags & EXEC_QUEUE_FLAG_VM) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM)) xe_device_mem_access_put(gt_to_xe(q->gt)); kfree(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 1f0051a91dae..4f4190971dcf 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -79,8 +79,6 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_VM BIT(5) /* child of VM queue for multi-tile VM jobs */ #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(6) -/* queue used for WA setup */ -#define EXEC_QUEUE_FLAG_WA BIT(7) /** * @flags: flags for this exec queue, should statically setup aside from ban diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 13320af4ddd3..3d6a7c11bac1 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -87,15 +87,13 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) struct xe_sched_job *job; struct xe_bb *bb; struct dma_fence *fence; - u64 batch_ofs; long timeout; bb = xe_bb_new(gt, 4, false); if (IS_ERR(bb)) return PTR_ERR(bb); - batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo); - job = xe_bb_create_wa_job(q, bb, batch_ofs); + job = xe_bb_create_job(q, bb); if (IS_ERR(job)) { xe_bb_free(bb, NULL); return PTR_ERR(job); @@ -124,7 +122,6 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) struct xe_sched_job *job; struct xe_bb *bb; struct dma_fence *fence; - u64 batch_ofs; long timeout; int count = 0; @@ -143,8 +140,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } - batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo); - job = xe_bb_create_wa_job(q, bb, batch_ofs); + job = xe_bb_create_job(q, bb); if (IS_ERR(job)) { xe_bb_free(bb, NULL); return PTR_ERR(job); @@ -168,14 +164,12 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) int xe_gt_record_default_lrcs(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - struct xe_tile *tile = gt_to_tile(gt); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; int err = 0; for_each_hw_engine(hwe, gt, id) { struct xe_exec_queue *q, *nop_q; - struct xe_vm *vm; void *default_lrc; if (gt->default_lrc[hwe->class]) @@ -192,14 +186,13 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) if (!default_lrc) return -ENOMEM; - vm = xe_migrate_get_vm(tile->migrate); - q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, - hwe, EXEC_QUEUE_FLAG_WA); + q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, + hwe, EXEC_QUEUE_FLAG_KERNEL); if (IS_ERR(q)) { err = PTR_ERR(q); xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n", hwe->name, q); - goto put_vm; + return err; } /* Prime golden LRC with known good state */ @@ -210,8 +203,8 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) goto put_exec_queue; } - nop_q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), - 1, hwe, EXEC_QUEUE_FLAG_WA); + nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), + 1, hwe, EXEC_QUEUE_FLAG_KERNEL); if (IS_ERR(nop_q)) { err = PTR_ERR(nop_q); xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n", @@ -245,8 +238,6 @@ 
put_nop_q: xe_exec_queue_put(nop_q); put_exec_queue: xe_exec_queue_put(q); -put_vm: - xe_vm_put(vm); if (err) break; } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 2b4127ea1eab..2238a40b7e8e 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -202,7 +202,7 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, static u32 get_ppgtt_flag(struct xe_sched_job *job) { - return !(job->q->flags & EXEC_QUEUE_FLAG_WA) ? BIT(8) : 0; + return job->q->vm ? BIT(8) : 0; } /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 0479d059dc77..b02183147e8e 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -59,8 +59,7 @@ static struct xe_sched_job *job_alloc(bool parallel) bool xe_sched_job_is_migration(struct xe_exec_queue *q) { - return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION) && - !(q->flags & EXEC_QUEUE_FLAG_WA); + return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION); } static void job_free(struct xe_sched_job *job) @@ -91,8 +90,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); /* Migration and kernel engines have their own locking */ - if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM | - EXEC_QUEUE_FLAG_WA))) { + if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { lockdep_assert_held(&q->vm->lock); if (!xe_vm_no_dma_fences(q->vm)) xe_vm_assert_held(q->vm); -- cgit v1.2.3 From 429d56a6b12c4a00d22dcc8a1ac0394906c92b67 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 23 Aug 2023 18:55:52 +0100 Subject: drm/xe/ct: fix resv_space print Actually print the info.resv_space. Signed-off-by: Matthew Auld Cc: Rodrigo Vivi Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 59136b6a7c6f..b92e04ba8f63 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1180,7 +1180,7 @@ static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot, u32 head, tail; drm_printf(p, "\tsize: %d\n", snapshot->info.size); - drm_printf(p, "\tresv_space: %d\n", snapshot->info.space); + drm_printf(p, "\tresv_space: %d\n", snapshot->info.resv_space); drm_printf(p, "\thead: %d\n", snapshot->info.head); drm_printf(p, "\ttail: %d\n", snapshot->info.tail); drm_printf(p, "\tspace: %d\n", snapshot->info.space); -- cgit v1.2.3 From 25063811d9c1f32c3223c27cafc0a95e7a86be26 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Thu, 17 Aug 2023 09:20:44 +0000 Subject: drm/xe/pvc: Blacklist BCS_SWCTRL register Wa_16017236439 requires the BCS_SWCTRL to be privileged. 
v2: Define and use BCS_SWCTRL() Reviewed-by: Matt Roper Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 ++ drivers/gpu/drm/xe/xe_reg_whitelist.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index d57fd855086a..1a366d8070f3 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -63,6 +63,8 @@ #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) +#define BCS_SWCTRL(base) XE_REG((base) + 0x200, XE_REG_OPTION_MASKED) + /* Handling MOCS value in BLIT_CCTL like it was done CMD_CCTL */ #define BLIT_CCTL(base) XE_REG((base) + 0x204) #define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 9) diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index e83781f9a516..e66ae1bdaf9c 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -50,6 +50,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_DENY | RING_FORCE_TO_NONPRIV_RANGE_64)) }, + { XE_RTP_NAME("16017236439"), + XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY)), + XE_RTP_ACTIONS(WHITELIST(BCS_SWCTRL(0), + RING_FORCE_TO_NONPRIV_DENY, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, {} }; -- cgit v1.2.3 From 7407f2e5c356a73ec4a6d7f379e91f205025165c Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 18 Jul 2023 10:45:28 +0000 Subject: drm/xe/pvc: Force even num engines to use 64B Wa_16017236439 requires that we update BCS_SWCTRL (via indirect context batch buffer) to set 64B transfers when running on an even-numbered BCS engine and 256B on an odd-numbered BCS engine. 
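[Editor's note] The parity rule is small enough to state as a helper. The mapping below is taken from the commit message (even instance -> 64B, odd -> 256B) and is purely illustrative:

  #include <stdio.h>

  static unsigned int bcs_transfer_bytes(unsigned int instance)
  {
          /* Wa_16017236439: even-numbered copy engines drop to 64B transfers */
          return (instance & 1) ? 256 : 64;
  }

  int main(void)
  {
          unsigned int i;

          for (i = 0; i < 4; i++)
                  printf("BCS%u -> %uB\n", i, bcs_transfer_bytes(i));
          return 0;
  }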
v2: Move WA from engine_was[] to lrc_was[] Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 1a366d8070f3..692213d09cea 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -64,6 +64,7 @@ #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) #define BCS_SWCTRL(base) XE_REG((base) + 0x200, XE_REG_OPTION_MASKED) +#define BCS_SWCTRL_DISABLE_256B REG_BIT(2) /* Handling MOCS value in BLIT_CCTL like it was done CMD_CCTL */ #define BLIT_CCTL(base) XE_REG((base) + 0x204) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index e2b6e17d7ec4..f45e9452ba0e 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -604,6 +604,16 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, + /* PVC */ + + { XE_RTP_NAME("16017236439"), + XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COPY), + FUNC(xe_rtp_match_even_instance)), + XE_RTP_ACTIONS(SET(BCS_SWCTRL(0), + BCS_SWCTRL_DISABLE_256B, + XE_RTP_ACTION_FLAG(ENGINE_BASE))), + }, + /* Xe_LPG */ { XE_RTP_NAME("18019271663"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), -- cgit v1.2.3 From a043fbab7af54c64017269dc96f43f441ed4bcaf Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 16 Aug 2023 22:14:10 -0700 Subject: drm/xe/pvc: Use fast copy engines as migrate engine on PVC Some copy hardware engine instances are faster than others on PVC. Use a virtual engine of these plus the reserved instance for the migrate engine on PVC. The idea being if a fast instance is available it will be used and the throughput of kernel copies, clears, and pagefault servicing will be higher. v2: Use OOB WA, use all copy engines if no WA is required Reviewed-by: Matt Roper Signed-off-by: Matthew Brost Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 36 ++++++++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 34 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b470c2394476..be93745e8a30 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules $(call cmd,wa_oob) -$(obj)/xe_guc.o $(obj)/xe_wa.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o: $(generated_oob) +$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o: $(generated_oob) # Please keep these build lists sorted! diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6e0d4e2c497a..799ad0209279 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -12,6 +12,7 @@ #include #include +#include "generated/xe_wa_oob.h" #include "regs/xe_gpu_commands.h" #include "tests/xe_test.h" #include "xe_bb.h" @@ -29,6 +30,7 @@ #include "xe_sync.h" #include "xe_trace.h" #include "xe_vm.h" +#include "xe_wa.h" /** * struct xe_migrate - migrate context. 
@@ -298,6 +300,32 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, return 0; } +/* + * Due to workaround 16017236439, odd instance hardware copy engines are + * faster than even instance ones. + * This function returns the mask involving all fast copy engines and the + * reserved copy engine to be used as logical mask for migrate engine. + * Including the reserved copy engine is required to avoid deadlocks due to + * migrate jobs servicing the faults gets stuck behind the job that faulted. + */ +static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt) +{ + u32 logical_mask = 0; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + if (hwe->class != XE_ENGINE_CLASS_COPY) + continue; + + if (!XE_WA(gt, 16017236439) || + xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1) + logical_mask |= BIT(hwe->logical_instance); + } + + return logical_mask; +} + /** * xe_migrate_init() - Initialize a migrate context * @tile: Back-pointer to the tile we're initializing for. @@ -338,12 +366,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) XE_ENGINE_CLASS_COPY, primary_gt->usm.reserved_bcs_instance, false); - if (!hwe) + u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt); + + if (!hwe || !logical_mask) return ERR_PTR(-EINVAL); - m->q = xe_exec_queue_create(xe, vm, - BIT(hwe->logical_instance), 1, - hwe, + m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT); } else { diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index ea90dcc933b5..599e67169dae 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -17,3 +17,4 @@ 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) 14016763929 SUBPLATFORM(DG2, G10) SUBPLATFORM(DG2, G12) +16017236439 PLATFORM(PVC) -- cgit v1.2.3 From 14ec22408d2fa1d8671b619474381344b2bc859a Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Thu, 17 Aug 2023 14:30:28 -0700 Subject: drm/xe: Add patch version on guc firmware init Add patch version info on GuC firmware init. This is required info for GuC log decoder. 
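[Editor's note] The version triplet reported here comes from one packed dword in the firmware CSS header. The 8-bit-per-field split below is an assumption for illustration only; the driver relies on the CSS_SW_VERSION_UC_* field definitions rather than open-coded shifts:

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          uint32_t sw_version = (70u << 16) | (6u << 8) | 4u;   /* e.g. 70.6.4 */

          printf("found %u.%u.%u\n",
                 (sw_version >> 16) & 0xff,   /* major */
                 (sw_version >> 8) & 0xff,    /* minor */
                 sw_version & 0xff);          /* patch */
          return 0;
  }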
Signed-off-by: Zhanjun Dong Reviewed-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20230817213028.838531-1-zhanjun.dong@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 13 +++++++------ drivers/gpu/drm/xe/xe_uc_fw_types.h | 2 ++ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 1802b280cd8c..37ad238148b0 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -400,11 +400,12 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) css->sw_version); uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); + uc_fw->patch_ver_found = FIELD_GET(CSS_SW_VERSION_UC_PATCH, + css->sw_version); - drm_info(&xe->drm, "Using %s firmware (%u.%u) from %s\n", - xe_uc_fw_type_repr(uc_fw->type), - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->path); + drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found); err = uc_fw_check_version_requirements(uc_fw); if (err) @@ -531,9 +532,9 @@ void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p) xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); drm_printf(p, "\tstatus: %s\n", xe_uc_fw_status_repr(uc_fw->status)); - drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u.%u\n", uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, - uc_fw->major_ver_found, uc_fw->minor_ver_found); + uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found); drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 837f49a2347e..444bff83cdbe 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -106,6 +106,8 @@ struct xe_uc_fw { u16 major_ver_found; /** @minor_ver_found: major version found in firmware blob */ u16 minor_ver_found; + /** @patch_ver_found: patch version found in firmware blob */ + u16 patch_ver_found; /** @rsa_size: RSA size */ u32 rsa_size; -- cgit v1.2.3 From 9c0d779fc67bd1810f74c22e219f4af24a4e1e29 Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Thu, 31 Aug 2023 04:55:58 +0530 Subject: drm/xe: Prevent return with locked vm Reorder vm_id check after the one for VISIBLE_VRAM. This should prevent returning with locked vm in error scenario. 
Signed-off-by: Pallavi Mishra Cc: Matthew Auld Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 80c5d1a7d41a..3cfd3f37c81e 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1795,17 +1795,6 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK)) return -EINVAL; - if (args->vm_id) { - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - return -ENOENT; - err = xe_vm_lock(vm, &ww, 0, true); - if (err) { - xe_vm_put(vm); - return err; - } - } - if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING) bo_flags |= XE_BO_DEFER_BACKING; @@ -1821,6 +1810,17 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags |= XE_BO_NEEDS_CPU_ACCESS; } + if (args->vm_id) { + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -ENOENT; + err = xe_vm_lock(vm, &ww, 0, true); + if (err) { + xe_vm_put(vm); + return err; + } + } + bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device, bo_flags); if (IS_ERR(bo)) { -- cgit v1.2.3 From 1da0702c1701c2e1441d86facd9fbb5e73fa374b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 24 Aug 2023 17:04:45 +0100 Subject: drm/xe: nuke GuC on unload On PVC unloading followed by reloading the module often results in a completely dead machine (seems to be plaguing CI). Resetting the GuC like we do at load seems to cure it at least when locally testing this. v2: - Move pc_fini into guc_fini. We want to do the GuC reset just after calling pc_fini, otherwise we encounter communication failures. It also seems like a good idea to do the reset before we start releasing the various other GuC resources. In the case of pc_fini there is an explicit stop, but for other stuff like logs, ads, ctb there is not. 
References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/542 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/597 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: Rodrigo Vivi Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_guc_pc.c | 8 +------- drivers/gpu/drm/xe/xe_guc_pc.h | 1 + drivers/gpu/drm/xe/xe_uc.c | 5 +++++ drivers/gpu/drm/xe/xe_uc.h | 1 + 5 files changed, 25 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index e102637c0695..5d32bcee28b6 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -5,6 +5,8 @@ #include "xe_guc.h" +#include + #include "generated/xe_wa_oob.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" @@ -20,6 +22,7 @@ #include "xe_guc_submit.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_uc.h" #include "xe_uc_fw.h" #include "xe_wa.h" #include "xe_wopcm.h" @@ -217,6 +220,16 @@ static void guc_write_params(struct xe_guc *guc) xe_mmio_write32(gt, SOFT_SCRATCH(1 + i), guc->params[i]); } +static void guc_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + + xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_guc_pc_fini(&guc->pc); + xe_uc_fini_hw(&guc_to_gt(guc)->uc); + xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); +} + int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -244,6 +257,10 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; + ret = drmm_add_action_or_reset(>_to_xe(gt)->drm, guc_fini, guc); + if (ret) + goto out; + guc_init_params(guc); if (xe_gt_is_media_type(gt)) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index c03bb58e7049..87de1ce40e07 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -884,10 +884,8 @@ out: return ret; } -static void pc_fini(struct drm_device *drm, void *arg) +void xe_guc_pc_fini(struct xe_guc_pc *pc) { - struct xe_guc_pc *pc = arg; - XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); @@ -925,9 +923,5 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) if (err) return err; - err = drmm_add_action_or_reset(&xe->drm, pc_fini, pc); - if (err) - return err; - return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 81833a53b3c9..43ea582545b5 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -9,6 +9,7 @@ #include "xe_guc_pc_types.h" int xe_guc_pc_init(struct xe_guc_pc *pc); +void xe_guc_pc_fini(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index addd6f2681b9..9c8ce504f4da 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -167,6 +167,11 @@ int xe_uc_init_hw(struct xe_uc *uc) return 0; } +int xe_uc_fini_hw(struct xe_uc *uc) +{ + return xe_uc_sanitize_reset(uc); +} + int xe_uc_reset_prepare(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 42219b361df5..4109ae7028af 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -12,6 +12,7 @@ int xe_uc_init(struct xe_uc *uc); int xe_uc_init_hwconfig(struct 
xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); int xe_uc_init_hw(struct xe_uc *uc); +int xe_uc_fini_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); -- cgit v1.2.3 From 9a4566d5e0ae9dd38ef20fab00990e6958c421b4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 29 Aug 2023 17:28:43 +0100 Subject: drm/xe: fix has_llc on rkl Matches i915. Assumption going forward is that non-llc + igpu is only a thing on MTL+ which should have explicit coherency pat_index settings for COH_NONE, 1WAY and 2WAY. Signed-off-by: Matthew Auld Cc: Pallavi Mishra Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Matt Roper Reviewed-by: Pallavi Mishra Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6c2c6723b1b2..08c1edc74d96 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -232,6 +232,7 @@ static const struct xe_device_desc rkl_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_ROCKETLAKE), + .has_llc = true, .require_force_probe = true, }; -- cgit v1.2.3 From 35dfb48462d92ce5514f883c461857ca55bdb499 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 31 Aug 2023 07:54:21 -0700 Subject: drm/xe: Convert xe_vma_op_flags to BIT macros Rather than open code the shift for values, use BIT macros. Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index f8675c3da3b1..40ce8953bacb 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -370,11 +370,11 @@ struct xe_vma_op_prefetch { /** enum xe_vma_op_flags - flags for VMA operation */ enum xe_vma_op_flags { /** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */ - XE_VMA_OP_FIRST = (0x1 << 0), + XE_VMA_OP_FIRST = BIT(0), /** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */ - XE_VMA_OP_LAST = (0x1 << 1), + XE_VMA_OP_LAST = BIT(1), /** @XE_VMA_OP_COMMITTED: VMA operation committed */ - XE_VMA_OP_COMMITTED = (0x1 << 2), + XE_VMA_OP_COMMITTED = BIT(2), }; /** struct xe_vma_op - VMA operation */ -- cgit v1.2.3 From 5ef091fc32a4fe7116a4ecc778369f161de9c11a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sun, 13 Aug 2023 20:19:20 -0700 Subject: drm/xe: Fixup unwind on VM ops errors Remap ops have 3 parts: unmap, prev, and next. The commit step can fail on any of these. Add a flag for each to these so the unwind is only done the steps that have been committed. 
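[Editor's note] A minimal model of the per-part bookkeeping described above: a remap is an unmap plus optional prev/next insertions, and only the parts that actually committed are unwound on failure. Flag names echo the patch; the code is a standalone sketch, not the VM bind path:

  #include <stdint.h>
  #include <stdio.h>

  #define OP_COMMITTED            (1u << 2)
  #define OP_PREV_COMMITTED       (1u << 3)
  #define OP_NEXT_COMMITTED       (1u << 4)

  struct remap_op {
          uint32_t flags;
  };

  static void unwind(const struct remap_op *op)
  {
          if (op->flags & OP_NEXT_COMMITTED)
                  printf("undo next insertion\n");
          if (op->flags & OP_PREV_COMMITTED)
                  printf("undo prev insertion\n");
          if (op->flags & OP_COMMITTED)
                  printf("restore unmapped VMA\n");
  }

  int main(void)
  {
          /* e.g. the unmap and prev landed, but inserting next failed */
          struct remap_op op = { .flags = OP_COMMITTED | OP_PREV_COMMITTED };

          unwind(&op);
          return 0;
  }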
v2: (Rodrigo) Use bit macros Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 24 +++++++++++++++++------- drivers/gpu/drm/xe/xe_vm_types.h | 10 +++++++--- 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a774f9632dda..71f61806df77 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2622,18 +2622,25 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) switch (op->base.op) { case DRM_GPUVA_OP_MAP: err |= xe_vm_insert_vma(vm, op->map.vma); + if (!err) + op->flags |= XE_VMA_OP_COMMITTED; break; case DRM_GPUVA_OP_REMAP: prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), true); + op->flags |= XE_VMA_OP_COMMITTED; if (op->remap.prev) { err |= xe_vm_insert_vma(vm, op->remap.prev); + if (!err) + op->flags |= XE_VMA_OP_PREV_COMMITTED; if (!err && op->remap.skip_prev) op->remap.prev = NULL; } if (op->remap.next) { err |= xe_vm_insert_vma(vm, op->remap.next); + if (!err) + op->flags |= XE_VMA_OP_NEXT_COMMITTED; if (!err && op->remap.skip_next) op->remap.next = NULL; } @@ -2646,15 +2653,15 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) break; case DRM_GPUVA_OP_UNMAP: prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); + op->flags |= XE_VMA_OP_COMMITTED; break; case DRM_GPUVA_OP_PREFETCH: - /* Nothing to do */ + op->flags |= XE_VMA_OP_COMMITTED; break; default: XE_WARN_ON("NOT POSSIBLE"); } - op->flags |= XE_VMA_OP_COMMITTED; return err; } @@ -2859,7 +2866,8 @@ static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) } static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, - bool post_commit) + bool post_commit, bool prev_post_commit, + bool next_post_commit) { lockdep_assert_held_write(&vm->lock); @@ -2886,11 +2894,11 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); if (op->remap.prev) { - prep_vma_destroy(vm, op->remap.prev, post_commit); + prep_vma_destroy(vm, op->remap.prev, prev_post_commit); xe_vma_destroy_unlocked(op->remap.prev); } if (op->remap.next) { - prep_vma_destroy(vm, op->remap.next, post_commit); + prep_vma_destroy(vm, op->remap.next, next_post_commit); xe_vma_destroy_unlocked(op->remap.next); } down_read(&vm->userptr.notifier_lock); @@ -3029,7 +3037,9 @@ static int vm_bind_ioctl_ops_commit(struct xe_vm *vm, unwind: list_for_each_entry_reverse(op, ops_list, link) - xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED); + xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED, + op->flags & XE_VMA_OP_PREV_COMMITTED, + op->flags & XE_VMA_OP_NEXT_COMMITTED); list_for_each_entry_safe(op, next, ops_list, link) xe_vma_op_cleanup(vm, op); @@ -3056,7 +3066,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, drm_gpuva_for_each_op(__op, __ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - xe_vma_op_unwind(vm, op, false); + xe_vma_op_unwind(vm, op, false, false, false); } } } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 40ce8953bacb..dfbc53e56a86 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -370,11 +370,15 @@ struct xe_vma_op_prefetch { /** enum xe_vma_op_flags - flags for VMA operation */ enum xe_vma_op_flags { /** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */ - XE_VMA_OP_FIRST = BIT(0), + XE_VMA_OP_FIRST = BIT(0), 
/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */ - XE_VMA_OP_LAST = BIT(1), + XE_VMA_OP_LAST = BIT(1), /** @XE_VMA_OP_COMMITTED: VMA operation committed */ - XE_VMA_OP_COMMITTED = BIT(2), + XE_VMA_OP_COMMITTED = BIT(2), + /** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */ + XE_VMA_OP_PREV_COMMITTED = BIT(3), + /** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */ + XE_VMA_OP_NEXT_COMMITTED = BIT(4), }; /** struct xe_vma_op - VMA operation */ -- cgit v1.2.3 From 617eebb9c4807be77ca6f02eee7469e5e111861d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 16 Aug 2023 20:15:38 -0700 Subject: drm/xe: Fix array of binds If multiple bind ops in an array of binds touch the same address range invalid GPUVA operations are generated as each GPUVA operation is generated based on the orignal GPUVA state. To fix this, after each GPUVA operations is generated, commit the GPUVA operation updating the GPUVA state so subsequent bind ops can see a current GPUVA state. Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 418 +++++++++++++++++++++++---------------------- 1 file changed, 212 insertions(+), 206 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 71f61806df77..24ee74f62385 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2430,24 +2430,73 @@ static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) return SZ_4K; } -/* - * Parse operations list and create any resources needed for the operations - * prior to fully committing to the operations. This setup can fail. - */ +static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) +{ + int err = 0; + + lockdep_assert_held_write(&vm->lock); + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err |= xe_vm_insert_vma(vm, op->map.vma); + if (!err) + op->flags |= XE_VMA_OP_COMMITTED; + break; + case DRM_GPUVA_OP_REMAP: + prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), + true); + op->flags |= XE_VMA_OP_COMMITTED; + + if (op->remap.prev) { + err |= xe_vm_insert_vma(vm, op->remap.prev); + if (!err) + op->flags |= XE_VMA_OP_PREV_COMMITTED; + if (!err && op->remap.skip_prev) + op->remap.prev = NULL; + } + if (op->remap.next) { + err |= xe_vm_insert_vma(vm, op->remap.next); + if (!err) + op->flags |= XE_VMA_OP_NEXT_COMMITTED; + if (!err && op->remap.skip_next) + op->remap.next = NULL; + } + + /* Adjust for partial unbind after removin VMA from VM */ + if (!err) { + op->base.remap.unmap->va->va.addr = op->remap.start; + op->base.remap.unmap->va->va.range = op->remap.range; + } + break; + case DRM_GPUVA_OP_UNMAP: + prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); + op->flags |= XE_VMA_OP_COMMITTED; + break; + case DRM_GPUVA_OP_PREFETCH: + op->flags |= XE_VMA_OP_COMMITTED; + break; + default: + XE_WARN_ON("NOT POSSIBLE"); + } + + return err; +} + + static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, - struct drm_gpuva_ops **ops, int num_ops_list, + struct drm_gpuva_ops *ops, struct xe_sync_entry *syncs, u32 num_syncs, - struct list_head *ops_list, bool async) + struct list_head *ops_list, bool last, + bool async) { struct xe_vma_op *last_op = NULL; - struct list_head *async_list = NULL; struct async_op_fence *fence = NULL; - int err, i; + struct drm_gpuva_op *__op; + int err = 0; lockdep_assert_held_write(&vm->lock); - XE_WARN_ON(num_ops_list > 1 && !async); - if (num_syncs && async) { + if (last && num_syncs 
&& async) { u64 seqno; fence = kmalloc(sizeof(*fence), GFP_KERNEL); @@ -2466,145 +2515,145 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } } - for (i = 0; i < num_ops_list; ++i) { - struct drm_gpuva_ops *__ops = ops[i]; - struct drm_gpuva_op *__op; + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + bool first = list_empty(ops_list); - drm_gpuva_for_each_op(__op, __ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - bool first = !async_list; + XE_WARN_ON(!first && !async); + + INIT_LIST_HEAD(&op->link); + list_add_tail(&op->link, ops_list); - XE_WARN_ON(!first && !async); + if (first) { + op->flags |= XE_VMA_OP_FIRST; + op->num_syncs = num_syncs; + op->syncs = syncs; + } - INIT_LIST_HEAD(&op->link); - if (first) - async_list = ops_list; - list_add_tail(&op->link, async_list); + op->q = q; + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + { + struct xe_vma *vma; - if (first) { - op->flags |= XE_VMA_OP_FIRST; - op->num_syncs = num_syncs; - op->syncs = syncs; + vma = new_vma(vm, &op->base.map, + op->tile_mask, op->map.read_only, + op->map.is_null); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto free_fence; } - op->q = q; + op->map.vma = vma; + break; + } + case DRM_GPUVA_OP_REMAP: + { + struct xe_vma *old = + gpuva_to_vma(op->base.remap.unmap->va); - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - { - struct xe_vma *vma; + op->remap.start = xe_vma_start(old); + op->remap.range = xe_vma_size(old); - vma = new_vma(vm, &op->base.map, - op->tile_mask, op->map.read_only, - op->map.is_null); + if (op->base.remap.prev) { + struct xe_vma *vma; + bool read_only = + op->base.remap.unmap->va->flags & + XE_VMA_READ_ONLY; + bool is_null = + op->base.remap.unmap->va->flags & + DRM_GPUVA_SPARSE; + + vma = new_vma(vm, op->base.remap.prev, + op->tile_mask, read_only, + is_null); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto free_fence; } - op->map.vma = vma; - break; + op->remap.prev = vma; + + /* + * Userptr creates a new SG mapping so + * we must also rebind. + */ + op->remap.skip_prev = !xe_vma_is_userptr(old) && + IS_ALIGNED(xe_vma_end(vma), + xe_vma_max_pte_size(old)); + if (op->remap.skip_prev) { + xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); + op->remap.range -= + xe_vma_end(vma) - + xe_vma_start(old); + op->remap.start = xe_vma_end(vma); + } } - case DRM_GPUVA_OP_REMAP: - { - struct xe_vma *old = - gpuva_to_vma(op->base.remap.unmap->va); - - op->remap.start = xe_vma_start(old); - op->remap.range = xe_vma_size(old); - - if (op->base.remap.prev) { - struct xe_vma *vma; - bool read_only = - op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY; - bool is_null = - op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE; - - vma = new_vma(vm, op->base.remap.prev, - op->tile_mask, read_only, - is_null); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto free_fence; - } - - op->remap.prev = vma; - - /* - * Userptr creates a new SG mapping so - * we must also rebind. 
- */ - op->remap.skip_prev = !xe_vma_is_userptr(old) && - IS_ALIGNED(xe_vma_end(vma), - xe_vma_max_pte_size(old)); - if (op->remap.skip_prev) { - xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); - op->remap.range -= - xe_vma_end(vma) - - xe_vma_start(old); - op->remap.start = xe_vma_end(vma); - } + + if (op->base.remap.next) { + struct xe_vma *vma; + bool read_only = + op->base.remap.unmap->va->flags & + XE_VMA_READ_ONLY; + + bool is_null = + op->base.remap.unmap->va->flags & + DRM_GPUVA_SPARSE; + + vma = new_vma(vm, op->base.remap.next, + op->tile_mask, read_only, + is_null); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto free_fence; } - if (op->base.remap.next) { - struct xe_vma *vma; - bool read_only = - op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY; - - bool is_null = - op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE; - - vma = new_vma(vm, op->base.remap.next, - op->tile_mask, read_only, - is_null); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto free_fence; - } - - op->remap.next = vma; - - /* - * Userptr creates a new SG mapping so - * we must also rebind. - */ - op->remap.skip_next = !xe_vma_is_userptr(old) && - IS_ALIGNED(xe_vma_start(vma), - xe_vma_max_pte_size(old)); - if (op->remap.skip_next) { - xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); - op->remap.range -= - xe_vma_end(old) - - xe_vma_start(vma); - } + op->remap.next = vma; + + /* + * Userptr creates a new SG mapping so + * we must also rebind. + */ + op->remap.skip_next = !xe_vma_is_userptr(old) && + IS_ALIGNED(xe_vma_start(vma), + xe_vma_max_pte_size(old)); + if (op->remap.skip_next) { + xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); + op->remap.range -= + xe_vma_end(old) - + xe_vma_start(vma); } - break; - } - case DRM_GPUVA_OP_UNMAP: - case DRM_GPUVA_OP_PREFETCH: - /* Nothing to do */ - break; - default: - XE_WARN_ON("NOT POSSIBLE"); } - - last_op = op; + break; + } + case DRM_GPUVA_OP_UNMAP: + case DRM_GPUVA_OP_PREFETCH: + /* Nothing to do */ + break; + default: + XE_WARN_ON("NOT POSSIBLE"); } - last_op->ops = __ops; + last_op = op; + + err = xe_vma_op_commit(vm, op); + if (err) + goto free_fence; } - if (!last_op) - return -ENODATA; + /* FIXME: Unhandled corner case */ + XE_WARN_ON(!last_op && last && !list_empty(ops_list)); - last_op->flags |= XE_VMA_OP_LAST; - last_op->num_syncs = num_syncs; - last_op->syncs = syncs; - last_op->fence = fence; + if (!last_op) + goto free_fence; + last_op->ops = ops; + if (last) { + last_op->flags |= XE_VMA_OP_LAST; + last_op->num_syncs = num_syncs; + last_op->syncs = syncs; + last_op->fence = fence; + } return 0; @@ -2613,58 +2662,6 @@ free_fence: return err; } -static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) -{ - int err = 0; - - lockdep_assert_held_write(&vm->lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - err |= xe_vm_insert_vma(vm, op->map.vma); - if (!err) - op->flags |= XE_VMA_OP_COMMITTED; - break; - case DRM_GPUVA_OP_REMAP: - prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), - true); - op->flags |= XE_VMA_OP_COMMITTED; - - if (op->remap.prev) { - err |= xe_vm_insert_vma(vm, op->remap.prev); - if (!err) - op->flags |= XE_VMA_OP_PREV_COMMITTED; - if (!err && op->remap.skip_prev) - op->remap.prev = NULL; - } - if (op->remap.next) { - err |= xe_vm_insert_vma(vm, op->remap.next); - if (!err) - op->flags |= XE_VMA_OP_NEXT_COMMITTED; - if (!err && op->remap.skip_next) - op->remap.next = NULL; - } - - /* Adjust for partial unbind after removin VMA from VM */ - if (!err) { - op->base.remap.unmap->va->va.addr = 
op->remap.start; - op->base.remap.unmap->va->va.range = op->remap.range; - } - break; - case DRM_GPUVA_OP_UNMAP: - prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); - op->flags |= XE_VMA_OP_COMMITTED; - break; - case DRM_GPUVA_OP_PREFETCH: - op->flags |= XE_VMA_OP_COMMITTED; - break; - default: - XE_WARN_ON("NOT POSSIBLE"); - } - - return err; -} - static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, struct xe_vma_op *op) { @@ -2882,11 +2879,13 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, { struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - if (post_commit) - xe_vm_insert_vma(vm, vma); + if (vma) { + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags &= ~XE_VMA_DESTROYED; + up_read(&vm->userptr.notifier_lock); + if (post_commit) + xe_vm_insert_vma(vm, vma); + } break; } case DRM_GPUVA_OP_REMAP: @@ -2901,11 +2900,13 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, prep_vma_destroy(vm, op->remap.next, next_post_commit); xe_vma_destroy_unlocked(op->remap.next); } - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - if (post_commit) - xe_vm_insert_vma(vm, vma); + if (vma) { + down_read(&vm->userptr.notifier_lock); + vma->gpuva.flags &= ~XE_VMA_DESTROYED; + up_read(&vm->userptr.notifier_lock); + if (post_commit) + xe_vm_insert_vma(vm, vma); + } break; } case DRM_GPUVA_OP_PREFETCH: @@ -2994,20 +2995,16 @@ static void xe_vma_op_work_func(struct work_struct *w) } } -static int vm_bind_ioctl_ops_commit(struct xe_vm *vm, - struct list_head *ops_list, bool async) +static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, + struct list_head *ops_list, bool async) { struct xe_vma_op *op, *last_op, *next; int err; lockdep_assert_held_write(&vm->lock); - list_for_each_entry(op, ops_list, link) { + list_for_each_entry(op, ops_list, link) last_op = op; - err = xe_vma_op_commit(vm, op); - if (err) - goto unwind; - } if (!async) { err = xe_vma_op_execute(vm, last_op); @@ -3046,28 +3043,29 @@ unwind: return err; } -/* - * Unwind operations list, called after a failure of vm_bind_ioctl_ops_create or - * vm_bind_ioctl_ops_parse. 
- */ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, struct drm_gpuva_ops **ops, int num_ops_list) { int i; - for (i = 0; i < num_ops_list; ++i) { + for (i = num_ops_list - 1; i; ++i) { struct drm_gpuva_ops *__ops = ops[i]; struct drm_gpuva_op *__op; if (!__ops) continue; - drm_gpuva_for_each_op(__op, __ops) { + drm_gpuva_for_each_op_reverse(__op, __ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - xe_vma_op_unwind(vm, op, false, false, false); + xe_vma_op_unwind(vm, op, + op->flags & XE_VMA_OP_COMMITTED, + op->flags & XE_VMA_OP_PREV_COMMITTED, + op->flags & XE_VMA_OP_NEXT_COMMITTED); } + + drm_gpuva_ops_free(&vm->gpuvm, __ops); } } @@ -3388,14 +3386,22 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ops[i] = NULL; goto unwind_ops; } + + err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs, + &ops_list, + i == args->num_binds - 1, + async); + if (err) + goto unwind_ops; } - err = vm_bind_ioctl_ops_parse(vm, q, ops, args->num_binds, - syncs, num_syncs, &ops_list, async); - if (err) + /* Nothing to do */ + if (list_empty(&ops_list)) { + err = -ENODATA; goto unwind_ops; + } - err = vm_bind_ioctl_ops_commit(vm, &ops_list, async); + err = vm_bind_ioctl_ops_execute(vm, &ops_list, async); up_write(&vm->lock); for (i = 0; i < args->num_binds; ++i) -- cgit v1.2.3 From 621fd7dc38b7c18d4946a05051f674fcab82d4dd Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 23 Aug 2023 09:10:20 +0000 Subject: drm/xe/pm: Use PM functions only if CONFIG_PM_SLEEP is enabled This fixes the build without CONFIG_PM_SLEEP such as for riscv. Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 48 +++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 08c1edc74d96..b72d9f568768 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -30,28 +30,6 @@ enum toggle_d3cold { D3COLD_ENABLE, }; -static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle) -{ - struct xe_device *xe = pdev_to_xe_device(pdev); - struct pci_dev *root_pdev; - - if (!xe->d3cold.capable) - return; - - root_pdev = pcie_find_root_port(pdev); - if (!root_pdev) - return; - - switch (toggle) { - case D3COLD_DISABLE: - pci_d3cold_disable(root_pdev); - break; - case D3COLD_ENABLE: - pci_d3cold_enable(root_pdev); - break; - } -} - struct xe_subplatform_desc { enum xe_subplatform subplatform; const char *name; @@ -741,6 +719,28 @@ static void xe_pci_shutdown(struct pci_dev *pdev) } #ifdef CONFIG_PM_SLEEP +static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + struct pci_dev *root_pdev; + + if (!xe->d3cold.capable) + return; + + root_pdev = pcie_find_root_port(pdev); + if (!root_pdev) + return; + + switch (toggle) { + case D3COLD_DISABLE: + pci_d3cold_disable(root_pdev); + break; + case D3COLD_ENABLE: + pci_d3cold_enable(root_pdev); + break; + } +} + static int xe_pci_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -851,12 +851,12 @@ static int xe_pci_runtime_idle(struct device *dev) return 0; } -#endif static const struct dev_pm_ops xe_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(xe_pci_suspend, xe_pci_resume) SET_RUNTIME_PM_OPS(xe_pci_runtime_suspend, xe_pci_runtime_resume, xe_pci_runtime_idle) }; +#endif static struct pci_driver xe_pci_driver = { .name = 
DRIVER_NAME, @@ -864,7 +864,9 @@ static struct pci_driver xe_pci_driver = { .probe = xe_pci_probe, .remove = xe_pci_remove, .shutdown = xe_pci_shutdown, +#ifdef CONFIG_PM_SLEEP .driver.pm = &xe_pm_ops, +#endif }; int xe_register_pci_driver(void) -- cgit v1.2.3 From fba153b0d0b769bb2379c9e78968036d17bdfb6b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 1 Sep 2023 15:28:25 +0100 Subject: drm/xe/selftests: consider multi-GT for eviction test We need to sanitize and reset each GT, since xe_bo_evict_all() will evict everything regardless of GT, which can leave other GTs in a broken state. Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index b32a9068d76c..0e4ec22c5667 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -181,7 +181,8 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni XE_BO_CREATE_VRAM_IF_DGFX(gt_to_tile(gt)); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); struct ww_acquire_ctx ww; - int err, i; + struct xe_gt *__gt; + int err, i, id; kunit_info(test, "Testing device %s gt id %u vram id %u\n", dev_name(xe->drm.dev), gt->info.id, gt_to_tile(gt)->id); @@ -218,7 +219,8 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni goto cleanup_all; } - xe_gt_sanitize(gt); + for_each_gt(__gt, xe, id) + xe_gt_sanitize(__gt); err = xe_bo_restore_kernel(xe); /* * Snapshotting the CTB and copying back a potentially old @@ -231,8 +233,10 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni * however seems quite fragile not to also restart the GT. Try * to do that here by triggering a GT reset. */ - xe_gt_reset_async(gt); - flush_work(>->reset.worker); + for_each_gt(__gt, xe, id) { + xe_gt_reset_async(__gt); + flush_work(&__gt->reset.worker); + } if (err) { KUNIT_FAIL(test, "restore kernel err=%pe\n", ERR_PTR(err)); -- cgit v1.2.3 From e6a373dc3d1267f828a3e6523fe2e46c6824d3e4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 1 Sep 2023 15:28:26 +0100 Subject: drm/xe/selftests: make eviction test tile centric The concern here is that we may have platforms with dedicated media GT, and we anyway allocate the object on the tile, which just means running the same test twice (i.e primary vs media GT). 
Signed-off-by: Matthew Auld Cc: Nirmoy Das Reviewed-by: Nirmoy Das Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 0e4ec22c5667..c448b00a569c 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -174,18 +174,18 @@ void xe_ccs_migrate_kunit(struct kunit *test) } EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); -static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kunit *test) +static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test) { struct xe_bo *bo, *external; unsigned int bo_flags = XE_BO_CREATE_USER_BIT | - XE_BO_CREATE_VRAM_IF_DGFX(gt_to_tile(gt)); + XE_BO_CREATE_VRAM_IF_DGFX(tile); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); struct ww_acquire_ctx ww; struct xe_gt *__gt; int err, i, id; - kunit_info(test, "Testing device %s gt id %u vram id %u\n", - dev_name(xe->drm.dev), gt->info.id, gt_to_tile(gt)->id); + kunit_info(test, "Testing device %s vram id %u\n", + dev_name(xe->drm.dev), tile->id); for (i = 0; i < 2; ++i) { xe_vm_lock(vm, &ww, 0, false); @@ -316,7 +316,7 @@ cleanup_bo: static int evict_test_run_device(struct xe_device *xe) { struct kunit *test = xe_cur_kunit(); - struct xe_gt *gt; + struct xe_tile *tile; int id; if (!IS_DGFX(xe)) { @@ -327,8 +327,8 @@ static int evict_test_run_device(struct xe_device *xe) xe_device_mem_access_get(xe); - for_each_gt(gt, xe, id) - evict_test_run_gt(xe, gt, test); + for_each_tile(tile, xe, id) + evict_test_run_tile(xe, tile, test); xe_device_mem_access_put(xe); -- cgit v1.2.3 From 8bc454baf4036f4684bf30951dc3f6d96eb93f5f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 6 Sep 2023 12:30:09 -0700 Subject: drm/xe/pat: Use 0 instead of space on error Use 0 in format string instead of space so it shows as [drm] *ERROR* Missing PAT table for platform with graphics version 20.04! instead of [drm] *ERROR* Missing PAT table for platform with graphics version 20. 4! Reviewed-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20230906193009.1912129-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index b56a65779d26..71e0e047fff3 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -107,7 +107,7 @@ void xe_pat_init(struct xe_gt *gt) * raise an error rather than trying to silently inherit the * most recent platform's behavior. */ - drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%2d!\n", + drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%02d!\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); } } -- cgit v1.2.3 From a2112949e5f96c1b95aedfb9e2f0401e6c4f864f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Sep 2023 18:20:50 -0700 Subject: drm/xe/reg_sr: Simplify check for masked registers For all RTP actions, clr_bits is a superset of the bits being modified. That's also why the check for "changing all bits" can be done with `clr_bits + 1`. So always use clr_bits for setting the upper bits of a masked register. 
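For illustration, a masked register update is composed as below (helper name made up for this sketch; the upper 16 bits act as a write-enable mask for the lower 16 bits):

    /*
     * Sketch only: the hardware updates bit n of the register iff bit
     * n+16 is set in the written value, so using clr_bits (a superset
     * of the bits being modified) as the mask is always safe.
     */
    static u32 masked_write_val(u32 clr_bits, u32 set_bits)
    {
            return (clr_bits << 16) | set_bits;
    }

E.g. clr_bits = 0x00f0 and set_bits = 0x0030 yields 0x00f00030, which updates only bits [7:4] of the register and leaves everything else untouched.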
Reviewed-by: Matt Roper Reviewed-by: Mika Kuoppala Link: https://lore.kernel.org/r/20230906012053.1733755-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 8 ++++---- drivers/gpu/drm/xe/xe_rtp_types.h | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 7c88352636d2..264520015861 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -153,15 +153,15 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) u32 val; /* - * If this is a masked register, need to figure what goes on the upper - * 16 bits: it's either the clr_bits (when using FIELD_SET and WR) or - * the set_bits, when using SET. + * If this is a masked register, need to set the upper 16 bits. + * Set them to clr_bits since that is always a superset of the bits + * being modified. * * When it's not masked, we have to read it from hardware, unless we are * supposed to set all bits. */ if (reg.masked) - val = (entry->clr_bits ?: entry->set_bits) << 16; + val = entry->clr_bits << 16; else if (entry->clr_bits + 1) val = (reg.mcr ? xe_gt_mcr_unicast_read_any(gt, reg_mcr) : diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index d170532a98a5..637acc7626a4 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -22,7 +22,10 @@ struct xe_gt; struct xe_rtp_action { /** @reg: Register */ struct xe_reg reg; - /** @clr_bits: bits to clear when updating register */ + /** + * @clr_bits: bits to clear when updating register. It's always a + * superset of bits being modified + */ u32 clr_bits; /** @set_bits: bits to set when updating register */ u32 set_bits; -- cgit v1.2.3 From 46c63b6485b9029aae0a79a82c8c3e03548abc1b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Sep 2023 18:20:51 -0700 Subject: drm/xe/reg_sr: Use xe_gt_dbg Use xe_gt_dbg() instead of drm_dbg() so the GT is added to the log for easy identification. 
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230906012053.1733755-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_sr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 264520015861..87adefb56024 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -147,7 +147,6 @@ static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) { - struct xe_device *xe = gt_to_xe(gt); struct xe_reg reg = entry->reg; struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); u32 val; @@ -176,7 +175,7 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) */ val |= entry->set_bits; - drm_dbg(&xe->drm, "REG[0x%x] = 0x%08x", reg.addr, val); + xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); if (entry->reg.mcr) xe_gt_mcr_multicast_write(gt, reg_mcr, val); @@ -186,7 +185,6 @@ static void apply_one_mmio(struct xe_gt *gt, struct xe_reg_sr_entry *entry) void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) { - struct xe_device *xe = gt_to_xe(gt); struct xe_reg_sr_entry *entry; unsigned long reg; int err; @@ -194,7 +192,7 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) if (xa_empty(&sr->xa)) return; - drm_dbg(&xe->drm, "Applying %s save-restore MMIOs\n", sr->name); + xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); err = xe_force_wake_get(>->mmio.fw, XE_FORCEWAKE_ALL); if (err) @@ -209,7 +207,7 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) return; err_force_wake: - drm_err(&xe->drm, "Failed to apply, err=%d\n", err); + xe_gt_err(gt, "Failed to apply, err=%d\n", err); } void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) -- cgit v1.2.3 From 12a66a47018aa2fbe60ea34a4de85a43c0799fb5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Sep 2023 18:20:52 -0700 Subject: drm/xe: Add dbg messages for LRC WAs Just like the GT and engine workarounds, add debug message with the final value being written to the register for easy debugging. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230906012053.1733755-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3d6a7c11bac1..9e226b8a005a 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -133,10 +133,14 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) ++count; if (count) { + xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count); xa_for_each(&sr->xa, reg, entry) { bb->cs[bb->len++] = reg; bb->cs[bb->len++] = entry->set_bits; + xe_gt_dbg(gt, "REG[0x%lx] = 0x%08x", reg, + entry->set_bits); } } -- cgit v1.2.3 From 278c35822d61ae53d3a1d162b29adda671b11e3b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Sep 2023 18:20:53 -0700 Subject: drm/xe: Fix LRC workarounds Fix 2 issues when writing LRC workarounds by copying the same handling done when processing other RTP entries: For masked registers, it was not correctly setting the upper 16bits. Differently than i915, the entry itself doesn't set the upper bits for masked registers: this is done when applying them. 
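Roughly, the value emitted per LRC entry now follows the same three cases as apply_one_mmio() in xe_reg_sr.c (sketch; hw_val stands in for the MCR/MMIO read done in the real code):

    if (reg.masked)
            val = entry->clr_bits << 16;       /* write-enable mask in upper 16 bits */
    else if (entry->clr_bits + 1)              /* not changing all 32 bits */
            val = hw_val & ~entry->clr_bits;   /* read-modify-write */
    else
            val = 0;
    val |= entry->set_bits;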
Testing on ADL-P: Before: [drm:xe_gt_record_default_lrcs [xe]] LRC WA rcs0 save-restore MMIOs [drm:xe_gt_record_default_lrcs [xe]] REG[0x2580] = 0x00000002 ... [drm:xe_gt_record_default_lrcs [xe]] REG[0x7018] = 0x00002000 [drm:xe_gt_record_default_lrcs [xe]] REG[0x7300] = 0x00000040 [drm:xe_gt_record_default_lrcs [xe]] REG[0x7304] = 0x00000200 After: [drm:xe_gt_record_default_lrcs [xe]] LRC WA rcs0 save-restore MMIOs [drm:xe_gt_record_default_lrcs [xe]] REG[0x2580] = 0x00060002 ... [drm:xe_gt_record_default_lrcs [xe]] REG[0x7018] = 0x20002000 [drm:xe_gt_record_default_lrcs [xe]] REG[0x7300] = 0x00400040 [drm:xe_gt_record_default_lrcs [xe]] REG[0x7304] = 0x02000200 All of these registers are masked registers, so writing to them without the relevant bits in the upper 16b doesn't have any effect. Also, this adds support to regular registers; previously it was assumed that LRC entries would only contain masked registers. However this is not true. 0x6604 is not a masked register, but used in workarounds for e.g. ADL-P. See commit 28cf243a341a ("drm/i915/gt: Fix context workarounds with non-masked regs"). In the same test with ADL-P as above: Before: [drm:xe_gt_record_default_lrcs [xe]] REG[0x6604] = 0xe0000000 After: [drm:xe_gt_record_default_lrcs [xe]] REG[0x6604] = 0xe0efef6f As can be seen, now it will read what was in the register rather than completely overwrite the other bits. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230906012053.1733755-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9e226b8a005a..678a276a25dc 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -114,11 +114,20 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } +/* + * Convert back from encoded value to type-safe, only to be used when reg.mcr + * is true + */ +static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) +{ + return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; +} + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; - unsigned long reg; + unsigned long idx; struct xe_sched_job *job; struct xe_bb *bb; struct dma_fence *fence; @@ -129,18 +138,36 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (IS_ERR(bb)) return PTR_ERR(bb); - xa_for_each(&sr->xa, reg, entry) + xa_for_each(&sr->xa, idx, entry) ++count; if (count) { xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count); - xa_for_each(&sr->xa, reg, entry) { - bb->cs[bb->len++] = reg; - bb->cs[bb->len++] = entry->set_bits; - xe_gt_dbg(gt, "REG[0x%lx] = 0x%08x", reg, - entry->set_bits); + + xa_for_each(&sr->xa, idx, entry) { + struct xe_reg reg = entry->reg; + struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); + u32 val; + + /* + * Skip reading the register if it's not really needed + */ + if (reg.masked) + val = entry->clr_bits << 16; + else if (entry->clr_bits + 1) + val = (reg.mcr ? 
+ xe_gt_mcr_unicast_read_any(gt, reg_mcr) : + xe_mmio_read32(gt, reg)) & (~entry->clr_bits); + else + val = 0; + + val |= entry->set_bits; + + bb->cs[bb->len++] = reg.addr; + bb->cs[bb->len++] = val; + xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); } } -- cgit v1.2.3 From 2793fac1dbe068da5965acd9a78a181b33ad469b Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 30 Aug 2023 17:47:14 -0400 Subject: drm/xe/uapi: Typo lingo and other small backwards compatible fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix typos, lingo and other small things identified during uapi review. v2: Also fix ALIGNMENT typo at xe_query.c v3: Do not touch property to get/set. (Francois) Link: https://lore.kernel.org/all/863bebd0c624d6fc2b38c0a06b63e468b4185128.camel@linux.intel.com/ Suggested-by: Thomas Hellström Cc: Thomas Hellström Signed-off-by: Rodrigo Vivi Reviewed-by: Thomas Hellström Reviewed-by: Francois Dugast --- drivers/gpu/drm/xe/xe_query.c | 2 +- include/uapi/drm/xe_drm.h | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 1db77a7c9039..c3d396904c7b 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -195,7 +195,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe_device_get_root_tile(xe)->mem.vram.usable_size) config->info[XE_QUERY_CONFIG_FLAGS] = XE_QUERY_CONFIG_FLAGS_HAS_VRAM; - config->info[XE_QUERY_CONFIG_MIN_ALIGNEMENT] = + config->info[XE_QUERY_CONFIG_MIN_ALIGNMENT] = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 86f16d50e9cc..902b5c4f3f5c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -256,7 +256,7 @@ struct drm_xe_query_config { #define XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 #define XE_QUERY_CONFIG_FLAGS 1 #define XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) -#define XE_QUERY_CONFIG_MIN_ALIGNEMENT 2 +#define XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define XE_QUERY_CONFIG_VA_BITS 3 #define XE_QUERY_CONFIG_GT_COUNT 4 #define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 @@ -449,7 +449,6 @@ struct drm_xe_gem_create { * If a VM is specified, this BO must: * * 1. Only ever be bound to that VM. - * * 2. Cannot be exported as a PRIME fd. */ __u32 vm_id; @@ -489,7 +488,7 @@ struct drm_xe_gem_mmap_offset { * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture */ struct drm_xe_vm_bind_op_error_capture { - /** @error: errno that occured */ + /** @error: errno that occurred */ __s32 error; /** @op: operation that encounter an error */ @@ -609,7 +608,7 @@ struct drm_xe_vm_bind_op { * caused the error will be captured in drm_xe_vm_bind_op_error_capture. * Once the user sees the error (via a ufence + * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory - * via non-async unbinds, and then restart all queue'd async binds op via + * via non-async unbinds, and then restart all queued async binds op via * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the * VM. * @@ -620,7 +619,7 @@ struct drm_xe_vm_bind_op { #define XE_VM_BIND_FLAG_ASYNC (0x1 << 17) /* * Valid on a faulting VM only, do the MAP operation immediately rather - * than differing the MAP to the page fault handler. 
+ * than deferring the MAP to the page fault handler. */ #define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 18) /* @@ -907,7 +906,7 @@ struct drm_xe_mmio { /** * struct drm_xe_wait_user_fence - wait user fence * - * Wait on user fence, XE will wakeup on every HW engine interrupt in the + * Wait on user fence, XE will wake-up on every HW engine interrupt in the * instances list and check if user fence is complete:: * * (*addr & MASK) OP (VALUE & MASK) @@ -1039,9 +1038,11 @@ struct drm_xe_vm_madvise { */ #define DRM_XE_VM_MADVISE_PRIORITY 5 #define DRM_XE_VMA_PRIORITY_LOW 0 -#define DRM_XE_VMA_PRIORITY_NORMAL 1 /* Default */ -#define DRM_XE_VMA_PRIORITY_HIGH 2 /* Must be elevated user */ - /* Pin the VMA in memory, must be elevated user */ + /* Default */ +#define DRM_XE_VMA_PRIORITY_NORMAL 1 + /* Must be user with elevated privileges */ +#define DRM_XE_VMA_PRIORITY_HIGH 2 + /* Pin the VMA in memory, must be user with elevated privileges */ #define DRM_XE_VM_MADVISE_PIN 6 /** @property: property to set */ __u32 property; -- cgit v1.2.3 From 9e6fe003d8c7e35bcd93f0a962b8fdc8889db35b Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 30 Aug 2023 17:47:15 -0400 Subject: drm/xe/uapi: Remove useless max_page_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The min_page_size is useful information to ensure alignment and it is an API actually in use. However max_page_size doesn't bring any useful information to the userspace hence being not used at all. So, let's remove and only bring it back if that ever gets used. Suggested-by: Thomas Hellström Cc: Thomas Hellström Signed-off-by: Rodrigo Vivi Reviewed-by: Francois Dugast --- drivers/gpu/drm/xe/xe_query.c | 3 --- include/uapi/drm/xe_drm.h | 4 ---- 2 files changed, 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index c3d396904c7b..a951205100fe 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -127,7 +127,6 @@ static int query_memory_usage(struct xe_device *xe, usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM; usage->regions[0].instance = 0; usage->regions[0].min_page_size = PAGE_SIZE; - usage->regions[0].max_page_size = PAGE_SIZE; usage->regions[0].total_size = man->size << PAGE_SHIFT; if (perfmon_capable()) usage->regions[0].used = ttm_resource_manager_usage(man); @@ -143,8 +142,6 @@ static int query_memory_usage(struct xe_device *xe, usage->regions[usage->num_regions].min_page_size = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : PAGE_SIZE; - usage->regions[usage->num_regions].max_page_size = - SZ_1G; usage->regions[usage->num_regions].total_size = man->size; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 902b5c4f3f5c..00d5cb4ef85e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -174,10 +174,6 @@ struct drm_xe_query_mem_region { * kernel. */ __u32 min_page_size; - /** - * @max_page_size: Max page-size in bytes for this region. - */ - __u32 max_page_size; /** * @total_size: The usable size in bytes for this region. */ -- cgit v1.2.3 From 9fa81f914a1ce8ee7a5a0ce6f275a636a15bb109 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 8 Sep 2023 15:52:27 -0700 Subject: drm/xe/mmio: Account for GSI offset when checking ranges Change xe_mmio_in_range() to use the same logic to account for the GT's adj_offset as the read and write functions. This is needed when checking ranges for the MCR registers if the GT has an offset to adjust. 
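The shared adjustment is simply (sketch of the logic the read/write helpers already apply):

    if (reg.addr < gt->mmio.adj_limit)
            reg.addr += gt->mmio.adj_offset;

i.e. the range comparison is now done on the same final MMIO offset that the read and write paths actually use.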
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230908225227.1276610-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 4 ++-- drivers/gpu/drm/xe/xe_mmio.h | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index e74d3c5743c8..77925b35cf8d 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -448,7 +448,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, continue; for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { - if (xe_mmio_in_range(>->steering[type].ranges[i], reg)) { + if (xe_mmio_in_range(gt, >->steering[type].ranges[i], reg)) { *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; @@ -459,7 +459,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, implicit_ranges = gt->steering[IMPLICIT_STEERING].ranges; if (implicit_ranges) for (int i = 0; implicit_ranges[i].end > 0; i++) - if (xe_mmio_in_range(&implicit_ranges[i], reg)) + if (xe_mmio_in_range(gt, &implicit_ranges[i], reg)) return false; /* diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index cd9fe08ccf4a..9e0fd4a6fb29 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -127,9 +127,13 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, int xe_mmio_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -static inline bool xe_mmio_in_range(const struct xe_mmio_range *range, +static inline bool xe_mmio_in_range(const struct xe_gt *gt, + const struct xe_mmio_range *range, struct xe_reg reg) { + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + return range && reg.addr >= range->start && reg.addr <= range->end; } -- cgit v1.2.3 From 08a4f00e62bc96eabf7d876933f84600a3dc5e69 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 8 Sep 2023 11:17:11 +0200 Subject: drm/xe/bo: Simplify xe_bo_lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_bo_lock() was, although it only grabbed a single lock, unnecessarily using ttm_eu_reserve_buffers(). Simplify and document the interface. v2: - Update also the xe_display subsystem. v4: - Reinstate a lost dma_resv_reserve_fences(). 
- Improve on xe_bo_lock() documentation (Matthew Brost) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-2-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 24 ++++++++++---------- drivers/gpu/drm/xe/xe_bo.c | 43 ++++++++++++++++++++++-------------- drivers/gpu/drm/xe/xe_bo.h | 5 ++--- drivers/gpu/drm/xe/xe_bo_evict.c | 19 +++++++--------- drivers/gpu/drm/xe/xe_gt_pagefault.c | 41 +++++++++++++--------------------- drivers/gpu/drm/xe/xe_vm.c | 27 +++++++++++----------- drivers/gpu/drm/xe/xe_vm_madvise.c | 30 ++++++++++--------------- 7 files changed, 89 insertions(+), 100 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index c448b00a569c..97788432a122 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -204,9 +204,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc goto cleanup_bo; } - xe_bo_lock(external, &ww, 0, false); + xe_bo_lock(external, false); err = xe_bo_pin_external(external); - xe_bo_unlock(external, &ww); + xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo pin err=%pe\n", ERR_PTR(err)); @@ -272,9 +272,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc ERR_PTR(err)); goto cleanup_all; } - xe_bo_lock(external, &ww, 0, false); + xe_bo_lock(external, false); err = xe_bo_validate(external, NULL, false); - xe_bo_unlock(external, &ww); + xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo valid err=%pe\n", ERR_PTR(err)); @@ -282,28 +282,28 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc } } - xe_bo_lock(external, &ww, 0, false); + xe_bo_lock(external, false); xe_bo_unpin_external(external); - xe_bo_unlock(external, &ww); + xe_bo_unlock(external); xe_bo_put(external); - xe_bo_lock(bo, &ww, 0, false); + xe_bo_lock(bo, false); __xe_bo_unset_bulk_move(bo); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); xe_bo_put(bo); continue; cleanup_all: - xe_bo_lock(external, &ww, 0, false); + xe_bo_lock(external, false); xe_bo_unpin_external(external); - xe_bo_unlock(external, &ww); + xe_bo_unlock(external); cleanup_external: xe_bo_put(external); cleanup_bo: - xe_bo_lock(bo, &ww, 0, false); + xe_bo_lock(bo, false); __xe_bo_unset_bulk_move(bo); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); xe_bo_put(bo); break; } diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3cfd3f37c81e..ee8e3c940cf4 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1082,13 +1082,11 @@ static void xe_gem_object_close(struct drm_gem_object *obj, struct xe_bo *bo = gem_to_xe_bo(obj); if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) { - struct ww_acquire_ctx ww; - XE_WARN_ON(!xe_bo_is_user(bo)); - xe_bo_lock(bo, &ww, 0, false); + xe_bo_lock(bo, false); ttm_bo_set_bulk_move(&bo->ttm, NULL); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); } } @@ -1873,26 +1871,37 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, return 0; } -int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, - int num_resv, bool intr) +/** + * xe_bo_lock() - Lock the buffer object's dma_resv object + * @bo: The struct xe_bo whose lock is to be taken + * @intr: Whether to perform any wait interruptible + * + * Locks the buffer object's dma_resv object. 
If the buffer object is + * pointing to a shared dma_resv object, that shared lock is locked. + * + * Return: 0 on success, -EINTR if @intr is true and the wait for a + * contended lock was interrupted. If @intr is set to false, the + * function always returns 0. + */ +int xe_bo_lock(struct xe_bo *bo, bool intr) { - struct ttm_validate_buffer tv_bo; - LIST_HEAD(objs); - LIST_HEAD(dups); + if (intr) + return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL); - XE_WARN_ON(!ww); + dma_resv_lock(bo->ttm.base.resv, NULL); - tv_bo.num_shared = num_resv; - tv_bo.bo = &bo->ttm; - list_add_tail(&tv_bo.head, &objs); - - return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); + return 0; } -void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww) +/** + * xe_bo_unlock() - Unlock the buffer object's dma_resv object + * @bo: The struct xe_bo whose lock is to be released. + * + * Unlock a buffer object lock that was locked by xe_bo_lock(). + */ +void xe_bo_unlock(struct xe_bo *bo) { dma_resv_unlock(bo->ttm.base.resv); - ww_acquire_fini(ww); } /** diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 76b8c836deb7..c06dafcc93ec 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -158,10 +158,9 @@ static inline void xe_bo_assert_held(struct xe_bo *bo) dma_resv_assert_held((bo)->ttm.base.resv); } -int xe_bo_lock(struct xe_bo *bo, struct ww_acquire_ctx *ww, - int num_resv, bool intr); +int xe_bo_lock(struct xe_bo *bo, bool intr); -void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww); +void xe_bo_unlock(struct xe_bo *bo); static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) { diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 0d5c3a208ab4..49c05ddea164 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -27,7 +27,6 @@ int xe_bo_evict_all(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; - struct ww_acquire_ctx ww; struct xe_bo *bo; struct xe_tile *tile; struct list_head still_in_list; @@ -62,9 +61,9 @@ int xe_bo_evict_all(struct xe_device *xe) list_move_tail(&bo->pinned_link, &still_in_list); spin_unlock(&xe->pinned.lock); - xe_bo_lock(bo, &ww, 0, false); + xe_bo_lock(bo, false); ret = xe_bo_evict_pinned(bo); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); xe_bo_put(bo); if (ret) { spin_lock(&xe->pinned.lock); @@ -96,9 +95,9 @@ int xe_bo_evict_all(struct xe_device *xe) list_move_tail(&bo->pinned_link, &xe->pinned.evicted); spin_unlock(&xe->pinned.lock); - xe_bo_lock(bo, &ww, 0, false); + xe_bo_lock(bo, false); ret = xe_bo_evict_pinned(bo); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); xe_bo_put(bo); if (ret) return ret; @@ -123,7 +122,6 @@ int xe_bo_evict_all(struct xe_device *xe) */ int xe_bo_restore_kernel(struct xe_device *xe) { - struct ww_acquire_ctx ww; struct xe_bo *bo; int ret; @@ -140,9 +138,9 @@ int xe_bo_restore_kernel(struct xe_device *xe) list_move_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present); spin_unlock(&xe->pinned.lock); - xe_bo_lock(bo, &ww, 0, false); + xe_bo_lock(bo, false); ret = xe_bo_restore_pinned(bo); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); if (ret) { xe_bo_put(bo); return ret; @@ -184,7 +182,6 @@ int xe_bo_restore_kernel(struct xe_device *xe) */ int xe_bo_restore_user(struct xe_device *xe) { - struct ww_acquire_ctx ww; struct xe_bo *bo; struct xe_tile *tile; struct list_head still_in_list; @@ -206,9 +203,9 @@ int xe_bo_restore_user(struct xe_device *xe) xe_bo_get(bo); spin_unlock(&xe->pinned.lock); - xe_bo_lock(bo, &ww, 0, 
false); + xe_bo_lock(bo, false); ret = xe_bo_restore_pinned(bo); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); xe_bo_put(bo); if (ret) { spin_lock(&xe->pinned.lock); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 21e0e9c7b634..bdef4b76028b 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -171,20 +171,18 @@ retry_userptr: /* Lock VM and BOs dma-resv */ bo = xe_vma_bo(vma); - if (only_needs_bo_lock(bo)) { - /* This path ensures the BO's LRU is updated */ - ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); - } else { + if (!only_needs_bo_lock(bo)) { tv_vm.num_shared = xe->info.tile_count; tv_vm.bo = xe_vm_ttm_bo(vm); list_add(&tv_vm.head, &objs); - if (bo) { - tv_bo.bo = &bo->ttm; - tv_bo.num_shared = xe->info.tile_count; - list_add(&tv_bo.head, &objs); - } - ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); } + if (bo) { + tv_bo.bo = &bo->ttm; + tv_bo.num_shared = xe->info.tile_count; + list_add(&tv_bo.head, &objs); + } + + ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); if (ret) goto unlock_vm; @@ -227,10 +225,7 @@ retry_userptr: vma->usm.tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: - if (only_needs_bo_lock(bo)) - xe_bo_unlock(bo, &ww); - else - ttm_eu_backoff_reservation(&ww, &objs); + ttm_eu_backoff_reservation(&ww, &objs); unlock_vm: if (!ret) vm->usm.last_fault_vma = vma; @@ -534,28 +529,22 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) /* Lock VM and BOs dma-resv */ bo = xe_vma_bo(vma); - if (only_needs_bo_lock(bo)) { - /* This path ensures the BO's LRU is updated */ - ret = xe_bo_lock(bo, &ww, xe->info.tile_count, false); - } else { + if (!only_needs_bo_lock(bo)) { tv_vm.num_shared = xe->info.tile_count; tv_vm.bo = xe_vm_ttm_bo(vm); list_add(&tv_vm.head, &objs); - tv_bo.bo = &bo->ttm; - tv_bo.num_shared = xe->info.tile_count; - list_add(&tv_bo.head, &objs); - ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); } + tv_bo.bo = &bo->ttm; + tv_bo.num_shared = xe->info.tile_count; + list_add(&tv_bo.head, &objs); + ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); if (ret) goto unlock_vm; /* Migrate to VRAM, move should invalidate the VMA first */ ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id); - if (only_needs_bo_lock(bo)) - xe_bo_unlock(bo, &ww); - else - ttm_eu_backoff_reservation(&ww, &objs); + ttm_eu_backoff_reservation(&ww, &objs); unlock_vm: up_read(&vm->lock); xe_vm_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 24ee74f62385..2bd1fa34256a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -267,13 +267,16 @@ static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) { struct xe_exec_queue *q; - struct ww_acquire_ctx ww; int err; - err = xe_bo_lock(bo, &ww, vm->preempt.num_exec_queues, true); + err = xe_bo_lock(bo, true); if (err) return err; + err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); + if (err) + goto out_unlock; + list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) if (q->compute.pfence) { dma_resv_add_fence(bo->ttm.base.resv, @@ -281,8 +284,9 @@ static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) DMA_RESV_USAGE_BOOKKEEP); } - xe_bo_unlock(bo, &ww); - return 0; +out_unlock: + xe_bo_unlock(bo); + return err; } /** @@ -1033,12 +1037,11 @@ bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm, static bool bo_has_vm_references(struct 
xe_bo *bo, struct xe_vm *vm, struct xe_vma *ignore) { - struct ww_acquire_ctx ww; bool ret; - xe_bo_lock(bo, &ww, 0, false); + xe_bo_lock(bo, false); ret = !!bo_has_vm_references_locked(bo, vm, ignore); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); return ret; } @@ -2264,7 +2267,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, u32 operation, u8 tile_mask, u32 region) { struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; - struct ww_acquire_ctx ww; struct drm_gpuva_ops *ops; struct drm_gpuva_op *__op; struct xe_vma_op *op; @@ -2323,7 +2325,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, case XE_VM_BIND_OP_UNMAP_ALL: XE_WARN_ON(!bo); - err = xe_bo_lock(bo, &ww, 0, true); + err = xe_bo_lock(bo, true); if (err) return ERR_PTR(err); @@ -2333,7 +2335,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); drm_gpuvm_bo_put(vm_bo); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); if (IS_ERR(ops)) return ops; @@ -2369,13 +2371,12 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, { struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; struct xe_vma *vma; - struct ww_acquire_ctx ww; int err; lockdep_assert_held_write(&vm->lock); if (bo) { - err = xe_bo_lock(bo, &ww, 0, true); + err = xe_bo_lock(bo, true); if (err) return ERR_PTR(err); } @@ -2384,7 +2385,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, op->va.range - 1, read_only, is_null, tile_mask); if (bo) - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); if (xe_vma_is_userptr(vma)) { err = xe_vma_userptr_pin_pages(vma); diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 9abcd742c833..0648274b90b9 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -28,16 +28,15 @@ static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm, for (i = 0; i < num_vmas; ++i) { struct xe_bo *bo; - struct ww_acquire_ctx ww; bo = xe_vma_bo(vmas[i]); - err = xe_bo_lock(bo, &ww, 0, true); + err = xe_bo_lock(bo, true); if (err) return err; bo->props.preferred_mem_class = value; xe_bo_placement_for_flags(xe, bo, bo->flags); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); } return 0; @@ -53,16 +52,15 @@ static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm, for (i = 0; i < num_vmas; ++i) { struct xe_bo *bo; - struct ww_acquire_ctx ww; bo = xe_vma_bo(vmas[i]); - err = xe_bo_lock(bo, &ww, 0, true); + err = xe_bo_lock(bo, true); if (err) return err; bo->props.preferred_gt = value; xe_bo_placement_for_flags(xe, bo, bo->flags); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); } return 0; @@ -89,17 +87,16 @@ static int madvise_preferred_mem_class_gt(struct xe_device *xe, for (i = 0; i < num_vmas; ++i) { struct xe_bo *bo; - struct ww_acquire_ctx ww; bo = xe_vma_bo(vmas[i]); - err = xe_bo_lock(bo, &ww, 0, true); + err = xe_bo_lock(bo, true); if (err) return err; bo->props.preferred_mem_class = mem_class; bo->props.preferred_gt = gt_id; xe_bo_placement_for_flags(xe, bo, bo->flags); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); } return 0; @@ -112,13 +109,12 @@ static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm, for (i = 0; i < num_vmas; ++i) { struct xe_bo *bo; - struct ww_acquire_ctx ww; bo = xe_vma_bo(vmas[i]); if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT))) return -EINVAL; - err = xe_bo_lock(bo, &ww, 0, true); + err = xe_bo_lock(bo, true); if (err) return err; bo->props.cpu_atomic = 
!!value; @@ -130,7 +126,7 @@ static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm, */ if (bo->props.cpu_atomic) ttm_bo_unmap_virtual(&bo->ttm); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); } return 0; @@ -143,18 +139,17 @@ static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm, for (i = 0; i < num_vmas; ++i) { struct xe_bo *bo; - struct ww_acquire_ctx ww; bo = xe_vma_bo(vmas[i]); if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) && !(bo->flags & XE_BO_CREATE_VRAM1_BIT))) return -EINVAL; - err = xe_bo_lock(bo, &ww, 0, true); + err = xe_bo_lock(bo, true); if (err) return err; bo->props.device_atomic = !!value; - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); } return 0; @@ -174,16 +169,15 @@ static int madvise_priority(struct xe_device *xe, struct xe_vm *vm, for (i = 0; i < num_vmas; ++i) { struct xe_bo *bo; - struct ww_acquire_ctx ww; bo = xe_vma_bo(vmas[i]); - err = xe_bo_lock(bo, &ww, 0, true); + err = xe_bo_lock(bo, true); if (err) return err; bo->ttm.priority = value; ttm_bo_move_to_lru_tail(&bo->ttm); - xe_bo_unlock(bo, &ww); + xe_bo_unlock(bo); } return 0; -- cgit v1.2.3 From d00e9cc28e1e42108618e7a146969a26679170a2 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 8 Sep 2023 11:17:12 +0200 Subject: drm/xe/vm: Simplify and document xe_vm_lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe_vm_lock() function was unnecessarily using ttm_eu_reserve_buffers(). Simplify and document the interface. v4: - Improve on xe_vm_lock() documentation (Matthew Brost) v5: - Rebase conflict. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-3-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 9 +++---- drivers/gpu/drm/xe/tests/xe_migrate.c | 5 ++-- drivers/gpu/drm/xe/xe_bo.c | 5 ++-- drivers/gpu/drm/xe/xe_exec_queue.c | 5 ++-- drivers/gpu/drm/xe/xe_lrc.c | 6 ++--- drivers/gpu/drm/xe/xe_migrate.c | 10 +++----- drivers/gpu/drm/xe/xe_vm.c | 46 +++++++++++++++++------------------ drivers/gpu/drm/xe/xe_vm.h | 5 ++-- 8 files changed, 41 insertions(+), 50 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 97788432a122..ad6dd6fae853 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -180,7 +180,6 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc unsigned int bo_flags = XE_BO_CREATE_USER_BIT | XE_BO_CREATE_VRAM_IF_DGFX(tile); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); - struct ww_acquire_ctx ww; struct xe_gt *__gt; int err, i, id; @@ -188,10 +187,10 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc dev_name(xe->drm.dev), tile->id); for (i = 0; i < 2; ++i) { - xe_vm_lock(vm, &ww, 0, false); + xe_vm_lock(vm, false); bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device, bo_flags); - xe_vm_unlock(vm, &ww); + xe_vm_unlock(vm); if (IS_ERR(bo)) { KUNIT_FAIL(test, "bo create err=%pe\n", bo); break; @@ -263,9 +262,9 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc if (i) { down_read(&vm->lock); - xe_vm_lock(vm, &ww, 0, false); + xe_vm_lock(vm, false); err = xe_bo_validate(bo, bo->vm, false); - xe_vm_unlock(vm, &ww); + xe_vm_unlock(vm); up_read(&vm->lock); if (err) { KUNIT_FAIL(test, "bo valid err=%pe\n", diff --git 
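After this change a typical caller reduces to (sketch):

    err = xe_vm_lock(vm, true);    /* interruptible; may fail with -EINTR */
    if (err)
            return err;
    /* ... touch state protected by the VM's dma_resv ... */
    xe_vm_unlock(vm);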
a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 5c8d5e78d9bc..8bb081086ca2 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -396,14 +396,13 @@ static int migrate_test_run_device(struct xe_device *xe) for_each_tile(tile, xe, id) { struct xe_migrate *m = tile->migrate; - struct ww_acquire_ctx ww; kunit_info(test, "Testing tile id %d.\n", id); - xe_vm_lock(m->q->vm, &ww, 0, true); + xe_vm_lock(m->q->vm, true); xe_device_mem_access_get(xe); xe_migrate_sanity_test(m, test); xe_device_mem_access_put(xe); - xe_vm_unlock(m->q->vm, &ww); + xe_vm_unlock(m->q->vm); } return 0; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ee8e3c940cf4..c33a20420022 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1759,7 +1759,6 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_gem_create *args = data; - struct ww_acquire_ctx ww; struct xe_vm *vm = NULL; struct xe_bo *bo; unsigned int bo_flags = XE_BO_CREATE_USER_BIT; @@ -1812,7 +1811,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, vm = xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; - err = xe_vm_lock(vm, &ww, 0, true); + err = xe_vm_lock(vm, true); if (err) { xe_vm_put(vm); return err; @@ -1840,7 +1839,7 @@ out_put: xe_bo_put(bo); out_vm: if (vm) { - xe_vm_unlock(vm, &ww); + xe_vm_unlock(vm); xe_vm_put(vm); } return err; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index f28bceceb99a..a0b5647923ac 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -111,18 +111,17 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags) { - struct ww_acquire_ctx ww; struct xe_exec_queue *q; int err; if (vm) { - err = xe_vm_lock(vm, &ww, 0, true); + err = xe_vm_lock(vm, true); if (err) return ERR_PTR(err); } q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags); if (vm) - xe_vm_unlock(vm, &ww); + xe_vm_unlock(vm); return q; } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 2b4219c38359..434fbb364b4b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -789,16 +789,14 @@ err_lrc_finish: void xe_lrc_finish(struct xe_lrc *lrc) { - struct ww_acquire_ctx ww; - xe_hw_fence_ctx_finish(&lrc->fence_ctx); if (lrc->bo->vm) - xe_vm_lock(lrc->bo->vm, &ww, 0, false); + xe_vm_lock(lrc->bo->vm, false); else xe_bo_lock_no_vm(lrc->bo, NULL); xe_bo_unpin(lrc->bo); if (lrc->bo->vm) - xe_vm_unlock(lrc->bo->vm, &ww); + xe_vm_unlock(lrc->bo->vm); else xe_bo_unlock_no_vm(lrc->bo); xe_bo_put(lrc->bo); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 799ad0209279..8291798e1aaa 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -88,13 +88,12 @@ struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile) static void xe_migrate_fini(struct drm_device *dev, void *arg) { struct xe_migrate *m = arg; - struct ww_acquire_ctx ww; - xe_vm_lock(m->q->vm, &ww, 0, false); + xe_vm_lock(m->q->vm, false); xe_bo_unpin(m->pt_bo); if (m->cleared_bo) xe_bo_unpin(m->cleared_bo); - xe_vm_unlock(m->q->vm, &ww); + xe_vm_unlock(m->q->vm); dma_fence_put(m->fence); if (m->cleared_bo) @@ -338,7 +337,6 @@ struct xe_migrate 
*xe_migrate_init(struct xe_tile *tile) struct xe_gt *primary_gt = tile->primary_gt; struct xe_migrate *m; struct xe_vm *vm; - struct ww_acquire_ctx ww; int err; m = drmm_kzalloc(&xe->drm, sizeof(*m), GFP_KERNEL); @@ -353,9 +351,9 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) if (IS_ERR(vm)) return ERR_CAST(vm); - xe_vm_lock(vm, &ww, 0, false); + xe_vm_lock(vm, false); err = xe_migrate_prepare_vm(tile, m, vm); - xe_vm_unlock(vm, &ww); + xe_vm_unlock(vm); if (err) { xe_vm_close_and_put(vm); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2bd1fa34256a..0ac421c4e184 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -523,18 +523,17 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm, static void xe_vm_kill(struct xe_vm *vm) { - struct ww_acquire_ctx ww; struct xe_exec_queue *q; lockdep_assert_held(&vm->lock); - xe_vm_lock(vm, &ww, 0, false); + xe_vm_lock(vm, false); vm->flags |= XE_VM_FLAG_BANNED; trace_xe_vm_kill(vm); list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) q->ops->kill(q); - xe_vm_unlock(vm, &ww); + xe_vm_unlock(vm); /* TODO: Inform user the VM is banned */ } @@ -1420,7 +1419,6 @@ static void xe_vm_close(struct xe_vm *vm) void xe_vm_close_and_put(struct xe_vm *vm) { LIST_HEAD(contested); - struct ww_acquire_ctx ww; struct xe_device *xe = vm->xe; struct xe_tile *tile; struct xe_vma *vma, *next_vma; @@ -1443,7 +1441,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) } down_write(&vm->lock); - xe_vm_lock(vm, &ww, 0, false); + xe_vm_lock(vm, false); drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { vma = gpuva_to_vma(gpuva); @@ -1488,7 +1486,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) vm->pt_root[id] = NULL; } } - xe_vm_unlock(vm, &ww); + xe_vm_unlock(vm); /* * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL @@ -3442,30 +3440,32 @@ free_objs: return err == -ENODATA ? 0 : err; } -/* - * XXX: Using the TTM wrappers for now, likely can call into dma-resv code - * directly to optimize. Also this likely should be an inline function. +/** + * xe_vm_lock() - Lock the vm's dma_resv object + * @vm: The struct xe_vm whose lock is to be locked + * @intr: Whether to perform any wait interruptible + * + * Return: 0 on success, -EINTR if @intr is true and the wait for a + * contended lock was interrupted. If @intr is false, the function + * always returns 0. */ -int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, - int num_resv, bool intr) +int xe_vm_lock(struct xe_vm *vm, bool intr) { - struct ttm_validate_buffer tv_vm; - LIST_HEAD(objs); - LIST_HEAD(dups); + if (intr) + return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); - XE_WARN_ON(!ww); - - tv_vm.num_shared = num_resv; - tv_vm.bo = xe_vm_ttm_bo(vm); - list_add_tail(&tv_vm.head, &objs); - - return ttm_eu_reserve_buffers(ww, &objs, intr, &dups); + return dma_resv_lock(xe_vm_resv(vm), NULL); } -void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww) +/** + * xe_vm_unlock() - Unlock the vm's dma_resv object + * @vm: The struct xe_vm whose lock is to be released. + * + * Unlock a buffer object lock that was locked by xe_vm_lock(). 
+ */ +void xe_vm_unlock(struct xe_vm *vm) { dma_resv_unlock(xe_vm_resv(vm)); - ww_acquire_fini(ww); } /** diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 805236578140..dd20e5c8106f 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -38,10 +38,9 @@ static inline void xe_vm_put(struct xe_vm *vm) drm_gpuvm_put(&vm->gpuvm); } -int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww, - int num_resv, bool intr); +int xe_vm_lock(struct xe_vm *vm, bool intr); -void xe_vm_unlock(struct xe_vm *vm, struct ww_acquire_ctx *ww); +void xe_vm_unlock(struct xe_vm *vm); static inline bool xe_vm_is_closed(struct xe_vm *vm) { -- cgit v1.2.3 From b7ab8c4f028f87b8c79c9f99e12b891fd5430483 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 8 Sep 2023 11:17:13 +0200 Subject: drm/xe/bo: Remove the lock_no_vm()/unlock_no_vm() interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apart from asserts, it's essentially the same as xe_bo_lock()/xe_bo_unlock(), and the usage intentions of this interface was unclear. Remove it. v2: - Update the xe_display subsystem as well. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-4-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 2 +- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 4 ++-- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_bo.h | 23 ++--------------------- drivers/gpu/drm/xe/xe_dma_buf.c | 5 +++-- drivers/gpu/drm/xe/xe_lrc.c | 10 ++-------- 6 files changed, 11 insertions(+), 35 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index ad6dd6fae853..2c04357377ab 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -143,7 +143,7 @@ static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt, ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test); out_unlock: - xe_bo_unlock_no_vm(bo); + xe_bo_unlock(bo); xe_bo_put(bo); } diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 810a035bf720..1c4d8751be69 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -147,14 +147,14 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) int err; /* Is everything where we expect it to be? 
*/ - xe_bo_lock_no_vm(import_bo, NULL); + xe_bo_lock(import_bo, false); err = xe_bo_validate(import_bo, NULL, false); if (err && err != -EINTR && err != -ERESTARTSYS) KUNIT_FAIL(test, "xe_bo_validate() failed with err=%d\n", err); check_residency(test, bo, import_bo, dmabuf); - xe_bo_unlock_no_vm(import_bo); + xe_bo_unlock(import_bo); } drm_gem_object_put(import); } else if (PTR_ERR(import) != -EOPNOTSUPP) { diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 8bb081086ca2..f58cd1da1a34 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -183,7 +183,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, xe_bo_vunmap(sysmem); out_unlock: - xe_bo_unlock_no_vm(sysmem); + xe_bo_unlock(sysmem); xe_bo_put(sysmem); } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index c06dafcc93ec..d22b2ae7db72 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -173,25 +173,6 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) } } -static inline void xe_bo_lock_no_vm(struct xe_bo *bo, - struct ww_acquire_ctx *ctx) -{ - if (bo) { - XE_WARN_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && - bo->ttm.base.resv != &bo->ttm.base._resv)); - dma_resv_lock(bo->ttm.base.resv, ctx); - } -} - -static inline void xe_bo_unlock_no_vm(struct xe_bo *bo) -{ - if (bo) { - XE_WARN_ON(bo->vm || (bo->ttm.type != ttm_bo_type_sg && - bo->ttm.base.resv != &bo->ttm.base._resv)); - dma_resv_unlock(bo->ttm.base.resv); - } -} - int xe_bo_pin_external(struct xe_bo *bo); int xe_bo_pin(struct xe_bo *bo); void xe_bo_unpin_external(struct xe_bo *bo); @@ -206,9 +187,9 @@ static inline bool xe_bo_is_pinned(struct xe_bo *bo) static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) { if (likely(bo)) { - xe_bo_lock_no_vm(bo, NULL); + xe_bo_lock(bo, false); xe_bo_unpin(bo); - xe_bo_unlock_no_vm(bo); + xe_bo_unlock(bo); xe_bo_put(bo); } diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index b9bf4b4dd8a5..8ce1b582402a 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -150,9 +150,10 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, if (!reads) return 0; - xe_bo_lock_no_vm(bo, NULL); + /* Can we do interruptible lock here? */ + xe_bo_lock(bo, false); (void)xe_bo_migrate(bo, XE_PL_TT); - xe_bo_unlock_no_vm(bo); + xe_bo_unlock(bo); return 0; } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 434fbb364b4b..6f899b6a4877 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -790,15 +790,9 @@ err_lrc_finish: void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); - if (lrc->bo->vm) - xe_vm_lock(lrc->bo->vm, false); - else - xe_bo_lock_no_vm(lrc->bo, NULL); + xe_bo_lock(lrc->bo, false); xe_bo_unpin(lrc->bo); - if (lrc->bo->vm) - xe_vm_unlock(lrc->bo->vm); - else - xe_bo_unlock_no_vm(lrc->bo); + xe_bo_unlock(lrc->bo); xe_bo_put(lrc->bo); } -- cgit v1.2.3 From d490ecf577903ce5a9e6a3bb3bd08b5a550719c7 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 8 Sep 2023 11:17:14 +0200 Subject: drm/xe: Rework xe_exec and the VM rebind worker to use the drm_exec helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the calls to ttm_eu_reserve_buffers() by using the drm_exec helper instead. 
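For reference, the basic shape of a drm_exec locking transaction is (sketch; bo stands for whichever object is being locked):

    struct drm_exec exec;
    int err;

    drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
    drm_exec_until_all_locked(&exec) {
            err = drm_exec_lock_obj(&exec, &bo->ttm.base);
            drm_exec_retry_on_contention(&exec);
            if (err)
                    break;
            /* lock/validate further objects here; contention restarts the loop */
    }
    drm_exec_fini(&exec);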
Also make sure the locking loop covers any calls to xe_bo_validate() / ttm_bo_validate() so that these function calls may easily benefit from being called from within an unsealed locking transaction and may thus perform blocking dma_resv locks in the future. For the unlock we remove an assert that the vm->rebind_list is empty when locks are released. Since if the error path is hit with a partly locked list, that assert may no longer hold true we chose to remove it. v3: - Don't accept duplicate bo locks in the rebind worker. v5: - Loop over drm_exec objects in reverse when unlocking. v6: - We can't keep the WW ticket when retrying validation on OOM. Fix. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-5-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig | 2 + drivers/gpu/drm/xe/xe_exec.c | 77 ++++-------- drivers/gpu/drm/xe/xe_vm.c | 271 ++++++++++++++++++++----------------------- drivers/gpu/drm/xe/xe_vm.h | 22 +--- 4 files changed, 153 insertions(+), 219 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 6742ed4feecd..7bffc039d63f 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -8,6 +8,7 @@ config DRM_XE select SHMEM select TMPFS select DRM_BUDDY + select DRM_EXEC select DRM_KMS_HELPER select DRM_PANEL select DRM_SUBALLOC_HELPER @@ -21,6 +22,7 @@ config DRM_XE select VMAP_PFN select DRM_TTM select DRM_TTM_HELPER + select DRM_EXEC select DRM_GPUVM select DRM_SCHED select MMU_NOTIFIER diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 629d81a789e7..eb7fc3192c22 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -6,6 +6,7 @@ #include "xe_exec.h" #include +#include #include #include #include @@ -93,25 +94,16 @@ * Unlock all */ -#define XE_EXEC_BIND_RETRY_TIMEOUT_MS 1000 - -static int xe_exec_begin(struct xe_exec_queue *q, struct ww_acquire_ctx *ww, - struct ttm_validate_buffer tv_onstack[], - struct ttm_validate_buffer **tv, - struct list_head *objs) +static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm) { - struct xe_vm *vm = q->vm; struct xe_vma *vma; LIST_HEAD(dups); - ktime_t end = 0; int err = 0; - *tv = NULL; - if (xe_vm_no_dma_fences(q->vm)) + if (xe_vm_no_dma_fences(vm)) return 0; -retry: - err = xe_vm_lock_dma_resv(vm, ww, tv_onstack, tv, objs, true, 1); + err = xe_vm_lock_dma_resv(vm, exec, 1, true); if (err) return err; @@ -127,42 +119,13 @@ retry: continue; err = xe_bo_validate(xe_vma_bo(vma), vm, false); - if (err) { - xe_vm_unlock_dma_resv(vm, tv_onstack, *tv, ww, objs); - *tv = NULL; + if (err) break; - } - } - - /* - * With multiple active VMs, under memory pressure, it is possible that - * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. - * Until ttm properly handles locking in such scenarios, best thing the - * driver can do is retry with a timeout. - */ - if (err == -ENOMEM) { - ktime_t cur = ktime_get(); - - end = end ? 
: ktime_add_ms(cur, XE_EXEC_BIND_RETRY_TIMEOUT_MS); - if (ktime_before(cur, end)) { - msleep(20); - goto retry; - } } return err; } -static void xe_exec_end(struct xe_exec_queue *q, - struct ttm_validate_buffer *tv_onstack, - struct ttm_validate_buffer *tv, - struct ww_acquire_ctx *ww, - struct list_head *objs) -{ - if (!xe_vm_no_dma_fences(q->vm)) - xe_vm_unlock_dma_resv(q->vm, tv_onstack, tv, ww, objs); -} - int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -173,15 +136,13 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_exec_queue *q; struct xe_sync_entry *syncs = NULL; u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; - struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; - struct ttm_validate_buffer *tv = NULL; + struct drm_exec exec; u32 i, num_syncs = 0; struct xe_sched_job *job; struct dma_fence *rebind_fence; struct xe_vm *vm; - struct ww_acquire_ctx ww; - struct list_head objs; bool write_locked; + ktime_t end = 0; int err = 0; if (XE_IOCTL_DBG(xe, args->extensions) || @@ -294,26 +255,34 @@ retry: goto err_unlock_list; } - err = xe_exec_begin(q, &ww, tv_onstack, &tv, &objs); - if (err) - goto err_unlock_list; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + err = xe_exec_begin(&exec, vm); + drm_exec_retry_on_contention(&exec); + if (err && xe_vm_validate_should_retry(&exec, err, &end)) { + err = -EAGAIN; + goto err_unlock_list; + } + if (err) + goto err_exec; + } if (xe_vm_is_closed_or_banned(q->vm)) { drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n"); err = -ECANCELED; - goto err_exec_queue_end; + goto err_exec; } if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { err = -EWOULDBLOCK; - goto err_exec_queue_end; + goto err_exec; } job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ? 
addresses : &args->address); if (IS_ERR(job)) { err = PTR_ERR(job); - goto err_exec_queue_end; + goto err_exec; } /* @@ -412,8 +381,8 @@ err_repin: err_put_job: if (err) xe_sched_job_put(job); -err_exec_queue_end: - xe_exec_end(q, tv_onstack, tv, &ww, &objs); +err_exec: + drm_exec_fini(&exec); err_unlock_list: if (write_locked) up_write(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 0ac421c4e184..80b374b9cdd1 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -7,6 +7,7 @@ #include +#include #include #include #include @@ -327,10 +328,7 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) { - struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; - struct ttm_validate_buffer *tv; - struct ww_acquire_ctx ww; - struct list_head objs; + struct drm_exec exec; struct dma_fence *pfence; int err; bool wait; @@ -338,10 +336,13 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) XE_WARN_ON(!xe_vm_in_compute_mode(vm)); down_write(&vm->lock); - - err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, true, 1); - if (err) - goto out_unlock_outer; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + err = xe_vm_lock_dma_resv(vm, &exec, 1, true); + drm_exec_retry_on_contention(&exec); + if (err) + goto out_unlock; + } pfence = xe_preempt_fence_create(q, q->compute.context, ++q->compute.seqno); @@ -373,8 +374,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) up_read(&vm->userptr.notifier_lock); out_unlock: - xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs); -out_unlock_outer: + drm_exec_fini(&exec); up_write(&vm->lock); return err; @@ -403,68 +403,35 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm) * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv * objects of the vm's external buffer objects. * @vm: The vm. - * @ww: Pointer to a struct ww_acquire_ctx locking context. - * @tv_onstack: Array size XE_ONSTACK_TV of storage for the struct - * ttm_validate_buffers used for locking. - * @tv: Pointer to a pointer that on output contains the actual storage used. - * @objs: List head for the buffer objects locked. - * @intr: Whether to lock interruptible. + * @exec: Pointer to a struct drm_exec locking context. * @num_shared: Number of dma-fence slots to reserve in the locked objects. + * @lock_vm: Lock also the vm's dma_resv. * * Locks the vm dma-resv objects and all the dma-resv objects of the - * buffer objects on the vm external object list. The TTM utilities require - * a list of struct ttm_validate_buffers pointing to the actual buffer - * objects to lock. Storage for those struct ttm_validate_buffers should - * be provided in @tv_onstack, and is typically reserved on the stack - * of the caller. If the size of @tv_onstack isn't sufficient, then - * storage will be allocated internally using kvmalloc(). - * - * The function performs deadlock handling internally, and after a - * successful return the ww locking transaction should be considered - * sealed. + * buffer objects on the vm external object list. * * Return: 0 on success, Negative error code on error. In particular if - * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. In case - * of error, any locking performed has been reverted. + * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. 
*/ -int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, - struct ttm_validate_buffer *tv_onstack, - struct ttm_validate_buffer **tv, - struct list_head *objs, - bool intr, - unsigned int num_shared) -{ - struct ttm_validate_buffer *tv_vm, *tv_bo; +int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_shared, bool lock_vm) +{ struct xe_vma *vma, *next; - LIST_HEAD(dups); - int err; + int err = 0; lockdep_assert_held(&vm->lock); - if (vm->extobj.entries < XE_ONSTACK_TV) { - tv_vm = tv_onstack; - } else { - tv_vm = kvmalloc_array(vm->extobj.entries + 1, sizeof(*tv_vm), - GFP_KERNEL); - if (!tv_vm) - return -ENOMEM; + if (lock_vm) { + err = drm_exec_prepare_obj(exec, &xe_vm_ttm_bo(vm)->base, num_shared); + if (err) + return err; } - tv_bo = tv_vm + 1; - INIT_LIST_HEAD(objs); list_for_each_entry(vma, &vm->extobj.list, extobj.link) { - tv_bo->num_shared = num_shared; - tv_bo->bo = &xe_vma_bo(vma)->ttm; - - list_add_tail(&tv_bo->head, objs); - tv_bo++; + err = drm_exec_prepare_obj(exec, &xe_vma_bo(vma)->ttm.base, num_shared); + if (err) + return err; } - tv_vm->num_shared = num_shared; - tv_vm->bo = xe_vm_ttm_bo(vm); - list_add_tail(&tv_vm->head, objs); - err = ttm_eu_reserve_buffers(ww, objs, intr, &dups); - if (err) - goto out_err; spin_lock(&vm->notifier.list_lock); list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list, @@ -478,45 +445,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, } spin_unlock(&vm->notifier.list_lock); - *tv = tv_vm; return 0; - -out_err: - if (tv_vm != tv_onstack) - kvfree(tv_vm); - - return err; -} - -/** - * xe_vm_unlock_dma_resv() - Unlock reservation objects locked by - * xe_vm_lock_dma_resv() - * @vm: The vm. - * @tv_onstack: The @tv_onstack array given to xe_vm_lock_dma_resv(). - * @tv: The value of *@tv given by xe_vm_lock_dma_resv(). - * @ww: The ww_acquire_context used for locking. - * @objs: The list returned from xe_vm_lock_dma_resv(). - * - * Unlocks the reservation objects and frees any memory allocated by - * xe_vm_lock_dma_resv(). - */ -void xe_vm_unlock_dma_resv(struct xe_vm *vm, - struct ttm_validate_buffer *tv_onstack, - struct ttm_validate_buffer *tv, - struct ww_acquire_ctx *ww, - struct list_head *objs) -{ - /* - * Nothing should've been able to enter the list while we were locked, - * since we've held the dma-resvs of all the vm's external objects, - * and holding the dma_resv of an object is required for list - * addition, and we shouldn't add ourselves. - */ - XE_WARN_ON(!list_empty(&vm->notifier.rebind_list)); - - ttm_eu_backoff_reservation(ww, objs); - if (tv && tv != tv_onstack) - kvfree(tv); } #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 @@ -538,14 +467,94 @@ static void xe_vm_kill(struct xe_vm *vm) /* TODO: Inform user the VM is banned */ } +/** + * xe_vm_validate_should_retry() - Whether to retry after a validate error. + * @exec: The drm_exec object used for locking before validation. + * @err: The error returned from ttm_bo_validate(). + * @end: A ktime_t cookie that should be set to 0 before first use and + * that should be reused on subsequent calls. + * + * With multiple active VMs, under memory pressure, it is possible that + * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. + * Until ttm properly handles locking in such scenarios, best thing the + * driver can do is retry with a timeout. Check if that is necessary, and + * if so unlock the drm_exec's objects while keeping the ticket to prepare + * for a rerun. 
+ * + * Return: true if a retry after drm_exec_init() is recommended; + * false otherwise. + */ +bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) +{ + ktime_t cur; + + if (err != -ENOMEM) + return false; + + cur = ktime_get(); + *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); + if (!ktime_before(cur, *end)) + return false; + + /* + * We would like to keep the ticket here with + * drm_exec_unlock_all(), but WW mutex asserts currently + * stop us from that. In any case this function could go away + * with proper TTM -EDEADLK handling. + */ + drm_exec_fini(exec); + + msleep(20); + return true; +} + +static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, + bool *done) +{ + struct xe_vma *vma; + int err; + + err = drm_exec_prepare_obj(exec, &xe_vm_ttm_bo(vm)->base, + vm->preempt.num_exec_queues); + if (err) + return err; + + if (xe_vm_is_idle(vm)) { + vm->preempt.rebind_deactivated = true; + *done = true; + return 0; + } + + if (!preempt_fences_waiting(vm)) { + *done = true; + return 0; + } + + err = xe_vm_lock_dma_resv(vm, exec, vm->preempt.num_exec_queues, false); + if (err) + return err; + + err = wait_for_existing_preempt_fences(vm); + if (err) + return err; + + list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { + if (xe_vma_has_no_bo(vma) || + vma->gpuva.flags & XE_VMA_DESTROYED) + continue; + + err = xe_bo_validate(xe_vma_bo(vma), vm, false); + if (err) + break; + } + + return err; +} + static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); - struct xe_vma *vma; - struct ttm_validate_buffer tv_onstack[XE_ONSTACK_TV]; - struct ttm_validate_buffer *tv; - struct ww_acquire_ctx ww; - struct list_head objs; + struct drm_exec exec; struct dma_fence *rebind_fence; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); @@ -588,42 +597,25 @@ retry: goto out_unlock_outer; } - err = xe_vm_lock_dma_resv(vm, &ww, tv_onstack, &tv, &objs, - false, vm->preempt.num_exec_queues); - if (err) - goto out_unlock_outer; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); - if (xe_vm_is_idle(vm)) { - vm->preempt.rebind_deactivated = true; - goto out_unlock; - } - - /* Fresh preempt fences already installed. Everyting is running. */ - if (!preempt_fences_waiting(vm)) - goto out_unlock; + drm_exec_until_all_locked(&exec) { + bool done = false; - /* - * This makes sure vm is completely suspended and also balances - * xe_engine suspend- and resume; we resume *all* vm engines below. 
- */ - err = wait_for_existing_preempt_fences(vm); - if (err) - goto out_unlock; + err = xe_preempt_work_begin(&exec, vm, &done); + drm_exec_retry_on_contention(&exec); + if (err && xe_vm_validate_should_retry(&exec, err, &end)) { + err = -EAGAIN; + goto out_unlock_outer; + } + if (err || done) + goto out_unlock; + } err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); if (err) goto out_unlock; - list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { - if (xe_vma_has_no_bo(vma) || - vma->gpuva.flags & XE_VMA_DESTROYED) - continue; - - err = xe_bo_validate(xe_vma_bo(vma), vm, false); - if (err) - goto out_unlock; - } - rebind_fence = xe_vm_rebind(vm, true); if (IS_ERR(rebind_fence)) { err = PTR_ERR(rebind_fence); @@ -668,30 +660,13 @@ retry: up_read(&vm->userptr.notifier_lock); out_unlock: - xe_vm_unlock_dma_resv(vm, tv_onstack, tv, &ww, &objs); + drm_exec_fini(&exec); out_unlock_outer: if (err == -EAGAIN) { trace_xe_vm_rebind_worker_retry(vm); goto retry; } - /* - * With multiple active VMs, under memory pressure, it is possible that - * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. - * Until ttm properly handles locking in such scenarios, best thing the - * driver can do is retry with a timeout. Killing the VM or putting it - * in error state after timeout or other error scenarios is still TBD. - */ - if (err == -ENOMEM) { - ktime_t cur = ktime_get(); - - end = end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); - if (ktime_before(cur, end)) { - msleep(20); - trace_xe_vm_rebind_worker_retry(vm); - goto retry; - } - } if (err) { drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); xe_vm_kill(vm); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index dd20e5c8106f..a26e84c742f1 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -21,6 +21,7 @@ struct ttm_validate_buffer; struct xe_exec_queue; struct xe_file; struct xe_sync_entry; +struct drm_exec; struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); @@ -208,23 +209,10 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma); int xe_vma_userptr_check_repin(struct xe_vma *vma); -/* - * XE_ONSTACK_TV is used to size the tv_onstack array that is input - * to xe_vm_lock_dma_resv() and xe_vm_unlock_dma_resv(). - */ -#define XE_ONSTACK_TV 20 -int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww, - struct ttm_validate_buffer *tv_onstack, - struct ttm_validate_buffer **tv, - struct list_head *objs, - bool intr, - unsigned int num_shared); - -void xe_vm_unlock_dma_resv(struct xe_vm *vm, - struct ttm_validate_buffer *tv_onstack, - struct ttm_validate_buffer *tv, - struct ww_acquire_ctx *ww, - struct list_head *objs); +bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); + +int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_shared, bool lock_vm); void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, enum dma_resv_usage usage); -- cgit v1.2.3 From 2714d50936200a65ae52f431b0c004b31655239f Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 8 Sep 2023 11:17:15 +0200 Subject: drm/xe: Convert pagefaulting code to use drm_exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the calls into ttm_eu_reserve_buffers with the drm_exec helpers. Also reuse some code. v4: - Kerneldoc xe_vm_prepare_vma(). 
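As a usage illustration (assumed from the hunks that follow, not part of the patch text), the new xe_vm_prepare_vma() helper is meant to be called from inside a drm_exec loop, roughly as the pagefault path does through xe_pf_begin():

/*
 * Sketch of the pagefault-side locking after this conversion.
 * xe_pf_begin() locks the VM resv and, when the BO has its own resv,
 * the BO as well via xe_vm_prepare_vma(), then migrates/validates.
 */
struct drm_exec exec;
int ret;

drm_exec_init(&exec, 0);	/* fault path: not interruptible */
drm_exec_until_all_locked(&exec) {
	ret = xe_pf_begin(&exec, vma, atomic, tile->id);
	drm_exec_retry_on_contention(&exec);
	if (ret)
		break;
}
/* on success: bind the VMA and install the fence under the held locks */
drm_exec_fini(&exec);
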
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-6-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 107 +++++++++++++++-------------------- drivers/gpu/drm/xe/xe_vm.c | 36 +++++++++++- drivers/gpu/drm/xe/xe_vm.h | 3 + 3 files changed, 84 insertions(+), 62 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index bdef4b76028b..e1e067d3bb87 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -84,11 +85,6 @@ static bool vma_matches(struct xe_vma *vma, u64 page_addr) return true; } -static bool only_needs_bo_lock(struct xe_bo *bo) -{ - return bo && bo->vm; -} - static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) { struct xe_vma *vma = NULL; @@ -103,17 +99,45 @@ static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) return vma; } +static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, + bool atomic, unsigned int id) +{ + struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); + unsigned int num_shared = 2; /* slots for bind + move */ + int err; + + err = xe_vm_prepare_vma(exec, vma, num_shared); + if (err) + return err; + + if (atomic) { + if (xe_vma_is_userptr(vma)) { + err = -EACCES; + return err; + } + + /* Migrate to VRAM, move should invalidate the VMA first */ + err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); + if (err) + return err; + } else if (bo) { + /* Create backing store if needed */ + err = xe_bo_validate(bo, vm, true); + if (err) + return err; + } + + return 0; +} + static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) { struct xe_device *xe = gt_to_xe(gt); struct xe_tile *tile = gt_to_tile(gt); + struct drm_exec exec; struct xe_vm *vm; struct xe_vma *vma = NULL; - struct xe_bo *bo; - LIST_HEAD(objs); - LIST_HEAD(dups); - struct ttm_validate_buffer tv_bo, tv_vm; - struct ww_acquire_ctx ww; struct dma_fence *fence; bool write_locked; int ret = 0; @@ -170,35 +194,10 @@ retry_userptr: } /* Lock VM and BOs dma-resv */ - bo = xe_vma_bo(vma); - if (!only_needs_bo_lock(bo)) { - tv_vm.num_shared = xe->info.tile_count; - tv_vm.bo = xe_vm_ttm_bo(vm); - list_add(&tv_vm.head, &objs); - } - if (bo) { - tv_bo.bo = &bo->ttm; - tv_bo.num_shared = xe->info.tile_count; - list_add(&tv_bo.head, &objs); - } - - ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); - if (ret) - goto unlock_vm; - - if (atomic) { - if (xe_vma_is_userptr(vma)) { - ret = -EACCES; - goto unlock_dma_resv; - } - - /* Migrate to VRAM, move should invalidate the VMA first */ - ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id); - if (ret) - goto unlock_dma_resv; - } else if (bo) { - /* Create backing store if needed */ - ret = xe_bo_validate(bo, vm, true); + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + ret = xe_pf_begin(&exec, vma, atomic, tile->id); + drm_exec_retry_on_contention(&exec); if (ret) goto unlock_dma_resv; } @@ -225,7 +224,7 @@ retry_userptr: vma->usm.tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: - ttm_eu_backoff_reservation(&ww, &objs); + drm_exec_fini(&exec); unlock_vm: if (!ret) vm->usm.last_fault_vma = vma; @@ -490,13 +489,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) { struct xe_device *xe = gt_to_xe(gt); struct xe_tile *tile = gt_to_tile(gt); + struct drm_exec exec; struct xe_vm *vm; struct 
xe_vma *vma; - struct xe_bo *bo; - LIST_HEAD(objs); - LIST_HEAD(dups); - struct ttm_validate_buffer tv_bo, tv_vm; - struct ww_acquire_ctx ww; int ret = 0; /* We only support ACC_TRIGGER at the moment */ @@ -528,23 +523,15 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) goto unlock_vm; /* Lock VM and BOs dma-resv */ - bo = xe_vma_bo(vma); - if (!only_needs_bo_lock(bo)) { - tv_vm.num_shared = xe->info.tile_count; - tv_vm.bo = xe_vm_ttm_bo(vm); - list_add(&tv_vm.head, &objs); + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + ret = xe_pf_begin(&exec, vma, true, tile->id); + drm_exec_retry_on_contention(&exec); + if (ret) + break; } - tv_bo.bo = &bo->ttm; - tv_bo.num_shared = xe->info.tile_count; - list_add(&tv_bo.head, &objs); - ret = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); - if (ret) - goto unlock_vm; - - /* Migrate to VRAM, move should invalidate the VMA first */ - ret = xe_bo_migrate(bo, XE_PL_VRAM0 + tile->id); - ttm_eu_backoff_reservation(&ww, &objs); + drm_exec_fini(&exec); unlock_vm: up_read(&vm->lock); xe_vm_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 80b374b9cdd1..52c5235677c5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -35,6 +35,11 @@ #define TEST_VM_ASYNC_OPS_ERROR +static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) +{ + return vm->gpuvm.r_obj; +} + /** * xe_vma_userptr_check_repin() - Advisory check for repin needed * @vma: The userptr vma @@ -422,7 +427,7 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec, lockdep_assert_held(&vm->lock); if (lock_vm) { - err = drm_exec_prepare_obj(exec, &xe_vm_ttm_bo(vm)->base, num_shared); + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); if (err) return err; } @@ -514,7 +519,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma *vma; int err; - err = drm_exec_prepare_obj(exec, &xe_vm_ttm_bo(vm)->base, + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), vm->preempt.num_exec_queues); if (err) return err; @@ -1095,6 +1100,33 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) } } +/** + * xe_vm_prepare_vma() - drm_exec utility to lock a vma + * @exec: The drm_exec object we're currently locking for. + * @vma: The vma for witch we want to lock the vm resv and any attached + * object's resv. + * @num_shared: The number of dma-fence slots to pre-allocate in the + * objects' reservation objects. + * + * Return: 0 on success, negative error code on error. In particular + * may return -EDEADLK on WW transaction contention and -EINTR if + * an interruptible wait is terminated by a signal. 
+ */ +int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, + unsigned int num_shared) +{ + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_bo *bo = xe_vma_bo(vma); + int err; + + XE_WARN_ON(!vm); + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); + if (!err && bo && !bo->vm) + err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); + + return err; +} + static void xe_vma_destroy_unlocked(struct xe_vma *vma) { struct ttm_validate_buffer tv[2]; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index a26e84c742f1..ad9ff2b39a30 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -219,6 +219,9 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); +int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, + unsigned int num_shared); + /** * xe_vm_resv() - Return's the vm's reservation object * @vm: The vm -- cgit v1.2.3 From 1f72718215ff2763653a82d9cbc41bfed3186caa Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 8 Sep 2023 11:17:16 +0200 Subject: drm/xe: Convert remaining instances of ttm_eu_reserve_buffers to drm_exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VM_BIND functionality and vma destruction was locking potentially multiple dma_resv objects using the ttm_eu_reserve_buffers() function. Rework those to use the drm_exec helper, taking care that any calls to xe_bo_validate() ends up inside an unsealed locking transaction. v4: - Remove an unbalanced xe_bo_put() (igt and Matthew Brost) v5: - Rebase conflict Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230908091716.36984-7-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 103 ++++++++++++++++----------------------------- drivers/gpu/drm/xe/xe_vm.h | 2 - 2 files changed, 36 insertions(+), 69 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 52c5235677c5..4def60249381 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1129,29 +1129,20 @@ int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, static void xe_vma_destroy_unlocked(struct xe_vma *vma) { - struct ttm_validate_buffer tv[2]; - struct ww_acquire_ctx ww; - struct xe_bo *bo = xe_vma_bo(vma); - LIST_HEAD(objs); - LIST_HEAD(dups); + struct drm_exec exec; int err; - memset(tv, 0, sizeof(tv)); - tv[0].bo = xe_vm_ttm_bo(xe_vma_vm(vma)); - list_add(&tv[0].head, &objs); - - if (bo) { - tv[1].bo = &xe_bo_get(bo)->ttm; - list_add(&tv[1].head, &objs); + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + err = xe_vm_prepare_vma(&exec, vma, 0); + drm_exec_retry_on_contention(&exec); + if (XE_WARN_ON(err)) + break; } - err = ttm_eu_reserve_buffers(&ww, &objs, false, &dups); - XE_WARN_ON(err); xe_vma_destroy(vma, NULL); - ttm_eu_backoff_reservation(&ww, &objs); - if (bo) - xe_bo_put(bo); + drm_exec_fini(&exec); } struct xe_vma * @@ -2142,21 +2133,6 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, #define VM_BIND_OP(op) (op & 0xffff) -struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm) -{ - int idx = vm->flags & XE_VM_FLAG_MIGRATION ? 
- XE_VM_FLAG_TILE_ID(vm->flags) : 0; - - /* Safe to use index 0 as all BO in the VM share a single dma-resv lock */ - return &vm->pt_root[idx]->bo->ttm; -} - -static void xe_vm_tv_populate(struct xe_vm *vm, struct ttm_validate_buffer *tv) -{ - tv->num_shared = 1; - tv->bo = xe_vm_ttm_bo(vm); -} - static void vm_set_async_error(struct xe_vm *vm, int err) { lockdep_assert_held(&vm->lock); @@ -2668,42 +2644,16 @@ free_fence: return err; } -static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) +static int op_execute(struct drm_exec *exec, struct xe_vm *vm, + struct xe_vma *vma, struct xe_vma_op *op) { - LIST_HEAD(objs); - LIST_HEAD(dups); - struct ttm_validate_buffer tv_bo, tv_vm; - struct ww_acquire_ctx ww; - struct xe_bo *vbo; int err; lockdep_assert_held_write(&vm->lock); - xe_vm_tv_populate(vm, &tv_vm); - list_add_tail(&tv_vm.head, &objs); - vbo = xe_vma_bo(vma); - if (vbo) { - /* - * An unbind can drop the last reference to the BO and - * the BO is needed for ttm_eu_backoff_reservation so - * take a reference here. - */ - xe_bo_get(vbo); - - if (!vbo->vm) { - tv_bo.bo = &vbo->ttm; - tv_bo.num_shared = 1; - list_add(&tv_bo.head, &objs); - } - } - -again: - err = ttm_eu_reserve_buffers(&ww, &objs, true, &dups); - if (err) { - xe_bo_put(vbo); + err = xe_vm_prepare_vma(exec, vma, 1); + if (err) return err; - } xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); @@ -2782,17 +2732,36 @@ again: XE_WARN_ON("NOT POSSIBLE"); } - ttm_eu_backoff_reservation(&ww, &objs); + if (err) + trace_xe_vma_fail(vma); + + return err; +} + +static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) +{ + struct drm_exec exec; + int err; + +retry_userptr: + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + err = op_execute(&exec, vm, vma, op); + drm_exec_retry_on_contention(&exec); + if (err) + break; + } + drm_exec_fini(&exec); + if (err == -EAGAIN && xe_vma_is_userptr(vma)) { lockdep_assert_held_write(&vm->lock); err = xe_vma_userptr_pin_pages(vma); if (!err) - goto again; - } - xe_bo_put(vbo); + goto retry_userptr; - if (err) trace_xe_vma_fail(vma); + } return err; } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index ad9ff2b39a30..cc9dfd8cb770 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -180,8 +180,6 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence); extern struct ttm_device_funcs xe_ttm_funcs; -struct ttm_buffer_object *xe_vm_ttm_bo(struct xe_vm *vm); - static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) { XE_WARN_ON(!xe_vm_in_compute_mode(vm)); -- cgit v1.2.3 From 30278e299646a1a8f9c1fd1da33768440f71bb42 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 11 Sep 2023 14:10:32 -0700 Subject: drm/xe: Fix fence reservation accouting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both execs and the preempt rebind worker can issue rebinds. Rebinds require a fence, per tile, inserted into dma-resv slots of the VM and BO (if external). The fence reservation accouting did not take into account the number of fences required for rebinds, fix this. 
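To make the accounting concrete (a simplified reading of the hunks below, not additional patch text), the number of dma-fence slots reserved up front now includes the possible per-tile rebind fences:

/*
 * Slot reservation after this fix (simplified):
 *
 *   exec ioctl:
 *     xe_vm_lock_dma_resv(vm, exec, 1 + vm->xe->info.tile_count, true);
 *       -> 1 slot for the job fence + one per tile for a rebind fence
 *
 *   preempt rebind worker:
 *     drm_exec_prepare_obj(exec, xe_vm_obj(vm),
 *                          vm->preempt.num_exec_queues +
 *                          vm->xe->info.tile_count);
 *       -> one slot per preempt fence + one per tile for a rebind fence
 *
 * On a two-tile device an exec therefore reserves 3 slots in the VM's
 * dma-resv instead of the single slot reserved before this patch.
 */
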
v2: Rebase Reviewed-by: Thomas Hellström Reported-by: Christopher Snowhill Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/518 Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 6 +++++- drivers/gpu/drm/xe/xe_vm.c | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index eb7fc3192c22..293960efcd22 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -103,7 +103,11 @@ static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm) if (xe_vm_no_dma_fences(vm)) return 0; - err = xe_vm_lock_dma_resv(vm, exec, 1, true); + /* + * 1 fence for job from exec plus a fence for each tile from a possible + * rebind + */ + err = xe_vm_lock_dma_resv(vm, exec, 1 + vm->xe->info.tile_count, true); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4def60249381..89f7428f00d7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -519,8 +519,13 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma *vma; int err; + /* + * 1 fence for each preempt fence plus a fence for each tile from a + * possible rebind + */ err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), - vm->preempt.num_exec_queues); + vm->preempt.num_exec_queues + + vm->xe->info.tile_count); if (err) return err; -- cgit v1.2.3 From 5c0553cdc811bb6af4f1bfef178bd07fc16a797e Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 12 Sep 2023 08:36:33 +0000 Subject: drm/xe: Replace XE_WARN_ON with drm_warn when just printing a string Use the generic drm_warn instead of the driver-specific XE_WARN_ON in cases where XE_WARN_ON is used to unconditionally print a debug message. v2: Rebase Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 5 +++-- drivers/gpu/drm/xe/xe_gt_pagefault.c | 3 ++- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 9 ++++++--- drivers/gpu/drm/xe/xe_vm.c | 18 +++++++++--------- drivers/gpu/drm/xe/xe_vm_madvise.c | 2 +- 6 files changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index b871e45af813..9229fd5b01cc 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -151,6 +151,7 @@ static const struct drm_info_list debugfs_list[] = { void xe_gt_debugfs_register(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); struct drm_minor *minor = gt_to_xe(gt)->drm.primary; struct dentry *root; struct drm_info_list *local; @@ -162,7 +163,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) sprintf(name, "gt%d", gt->info.id); root = debugfs_create_dir(name, minor->debugfs_root); if (IS_ERR(root)) { - XE_WARN_ON("Create GT directory failed"); + drm_warn(&xe->drm, "Create GT directory failed"); return; } @@ -172,7 +173,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) * passed in (e.g. can't define this on the stack). 
*/ #define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) - local = drmm_kmalloc(>_to_xe(gt)->drm, DEBUGFS_SIZE, GFP_KERNEL); + local = drmm_kmalloc(&xe->drm, DEBUGFS_SIZE, GFP_KERNEL); if (!local) return; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index e1e067d3bb87..4e33ef8c9d6a 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -322,6 +322,7 @@ static bool pf_queue_full(struct pf_queue *pf_queue) int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); struct pf_queue *pf_queue; unsigned long flags; u32 asid; @@ -340,7 +341,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { - XE_WARN_ON("PF Queue full, shouldn't be possible"); + drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); } spin_unlock_irqrestore(&pf_queue->lock, flags); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index b92e04ba8f63..13f2bd586f6a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1022,7 +1022,7 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) adj_len); break; default: - XE_WARN_ON("NOT_POSSIBLE"); + drm_warn(&xe->drm, "NOT_POSSIBLE"); } if (ret) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 832e79fb0a02..50509891a288 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -703,6 +703,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc, struct xe_exec_queue *q) { MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); + struct xe_device *xe = guc_to_xe(guc); int ret; set_min_preemption_timeout(guc, q); @@ -712,7 +713,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc, if (!ret) { struct xe_gpu_scheduler *sched = &q->guc->sched; - XE_WARN_ON("Pending enable failed to respond"); + drm_warn(&xe->drm, "Pending enable failed to respond"); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); xe_sched_tdr_queue_imm(sched); @@ -794,6 +795,8 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_guc_exec_queue *ge = container_of(w, struct xe_guc_exec_queue, lr_tdr); struct xe_exec_queue *q = ge->q; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; XE_WARN_ON(!xe_exec_queue_is_lr(q)); @@ -828,7 +831,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) !exec_queue_pending_disable(q) || guc_read_stopped(guc), HZ * 5); if (!ret) { - XE_WARN_ON("Schedule disable failed to respond"); + drm_warn(&xe->drm, "Schedule disable failed to respond"); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); return; @@ -906,7 +909,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) !exec_queue_pending_disable(q) || guc_read_stopped(guc), HZ * 5); if (!ret || guc_read_stopped(guc)) { - XE_WARN_ON("Schedule disable failed to respond"); + drm_warn(&xe->drm, "Schedule disable failed to respond"); xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 89f7428f00d7..66e8aeb203c9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1401,7 +1401,7 @@ 
static void vm_error_capture(struct xe_vm *vm, int err, } if (copy_to_user(address, &capture, sizeof(capture))) - XE_WARN_ON("Copy to user failed"); + drm_warn(&vm->xe->drm, "Copy to user failed"); if (in_kthread) { kthread_unuse_mm(vm->async_ops.error_capture.mm); @@ -2176,7 +2176,7 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, return -ENODATA; break; default: - XE_WARN_ON("NOT POSSIBLE"); + drm_warn(&xe->drm, "NOT POSSIBLE"); return -EINVAL; } @@ -2234,7 +2234,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); break; default: - XE_WARN_ON("NOT POSSIBLE"); + drm_warn(&xe->drm, "NOT POSSIBLE"); } } #else @@ -2332,7 +2332,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } break; default: - XE_WARN_ON("NOT POSSIBLE"); + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); ops = ERR_PTR(-EINVAL); } @@ -2463,7 +2463,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) op->flags |= XE_VMA_OP_COMMITTED; break; default: - XE_WARN_ON("NOT POSSIBLE"); + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } return err; @@ -2619,7 +2619,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, /* Nothing to do */ break; default: - XE_WARN_ON("NOT POSSIBLE"); + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } last_op = op; @@ -2734,7 +2734,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, op->flags & XE_VMA_OP_LAST); break; default: - XE_WARN_ON("NOT POSSIBLE"); + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } if (err) @@ -2812,7 +2812,7 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) op); break; default: - XE_WARN_ON("NOT POSSIBLE"); + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } return ret; @@ -2893,7 +2893,7 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, /* Nothing to do */ break; default: - XE_WARN_ON("NOT POSSIBLE"); + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } } diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 0648274b90b9..0ef7d483d050 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -186,7 +186,7 @@ static int madvise_priority(struct xe_device *xe, struct xe_vm *vm, static int madvise_pin(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, u64 value) { - XE_WARN_ON("NIY"); + drm_warn(&xe->drm, "NIY"); return 0; } -- cgit v1.2.3 From 1975b5917a94429096f6a2cccc97ed91e0425708 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 12 Sep 2023 20:29:56 +0200 Subject: drm/xe: Introduce Xe assert macros As we are moving away from the controversial XE_BUG_ON macro, relying just on WARN_ON or drm_err does not cover the cases where we want to annotate functions with additional detailed debug checks to assert that all prerequisites are satisfied, without paying footprint or performance penalty on non-debug builds, where all misuses introduced during code integration were already fixed. Introduce family of Xe assert macros that try to follow classic assert() utility and can be compiled out on non-debug builds. Macros are based on drm_WARN, but unlikely to origin, disallow use in expressions since we will compile that code out. As we are operating on the xe pointers, we can print additional information about the device, like tile or GT identifier, that is not available from generic WARN report: [ ] xe 0000:00:02.0: [drm] Assertion `true == false` failed! 
platform: 1 subplatform: 1 graphics: Xe_LP 12.00 step B0 media: Xe_M 12.00 step B0 display: enabled step D0 tile: 0 VRAM 0 B GT: 0 type 1 [ ] xe 0000:b3:00.0: [drm] Assertion `true == false` failed! platform: 7 subplatform: 3 graphics: Xe_HPG 12.55 step A1 media: Xe_HPM 12.55 step A1 display: disabled step ** tile: 0 VRAM 14.0 GiB GT: 0 type 1 [ ] WARNING: CPU: 0 PID: 2687 at drivers/gpu/drm/xe/xe_device.c:281 xe_device_probe+0x374/0x520 [xe] [ ] RIP: 0010:xe_device_probe+0x374/0x520 [xe] [ ] Call Trace: [ ] ? __warn+0x7b/0x160 [ ] ? xe_device_probe+0x374/0x520 [xe] [ ] ? report_bug+0x1c3/0x1d0 [ ] ? handle_bug+0x42/0x70 [ ] ? exc_invalid_op+0x14/0x70 [ ] ? asm_exc_invalid_op+0x16/0x20 [ ] ? xe_device_probe+0x374/0x520 [xe] [ ] ? xe_device_probe+0x374/0x520 [xe] [ ] xe_pci_probe+0x6e3/0x950 [xe] [ ] ? lockdep_hardirqs_on+0xc7/0x140 [ ] pci_device_probe+0x9e/0x160 [ ] really_probe+0x19d/0x400 v2: use lowercase names v3: apply xe coding style v4: fix non-debug build and improve kernel-doc Signed-off-by: Michal Wajdeczko Cc: Oded Gabbay Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Lucas De Marchi Acked-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_assert.h | 174 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_assert.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h new file mode 100644 index 000000000000..962aac1bc764 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_ASSERT_H_ +#define _XE_ASSERT_H_ + +#include + +#include + +#include "xe_device_types.h" +#include "xe_step.h" + +/** + * DOC: Xe ASSERTs + * + * While Xe driver aims to be simpler than legacy i915 driver it is still + * complex enough that some changes introduced while adding new functionality + * could break the existing code. + * + * Adding &drm_WARN or &drm_err to catch unwanted programming usage could lead + * to undesired increased driver footprint and may impact production driver + * performance as this additional code will be always present. + * + * To allow annotate functions with additional detailed debug checks to assert + * that all prerequisites are satisfied, without worrying about footprint or + * performance penalty on production builds where all potential misuses + * introduced during code integration were already fixed, we introduce family + * of Xe assert macros that try to follow classic assert() utility: + * + * * xe_assert() + * * xe_tile_assert() + * * xe_gt_assert() + * + * These macros are implemented on top of &drm_WARN, but unlikely to the origin, + * warning is triggered when provided condition is false. Additionally all above + * assert macros cannot be used in expressions or as a condition, since + * underlying code will be compiled out on non-debug builds. + * + * Note that these macros are not intended for use to cover known gaps in the + * implementation; for such cases use regular &drm_WARN or &drm_err and provide + * valid safe fallback. + * + * Also in cases where performance or footprint is not an issue, developers + * should continue to use the regular &drm_WARN or &drm_err to ensure that bug + * reports from production builds will contain meaningful diagnostics data. 
+ * + * Below code shows how asserts could help in debug to catch unplanned use:: + * + * static void one_igfx(struct xe_device *xe) + * { + * xe_assert(xe, xe->info.is_dgfx == false); + * xe_assert(xe, xe->info.tile_count == 1); + * } + * + * static void two_dgfx(struct xe_device *xe) + * { + * xe_assert(xe, xe->info.is_dgfx); + * xe_assert(xe, xe->info.tile_count == 2); + * } + * + * void foo(struct xe_device *xe) + * { + * if (xe->info.dgfx) + * return two_dgfx(xe); + * return one_igfx(xe); + * } + * + * void bar(struct xe_device *xe) + * { + * if (drm_WARN_ON(xe->drm, xe->info.tile_count > 2)) + * return; + * + * if (xe->info.tile_count == 2) + * return two_dgfx(xe); + * return one_igfx(xe); + * } + */ + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define __xe_assert_msg(xe, condition, msg, arg...) ({ \ + (void)drm_WARN(&(xe)->drm, !(condition), "[" DRM_NAME "] Assertion `%s` failed!\n" msg, \ + __stringify(condition), ## arg); \ +}) +#else +#define __xe_assert_msg(xe, condition, msg, arg...) ({ \ + typecheck(struct xe_device *, xe); \ + BUILD_BUG_ON_INVALID(condition); \ +}) +#endif + +/** + * xe_assert - warn if condition is false when debugging. + * @xe: the &struct xe_device pointer to which &condition applies + * @condition: condition to check + * + * xe_assert() uses &drm_WARN to emit a warning and print additional information + * that could be read from the &xe pointer if provided &condition is false. + * + * Contrary to &drm_WARN, xe_assert() is effective only on debug builds + * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions + * or as a condition. + * + * See `Xe ASSERTs`_ for general usage guidelines. + */ +#define xe_assert(xe, condition) xe_assert_msg((xe), condition, "") +#define xe_assert_msg(xe, condition, msg, arg...) ({ \ + struct xe_device *__xe = (xe); \ + __xe_assert_msg(__xe, condition, \ + "platform: %d subplatform: %d\n" \ + "graphics: %s %u.%02u step %s\n" \ + "media: %s %u.%02u step %s\n" \ + msg, \ + __xe->info.platform, __xe->info.subplatform, \ + __xe->info.graphics_name, \ + __xe->info.graphics_verx100 / 100, \ + __xe->info.graphics_verx100 % 100, \ + xe_step_name(__xe->info.step.graphics), \ + __xe->info.media_name, \ + __xe->info.media_verx100 / 100, \ + __xe->info.media_verx100 % 100, \ + xe_step_name(__xe->info.step.media), \ + ## arg); \ +}) + +/** + * xe_tile_assert - warn if condition is false when debugging. + * @tile: the &struct xe_tile pointer to which &condition applies + * @condition: condition to check + * + * xe_tile_assert() uses &drm_WARN to emit a warning and print additional + * information that could be read from the &tile pointer if provided &condition + * is false. + * + * Contrary to &drm_WARN, xe_tile_assert() is effective only on debug builds + * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions + * or as a condition. + * + * See `Xe ASSERTs`_ for general usage guidelines. + */ +#define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "") +#define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \ + struct xe_tile *__tile = (tile); \ + char __buf[10] __maybe_unused; \ + xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \ + __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \ + STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg); \ +}) + +/** + * xe_gt_assert - warn if condition is false when debugging. 
+ * @gt: the &struct xe_gt pointer to which &condition applies + * @condition: condition to check + * + * xe_gt_assert() uses &drm_WARN to emit a warning and print additional + * information that could be safetely read from the > pointer if provided + * &condition is false. + * + * Contrary to &drm_WARN, xe_gt_assert() is effective only on debug builds + * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions + * or as a condition. + * + * See `Xe ASSERTs`_ for general usage guidelines. + */ +#define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "") +#define xe_gt_assert_msg(gt, condition, msg, arg...) ({ \ + struct xe_gt *__gt = (gt); \ + xe_tile_assert_msg(gt_to_tile(__gt), condition, "GT: %u type %d\n" msg, \ + __gt->info.id, __gt->info.type, ## arg); \ +}) + +#endif -- cgit v1.2.3 From c73acc1eeba5e380a367087cb7b933b946613ee7 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 12 Sep 2023 08:36:35 +0000 Subject: drm/xe: Use Xe assert macros instead of XE_WARN_ON macro The XE_WARN_ON macro maps to WARN_ON which is not justified in many cases where only a simple debug check is needed. Replace the use of the XE_WARN_ON macro with the new xe_assert macros which relies on drm_*. This takes a struct drm_device argument, which is one of the main changes in this commit. The other main change is that the condition is reversed, as with XE_WARN_ON a message is displayed if the condition is true, whereas with xe_assert it is if the condition is false. v2: - Rebase - Keep WARN splats in xe_wopcm.c (Matt Roper) v3: - Rebase Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bb.c | 8 ++-- drivers/gpu/drm/xe/xe_bo.c | 64 +++++++++++++++------------- drivers/gpu/drm/xe/xe_bo_evict.c | 4 +- drivers/gpu/drm/xe/xe_device.c | 6 +-- drivers/gpu/drm/xe/xe_exec.c | 2 +- drivers/gpu/drm/xe/xe_execlist.c | 12 +++--- drivers/gpu/drm/xe/xe_force_wake.c | 4 +- drivers/gpu/drm/xe/xe_force_wake.h | 6 +-- drivers/gpu/drm/xe/xe_ggtt.c | 14 +++---- drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_gt_clock.c | 2 +- drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 +++--- drivers/gpu/drm/xe/xe_guc.c | 24 ++++++----- drivers/gpu/drm/xe/xe_guc_ads.c | 22 +++++----- drivers/gpu/drm/xe/xe_guc_ct.c | 29 ++++++------- drivers/gpu/drm/xe/xe_guc_log.c | 4 +- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 65 ++++++++++++++++++----------- drivers/gpu/drm/xe/xe_huc.c | 3 +- drivers/gpu/drm/xe/xe_hw_engine.c | 11 ++--- drivers/gpu/drm/xe/xe_lrc.c | 9 ++-- drivers/gpu/drm/xe/xe_migrate.c | 31 +++++++------- drivers/gpu/drm/xe/xe_mocs.c | 2 +- drivers/gpu/drm/xe/xe_pt.c | 14 +++---- drivers/gpu/drm/xe/xe_ring_ops.c | 13 +++--- drivers/gpu/drm/xe/xe_sched_job.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 2 +- drivers/gpu/drm/xe/xe_uc_fw.c | 16 +++---- drivers/gpu/drm/xe/xe_vm.c | 48 ++++++++++----------- drivers/gpu/drm/xe/xe_vm.h | 2 +- 31 files changed, 235 insertions(+), 201 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 1fbc2fcddc96..f871ba82bc9b 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -66,7 +66,7 @@ __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) bb->cs[bb->len++] = MI_BATCH_BUFFER_END; - WARN_ON(bb->len * 4 + bb_prefetch(q->gt) > size); + xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size); xe_sa_bo_flush_write(bb->bo); @@ -84,8 
+84,8 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, 4 * second_idx, }; - XE_WARN_ON(second_idx > bb->len); - XE_WARN_ON(!(q->vm->flags & XE_VM_FLAG_MIGRATION)); + xe_gt_assert(q->gt, second_idx <= bb->len); + xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION); return __xe_bb_create_job(q, bb, addr); } @@ -95,7 +95,7 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, { u64 addr = xe_sa_bo_gpu_addr(bb->bo); - XE_WARN_ON(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION); + xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION)); return __xe_bb_create_job(q, bb, &addr); } diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index c33a20420022..998efceb84a4 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -103,7 +103,7 @@ static bool xe_bo_is_user(struct xe_bo *bo) static struct xe_tile * mem_type_to_tile(struct xe_device *xe, u32 mem_type) { - XE_WARN_ON(mem_type != XE_PL_STOLEN && !mem_type_is_vram(mem_type)); + xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type)); return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)]; } @@ -142,7 +142,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place place = { .mem_type = mem_type }; u64 io_size = tile->mem.vram.io_size; - XE_WARN_ON(!tile->mem.vram.usable_size); + xe_assert(xe, tile->mem.vram.usable_size); /* * For eviction / restore on suspend / resume objects @@ -544,10 +544,11 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo, struct dma_buf_attachment *attach = ttm_bo->base.import_attach; struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm); + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); struct sg_table *sg; - XE_WARN_ON(!attach); - XE_WARN_ON(!ttm_bo->ttm); + xe_assert(xe, attach); + xe_assert(xe, ttm_bo->ttm); if (new_res->mem_type == XE_PL_SYSTEM) goto out; @@ -709,8 +710,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, else if (mem_type_is_vram(old_mem_type)) tile = mem_type_to_tile(xe, old_mem_type); - XE_WARN_ON(!tile); - XE_WARN_ON(!tile->migrate); + xe_assert(xe, tile); + xe_tile_assert(tile, tile->migrate); trace_xe_bo_move(bo); xe_device_mem_access_get(xe); @@ -740,7 +741,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - XE_WARN_ON(new_mem->start != + xe_assert(xe, new_mem->start == bo->placements->fpfn); iosys_map_set_vaddr_iomem(&bo->vmap, new_addr); @@ -939,9 +940,10 @@ static void __xe_bo_vunmap(struct xe_bo *bo); */ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo) { + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); bool locked; - XE_WARN_ON(kref_read(&ttm_bo->kref)); + xe_assert(xe, !kref_read(&ttm_bo->kref)); /* * We can typically only race with TTM trylocking under the @@ -952,7 +954,7 @@ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo) spin_lock(&ttm_bo->bdev->lru_lock); locked = dma_resv_trylock(ttm_bo->base.resv); spin_unlock(&ttm_bo->bdev->lru_lock); - XE_WARN_ON(!locked); + xe_assert(xe, locked); return locked; } @@ -968,7 +970,7 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) return; bo = ttm_to_xe_bo(ttm_bo); - XE_WARN_ON(bo->created && kref_read(&ttm_bo->base.refcount)); + xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount))); /* * Corner case where TTM fails to allocate memory and this BOs resv @@ -1041,12 +1043,13 @@ struct ttm_device_funcs 
xe_ttm_funcs = { static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) { struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); if (bo->ttm.base.import_attach) drm_prime_gem_destroy(&bo->ttm.base, NULL); drm_gem_object_release(&bo->ttm.base); - WARN_ON(!list_empty(&bo->vmas)); + xe_assert(xe, list_empty(&bo->vmas)); if (bo->ggtt_node.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); @@ -1082,7 +1085,7 @@ static void xe_gem_object_close(struct drm_gem_object *obj, struct xe_bo *bo = gem_to_xe_bo(obj); if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) { - XE_WARN_ON(!xe_bo_is_user(bo)); + xe_assert(xe_bo_device(bo), xe_bo_is_user(bo)); xe_bo_lock(bo, false); ttm_bo_set_bulk_move(&bo->ttm, NULL); @@ -1198,7 +1201,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, int err; /* Only kernel objects should set GT */ - XE_WARN_ON(tile && type != ttm_bo_type_kernel); + xe_assert(xe, !tile || type == ttm_bo_type_kernel); if (XE_WARN_ON(!size)) { xe_bo_free(bo); @@ -1354,7 +1357,7 @@ xe_bo_create_locked_range(struct xe_device *xe, if (!tile && flags & XE_BO_CREATE_STOLEN_BIT) tile = xe_device_get_root_tile(xe); - XE_WARN_ON(!tile); + xe_assert(xe, tile); if (flags & XE_BO_CREATE_STOLEN_BIT && flags & XE_BO_FIXED_PLACEMENT_BIT) { @@ -1485,8 +1488,8 @@ int xe_bo_pin_external(struct xe_bo *bo) struct xe_device *xe = xe_bo_device(bo); int err; - XE_WARN_ON(bo->vm); - XE_WARN_ON(!xe_bo_is_user(bo)); + xe_assert(xe, !bo->vm); + xe_assert(xe, xe_bo_is_user(bo)); if (!xe_bo_is_pinned(bo)) { err = xe_bo_validate(bo, NULL, false); @@ -1518,20 +1521,20 @@ int xe_bo_pin(struct xe_bo *bo) int err; /* We currently don't expect user BO to be pinned */ - XE_WARN_ON(xe_bo_is_user(bo)); + xe_assert(xe, !xe_bo_is_user(bo)); /* Pinned object must be in GGTT or have pinned flag */ - XE_WARN_ON(!(bo->flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT))); + xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT | + XE_BO_CREATE_GGTT_BIT)); /* * No reason we can't support pinning imported dma-bufs we just don't * expect to pin an imported dma-buf. 
*/ - XE_WARN_ON(bo->ttm.base.import_attach); + xe_assert(xe, !bo->ttm.base.import_attach); /* We only expect at most 1 pin */ - XE_WARN_ON(xe_bo_is_pinned(bo)); + xe_assert(xe, !xe_bo_is_pinned(bo)); err = xe_bo_validate(bo, NULL, false); if (err) @@ -1547,7 +1550,7 @@ int xe_bo_pin(struct xe_bo *bo) struct ttm_place *place = &(bo->placements[0]); if (mem_type_is_vram(place->mem_type)) { - XE_WARN_ON(!(place->flags & TTM_PL_FLAG_CONTIGUOUS)); + xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS); place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) - vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT; @@ -1584,9 +1587,9 @@ void xe_bo_unpin_external(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); - XE_WARN_ON(bo->vm); - XE_WARN_ON(!xe_bo_is_pinned(bo)); - XE_WARN_ON(!xe_bo_is_user(bo)); + xe_assert(xe, !bo->vm); + xe_assert(xe, xe_bo_is_pinned(bo)); + xe_assert(xe, xe_bo_is_user(bo)); if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) { spin_lock(&xe->pinned.lock); @@ -1607,15 +1610,15 @@ void xe_bo_unpin(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); - XE_WARN_ON(bo->ttm.base.import_attach); - XE_WARN_ON(!xe_bo_is_pinned(bo)); + xe_assert(xe, !bo->ttm.base.import_attach); + xe_assert(xe, xe_bo_is_pinned(bo)); if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && bo->flags & XE_BO_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); if (mem_type_is_vram(place->mem_type)) { - XE_WARN_ON(list_empty(&bo->pinned_link)); + xe_assert(xe, !list_empty(&bo->pinned_link)); spin_lock(&xe->pinned.lock); list_del_init(&bo->pinned_link); @@ -1676,15 +1679,16 @@ bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) */ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size) { + struct xe_device *xe = xe_bo_device(bo); struct xe_res_cursor cur; u64 page; - XE_WARN_ON(page_size > PAGE_SIZE); + xe_assert(xe, page_size <= PAGE_SIZE); page = offset >> PAGE_SHIFT; offset &= (PAGE_SIZE - 1); if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { - XE_WARN_ON(!bo->ttm.ttm); + xe_assert(xe, bo->ttm.ttm); xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, page_size, &cur); diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 49c05ddea164..7a264a9ca06e 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -158,8 +158,8 @@ int xe_bo_restore_kernel(struct xe_device *xe) * We expect validate to trigger a move VRAM and our move code * should setup the iosys map. 
*/ - XE_WARN_ON(iosys_map_is_null(&bo->vmap)); - XE_WARN_ON(!xe_bo_is_vram(bo)); + xe_assert(xe, !iosys_map_is_null(&bo->vmap)); + xe_assert(xe, xe_bo_is_vram(bo)); xe_bo_put(bo); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index df1953759c67..986a02a66166 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -394,7 +394,7 @@ bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) if (active) { int ref = atomic_inc_return(&xe->mem_access.ref); - XE_WARN_ON(ref == S32_MAX); + xe_assert(xe, ref != S32_MAX); } return active; @@ -436,7 +436,7 @@ void xe_device_mem_access_get(struct xe_device *xe) xe_pm_runtime_get(xe); ref = atomic_inc_return(&xe->mem_access.ref); - XE_WARN_ON(ref == S32_MAX); + xe_assert(xe, ref != S32_MAX); } @@ -450,5 +450,5 @@ void xe_device_mem_access_put(struct xe_device *xe) ref = atomic_dec_return(&xe->mem_access.ref); xe_pm_runtime_put(xe); - XE_WARN_ON(ref < 0); + xe_assert(xe, ref >= 0); } diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 293960efcd22..7cf4215b2b2e 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -117,7 +117,7 @@ static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm) * to a location where the GPU can access it). */ list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { - XE_WARN_ON(xe_vma_is_null(vma)); + xe_assert(vm->xe, !xe_vma_is_null(vma)); if (xe_vma_is_userptr(vma)) continue; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index df91780d8b9f..5b26b6e35afc 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -12,6 +12,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "regs/xe_regs.h" +#include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" @@ -50,10 +51,10 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, lrc_desc = xe_lrc_descriptor(lrc); if (GRAPHICS_VERx100(xe) >= 1250) { - XE_WARN_ON(!FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); + xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id); } else { - XE_WARN_ON(!FIELD_FIT(GEN11_SW_CTX_ID, ctx_id)); + xe_gt_assert(hwe->gt, FIELD_FIT(GEN11_SW_CTX_ID, ctx_id)); lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id); } @@ -321,7 +322,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q) struct xe_device *xe = gt_to_xe(q->gt); int err; - XE_WARN_ON(xe_device_guc_submission_enabled(xe)); + xe_assert(xe, !xe_device_guc_submission_enabled(xe)); drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n"); @@ -367,9 +368,10 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) container_of(w, struct xe_execlist_exec_queue, fini_async); struct xe_exec_queue *q = ee->q; struct xe_execlist_exec_queue *exl = q->execlist; + struct xe_device *xe = gt_to_xe(q->gt); unsigned long flags; - XE_WARN_ON(xe_device_guc_submission_enabled(gt_to_xe(q->gt))); + xe_assert(xe, !xe_device_guc_submission_enabled(xe)); spin_lock_irqsave(&exl->port->lock, flags); if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET)) @@ -377,7 +379,7 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) spin_unlock_irqrestore(&exl->port->lock, flags); if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) - xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q); + xe_device_remove_persistent_exec_queues(xe, q); 
drm_sched_entity_fini(&exl->entity); drm_sched_fini(&exl->sched); kfree(exl); diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index ef7279e0b006..ed2ecb20ce8e 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -45,7 +45,7 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) mutex_init(&fw->lock); /* Assuming gen11+ so assert this assumption is correct */ - XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); if (xe->info.graphics_verx100 >= 1270) { domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], @@ -67,7 +67,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) int i, j; /* Assuming gen11+ so assert this assumption is correct */ - XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); if (!xe_gt_is_media_type(gt)) domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 7f304704190e..83cb157da7cc 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -6,8 +6,8 @@ #ifndef _XE_FORCE_WAKE_H_ #define _XE_FORCE_WAKE_H_ +#include "xe_assert.h" #include "xe_force_wake_types.h" -#include "xe_macros.h" struct xe_gt; @@ -24,7 +24,7 @@ static inline int xe_force_wake_ref(struct xe_force_wake *fw, enum xe_force_wake_domains domain) { - XE_WARN_ON(!domain); + xe_gt_assert(fw->gt, domain); return fw->domains[ffs(domain) - 1].ref; } @@ -32,7 +32,7 @@ static inline void xe_force_wake_assert_held(struct xe_force_wake *fw, enum xe_force_wake_domains domain) { - XE_WARN_ON(!(fw->awake_domains & domain)); + xe_gt_assert(fw->gt, fw->awake_domains & domain); } #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 286f36b9e229..03097f1b7f71 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -58,8 +58,8 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) { - XE_WARN_ON(addr & XE_PTE_MASK); - XE_WARN_ON(addr >= ggtt->size); + xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); + xe_tile_assert(ggtt->tile, addr < ggtt->size); writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]); } @@ -69,7 +69,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) u64 end = start + size - 1; u64 scratch_pte; - XE_WARN_ON(start >= end); + xe_tile_assert(ggtt->tile, start < end); if (ggtt->scratch) scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); @@ -230,7 +230,7 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) int seqno; seqno = xe_gt_tlb_invalidation_guc(gt); - XE_WARN_ON(seqno <= 0); + xe_gt_assert(gt, seqno > 0); if (seqno > 0) xe_gt_tlb_invalidation_wait(gt, seqno); } else if (xe_device_guc_submission_enabled(gt_to_xe(gt))) { @@ -266,7 +266,7 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { unsigned int i = addr / XE_PAGE_SIZE; - XE_WARN_ON(addr > U32_MAX); + xe_tile_assert(ggtt->tile, addr <= U32_MAX); if (ggtt->gsm[i] == scratch_pte) continue; @@ -315,7 +315,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (XE_WARN_ON(bo->ggtt_node.size)) { /* Someone's already inserted this BO in the GGTT */ - XE_WARN_ON(bo->ggtt_node.size != bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); return 0; } @@ -378,7 +378,7 @@ void 
xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return; /* This BO is not currently in the GGTT */ - XE_WARN_ON(bo->ggtt_node.size != bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); xe_ggtt_remove_node(ggtt, &bo->ggtt_node); } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 678a276a25dc..5d86bb2bb94d 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -11,6 +11,7 @@ #include #include "regs/xe_gt_regs.h" +#include "xe_assert.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 2f77b8bbcf53..9136937324f3 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -58,7 +58,7 @@ int xe_gt_clock_init(struct xe_gt *gt) u32 freq = 0; /* Assuming gen11+ so assert this assumption is correct */ - XE_WARN_ON(GRAPHICS_VER(gt_to_xe(gt)) < 11); + xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) { freq = read_reference_ts_freq(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 9229fd5b01cc..ec1ae00f6bfc 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -158,7 +158,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) char name[8]; int i; - XE_WARN_ON(!minor->debugfs_root); + xe_gt_assert(gt, minor->debugfs_root); sprintf(name, "gt%d", gt->info.id); root = debugfs_create_dir(name, minor->debugfs_root); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index bcbeea62d510..bd6005b9d498 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -250,7 +250,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; - XE_WARN_ON(!vma); + xe_gt_assert(gt, vma); action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ @@ -288,10 +288,10 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, start = ALIGN_DOWN(xe_vma_start(vma), length); } - XE_WARN_ON(length < SZ_4K); - XE_WARN_ON(!is_power_of_2(length)); - XE_WARN_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)); - XE_WARN_ON(!IS_ALIGNED(start, length)); + xe_gt_assert(gt, length >= SZ_4K); + xe_gt_assert(gt, is_power_of_2(length)); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, IS_ALIGNED(start, length)); action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); action[len++] = xe_vma_vm(vma)->usm.asid; @@ -300,7 +300,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, action[len++] = ilog2(length) - ilog2(SZ_4K); } - XE_WARN_ON(len > MAX_TLB_INVALIDATION_LEN); + xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); return send_tlb_invalidation(>->uc.guc, fence, action, len); } diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 5d32bcee28b6..134019fdda7e 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -44,11 +44,12 @@ guc_to_xe(struct xe_guc *guc) static u32 guc_bo_ggtt_addr(struct xe_guc *guc, struct xe_bo *bo) { + struct xe_device *xe = guc_to_xe(guc); u32 addr = xe_bo_ggtt_addr(bo); - XE_WARN_ON(addr < xe_wopcm_size(guc_to_xe(guc))); - XE_WARN_ON(addr >= GUC_GGTT_TOP); - XE_WARN_ON(bo->size > GUC_GGTT_TOP - addr); + xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); + xe_assert(xe, addr < GUC_GGTT_TOP); 
+ xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); return addr; } @@ -629,13 +630,13 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); - XE_WARN_ON(guc->ct.enabled); - XE_WARN_ON(!len); - XE_WARN_ON(len > VF_SW_FLAG_COUNT); - XE_WARN_ON(len > MED_VF_SW_FLAG_COUNT); - XE_WARN_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != + xe_assert(xe, !guc->ct.enabled); + xe_assert(xe, len); + xe_assert(xe, len <= VF_SW_FLAG_COUNT); + xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT); + xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) == GUC_HXG_ORIGIN_HOST); - XE_WARN_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != + xe_assert(xe, FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) == GUC_HXG_TYPE_REQUEST); retry: @@ -727,6 +728,7 @@ int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len) static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) { + struct xe_device *xe = guc_to_xe(guc); u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = { FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | @@ -741,8 +743,8 @@ static int guc_self_cfg(struct xe_guc *guc, u16 key, u16 len, u64 val) }; int ret; - XE_WARN_ON(len > 2); - XE_WARN_ON(len == 1 && upper_32_bits(val)); + xe_assert(xe, len <= 2); + xe_assert(xe, len != 1 || !upper_32_bits(val)); /* Self config must go over MMIO */ ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 5edee24b97c9..efa4d25424b8 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -118,7 +118,9 @@ struct __guc_ads_blob { static size_t guc_ads_regset_size(struct xe_guc_ads *ads) { - XE_WARN_ON(!ads->regset_size); + struct xe_device *xe = ads_to_xe(ads); + + xe_assert(xe, ads->regset_size); return ads->regset_size; } @@ -309,14 +311,14 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) struct xe_gt *gt = ads_to_gt(ads); u32 prev_regset_size = ads->regset_size; - XE_WARN_ON(!ads->bo); + xe_gt_assert(gt, ads->bo); ads->golden_lrc_size = calculate_golden_lrc_size(ads); ads->regset_size = calculate_regset_size(gt); - XE_WARN_ON(ads->golden_lrc_size + - (ads->regset_size - prev_regset_size) > - MAX_GOLDEN_LRC_SIZE); + xe_gt_assert(gt, ads->golden_lrc_size + + (ads->regset_size - prev_regset_size) <= + MAX_GOLDEN_LRC_SIZE); return 0; } @@ -517,7 +519,7 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads) regset_used += count * sizeof(struct guc_mmio_reg); } - XE_WARN_ON(regset_used > ads->regset_size); + xe_gt_assert(gt, regset_used <= ads->regset_size); } static void guc_um_init_params(struct xe_guc_ads *ads) @@ -572,7 +574,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) offsetof(struct __guc_ads_blob, system_info)); u32 base = xe_bo_ggtt_addr(ads->bo); - XE_WARN_ON(!ads->bo); + xe_gt_assert(gt, ads->bo); xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); guc_policies_init(ads); @@ -596,7 +598,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) offsetof(struct __guc_ads_blob, system_info)); u32 base = xe_bo_ggtt_addr(ads->bo); - XE_WARN_ON(!ads->bo); + xe_gt_assert(gt, ads->bo); xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); guc_policies_init(ads); @@ -643,7 +645,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) engine_enabled_masks[guc_class])) continue; - XE_WARN_ON(!gt->default_lrc[class]); + xe_gt_assert(gt, 
gt->default_lrc[class]); real_size = xe_lrc_size(xe, class); alloc_size = PAGE_ALIGN(real_size); @@ -672,7 +674,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) offset += alloc_size; } - XE_WARN_ON(total_size != ads->golden_lrc_size); + xe_gt_assert(gt, total_size == ads->golden_lrc_size); } void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 13f2bd586f6a..2046bd269bbd 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -135,7 +135,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) struct xe_bo *bo; int err; - XE_WARN_ON(guc_ct_size() % PAGE_SIZE); + xe_assert(xe, !(guc_ct_size() % PAGE_SIZE)); mutex_init(&ct->lock); spin_lock_init(&ct->fast_lock); @@ -283,7 +283,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) struct xe_device *xe = ct_to_xe(ct); int err; - XE_WARN_ON(ct->enabled); + xe_assert(xe, !ct->enabled); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -376,7 +376,7 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { - XE_WARN_ON(g2h_len > ct->ctbs.g2h.info.space); + xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); @@ -389,8 +389,8 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) { lockdep_assert_held(&ct->fast_lock); - XE_WARN_ON(ct->ctbs.g2h.info.space + g2h_len > - ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); + xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <= + ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; --ct->g2h_outstanding; @@ -419,8 +419,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, full_len = len + GUC_CTB_HDR_LEN; lockdep_assert_held(&ct->lock); - XE_WARN_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); - XE_WARN_ON(tail > h2g->info.size); + xe_assert(xe, full_len <= (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); + xe_assert(xe, tail <= h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ if (tail + full_len > h2g->info.size) { @@ -476,12 +476,13 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { + struct xe_device *xe = ct_to_xe(ct); int ret; - XE_WARN_ON(g2h_len && g2h_fence); - XE_WARN_ON(num_g2h && g2h_fence); - XE_WARN_ON(g2h_len && !num_g2h); - XE_WARN_ON(!g2h_len && num_g2h); + xe_assert(xe, !g2h_len || !g2h_fence); + xe_assert(xe, !num_g2h || !g2h_fence); + xe_assert(xe, !g2h_len || num_g2h); + xe_assert(xe, g2h_len || !num_g2h); lockdep_assert_held(&ct->lock); if (unlikely(ct->ctbs.h2g.info.broken)) { @@ -552,7 +553,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, unsigned int sleep_period_ms = 1; int ret; - XE_WARN_ON(g2h_len && g2h_fence); + xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence); lockdep_assert_held(&ct->lock); xe_device_assert_mem_access(ct_to_xe(ct)); @@ -622,7 +623,7 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, { int ret; - XE_WARN_ON(g2h_len && g2h_fence); + xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence); mutex_lock(&ct->lock); ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); @@ 
-798,7 +799,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) return 0; } - XE_WARN_ON(fence != g2h_fence->seqno); + xe_assert(xe, fence == g2h_fence->seqno); if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { g2h_fence->fail = true; diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 63904007af0a..45c60a9c631c 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -55,12 +55,12 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p) size_t size; int i, j; - XE_WARN_ON(!log->bo); + xe_assert(xe, log->bo); size = log->bo->size; #define DW_PER_READ 128 - XE_WARN_ON(size % (DW_PER_READ * sizeof(u32))); + xe_assert(xe, !(size % (DW_PER_READ * sizeof(u32)))); for (i = 0; i < size / sizeof(u32); i += DW_PER_READ) { u32 read[DW_PER_READ]; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 87de1ce40e07..99d855680894 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -816,7 +816,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); int ret; - XE_WARN_ON(!xe_device_guc_submission_enabled(xe)); + xe_gt_assert(gt, xe_device_guc_submission_enabled(xe)); xe_device_mem_access_get(pc_to_xe(pc)); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 50509891a288..f6b630f53928 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -14,6 +14,7 @@ #include #include "regs/xe_lrc_layout.h" +#include "xe_assert.h" #include "xe_devcoredump.h" #include "xe_device.h" #include "xe_exec_queue.h" @@ -354,11 +355,12 @@ static const int xe_exec_queue_prio_to_guc[] = { static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) { struct exec_queue_policy policy; + struct xe_device *xe = guc_to_xe(guc); enum xe_exec_queue_priority prio = q->priority; u32 timeslice_us = q->sched_props.timeslice_us; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - XE_WARN_ON(!exec_queue_registered(q)); + xe_assert(xe, exec_queue_registered(q)); __guc_exec_queue_policy_start_klv(&policy, q->guc->id); __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); @@ -392,11 +394,12 @@ static void __register_mlrc_engine(struct xe_guc *guc, struct guc_ctxt_registration_info *info) { #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) + struct xe_device *xe = guc_to_xe(guc); u32 action[MAX_MLRC_REG_SIZE]; int len = 0; int i; - XE_WARN_ON(!xe_exec_queue_is_parallel(q)); + xe_assert(xe, xe_exec_queue_is_parallel(q)); action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; action[len++] = info->flags; @@ -419,7 +422,7 @@ static void __register_mlrc_engine(struct xe_guc *guc, action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } - XE_WARN_ON(len > MAX_MLRC_REG_SIZE); + xe_assert(xe, len <= MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE xe_guc_ct_send(&guc->ct, action, len, 0, 0); @@ -453,7 +456,7 @@ static void register_engine(struct xe_exec_queue *q) struct xe_lrc *lrc = q->lrc; struct guc_ctxt_registration_info info; - XE_WARN_ON(exec_queue_registered(q)); + xe_assert(xe, !exec_queue_registered(q)); memset(&info, 0, sizeof(info)); info.context_idx = q->guc->id; @@ -543,7 +546,7 @@ static int wq_noop_append(struct xe_exec_queue *q) if (wq_wait_for_space(q, wq_space_until_wrap(q))) return -ENODEV; - XE_WARN_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); + xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); parallel_write(xe, 
map, wq[q->guc->wqi_tail / sizeof(u32)], FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | @@ -583,13 +586,13 @@ static void wq_item_append(struct xe_exec_queue *q) wqi[i++] = lrc->ring.tail / sizeof(u64); } - XE_WARN_ON(i != wqi_size / sizeof(u32)); + xe_assert(xe, i == wqi_size / sizeof(u32)); iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[q->guc->wqi_tail / sizeof(u32)])); xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); q->guc->wqi_tail += wqi_size; - XE_WARN_ON(q->guc->wqi_tail > WQ_SIZE); + xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); xe_device_wmb(xe); @@ -601,6 +604,7 @@ static void wq_item_append(struct xe_exec_queue *q) static void submit_exec_queue(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); struct xe_lrc *lrc = q->lrc; u32 action[3]; u32 g2h_len = 0; @@ -608,7 +612,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) int len = 0; bool extra_submit = false; - XE_WARN_ON(!exec_queue_registered(q)); + xe_assert(xe, exec_queue_registered(q)); if (xe_exec_queue_is_parallel(q)) wq_item_append(q); @@ -654,10 +658,12 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_exec_queue *q = job->q; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); bool lr = xe_exec_queue_is_lr(q); - XE_WARN_ON((exec_queue_destroyed(q) || exec_queue_pending_disable(q)) && - !exec_queue_banned(q) && !exec_queue_suspended(q)); + xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || + exec_queue_banned(q) || exec_queue_suspended(q)); trace_xe_sched_job_run(job); @@ -799,7 +805,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; - XE_WARN_ON(!xe_exec_queue_is_lr(q)); + xe_assert(xe, xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); /* Kill the run_job / process_msg entry points */ @@ -853,8 +859,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int i = 0; if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_KERNEL); - XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))); drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", xe_sched_job_seqno(job), q->guc->id, q->flags); @@ -990,8 +996,9 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); - XE_WARN_ON(q->flags & EXEC_QUEUE_FLAG_PERMANENT); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); if (exec_queue_registered(q)) @@ -1018,10 +1025,11 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms static void suspend_fence_signal(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); - XE_WARN_ON(!exec_queue_suspended(q) && !exec_queue_killed(q) && - !guc_read_stopped(guc)); - XE_WARN_ON(!q->guc->suspend_pending); + xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || + guc_read_stopped(guc)); + xe_assert(xe, q->guc->suspend_pending); q->guc->suspend_pending = false; smp_wmb(); @@ -1125,11 +1133,12 @@ static int 
guc_exec_queue_init(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched; struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; int err; - XE_WARN_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc))); + xe_assert(xe, xe_device_guc_submission_enabled(guc_to_xe(guc))); ge = kzalloc(sizeof(*ge), GFP_KERNEL); if (!ge) @@ -1275,10 +1284,12 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms) { struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); - XE_WARN_ON(exec_queue_registered(q)); - XE_WARN_ON(exec_queue_banned(q)); - XE_WARN_ON(exec_queue_killed(q)); + xe_assert(xe, !exec_queue_registered(q)); + xe_assert(xe, !exec_queue_banned(q)); + xe_assert(xe, !exec_queue_killed(q)); sched->base.timeout = job_timeout_ms; @@ -1309,8 +1320,10 @@ static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) static void guc_exec_queue_resume(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); - XE_WARN_ON(q->guc->suspend_pending); + xe_assert(xe, !q->guc->suspend_pending); guc_exec_queue_add_msg(q, msg, RESUME); } @@ -1405,8 +1418,9 @@ int xe_guc_submit_stop(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; + struct xe_device *xe = guc_to_xe(guc); - XE_WARN_ON(guc_read_stopped(guc) != 1); + xe_assert(xe, guc_read_stopped(guc) == 1); mutex_lock(&guc->submission_state.lock); @@ -1443,8 +1457,9 @@ int xe_guc_submit_start(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; + struct xe_device *xe = guc_to_xe(guc); - XE_WARN_ON(guc_read_stopped(guc) != 1); + xe_assert(xe, guc_read_stopped(guc) == 1); mutex_lock(&guc->submission_state.lock); atomic_dec(&guc->submission_state.stopped); @@ -1474,7 +1489,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) return NULL; } - XE_WARN_ON(q->guc->id != guc_id); + xe_assert(xe, q->guc->id == guc_id); return q; } diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 177cda14864e..c856da1e9422 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -6,6 +6,7 @@ #include "xe_huc.h" #include "regs/xe_guc_regs.h" +#include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" @@ -72,7 +73,7 @@ int xe_huc_auth(struct xe_huc *huc) if (xe_uc_fw_is_disabled(&huc->fw)) return 0; - XE_WARN_ON(xe_uc_fw_is_running(&huc->fw)); + xe_assert(xe, !xe_uc_fw_is_running(&huc->fw)); if (!xe_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index dd673a684b70..9c2e212fa4cf 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -10,6 +10,7 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" +#include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_execlist.h" @@ -244,7 +245,7 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val) { - XE_WARN_ON(reg.addr & hwe->mmio_base); + xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); reg.addr += hwe->mmio_base; @@ -254,7 
+255,7 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { - XE_WARN_ON(reg.addr & hwe->mmio_base); + xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); reg.addr += hwe->mmio_base; @@ -374,7 +375,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, info = &engine_infos[id]; - XE_WARN_ON(hwe->gt); + xe_gt_assert(gt, !hwe->gt); hwe->gt = gt; hwe->class = info->class; @@ -415,8 +416,8 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, struct xe_tile *tile = gt_to_tile(gt); int err; - XE_WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name); - XE_WARN_ON(!(gt->info.engine_mask & BIT(id))); + xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name); + xe_gt_assert(gt, gt->info.engine_mask & BIT(id)); xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); xe_reg_sr_apply_whitelist(hwe); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 6f899b6a4877..1410dcab3d90 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -116,7 +116,7 @@ static void set_offsets(u32 *regs, *regs |= MI_LRI_LRM_CS_MMIO; regs++; - XE_WARN_ON(!count); + xe_gt_assert(hwe->gt, count); do { u32 offset = 0; u8 v; @@ -608,7 +608,7 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ { \ struct iosys_map map = lrc->bo->vmap; \ \ - XE_WARN_ON(iosys_map_is_null(&map)); \ + xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ return map; \ } \ @@ -827,16 +827,17 @@ static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) { + struct xe_device *xe = lrc_to_xe(lrc); struct iosys_map ring; u32 rhs; size_t aligned_size; - XE_WARN_ON(!IS_ALIGNED(size, 4)); + xe_assert(xe, IS_ALIGNED(size, 4)); aligned_size = ALIGN(size, 8); ring = __xe_lrc_ring_map(lrc); - XE_WARN_ON(lrc->ring.tail >= lrc->ring.size); + xe_assert(xe, lrc->ring.tail < lrc->ring.size); rhs = lrc->ring.size - lrc->ring.tail; if (size > rhs) { __xe_lrc_write_ring(lrc, ring, data, rhs); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8291798e1aaa..713f98baf0ee 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -15,6 +15,7 @@ #include "generated/xe_wa_oob.h" #include "regs/xe_gpu_commands.h" #include "tests/xe_test.h" +#include "xe_assert.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_exec_queue.h" @@ -172,7 +173,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); /* Need to be sure everything fits in the first PT, or create more */ - XE_WARN_ON(m->batch_base_ofs + batch->size >= SZ_2M); + xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, @@ -206,7 +207,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } if (!IS_DGFX(xe)) { - XE_WARN_ON(xe->info.supports_usm); + xe_tile_assert(tile, !xe->info.supports_usm); /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; @@ -487,7 +488,7 @@ static void emit_pte(struct xe_migrate *m, /* Is this a 64K PTE entry? 
*/ if ((m->q->vm->flags & XE_VM_FLAG_64K) && !(cur_ofs & (16 * 8 - 1))) { - XE_WARN_ON(!IS_ALIGNED(addr, SZ_64K)); + xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K)); addr |= XE_PTE_PS64; } @@ -516,7 +517,7 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), NUM_CCS_BYTES_PER_BLOCK); - XE_WARN_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER); + xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER); *cs++ = XY_CTRL_SURF_COPY_BLT | (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | @@ -536,9 +537,9 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u64 dst_ofs, unsigned int size, unsigned int pitch) { - XE_WARN_ON(size / pitch > S16_MAX); - XE_WARN_ON(pitch / 4 > S16_MAX); - XE_WARN_ON(pitch > U16_MAX); + xe_gt_assert(gt, size / pitch <= S16_MAX); + xe_gt_assert(gt, pitch / 4 <= S16_MAX); + xe_gt_assert(gt, pitch <= U16_MAX); bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch; @@ -598,7 +599,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, * At the moment, we don't support copying CCS metadata from * system to system. */ - XE_WARN_ON(!src_is_vram && !dst_is_vram); + xe_gt_assert(gt, src_is_vram || dst_is_vram); emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs, src_is_vram, dst_size); @@ -810,7 +811,7 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs *cs++ = upper_32_bits(src_ofs); *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs); - XE_WARN_ON(cs - bb->cs != len + bb->len); + xe_gt_assert(gt, cs - bb->cs == len + bb->len); bb->len += len; } @@ -848,7 +849,7 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, *cs++ = 0; } - XE_WARN_ON(cs - bb->cs != len + bb->len); + xe_gt_assert(gt, cs - bb->cs == len + bb->len); bb->len += len; } @@ -1021,9 +1022,9 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, * PDE. This requires a BO that is almost vm->size big. * * This shouldn't be possible in practice.. might change when 16K - * pages are used. Hence the XE_WARN_ON. + * pages are used. Hence the assert. */ - XE_WARN_ON(update->qwords > 0x1ff); + xe_tile_assert(tile, update->qwords <= 0x1ff); if (!ppgtt_ofs) { ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, XE_PAGE_SIZE)); @@ -1213,7 +1214,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, * Worst case: Sum(2 * (each lower level page size) + (top level page size)) * Should be reasonably bound.. 
*/ - XE_WARN_ON(batch_size >= SZ_128K); + xe_tile_assert(tile, batch_size < SZ_128K); bb = xe_bb_new(gt, batch_size, !q && xe->info.supports_usm); if (IS_ERR(bb)) @@ -1223,7 +1224,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, if (!IS_DGFX(xe)) { ppgtt_ofs = NUM_KERNEL_PDE - 1; if (q) { - XE_WARN_ON(num_updates > NUM_VMUSA_WRITES_PER_UNIT); + xe_tile_assert(tile, num_updates <= NUM_VMUSA_WRITES_PER_UNIT); sa_bo = drm_suballoc_new(&m->vm_update_sa, 1, GFP_KERNEL, true, 0); @@ -1252,7 +1253,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, for (i = 0; i < num_updates; i++) { struct xe_bo *pt_bo = updates[i].pt_bo; - XE_WARN_ON(pt_bo->size != SZ_4K); + xe_tile_assert(tile, pt_bo->size == SZ_4K); addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0); bb->cs[bb->len++] = lower_32_bits(addr); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 75d025c54eb8..ada3114be4fa 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -463,7 +463,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, * is still 0 at this point, we'll assume that it was omitted by * mistake in the switch statement above. */ - XE_WARN_ON(info->unused_entries_index == 0); + xe_assert(xe, info->unused_entries_index != 0); if (XE_WARN_ON(info->size > info->n_entries)) { info->table = NULL; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index c21d2681b419..680fbe6f38a6 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -196,7 +196,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, pt->level = level; pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL; - XE_WARN_ON(level > XE_VM_MAX_LEVEL); + xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); return pt; @@ -1004,7 +1004,7 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, *num_entries = 0; err = xe_pt_stage_bind(tile, vma, entries, num_entries); if (!err) - XE_WARN_ON(!*num_entries); + xe_tile_assert(tile, *num_entries); else /* abort! */ xe_pt_abort_bind(vma, entries, *num_entries); @@ -1026,7 +1026,7 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe, u64 end; u64 start; - XE_WARN_ON(entry->pt->is_compact); + xe_assert(xe, !entry->pt->is_compact); start = entry->ofs * page_size; end = start + page_size * entry->qwords; vm_dbg(&xe->drm, @@ -1276,7 +1276,7 @@ static int invalidation_fence_init(struct xe_gt *gt, dma_fence_put(&ifence->base.base); /* Creation ref */ } - XE_WARN_ON(ret && ret != -ENOENT); + xe_gt_assert(gt, !ret || ret == -ENOENT); return ret && ret != -ENOENT ? 
ret : 0; } @@ -1356,7 +1356,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind); if (err) goto err; - XE_WARN_ON(num_entries > ARRAY_SIZE(entries)); + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, @@ -1707,7 +1707,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu xe_vma_start(vma), xe_vma_end(vma) - 1, q); num_entries = xe_pt_stage_unbind(tile, vma, entries); - XE_WARN_ON(num_entries > ARRAY_SIZE(entries)); + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, @@ -1773,7 +1773,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu list_del_init(&vma->combined_links.rebind); if (unbind_pt_update.locked) { - XE_WARN_ON(!xe_vma_is_userptr(vma)); + xe_tile_assert(tile, xe_vma_is_userptr(vma)); if (!vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 2238a40b7e8e..6eec7c7e4bc5 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -212,6 +212,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc u32 dw[MAX_JOB_SIZE_DW], i = 0; u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_vm *vm = job->q->vm; + struct xe_gt *gt = job->q->gt; if (vm && vm->batch_invalidate_tlb) { dw[i++] = preparser_disable(true); @@ -234,7 +235,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc i = emit_user_interrupt(dw, i); - XE_WARN_ON(i > MAX_JOB_SIZE_DW); + xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } @@ -294,7 +295,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_user_interrupt(dw, i); - XE_WARN_ON(i > MAX_JOB_SIZE_DW); + xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } @@ -342,7 +343,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_user_interrupt(dw, i); - XE_WARN_ON(i > MAX_JOB_SIZE_DW); + xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } @@ -372,14 +373,16 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, i = emit_user_interrupt(dw, i); - XE_WARN_ON(i > MAX_JOB_SIZE_DW); + xe_gt_assert(job->q->gt, i <= MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); } static void emit_job_gen12_gsc(struct xe_sched_job *job) { - XE_WARN_ON(job->q->width > 1); /* no parallel submission for GSCCS */ + struct xe_gt *gt = job->q->gt; + + xe_gt_assert(gt, job->q->width <= 1); /* no parallel submission for GSCCS */ __emit_job_gen12_simple(job, job->q->lrc, job->batch_addr[0], diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index b02183147e8e..84c700aed8ac 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -143,7 +143,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, /* Sanity check */ for (j = 0; j < q->width; ++j) - XE_WARN_ON(cf->base.seqno != fences[j]->seqno); + xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno); job->fence = &cf->base; } diff --git a/drivers/gpu/drm/xe/xe_uc.c 
b/drivers/gpu/drm/xe/xe_uc.c index 9c8ce504f4da..a8ecb5c6e01a 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -162,7 +162,7 @@ int xe_uc_init_hw(struct xe_uc *uc) /* We don't fail the driver load if HuC fails to auth, but let's warn */ ret = xe_huc_auth(&uc->huc); - XE_WARN_ON(ret); + xe_gt_assert(uc_to_gt(uc), !ret); return 0; } diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 37ad238148b0..8aa3ae1384db 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -195,7 +195,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) u32 count; int i; - XE_WARN_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); + xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all)); entries = blobs_all[uc_fw->type].entries; count = blobs_all[uc_fw->type].count; @@ -224,8 +224,8 @@ size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len) struct xe_device *xe = uc_fw_to_xe(uc_fw); u32 size = min_t(u32, uc_fw->rsa_size, max_len); - XE_WARN_ON(size % 4); - XE_WARN_ON(!xe_uc_fw_is_available(uc_fw)); + xe_assert(xe, !(size % 4)); + xe_assert(xe, xe_uc_fw_is_available(uc_fw)); xe_map_memcpy_from(xe, dst, &uc_fw->bo->vmap, xe_uc_fw_rsa_offset(uc_fw), size); @@ -249,8 +249,8 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_guc *guc = >->uc.guc; - XE_WARN_ON(uc_fw->type != XE_UC_FW_TYPE_GUC); - XE_WARN_ON(uc_fw->major_ver_found < 70); + xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); + xe_gt_assert(gt, uc_fw->major_ver_found >= 70); if (uc_fw->major_ver_found > 70 || uc_fw->minor_ver_found >= 6) { /* v70.6.0 adds CSS header support */ @@ -336,8 +336,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) * before we're looked at the HW caps to see if we have uc support */ BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); - XE_WARN_ON(uc_fw->status); - XE_WARN_ON(uc_fw->path); + xe_assert(xe, !uc_fw->status); + xe_assert(xe, !uc_fw->path); uc_fw_auto_select(xe, uc_fw); xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? 
@@ -504,7 +504,7 @@ int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) int err; /* make sure the status was cleared the last time we reset the uc */ - XE_WARN_ON(xe_uc_fw_is_loaded(uc_fw)); + xe_assert(xe, !xe_uc_fw_is_loaded(uc_fw)); if (!xe_uc_fw_is_loadable(uc_fw)) return -ENOEXEC; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 66e8aeb203c9..53add99d4186 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -71,7 +71,7 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma) bool read_only = xe_vma_read_only(vma); lockdep_assert_held(&vm->lock); - XE_WARN_ON(!xe_vma_is_userptr(vma)); + xe_assert(xe, xe_vma_is_userptr(vma)); retry: if (vma->gpuva.flags & XE_VMA_DESTROYED) return 0; @@ -260,7 +260,7 @@ static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) struct dma_fence *fence; link = list->next; - XE_WARN_ON(link == list); + xe_assert(vm->xe, link != list); fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), q, q->compute.context, @@ -338,7 +338,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) int err; bool wait; - XE_WARN_ON(!xe_vm_in_compute_mode(vm)); + xe_assert(vm->xe, xe_vm_in_compute_mode(vm)); down_write(&vm->lock); drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); @@ -573,7 +573,7 @@ static void preempt_rebind_work_func(struct work_struct *w) long wait; int __maybe_unused tries = 0; - XE_WARN_ON(!xe_vm_in_compute_mode(vm)); + xe_assert(vm->xe, xe_vm_in_compute_mode(vm)); trace_xe_vm_rebind_worker_enter(vm); down_write(&vm->lock); @@ -698,7 +698,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, struct dma_fence *fence; long err; - XE_WARN_ON(!xe_vma_is_userptr(vma)); + xe_assert(vm->xe, xe_vma_is_userptr(vma)); trace_xe_vma_userptr_invalidate(vma); if (!mmu_notifier_range_blockable(range)) @@ -839,7 +839,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) xe_vm_assert_held(vm); list_for_each_entry_safe(vma, next, &vm->rebind_list, combined_links.rebind) { - XE_WARN_ON(!vma->tile_present); + xe_assert(vm->xe, vma->tile_present); list_del_init(&vma->combined_links.rebind); dma_fence_put(fence); @@ -867,8 +867,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_tile *tile; u8 id; - XE_WARN_ON(start >= end); - XE_WARN_ON(end >= vm->size); + xe_assert(vm->xe, start < end); + xe_assert(vm->xe, end < vm->size); if (!bo && !is_null) /* userptr */ vma = kzalloc(sizeof(*vma), GFP_KERNEL); @@ -1064,10 +1064,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) struct xe_vm *vm = xe_vma_vm(vma); lockdep_assert_held_write(&vm->lock); - XE_WARN_ON(!list_empty(&vma->combined_links.destroy)); + xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); if (xe_vma_is_userptr(vma)) { - XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED)); + xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); list_del(&vma->userptr.invalidate_link); @@ -1160,7 +1160,7 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) if (xe_vm_is_closed_or_banned(vm)) return NULL; - XE_WARN_ON(start + range > vm->size); + xe_assert(vm->xe, start + range <= vm->size); gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); @@ -1171,7 +1171,7 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) { int err; - XE_WARN_ON(xe_vma_vm(vma) != vm); + xe_assert(vm->xe, xe_vma_vm(vma) == vm); lockdep_assert_held(&vm->lock); err = 
drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); @@ -1182,7 +1182,7 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) { - XE_WARN_ON(xe_vma_vm(vma) != vm); + xe_assert(vm->xe, xe_vma_vm(vma) == vm); lockdep_assert_held(&vm->lock); drm_gpuva_remove(&vma->gpuva); @@ -1428,7 +1428,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) struct drm_gpuva *gpuva, *next; u8 id; - XE_WARN_ON(vm->preempt.num_exec_queues); + xe_assert(xe, !vm->preempt.num_exec_queues); xe_vm_close(vm); flush_async_ops(vm); @@ -1505,7 +1505,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) if (vm->async_ops.error_capture.addr) wake_up_all(&vm->async_ops.error_capture.wq); - XE_WARN_ON(!list_empty(&vm->extobj.list)); + xe_assert(xe, list_empty(&vm->extobj.list)); up_write(&vm->lock); mutex_lock(&xe->usm.lock); @@ -1531,7 +1531,7 @@ static void vm_destroy_work_func(struct work_struct *w) void *lookup; /* xe_vm_close_and_put was not called? */ - XE_WARN_ON(vm->size); + xe_assert(xe, !vm->size); if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { xe_device_mem_access_put(xe); @@ -1539,7 +1539,7 @@ static void vm_destroy_work_func(struct work_struct *w) if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); - XE_WARN_ON(lookup != vm); + xe_assert(xe, lookup == vm); mutex_unlock(&xe->usm.lock); } } @@ -1802,7 +1802,7 @@ int xe_vm_async_fence_wait_start(struct dma_fence *fence) struct async_op_fence *afence = container_of(fence, struct async_op_fence, fence); - XE_WARN_ON(xe_vm_no_dma_fences(afence->vm)); + xe_assert(afence->vm->xe, !xe_vm_no_dma_fences(afence->vm)); smp_rmb(); return wait_event_interruptible(afence->wq, afence->started); @@ -1828,7 +1828,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, } else { int i; - XE_WARN_ON(!xe_vm_in_fault_mode(vm)); + xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); fence = dma_fence_get_stub(); if (last_op) { @@ -2110,7 +2110,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, { int err; - XE_WARN_ON(region > ARRAY_SIZE(region_to_mem_type)); + xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); if (!xe_vma_has_no_bo(vma)) { err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); @@ -2309,7 +2309,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } break; case XE_VM_BIND_OP_UNMAP_ALL: - XE_WARN_ON(!bo); + xe_assert(vm->xe, bo); err = xe_bo_lock(bo, true); if (err) @@ -2506,7 +2506,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); bool first = list_empty(ops_list); - XE_WARN_ON(!first && !async); + xe_assert(vm->xe, first || async); INIT_LIST_HEAD(&op->link); list_add_tail(&op->link, ops_list); @@ -3468,8 +3468,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) u8 id; int ret; - XE_WARN_ON(!xe_vm_in_fault_mode(xe_vma_vm(vma))); - XE_WARN_ON(xe_vma_is_null(vma)); + xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma))); + xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_usm_invalidate(vma); /* Check that we don't race with page-table updates */ diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index cc9dfd8cb770..694f9e689b05 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -182,7 +182,7 @@ extern struct ttm_device_funcs xe_ttm_funcs; static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) { - XE_WARN_ON(!xe_vm_in_compute_mode(vm)); + xe_assert(vm->xe, xe_vm_in_compute_mode(vm)); 
queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); } -- cgit v1.2.3 From 430003b85ce36e6f9dd6799b6cd5690f9b6c8a2a Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 6 Sep 2023 16:00:10 -0700 Subject: drm/xe/guc: Switch to major-only GuC FW tracking for MTL Newer HuC binaries for MTL (8.5.1+) require GuC 70.7 or newer, so we need to move on from 70.6.4. Given that the MTL GuC uses major-only version matching in i915, we can do the same here instead of just bumping the version (and having to push the versioned binaries, because they're not there already for i915). Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 8aa3ae1384db..efc70836453d 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -101,7 +101,7 @@ struct fw_blobs_by_type { #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ fw_def(LUNARLAKE, mmp_ver(xe, guc, lnl, 70, 6, 8)) \ - fw_def(METEORLAKE, mmp_ver(i915, guc, mtl, 70, 6, 4)) \ + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 7)) \ fw_def(PVC, mmp_ver(xe, guc, pvc, 70, 6, 4)) \ fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \ -- cgit v1.2.3 From 8d07691c35bfd08fe16f865b9df04204604b36d5 Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Wed, 30 Aug 2023 08:46:50 +0530 Subject: drm/xe: Get GT clock to nanosecs Helper to convert GT clock cycles to nanoseconds. v2: Use DIV_ROUND_CLOSEST_ULL helper(Ashutosh) v3: rename xe_gt_clock_interval_to_ns to xe_gt_clock_cycles_to_ns Reviewed-by: Tejas Upadhyay Reviewed-by: Ashutosh Dixit Signed-off-by: Aravind Iddamsetty Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_clock.c | 5 +++++ drivers/gpu/drm/xe/xe_gt_clock.h | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 9136937324f3..25a18eaad9c4 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -78,3 +78,8 @@ int xe_gt_clock_init(struct xe_gt *gt) gt->info.clock_freq = freq; return 0; } + +u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count) +{ + return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.clock_freq); +} diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h index 511923afd224..aa162722f859 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.h +++ b/drivers/gpu/drm/xe/xe_gt_clock.h @@ -6,8 +6,10 @@ #ifndef _XE_GT_CLOCK_H_ #define _XE_GT_CLOCK_H_ +#include + struct xe_gt; int xe_gt_clock_init(struct xe_gt *gt); - +u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count); #endif -- cgit v1.2.3 From cd8534193a4b4e4e0f8c8ee99d96293035e0ffba Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Wed, 30 Aug 2023 08:38:33 +0530 Subject: drm/xe: Use spinlock in forcewake instead of mutex In PMU we need to access certain registers which fall under GT power domain for which we need to take forcewake. But as PMU being an atomic context can't expect to have any sleeping calls. 
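The atomic-context constraint stated above is the crux of this change: perf event callbacks may run in hard-IRQ context, so everything on the counter-read path, forcewake included, has to be non-sleeping. A minimal sketch of such a read path, assuming it uses the driver's existing xe_force_wake_get()/xe_force_wake_put(), gt_to_fw() and xe_mmio_read32() helpers (the sample_counter() wrapper itself is hypothetical and only illustrates the calling context):

/*
 * Sketch only: a counter sample as a PMU read callback would take it.
 * This can run in atomic context, so xe_force_wake_get() and
 * xe_force_wake_put() may only take a spinlock internally; with the
 * previous mutex this path could sleep and would be invalid here.
 */
static u64 sample_counter(struct xe_gt *gt, struct xe_reg reg)
{
	u64 val;

	xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);	/* must not sleep */
	val = xe_mmio_read32(gt, reg);
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);

	return val;
}

The same constraint is why the ack polls in the diff below pass true instead of false as the final argument to xe_mmio_wait32(), selecting a non-sleeping wait.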
Reviewed-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi Signed-off-by: Aravind Iddamsetty Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_force_wake.c | 14 +++++++------- drivers/gpu/drm/xe/xe_force_wake_types.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index ed2ecb20ce8e..32d6c4dd2807 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -42,7 +42,7 @@ void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) struct xe_device *xe = gt_to_xe(gt); fw->gt = gt; - mutex_init(&fw->lock); + spin_lock_init(&fw->lock); /* Assuming gen11+ so assert this assumption is correct */ xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); @@ -116,7 +116,7 @@ static int domain_wake_wait(struct xe_gt *gt, { return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - NULL, false); + NULL, true); } static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) @@ -129,7 +129,7 @@ static int domain_sleep_wait(struct xe_gt *gt, { return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0, XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, - NULL, false); + NULL, true); } #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ @@ -147,7 +147,7 @@ int xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains tmp, woken = 0; int ret, ret2 = 0; - mutex_lock(&fw->lock); + spin_lock(&fw->lock); for_each_fw_domain_masked(domain, domains, fw, tmp) { if (!domain->ref++) { woken |= BIT(domain->id); @@ -162,7 +162,7 @@ int xe_force_wake_get(struct xe_force_wake *fw, domain->id, ret); } fw->awake_domains |= woken; - mutex_unlock(&fw->lock); + spin_unlock(&fw->lock); return ret2; } @@ -176,7 +176,7 @@ int xe_force_wake_put(struct xe_force_wake *fw, enum xe_force_wake_domains tmp, sleep = 0; int ret, ret2 = 0; - mutex_lock(&fw->lock); + spin_lock(&fw->lock); for_each_fw_domain_masked(domain, domains, fw, tmp) { if (!--domain->ref) { sleep |= BIT(domain->id); @@ -191,7 +191,7 @@ int xe_force_wake_put(struct xe_force_wake *fw, domain->id, ret); } fw->awake_domains &= ~sleep; - mutex_unlock(&fw->lock); + spin_unlock(&fw->lock); return ret2; } diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h index cb782696855b..ed0edc2cdf9f 100644 --- a/drivers/gpu/drm/xe/xe_force_wake_types.h +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h @@ -76,7 +76,7 @@ struct xe_force_wake { /** @gt: back pointers to GT */ struct xe_gt *gt; /** @lock: protects everything force wake struct */ - struct mutex lock; + spinlock_t lock; /** @awake_domains: mask of all domains awake */ enum xe_force_wake_domains awake_domains; /** @domains: force wake domains */ -- cgit v1.2.3 From 3856b0f71f52b8397887c1765e14d0245d722233 Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Wed, 30 Aug 2023 08:48:53 +0530 Subject: drm/xe/pmu: Enable PMU interface There are a set of engine group busyness counters provided by HW which are perfect fit to be exposed via PMU perf events. 
BSPEC: 46559, 46560, 46722, 46729, 52071, 71028 events can be listed using: perf list xe_0000_03_00.0/any-engine-group-busy-gt0/ [Kernel PMU event] xe_0000_03_00.0/copy-group-busy-gt0/ [Kernel PMU event] xe_0000_03_00.0/interrupts/ [Kernel PMU event] xe_0000_03_00.0/media-group-busy-gt0/ [Kernel PMU event] xe_0000_03_00.0/render-group-busy-gt0/ [Kernel PMU event] and can be read using: perf stat -e "xe_0000_8c_00.0/render-group-busy-gt0/" -I 1000 time counts unit events 1.001139062 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ 2.003294678 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ 3.005199582 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ 4.007076497 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ 5.008553068 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ 6.010531563 43520 ns xe_0000_8c_00.0/render-group-busy-gt0/ 7.012468029 44800 ns xe_0000_8c_00.0/render-group-busy-gt0/ 8.013463515 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ 9.015300183 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ 10.017233010 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ 10.971934120 0 ns xe_0000_8c_00.0/render-group-busy-gt0/ The pmu base implementation is taken from i915. v2: Store last known value when device is awake return that while the GT is suspended and then update the driver copy when read during awake. v3: 1. drop init_samples, as storing counters before going to suspend should be sufficient. 2. ported the "drm/i915/pmu: Make PMU sample array two-dimensional" and dropped helpers to store and read samples. 3. use xe_device_mem_access_get_if_ongoing to check if device is active before reading the OA registers. 4. dropped format attr as no longer needed 5. introduce xe_pmu_suspend to call engine_group_busyness_store 6. few other nits. v4: minor nits. v5: take forcewake when accessing the OAG registers v6: 1. drop engine_busyness_sample_type 2. update UAPI documentation v7: 1. update UAPI documentation 2. drop MEDIA_GT specific change for media busyness counter. 
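[Editor's note: for completeness, a self-contained userspace sketch of reading the same counter via perf_event_open(2) directly. Illustrative only: the device name is an example, error handling is omitted, and config value 1 corresponds to XE_PMU_RENDER_GROUP_BUSY(0) from the uapi header added below.]

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            struct perf_event_attr attr;
            uint64_t count;
            int type, fd;
            FILE *f;

            /* The PMU type id is dynamic; look it up in sysfs. */
            f = fopen("/sys/bus/event_source/devices/xe_0000_03_00.0/type", "r");
            fscanf(f, "%d", &type);
            fclose(f);

            memset(&attr, 0, sizeof(attr));
            attr.type = type;
            attr.size = sizeof(attr);
            attr.config = 1;        /* XE_PMU_RENDER_GROUP_BUSY(0) */

            /* System-wide event, bound to CPU 0 (must be in the PMU cpumask). */
            fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
            read(fd, &count, sizeof(count));
            printf("render group busy: %llu ns\n", (unsigned long long)count);
            close(fd);

            return 0;
    }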
Co-developed-by: Tvrtko Ursulin Co-developed-by: Bommu Krishnaiah Signed-off-by: Aravind Iddamsetty Reviewed-by: Ashutosh Dixit Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 + drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 + drivers/gpu/drm/xe/xe_device.c | 2 + drivers/gpu/drm/xe/xe_device_types.h | 4 + drivers/gpu/drm/xe/xe_gt.c | 2 + drivers/gpu/drm/xe/xe_irq.c | 18 + drivers/gpu/drm/xe/xe_module.c | 5 + drivers/gpu/drm/xe/xe_pmu.c | 654 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pmu.h | 25 ++ drivers/gpu/drm/xe/xe_pmu_types.h | 76 ++++ include/uapi/drm/xe_drm.h | 40 +++ 11 files changed, 833 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_pmu.c create mode 100644 drivers/gpu/drm/xe/xe_pmu.h create mode 100644 drivers/gpu/drm/xe/xe_pmu_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index be93745e8a30..d3b97bc11af7 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -124,6 +124,8 @@ xe-y += xe_bb.o \ obj-$(CONFIG_DRM_XE) += xe.o obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ +xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o + # header test hdrtest_find_args := -not -path xe_rtp_helpers.h diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 271ed0cdbe21..e13fbbdf6929 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -294,6 +294,11 @@ #define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) #define GLOBAL_INVALIDATION_MODE REG_BIT(2) +#define XE_OAG_RC0_ANY_ENGINE_BUSY_FREE XE_REG(0xdb80) +#define XE_OAG_ANY_MEDIA_FF_BUSY_FREE XE_REG(0xdba0) +#define XE_OAG_BLT_BUSY_FREE XE_REG(0xdbbc) +#define XE_OAG_RENDER_BUSY_FREE XE_REG(0xdbdc) + #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 986a02a66166..89bf926bc0f3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -304,6 +304,8 @@ int xe_device_probe(struct xe_device *xe) xe_debugfs_register(xe); + xe_pmu_register(&xe->pmu); + err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 552e8a343d8f..496d7f3fb897 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -15,6 +15,7 @@ #include "xe_devcoredump_types.h" #include "xe_gt_types.h" #include "xe_platform_types.h" +#include "xe_pmu.h" #include "xe_step_types.h" struct xe_ggtt; @@ -342,6 +343,9 @@ struct xe_device { */ struct task_struct *pm_callback_task; + /** @pmu: performance monitoring unit */ + struct xe_pmu pmu; + /* For pcode */ struct mutex sb_lock; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5d86bb2bb94d..06147f26384f 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -652,6 +652,8 @@ int xe_gt_suspend(struct xe_gt *gt) if (err) goto err_msg; + xe_pmu_suspend(gt); + err = xe_uc_suspend(>->uc); if (err) goto err_force_wake; diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index ef434142bcd9..772b8006d98f 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -26,6 +26,20 @@ #define IIR(offset) XE_REG(offset + 0x8) #define IER(offset) XE_REG(offset + 0xc) +/* + * Interrupt statistic for PMU. 
Increments the counter only if the + * interrupt originated from the GPU so interrupts from a device which + * shares the interrupt line are not accounted. + */ +static __always_inline void xe_pmu_irq_stats(struct xe_device *xe) +{ + /* + * A clever compiler translates that into INC. A not so clever one + * should at least prevent store tearing. + */ + WRITE_ONCE(xe->pmu.irq_count, xe->pmu.irq_count + 1); +} + static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg) { u32 val = xe_mmio_read32(mmio, reg); @@ -332,6 +346,8 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) xelp_intr_enable(xe, false); + xe_pmu_irq_stats(xe); + return IRQ_HANDLED; } @@ -425,6 +441,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) dg1_intr_enable(xe, false); + xe_pmu_irq_stats(xe); + return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index ed3772a69762..d76fabe056d0 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -12,6 +12,7 @@ #include "xe_hw_fence.h" #include "xe_module.h" #include "xe_pci.h" +#include "xe_pmu.h" #include "xe_sched_job.h" bool force_execlist = false; @@ -45,6 +46,10 @@ static const struct init_funcs init_funcs[] = { .init = xe_sched_job_module_init, .exit = xe_sched_job_module_exit, }, + { + .init = xe_pmu_init, + .exit = xe_pmu_exit, + }, { .init = xe_register_pci_driver, .exit = xe_unregister_pci_driver, diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c new file mode 100644 index 000000000000..abfc0b3aeac4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include +#include + +#include "regs/xe_gt_regs.h" +#include "xe_device.h" +#include "xe_gt_clock.h" +#include "xe_mmio.h" + +static cpumask_t xe_pmu_cpumask; +static unsigned int xe_pmu_target_cpu = -1; + +static unsigned int config_gt_id(const u64 config) +{ + return config >> __XE_PMU_GT_SHIFT; +} + +static u64 config_counter(const u64 config) +{ + return config & ~(~0ULL << __XE_PMU_GT_SHIFT); +} + +static void xe_pmu_event_destroy(struct perf_event *event) +{ + struct xe_device *xe = + container_of(event->pmu, typeof(*xe), pmu.base); + + drm_WARN_ON(&xe->drm, event->parent); + + drm_dev_put(&xe->drm); +} + +static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type) +{ + u64 val; + + switch (sample_type) { + case __XE_SAMPLE_RENDER_GROUP_BUSY: + val = xe_mmio_read32(gt, XE_OAG_RENDER_BUSY_FREE); + break; + case __XE_SAMPLE_COPY_GROUP_BUSY: + val = xe_mmio_read32(gt, XE_OAG_BLT_BUSY_FREE); + break; + case __XE_SAMPLE_MEDIA_GROUP_BUSY: + val = xe_mmio_read32(gt, XE_OAG_ANY_MEDIA_FF_BUSY_FREE); + break; + case __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY: + val = xe_mmio_read32(gt, XE_OAG_RC0_ANY_ENGINE_BUSY_FREE); + break; + default: + drm_warn(>->tile->xe->drm, "unknown pmu event\n"); + } + + return xe_gt_clock_cycles_to_ns(gt, val * 16); +} + +static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config) +{ + int sample_type = config_counter(config) - 1; + const unsigned int gt_id = gt->info.id; + struct xe_device *xe = gt->tile->xe; + struct xe_pmu *pmu = &xe->pmu; + unsigned long flags; + bool device_awake; + u64 val; + + device_awake = xe_device_mem_access_get_if_ongoing(xe); + if (device_awake) { + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + val = __engine_group_busyness_read(gt, sample_type); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); + 
xe_device_mem_access_put(xe); + } + + spin_lock_irqsave(&pmu->lock, flags); + + if (device_awake) + pmu->sample[gt_id][sample_type] = val; + else + val = pmu->sample[gt_id][sample_type]; + + spin_unlock_irqrestore(&pmu->lock, flags); + + return val; +} + +static void engine_group_busyness_store(struct xe_gt *gt) +{ + struct xe_pmu *pmu = >->tile->xe->pmu; + unsigned int gt_id = gt->info.id; + unsigned long flags; + int i; + + spin_lock_irqsave(&pmu->lock, flags); + + for (i = __XE_SAMPLE_RENDER_GROUP_BUSY; i <= __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY; i++) + pmu->sample[gt_id][i] = __engine_group_busyness_read(gt, i); + + spin_unlock_irqrestore(&pmu->lock, flags); +} + +static int +config_status(struct xe_device *xe, u64 config) +{ + unsigned int gt_id = config_gt_id(config); + struct xe_gt *gt = xe_device_get_gt(xe, gt_id); + + if (gt_id >= XE_PMU_MAX_GT) + return -ENOENT; + + switch (config_counter(config)) { + case XE_PMU_INTERRUPTS(0): + if (gt_id) + return -ENOENT; + break; + case XE_PMU_RENDER_GROUP_BUSY(0): + case XE_PMU_COPY_GROUP_BUSY(0): + case XE_PMU_ANY_ENGINE_GROUP_BUSY(0): + if (gt->info.type == XE_GT_TYPE_MEDIA) + return -ENOENT; + break; + case XE_PMU_MEDIA_GROUP_BUSY(0): + if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0)))) + return -ENOENT; + break; + default: + return -ENOENT; + } + + return 0; +} + +static int xe_pmu_event_init(struct perf_event *event) +{ + struct xe_device *xe = + container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + int ret; + + if (pmu->closed) + return -ENODEV; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + if (event->cpu < 0) + return -EINVAL; + + /* only allow running on one cpu at a time */ + if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask)) + return -EINVAL; + + ret = config_status(xe, event->attr.config); + if (ret) + return ret; + + if (!event->parent) { + drm_dev_get(&xe->drm); + event->destroy = xe_pmu_event_destroy; + } + + return 0; +} + +static u64 __xe_pmu_event_read(struct perf_event *event) +{ + struct xe_device *xe = + container_of(event->pmu, typeof(*xe), pmu.base); + const unsigned int gt_id = config_gt_id(event->attr.config); + const u64 config = event->attr.config; + struct xe_gt *gt = xe_device_get_gt(xe, gt_id); + struct xe_pmu *pmu = &xe->pmu; + u64 val; + + switch (config_counter(config)) { + case XE_PMU_INTERRUPTS(0): + val = READ_ONCE(pmu->irq_count); + break; + case XE_PMU_RENDER_GROUP_BUSY(0): + case XE_PMU_COPY_GROUP_BUSY(0): + case XE_PMU_ANY_ENGINE_GROUP_BUSY(0): + case XE_PMU_MEDIA_GROUP_BUSY(0): + val = engine_group_busyness_read(gt, config); + break; + default: + drm_warn(>->tile->xe->drm, "unknown pmu event\n"); + } + + return val; +} + +static void xe_pmu_event_read(struct perf_event *event) +{ + struct xe_device *xe = + container_of(event->pmu, typeof(*xe), pmu.base); + struct hw_perf_event *hwc = &event->hw; + struct xe_pmu *pmu = &xe->pmu; + u64 prev, new; + + if (pmu->closed) { + event->hw.state = PERF_HES_STOPPED; + return; + } +again: + prev = local64_read(&hwc->prev_count); + new = __xe_pmu_event_read(event); + + if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) + goto again; + + local64_add(new - prev, &event->count); +} + +static void xe_pmu_enable(struct perf_event *event) +{ + /* + * Store the current counter value so we can report the correct delta + * for 
all listeners. Even when the event was already enabled and has + * an existing non-zero value. + */ + local64_set(&event->hw.prev_count, __xe_pmu_event_read(event)); +} + +static void xe_pmu_event_start(struct perf_event *event, int flags) +{ + struct xe_device *xe = + container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (pmu->closed) + return; + + xe_pmu_enable(event); + event->hw.state = 0; +} + +static void xe_pmu_event_stop(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_UPDATE) + xe_pmu_event_read(event); + + event->hw.state = PERF_HES_STOPPED; +} + +static int xe_pmu_event_add(struct perf_event *event, int flags) +{ + struct xe_device *xe = + container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (pmu->closed) + return -ENODEV; + + if (flags & PERF_EF_START) + xe_pmu_event_start(event, flags); + + return 0; +} + +static void xe_pmu_event_del(struct perf_event *event, int flags) +{ + xe_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int xe_pmu_event_event_idx(struct perf_event *event) +{ + return 0; +} + +struct xe_ext_attribute { + struct device_attribute attr; + unsigned long val; +}; + +static ssize_t xe_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_ext_attribute *eattr; + + eattr = container_of(attr, struct xe_ext_attribute, attr); + return sprintf(buf, "config=0x%lx\n", eattr->val); +} + +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &xe_pmu_cpumask); +} + +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *xe_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group xe_pmu_cpumask_attr_group = { + .attrs = xe_cpumask_attrs, +}; + +#define __event(__counter, __name, __unit) \ +{ \ + .counter = (__counter), \ + .name = (__name), \ + .unit = (__unit), \ + .global = false, \ +} + +#define __global_event(__counter, __name, __unit) \ +{ \ + .counter = (__counter), \ + .name = (__name), \ + .unit = (__unit), \ + .global = true, \ +} + +static struct xe_ext_attribute * +add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config) +{ + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = xe_pmu_event_show; + attr->val = config; + + return ++attr; +} + +static struct perf_pmu_events_attr * +add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, + const char *str) +{ + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = perf_event_sysfs_show; + attr->event_str = str; + + return ++attr; +} + +static struct attribute ** +create_event_attributes(struct xe_pmu *pmu) +{ + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + static const struct { + unsigned int counter; + const char *name; + const char *unit; + bool global; + } events[] = { + __global_event(0, "interrupts", NULL), + __event(1, "render-group-busy", "ns"), + __event(2, "copy-group-busy", "ns"), + __event(3, "media-group-busy", "ns"), + __event(4, "any-engine-group-busy", "ns"), + }; + + struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter; + struct xe_ext_attribute *xe_attr = NULL, *xe_iter; + struct attribute **attr = NULL, **attr_iter; + unsigned int count = 0; + unsigned int i, j; + struct xe_gt *gt; + + /* Count how many counters we will be exposing. 
*/ + for_each_gt(gt, xe, j) { + for (i = 0; i < ARRAY_SIZE(events); i++) { + u64 config = ___XE_PMU_OTHER(j, events[i].counter); + + if (!config_status(xe, config)) + count++; + } + } + + /* Allocate attribute objects and table. */ + xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL); + if (!xe_attr) + goto err_alloc; + + pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL); + if (!pmu_attr) + goto err_alloc; + + /* Max one pointer of each attribute type plus a termination entry. */ + attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL); + if (!attr) + goto err_alloc; + + xe_iter = xe_attr; + pmu_iter = pmu_attr; + attr_iter = attr; + + for_each_gt(gt, xe, j) { + for (i = 0; i < ARRAY_SIZE(events); i++) { + u64 config = ___XE_PMU_OTHER(j, events[i].counter); + char *str; + + if (config_status(xe, config)) + continue; + + if (events[i].global) + str = kstrdup(events[i].name, GFP_KERNEL); + else + str = kasprintf(GFP_KERNEL, "%s-gt%u", + events[i].name, j); + if (!str) + goto err; + + *attr_iter++ = &xe_iter->attr.attr; + xe_iter = add_xe_attr(xe_iter, str, config); + + if (events[i].unit) { + if (events[i].global) + str = kasprintf(GFP_KERNEL, "%s.unit", + events[i].name); + else + str = kasprintf(GFP_KERNEL, "%s-gt%u.unit", + events[i].name, j); + if (!str) + goto err; + + *attr_iter++ = &pmu_iter->attr.attr; + pmu_iter = add_pmu_attr(pmu_iter, str, + events[i].unit); + } + } + } + + pmu->xe_attr = xe_attr; + pmu->pmu_attr = pmu_attr; + + return attr; + +err: + for (attr_iter = attr; *attr_iter; attr_iter++) + kfree((*attr_iter)->name); + +err_alloc: + kfree(attr); + kfree(xe_attr); + kfree(pmu_attr); + + return NULL; +} + +static void free_event_attributes(struct xe_pmu *pmu) +{ + struct attribute **attr_iter = pmu->events_attr_group.attrs; + + for (; *attr_iter; attr_iter++) + kfree((*attr_iter)->name); + + kfree(pmu->events_attr_group.attrs); + kfree(pmu->xe_attr); + kfree(pmu->pmu_attr); + + pmu->events_attr_group.attrs = NULL; + pmu->xe_attr = NULL; + pmu->pmu_attr = NULL; +} + +static int xe_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); + + /* Select the first online CPU as a designated reader. */ + if (cpumask_empty(&xe_pmu_cpumask)) + cpumask_set_cpu(cpu, &xe_pmu_cpumask); + + return 0; +} + +static int xe_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); + unsigned int target = xe_pmu_target_cpu; + + /* + * Unregistering an instance generates a CPU offline event which we must + * ignore to avoid incorrectly modifying the shared xe_pmu_cpumask. + */ + if (pmu->closed) + return 0; + + if (cpumask_test_and_clear_cpu(cpu, &xe_pmu_cpumask)) { + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &xe_pmu_cpumask); + xe_pmu_target_cpu = target; + } + } + + if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) { + perf_pmu_migrate_context(&pmu->base, cpu, target); + pmu->cpuhp.cpu = target; + } + + return 0; +} + +static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; + +int xe_pmu_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/x86/intel/xe:online", + xe_pmu_cpu_online, + xe_pmu_cpu_offline); + if (ret < 0) + pr_notice("Failed to setup cpuhp state for xe PMU! 
(%d)\n", + ret); + else + cpuhp_slot = ret; + + return 0; +} + +void xe_pmu_exit(void) +{ + if (cpuhp_slot != CPUHP_INVALID) + cpuhp_remove_multi_state(cpuhp_slot); +} + +static int xe_pmu_register_cpuhp_state(struct xe_pmu *pmu) +{ + if (cpuhp_slot == CPUHP_INVALID) + return -EINVAL; + + return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node); +} + +static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu) +{ + cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node); +} + +void xe_pmu_suspend(struct xe_gt *gt) +{ + engine_group_busyness_store(gt); +} + +static void xe_pmu_unregister(struct drm_device *device, void *arg) +{ + struct xe_pmu *pmu = arg; + + if (!pmu->base.event_init) + return; + + /* + * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu + * ensures all currently executing ones will have exited before we + * proceed with unregistration. + */ + pmu->closed = true; + synchronize_rcu(); + + xe_pmu_unregister_cpuhp_state(pmu); + + perf_pmu_unregister(&pmu->base); + pmu->base.event_init = NULL; + kfree(pmu->base.attr_groups); + kfree(pmu->name); + free_event_attributes(pmu); +} + +void xe_pmu_register(struct xe_pmu *pmu) +{ + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + const struct attribute_group *attr_groups[] = { + &pmu->events_attr_group, + &xe_pmu_cpumask_attr_group, + NULL + }; + + int ret = -ENOMEM; + + spin_lock_init(&pmu->lock); + pmu->cpuhp.cpu = -1; + + pmu->name = kasprintf(GFP_KERNEL, + "xe_%s", + dev_name(xe->drm.dev)); + if (pmu->name) + /* tools/perf reserves colons as special. */ + strreplace((char *)pmu->name, ':', '_'); + + if (!pmu->name) + goto err; + + pmu->events_attr_group.name = "events"; + pmu->events_attr_group.attrs = create_event_attributes(pmu); + if (!pmu->events_attr_group.attrs) + goto err_name; + + pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), + GFP_KERNEL); + if (!pmu->base.attr_groups) + goto err_attr; + + pmu->base.module = THIS_MODULE; + pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.event_init = xe_pmu_event_init; + pmu->base.add = xe_pmu_event_add; + pmu->base.del = xe_pmu_event_del; + pmu->base.start = xe_pmu_event_start; + pmu->base.stop = xe_pmu_event_stop; + pmu->base.read = xe_pmu_event_read; + pmu->base.event_idx = xe_pmu_event_event_idx; + + ret = perf_pmu_register(&pmu->base, pmu->name, -1); + if (ret) + goto err_groups; + + ret = xe_pmu_register_cpuhp_state(pmu); + if (ret) + goto err_unreg; + + ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu); + if (ret) + goto err_cpuhp; + + return; + +err_cpuhp: + xe_pmu_unregister_cpuhp_state(pmu); +err_unreg: + perf_pmu_unregister(&pmu->base); +err_groups: + kfree(pmu->base.attr_groups); +err_attr: + pmu->base.event_init = NULL; + free_event_attributes(pmu); +err_name: + kfree(pmu->name); +err: + drm_notice(&xe->drm, "Failed to register PMU!\n"); +} diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h new file mode 100644 index 000000000000..a99d4ddd023e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_PMU_H_ +#define _XE_PMU_H_ + +#include "xe_gt_types.h" +#include "xe_pmu_types.h" + +#if IS_ENABLED(CONFIG_PERF_EVENTS) +int xe_pmu_init(void); +void xe_pmu_exit(void); +void xe_pmu_register(struct xe_pmu *pmu); +void xe_pmu_suspend(struct xe_gt *gt); +#else +static inline int xe_pmu_init(void) { return 0; } +static inline void xe_pmu_exit(void) {} +static inline void 
xe_pmu_register(struct xe_pmu *pmu) {} +static inline void xe_pmu_suspend(struct xe_gt *gt) {} +#endif + +#endif + diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h new file mode 100644 index 000000000000..4ccc7e9042f6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu_types.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_PMU_TYPES_H_ +#define _XE_PMU_TYPES_H_ + +#include +#include +#include + +enum { + __XE_SAMPLE_RENDER_GROUP_BUSY, + __XE_SAMPLE_COPY_GROUP_BUSY, + __XE_SAMPLE_MEDIA_GROUP_BUSY, + __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY, + __XE_NUM_PMU_SAMPLERS +}; + +#define XE_PMU_MAX_GT 2 + +struct xe_pmu { + /** + * @cpuhp: Struct used for CPU hotplug handling. + */ + struct { + struct hlist_node node; + unsigned int cpu; + } cpuhp; + /** + * @base: PMU base. + */ + struct pmu base; + /** + * @closed: xe is unregistering. + */ + bool closed; + /** + * @name: Name as registered with perf core. + */ + const char *name; + /** + * @lock: Lock protecting enable mask and ref count handling. + */ + spinlock_t lock; + /** + * @sample: Current and previous (raw) counters. + * + * These counters are updated when the device is awake. + * + */ + u64 sample[XE_PMU_MAX_GT][__XE_NUM_PMU_SAMPLERS]; + /** + * @irq_count: Number of interrupts + * + * Intentionally unsigned long to avoid atomics or heuristics on 32bit. + * 4e9 interrupts are a lot and postprocessing can really deal with an + * occasional wraparound easily. It's 32bit after all. + */ + unsigned long irq_count; + /** + * @events_attr_group: Device events attribute group. + */ + struct attribute_group events_attr_group; + /** + * @xe_attr: Memory block holding device attributes. + */ + void *xe_attr; + /** + * @pmu_attr: Memory block holding device attributes. + */ + void *pmu_attr; +}; + +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 00d5cb4ef85e..d48d8e3c898c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1053,6 +1053,46 @@ struct drm_xe_vm_madvise { __u64 reserved[2]; }; +/** + * DOC: XE PMU event config IDs + * + * Check 'man perf_event_open' to use the ID's XE_PMU_XXXX listed in xe_drm.h + * in 'struct perf_event_attr' as part of perf_event_open syscall to read a + * particular event. + * + * For example to open the XE_PMU_INTERRUPTS(0): + * + * .. code-block:: C + * + * struct perf_event_attr attr; + * long long count; + * int cpu = 0; + * int fd; + * + * memset(&attr, 0, sizeof(struct perf_event_attr)); + * attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_56_00.0/type + * attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; + * attr.use_clockid = 1; + * attr.clockid = CLOCK_MONOTONIC; + * attr.config = XE_PMU_INTERRUPTS(0); + * + * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + */ + +/* + * Top bits of every counter are GT id. 
+ */ +#define __XE_PMU_GT_SHIFT (56) + +#define ___XE_PMU_OTHER(gt, x) \ + (((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT)) + +#define XE_PMU_INTERRUPTS(gt) ___XE_PMU_OTHER(gt, 0) +#define XE_PMU_RENDER_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 1) +#define XE_PMU_COPY_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 2) +#define XE_PMU_MEDIA_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 3) +#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 4) + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From c4991ee01d480c45c789b43eb001a978bf016f58 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 13 Sep 2023 16:28:35 -0700 Subject: drm/xe/uc: Rename guc_submission_enabled() to uc_enabled() The guc_submission_enabled() function is being used as a boolean toggle for all firmwares and all related features, not just GuC submission. We could add additional flags/functions to distinguish and allow different use-cases (e.g. loading HuC but not using GuC submission), but given that not using GuC is a debug-only scenario having a global switch for all FWs is enough. However, we want to make it clear that this switch turns off everything, so rename it to uc_enabled(). v2: rebase on s/XE_WARN_ON/xe_assert Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.h | 2 +- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- drivers/gpu/drm/xe/xe_execlist.c | 6 +++--- drivers/gpu/drm/xe/xe_ggtt.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++-- drivers/gpu/drm/xe/xe_hw_engine.c | 4 ++-- drivers/gpu/drm/xe/xe_irq.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 16 ++++++++-------- 10 files changed, 22 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 71582094834c..c4232de40ae0 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -114,7 +114,7 @@ static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe) return xe_device_get_root_tile(xe)->primary_gt; } -static inline bool xe_device_guc_submission_enabled(struct xe_device *xe) +static inline bool xe_device_uc_enabled(struct xe_device *xe) { return !xe->info.force_execlist; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index a0b5647923ac..23789122b5b1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -585,7 +585,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, u16 gt_id; u32 return_mask = 0, prev_mask; - if (XE_IOCTL_DBG(xe, !xe_device_guc_submission_enabled(xe) && + if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) && len > 1)) return 0; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 5b26b6e35afc..22dfe91b2b83 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -322,7 +322,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q) struct xe_device *xe = gt_to_xe(q->gt); int err; - xe_assert(xe, !xe_device_guc_submission_enabled(xe)); + xe_assert(xe, !xe_device_uc_enabled(xe)); drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n"); @@ -371,7 +371,7 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) struct xe_device *xe = gt_to_xe(q->gt); unsigned long flags; - xe_assert(xe, !xe_device_guc_submission_enabled(xe)); + xe_assert(xe, 
!xe_device_uc_enabled(xe)); spin_lock_irqsave(&exl->port->lock, flags); if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET)) @@ -458,7 +458,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { int xe_execlist_init(struct xe_gt *gt) { /* GuC submission enabled, nothing to do */ - if (xe_device_guc_submission_enabled(gt_to_xe(gt))) + if (xe_device_uc_enabled(gt_to_xe(gt))) return 0; gt->exec_queue_ops = &execlist_exec_queue_ops; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 03097f1b7f71..ba34b8784572 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -233,7 +233,7 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) xe_gt_assert(gt, seqno > 0); if (seqno > 0) xe_gt_tlb_invalidation_wait(gt, seqno); - } else if (xe_device_guc_submission_enabled(gt_to_xe(gt))) { + } else if (xe_device_uc_enabled(gt_to_xe(gt))) { struct xe_device *xe = gt_to_xe(gt); if (xe->info.platform == XE_PVC) { diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 06147f26384f..1aa44d4f9ac1 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -549,7 +549,7 @@ static int gt_reset(struct xe_gt *gt) int err; /* We only support GT resets with GuC submission */ - if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) + if (!xe_device_uc_enabled(gt_to_xe(gt))) return -ENODEV; xe_gt_info(gt, "reset started\n"); @@ -642,7 +642,7 @@ int xe_gt_suspend(struct xe_gt *gt) int err; /* For now suspend/resume is only allowed with GuC */ - if (!xe_device_guc_submission_enabled(gt_to_xe(gt))) + if (!xe_device_uc_enabled(gt_to_xe(gt))) return -ENODEV; xe_gt_sanitize(gt); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 99d855680894..8a4d299d6cb0 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -816,7 +816,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); int ret; - xe_gt_assert(gt, xe_device_guc_submission_enabled(xe)); + xe_gt_assert(gt, xe_device_uc_enabled(xe)); xe_device_mem_access_get(pc_to_xe(pc)); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index f6b630f53928..fd120d8c0af2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1138,7 +1138,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) long timeout; int err; - xe_assert(xe, xe_device_guc_submission_enabled(guc_to_xe(guc))); + xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); ge = kzalloc(sizeof(*ge), GFP_KERNEL); if (!ge) @@ -1903,7 +1903,7 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) struct xe_exec_queue *q; unsigned long index; - if (!xe_device_guc_submission_enabled(guc_to_xe(guc))) + if (!xe_device_uc_enabled(guc_to_xe(guc))) return; mutex_lock(&guc->submission_state.lock); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 9c2e212fa4cf..a8681089fb60 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -434,7 +434,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (err) goto err_hwsp; - if (!xe_device_guc_submission_enabled(xe)) { + if (!xe_device_uc_enabled(xe)) { hwe->exl_port = xe_execlist_port_create(xe, hwe); if (IS_ERR(hwe->exl_port)) { err = PTR_ERR(hwe->exl_port); @@ -442,7 +442,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, } } - if 
(xe_device_guc_submission_enabled(xe)) + if (xe_device_uc_enabled(xe)) xe_hw_engine_enable_ring(hwe); /* We reserve the highest BCS instance for USM */ diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 772b8006d98f..ff71a3aa08ce 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -142,7 +142,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) u32 ccs_mask, bcs_mask; u32 irqs, dmask, smask; - if (xe_device_guc_submission_enabled(xe)) { + if (xe_device_uc_enabled(xe)) { irqs = GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; } else { diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index a8ecb5c6e01a..5b7d6177c1c2 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -32,7 +32,7 @@ int xe_uc_init(struct xe_uc *uc) int ret; /* GuC submission not enabled, nothing to do */ - if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; ret = xe_guc_init(&uc->guc); @@ -66,7 +66,7 @@ err: int xe_uc_init_post_hwconfig(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ - if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; return xe_guc_init_post_hwconfig(&uc->guc); @@ -110,7 +110,7 @@ int xe_uc_init_hwconfig(struct xe_uc *uc) int ret; /* GuC submission not enabled, nothing to do */ - if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; ret = xe_guc_min_load_for_hwconfig(&uc->guc); @@ -129,7 +129,7 @@ int xe_uc_init_hw(struct xe_uc *uc) int ret; /* GuC submission not enabled, nothing to do */ - if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; ret = xe_uc_sanitize_reset(uc); @@ -175,7 +175,7 @@ int xe_uc_fini_hw(struct xe_uc *uc) int xe_uc_reset_prepare(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ - if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; return xe_guc_reset_prepare(&uc->guc); @@ -194,7 +194,7 @@ void xe_uc_stop_prepare(struct xe_uc *uc) int xe_uc_stop(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ - if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; return xe_guc_stop(&uc->guc); @@ -203,7 +203,7 @@ int xe_uc_stop(struct xe_uc *uc) int xe_uc_start(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ - if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; return xe_guc_start(&uc->guc); @@ -226,7 +226,7 @@ int xe_uc_suspend(struct xe_uc *uc) int ret; /* GuC submission not enabled, nothing to do */ - if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; uc_reset_wait(uc); -- cgit v1.2.3 From 757308471dbe9aba28cdaf40848936923216a1f2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 13 Sep 2023 16:28:36 -0700 Subject: drm/xe/uc: Fix uC status tracking The current uC status tracking has a few issues: 1) the HuC is moved to "disabled" instead of "not supported" 2) the status is left uninitialized instead of "disabled" when the modparam is used to disable support 3) due to #1, a number of checks are done against "disabled" instead of the appropriate status. Address all of those by making sure to follow the appropriate state transition and checking against the required state. 
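[Editor's summary, not a literal excerpt from the patch — the resulting status ladder:]

    /*
     * no blob defined for the platform   -> XE_UC_FIRMWARE_NOT_SUPPORTED
     * blob defined, uC usage disabled    -> XE_UC_FIRMWARE_DISABLED
     * blob defined and enabled           -> XE_UC_FIRMWARE_SELECTED
     *                                       ... -> XE_UC_FIRMWARE_LOADABLE
     *
     * Callers then check the state they actually require, e.g.
     * xe_uc_fw_is_loadable() before upload/auth instead of merely
     * "not disabled".
     */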
v2: rebase on s/guc_submission_enabled/uc_enabled/ Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Matthew Brost Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 3 +++ drivers/gpu/drm/xe/xe_huc.c | 15 +++++++-------- drivers/gpu/drm/xe/xe_uc.c | 10 +++++++--- drivers/gpu/drm/xe/xe_uc_fw.c | 14 ++++++++------ drivers/gpu/drm/xe/xe_wopcm.c | 3 +-- 5 files changed, 26 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 134019fdda7e..84f0b5488783 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -242,6 +242,9 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; + if (!xe_uc_fw_is_enabled(&guc->fw)) + return 0; + ret = xe_guc_log_init(&guc->log); if (ret) goto out; diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index c856da1e9422..293403d16f25 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -43,22 +43,21 @@ int xe_huc_init(struct xe_huc *huc) if (ret) goto out; + if (!xe_uc_fw_is_enabled(&huc->fw)) + return 0; + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); return 0; out: - if (xe_uc_fw_is_disabled(&huc->fw)) { - drm_info(&xe->drm, "HuC disabled\n"); - return 0; - } drm_err(&xe->drm, "HuC init failed with %d", ret); return ret; } int xe_huc_upload(struct xe_huc *huc) { - if (xe_uc_fw_is_disabled(&huc->fw)) + if (!xe_uc_fw_is_loadable(&huc->fw)) return 0; return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); } @@ -70,7 +69,7 @@ int xe_huc_auth(struct xe_huc *huc) struct xe_guc *guc = huc_to_guc(huc); int ret; - if (xe_uc_fw_is_disabled(&huc->fw)) + if (!xe_uc_fw_is_loadable(&huc->fw)) return 0; xe_assert(xe, !xe_uc_fw_is_running(&huc->fw)); @@ -107,7 +106,7 @@ fail: void xe_huc_sanitize(struct xe_huc *huc) { - if (xe_uc_fw_is_disabled(&huc->fw)) + if (!xe_uc_fw_is_loadable(&huc->fw)) return; xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); } @@ -119,7 +118,7 @@ void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p) xe_uc_fw_print(&huc->fw, p); - if (xe_uc_fw_is_disabled(&huc->fw)) + if (!xe_uc_fw_is_enabled(&huc->fw)) return; err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 5b7d6177c1c2..784f53c5f282 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -31,9 +31,10 @@ int xe_uc_init(struct xe_uc *uc) { int ret; - /* GuC submission not enabled, nothing to do */ - if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; + /* + * We call the GuC/HuC init functions even if GuC submission is off to + * correctly move our tracking of the FW state to "disabled". + */ ret = xe_guc_init(&uc->guc); if (ret) @@ -43,6 +44,9 @@ int xe_uc_init(struct xe_uc *uc) if (ret) goto err; + if (!xe_device_uc_enabled(uc_to_xe(uc))) + return 0; + ret = xe_wopcm_init(&uc->wopcm); if (ret) goto err; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index efc70836453d..a890ece72f34 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -340,17 +340,19 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) xe_assert(xe, !uc_fw->path); uc_fw_auto_select(xe, uc_fw); - xe_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? + xe_uc_fw_change_status(uc_fw, uc_fw->path ? 
XE_UC_FIRMWARE_SELECTED : - XE_UC_FIRMWARE_DISABLED : XE_UC_FIRMWARE_NOT_SUPPORTED); - /* Transform no huc in the list into firmware disabled */ - if (uc_fw->type == XE_UC_FW_TYPE_HUC && !xe_uc_fw_is_supported(uc_fw)) { + if (!xe_uc_fw_is_supported(uc_fw)) + return 0; + + if (!xe_device_uc_enabled(xe)) { xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); - err = -ENOPKG; - return err; + drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); + return 0; } + err = request_firmware(&fw, uc_fw->path, dev); if (err) goto fail; diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index 9a85bcc18830..bf85d4fa56cc 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -139,8 +139,7 @@ static int __wopcm_init_regs(struct xe_device *xe, struct xe_gt *gt, { u32 base = wopcm->guc.base; u32 size = wopcm->guc.size; - u32 huc_agent = xe_uc_fw_is_disabled(>->uc.huc.fw) ? 0 : - HUC_LOADING_AGENT_GUC; + u32 huc_agent = xe_uc_fw_is_available(>->uc.huc.fw) ? HUC_LOADING_AGENT_GUC : 0; u32 mask; int err; -- cgit v1.2.3 From a455ed04669f03bbb1f22267f1237983e026739f Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 13 Sep 2023 16:28:37 -0700 Subject: drm/xe/uc: Add GuC/HuC firmware path overrides When testing a new binary and/or debugging binary-related issues, it is useful to have the option to change which binary is loaded without having to update and re-compile the kernel. To support this option, this patch adds 2 new modparams to override the FW path for GuC and HuC. The HuC modparam can also be set to an empty string to disable HuC loading. Note that those modparams only take effect on platforms where we already have a default FW, so we're sure there is support for FW loading and the kernel isn't going to explode in an undefined path. 
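A usage example (paths are placeholders):

    # Load an alternative GuC binary and disable HuC loading entirely:
    modprobe xe guc_firmware_path=/lib/firmware/xe/guc_test.bin huc_firmware_path=""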
v2: simplify comment (John), rebase on s/guc_submission_enabled/uc_enabled Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Matthew Brost Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_module.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_module.h | 2 ++ drivers/gpu/drm/xe/xe_uc_fw.c | 30 +++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index d76fabe056d0..82817a46f887 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -27,6 +27,16 @@ int xe_guc_log_level = 5; module_param_named(guc_log_level, xe_guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); +char *xe_guc_firmware_path; +module_param_named_unsafe(guc_firmware_path, xe_guc_firmware_path, charp, 0400); +MODULE_PARM_DESC(guc_firmware_path, + "GuC firmware path to use instead of the default one"); + +char *xe_huc_firmware_path; +module_param_named_unsafe(huc_firmware_path, xe_huc_firmware_path, charp, 0400); +MODULE_PARM_DESC(huc_firmware_path, + "HuC firmware path to use instead of the default one - empty string disables"); + char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE; module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 2c1f9199f909..e1da1e9ca5cb 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -10,4 +10,6 @@ extern bool force_execlist; extern bool enable_display; extern u32 xe_force_vram_bar_size; extern int xe_guc_log_level; +extern char *xe_guc_firmware_path; +extern char *xe_huc_firmware_path; extern char *xe_param_force_probe; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index a890ece72f34..2ba0466bc45a 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -15,6 +15,7 @@ #include "xe_gt.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_module.h" #include "xe_uc_fw.h" /* @@ -210,6 +211,30 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) } } +static void +uc_fw_override(struct xe_uc_fw *uc_fw) +{ + char *path_override = NULL; + + /* empty string disables, but it's not allowed for GuC */ + switch (uc_fw->type) { + case XE_UC_FW_TYPE_GUC: + if (xe_guc_firmware_path && *xe_guc_firmware_path) + path_override = xe_guc_firmware_path; + break; + case XE_UC_FW_TYPE_HUC: + path_override = xe_huc_firmware_path; + break; + default: + break; + } + + if (path_override) { + uc_fw->path = path_override; + uc_fw->user_overridden = true; + } +} + /** * xe_uc_fw_copy_rsa - copy fw RSA to buffer * @@ -347,7 +372,10 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) if (!xe_uc_fw_is_supported(uc_fw)) return 0; - if (!xe_device_uc_enabled(xe)) { + uc_fw_override(uc_fw); + + /* an empty path means the firmware is disabled */ + if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); return 0; -- cgit v1.2.3 From fc678ec7c2e037fcc1bb678403036a9772e61dbd Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 15 Sep 2023 19:26:06 +0200 Subject: drm/xe: Reinstate pipelined fence enable_signaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the 
GPUVA conversion, the xe_bo::vmas member became replaced with drm_gem_object::gpuva.list, however there was a couple of usage instances left using the old member. Most notably the pipelined fence enable_signaling. Remove the xe_bo::vmas member completely, fix usage instances and also enable this pipelined fence enable_signaling even for faulting VM:s since we actually wait for bind fences to complete. v2: - Rebase. v3: - Fix display code build error. Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20230915172606.14436-1-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 5 ++--- drivers/gpu/drm/xe/xe_bo_types.h | 2 -- drivers/gpu/drm/xe/xe_pt.c | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 998efceb84a4..e812f2b7d5b9 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -456,7 +456,7 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, dma_resv_assert_held(bo->ttm.base.resv); - if (!xe_device_in_fault_mode(xe) && !list_empty(&bo->vmas)) { + if (!list_empty(&bo->ttm.base.gpuva.list)) { dma_resv_iter_begin(&cursor, bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, fence) @@ -1049,7 +1049,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) drm_prime_gem_destroy(&bo->ttm.base, NULL); drm_gem_object_release(&bo->ttm.base); - xe_assert(xe, list_empty(&bo->vmas)); + xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); if (bo->ggtt_node.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); @@ -1232,7 +1232,6 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->props.preferred_gt = XE_BO_PROPS_INVALID; bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL; - INIT_LIST_HEAD(&bo->vmas); INIT_LIST_HEAD(&bo->pinned_link); drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index f6ee920303af..27fe72129ee6 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -31,8 +31,6 @@ struct xe_bo { struct xe_vm *vm; /** @tile: Tile this BO is attached to (kernel BO only) */ struct xe_tile *tile; - /** @vmas: List of VMAs for this BO */ - struct list_head vmas; /** @placements: valid placements for this BO */ struct ttm_place placements[XE_BO_MAX_PLACEMENTS]; /** @placement: current placement for this BO */ diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 680fbe6f38a6..21a3dfe99e05 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -265,7 +265,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) if (!pt) return; - XE_WARN_ON(!list_empty(&pt->bo->vmas)); + XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); xe_bo_unpin(pt->bo); xe_bo_put_deferred(pt->bo, deferred); -- cgit v1.2.3 From d435a039646eee712f4d5da2405181015c30bb1a Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 15 Sep 2023 19:02:33 -0300 Subject: drm/xe: Simplify final return from xe_irq_install() At the end of the function, we will always return err no matter it's value. Simplify this by just returning the result of drmm_add_action_or_reset(). 
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230915220233.59736-1-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index ff71a3aa08ce..dec3d518b3fc 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -592,11 +592,7 @@ int xe_irq_install(struct xe_device *xe) xe_irq_postinstall(xe); - err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe); - if (err) - return err; - - return err; + return drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe); } void xe_irq_shutdown(struct xe_device *xe) -- cgit v1.2.3 From 7764222d54b71a9577cff9296420bf0a780b0c5d Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 20 Sep 2023 11:50:01 +0200 Subject: drm/xe: Disallow pinning dma-bufs in VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now only support pinning in TT memory, for two reasons: 1) Avoid pinning in a placement not accessible to some importers. 2) Pinning in VRAM requires PIN accounting which is a to-do. v2: - Adjust the dma-buf kunit test accordingly. Suggested-by: Oded Gabbay Signed-off-by: Thomas Hellström Reviewed-by: Oded Gabbay Link: https://patchwork.freedesktop.org/patch/msgid/20230920095001.5539-1-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 16 ++++++++++++---- drivers/gpu/drm/xe/xe_dma_buf.c | 25 +++++++++++++++++++++---- 2 files changed, 33 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 1c4d8751be69..18c00bc03024 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -149,11 +149,19 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) /* Is everything where we expect it to be? */ xe_bo_lock(import_bo, false); err = xe_bo_validate(import_bo, NULL, false); - if (err && err != -EINTR && err != -ERESTARTSYS) - KUNIT_FAIL(test, - "xe_bo_validate() failed with err=%d\n", err); - check_residency(test, bo, import_bo, dmabuf); + /* Pinning in VRAM is not allowed. */ + if (!is_dynamic(params) && + params->force_different_devices && + !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + KUNIT_EXPECT_EQ(test, err, -EINVAL); + /* Otherwise only expect interrupts or success. */ + else if (err && err != -EINTR && err != -ERESTARTSYS) + KUNIT_EXPECT_TRUE(test, !err || err == -EINTR || + err == -ERESTARTSYS); + + if (!err) + check_residency(test, bo, import_bo, dmabuf); xe_bo_unlock(import_bo); } drm_gem_object_put(import); diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 8ce1b582402a..cfde3be3b0dc 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -49,13 +49,30 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct xe_bo *bo = gem_to_xe_bo(obj); + struct xe_device *xe = xe_bo_device(bo); + int ret; /* - * Migrate to TT first to increase the chance of non-p2p clients - * can attach. + * For now only support pinning in TT memory, for two reasons: + * 1) Avoid pinning in a placement not accessible to some importers. + * 2) Pinning in VRAM requires PIN accounting which is a to-do. 
*/ - (void)xe_bo_migrate(bo, XE_PL_TT); - xe_bo_pin_external(bo); + if (xe_bo_is_pinned(bo) && bo->ttm.resource->placement != XE_PL_TT) { + drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n"); + return -EINVAL; + } + + ret = xe_bo_migrate(bo, XE_PL_TT); + if (ret) { + if (ret != -EINTR && ret != -ERESTARTSYS) + drm_dbg(&xe->drm, + "Failed migrating dma-buf to TT memory: %pe\n", + ERR_PTR(ret)); + return ret; + } + + ret = xe_bo_pin_external(bo); + xe_assert(xe, !ret); return 0; } -- cgit v1.2.3 From 0d0534750f9d4575abf0da3b41a78e5643e6c8dd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 13 Sep 2023 16:14:17 -0700 Subject: drm/xe/wa: Apply tile workarounds at probe/resume Although the vast majority of workarounds the driver needs to implement are either GT-based or display-based, there are occasionally workarounds that reside outside those parts of the hardware (i.e., in they target registers in the sgunit/soc); we can consider these to be "tile" workarounds since there will be instance of these registers per tile. The registers in question should only lose their values during a function-level reset, so they only need to be applied during probe and resume; the registers will not be affected by GT/engine resets. Tile workarounds are rare (there's only one, 22010954014, that's relevant to Xe at the moment) so it's probably not worth updating the xe_rtp design to handle tile-level workarounds yet, although we may want to consider that in the future if/when more of these show up on future platforms. Reviewed-by: Lucas De Marchi Acked-by: Jani Nikula Link: https://lore.kernel.org/r/20230913231411.291933-13-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 3 +++ drivers/gpu/drm/xe/xe_pm.c | 5 +++++ drivers/gpu/drm/xe/xe_tile.c | 3 +++ drivers/gpu/drm/xe/xe_wa.c | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa.h | 2 ++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 6 files changed, 34 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index be496a3946d8..d62555757d0f 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -69,6 +69,9 @@ #define GU_CNTL XE_REG(0x101010) #define LMEM_INIT REG_BIT(7) +#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014) +#define SGSI_SIDECLK_DIS REG_BIT(17) + #define GGC XE_REG(0x108040) #define GMS_MASK REG_GENMASK(15, 8) #define GGMS_MASK REG_GENMASK(7, 6) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 5e992e62d0fb..2c2745f86223 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -19,6 +19,7 @@ #include "xe_guc.h" #include "xe_irq.h" #include "xe_pcode.h" +#include "xe_wa.h" /** * DOC: Xe Power Management @@ -79,10 +80,14 @@ int xe_pm_suspend(struct xe_device *xe) */ int xe_pm_resume(struct xe_device *xe) { + struct xe_tile *tile; struct xe_gt *gt; u8 id; int err; + for_each_tile(tile, xe, id) + xe_wa_apply_tile_workarounds(tile); + for_each_gt(gt, xe, id) { err = xe_pcode_init(gt); if (err) diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index e0bc2b60ab09..131752a57f65 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -12,6 +12,7 @@ #include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_ttm_vram_mgr.h" +#include "xe_wa.h" /** * DOC: Multi-tile Design @@ -143,6 +144,8 @@ int xe_tile_init_noalloc(struct xe_tile *tile) if (IS_ERR(tile->mem.kernel_bb_pool)) err = 
PTR_ERR(tile->mem.kernel_bb_pool); + xe_wa_apply_tile_workarounds(tile); + xe_tile_sysfs_init(tile); err_mem_access: diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index f45e9452ba0e..d84e67a9af07 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -753,3 +753,23 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) if (oob_was[idx].name) drm_printf_indent(p, 1, "%s\n", oob_was[idx].name); } + +/* + * Apply tile (non-GT, non-display) workarounds. Think very carefully before + * adding anything to this function; most workarounds should be implemented + * elsewhere. The programming here is primarily for sgunit/soc workarounds, + * which are relatively rare. Since the registers these workarounds target are + * outside the GT, they should only need to be applied once at device + * probe/resume; they will not lose their values on any kind of GT or engine + * reset. + * + * TODO: We may want to move this over to xe_rtp in the future once we have + * enough workarounds to justify the work. + */ +void xe_wa_apply_tile_workarounds(struct xe_tile *tile) +{ + struct xe_gt *mmio = tile->primary_gt; + + if (XE_WA(mmio, 22010954014)) + xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); +} diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index cfe685989524..1b24d66f9d80 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -9,12 +9,14 @@ struct drm_printer; struct xe_gt; struct xe_hw_engine; +struct xe_tile; int xe_wa_init(struct xe_gt *gt); void xe_wa_process_oob(struct xe_gt *gt); void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); +void xe_wa_apply_tile_workarounds(struct xe_tile *tile); void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 599e67169dae..f3ff774dc4aa 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -18,3 +18,4 @@ 14016763929 SUBPLATFORM(DG2, G10) SUBPLATFORM(DG2, G12) 16017236439 PLATFORM(PVC) +22010954014 PLATFORM(DG2) -- cgit v1.2.3 From cb90d469183cc8335d646484d66bd3c3643683cc Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 14 Sep 2023 14:48:02 -0700 Subject: drm/xe: Add child contexts to the GuC context lookup The CAT_ERROR message from the GuC provides the guc id of the context that caused the problem, which can be a child context. We therefore need to be able to match that id to the exec_queue that owns it, which we do by adding child context to the context lookup. While at it, fix the error path of the guc id allocation code to correctly free the ids allocated for parallel queues. 
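[Editor's illustration, not a literal excerpt: for a parallel queue of width N registered at a given guc id, all N consecutive ids now resolve to the same exec_queue, and the lookup sanity check becomes a range check.]

    /*
     * Example: a parallel queue of width 3 registered at guc_id 256
     * occupies three lookup slots, all pointing at the same queue:
     *
     *   xa_store(lookup, 256, q);  parent
     *   xa_store(lookup, 257, q);  child 0
     *   xa_store(lookup, 258, q);  child 1
     *
     * so a CAT_ERROR reporting guc_id 257 still resolves to q, and the
     * lookup asserts q->guc->id <= guc_id < q->guc->id + q->width
     * instead of guc_id == q->guc->id.
     */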
v2: rebase on s/XE_WARN_ON/xe_assert Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/590 Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: John Harrison Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 44 ++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index fd120d8c0af2..3e136b60196e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -247,10 +247,28 @@ int xe_guc_submit_init(struct xe_guc *guc) return 0; } +static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) +{ + int i; + + lockdep_assert_held(&guc->submission_state.lock); + + for (i = 0; i < xa_count; ++i) + xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); + + if (xe_exec_queue_is_parallel(q)) + bitmap_release_region(guc->submission_state.guc_ids_bitmap, + q->guc->id - GUC_ID_START_MLRC, + order_base_2(q->width)); + else + ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id); +} + static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) { int ret; void *ptr; + int i; /* * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, @@ -277,30 +295,27 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) if (xe_exec_queue_is_parallel(q)) q->guc->id += GUC_ID_START_MLRC; - ptr = xa_store(&guc->submission_state.exec_queue_lookup, - q->guc->id, q, GFP_NOWAIT); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); - goto err_release; + for (i = 0; i < q->width; ++i) { + ptr = xa_store(&guc->submission_state.exec_queue_lookup, + q->guc->id + i, q, GFP_NOWAIT); + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + goto err_release; + } } return 0; err_release: - ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id); + __release_guc_id(guc, q, i); + return ret; } static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) { mutex_lock(&guc->submission_state.lock); - xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id); - if (xe_exec_queue_is_parallel(q)) - bitmap_release_region(guc->submission_state.guc_ids_bitmap, - q->guc->id - GUC_ID_START_MLRC, - order_base_2(q->width)); - else - ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id); + __release_guc_id(guc, q, q->width); mutex_unlock(&guc->submission_state.lock); } @@ -1489,7 +1504,8 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) return NULL; } - xe_assert(xe, q->guc->id == guc_id); + xe_assert(xe, guc_id >= q->guc->id); + xe_assert(xe, guc_id < (q->guc->id + q->width)); return q; } -- cgit v1.2.3 From 8f965392c4d915195307979640295189eec94df4 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 14 Sep 2023 17:25:14 +0530 Subject: drm/xe: Add drm-client infrastructure Add drm-client infrastructure to record stats of consumption done by individual drm client. 
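The lifetime model is plain kref counting. A rough sketch of the intended flow (only xe_drm_client_alloc/get/put below are added by this patch; the per-object accounting shown here is wired up by later patches in the series):

  /* at device file open */
  xef->client = xe_drm_client_alloc();             /* kref starts at 1 */

  /* each object charged to the client takes a reference... */
  bo->client = xe_drm_client_get(xef->client);
  /* ...and drops it again when the object is destroyed */
  xe_drm_client_put(bo->client);

  /* at file close; the final put frees the struct */
  xe_drm_client_put(xef->client);
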
V2: - Typo - CI Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 15 ++++++++++- drivers/gpu/drm/xe/xe_device_types.h | 6 +++++ drivers/gpu/drm/xe/xe_drm_client.c | 52 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_drm_client.h | 43 +++++++++++++++++++++++++++++ 5 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_drm_client.c create mode 100644 drivers/gpu/drm/xe/xe_drm_client.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index d3b97bc11af7..9be0848ea370 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -54,6 +54,7 @@ xe-y += xe_bb.o \ xe_device.o \ xe_device_sysfs.o \ xe_dma_buf.o \ + xe_drm_client.o \ xe_exec.o \ xe_execlist.o \ xe_exec_queue.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 89bf926bc0f3..612dfc92e948 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -17,6 +17,7 @@ #include "xe_bo.h" #include "xe_debugfs.h" #include "xe_dma_buf.h" +#include "xe_drm_client.h" #include "xe_drv.h" #include "xe_exec_queue.h" #include "xe_exec.h" @@ -42,13 +43,24 @@ struct lockdep_map xe_device_mem_access_lockdep_map = { static int xe_file_open(struct drm_device *dev, struct drm_file *file) { + struct xe_device *xe = to_xe_device(dev); + struct xe_drm_client *client; struct xe_file *xef; + int ret = -ENOMEM; xef = kzalloc(sizeof(*xef), GFP_KERNEL); if (!xef) - return -ENOMEM; + return ret; + + client = xe_drm_client_alloc(); + if (!client) { + kfree(xef); + return ret; + } xef->drm = file; + xef->client = client; + xef->xe = xe; mutex_init(&xef->vm.lock); xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1); @@ -88,6 +100,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xa_destroy(&xef->vm.xa); mutex_destroy(&xef->vm.lock); + xe_drm_client_put(xef->client); kfree(xef); } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 496d7f3fb897..d748d71bb536 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -356,6 +356,9 @@ struct xe_device { * struct xe_file - file handle for XE driver */ struct xe_file { + /** @xe: xe DEVICE **/ + struct xe_device *xe; + /** @drm: base DRM file */ struct drm_file *drm; @@ -374,6 +377,9 @@ struct xe_file { /** @lock: protects file engine state */ struct mutex lock; } exec_queue; + + /** @client: drm client */ + struct xe_drm_client *client; }; #endif diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c new file mode 100644 index 000000000000..1558ca4e0eb7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include +#include +#include + +#include "xe_device_types.h" +#include "xe_drm_client.h" + +/** + * xe_drm_client_alloc() - Allocate drm client + * @void: No arg + * + * Allocate drm client struct to track client memory against + * same till client life. Call this API whenever new client + * has opened xe device. 
+ * + * Return: pointer to client struct or NULL if can't allocate + */ +struct xe_drm_client *xe_drm_client_alloc(void) +{ + struct xe_drm_client *client; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return NULL; + + kref_init(&client->kref); + + return client; +} + +/** + * __xe_drm_client_free() - Free client struct + * @kref: The reference + * + * This frees client struct. Call this API when xe device is closed + * by drm client. + * + * Return: void + */ +void __xe_drm_client_free(struct kref *kref) +{ + struct xe_drm_client *client = + container_of(kref, typeof(*client), kref); + + kfree(client); +} diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h new file mode 100644 index 000000000000..be097cdf5d12 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_drm_client.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DRM_CLIENT_H_ +#define _XE_DRM_CLIENT_H_ + +#include +#include +#include +#include +#include +#include + +struct drm_file; +struct drm_printer; + +struct xe_drm_client { + struct kref kref; + unsigned int id; +}; + + static inline struct xe_drm_client * +xe_drm_client_get(struct xe_drm_client *client) +{ + kref_get(&client->kref); + return client; +} + +void __xe_drm_client_free(struct kref *kref); + +static inline void xe_drm_client_put(struct xe_drm_client *client) +{ + kref_put(&client->kref, __xe_drm_client_free); +} + +struct xe_drm_client *xe_drm_client_alloc(void); +static inline struct xe_drm_client * +xe_drm_client_get(struct xe_drm_client *client); +static inline void xe_drm_client_put(struct xe_drm_client *client); + +#endif -- cgit v1.2.3 From 85c6ad1aa263a852d51d980575e7c1c305f1697e Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 14 Sep 2023 15:38:47 +0530 Subject: drm/xe: Interface xe drm client with fdinfo interface DRM core driver has introduced recently fdinfo interface to show memory stats of individual drm client. Lets interface xe drm client to fdinfo interface. 
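With both hooks in place, any fdinfo consumer (a gputop-style tool, or a plain cat) can read per-open-file stats. An illustrative session (the fd number and values are invented, and the memory keys only show up once the meminfo patches later in this series land):

  $ cat /proc/$(pidof xe_app)/fdinfo/4
  drm-driver:     xe
  drm-client-id:  7
  drm-total-system:       8192 KiB
  drm-resident-vram0:     65536 KiB
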
V2: - cover call to xe_drm_client_fdinfo under PROC_FS Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 6 ++++++ drivers/gpu/drm/xe/xe_drm_client.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_drm_client.h | 4 +++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 612dfc92e948..67ec55810ca3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -137,6 +137,9 @@ static const struct file_operations xe_driver_fops = { .read = drm_read, .compat_ioctl = drm_compat_ioctl, .llseek = noop_llseek, +#ifdef CONFIG_PROC_FS + .show_fdinfo = drm_show_fdinfo, +#endif }; static void xe_driver_release(struct drm_device *dev) @@ -161,6 +164,9 @@ static struct drm_driver driver = { .dumb_create = xe_bo_dumb_create, .dumb_map_offset = drm_gem_ttm_dumb_map_offset, +#ifdef CONFIG_PROC_FS + .show_fdinfo = xe_drm_client_fdinfo, +#endif .release = &xe_driver_release, .ioctls = xe_ioctls, diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 1558ca4e0eb7..98c8a0cf2f56 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -50,3 +50,20 @@ void __xe_drm_client_free(struct kref *kref) kfree(client); } + +#ifdef CONFIG_PROC_FS +/** + * xe_drm_client_fdinfo() - Callback for fdinfo interface + * @p: The drm_printer ptr + * @file: The drm_file ptr + * + * This is callabck for drm fdinfo interface. Register this callback + * in drm driver ops for show_fdinfo. + * + * Return: void + */ +void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) +{ + /* show_meminfo() will be developed here */ +} +#endif diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h index be097cdf5d12..dbe3a083c9df 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.h +++ b/drivers/gpu/drm/xe/xe_drm_client.h @@ -39,5 +39,7 @@ struct xe_drm_client *xe_drm_client_alloc(void); static inline struct xe_drm_client * xe_drm_client_get(struct xe_drm_client *client); static inline void xe_drm_client_put(struct xe_drm_client *client); - +#ifdef CONFIG_PROC_FS +void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file); +#endif #endif -- cgit v1.2.3 From b27970f3e11c616c7a5121537502f6e21a460881 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 21 Sep 2023 17:11:34 +0530 Subject: drm/xe: Add tracking support for bos per client In order to show per client memory consumption, we need tracking support APIs to add at every bo consumption and removal. Adding APIs here to add tracking calls at places wherever it is applicable. 
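Usage is symmetric around the BO lifetime. A minimal sketch (the creation-side call site here is only illustrative; the real call sites are added by later patches, while the destroy-side hook is in the hunk below):

  /* wherever a BO is created on behalf of a client file */
  xe_drm_client_add_bo(xef->client, bo);   /* takes a client ref, links the BO */

  /* xe_ttm_bo_destroy() undoes it */
  if (bo->client)
          xe_drm_client_remove_bo(bo);     /* unlinks and drops the ref */

Writers always take bos_lock; the fdinfo reader added later in the series walks the list under RCU, which is why the RCU list variants are used here.
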
V5: - Rebase V4: - remove client bo before vm_put - spin_lock_irqsave not required - Auld V3: - update .h to return xe_drm_client_remove_bo void - protect xe_drm_client_remove_bo under CONFIG_PROC_FS check - Himal - Fixed Checkpatch error - CI V2: - make xe_drm_client_remove_bo return void - Himal Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 9 +++++++ drivers/gpu/drm/xe/xe_bo_types.h | 10 ++++++++ drivers/gpu/drm/xe/xe_drm_client.c | 48 ++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_drm_client.h | 25 ++++++++++++++++++++ 4 files changed, 92 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index e812f2b7d5b9..eb08a8954742 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -16,6 +16,7 @@ #include "xe_device.h" #include "xe_dma_buf.h" +#include "xe_drm_client.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_map.h" @@ -1054,6 +1055,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) if (bo->ggtt_node.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); +#ifdef CONFIG_PROC_FS + if (bo->client) + xe_drm_client_remove_bo(bo); +#endif + if (bo->vm && xe_bo_is_user(bo)) xe_vm_put(bo->vm); @@ -1233,6 +1239,9 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL; INIT_LIST_HEAD(&bo->pinned_link); +#ifdef CONFIG_PROC_FS + INIT_LIST_HEAD(&bo->client_link); +#endif drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 27fe72129ee6..c628625c8a89 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -43,6 +43,16 @@ struct xe_bo { struct ttm_bo_kmap_obj kmap; /** @pinned_link: link to present / evicted list of pinned BO */ struct list_head pinned_link; +#ifdef CONFIG_PROC_FS + /** + * @client: @xe_drm_client which created the bo + */ + struct xe_drm_client *client; + /** + * @client_link: Link into @xe_drm_client.objects_list + */ + struct list_head client_link; +#endif /** @props: BO user controlled properties */ struct { /** @preferred_mem: preferred memory class for this BO */ diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 98c8a0cf2f56..b5ac9bc1f685 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -8,8 +8,10 @@ #include #include +#include "xe_bo_types.h" #include "xe_device_types.h" #include "xe_drm_client.h" +#include "xe_trace.h" /** * xe_drm_client_alloc() - Allocate drm client @@ -31,6 +33,10 @@ struct xe_drm_client *xe_drm_client_alloc(void) kref_init(&client->kref); +#ifdef CONFIG_PROC_FS + spin_lock_init(&client->bos_lock); + INIT_LIST_HEAD(&client->bos_list); +#endif return client; } @@ -52,6 +58,48 @@ void __xe_drm_client_free(struct kref *kref) } #ifdef CONFIG_PROC_FS +/** + * xe_drm_client_add_bo() - Add BO for tracking client mem usage + * @client: The drm client ptr + * @bo: The xe BO ptr + * + * Add all BO created by individual drm client by calling this function. + * This helps in tracking client memory usage. 
+ * + * Return: void + */ +void xe_drm_client_add_bo(struct xe_drm_client *client, + struct xe_bo *bo) +{ + XE_WARN_ON(bo->client); + XE_WARN_ON(!list_empty(&bo->client_link)); + + spin_lock(&client->bos_lock); + bo->client = xe_drm_client_get(client); + list_add_tail_rcu(&bo->client_link, &client->bos_list); + spin_unlock(&client->bos_lock); +} + +/** + * xe_drm_client_remove_bo() - Remove BO for tracking client mem usage + * @bo: The xe BO ptr + * + * Remove all BO removed by individual drm client by calling this function. + * This helps in tracking client memory usage. + * + * Return: void + */ +void xe_drm_client_remove_bo(struct xe_bo *bo) +{ + struct xe_drm_client *client = bo->client; + + spin_lock(&client->bos_lock); + list_del_rcu(&bo->client_link); + spin_unlock(&client->bos_lock); + + xe_drm_client_put(client); +} + /** * xe_drm_client_fdinfo() - Callback for fdinfo interface * @p: The drm_printer ptr diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h index dbe3a083c9df..a9649aa36011 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.h +++ b/drivers/gpu/drm/xe/xe_drm_client.h @@ -15,10 +15,23 @@ struct drm_file; struct drm_printer; +struct xe_bo; struct xe_drm_client { struct kref kref; unsigned int id; +#ifdef CONFIG_PROC_FS + /** + * @bos_lock: lock protecting @bos_list + */ + spinlock_t bos_lock; + /** + * @bos_list: list of bos created by this client + * + * Protected by @bos_lock. + */ + struct list_head bos_list; +#endif }; static inline struct xe_drm_client * @@ -41,5 +54,17 @@ xe_drm_client_get(struct xe_drm_client *client); static inline void xe_drm_client_put(struct xe_drm_client *client); #ifdef CONFIG_PROC_FS void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file); +void xe_drm_client_add_bo(struct xe_drm_client *client, + struct xe_bo *bo); +void xe_drm_client_remove_bo(struct xe_bo *bo); +#else +static inline void xe_drm_client_add_bo(struct xe_drm_client *client, + struct xe_bo *bo) +{ +} + +static inline void xe_drm_client_remove_bo(struct xe_bo *bo) +{ +} #endif #endif -- cgit v1.2.3 From 9e4e9761e64ea1086629852d30c08307538154ec Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 10 Aug 2023 11:33:24 +0530 Subject: drm/xe: Record each drm client with its VM Enable accounting of indirect client memory usage. 
Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 1 + drivers/gpu/drm/xe/xe_vm_types.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 53add99d4186..92d682ee6030 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2058,6 +2058,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, } args->vm_id = id; + vm->xef = xef; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - never enable by default */ diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index dfbc53e56a86..b0f183d00416 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -329,6 +329,8 @@ struct xe_vm { /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; + /** @xef: XE file handle for tracking this VM's drm client */ + struct xe_file *xef; }; /** struct xe_vma_op_map - VMA map operation */ -- cgit v1.2.3 From 2ff00c4f77ab68e04f381c721117f98fb3228a11 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 14 Sep 2023 15:25:16 +0530 Subject: drm/xe: Track page table memory usage for client Account page table memory usage in the owning client memory usage stats. V2: - Minor tweak to if (vm->pt_root[id]) check - Himal Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 3 +++ drivers/gpu/drm/xe/xe_vm.c | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 21a3dfe99e05..b3e23d8ed8ee 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -7,6 +7,7 @@ #include "xe_bo.h" #include "xe_device.h" +#include "xe_drm_client.h" #include "xe_gt.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" @@ -196,6 +197,8 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, pt->level = level; pt->base.dir = level ? 
&as_xe_pt_dir(pt)->dir : NULL; + if (vm->xef) + xe_drm_client_add_bo(vm->xef->client, pt->bo); xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); return pt; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 92d682ee6030..fac722074004 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -19,6 +19,7 @@ #include "xe_bo.h" #include "xe_device.h" +#include "xe_drm_client.h" #include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_gt_pagefault.h" @@ -1981,6 +1982,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_create *args = data; + struct xe_tile *tile; struct xe_vm *vm; u32 id, asid; int err; @@ -2060,6 +2062,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->vm_id = id; vm->xef = xef; + /* Record BO memory for VM pagetable created against client */ + for_each_tile(tile, xe, id) + if (vm->pt_root[id]) + xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - never enable by default */ args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); -- cgit v1.2.3 From 303fb1165765e1629e2a82bd1ebbea676c86b33e Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 29 Aug 2023 10:52:23 +0530 Subject: drm/xe: Account ring buffer and context state storage Account ring buffers and logical context space against the owning client memory usage stats. Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_lrc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 1410dcab3d90..35ae6e531d8a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -12,6 +12,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_drm_client.h" #include "xe_exec_queue_types.h" #include "xe_gt.h" #include "xe_hw_fence.h" @@ -739,9 +740,13 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, kfree(init_data); } - if (vm) + if (vm) { xe_lrc_set_ppgtt(lrc, vm); + if (vm->xef) + xe_drm_client_add_bo(vm->xef->client, lrc->bo); + } + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); -- cgit v1.2.3 From 0845233388f8a26d00acf9bf230cfd4f36aa4c30 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 15 Sep 2023 23:39:01 +0530 Subject: drm/xe: Implement fdinfo memory stats printing Use the newly added drm_print_memory_stats helper to show memory utilisation of our objects in drm/driver specific fdinfo output. To collect the stats we walk the per memory regions object lists and accumulate object size into the respective drm_memory_stats categories. Objects with multiple possible placements are reported in multiple regions for total and shared sizes, while other categories are counted only for the currently active region. 
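As a concrete example of the categorisation (following the bo_meminfo() logic in the hunk below): a BO currently placed in VRAM0, with a single GEM handle (so not shared) and unsignalled fences on its reservation object, adds its size to the vram0 private, resident and active counters and to nothing else. Once those fences signal it drops out of active, and only a system-memory placement can ever be counted as purgeable.
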
V4: - Remove rcu lock - Auld/Thomas - take refcnt only if its non-zero - Auld - DMA_RESV_USAGE_BOOKKEEP covers all fences - Auld - covert to xe_bo for public objects V3: - dont use xe_bo_get/put, not needed - use designated initializer - Jani - use list_for_each_entry_rcu - Fix Checkpatch err - CI V2: - Use static initializer for mem_type - Himal/Jani Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 11 +++++ drivers/gpu/drm/xe/xe_drm_client.c | 89 +++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index d22b2ae7db72..23f1b9e74e71 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -6,6 +6,8 @@ #ifndef _XE_BO_H_ #define _XE_BO_H_ +#include + #include "xe_bo_types.h" #include "xe_macros.h" #include "xe_vm_types.h" @@ -247,6 +249,15 @@ static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) return PAGE_ALIGN(bo->ttm.base.size); } +static inline bool xe_bo_has_pages(struct xe_bo *bo) +{ + if ((bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) || + xe_bo_is_vram(bo)) + return true; + + return false; +} + void __xe_bo_release_dummy(struct kref *kref); /** diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index b5ac9bc1f685..82d1305e831f 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -4,10 +4,12 @@ */ #include +#include #include #include #include +#include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" #include "xe_drm_client.h" @@ -100,6 +102,91 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) xe_drm_client_put(client); } +static void bo_meminfo(struct xe_bo *bo, + struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) +{ + u64 sz = bo->size; + u32 mem_type; + + if (bo->placement.placement) + mem_type = bo->placement.placement->mem_type; + else + mem_type = XE_PL_TT; + + if (bo->ttm.base.handle_count > 1) + stats[mem_type].shared += sz; + else + stats[mem_type].private += sz; + + if (xe_bo_has_pages(bo)) { + stats[mem_type].resident += sz; + + if (!dma_resv_test_signaled(bo->ttm.base.resv, + DMA_RESV_USAGE_BOOKKEEP)) + stats[mem_type].active += sz; + else if (mem_type == XE_PL_SYSTEM) + stats[mem_type].purgeable += sz; + } +} + +static void show_meminfo(struct drm_printer *p, struct drm_file *file) +{ + static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = { + [XE_PL_SYSTEM] = "system", + [XE_PL_TT] = "gtt", + [XE_PL_VRAM0] = "vram0", + [XE_PL_VRAM1] = "vram1", + [4 ... 6] = NULL, + [XE_PL_STOLEN] = "stolen" + }; + struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {}; + struct xe_file *xef = file->driver_priv; + struct ttm_device *bdev = &xef->xe->ttm; + struct ttm_resource_manager *man; + struct xe_drm_client *client; + struct drm_gem_object *obj; + struct xe_bo *bo; + unsigned int id; + u32 mem_type; + + client = xef->client; + + /* Public objects. */ + spin_lock(&file->table_lock); + idr_for_each_entry(&file->object_idr, obj, id) { + struct xe_bo *bo = gem_to_xe_bo(obj); + + bo_meminfo(bo, stats); + } + spin_unlock(&file->table_lock); + + /* Internal objects. 
*/ + spin_lock(&client->bos_lock); + list_for_each_entry_rcu(bo, &client->bos_list, client_link) { + if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount)) + continue; + bo_meminfo(bo, stats); + xe_bo_put(bo); + } + spin_unlock(&client->bos_lock); + + for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { + if (!mem_type_to_name[mem_type]) + continue; + + man = ttm_manager_type(bdev, mem_type); + + if (man) { + drm_print_memory_stats(p, + &stats[mem_type], + DRM_GEM_OBJECT_RESIDENT | + (mem_type != XE_PL_SYSTEM ? 0 : + DRM_GEM_OBJECT_PURGEABLE), + mem_type_to_name[mem_type]); + } + } +} + /** * xe_drm_client_fdinfo() - Callback for fdinfo interface * @p: The drm_printer ptr @@ -112,6 +199,6 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) */ void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) { - /* show_meminfo() will be developed here */ + show_meminfo(p, file); } #endif -- cgit v1.2.3 From dbac286d8529d6debc0f56fa9a3ea26f78826997 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 18 Sep 2023 14:48:46 +0300 Subject: drm/xe: proper setting of irq enabled flag IRQ enabled flag should be set only after request irq succeeds. Reviewed-by: Ohad Sharabi Signed-off-by: Dani Liberman Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index dec3d518b3fc..e1126eccb50e 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -579,16 +579,14 @@ int xe_irq_install(struct xe_device *xe) return -EINVAL; } - xe->irq.enabled = true; - xe_irq_reset(xe); err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); - if (err < 0) { - xe->irq.enabled = false; + if (err < 0) return err; - } + + xe->irq.enabled = true; xe_irq_postinstall(xe); -- cgit v1.2.3 From 14d25d8d684d0196d160653659c5afbf5af777f0 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 18 Sep 2023 14:48:47 +0300 Subject: drm/xe: change old msi irq api to a new one As a preparation for msix support, changing for new msi irq api which supports both msi and msix. 
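The change boils down to swapping the legacy MSI helpers for the vector-based API. Condensed from the hunks below:

  /* old: MSI only, IRQ number comes straight from the pci_dev */
  pci_enable_msi(pdev);
  err = request_irq(pdev->irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);

  /* new: the same calls will cover MSI-X once the allocation flags allow it */
  err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
  irq = pci_irq_vector(pdev, 0);           /* Linux IRQ number for vector 0 */
  err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe);

  /* teardown */
  free_irq(irq, xe);
  pci_free_irq_vectors(pdev);
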
Reviewed-by: Ohad Sharabi Signed-off-by: Dani Liberman Signed-off-by: Rodrigo Vivi [Rebase fixes by Rodrigo] --- drivers/gpu/drm/xe/xe_irq.c | 38 +++++++++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_pci.c | 3 --- 2 files changed, 29 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index e1126eccb50e..82ddf9d84a56 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -555,23 +555,24 @@ static void irq_uninstall(struct drm_device *drm, void *arg) { struct xe_device *xe = arg; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - int irq = pdev->irq; + int irq; if (!xe->irq.enabled) return; xe->irq.enabled = false; xe_irq_reset(xe); + + irq = pci_irq_vector(pdev, 0); free_irq(irq, xe); - if (pdev->msi_enabled) - pci_disable_msi(pdev); + pci_free_irq_vectors(pdev); } int xe_irq_install(struct xe_device *xe) { - int irq = to_pci_dev(xe->drm.dev)->irq; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); irq_handler_t irq_handler; - int err; + int err, irq; irq_handler = xe_irq_handler(xe); if (!irq_handler) { @@ -581,16 +582,35 @@ int xe_irq_install(struct xe_device *xe) xe_irq_reset(xe); - err = request_irq(irq, irq_handler, - IRQF_SHARED, DRIVER_NAME, xe); - if (err < 0) + err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (err < 0) { + drm_err(&xe->drm, "MSI: Failed to enable support %d\n", err); return err; + } + + irq = pci_irq_vector(pdev, 0); + err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); + if (err < 0) { + drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err); + goto free_pci_irq_vectors; + } xe->irq.enabled = true; xe_irq_postinstall(xe); - return drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe); + err = drmm_add_action_or_reset(&xe->drm, irq_uninstall, xe); + if (err) + goto free_irq_handler; + + return 0; + +free_irq_handler: + free_irq(irq, xe); +free_pci_irq_vectors: + pci_free_irq_vectors(pdev); + + return err; } void xe_irq_shutdown(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index b72d9f568768..7e018ee9db40 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -670,9 +670,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - if (pci_enable_msi(pdev) < 0) - drm_dbg(&xe->drm, "can't enable MSI"); - err = xe_info_init(xe, desc, subplatform_desc); if (err) goto err_pci_disable; -- cgit v1.2.3 From bc18dae50f165bc1c18284fe59d77dd00617b530 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 18 Sep 2023 14:48:48 +0300 Subject: drm/xe: add msix support In future devices we will need to support msix interrupts. 
Reviewed-by: Ohad Sharabi Signed-off-by: Dani Liberman Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 82ddf9d84a56..a91e782e06eb 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -582,16 +582,16 @@ int xe_irq_install(struct xe_device *xe) xe_irq_reset(xe); - err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (err < 0) { - drm_err(&xe->drm, "MSI: Failed to enable support %d\n", err); + drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err); return err; } irq = pci_irq_vector(pdev, 0); err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); if (err < 0) { - drm_err(&xe->drm, "Failed to request MSI IRQ %d\n", err); + drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err); goto free_pci_irq_vectors; } -- cgit v1.2.3 From babba646785d6855cba64fb0480beb8d3421cc52 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 22 Sep 2023 10:43:20 -0700 Subject: drm/xe: Accept a const xe device Depending on the context, it's preferred to have a const pointer to make sure nothing is modified underneath. The assert macros only ever read data from xe/tile/gt for printing, so they can be made const by default. Reviewed-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20230922174320.2372617-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_assert.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 962aac1bc764..34c142e6cfb0 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -86,7 +86,7 @@ }) #else #define __xe_assert_msg(xe, condition, msg, arg...) ({ \ - typecheck(struct xe_device *, xe); \ + typecheck(const struct xe_device *, xe); \ BUILD_BUG_ON_INVALID(condition); \ }) #endif @@ -107,7 +107,7 @@ */ #define xe_assert(xe, condition) xe_assert_msg((xe), condition, "") #define xe_assert_msg(xe, condition, msg, arg...) ({ \ - struct xe_device *__xe = (xe); \ + const struct xe_device *__xe = (xe); \ __xe_assert_msg(__xe, condition, \ "platform: %d subplatform: %d\n" \ "graphics: %s %u.%02u step %s\n" \ @@ -142,7 +142,7 @@ */ #define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "") #define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \ - struct xe_tile *__tile = (tile); \ + const struct xe_tile *__tile = (tile); \ char __buf[10] __maybe_unused; \ xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \ __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \ @@ -166,7 +166,7 @@ */ #define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "") #define xe_gt_assert_msg(gt, condition, msg, arg...) 
({ \ - struct xe_gt *__gt = (gt); \ + const struct xe_gt *__gt = (gt); \ xe_tile_assert_msg(gt_to_tile(__gt), condition, "GT: %u type %d\n" msg, \ __gt->info.id, __gt->info.type, ## arg); \ }) -- cgit v1.2.3 From 02cadbb5d123204ce193672007868d18db762172 Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Thu, 21 Sep 2023 03:02:59 +0530 Subject: drm/xe: Align size to PAGE_SIZE Ensure alignment with PAGE_SIZE for the size parameter passed to __xe_bo_create_locked() v2: move size alignment under else condition (Lucas) Signed-off-by: Pallavi Mishra Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230920213259.3458968-1-pallavi.mishra@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index eb08a8954742..1a10d9324a07 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1227,6 +1227,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, flags |= XE_BO_INTERNAL_64K; alignment = SZ_64K >> PAGE_SHIFT; } else { + size = ALIGN(size, PAGE_SIZE); alignment = SZ_4K >> PAGE_SHIFT; } -- cgit v1.2.3 From 5fdd4b21aed8a33fd8e8f8fb3dc2f0c8f659918b Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 25 Sep 2023 21:35:43 +0530 Subject: drm/xe: Add Wa_18028616096 Drop UGM per set fragment threshold to 3 BSpec: 54833 Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230925160543.915217-1-shekhar.chauhan@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index e13fbbdf6929..3a4c9bcf793f 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -347,6 +347,7 @@ #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) #define LSC_CHICKEN_BIT_0_UDW XE_REG_MCR(0xe7c8 + 4) +#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d84e67a9af07..1450af6cab34 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -374,6 +374,16 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE)) }, + { XE_RTP_NAME("18028616096"), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3)) + }, + { XE_RTP_NAME("18028616096"), + XE_RTP_RULES(SUBPLATFORM(DG2, G12), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3)) + }, { XE_RTP_NAME("16011620976, 22015475538"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), -- cgit v1.2.3 From 51a5d656090e0a865d91f1e6ce0c7a09d71a4b70 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Tue, 26 Sep 2023 19:19:15 -0300 Subject: drm/xe/irq: Clear GFX_MSTR_IRQ as part of IRQ reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with Xe_LP+, GFX_MSTR_IRQ contains status bits that have W1C 
behavior. If we do not properly reset them, we would miss delivery of interrupts if a pending bit is set when enabling IRQs. As an example, the display part of our probe routine contains paths where we wait for vblank interrupts. If a display interrupt was already pending when enabling IRQs, we would time out waiting for the vblank. That in fact happened recently when modprobing Xe on a Lunar Lake with a specific configuration; and that's how we found out we were missing this step in the IRQ enabling logic. Fix the issue by clearing GFX_MSTR_IRQ as part of the IRQ reset. v2: - Make resetting GFX_MSTR_IRQ be the last step to avoid bit re-latching. (Ville) v3: - Swap nesting order: guard loop with the IP version check instead of doing the check at each iteration. (Lucas) v4: - Add braces for the "if" statement guarding the loop to make the compiler happy. (Gustavo) BSpec: 50875, 54028, 62357 Cc: Matt Roper Cc: Ville Syrjälä Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230926221914.106843-2-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index a91e782e06eb..4cc5f7086b4c 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -511,6 +511,13 @@ static void dg1_irq_reset(struct xe_tile *tile) mask_and_disable(tile, PCU_IRQ_OFFSET); } +static void dg1_irq_reset_mstr(struct xe_tile *tile) +{ + struct xe_gt *mmio = tile->primary_gt; + + xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0); +} + static void xe_irq_reset(struct xe_device *xe) { struct xe_tile *tile; @@ -525,6 +532,16 @@ static void xe_irq_reset(struct xe_device *xe) tile = xe_device_get_root_tile(xe); mask_and_disable(tile, GU_MISC_IRQ_OFFSET); + + /* + * The tile's top-level status register should be the last one + * to be reset to avoid possible bit re-latching from lower + * level interrupts. + */ + if (GRAPHICS_VERx100(xe) >= 1210) { + for_each_tile(tile, xe, id) + dg1_irq_reset_mstr(tile); + } } static void xe_irq_postinstall(struct xe_device *xe) -- cgit v1.2.3 From 1951dad5347e8b618f545d2c14f8d2816be61b1f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 27 Sep 2023 13:51:44 -0700 Subject: drm/xe: Infer service copy functionality from engine list On platforms with multiple BCS engines (i.e., PVC and Xe2), not all BCS engines are created equal. The BCS0 engine is what the specs refer to as a "resource copy engine," which supports the platform's full set of copy/fill instructions. In contast, the non-BCS0 "service copy" engines are more streamlined and only support a subset of the GPU instructions supported by the resource copy engine. Platforms with both types of copy engines always support the MEM_COPY and MEM_SET instructions which can be used for simple copy and fill operations on either type of BCS engine. Since the simple MEM_SET instruction meets the needs of Xe's migrate code (and since the more elaborate XY_FAST_COLOR_BLT instruction isn't available to use on service copy engines), we always prefer to use MEM_SET for clearing buffers on our newer platforms. We've been using a 'has_link_copy_engine' feature flag to keep track of which platforms should use MEM_SET for fills. 
However a feature flag like this is unnecessary since we can already derive the presence of service copy engines (and in turn the MEM_SET instruction) just by looking at the platform's pre-fusing engine list. Utilizing the engine list for this also avoids mistakes like we've made on Xe2 where we forget to set the feature flag in the IP definition. For clarity, "service copy" is a general term that covers any blitter engines that support a limited subset of the overall blitter instruction set (in practice this is any non-BCS0 blitter engine). The "link copy engines" introduced on PVC and the "paging copy engine" present in Xe2 are both instances of service copy engines. v2: - Rewrite / expand the commit message. (Bala) - Fix checkpatch whitespace error. Bspec: 65019 Cc: Lucas De Marchi Cc: Balasubramani Vivekanandan Reviewed-by: Haridhar Kalvala Link: https://lore.kernel.org/r/20230927205143.2695089-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 -- drivers/gpu/drm/xe/xe_migrate.c | 34 +++++++++++++++++++++------------- drivers/gpu/drm/xe/xe_pci.c | 2 -- drivers/gpu/drm/xe/xe_pci_types.h | 1 - 4 files changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index d748d71bb536..aa9935ff6d84 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -228,8 +228,6 @@ struct xe_device { u8 has_llc:1; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ u8 has_range_tlb_invalidation:1; - /** @has_link_copy_engines: Whether the platform has link copy engines */ - u8 has_link_copy_engine:1; } info; /** @irq: device interrupt state */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 713f98baf0ee..2d31e6ff72ea 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -854,28 +854,36 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, bb->len += len; } -static u32 emit_clear_cmd_len(struct xe_device *xe) +static bool has_service_copy_support(struct xe_gt *gt) { - if (xe->info.has_link_copy_engine) + /* + * What we care about is whether the architecture was designed with + * service copy functionality (specifically the new MEM_SET / MEM_COPY + * instructions) so check the architectural engine list rather than the + * actual list since these instructions are usable on BCS0 even if + * all of the actual service copy engines (BCS1-BCS8) have been fused + * off. 
+ */ + return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8, + XE_HW_ENGINE_BCS1); +} + +static u32 emit_clear_cmd_len(struct xe_gt *gt) +{ + if (has_service_copy_support(gt)) return PVC_MEM_SET_CMD_LEN_DW; else return XY_FAST_COLOR_BLT_DW; } -static int emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, - u32 size, u32 pitch, bool is_vram) +static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, + u32 size, u32 pitch, bool is_vram) { - struct xe_device *xe = gt_to_xe(gt); - - if (xe->info.has_link_copy_engine) { + if (has_service_copy_support(gt)) emit_clear_link_copy(gt, bb, src_ofs, size, pitch); - - } else { + else emit_clear_main_copy(gt, bb, src_ofs, size, pitch, is_vram); - } - - return 0; } /** @@ -928,7 +936,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, clear_vram, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - emit_clear_cmd_len(xe), 0, + emit_clear_cmd_len(gt), 0, NUM_PT_PER_BLIT); if (xe_device_has_flat_ccs(xe) && clear_vram) batch_size += EMIT_COPY_CCS_DW; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 7e018ee9db40..fd8d7eddd6f6 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -132,7 +132,6 @@ static const struct xe_graphics_desc graphics_xehpc = { .has_asid = 1, .has_flat_ccs = 0, - .has_link_copy_engine = 1, .supports_usm = 1, }; @@ -556,7 +555,6 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_asid = graphics_desc->has_asid; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; - xe->info.has_link_copy_engine = graphics_desc->has_link_copy_engine; /* * All platforms have at least one primary GT. Any platform with media diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index df6ddbc52d7f..bd0b0d87413e 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -24,7 +24,6 @@ struct xe_graphics_desc { u8 has_asid:1; u8 has_flat_ccs:1; - u8 has_link_copy_engine:1; u8 has_range_tlb_invalidation:1; u8 supports_usm:1; }; -- cgit v1.2.3 From f24081cd6275748d4f7c5925645436ed406cec12 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:38:52 -0700 Subject: drm/xe: Normalize pte/pde encoding Split functions that do only part of the pde/pte encoding and that can be called by the different places. This normalizes how pde/pte are encoded so they can be moved elsewhere in a subsequent change. xe_pte_encode() was calling __pte_encode() with a NULL vma, which is the opposite of what xe_pt_stage_bind_entry() does. Stop passing a NULL vma and just split another function that deals with a vma rather than a bo. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 119 +++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 52 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index b3e23d8ed8ee..01e7c8815e7d 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -47,91 +47,106 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) return container_of(pt_dir->dir.entries[index], struct xe_pt, base); } -/** - * xe_pde_encode() - Encode a page-table directory entry pointing to - * another page-table. 
- * @bo: The page-table bo of the page-table to point to. - * @bo_offset: Offset in the page-table bo to point to. - * @level: The cache level indicating the caching of @bo. - * - * TODO: Rename. - * - * Return: An encoded page directory entry. No errors. - */ -u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, - const enum xe_cache_level level) +static u64 pde_encode_cache(enum xe_cache_level cache) { - u64 pde; - - pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - /* FIXME: I don't think the PPAT handling is correct for MTL */ - if (level != XE_CACHE_NONE) - pde |= PPAT_CACHED_PDE; - else - pde |= PPAT_UNCACHED; + if (cache != XE_CACHE_NONE) + return PPAT_CACHED_PDE; - return pde; + return PPAT_UNCACHED; } -static u64 __pte_encode(u64 pte, enum xe_cache_level cache, - struct xe_vma *vma, u32 pt_level) +static u64 pte_encode_cache(enum xe_cache_level cache) { - pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - - if (unlikely(vma && xe_vma_read_only(vma))) - pte &= ~XE_PAGE_RW; - - if (unlikely(vma && xe_vma_is_null(vma))) - pte |= XE_PTE_NULL; - /* FIXME: I don't think the PPAT handling is correct for MTL */ - switch (cache) { case XE_CACHE_NONE: - pte |= PPAT_UNCACHED; - break; + return PPAT_UNCACHED; case XE_CACHE_WT: - pte |= PPAT_DISPLAY_ELLC; - break; + return PPAT_DISPLAY_ELLC; default: - pte |= PPAT_CACHED; - break; + return PPAT_CACHED; } +} + +static u64 pte_encode_ps(u32 pt_level) +{ + /* XXX: Does hw support 1 GiB pages? */ + XE_WARN_ON(pt_level > 2); if (pt_level == 1) - pte |= XE_PDE_PS_2M; + return XE_PDE_PS_2M; else if (pt_level == 2) - pte |= XE_PDPE_PS_1G; + return XE_PDPE_PS_1G; - /* XXX: Does hw support 1 GiB pages? */ - XE_WARN_ON(pt_level > 2); + return 0; +} - return pte; +/** + * xe_pde_encode() - Encode a page-table directory entry pointing to + * another page-table. + * @bo: The page-table bo of the page-table to point to. + * @bo_offset: Offset in the page-table bo to point to. + * @cache: The cache level indicating the caching of @bo. + * + * TODO: Rename. + * + * Return: An encoded page directory entry. No errors. + */ +u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level cache) +{ + u64 pde; + + pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); + pde |= XE_PAGE_PRESENT | XE_PAGE_RW; + pde |= pde_encode_cache(cache); + + return pde; } /** * xe_pte_encode() - Encode a page-table entry pointing to memory. * @bo: The BO representing the memory to point to. - * @offset: The offset into @bo. + * @bo_offset: The offset into @bo. * @cache: The cache level indicating * @pt_level: The page-table level of the page-table into which the entry * is to be inserted. * * Return: An encoded page-table entry. No errors. 
*/ -u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache, +u64 xe_pte_encode(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache, u32 pt_level) { u64 pte; - pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE); + pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); + pte |= XE_PAGE_PRESENT | XE_PAGE_RW; + pte |= pte_encode_cache(cache); + pte |= pte_encode_ps(pt_level); + if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) pte |= XE_PPGTT_PTE_DM; - return __pte_encode(pte, cache, NULL, pt_level); + return pte; +} + +/* Like xe_pte_encode(), but with a vma and a partially-encoded pte */ +static u64 __vma_pte_encode(u64 pte, struct xe_vma *vma, + enum xe_cache_level cache, u32 pt_level) +{ + pte |= XE_PAGE_PRESENT | XE_PAGE_RW; + pte |= pte_encode_cache(cache); + pte |= pte_encode_ps(pt_level); + + if (unlikely(vma && xe_vma_read_only(vma))) + pte &= ~XE_PAGE_RW; + + if (unlikely(vma && xe_vma_is_null(vma))) + pte |= XE_PTE_NULL; + + return pte; } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -614,9 +629,9 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, XE_WARN_ON(xe_walk->va_curs_start != addr); - pte = __pte_encode(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->cache, xe_walk->vma, level); + pte = __vma_pte_encode(is_null ? 0 : + xe_res_dma(curs) + xe_walk->dma_offset, + xe_walk->vma, xe_walk->cache, level); pte |= xe_walk->default_pte; /* -- cgit v1.2.3 From b3bb7d9c561d664707717f8887b665ce8fef69ff Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:38:53 -0700 Subject: drm/xe: Remove check for vma == NULL vma at this point can never be NULL as otherwise it would crash earlier in the only caller, xe_pt_stage_bind_entry(). Remove the extra check and avoid adding and removing the bits from the pte. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 01e7c8815e7d..c39e3b46df3e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -136,14 +136,15 @@ u64 xe_pte_encode(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache, static u64 __vma_pte_encode(u64 pte, struct xe_vma *vma, enum xe_cache_level cache, u32 pt_level) { - pte |= XE_PAGE_PRESENT | XE_PAGE_RW; + pte |= XE_PAGE_PRESENT; + + if (likely(!xe_vma_read_only(vma))) + pte |= XE_PAGE_RW; + pte |= pte_encode_cache(cache); pte |= pte_encode_ps(pt_level); - if (unlikely(vma && xe_vma_read_only(vma))) - pte &= ~XE_PAGE_RW; - - if (unlikely(vma && xe_vma_is_null(vma))) + if (unlikely(xe_vma_is_null(vma))) pte |= XE_PTE_NULL; return pte; -- cgit v1.2.3 From 0e5e77bd9704edf1713ebed37e2da1b4faa25a52 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:38:54 -0700 Subject: drm/xe: Use vfunc for pte/pde ppgtt encoding Move the function to encode pte/pde to be vfuncs inside struct xe_vm. This will allow to easily extend to platforms that don't have a compatible encoding. 
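The point of the indirection is that a platform whose PTE/PDE layout differs (the FIXMEs about MTL's PPAT handling already hint at one such case) only has to supply its own table. Hypothetically, with made-up xelpg_* names that are not part of this series:

  static const struct xe_pt_ops xelpg_pt_ops = {
          .pte_encode_bo  = xelpg_pte_encode_bo,
          .pte_encode_vma = xelpg_pte_encode_vma,
          .pde_encode_bo  = xelpg_pde_encode_bo,
  };

  /* chosen once at VM creation; every caller goes through vm->pt_ops */
  vm->pt_ops = &xelpg_pt_ops;
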
v2: Fix kunit build Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 18 ++--- drivers/gpu/drm/xe/xe_pt.c | 125 +++------------------------------- drivers/gpu/drm/xe/xe_pt.h | 6 -- drivers/gpu/drm/xe/xe_pt_types.h | 14 ++++ drivers/gpu/drm/xe/xe_vm.c | 93 ++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_vm_types.h | 2 + 7 files changed, 128 insertions(+), 132 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index f58cd1da1a34..6906ff9d9c31 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* First part of the test, are we updating our pagetable bo with a new entry? */ xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); - expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0); + expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, XE_CACHE_WB, 0); if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2d31e6ff72ea..6884e79199d5 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -189,14 +189,15 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, return ret; } - entry = xe_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB); + entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); map_ofs = (num_entries - num_level) * XE_PAGE_SIZE; /* Map the entire BO in our level 0 pt */ for (i = 0, level = 0; i < num_entries; level++) { - entry = xe_pte_encode(bo, i * XE_PAGE_SIZE, XE_CACHE_WB, 0); + entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE, + XE_CACHE_WB, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -214,7 +215,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, for (i = 0; i < batch->size; i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { - entry = xe_pte_encode(batch, i, XE_CACHE_WB, 0); + entry = vm->pt_ops->pte_encode_bo(batch, i, + XE_CACHE_WB, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -238,16 +240,16 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (vm->flags & XE_VM_FLAG_64K && level == 1) flags = XE_PDE_64K; - entry = xe_pde_encode(bo, map_ofs + (level - 1) * - XE_PAGE_SIZE, XE_CACHE_WB); + entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) * + XE_PAGE_SIZE, XE_CACHE_WB); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); } /* Write PDE's that point to our BO. 
*/ for (i = 0; i < num_entries - num_level; i++) { - entry = xe_pde_encode(bo, i * XE_PAGE_SIZE, - XE_CACHE_WB); + entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE, + XE_CACHE_WB); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + (i + 1) * 8, u64, entry); @@ -1263,7 +1265,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, xe_tile_assert(tile, pt_bo->size == SZ_4K); - addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0); + addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, XE_CACHE_WB, 0); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index c39e3b46df3e..d5f721efdc3c 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -47,109 +47,6 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) return container_of(pt_dir->dir.entries[index], struct xe_pt, base); } -static u64 pde_encode_cache(enum xe_cache_level cache) -{ - /* FIXME: I don't think the PPAT handling is correct for MTL */ - - if (cache != XE_CACHE_NONE) - return PPAT_CACHED_PDE; - - return PPAT_UNCACHED; -} - -static u64 pte_encode_cache(enum xe_cache_level cache) -{ - /* FIXME: I don't think the PPAT handling is correct for MTL */ - switch (cache) { - case XE_CACHE_NONE: - return PPAT_UNCACHED; - case XE_CACHE_WT: - return PPAT_DISPLAY_ELLC; - default: - return PPAT_CACHED; - } -} - -static u64 pte_encode_ps(u32 pt_level) -{ - /* XXX: Does hw support 1 GiB pages? */ - XE_WARN_ON(pt_level > 2); - - if (pt_level == 1) - return XE_PDE_PS_2M; - else if (pt_level == 2) - return XE_PDPE_PS_1G; - - return 0; -} - -/** - * xe_pde_encode() - Encode a page-table directory entry pointing to - * another page-table. - * @bo: The page-table bo of the page-table to point to. - * @bo_offset: Offset in the page-table bo to point to. - * @cache: The cache level indicating the caching of @bo. - * - * TODO: Rename. - * - * Return: An encoded page directory entry. No errors. - */ -u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, - const enum xe_cache_level cache) -{ - u64 pde; - - pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_cache(cache); - - return pde; -} - -/** - * xe_pte_encode() - Encode a page-table entry pointing to memory. - * @bo: The BO representing the memory to point to. - * @bo_offset: The offset into @bo. - * @cache: The cache level indicating - * @pt_level: The page-table level of the page-table into which the entry - * is to be inserted. - * - * Return: An encoded page-table entry. No errors. 
- */ -u64 xe_pte_encode(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache, - u32 pt_level) -{ - u64 pte; - - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_cache(cache); - pte |= pte_encode_ps(pt_level); - - if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) - pte |= XE_PPGTT_PTE_DM; - - return pte; -} - -/* Like xe_pte_encode(), but with a vma and a partially-encoded pte */ -static u64 __vma_pte_encode(u64 pte, struct xe_vma *vma, - enum xe_cache_level cache, u32 pt_level) -{ - pte |= XE_PAGE_PRESENT; - - if (likely(!xe_vma_read_only(vma))) - pte |= XE_PAGE_RW; - - pte |= pte_encode_cache(cache); - pte |= pte_encode_ps(pt_level); - - if (unlikely(xe_vma_is_null(vma))) - pte |= XE_PTE_NULL; - - return pte; -} - static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, unsigned int level) { @@ -158,15 +55,11 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, if (!vm->scratch_bo[id]) return 0; - if (level == 0) { - u64 empty = xe_pte_encode(vm->scratch_bo[id], 0, - XE_CACHE_WB, 0); + if (level > 0) + return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, + 0, XE_CACHE_WB); - return empty; - } else { - return xe_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0, - XE_CACHE_WB); - } + return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, XE_CACHE_WB, 0); } /** @@ -618,6 +511,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, struct xe_pt_stage_bind_walk *xe_walk = container_of(walk, typeof(*xe_walk), base); struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); + struct xe_vm *vm = xe_walk->vm; struct xe_pt *xe_child; bool covers; int ret = 0; @@ -630,9 +524,9 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, XE_WARN_ON(xe_walk->va_curs_start != addr); - pte = __vma_pte_encode(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, xe_walk->cache, level); + pte = vm->pt_ops->pte_encode_vma(is_null ? 
0 : + xe_res_dma(curs) + xe_walk->dma_offset, + xe_walk->vma, xe_walk->cache, level); pte |= xe_walk->default_pte; /* @@ -697,7 +591,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, xe_child->is_compact = true; } - pte = xe_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags; + pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, + xe_walk->cache) | flags; ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, pte); } diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 01be7ab08f87..d5460e58dbbf 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -45,10 +45,4 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma); -u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset, - const enum xe_cache_level level); - -u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache, - u32 pt_level); - #endif diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 2ed64c0a4485..c58f6926fabf 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -6,8 +6,13 @@ #ifndef _XE_PT_TYPES_H_ #define _XE_PT_TYPES_H_ +#include + #include "xe_pt_walk.h" +struct xe_bo; +struct xe_vma; + enum xe_cache_level { XE_CACHE_NONE, XE_CACHE_WT, @@ -29,6 +34,15 @@ struct xe_pt { #endif }; +struct xe_pt_ops { + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, + enum xe_cache_level cache, u32 pt_level); + u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma, + enum xe_cache_level cache, u32 pt_level); + u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level cache); +}; + struct xe_pt_entry { struct xe_pt *pt; u64 pte; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index fac722074004..72e27e6809f9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1210,6 +1210,93 @@ static struct drm_gpuvm_ops gpuvm_ops = { .vm_free = xe_vm_free, }; +static u64 pde_encode_cache(enum xe_cache_level cache) +{ + /* FIXME: I don't think the PPAT handling is correct for MTL */ + + if (cache != XE_CACHE_NONE) + return PPAT_CACHED_PDE; + + return PPAT_UNCACHED; +} + +static u64 pte_encode_cache(enum xe_cache_level cache) +{ + /* FIXME: I don't think the PPAT handling is correct for MTL */ + switch (cache) { + case XE_CACHE_NONE: + return PPAT_UNCACHED; + case XE_CACHE_WT: + return PPAT_DISPLAY_ELLC; + default: + return PPAT_CACHED; + } +} + +static u64 pte_encode_ps(u32 pt_level) +{ + /* XXX: Does hw support 1 GiB pages? 
*/ + XE_WARN_ON(pt_level > 2); + + if (pt_level == 1) + return XE_PDE_PS_2M; + else if (pt_level == 2) + return XE_PDPE_PS_1G; + + return 0; +} + +static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, + const enum xe_cache_level cache) +{ + u64 pde; + + pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); + pde |= XE_PAGE_PRESENT | XE_PAGE_RW; + pde |= pde_encode_cache(cache); + + return pde; +} + +static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, + enum xe_cache_level cache, u32 pt_level) +{ + u64 pte; + + pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); + pte |= XE_PAGE_PRESENT | XE_PAGE_RW; + pte |= pte_encode_cache(cache); + pte |= pte_encode_ps(pt_level); + + if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) + pte |= XE_PPGTT_PTE_DM; + + return pte; +} + +static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, + enum xe_cache_level cache, u32 pt_level) +{ + pte |= XE_PAGE_PRESENT; + + if (likely(!xe_vma_read_only(vma))) + pte |= XE_PAGE_RW; + + pte |= pte_encode_cache(cache); + pte |= pte_encode_ps(pt_level); + + if (unlikely(xe_vma_is_null(vma))) + pte |= XE_PTE_NULL; + + return pte; +} + +static const struct xe_pt_ops xelp_pt_ops = { + .pte_encode_bo = xelp_pte_encode_bo, + .pte_encode_vma = xelp_pte_encode_vma, + .pde_encode_bo = xelp_pde_encode_bo, +}; + static void xe_vma_op_work_func(struct work_struct *w); static void vm_destroy_work_func(struct work_struct *w); @@ -1257,6 +1344,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) INIT_LIST_HEAD(&vm->extobj.list); + vm->pt_ops = &xelp_pt_ops; + if (!(flags & XE_VM_FLAG_MIGRATION)) xe_device_mem_access_get(xe); @@ -1576,8 +1665,8 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) { - return xe_pde_encode(vm->pt_root[tile->id]->bo, 0, - XE_CACHE_WB); + return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, + XE_CACHE_WB); } static struct dma_fence * diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index b0f183d00416..9a1075a75606 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -242,6 +242,8 @@ struct xe_vm { bool munmap_rebind_inflight; } async_ops; + const struct xe_pt_ops *pt_ops; + /** @userptr: user pointer state */ struct { /** -- cgit v1.2.3 From 23c8495efeed0d83657de89b44a569ac406bdfad Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:38:55 -0700 Subject: drm/xe/migrate: Do not hand-encode pte Instead of encoding the pte, call a new vfunc from xe_vm to handle that. The encoding may not be the same on every platform, so keeping it in one place helps to better support them. 
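As a rough illustration of the resulting call-site pattern (a hypothetical helper, not part of this patch; the actual hunks are in the diff below), callers ask the VM's pt_ops to build the PTE instead of OR-ing the bits themselves:

	/* Hypothetical helper for illustration only. */
	static u64 example_pte(struct xe_vm *vm, u64 addr, u32 level, bool devmem)
	{
		/*
		 * Cache level, page-size and device-memory bits are resolved
		 * by the vfunc; the caller only passes bits it owns through
		 * the flags argument (e.g. XE_PTE_PS64), here none.
		 */
		return vm->pt_ops->pte_encode_addr(addr, XE_CACHE_WB, level, devmem, 0);
	}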
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 14 ++++++++------ drivers/gpu/drm/xe/xe_pt_types.h | 2 ++ drivers/gpu/drm/xe/xe_vm.c | 23 ++++++++++++++++++++++- 3 files changed, 32 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6884e79199d5..cd4dbbf6c383 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -261,8 +261,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level = 2; ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8; - flags = XE_PAGE_RW | XE_PAGE_PRESENT | PPAT_CACHED | - XE_PPGTT_PTE_DM | XE_PDPE_PS_1G; + flags = vm->pt_ops->pte_encode_addr(0, XE_CACHE_WB, level, true, 0); /* * Use 1GB pages, it shouldn't matter the physical amount of @@ -483,7 +482,8 @@ static void emit_pte(struct xe_migrate *m, ptes -= chunk; while (chunk--) { - u64 addr; + u64 addr, flags = 0; + bool devmem = false; addr = xe_res_dma(cur) & PAGE_MASK; if (is_vram) { @@ -491,13 +491,15 @@ static void emit_pte(struct xe_migrate *m, if ((m->q->vm->flags & XE_VM_FLAG_64K) && !(cur_ofs & (16 * 8 - 1))) { xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K)); - addr |= XE_PTE_PS64; + flags |= XE_PTE_PS64; } addr += vram_region_gpu_offset(bo->ttm.resource); - addr |= XE_PPGTT_PTE_DM; + devmem = true; } - addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW; + + addr = m->q->vm->pt_ops->pte_encode_addr(addr, XE_CACHE_WB, + 0, devmem, flags); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index c58f6926fabf..64e3921a0f46 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -39,6 +39,8 @@ struct xe_pt_ops { enum xe_cache_level cache, u32 pt_level); u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma, enum xe_cache_level cache, u32 pt_level); + u64 (*pte_encode_addr)(u64 addr, enum xe_cache_level cache, + u32 pt_level, bool devmem, u64 flags); u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset, const enum xe_cache_level cache); }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 72e27e6809f9..1d3569097e5f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1235,7 +1235,6 @@ static u64 pte_encode_cache(enum xe_cache_level cache) static u64 pte_encode_ps(u32 pt_level) { - /* XXX: Does hw support 1 GiB pages? 
*/ XE_WARN_ON(pt_level > 2); if (pt_level == 1) @@ -1291,9 +1290,31 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, return pte; } +static u64 xelp_pte_encode_addr(u64 addr, enum xe_cache_level cache, + u32 pt_level, bool devmem, u64 flags) +{ + u64 pte; + + /* Avoid passing random bits directly as flags */ + XE_WARN_ON(flags & ~XE_PTE_PS64); + + pte = addr; + pte |= XE_PAGE_PRESENT | XE_PAGE_RW; + pte |= pte_encode_cache(cache); + pte |= pte_encode_ps(pt_level); + + if (devmem) + pte |= XE_PPGTT_PTE_DM; + + pte |= flags; + + return pte; +} + static const struct xe_pt_ops xelp_pt_ops = { .pte_encode_bo = xelp_pte_encode_bo, .pte_encode_vma = xelp_pte_encode_vma, + .pte_encode_addr = xelp_pte_encode_addr, .pde_encode_bo = xelp_pde_encode_bo, }; -- cgit v1.2.3 From b445be5710200501bba693fe6f9c614895412b94 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:38:56 -0700 Subject: drm/xe: Use vfunc to initialize PAT Split the PAT initialization between SW-only and HW. The _early() only sets up the ops and data structure that are used later to program the tables. This allows the PAT to be easily extended to other platforms. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 3 ++ drivers/gpu/drm/xe/xe_device_types.h | 13 ++++++++ drivers/gpu/drm/xe/xe_pat.c | 59 +++++++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_pat.h | 11 +++++++ 4 files changed, 72 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 67ec55810ca3..113fca462141 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -25,6 +25,7 @@ #include "xe_irq.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_pat.h" #include "xe_pcode.h" #include "xe_pm.h" #include "xe_query.h" @@ -268,6 +269,8 @@ int xe_device_probe(struct xe_device *xe) int err; u8 id; + xe_pat_init_early(xe); + xe->info.mem_region_mask = 1; for_each_tile(tile, xe, id) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index aa9935ff6d84..c4920631677b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -19,6 +19,7 @@ #include "xe_step_types.h" struct xe_ggtt; +struct xe_pat_ops; #define XE_BO_INVALID_OFFSET LONG_MAX @@ -310,6 +311,18 @@ struct xe_device { atomic_t ref; } mem_access; + /** + * @pat: Encapsulate PAT related stuff + */ + struct { + /** Internal operations to abstract platforms */ + const struct xe_pat_ops *ops; + /** PAT table to program in the HW */ + const u32 *table; + /** Number of PAT entries */ + int n_entries; + } pat; + /** @d3cold: Encapsulate d3cold related stuff */ struct { /** capable: Indicates if root port is d3cold capable */ diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 71e0e047fff3..28f401c500d8 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -32,6 +32,11 @@ #define TGL_PAT_WC REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 1) #define TGL_PAT_UC REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 0) +struct xe_pat_ops { + void (*program_graphics)(struct xe_gt *gt, const u32 table[], int n_entries); + void (*program_media)(struct xe_gt *gt, const u32 table[], int n_entries); +}; + static const u32 tgl_pat_table[] = { [0] = TGL_PAT_WB, [1] = TGL_PAT_WC, @@ -80,24 +85,37 @@ static void program_pat_mcr(struct xe_gt *gt, 
const u32 table[], int n_entries) } } -void xe_pat_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); +static const struct xe_pat_ops tgl_pat_ops = { + .program_graphics = program_pat, +}; + +static const struct xe_pat_ops dg2_pat_ops = { + .program_graphics = program_pat_mcr, +}; + +/* + * SAMedia register offsets are adjusted by the write methods and they target + * registers that are not MCR, while for normal GT they are MCR + */ +static const struct xe_pat_ops mtl_pat_ops = { + .program_graphics = program_pat, + .program_media = program_pat_mcr, +}; +void xe_pat_init_early(struct xe_device *xe) +{ if (xe->info.platform == XE_METEORLAKE) { - /* - * SAMedia register offsets are adjusted by the write methods - * and they target registers that are not MCR, while for normal - * GT they are MCR - */ - if (xe_gt_is_media_type(gt)) - program_pat(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table)); - else - program_pat_mcr(gt, mtl_pat_table, ARRAY_SIZE(mtl_pat_table)); + xe->pat.ops = &mtl_pat_ops; + xe->pat.table = mtl_pat_table; + xe->pat.n_entries = ARRAY_SIZE(mtl_pat_table); } else if (xe->info.platform == XE_PVC || xe->info.platform == XE_DG2) { - program_pat_mcr(gt, pvc_pat_table, ARRAY_SIZE(pvc_pat_table)); + xe->pat.ops = &dg2_pat_ops; + xe->pat.table = pvc_pat_table; + xe->pat.n_entries = ARRAY_SIZE(pvc_pat_table); } else if (GRAPHICS_VERx100(xe) <= 1210) { - program_pat(gt, tgl_pat_table, ARRAY_SIZE(tgl_pat_table)); + xe->pat.ops = &tgl_pat_ops; + xe->pat.table = tgl_pat_table; + xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table); } else { /* * Going forward we expect to need new PAT settings for most @@ -111,3 +129,16 @@ void xe_pat_init(struct xe_gt *gt) GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); } } + +void xe_pat_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->pat.ops) + return; + + if (xe_gt_is_media_type(gt)) + xe->pat.ops->program_media(gt, xe->pat.table, xe->pat.n_entries); + else + xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries); +} diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index 659de4008131..168e80e63809 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -7,7 +7,18 @@ #define _XE_PAT_H_ struct xe_gt; +struct xe_device; +/** + * xe_pat_init_early - SW initialization, setting up data based on device + * @xe: xe device + */ +void xe_pat_init_early(struct xe_device *xe); + +/** + * xe_pat_init - Program HW PAT table + * @gt: GT structure + */ void xe_pat_init(struct xe_gt *gt); #endif -- cgit v1.2.3 From 194bdb859950a4223305ee766a3b9d90c398d158 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:38:57 -0700 Subject: drm/xe/dg2: Fix using wrong PAT table DG2 should use the MCR variant to program the PAT registers, like PVC, but shouldn't use the same table as PVC. 
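Condensed, the selection this patch arrives at (the real hunk is in the diff below) pairs the MCR programming method with the Xe_LP table values:

	} else if (xe->info.platform == XE_DG2) {
		/* Programmed through MCR registers, like PVC ... */
		xe->pat.ops = &dg2_pat_ops;
		/* ... but with the same cacheability values as TGL/Xe_LP. */
		xe->pat.table = tgl_pat_table;
		xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table);
	}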
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-7-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 28f401c500d8..a4bebdedbbd9 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -108,10 +108,18 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.ops = &mtl_pat_ops; xe->pat.table = mtl_pat_table; xe->pat.n_entries = ARRAY_SIZE(mtl_pat_table); - } else if (xe->info.platform == XE_PVC || xe->info.platform == XE_DG2) { + } else if (xe->info.platform == XE_PVC) { xe->pat.ops = &dg2_pat_ops; xe->pat.table = pvc_pat_table; xe->pat.n_entries = ARRAY_SIZE(pvc_pat_table); + } else if (xe->info.platform == XE_DG2) { + /* + * Table is the same as previous platforms, but programming + * method has changed. + */ + xe->pat.ops = &dg2_pat_ops; + xe->pat.table = tgl_pat_table; + xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table); } else if (GRAPHICS_VERx100(xe) <= 1210) { xe->pat.ops = &tgl_pat_ops; xe->pat.table = tgl_pat_table; -- cgit v1.2.3 From 451028644775a5e07aaab3f147fda583e7054de6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:38:58 -0700 Subject: drm/xe/pat: Prefer the arch/IP names Both DG2 and PVC are derived from XeHP, but DG2 should not really re-use something introduced by PVC, so it's odd to have DG2 re-using the PVC programming for PAT. Let's prefer using the architecture and/or IP names. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-8-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 112 ++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index a4bebdedbbd9..4ab1b3dc4d5d 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -14,57 +14,57 @@ 0x4800, 0x4804, \ 0x4848, 0x484c) -#define MTL_L4_POLICY_MASK REG_GENMASK(3, 2) -#define MTL_PAT_3_UC REG_FIELD_PREP(MTL_L4_POLICY_MASK, 3) -#define MTL_PAT_1_WT REG_FIELD_PREP(MTL_L4_POLICY_MASK, 1) -#define MTL_PAT_0_WB REG_FIELD_PREP(MTL_L4_POLICY_MASK, 0) -#define MTL_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) -#define MTL_3_COH_2W REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 3) -#define MTL_2_COH_1W REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 2) -#define MTL_0_COH_NON REG_FIELD_PREP(MTL_INDEX_COH_MODE_MASK, 0) - -#define PVC_CLOS_LEVEL_MASK REG_GENMASK(3, 2) -#define PVC_PAT_CLOS(x) REG_FIELD_PREP(PVC_CLOS_LEVEL_MASK, x) - -#define TGL_MEM_TYPE_MASK REG_GENMASK(1, 0) -#define TGL_PAT_WB REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 3) -#define TGL_PAT_WT REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 2) -#define TGL_PAT_WC REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 1) -#define TGL_PAT_UC REG_FIELD_PREP(TGL_MEM_TYPE_MASK, 0) +#define XELPG_L4_POLICY_MASK REG_GENMASK(3, 2) +#define XELPG_PAT_3_UC REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 3) +#define XELPG_PAT_1_WT REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 1) +#define XELPG_PAT_0_WB REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 0) +#define XELPG_INDEX_COH_MODE_MASK REG_GENMASK(1, 0) +#define XELPG_3_COH_2W REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 3) +#define XELPG_2_COH_1W REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 2) +#define XELPG_0_COH_NON REG_FIELD_PREP(XELPG_INDEX_COH_MODE_MASK, 0) + 
+#define XEHPC_CLOS_LEVEL_MASK REG_GENMASK(3, 2) +#define XEHPC_PAT_CLOS(x) REG_FIELD_PREP(XEHPC_CLOS_LEVEL_MASK, x) + +#define XELP_MEM_TYPE_MASK REG_GENMASK(1, 0) +#define XELP_PAT_WB REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 3) +#define XELP_PAT_WT REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 2) +#define XELP_PAT_WC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1) +#define XELP_PAT_UC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0) struct xe_pat_ops { void (*program_graphics)(struct xe_gt *gt, const u32 table[], int n_entries); void (*program_media)(struct xe_gt *gt, const u32 table[], int n_entries); }; -static const u32 tgl_pat_table[] = { - [0] = TGL_PAT_WB, - [1] = TGL_PAT_WC, - [2] = TGL_PAT_WT, - [3] = TGL_PAT_UC, - [4] = TGL_PAT_WB, - [5] = TGL_PAT_WB, - [6] = TGL_PAT_WB, - [7] = TGL_PAT_WB, +static const u32 xelp_pat_table[] = { + [0] = XELP_PAT_WB, + [1] = XELP_PAT_WC, + [2] = XELP_PAT_WT, + [3] = XELP_PAT_UC, + [4] = XELP_PAT_WB, + [5] = XELP_PAT_WB, + [6] = XELP_PAT_WB, + [7] = XELP_PAT_WB, }; -static const u32 pvc_pat_table[] = { - [0] = TGL_PAT_UC, - [1] = TGL_PAT_WC, - [2] = TGL_PAT_WT, - [3] = TGL_PAT_WB, - [4] = PVC_PAT_CLOS(1) | TGL_PAT_WT, - [5] = PVC_PAT_CLOS(1) | TGL_PAT_WB, - [6] = PVC_PAT_CLOS(2) | TGL_PAT_WT, - [7] = PVC_PAT_CLOS(2) | TGL_PAT_WB, +static const u32 xehpc_pat_table[] = { + [0] = XELP_PAT_UC, + [1] = XELP_PAT_WC, + [2] = XELP_PAT_WT, + [3] = XELP_PAT_WB, + [4] = XEHPC_PAT_CLOS(1) | XELP_PAT_WT, + [5] = XEHPC_PAT_CLOS(1) | XELP_PAT_WB, + [6] = XEHPC_PAT_CLOS(2) | XELP_PAT_WT, + [7] = XEHPC_PAT_CLOS(2) | XELP_PAT_WB, }; -static const u32 mtl_pat_table[] = { - [0] = MTL_PAT_0_WB, - [1] = MTL_PAT_1_WT, - [2] = MTL_PAT_3_UC, - [3] = MTL_PAT_0_WB | MTL_2_COH_1W, - [4] = MTL_PAT_0_WB | MTL_3_COH_2W, +static const u32 xelpg_pat_table[] = { + [0] = XELPG_PAT_0_WB, + [1] = XELPG_PAT_1_WT, + [2] = XELPG_PAT_3_UC, + [3] = XELPG_PAT_0_WB | XELPG_2_COH_1W, + [4] = XELPG_PAT_0_WB | XELPG_3_COH_2W, }; static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries) @@ -85,11 +85,11 @@ static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries) } } -static const struct xe_pat_ops tgl_pat_ops = { +static const struct xe_pat_ops xelp_pat_ops = { .program_graphics = program_pat, }; -static const struct xe_pat_ops dg2_pat_ops = { +static const struct xe_pat_ops xehp_pat_ops = { .program_graphics = program_pat_mcr, }; @@ -97,7 +97,7 @@ static const struct xe_pat_ops dg2_pat_ops = { * SAMedia register offsets are adjusted by the write methods and they target * registers that are not MCR, while for normal GT they are MCR */ -static const struct xe_pat_ops mtl_pat_ops = { +static const struct xe_pat_ops xelpg_pat_ops = { .program_graphics = program_pat, .program_media = program_pat_mcr, }; @@ -105,25 +105,25 @@ static const struct xe_pat_ops mtl_pat_ops = { void xe_pat_init_early(struct xe_device *xe) { if (xe->info.platform == XE_METEORLAKE) { - xe->pat.ops = &mtl_pat_ops; - xe->pat.table = mtl_pat_table; - xe->pat.n_entries = ARRAY_SIZE(mtl_pat_table); + xe->pat.ops = &xelpg_pat_ops; + xe->pat.table = xelpg_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table); } else if (xe->info.platform == XE_PVC) { - xe->pat.ops = &dg2_pat_ops; - xe->pat.table = pvc_pat_table; - xe->pat.n_entries = ARRAY_SIZE(pvc_pat_table); + xe->pat.ops = &xehp_pat_ops; + xe->pat.table = xehpc_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table); } else if (xe->info.platform == XE_DG2) { /* * Table is the same as previous platforms, but programming * method has changed. 
*/ - xe->pat.ops = &dg2_pat_ops; - xe->pat.table = tgl_pat_table; - xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table); + xe->pat.ops = &xehp_pat_ops; + xe->pat.table = xelp_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table); } else if (GRAPHICS_VERx100(xe) <= 1210) { - xe->pat.ops = &tgl_pat_ops; - xe->pat.table = tgl_pat_table; - xe->pat.n_entries = ARRAY_SIZE(tgl_pat_table); + xe->pat.ops = &xelp_pat_ops; + xe->pat.table = xelp_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table); } else { /* * Going forward we expect to need new PAT settings for most -- cgit v1.2.3 From 0d68247efcdbf7791122071323719310207354f3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:38:59 -0700 Subject: drm/xe/pat: Keep track of relevant indexes Some of the PAT entries are relevant for internal driver use, which varies per platform. Let the PAT early initialization set what they should point to so the rest of the driver can use them where needed. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-9-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pat.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_pt_types.h | 1 + 3 files changed, 15 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c4920631677b..1ee8410ec3b1 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -15,6 +15,7 @@ #include "xe_devcoredump_types.h" #include "xe_gt_types.h" #include "xe_platform_types.h" +#include "xe_pt_types.h" #include "xe_pmu.h" #include "xe_step_types.h" @@ -321,6 +322,7 @@ struct xe_device { const u32 *table; /** Number of PAT entries */ int n_entries; + u32 idx[__XE_CACHE_LEVEL_COUNT]; } pat; /** @d3cold: Encapsulate d3cold related stuff */ diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 4ab1b3dc4d5d..4668ca3932c5 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -108,10 +108,16 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.ops = &xelpg_pat_ops; xe->pat.table = xelpg_pat_table; xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 2; + xe->pat.idx[XE_CACHE_WT] = 1; + xe->pat.idx[XE_CACHE_WB] = 3; } else if (xe->info.platform == XE_PVC) { xe->pat.ops = &xehp_pat_ops; xe->pat.table = xehpc_pat_table; xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 0; + xe->pat.idx[XE_CACHE_WT] = 2; + xe->pat.idx[XE_CACHE_WB] = 3; } else if (xe->info.platform == XE_DG2) { /* * Table is the same as previous platforms, but programming @@ -120,10 +126,16 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.ops = &xehp_pat_ops; xe->pat.table = xelp_pat_table; xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 2; + xe->pat.idx[XE_CACHE_WB] = 0; } else if (GRAPHICS_VERx100(xe) <= 1210) { xe->pat.ops = &xelp_pat_ops; xe->pat.table = xelp_pat_table; xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 2; + xe->pat.idx[XE_CACHE_WB] = 0; } else { /* * Going forward we expect to need new PAT settings for most diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 64e3921a0f46..bf5000499251 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -17,6 +17,7 @@ enum 
xe_cache_level { XE_CACHE_NONE, XE_CACHE_WT, XE_CACHE_WB, + __XE_CACHE_LEVEL_COUNT, }; #define XE_VM_MAX_LEVEL 4 -- cgit v1.2.3 From fcd75139cd3c76467c8495c750fd6e27787f7e37 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:39:00 -0700 Subject: drm/xe: Use pat_index to encode pde/pte Change the xelp_pte_encode() and xelp_pde_encode() functions to use the platform-dependent pat_index. The same function can be used for all platforms as they only need to encode the pat_index bits in the same pte/pde layout. For platforms that don't have the most significant bit, as long as they don't return a bogus index they should be fine. v2: Use the same logic to encode pde as it's compatible with previous logic, it's more future proof and also fixes the cache setting for PVC (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-10-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 8 +++--- drivers/gpu/drm/xe/xe_migrate.c | 6 +++-- drivers/gpu/drm/xe/xe_pt_types.h | 4 ++- drivers/gpu/drm/xe/xe_vm.c | 56 ++++++++++++++++++++++++++-------------- 4 files changed, 47 insertions(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 23f1b9e74e71..5666fd6d7f11 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -48,10 +48,10 @@ #define XE_BO_INTERNAL_TEST BIT(30) #define XE_BO_INTERNAL_64K BIT(31) -#define PPAT_UNCACHED GENMASK_ULL(4, 3) -#define PPAT_CACHED_PDE 0 -#define PPAT_CACHED BIT_ULL(7) -#define PPAT_DISPLAY_ELLC BIT_ULL(4) +#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) +#define XE_PPGTT_PTE_PAT2 BIT_ULL(7) +#define XE_PPGTT_PTE_PAT1 BIT_ULL(4) +#define XE_PPGTT_PTE_PAT0 BIT_ULL(3) #define XE_PTE_SHIFT 12 #define XE_PAGE_SIZE (1 << XE_PTE_SHIFT) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index cd4dbbf6c383..77a2468ca85c 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -261,7 +261,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level = 2; ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8; - flags = vm->pt_ops->pte_encode_addr(0, XE_CACHE_WB, level, true, 0); + flags = vm->pt_ops->pte_encode_addr(xe, 0, XE_CACHE_WB, level, + true, 0); /* * Use 1GB pages, it shouldn't matter the physical amount of @@ -498,7 +499,8 @@ static void emit_pte(struct xe_migrate *m, devmem = true; } - addr = m->q->vm->pt_ops->pte_encode_addr(addr, XE_CACHE_WB, + addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, + addr, XE_CACHE_WB, 0, devmem, flags); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index bf5000499251..bd6645295fe6 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -11,6 +11,7 @@ #include "xe_pt_walk.h" struct xe_bo; +struct xe_device; struct xe_vma; enum xe_cache_level { @@ -40,7 +41,8 @@ struct xe_pt_ops { enum xe_cache_level cache, u32 pt_level); u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma, enum xe_cache_level cache, u32 pt_level); - u64 (*pte_encode_addr)(u64 addr, enum xe_cache_level cache, + u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr, + enum xe_cache_level cache, u32 pt_level, bool devmem, u64 flags); u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset, const enum xe_cache_level cache); diff --git a/drivers/gpu/drm/xe/xe_vm.c 
b/drivers/gpu/drm/xe/xe_vm.c index 1d3569097e5f..6ab115df9c4e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1210,27 +1210,38 @@ static struct drm_gpuvm_ops gpuvm_ops = { .vm_free = xe_vm_free, }; -static u64 pde_encode_cache(enum xe_cache_level cache) +static u64 pde_encode_cache(struct xe_device *xe, enum xe_cache_level cache) { - /* FIXME: I don't think the PPAT handling is correct for MTL */ + u32 pat_index = xe->pat.idx[cache]; + u64 pte = 0; - if (cache != XE_CACHE_NONE) - return PPAT_CACHED_PDE; + if (pat_index & BIT(0)) + pte |= XE_PPGTT_PTE_PAT0; - return PPAT_UNCACHED; + if (pat_index & BIT(1)) + pte |= XE_PPGTT_PTE_PAT1; + + return pte; } -static u64 pte_encode_cache(enum xe_cache_level cache) +static u64 pte_encode_cache(struct xe_device *xe, enum xe_cache_level cache) { - /* FIXME: I don't think the PPAT handling is correct for MTL */ - switch (cache) { - case XE_CACHE_NONE: - return PPAT_UNCACHED; - case XE_CACHE_WT: - return PPAT_DISPLAY_ELLC; - default: - return PPAT_CACHED; - } + u32 pat_index = xe->pat.idx[cache]; + u64 pte = 0; + + if (pat_index & BIT(0)) + pte |= XE_PPGTT_PTE_PAT0; + + if (pat_index & BIT(1)) + pte |= XE_PPGTT_PTE_PAT1; + + if (pat_index & BIT(2)) + pte |= XE_PPGTT_PTE_PAT2; + + if (pat_index & BIT(3)) + pte |= XELPG_PPGTT_PTE_PAT3; + + return pte; } static u64 pte_encode_ps(u32 pt_level) @@ -1248,11 +1259,12 @@ static u64 pte_encode_ps(u32 pt_level) static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, const enum xe_cache_level cache) { + struct xe_device *xe = xe_bo_device(bo); u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_cache(cache); + pde |= pde_encode_cache(xe, cache); return pde; } @@ -1260,11 +1272,12 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache, u32 pt_level) { + struct xe_device *xe = xe_bo_device(bo); u64 pte; pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_cache(cache); + pte |= pte_encode_cache(xe, cache); pte |= pte_encode_ps(pt_level); if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) @@ -1276,12 +1289,14 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, enum xe_cache_level cache, u32 pt_level) { + struct xe_device *xe = xe_vma_vm(vma)->xe; + pte |= XE_PAGE_PRESENT; if (likely(!xe_vma_read_only(vma))) pte |= XE_PAGE_RW; - pte |= pte_encode_cache(cache); + pte |= pte_encode_cache(xe, cache); pte |= pte_encode_ps(pt_level); if (unlikely(xe_vma_is_null(vma))) @@ -1290,7 +1305,8 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, return pte; } -static u64 xelp_pte_encode_addr(u64 addr, enum xe_cache_level cache, +static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, + enum xe_cache_level cache, u32 pt_level, bool devmem, u64 flags) { u64 pte; @@ -1300,7 +1316,7 @@ static u64 xelp_pte_encode_addr(u64 addr, enum xe_cache_level cache, pte = addr; pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_cache(cache); + pte |= pte_encode_cache(xe, cache); pte |= pte_encode_ps(pt_level); if (devmem) -- cgit v1.2.3 From 1464f56b47d8db63ad95dad3fd8845ec412dc8d5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 27 Sep 2023 12:39:01 -0700 Subject: drm/xe: Use vfunc for ggtt pte encoding Use 2 different functions for encoding the ggtt's pte, assigning them during initialization. 
Main difference is that before Xe-LPG, the pte didn't have the cache bits. v2: Re-use xelp_ggtt_pte_encode_bo() for the common part with xelpg_ggtt_pte_encode_bo() (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230927193902.2849159-11-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 53 ++++++++++++++++++++++++++++---------- drivers/gpu/drm/xe/xe_ggtt.h | 1 - drivers/gpu/drm/xe/xe_ggtt_types.h | 9 +++++++ 3 files changed, 49 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ba34b8784572..ec7bbb1dc295 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -20,16 +20,15 @@ #include "xe_mmio.h" #include "xe_wopcm.h" -/* FIXME: Common file, preferably auto-gen */ -#define MTL_GGTT_PTE_PAT0 BIT_ULL(52) -#define MTL_GGTT_PTE_PAT1 BIT_ULL(53) +#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) +#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ #define GUC_GGTT_TOP 0xFEE00000 -u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) +static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, + enum xe_cache_level cache) { - struct xe_device *xe = xe_bo_device(bo); u64 pte; pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); @@ -38,11 +37,25 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset) if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) pte |= XE_GGTT_PTE_DM; - /* FIXME: vfunc + pass in caching rules */ - if (xe->info.platform == XE_METEORLAKE) { - pte |= MTL_GGTT_PTE_PAT0; - pte |= MTL_GGTT_PTE_PAT1; - } + return pte; +} + +static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, + enum xe_cache_level cache) +{ + struct xe_device *xe = xe_bo_device(bo); + u32 pat_index = xe->pat.idx[cache]; + u64 pte; + + pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, cache); + + xe_assert(xe, pat_index <= 3); + + if (pat_index & BIT(0)) + pte |= XELPG_GGTT_PTE_PAT0; + + if (pat_index & BIT(1)) + pte |= XELPG_GGTT_PTE_PAT1; return pte; } @@ -72,7 +85,8 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) xe_tile_assert(ggtt->tile, start < end); if (ggtt->scratch) - scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); + scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, + XE_CACHE_WB); else scratch_pte = 0; @@ -102,6 +116,14 @@ static void primelockdep(struct xe_ggtt *ggtt) fs_reclaim_release(GFP_KERNEL); } +static const struct xe_ggtt_pt_ops xelp_pt_ops = { + .pte_encode_bo = xelp_ggtt_pte_encode_bo, +}; + +static const struct xe_ggtt_pt_ops xelpg_pt_ops = { + .pte_encode_bo = xelpg_ggtt_pte_encode_bo, +}; + int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -146,6 +168,11 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) if (ggtt->size > GUC_GGTT_TOP) ggtt->size = GUC_GGTT_TOP; + if (GRAPHICS_VERx100(xe) >= 1270) + ggtt->pt_ops = &xelpg_pt_ops; + else + ggtt->pt_ops = &xelp_pt_ops; + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); mutex_init(&ggtt->lock); @@ -260,7 +287,7 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) { u64 addr, scratch_pte; - scratch_pte = xe_ggtt_pte_encode(ggtt->scratch, 0); + scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, XE_CACHE_WB); printk("%sGlobal GTT:", prefix); for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { @@ -301,7 +328,7 @@ void xe_ggtt_map_bo(struct xe_ggtt 
*ggtt, struct xe_bo *bo) u64 offset, pte; for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { - pte = xe_ggtt_pte_encode(bo, offset); + pte = ggtt->pt_ops->pte_encode_bo(bo, offset, XE_CACHE_WB); xe_ggtt_set_pte(ggtt, start + offset, pte); } diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 8e7360926bea..04bb26b0938e 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -10,7 +10,6 @@ struct drm_printer; -u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset); void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); void xe_ggtt_invalidate(struct xe_ggtt *ggtt); int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt); diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index d34b3e733945..486016ea5b67 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -8,9 +8,16 @@ #include +#include "xe_pt_types.h" + struct xe_bo; struct xe_gt; +struct xe_ggtt_pt_ops { + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, + enum xe_cache_level cache); +}; + struct xe_ggtt { struct xe_tile *tile; @@ -25,6 +32,8 @@ struct xe_ggtt { u64 __iomem *gsm; + const struct xe_ggtt_pt_ops *pt_ops; + struct drm_mm mm; }; -- cgit v1.2.3 From 5349bb76d62048e73f6e4a863b40a309c62dc47f Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Thu, 28 Sep 2023 16:56:21 +0300 Subject: drm/xe: do not register to PM if GuC is disabled When working without GuC (i.e. working with execlists), the flow attempts to perform suspend operation which is failing due to a lack of support without GuC. If PM ops are not supported without GuC we may as well avoid PM registration rather than returning errors from various PM flows. Signed-off-by: Ohad Sharabi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 ---- drivers/gpu/drm/xe/xe_pm.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 1aa44d4f9ac1..68cd9a7ee087 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -641,10 +641,6 @@ int xe_gt_suspend(struct xe_gt *gt) { int err; - /* For now suspend/resume is only allowed with GuC */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; - xe_gt_sanitize(gt); xe_device_mem_access_get(gt_to_xe(gt)); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 2c2745f86223..93a7658da324 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -156,6 +156,10 @@ void xe_pm_init(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + /* For now suspend/resume is only allowed with GuC */ + if (!xe_device_uc_enabled(xe)) + return; + drmm_mutex_init(&xe->drm, &xe->d3cold.lock); xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev); -- cgit v1.2.3 From 28b1d9155c3c1651a6e184e1286cebb63ec6b51c Mon Sep 17 00:00:00 2001 From: Bommithi Sakeena Date: Wed, 27 Sep 2023 16:50:11 +0000 Subject: drm/xe: Ensure mutex are destroyed Add missing mutex_destroy calls to fini functions or convert to drmm_mutex_init where fini function is not available. 
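The two patterns used below, shown here on a hypothetical object rather than driver code: a mutex initialized with mutex_init() gets a matching mutex_destroy() in the fini path, while objects without a fini path switch to the DRM-managed variant so the lock is torn down together with the drm_device:

	/* Hypothetical example only. */
	struct foo { struct mutex lock; };

	static void foo_init(struct foo *f) { mutex_init(&f->lock); }
	static void foo_fini(struct foo *f) { mutex_destroy(&f->lock); }

	/* No fini hook available: tie the lifetime to the drm_device. */
	static int bar_init(struct xe_device *xe, struct foo *b)
	{
		return drmm_mutex_init(&xe->drm, &b->lock);
	}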
Cc: Matthew Brost Signed-off-by: Bommithi Sakeena Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- drivers/gpu/drm/xe/xe_guc_pc.c | 1 + drivers/gpu/drm/xe/xe_guc_submit.c | 1 + drivers/gpu/drm/xe/xe_pcode.c | 4 +++- 4 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 2046bd269bbd..8b686c8b3339 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -137,7 +137,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) xe_assert(xe, !(guc_ct_size() % PAGE_SIZE)); - mutex_init(&ct->lock); + drmm_mutex_init(&xe->drm, &ct->lock); spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 8a4d299d6cb0..d9375d1d582f 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -890,6 +890,7 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc) XE_WARN_ON(xe_guc_pc_stop(pc)); sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); xe_bo_unpin_map_no_vm(pc->bo); + mutex_destroy(&pc->freq_lock); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 3e136b60196e..d0e60349fc5a 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -195,6 +195,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) xa_destroy(&guc->submission_state.exec_queue_lookup); ida_destroy(&guc->submission_state.guc_ids); bitmap_free(guc->submission_state.guc_ids_bitmap); + mutex_destroy(&guc->submission_state.lock); } #define GUC_ID_MAX 65535 diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 7f1bf2297f51..4a240acf7625 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -8,6 +8,8 @@ #include #include +#include + #include "xe_gt.h" #include "xe_mmio.h" #include "xe_pcode_api.h" @@ -276,7 +278,7 @@ int xe_pcode_init(struct xe_gt *gt) */ int xe_pcode_probe(struct xe_gt *gt) { - mutex_init(>->pcode.lock); + drmm_mutex_init(>_to_xe(gt)->drm, >->pcode.lock); if (!IS_DGFX(gt_to_xe(gt))) return 0; -- cgit v1.2.3 From 909faaa66c5ec0d789b6620127329f2b17b01602 Mon Sep 17 00:00:00 2001 From: Bommithi Sakeena Date: Wed, 27 Sep 2023 16:50:12 +0000 Subject: drm/xe: Add a missing mutex_destroy to xe_ttm_vram_mgr Ensure that the mutex is destroyed at fini function. 
Cc: Maarten Lankhorst Signed-off-by: Bommithi Sakeena Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 06a54c8bd46f..285791eb4a79 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -328,6 +328,8 @@ static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg) ttm_resource_manager_cleanup(&mgr->manager); ttm_set_driver_manager(&xe->ttm, mgr->mem_type, NULL); + + mutex_destroy(&mgr->lock); } int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, -- cgit v1.2.3 From 9be79251813d113f9157e92cd8b0eb8563253a09 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Wed, 27 Sep 2023 21:43:35 -0700 Subject: drm/xe: set PTE_AE for all platforms supporting it Atomic access is supported by PVC, and became a common feature for all platforms starting from Xe2. To enable that XE_VMA_ATOMIC_PTE_BIT needs to be set, then pte encode will eventually set PTE_AE for devmem. Signed-off-by: Fei Yang Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230928044335.1474903-2-fei.yang@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 6ab115df9c4e..2bb0a1669a96 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -901,7 +901,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->tile_mask |= 0x1 << id; } - if (vm->xe->info.platform == XE_PVC) + if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; if (bo) { -- cgit v1.2.3 From e2e2d9633706f79e6efaa826cf72cbc12cf531f8 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Thu, 21 Sep 2023 15:05:00 -0700 Subject: drm/xe: timeout needs to be a signed value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In xe_wait_user_fence_ioctl, the timeout is currently defined as unsigned long. That could potentially pass a negative value to the schedule_timeout() call because nsecs_to_jiffies() returns an unsigned long which gets used as signed long. [ 187.732238] schedule_timeout: wrong timeout value fffffffffffffc18 [ 187.733180] CPU: 0 PID: 792 Comm: test_thread_dim Tainted: G U 6.4.0-xe #1 [ 187.734251] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 187.735019] Call Trace: [ 187.735373] [ 187.735687] dump_stack_lvl+0x92/0xb0 [ 187.736193] schedule_timeout+0x348/0x430 [ 187.736739] ? __might_fault+0x67/0xd0 [ 187.737255] ? check_chain_key+0x224/0x2d0 [ 187.737812] ? __pfx_schedule_timeout+0x10/0x10 [ 187.738429] ? __might_fault+0x6b/0xd0 [ 187.738946] ? __pfx_lock_release+0x10/0x10 [ 187.739512] ? __pfx_lock_release+0x10/0x10 [ 187.740080] wait_woken+0x86/0x100 [ 187.740556] xe_wait_user_fence_ioctl+0x34b/0xe00 [xe] [ 187.741281] ? __pfx_xe_wait_user_fence_ioctl+0x10/0x10 [xe] [ 187.742075] ? lock_acquire+0x169/0x3d0 [ 187.742601] ? check_chain_key+0x224/0x2d0 [ 187.743158] ? drm_dev_enter+0x9/0xe0 [drm] [ 187.743740] ? __pfx_woken_wake_function+0x10/0x10 [ 187.744388] ? drm_dev_exit+0x11/0x50 [drm] [ 187.744969] ? __pfx_lock_release+0x10/0x10 [ 187.745536] ? __might_fault+0x67/0xd0 [ 187.746052] ? 
check_chain_key+0x224/0x2d0 [ 187.746610] drm_ioctl_kernel+0x172/0x250 [drm] [ 187.747242] ? __pfx_xe_wait_user_fence_ioctl+0x10/0x10 [xe] [ 187.748037] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm] [ 187.748729] ? __pfx_xe_wait_user_fence_ioctl+0x10/0x10 [xe] [ 187.749524] ? __pfx_xe_wait_user_fence_ioctl+0x10/0x10 [xe] [ 187.750319] drm_ioctl+0x35e/0x620 [drm] [ 187.750871] ? __pfx_drm_ioctl+0x10/0x10 [drm] [ 187.751495] ? restore_fpregs_from_fpstate+0x99/0x140 [ 187.752172] ? __pfx_restore_fpregs_from_fpstate+0x10/0x10 [ 187.752901] ? mark_held_locks+0x24/0x90 [ 187.753438] __x64_sys_ioctl+0xb4/0xf0 [ 187.753954] do_syscall_64+0x3f/0x90 [ 187.754450] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 187.755127] RIP: 0033:0x7f4e6651aaff [ 187.755623] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00 [ 187.757995] RSP: 002b:00007fff05f37a50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 187.758995] RAX: ffffffffffffffda RBX: 000055eca47c8130 RCX: 00007f4e6651aaff [ 187.759935] RDX: 00007fff05f37b60 RSI: 00000000c050644b RDI: 0000000000000004 [ 187.760874] RBP: 0000000000000017 R08: 0000000000000017 R09: 7fffffffffffffff [ 187.761814] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 187.762753] R13: 0000000000000000 R14: 0000000000000000 R15: 00007f4e65d19ce0 [ 187.763694] Fixes: 5572a0046857 ("drm/xe: Use nanoseconds instead of jiffies in uapi for user fence") Signed-off-by: Fei Yang Cc: Andi Shyti Cc: Zbigniew Kempczyński Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20230921220500.994558-2-fei.yang@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 55 ++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 761eed3a022f..3ac4cd24d5b4 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -85,17 +85,45 @@ static int check_hw_engines(struct xe_device *xe, DRM_XE_UFENCE_WAIT_VM_ERROR) #define MAX_OP DRM_XE_UFENCE_WAIT_LTE -static unsigned long to_jiffies_timeout(struct drm_xe_wait_user_fence *args) +static long to_jiffies_timeout(struct xe_device *xe, + struct drm_xe_wait_user_fence *args) { - unsigned long timeout; + unsigned long long t; + long timeout; - if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME) - return drm_timeout_abs_to_jiffies(args->timeout); + /* + * For negative timeout we want to wait "forever" by setting + * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also + * to args->timeout to avoid being zeroed on the signal delivery + * (see arithmetics after wait). + */ + if (args->timeout < 0) { + args->timeout = MAX_SCHEDULE_TIMEOUT; + return MAX_SCHEDULE_TIMEOUT; + } - if (args->timeout == MAX_SCHEDULE_TIMEOUT || args->timeout == 0) - return args->timeout; + if (args->timeout == 0) + return 0; - timeout = nsecs_to_jiffies(args->timeout); + /* + * Save the timeout to an u64 variable because nsecs_to_jiffies + * might return a value that overflows s32 variable. 
+ */ + if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME) + t = drm_timeout_abs_to_jiffies(args->timeout); + else + t = nsecs_to_jiffies(args->timeout); + + /* + * Anything greater then MAX_SCHEDULE_TIMEOUT is meaningless, + * also we don't want to cap it at MAX_SCHEDULE_TIMEOUT because + * apparently user doesn't mean to wait forever, otherwise the + * args->timeout should have been set to a negative value. + */ + if (t > MAX_SCHEDULE_TIMEOUT) + timeout = MAX_SCHEDULE_TIMEOUT - 1; + else + timeout = t; return timeout ?: 1; } @@ -114,7 +142,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, int err; bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP || args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR; - unsigned long timeout; + long timeout; ktime_t start; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) || @@ -169,16 +197,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, addr = vm->async_ops.error_capture.addr; } - /* - * For negative timeout we want to wait "forever" by setting - * MAX_SCHEDULE_TIMEOUT. But we have to assign this value also - * to args->timeout to avoid being zeroed on the signal delivery - * (see arithmetics after wait). - */ - if (args->timeout < 0) - args->timeout = MAX_SCHEDULE_TIMEOUT; - - timeout = to_jiffies_timeout(args); + timeout = to_jiffies_timeout(xe, args); start = ktime_get(); -- cgit v1.2.3 From c489925a154e164a46e4d1f9c62da3332e496edd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 29 Sep 2023 16:03:33 -0700 Subject: drm/xe/tuning: Add missing engine class rules for LRC tuning The LRC tuning settings we have today are modifying registers that are part of the RCS engine's context; they're not part of the general CSFE context that would apply to all engines. Add ENGINE_CLASS(RENDER) to the RTP rules to properly restrict these to the RCS. Bspec: 46255, 46261 Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20230929230332.3348841-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_tuning.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 412e59de9842..08174dda9bc7 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -29,7 +29,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { static const struct xe_rtp_entry_sr lrc_tunings[] = { { XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), /* read verification is ignored due to 1608008084. 
*/ XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2, FF_MODE2_GS_TIMER_MASK, @@ -39,19 +39,19 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { /* DG2 */ { XE_RTP_NAME("Tuning: L3 cache"), - XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, { XE_RTP_NAME("Tuning: TDS gang timer"), - XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), /* read verification is ignored as in i915 - need to check enabling */ XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK, FF_MODE2_TDS_TIMER_128)) }, { XE_RTP_NAME("Tuning: TBIMR fast clip"), - XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) }, {} -- cgit v1.2.3 From 0dcac63649e37e176224f11f69a3c85653d0d887 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 28 Sep 2023 21:49:59 -0700 Subject: drm/xe/xe2: Extend reserved stolen sizes For xe2, besides the previous sizes, the reserved portion of stolen can also have 16MB and 32MB. Bspec: 53148 Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230929044959.3149265-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 2 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 32 ++++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index d62555757d0f..1574d11d4e14 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -82,7 +82,7 @@ #define GSMBASE XE_REG(0x108100) #define STOLEN_RESERVED XE_REG(0x1082c0) -#define WOPCM_SIZE_MASK REG_GENMASK64(8, 7) +#define WOPCM_SIZE_MASK REG_GENMASK64(9, 7) #define MTL_RP_STATE_CAP XE_REG(0x138000) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 6ba6b1b7f34b..79fbd74a3944 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -86,6 +86,29 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) return ALIGN_DOWN(stolen_size, SZ_1M); } +static u32 get_wopcm_size(struct xe_device *xe) +{ + u32 wopcm_size; + u64 val; + + val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED); + val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); + + switch (val) { + case 0x5 ... 0x6: + val--; + fallthrough; + case 0x0 ... 
0x3: + wopcm_size = (1U << val) * SZ_1M; + break; + default: + WARN(1, "Missing case wopcm_size=%llx\n", val); + wopcm_size = 0; + } + + return wopcm_size; +} + static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -125,10 +148,11 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr } /* Carve out the top of DSM as it contains the reserved WOPCM region */ - wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK, - xe_mmio_read64_2x32(xe_root_mmio_gt(xe), - STOLEN_RESERVED)); - stolen_size -= (1U << wopcm_size) * SZ_1M; + wopcm_size = get_wopcm_size(xe); + if (drm_WARN_ON(&xe->drm, !wopcm_size)) + return 0; + + stolen_size -= wopcm_size; if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2))) return 0; -- cgit v1.2.3 From 2c0ac321d9975d670541eb3da19064f67b3f995b Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Fri, 29 Sep 2023 14:36:37 -0700 Subject: drm/xe: Adjust mocs field mask definitions Instead of using xe_mocs_index_to_value(), simply define the bitmask with the shift left applied. This will make it easier to adapt to new platforms that simply use the index. This also fixes PVC bug in emit_clear_link_copy() where the MOCS was getting shifted both by PVC_MS_MOCS_INDEX_MASK definition and by the xe_moc_index_to_value function. Bspec: 44509 Cc: Matt Roper Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230929213640.3189912-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 4 ++-- drivers/gpu/drm/xe/xe_migrate.c | 6 +++--- drivers/gpu/drm/xe/xe_mocs.h | 12 ------------ 3 files changed, 5 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 12120dd37aa2..1ee8d46d92d9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -44,7 +44,7 @@ #define DST_ACCESS_TYPE_SHIFT 20 #define CCS_SIZE_MASK 0x3FF #define CCS_SIZE_SHIFT 8 -#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 25) +#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) #define NUM_CCS_BYTES_PER_BLOCK 256 #define NUM_BYTES_PER_CCS_BYTE 256 #define NUM_CCS_BLKS_PER_XFER 1024 @@ -52,7 +52,7 @@ #define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) #define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) #define XY_FAST_COLOR_BLT_DW 16 -#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 21) +#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22) #define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 #define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 77a2468ca85c..52c3a040c606 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -519,7 +519,7 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, { u32 *cs = bb->cs + bb->len; u32 num_ccs_blks; - u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + u32 mocs = gt->mocs.uc_index; num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), NUM_CCS_BYTES_PER_BLOCK); @@ -806,7 +806,7 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs u32 size, u32 pitch) { u32 *cs = bb->cs + bb->len; - u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + u32 mocs = gt->mocs.uc_index; u32 len = PVC_MEM_SET_CMD_LEN_DW; *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 
2); @@ -828,7 +828,7 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; u32 len = XY_FAST_COLOR_BLT_DW; - u32 mocs = xe_mocs_index_to_value(gt->mocs.uc_index); + u32 mocs = gt->mocs.uc_index; if (GRAPHICS_VERx100(xe) < 1250) len = 11; diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index d0f1ec4b0336..053754c5a94e 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -14,16 +14,4 @@ struct xe_gt; void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); -/** - * xe_mocs_index_to_value - Translate mocs index to the mocs value exected by - * most blitter commands. - * @mocs_index: index into the mocs tables - * - * Return: The corresponding mocs value to be programmed. - */ -static inline u32 xe_mocs_index_to_value(u32 mocs_index) -{ - return mocs_index << 1; -} - #endif -- cgit v1.2.3 From c690f0e6b7e61826535eb91a28bf99197345faf2 Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Fri, 29 Sep 2023 14:36:38 -0700 Subject: drm/xe: Rename MEM_SET instruction PVC_MS_* doesn't reflect the real name of the instruction. Rename it to follow the name used in the bspec. Cc: Matt Roper Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230929213640.3189912-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 6 +++--- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 1ee8d46d92d9..1fdf2e4f1c9f 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -60,10 +60,10 @@ #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) #define PVC_MEM_SET_CMD_LEN_DW 7 -#define PVC_MS_MATRIX REG_BIT(17) -#define PVC_MS_DATA_FIELD GENMASK(31, 24) +#define PVC_MEM_SET_MATRIX REG_BIT(17) +#define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24) /* Bspec lists field as [6:0], but index alone is from [6:1] */ -#define PVC_MS_MOCS_INDEX_MASK GENMASK(6, 1) +#define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 52c3a040c606..313e3c0a6e90 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -809,13 +809,13 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs u32 mocs = gt->mocs.uc_index; u32 len = PVC_MEM_SET_CMD_LEN_DW; - *cs++ = PVC_MEM_SET_CMD | PVC_MS_MATRIX | (len - 2); + *cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2); *cs++ = pitch - 1; *cs++ = (size / pitch) - 1; *cs++ = pitch - 1; *cs++ = lower_32_bits(src_ofs); *cs++ = upper_32_bits(src_ofs); - *cs++ = FIELD_PREP(PVC_MS_MOCS_INDEX_MASK, mocs); + *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs); xe_gt_assert(gt, cs - bb->cs == len + bb->len); -- cgit v1.2.3 From 4bdd8c2ed9572b757521e981cfb35a3581c112c8 Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Fri, 29 Sep 2023 14:36:39 -0700 Subject: drm/xe/xe2: Set tile y type in XY_FAST_COPY_BLT to Tile4 Set bits 30 and 31 of XY_FAST_COPY_BLT's dword1 for XeHP and above. Destination or source being Y-Major is selected on dword0 and there's nothing to set on dword1. 
According to the bspec for Xe2, "Behavior is undefined when programmed the value 0". Also for XeHP, the only value allowed in those bits is 0b11, not being possible to select "Legacy Tile-Y" anymore, only the newer Tile4. So, unconditionally set those bits for graphics IP 12.50 and above. v2: Reword commit message and extend it to graphics version >= 12.50 (Matt Roper) Bspec: 57567 Cc: Matt Roper Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230929213640.3189912-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 2 ++ drivers/gpu/drm/xe/xe_migrate.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 1fdf2e4f1c9f..cc7b56763f10 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -57,6 +57,8 @@ #define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) #define XY_FAST_COPY_BLT_DEPTH_32 (3<<24) +#define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31) +#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) #define PVC_MEM_SET_CMD_LEN_DW 7 diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 313e3c0a6e90..69488a0fada4 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -543,12 +543,19 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u64 dst_ofs, unsigned int size, unsigned int pitch) { + struct xe_device *xe = gt_to_xe(gt); + xe_gt_assert(gt, size / pitch <= S16_MAX); xe_gt_assert(gt, pitch / 4 <= S16_MAX); xe_gt_assert(gt, pitch <= U16_MAX); bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); - bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch; + if (GRAPHICS_VERx100(xe) >= 1250) + bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | + XY_FAST_COPY_BLT_D1_SRC_TILE4 | + XY_FAST_COPY_BLT_D1_DST_TILE4; + else + bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch; bb->cs[bb->len++] = 0; bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; bb->cs[bb->len++] = lower_32_bits(dst_ofs); -- cgit v1.2.3 From 30603b5b0f8678fff799f4e3e2b45b8c08648575 Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Fri, 29 Sep 2023 14:36:40 -0700 Subject: drm/xe/xe2: Update MOCS fields in blitter instructions Xe2 changes or adds bits for mocs in a few BLT instructions: XY_CTRL_SURF_COPY_BLT, XY_FAST_COLOR_BLT, XY_FAST_COPY_BLT, and MEM_SET. Modify the code to deal with the new location. Unlike Xe1, the MOCS field in those instructions is only the MOCS index and not the Structure_MEMORY_OBJECT_CONTROL_STATE anymore. The pxp bit is now explicitly documented separately. 
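(As an aside, not part of the patch itself: the change above can be visualized with a small standalone C program. It re-implements GENMASK()/FIELD_PREP() with GCC-style builtins purely for illustration, uses a made-up MOCS index, and only shows where the bits land in the XY_CTRL_SURF dword for the pre-Xe2 layout versus the Xe2 index-only layout added by this series; it is a sketch, not the driver code.)

#include <stdint.h>
#include <stdio.h>

/* local stand-ins for the kernel's GENMASK()/FIELD_PREP() helpers */
#define GENMASK(h, l)       (((~0u) << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP(mask, v) (((v) << __builtin_ctz(mask)) & (mask))

#define XY_CTRL_SURF_MOCS_MASK           GENMASK(31, 26) /* pre-Xe2 field */
#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) /* Xe2: raw index */

int main(void)
{
    uint32_t index = 3; /* hypothetical uncached MOCS index */

    /* same index, different field position/width per platform generation */
    uint32_t pre_xe2_dw = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, index);
    uint32_t xe2_dw     = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, index);

    printf("pre-Xe2 dword bits: 0x%08x\n", pre_xe2_dw);
    printf("Xe2     dword bits: 0x%08x\n", xe2_dw);
    return 0;
}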
Bspec: 57567,57566,57565,57562 Cc: Matt Roper Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230929213640.3189912-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 4 +++ drivers/gpu/drm/xe/xe_migrate.c | 48 ++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index cc7b56763f10..21738281bdd0 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -45,6 +45,7 @@ #define CCS_SIZE_MASK 0x3FF #define CCS_SIZE_SHIFT 8 #define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) +#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) #define NUM_CCS_BYTES_PER_BLOCK 256 #define NUM_BYTES_PER_CCS_BYTE 256 #define NUM_CCS_BLKS_PER_XFER 1024 @@ -53,12 +54,14 @@ #define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) #define XY_FAST_COLOR_BLT_DW 16 #define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22) +#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24) #define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 #define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) #define XY_FAST_COPY_BLT_DEPTH_32 (3<<24) #define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31) #define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) +#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) #define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) #define PVC_MEM_SET_CMD_LEN_DW 7 @@ -66,6 +69,7 @@ #define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24) /* Bspec lists field as [6:0], but index alone is from [6:1] */ #define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1) +#define XE2_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 3) #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 69488a0fada4..4b7210c793f5 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -517,23 +517,28 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, bool src_is_indirect, u32 size) { + struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; u32 num_ccs_blks; - u32 mocs = gt->mocs.uc_index; + u32 mocs; num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), NUM_CCS_BYTES_PER_BLOCK); xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER); + + if (GRAPHICS_VERx100(xe) >= 2000) + mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index); + else + mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index); + *cs++ = XY_CTRL_SURF_COPY_BLT | (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | (dst_is_indirect ? 
0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT; *cs++ = lower_32_bits(src_ofs); - *cs++ = upper_32_bits(src_ofs) | - FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); + *cs++ = upper_32_bits(src_ofs) | mocs; *cs++ = lower_32_bits(dst_ofs); - *cs++ = upper_32_bits(dst_ofs) | - FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); + *cs++ = upper_32_bits(dst_ofs) | mocs; bb->len = cs - bb->cs; } @@ -544,24 +549,27 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb, unsigned int pitch) { struct xe_device *xe = gt_to_xe(gt); + u32 mocs = 0; + u32 tile_y = 0; xe_gt_assert(gt, size / pitch <= S16_MAX); xe_gt_assert(gt, pitch / 4 <= S16_MAX); xe_gt_assert(gt, pitch <= U16_MAX); - bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); + if (GRAPHICS_VER(xe) >= 20) + mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index); + if (GRAPHICS_VERx100(xe) >= 1250) - bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | - XY_FAST_COPY_BLT_D1_SRC_TILE4 | - XY_FAST_COPY_BLT_D1_DST_TILE4; - else - bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch; + tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4; + + bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2); + bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs; bb->cs[bb->len++] = 0; bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4; bb->cs[bb->len++] = lower_32_bits(dst_ofs); bb->cs[bb->len++] = upper_32_bits(dst_ofs); bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = pitch; + bb->cs[bb->len++] = pitch | mocs; bb->cs[bb->len++] = lower_32_bits(src_ofs); bb->cs[bb->len++] = upper_32_bits(src_ofs); } @@ -812,8 +820,8 @@ err_sync: static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch) { + struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; - u32 mocs = gt->mocs.uc_index; u32 len = PVC_MEM_SET_CMD_LEN_DW; *cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2); @@ -822,7 +830,10 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs *cs++ = pitch - 1; *cs++ = lower_32_bits(src_ofs); *cs++ = upper_32_bits(src_ofs); - *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs); + if (GRAPHICS_VERx100(xe) >= 2000) + *cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index); + else + *cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index); xe_gt_assert(gt, cs - bb->cs == len + bb->len); @@ -835,15 +846,18 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; u32 len = XY_FAST_COLOR_BLT_DW; - u32 mocs = gt->mocs.uc_index; if (GRAPHICS_VERx100(xe) < 1250) len = 11; *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | (len - 2); - *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | - (pitch - 1); + if (GRAPHICS_VERx100(xe) >= 2000) + *cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) | + (pitch - 1); + else + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) | + (pitch - 1); *cs++ = 0; *cs++ = (size / pitch) << 16 | pitch / 4; *cs++ = lower_32_bits(src_ofs); -- cgit v1.2.3 From 328e089bfb376a9817a260542fbea0fe9e0975ac Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Thu, 28 Sep 2023 22:15:39 -0700 Subject: drm/xe: Leverage ComputeCS read L3 caching On platforms that support read L3 caching, set the default mocs index in CCS RING_CMD_CTL to leverage the read caching in L3. Currently PVC and Xe2 platforms have the support. 
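(Aside, not part of the patch: a minimal standalone sketch of the selection rule the diff below applies. The function and index values here are invented for illustration; the real code keys off XE_ENGINE_CLASS_COMPUTE, GRAPHICS_VER() >= 20 and the XE_PVC platform check.)

#include <stdio.h>

/* hypothetical MOCS indices, for illustration only */
#define UC_INDEX 1
#define WB_INDEX 2

/* compute engines on platforms with read L3 caching (PVC, Xe2)
 * default their command-streamer reads to the WB entry */
static unsigned int default_read_mocs(int is_compute, int has_l3_ccs_read)
{
    return (is_compute && has_l3_ccs_read) ? WB_INDEX : UC_INDEX;
}

int main(void)
{
    printf("compute on PVC/Xe2: %u\n", default_read_mocs(1, 1));
    printf("render  on PVC/Xe2: %u\n", default_read_mocs(0, 1));
    printf("compute on DG2:     %u\n", default_read_mocs(1, 0));
    return 0;
}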
Bspec: 72161 Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20230929051539.3157441-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index a8681089fb60..49128f640e15 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -316,22 +316,25 @@ static void hw_engine_setup_default_state(struct xe_hw_engine *hwe) { struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + /* + * RING_CMD_CCTL specifies the default MOCS entry that will be + * used by the command streamer when executing commands that + * don't have a way to explicitly specify a MOCS setting. + * The default should usually reference whichever MOCS entry + * corresponds to uncached behavior, although use of a WB cached + * entry is recommended by the spec in certain circumstances on + * specific platforms. + * Bspec: 72161 + */ const u8 mocs_write_idx = gt->mocs.uc_index; - /* TODO: missing handling of HAS_L3_CCS_READ platforms */ - const u8 mocs_read_idx = gt->mocs.uc_index; + const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && + (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ? + gt->mocs.wb_index : gt->mocs.uc_index; u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) | REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx); struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); const struct xe_rtp_entry_sr engine_entries[] = { - /* - * RING_CMD_CCTL specifies the default MOCS entry that will be - * used by the command streamer when executing commands that - * don't have a way to explicitly specify a MOCS setting. - * The default should usually reference whichever MOCS entry - * corresponds to uncached behavior, although use of a WB cached - * entry is recommended by the spec in certain circumstances on - * specific platforms. - */ { XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0), -- cgit v1.2.3 From 9a674bef6cf0ad2e7653381cacda9fbc9c1ea67e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 29 Sep 2023 13:02:54 -0700 Subject: drm/xe: Fix exec queue usage for unbinds Passing in a NULL exec queue to __xe_pt_unbind_vma results in the migrate exec queue being used. This is not the intent from the VM bind IOCTL, rather a NULL exec queue should use default VM exec queue. Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2bb0a1669a96..42a5d912e775 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1734,7 +1734,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, if (!(vma->tile_present & BIT(id))) goto next; - fence = __xe_pt_unbind_vma(tile, vma, q, first_op ? syncs : NULL, + fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id], + first_op ? syncs : NULL, first_op ? 
num_syncs : 0); if (IS_ERR(fence)) { err = PTR_ERR(fence); -- cgit v1.2.3 From fb1b70607f73af5e5c9d02af203197191ab7abae Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 25 Sep 2023 13:48:38 +0530 Subject: drm/xe/hwmon: Expose power attributes Expose Card reactive sustained (pl1) power limit as power_max and card default power limit (tdp) as power_rated_max. v2: - Fix review comments (Riana) v3: - Use drmm_mutex_init (Matt Brost) - Print error value (Matt Brost) - Convert enums to uppercase (Matt Brost) - Avoid extra reg read in hwmon_is_visible function (Riana) - Use xe_device_assert_mem_access when applicable (Matt Brost) - Add intel-xe@lists.freedesktop.org in Documentation (Matt Brost) v4: - Use prefix xe_hwmon prefix for all functions (Matt Brost/Andi) - %s/hwmon_reg/xe_hwmon_reg (Andi) - Fix review comments (Guenter/Andi) v5: - Fix review comments (Riana) v6: - Use drm_warn in default case (Rodrigo) - s/ENODEV/EOPNOTSUPP (Andi) Acked-by: Rodrigo Vivi Reviewed-by: Riana Tauro Signed-off-by: Badal Nilawar Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20230925081842.3566834-2-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 22 ++ drivers/gpu/drm/xe/Makefile | 3 + drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 + drivers/gpu/drm/xe/regs/xe_mchbar_regs.h | 33 ++ drivers/gpu/drm/xe/xe_device.c | 3 + drivers/gpu/drm/xe/xe_device_types.h | 3 + drivers/gpu/drm/xe/xe_hwmon.c | 358 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_hwmon.h | 19 ++ 8 files changed, 445 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon create mode 100644 drivers/gpu/drm/xe/regs/xe_mchbar_regs.h create mode 100644 drivers/gpu/drm/xe/xe_hwmon.c create mode 100644 drivers/gpu/drm/xe/xe_hwmon.h (limited to 'drivers/gpu') diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon new file mode 100644 index 000000000000..da0197a29fe4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -0,0 +1,22 @@ +What: /sys/devices/.../hwmon/hwmon/power1_max +Date: September 2023 +KernelVersion: 6.5 +Contact: intel-xe@lists.freedesktop.org +Description: RW. Card reactive sustained (PL1) power limit in microwatts. + + The power controller will throttle the operating frequency + if the power averaged over a window (typically seconds) + exceeds this limit. A read value of 0 means that the PL1 + power limit is disabled, writing 0 disables the + limit. Writing values > 0 and <= TDP will enable the power limit. + + Only supported for particular Intel xe graphics platforms. + +What: /sys/devices/.../hwmon/hwmon/power1_rated_max +Date: September 2023 +KernelVersion: 6.5 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Card default power limit (default TDP setting). + + Only supported for particular Intel xe graphics platforms. 
+ diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 9be0848ea370..a0e7896a4ef7 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -122,6 +122,9 @@ xe-y += xe_bb.o \ xe_wa.o \ xe_wopcm.o +# graphics hardware monitoring (HWMON) support +xe-$(CONFIG_HWMON) += xe_hwmon.o + obj-$(CONFIG_DRM_XE) += xe.o obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 3a4c9bcf793f..b32b8132f8bf 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -411,4 +411,8 @@ #define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) #define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) +#define PVC_GT0_PACKAGE_RAPL_LIMIT XE_REG(0x281008) +#define PVC_GT0_PACKAGE_POWER_SKU_UNIT XE_REG(0x281068) +#define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h new file mode 100644 index 000000000000..27f1d42baf6d --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MCHBAR_REGS_H_ +#define _XE_MCHBAR_REGS_H_ + +#include "regs/xe_reg_defs.h" + +/* + * MCHBAR mirror. + * + * This mirrors the MCHBAR MMIO space whose location is determined by + * device 0 function 0's pci config register 0x44 or 0x48 and matches it in + * every way. + */ + +#define MCHBAR_MIRROR_BASE_SNB 0x140000 + +#define PCU_CR_PACKAGE_POWER_SKU XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5930) +#define PKG_TDP GENMASK_ULL(14, 0) +#define PKG_MIN_PWR GENMASK_ULL(30, 16) +#define PKG_MAX_PWR GENMASK_ULL(46, 32) + +#define PCU_CR_PACKAGE_POWER_SKU_UNIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938) +#define PKG_PWR_UNIT REG_GENMASK(3, 0) + +#define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) +#define PKG_PWR_LIM_1 REG_GENMASK(14, 0) +#define PKG_PWR_LIM_1_EN REG_BIT(15) + +#endif /* _XE_MCHBAR_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 113fca462141..283fc5990000 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -35,6 +35,7 @@ #include "xe_vm.h" #include "xe_vm_madvise.h" #include "xe_wait_user_fence.h" +#include "xe_hwmon.h" #ifdef CONFIG_LOCKDEP struct lockdep_map xe_device_mem_access_lockdep_map = { @@ -328,6 +329,8 @@ int xe_device_probe(struct xe_device *xe) xe_pmu_register(&xe->pmu); + xe_hwmon_register(xe); + err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 1ee8410ec3b1..dd52d112d58f 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -359,6 +359,9 @@ struct xe_device { /** @pmu: performance monitoring unit */ struct xe_pmu pmu; + /** @hwmon: hwmon subsystem integration */ + struct xe_hwmon *hwmon; + /* For pcode */ struct mutex sb_lock; diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c new file mode 100644 index 000000000000..997ffe0d0451 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +#include +#include "regs/xe_gt_regs.h" +#include "regs/xe_mchbar_regs.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hwmon.h" +#include "xe_mmio.h" + +enum xe_hwmon_reg { + 
REG_PKG_RAPL_LIMIT, + REG_PKG_POWER_SKU, + REG_PKG_POWER_SKU_UNIT, +}; + +enum xe_hwmon_reg_operation { + REG_READ, + REG_WRITE, + REG_RMW, +}; + +/* + * SF_* - scale factors for particular quantities according to hwmon spec. + */ +#define SF_POWER 1000000 /* microwatts */ + +struct xe_hwmon { + struct device *hwmon_dev; + struct xe_gt *gt; + struct mutex hwmon_lock; /* rmw operations*/ + int scl_shift_power; +}; + +static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) +{ + struct xe_device *xe = gt_to_xe(hwmon->gt); + struct xe_reg reg = XE_REG(0); + + switch (hwmon_reg) { + case REG_PKG_RAPL_LIMIT: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_RAPL_LIMIT; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_RAPL_LIMIT; + break; + case REG_PKG_POWER_SKU: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_POWER_SKU; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_POWER_SKU; + break; + case REG_PKG_POWER_SKU_UNIT: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_POWER_SKU_UNIT; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT; + break; + default: + drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); + break; + } + + return reg.raw; +} + +static int xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, + enum xe_hwmon_reg_operation operation, u32 *value, + u32 clr, u32 set) +{ + struct xe_reg reg; + + reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg); + + if (!reg.raw) + return -EOPNOTSUPP; + + switch (operation) { + case REG_READ: + *value = xe_mmio_read32(hwmon->gt, reg); + return 0; + case REG_WRITE: + xe_mmio_write32(hwmon->gt, reg, *value); + return 0; + case REG_RMW: + *value = xe_mmio_rmw32(hwmon->gt, reg, clr, set); + return 0; + default: + drm_warn(>_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n", + operation); + return -EOPNOTSUPP; + } +} + +static int xe_hwmon_process_reg_read64(struct xe_hwmon *hwmon, + enum xe_hwmon_reg hwmon_reg, u64 *value) +{ + struct xe_reg reg; + + reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg); + + if (!reg.raw) + return -EOPNOTSUPP; + + *value = xe_mmio_read64_2x32(hwmon->gt, reg); + + return 0; +} + +#define PL1_DISABLE 0 + +/* + * HW allows arbitrary PL1 limits to be set but silently clamps these values to + * "typical but not guaranteed" min/max values in REG_PKG_POWER_SKU. Follow the + * same pattern for sysfs, allow arbitrary PL1 limits to be set but display + * clamped values when read. 
+ */ +static int xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) +{ + u32 reg_val; + u64 reg_val64, min, max; + + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ, ®_val, 0, 0); + /* Check if PL1 limit is disabled */ + if (!(reg_val & PKG_PWR_LIM_1_EN)) { + *value = PL1_DISABLE; + return 0; + } + + reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + + xe_hwmon_process_reg_read64(hwmon, REG_PKG_POWER_SKU, ®_val64); + min = REG_FIELD_GET(PKG_MIN_PWR, reg_val64); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = REG_FIELD_GET(PKG_MAX_PWR, reg_val64); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + + if (min && max) + *value = clamp_t(u64, *value, min, max); + + return 0; +} + +static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) +{ + u32 reg_val; + + /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ + if (value == PL1_DISABLE) { + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW, ®_val, + PKG_PWR_LIM_1_EN, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ, ®_val, + PKG_PWR_LIM_1_EN, 0); + + if (reg_val & PKG_PWR_LIM_1_EN) + return -EOPNOTSUPP; + } + + /* Computation in 64-bits to avoid overflow. Round to nearest. */ + reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); + reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); + + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW, ®_val, + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); + + return 0; +} + +static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) +{ + u32 reg_val; + + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ, ®_val, 0, 0); + reg_val = REG_FIELD_GET(PKG_TDP, reg_val); + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + + return 0; +} + +static const struct hwmon_channel_info *hwmon_info[] = { + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX), + NULL +}; + +static umode_t +xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan) +{ + switch (attr) { + case hwmon_power_max: + return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0; + case hwmon_power_rated_max: + return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 
0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val) +{ + switch (attr) { + case hwmon_power_max: + return xe_hwmon_power_max_read(hwmon, val); + case hwmon_power_rated_max: + return xe_hwmon_power_rated_max_read(hwmon, val); + default: + return -EOPNOTSUPP; + } +} + +static int +xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val) +{ + switch (attr) { + case hwmon_power_max: + return xe_hwmon_power_max_write(hwmon, val); + default: + return -EOPNOTSUPP; + } +} + +static umode_t +xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; + int ret; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + switch (type) { + case hwmon_power: + ret = xe_hwmon_power_is_visible(hwmon, attr, channel); + break; + default: + ret = 0; + break; + } + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + +static int +xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long *val) +{ + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + int ret; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + switch (type) { + case hwmon_power: + ret = xe_hwmon_power_read(hwmon, attr, channel, val); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + +static int +xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long val) +{ + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + int ret; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + switch (type) { + case hwmon_power: + ret = xe_hwmon_power_write(hwmon, attr, channel, val); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + +static const struct hwmon_ops hwmon_ops = { + .is_visible = xe_hwmon_is_visible, + .read = xe_hwmon_read, + .write = xe_hwmon_write, +}; + +static const struct hwmon_chip_info hwmon_chip_info = { + .ops = &hwmon_ops, + .info = hwmon_info, +}; + +static void +xe_hwmon_get_preregistration_info(struct xe_device *xe) +{ + struct xe_hwmon *hwmon = xe->hwmon; + u32 val_sku_unit = 0; + int ret; + + ret = xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, REG_READ, &val_sku_unit, 0, 0); + /* + * The contents of register PKG_POWER_SKU_UNIT do not change, + * so read it once and store the shift values. 
+ */ + if (!ret) + hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); +} + +void xe_hwmon_register(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + struct xe_hwmon *hwmon; + + /* hwmon is available only for dGfx */ + if (!IS_DGFX(xe)) + return; + + hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + if (!hwmon) + return; + + xe->hwmon = hwmon; + + drmm_mutex_init(&xe->drm, &hwmon->hwmon_lock); + + /* primary GT to access device level properties */ + hwmon->gt = xe->tiles[0].primary_gt; + + xe_hwmon_get_preregistration_info(xe); + + drm_dbg(&xe->drm, "Register xe hwmon interface\n"); + + /* hwmon_dev points to device hwmon */ + hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon, + &hwmon_chip_info, + NULL); + if (IS_ERR(hwmon->hwmon_dev)) { + drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); + xe->hwmon = NULL; + return; + } +} + diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h new file mode 100644 index 000000000000..c42a1de2cd7a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hwmon.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_HWMON_H_ +#define _XE_HWMON_H_ + +#include + +struct xe_device; + +#if IS_REACHABLE(CONFIG_HWMON) +void xe_hwmon_register(struct xe_device *xe); +#else +static inline void xe_hwmon_register(struct xe_device *xe) { }; +#endif + +#endif /* _XE_HWMON_H_ */ -- cgit v1.2.3 From 92d44a422d0d9e08ed9020cbf11915909e1f2ad3 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 25 Sep 2023 13:48:39 +0530 Subject: drm/xe/hwmon: Expose card reactive critical power Expose the card reactive critical (I1) power. I1 is exposed as power1_crit in microwatts (typically for client products) or as curr1_crit in milliamperes (typically for server). v2: Move PCODE_MBOX macro to pcode file (Riana) v3: s/IS_DG2/(gt_to_xe(gt)->info.platform == XE_DG2) v4: Fix review comments (Andi) Acked-by: Rodrigo Vivi Reviewed-by: Riana Tauro Signed-off-by: Badal Nilawar Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20230925081842.3566834-3-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 26 +++++ drivers/gpu/drm/xe/xe_hwmon.c | 105 ++++++++++++++++++++- drivers/gpu/drm/xe/xe_pcode.h | 5 + drivers/gpu/drm/xe/xe_pcode_api.h | 7 ++ 4 files changed, 142 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index da0197a29fe4..37263b09b6e4 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -20,3 +20,29 @@ Description: RO. Card default power limit (default TDP setting). Only supported for particular Intel xe graphics platforms. +What: /sys/devices/.../hwmon/hwmon/power1_crit +Date: September 2023 +KernelVersion: 6.5 +Contact: intel-xe@lists.freedesktop.org +Description: RW. Card reactive critical (I1) power limit in microwatts. + + Card reactive critical (I1) power limit in microwatts is exposed + for client products. The power controller will throttle the + operating frequency if the power averaged over a window exceeds + this limit. + + Only supported for particular Intel xe graphics platforms. + +What: /sys/devices/.../hwmon/hwmon/curr1_crit +Date: September 2023 +KernelVersion: 6.5 +Contact: intel-xe@lists.freedesktop.org +Description: RW. 
Card reactive critical (I1) power limit in milliamperes. + + Card reactive critical (I1) power limit in milliamperes is + exposed for server products. The power controller will throttle + the operating frequency if the power averaged over a window + exceeds this limit. + + Only supported for particular Intel xe graphics platforms. + diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 997ffe0d0451..8dff0f90c543 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -12,6 +12,8 @@ #include "xe_gt.h" #include "xe_hwmon.h" #include "xe_mmio.h" +#include "xe_pcode.h" +#include "xe_pcode_api.h" enum xe_hwmon_reg { REG_PKG_RAPL_LIMIT, @@ -29,6 +31,7 @@ enum xe_hwmon_reg_operation { * SF_* - scale factors for particular quantities according to hwmon spec. */ #define SF_POWER 1000000 /* microwatts */ +#define SF_CURR 1000 /* milliamperes */ struct xe_hwmon { struct device *hwmon_dev; @@ -184,18 +187,43 @@ static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) } static const struct hwmon_channel_info *hwmon_info[] = { - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX), + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), + HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), NULL }; +/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ +static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) +{ + /* Avoid Illegal Subcommand error */ + if (gt_to_xe(gt)->info.platform == XE_DG2) + return -ENXIO; + + return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, + POWER_SETUP_SUBCOMMAND_READ_I1, 0), + uval, 0); +} + +static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) +{ + return xe_pcode_write(gt, PCODE_MBOX(PCODE_POWER_SETUP, + POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), + uval); +} + static umode_t xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan) { + u32 uval; + switch (attr) { case hwmon_power_max: return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0; case hwmon_power_rated_max: return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0; + case hwmon_power_crit: + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + !(uval & POWER_SETUP_I1_WATTS)) ? 
0 : 0644; default: return 0; } @@ -204,11 +232,23 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan) static int xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val) { + int ret; + u32 uval; + switch (attr) { case hwmon_power_max: return xe_hwmon_power_max_read(hwmon, val); case hwmon_power_rated_max: return xe_hwmon_power_rated_max_read(hwmon, val); + case hwmon_power_crit: + ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); + if (ret) + return ret; + if (!(uval & POWER_SETUP_I1_WATTS)) + return -ENODEV; + *val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), + SF_POWER, POWER_SETUP_I1_SHIFT); + return 0; default: return -EOPNOTSUPP; } @@ -217,9 +257,63 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val) static int xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val) { + u32 uval; + switch (attr) { case hwmon_power_max: return xe_hwmon_power_max_write(hwmon, val); + case hwmon_power_crit: + uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER); + return xe_hwmon_pcode_write_i1(hwmon->gt, uval); + default: + return -EOPNOTSUPP; + } +} + +static umode_t +xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr) +{ + u32 uval; + + switch (attr) { + case hwmon_curr_crit: + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + default: + return 0; + } +} + +static int +xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val) +{ + int ret; + u32 uval; + + switch (attr) { + case hwmon_curr_crit: + ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); + if (ret) + return ret; + if (uval & POWER_SETUP_I1_WATTS) + return -ENODEV; + *val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), + SF_CURR, POWER_SETUP_I1_SHIFT); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val) +{ + u32 uval; + + switch (attr) { + case hwmon_curr_crit: + uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_CURR); + return xe_hwmon_pcode_write_i1(hwmon->gt, uval); default: return -EOPNOTSUPP; } @@ -238,6 +332,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, case hwmon_power: ret = xe_hwmon_power_is_visible(hwmon, attr, channel); break; + case hwmon_curr: + ret = xe_hwmon_curr_is_visible(hwmon, attr); + break; default: ret = 0; break; @@ -261,6 +358,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_power: ret = xe_hwmon_power_read(hwmon, attr, channel, val); break; + case hwmon_curr: + ret = xe_hwmon_curr_read(hwmon, attr, val); + break; default: ret = -EOPNOTSUPP; break; @@ -284,6 +384,9 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_power: ret = xe_hwmon_power_write(hwmon, attr, channel, val); break; + case hwmon_curr: + ret = xe_hwmon_curr_write(hwmon, attr, val); + break; default: ret = -EOPNOTSUPP; break; diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index 3b4aa8c1a3ba..08cb1d047cba 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -22,4 +22,9 @@ int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 val, int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_ms); +#define PCODE_MBOX(mbcmd, param1, param2)\ + (FIELD_PREP(PCODE_MB_COMMAND, mbcmd)\ + | FIELD_PREP(PCODE_MB_PARAM1, param1)\ + | FIELD_PREP(PCODE_MB_PARAM2, param2)) + #endif 
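(Aside, not part of the patch: a standalone sketch of the 10.6 fixed-point conversion the hwmon code above performs for the I1 limit, using the same SF_CURR scale and the shift of 6 documented as "10.6 fixed point format" in the pcode API header in this patch. DIV_ROUND_CLOSEST_ULL() is re-implemented locally and the 25 A value is invented.)

#include <stdint.h>
#include <stdio.h>

#define SF_CURR              1000 /* hwmon reports milliamperes */
#define POWER_SETUP_I1_SHIFT 6    /* 10.6 fixed point format */

/* userspace stand-in for the kernel's DIV_ROUND_CLOSEST_ULL() */
static uint64_t div_round_closest(uint64_t n, uint64_t d)
{
    return (n + d / 2) / d;
}

int main(void)
{
    long val = 25000; /* hypothetical 25 A critical limit, in mA */

    /* sysfs write path: milliamperes -> 10.6 fixed-point amperes */
    uint32_t uval = div_round_closest((uint64_t)val << POWER_SETUP_I1_SHIFT, SF_CURR);
    /* sysfs read path: 10.6 fixed-point amperes -> milliamperes */
    long back = (uint64_t)uval * SF_CURR >> POWER_SETUP_I1_SHIFT;

    printf("uval=0x%x, read back %ld mA\n", (unsigned)uval, back);
    return 0;
}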
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 837ff7c71280..5935cfe30204 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -35,6 +35,13 @@ #define DGFX_GET_INIT_STATUS 0x0 #define DGFX_INIT_STATUS_COMPLETE 0x1 +#define PCODE_POWER_SETUP 0x7C +#define POWER_SETUP_SUBCOMMAND_READ_I1 0x4 +#define POWER_SETUP_SUBCOMMAND_WRITE_I1 0x5 +#define POWER_SETUP_I1_WATTS REG_BIT(31) +#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */ +#define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) + struct pcode_err_decode { int errno; const char *str; -- cgit v1.2.3 From fbcdc9d3bf586c459cc66ffe802b0d4ba92e8406 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 25 Sep 2023 13:48:40 +0530 Subject: drm/xe/hwmon: Expose input voltage attribute Use Xe HWMON subsystem to display the input voltage. v2: - Rename hwm_get_vltg to hwm_get_voltage (Riana) - Use scale factor SF_VOLTAGE (Riana) v3: - %s/gt_perf_status/REG_GT_PERF_STATUS/ - Remove platform check from hwmon_get_voltage() v4: - Fix review comments (Andi) Acked-by: Rodrigo Vivi Reviewed-by: Riana Tauro Signed-off-by: Badal Nilawar Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20230925081842.3566834-4-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 6 +++ drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 ++ drivers/gpu/drm/xe/xe_hwmon.c | 58 ++++++++++++++++++++++ 3 files changed, 67 insertions(+) (limited to 'drivers/gpu') diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 37263b09b6e4..7f9407c20864 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -44,5 +44,11 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes. the operating frequency if the power averaged over a window exceeds this limit. +What: /sys/devices/.../hwmon/hwmon/in0_input +Date: September 2023 +KernelVersion: 6.5 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Current Voltage in millivolt. + Only supported for particular Intel xe graphics platforms. 
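(Aside, not part of the patch: a standalone sketch of the in0_input conversion. The 2.5 mV hardware unit and the GENMASK(10, 0) voltage field come from the diff below; the raw register value is made up.)

#include <stdint.h>
#include <stdio.h>

#define VOLTAGE_MASK 0x7ffu /* bits 10:0 of GT_PERF_STATUS */
#define SF_VOLTAGE   1000   /* hwmon reports millivolts */

int main(void)
{
    uint32_t reg_val = 0x1234;               /* hypothetical MMIO readout */
    uint32_t field = reg_val & VOLTAGE_MASK; /* 0x234 = 564 */

    /* hardware counts in units of 2.5 mV; round to the nearest mV */
    long mv = (field * 2500 + SF_VOLTAGE / 2) / SF_VOLTAGE;

    printf("in0_input = %ld mV\n", mv);
    return 0;
}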
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index b32b8132f8bf..a9a91195c6d5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -375,6 +375,9 @@ #define GT_GFX_RC6_LOCKED XE_REG(0x138104) #define GT_GFX_RC6 XE_REG(0x138108) +#define GT_PERF_STATUS XE_REG(0x1381b4) +#define VOLTAGE_MASK REG_GENMASK(10, 0) + #define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4)) #define GUC_SG_INTR_ENABLE XE_REG(0x190038) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 8dff0f90c543..d89345b85e76 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -3,7 +3,9 @@ * Copyright © 2023 Intel Corporation */ +#include #include +#include #include #include "regs/xe_gt_regs.h" @@ -19,6 +21,7 @@ enum xe_hwmon_reg { REG_PKG_RAPL_LIMIT, REG_PKG_POWER_SKU, REG_PKG_POWER_SKU_UNIT, + REG_GT_PERF_STATUS, }; enum xe_hwmon_reg_operation { @@ -32,6 +35,7 @@ enum xe_hwmon_reg_operation { */ #define SF_POWER 1000000 /* microwatts */ #define SF_CURR 1000 /* milliamperes */ +#define SF_VOLTAGE 1000 /* millivolts */ struct xe_hwmon { struct device *hwmon_dev; @@ -64,6 +68,10 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) else if (xe->info.platform == XE_PVC) reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT; break; + case REG_GT_PERF_STATUS: + if (xe->info.platform == XE_DG2) + reg = GT_PERF_STATUS; + break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; @@ -189,6 +197,7 @@ static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) static const struct hwmon_channel_info *hwmon_info[] = { HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), + HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), NULL }; @@ -211,6 +220,18 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) uval); } +static int xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value) +{ + u32 reg_val; + + xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS, + REG_READ, ®_val, 0, 0); + /* HW register value in units of 2.5 millivolt */ + *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); + + return 0; +} + static umode_t xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan) { @@ -319,6 +340,37 @@ xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val) } } +static umode_t +xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr) +{ + switch (attr) { + case hwmon_in_input: + return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS) ? 
0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val) +{ + int ret; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + switch (attr) { + case hwmon_in_input: + ret = xe_hwmon_get_voltage(hwmon, val); + break; + default: + ret = -EOPNOTSUPP; + } + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + static umode_t xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) @@ -335,6 +387,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, case hwmon_curr: ret = xe_hwmon_curr_is_visible(hwmon, attr); break; + case hwmon_in: + ret = xe_hwmon_in_is_visible(hwmon, attr); + break; default: ret = 0; break; @@ -361,6 +416,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_curr: ret = xe_hwmon_curr_read(hwmon, attr, val); break; + case hwmon_in: + ret = xe_hwmon_in_read(hwmon, attr, val); + break; default: ret = -EOPNOTSUPP; break; -- cgit v1.2.3 From 71d0a32524f98ebb5034d74f204b613bf06e6925 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 25 Sep 2023 13:48:41 +0530 Subject: drm/xe/hwmon: Expose hwmon energy attribute Expose hwmon energy attribute to show device level energy usage v2: - %s/hwm_/hwmon_/ - Convert enums to upper case v3: - %s/hwmon_/xe_hwmon - Remove gt specific hwmon attributes v4: - %s/REG_PKG_ENERGY_STATUS/REG_ENERGY_STATUS_ALL (Riana) - %s/hwmon_energy_info/xe_hwmon_energy_info (Riana) Acked-by: Rodrigo Vivi Reviewed-by: Riana Tauro Signed-off-by: Badal Nilawar Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20230925081842.3566834-5-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 7 ++ drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 + drivers/gpu/drm/xe/regs/xe_mchbar_regs.h | 3 + drivers/gpu/drm/xe/xe_hwmon.c | 105 ++++++++++++++++++++- 4 files changed, 116 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 7f9407c20864..1a7a6c23e141 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -52,3 +52,10 @@ Description: RO. Current Voltage in millivolt. Only supported for particular Intel xe graphics platforms. +What: /sys/devices/.../hwmon/hwmon/energy1_input +Date: September 2023 +KernelVersion: 6.5 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Energy input of device in microjoules. + + Only supported for particular Intel xe graphics platforms. 
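(Aside, not part of the patch: the underlying energy counter is only 32 bits wide, so the accumulation logic added below has to handle wraparound between reads. A standalone sketch with invented readings; UINT32_MAX stands in for the kernel's UINT_MAX.)

#include <stdint.h>
#include <stdio.h>

struct energy_info {
    uint32_t reg_val_prev;
    uint64_t accum_energy;
};

/* mirrors the wrap handling in xe_hwmon_energy_get() */
static void accumulate(struct energy_info *ei, uint32_t reg_val)
{
    if (reg_val >= ei->reg_val_prev)
        ei->accum_energy += reg_val - ei->reg_val_prev;
    else
        ei->accum_energy += UINT32_MAX - ei->reg_val_prev + reg_val;
    ei->reg_val_prev = reg_val;
}

int main(void)
{
    struct energy_info ei = { .reg_val_prev = 0xfffffff0, .accum_energy = 0 };

    /* counter wrapped between reads; the accumulated total still grows */
    accumulate(&ei, 0x10);
    printf("accumulated ticks: %llu\n", (unsigned long long)ei.accum_energy);
    return 0;
}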
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index a9a91195c6d5..83519a424aab 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -414,8 +414,10 @@ #define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) #define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) +#define PVC_GT0_PACKAGE_ENERGY_STATUS XE_REG(0x281004) #define PVC_GT0_PACKAGE_RAPL_LIMIT XE_REG(0x281008) #define PVC_GT0_PACKAGE_POWER_SKU_UNIT XE_REG(0x281068) +#define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) #define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) #endif diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index 27f1d42baf6d..d8ecbe1858d1 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -25,6 +25,9 @@ #define PCU_CR_PACKAGE_POWER_SKU_UNIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938) #define PKG_PWR_UNIT REG_GENMASK(3, 0) +#define PKG_ENERGY_UNIT REG_GENMASK(12, 8) + +#define PCU_CR_PACKAGE_ENERGY_STATUS XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x593c) #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) #define PKG_PWR_LIM_1 REG_GENMASK(14, 0) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index d89345b85e76..734fcca9f71f 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -22,6 +22,7 @@ enum xe_hwmon_reg { REG_PKG_POWER_SKU, REG_PKG_POWER_SKU_UNIT, REG_GT_PERF_STATUS, + REG_PKG_ENERGY_STATUS, }; enum xe_hwmon_reg_operation { @@ -36,12 +37,20 @@ enum xe_hwmon_reg_operation { #define SF_POWER 1000000 /* microwatts */ #define SF_CURR 1000 /* milliamperes */ #define SF_VOLTAGE 1000 /* millivolts */ +#define SF_ENERGY 1000000 /* microjoules */ + +struct xe_hwmon_energy_info { + u32 reg_val_prev; + long accum_energy; /* Accumulated energy for energy1_input */ +}; struct xe_hwmon { struct device *hwmon_dev; struct xe_gt *gt; struct mutex hwmon_lock; /* rmw operations*/ int scl_shift_power; + int scl_shift_energy; + struct xe_hwmon_energy_info ei; /* Energy info for energy1_input */ }; static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) @@ -72,6 +81,12 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) if (xe->info.platform == XE_DG2) reg = GT_PERF_STATUS; break; + case REG_PKG_ENERGY_STATUS: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_ENERGY_STATUS; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PLATFORM_ENERGY_STATUS; + break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; @@ -194,10 +209,59 @@ static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) return 0; } +/* + * xe_hwmon_energy_get - Obtain energy value + * + * The underlying energy hardware register is 32-bits and is subject to + * overflow. How long before overflow? For example, with an example + * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and + * a power draw of 1000 watts, the 32-bit counter will overflow in + * approximately 4.36 minutes. + * + * Examples: + * 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days + * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes + * + * The function significantly increases overflow duration (from 4.36 + * minutes) by accumulating the energy register into a 'long' as allowed by + * the hwmon API. 
Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), + * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and + * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before + * energy1_input overflows. This at 1000 W is an overflow duration of 278 years. + */ +static void +xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) +{ + struct xe_hwmon_energy_info *ei = &hwmon->ei; + u32 reg_val; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + mutex_lock(&hwmon->hwmon_lock); + + xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ, + ®_val, 0, 0); + + if (reg_val >= ei->reg_val_prev) + ei->accum_energy += reg_val - ei->reg_val_prev; + else + ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + + ei->reg_val_prev = reg_val; + + *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, + hwmon->scl_shift_energy); + + mutex_unlock(&hwmon->hwmon_lock); + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); +} + static const struct hwmon_channel_info *hwmon_info[] = { HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), + HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT), NULL }; @@ -371,6 +435,29 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val) return ret; } +static umode_t +xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr) +{ + switch (attr) { + case hwmon_energy_input: + return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS) ? 0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val) +{ + switch (attr) { + case hwmon_energy_input: + xe_hwmon_energy_get(hwmon, val); + return 0; + default: + return -EOPNOTSUPP; + } +} + static umode_t xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) @@ -390,6 +477,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, case hwmon_in: ret = xe_hwmon_in_is_visible(hwmon, attr); break; + case hwmon_energy: + ret = xe_hwmon_energy_is_visible(hwmon, attr); + break; default: ret = 0; break; @@ -419,6 +509,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_in: ret = xe_hwmon_in_read(hwmon, attr, val); break; + case hwmon_energy: + ret = xe_hwmon_energy_read(hwmon, attr, val); + break; default: ret = -EOPNOTSUPP; break; @@ -470,6 +563,7 @@ static void xe_hwmon_get_preregistration_info(struct xe_device *xe) { struct xe_hwmon *hwmon = xe->hwmon; + long energy; u32 val_sku_unit = 0; int ret; @@ -478,8 +572,17 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) * The contents of register PKG_POWER_SKU_UNIT do not change, * so read it once and store the shift values. */ - if (!ret) + if (!ret) { hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); + hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); + } + + /* + * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the + * first value of the energy register read + */ + if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0)) + xe_hwmon_energy_get(hwmon, &energy); } void xe_hwmon_register(struct xe_device *xe) -- cgit v1.2.3 From 5f01a35b10f3d2f55634a471c43e59e3c6f239fd Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 29 Sep 2023 10:31:02 -0700 Subject: drm/xe/vm: print the correct 'keep' when printing gpuva ops Unions are cool, until they aren't. 
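(Aside, not part of the patch: the one-line fix below swaps which union member is read. A toy standalone illustration of why that matters; the types and values are invented and only loosely modeled on drm_gpuva_op.)

#include <stdio.h>

struct unmap_op { int keep; };
struct remap_op { struct unmap_op *unmap; };

/* toy tagged union: only the member matching 'type' is meaningful */
struct op {
    int type;
    union {
        struct unmap_op unmap;
        struct remap_op remap;
    };
};

int main(void)
{
    struct unmap_op inner = { .keep = 1 };
    struct op op = { .type = 2 /* pretend this means "remap" */ };

    op.remap.unmap = &inner;

    /* for a remap op, only op.remap.unmap->keep is meaningful;
     * op.unmap.keep would reinterpret the pointer's low bits instead */
    printf("keep=%d\n", op.remap.unmap->keep);
    return 0;
}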
Signed-off-by: Paulo Zanoni Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 42a5d912e775..ae5578a3e121 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2345,7 +2345,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) vma = gpuva_to_vma(op->remap.unmap->va); vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), - op->unmap.keep ? 1 : 0); + op->remap.unmap->keep ? 1 : 0); if (op->remap.prev) vm_dbg(&xe->drm, "REMAP:PREV: addr=0x%016llx, range=0x%016llx", -- cgit v1.2.3 From 66aca8f04bb982b9f429fbce384beaa4badae21a Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 29 Sep 2023 10:31:03 -0700 Subject: drm/xe/vm: use list_last_entry() to fetch last_op I would imagine that it's more efficient to fetch ops_list->prev than to walk the whole list forward. Signed-off-by: Paulo Zanoni Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ae5578a3e121..b61ed51b503d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3118,8 +3118,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, lockdep_assert_held_write(&vm->lock); - list_for_each_entry(op, ops_list, link) - last_op = op; + last_op = list_last_entry(ops_list, struct xe_vma_op, link); if (!async) { err = xe_vma_op_execute(vm, last_op); -- cgit v1.2.3 From 0e1a234618a86cd4f920a09cfe9ac35f87e8c3f6 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 29 Sep 2023 10:31:04 -0700 Subject: drm/xe: fix range printing for debug messages We're already using the half-open interval notation "[A, B)", that "- 1" there makes it wrong. Also, getting rid of the "-1" makes it much easier to grep for the logs when you're looking for an address that's the end of a vma and the start of another. 
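(Aside, not part of the patch: a quick standalone sketch of why the half-open "[start...end)" form greps better than "end - 1"; the addresses are invented.)

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* two adjacent hypothetical VMAs: the end of one is the start of the next */
    uint64_t a_start = 0x100000, a_end = 0x200000;
    uint64_t b_start = 0x200000, b_end = 0x300000;

    /* with the end printed verbatim, 0x200000 appears in both lines,
     * so a single grep for that address finds the shared boundary */
    printf("bind range [%" PRIx64 "...%" PRIx64 ")\n", a_start, a_end);
    printf("bind range [%" PRIx64 "...%" PRIx64 ")\n", b_start, b_end);
    return 0;
}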
Signed-off-by: Paulo Zanoni Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d5f721efdc3c..a7249b2d807c 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1265,7 +1265,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue vm_dbg(&xe_vma_vm(vma)->xe->drm, "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma) - 1, q); + xe_vma_start(vma), xe_vma_end(vma), q); err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind); if (err) @@ -1618,7 +1618,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu vm_dbg(&xe_vma_vm(vma)->xe->drm, "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma) - 1, q); + xe_vma_start(vma), xe_vma_end(vma), q); num_entries = xe_pt_stage_unbind(tile, vma, entries); xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); -- cgit v1.2.3 From f24cf6cea519cd5c8110ac8dcbdad70e9f2dfb22 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 3 Oct 2023 10:21:30 -0700 Subject: drm/xe: Fix RING_MI_MODE label in devcoredump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a typo in RING_MI_MODE label. Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 49128f640e15..dc9dd83d99c5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -770,7 +770,7 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); - drm_printf(p, "\tRING_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode); + drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode); drm_printf(p, "\tRING_MODE: 0x%08x\n", snapshot->reg.ring_mode); drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr); -- cgit v1.2.3 From d32c49e318df0a3f334c2d2ff95ce4600df2d6bf Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 3 Oct 2023 13:16:28 -0700 Subject: drm/xe: Fix devcoredump readout of IPEHR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was reading (base) + 0x8c but that is not a valid register and instead it should read (base) + 0x68. So here reading the correct register and removing the wrong and duplicated. 
Reviewed-by: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 - drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 692213d09cea..792d431161c6 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -37,7 +37,6 @@ #define RING_DMA_FADD(base) XE_REG((base) + 0x78) #define RING_HWS_PGA(base) XE_REG((base) + 0x80) #define IPEIR(base) XE_REG((base) + 0x88) -#define IPEHR(base) XE_REG((base) + 0x8c) #define RING_HWSTAM(base) XE_REG((base) + 0x98) #define RING_MI_MODE(base) XE_REG((base) + 0x9c) #define RING_NOPID(base) XE_REG((base) + 0x94) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index dc9dd83d99c5..c2db391cf267 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -728,7 +728,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_dma_fadd = hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); snapshot->reg.ipeir = hw_engine_mmio_read32(hwe, IPEIR(0)); - snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, IPEHR(0)); + snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); -- cgit v1.2.3 From a8e2e0d7fab79b83cdc3bb2dd192c94564fa4298 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 3 Oct 2023 13:19:06 -0700 Subject: drm/xe: Remove devcoredump readout of IPEIR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This register doesn't exist in gfx12+, so drop its readout and print from the devcoredump.
Reviewed-by: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 -- drivers/gpu/drm/xe/xe_hw_engine.c | 2 -- drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 -- 3 files changed, 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 792d431161c6..35dd4837dd75 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -31,12 +31,10 @@ #define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c) #define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60) -#define RING_IPEIR(base) XE_REG((base) + 0x64) #define RING_IPEHR(base) XE_REG((base) + 0x68) #define RING_ACTHD(base) XE_REG((base) + 0x74) #define RING_DMA_FADD(base) XE_REG((base) + 0x78) #define RING_HWS_PGA(base) XE_REG((base) + 0x80) -#define IPEIR(base) XE_REG((base) + 0x88) #define RING_HWSTAM(base) XE_REG((base) + 0x98) #define RING_MI_MODE(base) XE_REG((base) + 0x9c) #define RING_NOPID(base) XE_REG((base) + 0x94) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c2db391cf267..f63c821baeb7 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -727,7 +727,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); snapshot->reg.ring_dma_fadd = hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); - snapshot->reg.ipeir = hw_engine_mmio_read32(hwe, IPEIR(0)); snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) @@ -784,7 +783,6 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", snapshot->reg.ring_dma_fadd_udw, snapshot->reg.ring_dma_fadd); - drm_printf(p, "\tIPEIR: 0x%08x\n", snapshot->reg.ipeir); drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr); if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) drm_printf(p, "\tRCU_MODE: 0x%08x\n", diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index cd4bc1412a3f..5d4ee2904240 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -217,8 +217,6 @@ struct xe_hw_engine_snapshot { u32 ring_dma_fadd_udw; /** @ring_dma_fadd: RING_DMA_FADD */ u32 ring_dma_fadd; - /** @ipeir: IPEIR */ - u32 ipeir; /** @ipehr: IPEHR */ u32 ipehr; /** @rcu_mode: RCU_MODE */ -- cgit v1.2.3 From 5708a1080a2e455ca9f35e372f107d0c030358de Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Oct 2023 08:03:17 -0700 Subject: drm/xe/xe2: Add missing mocs entry Add index 4 so WB on both L3 and L4 can be used by userspace. 
Bspec: 71582 Link: https://lore.kernel.org/all/7oqovb356dx2hm5muop3xjqr4kv7m5fzjisch3vmsmxm33ygtv@eib4jielia35/ Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231004150317.3473731-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index ada3114be4fa..19a8146ded9a 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -385,6 +385,8 @@ static const struct xe_mocs_entry xe2_mocs_table[] = { MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0), /* Uncached L3 + L4 */ MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0), + /* Cached L3 + L4 */ + MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), }; static unsigned int get_mocs_settings(struct xe_device *xe, -- cgit v1.2.3 From 3a13c2de442d6bfaef9c102cd1092e6cae22b753 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 5 Oct 2023 17:38:55 +0100 Subject: drm/xe/hwmon: fix uaf on unload It doesn't look like you can mix and match devm_ and drmm_ for a managed resource. For drmm the resources are all tracked in drm with its own list, and there is only one devm_ resource for the entire list. If the driver itself also adds some of its own devm resources, then those will be released first. In the case of hwmon the devm_kzalloc will be freed before the drmm_ action to destroy the mutex allocated within, leading to a use-after-free. Since hwmon itself wants to use devm, rather use that for the mutex destroy. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/766 Signed-off-by: Matthew Auld Cc: Badal Nilawar Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 734fcca9f71f..9ac05994a967 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -585,6 +585,13 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) xe_hwmon_energy_get(hwmon, &energy); } +static void xe_hwmon_mutex_destroy(void *arg) +{ + struct xe_hwmon *hwmon = arg; + + mutex_destroy(&hwmon->hwmon_lock); +} + void xe_hwmon_register(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -600,7 +607,9 @@ void xe_hwmon_register(struct xe_device *xe) xe->hwmon = hwmon; - drmm_mutex_init(&xe->drm, &hwmon->hwmon_lock); + mutex_init(&hwmon->hwmon_lock); + if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) + return; /* primary GT to access device level properties */ hwmon->gt = xe->tiles[0].primary_gt; -- cgit v1.2.3 From a617b3048abea1cb424963f4354941b335d5a911 Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Fri, 6 Oct 2023 09:11:47 -0700 Subject: drm/xe: Add infrastructure for per engine tuning Add the infrastructure for per engine tuning in preparation for disabling indirect state.
v3: Rebase v4: Fix rebasing issues Reviewed-by: Matt Roper Signed-off-by: Matt Atwood Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 2 ++ drivers/gpu/drm/xe/xe_tuning.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_tuning.h | 1 + 3 files changed, 15 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index f63c821baeb7..b5b084590888 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -25,6 +25,7 @@ #include "xe_reg_sr.h" #include "xe_rtp.h" #include "xe_sched_job.h" +#include "xe_tuning.h" #include "xe_wa.h" #define MAX_MMIO_BASES 3 @@ -405,6 +406,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, } xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt)); + xe_tuning_process_engine(hwe); xe_wa_process_engine(hwe); hw_engine_setup_default_state(hwe); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 08174dda9bc7..3ad11c259300 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -27,6 +27,10 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { {} }; +static const struct xe_rtp_entry_sr engine_tunings[] = { + {} +}; + static const struct xe_rtp_entry_sr lrc_tunings[] = { { XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), @@ -65,6 +69,14 @@ void xe_tuning_process_gt(struct xe_gt *gt) } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); +void xe_tuning_process_engine(struct xe_hw_engine *hwe) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + + xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr); +} +EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); + /** * xe_tuning_process_lrc - process lrc tunings * @hwe: engine instance to process tunings for diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h index 2b95b0c8effc..4f9c3ac3b516 100644 --- a/drivers/gpu/drm/xe/xe_tuning.h +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -10,6 +10,7 @@ struct xe_gt; struct xe_hw_engine; void xe_tuning_process_gt(struct xe_gt *gt); +void xe_tuning_process_engine(struct xe_hw_engine *hwe); void xe_tuning_process_lrc(struct xe_hw_engine *hwe); #endif -- cgit v1.2.3 From f8ebadd0df248d7f0b5060fd8a0d956e773d9d78 Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Fri, 6 Oct 2023 09:47:59 -0700 Subject: drm/xe: add gt tuning for indirect state Force indirect state sampler data to only be in the dynamic state pool, which is more convenient for the UMD.
Behavior change mirrors similar change for i915 in commit 16fc9c08f0ec ("drm/i915: disable sampler indirect state in bindless heap") v2: split out per engine tuning into separate patch, commit message (Lucas) v3: rebase v4: Change to match render only, g.ver 1200 to 1271 (MattR) Acked-by: Lionel Landwerlin Reviewed-by: Matt Roper Signed-off-by: Matt Atwood Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_tuning.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 83519a424aab..cd1821d96a5d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -303,6 +303,7 @@ #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) +#define INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0) #define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 3ad11c259300..d70519816522 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -28,6 +28,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { }; static const struct xe_rtp_entry_sr engine_tunings[] = { + { XE_RTP_NAME("Tuning: Set Indirect State Override"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) + }, {} }; -- cgit v1.2.3 From 285230832eb794dfd1c9dc63d80367a714dbf75f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 28 Sep 2023 22:02:48 -0700 Subject: drm/xe/vm: Prefer xe_assert() over XE_WARN_ON() When xelp_pte_encode_addr() was added in commit 23c8495efeed ("drm/xe/migrate: Do not hand-encode pte"), there was no xe pointer for using xe_assert(). This is not the case anymore, so prefer it over XE_WARN_ON(). Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b61ed51b503d..10ed72228946 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -17,6 +17,7 @@ #include #include +#include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_drm_client.h" @@ -1312,7 +1313,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, u64 pte; /* Avoid passing random bits directly as flags */ - XE_WARN_ON(flags & ~XE_PTE_PS64); + xe_assert(xe, !(flags & ~XE_PTE_PS64)); pte = addr; pte |= XE_PAGE_PRESENT | XE_PAGE_RW; -- cgit v1.2.3 From dfc83d4293f3f0b26d38952b3e491c1ed5f36b38 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 28 Sep 2023 22:02:49 -0700 Subject: drm/xe/xe2: Follow XeHPC for TLB invalidation Register GUC_TLB_INV_CR is gone in xe2. When GuC submission is not yet enabled, make sure to follow the same path as XeHPC. 
Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ec7bbb1dc295..06732461246d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -263,7 +263,7 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) } else if (xe_device_uc_enabled(gt_to_xe(gt))) { struct xe_device *xe = gt_to_xe(gt); - if (xe->info.platform == XE_PVC) { + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, PVC_GUC_TLB_INV_DESC1_INVALIDATE); xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, -- cgit v1.2.3 From d9e85dd5c24d9503391440c65a09fdc69d486d55 Mon Sep 17 00:00:00 2001 From: David Kershner Date: Thu, 5 Oct 2023 18:00:39 -0400 Subject: drm/xe/xe_migrate.c: Use DPA offset for page table entries. Device Physical Address (DPA) is the starting offset device memory. Update xe_migrate identity map base PTE entries to start at dpa_base instead of 0. The VM offset value should be 0 relative instead of DPA relative. Reviewed-by: Niranjana Vishwanathapura Reviewed-by: "Michael J. Ruhl" Signed-off-by: David Kershner Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 4b7210c793f5..4dc52ac26d52 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -114,8 +114,13 @@ static u64 xe_migrate_vm_addr(u64 slot, u32 level) return (slot + 1ULL) << xe_pt_shift(level + 1); } -static u64 xe_migrate_vram_ofs(u64 addr) +static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr) { + /* + * Remove the DPA to get a correct offset into identity table for the + * migrate offset + */ + addr -= xe->mem.vram.dpa_base; return addr + (256ULL << xe_pt_shift(2)); } @@ -149,7 +154,7 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size); vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE); - m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr); + m->cleared_vram_ofs = xe_migrate_vram_ofs(xe, vram_addr); return 0; } @@ -225,12 +230,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } else { u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); - m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr); + m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); if (xe->info.supports_usm) { batch = tile->primary_gt->usm.bb_pool->bo; batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); - m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr); + m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); } } @@ -268,7 +273,9 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, * Use 1GB pages, it shouldn't matter the physical amount of * vram is less, when we don't access it. */ - for (pos = 0; pos < xe->mem.vram.actual_physical_size; pos += SZ_1G, ofs += 8) + for (pos = xe->mem.vram.dpa_base; + pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base; + pos += SZ_1G, ofs += 8) xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags); } @@ -443,8 +450,8 @@ static u32 pte_update_size(struct xe_migrate *m, cmds += cmd_size; } else { /* Offset into identity map. 
*/ - *L0_ofs = xe_migrate_vram_ofs(cur->start + - vram_region_gpu_offset(res)); + *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), + cur->start + vram_region_gpu_offset(res)); cmds += cmd_size; } @@ -1060,10 +1067,10 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, * pages are used. Hence the assert. */ xe_tile_assert(tile, update->qwords <= 0x1ff); - if (!ppgtt_ofs) { - ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0, + if (!ppgtt_ofs) + ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile), + xe_bo_addr(update->pt_bo, 0, XE_PAGE_SIZE)); - } do { u64 addr = ppgtt_ofs + ofs * 8; -- cgit v1.2.3 From cf0b9e94c8c755ae94787d638c655bb38e7a8048 Mon Sep 17 00:00:00 2001 From: David Kershner Date: Thu, 5 Oct 2023 18:00:40 -0400 Subject: drm/xe/tests/xe_migrate.c: Add vram to vram KUNIT test Add missing kunit test to migrate a bo from vram to vram Reviewed-by: Niranjana Vishwanathapura Reviewed-by: "Michael J. Ruhl" Signed-off-by: David Kershner Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 103 ++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 6906ff9d9c31..0db4b651ff1a 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -99,7 +99,7 @@ static const struct xe_migrate_pt_update_ops sanity_ops = { } } while (0) static void test_copy(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test) + struct kunit *test, u32 region) { struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; @@ -108,83 +108,104 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, const char *str = big ? "Copying big bo" : "Copying small bo"; int err; - struct xe_bo *sysmem = xe_bo_create_locked(xe, m->tile, NULL, + struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, bo->size, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | + region | XE_BO_NEEDS_CPU_ACCESS); - if (IS_ERR(sysmem)) { - KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n", - str, PTR_ERR(sysmem)); + if (IS_ERR(remote)) { + KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %li\n", + str, PTR_ERR(remote)); return; } - err = xe_bo_validate(sysmem, NULL, false); + err = xe_bo_validate(remote, NULL, false); if (err) { KUNIT_FAIL(test, "Failed to validate system bo for %s: %li\n", str, err); goto out_unlock; } - err = xe_bo_vmap(sysmem); + err = xe_bo_vmap(remote); if (err) { KUNIT_FAIL(test, "Failed to vmap system bo for %s: %li\n", str, err); goto out_unlock; } - xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); - fence = xe_migrate_clear(m, sysmem, sysmem->ttm.resource); - if (!sanity_fence_failed(xe, fence, big ? "Clearing sysmem big bo" : - "Clearing sysmem small bo", test)) { - retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); - check(retval, expected, "sysmem first offset should be cleared", + xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + fence = xe_migrate_clear(m, remote, remote->ttm.resource); + if (!sanity_fence_failed(xe, fence, big ? 
"Clearing remote big bo" : + "Clearing remote small bo", test)) { + retval = xe_map_rd(xe, &remote->vmap, 0, u64); + check(retval, expected, "remote first offset should be cleared", test); - retval = xe_map_rd(xe, &sysmem->vmap, sysmem->size - 8, u64); - check(retval, expected, "sysmem last offset should be cleared", + retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64); + check(retval, expected, "remote last offset should be cleared", test); } dma_fence_put(fence); - /* Try to copy 0xc0 from sysmem to vram with 2MB or 64KiB/4KiB pages */ - xe_map_memset(xe, &sysmem->vmap, 0, 0xc0, sysmem->size); + /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */ + xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size); xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); expected = 0xc0c0c0c0c0c0c0c0; - fence = xe_migrate_copy(m, sysmem, bo, sysmem->ttm.resource, + fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, bo->ttm.resource); - if (!sanity_fence_failed(xe, fence, big ? "Copying big bo sysmem -> vram" : - "Copying small bo sysmem -> vram", test)) { + if (!sanity_fence_failed(xe, fence, big ? "Copying big bo remote -> vram" : + "Copying small bo remote -> vram", test)) { retval = xe_map_rd(xe, &bo->vmap, 0, u64); check(retval, expected, - "sysmem -> vram bo first offset should be copied", test); + "remote -> vram bo first offset should be copied", test); retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); check(retval, expected, - "sysmem -> vram bo offset should be copied", test); + "remote -> vram bo offset should be copied", test); } dma_fence_put(fence); /* And other way around.. slightly hacky.. */ - xe_map_memset(xe, &sysmem->vmap, 0, 0xd0, sysmem->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); - fence = xe_migrate_copy(m, bo, sysmem, bo->ttm.resource, - sysmem->ttm.resource); - if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram -> sysmem" : - "Copying small bo vram -> sysmem", test)) { - retval = xe_map_rd(xe, &sysmem->vmap, 0, u64); + fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, + remote->ttm.resource); + if (!sanity_fence_failed(xe, fence, big ? 
"Copying big bo vram -> remote" : + "Copying small bo vram -> remote", test)) { + retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, - "vram -> sysmem bo first offset should be copied", test); - retval = xe_map_rd(xe, &sysmem->vmap, bo->size - 8, u64); + "vram -> remote bo first offset should be copied", test); + retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64); check(retval, expected, - "vram -> sysmem bo last offset should be copied", test); + "vram -> remote bo last offset should be copied", test); } dma_fence_put(fence); - xe_bo_vunmap(sysmem); + xe_bo_vunmap(remote); out_unlock: - xe_bo_unlock(sysmem); - xe_bo_put(sysmem); + xe_bo_unlock(remote); + xe_bo_put(remote); +} + +static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo, + struct kunit *test) +{ + test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT); +} + +static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, + struct kunit *test) +{ + u32 region; + + if (bo->ttm.resource->mem_type == XE_PL_SYSTEM) + return; + + if (bo->ttm.resource->mem_type == XE_PL_VRAM0) + region = XE_BO_CREATE_VRAM1_BIT; + else + region = XE_BO_CREATE_VRAM0_BIT; + test_copy(m, bo, test, region); } static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, @@ -349,7 +370,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) check(retval, expected, "Command clear small last value", test); kunit_info(test, "Copying small buffer object to system\n"); - test_copy(m, tiny, test); + test_copy_sysmem(m, tiny, test); + if (xe->info.tile_count > 1) { + kunit_info(test, "Copying small buffer object to other vram\n"); + test_copy_vram(m, tiny, test); + } /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); @@ -366,7 +391,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) check(retval, expected, "Command clear big last value", test); kunit_info(test, "Copying big buffer object to system\n"); - test_copy(m, big, test); + test_copy_sysmem(m, big, test); + if (xe->info.tile_count > 1) { + kunit_info(test, "Copying big buffer object to other vram\n"); + test_copy_vram(m, big, test); + } kunit_info(test, "Testing page table update using CPU if GPU idle.\n"); test_pt_update(m, pt, test, false); -- cgit v1.2.3 From 811aa4d2074a9e64baeaa4bbc2773ead6247f101 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Oct 2023 11:23:22 -0700 Subject: drm/xe/xe2: Program PAT tables The PAT tables become significantly more complicated on Xe2 platforms. They now control L3, L4, and coherency settings, as well as additional characteristics such as compression. Aside from the main PAT table, there's an additional register that also needs to be programmed with PAT settings for PCI Address Translation Services. 
Bspec: 71582 Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20231006182325.3617685-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 92 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 4668ca3932c5..296763594370 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -10,10 +10,18 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" +#define _PAT_ATS 0x47fc #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ 0x4800, 0x4804, \ 0x4848, 0x484c) +#define XE2_NO_PROMOTE REG_BIT(10) +#define XE2_COMP_EN REG_BIT(9) +#define XE2_L3_CLOS REG_GENMASK(7, 6) +#define XE2_L3_POLICY REG_GENMASK(5, 4) +#define XE2_L4_POLICY REG_GENMASK(3, 2) +#define XE2_COH_MODE REG_GENMASK(1, 0) + #define XELPG_L4_POLICY_MASK REG_GENMASK(3, 2) #define XELPG_PAT_3_UC REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 3) #define XELPG_PAT_1_WT REG_FIELD_PREP(XELPG_L4_POLICY_MASK, 1) @@ -67,6 +75,64 @@ static const u32 xelpg_pat_table[] = { [4] = XELPG_PAT_0_WB | XELPG_3_COH_2W, }; +/* + * The Xe2 table is getting large/complicated so it's easier to review if + * provided in a form that exactly matches the bspec's formatting. The meaning + * of the fields here are: + * - no_promote: 0=promotable, 1=no promote + * - comp_en: 0=disable, 1=enable + * - l3clos: L3 class of service (0-3) + * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC + * - l4_policy: 0=WB, 1=WT, 3=UC + * - coh_mode: 0=no snoop, 2=1-way coherent, 3=2-way coherent + * + * Reserved entries should be programmed with the maximum caching, minimum + * coherency (which matches an all-0's encoding), so we can just omit them + * in the table. + */ +#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, coh_mode) \ + (no_promote ? XE2_NO_PROMOTE : 0) | \ + (comp_en ? 
XE2_COMP_EN : 0) | \ + REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \ + REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ + REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ + REG_FIELD_PREP(XE2_COH_MODE, coh_mode) + +static const u32 xe2_pat_table[] = { + [ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ), + [ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ), + [ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ), + [ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ), + [ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ), + [ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ), + [ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ), + [ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ), + [ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ), + [ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ), + [10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ), + [11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ), + [12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ), + [13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ), + [14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ), + [15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ), + /* 16..19 are reserved; leave set to all 0's */ + [20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ), + [21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ), + [22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ), + [23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ), + [24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ), + [25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ), + [26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ), + [27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ), + [28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ), + [29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ), + [30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ), + [31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ), +}; + +/* Special PAT values programmed outside the main table */ +#define XE2_PAT_ATS XE2_PAT( 0, 0, 0, 0, 3, 3 ) + static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries) { for (int i = 0; i < n_entries; i++) { @@ -102,9 +168,33 @@ static const struct xe_pat_ops xelpg_pat_ops = { .program_media = program_pat_mcr, }; +static void xe2lpg_program_pat(struct xe_gt *gt, const u32 table[], int n_entries) +{ + program_pat_mcr(gt, table, n_entries); + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), XE2_PAT_ATS); +} + +static void xe2lpm_program_pat(struct xe_gt *gt, const u32 table[], int n_entries) +{ + program_pat(gt, table, n_entries); + xe_mmio_write32(gt, XE_REG(_PAT_ATS), XE2_PAT_ATS); +} + +static const struct xe_pat_ops xe2_pat_ops = { + .program_graphics = xe2lpg_program_pat, + .program_media = xe2lpm_program_pat, +}; + void xe_pat_init_early(struct xe_device *xe) { - if (xe->info.platform == XE_METEORLAKE) { + if (GRAPHICS_VER(xe) == 20) { + xe->pat.ops = &xe2_pat_ops; + xe->pat.table = xe2_pat_table; + xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 15; + xe->pat.idx[XE_CACHE_WB] = 2; + } else if (xe->info.platform == XE_METEORLAKE) { xe->pat.ops = &xelpg_pat_ops; xe->pat.table = xelpg_pat_table; xe->pat.n_entries = ARRAY_SIZE(xelpg_pat_table); -- cgit v1.2.3 From 5803bdc8ad6f0320b3147de7e565c24b3afe31fb Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 6 Oct 2023 11:23:23 -0700 Subject: drm/xe/xe2: Add one more bit to encode PAT to ppgtt entries Xe2 adds one more bit to cover all the possible 32 entries. Although those entries are not used by internal kernel code paths, it's expected that userspace will make use of it. 
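A worked example of what the extra bit buys, using the PTE bit definitions visible in the hunks below (the index value is purely illustrative): pat_index 20 is 0b10100, so bit 2 selects XE_PPGTT_PTE_PAT2 (PTE bit 7) and bit 4 selects the new XE2_PPGTT_PTE_PAT4 (PTE bit 61); without PAT4, indexes 16-31 would alias onto 0-15 (20 would encode as 4).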
Bspec: 59510, 67095 Reviewed-by: Pallavi Mishra Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231006182325.3617685-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_vm.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 5666fd6d7f11..ba6ffd359ff7 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -49,6 +49,7 @@ #define XE_BO_INTERNAL_64K BIT(31) #define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) +#define XE2_PPGTT_PTE_PAT4 BIT_ULL(61) #define XE_PPGTT_PTE_PAT2 BIT_ULL(7) #define XE_PPGTT_PTE_PAT1 BIT_ULL(4) #define XE_PPGTT_PTE_PAT0 BIT_ULL(3) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 10ed72228946..665af2646243 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1242,6 +1242,9 @@ static u64 pte_encode_cache(struct xe_device *xe, enum xe_cache_level cache) if (pat_index & BIT(3)) pte |= XELPG_PPGTT_PTE_PAT3; + if (pat_index & (BIT(4))) + pte |= XE2_PPGTT_PTE_PAT4; + return pte; } -- cgit v1.2.3 From 34803f9a4b3ab20dbc09ad13ed5fa98263896b37 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 6 Oct 2023 11:23:24 -0700 Subject: drm/xe/pat: Add debugfs node to dump PAT This is useful to debug cache issues, to double check if the PAT indexes match what they were supposed to be set to from spec. v2: Add separate functions for XeHP, XeHPC and XeLPG so it correctly reads the index based on MCR/REG registers and also decodes the fields (Matt Roper) v3: Starting with XeHPC, do not translate values to human-readable formats as the main goal is to make it easy to compare the table with the spec. 
Also, share a single array for xelp/xehp str map (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231006182325.3617685-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 12 +++ drivers/gpu/drm/xe/xe_pat.c | 192 ++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_pat.h | 8 ++ 3 files changed, 211 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index ec1ae00f6bfc..cd6d28c7b923 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -16,6 +16,7 @@ #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_macros.h" +#include "xe_pat.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" #include "xe_uc_debugfs.h" @@ -138,6 +139,16 @@ static int workarounds(struct seq_file *m, void *data) return 0; } +static int pat(struct seq_file *m, void *data) +{ + struct xe_gt *gt = node_to_gt(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_pat_dump(gt, &p); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", hw_engines, 0}, {"force_reset", force_reset, 0}, @@ -147,6 +158,7 @@ static const struct drm_info_list debugfs_list[] = { {"ggtt", ggtt, 0}, {"register-save-restore", register_save_restore, 0}, {"workarounds", workarounds, 0}, + {"pat", pat, 0}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 296763594370..31565ccbb0ec 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -6,6 +6,8 @@ #include "xe_pat.h" #include "regs/xe_reg_defs.h" +#include "xe_assert.h" +#include "xe_device.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_mmio.h" @@ -14,6 +16,7 @@ #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ 0x4800, 0x4804, \ 0x4848, 0x484c) +#define _PAT_PTA 0x4820 #define XE2_NO_PROMOTE REG_BIT(10) #define XE2_COMP_EN REG_BIT(9) @@ -40,9 +43,12 @@ #define XELP_PAT_WC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1) #define XELP_PAT_UC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0) +static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" }; + struct xe_pat_ops { void (*program_graphics)(struct xe_gt *gt, const u32 table[], int n_entries); void (*program_media)(struct xe_gt *gt, const u32 table[], int n_entries); + void (*dump)(struct xe_gt *gt, struct drm_printer *p); }; static const u32 xelp_pat_table[] = { @@ -151,14 +157,132 @@ static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries) } } +static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + u32 pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat); + + drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i, + XELP_MEM_TYPE_STR_MAP[mem_type], pat); + } + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + static const struct xe_pat_ops xelp_pat_ops = { .program_graphics = program_pat, + .dump = xelp_dump, }; +static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + + xe_device_mem_access_get(xe); + err = 
xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + u8 mem_type; + + mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat); + + drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i, + XELP_MEM_TYPE_STR_MAP[mem_type], pat); + } + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + static const struct xe_pat_ops xehp_pat_ops = { .program_graphics = program_pat_mcr, + .dump = xehp_dump, +}; + +static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + + drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i, + REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat), + REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat); + } + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + +static const struct xe_pat_ops xehpc_pat_ops = { + .program_graphics = program_pat_mcr, + .dump = xehpc_dump, }; +static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + u32 pat; + + if (xe_gt_is_media_type(gt)) + pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + else + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + + drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i, + REG_FIELD_GET(XELPG_L4_POLICY_MASK, pat), + REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat); + } + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + /* * SAMedia register offsets are adjusted by the write methods and they target * registers that are not MCR, while for normal GT they are MCR @@ -166,6 +290,7 @@ static const struct xe_pat_ops xehp_pat_ops = { static const struct xe_pat_ops xelpg_pat_ops = { .program_graphics = program_pat, .program_media = program_pat_mcr, + .dump = xelpg_dump, }; static void xe2lpg_program_pat(struct xe_gt *gt, const u32 table[], int n_entries) @@ -180,9 +305,64 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const u32 table[], int n_entrie xe_mmio_write32(gt, XE_REG(_PAT_ATS), XE2_PAT_ATS); } +static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, err; + u32 pat; + + xe_device_mem_access_get(xe); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto err_fw; + + drm_printf(p, "PAT table:\n"); + + for (i = 0; i < xe->pat.n_entries; i++) { + if (xe_gt_is_media_type(gt)) + pat = xe_mmio_read32(gt, XE_REG(_PAT_INDEX(i))); + else + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); + + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", i, + !!(pat & XE2_NO_PROMOTE), + !!(pat & XE2_COMP_EN), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat); + } + + /* + * Also print PTA_MODE, which describes how 
the hardware accesses + * PPGTT entries. + */ + if (xe_gt_is_media_type(gt)) + pat = xe_mmio_read32(gt, XE_REG(_PAT_PTA)); + else + pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); + + drm_printf(p, "Page Table Access:\n"); + drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", + !!(pat & XE2_NO_PROMOTE), + !!(pat & XE2_COMP_EN), + REG_FIELD_GET(XE2_L3_CLOS, pat), + REG_FIELD_GET(XE2_L3_POLICY, pat), + REG_FIELD_GET(XE2_L4_POLICY, pat), + REG_FIELD_GET(XE2_COH_MODE, pat), + pat); + + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +err_fw: + xe_assert(xe, !err); + xe_device_mem_access_put(xe); +} + static const struct xe_pat_ops xe2_pat_ops = { .program_graphics = xe2lpg_program_pat, .program_media = xe2lpm_program_pat, + .dump = xe2_dump, }; void xe_pat_init_early(struct xe_device *xe) @@ -202,7 +382,7 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.idx[XE_CACHE_WT] = 1; xe->pat.idx[XE_CACHE_WB] = 3; } else if (xe->info.platform == XE_PVC) { - xe->pat.ops = &xehp_pat_ops; + xe->pat.ops = &xehpc_pat_ops; xe->pat.table = xehpc_pat_table; xe->pat.n_entries = ARRAY_SIZE(xehpc_pat_table); xe->pat.idx[XE_CACHE_NONE] = 0; @@ -252,3 +432,13 @@ void xe_pat_init(struct xe_gt *gt) else xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries); } + +void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->pat.ops->dump) + return; + + xe->pat.ops->dump(gt, p); +} diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index 168e80e63809..09c491ab9f15 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -6,6 +6,7 @@ #ifndef _XE_PAT_H_ #define _XE_PAT_H_ +struct drm_printer; struct xe_gt; struct xe_device; @@ -21,4 +22,11 @@ void xe_pat_init_early(struct xe_device *xe); */ void xe_pat_init(struct xe_gt *gt); +/** + * xe_pat_dump - Dump PAT table + * @gt: GT structure + * @p: Printer to dump info to + */ +void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p); + #endif -- cgit v1.2.3 From d2300987cf5a483acde519d671421b646f8d5390 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 6 Oct 2023 11:23:25 -0700 Subject: drm/xe/gt: Dump PAT table when failing to initialize When failing on early initialization, one cause may be that the PAT configuration is not correct. Dump it for ease of debugging. 
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231006182325.3617685-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 68cd9a7ee087..c63e2e4750b1 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -307,6 +307,17 @@ int xe_gt_init_early(struct xe_gt *gt) return 0; } +static void dump_pat_on_error(struct xe_gt *gt) +{ + struct drm_printer p; + char prefix[32]; + + snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id); + p = drm_debug_printer(prefix); + + xe_pat_dump(gt, &p); +} + static int gt_fw_domain_init(struct xe_gt *gt) { int err, i; @@ -360,6 +371,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) return 0; err_force_wake: + dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_hw_fence_irq: for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) -- cgit v1.2.3 From 1db6f9d4134ec242d294061cdde475d824e1e9ba Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Wed, 4 Oct 2023 10:08:24 -0300 Subject: drm/xe/rtp: Fix doc for XE_RTP_ACTIONS Replace the paragraph that was meant for XE_RTP_RULES with one proper for XE_RTP_ACTIONS. Signed-off-by: Gustavo Sousa Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231004130824.13909-1-gustavo.sousa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_rtp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 04ccb26452ad..c56fedd126e6 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -350,9 +350,8 @@ struct xe_reg_sr; * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr * @...: Actions to be taken * - * At least one rule is needed and up to 4 are supported. Multiple rules are - * AND'ed together, i.e. all the rules must evaluate to true for the entry to - * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: + * At least one action is needed and up to 4 are supported. See XE_RTP_ACTION_* + * for the possible actions. Example: * * .. code-block:: c * -- cgit v1.2.3 From 30e3b2cfb576f6ddf098f6de2a264b1ed75caa4c Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 5 Oct 2023 13:54:47 -0700 Subject: drm/xe/rplu: s/ADLP/ALDERLAKE_P i915 now uses full names for platforms. So we now have ALDERLAKE instead of ADL. Extend this to xe driver as well. This will make it easier for macro magic usages. 
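The "macro magic" is token pasting in places such as the xe_wa_test kunit cases; a hypothetical sketch (the real SUBPLATFORM_CASE helper in drivers/gpu/drm/xe/tests/xe_wa_test.c and its field names may differ) of why the enum has to spell out the full platform name:

    /*
     * Illustration only: with ##-pasting, SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0)
     * expands to XE_SUBPLATFORM_ALDERLAKE_P_RPLU, which only exists if the
     * subplatform enum embeds ALDERLAKE_P rather than ADLP.
     */
    #define SUBPLATFORM_CASE(platform__, subplatform__, step__)                     \
            {                                                                        \
                    .platform    = XE_##platform__,                                  \
                    .subplatform = XE_SUBPLATFORM_##platform__##_##subplatform__,    \
                    .step        = STEP_##step__,                                    \
            }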
v2: Do not make changes to compat-i915-headers/i915_drv.h file with the rest of the changes (Jani) Cc: Jani Nikula Cc: Dnyaneshwar Bhadane Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231005205450.3177354-3-anusha.srivatsa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 2 +- drivers/gpu/drm/xe/xe_platform_types.h | 2 +- drivers/gpu/drm/xe/xe_step.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index fd8d7eddd6f6..2652e7426258 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -230,7 +230,7 @@ static const struct xe_device_desc adl_p_desc = { .has_llc = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { - { XE_SUBPLATFORM_ADLP_RPLU, "RPLU", adlp_rplu_ids }, + { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, {}, }, }; diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index e378a64a0f86..b6fe4342f9f6 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -27,7 +27,7 @@ enum xe_platform { enum xe_subplatform { XE_SUBPLATFORM_UNINITIALIZED = 0, XE_SUBPLATFORM_NONE, - XE_SUBPLATFORM_ADLP_RPLU, + XE_SUBPLATFORM_ALDERLAKE_P_RPLU, XE_SUBPLATFORM_DG2_G10, XE_SUBPLATFORM_DG2_G11, XE_SUBPLATFORM_DG2_G12, diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index 371cac951e0f..903c65405d3a 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -143,7 +143,7 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe) } else if (xe->info.platform == XE_ALDERLAKE_N) { revids = adln_revids; size = ARRAY_SIZE(adln_revids); - } else if (xe->info.subplatform == XE_SUBPLATFORM_ADLP_RPLU) { + } else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) { revids = adlp_rpl_revids; size = ARRAY_SIZE(adlp_rpl_revids); } else if (xe->info.platform == XE_ALDERLAKE_P) { -- cgit v1.2.3 From fcb33ca6d6296d2bd45550e26271797801aeb640 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 5 Oct 2023 13:54:48 -0700 Subject: drm/xe/rpls: Add RPLS Support Add RPLS support that was missing apart from the PCI IDs. v2: Also add the support in xe_wa_test kunit v3: rebased. 

Cc: Dnyaneshwar Bhadane Signed-off-by: Anusha Srivatsa Reviewed-by: Matt Roper (v1) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231005205450.3177354-4-anusha.srivatsa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_wa_test.c | 1 + drivers/gpu/drm/xe/xe_pci.c | 7 +++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + 3 files changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 16f7f157c875..69c9ea1fa82b 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -49,6 +49,7 @@ static const struct platform_test_case cases[] = { PLATFORM_CASE(ALDERLAKE_P, A0), PLATFORM_CASE(ALDERLAKE_P, B0), PLATFORM_CASE(ALDERLAKE_P, C0), + SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), SUBPLATFORM_CASE(DG2, G10, A0), SUBPLATFORM_CASE(DG2, G10, A1), SUBPLATFORM_CASE(DG2, G10, B0), diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 2652e7426258..0efe01885cf8 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -213,12 +213,18 @@ static const struct xe_device_desc rkl_desc = { .require_force_probe = true, }; +static const u16 adls_rpls_ids[] = { XE_RPLS_IDS(NOP), 0 }; + static const struct xe_device_desc adl_s_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_ALDERLAKE_S), .has_llc = 1, .require_force_probe = true, + .subplatforms = (const struct xe_subplatform_desc[]) { + { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, + {}, + }, }; static const u16 adlp_rplu_ids[] = { XE_RPLU_IDS(NOP), 0 }; @@ -335,6 +341,7 @@ static const struct pci_device_id pciidlist[] = { XE_ADLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), XE_ADLN_IDS(INTEL_VGA_DEVICE, &adl_n_desc), XE_RPLP_IDS(INTEL_VGA_DEVICE, &adl_p_desc), + XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index b6fe4342f9f6..553f53dbd093 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -28,6 +28,7 @@ enum xe_subplatform { XE_SUBPLATFORM_UNINITIALIZED = 0, XE_SUBPLATFORM_NONE, XE_SUBPLATFORM_ALDERLAKE_P_RPLU, + XE_SUBPLATFORM_ALDERLAKE_S_RPLS, XE_SUBPLATFORM_DG2_G10, XE_SUBPLATFORM_DG2_G11, XE_SUBPLATFORM_DG2_G12, -- cgit v1.2.3 From 93b1b5f59d34d86f3debc35693c47e99935c4429 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 5 Oct 2023 13:54:49 -0700 Subject: drm/xe/rpls: Add Stepping info for RPLS Add stepping/substepping info. Though it looks weird, the revision ID for the newer stepping is indeed backwards and is in accordance with the spec.
v2: s/RPLS/RAPTORLAKE_S (Anusha) v3: rebase (Anusha) Signed-off-by: Anusha Srivatsa Reviewed-by: Matt Roper (v1) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231005205450.3177354-5-anusha.srivatsa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_step.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index 903c65405d3a..eaf1b718f26c 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -60,6 +60,11 @@ static const struct xe_step_info adls_revids[] = { [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, }; +static const struct xe_step_info adls_rpls_revids[] = { + [0x4] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_D0 }, + [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, +}; + static const struct xe_step_info adlp_revids[] = { [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, @@ -143,6 +148,9 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe) } else if (xe->info.platform == XE_ALDERLAKE_N) { revids = adln_revids; size = ARRAY_SIZE(adln_revids); + } else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_S_RPLS) { + revids = adls_rpls_revids; + size = ARRAY_SIZE(adls_rpls_revids); } else if (xe->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) { revids = adlp_rpl_revids; size = ARRAY_SIZE(adlp_rpl_revids); -- cgit v1.2.3 From e3fee3aa7a8911b60776127cb2e1c25ef8584f42 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 5 Oct 2023 13:54:50 -0700 Subject: drm/xe: Add missing ADL entries to xe_test_wa With all ADL platforms and subplatforms added, also add support to the xe_wa_test kunit tests for checking their WAs. Cc: Lucas De Marchi Signed-off-by: Anusha Srivatsa Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231005205450.3177354-6-anusha.srivatsa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_wa_test.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 69c9ea1fa82b..6e1127b276ea 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -50,6 +50,7 @@ static const struct platform_test_case cases[] = { PLATFORM_CASE(ALDERLAKE_P, B0), PLATFORM_CASE(ALDERLAKE_P, C0), SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), + SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), SUBPLATFORM_CASE(DG2, G10, A0), SUBPLATFORM_CASE(DG2, G10, A1), SUBPLATFORM_CASE(DG2, G10, B0), -- cgit v1.2.3 From 406be3cc186eec67367b87a2af91cb598ff8e239 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2023 09:46:15 +0100 Subject: drm/xe/pat: trim the xelp PAT table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't seem to use the 4-7 pat indexes, even though they are defined by the HW. In a future patch userspace will be able to directly set the pat_index as part of vm_bind and we don't want to allow setting 4-7. Simplest is to just ignore them here.
Suggested-by: Matt Roper Signed-off-by: Matthew Auld Cc: Pallavi Mishra Cc: Lucas De Marchi Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pat.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 31565ccbb0ec..7c1078707aa0 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -56,10 +56,6 @@ static const u32 xelp_pat_table[] = { [1] = XELP_PAT_WC, [2] = XELP_PAT_WT, [3] = XELP_PAT_UC, - [4] = XELP_PAT_WB, - [5] = XELP_PAT_WB, - [6] = XELP_PAT_WB, - [7] = XELP_PAT_WB, }; static const u32 xehpc_pat_table[] = { -- cgit v1.2.3 From e814389ff180514001df424f48645cf30f4a2a1e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2023 09:46:16 +0100 Subject: drm/xe: directly use pat_index for pte_encode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a future patch userspace will be able to directly set the pat_index as part of vm_bind. To support this we need to get away from using xe_cache_level in the low level routines and rather just use the pat_index directly. v2: Rebase v3: Some missed conversions, also prefer tile_to_xe() (Niranjana) v4: remove leftover const (Lucas) Signed-off-by: Matthew Auld Cc: Niranjana Vishwanathapura Cc: Pallavi Mishra Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: José Roberto de Souza Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Reviewed-by: Pallavi Mishra Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 16 +++++++++------- drivers/gpu/drm/xe/xe_ggtt_types.h | 3 +-- drivers/gpu/drm/xe/xe_migrate.c | 19 +++++++++++-------- drivers/gpu/drm/xe/xe_pt.c | 11 ++++++----- drivers/gpu/drm/xe/xe_pt_types.h | 8 ++++---- drivers/gpu/drm/xe/xe_vm.c | 24 +++++++++++------------- 7 files changed, 43 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 0db4b651ff1a..c984307560ee 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -322,7 +322,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* First part of the test, are we updating our pagetable bo with a new entry? 
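The conversion is mechanical; paraphrasing one call site from the xe_migrate.c hunks below (argument names simplified for illustration), the cache level is now resolved to a pat_index once through the per-device lookup table, and that raw index is what the encode helpers receive:

    /* before: pass the cache level and let the encode helper translate it */
    entry = vm->pt_ops->pte_encode_bo(bo, offset, XE_CACHE_WB, 0);

    /* after: look up the pat_index once and hand it down directly */
    u16 pat_index = xe->pat.idx[XE_CACHE_WB];
    entry = vm->pt_ops->pte_encode_bo(bo, offset, pat_index, 0);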
*/ xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64, 0xdeaddeadbeefbeef); - expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, XE_CACHE_WB, 0); + expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, xe->pat.idx[XE_CACHE_WB], 0); if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 06732461246d..1368616f20fa 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -27,7 +27,7 @@ #define GUC_GGTT_TOP 0xFEE00000 static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - enum xe_cache_level cache) + u16 pat_index) { u64 pte; @@ -41,13 +41,12 @@ static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, } static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - enum xe_cache_level cache) + u16 pat_index) { struct xe_device *xe = xe_bo_device(bo); - u32 pat_index = xe->pat.idx[cache]; u64 pte; - pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, cache); + pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index); xe_assert(xe, pat_index <= 3); @@ -79,6 +78,7 @@ void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) { + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; u64 end = start + size - 1; u64 scratch_pte; @@ -86,7 +86,7 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) if (ggtt->scratch) scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, - XE_CACHE_WB); + pat_index); else scratch_pte = 0; @@ -285,9 +285,10 @@ void xe_ggtt_invalidate(struct xe_ggtt *ggtt) void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) { + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; u64 addr, scratch_pte; - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, XE_CACHE_WB); + scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index); printk("%sGlobal GTT:", prefix); for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { @@ -324,11 +325,12 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; u64 start = bo->ggtt_node.start; u64 offset, pte; for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { - pte = ggtt->pt_ops->pte_encode_bo(bo, offset, XE_CACHE_WB); + pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); xe_ggtt_set_pte(ggtt, start + offset, pte); } diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 486016ea5b67..d8c584d9a8c3 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -14,8 +14,7 @@ struct xe_bo; struct xe_gt; struct xe_ggtt_pt_ops { - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, - enum xe_cache_level cache); + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); }; struct xe_ggtt { diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 4dc52ac26d52..134b078b6fee 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -163,6 +163,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm) { struct xe_device *xe = tile_to_xe(tile); + u16 pat_index = xe->pat.idx[XE_CACHE_WB]; u8 id = tile->id; u32 num_entries = NUM_PT_SLOTS, num_level = vm->pt_root[id]->level; u32 map_ofs, level, i; @@ -194,7 +195,7 @@ 
static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, return ret; } - entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB); + entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); map_ofs = (num_entries - num_level) * XE_PAGE_SIZE; @@ -202,7 +203,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Map the entire BO in our level 0 pt */ for (i = 0, level = 0; i < num_entries; level++) { entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE, - XE_CACHE_WB, 0); + pat_index, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -221,7 +222,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, - XE_CACHE_WB, 0); + pat_index, 0); xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry); @@ -246,7 +247,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, flags = XE_PDE_64K; entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) * - XE_PAGE_SIZE, XE_CACHE_WB); + XE_PAGE_SIZE, pat_index); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); } @@ -254,7 +255,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Write PDE's that point to our BO. */ for (i = 0; i < num_entries - num_level; i++) { entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE, - XE_CACHE_WB); + pat_index); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + (i + 1) * 8, u64, entry); @@ -266,7 +267,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level = 2; ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8; - flags = vm->pt_ops->pte_encode_addr(xe, 0, XE_CACHE_WB, level, + flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, true, 0); /* @@ -464,6 +465,7 @@ static void emit_pte(struct xe_migrate *m, struct xe_res_cursor *cur, u32 size, struct xe_bo *bo) { + u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; u32 ptes; u64 ofs = at_pt * XE_PAGE_SIZE; u64 cur_ofs; @@ -507,7 +509,7 @@ static void emit_pte(struct xe_migrate *m, } addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, - addr, XE_CACHE_WB, + addr, pat_index, 0, devmem, flags); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); @@ -1226,6 +1228,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, bool first_munmap_rebind = vma && vma->gpuva.flags & XE_VMA_FIRST_REBIND; struct xe_exec_queue *q_override = !q ? 
m->q : q; + u16 pat_index = xe->pat.idx[XE_CACHE_WB]; /* Use the CPU if no in syncs and engine is idle */ if (no_in_syncs(syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) { @@ -1297,7 +1300,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, xe_tile_assert(tile, pt_bo->size == SZ_4K); - addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, XE_CACHE_WB, 0); + addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, pat_index, 0); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index a7249b2d807c..15f7c9d5b311 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -50,6 +50,7 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, unsigned int level) { + u16 pat_index = tile_to_xe(tile)->pat.idx[XE_CACHE_WB]; u8 id = tile->id; if (!vm->scratch_bo[id]) @@ -57,9 +58,9 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, if (level > 0) return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, XE_CACHE_WB); + 0, pat_index); - return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, XE_CACHE_WB, 0); + return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, pat_index, 0); } /** @@ -510,6 +511,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, { struct xe_pt_stage_bind_walk *xe_walk = container_of(walk, typeof(*xe_walk), base); + u16 pat_index = tile_to_xe(xe_walk->tile)->pat.idx[xe_walk->cache]; struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); struct xe_vm *vm = xe_walk->vm; struct xe_pt *xe_child; @@ -526,7 +528,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, pte = vm->pt_ops->pte_encode_vma(is_null ? 
0 : xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, xe_walk->cache, level); + xe_walk->vma, pat_index, level); pte |= xe_walk->default_pte; /* @@ -591,8 +593,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, xe_child->is_compact = true; } - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, - xe_walk->cache) | flags; + pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, pte); } diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index bd6645295fe6..82cbf1ef8e57 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -38,14 +38,14 @@ struct xe_pt { struct xe_pt_ops { u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, - enum xe_cache_level cache, u32 pt_level); + u16 pat_index, u32 pt_level); u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma, - enum xe_cache_level cache, u32 pt_level); + u16 pat_index, u32 pt_level); u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr, - enum xe_cache_level cache, + u16 pat_index, u32 pt_level, bool devmem, u64 flags); u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset, - const enum xe_cache_level cache); + u16 pat_index); }; struct xe_pt_entry { diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 665af2646243..035f3232e3b9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1211,9 +1211,8 @@ static struct drm_gpuvm_ops gpuvm_ops = { .vm_free = xe_vm_free, }; -static u64 pde_encode_cache(struct xe_device *xe, enum xe_cache_level cache) +static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) { - u32 pat_index = xe->pat.idx[cache]; u64 pte = 0; if (pat_index & BIT(0)) @@ -1225,9 +1224,8 @@ static u64 pde_encode_cache(struct xe_device *xe, enum xe_cache_level cache) return pte; } -static u64 pte_encode_cache(struct xe_device *xe, enum xe_cache_level cache) +static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index) { - u32 pat_index = xe->pat.idx[cache]; u64 pte = 0; if (pat_index & BIT(0)) @@ -1261,27 +1259,27 @@ static u64 pte_encode_ps(u32 pt_level) } static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, - const enum xe_cache_level cache) + const u16 pat_index) { struct xe_device *xe = xe_bo_device(bo); u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_cache(xe, cache); + pde |= pde_encode_pat_index(xe, pat_index); return pde; } static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - enum xe_cache_level cache, u32 pt_level) + u16 pat_index, u32 pt_level) { struct xe_device *xe = xe_bo_device(bo); u64 pte; pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_cache(xe, cache); + pte |= pte_encode_pat_index(xe, pat_index); pte |= pte_encode_ps(pt_level); if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) @@ -1291,7 +1289,7 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, } static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, - enum xe_cache_level cache, u32 pt_level) + u16 pat_index, u32 pt_level) { struct xe_device *xe = xe_vma_vm(vma)->xe; @@ -1300,7 +1298,7 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, if (likely(!xe_vma_read_only(vma))) pte |= XE_PAGE_RW; - pte |= pte_encode_cache(xe, cache); + pte |= pte_encode_pat_index(xe, pat_index); pte |= pte_encode_ps(pt_level); if (unlikely(xe_vma_is_null(vma))) @@ -1310,7 +1308,7 @@ static u64 
xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, } static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, - enum xe_cache_level cache, + u16 pat_index, u32 pt_level, bool devmem, u64 flags) { u64 pte; @@ -1320,7 +1318,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, pte = addr; pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_cache(xe, cache); + pte |= pte_encode_pat_index(xe, pat_index); pte |= pte_encode_ps(pt_level); if (devmem) @@ -1707,7 +1705,7 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) { return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, - XE_CACHE_WB); + tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); } static struct dma_fence * -- cgit v1.2.3 From 399a13323f0d148bf00eff7e9156efe8a97063c0 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Thu, 5 Oct 2023 11:06:14 -0400 Subject: drm/xe: add 28-bit address support in struct xe_reg Xe driver currently supports 22-bit addresses for MMIO access. Future platforms will have additional MMIO extension with larger address spaces, and to access them, the driver will have to support wider address representation. Please note that while the XE_REG macro is used for MMIO access, XE_REG_EXT macro will be used for MMIO-extension access. Signed-off-by: Koby Elbaz Reviewed-by: Ofir Bitton Reviewed-by: Moti Haimovski Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 16 +++++++++++++++- drivers/gpu/drm/xe/xe_device_types.h | 13 +++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 478787c75e29..5078a9e69859 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -19,7 +19,7 @@ struct xe_reg { union { struct { /** @addr: address */ - u32 addr:22; + u32 addr:28; /** * @masked: register is "masked", with upper 16bits used * to identify the bits that are updated on the lower @@ -35,6 +35,10 @@ struct xe_reg { * value can inspect it. */ u32 mcr:1; + /** + * @ext: access MMIO extension space for current register. + */ + u32 ext:1; }; /** @raw: Raw value with both address and options */ u32 raw; @@ -84,6 +88,16 @@ struct xe_reg_mcr { */ #define XE_REG(r_, ...) ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__)) +/** + * XE_REG_EXT - Create a struct xe_reg from extension offset and additional + * flags + * @r_: Register extension offset + * @...: Additional options like access mode. See struct xe_reg for available + * options. + */ +#define XE_REG_EXT(r_, ...) \ + ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .ext = 1)) + /** * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags * @r_: Register offset diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index dd52d112d58f..f7c7d44a6465 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -140,6 +140,19 @@ struct xe_tile { void *regs; } mmio; + /** + * @mmio_ext: MMIO-extension info for a tile. + * + * Each tile has its own additional 256MB (28-bit) MMIO-extension space. 
+ */ + struct { + /** @size: size of tile's additional MMIO-extension space */ + size_t size; + + /** @regs: pointer to tile's additional MMIO-extension space */ + void *regs; + } mmio_ext; + /** @mem: memory management info for tile */ struct { /** -- cgit v1.2.3 From 6360ebd1a12384efa984b44b057b79edce6484df Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Thu, 5 Oct 2023 11:06:15 -0400 Subject: drm/xe: add read/write support for MMIO extension space A distinction has to be made when addressing the MMIO space or the additional MMIO extension space. Signed-off-by: Koby Elbaz Reviewed-by: Ofir Bitton Reviewed-by: Moti Haimovski Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 9e0fd4a6fb29..ae09f777d711 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -29,7 +29,7 @@ static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - return readb(tile->mmio.regs + reg.addr); + return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); } static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) @@ -39,7 +39,7 @@ static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - return readw(tile->mmio.regs + reg.addr); + return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); } static inline void xe_mmio_write32(struct xe_gt *gt, @@ -50,7 +50,7 @@ static inline void xe_mmio_write32(struct xe_gt *gt, if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - writel(val, tile->mmio.regs + reg.addr); + writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); } static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) @@ -60,7 +60,7 @@ static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) if (reg.addr < gt->mmio.adj_limit) reg.addr += gt->mmio.adj_offset; - return readl(tile->mmio.regs + reg.addr); + return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); } static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, -- cgit v1.2.3 From fdef72e02e20d7bc3c4b25607a2f8afa99d509eb Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Thu, 5 Oct 2023 11:06:16 -0400 Subject: drm/xe: add a flag to bypass multi-tile config from MTCFG reg Skip reading this register as it is not relevant in the new devices. 
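For illustration only, the decision this flag introduces can be sketched as a tiny standalone helper; probe_tile_count(), its parameters and the 4-bit TILE_COUNT layout are assumptions for the sketch, not the driver's actual code (the real change is in the xe_mmio_probe_tiles() hunk below):

#include <stdint.h>

/* TILE_COUNT is assumed to be a 4-bit field here purely for illustration. */
#define TILE_COUNT_MASK 0xfu

static uint8_t probe_tile_count(int bypass_mtcfg, uint8_t desc_tile_count,
				uint32_t mtcfg)
{
	if (bypass_mtcfg)
		return desc_tile_count;	/* trust the device descriptor */

	return (uint8_t)((mtcfg & TILE_COUNT_MASK) + 1);	/* decode the register */
}

In the driver the same guard simply wraps the existing MTCFG read, so platforms that set the descriptor flag never touch the register.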
Signed-off-by: Koby Elbaz Reviewed-by: Ofir Bitton Reviewed-by: Moti Haimovski Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_mmio.c | 24 +++++++++++++----------- drivers/gpu/drm/xe/xe_pci.c | 2 ++ 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index f7c7d44a6465..a9bf9c784b7c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -243,6 +243,8 @@ struct xe_device { u8 has_llc:1; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ u8 has_range_tlb_invalidation:1; + /** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */ + u8 bypass_mtcfg:1; } info; /** @irq: device interrupt state */ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 3ccc0af4430b..054ad752303f 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -318,26 +318,28 @@ int xe_mmio_probe_vram(struct xe_device *xe) static void xe_mmio_probe_tiles(struct xe_device *xe) { + u8 adj_tile_count = xe->info.tile_count; struct xe_gt *gt = xe_root_mmio_gt(xe); u32 mtcfg; - u8 adj_tile_count; u8 id; if (xe->info.tile_count == 1) return; - mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); - adj_tile_count = xe->info.tile_count = - REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + if (!xe->info.bypass_mtcfg) { + mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); + adj_tile_count = xe->info.tile_count = + REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; - /* - * FIXME: Needs some work for standalone media, but should be impossible - * with multi-tile for now. - */ - xe->info.gt_count = xe->info.tile_count; + /* + * FIXME: Needs some work for standalone media, but should be impossible + * with multi-tile for now. + */ + xe->info.gt_count = xe->info.tile_count; - drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", - xe->info.tile_count, adj_tile_count); + drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", + xe->info.tile_count, adj_tile_count); + } if (xe->info.tile_count > 1) { const int mmio_bar = 0; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 0efe01885cf8..f8e813e17458 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -56,6 +56,7 @@ struct xe_device_desc { u8 require_force_probe:1; u8 is_dgfx:1; u8 has_llc:1; + u8 bypass_mtcfg:1; }; #define PLATFORM(x) \ @@ -553,6 +554,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? media_desc->name : "none"; xe->info.has_llc = desc->has_llc; + xe->info.bypass_mtcfg = desc->bypass_mtcfg; xe->info.dma_mask_size = graphics_desc->dma_mask_size; xe->info.vram_flags = graphics_desc->vram_flags; -- cgit v1.2.3 From 866b2b1764341ada0611f54c6b19285c32d20efa Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Thu, 5 Oct 2023 11:06:17 -0400 Subject: drm/xe: add MMIO extension support flags Besides the regular MMIO space that exists by default, MMIO extension support & MMIO extension tile size should both be defined per device, and updated from the device's descriptor. 
Signed-off-by: Koby Elbaz Reviewed-by: Ofir Bitton Reviewed-by: Moti Haimovski Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 4 ++++ drivers/gpu/drm/xe/xe_pci.c | 3 +++ drivers/gpu/drm/xe/xe_pci_types.h | 2 ++ 3 files changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index a9bf9c784b7c..b64f810189ff 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -200,6 +200,8 @@ struct xe_device { const char *graphics_name; /** @media_name: media IP name */ const char *media_name; + /** @tile_mmio_ext_size: size of MMIO extension space, per-tile */ + u32 tile_mmio_ext_size; /** @graphics_verx100: graphics IP version */ u32 graphics_verx100; /** @media_verx100: media IP version */ @@ -245,6 +247,8 @@ struct xe_device { u8 has_range_tlb_invalidation:1; /** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */ u8 bypass_mtcfg:1; + /** @supports_mmio_ext: supports MMIO extension/s */ + u8 supports_mmio_ext:1; } info; /** @irq: device interrupt state */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f8e813e17458..8161982796dd 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -57,6 +57,7 @@ struct xe_device_desc { u8 is_dgfx:1; u8 has_llc:1; u8 bypass_mtcfg:1; + u8 supports_mmio_ext:1; }; #define PLATFORM(x) \ @@ -555,6 +556,8 @@ static int xe_info_init(struct xe_device *xe, xe->info.media_name = media_desc ? media_desc->name : "none"; xe->info.has_llc = desc->has_llc; xe->info.bypass_mtcfg = desc->bypass_mtcfg; + xe->info.supports_mmio_ext = desc->supports_mmio_ext; + xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size; xe->info.dma_mask_size = graphics_desc->dma_mask_size; xe->info.vram_flags = graphics_desc->vram_flags; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index bd0b0d87413e..dd3546ba6f90 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -20,6 +20,8 @@ struct xe_graphics_desc { u64 hw_engine_mask; /* hardware engines provided by graphics IP */ + u32 tile_mmio_ext_size; /* size of MMIO extension space, per-tile */ + u8 max_remote_tiles:2; u8 has_asid:1; -- cgit v1.2.3 From ef29b390c7345f081412454538ab94c395068153 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Thu, 5 Oct 2023 11:06:18 -0400 Subject: drm/xe: map MMIO BAR according to the num of tiles in device desc When MMIO BAR is initially mapped, the driver assumes a single tile device. However, former memory allocations take all tiles into account. First, a common standard for resource usage is needed here. Second, with the next (6th) patch in this series, the MMIO BAR remapping will be done only if a reduced-tile device is attached. Signed-off-by: Koby Elbaz Reviewed-by: Ofir Bitton Reviewed-by: Moti Haimovski Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 054ad752303f..52e4572e3c4a 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -383,14 +383,13 @@ int xe_mmio_init(struct xe_device *xe) int err; /* - * Map the first 16MB of th BAR, which includes the registers (0-4MB), - * reserved space (4MB-8MB), and GGTT (8MB-16MB) for a single tile. 
- * This will get remapped later if we determine that we're running - * on a multi-tile system. + * Map the maximum expected BAR size, which will get remapped later + * if we determine that we're running on a reduced-tile system. + * The first 16MB of the BAR, belong to the root tile, and include: + * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). */ - xe->mmio.size = SZ_16M; - xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, - xe->mmio.size); + xe->mmio.size = (SZ_16M + xe->info.tile_mmio_ext_size) * xe->info.tile_count; + xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size); if (xe->mmio.regs == NULL) { drm_err(&xe->drm, "failed to map registers\n"); return -EIO; -- cgit v1.2.3 From a4e2f3a299ea1c9c4b6d0e51048273eac28256b9 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Thu, 5 Oct 2023 11:06:19 -0400 Subject: drm/xe: refactor xe_mmio_probe_tiles to support MMIO extension In future ASICs, there will be an additional MMIO extension space for all tiles altogether, residing on top of MMIO address space. Signed-off-by: Koby Elbaz Reviewed-by: Ofir Bitton Reviewed-by: Moti Haimovski Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 70 ++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 32 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 52e4572e3c4a..e4cf9bfec422 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -318,50 +318,56 @@ int xe_mmio_probe_vram(struct xe_device *xe) static void xe_mmio_probe_tiles(struct xe_device *xe) { - u8 adj_tile_count = xe->info.tile_count; + size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size; + u8 id, tile_count = xe->info.tile_count; struct xe_gt *gt = xe_root_mmio_gt(xe); + const int mmio_bar = 0; + struct xe_tile *tile; + void *regs; u32 mtcfg; - u8 id; - if (xe->info.tile_count == 1) - return; + if (tile_count == 1) + goto add_mmio_ext; if (!xe->info.bypass_mtcfg) { mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); - adj_tile_count = xe->info.tile_count = - REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; - - /* - * FIXME: Needs some work for standalone media, but should be impossible - * with multi-tile for now. - */ - xe->info.gt_count = xe->info.tile_count; - - drm_info(&xe->drm, "tile_count: %d, adj_tile_count %d\n", - xe->info.tile_count, adj_tile_count); - } - - if (xe->info.tile_count > 1) { - const int mmio_bar = 0; - struct xe_tile *tile; - size_t size; - void *regs; - - if (adj_tile_count > 1) { + tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + if (tile_count < xe->info.tile_count) { + drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", + xe->info.tile_count, tile_count); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); - xe->mmio.size = SZ_16M * adj_tile_count; - xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), - mmio_bar, xe->mmio.size); + xe->mmio.size = (tile_mmio_size + tile_mmio_ext_size) * tile_count; + xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size); + xe->info.tile_count = tile_count; + + /* + * FIXME: Needs some work for standalone media, but should be impossible + * with multi-tile for now. 
+ */ + xe->info.gt_count = xe->info.tile_count; } + } - size = xe->mmio.size / adj_tile_count; - regs = xe->mmio.regs; + regs = xe->mmio.regs; + for_each_tile(tile, xe, id) { + tile->mmio.size = tile_mmio_size; + tile->mmio.regs = regs; + regs += tile_mmio_size; + } + +add_mmio_ext: + /* By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile). + * When supported, there could be an additional contiguous multi-tile MMIO extension + * space ON TOP of it, and hence the necessity for distinguished MMIO spaces. + */ + if (xe->info.supports_mmio_ext) { + regs = xe->mmio.regs + tile_mmio_size * tile_count; for_each_tile(tile, xe, id) { - tile->mmio.size = size; - tile->mmio.regs = regs; + tile->mmio_ext.size = tile_mmio_ext_size; + tile->mmio_ext.regs = regs; - regs += size; + regs += tile_mmio_ext_size; } } } -- cgit v1.2.3 From b6f45db5d08ac6ac1827ed64d009f3a25ad293c8 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 4 Oct 2023 14:01:42 -0700 Subject: drm/xe: Set PTE_AE for smem allocations in integrated devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this if a atomic operation is executed in Xe2 integrated GPUs it causes engine memory catastrophic error. This fixes at least 3 failures in piglit sanity and 2 failures in crucible for LNL. v3: - only add PTE_AE to smem in integrated Cc: Matt Roper Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 15f7c9d5b311..ab08e4644529 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -628,6 +628,7 @@ static int xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 *num_entries) { + struct xe_device *xe = tile_to_xe(tile); struct xe_bo *bo = xe_vma_bo(vma); bool is_devmem = !xe_vma_is_userptr(vma) && bo && (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); @@ -649,10 +650,12 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; + if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && + (is_devmem || !IS_DGFX(xe))) + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + if (is_devmem) { - xe_walk.default_pte = XE_PPGTT_PTE_DM; - if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + xe_walk.default_pte |= XE_PPGTT_PTE_DM; xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); xe_walk.cache = XE_CACHE_WB; } else { -- cgit v1.2.3 From 5120243bfb0dabc9f16924a5fc66e8ef26f0f8d3 Mon Sep 17 00:00:00 2001 From: Vitaly Lubart Date: Mon, 28 Aug 2023 13:07:07 +0300 Subject: drm/xe/gsc: add HECI2 register offsets Add HECI2 register offsets for DG1 and DG2 to regs/xe_regs.h Reviewed-by: Rodrigo Vivi Signed-off-by: Vitaly Lubart Signed-off-by: Alexander Usyskin Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 1574d11d4e14..e4408473e802 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -33,6 +33,10 @@ #define XEHPC_BCS6_RING_BASE 0x3ea000 #define XEHPC_BCS7_RING_BASE 0x3ec000 #define XEHPC_BCS8_RING_BASE 
0x3ee000 + +#define DG1_GSC_HECI2_BASE 0x00259000 +#define DG2_GSC_HECI2_BASE 0x00374000 + #define GSCCS_RING_BASE 0x11a000 #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) #define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) -- cgit v1.2.3 From 437d7a84ada7a4cfeab2d9555c446936c3fb09f4 Mon Sep 17 00:00:00 2001 From: Vitaly Lubart Date: Wed, 30 Aug 2023 13:05:40 +0300 Subject: drm/xe/gsc: add has_heci_gscfi indication to device Mark support of MEI-GSC interaction per device. Add has_heci_gscfi indication to xe_device and xe_pci structures. Mark DG1 and DG2 devices as supported. Reviewed-by: Rodrigo Vivi Signed-off-by: Vitaly Lubart Signed-off-by: Alexander Usyskin Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_pci.c | 10 ++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index b64f810189ff..f7822dd9f1e4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -29,6 +29,7 @@ struct xe_pat_ops; #define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100) #define MEDIA_VERx100(xe) ((xe)->info.media_verx100) #define IS_DGFX(xe) ((xe)->info.is_dgfx) +#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi) #define XE_VRAM_FLAGS_NEED64K BIT(0) @@ -249,6 +250,8 @@ struct xe_device { u8 bypass_mtcfg:1; /** @supports_mmio_ext: supports MMIO extension/s */ u8 supports_mmio_ext:1; + /** @has_heci_gscfi: device has heci gscfi */ + u8 has_heci_gscfi:1; } info; /** @irq: device interrupt state */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 8161982796dd..9963772caabb 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -55,6 +55,8 @@ struct xe_device_desc { u8 require_force_probe:1; u8 is_dgfx:1; + u8 has_heci_gscfi:1; + u8 has_llc:1; u8 bypass_mtcfg:1; u8 supports_mmio_ext:1; @@ -260,6 +262,7 @@ static const struct xe_device_desc dg1_desc = { DGFX_FEATURES, PLATFORM(XE_DG1), .require_force_probe = true, + .has_heci_gscfi = 1, }; static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 }; @@ -269,6 +272,7 @@ static const u16 dg2_g12_ids[] = { XE_DG2_G12_IDS(NOP), 0 }; #define DG2_FEATURES \ DGFX_FEATURES, \ PLATFORM(XE_DG2), \ + .has_heci_gscfi = 1, \ .subplatforms = (const struct xe_subplatform_desc[]) { \ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ { XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \ @@ -552,6 +556,7 @@ static int xe_info_init(struct xe_device *xe, return -ENODEV; xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? media_desc->name : "none"; xe->info.has_llc = desc->has_llc; @@ -684,7 +689,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_pci_disable; - drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d", + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d gscfi:%d", desc->platform_name, subplatform_desc ? 
subplatform_desc->name : "", xe->info.devid, xe->info.revid, @@ -695,7 +700,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.media_name, xe->info.media_verx100 / 100, xe->info.media_verx100 % 100, - xe->info.dma_mask_size, xe->info.tile_count); + xe->info.dma_mask_size, xe->info.tile_count, + xe->info.has_heci_gscfi); drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n", xe_step_name(xe->info.step.graphics), -- cgit v1.2.3 From 87a4c85d3a3ed579c86fd2612715ccb94c4001ff Mon Sep 17 00:00:00 2001 From: Vitaly Lubart Date: Mon, 28 Aug 2023 19:24:03 +0300 Subject: drm/xe/gsc: add gsc device support Create mei-gscfi auxiliary device and configure interrupts to be consumed by mei-gsc device driver. Reviewed-by: Rodrigo Vivi Signed-off-by: Vitaly Lubart Signed-off-by: Alexander Usyskin Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig | 1 + drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 4 + drivers/gpu/drm/xe/xe_device_types.h | 4 + drivers/gpu/drm/xe/xe_heci_gsc.c | 222 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_heci_gsc.h | 35 ++++++ drivers/gpu/drm/xe/xe_irq.c | 22 +++- 7 files changed, 283 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_heci_gsc.c create mode 100644 drivers/gpu/drm/xe/xe_heci_gsc.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 7bffc039d63f..cfa6420b9915 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -27,6 +27,7 @@ config DRM_XE select DRM_SCHED select MMU_NOTIFIER select WANT_DEV_COREDUMP + select AUXILIARY_BUS help Experimental driver for Intel Xe series GPUs diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index a0e7896a4ef7..32eee57b4184 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -78,6 +78,7 @@ xe-y += xe_bb.o \ xe_guc_log.o \ xe_guc_pc.o \ xe_guc_submit.o \ + xe_heci_gsc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ xe_hw_fence.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 283fc5990000..2bbd3aa2809b 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -321,6 +321,8 @@ int xe_device_probe(struct xe_device *xe) goto err_irq_shutdown; } + xe_heci_gsc_init(xe); + err = drm_dev_register(&xe->drm, 0); if (err) goto err_irq_shutdown; @@ -344,6 +346,8 @@ err_irq_shutdown: void xe_device_remove(struct xe_device *xe) { + xe_heci_gsc_fini(xe); + xe_irq_shutdown(xe); } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index f7822dd9f1e4..4bc668ff8615 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -13,6 +13,7 @@ #include #include "xe_devcoredump_types.h" +#include "xe_heci_gsc.h" #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_pt_types.h" @@ -384,6 +385,9 @@ struct xe_device { /** @hwmon: hwmon subsystem integration */ struct xe_hwmon *hwmon; + /** @heci_gsc: graphics security controller */ + struct xe_heci_gsc heci_gsc; + /* For pcode */ struct mutex sb_lock; diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c new file mode 100644 index 000000000000..3328ddca42d0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2023, Intel Corporation. All rights reserved. 
+ */ + +#include +#include +#include +#include + +#include "regs/xe_regs.h" +#include "xe_device_types.h" +#include "xe_drv.h" +#include "xe_heci_gsc.h" +#include "xe_platform_types.h" + +#define GSC_BAR_LENGTH 0x00000FFC + +static void heci_gsc_irq_mask(struct irq_data *d) +{ + /* generic irq handling */ +} + +static void heci_gsc_irq_unmask(struct irq_data *d) +{ + /* generic irq handling */ +} + +static struct irq_chip heci_gsc_irq_chip = { + .name = "gsc_irq_chip", + .irq_mask = heci_gsc_irq_mask, + .irq_unmask = heci_gsc_irq_unmask, +}; + +static int heci_gsc_irq_init(int irq) +{ + irq_set_chip_and_handler_name(irq, &heci_gsc_irq_chip, + handle_simple_irq, "heci_gsc_irq_handler"); + + return irq_set_chip_data(irq, NULL); +} + +/** + * struct heci_gsc_def - graphics security controller heci interface definitions + * + * @name: name of the heci device + * @bar: address of the mmio bar + * @bar_size: size of the mmio bar + * @use_polling: indication of using polling mode for the device + * @slow_firmware: indication of whether the device is slow (needs longer timeouts) + */ +struct heci_gsc_def { + const char *name; + unsigned long bar; + size_t bar_size; + bool use_polling; + bool slow_firmware; +}; + +/* gsc resources and definitions */ +static const struct heci_gsc_def heci_gsc_def_dg1 = { + .name = "mei-gscfi", + .bar = DG1_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, +}; + +static const struct heci_gsc_def heci_gsc_def_dg2 = { + .name = "mei-gscfi", + .bar = DG2_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, +}; + +static void heci_gsc_release_dev(struct device *dev) +{ + struct auxiliary_device *aux_dev = to_auxiliary_dev(dev); + struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev); + + kfree(adev); +} + +void xe_heci_gsc_fini(struct xe_device *xe) +{ + struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; + + if (!HAS_HECI_GSCFI(xe)) + return; + + if (heci_gsc->adev) { + struct auxiliary_device *aux_dev = &heci_gsc->adev->aux_dev; + + auxiliary_device_delete(aux_dev); + auxiliary_device_uninit(aux_dev); + heci_gsc->adev = NULL; + } + + if (heci_gsc->irq >= 0) + irq_free_desc(heci_gsc->irq); + heci_gsc->irq = -1; +} + +static int heci_gsc_irq_setup(struct xe_device *xe) +{ + struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; + int ret; + + heci_gsc->irq = irq_alloc_desc(0); + if (heci_gsc->irq < 0) { + drm_err(&xe->drm, "gsc irq error %d\n", heci_gsc->irq); + return heci_gsc->irq; + } + + ret = heci_gsc_irq_init(heci_gsc->irq); + if (ret < 0) + drm_err(&xe->drm, "gsc irq init failed %d\n", ret); + + return ret; +} + +static int heci_gsc_add_device(struct xe_device *xe, const struct heci_gsc_def *def) +{ + struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct auxiliary_device *aux_dev; + struct mei_aux_device *adev; + int ret; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + adev->irq = heci_gsc->irq; + adev->bar.parent = &pdev->resource[0]; + adev->bar.start = def->bar + pdev->resource[0].start; + adev->bar.end = adev->bar.start + def->bar_size - 1; + adev->bar.flags = IORESOURCE_MEM; + adev->bar.desc = IORES_DESC_NONE; + adev->slow_firmware = def->slow_firmware; + + aux_dev = &adev->aux_dev; + aux_dev->name = def->name; + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | + PCI_DEVID(pdev->bus->number, pdev->devfn); + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = heci_gsc_release_dev; + + ret = auxiliary_device_init(aux_dev); + if (ret < 0) { + drm_err(&xe->drm, "gsc aux init failed 
%d\n", ret); + kfree(adev); + return ret; + } + + heci_gsc->adev = adev; /* needed by the notifier */ + ret = auxiliary_device_add(aux_dev); + if (ret < 0) { + drm_err(&xe->drm, "gsc aux add failed %d\n", ret); + heci_gsc->adev = NULL; + + /* adev will be freed with the put_device() and .release sequence */ + auxiliary_device_uninit(aux_dev); + } + return ret; +} + +void xe_heci_gsc_init(struct xe_device *xe) +{ + struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; + const struct heci_gsc_def *def; + int ret; + + if (!HAS_HECI_GSCFI(xe)) + return; + + heci_gsc->irq = -1; + + if (xe->info.platform == XE_DG2) { + def = &heci_gsc_def_dg2; + } else if (xe->info.platform == XE_DG1) { + def = &heci_gsc_def_dg1; + } else { + drm_warn_once(&xe->drm, "Unknown platform\n"); + return; + } + + if (!def->name) { + drm_warn_once(&xe->drm, "HECI is not implemented!\n"); + return; + } + + if (!def->use_polling) { + ret = heci_gsc_irq_setup(xe); + if (ret) + goto fail; + } + + ret = heci_gsc_add_device(xe, def); + if (ret) + goto fail; + + return; +fail: + xe_heci_gsc_fini(xe); +} + +void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir) +{ + int ret; + + if ((iir & GSC_IRQ_INTF(1)) == 0) + return; + + if (!HAS_HECI_GSCFI(xe)) { + drm_warn_once(&xe->drm, "GSC irq: not supported"); + return; + } + + if (xe->heci_gsc.irq < 0) + return; + + ret = generic_handle_irq(xe->heci_gsc.irq); + if (ret) + drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret); +} diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h new file mode 100644 index 000000000000..9db454478fae --- /dev/null +++ b/drivers/gpu/drm/xe/xe_heci_gsc.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2023, Intel Corporation. All rights reserved. + */ +#ifndef __XE_HECI_GSC_DEV_H__ +#define __XE_HECI_GSC_DEV_H__ + +#include + +struct xe_device; +struct mei_aux_device; + +/* + * The HECI1 bit corresponds to bit15 and HECI2 to bit14. + * The reason for this is to allow growth for more interfaces in the future. 
+ */ +#define GSC_IRQ_INTF(_x) BIT(15 - (_x)) + +/** + * struct xe_heci_gsc - graphics security controller for xe, HECI interface + * + * @adev : pointer to mei auxiliary device structure + * @irq : irq number + * + */ +struct xe_heci_gsc { + struct mei_aux_device *adev; + int irq; +}; + +void xe_heci_gsc_init(struct xe_device *xe); +void xe_heci_gsc_fini(struct xe_device *xe); +void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir); + +#endif /* __XE_HECI_GSC_DEV_H__ */ diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 4cc5f7086b4c..5631e5e1ea20 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -141,6 +141,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); u32 ccs_mask, bcs_mask; u32 irqs, dmask, smask; + u32 gsc_mask = 0; if (xe_device_uc_enabled(xe)) { irqs = GT_RENDER_USER_INTERRUPT | @@ -190,9 +191,13 @@ void xe_irq_enable_hwe(struct xe_gt *gt) xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); - if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { - xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, irqs); - xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~irqs); + if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) + gsc_mask = irqs; + else if (HAS_HECI_GSCFI(xe)) + gsc_mask = GSC_IRQ_INTF(1); + if (gsc_mask) { + xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask); + xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask); } } } @@ -306,7 +311,11 @@ static void gt_irq_handler(struct xe_tile *tile, } if (class == XE_ENGINE_CLASS_OTHER) { - gt_other_irq_handler(engine_gt, instance, intr_vec); + /* HECI GSCFI interrupts come from outside of GT */ + if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE) + xe_heci_gsc_irq_handler(xe, intr_vec); + else + gt_other_irq_handler(engine_gt, instance, intr_vec); continue; } } @@ -480,8 +489,9 @@ static void gt_irq_reset(struct xe_tile *tile) if (ccs_mask & (BIT(2)|BIT(3))) xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~0); - if (tile->media_gt && - xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) { + if ((tile->media_gt && + xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) || + HAS_HECI_GSCFI(tile_to_xe(tile))) { xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); } -- cgit v1.2.3 From de54bb81d9d43d0b66a63d839963e9d359e0467d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 16 Oct 2023 09:34:51 -0700 Subject: drm/xe: Make MI_FLUSH_DW immediate size more explicit Despite its name, MI_FLUSH_DW instruction can write an immediate value of either dword size or qword size, depending on the 'length' field of the instruction. Since "length" excludes the first two dwords of the instruction, a value of 2 in the length field implies a dword write and a value of 3 implies a qword write. Even in cases where the flush instruction's post-sync operation is set to "no write" we're still expected to size the overall instruction as if we were doing a dword or qword write (i.e., a length of 1 shouldn't be used on modern platforms). Rather than baking a size of "1" into the #define and then adding another unexplained "+ 1" at all the spots where the definition gets used, lets just create MI_FLUSH_IMM_DW and MI_FLUSH_IMM_QW definitions that should be OR'd into the instruction header to make it more explicit what behavior we're requesting. 
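As a standalone sketch of the sizing rule described above (not the driver header: MI_INSTR() is simplified and the length values are written out directly instead of going through REG_FIELD_PREP()), the two immediate-size variants compose like this:

#include <stdint.h>
#include <stdio.h>

#define MI_INSTR(op)            ((uint32_t)(op) << 23)	/* MI opcode lives in bits 28:23 */
#define MI_FLUSH_DW             MI_INSTR(0x26)
#define MI_FLUSH_DW_OP_STOREDW  (1u << 14)
#define MI_FLUSH_IMM_DW         2u	/* length field (bits 5:0): 4 total dwords, dword write */
#define MI_FLUSH_IMM_QW         3u	/* length field (bits 5:0): 5 total dwords, qword write */

int main(void)
{
	uint32_t dw_hdr = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW;
	uint32_t qw_hdr = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_QW;

	printf("dword-immediate flush header: 0x%08x\n", dw_hdr);
	printf("qword-immediate flush header: 0x%08x\n", qw_hdr);
	return 0;
}

The resolved values (2 and 3) match the REG_FIELD_PREP(MI_FLUSH_LENGTH, ...) definitions added in the hunk below.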
Bspec: 60229 Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231016163449.1300701-9-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 5 ++++- drivers/gpu/drm/xe/xe_ring_ops.c | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 21738281bdd0..9432a960346b 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -30,12 +30,15 @@ #define MI_LRI_MMIO_REMAP_EN REG_BIT(17) #define MI_LRI_FORCE_POSTED (1<<12) -#define MI_FLUSH_DW MI_INSTR(0x26, 1) +#define MI_FLUSH_DW MI_INSTR(0x26, 0) #define MI_FLUSH_DW_STORE_INDEX (1<<21) #define MI_INVALIDATE_TLB (1<<18) #define MI_FLUSH_DW_CCS (1<<16) #define MI_FLUSH_DW_OP_STOREDW (1<<14) #define MI_FLUSH_DW_USE_GTT (1<<2) +#define MI_FLUSH_LENGTH GENMASK(5, 0) +#define MI_FLUSH_IMM_DW REG_FIELD_PREP(MI_FLUSH_LENGTH, 2) +#define MI_FLUSH_IMM_QW REG_FIELD_PREP(MI_FLUSH_LENGTH, 3) #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 1) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 6eec7c7e4bc5..b95cc7713ff9 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -80,7 +80,7 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, u32 *dw, int i) { - dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | + dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | (invalidate_tlb ? MI_INVALIDATE_TLB : 0); dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; @@ -100,9 +100,9 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) static int emit_flush_invalidate(u32 flag, u32 *dw, int i) { - dw[i] = MI_FLUSH_DW + 1; + dw[i] = MI_FLUSH_DW; dw[i] |= flag; - dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | + dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; @@ -365,8 +365,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i); - dw[i++] = (MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | - MI_FLUSH_DW_OP_STOREDW) + 1; + dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW; dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; dw[i++] = seqno; /* value */ -- cgit v1.2.3 From e12a64881e97a78694012646cabd211399db8753 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 16 Oct 2023 09:34:52 -0700 Subject: drm/xe: Separate number of registers from MI_LRI opcode Keeping the number of registers to be loaded as a separate macro from the instruction opcode will simplify some upcoming LRC parsing code. 
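A hedged sketch of how the split macros compose when emitting a load-register-immediate for N registers; the macro values mirror the hunk below, while emit_lri(), its buffers and the register offsets are made up for the example:

#include <stdint.h>

#define MI_INSTR(op)           ((uint32_t)(op) << 23)
#define MI_LOAD_REGISTER_IMM   MI_INSTR(0x22)
#define MI_LRI_NUM_REGS(x)     ((uint32_t)(2 * (x) - 1))	/* total dwords (1 + 2*x) minus the 2-dword bias */

/* Emit one LRI that programs `count` registers; returns dwords written. */
static int emit_lri(uint32_t *cs, const uint32_t *offsets,
		    const uint32_t *values, int count)
{
	int i = 0, j;

	cs[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
	for (j = 0; j < count; j++) {
		cs[i++] = offsets[j];	/* MMIO register offset */
		cs[i++] = values[j];	/* value to load into it */
	}
	return i;
}

int main(void)
{
	uint32_t cs[8];
	const uint32_t offsets[2] = { 0x2244, 0x2248 };	/* made-up offsets */
	const uint32_t values[2]  = { 0x1, 0x0 };

	return emit_lri(cs, offsets, values, 2) == 5 ? 0 : 1;	/* header + 2*2 payload dwords */
}

The same pattern shows up in the emit_wa_job() and set_offsets() hunks below, which now build the header as MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count).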
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231016163449.1300701-10-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 4 +++- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 9432a960346b..ad1e5466671b 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -25,9 +25,11 @@ #define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) #define MI_STORE_DATA_IMM MI_INSTR(0x20, 0) -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +#define MI_LOAD_REGISTER_IMM MI_INSTR(0x22, 0) #define MI_LRI_LRM_CS_MMIO REG_BIT(19) #define MI_LRI_MMIO_REMAP_EN REG_BIT(17) +#define MI_LRI_LENGTH GENMASK(5, 0) +#define MI_LRI_NUM_REGS(x) REG_FIELD_PREP(MI_LRI_LENGTH, 2 * (x) - 1) #define MI_LRI_FORCE_POSTED (1<<12) #define MI_FLUSH_DW MI_INSTR(0x26, 0) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index c63e2e4750b1..a42ee3b9b8c7 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -145,7 +145,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (count) { xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM(count); + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 35ae6e531d8a..81463bd5e490 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -111,7 +111,7 @@ static void set_offsets(u32 *regs, flags = *data >> 6; data++; - *regs = MI_LOAD_REGISTER_IMM(count); + *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); if (flags & POSTED) *regs |= MI_LRI_FORCE_POSTED; *regs |= MI_LRI_LRM_CS_MMIO; diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index b95cc7713ff9..1e36b07d3e01 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -50,7 +50,7 @@ static u32 preparser_disable(bool state) static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg, u32 *dw, int i) { - dw[i++] = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; + dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN; dw[i++] = reg.addr + gt->mmio.adj_offset; dw[i++] = AUX_INV; dw[i++] = MI_NOOP; -- cgit v1.2.3 From 14a1e6a4a460fceae50fc1cf6b50d36c4ba96a7b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 16 Oct 2023 09:34:53 -0700 Subject: drm/xe: Clarify number of dwords/qwords stored by MI_STORE_DATA_IMM MI_STORE_DATA_IMM can store either dword values or qword values, and can store more than one value if the instruction's length field is large enough. Create explicit defines to specify the number of dwords/qwords to be stored, which will set the instruction length correctly and, if necessary, turn on the 'store qword' bit. While we're here, also replace an open-coded version of MI_STORE_DATA_IMM with the common macros. 
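For illustration, a standalone sketch of the dword vs. qword store headers; the macro values follow the hunk below, but emit_store_dword_ggtt() and the addresses used in main() are invented for the example:

#include <stdint.h>

#define MI_INSTR(op)         ((uint32_t)(op) << 23)
#define MI_STORE_DATA_IMM    MI_INSTR(0x20)
#define MI_SDI_GGTT          (1u << 22)
#define MI_SDI_NUM_DW(x)     ((uint32_t)((x) + 1))		/* x dword values to store */
#define MI_SDI_NUM_QW(x)     ((1u << 21) | (2u * (x) + 1))	/* bit 21 selects qword stores */

/* Store one dword `value` at GGTT address `addr`; returns dwords emitted. */
static int emit_store_dword_ggtt(uint32_t *dw, uint32_t addr, uint32_t value)
{
	int i = 0;

	dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
	dw[i++] = addr;		/* address, low 32 bits */
	dw[i++] = 0;		/* address, high 32 bits */
	dw[i++] = value;
	return i;		/* 4 dwords total; the length field carries 2 */
}

int main(void)
{
	uint32_t dw[4];

	return emit_store_dword_ggtt(dw, 0x1000, 0xdeadbeef) == 4 ? 0 : 1;
}

This mirrors the emit_store_imm_ggtt() change in the hunk below, where the open-coded "| 2" length is replaced by MI_SDI_NUM_DW(1).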
Bspec: 60246 Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231016163449.1300701-11-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 3 +++ drivers/gpu/drm/xe/xe_migrate.c | 9 +++------ drivers/gpu/drm/xe/xe_ring_ops.c | 6 ++---- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index ad1e5466671b..8c2e0da694d8 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -24,6 +24,9 @@ #define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) #define MI_STORE_DATA_IMM MI_INSTR(0x20, 0) +#define MI_SDI_GGTT REG_BIT(22) +#define MI_SDI_NUM_DW(x) ((x) + 1) +#define MI_SDI_NUM_QW(x) (REG_BIT(21) | (2 * (x) + 1)) #define MI_LOAD_REGISTER_IMM MI_INSTR(0x22, 0) #define MI_LRI_LRM_CS_MMIO REG_BIT(19) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 134b078b6fee..b81ef1bdecc6 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -482,8 +482,7 @@ static void emit_pte(struct xe_migrate *m, while (ptes) { u32 chunk = min(0x1ffU, ptes); - bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | - (chunk * 2 + 1); + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = ofs; bb->cs[bb->len++] = 0; @@ -1083,8 +1082,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, if (!(bb->len & 1)) bb->cs[bb->len++] = MI_NOOP; - bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | - (chunk * 2 + 1); + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); ops->populate(pt_update, tile, NULL, bb->cs + bb->len, ofs, chunk, @@ -1290,8 +1288,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, emit_arb_clear(bb); /* Map our PT's to gtt */ - bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(21) | - (num_updates * 2 + 1); + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates); bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs; bb->cs[bb->len++] = 0; /* upper_32_bits */ diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 1e36b07d3e01..da13cc7ba6af 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -69,7 +69,7 @@ static int emit_user_interrupt(u32 *dw, int i) static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) { - dw[i++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2; + dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); dw[i++] = addr; dw[i++] = 0; dw[i++] = value; @@ -140,12 +140,10 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, return i; } -#define MI_STORE_QWORD_IMM_GEN8_POSTED (MI_INSTR(0x20, 3) | (1 << 21)) - static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, u32 *dw, int i) { - dw[i++] = MI_STORE_QWORD_IMM_GEN8_POSTED; + dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1); dw[i++] = lower_32_bits(addr); dw[i++] = upper_32_bits(addr); dw[i++] = lower_32_bits(value); -- cgit v1.2.3 From 0134f130e76ad6e323e15ccb00624586c8763075 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 16 Oct 2023 09:34:54 -0700 Subject: drm/xe: Extract MI_* instructions to their own header Extracting the common MI_* instructions that can be used with any engine to their own header will make it easier as we add additional engine instructions in 
upcoming patches. Also, since the majority of GPU instructions (both MI and non-MI) have a "length" field in bits 7:0 of the instruction header, a common define is added for that. Instruction-specific length fields are still defined for special case instructions that have larger/smaller length fields. v2: - Use "instr" instead of "inst" as the short form of "instruction" everywhere. (Lucas) - Include xe_reg_defs.h instead of the i915 compat header. (Lucas) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231016163449.1300701-12-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/instructions/xe_instr_defs.h | 31 +++++++++++++ drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 58 ++++++++++++++++++++++++ drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 41 ----------------- drivers/gpu/drm/xe/xe_bb.c | 1 + drivers/gpu/drm/xe/xe_execlist.c | 1 + drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_lrc.c | 1 + drivers/gpu/drm/xe/xe_migrate.c | 1 + drivers/gpu/drm/xe/xe_ring_ops.c | 3 +- 9 files changed, 96 insertions(+), 42 deletions(-) create mode 100644 drivers/gpu/drm/xe/instructions/xe_instr_defs.h create mode 100644 drivers/gpu/drm/xe/instructions/xe_mi_commands.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h new file mode 100644 index 000000000000..a7ec46395786 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_INSTR_DEFS_H_ +#define _XE_INSTR_DEFS_H_ + +#include "regs/xe_reg_defs.h" + +/* + * The first dword of any GPU instruction is the "instruction header." Bits + * 31:29 identify the general type of the command and determine how exact + * opcodes and sub-opcodes will be encoded in the remaining bits. + */ +#define XE_INSTR_CMD_TYPE GENMASK(31, 29) +#define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0) + +/* + * Most (but not all) instructions have a "length" field in the instruction + * header. The value expected is the total number of dwords for the + * instruction, minus two. + * + * Some instructions have length fields longer or shorter than 8 bits, but + * those are rare. This definition can be used for the common case where + * the length field is from 7:0. + */ +#define XE_INSTR_LEN_MASK GENMASK(7, 0) +#define XE_INSTR_NUM_DW(x) REG_FIELD_PREP(XE_INSTR_LEN_MASK, (x) - 2) + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h new file mode 100644 index 000000000000..753ebf1efa78 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MI_COMMANDS_H_ +#define _XE_MI_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +/* + * MI (Memory Interface) commands are supported by all GT engines. They + * provide general memory operations and command streamer control. MI commands + * have a command type of 0x0 (MI_COMMAND) in bits 31:29 of the instruction + * header dword and a specific MI opcode in bits 28:23. 
+ */ + +#define MI_OPCODE REG_GENMASK(28, 23) +#define MI_SUBOPCODE REG_GENMASK(22, 17) /* used with MI_EXPANSION */ + +#define __MI_INSTR(opcode) \ + (XE_INSTR_MI | REG_FIELD_PREP(MI_OPCODE, opcode)) + +#define MI_NOOP __MI_INSTR(0x0) +#define MI_USER_INTERRUPT __MI_INSTR(0x2) +#define MI_ARB_CHECK __MI_INSTR(0x5) + +#define MI_ARB_ON_OFF __MI_INSTR(0x8) +#define MI_ARB_ENABLE REG_BIT(0) +#define MI_ARB_DISABLE 0x0 + +#define MI_BATCH_BUFFER_END __MI_INSTR(0xA) +#define MI_STORE_DATA_IMM __MI_INSTR(0x20) +#define MI_SDI_GGTT REG_BIT(22) +#define MI_SDI_LEN_DW GENMASK(9, 0) +#define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2) +#define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \ + REG_BIT(21)) + +#define MI_LOAD_REGISTER_IMM __MI_INSTR(0x22) +#define MI_LRI_LRM_CS_MMIO REG_BIT(19) +#define MI_LRI_MMIO_REMAP_EN REG_BIT(17) +#define MI_LRI_NUM_REGS(x) XE_INSTR_NUM_DW(2 * (x) + 1) +#define MI_LRI_FORCE_POSTED REG_BIT(12) + +#define MI_FLUSH_DW __MI_INSTR(0x26) +#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) +#define MI_INVALIDATE_TLB REG_BIT(18) +#define MI_FLUSH_DW_CCS REG_BIT(16) +#define MI_FLUSH_DW_OP_STOREDW REG_BIT(14) +#define MI_FLUSH_DW_LEN_DW REG_GENMASK(5, 0) +#define MI_FLUSH_IMM_DW REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 4 - 2) +#define MI_FLUSH_IMM_QW REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2) +#define MI_FLUSH_DW_USE_GTT REG_BIT(2) + +#define MI_BATCH_BUFFER_START __MI_INSTR(0x31) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 8c2e0da694d8..4402f72481dc 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -8,45 +8,6 @@ #include "regs/xe_reg_defs.h" -#define INSTR_CLIENT_SHIFT 29 -#define INSTR_MI_CLIENT 0x0 -#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT) - -#define MI_INSTR(opcode, flags) \ - (__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags)) - -#define MI_NOOP MI_INSTR(0, 0) -#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) - -#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) -#define MI_ARB_ENABLE (1<<0) -#define MI_ARB_DISABLE (0<<0) - -#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) -#define MI_STORE_DATA_IMM MI_INSTR(0x20, 0) -#define MI_SDI_GGTT REG_BIT(22) -#define MI_SDI_NUM_DW(x) ((x) + 1) -#define MI_SDI_NUM_QW(x) (REG_BIT(21) | (2 * (x) + 1)) - -#define MI_LOAD_REGISTER_IMM MI_INSTR(0x22, 0) -#define MI_LRI_LRM_CS_MMIO REG_BIT(19) -#define MI_LRI_MMIO_REMAP_EN REG_BIT(17) -#define MI_LRI_LENGTH GENMASK(5, 0) -#define MI_LRI_NUM_REGS(x) REG_FIELD_PREP(MI_LRI_LENGTH, 2 * (x) - 1) -#define MI_LRI_FORCE_POSTED (1<<12) - -#define MI_FLUSH_DW MI_INSTR(0x26, 0) -#define MI_FLUSH_DW_STORE_INDEX (1<<21) -#define MI_INVALIDATE_TLB (1<<18) -#define MI_FLUSH_DW_CCS (1<<16) -#define MI_FLUSH_DW_OP_STOREDW (1<<14) -#define MI_FLUSH_DW_USE_GTT (1<<2) -#define MI_FLUSH_LENGTH GENMASK(5, 0) -#define MI_FLUSH_IMM_DW REG_FIELD_PREP(MI_FLUSH_LENGTH, 2) -#define MI_FLUSH_IMM_QW REG_FIELD_PREP(MI_FLUSH_LENGTH, 3) - -#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 1) - #define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) #define SRC_ACCESS_TYPE_SHIFT 21 #define DST_ACCESS_TYPE_SHIFT 20 @@ -106,6 +67,4 @@ #define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) -#define MI_ARB_CHECK MI_INSTR(0x05, 0) - #endif diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index f871ba82bc9b..7c124475c428 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -5,6 +5,7 @@ 
#include "xe_bb.h" +#include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" #include "xe_device.h" #include "xe_exec_queue_types.h" diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 22dfe91b2b83..1541fb64949c 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -7,6 +7,7 @@ #include +#include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index a42ee3b9b8c7..74e1f47bd401 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -10,6 +10,7 @@ #include #include +#include "instructions/xe_mi_commands.h" #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_bb.h" diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 81463bd5e490..a04867131839 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,6 +5,7 @@ #include "xe_lrc.h" +#include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index b81ef1bdecc6..731beb622fe8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -13,6 +13,7 @@ #include #include "generated/xe_wa_oob.h" +#include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" #include "tests/xe_test.h" #include "xe_assert.h" diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index da13cc7ba6af..58676f4b989f 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -6,6 +6,7 @@ #include "xe_ring_ops.h" #include "generated/xe_wa_oob.h" +#include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" @@ -91,7 +92,7 @@ static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) { - dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag; + dw[i++] = MI_BATCH_BUFFER_START | ppgtt_flag | XE_INSTR_NUM_DW(3); dw[i++] = lower_32_bits(batch_addr); dw[i++] = upper_32_bits(batch_addr); -- cgit v1.2.3 From 0f60547f7d2c3db16b151540e6697c7d90a9f93b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 16 Oct 2023 09:34:55 -0700 Subject: drm/xe/debugfs: Add dump of default LRCs' MI instructions For non-RCS engines, nearly all of the LRC state is composed of MI instructions (specifically MI_LOAD_REGISTER_IMM). Providing a dump interface allows us to verify that the context image layout matches what's documented in the bspec, and also allows us to check whether LRC workarounds are being properly captured by the default state we record at startup. For now, the non-MI instructions found in the RCS and CCS engines will dump as "unknown;" parsing of those will be added in a follow-up patch. v2: - Add raw instruction header as well as decoded meaning. (Lucas) - Check that num_dw isn't greater than remaining_dw for instructions that have a "# dwords" field. (Lucas) - Clarify comment about skipping over ppHWSP. 
(Lucas) Bspec: 64993 Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231016163449.1300701-13-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/instructions/xe_instr_defs.h | 1 + drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 3 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 46 +++++++++++ drivers/gpu/drm/xe/xe_lrc.c | 97 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 5 ++ 5 files changed, 152 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h index a7ec46395786..e403b4fcc20a 100644 --- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h @@ -15,6 +15,7 @@ */ #define XE_INSTR_CMD_TYPE GENMASK(31, 29) #define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0) +#define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3) /* * Most (but not all) instructions have a "length" field in the instruction diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 753ebf1efa78..1cfa96167fde 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -30,6 +30,9 @@ #define MI_ARB_DISABLE 0x0 #define MI_BATCH_BUFFER_END __MI_INSTR(0xA) +#define MI_TOPOLOGY_FILTER __MI_INSTR(0xD) +#define MI_FORCE_WAKEUP __MI_INSTR(0x1D) + #define MI_STORE_DATA_IMM __MI_INSTR(0x20) #define MI_SDI_GGTT REG_BIT(22) #define MI_SDI_LEN_DW GENMASK(9, 0) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index cd6d28c7b923..c4b67cf09f8f 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -15,6 +15,7 @@ #include "xe_gt_mcr.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" +#include "xe_lrc.h" #include "xe_macros.h" #include "xe_pat.h" #include "xe_reg_sr.h" @@ -149,6 +150,46 @@ static int pat(struct seq_file *m, void *data) return 0; } +static int rcs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER); + return 0; +} + +static int ccs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE); + return 0; +} + +static int bcs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY); + return 0; +} + +static int vcs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE); + return 0; +} + +static int vecs_default_lrc(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + + xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE); + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", hw_engines, 0}, {"force_reset", force_reset, 0}, @@ -159,6 +200,11 @@ static const struct drm_info_list debugfs_list[] = { {"register-save-restore", register_save_restore, 0}, {"workarounds", workarounds, 0}, {"pat", pat, 0}, + {"default_lrc_rcs", rcs_default_lrc}, + {"default_lrc_ccs", ccs_default_lrc}, + {"default_lrc_bcs", bcs_default_lrc}, + 
{"default_lrc_vcs", vcs_default_lrc}, + {"default_lrc_vecs", vecs_default_lrc}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index a04867131839..38e98c54464b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -16,6 +16,7 @@ #include "xe_drm_client.h" #include "xe_exec_queue_types.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_hw_fence.h" #include "xe_map.h" #include "xe_vm.h" @@ -903,3 +904,99 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) { return __xe_lrc_parallel_map(lrc); } + +static int instr_dw(u32 cmd_header) +{ + /* Most instructions have the # of dwords (minus 2) in 7:0 */ + return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; +} + +static int dump_mi_command(struct drm_printer *p, + struct xe_gt *gt, + u32 *dw, + int remaining_dw) +{ + u32 inst_header = *dw; + u32 numdw = instr_dw(inst_header); + u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); + int num_noop; + + /* First check for commands that don't have/use a '# DW' field */ + switch (inst_header & MI_OPCODE) { + case MI_NOOP: + num_noop = 1; + while (num_noop < remaining_dw && + (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) + num_noop++; + drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); + return num_noop; + + case MI_TOPOLOGY_FILTER: + drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); + return 1; + + case MI_BATCH_BUFFER_END: + drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); + /* Return 'remaining_dw' to consume the rest of the LRC */ + return remaining_dw; + } + + /* + * Any remaining commands include a # of dwords. We should make sure + * it doesn't exceed the remaining size of the LRC. + */ + if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) + numdw = remaining_dw; + + switch (inst_header & MI_OPCODE) { + case MI_LOAD_REGISTER_IMM: + drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", + inst_header, (numdw - 1) / 2); + for (int i = 1; i < numdw; i += 2) + drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); + return numdw; + + case MI_FORCE_WAKEUP: + drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); + return numdw; + + default: + drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", + inst_header, opcode, numdw); + return numdw; + } +} + +void xe_lrc_dump_default(struct drm_printer *p, + struct xe_gt *gt, + enum xe_engine_class hwe_class) +{ + u32 *dw; + int remaining_dw, num_dw; + + if (!gt->default_lrc[hwe_class]) { + drm_printf(p, "No default LRC for class %d\n", hwe_class); + return; + } + + /* + * Skip the beginning of the LRC since it contains the per-process + * hardware status page. 
+ */ + dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; + remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4; + + while (remaining_dw > 0) { + if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { + num_dw = dump_mi_command(p, gt, dw, remaining_dw); + } else { + num_dw = min(instr_dw(*dw), remaining_dw); + drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", + *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), + num_dw); + } + + dw += num_dw; + remaining_dw -= num_dw; + } +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 3a6e8fc5a837..a7056eda5e0c 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -7,6 +7,7 @@ #include "xe_lrc_types.h" +struct drm_printer; struct xe_device; struct xe_exec_queue; enum xe_engine_class; @@ -47,4 +48,8 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); size_t xe_lrc_skip_size(struct xe_device *xe); +void xe_lrc_dump_default(struct drm_printer *p, + struct xe_gt *gt, + enum xe_engine_class); + #endif -- cgit v1.2.3 From 6de492ae5f5ee6edccf1e1fae472bc5f95cec8e6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 16 Oct 2023 09:34:56 -0700 Subject: drm/xe/debugfs: Include GFXPIPE commands in LRC dump RCS and CCS engines include several non-register gfxpipe commands in their LRC images. Include these in the dump output so that we can see exactly what's inside the context snapshot. v2: - Include raw instruction header in output - Add 3DSTATE_AMFS_TEXTURE_POINTERS and 3DSTATE_MONOFILTER_SIZE. The first was supposed to be removed in Xe_HPG, and the second by gen12, but both still show up in the RCS LRC. v3: - Sanity check that we don't have numdw > remaining_dw. (Lucas) Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231016163449.1300701-14-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- .../gpu/drm/xe/instructions/xe_gfxpipe_commands.h | 108 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_lrc.c | 108 +++++++++++++++++++++ 2 files changed, 216 insertions(+) create mode 100644 drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h new file mode 100644 index 000000000000..b5fbc761265c --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GFXPIPE_COMMANDS_H_ +#define _XE_GFXPIPE_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +#define GFXPIPE_PIPELINE REG_GENMASK(28, 27) +#define PIPELINE_COMMON REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x0) +#define PIPELINE_SINGLE_DW REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x1) +#define PIPELINE_COMPUTE REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x2) +#define PIPELINE_3D REG_FIELD_PREP(GFXPIPE_PIPELINE, 0x3) + +#define GFXPIPE_OPCODE REG_GENMASK(26, 24) +#define GFXPIPE_SUBOPCODE REG_GENMASK(23, 16) + +#define GFXPIPE_MATCH_MASK (XE_INSTR_CMD_TYPE | \ + GFXPIPE_PIPELINE | \ + GFXPIPE_OPCODE | \ + GFXPIPE_SUBOPCODE) + +#define GFXPIPE_COMMON_CMD(opcode, subopcode) \ + (XE_INSTR_GFXPIPE | PIPELINE_COMMON | \ + REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \ + REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode)) + +#define GFXPIPE_SINGLE_DW_CMD(opcode, subopcode) \ + (XE_INSTR_GFXPIPE | PIPELINE_SINGLE_DW | \ + REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \ + REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode)) + +#define 
GFXPIPE_3D_CMD(opcode, subopcode) \ + (XE_INSTR_GFXPIPE | PIPELINE_3D | \ + REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \ + REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode)) + +#define GFXPIPE_COMPUTE_CMD(opcode, subopcode) \ + (XE_INSTR_GFXPIPE | PIPELINE_COMPUTE | \ + REG_FIELD_PREP(GFXPIPE_OPCODE, opcode) | \ + REG_FIELD_PREP(GFXPIPE_SUBOPCODE, subopcode)) + +#define STATE_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x1) +#define STATE_SIP GFXPIPE_COMMON_CMD(0x1, 0x2) +#define GPGPU_CSR_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x4) +#define STATE_COMPUTE_MODE GFXPIPE_COMMON_CMD(0x1, 0x5) +#define CMD_3DSTATE_BTD GFXPIPE_COMMON_CMD(0x1, 0x6) + +#define CMD_3DSTATE_VF_STATISTICS GFXPIPE_SINGLE_DW_CMD(0x0, 0xB) + +#define PIPELINE_SELECT GFXPIPE_SINGLE_DW_CMD(0x1, 0x4) + +#define CMD_3DSTATE_CLEAR_PARAMS GFXPIPE_3D_CMD(0x0, 0x4) +#define CMD_3DSTATE_DEPTH_BUFFER GFXPIPE_3D_CMD(0x0, 0x5) +#define CMD_3DSTATE_STENCIL_BUFFER GFXPIPE_3D_CMD(0x0, 0x6) +#define CMD_3DSTATE_HIER_DEPTH_BUFFER GFXPIPE_3D_CMD(0x0, 0x7) +#define CMD_3DSTATE_VERTEX_BUFFERS GFXPIPE_3D_CMD(0x0, 0x8) +#define CMD_3DSTATE_INDEX_BUFFER GFXPIPE_3D_CMD(0x0, 0xA) +#define CMD_3DSTATE_VF GFXPIPE_3D_CMD(0x0, 0xC) +#define CMD_3DSTATE_CC_STATE_POINTERS GFXPIPE_3D_CMD(0x0, 0xE) +#define CMD_3DSTATE_WM GFXPIPE_3D_CMD(0x0, 0x14) +#define CMD_3DSTATE_SAMPLE_MASK GFXPIPE_3D_CMD(0x0, 0x18) +#define CMD_3DSTATE_SBE GFXPIPE_3D_CMD(0x0, 0x1F) +#define CMD_3DSTATE_PS GFXPIPE_3D_CMD(0x0, 0x20) +#define CMD_3DSTATE_CPS_POINTERS GFXPIPE_3D_CMD(0x0, 0x22) +#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC GFXPIPE_3D_CMD(0x0, 0x23) +#define CMD_3DSTATE_BLEND_STATE_POINTERS GFXPIPE_3D_CMD(0x0, 0x24) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_PS GFXPIPE_3D_CMD(0x0, 0x2A) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_PS GFXPIPE_3D_CMD(0x0, 0x2F) +#define CMD_3DSTATE_VF_INSTANCING GFXPIPE_3D_CMD(0x0, 0x49) +#define CMD_3DSTATE_VF_TOPOLOGY GFXPIPE_3D_CMD(0x0, 0x4B) +#define CMD_3DSTATE_WM_CHROMAKEY GFXPIPE_3D_CMD(0x0, 0x4C) +#define CMD_3DSTATE_PS_BLEND GFXPIPE_3D_CMD(0x0, 0x4D) +#define CMD_3DSTATE_WM_DEPTH_STENCIL GFXPIPE_3D_CMD(0x0, 0x4E) +#define CMD_3DSTATE_PS_EXTRA GFXPIPE_3D_CMD(0x0, 0x4F) +#define CMD_3DSTATE_SBE_SWIZ GFXPIPE_3D_CMD(0x0, 0x51) +#define CMD_3DSTATE_VFG GFXPIPE_3D_CMD(0x0, 0x57) +#define CMD_3DSTATE_AMFS GFXPIPE_3D_CMD(0x0, 0x6F) +#define CMD_3DSTATE_DEPTH_BOUNDS GFXPIPE_3D_CMD(0x0, 0x71) +#define CMD_3DSTATE_AMFS_TEXTURE_POINTERS GFXPIPE_3D_CMD(0x0, 0x72) +#define CMD_3DSTATE_CONSTANT_TS_POINTER GFXPIPE_3D_CMD(0x0, 0x73) +#define CMD_3DSTATE_MESH_DISTRIB GFXPIPE_3D_CMD(0x0, 0x78) +#define CMD_3DSTATE_SBE_MESH GFXPIPE_3D_CMD(0x0, 0x82) +#define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER GFXPIPE_3D_CMD(0x0, 0x83) + +#define CMD_3DSTATE_CHROMA_KEY GFXPIPE_3D_CMD(0x1, 0x4) +#define CMD_3DSTATE_POLY_STIPPLE_OFFSET GFXPIPE_3D_CMD(0x1, 0x6) +#define CMD_3DSTATE_POLY_STIPPLE_PATTERN GFXPIPE_3D_CMD(0x1, 0x7) +#define CMD_3DSTATE_LINE_STIPPLE GFXPIPE_3D_CMD(0x1, 0x8) +#define CMD_3DSTATE_AA_LINE_PARAMETERS GFXPIPE_3D_CMD(0x1, 0xA) +#define CMD_3DSTATE_MONOFILTER_SIZE GFXPIPE_3D_CMD(0x1, 0x11) +#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_VS GFXPIPE_3D_CMD(0x1, 0x12) +#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_HS GFXPIPE_3D_CMD(0x1, 0x13) +#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_DS GFXPIPE_3D_CMD(0x1, 0x14) +#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_GS GFXPIPE_3D_CMD(0x1, 0x15) +#define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_PS GFXPIPE_3D_CMD(0x1, 0x16) +#define CMD_3DSTATE_SO_DECL_LIST GFXPIPE_3D_CMD(0x1, 0x17) +#define CMD_3DSTATE_SO_DECL_LIST_DW_LEN REG_GENMASK(8, 0) +#define 
CMD_3DSTATE_BINDING_TABLE_POOL_ALLOC GFXPIPE_3D_CMD(0x1, 0x19) +#define CMD_3DSTATE_SAMPLE_PATTERN GFXPIPE_3D_CMD(0x1, 0x1C) +#define CMD_3DSTATE_3D_MODE GFXPIPE_3D_CMD(0x1, 0x1E) +#define CMD_3DSTATE_SUBSLICE_HASH_TABLE GFXPIPE_3D_CMD(0x1, 0x1F) +#define CMD_3DSTATE_SLICE_TABLE_STATE_POINTERS GFXPIPE_3D_CMD(0x1, 0x20) +#define CMD_3DSTATE_PTBR_TILE_PASS_INFO GFXPIPE_3D_CMD(0x1, 0x22) + +#endif diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 38e98c54464b..332fc0602074 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -6,6 +6,7 @@ #include "xe_lrc.h" #include "instructions/xe_mi_commands.h" +#include "instructions/xe_gfxpipe_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" @@ -907,6 +908,15 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) static int instr_dw(u32 cmd_header) { + /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ + if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == + GFXPIPE_SINGLE_DW_CMD(0, 0)) + return 1; + + /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ + if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) + return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; + /* Most instructions have the # of dwords (minus 2) in 7:0 */ return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; } @@ -967,6 +977,102 @@ static int dump_mi_command(struct drm_printer *p, } } +static int dump_gfxpipe_command(struct drm_printer *p, + struct xe_gt *gt, + u32 *dw, + int remaining_dw) +{ + u32 numdw = instr_dw(*dw); + u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); + u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); + u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); + + /* + * Make sure we haven't mis-parsed a number of dwords that exceeds the + * remaining size of the LRC. 
+ */ + if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) + numdw = remaining_dw; + + switch (*dw & GFXPIPE_MATCH_MASK) { +#define MATCH(cmd) \ + case cmd: \ + drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ + return numdw +#define MATCH3D(cmd) \ + case CMD_##cmd: \ + drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ + return numdw + + MATCH(STATE_BASE_ADDRESS); + MATCH(STATE_SIP); + MATCH(GPGPU_CSR_BASE_ADDRESS); + MATCH(STATE_COMPUTE_MODE); + MATCH3D(3DSTATE_BTD); + + MATCH3D(3DSTATE_VF_STATISTICS); + + MATCH(PIPELINE_SELECT); + + MATCH3D(3DSTATE_CLEAR_PARAMS); + MATCH3D(3DSTATE_DEPTH_BUFFER); + MATCH3D(3DSTATE_STENCIL_BUFFER); + MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); + MATCH3D(3DSTATE_VERTEX_BUFFERS); + MATCH3D(3DSTATE_INDEX_BUFFER); + MATCH3D(3DSTATE_VF); + MATCH3D(3DSTATE_CC_STATE_POINTERS); + MATCH3D(3DSTATE_WM); + MATCH3D(3DSTATE_SAMPLE_MASK); + MATCH3D(3DSTATE_SBE); + MATCH3D(3DSTATE_PS); + MATCH3D(3DSTATE_CPS_POINTERS); + MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); + MATCH3D(3DSTATE_BLEND_STATE_POINTERS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); + MATCH3D(3DSTATE_VF_INSTANCING); + MATCH3D(3DSTATE_VF_TOPOLOGY); + MATCH3D(3DSTATE_WM_CHROMAKEY); + MATCH3D(3DSTATE_PS_BLEND); + MATCH3D(3DSTATE_WM_DEPTH_STENCIL); + MATCH3D(3DSTATE_PS_EXTRA); + MATCH3D(3DSTATE_SBE_SWIZ); + MATCH3D(3DSTATE_VFG); + MATCH3D(3DSTATE_AMFS); + MATCH3D(3DSTATE_DEPTH_BOUNDS); + MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); + MATCH3D(3DSTATE_CONSTANT_TS_POINTER); + MATCH3D(3DSTATE_MESH_DISTRIB); + MATCH3D(3DSTATE_SBE_MESH); + MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); + + MATCH3D(3DSTATE_CHROMA_KEY); + MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); + MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); + MATCH3D(3DSTATE_LINE_STIPPLE); + MATCH3D(3DSTATE_AA_LINE_PARAMETERS); + MATCH3D(3DSTATE_MONOFILTER_SIZE); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); + MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); + MATCH3D(3DSTATE_SO_DECL_LIST); + MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); + MATCH3D(3DSTATE_SAMPLE_PATTERN); + MATCH3D(3DSTATE_3D_MODE); + MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); + MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); + MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); + + default: + drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", + *dw, pipeline, opcode, subopcode, numdw); + return numdw; + } +} + void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class hwe_class) @@ -989,6 +1095,8 @@ void xe_lrc_dump_default(struct drm_printer *p, while (remaining_dw > 0) { if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { num_dw = dump_mi_command(p, gt, dw, remaining_dw); + } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { + num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); } else { num_dw = min(instr_dw(*dw), remaining_dw); drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", -- cgit v1.2.3 From fd47ded2379265b58dd5ae699fa1f5a14e65fdfc Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Wed, 20 Sep 2023 15:29:21 -0400 Subject: drm/xe: Fix array bounds check for queries Queries are 0-indexed, so a query with value N is invalid if the ARRAY_SIZE is N. Modify the check to account for that. 
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index a951205100fe..e0c2203e428e 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -381,7 +381,7 @@ int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, query->query > ARRAY_SIZE(xe_query_funcs))) + if (XE_IOCTL_DBG(xe, query->query >= ARRAY_SIZE(xe_query_funcs))) return -EINVAL; idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs)); -- cgit v1.2.3 From 61d63a59f68c7ab558b020cc675b9f94ef403c5f Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Wed, 20 Sep 2023 15:29:22 -0400 Subject: drm/xe: Set the correct type for xe_to_user_engine_class User engine class is of type u16. Set the same type for the array used to map xe engines to user engines. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e0c2203e428e..cbccd5c3dbc8 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -19,7 +19,7 @@ #include "xe_macros.h" #include "xe_ttm_vram_mgr.h" -static const enum xe_engine_class xe_to_user_engine_class[] = { +static const u16 xe_to_user_engine_class[] = { [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER, [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY, [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, -- cgit v1.2.3 From 7793d00d1bf5923e77bbe7ace8089bfdfa19dc38 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 14 Aug 2023 15:37:34 -0700 Subject: drm/xe: Correlate engine and cpu timestamps with better accuracy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Perf measurements rely on CPU and engine timestamps to correlate events of interest across these time domains. Current mechanisms get these timestamps separately and the calculated delta between these timestamps lack enough accuracy. To improve the accuracy of these time measurements to within a few us, add a query that returns the engine and cpu timestamps captured as close to each other as possible. 
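For illustration only (not part of the patch), a rough userspace sketch of how the query might be used, assuming an already-open DRM file descriptor "fd" and omitting error handling; the ioctl, struct and field names are the ones added to the uAPI below.

	struct drm_xe_query_engine_cycles ec = {
		.eci = {
			.engine_class	 = DRM_XE_ENGINE_CLASS_RENDER,
			.engine_instance = 0,
			.gt_id		 = 0,
		},
		.clockid = CLOCK_MONOTONIC,	/* reference clock for cpu_timestamp */
	};
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES,
		.size  = sizeof(ec),
		.data  = (uintptr_t)&ec,
	};

	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) == 0) {
		/* engine_cycles is an ec.width-bit counter; convert to ns
		 * in floating point to avoid 64-bit overflow */
		double engine_ns = (double)ec.engine_cycles * 1e9 / ec.engine_frequency;

		/*
		 * ec.cpu_timestamp (in the requested clockid domain) was
		 * sampled within ec.cpu_delta ns of the engine counter read,
		 * so the two time domains can be correlated to roughly that
		 * accuracy using engine_ns and ec.cpu_timestamp.
		 */
	}

Because both samples come from a single ioctl, the correlation error is bounded by cpu_delta rather than by the round trip of two separate reads.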
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24591 v2: - Fix kernel-doc warnings (CI) - Document input params and group them together (Jose) - s/cs/engine/ (Jose) - Remove padding in the query (Ashutosh) Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: José Roberto de Souza Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi [Rodrigo finished the s/cs/engine renaming] --- drivers/gpu/drm/xe/xe_query.c | 138 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 104 +++++++++++++++++++++++-------- 2 files changed, 218 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index cbccd5c3dbc8..cd3e0f3208a6 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -6,10 +6,12 @@ #include "xe_query.h" #include +#include #include #include +#include "regs/xe_engine_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" @@ -17,6 +19,7 @@ #include "xe_gt.h" #include "xe_guc_hwconfig.h" #include "xe_macros.h" +#include "xe_mmio.h" #include "xe_ttm_vram_mgr.h" static const u16 xe_to_user_engine_class[] = { @@ -27,6 +30,14 @@ static const u16 xe_to_user_engine_class[] = { [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE, }; +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + static size_t calc_hw_engine_info_size(struct xe_device *xe) { struct xe_hw_engine *hwe; @@ -45,6 +56,132 @@ static size_t calc_hw_engine_info_size(struct xe_device *xe) return i * sizeof(struct drm_xe_engine_class_instance); } +typedef u64 (*__ktime_func_t)(void); +static __ktime_func_t __clock_id_to_func(clockid_t clk_id) +{ + /* + * Use logic same as the perf subsystem to allow user to select the + * reference clock id to be used for timestamps. 
+ */ + switch (clk_id) { + case CLOCK_MONOTONIC: + return &ktime_get_ns; + case CLOCK_MONOTONIC_RAW: + return &ktime_get_raw_ns; + case CLOCK_REALTIME: + return &ktime_get_real_ns; + case CLOCK_BOOTTIME: + return &ktime_get_boottime_ns; + case CLOCK_TAI: + return &ktime_get_clocktai_ns; + default: + return NULL; + } +} + +static void +__read_timestamps(struct xe_gt *gt, + struct xe_reg lower_reg, + struct xe_reg upper_reg, + u64 *engine_ts, + u64 *cpu_ts, + u64 *cpu_delta, + __ktime_func_t cpu_clock) +{ + u32 upper, lower, old_upper, loop = 0; + + upper = xe_mmio_read32(gt, upper_reg); + do { + *cpu_delta = local_clock(); + *cpu_ts = cpu_clock(); + lower = xe_mmio_read32(gt, lower_reg); + *cpu_delta = local_clock() - *cpu_delta; + old_upper = upper; + upper = xe_mmio_read32(gt, upper_reg); + } while (upper != old_upper && loop++ < 2); + + *engine_ts = (u64)upper << 32 | lower; +} + +static int +query_engine_cycles(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + struct drm_xe_query_engine_cycles __user *query_ptr; + struct drm_xe_engine_class_instance *eci; + struct drm_xe_query_engine_cycles resp; + size_t size = sizeof(resp); + __ktime_func_t cpu_clock; + struct xe_hw_engine *hwe; + struct xe_gt *gt; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + query_ptr = u64_to_user_ptr(query->data); + if (copy_from_user(&resp, query_ptr, size)) + return -EFAULT; + + cpu_clock = __clock_id_to_func(resp.clockid); + if (!cpu_clock) + return -EINVAL; + + eci = &resp.eci; + if (eci->gt_id > XE_MAX_GT_PER_TILE) + return -EINVAL; + + gt = xe_device_get_gt(xe, eci->gt_id); + if (!gt) + return -EINVAL; + + if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) + return -EINVAL; + + hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class], + eci->engine_instance, true); + if (!hwe) + return -EINVAL; + + resp.engine_frequency = gt->info.clock_freq; + + xe_device_mem_access_get(xe); + xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + + __read_timestamps(gt, + RING_TIMESTAMP(hwe->mmio_base), + RING_TIMESTAMP_UDW(hwe->mmio_base), + &resp.engine_cycles, + &resp.cpu_timestamp, + &resp.cpu_delta, + cpu_clock); + + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_device_mem_access_put(xe); + resp.width = 36; + + /* Only write to the output fields of user query */ + if (put_user(resp.engine_frequency, &query_ptr->engine_frequency)) + return -EFAULT; + + if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp)) + return -EFAULT; + + if (put_user(resp.cpu_delta, &query_ptr->cpu_delta)) + return -EFAULT; + + if (put_user(resp.engine_cycles, &query_ptr->engine_cycles)) + return -EFAULT; + + if (put_user(resp.width, &query_ptr->width)) + return -EFAULT; + + return 0; +} + static int query_engines(struct xe_device *xe, struct drm_xe_device_query *query) { @@ -369,6 +506,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_gts, query_hwconfig, query_gt_topology, + query_engine_cycles, }; int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d48d8e3c898c..079213a3df55 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -128,6 +128,25 @@ struct xe_user_extension { #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) #define DRM_IOCTL_XE_VM_MADVISE DRM_IOW(DRM_COMMAND_BASE + 
DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise) +/** struct drm_xe_engine_class_instance - instance of an engine class */ +struct drm_xe_engine_class_instance { +#define DRM_XE_ENGINE_CLASS_RENDER 0 +#define DRM_XE_ENGINE_CLASS_COPY 1 +#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2 +#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 +#define DRM_XE_ENGINE_CLASS_COMPUTE 4 + /* + * Kernel only class (not actual hardware engine class). Used for + * creating ordered queues of VM bind operations. + */ +#define DRM_XE_ENGINE_CLASS_VM_BIND 5 + __u16 engine_class; + + __u16 engine_instance; + __u16 gt_id; + __u16 rsvd; +}; + /** * enum drm_xe_memory_class - Supported memory classes. */ @@ -219,6 +238,60 @@ struct drm_xe_query_mem_region { __u64 reserved[6]; }; +/** + * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps + * + * If a query is made with a struct drm_xe_device_query where .query is equal to + * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles + * in .data. struct drm_xe_query_engine_cycles is allocated by the user and + * .data points to this allocated structure. + * + * The query returns the engine cycles and the frequency that can + * be used to calculate the engine timestamp. In addition the + * query returns a set of cpu timestamps that indicate when the command + * streamer cycle count was captured. + */ +struct drm_xe_query_engine_cycles { + /** + * @eci: This is input by the user and is the engine for which command + * streamer cycles is queried. + */ + struct drm_xe_engine_class_instance eci; + + /** + * @clockid: This is input by the user and is the reference clock id for + * CPU timestamp. For definition, see clock_gettime(2) and + * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC, + * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI. + */ + __s32 clockid; + + /** @width: Width of the engine cycle counter in bits. */ + __u32 width; + + /** + * @engine_cycles: Engine cycles as read from its register + * at 0x358 offset. + */ + __u64 engine_cycles; + + /** @engine_frequency: Frequency of the engine cycles in Hz. */ + __u64 engine_frequency; + + /** + * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before + * reading the engine_cycles register using the reference clockid set by the + * user. + */ + __u64 cpu_timestamp; + + /** + * @cpu_delta: Time delta in ns captured around reading the lower dword + * of the engine_cycles register. 
+ */ + __u64 cpu_delta; +}; + /** * struct drm_xe_query_mem_usage - describe memory regions and usage * @@ -385,12 +458,13 @@ struct drm_xe_device_query { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -#define DRM_XE_DEVICE_QUERY_ENGINES 0 -#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 -#define DRM_XE_DEVICE_QUERY_CONFIG 2 -#define DRM_XE_DEVICE_QUERY_GTS 3 -#define DRM_XE_DEVICE_QUERY_HWCONFIG 4 -#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 +#define DRM_XE_DEVICE_QUERY_ENGINES 0 +#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 +#define DRM_XE_DEVICE_QUERY_CONFIG 2 +#define DRM_XE_DEVICE_QUERY_GTS 3 +#define DRM_XE_DEVICE_QUERY_HWCONFIG 4 +#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 +#define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 /** @query: The type of data to query */ __u32 query; @@ -732,24 +806,6 @@ struct drm_xe_exec_queue_set_property { __u64 reserved[2]; }; -/** struct drm_xe_engine_class_instance - instance of an engine class */ -struct drm_xe_engine_class_instance { -#define DRM_XE_ENGINE_CLASS_RENDER 0 -#define DRM_XE_ENGINE_CLASS_COPY 1 -#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2 -#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 -#define DRM_XE_ENGINE_CLASS_COMPUTE 4 - /* - * Kernel only class (not actual hardware engine class). Used for - * creating ordered queues of VM bind operations. - */ -#define DRM_XE_ENGINE_CLASS_VM_BIND 5 - __u16 engine_class; - - __u16 engine_instance; - __u16 gt_id; -}; - struct drm_xe_exec_queue_create { #define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 /** @extensions: Pointer to the first extension struct, if any */ -- cgit v1.2.3 From ea0640fc6971f555c8f921e2060376d768685805 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 20 Sep 2023 15:29:24 -0400 Subject: drm/xe/uapi: Separate VM_BIND's operation and flag Use different members in the drm_xe_vm_bind_op for op and for flags as it is done in other structures. Type is left to u32 to leave enough room for future operations and flags. v2: Remove the XE_VM_BIND_* flags shift (Rodrigo Vivi) Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/303 Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_vm.c | 29 ++++++++++++++++------------- include/uapi/drm/xe_drm.h | 14 ++++++++------ 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 035f3232e3b9..3ae911ade7e4 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2282,11 +2282,11 @@ static void vm_set_async_error(struct xe_vm *vm, int err) } static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, - u64 addr, u64 range, u32 op) + u64 addr, u64 range, u32 op, u32 flags) { struct xe_device *xe = vm->xe; struct xe_vma *vma; - bool async = !!(op & XE_VM_BIND_FLAG_ASYNC); + bool async = !!(flags & XE_VM_BIND_FLAG_ASYNC); lockdep_assert_held(&vm->lock); @@ -2387,7 +2387,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) static struct drm_gpuva_ops * vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 addr, u64 range, - u32 operation, u8 tile_mask, u32 region) + u32 operation, u32 flags, u8 tile_mask, u32 region) { struct drm_gem_object *obj = bo ? 
&bo->ttm.base : NULL; struct drm_gpuva_ops *ops; @@ -2416,10 +2416,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, op->tile_mask = tile_mask; op->map.immediate = - operation & XE_VM_BIND_FLAG_IMMEDIATE; + flags & XE_VM_BIND_FLAG_IMMEDIATE; op->map.read_only = - operation & XE_VM_BIND_FLAG_READONLY; - op->map.is_null = operation & XE_VM_BIND_FLAG_NULL; + flags & XE_VM_BIND_FLAG_READONLY; + op->map.is_null = flags & XE_VM_BIND_FLAG_NULL; } break; case XE_VM_BIND_OP_UNMAP: @@ -3236,15 +3236,16 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u64 range = (*bind_ops)[i].range; u64 addr = (*bind_ops)[i].addr; u32 op = (*bind_ops)[i].op; + u32 flags = (*bind_ops)[i].flags; u32 obj = (*bind_ops)[i].obj; u64 obj_offset = (*bind_ops)[i].obj_offset; u32 region = (*bind_ops)[i].region; - bool is_null = op & XE_VM_BIND_FLAG_NULL; + bool is_null = flags & XE_VM_BIND_FLAG_NULL; if (i == 0) { - *async = !!(op & XE_VM_BIND_FLAG_ASYNC); + *async = !!(flags & XE_VM_BIND_FLAG_ASYNC); } else if (XE_IOCTL_DBG(xe, !*async) || - XE_IOCTL_DBG(xe, !(op & XE_VM_BIND_FLAG_ASYNC)) || + XE_IOCTL_DBG(xe, !(flags & XE_VM_BIND_FLAG_ASYNC)) || XE_IOCTL_DBG(xe, VM_BIND_OP(op) == XE_VM_BIND_OP_RESTART)) { err = -EINVAL; @@ -3265,7 +3266,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) > XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, op & ~SUPPORTED_FLAGS) || + XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || XE_IOCTL_DBG(xe, obj && is_null) || XE_IOCTL_DBG(xe, obj_offset && is_null) || XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP && @@ -3480,8 +3481,9 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; u32 op = bind_ops[i].op; + u32 flags = bind_ops[i].flags; - err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op); + err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op, flags); if (err) goto free_syncs; } @@ -3490,13 +3492,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; u32 op = bind_ops[i].op; + u32 flags = bind_ops[i].flags; u64 obj_offset = bind_ops[i].obj_offset; u8 tile_mask = bind_ops[i].tile_mask; u32 region = bind_ops[i].region; ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, - addr, range, op, tile_mask, - region); + addr, range, op, flags, + tile_mask, region); if (IS_ERR(ops[i])) { err = PTR_ERR(ops[i]); ops[i] = NULL; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 079213a3df55..46db9334159b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -660,8 +660,10 @@ struct drm_xe_vm_bind_op { #define XE_VM_BIND_OP_RESTART 0x3 #define XE_VM_BIND_OP_UNMAP_ALL 0x4 #define XE_VM_BIND_OP_PREFETCH 0x5 + /** @op: Bind operation to perform */ + __u32 op; -#define XE_VM_BIND_FLAG_READONLY (0x1 << 16) +#define XE_VM_BIND_FLAG_READONLY (0x1 << 0) /* * A bind ops completions are always async, hence the support for out * sync. This flag indicates the allocation of the memory for new page @@ -686,12 +688,12 @@ struct drm_xe_vm_bind_op { * configured in the VM and must be set if the VM is configured with * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state. */ -#define XE_VM_BIND_FLAG_ASYNC (0x1 << 17) +#define XE_VM_BIND_FLAG_ASYNC (0x1 << 1) /* * Valid on a faulting VM only, do the MAP operation immediately rather * than deferring the MAP to the page fault handler. 
*/ -#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 18) +#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 2) /* * When the NULL flag is set, the page tables are setup with a special * bit which indicates writes are dropped and all reads return zero. In @@ -699,9 +701,9 @@ struct drm_xe_vm_bind_op { * operations, the BO handle MBZ, and the BO offset MBZ. This flag is * intended to implement VK sparse bindings. */ -#define XE_VM_BIND_FLAG_NULL (0x1 << 19) - /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */ - __u32 op; +#define XE_VM_BIND_FLAG_NULL (0x1 << 3) + /** @flags: Bind flags */ + __u32 flags; /** @mem_region: Memory region to prefetch VMA to, instance not a mask */ __u32 region; -- cgit v1.2.3 From 78ddc872c6a91d8973ca89209793323efaa86345 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 20 Sep 2023 15:29:25 -0400 Subject: drm/xe/vm: Remove VM_BIND_OP macro This macro was necessary when bind operations were shifted but this is no longer the case, so removing to simplify code. Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_vm.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 3ae911ade7e4..1a0546beef87 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2273,8 +2273,6 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, } } -#define VM_BIND_OP(op) (op & 0xffff) - static void vm_set_async_error(struct xe_vm *vm, int err) { lockdep_assert_held(&vm->lock); @@ -2290,7 +2288,7 @@ static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, lockdep_assert_held(&vm->lock); - switch (VM_BIND_OP(op)) { + switch (op) { case XE_VM_BIND_OP_MAP: case XE_VM_BIND_OP_MAP_USERPTR: vma = xe_vm_find_overlapping_vma(vm, addr, range); @@ -2400,10 +2398,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, vm_dbg(&vm->xe->drm, "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", - VM_BIND_OP(operation), (ULL)addr, (ULL)range, + operation, (ULL)addr, (ULL)range, (ULL)bo_offset_or_userptr); - switch (VM_BIND_OP(operation)) { + switch (operation) { case XE_VM_BIND_OP_MAP: case XE_VM_BIND_OP_MAP_USERPTR: ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, @@ -3246,50 +3244,48 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, *async = !!(flags & XE_VM_BIND_FLAG_ASYNC); } else if (XE_IOCTL_DBG(xe, !*async) || XE_IOCTL_DBG(xe, !(flags & XE_VM_BIND_FLAG_ASYNC)) || - XE_IOCTL_DBG(xe, VM_BIND_OP(op) == - XE_VM_BIND_OP_RESTART)) { + XE_IOCTL_DBG(xe, op == XE_VM_BIND_OP_RESTART)) { err = -EINVAL; goto free_bind_ops; } if (XE_IOCTL_DBG(xe, !*async && - VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL)) { + op == XE_VM_BIND_OP_UNMAP_ALL)) { err = -EINVAL; goto free_bind_ops; } if (XE_IOCTL_DBG(xe, !*async && - VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH)) { + op == XE_VM_BIND_OP_PREFETCH)) { err = -EINVAL; goto free_bind_ops; } - if (XE_IOCTL_DBG(xe, VM_BIND_OP(op) > - XE_VM_BIND_OP_PREFETCH) || + if (XE_IOCTL_DBG(xe, op > XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || XE_IOCTL_DBG(xe, obj && is_null) || XE_IOCTL_DBG(xe, obj_offset && is_null) || - XE_IOCTL_DBG(xe, VM_BIND_OP(op) != XE_VM_BIND_OP_MAP && + XE_IOCTL_DBG(xe, op != XE_VM_BIND_OP_MAP && is_null) || XE_IOCTL_DBG(xe, !obj && - VM_BIND_OP(op) == XE_VM_BIND_OP_MAP && + op == XE_VM_BIND_OP_MAP && !is_null) || 
XE_IOCTL_DBG(xe, !obj && - VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + op == XE_VM_BIND_OP_UNMAP_ALL) || XE_IOCTL_DBG(xe, addr && - VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + op == XE_VM_BIND_OP_UNMAP_ALL) || XE_IOCTL_DBG(xe, range && - VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP_ALL) || + op == XE_VM_BIND_OP_UNMAP_ALL) || XE_IOCTL_DBG(xe, obj && - VM_BIND_OP(op) == XE_VM_BIND_OP_MAP_USERPTR) || + op == XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, obj && - VM_BIND_OP(op) == XE_VM_BIND_OP_PREFETCH) || + op == XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, region && - VM_BIND_OP(op) != XE_VM_BIND_OP_PREFETCH) || + op != XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, !(BIT(region) & xe->info.mem_region_mask)) || XE_IOCTL_DBG(xe, obj && - VM_BIND_OP(op) == XE_VM_BIND_OP_UNMAP)) { + op == XE_VM_BIND_OP_UNMAP)) { err = -EINVAL; goto free_bind_ops; } @@ -3297,9 +3293,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || - XE_IOCTL_DBG(xe, !range && VM_BIND_OP(op) != + XE_IOCTL_DBG(xe, !range && op != XE_VM_BIND_OP_RESTART && - VM_BIND_OP(op) != XE_VM_BIND_OP_UNMAP_ALL)) { + op != XE_VM_BIND_OP_UNMAP_ALL)) { err = -EINVAL; goto free_bind_ops; } @@ -3363,7 +3359,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto release_vm_lock; } - if (VM_BIND_OP(bind_ops[0].op) == XE_VM_BIND_OP_RESTART) { + if (bind_ops[0].op == XE_VM_BIND_OP_RESTART) { if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) err = -EOPNOTSUPP; if (XE_IOCTL_DBG(xe, !err && args->num_syncs)) -- cgit v1.2.3 From 924e6a9789a05ef01ffdf849aa3a3c75f5a29a8b Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 20 Sep 2023 15:29:26 -0400 Subject: drm/xe/uapi: Remove MMIO ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was previously used in UMD for timestamp correlation, which can now be done with DRM_XE_QUERY_CS_CYCLES. 
Link: https://lore.kernel.org/all/20230706042044.GR6953@mdroper-desk1.amr.corp.intel.com/ Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/636 Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 1 - drivers/gpu/drm/xe/xe_mmio.c | 102 ----------------------------------------- drivers/gpu/drm/xe/xe_mmio.h | 3 -- include/uapi/drm/xe_drm.h | 31 ++----------- 4 files changed, 4 insertions(+), 133 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2bbd3aa2809b..ae0b7349c3e3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -121,7 +121,6 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index e4cf9bfec422..0da4f75c07bf 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -429,108 +429,6 @@ int xe_mmio_init(struct xe_device *xe) return 0; } -#define VALID_MMIO_FLAGS (\ - DRM_XE_MMIO_BITS_MASK |\ - DRM_XE_MMIO_READ |\ - DRM_XE_MMIO_WRITE) - -static const struct xe_reg mmio_read_whitelist[] = { - RING_TIMESTAMP(RENDER_RING_BASE), -}; - -int xe_mmio_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_gt *gt = xe_root_mmio_gt(xe); - struct drm_xe_mmio *args = data; - unsigned int bits_flag, bytes; - struct xe_reg reg; - bool allowed; - int ret = 0; - - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->flags & ~VALID_MMIO_FLAGS)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_MMIO_WRITE) && args->value)) - return -EINVAL; - - allowed = capable(CAP_SYS_ADMIN); - if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) == DRM_XE_MMIO_READ)) { - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) { - if (mmio_read_whitelist[i].addr == args->addr) { - allowed = true; - break; - } - } - } - - if (XE_IOCTL_DBG(xe, !allowed)) - return -EPERM; - - bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK; - bytes = 1 << bits_flag; - if (XE_IOCTL_DBG(xe, args->addr + bytes > xe->mmio.size)) - return -EINVAL; - - /* - * TODO: migrate to xe_gt_mcr to lookup the mmio range and handle - * multicast registers. Steering would need uapi extension. 
- */ - reg = XE_REG(args->addr); - - xe_device_mem_access_get(xe); - xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - - if (args->flags & DRM_XE_MMIO_WRITE) { - switch (bits_flag) { - case DRM_XE_MMIO_32BIT: - if (XE_IOCTL_DBG(xe, args->value > U32_MAX)) { - ret = -EINVAL; - goto exit; - } - xe_mmio_write32(gt, reg, args->value); - break; - default: - drm_dbg(&xe->drm, "Invalid MMIO bit size"); - fallthrough; - case DRM_XE_MMIO_8BIT: /* TODO */ - case DRM_XE_MMIO_16BIT: /* TODO */ - ret = -EOPNOTSUPP; - goto exit; - } - } - - if (args->flags & DRM_XE_MMIO_READ) { - switch (bits_flag) { - case DRM_XE_MMIO_32BIT: - args->value = xe_mmio_read32(gt, reg); - break; - case DRM_XE_MMIO_64BIT: - args->value = xe_mmio_read64_2x32(gt, reg); - break; - default: - drm_dbg(&xe->drm, "Invalid MMIO bit size"); - fallthrough; - case DRM_XE_MMIO_8BIT: /* TODO */ - case DRM_XE_MMIO_16BIT: /* TODO */ - ret = -EOPNOTSUPP; - } - } - -exit: - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(xe); - - return ret; -} - /** * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads * @gt: MMIO target GT diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index ae09f777d711..24a23dad7dce 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -124,9 +124,6 @@ static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, return ret; } -int xe_mmio_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); - static inline bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 46db9334159b..ad21ba1d6e0b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -106,11 +106,10 @@ struct xe_user_extension { #define DRM_XE_EXEC_QUEUE_CREATE 0x06 #define DRM_XE_EXEC_QUEUE_DESTROY 0x07 #define DRM_XE_EXEC 0x08 -#define DRM_XE_MMIO 0x09 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0a -#define DRM_XE_WAIT_USER_FENCE 0x0b -#define DRM_XE_VM_MADVISE 0x0c -#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0d +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x09 +#define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_VM_MADVISE 0x0b +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0c /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -123,7 +122,6 @@ struct xe_user_extension { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) -#define DRM_IOCTL_XE_MMIO DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio) #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) #define DRM_IOCTL_XE_VM_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise) @@ -936,27 +934,6 @@ struct drm_xe_exec { __u64 reserved[2]; }; -struct drm_xe_mmio { - /** @extensions: Pointer to the first extension struct, if any */ - __u64 extensions; - - __u32 addr; - -#define DRM_XE_MMIO_8BIT 0x0 -#define 
DRM_XE_MMIO_16BIT 0x1 -#define DRM_XE_MMIO_32BIT 0x2 -#define DRM_XE_MMIO_64BIT 0x3 -#define DRM_XE_MMIO_BITS_MASK 0x3 -#define DRM_XE_MMIO_READ 0x4 -#define DRM_XE_MMIO_WRITE 0x8 - __u32 flags; - - __u64 value; - - /** @reserved: Reserved */ - __u64 reserved[2]; -}; - /** * struct drm_xe_wait_user_fence - wait user fence * -- cgit v1.2.3 From 5009d554e0d501741de1411db797a593a6fa94bb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 20 Sep 2023 15:29:27 -0400 Subject: drm/xe: Fix xe_exec_queue_is_idle for parallel exec queues Last little piece to support parallel exec queue is compute mode. Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 23789122b5b1..c88acecc3a90 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -867,8 +867,17 @@ bool xe_exec_queue_ring_full(struct xe_exec_queue *q) */ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) { - if (XE_WARN_ON(xe_exec_queue_is_parallel(q))) - return false; + if (xe_exec_queue_is_parallel(q)) { + int i; + + for (i = 0; i < q->width; ++i) { + if (xe_lrc_seqno(&q->lrc[i]) != + q->lrc[i].fence_ctx.next_seqno - 1) + return false; + } + + return true; + } return xe_lrc_seqno(&q->lrc[0]) == q->lrc[0].fence_ctx.next_seqno - 1; -- cgit v1.2.3 From e05c6c9774630702143bf4d35f2a753e61a57622 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 20 Sep 2023 15:29:28 -0400 Subject: drm/xe: Deprecate XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE implementation We are going to remove XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE from the uAPI, deprecate the implementation first by making XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE a NOP. After removal of XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE the proper is simply inherented from the VM. 
v2: - Update commit message with explaination of removal (Niranjana) Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 57 ++++++++------------------------ drivers/gpu/drm/xe/xe_exec_queue_types.h | 6 ++-- 2 files changed, 16 insertions(+), 47 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index c88acecc3a90..b9e645e86b4f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -323,39 +323,6 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe, static int exec_queue_set_compute_mode(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create) { - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) - return -EINVAL; - - if (value) { - struct xe_vm *vm = q->vm; - int err; - - if (XE_IOCTL_DBG(xe, xe_vm_in_fault_mode(vm))) - return -EOPNOTSUPP; - - if (XE_IOCTL_DBG(xe, !xe_vm_in_compute_mode(vm))) - return -EOPNOTSUPP; - - if (XE_IOCTL_DBG(xe, q->width != 1)) - return -EINVAL; - - q->compute.context = dma_fence_context_alloc(1); - spin_lock_init(&q->compute.lock); - - err = xe_vm_add_compute_exec_queue(vm, q); - if (XE_IOCTL_DBG(xe, err)) - return err; - - q->flags |= EXEC_QUEUE_FLAG_COMPUTE_MODE; - q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT; - } - return 0; } @@ -365,7 +332,7 @@ static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE)) + if (XE_IOCTL_DBG(xe, xe_vm_in_compute_mode(q->vm))) return -EINVAL; if (value) @@ -742,18 +709,21 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, xe_vm_put(vm); if (IS_ERR(q)) return PTR_ERR(q); + + if (xe_vm_in_compute_mode(vm)) { + q->compute.context = dma_fence_context_alloc(1); + spin_lock_init(&q->compute.lock); + + err = xe_vm_add_compute_exec_queue(vm, q); + if (XE_IOCTL_DBG(xe, err)) + goto put_exec_queue; + } } if (args->extensions) { err = exec_queue_user_extensions(xe, q, args->extensions, 0, true); if (XE_IOCTL_DBG(xe, err)) - goto put_exec_queue; - } - - if (XE_IOCTL_DBG(xe, q->vm && xe_vm_in_compute_mode(q->vm) != - !!(q->flags & EXEC_QUEUE_FLAG_COMPUTE_MODE))) { - err = -EOPNOTSUPP; - goto put_exec_queue; + goto kill_exec_queue; } q->persistent.xef = xef; @@ -762,14 +732,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->exec_queue.lock); if (err) - goto put_exec_queue; + goto kill_exec_queue; args->exec_queue_id = id; return 0; -put_exec_queue: +kill_exec_queue: xe_exec_queue_kill(q); +put_exec_queue: xe_exec_queue_put(q); return err; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 4f4190971dcf..c4813944b017 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -73,12 +73,10 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_PERMANENT BIT(2) /* queue keeps running pending jobs after destroy ioctl */ #define EXEC_QUEUE_FLAG_PERSISTENT BIT(3) -/* queue for use with compute VMs */ -#define EXEC_QUEUE_FLAG_COMPUTE_MODE BIT(4) /* for VM jobs. 
Caller needs to hold rpm ref when creating queue with this flag */ -#define EXEC_QUEUE_FLAG_VM BIT(5) +#define EXEC_QUEUE_FLAG_VM BIT(4) /* child of VM queue for multi-tile VM jobs */ -#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(6) +#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) /** * @flags: flags for this exec queue, should statically setup aside from ban -- cgit v1.2.3 From abce4e4b0742f0a0773213144601ea7e18389228 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 20 Sep 2023 15:29:29 -0400 Subject: drm/xe: Rename exec_queue_kill_compute to xe_vm_remove_compute_exec_queue Much better name and aligns with xe_vm_add_compute_exec_queue. As part of the rename, move the implementation from xe_exec_queue.c to xe_vm.c. Suggested-by: Niranjana Vishwanathapura Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 20 ++------------------ drivers/gpu/drm/xe/xe_vm.c | 21 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.h | 1 + 3 files changed, 24 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index b9e645e86b4f..d400e2bb3785 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -775,22 +775,6 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, return ret; } -static void exec_queue_kill_compute(struct xe_exec_queue *q) -{ - if (!xe_vm_in_compute_mode(q->vm)) - return; - - down_write(&q->vm->lock); - list_del(&q->compute.link); - --q->vm->preempt.num_exec_queues; - if (q->compute.pfence) { - dma_fence_enable_sw_signaling(q->compute.pfence); - dma_fence_put(q->compute.pfence); - q->compute.pfence = NULL; - } - up_write(&q->vm->lock); -} - /** * xe_exec_queue_is_lr() - Whether an exec_queue is long-running * @q: The exec_queue @@ -861,11 +845,11 @@ void xe_exec_queue_kill(struct xe_exec_queue *q) list_for_each_entry_safe(eq, next, &eq->multi_gt_list, multi_gt_link) { q->ops->kill(eq); - exec_queue_kill_compute(eq); + xe_vm_remove_compute_exec_queue(q->vm, eq); } q->ops->kill(q); - exec_queue_kill_compute(q); + xe_vm_remove_compute_exec_queue(q->vm, q); } int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1a0546beef87..d02c0db5e2ae 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -387,6 +387,27 @@ out_unlock: return err; } +/** + * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM + * @vm: The VM. + * @q: The exec_queue + */ +void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + if (!xe_vm_in_compute_mode(vm)) + return; + + down_write(&vm->lock); + list_del(&q->compute.link); + --vm->preempt.num_exec_queues; + if (q->compute.pfence) { + dma_fence_enable_sw_signaling(q->compute.pfence); + dma_fence_put(q->compute.pfence); + q->compute.pfence = NULL; + } + up_write(&vm->lock); +} + /** * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs * that need repinning. 
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 694f9e689b05..59dcbd1adf15 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -165,6 +165,7 @@ static inline bool xe_vm_no_dma_fences(struct xe_vm *vm) } int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); +void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); int xe_vm_userptr_pin(struct xe_vm *vm); -- cgit v1.2.3 From bffb2573726beabc8ad70532d5655a976f9053d8 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 20 Sep 2023 15:29:30 -0400 Subject: drm/xe: Remove XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE from uAPI Functionality of XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE deprecated in a previous patch, drop from uAPI. The property is just simply inherented from the VM. v2: - Update commit message (Niranjana) Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 7 ------- include/uapi/drm/xe_drm.h | 19 ++++++------------- 2 files changed, 6 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index d400e2bb3785..5714a7195349 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -320,12 +320,6 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe, return q->ops->set_preempt_timeout(q, value); } -static int exec_queue_set_compute_mode(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - return 0; -} - static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create) { @@ -411,7 +405,6 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, [XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE] = exec_queue_set_compute_mode, [XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence, [XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, [XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index ad21ba1d6e0b..2a9e04024723 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -781,21 +781,14 @@ struct drm_xe_exec_queue_set_property { /** @exec_queue_id: Exec queue ID */ __u32 exec_queue_id; -#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 +#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 - /* - * Long running or ULLS engine mode. DMA fences not allowed in this - * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves - * as a sanity check the UMD knows what it is doing. Can only be set at - * engine create time. 
- */ -#define XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE 3 -#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 4 -#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 5 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 6 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 7 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 8 +#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 +#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 /** @property: property to set */ __u32 property; -- cgit v1.2.3 From 5dc079d1a8e5e880ae18b4f4585d7dc28e51e68e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 20 Sep 2023 15:29:31 -0400 Subject: drm/xe/uapi: Use common drm_xe_ext_set_property extension There really is no difference between 'struct drm_xe_ext_vm_set_property' and 'struct drm_xe_ext_exec_queue_set_property', they are extensions which specify a pair. Replace the two extensions with a single common 'struct drm_xe_ext_set_property' extension. The rationale is that rather than have each XE module (including future modules) invent their own property/value extensions, all XE modules use a common set_property extension when possible. Signed-off-by: Ashutosh Dixit Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 2 +- include/uapi/drm/xe_drm.h | 21 +++------------------ 3 files changed, 5 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 5714a7195349..38ce777d0ba8 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -418,7 +418,7 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, bool create) { u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_exec_queue_set_property ext; + struct drm_xe_ext_set_property ext; int err; u32 idx; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d02c0db5e2ae..3d350b27732f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2067,7 +2067,7 @@ static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm, u64 extension) { u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_vm_set_property ext; + struct drm_xe_ext_set_property ext; int err; err = __copy_from_user(&ext, address, sizeof(ext)); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2a9e04024723..4987a634afc7 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -569,12 +569,11 @@ struct drm_xe_vm_bind_op_error_capture { __u64 size; }; -/** struct drm_xe_ext_vm_set_property - VM set property extension */ -struct drm_xe_ext_vm_set_property { +/** struct drm_xe_ext_set_property - XE set property extension */ +struct drm_xe_ext_set_property { /** @base: base user extension */ struct xe_user_extension base; -#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS 0 /** @property: property to set */ __u32 property; @@ -590,6 +589,7 @@ struct drm_xe_ext_vm_set_property { struct drm_xe_vm_create { #define XE_VM_EXTENSION_SET_PROPERTY 0 +#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS 0 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -754,21 +754,6 @@ struct drm_xe_vm_bind { __u64 reserved[2]; }; -/** struct drm_xe_ext_exec_queue_set_property - exec 
queue set property extension */ -struct drm_xe_ext_exec_queue_set_property { - /** @base: base user extension */ - struct xe_user_extension base; - - /** @property: property to set */ - __u32 property; - - /** @pad: MBZ */ - __u32 pad; - - /** @value: property value */ - __u64 value; -}; - /** * struct drm_xe_exec_queue_set_property - exec queue set property * -- cgit v1.2.3 From 7224788f675632956cb9177c039645d72d887cf8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 20 Sep 2023 15:29:32 -0400 Subject: drm/xe: Kill XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS extension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This extension is currently not used and it is not aligned with the error handling on async VM_BIND. Let's remove it and along with that, since it was the only extension for the vm_create, remove VM extension entirely. v2: rebase on top of the removal of drm_xe_ext_exec_queue_set_property Cc: Thomas Hellström Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_vm.c | 129 ++------------------------------------------- include/uapi/drm/xe_drm.h | 23 +------- 2 files changed, 4 insertions(+), 148 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 3d350b27732f..c7e3b1fbd931 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1531,37 +1531,6 @@ static void flush_async_ops(struct xe_vm *vm) flush_work(&vm->async_ops.work); } -static void vm_error_capture(struct xe_vm *vm, int err, - u32 op, u64 addr, u64 size) -{ - struct drm_xe_vm_bind_op_error_capture capture; - u64 __user *address = - u64_to_user_ptr(vm->async_ops.error_capture.addr); - bool in_kthread = !current->mm; - - capture.error = err; - capture.op = op; - capture.addr = addr; - capture.size = size; - - if (in_kthread) { - if (!mmget_not_zero(vm->async_ops.error_capture.mm)) - goto mm_closed; - kthread_use_mm(vm->async_ops.error_capture.mm); - } - - if (copy_to_user(address, &capture, sizeof(capture))) - drm_warn(&vm->xe->drm, "Copy to user failed"); - - if (in_kthread) { - kthread_unuse_mm(vm->async_ops.error_capture.mm); - mmput(vm->async_ops.error_capture.mm); - } - -mm_closed: - wake_up_all(&vm->async_ops.error_capture.wq); -} - static void xe_vm_close(struct xe_vm *vm) { down_write(&vm->lock); @@ -2036,91 +2005,6 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, return 0; } -static int vm_set_error_capture_address(struct xe_device *xe, struct xe_vm *vm, - u64 value) -{ - if (XE_IOCTL_DBG(xe, !value)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) - return -EOPNOTSUPP; - - if (XE_IOCTL_DBG(xe, vm->async_ops.error_capture.addr)) - return -EOPNOTSUPP; - - vm->async_ops.error_capture.mm = current->mm; - vm->async_ops.error_capture.addr = value; - init_waitqueue_head(&vm->async_ops.error_capture.wq); - - return 0; -} - -typedef int (*xe_vm_set_property_fn)(struct xe_device *xe, struct xe_vm *vm, - u64 value); - -static const xe_vm_set_property_fn vm_set_property_funcs[] = { - [XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS] = - vm_set_error_capture_address, -}; - -static int vm_user_ext_set_property(struct xe_device *xe, struct xe_vm *vm, - u64 extension) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - int err; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if 
(XE_IOCTL_DBG(xe, ext.property >= - ARRAY_SIZE(vm_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1])) - return -EINVAL; - - return vm_set_property_funcs[ext.property](xe, vm, ext.value); -} - -typedef int (*xe_vm_user_extension_fn)(struct xe_device *xe, struct xe_vm *vm, - u64 extension); - -static const xe_vm_set_property_fn vm_user_extension_funcs[] = { - [XE_VM_EXTENSION_SET_PROPERTY] = vm_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int vm_user_extensions(struct xe_device *xe, struct xe_vm *vm, - u64 extensions, int ext_number) -{ - u64 __user *address = u64_to_user_ptr(extensions); - struct xe_user_extension ext; - int err; - - if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.name >= - ARRAY_SIZE(vm_user_extension_funcs))) - return -EINVAL; - - err = vm_user_extension_funcs[ext.name](xe, vm, extensions); - if (XE_IOCTL_DBG(xe, err)) - return err; - - if (ext.next_extension) - return vm_user_extensions(xe, vm, ext.next_extension, - ++ext_number); - - return 0; -} - #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \ DRM_XE_VM_CREATE_COMPUTE_MODE | \ DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \ @@ -2138,6 +2022,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, int err; u32 flags = 0; + if (XE_IOCTL_DBG(xe, args->extensions)) + return -EINVAL; + if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) args->flags |= DRM_XE_VM_CREATE_SCRATCH_PAGE; @@ -2180,14 +2067,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - if (args->extensions) { - err = vm_user_extensions(xe, vm, args->extensions, 0); - if (XE_IOCTL_DBG(xe, err)) { - xe_vm_close_and_put(vm); - return err; - } - } - mutex_lock(&xef->vm.lock); err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->vm.lock); @@ -3087,8 +2966,6 @@ static void xe_vma_op_work_func(struct work_struct *w) vm_set_async_error(vm, err); up_write(&vm->lock); - if (vm->async_ops.error_capture.addr) - vm_error_capture(vm, err, 0, 0, 0); break; } up_write(&vm->lock); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 4987a634afc7..e7cf42c7234b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -552,23 +552,6 @@ struct drm_xe_gem_mmap_offset { __u64 reserved[2]; }; -/** - * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture - */ -struct drm_xe_vm_bind_op_error_capture { - /** @error: errno that occurred */ - __s32 error; - - /** @op: operation that encounter an error */ - __u32 op; - - /** @addr: address of bind op */ - __u64 addr; - - /** @size: size of bind */ - __u64 size; -}; - /** struct drm_xe_ext_set_property - XE set property extension */ struct drm_xe_ext_set_property { /** @base: base user extension */ @@ -589,7 +572,6 @@ struct drm_xe_ext_set_property { struct drm_xe_vm_create { #define XE_VM_EXTENSION_SET_PROPERTY 0 -#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS 0 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -674,10 +656,7 @@ struct drm_xe_vm_bind_op { * practice the bind op is good and will complete. * * If this flag is set and doesn't return an error, the bind op can - * still fail and recovery is needed. 
If configured, the bind op that - * caused the error will be captured in drm_xe_vm_bind_op_error_capture. - * Once the user sees the error (via a ufence + - * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory + * still fail and recovery is needed. It should free memory * via non-async unbinds, and then restart all queued async binds op via * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the * VM. -- cgit v1.2.3 From b21ae51dcf41ce12bb8e2a7c989863ee9d04ae4b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 14 Sep 2023 13:40:49 -0700 Subject: drm/xe/uapi: Kill DRM_XE_UFENCE_WAIT_VM_ERROR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is not used nor does it align VM async document, kill this. Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 --- drivers/gpu/drm/xe/xe_vm_types.h | 11 --------- drivers/gpu/drm/xe/xe_wait_user_fence.c | 43 ++++----------------------------- include/uapi/drm/xe_drm.h | 17 +++---------- 4 files changed, 9 insertions(+), 65 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c7e3b1fbd931..3132114d187f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1621,9 +1621,6 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vma_destroy_unlocked(vma); } - if (vm->async_ops.error_capture.addr) - wake_up_all(&vm->async_ops.error_capture.wq); - xe_assert(xe, list_empty(&vm->extobj.list)); up_write(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 9a1075a75606..828ed0fa7e60 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -215,17 +215,6 @@ struct xe_vm { struct work_struct work; /** @lock: protects list of pending async VM ops and fences */ spinlock_t lock; - /** @error_capture: error capture state */ - struct { - /** @mm: user MM */ - struct mm_struct *mm; - /** - * @addr: user pointer to copy error capture state too - */ - u64 addr; - /** @wq: user fence wait queue for VM errors */ - wait_queue_head_t wq; - } error_capture; /** @fence: fence state */ struct { /** @context: context of async fence */ diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 3ac4cd24d5b4..78686908f7fb 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -13,7 +13,6 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_macros.h" -#include "xe_vm.h" static int do_compare(u64 addr, u64 value, u64 mask, u16 op) { @@ -81,8 +80,7 @@ static int check_hw_engines(struct xe_device *xe, } #define VALID_FLAGS (DRM_XE_UFENCE_WAIT_SOFT_OP | \ - DRM_XE_UFENCE_WAIT_ABSTIME | \ - DRM_XE_UFENCE_WAIT_VM_ERROR) + DRM_XE_UFENCE_WAIT_ABSTIME) #define MAX_OP DRM_XE_UFENCE_WAIT_LTE static long to_jiffies_timeout(struct xe_device *xe, @@ -137,11 +135,9 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_xe_engine_class_instance __user *user_eci = u64_to_user_ptr(args->instances); - struct xe_vm *vm = NULL; u64 addr = args->addr; int err; - bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP || - args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR; + bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP; long timeout; ktime_t start; @@ -162,8 +158,7 @@ int xe_wait_user_fence_ioctl(struct drm_device 
*dev, void *data, if (XE_IOCTL_DBG(xe, !no_engines && !args->num_engines)) return -EINVAL; - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) && - addr & 0x7)) + if (XE_IOCTL_DBG(xe, addr & 0x7)) return -EINVAL; if (XE_IOCTL_DBG(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE)) @@ -181,22 +176,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (args->flags & DRM_XE_UFENCE_WAIT_VM_ERROR) { - if (XE_IOCTL_DBG(xe, args->vm_id >> 32)) - return -EINVAL; - - vm = xe_vm_lookup(to_xe_file(file), args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - return -ENOENT; - - if (XE_IOCTL_DBG(xe, !vm->async_ops.error_capture.addr)) { - xe_vm_put(vm); - return -EOPNOTSUPP; - } - - addr = vm->async_ops.error_capture.addr; - } - timeout = to_jiffies_timeout(xe, args); start = ktime_get(); @@ -207,15 +186,8 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, * hardware engine. Open coding as 'do_compare' can sleep which doesn't * work with the wait_event_* macros. */ - if (vm) - add_wait_queue(&vm->async_ops.error_capture.wq, &w_wait); - else - add_wait_queue(&xe->ufence_wq, &w_wait); + add_wait_queue(&xe->ufence_wq, &w_wait); for (;;) { - if (vm && xe_vm_is_closed(vm)) { - err = -ENODEV; - break; - } err = do_compare(addr, args->value, args->mask, args->op); if (err <= 0) break; @@ -232,12 +204,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout); } - if (vm) { - remove_wait_queue(&vm->async_ops.error_capture.wq, &w_wait); - xe_vm_put(vm); - } else { - remove_wait_queue(&xe->ufence_wq, &w_wait); - } + remove_wait_queue(&xe->ufence_wq, &w_wait); if (!(args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)) { args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start)); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index e7cf42c7234b..f13974f17be9 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -905,18 +905,10 @@ struct drm_xe_wait_user_fence { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - union { - /** - * @addr: user pointer address to wait on, must qword aligned - */ - __u64 addr; - - /** - * @vm_id: The ID of the VM which encounter an error used with - * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear. - */ - __u64 vm_id; - }; + /** + * @addr: user pointer address to wait on, must qword aligned + */ + __u64 addr; #define DRM_XE_UFENCE_WAIT_EQ 0 #define DRM_XE_UFENCE_WAIT_NEQ 1 @@ -929,7 +921,6 @@ struct drm_xe_wait_user_fence { #define DRM_XE_UFENCE_WAIT_SOFT_OP (1 << 0) /* e.g. Wait on VM bind */ #define DRM_XE_UFENCE_WAIT_ABSTIME (1 << 1) -#define DRM_XE_UFENCE_WAIT_VM_ERROR (1 << 2) /** @flags: wait flags */ __u16 flags; -- cgit v1.2.3 From f3e9b1f43458746e7e0211dbe4289412e5c0d16a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 14 Sep 2023 13:40:50 -0700 Subject: drm/xe: Remove async worker and rework sync binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Async worker is gone. All jobs and memory allocations done in IOCTL to align with dma fencing rules. Async vs. sync now means when do bind operations complete relative to the IOCTL. Async completes when out-syncs signal while sync completes when the IOCTL returns. In-syncs and out-syncs are only allowed in async mode. If memory allocations fail in the job creation step the VM is killed. This is temporary, eventually a proper unwind will be done and VM will be usable. 
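To make the reworked contract concrete, here is a rough userspace sketch (not part of this patch): on an async bind queue the ioctl returns immediately and completion is observed through the out-sync, whereas a queue created with DRM_XE_ENGINE_CLASS_VM_BIND_SYNC completes the operation before the ioctl returns and takes no in/out-syncs. The drm_xe_vm_bind and drm_xe_sync field layouts, DRM_XE_SYNC_SYNCOBJ, and DRM_IOCTL_XE_VM_BIND are assumptions about this era of the headers; the engine-class and flag names are the ones introduced by this series.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>
#include <xf86drm.h>

/* Minimal sketch, assumed struct layout: async bind with a syncobj out-sync.
 * A sync bind queue would instead complete before ioctl return, no syncs. */
static int bind_async_then_wait(int fd, uint32_t vm_id, uint32_t bind_queue_id,
				uint32_t bo_handle, uint64_t addr, uint64_t range)
{
	uint32_t syncobj;
	struct drm_xe_sync sync = {
		.flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL,	/* out-sync */
	};
	struct drm_xe_vm_bind bind = {
		.vm_id = vm_id,
		.exec_queue_id = bind_queue_id,	/* created as ..._VM_BIND_ASYNC */
		.num_binds = 1,
		.num_syncs = 1,
	};

	if (drmSyncobjCreate(fd, 0, &syncobj))
		return -1;
	sync.handle = syncobj;
	bind.syncs = (uintptr_t)&sync;

	bind.bind.obj = bo_handle;		/* assumed single-bind member name */
	bind.bind.addr = addr;
	bind.bind.range = range;
	bind.bind.op = XE_VM_BIND_OP_MAP;
	bind.bind.flags = XE_VM_BIND_FLAG_ASYNC;

	if (ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind))
		return -1;

	/* The ioctl has returned, but the mapping may not exist yet:
	 * completion is signalled through the out-sync. */
	return drmSyncobjWait(fd, &syncobj, 1, INT64_MAX, 0, NULL);
}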
Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 43 --- drivers/gpu/drm/xe/xe_exec_queue.c | 7 +- drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 + drivers/gpu/drm/xe/xe_sync.c | 14 +- drivers/gpu/drm/xe/xe_sync.h | 2 +- drivers/gpu/drm/xe/xe_vm.c | 535 +++++++------------------------ drivers/gpu/drm/xe/xe_vm.h | 2 - drivers/gpu/drm/xe/xe_vm_types.h | 7 +- include/uapi/drm/xe_drm.h | 33 +- 9 files changed, 127 insertions(+), 518 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 7cf4215b2b2e..85a8a793f527 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -196,27 +196,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } - /* - * We can't install a job into the VM dma-resv shared slot before an - * async VM bind passed in as a fence without the risk of deadlocking as - * the bind can trigger an eviction which in turn depends on anything in - * the VM dma-resv shared slots. Not an ideal solution, but we wait for - * all dependent async VM binds to start (install correct fences into - * dma-resv slots) before moving forward. - */ - if (!xe_vm_no_dma_fences(vm) && - vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS) { - for (i = 0; i < args->num_syncs; i++) { - struct dma_fence *fence = syncs[i].fence; - - if (fence) { - err = xe_vm_async_fence_wait_start(fence); - if (err) - goto err_syncs; - } - } - } - retry: if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) { err = down_write_killable(&vm->lock); @@ -229,28 +208,6 @@ retry: if (err) goto err_syncs; - /* We don't allow execs while the VM is in error state */ - if (vm->async_ops.error) { - err = vm->async_ops.error; - goto err_unlock_list; - } - - /* - * Extreme corner where we exit a VM error state with a munmap style VM - * unbind inflight which requires a rebind. In this case the rebind - * needs to install some fences into the dma-resv slots. The worker to - * do this queued, let that worker make progress by dropping vm->lock, - * flushing the worker and retrying the exec. - */ - if (vm->async_ops.munmap_rebind_inflight) { - if (write_locked) - up_write(&vm->lock); - else - up_read(&vm->lock); - flush_work(&vm->async_ops.work); - goto retry; - } - if (write_locked) { err = xe_vm_userptr_pin(vm); downgrade_write(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 38ce777d0ba8..9b373b9ea472 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -621,7 +621,10 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) return -EINVAL; - if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { + if (eci[0].engine_class >= DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC) { + bool sync = eci[0].engine_class == + DRM_XE_ENGINE_CLASS_VM_BIND_SYNC; + for_each_gt(gt, xe, id) { struct xe_exec_queue *new; @@ -647,6 +650,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, args->width, hwe, EXEC_QUEUE_FLAG_PERSISTENT | EXEC_QUEUE_FLAG_VM | + (sync ? 0 : + EXEC_QUEUE_FLAG_VM_ASYNC) | (id ? 
EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index c4813944b017..4e382304010e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -77,6 +77,8 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_VM BIT(4) /* child of VM queue for multi-tile VM jobs */ #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) +/* VM jobs for this queue are asynchronous */ +#define EXEC_QUEUE_FLAG_VM_ASYNC BIT(6) /** * @flags: flags for this exec queue, should statically setup aside from ban diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 9fcd7802ba30..73ef259aa387 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -18,7 +18,6 @@ #include "xe_sched_job_types.h" #define SYNC_FLAGS_TYPE_MASK 0x3 -#define SYNC_FLAGS_FENCE_INSTALLED 0x10000 struct user_fence { struct xe_device *xe; @@ -223,12 +222,11 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) return 0; } -bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, +void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, struct dma_fence *fence) { - if (!(sync->flags & DRM_XE_SYNC_SIGNAL) || - sync->flags & SYNC_FLAGS_FENCE_INSTALLED) - return false; + if (!(sync->flags & DRM_XE_SYNC_SIGNAL)) + return; if (sync->chain_fence) { drm_syncobj_add_point(sync->syncobj, sync->chain_fence, @@ -260,12 +258,6 @@ bool xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, job->user_fence.addr = sync->addr; job->user_fence.value = sync->timeline_value; } - - /* TODO: external BO? */ - - sync->flags |= SYNC_FLAGS_FENCE_INSTALLED; - - return true; } void xe_sync_entry_cleanup(struct xe_sync_entry *sync) diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 4cbcf7a19911..30958ddc4cdc 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -19,7 +19,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, int xe_sync_entry_wait(struct xe_sync_entry *sync); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); -bool xe_sync_entry_signal(struct xe_sync_entry *sync, +void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, struct dma_fence *fence); void xe_sync_entry_cleanup(struct xe_sync_entry *sync); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 3132114d187f..89df50f49e11 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -592,7 +592,7 @@ static void preempt_rebind_work_func(struct work_struct *w) unsigned int fence_count = 0; LIST_HEAD(preempt_fences); ktime_t end = 0; - int err; + int err = 0; long wait; int __maybe_unused tries = 0; @@ -608,22 +608,6 @@ static void preempt_rebind_work_func(struct work_struct *w) } retry: - if (vm->async_ops.error) - goto out_unlock_outer; - - /* - * Extreme corner where we exit a VM error state with a munmap style VM - * unbind inflight which requires a rebind. In this case the rebind - * needs to install some fences into the dma-resv slots. The worker to - * do this queued, let that worker make progress by dropping vm->lock - * and trying this again. 
- */ - if (vm->async_ops.munmap_rebind_inflight) { - up_write(&vm->lock); - flush_work(&vm->async_ops.work); - goto retry; - } - if (xe_vm_userptr_check_repin(vm)) { err = xe_vm_userptr_pin(vm); if (err) @@ -1357,7 +1341,6 @@ static const struct xe_pt_ops xelp_pt_ops = { .pde_encode_bo = xelp_pde_encode_bo, }; -static void xe_vma_op_work_func(struct work_struct *w); static void vm_destroy_work_func(struct work_struct *w); struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) @@ -1390,10 +1373,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) INIT_LIST_HEAD(&vm->notifier.rebind_list); spin_lock_init(&vm->notifier.list_lock); - INIT_LIST_HEAD(&vm->async_ops.pending); - INIT_WORK(&vm->async_ops.work, xe_vma_op_work_func); - spin_lock_init(&vm->async_ops.lock); - INIT_WORK(&vm->destroy_work, vm_destroy_work_func); INIT_LIST_HEAD(&vm->preempt.exec_queues); @@ -1458,11 +1437,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->batch_invalidate_tlb = false; } - if (flags & XE_VM_FLAG_ASYNC_BIND_OPS) { - vm->async_ops.fence.context = dma_fence_context_alloc(1); - vm->flags |= XE_VM_FLAG_ASYNC_BIND_OPS; - } - /* Fill pt_root after allocating scratch tables */ for_each_tile(tile, xe, id) { if (!vm->pt_root[id]) @@ -1478,6 +1452,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) struct xe_gt *gt = tile->primary_gt; struct xe_vm *migrate_vm; struct xe_exec_queue *q; + u32 create_flags = EXEC_QUEUE_FLAG_VM | + ((flags & XE_VM_FLAG_ASYNC_DEFAULT) ? + EXEC_QUEUE_FLAG_VM_ASYNC : 0); if (!vm->pt_root[id]) continue; @@ -1485,7 +1462,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) migrate_vm = xe_migrate_get_vm(tile->migrate); q = xe_exec_queue_create_class(xe, gt, migrate_vm, XE_ENGINE_CLASS_COPY, - EXEC_QUEUE_FLAG_VM); + create_flags); xe_vm_put(migrate_vm); if (IS_ERR(q)) { err = PTR_ERR(q); @@ -1525,12 +1502,6 @@ err_no_resv: return ERR_PTR(err); } -static void flush_async_ops(struct xe_vm *vm) -{ - queue_work(system_unbound_wq, &vm->async_ops.work); - flush_work(&vm->async_ops.work); -} - static void xe_vm_close(struct xe_vm *vm) { down_write(&vm->lock); @@ -1550,7 +1521,6 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_assert(xe, !vm->preempt.num_exec_queues); xe_vm_close(vm); - flush_async_ops(vm); if (xe_vm_in_compute_mode(vm)) flush_work(&vm->preempt.rebind_work); @@ -1761,10 +1731,8 @@ next: err_fences: if (fences) { - while (cur_fence) { - /* FIXME: Rewind the previous binds? */ + while (cur_fence) dma_fence_put(fences[--cur_fence]); - } kfree(fences); } @@ -1838,100 +1806,24 @@ next: err_fences: if (fences) { - while (cur_fence) { - /* FIXME: Rewind the previous binds? 
*/ + while (cur_fence) dma_fence_put(fences[--cur_fence]); - } kfree(fences); } return ERR_PTR(err); } -struct async_op_fence { - struct dma_fence fence; - struct dma_fence *wait_fence; - struct dma_fence_cb cb; - struct xe_vm *vm; - wait_queue_head_t wq; - bool started; -}; - -static const char *async_op_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -async_op_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "async_op_fence"; -} - -static const struct dma_fence_ops async_op_fence_ops = { - .get_driver_name = async_op_fence_get_driver_name, - .get_timeline_name = async_op_fence_get_timeline_name, -}; - -static void async_op_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) -{ - struct async_op_fence *afence = - container_of(cb, struct async_op_fence, cb); - - afence->fence.error = afence->wait_fence->error; - dma_fence_signal(&afence->fence); - xe_vm_put(afence->vm); - dma_fence_put(afence->wait_fence); - dma_fence_put(&afence->fence); -} - -static void add_async_op_fence_cb(struct xe_vm *vm, - struct dma_fence *fence, - struct async_op_fence *afence) +static bool xe_vm_sync_mode(struct xe_vm *vm, struct xe_exec_queue *q) { - int ret; - - if (!xe_vm_no_dma_fences(vm)) { - afence->started = true; - smp_wmb(); - wake_up_all(&afence->wq); - } - - afence->wait_fence = dma_fence_get(fence); - afence->vm = xe_vm_get(vm); - dma_fence_get(&afence->fence); - ret = dma_fence_add_callback(fence, &afence->cb, async_op_fence_cb); - if (ret == -ENOENT) { - afence->fence.error = afence->wait_fence->error; - dma_fence_signal(&afence->fence); - } - if (ret) { - xe_vm_put(vm); - dma_fence_put(afence->wait_fence); - dma_fence_put(&afence->fence); - } - XE_WARN_ON(ret && ret != -ENOENT); -} - -int xe_vm_async_fence_wait_start(struct dma_fence *fence) -{ - if (fence->ops == &async_op_fence_ops) { - struct async_op_fence *afence = - container_of(fence, struct async_op_fence, fence); - - xe_assert(afence->vm->xe, !xe_vm_no_dma_fences(afence->vm)); - - smp_rmb(); - return wait_event_interruptible(afence->wq, afence->started); - } - - return 0; + return q ? 
!(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC) : + !(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT); } static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, struct async_op_fence *afence, - bool immediate, bool first_op, bool last_op) + u32 num_syncs, bool immediate, bool first_op, + bool last_op) { struct dma_fence *fence; @@ -1953,17 +1845,18 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, xe_sync_entry_signal(&syncs[i], NULL, fence); } } - if (afence) - add_async_op_fence_cb(vm, fence, afence); + if (last_op && xe_vm_sync_mode(vm, q)) + dma_fence_wait(fence, true); dma_fence_put(fence); + return 0; } static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_bo *bo, struct xe_sync_entry *syncs, - u32 num_syncs, struct async_op_fence *afence, - bool immediate, bool first_op, bool last_op) + u32 num_syncs, bool immediate, bool first_op, + bool last_op) { int err; @@ -1976,14 +1869,13 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue return err; } - return __xe_vm_bind(vm, vma, q, syncs, num_syncs, afence, immediate, - first_op, last_op); + return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, + last_op); } static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, struct async_op_fence *afence, - bool first_op, bool last_op) + u32 num_syncs, bool first_op, bool last_op) { struct dma_fence *fence; @@ -1993,10 +1885,10 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) return PTR_ERR(fence); - if (afence) - add_async_op_fence_cb(vm, fence, afence); xe_vma_destroy(vma, fence); + if (last_op && xe_vm_sync_mode(vm, q)) + dma_fence_wait(fence, true); dma_fence_put(fence); return 0; @@ -2004,7 +1896,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \ DRM_XE_VM_CREATE_COMPUTE_MODE | \ - DRM_XE_VM_CREATE_ASYNC_BIND_OPS | \ + DRM_XE_VM_CREATE_ASYNC_DEFAULT | \ DRM_XE_VM_CREATE_FAULT_MODE) int xe_vm_create_ioctl(struct drm_device *dev, void *data, @@ -2051,12 +1943,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, xe_device_in_fault_mode(xe))) return -EINVAL; + if (XE_IOCTL_DBG(xe, args->extensions)) + return -EINVAL; + if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE) flags |= XE_VM_FLAG_COMPUTE_MODE; - if (args->flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) - flags |= XE_VM_FLAG_ASYNC_BIND_OPS; + if (args->flags & DRM_XE_VM_CREATE_ASYNC_DEFAULT) + flags |= XE_VM_FLAG_ASYNC_DEFAULT; if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; @@ -2139,8 +2034,7 @@ static const u32 region_to_mem_type[] = { static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, u32 region, struct xe_sync_entry *syncs, u32 num_syncs, - struct async_op_fence *afence, bool first_op, - bool last_op) + bool first_op, bool last_op) { int err; @@ -2154,7 +2048,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, - afence, true, first_op, last_op); + true, first_op, last_op); } else { int i; @@ -2164,55 +2058,9 
@@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub()); } - if (afence) - dma_fence_signal(&afence->fence); - return 0; - } -} - -static void vm_set_async_error(struct xe_vm *vm, int err) -{ - lockdep_assert_held(&vm->lock); - vm->async_ops.error = err; -} - -static int vm_bind_ioctl_lookup_vma(struct xe_vm *vm, struct xe_bo *bo, - u64 addr, u64 range, u32 op, u32 flags) -{ - struct xe_device *xe = vm->xe; - struct xe_vma *vma; - bool async = !!(flags & XE_VM_BIND_FLAG_ASYNC); - - lockdep_assert_held(&vm->lock); - switch (op) { - case XE_VM_BIND_OP_MAP: - case XE_VM_BIND_OP_MAP_USERPTR: - vma = xe_vm_find_overlapping_vma(vm, addr, range); - if (XE_IOCTL_DBG(xe, vma && !async)) - return -EBUSY; - break; - case XE_VM_BIND_OP_UNMAP: - case XE_VM_BIND_OP_PREFETCH: - vma = xe_vm_find_overlapping_vma(vm, addr, range); - if (XE_IOCTL_DBG(xe, !vma)) - /* Not an actual error, IOCTL cleans up returns and 0 */ - return -ENODATA; - if (XE_IOCTL_DBG(xe, (xe_vma_start(vma) != addr || - xe_vma_end(vma) != addr + range) && !async)) - return -EINVAL; - break; - case XE_VM_BIND_OP_UNMAP_ALL: - if (XE_IOCTL_DBG(xe, list_empty(&bo->ttm.base.gpuva.list))) - /* Not an actual error, IOCTL cleans up returns and 0 */ - return -ENODATA; - break; - default: - drm_warn(&xe->drm, "NOT POSSIBLE"); - return -EINVAL; + return 0; } - - return 0; } static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, @@ -2509,37 +2357,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, bool async) { struct xe_vma_op *last_op = NULL; - struct async_op_fence *fence = NULL; struct drm_gpuva_op *__op; int err = 0; lockdep_assert_held_write(&vm->lock); - if (last && num_syncs && async) { - u64 seqno; - - fence = kmalloc(sizeof(*fence), GFP_KERNEL); - if (!fence) - return -ENOMEM; - - seqno = q ? ++q->bind.fence_seqno : ++vm->async_ops.fence.seqno; - dma_fence_init(&fence->fence, &async_op_fence_ops, - &vm->async_ops.lock, q ? 
q->bind.fence_ctx : - vm->async_ops.fence.context, seqno); - - if (!xe_vm_no_dma_fences(vm)) { - fence->vm = vm; - fence->started = false; - init_waitqueue_head(&fence->wq); - } - } - drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); bool first = list_empty(ops_list); - xe_assert(vm->xe, first || async); - INIT_LIST_HEAD(&op->link); list_add_tail(&op->link, ops_list); @@ -2559,10 +2385,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, vma = new_vma(vm, &op->base.map, op->tile_mask, op->map.read_only, op->map.is_null); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto free_fence; - } + if (IS_ERR(vma)) + return PTR_ERR(vma); op->map.vma = vma; break; @@ -2587,10 +2411,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, vma = new_vma(vm, op->base.remap.prev, op->tile_mask, read_only, is_null); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto free_fence; - } + if (IS_ERR(vma)) + return PTR_ERR(vma); op->remap.prev = vma; @@ -2623,10 +2445,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, vma = new_vma(vm, op->base.remap.next, op->tile_mask, read_only, is_null); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto free_fence; - } + if (IS_ERR(vma)) + return PTR_ERR(vma); op->remap.next = vma; @@ -2658,27 +2478,23 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, err = xe_vma_op_commit(vm, op); if (err) - goto free_fence; + return err; } /* FIXME: Unhandled corner case */ XE_WARN_ON(!last_op && last && !list_empty(ops_list)); if (!last_op) - goto free_fence; + return 0; + last_op->ops = ops; if (last) { last_op->flags |= XE_VMA_OP_LAST; last_op->num_syncs = num_syncs; last_op->syncs = syncs; - last_op->fence = fence; } return 0; - -free_fence: - kfree(fence); - return err; } static int op_execute(struct drm_exec *exec, struct xe_vm *vm, @@ -2698,7 +2514,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), - op->syncs, op->num_syncs, op->fence, + op->syncs, op->num_syncs, op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); @@ -2709,16 +2525,13 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, bool next = !!op->remap.next; if (!op->remap.unmap_done) { - if (prev || next) { - vm->async_ops.munmap_rebind_inflight = true; + if (prev || next) vma->gpuva.flags |= XE_VMA_FIRST_REBIND; - } err = xe_vm_unbind(vm, vma, op->q, op->syncs, op->num_syncs, - !prev && !next ? op->fence : NULL, op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST && !prev && - !next); + op->flags & XE_VMA_OP_LAST && + !prev && !next); if (err) break; op->remap.unmap_done = true; @@ -2728,8 +2541,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; err = xe_vm_bind(vm, op->remap.prev, op->q, xe_vma_bo(op->remap.prev), op->syncs, - op->num_syncs, - !next ? 
op->fence : NULL, true, false, + op->num_syncs, true, false, op->flags & XE_VMA_OP_LAST && !next); op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; if (err) @@ -2742,26 +2554,24 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, err = xe_vm_bind(vm, op->remap.next, op->q, xe_vma_bo(op->remap.next), op->syncs, op->num_syncs, - op->fence, true, false, + true, false, op->flags & XE_VMA_OP_LAST); op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; if (err) break; op->remap.next = NULL; } - vm->async_ops.munmap_rebind_inflight = false; break; } case DRM_GPUVA_OP_UNMAP: err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, op->fence, - op->flags & XE_VMA_OP_FIRST, + op->num_syncs, op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_PREFETCH: err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, - op->syncs, op->num_syncs, op->fence, + op->syncs, op->num_syncs, op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2860,14 +2670,9 @@ static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) kfree(op->syncs); if (op->q) xe_exec_queue_put(op->q); - if (op->fence) - dma_fence_put(&op->fence->fence); } - if (!list_empty(&op->link)) { - spin_lock_irq(&vm->async_ops.lock); + if (!list_empty(&op->link)) list_del(&op->link); - spin_unlock_irq(&vm->async_ops.lock); - } if (op->ops) drm_gpuva_ops_free(&vm->gpuvm, op->ops); if (last) @@ -2929,129 +2734,6 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, } } -static struct xe_vma_op *next_vma_op(struct xe_vm *vm) -{ - return list_first_entry_or_null(&vm->async_ops.pending, - struct xe_vma_op, link); -} - -static void xe_vma_op_work_func(struct work_struct *w) -{ - struct xe_vm *vm = container_of(w, struct xe_vm, async_ops.work); - - for (;;) { - struct xe_vma_op *op; - int err; - - if (vm->async_ops.error && !xe_vm_is_closed(vm)) - break; - - spin_lock_irq(&vm->async_ops.lock); - op = next_vma_op(vm); - spin_unlock_irq(&vm->async_ops.lock); - - if (!op) - break; - - if (!xe_vm_is_closed(vm)) { - down_write(&vm->lock); - err = xe_vma_op_execute(vm, op); - if (err) { - drm_warn(&vm->xe->drm, - "Async VM op(%d) failed with %d", - op->base.op, err); - vm_set_async_error(vm, err); - up_write(&vm->lock); - - break; - } - up_write(&vm->lock); - } else { - struct xe_vma *vma; - - switch (op->base.op) { - case DRM_GPUVA_OP_REMAP: - vma = gpuva_to_vma(op->base.remap.unmap->va); - trace_xe_vma_flush(vma); - - down_write(&vm->lock); - xe_vma_destroy_unlocked(vma); - up_write(&vm->lock); - break; - case DRM_GPUVA_OP_UNMAP: - vma = gpuva_to_vma(op->base.unmap.va); - trace_xe_vma_flush(vma); - - down_write(&vm->lock); - xe_vma_destroy_unlocked(vma); - up_write(&vm->lock); - break; - default: - /* Nothing to do */ - break; - } - - if (op->fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &op->fence->fence.flags)) { - if (!xe_vm_no_dma_fences(vm)) { - op->fence->started = true; - wake_up_all(&op->fence->wq); - } - dma_fence_signal(&op->fence->fence); - } - } - - xe_vma_op_cleanup(vm, op); - } -} - -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, - struct list_head *ops_list, bool async) -{ - struct xe_vma_op *op, *last_op, *next; - int err; - - lockdep_assert_held_write(&vm->lock); - - last_op = list_last_entry(ops_list, struct xe_vma_op, link); - - if (!async) { - err = xe_vma_op_execute(vm, last_op); - if (err) - goto unwind; - xe_vma_op_cleanup(vm, last_op); - } else { - int i; - bool installed = false; - - for (i = 0; i < last_op->num_syncs; i++) - 
installed |= xe_sync_entry_signal(&last_op->syncs[i], - NULL, - &last_op->fence->fence); - if (!installed && last_op->fence) - dma_fence_signal(&last_op->fence->fence); - - spin_lock_irq(&vm->async_ops.lock); - list_splice_tail(ops_list, &vm->async_ops.pending); - spin_unlock_irq(&vm->async_ops.lock); - - if (!vm->async_ops.error) - queue_work(system_unbound_wq, &vm->async_ops.work); - } - - return 0; - -unwind: - list_for_each_entry_reverse(op, ops_list, link) - xe_vma_op_unwind(vm, op, op->flags & XE_VMA_OP_COMMITTED, - op->flags & XE_VMA_OP_PREV_COMMITTED, - op->flags & XE_VMA_OP_NEXT_COMMITTED); - list_for_each_entry_safe(op, next, ops_list, link) - xe_vma_op_cleanup(vm, op); - - return err; -} - static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, struct drm_gpuva_ops **ops, int num_ops_list) @@ -3078,6 +2760,31 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } } +static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, + struct list_head *ops_list) +{ + struct xe_vma_op *op, *next; + int err; + + lockdep_assert_held_write(&vm->lock); + + list_for_each_entry_safe(op, next, ops_list, link) { + err = xe_vma_op_execute(vm, op); + if (err) { + drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", + op->base.op, err); + /* + * FIXME: Killing VM rather than proper error handling + */ + xe_vm_kill(vm); + return -ENOSPC; + } + xe_vma_op_cleanup(vm, op); + } + + return 0; +} + #ifdef TEST_VM_ASYNC_OPS_ERROR #define SUPPORTED_FLAGS \ (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \ @@ -3086,7 +2793,8 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, #else #define SUPPORTED_FLAGS \ (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \ - XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | 0xffff) + XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | \ + 0xffff) #endif #define XE_64K_PAGE_MASK 0xffffull @@ -3137,21 +2845,12 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, if (i == 0) { *async = !!(flags & XE_VM_BIND_FLAG_ASYNC); - } else if (XE_IOCTL_DBG(xe, !*async) || - XE_IOCTL_DBG(xe, !(flags & XE_VM_BIND_FLAG_ASYNC)) || - XE_IOCTL_DBG(xe, op == XE_VM_BIND_OP_RESTART)) { - err = -EINVAL; - goto free_bind_ops; - } - - if (XE_IOCTL_DBG(xe, !*async && - op == XE_VM_BIND_OP_UNMAP_ALL)) { - err = -EINVAL; - goto free_bind_ops; - } - - if (XE_IOCTL_DBG(xe, !*async && - op == XE_VM_BIND_OP_PREFETCH)) { + if (XE_IOCTL_DBG(xe, !*async && args->num_syncs)) { + err = -EINVAL; + goto free_bind_ops; + } + } else if (XE_IOCTL_DBG(xe, *async != + !!(flags & XE_VM_BIND_FLAG_ASYNC))) { err = -EINVAL; goto free_bind_ops; } @@ -3188,8 +2887,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || - XE_IOCTL_DBG(xe, !range && op != - XE_VM_BIND_OP_RESTART && + XE_IOCTL_DBG(xe, !range && op != XE_VM_BIND_OP_UNMAP_ALL)) { err = -EINVAL; goto free_bind_ops; @@ -3237,6 +2935,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) err = -EINVAL; goto put_exec_queue; } + + if (XE_IOCTL_DBG(xe, async != + !!(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC))) { + err = -EINVAL; + goto put_exec_queue; + } } vm = xe_vm_lookup(xef, args->vm_id); @@ -3245,6 +2949,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto put_exec_queue; } + if (!args->exec_queue_id) { + if (XE_IOCTL_DBG(xe, async != + !!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT))) { + err = -EINVAL; + goto put_vm; + } + } + err = 
down_write_killable(&vm->lock); if (err) goto put_vm; @@ -3254,34 +2966,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto release_vm_lock; } - if (bind_ops[0].op == XE_VM_BIND_OP_RESTART) { - if (XE_IOCTL_DBG(xe, !(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) - err = -EOPNOTSUPP; - if (XE_IOCTL_DBG(xe, !err && args->num_syncs)) - err = EINVAL; - if (XE_IOCTL_DBG(xe, !err && !vm->async_ops.error)) - err = -EPROTO; - - if (!err) { - trace_xe_vm_restart(vm); - vm_set_async_error(vm, 0); - - queue_work(system_unbound_wq, &vm->async_ops.work); - - /* Rebinds may have been blocked, give worker a kick */ - if (xe_vm_in_compute_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } - - goto release_vm_lock; - } - - if (XE_IOCTL_DBG(xe, !vm->async_ops.error && - async != !!(vm->flags & XE_VM_FLAG_ASYNC_BIND_OPS))) { - err = -EOPNOTSUPP; - goto release_vm_lock; - } - for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3367,18 +3051,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_syncs; } - /* Do some error checking first to make the unwind easier */ - for (i = 0; i < args->num_binds; ++i) { - u64 range = bind_ops[i].range; - u64 addr = bind_ops[i].addr; - u32 op = bind_ops[i].op; - u32 flags = bind_ops[i].flags; - - err = vm_bind_ioctl_lookup_vma(vm, bos[i], addr, range, op, flags); - if (err) - goto free_syncs; - } - for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3411,10 +3083,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto unwind_ops; } - err = vm_bind_ioctl_ops_execute(vm, &ops_list, async); + xe_vm_get(vm); + if (q) + xe_exec_queue_get(q); + + err = vm_bind_ioctl_ops_execute(vm, &ops_list); + up_write(&vm->lock); - for (i = 0; i < args->num_binds; ++i) + if (q) + xe_exec_queue_put(q); + xe_vm_put(vm); + + for (i = 0; bos && i < args->num_binds; ++i) xe_bo_put(bos[i]); kfree(bos); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 59dcbd1adf15..45b70ba86553 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -177,8 +177,6 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); int xe_vm_invalidate_vma(struct xe_vma *vma); -int xe_vm_async_fence_wait_start(struct dma_fence *fence); - extern struct ttm_device_funcs xe_ttm_funcs; static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 828ed0fa7e60..97d779d8a7d3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -17,7 +17,6 @@ #include "xe_pt_types.h" #include "xe_range_fence.h" -struct async_op_fence; struct xe_bo; struct xe_sync_entry; struct xe_vm; @@ -156,7 +155,7 @@ struct xe_vm { */ #define XE_VM_FLAG_64K BIT(0) #define XE_VM_FLAG_COMPUTE_MODE BIT(1) -#define XE_VM_FLAG_ASYNC_BIND_OPS BIT(2) +#define XE_VM_FLAG_ASYNC_DEFAULT BIT(2) #define XE_VM_FLAG_MIGRATION BIT(3) #define XE_VM_FLAG_SCRATCH_PAGE BIT(4) #define XE_VM_FLAG_FAULT_MODE BIT(5) @@ -394,10 +393,6 @@ struct xe_vma_op { u32 num_syncs; /** @link: async operation link */ struct list_head link; - /** - * @fence: async operation fence, signaled on last operation complete - */ - struct async_op_fence *fence; /** @tile_mask: gt mask for this operation */ u8 tile_mask; /** @flags: operation flags */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 
f13974f17be9..4dc103aa00f1 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -134,10 +134,11 @@ struct drm_xe_engine_class_instance { #define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 #define DRM_XE_ENGINE_CLASS_COMPUTE 4 /* - * Kernel only class (not actual hardware engine class). Used for + * Kernel only classes (not actual hardware engine class). Used for * creating ordered queues of VM bind operations. */ -#define DRM_XE_ENGINE_CLASS_VM_BIND 5 +#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC 5 +#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC 6 __u16 engine_class; __u16 engine_instance; @@ -577,7 +578,7 @@ struct drm_xe_vm_create { #define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0) #define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1) -#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS (0x1 << 2) +#define DRM_XE_VM_CREATE_ASYNC_DEFAULT (0x1 << 2) #define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3) /** @flags: Flags */ __u32 flags; @@ -637,34 +638,12 @@ struct drm_xe_vm_bind_op { #define XE_VM_BIND_OP_MAP 0x0 #define XE_VM_BIND_OP_UNMAP 0x1 #define XE_VM_BIND_OP_MAP_USERPTR 0x2 -#define XE_VM_BIND_OP_RESTART 0x3 -#define XE_VM_BIND_OP_UNMAP_ALL 0x4 -#define XE_VM_BIND_OP_PREFETCH 0x5 +#define XE_VM_BIND_OP_UNMAP_ALL 0x3 +#define XE_VM_BIND_OP_PREFETCH 0x4 /** @op: Bind operation to perform */ __u32 op; #define XE_VM_BIND_FLAG_READONLY (0x1 << 0) - /* - * A bind ops completions are always async, hence the support for out - * sync. This flag indicates the allocation of the memory for new page - * tables and the job to program the pages tables is asynchronous - * relative to the IOCTL. That part of a bind operation can fail under - * memory pressure, the job in practice can't fail unless the system is - * totally shot. - * - * If this flag is clear and the IOCTL doesn't return an error, in - * practice the bind op is good and will complete. - * - * If this flag is set and doesn't return an error, the bind op can - * still fail and recovery is needed. It should free memory - * via non-async unbinds, and then restart all queued async binds op via - * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the - * VM. - * - * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is - * configured in the VM and must be set if the VM is configured with - * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state. - */ #define XE_VM_BIND_FLAG_ASYNC (0x1 << 1) /* * Valid on a faulting VM only, do the MAP operation immediately rather -- cgit v1.2.3 From e669f10cd3182943058fa84b1e81f3727f6e0520 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 14 Sep 2023 13:40:51 -0700 Subject: drm/xe: Fix VM bind out-sync signaling ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A case existed where an out-sync of a later VM bind operation could signal before a previous one if the later operation results in a NOP (e.g. a unbind or prefetch to a VA range without any mappings). This breaks the ordering rules, fix this. This patch also lays the groundwork for users to pass in num_binds == 0 and out-syncs. 
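As a condensed kernel-side sketch of the calling pattern these helpers enable (the real xe_vm.c hunks follow in the diff below; this is illustrative, not the literal hunk): when an operation turns out to be a NOP, its out-syncs are signalled with the bind queue's last fence rather than a signalled stub, so they cannot complete ahead of an earlier bind still in flight on the same queue.

/* Sketch of a would-be caller in xe_vm.c, assuming the helper semantics
 * documented in this patch: last_fence_get() returns the queue's last fence
 * without taking an extra reference, or a signalled stub if idle. */
static void nop_bind_signal_syncs(struct xe_vm *vm, struct xe_exec_queue *q,
				  struct xe_sync_entry *syncs, u32 num_syncs)
{
	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
	struct dma_fence *fence;
	int i;

	fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);

	/* Out-syncs inherit the ordering of the previous bind on this queue. */
	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], NULL, fence);
}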
Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 75 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_exec_queue.h | 7 +++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 6 +++ drivers/gpu/drm/xe/xe_vm.c | 45 +++++++++++++++---- 4 files changed, 125 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 9b373b9ea472..8e0620cb89e5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -156,6 +156,7 @@ void xe_exec_queue_destroy(struct kref *ref) struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + xe_exec_queue_last_fence_put_unlocked(q); if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { list_for_each_entry_safe(eq, next, &q->multi_gt_list, multi_gt_link) @@ -916,3 +917,77 @@ out: return ret; } + +static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + lockdep_assert_held_write(&vm->lock); +} + +/** + * xe_exec_queue_last_fence_put() - Drop ref to last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + */ +void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + + if (q->last_fence) { + dma_fence_put(q->last_fence); + q->last_fence = NULL; + } +} + +/** + * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked + * @q: The exec queue + * + * Only safe to be called from xe_exec_queue_destroy(). + */ +void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q) +{ + if (q->last_fence) { + dma_fence_put(q->last_fence); + q->last_fence = NULL; + } +} + +/** + * xe_exec_queue_last_fence_get() - Get last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Get last fence, does not take a ref + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + + if (q->last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) + xe_exec_queue_last_fence_put(q, vm); + + return q->last_fence ? q->last_fence : dma_fence_get_stub(); +} + +/** + * xe_exec_queue_last_fence_set() - Set last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * @fence: The fence + * + * Set the last fence for the engine. Increases reference count for fence, when + * closing engine xe_exec_queue_last_fence_put should be called. 
+ */ +void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, + struct dma_fence *fence) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + + xe_exec_queue_last_fence_put(q, vm); + q->last_fence = dma_fence_get(fence); +} diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 22499a2f522b..533da1b0c457 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -61,4 +61,11 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file); enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe); +void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); +void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e); +struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, + struct xe_vm *vm); +void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, + struct dma_fence *fence); + #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 4e382304010e..6826feb650f3 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -65,6 +65,12 @@ struct xe_exec_queue { /** @fence_irq: fence IRQ used to signal job completion */ struct xe_hw_fence_irq *fence_irq; + /** + * @last_fence: last fence on engine, protected by vm->lock in write + * mode if bind engine + */ + struct dma_fence *last_fence; + /* queue no longer allowed to submit */ #define EXEC_QUEUE_FLAG_BANNED BIT(0) /* queue used for kernel submission only */ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 89df50f49e11..4c8d77c4c7c0 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1524,6 +1524,13 @@ void xe_vm_close_and_put(struct xe_vm *vm) if (xe_vm_in_compute_mode(vm)) flush_work(&vm->preempt.rebind_work); + down_write(&vm->lock); + for_each_tile(tile, xe, id) { + if (vm->q[id]) + xe_exec_queue_last_fence_put(vm->q[id], vm); + } + up_write(&vm->lock); + for_each_tile(tile, xe, id) { if (vm->q[id]) { xe_exec_queue_kill(vm->q[id]); @@ -1665,16 +1672,23 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); } +static struct xe_exec_queue * +to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + return q ? q : vm->q[0]; +} + static struct dma_fence * xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op) { + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); struct xe_tile *tile; struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - struct xe_vm *vm = xe_vma_vm(vma); int cur_fence = 0, i; int number_tiles = hweight8(vma->tile_present); int err; @@ -1727,7 +1741,8 @@ next: cf ? &cf->base : fence); } - return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence; + return cf ? &cf->base : !fence ? 
+ xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; err_fences: if (fences) { @@ -1826,6 +1841,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, bool last_op) { struct dma_fence *fence; + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); xe_vm_assert_held(vm); @@ -1839,13 +1855,15 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); - fence = dma_fence_get_stub(); + fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); if (last_op) { for (i = 0; i < num_syncs; i++) xe_sync_entry_signal(&syncs[i], NULL, fence); } } + if (last_op) + xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); if (last_op && xe_vm_sync_mode(vm, q)) dma_fence_wait(fence, true); dma_fence_put(fence); @@ -1878,6 +1896,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, u32 num_syncs, bool first_op, bool last_op) { struct dma_fence *fence; + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); @@ -1887,6 +1906,8 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, return PTR_ERR(fence); xe_vma_destroy(vma, fence); + if (last_op) + xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); if (last_op && xe_vm_sync_mode(vm, q)) dma_fence_wait(fence, true); dma_fence_put(fence); @@ -2036,6 +2057,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, struct xe_sync_entry *syncs, u32 num_syncs, bool first_op, bool last_op) { + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); int err; xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); @@ -2054,9 +2076,12 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, /* Nothing to do, signal fences now */ if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, - dma_fence_get_stub()); + for (i = 0; i < num_syncs; i++) { + struct dma_fence *fence = + xe_exec_queue_last_fence_get(wait_exec_queue, vm); + + xe_sync_entry_signal(&syncs[i], NULL, fence); + } } return 0; @@ -3108,8 +3133,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) unwind_ops: vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); free_syncs: - for (i = 0; err == -ENODATA && i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub()); + for (i = 0; err == -ENODATA && i < num_syncs; i++) { + struct dma_fence *fence = + xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm); + + xe_sync_entry_signal(&syncs[i], NULL, fence); + } while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); -- cgit v1.2.3 From 2519450aaa31948d27db0715c24398b2590517f1 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 20 Sep 2023 15:29:34 -0400 Subject: drm/xe/uapi: Replace useless 'instance' per unique gt_id Let's have a single GT ID per GT within the PCI Device Card. 
Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_gt_types.h | 2 +- drivers/gpu/drm/xe/xe_pci.c | 4 ---- drivers/gpu/drm/xe/xe_query.c | 2 +- include/uapi/drm/xe_drm.h | 4 ++-- 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index d4310be3e1e7..d3f2793684e2 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -105,7 +105,7 @@ struct xe_gt { struct { /** @type: type of GT */ enum xe_gt_type type; - /** @id: id of GT */ + /** @id: Unique ID of this GT within the PCI Device */ u8 id; /** @clock_freq: clock frequency */ u32 clock_freq; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 9963772caabb..eec2b852c7aa 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -593,10 +593,6 @@ static int xe_info_init(struct xe_device *xe, return PTR_ERR(tile->primary_gt); gt = tile->primary_gt; - /* - * FIXME: GT numbering scheme may change depending on UAPI - * decisions. - */ gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; gt->info.__engine_mask = graphics_desc->hw_engine_mask; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index cd3e0f3208a6..3bff06299e65 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -376,7 +376,7 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE; else gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN; - gts->gts[id].instance = id; + gts->gts[id].gt_id = gt->info.id; gts->gts[id].clock_freq = gt->info.clock_freq; if (!IS_DGFX(xe)) gts->gts[id].native_mem_regions = 0x1; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 53b7b2ddf304..11bc4dc2c78c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -348,8 +348,8 @@ struct drm_xe_query_gt { #define XE_QUERY_GT_TYPE_MEDIA 2 /** @type: GT type: Main, Remote, or Media */ __u16 type; - /** @instance: Instance of this GT in the GT list */ - __u16 instance; + /** @gt_id: Unique ID of this GT within the PCI Device */ + __u16 gt_id; /** @clock_freq: A clock frequency for timestamp */ __u32 clock_freq; /** @features: Reserved for future information about GT features */ -- cgit v1.2.3 From e16b48378527dbe2f200b792922f59a2bf038507 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 20 Sep 2023 15:29:36 -0400 Subject: drm/xe/uapi: Rename gts to gt_list During the uapi review it was identified a possible confusion with the plural of acronym with a new acronym. So the recommendation is to go with gt_list instead. 
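For reference, a minimal userspace sketch of how the renamed query is consumed; the two-step pattern (first call to learn the size, second call to fetch the data) follows the kernel-side query handling shown in these patches. The DRM_IOCTL_XE_DEVICE_QUERY wrapper name and the uapi header path are assumptions here, not taken from this series:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>		/* assumed install path of the uapi header */

static int print_gt_list(int fd)
{
	struct drm_xe_device_query q = { .query = DRM_XE_DEVICE_QUERY_GT_LIST };
	struct drm_xe_query_gt_list *list;
	unsigned int i;

	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q))	/* first call fills q.size */
		return -1;

	list = calloc(1, q.size);
	if (!list)
		return -1;

	q.data = (uintptr_t)list;
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q)) {	/* second call fills the list */
		free(list);
		return -1;
	}

	for (i = 0; i < list->num_gt; i++)
		printf("GT %u: type %u, clock %u\n", list->gt_list[i].gt_id,
		       list->gt_list[i].type, list->gt_list[i].clock_freq);

	free(list);
	return 0;
}
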
Suggested-by: Matt Roper Signed-off-by: Rodrigo Vivi Reviewed-by: Matt Roper Signed-off-by: Francois Dugast --- drivers/gpu/drm/xe/xe_query.c | 40 ++++++++++++++++++++-------------------- include/uapi/drm/xe_drm.h | 18 +++++++++--------- 2 files changed, 29 insertions(+), 29 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 3bff06299e65..d37c75a0b028 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -347,14 +347,14 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) return 0; } -static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) +static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query) { struct xe_gt *gt; - size_t size = sizeof(struct drm_xe_query_gts) + + size_t size = sizeof(struct drm_xe_query_gt_list) + xe->info.gt_count * sizeof(struct drm_xe_query_gt); - struct drm_xe_query_gts __user *query_ptr = + struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); - struct drm_xe_query_gts *gts; + struct drm_xe_query_gt_list *gt_list; u8 id; if (query->size == 0) { @@ -364,34 +364,34 @@ static int query_gts(struct xe_device *xe, struct drm_xe_device_query *query) return -EINVAL; } - gts = kzalloc(size, GFP_KERNEL); - if (!gts) + gt_list = kzalloc(size, GFP_KERNEL); + if (!gt_list) return -ENOMEM; - gts->num_gt = xe->info.gt_count; + gt_list->num_gt = xe->info.gt_count; for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gts->gts[id].type = XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MEDIA; else if (gt_to_tile(gt)->id > 0) - gts->gts[id].type = XE_QUERY_GT_TYPE_REMOTE; + gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_REMOTE; else - gts->gts[id].type = XE_QUERY_GT_TYPE_MAIN; - gts->gts[id].gt_id = gt->info.id; - gts->gts[id].clock_freq = gt->info.clock_freq; + gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[id].gt_id = gt->info.id; + gt_list->gt_list[id].clock_freq = gt->info.clock_freq; if (!IS_DGFX(xe)) - gts->gts[id].native_mem_regions = 0x1; + gt_list->gt_list[id].native_mem_regions = 0x1; else - gts->gts[id].native_mem_regions = + gt_list->gt_list[id].native_mem_regions = BIT(gt_to_tile(gt)->id) << 1; - gts->gts[id].slow_mem_regions = xe->info.mem_region_mask ^ - gts->gts[id].native_mem_regions; + gt_list->gt_list[id].slow_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[id].native_mem_regions; } - if (copy_to_user(query_ptr, gts, size)) { - kfree(gts); + if (copy_to_user(query_ptr, gt_list, size)) { + kfree(gt_list); return -EFAULT; } - kfree(gts); + kfree(gt_list); return 0; } @@ -503,7 +503,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_engines, query_memory_usage, query_config, - query_gts, + query_gt_list, query_hwconfig, query_gt_topology, query_engine_cycles, diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 538873361d17..b02a63270972 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -337,7 +337,7 @@ struct drm_xe_query_config { /** * struct drm_xe_query_gt - describe an individual GT. * - * To be used with drm_xe_query_gts, which will return a list with all the + * To be used with drm_xe_query_gt_list, which will return a list with all the * existing GT individual descriptions. * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for * implementing graphics and/or media operations. 
@@ -374,19 +374,19 @@ struct drm_xe_query_gt { }; /** - * struct drm_xe_query_gts - describe GTs + * struct drm_xe_query_gt_list - A list with GT description items. * * If a query is made with a struct drm_xe_device_query where .query - * is equal to DRM_XE_DEVICE_QUERY_GTS, then the reply uses struct - * drm_xe_query_gts in .data. + * is equal to DRM_XE_DEVICE_QUERY_GT_LIST, then the reply uses struct + * drm_xe_query_gt_list in .data. */ -struct drm_xe_query_gts { - /** @num_gt: number of GTs returned in gts */ +struct drm_xe_query_gt_list { + /** @num_gt: number of GT items returned in gt_list */ __u32 num_gt; /** @pad: MBZ */ __u32 pad; - /** @gts: The GT list returned for this device */ - struct drm_xe_query_gt gts[]; + /** @gt_list: The GT list returned for this device */ + struct drm_xe_query_gt gt_list[]; }; /** @@ -479,7 +479,7 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_ENGINES 0 #define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 #define DRM_XE_DEVICE_QUERY_CONFIG 2 -#define DRM_XE_DEVICE_QUERY_GTS 3 +#define DRM_XE_DEVICE_QUERY_GT_LIST 3 #define DRM_XE_DEVICE_QUERY_HWCONFIG 4 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 -- cgit v1.2.3 From e48d146456e34625c6edafd6350bfaac5004727c Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 20 Sep 2023 15:29:37 -0400 Subject: drm/xe/uapi: Fix naming of XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY This is used for the priority of an exec queue (not an engine) and should be named accordingly. Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_query.c | 2 +- include/uapi/drm/xe_drm.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index d37c75a0b028..10b9878ec95a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -335,7 +335,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count; config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] = hweight_long(xe->info.mem_region_mask); - config->info[XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY] = + config->info[XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] = xe_exec_queue_device_get_max_priority(xe); if (copy_to_user(query_ptr, config, size)) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b02a63270972..24bf8f0f52e8 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -328,8 +328,8 @@ struct drm_xe_query_config { #define XE_QUERY_CONFIG_VA_BITS 3 #define XE_QUERY_CONFIG_GT_COUNT 4 #define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 -#define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY 6 -#define XE_QUERY_CONFIG_NUM_PARAM (XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1) +#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 6 +#define XE_QUERY_CONFIG_NUM_PARAM (XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1) /** @info: array of elements containing the config info */ __u64 info[]; }; -- cgit v1.2.3 From 9b49762740e3f2c240877437116635e73718cd47 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 4 Oct 2023 10:33:41 -0700 Subject: drm/xe/guc: Bump PVC GuC version to 70.9.1 The PVC GuC version that we're currently using (70.6.4) has a known issue that leads to dropping the disabling of contexts that have pending page faults. This is fixed in newer blobs, so we need to update to a more recent release. 
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/696 Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2ba0466bc45a..32782a52c07f 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -103,7 +103,7 @@ struct fw_blobs_by_type { #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ fw_def(LUNARLAKE, mmp_ver(xe, guc, lnl, 70, 6, 8)) \ fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 7)) \ - fw_def(PVC, mmp_ver(xe, guc, pvc, 70, 6, 4)) \ + fw_def(PVC, mmp_ver(xe, guc, pvc, 70, 9, 1)) \ fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \ fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 5)) \ -- cgit v1.2.3 From bf6d941c06c9681d0f3d8380e7093d7f79d3eef6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 18 Oct 2023 13:34:24 +0100 Subject: drm/xe: fix pat[2] programming with 2M/1G pages Bit 7 in the leaf node is normally programmed with pat[2], however with 2M/1G pages that same bit in the PDE/PDPE also toggles 2M/1G pages. For 2M/1G entries the pat[2] is rather moved to bit 12, which is now free given that the address must be aligned to 2M or 1G, leaving bit 7 for toggling 2M/1G pages. Bspec: 59510, 45038 Signed-off-by: Matthew Auld Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_vm.c | 17 +++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index ba6ffd359ff7..3f4e2818f92c 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -50,6 +50,7 @@ #define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) #define XE2_PPGTT_PTE_PAT4 BIT_ULL(61) +#define XE_PPGTT_PDE_PDPE_PAT2 BIT_ULL(12) #define XE_PPGTT_PTE_PAT2 BIT_ULL(7) #define XE_PPGTT_PTE_PAT1 BIT_ULL(4) #define XE_PPGTT_PTE_PAT0 BIT_ULL(3) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4c8d77c4c7c0..05f8c691f5fb 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1229,7 +1229,8 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) return pte; } -static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index) +static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, + u32 pt_level) { u64 pte = 0; @@ -1239,8 +1240,12 @@ static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index) if (pat_index & BIT(1)) pte |= XE_PPGTT_PTE_PAT1; - if (pat_index & BIT(2)) - pte |= XE_PPGTT_PTE_PAT2; + if (pat_index & BIT(2)) { + if (pt_level) + pte |= XE_PPGTT_PDE_PDPE_PAT2; + else + pte |= XE_PPGTT_PTE_PAT2; + } if (pat_index & BIT(3)) pte |= XELPG_PPGTT_PTE_PAT3; @@ -1284,7 +1289,7 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index); + pte |= pte_encode_pat_index(xe, pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) @@ -1303,7 +1308,7 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, if (likely(!xe_vma_read_only(vma))) pte |= XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, 
pat_index); + pte |= pte_encode_pat_index(xe, pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (unlikely(xe_vma_is_null(vma))) @@ -1323,7 +1328,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, pte = addr; pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index); + pte |= pte_encode_pat_index(xe, pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (devmem) -- cgit v1.2.3 From c85d36be2993d65cfd678e01659ff69a4a803cad Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Tue, 26 Sep 2023 13:59:37 -0700 Subject: drm/xe: Simplify xe_res_get_buddy() We can remove the unnecessary indirection thru xe->tiles[] to get the TTM VRAM manager. This code can be common for VRAM and STOLEN. Signed-off-by: Brian Welty Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_res_cursor.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 5cb4b66a5d74..006fc1361967 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -50,14 +50,9 @@ struct xe_res_cursor { static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res) { - struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); struct ttm_resource_manager *mgr; - if (res->mem_type != XE_PL_STOLEN) - return &xe->tiles[res->mem_type - XE_PL_VRAM0].mem.vram_mgr->mm; - - mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); - + mgr = ttm_manager_type(res->bo->bdev, res->mem_type); return &to_xe_ttm_vram_mgr(mgr)->mm; } -- cgit v1.2.3 From bad3644dd8d5b118cdf64dfc71ef9540ee288ddc Mon Sep 17 00:00:00 2001 From: Dnyaneshwar Bhadane Date: Tue, 24 Oct 2023 15:07:37 -0700 Subject: drm/xe/xe2: Add initial workarounds Add the initial collection of gt/engine/lrc workarounds. While at it, add some newlines around the platform/IP comments to make them consistent across all workarounds. 
v2: - FF_MODE is an MCR register (Matt Roper) - Group 18032247524 with other Xe2 workarounds (Matt Roper) - Move WA changing PSS_CHICKEN to lrc_was[] as for Xe2 that register is part of the render context image (Matt Roper) - Apply WA 16020518922 only on render engine (Matt Roper) Signed-off-by: Dnyaneshwar Bhadane Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231024220739.224251-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 24 ++++++++++++ drivers/gpu/drm/xe/xe_wa.c | 74 ++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index cd1821d96a5d..5ad75011aa70 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -94,7 +94,14 @@ #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) +#define FF_MODE XE_REG_MCR(0x6210) +#define DIS_TE_AUTOSTRIP REG_BIT(31) +#define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16) +#define DIS_MESH_AUTOSTRIP REG_BIT(15) + #define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED) +#define DIS_PARTIAL_AUTOSTRIP REG_BIT(9) +#define DIS_AUTOSTRIP REG_BIT(6) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -111,6 +118,9 @@ #define XEHP_PSS_MODE2 XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED) #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) +#define XEHP_PSS_CHICKEN XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED) +#define FD_END_COLLECT REG_BIT(5) + #define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) #define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) @@ -133,6 +143,9 @@ #define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) +#define VF_SCRATCHPAD XE_REG(0x83a8, XE_REG_OPTION_MASKED) +#define XE2_VFG_TED_CREDIT_INTERFACE_DISABLE REG_BIT(13) + #define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4, XE_REG_OPTION_MASKED) #define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) @@ -225,6 +238,7 @@ #define MSCUNIT_CLKGATE_DIS REG_BIT(10) #define RCCUNIT_CLKGATE_DIS REG_BIT(7) #define SARBUNIT_CLKGATE_DIS REG_BIT(5) +#define SBEUNIT_CLKGATE_DIS REG_BIT(4) #define UNSLICE_UNIT_LEVEL_CLKGATE2 XE_REG(0x94e4) #define VSUNIT_CLKGATE2_DIS REG_BIT(19) @@ -276,6 +290,8 @@ #define XEHP_L3SCQREG7 XE_REG_MCR(0xb188) #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) +#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) + #define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28) #define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c) #define COMP_MOD_CTRL XE_REG_MCR(0xcf30) @@ -299,6 +315,9 @@ #define XE_OAG_BLT_BUSY_FREE XE_REG(0xdbbc) #define XE_OAG_RENDER_BUSY_FREE XE_REG(0xdbdc) +#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) +#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) + #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) @@ -328,6 +347,7 @@ #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) +#define EARLY_EOT_DIS REG_BIT(1) #define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED) #define DISABLE_READ_SUPPRESSION REG_BIT(15) @@ -345,11 +365,15 @@ #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE 
REG_BIT(30) +#define TGM_WRITE_EOM_FORCE REG_BIT(17) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) +#define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13) #define LSC_CHICKEN_BIT_0_UDW XE_REG_MCR(0xe7c8 + 4) #define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) +#define XE2_ALLOC_DPA_STARVE_FIX_DIS REG_BIT(47 - 32) +#define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 1450af6cab34..ccb075aac7da 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -232,6 +232,7 @@ static const struct xe_rtp_entry_sr gt_was[] = { }, /* Xe_LPG */ + { XE_RTP_NAME("14015795083"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) @@ -245,6 +246,20 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR)) }, + /* Xe2_LPG */ + + { XE_RTP_NAME("16020975621"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS)) + }, + { XE_RTP_NAME("14018157293"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0), + SET(XEHPC_L3CLOS_MASK(1), ~0), + SET(XEHPC_L3CLOS_MASK(2), ~0), + SET(XEHPC_L3CLOS_MASK(3), ~0)) + }, + {} }; @@ -527,6 +542,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, /* Xe_LPG */ + { XE_RTP_NAME("14017856879"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), FUNC(xe_rtp_match_first_render_or_compute)), @@ -539,6 +555,41 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_NOCHECK)) }, + /* Xe2_LPG */ + + { XE_RTP_NAME("18032247524"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) + }, + { XE_RTP_NAME("16018712365"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) + }, + { XE_RTP_NAME("14018957109"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE)) + }, + { XE_RTP_NAME("16021540221"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) + }, + { XE_RTP_NAME("14019322943"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE)) + }, + { XE_RTP_NAME("14018471104"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) + }, + { XE_RTP_NAME("16018737384"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) + }, + {} }; @@ -625,11 +676,34 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, /* Xe_LPG */ + { XE_RTP_NAME("18019271663"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, + 
/* Xe2_LPG */ + + { XE_RTP_NAME("16020518922"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, + DIS_TE_AUTOSTRIP | + DIS_MESH_PARTIAL_AUTOSTRIP | + DIS_MESH_AUTOSTRIP), + SET(VFLSKPD, + DIS_PARTIAL_AUTOSTRIP | + DIS_AUTOSTRIP)) + }, + { XE_RTP_NAME("14019386621"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) + }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, + {} }; -- cgit v1.2.3 From f6c39feed02117db5dfe988321a1a4dee2a9a3e2 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Tue, 24 Oct 2023 15:07:38 -0700 Subject: drm/xe: Add performance tuning settings for MTL and Xe2 Add L3SQCREG5 as part of HW recommended settings. The recommended value in Bspec is 00e0007f. For Xe2-LPG, bits 23:21 don't exist anymore, but it's confirmed with HW engineers that setting them doesn't do anything. They still exist on the media GT, Xe2-LPM, but they are already set as per HW default value. So for the Xe2 platform, the only bits that need to be set are 9:0 since HW's default is 0x1ff and the recommended value is 0x7f. Unlike most registers, which have the same relative offset on both the primary and media GT, this register has a different base offset on the media GT. On MTL the register only exists for the primary (graphics) GT, so there's no need to program it on the media gt. Also, it's part of the RCS engine's context, so it needs to be added as an LRC workaround. Bspec: 72161 Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231024220739.224251-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++ drivers/gpu/drm/xe/xe_tuning.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5ad75011aa70..55ceadfc30b0 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -292,6 +292,8 @@ #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) +#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) + #define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28) #define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c) #define COMP_MOD_CTRL XE_REG_MCR(0xcf30) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index d70519816522..53ccd338fd8c 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -24,6 +24,20 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS)) }, + + /* Xe2 */ + + { XE_RTP_NAME("Tuning: L3 cache"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) + }, + { XE_RTP_NAME("Tuning: L3 cache - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) + }, + {} }; @@ -63,6 +77,15 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) }, + + /* Xe_LPG */ + + { XE_RTP_NAME("Tuning: L3 cache"), +
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) + }, + {} }; -- cgit v1.2.3 From 8656ea9ae8b488ac25fdd332c60e6fd805cde171 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Thu, 19 Oct 2023 15:01:39 +0530 Subject: drm/xe: Add event tracing for CTB Event tracing enabled for CTB submissions. Additional minor refactor - Removed a unnecessary ct_to_xe() call. v2: Remove a unwanted comment (Hari) Add missing change to commit message Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Haridhar Kalvala Link: https://lore.kernel.org/intel-xe/20231019093140.1901665-2-balasubramani.vivekanandan@intel.com/ Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 13 +++++++++++-- drivers/gpu/drm/xe/xe_trace.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b686c8b3339..a84e111bb36a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -460,7 +460,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, /* Write H2G ensuring visable before descriptor update */ xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32)); - xe_device_wmb(ct_to_xe(ct)); + xe_device_wmb(xe); /* Update local copies */ h2g->info.tail = (tail + full_len) % h2g->info.size; @@ -469,6 +469,9 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); + trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len, + desc_read(xe, h2g, head), h2g->info.tail); + return 0; } @@ -934,6 +937,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) struct guc_ctb *g2h = &ct->ctbs.g2h; u32 tail, head, len; s32 avail; + u32 action; lockdep_assert_held(&ct->fast_lock); @@ -984,11 +988,13 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) avail * sizeof(u32)); } + action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + if (fast_path) { if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) return 0; - switch (FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1])) { + switch (action) { case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: case XE_GUC_ACTION_TLB_INVALIDATION_DONE: break; /* Process these in fast-path */ @@ -1001,6 +1007,9 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) g2h->info.head = (head + avail) % g2h->info.size; desc_write(xe, g2h, head, g2h->info.head); + trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len, + g2h->info.head, tail); + return len; } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 5ea458dadf69..c43bb42aca5b 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -521,6 +521,7 @@ DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, TRACE_EVENT(xe_guc_ct_h2g_flow_control, TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), TP_ARGS(_head, _tail, size, space, len), +/* GuC */ TP_STRUCT__entry( __field(u32, _head) @@ -568,6 +569,46 @@ TRACE_EVENT(xe_guc_ct_g2h_flow_control, __entry->space, __entry->len) ); +DECLARE_EVENT_CLASS(xe_guc_ctb, + TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, 
tail), + + TP_STRUCT__entry( + __field(u8, gt_id) + __field(u32, action) + __field(u32, len) + __field(u32, tail) + __field(u32, _head) + ), + + TP_fast_assign( + __entry->gt_id = gt_id; + __entry->action = action; + __entry->len = len; + __entry->tail = tail; + __entry->_head = _head; + ), + + TP_printk("gt%d: H2G CTB: action=0x%x, len=%d, tail=%d, head=%d\n", + __entry->gt_id, __entry->action, __entry->len, + __entry->tail, __entry->_head) +); + +DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, + TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, tail) +); + +DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, + TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), + TP_ARGS(gt_id, action, len, _head, tail), + + TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n", + __entry->gt_id, __entry->action, __entry->len, + __entry->tail, __entry->_head) + +); + #endif /* This part must be outside protection */ -- cgit v1.2.3 From 0d0dda27cf066d1e7537a815fb3990be04cff6bd Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Thu, 19 Oct 2023 15:01:40 +0530 Subject: drm/xe/trace: Optimize trace definition Make use of EVENT_CLASS to group similar trace events Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Haridhar Kalvala Link: https://lore.kernel.org/intel-xe/20231019093140.1901665-3-balasubramani.vivekanandan@intel.com/ Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_trace.h | 83 +++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 47 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index c43bb42aca5b..f13cc9a35e91 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -518,55 +518,44 @@ DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, TP_ARGS(vm) ); -TRACE_EVENT(xe_guc_ct_h2g_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len), /* GuC */ +DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), - TP_STRUCT__entry( - __field(u32, _head) - __field(u32, _tail) - __field(u32, size) - __field(u32, space) - __field(u32, len) - ), - - TP_fast_assign( - __entry->_head = _head; - __entry->_tail = _tail; - __entry->size = size; - __entry->space = space; - __entry->len = len; - ), - - TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u", - __entry->_head, __entry->_tail, __entry->size, - __entry->space, __entry->len) -); - -TRACE_EVENT(xe_guc_ct_g2h_flow_control, - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), - TP_ARGS(_head, _tail, size, space, len), - - TP_STRUCT__entry( - __field(u32, _head) - __field(u32, _tail) - __field(u32, size) - __field(u32, space) - __field(u32, len) - ), - - TP_fast_assign( - __entry->_head = _head; - __entry->_tail = _tail; - __entry->size = size; - __entry->space = space; - __entry->len = len; - ), - - TP_printk("head=%u, tail=%u, size=%u, space=%u, len=%u", - __entry->_head, __entry->_tail, __entry->size, - __entry->space, __entry->len) + TP_STRUCT__entry( + __field(u32, _head) + __field(u32, _tail) + __field(u32, size) + __field(u32, space) + __field(u32, len) + ), + + TP_fast_assign( + __entry->_head = _head; + __entry->_tail = _tail; + __entry->size = size; + __entry->space = space; + __entry->len = len; + ), + + TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", + 
__entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) +); + +DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len) +); + +DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, + TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), + TP_ARGS(_head, _tail, size, space, len), + + TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", + __entry->_head, __entry->_tail, __entry->size, + __entry->space, __entry->len) ); DECLARE_EVENT_CLASS(xe_guc_ctb, -- cgit v1.2.3 From b1543a494c52102f9f5ad29d3dc38d29c7fcfcc4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 25 Oct 2023 08:17:34 -0700 Subject: drm/xe: Prepare to emit non-register state while recording default LRC On some platforms we need to emit some non-register state while recording an engine class' default LRC. Add the infrastructure to support this; actual per-platform tables will be added in future patches. v2: - Checkpatch whitespace fix - Add extra assertion to ensure num_dw != 0. (Bala) Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20231025151732.3461842-6-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 10 +++++++++- drivers/gpu/drm/xe/xe_lrc.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 3 +++ 3 files changed, 57 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 74e1f47bd401..8618275b1877 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -136,7 +136,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) long timeout; int count = 0; - bb = xe_bb_new(gt, SZ_4K, false); /* Just pick a large BB size */ + if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* Big enough to emit all of the context's 3DSTATE */ + bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false); + else + /* Just pick a large BB size */ + bb = xe_bb_new(gt, SZ_4K, false); + if (IS_ERR(bb)) return PTR_ERR(bb); @@ -173,6 +179,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } + xe_lrc_emit_hwe_state_instructions(q, bb); + job = xe_bb_create_job(q, bb); if (IS_ERR(job)) { xe_bb_free(bb, NULL); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 332fc0602074..184707223098 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -12,6 +12,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "regs/xe_regs.h" +#include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_drm_client.h" @@ -1108,3 +1109,47 @@ void xe_lrc_dump_default(struct drm_printer *p, remaining_dw -= num_dw; } } + +struct instr_state { + u32 instr; + u16 num_dw; +}; + +void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +{ + struct xe_gt *gt = q->hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + const struct instr_state *state_table = NULL; + int state_table_size = 0; + + /* + * At the moment we only need to emit non-register state for the RCS + * engine. 
+ */ + if (q->hwe->class != XE_ENGINE_CLASS_RENDER) + return; + + switch (GRAPHICS_VERx100(xe)) { + default: + xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", + GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); + return; + } + + for (int i = 0; i < state_table_size; i++) { + u32 instr = state_table[i].instr; + u16 num_dw = state_table[i].num_dw; + bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); + + xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); + xe_gt_assert(gt, num_dw != 0); + xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); + + bb->cs[bb->len] = instr; + if (!is_single_dw) + bb->cs[bb->len] |= (num_dw - 2); + + bb->len += num_dw; + } +} + diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index a7056eda5e0c..28b1d3f404d4 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -8,6 +8,7 @@ #include "xe_lrc_types.h" struct drm_printer; +struct xe_bb; struct xe_device; struct xe_exec_queue; enum xe_engine_class; @@ -52,4 +53,6 @@ void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); +void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); + #endif -- cgit v1.2.3 From 72ac304769dde2b84a5471e5db817a29d071fd73 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 25 Oct 2023 08:17:35 -0700 Subject: drm/xe: Emit SVG state on RCS during driver load on DG2 and MTL When recording the default LRC, the expectation is that the hardware's original state settings (both register and instruction) will be written out to the LRC upon first context switch. For many 3DSTATE_* state instructions that don't truly have "default" values, this translates to a simple instruction header (opcodes + dword length) being written to the LRC, followed by an appropriate number of blank dwords as a place holder. When userspace creates a context (which starts as a copy of the default LRC), they'll generally emit real 3DSTATE_* as part of their initialization to select the settings they desire. If they don't emit one of the 3DSTATE instructions, then the zeroed dwords that remain in their LRC image generally translate to various state remaining disabled. This will either be what userspace wants or will lead to very reproducible and easily-debugged problems (rendering glitches, engine hangs). It turns out that a subset of the 3DSTATE instructions, specifically those belonging to the SVG (State Variable - Global) unit, are not only emitting 0's for the instruction's "body" dwords, but also for the instruction header dword if no specific state has been explicitly set before context switch. This means that when the hardware switches to a context that hasn't explicitly provided an appropriate state setting, the hardware will just see a sequence of NOOPs in the spot reserved for that 3DSTATE instruction while executing the LRC, and the actual hardware state setting will unintentionally inherit the configuration used by the previously running context. Now when userspace makes a mistake and forgets to emit an important state instruction they no longer get consistent, easily-reproducible corruption/hangs, but rather erratic behavior where the presence/absence of a problem depends on what other workloads are running on the system and what order the contexts are scheduled on the engine. 
A specific example of this that came up recently related to mesh shading The OpenGL driver was not specifically emitting a 3DSTATE_MESH_CONTROL to disable mesh shading at context init, so on context switch, mesh shading would either be on or off depending on what the previous context had been doing. Vulkan apps _were_ enabling mesh shading, so running a Vulkan app and then context switching to an OpenGL app resulted in mesh shading still unexpectedly being enabled during OpenGL operation, and since other Mesh-related state was not properly initialized for that context a GPU hang was seen. Due to the specific ordering requirements (Vulkan app runs first, followed by OpenGL app), it took additional debug effort to track down the cause of the problem. There are various workarounds related to this behavior, with current implementations handled in the userspace drivers. E.g., Wa_14019789679 and Wa_22018402687. However it's been suggested that the kernel driver can help simplify things here by emitting zeroed SVG state with proper instruction headers as part of our default context creation (i.e., at the same point we apply LRC workarounds). This will help ensure that any future cases where a userspace driver does not emit an important state setting will result in consistent behavior. Bspec: 46261 Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20231025151732.3461842-7-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- .../gpu/drm/xe/instructions/xe_gfxpipe_commands.h | 51 ++++++++++ drivers/gpu/drm/xe/xe_gt.c | 1 + drivers/gpu/drm/xe/xe_lrc.c | 111 ++++++++++++++++++++- 3 files changed, 162 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h index b5fbc761265c..7be961434719 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -57,34 +57,84 @@ #define CMD_3DSTATE_STENCIL_BUFFER GFXPIPE_3D_CMD(0x0, 0x6) #define CMD_3DSTATE_HIER_DEPTH_BUFFER GFXPIPE_3D_CMD(0x0, 0x7) #define CMD_3DSTATE_VERTEX_BUFFERS GFXPIPE_3D_CMD(0x0, 0x8) +#define CMD_3DSTATE_VERTEX_ELEMENTS GFXPIPE_3D_CMD(0x0, 0x9) #define CMD_3DSTATE_INDEX_BUFFER GFXPIPE_3D_CMD(0x0, 0xA) #define CMD_3DSTATE_VF GFXPIPE_3D_CMD(0x0, 0xC) +#define CMD_3DSTATE_MULTISAMPLE GFXPIPE_3D_CMD(0x0, 0xD) #define CMD_3DSTATE_CC_STATE_POINTERS GFXPIPE_3D_CMD(0x0, 0xE) +#define CMD_3DSTATE_SCISSOR_STATE_POINTERS GFXPIPE_3D_CMD(0x0, 0xF) +#define CMD_3DSTATE_VS GFXPIPE_3D_CMD(0x0, 0x10) +#define CMD_3DSTATE_GS GFXPIPE_3D_CMD(0x0, 0x11) +#define CMD_3DSTATE_CLIP GFXPIPE_3D_CMD(0x0, 0x12) +#define CMD_3DSTATE_SF GFXPIPE_3D_CMD(0x0, 0x13) #define CMD_3DSTATE_WM GFXPIPE_3D_CMD(0x0, 0x14) +#define CMD_3DSTATE_CONSTANT_VS GFXPIPE_3D_CMD(0x0, 0x15) +#define CMD_3DSTATE_CONSTANT_GS GFXPIPE_3D_CMD(0x0, 0x16) #define CMD_3DSTATE_SAMPLE_MASK GFXPIPE_3D_CMD(0x0, 0x18) +#define CMD_3DSTATE_CONSTANT_HS GFXPIPE_3D_CMD(0x0, 0x19) +#define CMD_3DSTATE_CONSTANT_DS GFXPIPE_3D_CMD(0x0, 0x1A) +#define CMD_3DSTATE_HS GFXPIPE_3D_CMD(0x0, 0x1B) +#define CMD_3DSTATE_TE GFXPIPE_3D_CMD(0x0, 0x1C) +#define CMD_3DSTATE_DS GFXPIPE_3D_CMD(0x0, 0x1D) +#define CMD_3DSTATE_STREAMOUT GFXPIPE_3D_CMD(0x0, 0x1E) #define CMD_3DSTATE_SBE GFXPIPE_3D_CMD(0x0, 0x1F) #define CMD_3DSTATE_PS GFXPIPE_3D_CMD(0x0, 0x20) +#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP GFXPIPE_3D_CMD(0x0, 0x21) #define CMD_3DSTATE_CPS_POINTERS GFXPIPE_3D_CMD(0x0, 
0x22) #define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC GFXPIPE_3D_CMD(0x0, 0x23) #define CMD_3DSTATE_BLEND_STATE_POINTERS GFXPIPE_3D_CMD(0x0, 0x24) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_VS GFXPIPE_3D_CMD(0x0, 0x26) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_HS GFXPIPE_3D_CMD(0x0, 0x27) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_DS GFXPIPE_3D_CMD(0x0, 0x28) +#define CMD_3DSTATE_BINDING_TABLE_POINTERS_GS GFXPIPE_3D_CMD(0x0, 0x29) #define CMD_3DSTATE_BINDING_TABLE_POINTERS_PS GFXPIPE_3D_CMD(0x0, 0x2A) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS GFXPIPE_3D_CMD(0x0, 0x2B) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS GFXPIPE_3D_CMD(0x0, 0x2C) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS GFXPIPE_3D_CMD(0x0, 0x2D) +#define CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS GFXPIPE_3D_CMD(0x0, 0x2E) #define CMD_3DSTATE_SAMPLER_STATE_POINTERS_PS GFXPIPE_3D_CMD(0x0, 0x2F) #define CMD_3DSTATE_VF_INSTANCING GFXPIPE_3D_CMD(0x0, 0x49) +#define CMD_3DSTATE_VF_SGVS GFXPIPE_3D_CMD(0x0, 0x4A) #define CMD_3DSTATE_VF_TOPOLOGY GFXPIPE_3D_CMD(0x0, 0x4B) #define CMD_3DSTATE_WM_CHROMAKEY GFXPIPE_3D_CMD(0x0, 0x4C) #define CMD_3DSTATE_PS_BLEND GFXPIPE_3D_CMD(0x0, 0x4D) #define CMD_3DSTATE_WM_DEPTH_STENCIL GFXPIPE_3D_CMD(0x0, 0x4E) #define CMD_3DSTATE_PS_EXTRA GFXPIPE_3D_CMD(0x0, 0x4F) +#define CMD_3DSTATE_RASTER GFXPIPE_3D_CMD(0x0, 0x50) #define CMD_3DSTATE_SBE_SWIZ GFXPIPE_3D_CMD(0x0, 0x51) +#define CMD_3DSTATE_WM_HZ_OP GFXPIPE_3D_CMD(0x0, 0x52) +#define CMD_3DSTATE_VF_COMPONENT_PACKING GFXPIPE_3D_CMD(0x0, 0x55) +#define CMD_3DSTATE_VF_SGVS_2 GFXPIPE_3D_CMD(0x0, 0x56) #define CMD_3DSTATE_VFG GFXPIPE_3D_CMD(0x0, 0x57) +#define CMD_3DSTATE_URB_ALLOC_VS GFXPIPE_3D_CMD(0x0, 0x58) +#define CMD_3DSTATE_URB_ALLOC_HS GFXPIPE_3D_CMD(0x0, 0x59) +#define CMD_3DSTATE_URB_ALLOC_DS GFXPIPE_3D_CMD(0x0, 0x5A) +#define CMD_3DSTATE_URB_ALLOC_GS GFXPIPE_3D_CMD(0x0, 0x5B) +#define CMD_3DSTATE_SO_BUFFER_INDEX_0 GFXPIPE_3D_CMD(0x0, 0x60) +#define CMD_3DSTATE_SO_BUFFER_INDEX_1 GFXPIPE_3D_CMD(0x0, 0x61) +#define CMD_3DSTATE_SO_BUFFER_INDEX_2 GFXPIPE_3D_CMD(0x0, 0x62) +#define CMD_3DSTATE_SO_BUFFER_INDEX_3 GFXPIPE_3D_CMD(0x0, 0x63) +#define CMD_3DSTATE_PRIMITIVE_REPLICATION GFXPIPE_3D_CMD(0x0, 0x6C) +#define CMD_3DSTATE_TBIMR_TILE_PASS_INFO GFXPIPE_3D_CMD(0x0, 0x6E) #define CMD_3DSTATE_AMFS GFXPIPE_3D_CMD(0x0, 0x6F) #define CMD_3DSTATE_DEPTH_BOUNDS GFXPIPE_3D_CMD(0x0, 0x71) #define CMD_3DSTATE_AMFS_TEXTURE_POINTERS GFXPIPE_3D_CMD(0x0, 0x72) #define CMD_3DSTATE_CONSTANT_TS_POINTER GFXPIPE_3D_CMD(0x0, 0x73) +#define CMD_3DSTATE_MESH_CONTROL GFXPIPE_3D_CMD(0x0, 0x77) #define CMD_3DSTATE_MESH_DISTRIB GFXPIPE_3D_CMD(0x0, 0x78) +#define CMD_3DSTATE_TASK_REDISTRIB GFXPIPE_3D_CMD(0x0, 0x79) +#define CMD_3DSTATE_MESH_SHADER GFXPIPE_3D_CMD(0x0, 0x7A) +#define CMD_3DSTATE_MESH_SHADER_DATA GFXPIPE_3D_CMD(0x0, 0x7B) +#define CMD_3DSTATE_TASK_CONTROL GFXPIPE_3D_CMD(0x0, 0x7C) +#define CMD_3DSTATE_TASK_SHADER GFXPIPE_3D_CMD(0x0, 0x7D) +#define CMD_3DSTATE_TASK_SHADER_DATA GFXPIPE_3D_CMD(0x0, 0x7E) +#define CMD_3DSTATE_URB_ALLOC_MESH GFXPIPE_3D_CMD(0x0, 0x7F) +#define CMD_3DSTATE_URB_ALLOC_TASK GFXPIPE_3D_CMD(0x0, 0x80) +#define CMD_3DSTATE_CLIP_MESH GFXPIPE_3D_CMD(0x0, 0x81) #define CMD_3DSTATE_SBE_MESH GFXPIPE_3D_CMD(0x0, 0x82) #define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER GFXPIPE_3D_CMD(0x0, 0x83) +#define CMD_3DSTATE_DRAWING_RECTANGLE GFXPIPE_3D_CMD(0x1, 0x0) #define CMD_3DSTATE_CHROMA_KEY GFXPIPE_3D_CMD(0x1, 0x4) #define CMD_3DSTATE_POLY_STIPPLE_OFFSET GFXPIPE_3D_CMD(0x1, 0x6) #define CMD_3DSTATE_POLY_STIPPLE_PATTERN GFXPIPE_3D_CMD(0x1, 0x7) @@ -98,6 
+148,7 @@ #define CMD_3DSTATE_PUSH_CONSTANT_ALLOC_PS GFXPIPE_3D_CMD(0x1, 0x16) #define CMD_3DSTATE_SO_DECL_LIST GFXPIPE_3D_CMD(0x1, 0x17) #define CMD_3DSTATE_SO_DECL_LIST_DW_LEN REG_GENMASK(8, 0) +#define CMD_3DSTATE_SO_BUFFER GFXPIPE_3D_CMD(0x1, 0x18) #define CMD_3DSTATE_BINDING_TABLE_POOL_ALLOC GFXPIPE_3D_CMD(0x1, 0x19) #define CMD_3DSTATE_SAMPLE_PATTERN GFXPIPE_3D_CMD(0x1, 0x1C) #define CMD_3DSTATE_3D_MODE GFXPIPE_3D_CMD(0x1, 0x1E) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 8618275b1877..d380f67b3365 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -10,6 +10,7 @@ #include #include +#include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_gt_regs.h" #include "xe_assert.h" diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 184707223098..944bb2f64660 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1020,34 +1020,84 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_STENCIL_BUFFER); MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); MATCH3D(3DSTATE_VERTEX_BUFFERS); + MATCH3D(3DSTATE_VERTEX_ELEMENTS); MATCH3D(3DSTATE_INDEX_BUFFER); MATCH3D(3DSTATE_VF); + MATCH3D(3DSTATE_MULTISAMPLE); MATCH3D(3DSTATE_CC_STATE_POINTERS); + MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); + MATCH3D(3DSTATE_VS); + MATCH3D(3DSTATE_GS); + MATCH3D(3DSTATE_CLIP); + MATCH3D(3DSTATE_SF); MATCH3D(3DSTATE_WM); + MATCH3D(3DSTATE_CONSTANT_VS); + MATCH3D(3DSTATE_CONSTANT_GS); MATCH3D(3DSTATE_SAMPLE_MASK); + MATCH3D(3DSTATE_CONSTANT_HS); + MATCH3D(3DSTATE_CONSTANT_DS); + MATCH3D(3DSTATE_HS); + MATCH3D(3DSTATE_TE); + MATCH3D(3DSTATE_DS); + MATCH3D(3DSTATE_STREAMOUT); MATCH3D(3DSTATE_SBE); MATCH3D(3DSTATE_PS); + MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); MATCH3D(3DSTATE_CPS_POINTERS); MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); MATCH3D(3DSTATE_BLEND_STATE_POINTERS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); + MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); + MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); MATCH3D(3DSTATE_VF_INSTANCING); + MATCH3D(3DSTATE_VF_SGVS); MATCH3D(3DSTATE_VF_TOPOLOGY); MATCH3D(3DSTATE_WM_CHROMAKEY); MATCH3D(3DSTATE_PS_BLEND); MATCH3D(3DSTATE_WM_DEPTH_STENCIL); MATCH3D(3DSTATE_PS_EXTRA); + MATCH3D(3DSTATE_RASTER); MATCH3D(3DSTATE_SBE_SWIZ); + MATCH3D(3DSTATE_WM_HZ_OP); + MATCH3D(3DSTATE_VF_COMPONENT_PACKING); + MATCH3D(3DSTATE_VF_SGVS_2); MATCH3D(3DSTATE_VFG); + MATCH3D(3DSTATE_URB_ALLOC_VS); + MATCH3D(3DSTATE_URB_ALLOC_HS); + MATCH3D(3DSTATE_URB_ALLOC_DS); + MATCH3D(3DSTATE_URB_ALLOC_GS); + MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); + MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); + MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); + MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); + MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); + MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); MATCH3D(3DSTATE_AMFS); MATCH3D(3DSTATE_DEPTH_BOUNDS); MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); MATCH3D(3DSTATE_CONSTANT_TS_POINTER); + MATCH3D(3DSTATE_MESH_CONTROL); MATCH3D(3DSTATE_MESH_DISTRIB); + MATCH3D(3DSTATE_TASK_REDISTRIB); + MATCH3D(3DSTATE_MESH_SHADER); + MATCH3D(3DSTATE_MESH_SHADER_DATA); + MATCH3D(3DSTATE_TASK_CONTROL); + MATCH3D(3DSTATE_TASK_SHADER); + MATCH3D(3DSTATE_TASK_SHADER_DATA); + 
MATCH3D(3DSTATE_URB_ALLOC_MESH); + MATCH3D(3DSTATE_URB_ALLOC_TASK); + MATCH3D(3DSTATE_CLIP_MESH); MATCH3D(3DSTATE_SBE_MESH); MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); + MATCH3D(3DSTATE_DRAWING_RECTANGLE); MATCH3D(3DSTATE_CHROMA_KEY); MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); @@ -1060,6 +1110,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); MATCH3D(3DSTATE_SO_DECL_LIST); + MATCH3D(3DSTATE_SO_BUFFER); MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); MATCH3D(3DSTATE_SAMPLE_PATTERN); MATCH3D(3DSTATE_3D_MODE); @@ -1115,6 +1166,59 @@ struct instr_state { u16 num_dw; }; +static const struct instr_state xe_hpg_svg_state[] = { + { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, + { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, + { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, + { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, + { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, + { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, + { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, + { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, + { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, + { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, + { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, + { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, + { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SF, .num_dw = 4 }, + { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, + { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, + { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, + { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, + { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, + { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, + { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, + { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, + { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, + { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, + { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, + { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, + { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, + { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, + { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, + { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, + { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, + { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, + { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, + { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, + { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, + { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, + { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 
}, +}; + void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) { struct xe_gt *gt = q->hwe->gt; @@ -1130,6 +1234,12 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b return; switch (GRAPHICS_VERx100(xe)) { + case 1255: + case 1270: + case 1271: + state_table = xe_hpg_svg_state; + state_table_size = ARRAY_SIZE(xe_hpg_svg_state); + break; default: xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); @@ -1152,4 +1262,3 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b bb->len += num_dw; } } - -- cgit v1.2.3 From fb24b858a20d720b7ee4396225569ff33a8a4fe3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 25 Oct 2023 08:17:36 -0700 Subject: drm/xe/xe2: Update SVG state handling As with DG2/MTL, Xe2 also fails to emit instruction headers for SVG state instructions if no explicit state has been set. The SVG part of the LRC is nearly identical to DG2/MTL; the only change is that 3DSTATE_DRAWING_RECTANGLE has been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST, so we can just re-use the same state table and handle that single instruction when we encounter it. Bspec: 65182 Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20231025151732.3461842-8-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h | 1 + drivers/gpu/drm/xe/xe_lrc.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h index 7be961434719..8e6dd061f2ae 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -52,6 +52,7 @@ #define PIPELINE_SELECT GFXPIPE_SINGLE_DW_CMD(0x1, 0x4) +#define CMD_3DSTATE_DRAWING_RECTANGLE_FAST GFXPIPE_3D_CMD(0x0, 0x0) #define CMD_3DSTATE_CLEAR_PARAMS GFXPIPE_3D_CMD(0x0, 0x4) #define CMD_3DSTATE_DEPTH_BUFFER GFXPIPE_3D_CMD(0x0, 0x5) #define CMD_3DSTATE_STENCIL_BUFFER GFXPIPE_3D_CMD(0x0, 0x6) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 944bb2f64660..f8754f061599 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1015,6 +1015,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH(PIPELINE_SELECT); + MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); MATCH3D(3DSTATE_CLEAR_PARAMS); MATCH3D(3DSTATE_DEPTH_BUFFER); MATCH3D(3DSTATE_STENCIL_BUFFER); @@ -1235,8 +1236,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b switch (GRAPHICS_VERx100(xe)) { case 1255: - case 1270: - case 1271: + case 1270 ... 2004: state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); break; @@ -1255,6 +1255,17 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b xe_gt_assert(gt, num_dw != 0); xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); + /* + * Xe2's SVG context is the same as the one on DG2 / MTL + * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has + * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). + * Just make the replacement here rather than defining a + * whole separate table for the single trivial change. 
+ */ + if (GRAPHICS_VER(xe) >= 20 && + instr == CMD_3DSTATE_DRAWING_RECTANGLE) + instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; + bb->cs[bb->len] = instr; if (!is_single_dw) bb->cs[bb->len] |= (num_dw - 2); -- cgit v1.2.3 From 83af834e711ce779afb1ee6a28977b3e4b164354 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 23 Oct 2023 13:41:13 -0700 Subject: drm/xe/mocs: MOCS registers are multicast on Xe_HP and beyond The MOCS registers should be written in an MCR-specific manner on Xe_HP and beyond to prevent any other driver threads or external firmware from putting the hardware into unicast mode while we initialize the MOCS table. Bspec: 66534, 67609, 71185 Cc: Ruthuvikas Ravikumar Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20231023204112.2856331-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 6 ++++-- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- drivers/gpu/drm/xe/xe_mocs.c | 23 +++++++++++++++-------- 3 files changed, 20 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 55ceadfc30b0..b00fe089525a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -43,7 +43,8 @@ #define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) /* L3 Cache Control */ -#define LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) +#define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) +#define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) #define LNCFCMOCS_REG_COUNT 32 #define MCFG_MCR_SELECTOR XE_REG(0xfd0) @@ -79,7 +80,8 @@ #define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) #define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) -#define GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) /* Global MOCS regs */ +#define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) +#define XEHP_GLOBAL_MOCS(i) XE_REG_MCR(0x4000 + (i) * 4) #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index efa4d25424b8..88789826e781 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -473,7 +473,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, - LNCFCMOCS(i), count++); + XELP_LNCFCMOCS(i), count++); } } diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 19a8146ded9a..21972bbef8fd 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -10,6 +10,7 @@ #include "xe_device.h" #include "xe_exec_queue.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_platform_types.h" #include "xe_step_types.h" @@ -491,8 +492,7 @@ static u32 get_entry_control(const struct xe_mocs_info *info, } static void __init_mocs_table(struct xe_gt *gt, - const struct xe_mocs_info *info, - u32 addr) + const struct xe_mocs_info *info) { struct xe_device *xe = gt_to_xe(gt); @@ -505,10 +505,13 @@ static void __init_mocs_table(struct xe_gt *gt, for (i = 0; i < info->n_entries ? 
(mocs = get_entry_control(info, i)), 1 : 0; i++) { - struct xe_reg reg = XE_REG(addr + i * 4); + mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, + XELP_GLOBAL_MOCS(i).addr, mocs); - mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, reg.addr, mocs); - xe_mmio_write32(gt, reg, mocs); + if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250) + xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs); + else + xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs); } } @@ -542,9 +545,13 @@ static void init_l3cc_table(struct xe_gt *gt, (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), get_entry_l3cc(info, 2 * i + 1))), 1 : 0; i++) { - mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, LNCFCMOCS(i).addr, + mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr, l3cc); - xe_mmio_write32(gt, LNCFCMOCS(i), l3cc); + + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc); + else + xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc); } } @@ -569,7 +576,7 @@ void xe_mocs_init(struct xe_gt *gt) mocs_dbg(>_to_xe(gt)->drm, "flag:0x%x\n", flags); if (flags & HAS_GLOBAL_MOCS) - __init_mocs_table(gt, &table, GLOBAL_MOCS(0).addr); + __init_mocs_table(gt, &table); /* * Initialize the L3CC table as part of mocs initalization to make -- cgit v1.2.3 From 8a93b0b4d1105b7d03b4768f1a08145b24cbd52a Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Wed, 25 Oct 2023 21:42:01 +0530 Subject: drm/xe: Extend rpX values extraction for future platforms In existing code flow for future platforms i.e. >1270, the rpX (rp0,rpn and rpe) fused values are read from gen 6 registers. Which is not correct. Unless specified gen 1270 regs should be valid for gen 1270+ platforms as well. Signed-off-by: Badal Nilawar Reviewed-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index d9375d1d582f..74247e0d3674 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -340,7 +340,7 @@ static void pc_update_rp_values(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); - if (xe->info.platform == XE_METEORLAKE) + if (GRAPHICS_VERx100(xe) >= 1270) mtl_update_rpe_value(pc); else tgl_update_rpe_value(pc); @@ -365,7 +365,7 @@ static ssize_t freq_act_show(struct device *dev, xe_device_mem_access_get(gt_to_xe(gt)); /* When in RC6, actual frequency reported will be 0. */ - if (xe->info.platform == XE_METEORLAKE) { + if (GRAPHICS_VERx100(xe) >= 1270) { freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); freq = REG_FIELD_GET(MTL_CAGF_MASK, freq); } else { @@ -680,7 +680,7 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); - if (xe->info.platform == XE_METEORLAKE) + if (GRAPHICS_VERx100(xe) >= 1270) mtl_init_fused_rp_values(pc); else tgl_init_fused_rp_values(pc); -- cgit v1.2.3 From a9a95523c84957b7863796b5d1df2f3f5dca4519 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 25 Oct 2023 10:57:41 -0700 Subject: drm/xe/uc: Prepare for parsing of different header types GSC binaries and newer HuC ones use GSC-style headers instead of the CSS. In preparation for adding support for such parsing, split out the current parsing code to its own function, to make it cleaner to add the new paths. The existing doc section has also been renamed to narrow it to CSS-based binaries. 
v2: new patch in series, split out from next patch for easier reviewing v3: drop unneeded include (Lucas) Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- Documentation/gpu/xe/xe_firmware.rst | 2 +- drivers/gpu/drm/xe/xe_uc_fw.c | 116 ++++++++++++++++++++--------------- drivers/gpu/drm/xe/xe_uc_fw_abi.h | 7 ++- 3 files changed, 71 insertions(+), 54 deletions(-) (limited to 'drivers/gpu') diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst index c01246ae99f5..f1ac6f608930 100644 --- a/Documentation/gpu/xe/xe_firmware.rst +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -8,7 +8,7 @@ Firmware Layout =============== .. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h - :doc: Firmware Layout + :doc: CSS-based Firmware Layout Write Once Protected Content Memory (WOPCM) Layout ================================================== diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 32782a52c07f..189a298e5479 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -344,57 +344,22 @@ fail: return -ENOEXEC; } -int xe_uc_fw_init(struct xe_uc_fw *uc_fw) +/* Refer to the "CSS-based Firmware Layout" documentation entry for details */ +static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t fw_size) { struct xe_device *xe = uc_fw_to_xe(uc_fw); - struct xe_gt *gt = uc_fw_to_gt(uc_fw); - struct xe_tile *tile = gt_to_tile(gt); - struct device *dev = xe->drm.dev; - const struct firmware *fw = NULL; struct uc_css_header *css; - struct xe_bo *obj; size_t size; - int err; - - /* - * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status - * before we're looked at the HW caps to see if we have uc support - */ - BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); - xe_assert(xe, !uc_fw->status); - xe_assert(xe, !uc_fw->path); - - uc_fw_auto_select(xe, uc_fw); - xe_uc_fw_change_status(uc_fw, uc_fw->path ? 
- XE_UC_FIRMWARE_SELECTED : - XE_UC_FIRMWARE_NOT_SUPPORTED); - - if (!xe_uc_fw_is_supported(uc_fw)) - return 0; - - uc_fw_override(uc_fw); - - /* an empty path means the firmware is disabled */ - if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { - xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); - drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); - return 0; - } - - err = request_firmware(&fw, uc_fw->path, dev); - if (err) - goto fail; /* Check the size of the blob before examining buffer contents */ - if (unlikely(fw->size < sizeof(struct uc_css_header))) { + if (unlikely(fw_size < sizeof(struct uc_css_header))) { drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, - fw->size, sizeof(struct uc_css_header)); - err = -ENODATA; - goto fail; + fw_size, sizeof(struct uc_css_header)); + return -ENODATA; } - css = (struct uc_css_header *)fw->data; + css = (struct uc_css_header *)fw_data; /* Check integrity of size values inside CSS header */ size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - @@ -403,9 +368,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) drm_warn(&xe->drm, "%s firmware %s: unexpected header size: %zu != %zu\n", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, - fw->size, sizeof(struct uc_css_header)); - err = -EPROTO; - goto fail; + fw_size, sizeof(struct uc_css_header)); + return -EPROTO; } /* uCode size must calculated from other sizes */ @@ -417,12 +381,11 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) /* At least, it should have header, uCode and RSA. Size of all three. */ size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size; - if (unlikely(fw->size < size)) { + if (unlikely(fw_size < size)) { drm_warn(&xe->drm, "%s firmware %s: invalid size: %zu < %zu\n", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, - fw->size, size); - err = -ENOEXEC; - goto fail; + fw_size, size); + return -ENOEXEC; } /* Get version numbers from the CSS header */ @@ -433,6 +396,60 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) uc_fw->patch_ver_found = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); + if (uc_fw->type == XE_UC_FW_TYPE_GUC) + guc_read_css_info(uc_fw, css); + + return 0; +} + +static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw) +{ + return parse_css_header(uc_fw, fw->data, fw->size); +} + +int xe_uc_fw_init(struct xe_uc_fw *uc_fw) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_tile *tile = gt_to_tile(gt); + struct device *dev = xe->drm.dev; + const struct firmware *fw = NULL; + struct xe_bo *obj; + int err; + + /* + * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status + * before we're looked at the HW caps to see if we have uc support + */ + BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED); + xe_assert(xe, !uc_fw->status); + xe_assert(xe, !uc_fw->path); + + uc_fw_auto_select(xe, uc_fw); + xe_uc_fw_change_status(uc_fw, uc_fw->path ? 
+ XE_UC_FIRMWARE_SELECTED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + + if (!xe_uc_fw_is_supported(uc_fw)) + return 0; + + uc_fw_override(uc_fw); + + /* an empty path means the firmware is disabled */ + if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED); + drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type)); + return 0; + } + + err = request_firmware(&fw, uc_fw->path, dev); + if (err) + goto fail; + + err = parse_headers(uc_fw, fw); + if (err) + goto fail; + drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u\n", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found); @@ -441,9 +458,6 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) if (err) goto fail; - if (uc_fw->type == XE_UC_FW_TYPE_GUC) - guc_read_css_info(uc_fw, css); - obj = xe_bo_create_from_data(xe, tile, fw->data, fw->size, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h index 89e994ed4e00..edae7bb3cd72 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -10,9 +10,12 @@ #include /** - * DOC: Firmware Layout + * DOC: CSS-based Firmware Layout * - * The GuC/HuC firmware layout looks like this:: + * The CSS-based firmware structure is used for GuC releases on all platforms + * and for HuC releases up to DG1. Starting from DG2/MTL the HuC uses the GSC + * layout instead. + * The CSS firmware layout looks like this:: * * +======================================================================+ * | Firmware blob | -- cgit v1.2.3 From 484ecffac91067e44273afa727fb1b9855058c9a Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 25 Oct 2023 10:57:42 -0700 Subject: drm/xe/huc: Extract version and binary offset from new HuC headers The GSC-enabled HuC binary starts with a GSC header, which is followed by the legacy-style CSS header and the binary itself. We can parse the GSC headers to find the HuC version and the location of the binary to be used for the DMA transfer. The parsing function has been designed to be re-used for the GSC binary, so the entry names are external parameters (because the GSC uses different ones) and the CSS entry is optional (because the GSC doesn't have it). v2: move new code to uc_fw.c, better comments and error checking, split old code move to separate patch (Lucas), move headers and documentation to uc_fw_abi.h. v3: use 2 separate loops, rework marker check (Lucas) Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- Documentation/gpu/xe/xe_firmware.rst | 3 + drivers/gpu/drm/xe/xe_uc_fw.c | 120 ++++++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_uc_fw.h | 2 +- drivers/gpu/drm/xe/xe_uc_fw_abi.h | 120 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_uc_fw_types.h | 2 + 5 files changed, 244 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst index f1ac6f608930..afcb561cd37d 100644 --- a/Documentation/gpu/xe/xe_firmware.rst +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -10,6 +10,9 @@ Firmware Layout .. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h :doc: CSS-based Firmware Layout +.. 
kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h + :doc: GSC-based Firmware Layout + Write Once Protected Content Memory (WOPCM) Layout ================================================== diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 189a298e5479..d1cc99f86ec0 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -402,9 +402,125 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t return 0; } +static bool is_cpd_header(const void *data) +{ + const u32 *marker = data; + + return *marker == GSC_CPD_HEADER_MARKER; +} + +static u32 entry_offset(const struct gsc_cpd_header_v2 *header, const char *name) +{ + const struct gsc_cpd_entry *entry; + int i; + + entry = (void *)header + header->header_length; + + for (i = 0; i < header->num_of_entries; i++, entry++) + if (strcmp(entry->name, name) == 0) + return entry->offset & GSC_CPD_ENTRY_OFFSET_MASK; + + return 0; +} + +/* Refer to the "GSC-based Firmware Layout" documentation entry for details */ +static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t size, + const char *manifest_entry, const char *css_entry) +{ + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_device *xe = gt_to_xe(gt); + const struct gsc_cpd_header_v2 *header = data; + const struct gsc_manifest_header *manifest; + size_t min_size = sizeof(*header); + u32 offset; + + /* manifest_entry is mandatory, css_entry is optional */ + xe_assert(xe, manifest_entry); + + if (size < min_size || !is_cpd_header(header)) + return -ENOENT; + + if (header->header_length < sizeof(struct gsc_cpd_header_v2)) { + xe_gt_err(gt, "invalid CPD header length %u!\n", header->header_length); + return -EINVAL; + } + + min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries; + if (size < min_size) { + xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size); + return -ENODATA; + } + + /* Look for the manifest first */ + offset = entry_offset(header, manifest_entry); + if (!offset) { + xe_gt_err(gt, "Failed to find %s manifest!\n", + xe_uc_fw_type_repr(uc_fw->type)); + return -ENODATA; + } + + min_size = offset + sizeof(struct gsc_manifest_header); + if (size < min_size) { + xe_gt_err(gt, "FW too small! %zu < %zu\n", size, min_size); + return -ENODATA; + } + + manifest = data + offset; + + uc_fw->major_ver_found = manifest->fw_version.major; + uc_fw->minor_ver_found = manifest->fw_version.minor; + uc_fw->patch_ver_found = manifest->fw_version.hotfix; + + /* then optionally look for the css header */ + if (css_entry) { + int ret; + + /* + * This section does not contain a CSS entry on DG2. We + * don't support DG2 HuC right now, so no need to handle + * it, just add a reminder in case that changes. + */ + xe_assert(xe, xe->info.platform != XE_DG2); + + offset = entry_offset(header, css_entry); + + /* the CSS header parser will check that the CSS header fits */ + if (offset > size) { + xe_gt_err(gt, "FW too small! %zu < %u\n", size, offset); + return -ENODATA; + } + + ret = parse_css_header(uc_fw, data + offset, size - offset); + if (ret) + return ret; + + uc_fw->css_offset = offset; + } + + return 0; +} + static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw) { - return parse_css_header(uc_fw, fw->data, fw->size); + int ret; + + /* + * All GuC releases and older HuC ones use CSS headers, while newer HuC + * releases use GSC CPD headers. 
+ */ + switch (uc_fw->type) { + case XE_UC_FW_TYPE_HUC: + ret = parse_cpd_header(uc_fw, fw->data, fw->size, "HUCP.man", "huc_fw"); + if (!ret || ret != -ENOENT) + return ret; + fallthrough; + case XE_UC_FW_TYPE_GUC: + return parse_css_header(uc_fw, fw->data, fw->size); + default: + return -EINVAL; + } + + return 0; } int xe_uc_fw_init(struct xe_uc_fw *uc_fw) @@ -510,7 +626,7 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); /* Set the source address for the uCode */ - src_offset = uc_fw_ggtt_offset(uc_fw); + src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset; xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset)); xe_mmio_write32(gt, DMA_ADDR_0_HIGH, upper_32_bits(src_offset)); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index a519c77d4962..1d1a0c156cdf 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -21,7 +21,7 @@ void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p); static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw) { - return sizeof(struct uc_css_header) + uc_fw->ucode_size; + return sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->css_offset; } static inline void xe_uc_fw_change_status(struct xe_uc_fw *uc_fw, diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h index edae7bb3cd72..d6725c963251 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -85,4 +85,124 @@ struct uc_css_header { } __packed; static_assert(sizeof(struct uc_css_header) == 128); +/** + * DOC: GSC-based Firmware Layout + * + * The GSC-based firmware structure is used for GSC releases on all platforms + * and for HuC releases starting from DG2/MTL. Older HuC releases use the + * CSS-based layout instead. Differently from the CSS headers, the GSC headers + * uses a directory + entries structure (i.e., there is array of addresses + * pointing to specific header extensions identified by a name). Although the + * header structures are the same, some of the entries are specific to GSC while + * others are specific to HuC. The manifest header entry, which includes basic + * information about the binary (like the version) is always present, but it is + * named differently based on the binary type. + * + * The HuC binary starts with a Code Partition Directory (CPD) header. The + * entries we're interested in for use in the driver are: + * + * 1. "HUCP.man": points to the manifest header for the HuC. + * 2. "huc_fw": points to the FW code. On platforms that support load via DMA + * and 2-step HuC authentication (i.e. MTL+) this is a full CSS-based binary, + * while if the GSC is the one doing the load (which only happens on DG2) + * this section only contains the uCode. + * + * The GSC-based HuC firmware layout looks like this:: + * + * +================================================+ + * | CPD Header | + * +================================================+ + * | CPD entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "HUCP.man" | + * | ... | + * | offset >----------------------------|------o + * | ... | | + * | entryY | | + * | "huc_fw" | | + * | ... | | + * | offset >----------------------------|----------o + * +================================================+ | | + * | | + * +================================================+ | | + * | Manifest Header |<-----o | + * | ... | | + * | FW version | | + * | ... 
| | + * +================================================+ | + * | + * +================================================+ | + * | FW binary |<---------o + * | CSS (MTL+ only) | + * | uCode | + * | RSA Key (MTL+ only) | + * | ... | + * +================================================+ + */ + +struct gsc_version { + u16 major; + u16 minor; + u16 hotfix; + u16 build; +} __packed; + +/* Code partition directory (CPD) structures */ +struct gsc_cpd_header_v2 { + u32 header_marker; +#define GSC_CPD_HEADER_MARKER 0x44504324 + + u32 num_of_entries; + u8 header_version; + u8 entry_version; + u8 header_length; /* in bytes */ + u8 flags; + u32 partition_name; + u32 crc32; +} __packed; + +struct gsc_cpd_entry { + u8 name[12]; + + /* + * Bits 0-24: offset from the beginning of the code partition + * Bit 25: huffman compressed + * Bits 26-31: reserved + */ + u32 offset; +#define GSC_CPD_ENTRY_OFFSET_MASK GENMASK(24, 0) +#define GSC_CPD_ENTRY_HUFFMAN_COMP BIT(25) + + /* + * Module/Item length, in bytes. For Huffman-compressed modules, this + * refers to the uncompressed size. For software-compressed modules, + * this refers to the compressed size. + */ + u32 length; + + u8 reserved[4]; +} __packed; + +struct gsc_manifest_header { + u32 header_type; /* 0x4 for manifest type */ + u32 header_length; /* in dwords */ + u32 header_version; + u32 flags; + u32 vendor; + u32 date; + u32 size; /* In dwords, size of entire manifest (header + extensions) */ + u32 header_id; + u32 internal_data; + struct gsc_version fw_version; + u32 security_version; + struct gsc_version meu_kit_version; + u32 meu_manifest_version; + u8 general_data[4]; + u8 reserved3[56]; + u32 modulus_size; /* in dwords */ + u32 exponent_size; /* in dwords */ +} __packed; + #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 444bff83cdbe..1650599303c8 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -113,6 +113,8 @@ struct xe_uc_fw { u32 rsa_size; /** @ucode_size: micro kernel size */ u32 ucode_size; + /** @css_offset: offset within the blob at which the CSS is located */ + u32 css_offset; /** @private_data_size: size of private data found in uC css header */ u32 private_data_size; -- cgit v1.2.3 From b77d8b5c5ec0673086f565f2c07ed6da081483b8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 25 Oct 2023 10:57:43 -0700 Subject: drm/xe/huc: HuC is not supported on GTs that don't have video engines On MTL-style multi-gt platforms, the HuC is only available on the media GT, so we need to consider it as not supported on the render GT. 
Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_huc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 293403d16f25..386efa180c1c 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -35,10 +35,19 @@ huc_to_guc(struct xe_huc *huc) int xe_huc_init(struct xe_huc *huc) { - struct xe_device *xe = huc_to_xe(huc); + struct xe_gt *gt = huc_to_gt(huc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); int ret; huc->fw.type = XE_UC_FW_TYPE_HUC; + + /* On platforms with a media GT the HuC is only available there */ + if (tile->media_gt && (gt != tile->media_gt)) { + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); + return 0; + } + ret = xe_uc_fw_init(&huc->fw); if (ret) goto out; -- cgit v1.2.3 From 185f93f3041fe520c6df16a58bea116077d3f848 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 25 Oct 2023 10:57:44 -0700 Subject: drm/xe/huc: Don't re-auth HuC if it's already authenticated On newer platforms the HuC survives reset and stays authenticated, so no need to re-authenticate it. Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_huc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 386efa180c1c..2f176badab26 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -83,6 +83,12 @@ int xe_huc_auth(struct xe_huc *huc) xe_assert(xe, !xe_uc_fw_is_running(&huc->fw)); + /* On newer platforms the HuC survives reset, so no need to re-auth */ + if (xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO) & HUC_LOAD_SUCCESSFUL) { + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); + return 0; + } + if (!xe_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; -- cgit v1.2.3 From bfeb4ac55565f527f72e97020a244f8c3585154a Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 25 Oct 2023 10:57:45 -0700 Subject: drm/xe/huc: Define HuC for MTL MTL uses a versionless GSC-enabled binary. 
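(Worked example, inferred from the MAKE_FW_PATH()/no_ver() helpers visible in the diff below rather than stated by the patch itself: the new METEORLAKE entry should expand to a request path along the lines of "i915/mtl_huc_gsc.bin", i.e. the GSC-enabled HuC blob is pulled from the i915 firmware directory with no version number encoded in the filename.)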
v2: don't use the filename to identify the header type (Lucas) v3: fix commit msg (Lucas) Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index d1cc99f86ec0..91d4a2272ee7 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -112,12 +112,13 @@ struct fw_blobs_by_type { fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 5)) \ fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 5)) -#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ - fw_def(DG1, no_ver(i915, huc, dg1)) \ - fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \ - fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ - fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ - fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) +#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \ + fw_def(DG1, no_ver(i915, huc, dg1)) \ + fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \ + fw_def(ALDERLAKE_S, no_ver(i915, huc, tgl)) \ + fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ + fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) #define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \ __stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin" -- cgit v1.2.3 From 65e95735882329632559cf71c9efbb4981473b07 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 24 Oct 2023 15:04:12 -0700 Subject: drm/xe: Fix WA 14010918519 write to wrong register FORCE_SLM_FENCE_SCOPE_TO_TILE and FORCE_UGM_FENCE_SCOPE_TO_TILE are in the up dword of LSC_CHICKEN_BIT_0 register. Also, the 14010918519 workaround only applies to early steppings, A*. Eventually those should be dropped, like they were in commit eaeb4b361452 ("drm/i915/dg2: Drop pre-production GT workarounds"), so let's make sure they are annotated appropriately. Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20231024220412.223868-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index ccb075aac7da..ce61609b001c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -367,9 +367,9 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) }, { XE_RTP_NAME("14010918519"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), + XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, /* -- cgit v1.2.3 From b8d70702def26d7597eded092fe43cc584c0d064 Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Fri, 27 Oct 2023 10:55:07 +0530 Subject: drm/xe/xe_exec_queue: Add check for access counter granularity Add conditional check for access counter granularity. This check will return -EINVAL if granularity is beyond 64M which is a hardware limitation. v2: Defined XE_ACC_GRANULARITY_128K 0 XE_ACC_GRANULARITY_2M 1 XE_ACC_GRANULARITY_16M 2 XE_ACC_GRANULARITY_64M 3 as part of uAPI. 
So, that user can also use it.(Oak) v3: Move uAPI to proper location and give proper documentation.(Brian, Oak) Cc: Oak Zeng Cc: Janga Rahul Kumar Cc: Brian Welty Signed-off-by: Priyanka Dandamudi Reviewed-by: Oak Zeng Reviewed-by: Oak Zeng Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 3 +++ include/uapi/drm/xe_drm.h | 14 ++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 8e0620cb89e5..f67a6dee4a6f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -393,6 +393,9 @@ static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_q if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) return -EINVAL; + if (value > XE_ACC_GRANULARITY_64M) + return -EINVAL; + q->usm.acc_granularity = value; return 0; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 24bf8f0f52e8..9bd7092a7ea4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -731,6 +731,20 @@ struct drm_xe_vm_bind { __u64 reserved[2]; }; +/* For use with XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY */ + +/* Monitor 128KB contiguous region with 4K sub-granularity */ +#define XE_ACC_GRANULARITY_128K 0 + +/* Monitor 2MB contiguous region with 64KB sub-granularity */ +#define XE_ACC_GRANULARITY_2M 1 + +/* Monitor 16MB contiguous region with 512KB sub-granularity */ +#define XE_ACC_GRANULARITY_16M 2 + +/* Monitor 64MB contiguous region with 2M sub-granularity */ +#define XE_ACC_GRANULARITY_64M 3 + /** * struct drm_xe_exec_queue_set_property - exec queue set property * -- cgit v1.2.3 From 7eea3fb67a30a81c1751097753885657a1ace021 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 30 Oct 2023 20:37:56 +0530 Subject: drm/xe/xelpmp: Extend Wa_22016670082 to Xe_LPM+ Add Xe_LPM+ support to an existing workaround. 
BSpec: 51762 Signed-off-by: Shekhar Chauhan Link: https://lore.kernel.org/r/20231030150756.1011777-1-shekhar.chauhan@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index b00fe089525a..7a6407e38265 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -152,6 +152,7 @@ #define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) #define SQCNT1 XE_REG_MCR(0x8718) +#define XELPMP_SQCNT1 XE_REG(0x8718) #define ENFORCE_RAR REG_BIT(23) #define XEHP_SQCM XE_REG_MCR(0x8724) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index ce61609b001c..2f1782db267b 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -246,6 +246,13 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR)) }, + /* Xe_LPM+ */ + + { XE_RTP_NAME("22016670082"), + XE_RTP_RULES(MEDIA_VERSION(1300)), + XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR)) + }, + /* Xe2_LPG */ { XE_RTP_NAME("16020975621"), -- cgit v1.2.3 From 4202dd9fc43e9d9dba54e1b72a301108cdec84fb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 25 Oct 2023 18:39:39 +0100 Subject: drm/xe/migrate: fix MI_ARB_ON_OFF usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec says: "This is a privileged command; it will not be effective (will be converted to a no-op) if executed from within a non-privileged batch buffer." However here it looks like we are just emitting it inside some bb which was jumped to via the ppGTT, which should be considered a non-privileged address space. It looks like we just need some way of preventing things like the emit_pte() and later copy/clear being preempted in-between so rather just emit directly in the ring for migration jobs. Bspec: 45716 Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Matt Roper Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 16 ---------------- drivers/gpu/drm/xe/xe_ring_ops.c | 2 ++ 2 files changed, 2 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 731beb622fe8..67b71244b1f2 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -406,12 +406,6 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) return m; } -static void emit_arb_clear(struct xe_bb *bb) -{ - /* 1 dword */ - bb->cs[bb->len++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; -} - static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur) { /* @@ -745,10 +739,6 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, goto err_sync; } - /* Preemption is enabled again by the ring ops. */ - if (!src_is_vram || !dst_is_vram) - emit_arb_clear(bb); - if (!src_is_vram) emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0, src_bo); @@ -994,7 +984,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, /* Preemption is enabled again by the ring ops. */ if (!clear_vram) { - emit_arb_clear(bb); emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0, bo); } else { @@ -1285,9 +1274,6 @@ xe_migrate_update_pgtables(struct xe_migrate *m, VM_SA_UPDATE_UNIT_SIZE; } - /* Preemption is enabled again by the ring ops. 
*/ - emit_arb_clear(bb); - /* Map our PT's to gtt */ bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(num_updates); bb->cs[bb->len++] = ppgtt_ofs * XE_PAGE_SIZE + page_ofs; @@ -1316,8 +1302,6 @@ xe_migrate_update_pgtables(struct xe_migrate *m, bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - /* Preemption is enabled again by the ring ops. */ - emit_arb_clear(bb); for (i = 0; i < num_updates; i++) write_pgtable(tile, bb, 0, &updates[i], pt_update); } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 58676f4b989f..59e0aa2d6a4c 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -355,6 +355,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), seqno, dw, i); + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ + i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); /* XXX: Do we need this? Leaving for now. */ -- cgit v1.2.3 From a667cf56dbd64e35f8f34ec47549888fa28878fb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 25 Oct 2023 18:39:40 +0100 Subject: drm/xe/bo: consider dma-resv fences for clear job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There could be active fences already in the dma-resv for the object prior to clearing. Make sure to input them as dependencies for the clear job. v2 (Matt B): - We can use USAGE_KERNEL here, since it's only the move fences we care about here. Also add a comment. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 67b71244b1f2..53b5b36aca66 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -980,8 +980,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, size -= clear_L0; - /* TODO: Add dependencies here */ - /* Preemption is enabled again by the ring ops. */ if (!clear_vram) { emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0, @@ -1010,6 +1008,18 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, } xe_sched_job_add_migrate_flush(job, flush_flags); + if (!fence) { + /* + * There can't be anything userspace related at this + * point, so we just need to respect any potential move + * fences, which are always tracked as + * DMA_RESV_USAGE_KERNEL. + */ + err = job_add_deps(job, bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL); + if (err) + goto err_job; + } xe_sched_job_arm(job); dma_fence_put(fence); @@ -1024,6 +1034,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, xe_bb_free(bb, fence); continue; +err_job: + xe_sched_job_put(job); err: mutex_unlock(&m->job_mutex); xe_bb_free(bb, NULL); -- cgit v1.2.3 From 503a6f4e4f961acbbcac8d36f51226f3d3cfa7b7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 25 Oct 2023 18:39:41 +0100 Subject: drm/xe/bo: sync kernel fences for KMD buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With things like pipelined evictions, VRAM pages can be marked as free and yet still have some active kernel fences, with the idea that the next caller to allocate the memory will respect them. However it looks like we are missing synchronisation for KMD internal buffers, like page-tables, lrc etc. 
For userspace objects we should already have the required synchronisation for CPU access via the fault handler, and likewise for GPU access when vm_binding them. To fix this synchronise against any kernel fences for all KMD objects at creation. This should resolve some severe corruption seen during evictions. v2 (Matt B): - Revamp the comment explaining this. Also mention why USAGE_KERNEL is correct here. v3 (Thomas): - Make sure to use ctx.interruptible for the wait. Testcase: igt@xe-evict-ccs Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/853 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/855 Reported-by: Zbigniew Kempczyński Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Thomas Hellström Tested-by: Zbigniew Kempczyński Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1a10d9324a07..4467d711aa1f 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1269,6 +1269,37 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, if (err) return ERR_PTR(err); + /* + * The VRAM pages underneath are potentially still being accessed by the + * GPU, as per async GPU clearing and async evictions. However TTM makes + * sure to add any corresponding move/clear fences into the objects + * dma-resv using the DMA_RESV_USAGE_KERNEL slot. + * + * For KMD internal buffers we don't care about GPU clearing, however we + * still need to handle async evictions, where the VRAM is still being + * accessed by the GPU. Most internal callers are not expecting this, + * since they are missing the required synchronisation before accessing + * the memory. To keep things simple just sync wait any kernel fences + * here, if the buffer is designated KMD internal. + * + * For normal userspace objects we should already have the required + * pipelining or sync waiting elsewhere, since we already have to deal + * with things like async GPU clearing. + */ + if (type == ttm_bo_type_kernel) { + long timeout = dma_resv_wait_timeout(bo->ttm.base.resv, + DMA_RESV_USAGE_KERNEL, + ctx.interruptible, + MAX_SCHEDULE_TIMEOUT); + + if (timeout < 0) { + if (!resv) + dma_resv_unlock(bo->ttm.base.resv); + xe_bo_put(bo); + return ERR_PTR(timeout); + } + } + bo->created = true; if (bulk) ttm_bo_set_bulk_move(&bo->ttm, bulk); -- cgit v1.2.3 From b42ff0462d9eb7b84e31152c63c9809b6f743bf8 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 30 Oct 2023 17:26:16 +0530 Subject: drm/xe/hwmon: Add kernel doc and refactor xe hwmon Add kernel doc and refactor some of the hwmon functions, there is no functionality change. 
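One detail worth spelling out for the diff below: 64-bit registers such as PKG_POWER_SKU are now read through xe_mmio_read64_2x32(), whose body is not part of this patch. Purely as a sketch of the usual pattern for such split reads (an assumption about the general technique, not a copy of the xe_mmio.c helper), a tear-resistant 2x32 read can re-check the upper half:

static u64 read64_2x32_sketch(struct xe_gt *gt, struct xe_reg lo, struct xe_reg hi)
{
	u32 ldw, udw, oldudw;
	int retries = 5;

	/* If the low dword wraps between reads, the high dword changes; retry. */
	oldudw = xe_mmio_read32(gt, hi);
	do {
		ldw = xe_mmio_read32(gt, lo);
		udw = xe_mmio_read32(gt, hi);
		if (udw == oldudw)
			break;
		oldudw = udw;
	} while (--retries);

	return ((u64)udw << 32) | ldw;
}

Without the re-read, an energy or power counter crossing a 32-bit boundary between the two accesses could be reported wildly wrong.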
Cc: Anshuman Gupta Cc: Ashutosh Dixit Signed-off-by: Badal Nilawar Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20231030115618.1382200-2-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 201 +++++++++++++++++++----------------------- 1 file changed, 91 insertions(+), 110 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 9ac05994a967..9b7773441f62 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -26,9 +26,9 @@ enum xe_hwmon_reg { }; enum xe_hwmon_reg_operation { - REG_READ, - REG_WRITE, - REG_RMW, + REG_READ32, + REG_RMW32, + REG_READ64, }; /* @@ -39,18 +39,32 @@ enum xe_hwmon_reg_operation { #define SF_VOLTAGE 1000 /* millivolts */ #define SF_ENERGY 1000000 /* microjoules */ +/** + * struct xe_hwmon_energy_info - to accumulate energy + */ struct xe_hwmon_energy_info { + /** @reg_val_prev: previous energy reg val */ u32 reg_val_prev; - long accum_energy; /* Accumulated energy for energy1_input */ + /** @accum_energy: accumulated energy */ + long accum_energy; }; +/** + * struct xe_hwmon - xe hwmon data structure + */ struct xe_hwmon { + /** @hwmon_dev: hwmon device for xe */ struct device *hwmon_dev; + /** @gt: primary gt */ struct xe_gt *gt; - struct mutex hwmon_lock; /* rmw operations*/ + /** @hwmon_lock: lock for rmw operations */ + struct mutex hwmon_lock; + /** @scl_shift_power: pkg power unit */ int scl_shift_power; + /** @scl_shift_energy: pkg energy unit */ int scl_shift_energy; - struct xe_hwmon_energy_info ei; /* Energy info for energy1_input */ + /** @ei: Energy info for energy1_input */ + struct xe_hwmon_energy_info ei; }; static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) @@ -95,49 +109,34 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) return reg.raw; } -static int xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, - enum xe_hwmon_reg_operation operation, u32 *value, - u32 clr, u32 set) +static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, + enum xe_hwmon_reg_operation operation, u64 *value, + u32 clr, u32 set) { struct xe_reg reg; reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg); if (!reg.raw) - return -EOPNOTSUPP; + return; switch (operation) { - case REG_READ: + case REG_READ32: *value = xe_mmio_read32(hwmon->gt, reg); - return 0; - case REG_WRITE: - xe_mmio_write32(hwmon->gt, reg, *value); - return 0; - case REG_RMW: + break; + case REG_RMW32: *value = xe_mmio_rmw32(hwmon->gt, reg, clr, set); - return 0; + break; + case REG_READ64: + *value = xe_mmio_read64_2x32(hwmon->gt, reg); + break; default: drm_warn(>_to_xe(hwmon->gt)->drm, "Invalid xe hwmon reg operation: %d\n", operation); - return -EOPNOTSUPP; + break; } } -static int xe_hwmon_process_reg_read64(struct xe_hwmon *hwmon, - enum xe_hwmon_reg hwmon_reg, u64 *value) -{ - struct xe_reg reg; - - reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg); - - if (!reg.raw) - return -EOPNOTSUPP; - - *value = xe_mmio_read64_2x32(hwmon->gt, reg); - - return 0; -} - #define PL1_DISABLE 0 /* @@ -146,42 +145,39 @@ static int xe_hwmon_process_reg_read64(struct xe_hwmon *hwmon, * same pattern for sysfs, allow arbitrary PL1 limits to be set but display * clamped values when read. 
*/ -static int xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) +static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) { - u32 reg_val; - u64 reg_val64, min, max; + u64 reg_val, min, max; - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ, ®_val, 0, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, 0, 0); /* Check if PL1 limit is disabled */ if (!(reg_val & PKG_PWR_LIM_1_EN)) { *value = PL1_DISABLE; - return 0; + return; } reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - xe_hwmon_process_reg_read64(hwmon, REG_PKG_POWER_SKU, ®_val64); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val64); + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, ®_val, 0, 0); + min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val64); + max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); if (min && max) *value = clamp_t(u64, *value, min, max); - - return 0; } static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) { - u32 reg_val; + u64 reg_val; /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ if (value == PL1_DISABLE) { - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW, ®_val, + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, PKG_PWR_LIM_1_EN, 0); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ, ®_val, + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, PKG_PWR_LIM_1_EN, 0); if (reg_val & PKG_PWR_LIM_1_EN) @@ -192,21 +188,19 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW, ®_val, + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); return 0; } -static int xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) +static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) { - u32 reg_val; + u64 reg_val; - xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ, ®_val, 0, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, ®_val, 0, 0); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - - return 0; } /* @@ -233,13 +227,11 @@ static void xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) { struct xe_hwmon_energy_info *ei = &hwmon->ei; - u32 reg_val; - - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + u64 reg_val; mutex_lock(&hwmon->hwmon_lock); - xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ, + xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32, ®_val, 0, 0); if (reg_val >= ei->reg_val_prev) @@ -253,8 +245,6 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) hwmon->scl_shift_energy); mutex_unlock(&hwmon->hwmon_lock); - - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); } static const struct hwmon_channel_info *hwmon_info[] = { @@ -284,16 +274,39 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) uval); } -static int xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value) +static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor) { - u32 reg_val; + int 
ret; + u32 uval; + + ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); + if (ret) + return ret; + + *value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), + scale_factor, POWER_SETUP_I1_SHIFT); + return ret; +} + +static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor) +{ + int ret; + u32 uval; + + uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); + ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval); + + return ret; +} + +static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value) +{ + u64 reg_val; xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS, - REG_READ, ®_val, 0, 0); + REG_READ32, ®_val, 0, 0); /* HW register value in units of 2.5 millivolt */ *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); - - return 0; } static umode_t @@ -317,23 +330,15 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan) static int xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val) { - int ret; - u32 uval; - switch (attr) { case hwmon_power_max: - return xe_hwmon_power_max_read(hwmon, val); + xe_hwmon_power_max_read(hwmon, val); + return 0; case hwmon_power_rated_max: - return xe_hwmon_power_rated_max_read(hwmon, val); - case hwmon_power_crit: - ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); - if (ret) - return ret; - if (!(uval & POWER_SETUP_I1_WATTS)) - return -ENODEV; - *val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), - SF_POWER, POWER_SETUP_I1_SHIFT); + xe_hwmon_power_rated_max_read(hwmon, val); return 0; + case hwmon_power_crit: + return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER); default: return -EOPNOTSUPP; } @@ -342,14 +347,11 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val) static int xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val) { - u32 uval; - switch (attr) { case hwmon_power_max: return xe_hwmon_power_max_write(hwmon, val); case hwmon_power_crit: - uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER); - return xe_hwmon_pcode_write_i1(hwmon->gt, uval); + return xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER); default: return -EOPNOTSUPP; } @@ -372,19 +374,9 @@ xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr) static int xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val) { - int ret; - u32 uval; - switch (attr) { case hwmon_curr_crit: - ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); - if (ret) - return ret; - if (uval & POWER_SETUP_I1_WATTS) - return -ENODEV; - *val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), - SF_CURR, POWER_SETUP_I1_SHIFT); - return 0; + return xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR); default: return -EOPNOTSUPP; } @@ -393,12 +385,9 @@ xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val) static int xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val) { - u32 uval; - switch (attr) { case hwmon_curr_crit: - uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_CURR); - return xe_hwmon_pcode_write_i1(hwmon->gt, uval); + return xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR); default: return -EOPNOTSUPP; } @@ -418,21 +407,13 @@ xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr) static int xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val) { - int ret; - - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); - switch (attr) { case hwmon_in_input: - ret = xe_hwmon_get_voltage(hwmon, val); - break; + 
xe_hwmon_get_voltage(hwmon, val); + return 0; default: - ret = -EOPNOTSUPP; + return -EOPNOTSUPP; } - - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); - - return ret; } static umode_t @@ -564,15 +545,15 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) { struct xe_hwmon *hwmon = xe->hwmon; long energy; - u32 val_sku_unit = 0; - int ret; + u64 val_sku_unit = 0; - ret = xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, REG_READ, &val_sku_unit, 0, 0); /* * The contents of register PKG_POWER_SKU_UNIT do not change, * so read it once and store the shift values. */ - if (!ret) { + if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) { + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, + REG_READ32, &val_sku_unit, 0, 0); hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); } -- cgit v1.2.3 From fef6dd12b45a1a15c24c9df30fb2c27e68984665 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 30 Oct 2023 17:26:17 +0530 Subject: drm/xe/hwmon: Protect hwmon rw attributes with hwmon_lock Take hwmon_lock while accessing hwmon rw attributes. For readonly attributes its not required to take lock as reads are protected by sysfs layer and therefore sequential. Cc: Ashutosh Dixit Cc: Anshuman Gupta Signed-off-by: Badal Nilawar Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20231030115618.1382200-3-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 9b7773441f62..7b4f88fdf647 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -57,7 +57,7 @@ struct xe_hwmon { struct device *hwmon_dev; /** @gt: primary gt */ struct xe_gt *gt; - /** @hwmon_lock: lock for rmw operations */ + /** @hwmon_lock: lock for rw attributes*/ struct mutex hwmon_lock; /** @scl_shift_power: pkg power unit */ int scl_shift_power; @@ -149,11 +149,13 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) { u64 reg_val, min, max; + mutex_lock(&hwmon->hwmon_lock); + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, 0, 0); /* Check if PL1 limit is disabled */ if (!(reg_val & PKG_PWR_LIM_1_EN)) { *value = PL1_DISABLE; - return; + goto unlock; } reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); @@ -167,12 +169,17 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) if (min && max) *value = clamp_t(u64, *value, min, max); +unlock: + mutex_unlock(&hwmon->hwmon_lock); } static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) { + int ret = 0; u64 reg_val; + mutex_lock(&hwmon->hwmon_lock); + /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ if (value == PL1_DISABLE) { xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, @@ -180,8 +187,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, PKG_PWR_LIM_1_EN, 0); - if (reg_val & PKG_PWR_LIM_1_EN) - return -EOPNOTSUPP; + if (reg_val & PKG_PWR_LIM_1_EN) { + ret = -EOPNOTSUPP; + goto unlock; + } } /* Computation in 64-bits to avoid overflow. Round to nearest. 
*/ @@ -190,8 +199,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); - - return 0; +unlock: + mutex_unlock(&hwmon->hwmon_lock); + return ret; } static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) @@ -229,8 +239,6 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) struct xe_hwmon_energy_info *ei = &hwmon->ei; u64 reg_val; - mutex_lock(&hwmon->hwmon_lock); - xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32, ®_val, 0, 0); @@ -243,8 +251,6 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, hwmon->scl_shift_energy); - - mutex_unlock(&hwmon->hwmon_lock); } static const struct hwmon_channel_info *hwmon_info[] = { @@ -279,12 +285,16 @@ static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u3 int ret; u32 uval; + mutex_lock(&hwmon->hwmon_lock); + ret = xe_hwmon_pcode_read_i1(hwmon->gt, &uval); if (ret) - return ret; + goto unlock; *value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), scale_factor, POWER_SETUP_I1_SHIFT); +unlock: + mutex_unlock(&hwmon->hwmon_lock); return ret; } @@ -293,9 +303,12 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u3 int ret; u32 uval; + mutex_lock(&hwmon->hwmon_lock); + uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); ret = xe_hwmon_pcode_write_i1(hwmon->gt, uval); + mutex_unlock(&hwmon->hwmon_lock); return ret; } -- cgit v1.2.3 From 4446fcf220ceab4f6d0cc4ae3b1338a0ceeeb72e Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 30 Oct 2023 17:26:18 +0530 Subject: drm/xe/hwmon: Expose power1_max_interval Expose power1_max_interval, that is the tau corresponding to PL1, as a custom hwmon attribute. Some bit manipulation is needed because of the format of PKG_PWR_LIM_1_TIME in PACKAGE_RAPL_LIMIT register (1.x * power(2,y)) v2: Get rpm wake ref while accessing power1_max_interval v3: %s/hwmon/xe_hwmon/ v4: - As power1_max_interval is rw attr take lock in read function as well - Refine comment about val to fix point conversion (Andi) - Update kernel version and date in doc v5: Fix review comments (Anshuman) Acked-by: Rodrigo Vivi Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Anshuman Gupta Signed-off-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20231030115618.1382200-4-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 9 ++ drivers/gpu/drm/xe/regs/xe_mchbar_regs.h | 8 ++ drivers/gpu/drm/xe/xe_hwmon.c | 153 ++++++++++++++++++++- 3 files changed, 169 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 1a7a6c23e141..8c321bc9dc04 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -59,3 +59,12 @@ Contact: intel-xe@lists.freedesktop.org Description: RO. Energy input of device in microjoules. Only supported for particular Intel xe graphics platforms. + +What: /sys/devices/.../hwmon/hwmon/power1_max_interval +Date: October 2023 +KernelVersion: 6.6 +Contact: intel-xe@lists.freedesktop.org +Description: RW. Sustained power limit interval (Tau in PL1/Tau) in + milliseconds over which sustained power is averaged. 
+ + Only supported for particular Intel xe graphics platforms. diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index d8ecbe1858d1..519dd1067a19 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -22,15 +22,23 @@ #define PKG_TDP GENMASK_ULL(14, 0) #define PKG_MIN_PWR GENMASK_ULL(30, 16) #define PKG_MAX_PWR GENMASK_ULL(46, 32) +#define PKG_MAX_WIN GENMASK_ULL(54, 48) +#define PKG_MAX_WIN_X GENMASK_ULL(54, 53) +#define PKG_MAX_WIN_Y GENMASK_ULL(52, 48) + #define PCU_CR_PACKAGE_POWER_SKU_UNIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938) #define PKG_PWR_UNIT REG_GENMASK(3, 0) #define PKG_ENERGY_UNIT REG_GENMASK(12, 8) +#define PKG_TIME_UNIT REG_GENMASK(19, 16) #define PCU_CR_PACKAGE_ENERGY_STATUS XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x593c) #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) #define PKG_PWR_LIM_1 REG_GENMASK(14, 0) #define PKG_PWR_LIM_1_EN REG_BIT(15) +#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17) +#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22) +#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17) #endif /* _XE_MCHBAR_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 7b4f88fdf647..6ef2aa1eae8b 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -38,6 +38,7 @@ enum xe_hwmon_reg_operation { #define SF_CURR 1000 /* milliamperes */ #define SF_VOLTAGE 1000 /* millivolts */ #define SF_ENERGY 1000000 /* microjoules */ +#define SF_TIME 1000 /* milliseconds */ /** * struct xe_hwmon_energy_info - to accumulate energy @@ -63,6 +64,8 @@ struct xe_hwmon { int scl_shift_power; /** @scl_shift_energy: pkg energy unit */ int scl_shift_energy; + /** @scl_shift_time: pkg time unit */ + int scl_shift_time; /** @ei: Energy info for energy1_input */ struct xe_hwmon_energy_info ei; }; @@ -253,6 +256,152 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) hwmon->scl_shift_energy); } +static ssize_t +xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + u32 x, y, x_w = 2; /* 2 bits */ + u64 r, tau4, out; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + mutex_lock(&hwmon->hwmon_lock); + + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, + REG_READ32, &r, 0, 0); + + mutex_unlock(&hwmon->hwmon_lock); + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); + y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); + + /* + * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) + * = (4 | x) << (y - 2) + * + * Here (y - 2) ensures a 1.x fixed point representation of 1.x + * As x is 2 bits so 1.x can be 1.0, 1.25, 1.50, 1.75 + * + * As y can be < 2, we compute tau4 = (4 | x) << y + * and then add 2 when doing the final right shift to account for units + */ + tau4 = ((1 << x_w) | x) << y; + + /* val in hwmon interface units (millisec) */ + out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); + + return sysfs_emit(buf, "%llu\n", out); +} + +static ssize_t +xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + u32 x, y, rxy, x_w = 2; /* 2 bits */ + u64 tau4, r, max_win; + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + /* + * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12. 
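+	 * (That is 1.0 * power(2, 0x12) = 2^18 hardware time units; one time unit is 1 / 2^scl_shift_time seconds.)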
+ * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds. + * + * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register. + * However, it is observed that existing discrete GPUs does not provide correct + * PKG_MAX_WIN value, therefore a using default constant value. For future discrete GPUs + * this may get resolved, in which case PKG_MAX_WIN should be obtained from PKG_PWR_SKU. + */ +#define PKG_MAX_WIN_DEFAULT 0x12ull + + /* + * val must be < max in hwmon interface units. The steps below are + * explained in xe_hwmon_power1_max_interval_show() + */ + r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); + x = REG_FIELD_GET(PKG_MAX_WIN_X, r); + y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); + tau4 = ((1 << x_w) | x) << y; + max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); + + if (val > max_win) + return -EINVAL; + + /* val in hw units */ + val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); + + /* + * Convert val to 1.x * power(2,y) + * y = ilog2(val) + * x = (val - (1 << y)) >> (y - 2) + */ + if (!val) { + y = 0; + x = 0; + } else { + y = ilog2(val); + x = (val - (1ul << y)) << x_w >> y; + } + + rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + mutex_lock(&hwmon->hwmon_lock); + + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r, + PKG_PWR_LIM_1_TIME, rxy); + + mutex_unlock(&hwmon->hwmon_lock); + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return count; +} + +static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, + xe_hwmon_power1_max_interval_show, + xe_hwmon_power1_max_interval_store, 0); + +static struct attribute *hwmon_attributes[] = { + &sensor_dev_attr_power1_max_interval.dev_attr.attr, + NULL +}; + +static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct xe_hwmon *hwmon = dev_get_drvdata(dev); + int ret = 0; + + xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + + if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr) + ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 
attr->mode : 0; + + xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + + return ret; +} + +static const struct attribute_group hwmon_attrgroup = { + .attrs = hwmon_attributes, + .is_visible = xe_hwmon_attributes_visible, +}; + +static const struct attribute_group *hwmon_groups[] = { + &hwmon_attrgroup, + NULL +}; + static const struct hwmon_channel_info *hwmon_info[] = { HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), @@ -569,6 +718,7 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) REG_READ32, &val_sku_unit, 0, 0); hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); + hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); } /* @@ -615,7 +765,8 @@ void xe_hwmon_register(struct xe_device *xe) /* hwmon_dev points to device hwmon */ hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon, &hwmon_chip_info, - NULL); + hwmon_groups); + if (IS_ERR(hwmon->hwmon_dev)) { drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); xe->hwmon = NULL; -- cgit v1.2.3 From 4e002016a1e5b5d0b29191a82d4f561f175f3d33 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Mon, 25 Sep 2023 14:02:32 -0700 Subject: drm/xe: Replace xe_ttm_vram_mgr.tile with xe_mem_region Replace the xe_ttm_vram_mgr.tile pointer with a xe_mem_region pointer instead. The former is currently unused. TTM VRAM regions are exposing device vram and is better to store a pointer directly to the xe_mem_region instead of the tile. This allows to cleanup unnecessary usage of xe_tile from xe_bo.c in later patch. Signed-off-by: Brian Welty Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 7 +++---- drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 285791eb4a79..953e5dc0fd80 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -358,12 +358,11 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) { struct xe_device *xe = tile_to_xe(tile); + struct xe_mem_region *vram = &tile->mem.vram; - mgr->tile = tile; - + mgr->vram = vram; return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, - tile->mem.vram.usable_size, - tile->mem.vram.io_size, + vram->usable_size, vram->io_size, PAGE_SIZE); } diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index 48bb991c14a5..2d75cf126289 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -9,7 +9,7 @@ #include #include -struct xe_tile; +struct xe_mem_region; /** * struct xe_ttm_vram_mgr - XE TTM VRAM manager @@ -17,12 +17,12 @@ struct xe_tile; * Manages placement of TTM resource in VRAM. 
*/ struct xe_ttm_vram_mgr { - /** @tile: Tile which the VRAM belongs to */ - struct xe_tile *tile; /** @manager: Base TTM resource manager */ struct ttm_resource_manager manager; /** @mm: DRM buddy allocator which manages the VRAM */ struct drm_buddy mm; + /** @vram: ptr to details of associated VRAM region */ + struct xe_mem_region *vram; /** @visible_size: Proped size of the CPU visible portion */ u64 visible_size; /** @visible_avail: CPU visible portion still unallocated */ -- cgit v1.2.3 From 4e11a1411ab41416be7f29716a767eb135f7aa74 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Tue, 26 Sep 2023 12:10:40 -0700 Subject: drm/xe: Remove unused xe_bo_to_tile Unused and would like to remove the memtype_to_tile() which it calls. Signed-off-by: Brian Welty Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 13 ------------- drivers/gpu/drm/xe/xe_bo.h | 2 -- 2 files changed, 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4467d711aa1f..bdd23090c0ad 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -109,19 +109,6 @@ mem_type_to_tile(struct xe_device *xe, u32 mem_type) return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)]; } -/** - * xe_bo_to_tile() - Get a tile from a BO's memory location - * @bo: The buffer object - * - * Get a tile from a BO's memory location, should be called on BOs in VRAM only. - * - * Return: xe_tile object which is closest to the BO - */ -struct xe_tile *xe_bo_to_tile(struct xe_bo *bo) -{ - return mem_type_to_tile(xe_bo_device(bo), bo->ttm.resource->mem_type); -} - static void try_add_system(struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 *c) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 3f4e2818f92c..6283e27bc425 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -122,8 +122,6 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags); -struct xe_tile *xe_bo_to_tile(struct xe_bo *bo); - static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo) { return container_of(bo, struct xe_bo, ttm); -- cgit v1.2.3 From fd0975b7cfee7d3e6db6771193b0cff230b7eec8 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Mon, 25 Sep 2023 17:12:48 -0700 Subject: drm/xe: Replace usage of mem_type_to_tile Currently mem_type_to_tile() is being used to access the tile's underlying tile.mem.vram. However, this function makes the assumption that a mem_type will only ever map to a single tile. Now that the TTM vram manager contains a pointer to the memory_region, make use of this in xe_bo.c. As such, introduce a helper function res_to_mem_region() to get the ttm_vram_mgr->vram from the BO's resource, and use this to replace usage of mem_type_to_tile(). xe_tile is still needed to choose the migration context, so this part is unchanged. But as this is only renaming usage, function is renamed now to mem_type_to_migrate(). 
Signed-off-by: Brian Welty Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 63 ++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index bdd23090c0ad..b96d1e7b9bad 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -101,12 +101,24 @@ static bool xe_bo_is_user(struct xe_bo *bo) return bo->flags & XE_BO_CREATE_USER_BIT; } -static struct xe_tile * -mem_type_to_tile(struct xe_device *xe, u32 mem_type) +static struct xe_migrate * +mem_type_to_migrate(struct xe_device *xe, u32 mem_type) { + struct xe_tile *tile; + xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type)); + tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)]; + return tile->migrate; +} + +static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res) +{ + struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); + struct ttm_resource_manager *mgr; - return &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)]; + xe_assert(xe, resource_is_vram(res)); + mgr = ttm_manager_type(&xe->ttm, res->mem_type); + return to_xe_ttm_vram_mgr(mgr)->vram; } static void try_add_system(struct xe_bo *bo, struct ttm_place *places, @@ -126,11 +138,13 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places, static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { - struct xe_tile *tile = mem_type_to_tile(xe, mem_type); struct ttm_place place = { .mem_type = mem_type }; - u64 io_size = tile->mem.vram.io_size; + struct xe_mem_region *vram; + u64 io_size; - xe_assert(xe, tile->mem.vram.usable_size); + vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram; + xe_assert(xe, vram && vram->usable_size); + io_size = vram->io_size; /* * For eviction / restore on suspend / resume objects @@ -140,7 +154,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, XE_BO_CREATE_GGTT_BIT)) place.flags |= TTM_PL_FLAG_CONTIGUOUS; - if (io_size < tile->mem.vram.usable_size) { + if (io_size < vram->usable_size) { if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) { place.fpfn = 0; place.lpfn = io_size >> PAGE_SHIFT; @@ -404,21 +418,21 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, return 0; case XE_PL_VRAM0: case XE_PL_VRAM1: { - struct xe_tile *tile = mem_type_to_tile(xe, mem->mem_type); struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(mem); + struct xe_mem_region *vram = res_to_mem_region(mem); if (vres->used_visible_size < mem->size) return -EINVAL; mem->bus.offset = mem->start << PAGE_SHIFT; - if (tile->mem.vram.mapping && + if (vram->mapping && mem->placement & TTM_PL_FLAG_CONTIGUOUS) - mem->bus.addr = (u8 *)tile->mem.vram.mapping + + mem->bus.addr = (u8 *)vram->mapping + mem->bus.offset; - mem->bus.offset += tile->mem.vram.io_start; + mem->bus.offset += vram->io_start; mem->bus.is_iomem = true; #if !defined(CONFIG_X86) @@ -614,7 +628,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, struct ttm_resource *old_mem = ttm_bo->resource; u32 old_mem_type = old_mem ? 
old_mem->mem_type : XE_PL_SYSTEM; struct ttm_tt *ttm = ttm_bo->ttm; - struct xe_tile *tile = NULL; + struct xe_migrate *migrate = NULL; struct dma_fence *fence; bool move_lacks_source; bool tt_has_data; @@ -692,14 +706,13 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } if (bo->tile) - tile = bo->tile; + migrate = bo->tile->migrate; else if (resource_is_vram(new_mem)) - tile = mem_type_to_tile(xe, new_mem->mem_type); + migrate = mem_type_to_migrate(xe, new_mem->mem_type); else if (mem_type_is_vram(old_mem_type)) - tile = mem_type_to_tile(xe, old_mem_type); + migrate = mem_type_to_migrate(xe, old_mem_type); - xe_assert(xe, tile); - xe_tile_assert(tile, tile->migrate); + xe_assert(xe, migrate); trace_xe_bo_move(bo); xe_device_mem_access_get(xe); @@ -720,7 +733,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Create a new VMAP once kernel BO back in VRAM */ if (!ret && resource_is_vram(new_mem)) { - void *new_addr = tile->mem.vram.mapping + + struct xe_mem_region *vram = res_to_mem_region(new_mem); + void *new_addr = vram->mapping + (new_mem->start << PAGE_SHIFT); if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { @@ -737,9 +751,9 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } else { if (move_lacks_source) - fence = xe_migrate_clear(tile->migrate, bo, new_mem); + fence = xe_migrate_clear(migrate, bo, new_mem); else - fence = xe_migrate_copy(tile->migrate, + fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem); if (IS_ERR(fence)) { ret = PTR_ERR(fence); @@ -908,16 +922,16 @@ err_res_free: static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo, unsigned long page_offset) { - struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); - struct xe_tile *tile = mem_type_to_tile(xe, ttm_bo->resource->mem_type); struct xe_res_cursor cursor; + struct xe_mem_region *vram; if (ttm_bo->resource->mem_type == XE_PL_STOLEN) return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT; + vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor); - return (tile->mem.vram.io_start + cursor.start) >> PAGE_SHIFT; + return (vram->io_start + cursor.start) >> PAGE_SHIFT; } static void __xe_bo_vunmap(struct xe_bo *bo); @@ -1492,12 +1506,11 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, uint64_t vram_region_gpu_offset(struct ttm_resource *res) { struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); - struct xe_tile *tile = mem_type_to_tile(xe, res->mem_type); if (res->mem_type == XE_PL_STOLEN) return xe_ttm_stolen_gpu_offset(xe); - return tile->mem.vram.dpa_base; + return res_to_mem_region(res)->dpa_base; } /** -- cgit v1.2.3 From ebb00b285bef8bcdc46ac4e344d5748539bdd213 Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Wed, 1 Nov 2023 04:38:38 +0530 Subject: drm/xe: Dump CTB during TLB timeout Print CTB info during TLB invalidation timeout event. 
Reviewed-by: Matthew Brost Signed-off-by: Pallavi Mishra Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index bd6005b9d498..b5c39c55e1fa 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -319,6 +319,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) { struct xe_device *xe = gt_to_xe(gt); struct xe_guc *guc = >->uc.guc; + struct drm_printer p = drm_err_printer(__func__); int ret; /* @@ -331,6 +332,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) if (!ret) { drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n", gt->info.id, seqno, gt->tlb_invalidation.seqno_recv); + xe_guc_ct_print(&guc->ct, &p, true); return -ETIME; } -- cgit v1.2.3 From 81d11b9d6625d3c2a9ecf68f41f3575e653c0ac7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 31 Oct 2023 11:46:45 -0700 Subject: drm/xe: Adjust tile_present mask when skipping rebinds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a rebind is skipped the tile_present mask needs to be updated for the newly created vma to properly reflect the state of the vma. Reported-by: Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 05f8c691f5fb..7aefa6aa66a1 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2340,6 +2340,10 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) op->flags |= XE_VMA_OP_COMMITTED; break; case DRM_GPUVA_OP_REMAP: + { + u8 tile_present = + gpuva_to_vma(op->base.remap.unmap->va)->tile_present; + prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), true); op->flags |= XE_VMA_OP_COMMITTED; @@ -2348,15 +2352,21 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) err |= xe_vm_insert_vma(vm, op->remap.prev); if (!err) op->flags |= XE_VMA_OP_PREV_COMMITTED; - if (!err && op->remap.skip_prev) + if (!err && op->remap.skip_prev) { + op->remap.prev->tile_present = + tile_present; op->remap.prev = NULL; + } } if (op->remap.next) { err |= xe_vm_insert_vma(vm, op->remap.next); if (!err) op->flags |= XE_VMA_OP_NEXT_COMMITTED; - if (!err && op->remap.skip_next) + if (!err && op->remap.skip_next) { + op->remap.next->tile_present = + tile_present; op->remap.next = NULL; + } } /* Adjust for partial unbind after removin VMA from VM */ @@ -2365,6 +2375,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) op->base.remap.unmap->va->va.range = op->remap.range; } break; + } case DRM_GPUVA_OP_UNMAP: prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); op->flags |= XE_VMA_OP_COMMITTED; -- cgit v1.2.3 From 74a6c6438ee7b53e7711fc0b7000ed42edd7dad5 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Tue, 31 Oct 2023 14:12:16 -0700 Subject: drm/xe: Fix dequeue of access counter work item The access counters worker function is fixed to advance the head pointer when dequeuing from the acc_queue. This now matches the similar logic in get_pagefault(). 
Signed-off-by: Bruce Chang Signed-off-by: Brian Welty Reviewed-by: Stuart Summers Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 4e33ef8c9d6a..018edf731ae5 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -542,6 +542,8 @@ unlock_vm: #define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) +#define ACC_MSG_LEN_DW 4 + static int get_acc(struct acc_queue *acc_queue, struct acc *acc) { const struct xe_guc_acc_desc *desc; @@ -562,6 +564,9 @@ static int get_acc(struct acc_queue *acc_queue, struct acc *acc) acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); + + acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) % + ACC_QUEUE_NUM_DW; } else { ret = -1; } @@ -589,8 +594,6 @@ static void acc_queue_work_func(struct work_struct *w) } } -#define ACC_MSG_LEN_DW 4 - static bool acc_queue_full(struct acc_queue *acc_queue) { lockdep_assert_held(&acc_queue->lock); -- cgit v1.2.3 From 4d5252b4ca1dc973b8b368c88f9d1e348f9c1906 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 31 Oct 2023 07:05:37 -0700 Subject: drm/xe/xe2: Program correct MOCS registers The LNCFCMOCS registers no longer exist on Xe2 so there's no need to attempt to program them. Since GLOB_MOCS is the only set of MOCS registers now, it's expected to be used for all platforms (both igpu and dgpu) going forward, so adjust the MOCS programming flags accordingly. v2: - Fix typo (global mocs condition is >=, not >) Bspec: 71582 Reviewed-by: Pallavi Mishra Link: https://lore.kernel.org/r/20231031140536.303746-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 21972bbef8fd..46e999257862 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -26,6 +26,7 @@ static inline void mocs_dbg(const struct drm_device *dev, enum { HAS_GLOBAL_MOCS = BIT(0), + HAS_LNCF_MOCS = BIT(1), }; struct xe_mocs_entry { @@ -473,8 +474,10 @@ static unsigned int get_mocs_settings(struct xe_device *xe, return 0; } - if (!IS_DGFX(xe)) + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) >= 20) flags |= HAS_GLOBAL_MOCS; + if (GRAPHICS_VER(xe) < 20) + flags |= HAS_LNCF_MOCS; return flags; } @@ -505,7 +508,7 @@ static void __init_mocs_table(struct xe_gt *gt, for (i = 0; i < info->n_entries ? 
(mocs = get_entry_control(info, i)), 1 : 0; i++) { - mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, + mocs_dbg(>_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i, XELP_GLOBAL_MOCS(i).addr, mocs); if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250) @@ -545,7 +548,7 @@ static void init_l3cc_table(struct xe_gt *gt, (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), get_entry_l3cc(info, 2 * i + 1))), 1 : 0; i++) { - mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr, + mocs_dbg(>_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr, l3cc); if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) @@ -570,19 +573,18 @@ void xe_mocs_init(struct xe_gt *gt) unsigned int flags; /* - * LLC and eDRAM control values are not applicable to dgfx + * MOCS settings are split between "GLOB_MOCS" and/or "LNCFCMOCS" + * registers depending on platform. + * + * These registers should be programmed before GuC initialization + * since their values will affect some of the memory transactions + * performed by the GuC. */ flags = get_mocs_settings(gt_to_xe(gt), &table); mocs_dbg(>_to_xe(gt)->drm, "flag:0x%x\n", flags); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt, &table); - - /* - * Initialize the L3CC table as part of mocs initalization to make - * sure the LNCFCMOCSx registers are programmed for the subsequent - * memory transactions including guc transactions - */ - if (table.table) + if (flags & HAS_LNCF_MOCS) init_l3cc_table(gt, &table); } -- cgit v1.2.3 From effc560d7a36b8c59219dd5374d9725a9edd85c4 Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Wed, 1 Nov 2023 22:02:12 +0530 Subject: drm/xe/mtl: Use 16.67 Mhz freq scale factor to get rpX For mtl and above 16.67 Mhz is the scale factor to calculate rpX frequencies. v2: Fix review comment (Ashutosh) Signed-off-by: Badal Nilawar Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20231101163212.1629685-1-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 74247e0d3674..020c6597cd78 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -313,7 +313,7 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc) else reg = xe_mmio_read32(gt, MTL_GT_RPE_FREQUENCY); - pc->rpe_freq = REG_FIELD_GET(MTL_RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); } static void tgl_update_rpe_value(struct xe_guc_pc *pc) @@ -653,10 +653,10 @@ static void mtl_init_fused_rp_values(struct xe_guc_pc *pc) reg = xe_mmio_read32(gt, MTL_MEDIAP_STATE_CAP); else reg = xe_mmio_read32(gt, MTL_RP_STATE_CAP); - pc->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, reg) * - GT_FREQUENCY_MULTIPLIER; - pc->rpn_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, reg) * - GT_FREQUENCY_MULTIPLIER; + + pc->rp0_freq = decode_freq(REG_FIELD_GET(MTL_RP0_CAP_MASK, reg)); + + pc->rpn_freq = decode_freq(REG_FIELD_GET(MTL_RPN_CAP_MASK, reg)); } static void tgl_init_fused_rp_values(struct xe_guc_pc *pc) -- cgit v1.2.3 From 5d30cfe003a98d2f4ad28fe27226f3f2e6784c65 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 2 Nov 2023 05:48:55 -0700 Subject: drm/xe: Add Wa_14019821291 This workaround is primarily implemented by the BIOS. 
However if the BIOS applies the workaround it will reserve a small piece of our DSM (which should be at the top, right below the WOPCM); we just need to keep that region reserved so that nothing else attempts to re-use it. v2 (Gustavo): - Check for NULL media_gt - Mask bits [5:0] to avoid potential issues in future platforms Signed-off-by: Matt Roper Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20231102124855.1940491-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++ drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 4 files changed, 29 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 32eee57b4184..1d39784e92fd 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -40,7 +40,7 @@ quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules $(call cmd,wa_oob) -$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o: $(generated_oob) +$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o $(obj)/xe_ttm_stolen_mgr.o: $(generated_oob) # Please keep these build lists sorted! diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 7a6407e38265..902c60543de0 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -158,6 +158,8 @@ #define XEHP_SQCM XE_REG_MCR(0x8724) #define EN_32B_ACCESS REG_BIT(30) +#define GSCPSMI_BASE XE_REG(0x880c) + #define MIRROR_FUSE3 XE_REG(0x9118) #define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16) #define L3BANK_PAIR_COUNT 4 diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 79fbd74a3944..0c533d36791d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -11,6 +11,8 @@ #include #include +#include "generated/xe_wa_oob.h" +#include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" @@ -19,6 +21,7 @@ #include "xe_res_cursor.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" +#include "xe_wa.h" struct xe_ttm_stolen_mgr { struct xe_ttm_vram_mgr base; @@ -112,6 +115,7 @@ static u32 get_wopcm_size(struct xe_device *xe) static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt; u32 stolen_size, wopcm_size; u32 ggc, gms; @@ -154,6 +158,27 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr stolen_size -= wopcm_size; + if (media_gt && XE_WA(media_gt, 14019821291)) { + u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE) + & ~GENMASK_ULL(5, 0); + + /* + * This workaround is primarily implemented by the BIOS. We + * just need to figure out whether the BIOS has applied the + * workaround (meaning the programmed address falls within + * the DSM) and, if so, reserve that part of the DSM to + * prevent accidental reuse. The DSM location should be just + * below the WOPCM. 
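+	 * When that is the case, stolen_size below is trimmed so that the usable stolen range ends at the programmed GSCPSMI base.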
+ */ + if (gscpsmi_base >= mgr->io_base && + gscpsmi_base < mgr->io_base + stolen_size) { + xe_gt_dbg(media_gt, + "Reserving %llu bytes of DSM for Wa_14019821291\n", + mgr->io_base + stolen_size - gscpsmi_base); + stolen_size = gscpsmi_base - mgr->io_base; + } + } + if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2))) return 0; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index f3ff774dc4aa..752842d734be 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -19,3 +19,4 @@ SUBPLATFORM(DG2, G12) 16017236439 PLATFORM(PVC) 22010954014 PLATFORM(DG2) +14019821291 MEDIA_VERSION_RANGE(1300, 2000) -- cgit v1.2.3 From 27a1a1e2e47d6f12a784413c194a94b6c0d7fdcb Mon Sep 17 00:00:00 2001 From: Carlos Santa Date: Thu, 26 Oct 2023 15:01:27 -0700 Subject: drm/xe: stringify the argument to avoid potential vulnerability This error gets printed inside a sandbox with warnings turned on. /mnt/host/source/src/third_party/kernel/v5.15/drivers/ gpu/drm/xe/xe_gt_idle_sysfs.c:87:26: error: format string is not a string literal (potentially insecure) [-Werror,-Wformat-security] return sysfs_emit(buff, gtidle->name); ^~~~~~~~~~~~ /mnt/host/source/src/third_party/kernel/v5.15/drivers/ gpu/drm/xe/xe_gt_idle_sysfs.c:87:26: note: treat the string as an argument to avoid this return sysfs_emit(buff, gtidle->name); ^ "%s", 1 error generated. CC: Rodrigo Vivi Signed-off-by: Carlos Santa Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_idle_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c index 7238e96a116c..8df9840811cd 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c @@ -85,7 +85,7 @@ static ssize_t name_show(struct device *dev, { struct xe_gt_idle *gtidle = dev_to_gtidle(dev); - return sysfs_emit(buff, gtidle->name); + return sysfs_emit(buff, "%s\n", gtidle->name); } static DEVICE_ATTR_RO(name); -- cgit v1.2.3 From 571622740288f801042a28598440a098249213fa Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 27 Oct 2023 11:42:55 +0200 Subject: drm/xe: implement driver initiated function-reset Driver initiated function-reset (FLR) is the highest level of reset that we can trigger from within the driver. In contrast to PCI FLR it doesn't require re-enumeration of PCI BAR. It can be useful in case GT fails to reset. It is also the only way to trigger GSC reset from the driver and can be used in future addition of GSC support. 
v2: - use regs from xe_regs.h - move the flag to xe.mmio - call flr only on root gt - use BIOS protection check - copy/paste comments from i915 v3: - flr code moved to xe_device.c v4: - needs_flr_on_fini moved to xe_device Signed-off-by: Andrzej Hajda Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 7 ++++ drivers/gpu/drm/xe/xe_device.c | 78 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_device_types.h | 3 ++ drivers/gpu/drm/xe/xe_gt.c | 2 + 4 files changed, 90 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index e4408473e802..7202084198bd 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -70,8 +70,15 @@ #define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084) +#define GU_CNTL_PROTECTED XE_REG(0x10100C) +#define DRIVERINT_FLR_DIS REG_BIT(31) + #define GU_CNTL XE_REG(0x101010) #define LMEM_INIT REG_BIT(7) +#define DRIVERFLR REG_BIT(31) + +#define GU_DEBUG XE_REG(0x101018) +#define DRIVERFLR_STATUS REG_BIT(31) #define XEHP_CLOCK_GATE_DIS XE_REG(0x101014) #define SGSI_SIDECLK_DIS REG_BIT(17) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ae0b7349c3e3..5869ba7e0cdc 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -5,6 +5,8 @@ #include "xe_device.h" +#include + #include #include #include @@ -252,6 +254,78 @@ err_put: return ERR_PTR(err); } +/* + * The driver-initiated FLR is the highest level of reset that we can trigger + * from within the driver. It is different from the PCI FLR in that it doesn't + * fully reset the SGUnit and doesn't modify the PCI config space and therefore + * it doesn't require a re-enumeration of the PCI BARs. However, the + * driver-initiated FLR does still cause a reset of both GT and display and a + * memory wipe of local and stolen memory, so recovery would require a full HW + * re-init and saving/restoring (or re-populating) the wiped memory. Since we + * perform the FLR as the very last action before releasing access to the HW + * during the driver release flow, we don't attempt recovery at all, because + * if/when a new instance of i915 is bound to the device it will do a full + * re-init anyway. + */ +static void xe_driver_flr(struct xe_device *xe) +{ + const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */ + struct xe_gt *gt = xe_root_mmio_gt(xe); + int ret; + + if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) { + drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n"); + return; + } + + drm_dbg(&xe->drm, "Triggering Driver-FLR\n"); + + /* + * Make sure any pending FLR requests have cleared by waiting for the + * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS + * to make sure it's not still set from a prior attempt (it's a write to + * clear bit). + * Note that we should never be in a situation where a previous attempt + * is still pending (unless the HW is totally dead), but better to be + * safe in case something unexpected happens + */ + ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); + if (ret) { + drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! 
%d\n", ret); + return; + } + xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); + + /* Trigger the actual Driver-FLR */ + xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR); + + /* Wait for hardware teardown to complete */ + ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false); + if (ret) { + drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret); + return; + } + + /* Wait for hardware/firmware re-init to complete */ + ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS, + flr_timeout, NULL, false); + if (ret) { + drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret); + return; + } + + /* Clear sticky completion status */ + xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS); +} + +static void xe_driver_flr_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + if (xe->needs_flr_on_fini) + xe_driver_flr(xe); +} + static void xe_device_sanitize(struct drm_device *drm, void *arg) { struct xe_device *xe = arg; @@ -283,6 +357,10 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; + err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); + if (err) + return err; + for_each_gt(gt, xe, id) { err = xe_pcode_probe(gt); if (err) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 4bc668ff8615..4425c2484a02 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -388,6 +388,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @needs_flr_on_fini: requests function-reset on fini */ + bool needs_flr_on_fini; + /* For pcode */ struct mutex sb_lock; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d380f67b3365..73c090762771 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -626,6 +626,8 @@ err_fail: xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev), gt_to_tile(gt)->id, gt->info.id); + gt_to_xe(gt)->needs_flr_on_fini = true; + return err; } -- cgit v1.2.3 From e4e4268d950034dc97fbeba480dd4741d72a8df3 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Tue, 31 Oct 2023 13:32:24 -0700 Subject: drm/xe: Fix pagefault and access counter worker functions When processing G2H messages for pagefault or access counters, we queue a work item and call queue_work(). This fails if the worker thread is already queued to run. The expectation is that the worker function will do more than process a single item and return. It needs to either process all pending items or requeue itself if items are pending. But requeuing will add latency and potential context switch can occur. We don't want to add unnecessary latency and so the worker should process as many faults as it can within a reasonable duration of time. We also do not want to hog the cpu core, so here we execute in a loop and requeue if still running after more than 20 ms. This seems reasonable framework and easy to tune this futher if needed. This resolves issues seen with several igt@xe_exec_fault_mode subtests where the GPU will hang when KMD ignores a pending pagefault. v2: requeue the worker instead of having an internal processing loop. 
v3: implement hybrid model of v1 and v2 now, run for 20 msec before we will requeue if still running v4: only requeue in worker if queue is non-empty (Matt B) Signed-off-by: Brian Welty Reviewed-by: Matthew Brost Reviewed-by: Stuart Summers Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 82 +++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 34 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 018edf731ae5..9fcbf8773b8b 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -276,10 +276,10 @@ static void print_pagefault(struct xe_device *xe, struct pagefault *pf) #define PF_MSG_LEN_DW 4 -static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) +static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) { const struct xe_guc_pagefault_desc *desc; - int ret = 0; + bool ret = false; spin_lock_irq(&pf_queue->lock); if (pf_queue->head != pf_queue->tail) { @@ -303,8 +303,7 @@ static int get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) % PF_QUEUE_NUM_DW; - } else { - ret = -1; + ret = true; } spin_unlock_irq(&pf_queue->lock); @@ -348,6 +347,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) return full ? -ENOSPC : 0; } +#define USM_QUEUE_MAX_RUNTIME_MS 20 + static void pf_queue_work_func(struct work_struct *w) { struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); @@ -355,31 +356,38 @@ static void pf_queue_work_func(struct work_struct *w) struct xe_device *xe = gt_to_xe(gt); struct xe_guc_pagefault_reply reply = {}; struct pagefault pf = {}; + unsigned long threshold; int ret; - ret = get_pagefault(pf_queue, &pf); - if (ret) - return; + threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); - ret = handle_pagefault(gt, &pf); - if (unlikely(ret)) { - print_pagefault(xe, &pf); - pf.fault_unsuccessful = 1; - drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); - } + while (get_pagefault(pf_queue, &pf)) { + ret = handle_pagefault(gt, &pf); + if (unlikely(ret)) { + print_pagefault(xe, &pf); + pf.fault_unsuccessful = 1; + drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); + } + + reply.dw0 = FIELD_PREP(PFR_VALID, 1) | + FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | + FIELD_PREP(PFR_REPLY, PFR_ACCESS) | + FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | + FIELD_PREP(PFR_ASID, pf.asid); - reply.dw0 = FIELD_PREP(PFR_VALID, 1) | - FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | - FIELD_PREP(PFR_REPLY, PFR_ACCESS) | - FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | - FIELD_PREP(PFR_ASID, pf.asid); + reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | + FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | + FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | + FIELD_PREP(PFR_PDATA, pf.pdata); - reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | - FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | - FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | - FIELD_PREP(PFR_PDATA, pf.pdata); + send_pagefault_reply(>->uc.guc, &reply); - send_pagefault_reply(>->uc.guc, &reply); + if (time_after(jiffies, threshold) && + pf_queue->head != pf_queue->tail) { + queue_work(gt->usm.pf_wq, w); + break; + } + } } static void acc_queue_work_func(struct work_struct *w); @@ -544,10 +552,10 @@ unlock_vm: #define ACC_MSG_LEN_DW 4 -static int get_acc(struct acc_queue *acc_queue, struct acc *acc) +static bool get_acc(struct 
acc_queue *acc_queue, struct acc *acc) { const struct xe_guc_acc_desc *desc; - int ret = 0; + bool ret = false; spin_lock(&acc_queue->lock); if (acc_queue->head != acc_queue->tail) { @@ -567,8 +575,7 @@ static int get_acc(struct acc_queue *acc_queue, struct acc *acc) acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) % ACC_QUEUE_NUM_DW; - } else { - ret = -1; + ret = true; } spin_unlock(&acc_queue->lock); @@ -581,16 +588,23 @@ static void acc_queue_work_func(struct work_struct *w) struct xe_gt *gt = acc_queue->gt; struct xe_device *xe = gt_to_xe(gt); struct acc acc = {}; + unsigned long threshold; int ret; - ret = get_acc(acc_queue, &acc); - if (ret) - return; + threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); - ret = handle_acc(gt, &acc); - if (unlikely(ret)) { - print_acc(xe, &acc); - drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); + while (get_acc(acc_queue, &acc)) { + ret = handle_acc(gt, &acc); + if (unlikely(ret)) { + print_acc(xe, &acc); + drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); + } + + if (time_after(jiffies, threshold) && + acc_queue->head != acc_queue->tail) { + queue_work(gt->usm.acc_wq, w); + break; + } } } -- cgit v1.2.3 From 670e811d1fd6aaab485b33081a8b97fa62ff2095 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 7 Nov 2023 09:24:40 +0100 Subject: drm/xe: Update SPDX deprecated license identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "GPL-2.0" SPDX license identifier is deprecated. Update the code to use "GPL-2.0-only" instead. Choose this identifier over "GPL-2.0-or-later" since it's the most restrictive of the two and it's not fully clear that "GPL-2.0" also allows "GPL-2.0-or-later". Cc: Francois Dugast Cc: Rodrigo Vivi Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20231107082440.7568-1-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt_walk.c | 2 +- drivers/gpu/drm/xe/xe_pt_walk.h | 2 +- drivers/gpu/drm/xe/xe_res_cursor.h | 2 +- drivers/gpu/drm/xe/xe_trace.c | 2 +- drivers/gpu/drm/xe/xe_trace.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index 0def89af4372..8f6c8d063f39 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright © 2022 Intel Corporation */ diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index 42c51fa601ec..ec3d1e9efa6d 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright © 2022 Intel Corporation */ diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 006fc1361967..0a306963aa8e 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Copyright 2020 Advanced Micro Devices, Inc. 
* diff --git a/drivers/gpu/drm/xe/xe_trace.c b/drivers/gpu/drm/xe/xe_trace.c index 2570c0b859c4..2527c556bff1 100644 --- a/drivers/gpu/drm/xe/xe_trace.c +++ b/drivers/gpu/drm/xe/xe_trace.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright © 2022 Intel Corporation */ diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index f13cc9a35e91..95163c303f3e 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright © 2022 Intel Corporation */ -- cgit v1.2.3 From 6ffef7b6991b4e302dd0aa86f67a0d00b0b8e542 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Mon, 6 Nov 2023 18:06:57 -0300 Subject: drm/xe/xelpmp: Add Wa_16021867713 This workaround applies to all steppings of Xe_LPM+. Implement the KMD part. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231106210655.175109-3-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3 +++ drivers/gpu/drm/xe/xe_wa.c | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 35dd4837dd75..b57dec17eb2d 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -118,4 +118,7 @@ #define VDBOX_CGCTL3F18(base) XE_REG((base) + 0x3f18) #define ALNUNIT_CLKGATE_DIS REG_BIT(13) +#define VDBOX_CGCTL3F1C(base) XE_REG((base) + 0x3f1c) +#define MFXPIPE_CLKGATE_DIS REG_BIT(3) + #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 2f1782db267b..614e114a5342 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -248,6 +248,12 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe_LPM+ */ + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(1300), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, { XE_RTP_NAME("22016670082"), XE_RTP_RULES(MEDIA_VERSION(1300)), XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR)) -- cgit v1.2.3 From 04dfef5b41afc85e8de7b0397050cdb51db35eda Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Thu, 2 Nov 2023 16:04:53 -0700 Subject: drm/xe: Fix unbind of unaccessed VMA (fault mode) In fault mode, page table binding is deferred until fault handler. Thus vma->tile_present will be unset unless the VMA is accessed by GPU. During a later unbind, the logic doesn't account for the fact that local fence variable will be NULL in this case, leading to pass NULL into dma_fence_add_callback() and causing few WARN_ONs to print to console. The fix is already present in the code, just hoist the fence variable computation to be done earlier. Resolves warnings seen with igt@xe_exec_fault_mode@once-invalid-fault Signed-off-by: Brian Welty Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7aefa6aa66a1..b4a4ed28019c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1740,14 +1740,14 @@ next: } } + fence = cf ? &cf->base : !fence ? 
+ xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; if (last_op) { for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, - cf ? &cf->base : fence); + xe_sync_entry_signal(&syncs[i], NULL, fence); } - return cf ? &cf->base : !fence ? - xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; + return fence; err_fences: if (fences) { -- cgit v1.2.3 From 37d1eaab34ab9cdd6022a188ce6b77a88f81c7e2 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Sun, 29 Oct 2023 19:53:26 +0200 Subject: drm/xe: move the lmem verification code into a separate function If lmem (VRAM) is not fully initialized, the punit will power down the GT, which will prevent register access from the driver side. That code moved into a corresponding function (xe_verify_lmem_ready) to make the code clearer. Signed-off-by: Koby Elbaz Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231029175326.626745-1-kelbaz@habana.ai Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 0da4f75c07bf..d8f9fabf715e 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -381,10 +381,27 @@ static void mmio_fini(struct drm_device *drm, void *arg) iounmap(xe->mem.vram.mapping); } +static int xe_verify_lmem_ready(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + + /* + * The boot firmware initializes local memory and assesses its health. + * If memory training fails, the punit will have been instructed to + * keep the GT powered down; we won't be able to communicate with it + * and we should not continue with driver initialization. + */ + if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { + drm_err(&xe->drm, "VRAM not initialized by firmware\n"); + return -ENODEV; + } + + return 0; +} + int xe_mmio_init(struct xe_device *xe) { struct xe_tile *root_tile = xe_device_get_root_tile(xe); - struct xe_gt *gt = xe_root_mmio_gt(xe); const int mmio_bar = 0; int err; @@ -409,16 +426,9 @@ int xe_mmio_init(struct xe_device *xe) root_tile->mmio.size = xe->mmio.size; root_tile->mmio.regs = xe->mmio.regs; - /* - * The boot firmware initializes local memory and assesses its health. - * If memory training fails, the punit will have been instructed to - * keep the GT powered down; we won't be able to communicate with it - * and we should not continue with driver initialization. - */ - if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { - drm_err(&xe->drm, "VRAM not initialized by firmware\n"); - return -ENODEV; - } + err = xe_verify_lmem_ready(xe); + if (err) + return err; err = xe_set_dma_info(xe); if (err) -- cgit v1.2.3 From d7925d04c062b8adcbbff9604422f979e9dbedb7 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Fri, 3 Nov 2023 14:03:24 -0700 Subject: drm/xe: clear the serviced bits on INTR_IDENTITY_REG The spec for this register, like many other interrupt related ones, asks software to write back '1' to clear the serviced bits. Let's respect the spec. 
v2: - Update commit message - Add missing CC Signed-off-by: Jonathan Cavitt CC: Daniele Spurio Ceraolo CC: Lucas De Marchi CC: Rodrigo Vivi CC: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5631e5e1ea20..ef26120e7aa4 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -231,7 +231,7 @@ gt_engine_identity(struct xe_device *xe, return 0; } - xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), INTR_DATA_VALID); + xe_mmio_write32(mmio, INTR_IDENTITY_REG(bank), ident); return ident; } -- cgit v1.2.3 From 047d1f6a2f171fc9ea4c286edd6ee0dfef41a298 Mon Sep 17 00:00:00 2001 From: Haridhar Kalvala Date: Wed, 8 Nov 2023 13:03:51 +0530 Subject: drm/xe: Add Wa_14019877138 Enable Force Dispatch Ends Collection for DG2. BSpec: 46001 Signed-off-by: Haridhar Kalvala Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231108073351.3998413-1-haridhar.kalvala@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 614e114a5342..d03e6674519f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -677,6 +677,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(PLATFORM(DG2)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, /* PVC */ -- cgit v1.2.3 From 86017f3898d4ac0ab6c01376ef734c23347b38e7 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 7 Nov 2023 13:55:58 +0200 Subject: drm/xe/gsc: enable pvc support Configure and enable PVC HECI GSC support. 
Signed-off-by: Alexander Usyskin Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 1 + drivers/gpu/drm/xe/xe_heci_gsc.c | 11 ++++++++++- drivers/gpu/drm/xe/xe_pci.c | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 7202084198bd..924f7c949d55 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -35,6 +35,7 @@ #define XEHPC_BCS8_RING_BASE 0x3ee000 #define DG1_GSC_HECI2_BASE 0x00259000 +#define PVC_GSC_HECI2_BASE 0x00285000 #define DG2_GSC_HECI2_BASE 0x00374000 #define GSCCS_RING_BASE 0x11a000 diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 3328ddca42d0..d8e982e3d9a2 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -70,6 +70,13 @@ static const struct heci_gsc_def heci_gsc_def_dg2 = { .bar_size = GSC_BAR_LENGTH, }; +static const struct heci_gsc_def heci_gsc_def_pvc = { + .name = "mei-gscfi", + .bar = PVC_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, + .slow_firmware = true, +}; + static void heci_gsc_release_dev(struct device *dev) { struct auxiliary_device *aux_dev = to_auxiliary_dev(dev); @@ -172,7 +179,9 @@ void xe_heci_gsc_init(struct xe_device *xe) heci_gsc->irq = -1; - if (xe->info.platform == XE_DG2) { + if (xe->info.platform == XE_PVC) { + def = &heci_gsc_def_pvc; + } else if (xe->info.platform == XE_DG2) { def = &heci_gsc_def_dg2; } else if (xe->info.platform == XE_DG1) { def = &heci_gsc_def_dg1; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index eec2b852c7aa..40d89d4df87c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -301,6 +301,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { DGFX_FEATURES, PLATFORM(XE_PVC), .require_force_probe = true, + .has_heci_gscfi = 1, }; static const struct xe_device_desc mtl_desc = { -- cgit v1.2.3 From fa85b083733abaef81eecd8693a065657d18e733 Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Wed, 8 Nov 2023 00:54:24 +0530 Subject: drm/xe/tests: Fix migrate test Pass a valid vm to xe_migrate_update_pgtables. Resolves NPD crash seen with igt@xe_live_ktest@migrate Reviewed-by: Brian Welty Reviewed-by: Matthew Brost Signed-off-by: Pallavi Mishra Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index c984307560ee..7aad09140d7e 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -236,7 +236,7 @@ static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); then = ktime_get(); - fence = xe_migrate_update_pgtables(m, NULL, NULL, m->q, &update, 1, + fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1, NULL, 0, &pt_update); now = ktime_get(); if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) -- cgit v1.2.3 From 80103a23da50bb3fc5c3c626ca7bc4d45b28340b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 9 Nov 2023 11:46:07 -0800 Subject: drm/xe: Drop EXECLIST_CONTROL from error state dump EXECLIST_CONTROL ($enginebase + 0x550) is a write-only register; we shouldn't be trying to read or report it as part of the device error state. 
Bspec: 45910, 60335 Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20231109194606.1835284-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine.c | 4 ---- drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 -- 2 files changed, 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b5b084590888..e831e63c5e48 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -704,8 +704,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_execlist_sq_contents_hi = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); - snapshot->reg.ring_execlist_control = - hw_engine_mmio_read32(hwe, RING_EXECLIST_CONTROL(0)); snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); snapshot->reg.ring_head = hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; @@ -765,8 +763,6 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, snapshot->reg.ring_execlist_sq_contents_lo); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", snapshot->reg.ring_execlist_sq_contents_hi); - drm_printf(p, "\tRING_EXECLIST_CONTROL: 0x%08x\n", - snapshot->reg.ring_execlist_control); drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 5d4ee2904240..39908dec042a 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -183,8 +183,6 @@ struct xe_hw_engine_snapshot { u32 ring_execlist_sq_contents_lo; /** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */ u32 ring_execlist_sq_contents_hi; - /** @ring_execlist_control: RING_EXECLIST_CONTROL */ - u32 ring_execlist_control; /** @ring_start: RING_START */ u32 ring_start; /** @ring_head: RING_HEAD */ -- cgit v1.2.3 From 08987a8b68207e782decb0f4037964ef036a9de4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 9 Nov 2023 09:51:32 -0800 Subject: drm/xe: Fix build with KUNIT=m Due to the current integration between "live" xe kunit tests and kunit, it's not possible to have a build with the following combination: CONFIG_DRM_XE=y CONFIG_KUNIT=m ... even if kconfig doesn't block it. The reason for the failure is that some compilation units are pulled in xe.ko: drivers/gpu/drm/xe/xe_bo.c:#include "tests/xe_bo.c" drivers/gpu/drm/xe/xe_dma_buf.c:#include "tests/xe_dma_buf.c" drivers/gpu/drm/xe/xe_migrate.c:#include "tests/xe_migrate.c" drivers/gpu/drm/xe/xe_pci.c:#include "tests/xe_pci.c" Those files shouldn't use symbols from kunit, which should be reserved to the tests/*_test.c files. Detangling this dependency doesn't seem very straightforward, so fix the immediate issue instructing kconfig to block the problematic configuration. 
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20231109175132.3084142-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index cfa6420b9915..46325c64ff22 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe Graphics" - depends on DRM && PCI && MMU + depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs -- cgit v1.2.3 From 43efd3ba9f44c46fdb31c8b0f257cf9a2d1b58ae Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 13 Nov 2023 11:44:02 -0800 Subject: drm/xe: Raise GT frequency before GuC/HuC load Starting GT freq is usually RPn. Raising freq to RP0 will help speed up GuC load times. As an example, this data was collected on DG2- GuC Load time @RPn ~ 41 ms GuC Load time @RP0 ~ 11 ms v2: Raise GT freq before hwconfig init. This will speed up both HuC and GuC loads. Address review comments (Rodrigo). Also add a small usleep after requesting frequency which gives pcode some time to react. v3: Address checkpatch issue Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 ++++ drivers/gpu/drm/xe/xe_gt.c | 4 +++ drivers/gpu/drm/xe/xe_guc_pc.c | 49 ++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_guc_pc.h | 1 + 4 files changed, 57 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 902c60543de0..cc27fe8fc363 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -263,6 +263,11 @@ #define RPNSWREQ XE_REG(0xa008) #define REQ_RATIO_MASK REG_GENMASK(31, 23) + +#define RP_CONTROL XE_REG(0xa024) +#define RPSWCTL_MASK REG_GENMASK(10, 9) +#define RPSWCTL_ENABLE REG_FIELD_PREP(RPSWCTL_MASK, 2) +#define RPSWCTL_DISABLE REG_FIELD_PREP(RPSWCTL_MASK, 0) #define RC_CONTROL XE_REG(0xa090) #define RC_STATE XE_REG(0xa094) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 73c090762771..6c885dde5d59 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -30,6 +30,7 @@ #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" +#include "xe_guc_pc.h" #include "xe_hw_fence.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_irq.h" @@ -349,6 +350,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; + /* Raise GT freq to speed up HuC/GuC load */ + xe_guc_pc_init_early(>->uc.guc.pc); + err = xe_uc_init_hwconfig(>->uc); if (err) goto err_force_wake; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 020c6597cd78..f4ac76d6b2dd 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -247,6 +247,12 @@ static u32 decode_freq(u32 raw) GEN9_FREQ_SCALER); } +static u32 encode_freq(u32 freq) +{ + return DIV_ROUND_CLOSEST(freq * GEN9_FREQ_SCALER, + GT_FREQUENCY_MULTIPLIER); +} + static u32 pc_get_min_freq(struct xe_guc_pc *pc) { u32 freq; @@ -257,6 +263,32 @@ static u32 pc_get_min_freq(struct xe_guc_pc *pc) return decode_freq(freq); } +static void 
pc_set_manual_rp_ctrl(struct xe_guc_pc *pc, bool enable) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 state = enable ? RPSWCTL_ENABLE : RPSWCTL_DISABLE; + + /* Allow/Disallow punit to process software freq requests */ + xe_mmio_write32(gt, RP_CONTROL, state); +} + +static void pc_set_cur_freq(struct xe_guc_pc *pc, u32 freq) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 rpnswreq; + + pc_set_manual_rp_ctrl(pc, true); + + /* Req freq is in units of 16.66 Mhz */ + rpnswreq = REG_FIELD_PREP(REQ_RATIO_MASK, encode_freq(freq)); + xe_mmio_write32(gt, RPNSWREQ, rpnswreq); + + /* Sleep for a small time to allow pcode to respond */ + usleep_range(100, 300); + + pc_set_manual_rp_ctrl(pc, false); +} + static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { /* @@ -685,6 +717,21 @@ static void pc_init_fused_rp_values(struct xe_guc_pc *pc) else tgl_init_fused_rp_values(pc); } + +/** + * xe_guc_pc_init_early - Initialize RPx values and request a higher GT + * frequency to allow faster GuC load times + * @pc: Xe_GuC_PC instance + */ +void xe_guc_pc_init_early(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + pc_init_fused_rp_values(pc); + pc_set_cur_freq(pc, pc->rp0_freq); +} + static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { int ret; @@ -918,8 +965,6 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) pc->bo = bo; - pc_init_fused_rp_values(pc); - err = sysfs_create_files(gt->sysfs, pc_attrs); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 43ea582545b5..054788e006f3 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -17,4 +17,5 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); +void xe_guc_pc_init_early(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ -- cgit v1.2.3 From a839e365ac88f0fa9f8c7ae92b9e7e66bbd9e4d7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 6 Nov 2023 10:39:38 -0800 Subject: drm/xe: Use pool of ordered wq for GuC submission To appease lockdep, use a pool of ordered wq for GuC submission rather than leaving the ordered wq allocation to the drm sched. Without this change, lockdep eventually runs out of hash entries (MAX_LOCKDEP_CHAINS is exceeded) as each user-allocated exec queue adds more hash table entries to lockdep. A pool of 256 ordered wq should be enough to have similar behavior with and without lockdep enabled.
Signed-off-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 68 +++++++++++++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_guc_types.h | 7 ++++ 2 files changed, 71 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index d0e60349fc5a..8d5af11fb80d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -188,6 +188,58 @@ static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) return exec_queue_killed(q) || exec_queue_banned(q); } +#ifdef CONFIG_PROVE_LOCKING +static int alloc_submit_wq(struct xe_guc *guc) +{ + int i; + + for (i = 0; i < NUM_SUBMIT_WQ; ++i) { + guc->submission_state.submit_wq_pool[i] = + alloc_ordered_workqueue("submit_wq", 0); + if (!guc->submission_state.submit_wq_pool[i]) + goto err_free; + } + + return 0; + +err_free: + while (i) + destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); + + return -ENOMEM; +} + +static void free_submit_wq(struct xe_guc *guc) +{ + int i; + + for (i = 0; i < NUM_SUBMIT_WQ; ++i) + destroy_workqueue(guc->submission_state.submit_wq_pool[i]); +} + +static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) +{ + int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; + + return guc->submission_state.submit_wq_pool[idx]; +} +#else +static int alloc_submit_wq(struct xe_guc *guc) +{ + return 0; +} + +static void free_submit_wq(struct xe_guc *guc) +{ + +} + +static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) +{ + return NULL; +} +#endif + static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; @@ -195,6 +247,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) xa_destroy(&guc->submission_state.exec_queue_lookup); ida_destroy(&guc->submission_state.guc_ids); bitmap_free(guc->submission_state.guc_ids_bitmap); + free_submit_wq(guc); mutex_destroy(&guc->submission_state.lock); } @@ -230,6 +283,12 @@ int xe_guc_submit_init(struct xe_guc *guc) if (!guc->submission_state.guc_ids_bitmap) return -ENOMEM; + err = alloc_submit_wq(guc); + if (err) { + bitmap_free(guc->submission_state.guc_ids_bitmap); + return err; + } + gt->exec_queue_ops = &guc_exec_queue_ops; mutex_init(&guc->submission_state.lock); @@ -1166,10 +1225,11 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) timeout = (q->vm && xe_vm_no_dma_fences(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : q->hwe->eclass->sched_props.job_timeout_ms; - err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL, - q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, - 64, timeout, guc_to_gt(guc)->ordered_wq, NULL, - q->name, gt_to_xe(q->gt)->drm.dev); + err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, + get_submit_wq(guc), + q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64, + timeout, guc_to_gt(guc)->ordered_wq, NULL, + q->name, gt_to_xe(q->gt)->drm.dev); if (err) goto err_free; diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index a5e58917a499..0fdcc05dc16a 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -61,6 +61,13 @@ struct xe_guc { /** @patch: patch version of GuC submission */ u32 patch; } version; +#ifdef CONFIG_PROVE_LOCKING +#define NUM_SUBMIT_WQ 256 + /** @submit_wq_pool: submission ordered workqueues pool */ + struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ]; + /** @submit_wq_idx: submission ordered workqueue index */ + int submit_wq_idx; +#endif /** @enabled: submission is enabled */ bool enabled; } submission_state; -- cgit v1.2.3 From 44e694958b95395bd1c41508c88c8ca141bf9bd7 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 17 Aug 2023 16:30:41 -0400 Subject: drm/xe/display: Implement display support As for display, the intent is to share the display code with the i915 driver so that there is maximum reuse there. We do this by recompiling i915/display code twice. Now that i915 has been adapted to support the Xe build, we can add the xe/display support. This initial work is a collaboration of many people and unfortunately this squashed patch won't fully honor the proper credits. But let's try to add a few from the squashed patches: Co-developed-by: Matthew Brost Co-developed-by: Jani Nikula Co-developed-by: Lucas De Marchi Co-developed-by: Matt Roper Co-developed-by: Mauro Carvalho Chehab Co-developed-by: Rodrigo Vivi Co-developed-by: Dave Airlie Signed-off-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/.kunitconfig | 1 + drivers/gpu/drm/xe/Kconfig | 22 ++ drivers/gpu/drm/xe/Makefile | 141 ++++++- .../drm/xe/compat-i915-headers/gem/i915_gem_lmem.h | 1 + .../drm/xe/compat-i915-headers/gem/i915_gem_mman.h | 17 + .../xe/compat-i915-headers/gem/i915_gem_object.h | 65 ++++ .../gpu/drm/xe/compat-i915-headers/gt/intel_rps.h | 11 + .../drm/xe/compat-i915-headers/i915_active_types.h | 0 .../gpu/drm/xe/compat-i915-headers/i915_config.h | 19 + .../gpu/drm/xe/compat-i915-headers/i915_debugfs.h | 14 + drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 230 ++++++++++++ .../gpu/drm/xe/compat-i915-headers/i915_fixed.h | 6 + .../drm/xe/compat-i915-headers/i915_gpu_error.h | 17 + drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h | 6 + drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h | 6 + .../gpu/drm/xe/compat-i915-headers/i915_reg_defs.h | 6 + .../gpu/drm/xe/compat-i915-headers/i915_trace.h | 6 + .../gpu/drm/xe/compat-i915-headers/i915_utils.h | 6 + drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h | 44 +++ drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h | 31 ++ .../drm/xe/compat-i915-headers/i915_vma_types.h | 74 ++++ .../xe/compat-i915-headers/intel_clock_gating.h | 6 + .../drm/xe/compat-i915-headers/intel_mchbar_regs.h | 6 + .../drm/xe/compat-i915-headers/intel_pci_config.h | 6 + .../gpu/drm/xe/compat-i915-headers/intel_pcode.h | 42 +++ .../drm/xe/compat-i915-headers/intel_runtime_pm.h | 22 ++ 
.../gpu/drm/xe/compat-i915-headers/intel_step.h | 20 + .../gpu/drm/xe/compat-i915-headers/intel_uc_fw.h | 11 + .../gpu/drm/xe/compat-i915-headers/intel_uncore.h | 175 +++++++++ .../gpu/drm/xe/compat-i915-headers/intel_wakeref.h | 8 + .../gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h | 28 ++ .../drm/xe/compat-i915-headers/soc/intel_dram.h | 6 + .../drm/xe/compat-i915-headers/soc/intel_gmch.h | 6 + .../gpu/drm/xe/compat-i915-headers/soc/intel_pch.h | 6 + .../gpu/drm/xe/compat-i915-headers/vlv_sideband.h | 132 +++++++ .../drm/xe/compat-i915-headers/vlv_sideband_reg.h | 6 + drivers/gpu/drm/xe/display/ext/i915_irq.c | 77 ++++ drivers/gpu/drm/xe/display/ext/i915_utils.c | 22 ++ drivers/gpu/drm/xe/display/intel_fb_bo.c | 74 ++++ drivers/gpu/drm/xe/display/intel_fb_bo.h | 24 ++ drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 104 ++++++ drivers/gpu/drm/xe/display/intel_fbdev_fb.h | 21 ++ drivers/gpu/drm/xe/display/xe_display_rps.c | 17 + drivers/gpu/drm/xe/display/xe_fb_pin.c | 326 ++++++++++++++++ drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 24 ++ drivers/gpu/drm/xe/display/xe_plane_initial.c | 291 +++++++++++++++ drivers/gpu/drm/xe/regs/xe_reg_defs.h | 2 +- drivers/gpu/drm/xe/regs/xe_regs.h | 13 - drivers/gpu/drm/xe/xe_bo.c | 6 +- drivers/gpu/drm/xe/xe_device.c | 58 ++- drivers/gpu/drm/xe/xe_device_types.h | 86 +++++ drivers/gpu/drm/xe/xe_display.c | 411 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_display.h | 72 ++++ drivers/gpu/drm/xe/xe_ggtt.c | 25 +- drivers/gpu/drm/xe/xe_ggtt.h | 3 +- drivers/gpu/drm/xe/xe_irq.c | 13 +- drivers/gpu/drm/xe/xe_module.c | 4 + drivers/gpu/drm/xe/xe_pci.c | 35 +- drivers/gpu/drm/xe/xe_pm.c | 13 +- 59 files changed, 2873 insertions(+), 51 deletions(-) create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_config.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_step.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h create mode 
100644 drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h create mode 100644 drivers/gpu/drm/xe/display/ext/i915_irq.c create mode 100644 drivers/gpu/drm/xe/display/ext/i915_utils.c create mode 100644 drivers/gpu/drm/xe/display/intel_fb_bo.c create mode 100644 drivers/gpu/drm/xe/display/intel_fb_bo.h create mode 100644 drivers/gpu/drm/xe/display/intel_fbdev_fb.c create mode 100644 drivers/gpu/drm/xe/display/intel_fbdev_fb.h create mode 100644 drivers/gpu/drm/xe/display/xe_display_rps.c create mode 100644 drivers/gpu/drm/xe/display/xe_fb_pin.c create mode 100644 drivers/gpu/drm/xe/display/xe_hdcp_gsc.c create mode 100644 drivers/gpu/drm/xe/display/xe_plane_initial.c create mode 100644 drivers/gpu/drm/xe/xe_display.c create mode 100644 drivers/gpu/drm/xe/xe_display.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig index 06ed30420a8d..3769af94e391 100644 --- a/drivers/gpu/drm/xe/.kunitconfig +++ b/drivers/gpu/drm/xe/.kunitconfig @@ -6,6 +6,7 @@ CONFIG_DRM=y CONFIG_DRM_FBDEV_EMULATION=y CONFIG_DRM_KMS_HELPER=y CONFIG_DRM_XE=y +CONFIG_DRM_XE_DISPLAY=n CONFIG_EXPERT=y CONFIG_FB=y CONFIG_DRM_XE_KUNIT_TEST=y diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 46325c64ff22..5b3da06e7ba3 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -12,8 +12,20 @@ config DRM_XE select DRM_KMS_HELPER select DRM_PANEL select DRM_SUBALLOC_HELPER + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HDCP_HELPER + select DRM_DISPLAY_HDMI_HELPER + select DRM_DISPLAY_HELPER + select DRM_MIPI_DSI select RELAY select IRQ_WORK + # i915 depends on ACPI_VIDEO when ACPI is enabled + # but for select to work, need to select ACPI_VIDEO's dependencies, ick + select BACKLIGHT_CLASS_DEVICE if ACPI + select INPUT if ACPI + select ACPI_VIDEO if X86 && ACPI + select ACPI_BUTTON if ACPI + select ACPI_WMI if ACPI select SYNC_FILE select IOSF_MBI select CRC32 @@ -33,6 +45,16 @@ config DRM_XE If "M" is selected, the module will be called xe. +config DRM_XE_DISPLAY + bool "Enable display support" + depends on DRM_XE && EXPERT && DRM_XE=m + select FB_IOMEM_HELPERS + select I2C + select I2C_ALGOBIT + default y + help + Disable this option only if you want to compile out display support. 
+ config DRM_XE_FORCE_PROBE string "Force probe xe for selected Intel hardware IDs" depends on DRM_XE diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 1d39784e92fd..2777cbf07cc6 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -24,9 +24,6 @@ subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-y += $(call cc-disable-warning, frame-address) subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror -# Fine grained warnings disable -CFLAGS_xe_pci.o = $(call cc-disable-warning, override-init) - subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src) # generated sources @@ -126,13 +123,147 @@ xe-y += xe_bb.o \ # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o -obj-$(CONFIG_DRM_XE) += xe.o -obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ +# i915 Display compat #defines and #includes +subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ + -I$(srctree)/$(src)/display/ext \ + -I$(srctree)/$(src)/compat-i915-headers \ + -I$(srctree)/drivers/gpu/drm/xe/display/ \ + -I$(srctree)/drivers/gpu/drm/i915/display/ \ + -Ddrm_i915_gem_object=xe_bo \ + -Ddrm_i915_private=xe_device + +CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init) +CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init) + +# Rule to build SOC code shared with i915 +$(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE + $(call cmd,force_checksrc) + $(call if_changed_rule,cc_o_c) + +# Rule to build display code shared with i915 +$(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE + $(call cmd,force_checksrc) + $(call if_changed_rule,cc_o_c) + +# Display code specific to xe +xe-$(CONFIG_DRM_XE_DISPLAY) += \ + xe_display.o \ + display/xe_fb_pin.o \ + display/xe_hdcp_gsc.o \ + display/xe_plane_initial.o \ + display/xe_display_rps.o \ + display/intel_fbdev_fb.o \ + display/intel_fb_bo.o \ + display/ext/i915_irq.o \ + display/ext/i915_utils.o + +# SOC code shared with i915 +xe-$(CONFIG_DRM_XE_DISPLAY) += \ + i915-soc/intel_dram.o \ + i915-soc/intel_pch.o + +# Display code shared with i915 +xe-$(CONFIG_DRM_XE_DISPLAY) += \ + i915-display/icl_dsi.o \ + i915-display/intel_atomic.o \ + i915-display/intel_atomic_plane.o \ + i915-display/intel_audio.o \ + i915-display/intel_backlight.o \ + i915-display/intel_bios.o \ + i915-display/intel_bw.o \ + i915-display/intel_cdclk.o \ + i915-display/intel_color.o \ + i915-display/intel_combo_phy.o \ + i915-display/intel_connector.o \ + i915-display/intel_crtc.o \ + i915-display/intel_crtc_state_dump.o \ + i915-display/intel_cursor.o \ + i915-display/intel_cx0_phy.o \ + i915-display/intel_ddi.o \ + i915-display/intel_ddi_buf_trans.o \ + i915-display/intel_display.o \ + i915-display/intel_display_debugfs.o \ + i915-display/intel_display_debugfs_params.o \ + i915-display/intel_display_device.o \ + i915-display/intel_display_driver.o \ + i915-display/intel_display_irq.o \ + i915-display/intel_display_params.o \ + i915-display/intel_display_power.o \ + i915-display/intel_display_power_map.o \ + i915-display/intel_display_power_well.o \ + i915-display/intel_display_trace.o \ + i915-display/intel_display_wa.o \ + i915-display/intel_dkl_phy.o \ + i915-display/intel_dmc.o \ + i915-display/intel_dp.o \ + i915-display/intel_dp_aux.o \ + i915-display/intel_dp_aux_backlight.o \ + i915-display/intel_dp_hdcp.o \ + i915-display/intel_dp_link_training.o \ + i915-display/intel_dp_mst.o \ + i915-display/intel_dpll.o \ + i915-display/intel_dpll_mgr.o 
\ + i915-display/intel_dpt_common.o \ + i915-display/intel_drrs.o \ + i915-display/intel_dsb.o \ + i915-display/intel_dsi.o \ + i915-display/intel_dsi_dcs_backlight.o \ + i915-display/intel_dsi_vbt.o \ + i915-display/intel_fb.o \ + i915-display/intel_fbc.o \ + i915-display/intel_fdi.o \ + i915-display/intel_fifo_underrun.o \ + i915-display/intel_frontbuffer.o \ + i915-display/intel_global_state.o \ + i915-display/intel_gmbus.o \ + i915-display/intel_hdcp.o \ + i915-display/intel_hdmi.o \ + i915-display/intel_hotplug.o \ + i915-display/intel_hotplug_irq.o \ + i915-display/intel_hti.o \ + i915-display/intel_link_bw.o \ + i915-display/intel_lspcon.o \ + i915-display/intel_modeset_lock.o \ + i915-display/intel_modeset_setup.o \ + i915-display/intel_modeset_verify.o \ + i915-display/intel_panel.o \ + i915-display/intel_pipe_crc.o \ + i915-display/intel_pmdemand.o \ + i915-display/intel_pps.o \ + i915-display/intel_psr.o \ + i915-display/intel_qp_tables.o \ + i915-display/intel_quirks.o \ + i915-display/intel_snps_phy.o \ + i915-display/intel_tc.o \ + i915-display/intel_vblank.o \ + i915-display/intel_vdsc.o \ + i915-display/intel_vga.o \ + i915-display/intel_vrr.o \ + i915-display/intel_wm.o \ + i915-display/skl_scaler.o \ + i915-display/skl_universal_plane.o \ + i915-display/skl_watermark.o xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o +ifeq ($(CONFIG_ACPI),y) + xe-$(CONFIG_DRM_XE_DISPLAY) += \ + i915-display/intel_acpi.o \ + i915-display/intel_opregion.o +endif + +ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y) + xe-$(CONFIG_DRM_XE_DISPLAY) += i915-display/intel_fbdev.o +endif + +obj-$(CONFIG_DRM_XE) += xe.o +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/ + # header test hdrtest_find_args := -not -path xe_rtp_helpers.h +ifneq ($(CONFIG_DRM_XE_DISPLAY),y) + hdrtest_find_args += -not -path display/\* -not -path compat-i915-headers/\* -not -path xe_display.h +endif always-$(CONFIG_DRM_XE_WERROR) += \ $(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args))) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h new file mode 100644 index 000000000000..710cecca972d --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_lmem.h @@ -0,0 +1 @@ +/* Empty */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h new file mode 100644 index 000000000000..650ea2803a97 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_mman.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _I915_GEM_MMAN_H_ +#define _I915_GEM_MMAN_H_ + +#include "xe_bo_types.h" +#include + +static inline int i915_gem_fb_mmap(struct xe_bo *bo, struct vm_area_struct *vma) +{ + return drm_gem_prime_mmap(&bo->ttm.base, vma); +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h new file mode 100644 index 000000000000..5f19550cc845 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _I915_GEM_OBJECT_H_ +#define _I915_GEM_OBJECT_H_ + +#include + +#include "xe_bo.h" + +#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT) + +static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n) +{ 
+ /* Should never be called */ + WARN_ON(1); + return n; +} + +static inline bool i915_gem_object_is_tiled(const struct xe_bo *bo) +{ + /* legacy tiling is unused */ + return false; +} + +static inline bool i915_gem_object_is_userptr(const struct xe_bo *bo) +{ + /* legacy tiling is unused */ + return false; +} + +static inline int i915_gem_object_read_from_page(struct xe_bo *bo, + u32 ofs, u64 *ptr, u32 size) +{ + struct ttm_bo_kmap_obj map; + void *virtual; + bool is_iomem; + int ret; + + XE_WARN_ON(size != 8); + + ret = xe_bo_lock(bo, true); + if (ret) + return ret; + + ret = ttm_bo_kmap(&bo->ttm, ofs >> PAGE_SHIFT, 1, &map); + if (ret) + goto out_unlock; + + ofs &= ~PAGE_MASK; + virtual = ttm_kmap_obj_virtual(&map, &is_iomem); + if (is_iomem) + *ptr = readq((void __iomem *)(virtual + ofs)); + else + *ptr = *(u64 *)(virtual + ofs); + + ttm_bo_kunmap(&map); +out_unlock: + xe_bo_unlock(bo); + return ret; +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h new file mode 100644 index 000000000000..21fec9cc837c --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_RPS_H__ +#define __INTEL_RPS_H__ + +#define gen5_rps_irq_handler(x) ({}) + +#endif /* __INTEL_RPS_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h new file mode 100644 index 000000000000..e835bea08d1b --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_config.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __I915_CONFIG_H__ +#define __I915_CONFIG_H__ + +#include + +struct drm_i915_private; + +static inline unsigned long +i915_fence_timeout(const struct drm_i915_private *i915) +{ + return MAX_SCHEDULE_TIMEOUT; +} + +#endif /* __I915_CONFIG_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h new file mode 100644 index 000000000000..b4c47617b64b --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __I915_DEBUGFS_H__ +#define __I915_DEBUGFS_H__ + +struct drm_i915_gem_object; +struct seq_file; + +static inline void i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) {} + +#endif /* __I915_DEBUGFS_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h new file mode 100644 index 000000000000..c3aa5936667a --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ +#ifndef _XE_I915_DRV_H_ +#define _XE_I915_DRV_H_ + +/* + * "Adaptation header" to allow i915 display to also build for xe driver. 
+ * TODO: refactor i915 and xe so this can cease to exist + */ + +#include + +#include "gem/i915_gem_object.h" + +#include "soc/intel_pch.h" +#include "xe_device.h" +#include "xe_bo.h" +#include "xe_pm.h" +#include "xe_step.h" +#include "i915_gpu_error.h" +#include "i915_reg_defs.h" +#include "i915_utils.h" +#include "intel_step.h" +#include "intel_uc_fw.h" +#include "intel_uncore.h" +#include "intel_runtime_pm.h" +#include + +static inline struct drm_i915_private *to_i915(const struct drm_device *dev) +{ + return container_of(dev, struct drm_i915_private, drm); +} + +static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) +{ + return dev_get_drvdata(kdev); +} + + +#define INTEL_JASPERLAKE 0 +#define INTEL_ELKHARTLAKE 0 +#define IS_PLATFORM(xe, x) ((xe)->info.platform == x) +#define INTEL_INFO(dev_priv) (&((dev_priv)->info)) +#define INTEL_DEVID(dev_priv) ((dev_priv)->info.devid) +#define IS_I830(dev_priv) (dev_priv && 0) +#define IS_I845G(dev_priv) (dev_priv && 0) +#define IS_I85X(dev_priv) (dev_priv && 0) +#define IS_I865G(dev_priv) (dev_priv && 0) +#define IS_I915G(dev_priv) (dev_priv && 0) +#define IS_I915GM(dev_priv) (dev_priv && 0) +#define IS_I945G(dev_priv) (dev_priv && 0) +#define IS_I945GM(dev_priv) (dev_priv && 0) +#define IS_I965G(dev_priv) (dev_priv && 0) +#define IS_I965GM(dev_priv) (dev_priv && 0) +#define IS_G45(dev_priv) (dev_priv && 0) +#define IS_GM45(dev_priv) (dev_priv && 0) +#define IS_G4X(dev_priv) (dev_priv && 0) +#define IS_PINEVIEW(dev_priv) (dev_priv && 0) +#define IS_G33(dev_priv) (dev_priv && 0) +#define IS_IRONLAKE(dev_priv) (dev_priv && 0) +#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0) +#define IS_SANDYBRIDGE(dev_priv) (dev_priv && 0) +#define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) +#define IS_IVB_GT1(dev_priv) (dev_priv && 0) +#define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) +#define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) +#define IS_HASWELL(dev_priv) (dev_priv && 0) +#define IS_BROADWELL(dev_priv) (dev_priv && 0) +#define IS_SKYLAKE(dev_priv) (dev_priv && 0) +#define IS_BROXTON(dev_priv) (dev_priv && 0) +#define IS_KABYLAKE(dev_priv) (dev_priv && 0) +#define IS_GEMINILAKE(dev_priv) (dev_priv && 0) +#define IS_COFFEELAKE(dev_priv) (dev_priv && 0) +#define IS_COMETLAKE(dev_priv) (dev_priv && 0) +#define IS_ICELAKE(dev_priv) (dev_priv && 0) +#define IS_JASPERLAKE(dev_priv) (dev_priv && 0) +#define IS_ELKHARTLAKE(dev_priv) (dev_priv && 0) +#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_TIGERLAKE) +#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) +#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) +#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) +#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) +#define IS_XEHPSDV(dev_priv) (dev_priv && 0) +#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) +#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC) +#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) +#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) + +#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) +#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) +#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0) + +#define IP_VER(ver, rel) ((ver) << 8 | (rel)) + +#define INTEL_DISPLAY_ENABLED(xe) (HAS_DISPLAY((xe)) && !intel_opregion_headless_sku((xe))) + +#define IS_GRAPHICS_VER(xe, first, last) \ + ((xe)->info.graphics_verx100 >= first * 100 && \ + (xe)->info.graphics_verx100 <= (last*100 + 99)) +#define IS_MOBILE(xe) (xe && 0) 
+#define HAS_LLC(xe) (!IS_DGFX((xe))) + +#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 + +/* Workarounds not handled yet */ +#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; }) +#define IS_GRAPHICS_STEP(xe, first, last) ({u8 __step = (xe)->info.step.graphics; first <= __step && __step <= last; }) + +#define IS_LP(xe) (0) +#define IS_GEN9_LP(xe) (0) +#define IS_GEN9_BC(xe) (0) + +#define IS_TIGERLAKE_UY(xe) (xe && 0) +#define IS_COMETLAKE_ULX(xe) (xe && 0) +#define IS_COFFEELAKE_ULX(xe) (xe && 0) +#define IS_KABYLAKE_ULX(xe) (xe && 0) +#define IS_SKYLAKE_ULX(xe) (xe && 0) +#define IS_HASWELL_ULX(xe) (xe && 0) +#define IS_COMETLAKE_ULT(xe) (xe && 0) +#define IS_COFFEELAKE_ULT(xe) (xe && 0) +#define IS_KABYLAKE_ULT(xe) (xe && 0) +#define IS_SKYLAKE_ULT(xe) (xe && 0) + +#define IS_DG1_GRAPHICS_STEP(xe, first, last) (IS_DG1(xe) && IS_GRAPHICS_STEP(xe, first, last)) +#define IS_DG2_GRAPHICS_STEP(xe, variant, first, last) \ + ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_ ## variant && \ + IS_GRAPHICS_STEP(xe, first, last)) +#define IS_XEHPSDV_GRAPHICS_STEP(xe, first, last) (IS_XEHPSDV(xe) && IS_GRAPHICS_STEP(xe, first, last)) + +/* XXX: No basedie stepping support yet */ +#define IS_PVC_BD_STEP(xe, first, last) (!WARN_ON(1) && IS_PONTEVECCHIO(xe)) + +#define IS_TIGERLAKE_DISPLAY_STEP(xe, first, last) (IS_TIGERLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) +#define IS_ROCKETLAKE_DISPLAY_STEP(xe, first, last) (IS_ROCKETLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) +#define IS_DG1_DISPLAY_STEP(xe, first, last) (IS_DG1(xe) && IS_DISPLAY_STEP(xe, first, last)) +#define IS_DG2_DISPLAY_STEP(xe, first, last) (IS_DG2(xe) && IS_DISPLAY_STEP(xe, first, last)) +#define IS_ADLP_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_P(xe) && IS_DISPLAY_STEP(xe, first, last)) +#define IS_ADLS_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_S(xe) && IS_DISPLAY_STEP(xe, first, last)) +#define IS_JSL_EHL_DISPLAY_STEP(xe, first, last) (IS_JSL_EHL(xe) && IS_DISPLAY_STEP(xe, first, last)) +#define IS_MTL_DISPLAY_STEP(xe, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) + +/* FIXME: Add subplatform here */ +#define IS_MTL_GRAPHICS_STEP(xe, sub, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) + +#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) +#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) +#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) +#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) +#define IS_ICL_WITH_PORT_F(xe) (xe && 0) +#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) +#define to_intel_bo(x) gem_to_xe_bo((x)) +#define mkwrite_device_info(xe) (INTEL_INFO(xe)) + +#define HAS_128_BYTE_Y_TILING(xe) (xe || 1) + +#define intel_has_gpu_reset(a) (a && 0) + +#include "intel_wakeref.h" + +static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm) +{ + struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); + + if (xe_pm_runtime_get(xe) < 0) { + xe_pm_runtime_put(xe); + return false; + } + return true; +} + +static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm) +{ + struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); + + return xe_pm_runtime_get_if_active(xe); +} + +static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm) +{ + struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); + + xe_pm_runtime_put(xe); 
+} + +static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref) +{ + if (wakeref) + intel_runtime_pm_put_unchecked(pm); +} + +#define intel_runtime_pm_get_raw intel_runtime_pm_get +#define intel_runtime_pm_put_raw intel_runtime_pm_put +#define assert_rpm_wakelock_held(x) do { } while (0) +#define assert_rpm_raw_wakeref_held(x) do { } while (0) + +#define intel_uncore_forcewake_get(x, y) do { } while (0) +#define intel_uncore_forcewake_put(x, y) do { } while (0) + +#define intel_uncore_arm_unclaimed_mmio_detection(x) do { } while (0) + +#define I915_PRIORITY_DISPLAY 0 +struct i915_sched_attr { + int priority; +}; +#define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0) + +#define with_intel_runtime_pm(rpm, wf) \ + for ((wf) = intel_runtime_pm_get(rpm); (wf); \ + intel_runtime_pm_put((rpm), (wf)), (wf) = 0) + +#define pdev_to_i915 pdev_to_xe_device +#define RUNTIME_INFO(xe) (&(xe)->info.i915_runtime) + +#define FORCEWAKE_ALL XE_FORCEWAKE_ALL +#define HPD_STORM_DEFAULT_THRESHOLD 50 + +#ifdef CONFIG_ARM64 +/* + * arm64 indirectly includes linux/rtc.h, + * which defines a irq_lock, so include it + * here before #define-ing it + */ +#include +#endif + +#define irq_lock irq.lock + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h new file mode 100644 index 000000000000..12c671fd5235 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_fixed.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h new file mode 100644 index 000000000000..98e9dd78f670 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gpu_error.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _I915_GPU_ERROR_H_ +#define _I915_GPU_ERROR_H_ + +struct drm_i915_error_state_buf; + +__printf(2, 3) +static inline void +i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) 
+{ +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h new file mode 100644 index 000000000000..61707a07f91f --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_irq.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h new file mode 100644 index 000000000000..8619ec015ad4 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_reg.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h new file mode 100644 index 000000000000..723279c975b1 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_reg_defs.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h new file mode 100644 index 000000000000..d429d421ac70 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_trace.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#define trace_i915_reg_rw(a...) do { } while (0) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h new file mode 100644 index 000000000000..1d7c4360e5c0 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_utils.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h new file mode 100644 index 000000000000..80b024d435dc --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _I915_VGPU_H_ +#define _I915_VGPU_H_ + +#include + +struct drm_i915_private; +struct i915_ggtt; + +static inline void intel_vgpu_detect(struct drm_i915_private *i915) +{ +} +static inline bool intel_vgpu_active(struct drm_i915_private *i915) +{ + return false; +} +static inline void intel_vgpu_register(struct drm_i915_private *i915) +{ +} +static inline bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *i915) +{ + return false; +} +static inline bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *i915) +{ + return false; +} +static inline bool intel_vgpu_has_huge_gtt(struct drm_i915_private *i915) +{ + return false; +} +static inline int intel_vgt_balloon(struct i915_ggtt *ggtt) +{ + return 0; +} +static inline void intel_vgt_deballoon(struct i915_ggtt *ggtt) +{ +} + +#endif /* _I915_VGPU_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h new file mode 100644 index 000000000000..e4bbdffcd5f5 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef I915_VMA_H +#define I915_VMA_H + +#include +#include + +/* We don't want these from 
i915_drm.h in case of Xe */ +#undef I915_TILING_X +#undef I915_TILING_Y +#define I915_TILING_X 0 +#define I915_TILING_Y 0 + +struct xe_bo; + +struct i915_vma { + struct xe_bo *bo, *dpt; + struct drm_mm_node node; +}; + + +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) +{ + return vma->node.start; +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h new file mode 100644 index 000000000000..e7aaf50f5485 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma_types.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include + +/* XX: Figure out how to handle this vma mapping in xe */ +struct intel_remapped_plane_info { + /* in gtt pages */ + u32 offset:31; + u32 linear:1; + union { + /* in gtt pages for !linear */ + struct { + u16 width; + u16 height; + u16 src_stride; + u16 dst_stride; + }; + + /* in gtt pages for linear */ + u32 size; + }; +} __packed; + +struct intel_remapped_info { + struct intel_remapped_plane_info plane[4]; + /* in gtt pages */ + u32 plane_alignment; +} __packed; + +struct intel_rotation_info { + struct intel_remapped_plane_info plane[2]; +} __packed; + +enum i915_gtt_view_type { + I915_GTT_VIEW_NORMAL = 0, + I915_GTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), + I915_GTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), +}; + +static inline void assert_i915_gem_gtt_types(void) +{ + BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 2 * sizeof(u32) + 8 * sizeof(u16)); + BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 5 * sizeof(u32) + 16 * sizeof(u16)); + + /* Check that rotation/remapped shares offsets for simplicity */ + BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) != + offsetof(struct intel_rotation_info, plane[0])); + BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) != + offsetofend(struct intel_rotation_info, plane[1])); + + /* As we encode the size of each branch inside the union into its type, + * we have to be careful that each branch has a unique size. 
+ */ + switch ((enum i915_gtt_view_type)0) { + case I915_GTT_VIEW_NORMAL: + case I915_GTT_VIEW_ROTATED: + case I915_GTT_VIEW_REMAPPED: + /* gcc complains if these are identical cases */ + break; + } +} + +struct i915_gtt_view { + enum i915_gtt_view_type type; + union { + /* Members need to contain no holes/padding */ + struct intel_rotation_info rotated; + struct intel_remapped_info remapped; + }; +}; diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h new file mode 100644 index 000000000000..ce986f0e8f38 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/intel_clock_gating.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h new file mode 100644 index 000000000000..55b316985340 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/intel_mchbar_regs.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h new file mode 100644 index 000000000000..8c15867fd613 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/intel_pci_config.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h new file mode 100644 index 000000000000..0c47661bdc6a --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_PCODE_H__ +#define __INTEL_PCODE_H__ + +#include "intel_uncore.h" +#include "xe_pcode.h" + +static inline int +snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, + int fast_timeout_us, int slow_timeout_ms) +{ + return xe_pcode_write_timeout(__compat_uncore_to_gt(uncore), mbox, val, + slow_timeout_ms ?: 1); +} + +static inline int +snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) +{ + + return xe_pcode_write(__compat_uncore_to_gt(uncore), mbox, val); +} + +static inline int +snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) +{ + return xe_pcode_read(__compat_uncore_to_gt(uncore), mbox, val, val1); +} + +static inline int +skl_pcode_request(struct intel_uncore *uncore, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + int timeout_base_ms) +{ + return xe_pcode_request(__compat_uncore_to_gt(uncore), mbox, request, reply_mask, reply, + timeout_base_ms); +} + +#endif /* __INTEL_PCODE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h new file mode 100644 index 000000000000..f7f3286e2c53 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "intel_wakeref.h" + +enum i915_drm_suspend_mode { + I915_DRM_SUSPEND_IDLE, + I915_DRM_SUSPEND_MEM, + I915_DRM_SUSPEND_HIBERNATE, +}; + +#define intel_runtime_pm xe_runtime_pm + +static inline void 
disable_rpm_wakeref_asserts(void *rpm) +{ +} + +static inline void enable_rpm_wakeref_asserts(void *rpm) +{ +} diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h new file mode 100644 index 000000000000..0006ef812346 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_STEP_H__ +#define __INTEL_STEP_H__ + +#include "xe_device_types.h" +#include "xe_step.h" + +#define intel_display_step_name xe_display_step_name + +static inline +const char *xe_display_step_name(struct xe_device *xe) +{ + return xe_step_name(xe->info.step.display); +} + +#endif /* __INTEL_STEP_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h new file mode 100644 index 000000000000..009745328992 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _INTEL_UC_FW_H_ +#define _INTEL_UC_FW_H_ + +#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git" + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h new file mode 100644 index 000000000000..cd26ddc0f69e --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_UNCORE_H__ +#define __INTEL_UNCORE_H__ + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_mmio.h" + +static inline struct xe_gt *__compat_uncore_to_gt(struct intel_uncore *uncore) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + + return xe_root_mmio_gt(xe); +} + +static inline u32 intel_uncore_read(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); +} + +static inline u32 intel_uncore_read8(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg); +} + +static inline u32 intel_uncore_read16(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read16(__compat_uncore_to_gt(uncore), reg); +} + +static inline u64 +intel_uncore_read64_2x32(struct intel_uncore *uncore, + i915_reg_t i915_lower_reg, i915_reg_t i915_upper_reg) +{ + struct xe_reg lower_reg = XE_REG(i915_mmio_reg_offset(i915_lower_reg)); + struct xe_reg upper_reg = XE_REG(i915_mmio_reg_offset(i915_upper_reg)); + u32 upper, lower, old_upper; + int loop = 0; + + upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg); + do { + old_upper = upper; + lower = xe_mmio_read32(__compat_uncore_to_gt(uncore), lower_reg); + upper = xe_mmio_read32(__compat_uncore_to_gt(uncore), upper_reg); + } while (upper != old_upper && loop++ < 2); + + return (u64)upper << 32 | lower; +} + +static inline void intel_uncore_posting_read(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); +} + +static inline void 
intel_uncore_write(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 val) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val); +} + +static inline u32 intel_uncore_rmw(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 clear, u32 set) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_rmw32(__compat_uncore_to_gt(uncore), reg, clear, set); +} + +static inline int intel_wait_for_register(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 mask, + u32 value, unsigned int timeout) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + timeout * USEC_PER_MSEC, NULL, false); +} + +static inline int intel_wait_for_register_fw(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 mask, + u32 value, unsigned int timeout) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + timeout * USEC_PER_MSEC, NULL, false); +} + +static inline int +__intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg, + u32 mask, u32 value, unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, u32 *out_value) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_wait32(__compat_uncore_to_gt(uncore), reg, mask, value, + fast_timeout_us + 1000 * slow_timeout_ms, + out_value, false); +} + +static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); +} + +static inline void intel_uncore_write_fw(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 val) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val); +} + +static inline u32 intel_uncore_read_notrace(struct intel_uncore *uncore, + i915_reg_t i915_reg) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); +} + +static inline void intel_uncore_write_notrace(struct intel_uncore *uncore, + i915_reg_t i915_reg, u32 val) +{ + struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); + + xe_mmio_write32(__compat_uncore_to_gt(uncore), reg, val); +} + +static inline void __iomem *intel_uncore_regs(struct intel_uncore *uncore) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + + return xe_device_get_root_tile(xe)->mmio.regs; +} + +/* + * The raw_reg_{read,write} macros are intended as a micro-optimization for + * interrupt handlers so that the pointer indirection on uncore->regs can + * be computed once (and presumably cached in a register) instead of generating + * extra load instructions for each MMIO access. + * + * Given that these macros are only intended for non-GSI interrupt registers + * (and the goal is to avoid extra instructions generated by the compiler), + * these macros do not account for uncore->gsi_offset. Any caller that needs + * to use these macros on a GSI register is responsible for adding the + * appropriate GSI offset to the 'base' parameter. 
+ */ +#define raw_reg_read(base, reg) \ + readl(base + i915_mmio_reg_offset(reg)) +#define raw_reg_write(base, reg, value) \ + writel(value, base + i915_mmio_reg_offset(reg)) + +#endif /* __INTEL_UNCORE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h new file mode 100644 index 000000000000..1c5e30cf10ca --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +typedef bool intel_wakeref_t; diff --git a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h new file mode 100644 index 000000000000..c2c30ece8f77 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_PXP_H__ +#define __INTEL_PXP_H__ + +#include +#include + +struct drm_i915_gem_object; +struct intel_pxp; + +static inline int intel_pxp_key_check(struct intel_pxp *pxp, + struct drm_i915_gem_object *obj, + bool assign) +{ + return -ENODEV; +} + +static inline bool +i915_gem_object_is_protected(const struct drm_i915_gem_object *obj) +{ + return false; +} + +#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h new file mode 100644 index 000000000000..65707e20c557 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../../i915/soc/intel_dram.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h new file mode 100644 index 000000000000..33c5257b3a71 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../../i915/soc/intel_gmch.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h new file mode 100644 index 000000000000..9c46556d33a4 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../../i915/soc/intel_pch.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h new file mode 100644 index 000000000000..ec6f12de5727 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2013-2021 Intel Corporation + */ + +#ifndef _VLV_SIDEBAND_H_ +#define _VLV_SIDEBAND_H_ + +#include + +#include "vlv_sideband_reg.h" + +enum pipe; +struct drm_i915_private; + +enum { + VLV_IOSF_SB_BUNIT, + VLV_IOSF_SB_CCK, + VLV_IOSF_SB_CCU, + VLV_IOSF_SB_DPIO, + VLV_IOSF_SB_FLISDSI, + VLV_IOSF_SB_GPIO, + VLV_IOSF_SB_NC, + VLV_IOSF_SB_PUNIT, +}; + +static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports) +{ +} +static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg) +{ + return 0; +} +static inline void vlv_iosf_sb_write(struct drm_i915_private *i915, + u8 port, u32 reg, u32 val) +{ +} +static inline void 
vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports) +{ +} +static inline void vlv_bunit_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg) +{ + return 0; +} +static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ +} +static inline void vlv_bunit_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_cck_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg) +{ + return 0; +} +static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ +} +static inline void vlv_cck_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_ccu_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg) +{ + return 0; +} +static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ +} +static inline void vlv_ccu_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_dpio_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg) +{ + return 0; +} +static inline void vlv_dpio_write(struct drm_i915_private *i915, + int pipe, int reg, u32 val) +{ +} +static inline void vlv_dpio_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_flisdsi_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg) +{ + return 0; +} +static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val) +{ +} +static inline void vlv_flisdsi_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_nc_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr) +{ + return 0; +} +static inline void vlv_nc_put(struct drm_i915_private *i915) +{ +} +static inline void vlv_punit_get(struct drm_i915_private *i915) +{ +} +static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr) +{ + return 0; +} +static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val) +{ + return 0; +} +static inline void vlv_punit_put(struct drm_i915_private *i915) +{ +} + +#endif /* _VLV_SIDEBAND_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h new file mode 100644 index 000000000000..949f134ce3cf --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/vlv_sideband_reg.h" diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c new file mode 100644 index 000000000000..bee191a4a97d --- /dev/null +++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_irq.h" +#include "i915_reg.h" +#include "intel_uncore.h" + +void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr, + i915_reg_t iir, i915_reg_t ier) +{ + intel_uncore_write(uncore, imr, 0xffffffff); + intel_uncore_posting_read(uncore, imr); + + intel_uncore_write(uncore, ier, 0); + + /* IIR can theoretically queue up two events. Be paranoid. 
*/ + intel_uncore_write(uncore, iir, 0xffffffff); + intel_uncore_posting_read(uncore, iir); + intel_uncore_write(uncore, iir, 0xffffffff); + intel_uncore_posting_read(uncore, iir); +} + +/* + * We should clear IMR at preinstall/uninstall, and just check at postinstall. + */ +void gen3_assert_iir_is_zero(struct intel_uncore *uncore, i915_reg_t reg) +{ + struct xe_device *xe = container_of(uncore, struct xe_device, uncore); + u32 val = intel_uncore_read(uncore, reg); + + if (val == 0) + return; + + drm_WARN(&xe->drm, 1, + "Interrupt register 0x%x is not zero: 0x%08x\n", + i915_mmio_reg_offset(reg), val); + intel_uncore_write(uncore, reg, 0xffffffff); + intel_uncore_posting_read(uncore, reg); + intel_uncore_write(uncore, reg, 0xffffffff); + intel_uncore_posting_read(uncore, reg); +} + +void gen3_irq_init(struct intel_uncore *uncore, + i915_reg_t imr, u32 imr_val, + i915_reg_t ier, u32 ier_val, + i915_reg_t iir) +{ + gen3_assert_iir_is_zero(uncore, iir); + + intel_uncore_write(uncore, ier, ier_val); + intel_uncore_write(uncore, imr, imr_val); + intel_uncore_posting_read(uncore, imr); +} + +bool intel_irqs_enabled(struct xe_device *xe) +{ + /* + * XXX: i915 has a racy handling of the irq.enabled, since it doesn't + * lock its transitions. Because of that, the irq.enabled sometimes + * is not read with the irq.lock in place. + * However, the most critical cases like vblank and page flips are + * properly using the locks. + * We cannot take the lock in here or run any kind of assert because + * of i915 inconsistency. + * But at this point the xe irq is better protected against races, + * although the full solution would be protecting the i915 side. + */ + return xe->irq.enabled; +} + +void intel_synchronize_irq(struct xe_device *xe) +{ + synchronize_irq(to_pci_dev(xe->drm.dev)->irq); +} diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c new file mode 100644 index 000000000000..981edc2788bc --- /dev/null +++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "i915_drv.h" + +bool i915_vtd_active(struct drm_i915_private *i915) +{ + if (device_iommu_mapped(i915->drm.dev)) + return true; + + /* Running as a guest, we assume the host is enforcing VT'd */ + return i915_run_as_guest(); +} + +/* i915 specific, just put here for shutting it up */ +int __i915_inject_probe_error(struct drm_i915_private *i915, int err, + const char *func, int line) +{ + return 0; +} diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c new file mode 100644 index 000000000000..b21da7b745a5 --- /dev/null +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "intel_display_types.h" +#include "intel_fb_bo.h" + +void intel_fb_bo_framebuffer_fini(struct xe_bo *bo) +{ + if (bo->flags & XE_BO_CREATE_PINNED_BIT) { + /* Unpin our kernel fb first */ + xe_bo_lock(bo, false); + xe_bo_unpin(bo); + xe_bo_unlock(bo); + } + xe_bo_put(bo); +} + +int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, + struct xe_bo *bo, + struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_i915_private *i915 = to_i915(bo->ttm.base.dev); + int ret; + + xe_bo_get(bo); + + ret = ttm_bo_reserve(&bo->ttm, true, false, NULL); + if (ret) + return ret; + + if (!(bo->flags & XE_BO_SCANOUT_BIT)) { + /* + * XE_BO_SCANOUT_BIT 
should ideally be set at creation, or is
+		 * automatically set when creating FB. We cannot change caching
+		 * mode when the object is VM_BINDed, so we can only set
+		 * coherency with display when unbound.
+		 */
+		if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
+			ttm_bo_unreserve(&bo->ttm);
+			return -EINVAL;
+		}
+		bo->flags |= XE_BO_SCANOUT_BIT;
+	}
+	ttm_bo_unreserve(&bo->ttm);
+
+	return ret;
+}
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+					  struct drm_file *filp,
+					  const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_i915_gem_object *bo;
+	struct drm_gem_object *gem = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
+
+	if (!gem)
+		return ERR_PTR(-ENOENT);
+
+	bo = gem_to_xe_bo(gem);
+	/* Require vram placement or dma-buf import */
+	if (IS_DGFX(i915) &&
+	    !xe_bo_can_migrate(gem_to_xe_bo(gem), XE_PL_VRAM0) &&
+	    bo->ttm.type != ttm_bo_type_sg) {
+		drm_gem_object_put(gem);
+		return ERR_PTR(-EREMOTE);
+	}
+
+	return bo;
+}
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.h b/drivers/gpu/drm/xe/display/intel_fb_bo.h
new file mode 100644
index 000000000000..5d365b925b7a
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __INTEL_FB_BO_H__
+#define __INTEL_FB_BO_H__
+
+struct drm_file;
+struct drm_mode_fb_cmd2;
+struct drm_i915_private;
+struct intel_framebuffer;
+struct xe_bo;
+
+void intel_fb_bo_framebuffer_fini(struct xe_bo *bo);
+int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
+				 struct xe_bo *bo,
+				 struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct xe_bo *intel_fb_bo_lookup_valid_bo(struct drm_i915_private *i915,
+					  struct drm_file *filp,
+					  const struct drm_mode_fb_cmd2 *mode_cmd);
+
+#endif
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
new file mode 100644
index 000000000000..51ae3561fd0d
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "intel_fbdev_fb.h"
+
+#include 
+
+#include "xe_gt.h"
+#include "xe_ttm_stolen_mgr.h"
+
+#include "i915_drv.h"
+#include "intel_display_types.h"
+
+struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
+					     struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_framebuffer *fb;
+	struct drm_device *dev = helper->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_mode_fb_cmd2 mode_cmd = {};
+	struct drm_i915_gem_object *obj;
+	int size;
+
+	/* we don't do packed 24bpp */
+	if (sizes->surface_bpp == 24)
+		sizes->surface_bpp = 32;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+
+	mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
+				    DIV_ROUND_UP(sizes->surface_bpp, 8), XE_PAGE_SIZE);
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+							  sizes->surface_depth);
+
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	size = PAGE_ALIGN(size);
+	obj = ERR_PTR(-ENODEV);
+
+	if (!IS_DGFX(dev_priv)) {
+		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv),
+					   NULL, size,
+					   ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
+					   XE_BO_CREATE_STOLEN_BIT |
+					   XE_BO_CREATE_PINNED_BIT);
+		if (!IS_ERR(obj))
+			drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n");
+		else
+			drm_info(&dev_priv->drm, "Allocated fbdev into stolen failed: %li\n", PTR_ERR(obj));
+	}
+	if (IS_ERR(obj)) {
+		obj =
xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size, + ttm_bo_type_kernel, XE_BO_SCANOUT_BIT | + XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) | + XE_BO_CREATE_PINNED_BIT); + } + + if (IS_ERR(obj)) { + drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj); + fb = ERR_PTR(-ENOMEM); + goto err; + } + + fb = intel_framebuffer_create(obj, &mode_cmd); + if (IS_ERR(fb)) { + xe_bo_unpin_map_no_vm(obj); + goto err; + } + + drm_gem_object_put(intel_bo_to_drm_bo(obj)); + return fb; + +err: + return fb; +} + +int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, + struct drm_i915_gem_object *obj, struct i915_vma *vma) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + + if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) { + if (obj->flags & XE_BO_CREATE_STOLEN_BIT) + info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0); + else + info->fix.smem_start = + pci_resource_start(pdev, 2) + + xe_bo_addr(obj, 0, XE_PAGE_SIZE); + + info->fix.smem_len = obj->ttm.base.size; + } else { + /* XXX: Pure fiction, as the BO may not be physically accessible.. */ + info->fix.smem_start = 0; + info->fix.smem_len = obj->ttm.base.size; + } + XE_WARN_ON(iosys_map_is_null(&obj->vmap)); + + info->screen_base = obj->vmap.vaddr_iomem; + info->screen_size = intel_bo_to_drm_bo(obj)->size; + + return 0; +} diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.h b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h new file mode 100644 index 000000000000..ea186772e0bb --- /dev/null +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_FBDEV_FB_H__ +#define __INTEL_FBDEV_FB_H__ + +struct drm_fb_helper; +struct drm_fb_helper_surface_size; +struct drm_i915_gem_object; +struct drm_i915_private; +struct fb_info; +struct i915_vma; + +struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, + struct drm_fb_helper_surface_size *sizes); +int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, + struct drm_i915_gem_object *obj, struct i915_vma *vma); + +#endif diff --git a/drivers/gpu/drm/xe/display/xe_display_rps.c b/drivers/gpu/drm/xe/display/xe_display_rps.c new file mode 100644 index 000000000000..ab21c581c192 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_display_rps.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "intel_display_rps.h" + +void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc, + struct dma_fence *fence) +{ +} + +void intel_display_rps_mark_interactive(struct drm_i915_private *i915, + struct intel_atomic_state *state, + bool interactive) +{ +} diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c new file mode 100644 index 000000000000..67b956a6da8d --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_display_types.h" +#include "intel_dpt.h" +#include "intel_fb.h" +#include "intel_fb_pin.h" +#include "xe_ggtt.h" +#include "xe_gt.h" + +#include + +static void +write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs, + u32 width, u32 height, u32 src_stride, u32 dst_stride) +{ + struct xe_device *xe = xe_bo_device(bo); + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + u32 column, row; + + 
/* TODO: Maybe rewrite so we can traverse the bo addresses sequentially, + * by writing dpt/ggtt in a different order? + */ + + for (column = 0; column < width; column++) { + u32 src_idx = src_stride * (height - 1) + column + bo_ofs; + + for (row = 0; row < height; row++) { + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, + xe->pat.idx[XE_CACHE_WB]); + + iosys_map_wr(map, *dpt_ofs, u64, pte); + *dpt_ofs += 8; + src_idx -= src_stride; + } + + /* The DE ignores the PTEs for the padding tiles */ + *dpt_ofs += (dst_stride - height) * 8; + } + + /* Align to next page */ + *dpt_ofs = ALIGN(*dpt_ofs, 4096); +} + +static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, + const struct i915_gtt_view *view, + struct i915_vma *vma) +{ + struct xe_device *xe = to_xe_device(fb->base.dev); + struct xe_tile *tile0 = xe_device_get_root_tile(xe); + struct xe_ggtt *ggtt = tile0->mem.ggtt; + struct xe_bo *bo = intel_fb_obj(&fb->base), *dpt; + u32 dpt_size, size = bo->ttm.base.size; + + if (view->type == I915_GTT_VIEW_NORMAL) + dpt_size = ALIGN(size / XE_PAGE_SIZE * 8, XE_PAGE_SIZE); + else + /* display uses 4K tiles instead of bytes here, convert to entries.. */ + dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8, + XE_PAGE_SIZE); + + dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM0_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(dpt)) + dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, + ttm_bo_type_kernel, + XE_BO_CREATE_STOLEN_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(dpt)) + dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(dpt)) + return PTR_ERR(dpt); + + if (view->type == I915_GTT_VIEW_NORMAL) { + u32 x; + + for (x = 0; x < size / XE_PAGE_SIZE; x++) { + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE, + xe->pat.idx[XE_CACHE_WB]); + + iosys_map_wr(&dpt->vmap, x * 8, u64, pte); + } + } else { + const struct intel_rotation_info *rot_info = &view->rotated; + u32 i, dpt_ofs = 0; + + for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) + write_dpt_rotated(bo, &dpt->vmap, &dpt_ofs, + rot_info->plane[i].offset, + rot_info->plane[i].width, + rot_info->plane[i].height, + rot_info->plane[i].src_stride, + rot_info->plane[i].dst_stride); + } + + vma->dpt = dpt; + vma->node = dpt->ggtt_node; + return 0; +} + +static void +write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo_ofs, + u32 width, u32 height, u32 src_stride, u32 dst_stride) +{ + struct xe_device *xe = xe_bo_device(bo); + u32 column, row; + + for (column = 0; column < width; column++) { + u32 src_idx = src_stride * (height - 1) + column + bo_ofs; + + for (row = 0; row < height; row++) { + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, + xe->pat.idx[XE_CACHE_WB]); + + xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte); + *ggtt_ofs += XE_PAGE_SIZE; + src_idx -= src_stride; + } + + /* The DE ignores the PTEs for the padding tiles */ + *ggtt_ofs += (dst_stride - height) * XE_PAGE_SIZE; + } +} + +static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, + const struct i915_gtt_view *view, + struct i915_vma *vma) +{ + struct xe_bo *bo = intel_fb_obj(&fb->base); + struct xe_device *xe = to_xe_device(fb->base.dev); + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + u32 align; + int ret; + + /* TODO: Consider sharing framebuffer mapping? 
+ * embed i915_vma inside intel_framebuffer + */ + xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + ret = mutex_lock_interruptible(&ggtt->lock); + if (ret) + goto out; + + align = XE_PAGE_SIZE; + if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) + align = max_t(u32, align, SZ_64K); + + if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) { + vma->node = bo->ggtt_node; + } else if (view->type == I915_GTT_VIEW_NORMAL) { + u32 x, size = bo->ttm.base.size; + + ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, + align, 0); + if (ret) + goto out_unlock; + + for (x = 0; x < size; x += XE_PAGE_SIZE) { + u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, + xe->pat.idx[XE_CACHE_WB]); + + xe_ggtt_set_pte(ggtt, vma->node.start + x, pte); + } + } else { + u32 i, ggtt_ofs; + const struct intel_rotation_info *rot_info = &view->rotated; + + /* display seems to use tiles instead of bytes here, so convert it back.. */ + u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; + + ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, + align, 0); + if (ret) + goto out_unlock; + + ggtt_ofs = vma->node.start; + + for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) + write_ggtt_rotated(bo, ggtt, &ggtt_ofs, + rot_info->plane[i].offset, + rot_info->plane[i].width, + rot_info->plane[i].height, + rot_info->plane[i].src_stride, + rot_info->plane[i].dst_stride); + } + + xe_ggtt_invalidate(ggtt); +out_unlock: + mutex_unlock(&ggtt->lock); +out: + xe_device_mem_access_put(tile_to_xe(ggtt->tile)); + return ret; +} + +static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb, + const struct i915_gtt_view *view) +{ + struct drm_device *dev = fb->base.dev; + struct xe_device *xe = to_xe_device(dev); + struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); + struct xe_bo *bo = intel_fb_obj(&fb->base); + int ret; + + if (!vma) + return ERR_PTR(-ENODEV); + + /* Remapped view is only required on ADL-P, which xe doesn't support. 
*/ + if (XE_WARN_ON(view->type == I915_GTT_VIEW_REMAPPED)) { + ret = -ENODEV; + goto err; + } + + /* + * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the + * assumptions are incorrect for framebuffers + */ + ret = ttm_bo_reserve(&bo->ttm, false, false, NULL); + if (ret) + goto err; + + if (IS_DGFX(xe)) + ret = xe_bo_migrate(bo, XE_PL_VRAM0); + else + ret = xe_bo_validate(bo, NULL, true); + if (!ret) + ttm_bo_pin(&bo->ttm); + ttm_bo_unreserve(&bo->ttm); + if (ret) + goto err; + + vma->bo = bo; + if (intel_fb_uses_dpt(&fb->base)) + ret = __xe_pin_fb_vma_dpt(fb, view, vma); + else + ret = __xe_pin_fb_vma_ggtt(fb, view, vma); + if (ret) + goto err_unpin; + + return vma; + +err_unpin: + ttm_bo_reserve(&bo->ttm, false, false, NULL); + ttm_bo_unpin(&bo->ttm); + ttm_bo_unreserve(&bo->ttm); +err: + kfree(vma); + return ERR_PTR(ret); +} + +static void __xe_unpin_fb_vma(struct i915_vma *vma) +{ + struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev); + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + + if (vma->dpt) + xe_bo_unpin_map_no_vm(vma->dpt); + else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || + vma->bo->ggtt_node.start != vma->node.start) + xe_ggtt_remove_node(ggtt, &vma->node); + + ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); + ttm_bo_unpin(&vma->bo->ttm); + ttm_bo_unreserve(&vma->bo->ttm); + kfree(vma); +} + +struct i915_vma * +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, + bool phys_cursor, + const struct i915_gtt_view *view, + bool uses_fence, + unsigned long *out_flags) +{ + *out_flags = 0; + + return __xe_pin_fb_vma(to_intel_framebuffer(fb), view); +} + +void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) +{ + __xe_unpin_fb_vma(vma); +} + +int intel_plane_pin_fb(struct intel_plane_state *plane_state) +{ + struct drm_framebuffer *fb = plane_state->hw.fb; + struct xe_bo *bo = intel_fb_obj(fb); + struct i915_vma *vma; + + /* We reject creating !SCANOUT fb's, so this is weird.. */ + drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT)); + + vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + plane_state->ggtt_vma = vma; + return 0; +} + +void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) +{ + __xe_unpin_fb_vma(old_plane_state->ggtt_vma); + old_plane_state->ggtt_vma = NULL; +} + +/* + * For Xe introduce dummy intel_dpt_create which just return NULL and + * intel_dpt_destroy which does nothing. + */ +struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb) +{ + return NULL; +} + +void intel_dpt_destroy(struct i915_address_space *vm) +{ + return; +} \ No newline at end of file diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c new file mode 100644 index 000000000000..0453293af8ef --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023, Intel Corporation. 
+ */ + +#include "i915_drv.h" +#include "intel_hdcp_gsc.h" + +int intel_hdcp_gsc_init(struct drm_i915_private *i915) +{ + drm_info(&i915->drm, "HDCP support not yet implemented\n"); + return -ENODEV; +} + +void intel_hdcp_gsc_fini(struct drm_i915_private *i915) +{ +} + +ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, + size_t msg_in_len, u8 *msg_out, + size_t msg_out_len) +{ + return -ENODEV; +} diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c new file mode 100644 index 000000000000..ccf83c12b545 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +/* for ioread64 */ +#include + +#include "xe_ggtt.h" + +#include "i915_drv.h" +#include "intel_atomic_plane.h" +#include "intel_display.h" +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_fb_pin.h" +#include "intel_frontbuffer.h" +#include "intel_plane_initial.h" + +static bool +intel_reuse_initial_plane_obj(struct drm_i915_private *i915, + const struct intel_initial_plane_config *plane_config, + struct drm_framebuffer **fb) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&i915->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + if (!crtc_state->uapi.active) + continue; + + if (!plane_state->ggtt_vma) + continue; + + if (intel_plane_ggtt_offset(plane_state) == plane_config->base) { + *fb = plane_state->hw.fb; + return true; + } + } + + return false; +} + +static struct xe_bo * +initial_plane_bo(struct xe_device *xe, + struct intel_initial_plane_config *plane_config) +{ + struct xe_tile *tile0 = xe_device_get_root_tile(xe); + struct xe_bo *bo; + resource_size_t phys_base; + u32 base, size, flags; + u64 page_size = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; + + if (plane_config->size == 0) + return NULL; + + flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT; + + base = round_down(plane_config->base, page_size); + if (IS_DGFX(xe)) { + u64 __iomem *gte = tile0->mem.ggtt->gsm; + u64 pte; + + gte += base / XE_PAGE_SIZE; + + pte = ioread64(gte); + if (!(pte & XE_GGTT_PTE_DM)) { + drm_err(&xe->drm, + "Initial plane programming missing DM bit\n"); + return NULL; + } + + phys_base = pte & ~(page_size - 1); + flags |= XE_BO_CREATE_VRAM0_BIT; + + /* + * We don't currently expect this to ever be placed in the + * stolen portion. + */ + if (phys_base >= tile0->mem.vram.usable_size) { + drm_err(&xe->drm, + "Initial plane programming using invalid range, phys_base=%pa\n", + &phys_base); + return NULL; + } + + drm_dbg(&xe->drm, + "Using phys_base=%pa, based on initial plane programming\n", + &phys_base); + } else { + struct ttm_resource_manager *stolen = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); + + if (!stolen) + return NULL; + phys_base = base; + flags |= XE_BO_CREATE_STOLEN_BIT; + + /* + * If the FB is too big, just don't use it since fbdev is not very + * important and we should probably use that space with FBC or other + * features. 
+ */ + if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) && + plane_config->size * 2 >> PAGE_SHIFT >= stolen->size) + return NULL; + } + + size = round_up(plane_config->base + plane_config->size, + page_size); + size -= base; + + bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base, + ttm_bo_type_kernel, flags); + if (IS_ERR(bo)) { + drm_dbg(&xe->drm, + "Failed to create bo phys_base=%pa size %u with flags %x: %li\n", + &phys_base, size, flags, PTR_ERR(bo)); + return NULL; + } + + return bo; +} + +static bool +intel_alloc_initial_plane_obj(struct intel_crtc *crtc, + struct intel_initial_plane_config *plane_config) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_mode_fb_cmd2 mode_cmd = { 0 }; + struct drm_framebuffer *fb = &plane_config->fb->base; + struct xe_bo *bo; + + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: + case I915_FORMAT_MOD_X_TILED: + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_4_TILED: + break; + default: + drm_dbg(&dev_priv->drm, + "Unsupported modifier for initial FB: 0x%llx\n", + fb->modifier); + return false; + } + + mode_cmd.pixel_format = fb->format->format; + mode_cmd.width = fb->width; + mode_cmd.height = fb->height; + mode_cmd.pitches[0] = fb->pitches[0]; + mode_cmd.modifier[0] = fb->modifier; + mode_cmd.flags = DRM_MODE_FB_MODIFIERS; + + bo = initial_plane_bo(dev_priv, plane_config); + if (!bo) + return false; + + if (intel_framebuffer_init(to_intel_framebuffer(fb), + bo, &mode_cmd)) { + drm_dbg_kms(&dev_priv->drm, "intel fb init failed\n"); + goto err_bo; + } + /* Reference handed over to fb */ + xe_bo_put(bo); + + return true; + +err_bo: + xe_bo_unpin_map_no_vm(bo); + return false; +} + +static void +intel_find_initial_plane_obj(struct intel_crtc *crtc, + struct intel_initial_plane_config *plane_config) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct drm_framebuffer *fb; + struct i915_vma *vma; + + /* + * TODO: + * Disable planes if get_initial_plane_config() failed. + * Make sure things work if the surface base is not page aligned. 
+ */ + if (!plane_config->fb) + return; + + if (intel_alloc_initial_plane_obj(crtc, plane_config)) + fb = &plane_config->fb->base; + else if (!intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb)) + goto nofb; + + plane_state->uapi.rotation = plane_config->rotation; + intel_fb_fill_view(to_intel_framebuffer(fb), + plane_state->uapi.rotation, &plane_state->view); + + vma = intel_pin_and_fence_fb_obj(fb, false, &plane_state->view.gtt, + false, &plane_state->flags); + if (IS_ERR(vma)) + goto nofb; + + plane_state->ggtt_vma = vma; + plane_state->uapi.src_x = 0; + plane_state->uapi.src_y = 0; + plane_state->uapi.src_w = fb->width << 16; + plane_state->uapi.src_h = fb->height << 16; + + plane_state->uapi.crtc_x = 0; + plane_state->uapi.crtc_y = 0; + plane_state->uapi.crtc_w = fb->width; + plane_state->uapi.crtc_h = fb->height; + + plane_state->uapi.fb = fb; + drm_framebuffer_get(fb); + + plane_state->uapi.crtc = &crtc->base; + intel_plane_copy_uapi_to_hw_state(plane_state, plane_state, crtc); + + atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits); + + plane_config->vma = vma; + + /* + * Flip to the newly created mapping ASAP, so we can re-use the + * first part of GGTT for WOPCM, prevent flickering, and prevent + * the lookup of sysmem scratch pages. + */ + plane->check_plane(crtc_state, plane_state); + plane->async_flip(plane, crtc_state, plane_state, true); + return; + +nofb: + /* + * We've failed to reconstruct the BIOS FB. Current display state + * indicates that the primary plane is visible, but has a NULL FB, + * which will lead to problems later if we don't fix it up. The + * simplest solution is to just disable the primary plane now and + * pretend the BIOS never had it enabled. + */ + intel_plane_disable_noatomic(crtc, plane); +} + +static void plane_config_fini(struct intel_initial_plane_config *plane_config) +{ + if (plane_config->fb) { + struct drm_framebuffer *fb = &plane_config->fb->base; + + /* We may only have the stub and not a full framebuffer */ + if (drm_framebuffer_read_refcount(fb)) + drm_framebuffer_put(fb); + else + kfree(fb); + } +} + +void intel_crtc_initial_plane_config(struct intel_crtc *crtc) +{ + struct xe_device *xe = to_xe_device(crtc->base.dev); + struct intel_initial_plane_config plane_config = {}; + + /* + * Note that reserving the BIOS fb up front prevents us + * from stuffing other stolen allocations like the ring + * on top. This prevents some ugliness at boot time, and + * can even allow for smooth boot transitions if the BIOS + * fb is large enough for the active pipe configuration. + */ + xe->display.funcs.display->get_initial_plane_config(crtc, &plane_config); + + /* + * If the fb is shared between multiple heads, we'll + * just get the first one. 
+ */ + intel_find_initial_plane_obj(crtc, &plane_config); + + plane_config_fini(&plane_config); +} diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 5078a9e69859..6e20fc2de9ff 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -6,7 +6,7 @@ #ifndef _XE_REG_DEFS_H_ #define _XE_REG_DEFS_H_ -#include "../../i915/i915_reg_defs.h" +#include "compat-i915-headers/i915_reg_defs.h" /** * struct xe_reg - Register definition diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 924f7c949d55..ec9372aa739f 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -56,19 +56,6 @@ #define GU_MISC_IRQ_OFFSET 0x444f0 #define GU_MISC_GSE REG_BIT(27) -#define TRANSCODER_A_OFFSET 0x60000 -#define TRANSCODER_B_OFFSET 0x61000 -#define TRANSCODER_C_OFFSET 0x62000 -#define TRANSCODER_D_OFFSET 0x63000 -#define TRANSCODER_DSI0_OFFSET 0x6b000 -#define TRANSCODER_DSI1_OFFSET 0x6b800 -#define PIPE_A_OFFSET 0x70000 -#define PIPE_B_OFFSET 0x71000 -#define PIPE_C_OFFSET 0x72000 -#define PIPE_D_OFFSET 0x73000 -#define PIPE_DSI0_OFFSET 0x7b000 -#define PIPE_DSI1_OFFSET 0x7b800 - #define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084) #define GU_CNTL_PROTECTED XE_REG(0x10100C) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index b96d1e7b9bad..c23a5694a788 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1400,9 +1400,9 @@ xe_bo_create_locked_range(struct xe_device *xe, xe_assert(xe, tile); - if (flags & XE_BO_CREATE_STOLEN_BIT && - flags & XE_BO_FIXED_PLACEMENT_BIT) { - err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, start); + if (flags & XE_BO_FIXED_PLACEMENT_BIT) { + err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, + start + bo->size, U64_MAX); } else { err = xe_ggtt_insert_bo(tile->mem.ggtt, bo); } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 5869ba7e0cdc..98d7e7fa12d8 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -18,6 +18,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_debugfs.h" +#include "xe_display.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" #include "xe_drv.h" @@ -190,6 +191,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->ordered_wq) destroy_workqueue(xe->ordered_wq); + if (xe->unordered_wq) + destroy_workqueue(xe->unordered_wq); + ttm_device_fini(&xe->ttm); } @@ -199,6 +203,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, struct xe_device *xe; int err; + xe_display_driver_set_hooks(&driver); + err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); if (err) return ERR_PTR(err); @@ -237,14 +243,16 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, INIT_LIST_HEAD(&xe->pinned.evicted); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); - if (!xe->ordered_wq) { - drm_err(&xe->drm, "Failed to allocate xe-ordered-wq\n"); + xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); + if (!xe->ordered_wq || !xe->unordered_wq) { + drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); err = -ENOMEM; goto err_put; } - drmm_mutex_init(&xe->drm, &xe->sb_lock); - xe->enabled_irq_mask = ~0; + err = xe_display_create(xe); + if (WARN_ON(err)) + goto err_put; return xe; @@ -346,6 +354,9 @@ int xe_device_probe(struct xe_device *xe) xe_pat_init_early(xe); xe->info.mem_region_mask = 1; + err = xe_display_init_nommio(xe); + if (err) + return err; 
for_each_tile(tile, xe, id) { err = xe_tile_alloc(tile); @@ -367,10 +378,14 @@ int xe_device_probe(struct xe_device *xe) return err; } - err = xe_irq_install(xe); + err = xe_display_init_noirq(xe); if (err) return err; + err = xe_irq_install(xe); + if (err) + goto err; + for_each_gt(gt, xe, id) { err = xe_gt_init_early(gt); if (err) @@ -392,6 +407,16 @@ int xe_device_probe(struct xe_device *xe) /* Allocate and map stolen after potential VRAM resize */ xe_ttm_stolen_mgr_init(xe); + /* + * Now that GT is initialized (TTM in particular), + * we can try to init display, and inherit the initial fb. + * This is the reason the first allocation needs to be done + * inside display. + */ + err = xe_display_init_noaccel(xe); + if (err) + goto err_irq_shutdown; + for_each_gt(gt, xe, id) { err = xe_gt_init(gt); if (err) @@ -400,10 +425,16 @@ int xe_device_probe(struct xe_device *xe) xe_heci_gsc_init(xe); + err = xe_display_init(xe); + if (err) + goto err_fini_display; + err = drm_dev_register(&xe->drm, 0); if (err) goto err_irq_shutdown; + xe_display_register(xe); + xe_debugfs_register(xe); xe_pmu_register(&xe->pmu); @@ -416,13 +447,30 @@ int xe_device_probe(struct xe_device *xe) return 0; +err_fini_display: + xe_display_driver_remove(xe); + err_irq_shutdown: xe_irq_shutdown(xe); +err: + xe_display_fini(xe); return err; } +static void xe_device_remove_display(struct xe_device *xe) +{ + xe_display_unregister(xe); + + drm_dev_unplug(&xe->drm); + xe_display_driver_remove(xe); +} + void xe_device_remove(struct xe_device *xe) { + xe_device_remove_display(xe); + + xe_display_fini(xe); + xe_heci_gsc_fini(xe); xe_irq_shutdown(xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 4425c2484a02..7607ee373605 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -20,6 +20,12 @@ #include "xe_pmu.h" #include "xe_step_types.h" +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) +#include "soc/intel_pch.h" +#include "intel_display_core.h" +#include "intel_display_device.h" +#endif + struct xe_ggtt; struct xe_pat_ops; @@ -247,12 +253,20 @@ struct xe_device { u8 has_llc:1; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ u8 has_range_tlb_invalidation:1; + /** @enable_display: display enabled */ + u8 enable_display:1; /** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */ u8 bypass_mtcfg:1; /** @supports_mmio_ext: supports MMIO extension/s */ u8 supports_mmio_ext:1; /** @has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; + +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + struct { + u32 rawclk_freq; + } i915_runtime; +#endif } info; /** @irq: device interrupt state */ @@ -323,6 +337,9 @@ struct xe_device { /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; + /** @unordered_wq: used to serialize unordered work, mostly display */ + struct workqueue_struct *unordered_wq; + /** @tiles: device tiles */ struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; @@ -391,10 +408,79 @@ struct xe_device { /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + /* + * Any fields below this point are the ones used by display. + * They are temporarily added here so xe_device can be desguised as + * drm_i915_private during build. 
After cleanup these should go away, + * migrating to the right sub-structs + */ + struct intel_display display; + enum intel_pch pch_type; + u16 pch_id; + + struct dram_info { + bool wm_lv_0_adjust_needed; + u8 num_channels; + bool symmetric_memory; + enum intel_dram_type { + INTEL_DRAM_UNKNOWN, + INTEL_DRAM_DDR3, + INTEL_DRAM_DDR4, + INTEL_DRAM_LPDDR3, + INTEL_DRAM_LPDDR4, + INTEL_DRAM_DDR5, + INTEL_DRAM_LPDDR5, + } type; + u8 num_qgv_points; + u8 num_psf_gv_points; + } dram_info; + + /* + * edram size in MB. + * Cannot be determined by PCIID. You must always read a register. + */ + u32 edram_size_mb; + + /* To shut up runtime pm macros.. */ + struct xe_runtime_pm {} runtime_pm; + /* For pcode */ struct mutex sb_lock; + /* Should be in struct intel_display */ + u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state; + u8 snps_phy_failed_calibration; + struct drm_atomic_state *modeset_restore_state; + struct list_head global_obj_list; + + union { + /* only to allow build, not used functionally */ + u32 irq_mask; + u32 de_irq_mask[I915_MAX_PIPES]; + }; + u32 pipestat_irq_mask[I915_MAX_PIPES]; + + bool display_irqs_enabled; u32 enabled_irq_mask; + + struct intel_uncore { + spinlock_t lock; + } uncore; + + /* only to allow build, not used functionally */ + struct { + unsigned int hpll_freq; + unsigned int czclk_freq; + unsigned int fsb_freq, mem_freq, is_ddr3; + u8 vblank_enabled; + }; + struct { + const char *dmc_firmware_path; + } params; + + void *pxp; +#endif }; /** diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c new file mode 100644 index 000000000000..edfc7fce1ed3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_display.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_display.h" +#include "regs/xe_regs.h" + +#include + +#include +#include +#include + +#include "soc/intel_dram.h" +#include "i915_drv.h" /* FIXME: HAS_DISPLAY() depends on this */ +#include "intel_acpi.h" +#include "intel_audio.h" +#include "intel_bw.h" +#include "intel_display.h" +#include "intel_display_driver.h" +#include "intel_display_irq.h" +#include "intel_display_types.h" +#include "intel_dmc.h" +#include "intel_dp.h" +#include "intel_fbdev.h" +#include "intel_hdcp.h" +#include "intel_hotplug.h" +#include "intel_opregion.h" +#include "xe_module.h" + +/* Xe device functions */ + +static bool has_display(struct xe_device *xe) +{ + return HAS_DISPLAY(xe); +} + +/** + * xe_display_driver_probe_defer - Detect if we need to wait for other drivers + * early on + * @pdev: PCI device + * + * Returns: true if probe needs to be deferred, false otherwise + */ +bool xe_display_driver_probe_defer(struct pci_dev *pdev) +{ + if (!enable_display) + return 0; + + return intel_display_driver_probe_defer(pdev); +} + +static void xe_display_last_close(struct drm_device *dev) +{ + struct xe_device *xe = to_xe_device(dev); + + if (xe->info.enable_display) + intel_fbdev_restore_mode(to_xe_device(dev)); +} + +/** + * xe_display_driver_set_hooks - Add driver flags and hooks for display + * @driver: DRM device driver + * + * Set features and function hooks in @driver that are needed for driving the + * display IP. 
This sets the driver's capability of driving display, regardless + * if the device has it enabled + */ +void xe_display_driver_set_hooks(struct drm_driver *driver) +{ + if (!enable_display) + return; + + driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; + driver->lastclose = xe_display_last_close; +} + +static void unset_display_features(struct xe_device *xe) +{ + xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); +} + +static void display_destroy(struct drm_device *dev, void *dummy) +{ + struct xe_device *xe = to_xe_device(dev); + + destroy_workqueue(xe->display.hotplug.dp_wq); +} + +/** + * xe_display_create - create display struct + * @xe: XE device instance + * + * Initialize all fields used by the display part. + * + * TODO: once everything can be inside a single struct, make the struct opaque + * to the rest of xe and return it to be xe->display. + * + * Returns: 0 on success + */ +int xe_display_create(struct xe_device *xe) +{ + int err; + + spin_lock_init(&xe->display.fb_tracking.lock); + + xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); + + drmm_mutex_init(&xe->drm, &xe->sb_lock); + drmm_mutex_init(&xe->drm, &xe->display.backlight.lock); + drmm_mutex_init(&xe->drm, &xe->display.audio.mutex); + drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex); + drmm_mutex_init(&xe->drm, &xe->display.pps.mutex); + drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex); + xe->enabled_irq_mask = ~0; + + err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); + if (err) + return err; + + return 0; +} + +static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) +{ + struct xe_device *xe = to_xe_device(dev); + + if (!xe->info.enable_display) + return; + + intel_power_domains_cleanup(xe); +} + +int xe_display_init_nommio(struct xe_device *xe) +{ + int err; + + if (!xe->info.enable_display) + return 0; + + /* Fake uncore lock */ + spin_lock_init(&xe->uncore.lock); + + /* This must be called before any calls to HAS_PCH_* */ + intel_detect_pch(xe); + + err = intel_power_domains_init(xe); + if (err) + return err; + + return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe); +} + +static void xe_display_fini_noirq(struct drm_device *dev, void *dummy) +{ + struct xe_device *xe = to_xe_device(dev); + + if (!xe->info.enable_display) + return; + + intel_display_driver_remove_noirq(xe); + intel_power_domains_driver_remove(xe); +} + +int xe_display_init_noirq(struct xe_device *xe) +{ + int err; + + if (!xe->info.enable_display) + return 0; + + intel_display_driver_early_probe(xe); + + /* Early display init.. */ + intel_opregion_setup(xe); + + /* + * Fill the dram structure to get the system dram info. This will be + * used for memory latency calculation. 
+ */ + intel_dram_detect(xe); + + intel_bw_init_hw(xe); + + intel_display_device_info_runtime_init(xe); + + err = intel_display_driver_probe_noirq(xe); + if (err) + return err; + + return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noirq, NULL); +} + +static void xe_display_fini_noaccel(struct drm_device *dev, void *dummy) +{ + struct xe_device *xe = to_xe_device(dev); + + if (!xe->info.enable_display) + return; + + intel_display_driver_remove_nogem(xe); +} + +int xe_display_init_noaccel(struct xe_device *xe) +{ + int err; + + if (!xe->info.enable_display) + return 0; + + err = intel_display_driver_probe_nogem(xe); + if (err) + return err; + + return drmm_add_action_or_reset(&xe->drm, xe_display_fini_noaccel, NULL); +} + +int xe_display_init(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return 0; + + return intel_display_driver_probe(xe); +} + +void xe_display_fini(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + /* poll work can call into fbdev, hence clean that up afterwards */ + intel_hpd_poll_fini(xe); + intel_fbdev_fini(xe); + + intel_hdcp_component_fini(xe); + intel_audio_deinit(xe); +} + +void xe_display_register(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + intel_display_driver_register(xe); + intel_register_dsm_handler(); + intel_power_domains_enable(xe); +} + +void xe_display_unregister(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + intel_unregister_dsm_handler(); + intel_power_domains_disable(xe); + intel_display_driver_unregister(xe); +} + +void xe_display_driver_remove(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + intel_display_driver_remove(xe); + + intel_display_device_remove(xe); +} + +/* IRQ-related functions */ + +void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) +{ + if (!xe->info.enable_display) + return; + + if (master_ctl & DISPLAY_IRQ) + gen11_display_irq_handler(xe); +} + +void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) +{ + if (!xe->info.enable_display) + return; + + if (gu_misc_iir & GU_MISC_GSE) + intel_opregion_asle_intr(xe); +} + +void xe_display_irq_reset(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + gen11_display_irq_reset(xe); +} + +void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) +{ + if (!xe->info.enable_display) + return; + + if (gt->info.id == XE_GT0) + gen11_de_irq_postinstall(xe); +} + +static void intel_suspend_encoders(struct xe_device *xe) +{ + struct drm_device *dev = &xe->drm; + struct intel_encoder *encoder; + + if (has_display(xe)) + return; + + drm_modeset_lock_all(dev); + for_each_intel_encoder(dev, encoder) + if (encoder->suspend) + encoder->suspend(encoder); + drm_modeset_unlock_all(dev); +} + +void xe_display_pm_suspend(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + /* + * We do a lot of poking in a lot of registers, make sure they work + * properly. 
+ */ + intel_power_domains_disable(xe); + if (has_display(xe)) + drm_kms_helper_poll_disable(&xe->drm); + + intel_display_driver_suspend(xe); + + intel_dp_mst_suspend(xe); + + intel_hpd_cancel_work(xe); + + intel_suspend_encoders(xe); + + intel_opregion_suspend(xe, PCI_D3cold); + + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + + intel_dmc_suspend(xe); +} + +void xe_display_pm_suspend_late(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + intel_power_domains_suspend(xe, I915_DRM_SUSPEND_MEM); + + intel_display_power_suspend_late(xe); +} + +void xe_display_pm_resume_early(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + intel_display_power_resume_early(xe); + + intel_power_domains_resume(xe); +} + +void xe_display_pm_resume(struct xe_device *xe) +{ + if (!xe->info.enable_display) + return; + + intel_dmc_resume(xe); + + if (has_display(xe)) + drm_mode_config_reset(&xe->drm); + + intel_display_driver_init_hw(xe); + intel_hpd_init(xe); + + /* MST sideband requires HPD interrupts enabled */ + intel_dp_mst_resume(xe); + intel_display_driver_resume(xe); + + intel_hpd_poll_disable(xe); + if (has_display(xe)) + drm_kms_helper_poll_enable(&xe->drm); + + intel_opregion_resume(xe); + + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); + + intel_power_domains_enable(xe); +} + +void xe_display_probe(struct xe_device *xe) +{ + if (!xe->info.enable_display) + goto no_display; + + intel_display_device_probe(xe); + + if (has_display(xe)) + return; + +no_display: + xe->info.enable_display = false; + unset_display_features(xe); +} diff --git a/drivers/gpu/drm/xe/xe_display.h b/drivers/gpu/drm/xe/xe_display.h new file mode 100644 index 000000000000..710e56180b52 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_display.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DISPLAY_H_ +#define _XE_DISPLAY_H_ + +#include "xe_device.h" + +struct drm_driver; + +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + +bool xe_display_driver_probe_defer(struct pci_dev *pdev); +void xe_display_driver_set_hooks(struct drm_driver *driver); +void xe_display_driver_remove(struct xe_device *xe); + +int xe_display_create(struct xe_device *xe); + +void xe_display_probe(struct xe_device *xe); + +int xe_display_init_nommio(struct xe_device *xe); +int xe_display_init_noirq(struct xe_device *xe); +int xe_display_init_noaccel(struct xe_device *xe); +int xe_display_init(struct xe_device *xe); +void xe_display_fini(struct xe_device *xe); + +void xe_display_register(struct xe_device *xe); +void xe_display_unregister(struct xe_device *xe); + +void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir); +void xe_display_irq_reset(struct xe_device *xe); +void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt); + +void xe_display_pm_suspend(struct xe_device *xe); +void xe_display_pm_suspend_late(struct xe_device *xe); +void xe_display_pm_resume_early(struct xe_device *xe); +void xe_display_pm_resume(struct xe_device *xe); + +#else + +static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0; } +static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { } +static inline void xe_display_driver_remove(struct xe_device *xe) {} + +static inline int xe_display_create(struct xe_device *xe) { return 0; } + +static inline void xe_display_probe(struct xe_device *xe) { } + +static inline int 
xe_display_init_nommio(struct xe_device *xe) { return 0; } +static inline int xe_display_init_noirq(struct xe_device *xe) { return 0; } +static inline int xe_display_init_noaccel(struct xe_device *xe) { return 0; } +static inline int xe_display_init(struct xe_device *xe) { return 0; } +static inline void xe_display_fini(struct xe_device *xe) {} + +static inline void xe_display_register(struct xe_device *xe) {} +static inline void xe_display_unregister(struct xe_device *xe) {} + +static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {} +static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {} +static inline void xe_display_irq_reset(struct xe_device *xe) {} +static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {} + +static inline void xe_display_pm_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} +static inline void xe_display_pm_resume_early(struct xe_device *xe) {} +static inline void xe_display_pm_resume(struct xe_device *xe) {} + +#endif /* CONFIG_DRM_XE_DISPLAY */ +#endif /* _XE_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 1368616f20fa..0e2a41837f16 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -338,9 +338,13 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) } static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end, u64 alignment) + u64 start, u64 end) { int err; + u64 alignment = XE_PAGE_SIZE; + + if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) + alignment = SZ_64K; if (XE_WARN_ON(bo->ggtt_node.size)) { /* Someone's already inserted this BO in the GGTT */ @@ -364,26 +368,15 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return err; } -int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs) +int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, + u64 start, u64 end) { - if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) { - if (XE_WARN_ON(!IS_ALIGNED(ofs, SZ_64K)) || - XE_WARN_ON(!IS_ALIGNED(bo->size, SZ_64K))) - return -EINVAL; - } - - return __xe_ggtt_insert_bo_at(ggtt, bo, ofs, ofs + bo->size, 0); + return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); } int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - u64 alignment; - - alignment = XE_PAGE_SIZE; - if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) - alignment = SZ_64K; - - return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, alignment); + return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 04bb26b0938e..3faa3c6d0375 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -24,7 +24,8 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); -int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 ofs); +int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, + u64 start, u64 end); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_irq.c 
b/drivers/gpu/drm/xe/xe_irq.c index ef26120e7aa4..c5315e02fc5b 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -12,6 +12,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_device.h" +#include "xe_display.h" #include "xe_drv.h" #include "xe_gt.h" #include "xe_guc.h" @@ -351,10 +352,14 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) gt_irq_handler(tile, master_ctl, intr_dw, identity); + xe_display_irq_handler(xe, master_ctl); + gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); xelp_intr_enable(xe, false); + xe_display_irq_enable(xe, gu_misc_iir); + xe_pmu_irq_stats(xe); return IRQ_HANDLED; @@ -444,11 +449,14 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) * that get reported as Gunit GSE) would only be hooked up to * the primary tile. */ - if (id == 0) + if (id == 0) { + xe_display_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); + } } dg1_intr_enable(xe, false); + xe_display_irq_enable(xe, gu_misc_iir); xe_pmu_irq_stats(xe); @@ -542,6 +550,7 @@ static void xe_irq_reset(struct xe_device *xe) tile = xe_device_get_root_tile(xe); mask_and_disable(tile, GU_MISC_IRQ_OFFSET); + xe_display_irq_reset(xe); /* * The tile's top-level status register should be the last one @@ -556,6 +565,8 @@ static void xe_irq_reset(struct xe_device *xe) static void xe_irq_postinstall(struct xe_device *xe) { + xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); + /* * ASLE backlight operations are reported via GUnit GSE interrupts * on the root tile. diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 82817a46f887..7194595e7f31 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -19,6 +19,10 @@ bool force_execlist = false; module_param_named_unsafe(force_execlist, force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); +bool enable_display = true; +module_param_named(enable_display, enable_display, bool, 0444); +MODULE_PARM_DESC(enable_display, "Enable display"); + u32 xe_force_vram_bar_size; module_param_named(vram_bar_size, xe_force_vram_bar_size, uint, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 40d89d4df87c..05c6b6df8210 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -17,6 +17,7 @@ #include "regs/xe_regs.h" #include "regs/xe_gt_regs.h" #include "xe_device.h" +#include "xe_display.h" #include "xe_drv.h" #include "xe_gt.h" #include "xe_macros.h" @@ -55,6 +56,7 @@ struct xe_device_desc { u8 require_force_probe:1; u8 is_dgfx:1; + u8 has_display:1; u8 has_heci_gscfi:1; u8 has_llc:1; @@ -62,6 +64,9 @@ struct xe_device_desc { u8 supports_mmio_ext:1; }; +__diag_push(); +__diag_ignore_all("-Woverride-init", "Allow field overrides in table"); + #define PLATFORM(x) \ .platform = (x), \ .platform_name = #x @@ -205,7 +210,8 @@ static const struct xe_device_desc tgl_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_TIGERLAKE), - .has_llc = 1, + .has_display = true, + .has_llc = true, .require_force_probe = true, }; @@ -213,6 +219,7 @@ static const struct xe_device_desc rkl_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_ROCKETLAKE), + .has_display = true, .has_llc = true, .require_force_probe = true, }; @@ -223,7 +230,8 @@ static const struct xe_device_desc adl_s_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_ALDERLAKE_S), - .has_llc = 
1, + .has_display = true, + .has_llc = true, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -237,7 +245,8 @@ static const struct xe_device_desc adl_p_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_ALDERLAKE_P), - .has_llc = 1, + .has_display = true, + .has_llc = true, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -249,7 +258,8 @@ static const struct xe_device_desc adl_n_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(XE_ALDERLAKE_N), - .has_llc = 1, + .has_display = true, + .has_llc = true, .require_force_probe = true, }; @@ -261,6 +271,7 @@ static const struct xe_device_desc dg1_desc = { .media = &media_xem, DGFX_FEATURES, PLATFORM(XE_DG1), + .has_display = true, .require_force_probe = true, .has_heci_gscfi = 1, }; @@ -286,6 +297,7 @@ static const struct xe_device_desc ats_m_desc = { .require_force_probe = true, DG2_FEATURES, + .has_display = false, }; static const struct xe_device_desc dg2_desc = { @@ -294,12 +306,14 @@ static const struct xe_device_desc dg2_desc = { .require_force_probe = true, DG2_FEATURES, + .has_display = true, }; static const __maybe_unused struct xe_device_desc pvc_desc = { .graphics = &graphics_xehpc, DGFX_FEATURES, PLATFORM(XE_PVC), + .has_display = false, .require_force_probe = true, .has_heci_gscfi = 1, }; @@ -308,6 +322,7 @@ static const struct xe_device_desc mtl_desc = { /* .graphics and .media determined via GMD_ID */ .require_force_probe = true, PLATFORM(XE_METEORLAKE), + .has_display = true, }; static const struct xe_device_desc lnl_desc = { @@ -316,6 +331,7 @@ static const struct xe_device_desc lnl_desc = { }; #undef PLATFORM +__diag_pop(); /* Map of GMD_ID values to graphics IP */ static struct gmdid_map graphics_ip_map[] = { @@ -574,6 +590,9 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; + xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && + enable_display && + desc->has_display; /* * All platforms have at least one primary GT. Any platform with media * version 13 or higher has an additional dedicated media GT. And @@ -668,6 +687,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENODEV; } + if (xe_display_driver_probe_defer(pdev)) + return -EPROBE_DEFER; + xe = xe_device_create(pdev, ent); if (IS_ERR(xe)) return PTR_ERR(xe); @@ -686,7 +708,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_pci_disable; - drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) dma_m_s:%d tc:%d gscfi:%d", + xe_display_probe(xe); + + drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d", desc->platform_name, subplatform_desc ? 
subplatform_desc->name : "", xe->info.devid, xe->info.revid, @@ -697,6 +721,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.media_name, xe->info.media_verx100 / 100, xe->info.media_verx100 % 100, + str_yes_no(xe->info.enable_display), xe->info.dma_mask_size, xe->info.tile_count, xe->info.has_heci_gscfi); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 93a7658da324..e31a91cf311c 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -14,6 +14,7 @@ #include "xe_bo_evict.h" #include "xe_device.h" #include "xe_device_sysfs.h" +#include "xe_display.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc.h" @@ -61,14 +62,20 @@ int xe_pm_suspend(struct xe_device *xe) if (err) return err; + xe_display_pm_suspend(xe); + for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); - if (err) + if (err) { + xe_display_pm_resume(xe); return err; + } } xe_irq_suspend(xe); + xe_display_pm_suspend_late(xe); + return 0; } @@ -94,6 +101,8 @@ int xe_pm_resume(struct xe_device *xe) return err; } + xe_display_pm_resume_early(xe); + /* * This only restores pinned memory which is the memory required for the * GT(s) to resume. @@ -104,6 +113,8 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); + xe_display_pm_resume(xe); + for_each_gt(gt, xe, id) xe_gt_resume(gt); -- cgit v1.2.3 From f6761c68c0ace6f4e3df6b03209fab09d472b727 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 28 Jul 2023 16:13:22 +0200 Subject: drm/xe/display: Improve s2idle handling. We accidentally always pass true as s2idle argument, instead of calculating it in the same way as i915. Suspend modes were removed to achieve compatibility with i915, but accidentally left in the source code. While at it, fix all other cases too, s2idle will go into a D1 state and setting a lower power state should be handled by PCI core. Maybe my laptop stops draining so much power during suspend now? I can only hope.. Signed-off-by: Maarten Lankhorst Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h | 6 ------ drivers/gpu/drm/xe/xe_display.c | 6 ++++-- drivers/gpu/drm/xe/xe_pci.c | 6 ------ 3 files changed, 4 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h index f7f3286e2c53..89da3cc62f39 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_runtime_pm.h @@ -5,12 +5,6 @@ #include "intel_wakeref.h" -enum i915_drm_suspend_mode { - I915_DRM_SUSPEND_IDLE, - I915_DRM_SUSPEND_MEM, - I915_DRM_SUSPEND_HIBERNATE, -}; - #define intel_runtime_pm xe_runtime_pm static inline void disable_rpm_wakeref_asserts(void *rpm) diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c index edfc7fce1ed3..fa20faf3cc83 100644 --- a/drivers/gpu/drm/xe/xe_display.c +++ b/drivers/gpu/drm/xe/xe_display.c @@ -321,6 +321,7 @@ static void intel_suspend_encoders(struct xe_device *xe) void xe_display_pm_suspend(struct xe_device *xe) { + bool s2idle = acpi_target_system_state() < ACPI_STATE_S3; if (!xe->info.enable_display) return; @@ -340,7 +341,7 @@ void xe_display_pm_suspend(struct xe_device *xe) intel_suspend_encoders(xe); - intel_opregion_suspend(xe, PCI_D3cold); + intel_opregion_suspend(xe, s2idle ? 
PCI_D1 : PCI_D3cold); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); @@ -349,10 +350,11 @@ void xe_display_pm_suspend(struct xe_device *xe) void xe_display_pm_suspend_late(struct xe_device *xe) { + bool s2idle = acpi_target_system_state() < ACPI_STATE_S3; if (!xe->info.enable_display) return; - intel_power_domains_suspend(xe, I915_DRM_SUSPEND_MEM); + intel_power_domains_suspend(xe, s2idle); intel_display_power_suspend_late(xe); } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 05c6b6df8210..097b68598191 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -795,10 +795,6 @@ static int xe_pci_suspend(struct device *dev) pci_save_state(pdev); pci_disable_device(pdev); - err = pci_set_power_state(pdev, PCI_D3hot); - if (err) - return err; - return 0; } @@ -814,8 +810,6 @@ static int xe_pci_resume(struct device *dev) if (err) return err; - pci_restore_state(pdev); - err = pci_enable_device(pdev); if (err) return err; -- cgit v1.2.3 From e5b6e616c63f0d931e1be0d1c17cc80ec0fd3ea3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 14 Mar 2023 17:49:02 -0700 Subject: drm/xe/display: Silence kernel-doc warnings related to display Add a "private:" comment to the part of the struct that is not expected to be documented, the one with display-related fields. This silence the following warnings: $ find drivers/gpu/drm/xe -name '*.[ch]' -not -path 'drivers/gpu/drm/xe/display/*' | xargs ./scripts/kernel-doc -Werror -none drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'display' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'pch_type' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'pch_id' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'wm_lv_0_adjust_needed' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'num_channels' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'symmetric_memory' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'type' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'num_qgv_points' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'num_psf_gv_points' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'dram_info' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'runtime_pm' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'sb_lock' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'skl_preferred_vco_freq' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'max_dotclk_freq' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'hti_state' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'snps_phy_failed_calibration' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: 
Function parameter or member 'modeset_restore_state' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'global_obj_list' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'de_irq_mask' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'display_irqs_enabled' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'enabled_irq_mask' not described in 'xe_device' drivers/gpu/drm/xe/xe_device_types.h:316: warning: Function parameter or member 'params' not described in 'xe_device' 22 warnings as Errors Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Lucas De Marchi Acked-by: Jani Nikula Link: https://lore.kernel.org/r/20230315004902.2622613-1-lucas.demarchi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 7607ee373605..be11cadccbd4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -408,6 +408,8 @@ struct xe_device { /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; + /* private: */ + #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) /* * Any fields below this point are the ones used by display. -- cgit v1.2.3 From f02d48b881e2c0138f570884f8ead14d3f86ba21 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 31 Mar 2023 09:46:27 +0100 Subject: drm/xe/display: ensure clear-color surfaces are cpu mappable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The KMD needs to access the clear-color value stored in the buffer via the CPU. On small-bar systems reject any buffers that are potentially not CPU accessible. Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: Thomas Hellström Cc: Gwan-gyeong Mun Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Filip Hazubski Cc: Carl Zhang Cc: Effie Yu Reviewed-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun [ Split display-related changes from small-bar support ] Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 67b956a6da8d..16e04b24daee 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -217,6 +217,23 @@ static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb, goto err; } + if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && + intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && + !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) { + struct xe_tile *tile = xe_device_get_root_tile(xe); + + /* + * If we need to able to access the clear-color value stored in + * the buffer, then we require that such buffers are also CPU + * accessible. This is important on small-bar systems where + * only some subset of VRAM is CPU accessible. 
+ */ + if (tile->mem.vram.io_size < tile->mem.vram.usable_size) { + ret = -EINVAL; + goto err; + } + } + /* * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the * assumptions are incorrect for framebuffers -- cgit v1.2.3 From 9914e19cc215d339b618ccae993e16ed7aafb54e Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Wed, 30 Aug 2023 09:33:32 +0300 Subject: drm/xe/display: fix error handling flow when device probing fails Upon device probe failure, rolling back the initialization should be done in reversed order. Signed-off-by: Koby Elbaz Reviewed-by: Ohad Sharabi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 98d7e7fa12d8..1202f8007f79 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -427,11 +427,11 @@ int xe_device_probe(struct xe_device *xe) err = xe_display_init(xe); if (err) - goto err_fini_display; + goto err_irq_shutdown; err = drm_dev_register(&xe->drm, 0); if (err) - goto err_irq_shutdown; + goto err_fini_display; xe_display_register(xe); -- cgit v1.2.3 From 04316b4ae6e094569737bababac6f2ef130c0020 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 5 Sep 2023 19:49:42 +0000 Subject: drm/xe/display: Use acpi_target_system_state only if ACPI_SLEEP is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the build error below with CONFIG_ACPI_SLEEP=n: drivers/gpu/drm/xe/xe_display.c:334:23: error: implicit declaration of function ‘acpi_target_system_state’; did you mean ‘acpi_get_system_info’? [-Werror=implicit-function-declaration] 334 | bool s2idle = acpi_target_system_state() < ACPI_STATE_S3; Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_display.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c index fa20faf3cc83..da10f16e1c12 100644 --- a/drivers/gpu/drm/xe/xe_display.c +++ b/drivers/gpu/drm/xe/xe_display.c @@ -319,9 +319,18 @@ static void intel_suspend_encoders(struct xe_device *xe) drm_modeset_unlock_all(dev); } +static bool suspend_to_idle(void) +{ +#if IS_ENABLED(CONFIG_ACPI_SLEEP) + if (acpi_target_system_state() < ACPI_STATE_S3) + return true; +#endif + return false; +} + void xe_display_pm_suspend(struct xe_device *xe) { - bool s2idle = acpi_target_system_state() < ACPI_STATE_S3; + bool s2idle = suspend_to_idle(); if (!xe->info.enable_display) return; @@ -350,7 +359,7 @@ void xe_display_pm_suspend(struct xe_device *xe) void xe_display_pm_suspend_late(struct xe_device *xe) { - bool s2idle = acpi_target_system_state() < ACPI_STATE_S3; + bool s2idle = suspend_to_idle(); if (!xe->info.enable_display) return; -- cgit v1.2.3 From 9aab7851ff1922930558274fd3983d047d1dfe22 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Tue, 12 Sep 2023 09:47:07 +0300 Subject: drm/xe/display: Add struct i915_active for Xe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add empty definition for struct i915_active to kill ifdefs from frontbuffer tracking code. 
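As a rough illustration of why an empty definition is sufficient (a simplified sketch, not the actual frontbuffer code; every name below other than struct i915_active is invented for the example): the shared display code only embeds the tracker object, so a zero-sized compat type is enough for it to build on Xe while i915 keeps its real implementation.

    #include "i915_active_types.h"   /* compat header added by this patch */

    /* sketch: shared display code can embed the tracker unconditionally */
    struct example_frontbuffer {
            struct i915_active write;   /* zero-sized compat stub on Xe */
    };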
Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h index e69de29bb2d1..8c31f9a8b168 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_ACTIVE_TYPES_H_ +#define _I915_ACTIVE_TYPES_H_ + +struct i915_active {}; +#define I915_ACTIVE_RETIRE_SLEEPS 0 + +#endif /* _I915_ACTIVE_TYPES_H_ */ -- cgit v1.2.3 From 1be5ff7f82063dab2e1d86bc21f2deb4cf4908bd Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Tue, 12 Sep 2023 09:47:01 +0300 Subject: drm/xe/display: Add macro to get i915 device from xe_bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helper macro to kill couple of #ifdefs Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index c628625c8a89..051fe990c133 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -78,4 +78,7 @@ struct xe_bo { bool created; }; +#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) +#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev) + #endif -- cgit v1.2.3 From cd494efdb8433f4a78f9bedb3e67d7505690f141 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Tue, 12 Sep 2023 09:47:03 +0300 Subject: drm/xe/display: Add frontbuffer setter/getter for xe_bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe is not carrying frontbuffer pointer in xe_bo. Define it's getter as NULL. Setter simply returns pointer which was provided as a parameter. v3: Do not take any references v2: Handle xe_bo_put as well Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- .../xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h new file mode 100644 index 000000000000..2a3f12d2978c --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object_frontbuffer.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _I915_GEM_OBJECT_FRONTBUFFER_H_ +#define _I915_GEM_OBJECT_FRONTBUFFER_H_ + +#define i915_gem_object_get_frontbuffer(obj) NULL +#define i915_gem_object_set_frontbuffer(obj, front) (front) + +#endif -- cgit v1.2.3 From 0071f1713dab8656e6c939d7be980f2ad3e8d312 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Tue, 12 Sep 2023 09:47:05 +0300 Subject: drm/xe/display: Add i915_active.h compatibility header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add empty definitions for i915_active_init/fini to kill ifdefs from frontbuffer tracking code. 
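For illustration, a minimal sketch of the call pattern this enables (not the actual intel_frontbuffer hunk; NULL callbacks and the function name are placeholders kept short for the example): with the stubs below, code shared with i915 can set up and tear down the tracker unconditionally instead of wrapping the calls in #ifdefs.

    #include "i915_active.h"   /* compat header added by this patch */

    /* sketch: both calls compile to empty stubs on Xe, real tracking on i915 */
    static void example_tracker_lifetime(struct i915_active *ref)
    {
            i915_active_init(ref, NULL, NULL, I915_ACTIVE_RETIRE_SLEEPS);
            i915_active_fini(ref);
    }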
Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- .../gpu/drm/xe/compat-i915-headers/i915_active.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_active.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h new file mode 100644 index 000000000000..6f0ab3753563 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_active.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _I915_ACTIVE_H_ +#define _I915_ACTIVE_H_ + +#include "i915_active_types.h" + +static inline void i915_active_init(struct i915_active *ref, + int (*active)(struct i915_active *ref), + void (*retire)(struct i915_active *ref), + unsigned long flags) +{ + (void) active; + (void) retire; +} + +#define i915_active_fini(active) do { } while (0) + +#endif -- cgit v1.2.3 From fb764a35c7f45a378ae064016c321d61532113b9 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Mon, 2 Oct 2023 13:23:56 +0300 Subject: drm/xe/display: Add empty def for i915_gem_object_flush_if_display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need i915_gem_object_flush_if_display on Xe side. Add empty define to tackle compilation errors with display code where it's used. Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 6283e27bc425..9d3b704a1030 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -325,6 +325,8 @@ static inline unsigned int xe_sg_segment_size(struct device *dev) return round_down(max / 2, PAGE_SIZE); } +#define i915_gem_object_flush_if_display(obj) ((void)(obj)) + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) /** * xe_bo_is_mem_type - Whether the bo currently resides in the given -- cgit v1.2.3 From c3744ceb99e54e41f9f4a7a8938f2e12e0be23f0 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Tue, 12 Sep 2023 09:47:09 +0300 Subject: drm/xe/display: Add empty define for i915_ggtt_clear_scanout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add empty define for i915_ggtt_clear_scanout to avoid build failure. Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index e4bbdffcd5f5..88771f5e03cc 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -22,6 +22,7 @@ struct i915_vma { struct drm_mm_node node; }; +#define i915_ggtt_clear_scanout(bo) do { } while (0) static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { -- cgit v1.2.3 From c5a2eadd729ba3538f77ea2e055ca1f2efe82092 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Wed, 13 Sep 2023 12:54:10 +0300 Subject: drm/xe/display: Xe stolen memory handling for fbc support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Xe stolen memory handling for fbc. 
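For illustration, a hypothetical caller sketch (not an actual intel_fbc.c hunk; the function name, size and range values are arbitrary): display code written against the i915 stolen-memory API keeps working unchanged, while the compat helpers below back the node with a pinned xe_bo placed in stolen memory.

    #include "i915_gem_stolen.h"   /* compat header added by this patch */

    /* sketch: allocate, query and release a stolen-memory node via the shim */
    static int example_alloc_compressed_fb(struct xe_device *xe)
    {
            struct i915_stolen_fb node = {};
            u64 gpu_addr;
            int err;

            err = i915_gem_stolen_insert_node_in_range(xe, &node, SZ_1M, SZ_4K,
                                                        0, U32_MAX);
            if (err)
                    return err;

            /* GPU address of the backing stolen memory */
            gpu_addr = i915_gem_stolen_node_address(xe, &node);
            drm_dbg(&xe->drm, "compressed fb at 0x%llx\n", gpu_addr);

            i915_gem_stolen_remove_node(xe, &node);
            return 0;
    }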
v3: - v2: Add parenthesis around parameter in i915_gem_stolen_node_allocated v2: - define i915_gem_stolen_area_address/size as !WARN_ON(1) - squash common type addition into this patch Signed-off-by: Jouni Högander Signed-off-by: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 + .../drm/xe/compat-i915-headers/i915_gem_stolen.h | 79 ++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index c3aa5936667a..738573ae5f10 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -19,6 +19,7 @@ #include "xe_bo.h" #include "xe_pm.h" #include "xe_step.h" +#include "i915_gem_stolen.h" #include "i915_gpu_error.h" #include "i915_reg_defs.h" #include "i915_utils.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h new file mode 100644 index 000000000000..888e7a87a925 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h @@ -0,0 +1,79 @@ +#ifndef _I915_GEM_STOLEN_H_ +#define _I915_GEM_STOLEN_H_ + +#include "xe_ttm_stolen_mgr.h" +#include "xe_res_cursor.h" + +struct xe_bo; + +struct i915_stolen_fb { + struct xe_bo *bo; +}; + +static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, + struct i915_stolen_fb *fb, + u32 size, u32 align, + u32 start, u32 end) +{ + struct xe_bo *bo; + int err; + u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT; + + bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), + NULL, size, start, end, + ttm_bo_type_kernel, flags); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + bo = NULL; + return err; + } + err = xe_bo_pin(bo); + xe_bo_unlock_vm_held(bo); + + if (err) { + xe_bo_put(fb->bo); + bo = NULL; + } + + fb->bo = bo; + + return err; +} + +static inline int i915_gem_stolen_insert_node(struct xe_device *xe, + struct i915_stolen_fb *fb, + u32 size, u32 align) +{ + /* Not used on xe */ + BUG_ON(1); + return -ENODEV; +} + +static inline void i915_gem_stolen_remove_node(struct xe_device *xe, + struct i915_stolen_fb *fb) +{ + xe_bo_unpin_map_no_vm(fb->bo); + fb->bo = NULL; +} + +#define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN)) +#define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo)) + +static inline u32 i915_gem_stolen_node_offset(struct i915_stolen_fb *fb) +{ + struct xe_res_cursor res; + + xe_res_first(fb->bo->ttm.resource, 0, 4096, &res); + return res.start; +} + +/* Used for < gen4. These are not supported by Xe */ +#define i915_gem_stolen_area_address(xe) (!WARN_ON(1)) +/* Used for gen9 specific WA. Gen9 is not supported by Xe */ +#define i915_gem_stolen_area_size(xe) (!WARN_ON(1)) + +#define i915_gem_stolen_node_address(xe, fb) (xe_ttm_stolen_gpu_offset(xe) + \ + i915_gem_stolen_node_offset(fb)) +#define i915_gem_stolen_node_size(fb) ((u64)((fb)->bo->ttm.base.size)) + +#endif -- cgit v1.2.3 From c890be73933a3c124ffa08411d8d279aeede4384 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Wed, 13 Sep 2023 12:54:11 +0300 Subject: drm/xe/display: Add i915_gem.h compatibility header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add i915_gem.h compatibility header and include it in i915_drv.h. 
Add empty GEM_BUG_ON definition for fbc code. Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 + drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 738573ae5f10..e5d1a4a3d8b4 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -19,6 +19,7 @@ #include "xe_bo.h" #include "xe_pm.h" #include "xe_step.h" +#include "i915_gem.h" #include "i915_gem_stolen.h" #include "i915_gpu_error.h" #include "i915_reg_defs.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h new file mode 100644 index 000000000000..06b723a479c5 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __I915_GEM_H__ +#define __I915_GEM_H__ +#define GEM_BUG_ON +#endif -- cgit v1.2.3 From 08ea5ea2e890e8fbc9875294e6087179574a3057 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Wed, 13 Sep 2023 12:54:12 +0300 Subject: drm/xe/display: Add Xe implementation for fence checks used by fbc code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe doesn't support legacy fences. Implement legacy fence and fence id checks accordingly. Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 + drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h | 2 ++ drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h | 11 +++++++++++ 3 files changed, 14 insertions(+) create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index e5d1a4a3d8b4..5d2a77b52db4 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -24,6 +24,7 @@ #include "i915_gpu_error.h" #include "i915_reg_defs.h" #include "i915_utils.h" +#include "intel_gt_types.h" #include "intel_step.h" #include "intel_uc_fw.h" #include "intel_uncore.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index 88771f5e03cc..a20d2638ea7a 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -24,6 +24,8 @@ struct i915_vma { #define i915_ggtt_clear_scanout(bo) do { } while (0) +#define i915_vma_fence_id(vma) -1 + static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { return vma->node.start; diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h new file mode 100644 index 000000000000..c15806d6c4f7 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_gt_types.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __INTEL_GT_TYPES__ +#define __INTEL_GT_TYPES__ + +#define intel_gt_support_legacy_fencing(gt) 0 + +#endif -- cgit v1.2.3 From 
c79802d100d1dd8b1748ea7dc232f5e059bdc7c5 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Fri, 6 Oct 2023 17:26:45 +0530 Subject: drm/xe/display: Create a dummy version for vga decode This introduces an exclusive version of vga decode for xe. Rest of the display changes will be re-used from i915. Currently it adds just a dummy implementation. VGA decode needs to be handled correctly in i915, proper implementation will be adopted once the i915 changes are finalized and merged in upstream. v2: Addressed Arun's review comments Signed-off-by: Uma Shankar Reviewed-by: Arun R Murthy Acked-by: Jani Nikula Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/display/xe_display_misc.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 drivers/gpu/drm/xe/display/xe_display_misc.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 2777cbf07cc6..41d92014a45c 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -152,6 +152,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_hdcp_gsc.o \ display/xe_plane_initial.o \ display/xe_display_rps.o \ + display/xe_display_misc.o \ display/intel_fbdev_fb.o \ display/intel_fb_bo.o \ display/ext/i915_irq.o \ diff --git a/drivers/gpu/drm/xe/display/xe_display_misc.c b/drivers/gpu/drm/xe/display/xe_display_misc.c new file mode 100644 index 000000000000..242c2ef4ca93 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_display_misc.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "intel_display_types.h" + +struct pci_dev; + +unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode); + +unsigned int intel_gmch_vga_set_decode(struct pci_dev *pdev, bool enable_decode) +{ + /* ToDo: Implement the actual handling of vga decode */ + return 0; +} -- cgit v1.2.3 From 95ab70f134d837a566f2d998b3090f40227a1b60 Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Mon, 16 Oct 2023 14:31:41 +0530 Subject: drm/xe/hdcp: Define intel_hdcp_gsc_check_status in Xe Define intel_hdcp_gsc_check_status in Xe to account for changes in i915 and Xe. intel_hdcp_check_status always returns false as gsc cs interface is not yet ported. 
intel_hdcp_gsc_cs_required always returns true as going forward gsc cs will always be required by upcoming platforms --v5 -Define intel_hdcp_gsc_cs_required() --v6 -Explain reasons for the return values [Chaitanya] Signed-off-by: Suraj Kandpal Reviewed-by: Chaitanya Kumar Borah Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 0453293af8ef..0f11a39333e2 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -6,6 +6,16 @@ #include "i915_drv.h" #include "intel_hdcp_gsc.h" +bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915) +{ + return true; +} + +bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915) +{ + return false; +} + int intel_hdcp_gsc_init(struct drm_i915_private *i915) { drm_info(&i915->drm, "HDCP support not yet implemented\n"); -- cgit v1.2.3 From ff180adfb923b2619f6a46c5a369d833b543a9f1 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Thu, 12 Oct 2023 16:52:08 +0300 Subject: drm/xe/display: Don't try to use vram if not available Trying to get bo from vram when vram not available will cause WARN_ON() hence avoid touching vram if not available. Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Michael J. Ruhl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 16e04b24daee..9fc2147a2f10 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -62,11 +62,12 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8, XE_PAGE_SIZE); - dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM0_BIT | - XE_BO_CREATE_GGTT_BIT); - if (IS_ERR(dpt)) + if (IS_DGFX(xe)) + dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM0_BIT | + XE_BO_CREATE_GGTT_BIT); + else dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, XE_BO_CREATE_STOLEN_BIT | -- cgit v1.2.3 From 216d62bb241a73b43dc89f67cdb60304f032956c Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Thu, 12 Oct 2023 16:52:09 +0300 Subject: drm/xe/display: Add writing of remapped dpt Xe need to use remapped display page table for tiled framebuffers on anywhere else than DG2. Here add function to write such dpt and enable usage of remapped display page tables where needed. Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Michael J. 
Ruhl Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 52 ++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 9fc2147a2f10..722c84a56607 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -45,6 +45,37 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ *dpt_ofs = ALIGN(*dpt_ofs, 4096); } +static void +write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, + u32 bo_ofs, u32 width, u32 height, u32 src_stride, + u32 dst_stride) +{ + struct xe_device *xe = xe_bo_device(bo); + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index) + = ggtt->pt_ops->pte_encode_bo; + u32 column, row; + + for (row = 0; row < height; row++) { + u32 src_idx = src_stride * row + bo_ofs; + + for (column = 0; column < width; column++) { + iosys_map_wr(map, *dpt_ofs, u64, + pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, + xe->pat.idx[XE_CACHE_WB])); + + *dpt_ofs += 8; + src_idx++; + } + + /* The DE ignores the PTEs for the padding tiles */ + *dpt_ofs += (dst_stride - width) * 8; + } + + /* Align to next page */ + *dpt_ofs = ALIGN(*dpt_ofs, 4096); +} + static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, const struct i915_gtt_view *view, struct i915_vma *vma) @@ -57,6 +88,9 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, if (view->type == I915_GTT_VIEW_NORMAL) dpt_size = ALIGN(size / XE_PAGE_SIZE * 8, XE_PAGE_SIZE); + else if (view->type == I915_GTT_VIEW_REMAPPED) + dpt_size = ALIGN(intel_remapped_info_size(&fb->remapped_view.gtt.remapped) * 8, + XE_PAGE_SIZE); else /* display uses 4K tiles instead of bytes here, convert to entries.. */ dpt_size = ALIGN(intel_rotation_info_size(&view->rotated) * 8, @@ -89,6 +123,18 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, iosys_map_wr(&dpt->vmap, x * 8, u64, pte); } + } else if (view->type == I915_GTT_VIEW_REMAPPED) { + const struct intel_remapped_info *remap_info = &view->remapped; + u32 i, dpt_ofs = 0; + + for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++) + write_dpt_remapped(bo, &dpt->vmap, &dpt_ofs, + remap_info->plane[i].offset, + remap_info->plane[i].width, + remap_info->plane[i].height, + remap_info->plane[i].src_stride, + remap_info->plane[i].dst_stride); + } else { const struct intel_rotation_info *rot_info = &view->rotated; u32 i, dpt_ofs = 0; @@ -212,12 +258,6 @@ static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb, if (!vma) return ERR_PTR(-ENODEV); - /* Remapped view is only required on ADL-P, which xe doesn't support. */ - if (XE_WARN_ON(view->type == I915_GTT_VIEW_REMAPPED)) { - ret = -ENODEV; - goto err; - } - if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) { -- cgit v1.2.3 From 3d78923bd07ad99a33b06eaa69194b35ac1637f1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Nov 2023 15:15:23 +0100 Subject: drm/xe/guc: Promote guc_to_gt/xe helpers to .h Duplicating these helpers in almost every .c file is a bad idea. Define them as inlines in .h file to allow proper reuse. 
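For illustration, a hypothetical caller sketch (the function below is made up; only guc_to_gt()/guc_to_xe() and the xe_guc.h location come from this patch): any GuC-related file can now use the shared inlines instead of carrying its own static copy of the container_of() helpers.

    #include "xe_guc.h"

    /* sketch: resolve the owning GT and device from a struct xe_guc */
    static void example_log_owner(struct xe_guc *guc)
    {
            struct xe_gt *gt = guc_to_gt(guc);
            struct xe_device *xe = guc_to_xe(guc);

            drm_dbg(&xe->drm, "GuC belongs to GT%u\n", gt->info.id);
    }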
Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Jani Nikula Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 6 ------ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 6 ------ drivers/gpu/drm/xe/xe_guc.c | 12 ------------ drivers/gpu/drm/xe/xe_guc.h | 11 +++++++++++ drivers/gpu/drm/xe/xe_guc_debugfs.c | 12 ------------ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 12 ------------ drivers/gpu/drm/xe/xe_guc_submit.c | 12 ------------ 7 files changed, 11 insertions(+), 60 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9fcbf8773b8b..1e083dda0679 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -59,12 +59,6 @@ struct acc { u8 engine_instance; }; -static struct xe_gt * -guc_to_gt(struct xe_guc *guc) -{ - return container_of(guc, struct xe_gt, uc.guc); -} - static bool access_is_atomic(enum access_type access_type) { return access_type == ACCESS_TYPE_ATOMIC; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index b5c39c55e1fa..a28f31c05b1b 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,12 +13,6 @@ #define TLB_TIMEOUT (HZ / 4) -static struct xe_gt * -guc_to_gt(struct xe_guc *guc) -{ - return container_of(guc, struct xe_gt, uc.guc); -} - static void xe_gt_tlb_fence_timeout(struct work_struct *work) { struct xe_gt *gt = container_of(work, struct xe_gt, diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 84f0b5488783..311c5d539423 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -27,18 +27,6 @@ #include "xe_wa.h" #include "xe_wopcm.h" -static struct xe_gt * -guc_to_gt(struct xe_guc *guc) -{ - return container_of(guc, struct xe_gt, uc.guc); -} - -static struct xe_device * -guc_to_xe(struct xe_guc *guc) -{ - return gt_to_xe(guc_to_gt(guc)); -} - /* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ #define GUC_GGTT_TOP 0xFEE00000 static u32 guc_bo_ggtt_addr(struct xe_guc *guc, diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 3addd8fc674a..d3e49e7fd7c3 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -6,6 +6,7 @@ #ifndef _XE_GUC_H_ #define _XE_GUC_H_ +#include "xe_gt.h" #include "xe_guc_types.h" #include "xe_hw_engine_types.h" #include "xe_macros.h" @@ -58,4 +59,14 @@ static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) } } +static inline struct xe_gt *guc_to_gt(struct xe_guc *guc) +{ + return container_of(guc, struct xe_gt, uc.guc); +} + +static inline struct xe_device *guc_to_xe(struct xe_guc *guc) +{ + return gt_to_xe(guc_to_gt(guc)); +} + #endif diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index 167eb5593e03..ffd7d53bcc42 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -15,18 +15,6 @@ #include "xe_guc_log.h" #include "xe_macros.h" -static struct xe_gt * -guc_to_gt(struct xe_guc *guc) -{ - return container_of(guc, struct xe_gt, uc.guc); -} - -static struct xe_device * -guc_to_xe(struct xe_guc *guc) -{ - return gt_to_xe(guc_to_gt(guc)); -} - static struct xe_guc *node_to_guc(struct drm_info_node *node) { return node->info_ent->data; diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 76aed9c348ab..57d325ec8ce3 100644 --- 
a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -13,18 +13,6 @@ #include "xe_guc.h" #include "xe_map.h" -static struct xe_gt * -guc_to_gt(struct xe_guc *guc) -{ - return container_of(guc, struct xe_gt, uc.guc); -} - -static struct xe_device * -guc_to_xe(struct xe_guc *guc) -{ - return gt_to_xe(guc_to_gt(guc)); -} - static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size) { u32 action[] = { diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8d5af11fb80d..b13c925c56ee 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -36,18 +36,6 @@ #include "xe_trace.h" #include "xe_vm.h" -static struct xe_gt * -guc_to_gt(struct xe_guc *guc) -{ - return container_of(guc, struct xe_gt, uc.guc); -} - -static struct xe_device * -guc_to_xe(struct xe_guc *guc) -{ - return gt_to_xe(guc_to_gt(guc)); -} - static struct xe_guc * exec_queue_to_guc(struct xe_exec_queue *q) { -- cgit v1.2.3 From de84aa96e4427125d00af1706b59584b2cbb0085 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 10 Nov 2023 15:41:50 +0000 Subject: drm/xe/uapi: Remove useless XE_QUERY_CONFIG_NUM_PARAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit num_params can be used to retrieve the size of the info array for the specific version of the kernel being used. v2: Also remove XE_QUERY_CONFIG_NUM_PARAM (José Roberto de Souza) Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 2 +- include/uapi/drm/xe_drm.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 10b9878ec95a..58fb06a63db2 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -305,7 +305,7 @@ static int query_memory_usage(struct xe_device *xe, static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) { - u32 num_params = XE_QUERY_CONFIG_NUM_PARAM; + const u32 num_params = XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1; size_t size = sizeof(struct drm_xe_query_config) + num_params * sizeof(u64); struct drm_xe_query_config __user *query_ptr = diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 9bd7092a7ea4..b9a68f8b69f3 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -329,7 +329,6 @@ struct drm_xe_query_config { #define XE_QUERY_CONFIG_GT_COUNT 4 #define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 #define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 6 -#define XE_QUERY_CONFIG_NUM_PARAM (XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1) /** @info: array of elements containing the config info */ __u64 info[]; }; -- cgit v1.2.3 From 1a912c90a278177423128e5b82673575821d0c35 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 10 Nov 2023 15:41:51 +0000 Subject: drm/xe/uapi: Remove GT_TYPE_REMOTE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the split between tile and gt, this is currently unused. Also it is bringing confusion because main vs remote would be more a concept of the tile itself and not about GT. So, the MAIN one is the traditional GT used for every operation in older platforms, and for render/graphics and compute on platforms that contains the stand-alone Media GT. 
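For illustration, a hypothetical userspace-side sketch (only struct drm_xe_query_gt, its type/gt_id fields and the XE_QUERY_GT_TYPE_* values come from this patch; the helper name is invented): after the change a GT reported through the query uAPI is simply main or media, and "remote" remains a property of the tile rather than of the GT.

    #include "xe_drm.h"   /* uAPI header touched by this patch */

    /* sketch: interpret the GT type returned by the device query */
    static const char *example_gt_type_name(const struct drm_xe_query_gt *gt)
    {
            switch (gt->type) {
            case XE_QUERY_GT_TYPE_MAIN:
                    return "main";
            case XE_QUERY_GT_TYPE_MEDIA:
                    return "media";
            default:
                    return "unknown";
            }
    }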
Cc: Matt Roper Cc: Francois Dugast Cc: Carl Zhang Cc: José Roberto de Souza Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_query.c | 2 -- include/uapi/drm/xe_drm.h | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 58fb06a63db2..cb3461971dc9 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -372,8 +372,6 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MEDIA; - else if (gt_to_tile(gt)->id > 0) - gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_REMOTE; else gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MAIN; gt_list->gt_list[id].gt_id = gt->info.id; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b9a68f8b69f3..8154cecf6f0d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -343,9 +343,8 @@ struct drm_xe_query_config { */ struct drm_xe_query_gt { #define XE_QUERY_GT_TYPE_MAIN 0 -#define XE_QUERY_GT_TYPE_REMOTE 1 -#define XE_QUERY_GT_TYPE_MEDIA 2 - /** @type: GT type: Main, Remote, or Media */ +#define XE_QUERY_GT_TYPE_MEDIA 1 + /** @type: GT type: Main or Media */ __u16 type; /** @gt_id: Unique ID of this GT within the PCI Device */ __u16 gt_id; -- cgit v1.2.3 From ddfa2d6a846a571edb4dc6ed29d94b38558ae088 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 10 Nov 2023 15:41:52 +0000 Subject: drm/xe/uapi: Kill VM_MADVISE IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused IOCTL. Without any userspace using it we need to remove before we can be accepted upstream. At this point we are breaking the compatibility for good, so we don't need to break when we are in-tree. So, let's also use this breakage to sort out the IOCTL entries and fix all the small indentation and line issues. 
Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/Makefile | 1 - drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_bo_types.h | 3 + drivers/gpu/drm/xe/xe_device.c | 8 +- drivers/gpu/drm/xe/xe_vm_madvise.c | 299 ------------------------------------- drivers/gpu/drm/xe/xe_vm_madvise.h | 15 -- include/uapi/drm/xe_drm.h | 92 ++---------- 7 files changed, 18 insertions(+), 402 deletions(-) delete mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.c delete mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 41d92014a45c..a29b92080c85 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -115,7 +115,6 @@ xe-y += xe_bb.o \ xe_uc_debugfs.o \ xe_uc_fw.o \ xe_vm.o \ - xe_vm_madvise.o \ xe_wait_user_fence.o \ xe_wa.o \ xe_wopcm.o diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index c23a5694a788..5b5f764586fe 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1239,7 +1239,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; bo->props.preferred_gt = XE_BO_PROPS_INVALID; bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; - bo->ttm.priority = DRM_XE_VMA_PRIORITY_NORMAL; + bo->ttm.priority = XE_BO_PRIORITY_NORMAL; INIT_LIST_HEAD(&bo->pinned_link); #ifdef CONFIG_PROC_FS INIT_LIST_HEAD(&bo->client_link); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 051fe990c133..4bff60996168 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -19,6 +19,9 @@ struct xe_vm; #define XE_BO_MAX_PLACEMENTS 3 +/* TODO: To be selected with VM_MADVISE */ +#define XE_BO_PRIORITY_NORMAL 1 + /** @xe_bo: XE buffer object */ struct xe_bo { /** @ttm: TTM base buffer object */ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1202f8007f79..8be765adf702 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -36,7 +36,6 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" #include "xe_vm.h" -#include "xe_vm_madvise.h" #include "xe_wait_user_fence.h" #include "xe_hwmon.h" @@ -117,18 +116,17 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl, - DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl, + DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_VM_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), }; static const struct file_operations xe_driver_fops = { diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c deleted file mode 100644 index 
0ef7d483d050..000000000000 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ /dev/null @@ -1,299 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_vm_madvise.h" - -#include - -#include -#include - -#include "xe_bo.h" -#include "xe_vm.h" - -static int madvise_preferred_mem_class(struct xe_device *xe, struct xe_vm *vm, - struct xe_vma **vmas, int num_vmas, - u64 value) -{ - int i, err; - - if (XE_IOCTL_DBG(xe, value > XE_MEM_REGION_CLASS_VRAM)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, value == XE_MEM_REGION_CLASS_VRAM && - !xe->info.is_dgfx)) - return -EINVAL; - - for (i = 0; i < num_vmas; ++i) { - struct xe_bo *bo; - - bo = xe_vma_bo(vmas[i]); - - err = xe_bo_lock(bo, true); - if (err) - return err; - bo->props.preferred_mem_class = value; - xe_bo_placement_for_flags(xe, bo, bo->flags); - xe_bo_unlock(bo); - } - - return 0; -} - -static int madvise_preferred_gt(struct xe_device *xe, struct xe_vm *vm, - struct xe_vma **vmas, int num_vmas, u64 value) -{ - int i, err; - - if (XE_IOCTL_DBG(xe, value > xe->info.tile_count)) - return -EINVAL; - - for (i = 0; i < num_vmas; ++i) { - struct xe_bo *bo; - - bo = xe_vma_bo(vmas[i]); - - err = xe_bo_lock(bo, true); - if (err) - return err; - bo->props.preferred_gt = value; - xe_bo_placement_for_flags(xe, bo, bo->flags); - xe_bo_unlock(bo); - } - - return 0; -} - -static int madvise_preferred_mem_class_gt(struct xe_device *xe, - struct xe_vm *vm, - struct xe_vma **vmas, int num_vmas, - u64 value) -{ - int i, err; - u32 gt_id = upper_32_bits(value); - u32 mem_class = lower_32_bits(value); - - if (XE_IOCTL_DBG(xe, mem_class > XE_MEM_REGION_CLASS_VRAM)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, mem_class == XE_MEM_REGION_CLASS_VRAM && - !xe->info.is_dgfx)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, gt_id > xe->info.tile_count)) - return -EINVAL; - - for (i = 0; i < num_vmas; ++i) { - struct xe_bo *bo; - - bo = xe_vma_bo(vmas[i]); - - err = xe_bo_lock(bo, true); - if (err) - return err; - bo->props.preferred_mem_class = mem_class; - bo->props.preferred_gt = gt_id; - xe_bo_placement_for_flags(xe, bo, bo->flags); - xe_bo_unlock(bo); - } - - return 0; -} - -static int madvise_cpu_atomic(struct xe_device *xe, struct xe_vm *vm, - struct xe_vma **vmas, int num_vmas, u64 value) -{ - int i, err; - - for (i = 0; i < num_vmas; ++i) { - struct xe_bo *bo; - - bo = xe_vma_bo(vmas[i]); - if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_SYSTEM_BIT))) - return -EINVAL; - - err = xe_bo_lock(bo, true); - if (err) - return err; - bo->props.cpu_atomic = !!value; - - /* - * All future CPU accesses must be from system memory only, we - * just invalidate the CPU page tables which will trigger a - * migration on next access. 
- */ - if (bo->props.cpu_atomic) - ttm_bo_unmap_virtual(&bo->ttm); - xe_bo_unlock(bo); - } - - return 0; -} - -static int madvise_device_atomic(struct xe_device *xe, struct xe_vm *vm, - struct xe_vma **vmas, int num_vmas, u64 value) -{ - int i, err; - - for (i = 0; i < num_vmas; ++i) { - struct xe_bo *bo; - - bo = xe_vma_bo(vmas[i]); - if (XE_IOCTL_DBG(xe, !(bo->flags & XE_BO_CREATE_VRAM0_BIT) && - !(bo->flags & XE_BO_CREATE_VRAM1_BIT))) - return -EINVAL; - - err = xe_bo_lock(bo, true); - if (err) - return err; - bo->props.device_atomic = !!value; - xe_bo_unlock(bo); - } - - return 0; -} - -static int madvise_priority(struct xe_device *xe, struct xe_vm *vm, - struct xe_vma **vmas, int num_vmas, u64 value) -{ - int i, err; - - if (XE_IOCTL_DBG(xe, value > DRM_XE_VMA_PRIORITY_HIGH)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, value == DRM_XE_VMA_PRIORITY_HIGH && - !capable(CAP_SYS_NICE))) - return -EPERM; - - for (i = 0; i < num_vmas; ++i) { - struct xe_bo *bo; - - bo = xe_vma_bo(vmas[i]); - - err = xe_bo_lock(bo, true); - if (err) - return err; - bo->ttm.priority = value; - ttm_bo_move_to_lru_tail(&bo->ttm); - xe_bo_unlock(bo); - } - - return 0; -} - -static int madvise_pin(struct xe_device *xe, struct xe_vm *vm, - struct xe_vma **vmas, int num_vmas, u64 value) -{ - drm_warn(&xe->drm, "NIY"); - return 0; -} - -typedef int (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, - struct xe_vma **vmas, int num_vmas, u64 value); - -static const madvise_func madvise_funcs[] = { - [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS] = madvise_preferred_mem_class, - [DRM_XE_VM_MADVISE_PREFERRED_GT] = madvise_preferred_gt, - [DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT] = - madvise_preferred_mem_class_gt, - [DRM_XE_VM_MADVISE_CPU_ATOMIC] = madvise_cpu_atomic, - [DRM_XE_VM_MADVISE_DEVICE_ATOMIC] = madvise_device_atomic, - [DRM_XE_VM_MADVISE_PRIORITY] = madvise_priority, - [DRM_XE_VM_MADVISE_PIN] = madvise_pin, -}; - -static struct xe_vma ** -get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range) -{ - struct xe_vma **vmas, **__vmas; - struct drm_gpuva *gpuva; - int max_vmas = 8; - - lockdep_assert_held(&vm->lock); - - vmas = kmalloc(max_vmas * sizeof(*vmas), GFP_KERNEL); - if (!vmas) - return NULL; - - drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) { - struct xe_vma *vma = gpuva_to_vma(gpuva); - - if (xe_vma_is_userptr(vma)) - continue; - - if (*num_vmas == max_vmas) { - max_vmas <<= 1; - __vmas = krealloc(vmas, max_vmas * sizeof(*vmas), - GFP_KERNEL); - if (!__vmas) - return NULL; - vmas = __vmas; - } - - vmas[*num_vmas] = vma; - *num_vmas += 1; - } - - return vmas; -} - -int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_vm_madvise *args = data; - struct xe_vm *vm; - struct xe_vma **vmas = NULL; - int num_vmas = 0, err = 0, idx; - - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->pad || args->pad2) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->property > ARRAY_SIZE(madvise_funcs))) - return -EINVAL; - - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe_vm_in_fault_mode(vm))) { - err = -EINVAL; - goto put_vm; - } - - down_read(&vm->lock); - - if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { - err = -ENOENT; - goto unlock_vm; - } - - vmas = get_vmas(vm, &num_vmas, args->addr, args->range); - if 
(XE_IOCTL_DBG(xe, err)) - goto unlock_vm; - - if (XE_IOCTL_DBG(xe, !vmas)) { - err = -ENOMEM; - goto unlock_vm; - } - - if (XE_IOCTL_DBG(xe, !num_vmas)) { - err = -EINVAL; - goto unlock_vm; - } - - idx = array_index_nospec(args->property, ARRAY_SIZE(madvise_funcs)); - err = madvise_funcs[idx](xe, vm, vmas, num_vmas, args->value); - -unlock_vm: - up_read(&vm->lock); -put_vm: - xe_vm_put(vm); - kfree(vmas); - return err; -} diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h deleted file mode 100644 index eecd33acd248..000000000000 --- a/drivers/gpu/drm/xe/xe_vm_madvise.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2021 Intel Corporation - */ - -#ifndef _XE_VM_MADVISE_H_ -#define _XE_VM_MADVISE_H_ - -struct drm_device; -struct drm_file; - -int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); - -#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 8154cecf6f0d..808d92262bcd 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -103,28 +103,26 @@ struct xe_user_extension { #define DRM_XE_VM_CREATE 0x03 #define DRM_XE_VM_DESTROY 0x04 #define DRM_XE_VM_BIND 0x05 -#define DRM_XE_EXEC_QUEUE_CREATE 0x06 -#define DRM_XE_EXEC_QUEUE_DESTROY 0x07 -#define DRM_XE_EXEC 0x08 +#define DRM_XE_EXEC 0x06 +#define DRM_XE_EXEC_QUEUE_CREATE 0x07 +#define DRM_XE_EXEC_QUEUE_DESTROY 0x08 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x09 -#define DRM_XE_WAIT_USER_FENCE 0x0a -#define DRM_XE_VM_MADVISE 0x0b -#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0c - +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0a +#define DRM_XE_WAIT_USER_FENCE 0x0b /* Must be kept compact -- no holes */ + #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) #define DRM_IOCTL_XE_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create) #define DRM_IOCTL_XE_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset) #define DRM_IOCTL_XE_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create) -#define DRM_IOCTL_XE_VM_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy) -#define DRM_IOCTL_XE_VM_BIND DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind) +#define DRM_IOCTL_XE_VM_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy) +#define DRM_IOCTL_XE_VM_BIND DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind) +#define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_EXEC_QUEUE_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, struct drm_xe_exec_queue_create) +#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy) +#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) -#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy) -#define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) -#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) 
#define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) -#define DRM_IOCTL_XE_VM_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise) /** struct drm_xe_engine_class_instance - instance of an engine class */ struct drm_xe_engine_class_instance { @@ -978,74 +976,6 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; -struct drm_xe_vm_madvise { - /** @extensions: Pointer to the first extension struct, if any */ - __u64 extensions; - - /** @vm_id: The ID VM in which the VMA exists */ - __u32 vm_id; - - /** @pad: MBZ */ - __u32 pad; - - /** @range: Number of bytes in the VMA */ - __u64 range; - - /** @addr: Address of the VMA to operation on */ - __u64 addr; - - /* - * Setting the preferred location will trigger a migrate of the VMA - * backing store to new location if the backing store is already - * allocated. - * - * For DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS usage, see enum - * drm_xe_memory_class. - */ -#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS 0 -#define DRM_XE_VM_MADVISE_PREFERRED_GT 1 - /* - * In this case lower 32 bits are mem class, upper 32 are GT. - * Combination provides a single IOCTL plus migrate VMA to preferred - * location. - */ -#define DRM_XE_VM_MADVISE_PREFERRED_MEM_CLASS_GT 2 - /* - * The CPU will do atomic memory operations to this VMA. Must be set on - * some devices for atomics to behave correctly. - */ -#define DRM_XE_VM_MADVISE_CPU_ATOMIC 3 - /* - * The device will do atomic memory operations to this VMA. Must be set - * on some devices for atomics to behave correctly. - */ -#define DRM_XE_VM_MADVISE_DEVICE_ATOMIC 4 - /* - * Priority WRT to eviction (moving from preferred memory location due - * to memory pressure). The lower the priority, the more likely to be - * evicted. - */ -#define DRM_XE_VM_MADVISE_PRIORITY 5 -#define DRM_XE_VMA_PRIORITY_LOW 0 - /* Default */ -#define DRM_XE_VMA_PRIORITY_NORMAL 1 - /* Must be user with elevated privileges */ -#define DRM_XE_VMA_PRIORITY_HIGH 2 - /* Pin the VMA in memory, must be user with elevated privileges */ -#define DRM_XE_VM_MADVISE_PIN 6 - /** @property: property to set */ - __u32 property; - - /** @pad2: MBZ */ - __u32 pad2; - - /** @value: property value */ - __u64 value; - - /** @reserved: Reserved */ - __u64 reserved[2]; -}; - /** * DOC: XE PMU event config IDs * -- cgit v1.2.3 From 4195e5e5e3d544a90a1edac1e21cd53a5117bd1f Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 10 Nov 2023 15:41:54 +0000 Subject: drm/xe/uapi: Remove unused QUERY_CONFIG_MEM_REGION_COUNT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of uAPI cleanup, remove this constant which is not used. Memory regions can be queried with DRM_XE_DEVICE_QUERY_MEM_USAGE. Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 2 -- include/uapi/drm/xe_drm.h | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index cb3461971dc9..b5cd980f81f9 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -333,8 +333,6 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
SZ_64K : SZ_4K; config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count; - config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT] = - hweight_long(xe->info.mem_region_mask); config->info[XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] = xe_exec_queue_device_get_max_priority(xe); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 0f8c5afd3584..1ac9ae0591de 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -311,6 +311,7 @@ struct drm_xe_query_mem_usage { * If a query is made with a struct drm_xe_device_query where .query * is equal to DRM_XE_DEVICE_QUERY_CONFIG, then the reply uses * struct drm_xe_query_config in .data. + * */ struct drm_xe_query_config { /** @num_params: number of parameters returned in info */ @@ -325,8 +326,7 @@ struct drm_xe_query_config { #define XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define XE_QUERY_CONFIG_VA_BITS 3 #define XE_QUERY_CONFIG_GT_COUNT 4 -#define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 -#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 6 +#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 5 /** @info: array of elements containing the config info */ __u64 info[]; }; -- cgit v1.2.3 From 60f3c7fc5c2464f73a7d64a4cc2dd4707a0d1831 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 10 Nov 2023 15:41:55 +0000 Subject: drm/xe/uapi: Remove unused QUERY_CONFIG_GT_COUNT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of uAPI cleanup, remove this constant which is not used. Number of GTs are provided as num_gt in drm_xe_query_gt_list. Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 1 - include/uapi/drm/xe_drm.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index b5cd980f81f9..e9c8c97a030f 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -332,7 +332,6 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->info[XE_QUERY_CONFIG_MIN_ALIGNMENT] = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; - config->info[XE_QUERY_CONFIG_GT_COUNT] = xe->info.gt_count; config->info[XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] = xe_exec_queue_device_get_max_priority(xe); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1ac9ae0591de..097d045d0444 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -325,8 +325,7 @@ struct drm_xe_query_config { #define XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) #define XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define XE_QUERY_CONFIG_VA_BITS 3 -#define XE_QUERY_CONFIG_GT_COUNT 4 -#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 5 +#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 /** @info: array of elements containing the config info */ __u64 info[]; }; -- cgit v1.2.3 From be13336e07b5cc26c8b971a50ff6dc60d7050417 Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Fri, 10 Nov 2023 15:41:56 +0000 Subject: drm/xe/pmu: Drop interrupt pmu event Drop interrupt event from PMU as that is not useful and not being used by any UMD. 
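To illustrate the effect on consumers (a sketch, not part of this patch): the remaining counters simply shift down to start at 0, so a tool opens them the same way as before, just without the interrupts slot. It assumes the dynamic PMU type id has already been read from sysfs (/sys/bus/event_source/devices/<pmu>/type), that the uAPI header is available as drm/xe_drm.h, and uses the macros as they stand after this patch:

    #include <string.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>
    #include <drm/xe_drm.h>

    /* Open render-group busyness on GT 0; returns a perf event fd or -1. */
    static int open_render_busy(int pmu_type)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.type = pmu_type;
            attr.size = sizeof(attr);
            attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
            attr.use_clockid = 1;
            attr.clockid = CLOCK_MONOTONIC;
            /* Counter 0 after this patch (previously counter 1, behind interrupts). */
            attr.config = XE_PMU_RENDER_GROUP_BUSY(0);

            return (int)syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, -1, 0);
    }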
Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Francois Dugast Signed-off-by: Aravind Iddamsetty Reviewed-by: Francois Dugast Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 18 ------------------ drivers/gpu/drm/xe/xe_pmu.c | 19 +++++-------------- drivers/gpu/drm/xe/xe_pmu_types.h | 8 -------- include/uapi/drm/xe_drm.h | 13 ++++++------- 4 files changed, 11 insertions(+), 47 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index c5315e02fc5b..25ba5167c1b9 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -27,20 +27,6 @@ #define IIR(offset) XE_REG(offset + 0x8) #define IER(offset) XE_REG(offset + 0xc) -/* - * Interrupt statistic for PMU. Increments the counter only if the - * interrupt originated from the GPU so interrupts from a device which - * shares the interrupt line are not accounted. - */ -static __always_inline void xe_pmu_irq_stats(struct xe_device *xe) -{ - /* - * A clever compiler translates that into INC. A not so clever one - * should at least prevent store tearing. - */ - WRITE_ONCE(xe->pmu.irq_count, xe->pmu.irq_count + 1); -} - static void assert_iir_is_zero(struct xe_gt *mmio, struct xe_reg reg) { u32 val = xe_mmio_read32(mmio, reg); @@ -360,8 +346,6 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) xe_display_irq_enable(xe, gu_misc_iir); - xe_pmu_irq_stats(xe); - return IRQ_HANDLED; } @@ -458,8 +442,6 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) dg1_intr_enable(xe, false); xe_display_irq_enable(xe, gu_misc_iir); - xe_pmu_irq_stats(xe); - return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index abfc0b3aeac4..b843259578fd 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -61,7 +61,7 @@ static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type) static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config) { - int sample_type = config_counter(config) - 1; + int sample_type = config_counter(config); const unsigned int gt_id = gt->info.id; struct xe_device *xe = gt->tile->xe; struct xe_pmu *pmu = &xe->pmu; @@ -114,10 +114,6 @@ config_status(struct xe_device *xe, u64 config) return -ENOENT; switch (config_counter(config)) { - case XE_PMU_INTERRUPTS(0): - if (gt_id) - return -ENOENT; - break; case XE_PMU_RENDER_GROUP_BUSY(0): case XE_PMU_COPY_GROUP_BUSY(0): case XE_PMU_ANY_ENGINE_GROUP_BUSY(0): @@ -181,13 +177,9 @@ static u64 __xe_pmu_event_read(struct perf_event *event) const unsigned int gt_id = config_gt_id(event->attr.config); const u64 config = event->attr.config; struct xe_gt *gt = xe_device_get_gt(xe, gt_id); - struct xe_pmu *pmu = &xe->pmu; u64 val; switch (config_counter(config)) { - case XE_PMU_INTERRUPTS(0): - val = READ_ONCE(pmu->irq_count); - break; case XE_PMU_RENDER_GROUP_BUSY(0): case XE_PMU_COPY_GROUP_BUSY(0): case XE_PMU_ANY_ENGINE_GROUP_BUSY(0): @@ -361,11 +353,10 @@ create_event_attributes(struct xe_pmu *pmu) const char *unit; bool global; } events[] = { - __global_event(0, "interrupts", NULL), - __event(1, "render-group-busy", "ns"), - __event(2, "copy-group-busy", "ns"), - __event(3, "media-group-busy", "ns"), - __event(4, "any-engine-group-busy", "ns"), + __event(0, "render-group-busy", "ns"), + __event(1, "copy-group-busy", "ns"), + __event(2, "media-group-busy", "ns"), + __event(3, "any-engine-group-busy", "ns"), }; struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter; diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h 
b/drivers/gpu/drm/xe/xe_pmu_types.h index 4ccc7e9042f6..9cadbd243f57 100644 --- a/drivers/gpu/drm/xe/xe_pmu_types.h +++ b/drivers/gpu/drm/xe/xe_pmu_types.h @@ -51,14 +51,6 @@ struct xe_pmu { * */ u64 sample[XE_PMU_MAX_GT][__XE_NUM_PMU_SAMPLERS]; - /** - * @irq_count: Number of interrupts - * - * Intentionally unsigned long to avoid atomics or heuristics on 32bit. - * 4e9 interrupts are a lot and postprocessing can really deal with an - * occasional wraparound easily. It's 32bit after all. - */ - unsigned long irq_count; /** * @events_attr_group: Device events attribute group. */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 097d045d0444..e007dbefd627 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -977,7 +977,7 @@ struct drm_xe_wait_user_fence { * in 'struct perf_event_attr' as part of perf_event_open syscall to read a * particular event. * - * For example to open the XE_PMU_INTERRUPTS(0): + * For example to open the XE_PMU_RENDER_GROUP_BUSY(0): * * .. code-block:: C * @@ -991,7 +991,7 @@ struct drm_xe_wait_user_fence { * attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; * attr.use_clockid = 1; * attr.clockid = CLOCK_MONOTONIC; - * attr.config = XE_PMU_INTERRUPTS(0); + * attr.config = XE_PMU_RENDER_GROUP_BUSY(0); * * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); */ @@ -1004,11 +1004,10 @@ struct drm_xe_wait_user_fence { #define ___XE_PMU_OTHER(gt, x) \ (((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT)) -#define XE_PMU_INTERRUPTS(gt) ___XE_PMU_OTHER(gt, 0) -#define XE_PMU_RENDER_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 1) -#define XE_PMU_COPY_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 2) -#define XE_PMU_MEDIA_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 3) -#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 4) +#define XE_PMU_RENDER_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 0) +#define XE_PMU_COPY_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 1) +#define XE_PMU_MEDIA_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 2) +#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 3) #if defined(__cplusplus) } -- cgit v1.2.3 From b646ce9ce99f74d3dee8fd56303b9255d3c278ec Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Mon, 13 Nov 2023 16:49:43 -0800 Subject: drm/xe: Make xe_mmio_tile_vram_size() static During xe_mmio_probe_vram(), we already store the values returned from xe_mmio_tile_vram_size() into the xe_tile structures. There is no need to call xe_mmio_tile_vram_size() again later during setup of the STOLEN region. Just use the values stored in the root tile. Signed-off-by: Brian Welty Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 3 ++- drivers/gpu/drm/xe/xe_mmio.h | 1 - drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 7 ++----- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index d8f9fabf715e..0baaef53f3a7 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -201,7 +201,8 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) * NOTE: multi-tile bases will include the tile offset. 
* */ -int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_offset) +static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, + u64 *tile_size, u64 *tile_offset) { struct xe_device *xe = tile_to_xe(tile); struct xe_gt *gt = tile->primary_gt; diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 24a23dad7dce..b244e9063caa 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -135,7 +135,6 @@ static inline bool xe_mmio_in_range(const struct xe_gt *gt, } int xe_mmio_probe_vram(struct xe_device *xe); -int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base); u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); #endif diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 0c533d36791d..837b522cb91f 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -62,12 +62,9 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) u64 stolen_size; u64 tile_offset; u64 tile_size; - u64 vram_size; - if (xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset)) { - drm_err(&xe->drm, "Querying total vram size failed\n"); - return 0; - } + tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start; + tile_size = tile->mem.vram.actual_physical_size; /* Use DSM base address instead for stolen memory */ mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; -- cgit v1.2.3 From d5dc73dbd148ef38dbe35f18d2908d2ff343c208 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 14 Nov 2023 13:34:27 +0000 Subject: drm/xe/uapi: Add missing DRM_ prefix in uAPI constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most constants defined in xe_drm.h use DRM_XE_ as prefix which is helpful to identify the name space. Make this systematic and add this prefix where it was missing. 
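As an illustration of the userspace side (hypothetical, not part of this patch), projects that still carry the old spellings can bridge the rename with a small compatibility shim while they migrate, for example:

    /* Hypothetical shim mapping two of the old names onto the new DRM_XE_ ones. */
    #ifndef XE_QUERY_GT_TYPE_MAIN
    #define XE_QUERY_GT_TYPE_MAIN   DRM_XE_QUERY_GT_TYPE_MAIN
    #define XE_QUERY_GT_TYPE_MEDIA  DRM_XE_QUERY_GT_TYPE_MEDIA
    #endif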
v2: - fix vertical alignment of define values - remove double DRM_ in some variables (José Roberto de Souza) v3: Rebase Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 14 ++--- drivers/gpu/drm/xe/xe_exec_queue.c | 22 +++---- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_pmu.c | 24 +++---- drivers/gpu/drm/xe/xe_query.c | 28 ++++----- drivers/gpu/drm/xe/xe_vm.c | 54 ++++++++-------- drivers/gpu/drm/xe/xe_vm_doc.h | 12 ++-- include/uapi/drm/xe_drm.h | 124 ++++++++++++++++++------------------- 8 files changed, 140 insertions(+), 140 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 5b5f764586fe..e8c89b6e06dc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -209,7 +209,7 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, /* The order of placements should indicate preferred location */ - if (bo->props.preferred_mem_class == XE_MEM_REGION_CLASS_SYSMEM) { + if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) { try_add_system(bo, places, bo_flags, &c); try_add_vram(xe, bo, places, bo_flags, &c); } else { @@ -1814,9 +1814,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (XE_IOCTL_DBG(xe, args->flags & - ~(XE_GEM_CREATE_FLAG_DEFER_BACKING | - XE_GEM_CREATE_FLAG_SCANOUT | - XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM | + ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING | + DRM_XE_GEM_CREATE_FLAG_SCANOUT | + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM | xe->info.mem_region_mask))) return -EINVAL; @@ -1836,15 +1836,15 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK)) return -EINVAL; - if (args->flags & XE_GEM_CREATE_FLAG_DEFER_BACKING) + if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING) bo_flags |= XE_BO_DEFER_BACKING; - if (args->flags & XE_GEM_CREATE_FLAG_SCANOUT) + if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) bo_flags |= XE_BO_SCANOUT_BIT; bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); - if (args->flags & XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { + if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK))) return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index f67a6dee4a6f..fbb4d3cca9f6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -393,7 +393,7 @@ static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_q if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) return -EINVAL; - if (value > XE_ACC_GRANULARITY_64M) + if (value > DRM_XE_ACC_GRANULARITY_64M) return -EINVAL; q->usm.acc_granularity = value; @@ -406,14 +406,14 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, u64 value, bool create); static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { - [XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, - [XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, - [XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence, - [XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, - [XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, - [XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, - 
[XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -445,7 +445,7 @@ typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, bool create); static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = { - [XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, + [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, }; #define MAX_USER_EXTENSIONS 16 @@ -764,7 +764,7 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, return -ENOENT; switch (args->property) { - case XE_EXEC_QUEUE_GET_PROPERTY_BAN: + case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED); ret = 0; break; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 6c885dde5d59..53b39fe91601 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -560,7 +560,7 @@ static void xe_uevent_gt_reset_failure(struct pci_dev *pdev, u8 tile_id, u8 gt_i { char *reset_event[4]; - reset_event[0] = XE_RESET_FAILED_UEVENT "=NEEDS_RESET"; + reset_event[0] = DRM_XE_RESET_FAILED_UEVENT "=NEEDS_RESET"; reset_event[1] = kasprintf(GFP_KERNEL, "TILE_ID=%d", tile_id); reset_event[2] = kasprintf(GFP_KERNEL, "GT_ID=%d", gt_id); reset_event[3] = NULL; diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index b843259578fd..9d0b7887cfc4 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -17,12 +17,12 @@ static unsigned int xe_pmu_target_cpu = -1; static unsigned int config_gt_id(const u64 config) { - return config >> __XE_PMU_GT_SHIFT; + return config >> __DRM_XE_PMU_GT_SHIFT; } static u64 config_counter(const u64 config) { - return config & ~(~0ULL << __XE_PMU_GT_SHIFT); + return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT); } static void xe_pmu_event_destroy(struct perf_event *event) @@ -114,13 +114,13 @@ config_status(struct xe_device *xe, u64 config) return -ENOENT; switch (config_counter(config)) { - case XE_PMU_RENDER_GROUP_BUSY(0): - case XE_PMU_COPY_GROUP_BUSY(0): - case XE_PMU_ANY_ENGINE_GROUP_BUSY(0): + case DRM_XE_PMU_RENDER_GROUP_BUSY(0): + case DRM_XE_PMU_COPY_GROUP_BUSY(0): + case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0): if (gt->info.type == XE_GT_TYPE_MEDIA) return -ENOENT; break; - case XE_PMU_MEDIA_GROUP_BUSY(0): + case DRM_XE_PMU_MEDIA_GROUP_BUSY(0): if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0)))) return -ENOENT; break; @@ -180,10 +180,10 @@ static u64 __xe_pmu_event_read(struct perf_event *event) u64 val; switch (config_counter(config)) { - case XE_PMU_RENDER_GROUP_BUSY(0): - case XE_PMU_COPY_GROUP_BUSY(0): - case XE_PMU_ANY_ENGINE_GROUP_BUSY(0): - case XE_PMU_MEDIA_GROUP_BUSY(0): + case DRM_XE_PMU_RENDER_GROUP_BUSY(0): + case DRM_XE_PMU_COPY_GROUP_BUSY(0): + case 
DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0): + case DRM_XE_PMU_MEDIA_GROUP_BUSY(0): val = engine_group_busyness_read(gt, config); break; default: @@ -369,7 +369,7 @@ create_event_attributes(struct xe_pmu *pmu) /* Count how many counters we will be exposing. */ for_each_gt(gt, xe, j) { for (i = 0; i < ARRAY_SIZE(events); i++) { - u64 config = ___XE_PMU_OTHER(j, events[i].counter); + u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter); if (!config_status(xe, config)) count++; @@ -396,7 +396,7 @@ create_event_attributes(struct xe_pmu *pmu) for_each_gt(gt, xe, j) { for (i = 0; i < ARRAY_SIZE(events); i++) { - u64 config = ___XE_PMU_OTHER(j, events[i].counter); + u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter); char *str; if (config_status(xe, config)) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e9c8c97a030f..565a716302bb 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -261,7 +261,7 @@ static int query_memory_usage(struct xe_device *xe, return -ENOMEM; man = ttm_manager_type(&xe->ttm, XE_PL_TT); - usage->regions[0].mem_class = XE_MEM_REGION_CLASS_SYSMEM; + usage->regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM; usage->regions[0].instance = 0; usage->regions[0].min_page_size = PAGE_SIZE; usage->regions[0].total_size = man->size << PAGE_SHIFT; @@ -273,7 +273,7 @@ static int query_memory_usage(struct xe_device *xe, man = ttm_manager_type(&xe->ttm, i); if (man) { usage->regions[usage->num_regions].mem_class = - XE_MEM_REGION_CLASS_VRAM; + DRM_XE_MEM_REGION_CLASS_VRAM; usage->regions[usage->num_regions].instance = usage->num_regions; usage->regions[usage->num_regions].min_page_size = @@ -305,7 +305,7 @@ static int query_memory_usage(struct xe_device *xe, static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) { - const u32 num_params = XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1; + const u32 num_params = DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1; size_t size = sizeof(struct drm_xe_query_config) + num_params * sizeof(u64); struct drm_xe_query_config __user *query_ptr = @@ -324,15 +324,15 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) return -ENOMEM; config->num_params = num_params; - config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = + config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = xe->info.devid | (xe->info.revid << 16); if (xe_device_get_root_tile(xe)->mem.vram.usable_size) - config->info[XE_QUERY_CONFIG_FLAGS] = - XE_QUERY_CONFIG_FLAGS_HAS_VRAM; - config->info[XE_QUERY_CONFIG_MIN_ALIGNMENT] = + config->info[DRM_XE_QUERY_CONFIG_FLAGS] = + DRM_XE_QUERY_CONFIG_FLAGS_HAS_VRAM; + config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
SZ_64K : SZ_4K; - config->info[XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; - config->info[XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] = + config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; + config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] = xe_exec_queue_device_get_max_priority(xe); if (copy_to_user(query_ptr, config, size)) { @@ -368,9 +368,9 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query gt_list->num_gt = xe->info.gt_count; for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else - gt_list->gt_list[id].type = XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; gt_list->gt_list[id].gt_id = gt->info.id; gt_list->gt_list[id].clock_freq = gt->info.clock_freq; if (!IS_DGFX(xe)) @@ -468,21 +468,21 @@ static int query_gt_topology(struct xe_device *xe, for_each_gt(gt, xe, id) { topo.gt_id = id; - topo.type = XE_TOPO_DSS_GEOMETRY; + topo.type = DRM_XE_TOPO_DSS_GEOMETRY; query_ptr = copy_mask(query_ptr, &topo, gt->fuse_topo.g_dss_mask, sizeof(gt->fuse_topo.g_dss_mask)); if (IS_ERR(query_ptr)) return PTR_ERR(query_ptr); - topo.type = XE_TOPO_DSS_COMPUTE; + topo.type = DRM_XE_TOPO_DSS_COMPUTE; query_ptr = copy_mask(query_ptr, &topo, gt->fuse_topo.c_dss_mask, sizeof(gt->fuse_topo.c_dss_mask)); if (IS_ERR(query_ptr)) return PTR_ERR(query_ptr); - topo.type = XE_TOPO_EU_PER_DSS; + topo.type = DRM_XE_TOPO_EU_PER_DSS; query_ptr = copy_mask(query_ptr, &topo, gt->fuse_topo.eu_mask_per_dss, sizeof(gt->fuse_topo.eu_mask_per_dss)); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b4a4ed28019c..66d878bc464a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2177,8 +2177,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, (ULL)bo_offset_or_userptr); switch (operation) { - case XE_VM_BIND_OP_MAP: - case XE_VM_BIND_OP_MAP_USERPTR: + case DRM_XE_VM_BIND_OP_MAP: + case DRM_XE_VM_BIND_OP_MAP_USERPTR: ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, obj, bo_offset_or_userptr); if (IS_ERR(ops)) @@ -2189,13 +2189,13 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, op->tile_mask = tile_mask; op->map.immediate = - flags & XE_VM_BIND_FLAG_IMMEDIATE; + flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; op->map.read_only = - flags & XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & XE_VM_BIND_FLAG_NULL; + flags & DRM_XE_VM_BIND_FLAG_READONLY; + op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; } break; - case XE_VM_BIND_OP_UNMAP: + case DRM_XE_VM_BIND_OP_UNMAP: ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); if (IS_ERR(ops)) return ops; @@ -2206,7 +2206,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, op->tile_mask = tile_mask; } break; - case XE_VM_BIND_OP_PREFETCH: + case DRM_XE_VM_BIND_OP_PREFETCH: ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); if (IS_ERR(ops)) return ops; @@ -2218,7 +2218,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, op->prefetch.region = region; } break; - case XE_VM_BIND_OP_UNMAP_ALL: + case DRM_XE_VM_BIND_OP_UNMAP_ALL: xe_assert(vm->xe, bo); err = xe_bo_lock(bo, true); @@ -2828,13 +2828,13 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #ifdef TEST_VM_ASYNC_OPS_ERROR #define SUPPORTED_FLAGS \ - (FORCE_ASYNC_OP_ERROR | XE_VM_BIND_FLAG_ASYNC | \ - XE_VM_BIND_FLAG_READONLY | XE_VM_BIND_FLAG_IMMEDIATE | \ - XE_VM_BIND_FLAG_NULL | 0xffff) + 
(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_ASYNC | \ + DRM_XE_VM_BIND_FLAG_READONLY | DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ + DRM_XE_VM_BIND_FLAG_NULL | 0xffff) #else #define SUPPORTED_FLAGS \ - (XE_VM_BIND_FLAG_ASYNC | XE_VM_BIND_FLAG_READONLY | \ - XE_VM_BIND_FLAG_IMMEDIATE | XE_VM_BIND_FLAG_NULL | \ + (DRM_XE_VM_BIND_FLAG_ASYNC | DRM_XE_VM_BIND_FLAG_READONLY | \ + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ 0xffff) #endif #define XE_64K_PAGE_MASK 0xffffull @@ -2882,45 +2882,45 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u32 obj = (*bind_ops)[i].obj; u64 obj_offset = (*bind_ops)[i].obj_offset; u32 region = (*bind_ops)[i].region; - bool is_null = flags & XE_VM_BIND_FLAG_NULL; + bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; if (i == 0) { - *async = !!(flags & XE_VM_BIND_FLAG_ASYNC); + *async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC); if (XE_IOCTL_DBG(xe, !*async && args->num_syncs)) { err = -EINVAL; goto free_bind_ops; } } else if (XE_IOCTL_DBG(xe, *async != - !!(flags & XE_VM_BIND_FLAG_ASYNC))) { + !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC))) { err = -EINVAL; goto free_bind_ops; } - if (XE_IOCTL_DBG(xe, op > XE_VM_BIND_OP_PREFETCH) || + if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || XE_IOCTL_DBG(xe, obj && is_null) || XE_IOCTL_DBG(xe, obj_offset && is_null) || - XE_IOCTL_DBG(xe, op != XE_VM_BIND_OP_MAP && + XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && is_null) || XE_IOCTL_DBG(xe, !obj && - op == XE_VM_BIND_OP_MAP && + op == DRM_XE_VM_BIND_OP_MAP && !is_null) || XE_IOCTL_DBG(xe, !obj && - op == XE_VM_BIND_OP_UNMAP_ALL) || + op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || XE_IOCTL_DBG(xe, addr && - op == XE_VM_BIND_OP_UNMAP_ALL) || + op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || XE_IOCTL_DBG(xe, range && - op == XE_VM_BIND_OP_UNMAP_ALL) || + op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || XE_IOCTL_DBG(xe, obj && - op == XE_VM_BIND_OP_MAP_USERPTR) || + op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, obj && - op == XE_VM_BIND_OP_PREFETCH) || + op == DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, region && - op != XE_VM_BIND_OP_PREFETCH) || + op != DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, !(BIT(region) & xe->info.mem_region_mask)) || XE_IOCTL_DBG(xe, obj && - op == XE_VM_BIND_OP_UNMAP)) { + op == DRM_XE_VM_BIND_OP_UNMAP)) { err = -EINVAL; goto free_bind_ops; } @@ -2929,7 +2929,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || XE_IOCTL_DBG(xe, !range && - op != XE_VM_BIND_OP_UNMAP_ALL)) { + op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { err = -EINVAL; goto free_bind_ops; } diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index b1b2dc4a6089..516f4dc97223 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -32,9 +32,9 @@ * Operations * ---------- * - * XE_VM_BIND_OP_MAP - Create mapping for a BO - * XE_VM_BIND_OP_UNMAP - Destroy mapping for a BO / userptr - * XE_VM_BIND_OP_MAP_USERPTR - Create mapping for userptr + * DRM_XE_VM_BIND_OP_MAP - Create mapping for a BO + * DRM_XE_VM_BIND_OP_UNMAP - Destroy mapping for a BO / userptr + * DRM_XE_VM_BIND_OP_MAP_USERPTR - Create mapping for userptr * * Implementation details * ~~~~~~~~~~~~~~~~~~~~~~ @@ -113,7 +113,7 @@ * VM uses to report errors to. The ufence wait interface can be used to wait on * a VM going into an error state. Once an error is reported the VM's async * worker is paused. 
While the VM's async worker is paused sync, - * XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the + * DRM_XE_VM_BIND_OP_UNMAP operations are allowed (this can free memory). Once the * uses believe the error state is fixed, the async worker can be resumed via * XE_VM_BIND_OP_RESTART operation. When VM async bind work is restarted, the * first operation processed is the operation that caused the original error. @@ -193,7 +193,7 @@ * In a VM is in fault mode (TODO: link to fault mode), new bind operations that * create mappings are by default are deferred to the page fault handler (first * use). This behavior can be overriden by setting the flag - * XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping + * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping * immediately. * * User pointer @@ -322,7 +322,7 @@ * * By default, on a faulting VM binds just allocate the VMA and the actual * updating of the page tables is defered to the page fault handler. This - * behavior can be overridden by setting the flag XE_VM_BIND_FLAG_IMMEDIATE in + * behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in * the VM bind which will then do the bind immediately. * * Page fault handler diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index e007dbefd627..3ef49e3baaed 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -19,12 +19,12 @@ extern "C" { /** * DOC: uevent generated by xe on it's pci node. * - * XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt + * DRM_XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt * fails. The value supplied with the event is always "NEEDS_RESET". * Additional information supplied is tile id and gt id of the gt unit for * which reset has failed. */ -#define XE_RESET_FAILED_UEVENT "DEVICE_STATUS" +#define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS" /** * struct xe_user_extension - Base class for defining a chain of extensions @@ -148,14 +148,14 @@ struct drm_xe_engine_class_instance { * enum drm_xe_memory_class - Supported memory classes. */ enum drm_xe_memory_class { - /** @XE_MEM_REGION_CLASS_SYSMEM: Represents system memory. */ - XE_MEM_REGION_CLASS_SYSMEM = 0, + /** @DRM_XE_MEM_REGION_CLASS_SYSMEM: Represents system memory. */ + DRM_XE_MEM_REGION_CLASS_SYSMEM = 0, /** - * @XE_MEM_REGION_CLASS_VRAM: On discrete platforms, this + * @DRM_XE_MEM_REGION_CLASS_VRAM: On discrete platforms, this * represents the memory that is local to the device, which we * call VRAM. Not valid on integrated platforms. */ - XE_MEM_REGION_CLASS_VRAM + DRM_XE_MEM_REGION_CLASS_VRAM }; /** @@ -215,7 +215,7 @@ struct drm_xe_query_mem_region { * always equal the @total_size, since all of it will be CPU * accessible. * - * Note this is only tracked for XE_MEM_REGION_CLASS_VRAM + * Note this is only tracked for DRM_XE_MEM_REGION_CLASS_VRAM * regions (for other types the value here will always equal * zero). */ @@ -227,7 +227,7 @@ struct drm_xe_query_mem_region { * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable * accounting. Without this the value here will always equal * zero. Note this is only currently tracked for - * XE_MEM_REGION_CLASS_VRAM regions (for other types the value + * DRM_XE_MEM_REGION_CLASS_VRAM regions (for other types the value * here will always be zero). 
*/ __u64 cpu_visible_used; @@ -320,12 +320,12 @@ struct drm_xe_query_config { /** @pad: MBZ */ __u32 pad; -#define XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 -#define XE_QUERY_CONFIG_FLAGS 1 - #define XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) -#define XE_QUERY_CONFIG_MIN_ALIGNMENT 2 -#define XE_QUERY_CONFIG_VA_BITS 3 -#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 +#define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 +#define DRM_XE_QUERY_CONFIG_FLAGS 1 + #define DRM_XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) +#define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 +#define DRM_XE_QUERY_CONFIG_VA_BITS 3 +#define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 /** @info: array of elements containing the config info */ __u64 info[]; }; @@ -339,8 +339,8 @@ struct drm_xe_query_config { * implementing graphics and/or media operations. */ struct drm_xe_query_gt { -#define XE_QUERY_GT_TYPE_MAIN 0 -#define XE_QUERY_GT_TYPE_MEDIA 1 +#define DRM_XE_QUERY_GT_TYPE_MAIN 0 +#define DRM_XE_QUERY_GT_TYPE_MEDIA 1 /** @type: GT type: Main or Media */ __u16 type; /** @gt_id: Unique ID of this GT within the PCI Device */ @@ -400,7 +400,7 @@ struct drm_xe_query_topology_mask { * DSS_GEOMETRY ff ff ff ff 00 00 00 00 * means 32 DSS are available for geometry. */ -#define XE_TOPO_DSS_GEOMETRY (1 << 0) +#define DRM_XE_TOPO_DSS_GEOMETRY (1 << 0) /* * To query the mask of Dual Sub Slices (DSS) available for compute * operations. For example a query response containing the following @@ -408,7 +408,7 @@ struct drm_xe_query_topology_mask { * DSS_COMPUTE ff ff ff ff 00 00 00 00 * means 32 DSS are available for compute. */ -#define XE_TOPO_DSS_COMPUTE (1 << 1) +#define DRM_XE_TOPO_DSS_COMPUTE (1 << 1) /* * To query the mask of Execution Units (EU) available per Dual Sub * Slices (DSS). For example a query response containing the following @@ -416,7 +416,7 @@ struct drm_xe_query_topology_mask { * EU_PER_DSS ff ff 00 00 00 00 00 00 * means each DSS has 16 EU. */ -#define XE_TOPO_EU_PER_DSS (1 << 2) +#define DRM_XE_TOPO_EU_PER_DSS (1 << 2) /** @type: type of mask */ __u16 type; @@ -497,8 +497,8 @@ struct drm_xe_gem_create { */ __u64 size; -#define XE_GEM_CREATE_FLAG_DEFER_BACKING (0x1 << 24) -#define XE_GEM_CREATE_FLAG_SCANOUT (0x1 << 25) +#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (0x1 << 24) +#define DRM_XE_GEM_CREATE_FLAG_SCANOUT (0x1 << 25) /* * When using VRAM as a possible placement, ensure that the corresponding VRAM * allocation will always use the CPU accessible part of VRAM. This is important @@ -514,7 +514,7 @@ struct drm_xe_gem_create { * display surfaces, therefore the kernel requires setting this flag for such * objects, otherwise an error is thrown on small-bar systems. 
*/ -#define XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (0x1 << 26) +#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (0x1 << 26) /** * @flags: Flags, currently a mask of memory instances of where BO can * be placed @@ -581,14 +581,14 @@ struct drm_xe_ext_set_property { }; struct drm_xe_vm_create { -#define XE_VM_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_VM_EXTENSION_SET_PROPERTY 0 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -#define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0) -#define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1) -#define DRM_XE_VM_CREATE_ASYNC_DEFAULT (0x1 << 2) -#define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3) +#define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0) +#define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1) +#define DRM_XE_VM_CREATE_ASYNC_DEFAULT (0x1 << 2) +#define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3) /** @flags: Flags */ __u32 flags; @@ -644,29 +644,29 @@ struct drm_xe_vm_bind_op { */ __u64 tile_mask; -#define XE_VM_BIND_OP_MAP 0x0 -#define XE_VM_BIND_OP_UNMAP 0x1 -#define XE_VM_BIND_OP_MAP_USERPTR 0x2 -#define XE_VM_BIND_OP_UNMAP_ALL 0x3 -#define XE_VM_BIND_OP_PREFETCH 0x4 +#define DRM_XE_VM_BIND_OP_MAP 0x0 +#define DRM_XE_VM_BIND_OP_UNMAP 0x1 +#define DRM_XE_VM_BIND_OP_MAP_USERPTR 0x2 +#define DRM_XE_VM_BIND_OP_UNMAP_ALL 0x3 +#define DRM_XE_VM_BIND_OP_PREFETCH 0x4 /** @op: Bind operation to perform */ __u32 op; -#define XE_VM_BIND_FLAG_READONLY (0x1 << 0) -#define XE_VM_BIND_FLAG_ASYNC (0x1 << 1) +#define DRM_XE_VM_BIND_FLAG_READONLY (0x1 << 0) +#define DRM_XE_VM_BIND_FLAG_ASYNC (0x1 << 1) /* * Valid on a faulting VM only, do the MAP operation immediately rather * than deferring the MAP to the page fault handler. */ -#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 2) +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 2) /* * When the NULL flag is set, the page tables are setup with a special * bit which indicates writes are dropped and all reads return zero. In - * the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP + * the future, the NULL flags will only be valid for DRM_XE_VM_BIND_OP_MAP * operations, the BO handle MBZ, and the BO offset MBZ. This flag is * intended to implement VK sparse bindings. 
*/ -#define XE_VM_BIND_FLAG_NULL (0x1 << 3) +#define DRM_XE_VM_BIND_FLAG_NULL (0x1 << 3) /** @flags: Bind flags */ __u32 flags; @@ -721,19 +721,19 @@ struct drm_xe_vm_bind { __u64 reserved[2]; }; -/* For use with XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY */ +/* For use with DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY */ /* Monitor 128KB contiguous region with 4K sub-granularity */ -#define XE_ACC_GRANULARITY_128K 0 +#define DRM_XE_ACC_GRANULARITY_128K 0 /* Monitor 2MB contiguous region with 64KB sub-granularity */ -#define XE_ACC_GRANULARITY_2M 1 +#define DRM_XE_ACC_GRANULARITY_2M 1 /* Monitor 16MB contiguous region with 512KB sub-granularity */ -#define XE_ACC_GRANULARITY_16M 2 +#define DRM_XE_ACC_GRANULARITY_16M 2 /* Monitor 64MB contiguous region with 2M sub-granularity */ -#define XE_ACC_GRANULARITY_64M 3 +#define DRM_XE_ACC_GRANULARITY_64M 3 /** * struct drm_xe_exec_queue_set_property - exec queue set property @@ -747,14 +747,14 @@ struct drm_xe_exec_queue_set_property { /** @exec_queue_id: Exec queue ID */ __u32 exec_queue_id; -#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 -#define XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 -#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 /** @property: property to set */ __u32 property; @@ -766,7 +766,7 @@ struct drm_xe_exec_queue_set_property { }; struct drm_xe_exec_queue_create { -#define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -805,7 +805,7 @@ struct drm_xe_exec_queue_get_property { /** @exec_queue_id: Exec queue ID */ __u32 exec_queue_id; -#define XE_EXEC_QUEUE_GET_PROPERTY_BAN 0 +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN 0 /** @property: property to get */ __u32 property; @@ -973,11 +973,11 @@ struct drm_xe_wait_user_fence { /** * DOC: XE PMU event config IDs * - * Check 'man perf_event_open' to use the ID's XE_PMU_XXXX listed in xe_drm.h + * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h * in 'struct perf_event_attr' as part of perf_event_open syscall to read a * particular event. * - * For example to open the XE_PMU_RENDER_GROUP_BUSY(0): + * For example to open the DRMXE_PMU_RENDER_GROUP_BUSY(0): * * .. code-block:: C * @@ -991,7 +991,7 @@ struct drm_xe_wait_user_fence { * attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; * attr.use_clockid = 1; * attr.clockid = CLOCK_MONOTONIC; - * attr.config = XE_PMU_RENDER_GROUP_BUSY(0); + * attr.config = DRM_XE_PMU_RENDER_GROUP_BUSY(0); * * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); */ @@ -999,15 +999,15 @@ struct drm_xe_wait_user_fence { /* * Top bits of every counter are GT id. 
*/ -#define __XE_PMU_GT_SHIFT (56) +#define __DRM_XE_PMU_GT_SHIFT (56) -#define ___XE_PMU_OTHER(gt, x) \ - (((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT)) +#define ___DRM_XE_PMU_OTHER(gt, x) \ + (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT)) -#define XE_PMU_RENDER_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 0) -#define XE_PMU_COPY_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 1) -#define XE_PMU_MEDIA_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 2) -#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 3) +#define DRM_XE_PMU_RENDER_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 0) +#define DRM_XE_PMU_COPY_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 1) +#define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 2) +#define DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 3) #if defined(__cplusplus) } -- cgit v1.2.3 From 3ac4a7896d1c02918ee76acaf7e8160f3d11fa75 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 14 Nov 2023 13:34:28 +0000 Subject: drm/xe/uapi: Add _FLAG to uAPI constants usable for flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most constants defined in xe_drm.h which can be used for flags are named DRM_XE_*_FLAG_*, which is helpful to identify them. Make this systematic and add _FLAG where it was missing. Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 16 ++++++++-------- drivers/gpu/drm/xe/xe_vm.c | 32 ++++++++++++++++---------------- drivers/gpu/drm/xe/xe_vm_doc.h | 2 +- drivers/gpu/drm/xe/xe_wait_user_fence.c | 10 +++++----- include/uapi/drm/xe_drm.h | 30 +++++++++++++++--------------- 5 files changed, 45 insertions(+), 45 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 73ef259aa387..eafe53c2f55d 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -110,14 +110,14 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, return -EFAULT; if (XE_IOCTL_DBG(xe, sync_in.flags & - ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_SIGNAL)) || + ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_FLAG_SIGNAL)) || XE_IOCTL_DBG(xe, sync_in.pad) || XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1])) return -EINVAL; - signal = sync_in.flags & DRM_XE_SYNC_SIGNAL; + signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL; switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { - case DRM_XE_SYNC_SYNCOBJ: + case DRM_XE_SYNC_FLAG_SYNCOBJ: if (XE_IOCTL_DBG(xe, no_dma_fences && signal)) return -EOPNOTSUPP; @@ -135,7 +135,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } break; - case DRM_XE_SYNC_TIMELINE_SYNCOBJ: + case DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ: if (XE_IOCTL_DBG(xe, no_dma_fences && signal)) return -EOPNOTSUPP; @@ -165,12 +165,12 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } break; - case DRM_XE_SYNC_DMA_BUF: + case DRM_XE_SYNC_FLAG_DMA_BUF: if (XE_IOCTL_DBG(xe, "TODO")) return -EINVAL; break; - case DRM_XE_SYNC_USER_FENCE: + case DRM_XE_SYNC_FLAG_USER_FENCE: if (XE_IOCTL_DBG(xe, !signal)) return -EOPNOTSUPP; @@ -225,7 +225,7 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, struct dma_fence *fence) { - if (!(sync->flags & DRM_XE_SYNC_SIGNAL)) + if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL)) return; if (sync->chain_fence) { @@ -253,7 +253,7 @@ void xe_sync_entry_signal(struct xe_sync_entry 
*sync, struct xe_sched_job *job, dma_fence_put(fence); } } else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) == - DRM_XE_SYNC_USER_FENCE) { + DRM_XE_SYNC_FLAG_USER_FENCE) { job->user_fence.used = true; job->user_fence.addr = sync->addr; job->user_fence.value = sync->timeline_value; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 66d878bc464a..e8dd46789537 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1920,10 +1920,10 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, return 0; } -#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_SCRATCH_PAGE | \ - DRM_XE_VM_CREATE_COMPUTE_MODE | \ - DRM_XE_VM_CREATE_ASYNC_DEFAULT | \ - DRM_XE_VM_CREATE_FAULT_MODE) +#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ + DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE | \ + DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \ + DRM_XE_VM_CREATE_FLAG_FAULT_MODE) int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -1941,9 +1941,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) - args->flags |= DRM_XE_VM_CREATE_SCRATCH_PAGE; + args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && !xe->info.supports_usm)) return -EINVAL; @@ -1953,32 +1953,32 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE && - args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && + args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE && - args->flags & DRM_XE_VM_CREATE_FAULT_MODE)) + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE && + args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FAULT_MODE && + if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && xe_device_in_non_fault_mode(xe))) return -EINVAL; - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FAULT_MODE) && + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && xe_device_in_fault_mode(xe))) return -EINVAL; if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; - if (args->flags & DRM_XE_VM_CREATE_SCRATCH_PAGE) + if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; - if (args->flags & DRM_XE_VM_CREATE_COMPUTE_MODE) + if (args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE) flags |= XE_VM_FLAG_COMPUTE_MODE; - if (args->flags & DRM_XE_VM_CREATE_ASYNC_DEFAULT) + if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT) flags |= XE_VM_FLAG_ASYNC_DEFAULT; - if (args->flags & DRM_XE_VM_CREATE_FAULT_MODE) + if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; vm = xe_vm_create(xe, flags); diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index 516f4dc97223..bdc6659891a5 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -18,7 +18,7 @@ * Scratch page * ------------ * - * If the VM is created with the flag, DRM_XE_VM_CREATE_SCRATCH_PAGE, set the + * If the VM is created with the flag, DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE, set the * entire 
page table structure defaults pointing to blank page allocated by the * VM. Invalid memory access rather than fault just read / write to this page. * diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 78686908f7fb..13562db6c07f 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -79,8 +79,8 @@ static int check_hw_engines(struct xe_device *xe, return 0; } -#define VALID_FLAGS (DRM_XE_UFENCE_WAIT_SOFT_OP | \ - DRM_XE_UFENCE_WAIT_ABSTIME) +#define VALID_FLAGS (DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP | \ + DRM_XE_UFENCE_WAIT_FLAG_ABSTIME) #define MAX_OP DRM_XE_UFENCE_WAIT_LTE static long to_jiffies_timeout(struct xe_device *xe, @@ -107,7 +107,7 @@ static long to_jiffies_timeout(struct xe_device *xe, * Save the timeout to an u64 variable because nsecs_to_jiffies * might return a value that overflows s32 variable. */ - if (args->flags & DRM_XE_UFENCE_WAIT_ABSTIME) + if (args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME) t = drm_timeout_abs_to_jiffies(args->timeout); else t = nsecs_to_jiffies(args->timeout); @@ -137,7 +137,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, u64_to_user_ptr(args->instances); u64 addr = args->addr; int err; - bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_SOFT_OP; + bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP; long timeout; ktime_t start; @@ -206,7 +206,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, } remove_wait_queue(&xe->ufence_wq, &w_wait); - if (!(args->flags & DRM_XE_UFENCE_WAIT_ABSTIME)) { + if (!(args->flags & DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)) { args->timeout -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout < 0) args->timeout = 0; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 3ef49e3baaed..f6346a8351e4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -585,10 +585,10 @@ struct drm_xe_vm_create { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -#define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0) -#define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1) -#define DRM_XE_VM_CREATE_ASYNC_DEFAULT (0x1 << 2) -#define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3) +#define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE (0x1 << 0) +#define DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE (0x1 << 1) +#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT (0x1 << 2) +#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (0x1 << 3) /** @flags: Flags */ __u32 flags; @@ -831,11 +831,11 @@ struct drm_xe_sync { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -#define DRM_XE_SYNC_SYNCOBJ 0x0 -#define DRM_XE_SYNC_TIMELINE_SYNCOBJ 0x1 -#define DRM_XE_SYNC_DMA_BUF 0x2 -#define DRM_XE_SYNC_USER_FENCE 0x3 -#define DRM_XE_SYNC_SIGNAL 0x10 +#define DRM_XE_SYNC_FLAG_SYNCOBJ 0x0 +#define DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ 0x1 +#define DRM_XE_SYNC_FLAG_DMA_BUF 0x2 +#define DRM_XE_SYNC_FLAG_USER_FENCE 0x3 +#define DRM_XE_SYNC_FLAG_SIGNAL 0x10 __u32 flags; /** @pad: MBZ */ @@ -921,8 +921,8 @@ struct drm_xe_wait_user_fence { /** @op: wait operation (type of comparison) */ __u16 op; -#define DRM_XE_UFENCE_WAIT_SOFT_OP (1 << 0) /* e.g. Wait on VM bind */ -#define DRM_XE_UFENCE_WAIT_ABSTIME (1 << 1) +#define DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP (1 << 0) /* e.g. 
Wait on VM bind */ +#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME (1 << 1) /** @flags: wait flags */ __u16 flags; @@ -940,10 +940,10 @@ struct drm_xe_wait_user_fence { __u64 mask; /** * @timeout: how long to wait before bailing, value in nanoseconds. - * Without DRM_XE_UFENCE_WAIT_ABSTIME flag set (relative timeout) + * Without DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flag set (relative timeout) * it contains timeout expressed in nanoseconds to wait (fence will * expire at now() + timeout). - * When DRM_XE_UFENCE_WAIT_ABSTIME flat is set (absolute timeout) wait + * When DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flat is set (absolute timeout) wait * will end at timeout (uses system MONOTONIC_CLOCK). * Passing negative timeout leads to neverending wait. * @@ -956,13 +956,13 @@ struct drm_xe_wait_user_fence { /** * @num_engines: number of engine instances to wait on, must be zero - * when DRM_XE_UFENCE_WAIT_SOFT_OP set + * when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set */ __u64 num_engines; /** * @instances: user pointer to array of drm_xe_engine_class_instance to - * wait on, must be NULL when DRM_XE_UFENCE_WAIT_SOFT_OP set + * wait on, must be NULL when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set */ __u64 instances; -- cgit v1.2.3 From 5ca2c4b800194b55a863882273b8ca34b56afb35 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 14 Nov 2023 13:34:29 +0000 Subject: drm/xe/uapi: Change rsvd to pad in struct drm_xe_class_instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change rsvd to pad in struct drm_xe_class_instance to prevent the field from being used in future. v2: Change from fixup to regular commit because this touches the uAPI (Francois Dugast) Signed-off-by: Umesh Nerlige Ramappa Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 5 ++++- include/uapi/drm/xe_drm.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 565a716302bb..48befd9f0812 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -215,7 +215,10 @@ static int query_engines(struct xe_device *xe, xe_to_user_engine_class[hwe->class]; hw_engine_info[i].engine_instance = hwe->logical_instance; - hw_engine_info[i++].gt_id = gt->info.id; + hw_engine_info[i].gt_id = gt->info.id; + hw_engine_info[i].pad = 0; + + i++; } if (copy_to_user(query_ptr, hw_engine_info, size)) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index f6346a8351e4..a8d351c9fa7c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -141,7 +141,8 @@ struct drm_xe_engine_class_instance { __u16 engine_instance; __u16 gt_id; - __u16 rsvd; + /** @pad: MBZ */ + __u16 pad; }; /** -- cgit v1.2.3 From 45c30d80008264d55915f4b87c6f9bbb3261071c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2023 13:34:30 +0000 Subject: drm/xe/uapi: Rename *_mem_regions masks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 'native' doesn't make much sense on integrated devices. - 'slow' is not necessarily true and doesn't go well with opposition to 'native'. Instead, let's use 'near' vs 'far'. It makes sense with all the current Intel GPUs and it is future proof. Right now, there's absolutely no need to define among the 'far' memory, which ones are slower, either in terms of latency, nunmber of hops or bandwidth. 
In case of this might become a requirement in the future, a new query could be added to indicate the certain 'distance' between a given engine and a memory_region. But for now, this fulfill all of the current requirements in the most straightforward way for the userspace drivers. Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matt Roper Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_query.c | 8 ++++---- include/uapi/drm/xe_drm.h | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 48befd9f0812..8b5136460ea6 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -377,12 +377,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query gt_list->gt_list[id].gt_id = gt->info.id; gt_list->gt_list[id].clock_freq = gt->info.clock_freq; if (!IS_DGFX(xe)) - gt_list->gt_list[id].native_mem_regions = 0x1; + gt_list->gt_list[id].near_mem_regions = 0x1; else - gt_list->gt_list[id].native_mem_regions = + gt_list->gt_list[id].near_mem_regions = BIT(gt_to_tile(gt)->id) << 1; - gt_list->gt_list[id].slow_mem_regions = xe->info.mem_region_mask ^ - gt_list->gt_list[id].native_mem_regions; + gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[id].near_mem_regions; } if (copy_to_user(query_ptr, gt_list, size)) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index a8d351c9fa7c..30567500e6cd 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -349,17 +349,19 @@ struct drm_xe_query_gt { /** @clock_freq: A clock frequency for timestamp */ __u32 clock_freq; /** - * @native_mem_regions: Bit mask of instances from - * drm_xe_query_mem_usage that lives on the same GPU/Tile and have - * direct access. + * @near_mem_regions: Bit mask of instances from + * drm_xe_query_mem_usage that are nearest to the current engines + * of this GT. */ - __u64 native_mem_regions; + __u64 near_mem_regions; /** - * @slow_mem_regions: Bit mask of instances from - * drm_xe_query_mem_usage that this GT can indirectly access, although - * they live on a different GPU/Tile. + * @far_mem_regions: Bit mask of instances from + * drm_xe_query_mem_usage that are far from the engines of this GT. + * In general, they have extra indirections when compared to the + * @near_mem_regions. For a discrete device this could mean system + * memory and memory living in a different tile. */ - __u64 slow_mem_regions; + __u64 far_mem_regions; /** @reserved: Reserved */ __u64 reserved[8]; }; -- cgit v1.2.3 From b02606d32376b8d51b33211f8c069b16165390eb Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2023 13:34:31 +0000 Subject: drm/xe/uapi: Rename query's mem_usage to mem_regions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'Usage' gives an impression of telemetry information where someone would query to see how the memory is currently used and available size, etc. However this API is more than this. It is about a global view of all the memory regions available in the system and user space needs to have this information so they can then use the mem_region masks that are returned for the engine access. 
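A minimal sketch of how a userspace driver might consume these masks once the GT list query returns. The helper below is not part of the uAPI and its name is made up; it only assumes the drm_xe_query_gt fields shown in these patches plus a GCC/Clang builtin:

#include <drm/xe_drm.h>		/* struct drm_xe_query_gt (uAPI header) */

/*
 * Illustrative helper, not part of the uAPI: pick the lowest-numbered memory
 * region instance advertised for a GT, preferring "near" regions and falling
 * back to "far" ones. The returned instance is the bit position userspace
 * would then set in a buffer placement mask.
 */
static int pick_mem_region_instance(const struct drm_xe_query_gt *gt)
{
	unsigned long long mask = gt->near_mem_regions ? gt->near_mem_regions
						       : gt->far_mem_regions;

	return mask ? __builtin_ctzll(mask) : -1;
}

Nothing here is prescriptive; it only shows that the near/far split is directly usable as a preference order.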
Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matt Roper Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_query.c | 16 ++++++++-------- include/uapi/drm/xe_drm.h | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 8b5136460ea6..d495716b2c96 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -230,7 +230,7 @@ static int query_engines(struct xe_device *xe, return 0; } -static size_t calc_memory_usage_size(struct xe_device *xe) +static size_t calc_mem_regions_size(struct xe_device *xe) { u32 num_managers = 1; int i; @@ -239,15 +239,15 @@ static size_t calc_memory_usage_size(struct xe_device *xe) if (ttm_manager_type(&xe->ttm, i)) num_managers++; - return offsetof(struct drm_xe_query_mem_usage, regions[num_managers]); + return offsetof(struct drm_xe_query_mem_regions, regions[num_managers]); } -static int query_memory_usage(struct xe_device *xe, - struct drm_xe_device_query *query) +static int query_mem_regions(struct xe_device *xe, + struct drm_xe_device_query *query) { - size_t size = calc_memory_usage_size(xe); - struct drm_xe_query_mem_usage *usage; - struct drm_xe_query_mem_usage __user *query_ptr = + size_t size = calc_mem_regions_size(xe); + struct drm_xe_query_mem_regions *usage; + struct drm_xe_query_mem_regions __user *query_ptr = u64_to_user_ptr(query->data); struct ttm_resource_manager *man; int ret, i; @@ -499,7 +499,7 @@ static int query_gt_topology(struct xe_device *xe, static int (* const xe_query_funcs[])(struct xe_device *xe, struct drm_xe_device_query *query) = { query_engines, - query_memory_usage, + query_mem_regions, query_config, query_gt_list, query_hwconfig, diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 30567500e6cd..8ec12f9f4132 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -291,13 +291,13 @@ struct drm_xe_query_engine_cycles { }; /** - * struct drm_xe_query_mem_usage - describe memory regions and usage + * struct drm_xe_query_mem_regions - describe memory regions * * If a query is made with a struct drm_xe_device_query where .query - * is equal to DRM_XE_DEVICE_QUERY_MEM_USAGE, then the reply uses - * struct drm_xe_query_mem_usage in .data. + * is equal to DRM_XE_DEVICE_QUERY_MEM_REGIONS, then the reply uses + * struct drm_xe_query_mem_regions in .data. */ -struct drm_xe_query_mem_usage { +struct drm_xe_query_mem_regions { /** @num_regions: number of memory regions returned in @regions */ __u32 num_regions; /** @pad: MBZ */ @@ -350,13 +350,13 @@ struct drm_xe_query_gt { __u32 clock_freq; /** * @near_mem_regions: Bit mask of instances from - * drm_xe_query_mem_usage that are nearest to the current engines + * drm_xe_query_mem_regions that are nearest to the current engines * of this GT. */ __u64 near_mem_regions; /** * @far_mem_regions: Bit mask of instances from - * drm_xe_query_mem_usage that are far from the engines of this GT. + * drm_xe_query_mem_regions that are far from the engines of this GT. * In general, they have extra indirections when compared to the * @near_mem_regions. For a discrete device this could mean system * memory and memory living in a different tile. 
@@ -470,7 +470,7 @@ struct drm_xe_device_query { __u64 extensions; #define DRM_XE_DEVICE_QUERY_ENGINES 0 -#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 +#define DRM_XE_DEVICE_QUERY_MEM_REGIONS 1 #define DRM_XE_DEVICE_QUERY_CONFIG 2 #define DRM_XE_DEVICE_QUERY_GT_LIST 3 #define DRM_XE_DEVICE_QUERY_HWCONFIG 4 -- cgit v1.2.3 From 9ad743515cc59275653f719886d1b93fa7a824ab Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2023 13:34:32 +0000 Subject: drm/xe/uapi: Standardize the FLAG naming and assignment Only cosmetic things. No functional change on this patch. Define every flag with (1 << n) and use singular FLAG name. Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_query.c | 2 +- include/uapi/drm/xe_drm.h | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index d495716b2c96..61a7d92b7e88 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -331,7 +331,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) xe->info.devid | (xe->info.revid << 16); if (xe_device_get_root_tile(xe)->mem.vram.usable_size) config->info[DRM_XE_QUERY_CONFIG_FLAGS] = - DRM_XE_QUERY_CONFIG_FLAGS_HAS_VRAM; + DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 8ec12f9f4132..236e643be69a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -323,7 +323,7 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 #define DRM_XE_QUERY_CONFIG_FLAGS 1 - #define DRM_XE_QUERY_CONFIG_FLAGS_HAS_VRAM (0x1 << 0) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -588,10 +588,10 @@ struct drm_xe_vm_create { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -#define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE (0x1 << 0) -#define DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE (0x1 << 1) -#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT (0x1 << 2) -#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (0x1 << 3) +#define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE (1 << 0) +#define DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE (1 << 1) +#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT (1 << 2) +#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 3) /** @flags: Flags */ __u32 flags; @@ -655,13 +655,13 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; -#define DRM_XE_VM_BIND_FLAG_READONLY (0x1 << 0) -#define DRM_XE_VM_BIND_FLAG_ASYNC (0x1 << 1) +#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) +#define DRM_XE_VM_BIND_FLAG_ASYNC (1 << 1) /* * Valid on a faulting VM only, do the MAP operation immediately rather * than deferring the MAP to the page fault handler. */ -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 2) +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 2) /* * When the NULL flag is set, the page tables are setup with a special * bit which indicates writes are dropped and all reads return zero. In @@ -669,7 +669,7 @@ struct drm_xe_vm_bind_op { * operations, the BO handle MBZ, and the BO offset MBZ. This flag is * intended to implement VK sparse bindings. 
*/ -#define DRM_XE_VM_BIND_FLAG_NULL (0x1 << 3) +#define DRM_XE_VM_BIND_FLAG_NULL (1 << 3) /** @flags: Bind flags */ __u32 flags; -- cgit v1.2.3 From 4a349c86110a6fab26ce5f4fcb545acf214efed5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2023 13:34:33 +0000 Subject: drm/xe/uapi: Differentiate WAIT_OP from WAIT_MASK On one hand the WAIT_OP represents the operation use for waiting such as ==, !=, > and so on. On the other hand, the mask is applied to the value used for comparision. Split those two to bring clarity to the uapi. Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matt Roper --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 14 +++++++------- include/uapi/drm/xe_drm.h | 21 +++++++++++---------- 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 13562db6c07f..4d5c2555ce41 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -25,22 +25,22 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op) return -EFAULT; switch (op) { - case DRM_XE_UFENCE_WAIT_EQ: + case DRM_XE_UFENCE_WAIT_OP_EQ: passed = (rvalue & mask) == (value & mask); break; - case DRM_XE_UFENCE_WAIT_NEQ: + case DRM_XE_UFENCE_WAIT_OP_NEQ: passed = (rvalue & mask) != (value & mask); break; - case DRM_XE_UFENCE_WAIT_GT: + case DRM_XE_UFENCE_WAIT_OP_GT: passed = (rvalue & mask) > (value & mask); break; - case DRM_XE_UFENCE_WAIT_GTE: + case DRM_XE_UFENCE_WAIT_OP_GTE: passed = (rvalue & mask) >= (value & mask); break; - case DRM_XE_UFENCE_WAIT_LT: + case DRM_XE_UFENCE_WAIT_OP_LT: passed = (rvalue & mask) < (value & mask); break; - case DRM_XE_UFENCE_WAIT_LTE: + case DRM_XE_UFENCE_WAIT_OP_LTE: passed = (rvalue & mask) <= (value & mask); break; default: @@ -81,7 +81,7 @@ static int check_hw_engines(struct xe_device *xe, #define VALID_FLAGS (DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP | \ DRM_XE_UFENCE_WAIT_FLAG_ABSTIME) -#define MAX_OP DRM_XE_UFENCE_WAIT_LTE +#define MAX_OP DRM_XE_UFENCE_WAIT_OP_LTE static long to_jiffies_timeout(struct xe_device *xe, struct drm_xe_wait_user_fence *args) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 236e643be69a..b2bd76efd940 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -915,12 +915,12 @@ struct drm_xe_wait_user_fence { */ __u64 addr; -#define DRM_XE_UFENCE_WAIT_EQ 0 -#define DRM_XE_UFENCE_WAIT_NEQ 1 -#define DRM_XE_UFENCE_WAIT_GT 2 -#define DRM_XE_UFENCE_WAIT_GTE 3 -#define DRM_XE_UFENCE_WAIT_LT 4 -#define DRM_XE_UFENCE_WAIT_LTE 5 +#define DRM_XE_UFENCE_WAIT_OP_EQ 0x0 +#define DRM_XE_UFENCE_WAIT_OP_NEQ 0x1 +#define DRM_XE_UFENCE_WAIT_OP_GT 0x2 +#define DRM_XE_UFENCE_WAIT_OP_GTE 0x3 +#define DRM_XE_UFENCE_WAIT_OP_LT 0x4 +#define DRM_XE_UFENCE_WAIT_OP_LTE 0x5 /** @op: wait operation (type of comparison) */ __u16 op; @@ -935,12 +935,13 @@ struct drm_xe_wait_user_fence { /** @value: compare value */ __u64 value; -#define DRM_XE_UFENCE_WAIT_U8 0xffu -#define DRM_XE_UFENCE_WAIT_U16 0xffffu -#define DRM_XE_UFENCE_WAIT_U32 0xffffffffu -#define DRM_XE_UFENCE_WAIT_U64 0xffffffffffffffffu +#define DRM_XE_UFENCE_WAIT_MASK_U8 0xffu +#define DRM_XE_UFENCE_WAIT_MASK_U16 0xffffu +#define DRM_XE_UFENCE_WAIT_MASK_U32 0xffffffffu +#define DRM_XE_UFENCE_WAIT_MASK_U64 0xffffffffffffffffu /** @mask: comparison mask */ __u64 mask; + /** * @timeout: how long to wait before bailing, value in nanoseconds. 
* Without DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flag set (relative timeout) -- cgit v1.2.3 From aaa115ffaa467782b01cfa81711424315823bdb5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 14 Nov 2023 13:34:34 +0000 Subject: drm/xe/uapi: Be more specific about the vm_bind prefetch region Let's bring a bit of clarity on this 'region' field that is part of vm_bind operation struct. Rename and document to make it more than obvious that it is a region instance and not a mask and also that it should only be used with the prefetch operation itself. Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matt Roper --- drivers/gpu/drm/xe/xe_vm.c | 15 ++++++++------- include/uapi/drm/xe_drm.h | 8 ++++++-- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e8dd46789537..174441c4ca5a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2160,7 +2160,8 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) static struct drm_gpuva_ops * vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 addr, u64 range, - u32 operation, u32 flags, u8 tile_mask, u32 region) + u32 operation, u32 flags, u8 tile_mask, + u32 prefetch_region) { struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; struct drm_gpuva_ops *ops; @@ -2215,7 +2216,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); op->tile_mask = tile_mask; - op->prefetch.region = region; + op->prefetch.region = prefetch_region; } break; case DRM_XE_VM_BIND_OP_UNMAP_ALL: @@ -2881,7 +2882,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u32 flags = (*bind_ops)[i].flags; u32 obj = (*bind_ops)[i].obj; u64 obj_offset = (*bind_ops)[i].obj_offset; - u32 region = (*bind_ops)[i].region; + u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; if (i == 0) { @@ -2915,9 +2916,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, region && + XE_IOCTL_DBG(xe, prefetch_region && op != DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, !(BIT(region) & + XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & xe->info.mem_region_mask)) || XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_UNMAP)) { @@ -3099,11 +3100,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 flags = bind_ops[i].flags; u64 obj_offset = bind_ops[i].obj_offset; u8 tile_mask = bind_ops[i].tile_mask; - u32 region = bind_ops[i].region; + u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, addr, range, op, flags, - tile_mask, region); + tile_mask, prefetch_region); if (IS_ERR(ops[i])) { err = PTR_ERR(ops[i]); ops[i] = NULL; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b2bd76efd940..88f3aca02b08 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -673,8 +673,12 @@ struct drm_xe_vm_bind_op { /** @flags: Bind flags */ __u32 flags; - /** @mem_region: Memory region to prefetch VMA to, instance not a mask */ - __u32 region; + /** + * @prefetch_mem_region_instance: Memory region to prefetch VMA to. + * It is a region instance, not a mask. + * To be used only with %DRM_XE_VM_BIND_OP_PREFETCH operation. 
+ */ + __u32 prefetch_mem_region_instance; /** @reserved: Reserved */ __u64 reserved[2]; -- cgit v1.2.3 From 32dd40fb48c56265ab08d379fecb8bbf62e3c427 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 15 Nov 2023 10:30:30 -0800 Subject: drm/xe/dg2: Wa_18028616096 now applies to all DG2 The workaround database was just updated to extend this workaround to DG2-G11 (whereas previously it applied only to G10 and G12). Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20231115183029.2649992-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d03e6674519f..6572715dfc09 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -403,12 +403,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { PERF_FIX_BALANCING_CFE_DISABLE)) }, { XE_RTP_NAME("18028616096"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3)) - }, - { XE_RTP_NAME("18028616096"), - XE_RTP_RULES(SUBPLATFORM(DG2, G12), + XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3)) }, -- cgit v1.2.3 From 4a9b7d29c117fc6e49690728f35b6a16454556f2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 Nov 2023 16:12:39 +0100 Subject: drm/xe/guc: Fix wrong assert about full_len This variable holds full length of the message, including header length so it should be checked against GUC_CTB_MSG_MAX_LEN. Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index a84e111bb36a..c44e75074695 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -419,7 +419,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, full_len = len + GUC_CTB_HDR_LEN; lockdep_assert_held(&ct->lock); - xe_assert(xe, full_len <= (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); + xe_assert(xe, full_len <= GUC_CTB_MSG_MAX_LEN); xe_assert(xe, tail <= h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ -- cgit v1.2.3 From cd1c9c54c34b3a2540fdf49eafd49a61747a6342 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 Nov 2023 16:12:40 +0100 Subject: drm/xe/guc: Copy response data from proper registers While copying GuC response from the scratch registers to the buffer, formula to identify next scratch register is broken. Fix it. 
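The breakage is easier to see in isolation: because reply_reg.addr is updated in place on every pass, adding i * sizeof(u32) makes the step itself grow and registers get skipped. A standalone sketch follows; the base address and types are placeholders, only the loop shape mirrors the response-copy loop in xe_guc_mmio_send_recv():

#include <stdio.h>

#define VF_SW_FLAG_COUNT 4	/* VF_SW_FLAG(0)..VF_SW_FLAG(3) */

int main(void)
{
	const unsigned int base = 0x1000;	/* made-up base, for illustration only */
	unsigned int addr;
	int i;

	/* Broken: the increment grows each pass, so reads land at base+4, +12, +24. */
	addr = base;
	for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
		addr += i * sizeof(unsigned int);
		printf("broken: response_buf[%d] read from %#x\n", i, addr);
	}

	/* Fixed: one 32-bit register further on every pass (base+4, +8, +12). */
	addr = base;
	for (i = 1; i < VF_SW_FLAG_COUNT; i++) {
		addr += sizeof(unsigned int);
		printf("fixed:  response_buf[%d] read from %#x\n", i, addr);
	}

	return 0;
}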
Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 311c5d539423..aa4af536c1e9 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -703,7 +703,7 @@ proto: response_buf[0] = header; for (i = 1; i < VF_SW_FLAG_COUNT; i++) { - reply_reg.addr += i * sizeof(u32); + reply_reg.addr += sizeof(u32); response_buf[i] = xe_mmio_read32(gt, reply_reg); } } -- cgit v1.2.3 From 1d087cb7d81f9a17760154eef5ac8b894428cdbe Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 Nov 2023 16:12:41 +0100 Subject: drm/xe/guc: Fix handling of GUC_HXG_TYPE_NO_RESPONSE_BUSY If GuC responds with the NO_RESPONSE_BUSY message, we extend our timeout while waiting for the actual response, but we wrongly assumed that the next message will be RESPONSE_SUCCESS, missing that we still can get RESPONSE_FAILURE. Change the condition for the expected message type, using only common bits from RESPONSE_SUCCESS and RESPONSE_FAILURE (as they differ, by ABI design, only by the last bit). v2: add comment/checks to the code (Matt) Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index aa4af536c1e9..56edcb2b0e45 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -659,9 +659,20 @@ timeout: header = xe_mmio_read32(gt, reply_reg); if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { - - ret = xe_mmio_wait32(gt, reply_reg, GUC_HXG_MSG_0_TYPE, - FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), + /* + * Once we got a BUSY reply we must wait again for the final + * response but this time we can't use ORIGIN mask anymore. + * To spot a right change in the reply, we take advantage that + * response SUCCESS and FAILURE differ only by the single bit + * and all other bits are set and can be used as a new mask. + */ + u32 resp_bits = GUC_HXG_TYPE_RESPONSE_SUCCESS & GUC_HXG_TYPE_RESPONSE_FAILURE; + u32 resp_mask = FIELD_PREP(GUC_HXG_MSG_0_TYPE, resp_bits); + + BUILD_BUG_ON(FIELD_MAX(GUC_HXG_MSG_0_TYPE) != GUC_HXG_TYPE_RESPONSE_SUCCESS); + BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1); + + ret = xe_mmio_wait32(gt, reply_reg, resp_mask, resp_mask, 1000000, &header, false); if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != -- cgit v1.2.3 From cac74742faea603b288592be118b4f100ed2c863 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 Nov 2023 16:12:42 +0100 Subject: drm/xe/guc: Use valid scratch register for posting read There are only 4 scratch registers VF_SW_FLAG(0..3) on each GuC. We shouldn't use non-existing register VF_SW_FLAG(4) for posting read. 
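The mask trick in the NO_RESPONSE_BUSY fix above is worth spelling out once outside the driver. The concrete numbers below are inferred from the two BUILD_BUG_ON() constraints quoted in that patch (SUCCESS is the maximum of the 3-bit type field and differs from FAILURE only in bit 0); only the bitwise argument matters:

#include <assert.h>

/* Raw HXG type values; the driver places them into the message header with
 * FIELD_PREP(GUC_HXG_MSG_0_TYPE, ...). SUCCESS = 0b111, FAILURE = 0b110.
 */
#define HXG_TYPE_RESPONSE_SUCCESS 7
#define HXG_TYPE_RESPONSE_FAILURE 6

int main(void)
{
	unsigned int resp_bits = HXG_TYPE_RESPONSE_SUCCESS & HXG_TYPE_RESPONSE_FAILURE; /* 0b110 */

	/* Both final response types keep the common bits set... */
	assert((HXG_TYPE_RESPONSE_SUCCESS & resp_bits) == resp_bits);
	assert((HXG_TYPE_RESPONSE_FAILURE & resp_bits) == resp_bits);

	/* ...while every smaller type value (REQUEST, EVENT, NO_RESPONSE_BUSY,
	 * NO_RESPONSE_RETRY) clears at least one of them, so waiting for
	 * "(header & resp_mask) == resp_mask" catches either final outcome and
	 * nothing else.
	 */
	for (unsigned int t = 0; t < HXG_TYPE_RESPONSE_FAILURE; t++)
		assert((t & resp_bits) != resp_bits);

	return 0;
}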
Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 56edcb2b0e45..6de2ab05bf4e 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -615,7 +615,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 header, reply; struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); - const u32 LAST_INDEX = VF_SW_FLAG_COUNT; + const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; int ret; int i; -- cgit v1.2.3 From e7b4ebd7c6b3d25557aa83b43c3497e31ac89101 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 9 Oct 2023 10:00:38 +0100 Subject: drm/xe/bo: don't hold dma-resv lock over drm_gem_handle_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This seems to create a locking inversion with object_name_lock. The lock is held by drm_prime_fd_to_handle when calling our xe_gem_prime_import hook, which might eventually go on to grab the dma-resv lock during the attach. However we also have the opposite locking order in xe_gem_create_ioctl which is holding the dma-resv lock when calling drm_gem_handle_create, which wants to eventually grab object_name_lock: -> #1 (reservation_ww_class_mutex){+.+.}-{3:3}: <4> [635.739288] lock_acquire+0x169/0x3d0 <4> [635.739294] __ww_mutex_lock.constprop.0+0x164/0x1e60 <4> [635.739300] ww_mutex_lock_interruptible+0x42/0x1a0 <4> [635.739305] drm_gem_shmem_pin+0x4b/0x140 [drm_shmem_helper] <4> [635.739317] dma_buf_dynamic_attach+0x101/0x430 <4> [635.739323] xe_gem_prime_import+0xcc/0x2e0 [xe] <4> [635.739499] drm_prime_fd_to_handle_ioctl+0x184/0x2e0 [drm] <4> [635.739594] drm_ioctl_kernel+0x16f/0x250 [drm] <4> [635.739693] drm_ioctl+0x35e/0x620 [drm] <4> [635.739789] __x64_sys_ioctl+0xb7/0xf0 <4> [635.739794] do_syscall_64+0x3c/0x90 <4> [635.739799] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 <4> [635.739805] -> #0 (&dev->object_name_lock){+.+.}-{3:3}: <4> [635.739813] check_prev_add+0x1ba/0x14a0 <4> [635.739818] __lock_acquire+0x203e/0x2ff0 <4> [635.739823] lock_acquire+0x169/0x3d0 <4> [635.739827] __mutex_lock+0x124/0x1310 <4> [635.739832] drm_gem_handle_create+0x32/0x50 [drm] <4> [635.739927] xe_gem_create_ioctl+0x1d3/0x550 [xe] <4> [635.740102] drm_ioctl_kernel+0x16f/0x250 [drm] <4> [635.740197] drm_ioctl+0x35e/0x620 [drm] <4> [635.740293] __x64_sys_ioctl+0xb7/0xf0 <4> [635.740297] do_syscall_64+0x3c/0x90 <4> [635.740302] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 <4> [635.740307] It looks like it should be safe to simply drop the dma-resv lock prior to publishing the object when calling drm_gem_handle_create. 
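Reduced to its skeleton, the splat described above is the classic AB-BA pattern. A toy sketch with plain pthread mutexes standing in for object_name_lock and the dma-resv ww_mutex (the two paths run sequentially here so the program terminates; run concurrently, lockdep would flag the cycle):

#include <pthread.h>

static pthread_mutex_t object_name_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dma_resv = PTHREAD_MUTEX_INITIALIZER;

/* prime fd-to-handle path: object_name_lock held first, dma-resv taken during attach */
static void prime_import_path(void)
{
	pthread_mutex_lock(&object_name_lock);
	pthread_mutex_lock(&dma_resv);		/* dma_buf_dynamic_attach() */
	pthread_mutex_unlock(&dma_resv);
	pthread_mutex_unlock(&object_name_lock);
}

/* xe_gem_create_ioctl before the fix: dma-resv held first, then handle creation */
static void gem_create_path_before_fix(void)
{
	pthread_mutex_lock(&dma_resv);		/* xe_vm_lock() */
	pthread_mutex_lock(&object_name_lock);	/* drm_gem_handle_create() */
	pthread_mutex_unlock(&object_name_lock);
	pthread_mutex_unlock(&dma_resv);
}

int main(void)
{
	prime_import_path();
	gem_create_path_before_fix();
	return 0;
}

The fix in the diff below simply ends the dma-resv critical section before drm_gem_handle_create() is called, removing the second ordering.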
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/743 Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Rodrigo Vivi Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index e8c89b6e06dc..835eab643263 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1856,14 +1856,16 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; err = xe_vm_lock(vm, true); - if (err) { - xe_vm_put(vm); - return err; - } + if (err) + goto out_vm; } bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device, bo_flags); + + if (vm) + xe_vm_unlock(vm); + if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_vm; @@ -1877,15 +1879,17 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, goto out_put; out_bulk: - if (vm && !xe_vm_in_fault_mode(vm)) + if (vm && !xe_vm_in_fault_mode(vm)) { + xe_vm_lock(vm, false); __xe_bo_unset_bulk_move(bo); + xe_vm_unlock(vm); + } out_put: xe_bo_put(bo); out_vm: - if (vm) { - xe_vm_unlock(vm); + if (vm) xe_vm_put(vm); - } + return err; } -- cgit v1.2.3 From 8735f8616d65816fd80a4958e570d8f448a6590f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Nov 2023 09:40:47 -0800 Subject: drm/xe: Fold GEN11_MOCS_ENTRIES into gen12_mocs_desc GEN11_MOCS_ENTRIES dates back from importing the table from the i915 module. The macro was used so the it could be maintained in a single place and platforms would just override with additional entries. With the platforms supported by xe, each of them is just defining individual tables without re-using this define. Move it inside gen12_mocs_desc that is the only user. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231117174049.527192-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 228 +++++++++++++++++++++---------------------- 1 file changed, 112 insertions(+), 116 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 46e999257862..a072fbccc693 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -149,123 +149,87 @@ struct xe_mocs_info { * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. */ -#define GEN11_MOCS_ENTRIES \ - /* Entries 0 and 1 are defined per-platform */ \ - /* Base - L3 + LLC */ \ - MOCS_ENTRY(2, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ - L3_3_WB), \ - /* Base - Uncached */ \ - MOCS_ENTRY(3, \ - LE_1_UC | LE_TC_1_LLC, \ - L3_1_UC), \ - /* Base - L3 */ \ - MOCS_ENTRY(4, \ - LE_1_UC | LE_TC_1_LLC, \ - L3_3_WB), \ - /* Base - LLC */ \ - MOCS_ENTRY(5, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ - L3_1_UC), \ - /* Age 0 - LLC */ \ - MOCS_ENTRY(6, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ - L3_1_UC), \ - /* Age 0 - L3 + LLC */ \ - MOCS_ENTRY(7, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ - L3_3_WB), \ - /* Age: Don't Chg. - LLC */ \ - MOCS_ENTRY(8, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ - L3_1_UC), \ - /* Age: Don't Chg. 
- L3 + LLC */ \ - MOCS_ENTRY(9, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ - L3_3_WB), \ - /* No AOM - LLC */ \ - MOCS_ENTRY(10, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ - L3_1_UC), \ - /* No AOM - L3 + LLC */ \ - MOCS_ENTRY(11, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ - L3_3_WB), \ - /* No AOM; Age 0 - LLC */ \ - MOCS_ENTRY(12, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ - L3_1_UC), \ - /* No AOM; Age 0 - L3 + LLC */ \ - MOCS_ENTRY(13, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ - L3_3_WB), \ - /* No AOM; Age:DC - LLC */ \ - MOCS_ENTRY(14, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ - L3_1_UC), \ - /* No AOM; Age:DC - L3 + LLC */ \ - MOCS_ENTRY(15, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ - L3_3_WB), \ - /* Self-Snoop - L3 + LLC */ \ - MOCS_ENTRY(18, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(12.5%) */ \ - MOCS_ENTRY(19, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(25%) */ \ - MOCS_ENTRY(20, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(50%) */ \ - MOCS_ENTRY(21, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(75%) */ \ - MOCS_ENTRY(22, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ - L3_3_WB), \ - /* Skip Caching - L3 + LLC(87.5%) */ \ - MOCS_ENTRY(23, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ - L3_3_WB), \ - /* HW Reserved - SW program but never use */ \ - MOCS_ENTRY(62, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ - L3_1_UC), \ - /* HW Reserved - SW program but never use */ \ - MOCS_ENTRY(63, \ - LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ - L3_1_UC) - -static const struct xe_mocs_entry dg1_mocs_desc[] = { - /* UC */ - MOCS_ENTRY(1, 0, L3_1_UC), - /* WB - L3 */ - MOCS_ENTRY(5, 0, L3_3_WB), - /* WB - L3 50% */ - MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB), - /* WB - L3 25% */ - MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB), - /* WB - L3 12.5% */ - MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB), - - /* HDC:L1 + L3 */ - MOCS_ENTRY(48, 0, L3_3_WB), - /* HDC:L1 */ - MOCS_ENTRY(49, 0, L3_1_UC), - - /* HW Reserved */ - MOCS_ENTRY(60, 0, L3_1_UC), - MOCS_ENTRY(61, 0, L3_1_UC), - MOCS_ENTRY(62, 0, L3_1_UC), - MOCS_ENTRY(63, 0, L3_1_UC), -}; - static const struct xe_mocs_entry gen12_mocs_desc[] = { - GEN11_MOCS_ENTRIES, + /* Base - L3 + LLC */ + MOCS_ENTRY(2, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Base - Uncached */ + MOCS_ENTRY(3, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* Base - L3 */ + MOCS_ENTRY(4, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Base - LLC */ + MOCS_ENTRY(5, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Age 0 - LLC */ + MOCS_ENTRY(6, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), + L3_1_UC), + /* Age 0 - L3 + LLC */ + MOCS_ENTRY(7, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), + L3_3_WB), + /* Age: Don't Chg. - LLC */ + MOCS_ENTRY(8, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), + L3_1_UC), + /* Age: Don't Chg. 
- L3 + LLC */ + MOCS_ENTRY(9, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), + L3_3_WB), + /* No AOM - LLC */ + MOCS_ENTRY(10, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), + L3_1_UC), + /* No AOM - L3 + LLC */ + MOCS_ENTRY(11, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), + L3_3_WB), + /* No AOM; Age 0 - LLC */ + MOCS_ENTRY(12, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), + L3_1_UC), + /* No AOM; Age 0 - L3 + LLC */ + MOCS_ENTRY(13, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), + L3_3_WB), + /* No AOM; Age:DC - LLC */ + MOCS_ENTRY(14, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), + L3_1_UC), + /* No AOM; Age:DC - L3 + LLC */ + MOCS_ENTRY(15, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), + L3_3_WB), + /* Self-Snoop - L3 + LLC */ + MOCS_ENTRY(18, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), + L3_3_WB), + /* Skip Caching - L3 + LLC(12.5%) */ + MOCS_ENTRY(19, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), + L3_3_WB), + /* Skip Caching - L3 + LLC(25%) */ + MOCS_ENTRY(20, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), + L3_3_WB), + /* Skip Caching - L3 + LLC(50%) */ + MOCS_ENTRY(21, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), + L3_3_WB), + /* Skip Caching - L3 + LLC(75%) */ + MOCS_ENTRY(22, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), + L3_3_WB), + /* Skip Caching - L3 + LLC(87.5%) */ + MOCS_ENTRY(23, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), + L3_3_WB), /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ MOCS_ENTRY(48, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), @@ -290,6 +254,38 @@ static const struct xe_mocs_entry gen12_mocs_desc[] = { MOCS_ENTRY(61, LE_1_UC | LE_TC_1_LLC, L3_3_WB), + /* HW Reserved - SW program but never use */ + MOCS_ENTRY(62, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Reserved - SW program but never use */ + MOCS_ENTRY(63, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC) +}; + +static const struct xe_mocs_entry dg1_mocs_desc[] = { + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + /* WB - L3 */ + MOCS_ENTRY(5, 0, L3_3_WB), + /* WB - L3 50% */ + MOCS_ENTRY(6, 0, L3_ESC(1) | L3_SCC(1) | L3_3_WB), + /* WB - L3 25% */ + MOCS_ENTRY(7, 0, L3_ESC(1) | L3_SCC(3) | L3_3_WB), + /* WB - L3 12.5% */ + MOCS_ENTRY(8, 0, L3_ESC(1) | L3_SCC(7) | L3_3_WB), + + /* HDC:L1 + L3 */ + MOCS_ENTRY(48, 0, L3_3_WB), + /* HDC:L1 */ + MOCS_ENTRY(49, 0, L3_1_UC), + + /* HW Reserved */ + MOCS_ENTRY(60, 0, L3_1_UC), + MOCS_ENTRY(61, 0, L3_1_UC), + MOCS_ENTRY(62, 0, L3_1_UC), + MOCS_ENTRY(63, 0, L3_1_UC), }; static const struct xe_mocs_entry dg2_mocs_desc[] = { -- cgit v1.2.3 From 4399e95102edfceb7a7dd7eb72cd27b776e7d38b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Nov 2023 09:40:48 -0800 Subject: drm/xe/mocs: Bring comment about mocs back to reality The mocs documentation was copied from i915 and doesn't match the reality in xe. Reword it so it matches what the code is doing. 
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231117174049.527192-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index a072fbccc693..2d380053b477 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -123,30 +123,20 @@ struct xe_mocs_info { * * These tables are intended to be kept reasonably consistent across * HW platforms, and for ICL+, be identical across OSes. To achieve - * that, for Icelake and above, list of entries is published as part - * of bspec. + * that, the list of entries is published as part of bspec. * - * Entries not part of the following tables are undefined as far as - * userspace is concerned and shouldn't be relied upon. For Gen < 12 - * they will be initialized to PTE. Gen >= 12 don't have a setting for - * PTE and those platforms except TGL/RKL will be initialized L3 WB to - * catch accidental use of reserved and unused mocs indexes. + * Entries not part of the following tables are undefined as far as userspace is + * concerned and shouldn't be relied upon. The last few entries are reserved by + * the hardware. They should be initialized according to bspec and never used. * - * The last few entries are reserved by the hardware. For ICL+ they - * should be initialized according to bspec and never used, for older - * platforms they should never be written to. + * NOTE1: These tables are part of bspec and defined as part of the hardware + * interface. It is expected that, for specific hardware platform, existing + * entries will remain constant and the table will only be updated by adding new + * entries, filling unused positions. * - * NOTE1: These tables are part of bspec and defined as part of hardware - * interface for ICL+. For older platforms, they are part of kernel - * ABI. It is expected that, for specific hardware platform, existing - * entries will remain constant and the table will only be updated by - * adding new entries, filling unused positions. - * - * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS - * indices have been set to L3 WB. These reserved entries should never - * be used, they may be changed to low performant variants with better - * coherency in the future if more entries are needed. - * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. + * NOTE2: Reserved and unspecified MOCS indices have been set to L3 WB. These + * reserved entries should never be used. They may be changed to low performant + * variants with better coherency in the future if more entries are needed. */ static const struct xe_mocs_entry gen12_mocs_desc[] = { -- cgit v1.2.3 From 0bc519d20ffa7a450bfa21c644c2de95ae8027dc Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 Nov 2023 09:40:49 -0800 Subject: drm/xe: Remove GEN[0-9]*_ prefixes After noticing in logs there were still mentions to GEN6 registers, it was clear commit d9b79ad275e7 ("drm/xe: Drop gen afixes from registers") didn't take care of all the afixes. Some were added later, but there are also constants and strings still using that. Continue the cleanup removing the remaining ones. 
To keep it consistent with code nearby, a few other changes are made: - Remove prefix in INTEL_LEGACY_64B_CONTEXT - Remove GEN8_CTX_L3LLC_COHERENT since it's unused - Rename GEN9_FREQ_SCALER to GT_FREQUENCY_SCALER v2: Use XELP_ as prefix for NUM_MOCS_ENTRIES and remove changes to MOCS_ENTRIES as this is now done as part of a previous commit (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231117174049.527192-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_execlist.c | 16 ++++++++-------- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_guc_pc.c | 18 +++++++++--------- drivers/gpu/drm/xe/xe_lrc.c | 23 +++++++++++------------ drivers/gpu/drm/xe/xe_mmio.c | 14 +++++++------- drivers/gpu/drm/xe/xe_mmio.h | 2 +- drivers/gpu/drm/xe/xe_mocs.c | 8 ++++---- drivers/gpu/drm/xe/xe_pt.c | 2 +- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 2 +- drivers/gpu/drm/xe/xe_wopcm.c | 15 ++++++++------- 11 files changed, 52 insertions(+), 52 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 1541fb64949c..d82b50de144e 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -28,14 +28,14 @@ #define XE_EXECLIST_HANG_LIMIT 1 -#define GEN11_SW_CTX_ID_SHIFT 37 -#define GEN11_SW_CTX_ID_WIDTH 11 +#define SW_CTX_ID_SHIFT 37 +#define SW_CTX_ID_WIDTH 11 #define XEHP_SW_CTX_ID_SHIFT 39 #define XEHP_SW_CTX_ID_WIDTH 16 -#define GEN11_SW_CTX_ID \ - GENMASK_ULL(GEN11_SW_CTX_ID_WIDTH + GEN11_SW_CTX_ID_SHIFT - 1, \ - GEN11_SW_CTX_ID_SHIFT) +#define SW_CTX_ID \ + GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \ + SW_CTX_ID_SHIFT) #define XEHP_SW_CTX_ID \ GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \ @@ -55,8 +55,8 @@ static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id); } else { - xe_gt_assert(hwe->gt, FIELD_FIT(GEN11_SW_CTX_ID, ctx_id)); - lrc_desc |= FIELD_PREP(GEN11_SW_CTX_ID, ctx_id); + xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id)); + lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id); } if (hwe->class == XE_ENGINE_CLASS_COMPUTE) @@ -96,7 +96,7 @@ static void __xe_execlist_port_start(struct xe_execlist_port *port, struct xe_execlist_exec_queue *exl) { struct xe_device *xe = gt_to_xe(port->hwe->gt); - int max_ctx = FIELD_MAX(GEN11_SW_CTX_ID); + int max_ctx = FIELD_MAX(SW_CTX_ID); if (GRAPHICS_VERx100(xe) >= 1250) max_ctx = FIELD_MAX(XEHP_SW_CTX_ID); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 53b39fe91601..0dddb751c6a4 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -515,7 +515,7 @@ static int do_gt_reset(struct xe_gt *gt) xe_mmio_write32(gt, GDRST, GRDOM_FULL); err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); if (err) - xe_gt_err(gt, "failed to clear GEN11_GRDOM_FULL (%pe)\n", + xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n", ERR_PTR(err)); return err; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 6de2ab05bf4e..08142d8ee052 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -301,7 +301,7 @@ int xe_guc_reset(struct xe_guc *guc) ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false); if (ret) { - drm_err(&xe->drm, "GuC reset timed out, GEN6_GDRST=0x%8x\n", + drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%8x\n", 
gdrst); goto err_out; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index f4ac76d6b2dd..e9dd6c3d750b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -23,19 +23,19 @@ #define MCHBAR_MIRROR_BASE_SNB 0x140000 -#define GEN6_RP_STATE_CAP XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998) +#define RP_STATE_CAP XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5998) #define RP0_MASK REG_GENMASK(7, 0) #define RP1_MASK REG_GENMASK(15, 8) #define RPN_MASK REG_GENMASK(23, 16) -#define GEN10_FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) +#define FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) #define GT_PERF_STATUS XE_REG(0x1381b4) -#define GEN12_CAGF_MASK REG_GENMASK(19, 11) +#define CAGF_MASK REG_GENMASK(19, 11) #define GT_FREQUENCY_MULTIPLIER 50 -#define GEN9_FREQ_SCALER 3 +#define GT_FREQUENCY_SCALER 3 /** * DOC: GuC Power Conservation (PC) @@ -244,12 +244,12 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) static u32 decode_freq(u32 raw) { return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER, - GEN9_FREQ_SCALER); + GT_FREQUENCY_SCALER); } static u32 encode_freq(u32 freq) { - return DIV_ROUND_CLOSEST(freq * GEN9_FREQ_SCALER, + return DIV_ROUND_CLOSEST(freq * GT_FREQUENCY_SCALER, GT_FREQUENCY_MULTIPLIER); } @@ -362,7 +362,7 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc) if (xe->info.platform == XE_PVC) reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP); else - reg = xe_mmio_read32(gt, GEN10_FREQ_INFO_REC); + reg = xe_mmio_read32(gt, FREQ_INFO_REC); pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } @@ -402,7 +402,7 @@ static ssize_t freq_act_show(struct device *dev, freq = REG_FIELD_GET(MTL_CAGF_MASK, freq); } else { freq = xe_mmio_read32(gt, GT_PERF_STATUS); - freq = REG_FIELD_GET(GEN12_CAGF_MASK, freq); + freq = REG_FIELD_GET(CAGF_MASK, freq); } ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); @@ -702,7 +702,7 @@ static void tgl_init_fused_rp_values(struct xe_guc_pc *pc) if (xe->info.platform == XE_PVC) reg = xe_mmio_read32(gt, PVC_RP_STATE_CAP); else - reg = xe_mmio_read32(gt, GEN6_RP_STATE_CAP); + reg = xe_mmio_read32(gt, RP_STATE_CAP); pc->rp0_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; pc->rpn_freq = REG_FIELD_GET(RPN_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index f8754f061599..17c0eb9e62cf 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -23,14 +23,13 @@ #include "xe_map.h" #include "xe_vm.h" -#define GEN8_CTX_VALID (1 << 0) -#define GEN8_CTX_L3LLC_COHERENT (1 << 5) -#define GEN8_CTX_PRIVILEGE (1 << 8) -#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 -#define INTEL_LEGACY_64B_CONTEXT 3 +#define CTX_VALID (1 << 0) +#define CTX_PRIVILEGE (1 << 8) +#define CTX_ADDRESSING_MODE_SHIFT 3 +#define LEGACY_64B_CONTEXT 3 -#define GEN11_ENGINE_CLASS_SHIFT 61 -#define GEN11_ENGINE_INSTANCE_SHIFT 48 +#define ENGINE_CLASS_SHIFT 61 +#define ENGINE_INSTANCE_SHIFT 48 static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -765,19 +764,19 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, (q->usm.acc_notify << ACC_NOTIFY_S) | q->usm.acc_trigger); - lrc->desc = GEN8_CTX_VALID; - lrc->desc |= INTEL_LEGACY_64B_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT; + lrc->desc = CTX_VALID; + lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT; /* TODO: Priority */ /* While this appears to have something about privileged batches or * some such, it really 
just means PPGTT mode. */ if (vm) - lrc->desc |= GEN8_CTX_PRIVILEGE; + lrc->desc |= CTX_PRIVILEGE; if (GRAPHICS_VERx100(xe) < 1250) { - lrc->desc |= (u64)hwe->instance << GEN11_ENGINE_INSTANCE_SHIFT; - lrc->desc |= (u64)hwe->class << GEN11_ENGINE_CLASS_SHIFT; + lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; + lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT; } arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 0baaef53f3a7..5b24c592dde5 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -84,8 +84,8 @@ static void xe_resize_vram_bar(struct xe_device *xe) int i; /* gather some relevant info */ - current_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - bar_size_mask = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR); + current_size = pci_resource_len(pdev, LMEM_BAR); + bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR); if (!bar_size_mask) return; @@ -137,7 +137,7 @@ static void xe_resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - _resize_bar(xe, GEN12_LMEM_BAR, rebar_size); + _resize_bar(xe, LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -161,15 +161,15 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - if (!xe_pci_resource_valid(pdev, GEN12_LMEM_BAR)) { + if (!xe_pci_resource_valid(pdev, LMEM_BAR)) { drm_err(&xe->drm, "pci resource is not valid\n"); return -ENXIO; } xe_resize_vram_bar(xe); - xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); - xe->mem.vram.io_size = pci_resource_len(pdev, GEN12_LMEM_BAR); + xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR); + xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR); if (!xe->mem.vram.io_size) return -EIO; @@ -216,7 +216,7 @@ static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, /* actual size */ if (unlikely(xe->info.platform == XE_DG1)) { - *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), GEN12_LMEM_BAR); + *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR); *tile_offset = 0; } else { reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index b244e9063caa..afc503c97eed 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -18,7 +18,7 @@ struct drm_device; struct drm_file; struct xe_device; -#define GEN12_LMEM_BAR 2 +#define LMEM_BAR 2 int xe_mmio_init(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 2d380053b477..cbb1619f17e0 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -69,7 +69,7 @@ struct xe_mocs_info { #define L4_CACHE_POLICY_MASK REG_GENMASK(3, 2) /* Helper defines */ -#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ +#define XELP_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. 
*/ #define PVC_NUM_MOCS_ENTRIES 3 #define MTL_NUM_MOCS_ENTRIES 16 #define XE2_NUM_MOCS_ENTRIES 16 @@ -419,14 +419,14 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->table = dg2_mocs_desc; } info->uc_index = 1; - info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->n_entries = XELP_NUM_MOCS_ENTRIES; info->unused_entries_index = 3; break; case XE_DG1: info->size = ARRAY_SIZE(dg1_mocs_desc); info->table = dg1_mocs_desc; info->uc_index = 1; - info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->n_entries = XELP_NUM_MOCS_ENTRIES; info->unused_entries_index = 5; break; case XE_TIGERLAKE: @@ -436,7 +436,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, case XE_ALDERLAKE_N: info->size = ARRAY_SIZE(gen12_mocs_desc); info->table = gen12_mocs_desc; - info->n_entries = GEN9_NUM_MOCS_ENTRIES; + info->n_entries = XELP_NUM_MOCS_ENTRIES; info->uc_index = 3; info->unused_entries_index = 2; break; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ab08e4644529..55c37f11ddb4 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -532,7 +532,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, pte |= xe_walk->default_pte; /* - * Set the GEN12_PTE_PS64 hint if possible, otherwise if + * Set the XE_PTE_PS64 hint if possible, otherwise if * this device *requires* 64K PTE size for VRAM, fail. */ if (level == 0 && !xe_parent->is_compact) { diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 837b522cb91f..d2b00d0bf1e2 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -74,7 +74,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) stolen_size = tile_size - mgr->stolen_base; /* Verify usage fits in the actual resource available */ - if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, GEN12_LMEM_BAR)) + if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; /* diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index bf85d4fa56cc..d3a99157e523 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -51,9 +51,10 @@ #define DGFX_WOPCM_SIZE SZ_4M /* FIXME: Larger size require for MTL, do a proper probe sooner or later */ #define MTL_WOPCM_SIZE SZ_4M -#define GEN11_WOPCM_SIZE SZ_2M +#define WOPCM_SIZE SZ_2M + +#define MAX_WOPCM_SIZE SZ_8M -#define GEN12_MAX_WOPCM_SIZE SZ_8M /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */ #define WOPCM_RESERVED_SIZE SZ_16K @@ -65,8 +66,8 @@ /* GuC WOPCM Offset value needs to be aligned to 16KB. */ #define GUC_WOPCM_OFFSET_ALIGNMENT (1UL << GUC_WOPCM_OFFSET_SHIFT) -/* 36KB WOPCM reserved at the end of WOPCM on GEN11. */ -#define GEN11_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) +/* 36KB WOPCM reserved at the end of WOPCM */ +#define WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) static inline struct xe_gt *wopcm_to_gt(struct xe_wopcm *wopcm) { @@ -80,7 +81,7 @@ static inline struct xe_device *wopcm_to_xe(struct xe_wopcm *wopcm) static u32 context_reserved_size(void) { - return GEN11_WOPCM_HW_CTX_RESERVED; + return WOPCM_HW_CTX_RESERVED; } static bool __check_layout(struct xe_device *xe, u32 wopcm_size, @@ -180,7 +181,7 @@ u32 xe_wopcm_size(struct xe_device *xe) { return IS_DGFX(xe) ? DGFX_WOPCM_SIZE : xe->info.platform == XE_METEORLAKE ? 
MTL_WOPCM_SIZE : - GEN11_WOPCM_SIZE; + WOPCM_SIZE; } /** @@ -225,7 +226,7 @@ int xe_wopcm_init(struct xe_wopcm *wopcm) * BIOS/IFWI, check against the max allowed wopcm size to * validate if the programmed values align to the wopcm layout. */ - wopcm->size = GEN12_MAX_WOPCM_SIZE; + wopcm->size = MAX_WOPCM_SIZE; goto check; } -- cgit v1.2.3 From 40709aa761acbc78fe6c0405720d79cbf8345095 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 20 Nov 2023 12:08:48 -0800 Subject: drm/xe: Only set xe_vma_op.map fields for GPUVA map operations DRM_XE_VM_BIND_OP_MAP_* IOCTL operations can result in GPUVA unmap, remap, or map operations in vm_bind_ioctl_ops_create. The xe_vma_op.map fields are blindly set which is incorrect for GPUVA unmap or remap operations. Fix this by only setting xe_vma_op.map for GPUVA map operations. Also restructure a bit vm_bind_ioctl_ops_create to make the code a bit more readable. Reported-by: Dafna Hirschfeld Signed-off-by: Matthew Brost Reviewed-by: Brian Welty Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 59 ++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 41 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 174441c4ca5a..1b4d340d0604 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2182,42 +2182,12 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, case DRM_XE_VM_BIND_OP_MAP_USERPTR: ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, obj, bo_offset_or_userptr); - if (IS_ERR(ops)) - return ops; - - drm_gpuva_for_each_op(__op, ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - - op->tile_mask = tile_mask; - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - } break; case DRM_XE_VM_BIND_OP_UNMAP: ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); - if (IS_ERR(ops)) - return ops; - - drm_gpuva_for_each_op(__op, ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - - op->tile_mask = tile_mask; - } break; case DRM_XE_VM_BIND_OP_PREFETCH: ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); - if (IS_ERR(ops)) - return ops; - - drm_gpuva_for_each_op(__op, ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - - op->tile_mask = tile_mask; - op->prefetch.region = prefetch_region; - } break; case DRM_XE_VM_BIND_OP_UNMAP_ALL: xe_assert(vm->xe, bo); @@ -2233,19 +2203,13 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); drm_gpuvm_bo_put(vm_bo); xe_bo_unlock(bo); - if (IS_ERR(ops)) - return ops; - - drm_gpuva_for_each_op(__op, ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - - op->tile_mask = tile_mask; - } break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); ops = ERR_PTR(-EINVAL); } + if (IS_ERR(ops)) + return ops; #ifdef TEST_VM_ASYNC_OPS_ERROR if (operation & FORCE_ASYNC_OP_ERROR) { @@ -2256,9 +2220,22 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } #endif - if (!IS_ERR(ops)) - drm_gpuva_for_each_op(__op, ops) - print_op(vm->xe, __op); + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + + op->tile_mask = tile_mask; + if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.immediate = + flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; + op->map.read_only = + flags & DRM_XE_VM_BIND_FLAG_READONLY; + op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; + 
} else if (__op->op == DRM_GPUVA_OP_PREFETCH) { + op->prefetch.region = prefetch_region; + } + + print_op(vm->xe, __op); + } return ops; } -- cgit v1.2.3 From 1d425066f15faa6965fa6361da4c52e4020fd8d0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 20 Nov 2023 14:19:04 -0800 Subject: drm/xe: Fix modpost warning on kunit modules When built with W=1, the following warnings show up on modpost: MODPOST drivers/gpu/drm/xe/Module.symvers WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_bo_test.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_dma_buf_test.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_migrate_test.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_pci_test.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_rtp_test.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_wa_test.o Add the module description for each of these to fix the warning. Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20231120221904.695630-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo_test.c | 1 + drivers/gpu/drm/xe/tests/xe_dma_buf_test.c | 1 + drivers/gpu/drm/xe/tests/xe_migrate_test.c | 1 + drivers/gpu/drm/xe/tests/xe_pci_test.c | 2 +- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 1 + drivers/gpu/drm/xe/tests/xe_wa_test.c | 1 + 6 files changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c index 1c868e3635bc..f408f17f2164 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo_test.c +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c @@ -22,4 +22,5 @@ kunit_test_suite(xe_bo_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_bo kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c index 35312bfd5fb7..9f5a9cda8c0f 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c @@ -21,4 +21,5 @@ kunit_test_suite(xe_dma_buf_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_dma_buf kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c index 39179eae890b..cf0c173b945f 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c @@ -21,4 +21,5 @@ kunit_test_suite(xe_migrate_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_migrate kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 9c6f6c2c6c6e..daf652517608 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -70,5 +70,5 @@ kunit_test_suite(xe_pci_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_pci kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); - diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index b2beba0019cd..a1d204133cc1 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -313,4 
+313,5 @@ kunit_test_suite(xe_rtp_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_rtp kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 6e1127b276ea..01ea974591ea 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -138,4 +138,5 @@ kunit_test_suite(xe_rtp_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_wa kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); -- cgit v1.2.3 From a6a4ea6d7d37cea9132e67a4d3321a455a6b0736 Mon Sep 17 00:00:00 2001 From: Ruthuvikas Ravikumar Date: Fri, 17 Nov 2023 03:21:52 +0530 Subject: drm/xe: Add mocs kunit This kunit verifies the hardware values of mocs and l3cc registers with the KMD programmed values. v14: Fix CHECK. v13: Remove ret after forcewake. v11: Add KUNIT_ASSERT_EQ_MSG for Forcewake. v9/v10: Add Forcewake Fail. v8: Remove xe_bo.h and xe_pm.h Remove mocs and l3cc from live_mocs. Pull debug and err msg for mocs/l3cc out of if else block. Add HAS_LNCF_MOCS. v7: correct checkpath v6: Change ssize_t type. Change forcewake domain to XE_FW_GT. Update change of MOCS registers are multicast on Xe_HP and beyond patch. v5: Release forcewake. Remove single statement braces. Fix debug statements. v4: Drop stratch and vaddr. Fix debug statements. Fix indentation. v3: Fix checkpath. v2: Fix checkpath. Cc: Aravind Iddamsetty Cc: Mathew D Roper Reviewed-by: Mathew D Roper Signed-off-by: Ruthuvikas Ravikumar Link: https://lore.kernel.org/r/20231116215152.2248859-1-ruthuvikas.ravikumar@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/Makefile | 1 + drivers/gpu/drm/xe/tests/xe_mocs.c | 130 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/tests/xe_mocs_test.c | 24 ++++++ drivers/gpu/drm/xe/tests/xe_mocs_test.h | 13 ++++ drivers/gpu/drm/xe/xe_mocs.c | 4 + 5 files changed, 172 insertions(+) create mode 100644 drivers/gpu/drm/xe/tests/xe_mocs.c create mode 100644 drivers/gpu/drm/xe/tests/xe_mocs_test.c create mode 100644 drivers/gpu/drm/xe/tests/xe_mocs_test.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 51f1a7f017d4..39d8a0892274 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \ xe_bo_test.o \ xe_dma_buf_test.o \ xe_migrate_test.o \ + xe_mocs_test.o \ xe_pci_test.o \ xe_rtp_test.o \ xe_wa_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c new file mode 100644 index 000000000000..7dd34f94e809 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include + +#include "tests/xe_mocs_test.h" +#include "tests/xe_pci_test.h" +#include "tests/xe_test.h" + +#include "xe_pci.h" +#include "xe_gt.h" +#include "xe_mocs.h" +#include "xe_device.h" + +struct live_mocs { + struct xe_mocs_info table; +}; + +static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) +{ + unsigned int flags; + struct kunit *test = xe_cur_kunit(); + + memset(arg, 0, sizeof(*arg)); + + flags = get_mocs_settings(gt_to_xe(gt), &arg->table); + + kunit_info(test, "table size %d", arg->table.size); + kunit_info(test, "table uc_index %d", arg->table.uc_index); + kunit_info(test, "table 
n_entries %d", arg->table.n_entries); + + return flags; +} + +static void read_l3cc_table(struct xe_gt *gt, + const struct xe_mocs_info *info) +{ + unsigned int i; + u32 l3cc; + u32 reg_val; + u32 ret; + + struct kunit *test = xe_cur_kunit(); + + xe_device_mem_access_get(gt_to_xe(gt)); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); + mocs_dbg(>_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries); + for (i = 0; + i < (info->n_entries + 1) / 2 ? + (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), + get_entry_l3cc(info, 2 * i + 1))), 1 : 0; + i++) { + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i, + XELP_LNCFCMOCS(i).addr, reg_val, l3cc); + if (reg_val != l3cc) + KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n", + XELP_LNCFCMOCS(i).addr); + } + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_device_mem_access_put(gt_to_xe(gt)); +} + +static void read_mocs_table(struct xe_gt *gt, + const struct xe_mocs_info *info) +{ + struct xe_device *xe = gt_to_xe(gt); + + unsigned int i; + u32 mocs; + u32 reg_val; + u32 ret; + + struct kunit *test = xe_cur_kunit(); + + xe_device_mem_access_get(gt_to_xe(gt)); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); + mocs_dbg(>_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries); + drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, + "Unused entries index should have been defined\n"); + for (i = 0; + i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; + i++) { + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i, + XELP_GLOBAL_MOCS(i).addr, reg_val, mocs); + if (reg_val != mocs) + KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n", + XELP_GLOBAL_MOCS(i).addr); + } + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + xe_device_mem_access_put(gt_to_xe(gt)); +} + +static int mocs_kernel_test_run_device(struct xe_device *xe) +{ + /* Basic check the system is configured with the expected mocs table */ + + struct live_mocs mocs; + struct xe_gt *gt; + + unsigned int flags; + int id; + + for_each_gt(gt, xe, id) { + flags = live_mocs_init(&mocs, gt); + if (flags & HAS_GLOBAL_MOCS) + read_mocs_table(gt, &mocs.table); + if (flags & HAS_LNCF_MOCS) + read_l3cc_table(gt, &mocs.table); + } + return 0; +} + +void xe_live_mocs_kernel_kunit(struct kunit *test) +{ + xe_call_for_each_device(mocs_kernel_test_run_device); +} +EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c new file mode 100644 index 000000000000..ef56bd517b28 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2022 Intel Corporation + */ + +#include "xe_mocs_test.h" + +#include + +static struct kunit_case xe_mocs_tests[] = { + KUNIT_CASE(xe_live_mocs_kernel_kunit), + {} +}; + +static struct kunit_suite xe_mocs_test_suite = { + .name = "xe_mocs", + .test_cases = xe_mocs_tests, +}; + +kunit_test_suite(xe_mocs_test_suite); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git 
a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h new file mode 100644 index 000000000000..7faa3575e6c3 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 AND MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MOCS_TEST_H_ +#define _XE_MOCS_TEST_H_ + +struct kunit; + +void xe_live_mocs_kernel_kunit(struct kunit *test); + +#endif diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index cbb1619f17e0..12a6d39fcd4a 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -574,3 +574,7 @@ void xe_mocs_init(struct xe_gt *gt) if (flags & HAS_LNCF_MOCS) init_l3cc_table(gt, &table); } + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_mocs.c" +#endif -- cgit v1.2.3 From 5c09bd6ccd418f9dc221fd2544d613e3180b928e Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 16 Nov 2023 18:39:59 -0300 Subject: drm/xe/mmio: Move xe_mmio_wait32() to xe_mmio.c This function is big enough, let's move it to a shared compilation unit. While at it, document it. Here is the output of running bloat-o-metter on the new and old module (execution provided by Lucas): $ ./scripts/bloat-o-meter build64/drivers/gpu/drm/xe/xe.ko{.old,} add/remove: 2/0 grow/shrink: 0/58 up/down: 554/-15645 (-15091) (...) # Lines in between omitted Total: Before=2181322, After=2166231, chg -0.69% The overall reduction in the size is not that significant. Nevertheless, keeping the function as inline arguably does not bring too much benefit as well. As noted by Lucas, we would probably benefit from an inline function that did the fast-path check: do an optimistic first check before entering the wait-logic, which itself would go to a compilation unit. We might come back to implement this in the future if we have data to justify it. v2: - Add note in documentation for @timeout_us regarding the exponential backoff strategy. (Lucas) - Share output of bloat-o-meter in the commit message. (Lucas) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231116214000.70573-2-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 54 ++++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_mmio.h | 39 ++------------------------------ 2 files changed, 56 insertions(+), 37 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5b24c592dde5..6c035b293bb6 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -489,3 +489,57 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) return (u64)udw << 32 | ldw; } +/** + * xe_mmio_wait32() - Wait for a register to match the desired masked value + * @gt: MMIO target GT + * @reg: register to read value from + * @mask: mask to be applied to the value read from the register + * @val: desired value after applying the mask + * @timeout_us: time out after this period of time. Wait logic tries to be + * smart, applying an exponential backoff until @timeout_us is reached. + * @out_val: if not NULL, points where to store the last unmasked value + * @atomic: needs to be true if calling from an atomic context + * + * This function polls for the desired masked value and returns zero on success + * or -ETIMEDOUT if timed out. + * + * Note that @timeout_us represents the minimum amount of time to wait before + * giving up. 
The actual time taken by this function can be a little more than + * @timeout_us for different reasons, specially in non-atomic contexts. Thus, + * it is possible that this function succeeds even after @timeout_us has passed. + */ +int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, + u32 *out_val, bool atomic) +{ + ktime_t cur = ktime_get_raw(); + const ktime_t end = ktime_add_us(cur, timeout_us); + int ret = -ETIMEDOUT; + s64 wait = 10; + u32 read; + + for (;;) { + read = xe_mmio_read32(gt, reg); + if ((read & mask) == val) { + ret = 0; + break; + } + + cur = ktime_get_raw(); + if (!ktime_before(cur, end)) + break; + + if (ktime_after(ktime_add_us(cur, wait), end)) + wait = ktime_us_delta(end, cur); + + if (atomic) + udelay(wait); + else + usleep_range(wait, wait << 1); + wait <<= 1; + } + + if (out_val) + *out_val = read; + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index afc503c97eed..218b796629ad 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -87,43 +87,6 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, return (reg_val & mask) != eval ? -EINVAL : 0; } -static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, - u32 val, u32 timeout_us, u32 *out_val, - bool atomic) -{ - ktime_t cur = ktime_get_raw(); - const ktime_t end = ktime_add_us(cur, timeout_us); - int ret = -ETIMEDOUT; - s64 wait = 10; - u32 read; - - for (;;) { - read = xe_mmio_read32(gt, reg); - if ((read & mask) == val) { - ret = 0; - break; - } - - cur = ktime_get_raw(); - if (!ktime_before(cur, end)) - break; - - if (ktime_after(ktime_add_us(cur, wait), end)) - wait = ktime_us_delta(end, cur); - - if (atomic) - udelay(wait); - else - usleep_range(wait, wait << 1); - wait <<= 1; - } - - if (out_val) - *out_val = read; - - return ret; -} - static inline bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg) @@ -136,5 +99,7 @@ static inline bool xe_mmio_in_range(const struct xe_gt *gt, int xe_mmio_probe_vram(struct xe_device *xe); u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); +int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, + u32 *out_val, bool atomic); #endif -- cgit v1.2.3 From b3f0654f55859cfcd87d4ea5440247451902924b Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 16 Nov 2023 18:40:00 -0300 Subject: drm/xe/mmio: Make xe_mmio_wait32() aware of interrupts With the current implementation, a preemption or other kind of interrupt might happen between xe_mmio_read32() and ktime_get_raw(). Such an interruption (specially in the case of preemption) might be long enough to cause a timeout without giving a chance of a new check on the register value on a next iteration, which would have happened otherwise. This issue causes some sporadic timeouts in some code paths. As an example, we were experiencing some rare timeouts when waiting for PLL unlock for C10/C20 PHYs (see intel_cx0pll_disable()). After debugging, we found out that the PLL unlock was happening within the expected time period (20us), which suggested a bug in xe_mmio_wait32(). To fix the issue, ensure that we do a last check out of the loop if necessary. This change was tested with the aforementioned PLL unlocking code path. Experiments showed that, before this change, we observed reported timeouts in 54 of 5000 runs; and, after this change, no timeouts were reported in 5000 runs. 
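For illustration only, the window and the fix can be condensed as below; this is a sketch built from the helpers already visible in xe_mmio_wait32() above, not the actual hunk (which follows in the diff):

  static int wait32_sketch(struct xe_gt *gt, struct xe_reg reg, u32 mask,
                           u32 val, u32 timeout_us)
  {
          const ktime_t end = ktime_add_us(ktime_get_raw(), timeout_us);
          u32 read;

          for (;;) {
                  read = xe_mmio_read32(gt, reg);
                  /*
                   * A long preemption here can expire the deadline below even
                   * though the register reaches the expected value meanwhile.
                   */
                  if ((read & mask) == val)
                          return 0;
                  if (!ktime_before(ktime_get_raw(), end))
                          break;
                  /* exponential backoff: udelay()/usleep_range(), wait <<= 1 */
          }

          /*
           * The fix: sample the register one last time after the deadline so
           * the case above is not misreported as a timeout.
           */
          read = xe_mmio_read32(gt, reg);
          return (read & mask) == val ? 0 : -ETIMEDOUT;
  }
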
v2: - Prefer an implementation without a barrier (v1 switched the order of xe_mmio_read32() and ktime_get_raw() calls and added a barrier() in between). (Lucas, Rodrigo) Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231116214000.70573-3-gustavo.sousa@intel.com Signed-off-by: Gustavo Sousa Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 6c035b293bb6..d0a36600e52b 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -538,6 +538,12 @@ int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 t wait <<= 1; } + if (ret != 0) { + read = xe_mmio_read32(gt, reg); + if ((read & mask) == val) + ret = 0; + } + if (out_val) *out_val = read; -- cgit v1.2.3 From 11ea758c145f8340d5ffd7b3831c2bd0e98f8024 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 20 Nov 2023 16:41:25 +0530 Subject: drm/xe/xe2: Add workaround 14017421178 This workaround applies to Xe2_LPM Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 6572715dfc09..13962b8a677a 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -273,6 +273,15 @@ static const struct xe_rtp_entry_sr gt_was[] = { SET(XEHPC_L3CLOS_MASK(3), ~0)) }, + /* Xe2_LPM */ + + { XE_RTP_NAME("14017421178"), + XE_RTP_RULES(MEDIA_VERSION(2000), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + {} }; -- cgit v1.2.3 From f25d8291aca1ccfb0118ec4c0e98f6301bff15ec Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 23 Nov 2023 16:12:11 +0530 Subject: drm/xe/xe2: Add workaround 16021867713 This workaround applies to Xe2_LPM as well Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 13962b8a677a..5ab5529d9624 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -281,6 +281,12 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(2000), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, {} }; -- cgit v1.2.3 From 8bfbe174d7fabf4c6d26e90a133b3129c4e98cbe Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 23 Nov 2023 16:09:00 +0530 Subject: drm/xe/xe2: Add workaround 14019449301 This workaround applies to Xe2_LPM V3(MattR): - Reorder reg and wa placement - Add base parameter to reg macro for better definition V2(MattR): - Change name of register - Loop for all engines - Driver permanent WA, applies to all steps Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 3 +++ drivers/gpu/drm/xe/xe_wa.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 
b57dec17eb2d..444ff9b83bb1 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -112,6 +112,9 @@ #define RING_EXECLIST_CONTROL(base) XE_REG((base) + 0x550) #define EL_CTRL_LOAD REG_BIT(0) +#define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08) +#define CG3DDISHRS_CLKGATE_DIS REG_BIT(5) + #define VDBOX_CGCTL3F10(base) XE_REG((base) + 0x3f10) #define IECPUNIT_CLKGATE_DIS REG_BIT(22) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 5ab5529d9624..81ae0232146e 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -287,6 +287,11 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("14019449301"), + XE_RTP_RULES(MEDIA_VERSION(2000), ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, {} }; -- cgit v1.2.3 From d6d14854ddf362633fbcf050ce19bd0d7b0d9a3a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Nov 2023 08:38:02 +0100 Subject: drm/xe: Add device flag to indicate SR-IOV support The Single Root I/O Virtualization (SR-IOV) extension to the PCI Express (PCIe) specification suite is supported starting from 12th generation of Intel Graphics processors. Add a device flag that we will use to enable SR-IOV specific code paths and to indicate our readiness to support SR-IOV. We will enable this flag for the specific platforms once all required changes and additions will be ready and merged. Bspec: 52391 Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231115073804.1861-1-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.h | 5 +++++ drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index c4232de40ae0..54694f98c91a 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -158,6 +158,11 @@ static inline bool xe_device_has_flat_ccs(struct xe_device *xe) return xe->info.has_flat_ccs; } +static inline bool xe_device_has_sriov(struct xe_device *xe) +{ + return xe->info.has_sriov; +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index be11cadccbd4..f2ba4f746fa2 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -253,6 +253,8 @@ struct xe_device { u8 has_llc:1; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ u8 has_range_tlb_invalidation:1; + /** @has_sriov: Supports SR-IOV */ + u8 has_sriov:1; /** @enable_display: display enabled */ u8 enable_display:1; /** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 097b68598191..0a4b83a9fc0b 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -60,6 +60,7 @@ struct xe_device_desc { u8 has_heci_gscfi:1; u8 has_llc:1; + u8 has_sriov:1; u8 bypass_mtcfg:1; u8 supports_mmio_ext:1; }; @@ -531,7 +532,6 @@ static void handle_gmdid(struct xe_device *xe, } } - static int xe_info_init(struct xe_device *xe, const struct xe_device_desc *desc, const struct xe_subplatform_desc *subplatform_desc) @@ 
-577,6 +577,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? media_desc->name : "none"; xe->info.has_llc = desc->has_llc; + xe->info.has_sriov = desc->has_sriov; xe->info.bypass_mtcfg = desc->bypass_mtcfg; xe->info.supports_mmio_ext = desc->supports_mmio_ext; xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size; -- cgit v1.2.3 From 13e5c32c849ace3dd0af9049fc19ce910591db8b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Nov 2023 08:38:03 +0100 Subject: drm/xe: Prepare for running in different SR-IOV modes We will be adding support for the SR-IOV and driver might be then running, in addition to existing non-virtualized bare-metal mode, also in Physical Function (PF) or Virtual Function (VF) mode. Since these additional modes require some changes to the driver, define enum flag to represent different SR-IOV modes and add a function where we will detect the actual mode in the runtime. We start with a forced bare-metal mode as it is sufficient to enable basic functionality and ensures no impact to existing code. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231115073804.1861-2-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 3 ++ drivers/gpu/drm/xe/xe_device_types.h | 7 +++++ drivers/gpu/drm/xe/xe_pci.c | 3 ++ drivers/gpu/drm/xe/xe_sriov.c | 55 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov.h | 42 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_types.h | 28 ++++++++++++++++++ 6 files changed, 138 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_sriov.c create mode 100644 drivers/gpu/drm/xe/xe_sriov.h create mode 100644 drivers/gpu/drm/xe/xe_sriov_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index a29b92080c85..05a90fd6c3c9 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -122,6 +122,9 @@ xe-y += xe_bb.o \ # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o +# graphics virtualization (SR-IOV) support +xe-y += xe_sriov.o + # i915 Display compat #defines and #includes subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ -I$(srctree)/$(src)/display/ext \ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index f2ba4f746fa2..2712905c7a91 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -18,6 +18,7 @@ #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_pmu.h" +#include "xe_sriov_types.h" #include "xe_step_types.h" #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -299,6 +300,12 @@ struct xe_device { struct ttm_resource_manager sys_mgr; } mem; + /** @sriov: device level virtualization data */ + struct { + /** @sriov.__mode: SR-IOV mode (Don't access directly!) 
*/ + enum xe_sriov_mode __mode; + } sriov; + /** @usm: unified memory state */ struct { /** @asid: convert a ASID to VM */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 0a4b83a9fc0b..3d163cb3dd8c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -24,6 +24,7 @@ #include "xe_module.h" #include "xe_pci_types.h" #include "xe_pm.h" +#include "xe_sriov.h" #include "xe_step.h" enum toggle_d3cold { @@ -705,6 +706,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); + xe_sriov_probe_early(xe, desc->has_sriov); + err = xe_info_init(xe, desc, subplatform_desc); if (err) goto err_pci_disable; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c new file mode 100644 index 000000000000..42a0e0c917a0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_sriov.h" + +/** + * xe_sriov_mode_to_string - Convert enum value to string. + * @mode: the &xe_sriov_mode to convert + * + * Returns: SR-IOV mode as a user friendly string. + */ +const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode) +{ + switch (mode) { + case XE_SRIOV_MODE_NONE: + return "none"; + case XE_SRIOV_MODE_PF: + return "SR-IOV PF"; + case XE_SRIOV_MODE_VF: + return "SR-IOV VF"; + default: + return ""; + } +} + +/** + * xe_sriov_probe_early - Probe a SR-IOV mode. + * @xe: the &xe_device to probe mode on + * @has_sriov: flag indicating hardware support for SR-IOV + * + * This function should be called only once and as soon as possible during + * driver probe to detect whether we are running a SR-IOV Physical Function + * (PF) or a Virtual Function (VF) device. + * + * SR-IOV PF mode detection is based on PCI @dev_is_pf() function. + * SR-IOV VF mode detection is based on dedicated MMIO register read. 
+ */ +void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov) +{ + enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE; + + /* TODO: replace with proper mode detection */ + xe_assert(xe, !has_sriov); + + xe_assert(xe, !xe->sriov.__mode); + xe->sriov.__mode = mode; + xe_assert(xe, xe->sriov.__mode); + + if (has_sriov) + drm_info(&xe->drm, "Running in %s mode\n", + xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h new file mode 100644 index 000000000000..5af73a3172b0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_SRIOV_H_ +#define _XE_SRIOV_H_ + +#include "xe_assert.h" +#include "xe_device_types.h" +#include "xe_sriov_types.h" + +const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode); + +void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov); + +static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) +{ + xe_assert(xe, xe->sriov.__mode); + return xe->sriov.__mode; +} + +static inline bool xe_device_is_sriov_pf(struct xe_device *xe) +{ + return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; +} + +static inline bool xe_device_is_sriov_vf(struct xe_device *xe) +{ + return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF; +} + +#ifdef CONFIG_PCI_IOV +#define IS_SRIOV_PF(xe) xe_device_is_sriov_pf(xe) +#else +#define IS_SRIOV_PF(xe) (typecheck(struct xe_device *, (xe)) && false) +#endif +#define IS_SRIOV_VF(xe) xe_device_is_sriov_vf(xe) + +#define IS_SRIOV(xe) (IS_SRIOV_PF(xe) || IS_SRIOV_VF(xe)) + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h new file mode 100644 index 000000000000..999a4311b98b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_SRIOV_TYPES_H_ +#define _XE_SRIOV_TYPES_H_ + +#include + +/** + * enum xe_sriov_mode - SR-IOV mode + * @XE_SRIOV_MODE_NONE: bare-metal mode (non-virtualized) + * @XE_SRIOV_MODE_PF: SR-IOV Physical Function (PF) mode + * @XE_SRIOV_MODE_VF: SR-IOV Virtual Function (VF) mode + */ +enum xe_sriov_mode { + /* + * Note: We don't use default enum value 0 to allow catch any too early + * attempt of checking the SR-IOV mode prior to the actual mode probe. + */ + XE_SRIOV_MODE_NONE = 1, + XE_SRIOV_MODE_PF, + XE_SRIOV_MODE_VF, +}; +static_assert(XE_SRIOV_MODE_NONE); + +#endif -- cgit v1.2.3 From 2475ac27df597679ca0426d358877d6f1483d50f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Nov 2023 08:38:04 +0100 Subject: drm/xe: Print virtualization mode during probe We already print some basic information about the device, add virtualization information, until we expose that elsewhere. 
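For context, a hypothetical usage sketch (not part of either patch) of the helpers introduced in the previous change; IS_SRIOV_PF()/IS_SRIOV_VF() reduce to checks on the mode probed by xe_sriov_probe_early(), so later code can branch on it:

  static void sriov_mode_example(struct xe_device *xe)
  {
          if (IS_SRIOV_VF(xe)) {
                  /* VF: must not touch resources owned by the PF/firmware */
          } else if (IS_SRIOV_PF(xe)) {
                  /* PF: provisioning of VF resources would go here */
          } else {
                  /* bare metal (XE_SRIOV_MODE_NONE): existing code paths */
          }
  }
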
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231115073804.1861-3-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3d163cb3dd8c..066a223a341a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -735,6 +735,10 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe_step_name(xe->info.step.display), xe_step_name(xe->info.step.basedie)); + drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n", + str_yes_no(xe_device_has_sriov(xe)), + xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); + err = xe_device_probe(xe); if (err) goto err_pci_disable; -- cgit v1.2.3 From 8c54ee8a8606a453a2c907989372aa6f004b7bec Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 23 Nov 2023 16:31:55 +0100 Subject: drm/xe: Ensure that we don't access the placements array out-of-bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure, using xe_assert that the various try_add_ functions don't access the bo placements array out-of-bounds. v2: - Remove the places argument to make sure the xe_assert operates on the array we're actually populating. (Matthew Auld) Suggested-by: Ohad Sharabi Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/946 Signed-off-by: Thomas Hellström Reviewed-by: Ohad Sharabi #v1 Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20231123153158.12779-2-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 835eab643263..c12430cf9f28 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -121,11 +121,13 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res) return to_xe_ttm_vram_mgr(mgr)->vram; } -static void try_add_system(struct xe_bo *bo, struct ttm_place *places, +static void try_add_system(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { - places[*c] = (struct ttm_place) { + bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_TT, }; *c += 1; @@ -170,26 +172,30 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, } static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, - struct ttm_place *places, u32 bo_flags, u32 *c) + u32 bo_flags, u32 *c) { + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + if (bo->props.preferred_gt == XE_GT1) { if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1, c); + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0, c); + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); } else { if (bo_flags & XE_BO_CREATE_VRAM0_BIT) - add_vram(xe, bo, places, bo_flags, XE_PL_VRAM0, c); + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); if (bo_flags & XE_BO_CREATE_VRAM1_BIT) - add_vram(xe, bo, places, bo_flags, XE_PL_VRAM1, c); + add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); } } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, - 
struct ttm_place *places, u32 bo_flags, u32 *c) + u32 bo_flags, u32 *c) { + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + if (bo_flags & XE_BO_CREATE_STOLEN_BIT) { - places[*c] = (struct ttm_place) { + bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_STOLEN, .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_GGTT_BIT) ? @@ -202,7 +208,6 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags) { - struct ttm_place *places = bo->placements; u32 c = 0; bo->props.preferred_mem_type = XE_BO_PROPS_INVALID; @@ -210,22 +215,22 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, /* The order of placements should indicate preferred location */ if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) { - try_add_system(bo, places, bo_flags, &c); - try_add_vram(xe, bo, places, bo_flags, &c); + try_add_system(xe, bo, bo_flags, &c); + try_add_vram(xe, bo, bo_flags, &c); } else { - try_add_vram(xe, bo, places, bo_flags, &c); - try_add_system(bo, places, bo_flags, &c); + try_add_vram(xe, bo, bo_flags, &c); + try_add_system(xe, bo, bo_flags, &c); } - try_add_stolen(xe, bo, places, bo_flags, &c); + try_add_stolen(xe, bo, bo_flags, &c); if (!c) return -EINVAL; bo->placement = (struct ttm_placement) { .num_placement = c, - .placement = places, + .placement = bo->placements, .num_busy_placement = c, - .busy_placement = places, + .busy_placement = bo->placements, }; return 0; -- cgit v1.2.3 From a21fe5ee598109793546b67a32398076ddea2660 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Nov 2023 12:03:57 +0100 Subject: drm/xe/bo: Rename xe_bo_get_sg() to xe_bo_sg() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using "get" typically refers to obtaining a refcount, which we don't do here so rename to xe_bo_sg(). 
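As an aside (example only, not from this patch, and assuming the usual xe_bo_get()/xe_bo_put() refcounting helpers), the convention being applied is that a *_get() helper transfers a reference that must later be dropped, while a plain accessor returns a borrowed pointer with nothing to release:

  bo = xe_bo_get(bo);          /* takes a reference ...             */
  /* ... use bo ... */
  xe_bo_put(bo);               /* ... which must be balanced        */

  sgt = xe_bo_sg(bo);          /* borrowed pointer, no matching put */
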
Suggested-by: Ohad Sharabi Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/946 Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Reviewed-by: Ohad Sharabi Link: https://patchwork.freedesktop.org/patch/msgid/20231122110359.4087-3-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_bo.c | 4 ++-- drivers/gpu/drm/xe/xe_bo.h | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 8 ++++---- drivers/gpu/drm/xe/xe_pt.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 7aad09140d7e..83d6a66ed369 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -328,7 +328,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) if (xe_bo_is_vram(pt)) xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); else - xe_res_first_sg(xe_bo_get_sg(pt), 0, pt->size, &src_it); + xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), &src_it, XE_PAGE_SIZE, pt); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index c12430cf9f28..4c827fd4e382 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -317,7 +317,7 @@ static int xe_tt_map_sg(struct ttm_tt *tt) return 0; } -struct sg_table *xe_bo_get_sg(struct xe_bo *bo) +struct sg_table *xe_bo_sg(struct xe_bo *bo) { struct ttm_tt *tt = bo->ttm.ttm; struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); @@ -1735,7 +1735,7 @@ dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size) if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { xe_assert(xe, bo->ttm.ttm); - xe_res_first_sg(xe_bo_get_sg(bo), page << PAGE_SHIFT, + xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT, page_size, &cur); return xe_res_dma(&cur) + offset; } else { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9d3b704a1030..f8bae873418d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -299,7 +299,7 @@ xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred) void xe_bo_put_commit(struct llist_head *deferred); -struct sg_table *xe_bo_get_sg(struct xe_bo *bo); +struct sg_table *xe_bo_sg(struct xe_bo *bo); /* * xe_sg_segment_size() - Provides upper limit for sg segment size. 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 53b5b36aca66..4aea748c984b 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -682,16 +682,16 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, return ERR_PTR(-EINVAL); if (!src_is_vram) - xe_res_first_sg(xe_bo_get_sg(src_bo), 0, size, &src_it); + xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); else xe_res_first(src, 0, size, &src_it); if (!dst_is_vram) - xe_res_first_sg(xe_bo_get_sg(dst_bo), 0, size, &dst_it); + xe_res_first_sg(xe_bo_sg(dst_bo), 0, size, &dst_it); else xe_res_first(dst, 0, size, &dst_it); if (copy_system_ccs) - xe_res_first_sg(xe_bo_get_sg(src_bo), xe_bo_ccs_pages_start(src_bo), + xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo), PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), &ccs_it); @@ -941,7 +941,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, int pass = 0; if (!clear_vram) - xe_res_first_sg(xe_bo_get_sg(bo), 0, bo->size, &src_it); + xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); else xe_res_first(src, 0, bo->size, &src_it); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 55c37f11ddb4..1fd461fb426e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -677,7 +677,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), xe_vma_size(vma), &curs); else - xe_res_first_sg(xe_bo_get_sg(bo), xe_vma_bo_offset(vma), + xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), xe_vma_size(vma), &curs); } else { curs.size = xe_vma_size(vma); -- cgit v1.2.3 From e7c9e049e0ad256214d8c50454e7289174ffa33b Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 22 Nov 2023 12:03:58 +0100 Subject: drm/xe/bo: Remove leftover trace_printk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit trace_printk() is not intended for production code. Remove it. Suggested-by: Ohad Sharabi Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/946 Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Reviewed-by: Ohad Sharabi Link: https://patchwork.freedesktop.org/patch/msgid/20231122110359.4087-4-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4c827fd4e382..e19337390812 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -788,7 +788,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } xe_device_mem_access_put(xe); - trace_printk("new_mem->mem_type=%d\n", new_mem->mem_type); out: return ret; -- cgit v1.2.3 From d2f51c50b941f89850c9a9561486938b71c0b9f8 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 24 Nov 2023 16:33:45 +0100 Subject: drm/xe/vm: Fix ASID XA usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xa_alloc_cyclic() returns 1 on successful allocation, if wrapping occurs, but the code incorrectly treats that as an error. Fix that. Also, xa_alloc_cyclic() requires xa_init_flags(..., XA_FLAGS_ALLOC), so fix that, and assuming we don't want a zero ASID, instead of using XA_FLAGS_ALLOC1, adjust the xa limits at alloc_cyclic time. 
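Condensed for reference (a sketch of the XArray calling convention the fix relies on, not the patch itself, which follows below): xa_alloc_cyclic() needs the array initialised with XA_FLAGS_ALLOC, returns 0 on success, 1 when the allocation succeeded but the cyclic counter wrapped, and a negative errno on failure:

  xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);

  err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
                        XA_LIMIT(1, XE_MAX_ASID - 1), /* never hand out ASID 0 */
                        &xe->usm.next_asid, GFP_KERNEL);
  if (err < 0)            /* only negative values are real failures */
          return err;
  err = 0;                /* 1 only means the counter wrapped */
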
v2: - On CONFIG_DRM_XE_DEBUG, Initialize the cyclic ASID allocation in such a way that the next allocated ASID will be the maximum one, and the one following will cause an ASID wrap, (all to have CI test high ASIDs and ASID wraps). v3: - Stricter return value checking from xa_alloc_cyclic() (Matthew Auld) Suggested-by: Ohad Sharabi Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/946 Signed-off-by: Thomas Hellström Reviewed-by: Ohad Sharabi #v1 Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20231124153345.97385-5-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 15 ++++++++++++++- drivers/gpu/drm/xe/xe_vm.c | 5 +++-- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8be765adf702..d60379d844d2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -230,7 +230,20 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); drmm_mutex_init(&xe->drm, &xe->usm.lock); - xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC1); + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + /* Trigger a large asid and an early asid wrap. */ + u32 asid; + + BUILD_BUG_ON(XE_MAX_ASID < 2); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL, + XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + drm_WARN_ON(&xe->drm, err); + if (err >= 0) + xa_erase(&xe->usm.asid_to_vm, asid); + } drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock); INIT_LIST_HEAD(&xe->persistent_engines.list); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1b4d340d0604..7c0ae4373180 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1996,13 +1996,14 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(0, XE_MAX_ASID - 1), + XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); mutex_unlock(&xe->usm.lock); - if (err) { + if (err < 0) { xe_vm_close_and_put(vm); return err; } + err = 0; vm->usm.asid = asid; } -- cgit v1.2.3 From fdb6a05383fab3952c9a56ac716e460134990a69 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 27 Nov 2023 13:33:49 +0100 Subject: drm/xe: Internally change the compute_mode and no_dma_fence mode naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name "compute_mode" can be confusing since compute uses either this mode or fault_mode to achieve the long-running semantics, and compute_mode can, moving forward, enable fault_mode under the hood to work around hardware limitations. Also the name no_dma_fence_mode really refers to what we elsewhere call long-running mode and the mode contrary to what its name suggests allows dma-fences as in-fences. 
So in an attempt to be more consistent, rename no_dma_fence_mode -> lr_mode compute_mode -> preempt_fence_mode And adjust flags so that preempt_fence_mode sets XE_VM_FLAG_LR_MODE fault_mode sets XE_VM_FLAG_LR_MODE | XE_VM_FLAG_FAULT_MODE v2: - Fix a typo in the commit message (Oak Zeng) Signed-off-by: Thomas Hellström Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20231127123349.23698-1-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 16 ++++++++-------- drivers/gpu/drm/xe/xe_exec_queue.c | 8 ++++---- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_pt.c | 6 +++--- drivers/gpu/drm/xe/xe_sched_job.c | 2 +- drivers/gpu/drm/xe/xe_sync.c | 6 +++--- drivers/gpu/drm/xe/xe_vm.c | 20 ++++++++++---------- drivers/gpu/drm/xe/xe_vm.h | 16 ++++++++-------- drivers/gpu/drm/xe/xe_vm_types.h | 2 +- 9 files changed, 39 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 85a8a793f527..5ec37df33afe 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -100,7 +100,7 @@ static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm) LIST_HEAD(dups); int err = 0; - if (xe_vm_no_dma_fences(vm)) + if (xe_vm_in_lr_mode(vm)) return 0; /* @@ -182,7 +182,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; i < args->num_syncs; i++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], &syncs_user[i], true, - xe_vm_no_dma_fences(vm)); + xe_vm_in_lr_mode(vm)); if (err) goto err_syncs; } @@ -197,7 +197,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } retry: - if (!xe_vm_no_dma_fences(vm) && xe_vm_userptr_check_repin(vm)) { + if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { err = down_write_killable(&vm->lock); write_locked = true; } else { @@ -279,7 +279,7 @@ retry: } /* Wait behind munmap style rebinds */ - if (!xe_vm_no_dma_fences(vm)) { + if (!xe_vm_in_lr_mode(vm)) { err = drm_sched_job_add_resv_dependencies(&job->drm, xe_vm_resv(vm), DMA_RESV_USAGE_KERNEL); @@ -292,7 +292,7 @@ retry: if (err) goto err_put_job; - if (!xe_vm_no_dma_fences(vm)) { + if (!xe_vm_in_lr_mode(vm)) { err = down_read_interruptible(&vm->userptr.notifier_lock); if (err) goto err_put_job; @@ -307,7 +307,7 @@ retry: * the job and let the DRM scheduler / backend clean up the job. 
*/ xe_sched_job_arm(job); - if (!xe_vm_no_dma_fences(vm)) { + if (!xe_vm_in_lr_mode(vm)) { /* Block userptr invalidations / BO eviction */ dma_resv_add_fence(xe_vm_resv(vm), &job->drm.s_fence->finished, @@ -330,14 +330,14 @@ retry: xe_sched_job_push(job); xe_vm_reactivate_rebind(vm); - if (!err && !xe_vm_no_dma_fences(vm)) { + if (!err && !xe_vm_in_lr_mode(vm)) { spin_lock(&xe->ttm.lru_lock); ttm_lru_bulk_move_tail(&vm->lru_bulk_move); spin_unlock(&xe->ttm.lru_lock); } err_repin: - if (!xe_vm_no_dma_fences(vm)) + if (!xe_vm_in_lr_mode(vm)) up_read(&vm->userptr.notifier_lock); err_put_job: if (err) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index fbb4d3cca9f6..98fc13c89a4d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -327,7 +327,7 @@ static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_DBG(xe, xe_vm_in_compute_mode(q->vm))) + if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm))) return -EINVAL; if (value) @@ -705,14 +705,14 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, q = xe_exec_queue_create(xe, vm, logical_mask, args->width, hwe, - xe_vm_no_dma_fences(vm) ? 0 : + xe_vm_in_lr_mode(vm) ? 0 : EXEC_QUEUE_FLAG_PERSISTENT); up_read(&vm->lock); xe_vm_put(vm); if (IS_ERR(q)) return PTR_ERR(q); - if (xe_vm_in_compute_mode(vm)) { + if (xe_vm_in_preempt_fence_mode(vm)) { q->compute.context = dma_fence_context_alloc(1); spin_lock_init(&q->compute.lock); @@ -785,7 +785,7 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, */ bool xe_exec_queue_is_lr(struct xe_exec_queue *q) { - return q->vm && xe_vm_no_dma_fences(q->vm) && + return q->vm && xe_vm_in_lr_mode(q->vm) && !(q->flags & EXEC_QUEUE_FLAG_VM); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b13c925c56ee..32c234d753fd 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1211,7 +1211,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ge->q = q; init_waitqueue_head(&ge->suspend_wait); - timeout = (q->vm && xe_vm_no_dma_fences(q->vm)) ? MAX_SCHEDULE_TIMEOUT : + timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : q->hwe->eclass->sched_props.job_timeout_ms; err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, get_submit_wq(guc), diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 1fd461fb426e..c6c9b723db5a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1292,8 +1292,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue * non-faulting LR, in particular on user-space batch buffer chaining, * it needs to be done here. 
*/ - if ((rebind && !xe_vm_no_dma_fences(vm) && !vm->batch_invalidate_tlb) || - (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_compute_mode(vm))) { + if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || + (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_preempt_fence_mode(vm))) { ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); @@ -1355,7 +1355,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue xe_bo_put_commit(&deferred); } if (!rebind && last_munmap_rebind && - xe_vm_in_compute_mode(vm)) + xe_vm_in_preempt_fence_mode(vm)) xe_vm_queue_rebind_worker(vm); } else { kfree(rfence); diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 84c700aed8ac..a9c7ae815bec 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -92,7 +92,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, /* Migration and kernel engines have their own locking */ if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { lockdep_assert_held(&q->vm->lock); - if (!xe_vm_no_dma_fences(q->vm)) + if (!xe_vm_in_lr_mode(q->vm)) xe_vm_assert_held(q->vm); } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index eafe53c2f55d..ea96ba4b41da 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -100,7 +100,7 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, - bool exec, bool no_dma_fences) + bool exec, bool in_lr_mode) { struct drm_xe_sync sync_in; int err; @@ -118,7 +118,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL; switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { case DRM_XE_SYNC_FLAG_SYNCOBJ: - if (XE_IOCTL_DBG(xe, no_dma_fences && signal)) + if (XE_IOCTL_DBG(xe, in_lr_mode && signal)) return -EOPNOTSUPP; if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr))) @@ -136,7 +136,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, break; case DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ: - if (XE_IOCTL_DBG(xe, no_dma_fences && signal)) + if (XE_IOCTL_DBG(xe, in_lr_mode && signal)) return -EOPNOTSUPP; if (XE_IOCTL_DBG(xe, upper_32_bits(sync_in.addr))) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7c0ae4373180..c33ae4db4e02 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -340,7 +340,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) int err; bool wait; - xe_assert(vm->xe, xe_vm_in_compute_mode(vm)); + xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); down_write(&vm->lock); drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); @@ -394,7 +394,7 @@ out_unlock: */ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) { - if (!xe_vm_in_compute_mode(vm)) + if (!xe_vm_in_preempt_fence_mode(vm)) return; down_write(&vm->lock); @@ -596,7 +596,7 @@ static void preempt_rebind_work_func(struct work_struct *w) long wait; int __maybe_unused tries = 0; - xe_assert(vm->xe, xe_vm_in_compute_mode(vm)); + xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); trace_xe_vm_rebind_worker_enter(vm); down_write(&vm->lock); @@ -840,7 +840,7 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) struct xe_vma *vma, *next; lockdep_assert_held(&vm->lock); - if 
(xe_vm_no_dma_fences(vm) && !rebind_worker) + if (xe_vm_in_lr_mode(vm) && !rebind_worker) return NULL; xe_vm_assert_held(vm); @@ -1436,9 +1436,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->batch_invalidate_tlb = true; } - if (flags & XE_VM_FLAG_COMPUTE_MODE) { + if (flags & XE_VM_FLAG_LR_MODE) { INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); - vm->flags |= XE_VM_FLAG_COMPUTE_MODE; + vm->flags |= XE_VM_FLAG_LR_MODE; vm->batch_invalidate_tlb = false; } @@ -1526,7 +1526,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_assert(xe, !vm->preempt.num_exec_queues); xe_vm_close(vm); - if (xe_vm_in_compute_mode(vm)) + if (xe_vm_in_preempt_fence_mode(vm)) flush_work(&vm->preempt.rebind_work); down_write(&vm->lock); @@ -1975,11 +1975,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; if (args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE) - flags |= XE_VM_FLAG_COMPUTE_MODE; + flags |= XE_VM_FLAG_LR_MODE; if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT) flags |= XE_VM_FLAG_ASYNC_DEFAULT; if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) - flags |= XE_VM_FLAG_FAULT_MODE; + flags |= XE_VM_FLAG_LR_MODE | XE_VM_FLAG_FAULT_MODE; vm = xe_vm_create(xe, flags); if (IS_ERR(vm)) @@ -3066,7 +3066,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], false, - xe_vm_no_dma_fences(vm)); + xe_vm_in_lr_mode(vm)); if (err) goto free_syncs; } diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 45b70ba86553..12bb5d79487f 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -149,19 +149,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, void xe_vm_close_and_put(struct xe_vm *vm); -static inline bool xe_vm_in_compute_mode(struct xe_vm *vm) +static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) { - return vm->flags & XE_VM_FLAG_COMPUTE_MODE; + return vm->flags & XE_VM_FLAG_FAULT_MODE; } -static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) +static inline bool xe_vm_in_lr_mode(struct xe_vm *vm) { - return vm->flags & XE_VM_FLAG_FAULT_MODE; + return vm->flags & XE_VM_FLAG_LR_MODE; } -static inline bool xe_vm_no_dma_fences(struct xe_vm *vm) +static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm) { - return xe_vm_in_compute_mode(vm) || xe_vm_in_fault_mode(vm); + return xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm); } int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); @@ -181,7 +181,7 @@ extern struct ttm_device_funcs xe_ttm_funcs; static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) { - xe_assert(vm->xe, xe_vm_in_compute_mode(vm)); + xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); } @@ -196,7 +196,7 @@ static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) */ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) { - if (xe_vm_in_compute_mode(vm) && vm->preempt.rebind_deactivated) { + if (xe_vm_in_preempt_fence_mode(vm) && vm->preempt.rebind_deactivated) { vm->preempt.rebind_deactivated = false; xe_vm_queue_rebind_worker(vm); } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 97d779d8a7d3..fc2645e07578 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ 
-154,7 +154,7 @@ struct xe_vm { * from XE_VM_FLAG_BANNED which requires vm->lock to set / read safely */ #define XE_VM_FLAG_64K BIT(0) -#define XE_VM_FLAG_COMPUTE_MODE BIT(1) +#define XE_VM_FLAG_LR_MODE BIT(1) #define XE_VM_FLAG_ASYNC_DEFAULT BIT(2) #define XE_VM_FLAG_MIGRATION BIT(3) #define XE_VM_FLAG_SCRATCH_PAGE BIT(4) -- cgit v1.2.3 From f91bacce8dbb5dcb395e1ab9750977fa70ad485e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 27 Nov 2023 11:03:33 -0800 Subject: drm/xe/dg2: Drop Wa_22014600077 The workaround database has been updated to drop this workaround for all DG2 variants. Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20231127190332.4099519-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 - drivers/gpu/drm/xe/xe_wa.c | 21 --------------------- 2 files changed, 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index cc27fe8fc363..686930aba77e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -340,7 +340,6 @@ #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) #define CACHE_MODE_SS XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED) -#define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) #define DISABLE_ECC REG_BIT(5) #define ENABLE_PREFETCH_INTO_IC REG_BIT(3) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 81ae0232146e..e0853ab30c00 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -522,27 +522,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) }, - { XE_RTP_NAME("22014600077"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(B0, FOREVER), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CACHE_MODE_SS, - ENABLE_EU_COUNT_FOR_TDL_FLUSH, - /* - * Wa_14012342262 write-only reg, so skip - * verification - */ - .read_mask = 0)) - }, - { XE_RTP_NAME("22014600077"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CACHE_MODE_SS, - ENABLE_EU_COUNT_FOR_TDL_FLUSH, - /* - * Wa_14012342262 write-only reg, so skip - * verification - */ - .read_mask = 0)) - }, { XE_RTP_NAME("14015150844"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, -- cgit v1.2.3 From a409901f516cf5e25180d98a510708013b33b8ee Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 20 Nov 2023 10:51:45 +0530 Subject: drm/xe/xe2: Add workaround 14020013138 This workaround applies to Xe2_LPG A0 V3: - Apply rule RENDER class V2(Matt): - Apply WA in lrc context Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ drivers/gpu/drm/xe/xe_wa.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 686930aba77e..18b13224480d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -93,6 +93,9 @@ #define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) +#define WM_CHICKEN3 XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED) +#define HIZ_PLANE_COMPRESSION_DIS REG_BIT(10) + #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) diff --git 
a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index e0853ab30c00..63bd4bb1af03 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -714,6 +714,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) }, + { XE_RTP_NAME("14020013138"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) + }, {} }; -- cgit v1.2.3 From adce1b393f90c349820cb0cb907f94ce9b3a4485 Mon Sep 17 00:00:00 2001 From: Bommithi Sakeena Date: Fri, 17 Nov 2023 16:06:18 +0000 Subject: drm/xe: Encapsulate all the module parameters Encapsulate all the module parameters in one single global struct variable. This also removes the extra xe_module.h from includes. v2: naming consistency as suggested by Jani and Lucas v3: fix checkpatch errors/warnings v4: adding blank line after struct declaration Cc: Jani Nikula Cc: Lucas De Marchi Signed-off-by: Bommithi Sakeena Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_display.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_log.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_module.c | 29 ++++++++++++++--------------- drivers/gpu/drm/xe/xe_module.h | 24 +++++++++++++++++------- drivers/gpu/drm/xe/xe_pci.c | 6 +++--- drivers/gpu/drm/xe/xe_uc_fw.c | 6 +++--- 8 files changed, 42 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index d60379d844d2..54202623e255 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -223,7 +223,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.devid = pdev->device; xe->info.revid = pdev->revision; - xe->info.force_execlist = force_execlist; + xe->info.force_execlist = xe_modparam.force_execlist; spin_lock_init(&xe->irq.lock); diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c index da10f16e1c12..74391d9b11ae 100644 --- a/drivers/gpu/drm/xe/xe_display.c +++ b/drivers/gpu/drm/xe/xe_display.c @@ -45,7 +45,7 @@ static bool has_display(struct xe_device *xe) */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) { - if (!enable_display) + if (!xe_modparam.enable_display) return 0; return intel_display_driver_probe_defer(pdev); @@ -69,7 +69,7 @@ static void xe_display_last_close(struct drm_device *dev) */ void xe_display_driver_set_hooks(struct drm_driver *driver) { - if (!enable_display) + if (!xe_modparam.enable_display) return; driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 45c60a9c631c..27c3827bfd05 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -100,7 +100,7 @@ int xe_guc_log_init(struct xe_guc_log *log) xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size()); log->bo = bo; - log->level = xe_guc_log_level; + log->level = xe_modparam.guc_log_level; err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log); if (err) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index d0a36600e52b..0f846272e39c 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -73,7 +73,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) */ static void xe_resize_vram_bar(struct xe_device *xe) { - u64 
force_vram_bar_size = xe_force_vram_bar_size; + u64 force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct pci_bus *root = pdev->bus; resource_size_t current_size; diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 7194595e7f31..1ea883f48c63 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -10,39 +10,38 @@ #include "xe_drv.h" #include "xe_hw_fence.h" -#include "xe_module.h" #include "xe_pci.h" #include "xe_pmu.h" #include "xe_sched_job.h" -bool force_execlist = false; -module_param_named_unsafe(force_execlist, force_execlist, bool, 0444); +struct xe_modparam xe_modparam = { + .enable_display = true, + .guc_log_level = 5, + .force_probe = CONFIG_DRM_XE_FORCE_PROBE, + /* the rest are 0 by default */ +}; + +module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); -bool enable_display = true; -module_param_named(enable_display, enable_display, bool, 0444); +module_param_named(enable_display, xe_modparam.enable_display, bool, 0444); MODULE_PARM_DESC(enable_display, "Enable display"); -u32 xe_force_vram_bar_size; -module_param_named(vram_bar_size, xe_force_vram_bar_size, uint, 0600); +module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); -int xe_guc_log_level = 5; -module_param_named(guc_log_level, xe_guc_log_level, int, 0600); +module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)"); -char *xe_guc_firmware_path; -module_param_named_unsafe(guc_firmware_path, xe_guc_firmware_path, charp, 0400); +module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400); MODULE_PARM_DESC(guc_firmware_path, "GuC firmware path to use instead of the default one"); -char *xe_huc_firmware_path; -module_param_named_unsafe(huc_firmware_path, xe_huc_firmware_path, charp, 0400); +module_param_named_unsafe(huc_firmware_path, xe_modparam.huc_firmware_path, charp, 0400); MODULE_PARM_DESC(huc_firmware_path, "HuC firmware path to use instead of the default one - empty string disables"); -char *xe_param_force_probe = CONFIG_DRM_XE_FORCE_PROBE; -module_param_named_unsafe(force_probe, xe_param_force_probe, charp, 0400); +module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, "Force probe options for specified devices. 
See CONFIG_DRM_XE_FORCE_PROBE for details."); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index e1da1e9ca5cb..51d75ff12376 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -3,13 +3,23 @@ * Copyright © 2023 Intel Corporation */ +#ifndef _XE_MODULE_H_ +#define _XE_MODULE_H_ + #include /* Module modprobe variables */ -extern bool force_execlist; -extern bool enable_display; -extern u32 xe_force_vram_bar_size; -extern int xe_guc_log_level; -extern char *xe_guc_firmware_path; -extern char *xe_huc_firmware_path; -extern char *xe_param_force_probe; +struct xe_modparam { + bool force_execlist; + bool enable_display; + u32 force_vram_bar_size; + int guc_log_level; + char *guc_firmware_path; + char *huc_firmware_path; + char *force_probe; +}; + +extern struct xe_modparam xe_modparam; + +#endif + diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 066a223a341a..6d062478c1f2 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -419,12 +419,12 @@ static bool device_id_in_list(u16 device_id, const char *devices, bool negative) static bool id_forced(u16 device_id) { - return device_id_in_list(device_id, xe_param_force_probe, false); + return device_id_in_list(device_id, xe_modparam.force_probe, false); } static bool id_blocked(u16 device_id) { - return device_id_in_list(device_id, xe_param_force_probe, true); + return device_id_in_list(device_id, xe_modparam.force_probe, true); } static const struct xe_subplatform_desc * @@ -593,7 +593,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && - enable_display && + xe_modparam.enable_display && desc->has_display; /* * All platforms have at least one primary GT. Any platform with media diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 91d4a2272ee7..2a5f361e8270 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -220,11 +220,11 @@ uc_fw_override(struct xe_uc_fw *uc_fw) /* empty string disables, but it's not allowed for GuC */ switch (uc_fw->type) { case XE_UC_FW_TYPE_GUC: - if (xe_guc_firmware_path && *xe_guc_firmware_path) - path_override = xe_guc_firmware_path; + if (xe_modparam.guc_firmware_path && *xe_modparam.guc_firmware_path) + path_override = xe_modparam.guc_firmware_path; break; case XE_UC_FW_TYPE_HUC: - path_override = xe_huc_firmware_path; + path_override = xe_modparam.huc_firmware_path; break; default: break; -- cgit v1.2.3 From 2e7227b4b733223a0a5b6a7a2685c7ff089c21c5 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:44 -0800 Subject: drm/xe/uc: Rework uC version tracking The GSC firmware, support for which is coming soon for Xe, has both a release version (updated on every release) and a compatibility version (update only on interface changes). The GuC has something similar, with a global release version and a submission version (which is also known as the VF compatibility version). The main difference is that for the GuC we still want to check the driver requirement against the release version, while for the GSC we'll need to check against the compatibility version. Instead of special casing the GSC, this patch reworks the FW logic so that we store both versions at the uc_fw level for all binaries and we allow checking against either of the versions. 
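Condensed, the per-binary tracking added in xe_uc_fw_types.h below amounts to the following (sketch only; see the actual hunk for the kerneldoc):

	struct xe_uc_fw_version {
		u16 major, minor, patch, build;	/* build is only used by the GSC */
	};

	/* inside struct xe_uc_fw */
	struct {
		struct xe_uc_fw_version wanted;			/* driver requirement */
		enum xe_uc_fw_version_types wanted_type;	/* RELEASE or COMPATIBILITY */
		struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
	} versions;

so uc_fw_check_version_requirements() simply compares versions.wanted against versions.found[versions.wanted_type].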
Initially, we'll use it to support GSC, but the logic could be re-used to allow VFs to check against the GuC compatibility version. Note that the GSC version has 4 numbers (major, minor, hotfix, build), so support for that has been added as part of the rework and will be used in follow-up patches. Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Michal Wajdeczko Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_types.h | 9 --- drivers/gpu/drm/xe/xe_uc_fw.c | 141 ++++++++++++++++++++++-------------- drivers/gpu/drm/xe/xe_uc_fw_types.h | 39 +++++++--- 3 files changed, 114 insertions(+), 75 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 0fdcc05dc16a..cd80802e8918 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -52,15 +52,6 @@ struct xe_guc { /** @seqno: suspend fences seqno */ u32 seqno; } suspend; - /** @version: submission version */ - struct { - /** @major: major version of GuC submission */ - u32 major; - /** @minor: minor version of GuC submission */ - u32 minor; - /** @patch: patch version of GuC submission */ - u32 patch; - } version; #ifdef CONFIG_PROVE_LOCKING #define NUM_SUBMIT_WQ 256 /** @submit_wq_pool: submission ordered workqueues pool */ diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2a5f361e8270..376fbc10c5ea 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -204,9 +204,12 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) for (i = 0; i < count && p <= entries[i].platform; i++) { if (p == entries[i].platform) { uc_fw->path = entries[i].path; - uc_fw->major_ver_wanted = entries[i].major; - uc_fw->minor_ver_wanted = entries[i].minor; + uc_fw->versions.wanted.major = entries[i].major; + uc_fw->versions.wanted.minor = entries[i].minor; uc_fw->full_ver_required = entries[i].full_ver_required; + + /* compatibility version checking coming soon */ + uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE; break; } } @@ -273,32 +276,30 @@ static void uc_fw_fini(struct drm_device *drm, void *arg) static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) { struct xe_gt *gt = uc_fw_to_gt(uc_fw); - struct xe_guc *guc = >->uc.guc; + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; + struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY]; xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); - xe_gt_assert(gt, uc_fw->major_ver_found >= 70); + xe_gt_assert(gt, release->major >= 70); - if (uc_fw->major_ver_found > 70 || uc_fw->minor_ver_found >= 6) { + if (release->major > 70 || release->minor >= 6) { /* v70.6.0 adds CSS header support */ - guc->submission_state.version.major = - FIELD_GET(CSS_SW_VERSION_UC_MAJOR, - css->submission_version); - guc->submission_state.version.minor = - FIELD_GET(CSS_SW_VERSION_UC_MINOR, - css->submission_version); - guc->submission_state.version.patch = - FIELD_GET(CSS_SW_VERSION_UC_PATCH, - css->submission_version); - } else if (uc_fw->minor_ver_found >= 3) { + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->submission_version); + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->submission_version); + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, + css->submission_version); + } else if (release->minor >= 3) { /* v70.3.0 introduced v1.1.0 */ - guc->submission_state.version.major = 1; - 
guc->submission_state.version.minor = 1; - guc->submission_state.version.patch = 0; + compatibility->major = 1; + compatibility->minor = 1; + compatibility->patch = 0; } else { /* v70.0.0 introduced v1.0.0 */ - guc->submission_state.version.major = 1; - guc->submission_state.version.minor = 0; - guc->submission_state.version.patch = 0; + compatibility->major = 1; + compatibility->minor = 0; + compatibility->patch = 0; } uc_fw->private_data_size = css->private_data_size; @@ -307,30 +308,31 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw) { struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted; + struct xe_uc_fw_version *found = &uc_fw->versions.found[uc_fw->versions.wanted_type]; /* Driver has no requirement on any version, any is good. */ - if (!uc_fw->major_ver_wanted) + if (!wanted->major) return 0; /* * If full version is required, both major and minor should match. * Otherwise, at least the major version. */ - if (uc_fw->major_ver_wanted != uc_fw->major_ver_found || - (uc_fw->full_ver_required && - uc_fw->minor_ver_wanted != uc_fw->minor_ver_found)) { + if (wanted->major != found->major || + (uc_fw->full_ver_required && wanted->minor != found->minor)) { drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + found->major, found->minor, + wanted->major, wanted->minor); goto fail; } - if (uc_fw->minor_ver_wanted > uc_fw->minor_ver_found) { + if (wanted->minor > found->minor) { drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n", xe_uc_fw_type_repr(uc_fw->type), - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, - uc_fw->major_ver_found, uc_fw->minor_ver_found, + wanted->major, wanted->minor, + found->major, found->minor, uc_fw->path); drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n", XE_UC_FIRMWARE_URL); @@ -349,6 +351,7 @@ fail: static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t fw_size) { struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; struct uc_css_header *css; size_t size; @@ -390,12 +393,9 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t } /* Get version numbers from the CSS header */ - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, - css->sw_version); - uc_fw->patch_ver_found = FIELD_GET(CSS_SW_VERSION_UC_PATCH, - css->sw_version); + release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version); + release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); + release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); if (uc_fw->type == XE_UC_FW_TYPE_GUC) guc_read_css_info(uc_fw, css); @@ -431,6 +431,7 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_device *xe = gt_to_xe(gt); const struct gsc_cpd_header_v2 *header = data; + struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; const struct gsc_manifest_header *manifest; size_t min_size = sizeof(*header); u32 offset; @@ -468,9 +469,9 @@ static int 
parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz manifest = data + offset; - uc_fw->major_ver_found = manifest->fw_version.major; - uc_fw->minor_ver_found = manifest->fw_version.minor; - uc_fw->patch_ver_found = manifest->fw_version.hotfix; + release->major = manifest->fw_version.major; + release->minor = manifest->fw_version.minor; + release->patch = manifest->fw_version.hotfix; /* then optionally look for the css header */ if (css_entry) { @@ -524,12 +525,25 @@ static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw) return 0; } +#define print_uc_fw_version(p_, version_, prefix_, ...) \ +do { \ + struct xe_uc_fw_version *ver_ = (version_); \ + if (ver_->build) \ + drm_printf(p_, prefix_ " version %u.%u.%u.%u\n", ##__VA_ARGS__, \ + ver_->major, ver_->minor, \ + ver_->patch, ver_->build); \ + else \ + drm_printf(p_, prefix_ " version %u.%u.%u\n", ##__VA_ARGS__, \ + ver_->major, ver_->minor, ver_->patch); \ +} while (0) + int xe_uc_fw_init(struct xe_uc_fw *uc_fw) { struct xe_device *xe = uc_fw_to_xe(uc_fw); struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_tile *tile = gt_to_tile(gt); struct device *dev = xe->drm.dev; + struct drm_printer p = drm_info_printer(dev); const struct firmware *fw = NULL; struct xe_bo *obj; int err; @@ -567,9 +581,10 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) if (err) goto fail; - drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u\n", - xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, - uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found); + print_uc_fw_version(&p, + &uc_fw->versions.found[XE_UC_FW_VER_RELEASE], + "Using %s firmware from %s", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); err = uc_fw_check_version_requirements(uc_fw); if (err) @@ -686,26 +701,40 @@ fail: return err; } +static const char *version_type_repr(enum xe_uc_fw_version_types type) +{ + switch (type) { + case XE_UC_FW_VER_RELEASE: + return "release"; + case XE_UC_FW_VER_COMPATIBILITY: + return "compatibility"; + default: + return "Unknown version type"; + } +} void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p) { + int i; + drm_printf(p, "%s firmware: %s\n", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); drm_printf(p, "\tstatus: %s\n", xe_uc_fw_status_repr(uc_fw->status)); - drm_printf(p, "\tversion: wanted %u.%u, found %u.%u.%u\n", - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, - uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->patch_ver_found); - drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); - drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); - - if (uc_fw->type == XE_UC_FW_TYPE_GUC) { - struct xe_gt *gt = uc_fw_to_gt(uc_fw); - struct xe_guc *guc = >->uc.guc; - - drm_printf(p, "\tSubmit version: %u.%u.%u\n", - guc->submission_state.version.major, - guc->submission_state.version.minor, - guc->submission_state.version.patch); + + print_uc_fw_version(p, &uc_fw->versions.wanted, "\twanted %s", + version_type_repr(uc_fw->versions.wanted_type)); + + for (i = 0; i < XE_UC_FW_VER_TYPE_COUNT; i++) { + struct xe_uc_fw_version *ver = &uc_fw->versions.found[i]; + + if (ver->major) + print_uc_fw_version(p, ver, "\tfound %s", + version_type_repr(i)); } + + if (uc_fw->ucode_size) + drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); + if (uc_fw->rsa_size) + drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); } diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 1650599303c8..46c801d8e954 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ 
b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -59,6 +59,26 @@ enum xe_uc_fw_type { }; #define XE_UC_FW_NUM_TYPES 2 +/** + * struct xe_uc_fw_version - Version for XE micro controller firmware + */ +struct xe_uc_fw_version { + /** @major: major version of the FW */ + u16 major; + /** @minor: minor version of the FW */ + u16 minor; + /** @patch: patch version of the FW */ + u16 patch; + /** @build: build version of the FW (not always available) */ + u16 build; +}; + +enum xe_uc_fw_version_types { + XE_UC_FW_VER_RELEASE, + XE_UC_FW_VER_COMPATIBILITY, + XE_UC_FW_VER_TYPE_COUNT +}; + /** * struct xe_uc_fw - XE micro controller firmware */ @@ -98,16 +118,15 @@ struct xe_uc_fw { * version required per platform. */ - /** @major_ver_wanted: major firmware version wanted by platform */ - u16 major_ver_wanted; - /** @minor_ver_wanted: minor firmware version wanted by platform */ - u16 minor_ver_wanted; - /** @major_ver_found: major version found in firmware blob */ - u16 major_ver_found; - /** @minor_ver_found: major version found in firmware blob */ - u16 minor_ver_found; - /** @patch_ver_found: patch version found in firmware blob */ - u16 patch_ver_found; + /** @versions: FW versions wanted and found */ + struct { + /** @wanted: firmware version wanted by platform */ + struct xe_uc_fw_version wanted; + /** @wanted_type: type of firmware version wanted (release vs compatibility) */ + enum xe_uc_fw_version_types wanted_type; + /** @found: fw versions found in firmware blob */ + struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT]; + } versions; /** @rsa_size: RSA size */ u32 rsa_size; -- cgit v1.2.3 From 0d1caff4a367e0cbc28622fab7e39576bac82bb9 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:45 -0800 Subject: drm/xe/gsc: Introduce GSC FW Add the basic definitions and init function. Same as HuC, GSC is only supported on the media GT on MTL and newer platforms. Note that the GSC requires submission resources which can't be allocated during init (because we don't have the hwconfig yet), so it can't be marked as loadable at the end of the init function. The allocation of those resources will come in the patch that makes use of them to load the FW. 
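The resulting split, as the rest of this series fills it in, is roughly (outline only; the post-hwconfig and load-start entry points are added by the follow-up patches below):

	xe_gsc_init()			/* at uc init: select the FW blob, no resources yet */
	xe_gsc_init_post_hwconfig()	/* hwconfig known: private bo + queue + wq, FW marked LOADABLE */
	xe_gsc_load_start()		/* queue the async load worker */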
v2: better comment, move num FWs define inside the enum (John) Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gsc.c | 52 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gsc.h | 13 ++++++++++ drivers/gpu/drm/xe/xe_gsc_types.h | 19 ++++++++++++++ drivers/gpu/drm/xe/xe_uc.c | 9 +++++-- drivers/gpu/drm/xe/xe_uc_fw.c | 23 +++++++++++++--- drivers/gpu/drm/xe/xe_uc_fw.h | 5 +++- drivers/gpu/drm/xe/xe_uc_fw_types.h | 5 ++-- drivers/gpu/drm/xe/xe_uc_types.h | 3 +++ 9 files changed, 121 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gsc.c create mode 100644 drivers/gpu/drm/xe/xe_gsc.h create mode 100644 drivers/gpu/drm/xe/xe_gsc_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 05a90fd6c3c9..184e2724ce7b 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -58,6 +58,7 @@ xe-y += xe_bb.o \ xe_force_wake.o \ xe_ggtt.o \ xe_gpu_scheduler.o \ + xe_gsc.o \ xe_gt.o \ xe_gt_clock.o \ xe_gt_debugfs.o \ diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c new file mode 100644 index 000000000000..216f36cee0f7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gsc.h" + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_uc_fw.h" + +static struct xe_gt * +gsc_to_gt(struct xe_gsc *gsc) +{ + return container_of(gsc, struct xe_gt, uc.gsc); +} + +int xe_gsc_init(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + int ret; + + gsc->fw.type = XE_UC_FW_TYPE_GSC; + + /* The GSC uC is only available on the media GT */ + if (tile->media_gt && (gt != tile->media_gt)) { + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED); + return 0; + } + + /* + * Some platforms can have GuC but not GSC. That would cause + * xe_uc_fw_init(gsc) to return a "not supported" failure code and abort + * all firmware loading. So check for GSC being enabled before + * propagating the failure back up. That way the higher level will keep + * going and load GuC as appropriate. 
+ */ + ret = xe_uc_fw_init(&gsc->fw); + if (!xe_uc_fw_is_enabled(&gsc->fw)) + return 0; + else if (ret) + goto out; + + return 0; + +out: + xe_gt_err(gt, "GSC init failed with %d", ret); + return ret; +} + diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h new file mode 100644 index 000000000000..baa7f21f4204 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_H_ +#define _XE_GSC_H_ + +#include "xe_gsc_types.h" + +int xe_gsc_init(struct xe_gsc *gsc); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h new file mode 100644 index 000000000000..135f156e3736 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_TYPES_H_ +#define _XE_GSC_TYPES_H_ + +#include "xe_uc_fw_types.h" + +/** + * struct xe_gsc - GSC + */ +struct xe_gsc { + /** @fw: Generic uC firmware management */ + struct xe_uc_fw fw; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 784f53c5f282..b67154c78dff 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -6,6 +6,7 @@ #include "xe_uc.h" #include "xe_device.h" +#include "xe_gsc.h" #include "xe_gt.h" #include "xe_guc.h" #include "xe_guc_pc.h" @@ -32,8 +33,8 @@ int xe_uc_init(struct xe_uc *uc) int ret; /* - * We call the GuC/HuC init functions even if GuC submission is off to - * correctly move our tracking of the FW state to "disabled". + * We call the GuC/HuC/GSC init functions even if GuC submission is off + * to correctly move our tracking of the FW state to "disabled". */ ret = xe_guc_init(&uc->guc); @@ -44,6 +45,10 @@ int xe_uc_init(struct xe_uc *uc) if (ret) goto err; + ret = xe_gsc_init(&uc->gsc); + if (ret) + goto err; + if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 376fbc10c5ea..5eaf6ce0d025 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -158,11 +158,18 @@ XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, static struct xe_gt * __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type) { - if (type == XE_UC_FW_TYPE_GUC) - return container_of(uc_fw, struct xe_gt, uc.guc.fw); + XE_WARN_ON(type >= XE_UC_FW_NUM_TYPES); - XE_WARN_ON(type != XE_UC_FW_TYPE_HUC); - return container_of(uc_fw, struct xe_gt, uc.huc.fw); + switch (type) { + case XE_UC_FW_TYPE_GUC: + return container_of(uc_fw, struct xe_gt, uc.guc.fw); + case XE_UC_FW_TYPE_HUC: + return container_of(uc_fw, struct xe_gt, uc.huc.fw); + case XE_UC_FW_TYPE_GSC: + return container_of(uc_fw, struct xe_gt, uc.gsc.fw); + default: + return NULL; + } } static struct xe_gt *uc_fw_to_gt(struct xe_uc_fw *uc_fw) @@ -197,6 +204,14 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) u32 count; int i; + /* + * GSC FW support is still not fully in place, so we're not defining + * the FW blob yet because we don't want the driver to attempt to load + * it until we're ready for it. 
+ */ + if (uc_fw->type == XE_UC_FW_TYPE_GSC) + return; + xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all)); entries = blobs_all[uc_fw->type].entries; count = blobs_all[uc_fw->type].count; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index 1d1a0c156cdf..7feafe1695f9 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -96,8 +96,11 @@ static inline const char *xe_uc_fw_type_repr(enum xe_uc_fw_type type) return "GuC"; case XE_UC_FW_TYPE_HUC: return "HuC"; + case XE_UC_FW_TYPE_GSC: + return "GSC"; + default: + return "uC"; } - return "uC"; } static inline enum xe_uc_fw_status diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 46c801d8e954..fc1de0cc9324 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -55,9 +55,10 @@ enum xe_uc_fw_status { enum xe_uc_fw_type { XE_UC_FW_TYPE_GUC = 0, - XE_UC_FW_TYPE_HUC + XE_UC_FW_TYPE_HUC, + XE_UC_FW_TYPE_GSC, + XE_UC_FW_NUM_TYPES }; -#define XE_UC_FW_NUM_TYPES 2 /** * struct xe_uc_fw_version - Version for XE micro controller firmware diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h index 49bef6498b85..9924e4484866 100644 --- a/drivers/gpu/drm/xe/xe_uc_types.h +++ b/drivers/gpu/drm/xe/xe_uc_types.h @@ -6,6 +6,7 @@ #ifndef _XE_UC_TYPES_H_ #define _XE_UC_TYPES_H_ +#include "xe_gsc_types.h" #include "xe_guc_types.h" #include "xe_huc_types.h" #include "xe_wopcm_types.h" @@ -18,6 +19,8 @@ struct xe_uc { struct xe_guc guc; /** @huc: HuC */ struct xe_huc huc; + /** @gsc: Graphics Security Controller */ + struct xe_gsc gsc; /** @wopcm: WOPCM */ struct xe_wopcm wopcm; }; -- cgit v1.2.3 From 985d5a49e8454d64a01ab362e9091788eeed1839 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:46 -0800 Subject: drm/xe/gsc: Parse GSC FW header The GSC blob starts with a layout header, from which we can move to the boot directory, which in turns allows us to find the CPD. The CPD uses the same format as the one in the HuC binary, so we can re-use the same parsing code to get to the manifest, which contains the release and security versions of the FW. 
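Condensed, the lookup chain implemented by the new parse_gsc_layout() below is (sketch only, bounds and signature checks omitted):

	const struct gsc_layout_pointers *layout = data;
	const struct gsc_bpdt_header *bpdt = data + layout->boot1.offset;
	const struct gsc_bpdt_entry *e = (void *)bpdt + sizeof(*bpdt);
	int i;

	for (i = 0; i < bpdt->descriptor_count; i++, e++)
		if ((e->type & GSC_BPDT_ENTRY_TYPE_MASK) == GSC_BPDT_ENTRY_TYPE_GSC_RBE)
			return parse_cpd_header(uc_fw, (void *)bpdt + e->sub_partition_offset,
						layout->boot1.size - e->sub_partition_offset,
						"RBEP.man", NULL);

with the manifest found under the "RBEP.man" CPD entry providing the release version, build number and security version.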
v2: Fix comments in struct definition (John) Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Lucas De Marchi Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc_types.h | 3 + drivers/gpu/drm/xe/xe_uc_fw.c | 77 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_uc_fw_abi.h | 113 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 135f156e3736..1bc50583fe58 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -14,6 +14,9 @@ struct xe_gsc { /** @fw: Generic uC firmware management */ struct xe_uc_fw fw; + + /** @security_version: SVN found in the fetched blob */ + u32 security_version; }; #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5eaf6ce0d025..2d0fb7058d66 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -12,6 +12,7 @@ #include "xe_bo.h" #include "xe_device_types.h" #include "xe_force_wake.h" +#include "xe_gsc.h" #include "xe_gt.h" #include "xe_map.h" #include "xe_mmio.h" @@ -488,6 +489,13 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz release->minor = manifest->fw_version.minor; release->patch = manifest->fw_version.hotfix; + if (uc_fw->type == XE_UC_FW_TYPE_GSC) { + struct xe_gsc *gsc = container_of(uc_fw, struct xe_gsc, fw); + + release->build = manifest->fw_version.build; + gsc->security_version = manifest->security_version; + } + /* then optionally look for the css header */ if (css_entry) { int ret; @@ -517,6 +525,73 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz return 0; } +static int parse_gsc_layout(struct xe_uc_fw *uc_fw, const void *data, size_t size) +{ + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + const struct gsc_layout_pointers *layout = data; + const struct gsc_bpdt_header *bpdt_header = NULL; + const struct gsc_bpdt_entry *bpdt_entry = NULL; + size_t min_size = sizeof(*layout); + int i; + + if (size < min_size) { + xe_gt_err(gt, "GSC FW too small! %zu < %zu\n", size, min_size); + return -ENODATA; + } + + min_size = layout->boot1.offset + layout->boot1.size; + if (size < min_size) { + xe_gt_err(gt, "GSC FW too small for boot section! 
%zu < %zu\n", + size, min_size); + return -ENODATA; + } + + min_size = sizeof(*bpdt_header); + if (layout->boot1.size < min_size) { + xe_gt_err(gt, "GSC FW boot section too small for BPDT header: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + bpdt_header = data + layout->boot1.offset; + if (bpdt_header->signature != GSC_BPDT_HEADER_SIGNATURE) { + xe_gt_err(gt, "invalid signature for BPDT header: 0x%08x!\n", + bpdt_header->signature); + return -EINVAL; + } + + min_size += sizeof(*bpdt_entry) * bpdt_header->descriptor_count; + if (layout->boot1.size < min_size) { + xe_gt_err(gt, "GSC FW boot section too small for BPDT entries: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + bpdt_entry = (void *)bpdt_header + sizeof(*bpdt_header); + for (i = 0; i < bpdt_header->descriptor_count; i++, bpdt_entry++) { + if ((bpdt_entry->type & GSC_BPDT_ENTRY_TYPE_MASK) != + GSC_BPDT_ENTRY_TYPE_GSC_RBE) + continue; + + min_size = bpdt_entry->sub_partition_offset; + + /* the CPD header parser will check that the CPD header fits */ + if (layout->boot1.size < min_size) { + xe_gt_err(gt, "GSC FW boot section too small for CPD offset: %u < %zu\n", + layout->boot1.size, min_size); + return -ENODATA; + } + + return parse_cpd_header(uc_fw, + (void *)bpdt_header + min_size, + layout->boot1.size - min_size, + "RBEP.man", NULL); + } + + xe_gt_err(gt, "couldn't find CPD header in GSC binary!\n"); + return -ENODATA; +} + static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw) { int ret; @@ -526,6 +601,8 @@ static int parse_headers(struct xe_uc_fw *uc_fw, const struct firmware *fw) * releases use GSC CPD headers. */ switch (uc_fw->type) { + case XE_UC_FW_TYPE_GSC: + return parse_gsc_layout(uc_fw, fw->data, fw->size); case XE_UC_FW_TYPE_HUC: ret = parse_cpd_header(uc_fw, fw->data, fw->size, "HUCP.man", "huc_fw"); if (!ret || ret != -ENOENT) diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h index d6725c963251..87ade41209d0 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -140,6 +140,58 @@ static_assert(sizeof(struct uc_css_header) == 128); * | RSA Key (MTL+ only) | * | ... | * +================================================+ + * + * The GSC binary starts instead with a layout header, which contains the + * locations of the various partitions of the binary. The one we're interested + * in is the boot1 partition, where we can find a BPDT header followed by + * entries, one of which points to the RBE sub-section of the partition, which + * contains the CPD. The GSC blob does not contain a CSS-based binary, so we + * only need to look for the manifest, which is under the "RBEP.man" CPD entry. + * Note that we have no need to find where the actual FW code is inside the + * image because the GSC ROM will itself parse the headers to find it and load + * it. + * The GSC firmware header layout looks like this:: + * + * +================================================+ + * | Layout Pointers | + * | ... | + * | Boot1 offset >---------------------------|------o + * | ... | | + * +================================================+ | + * | + * +================================================+ | + * | BPDT header |<-----o + * +================================================+ + * | BPDT entries[] | + * | entry1 | + * | ... | + * | entryX | + * | type == GSC_RBE | + * | offset >-----------------------------|------o + * | ... 
| | + * +================================================+ | + * | + * +================================================+ | + * | CPD Header |<-----o + * +================================================+ + * | CPD entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "RBEP.man" | + * | ... | + * | offset >----------------------------|------o + * | ... | | + * +================================================+ | + * | + * +================================================+ | + * | Manifest Header |<-----o + * | ... | + * | FW version | + * | ... | + * | Security version | + * | ... | + * +================================================+ */ struct gsc_version { @@ -149,6 +201,67 @@ struct gsc_version { u16 build; } __packed; +struct gsc_partition { + u32 offset; + u32 size; +} __packed; + +struct gsc_layout_pointers { + u8 rom_bypass_vector[16]; + + /* size of this header section, not including ROM bypass vector */ + u16 size; + + /* + * bit0: Backup copy of layout pointers exists + * bits1-15: reserved + */ + u8 flags; + + u8 reserved; + + u32 crc32; + + struct gsc_partition datap; + struct gsc_partition boot1; + struct gsc_partition boot2; + struct gsc_partition boot3; + struct gsc_partition boot4; + struct gsc_partition boot5; + struct gsc_partition temp_pages; +} __packed; + +/* Boot partition structures */ +struct gsc_bpdt_header { + u32 signature; +#define GSC_BPDT_HEADER_SIGNATURE 0x000055AA + + u16 descriptor_count; /* num of entries after the header */ + + u8 version; + u8 configuration; + + u32 crc32; + + u32 build_version; + struct gsc_version tool_version; +} __packed; + +struct gsc_bpdt_entry { + /* + * Bits 0-15: BPDT entry type + * Bits 16-17: reserved + * Bit 18: code sub-partition + * Bits 19-31: reserved + */ + u32 type; +#define GSC_BPDT_ENTRY_TYPE_MASK GENMASK(15, 0) +#define GSC_BPDT_ENTRY_TYPE_GSC_RBE 0x1 + + u32 sub_partition_offset; /* from the base of the BPDT header */ + u32 sub_partition_size; +} __packed; + /* Code partition directory (CPD) structures */ struct gsc_cpd_header_v2 { u32 header_marker; -- cgit v1.2.3 From dd0e89e5edc20d3875ed7ded48e7e97118cdfbc8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:47 -0800 Subject: drm/xe/gsc: GSC FW load The GSC FW must be copied in a 4MB stolen memory allocation, whose GGTT address is then passed as a parameter to a dedicated load instruction submitted via the GSC engine. Since the GSC load is relatively slow (up to 250ms), we perform it asynchronously via a worker. This requires us to make sure that the worker has stopped before suspending/unloading. Note that we can't yet use xe_migrate_copy for the copy because it doesn't work with stolen memory right now, so we do a memcpy from the CPU side instead. 
v2: add comment about timeout value, fix GSC status checking before load (John) Bspec: 65306, 65346 Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/instructions/xe_gsc_commands.h | 34 +++ drivers/gpu/drm/xe/instructions/xe_instr_defs.h | 1 + drivers/gpu/drm/xe/regs/xe_gsc_regs.h | 29 +++ drivers/gpu/drm/xe/xe_gsc.c | 250 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gsc.h | 3 + drivers/gpu/drm/xe/xe_gsc_types.h | 17 ++ drivers/gpu/drm/xe/xe_uc.c | 12 +- 7 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/instructions/xe_gsc_commands.h create mode 100644 drivers/gpu/drm/xe/regs/xe_gsc_regs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h new file mode 100644 index 000000000000..c7a833d7f965 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_COMMANDS_H_ +#define _XE_GSC_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +/* + * All GSCCS-specific commands have fixed length, so we can include it in the + * defines. Note that the generic GSC command header structure includes an + * optional data field in bits 9-21, but there are no commands that actually use + * it; some of the commands are instead defined as having an extended length + * field spanning bits 0-15, even if the extra bits are not required because the + * longest GSCCS command is only 8 dwords. To handle this, the defines below use + * a single field for both data and len. If we ever get a commands that does + * actually have data and this approach doesn't work for it we can re-work it + * at that point. 
+ */ + +#define GSC_OPCODE REG_GENMASK(28, 22) +#define GSC_CMD_DATA_AND_LEN REG_GENMASK(21, 0) + +#define __GSC_INSTR(op, dl) \ + (XE_INSTR_GSC | \ + REG_FIELD_PREP(GSC_OPCODE, op) | \ + REG_FIELD_PREP(GSC_CMD_DATA_AND_LEN, dl)) + +#define GSC_FW_LOAD __GSC_INSTR(1, 2) +#define GSC_FW_LOAD_LIMIT_VALID REG_BIT(31) + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h index e403b4fcc20a..04179b2a48e1 100644 --- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h @@ -15,6 +15,7 @@ */ #define XE_INSTR_CMD_TYPE GENMASK(31, 29) #define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0) +#define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2) #define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3) /* diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h new file mode 100644 index 000000000000..22d2ad9cb64d --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_REGS_H_ +#define _XE_GSC_REGS_H_ + +#include +#include + +#include "regs/xe_reg_defs.h" + +/* Definitions of GSC H/W registers, bits, etc */ + +#define MTL_GSC_HECI1_BASE 0x00116000 +#define MTL_GSC_HECI2_BASE 0x00117000 + +/* + * The FWSTS register values are FW defined and can be different between + * HECI1 and HECI2 + */ +#define HECI_FWSTS1(base) XE_REG((base) + 0xc40) +#define HECI1_FWSTS1_CURRENT_STATE REG_GENMASK(3, 0) +#define HECI1_FWSTS1_CURRENT_STATE_RESET 0 +#define HECI1_FWSTS1_PROXY_STATE_NORMAL 5 +#define HECI1_FWSTS1_INIT_COMPLETE REG_BIT(9) + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 216f36cee0f7..e014b829bc8a 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -5,10 +5,20 @@ #include "xe_gsc.h" +#include + +#include "xe_bb.h" +#include "xe_bo.h" #include "xe_device.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_sched_job.h" #include "xe_uc_fw.h" +#include "instructions/xe_gsc_commands.h" +#include "regs/xe_gsc_regs.h" static struct xe_gt * gsc_to_gt(struct xe_gsc *gsc) @@ -16,6 +26,145 @@ gsc_to_gt(struct xe_gsc *gsc) return container_of(gsc, struct xe_gt, uc.gsc); } +static int memcpy_fw(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + u32 fw_size = gsc->fw.size; + void *storage; + + /* + * FIXME: xe_migrate_copy does not work with stolen mem yet, so we use + * a memcpy for now. 
+ */ + storage = kmalloc(fw_size, GFP_KERNEL); + if (!storage) + return -ENOMEM; + + xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); + xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); + + kfree(storage); + + return 0; +} + +static int emit_gsc_upload(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + u64 offset = xe_bo_ggtt_addr(gsc->private); + struct xe_bb *bb; + struct xe_sched_job *job; + struct dma_fence *fence; + long timeout; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + bb->cs[bb->len++] = GSC_FW_LOAD; + bb->cs[bb->len++] = lower_32_bits(offset); + bb->cs[bb->len++] = upper_32_bits(offset); + bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; + + job = xe_bb_create_job(gsc->q, bb); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +static int gsc_fw_is_loaded(struct xe_gt *gt) +{ + return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & + HECI1_FWSTS1_INIT_COMPLETE; +} + +static int gsc_fw_wait(struct xe_gt *gt) +{ + /* + * GSC load can take up to 250ms from the moment the instruction is + * executed by the GSCCS. To account for possible submission delays or + * other issues, we use a 500ms timeout in the wait here. + */ + return xe_mmio_wait32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_INIT_COMPLETE, + HECI1_FWSTS1_INIT_COMPLETE, + 500 * USEC_PER_MSEC, NULL, false); +} + +static int gsc_upload(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + int err; + + /* we should only be here if the init step were successful */ + xe_assert(xe, xe_uc_fw_is_loadable(&gsc->fw) && gsc->q); + + if (gsc_fw_is_loaded(gt)) { + xe_gt_err(gt, "GSC already loaded at upload time\n"); + return -EEXIST; + } + + err = memcpy_fw(gsc); + if (err) { + xe_gt_err(gt, "Failed to memcpy GSC FW\n"); + return err; + } + + err = emit_gsc_upload(gsc); + if (err) { + xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err)); + return err; + } + + err = gsc_fw_wait(gt); + if (err) { + xe_gt_err(gt, "Failed to wait for GSC load (%pe)\n", ERR_PTR(err)); + return err; + } + + xe_gt_dbg(gt, "GSC FW async load completed\n"); + + return 0; +} + +static void gsc_work(struct work_struct *work) +{ + struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + int ret; + + xe_device_mem_access_get(xe); + xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + + ret = gsc_upload(gsc); + if (ret && ret != -EEXIST) + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + else + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); + xe_device_mem_access_put(xe); +} + int xe_gsc_init(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -23,6 +172,7 @@ int xe_gsc_init(struct xe_gsc *gsc) int ret; gsc->fw.type = XE_UC_FW_TYPE_GSC; + INIT_WORK(&gsc->work, gsc_work); /* The GSC uC is only available on the media GT */ if (tile->media_gt && (gt != tile->media_gt)) { @@ -50,3 +200,103 @@ out: return ret; } +static void 
free_resources(struct drm_device *drm, void *arg) +{ + struct xe_gsc *gsc = arg; + + if (gsc->wq) { + destroy_workqueue(gsc->wq); + gsc->wq = NULL; + } + + if (gsc->q) { + xe_exec_queue_put(gsc->q); + gsc->q = NULL; + } + + if (gsc->private) { + xe_bo_unpin_map_no_vm(gsc->private); + gsc->private = NULL; + } +} + +int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true); + struct xe_exec_queue *q; + struct workqueue_struct *wq; + struct xe_bo *bo; + int err; + + if (!xe_uc_fw_is_available(&gsc->fw)) + return 0; + + if (!hwe) + return -ENODEV; + + bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, + ttm_bo_type_kernel, + XE_BO_CREATE_STOLEN_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + q = xe_exec_queue_create(xe, NULL, + BIT(hwe->logical_instance), 1, hwe, + EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT); + if (IS_ERR(q)) { + xe_gt_err(gt, "Failed to create queue for GSC submission\n"); + err = PTR_ERR(q); + goto out_bo; + } + + wq = alloc_ordered_workqueue("gsc-ordered-wq", 0); + if (!wq) { + err = -ENOMEM; + goto out_q; + } + + gsc->private = bo; + gsc->q = q; + gsc->wq = wq; + + err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + if (err) + return err; + + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOADABLE); + + return 0; + +out_q: + xe_exec_queue_put(q); +out_bo: + xe_bo_unpin_map_no_vm(bo); + return err; +} + +void xe_gsc_load_start(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + if (!xe_uc_fw_is_loadable(&gsc->fw) || !gsc->q) + return; + + /* GSC FW survives GT reset and D3Hot */ + if (gsc_fw_is_loaded(gt)) { + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + return; + } + + queue_work(gsc->wq, &gsc->work); +} + +void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) +{ + if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq) + flush_work(&gsc->work); +} diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index baa7f21f4204..f870eddc77d4 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -9,5 +9,8 @@ #include "xe_gsc_types.h" int xe_gsc_init(struct xe_gsc *gsc); +int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); +void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_load_start(struct xe_gsc *gsc); #endif diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 1bc50583fe58..57fefd66a7ea 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -6,8 +6,13 @@ #ifndef _XE_GSC_TYPES_H_ #define _XE_GSC_TYPES_H_ +#include + #include "xe_uc_fw_types.h" +struct xe_bo; +struct xe_exec_queue; + /** * struct xe_gsc - GSC */ @@ -17,6 +22,18 @@ struct xe_gsc { /** @security_version: SVN found in the fetched blob */ u32 security_version; + + /** @private: Private data for use by the GSC FW */ + struct xe_bo *private; + + /** @q: Default queue used for submissions to GSC FW */ + struct xe_exec_queue *q; + + /** @wq: workqueue to handle jobs for delayed load and proxy handling */ + struct workqueue_struct *wq; + + /** @work: delayed load and proxy handling work */ + struct work_struct work; }; #endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index b67154c78dff..15dcd1f91e9c 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -74,11 +74,17 @@ err: */ 
int xe_uc_init_post_hwconfig(struct xe_uc *uc) { + int err; + /* GuC submission not enabled, nothing to do */ if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; - return xe_guc_init_post_hwconfig(&uc->guc); + err = xe_guc_init_post_hwconfig(&uc->guc); + if (err) + return err; + + return xe_gsc_init_post_hwconfig(&uc->gsc); } static int uc_reset(struct xe_uc *uc) @@ -173,6 +179,9 @@ int xe_uc_init_hw(struct xe_uc *uc) ret = xe_huc_auth(&uc->huc); xe_gt_assert(uc_to_gt(uc), !ret); + /* GSC load is async */ + xe_gsc_load_start(&uc->gsc); + return 0; } @@ -197,6 +206,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { + xe_gsc_wait_for_worker_completion(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } -- cgit v1.2.3 From aae84bf1cd96889a7d80b6b50131f60aa63899d7 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:48 -0800 Subject: drm/xe/gsc: Implement WA 14015076503 When the GSC FW is loaded, we need to inform it when a GSCCS reset is coming and then wait 200ms for it to get ready to process the reset. v2: move WA code to GSC file, use variable in Makefile (John) Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 11 ++++++++++- drivers/gpu/drm/xe/regs/xe_gsc_regs.h | 10 ++++++++++ drivers/gpu/drm/xe/xe_gsc.c | 29 +++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gsc.h | 4 ++++ drivers/gpu/drm/xe/xe_gt.c | 5 +++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 6 files changed, 59 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 184e2724ce7b..5806ebb0256a 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -37,7 +37,16 @@ quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) $(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules $(call cmd,wa_oob) -$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o $(obj)/xe_ttm_stolen_mgr.o: $(generated_oob) +uses_generated_oob := \ + $(obj)/xe_gsc.o \ + $(obj)/xe_guc.o \ + $(obj)/xe_migrate.o \ + $(obj)/xe_ring_ops.o \ + $(obj)/xe_vm.o \ + $(obj)/xe_wa.o \ + $(obj)/xe_ttm_stolen_mgr.o + +$(uses_generated_oob): $(generated_oob) # Please keep these build lists sorted! 
diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 22d2ad9cb64d..9a84b55d66ee 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -16,6 +16,13 @@ #define MTL_GSC_HECI1_BASE 0x00116000 #define MTL_GSC_HECI2_BASE 0x00117000 +#define HECI_H_CSR(base) XE_REG((base) + 0x4) +#define HECI_H_CSR_IE REG_BIT(0) +#define HECI_H_CSR_IS REG_BIT(1) +#define HECI_H_CSR_IG REG_BIT(2) +#define HECI_H_CSR_RDY REG_BIT(3) +#define HECI_H_CSR_RST REG_BIT(4) + /* * The FWSTS register values are FW defined and can be different between * HECI1 and HECI2 @@ -26,4 +33,7 @@ #define HECI1_FWSTS1_PROXY_STATE_NORMAL 5 #define HECI1_FWSTS1_INIT_COMPLETE REG_BIT(9) +#define HECI_H_GS1(base) XE_REG((base) + 0xc4c) +#define HECI_H_GS1_ER_PREP REG_BIT(0) + #endif diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index e014b829bc8a..5731c026a77a 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -7,6 +7,7 @@ #include +#include "generated/xe_wa_oob.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" @@ -17,6 +18,7 @@ #include "xe_mmio.h" #include "xe_sched_job.h" #include "xe_uc_fw.h" +#include "xe_wa.h" #include "instructions/xe_gsc_commands.h" #include "regs/xe_gsc_regs.h" @@ -300,3 +302,30 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) if (xe_uc_fw_is_loadable(&gsc->fw) && gsc->wq) flush_work(&gsc->work); } + +/* + * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a + * GSC engine reset by writing a notification bit in the GS1 register and then + * triggering an interrupt to GSC; from the interrupt it will take up to 200ms + * for the FW to get prepare for the reset, so we need to wait for that amount + * of time. + * After the reset is complete we need to then clear the GS1 register. + */ +void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) +{ + u32 gs1_set = prep ? HECI_H_GS1_ER_PREP : 0; + u32 gs1_clr = prep ? 
0 : HECI_H_GS1_ER_PREP; + + /* WA only applies if the GSC is loaded */ + if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) + return; + + xe_mmio_rmw32(gt, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); + + if (prep) { + /* make sure the reset bit is clear when writing the CSR reg */ + xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), + HECI_H_CSR_RST, HECI_H_CSR_IG); + msleep(200); + } +} diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index f870eddc77d4..bc1ef7f31ea2 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -8,9 +8,13 @@ #include "xe_gsc_types.h" +struct xe_gt; + int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); +void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); + #endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0dddb751c6a4..00193b02a7e5 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -21,6 +21,7 @@ #include "xe_execlist.h" #include "xe_force_wake.h" #include "xe_ggtt.h" +#include "xe_gsc.h" #include "xe_gt_clock.h" #include "xe_gt_idle_sysfs.h" #include "xe_gt_mcr.h" @@ -512,12 +513,16 @@ static int do_gt_reset(struct xe_gt *gt) { int err; + xe_gsc_wa_14015076503(gt, true); + xe_mmio_write32(gt, GDRST, GRDOM_FULL); err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); if (err) xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n", ERR_PTR(err)); + xe_gsc_wa_14015076503(gt, false); + return err; } diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 752842d734be..c7b7d40b5d57 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -20,3 +20,4 @@ 16017236439 PLATFORM(PVC) 22010954014 PLATFORM(DG2) 14019821291 MEDIA_VERSION_RANGE(1300, 2000) +14015076503 MEDIA_VERSION(1300) -- cgit v1.2.3 From f63182b45d67e1ff1e9c65f08adb4d803a5d861f Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:49 -0800 Subject: drm/xe/gsc: Trigger a driver flr to cleanup the GSC on unload GSC is only killed by an FLR, so we need to trigger one on unload to make sure we stop it. This is because we assign a chunk of memory to the GSC as part of the FW load, so we need to make sure it stops using it when we release it to the system on driver unload. Note that this is not a problem of the unload per-se, because the GSC will not touch that memory unless there are requests for it coming from the driver; therefore, no accesses will happen while Xe is not loaded, but if we re-load the driver then the GSC might wake up and try to access that old memory location again. Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 5731c026a77a..907585846a89 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -130,6 +130,23 @@ static int gsc_upload(struct xe_gsc *gsc) return err; } + /* + * GSC is only killed by an FLR, so we need to trigger one on unload to + * make sure we stop it. This is because we assign a chunk of memory to + * the GSC as part of the FW load, so we need to make sure it stops + * using it when we release it to the system on driver unload. 
Note that + * this is not a problem of the unload per-se, because the GSC will not + * touch that memory unless there are requests for it coming from the + * driver; therefore, no accesses will happen while Xe is not loaded, + * but if we re-load the driver then the GSC might wake up and try to + * access that old memory location again. + * Given that an FLR is a very disruptive action (see the FLR function + * for details), we want to do it as the last action before releasing + * the access to the MMIO bar, which means we need to do it as part of + * mmio cleanup. + */ + xe->needs_flr_on_fini = true; + err = emit_gsc_upload(gsc); if (err) { xe_gt_err(gt, "Failed to emit GSC FW upload (%pe)\n", ERR_PTR(err)); -- cgit v1.2.3 From 0881cbe04077785f98496c236386099d20854ad7 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:50 -0800 Subject: drm/xe/gsc: Query GSC compatibility version The version is obtained via a dedicated MKHI GSC HECI command. The compatibility version is what we want to match against for the GSC, so we need to call the FW version checker after obtaining the version. Since this is the first time we send a GSC HECI command via the GSCCS, this patch also introduces common infrastructure to send such commands to the GSC. Communication with the GSC FW is done via input/output buffers, whose addresses are provided via a GSCCS command. The buffers contain a generic header and a client-specific packet (e.g. PXP, HDCP); the clients don't care about the header format and/or the GSCCS command in the batch, they only care about their client-specific header. This patch therefore introduces helpers that allow the callers to automatically fill in the input header, submit the GSCCS job and decode the output header, to make it so that the caller only needs to worry about their client-specific input and output messages. 
v3: squash of 2 separate patches ahead of merge, so that the common functions and their first user are added at the same time Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Suraj Kandpal Cc: John Harrison Reviewed-by: John Harrison #v1 Reviewed-by: Suraj Kandpal Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/abi/gsc_command_header_abi.h | 46 ++++++ drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h | 39 +++++ drivers/gpu/drm/xe/instructions/xe_gsc_commands.h | 2 + drivers/gpu/drm/xe/xe_gsc.c | 82 ++++++++++ drivers/gpu/drm/xe/xe_gsc_submit.c | 184 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gsc_submit.h | 30 ++++ drivers/gpu/drm/xe/xe_uc_fw.c | 18 ++- drivers/gpu/drm/xe/xe_uc_fw.h | 1 + 9 files changed, 397 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/xe/abi/gsc_command_header_abi.h create mode 100644 drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h create mode 100644 drivers/gpu/drm/xe/xe_gsc_submit.c create mode 100644 drivers/gpu/drm/xe/xe_gsc_submit.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 5806ebb0256a..161e8ead9114 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -68,6 +68,7 @@ xe-y += xe_bb.o \ xe_ggtt.o \ xe_gpu_scheduler.o \ xe_gsc.o \ + xe_gsc_submit.o \ xe_gt.o \ xe_gt_clock.o \ xe_gt_debugfs.o \ diff --git a/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h new file mode 100644 index 000000000000..a4c2646803b5 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/gsc_command_header_abi.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GSC_COMMAND_HEADER_ABI_H +#define _ABI_GSC_COMMAND_HEADER_ABI_H + +#include + +struct intel_gsc_mtl_header { + u32 validity_marker; +#define GSC_HECI_VALIDITY_MARKER 0xA578875A + + u8 heci_client_id; + + u8 reserved1; + + u16 header_version; +#define MTL_GSC_HEADER_VERSION 1 + + /* FW allows host to decide host_session handle as it sees fit. 
*/ + u64 host_session_handle; + + /* handle generated by FW for messages that need to be re-submitted */ + u64 gsc_message_handle; + + u32 message_size; /* lower 20 bits only, upper 12 are reserved */ + + /* + * Flags mask: + * Bit 0: Pending + * Bit 1: Session Cleanup; + * Bits 2-15: Flags + * Bits 16-31: Extension Size + * According to internal spec flags are either input or output + * we distinguish the flags using OUTFLAG or INFLAG + */ + u32 flags; +#define GSC_OUTFLAG_MSG_PENDING BIT(0) +#define GSC_INFLAG_MSG_CLEANUP BIT(1) + + u32 status; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h new file mode 100644 index 000000000000..ad4d041873ab --- /dev/null +++ b/drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GSC_MKHI_COMMANDS_ABI_H +#define _ABI_GSC_MKHI_COMMANDS_ABI_H + +#include + +/* Heci client ID for MKHI commands */ +#define HECI_MEADDRESS_MKHI 7 + +/* Generic MKHI header */ +struct gsc_mkhi_header { + u8 group_id; + u8 command; + u8 reserved; + u8 result; +} __packed; + +/* GFX_SRV commands */ +#define MKHI_GROUP_ID_GFX_SRV 0x30 + +#define MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION (0x42) + +struct gsc_get_compatibility_version_in { + struct gsc_mkhi_header header; +} __packed; + +struct gsc_get_compatibility_version_out { + struct gsc_mkhi_header header; + u16 proj_major; + u16 compat_major; + u16 compat_minor; + u16 reserved[5]; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h index c7a833d7f965..f8949cad9d0f 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gsc_commands.h @@ -28,6 +28,8 @@ REG_FIELD_PREP(GSC_OPCODE, op) | \ REG_FIELD_PREP(GSC_CMD_DATA_AND_LEN, dl)) +#define GSC_HECI_CMD_PKT __GSC_INSTR(0, 6) + #define GSC_FW_LOAD __GSC_INSTR(1, 2) #define GSC_FW_LOAD_LIMIT_VALID REG_BIT(31) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 907585846a89..d8ec04e3c006 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -7,11 +7,13 @@ #include +#include "abi/gsc_mkhi_commands_abi.h" #include "generated/xe_wa_oob.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_gsc_submit.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_map.h" @@ -91,6 +93,78 @@ static int emit_gsc_upload(struct xe_gsc *gsc) return 0; } +#define version_query_wr(xe_, map_, offset_, field_, val_) \ + xe_map_wr_field(xe_, map_, offset_, struct gsc_get_compatibility_version_in, field_, val_) +#define version_query_rd(xe_, map_, offset_, field_) \ + xe_map_rd_field(xe_, map_, offset_, struct gsc_get_compatibility_version_out, field_) + +static u32 emit_version_query_msg(struct xe_device *xe, struct iosys_map *map, u32 wr_offset) +{ + xe_map_memset(xe, map, wr_offset, 0, sizeof(struct gsc_get_compatibility_version_in)); + + version_query_wr(xe, map, wr_offset, header.group_id, MKHI_GROUP_ID_GFX_SRV); + version_query_wr(xe, map, wr_offset, header.command, + MKHI_GFX_SRV_GET_HOST_COMPATIBILITY_VERSION); + + return wr_offset + sizeof(struct gsc_get_compatibility_version_in); +} + +#define GSC_VER_PKT_SZ SZ_4K /* 4K each for input and output */ +static int query_compatibility_version(struct xe_gsc *gsc) +{ + struct xe_uc_fw_version *compat = 
&gsc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + u32 wr_offset; + u32 rd_offset; + u64 ggtt_offset; + int err; + + bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) { + xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); + return PTR_ERR(bo); + } + + ggtt_offset = xe_bo_ggtt_addr(bo); + + wr_offset = xe_gsc_emit_header(xe, &bo->vmap, 0, HECI_MEADDRESS_MKHI, 0, + sizeof(struct gsc_get_compatibility_version_in)); + wr_offset = emit_version_query_msg(xe, &bo->vmap, wr_offset); + + err = xe_gsc_pkt_submit_kernel(gsc, ggtt_offset, wr_offset, + ggtt_offset + GSC_VER_PKT_SZ, + GSC_VER_PKT_SZ); + if (err) { + xe_gt_err(gt, + "failed to submit GSC request for compatibility version: %d\n", + err); + goto out_bo; + } + + err = xe_gsc_read_out_header(xe, &bo->vmap, GSC_VER_PKT_SZ, + sizeof(struct gsc_get_compatibility_version_out), + &rd_offset); + if (err) { + xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err); + return err; + } + + compat->major = version_query_rd(xe, &bo->vmap, rd_offset, compat_major); + compat->minor = version_query_rd(xe, &bo->vmap, rd_offset, compat_minor); + + xe_gt_info(gt, "found GSC cv%u.%u\n", compat->major, compat->minor); + +out_bo: + xe_bo_unpin_map_no_vm(bo); + return err; +} + static int gsc_fw_is_loaded(struct xe_gt *gt) { return xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)) & @@ -159,6 +233,14 @@ static int gsc_upload(struct xe_gsc *gsc) return err; } + err = query_compatibility_version(gsc); + if (err) + return err; + + err = xe_uc_fw_check_version_requirements(&gsc->fw); + if (err) + return err; + xe_gt_dbg(gt, "GSC FW async load completed\n"); return 0; diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c new file mode 100644 index 000000000000..8c5381e5913f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gsc_submit.h" + +#include "abi/gsc_command_header_abi.h" +#include "xe_bb.h" +#include "xe_exec_queue.h" +#include "xe_gt_printk.h" +#include "xe_gt_types.h" +#include "xe_map.h" +#include "xe_sched_job.h" +#include "instructions/xe_gsc_commands.h" +#include "regs/xe_gsc_regs.h" + +#define GSC_HDR_SIZE (sizeof(struct intel_gsc_mtl_header)) /* shorthand define */ + +#define mtl_gsc_header_wr(xe_, map_, offset_, field_, val_) \ + xe_map_wr_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_, val_) + +#define mtl_gsc_header_rd(xe_, map_, offset_, field_) \ + xe_map_rd_field(xe_, map_, offset_, struct intel_gsc_mtl_header, field_) + +/* + * GSC FW allows us to define the host_session_handle as we see fit, as long + * as we use unique identifier for each user, with handle 0 being reserved for + * kernel usage. + * To be able to differentiate which client subsystem owns the given session, we + * include the client id in the top 8 bits of the handle. 
+ */ +#define HOST_SESSION_CLIENT_MASK GENMASK_ULL(63, 56) + +static struct xe_gt * +gsc_to_gt(struct xe_gsc *gsc) +{ + return container_of(gsc, struct xe_gt, uc.gsc); +} + +/** + * xe_gsc_emit_header - write the MTL GSC header in memory + * @xe: the Xe device + * @map: the iosys map to write to + * @offset: offset from the start of the map at which to write the header + * @heci_client_id: client id identifying the type of command (see abi for values) + * @host_session_id: host session ID of the caller + * @payload_size: size of the payload that follows the header + * + * Returns: offset memory location following the header + */ +u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset, + u8 heci_client_id, u64 host_session_id, u32 payload_size) +{ + xe_assert(xe, !(host_session_id & HOST_SESSION_CLIENT_MASK)); + + if (host_session_id) + host_session_id |= FIELD_PREP(HOST_SESSION_CLIENT_MASK, heci_client_id); + + xe_map_memset(xe, map, offset, 0, GSC_HDR_SIZE); + + mtl_gsc_header_wr(xe, map, offset, validity_marker, GSC_HECI_VALIDITY_MARKER); + mtl_gsc_header_wr(xe, map, offset, heci_client_id, heci_client_id); + mtl_gsc_header_wr(xe, map, offset, host_session_handle, host_session_id); + mtl_gsc_header_wr(xe, map, offset, header_version, MTL_GSC_HEADER_VERSION); + mtl_gsc_header_wr(xe, map, offset, message_size, payload_size + GSC_HDR_SIZE); + + return offset + GSC_HDR_SIZE; +}; + +/** + * xe_gsc_check_and_update_pending - check the pending bit and update the input + * header with the retry handle from the output header + * @xe: the Xe device + * @in: the iosys map containing the input buffer + * @offset_in: offset within the iosys at which the input buffer is located + * @out: the iosys map containing the output buffer + * @offset_out: offset within the iosys at which the output buffer is located + * + * Returns: true if the pending bit was set, false otherwise + */ +bool xe_gsc_check_and_update_pending(struct xe_device *xe, + struct iosys_map *in, u32 offset_in, + struct iosys_map *out, u32 offset_out) +{ + if (mtl_gsc_header_rd(xe, out, offset_out, flags) & GSC_OUTFLAG_MSG_PENDING) { + u64 handle = mtl_gsc_header_rd(xe, out, offset_out, gsc_message_handle); + + mtl_gsc_header_wr(xe, in, offset_in, gsc_message_handle, handle); + + return true; + } + + return false; +} + +/** + * xe_gsc_read_out_header - reads and validates the output header and returns + * the offset of the reply following the header + * @xe: the Xe device + * @map: the iosys map containing the output buffer + * @offset: offset within the iosys at which the output buffer is located + * @min_payload_size: minimum size of the message excluding the gsc header + * @payload_offset: optional pointer to be set to the payload offset + * + * Returns: -errno value on failure, 0 otherwise + */ +int xe_gsc_read_out_header(struct xe_device *xe, + struct iosys_map *map, u32 offset, + u32 min_payload_size, + u32 *payload_offset) +{ + u32 marker = mtl_gsc_header_rd(xe, map, offset, validity_marker); + u32 size = mtl_gsc_header_rd(xe, map, offset, message_size); + u32 payload_size = size - GSC_HDR_SIZE; + + if (marker != GSC_HECI_VALIDITY_MARKER) + return -EPROTO; + + if (size < GSC_HDR_SIZE || payload_size < min_payload_size) + return -ENODATA; + + if (payload_offset) + *payload_offset = offset + GSC_HDR_SIZE; + + return 0; +} + +/** + * xe_gsc_pkt_submit_kernel - submit a kernel heci pkt to the GSC + * @gsc: the GSC uC + * @addr_in: GGTT address of the message to send to the GSC + * @size_in: size of the message 
to send to the GSC + * @addr_out: GGTT address for the GSC to write the reply to + * @size_out: size of the memory reserved for the reply + */ +int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in, + u64 addr_out, u32 size_out) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_bb *bb; + struct xe_sched_job *job; + struct dma_fence *fence; + long timeout; + + if (size_in < GSC_HDR_SIZE) + return -ENODATA; + + if (size_out < GSC_HDR_SIZE) + return -ENOMEM; + + bb = xe_bb_new(gt, 8, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + bb->cs[bb->len++] = GSC_HECI_CMD_PKT; + bb->cs[bb->len++] = lower_32_bits(addr_in); + bb->cs[bb->len++] = upper_32_bits(addr_in); + bb->cs[bb->len++] = size_in; + bb->cs[bb->len++] = lower_32_bits(addr_out); + bb->cs[bb->len++] = upper_32_bits(addr_out); + bb->cs[bb->len++] = size_out; + bb->cs[bb->len++] = 0; + + job = xe_bb_create_job(gsc->q, bb); + if (IS_ERR(job)) { + xe_bb_free(bb, NULL); + return PTR_ERR(job); + } + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + xe_bb_free(bb, NULL); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h new file mode 100644 index 000000000000..0801da5d446a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_submit.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_SUBMIT_H_ +#define _XE_GSC_SUBMIT_H_ + +#include + +struct iosys_map; +struct xe_device; +struct xe_gsc; + +u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset, + u8 heci_client_id, u64 host_session_id, u32 payload_size); + +bool xe_gsc_check_and_update_pending(struct xe_device *xe, + struct iosys_map *in, u32 offset_in, + struct iosys_map *out, u32 offset_out); + +int xe_gsc_read_out_header(struct xe_device *xe, + struct iosys_map *map, u32 offset, + u32 min_payload_size, + u32 *payload_offset); + +int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in, + u64 addr_out, u32 size_out); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2d0fb7058d66..74f00fdcd968 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -224,8 +224,11 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) uc_fw->versions.wanted.minor = entries[i].minor; uc_fw->full_ver_required = entries[i].full_ver_required; - /* compatibility version checking coming soon */ - uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE; + if (uc_fw->type == XE_UC_FW_TYPE_GSC) + uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY; + else + uc_fw->versions.wanted_type = XE_UC_FW_VER_RELEASE; + break; } } @@ -321,7 +324,7 @@ static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) uc_fw->private_data_size = css->private_data_size; } -static int uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw) +int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw) { struct xe_device *xe = uc_fw_to_xe(uc_fw); struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted; @@ -678,9 +681,12 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) "Using %s firmware from %s", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); - err = uc_fw_check_version_requirements(uc_fw); - if (err) - goto fail; + /* for GSC FW we want the compatibility 
version, which we query after load */ + if (uc_fw->type != XE_UC_FW_TYPE_GSC) { + err = xe_uc_fw_check_version_requirements(uc_fw); + if (err) + goto fail; + } obj = xe_bo_create_from_data(xe, tile, fw->data, fw->size, ttm_bo_type_kernel, diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index 7feafe1695f9..85c20795d1f8 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -17,6 +17,7 @@ struct drm_printer; int xe_uc_fw_init(struct xe_uc_fw *uc_fw); size_t xe_uc_fw_copy_rsa(struct xe_uc_fw *uc_fw, void *dst, u32 max_len); int xe_uc_fw_upload(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags); +int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw); void xe_uc_fw_print(struct xe_uc_fw *uc_fw, struct drm_printer *p); static inline u32 xe_uc_fw_rsa_offset(struct xe_uc_fw *uc_fw) -- cgit v1.2.3 From 9897eb855544f0ef0921a5cc4517deb1fcf06c6f Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:51 -0800 Subject: drm/xe/gsc: Define GSCCS for MTL Add the GSCCS to the media_xelpmp engine list. Note that since the GSCCS is only used with the GSC FW, we can consider it disabled if we don't have the FW available. v2: mark GSCCS as allowed on the media IP in kunit tests Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_pci_test.c | 9 +++------ drivers/gpu/drm/xe/xe_hw_engine.c | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_pci.c | 2 +- 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index daf652517608..171e4180f1aa 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -33,13 +33,10 @@ static void check_media_ip(const struct xe_media_desc *media) struct kunit *test = xe_cur_kunit(); u64 mask = media->hw_engine_mask; - /* - * VCS and VECS engines are allowed on the media IP - * - * TODO: Add GSCCS once support is added to the driver. - */ + /* VCS, VECS and GSCCS engines are allowed on the media IP */ mask &= ~(XE_HW_ENGINE_VCS_MASK | - XE_HW_ENGINE_VECS_MASK); + XE_HW_ENGINE_VECS_MASK | + XE_HW_ENGINE_GSCCS_MASK); /* Any remaining engines are an error */ KUNIT_ASSERT_EQ(test, mask, 0); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index e831e63c5e48..c52c26c395a7 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -26,6 +26,7 @@ #include "xe_rtp.h" #include "xe_sched_job.h" #include "xe_tuning.h" +#include "xe_uc_fw.h" #include "xe_wa.h" #define MAX_MMIO_BASES 3 @@ -610,6 +611,24 @@ static void read_compute_fuses(struct xe_gt *gt) read_compute_fuses_from_dss(gt); } +static void check_gsc_availability(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) + return; + + /* + * The GSCCS is only used to communicate with the GSC FW, so if we don't + * have the FW there is nothing we need the engine for and can therefore + * skip its initialization. 
+ */ + if (!xe_uc_fw_is_available(>->uc.gsc.fw)) { + gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0); + drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); + } +} + int xe_hw_engines_init_early(struct xe_gt *gt) { int i; @@ -617,6 +636,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt) read_media_fuses(gt); read_copy_fuses(gt); read_compute_fuses(gt); + check_gsc_availability(gt); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN); BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6d062478c1f2..bbf2f5b64bac 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -199,7 +199,7 @@ static const struct xe_media_desc media_xelpmp = { .name = "Xe_LPM+", .hw_engine_mask = BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | - BIT(XE_HW_ENGINE_VECS0), /* TODO: add GSC0 */ + BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0) }; static const struct xe_media_desc media_xe2 = { -- cgit v1.2.3 From 5152234e2e7a1d5b0897733f84597df23cde98b1 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 17 Nov 2023 14:51:52 -0800 Subject: drm/xe/gsc: Define GSC FW for MTL We track GSC FW based on its compatibility version, which is what determines the interface it supports. Also add a modparam override like the ones for GuC and HuC. v2: fix module param description (John) Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_module.c | 4 ++++ drivers/gpu/drm/xe/xe_module.h | 1 + drivers/gpu/drm/xe/xe_uc_fw.c | 20 ++++++++++++-------- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 1ea883f48c63..51bf69b7ab22 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -41,6 +41,10 @@ module_param_named_unsafe(huc_firmware_path, xe_modparam.huc_firmware_path, char MODULE_PARM_DESC(huc_firmware_path, "HuC firmware path to use instead of the default one - empty string disables"); +module_param_named_unsafe(gsc_firmware_path, xe_modparam.gsc_firmware_path, charp, 0400); +MODULE_PARM_DESC(gsc_firmware_path, + "GSC firmware path to use instead of the default one - empty string disables"); + module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, "Force probe options for specified devices. 
See CONFIG_DRM_XE_FORCE_PROBE for details."); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 51d75ff12376..88ef0e8b2bfd 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -16,6 +16,7 @@ struct xe_modparam { int guc_log_level; char *guc_firmware_path; char *huc_firmware_path; + char *gsc_firmware_path; char *force_probe; }; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 74f00fdcd968..2fcec528a1d1 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -121,6 +121,10 @@ struct fw_blobs_by_type { fw_def(ROCKETLAKE, no_ver(i915, huc, tgl)) \ fw_def(TIGERLAKE, no_ver(i915, huc, tgl)) +/* for the GSC FW we match the compatibility version and not the release one */ +#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \ + fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0)) + #define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \ __stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin" @@ -155,6 +159,7 @@ XE_GUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_mmp_ver, fw_filename_major_ver) XE_HUC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_mmp_ver, fw_filename_no_ver) +XE_GSC_FIRMWARE_DEFS(XE_UC_MODULE_FIRMWARE, fw_filename_major_ver) static struct xe_gt * __uc_fw_to_gt(struct xe_uc_fw *uc_fw, enum xe_uc_fw_type type) @@ -196,23 +201,19 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) uc_fw_entry_mmp_ver, uc_fw_entry_no_ver) }; + static const struct uc_fw_entry entries_gsc[] = { + XE_GSC_FIRMWARE_DEFS(XE_UC_FW_ENTRY, uc_fw_entry_major_ver) + }; static const struct fw_blobs_by_type blobs_all[XE_UC_FW_NUM_TYPES] = { [XE_UC_FW_TYPE_GUC] = { entries_guc, ARRAY_SIZE(entries_guc) }, [XE_UC_FW_TYPE_HUC] = { entries_huc, ARRAY_SIZE(entries_huc) }, + [XE_UC_FW_TYPE_GSC] = { entries_gsc, ARRAY_SIZE(entries_gsc) }, }; static const struct uc_fw_entry *entries; enum xe_platform p = xe->info.platform; u32 count; int i; - /* - * GSC FW support is still not fully in place, so we're not defining - * the FW blob yet because we don't want the driver to attempt to load - * it until we're ready for it. - */ - if (uc_fw->type == XE_UC_FW_TYPE_GSC) - return; - xe_assert(xe, uc_fw->type < ARRAY_SIZE(blobs_all)); entries = blobs_all[uc_fw->type].entries; count = blobs_all[uc_fw->type].count; @@ -248,6 +249,9 @@ uc_fw_override(struct xe_uc_fw *uc_fw) case XE_UC_FW_TYPE_HUC: path_override = xe_modparam.huc_firmware_path; break; + case XE_UC_FW_TYPE_GSC: + path_override = xe_modparam.gsc_firmware_path; + break; default: break; } -- cgit v1.2.3 From fcf98d68c00216b61b034f4d164e5c3074db636a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 27 Nov 2023 09:44:59 +0000 Subject: drm/xe: fix mem_access for early lrc generation We spawn some hw queues during device probe to generate the default LRC for every engine type, however the queue destruction step is typically async. Queue destruction needs to do stuff like GuC context deregister which requires GuC CT, which in turn requires an active mem_access ref. The caller during probe is meant to hold the mem_access token, however due to the async destruction it might have already been dropped if we are unlucky. Similar to how we already handle migrate VMs for which there is no mem_access ref, fix this by keeping the callers token alive, releasing it only when destroying the queue. We can treat a NULL vm as indication that we need to grab our own extra ref. 
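In condensed form (simplified from the hunks below), the fix pairs an extra device ref at queue creation with a matching put at queue teardown:

	/* creation: a kernel/migrate VM, or no VM at all, means nobody holds an
	 * rpm ref for us, so take one that survives async destruction */
	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) &&
	    (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));

	/* teardown: drop that ref only once GuC CT actions are no longer needed */
	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) &&
	    (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
		xe_device_mem_access_put(gt_to_xe(q->gt));
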
Fixes the following splat sometimes seen during load: [ 1682.899930] WARNING: CPU: 1 PID: 8642 at drivers/gpu/drm/xe/xe_device.c:537 xe_device_assert_mem_access+0x27/0x30 [xe] [ 1682.900209] CPU: 1 PID: 8642 Comm: kworker/u24:97 Tainted: G U W E N 6.6.0-rc3+ #6 [ 1682.900214] Workqueue: submit_wq xe_sched_process_msg_work [xe] [ 1682.900303] RIP: 0010:xe_device_assert_mem_access+0x27/0x30 [xe] [ 1682.900388] Code: 90 90 90 66 0f 1f 00 0f 1f 44 00 00 53 48 89 fb e8 1e 6c 03 00 48 85 c0 74 06 5b c3 cc cc cc cc 8b 83 28 23 00 00 85 c0 75 f0 <0f> 0b 5b c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 [ 1682.900390] RSP: 0018:ffffc900021cfb68 EFLAGS: 00010246 [ 1682.900394] RAX: 0000000000000000 RBX: ffff8886a96d8000 RCX: 0000000000000000 [ 1682.900396] RDX: 0000000000000001 RSI: ffff8886a6311a00 RDI: ffff8886a96d8000 [ 1682.900398] RBP: ffffc900021cfcc0 R08: 0000000000000001 R09: 0000000000000000 [ 1682.900400] R10: ffffc900021cfcd0 R11: 0000000000000002 R12: 0000000000000004 [ 1682.900402] R13: 0000000000000000 R14: ffff8886a6311990 R15: ffffc900021cfd74 [ 1682.900405] FS: 0000000000000000(0000) GS:ffff888829880000(0000) knlGS:0000000000000000 [ 1682.900407] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1682.900409] CR2: 000055f70bad3fb0 CR3: 000000025243a004 CR4: 00000000003706e0 [ 1682.900412] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1682.900413] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1682.900415] Call Trace: [ 1682.900418] [ 1682.900420] ? xe_device_assert_mem_access+0x27/0x30 [xe] [ 1682.900504] ? __warn+0x85/0x170 [ 1682.900510] ? xe_device_assert_mem_access+0x27/0x30 [xe] [ 1682.900596] ? report_bug+0x171/0x1a0 [ 1682.900604] ? handle_bug+0x3c/0x80 [ 1682.900608] ? exc_invalid_op+0x17/0x70 [ 1682.900612] ? asm_exc_invalid_op+0x1a/0x20 [ 1682.900621] ? xe_device_assert_mem_access+0x27/0x30 [xe] [ 1682.900706] ? xe_device_assert_mem_access+0x12/0x30 [xe] [ 1682.900790] guc_ct_send_locked+0xb9/0x1550 [xe] [ 1682.900882] ? lock_acquire+0xca/0x2b0 [ 1682.900885] ? guc_ct_send+0x3c/0x1a0 [xe] [ 1682.900977] ? lock_is_held_type+0x9b/0x110 [ 1682.900984] ? __mutex_lock+0xc0/0xb90 [ 1682.900989] ? __pfx___drm_printfn_info+0x10/0x10 [ 1682.900999] guc_ct_send+0x53/0x1a0 [xe] [ 1682.901090] ? __lock_acquire+0xf22/0x21b0 [ 1682.901097] ? process_one_work+0x1a0/0x500 [ 1682.901109] xe_guc_ct_send+0x19/0x50 [xe] [ 1682.901202] set_min_preemption_timeout+0x75/0xa0 [xe] [ 1682.901294] disable_scheduling_deregister+0x55/0x250 [xe] [ 1682.901383] ? xe_sched_process_msg_work+0x76/0xd0 [xe] [ 1682.901467] ? lock_release+0xc9/0x260 [ 1682.901474] xe_sched_process_msg_work+0x82/0xd0 [xe] [ 1682.901559] process_one_work+0x20a/0x500 v2: Add the splat Signed-off-by: Matthew Auld Cc: Vinay Belgaumkar Cc: Matthew Brost Cc: Rodrigo Vivi Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 98fc13c89a4d..2bab6fbd82f5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -90,12 +90,12 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, /* * Normally the user vm holds an rpm ref to keep the device * awake, and the context holds a ref for the vm, however for - * some engines we use the kernels migrate vm underneath which - * offers no such rpm ref. 
Make sure we keep a ref here, so we - * can perform GuC CT actions when needed. Caller is expected to - * have already grabbed the rpm ref outside any sensitive locks. + * some engines we use the kernels migrate vm underneath which offers no + * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we + * can perform GuC CT actions when needed. Caller is expected to have + * already grabbed the rpm ref outside any sensitive locks. */ - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM)) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm)) drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); return q; @@ -172,10 +172,10 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) for (i = 0; i < q->width; ++i) xe_lrc_finish(q->lrc + i); + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) + xe_device_mem_access_put(gt_to_xe(q->gt)); if (q->vm) xe_vm_put(q->vm); - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM)) - xe_device_mem_access_put(gt_to_xe(q->gt)); kfree(q); } -- cgit v1.2.3 From 5bb83841a3b9cecc49ae1f02e85909b426a6facc Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 15 Nov 2023 12:58:16 +0100 Subject: drm/xe/kunit: Return number of iterated devices In xe_call_for_each_device() we are already counting number of iterated devices. Lets make that available to the caller too. We will use that functionality in upcoming patches. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231115115816.1993-1-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index a40879da2fbe..306ff8cb35cb 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -42,8 +42,8 @@ static int dev_to_xe_device_fn(struct device *dev, void *__data) * function. Across each function call, drm_dev_enter() / drm_dev_exit() is * called for the corresponding drm device. * - * Return: Zero or the error code of a call to @xe_fn returning an error - * code. + * Return: Number of devices iterated or + * the error code of a call to @xe_fn returning an error code. */ int xe_call_for_each_device(xe_device_fn xe_fn) { @@ -59,7 +59,7 @@ int xe_call_for_each_device(xe_device_fn xe_fn) if (!data.ndevs) kunit_skip(current->kunit_test, "test runs only on hardware\n"); - return ret; + return ret ?: data.ndevs; } /** -- cgit v1.2.3 From 622f709ca6297d838d9bd8b33196b388909d5951 Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Fri, 11 Aug 2023 01:36:43 +0530 Subject: drm/xe/uapi: Add support for CPU caching mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow userspace to specify the CPU caching mode at object creation. Modify gem create handler and introduce xe_bo_create_user to replace xe_bo_create. In a later patch we will support setting the pat_index as part of vm_bind, where expectation is that the coherency mode extracted from the pat_index must be least 1way coherent if using cpu_caching=wb. v2 - s/smem_caching/smem_cpu_caching/ and s/XE_GEM_CACHING/XE_GEM_CPU_CACHING/. (Matt Roper) - Drop COH_2WAY and just use COH_NONE + COH_AT_LEAST_1WAY; KMD mostly just cares that zeroing/swap-in can't be bypassed with the given smem_caching mode. 
(Matt Roper) - Fix broken range check for coh_mode and smem_cpu_caching and also don't use constant value, but the already defined macros. (José) - Prefer switch statement for smem_cpu_caching -> ttm_caching. (José) - Add note in kernel-doc for dgpu and coherency modes for system memory. (José) v3 (José): - Make sure to reject coh_mode == 0 for VRAM-only. - Also make sure to actually pass along the (start, end) for __xe_bo_create_locked. v4 - Drop UC caching mode. Can be added back if we need it. (Matt Roper) - s/smem_cpu_caching/cpu_caching. Idea is that VRAM is always WC, but that is currently implicit and KMD controlled. Make it explicit in the uapi with the limitation that it currently must be WC. For VRAM + SYS objects userspace must now select WC. (José) - Make sure to initialize bo_flags. (José) v5 - Make to align with the other uapi and prefix uapi constants with DRM_ (José) v6: - Make it clear that zero cpu_caching is only allowed for kernel objects. (José) v7: (Oak) - With all the changes from the original design, it looks we can further simplify here and drop the explicit coh_mode. We can just infer the coh_mode from the cpu_caching. i.e reject cpu_caching=wb + coh_none. It's one less thing for userspace to maintain so seems worth it. v8: - Make sure to also update the kselftests. Testcase: igt@xe_mmap@cpu-caching Signed-off-by: Pallavi Mishra Co-developed-by: Matthew Auld Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Joonas Lahtinen Cc: Lucas De Marchi Cc: Matt Roper Cc: José Roberto de Souza Cc: Filip Hazubski Cc: Carl Zhang Cc: Effie Yu Cc: Zhengguo Xu Cc: Francois Dugast Cc: Oak Zeng Reviewed-by: José Roberto de Souza Acked-by: Zhengguo Xu Acked-by: Bartosz Dunajski Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 14 +++-- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 4 +- drivers/gpu/drm/xe/xe_bo.c | 100 ++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_bo.h | 14 +++-- drivers/gpu/drm/xe/xe_bo_types.h | 5 ++ drivers/gpu/drm/xe/xe_dma_buf.c | 5 +- include/uapi/drm/xe_drm.h | 19 ++++++- 7 files changed, 122 insertions(+), 39 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 2c04357377ab..549ab343de80 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -177,8 +177,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test) { struct xe_bo *bo, *external; - unsigned int bo_flags = XE_BO_CREATE_USER_BIT | - XE_BO_CREATE_VRAM_IF_DGFX(tile); + unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); struct xe_gt *__gt; int err, i, id; @@ -188,16 +187,19 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc for (i = 0; i < 2; ++i) { xe_vm_lock(vm, false); - bo = xe_bo_create(xe, NULL, vm, 0x10000, ttm_bo_type_device, - bo_flags); + bo = xe_bo_create_user(xe, NULL, vm, 0x10000, + DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, + bo_flags); xe_vm_unlock(vm); if (IS_ERR(bo)) { KUNIT_FAIL(test, "bo create err=%pe\n", bo); break; } - external = xe_bo_create(xe, NULL, NULL, 0x10000, - ttm_bo_type_device, bo_flags); + external = xe_bo_create_user(xe, NULL, NULL, 0x10000, + DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, bo_flags); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; diff --git 
a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 18c00bc03024..81f12422a587 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -116,8 +116,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) return; kunit_info(test, "running %s\n", __func__); - bo = xe_bo_create(xe, NULL, NULL, PAGE_SIZE, ttm_bo_type_device, - XE_BO_CREATE_USER_BIT | params->mem_mask); + bo = xe_bo_create_user(xe, NULL, NULL, PAGE_SIZE, DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index e19337390812..dc1ad3b4dc2a 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -332,7 +332,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, struct xe_device *xe = xe_bo_device(bo); struct xe_ttm_tt *tt; unsigned long extra_pages; - enum ttm_caching caching = ttm_cached; + enum ttm_caching caching; int err; tt = kzalloc(sizeof(*tt), GFP_KERNEL); @@ -346,13 +346,24 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), PAGE_SIZE); + switch (bo->cpu_caching) { + case DRM_XE_GEM_CPU_CACHING_WC: + caching = ttm_write_combined; + break; + default: + caching = ttm_cached; + break; + } + + WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching); + /* * Display scanout is always non-coherent with the CPU cache. * * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and * require a CPU:WC mapping. */ - if (bo->flags & XE_BO_SCANOUT_BIT || + if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) || (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE)) caching = ttm_write_combined; @@ -1198,10 +1209,11 @@ void xe_bo_free(struct xe_bo *bo) kfree(bo); } -struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_tile *tile, struct dma_resv *resv, - struct ttm_lru_bulk_move *bulk, size_t size, - enum ttm_bo_type type, u32 flags) +struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags) { struct ttm_operation_ctx ctx = { .interruptible = true, @@ -1239,6 +1251,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->tile = tile; bo->size = size; bo->flags = flags; + bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; bo->props.preferred_mem_class = XE_BO_PROPS_INVALID; bo->props.preferred_gt = XE_BO_PROPS_INVALID; @@ -1354,11 +1367,11 @@ static int __xe_bo_fixed_placement(struct xe_device *xe, return 0; } -struct xe_bo * -xe_bo_create_locked_range(struct xe_device *xe, - struct xe_tile *tile, struct xe_vm *vm, - size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags) +static struct xe_bo * +__xe_bo_create_locked(struct xe_device *xe, + struct xe_tile *tile, struct xe_vm *vm, + size_t size, u64 start, u64 end, + u16 cpu_caching, enum ttm_bo_type type, u32 flags) { struct xe_bo *bo = NULL; int err; @@ -1379,11 +1392,11 @@ xe_bo_create_locked_range(struct xe_device *xe, } } - bo = __xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, - vm && !xe_vm_in_fault_mode(vm) && - flags & XE_BO_CREATE_USER_BIT ? 
- &vm->lru_bulk_move : NULL, size, - type, flags); + bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, + vm && !xe_vm_in_fault_mode(vm) && + flags & XE_BO_CREATE_USER_BIT ? + &vm->lru_bulk_move : NULL, size, + cpu_caching, type, flags); if (IS_ERR(bo)) return bo; @@ -1423,11 +1436,35 @@ err_unlock_put_bo: return ERR_PTR(err); } +struct xe_bo * +xe_bo_create_locked_range(struct xe_device *xe, + struct xe_tile *tile, struct xe_vm *vm, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags) +{ + return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags); +} + struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags) { - return xe_bo_create_locked_range(xe, tile, vm, size, 0, ~0ULL, type, flags); + return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags); +} + +struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *vm, size_t size, + u16 cpu_caching, + enum ttm_bo_type type, + u32 flags) +{ + struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, + cpu_caching, type, + flags | XE_BO_CREATE_USER_BIT); + if (!IS_ERR(bo)) + xe_bo_unlock_vm_held(bo); + + return bo; } struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, @@ -1809,7 +1846,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_xe_gem_create *args = data; struct xe_vm *vm = NULL; struct xe_bo *bo; - unsigned int bo_flags = XE_BO_CREATE_USER_BIT; + unsigned int bo_flags; u32 handle; int err; @@ -1840,6 +1877,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK)) return -EINVAL; + bo_flags = 0; if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING) bo_flags |= XE_BO_DEFER_BACKING; @@ -1855,6 +1893,18 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags |= XE_BO_NEEDS_CPU_ACCESS; } + if (XE_IOCTL_DBG(xe, !args->cpu_caching || + args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK && + args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT && + args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) + return -EINVAL; + if (args->vm_id) { vm = xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) @@ -1864,8 +1914,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, goto out_vm; } - bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device, - bo_flags); + bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, + ttm_bo_type_device, bo_flags); if (vm) xe_vm_unlock(vm); @@ -2163,10 +2213,12 @@ int xe_bo_dumb_create(struct drm_file *file_priv, args->size = ALIGN(mul_u32_u32(args->pitch, args->height), page_size); - bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device, - XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT | - XE_BO_NEEDS_CPU_ACCESS); + bo = xe_bo_create_user(xe, NULL, NULL, args->size, + DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, + XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT | + XE_BO_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index f8bae873418d..6f183568f76d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -94,10 +94,11 
@@ struct sg_table; struct xe_bo *xe_bo_alloc(void); void xe_bo_free(struct xe_bo *bo); -struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_tile *tile, struct dma_resv *resv, - struct ttm_lru_bulk_move *bulk, size_t size, - enum ttm_bo_type type, u32 flags); +struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags); struct xe_bo * xe_bo_create_locked_range(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, @@ -109,6 +110,11 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *vm, size_t size, + u16 cpu_caching, + enum ttm_bo_type type, + u32 flags); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, enum ttm_bo_type type, u32 flags); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 4bff60996168..f71dbc518958 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -79,6 +79,11 @@ struct xe_bo { struct llist_node freed; /** @created: Whether the bo has passed initial creation */ bool created; + /** + * @cpu_caching: CPU caching mode. Currently only used for userspace + * objects. + */ + u16 cpu_caching; }; #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index cfde3be3b0dc..64ed303728fd 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -214,8 +214,9 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, int ret; dma_resv_lock(resv, NULL); - bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, - ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT); + bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, + 0, /* Will require 1way or 2way for vm_bind */ + ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT); if (IS_ERR(bo)) { ret = PTR_ERR(bo); goto error; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 88f3aca02b08..ab7d1b26c773 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -541,8 +541,25 @@ struct drm_xe_gem_create { */ __u32 handle; + /** + * @cpu_caching: The CPU caching mode to select for this object. If + * mmaping the object the mode selected here will also be used. + * + * Supported values: + * + * DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back + * caching. On iGPU this can't be used for scanout surfaces. Currently + * not allowed for objects placed in VRAM. + * + * DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This + * is uncached. Scanout surfaces should likely use this. All objects + * that can be placed in VRAM must use this. 
+ */ +#define DRM_XE_GEM_CPU_CACHING_WB 1 +#define DRM_XE_GEM_CPU_CACHING_WC 2 + __u16 cpu_caching; /** @pad: MBZ */ - __u32 pad; + __u16 pad; /** @reserved: Reserved */ __u64 reserved[2]; -- cgit v1.2.3 From f6a22e6862737e31d2c0693d2a4f986e71d32da6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 17 Aug 2023 10:27:43 +0100 Subject: drm/xe/pat: annotate pat_index with coherency mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Future uapi needs to give userspace the ability to select the pat_index for a given vm_bind. However we need to be able to extract the coherency mode from the provided pat_index to ensure it's compatible with the cpu_caching mode set at object creation. There are various security reasons for why this matters. However the pat_index itself is very platform specific, so seems reasonable to annotate each platform definition of the pat table. On some older platforms there is no explicit coherency mode, so we just pick whatever makes sense. v2: - Simplify with COH_AT_LEAST_1_WAY - Add some kernel-doc v3 (Matt Roper): - Some small tweaks v4: - Rebase v5: - Rebase on Xe2 PAT additions v6: - Rebase on removal of coh_mode from uapi Bspec: 45101, 44235 #xe Bspec: 70552, 71582, 59400 #xe2 Signed-off-by: Matthew Auld Cc: Pallavi Mishra Cc: Thomas Hellström Cc: Joonas Lahtinen Cc: Lucas De Marchi Cc: Matt Roper Cc: José Roberto de Souza Cc: Filip Hazubski Cc: Carl Zhang Cc: Effie Yu Cc: Zhengguo Xu Cc: Francois Dugast Reviewed-by: Matt Roper Reviewed-by: José Roberto de Souza Reviewed-by: Pallavi Mishra Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_pat.c | 98 +++++++++++++++++++++--------------- drivers/gpu/drm/xe/xe_pat.h | 31 +++++++++++- 3 files changed, 89 insertions(+), 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 2712905c7a91..ea53f9dfc199 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -368,7 +368,7 @@ struct xe_device { /** Internal operations to abstract platforms */ const struct xe_pat_ops *ops; /** PAT table to program in the HW */ - const u32 *table; + const struct xe_pat_table_entry *table; /** Number of PAT entries */ int n_entries; u32 idx[__XE_CACHE_LEVEL_COUNT]; diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 7c1078707aa0..1892ff81086f 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -5,6 +5,8 @@ #include "xe_pat.h" +#include + #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" @@ -46,35 +48,37 @@ static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" }; struct xe_pat_ops { - void (*program_graphics)(struct xe_gt *gt, const u32 table[], int n_entries); - void (*program_media)(struct xe_gt *gt, const u32 table[], int n_entries); + void (*program_graphics)(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries); + void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries); void (*dump)(struct xe_gt *gt, struct drm_printer *p); }; -static const u32 xelp_pat_table[] = { - [0] = XELP_PAT_WB, - [1] = XELP_PAT_WC, - [2] = XELP_PAT_WT, - [3] = XELP_PAT_UC, +static const struct xe_pat_table_entry xelp_pat_table[] = { + [0] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [1] = { XELP_PAT_WC, XE_COH_NONE }, + [2] = { XELP_PAT_WT, XE_COH_NONE }, + [3] = { XELP_PAT_UC, XE_COH_NONE }, }; 
-static const u32 xehpc_pat_table[] = { - [0] = XELP_PAT_UC, - [1] = XELP_PAT_WC, - [2] = XELP_PAT_WT, - [3] = XELP_PAT_WB, - [4] = XEHPC_PAT_CLOS(1) | XELP_PAT_WT, - [5] = XEHPC_PAT_CLOS(1) | XELP_PAT_WB, - [6] = XEHPC_PAT_CLOS(2) | XELP_PAT_WT, - [7] = XEHPC_PAT_CLOS(2) | XELP_PAT_WB, +static const struct xe_pat_table_entry xehpc_pat_table[] = { + [0] = { XELP_PAT_UC, XE_COH_NONE }, + [1] = { XELP_PAT_WC, XE_COH_NONE }, + [2] = { XELP_PAT_WT, XE_COH_NONE }, + [3] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [4] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WT, XE_COH_NONE }, + [5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [6] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WT, XE_COH_NONE }, + [7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, }; -static const u32 xelpg_pat_table[] = { - [0] = XELPG_PAT_0_WB, - [1] = XELPG_PAT_1_WT, - [2] = XELPG_PAT_3_UC, - [3] = XELPG_PAT_0_WB | XELPG_2_COH_1W, - [4] = XELPG_PAT_0_WB | XELPG_3_COH_2W, +static const struct xe_pat_table_entry xelpg_pat_table[] = { + [0] = { XELPG_PAT_0_WB, XE_COH_NONE }, + [1] = { XELPG_PAT_1_WT, XE_COH_NONE }, + [2] = { XELPG_PAT_3_UC, XE_COH_NONE }, + [3] = { XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_AT_LEAST_1WAY }, + [4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_AT_LEAST_1WAY }, }; /* @@ -92,15 +96,18 @@ static const u32 xelpg_pat_table[] = { * coherency (which matches an all-0's encoding), so we can just omit them * in the table. */ -#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, coh_mode) \ - (no_promote ? XE2_NO_PROMOTE : 0) | \ - (comp_en ? XE2_COMP_EN : 0) | \ - REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \ - REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ - REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ - REG_FIELD_PREP(XE2_COH_MODE, coh_mode) - -static const u32 xe2_pat_table[] = { +#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \ + { \ + .value = (no_promote ? XE2_NO_PROMOTE : 0) | \ + (comp_en ? XE2_COMP_EN : 0) | \ + REG_FIELD_PREP(XE2_L3_CLOS, l3clos) | \ + REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ + REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ + REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ + .coh_mode = __coh_mode ? 
XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ + } + +static const struct xe_pat_table_entry xe2_pat_table[] = { [ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ), [ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ), [ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ), @@ -133,23 +140,31 @@ static const u32 xe2_pat_table[] = { }; /* Special PAT values programmed outside the main table */ -#define XE2_PAT_ATS XE2_PAT( 0, 0, 0, 0, 3, 3 ) +static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 ); + +u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) +{ + WARN_ON(pat_index >= xe->pat.n_entries); + return xe->pat.table[pat_index].coh_mode; +} -static void program_pat(struct xe_gt *gt, const u32 table[], int n_entries) +static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries) { for (int i = 0; i < n_entries; i++) { struct xe_reg reg = XE_REG(_PAT_INDEX(i)); - xe_mmio_write32(gt, reg, table[i]); + xe_mmio_write32(gt, reg, table[i].value); } } -static void program_pat_mcr(struct xe_gt *gt, const u32 table[], int n_entries) +static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries) { for (int i = 0; i < n_entries; i++) { struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i)); - xe_gt_mcr_multicast_write(gt, reg_mcr, table[i]); + xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value); } } @@ -289,16 +304,18 @@ static const struct xe_pat_ops xelpg_pat_ops = { .dump = xelpg_dump, }; -static void xe2lpg_program_pat(struct xe_gt *gt, const u32 table[], int n_entries) +static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries) { program_pat_mcr(gt, table, n_entries); - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), XE2_PAT_ATS); + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value); } -static void xe2lpm_program_pat(struct xe_gt *gt, const u32 table[], int n_entries) +static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], + int n_entries) { program_pat(gt, table, n_entries); - xe_mmio_write32(gt, XE_REG(_PAT_ATS), XE2_PAT_ATS); + xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value); } static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) @@ -396,6 +413,7 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.idx[XE_CACHE_WT] = 2; xe->pat.idx[XE_CACHE_WB] = 0; } else if (GRAPHICS_VERx100(xe) <= 1210) { + WARN_ON_ONCE(!IS_DGFX(xe) && !xe->info.has_llc); xe->pat.ops = &xelp_pat_ops; xe->pat.table = xelp_pat_table; xe->pat.n_entries = ARRAY_SIZE(xelp_pat_table); diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index 09c491ab9f15..fa0dfbe525cd 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -6,9 +6,30 @@ #ifndef _XE_PAT_H_ #define _XE_PAT_H_ +#include + struct drm_printer; -struct xe_gt; struct xe_device; +struct xe_gt; + +/** + * struct xe_pat_table_entry - The pat_index encoding and other meta information. + */ +struct xe_pat_table_entry { + /** + * @value: The platform specific value encoding the various memory + * attributes (this maps to some fixed pat_index). So things like + * caching, coherency, compression etc can be encoded here. + */ + u32 value; + + /** + * @coh_mode: The GPU coherency mode that @value maps to. 
+ */ +#define XE_COH_NONE 1 +#define XE_COH_AT_LEAST_1WAY 2 + u16 coh_mode; +}; /** * xe_pat_init_early - SW initialization, setting up data based on device @@ -29,4 +50,12 @@ void xe_pat_init(struct xe_gt *gt); */ void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p); +/** + * xe_pat_index_get_coh_mode - Extract the coherency mode for the given + * pat_index. + * @xe: xe device + * @pat_index: The pat_index to query + */ +u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index); + #endif -- cgit v1.2.3 From e1fbc4f18d5b4405271e964670b9b054c4397127 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 25 Sep 2023 12:42:18 +0100 Subject: drm/xe/uapi: support pat_index selection with vm_bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow userspace to directly control the pat_index for a given vm binding. This should allow directly controlling the coherency, caching behaviour, compression and potentially other stuff in the future for the ppGTT binding. The exact meaning behind the pat_index is very platform specific (see BSpec or PRMs) but effectively maps to some predefined memory attributes. From the KMD pov we only care about the coherency that is provided by the pat_index, which falls into either NONE, 1WAY or 2WAY. The vm_bind coherency mode for the given pat_index needs to be at least 1way coherent when using cpu_caching with DRM_XE_GEM_CPU_CACHING_WB. For platforms that lack the explicit coherency mode attribute, we treat UC/WT/WC as NONE and WB as AT_LEAST_1WAY. For userptr mappings we lack a corresponding gem object, so the expected coherency mode is instead implicit and must fall into either 1WAY or 2WAY. Trying to use NONE will be rejected by the kernel. For imported dma-buf (from a different device) the coherency mode is also implicit and must also be either 1WAY or 2WAY. v2: - Undefined coh_mode(pat_index) can now be treated as programmer error. (Matt Roper) - We now allow gem_create.coh_mode <= coh_mode(pat_index), rather than having to match exactly. This ensures imported dma-buf can always just use 1way (or even 2way), now that we also bundle 1way/2way into at_least_1way. We still require 1way/2way for external dma-buf, but the policy can now be the same for self-import, if desired. - Use u16 for pat_index in uapi. u32 is massive overkill. (José) - Move as much of the pat_index validation as we can into vm_bind_ioctl_check_args. (José) v3 (Matt Roper): - Split the pte_encode() refactoring into separate patch. v4: - Rebase v5: - Check for and reject !coh_mode which would indicate hw reserved pat_index on xe2. v6: - Rebase on removal of coh_mode from uapi. We just need to reject cpu_caching=wb + pat_index with coh_none. 
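As a purely illustrative aside (not part of this patch; the authoritative checks are the ones added to vm_bind_ioctl_check_args() and xe_vm_bind_ioctl() below), the rejection rule can be summarised in a few lines of C. The constants mirror the XE_COH_* and DRM_XE_GEM_CPU_CACHING_* values introduced in this series; NULL bindings are exempt since no memory is actually mapped:

  #include <stdbool.h>
  #include <stdint.h>

  #define XE_COH_NONE                1
  #define DRM_XE_GEM_CPU_CACHING_WC  2

  /*
   * cpu_caching == 0 means the binding has no native BO behind it
   * (userptr or imported dma-buf), so the CPU caching mode is unknown.
   */
  static bool pat_index_coh_ok(uint16_t coh_mode, uint16_t cpu_caching)
  {
          /* a 1-way or 2-way coherent pat_index is always acceptable */
          if (coh_mode != XE_COH_NONE)
                  return true;

          /* non-coherent is only safe for BOs explicitly created as WC */
          return cpu_caching == DRM_XE_GEM_CPU_CACHING_WC;
  }
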
Testcase: igt@xe_pat Bspec: 45101, 44235 #xe Bspec: 70552, 71582, 59400 #xe2 Signed-off-by: Matthew Auld Cc: Pallavi Mishra Cc: Thomas Hellström Cc: Joonas Lahtinen Cc: Lucas De Marchi Cc: Matt Roper Cc: José Roberto de Souza Cc: Filip Hazubski Cc: Carl Zhang Cc: Effie Yu Cc: Zhengguo Xu Cc: Francois Dugast Tested-by: José Roberto de Souza Reviewed-by: José Roberto de Souza Acked-by: Zhengguo Xu Acked-by: Bartosz Dunajski Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 11 ++----- drivers/gpu/drm/xe/xe_vm.c | 67 +++++++++++++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_vm_types.h | 7 +++++ include/uapi/drm/xe_drm.h | 48 +++++++++++++++++++++++++++- 4 files changed, 115 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index c6c9b723db5a..3b485313804a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -290,8 +290,6 @@ struct xe_pt_stage_bind_walk { struct xe_vm *vm; /** @tile: The tile we're building for. */ struct xe_tile *tile; - /** @cache: Desired cache level for the ptes */ - enum xe_cache_level cache; /** @default_pte: PTE flag only template. No address is associated */ u64 default_pte; /** @dma_offset: DMA offset to add to the PTE. */ @@ -511,7 +509,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, { struct xe_pt_stage_bind_walk *xe_walk = container_of(walk, typeof(*xe_walk), base); - u16 pat_index = tile_to_xe(xe_walk->tile)->pat.idx[xe_walk->cache]; + u16 pat_index = xe_walk->vma->pat_index; struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); struct xe_vm *vm = xe_walk->vm; struct xe_pt *xe_child; @@ -657,13 +655,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (is_devmem) { xe_walk.default_pte |= XE_PPGTT_PTE_DM; xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - xe_walk.cache = XE_CACHE_WB; - } else { - if (!xe_vma_has_no_bo(vma) && bo->flags & XE_BO_SCANOUT_BIT) - xe_walk.cache = XE_CACHE_WT; - else - xe_walk.cache = XE_CACHE_WB; } + if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c33ae4db4e02..a97a310123fc 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -6,6 +6,7 @@ #include "xe_vm.h" #include +#include #include #include @@ -26,6 +27,7 @@ #include "xe_gt_pagefault.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" +#include "xe_pat.h" #include "xe_pm.h" #include "xe_preempt_fence.h" #include "xe_pt.h" @@ -868,7 +870,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, u64 start, u64 end, bool read_only, bool is_null, - u8 tile_mask) + u8 tile_mask, + u16 pat_index) { struct xe_vma *vma; struct xe_tile *tile; @@ -910,6 +913,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; + vma->pat_index = pat_index; + if (bo) { struct drm_gpuvm_bo *vm_bo; @@ -2162,7 +2167,7 @@ static struct drm_gpuva_ops * vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 addr, u64 range, u32 operation, u32 flags, u8 tile_mask, - u32 prefetch_region) + u32 prefetch_region, u16 pat_index) { struct drm_gem_object *obj = bo ? 
&bo->ttm.base : NULL; struct drm_gpuva_ops *ops; @@ -2231,6 +2236,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, op->map.read_only = flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; + op->map.pat_index = pat_index; } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { op->prefetch.region = prefetch_region; } @@ -2242,7 +2248,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - u8 tile_mask, bool read_only, bool is_null) + u8 tile_mask, bool read_only, bool is_null, + u16 pat_index) { struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; struct xe_vma *vma; @@ -2258,7 +2265,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, vma = xe_vma_create(vm, bo, op->gem.offset, op->va.addr, op->va.addr + op->va.range - 1, read_only, is_null, - tile_mask); + tile_mask, pat_index); if (bo) xe_bo_unlock(bo); @@ -2404,7 +2411,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, vma = new_vma(vm, &op->base.map, op->tile_mask, op->map.read_only, - op->map.is_null); + op->map.is_null, op->map.pat_index); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2430,7 +2437,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, vma = new_vma(vm, op->base.remap.prev, op->tile_mask, read_only, - is_null); + is_null, old->pat_index); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2464,7 +2471,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, vma = new_vma(vm, op->base.remap.next, op->tile_mask, read_only, - is_null); + is_null, old->pat_index); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2862,6 +2869,26 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u64 obj_offset = (*bind_ops)[i].obj_offset; u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; + u16 pat_index = (*bind_ops)[i].pat_index; + u16 coh_mode; + + if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { + err = -EINVAL; + goto free_bind_ops; + } + + pat_index = array_index_nospec(pat_index, xe->pat.n_entries); + (*bind_ops)[i].pat_index = pat_index; + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ + err = -EINVAL; + goto free_bind_ops; + } + + if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { + err = -EINVAL; + goto free_bind_ops; + } if (i == 0) { *async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC); @@ -2892,6 +2919,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && + op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, prefetch_region && @@ -3025,6 +3054,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addr = bind_ops[i].addr; u32 obj = bind_ops[i].obj; u64 obj_offset = bind_ops[i].obj_offset; + u16 pat_index = bind_ops[i].pat_index; + u16 coh_mode; if (!obj) continue; @@ -3052,6 +3083,24 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto put_obj; } } + + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (bos[i]->cpu_caching) { + if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && + bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { + err = -EINVAL; + goto 
put_obj; + } + } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { + /* + * Imported dma-buf from a different device should + * require 1way or 2way coherency since we don't know + * how it was mapped on the CPU. Just assume is it + * potentially cached on CPU side. + */ + err = -EINVAL; + goto put_obj; + } } if (args->num_syncs) { @@ -3079,10 +3128,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 obj_offset = bind_ops[i].obj_offset; u8 tile_mask = bind_ops[i].tile_mask; u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; + u16 pat_index = bind_ops[i].pat_index; ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, addr, range, op, flags, - tile_mask, prefetch_region); + tile_mask, prefetch_region, + pat_index); if (IS_ERR(ops[i])) { err = PTR_ERR(ops[i]); ops[i] = NULL; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index fc2645e07578..74cdf16a42ad 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -110,6 +110,11 @@ struct xe_vma { */ u8 tile_present; + /** + * @pat_index: The pat index to use when encoding the PTEs for this vma. + */ + u16 pat_index; + struct { struct list_head rebind_link; } notifier; @@ -333,6 +338,8 @@ struct xe_vma_op_map { bool read_only; /** @is_null: is NULL binding */ bool is_null; + /** @pat_index: The pat index to use for this operation. */ + u16 pat_index; }; /** struct xe_vma_op_remap - VMA remap operation */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index ab7d1b26c773..1a844fa7af8a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -636,8 +636,54 @@ struct drm_xe_vm_bind_op { */ __u32 obj; + /** + * @pat_index: The platform defined @pat_index to use for this mapping. + * The index basically maps to some predefined memory attributes, + * including things like caching, coherency, compression etc. The exact + * meaning of the pat_index is platform specific and defined in the + * Bspec and PRMs. When the KMD sets up the binding the index here is + * encoded into the ppGTT PTE. + * + * For coherency the @pat_index needs to be at least 1way coherent when + * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD + * will extract the coherency mode from the @pat_index and reject if + * there is a mismatch (see note below for pre-MTL platforms). + * + * Note: On pre-MTL platforms there is only a caching mode and no + * explicit coherency mode, but on such hardware there is always a + * shared-LLC (or is dgpu) so all GT memory accesses are coherent with + * CPU caches even with the caching mode set as uncached. It's only the + * display engine that is incoherent (on dgpu it must be in VRAM which + * is always mapped as WC on the CPU). However to keep the uapi somewhat + * consistent with newer platforms the KMD groups the different cache + * levels into the following coherency buckets on all pre-MTL platforms: + * + * ppGTT UC -> COH_NONE + * ppGTT WC -> COH_NONE + * ppGTT WT -> COH_NONE + * ppGTT WB -> COH_AT_LEAST_1WAY + * + * In practice UC/WC/WT should only ever used for scanout surfaces on + * such platforms (or perhaps in general for dma-buf if shared with + * another device) since it is only the display engine that is actually + * incoherent. Everything else should typically use WB given that we + * have a shared-LLC. On MTL+ this completely changes and the HW + * defines the coherency mode as part of the @pat_index, where + * incoherent GT access is possible. 
+ * + * Note: For userptr and externally imported dma-buf the kernel expects + * either 1WAY or 2WAY for the @pat_index. + * + * For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions + * on the @pat_index. For such mappings there is no actual memory being + * mapped (the address in the PTE is invalid), so the various PAT memory + * attributes likely do not apply. Simply leaving as zero is one + * option (still a valid pat_index). + */ + __u16 pat_index; + /** @pad: MBZ */ - __u32 pad; + __u16 pad; union { /** -- cgit v1.2.3 From 473b62763b76e8bb0793ac5f030779c43ecd79e6 Mon Sep 17 00:00:00 2001 From: Fei Yang Date: Wed, 22 Nov 2023 12:45:01 -0800 Subject: drm/xe: explicitly set GGTT access for GuC DMA Confirmed with hardware that setting GGTT memory access for GuC firmware loading is correct for all platforms and required for new platforms going forward. Signed-off-by: Fei Yang Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231122204501.1353325-2-fei.yang@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 1 + drivers/gpu/drm/xe/xe_uc_fw.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index ba375fc51a87..92320bbc9d3d 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -70,6 +70,7 @@ #define DMA_ADDR_1_HIGH XE_REG(0xc30c) #define DMA_ADDR_SPACE_MASK REG_GENMASK(20, 16) #define DMA_ADDRESS_SPACE_WOPCM REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 7) +#define DMA_ADDRESS_SPACE_GGTT REG_FIELD_PREP(DMA_ADDR_SPACE_MASK, 8) #define DMA_COPY_SIZE XE_REG(0xc310) #define DMA_CTRL XE_REG(0xc314) #define HUC_UKERNEL REG_BIT(9) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2fcec528a1d1..e806e7b6ae42 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -746,7 +746,8 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) /* Set the source address for the uCode */ src_offset = uc_fw_ggtt_offset(uc_fw) + uc_fw->css_offset; xe_mmio_write32(gt, DMA_ADDR_0_LOW, lower_32_bits(src_offset)); - xe_mmio_write32(gt, DMA_ADDR_0_HIGH, upper_32_bits(src_offset)); + xe_mmio_write32(gt, DMA_ADDR_0_HIGH, + upper_32_bits(src_offset) | DMA_ADDRESS_SPACE_GGTT); /* Set the DMA destination */ xe_mmio_write32(gt, DMA_ADDR_1_LOW, offset); -- cgit v1.2.3 From e784f352f8a1142065a738f544a6566c873d73f6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Nov 2023 21:32:01 +0100 Subject: drm/xe/guc: Drop ancient GuC CTB definitions Those definitions were applicable for old GuC firmwares only. 
Reviewed-by: Matthew Brost Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/741 Link: https://lore.kernel.org/r/20231128203203.1147-1-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h index 41244055cc0c..d34f91002789 100644 --- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -165,25 +165,4 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); * - **flags**, holds various bits to control message handling */ -/* - * Definition of the command transport message header (DW0) - * - * bit[4..0] message len (in dwords) - * bit[7..5] reserved - * bit[8] response (G2H only) - * bit[8] write fence to desc (H2G only) - * bit[9] write status to H2G buff (H2G only) - * bit[10] send status back via G2H (H2G only) - * bit[15..11] reserved - * bit[31..16] action code - */ -#define GUC_CT_MSG_LEN_SHIFT 0 -#define GUC_CT_MSG_LEN_MASK 0x1F -#define GUC_CT_MSG_IS_RESPONSE (1 << 8) -#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8) -#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9) -#define GUC_CT_MSG_SEND_STATUS (1 << 10) -#define GUC_CT_MSG_ACTION_SHIFT 16 -#define GUC_CT_MSG_ACTION_MASK 0xFFFF - #endif -- cgit v1.2.3 From 0a39ad21796f2f67b7d384c0f0ec0ac901f76519 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Nov 2023 21:32:02 +0100 Subject: drm/xe/guc: Remove obsolete GuC CTB documentation Refer to already described CTB Descriptor and CTB HXG Message. Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20231128203203.1147-2-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h | 45 +--------------------- 1 file changed, 2 insertions(+), 43 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h index d34f91002789..3b83f907ece4 100644 --- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -120,49 +120,8 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); * is based on u32 data stream written to the shared buffer. One buffer can * be used to transmit data only in one direction (one-directional channel). * - * Current status of the each buffer is stored in the buffer descriptor. - * Buffer descriptor holds tail and head fields that represents active data - * stream. 
The tail field is updated by the data producer (sender), and head - * field is updated by the data consumer (receiver):: - * - * +------------+ - * | DESCRIPTOR | +=================+============+========+ - * +============+ | | MESSAGE(s) | | - * | address |--------->+=================+============+========+ - * +------------+ - * | head | ^-----head--------^ - * +------------+ - * | tail | ^---------tail-----------------^ - * +------------+ - * | size | ^---------------size--------------------^ - * +------------+ - * - * Each message in data stream starts with the single u32 treated as a header, - * followed by optional set of u32 data that makes message specific payload:: - * - * +------------+---------+---------+---------+ - * | MESSAGE | - * +------------+---------+---------+---------+ - * | msg[0] | [1] | ... | [n-1] | - * +------------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+ - * | | 0 | ... | n | - * +======+=====+=========+=========+=========+ - * | 31:16| code| | | | - * +------+-----+ | | | - * | 15:5|flags| | | | - * +------+-----+ | | | - * | 4:0| len| | | | - * +------+-----+---------+---------+---------+ - * - * ^-------------len-------------^ - * - * The message header consists of: - * - * - **len**, indicates length of the message payload (in u32) - * - **code**, indicates message code - * - **flags**, holds various bits to control message handling + * Current status of the each buffer is maintained in the `CTB Descriptor`_. + * Each message in data stream is encoded as `CTB HXG Message`_. */ #endif -- cgit v1.2.3 From b67cb798e4227d312fd221deb6a3f0b88b51fc6b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Nov 2023 21:32:03 +0100 Subject: drm/xe/guc: Include only required GuC ABI headers On i915 we were adding new GuC ABI headers directly to guc_fwif.h file since we were replacing old definitions from that file. On xe driver we could do more and better by including ABI headers only in files that need those definitions. 
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/741 Cc: Jani Nikula Acked-by: Jani Nikula Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20231128203203.1147-3-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 1 + drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 1 + drivers/gpu/drm/xe/xe_guc.c | 2 ++ drivers/gpu/drm/xe/xe_guc_ct.c | 2 ++ drivers/gpu/drm/xe/xe_guc_fwif.h | 6 ------ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 1 + drivers/gpu/drm/xe/xe_guc_pc.c | 2 ++ drivers/gpu/drm/xe/xe_guc_submit.c | 2 ++ 8 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 1e083dda0679..0a278df6a97f 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -12,6 +12,7 @@ #include #include +#include "abi/guc_actions_abi.h" #include "xe_bo.h" #include "xe_gt.h" #include "xe_gt_tlb_invalidation.h" diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index a28f31c05b1b..7eef23a00d77 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -5,6 +5,7 @@ #include "xe_gt_tlb_invalidation.h" +#include "abi/guc_actions_abi.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_guc.h" diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 08142d8ee052..e6f680efb29e 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -7,6 +7,8 @@ #include +#include "abi/guc_actions_abi.h" +#include "abi/guc_errors_abi.h" #include "generated/xe_wa_oob.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index c44e75074695..6295d916e39f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -11,6 +11,8 @@ #include +#include "abi/guc_actions_abi.h" +#include "abi/guc_klvs_abi.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 4216a6d9e478..4dd5a88a7826 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -8,13 +8,7 @@ #include -#include "abi/guc_actions_abi.h" -#include "abi/guc_actions_slpc_abi.h" -#include "abi/guc_communication_ctb_abi.h" -#include "abi/guc_communication_mmio_abi.h" -#include "abi/guc_errors_abi.h" #include "abi/guc_klvs_abi.h" -#include "abi/guc_messages_abi.h" #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 57d325ec8ce3..98bb9bb30705 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -7,6 +7,7 @@ #include +#include "abi/guc_actions_abi.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index e9dd6c3d750b..2919c6aea403 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -9,6 +9,8 @@ #include +#include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 32c234d753fd..ad5e19ecd33c 100644 --- 
a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -13,6 +13,8 @@ #include +#include "abi/guc_actions_abi.h" +#include "abi/guc_klvs_abi.h" #include "regs/xe_lrc_layout.h" #include "xe_assert.h" #include "xe_devcoredump.h" -- cgit v1.2.3 From c550f64f082b9da305ab7d07b8716389a80b641a Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 17 Nov 2023 18:06:16 -0800 Subject: drm/xe: Rename xe_gt_idle_sysfs to xe_gt_idle Prep this file to contain C6 toggling as well instead of just sysfs related stuff. Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 +- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_gt_idle.c | 168 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_idle.h | 13 +++ drivers/gpu/drm/xe/xe_gt_idle_sysfs.c | 168 ---------------------------- drivers/gpu/drm/xe/xe_gt_idle_sysfs.h | 13 --- drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h | 38 ------- drivers/gpu/drm/xe/xe_gt_idle_types.h | 38 +++++++ drivers/gpu/drm/xe/xe_gt_types.h | 2 +- 9 files changed, 222 insertions(+), 222 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gt_idle.c create mode 100644 drivers/gpu/drm/xe/xe_gt_idle.h delete mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs.c delete mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs.h delete mode 100644 drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h create mode 100644 drivers/gpu/drm/xe/xe_gt_idle_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 161e8ead9114..b8ad42fcbea2 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -72,7 +72,7 @@ xe-y += xe_bb.o \ xe_gt.o \ xe_gt_clock.o \ xe_gt_debugfs.o \ - xe_gt_idle_sysfs.o \ + xe_gt_idle.o \ xe_gt_mcr.o \ xe_gt_pagefault.o \ xe_gt_sysfs.o \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 00193b02a7e5..8a6fb9641cd6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -23,7 +23,7 @@ #include "xe_ggtt.h" #include "xe_gsc.h" #include "xe_gt_clock.h" -#include "xe_gt_idle_sysfs.h" +#include "xe_gt_idle.h" #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c new file mode 100644 index 000000000000..e5b7e5d38e76 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_idle.h" +#include "xe_gt_sysfs.h" +#include "xe_guc_pc.h" + +/** + * DOC: Xe GT Idle + * + * Contains functions that init GT idle features like C6 + * + * device/gt#/gtidle/name - name of the state + * device/gt#/gtidle/idle_residency_ms - Provides residency of the idle state in ms + * device/gt#/gtidle/idle_status - Provides current idle state + */ + +static struct xe_gt_idle *dev_to_gtidle(struct device *dev) +{ + struct kobject *kobj = &dev->kobj; + + return &kobj_to_gt(kobj->parent)->gtidle; +} + +static struct xe_gt *gtidle_to_gt(struct xe_gt_idle *gtidle) +{ + return container_of(gtidle, struct xe_gt, gtidle); +} + +static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle) +{ + return >idle_to_gt(gtidle)->uc.guc.pc; +} + +static const char *gt_idle_state_to_string(enum xe_gt_idle_state state) +{ + switch (state) { + case GT_IDLE_C0: + return "gt-c0"; + case GT_IDLE_C6: + return "gt-c6"; + default: + return "unknown"; + } +} + +static u64 
get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency) +{ + u64 delta, overflow_residency, prev_residency; + + overflow_residency = BIT_ULL(32); + + /* + * Counter wrap handling + * Store previous hw counter values for counter wrap-around handling + * Relying on sufficient frequency of queries otherwise counters can still wrap. + */ + prev_residency = gtidle->prev_residency; + gtidle->prev_residency = cur_residency; + + /* delta */ + if (cur_residency >= prev_residency) + delta = cur_residency - prev_residency; + else + delta = cur_residency + (overflow_residency - prev_residency); + + /* Add delta to extended raw driver copy of idle residency */ + cur_residency = gtidle->cur_residency + delta; + gtidle->cur_residency = cur_residency; + + /* residency multiplier in ns, convert to ms */ + cur_residency = mul_u64_u32_div(cur_residency, gtidle->residency_multiplier, 1e6); + + return cur_residency; +} + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + + return sysfs_emit(buff, "%s\n", gtidle->name); +} +static DEVICE_ATTR_RO(name); + +static ssize_t idle_status_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + enum xe_gt_idle_state state; + + state = gtidle->idle_status(pc); + + return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); +} +static DEVICE_ATTR_RO(idle_status); + +static ssize_t idle_residency_ms_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + u64 residency; + + residency = gtidle->idle_residency(pc); + return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); +} +static DEVICE_ATTR_RO(idle_residency_ms); + +static const struct attribute *gt_idle_attrs[] = { + &dev_attr_name.attr, + &dev_attr_idle_status.attr, + &dev_attr_idle_residency_ms.attr, + NULL, +}; + +static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_files(kobj, gt_idle_attrs); + kobject_put(kobj); +} + +void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) +{ + struct xe_gt *gt = gtidle_to_gt(gtidle); + struct xe_device *xe = gt_to_xe(gt); + struct kobject *kobj; + int err; + + kobj = kobject_create_and_add("gtidle", gt->sysfs); + if (!kobj) { + drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM); + return; + } + + if (xe_gt_is_media_type(gt)) { + sprintf(gtidle->name, "gt%d-mc\n", gt->info.id); + gtidle->idle_residency = xe_guc_pc_mc6_residency; + } else { + sprintf(gtidle->name, "gt%d-rc\n", gt->info.id); + gtidle->idle_residency = xe_guc_pc_rc6_residency; + } + + /* Multiplier for Residency counter in units of 1.28us */ + gtidle->residency_multiplier = 1280; + gtidle->idle_status = xe_guc_pc_c_status; + + err = sysfs_create_files(kobj, gt_idle_attrs); + if (err) { + kobject_put(kobj); + drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj); + if (err) + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); +} diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h new file mode 100644 index 000000000000..9b36bf7db3a7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ 
+/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_IDLE_H_ +#define _XE_GT_IDLE_H_ + +#include "xe_gt_idle_types.h" + +void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle); + +#endif /* _XE_GT_IDLE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c deleted file mode 100644 index 8df9840811cd..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.c +++ /dev/null @@ -1,168 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include - -#include "xe_device.h" -#include "xe_gt.h" -#include "xe_gt_idle_sysfs.h" -#include "xe_gt_sysfs.h" -#include "xe_guc_pc.h" - -/** - * DOC: Xe GT Idle - * - * Provides sysfs entries for idle properties of GT - * - * device/gt#/gtidle/name - name of the state - * device/gt#/gtidle/idle_residency_ms - Provides residency of the idle state in ms - * device/gt#/gtidle/idle_status - Provides current idle state - */ - -static struct xe_gt_idle *dev_to_gtidle(struct device *dev) -{ - struct kobject *kobj = &dev->kobj; - - return &kobj_to_gt(kobj->parent)->gtidle; -} - -static struct xe_gt *gtidle_to_gt(struct xe_gt_idle *gtidle) -{ - return container_of(gtidle, struct xe_gt, gtidle); -} - -static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle) -{ - return >idle_to_gt(gtidle)->uc.guc.pc; -} - -static const char *gt_idle_state_to_string(enum xe_gt_idle_state state) -{ - switch (state) { - case GT_IDLE_C0: - return "gt-c0"; - case GT_IDLE_C6: - return "gt-c6"; - default: - return "unknown"; - } -} - -static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency) -{ - u64 delta, overflow_residency, prev_residency; - - overflow_residency = BIT_ULL(32); - - /* - * Counter wrap handling - * Store previous hw counter values for counter wrap-around handling - * Relying on sufficient frequency of queries otherwise counters can still wrap. 
- */ - prev_residency = gtidle->prev_residency; - gtidle->prev_residency = cur_residency; - - /* delta */ - if (cur_residency >= prev_residency) - delta = cur_residency - prev_residency; - else - delta = cur_residency + (overflow_residency - prev_residency); - - /* Add delta to extended raw driver copy of idle residency */ - cur_residency = gtidle->cur_residency + delta; - gtidle->cur_residency = cur_residency; - - /* residency multiplier in ns, convert to ms */ - cur_residency = mul_u64_u32_div(cur_residency, gtidle->residency_multiplier, 1e6); - - return cur_residency; -} - -static ssize_t name_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - struct xe_gt_idle *gtidle = dev_to_gtidle(dev); - - return sysfs_emit(buff, "%s\n", gtidle->name); -} -static DEVICE_ATTR_RO(name); - -static ssize_t idle_status_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - struct xe_gt_idle *gtidle = dev_to_gtidle(dev); - struct xe_guc_pc *pc = gtidle_to_pc(gtidle); - enum xe_gt_idle_state state; - - state = gtidle->idle_status(pc); - - return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); -} -static DEVICE_ATTR_RO(idle_status); - -static ssize_t idle_residency_ms_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - struct xe_gt_idle *gtidle = dev_to_gtidle(dev); - struct xe_guc_pc *pc = gtidle_to_pc(gtidle); - u64 residency; - - residency = gtidle->idle_residency(pc); - return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); -} -static DEVICE_ATTR_RO(idle_residency_ms); - -static const struct attribute *gt_idle_attrs[] = { - &dev_attr_name.attr, - &dev_attr_idle_status.attr, - &dev_attr_idle_residency_ms.attr, - NULL, -}; - -static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg) -{ - struct kobject *kobj = arg; - - sysfs_remove_files(kobj, gt_idle_attrs); - kobject_put(kobj); -} - -void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) -{ - struct xe_gt *gt = gtidle_to_gt(gtidle); - struct xe_device *xe = gt_to_xe(gt); - struct kobject *kobj; - int err; - - kobj = kobject_create_and_add("gtidle", gt->sysfs); - if (!kobj) { - drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM); - return; - } - - if (xe_gt_is_media_type(gt)) { - sprintf(gtidle->name, "gt%d-mc\n", gt->info.id); - gtidle->idle_residency = xe_guc_pc_mc6_residency; - } else { - sprintf(gtidle->name, "gt%d-rc\n", gt->info.id); - gtidle->idle_residency = xe_guc_pc_rc6_residency; - } - - /* Multiplier for Residency counter in units of 1.28us */ - gtidle->residency_multiplier = 1280; - gtidle->idle_status = xe_guc_pc_c_status; - - err = sysfs_create_files(kobj, gt_idle_attrs); - if (err) { - kobject_put(kobj); - drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err); - return; - } - - err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj); - if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); -} diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h deleted file mode 100644 index b0973f96c7ab..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_IDLE_SYSFS_H_ -#define _XE_GT_IDLE_SYSFS_H_ - -#include "xe_gt_idle_sysfs_types.h" - -void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle); - -#endif /* _XE_GT_IDLE_SYSFS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h 
b/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h deleted file mode 100644 index f99b447534f3..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_idle_sysfs_types.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_ -#define _XE_GT_IDLE_SYSFS_TYPES_H_ - -#include - -struct xe_guc_pc; - -/* States of GT Idle */ -enum xe_gt_idle_state { - GT_IDLE_C0, - GT_IDLE_C6, - GT_IDLE_UNKNOWN, -}; - -/** - * struct xe_gt_idle - A struct that contains idle properties based of gt - */ -struct xe_gt_idle { - /** @name: name */ - char name[16]; - /** @residency_multiplier: residency multiplier in ns */ - u32 residency_multiplier; - /** @cur_residency: raw driver copy of idle residency */ - u64 cur_residency; - /** @prev_residency: previous residency counter */ - u64 prev_residency; - /** @idle_status: get the current idle state */ - enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc); - /** @idle_residency: get idle residency counter */ - u64 (*idle_residency)(struct xe_guc_pc *pc); -}; - -#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h new file mode 100644 index 000000000000..f99b447534f3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_IDLE_SYSFS_TYPES_H_ +#define _XE_GT_IDLE_SYSFS_TYPES_H_ + +#include + +struct xe_guc_pc; + +/* States of GT Idle */ +enum xe_gt_idle_state { + GT_IDLE_C0, + GT_IDLE_C6, + GT_IDLE_UNKNOWN, +}; + +/** + * struct xe_gt_idle - A struct that contains idle properties based of gt + */ +struct xe_gt_idle { + /** @name: name */ + char name[16]; + /** @residency_multiplier: residency multiplier in ns */ + u32 residency_multiplier; + /** @cur_residency: raw driver copy of idle residency */ + u64 cur_residency; + /** @prev_residency: previous residency counter */ + u64 prev_residency; + /** @idle_status: get the current idle state */ + enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc); + /** @idle_residency: get idle residency counter */ + u64 (*idle_residency)(struct xe_guc_pc *pc); +}; + +#endif /* _XE_GT_IDLE_SYSFS_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index d3f2793684e2..a96ee7d028aa 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -7,7 +7,7 @@ #define _XE_GT_TYPES_H_ #include "xe_force_wake_types.h" -#include "xe_gt_idle_sysfs_types.h" +#include "xe_gt_idle_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_reg_sr_types.h" -- cgit v1.2.3 From f1cb5f647e8959a1034941d85b311d7485a7095f Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 17 Nov 2023 16:02:01 -0800 Subject: drm/xe: Add skip_guc_pc flag This flag can be used to disable GuC based power management. This could be used for debug or comparison to host based C6. 
v2: Fix missing definition Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ea53f9dfc199..c4b6aa8fcec1 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -264,6 +264,8 @@ struct xe_device { u8 supports_mmio_ext:1; /** @has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; + /** @skip_guc_pc: Skip GuC based PM feature init */ + u8 skip_guc_pc:1; #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) struct { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index bbf2f5b64bac..1c4f9081e84c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -64,6 +64,7 @@ struct xe_device_desc { u8 has_sriov:1; u8 bypass_mtcfg:1; u8 supports_mmio_ext:1; + u8 skip_guc_pc:1; }; __diag_push(); @@ -591,6 +592,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_asid = graphics_desc->has_asid; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; + xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.enable_display && -- cgit v1.2.3 From 975e4a3795d4f1373be538177525c0b714e0e65e Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 17 Nov 2023 16:14:49 -0800 Subject: drm/xe: Manually setup C6 when skip_guc_pc is set Skip the init/start/stop GuC PC functions and toggle C6 using register writes instead. Also request max possible frequency as dynamic freq management is disabled. 
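As a side note on the hunk below: the idle hysteresis value programmed in xe_gt_idle_enable_c6() works out to the 5 s quoted in its comment, given the register's 1280 ns granularity:

  0x3B9ACA = 3,906,250 units
  3,906,250 units * 1280 ns/unit = 5,000,000,000 ns = 5 s
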
v2: Fix compile warning Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 ++++ drivers/gpu/drm/xe/xe_gt_idle.c | 24 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_idle.h | 4 ++++ drivers/gpu/drm/xe/xe_guc_pc.c | 35 ++++++++++++++++++++++++++++++++--- 4 files changed, 64 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 18b13224480d..d318ec0efd7d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -272,7 +272,11 @@ #define RPSWCTL_ENABLE REG_FIELD_PREP(RPSWCTL_MASK, 2) #define RPSWCTL_DISABLE REG_FIELD_PREP(RPSWCTL_MASK, 0) #define RC_CONTROL XE_REG(0xa090) +#define RC_CTL_HW_ENABLE REG_BIT(31) +#define RC_CTL_TO_MODE REG_BIT(28) +#define RC_CTL_RC6_ENABLE REG_BIT(18) #define RC_STATE XE_REG(0xa094) +#define RC_IDLE_HYSTERSIS XE_REG(0xa0ac) #define PMINTRMSK XE_REG(0xa168) #define PMINTR_DISABLE_REDIRECT_TO_GUC REG_BIT(31) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index e5b7e5d38e76..9358f7336889 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -10,6 +10,8 @@ #include "xe_gt_idle.h" #include "xe_gt_sysfs.h" #include "xe_guc_pc.h" +#include "regs/xe_gt_regs.h" +#include "xe_mmio.h" /** * DOC: Xe GT Idle @@ -166,3 +168,25 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", __func__, err); } + +void xe_gt_idle_enable_c6(struct xe_gt *gt) +{ + xe_device_assert_mem_access(gt_to_xe(gt)); + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + /* Units of 1280 ns for a total of 5s */ + xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA); + /* Enable RC6 */ + xe_mmio_write32(gt, RC_CONTROL, + RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE); +} + +void xe_gt_idle_disable_c6(struct xe_gt *gt) +{ + xe_device_assert_mem_access(gt_to_xe(gt)); + xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL); + + xe_mmio_write32(gt, PG_ENABLE, 0); + xe_mmio_write32(gt, RC_CONTROL, 0); + xe_mmio_write32(gt, RC_STATE, 0); +} diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 9b36bf7db3a7..69280fd16b03 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -8,6 +8,10 @@ #include "xe_gt_idle_types.h" +struct xe_gt; + void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle); +void xe_gt_idle_enable_c6(struct xe_gt *gt); +void xe_gt_idle_disable_c6(struct xe_gt *gt); #endif /* _XE_GT_IDLE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 2919c6aea403..1943893a3fd7 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -16,6 +16,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_idle.h" #include "xe_gt_sysfs.h" #include "xe_gt_types.h" #include "xe_guc_ct.h" @@ -869,13 +870,24 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) xe_device_mem_access_get(pc_to_xe(pc)); - memset(pc->bo->vmap.vaddr, 0, size); - slpc_shared_data_write(pc, header.size, size); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) goto out_fail_force_wake; + if (xe->info.skip_guc_pc) { + if (xe->info.platform != XE_PVC) + xe_gt_idle_enable_c6(gt); + + /* Request max possible since dynamic freq mgmt is not enabled */ + pc_set_cur_freq(pc, UINT_MAX); + + ret = 0; + goto out; + } + + 
memset(pc->bo->vmap.vaddr, 0, size); + slpc_shared_data_write(pc, header.size, size); + ret = pc_action_reset(pc); if (ret) goto out; @@ -911,10 +923,17 @@ out_fail_force_wake: */ int xe_guc_pc_stop(struct xe_guc_pc *pc) { + struct xe_device *xe = pc_to_xe(pc); int ret; xe_device_mem_access_get(pc_to_xe(pc)); + if (xe->info.skip_guc_pc) { + xe_gt_idle_disable_c6(pc_to_gt(pc)); + ret = 0; + goto out; + } + mutex_lock(&pc->freq_lock); pc->freq_ready = false; mutex_unlock(&pc->freq_lock); @@ -935,6 +954,13 @@ out: void xe_guc_pc_fini(struct xe_guc_pc *pc) { + struct xe_device *xe = pc_to_xe(pc); + + if (xe->info.skip_guc_pc) { + xe_gt_idle_disable_c6(pc_to_gt(pc)); + return; + } + XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); @@ -955,6 +981,9 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); int err; + if (xe->info.skip_guc_pc) + return 0; + mutex_init(&pc->freq_lock); bo = xe_bo_create_pin_map(xe, tile, NULL, size, -- cgit v1.2.3 From 8e35780233cee1b2d257e6adf4d82b08ded15e88 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 29 Nov 2023 10:37:07 +0000 Subject: drm/xe/mocs: update MOCS table for xe2 Looks like there were some changes at some point here for preferring L4 uncached for some of the indexes. Triple checked the PAT settings also, but that looks all correct as per current BSpec. BSpec: 71582 Signed-off-by: Matthew Auld Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mocs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 12a6d39fcd4a..ef79552e4f2f 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -366,9 +366,9 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = { static const struct xe_mocs_entry xe2_mocs_table[] = { /* Defer to PAT */ - MOCS_ENTRY(0, XE2_L3_0_WB | L4_0_WB, 0), - /* Cached L3 + L4 */ - MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), + MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0), + /* Cached L3, Uncached L4 */ + MOCS_ENTRY(1, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0), /* Uncached L3, Cached L4 */ MOCS_ENTRY(2, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0), /* Uncached L3 + L4 */ @@ -390,8 +390,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->table = xe2_mocs_table; info->n_entries = XE2_NUM_MOCS_ENTRIES; info->uc_index = 3; - info->wb_index = 1; - info->unused_entries_index = 1; + info->wb_index = 4; + info->unused_entries_index = 4; break; case XE_PVC: info->size = ARRAY_SIZE(pvc_mocs_desc); -- cgit v1.2.3 From 082802a3ee09e764bc1513988d6f5889712fe88f Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Tue, 28 Nov 2023 18:53:15 +0200 Subject: drm/xe: add skip_pcode flag Per device, set this flag to enable access to the PCODE uC or to skip it. 
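For illustration, a hypothetical platform entry opting out of PCODE access and the guard it enables (the descriptor below is not part of this patch; the early return mirrors the checks added to xe_pcode.c):

	static const struct xe_device_desc sketch_desc = {
		/* ... */
		.skip_pcode = 1,	/* hypothetical platform with no PCODE mailbox */
	};

	int xe_pcode_init(struct xe_gt *gt)
	{
		if (gt_to_xe(gt)->info.skip_pcode)
			return 0;	/* nothing to initialize or wait for */
		/* ... */
	}
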
Signed-off-by: Koby Elbaz Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 2 ++ drivers/gpu/drm/xe/xe_pcode.c | 9 +++++++++ 3 files changed, 13 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c4b6aa8fcec1..1a95b1587c86 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -260,6 +260,8 @@ struct xe_device { u8 enable_display:1; /** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */ u8 bypass_mtcfg:1; + /** @skip_pcode: skip access to PCODE uC */ + u8 skip_pcode:1; /** @supports_mmio_ext: supports MMIO extension/s */ u8 supports_mmio_ext:1; /** @has_heci_gscfi: device has heci gscfi */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 1c4f9081e84c..57eecaac53b5 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -63,6 +63,7 @@ struct xe_device_desc { u8 has_llc:1; u8 has_sriov:1; u8 bypass_mtcfg:1; + u8 skip_pcode:1; u8 supports_mmio_ext:1; u8 skip_guc_pc:1; }; @@ -581,6 +582,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_llc = desc->has_llc; xe->info.has_sriov = desc->has_sriov; xe->info.bypass_mtcfg = desc->bypass_mtcfg; + xe->info.skip_pcode = desc->skip_pcode; xe->info.supports_mmio_ext = desc->supports_mmio_ext; xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 4a240acf7625..b324dc2a5deb 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -61,6 +61,9 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, { int err; + if (gt_to_xe(gt)->info.skip_pcode) + return 0; + lockdep_assert_held(>->pcode.lock); if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) @@ -249,6 +252,9 @@ int xe_pcode_init(struct xe_gt *gt) int timeout_us = 180000000; /* 3 min */ int ret; + if (gt_to_xe(gt)->info.skip_pcode) + return 0; + if (!IS_DGFX(gt_to_xe(gt))) return 0; @@ -280,6 +286,9 @@ int xe_pcode_probe(struct xe_gt *gt) { drmm_mutex_init(>_to_xe(gt)->drm, >->pcode.lock); + if (gt_to_xe(gt)->info.skip_pcode) + return 0; + if (!IS_DGFX(gt_to_xe(gt))) return 0; -- cgit v1.2.3 From 0c923a68abbfe6d7b4fd2ee37c237aba9d870eaf Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Tue, 28 Nov 2023 18:53:16 +0200 Subject: drm/xe: rename bypass_mtcfg to skip_mtcfg Per device, set this flag to access the MTCFG register or to skip it. This is done to standardise Xe driver naming if an access to any HW should be avoided. 
Signed-off-by: Koby Elbaz Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_types.h | 4 ++-- drivers/gpu/drm/xe/xe_mmio.c | 2 +- drivers/gpu/drm/xe/xe_pci.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 1a95b1587c86..9a212dbdb8a4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -258,8 +258,8 @@ struct xe_device { u8 has_sriov:1; /** @enable_display: display enabled */ u8 enable_display:1; - /** @bypass_mtcfg: Bypass Multi-Tile configuration from MTCFG register */ - u8 bypass_mtcfg:1; + /** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ + u8 skip_mtcfg:1; /** @skip_pcode: skip access to PCODE uC */ u8 skip_pcode:1; /** @supports_mmio_ext: supports MMIO extension/s */ diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 0f846272e39c..883610fc0abf 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -330,7 +330,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe) if (tile_count == 1) goto add_mmio_ext; - if (!xe->info.bypass_mtcfg) { + if (!xe->info.skip_mtcfg) { mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; if (tile_count < xe->info.tile_count) { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 57eecaac53b5..1f3b2ce7c044 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -62,7 +62,7 @@ struct xe_device_desc { u8 has_llc:1; u8 has_sriov:1; - u8 bypass_mtcfg:1; + u8 skip_mtcfg:1; u8 skip_pcode:1; u8 supports_mmio_ext:1; u8 skip_guc_pc:1; @@ -581,7 +581,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.media_name = media_desc ? 
media_desc->name : "none"; xe->info.has_llc = desc->has_llc; xe->info.has_sriov = desc->has_sriov; - xe->info.bypass_mtcfg = desc->bypass_mtcfg; + xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; xe->info.supports_mmio_ext = desc->supports_mmio_ext; xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size; -- cgit v1.2.3 From 33acfc7172ab7f9690536710f0938b787f16a46e Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 29 Nov 2023 22:44:56 +0100 Subject: drm/xe: Fix header guard warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Additional underscore in the header guard causes the build to fail with: drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h:6:9: error: '_XE_ENGINE_CLASS_SYSFS_H_' is used as a header guard here, followed by #define of a different macro [-Werror,-Wheader-guard] Signed-off-by: Michał Winiarski Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20231129214509.1174116-1-michal.winiarski@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h index 60469fde4147..ec5ba673b314 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -4,7 +4,7 @@ */ #ifndef _XE_ENGINE_CLASS_SYSFS_H_ -#define _XE_ENGINE_CLASS_SYSFS_H__ +#define _XE_ENGINE_CLASS_SYSFS_H_ #include -- cgit v1.2.3 From 0d29a76c639900747fd33b0774764aa78c9667da Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 29 Nov 2023 22:44:57 +0100 Subject: drm/xe: Skip calling drm_dev_put on probe error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM device used by Xe is managed, which means that final ref will be dropped on driver detach. 
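A minimal sketch of the managed pattern this relies on (illustrative only): cleanup registered with drmm_add_action_or_reset() runs when the final drm_device reference drops, so probe error paths no longer need an explicit drm_dev_put().

	static void sketch_fini(struct drm_device *drm, void *arg)
	{
		/* runs automatically once the last reference to &xe->drm drops */
	}

	/* on failure the action is run immediately, so a plain return is enough */
	err = drmm_add_action_or_reset(&xe->drm, sketch_fini, NULL);
	if (err)
		return ERR_PTR(err);
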
Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20231129214509.1174116-2-michal.winiarski@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 12 +++++------- drivers/gpu/drm/xe/xe_pci.c | 5 +---- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 54202623e255..296260f142dc 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -215,11 +215,11 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->drm.anon_inode->i_mapping, xe->drm.vma_offset_manager, false, false); if (WARN_ON(err)) - goto err_put; + goto err; err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); if (err) - goto err_put; + goto err; xe->info.devid = pdev->device; xe->info.revid = pdev->revision; @@ -258,18 +258,16 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (!xe->ordered_wq || !xe->unordered_wq) { drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); err = -ENOMEM; - goto err_put; + goto err; } err = xe_display_create(xe); if (WARN_ON(err)) - goto err_put; + goto err; return xe; -err_put: - drm_dev_put(&xe->drm); - +err: return ERR_PTR(err); } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 1f3b2ce7c044..a38fea281eb0 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -706,7 +706,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, xe); err = pci_enable_device(pdev); if (err) - goto err_drm_put; + return err; pci_set_master(pdev); @@ -754,9 +754,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_pci_disable: pci_disable_device(pdev); -err_drm_put: - drm_dev_put(&xe->drm); - return err; } -- cgit v1.2.3 From f321ef042e69859536ba6c97b9f25a2a8f761ef9 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 29 Nov 2023 22:44:58 +0100 Subject: drm/xe: Use managed pci_enable_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe uses devres for most of its driver-lifetime resources, use it for pci device as well. 
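Before/after sketch of the conversion (illustrative only):

	/* before: every later error path must remember to undo this */
	err = pci_enable_device(pdev);
	if (err)
		return err;
	/* ... */
	pci_disable_device(pdev);

	/* after: devres disables the device automatically on detach or failure */
	err = pcim_enable_device(pdev);
	if (err)
		return err;
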
Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231129214509.1174116-3-michal.winiarski@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index a38fea281eb0..6aaa16b15058 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -696,25 +696,26 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (xe_display_driver_probe_defer(pdev)) return -EPROBE_DEFER; + err = pcim_enable_device(pdev); + if (err) + return err; + xe = xe_device_create(pdev, ent); if (IS_ERR(xe)) return PTR_ERR(xe); + pci_set_drvdata(pdev, xe); + xe_pm_assert_unbounded_bridge(xe); subplatform_desc = find_subplatform(xe, desc); - pci_set_drvdata(pdev, xe); - err = pci_enable_device(pdev); - if (err) - return err; - pci_set_master(pdev); xe_sriov_probe_early(xe, desc->has_sriov); err = xe_info_init(xe, desc, subplatform_desc); if (err) - goto err_pci_disable; + return err; xe_display_probe(xe); @@ -745,16 +746,11 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = xe_device_probe(xe); if (err) - goto err_pci_disable; + return err; xe_pm_init(xe); return 0; - -err_pci_disable: - pci_disable_device(pdev); - - return err; } static void xe_pci_shutdown(struct pci_dev *pdev) -- cgit v1.2.3 From 604f7e7777d663033063886b6a5362d0e6092e3a Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 29 Nov 2023 22:44:59 +0100 Subject: drm/xe/irq: Don't call pci_free_irq_vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For devres managed devices, pci_alloc_irq_vectors is also managed (see pci_setup_msi_context for reference). PCI device used by Xe is devres managed (it was enabled with pcim_enable_device), which means that calls to pci_free_irq_vectors are redundant and can be safely removed. Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231129214509.1174116-4-michal.winiarski@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_irq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 25ba5167c1b9..d1f5ba4bb745 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -585,7 +585,6 @@ static void irq_uninstall(struct drm_device *drm, void *arg) irq = pci_irq_vector(pdev, 0); free_irq(irq, xe); - pci_free_irq_vectors(pdev); } int xe_irq_install(struct xe_device *xe) @@ -612,7 +611,7 @@ int xe_irq_install(struct xe_device *xe) err = request_irq(irq, irq_handler, IRQF_SHARED, DRIVER_NAME, xe); if (err < 0) { drm_err(&xe->drm, "Failed to request MSI/MSIX IRQ %d\n", err); - goto free_pci_irq_vectors; + return err; } xe->irq.enabled = true; @@ -627,8 +626,6 @@ int xe_irq_install(struct xe_device *xe) free_irq_handler: free_irq(irq, xe); -free_pci_irq_vectors: - pci_free_irq_vectors(pdev); return err; } -- cgit v1.2.3 From 01c2413a5bc2c66ab54b4aebd3078823a148e69e Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 29 Nov 2023 22:45:00 +0100 Subject: drm/xe: Move xe_set_dma_info outside of MMIO setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MMIO is going to be setup earlier during probe. Move xe_set_dma_info outside of MMIO setup. 
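As an aside (not part of this patch), the two mask calls in the moved helper could be collapsed with the combined API, assuming no platform needs different streaming and coherent masks:

	err = dma_set_mask_and_coherent(xe->drm.dev,
					DMA_BIT_MASK(xe->info.dma_mask_size));
	if (err)
		drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
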
Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20231129214509.1174116-5-michal.winiarski@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 26 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_mmio.c | 26 -------------------------- 2 files changed, 26 insertions(+), 26 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 296260f142dc..dcb087108393 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -353,6 +353,28 @@ static void xe_device_sanitize(struct drm_device *drm, void *arg) xe_gt_sanitize(gt); } +static int xe_set_dma_info(struct xe_device *xe) +{ + unsigned int mask_size = xe->info.dma_mask_size; + int err; + + dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev)); + + err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); + if (err) + goto mask_err; + + err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); + if (err) + goto mask_err; + + return 0; + +mask_err: + drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err); + return err; +} + int xe_device_probe(struct xe_device *xe) { struct xe_tile *tile; @@ -367,6 +389,10 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; + err = xe_set_dma_info(xe); + if (err) + return err; + for_each_tile(tile, xe, id) { err = xe_tile_alloc(tile); if (err) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 883610fc0abf..43f322cd30a1 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -25,28 +25,6 @@ #define BAR_SIZE_SHIFT 20 -static int xe_set_dma_info(struct xe_device *xe) -{ - unsigned int mask_size = xe->info.dma_mask_size; - int err; - - dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev)); - - err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); - if (err) - goto mask_err; - - err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size)); - if (err) - goto mask_err; - - return 0; - -mask_err: - drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err); - return err; -} - static void _resize_bar(struct xe_device *xe, int resno, resource_size_t size) { @@ -431,10 +409,6 @@ int xe_mmio_init(struct xe_device *xe) if (err) return err; - err = xe_set_dma_info(xe); - if (err) - return err; - xe_mmio_probe_tiles(xe); return 0; -- cgit v1.2.3 From f4a0a113f103e23adb4f3ba8a0e02ce4973fdedf Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 29 Nov 2023 22:45:01 +0100 Subject: drm/xe: Move xe_mmio_probe_tiles outside of MMIO setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MMIO is going to be setup earlier during probe. Move xe_mmio_probe_tiles outside of MMIO setup. 
Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20231129214509.1174116-6-michal.winiarski@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_mmio.c | 4 +--- drivers/gpu/drm/xe/xe_mmio.h | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index dcb087108393..785bf2e610b7 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -403,6 +403,8 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; + xe_mmio_probe_tiles(xe); + err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 43f322cd30a1..ab91be1405c9 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -295,7 +295,7 @@ int xe_mmio_probe_vram(struct xe_device *xe) return 0; } -static void xe_mmio_probe_tiles(struct xe_device *xe) +void xe_mmio_probe_tiles(struct xe_device *xe) { size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size; u8 id, tile_count = xe->info.tile_count; @@ -409,8 +409,6 @@ int xe_mmio_init(struct xe_device *xe) if (err) return err; - xe_mmio_probe_tiles(xe); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 218b796629ad..c054c27f6925 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -21,6 +21,7 @@ struct xe_device; #define LMEM_BAR 2 int xe_mmio_init(struct xe_device *xe); +void xe_mmio_probe_tiles(struct xe_device *xe); static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { -- cgit v1.2.3 From a682b6a42d4de68419f23d73afa57fc931fed3c6 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Mon, 20 Nov 2023 10:44:51 -0800 Subject: drm/xe: Support device page faults on integrated platforms Update xe_migrate_prepare_vm() to use the usm batch buffer even for servicing device page faults on integrated platforms. And as we have no VRAM on integrated platforms, device pagefault handler should not attempt to migrate into VRAM. LNL is first integrated platform to support device pagefaults. 
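The resulting servicing policy, condensed for illustration (the real check is the IS_DGFX() test added to xe_pf_begin() below):

	if (atomic && IS_DGFX(vm->xe)) {
		/* discrete: require the VMA in VRAM, fail userptr VMAs with -EACCES */
	} else {
		/* integrated: service the fault from system memory only */
	}
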
Signed-off-by: Brian Welty Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 0a278df6a97f..ccf5a6671faf 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -106,7 +106,7 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, if (err) return err; - if (atomic) { + if (atomic && IS_DGFX(vm->xe)) { if (xe_vma_is_userptr(vma)) { err = -EACCES; return err; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 4aea748c984b..84e138df0172 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -215,10 +215,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } if (!IS_DGFX(xe)) { - xe_tile_assert(tile, !xe->info.supports_usm); - /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; + if (xe->info.supports_usm) { + batch = tile->primary_gt->usm.bb_pool->bo; + m->usm_batch_base_ofs = m->batch_base_ofs; + } + for (i = 0; i < batch->size; i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { -- cgit v1.2.3 From 594b46ba0c8239f9531ac23a4c6eae5c0fad4cf3 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Tue, 21 Nov 2023 12:10:37 -0800 Subject: drm/xe/xe2: Respond to TRTT faults as unsuccessful page fault SW is not expected to handle TRTT faults and should report these as unsuccessful page fault in the reply, such that HW can respond by raising a CAT error. Signed-off-by: Brian Welty Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index ccf5a6671faf..a5358064a4e0 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -34,6 +34,7 @@ struct pagefault { u8 engine_class; u8 engine_instance; u8 fault_unsuccessful; + bool trva_fault; }; enum access_type { @@ -138,6 +139,10 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) int ret = 0; bool atomic; + /* SW isn't expected to handle TRTT faults */ + if (pf->trva_fault) + return -EFAULT; + /* ASID to VM */ mutex_lock(&xe->usm.lock); vm = xa_load(&xe->usm.asid_to_vm, pf->asid); @@ -282,6 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) (pf_queue->data + pf_queue->head); pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); + pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << -- cgit v1.2.3 From 0ac3d319cbdd25839c5034da65d57e3f82b53f6c Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 30 Nov 2023 23:29:41 +0530 Subject: drm/xe/xe2: Add workaround 16020292621 Workaround applies to Graphics 20.04 as part of ring submission V4(MattR): - Rule for engine in oob WA not supported, add explicitly V3(MattR): - Pass hwe and rename API name to hint end of ring work - Use existing RING_NOPID API V2: - Marking this WA for 20.04 instead of 20.00 Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- 
drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 1 + drivers/gpu/drm/xe/xe_ring_ops.c | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 22 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 4402f72481dc..f1c5bf203b3d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -48,6 +48,7 @@ #define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) #define PIPE_CONTROL_AMFS_FLUSH (1<<25) #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) +#define PIPE_CONTROL_LRI_POST_SYNC BIT(23) #define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 59e0aa2d6a4c..1201e42ef823 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -7,6 +7,7 @@ #include "generated/xe_wa_oob.h" #include "instructions/xe_mi_commands.h" +#include "regs/xe_engine_regs.h" #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" @@ -184,6 +185,23 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) return i; } +static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i) +{ + if (hwe->class != XE_ENGINE_CLASS_RENDER) + return i; + + if (XE_WA(hwe->gt, 16020292621)) { + dw[i++] = GFX_OP_PIPE_CONTROL(6); + dw[i++] = PIPE_CONTROL_LRI_POST_SYNC; + dw[i++] = RING_NOPID(hwe->mmio_base).addr; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + } + + return i; +} + static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, int i) { @@ -342,6 +360,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_user_interrupt(dw, i); + i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i); + xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW); xe_lrc_write_ring(lrc, dw, i * sizeof(*dw)); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index c7b7d40b5d57..727bdc429212 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -21,3 +21,4 @@ 22010954014 PLATFORM(DG2) 14019821291 MEDIA_VERSION_RANGE(1300, 2000) 14015076503 MEDIA_VERSION(1300) +16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) -- cgit v1.2.3 From a754391f9c0e16f7ef82c90210da7a12b00dd70d Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 1 Dec 2023 14:44:05 +0530 Subject: drm/xe/dsb: DSB implementation for xe Add xe specific DSB buffer handling methods. v1: Initial version. v2: Add null check after dynamic memory allocation of vma. 
[Uma] Reviewed-by: Uma Shankar Signed-off-by: Animesh Manna Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 71 ++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 drivers/gpu/drm/xe/display/xe_dsb_buffer.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b8ad42fcbea2..e6f98d807783 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -166,6 +166,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_plane_initial.o \ display/xe_display_rps.o \ display/xe_display_misc.o \ + display/xe_dsb_buffer.o \ display/intel_fbdev_fb.o \ display/intel_fb_bo.o \ display/ext/i915_irq.o \ diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c new file mode 100644 index 000000000000..27c2fb1c002a --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023, Intel Corporation. + */ + +#include "i915_drv.h" +#include "i915_vma.h" +#include "intel_display_types.h" +#include "intel_dsb_buffer.h" +#include "xe_bo.h" +#include "xe_gt.h" + +u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) +{ + return xe_bo_ggtt_addr(dsb_buf->vma->bo); +} + +void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) +{ + iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); +} + +u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) +{ + return iosys_map_rd(&dsb_buf->vma->bo->vmap, idx * 4, u32); +} + +void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) +{ + WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); + + iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); +} + +bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) +{ + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (!vma) + return false; + + obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915), + NULL, PAGE_ALIGN(size), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(obj)) { + kfree(vma); + return false; + } + + vma->bo = obj; + dsb_buf->vma = vma; + dsb_buf->buf_size = size; + + return true; +} + +void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) +{ + xe_bo_unpin_map_no_vm(dsb_buf->vma->bo); + kfree(dsb_buf->vma); +} + +void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) +{ + /* TODO: add xe specific flush_map() for dsb buffer object. */ +} -- cgit v1.2.3 From 4f5ee007f62a1825cec8140b14b28ef532f570f8 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:32:56 +0100 Subject: drm/xe: Split xe_info_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parts of xe_info_init are only dealing with processing driver_data. Extract it into xe_info_init_early to be able to use it earlier during probe. 
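The probe ordering this split enables, sketched for illustration (xe_device_probe_early() arrives in a later patch of this series):

	err = xe_info_init_early(xe, desc, subplatform_desc);	/* static driver_data only */
	if (err)
		return err;
	err = xe_device_probe_early(xe);	/* just enough MMIO to read GMD_ID */
	if (err)
		return err;
	err = xe_info_init(xe, desc->graphics, desc->media);	/* needs IP versions */
	if (err)
		return err;
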
Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_pci.c | 3 +- drivers/gpu/drm/xe/xe_pci.c | 78 +++++++++++++++++++++++---------------- 2 files changed, 48 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 306ff8cb35cb..d850dca85af1 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -143,7 +143,8 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, return -ENODEV; done: - xe_info_init(xe, desc, subplatform_desc); + xe_info_init_early(xe, desc, subplatform_desc); + xe_info_init(xe, desc->graphics, desc->media); return 0; } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 6aaa16b15058..9e35ebfb3341 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -473,16 +473,13 @@ static void peek_gmdid(struct xe_device *xe, u32 gmdid_offset, u32 *ver, u32 *re * media is optional. */ static void handle_pre_gmdid(struct xe_device *xe, - const struct xe_device_desc *desc, - const struct xe_graphics_desc **graphics, - const struct xe_media_desc **media) + const struct xe_graphics_desc *graphics, + const struct xe_media_desc *media) { - *graphics = desc->graphics; - xe->info.graphics_verx100 = (*graphics)->ver * 100 + (*graphics)->rel; + xe->info.graphics_verx100 = graphics->ver * 100 + graphics->rel; - *media = desc->media; - if (*media) - xe->info.media_verx100 = (*media)->ver * 100 + (*media)->rel; + if (media) + xe->info.media_verx100 = media->ver * 100 + media->rel; } @@ -491,7 +488,6 @@ static void handle_pre_gmdid(struct xe_device *xe, * based on the result. */ static void handle_gmdid(struct xe_device *xe, - const struct xe_device_desc *desc, const struct xe_graphics_desc **graphics, const struct xe_media_desc **media, u32 *graphics_revid, @@ -535,32 +531,59 @@ static void handle_gmdid(struct xe_device *xe, } } +/* + * Initialize device info content that only depends on static driver_data + * passed to the driver at probe time from PCI ID table. + */ +static void xe_info_init_early(struct xe_device *xe, + const struct xe_device_desc *desc, + const struct xe_subplatform_desc *subplatform_desc) +{ + xe->info.platform = desc->platform; + xe->info.subplatform = subplatform_desc ? + subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; + + xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_heci_gscfi = desc->has_heci_gscfi; + xe->info.has_llc = desc->has_llc; + xe->info.has_sriov = desc->has_sriov; + xe->info.skip_mtcfg = desc->skip_mtcfg; + xe->info.skip_pcode = desc->skip_pcode; + xe->info.supports_mmio_ext = desc->supports_mmio_ext; + xe->info.skip_guc_pc = desc->skip_guc_pc; + + xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && + xe_modparam.enable_display && + desc->has_display; +} + +/* + * Initialize device info content that does require knowledge about + * graphics / media IP version. + * Make sure that GT / tile structures allocated by the driver match the data + * present in device info. 
+ */ static int xe_info_init(struct xe_device *xe, - const struct xe_device_desc *desc, - const struct xe_subplatform_desc *subplatform_desc) + const struct xe_graphics_desc *graphics_desc, + const struct xe_media_desc *media_desc) { - const struct xe_graphics_desc *graphics_desc = NULL; - const struct xe_media_desc *media_desc = NULL; u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0; struct xe_tile *tile; struct xe_gt *gt; u8 id; - xe->info.platform = desc->platform; - xe->info.subplatform = subplatform_desc ? - subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; - /* * If this platform supports GMD_ID, we'll detect the proper IP * descriptor to use from hardware registers. desc->graphics will only * ever be set at this point for platforms before GMD_ID. In that case * the IP descriptions and versions are simply derived from that. */ - if (desc->graphics) { - handle_pre_gmdid(xe, desc, &graphics_desc, &media_desc); + if (graphics_desc) { + handle_pre_gmdid(xe, graphics_desc, media_desc); xe->info.step = xe_step_pre_gmdid_get(xe); } else { - handle_gmdid(xe, desc, &graphics_desc, &media_desc, + xe_assert(xe, !media_desc); + handle_gmdid(xe, &graphics_desc, &media_desc, &graphics_gmdid_revid, &media_gmdid_revid); xe->info.step = xe_step_gmdid_get(xe, graphics_gmdid_revid, @@ -575,15 +598,8 @@ static int xe_info_init(struct xe_device *xe, if (!graphics_desc) return -ENODEV; - xe->info.is_dgfx = desc->is_dgfx; - xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? media_desc->name : "none"; - xe->info.has_llc = desc->has_llc; - xe->info.has_sriov = desc->has_sriov; - xe->info.skip_mtcfg = desc->skip_mtcfg; - xe->info.skip_pcode = desc->skip_pcode; - xe->info.supports_mmio_ext = desc->supports_mmio_ext; xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size; xe->info.dma_mask_size = graphics_desc->dma_mask_size; @@ -594,11 +610,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_asid = graphics_desc->has_asid; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; - xe->info.skip_guc_pc = desc->skip_guc_pc; - xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && - xe_modparam.enable_display && - desc->has_display; /* * All platforms have at least one primary GT. Any platform with media * version 13 or higher has an additional dedicated media GT. And @@ -711,9 +723,11 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); + xe_info_init_early(xe, desc, subplatform_desc); + xe_sriov_probe_early(xe, desc->has_sriov); - err = xe_info_init(xe, desc, subplatform_desc); + err = xe_info_init(xe, desc->graphics, desc->media); if (err) return err; -- cgit v1.2.3 From 7e4ce4518b906a960122f29e8f3426ca95ebee0a Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:32:57 +0100 Subject: drm/xe: Introduce xe_tile_init_early and use at earlier point in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It also merges the GT (which is part of tile) initialization happening at xe_info_init with allocating other per-tile data structures into a common helper function. 
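Call-site shape after this change, sketched for illustration:

	for_each_tile(tile, xe, id) {
		err = xe_tile_init_early(tile, xe, id);
		if (err)
			return err;
	}
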
Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 6 ------ drivers/gpu/drm/xe/xe_pci.c | 10 +++++----- drivers/gpu/drm/xe/xe_tile.c | 32 +++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_tile.h | 2 +- 4 files changed, 37 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 785bf2e610b7..5e1f73c8c77a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -393,12 +393,6 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - for_each_tile(tile, xe, id) { - err = xe_tile_alloc(tile); - if (err) - return err; - } - err = xe_mmio_init(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 9e35ebfb3341..87257716b93e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -26,6 +26,7 @@ #include "xe_pm.h" #include "xe_sriov.h" #include "xe_step.h" +#include "xe_tile.h" enum toggle_d3cold { D3COLD_DISABLE, @@ -623,12 +624,11 @@ static int xe_info_init(struct xe_device *xe, xe->info.tile_count = 1 + graphics_desc->max_remote_tiles; for_each_tile(tile, xe, id) { - tile->xe = xe; - tile->id = id; + int err; - tile->primary_gt = xe_gt_alloc(tile); - if (IS_ERR(tile->primary_gt)) - return PTR_ERR(tile->primary_gt); + err = xe_tile_init_early(tile, xe, id); + if (err) + return err; gt = tile->primary_gt; gt->info.id = xe->info.gt_count++; diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 131752a57f65..c74a4f840d84 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -7,6 +7,7 @@ #include "xe_device.h" #include "xe_ggtt.h" +#include "xe_gt.h" #include "xe_migrate.h" #include "xe_sa.h" #include "xe_tile.h" @@ -80,7 +81,7 @@ * * Returns -ENOMEM if allocations fail, otherwise 0. */ -int xe_tile_alloc(struct xe_tile *tile) +static int xe_tile_alloc(struct xe_tile *tile) { struct drm_device *drm = &tile_to_xe(tile)->drm; @@ -97,6 +98,35 @@ int xe_tile_alloc(struct xe_tile *tile) return 0; } +/** + * xe_tile_init_early - Initialize the tile and primary GT + * @tile: Tile to initialize + * @xe: Parent Xe device + * @id: Tile ID + * + * Initializes per-tile resources that don't require any interactions with the + * hardware or any knowledge about the Graphics/Media IP version. + * + * Returns: 0 on success, negative error code on error. 
+ */ +int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) +{ + int err; + + tile->xe = xe; + tile->id = id; + + err = xe_tile_alloc(tile); + if (err) + return err; + + tile->primary_gt = xe_gt_alloc(tile); + if (IS_ERR(tile->primary_gt)) + return PTR_ERR(tile->primary_gt); + + return 0; +} + static int tile_ttm_mgr_init(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 782c47f8bd45..1c9e42ade6b0 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -10,7 +10,7 @@ struct xe_tile; -int xe_tile_alloc(struct xe_tile *tile); +int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); -- cgit v1.2.3 From 99e4b1aa8dbe2e23c73229ac1bbd9dc3e6b30c80 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:32:58 +0100 Subject: drm/xe: Map the entire BAR0 and hold onto the initial mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both MMIO registers and GGTT for root tile will need to be used earlier during probe. Don't rely on tile count to compute the mapping size. Further more, there's no need to remap after figuring out the real resource size. Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_mmio.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index ab91be1405c9..2e6c94731a5a 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -300,7 +300,6 @@ void xe_mmio_probe_tiles(struct xe_device *xe) size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size; u8 id, tile_count = xe->info.tile_count; struct xe_gt *gt = xe_root_mmio_gt(xe); - const int mmio_bar = 0; struct xe_tile *tile; void *regs; u32 mtcfg; @@ -314,9 +313,6 @@ void xe_mmio_probe_tiles(struct xe_device *xe) if (tile_count < xe->info.tile_count) { drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", xe->info.tile_count, tile_count); - pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); - xe->mmio.size = (tile_mmio_size + tile_mmio_ext_size) * tile_count; - xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size); xe->info.tile_count = tile_count; /* @@ -381,17 +377,17 @@ static int xe_verify_lmem_ready(struct xe_device *xe) int xe_mmio_init(struct xe_device *xe) { struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); const int mmio_bar = 0; int err; /* - * Map the maximum expected BAR size, which will get remapped later - * if we determine that we're running on a reduced-tile system. + * Map the entire BAR. * The first 16MB of the BAR, belong to the root tile, and include: * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). 
*/ - xe->mmio.size = (SZ_16M + xe->info.tile_mmio_ext_size) * xe->info.tile_count; - xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size); + xe->mmio.size = pci_resource_len(pdev, mmio_bar); + xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0); if (xe->mmio.regs == NULL) { drm_err(&xe->drm, "failed to map registers\n"); return -EIO; -- cgit v1.2.3 From 4f122766f9043c30b879b44f7dc2ca540b5422cd Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:32:59 +0100 Subject: drm/xe/device: Introduce xe_device_probe_early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SR-IOV VF doesn't have access to MMIO registers used to determine graphics/media ID. It can however communicate with GuC. Introduce xe_device_probe_early, which initializes enough HW to use MMIO GuC communication. This will allow both VF and PF/native driver to have unified probe ordering. Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 22 ++++++++++++++++++---- drivers/gpu/drm/xe/xe_device.h | 5 +++++ drivers/gpu/drm/xe/xe_mmio.c | 16 ++++++++++------ drivers/gpu/drm/xe/xe_mmio.h | 1 + drivers/gpu/drm/xe/xe_pci.c | 26 +++++++++++++++++++++----- 5 files changed, 55 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 5e1f73c8c77a..f4be4b13a506 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -375,6 +375,24 @@ mask_err: return err; } +/* + * Initialize MMIO resources that don't require any knowledge about tile count. + */ +int xe_device_probe_early(struct xe_device *xe) +{ + int err; + + err = xe_mmio_init(xe); + if (err) + return err; + + err = xe_mmio_root_tile_init(xe); + if (err) + return err; + + return 0; +} + int xe_device_probe(struct xe_device *xe) { struct xe_tile *tile; @@ -393,10 +411,6 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - err = xe_mmio_init(xe); - if (err) - return err; - xe_mmio_probe_tiles(xe); err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 54694f98c91a..3da83b233206 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -37,6 +37,7 @@ static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm) struct xe_device *xe_device_create(struct pci_dev *pdev, const struct pci_device_id *ent); +int xe_device_probe_early(struct xe_device *xe); int xe_device_probe(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); @@ -123,6 +124,10 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) for ((id__) = 0; (id__) < (xe__)->info.tile_count; (id__)++) \ for_each_if((tile__) = &(xe__)->tiles[(id__)]) +#define for_each_remote_tile(tile__, xe__, id__) \ + for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ + for_each_if((tile__) = &(xe__)->tiles[(id__)]) + /* * FIXME: This only works for now since multi-tile and standalone media * happen to be mutually exclusive. Future platforms may change this... 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 2e6c94731a5a..35aeb50b7158 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -15,10 +15,12 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_macros.h" #include "xe_module.h" +#include "xe_tile.h" #define XEHP_MTCFG_ADDR XE_REG(0x101800) #define TILE_COUNT REG_GENMASK(15, 8) @@ -376,10 +378,8 @@ static int xe_verify_lmem_ready(struct xe_device *xe) int xe_mmio_init(struct xe_device *xe) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); const int mmio_bar = 0; - int err; /* * Map the entire BAR. @@ -393,12 +393,16 @@ int xe_mmio_init(struct xe_device *xe) return -EIO; } - err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); - if (err) - return err; + return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); +} + +int xe_mmio_root_tile_init(struct xe_device *xe) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + int err; /* Setup first tile; other tiles (if present) will be setup later. */ - root_tile->mmio.size = xe->mmio.size; + root_tile->mmio.size = SZ_16M; root_tile->mmio.regs = xe->mmio.regs; err = xe_verify_lmem_ready(xe); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index c054c27f6925..98de5c13c89b 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -21,6 +21,7 @@ struct xe_device; #define LMEM_BAR 2 int xe_mmio_init(struct xe_device *xe); +int xe_mmio_root_tile_init(struct xe_device *xe); void xe_mmio_probe_tiles(struct xe_device *xe); static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 87257716b93e..d1b8f268ce09 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -536,10 +536,12 @@ static void handle_gmdid(struct xe_device *xe, * Initialize device info content that only depends on static driver_data * passed to the driver at probe time from PCI ID table. */ -static void xe_info_init_early(struct xe_device *xe, - const struct xe_device_desc *desc, - const struct xe_subplatform_desc *subplatform_desc) +static int xe_info_init_early(struct xe_device *xe, + const struct xe_device_desc *desc, + const struct xe_subplatform_desc *subplatform_desc) { + int err; + xe->info.platform = desc->platform; xe->info.subplatform = subplatform_desc ? 
subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; @@ -556,6 +558,12 @@ static void xe_info_init_early(struct xe_device *xe, xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.enable_display && desc->has_display; + + err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); + if (err) + return err; + + return 0; } /* @@ -623,13 +631,15 @@ static int xe_info_init(struct xe_device *xe, */ xe->info.tile_count = 1 + graphics_desc->max_remote_tiles; - for_each_tile(tile, xe, id) { + for_each_remote_tile(tile, xe, id) { int err; err = xe_tile_init_early(tile, xe, id); if (err) return err; + } + for_each_tile(tile, xe, id) { gt = tile->primary_gt; gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; @@ -723,10 +733,16 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - xe_info_init_early(xe, desc, subplatform_desc); + err = xe_info_init_early(xe, desc, subplatform_desc); + if (err) + return err; xe_sriov_probe_early(xe, desc->has_sriov); + err = xe_device_probe_early(xe); + if (err) + return err; + err = xe_info_init(xe, desc->graphics, desc->media); if (err) return err; -- cgit v1.2.3 From 51fb5ef209b988a3acee3bc7de04bb70aec51ff5 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:00 +0100 Subject: drm/xe: Don't "peek" into GMD_ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that MMIO init got moved to device early, we can use regular xe_mmio_read helpers to get to GMD_ID register. Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index d1b8f268ce09..b2e87746e8d8 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -21,6 +21,7 @@ #include "xe_drv.h" #include "xe_gt.h" #include "xe_macros.h" +#include "xe_mmio.h" #include "xe_module.h" #include "xe_pci_types.h" #include "xe_pm.h" @@ -444,26 +445,22 @@ find_subplatform(const struct xe_device *xe, const struct xe_device_desc *desc) return NULL; } -static void peek_gmdid(struct xe_device *xe, u32 gmdid_offset, u32 *ver, u32 *revid) +enum xe_gmdid_type { + GMDID_GRAPHICS, + GMDID_MEDIA +}; + +static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) { - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - void __iomem *map = pci_iomap_range(pdev, 0, gmdid_offset, sizeof(u32)); + struct xe_gt *gt = xe_root_mmio_gt(xe); + struct xe_reg gmdid_reg = GMD_ID; u32 val; - if (!map) { - drm_err(&xe->drm, "Failed to read GMD_ID (%#x) from PCI BAR.\n", - gmdid_offset); - *ver = 0; - *revid = 0; + if (type == GMDID_MEDIA) + gmdid_reg.addr += MEDIA_GT_GSI_OFFSET; - return; - } - - val = ioread32(map); - pci_iounmap(pdev, map); - - *ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + - REG_FIELD_GET(GMD_ID_RELEASE_MASK, val); + val = xe_mmio_read32(gt, gmdid_reg); + *ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val); *revid = REG_FIELD_GET(GMD_ID_REVID, val); } @@ -496,7 +493,8 @@ static void handle_gmdid(struct xe_device *xe, { u32 ver; - peek_gmdid(xe, GMD_ID.addr, &ver, graphics_revid); + read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid); + for (int i = 0; i < ARRAY_SIZE(graphics_ip_map); i++) { if (ver == 
graphics_ip_map[i].ver) { xe->info.graphics_verx100 = ver; @@ -511,7 +509,7 @@ static void handle_gmdid(struct xe_device *xe, ver / 100, ver % 100); } - peek_gmdid(xe, GMD_ID.addr + 0x380000, &ver, media_revid); + read_gmdid(xe, GMDID_MEDIA, &ver, media_revid); /* Media may legitimately be fused off / not present */ if (ver == 0) -- cgit v1.2.3 From 1ccd68e967f13a584bf3d45a58865afb0abbf2a4 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:01 +0100 Subject: drm/xe: Move system memory management init to earlier point in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC will need to be loaded earlier during probe. And in order to load GuC, we will need the ability to create system memory allocations. Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f4be4b13a506..dd56a8c3f80d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -413,6 +413,8 @@ int xe_device_probe(struct xe_device *xe) xe_mmio_probe_tiles(xe); + xe_ttm_sys_mgr_init(xe); + err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); if (err) return err; @@ -441,8 +443,6 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_irq_shutdown; - xe_ttm_sys_mgr_init(xe); - for_each_tile(tile, xe, id) { err = xe_tile_init_noalloc(tile); if (err) -- cgit v1.2.3 From b62f828a8368de59eb5b353788ace58fb6154495 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:02 +0100 Subject: drm/xe: Move force_wake init to earlier point in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC will need to be loaded earlier during probe. And in order to load GuC, being able to take the forcewake is going to be needed. Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 3 +++ drivers/gpu/drm/xe/xe_gt.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index dd56a8c3f80d..bcc10b7f23ab 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -415,6 +415,9 @@ int xe_device_probe(struct xe_device *xe) xe_ttm_sys_mgr_init(xe); + for_each_gt(gt, xe, id) + xe_force_wake_init_gt(gt, gt_to_fw(gt)); + err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 8a6fb9641cd6..4db94344bbde 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -293,8 +293,6 @@ int xe_gt_init_early(struct xe_gt *gt) { int err; - xe_force_wake_init_gt(gt, gt_to_fw(gt)); - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) return err; -- cgit v1.2.3 From 791d0362a9e2d47352ee6b35cc8999cb3404e27c Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:03 +0100 Subject: drm/xe: Reorder GGTT init to earlier point in probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC will need to be loaded earlier during probe. Having functional GGTT is one of the prerequisites. Also rename xe_ggtt_init_noalloc to xe_ggtt_init_early to match the new call site. 
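The two-stage GGTT bring-up after this change, condensed for illustration:

	/* early, before BO allocation works: mappings usable by the GuC only */
	err = xe_ggtt_init_early(tile->mem.ggtt);
	if (err)
		return err;
	/* later, full init: allocate the scratch page and do the initial clear */
	err = xe_ggtt_init(tile->mem.ggtt);
	if (err)
		return err;
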
Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 7 +++++++ drivers/gpu/drm/xe/xe_ggtt.c | 22 ++++++++++++++++++---- drivers/gpu/drm/xe/xe_ggtt.h | 2 +- drivers/gpu/drm/xe/xe_tile.c | 4 ---- 4 files changed, 26 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index bcc10b7f23ab..65e9aa5e6c31 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -24,6 +24,7 @@ #include "xe_drv.h" #include "xe_exec_queue.h" #include "xe_exec.h" +#include "xe_ggtt.h" #include "xe_gt.h" #include "xe_irq.h" #include "xe_mmio.h" @@ -418,6 +419,12 @@ int xe_device_probe(struct xe_device *xe) for_each_gt(gt, xe, id) xe_force_wake_init_gt(gt, gt_to_fw(gt)); + for_each_tile(tile, xe, id) { + err = xe_ggtt_init_early(tile->mem.ggtt); + if (err) + return err; + } + err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0e2a41837f16..f8bdbd6010f7 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -96,14 +96,20 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) } } -static void ggtt_fini_noalloc(struct drm_device *drm, void *arg) +static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; mutex_destroy(&ggtt->lock); drm_mm_takedown(&ggtt->mm); +} + +static void ggtt_fini(struct drm_device *drm, void *arg) +{ + struct xe_ggtt *ggtt = arg; xe_bo_unpin_map_no_vm(ggtt->scratch); + ggtt->scratch = NULL; } static void primelockdep(struct xe_ggtt *ggtt) @@ -124,7 +130,14 @@ static const struct xe_ggtt_pt_ops xelpg_pt_ops = { .pte_encode_bo = xelpg_ggtt_pte_encode_bo, }; -int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) +/* + * Early GGTT initialization, which allows to create new mappings usable by the + * GuC. + * Mappings are not usable by the HW engines, as it doesn't have scratch / + * initial clear done to it yet. That will happen in the regular, non-early + * GGTT init. 
+ */ +int xe_ggtt_init_early(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -178,7 +191,7 @@ int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt) mutex_init(&ggtt->lock); primelockdep(ggtt); - return drmm_add_action_or_reset(&xe->drm, ggtt_fini_noalloc, ggtt); + return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); } static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) @@ -226,7 +239,8 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); xe_ggtt_initial_clear(ggtt); - return 0; + + return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt); err: ggtt->scratch = NULL; return err; diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 3faa3c6d0375..a09c166dff70 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -12,7 +12,7 @@ struct drm_printer; void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); void xe_ggtt_invalidate(struct xe_ggtt *ggtt); -int xe_ggtt_init_noalloc(struct xe_ggtt *ggtt); +int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index c74a4f840d84..044c20881de7 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -166,10 +166,6 @@ int xe_tile_init_noalloc(struct xe_tile *tile) if (err) goto err_mem_access; - err = xe_ggtt_init_noalloc(tile->mem.ggtt); - if (err) - goto err_mem_access; - tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) err = PTR_ERR(tile->mem.kernel_bb_pool); -- cgit v1.2.3 From 0e1a47fcabc8ffa6f460c60c2caa04e51170fa22 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:04 +0100 Subject: drm/xe: Add a helper for DRM device-lifetime BO create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A helper for managed BO allocations makes it possible to remove specific "fini" actions and will simplify the following patches adding ability to execute a release action for specific BO directly. 
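Usage sketch (illustrative only): callers of the new helper no longer need their own fini action for the BO.

	bo = xe_managed_bo_create_pin_map(xe, tile, size,
					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
					  XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);
	/* no explicit xe_bo_unpin_map_no_vm(); a drmm action releases the BO */
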
Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 4 ++++ drivers/gpu/drm/xe/xe_ggtt.c | 6 +----- drivers/gpu/drm/xe/xe_guc_ads.c | 20 +++----------------- drivers/gpu/drm/xe/xe_guc_ct.c | 8 +++----- drivers/gpu/drm/xe/xe_guc_hwconfig.c | 18 +++--------------- drivers/gpu/drm/xe/xe_guc_log.c | 19 +++---------------- drivers/gpu/drm/xe/xe_guc_pc.c | 9 +++------ drivers/gpu/drm/xe/xe_hw_engine.c | 8 +++----- drivers/gpu/drm/xe/xe_uc_fw.c | 9 ++++----- 10 files changed, 63 insertions(+), 74 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index dc1ad3b4dc2a..5e3493f21b59 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -1540,6 +1541,41 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } +static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +{ + xe_bo_unpin_map_no_vm(arg); +} + +struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, + size_t size, u32 flags) +{ + struct xe_bo *bo; + int ret; + + bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags); + if (IS_ERR(bo)) + return bo; + + ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + if (ret) + return ERR_PTR(ret); + + return bo; +} + +struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, + const void *data, size_t size, u32 flags) +{ + struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, size, flags); + + if (IS_ERR(bo)) + return bo; + + xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); + + return bo; +} + /* * XXX: This is in the VM bind data path, likely should calculate this once and * store, with a recalculation if the BO is moved. 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 6f183568f76d..9b1279aca127 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -124,6 +124,10 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, enum ttm_bo_type type, u32 flags); +struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, + size_t size, u32 flags); +struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, + const void *data, size_t size, u32 flags); int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index f8bdbd6010f7..374ae4289fa0 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -108,7 +108,6 @@ static void ggtt_fini(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; - xe_bo_unpin_map_no_vm(ggtt->scratch); ggtt->scratch = NULL; } @@ -227,10 +226,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) else flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile); - ggtt->scratch = xe_bo_create_pin_map(xe, ggtt->tile, NULL, XE_PAGE_SIZE, - ttm_bo_type_kernel, - flags); - + ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags); if (IS_ERR(ggtt->scratch)) { err = PTR_ERR(ggtt->scratch); goto err; diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 88789826e781..2f5ff090aa6b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -202,13 +202,6 @@ static size_t guc_ads_size(struct xe_guc_ads *ads) guc_ads_private_data_size(ads); } -static void guc_ads_fini(struct drm_device *drm, void *arg) -{ - struct xe_guc_ads *ads = arg; - - xe_bo_unpin_map_no_vm(ads->bo); -} - static bool needs_wa_1607983814(struct xe_device *xe) { return GRAPHICS_VERx100(xe) < 1250; @@ -274,25 +267,18 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) struct xe_gt *gt = ads_to_gt(ads); struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo; - int err; ads->golden_lrc_size = calculate_golden_lrc_size(ads); ads->regset_size = calculate_regset_size(gt); - bo = xe_bo_create_pin_map(xe, tile, NULL, guc_ads_size(ads) + - MAX_GOLDEN_LRC_SIZE, - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); ads->bo = bo; - err = drmm_add_action_or_reset(&xe->drm, guc_ads_fini, ads); - if (err) - return err; - return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 6295d916e39f..24a33fa36496 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -114,7 +114,6 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) struct xe_guc_ct *ct = arg; xa_destroy(&ct->fence_lookup); - xe_bo_unpin_map_no_vm(ct->bo); } static void g2h_worker_func(struct work_struct *w); @@ -148,10 +147,9 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); - bo = xe_bo_create_pin_map(xe, tile, NULL, guc_ct_size(), - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return 
PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 98bb9bb30705..2a13a00917f8 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -48,13 +48,6 @@ static int guc_hwconfig_copy(struct xe_guc *guc) return 0; } -static void guc_hwconfig_fini(struct drm_device *drm, void *arg) -{ - struct xe_guc *guc = arg; - - xe_bo_unpin_map_no_vm(guc->hwconfig.bo); -} - int xe_guc_hwconfig_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -84,19 +77,14 @@ int xe_guc_hwconfig_init(struct xe_guc *guc) if (!size) return -EINVAL; - bo = xe_bo_create_pin_map(xe, tile, NULL, PAGE_ALIGN(size), - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size), + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); guc->hwconfig.bo = bo; guc->hwconfig.size = size; - err = drmm_add_action_or_reset(&xe->drm, guc_hwconfig_fini, guc); - if (err) - return err; - return guc_hwconfig_copy(guc); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 27c3827bfd05..bcd2f4d34081 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -77,24 +77,15 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p) } } -static void guc_log_fini(struct drm_device *drm, void *arg) -{ - struct xe_guc_log *log = arg; - - xe_bo_unpin_map_no_vm(log->bo); -} - int xe_guc_log_init(struct xe_guc_log *log) { struct xe_device *xe = log_to_xe(log); struct xe_tile *tile = gt_to_tile(log_to_gt(log)); struct xe_bo *bo; - int err; - bo = xe_bo_create_pin_map(xe, tile, NULL, guc_log_size(), - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -102,9 +93,5 @@ int xe_guc_log_init(struct xe_guc_log *log) log->bo = bo; log->level = xe_modparam.guc_log_level; - err = drmm_add_action_or_reset(&xe->drm, guc_log_fini, log); - if (err) - return err; - return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 1943893a3fd7..d2605a684b1c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -964,7 +964,6 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc) XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); - xe_bo_unpin_map_no_vm(pc->bo); mutex_destroy(&pc->freq_lock); } @@ -986,11 +985,9 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) mutex_init(&pc->freq_lock); - bo = xe_bo_create_pin_map(xe, tile, NULL, size, - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); - + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c52c26c395a7..108ecbfe593e 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -239,8 +239,6 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) xe_execlist_port_destroy(hwe->exl_port); xe_lrc_finish(&hwe->kernel_lrc); - xe_bo_unpin_map_no_vm(hwe->hwsp); - hwe->gt = NULL; } @@ -428,9 +426,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine 
*hwe, xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); xe_reg_sr_apply_whitelist(hwe); - hwe->hwsp = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); if (IS_ERR(hwe->hwsp)) { err = PTR_ERR(hwe->hwsp); goto err_name; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index e806e7b6ae42..8ad4bcabb8b5 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -292,7 +292,6 @@ static void uc_fw_fini(struct drm_device *drm, void *arg) if (!xe_uc_fw_is_available(uc_fw)) return; - xe_bo_unpin_map_no_vm(uc_fw->bo); xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); } @@ -692,10 +691,9 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) goto fail; } - obj = xe_bo_create_from_data(xe, tile, fw->data, fw->size, - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + obj = xe_managed_bo_create_from_data(xe, tile, fw->data, fw->size, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_GGTT_BIT); if (IS_ERR(obj)) { drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); @@ -726,6 +724,7 @@ fail: xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); release_firmware(fw); /* OK even if fw is NULL */ + return err; } -- cgit v1.2.3 From c93ea05191c5b67ecaa784085f8a73e02abcfc76 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:05 +0100 Subject: drm/xe/uc: Split xe_uc_fw_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function does a driver specific "request firmware" step that includes validating the input, followed by wrapping the firmware binary into a buffer object. Split it into smaller parts. Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 80 +++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 8ad4bcabb8b5..f258eb44fe31 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -635,15 +635,12 @@ do { \ ver_->major, ver_->minor, ver_->patch); \ } while (0) -int xe_uc_fw_init(struct xe_uc_fw *uc_fw) +static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p) { struct xe_device *xe = uc_fw_to_xe(uc_fw); - struct xe_gt *gt = uc_fw_to_gt(uc_fw); - struct xe_tile *tile = gt_to_tile(gt); struct device *dev = xe->drm.dev; struct drm_printer p = drm_info_printer(dev); const struct firmware *fw = NULL; - struct xe_bo *obj; int err; /* @@ -691,9 +688,39 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) goto fail; } - obj = xe_managed_bo_create_from_data(xe, tile, fw->data, fw->size, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + *firmware_p = fw; + + return 0; + +fail: + xe_uc_fw_change_status(uc_fw, err == -ENOENT ? 
+ XE_UC_FIRMWARE_MISSING : + XE_UC_FIRMWARE_ERROR); + + drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", + xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", + xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); + + release_firmware(fw); /* OK even if fw is NULL */ + + return err; +} + +static void uc_fw_release(const struct firmware *fw) +{ + release_firmware(fw); +} + +static int uc_fw_copy(struct xe_uc_fw *uc_fw, const void *data, size_t size, u32 flags) +{ + struct xe_device *xe = uc_fw_to_xe(uc_fw); + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *obj; + int err; + + obj = xe_managed_bo_create_from_data(xe, tile, data, size, flags); if (IS_ERR(obj)) { drm_notice(&xe->drm, "%s firmware %s: failed to create / populate bo", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path); @@ -702,28 +729,43 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) } uc_fw->bo = obj; - uc_fw->size = fw->size; - xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE); + uc_fw->size = size; - release_firmware(fw); + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_AVAILABLE); err = drmm_add_action_or_reset(&xe->drm, uc_fw_fini, uc_fw); if (err) - return err; + goto fail; return 0; fail: - xe_uc_fw_change_status(uc_fw, err == -ENOENT ? - XE_UC_FIRMWARE_MISSING : - XE_UC_FIRMWARE_ERROR); - - drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n", + xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_ERROR); + drm_notice(&xe->drm, "%s firmware %s: copy failed with error %d\n", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n", - xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL); - release_firmware(fw); /* OK even if fw is NULL */ + return err; +} + +int xe_uc_fw_init(struct xe_uc_fw *uc_fw) +{ + const struct firmware *fw = NULL; + struct xe_gt *gt = uc_fw_to_gt(uc_fw); + struct xe_tile *tile = gt_to_tile(gt); + int err; + + err = uc_fw_request(uc_fw, &fw); + if (err) + return err; + + /* no error and no firmware means nothing to copy */ + if (!fw) + return 0; + + err = uc_fw_copy(uc_fw, fw->data, fw->size, + XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); + + uc_fw_release(fw); return err; } -- cgit v1.2.3 From bf2d0d88c3b8d325eee670b2e0b4545de6d30998 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:06 +0100 Subject: drm/xe/uc: Store firmware binary in system-memory backed BO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The firmware loading for GuC is about to be moved, and will happen much earlier in the probe process, when local-memory is not yet available. While this has the potential to make the firmware loading process slower, this is only happening during probe and full device reset. Since both are not hot-paths - store all UC-like firmware in system memory. 
Signed-off-by: Michał Winiarski Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index f258eb44fe31..9abae65c6b23 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -750,8 +750,6 @@ fail: int xe_uc_fw_init(struct xe_uc_fw *uc_fw) { const struct firmware *fw = NULL; - struct xe_gt *gt = uc_fw_to_gt(uc_fw); - struct xe_tile *tile = gt_to_tile(gt); int err; err = uc_fw_request(uc_fw, &fw); @@ -763,7 +761,7 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) return 0; err = uc_fw_copy(uc_fw, fw->data, fw->size, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | XE_BO_CREATE_GGTT_BIT); + XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT); uc_fw_release(fw); -- cgit v1.2.3 From 7704f32c93cff69d8d0e842638f30e4dc9d93b2a Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:07 +0100 Subject: drm/xe/uc: Extract xe_uc_sanitize_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier GuC load will require more fine-grained control over reset. Extract it outside of xe_uc_init_hw. Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 4 ++++ drivers/gpu/drm/xe/xe_uc.c | 12 ++++++------ drivers/gpu/drm/xe/xe_uc.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 4db94344bbde..a9c71da985d3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -542,6 +542,10 @@ static int do_gt_restart(struct xe_gt *gt) for_each_hw_engine(hwe, gt, id) xe_hw_engine_enable_ring(hwe); + err = xe_uc_sanitize_reset(>->uc); + if (err) + return err; + err = xe_uc_init_hw(>->uc); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 15dcd1f91e9c..72a7b3c2577d 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -80,6 +80,10 @@ int xe_uc_init_post_hwconfig(struct xe_uc *uc) if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; + err = xe_uc_sanitize_reset(uc); + if (err) + return err; + err = xe_guc_init_post_hwconfig(&uc->guc); if (err) return err; @@ -101,13 +105,13 @@ static int uc_reset(struct xe_uc *uc) return 0; } -void xe_uc_sanitize(struct xe_uc *uc) +static void xe_uc_sanitize(struct xe_uc *uc) { xe_huc_sanitize(&uc->huc); xe_guc_sanitize(&uc->guc); } -static int xe_uc_sanitize_reset(struct xe_uc *uc) +int xe_uc_sanitize_reset(struct xe_uc *uc) { xe_uc_sanitize(uc); @@ -147,10 +151,6 @@ int xe_uc_init_hw(struct xe_uc *uc) if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; - ret = xe_uc_sanitize_reset(uc); - if (ret) - return ret; - ret = xe_huc_upload(&uc->huc); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 4109ae7028af..5d5110c0c834 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -19,6 +19,6 @@ void xe_uc_stop_prepare(struct xe_uc *uc); int xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); -void xe_uc_sanitize(struct xe_uc *uc); +int xe_uc_sanitize_reset(struct xe_uc *uc); #endif -- cgit v1.2.3 From 4d637a1de2e4da212c1fee505a213a158d6bee1d Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 5 Dec 2023 02:33:08 +0100 Subject: drm/xe/guc: Split GuC params used for 
"hwconfig" and "post-hwconfig" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move params that are not used for initial "hwconfig" load to "post-hwconfig" phase. Signed-off-by: Michał Winiarski Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index e6f680efb29e..482cb0df9f15 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -182,6 +182,26 @@ static void guc_init_params(struct xe_guc *guc) BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); + params[GUC_CTL_FEATURE] = 0; + params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); + params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc); + params[GUC_CTL_WA] = 0; + params[GUC_CTL_DEVID] = guc_ctl_devid(guc); + + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); +} + +static void guc_init_params_post_hwconfig(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + u32 *params = guc->params; + int i; + + BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); + BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); @@ -279,6 +299,8 @@ out: */ int xe_guc_init_post_hwconfig(struct xe_guc *guc) { + guc_init_params_post_hwconfig(guc); + return xe_guc_ads_init_post_hwconfig(&guc->ads); } -- cgit v1.2.3 From ff765b7771d874efd3089f90a8944a958ab05874 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 4 Dec 2023 10:51:27 +0000 Subject: drm/xe: add some debug info for d3cold From the CI logs we want to easily know if the machine is capable and allowed to enter d3cold, and can therefore potentially trigger the d3cold RPM suspend and resume path. 
Signed-off-by: Matthew Auld Cc: Anshuman Gupta Cc: Riana Tauro Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 3 +++ drivers/gpu/drm/xe/xe_pm.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index b2e87746e8d8..b85193d1dcc2 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -778,6 +778,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe_pm_init(xe); + drm_dbg(&xe->drm, "d3cold: capable=%s\n", + str_yes_no(xe->d3cold.capable)); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index e31a91cf311c..b429c2876a76 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -399,4 +399,7 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) xe->d3cold.allowed = false; mutex_unlock(&xe->d3cold.lock); + + drm_dbg(&xe->drm, + "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); } -- cgit v1.2.3 From 6b8c1edc4f698d7e7e3cd5852bb5b20e93ab01b8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 22 Nov 2023 14:38:21 +0000 Subject: drm/xe/uapi: Separate bo_create placement from flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although the flags are about the creation, the memory placement of the BO deserves a proper dedicated field in the uapi. Besides getting more clear, it also allows to remove the 'magic' shifts from the flags that was a concern during the uapi reviews. Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_bo.c | 14 +++++++------- include/uapi/drm/xe_drm.h | 9 ++++++--- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 5e3493f21b59..fd516ad7478c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1890,15 +1890,15 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; + /* at least one valid memory placement must be specified */ + if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) || + !args->placement)) + return -EINVAL; + if (XE_IOCTL_DBG(xe, args->flags & ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING | DRM_XE_GEM_CREATE_FLAG_SCANOUT | - DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM | - xe->info.mem_region_mask))) - return -EINVAL; - - /* at least one memory type must be specified */ - if (XE_IOCTL_DBG(xe, !(args->flags & xe->info.mem_region_mask))) + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM))) return -EINVAL; if (XE_IOCTL_DBG(xe, args->handle)) @@ -1920,7 +1920,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) bo_flags |= XE_BO_SCANOUT_BIT; - bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); + bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK))) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 4c906ff2429e..6edbcd81c195 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -500,8 +500,11 @@ struct drm_xe_gem_create { */ __u64 size; -#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (0x1 << 24) -#define DRM_XE_GEM_CREATE_FLAG_SCANOUT 
(0x1 << 25) + /** @placement: A mask of memory instances of where BO can be placed. */ + __u32 placement; + +#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (1 << 0) +#define DRM_XE_GEM_CREATE_FLAG_SCANOUT (1 << 1) /* * When using VRAM as a possible placement, ensure that the corresponding VRAM * allocation will always use the CPU accessible part of VRAM. This is important @@ -517,7 +520,7 @@ struct drm_xe_gem_create { * display surfaces, therefore the kernel requires setting this flag for such * objects, otherwise an error is thrown on small-bar systems. */ -#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (0x1 << 26) +#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (1 << 2) /** * @flags: Flags, currently a mask of memory instances of where BO can * be placed -- cgit v1.2.3 From 2bec30715435824c2ea03714038f0ee7a4b5c698 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 22 Nov 2023 14:38:22 +0000 Subject: drm/xe: Make DRM_XE_DEVICE_QUERY_ENGINES future proof MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have at least 2 future features(OA and future media engines capabilities) that will require Xe to provide more information about engines to UMDs. But this information should not just be added to drm_xe_engine_class_instance for a couple of reasons: - drm_xe_engine_class_instance is used as input to other structs/uAPIs and those uAPIs don't care about any of these future new engine fields - those new fields are useless information after initialization for some UMDs, so it should not need to carry that around So here my proposal is to make DRM_XE_DEVICE_QUERY_ENGINES return an array of drm_xe_query_engine_info that contain drm_xe_engine_class_instance and 3 u64s to be used for future features. 
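For illustration, a hedged sketch of the UMD-side query flow with the struct added here (fd setup and error handling elided; at this point in the series the element count is still derived from the size returned by the first ioctl call):

    struct drm_xe_query_engine_info *engines;
    struct drm_xe_device_query query = {
            .query = DRM_XE_DEVICE_QUERY_ENGINES,
    };

    /* First call only reports the required buffer size in query.size. */
    ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
    engines = malloc(query.size);
    query.data = (uintptr_t)engines;
    ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);

    int num_engines = query.size / sizeof(*engines);
    for (int i = 0; i < num_engines; i++) {
            /*
             * engines[i].instance still carries class/instance/gt_id and can
             * be passed to exec queue creation as before, while
             * engines[i].reserved[] is where future per-engine information
             * (e.g. OA capabilities) can be reported without touching the
             * structs that embed drm_xe_engine_class_instance.
             */
    }
    free(engines);
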
Reference OA: https://patchwork.freedesktop.org/patch/558362/?series=121084&rev=6 v2: Reduce reserved[] to 3 u64 (Matthew Brost) Cc: Francois Dugast Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi [Rodrigo Rebased] Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_query.c | 15 ++++++++------- include/uapi/drm/xe_drm.h | 27 +++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 61a7d92b7e88..0cbfeaeb1330 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -53,7 +53,7 @@ static size_t calc_hw_engine_info_size(struct xe_device *xe) i++; } - return i * sizeof(struct drm_xe_engine_class_instance); + return i * sizeof(struct drm_xe_query_engine_info); } typedef u64 (*__ktime_func_t)(void); @@ -186,9 +186,9 @@ static int query_engines(struct xe_device *xe, struct drm_xe_device_query *query) { size_t size = calc_hw_engine_info_size(xe); - struct drm_xe_engine_class_instance __user *query_ptr = + struct drm_xe_query_engine_info __user *query_ptr = u64_to_user_ptr(query->data); - struct drm_xe_engine_class_instance *hw_engine_info; + struct drm_xe_query_engine_info *hw_engine_info; struct xe_hw_engine *hwe; enum xe_hw_engine_id id; struct xe_gt *gt; @@ -211,12 +211,13 @@ static int query_engines(struct xe_device *xe, if (xe_hw_engine_is_reserved(hwe)) continue; - hw_engine_info[i].engine_class = + hw_engine_info[i].instance.engine_class = xe_to_user_engine_class[hwe->class]; - hw_engine_info[i].engine_instance = + hw_engine_info[i].instance.engine_instance = hwe->logical_instance; - hw_engine_info[i].gt_id = gt->info.id; - hw_engine_info[i].pad = 0; + hw_engine_info[i].instance.gt_id = gt->info.id; + hw_engine_info[i].instance.pad = 0; + memset(hw_engine_info->reserved, 0, sizeof(hw_engine_info->reserved)); i++; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 6edbcd81c195..dc657ae9db18 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -124,7 +124,13 @@ struct xe_user_extension { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) -/** struct drm_xe_engine_class_instance - instance of an engine class */ +/** + * struct drm_xe_engine_class_instance - instance of an engine class + * + * It is returned as part of the @drm_xe_query_engine_info, but it also is + * used as the input of engine selection for both @drm_xe_exec_queue_create + * and @drm_xe_query_engine_cycles + */ struct drm_xe_engine_class_instance { #define DRM_XE_ENGINE_CLASS_RENDER 0 #define DRM_XE_ENGINE_CLASS_COPY 1 @@ -137,14 +143,31 @@ struct drm_xe_engine_class_instance { */ #define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC 5 #define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC 6 + /** @engine_class: engine class id */ __u16 engine_class; - + /** @engine_instance: engine instance id */ __u16 engine_instance; + /** @gt_id: Unique ID of this GT within the PCI Device */ __u16 gt_id; /** @pad: MBZ */ __u16 pad; }; +/** + * struct drm_xe_query_engine_info - describe hardware engine + * + * If a query is made with a struct @drm_xe_device_query where .query + * is equal to %DRM_XE_DEVICE_QUERY_ENGINES, then the reply uses an array of + * struct @drm_xe_query_engine_info 
in .data. + */ +struct drm_xe_query_engine_info { + /** @instance: The @drm_xe_engine_class_instance */ + struct drm_xe_engine_class_instance instance; + + /** @reserved: Reserved */ + __u64 reserved[3]; +}; + /** * enum drm_xe_memory_class - Supported memory classes. */ -- cgit v1.2.3 From 4e03b584143e18eabd091061a1716515da928dcb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 22 Nov 2023 14:38:23 +0000 Subject: drm/xe/uapi: Reject bo creation of unaligned size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For xe bo creation we request passing size which matches system or vram minimum page alignment. This way we want to ensure userspace is aware of region constraints and not aligned allocations will be rejected returning EINVAL. v2: - Rebase, Update uAPI documentation. (Thomas) v3: - Adjust the dma-buf kunit test accordingly. (Thomas) v4: - Fixed rebase conflicts and updated commit message. (Francois) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Zbigniew Kempczyński Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 10 ++++++++-- drivers/gpu/drm/xe/xe_bo.c | 26 +++++++++++++++++--------- include/uapi/drm/xe_drm.h | 17 +++++++++-------- 3 files changed, 34 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 81f12422a587..bb6f6424e06f 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -109,15 +109,21 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) struct drm_gem_object *import; struct dma_buf *dmabuf; struct xe_bo *bo; + size_t size; /* No VRAM on this device? 
*/ if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) && (params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) return; + size = PAGE_SIZE; + if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) && + xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + size = SZ_64K; + kunit_info(test, "running %s\n", __func__); - bo = xe_bo_create_user(xe, NULL, NULL, PAGE_SIZE, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, params->mem_mask); + bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index fd516ad7478c..0bd1b3581945 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1222,6 +1222,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, }; struct ttm_placement *placement; uint32_t alignment; + size_t aligned_size; int err; /* Only kernel objects should set GT */ @@ -1232,23 +1233,30 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return ERR_PTR(-EINVAL); } - if (!bo) { - bo = xe_bo_alloc(); - if (IS_ERR(bo)) - return bo; - } - if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) && !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { - size = ALIGN(size, SZ_64K); + aligned_size = ALIGN(size, SZ_64K); + if (type != ttm_bo_type_device) + size = ALIGN(size, SZ_64K); flags |= XE_BO_INTERNAL_64K; alignment = SZ_64K >> PAGE_SHIFT; + } else { - size = ALIGN(size, PAGE_SIZE); + aligned_size = ALIGN(size, SZ_4K); + flags &= ~XE_BO_INTERNAL_64K; alignment = SZ_4K >> PAGE_SHIFT; } + if (type == ttm_bo_type_device && aligned_size != size) + return ERR_PTR(-EINVAL); + + if (!bo) { + bo = xe_bo_alloc(); + if (IS_ERR(bo)) + return bo; + } + bo->tile = tile; bo->size = size; bo->flags = flags; @@ -1566,7 +1574,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, u32 flags) { - struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, size, flags); + struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags); if (IS_ERR(bo)) return bo; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index dc657ae9db18..d7918f6e760f 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -207,11 +207,13 @@ struct drm_xe_query_mem_region { * * When the kernel allocates memory for this region, the * underlying pages will be at least @min_page_size in size. - * - * Important note: When userspace allocates a GTT address which - * can point to memory allocated from this region, it must also - * respect this minimum alignment. This is enforced by the - * kernel. + * Buffer objects with an allowable placement in this region must be + * created with a size aligned to this value. + * GPU virtual address mappings of (parts of) buffer objects that + * may be placed in this region must also have their GPU virtual + * address and range aligned to this value. + * Affected IOCTLS will return %-EINVAL if alignment restrictions are + * not met. */ __u32 min_page_size; /** @@ -517,9 +519,8 @@ struct drm_xe_gem_create { __u64 extensions; /** - * @size: Requested size for the object - * - * The (page-aligned) allocated size for the object will be returned. 
+ * @size: Size of the object to be created, must match region + * (system or vram) minimum alignment (&min_page_size). */ __u64 size; -- cgit v1.2.3 From 4bc9dd98e0a7e8a14386fc8341379ee09e594987 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 22 Nov 2023 14:38:24 +0000 Subject: drm/xe/uapi: Align on a common way to return arrays (memory regions) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uAPI provides queries which return arrays of elements. As of now the format used in the struct is different depending on which element is queried. Fix this for memory regions by applying the pattern below: struct drm_xe_query_Xs { __u32 num_Xs; struct drm_xe_X Xs[]; ... } This removes "query" in the name of struct drm_xe_query_mem_region as it is not returned from the query IOCTL. There is no functional change. v2: Only rename drm_xe_query_mem_region to drm_xe_mem_region (José Roberto de Souza) v3: Rename usage to mem_regions in xe_query.c (José Roberto de Souza) Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 46 ++++++++++++++++++++++--------------------- include/uapi/drm/xe_drm.h | 12 +++++------ 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 0cbfeaeb1330..34474f8b97f6 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -240,14 +240,14 @@ static size_t calc_mem_regions_size(struct xe_device *xe) if (ttm_manager_type(&xe->ttm, i)) num_managers++; - return offsetof(struct drm_xe_query_mem_regions, regions[num_managers]); + return offsetof(struct drm_xe_query_mem_regions, mem_regions[num_managers]); } static int query_mem_regions(struct xe_device *xe, - struct drm_xe_device_query *query) + struct drm_xe_device_query *query) { size_t size = calc_mem_regions_size(xe); - struct drm_xe_query_mem_regions *usage; + struct drm_xe_query_mem_regions *mem_regions; struct drm_xe_query_mem_regions __user *query_ptr = u64_to_user_ptr(query->data); struct ttm_resource_manager *man; @@ -260,50 +260,52 @@ static int query_mem_regions(struct xe_device *xe, return -EINVAL; } - usage = kzalloc(size, GFP_KERNEL); - if (XE_IOCTL_DBG(xe, !usage)) + mem_regions = kzalloc(size, GFP_KERNEL); + if (XE_IOCTL_DBG(xe, !mem_regions)) return -ENOMEM; man = ttm_manager_type(&xe->ttm, XE_PL_TT); - usage->regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM; - usage->regions[0].instance = 0; - usage->regions[0].min_page_size = PAGE_SIZE; - usage->regions[0].total_size = man->size << PAGE_SHIFT; + mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM; + mem_regions->mem_regions[0].instance = 0; + mem_regions->mem_regions[0].min_page_size = PAGE_SIZE; + mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT; if (perfmon_capable()) - usage->regions[0].used = ttm_resource_manager_usage(man); - usage->num_regions = 1; + mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); + mem_regions->num_mem_regions = 1; for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { man = ttm_manager_type(&xe->ttm, i); if (man) { - usage->regions[usage->num_regions].mem_class = + mem_regions->mem_regions[mem_regions->num_mem_regions].mem_class = DRM_XE_MEM_REGION_CLASS_VRAM; - usage->regions[usage->num_regions].instance = - usage->num_regions; - usage->regions[usage->num_regions].min_page_size = + 
mem_regions->mem_regions[mem_regions->num_mem_regions].instance = + mem_regions->num_mem_regions; + mem_regions->mem_regions[mem_regions->num_mem_regions].min_page_size = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : PAGE_SIZE; - usage->regions[usage->num_regions].total_size = + mem_regions->mem_regions[mem_regions->num_mem_regions].total_size = man->size; if (perfmon_capable()) { xe_ttm_vram_get_used(man, - &usage->regions[usage->num_regions].used, - &usage->regions[usage->num_regions].cpu_visible_used); + &mem_regions->mem_regions + [mem_regions->num_mem_regions].used, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].cpu_visible_used); } - usage->regions[usage->num_regions].cpu_visible_size = + mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size = xe_ttm_vram_get_cpu_visible_size(man); - usage->num_regions++; + mem_regions->num_mem_regions++; } } - if (!copy_to_user(query_ptr, usage, size)) + if (!copy_to_user(query_ptr, mem_regions, size)) ret = 0; else ret = -ENOSPC; - kfree(usage); + kfree(mem_regions); return ret; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d7918f6e760f..863963168dc3 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -183,10 +183,10 @@ enum drm_xe_memory_class { }; /** - * struct drm_xe_query_mem_region - Describes some region as known to + * struct drm_xe_mem_region - Describes some region as known to * the driver. */ -struct drm_xe_query_mem_region { +struct drm_xe_mem_region { /** * @mem_class: The memory class describing this region. * @@ -323,12 +323,12 @@ struct drm_xe_query_engine_cycles { * struct drm_xe_query_mem_regions in .data. */ struct drm_xe_query_mem_regions { - /** @num_regions: number of memory regions returned in @regions */ - __u32 num_regions; + /** @num_mem_regions: number of memory regions returned in @mem_regions */ + __u32 num_mem_regions; /** @pad: MBZ */ __u32 pad; - /** @regions: The returned regions for this device */ - struct drm_xe_query_mem_region regions[]; + /** @mem_regions: The returned memory regions for this device */ + struct drm_xe_mem_region mem_regions[]; }; /** -- cgit v1.2.3 From 71c625aa770d4bd2b0901a9da3820fb89636e1a1 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 22 Nov 2023 14:38:25 +0000 Subject: drm/xe/uapi: Align on a common way to return arrays (gt) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uAPI provides queries which return arrays of elements. As of now the format used in the struct is different depending on which element is queried. However, aligning on the new common pattern: struct drm_xe_query_Xs { __u32 num_Xs; struct drm_xe_X Xs[]; ... } ... would mean bringing back the name "gts" which is avoided per commit fca54ba12470 ("drm/xe/uapi: Rename gts to gt_list") so make an exception for gt and leave gt_list. Also, this change removes "query" in the name of struct drm_xe_query_gt as it is not returned from the query IOCTL. There is no functional change. 
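For completeness, a hedged sketch of how userspace walks the renamed array; the num_gt field name is assumed here from the num_Xs pattern quoted above, the element type comes from this patch:

    struct drm_xe_query_gt_list *gt_list = buf;    /* buf: data returned by the GT-list query */
    int i;

    for (i = 0; i < gt_list->num_gt; i++) {        /* num_gt: assumed per the num_Xs pattern */
            struct drm_xe_gt *gt = &gt_list->gt_list[i];

            /* per-GT information: type (main/media), gt_id, clock, ... */
    }
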
v2: Leave gt_list (Matt Roper) Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Reviewed-by: Matt Roper Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 2 +- include/uapi/drm/xe_drm.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 34474f8b97f6..a0e3b0c163f9 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -354,7 +354,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query { struct xe_gt *gt; size_t size = sizeof(struct drm_xe_query_gt_list) + - xe->info.gt_count * sizeof(struct drm_xe_query_gt); + xe->info.gt_count * sizeof(struct drm_xe_gt); struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gt_list *gt_list; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 863963168dc3..a8ae845d0c74 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -357,14 +357,14 @@ struct drm_xe_query_config { }; /** - * struct drm_xe_query_gt - describe an individual GT. + * struct drm_xe_gt - describe an individual GT. * * To be used with drm_xe_query_gt_list, which will return a list with all the * existing GT individual descriptions. * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for * implementing graphics and/or media operations. */ -struct drm_xe_query_gt { +struct drm_xe_gt { #define DRM_XE_QUERY_GT_TYPE_MAIN 0 #define DRM_XE_QUERY_GT_TYPE_MEDIA 1 /** @type: GT type: Main or Media */ @@ -404,7 +404,7 @@ struct drm_xe_query_gt_list { /** @pad: MBZ */ __u32 pad; /** @gt_list: The GT list returned for this device */ - struct drm_xe_query_gt gt_list[]; + struct drm_xe_gt gt_list[]; }; /** -- cgit v1.2.3 From 60a6a849fcb338b8a3f3d1ec9ec50c002add925a Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 22 Nov 2023 14:38:26 +0000 Subject: drm/xe/uapi: Align on a common way to return arrays (engines) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uAPI provides queries which return arrays of elements. As of now the format used in the struct is different depending on which element is queried. Fix this for engines by applying the pattern below: struct drm_xe_query_Xs { __u32 num_Xs; struct drm_xe_X Xs[]; ... } Instead of directly returning an array of struct drm_xe_query_engine_info, a new struct drm_xe_query_engines is introduced. It contains itself an array of struct drm_xe_engine which holds the information about each engine. 
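The practical difference on the userspace side is small but worth spelling out (a hedged sketch, with the query setup as in the kernel-doc example updated by this patch):

    /* Before: the element count had to be derived from the returned size. */
    int num_engines = query.size / sizeof(struct drm_xe_query_engine_info);

    /* After: the count is carried explicitly alongside the array. */
    struct drm_xe_query_engines *engines = data;   /* data: buffer filled by the query */

    for (int i = 0; i < engines->num_engines; i++)
            consume_engine(&engines->engines[i]);  /* consume_engine() is hypothetical */
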
v2: Use plural for struct drm_xe_query_engines as multiple engines are returned (José Roberto de Souza) Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_query.c | 31 +++++++++-------- include/uapi/drm/xe_drm.h | 78 ++++++++++++++++++++++++++----------------- 2 files changed, 65 insertions(+), 44 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index a0e3b0c163f9..ad9f23e43920 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -53,7 +53,8 @@ static size_t calc_hw_engine_info_size(struct xe_device *xe) i++; } - return i * sizeof(struct drm_xe_query_engine_info); + return sizeof(struct drm_xe_query_engines) + + i * sizeof(struct drm_xe_engine); } typedef u64 (*__ktime_func_t)(void); @@ -186,9 +187,9 @@ static int query_engines(struct xe_device *xe, struct drm_xe_device_query *query) { size_t size = calc_hw_engine_info_size(xe); - struct drm_xe_query_engine_info __user *query_ptr = + struct drm_xe_query_engines __user *query_ptr = u64_to_user_ptr(query->data); - struct drm_xe_query_engine_info *hw_engine_info; + struct drm_xe_query_engines *engines; struct xe_hw_engine *hwe; enum xe_hw_engine_id id; struct xe_gt *gt; @@ -202,8 +203,8 @@ static int query_engines(struct xe_device *xe, return -EINVAL; } - hw_engine_info = kmalloc(size, GFP_KERNEL); - if (!hw_engine_info) + engines = kmalloc(size, GFP_KERNEL); + if (!engines) return -ENOMEM; for_each_gt(gt, xe, gt_id) @@ -211,22 +212,26 @@ static int query_engines(struct xe_device *xe, if (xe_hw_engine_is_reserved(hwe)) continue; - hw_engine_info[i].instance.engine_class = + engines->engines[i].instance.engine_class = xe_to_user_engine_class[hwe->class]; - hw_engine_info[i].instance.engine_instance = + engines->engines[i].instance.engine_instance = hwe->logical_instance; - hw_engine_info[i].instance.gt_id = gt->info.id; - hw_engine_info[i].instance.pad = 0; - memset(hw_engine_info->reserved, 0, sizeof(hw_engine_info->reserved)); + engines->engines[i].instance.gt_id = gt->info.id; + engines->engines[i].instance.pad = 0; + memset(engines->engines[i].reserved, 0, + sizeof(engines->engines[i].reserved)); i++; } - if (copy_to_user(query_ptr, hw_engine_info, size)) { - kfree(hw_engine_info); + engines->pad = 0; + engines->num_engines = i; + + if (copy_to_user(query_ptr, engines, size)) { + kfree(engines); return -EFAULT; } - kfree(hw_engine_info); + kfree(engines); return 0; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index a8ae845d0c74..2e58ddcf92f5 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -127,9 +127,9 @@ struct xe_user_extension { /** * struct drm_xe_engine_class_instance - instance of an engine class * - * It is returned as part of the @drm_xe_query_engine_info, but it also is - * used as the input of engine selection for both @drm_xe_exec_queue_create - * and @drm_xe_query_engine_cycles + * It is returned as part of the @drm_xe_engine, but it also is used as + * the input of engine selection for both @drm_xe_exec_queue_create and + * @drm_xe_query_engine_cycles */ struct drm_xe_engine_class_instance { #define DRM_XE_ENGINE_CLASS_RENDER 0 @@ -154,13 +154,9 @@ struct drm_xe_engine_class_instance { }; /** - * struct drm_xe_query_engine_info - describe hardware engine - * - * If a query is made with a struct @drm_xe_device_query where .query - * is equal to %DRM_XE_DEVICE_QUERY_ENGINES, then the reply 
uses an array of - * struct @drm_xe_query_engine_info in .data. + * struct drm_xe_engine - describe hardware engine */ -struct drm_xe_query_engine_info { +struct drm_xe_engine { /** @instance: The @drm_xe_engine_class_instance */ struct drm_xe_engine_class_instance instance; @@ -168,6 +164,22 @@ struct drm_xe_query_engine_info { __u64 reserved[3]; }; +/** + * struct drm_xe_query_engines - describe engines + * + * If a query is made with a struct @drm_xe_device_query where .query + * is equal to %DRM_XE_DEVICE_QUERY_ENGINES, then the reply uses an array of + * struct @drm_xe_query_engines in .data. + */ +struct drm_xe_query_engines { + /** @num_engines: number of engines returned in @engines */ + __u32 num_engines; + /** @pad: MBZ */ + __u32 pad; + /** @engines: The returned engines for this device */ + struct drm_xe_engine engines[]; +}; + /** * enum drm_xe_memory_class - Supported memory classes. */ @@ -467,28 +479,32 @@ struct drm_xe_query_topology_mask { * * .. code-block:: C * - * struct drm_xe_engine_class_instance *hwe; - * struct drm_xe_device_query query = { - * .extensions = 0, - * .query = DRM_XE_DEVICE_QUERY_ENGINES, - * .size = 0, - * .data = 0, - * }; - * ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query); - * hwe = malloc(query.size); - * query.data = (uintptr_t)hwe; - * ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query); - * int num_engines = query.size / sizeof(*hwe); - * for (int i = 0; i < num_engines; i++) { - * printf("Engine %d: %s\n", i, - * hwe[i].engine_class == DRM_XE_ENGINE_CLASS_RENDER ? "RENDER": - * hwe[i].engine_class == DRM_XE_ENGINE_CLASS_COPY ? "COPY": - * hwe[i].engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE ? "VIDEO_DECODE": - * hwe[i].engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE ? "VIDEO_ENHANCE": - * hwe[i].engine_class == DRM_XE_ENGINE_CLASS_COMPUTE ? "COMPUTE": - * "UNKNOWN"); - * } - * free(hwe); + * struct drm_xe_query_engines *engines; + * struct drm_xe_device_query query = { + * .extensions = 0, + * .query = DRM_XE_DEVICE_QUERY_ENGINES, + * .size = 0, + * .data = 0, + * }; + * ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query); + * engines = malloc(query.size); + * query.data = (uintptr_t)engines; + * ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query); + * for (int i = 0; i < engines->num_engines; i++) { + * printf("Engine %d: %s\n", i, + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_RENDER ? "RENDER": + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_COPY ? "COPY": + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_VIDEO_DECODE ? "VIDEO_DECODE": + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE ? "VIDEO_ENHANCE": + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_COMPUTE ? "COMPUTE": + * "UNKNOWN"); + * } + * free(engines); */ struct drm_xe_device_query { /** @extensions: Pointer to the first extension struct, if any */ -- cgit v1.2.3 From 37d078e51b4cba30f90667a2b35e16725d649956 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 22 Nov 2023 14:38:27 +0000 Subject: drm/xe/uapi: Split xe_sync types from flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's continue on the uapi clean-up with more splits with stuff into their own exclusive fields instead of reusing stuff. 
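As an illustration of the userspace-visible effect (a hedged sketch; syncobj_handle is a placeholder), the object type now lives in its own field instead of being packed into the low bits of @flags:

    struct drm_xe_sync sync = {
            .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,   /* was DRM_XE_SYNC_FLAG_SYNCOBJ inside .flags */
            .flags  = DRM_XE_SYNC_FLAG_SIGNAL,    /* .flags now only carries real flags */
            .handle = syncobj_handle,
    };

The DMA_BUF sync type, which was only a non-functional placeholder, is dropped rather than carried over.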
Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_sync.c | 23 +++++++---------------- drivers/gpu/drm/xe/xe_sync_types.h | 1 + include/uapi/drm/xe_drm.h | 16 ++++++++-------- 3 files changed, 16 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index ea96ba4b41da..936227e79483 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -17,8 +17,6 @@ #include "xe_macros.h" #include "xe_sched_job_types.h" -#define SYNC_FLAGS_TYPE_MASK 0x3 - struct user_fence { struct xe_device *xe; struct kref refcount; @@ -109,15 +107,13 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) return -EFAULT; - if (XE_IOCTL_DBG(xe, sync_in.flags & - ~(SYNC_FLAGS_TYPE_MASK | DRM_XE_SYNC_FLAG_SIGNAL)) || - XE_IOCTL_DBG(xe, sync_in.pad) || + if (XE_IOCTL_DBG(xe, sync_in.flags & ~DRM_XE_SYNC_FLAG_SIGNAL) || XE_IOCTL_DBG(xe, sync_in.reserved[0] || sync_in.reserved[1])) return -EINVAL; signal = sync_in.flags & DRM_XE_SYNC_FLAG_SIGNAL; - switch (sync_in.flags & SYNC_FLAGS_TYPE_MASK) { - case DRM_XE_SYNC_FLAG_SYNCOBJ: + switch (sync_in.type) { + case DRM_XE_SYNC_TYPE_SYNCOBJ: if (XE_IOCTL_DBG(xe, in_lr_mode && signal)) return -EOPNOTSUPP; @@ -135,7 +131,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } break; - case DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ: + case DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ: if (XE_IOCTL_DBG(xe, in_lr_mode && signal)) return -EOPNOTSUPP; @@ -165,12 +161,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } break; - case DRM_XE_SYNC_FLAG_DMA_BUF: - if (XE_IOCTL_DBG(xe, "TODO")) - return -EINVAL; - break; - - case DRM_XE_SYNC_FLAG_USER_FENCE: + case DRM_XE_SYNC_TYPE_USER_FENCE: if (XE_IOCTL_DBG(xe, !signal)) return -EOPNOTSUPP; @@ -192,6 +183,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, return -EINVAL; } + sync->type = sync_in.type; sync->flags = sync_in.flags; sync->timeline_value = sync_in.timeline_value; @@ -252,8 +244,7 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, user_fence_put(sync->ufence); dma_fence_put(fence); } - } else if ((sync->flags & SYNC_FLAGS_TYPE_MASK) == - DRM_XE_SYNC_FLAG_USER_FENCE) { + } else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) { job->user_fence.used = true; job->user_fence.addr = sync->addr; job->user_fence.value = sync->timeline_value; diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 24fccc26cb53..852db5e7884f 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -21,6 +21,7 @@ struct xe_sync_entry { struct user_fence *ufence; u64 addr; u64 timeline_value; + u32 type; u32 flags; }; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2e58ddcf92f5..978fca7bb235 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -947,16 +947,16 @@ struct drm_xe_sync { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -#define DRM_XE_SYNC_FLAG_SYNCOBJ 0x0 -#define DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ 0x1 -#define DRM_XE_SYNC_FLAG_DMA_BUF 0x2 -#define DRM_XE_SYNC_FLAG_USER_FENCE 0x3 -#define DRM_XE_SYNC_FLAG_SIGNAL 0x10 +#define DRM_XE_SYNC_TYPE_SYNCOBJ 0x0 +#define DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ 0x1 +#define DRM_XE_SYNC_TYPE_USER_FENCE 0x2 + /** @type: Type 
of the this sync object */ + __u32 type; + +#define DRM_XE_SYNC_FLAG_SIGNAL (1 << 0) + /** @flags: Sync Flags */ __u32 flags; - /** @pad: MBZ */ - __u32 pad; - union { __u32 handle; -- cgit v1.2.3 From cad4a0d6af146e14a82a0f7d43613450dc56ff80 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 22 Nov 2023 14:38:28 +0000 Subject: drm/xe/uapi: Kill tile_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is currently unused, so by the rules it cannot go upstream. Also there was the desire to convert that to align with the engine_class_instance selection, but the consensus on that one is to remain with the global gt_id. So we are keeping the gt_id there, not converting to a generic sched_group and also killing this tile_mask and only using the default behavior of 0 that is to create a mapping / page_table entry on every tile, similar to what i915. Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_vm.c | 40 +++++++++------------------------------- drivers/gpu/drm/xe/xe_vm_types.h | 2 -- include/uapi/drm/xe_drm.h | 8 +------- 3 files changed, 10 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a97a310123fc..ff22eddc2578 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -870,7 +870,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, u64 start, u64 end, bool read_only, bool is_null, - u8 tile_mask, u16 pat_index) { struct xe_vma *vma; @@ -903,12 +902,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (is_null) vma->gpuva.flags |= DRM_GPUVA_SPARSE; - if (tile_mask) { - vma->tile_mask = tile_mask; - } else { - for_each_tile(tile, vm->xe, id) - vma->tile_mask |= 0x1 << id; - } + for_each_tile(tile, vm->xe, id) + vma->tile_mask |= 0x1 << id; if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; @@ -2166,7 +2161,7 @@ static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) static struct drm_gpuva_ops * vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 addr, u64 range, - u32 operation, u32 flags, u8 tile_mask, + u32 operation, u32 flags, u32 prefetch_region, u16 pat_index) { struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; @@ -2229,7 +2224,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - op->tile_mask = tile_mask; if (__op->op == DRM_GPUVA_OP_MAP) { op->map.immediate = flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; @@ -2248,8 +2242,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - u8 tile_mask, bool read_only, bool is_null, - u16 pat_index) + bool read_only, bool is_null, u16 pat_index) { struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; struct xe_vma *vma; @@ -2265,7 +2258,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, vma = xe_vma_create(vm, bo, op->gem.offset, op->va.addr, op->va.addr + op->va.range - 1, read_only, is_null, - tile_mask, pat_index); + pat_index); if (bo) xe_bo_unlock(bo); @@ -2409,8 +2402,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, { struct xe_vma *vma; - vma = new_vma(vm, &op->base.map, - op->tile_mask, op->map.read_only, + vma = new_vma(vm, &op->base.map, op->map.read_only, op->map.is_null, op->map.pat_index); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2435,8 +2427,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, op->base.remap.unmap->va->flags & DRM_GPUVA_SPARSE; - vma = new_vma(vm, op->base.remap.prev, - op->tile_mask, read_only, + vma = new_vma(vm, op->base.remap.prev, read_only, is_null, old->pat_index); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2469,8 +2460,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, op->base.remap.unmap->va->flags & DRM_GPUVA_SPARSE; - vma = new_vma(vm, op->base.remap.next, - op->tile_mask, read_only, + vma = new_vma(vm, op->base.remap.next, read_only, is_null, old->pat_index); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -3024,16 +3014,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) err = -EINVAL; goto release_vm_lock; } - - if (bind_ops[i].tile_mask) { - u64 valid_tiles = BIT(xe->info.tile_count) - 1; - - if (XE_IOCTL_DBG(xe, bind_ops[i].tile_mask & - ~valid_tiles)) { - err = -EINVAL; - goto release_vm_lock; - } - } } bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL); @@ -3126,14 +3106,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 op = bind_ops[i].op; u32 flags = bind_ops[i].flags; u64 obj_offset = bind_ops[i].obj_offset; - u8 tile_mask = bind_ops[i].tile_mask; u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; u16 pat_index = bind_ops[i].pat_index; ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, addr, range, op, flags, - tile_mask, prefetch_region, - pat_index); + prefetch_region, pat_index); if (IS_ERR(ops[i])) { err = PTR_ERR(ops[i]); ops[i] = NULL; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 74cdf16a42ad..e70ec6b2fabe 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -400,8 +400,6 @@ struct xe_vma_op { u32 num_syncs; /** @link: async operation link */ struct list_head link; - /** @tile_mask: gt mask for this operation */ - u8 tile_mask; /** @flags: operation flags */ enum xe_vma_op_flags flags; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 978fca7bb235..77d54926e18f 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -750,12 +750,6 @@ struct drm_xe_vm_bind_op { /** @addr: Address to operate on, MBZ for UNMAP_ALL */ __u64 addr; - /** - * @tile_mask: Mask for which tiles to create binds for, 0 == All tiles, - * only applies to creating new VMAs - */ - __u64 tile_mask; - #define DRM_XE_VM_BIND_OP_MAP 0x0 #define DRM_XE_VM_BIND_OP_UNMAP 0x1 #define DRM_XE_VM_BIND_OP_MAP_USERPTR 0x2 @@ -790,7 +784,7 @@ struct drm_xe_vm_bind_op { __u32 prefetch_mem_region_instance; /** @reserved: Reserved */ - __u64 reserved[2]; + __u64 reserved[3]; }; struct drm_xe_vm_bind { -- cgit v1.2.3 From 4016d6bf368c4894c834e0652aecd93f7d2a2fab Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: 
Wed, 22 Nov 2023 14:38:29 +0000 Subject: drm/xe/uapi: Crystal Reference Clock updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First of all, let's remove the duplication. But also, let's rename it to remove the word 'frequency' out of it. In general, the first thing people think of frequency is the frequency in which the GTs are operating to execute the GPU instructions. While this frequency here is a crystal reference clock frequency which is the base of everything else, and in this case of this uAPI it is used to calculate a better and precise timestamp. v2: (Suggested by Jose) Remove the engine_cs and keep the GT info one since it might be useful for other SRIOV cases where the engine_cs will be zeroed. So, grabbing from the GT_LIST should be cleaner. v3: Keep comment on put_user() call (José Roberto de Souza) Cc: Matt Roper Umesh Nerlige Ramappa Cc: Jose Souza Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_gt_clock.c | 4 ++-- drivers/gpu/drm/xe/xe_gt_types.h | 4 ++-- drivers/gpu/drm/xe/xe_query.c | 7 +------ include/uapi/drm/xe_drm.h | 11 ++++------- 4 files changed, 9 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 25a18eaad9c4..937054e31d72 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -75,11 +75,11 @@ int xe_gt_clock_init(struct xe_gt *gt) freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0); } - gt->info.clock_freq = freq; + gt->info.reference_clock = freq; return 0; } u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count) { - return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.clock_freq); + return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock); } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index a96ee7d028aa..a7263738308e 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -107,8 +107,8 @@ struct xe_gt { enum xe_gt_type type; /** @id: Unique ID of this GT within the PCI Device */ u8 id; - /** @clock_freq: clock frequency */ - u32 clock_freq; + /** @reference_clock: clock frequency */ + u32 reference_clock; /** @engine_mask: mask of engines present on GT */ u64 engine_mask; /** diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index ad9f23e43920..3316eab118b1 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -147,8 +147,6 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - resp.engine_frequency = gt->info.clock_freq; - xe_device_mem_access_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); @@ -165,9 +163,6 @@ query_engine_cycles(struct xe_device *xe, resp.width = 36; /* Only write to the output fields of user query */ - if (put_user(resp.engine_frequency, &query_ptr->engine_frequency)) - return -EFAULT; - if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp)) return -EFAULT; @@ -383,7 +378,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query else gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; gt_list->gt_list[id].gt_id = gt->info.id; - gt_list->gt_list[id].clock_freq = gt->info.clock_freq; + gt_list->gt_list[id].reference_clock = gt->info.reference_clock; if (!IS_DGFX(xe)) gt_list->gt_list[id].near_mem_regions = 0x1; else diff --git a/include/uapi/drm/xe_drm.h 
b/include/uapi/drm/xe_drm.h index 77d54926e18f..df3e6fcf9b8b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -281,8 +281,8 @@ struct drm_xe_mem_region { * in .data. struct drm_xe_query_engine_cycles is allocated by the user and * .data points to this allocated structure. * - * The query returns the engine cycles and the frequency that can - * be used to calculate the engine timestamp. In addition the + * The query returns the engine cycles, which along with GT's @reference_clock, + * can be used to calculate the engine timestamp. In addition the * query returns a set of cpu timestamps that indicate when the command * streamer cycle count was captured. */ @@ -310,9 +310,6 @@ struct drm_xe_query_engine_cycles { */ __u64 engine_cycles; - /** @engine_frequency: Frequency of the engine cycles in Hz. */ - __u64 engine_frequency; - /** * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before * reading the engine_cycles register using the reference clockid set by the @@ -383,8 +380,8 @@ struct drm_xe_gt { __u16 type; /** @gt_id: Unique ID of this GT within the PCI Device */ __u16 gt_id; - /** @clock_freq: A clock frequency for timestamp */ - __u32 clock_freq; + /** @reference_clock: A clock frequency for timestamp */ + __u32 reference_clock; /** * @near_mem_regions: Bit mask of instances from * drm_xe_query_mem_regions that are nearest to the current engines -- cgit v1.2.3 From c3fca1077b9a19e679ec59ff2d2c5f4069e375ae Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 22 Nov 2023 14:38:31 +0000 Subject: drm/xe/uapi: Add Tile ID information to the GT info query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As an information only. So Userspace can use this information and be able to correlate different GTs. Make API symmetric between Engine and GT info. There's no need right now to include a tile_query entry since there's no other information that we need from tile that is not already exposed through different queries. However, this could be added later if we have different Tile information that could matter to userspace. But let's keep the API ready for a direct reference to Tile ID based on the GT entry. 
Signed-off-by: Francois Dugast Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_query.c | 1 + include/uapi/drm/xe_drm.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 3316eab118b1..4461dd1c9e40 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -377,6 +377,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; gt_list->gt_list[id].gt_id = gt->info.id; gt_list->gt_list[id].reference_clock = gt->info.reference_clock; if (!IS_DGFX(xe)) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index df3e6fcf9b8b..584fe08e775c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -378,6 +378,8 @@ struct drm_xe_gt { #define DRM_XE_QUERY_GT_TYPE_MEDIA 1 /** @type: GT type: Main or Media */ __u16 type; + /** @tile_id: Tile ID where this GT lives (Information only) */ + __u16 tile_id; /** @gt_id: Unique ID of this GT within the PCI Device */ __u16 gt_id; /** @reference_clock: A clock frequency for timestamp */ -- cgit v1.2.3 From 7a56bd0cfbeafab33030c782c40b009e39c4bbc0 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 22 Nov 2023 14:38:32 +0000 Subject: drm/xe/uapi: Fix various struct padding for 64b alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's respect Documentation/process/botching-up-ioctls.rst and add the proper padding for a 64b alignment with all as well as all the required checks and settings for the pads and the reserved entries. 
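To make the alignment reasoning concrete, here is a sketch of one of the touched structs (member names are taken from the drm_xe_gem_create hunk below; the byte counts are the point, not new API):

	__u16 cpu_caching;
	__u16 pad[3];		/* was a single __u16; 2 + 6 bytes of MBZ */
	__u64 reserved[2];	/* now starts on a 64-bit boundary, so 32-bit
				 * and 64-bit userspace get the identical,
				 * hole-free layout that pahole confirms */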
v2: Fix remaining holes and double check with pahole (Jose) Ensure with pahole that both 32b and 64b have exact same layout (Thomas) Do not set query's pad and reserved bits to zero since it is redundant and already done by kzalloc (Matt) v3: Fix alignment after rebase (José Roberto de Souza) v4: Fix pad check (Francois Dugast) Cc: Thomas Hellström Cc: Francois Dugast Cc: José Roberto de Souza Cc: Matt Roper Signed-off-by: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/xe/xe_bo.c | 3 ++- drivers/gpu/drm/xe/xe_query.c | 1 + drivers/gpu/drm/xe/xe_vm.c | 8 ++++++++ include/uapi/drm/xe_drm.h | 21 ++++++++++++--------- 4 files changed, 23 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 0bd1b3581945..9cc78986dbd3 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1894,7 +1894,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, u32 handle; int err; - if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) || + if (XE_IOCTL_DBG(xe, args->extensions) || + XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 4461dd1c9e40..56d61bf596b2 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -372,6 +372,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query return -ENOMEM; gt_list->num_gt = xe->info.gt_count; + for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ff22eddc2578..622a869fd18e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2825,6 +2825,10 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, int err; int i; + if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, !args->num_binds) || XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) @@ -2963,6 +2967,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (err) return err; + if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return -EINVAL; + if (args->exec_queue_id) { q = xe_exec_queue_lookup(xef, args->exec_queue_id); if (XE_IOCTL_DBG(xe, !q)) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 584fe08e775c..512c39ea5d50 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -212,8 +212,6 @@ struct drm_xe_mem_region { * a unique pair. */ __u16 instance; - /** @pad: MBZ */ - __u32 pad; /** * @min_page_size: Min page-size in bytes for this region. 
* @@ -382,6 +380,8 @@ struct drm_xe_gt { __u16 tile_id; /** @gt_id: Unique ID of this GT within the PCI Device */ __u16 gt_id; + /** @pad: MBZ */ + __u16 pad[3]; /** @reference_clock: A clock frequency for timestamp */ __u32 reference_clock; /** @@ -601,7 +601,7 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CPU_CACHING_WC 2 __u16 cpu_caching; /** @pad: MBZ */ - __u16 pad; + __u16 pad[3]; /** @reserved: Reserved */ __u64 reserved[2]; @@ -782,6 +782,9 @@ struct drm_xe_vm_bind_op { */ __u32 prefetch_mem_region_instance; + /** @pad: MBZ */ + __u32 pad2; + /** @reserved: Reserved */ __u64 reserved[3]; }; @@ -800,12 +803,12 @@ struct drm_xe_vm_bind { */ __u32 exec_queue_id; - /** @num_binds: number of binds in this IOCTL */ - __u32 num_binds; - /** @pad: MBZ */ __u32 pad; + /** @num_binds: number of binds in this IOCTL */ + __u32 num_binds; + union { /** @bind: used if num_binds == 1 */ struct drm_xe_vm_bind_op bind; @@ -817,12 +820,12 @@ struct drm_xe_vm_bind { __u64 vector_of_binds; }; + /** @pad: MBZ */ + __u32 pad2; + /** @num_syncs: amount of syncs to wait on */ __u32 num_syncs; - /** @pad2: MBZ */ - __u32 pad2; - /** @syncs: pointer to struct drm_xe_sync array */ __u64 syncs; -- cgit v1.2.3 From 9329f0667215a5c22d650f870f8a9f5839a5bc5a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 27 Nov 2023 16:03:30 +0100 Subject: drm/xe/uapi: Use LR abbrev for long-running vms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we're using "compute mode" for long running VMs using preempt-fences for memory management, and "fault mode" for long running VMs using page faults. Change this to use the terminology "long-running" abbreviated as LR for long-running VMs. These VMs can then either be in preempt-fence mode or fault mode. The user can force fault mode at creation time, but otherwise the driver can choose to use fault- or preempt-fence mode for long-running vms depending on the device capabilities. Initially unless fault-mode is specified, the driver uses preempt-fence mode. 
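For illustration only (not part of this patch): with these flags, userspace opts in roughly as below. The ioctl wrapper and the open device fd are assumed from the existing uapi.

	struct drm_xe_vm_create create = {
		/* long-running semantics; KMD may still use recoverable
		 * pagefaults internally if the device supports them */
		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE |
		/* explicitly request fault mode (and its per-VM overcommit);
		 * only valid together with LR_MODE and only on capable HW */
			 DRM_XE_VM_CREATE_FLAG_FAULT_MODE,
	};
	int ret = ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
	/* on success, create.vm_id names the new long-running VM */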
v2: - Fix commit message wording and the documentation around CREATE_FLAG_LR_MODE and CREATE_FLAG_FAULT_MODE Cc: Matthew Brost Cc: Rodrigo Vivi Cc: Francois Dugast Signed-off-by: Thomas Hellström Signed-off-by: Francois Dugast Reviewed-by: José Roberto de Souza Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 8 ++++---- include/uapi/drm/xe_drm.h | 23 ++++++++++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 622a869fd18e..f71285e8ef10 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1921,7 +1921,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, } #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ - DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE | \ + DRM_XE_VM_CREATE_FLAG_LR_MODE | \ DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \ DRM_XE_VM_CREATE_FLAG_FAULT_MODE) @@ -1957,7 +1957,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE && + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; @@ -1974,12 +1974,12 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; - if (args->flags & DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE) + if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) flags |= XE_VM_FLAG_LR_MODE; if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT) flags |= XE_VM_FLAG_ASYNC_DEFAULT; if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) - flags |= XE_VM_FLAG_LR_MODE | XE_VM_FLAG_FAULT_MODE; + flags |= XE_VM_FLAG_FAULT_MODE; vm = xe_vm_create(xe, flags); if (IS_ERR(vm)) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1be67d6bfd95..28230a0cd1ba 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -648,8 +648,29 @@ struct drm_xe_vm_create { __u64 extensions; #define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE (1 << 0) -#define DRM_XE_VM_CREATE_FLAG_COMPUTE_MODE (1 << 1) + /* + * An LR, or Long Running VM accepts exec submissions + * to its exec_queues that don't have an upper time limit on + * the job execution time. But exec submissions to these + * don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ, + * DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF, + * used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL. + * LR VMs can be created in recoverable page-fault mode using + * DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it. + * If that flag is omitted, the UMD can not rely on the slightly + * different per-VM overcommit semantics that are enabled by + * DRM_XE_VM_CREATE_FLAG_FAULT_MODE (see below), but KMD may + * still enable recoverable pagefaults if supported by the device. + */ +#define DRM_XE_VM_CREATE_FLAG_LR_MODE (1 << 1) #define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT (1 << 2) + /* + * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also + * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated + * on demand when accessed, and also allows per-VM overcommit of memory. + * The xe driver internally uses recoverable pagefaults to implement + * this. 
+ */ #define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 3) /** @flags: Flags */ __u32 flags; -- cgit v1.2.3 From 0f1d88f2786458a8986920669bd8fb3fec6e618d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 29 Nov 2023 11:41:15 -0500 Subject: drm/xe/uapi: Kill exec_queue_set_property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the properties should be immutable and set upon exec_queue creation using the existent extension. So, let's kill this useless and dangerous uapi. Cc: Francois Dugast Cc: José Roberto de Souza Cc: Matthew Brost Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Signed-off-by: Francois Dugast --- drivers/gpu/drm/xe/xe_device.c | 2 -- drivers/gpu/drm/xe/xe_exec_queue.c | 38 ------------------------------ drivers/gpu/drm/xe/xe_exec_queue.h | 2 -- include/uapi/drm/xe_drm.h | 48 +++++++++++--------------------------- 4 files changed, 13 insertions(+), 77 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 65e9aa5e6c31..8423c817111b 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -122,8 +122,6 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, - DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 2bab6fbd82f5..985807d6abbb 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -883,44 +883,6 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_set_property *args = data; - struct xe_exec_queue *q; - int ret; - u32 idx; - - if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - q = xe_exec_queue_lookup(xef, args->exec_queue_id); - if (XE_IOCTL_DBG(xe, !q)) - return -ENOENT; - - if (XE_IOCTL_DBG(xe, args->property >= - ARRAY_SIZE(exec_queue_set_property_funcs))) { - ret = -EINVAL; - goto out; - } - - idx = array_index_nospec(args->property, - ARRAY_SIZE(exec_queue_set_property_funcs)); - ret = exec_queue_set_property_funcs[idx](xe, q, args->value, false); - if (XE_IOCTL_DBG(xe, ret)) - goto out; - - if (args->extensions) - ret = exec_queue_user_extensions(xe, q, args->extensions, 0, - false); -out: - xe_exec_queue_put(q); - - return ret; -} - static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 533da1b0c457..d959cc4a1a82 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -55,8 +55,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void 
*data, struct drm_file *file); enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 2ab5ee299be0..0895e4d2a981 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -105,10 +105,9 @@ struct xe_user_extension { #define DRM_XE_VM_BIND 0x05 #define DRM_XE_EXEC_QUEUE_CREATE 0x06 #define DRM_XE_EXEC_QUEUE_DESTROY 0x07 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x08 -#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x09 -#define DRM_XE_EXEC 0x0a -#define DRM_XE_WAIT_USER_FENCE 0x0b +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 +#define DRM_XE_EXEC 0x09 +#define DRM_XE_WAIT_USER_FENCE 0x0a /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -867,38 +866,17 @@ struct drm_xe_vm_bind { /* Monitor 64MB contiguous region with 2M sub-granularity */ #define DRM_XE_ACC_GRANULARITY_64M 3 -/** - * struct drm_xe_exec_queue_set_property - exec queue set property - * - * Same namespace for extensions as drm_xe_exec_queue_create - */ -struct drm_xe_exec_queue_set_property { - /** @extensions: Pointer to the first extension struct, if any */ - __u64 extensions; - - /** @exec_queue_id: Exec queue ID */ - __u32 exec_queue_id; - -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 - /** @property: property to set */ - __u32 property; - - /** @value: property value */ - __u64 value; - - /** @reserved: Reserved */ - __u64 reserved[2]; -}; - struct drm_xe_exec_queue_create { -#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 + /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From c3ab84efbd05936cfac87ef6801e03534dc4b0b7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Dec 2023 07:58:20 -0800 Subject: drm/xe: Expand XE_REG_OPTION_MASKED documentation Expand documentation and add an example to make clear this isn't about generic masks in registers. Also, fix the doc regarding read operations: the mask part has no effect on them. 
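Put differently, the semantics being documented boil down to this worked example (no new behavior, just restating the kernel-doc below):

	write 0x00010001  ->  bit 0 is written to 1, bits 1..15 are untouched
	write 0x00000001  ->  no upper mask bit selects it, so nothing changes
	read              ->  returns the current lower 16 bits as-is; the
	                      mask half plays no role on reads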
Reviewed-by: Ashutosh Dixit Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231205155820.2133813-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 6e20fc2de9ff..c50e7650c09a 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -60,7 +60,16 @@ struct xe_reg_mcr { /** * XE_REG_OPTION_MASKED - Register is "masked", with upper 16 bits marking the - * read/written bits on the lower 16 bits. + * written bits on the lower 16 bits. + * + * It only applies to registers explicitly marked in bspec with + * "Access: Masked". Registers with this option can have write operations to + * specific lower bits by setting the corresponding upper bits. Other bits will + * not be affected. This allows register writes without needing a RMW cycle and + * without caching in software the register value. + * + * Example: a write with value 0x00010001 will set bit 0 and all other bits + * retain their previous values. * * To be used with XE_REG(). XE_REG_MCR() and XE_REG_INITIALIZER() */ -- cgit v1.2.3 From 1da0e581983c6f212499d44573b23ae48c1a4d00 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Dec 2023 05:39:51 -0800 Subject: drm/xe/kunit: Remove handling of XE_TEST_SUBPLATFORM_ANY The only user passing XE_TEST_SUBPLATFORM_ANY is xe_pci_fake_device_init_any(), but then the function would return earlier when handling XE_TEST_PLATFORM_ANY. Platforms without a subplatform use XE_SUBPLATFORM_NONE. Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231129232807.1499826-3-lucas.demarchi@intel.com Link: https://lore.kernel.org/r/20231129232807.1499826-6-lucas.demarchi@intel.com Link: https://lore.kernel.org/r/20231205133954.2089546-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_pci.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index d850dca85af1..c1aa785cac18 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -128,11 +128,6 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, if (!ent->device) return -ENODEV; - if (subplatform == XE_TEST_SUBPLATFORM_ANY) { - subplatform_desc = desc->subplatforms; - goto done; - } - for (subplatform_desc = desc->subplatforms; subplatform_desc && subplatform_desc->subplatform; subplatform_desc++) -- cgit v1.2.3 From 5b2a63b40d5620ce453f2a509334ae6feb7b884e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Dec 2023 05:39:52 -0800 Subject: drm/xe/kunit: Move fake pci data to test-priv Instead of passing as parameter to xe_pci_fake_device_init(), use test->priv to pass parameters down the call stack. The main advantage is that then the data is readily available on other functions by using kunit_get_current_test(). This is a preparation to fix the initialization of fake devices when they were supposed to be using GMD_ID. 
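Schematically (names as in the diff below, nothing new is introduced):

	/* test init: parameters go into the per-test context */
	test->priv = &data;			/* struct xe_pci_fake_data */

	/* anywhere down the call stack, with no extra plumbing: */
	struct kunit *test = kunit_get_current_test();
	struct xe_pci_fake_data *data = test->priv;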
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231129232807.1499826-4-lucas.demarchi@intel.com Link: https://lore.kernel.org/r/20231205133954.2089546-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_pci.c | 14 ++++++++------ drivers/gpu/drm/xe/tests/xe_pci_test.h | 17 +++++------------ drivers/gpu/drm/xe/tests/xe_rtp_test.c | 4 +++- drivers/gpu/drm/xe/tests/xe_wa_test.c | 7 ++++++- 4 files changed, 22 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index c1aa785cac18..b93cb1e96108 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -7,6 +7,7 @@ #include "tests/xe_test.h" +#include #include #include @@ -106,14 +107,15 @@ void xe_call_for_each_media_ip(xe_media_fn xe_fn) } EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip); -int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, - enum xe_subplatform subplatform) +int xe_pci_fake_device_init(struct xe_device *xe) { + struct kunit *test = kunit_get_current_test(); + struct xe_pci_fake_data *data = test->priv; const struct pci_device_id *ent = pciidlist; const struct xe_device_desc *desc; const struct xe_subplatform_desc *subplatform_desc; - if (platform == XE_TEST_PLATFORM_ANY) { + if (!data) { desc = (const void *)ent->driver_data; subplatform_desc = NULL; goto done; @@ -121,7 +123,7 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, for (ent = pciidlist; ent->device; ent++) { desc = (const void *)ent->driver_data; - if (desc->platform == platform) + if (desc->platform == data->platform) break; } @@ -131,10 +133,10 @@ int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, for (subplatform_desc = desc->subplatforms; subplatform_desc && subplatform_desc->subplatform; subplatform_desc++) - if (subplatform_desc->subplatform == subplatform) + if (subplatform_desc->subplatform == data->subplatform) break; - if (subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc) + if (data->subplatform != XE_SUBPLATFORM_NONE && !subplatform_desc) return -ENODEV; done: diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index cc0f1d141a4d..b4b3fb2df09c 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -12,13 +12,6 @@ struct xe_device; struct xe_graphics_desc; struct xe_media_desc; -/* - * Some defines just for clarity: these mean the test doesn't care about what - * platform it will get since it doesn't depend on any platform-specific bits - */ -#define XE_TEST_PLATFORM_ANY XE_PLATFORM_UNINITIALIZED -#define XE_TEST_SUBPLATFORM_ANY XE_SUBPLATFORM_UNINITIALIZED - typedef int (*xe_device_fn)(struct xe_device *); typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); typedef void (*xe_media_fn)(const struct xe_media_desc *); @@ -27,11 +20,11 @@ int xe_call_for_each_device(xe_device_fn xe_fn); void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); void xe_call_for_each_media_ip(xe_media_fn xe_fn); -int xe_pci_fake_device_init(struct xe_device *xe, enum xe_platform platform, - enum xe_subplatform subplatform); +struct xe_pci_fake_data { + enum xe_platform platform; + enum xe_subplatform subplatform; +}; -#define xe_pci_fake_device_init_any(xe__) \ - xe_pci_fake_device_init(xe__, XE_TEST_PLATFORM_ANY, \ - XE_TEST_SUBPLATFORM_ANY) +int xe_pci_fake_device_init(struct xe_device *xe); #endif 
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index a1d204133cc1..4a6972897675 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -281,7 +281,9 @@ static int xe_rtp_test_init(struct kunit *test) drm, DRIVER_GEM); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); - ret = xe_pci_fake_device_init_any(xe); + /* Initialize an empty device */ + test->priv = NULL; + ret = xe_pci_fake_device_init(xe); KUNIT_ASSERT_EQ(test, ret, 0); xe->drm.dev = dev; diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 01ea974591ea..045afae43891 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -75,6 +75,10 @@ KUNIT_ARRAY_PARAM(platform, cases, platform_desc); static int xe_wa_test_init(struct kunit *test) { const struct platform_test_case *param = test->param_value; + struct xe_pci_fake_data data = { + .platform = param->platform, + .subplatform = param->subplatform, + }; struct xe_device *xe; struct device *dev; int ret; @@ -87,7 +91,8 @@ static int xe_wa_test_init(struct kunit *test) drm, DRIVER_GEM); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); - ret = xe_pci_fake_device_init(xe, param->platform, param->subplatform); + test->priv = &data; + ret = xe_pci_fake_device_init(xe); KUNIT_ASSERT_EQ(test, ret, 0); xe->info.step = param->step; -- cgit v1.2.3 From 6cad22853cb89da857ff636607dd0e9880172a43 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Dec 2023 05:39:53 -0800 Subject: drm/xe/kunit: Add stub to read_gmdid Currently it's not possible to test the WAs for platforms using gmdid since they don't have the IP information on the descriptor struct. In order to allow that, add a stub function for read_gmdid() that is activated when the test executes, replacing the iomap and read of the real register. 
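In short, the stub has two halves, both visible in the hunks below:

	/* driver side: first statement of read_gmdid() redirects to a stub
	 * whenever the calling KUnit test has registered one */
	KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);

	/* test side: register the replacement before faking the device */
	kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid);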
Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231129232807.1499826-5-lucas.demarchi@intel.com Link: https://lore.kernel.org/r/20231205133954.2089546-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_pci.c | 18 ++++++++++++++++++ drivers/gpu/drm/xe/tests/xe_pci_test.h | 6 ++++++ drivers/gpu/drm/xe/xe_pci.c | 3 +++ drivers/gpu/drm/xe/xe_step.h | 2 ++ 4 files changed, 29 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index b93cb1e96108..602793644f61 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -9,6 +9,7 @@ #include #include +#include #include struct kunit_test_data { @@ -107,6 +108,21 @@ void xe_call_for_each_media_ip(xe_media_fn xe_fn) } EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip); +static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, + u32 *ver, u32 *revid) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_pci_fake_data *data = test->priv; + + if (type == GMDID_MEDIA) { + *ver = data->media_verx100; + *revid = xe_step_to_gmdid(data->media_step); + } else { + *ver = data->graphics_verx100; + *revid = xe_step_to_gmdid(data->graphics_step); + } +} + int xe_pci_fake_device_init(struct xe_device *xe) { struct kunit *test = kunit_get_current_test(); @@ -140,6 +156,8 @@ int xe_pci_fake_device_init(struct xe_device *xe) return -ENODEV; done: + kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid); + xe_info_init_early(xe, desc, subplatform_desc); xe_info_init(xe, desc->graphics, desc->media); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index b4b3fb2df09c..811ffe5bd9fd 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -6,6 +6,8 @@ #ifndef _XE_PCI_TEST_H_ #define _XE_PCI_TEST_H_ +#include + #include "xe_platform_types.h" struct xe_device; @@ -23,6 +25,10 @@ void xe_call_for_each_media_ip(xe_media_fn xe_fn); struct xe_pci_fake_data { enum xe_platform platform; enum xe_subplatform subplatform; + u32 graphics_verx100; + u32 media_verx100; + u32 graphics_step; + u32 media_step; }; int xe_pci_fake_device_init(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index b85193d1dcc2..148890357313 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -5,6 +5,7 @@ #include "xe_pci.h" +#include #include #include #include @@ -456,6 +457,8 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, struct xe_reg gmdid_reg = GMD_ID; u32 val; + KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid); + if (type == GMDID_MEDIA) gmdid_reg.addr += MEDIA_GT_GSI_OFFSET; diff --git a/drivers/gpu/drm/xe/xe_step.h b/drivers/gpu/drm/xe/xe_step.h index a384b640f2af..686cb59200c2 100644 --- a/drivers/gpu/drm/xe/xe_step.h +++ b/drivers/gpu/drm/xe/xe_step.h @@ -16,6 +16,8 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe); struct xe_step_info xe_step_gmdid_get(struct xe_device *xe, u32 graphics_gmdid_revid, u32 media_gmdid_revid); +static inline u32 xe_step_to_gmdid(enum xe_step step) { return step - STEP_A0; } + const char *xe_step_name(enum xe_step step); #endif -- cgit v1.2.3 From 2a70bbe6170fafde76cf0135c5cbee4bd4bfa0ec Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Dec 2023 05:39:54 -0800 Subject: drm/xe/kunit: Test WAs for MTL and LNL Now that the kunit 
infra has proper support for GMD_ID platforms, add a few variants of MTL and LNL. v2: Remove bogus check for setting both media and graphics version in test (Matt Roper) Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20231129232807.1499826-6-lucas.demarchi@intel.com Link: https://lore.kernel.org/r/20231205133954.2089546-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_wa_test.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 045afae43891..a53c22a19582 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -18,6 +18,8 @@ struct platform_test_case { const char *name; enum xe_platform platform; enum xe_subplatform subplatform; + u32 graphics_verx100; + u32 media_verx100; struct xe_step_info step; }; @@ -38,6 +40,18 @@ struct platform_test_case { .step = { .graphics = STEP_ ## graphics_step__ } \ } +#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ + media_verx100__, media_step__) \ + { \ + .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .graphics_verx100 = graphics_verx100__, \ + .media_verx100 = media_verx100__, \ + .step = { .graphics = STEP_ ## graphics_step__, \ + .media = STEP_ ## media_step__ } \ + } + static const struct platform_test_case cases[] = { PLATFORM_CASE(TIGERLAKE, B0), PLATFORM_CASE(DG1, A0), @@ -63,6 +77,10 @@ static const struct platform_test_case cases[] = { PLATFORM_CASE(PVC, B0), PLATFORM_CASE(PVC, B1), PLATFORM_CASE(PVC, C0), + GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), + GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), + GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), }; static void platform_desc(const struct platform_test_case *t, char *desc) @@ -78,6 +96,10 @@ static int xe_wa_test_init(struct kunit *test) struct xe_pci_fake_data data = { .platform = param->platform, .subplatform = param->subplatform, + .graphics_verx100 = param->graphics_verx100, + .media_verx100 = param->media_verx100, + .graphics_step = param->step.graphics, + .media_step = param->step.media, }; struct xe_device *xe; struct device *dev; @@ -95,7 +117,8 @@ static int xe_wa_test_init(struct kunit *test) ret = xe_pci_fake_device_init(xe); KUNIT_ASSERT_EQ(test, ret, 0); - xe->info.step = param->step; + if (!param->graphics_verx100) + xe->info.step = param->step; /* TODO: init hw engines for engine/LRC WAs */ xe->drm.dev = dev; -- cgit v1.2.3 From 7ce5716e13cfb37a86c02fe158403c002eb1b504 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 28 Nov 2023 17:17:18 -0800 Subject: drm/xe/huc: Prepare for 2-step HuC authentication Starting on MTL, the HuC is authenticated twice, once via GuC (same as with older integrated platforms) and once via GSC; the first authentication allows the HuC to be used for clear-media workloads, while the second one unlocks support for protected content. Ahead of adding the authentication flow via GSC, this patch adds support for differentiating the 2 auth steps and checking if they're complete. 
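The intended call flow is roughly the following (the GSC leg is wired up by the follow-up patch in this series; function names as in the diffs):

	/* at driver load, as before: unlocks clear-media workloads */
	ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);

	/* later, from the GSC worker, once the GSC FW is running and only
	 * if the GuC step already succeeded: unlocks protected content */
	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);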
Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Reviewed-by: Vivaik Balasubrawmanian Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gsc_regs.h | 2 ++ drivers/gpu/drm/xe/xe_huc.c | 47 +++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_huc.h | 8 +++++- drivers/gpu/drm/xe/xe_uc.c | 2 +- 4 files changed, 47 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 9a84b55d66ee..9886ec9cb08e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -32,6 +32,8 @@ #define HECI1_FWSTS1_CURRENT_STATE_RESET 0 #define HECI1_FWSTS1_PROXY_STATE_NORMAL 5 #define HECI1_FWSTS1_INIT_COMPLETE REG_BIT(9) +#define HECI_FWSTS5(base) XE_REG((base) + 0xc68) +#define HECI1_FWSTS5_HUC_AUTH_DONE REG_BIT(19) #define HECI_H_GS1(base) XE_REG((base) + 0xc4c) #define HECI_H_GS1_ER_PREP REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 2f176badab26..9845165a819c 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -5,6 +5,7 @@ #include "xe_huc.h" +#include "regs/xe_gsc_regs.h" #include "regs/xe_guc_regs.h" #include "xe_assert.h" #include "xe_bo.h" @@ -71,7 +72,25 @@ int xe_huc_upload(struct xe_huc *huc) return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); } -int xe_huc_auth(struct xe_huc *huc) +static const struct { + const char *name; + struct xe_reg reg; + u32 val; +} huc_auth_modes[XE_HUC_AUTH_TYPES_COUNT] = { + [XE_HUC_AUTH_VIA_GUC] = { "GuC", + HUC_KERNEL_LOAD_INFO, + HUC_LOAD_SUCCESSFUL }, + [XE_HUC_AUTH_VIA_GSC] = { "GSC", + HECI_FWSTS5(MTL_GSC_HECI1_BASE), + HECI1_FWSTS5_HUC_AUTH_DONE }, +}; + +static bool huc_is_authenticated(struct xe_gt *gt, enum xe_huc_auth_types type) +{ + return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val; +} + +int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) { struct xe_device *xe = huc_to_xe(huc); struct xe_gt *gt = huc_to_gt(huc); @@ -84,7 +103,7 @@ int xe_huc_auth(struct xe_huc *huc) xe_assert(xe, !xe_uc_fw_is_running(&huc->fw)); /* On newer platforms the HuC survives reset, so no need to re-auth */ - if (xe_mmio_read32(gt, HUC_KERNEL_LOAD_INFO) & HUC_LOAD_SUCCESSFUL) { + if (huc_is_authenticated(gt, type)) { xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); return 0; } @@ -92,28 +111,36 @@ int xe_huc_auth(struct xe_huc *huc) if (!xe_uc_fw_is_loaded(&huc->fw)) return -ENOEXEC; - ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) + - xe_uc_fw_rsa_offset(&huc->fw)); + switch (type) { + case XE_HUC_AUTH_VIA_GUC: + ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) + + xe_uc_fw_rsa_offset(&huc->fw)); + break; + default: + XE_WARN_ON(type); + return -EINVAL; + } if (ret) { - drm_err(&xe->drm, "HuC: GuC did not ack Auth request %d\n", - ret); + drm_err(&xe->drm, "Failed to trigger HuC auth via %s: %d\n", + huc_auth_modes[type].name, ret); goto fail; } - ret = xe_mmio_wait32(gt, HUC_KERNEL_LOAD_INFO, HUC_LOAD_SUCCESSFUL, - HUC_LOAD_SUCCESSFUL, 100000, NULL, false); + ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val, + huc_auth_modes[type].val, 100000, NULL, false); if (ret) { drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); goto fail; } xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); - drm_dbg(&xe->drm, "HuC authenticated\n"); + drm_dbg(&xe->drm, "HuC authenticated via %s\n", huc_auth_modes[type].name); return 0; fail: - drm_err(&xe->drm, "HuC 
authentication failed %d\n", ret); + drm_err(&xe->drm, "HuC: Auth via %s failed: %d\n", + huc_auth_modes[type].name, ret); xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL); return ret; diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h index 5802c43b6ce2..b8c387f14b8e 100644 --- a/drivers/gpu/drm/xe/xe_huc.h +++ b/drivers/gpu/drm/xe/xe_huc.h @@ -10,9 +10,15 @@ struct drm_printer; +enum xe_huc_auth_types { + XE_HUC_AUTH_VIA_GUC = 0, + XE_HUC_AUTH_VIA_GSC, + XE_HUC_AUTH_TYPES_COUNT +}; + int xe_huc_init(struct xe_huc *huc); int xe_huc_upload(struct xe_huc *huc); -int xe_huc_auth(struct xe_huc *huc); +int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type); void xe_huc_sanitize(struct xe_huc *huc); void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 72a7b3c2577d..25e1ddfd2f86 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -176,7 +176,7 @@ int xe_uc_init_hw(struct xe_uc *uc) return ret; /* We don't fail the driver load if HuC fails to auth, but let's warn */ - ret = xe_huc_auth(&uc->huc); + ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC); xe_gt_assert(uc_to_gt(uc), !ret); /* GSC load is async */ -- cgit v1.2.3 From d8b1571312b7f77aeae2b2a7a138bb8edaa4f725 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 28 Nov 2023 17:17:19 -0800 Subject: drm/xe/huc: HuC authentication via GSC HuC authentication via GSC is performed by submitting the appropriate PXP packet to the GSC FW. This packet can trigger a "pending" reply from the FW, so we need to handle that and resubmit. Note that the auth via GSC can only be performed if the HuC has already been authenticated by the GuC. Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Vivaik Balasubrawmanian Reviewed-by: Vivaik Balasubrawmanian Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h | 59 +++++++++++ drivers/gpu/drm/xe/xe_gsc.c | 14 ++- drivers/gpu/drm/xe/xe_huc.c | 141 +++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_huc.h | 1 + drivers/gpu/drm/xe/xe_huc_types.h | 5 + drivers/gpu/drm/xe/xe_uc_fw.c | 2 + drivers/gpu/drm/xe/xe_uc_fw_types.h | 3 + 7 files changed, 218 insertions(+), 7 deletions(-) create mode 100644 drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h new file mode 100644 index 000000000000..57520809e48d --- /dev/null +++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _ABI_GSC_PXP_COMMANDS_ABI_H +#define _ABI_GSC_PXP_COMMANDS_ABI_H + +#include + +/* Heci client ID for PXP commands */ +#define HECI_MEADDRESS_PXP 17 + +#define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF)) + +/* + * there are a lot of status codes for PXP, but we only define the cross-API + * common ones that we actually can handle in the kernel driver. Other failure + * codes should be printed to error msg for debug. 
+ */ +enum pxp_status { + PXP_STATUS_SUCCESS = 0x0, + PXP_STATUS_ERROR_API_VERSION = 0x1002, + PXP_STATUS_NOT_READY = 0x100e, + PXP_STATUS_PLATFCONFIG_KF1_NOVERIF = 0x101a, + PXP_STATUS_PLATFCONFIG_KF1_BAD = 0x101f, + PXP_STATUS_OP_NOT_PERMITTED = 0x4013 +}; + +/* Common PXP FW message header */ +struct pxp_cmd_header { + u32 api_version; + u32 command_id; + union { + u32 status; /* out */ + u32 stream_id; /* in */ +#define PXP_CMDHDR_EXTDATA_SESSION_VALID GENMASK(0, 0) +#define PXP_CMDHDR_EXTDATA_APP_TYPE GENMASK(1, 1) +#define PXP_CMDHDR_EXTDATA_SESSION_ID GENMASK(17, 2) + }; + /* Length of the message (excluding the header) */ + u32 buffer_len; +} __packed; + +#define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */ + +/* PXP-Input-Packet: HUC Auth-only */ +struct pxp43_new_huc_auth_in { + struct pxp_cmd_header header; + u64 huc_base_address; + u32 huc_size; +} __packed; + +/* PXP-Output-Packet: HUC Load and Authentication or Auth-only */ +struct pxp43_huc_auth_out { + struct pxp_cmd_header header; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index d8ec04e3c006..a8a895cf4b44 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -16,6 +16,7 @@ #include "xe_gsc_submit.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_huc.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_sched_job.h" @@ -257,11 +258,18 @@ static void gsc_work(struct work_struct *work) xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); ret = gsc_upload(gsc); - if (ret && ret != -EEXIST) + if (ret && ret != -EEXIST) { xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); - else - xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + goto out; + } + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); + + /* HuC auth failure is not fatal */ + if (xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GUC)) + xe_huc_auth(>->uc.huc, XE_HUC_AUTH_VIA_GSC); + +out: xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); xe_device_mem_access_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 9845165a819c..eca109791c6a 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -5,14 +5,19 @@ #include "xe_huc.h" +#include + +#include "abi/gsc_pxp_commands_abi.h" #include "regs/xe_gsc_regs.h" #include "regs/xe_guc_regs.h" #include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" +#include "xe_gsc_submit.h" #include "xe_gt.h" #include "xe_guc.h" +#include "xe_map.h" #include "xe_mmio.h" #include "xe_uc_fw.h" @@ -34,6 +39,42 @@ huc_to_guc(struct xe_huc *huc) return &container_of(huc, struct xe_uc, huc)->guc; } +static void free_gsc_pkt(struct drm_device *drm, void *arg) +{ + struct xe_huc *huc = arg; + + xe_bo_unpin_map_no_vm(huc->gsc_pkt); + huc->gsc_pkt = NULL; +} + +#define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K +static int huc_alloc_gsc_pkt(struct xe_huc *huc) +{ + struct xe_gt *gt = huc_to_gt(huc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + int err; + + /* we use a single object for both input and output */ + bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, + PXP43_HUC_AUTH_INOUT_SIZE * 2, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + huc->gsc_pkt = bo; + + err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); + if (err) { + free_gsc_pkt(&xe->drm, huc); + return err; + } + + return 0; +} + int xe_huc_init(struct xe_huc *huc) { struct xe_gt *gt = 
huc_to_gt(huc); @@ -56,6 +97,12 @@ int xe_huc_init(struct xe_huc *huc) if (!xe_uc_fw_is_enabled(&huc->fw)) return 0; + if (huc->fw.has_gsc_headers) { + ret = huc_alloc_gsc_pkt(huc); + if (ret) + goto out; + } + xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); return 0; @@ -72,6 +119,89 @@ int xe_huc_upload(struct xe_huc *huc) return xe_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL); } +#define huc_auth_msg_wr(xe_, map_, offset_, field_, val_) \ + xe_map_wr_field(xe_, map_, offset_, struct pxp43_new_huc_auth_in, field_, val_) +#define huc_auth_msg_rd(xe_, map_, offset_, field_) \ + xe_map_rd_field(xe_, map_, offset_, struct pxp43_huc_auth_out, field_) + +static u32 huc_emit_pxp_auth_msg(struct xe_device *xe, struct iosys_map *map, + u32 wr_offset, u32 huc_offset, u32 huc_size) +{ + xe_map_memset(xe, map, wr_offset, 0, sizeof(struct pxp43_new_huc_auth_in)); + + huc_auth_msg_wr(xe, map, wr_offset, header.api_version, PXP_APIVER(4, 3)); + huc_auth_msg_wr(xe, map, wr_offset, header.command_id, PXP43_CMDID_NEW_HUC_AUTH); + huc_auth_msg_wr(xe, map, wr_offset, header.status, 0); + huc_auth_msg_wr(xe, map, wr_offset, header.buffer_len, + sizeof(struct pxp43_new_huc_auth_in) - sizeof(struct pxp_cmd_header)); + huc_auth_msg_wr(xe, map, wr_offset, huc_base_address, huc_offset); + huc_auth_msg_wr(xe, map, wr_offset, huc_size, huc_size); + + return wr_offset + sizeof(struct pxp43_new_huc_auth_in); +} + +static int huc_auth_via_gsccs(struct xe_huc *huc) +{ + struct xe_gt *gt = huc_to_gt(huc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *pkt = huc->gsc_pkt; + u32 wr_offset; + u32 rd_offset; + u64 ggtt_offset; + u32 out_status; + int retry = 5; + int err = 0; + + if (!pkt) + return -ENODEV; + + ggtt_offset = xe_bo_ggtt_addr(pkt); + + wr_offset = xe_gsc_emit_header(xe, &pkt->vmap, 0, HECI_MEADDRESS_PXP, 0, + sizeof(struct pxp43_new_huc_auth_in)); + wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, + xe_bo_ggtt_addr(huc->fw.bo), + huc->fw.bo->size); + do { + err = xe_gsc_pkt_submit_kernel(>->uc.gsc, ggtt_offset, wr_offset, + ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE, + PXP43_HUC_AUTH_INOUT_SIZE); + if (err) + break; + + if (xe_gsc_check_and_update_pending(xe, &pkt->vmap, 0, &pkt->vmap, + PXP43_HUC_AUTH_INOUT_SIZE)) { + err = -EBUSY; + msleep(50); + } + } while (--retry && err == -EBUSY); + + if (err) { + drm_err(&xe->drm, "failed to submit GSC request to auth: %d\n", err); + return err; + } + + err = xe_gsc_read_out_header(xe, &pkt->vmap, PXP43_HUC_AUTH_INOUT_SIZE, + sizeof(struct pxp43_huc_auth_out), &rd_offset); + if (err) { + drm_err(&xe->drm, "HuC: invalid GSC reply for auth (err=%d)\n", err); + return err; + } + + /* + * The GSC will return PXP_STATUS_OP_NOT_PERMITTED if the HuC is already + * authenticated. If the same error is ever returned with HuC not loaded + * we'll still catch it when we check the authentication bit later. 
+ */ + out_status = huc_auth_msg_rd(xe, &pkt->vmap, rd_offset, header.status); + if (out_status != PXP_STATUS_SUCCESS && out_status != PXP_STATUS_OP_NOT_PERMITTED) { + drm_err(&xe->drm, "auth failed with GSC error = 0x%x\n", out_status); + return -EIO; + } + + return 0; +} + static const struct { const char *name; struct xe_reg reg; @@ -85,8 +215,10 @@ static const struct { HECI1_FWSTS5_HUC_AUTH_DONE }, }; -static bool huc_is_authenticated(struct xe_gt *gt, enum xe_huc_auth_types type) +bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type) { + struct xe_gt *gt = huc_to_gt(huc); + return xe_mmio_read32(gt, huc_auth_modes[type].reg) & huc_auth_modes[type].val; } @@ -100,10 +232,8 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) if (!xe_uc_fw_is_loadable(&huc->fw)) return 0; - xe_assert(xe, !xe_uc_fw_is_running(&huc->fw)); - /* On newer platforms the HuC survives reset, so no need to re-auth */ - if (huc_is_authenticated(gt, type)) { + if (xe_huc_is_authenticated(huc, type)) { xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); return 0; } @@ -116,6 +246,9 @@ int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) ret = xe_guc_auth_huc(guc, xe_bo_ggtt_addr(huc->fw.bo) + xe_uc_fw_rsa_offset(&huc->fw)); break; + case XE_HUC_AUTH_VIA_GSC: + ret = huc_auth_via_gsccs(huc); + break; default: XE_WARN_ON(type); return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h index b8c387f14b8e..532017230287 100644 --- a/drivers/gpu/drm/xe/xe_huc.h +++ b/drivers/gpu/drm/xe/xe_huc.h @@ -19,6 +19,7 @@ enum xe_huc_auth_types { int xe_huc_init(struct xe_huc *huc); int xe_huc_upload(struct xe_huc *huc); int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type); +bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type); void xe_huc_sanitize(struct xe_huc *huc); void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_huc_types.h b/drivers/gpu/drm/xe/xe_huc_types.h index cae6d19097df..cfbaa5e0dfca 100644 --- a/drivers/gpu/drm/xe/xe_huc_types.h +++ b/drivers/gpu/drm/xe/xe_huc_types.h @@ -8,12 +8,17 @@ #include "xe_uc_fw_types.h" +struct xe_bo; + /** * struct xe_huc - HuC */ struct xe_huc { /** @fw: Generic uC firmware management */ struct xe_uc_fw fw; + + /** @gsc_pkt: bo to store the packet for auth via GSC */ + struct xe_bo *gsc_pkt; }; #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 9abae65c6b23..73d6938c921d 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -528,6 +528,8 @@ static int parse_cpd_header(struct xe_uc_fw *uc_fw, const void *data, size_t siz uc_fw->css_offset = offset; } + uc_fw->has_gsc_headers = true; + return 0; } diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index fc1de0cc9324..ee914a5d8523 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -112,6 +112,9 @@ struct xe_uc_fw { /** @bo: XE BO for uC firmware */ struct xe_bo *bo; + /** @has_gsc_headers: whether the FW image starts with GSC headers */ + bool has_gsc_headers; + /* * The firmware build process will generate a version header file with * major and minor version defined. 
The versions are built into CSS -- cgit v1.2.3 From 6a1fd6787d59a1852e89a9e8863673ae4dc9a2ca Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 5 Dec 2023 10:51:59 +0530 Subject: drm/xe/xe2: Add workaround 14019988906 This workaround applies to Graphics 20.04 as engine workaround V2(MattR): - Reorder bit define - Apply WA for RCS only Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d318ec0efd7d..e8dc463a49f6 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -124,6 +124,7 @@ #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) #define XEHP_PSS_CHICKEN XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED) +#define FLSH_IGNORES_PSD REG_BIT(10) #define FD_END_COLLECT REG_BIT(5) #define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 63bd4bb1af03..ce897f2d49be 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -719,6 +719,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, + { XE_RTP_NAME("14019988906"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) + }, {} }; -- cgit v1.2.3 From b279b53015079bda2a311b48892dff362ac8ebc3 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 6 Dec 2023 22:20:20 +0530 Subject: drm/xe/xe2: Add workaround 18032095049 and 16021639441 This workaround applies to graphics 20.04 on all engines. Workaround has three parts : 1. Pipe flush before MI_ATOMIC - This part isn't relevant to Xe (at least not right now) since we don't use MI_ATOMIC anywhere in the kernel mode driver. 2. Memory-based interrupt masking - Memory-based interrupt processing isn't supported on physical functions, only virtual functions, according to bspec 60352. So this is probably only relevant once SRIOV support lands in the driver. 3. Disabling CSB/timestamp updates to the ghwsp and pphwsp - Workaround is added by this change. The CSB reports to gHWSP and ppHWSP have been discussed as part of a different topic on some internal threads and we've confirmed that neither the KMD nor the GuC firmware use those for anything, so disabling them is always "safe" and should have no functional or performance impact on system operation. The same is true for the timestamp updates in the ppHWSP as well. Given that, it might make sense to just combine these two workarounds into a single record (and single patch) and apply it on all steppings. Disabling the reports for RCS on higher steppings doesn't have any kind of negative impact and will simplify the overall situation. 
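Since CSFE_CHICKEN1 is a masked register, the SET() action added below ultimately boils down to a single MMIO write in which the upper 16 bits carry the write-enable mask for the two disable bits. A minimal stand-alone sketch of that value, assuming the usual Intel GPU masked-register convention (illustrative only, not the driver's actual helpers):

  /* Illustrative: masked registers take a per-bit write-enable mask in
   * bits 31:16, so enabling bits 15 and 14 means writing 0xc000c000.
   */
  #include <stdint.h>
  #include <stdio.h>

  static uint32_t masked_bit_enable(uint32_t bits)
  {
          return (bits << 16) | bits;     /* write-enable mask + value */
  }

  int main(void)
  {
          uint32_t dis = (1u << 15) | (1u << 14); /* GHWSP + PPHWSP disables */

          printf("CSFE_CHICKEN1 <- 0x%08x\n", masked_bit_enable(dis));
          return 0;
  }

That one write disables both report types on every engine the rule matches.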
V3(MattR): - Combine WA apply same WA for all engines, no performance impact V2(MattR): - Mention detail in commit message - Reorder bit define - Improve bit naming - Remove workaround part which isnt relevant Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 5 +++++ drivers/gpu/drm/xe/xe_wa.c | 14 ++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 444ff9b83bb1..3942db268b01 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -47,6 +47,11 @@ #define RING_ESR(base) XE_REG((base) + 0xb8) #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) + +#define CSFE_CHICKEN1_REG(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) +#define GHWSP_CSB_REPORT_DIS REG_BIT(15) +#define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) + /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption. diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index ce897f2d49be..23f1285135b8 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -596,6 +596,20 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, + /* + * These two workarounds are the same, just applying to different + * engines. Although Wa_18032095049 (for the RCS) isn't required on + * all steppings, disabling these reports has no impact for our + * driver or the GuC, so we go ahead and treat it the same as + * Wa_16021639441 which does apply to all steppings. + */ + { XE_RTP_NAME("18032095049, 16021639441"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1_REG(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, {} }; -- cgit v1.2.3 From 0d97ecce16bd26a1f90022cf0466ff15c4a0bd91 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Mon, 9 Oct 2023 13:10:27 -0700 Subject: drm/xe: Enable Fixed CCS mode setting Disable dynamic HW load balancing of compute resource assignment to engines and instead enabled fixed mode of mapping compute resources to engines on all platforms with more than one compute engine. By default enable only one CCS engine with all compute slices assigned to it. This is the desired configuration for common workloads. PVC platform supports only the fixed CCS mode (workaround 16016805146). 
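To make the register programming below easier to follow, here is a small user-space model of the fixed slice-to-engine assignment and the CCS_MODE value it produces. It assumes four present compute slices and CCS engine instances 0-3 with nothing fused off; the in-kernel implementation additionally skips fused-off slices and works from logical instances, so treat this as a sketch rather than the driver code:

  /* CCS_MODE packs one 3-bit field per compute slice; 0x7 marks the slice
   * disabled, any other value is the CCS engine instance serving it.
   */
  #include <stdint.h>
  #include <stdio.h>

  #define NUM_CSLICES     4

  static uint32_t ccs_mode_value(unsigned int num_engines)
  {
          uint32_t mode = 0;
          unsigned int cslice;

          for (cslice = 0; cslice < NUM_CSLICES; cslice++)
                  /* round-robin: slice i is served by engine i % num_engines */
                  mode |= (uint32_t)(cslice % num_engines) << (cslice * 3);

          return mode;
  }

  int main(void)
  {
          printf("1 engine:  0x%03x\n", ccs_mode_value(1));  /* 0x000 */
          printf("2 engines: 0x%03x\n", ccs_mode_value(2));  /* 0x208 */
          printf("4 engines: 0x%03x\n", ccs_mode_value(4));  /* 0x688 */
          return 0;
  }

With one engine every field selects ccs0, which is the default configuration applied at GT init below.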
v2: Rebase, make it platform agnostic v3: Minor code refactoring Reviewed-by: Andi Shyti Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/regs/xe_gt_regs.h | 14 +++++++ drivers/gpu/drm/xe/xe_gt.c | 10 +++++ drivers/gpu/drm/xe/xe_gt.h | 2 + drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 78 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_ccs_mode.h | 23 +++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 8 ++++ drivers/gpu/drm/xe/xe_guc_ads.c | 3 ++ drivers/gpu/drm/xe/xe_hw_engine.c | 20 +++++++++ 9 files changed, 159 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_ccs_mode.c create mode 100644 drivers/gpu/drm/xe/xe_gt_ccs_mode.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e6f98d807783..b9062e3ce6a9 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -70,6 +70,7 @@ xe-y += xe_bb.o \ xe_gsc.o \ xe_gsc_submit.o \ xe_gt.o \ + xe_gt_ccs_mode.o \ xe_gt_clock.o \ xe_gt_debugfs.o \ xe_gt_idle.o \ diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index e8dc463a49f6..9744ed0be3a5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -402,8 +402,22 @@ #define COMP_CKN_IN REG_GENMASK(30, 29) #define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED) +#define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) #define RCU_MODE_CCS_ENABLE REG_BIT(0) +/* + * Total of 4 cslices, where each cslice is in the form: + * [0-3] CCS ID + * [4-6] RSVD + * [7] Disabled + */ +#define CCS_MODE XE_REG(0x14804) +#define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */ +#define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */ +#define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1) +#define CCS_MODE_CSLICE(cslice, ccs) \ + ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH)) + #define FORCEWAKE_ACK_GT XE_REG(0x130044) #define FORCEWAKE_KERNEL BIT(0) #define FORCEWAKE_USER BIT(1) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index a9c71da985d3..93fccbb4f2ad 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -22,6 +22,7 @@ #include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gsc.h" +#include "xe_gt_ccs_mode.h" #include "xe_gt_clock.h" #include "xe_gt_idle.h" #include "xe_gt_mcr.h" @@ -450,6 +451,12 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; + /* Configure default CCS mode of 1 engine with all resources */ + if (xe_gt_ccs_mode_enabled(gt)) { + gt->ccs_mode = 1; + xe_gt_apply_ccs_mode(gt); + } + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); xe_device_mem_access_put(gt_to_xe(gt)); @@ -560,6 +567,9 @@ static int do_gt_restart(struct xe_gt *gt) xe_reg_sr_apply_whitelist(hwe); } + /* Get CCS mode in sync between sw/hw */ + xe_gt_apply_ccs_mode(gt); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index caded203a8a0..a818cc9c8fd0 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -17,6 +17,8 @@ for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ xe_hw_engine_is_valid((hwe__))) +#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) + #ifdef CONFIG_FAULT_INJECTION extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c new file mode 100644 
index 000000000000..541c44c70a84 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "regs/xe_gt_regs.h" +#include "xe_assert.h" +#include "xe_gt.h" +#include "xe_gt_ccs_mode.h" +#include "xe_mmio.h" + +static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) +{ + u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */ + int num_slices = hweight32(CCS_MASK(gt)); + struct xe_device *xe = gt_to_xe(gt); + int width, cslice = 0; + u32 config = 0; + + xe_assert(xe, xe_gt_ccs_mode_enabled(gt)); + + xe_assert(xe, num_engines && num_engines <= num_slices); + xe_assert(xe, !(num_slices % num_engines)); + + /* + * Loop over all available slices and assign each a user engine. + * For example, if there are four compute slices available, the + * assignment of compute slices to compute engines would be, + * + * With 1 engine (ccs0): + * slice 0, 1, 2, 3: ccs0 + * + * With 2 engines (ccs0, ccs1): + * slice 0, 2: ccs0 + * slice 1, 3: ccs1 + * + * With 4 engines (ccs0, ccs1, ccs2, ccs3): + * slice 0: ccs0 + * slice 1: ccs1 + * slice 2: ccs2 + * slice 3: ccs3 + */ + for (width = num_slices / num_engines; width; width--) { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) { + if (hwe->class != XE_ENGINE_CLASS_COMPUTE) + continue; + + if (hwe->logical_instance >= num_engines) + break; + + config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0; + + /* If a slice is fused off, leave disabled */ + while ((CCS_MASK(gt) & BIT(cslice)) == 0) + cslice++; + + mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK); + mode |= CCS_MODE_CSLICE(cslice, hwe->instance); + cslice++; + } + } + + xe_mmio_write32(gt, CCS_MODE, mode); + + xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", + mode, config, num_engines, num_slices); +} + +void xe_gt_apply_ccs_mode(struct xe_gt *gt) +{ + if (!gt->ccs_mode) + return; + + __xe_gt_apply_ccs_mode(gt, gt->ccs_mode); +} diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h new file mode 100644 index 000000000000..e8766879f6ec --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_CCS_MODE_H_ +#define _XE_GT_CCS_MODE_H_ + +#include "xe_device_types.h" +#include "xe_gt.h" +#include "xe_gt_types.h" +#include "xe_platform_types.h" + +void xe_gt_apply_ccs_mode(struct xe_gt *gt); + +static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt) +{ + /* Check if there are more than one compute engines available */ + return hweight32(CCS_MASK(gt)) > 1; +} + +#endif + diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index a7263738308e..4e48c4643163 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -185,6 +185,14 @@ struct xe_gt { spinlock_t lock; } tlb_invalidation; + /** + * @ccs_mode: Number of compute engines enabled. + * Allows fixed mapping of available compute slices to compute engines. + * By default only the first available compute engine is enabled and all + * available compute slices are allocated to it. 
+ */ + u32 ccs_mode; + /** @usm: unified shared memory state */ struct { /** diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 2f5ff090aa6b..ab115588f88b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -12,6 +12,7 @@ #include "regs/xe_guc_regs.h" #include "xe_bo.h" #include "xe_gt.h" +#include "xe_gt_ccs_mode.h" #include "xe_guc.h" #include "xe_hw_engine.h" #include "xe_lrc.h" @@ -440,6 +441,8 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, { .reg = RING_HWS_PGA(hwe->mmio_base), }, { .reg = RING_IMR(hwe->mmio_base), }, { .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain }, + { .reg = CCS_MODE, + .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) }, }; u32 i; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 108ecbfe593e..c56e7cec350e 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -16,6 +16,7 @@ #include "xe_execlist.h" #include "xe_force_wake.h" #include "xe_gt.h" +#include "xe_gt_ccs_mode.h" #include "xe_gt_topology.h" #include "xe_hw_fence.h" #include "xe_irq.h" @@ -282,6 +283,13 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } +static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_gt_ccs_mode_enabled(gt) && + xe_rtp_match_first_render_or_compute(gt, hwe); +} + void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) { @@ -306,6 +314,12 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) blit_cctl_val, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, {} }; @@ -859,6 +873,12 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) if (hwe->class == XE_ENGINE_CLASS_OTHER) return true; + /* Check for engines disabled by ccs_mode setting */ + if (xe_gt_ccs_mode_enabled(gt) && + hwe->class == XE_ENGINE_CLASS_COMPUTE && + hwe->logical_instance >= gt->ccs_mode) + return true; + return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY && hwe->instance == gt->usm.reserved_bcs_instance; } -- cgit v1.2.3 From f3bc5bb4d53d2091f03cf43f19e7c9b41db90367 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 15 Nov 2023 21:59:04 +0000 Subject: drm/xe: Allow userspace to configure CCS mode Allow user to configure the CCS mode setting through a 'ccs_mode' sysfs interface. Also report the current CCS mode configuration and number of compute slices available through this interface. 
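As a usage sketch, the attributes behave like any other per-gt sysfs file; note that writing ccs_mode triggers an asynchronous GT reset. The path used here is only a guess (where the gt0 directory sits can differ between kernel versions):

  /* Illustrative only: read the slice count and request a 2-engine fixed
   * CCS configuration.  GT_SYSFS is an assumed location, adjust as needed.
   * The write fails with -EINVAL if the engine count does not evenly
   * divide the available slices.
   */
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  #define GT_SYSFS "/sys/class/drm/card0/device/tile0/gt0/"

  int main(void)
  {
          char buf[16] = { 0 };
          int fd;

          fd = open(GT_SYSFS "num_cslices", O_RDONLY);
          if (fd < 0 || read(fd, buf, sizeof(buf) - 1) < 0)
                  return 1;
          close(fd);
          printf("compute slices: %s", buf);

          fd = open(GT_SYSFS "ccs_mode", O_WRONLY);
          if (fd < 0 || write(fd, "2", 1) != 1)
                  return 1;
          close(fd);
          return 0;
  }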
v2: Rebase, make it platform agnostic v3: Separte out num_cslices sysfs interface and make xe_gt_ccs_mode_sysfs_init() return void Reviewed-by: Andi Shyti Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 3 ++ drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 103 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_ccs_mode.h | 1 + 3 files changed, 107 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 93fccbb4f2ad..154d6c7072b9 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -378,6 +378,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) "failed to register engines sysfs directory, err: %d\n", err); + /* Initialize CCS mode sysfs after early initialization of HW engines */ + xe_gt_ccs_mode_sysfs_init(gt); + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); XE_WARN_ON(err); xe_device_mem_access_put(gt_to_xe(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 541c44c70a84..723cf77c830a 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -3,10 +3,13 @@ * Copyright © 2023 Intel Corporation */ +#include + #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" #include "xe_gt_ccs_mode.h" +#include "xe_gt_sysfs.h" #include "xe_mmio.h" static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) @@ -76,3 +79,103 @@ void xe_gt_apply_ccs_mode(struct xe_gt *gt) __xe_gt_apply_ccs_mode(gt, gt->ccs_mode); } + +static ssize_t +num_cslices_show(struct device *kdev, + struct device_attribute *attr, char *buf) +{ + struct xe_gt *gt = kobj_to_gt(&kdev->kobj); + + return sysfs_emit(buf, "%u\n", hweight32(CCS_MASK(gt))); +} + +static DEVICE_ATTR_RO(num_cslices); + +static ssize_t +ccs_mode_show(struct device *kdev, + struct device_attribute *attr, char *buf) +{ + struct xe_gt *gt = kobj_to_gt(&kdev->kobj); + + return sysfs_emit(buf, "%u\n", gt->ccs_mode); +} + +static ssize_t +ccs_mode_store(struct device *kdev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_gt *gt = kobj_to_gt(&kdev->kobj); + u32 num_engines, num_slices; + int ret; + + ret = kstrtou32(buff, 0, &num_engines); + if (ret) + return ret; + + /* + * Ensure number of engines specified is valid and there is an + * exact multiple of engines for slices. + */ + num_slices = hweight32(CCS_MASK(gt)); + if (!num_engines || num_engines > num_slices || num_slices % num_engines) { + xe_gt_dbg(gt, "Invalid compute config, %d engines %d slices\n", + num_engines, num_slices); + return -EINVAL; + } + + if (gt->ccs_mode != num_engines) { + xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); + gt->ccs_mode = num_engines; + xe_gt_reset_async(gt); + } + + return count; +} + +static DEVICE_ATTR_RW(ccs_mode); + +static const struct attribute *gt_ccs_mode_attrs[] = { + &dev_attr_ccs_mode.attr, + &dev_attr_num_cslices.attr, + NULL, +}; + +static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs); +} + +/** + * xe_gt_ccs_mode_sysfs_init - Initialize CCS mode sysfs interfaces + * @gt: GT structure + * + * Through a per-gt 'ccs_mode' sysfs interface, the user can enable a fixed + * number of compute hardware engines to which the available compute slices + * are to be allocated. 
This user configuration change triggers a gt reset + * and it is expected that there are no open drm clients while doing so. + * The number of available compute slices is exposed to user through a per-gt + * 'num_cslices' sysfs interface. + */ +void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + if (!xe_gt_ccs_mode_enabled(gt)) + return; + + err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs); + if (err) { + drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt); + if (err) { + sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs); + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h index e8766879f6ec..f39975aaaab0 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h @@ -12,6 +12,7 @@ #include "xe_platform_types.h" void xe_gt_apply_ccs_mode(struct xe_gt *gt); +void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt) { -- cgit v1.2.3 From 78e2701a2614720d8c47b3a8490bf61c29718e8a Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 1 Nov 2023 19:02:53 +0000 Subject: drm/xe: Avoid any races around ccs_mode update Ensure that there are no drm clients when changing CCS mode. Allow exec_queue creation only with enabled CCS engines. v2: Rebase Reviewed-by: Andi Shyti Signed-off-by: Niranjana Vishwanathapura Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 9 +++++++++ drivers/gpu/drm/xe/xe_device_types.h | 9 +++++++++ drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 10 ++++++++++ 3 files changed, 28 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8423c817111b..2e0b2e40d8f3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -73,6 +73,10 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) mutex_init(&xef->exec_queue.lock); xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1); + spin_lock(&xe->clients.lock); + xe->clients.count++; + spin_unlock(&xe->clients.lock); + file->driver_priv = xef; return 0; } @@ -105,6 +109,10 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xa_destroy(&xef->vm.xa); mutex_destroy(&xef->vm.lock); + spin_lock(&xe->clients.lock); + xe->clients.count--; + spin_unlock(&xe->clients.lock); + xe_drm_client_put(xef->client); kfree(xef); } @@ -225,6 +233,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->info.force_execlist = xe_modparam.force_execlist; spin_lock_init(&xe->irq.lock); + spin_lock_init(&xe->clients.lock); init_waitqueue_head(&xe->ufence_wq); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9a212dbdb8a4..58442da2f6c5 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -310,6 +310,15 @@ struct xe_device { enum xe_sriov_mode __mode; } sriov; + /** @clients: drm clients info */ + struct { + /** @lock: Protects drm clients info */ + spinlock_t lock; + + /** @count: number of drm clients */ + u64 count; + } clients; + /** @usm: unified memory state */ struct { /** @asid: convert a ASID to VM */ diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 723cf77c830a..529fc286cd06 
100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -105,6 +105,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, const char *buff, size_t count) { struct xe_gt *gt = kobj_to_gt(&kdev->kobj); + struct xe_device *xe = gt_to_xe(gt); u32 num_engines, num_slices; int ret; @@ -123,12 +124,21 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, return -EINVAL; } + /* CCS mode can only be updated when there are no drm clients */ + spin_lock(&xe->clients.lock); + if (xe->clients.count) { + spin_unlock(&xe->clients.lock); + return -EBUSY; + } + if (gt->ccs_mode != num_engines) { xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; xe_gt_reset_async(gt); } + spin_unlock(&xe->clients.lock); + return count; } -- cgit v1.2.3 From ed750833f165869abf5effed5e02418d754647b0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Nov 2023 16:15:03 +0100 Subject: drm/xe: Define DRM_XE_DEBUG_SRIOV config We will be using extra logs during enabling of the SR-IOV features or when adding support for new platforms. Define separate config flag to keep that low level logs disabled if we're not debugging. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231128151507.1015-2-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Kconfig.debug | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 11bb13c73e7b..549065f57a78 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -40,6 +40,16 @@ config DRM_XE_DEBUG_VM If in doubt, say "N". +config DRM_XE_DEBUG_SRIOV + bool "Enable extra SR-IOV debugging" + default n + help + Enable extra SR-IOV debugging info. + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_XE_DEBUG_MEM bool "Enable passing SYS/VRAM addresses to user space" default n -- cgit v1.2.3 From 5bcedc9eabdc6ecd7a11f1e6147f0f601d7cdc77 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Nov 2023 16:15:04 +0100 Subject: drm/xe: Introduce SR-IOV logging macros To simplify logging and help identify SR-IOV specific messages define set of helper macros that will prefix messages based on the current SR-IOV mode. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231128151507.1015-3-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sriov_printk.h | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_sriov_printk.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sriov_printk.h b/drivers/gpu/drm/xe/xe_sriov_printk.h new file mode 100644 index 000000000000..117e1d541692 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_printk.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_SRIOV_PRINTK_H_ +#define _XE_SRIOV_PRINTK_H_ + +#include + +#include "xe_device_types.h" +#include "xe_sriov_types.h" + +#define xe_sriov_printk_prefix(xe) \ + ((xe)->sriov.__mode == XE_SRIOV_MODE_PF ? "PF: " : \ + (xe)->sriov.__mode == XE_SRIOV_MODE_VF ? "VF: " : "") + +#define xe_sriov_printk(xe, _level, fmt, ...) \ + drm_##_level(&(xe)->drm, "%s" fmt, xe_sriov_printk_prefix(xe), ##__VA_ARGS__) + +#define xe_sriov_err(xe, fmt, ...) 
\ + xe_sriov_printk((xe), err, fmt, ##__VA_ARGS__) + +#define xe_sriov_err_ratelimited(xe, fmt, ...) \ + xe_sriov_printk((xe), err_ratelimited, fmt, ##__VA_ARGS__) + +#define xe_sriov_warn(xe, fmt, ...) \ + xe_sriov_printk((xe), warn, fmt, ##__VA_ARGS__) + +#define xe_sriov_notice(xe, fmt, ...) \ + xe_sriov_printk((xe), notice, fmt, ##__VA_ARGS__) + +#define xe_sriov_info(xe, fmt, ...) \ + xe_sriov_printk((xe), info, fmt, ##__VA_ARGS__) + +#define xe_sriov_dbg(xe, fmt, ...) \ + xe_sriov_printk((xe), dbg, fmt, ##__VA_ARGS__) + +/* for low level noisy debug messages */ +#ifdef CONFIG_DRM_XE_DEBUG_SRIOV +#define xe_sriov_dbg_verbose(xe, fmt, ...) xe_sriov_dbg(xe, fmt, ##__VA_ARGS__) +#else +#define xe_sriov_dbg_verbose(xe, fmt, ...) typecheck(struct xe_device *, (xe)) +#endif + +#endif -- cgit v1.2.3 From b1d20405821812ad70d95eefe58cadc6d50b0917 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Nov 2023 16:15:05 +0100 Subject: drm/xe/pf: Introduce Local Memory Translation Table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Local Memory Translation Table (LMTT) provides additional abstraction for Virtual Functions (VF) accessing device VRAM. This code is based on prior work of Michal Winiarski. In this patch we focus only on LMTT initialization. Remaining LMTT functions will be used once we add a VF provisioning to the PF. Bspec: 44117, 52404, 59314 Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20231128151507.1015-4-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 5 + drivers/gpu/drm/xe/regs/xe_sriov_regs.h | 17 ++ drivers/gpu/drm/xe/xe_device_types.h | 9 + drivers/gpu/drm/xe/xe_gt.c | 10 + drivers/gpu/drm/xe/xe_lmtt.c | 502 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_lmtt.h | 27 ++ drivers/gpu/drm/xe/xe_lmtt_2l.c | 150 ++++++++++ drivers/gpu/drm/xe/xe_lmtt_ml.c | 161 ++++++++++ drivers/gpu/drm/xe/xe_lmtt_types.h | 63 ++++ 9 files changed, 944 insertions(+) create mode 100644 drivers/gpu/drm/xe/regs/xe_sriov_regs.h create mode 100644 drivers/gpu/drm/xe/xe_lmtt.c create mode 100644 drivers/gpu/drm/xe/xe_lmtt.h create mode 100644 drivers/gpu/drm/xe/xe_lmtt_2l.c create mode 100644 drivers/gpu/drm/xe/xe_lmtt_ml.c create mode 100644 drivers/gpu/drm/xe/xe_lmtt_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b9062e3ce6a9..537828655da9 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -137,6 +137,11 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += xe_sriov.o +xe-$(CONFIG_PCI_IOV) += \ + xe_lmtt.o \ + xe_lmtt_2l.o \ + xe_lmtt_ml.o + # i915 Display compat #defines and #includes subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ -I$(srctree)/$(src)/display/ext \ diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h new file mode 100644 index 000000000000..58a4e0fad1e1 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _REGS_XE_SRIOV_REGS_H_ +#define _REGS_XE_SRIOV_REGS_H_ + +#include "regs/xe_reg_defs.h" + +#define XE2_LMEM_CFG XE_REG(0x48b0) + +#define LMEM_CFG XE_REG(0xcf58) +#define LMEM_EN REG_BIT(31) +#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ + +#endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
b/drivers/gpu/drm/xe/xe_device_types.h index 58442da2f6c5..ffe7c6ef26a9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -15,6 +15,7 @@ #include "xe_devcoredump_types.h" #include "xe_heci_gsc.h" #include "xe_gt_types.h" +#include "xe_lmtt_types.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_pmu.h" @@ -186,6 +187,14 @@ struct xe_tile { struct xe_sa_manager *kernel_bb_pool; } mem; + /** @sriov: tile level virtualization data */ + union { + struct { + /** @sriov.pf.lmtt: Local Memory Translation Table. */ + struct xe_lmtt lmtt; + } pf; + } sriov; + /** @migrate: Migration helper for vram blits and clearing */ struct xe_migrate *migrate; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 154d6c7072b9..6645fa158f09 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -36,6 +36,7 @@ #include "xe_hw_fence.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_irq.h" +#include "xe_lmtt.h" #include "xe_lrc.h" #include "xe_map.h" #include "xe_migrate.h" @@ -46,6 +47,7 @@ #include "xe_ring_ops.h" #include "xe_sa.h" #include "xe_sched_job.h" +#include "xe_sriov.h" #include "xe_tuning.h" #include "xe_uc.h" #include "xe_vm.h" @@ -344,6 +346,8 @@ static int gt_fw_domain_init(struct xe_gt *gt) err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_lmtt_init(>_to_tile(gt)->sriov.pf.lmtt); } err = xe_uc_init(>->uc); @@ -460,6 +464,9 @@ static int all_fw_domain_init(struct xe_gt *gt) xe_gt_apply_ccs_mode(gt); } + if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); xe_device_mem_access_put(gt_to_xe(gt)); @@ -560,6 +567,9 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; + if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); + xe_mocs_init(gt); err = xe_uc_start(>->uc); if (err) diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c new file mode 100644 index 000000000000..d5ada31ae633 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +#include + +#include "regs/xe_sriov_regs.h" + +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_lmtt.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_res_cursor.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" + +/** + * DOC: Local Memory Translation Table + * + * The Local Memory Translation Table (LMTT) provides additional abstraction + * when Virtual Function (VF) is accessing device Local Memory (VRAM). + * + * The Root LMTT Page Directory contains one entry for each VF. Entries are + * indexed by the function number (1-based, index 0 is unused). + * + * See `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_. + */ + +#define lmtt_assert(lmtt, condition) xe_tile_assert(lmtt_to_tile(lmtt), condition) +#define lmtt_debug(lmtt, msg...) 
xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg) + +static bool xe_has_multi_level_lmtt(struct xe_device *xe) +{ + return xe->info.platform == XE_PVC; +} + +static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt) +{ + return container_of(lmtt, struct xe_tile, sriov.pf.lmtt); +} + +static struct xe_device *lmtt_to_xe(struct xe_lmtt *lmtt) +{ + return tile_to_xe(lmtt_to_tile(lmtt)); +} + +static u64 lmtt_page_size(struct xe_lmtt *lmtt) +{ + return BIT_ULL(lmtt->ops->lmtt_pte_shift(0)); +} + +static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level) +{ + unsigned int num_entries = level ? lmtt->ops->lmtt_pte_num(level) : 0; + struct xe_lmtt_pt *pt; + struct xe_bo *bo; + int err; + + pt = kzalloc(struct_size(pt, entries, num_entries), GFP_KERNEL); + if (!pt) { + err = -ENOMEM; + goto out; + } + + bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL, + PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)), + ttm_bo_type_kernel, + XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | + XE_BO_CREATE_PINNED_BIT); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto out_free_pt; + } + + lmtt_assert(lmtt, xe_bo_is_vram(bo)); + + pt->level = level; + pt->bo = bo; + return pt; + +out_free_pt: + kfree(pt); +out: + return ERR_PTR(err); +} + +static void lmtt_pt_free(struct xe_lmtt_pt *pt) +{ + xe_bo_unpin_map_no_vm(pt->bo); + kfree(pt); +} + +static int lmtt_init_pd(struct xe_lmtt *lmtt) +{ + struct xe_lmtt_pt *pd; + + lmtt_assert(lmtt, !lmtt->pd); + lmtt_assert(lmtt, lmtt->ops->lmtt_root_pd_level()); + + pd = lmtt_pt_alloc(lmtt, lmtt->ops->lmtt_root_pd_level()); + if (IS_ERR(pd)) + return PTR_ERR(pd); + + lmtt->pd = pd; + return 0; +} + +static void lmtt_fini_pd(struct xe_lmtt *lmtt) +{ + struct xe_lmtt_pt *pd = lmtt->pd; + unsigned int num_entries = lmtt->ops->lmtt_pte_num(pd->level); + unsigned int n = 0; + + /* make sure we don't leak */ + for (n = 0; n < num_entries; n++) + lmtt_assert(lmtt, !pd->entries[n]); + + lmtt->pd = NULL; + lmtt_pt_free(pd); +} + +static void fini_lmtt(struct drm_device *drm, void *arg) +{ + struct xe_lmtt *lmtt = arg; + + lmtt_assert(lmtt, !(!!lmtt->ops ^ !!lmtt->pd)); + + if (!lmtt->pd) + return; + + lmtt_fini_pd(lmtt); + lmtt->ops = NULL; +} + +/** + * xe_lmtt_init - LMTT software initialization. + * @lmtt: the &xe_lmtt to initialize + * + * The LMTT initialization requires two steps. + * + * The xe_lmtt_init() checks if LMTT is required on current device and selects + * and initialize proper variant of the LMTT Root Directory. Currently supported + * variants are `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_. + * + * In next step xe_lmtt_init_hw() will register this directory on the hardware. + * + * Notes: + * The LMTT allocations are managed and will be implicitly released on driver unload. + * This function shall be called only once and only when running as a PF driver. + * Any LMTT initialization failure should block VFs enabling. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_lmtt_init(struct xe_lmtt *lmtt) +{ + struct xe_device *xe = lmtt_to_xe(lmtt); + int err; + + lmtt_assert(lmtt, IS_SRIOV_PF(xe)); + lmtt_assert(lmtt, !lmtt->ops); + + if (!IS_DGFX(xe)) + return 0; + + if (xe_has_multi_level_lmtt(xe)) + lmtt->ops = &lmtt_ml_ops; + else + lmtt->ops = &lmtt_2l_ops; + + err = lmtt_init_pd(lmtt); + if (unlikely(err)) + goto fail; + + return drmm_add_action_or_reset(&xe->drm, fini_lmtt, lmtt); + +fail: + lmtt->ops = NULL; + return err; +} + +static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt) +{ + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_device *xe = tile_to_xe(tile); + dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE); + + lmtt_debug(lmtt, "DIR offset %pad\n", &offset); + lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo)); + lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K)); + + xe_mmio_write32(tile->primary_gt, + GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG, + LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); +} + +/** + * xe_lmtt_init_hw - Perform LMTT hardware initialization. + * @lmtt: the &xe_lmtt to initialize + * + * This function is a second step of the LMTT initialization. + * This function registers LMTT Root Directory prepared in xe_lmtt_init(). + * + * This function shall be called after every hardware reset. + * This function shall be called only when running as a PF driver. + */ +void xe_lmtt_init_hw(struct xe_lmtt *lmtt) +{ + if (!lmtt->pd) + return; + + lmtt_setup_dir_ptr(lmtt); +} + +static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, + u64 pte, unsigned int idx) +{ + unsigned int level = pt->level; + + lmtt_assert(lmtt, idx <= lmtt->ops->lmtt_pte_num(level)); + lmtt_debug(lmtt, "WRITE level=%u index=%u pte=%#llx\n", level, idx, pte); + + switch (lmtt->ops->lmtt_pte_size(level)) { + case sizeof(u32): + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); + break; + case sizeof(u64): + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); + break; + default: + lmtt_assert(lmtt, !!!"invalid pte size"); + } +} + +static void lmtt_destroy_pt(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd) +{ + unsigned int num_entries = pd->level ? 
lmtt->ops->lmtt_pte_num(pd->level) : 0; + struct xe_lmtt_pt *pt; + unsigned int i; + + for (i = 0; i < num_entries; i++) { + pt = pd->entries[i]; + pd->entries[i] = NULL; + if (!pt) + continue; + + lmtt_destroy_pt(lmtt, pt); + } + + lmtt_pt_free(pd); +} + +static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) +{ + struct xe_lmtt_pt *pd = lmtt->pd; + struct xe_lmtt_pt *pt; + + pt = pd->entries[vfid]; + pd->entries[vfid] = NULL; + if (!pt) + return; + + lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid); + + lmtt_assert(lmtt, pd->level > 0); + lmtt_assert(lmtt, pt->level == pd->level - 1); + lmtt_destroy_pt(lmtt, pt); +} + +static int __lmtt_alloc_range(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd, + u64 start, u64 end) +{ + u64 pte_addr_shift = BIT_ULL(lmtt->ops->lmtt_pte_shift(pd->level)); + u64 offset; + int err; + + lmtt_assert(lmtt, pd->level > 0); + + offset = start; + while (offset < end) { + struct xe_lmtt_pt *pt; + u64 next, pde, pt_addr; + unsigned int idx; + + pt = lmtt_pt_alloc(lmtt, pd->level - 1); + if (IS_ERR(pt)) + return PTR_ERR(pt); + + pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE); + + idx = lmtt->ops->lmtt_pte_index(offset, pd->level); + pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level); + + lmtt_write_pte(lmtt, pd, pde, idx); + + pd->entries[idx] = pt; + + next = min(end, round_up(offset + 1, pte_addr_shift)); + + if (pt->level != 0) { + err = __lmtt_alloc_range(lmtt, pt, offset, next); + if (err) + return err; + } + + offset = next; + } + + return 0; +} + +static int lmtt_alloc_range(struct xe_lmtt *lmtt, unsigned int vfid, u64 start, u64 end) +{ + struct xe_lmtt_pt *pd = lmtt->pd; + struct xe_lmtt_pt *pt; + u64 pt_addr; + u64 pde; + int err; + + lmtt_assert(lmtt, pd->level > 0); + lmtt_assert(lmtt, vfid <= lmtt->ops->lmtt_pte_num(pd->level)); + lmtt_assert(lmtt, IS_ALIGNED(start, lmtt_page_size(lmtt))); + lmtt_assert(lmtt, IS_ALIGNED(end, lmtt_page_size(lmtt))); + + if (pd->entries[vfid]) + return -ENOTEMPTY; + + pt = lmtt_pt_alloc(lmtt, pd->level - 1); + if (IS_ERR(pt)) + return PTR_ERR(pt); + + pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE); + + pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level); + + lmtt_write_pte(lmtt, pd, pde, vfid); + + pd->entries[vfid] = pt; + + if (pt->level != 0) { + err = __lmtt_alloc_range(lmtt, pt, start, end); + if (err) + goto out_free_pt; + } + + return 0; + +out_free_pt: + lmtt_pt_free(pt); + return err; +} + +static struct xe_lmtt_pt *lmtt_leaf_pt(struct xe_lmtt *lmtt, unsigned int vfid, u64 addr) +{ + struct xe_lmtt_pt *pd = lmtt->pd; + struct xe_lmtt_pt *pt; + + lmtt_assert(lmtt, vfid <= lmtt->ops->lmtt_pte_num(pd->level)); + pt = pd->entries[vfid]; + + while (pt->level) { + lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <= + lmtt->ops->lmtt_pte_num(pt->level)); + + pt = pt->entries[lmtt->ops->lmtt_pte_index(addr, pt->level)]; + + addr >>= lmtt->ops->lmtt_pte_shift(pt->level); + } + + lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <= + lmtt->ops->lmtt_pte_num(pt->level)); + lmtt_assert(lmtt, pt->level != pd->level); + lmtt_assert(lmtt, pt->level == 0); + return pt; +} + +static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 start) +{ + u64 page_size = lmtt_page_size(lmtt); + struct xe_res_cursor cur; + struct xe_lmtt_pt *pt; + u64 addr, vram_offset; + + lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); + lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); + lmtt_assert(lmtt, xe_bo_is_vram(bo)); + + vram_offset = 
vram_region_gpu_offset(bo->ttm.resource); + xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + while (cur.remaining) { + addr = xe_res_dma(&cur); + addr += vram_offset; /* XXX */ + + pt = lmtt_leaf_pt(lmtt, vfid, start); + + lmtt_write_pte(lmtt, pt, lmtt->ops->lmtt_pte_encode(addr, 0), + lmtt->ops->lmtt_pte_index(start, 0)); + + xe_res_next(&cur, page_size); + start += page_size; + } +} + +/** + * xe_lmtt_prepare_pages - Create VF's LMTT Page Tables. + * @lmtt: the &xe_lmtt to update + * @vfid: the VF identifier (1-based) + * @range: top range of LMEM offset to be supported + * + * This function creates empty LMTT page tables for given VF to support + * up to maximum #range LMEM offset. The LMTT page tables created by this + * function must be released using xe_lmtt_drop_pages() function. + * + * Notes: + * This function shall be called only after successful LMTT initialization. + * See xe_lmtt_init(). + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range) +{ + lmtt_assert(lmtt, lmtt->pd); + lmtt_assert(lmtt, vfid); + + return lmtt_alloc_range(lmtt, vfid, 0, range); +} + +/** + * xe_lmtt_populate_pages - Update VF's LMTT Page Table Entries. + * @lmtt: the &xe_lmtt to update + * @vfid: the VF identifier (1-based) + * @bo: the buffer object with LMEM allocation to be mapped + * @offset: the offset at which #bo should be mapped + * + * This function updates VF's LMTT entries to use given buffer object as a backstore. + * + * Notes: + * This function shall be called only after successful preparation of the + * VF's LMTT Page Tables. See xe_lmtt_prepare(). + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset) +{ + lmtt_assert(lmtt, lmtt->pd); + lmtt_assert(lmtt, vfid); + + lmtt_insert_bo(lmtt, vfid, bo, offset); + return 0; +} + +/** + * xe_lmtt_drop_pages - Remove VF's LMTT Pages. + * @lmtt: the &xe_lmtt to update + * @vfid: the VF identifier (1-based) + * + * This function removes all LMTT Page Tables prepared by xe_lmtt_prepare_pages(). + * + * This function shall be called only after successful LMTT initialization. + * See xe_lmtt_init(). + */ +void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) +{ + lmtt_assert(lmtt, lmtt->pd); + lmtt_assert(lmtt, vfid); + + lmtt_drop_pages(lmtt, vfid); +} + +/** + * xe_lmtt_estimate_pt_size - Estimate size of LMTT PT allocations. + * @lmtt: the &xe_lmtt + * @size: the size of the LMEM to be mapped over LMTT (including any offset) + * + * This function shall be called only by PF. + * + * Return: size of the PT allocation(s) needed to support given LMEM size. 
+ */ +u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size) +{ + unsigned int level = 0; + u64 pt_size; + + lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, IS_DGFX(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, lmtt->ops); + + pt_size = PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)); + + while (++level < lmtt->ops->lmtt_root_pd_level()) { + pt_size *= lmtt->ops->lmtt_pte_index(size, level) + 1; + pt_size += PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)); + } + + return pt_size; +} diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h new file mode 100644 index 000000000000..cb10ef994db6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_LMTT_H_ +#define _XE_LMTT_H_ + +#include + +struct xe_bo; +struct xe_lmtt; +struct xe_lmtt_ops; + +#ifdef CONFIG_PCI_IOV +int xe_lmtt_init(struct xe_lmtt *lmtt); +void xe_lmtt_init_hw(struct xe_lmtt *lmtt); +int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); +int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); +void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); +u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size); +#else +static inline int xe_lmtt_init(struct xe_lmtt *lmtt) { return 0; } +static inline void xe_lmtt_init_hw(struct xe_lmtt *lmtt) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_lmtt_2l.c b/drivers/gpu/drm/xe/xe_lmtt_2l.c new file mode 100644 index 000000000000..84bc5c4212b5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt_2l.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include +#include +#include + +#include "xe_lmtt_types.h" +#include "xe_macros.h" + +/** + * DOC: Two-Level LMTT Structure + * + * LMHAW (Local Memory Host Address Width) is 37 bit (128GB) + * + * LMGAW (Local Memory Guest Address Width) is 37 bit (128GB) + * + * The following figure illustrates the structure and function of the 2L LMTT:: + * + * LMTT Directory + * (1 Entry per VF) + * +-----------+ LMTT (per VF) + * | | +-----------+ + * | | | | + * | | index: | | + * | | LMEM VF +===========+ + * | | offset --> | PTE | ==> LMEM PF offset + * | | +===========+ + * index: +===========+ | | + * VFID --> | PDE | -----------------> +-----------+ + * +===========+ / \. + * | | / \. + * | | / \. + * | | / \. + * +-----------+ <== [LMTT Directory Ptr] \. + * / \ / \. + * / \ +-----------+-----------------+------+---+ + * / \ | 31:HAW-16 | HAW-17:5 | 4:1 | 0 | + * / \ +===========+=================+======+===+ + * / \ | Reserved | LMEM Page (2MB) | Rsvd | V | + * / \ +-----------+-----------------+------+---+ + * / \. 
+ * +-----------+-----------------+------+---+ + * | 31:HAW-12 | HAW-13:4 | 3:1 | 0 | + * +===========+=================+======+===+ + * | Reserved | LMTT Ptr (64KB) | Rsvd | V | + * +-----------+-----------------+------+---+ + * + */ + +typedef u32 lmtt_2l_pde_t; +typedef u32 lmtt_2l_pte_t; + +#if IS_ENABLED(CONFIG_DRM_XE_LMTT_2L_128GB) +#define LMTT_2L_HAW 37 /* 128 GiB */ +#else +#define LMTT_2L_HAW 35 /* 32 GiB */ +#endif + +#define LMTT_2L_PDE_MAX_NUM 64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */ +#define LMTT_2L_PDE_LMTT_PTR GENMASK(LMTT_2L_HAW - 13, 4) +#define LMTT_2L_PDE_VALID BIT(0) + +#define LMTT_2L_PTE_MAX_NUM BIT(LMTT_2L_HAW - ilog2(SZ_2M)) +#define LMTT_2L_PTE_LMEM_PAGE GENMASK(LMTT_2L_HAW - 17, 5) +#define LMTT_2L_PTE_VALID BIT(0) + +static unsigned int lmtt_2l_root_pd_level(void) +{ + return 1; /* implementation is 0-based */ +} + +static unsigned int lmtt_2l_pte_num(unsigned int level) +{ + switch (level) { + case 1: + return LMTT_2L_PDE_MAX_NUM; + case 0: + BUILD_BUG_ON(LMTT_2L_HAW == 37 && LMTT_2L_PTE_MAX_NUM != SZ_64K); + BUILD_BUG_ON(LMTT_2L_HAW == 35 && LMTT_2L_PTE_MAX_NUM != SZ_16K); + return LMTT_2L_PTE_MAX_NUM; + default: + return 0; + } +} + +static unsigned int lmtt_2l_pte_size(unsigned int level) +{ + switch (level) { + case 1: + return sizeof(lmtt_2l_pde_t); + case 0: + return sizeof(lmtt_2l_pte_t); + default: + return 0; + } +} + +static unsigned int lmtt_2l_pte_shift(unsigned int level) +{ + switch (level) { + case 0: + return ilog2(SZ_2M); + default: + return 0; + } +} + +static unsigned int lmtt_2l_pte_index(u64 addr, unsigned int level) +{ + addr >>= lmtt_2l_pte_shift(level); + + switch (level) { + case 0: + /* SZ_2M increments */ + BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_2L_PTE_MAX_NUM); + return addr & (LMTT_2L_PTE_MAX_NUM - 1); + default: + return 0; + } +} + +static u64 lmtt_2l_pte_encode(unsigned long offset, unsigned int level) +{ + switch (level) { + case 0: + XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M)); + XE_WARN_ON(!FIELD_FIT(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M)); + return FIELD_PREP(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_2L_PTE_VALID; + case 1: + XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K)); + XE_WARN_ON(!FIELD_FIT(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K)); + return FIELD_PREP(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_2L_PDE_VALID; + default: + XE_WARN_ON(true); + return 0; + } +} + +const struct xe_lmtt_ops lmtt_2l_ops = { + .lmtt_root_pd_level = lmtt_2l_root_pd_level, + .lmtt_pte_num = lmtt_2l_pte_num, + .lmtt_pte_size = lmtt_2l_pte_size, + .lmtt_pte_shift = lmtt_2l_pte_shift, + .lmtt_pte_index = lmtt_2l_pte_index, + .lmtt_pte_encode = lmtt_2l_pte_encode, +}; diff --git a/drivers/gpu/drm/xe/xe_lmtt_ml.c b/drivers/gpu/drm/xe/xe_lmtt_ml.c new file mode 100644 index 000000000000..b21215a2edd6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt_ml.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include +#include +#include +#include + +#include "xe_lmtt_types.h" +#include "xe_macros.h" + +/** + * DOC: Multi-Level LMTT Structure + * + * LMHAW (Local Memory Host Address Width) is 48 bit (256TB) + * + * LMGAW (Local Memory Guest Address Width) is 48 bit (256TB) + * + * The following figure illustrates the structure and function of the ML LMTT:: + * + * LMTT L3 Directory + * (1 Entry per VF) LMTT L1 Leaf + * +-----------+ +-----------+ + * | | LMTT L2 (per VF) | | + * | | +-----------+ | | + * | | | | index: +===========+ + * | | | | GDPA --> | PTE | => LMEM PF offset + * | | | | 34:21 
+===========+ + * | | index: | | | | + * | | LMEM VF +===========+ | | + * | | offset -> | PTE | ----------> +-----------+ + * | | GAW-1:35 +===========+ / \. + * index: +===========+ | | / \. + * VFID --> | PDE | ---------> +-----------+ / \. + * +===========+ / / / \. + * | | / / / \. + * +-----------+ <== [LMTT Directory Ptr] / \. + * / \ / / / \. + * / \ / / +-----------+-----------------+------+---+ + * / /\ / | 31:HAW-16 | HAW-17:5 | 4:1 | 0 | + * / / \ / +===========+=================+======+===+ + * / / \ / | Reserved | LMEM Page (2MB) | Rsvd | V | + * / / +-----------+-----------------+------+---+ + * / / + * +-----------+-----------------+------+---+ + * | 63:HAW-12 | HAW-13:4 | 3:1 | 0 | + * +===========+=================+======+===+ + * | Reserved | LMTT Ptr (64KB) | Rsvd | V | + * +-----------+-----------------+------+---+ + * + */ + +typedef u64 lmtt_ml_pde_t; +typedef u32 lmtt_ml_pte_t; + +#define LMTT_ML_HAW 48 /* 256 TiB */ + +#define LMTT_ML_PDE_MAX_NUM 64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */ +#define LMTT_ML_PDE_LMTT_PTR GENMASK_ULL(LMTT_ML_HAW - 13, 4) +#define LMTT_ML_PDE_VALID BIT(0) + +#define LMTT_ML_PDE_L2_SHIFT 35 +#define LMTT_ML_PDE_L2_MAX_NUM BIT_ULL(LMTT_ML_HAW - 35) + +#define LMTT_ML_PTE_MAX_NUM BIT(35 - ilog2(SZ_2M)) +#define LMTT_ML_PTE_LMEM_PAGE GENMASK(LMTT_ML_HAW - 17, 5) +#define LMTT_ML_PTE_VALID BIT(0) + +static unsigned int lmtt_ml_root_pd_level(void) +{ + return 2; /* implementation is 0-based */ +} + +static unsigned int lmtt_ml_pte_num(unsigned int level) +{ + switch (level) { + case 2: + return LMTT_ML_PDE_MAX_NUM; + case 1: + BUILD_BUG_ON(LMTT_ML_HAW == 48 && LMTT_ML_PDE_L2_MAX_NUM != SZ_8K); + return LMTT_ML_PDE_L2_MAX_NUM; + case 0: + BUILD_BUG_ON(LMTT_ML_PTE_MAX_NUM != SZ_16K); + return LMTT_ML_PTE_MAX_NUM; + default: + return 0; + } +} + +static unsigned int lmtt_ml_pte_size(unsigned int level) +{ + switch (level) { + case 2: + case 1: + return sizeof(lmtt_ml_pde_t); + case 0: + return sizeof(lmtt_ml_pte_t); + default: + return 0; + } +} + +static unsigned int lmtt_ml_pte_shift(unsigned int level) +{ + switch (level) { + case 1: + BUILD_BUG_ON(BIT_ULL(LMTT_ML_PDE_L2_SHIFT) != SZ_32G); + return ilog2(SZ_32G); + case 0: + return ilog2(SZ_2M); + default: + return 0; + } +} + +static unsigned int lmtt_ml_pte_index(u64 addr, unsigned int level) +{ + addr >>= lmtt_ml_pte_shift(level); + + switch (level) { + case 1: + /* SZ_32G increments */ + BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PDE_L2_MAX_NUM); + return addr & (LMTT_ML_PDE_L2_MAX_NUM - 1); + case 0: + /* SZ_2M increments */ + BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PTE_MAX_NUM); + return addr & (LMTT_ML_PTE_MAX_NUM - 1); + default: + return 0; + } +} + +static u64 lmtt_ml_pte_encode(unsigned long offset, unsigned int level) +{ + switch (level) { + case 0: + XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M)); + XE_WARN_ON(!FIELD_FIT(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M)); + return FIELD_PREP(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_ML_PTE_VALID; + case 1: + case 2: + XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K)); + XE_WARN_ON(!FIELD_FIT(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K)); + return FIELD_PREP(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_ML_PDE_VALID; + default: + XE_WARN_ON(true); + return 0; + } +} + +const struct xe_lmtt_ops lmtt_ml_ops = { + .lmtt_root_pd_level = lmtt_ml_root_pd_level, + .lmtt_pte_num = lmtt_ml_pte_num, + .lmtt_pte_size = lmtt_ml_pte_size, + .lmtt_pte_shift = lmtt_ml_pte_shift, + .lmtt_pte_index = lmtt_ml_pte_index, + .lmtt_pte_encode = lmtt_ml_pte_encode, +}; diff --git 
a/drivers/gpu/drm/xe/xe_lmtt_types.h b/drivers/gpu/drm/xe/xe_lmtt_types.h new file mode 100644 index 000000000000..b37abad23416 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_lmtt_types.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_LMTT_TYPES_H_ +#define _XE_LMTT_TYPES_H_ + +#include + +struct xe_bo; +struct xe_lmtt; +struct xe_lmtt_pt; +struct xe_lmtt_ops; + +#define LMTT_PTE_INVALID ULL(0) + +/** + * struct xe_lmtt - Local Memory Translation Table Manager + */ +struct xe_lmtt { + /** @pd: root LMTT Directory */ + struct xe_lmtt_pt *pd; + + /** @ops: LMTT functions */ + const struct xe_lmtt_ops *ops; +}; + +/** + * struct xe_lmtt_pt - Local Memory Translation Table Page Table + * + * Represents single level of the LMTT. + */ +struct xe_lmtt_pt { + /** @level: page table level, 0 is leaf */ + unsigned int level; + + /** @bo: buffer object with actual LMTT PTE values */ + struct xe_bo *bo; + + /** @entries: leaf page tables, exist only for root/non-leaf */ + struct xe_lmtt_pt *entries[]; +}; + +/** + * struct xe_lmtt_ops - Local Memory Translation Table Operations + * + * Provides abstraction of the LMTT variants. + */ +struct xe_lmtt_ops { + /* private: */ + unsigned int (*lmtt_root_pd_level)(void); + unsigned int (*lmtt_pte_num)(unsigned int level); + unsigned int (*lmtt_pte_size)(unsigned int level); + unsigned int (*lmtt_pte_shift)(unsigned int level); + unsigned int (*lmtt_pte_index)(u64 addr, unsigned int level); + u64 (*lmtt_pte_encode)(unsigned long offset, unsigned int level); +}; + +extern const struct xe_lmtt_ops lmtt_2l_ops; +extern const struct xe_lmtt_ops lmtt_ml_ops; + +#endif -- cgit v1.2.3 From a43ac2de4c1c788a8731940470a7de77dd60ccea Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Nov 2023 16:15:06 +0100 Subject: drm/xe/kunit: Enable CONFIG_PCI_IOV in .kunitconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add kunit tests for the PF specific code that is by default enabled only under CONFIG_PCI_IOV. Update our .kunitconfig to allow running those test cases by our CI. Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20231128151507.1015-5-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/.kunitconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig index 3769af94e391..9590eac91af3 100644 --- a/drivers/gpu/drm/xe/.kunitconfig +++ b/drivers/gpu/drm/xe/.kunitconfig @@ -1,6 +1,7 @@ # xe dependencies CONFIG_KUNIT=y CONFIG_PCI=y +CONFIG_PCI_IOV=y CONFIG_DEBUG_FS=y CONFIG_DRM=y CONFIG_DRM_FBDEV_EMULATION=y -- cgit v1.2.3 From f1a5d808b2a69304d0df06e23f4465a278b2cdd8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Nov 2023 16:15:07 +0100 Subject: drm/xe/kunit: Add test for LMTT operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LMTT variants are abstracted with xe_lmtt_ops. Make sure that both 2L and ML ops implementations are correct. 
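For reference, the index arithmetic these parameterized cases exercise can be modelled in plain C; a minimal sketch of the multi-level variant only, with the constants as read from xe_lmtt_ml.c earlier in the series (2 MiB leaf pages at level 0, 32 GiB coverage per level-1 entry), not the kernel implementation itself:

  /* Boundary behaviour checked by the test: an address one byte below a
   * level's granule still indexes entry 0, the granule itself indexes 1.
   */
  #include <assert.h>
  #include <stdint.h>

  static unsigned int ml_pte_index(uint64_t addr, unsigned int level)
  {
          static const unsigned int shift[] = { 21, 35 };          /* 2M, 32G */
          static const unsigned int num[]   = { 1u << 14, 1u << 13 };

          return (addr >> shift[level]) & (num[level] - 1);
  }

  int main(void)
  {
          assert(ml_pte_index((1ull << 21) - 1, 0) == 0);
          assert(ml_pte_index(1ull << 21, 0) == 1);
          assert(ml_pte_index((1ull << 35) - 1, 1) == 0);
          assert(ml_pte_index(1ull << 35, 1) == 1);
          return 0;
  }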
Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20231128151507.1015-6-michal.wajdeczko@intel.com Signed-off-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_lmtt_test.c | 73 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_lmtt.c | 4 ++ 2 files changed, 77 insertions(+) create mode 100644 drivers/gpu/drm/xe/tests/xe_lmtt_test.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_lmtt_test.c b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c new file mode 100644 index 000000000000..1f1557c45ae1 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_lmtt_test.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +static const struct lmtt_ops_param { + const char *desc; + const struct xe_lmtt_ops *ops; +} lmtt_ops_params[] = { + { "2-level", &lmtt_2l_ops, }, + { "multi-level", &lmtt_ml_ops, }, +}; + +static void lmtt_ops_param_get_desc(const struct lmtt_ops_param *p, char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s", p->desc); +} + +KUNIT_ARRAY_PARAM(lmtt_ops, lmtt_ops_params, lmtt_ops_param_get_desc); + +static void test_ops(struct kunit *test) +{ + const struct lmtt_ops_param *p = test->param_value; + const struct xe_lmtt_ops *ops = p->ops; + unsigned int n; + + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_root_pd_level); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_num); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_size); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_shift); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_index); + KUNIT_ASSERT_NOT_NULL(test, ops->lmtt_pte_encode); + + KUNIT_EXPECT_NE(test, ops->lmtt_root_pd_level(), 0); + + for (n = 0; n <= ops->lmtt_root_pd_level(); n++) { + KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_num(n), 0, + "level=%u", n); + KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_size(n), 0, + "level=%u", n); + KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_encode(0, n), LMTT_PTE_INVALID, + "level=%u", n); + } + + for (n = 0; n < ops->lmtt_root_pd_level(); n++) { + u64 addr = BIT_ULL(ops->lmtt_pte_shift(n)); + + KUNIT_EXPECT_NE_MSG(test, ops->lmtt_pte_shift(n), 0, + "level=%u", n); + KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr - 1, n), 0, + "addr=%#llx level=%u", addr, n); + KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr + 1, n), 1, + "addr=%#llx level=%u", addr, n); + KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2 - 1, n), 1, + "addr=%#llx level=%u", addr, n); + KUNIT_EXPECT_EQ_MSG(test, ops->lmtt_pte_index(addr * 2, n), 2, + "addr=%#llx level=%u", addr, n); + } +} + +static struct kunit_case lmtt_test_cases[] = { + KUNIT_CASE_PARAM(test_ops, lmtt_ops_gen_params), + {} +}; + +static struct kunit_suite lmtt_suite = { + .name = "lmtt", + .test_cases = lmtt_test_cases, +}; + +kunit_test_suites(&lmtt_suite); diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index d5ada31ae633..0d7c5514e092 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -500,3 +500,7 @@ u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size) return pt_size; } + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_lmtt_test.c" +#endif -- cgit v1.2.3 From 5a92da34ddb4ec75a037d4a956afa993876c67d4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Dec 2023 06:52:35 -0800 Subject: drm/xe: Rename info.supports_* to info.has_* Rename supports_mmio_ext and supports_usm to use a has_ prefix so the flags are grouped together. 
This settles on just one variant for positive info matching ("has_") and one for negative ("skip_"). Also make sure the has_* flags are grouped together in xe_pci.c. Reviewed-by: Koby Elbaz Reviewed-by: Gustavo Sousa Link: https://lore.kernel.org/r/20231205145235.2114761-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 4 ++-- drivers/gpu/drm/xe/xe_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 8 ++++---- drivers/gpu/drm/xe/xe_exec_queue.c | 6 +++--- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_gt.h | 2 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_ads.c | 4 ++-- drivers/gpu/drm/xe/xe_hw_engine.c | 4 ++-- drivers/gpu/drm/xe/xe_lrc.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 16 ++++++++-------- drivers/gpu/drm/xe/xe_mmio.c | 5 +++-- drivers/gpu/drm/xe/xe_pci.c | 20 ++++++++++---------- drivers/gpu/drm/xe/xe_pci_types.h | 2 +- drivers/gpu/drm/xe/xe_vm.c | 2 +- 15 files changed, 42 insertions(+), 41 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 83d6a66ed369..47fcd6e6b777 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -37,7 +37,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, struct xe_bb *bb, u32 second_idx, const char *str, struct kunit *test) { - u64 batch_base = xe_migrate_batch_base(m, xe->info.supports_usm); + u64 batch_base = xe_migrate_batch_base(m, xe->info.has_usm); struct xe_sched_job *job = xe_bb_create_migration_job(m->q, bb, batch_base, second_idx); @@ -308,7 +308,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) goto free_pt; } - bb = xe_bb_new(tile->primary_gt, 32, xe->info.supports_usm); + bb = xe_bb_new(tile->primary_gt, 32, xe->info.has_usm); if (IS_ERR(bb)) { KUNIT_FAIL(test, "Failed to create batchbuffer: %li\n", PTR_ERR(bb)); diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 2de8a0b9da18..f1e80be8b930 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -53,8 +53,8 @@ static int info(struct seq_file *m, void *data) drm_printf(&p, "tile_count %d\n", xe->info.tile_count); drm_printf(&p, "vm_max_level %d\n", xe->info.vm_max_level); drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist)); - drm_printf(&p, "supports_usm %s\n", str_yes_no(xe->info.supports_usm)); drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); + drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm)); for_each_gt(gt, xe, id) { drm_printf(&p, "gt%d force wake %d\n", id, xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT)); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ffe7c6ef26a9..d1a48456e9a3 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -251,8 +251,6 @@ struct xe_device { /** @is_dgfx: is discrete device */ u8 is_dgfx:1; - /** @supports_usm: Supports unified shared memory */ - u8 supports_usm:1; /** @has_asid: Has address space ID */ u8 has_asid:1; /** @force_execlist: Forced execlist submission */ @@ -261,18 +259,20 @@ struct xe_device { u8 has_flat_ccs:1; /** @has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; + /** @has_mmio_ext: Device has extra MMIO address range */ + u8 has_mmio_ext:1; /** @has_range_tlb_invalidation: Has range based TLB invalidations */ u8 
has_range_tlb_invalidation:1; /** @has_sriov: Supports SR-IOV */ u8 has_sriov:1; + /** @has_usm: Device has unified shared memory support */ + u8 has_usm:1; /** @enable_display: display enabled */ u8 enable_display:1; /** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 skip_mtcfg:1; /** @skip_pcode: skip access to PCODE uC */ u8 skip_pcode:1; - /** @supports_mmio_ext: supports MMIO extension/s */ - u8 supports_mmio_ext:1; /** @has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; /** @skip_guc_pc: Skip GuC based PM feature init */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 985807d6abbb..85bc25fe99ed 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -362,7 +362,7 @@ static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) + if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) return -EINVAL; q->usm.acc_trigger = value; @@ -376,7 +376,7 @@ static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) + if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) return -EINVAL; q->usm.acc_notify = value; @@ -390,7 +390,7 @@ static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_q if (XE_IOCTL_DBG(xe, !create)) return -EINVAL; - if (XE_IOCTL_DBG(xe, !xe->info.supports_usm)) + if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) return -EINVAL; if (value > DRM_XE_ACC_GRANULARITY_64M) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 6645fa158f09..fe3c7aac2369 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -435,7 +435,7 @@ static int all_fw_domain_init(struct xe_gt *gt) /* * USM has its only SA pool to non-block behind user operations */ - if (gt_to_xe(gt)->info.supports_usm) { + if (gt_to_xe(gt)->info.has_usm) { gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16); if (IS_ERR(gt->usm.bb_pool)) { err = PTR_ERR(gt->usm.bb_pool); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index a818cc9c8fd0..f3c780bd266d 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -65,7 +65,7 @@ static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe) { struct xe_device *xe = gt_to_xe(gt); - return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY && + return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY && hwe->instance == gt->usm.reserved_bcs_instance; } diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index a5358064a4e0..4489aadc7a52 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -398,7 +398,7 @@ int xe_gt_pagefault_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int i; - if (!xe->info.supports_usm) + if (!xe->info.has_usm) return 0; for (i = 0; i < NUM_PF_QUEUE; ++i) { @@ -431,7 +431,7 @@ void xe_gt_pagefault_reset(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int i; - if (!xe->info.supports_usm) + if (!xe->info.has_usm) return; for (i = 0; i < NUM_PF_QUEUE; ++i) { diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index ab115588f88b..390e6f1bf4e1 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -141,7 +141,7 @@ static size_t guc_ads_um_queues_size(struct 
xe_guc_ads *ads) { struct xe_device *xe = ads_to_xe(ads); - if (!xe->info.supports_usm) + if (!xe->info.has_usm) return 0; return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX; @@ -598,7 +598,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_capture_list_init(ads); guc_doorbell_init(ads); - if (xe->info.supports_usm) { + if (xe->info.has_usm) { guc_um_init_params(ads); ads_blob_write(ads, ads.um_init_data, base + offsetof(struct __guc_ads_blob, um_init_params)); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c56e7cec350e..86b863b99065 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -464,7 +464,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_hw_engine_enable_ring(hwe); /* We reserve the highest BCS instance for USM */ - if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY) + if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); @@ -879,6 +879,6 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) hwe->logical_instance >= gt->ccs_mode) return true; - return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY && + return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY && hwe->instance == gt->usm.reserved_bcs_instance; } diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 17c0eb9e62cf..d6dfbd0bdc70 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -759,7 +759,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, (q->usm.acc_granularity << ACC_GRANULARITY_S) | vm->usm.asid); - if (xe->info.supports_usm && vm) + if (xe->info.has_usm && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, (q->usm.acc_notify << ACC_NOTIFY_S) | q->usm.acc_trigger); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 84e138df0172..2ca927f3fb2a 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -217,7 +217,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - if (xe->info.supports_usm) { + if (xe->info.has_usm) { batch = tile->primary_gt->usm.bb_pool->bo; m->usm_batch_base_ofs = m->batch_base_ofs; } @@ -237,7 +237,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, m->batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); - if (xe->info.supports_usm) { + if (xe->info.has_usm) { batch = tile->primary_gt->usm.bb_pool->bo; batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); m->usm_batch_base_ofs = xe_migrate_vram_ofs(xe, batch_addr); @@ -374,7 +374,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) return ERR_PTR(err); } - if (xe->info.supports_usm) { + if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, XE_ENGINE_CLASS_COPY, primary_gt->usm.reserved_bcs_instance, @@ -397,7 +397,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) xe_vm_close_and_put(vm); return ERR_CAST(m->q); } - if (xe->info.supports_usm) + if (xe->info.has_usm) m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; mutex_init(&m->job_mutex); @@ -706,7 +706,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, u32 update_idx; u64 ccs_ofs, ccs_size; u32 ccs_pt; - bool usm = xe->info.supports_usm; + bool usm = xe->info.has_usm; 
src_L0 = xe_migrate_res_sizes(&src_it); dst_L0 = xe_migrate_res_sizes(&dst_it); @@ -956,7 +956,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_sched_job *job; struct xe_bb *bb; u32 batch_size, update_idx; - bool usm = xe->info.supports_usm; + bool usm = xe->info.has_usm; clear_L0 = xe_migrate_res_sizes(&src_it); drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); @@ -1227,7 +1227,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, u32 i, batch_size, ppgtt_ofs, update_idx, page_ofs = 0; u64 addr; int err = 0; - bool usm = !q && xe->info.supports_usm; + bool usm = !q && xe->info.has_usm; bool first_munmap_rebind = vma && vma->gpuva.flags & XE_VMA_FIRST_REBIND; struct xe_exec_queue *q_override = !q ? m->q : q; @@ -1264,7 +1264,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, */ xe_tile_assert(tile, batch_size < SZ_128K); - bb = xe_bb_new(gt, batch_size, !q && xe->info.supports_usm); + bb = xe_bb_new(gt, batch_size, !q && xe->info.has_usm); if (IS_ERR(bb)) return ERR_CAST(bb); diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 35aeb50b7158..f660cfb79f50 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -333,11 +333,12 @@ void xe_mmio_probe_tiles(struct xe_device *xe) } add_mmio_ext: - /* By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile). + /* + * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile). * When supported, there could be an additional contiguous multi-tile MMIO extension * space ON TOP of it, and hence the necessity for distinguished MMIO spaces. */ - if (xe->info.supports_mmio_ext) { + if (xe->info.has_mmio_ext) { regs = xe->mmio.regs + tile_mmio_size * tile_count; for_each_tile(tile, xe, id) { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 148890357313..34dcc743e2c7 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -60,15 +60,15 @@ struct xe_device_desc { u8 require_force_probe:1; u8 is_dgfx:1; + u8 has_display:1; u8 has_heci_gscfi:1; - u8 has_llc:1; + u8 has_mmio_ext:1; u8 has_sriov:1; + u8 skip_guc_pc:1; u8 skip_mtcfg:1; u8 skip_pcode:1; - u8 supports_mmio_ext:1; - u8 skip_guc_pc:1; }; __diag_push(); @@ -148,7 +148,7 @@ static const struct xe_graphics_desc graphics_xehpc = { .has_asid = 1, .has_flat_ccs = 0, - .supports_usm = 1, + .has_usm = 1, }; static const struct xe_graphics_desc graphics_xelpg = { @@ -166,7 +166,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_flat_ccs = 0 /* FIXME: implementation missing */, \ .has_range_tlb_invalidation = 1, \ - .supports_usm = 0 /* FIXME: implementation missing */, \ + .has_usm = 0 /* FIXME: implementation missing */, \ .va_bits = 48, \ .vm_max_level = 4, \ .hw_engine_mask = \ @@ -279,8 +279,8 @@ static const struct xe_device_desc dg1_desc = { DGFX_FEATURES, PLATFORM(XE_DG1), .has_display = true, - .require_force_probe = true, .has_heci_gscfi = 1, + .require_force_probe = true, }; static const u16 dg2_g10_ids[] = { XE_DG2_G10_IDS(NOP), XE_ATS_M150_IDS(NOP), 0 }; @@ -321,8 +321,8 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { DGFX_FEATURES, PLATFORM(XE_PVC), .has_display = false, - .require_force_probe = true, .has_heci_gscfi = 1, + .require_force_probe = true, }; static const struct xe_device_desc mtl_desc = { @@ -550,11 +550,11 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.has_heci_gscfi = desc->has_heci_gscfi; 
xe->info.has_llc = desc->has_llc; + xe->info.has_mmio_ext = desc->has_mmio_ext; xe->info.has_sriov = desc->has_sriov; + xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; - xe->info.supports_mmio_ext = desc->supports_mmio_ext; - xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.enable_display && @@ -616,10 +616,10 @@ static int xe_info_init(struct xe_device *xe, xe->info.vram_flags = graphics_desc->vram_flags; xe->info.va_bits = graphics_desc->va_bits; xe->info.vm_max_level = graphics_desc->vm_max_level; - xe->info.supports_usm = graphics_desc->supports_usm; xe->info.has_asid = graphics_desc->has_asid; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; + xe->info.has_usm = graphics_desc->has_usm; /* * All platforms have at least one primary GT. Any platform with media diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index dd3546ba6f90..b1ad12fa22d6 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -27,7 +27,7 @@ struct xe_graphics_desc { u8 has_asid:1; u8 has_flat_ccs:1; u8 has_range_tlb_invalidation:1; - u8 supports_usm:1; + u8 has_usm:1; }; struct xe_media_desc { diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f71285e8ef10..265cc0c5e440 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1944,7 +1944,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - !xe->info.supports_usm)) + !xe->info.has_usm)) return -EINVAL; if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) -- cgit v1.2.3 From 06d5ae90579e774934552ca023c4bbc56e8253f4 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 5 Dec 2023 16:41:42 +0200 Subject: drm/xe/vm: Avoid asid lookup if none allocated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The destroy path can and will get called for incomplete vm objects on error paths, where the asid is not yet allocated. This leads to lookup fail and assert triggered. Fix this by not asserting of asid existence if vm never got assigned one. Cc: Ohad Sharabi Cc: Thomas Hellström Cc: Matthew Auld Signed-off-by: Mika Kuoppala Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 265cc0c5e440..11667529e40b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1634,7 +1634,7 @@ static void vm_destroy_work_func(struct work_struct *w) if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { xe_device_mem_access_put(xe); - if (xe->info.has_asid) { + if (xe->info.has_asid && vm->usm.asid) { mutex_lock(&xe->usm.lock); lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); xe_assert(xe, lookup == vm); -- cgit v1.2.3 From 2ab3cc4bf5a3dd760b697650d5e5bdb240fdf94a Mon Sep 17 00:00:00 2001 From: Sujaritha Sundaresan Date: Fri, 8 Dec 2023 00:11:50 -0500 Subject: drm/xe: Change the name of frequency sysfs attributes Switching the names of frequency sysfs attrbutes to align with required devfreq changes. 
The name changes are as below; -freq_act -> act_freq -freq_cur -> cur_freq -freq_rpn -> rpn_freq -freq_rpe -> rpe_freq -freq_rp0 -> rp0_freq -freq_min -> min_freq -freq_max -> max_freq Signed-off-by: Sujaritha Sundaresan Reviewed-by: Riana Tauro Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 60 +++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index d2605a684b1c..b1876fbea669 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -60,15 +60,15 @@ * Xe's GuC PC provides a sysfs API for frequency management: * * device/gt#/freq_* *read-only* files: - * - freq_act: The actual resolved frequency decided by PCODE. - * - freq_cur: The current one requested by GuC PC to the Hardware. - * - freq_rpn: The Render Performance (RP) N level, which is the minimal one. - * - freq_rpe: The Render Performance (RP) E level, which is the efficient one. - * - freq_rp0: The Render Performance (RP) 0 level, which is the maximum one. + * - act_freq: The actual resolved frequency decided by PCODE. + * - cur_freq: The current one requested by GuC PC to the Hardware. + * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. + * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. + * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. * * device/gt#/freq_* *read-write* files: - * - freq_min: GuC PC min request. - * - freq_max: GuC PC max request. + * - min_freq: GuC PC min request. + * - max_freq: GuC PC max request. * If max <= min, then freq_min becomes a fixed frequency request. * * Render-C States: @@ -388,7 +388,7 @@ static void pc_update_rp_values(struct xe_guc_pc *pc) pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); } -static ssize_t freq_act_show(struct device *dev, +static ssize_t act_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct kobject *kobj = &dev->kobj; @@ -413,9 +413,9 @@ static ssize_t freq_act_show(struct device *dev, xe_device_mem_access_put(gt_to_xe(gt)); return ret; } -static DEVICE_ATTR_RO(freq_act); +static DEVICE_ATTR_RO(act_freq); -static ssize_t freq_cur_show(struct device *dev, +static ssize_t cur_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct kobject *kobj = &dev->kobj; @@ -442,18 +442,18 @@ out: xe_device_mem_access_put(gt_to_xe(gt)); return ret; } -static DEVICE_ATTR_RO(freq_cur); +static DEVICE_ATTR_RO(cur_freq); -static ssize_t freq_rp0_show(struct device *dev, +static ssize_t rp0_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); return sysfs_emit(buf, "%d\n", pc->rp0_freq); } -static DEVICE_ATTR_RO(freq_rp0); +static DEVICE_ATTR_RO(rp0_freq); -static ssize_t freq_rpe_show(struct device *dev, +static ssize_t rpe_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); @@ -465,18 +465,18 @@ static ssize_t freq_rpe_show(struct device *dev, xe_device_mem_access_put(xe); return sysfs_emit(buf, "%d\n", pc->rpe_freq); } -static DEVICE_ATTR_RO(freq_rpe); +static DEVICE_ATTR_RO(rpe_freq); -static ssize_t freq_rpn_show(struct device *dev, +static ssize_t rpn_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); return sysfs_emit(buf, "%d\n", pc->rpn_freq); } -static DEVICE_ATTR_RO(freq_rpn); 
+static DEVICE_ATTR_RO(rpn_freq); -static ssize_t freq_min_show(struct device *dev, +static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); @@ -513,7 +513,7 @@ out: return ret; } -static ssize_t freq_min_store(struct device *dev, struct device_attribute *attr, +static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, const char *buff, size_t count) { struct xe_guc_pc *pc = dev_to_pc(dev); @@ -543,9 +543,9 @@ out: xe_device_mem_access_put(pc_to_xe(pc)); return ret ?: count; } -static DEVICE_ATTR_RW(freq_min); +static DEVICE_ATTR_RW(min_freq); -static ssize_t freq_max_show(struct device *dev, +static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); @@ -571,7 +571,7 @@ out: return ret; } -static ssize_t freq_max_store(struct device *dev, struct device_attribute *attr, +static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, const char *buff, size_t count) { struct xe_guc_pc *pc = dev_to_pc(dev); @@ -601,7 +601,7 @@ out: xe_device_mem_access_put(pc_to_xe(pc)); return ret ?: count; } -static DEVICE_ATTR_RW(freq_max); +static DEVICE_ATTR_RW(max_freq); /** * xe_guc_pc_c_status - get the current GT C state @@ -667,13 +667,13 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc) } static const struct attribute *pc_attrs[] = { - &dev_attr_freq_act.attr, - &dev_attr_freq_cur.attr, - &dev_attr_freq_rp0.attr, - &dev_attr_freq_rpe.attr, - &dev_attr_freq_rpn.attr, - &dev_attr_freq_min.attr, - &dev_attr_freq_max.attr, + &dev_attr_act_freq.attr, + &dev_attr_cur_freq.attr, + &dev_attr_rp0_freq.attr, + &dev_attr_rpe_freq.attr, + &dev_attr_rpn_freq.attr, + &dev_attr_min_freq.attr, + &dev_attr_max_freq.attr, NULL }; -- cgit v1.2.3 From bef52b5c7a1904fc6e1bdda4a0e6dc460f562856 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 8 Dec 2023 00:11:51 -0500 Subject: drm/xe: Create a xe_gt_freq component for raw management and sysfs Goals of this new xe_gt_freq component: 1. Detach sysfs controls and raw freq management from GuC SLPC. 2. Create a directory that could later be aligned with devfreq. 3. Encapsulate all the freq control in a single directory. Although we only have one freq domain per GT, already start with a numbered freq0 directory so it could be expanded in the future if multiple domains or PLL are needed. Note: Although in the goal #1, the raw freq management control is mentioned, this patch only starts by the sysfs control. The RP freq configuration and init freq selection are still under the guc_pc, but should be moved to this component in a follow-up patch. 
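
As a usage sketch only (not part of the patch), a userspace consumer of the new layout could look like the snippet below; the card0/tile0/gt0 indices in the path are an assumption for the example, while the directory structure itself follows the DOC section added in xe_gt_freq.c:

/* freq0_demo.c - userspace illustration only; sysfs path indices are assumed */
#include <stdio.h>

#define FREQ_DIR "/sys/class/drm/card0/device/tile0/gt0/freq0/"

static int read_freq(const char *name, unsigned int *val)
{
	char path[256];
	FILE *f;
	int ok;

	snprintf(path, sizeof(path), FREQ_DIR "%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	ok = (fscanf(f, "%u", val) == 1);
	fclose(f);
	return ok ? 0 : -1;
}

int main(void)
{
	unsigned int act, cur, rp0;

	if (read_freq("act_freq", &act) || read_freq("cur_freq", &cur) ||
	    read_freq("rp0_freq", &rp0))
		return 1;

	printf("actual %u MHz, requested %u MHz, RP0 %u MHz\n", act, cur, rp0);
	return 0;
}

min_freq and max_freq stay writable in the same directory; their store handlers now simply call xe_guc_pc_set_min_freq()/xe_guc_pc_set_max_freq() instead of living inside the GuC PC code.
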
v2: - Add /tile# to the doc and remove unnecessary kobject_put (Riana) - s/ssize_t/int on some ret variables (Vinay) Cc: Sujaritha Sundaresan Cc: Vinay Belgaumkar Cc: Riana Tauro Reviewed-by: Vinay Belgaumkar Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt.c | 3 + drivers/gpu/drm/xe/xe_gt_freq.c | 216 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_freq.h | 13 +++ drivers/gpu/drm/xe/xe_gt_types.h | 3 + drivers/gpu/drm/xe/xe_guc_pc.c | 205 ++++++++++++++++++------------------- drivers/gpu/drm/xe/xe_guc_pc.h | 10 ++ 7 files changed, 347 insertions(+), 104 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gt_freq.c create mode 100644 drivers/gpu/drm/xe/xe_gt_freq.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 537828655da9..62ee772a1d39 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -73,6 +73,7 @@ xe-y += xe_bb.o \ xe_gt_ccs_mode.o \ xe_gt_clock.o \ xe_gt_debugfs.o \ + xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ xe_gt_pagefault.o \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index fe3c7aac2369..dfd9cf01a5d5 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -24,6 +24,7 @@ #include "xe_gsc.h" #include "xe_gt_ccs_mode.h" #include "xe_gt_clock.h" +#include "xe_gt_freq.h" #include "xe_gt_idle.h" #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" @@ -511,6 +512,8 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; + xe_gt_freq_init(gt); + xe_force_wake_init_engines(gt, gt_to_fw(gt)); err = all_fw_domain_init(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c new file mode 100644 index 000000000000..2c3830d0e9e5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gt_freq.h" + +#include +#include + +#include +#include + +#include "xe_device_types.h" +#include "xe_gt_sysfs.h" +#include "xe_guc_pc.h" + +/** + * DOC: Xe GT Frequency Management + * + * This component is responsible for the raw GT frequency management, including + * the sysfs API. + * + * Underneath, Xe enables GuC SLPC automated frequency management. GuC is then + * allowed to request PCODE any frequency between the Minimum and the Maximum + * selected by this component. Furthermore, it is important to highlight that + * PCODE is the ultimate decision maker of the actual running frequency, based + * on thermal and other running conditions. + * + * Xe's Freq provides a sysfs API for frequency management: + * + * device/tile#/gt#/freq0/_freq *read-only* files: + * - act_freq: The actual resolved frequency decided by PCODE. + * - cur_freq: The current one requested by GuC PC to the PCODE. + * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. + * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. + * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. + * + * device/tile#/gt#/freq0/_freq *read-write* files: + * - min_freq: Min frequency request. + * - max_freq: Max frequency request. + * If max <= min, then freq_min becomes a fixed frequency request. 
+ */ + +static struct xe_guc_pc * +dev_to_pc(struct device *dev) +{ + return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc; +} + +static ssize_t act_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc)); +} +static DEVICE_ATTR_RO(act_freq); + +static ssize_t cur_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = xe_guc_pc_get_cur_freq(pc, &freq); + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", freq); +} +static DEVICE_ATTR_RO(cur_freq); + +static ssize_t rp0_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc)); +} +static DEVICE_ATTR_RO(rp0_freq); + +static ssize_t rpe_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc)); +} +static DEVICE_ATTR_RO(rpe_freq); + +static ssize_t rpn_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + + return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpn_freq(pc)); +} +static DEVICE_ATTR_RO(rpn_freq); + +static ssize_t min_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = xe_guc_pc_get_min_freq(pc, &freq); + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", freq); +} + +static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = kstrtou32(buff, 0, &freq); + if (ret) + return ret; + + ret = xe_guc_pc_set_min_freq(pc, freq); + if (ret) + return ret; + + return count; +} +static DEVICE_ATTR_RW(min_freq); + +static ssize_t max_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = xe_guc_pc_get_max_freq(pc, &freq); + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", freq); +} + +static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, + const char *buff, size_t count) +{ + struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + ssize_t ret; + + ret = kstrtou32(buff, 0, &freq); + if (ret) + return ret; + + ret = xe_guc_pc_set_max_freq(pc, freq); + if (ret) + return ret; + + return count; +} +static DEVICE_ATTR_RW(max_freq); + +static const struct attribute *freq_attrs[] = { + &dev_attr_act_freq.attr, + &dev_attr_cur_freq.attr, + &dev_attr_rp0_freq.attr, + &dev_attr_rpe_freq.attr, + &dev_attr_rpn_freq.attr, + &dev_attr_min_freq.attr, + &dev_attr_max_freq.attr, + NULL +}; + +static void freq_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_files(kobj, freq_attrs); + kobject_put(kobj); +} + +/** + * xe_gt_freq_init - Initialize Xe Freq component + * @gt: Xe GT object + * + * It needs to be initialized after GT Sysfs and GuC PC components are ready. 
+ */ +void xe_gt_freq_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + gt->freq = kobject_create_and_add("freq0", gt->sysfs); + if (!gt->freq) { + drm_warn(&xe->drm, "failed to add freq0 directory to %s, err: %d\n", + kobject_name(gt->sysfs), err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq); + if (err) { + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); + return; + } + + err = sysfs_create_files(gt->freq, freq_attrs); + if (err) + drm_warn(&xe->drm, "failed to add freq attrs to %s, err: %d\n", + kobject_name(gt->freq), err); +} diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h new file mode 100644 index 000000000000..f3fe3c90491a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_freq.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_FREQ_H_ +#define _XE_GT_FREQ_H_ + +struct xe_gt; + +void xe_gt_freq_init(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 4e48c4643163..f74684660475 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -307,6 +307,9 @@ struct xe_gt { /** @sysfs: sysfs' kobj used by xe_gt_sysfs */ struct kobject *sysfs; + /** @freq: Main GT freq sysfs control */ + struct kobject *freq; + /** @mocs: info */ struct { /** @uc_index: UC index */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index b1876fbea669..2b627ea950f1 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -57,19 +57,6 @@ * * Xe driver enables SLPC with all of its defaults features and frequency * selection, which varies per platform. - * Xe's GuC PC provides a sysfs API for frequency management: - * - * device/gt#/freq_* *read-only* files: - * - act_freq: The actual resolved frequency decided by PCODE. - * - cur_freq: The current one requested by GuC PC to the Hardware. - * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. - * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. - * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. - * - * device/gt#/freq_* *read-write* files: - * - min_freq: GuC PC min request. - * - max_freq: GuC PC max request. - * If max <= min, then freq_min becomes a fixed frequency request. * * Render-C States: * ================ @@ -100,12 +87,6 @@ pc_to_gt(struct xe_guc_pc *pc) return container_of(pc, struct xe_gt, uc.guc.pc); } -static struct xe_guc_pc * -dev_to_pc(struct device *dev) -{ - return &kobj_to_gt(&dev->kobj)->uc.guc.pc; -} - static struct iosys_map * pc_to_maps(struct xe_guc_pc *pc) { @@ -388,14 +369,17 @@ static void pc_update_rp_values(struct xe_guc_pc *pc) pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); } -static ssize_t act_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +/** + * xe_guc_pc_get_act_freq - Get Actual running frequency + * @pc: The GuC PC + * + * Returns: The Actual running frequency. Which might be 0 if GT is in Render-C sleep state (RC6). 
+ */ +u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) { - struct kobject *kobj = &dev->kobj; - struct xe_gt *gt = kobj_to_gt(kobj); + struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); u32 freq; - ssize_t ret; xe_device_mem_access_get(gt_to_xe(gt)); @@ -408,20 +392,25 @@ static ssize_t act_freq_show(struct device *dev, freq = REG_FIELD_GET(CAGF_MASK, freq); } - ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); + freq = decode_freq(freq); xe_device_mem_access_put(gt_to_xe(gt)); - return ret; + + return freq; } -static DEVICE_ATTR_RO(act_freq); -static ssize_t cur_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +/** + * xe_guc_pc_get_cur_freq - Get Current requested frequency + * @pc: The GuC PC + * @freq: A pointer to a u32 where the freq value will be returned + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset). + */ +int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) { - struct kobject *kobj = &dev->kobj; - struct xe_gt *gt = kobj_to_gt(kobj); - u32 freq; - ssize_t ret; + struct xe_gt *gt = pc_to_gt(pc); + int ret; xe_device_mem_access_get(gt_to_xe(gt)); /* @@ -432,56 +421,69 @@ static ssize_t cur_freq_show(struct device *dev, if (ret) goto out; - freq = xe_mmio_read32(gt, RPNSWREQ); + *freq = xe_mmio_read32(gt, RPNSWREQ); - freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); - ret = sysfs_emit(buf, "%d\n", decode_freq(freq)); + *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq); + *freq = decode_freq(*freq); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); out: xe_device_mem_access_put(gt_to_xe(gt)); return ret; } -static DEVICE_ATTR_RO(cur_freq); -static ssize_t rp0_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +/** + * xe_guc_pc_get_rp0_freq - Get the RP0 freq + * @pc: The GuC PC + * + * Returns: RP0 freq. + */ +u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) { - struct xe_guc_pc *pc = dev_to_pc(dev); - - return sysfs_emit(buf, "%d\n", pc->rp0_freq); + return pc->rp0_freq; } -static DEVICE_ATTR_RO(rp0_freq); -static ssize_t rpe_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +/** + * xe_guc_pc_get_rpe_freq - Get the RPe freq + * @pc: The GuC PC + * + * Returns: RPe freq. + */ +u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) { - struct xe_guc_pc *pc = dev_to_pc(dev); struct xe_gt *gt = pc_to_gt(pc); struct xe_device *xe = gt_to_xe(gt); xe_device_mem_access_get(xe); pc_update_rp_values(pc); xe_device_mem_access_put(xe); - return sysfs_emit(buf, "%d\n", pc->rpe_freq); + + return pc->rpe_freq; } -static DEVICE_ATTR_RO(rpe_freq); -static ssize_t rpn_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +/** + * xe_guc_pc_get_rpn_freq - Get the RPn freq + * @pc: The GuC PC + * + * Returns: RPn freq. + */ +u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) { - struct xe_guc_pc *pc = dev_to_pc(dev); - - return sysfs_emit(buf, "%d\n", pc->rpn_freq); + return pc->rpn_freq; } -static DEVICE_ATTR_RO(rpn_freq); -static ssize_t min_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +/** + * xe_guc_pc_get_min_freq - Get the min operational frequency + * @pc: The GuC PC + * @freq: A pointer to a u32 where the freq value will be returned + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset). 
+ */ +int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { - struct xe_guc_pc *pc = dev_to_pc(dev); struct xe_gt *gt = pc_to_gt(pc); - ssize_t ret; + int ret; xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); @@ -503,7 +505,7 @@ static ssize_t min_freq_show(struct device *dev, if (ret) goto fw; - ret = sysfs_emit(buf, "%d\n", pc_get_min_freq(pc)); + *freq = pc_get_min_freq(pc); fw: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -513,16 +515,18 @@ out: return ret; } -static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, - const char *buff, size_t count) +/** + * xe_guc_pc_set_min_freq - Set the minimal operational frequency + * @pc: The GuC PC + * @freq: The selected minimal frequency + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset), + * -EINVAL if value out of bounds. + */ +int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { - struct xe_guc_pc *pc = dev_to_pc(dev); - u32 freq; - ssize_t ret; - - ret = kstrtou32(buff, 0, &freq); - if (ret) - return ret; + int ret; xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); @@ -541,15 +545,21 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, out: mutex_unlock(&pc->freq_lock); xe_device_mem_access_put(pc_to_xe(pc)); - return ret ?: count; + + return ret; } -static DEVICE_ATTR_RW(min_freq); -static ssize_t max_freq_show(struct device *dev, - struct device_attribute *attr, char *buf) +/** + * xe_guc_pc_get_max_freq - Get Maximum operational frequency + * @pc: The GuC PC + * @freq: A pointer to a u32 where the freq value will be returned + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset). + */ +int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { - struct xe_guc_pc *pc = dev_to_pc(dev); - ssize_t ret; + int ret; xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); @@ -563,7 +573,7 @@ static ssize_t max_freq_show(struct device *dev, if (ret) goto out; - ret = sysfs_emit(buf, "%d\n", pc_get_max_freq(pc)); + *freq = pc_get_max_freq(pc); out: mutex_unlock(&pc->freq_lock); @@ -571,16 +581,18 @@ out: return ret; } -static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, - const char *buff, size_t count) +/** + * xe_guc_pc_set_max_freq - Set the maximum operational frequency + * @pc: The GuC PC + * @freq: The selected maximum frequency value + * + * Returns: 0 on success, + * -EAGAIN if GuC PC not ready (likely in middle of a reset), + * -EINVAL if value out of bounds. 
+ */ +int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - struct xe_guc_pc *pc = dev_to_pc(dev); - u32 freq; - ssize_t ret; - - ret = kstrtou32(buff, 0, &freq); - if (ret) - return ret; + int ret; xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); @@ -599,9 +611,8 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, out: mutex_unlock(&pc->freq_lock); xe_device_mem_access_put(pc_to_xe(pc)); - return ret ?: count; + return ret; } -static DEVICE_ATTR_RW(max_freq); /** * xe_guc_pc_c_status - get the current GT C state @@ -666,17 +677,6 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc) return reg; } -static const struct attribute *pc_attrs[] = { - &dev_attr_act_freq.attr, - &dev_attr_cur_freq.attr, - &dev_attr_rp0_freq.attr, - &dev_attr_rpe_freq.attr, - &dev_attr_rpn_freq.attr, - &dev_attr_min_freq.attr, - &dev_attr_max_freq.attr, - NULL -}; - static void mtl_init_fused_rp_values(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); @@ -952,6 +952,10 @@ out: return ret; } +/** + * xe_guc_pc_fini - Finalize GuC's Power Conservation component + * @pc: Xe_GuC_PC instance + */ void xe_guc_pc_fini(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); @@ -963,7 +967,6 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc) XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); - sysfs_remove_files(pc_to_gt(pc)->sysfs, pc_attrs); mutex_destroy(&pc->freq_lock); } @@ -978,7 +981,6 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) struct xe_device *xe = gt_to_xe(gt); struct xe_bo *bo; u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); - int err; if (xe->info.skip_guc_pc) return 0; @@ -992,10 +994,5 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) return PTR_ERR(bo); pc->bo = bo; - - err = sysfs_create_files(gt->sysfs, pc_attrs); - if (err) - return err; - return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 054788e006f3..cecad8e9300b 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -14,6 +14,16 @@ int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); +u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); +int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); +u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc); +u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc); +u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc); +int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq); +int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq); +int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq); +int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq); + enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); -- cgit v1.2.3 From 1c8e9019033728093c04608f44c6e87fec6822e1 Mon Sep 17 00:00:00 2001 From: Sujaritha Sundaresan Date: Fri, 8 Dec 2023 00:11:52 -0500 Subject: drm/xe: Add frequency throttle reasons sysfs attributes Add throttle reasons sysfs attributes under a separate directory. 
/device/tile/gt/freq0/throttle |- reason_pl1 |- reason_pl2 |- reason_pl4 |- reason_prochot |- reason_ratl |- reason_vr_tdc |- reason_vr_thermalert |- status v2: Remove unnecessary headers and clean-up action (Riana) Signed-off-by: Sujaritha Sundaresan Reviewed-by: Riana Tauro Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/regs/xe_gt_regs.h | 12 ++ drivers/gpu/drm/xe/xe_gt_freq.c | 3 + drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c | 251 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h | 16 ++ 5 files changed, 283 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 62ee772a1d39..f4ae063a7005 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -78,6 +78,7 @@ xe-y += xe_bb.o \ xe_gt_mcr.o \ xe_gt_pagefault.o \ xe_gt_sysfs.o \ + xe_gt_throttle_sysfs.o \ xe_gt_tlb_invalidation.o \ xe_gt_topology.o \ xe_guc.o \ diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 9744ed0be3a5..5f5a72e9d0d8 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -478,4 +478,16 @@ #define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) #define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) +#define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8) +#define GT0_PERF_LIMIT_REASONS_MASK 0xde3 +#define PROCHOT_MASK REG_BIT(0) +#define THERMAL_LIMIT_MASK REG_BIT(1) +#define RATL_MASK REG_BIT(5) +#define VR_THERMALERT_MASK REG_BIT(6) +#define VR_TDC_MASK REG_BIT(7) +#define POWER_LIMIT_4_MASK REG_BIT(8) +#define POWER_LIMIT_1_MASK REG_BIT(10) +#define POWER_LIMIT_2_MASK REG_BIT(11) +#define MTL_MEDIA_PERF_LIMIT_REASONS XE_REG(0x138030) + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 2c3830d0e9e5..08eabcafe7bc 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -13,6 +13,7 @@ #include "xe_device_types.h" #include "xe_gt_sysfs.h" +#include "xe_gt_throttle_sysfs.h" #include "xe_guc_pc.h" /** @@ -213,4 +214,6 @@ void xe_gt_freq_init(struct xe_gt *gt) if (err) drm_warn(&xe->drm, "failed to add freq attrs to %s, err: %d\n", kobject_name(gt->freq), err); + + xe_gt_throttle_sysfs_init(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c new file mode 100644 index 000000000000..63d640591a52 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include + +#include +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_sysfs.h" +#include "xe_gt_throttle_sysfs.h" +#include "xe_mmio.h" + +/** + * DOC: Xe GT Throttle + * + * Provides sysfs entries for frequency throttle reasons in GT + * + * device/gt#/freq0/throttle/status - Overall status + * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1 + * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2 + * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc. 
+ * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal + * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot + * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL + * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT + * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC + */ + +static struct xe_gt * +dev_to_gt(struct device *dev) +{ + return kobj_to_gt(dev->kobj.parent); +} + +static u32 read_perf_limit_reasons(struct xe_gt *gt) +{ + u32 reg; + + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS); + else + reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS); + + return reg; +} + +static u32 read_status(struct xe_gt *gt) +{ + u32 status = read_perf_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK; + + return status; +} + +static u32 read_reason_pl1(struct xe_gt *gt) +{ + u32 pl1 = read_perf_limit_reasons(gt) & POWER_LIMIT_1_MASK; + + return pl1; +} + +static u32 read_reason_pl2(struct xe_gt *gt) +{ + u32 pl2 = read_perf_limit_reasons(gt) & POWER_LIMIT_2_MASK; + + return pl2; +} + +static u32 read_reason_pl4(struct xe_gt *gt) +{ + u32 pl4 = read_perf_limit_reasons(gt) & POWER_LIMIT_4_MASK; + + return pl4; +} + +static u32 read_reason_thermal(struct xe_gt *gt) +{ + u32 thermal = read_perf_limit_reasons(gt) & THERMAL_LIMIT_MASK; + + return thermal; +} + +static u32 read_reason_prochot(struct xe_gt *gt) +{ + u32 prochot = read_perf_limit_reasons(gt) & PROCHOT_MASK; + + return prochot; +} + +static u32 read_reason_ratl(struct xe_gt *gt) +{ + u32 ratl = read_perf_limit_reasons(gt) & RATL_MASK; + + return ratl; +} + +static u32 read_reason_vr_thermalert(struct xe_gt *gt) +{ + u32 thermalert = read_perf_limit_reasons(gt) & VR_THERMALERT_MASK; + + return thermalert; +} + +static u32 read_reason_vr_tdc(struct xe_gt *gt) +{ + u32 tdc = read_perf_limit_reasons(gt) & VR_TDC_MASK; + + return tdc; +} + +static ssize_t status_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool status = !!read_status(gt); + + return sysfs_emit(buff, "%u\n", status); +} +static DEVICE_ATTR_RO(status); + +static ssize_t reason_pl1_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool pl1 = !!read_reason_pl1(gt); + + return sysfs_emit(buff, "%u\n", pl1); +} +static DEVICE_ATTR_RO(reason_pl1); + +static ssize_t reason_pl2_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool pl2 = !!read_reason_pl2(gt); + + return sysfs_emit(buff, "%u\n", pl2); +} +static DEVICE_ATTR_RO(reason_pl2); + +static ssize_t reason_pl4_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool pl4 = !!read_reason_pl4(gt); + + return sysfs_emit(buff, "%u\n", pl4); +} +static DEVICE_ATTR_RO(reason_pl4); + +static ssize_t reason_thermal_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool thermal = !!read_reason_thermal(gt); + + return sysfs_emit(buff, "%u\n", thermal); +} +static DEVICE_ATTR_RO(reason_thermal); + +static ssize_t reason_prochot_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool prochot = !!read_reason_prochot(gt); + + return sysfs_emit(buff, "%u\n", prochot); +} +static 
DEVICE_ATTR_RO(reason_prochot); + +static ssize_t reason_ratl_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool ratl = !!read_reason_ratl(gt); + + return sysfs_emit(buff, "%u\n", ratl); +} +static DEVICE_ATTR_RO(reason_ratl); + +static ssize_t reason_vr_thermalert_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool thermalert = !!read_reason_vr_thermalert(gt); + + return sysfs_emit(buff, "%u\n", thermalert); +} +static DEVICE_ATTR_RO(reason_vr_thermalert); + +static ssize_t reason_vr_tdc_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct xe_gt *gt = dev_to_gt(dev); + bool tdc = !!read_reason_vr_tdc(gt); + + return sysfs_emit(buff, "%u\n", tdc); +} +static DEVICE_ATTR_RO(reason_vr_tdc); + +static struct attribute *throttle_attrs[] = { + &dev_attr_status.attr, + &dev_attr_reason_pl1.attr, + &dev_attr_reason_pl2.attr, + &dev_attr_reason_pl4.attr, + &dev_attr_reason_thermal.attr, + &dev_attr_reason_prochot.attr, + &dev_attr_reason_ratl.attr, + &dev_attr_reason_vr_thermalert.attr, + &dev_attr_reason_vr_tdc.attr, + NULL +}; + +static const struct attribute_group throttle_group_attrs = { + .name = "throttle", + .attrs = throttle_attrs, +}; + +static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + sysfs_remove_group(gt->freq, &throttle_group_attrs); +} + +void xe_gt_throttle_sysfs_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + err = sysfs_create_group(gt->freq, &throttle_group_attrs); + if (err) { + drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt); + if (err) + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); +} diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h new file mode 100644 index 000000000000..3ecfd4beffe1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_THROTTLE_SYSFS_H_ +#define _XE_GT_THROTTLE_SYSFS_H_ + +#include + +struct xe_gt; + +void xe_gt_throttle_sysfs_init(struct xe_gt *gt); + +#endif /* _XE_GT_THROTTLE_SYSFS_H_ */ + -- cgit v1.2.3 From e84d716dd461928b3db344748cd7f87395a2ce74 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Sat, 9 Dec 2023 16:18:41 +0100 Subject: drm/xe: Restrict huge PTEs to 1GiB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a define for the highest level for which we can encode a huge PTE, and use it for page-table building. Also update an assert that checks that we don't try to encode for larger sizes. 
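
For context (illustration only, constants restated locally): assuming the usual 4 KiB granule and 512-entry page tables, consistent with the 2 MiB level-1 and 1 GiB level-2 sizes referenced here, the area covered by a single entry grows by 9 address bits per level, which is why level 2 is the 1 GiB cut-off:

/* hugepte_levels.c - illustration only; constants are local to this snippet */
#include <stdio.h>

#define LEAF_SHIFT        12	/* 4 KiB granule */
#define ENTRIES_SHIFT     9	/* 512 entries per page table */
#define MAX_HUGEPTE_LEVEL 2	/* as introduced by this patch */

int main(void)
{
	for (unsigned int level = 0; level <= 3; level++) {
		unsigned long long bytes =
			1ull << (LEAF_SHIFT + ENTRIES_SHIFT * level);

		printf("level %u covers %llu KiB per entry%s\n",
		       level, bytes >> 10,
		       level > MAX_HUGEPTE_LEVEL ? " (too large for a huge PTE)" : "");
	}
	return 0;	/* level 0: 4 KiB, 1: 2 MiB, 2: 1 GiB, 3: 512 GiB (rejected) */
}

With this change xe_pt_hugepte_possible() bails out for any level above MAX_HUGEPTE_LEVEL before checking alignment, and pte_encode_ps() warns on the same condition.
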
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Brian Welty Link: https://patchwork.freedesktop.org/patch/msgid/20231209151843.7903-2-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 3 +++ drivers/gpu/drm/xe/xe_pt.h | 3 +++ drivers/gpu/drm/xe/xe_vm.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 3b485313804a..46ef9df34a2e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -430,6 +430,9 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, { u64 size, dma; + if (level > MAX_HUGEPTE_LEVEL) + return false; + /* Does the virtual range requested cover a huge pte? */ if (!xe_pt_covers(addr, next, level, &xe_walk->base)) return false; diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index d5460e58dbbf..ba2f3325c84d 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -18,6 +18,9 @@ struct xe_tile; struct xe_vm; struct xe_vma; +/* Largest huge pte is currently 1GiB. May become device dependent. */ +#define MAX_HUGEPTE_LEVEL 2 + #define xe_pt_write(xe, map, idx, data) \ xe_map_wr(xe, map, (idx) * sizeof(u64), u64, data) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 11667529e40b..d589beb99fe6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1258,7 +1258,7 @@ static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, static u64 pte_encode_ps(u32 pt_level) { - XE_WARN_ON(pt_level > 2); + XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); if (pt_level == 1) return XE_PDE_PS_2M; -- cgit v1.2.3 From 06951c2ee72df2f53b71e7cf2b504d4fa6bba453 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Sat, 9 Dec 2023 16:18:42 +0100 Subject: drm/xe: Use NULL PTEs as scratch PTEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently scratch PTEs are write-enabled and points to a single scratch page. This has the side effect that buggy applications with out-of-bounds memory accesses may not notice the bad access since what's written may be read back. Instead use NULL PTEs as scratch PTEs. These always return 0 when reading, and writing has no effect. As a slight benefit, we can also use huge NULL PTEs. One drawback pointed out is that debugging may be hampered since previously when inspecting the content of the scratch page, it might be possible to detect writes to out-of-bound addresses and possibly also from where the out-of-bounds address originated. However since the scratch page-table structure is kept, it will be easy to add back the single RW-enabled scratch page under a debug define if needed. Also update the kerneldoc accordingly and move the function to create the scratch page-tables from xe_pt.c to xe_pt.h since it is accessing vm structure internals and this also makes it possible to make it static. v2: - Don't try to encode scratch PTEs larger than 1GiB. - Move xe_pt_create_scratch(), Update kerneldoc. v3: - Rebase. Cc: Brian Welty Cc: Matt Roper Signed-off-by: Thomas Hellström Acked-by: Lucas De Marchi #for general direction. 
Reviewed-by: Brian Welty Link: https://patchwork.freedesktop.org/patch/msgid/20231209151843.7903-3-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pt.c | 65 +++++---------------------------------- drivers/gpu/drm/xe/xe_pt.h | 3 -- drivers/gpu/drm/xe/xe_vm.c | 66 +++++++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_vm.h | 11 +++++++ drivers/gpu/drm/xe/xe_vm_types.h | 1 - 5 files changed, 74 insertions(+), 72 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 46ef9df34a2e..de1030a47588 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -50,17 +50,19 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, unsigned int level) { - u16 pat_index = tile_to_xe(tile)->pat.idx[XE_CACHE_WB]; + struct xe_device *xe = tile_to_xe(tile); + u16 pat_index = xe->pat.idx[XE_CACHE_WB]; u8 id = tile->id; - if (!vm->scratch_bo[id]) + if (!xe_vm_has_scratch(vm)) return 0; - if (level > 0) + if (level > MAX_HUGEPTE_LEVEL) return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, 0, pat_index); - return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, pat_index, 0); + return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | + XE_PTE_NULL; } /** @@ -135,7 +137,7 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, u64 empty; int i; - if (!vm->scratch_bo[tile->id]) { + if (!xe_vm_has_scratch(vm)) { /* * FIXME: Some memory is allocated already allocated to zero? * Find out which memory that is and avoid this memset... @@ -194,57 +196,6 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) kfree(pt); } -/** - * xe_pt_create_scratch() - Setup a scratch memory pagetable tree for the - * given tile and vm. - * @xe: xe device. - * @tile: tile to set up for. - * @vm: vm to set up for. - * - * Sets up a pagetable tree with one page-table per level and a single - * leaf bo. All pagetable entries point to the single page-table or, - * for L0, the single bo one level below. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm) -{ - u8 id = tile->id; - unsigned int flags; - int i; - - /* - * So we don't need to worry about 64K TLB hints when dealing with - * scratch entires, rather keep the scratch page in system memory on - * platforms where 64K pages are needed for VRAM. - */ - flags = XE_BO_CREATE_PINNED_BIT; - if (vm->flags & XE_VM_FLAG_64K) - flags |= XE_BO_CREATE_SYSTEM_BIT; - else - flags |= XE_BO_CREATE_VRAM_IF_DGFX(tile); - - vm->scratch_bo[id] = xe_bo_create_pin_map(xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - flags); - if (IS_ERR(vm->scratch_bo[id])) - return PTR_ERR(vm->scratch_bo[id]); - - xe_map_memset(vm->xe, &vm->scratch_bo[id]->vmap, 0, 0, - vm->scratch_bo[id]->size); - - for (i = 0; i < vm->pt_root[id]->level; i++) { - vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); - - xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); - } - - return 0; -} - /** * DOC: Pagetable building * @@ -1289,7 +1240,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue * it needs to be done here. 
*/ if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) || - (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_preempt_fence_mode(vm))) { + (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index ba2f3325c84d..71a4fbfcff43 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -29,9 +29,6 @@ unsigned int xe_pt_shift(unsigned int level); struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, unsigned int level); -int xe_pt_create_scratch(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm); - void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d589beb99fe6..e190469ec03a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1348,6 +1348,57 @@ static const struct xe_pt_ops xelp_pt_ops = { static void vm_destroy_work_func(struct work_struct *w); +/** + * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the + * given tile and vm. + * @xe: xe device. + * @tile: tile to set up for. + * @vm: vm to set up for. + * + * Sets up a pagetable tree with one page-table per level and a single + * leaf PTE. All pagetable entries point to the single page-table or, + * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and + * writes become NOPs. + * + * Return: 0 on success, negative error code on error. + */ +static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *vm) +{ + u8 id = tile->id; + int i; + + for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { + vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); + if (IS_ERR(vm->scratch_pt[id][i])) + return PTR_ERR(vm->scratch_pt[id][i]); + + xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); + } + + return 0; +} + +static void xe_vm_free_scratch(struct xe_vm *vm) +{ + struct xe_tile *tile; + u8 id; + + if (!xe_vm_has_scratch(vm)) + return; + + for_each_tile(tile, vm->xe, id) { + u32 i; + + if (!vm->pt_root[id]) + continue; + + for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) + if (vm->scratch_pt[id][i]) + xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); + } +} + struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) { struct drm_gem_object *vm_resv_obj; @@ -1424,12 +1475,12 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) } } - if (flags & XE_VM_FLAG_SCRATCH_PAGE) { + if (xe_vm_has_scratch(vm)) { for_each_tile(tile, xe, id) { if (!vm->pt_root[id]) continue; - err = xe_pt_create_scratch(xe, tile, vm); + err = xe_vm_create_scratch(xe, tile, vm); if (err) goto err_unlock_close; } @@ -1575,16 +1626,9 @@ void xe_vm_close_and_put(struct xe_vm *vm) * install a fence to resv. Hence it's safe to * destroy the pagetables immediately. 
*/ - for_each_tile(tile, xe, id) { - if (vm->scratch_bo[id]) { - u32 i; + xe_vm_free_scratch(vm); - xe_bo_unpin(vm->scratch_bo[id]); - xe_bo_put(vm->scratch_bo[id]); - for (i = 0; i < vm->pt_root[id]->level; i++) - xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, - NULL); - } + for_each_tile(tile, xe, id) { if (vm->pt_root[id]) { xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); vm->pt_root[id] = NULL; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 12bb5d79487f..a1907544cc4f 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -63,6 +63,17 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm) struct xe_vma * xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range); +/** + * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs + * @vm: The vm + * + * Return: whether the vm populates unmapped areas with scratch PTEs + */ +static inline bool xe_vm_has_scratch(const struct xe_vm *vm) +{ + return vm->flags & XE_VM_FLAG_SCRATCH_PAGE; +} + static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva) { return container_of(gpuva->vm, struct xe_vm, gpuvm); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index e70ec6b2fabe..15471025a44f 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -151,7 +151,6 @@ struct xe_vm { u64 size; struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE]; - struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE]; struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL]; /** -- cgit v1.2.3 From 68661c69e9fa86e78b8b6509aebeada5a15dada5 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 1 Dec 2023 12:25:14 -0800 Subject: drm/xe: Check skip_guc_pc before disabling gucrc Also, use the new C6 helper instead of duplicating that code. 
v2: Check skip flag at the beginning of the function (Rodrigo) Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set") Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 2b627ea950f1..f71085228cb3 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -794,9 +794,13 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) { + struct xe_device *xe = pc_to_xe(pc); struct xe_gt *gt = pc_to_gt(pc); int ret = 0; + if (xe->info.skip_guc_pc) + return 0; + xe_device_mem_access_get(pc_to_xe(pc)); ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); @@ -807,9 +811,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (ret) goto out; - xe_mmio_write32(gt, PG_ENABLE, 0); - xe_mmio_write32(gt, RC_CONTROL, 0); - xe_mmio_write32(gt, RC_STATE, 0); + xe_gt_idle_disable_c6(gt); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -- cgit v1.2.3 From 49e134e16f8111f82f4067da38055db4b4b34a0b Mon Sep 17 00:00:00 2001 From: Aravind Iddamsetty Date: Wed, 6 Dec 2023 15:03:27 +0530 Subject: drm/xe: Fix lockdep warning in xe_force_wake calls Use spin_lock_irqsave, spin_unlock_irqrestore Fix for below: [13994.811263] ======================================================== [13994.811295] WARNING: possible irq lock inversion dependency detected [13994.811326] 6.6.0-rc3-xe #2 Tainted: G U [13994.811358] -------------------------------------------------------- [13994.811388] swapper/0/0 just changed the state of lock: [13994.811416] ffff895c7e044db8 (&cpuctx_lock){-...}-{2:2}, at: __perf_event_read+0xb7/0x3a0 [13994.811494] but this lock took another, HARDIRQ-unsafe lock in the past: [13994.811528] (&fw->lock){+.+.}-{2:2} [13994.811544] and interrupts could create inverse lock ordering between them. 
[13994.811606] other info that might help us debug this: [13994.811636] Possible interrupt unsafe locking scenario: [13994.811667] CPU0 CPU1 [13994.811691] ---- ---- [13994.811715] lock(&fw->lock); [13994.811744] local_irq_disable(); [13994.811773] lock(&cpuctx_lock); [13994.811810] lock(&fw->lock); [13994.811846] [13994.811865] lock(&cpuctx_lock); [13994.811895] *** DEADLOCK *** v2: Use spin_lock in atomic context and spin_lock_irq in a non atomic context (Matthew Brost) v3: just use spin_lock_irqsave/restore Cc: Matthew Brost Cc: Anshuman Gupta Cc: Ville Syrjala Reviewed-by: Rodrigo Vivi Signed-off-by: Aravind Iddamsetty Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_force_wake.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 32d6c4dd2807..9bbe8a5040da 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -145,9 +145,10 @@ int xe_force_wake_get(struct xe_force_wake *fw, struct xe_gt *gt = fw_to_gt(fw); struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, woken = 0; + unsigned long flags; int ret, ret2 = 0; - spin_lock(&fw->lock); + spin_lock_irqsave(&fw->lock, flags); for_each_fw_domain_masked(domain, domains, fw, tmp) { if (!domain->ref++) { woken |= BIT(domain->id); @@ -162,7 +163,7 @@ int xe_force_wake_get(struct xe_force_wake *fw, domain->id, ret); } fw->awake_domains |= woken; - spin_unlock(&fw->lock); + spin_unlock_irqrestore(&fw->lock, flags); return ret2; } @@ -174,9 +175,10 @@ int xe_force_wake_put(struct xe_force_wake *fw, struct xe_gt *gt = fw_to_gt(fw); struct xe_force_wake_domain *domain; enum xe_force_wake_domains tmp, sleep = 0; + unsigned long flags; int ret, ret2 = 0; - spin_lock(&fw->lock); + spin_lock_irqsave(&fw->lock, flags); for_each_fw_domain_masked(domain, domains, fw, tmp) { if (!--domain->ref) { sleep |= BIT(domain->id); @@ -191,7 +193,7 @@ int xe_force_wake_put(struct xe_force_wake *fw, domain->id, ret); } fw->awake_domains &= ~sleep; - spin_unlock(&fw->lock); + spin_unlock_irqrestore(&fw->lock, flags); return ret2; } -- cgit v1.2.3 From 24f947d58fe554cf38507b94a43d373acf1e5e73 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 12 Dec 2023 11:01:43 +0100 Subject: drm/xe: Use DRM GPUVM helpers for external- and evicted objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt to the DRM_GPUVM helpers moving removing a lot of complicated driver-specific code. For now this uses fine-grained locking for the evict list and external object list, which may incur a slight performance penalty in some situations. 
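The shape of the conversion is easiest to see in the hunks that follow: instead of iterating a driver-private external-object list under bespoke locking, the VM's reservation object and the resv of every external BO are locked through a struct drm_gpuvm_exec, eviction is handled through the gpuvm's validate callback, and a single helper adds a fence to all of those resvs. A condensed sketch of that pattern, assembled only from the calls introduced below; it is an outline with error handling and context trimmed, not a compilable unit:

/* Sketch of the drm_gpuvm_exec usage this series switches to. */
static int lock_validate_and_fence(struct xe_vm *vm, struct dma_fence *fence)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,	/* fence slots reserved per locked resv */
	};
	int err;

	/* Locks the VM resv plus the resv of every external BO bound in it. */
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		return err;

	/* Revalidate everything on the gpuvm's evict list through the
	 * ->vm_bo_validate() hook (xe_gpuvm_validate() in this series). */
	err = drm_gpuvm_validate(&vm->gpuvm, &vm_exec.exec);
	if (!err)
		/* One call fences the VM resv and all external BO resvs. */
		drm_gpuvm_resv_add_fence(&vm->gpuvm, &vm_exec.exec, fence,
					 DMA_RESV_USAGE_BOOKKEEP,
					 DMA_RESV_USAGE_BOOKKEEP);

	drm_exec_fini(&vm_exec.exec);
	return err;
}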
v2: - Don't lock all bos and validate on LR exec submissions (Matthew Brost) - Add some kerneldoc Signed-off-by: Thomas Hellström Reviewed-by: Rodrigo Vivi Acked-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20231212100144.6833-2-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 63 +++------ drivers/gpu/drm/xe/xe_exec.c | 74 +++------- drivers/gpu/drm/xe/xe_vm.c | 292 +++++++++------------------------------ drivers/gpu/drm/xe/xe_vm.h | 19 ++- drivers/gpu/drm/xe/xe_vm_types.h | 67 ++------- 5 files changed, 129 insertions(+), 386 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 9cc78986dbd3..13ebe33bb7a2 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -468,9 +468,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, { struct dma_resv_iter cursor; struct dma_fence *fence; - struct drm_gpuva *gpuva; struct drm_gem_object *obj = &bo->ttm.base; struct drm_gpuvm_bo *vm_bo; + bool idle = false; int ret = 0; dma_resv_assert_held(bo->ttm.base.resv); @@ -484,14 +484,15 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, } drm_gem_for_each_gpuvm_bo(vm_bo, obj) { - drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { - struct xe_vma *vma = gpuva_to_vma(gpuva); - struct xe_vm *vm = xe_vma_vm(vma); + struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + struct drm_gpuva *gpuva; - trace_xe_vma_evict(vma); + if (!xe_vm_in_fault_mode(vm)) { + drm_gpuvm_bo_evict(vm_bo, true); + continue; + } - if (xe_vm_in_fault_mode(vm)) { - /* Wait for pending binds / unbinds. */ + if (!idle) { long timeout; if (ctx->no_wait_gpu && @@ -503,45 +504,21 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo, DMA_RESV_USAGE_BOOKKEEP, ctx->interruptible, MAX_SCHEDULE_TIMEOUT); - if (timeout > 0) { - ret = xe_vm_invalidate_vma(vma); - XE_WARN_ON(ret); - } else if (!timeout) { - ret = -ETIME; - } else { - ret = timeout; - } - - } else { - bool vm_resv_locked = false; + if (!timeout) + return -ETIME; + if (timeout < 0) + return timeout; - /* - * We need to put the vma on the vm's rebind_list, - * but need the vm resv to do so. If we can't verify - * that we indeed have it locked, put the vma an the - * vm's notifier.rebind_list instead and scoop later. 
- */ - if (dma_resv_trylock(xe_vm_resv(vm))) - vm_resv_locked = true; - else if (ctx->resv != xe_vm_resv(vm)) { - spin_lock(&vm->notifier.list_lock); - if (!(vma->gpuva.flags & XE_VMA_DESTROYED)) - list_move_tail(&vma->notifier.rebind_link, - &vm->notifier.rebind_list); - spin_unlock(&vm->notifier.list_lock); - continue; - } + idle = true; + } - xe_vm_assert_held(vm); - if (vma->tile_present && - !(vma->gpuva.flags & XE_VMA_DESTROYED) && - list_empty(&vma->combined_links.rebind)) - list_add_tail(&vma->combined_links.rebind, - &vm->rebind_list); + drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { + struct xe_vma *vma = gpuva_to_vma(gpuva); - if (vm_resv_locked) - dma_resv_unlock(xe_vm_resv(vm)); - } + trace_xe_vma_evict(vma); + ret = xe_vm_invalidate_vma(vma); + if (XE_WARN_ON(ret)) + return ret; } } diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 5ec37df33afe..15ab1a927613 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -94,40 +94,9 @@ * Unlock all */ -static int xe_exec_begin(struct drm_exec *exec, struct xe_vm *vm) +static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { - struct xe_vma *vma; - LIST_HEAD(dups); - int err = 0; - - if (xe_vm_in_lr_mode(vm)) - return 0; - - /* - * 1 fence for job from exec plus a fence for each tile from a possible - * rebind - */ - err = xe_vm_lock_dma_resv(vm, exec, 1 + vm->xe->info.tile_count, true); - if (err) - return err; - - /* - * Validate BOs that have been evicted (i.e. make sure the - * BOs have valid placements possibly moving an evicted BO back - * to a location where the GPU can access it). - */ - list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { - xe_assert(vm->xe, !xe_vma_is_null(vma)); - - if (xe_vma_is_userptr(vma)) - continue; - - err = xe_bo_validate(xe_vma_bo(vma), vm, false); - if (err) - break; - } - - return err; + return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -140,7 +109,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_exec_queue *q; struct xe_sync_entry *syncs = NULL; u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; - struct drm_exec exec; + struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; + struct drm_exec *exec = &vm_exec.exec; u32 i, num_syncs = 0; struct xe_sched_job *job; struct dma_fence *rebind_fence; @@ -216,16 +186,18 @@ retry: goto err_unlock_list; } - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); - drm_exec_until_all_locked(&exec) { - err = xe_exec_begin(&exec, vm); - drm_exec_retry_on_contention(&exec); - if (err && xe_vm_validate_should_retry(&exec, err, &end)) { - err = -EAGAIN; + vm_exec.vm = &vm->gpuvm; + vm_exec.num_fences = 1 + vm->xe->info.tile_count; + vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; + if (xe_vm_in_lr_mode(vm)) { + drm_exec_init(exec, vm_exec.flags); + } else { + err = drm_gpuvm_exec_lock(&vm_exec); + if (err) { + if (xe_vm_validate_should_retry(exec, err, &end)) + err = -EAGAIN; goto err_unlock_list; } - if (err) - goto err_exec; } if (xe_vm_is_closed_or_banned(q->vm)) { @@ -307,19 +279,9 @@ retry: * the job and let the DRM scheduler / backend clean up the job. 
*/ xe_sched_job_arm(job); - if (!xe_vm_in_lr_mode(vm)) { - /* Block userptr invalidations / BO eviction */ - dma_resv_add_fence(xe_vm_resv(vm), - &job->drm.s_fence->finished, - DMA_RESV_USAGE_BOOKKEEP); - - /* - * Make implicit sync work across drivers, assuming all external - * BOs are written as we don't pass in a read / write list. - */ - xe_vm_fence_all_extobjs(vm, &job->drm.s_fence->finished, - DMA_RESV_USAGE_WRITE); - } + if (!xe_vm_in_lr_mode(vm)) + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, + DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); for (i = 0; i < num_syncs; i++) xe_sync_entry_signal(&syncs[i], job, @@ -343,7 +305,7 @@ err_put_job: if (err) xe_sched_job_put(job); err_exec: - drm_exec_fini(&exec); + drm_exec_fini(exec); err_unlock_list: if (write_locked) up_write(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e190469ec03a..7a3b680d01a3 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -299,26 +299,8 @@ out_unlock: return err; } -/** - * xe_vm_fence_all_extobjs() - Add a fence to vm's external objects' resv - * @vm: The vm. - * @fence: The fence to add. - * @usage: The resv usage for the fence. - * - * Loops over all of the vm's external object bindings and adds a @fence - * with the given @usage to all of the external object's reservation - * objects. - */ -void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, - enum dma_resv_usage usage) -{ - struct xe_vma *vma; - - list_for_each_entry(vma, &vm->extobj.list, extobj.link) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, usage); -} - -static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) +static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, + struct drm_exec *exec) { struct xe_exec_queue *q; @@ -328,16 +310,19 @@ static void resume_and_reinstall_preempt_fences(struct xe_vm *vm) list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { q->ops->resume(q); - dma_resv_add_fence(xe_vm_resv(vm), q->compute.pfence, - DMA_RESV_USAGE_BOOKKEEP); - xe_vm_fence_all_extobjs(vm, q->compute.pfence, - DMA_RESV_USAGE_BOOKKEEP); + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence, + DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); } } int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) { - struct drm_exec exec; + struct drm_gpuvm_exec vm_exec = { + .vm = &vm->gpuvm, + .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, + .num_fences = 1, + }; + struct drm_exec *exec = &vm_exec.exec; struct dma_fence *pfence; int err; bool wait; @@ -345,13 +330,9 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); down_write(&vm->lock); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); - drm_exec_until_all_locked(&exec) { - err = xe_vm_lock_dma_resv(vm, &exec, 1, true); - drm_exec_retry_on_contention(&exec); - if (err) - goto out_unlock; - } + err = drm_gpuvm_exec_lock(&vm_exec); + if (err) + return err; pfence = xe_preempt_fence_create(q, q->compute.context, ++q->compute.seqno); @@ -366,10 +347,8 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) down_read(&vm->userptr.notifier_lock); - dma_resv_add_fence(xe_vm_resv(vm), pfence, - DMA_RESV_USAGE_BOOKKEEP); - - xe_vm_fence_all_extobjs(vm, pfence, DMA_RESV_USAGE_BOOKKEEP); + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, + DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); /* * Check to see if a preemption on VM is in flight 
or userptr @@ -383,7 +362,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) up_read(&vm->userptr.notifier_lock); out_unlock: - drm_exec_fini(&exec); + drm_exec_fini(exec); up_write(&vm->lock); return err; @@ -429,55 +408,6 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm) list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; } -/** - * xe_vm_lock_dma_resv() - Lock the vm dma_resv object and the dma_resv - * objects of the vm's external buffer objects. - * @vm: The vm. - * @exec: Pointer to a struct drm_exec locking context. - * @num_shared: Number of dma-fence slots to reserve in the locked objects. - * @lock_vm: Lock also the vm's dma_resv. - * - * Locks the vm dma-resv objects and all the dma-resv objects of the - * buffer objects on the vm external object list. - * - * Return: 0 on success, Negative error code on error. In particular if - * @intr is set to true, -EINTR or -ERESTARTSYS may be returned. - */ -int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec, - unsigned int num_shared, bool lock_vm) -{ - struct xe_vma *vma, *next; - int err = 0; - - lockdep_assert_held(&vm->lock); - - if (lock_vm) { - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - if (err) - return err; - } - - list_for_each_entry(vma, &vm->extobj.list, extobj.link) { - err = drm_exec_prepare_obj(exec, &xe_vma_bo(vma)->ttm.base, num_shared); - if (err) - return err; - } - - spin_lock(&vm->notifier.list_lock); - list_for_each_entry_safe(vma, next, &vm->notifier.rebind_list, - notifier.rebind_link) { - xe_bo_assert_held(xe_vma_bo(vma)); - - list_del_init(&vma->notifier.rebind_link); - if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED)) - list_move_tail(&vma->combined_links.rebind, - &vm->rebind_list); - } - spin_unlock(&vm->notifier.list_lock); - - return 0; -} - #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 static void xe_vm_kill(struct xe_vm *vm) @@ -526,30 +456,39 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) if (!ktime_before(cur, *end)) return false; - /* - * We would like to keep the ticket here with - * drm_exec_unlock_all(), but WW mutex asserts currently - * stop us from that. In any case this function could go away - * with proper TTM -EDEADLK handling. 
- */ - drm_exec_fini(exec); - msleep(20); return true; } +static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) +{ + struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + struct drm_gpuva *gpuva; + int ret; + + lockdep_assert_held(&vm->lock); + drm_gpuvm_bo_for_each_va(gpuva, vm_bo) + list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, + &vm->rebind_list); + + ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); + if (ret) + return ret; + + vm_bo->evicted = false; + return 0; +} + static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, bool *done) { - struct xe_vma *vma; int err; /* * 1 fence for each preempt fence plus a fence for each tile from a * possible rebind */ - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), - vm->preempt.num_exec_queues + + err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues + vm->xe->info.tile_count); if (err) return err; @@ -565,7 +504,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return 0; } - err = xe_vm_lock_dma_resv(vm, exec, vm->preempt.num_exec_queues, false); + err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues); if (err) return err; @@ -573,17 +512,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, if (err) return err; - list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { - if (xe_vma_has_no_bo(vma) || - vma->gpuva.flags & XE_VMA_DESTROYED) - continue; - - err = xe_bo_validate(xe_vma_bo(vma), vm, false); - if (err) - break; - } - - return err; + return drm_gpuvm_validate(&vm->gpuvm, exec); } static void preempt_rebind_work_func(struct work_struct *w) @@ -623,12 +552,13 @@ retry: err = xe_preempt_work_begin(&exec, vm, &done); drm_exec_retry_on_contention(&exec); - if (err && xe_vm_validate_should_retry(&exec, err, &end)) { - err = -EAGAIN; + if (err || done) { + drm_exec_fini(&exec); + if (err && xe_vm_validate_should_retry(&exec, err, &end)) + err = -EAGAIN; + goto out_unlock_outer; } - if (err || done) - goto out_unlock; } err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); @@ -675,7 +605,7 @@ retry: /* Point of no return. */ arm_preempt_fences(vm, &preempt_fences); - resume_and_reinstall_preempt_fences(vm); + resume_and_reinstall_preempt_fences(vm, &exec); up_read(&vm->userptr.notifier_lock); out_unlock: @@ -780,9 +710,8 @@ int xe_vm_userptr_pin(struct xe_vm *vm) list_for_each_entry_safe(vma, next, &vm->userptr.invalidated, userptr.invalidate_link) { list_del_init(&vma->userptr.invalidate_link); - if (list_empty(&vma->combined_links.userptr)) - list_move_tail(&vma->combined_links.userptr, - &vm->userptr.repin_list); + list_move_tail(&vma->combined_links.userptr, + &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); @@ -791,27 +720,12 @@ int xe_vm_userptr_pin(struct xe_vm *vm) combined_links.userptr) { err = xe_vma_userptr_pin_pages(vma); if (err < 0) - goto out_err; + return err; - list_move_tail(&vma->combined_links.userptr, &tmp_evict); + list_move_tail(&vma->combined_links.userptr, &vm->rebind_list); } - /* Take lock and move to rebind_list for rebinding. 
*/ - err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); - if (err) - goto out_err; - - list_for_each_entry_safe(vma, next, &tmp_evict, combined_links.userptr) - list_move_tail(&vma->combined_links.rebind, &vm->rebind_list); - - dma_resv_unlock(xe_vm_resv(vm)); - return 0; - -out_err: - list_splice_tail(&tmp_evict, &vm->userptr.repin_list); - - return err; } /** @@ -890,8 +804,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, } INIT_LIST_HEAD(&vma->combined_links.rebind); - INIT_LIST_HEAD(&vma->notifier.rebind_link); - INIT_LIST_HEAD(&vma->extobj.link); INIT_LIST_HEAD(&vma->gpuva.gem.entry); vma->gpuva.vm = &vm->gpuvm; @@ -921,6 +833,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, return ERR_CAST(vm_bo); } + drm_gpuvm_bo_extobj_add(vm_bo); drm_gem_object_get(&bo->ttm.base); vma->gpuva.gem.obj = &bo->ttm.base; vma->gpuva.gem.offset = bo_offset_or_userptr; @@ -953,16 +866,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, return vma; } -static bool vm_remove_extobj(struct xe_vma *vma) -{ - if (!list_empty(&vma->extobj.link)) { - xe_vma_vm(vma)->extobj.entries--; - list_del_init(&vma->extobj.link); - return true; - } - return false; -} - static void xe_vma_destroy_late(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); @@ -1003,60 +906,6 @@ static void vma_destroy_work_func(struct work_struct *w) xe_vma_destroy_late(vma); } -static struct xe_vma * -bo_has_vm_references_locked(struct xe_bo *bo, struct xe_vm *vm, - struct xe_vma *ignore) -{ - struct drm_gpuvm_bo *vm_bo; - struct drm_gpuva *va; - struct drm_gem_object *obj = &bo->ttm.base; - - xe_bo_assert_held(bo); - - drm_gem_for_each_gpuvm_bo(vm_bo, obj) { - drm_gpuvm_bo_for_each_va(va, vm_bo) { - struct xe_vma *vma = gpuva_to_vma(va); - - if (vma != ignore && xe_vma_vm(vma) == vm) - return vma; - } - } - - return NULL; -} - -static bool bo_has_vm_references(struct xe_bo *bo, struct xe_vm *vm, - struct xe_vma *ignore) -{ - bool ret; - - xe_bo_lock(bo, false); - ret = !!bo_has_vm_references_locked(bo, vm, ignore); - xe_bo_unlock(bo); - - return ret; -} - -static void __vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) -{ - lockdep_assert_held_write(&vm->lock); - - list_add(&vma->extobj.link, &vm->extobj.list); - vm->extobj.entries++; -} - -static void vm_insert_extobj(struct xe_vm *vm, struct xe_vma *vma) -{ - struct xe_bo *bo = xe_vma_bo(vma); - - lockdep_assert_held_write(&vm->lock); - - if (bo_has_vm_references(bo, vm, vma)) - return; - - __vm_insert_extobj(vm, vma); -} - static void vma_destroy_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { @@ -1082,20 +931,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) } else if (!xe_vma_is_null(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); - spin_lock(&vm->notifier.list_lock); - list_del(&vma->notifier.rebind_link); - spin_unlock(&vm->notifier.list_lock); - drm_gpuva_unlink(&vma->gpuva); - - if (!xe_vma_bo(vma)->vm && vm_remove_extobj(vma)) { - struct xe_vma *other; - - other = bo_has_vm_references_locked(xe_vma_bo(vma), vm, NULL); - - if (other) - __vm_insert_extobj(vm, other); - } } xe_vm_assert_held(vm); @@ -1213,6 +1049,7 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm); static struct drm_gpuvm_ops gpuvm_ops = { .op_alloc = xe_vm_op_alloc, + .vm_bo_validate = xe_gpuvm_validate, .vm_free = xe_vm_free, }; @@ -1426,9 +1263,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); - INIT_LIST_HEAD(&vm->notifier.rebind_list); - 
spin_lock_init(&vm->notifier.list_lock); - INIT_WORK(&vm->destroy_work, vm_destroy_work_func); INIT_LIST_HEAD(&vm->preempt.exec_queues); @@ -1437,8 +1271,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) for_each_tile(tile, xe, id) xe_range_fence_tree_init(&vm->rftree[id]); - INIT_LIST_HEAD(&vm->extobj.list); - vm->pt_ops = &xelp_pt_ops; if (!(flags & XE_VM_FLAG_MIGRATION)) @@ -1647,7 +1479,6 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vma_destroy_unlocked(vma); } - xe_assert(xe, list_empty(&vm->extobj.list)); up_write(&vm->lock); mutex_lock(&xe->usm.lock); @@ -2289,22 +2120,36 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, bool read_only, bool is_null, u16 pat_index) { struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; + struct drm_exec exec; struct xe_vma *vma; int err; lockdep_assert_held_write(&vm->lock); if (bo) { - err = xe_bo_lock(bo, true); - if (err) - return ERR_PTR(err); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + err = 0; + if (!bo->vm) { + err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); + drm_exec_retry_on_contention(&exec); + } + if (!err) { + err = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + } + if (err) { + drm_exec_fini(&exec); + return ERR_PTR(err); + } + } } vma = xe_vma_create(vm, bo, op->gem.offset, op->va.addr, op->va.addr + op->va.range - 1, read_only, is_null, pat_index); if (bo) - xe_bo_unlock(bo); + drm_exec_fini(&exec); if (xe_vma_is_userptr(vma)) { err = xe_vma_userptr_pin_pages(vma); @@ -2314,7 +2159,6 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, return ERR_PTR(err); } } else if (!xe_vma_has_no_bo(vma) && !bo->vm) { - vm_insert_extobj(vm, vma); err = add_preempt_fences(vm, bo); if (err) { prep_vma_destroy(vm, vma, false); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index a1907544cc4f..cf2f96e8c1ab 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -74,9 +74,20 @@ static inline bool xe_vm_has_scratch(const struct xe_vm *vm) return vm->flags & XE_VM_FLAG_SCRATCH_PAGE; } +/** + * gpuvm_to_vm() - Return the embedding xe_vm from a struct drm_gpuvm pointer + * @gpuvm: The struct drm_gpuvm pointer + * + * Return: Pointer to the embedding struct xe_vm. 
+ */ +static inline struct xe_vm *gpuvm_to_vm(struct drm_gpuvm *gpuvm) +{ + return container_of(gpuvm, struct xe_vm, gpuvm); +} + static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva) { - return container_of(gpuva->vm, struct xe_vm, gpuvm); + return gpuvm_to_vm(gpuva->vm); } static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva) @@ -219,12 +230,6 @@ int xe_vma_userptr_check_repin(struct xe_vma *vma); bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); -int xe_vm_lock_dma_resv(struct xe_vm *vm, struct drm_exec *exec, - unsigned int num_shared, bool lock_vm); - -void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, - enum dma_resv_usage usage); - int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 15471025a44f..2e023596cb15 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -62,26 +62,17 @@ struct xe_vma { /** @gpuva: Base GPUVA object */ struct drm_gpuva gpuva; - /** @combined_links: links into lists which are mutually exclusive */ + /** + * @combined_links: links into lists which are mutually exclusive. + * Locking: vm lock in write mode OR vm lock in read mode and the vm's + * resv. + */ union { - /** - * @userptr: link into VM repin list if userptr. Protected by - * vm->lock in write mode. - */ + /** @userptr: link into VM repin list if userptr. */ struct list_head userptr; - /** - * @rebind: link into VM if this VMA needs rebinding, and - * if it's a bo (not userptr) needs validation after a possible - * eviction. Protected by the vm's resv lock and typically - * vm->lock is also held in write mode. The only place where - * vm->lock isn't held is the BO eviction path which has - * mutually exclusive execution with userptr. - */ + /** @rebind: link into VM if this VMA needs rebinding. */ struct list_head rebind; - /** - * @destroy: link to contested list when VM is being closed. - * Protected by vm->lock in write mode and vm's resv lock. - */ + /** @destroy: link to contested list when VM is being closed. */ struct list_head destroy; } combined_links; @@ -115,18 +106,6 @@ struct xe_vma { */ u16 pat_index; - struct { - struct list_head rebind_link; - } notifier; - - struct { - /** - * @extobj.link: Link into vm's external object list. - * protected by the vm lock. - */ - struct list_head link; - } extobj; - /** * @userptr: user pointer state, only allocated for VMAs that are * user pointers @@ -180,9 +159,9 @@ struct xe_vm { struct rw_semaphore lock; /** - * @rebind_list: list of VMAs that need rebinding, and if they are - * bos (not userptr), need validation after a possible eviction. The - * list is protected by @resv. + * @rebind_list: list of VMAs that need rebinding. Protected by the + * vm->lock in write mode, OR (the vm->lock in read mode and the + * vm resv). */ struct list_head rebind_list; @@ -202,14 +181,6 @@ struct xe_vm { */ struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE]; - /** @extobj: bookkeeping for external objects. 
Protected by the vm lock */ - struct { - /** @enties: number of external BOs attached this VM */ - u32 entries; - /** @list: list of vmas with external bos attached */ - struct list_head list; - } extobj; - /** @async_ops: async VM operations (bind / unbinds) */ struct { /** @list: list of pending async VM ops */ @@ -299,22 +270,6 @@ struct xe_vm { struct xe_vma *last_fault_vma; } usm; - /** - * @notifier: Lists and locks for temporary usage within notifiers where - * we either can't grab the vm lock or the vm resv. - */ - struct { - /** @notifier.list_lock: lock protecting @rebind_list */ - spinlock_t list_lock; - /** - * @notifier.rebind_list: list of vmas that we want to put on the - * main @rebind_list. This list is protected for writing by both - * notifier.list_lock, and the resv of the bo the vma points to, - * and for reading by the notifier.list_lock only. - */ - struct list_head rebind_list; - } notifier; - /** @error_capture: allow to track errors */ struct { /** @capture_once: capture only one error per VM */ -- cgit v1.2.3 From 35705e32b13cf800a47f10844c4f8d1334d411c7 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 12 Dec 2023 11:01:44 +0100 Subject: drm/xe: Use DRM_GPUVM_RESV_PROTECTED for gpuvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use DRM_GPUVM_RESV_PROTECTED to use coarse-grained locking for the evict and external object list. Since we are already holding the relevant RESV locks, for now at least, we don't need the fine-grained locking. Signed-off-by: Thomas Hellström Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20231212100144.6833-3-thomas.hellstrom@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7a3b680d01a3..0b9510595db0 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1282,8 +1282,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) goto err_no_resv; } - drm_gpuvm_init(&vm->gpuvm, "Xe VM", 0, &xe->drm, vm_resv_obj, - 0, vm->size, 0, 0, &gpuvm_ops); + drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, + vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); drm_gem_object_put(vm_resv_obj); -- cgit v1.2.3 From 3b97e3b265c97b7cd7dcbdb2f7ef93c6e6f94948 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 12 Dec 2023 09:00:37 -0800 Subject: drm/xe: Use a flags field instead of bools for VMA create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a flags field instead of several bools for VMA create as it is easier to read and less bug prone.
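The underlying idea is the usual flags-word pattern: independent boolean properties are packed into one unsigned int, so call sites stay readable and two adjacent bool arguments cannot be silently swapped. A small standalone illustration using the flag names from the patch (everything else is invented for the example):

#include <assert.h>
#include <stdbool.h>

#define BIT(n) (1u << (n))

/* Flag values as introduced by the patch. */
#define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
#define VMA_CREATE_FLAG_IS_NULL		BIT(1)

struct toy_vma {
	bool read_only;
	bool is_null;
};

/* One flags word replaces two easily-swapped bool parameters. */
static struct toy_vma toy_vma_create(unsigned int flags)
{
	struct toy_vma vma = {
		.read_only = flags & VMA_CREATE_FLAG_READ_ONLY,
		.is_null = flags & VMA_CREATE_FLAG_IS_NULL,
	};

	return vma;
}

int main(void)
{
	struct toy_vma v = toy_vma_create(VMA_CREATE_FLAG_READ_ONLY);

	assert(v.read_only && !v.is_null);
	return 0;
}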
Suggested-by: Thomas Hellström Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 64 ++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 30 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 0b9510595db0..7b38338ab5e2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -778,17 +778,20 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) return fence; } +#define VMA_CREATE_FLAG_READ_ONLY BIT(0) +#define VMA_CREATE_FLAG_IS_NULL BIT(1) + static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 start, u64 end, - bool read_only, - bool is_null, - u16 pat_index) + u16 pat_index, unsigned int flags) { struct xe_vma *vma; struct xe_tile *tile; u8 id; + bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); + bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); @@ -2117,7 +2120,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, } static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - bool read_only, bool is_null, u16 pat_index) + u16 pat_index, unsigned int flags) { struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; struct drm_exec exec; @@ -2146,8 +2149,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, } vma = xe_vma_create(vm, bo, op->gem.offset, op->va.addr, op->va.addr + - op->va.range - 1, read_only, is_null, - pat_index); + op->va.range - 1, pat_index, flags); if (bo) drm_exec_fini(&exec); @@ -2272,7 +2274,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + struct xe_vma *vma; bool first = list_empty(ops_list); + unsigned int flags = 0; INIT_LIST_HEAD(&op->link); list_add_tail(&op->link, ops_list); @@ -2288,10 +2292,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - struct xe_vma *vma; + flags |= op->map.read_only ? + VMA_CREATE_FLAG_READ_ONLY : 0; + flags |= op->map.is_null ? + VMA_CREATE_FLAG_IS_NULL : 0; - vma = new_vma(vm, &op->base.map, op->map.read_only, - op->map.is_null, op->map.pat_index); + vma = new_vma(vm, &op->base.map, op->map.pat_index, + flags); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2307,16 +2314,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, op->remap.range = xe_vma_size(old); if (op->base.remap.prev) { - struct xe_vma *vma; - bool read_only = - op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY; - bool is_null = - op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE; - - vma = new_vma(vm, op->base.remap.prev, read_only, - is_null, old->pat_index); + flags |= op->base.remap.unmap->va->flags & + XE_VMA_READ_ONLY ? + VMA_CREATE_FLAG_READ_ONLY : 0; + flags |= op->base.remap.unmap->va->flags & + DRM_GPUVA_SPARSE ? 
+ VMA_CREATE_FLAG_IS_NULL : 0; + + vma = new_vma(vm, op->base.remap.next, + old->pat_index, flags); if (IS_ERR(vma)) return PTR_ERR(vma); -- cgit v1.2.3 From 53bf60f6d8503c788fee9c30dacef682edbe61fd Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 5 Dec 2023 10:56:17 -0800 Subject: drm/xe: Use a flags field instead of bools for sync parse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a flags field instead of several bools for sync parse as it is easier to read and less bug prone. v2: Pull in header change from subsequent patch Suggested-by: Thomas Hellström Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 5 +++-- drivers/gpu/drm/xe/xe_sync.c | 4 +++- drivers/gpu/drm/xe/xe_sync.h | 5 ++++- drivers/gpu/drm/xe/xe_vm.c | 5 +++-- 4 files changed, 13 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 15ab1a927613..3c9f801d570b 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -151,8 +151,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; i < args->num_syncs; i++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], - &syncs_user[i], true, - xe_vm_in_lr_mode(vm)); + &syncs_user[i], SYNC_PARSE_FLAG_EXEC | + (xe_vm_in_lr_mode(vm) ?
+ SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) goto err_syncs; } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 936227e79483..2a3f508722fc 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -98,10 +98,12 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, - bool exec, bool in_lr_mode) + unsigned int flags) { struct drm_xe_sync sync_in; int err; + bool exec = flags & SYNC_PARSE_FLAG_EXEC; + bool in_lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE; bool signal; if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 30958ddc4cdc..1b748cec4678 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -12,10 +12,13 @@ struct xe_device; struct xe_file; struct xe_sched_job; +#define SYNC_PARSE_FLAG_EXEC BIT(0) +#define SYNC_PARSE_FLAG_LR_MODE BIT(1) + int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, - bool exec, bool compute_mode); + unsigned int flags); int xe_sync_entry_wait(struct xe_sync_entry *sync); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7b38338ab5e2..d1e53905268f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2994,8 +2994,9 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) syncs_user = u64_to_user_ptr(args->syncs); for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], false, - xe_vm_in_lr_mode(vm)); + &syncs_user[num_syncs], + xe_vm_in_lr_mode(vm) ? + SYNC_PARSE_FLAG_LR_MODE : 0); if (err) goto free_syncs; } -- cgit v1.2.3 From f5783b5026f76083ef4c53f6240619bd5c7bb9a5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 11 Dec 2023 19:20:58 -0500 Subject: drm/xe: Remove vram size info from sysfs This information is already part of the query IOCTL. Let's not duplicate it here in the sysfs. 
Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_tile_sysfs.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 16376607c68f..0f8d3e7fce46 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -20,20 +20,6 @@ static const struct kobj_type xe_tile_sysfs_kobj_type = { .sysfs_ops = &kobj_sysfs_ops, }; -static ssize_t -physical_vram_size_bytes_show(struct device *kdev, struct device_attribute *attr, - char *buf) -{ - struct xe_tile *tile = kobj_to_tile(&kdev->kobj); - - return sysfs_emit(buf, "%llu\n", tile->mem.vram.actual_physical_size); -} - -static DEVICE_ATTR_RO(physical_vram_size_bytes); - -static const struct attribute *physical_memsize_attr = - &dev_attr_physical_vram_size_bytes.attr; - static void tile_sysfs_fini(struct drm_device *drm, void *arg) { struct xe_tile *tile = arg; @@ -64,15 +50,8 @@ void xe_tile_sysfs_init(struct xe_tile *tile) tile->sysfs = &kt->base; - if (IS_DGFX(xe) && xe->info.platform != XE_DG1 && - sysfs_create_file(tile->sysfs, physical_memsize_attr)) - drm_warn(&xe->drm, - "Sysfs creation to read addr_range per tile failed\n"); - err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); - if (err) { + if (err) drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", __func__, err); - return; - } } -- cgit v1.2.3 From eb9702ad29863c1ae41d17d8504c7444f280dfff Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 5 Dec 2023 10:39:54 -0800 Subject: drm/xe: Allow num_batch_buffer / num_binds == 0 in IOCTLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The idea being out-syncs can signal indicating all previous operations on the bind queue are complete. An example use case of this would be support for implementing vkQueueWaitIdle easily. All in-syncs are waited on before signaling out-syncs. This is implemented by forming a composite software fence of in-syncs and installing this fence in the out-syncs and exec queue last fence slot. The last fence must be added as a dependency for jobs on user exec queues as it is possible for the last fence to be a composite software fence (unordered, ioctl with zero bb or binds) rather than hardware fence (ordered, previous job on queue). 
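Put differently, the fence installed into the out-syncs (and into the queue's last-fence slot) may not signal until every in-sync and the queue's previous last fence have signalled; when there are no in-syncs it can simply be the previous last fence again. A small standalone model of that selection logic, using toy structs rather than the dma_fence API (only the decision made by the new xe_sync_in_fence_get() helper is modelled, all names are invented for the example):

#include <stdio.h>

/* Toy stand-in for dma_fence. */
struct toy_fence { const char *name; };

#define MAX_FENCES 8

/* Gather what the returned out-fence must wait for: every in-fence plus the
 * queue's previous last fence; with no in-fences, just the last fence. */
static int pick_out_fence(struct toy_fence **in, int num_in,
			  struct toy_fence *last, struct toy_fence **out)
{
	int n = 0, i;

	for (i = 0; i < num_in && n < MAX_FENCES - 1; i++)
		out[n++] = in[i];
	out[n++] = last;	/* queue ordering is preserved either way */
	return n;		/* 1: reuse last fence, >1: composite fence */
}

int main(void)
{
	struct toy_fence a = { "in0" }, b = { "in1" }, last = { "last" };
	struct toy_fence *in[] = { &a, &b }, *out[MAX_FENCES];
	int i, n = pick_out_fence(in, 2, &last, out);

	for (i = 0; i < n; i++)
		printf("%s\n", out[i]->name);	/* in0, in1, last */
	return 0;
}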
Cc: Thomas Hellström Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec.c | 27 ++++++++++- drivers/gpu/drm/xe/xe_exec_queue.c | 5 +- drivers/gpu/drm/xe/xe_exec_queue_types.h | 5 +- drivers/gpu/drm/xe/xe_migrate.c | 14 ++++-- drivers/gpu/drm/xe/xe_sched_job.c | 18 ++++++++ drivers/gpu/drm/xe/xe_sched_job.h | 4 ++ drivers/gpu/drm/xe/xe_sync.c | 78 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sync.h | 6 +++ drivers/gpu/drm/xe/xe_vm.c | 77 ++++++++++++++++++++++--------- 9 files changed, 206 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 3c9f801d570b..ba92e5619da3 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -131,7 +131,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) return -EINVAL; - if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer)) + if (XE_IOCTL_DBG(xe, args->num_batch_buffer && + q->width != args->num_batch_buffer)) return -EINVAL; if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) { @@ -207,6 +208,24 @@ retry: goto err_exec; } + if (!args->num_batch_buffer) { + if (!xe_vm_in_lr_mode(vm)) { + struct dma_fence *fence; + + fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto err_exec; + } + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_exec_queue_last_fence_set(q, vm, fence); + dma_fence_put(fence); + } + + goto err_exec; + } + if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { err = -EWOULDBLOCK; goto err_exec; @@ -266,6 +285,10 @@ retry: goto err_put_job; if (!xe_vm_in_lr_mode(vm)) { + err = xe_sched_job_last_fence_add_dep(job, vm); + if (err) + goto err_put_job; + err = down_read_interruptible(&vm->userptr.notifier_lock); if (err) goto err_put_job; @@ -290,6 +313,8 @@ retry: if (xe_exec_queue_is_lr(q)) q->ring_ops->emit_job(job); + if (!xe_vm_in_lr_mode(vm)) + xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished); xe_sched_job_push(job); xe_vm_reactivate_rebind(vm); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 85bc25fe99ed..eeb9605dd45f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -886,7 +886,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - lockdep_assert_held_write(&vm->lock); + if (q->flags & EXEC_QUEUE_FLAG_VM) + lockdep_assert_held(&vm->lock); + else + xe_vm_assert_held(vm); } /** diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 6826feb650f3..c7aefa1c8c31 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -66,8 +66,9 @@ struct xe_exec_queue { struct xe_hw_fence_irq *fence_irq; /** - * @last_fence: last fence on engine, protected by vm->lock in write - * mode if bind engine + * @last_fence: last fence on exec queue, protected by vm->lock in write + * mode if bind exec queue, protected by dma resv lock if non-bind exec + * queue */ struct dma_fence *last_fence; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2ca927f3fb2a..5fd0706a6045 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ 
-1163,17 +1163,24 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m, return fence; } -static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) +static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs) { + struct dma_fence *fence; int i; for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = syncs[i].fence; + fence = syncs[i].fence; if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return false; } + if (q) { + fence = xe_exec_queue_last_fence_get(q, vm); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return false; + } return true; } @@ -1234,7 +1241,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, u16 pat_index = xe->pat.idx[XE_CACHE_WB]; /* Use the CPU if no in syncs and engine is idle */ - if (no_in_syncs(syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) { + if (no_in_syncs(vm, q, syncs, num_syncs) && xe_exec_queue_is_idle(q_override)) { fence = xe_migrate_update_pgtables_cpu(m, vm, bo, updates, num_updates, first_munmap_rebind, @@ -1351,6 +1358,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, goto err_job; } + err = xe_sched_job_last_fence_add_dep(job, vm); for (i = 0; !err && i < num_syncs; i++) err = xe_sync_entry_add_deps(&syncs[i], job); diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index a9c7ae815bec..01106a1156ad 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -260,3 +260,21 @@ void xe_sched_job_push(struct xe_sched_job *job) drm_sched_entity_push_job(&job->drm); xe_sched_job_put(job); } + +/** + * xe_sched_job_last_fence_add_dep - Add last fence dependency to job + * @job:job to add the last fence dependency to + * @vm: virtual memory job belongs to + * + * Returns: + * 0 on success, or an error on failing to expand the array. 
+ */ +int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) +{ + struct dma_fence *fence; + + fence = xe_exec_queue_last_fence_get(job->q, vm); + dma_fence_get(fence); + + return drm_sched_job_add_dependency(&job->drm, fence); +} diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index 6ca1d426c036..34f475ba7f50 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -8,6 +8,8 @@ #include "xe_sched_job_types.h" +struct xe_vm; + #define XE_SCHED_HANG_LIMIT 1 #define XE_SCHED_JOB_TIMEOUT LONG_MAX @@ -54,6 +56,8 @@ bool xe_sched_job_completed(struct xe_sched_job *job); void xe_sched_job_arm(struct xe_sched_job *job); void xe_sched_job_push(struct xe_sched_job *job); +int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm); + static inline struct xe_sched_job * to_xe_sched_job(struct drm_sched_job *drm) { diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 2a3f508722fc..e4c220cf9115 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -5,6 +5,7 @@ #include "xe_sync.h" +#include #include #include #include @@ -14,6 +15,7 @@ #include #include "xe_device_types.h" +#include "xe_exec_queue.h" #include "xe_macros.h" #include "xe_sched_job_types.h" @@ -104,6 +106,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, int err; bool exec = flags & SYNC_PARSE_FLAG_EXEC; bool in_lr_mode = flags & SYNC_PARSE_FLAG_LR_MODE; + bool disallow_user_fence = flags & SYNC_PARSE_FLAG_DISALLOW_USER_FENCE; bool signal; if (copy_from_user(&sync_in, sync_user, sizeof(*sync_user))) @@ -164,6 +167,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, break; case DRM_XE_SYNC_TYPE_USER_FENCE: + if (XE_IOCTL_DBG(xe, disallow_user_fence)) + return -EOPNOTSUPP; + if (XE_IOCTL_DBG(xe, !signal)) return -EOPNOTSUPP; @@ -264,3 +270,75 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) if (sync->ufence) user_fence_put(sync->ufence); } + +/** + * xe_sync_in_fence_get() - Get a fence from syncs, exec queue, and VM + * @sync: input syncs + * @num_sync: number of syncs + * @q: exec queue + * @vm: VM + * + * Get a fence from syncs, exec queue, and VM. If syncs contain in-fences create + * and return a composite fence of all in-fences + last fence. If no in-fences + * return last fence on input exec queue. Caller must drop reference to + * returned fence. + * + * Return: fence on success, ERR_PTR(-ENOMEM) on failure + */ +struct dma_fence * +xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, + struct xe_exec_queue *q, struct xe_vm *vm) +{ + struct dma_fence **fences = NULL; + struct dma_fence_array *cf = NULL; + struct dma_fence *fence; + int i, num_in_fence = 0, current_fence = 0; + + lockdep_assert_held(&vm->lock); + + /* Count in-fences */ + for (i = 0; i < num_sync; ++i) { + if (sync[i].fence) { + ++num_in_fence; + fence = sync[i].fence; + } + } + + /* Easy case... 
*/ + if (!num_in_fence) { + fence = xe_exec_queue_last_fence_get(q, vm); + dma_fence_get(fence); + return fence; + } + + /* Create composite fence */ + fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + for (i = 0; i < num_sync; ++i) { + if (sync[i].fence) { + dma_fence_get(sync[i].fence); + fences[current_fence++] = sync[i].fence; + } + } + fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm); + dma_fence_get(fences[current_fence - 1]); + cf = dma_fence_array_create(num_in_fence, fences, + vm->composite_fence_ctx, + vm->composite_fence_seqno++, + false); + if (!cf) { + --vm->composite_fence_seqno; + goto err_out; + } + + return &cf->base; + +err_out: + while (current_fence) + dma_fence_put(fences[--current_fence]); + kfree(fences); + kfree(cf); + + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 1b748cec4678..d284afbe917c 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -9,11 +9,14 @@ #include "xe_sync_types.h" struct xe_device; +struct xe_exec_queue; struct xe_file; struct xe_sched_job; +struct xe_vm; #define SYNC_PARSE_FLAG_EXEC BIT(0) #define SYNC_PARSE_FLAG_LR_MODE BIT(1) +#define SYNC_PARSE_FLAG_DISALLOW_USER_FENCE BIT(2) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, @@ -26,5 +29,8 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, struct dma_fence *fence); void xe_sync_entry_cleanup(struct xe_sync_entry *sync); +struct dma_fence * +xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, + struct xe_exec_queue *q, struct xe_vm *vm); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d1e53905268f..2f3df9ee67c9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2722,7 +2722,6 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, return -EINVAL; if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, !args->num_binds) || XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) return -EINVAL; @@ -2837,6 +2836,37 @@ free_bind_ops: return err; } +static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, + int num_syncs) +{ + struct dma_fence *fence; + int i, err = 0; + + fence = xe_sync_in_fence_get(syncs, num_syncs, + to_wait_exec_queue(vm, q), vm); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], NULL, fence); + + xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, + fence); + + if (xe_vm_sync_mode(vm, q)) { + long timeout = dma_fence_wait(fence, true); + + if (timeout < 0) + err = -EINTR; + } + + dma_fence_put(fence); + + return err; +} + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -2875,7 +2905,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto put_exec_queue; } - if (XE_IOCTL_DBG(xe, async != + if (XE_IOCTL_DBG(xe, args->num_binds && async != !!(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC))) { err = -EINVAL; goto put_exec_queue; @@ -2889,7 +2919,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (!args->exec_queue_id) { - if (XE_IOCTL_DBG(xe, async != + if (XE_IOCTL_DBG(xe, args->num_binds && async != !!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT))) { err = -EINVAL; goto put_vm; @@ -2916,16 +2946,18 @@ int 
xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } - bos = kzalloc(sizeof(*bos) * args->num_binds, GFP_KERNEL); - if (!bos) { - err = -ENOMEM; - goto release_vm_lock; - } + if (args->num_binds) { + bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + if (!bos) { + err = -ENOMEM; + goto release_vm_lock; + } - ops = kzalloc(sizeof(*ops) * args->num_binds, GFP_KERNEL); - if (!ops) { - err = -ENOMEM; - goto release_vm_lock; + ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + if (!ops) { + err = -ENOMEM; + goto release_vm_lock; + } } for (i = 0; i < args->num_binds; ++i) { @@ -2995,12 +3027,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], - xe_vm_in_lr_mode(vm) ? - SYNC_PARSE_FLAG_LR_MODE : 0); + (xe_vm_in_lr_mode(vm) ? + SYNC_PARSE_FLAG_LR_MODE : 0) | + (!args->num_binds ? + SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); if (err) goto free_syncs; } + if (!args->num_binds) { + err = -ENODATA; + goto free_syncs; + } + for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3058,12 +3097,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) unwind_ops: vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); free_syncs: - for (i = 0; err == -ENODATA && i < num_syncs; i++) { - struct dma_fence *fence = - xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm); - - xe_sync_entry_signal(&syncs[i], NULL, fence); - } + if (err == -ENODATA) + err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); @@ -3083,7 +3118,7 @@ free_objs: kfree(ops); if (args->num_binds > 1) kfree(bind_ops); - return err == -ENODATA ? 0 : err; + return err; } /** -- cgit v1.2.3 From 7a18d36f88105c0964846dbf9f7f1b0d43e860db Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 12 Dec 2023 13:56:04 -0800 Subject: drm/xe: Remove duplicate RING_MAX_NONPRIV_SLOTS definition The engine register header wound up with two definitions for RING_MAX_NONPRIV_SLOTS, likely due to a rebase mistake. Keep the definition that's in an appropriate place (i.e., with the FORCE_TO_NONPRIV register definition) and remove the other. Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20231212215603.2041841-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 3942db268b01..1a857c4edcf5 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -40,7 +40,6 @@ #define RING_NOPID(base) XE_REG((base) + 0x94) #define RING_IMR(base) XE_REG((base) + 0xa8) -#define RING_MAX_NONPRIV_SLOTS 12 #define RING_EIR(base) XE_REG((base) + 0xb0) #define RING_EMR(base) XE_REG((base) + 0xb4) -- cgit v1.2.3 From 4cb12b71923b6e2354093fbbde9bcadaec3d813f Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:24 +0530 Subject: drm/xe/xe2: Determine bios enablement for flat ccs on igfx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If bios disables flat ccs on igfx make has_flat_ccs as 0 and notify via drm_dbg. Bspec:59255 v2: - Release forcewake. 
- Add registers in order. - drop dgfx condition and only add it back in the future when the support for an Xe2 dgpu will be added. - Use drm_dbg instead of drm_info. (Matt) v3: - Address nit(Matt) Cc: Matt Roper Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matt Roper Acked-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ drivers/gpu/drm/xe/xe_device.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5f5a72e9d0d8..f5bf4c6d1761 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -162,6 +162,9 @@ #define XEHP_SQCM XE_REG_MCR(0x8724) #define EN_32B_ACCESS REG_BIT(30) +#define XE2_FLAT_CCS_BASE_RANGE_LOWER XE_REG_MCR(0x8800) +#define XE2_FLAT_CCS_ENABLE REG_BIT(0) + #define GSCPSMI_BASE XE_REG(0x880c) #define MIRROR_FUSE3 XE_REG(0x9118) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2e0b2e40d8f3..82e3705b88cd 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -15,6 +15,7 @@ #include #include +#include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_debugfs.h" @@ -26,6 +27,7 @@ #include "xe_exec.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" #include "xe_irq.h" #include "xe_mmio.h" #include "xe_module.h" @@ -401,6 +403,30 @@ int xe_device_probe_early(struct xe_device *xe) return 0; } +static int xe_device_set_has_flat_ccs(struct xe_device *xe) +{ + u32 reg; + int err; + + if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) + return 0; + + struct xe_gt *gt = xe_root_mmio_gt(xe); + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER); + xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE); + + if (!xe->info.has_flat_ccs) + drm_dbg(&xe->drm, + "Flat CCS has been disabled in bios, May lead to performance impact"); + + return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + int xe_device_probe(struct xe_device *xe) { struct xe_tile *tile; @@ -456,6 +482,10 @@ int xe_device_probe(struct xe_device *xe) goto err_irq_shutdown; } + err = xe_device_set_has_flat_ccs(xe); + if (err) + return err; + err = xe_mmio_probe_vram(xe); if (err) goto err_irq_shutdown; -- cgit v1.2.3 From 064686272b7a7371eea32d5e7b89597cf5c70c0b Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:25 +0530 Subject: drm/xe/xe2: Modify main memory to ccs memory ratio. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On xe2 platforms each byte of CCS data now represents 512 bytes of main memory data. 
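[Note: a minimal, self-contained sketch of the resulting sizing math, not part of the patch. bytes_per_ccs_byte() and ccs_bytes() below simply mirror the NUM_BYTES_PER_CCS_BYTE() and xe_device_ccs_bytes() changes in the diff that follows; graphics_ver stands in for GRAPHICS_VER(xe) and the standalone harness is illustrative only.]

/* Illustrative only: how the per-platform ratio feeds the CCS backing-store size. */
#include <stdio.h>

#define SZ_1M (1024UL * 1024UL)

/* Mirrors NUM_BYTES_PER_CCS_BYTE(_xe): one CCS byte covers 512 main-memory bytes on Xe2 (ver >= 20), 256 before. */
static unsigned long bytes_per_ccs_byte(unsigned int graphics_ver)
{
	return graphics_ver >= 20 ? 512 : 256;
}

/* Mirrors xe_device_ccs_bytes() for a device with flat CCS enabled. */
static unsigned long ccs_bytes(unsigned int graphics_ver, unsigned long size)
{
	unsigned long ratio = bytes_per_ccs_byte(graphics_ver);

	return (size + ratio - 1) / ratio; /* DIV_ROUND_UP() */
}

int main(void)
{
	/* A 1 MiB buffer needs 4 KiB of CCS backing pre-Xe2, but only 2 KiB on Xe2. */
	printf("ver 12.x: %lu CCS bytes\n", ccs_bytes(12, SZ_1M));
	printf("ver 20.x: %lu CCS bytes\n", ccs_bytes(20, SZ_1M));
	return 0;
}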
Cc: Matt Roper Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Reviewed-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 2 +- drivers/gpu/drm/xe/xe_device.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index f1c5bf203b3d..1f9c32e694c6 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -16,7 +16,7 @@ #define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) #define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) #define NUM_CCS_BYTES_PER_BLOCK 256 -#define NUM_BYTES_PER_CCS_BYTE 256 +#define NUM_BYTES_PER_CCS_BYTE(_xe) (GRAPHICS_VER(_xe) >= 20 ? 512 : 256) #define NUM_CCS_BLKS_PER_XFER 1024 #define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 82e3705b88cd..221e87584352 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -615,7 +615,7 @@ void xe_device_wmb(struct xe_device *xe) u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? - DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0; + DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; } bool xe_device_mem_access_ongoing(struct xe_device *xe) -- cgit v1.2.3 From 20561efb0ffd199fec1caaa5a0de439fab69d89a Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:26 +0530 Subject: drm/xe/xe2: Allocate extra pages for ccs during bo create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incase of bo move from PL_TT to PL_SYSTEM these pages will be used to store ccs metadata from flat ccs. And during bo move to PL_TT from PL_SYSTEM the metadata will be copied from extra pages to flat ccs. This copy of ccs metadata ensures ccs remains unaltered between swapping out of bo to disk and its restore to PL_TT. Bspec:58796 v2: - For dgfx ensure system bit is not set. - Modify comments.(Thomas) v3: - Separate out patch to modify main memory to ccs memory ratio.(Matt) v4: - Update description for commit message. - Make bo allocation routine more readable.(Matt) Cc: Matt Roper Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 13ebe33bb7a2..c10aa5a63a70 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2160,19 +2160,24 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc) * placed in system memory. * @bo: The xe_bo * - * If a bo has an allowable placement in XE_PL_TT memory, it can't use - * flat CCS compression, because the GPU then has no way to access the - * CCS metadata using relevant commands. For the opposite case, we need to - * allocate storage for the CCS metadata when the BO is not resident in - * VRAM memory. - * * Return: true if extra pages need to be allocated, false otherwise. 
*/ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) { - return bo->ttm.type == ttm_bo_type_device && - !(bo->flags & XE_BO_CREATE_SYSTEM_BIT) && - (bo->flags & XE_BO_CREATE_VRAM_MASK); + struct xe_device *xe = xe_bo_device(bo); + + if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device) + return false; + + /* On discrete GPUs, if the GPU can access this buffer from + * system memory (i.e., it allows XE_PL_TT placement), FlatCCS + * can't be used since there's no CCS storage associated with + * non-VRAM addresses. + */ + if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT)) + return false; + + return true; } /** -- cgit v1.2.3 From 9cca49021c81d05b84916b87092602be2c412e04 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:27 +0530 Subject: drm/xe/xe2: Updates on XY_CTRL_SURF_COPY_BLT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The XY_CTRL_SURF_COPY_BLT instruction operating on ccs data expects size in pages of main memory for which CCS data should be copied. - The bitfield representing copy size in XY_CTRL_SURF_COPY_BLT has shifted one bit higher in the instruction. v2: - Fix the num_pages for ccs size calculation. - Address nits (Thomas) v3: - Use FIELD_PREP and FIELD_FIT instead of shifts and numbers.(Matt) Cc: Matt Roper Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matt Roper Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 5 ++--- drivers/gpu/drm/xe/xe_migrate.c | 21 +++++++++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h index 1f9c32e694c6..a255946b6f77 100644 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h @@ -11,13 +11,12 @@ #define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) #define SRC_ACCESS_TYPE_SHIFT 21 #define DST_ACCESS_TYPE_SHIFT 20 -#define CCS_SIZE_MASK 0x3FF -#define CCS_SIZE_SHIFT 8 +#define CCS_SIZE_MASK GENMASK(17, 8) +#define XE2_CCS_SIZE_MASK GENMASK(18, 9) #define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) #define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) #define NUM_CCS_BYTES_PER_BLOCK 256 #define NUM_BYTES_PER_CCS_BYTE(_xe) (GRAPHICS_VER(_xe) >= 20 ? 
512 : 256) -#define NUM_CCS_BLKS_PER_XFER 1024 #define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) #define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 5fd0706a6045..9895bab22f85 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -526,21 +526,30 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb, struct xe_device *xe = gt_to_xe(gt); u32 *cs = bb->cs + bb->len; u32 num_ccs_blks; + u32 num_pages; + u32 ccs_copy_size; u32 mocs; - num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), - NUM_CCS_BYTES_PER_BLOCK); - xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER); + if (GRAPHICS_VERx100(xe) >= 2000) { + num_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); + xe_gt_assert(gt, FIELD_FIT(XE2_CCS_SIZE_MASK, num_pages - 1)); - if (GRAPHICS_VERx100(xe) >= 2000) + ccs_copy_size = REG_FIELD_PREP(XE2_CCS_SIZE_MASK, num_pages - 1); mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index); - else + + } else { + num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size), + NUM_CCS_BYTES_PER_BLOCK); + xe_gt_assert(gt, FIELD_FIT(CCS_SIZE_MASK, num_ccs_blks - 1)); + + ccs_copy_size = REG_FIELD_PREP(CCS_SIZE_MASK, num_ccs_blks - 1); mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index); + } *cs++ = XY_CTRL_SURF_COPY_BLT | (src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT | (dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT | - ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT; + ccs_copy_size; *cs++ = lower_32_bits(src_ofs); *cs++ = upper_32_bits(src_ofs) | mocs; *cs++ = lower_32_bits(dst_ofs); -- cgit v1.2.3 From 9116eabb6d5e26a7eceb6945327e9feb67019d41 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:28 +0530 Subject: drm/xe/xe_migrate: Use NULL 1G PTE mapped at 255GiB VA for ccs clear MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Get rid of the cleared bo, instead use null 1G PTE mapped at 255GiB offset, this can be used for both dgfx and igfx. v2: - Remove xe_migrate::cleared_bo. - Add a comment for NULL mapping.(Thomas) Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 65 +++++++---------------------------------- 1 file changed, 11 insertions(+), 54 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 9895bab22f85..c5ed20db8402 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -46,16 +46,12 @@ struct xe_migrate { struct mutex job_mutex; /** @pt_bo: Page-table buffer object. */ struct xe_bo *pt_bo; - /** - * @cleared_bo: Zeroed out bo used as a source for CCS metadata clears - */ - struct xe_bo *cleared_bo; /** @batch_base_ofs: VM offset of the migration batch buffer */ u64 batch_base_ofs; /** @usm_batch_base_ofs: VM offset of the usm batch buffer */ u64 usm_batch_base_ofs; - /** @cleared_vram_ofs: VM offset of @cleared_bo. */ - u64 cleared_vram_ofs; + /** @cleared_mem_ofs: VM offset of @cleared_bo. */ + u64 cleared_mem_ofs; /** * @fence: dma-fence representing the last migration job batch. * Protected by @job_mutex. 
@@ -93,13 +89,9 @@ static void xe_migrate_fini(struct drm_device *dev, void *arg) xe_vm_lock(m->q->vm, false); xe_bo_unpin(m->pt_bo); - if (m->cleared_bo) - xe_bo_unpin(m->cleared_bo); xe_vm_unlock(m->q->vm); dma_fence_put(m->fence); - if (m->cleared_bo) - xe_bo_put(m->cleared_bo); xe_bo_put(m->pt_bo); drm_suballoc_manager_fini(&m->vm_update_sa); mutex_destroy(&m->job_mutex); @@ -125,41 +117,6 @@ static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr) return addr + (256ULL << xe_pt_shift(2)); } -/* - * For flat CCS clearing we need a cleared chunk of memory to copy from, - * since the CCS clearing mode of XY_FAST_COLOR_BLT appears to be buggy - * (it clears on only 14 bytes in each chunk of 16). - * If clearing the main surface one can use the part of the main surface - * already cleared, but for clearing as part of copying non-compressed - * data out of system memory, we don't readily have a cleared part of - * VRAM to copy from, so create one to use for that case. - */ -static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm) -{ - struct xe_tile *tile = m->tile; - struct xe_device *xe = vm->xe; - size_t cleared_size; - u64 vram_addr; - - if (!xe_device_has_flat_ccs(xe)) - return 0; - - cleared_size = xe_device_ccs_bytes(xe, MAX_PREEMPTDISABLE_TRANSFER); - cleared_size = PAGE_ALIGN(cleared_size); - m->cleared_bo = xe_bo_create_pin_map(xe, tile, vm, cleared_size, - ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); - if (IS_ERR(m->cleared_bo)) - return PTR_ERR(m->cleared_bo); - - xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size); - vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE); - m->cleared_vram_ofs = xe_migrate_vram_ofs(xe, vram_addr); - - return 0; -} - static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm) { @@ -170,7 +127,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, u32 map_ofs, level, i; struct xe_bo *bo, *batch = tile->mem.kernel_bb_pool->bo; u64 entry; - int ret; /* Can't bump NUM_PT_SLOTS too high */ BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE); @@ -190,12 +146,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (IS_ERR(bo)) return PTR_ERR(bo); - ret = xe_migrate_create_cleared_bo(m, vm); - if (ret) { - xe_bo_put(bo); - return ret; - } - entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, pat_index); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); @@ -265,6 +215,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, (i + 1) * 8, u64, entry); } + /* Set up a 1GiB NULL mapping at 255GiB offset. */ + level = 2; + xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64, + vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) + | XE_PTE_NULL); + m->cleared_mem_ofs = (255ULL << xe_pt_shift(level)); + /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { u64 pos, ofs, flags; @@ -618,7 +575,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, * Otherwise if the bo doesn't have any CCS metadata attached, * we still need to clear it for security reasons. */ - u64 ccs_src_ofs = src_is_vram ? src_ofs : m->cleared_vram_ofs; + u64 ccs_src_ofs = src_is_vram ? 
src_ofs : m->cleared_mem_ofs; emit_copy_ccs(gt, bb, dst_ofs, true, @@ -1006,7 +963,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_vram); if (xe_device_has_flat_ccs(xe) && clear_vram) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, - m->cleared_vram_ofs, false, clear_L0); + m->cleared_mem_ofs, false, clear_L0); flush_flags = MI_FLUSH_DW_CCS; } -- cgit v1.2.3 From 09427526793384fea6a13cc33ffebadb69fdcde4 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 5 Dec 2023 17:56:34 +0530 Subject: drm/xe/xe2: Update chunk size for each iteration of ccs copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In xe2 platform XY_CTRL_SURF_COPY_BLT can handle ccs copy for max of 1024 main surface pages. v2: - Use better logic to determine chunk size (Matt/Thomas) v3: - use function instead of macro(Thomas) Cc: Matt Roper Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_migrate.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index c5ed20db8402..40f49e47d79e 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -65,9 +65,10 @@ struct xe_migrate { }; #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */ +#define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE * (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */ #define NUM_KERNEL_PDE 17 #define NUM_PT_SLOTS 32 -#define NUM_PT_PER_BLIT (MAX_PREEMPTDISABLE_TRANSFER / SZ_2M) +#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M /** * xe_tile_migrate_engine() - Get this tile's migrate engine. @@ -366,14 +367,22 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) return m; } -static u64 xe_migrate_res_sizes(struct xe_res_cursor *cur) +static u64 max_mem_transfer_per_pass(struct xe_device *xe) +{ + if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe)) + return MAX_CCS_LIMITED_TRANSFER; + + return MAX_PREEMPTDISABLE_TRANSFER; +} + +static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur) { /* * For VRAM we use identity mapped pages so we are limited to current * cursor size. For system we program the pages ourselves so we have no * such limitation. */ - return min_t(u64, MAX_PREEMPTDISABLE_TRANSFER, + return min_t(u64, max_mem_transfer_per_pass(xe), mem_type_is_vram(cur->mem_type) ? 
cur->size : cur->remaining); } @@ -672,10 +681,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, u32 update_idx; u64 ccs_ofs, ccs_size; u32 ccs_pt; + bool usm = xe->info.has_usm; + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; - src_L0 = xe_migrate_res_sizes(&src_it); - dst_L0 = xe_migrate_res_sizes(&dst_it); + src_L0 = xe_migrate_res_sizes(xe, &src_it); + dst_L0 = xe_migrate_res_sizes(xe, &dst_it); drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", pass++, src_L0, dst_L0); @@ -684,18 +695,18 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0, &src_L0_ofs, &src_L0_pt, 0, 0, - NUM_PT_PER_BLIT); + avail_pts); batch_size += pte_update_size(m, dst_is_vram, dst, &dst_it, &src_L0, &dst_L0_ofs, &dst_L0_pt, 0, - NUM_PT_PER_BLIT, NUM_PT_PER_BLIT); + avail_pts, avail_pts); if (copy_system_ccs) { ccs_size = xe_device_ccs_bytes(xe, src_L0); batch_size += pte_update_size(m, false, NULL, &ccs_it, &ccs_size, &ccs_ofs, &ccs_pt, 0, - 2 * NUM_PT_PER_BLIT, - NUM_PT_PER_BLIT); + 2 * avail_pts, + avail_pts); } /* Add copy commands size here */ @@ -922,9 +933,12 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_sched_job *job; struct xe_bb *bb; u32 batch_size, update_idx; + bool usm = xe->info.has_usm; + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + clear_L0 = xe_migrate_res_sizes(xe, &src_it); - clear_L0 = xe_migrate_res_sizes(&src_it); drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); /* Calculate final sizes and batch size.. */ @@ -932,7 +946,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, pte_update_size(m, clear_vram, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, emit_clear_cmd_len(gt), 0, - NUM_PT_PER_BLIT); + avail_pts); if (xe_device_has_flat_ccs(xe) && clear_vram) batch_size += EMIT_COPY_CCS_DW; -- cgit v1.2.3 From 65ef8dbad1db9e35ca7af90e6958134595938d24 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:30 +0530 Subject: drm/xe/xe2: Update emit_pte to use compression enabled PAT index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For indirect accessed buffer use compression enabled PAT index. v2: - Fix parameter name. v3: - use a relevant define instead of fix number. 
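[Note: a condensed sketch, not part of the patch, of the PAT index selection that emit_pte() performs after the diff below. The xe_cache_level values follow the driver's xe_pt_types.h; pick_pat_cache_level() is a hypothetical standalone helper used only for illustration.]

/* Illustrative only: which cache level the new emit_pte() logic selects. */
enum xe_cache_level {
	XE_CACHE_NONE,
	XE_CACHE_WT,
	XE_CACHE_WB,
	XE_CACHE_NONE_COMPRESSION,	/* UC + COH_NONE + COMPRESSION, Xe2 and beyond */
};

static enum xe_cache_level pick_pat_cache_level(unsigned int verx100, int is_comp_pte)
{
	/* Xe2+: uncached; the compression-enabled PAT is used only for indirect (CCS) access. */
	if (verx100 >= 2000)
		return is_comp_pte ? XE_CACHE_NONE_COMPRESSION : XE_CACHE_NONE;

	/* Earlier platforms keep the write-back mapping used before this patch. */
	return XE_CACHE_WB;
}

The actual pat_index is then looked up as xe->pat.idx[level], which on Xe2 resolves XE_CACHE_NONE_COMPRESSION to PAT index 12 as set up in xe_pat_init_early() in the diff below.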
Cc: Thomas Hellström Cc: Matthew Auld Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 21 +++++++++++++++------ drivers/gpu/drm/xe/xe_pat.c | 1 + drivers/gpu/drm/xe/xe_pt_types.h | 1 + 4 files changed, 18 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 47fcd6e6b777..d6c23441632a 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -330,7 +330,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) else xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); - emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), + emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, &src_it, XE_PAGE_SIZE, pt); run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 40f49e47d79e..48ada083d0b3 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -425,15 +425,24 @@ static u32 pte_update_size(struct xe_migrate *m, static void emit_pte(struct xe_migrate *m, struct xe_bb *bb, u32 at_pt, - bool is_vram, + bool is_vram, bool is_comp_pte, struct xe_res_cursor *cur, u32 size, struct xe_bo *bo) { - u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; + struct xe_device *xe = tile_to_xe(m->tile); + + u16 pat_index; u32 ptes; u64 ofs = at_pt * XE_PAGE_SIZE; u64 cur_ofs; + /* Indirect access needs compression enabled uncached PAT index */ + if (GRAPHICS_VERx100(xe) >= 2000) + pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] : + xe->pat.idx[XE_CACHE_NONE]; + else + pat_index = xe->pat.idx[XE_CACHE_WB]; + /* * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently * we're only emitting VRAM PTEs during sanity tests, so when @@ -720,19 +729,19 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, } if (!src_is_vram) - emit_pte(m, bb, src_L0_pt, src_is_vram, &src_it, src_L0, + emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, src_bo); else xe_res_next(&src_it, src_L0); if (!dst_is_vram) - emit_pte(m, bb, dst_L0_pt, dst_is_vram, &dst_it, src_L0, + emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, dst_bo); else xe_res_next(&dst_it, src_L0); if (copy_system_ccs) - emit_pte(m, bb, ccs_pt, false, &ccs_it, ccs_size, src_bo); + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -965,7 +974,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, /* Preemption is enabled again by the ring ops. 
*/ if (!clear_vram) { - emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0, + emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, bo); } else { xe_res_next(&src_it, clear_L0); diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 1892ff81086f..1ff6bc79e7d4 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -387,6 +387,7 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 15; xe->pat.idx[XE_CACHE_WB] = 2; + xe->pat.idx[XE_CACHE_NONE_COMPRESSION] = 12; /*Applicable on xe2 and beyond */ } else if (xe->info.platform == XE_METEORLAKE) { xe->pat.ops = &xelpg_pat_ops; xe->pat.table = xelpg_pat_table; diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 82cbf1ef8e57..cee70cb0f014 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -18,6 +18,7 @@ enum xe_cache_level { XE_CACHE_NONE, XE_CACHE_WT, XE_CACHE_WB, + XE_CACHE_NONE_COMPRESSION, /*UC + COH_NONE + COMPRESSION */ __XE_CACHE_LEVEL_COUNT, }; -- cgit v1.2.3 From 266c85885263022954928b125d46ab7a78c77a69 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:31 +0530 Subject: drm/xe/xe2: Handle flat ccs move for igfx. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Clear flat ccs during user bo creation. - copy ccs meta data between flat ccs and bo during eviction and restore. - Add a bool field ccs_cleared in bo, true means ccs region of bo is already cleared. v2: - Rebase. v3: - Maintain order of xe_bo_move_notify for ttm_bo_type_sg. v4: - xe_migrate_copy can be used to copy src to dst bo on igfx too. Add a bool which handles only ccs metadata copy. v5: - on dgfx ccs should be cleared even if the bo is not compression enabled. Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_migrate.c | 4 +-- drivers/gpu/drm/xe/xe_bo.c | 33 ++++++++++++------ drivers/gpu/drm/xe/xe_bo_types.h | 4 +++ drivers/gpu/drm/xe/xe_migrate.c | 64 ++++++++++++++++++++--------------- drivers/gpu/drm/xe/xe_migrate.h | 3 +- 5 files changed, 66 insertions(+), 42 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index d6c23441632a..7a32faa2f688 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -152,7 +152,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, - bo->ttm.resource); + bo->ttm.resource, false); if (!sanity_fence_failed(xe, fence, big ? "Copying big bo remote -> vram" : "Copying small bo remote -> vram", test)) { retval = xe_map_rd(xe, &bo->vmap, 0, u64); @@ -169,7 +169,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, - remote->ttm.resource); + remote->ttm.resource, false); if (!sanity_fence_failed(xe, fence, big ? 
"Copying big bo vram -> remote" : "Copying small bo vram -> remote", test)) { retval = xe_map_rd(xe, &remote->vmap, 0, u64); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index c10aa5a63a70..7c0037aecff3 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -627,10 +627,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool move_lacks_source; bool tt_has_data; bool needs_clear; + bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && + ttm && ttm_tt_is_populated(ttm)) ? true : false; int ret = 0; - - /* Bo creation path, moving to system or TT. No clearing required. */ - if (!old_mem && ttm) { + /* Bo creation path, moving to system or TT. */ + if ((!old_mem && ttm) && !handle_system_ccs) { ttm_bo_move_null(ttm_bo, new_mem); return 0; } @@ -645,14 +646,18 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tt_has_data = ttm && (ttm_tt_is_populated(ttm) || (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data; + move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : + (!mem_type_is_vram(old_mem_type) && !tt_has_data); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); - if ((move_lacks_source && !needs_clear) || - (old_mem_type == XE_PL_SYSTEM && - new_mem->mem_type == XE_PL_TT)) { + if ((move_lacks_source && !needs_clear)) { + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } + + if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) { ttm_bo_move_null(ttm_bo, new_mem); goto out; } @@ -683,8 +688,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, ret = timeout; goto out; } - ttm_bo_move_null(ttm_bo, new_mem); - goto out; + + if (!handle_system_ccs) { + ttm_bo_move_null(ttm_bo, new_mem); + goto out; + } } if (!move_lacks_source && @@ -705,6 +713,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = mem_type_to_migrate(xe, new_mem->mem_type); else if (mem_type_is_vram(old_mem_type)) migrate = mem_type_to_migrate(xe, old_mem_type); + else + migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); @@ -747,8 +757,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (move_lacks_source) fence = xe_migrate_clear(migrate, bo, new_mem); else - fence = xe_migrate_copy(migrate, - bo, bo, old_mem, new_mem); + fence = xe_migrate_copy(migrate, bo, bo, old_mem, + new_mem, handle_system_ccs); if (IS_ERR(fence)) { ret = PTR_ERR(fence); xe_device_mem_access_put(xe); @@ -1234,6 +1244,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return bo; } + bo->ccs_cleared = false; bo->tile = tile; bo->size = size; bo->flags = flags; diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index f71dbc518958..64c2249a4e40 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -79,6 +79,10 @@ struct xe_bo { struct llist_node freed; /** @created: Whether the bo has passed initial creation */ bool created; + + /** @ccs_cleared */ + bool ccs_cleared; + /** * @cpu_caching: CPU caching mode. Currently only used for userspace * objects. 
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 48ada083d0b3..adf1dab5eba2 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -577,14 +577,14 @@ static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) static u32 xe_migrate_ccs_copy(struct xe_migrate *m, struct xe_bb *bb, - u64 src_ofs, bool src_is_vram, - u64 dst_ofs, bool dst_is_vram, u32 dst_size, + u64 src_ofs, bool src_is_indirect, + u64 dst_ofs, bool dst_is_indirect, u32 dst_size, u64 ccs_ofs, bool copy_ccs) { struct xe_gt *gt = m->tile->primary_gt; u32 flush_flags = 0; - if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) { + if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) { /* * If the src is already in vram, then it should already * have been cleared by us, or has been populated by the @@ -593,28 +593,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, * Otherwise if the bo doesn't have any CCS metadata attached, * we still need to clear it for security reasons. */ - u64 ccs_src_ofs = src_is_vram ? src_ofs : m->cleared_mem_ofs; + u64 ccs_src_ofs = src_is_indirect ? src_ofs : m->cleared_mem_ofs; emit_copy_ccs(gt, bb, dst_ofs, true, - ccs_src_ofs, src_is_vram, dst_size); + ccs_src_ofs, src_is_indirect, dst_size); flush_flags = MI_FLUSH_DW_CCS; } else if (copy_ccs) { - if (!src_is_vram) + if (!src_is_indirect) src_ofs = ccs_ofs; - else if (!dst_is_vram) + else if (!dst_is_indirect) dst_ofs = ccs_ofs; - /* - * At the moment, we don't support copying CCS metadata from - * system to system. - */ - xe_gt_assert(gt, src_is_vram || dst_is_vram); + xe_gt_assert(gt, src_is_indirect || dst_is_indirect); - emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs, - src_is_vram, dst_size); - if (dst_is_vram) + emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs, + src_is_indirect, dst_size); + if (dst_is_indirect) flush_flags = MI_FLUSH_DW_CCS; } @@ -630,6 +626,7 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m, * the buffer object @dst is currently bound to. * @src: The source TTM resource. * @dst: The dst TTM resource. + * @copy_only_ccs: If true copy only CCS metadata * * Copies the contents of @src to @dst: On flat CCS devices, * the CCS metadata is copied as well if needed, or if not present, @@ -643,7 +640,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, struct xe_bo *dst_bo, struct ttm_resource *src, - struct ttm_resource *dst) + struct ttm_resource *dst, + bool copy_only_ccs) { struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); @@ -655,6 +653,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, u64 src_L0, dst_L0; int pass = 0; int err; + bool src_is_pltt = src->mem_type == XE_PL_TT; + bool dst_is_pltt = dst->mem_type == XE_PL_TT; bool src_is_vram = mem_type_is_vram(src->mem_type); bool dst_is_vram = mem_type_is_vram(dst->mem_type); bool copy_ccs = xe_device_has_flat_ccs(xe) && @@ -719,8 +719,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, } /* Add copy commands size here */ - batch_size += EMIT_COPY_DW + - (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0); + batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) + + ((xe_device_has_flat_ccs(xe) ? 
EMIT_COPY_CCS_DW : 0)); bb = xe_bb_new(gt, batch_size, usm); if (IS_ERR(bb)) { @@ -746,10 +746,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, - XE_PAGE_SIZE); - flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram, - dst_L0_ofs, dst_is_vram, + if (!copy_only_ccs) + emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE); + + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, + IS_DGFX(xe) ? src_is_vram : src_is_pltt, + dst_L0_ofs, + IS_DGFX(xe) ? dst_is_vram : dst_is_pltt, src_L0, ccs_ofs, copy_ccs); mutex_lock(&m->job_mutex); @@ -922,6 +925,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, bool clear_vram = mem_type_is_vram(dst->mem_type); struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; struct dma_fence *fence = NULL; u64 size = bo->size; struct xe_res_cursor src_it; @@ -954,9 +958,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, clear_vram, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - emit_clear_cmd_len(gt), 0, + clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, avail_pts); - if (xe_device_has_flat_ccs(xe) && clear_vram) + + if (xe_device_has_flat_ccs(xe)) batch_size += EMIT_COPY_CCS_DW; /* Clear commands */ @@ -971,7 +976,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, } size -= clear_L0; - /* Preemption is enabled again by the ring ops. */ if (!clear_vram) { emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, @@ -982,9 +986,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, - clear_vram); - if (xe_device_has_flat_ccs(xe) && clear_vram) { + if (!clear_system_ccs) + emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); + + if (xe_device_has_flat_ccs(xe)) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_mem_ofs, false, clear_L0); flush_flags = MI_FLUSH_DW_CCS; @@ -1041,6 +1046,9 @@ err_sync: return ERR_PTR(err); } + if (clear_system_ccs) + bo->ccs_cleared = true; + return fence; } diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index c729241776ad..951f19318ea4 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -85,7 +85,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, struct xe_bo *dst_bo, struct ttm_resource *src, - struct ttm_resource *dst); + struct ttm_resource *dst, + bool copy_only_ccs); struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, -- cgit v1.2.3 From d6abc18d66932adb163803f9c83a5fa90ca63ff4 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:32 +0530 Subject: drm/xe/xe2: Modify xe_bo_test for system memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify test to valid ccs clear and copy during evict/restore on igfx. v2: -Vram is associated with tiles not with gt. Use tile based iterator for ccs_test_run_gt. 
(Matt) Cc: Matt Roper Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/tests/xe_bo.c | 44 ++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 549ab343de80..412b2e7ce40c 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -14,7 +14,7 @@ #include "xe_pci.h" #include "xe_pm.h" -static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, +static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, bool clear, u64 get_val, u64 assign_val, struct kunit *test) { @@ -36,7 +36,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. */ if (clear) { - fence = xe_migrate_clear(gt_to_tile(gt)->migrate, bo, bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); @@ -91,7 +91,7 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, } /* Check last CCS value, or at least last value in page. */ - offset = xe_device_ccs_bytes(gt_to_xe(gt), bo->size); + offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; if (cpu_map[offset] != get_val) { KUNIT_FAIL(test, @@ -108,39 +108,45 @@ static int ccs_test_migrate(struct xe_gt *gt, struct xe_bo *bo, return ret; } -static void ccs_test_run_gt(struct xe_device *xe, struct xe_gt *gt, - struct kunit *test) +static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, + struct kunit *test) { struct xe_bo *bo; - u32 vram_bit; + int ret; /* TODO: Sanity check */ - vram_bit = XE_BO_CREATE_VRAM0_BIT << gt_to_tile(gt)->id; - kunit_info(test, "Testing gt id %u vram id %u\n", gt->info.id, - gt_to_tile(gt)->id); + unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); + + if (IS_DGFX(xe)) + kunit_info(test, "Testing vram id %u\n", tile->id); + else + kunit_info(test, "Testing system memory\n"); + + bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, + ttm_bo_type_device, bo_flags); + + xe_bo_lock(bo, false); - bo = xe_bo_create_locked(xe, NULL, NULL, SZ_1M, ttm_bo_type_device, - vram_bit); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; } kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); - ret = ccs_test_migrate(gt, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, + ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, test); if (ret) goto out_unlock; kunit_info(test, "Verifying that CCS data survives migration.\n"); - ret = ccs_test_migrate(gt, bo, false, 0xdeadbeefdeadbeefULL, + ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL, 0xdeadbeefdeadbeefULL, test); if (ret) goto out_unlock; kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); - ret = ccs_test_migrate(gt, bo, true, 0ULL, 0ULL, test); + ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test); out_unlock: xe_bo_unlock(bo); @@ -150,7 +156,7 @@ out_unlock: static int ccs_test_run_device(struct xe_device *xe) { struct kunit *test = xe_cur_kunit(); - struct xe_gt *gt; + struct xe_tile *tile; int id; if (!xe_device_has_flat_ccs(xe)) { @@ -160,8 +166,12 @@ static int ccs_test_run_device(struct xe_device *xe) xe_device_mem_access_get(xe); - for_each_gt(gt, xe, id) - 
ccs_test_run_gt(xe, gt, test); + for_each_tile(tile, xe, id) { + /* For igfx run only for primary tile */ + if (!IS_DGFX(xe) && id > 0) + continue; + ccs_test_run_tile(xe, tile, test); + } xe_device_mem_access_put(xe); -- cgit v1.2.3 From cbdc52c11c9b1df40ade23f622abc3466e4ee96c Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 12 Dec 2023 23:55:33 +0530 Subject: drm/xe/xe2: Support flat ccs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable flat ccs for XE2_GFX_FEATURES. Cc: Thomas Hellström Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 34dcc743e2c7..88fb6c54ef99 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -164,7 +164,7 @@ static const struct xe_graphics_desc graphics_xelpg = { #define XE2_GFX_FEATURES \ .dma_mask_size = 46, \ .has_asid = 1, \ - .has_flat_ccs = 0 /* FIXME: implementation missing */, \ + .has_flat_ccs = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 0 /* FIXME: implementation missing */, \ .va_bits = 48, \ -- cgit v1.2.3 From bc17ec0b201ec7b8576576aa0785787671b4afe7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Dec 2023 10:47:01 -0800 Subject: drm/xe: Drop "_REG" suffix from CSFE_CHICKEN1 We don't use this suffix on any other registers, and it isn't part of the register's official name either, so drop it for consistency. While at it, move the register definition slightly so that it isn't separating RING_CMD_CCTL's definition from its fields. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231214184659.2249559-11-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 9 ++++----- drivers/gpu/drm/xe/xe_wa.c | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 1a857c4edcf5..67da19f9836f 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -46,11 +46,6 @@ #define RING_ESR(base) XE_REG((base) + 0xb8) #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) - -#define CSFE_CHICKEN1_REG(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) -#define GHWSP_CSB_REPORT_DIS REG_BIT(15) -#define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) - /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption. 
@@ -61,6 +56,10 @@ #define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 8) #define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 1) +#define CSFE_CHICKEN1(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) +#define GHWSP_CSB_REPORT_DIS REG_BIT(15) +#define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) + #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 23f1285135b8..12829748bb6c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -605,7 +605,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { */ { XE_RTP_NAME("18032095049, 16021639441"), XE_RTP_RULES(GRAPHICS_VERSION(2004)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1_REG(0), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), GHWSP_CSB_REPORT_DIS | PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, XE_RTP_ACTION_FLAG(ENGINE_BASE))) -- cgit v1.2.3 From 5ea7fe65fb1cf95d9b48fcc3c7c806ce417357c2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Dec 2023 10:47:02 -0800 Subject: drm/xe: Move some per-engine register definitions to the engine header Although we only work with the RCS instances today, the FF_SLICE_CS_CHICKEN1[1,2] CS_DEBUG_MODE1, CS_CHICKEN1, and FF_THREAD_MODE registers all have instances on both the RCS and CCS engines. Convert these to parameterized macros and move them to the engine register header. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231214184659.2249559-12-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 21 +++++++++++++++++++++ drivers/gpu/drm/xe/regs/xe_gt_regs.h | 18 ------------------ drivers/gpu/drm/xe/regs/xe_regs.h | 3 --- drivers/gpu/drm/xe/xe_wa.c | 16 +++++++++------- 4 files changed, 30 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 67da19f9836f..e109ef912706 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -39,6 +39,9 @@ #define RING_MI_MODE(base) XE_REG((base) + 0x9c) #define RING_NOPID(base) XE_REG((base) + 0x94) +#define FF_THREAD_MODE(base) XE_REG((base) + 0xa0) +#define FF_TESSELATION_DOP_GATE_DISABLE BIT(19) + #define RING_IMR(base) XE_REG((base) + 0xa8) #define RING_EIR(base) XE_REG((base) + 0xb0) @@ -60,6 +63,16 @@ #define GHWSP_CSB_REPORT_DIS REG_BIT(15) #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) +#define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) +#define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) + +#define FF_SLICE_CS_CHICKEN2(base) XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED) +#define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) + +#define CS_DEBUG_MODE1(base) XE_REG((base) + 0xec, XE_REG_OPTION_MASKED) +#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) +#define REPLAY_MODE_GRANULARITY REG_BIT(0) + #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) @@ -115,6 +128,14 @@ #define RING_EXECLIST_CONTROL(base) XE_REG((base) + 0x550) #define EL_CTRL_LOAD REG_BIT(0) +#define CS_CHICKEN1(base) XE_REG((base) + 0x580, XE_REG_OPTION_MASKED) +#define PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) +#define PREEMPT_GPGPU_MID_THREAD_LEVEL PREEMPT_GPGPU_LEVEL(0, 0) +#define PREEMPT_GPGPU_THREAD_GROUP_LEVEL PREEMPT_GPGPU_LEVEL(0, 1) +#define PREEMPT_GPGPU_COMMAND_LEVEL PREEMPT_GPGPU_LEVEL(1, 0) +#define PREEMPT_GPGPU_LEVEL_MASK 
PREEMPT_GPGPU_LEVEL(1, 1) +#define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) + #define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08) #define CG3DDISHRS_CLKGATE_DIS REG_BIT(5) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index f5bf4c6d1761..4448507ef4ca 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -60,26 +60,8 @@ #define MTL_MCR_GROUPID REG_GENMASK(11, 8) #define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) -#define FF_SLICE_CS_CHICKEN1 XE_REG(0x20e0, XE_REG_OPTION_MASKED) -#define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) - -#define FF_SLICE_CS_CHICKEN2 XE_REG(0x20e4, XE_REG_OPTION_MASKED) -#define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) - -#define CS_DEBUG_MODE1 XE_REG(0x20ec, XE_REG_OPTION_MASKED) -#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) -#define REPLAY_MODE_GRANULARITY REG_BIT(0) - #define PS_INVOCATION_COUNT XE_REG(0x2348) -#define CS_CHICKEN1 XE_REG(0x2580, XE_REG_OPTION_MASKED) -#define PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) -#define PREEMPT_GPGPU_MID_THREAD_LEVEL PREEMPT_GPGPU_LEVEL(0, 0) -#define PREEMPT_GPGPU_THREAD_GROUP_LEVEL PREEMPT_GPGPU_LEVEL(0, 1) -#define PREEMPT_GPGPU_COMMAND_LEVEL PREEMPT_GPGPU_LEVEL(1, 0) -#define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) -#define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) - #define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) #define XEHP_GLOBAL_MOCS(i) XE_REG_MCR(0x4000 + (i) * 4) #define CCS_AUX_INV XE_REG(0x4208) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index ec9372aa739f..4ac71b605487 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -45,9 +45,6 @@ #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) #define GT_RENDER_USER_INTERRUPT REG_BIT(0) -#define FF_THREAD_MODE XE_REG(0x20a0) -#define FF_TESSELATION_DOP_GATE_DISABLE BIT(19) - #define TIMESTAMP_OVERRIDE XE_REG(0x44074) #define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) #define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 12829748bb6c..5f61dd87c586 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -299,7 +299,7 @@ static const struct xe_rtp_entry_sr gt_was[] = { static const struct xe_rtp_entry_sr engine_was[] = { { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(FF_THREAD_MODE, + XE_RTP_ACTIONS(SET(FF_THREAD_MODE(RENDER_RING_BASE), FF_TESSELATION_DOP_GATE_DISABLE)) }, { XE_RTP_NAME("1409804808"), @@ -320,7 +320,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("14010826681, 1606700617, 22010271021, 18019627453"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1255), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE)) + XE_RTP_ACTIONS(SET(CS_DEBUG_MODE1(RENDER_RING_BASE), + FF_DOP_CLOCK_GATE_DISABLE)) }, { XE_RTP_NAME("1406941453"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)), @@ -328,7 +329,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("FtrPerCtxtPreemptionGranularityControl"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1250), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1, + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1(RENDER_RING_BASE), FFSC_PERCTX_PREEMPT_CTRL)) }, @@ -419,7 +420,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { { 
XE_RTP_NAME("16015675438"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE), PERF_FIX_BALANCING_CFE_DISABLE)) }, { XE_RTP_NAME("18028616096"), @@ -481,7 +482,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), ENGINE_CLASS(RENDER), FUNC(xe_rtp_match_first_gslice_fused_off)), - XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1, + XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE), REPLAY_MODE_GRANULARITY)) }, { XE_RTP_NAME("22010960976, 14013347512"), @@ -540,7 +541,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("16015675438"), XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2, PERF_FIX_BALANCING_CFE_DISABLE)) + XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE), + PERF_FIX_BALANCING_CFE_DISABLE)) }, { XE_RTP_NAME("14014999345"), XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), @@ -622,7 +624,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, { XE_RTP_NAME("WaDisableGPGPUMidThreadPreemption"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)), - XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1, + XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(RENDER_RING_BASE), PREEMPT_GPGPU_LEVEL_MASK, PREEMPT_GPGPU_THREAD_GROUP_LEVEL)) }, -- cgit v1.2.3 From 68df8642ea34bf313757b671f57a4d123458c3f8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Dec 2023 10:47:03 -0800 Subject: drm/xe: Fix whitespace in register definitions Our register headers use tabs to align the definition values. Convert a few definitions that were using spaces instead. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231214184659.2249559-13-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 ++-- drivers/gpu/drm/xe/regs/xe_regs.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index e109ef912706..7f82bef3a0db 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -136,8 +136,8 @@ #define PREEMPT_GPGPU_LEVEL_MASK PREEMPT_GPGPU_LEVEL(1, 1) #define PREEMPT_3D_OBJECT_LEVEL REG_BIT(0) -#define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08) -#define CG3DDISHRS_CLKGATE_DIS REG_BIT(5) +#define VDBOX_CGCTL3F08(base) XE_REG((base) + 0x3f08) +#define CG3DDISHRS_CLKGATE_DIS REG_BIT(5) #define VDBOX_CGCTL3F10(base) XE_REG((base) + 0x3f10) #define IECPUNIT_CLKGATE_DIS REG_BIT(22) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 4ac71b605487..4b427ec8cbff 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -34,9 +34,9 @@ #define XEHPC_BCS7_RING_BASE 0x3ec000 #define XEHPC_BCS8_RING_BASE 0x3ee000 -#define DG1_GSC_HECI2_BASE 0x00259000 -#define PVC_GSC_HECI2_BASE 0x00285000 -#define DG2_GSC_HECI2_BASE 0x00374000 +#define DG1_GSC_HECI2_BASE 0x00259000 +#define PVC_GSC_HECI2_BASE 0x00285000 +#define DG2_GSC_HECI2_BASE 0x00374000 #define GSCCS_RING_BASE 0x11a000 #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) -- cgit v1.2.3 From f52e4e9065786dd20477879d834c5c33a3ae9498 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Dec 2023 10:47:04 -0800 Subject: drm/xe: Move engine base offsets to engine register header These offsets are 
primarily used as parameters for the engine register definitions, so it makes more sense to define them in the engine header rather than the general register header. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231214184659.2249559-14-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 33 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/regs/xe_regs.h | 28 --------------------------- 2 files changed, 33 insertions(+), 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7f82bef3a0db..5592774fc690 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -10,6 +10,39 @@ #include "regs/xe_reg_defs.h" +/* + * These *_BASE values represent the MMIO offset where each hardware engine's + * registers start. The other definitions in this header are parameterized + * macros that will take one of these values as a parameter. + */ +#define RENDER_RING_BASE 0x02000 +#define BSD_RING_BASE 0x1c0000 +#define BSD2_RING_BASE 0x1c4000 +#define BSD3_RING_BASE 0x1d0000 +#define BSD4_RING_BASE 0x1d4000 +#define XEHP_BSD5_RING_BASE 0x1e0000 +#define XEHP_BSD6_RING_BASE 0x1e4000 +#define XEHP_BSD7_RING_BASE 0x1f0000 +#define XEHP_BSD8_RING_BASE 0x1f4000 +#define VEBOX_RING_BASE 0x1c8000 +#define VEBOX2_RING_BASE 0x1d8000 +#define XEHP_VEBOX3_RING_BASE 0x1e8000 +#define XEHP_VEBOX4_RING_BASE 0x1f8000 +#define COMPUTE0_RING_BASE 0x1a000 +#define COMPUTE1_RING_BASE 0x1c000 +#define COMPUTE2_RING_BASE 0x1e000 +#define COMPUTE3_RING_BASE 0x26000 +#define BLT_RING_BASE 0x22000 +#define XEHPC_BCS1_RING_BASE 0x3e0000 +#define XEHPC_BCS2_RING_BASE 0x3e2000 +#define XEHPC_BCS3_RING_BASE 0x3e4000 +#define XEHPC_BCS4_RING_BASE 0x3e6000 +#define XEHPC_BCS5_RING_BASE 0x3e8000 +#define XEHPC_BCS6_RING_BASE 0x3ea000 +#define XEHPC_BCS7_RING_BASE 0x3ec000 +#define XEHPC_BCS8_RING_BASE 0x3ee000 +#define GSCCS_RING_BASE 0x11a000 + #define RING_TAIL(base) XE_REG((base) + 0x30) #define RING_HEAD(base) XE_REG((base) + 0x34) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 4b427ec8cbff..b7d3b42ec003 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,38 +7,10 @@ #include "regs/xe_reg_defs.h" -#define RENDER_RING_BASE 0x02000 -#define BSD_RING_BASE 0x1c0000 -#define BSD2_RING_BASE 0x1c4000 -#define BSD3_RING_BASE 0x1d0000 -#define BSD4_RING_BASE 0x1d4000 -#define XEHP_BSD5_RING_BASE 0x1e0000 -#define XEHP_BSD6_RING_BASE 0x1e4000 -#define XEHP_BSD7_RING_BASE 0x1f0000 -#define XEHP_BSD8_RING_BASE 0x1f4000 -#define VEBOX_RING_BASE 0x1c8000 -#define VEBOX2_RING_BASE 0x1d8000 -#define XEHP_VEBOX3_RING_BASE 0x1e8000 -#define XEHP_VEBOX4_RING_BASE 0x1f8000 -#define COMPUTE0_RING_BASE 0x1a000 -#define COMPUTE1_RING_BASE 0x1c000 -#define COMPUTE2_RING_BASE 0x1e000 -#define COMPUTE3_RING_BASE 0x26000 -#define BLT_RING_BASE 0x22000 -#define XEHPC_BCS1_RING_BASE 0x3e0000 -#define XEHPC_BCS2_RING_BASE 0x3e2000 -#define XEHPC_BCS3_RING_BASE 0x3e4000 -#define XEHPC_BCS4_RING_BASE 0x3e6000 -#define XEHPC_BCS5_RING_BASE 0x3e8000 -#define XEHPC_BCS6_RING_BASE 0x3ea000 -#define XEHPC_BCS7_RING_BASE 0x3ec000 -#define XEHPC_BCS8_RING_BASE 0x3ee000 - #define DG1_GSC_HECI2_BASE 0x00259000 #define PVC_GSC_HECI2_BASE 0x00285000 #define DG2_GSC_HECI2_BASE 0x00374000 -#define GSCCS_RING_BASE 0x11a000 #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) 
#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) -- cgit v1.2.3 From 48e70d2a1a9c8d58c48b2840feda3aa3bc330a94 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Dec 2023 10:47:05 -0800 Subject: drm/xe: Move GSC HECI base offsets out of register header These offsets are only used to setup the auxiliary device BAR information and are never used for driver read/write operations. Move them to the GSC HECI file where they're actually used. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231214184659.2249559-15-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_regs.h | 4 ---- drivers/gpu/drm/xe/xe_heci_gsc.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index b7d3b42ec003..67ce087e21d0 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,10 +7,6 @@ #include "regs/xe_reg_defs.h" -#define DG1_GSC_HECI2_BASE 0x00259000 -#define PVC_GSC_HECI2_BASE 0x00285000 -#define DG2_GSC_HECI2_BASE 0x00374000 - #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) #define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index d8e982e3d9a2..19eda00d5cc4 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -16,6 +16,10 @@ #define GSC_BAR_LENGTH 0x00000FFC +#define DG1_GSC_HECI2_BASE 0x259000 +#define PVC_GSC_HECI2_BASE 0x285000 +#define DG2_GSC_HECI2_BASE 0x374000 + static void heci_gsc_irq_mask(struct irq_data *d) { /* generic irq handling */ -- cgit v1.2.3 From 93536c2bcfb2c3c5e9b53c83f333f57d9b632e83 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Dec 2023 10:47:06 -0800 Subject: drm/xe: Define interrupt vector bits with the interrupt registers The bit definitions had become a bit orphaned; move them to the same location as the interrupt registers that they're used with. 
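For illustration only (not part of this patch): a hypothetical sketch of how these vector bits pair with the per-engine interrupt enable/mask registers they now sit next to. The helper name, the specific registers touched and the mask polarity are assumptions made for the sketch, not code taken from xe_irq.c.

	/* Hypothetical sketch -- helper name, register choice and polarity are assumed */
	static void example_enable_rcs0_user_irq(struct xe_gt *gt)
	{
		u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT;
		u32 dmask = irqs << 16 | irqs;	/* program both engine halves of the enable register */

		xe_mmio_write32(gt, RENDER_COPY_INTR_ENABLE, dmask);
		/* mask registers are assumed to use inverted polarity: 0 = interrupt delivered */
		xe_mmio_write32(gt, RCS0_RSVD_INTR_MASK, ~(irqs << 16));
	}
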
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231214184659.2249559-16-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 +++++ drivers/gpu/drm/xe/regs/xe_regs.h | 6 ------ drivers/gpu/drm/xe/xe_hw_engine.c | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 4448507ef4ca..2c48de2076a6 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -456,6 +456,11 @@ #define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114) #define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) #define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) +#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) +#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) +#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) +#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) +#define GT_RENDER_USER_INTERRUPT REG_BIT(0) #define PVC_GT0_PACKAGE_ENERGY_STATUS XE_REG(0x281004) #define PVC_GT0_PACKAGE_RAPL_LIMIT XE_REG(0x281008) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 67ce087e21d0..2c214bb9b671 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,12 +7,6 @@ #include "regs/xe_reg_defs.h" -#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) -#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) -#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) -#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) -#define GT_RENDER_USER_INTERRUPT REG_BIT(0) - #define TIMESTAMP_OVERRIDE XE_REG(0x44074) #define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) #define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 86b863b99065..1fa5cf5eea97 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -9,7 +9,6 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" -#include "regs/xe_regs.h" #include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" -- cgit v1.2.3 From aaa536a8877e61104ccb5ba5287beaa4e959539e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Dec 2023 10:47:07 -0800 Subject: drm/xe: Re-sort GT register header Keeping the register definitions sorted will make it easy to find existing definitions and prevent accidental introduction of duplicate definitions. v2: - Reorder FUSE3/FUSE4 registers and move GT0_PERF_LIMIT_REASONS / MTL_MEDIA_PERF_LIMIT_REASONS to proper places. 
(Lucas) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231214184659.2249559-17-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 76 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 2c48de2076a6..d7f52a634c11 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -42,11 +42,6 @@ #define FORCEWAKE_ACK_GSC XE_REG(0xdf8) #define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) -/* L3 Cache Control */ -#define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) -#define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) -#define LNCFCMOCS_REG_COUNT 32 - #define MCFG_MCR_SELECTOR XE_REG(0xfd0) #define MTL_MCR_SELECTOR XE_REG(0xfd4) #define SF_MCR_SELECTOR XE_REG(0xfd8) @@ -102,6 +97,12 @@ #define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) +#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010) + +#define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) +#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) +#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) + #define XEHP_PSS_MODE2 XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED) #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) @@ -109,12 +110,6 @@ #define FLSH_IGNORES_PSD REG_BIT(10) #define FD_END_COLLECT REG_BIT(5) -#define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) -#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) -#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) - -#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010) - #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) @@ -149,6 +144,11 @@ #define GSCPSMI_BASE XE_REG(0x880c) +/* Fuse readout registers for GT */ +#define XEHP_FUSE4 XE_REG(0x9114) +#define CCS_EN_MASK REG_GENMASK(19, 16) +#define GT_L3_EXC_MASK REG_GENMASK(6, 4) + #define MIRROR_FUSE3 XE_REG(0x9118) #define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16) #define L3BANK_PAIR_COUNT 4 @@ -157,18 +157,14 @@ #define MAX_MSLICES 4 #define MEML3_EN_MASK REG_GENMASK(3, 0) -/* Fuse readout registers for GT */ -#define XEHP_FUSE4 XE_REG(0x9114) -#define CCS_EN_MASK REG_GENMASK(19, 16) -#define GT_L3_EXC_MASK REG_GENMASK(6, 4) +#define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ +#define XELP_EU_MASK REG_GENMASK(7, 0) +#define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c) #define GT_VEBOX_VDBOX_DISABLE XE_REG(0x9140) #define GT_VEBOX_DISABLE_MASK REG_GENMASK(19, 16) #define GT_VDBOX_DISABLE_MASK REG_GENMASK(7, 0) -#define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ -#define XELP_EU_MASK REG_GENMASK(7, 0) -#define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c) #define XEHP_GT_COMPUTE_DSS_ENABLE XE_REG(0x9144) #define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT XE_REG(0x9148) #define XE2_GT_COMPUTE_DSS_2 XE_REG(0x914c) @@ -284,6 +280,11 @@ #define XEHPC_LNCFMISCCFGREG0 XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED) #define XEHPC_OVRLSCCC REG_BIT(0) +/* L3 Cache Control */ +#define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) +#define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) +#define LNCFCMOCS_REG_COUNT 32 + #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) @@ -360,13 +361,13 @@ #define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) #define DISABLE_DOP_GATING REG_BIT(0) +#define RT_CTRL XE_REG_MCR(0xe530) +#define DIS_NULL_QUERY 
REG_BIT(10) + #define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) -#define RT_CTRL XE_REG_MCR(0xe530) -#define DIS_NULL_QUERY REG_BIT(10) - #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define TGM_WRITE_EOM_FORCE REG_BIT(17) @@ -408,6 +409,7 @@ #define FORCEWAKE_USER BIT(1) #define FORCEWAKE_KERNEL_FALLBACK BIT(15) +#define MTL_MEDIA_PERF_LIMIT_REASONS XE_REG(0x138030) #define MTL_MEDIA_MC6 XE_REG(0x138048) #define GT_CORE_STATUS XE_REG(0x138060) @@ -418,16 +420,30 @@ #define GT_GFX_RC6_LOCKED XE_REG(0x138104) #define GT_GFX_RC6 XE_REG(0x138108) +#define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8) +#define GT0_PERF_LIMIT_REASONS_MASK 0xde3 +#define PROCHOT_MASK REG_BIT(0) +#define THERMAL_LIMIT_MASK REG_BIT(1) +#define RATL_MASK REG_BIT(5) +#define VR_THERMALERT_MASK REG_BIT(6) +#define VR_TDC_MASK REG_BIT(7) +#define POWER_LIMIT_4_MASK REG_BIT(8) +#define POWER_LIMIT_1_MASK REG_BIT(10) +#define POWER_LIMIT_2_MASK REG_BIT(11) + #define GT_PERF_STATUS XE_REG(0x1381b4) #define VOLTAGE_MASK REG_GENMASK(10, 0) #define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4)) +#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) +#define VCS_VECS_INTR_ENABLE XE_REG(0x190034) #define GUC_SG_INTR_ENABLE XE_REG(0x190038) #define ENGINE1_MASK REG_GENMASK(31, 16) #define ENGINE0_MASK REG_GENMASK(15, 0) - #define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c) +#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044) +#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048) #define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4)) #define INTR_DATA_VALID REG_BIT(31) @@ -437,10 +453,6 @@ #define OTHER_GUC_INSTANCE 0 #define OTHER_GSC_INSTANCE 6 -#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) -#define VCS_VECS_INTR_ENABLE XE_REG(0x190034) -#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044) -#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048) #define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4)) #define RCS0_RSVD_INTR_MASK XE_REG(0x190090) #define BCS_RSVD_INTR_MASK XE_REG(0x1900a0) @@ -468,16 +480,4 @@ #define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c) #define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) -#define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8) -#define GT0_PERF_LIMIT_REASONS_MASK 0xde3 -#define PROCHOT_MASK REG_BIT(0) -#define THERMAL_LIMIT_MASK REG_BIT(1) -#define RATL_MASK REG_BIT(5) -#define VR_THERMALERT_MASK REG_BIT(6) -#define VR_TDC_MASK REG_BIT(7) -#define POWER_LIMIT_4_MASK REG_BIT(8) -#define POWER_LIMIT_1_MASK REG_BIT(10) -#define POWER_LIMIT_2_MASK REG_BIT(11) -#define MTL_MEDIA_PERF_LIMIT_REASONS XE_REG(0x138030) - #endif -- cgit v1.2.3 From 1374df38e9267bf4588fbc665b3a20afb479f5ac Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 14 Dec 2023 10:47:08 -0800 Subject: drm/xe: Drop some unnecessary header includes Several files were including register headers that they no longer require. Drop the unnecessary includes to reduce build dependencies. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231214184659.2249559-18-matthew.d.roper@intel.com Signed-off-by: Matt Roper Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_execlist.c | 1 - drivers/gpu/drm/xe/xe_ggtt.c | 1 - drivers/gpu/drm/xe/xe_heci_gsc.c | 1 - drivers/gpu/drm/xe/xe_lrc.c | 2 -- drivers/gpu/drm/xe/xe_pci.c | 1 - drivers/gpu/drm/xe/xe_ring_ops.c | 1 - 6 files changed, 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index d82b50de144e..507b6baec3dc 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -12,7 +12,6 @@ #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" -#include "regs/xe_regs.h" #include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 374ae4289fa0..3efd2d066bf7 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -11,7 +11,6 @@ #include #include "regs/xe_gt_regs.h" -#include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 19eda00d5cc4..bfdd33b9b23b 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -8,7 +8,6 @@ #include #include -#include "regs/xe_regs.h" #include "xe_device_types.h" #include "xe_drv.h" #include "xe_heci_gsc.h" diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index d6dfbd0bdc70..b7fa3831b684 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -9,9 +9,7 @@ #include "instructions/xe_gfxpipe_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gpu_commands.h" -#include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" -#include "regs/xe_regs.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 88fb6c54ef99..dcc5ded1558e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -15,7 +15,6 @@ #include #include -#include "regs/xe_regs.h" #include "regs/xe_gt_regs.h" #include "xe_device.h" #include "xe_display.h" diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 1201e42ef823..1e4c06eacd98 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -11,7 +11,6 @@ #include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" -#include "regs/xe_regs.h" #include "xe_exec_queue_types.h" #include "xe_gt.h" #include "xe_lrc.h" -- cgit v1.2.3 From 40fb5ed290d49b568d8547ecfdc5bd83f217dfe1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 14 Dec 2023 13:39:52 -0800 Subject: drm/xe: Return error if drm_buddy_init() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix warning: ../drivers/gpu/drm/xe/xe_ttm_vram_mgr.c: In function ‘__xe_ttm_vram_mgr_init’: ../drivers/gpu/drm/xe/xe_ttm_vram_mgr.c:340:13: error: variable ‘err’ set but not used [-Werror=unused-but-set-variable] 340 | int err; | ^~~ Check for the error return and return it, like done by other drivers. 
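For reference, a minimal sketch of the resulting pattern (illustrative only; the real __xe_ttm_vram_mgr_init performs additional setup that is elided here):

	ttm_resource_manager_init(man, &xe->ttm, size);

	err = drm_buddy_init(&mgr->mm, man->size, default_page_size);
	if (err)
		return err;	/* without this check, 'err' is set but never used */

	ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager);
	ttm_resource_manager_set_used(&mgr->manager, true);
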
Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 953e5dc0fd80..115ec745e502 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -348,6 +348,8 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, ttm_resource_manager_init(man, &xe->ttm, size); err = drm_buddy_init(&mgr->mm, man->size, default_page_size); + if (err) + return err; ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager); ttm_resource_manager_set_used(&mgr->manager, true); -- cgit v1.2.3 From 80166e95679742588bd6c17ede46fa46867739f7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 14 Dec 2023 13:39:53 -0800 Subject: drm/xe/bo: Remove unusued variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo is not used since all the checks are against tbo. Fix warning: ../drivers/gpu/drm/xe/xe_bo.c: In function ‘xe_evict_flags’: ../drivers/gpu/drm/xe/xe_bo.c:250:23: error: variable ‘bo’ set but not used [-Werror=unused-but-set-variable] 250 | struct xe_bo *bo; Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 7c0037aecff3..8e4a3b1f6b93 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -247,8 +247,6 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, static void xe_evict_flags(struct ttm_buffer_object *tbo, struct ttm_placement *placement) { - struct xe_bo *bo; - if (!xe_bo_is_xe_bo(tbo)) { /* Don't handle scatter gather BOs */ if (tbo->type == ttm_bo_type_sg) { @@ -265,8 +263,6 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, * For xe, sg bos that are evicted to system just triggers a * rebind of the sg list upon subsequent validation to XE_PL_TT. */ - - bo = ttm_to_xe_bo(tbo); switch (tbo->resource->mem_type) { case XE_PL_VRAM0: case XE_PL_VRAM1: -- cgit v1.2.3 From 73486d750f56ec612b2e02aa06ceb2171a8c5e93 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 15 Dec 2023 12:33:31 -0800 Subject: drm/xe/display: Fix dummy __i915_inject_probe_error() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_DRM_I915_DEBUG is not set, a dummy __i915_inject_probe_error() is provided on the xe side. Use the same logic as in drivers/gpu/drm/i915/i915_utils.c to ifdef it out. 
This fixes the build with W=1 and without that config: CC [M] drivers/gpu/drm/xe/display/ext/i915_utils.o ../drivers/gpu/drm/xe/display/ext/i915_utils.c:19:5: error: no previous prototype for ‘__i915_inject_probe_error’ [-Werror=missing-prototypes] 19 | int __i915_inject_probe_error(struct drm_i915_private *i915, int err, | ^~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/ext/i915_utils.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c index 981edc2788bc..43b10a2cc508 100644 --- a/drivers/gpu/drm/xe/display/ext/i915_utils.c +++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c @@ -14,9 +14,13 @@ bool i915_vtd_active(struct drm_i915_private *i915) return i915_run_as_guest(); } +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) + /* i915 specific, just put here for shutting it up */ int __i915_inject_probe_error(struct drm_i915_private *i915, int err, const char *func, int line) { return 0; } + +#endif -- cgit v1.2.3 From 7a8bc11782d39e4d35dc7e78405dfe052cbba9cf Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 14 Dec 2023 13:39:54 -0800 Subject: drm/xe: Enable W=1 warnings by default Like done in commit 2250c7ead8ad ("drm/i915: enable W=1 warnings by default") for i915, enable W=1 warnings by default in xe. Reviewed-by: Rodrigo Vivi Acked-by: Jani Nikula Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 47 +++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f4ae063a7005..6790c049d89e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -3,25 +3,34 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -# Add a set of useful warning flags and enable -Werror for CI to prevent -# trivial mistakes from creeping in. We have to do this piecemeal as we reject -# any patch that isn't warning clean, so turning on -Wall -Wextra (or W=1) we -# need to filter out dubious warnings. Still it is our interest -# to keep running locally with W=1 C=1 until we are completely clean. -# -# Note the danger in using -Wall -Wextra is that when CI updates gcc we -# will most likely get a sudden build breakage... Hopefully we will fix -# new warnings before CI updates! 
-subdir-ccflags-y := -Wall -Wextra -subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) -subdir-ccflags-y += $(call cc-disable-warning, type-limits) -subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) -subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) -# clang warnings -subdir-ccflags-y += $(call cc-disable-warning, sign-compare) -subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) -subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) -subdir-ccflags-y += $(call cc-disable-warning, frame-address) +# Unconditionally enable W=1 warnings locally +# --- begin copy-paste W=1 warnings from scripts/Makefile.extrawarn +subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter +subdir-ccflags-y += -Wmissing-declarations +subdir-ccflags-y += $(call cc-option, -Wrestrict) +subdir-ccflags-y += -Wmissing-format-attribute +subdir-ccflags-y += -Wmissing-prototypes +subdir-ccflags-y += -Wold-style-definition +subdir-ccflags-y += -Wmissing-include-dirs +subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable) +subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) +subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) +subdir-ccflags-y += $(call cc-option, -Wformat-overflow) +subdir-ccflags-y += $(call cc-option, -Wformat-truncation) +subdir-ccflags-y += $(call cc-option, -Wstringop-overflow) +subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) +# The following turn off the warnings enabled by -Wextra +ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) +subdir-ccflags-y += -Wno-missing-field-initializers +subdir-ccflags-y += -Wno-type-limits +subdir-ccflags-y += -Wno-shift-negative-value +endif +ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),) +subdir-ccflags-y += -Wno-sign-compare +endif +# --- end copy-paste + +# Enable -Werror in CI and development subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src) -- cgit v1.2.3 From 9212da07187f86db8bd124b1ce551a18b8a710d6 Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Fri, 15 Dec 2023 15:45:33 +0000 Subject: drm/xe/uapi: add exec_queue_id member to drm_xe_wait_user_fence structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit remove the num_engines/instances members from drm_xe_wait_user_fence structure and add a exec_queue_id member Right now this is only checking if the engine list is sane and nothing else. In the end every operation with this IOCTL is a soft check. So, let's formalize that and only use this IOCTL to wait on the fence. exec_queue_id member will help to user space to get proper error code from kernel while in exec_queue reset Signed-off-by: Bommu Krishnaiah Signed-off-by: Rodrigo Vivi Acked-by: Matthew Brost Reviewed-by: Francois Dugast Acked-by: José Roberto de Souza Acked-by: Mateusz Naklicki Signed-off-by: Francois Dugast --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 65 +-------------------------------- include/uapi/drm/xe_drm.h | 17 +++------ 2 files changed, 7 insertions(+), 75 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 4d5c2555ce41..59af65b6ed89 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -50,37 +50,7 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op) return passed ? 
0 : 1; } -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static int check_hw_engines(struct xe_device *xe, - struct drm_xe_engine_class_instance *eci, - int num_engines) -{ - int i; - - for (i = 0; i < num_engines; ++i) { - enum xe_engine_class user_class = - user_to_xe_engine_class[eci[i].engine_class]; - - if (eci[i].gt_id >= xe->info.tile_count) - return -EINVAL; - - if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id), - user_class, eci[i].engine_instance, true)) - return -EINVAL; - } - - return 0; -} - -#define VALID_FLAGS (DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP | \ - DRM_XE_UFENCE_WAIT_FLAG_ABSTIME) +#define VALID_FLAGS DRM_XE_UFENCE_WAIT_FLAG_ABSTIME #define MAX_OP DRM_XE_UFENCE_WAIT_OP_LTE static long to_jiffies_timeout(struct xe_device *xe, @@ -132,16 +102,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); DEFINE_WAIT_FUNC(w_wait, woken_wake_function); struct drm_xe_wait_user_fence *args = data; - struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; - struct drm_xe_engine_class_instance __user *user_eci = - u64_to_user_ptr(args->instances); u64 addr = args->addr; int err; - bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP; long timeout; ktime_t start; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) || + XE_IOCTL_DBG(xe, args->pad2) || XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; @@ -151,41 +118,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->op > MAX_OP)) return -EINVAL; - if (XE_IOCTL_DBG(xe, no_engines && - (args->num_engines || args->instances))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !no_engines && !args->num_engines)) - return -EINVAL; - if (XE_IOCTL_DBG(xe, addr & 0x7)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE)) - return -EINVAL; - - if (!no_engines) { - err = copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - args->num_engines); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, check_hw_engines(xe, eci, - args->num_engines))) - return -EINVAL; - } - timeout = to_jiffies_timeout(xe, args); start = ktime_get(); - /* - * FIXME: Very simple implementation at the moment, single wait queue - * for everything. Could be optimized to have a wait queue for every - * hardware engine. Open coding as 'do_compare' can sleep which doesn't - * work with the wait_event_* macros. - */ add_wait_queue(&xe->ufence_wq, &w_wait); for (;;) { err = do_compare(addr, args->value, args->mask, args->op); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 0895e4d2a981..5a8e3b326347 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1031,8 +1031,7 @@ struct drm_xe_wait_user_fence { /** @op: wait operation (type of comparison) */ __u16 op; -#define DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP (1 << 0) /* e.g. 
Wait on VM bind */ -#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME (1 << 1) +#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME (1 << 0) /** @flags: wait flags */ __u16 flags; @@ -1065,17 +1064,11 @@ struct drm_xe_wait_user_fence { */ __s64 timeout; - /** - * @num_engines: number of engine instances to wait on, must be zero - * when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set - */ - __u64 num_engines; + /** @exec_queue_id: exec_queue_id returned from xe_exec_queue_create_ioctl */ + __u32 exec_queue_id; - /** - * @instances: user pointer to array of drm_xe_engine_class_instance to - * wait on, must be NULL when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set - */ - __u64 instances; + /** @pad2: MBZ */ + __u32 pad2; /** @reserved: Reserved */ __u64 reserved[2]; -- cgit v1.2.3 From e670f0b4ef2419a7a51d1726044c8715ff4d4cda Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Fri, 15 Dec 2023 15:45:34 +0000 Subject: drm/xe/uapi: Return correct error code for xe_wait_user_fence_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently xe_wait_user_fence_ioctl is not checking exec_queue state and blocking until timeout, with this patch wakeup the blocking wait if exec_queue reset happen and returning proper error code Signed-off-by: Bommu Krishnaiah Cc: Oak Zeng Cc: Kempczynski Zbigniew Cc: Matthew Brost Reviewed-by: Rodrigo Vivi Reviewed-by: Matthew Brost Acked-by: José Roberto de Souza Acked-by: Mateusz Naklicki Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 ++ drivers/gpu/drm/xe/xe_execlist.c | 7 +++++++ drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_wait_user_fence.c | 30 ++++++++++++++++++++++++------ 4 files changed, 43 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index c7aefa1c8c31..bcf08b00d94a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -217,6 +217,8 @@ struct xe_exec_queue_ops { * signalled when this function is called. 
*/ void (*resume)(struct xe_exec_queue *q); + /** @reset_status: check exec queue reset status */ + bool (*reset_status)(struct xe_exec_queue *q); }; #endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 507b6baec3dc..96b5224eb478 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -442,6 +442,12 @@ static void execlist_exec_queue_resume(struct xe_exec_queue *q) /* NIY */ } +static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q) +{ + /* NIY */ + return false; +} + static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, @@ -453,6 +459,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .suspend = execlist_exec_queue_suspend, .suspend_wait = execlist_exec_queue_suspend_wait, .resume = execlist_exec_queue_resume, + .reset_status = execlist_exec_queue_reset_status, }; int xe_execlist_init(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ad5e19ecd33c..21ac68e3246f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -854,6 +854,10 @@ static void simple_error_capture(struct xe_exec_queue *q) static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + + /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */ + wake_up_all(&xe->ufence_wq); if (xe_exec_queue_is_lr(q)) queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); @@ -1394,6 +1398,11 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q) guc_exec_queue_add_msg(q, msg, RESUME); } +static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) +{ + return exec_queue_reset(q); +} + /* * All of these functions are an abstraction layer which other parts of XE can * use to trap into the GuC backend. 
All of these functions, aside from init, @@ -1411,6 +1420,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .suspend = guc_exec_queue_suspend, .suspend_wait = guc_exec_queue_suspend_wait, .resume = guc_exec_queue_resume, + .reset_status = guc_exec_queue_reset_status, }; static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index 59af65b6ed89..b0a7896f7fcb 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -13,6 +13,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_macros.h" +#include "xe_exec_queue.h" static int do_compare(u64 addr, u64 value, u64 mask, u16 op) { @@ -100,10 +101,12 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); DEFINE_WAIT_FUNC(w_wait, woken_wake_function); struct drm_xe_wait_user_fence *args = data; + struct xe_exec_queue *q = NULL; u64 addr = args->addr; - int err; + int err = 0; long timeout; ktime_t start; @@ -121,6 +124,12 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, addr & 0x7)) return -EINVAL; + if (args->exec_queue_id) { + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) + return -ENOENT; + } + timeout = to_jiffies_timeout(xe, args); start = ktime_get(); @@ -136,6 +145,14 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, break; } + if (q) { + if (q->ops->reset_status(q)) { + drm_info(&xe->drm, "exec gueue reset detected\n"); + err = -EIO; + break; + } + } + if (!timeout) { err = -ETIME; break; @@ -151,10 +168,11 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, args->timeout = 0; } - if (XE_IOCTL_DBG(xe, err < 0)) - return err; - else if (XE_IOCTL_DBG(xe, !timeout)) - return -ETIME; + if (!timeout && !(err < 0)) + err = -ETIME; + + if (q) + xe_exec_queue_put(q); - return 0; + return err; } -- cgit v1.2.3 From 90a8b23f9b85a05ac3147498c42b32348bfcc274 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 15 Dec 2023 15:45:37 +0000 Subject: drm/xe/pmu: Remove PMU from Xe till uapi is finalized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PMU uapi is likely to change in the future. Till the uapi is finalized, remove PMU from Xe. PMU can be re-added after uapi is finalized. 
v2: Include xe_drm.h in xe/tests/xe_dma_buf.c (Francois) Signed-off-by: Ashutosh Dixit Acked-by: Aravind Iddamsetty Acked-by: Lucas De Marchi Reviewed-by: Umesh Nerlige Ramappa Acked-by: José Roberto de Souza Acked-by: Mateusz Naklicki Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 2 - drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 - drivers/gpu/drm/xe/tests/xe_dma_buf.c | 2 + drivers/gpu/drm/xe/xe_device.c | 2 - drivers/gpu/drm/xe/xe_device_types.h | 4 - drivers/gpu/drm/xe/xe_gt.c | 2 - drivers/gpu/drm/xe/xe_module.c | 5 - drivers/gpu/drm/xe/xe_pmu.c | 645 ---------------------------------- drivers/gpu/drm/xe/xe_pmu.h | 25 -- drivers/gpu/drm/xe/xe_pmu_types.h | 68 ---- include/uapi/drm/xe_drm.h | 40 --- 11 files changed, 2 insertions(+), 798 deletions(-) delete mode 100644 drivers/gpu/drm/xe/xe_pmu.c delete mode 100644 drivers/gpu/drm/xe/xe_pmu.h delete mode 100644 drivers/gpu/drm/xe/xe_pmu_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 6790c049d89e..53bd2a8ba1ae 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -276,8 +276,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/skl_universal_plane.o \ i915-display/skl_watermark.o -xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o - ifeq ($(CONFIG_ACPI),y) xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_acpi.o \ diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d7f52a634c11..1dd361046b5d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -316,11 +316,6 @@ #define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) #define GLOBAL_INVALIDATION_MODE REG_BIT(2) -#define XE_OAG_RC0_ANY_ENGINE_BUSY_FREE XE_REG(0xdb80) -#define XE_OAG_ANY_MEDIA_FF_BUSY_FREE XE_REG(0xdba0) -#define XE_OAG_BLT_BUSY_FREE XE_REG(0xdbbc) -#define XE_OAG_RENDER_BUSY_FREE XE_REG(0xdbdc) - #define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) #define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index bb6f6424e06f..9f6d571d7fa9 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include + #include #include diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 221e87584352..d9ae77fe7382 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -529,8 +529,6 @@ int xe_device_probe(struct xe_device *xe) xe_debugfs_register(xe); - xe_pmu_register(&xe->pmu); - xe_hwmon_register(xe); err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index d1a48456e9a3..c45ef17b3473 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -18,7 +18,6 @@ #include "xe_lmtt_types.h" #include "xe_platform_types.h" #include "xe_pt_types.h" -#include "xe_pmu.h" #include "xe_sriov_types.h" #include "xe_step_types.h" @@ -427,9 +426,6 @@ struct xe_device { */ struct task_struct *pm_callback_task; - /** @pmu: performance monitoring unit */ - struct xe_pmu pmu; - /** @hwmon: hwmon subsystem integration */ struct xe_hwmon *hwmon; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index dfd9cf01a5d5..f5d18e98f8b6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -709,8 +709,6 @@ int xe_gt_suspend(struct 
xe_gt *gt) if (err) goto err_msg; - xe_pmu_suspend(gt); - err = xe_uc_suspend(>->uc); if (err) goto err_force_wake; diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 51bf69b7ab22..110b69864656 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -11,7 +11,6 @@ #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" -#include "xe_pmu.h" #include "xe_sched_job.h" struct xe_modparam xe_modparam = { @@ -63,10 +62,6 @@ static const struct init_funcs init_funcs[] = { .init = xe_sched_job_module_init, .exit = xe_sched_job_module_exit, }, - { - .init = xe_pmu_init, - .exit = xe_pmu_exit, - }, { .init = xe_register_pci_driver, .exit = xe_unregister_pci_driver, diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c deleted file mode 100644 index 9d0b7887cfc4..000000000000 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ /dev/null @@ -1,645 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include -#include -#include - -#include "regs/xe_gt_regs.h" -#include "xe_device.h" -#include "xe_gt_clock.h" -#include "xe_mmio.h" - -static cpumask_t xe_pmu_cpumask; -static unsigned int xe_pmu_target_cpu = -1; - -static unsigned int config_gt_id(const u64 config) -{ - return config >> __DRM_XE_PMU_GT_SHIFT; -} - -static u64 config_counter(const u64 config) -{ - return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT); -} - -static void xe_pmu_event_destroy(struct perf_event *event) -{ - struct xe_device *xe = - container_of(event->pmu, typeof(*xe), pmu.base); - - drm_WARN_ON(&xe->drm, event->parent); - - drm_dev_put(&xe->drm); -} - -static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type) -{ - u64 val; - - switch (sample_type) { - case __XE_SAMPLE_RENDER_GROUP_BUSY: - val = xe_mmio_read32(gt, XE_OAG_RENDER_BUSY_FREE); - break; - case __XE_SAMPLE_COPY_GROUP_BUSY: - val = xe_mmio_read32(gt, XE_OAG_BLT_BUSY_FREE); - break; - case __XE_SAMPLE_MEDIA_GROUP_BUSY: - val = xe_mmio_read32(gt, XE_OAG_ANY_MEDIA_FF_BUSY_FREE); - break; - case __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY: - val = xe_mmio_read32(gt, XE_OAG_RC0_ANY_ENGINE_BUSY_FREE); - break; - default: - drm_warn(>->tile->xe->drm, "unknown pmu event\n"); - } - - return xe_gt_clock_cycles_to_ns(gt, val * 16); -} - -static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config) -{ - int sample_type = config_counter(config); - const unsigned int gt_id = gt->info.id; - struct xe_device *xe = gt->tile->xe; - struct xe_pmu *pmu = &xe->pmu; - unsigned long flags; - bool device_awake; - u64 val; - - device_awake = xe_device_mem_access_get_if_ongoing(xe); - if (device_awake) { - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); - val = __engine_group_busyness_read(gt, sample_type); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); - xe_device_mem_access_put(xe); - } - - spin_lock_irqsave(&pmu->lock, flags); - - if (device_awake) - pmu->sample[gt_id][sample_type] = val; - else - val = pmu->sample[gt_id][sample_type]; - - spin_unlock_irqrestore(&pmu->lock, flags); - - return val; -} - -static void engine_group_busyness_store(struct xe_gt *gt) -{ - struct xe_pmu *pmu = >->tile->xe->pmu; - unsigned int gt_id = gt->info.id; - unsigned long flags; - int i; - - spin_lock_irqsave(&pmu->lock, flags); - - for (i = __XE_SAMPLE_RENDER_GROUP_BUSY; i <= __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY; i++) - pmu->sample[gt_id][i] = __engine_group_busyness_read(gt, i); - - spin_unlock_irqrestore(&pmu->lock, flags); -} - -static int -config_status(struct 
xe_device *xe, u64 config) -{ - unsigned int gt_id = config_gt_id(config); - struct xe_gt *gt = xe_device_get_gt(xe, gt_id); - - if (gt_id >= XE_PMU_MAX_GT) - return -ENOENT; - - switch (config_counter(config)) { - case DRM_XE_PMU_RENDER_GROUP_BUSY(0): - case DRM_XE_PMU_COPY_GROUP_BUSY(0): - case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0): - if (gt->info.type == XE_GT_TYPE_MEDIA) - return -ENOENT; - break; - case DRM_XE_PMU_MEDIA_GROUP_BUSY(0): - if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0)))) - return -ENOENT; - break; - default: - return -ENOENT; - } - - return 0; -} - -static int xe_pmu_event_init(struct perf_event *event) -{ - struct xe_device *xe = - container_of(event->pmu, typeof(*xe), pmu.base); - struct xe_pmu *pmu = &xe->pmu; - int ret; - - if (pmu->closed) - return -ENODEV; - - if (event->attr.type != event->pmu->type) - return -ENOENT; - - /* unsupported modes and filters */ - if (event->attr.sample_period) /* no sampling */ - return -EINVAL; - - if (has_branch_stack(event)) - return -EOPNOTSUPP; - - if (event->cpu < 0) - return -EINVAL; - - /* only allow running on one cpu at a time */ - if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask)) - return -EINVAL; - - ret = config_status(xe, event->attr.config); - if (ret) - return ret; - - if (!event->parent) { - drm_dev_get(&xe->drm); - event->destroy = xe_pmu_event_destroy; - } - - return 0; -} - -static u64 __xe_pmu_event_read(struct perf_event *event) -{ - struct xe_device *xe = - container_of(event->pmu, typeof(*xe), pmu.base); - const unsigned int gt_id = config_gt_id(event->attr.config); - const u64 config = event->attr.config; - struct xe_gt *gt = xe_device_get_gt(xe, gt_id); - u64 val; - - switch (config_counter(config)) { - case DRM_XE_PMU_RENDER_GROUP_BUSY(0): - case DRM_XE_PMU_COPY_GROUP_BUSY(0): - case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0): - case DRM_XE_PMU_MEDIA_GROUP_BUSY(0): - val = engine_group_busyness_read(gt, config); - break; - default: - drm_warn(>->tile->xe->drm, "unknown pmu event\n"); - } - - return val; -} - -static void xe_pmu_event_read(struct perf_event *event) -{ - struct xe_device *xe = - container_of(event->pmu, typeof(*xe), pmu.base); - struct hw_perf_event *hwc = &event->hw; - struct xe_pmu *pmu = &xe->pmu; - u64 prev, new; - - if (pmu->closed) { - event->hw.state = PERF_HES_STOPPED; - return; - } -again: - prev = local64_read(&hwc->prev_count); - new = __xe_pmu_event_read(event); - - if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) - goto again; - - local64_add(new - prev, &event->count); -} - -static void xe_pmu_enable(struct perf_event *event) -{ - /* - * Store the current counter value so we can report the correct delta - * for all listeners. Even when the event was already enabled and has - * an existing non-zero value. 
- */ - local64_set(&event->hw.prev_count, __xe_pmu_event_read(event)); -} - -static void xe_pmu_event_start(struct perf_event *event, int flags) -{ - struct xe_device *xe = - container_of(event->pmu, typeof(*xe), pmu.base); - struct xe_pmu *pmu = &xe->pmu; - - if (pmu->closed) - return; - - xe_pmu_enable(event); - event->hw.state = 0; -} - -static void xe_pmu_event_stop(struct perf_event *event, int flags) -{ - if (flags & PERF_EF_UPDATE) - xe_pmu_event_read(event); - - event->hw.state = PERF_HES_STOPPED; -} - -static int xe_pmu_event_add(struct perf_event *event, int flags) -{ - struct xe_device *xe = - container_of(event->pmu, typeof(*xe), pmu.base); - struct xe_pmu *pmu = &xe->pmu; - - if (pmu->closed) - return -ENODEV; - - if (flags & PERF_EF_START) - xe_pmu_event_start(event, flags); - - return 0; -} - -static void xe_pmu_event_del(struct perf_event *event, int flags) -{ - xe_pmu_event_stop(event, PERF_EF_UPDATE); -} - -static int xe_pmu_event_event_idx(struct perf_event *event) -{ - return 0; -} - -struct xe_ext_attribute { - struct device_attribute attr; - unsigned long val; -}; - -static ssize_t xe_pmu_event_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct xe_ext_attribute *eattr; - - eattr = container_of(attr, struct xe_ext_attribute, attr); - return sprintf(buf, "config=0x%lx\n", eattr->val); -} - -static ssize_t cpumask_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &xe_pmu_cpumask); -} - -static DEVICE_ATTR_RO(cpumask); - -static struct attribute *xe_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group xe_pmu_cpumask_attr_group = { - .attrs = xe_cpumask_attrs, -}; - -#define __event(__counter, __name, __unit) \ -{ \ - .counter = (__counter), \ - .name = (__name), \ - .unit = (__unit), \ - .global = false, \ -} - -#define __global_event(__counter, __name, __unit) \ -{ \ - .counter = (__counter), \ - .name = (__name), \ - .unit = (__unit), \ - .global = true, \ -} - -static struct xe_ext_attribute * -add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config) -{ - sysfs_attr_init(&attr->attr.attr); - attr->attr.attr.name = name; - attr->attr.attr.mode = 0444; - attr->attr.show = xe_pmu_event_show; - attr->val = config; - - return ++attr; -} - -static struct perf_pmu_events_attr * -add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, - const char *str) -{ - sysfs_attr_init(&attr->attr.attr); - attr->attr.attr.name = name; - attr->attr.attr.mode = 0444; - attr->attr.show = perf_event_sysfs_show; - attr->event_str = str; - - return ++attr; -} - -static struct attribute ** -create_event_attributes(struct xe_pmu *pmu) -{ - struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); - static const struct { - unsigned int counter; - const char *name; - const char *unit; - bool global; - } events[] = { - __event(0, "render-group-busy", "ns"), - __event(1, "copy-group-busy", "ns"), - __event(2, "media-group-busy", "ns"), - __event(3, "any-engine-group-busy", "ns"), - }; - - struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter; - struct xe_ext_attribute *xe_attr = NULL, *xe_iter; - struct attribute **attr = NULL, **attr_iter; - unsigned int count = 0; - unsigned int i, j; - struct xe_gt *gt; - - /* Count how many counters we will be exposing. 
*/ - for_each_gt(gt, xe, j) { - for (i = 0; i < ARRAY_SIZE(events); i++) { - u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter); - - if (!config_status(xe, config)) - count++; - } - } - - /* Allocate attribute objects and table. */ - xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL); - if (!xe_attr) - goto err_alloc; - - pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL); - if (!pmu_attr) - goto err_alloc; - - /* Max one pointer of each attribute type plus a termination entry. */ - attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL); - if (!attr) - goto err_alloc; - - xe_iter = xe_attr; - pmu_iter = pmu_attr; - attr_iter = attr; - - for_each_gt(gt, xe, j) { - for (i = 0; i < ARRAY_SIZE(events); i++) { - u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter); - char *str; - - if (config_status(xe, config)) - continue; - - if (events[i].global) - str = kstrdup(events[i].name, GFP_KERNEL); - else - str = kasprintf(GFP_KERNEL, "%s-gt%u", - events[i].name, j); - if (!str) - goto err; - - *attr_iter++ = &xe_iter->attr.attr; - xe_iter = add_xe_attr(xe_iter, str, config); - - if (events[i].unit) { - if (events[i].global) - str = kasprintf(GFP_KERNEL, "%s.unit", - events[i].name); - else - str = kasprintf(GFP_KERNEL, "%s-gt%u.unit", - events[i].name, j); - if (!str) - goto err; - - *attr_iter++ = &pmu_iter->attr.attr; - pmu_iter = add_pmu_attr(pmu_iter, str, - events[i].unit); - } - } - } - - pmu->xe_attr = xe_attr; - pmu->pmu_attr = pmu_attr; - - return attr; - -err: - for (attr_iter = attr; *attr_iter; attr_iter++) - kfree((*attr_iter)->name); - -err_alloc: - kfree(attr); - kfree(xe_attr); - kfree(pmu_attr); - - return NULL; -} - -static void free_event_attributes(struct xe_pmu *pmu) -{ - struct attribute **attr_iter = pmu->events_attr_group.attrs; - - for (; *attr_iter; attr_iter++) - kfree((*attr_iter)->name); - - kfree(pmu->events_attr_group.attrs); - kfree(pmu->xe_attr); - kfree(pmu->pmu_attr); - - pmu->events_attr_group.attrs = NULL; - pmu->xe_attr = NULL; - pmu->pmu_attr = NULL; -} - -static int xe_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); - - /* Select the first online CPU as a designated reader. */ - if (cpumask_empty(&xe_pmu_cpumask)) - cpumask_set_cpu(cpu, &xe_pmu_cpumask); - - return 0; -} - -static int xe_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) -{ - struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); - unsigned int target = xe_pmu_target_cpu; - - /* - * Unregistering an instance generates a CPU offline event which we must - * ignore to avoid incorrectly modifying the shared xe_pmu_cpumask. - */ - if (pmu->closed) - return 0; - - if (cpumask_test_and_clear_cpu(cpu, &xe_pmu_cpumask)) { - target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); - - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &xe_pmu_cpumask); - xe_pmu_target_cpu = target; - } - } - - if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) { - perf_pmu_migrate_context(&pmu->base, cpu, target); - pmu->cpuhp.cpu = target; - } - - return 0; -} - -static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; - -int xe_pmu_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "perf/x86/intel/xe:online", - xe_pmu_cpu_online, - xe_pmu_cpu_offline); - if (ret < 0) - pr_notice("Failed to setup cpuhp state for xe PMU! 
(%d)\n", - ret); - else - cpuhp_slot = ret; - - return 0; -} - -void xe_pmu_exit(void) -{ - if (cpuhp_slot != CPUHP_INVALID) - cpuhp_remove_multi_state(cpuhp_slot); -} - -static int xe_pmu_register_cpuhp_state(struct xe_pmu *pmu) -{ - if (cpuhp_slot == CPUHP_INVALID) - return -EINVAL; - - return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node); -} - -static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu) -{ - cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node); -} - -void xe_pmu_suspend(struct xe_gt *gt) -{ - engine_group_busyness_store(gt); -} - -static void xe_pmu_unregister(struct drm_device *device, void *arg) -{ - struct xe_pmu *pmu = arg; - - if (!pmu->base.event_init) - return; - - /* - * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu - * ensures all currently executing ones will have exited before we - * proceed with unregistration. - */ - pmu->closed = true; - synchronize_rcu(); - - xe_pmu_unregister_cpuhp_state(pmu); - - perf_pmu_unregister(&pmu->base); - pmu->base.event_init = NULL; - kfree(pmu->base.attr_groups); - kfree(pmu->name); - free_event_attributes(pmu); -} - -void xe_pmu_register(struct xe_pmu *pmu) -{ - struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); - const struct attribute_group *attr_groups[] = { - &pmu->events_attr_group, - &xe_pmu_cpumask_attr_group, - NULL - }; - - int ret = -ENOMEM; - - spin_lock_init(&pmu->lock); - pmu->cpuhp.cpu = -1; - - pmu->name = kasprintf(GFP_KERNEL, - "xe_%s", - dev_name(xe->drm.dev)); - if (pmu->name) - /* tools/perf reserves colons as special. */ - strreplace((char *)pmu->name, ':', '_'); - - if (!pmu->name) - goto err; - - pmu->events_attr_group.name = "events"; - pmu->events_attr_group.attrs = create_event_attributes(pmu); - if (!pmu->events_attr_group.attrs) - goto err_name; - - pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), - GFP_KERNEL); - if (!pmu->base.attr_groups) - goto err_attr; - - pmu->base.module = THIS_MODULE; - pmu->base.task_ctx_nr = perf_invalid_context; - pmu->base.event_init = xe_pmu_event_init; - pmu->base.add = xe_pmu_event_add; - pmu->base.del = xe_pmu_event_del; - pmu->base.start = xe_pmu_event_start; - pmu->base.stop = xe_pmu_event_stop; - pmu->base.read = xe_pmu_event_read; - pmu->base.event_idx = xe_pmu_event_event_idx; - - ret = perf_pmu_register(&pmu->base, pmu->name, -1); - if (ret) - goto err_groups; - - ret = xe_pmu_register_cpuhp_state(pmu); - if (ret) - goto err_unreg; - - ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu); - if (ret) - goto err_cpuhp; - - return; - -err_cpuhp: - xe_pmu_unregister_cpuhp_state(pmu); -err_unreg: - perf_pmu_unregister(&pmu->base); -err_groups: - kfree(pmu->base.attr_groups); -err_attr: - pmu->base.event_init = NULL; - free_event_attributes(pmu); -err_name: - kfree(pmu->name); -err: - drm_notice(&xe->drm, "Failed to register PMU!\n"); -} diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h deleted file mode 100644 index a99d4ddd023e..000000000000 --- a/drivers/gpu/drm/xe/xe_pmu.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_PMU_H_ -#define _XE_PMU_H_ - -#include "xe_gt_types.h" -#include "xe_pmu_types.h" - -#if IS_ENABLED(CONFIG_PERF_EVENTS) -int xe_pmu_init(void); -void xe_pmu_exit(void); -void xe_pmu_register(struct xe_pmu *pmu); -void xe_pmu_suspend(struct xe_gt *gt); -#else -static inline int xe_pmu_init(void) { return 0; } -static inline void xe_pmu_exit(void) {} -static inline 
void xe_pmu_register(struct xe_pmu *pmu) {} -static inline void xe_pmu_suspend(struct xe_gt *gt) {} -#endif - -#endif - diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h deleted file mode 100644 index 9cadbd243f57..000000000000 --- a/drivers/gpu/drm/xe/xe_pmu_types.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_PMU_TYPES_H_ -#define _XE_PMU_TYPES_H_ - -#include -#include -#include - -enum { - __XE_SAMPLE_RENDER_GROUP_BUSY, - __XE_SAMPLE_COPY_GROUP_BUSY, - __XE_SAMPLE_MEDIA_GROUP_BUSY, - __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY, - __XE_NUM_PMU_SAMPLERS -}; - -#define XE_PMU_MAX_GT 2 - -struct xe_pmu { - /** - * @cpuhp: Struct used for CPU hotplug handling. - */ - struct { - struct hlist_node node; - unsigned int cpu; - } cpuhp; - /** - * @base: PMU base. - */ - struct pmu base; - /** - * @closed: xe is unregistering. - */ - bool closed; - /** - * @name: Name as registered with perf core. - */ - const char *name; - /** - * @lock: Lock protecting enable mask and ref count handling. - */ - spinlock_t lock; - /** - * @sample: Current and previous (raw) counters. - * - * These counters are updated when the device is awake. - * - */ - u64 sample[XE_PMU_MAX_GT][__XE_NUM_PMU_SAMPLERS]; - /** - * @events_attr_group: Device events attribute group. - */ - struct attribute_group events_attr_group; - /** - * @xe_attr: Memory block holding device attributes. - */ - void *xe_attr; - /** - * @pmu_attr: Memory block holding device attributes. - */ - void *pmu_attr; -}; - -#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d122f985435a..e1e8fb1846ea 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1074,46 +1074,6 @@ struct drm_xe_wait_user_fence { /** @reserved: Reserved */ __u64 reserved[2]; }; - -/** - * DOC: XE PMU event config IDs - * - * Check 'man perf_event_open' to use the ID's DRM_XE_PMU_XXXX listed in xe_drm.h - * in 'struct perf_event_attr' as part of perf_event_open syscall to read a - * particular event. - * - * For example to open the DRMXE_PMU_RENDER_GROUP_BUSY(0): - * - * .. code-block:: C - * - * struct perf_event_attr attr; - * long long count; - * int cpu = 0; - * int fd; - * - * memset(&attr, 0, sizeof(struct perf_event_attr)); - * attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_56_00.0/type - * attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; - * attr.use_clockid = 1; - * attr.clockid = CLOCK_MONOTONIC; - * attr.config = DRM_XE_PMU_RENDER_GROUP_BUSY(0); - * - * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); - */ - -/* - * Top bits of every counter are GT id. 
- */ -#define __DRM_XE_PMU_GT_SHIFT (56) - -#define ___DRM_XE_PMU_OTHER(gt, x) \ - (((__u64)(x)) | ((__u64)(gt) << __DRM_XE_PMU_GT_SHIFT)) - -#define DRM_XE_PMU_RENDER_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 0) -#define DRM_XE_PMU_COPY_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 1) -#define DRM_XE_PMU_MEDIA_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 2) -#define DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___DRM_XE_PMU_OTHER(gt, 3) - #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 7e9337c29fb9251e27d7af092108f05857e733c1 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 15 Dec 2023 15:45:38 +0000 Subject: drm/xe/uapi: Ensure every uapi struct has drm_xe prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To ensure consistency and avoid possible later conflicts, let's add drm_xe prefix to xe_user_extension struct. Cc: Francois Dugast Suggested-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi Reviewed-by: Matthew Brost Acked-by: José Roberto de Souza Acked-by: Mateusz Naklicki --- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- include/uapi/drm/xe_drm.h | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index eeb9605dd45f..aa478c66edbb 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -453,7 +453,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue u64 extensions, int ext_number, bool create) { u64 __user *address = u64_to_user_ptr(extensions); - struct xe_user_extension ext; + struct drm_xe_user_extension ext; int err; u32 idx; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index e1e8fb1846ea..87ff6eaa788e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -27,7 +27,7 @@ extern "C" { #define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS" /** - * struct xe_user_extension - Base class for defining a chain of extensions + * struct drm_xe_user_extension - Base class for defining a chain of extensions * * Many interfaces need to grow over time. In most cases we can simply * extend the struct and have userspace pass in more data. Another option, @@ -45,29 +45,29 @@ extern "C" { * * .. code-block:: C * - * struct xe_user_extension ext3 { + * struct drm_xe_user_extension ext3 { * .next_extension = 0, // end * .name = ..., * }; - * struct xe_user_extension ext2 { + * struct drm_xe_user_extension ext2 { * .next_extension = (uintptr_t)&ext3, * .name = ..., * }; - * struct xe_user_extension ext1 { + * struct drm_xe_user_extension ext1 { * .next_extension = (uintptr_t)&ext2, * .name = ..., * }; * - * Typically the struct xe_user_extension would be embedded in some uAPI + * Typically the struct drm_xe_user_extension would be embedded in some uAPI * struct, and in this case we would feed it the head of the chain(i.e ext1), * which would then apply all of the above extensions. * */ -struct xe_user_extension { +struct drm_xe_user_extension { /** * @next_extension: * - * Pointer to the next struct xe_user_extension, or zero if the end. + * Pointer to the next struct drm_xe_user_extension, or zero if the end. */ __u64 next_extension; @@ -78,7 +78,7 @@ struct xe_user_extension { * * Also note that the name space for this is not global for the whole * driver, but rather its scope/meaning is limited to the specific piece - * of uAPI which has embedded the struct xe_user_extension. 
+ * of uAPI which has embedded the struct drm_xe_user_extension. */ __u32 name; @@ -625,7 +625,7 @@ struct drm_xe_gem_mmap_offset { /** struct drm_xe_ext_set_property - XE set property extension */ struct drm_xe_ext_set_property { /** @base: base user extension */ - struct xe_user_extension base; + struct drm_xe_user_extension base; /** @property: property to set */ __u32 property; -- cgit v1.2.3 From d3d767396a02fa225eab7f919b727cff4e3304bc Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 15 Dec 2023 15:45:39 +0000 Subject: drm/xe/uapi: Remove sync binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove concept of async vs sync VM bind queues, rather make all binds async. The following bits have dropped from the uAPI: DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC DRM_XE_ENGINE_CLASS_VM_BIND_SYNC DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT DRM_XE_VM_BIND_FLAG_ASYNC To implement sync binds the UMD is expected to use the out-fence interface. v2: Send correct version v3: Drop drm_xe_syncs Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Francois Dugast Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Acked-by: José Roberto de Souza Acked-by: Mateusz Naklicki Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 7 +-- drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 - drivers/gpu/drm/xe/xe_vm.c | 75 ++++---------------------------- drivers/gpu/drm/xe/xe_vm_types.h | 13 +++--- include/uapi/drm/xe_drm.h | 11 ++--- 5 files changed, 20 insertions(+), 88 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index aa478c66edbb..44fe8097b7cd 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -625,10 +625,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) return -EINVAL; - if (eci[0].engine_class >= DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC) { - bool sync = eci[0].engine_class == - DRM_XE_ENGINE_CLASS_VM_BIND_SYNC; - + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { for_each_gt(gt, xe, id) { struct xe_exec_queue *new; @@ -654,8 +651,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, args->width, hwe, EXEC_QUEUE_FLAG_PERSISTENT | EXEC_QUEUE_FLAG_VM | - (sync ? 0 : - EXEC_QUEUE_FLAG_VM_ASYNC) | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index bcf08b00d94a..3d7e704ec3d9 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -84,8 +84,6 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_VM BIT(4) /* child of VM queue for multi-tile VM jobs */ #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) -/* VM jobs for this queue are asynchronous */ -#define EXEC_QUEUE_FLAG_VM_ASYNC BIT(6) /** * @flags: flags for this exec queue, should statically setup aside from ban diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2f3df9ee67c9..322c1eccecca 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1343,9 +1343,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) struct xe_gt *gt = tile->primary_gt; struct xe_vm *migrate_vm; struct xe_exec_queue *q; - u32 create_flags = EXEC_QUEUE_FLAG_VM | - ((flags & XE_VM_FLAG_ASYNC_DEFAULT) ? 
- EXEC_QUEUE_FLAG_VM_ASYNC : 0); + u32 create_flags = EXEC_QUEUE_FLAG_VM; if (!vm->pt_root[id]) continue; @@ -1712,12 +1710,6 @@ err_fences: return ERR_PTR(err); } -static bool xe_vm_sync_mode(struct xe_vm *vm, struct xe_exec_queue *q) -{ - return q ? !(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC) : - !(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT); -} - static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool immediate, bool first_op, @@ -1747,8 +1739,6 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, if (last_op) xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - if (last_op && xe_vm_sync_mode(vm, q)) - dma_fence_wait(fence, true); dma_fence_put(fence); return 0; @@ -1791,8 +1781,6 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, xe_vma_destroy(vma, fence); if (last_op) xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - if (last_op && xe_vm_sync_mode(vm, q)) - dma_fence_wait(fence, true); dma_fence_put(fence); return 0; @@ -1800,7 +1788,6 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ DRM_XE_VM_CREATE_FLAG_LR_MODE | \ - DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \ DRM_XE_VM_CREATE_FLAG_FAULT_MODE) int xe_vm_create_ioctl(struct drm_device *dev, void *data, @@ -1854,8 +1841,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, flags |= XE_VM_FLAG_SCRATCH_PAGE; if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) flags |= XE_VM_FLAG_LR_MODE; - if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT) - flags |= XE_VM_FLAG_ASYNC_DEFAULT; if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; @@ -2263,8 +2248,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct drm_gpuva_ops *ops, struct xe_sync_entry *syncs, u32 num_syncs, - struct list_head *ops_list, bool last, - bool async) + struct list_head *ops_list, bool last) { struct xe_vma_op *last_op = NULL; struct drm_gpuva_op *__op; @@ -2696,23 +2680,22 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #ifdef TEST_VM_ASYNC_OPS_ERROR #define SUPPORTED_FLAGS \ - (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_ASYNC | \ - DRM_XE_VM_BIND_FLAG_READONLY | DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ - DRM_XE_VM_BIND_FLAG_NULL | 0xffff) + (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \ + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff) #else #define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_ASYNC | DRM_XE_VM_BIND_FLAG_READONLY | \ + (DRM_XE_VM_BIND_FLAG_READONLY | \ DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ 0xffff) #endif #define XE_64K_PAGE_MASK 0xffffull +#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) #define MAX_BINDS 512 /* FIXME: Picking random upper limit */ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct drm_xe_vm_bind *args, - struct drm_xe_vm_bind_op **bind_ops, - bool *async) + struct drm_xe_vm_bind_op **bind_ops) { int err; int i; @@ -2775,18 +2758,6 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, goto free_bind_ops; } - if (i == 0) { - *async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC); - if (XE_IOCTL_DBG(xe, !*async && args->num_syncs)) { - err = -EINVAL; - goto free_bind_ops; - } - } else if (XE_IOCTL_DBG(xe, *async != - !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC))) { - err = -EINVAL; - goto free_bind_ops; - } - if (XE_IOCTL_DBG(xe, 
op > DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || XE_IOCTL_DBG(xe, obj && is_null) || @@ -2854,14 +2825,6 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, fence); - - if (xe_vm_sync_mode(vm, q)) { - long timeout = dma_fence_wait(fence, true); - - if (timeout < 0) - err = -EINTR; - } - dma_fence_put(fence); return err; @@ -2881,18 +2844,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; LIST_HEAD(ops_list); - bool async; int err; int i; - err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async); + err = vm_bind_ioctl_check_args(xe, args, &bind_ops); if (err) return err; - if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - if (args->exec_queue_id) { q = xe_exec_queue_lookup(xef, args->exec_queue_id); if (XE_IOCTL_DBG(xe, !q)) { @@ -2904,12 +2862,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) err = -EINVAL; goto put_exec_queue; } - - if (XE_IOCTL_DBG(xe, args->num_binds && async != - !!(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC))) { - err = -EINVAL; - goto put_exec_queue; - } } vm = xe_vm_lookup(xef, args->vm_id); @@ -2918,14 +2870,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto put_exec_queue; } - if (!args->exec_queue_id) { - if (XE_IOCTL_DBG(xe, args->num_binds && async != - !!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT))) { - err = -EINVAL; - goto put_vm; - } - } - err = down_write_killable(&vm->lock); if (err) goto put_vm; @@ -3060,8 +3004,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs, &ops_list, - i == args->num_binds - 1, - async); + i == args->num_binds - 1); if (err) goto unwind_ops; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 2e023596cb15..63e8a50b88e9 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -138,13 +138,12 @@ struct xe_vm { */ #define XE_VM_FLAG_64K BIT(0) #define XE_VM_FLAG_LR_MODE BIT(1) -#define XE_VM_FLAG_ASYNC_DEFAULT BIT(2) -#define XE_VM_FLAG_MIGRATION BIT(3) -#define XE_VM_FLAG_SCRATCH_PAGE BIT(4) -#define XE_VM_FLAG_FAULT_MODE BIT(5) -#define XE_VM_FLAG_BANNED BIT(6) -#define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(8, 7), flags) -#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(8, 7), (tile)->id) +#define XE_VM_FLAG_MIGRATION BIT(2) +#define XE_VM_FLAG_SCRATCH_PAGE BIT(3) +#define XE_VM_FLAG_FAULT_MODE BIT(4) +#define XE_VM_FLAG_BANNED BIT(5) +#define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(7, 6), flags) +#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id) unsigned long flags; /** @composite_fence_ctx: context composite fence */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 87ff6eaa788e..2338d87dcb7d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -139,8 +139,7 @@ struct drm_xe_engine_class_instance { * Kernel only classes (not actual hardware engine class). Used for * creating ordered queues of VM bind operations. 
*/ -#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC 5 -#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC 6 +#define DRM_XE_ENGINE_CLASS_VM_BIND 5 /** @engine_class: engine class id */ __u16 engine_class; /** @engine_instance: engine instance id */ @@ -660,7 +659,6 @@ struct drm_xe_vm_create { * still enable recoverable pagefaults if supported by the device. */ #define DRM_XE_VM_CREATE_FLAG_LR_MODE (1 << 1) -#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT (1 << 2) /* * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated @@ -668,7 +666,7 @@ struct drm_xe_vm_create { * The xe driver internally uses recoverable pagefaults to implement * this. */ -#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 3) +#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 2) /** @flags: Flags */ __u32 flags; @@ -776,12 +774,11 @@ struct drm_xe_vm_bind_op { __u32 op; #define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) -#define DRM_XE_VM_BIND_FLAG_ASYNC (1 << 1) /* * Valid on a faulting VM only, do the MAP operation immediately rather * than deferring the MAP to the page fault handler. */ -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 2) +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) /* * When the NULL flag is set, the page tables are setup with a special * bit which indicates writes are dropped and all reads return zero. In @@ -789,7 +786,7 @@ struct drm_xe_vm_bind_op { * operations, the BO handle MBZ, and the BO offset MBZ. This flag is * intended to implement VK sparse bindings. */ -#define DRM_XE_VM_BIND_FLAG_NULL (1 << 3) +#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) /** @flags: Bind flags */ __u32 flags; -- cgit v1.2.3 From 535881a8c50b79085327e7dbe26a4c55f3e1591b Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 15 Dec 2023 15:45:48 +0000 Subject: drm/xe/uapi: Document the memory_region bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uAPI should stay generic in regarding to the bitmask. It is the userspace responsibility to check for the type/class of the memory, without any assumption. Also add comments inside the code to explain how it is actually constructed so we don't accidentally change the assignment of the instance and the masks. No functional change in this patch. It only explains and document the memory_region masks. A further follow-up work with the organization of all memory regions around struct xe_mem_regions is desired, but not part of this patch. Signed-off-by: Rodrigo Vivi Reviewed-by: Lucas De Marchi Acked-by: José Roberto de Souza Acked-by: Mateusz Naklicki Signed-off-by: Francois Dugast --- drivers/gpu/drm/xe/xe_query.c | 19 +++++++++++++++++++ include/uapi/drm/xe_drm.h | 23 ++++++++++++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 56d61bf596b2..9b35673b286c 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -266,6 +266,11 @@ static int query_mem_regions(struct xe_device *xe, man = ttm_manager_type(&xe->ttm, XE_PL_TT); mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM; + /* + * The instance needs to be a unique number that represents the index + * in the placement mask used at xe_gem_create_ioctl() for the + * xe_bo_create() placement. 
+ */ mem_regions->mem_regions[0].instance = 0; mem_regions->mem_regions[0].min_page_size = PAGE_SIZE; mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT; @@ -381,6 +386,20 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; gt_list->gt_list[id].gt_id = gt->info.id; gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + /* + * The mem_regions indexes in the mask below need to + * directly identify the struct + * drm_xe_query_mem_regions' instance constructed at + * query_mem_regions() + * + * For our current platforms: + * Bit 0 -> System Memory + * Bit 1 -> VRAM0 on Tile0 + * Bit 2 -> VRAM1 on Tile1 + * However the uAPI is generic and it's userspace's + * responsibility to check the mem_class, without any + * assumption. + */ if (!IS_DGFX(xe)) gt_list->gt_list[id].near_mem_regions = 0x1; else diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 5a01d033b780..6c719ba8fc8e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -256,10 +256,9 @@ struct drm_xe_mem_region { */ __u16 mem_class; /** - * @instance: The instance for this region. - * - * The @mem_class and @instance taken together will always give - * a unique pair. + * @instance: The unique ID for this region, which serves as the + * index in the placement bitmask used as argument for + * &DRM_IOCTL_XE_GEM_CREATE */ __u16 instance; /** @@ -404,6 +403,10 @@ struct drm_xe_gt { * @near_mem_regions: Bit mask of instances from * drm_xe_query_mem_regions that are nearest to the current engines * of this GT. + * Each index in this mask refers directly to the struct + * drm_xe_query_mem_regions' instance, no assumptions should + * be made about order. The type of each region is described + * by struct drm_xe_query_mem_regions' mem_class. */ __u64 near_mem_regions; /** @@ -412,6 +415,10 @@ struct drm_xe_gt { * In general, they have extra indirections when compared to the * @near_mem_regions. For a discrete device this could mean system * memory and memory living in a different tile. + * Each index in this mask refers directly to the struct + * drm_xe_query_mem_regions' instance, no assumptions should + * be made about order. The type of each region is described + * by struct drm_xe_query_mem_regions' mem_class. */ __u64 far_mem_regions; /** @reserved: Reserved */ @@ -652,7 +659,13 @@ struct drm_xe_gem_create { */ __u64 size; - /** @placement: A mask of memory instances of where BO can be placed. */ + /** + * @placement: A mask of memory instances of where BO can be placed. + * Each index in this mask refers directly to the struct + * drm_xe_query_mem_regions' instance, no assumptions should + * be made about order. The type of each region is described + * by struct drm_xe_query_mem_regions' mem_class. + */ __u32 placement; #define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (1 << 0) -- cgit v1.2.3 From 77a0d4d1cea2140ef56929ab1cfa5e525772c90e Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 15 Dec 2023 15:45:53 +0000 Subject: drm/xe/uapi: Remove reset uevent for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This kernel uevent is getting removed for now. It will come back later with a better future proof name. 
v2: Rebase (Francois Dugast) Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Francois Dugast Cc: Aravind Iddamsetty Signed-off-by: Rodrigo Vivi Reviewed-by: Himal Prasad Ghimiray Acked-by: Lucas De Marchi Acked-by: José Roberto de Souza Acked-by: Mateusz Naklicki Signed-off-by: Francois Dugast --- drivers/gpu/drm/xe/xe_gt.c | 18 ------------------ include/uapi/drm/xe_drm.h | 11 ----------- 2 files changed, 29 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index f5d18e98f8b6..3af2adec1295 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -589,20 +589,6 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } -static void xe_uevent_gt_reset_failure(struct pci_dev *pdev, u8 tile_id, u8 gt_id) -{ - char *reset_event[4]; - - reset_event[0] = DRM_XE_RESET_FAILED_UEVENT "=NEEDS_RESET"; - reset_event[1] = kasprintf(GFP_KERNEL, "TILE_ID=%d", tile_id); - reset_event[2] = kasprintf(GFP_KERNEL, "GT_ID=%d", gt_id); - reset_event[3] = NULL; - kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, reset_event); - - kfree(reset_event[1]); - kfree(reset_event[2]); -} - static int gt_reset(struct xe_gt *gt) { int err; @@ -659,10 +645,6 @@ err_msg: err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - /* Notify userspace about gt reset failure */ - xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev), - gt_to_tile(gt)->id, gt->info.id); - gt_to_xe(gt)->needs_flr_on_fini = true; return err; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 919aa72c4481..9fa3ae324731 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -20,7 +20,6 @@ extern "C" { * 2. Extension definition and helper structs * 3. IOCTL's Query structs in the order of the Query's entries. * 4. The rest of IOCTL structs in the order of IOCTL declaration. - * 5. uEvents */ /** @@ -1341,16 +1340,6 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; -/** - * DOC: uevent generated by xe on it's pci node. - * - * DRM_XE_RESET_FAILED_UEVENT - Event is generated when attempt to reset gt - * fails. The value supplied with the event is always "NEEDS_RESET". - * Additional information supplied is tile id and gt id of the gt unit for - * which reset has failed. - */ -#define DRM_XE_RESET_FAILED_UEVENT "DEVICE_STATUS" - #if defined(__cplusplus) } #endif -- cgit v1.2.3 From e157f0f76258f11920fd5859a8ac1473a8ce5340 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 18 Dec 2023 08:47:02 -0500 Subject: drm/xe: Fix build without CONFIG_FAULT_INJECTION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ideally this header could be included without the CONFIG_FAULT_INJECTION and it would take care itself for the includes it needs. So, let's temporary workaround this by moving this below and including only when CONFIG_FAULT_INJECTION is selected to avoid build breakages. Another solution would be us including the linux/types.h as well, but this creates unnecessary cases. 
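For illustration, the two options compare roughly like this (a sketch of the idea, not the exact hunks below):

	/* Chosen approach: only pull the broken header in where the config
	 * actually needs it, so other configurations never see it.
	 */
	#ifdef CONFIG_FAULT_INJECTION
	#include <linux/fault-inject.h> /* XXX: fault-inject.h is broken */
	extern struct fault_attr gt_reset_failure;
	#endif

	/* Rejected alternative: have every user satisfy the header's missing
	 * dependency explicitly before including it.
	 */
	#include <linux/types.h>
	#include <linux/fault-inject.h>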
Reference: https://lore.kernel.org/all/20230816134748.979231-1-himal.prasad.ghimiray@intel.com/ Cc: Himal Prasad Ghimiray Cc: Oded Gabbay Cc: Thomas Hellström Cc: Lucas De Marchi Signed-off-by: Rodrigo Vivi Reviewed-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_gt.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index f1e80be8b930..c56fd7d59f05 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -5,7 +5,6 @@ #include "xe_debugfs.h" -#include #include #include @@ -22,6 +21,7 @@ #endif #ifdef CONFIG_FAULT_INJECTION +#include /* XXX: fault-inject.h is broken */ DECLARE_FAULT_ATTR(gt_reset_failure); #endif diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index f3c780bd266d..4486e083f5ef 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -7,7 +7,6 @@ #define _XE_GT_H_ #include -#include #include "xe_device_types.h" #include "xe_hw_engine.h" @@ -20,6 +19,7 @@ #define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) #ifdef CONFIG_FAULT_INJECTION +#include /* XXX: fault-inject.h is broken */ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { -- cgit v1.2.3 From de991b9af0532a05d5206c065bf343d6a767a9d2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 19 Dec 2023 12:00:20 -0800 Subject: drm/xe: Remove ci-only GuC FW definitions As part of the FW definitions, we declare each blob as required via the MODULE_FIRMWARE() macro. This causes the initramfs update (or equivalent process) to look for the blobs on disk when the kernel is installed; therefore, we need to make sure that all FWs we define are available in linux-firmware. We currently don't plan to push the PVC blob to linux-firmware, while the LNL one will only be pushed once we have machines in CI to test it, so we need to remove them from the list for now. Signed-off-by: Daniele Ceraolo Spurio Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc_fw.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 73d6938c921d..9dff96dfe455 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -102,9 +102,7 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(LUNARLAKE, mmp_ver(xe, guc, lnl, 70, 6, 8)) \ fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 7)) \ - fw_def(PVC, mmp_ver(xe, guc, pvc, 70, 9, 1)) \ fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \ fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 5)) \ -- cgit v1.2.3 From b6e1b708176846248c87318786d22465ac96dd2c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 20 Dec 2023 08:19:23 -0800 Subject: drm/xe: Remove uninitialized variable from warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "err" is not initialized when failing to create and add the freq0 sysfs file. Remove it from the message. 
This fixes the following warning with clang: ../drivers/gpu/drm/xe/xe_gt_freq.c:202:30: error: variable 'err' is uninitialized when used here [-Werror,-Wuninitialized] kobject_name(gt->sysfs), err); ^~~ Fixes: bef52b5c7a19 ("drm/xe: Create a xe_gt_freq component for raw management and sysfs") Reviewed-by: Michał Winiarski Link: https://lore.kernel.org/r/20231220161923.3740489-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_freq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 08eabcafe7bc..3adfa6686e7c 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -198,8 +198,8 @@ void xe_gt_freq_init(struct xe_gt *gt) gt->freq = kobject_create_and_add("freq0", gt->sysfs); if (!gt->freq) { - drm_warn(&xe->drm, "failed to add freq0 directory to %s, err: %d\n", - kobject_name(gt->sysfs), err); + drm_warn(&xe->drm, "failed to add freq0 directory to %s\n", + kobject_name(gt->sysfs)); return; } -- cgit v1.2.3 From ea97a66a221893fb9b4d96688e759d1db2d6e683 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 21 Dec 2023 14:28:04 -0800 Subject: drm/xe: Disable 32bits build Add a dependency on CONFIG_64BIT since currently the xe driver doesn't build on 32bits. It may be enabled again after all the issues are fixed. Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231221222809.4123220-2-lucas.demarchi@intel.com --- drivers/gpu/drm/xe/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 90c676d14c50..1cced50d8d8c 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe Graphics" - depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) + depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) && 64BIT select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs -- cgit v1.2.3 From cb4daf271302d71a6b9a7c01bd0b6d76febd8f0c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Dec 2023 10:16:24 +0200 Subject: drm: Don't unref the same fb many times by mistake due to deadlock handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we get a deadlock after the fb lookup in drm_mode_page_flip_ioctl() we proceed to unref the fb and then retry the whole thing from the top. But we forget to reset the fb pointer back to NULL, and so if we then get another error during the retry, before the fb lookup, we proceed the unref the same fb again without having gotten another reference. The end result is that the fb will (eventually) end up being freed while it's still in use. Reset fb to NULL once we've unreffed it to avoid doing it again until we've done another fb lookup. This turned out to be pretty easy to hit on a DG2 when doing async flips (and CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y). The first symptom I saw that drm_closefb() simply got stuck in a busy loop while walking the framebuffer list. Fortunately I was able to convince it to oops instead, and from there it was easier to track down the culprit. 
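Reduced to a sketch, the ioctl flow around the one-line fix looks like this (simplified, not the full drm_mode_page_flip_ioctl()):

	retry:
		fb = drm_framebuffer_lookup(dev, file_priv, page_flip->fb_id);
		...				/* take locks, validate, queue the flip */
	out:
		if (fb)
			drm_framebuffer_put(fb);
		fb = NULL;			/* the fix: a later trip through "out"
						 * before the next lookup must not drop
						 * a reference we no longer own */
		if (ret == -EDEADLK) {
			...			/* back off, drop the locks */
			goto retry;		/* and run the lookup again */
		}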
Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211081625.25704-1-ville.syrjala@linux.intel.com Acked-by: Javier Martinez Canillas --- drivers/gpu/drm/drm_plane.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 9e8e4c60983d..672c655c7a8e 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1503,6 +1503,7 @@ retry: out: if (fb) drm_framebuffer_put(fb); + fb = NULL; if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; -- cgit v1.2.3 From 971740a4c3ac2692a8adb958d5f810c47f07e9b5 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 11 Dec 2023 10:16:25 +0200 Subject: drm: Warn when freeing a framebuffer that's still on a list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sprinkle some extra WARNs around so that we might catch premature framebuffer destruction more readily. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211081625.25704-2-ville.syrjala@linux.intel.com Acked-by: Javier Martinez Canillas --- drivers/gpu/drm/drm_framebuffer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 3cc0ffc28e86..888aadb6a4ac 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -461,6 +461,7 @@ int drm_mode_rmfb(struct drm_device *dev, u32 fb_id, INIT_WORK_ONSTACK(&arg.work, drm_mode_rmfb_work_fn); INIT_LIST_HEAD(&arg.fbs); + drm_WARN_ON(dev, !list_empty(&fb->filp_head)); list_add_tail(&fb->filp_head, &arg.fbs); schedule_work(&arg.work); @@ -827,6 +828,8 @@ void drm_framebuffer_free(struct kref *kref) container_of(kref, struct drm_framebuffer, base.refcount); struct drm_device *dev = fb->dev; + drm_WARN_ON(dev, !list_empty(&fb->filp_head)); + /* * The lookup idr holds a weak reference, which has not necessarily been * removed at this point. Check for that. @@ -1119,7 +1122,7 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb) dev = fb->dev; - WARN_ON(!list_empty(&fb->filp_head)); + drm_WARN_ON(dev, !list_empty(&fb->filp_head)); /* * drm ABI mandates that we remove any deleted framebuffers from active -- cgit v1.2.3 From 3ee348eb36f14e9303a7e9757efb91b0bbf3f7a9 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Sun, 17 Dec 2023 16:44:15 +0800 Subject: drm/rockchip: vop2: Avoid use regmap_reinit_cache at runtime Marek Report a possible irq lock inversion dependency warning when commit 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync() to fix suspend/resume"") lands linux-next. I can reproduce this warning with: CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_LOCKDEP=y It seems than when use regmap_reinit_cache at runtime whith Mark's commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree register cache"), it will trigger a possible irq lock inversion dependency warning. One solution is switch back to REGCACHE_RBTREE, but it seems that REGCACHE_MAPLE is the future, so I avoid using regmap_reinit_cache, and drop all the regcache when vop is disabled, then we get a fresh start at next enbable time. 
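The two ways of getting that fresh state compare roughly as follows (a sketch against the generic regmap API, not the full enable/disable paths):

	/* Previous approach, on every enable: rebuild the register cache from
	 * the config.  With the maple-tree cache this is what triggered the
	 * possible irq lock inversion warning at runtime.
	 */
	regmap_reinit_cache(vop2->map, &vop2_regmap_config);

	/* Approach used below, on disable: just invalidate the cached range,
	 * so the next enable starts from the post-reset hardware state without
	 * re-creating the cache.
	 */
	regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register);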
Fixes: 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync() to fix suspend/resume"") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/all/98a9f15d-30ac-47bf-9b93-3aa2c9900f7b@samsung.com/ Signed-off-by: Andy Yan Tested-by: Marek Szyprowski [dropped the large kernel log of the lockdep report from the message] Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231217084415.2373043-1-andyshrk@163.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 44508c2dd614..760e11c1be4a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -959,12 +959,6 @@ static void vop2_enable(struct vop2 *vop2) return; } - ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config); - if (ret) { - drm_err(vop2->drm, "failed to reinit cache: %d\n", ret); - return; - } - if (vop2->data->soc_id == 3566) vop2_writel(vop2, RK3568_OTP_WIN_EN, 1); @@ -996,6 +990,8 @@ static void vop2_disable(struct vop2 *vop2) pm_runtime_put_sync(vop2->dev); + regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register); + clk_disable_unprepare(vop2->pclk); clk_disable_unprepare(vop2->aclk); clk_disable_unprepare(vop2->hclk); -- cgit v1.2.3 From f40e61eb538d35661d6dda1de92867954d776c4a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 19 Dec 2023 14:26:35 +0800 Subject: drm/rockchip: vop2: clean up some inconsistent indenting No functional modification involved. drivers/gpu/drm/rockchip/rockchip_drm_vop2.c:1708 rk3588_calc_cru_cfg() warn: inconsistent indenting. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7778 Signed-off-by: Jiapeng Chong Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231219062635.100718-1-jiapeng.chong@linux.alibaba.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 760e11c1be4a..574103fc79f9 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1701,8 +1701,8 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, * *if_pixclk_div = dclk_rate / if_pixclk_rate; * *if_dclk_div = dclk_rate / if_dclk_rate; */ - *if_pixclk_div = 2; - *if_dclk_div = 4; + *if_pixclk_div = 2; + *if_dclk_div = 4; } else if (vop2_output_if_is_edp(id)) { /* * edp_pixclk = edp_dclk > dclk_core -- cgit v1.2.3 From 75cbe49f9e2f71a73fed0b677d8d7ff1ffbeaa45 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 15 Dec 2023 15:02:03 -0800 Subject: drm/xe: Fix UBSAN splat in add_preempt_fences() add_preempt_fences() calls dma_resv_reserve_fences() with num_fences == 0 resulting in the below UBSAN splat. Short circuit add_preempt_fences() if num_fences == 0. 
[ 58.652241] ================================================================================ [ 58.660736] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ 58.667281] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ 58.674539] CPU: 2 PID: 1170 Comm: xe_gpgpu_fill Not tainted 6.6.0-rc3-guc+ #630 [ 58.674545] Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWI1.R00.3243.A01.2006102133 06/10/2020 [ 58.674547] Call Trace: [ 58.674548] [ 58.674550] dump_stack_lvl+0x92/0xb0 [ 58.674555] __ubsan_handle_shift_out_of_bounds+0x15a/0x300 [ 58.674559] ? rcu_is_watching+0x12/0x60 [ 58.674564] ? software_resume+0x141/0x210 [ 58.674575] ? new_vma+0x44b/0x600 [xe] [ 58.674606] dma_resv_reserve_fences.cold+0x40/0x66 [ 58.674612] new_vma+0x4b3/0x600 [xe] [ 58.674638] xe_vm_bind_ioctl+0xffd/0x1e00 [xe] [ 58.674663] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 58.674680] drm_ioctl_kernel+0xc1/0x170 [ 58.674686] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 58.674703] drm_ioctl+0x247/0x4c0 [ 58.674709] ? find_held_lock+0x2b/0x80 [ 58.674716] __x64_sys_ioctl+0x8c/0xb0 [ 58.674720] do_syscall_64+0x3c/0x90 [ 58.674723] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 58.674727] RIP: 0033:0x7fce4bd1aaff [ 58.674730] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1f 48 8b 44 24 18 64 48 2b 04 25 28 00 [ 58.674731] RSP: 002b:00007ffc57434050 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 58.674734] RAX: ffffffffffffffda RBX: 00007ffc574340e0 RCX: 00007fce4bd1aaff [ 58.674736] RDX: 00007ffc574340e0 RSI: 0000000040886445 RDI: 0000000000000003 [ 58.674737] RBP: 0000000040886445 R08: 0000000000000002 R09: 00007ffc574341b0 [ 58.674739] R10: 000055de43eb3780 R11: 0000000000000246 R12: 00007ffc574340e0 [ 58.674740] R13: 0000000000000003 R14: 00007ffc574341b0 R15: 0000000000000001 [ 58.674747] [ 58.674748] ================================================================================ Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Lucas De Marchi Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20231215230203.719244-1-matthew.brost@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 9180f2d2d71d..0cfe7289b97e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -279,6 +279,9 @@ static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) struct xe_exec_queue *q; int err; + if (!vm->preempt.num_exec_queues) + return 0; + err = xe_bo_lock(bo, true); if (err) return err; -- cgit v1.2.3 From 315acff5196f4e2f84a2a2d093000e0c6b0b4d1c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 18 Dec 2023 08:33:01 -0800 Subject: drm/xe: Fix warning on impossible condition Having a different value for op is not possible: this is already kept out of user-visible warning by the check in xe_wait_user_fence_ioctl() if op > MAX_OP. The warning is useful as if this switch() is not update when a new op is added, it should be triggered. 
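The warning class being silenced, reduced to a generic sketch (hypothetical names, not the driver code):

	bool passed;			/* deliberately no initializer */

	switch (op) {
	case MY_OP_EQ:
		passed = (lhs == rhs);
		break;
	default:
		WARN(1, "Not possible");
		/* Falling out of the switch here leaves "passed" unset, which
		 * is exactly what clang's -Wsometimes-uninitialized flags.
		 * An early "return -EINVAL" in the default case silences it
		 * without changing behaviour for valid ops.
		 */
	}
	return passed ? 0 : 1;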
Fix warning as reported by 0-DAY CI Kernel: drivers/gpu/drm/xe/xe_wait_user_fence.c:46:2: warning: variable 'passed' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized] Closes: https://lore.kernel.org/oe-kbuild-all/202312170357.KPSinwPs-lkp@intel.com/ Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20231218163301.3453285-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wait_user_fence.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index b0a7896f7fcb..a75eeba7bfe5 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -46,6 +46,7 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op) break; default: XE_WARN_ON("Not possible"); + return -EINVAL; } return passed ? 0 : 1; -- cgit v1.2.3 From 2bd7a06a1208aaacb4e7a2a5436c23bce8d70801 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Wed, 13 Dec 2023 13:15:42 -0800 Subject: drm/i915/dp: Fix passing the correct DPCD_REV for drm_dp_set_phy_test_pattern Using link_status to get DPCD_REV fails when disabling/defaulting phy pattern. Use intel_dp->dpcd to access DPCD_REV correctly. Fixes: 8cdf72711928 ("drm/i915/dp: Program vswing, pre-emphasis, test-pattern") Cc: Jani Nikula Cc: Imre Deak Cc: Lee Shawn C Signed-off-by: Khaled Almahallawy Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231213211542.3585105-3-khaled.almahallawy@intel.com (cherry picked from commit 3ee302ec22d6e1d7d1e6d381b0d507ee80f2135c) --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index b21bcd40f111..62ce92772367 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4496,7 +4496,7 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp, intel_dp->train_set, crtc_state->lane_count); drm_dp_set_phy_test_pattern(&intel_dp->aux, data, - link_status[DP_DPCD_REV]); + intel_dp->dpcd[DP_DPCD_REV]); } static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) -- cgit v1.2.3 From ee11d2d37f5c05bd7bf5ccc820a58f48423d032b Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 18 Dec 2023 16:05:43 -0800 Subject: drm/i915/perf: Update handling of MMIO triggered reports On XEHP platforms user is not able to find MMIO triggered reports in the OA buffer since i915 squashes the context ID fields. These context ID fields hold the MMIO trigger markers. Update logic to not squash the context ID fields of MMIO triggered reports. 
Fixes: cba94bbcff08 ("drm/i915/perf: Determine context valid in OA reports") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20231219000543.1087706-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit 0c68132df6e66244acec1bb5b9e19b0751414389) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7b1c8de2f9cb..2d695818f006 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -772,10 +772,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * The reason field includes flags identifying what * triggered this specific report (mostly timer * triggered or e.g. due to a context switch). - * - * In MMIO triggered reports, some platforms do not set the - * reason bit in this field and it is valid to have a reason - * field of zero. */ reason = oa_report_reason(stream, report); ctx_id = oa_context_id(stream, report32); @@ -787,8 +783,41 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. + * + * Update: + * + * On XEHP platforms the behavior of context id valid bit has + * changed compared to prior platforms. To describe this, we + * define a few terms: + * + * context-switch-report: This is a report with the reason type + * being context-switch. It is generated when a context switches + * out. + * + * context-valid-bit: A bit that is set in the report ID field + * to indicate that a valid context has been loaded. + * + * gpu-idle: A condition characterized by a + * context-switch-report with context-valid-bit set to 0. + * + * On prior platforms, context-id-valid bit is set to 0 only + * when GPU goes idle. In all other reports, it is set to 1. + * + * On XEHP platforms, context-valid-bit is set to 1 in a context + * switch report if a new context switched in. For all other + * reports it is set to 0. + * + * This change in behavior causes an issue with MMIO triggered + * reports. MMIO triggered reports have the markers in the + * context ID field and the context-valid-bit is 0. The logic + * below to squash the context ID would render the report + * useless since the user will not be able to find it in the OA + * buffer. Since MMIO triggered reports exist only on XEHP, + * we should avoid squashing these for XEHP platforms. */ - if (oa_report_ctx_invalid(stream, report)) { + + if (oa_report_ctx_invalid(stream, report) && + GRAPHICS_VER_FULL(stream->engine->i915) < IP_VER(12, 50)) { ctx_id = INVALID_CTX_ID; oa_context_id_squash(stream, report32); } -- cgit v1.2.3 From eee706839333ec0643f1b4898a37588025bf4cb5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Dec 2023 21:49:10 -0800 Subject: drm/imagination: pvr_device.h: fix all kernel-doc warnings Correct all kernel-doc notation on pvr_device.h so that there are no kernel-doc warnings remaining. 
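The convention being applied: members of named sub-structs take a dotted parent prefix in their kernel-doc comments. A minimal sketch (not taken from the driver):

	struct foo {
		/** @stats: Counters grouped together. */
		struct {
			/** @stats.hits: Documented with the parent prefix;
			 *  without it kernel-doc reports the member as excess.
			 */
			u64 hits;
		} stats;
	};

The warnings being fixed were: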
pvr_device.h:292: warning: Excess struct member 'active' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'idle' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'lock' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'work' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'old_kccb_cmds_executed' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'kccb_stall_count' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'ccb' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'rtn_q' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'rtn_obj' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'rtn' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'slot_count' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'reserved_count' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'waiters' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'fence_ctx' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'id' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'seqno' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'lock' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'active' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'idle' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'lock' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'work' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'old_kccb_cmds_executed' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'kccb_stall_count' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'ccb' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'rtn_q' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'rtn_obj' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'rtn' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'slot_count' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'reserved_count' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'waiters' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'fence_ctx' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'id' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'seqno' description in 'pvr_device' pvr_device.h:292: warning: Excess struct member 'lock' description in 'pvr_device' Signed-off-by: Randy Dunlap Cc: Frank Binns Cc: Donald Robson Cc: Matt Coster Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: Jonathan Corbet Cc: Vegard Nossum Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231231054910.31805-1-rdunlap@infradead.org --- drivers/gpu/drm/imagination/pvr_device.h | 46 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/imagination/pvr_device.h 
b/drivers/gpu/drm/imagination/pvr_device.h index 2ca7e535799f..ecdd5767d8ef 100644 --- a/drivers/gpu/drm/imagination/pvr_device.h +++ b/drivers/gpu/drm/imagination/pvr_device.h @@ -193,13 +193,14 @@ struct pvr_device { * @queues: Queue-related fields. */ struct { - /** @active: Active queue list. */ + /** @queues.active: Active queue list. */ struct list_head active; - /** @idle: Idle queue list. */ + /** @queues.idle: Idle queue list. */ struct list_head idle; - /** @lock: Lock protecting access to the active/idle lists. */ + /** @queues.lock: Lock protecting access to the active/idle + * lists. */ struct mutex lock; } queues; @@ -207,18 +208,18 @@ struct pvr_device { * @watchdog: Watchdog for communications with firmware. */ struct { - /** @work: Work item for watchdog callback. */ + /** @watchdog.work: Work item for watchdog callback. */ struct delayed_work work; /** - * @old_kccb_cmds_executed: KCCB command execution count at last - * watchdog poll. + * @watchdog.old_kccb_cmds_executed: KCCB command execution + * count at last watchdog poll. */ u32 old_kccb_cmds_executed; /** - * @kccb_stall_count: Number of watchdog polls KCCB has been - * stalled for. + * @watchdog.kccb_stall_count: Number of watchdog polls + * KCCB has been stalled for. */ u32 kccb_stall_count; } watchdog; @@ -227,43 +228,46 @@ struct pvr_device { * @kccb: Circular buffer for communications with firmware. */ struct { - /** @ccb: Kernel CCB. */ + /** @kccb.ccb: Kernel CCB. */ struct pvr_ccb ccb; - /** @rtn_q: Waitqueue for KCCB command return waiters. */ + /** @kccb.rtn_q: Waitqueue for KCCB command return waiters. */ wait_queue_head_t rtn_q; - /** @rtn_obj: Object representing KCCB return slots. */ + /** @kccb.rtn_obj: Object representing KCCB return slots. */ struct pvr_fw_object *rtn_obj; /** - * @rtn: Pointer to CPU mapping of KCCB return slots. Must be - * accessed by READ_ONCE()/WRITE_ONCE(). + * @kccb.rtn: Pointer to CPU mapping of KCCB return slots. + * Must be accessed by READ_ONCE()/WRITE_ONCE(). */ u32 *rtn; - /** @slot_count: Total number of KCCB slots available. */ + /** @kccb.slot_count: Total number of KCCB slots available. */ u32 slot_count; - /** @reserved_count: Number of KCCB slots reserved for future use. */ + /** @kccb.reserved_count: Number of KCCB slots reserved for + * future use. */ u32 reserved_count; /** - * @waiters: List of KCCB slot waiters. + * @kccb.waiters: List of KCCB slot waiters. */ struct list_head waiters; - /** @fence_ctx: KCCB fence context. */ + /** @kccb.fence_ctx: KCCB fence context. */ struct { - /** @id: KCCB fence context ID allocated with dma_fence_context_alloc(). */ + /** @kccb.fence_ctx.id: KCCB fence context ID + * allocated with dma_fence_context_alloc(). */ u64 id; - /** @seqno: Sequence number incremented each time a fence is created. */ + /** @kccb.fence_ctx.seqno: Sequence number incremented + * each time a fence is created. */ atomic_t seqno; /** - * @lock: Lock used to synchronize access to fences allocated by this - * context. + * @kccb.fence_ctx.lock: Lock used to synchronize + * access to fences allocated by this context. */ spinlock_t lock; } fence_ctx; -- cgit v1.2.3 From 4d23c1be882ecb7fec6894a68c310fff74cc8bba Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Dec 2023 14:19:30 +0000 Subject: drm/amd/display: remove redundant initialization of variable remainder Variable remainder is being initialized with a value that is never read, the assignment is redundant and can be removed. 
Also add a newline after the declaration to clean up the coding style. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/basics/conversion.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c index e295a839ab47..1090d235086a 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c @@ -103,7 +103,8 @@ void convert_float_matrix( static uint32_t find_gcd(uint32_t a, uint32_t b) { - uint32_t remainder = 0; + uint32_t remainder; + while (b != 0) { remainder = a % b; a = b; -- cgit v1.2.3 From 5eb8094a9b05ae5b3e49376a6e5a7a004cd0514f Mon Sep 17 00:00:00 2001 From: Mangesh Gadre Date: Tue, 19 Dec 2023 22:59:16 +0800 Subject: drm/amdgpu: Add register read/write debugfs support for AID's MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SMN address is larger than 32 bits for registers on different AID's Updating existing interface to support access to such registers. Signed-off-by: Mangesh Gadre Reviewed-by: Christian König Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 2cebf2145d9a..e485dd3357c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -540,7 +540,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = RREG32_PCIE(*pos); + if (upper_32_bits(*pos)) + value = RREG32_PCIE_EXT(*pos); + else + value = RREG32_PCIE(*pos); + r = put_user(value, (uint32_t *)buf); if (r) goto out; @@ -600,7 +604,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user if (r) goto out; - WREG32_PCIE(*pos, value); + if (upper_32_bits(*pos)) + WREG32_PCIE_EXT(*pos, value); + else + WREG32_PCIE(*pos, value); result += 4; buf += 4; -- cgit v1.2.3 From 16927047b396d100a510138bdf9fba65f35b81c2 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 19 Dec 2023 14:57:11 -0500 Subject: drm/amd/display: Disable IPS by default [Why] Instability is observed on DCN35 if idle power optimization is enabled. [How] Disable IPS until issue is resolved. 
Signed-off-by: Roman Li Tested-by: Daniel Wheeler Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index fc230ecc7180..ddde330860fc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1712,6 +1712,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0]; init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0]; + init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; + /* Enable DWB for tested platforms only */ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; -- cgit v1.2.3 From c71930300fb20d447d19cda2c85037a24a1504ad Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 13 Dec 2023 14:12:16 +0800 Subject: drm/amd/display: enable dcn35 idle power optimization [Why] Idle power optimization was disabled on dcn35 by default. [How] Enable by setting disable_idle_power_optimizations to false. Signed-off-by: Roman Li Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 810cfe213632..39594e8ffb5e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -768,7 +768,7 @@ static const struct dc_debug_options debug_defaults_drv = { .enable_hpo_pg_support = false, .enable_legacy_fast_update = true, .enable_single_display_2to1_odm_policy = false, - .disable_idle_power_optimizations = true, + .disable_idle_power_optimizations = false, .dmcub_emulation = false, .disable_boot_optimizations = false, .disable_unbounded_requesting = false, -- cgit v1.2.3 From a32c6f7f5737cc7e31cd7ad5133f0d96fca12ea6 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Fri, 15 Dec 2023 16:13:23 +0800 Subject: drm/amdgpu: Fix ecc irq enable/disable unpaired The ecc_irq is disabled during the GPU mode2 reset suspend process, but is not re-enabled during the GPU mode2 reset resume process.
Changed from V1: only do sdma/gfx ras_late_init in aldebaran_mode2_restore_ip delete amdgpu_ras_late_resume function Changed from V2: check umc ras supported before put ecc_irq Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aldebaran.c | 26 +++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++++ 4 files changed, 38 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 02f4c6f9d4f6..576067d66bb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, { struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_ras *con; int r; if (reset_device_list == NULL) @@ -355,7 +356,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, */ amdgpu_register_gpu_instance(tmp_adev); - /* Resume RAS */ + /* Resume RAS, ecc_irq */ + con = amdgpu_ras_get_context(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev) && con) { + if (tmp_adev->sdma.ras && + tmp_adev->sdma.ras->ras_block.ras_late_init) { + r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->sdma.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + + if (tmp_adev->gfx.ras && + tmp_adev->gfx.ras->ras_block.ras_late_init) { + r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->gfx.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! 
ret:%d\n", r); + goto end; + } + } + } + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index a5a05c16c10d..6c5185608854 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1041,6 +1041,10 @@ static int gmc_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 23d7b548d13f..c9c653cfc765 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -941,6 +941,11 @@ static int gmc_v11_0_hw_fini(void *handle) } amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + gmc_v11_0_gart_disable(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 473a774294ce..f9039d64ff2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2380,6 +2380,10 @@ static int gmc_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } -- cgit v1.2.3 From 8a0f02b7beed7b2b768dbdf3b79960de68f460c5 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 11 Dec 2023 13:20:02 -0500 Subject: drm/amd/display: Fix subvp+drr logic errors [Why] There is some logic error where the wrong variable was used to check for OTG_MASTER and DPP_PIPE. [How] Add booleans to confirm that the expected pipes were found before validating schedulability. 
Tested-by: Daniel Wheeler Acked-by: Rodrigo Siqueira Reviewed-by: Samson Tam Reviewed-by: Chaitanya Dhere Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 52 +++++++++++++--------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 10c890a9cb7c..cc759b34268b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -806,6 +806,8 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) int16_t drr_stretched_vblank_us = 0; int16_t max_vblank_mallregion = 0; struct dc_stream_state *phantom_stream; + bool subvp_found = false; + bool drr_found = false; // Find SubVP pipe for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -818,8 +820,10 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) continue; // Find the SubVP pipe - if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) + if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + subvp_found = true; break; + } } // Find the DRR pipe @@ -827,33 +831,37 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) drr_pipe = &context->res_ctx.pipe_ctx[i]; // We check for master pipe only - if (!resource_is_pipe_type(pipe, OTG_MASTER) || - !resource_is_pipe_type(pipe, DPP_PIPE)) + if (!resource_is_pipe_type(drr_pipe, OTG_MASTER) || + !resource_is_pipe_type(drr_pipe, DPP_PIPE)) continue; if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param && - (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) + (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) { + drr_found = true; break; + } } - phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream); - main_timing = &pipe->stream->timing; - phantom_timing = &phantom_stream->timing; - drr_timing = &drr_pipe->stream->timing; - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - drr_frame_us = drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? 
drr_stretched_vblank_us : mall_region_us; + if (subvp_found && drr_found) { + phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream); + main_timing = &pipe->stream->timing; + phantom_timing = &phantom_stream->timing; + drr_timing = &drr_pipe->stream->timing; + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + drr_frame_us = drr_timing->v_total * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; + drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / + (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); + max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; + } /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis -- cgit v1.2.3 From ade13d3fc03a17812e4c677ec898f62b2a8e9485 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 11 Dec 2023 14:46:14 -0500 Subject: drm/amd/display: Don't allow FPO if no planes In DCN32/321 FPO uses per-pipe P-State force. If there is no plane, then HUBP is power gated, in which case any programming in HUBP has no effect and the pipe is always asserting P-State allow. This is contrary to what we want to happen for FPO (FW should moderate the P-State assertion), so block FPO if there's no plane for the FPO pipe.
Tested-by: Daniel Wheeler Acked-by: Rodrigo Siqueira Reviewed-by: Samson Tam Reviewed-by: Chaitanya Dhere Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 18 +++++++++++++++--- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 10 +++++++++- .../drm/amd/display/dc/resource/dcn32/dcn32_resource.h | 2 +- 3 files changed, 25 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 1743ebcd6b2e..cca6436ecdf5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -524,13 +524,14 @@ static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream) * * Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL */ -struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context) +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context) { int refresh_rate = 0; const int minimum_refreshrate_supported = 120; struct dc_stream_state *fpo_candidate_stream = NULL; bool is_fpo_vactive = false; uint32_t fpo_vactive_margin_us = 0; + struct dc_stream_status *fpo_stream_status = NULL; if (context == NULL) return NULL; @@ -559,10 +560,21 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre DC_FP_END(); if (!is_fpo_vactive || dc->debug.disable_fpo_vactive) return NULL; - } else + } else { fpo_candidate_stream = context->streams[0]; + if (fpo_candidate_stream) + fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream); + } - if (!fpo_candidate_stream) + /* In DCN32/321, FPO uses per-pipe P-State force. + * If there's no planes, HUBP is power gated and + * therefore programming UCLK_PSTATE_FORCE does + * nothing (P-State will always be asserted naturally + * on a pipe that has HUBP power gated. Therefore we + * only want to enable FPO if the FPO pipe has both + * a stream and a plane. + */ + if (!fpo_candidate_stream || !fpo_stream_status || fpo_stream_status->plane_count == 0) return NULL; if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index cc759b34268b..9f37f717a1f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -3465,7 +3465,15 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->stream) + /* In DCN32/321, FPO uses per-pipe P-State force. + * If there's no planes, HUBP is power gated and + * therefore programming UCLK_PSTATE_FORCE does + * nothing (P-State will always be asserted naturally + * on a pipe that has HUBP power gated. Therefore we + * only want to enable FPO if the FPO pipe has both + * a stream and a plane. 
+ */ + if (!pipe->stream || !pipe->plane_state) continue; if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index b27fadd452bb..0c87b0fabba7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -163,7 +163,7 @@ void dcn32_determine_det_override(struct dc *dc, void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes); -struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context); +struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe); -- cgit v1.2.3 From a71e1310a43ffe47b824aae25ae54f9fcc4daa12 Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Mon, 11 Dec 2023 18:00:14 -0500 Subject: drm/amd/display: Add more mechanisms for tests [Why] More information is desired for the test tools. [How] Refactored get_subvp_visual_confirm_color and get_mclk_switch_visual_confirm_color to support the new method of storing the p_state type, which was changed so that it could also be saved and output by the DPM log. Ensured that the p_state type is kept updated by looping through the pipes within commit_planes_for_stream. Tested-by: Daniel Wheeler Acked-by: Rodrigo Siqueira Reviewed-by: Alvin Lee Signed-off-by: Relja Vojvodic Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 90 +++++++++-- drivers/gpu/drm/amd/display/dc/core/dc.c | 37 +++-- .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 172 +++++++++++++-------- drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 9 +- drivers/gpu/drm/amd/display/dc/inc/core_types.h | 11 ++ 5 files changed, 229 insertions(+), 90 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 95c0b49b531a..51adb13b3b80 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -25,7 +25,6 @@ #include "dccg.h" #include "clk_mgr_internal.h" - #include "dcn32/dcn32_clk_mgr_smu_msg.h" #include "dcn20/dcn20_clk_mgr.h" #include "dce100/dce_clk_mgr.h" @@ -34,7 +33,7 @@ #include "core_types.h" #include "dm_helpers.h" #include "link.h" - +#include "dc_state_priv.h" #include "atomfirmware.h" #include "smu13_driver_if.h" @@ -458,13 +457,43 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base) return 0; } -static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr_internal *clk_mgr) +static bool dcn32_check_native_scaling(struct pipe_ctx *pipe) +{ + bool is_native_scaling = false; + int width = pipe->plane_state->src_rect.width; + int height = pipe->plane_state->src_rect.height; + + if (pipe->stream->timing.h_addressable == width && + pipe->stream->timing.v_addressable == height && + pipe->plane_state->dst_rect.width == width && + pipe->plane_state->dst_rect.height == height) + is_native_scaling = true; + + return is_native_scaling; +} + +static void dcn32_auto_dpm_test_log( + struct dc_clocks *new_clocks, + struct clk_mgr_internal *clk_mgr, + struct 
dc_state *context) { unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg, - fclk_khz_reg; + fclk_khz_reg, mall_ss_size_bytes; int dramclk_khz_override, fclk_khz_override, num_fclk_levels; - msleep(5); + struct pipe_ctx *pipe_ctx_list[MAX_PIPES]; + int active_pipe_count = 0; + + for (int i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { + pipe_ctx_list[active_pipe_count] = pipe_ctx; + active_pipe_count++; + } + } + + mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes; dispclk_khz_reg = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK @@ -494,16 +523,49 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr // // AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n" //////////////////////////////////////////////////////////////////////////// - if (new_clocks && + if (new_clocks && active_pipe_count > 0 && new_clocks->dramclk_khz > 0 && new_clocks->fclk_khz > 0 && new_clocks->dcfclk_khz > 0 && new_clocks->dppclk_khz > 0) { + uint32_t pix_clk_list[MAX_PIPES] = {0}; + int p_state_list[MAX_PIPES] = {0}; + int disp_src_width_list[MAX_PIPES] = {0}; + int disp_src_height_list[MAX_PIPES] = {0}; + uint64_t disp_src_refresh_list[MAX_PIPES] = {0}; + bool is_scaled_list[MAX_PIPES] = {0}; + + for (int i = 0; i < active_pipe_count; i++) { + struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i]; + uint64_t refresh_rate; + + pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz; + p_state_list[i] = curr_pipe_ctx->p_state_type; + + refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 + + curr_pipe_ctx->stream->timing.v_total * curr_pipe_ctx->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total); + disp_src_refresh_list[i] = refresh_rate; + + if (curr_pipe_ctx->plane_state) { + is_scaled_list[i] = !(dcn32_check_native_scaling(curr_pipe_ctx)); + disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width; + disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height; + } + } + DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - " "dcfclk:%d - dppclk:%d - dispclk_hw:%d - " "dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - " - "dtbclk_hw:%d - fclk_hw:%d\n", + "dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - " + "pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - " + "p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - " + "pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - " + "pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - " + "pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - " + "pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d\n", dramclk_khz_override, fclk_khz_override, new_clocks->dcfclk_khz, @@ -513,7 +575,14 @@ static void dcn32_auto_dpm_test_log(struct dc_clocks *new_clocks, struct clk_mgr dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg, - fclk_khz_reg); + fclk_khz_reg, + pix_clk_list[0], pix_clk_list[1], pix_clk_list[3], pix_clk_list[2], + mall_ss_size_bytes, + p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3], + disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], 
is_scaled_list[0], + disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1], + disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2], + disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]); } } @@ -686,6 +755,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, /* DCCG requires KHz precision for DTBCLK */ clk_mgr_base->clks.ref_dtbclk_khz = dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz)); + dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); } @@ -713,8 +783,8 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, dmcu->funcs->set_psr_wait_loop(dmcu, clk_mgr_base->clks.dispclk_khz / 1000 / 7); - if (dc->config.enable_auto_dpm_test_logs && safe_to_lower) { - dcn32_auto_dpm_test_log(new_clocks, clk_mgr); + if (dc->config.enable_auto_dpm_test_logs) { + dcn32_auto_dpm_test_log(new_clocks, clk_mgr, context); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 043913d65b16..d55de3f5115d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1088,7 +1088,7 @@ static void apply_ctx_interdependent_lock(struct dc *dc, } } -static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx) +static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx) { if (dc->ctx->dce_version >= DCN_VERSION_1_0) { memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color)); @@ -1108,9 +1108,9 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE) get_mpctree_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP) - get_subvp_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color)); + get_subvp_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MCLK_SWITCH) - get_mclk_switch_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color)); + get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); } } } @@ -1190,8 +1190,10 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context); disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); - if (pipe->stream && pipe->plane_state) - dc_update_viusal_confirm_color(dc, context, pipe); + if (pipe->stream && pipe->plane_state) { + set_p_state_switch_method(dc, context, pipe); + dc_update_visual_confirm_color(dc, context, pipe); + } if (dc->hwss.apply_ctx_for_surface) { apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, true); @@ -3377,12 +3379,14 @@ static void commit_planes_for_stream_fast(struct dc *dc, &context->res_ctx, stream); - if (dc->debug.visual_confirm) { - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->plane_state) { + set_p_state_switch_method(dc, context, pipe); - if (pipe->stream && pipe->plane_state) - dc_update_viusal_confirm_color(dc, 
context, pipe); + if (dc->debug.visual_confirm) + dc_update_visual_confirm_color(dc, context, pipe); } } @@ -3531,13 +3535,16 @@ static void commit_planes_for_stream(struct dc *dc, } } - if (dc->debug.visual_confirm) - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->plane_state) - dc_update_viusal_confirm_color(dc, context, pipe); + if (pipe->stream && pipe->plane_state) { + set_p_state_switch_method(dc, context, pipe); + + if (dc->debug.visual_confirm) + dc_update_visual_confirm_color(dc, context, pipe); } + } if (stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) { struct pipe_ctx *mpcc_pipe; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 48fdfacc413a..9c05b1a07142 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -426,44 +426,130 @@ void get_hdr_visual_confirm_color( } void get_subvp_visual_confirm_color( - struct dc *dc, - struct dc_state *context, struct pipe_ctx *pipe_ctx, struct tg_color *color) { uint32_t color_value = MAX_TG_COLOR_VALUE; - bool enable_subvp = false; - int i; - - if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !context) - return; + if (pipe_ctx) { + switch (pipe_ctx->p_state_type) { + case P_STATE_SUB_VP: + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + break; + case P_STATE_DRR_SUB_VP: + color->color_r_cr = 0; + color->color_g_y = color_value; + color->color_b_cb = 0; + break; + case P_STATE_V_BLANK_SUB_VP: + color->color_r_cr = 0; + color->color_g_y = 0; + color->color_b_cb = color_value; + break; + default: + break; + } + } +} - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; +void get_mclk_switch_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color) +{ + uint32_t color_value = MAX_TG_COLOR_VALUE; - if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { - /* SubVP enable - red */ - color->color_g_y = 0; + if (pipe_ctx) { + switch (pipe_ctx->p_state_type) { + case P_STATE_V_BLANK: + color->color_r_cr = color_value; + color->color_g_y = color_value; color->color_b_cb = 0; + break; + case P_STATE_FPO: + color->color_r_cr = 0; + color->color_g_y = color_value; + color->color_b_cb = color_value; + break; + case P_STATE_V_ACTIVE: color->color_r_cr = color_value; - enable_subvp = true; - - if (pipe_ctx->stream == pipe->stream) - return; + color->color_g_y = 0; + color->color_b_cb = color_value; + break; + case P_STATE_SUB_VP: + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + break; + case P_STATE_DRR_SUB_VP: + color->color_r_cr = 0; + color->color_g_y = color_value; + color->color_b_cb = 0; + break; + case P_STATE_V_BLANK_SUB_VP: + color->color_r_cr = 0; + color->color_g_y = 0; + color->color_b_cb = color_value; + break; + default: break; } } +} - if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) { - color->color_r_cr = 0; - if (pipe_ctx->stream->allow_freesync == 1) { - /* SubVP enable and DRR on - green */ - color->color_b_cb = 0; - color->color_g_y = color_value; +void set_p_state_switch_method( + struct dc *dc, + struct dc_state *context, + struct pipe_ctx *pipe_ctx) +{ + struct vba_vars_st *vba = 
&context->bw_ctx.dml.vba; + bool enable_subvp; + + if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context) + return; + + if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] != + dm_dram_clock_change_unsupported) { + /* MCLK switching is supported */ + if (!pipe_ctx->has_vactive_margin) { + /* In Vblank - yellow */ + pipe_ctx->p_state_type = P_STATE_V_BLANK; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + /* FPO + Vblank - cyan */ + pipe_ctx->p_state_type = P_STATE_FPO; + } } else { - /* SubVP enable and No DRR - blue */ - color->color_g_y = 0; - color->color_b_cb = color_value; + /* In Vactive - pink */ + pipe_ctx->p_state_type = P_STATE_V_ACTIVE; + } + + /* SubVP */ + enable_subvp = false; + + for (int i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && dc_state_get_paired_subvp_stream(context, pipe->stream) && + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) { + /* SubVP enable - red */ + pipe_ctx->p_state_type = P_STATE_SUB_VP; + enable_subvp = true; + + if (pipe_ctx->stream == pipe->stream) + return; + break; + } + } + + if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) { + if (pipe_ctx->stream->allow_freesync == 1) { + /* SubVP enable and DRR on - green */ + pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; + } else { + /* SubVP enable and No DRR - blue */ + pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; + } } } } @@ -815,42 +901,6 @@ void hwss_subvp_save_surf_addr(union block_sequence_params *params) dc_dmub_srv_subvp_save_surf_addr(dc_dmub_srv, addr, subvp_index); } -void get_mclk_switch_visual_confirm_color( - struct dc *dc, - struct dc_state *context, - struct pipe_ctx *pipe_ctx, - struct tg_color *color) -{ - uint32_t color_value = MAX_TG_COLOR_VALUE; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context) - return; - - if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] != - dm_dram_clock_change_unsupported) { - /* MCLK switching is supported */ - if (!pipe_ctx->has_vactive_margin) { - /* In Vblank - yellow */ - color->color_r_cr = color_value; - color->color_g_y = color_value; - - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { - /* FPO + Vblank - cyan */ - color->color_r_cr = 0; - color->color_g_y = color_value; - color->color_b_cb = color_value; - } - } else { - /* In Vactive - pink */ - color->color_r_cr = color_value; - color->color_b_cb = color_value; - } - /* SubVP */ - get_subvp_visual_confirm_color(dc, context, pipe_ctx, color); - } -} - void get_surface_tile_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 6ca8c5488d50..a54399383318 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -455,17 +455,18 @@ void get_mpctree_visual_confirm_color( struct tg_color *color); void get_subvp_visual_confirm_color( - struct dc *dc, - struct dc_state *context, struct pipe_ctx *pipe_ctx, struct tg_color *color); void get_mclk_switch_visual_confirm_color( - struct dc *dc, - struct dc_state *context, struct pipe_ctx *pipe_ctx, struct tg_color *color); +void set_p_state_switch_method( + struct dc *dc, + struct dc_state *context, + struct pipe_ctx *pipe_ctx); + void hwss_execute_sequence(struct dc *dc, struct 
block_sequence block_sequence[], int num_steps); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index c3b973abb89a..f74ae0d41d3c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -381,6 +381,16 @@ union pipe_update_flags { uint32_t raw; }; +enum p_state_switch_method { + P_STATE_UNKNOWN = 0, + P_STATE_V_BLANK = 1, + P_STATE_FPO, + P_STATE_V_ACTIVE, + P_STATE_SUB_VP, + P_STATE_DRR_SUB_VP, + P_STATE_V_BLANK_SUB_VP +}; + struct pipe_ctx { struct dc_plane_state *plane_state; struct dc_stream_state *stream; @@ -429,6 +439,7 @@ struct pipe_ctx { struct dwbc *dwbc; struct mcif_wb *mcif_wb; union pipe_update_flags update_flags; + enum p_state_switch_method p_state_type; struct tg_color visual_confirm_color; bool has_vactive_margin; /* subvp_index: only valid if the pipe is a SUBVP_MAIN*/ -- cgit v1.2.3 From 394e850f1ad73c594bf0296c2f601c71517acfdd Mon Sep 17 00:00:00 2001 From: "Leo (Hanghong) Ma" Date: Tue, 28 Nov 2023 12:31:24 -0500 Subject: drm/amd/display: Add HDMI capacity computations using fixed31_32 [Why] Certain HDMI modes failed at dml cap check for uncompressed video but they can still be supported for compressed video. [How] Add HDMI capacity computations using fixed31_32 in dc side. Tested-by: Daniel Wheeler Reviewed-by: Chris Park Acked-by: Rodrigo Siqueira Signed-off-by: Leo (Hanghong) Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ drivers/gpu/drm/amd/display/dc/inc/resource.h | 3 --- drivers/gpu/drm/amd/display/dc/link/link_validation.h | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3ca72c097aa1..c2ee80d6f64c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1462,6 +1462,8 @@ bool dc_acquire_release_mpc_3dlut( struct dc_transfer_func **shaper); bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); +void get_audio_check(struct audio_info *aud_modes, + struct audio_check *aud_chk); enum dc_status dc_commit_streams(struct dc *dc, struct dc_stream_state *streams[], diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index c7a00a28c3b0..c958ef37b78a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -573,9 +573,6 @@ void update_audio_usage( unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format); -void get_audio_check(struct audio_info *aud_modes, - struct audio_check *aud_chk); - bool get_temp_dp_link_res(struct dc_link *link, struct link_resource *link_res, struct dc_link_settings *link_settings); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h index 4a954317d0da..595fb05946e9 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h @@ -25,6 +25,7 @@ #ifndef __LINK_VALIDATION_H__ #define __LINK_VALIDATION_H__ #include "link.h" + enum dc_status link_validate_mode_timing( const struct dc_stream_state *stream, struct dc_link *link, -- cgit v1.2.3 From ca1ecae145b20b11ff49062afe6f0bf6707bc244 Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Mon, 11 Dec 2023 17:50:19 -0500 Subject: drm/amd/display: Add null pointer guards where needed [Why] Some functions whose output is 
typically checked for null are not being checked for null at several call sites, causing some static analysis tools to throw an error. [How] Add null pointer guards around functions that typically have them at other call sites. Tested-by: Daniel Wheeler Reviewed-by: Sung Lee Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Josip Pavic Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 +++++ drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 3 +++ drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index d55de3f5115d..b7c2eaebf8bf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3379,6 +3379,9 @@ static void commit_planes_for_stream_fast(struct dc *dc, &context->res_ctx, stream); + if (!top_pipe_to_program) + return; + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -3978,6 +3981,8 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc, dc->debug.force_disable_subvp = true; minimal_transition_context = dc_state_create_copy(base_context); + if (!minimal_transition_context) + return NULL; /* commit minimal state */ if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 716b59bd03b6..f2abc1096ffb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2460,6 +2460,9 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context, struct pipe_ctx *otg_master = resource_get_otg_master_for_stream( &context->res_ctx, stream); + if (!otg_master) + return; + ASSERT(resource_get_odm_slice_count(otg_master) == 1); ASSERT(otg_master->plane_state == NULL); ASSERT(otg_master->stream_res.stream_enc); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index f2b265ed7fc2..54670e0b1518 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -447,7 +447,8 @@ bool dc_stream_add_writeback(struct dc *dc, if (dc->hwss.enable_writeback) { struct dc_stream_status *stream_status = dc_stream_get_status(stream); struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst]; - dwb->otg_inst = stream_status->primary_otg_inst; + if (stream_status) + dwb->otg_inst = stream_status->primary_otg_inst; } if (!dc->hwss.update_bandwidth(dc, dc->current_state)) { -- cgit v1.2.3 From aa5dc05340eb97486a631ce6bccb8d020bf6b56b Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Mon, 4 Dec 2023 10:57:45 -0500 Subject: drm/amd/display: Fix minor issues in BW Allocation Phase2 [Why] Fix minor issues in BW Allocation Phase2. [How] - In set_usb4_req_bw_req(), link->dpia_bw_alloc_config.response_ready flag should be reset before writing DPCD REQUEST_BW. - Fix the granularity for value of 2 in get_bw_granularity(). - Removed bandwidth allocation support display fw boot option as the fw would read feature enable status from bios. - Clean up DPIA_EST_BW_CHANGED and DPIA_BW_REQ_SUCCESS cases in dpia_handle_bw_alloc_response(). - Removed allocate_usb4_bw and deallocate_usb4_bw. - Optimized loop in get_lowest_dpia_index(). 
- Updated link_dp_dpia_allocate_usb4_bandwidth_for_stream() and set_usb4_req_bw_req() to always issue request bw. Tested-by: Daniel Wheeler Reviewed-by: PeiChen Huang Acked-by: Rodrigo Siqueira Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Alex Deucher --- .../display/dc/link/protocols/link_dp_dpia_bw.c | 221 +++++++++------------ .../display/dc/link/protocols/link_dp_dpia_bw.h | 4 +- 2 files changed, 101 insertions(+), 124 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index 7581023daa47..d6e1f969bfd5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -50,6 +50,7 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp) && tmp->hpd_status && tmp->dpia_bw_alloc_config.bw_alloc_enabled); } + static void reset_bw_alloc_struct(struct dc_link *link) { link->dpia_bw_alloc_config.bw_alloc_enabled = false; @@ -59,6 +60,11 @@ static void reset_bw_alloc_struct(struct dc_link *link) link->dpia_bw_alloc_config.bw_granularity = 0; link->dpia_bw_alloc_config.response_ready = false; } + +#define BW_GRANULARITY_0 4 // 0.25 Gbps +#define BW_GRANULARITY_1 2 // 0.5 Gbps +#define BW_GRANULARITY_2 1 // 1 Gbps + static uint8_t get_bw_granularity(struct dc_link *link) { uint8_t bw_granularity = 0; @@ -71,16 +77,20 @@ static uint8_t get_bw_granularity(struct dc_link *link) switch (bw_granularity & 0x3) { case 0: - bw_granularity = 4; + bw_granularity = BW_GRANULARITY_0; break; case 1: + bw_granularity = BW_GRANULARITY_1; + break; + case 2: default: - bw_granularity = 2; + bw_granularity = BW_GRANULARITY_2; break; } return bw_granularity; } + static int get_estimated_bw(struct dc_link *link) { uint8_t bw_estimated_bw = 0; @@ -93,31 +103,7 @@ static int get_estimated_bw(struct dc_link *link) return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); } -static bool allocate_usb4_bw(int *stream_allocated_bw, int bw_needed, struct dc_link *link) -{ - if (bw_needed > 0) - *stream_allocated_bw += bw_needed; - - return true; -} -static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, struct dc_link *link) -{ - bool ret = false; - - if (*stream_allocated_bw > 0) { - *stream_allocated_bw -= bw_to_dealloc; - ret = true; - } else { - //Do nothing for now - ret = true; - } - // Unplug so reset values - if (!link->hpd_status) - reset_bw_alloc_struct(link); - - return ret; -} /* * Read all New BW alloc configuration ex: estimated_bw, allocated_bw, * granuality, Driver_ID, CM_Group, & populate the BW allocation structs @@ -128,7 +114,12 @@ static void init_usb4_bw_struct(struct dc_link *link) // Init the known values link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link); link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + + DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n", + __func__, link->dpia_bw_alloc_config.bw_granularity, + link->dpia_bw_alloc_config.estimated_bw); } + static uint8_t get_lowest_dpia_index(struct dc_link *link) { const struct dc *dc_struct = link->dc; @@ -141,12 +132,15 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link) dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; - if (idx > dc_struct->links[i]->link_index) + if (idx > dc_struct->links[i]->link_index) { idx = dc_struct->links[i]->link_index; + break; + } } return idx; } + /* * Get the Max 
Available BW or Max Estimated BW for each Host Router * @@ -186,6 +180,7 @@ static int get_host_router_total_bw(struct dc_link *link, uint8_t type) return total_bw; } + /* * Cleanup function for when the dpia is unplugged to reset struct * and perform any required clean up @@ -194,42 +189,50 @@ static int get_host_router_total_bw(struct dc_link *link, uint8_t type) * * return: none */ -static bool dpia_bw_alloc_unplug(struct dc_link *link) +static void dpia_bw_alloc_unplug(struct dc_link *link) { - if (!link) - return true; - - return deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, - link->dpia_bw_alloc_config.sink_allocated_bw, link); + if (link) { + DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n", + __func__, link->link_index); + link->dpia_bw_alloc_config.sink_allocated_bw = 0; + reset_bw_alloc_struct(link); + } } + static void set_usb4_req_bw_req(struct dc_link *link, int req_bw) { uint8_t requested_bw; uint32_t temp; - // 1. Add check for this corner case #1 - if (req_bw > link->dpia_bw_alloc_config.estimated_bw) + /* Error check whether request bw greater than allocated */ + if (req_bw > link->dpia_bw_alloc_config.estimated_bw) { + DC_LOG_ERROR("%s: Request bw greater than estimated bw for link(%d)\n", + __func__, link->link_index); req_bw = link->dpia_bw_alloc_config.estimated_bw; + } temp = req_bw * link->dpia_bw_alloc_config.bw_granularity; requested_bw = temp / Kbps_TO_Gbps; - // Always make sure to add more to account for floating points + /* Always make sure to add more to account for floating points */ if (temp % Kbps_TO_Gbps) ++requested_bw; - // 2. Add check for this corner case #2 + /* Error check whether requested and allocated are equal */ req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) - return; + if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) { + DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", + __func__, link->link_index); + } - if (core_link_write_dpcd( + link->dpia_bw_alloc_config.response_ready = false; // Reset flag + core_link_write_dpcd( link, REQUESTED_BW, &requested_bw, - sizeof(uint8_t)) == DC_OK) - link->dpia_bw_alloc_config.response_ready = false; // Reset flag + sizeof(uint8_t)); } + /* * Return the response_ready flag from dc_link struct * @@ -241,6 +244,7 @@ static bool get_cm_response_ready_flag(struct dc_link *link) { return link->dpia_bw_alloc_config.response_ready; } + // ------------------------------------------------------------------ // PUBLIC FUNCTIONS // ------------------------------------------------------------------ @@ -277,27 +281,27 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link) DPTX_BW_ALLOCATION_MODE_CONTROL, &response, sizeof(uint8_t)) != DC_OK) { - DC_LOG_DEBUG("%s: **** FAILURE Enabling DPtx BW Allocation Mode Support ***\n", - __func__); + DC_LOG_DEBUG("%s: FAILURE Enabling DPtx BW Allocation Mode Support for link(%d)\n", + __func__, link->link_index); } else { // SUCCESS Enabled DPtx BW Allocation Mode Support - link->dpia_bw_alloc_config.bw_alloc_enabled = true; - DC_LOG_DEBUG("%s: **** SUCCESS Enabling DPtx BW Allocation Mode Support ***\n", - __func__); + DC_LOG_DEBUG("%s: SUCCESS Enabling DPtx BW Allocation Mode Support for link(%d)\n", + __func__, link->link_index); ret = true; init_usb4_bw_struct(link); + link->dpia_bw_alloc_config.bw_alloc_enabled = true; } } out: return ret; } + void dpia_handle_bw_alloc_response(struct dc_link *link, 
uint8_t bw, uint8_t result) { int bw_needed = 0; int estimated = 0; - int host_router_total_estimated_bw = 0; if (!get_bw_alloc_proceed_flag((link))) return; @@ -306,14 +310,22 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res case DPIA_BW_REQ_FAILED: - DC_LOG_DEBUG("%s: *** *** BW REQ FAILURE for DP-TX Request *** ***\n", __func__); + /* + * Ideally, we shouldn't run into this case as we always validate available + * bandwidth and request within that limit + */ + estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - // Update the new Estimated BW value updated by CM - link->dpia_bw_alloc_config.estimated_bw = - bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); + DC_LOG_ERROR("%s: BW REQ FAILURE for DP-TX Request for link(%d)\n", + __func__, link->link_index); + DC_LOG_ERROR("%s: current estimated_bw(%d), new estimated_bw(%d)\n", + __func__, link->dpia_bw_alloc_config.estimated_bw, estimated); + /* Update the new Estimated BW value updated by CM */ + link->dpia_bw_alloc_config.estimated_bw = estimated; + + /* Allocate the previously requested bandwidth */ set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.estimated_bw); - link->dpia_bw_alloc_config.response_ready = false; /* * If FAIL then it is either: @@ -326,68 +338,34 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res case DPIA_BW_REQ_SUCCESS: - DC_LOG_DEBUG("%s: *** BW REQ SUCCESS for DP-TX Request ***\n", __func__); - - // 1. SUCCESS 1st time before any Pruning is done - // 2. SUCCESS after prev. FAIL before any Pruning is done - // 3. SUCCESS after Pruning is done but before enabling link - bw_needed = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - // 1. - if (!link->dpia_bw_alloc_config.sink_allocated_bw) { - - allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed, link); - link->dpia_bw_alloc_config.sink_verified_bw = - link->dpia_bw_alloc_config.sink_allocated_bw; + DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n", + __func__, link->link_index); + DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n", + __func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed); - // SUCCESS from first attempt - if (link->dpia_bw_alloc_config.sink_allocated_bw > - link->dpia_bw_alloc_config.sink_max_bw) - link->dpia_bw_alloc_config.sink_verified_bw = - link->dpia_bw_alloc_config.sink_max_bw; - } - // 3. - else if (link->dpia_bw_alloc_config.sink_allocated_bw) { - - // Find out how much do we need to de-alloc - if (link->dpia_bw_alloc_config.sink_allocated_bw > bw_needed) - deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, - link->dpia_bw_alloc_config.sink_allocated_bw - bw_needed, link); - else - allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, - bw_needed - link->dpia_bw_alloc_config.sink_allocated_bw, link); - } - - // 4. If this is the 2nd sink then any unused bw will be reallocated to master DPIA - // => check if estimated_bw changed + link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed; link->dpia_bw_alloc_config.response_ready = true; break; case DPIA_EST_BW_CHANGED: - DC_LOG_DEBUG("%s: *** ESTIMATED BW CHANGED for DP-TX Request ***\n", __func__); - estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - host_router_total_estimated_bw = get_host_router_total_bw(link, HOST_ROUTER_BW_ESTIMATED); - // 1. 
If due to unplug of other sink - if (estimated == host_router_total_estimated_bw) { - // First update the estimated & max_bw fields - if (link->dpia_bw_alloc_config.estimated_bw < estimated) - link->dpia_bw_alloc_config.estimated_bw = estimated; - } - // 2. If due to realloc bw btw 2 dpia due to plug OR realloc unused Bw - else { - // We lost estimated bw usually due to plug event of other dpia - link->dpia_bw_alloc_config.estimated_bw = estimated; - } + DC_LOG_DEBUG("%s: ESTIMATED BW CHANGED for link(%d)\n", + __func__, link->link_index); + DC_LOG_DEBUG("%s: current estimated_bw(%d), new estimated_bw(%d)\n", + __func__, link->dpia_bw_alloc_config.estimated_bw, estimated); + + link->dpia_bw_alloc_config.estimated_bw = estimated; break; case DPIA_BW_ALLOC_CAPS_CHANGED: - DC_LOG_DEBUG("%s: *** BW ALLOC CAPABILITY CHANGED for DP-TX Request ***\n", __func__); + DC_LOG_ERROR("%s: BW ALLOC CAPABILITY CHANGED to Disabled for link(%d)\n", + __func__, link->link_index); link->dpia_bw_alloc_config.bw_alloc_enabled = false; break; } @@ -409,11 +387,11 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw); do { - if (!(timeout > 0)) + if (timeout > 0) timeout--; else break; - fsleep(10 * 1000); + msleep(10); } while (!get_cm_response_ready_flag(link)); if (!timeout) @@ -428,37 +406,36 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea out: return ret; } -int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) + +bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) { - int ret = 0; + bool ret = false; uint8_t timeout = 10; + DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", + __func__, link->link_index, link->hpd_status, + link->dpia_bw_alloc_config.sink_allocated_bw, req_bw); + if (!get_bw_alloc_proceed_flag(link)) goto out; - /* - * Sometimes stream uses same timing parameters as the already - * allocated max sink bw so no need to re-alloc - */ - if (req_bw != link->dpia_bw_alloc_config.sink_allocated_bw) { - set_usb4_req_bw_req(link, req_bw); - do { - if (!(timeout > 0)) - timeout--; - else - break; - udelay(10 * 1000); - } while (!get_cm_response_ready_flag(link)); + set_usb4_req_bw_req(link, req_bw); + do { + if (timeout > 0) + timeout--; + else + break; + msleep(10); + } while (!get_cm_response_ready_flag(link)); - if (!timeout) - ret = 0;// ERROR TIMEOUT waiting for response for allocating bw - else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) - ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED); - } + if (timeout) + ret = true; out: + DC_LOG_DEBUG("%s: EXIT: timeout(%d), ret(%d)\n", __func__, timeout, ret); return ret; } + bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias) { bool ret = true; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 7292690383ae..981bc4eb6120 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -59,9 +59,9 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link); * @link: pointer to the dc_link struct instance * @req_bw: Bw requested by the stream * - * return: allocated bw else return 0 + * return: true if allocated successfully */ -int 
link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw); +bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw); /* * Handle the USB4 BW Allocation related functionality here: -- cgit v1.2.3 From ee8ed2506603629f2706712a5282921a115a8da6 Mon Sep 17 00:00:00 2001 From: Camille Cho Date: Thu, 16 Nov 2023 16:19:25 +0800 Subject: drm/amd/display: Correctly restore user_level [Why] BL1_PWM_USER_LEVEL is meant for the user brightness level setting from OS. However, we update it along with other ABM levels to the real PWM value which could be ABMed. [How] Driver to cache and restore the user brightness level setting so that DMUB can retrieve the last user setting in ABM config initialization. Tested-by: Daniel Wheeler Reviewed-by: Anthony Koo Acked-by: Rodrigo Siqueira Signed-off-by: Camille Cho Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h | 2 +- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/inc/hw/abm.h | 2 +- drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h | 1 + .../gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c | 3 +++ 13 files changed, 42 insertions(+), 20 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index 874b132fe1d7..a6006776333d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -135,7 +135,7 @@ static void dmcu_set_backlight_level( 0, 1, 80000); } -static void dce_abm_init(struct abm *abm, uint32_t backlight) +static void dce_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); @@ -162,7 +162,7 @@ static void dce_abm_init(struct abm *abm, uint32_t backlight) BL1_PWM_TARGET_ABM_LEVEL, backlight); REG_UPDATE(BL1_PWM_USER_LEVEL, - BL1_PWM_USER_LEVEL, backlight); + BL1_PWM_USER_LEVEL, user_level); REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM1_LS_MIN_PIXEL_VALUE_THRES, 0, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index 8c5e7f858be3..ccc154b0281c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -57,9 +57,9 @@ static unsigned int abm_feature_support(struct abm *abm, unsigned int panel_inst return ret; } -static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight) +static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight, uint32_t user_level) { - dmub_abm_init(abm, backlight); + dmub_abm_init(abm, backlight, user_level); } static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index 4cff36351f40..f9d6a181164a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -79,7 
+79,7 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc) dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } -void dmub_abm_init(struct abm *abm, uint32_t backlight) +void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level) { struct dce_abm *dce_abm = TO_DMUB_ABM(abm); @@ -106,7 +106,7 @@ void dmub_abm_init(struct abm *abm, uint32_t backlight) BL1_PWM_TARGET_ABM_LEVEL, backlight); REG_UPDATE(BL1_PWM_USER_LEVEL, - BL1_PWM_USER_LEVEL, backlight); + BL1_PWM_USER_LEVEL, user_level); REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM1_LS_MIN_PIXEL_VALUE_THRES, 0, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h index 07ea6c8d414f..761685e5b8c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h @@ -30,7 +30,7 @@ struct abm_save_restore; -void dmub_abm_init(struct abm *abm, uint32_t backlight); +void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level); bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask); unsigned int dmub_abm_get_current_backlight(struct abm *abm); unsigned int dmub_abm_get_target_backlight(struct abm *abm); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 6457099ed588..fb328cd06cea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2593,6 +2593,7 @@ static void init_hw(struct dc *dc) struct dmcu *dmcu; struct dce_hwseq *hws = dc->hwseq; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; bp = dc->ctx->dc_bios; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -2642,13 +2643,15 @@ static void init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } abm = dc->res_pool->abm; if (abm != NULL) - abm->funcs->abm_init(abm, backlight); + abm->funcs->abm_init(abm, backlight, user_level); dmcu = dc->res_pool->dmcu; if (dmcu != NULL && abm != NULL) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 1c5e3bb6f0ee..51dd2ae09b2a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1490,6 +1490,7 @@ void dcn10_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; bool is_optimized_init_done = false; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) @@ -1587,12 +1588,14 @@ void dcn10_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } if (abm != NULL) - abm->funcs->abm_init(abm, backlight); + abm->funcs->abm_init(abm, backlight, user_level); if (dmcu != NULL && !dmcu->auto_load_dmcu) dmcu->funcs->dmcu_init(dmcu); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 327c227a10ea..c34c13e1e0a4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -476,6 +476,7 @@ void dcn30_init_hw(struct dc *dc) int i; int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -612,13 +613,15 @@ void dcn30_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (abms[i] != NULL) - abms[i]->funcs->abm_init(abms[i], backlight); + abms[i]->funcs->abm_init(abms[i], backlight, user_level); } /* power AFMT HDMI memory TODO: may move to dis/en output save power*/ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 260860c259f3..7423880fabb6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -113,6 +113,7 @@ void dcn31_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; int i; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) @@ -224,13 +225,15 @@ void dcn31_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (abms[i] != NULL) - abms[i]->funcs->abm_init(abms[i], backlight); + abms[i]->funcs->abm_init(abms[i], backlight, user_level); } /* power AFMT HDMI memory TODO: may move to dis/en output save power*/ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 6f360f008f67..6c9299c7683d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -753,6 +753,7 @@ void dcn32_init_hw(struct dc *dc) int i; int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -907,13 +908,15 @@ void dcn32_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } for (i = 0; i < dc->res_pool->pipe_count; i++) { if (abms[i] != NULL && abms[i]->funcs != NULL) - abms[i]->funcs->abm_init(abms[i], backlight); + abms[i]->funcs->abm_init(abms[i], backlight, user_level); } /* power AFMT HDMI memory TODO: may move to dis/en output save power*/ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index ad710b4036de..c40f4a06abdb 100644 --- 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -134,6 +134,7 @@ void dcn35_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + uint32_t user_level = MAX_BACKLIGHT_LEVEL; int i; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) @@ -280,13 +281,15 @@ void dcn35_init_hw(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; - if (link->panel_cntl) + if (link->panel_cntl) { backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL; + } } if (dc->ctx->dmub_srv) { for (i = 0; i < dc->res_pool->pipe_count; i++) { if (abms[i] != NULL && abms[i]->funcs != NULL) - abms[i]->funcs->abm_init(abms[i], backlight); + abms[i]->funcs->abm_init(abms[i], backlight, user_level); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h index 9f521cf0fc5a..3f0161d64675 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h @@ -36,7 +36,7 @@ struct abm { }; struct abm_funcs { - void (*abm_init)(struct abm *abm, uint32_t back_light); + void (*abm_init)(struct abm *abm, uint32_t back_light, uint32_t user_level); bool (*set_abm_level)(struct abm *abm, unsigned int abm_level); bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst); bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h index 248adc1705e3..5dcbaa2db964 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h @@ -40,6 +40,7 @@ struct panel_cntl_backlight_registers { unsigned int BL_PWM_PERIOD_CNTL; unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV; unsigned int PANEL_PWRSEQ_REF_DIV2; + unsigned int USER_LEVEL; }; struct panel_cntl_funcs { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index bf53a86ea817..e8de68e62403 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -529,6 +529,9 @@ bool edp_set_backlight_level(const struct dc_link *link, if (dc_is_embedded_signal(link->connector_signal)) { struct pipe_ctx *pipe_ctx = get_pipe_from_link(link); + if (link->panel_cntl) + link->panel_cntl->stored_backlight_registers.USER_LEVEL = backlight_pwm_u16_16; + if (pipe_ctx) { /* Disable brightness ramping when the display is blanked * as it can hang the DMCU -- cgit v1.2.3 From efae5a9eb47b76d5f84c0a0ca2ec95c9ce8a393c Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Mon, 4 Dec 2023 10:09:33 +0800 Subject: drm/amd/display: pbn_div need be updated for hotplug event link_rate sometime will be changed when DP MST connector hotplug, so pbn_div also need be updated; otherwise, it will mismatch with link_rate, causes no output in external monitor. 
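For illustration, a minimal sketch of the intent (names match the hunk in
amdgpu_dm.c below; this is a reduced excerpt, not the full atomic-check
context):

	/* Recompute the divider from the current link on every atomic
	 * check, instead of only when it is still zero, so a hotplug that
	 * retrains the link cannot leave a stale pbn_div behind.
	 */
	mst_state->pbn_div.full =
		dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link));
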
Cc: stable@vger.kernel.org Tested-by: Daniel Wheeler Reviewed-by: Jerry Zuo Acked-by: Rodrigo Siqueira Signed-off-by: Wade Wang Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ddde330860fc..a144024df97c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7005,8 +7005,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div.full) - mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link)); + mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link)); if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; -- cgit v1.2.3 From ec9ba4821fa52b5efdbc4cdf0a77497990655231 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 18 Dec 2023 16:17:23 -0500 Subject: drm/amdgpu: Let KFD sync with VM fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the rules for amdgpu_sync_resv to let KFD synchronize with VM fences on page table reservations. This fixes intermittent memory corruption after evictions when using amdgpu_vm_handle_moved to update page tables for VM mappings managed through render nodes. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index dcd8c066bc1f..1b013a44ca99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, /* Never sync to VM updates either. */ if (fence_owner == AMDGPU_FENCE_OWNER_VM && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) + owner != AMDGPU_FENCE_OWNER_UNDEFINED && + owner != AMDGPU_FENCE_OWNER_KFD) return false; /* Ignore fences depending on the sync mode */ -- cgit v1.2.3 From 09b5bc456c63e3caeb854d492177bbfbe7b1cb22 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 13 Dec 2023 09:46:50 -0500 Subject: drm/amd/display: Assign stream status for FPO + Vactive cases A new check was added to ensure FPO is not enabled when the FPO pipe has 0 planes. This requires the stream status to check the plane count, but the stream status was not assigned for FPO + Vactive cases which leads to FPO not be enabled always. 
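Sketched in isolation, the flow the fix enables looks roughly like the
following. Only dc_state_get_stream_status() and the candidate lookup are
taken from the hunk below; the zero-plane guard is paraphrased here purely
for illustration and does not reflect the exact code elsewhere in the FPO
path:

	/* Pick up the stream status for the Vactive FPO candidate... */
	if (fpo_candidate_stream)
		fpo_stream_status = dc_state_get_stream_status(context,
							       fpo_candidate_stream);

	/* ...so the later plane-count check has data to act on
	 * (illustrative only; the real check lives in the FPO
	 * qualification logic).
	 */
	if (fpo_stream_status && fpo_stream_status->plane_count == 0)
		fpo_candidate_stream = NULL;	/* do not enable FPO */
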
Tested-by: Daniel Wheeler Reviewed-by: Relja Vojvodic Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index cca6436ecdf5..f698a7e63291 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -554,7 +554,8 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre DC_FP_START(); dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream); DC_FP_END(); - + if (fpo_candidate_stream) + fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream); DC_FP_START(); is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, dc->debug.fpo_vactive_min_active_margin_us); DC_FP_END(); -- cgit v1.2.3 From 4069d43bfecb45811a2ad5dc63326e4227fa5931 Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Wed, 13 Dec 2023 10:31:06 -0500 Subject: drm/amd/display: Add log end specifier [Why] Some debug tools, sometimes wrap around to multiple lines which causes issues with the DPM test script while it is looking for the logs. Need a way to tell when the log is finished. [How] Added "LOG_END" to the end of the log. Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Rodrigo Siqueira Signed-off-by: Relja Vojvodic Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 51adb13b3b80..aadd07bc68c5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -565,7 +565,7 @@ static void dcn32_auto_dpm_test_log( "pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - " "pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - " "pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - " - "pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d\n", + "pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n", dramclk_khz_override, fclk_khz_override, new_clocks->dcfclk_khz, -- cgit v1.2.3 From 54249f03ab9a7311dad653b449e15c6a939d7732 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 11 Dec 2023 17:54:57 -0500 Subject: drm/amd/display: Always exit DMCUB idle when called [Why] dc->idle_optimizations_allowed may be desynced with the hardware state. [How] Make sure we always exit out when dc_dmub_srv_exit_low_power_state is called by removing the check. 
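Shown in isolation, the exit path now looks like the sketch below. Only the
removed early return changes; the remaining guards are kept from the
function as it appears in the hunk that follows:

	static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
	{
		if (dc->debug.dmcub_emulation)
			return;

		/* The idle_optimizations_allowed early return is gone: the
		 * flag can be stale relative to the hardware, so the exit
		 * sequence always runs and relies on being a no-op when the
		 * hardware is already awake.
		 */
		if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
			return;

		/* ...proceed with the wake sequence... */
	}
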
Tested-by: Daniel Wheeler Reviewed-by: Sung joon Kim Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 1d315f7cdce3..4ce8ac966cc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1222,9 +1222,6 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (dc->debug.dmcub_emulation) return; - if (!dc->idle_optimizations_allowed) - return; - if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub) return; -- cgit v1.2.3 From 60d5d1e76270bac910f9596799cbd831fe09c489 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 11 Dec 2023 17:57:24 -0500 Subject: drm/amd/display: Wait forever for DMCUB to wake up [Why] If we time out waiting for PMFW to finish the exit sequence and touch the DMCUB register the system will hang in a hard locked state. [How] Pol forever. This covers the case where things take too long but also enables for debugging to occur since the cores won't be hardlocked. Tested-by: Daniel Wheeler Reviewed-by: Sung joon Kim Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 4ce8ac966cc8..7724dcadecba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1214,10 +1214,8 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) { - const uint32_t max_num_polls = 10000; uint32_t allow_state = 0; uint32_t commit_state = 0; - int i; if (dc->debug.dmcub_emulation) return; @@ -1244,17 +1242,13 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(dc->debug.ips2_entry_delay_us); dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); - for (i = 0; i < max_num_polls; ++i) { + for (;;) { commit_state = dc->hwss.get_idle_state(dc); if (commit_state & DMUB_IPS2_COMMIT_MASK) break; udelay(1); - - if (dc->debug.disable_timeout) - i--; } - ASSERT(i < max_num_polls); if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true)) ASSERT(0); @@ -1269,17 +1263,13 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv_notify_idle(dc, false); if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) { - for (i = 0; i < max_num_polls; ++i) { + for (;;) { commit_state = dc->hwss.get_idle_state(dc); if (commit_state & DMUB_IPS1_COMMIT_MASK) break; udelay(1); - - if (dc->debug.disable_timeout) - i--; } - ASSERT(i < max_num_polls); } } -- cgit v1.2.3 From 4b5c5f5ad38b9435518730cc7f8f1e8de9c5cb2f Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Tue, 19 Dec 2023 17:10:34 +0800 Subject: drm/amdgpu/gfx11: need acquire mutex before access CP_VMID_RESET v2 It's required to take the gfx mutex before access to CP_VMID_RESET, for there is a race condition with CP firmware to write the register. v2: add extra code to ensure the mutex releasing is successful. 
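The acquire/use/release pattern added below, shown on its own for clarity
(register and helper names are the ones introduced in the hunk; checking
the release matters too, since a failed release leaves the driver and CP
firmware out of sync):

	r = gfx_v11_0_request_gfx_index_mutex(adev, 1);	/* request */
	if (r) {
		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
		return r;
	}

	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);

	r = gfx_v11_0_request_gfx_index_mutex(adev, 0);	/* release */
	if (r) {
		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
		return r;
	}
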
Signed-off-by: Jack Xiao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 48 +++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index bdcf96df69e6..2fbcd9765980 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4474,11 +4474,43 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } +static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + int req) +{ + u32 i, tmp, val; + + for (i = 0; i < adev->usec_timeout; i++) { + /* Request with MeId=2, PipeId=0 */ + tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); + WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); + + val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); + if (req) { + if (val == tmp) + break; + } else { + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, + REQUEST, 1); + + /* unlocked or locked by firmware */ + if (val != tmp) + break; + } + udelay(1); + } + + if (i >= adev->usec_timeout) + return -EINVAL; + + return 0; +} + static int gfx_v11_0_soft_reset(void *handle) { u32 grbm_soft_reset = 0; u32 tmp; - int i, j, k; + int r, i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); @@ -4518,6 +4550,13 @@ static int gfx_v11_0_soft_reset(void *handle) } } + /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ + r = gfx_v11_0_request_gfx_index_mutex(adev, 1); + if (r) { + DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); + return r; + } + WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); // Read CP_VMID_RESET register three times. @@ -4526,6 +4565,13 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); RREG32_SOC15(GC, 0, regCP_VMID_RESET); + /* release the gfx mutex */ + r = gfx_v11_0_request_gfx_index_mutex(adev, 0); + if (r) { + DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); + return r; + } + for (i = 0; i < adev->usec_timeout; i++) { if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) -- cgit v1.2.3 From 4e7738bcfb6765ca669fdbd2be2f7f6f239ed3e5 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 12 Dec 2023 12:10:49 -0500 Subject: drm/amd/display: Switch DMCUB notify idle command to NO_WAIT [Why] Race condition between notification of driver idle and the command being processed. We could theoretically enter idle between the submission and the wait for idle that occurs after. [How] Switch the notification to NO_WAIT to avoid the RPTR access. Tested-by: Daniel Wheeler Reviewed-by: Sung joon Kim Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 7724dcadecba..dadeaa9c92dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1209,7 +1209,8 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) } /* NOTE: This does not use the "wake" interface since this is part of the wake path. 
*/ - dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + /* We also do not perform a wait since DMCUB could enter idle after the notification. */ + dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); } static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) -- cgit v1.2.3 From 59f1622a5f05d948a7c665a458a3dd76ba73015e Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Tue, 5 Dec 2023 00:01:15 -0500 Subject: drm/amd/display: Add dpia display mode validation logic [Why] If bandwidth allocation feature is enabled, connection manager wont limit the dp tunnel bandwidth. So, need to do display mode validation for streams on dpia links to avoid oversubscription of dp tunnel bandwidth. [How] - To read non reduced link rate and lane count and update reported link capability. - To calculate the bandwidth required for streams of dpia links per host router and validate against the allocated bandwidth for the host router. Tested-by: Daniel Wheeler Reviewed-by: PeiChen Huang Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_exports.c | 2 +- drivers/gpu/drm/amd/display/dc/dc.h | 4 +- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 6 + drivers/gpu/drm/amd/display/dc/dc_types.h | 2 + .../display/dc/link/protocols/link_dp_dpia_bw.c | 130 +++++++++++++++------ 5 files changed, 104 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index ed94187c2afa..f365773d5714 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -497,7 +497,7 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable) link->dc->link_srv->enable_hpd_filter(link, enable); } -bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count) +bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count) { return dc->link_srv->validate_dpia_bandwidth(streams, count); } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c2ee80d6f64c..565de7428b6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -2172,11 +2172,11 @@ int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( * * @dc: pointer to dc struct * @stream: pointer to all possible streams - * @num_streams: number of valid DPIA streams + * @count: number of valid DPIA streams * * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE */ -bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, +bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count); /* Sink Interfaces - A sink corresponds to a display output device */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index eeeeeef4d717..1cb7765f593a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -1377,6 +1377,12 @@ struct dp_trace { #ifndef DP_TUNNELING_STATUS #define DP_TUNNELING_STATUS 0xE0025 /* 1.4a */ #endif +#ifndef DP_TUNNELING_MAX_LINK_RATE +#define DP_TUNNELING_MAX_LINK_RATE 0xE0028 /* 1.4a */ +#endif +#ifndef DP_TUNNELING_MAX_LANE_COUNT +#define DP_TUNNELING_MAX_LANE_COUNT 
0xE0029 /* 1.4a */ +#endif #ifndef DPTX_BW_ALLOCATION_MODE_CONTROL #define DPTX_BW_ALLOCATION_MODE_CONTROL 0xE0030 /* 1.4a */ #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 870cd3932015..4f276169e05a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1151,6 +1151,8 @@ struct dc_dpia_bw_alloc { int bw_granularity; // BW Granularity bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM bool response_ready; // Response ready from the CM side + uint8_t nrd_max_lane_count; // Non-reduced max lane count + uint8_t nrd_max_link_rate; // Non-reduced max link rate }; #define MAX_SINKS_PER_LINK 4 diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index d6e1f969bfd5..a7aa8c9da868 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -59,6 +59,7 @@ static void reset_bw_alloc_struct(struct dc_link *link) link->dpia_bw_alloc_config.estimated_bw = 0; link->dpia_bw_alloc_config.bw_granularity = 0; link->dpia_bw_alloc_config.response_ready = false; + link->dpia_bw_alloc_config.sink_allocated_bw = 0; } #define BW_GRANULARITY_0 4 // 0.25 Gbps @@ -104,6 +105,32 @@ static int get_estimated_bw(struct dc_link *link) return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); } +static int get_non_reduced_max_link_rate(struct dc_link *link) +{ + uint8_t nrd_max_link_rate = 0; + + core_link_read_dpcd( + link, + DP_TUNNELING_MAX_LINK_RATE, + &nrd_max_link_rate, + sizeof(uint8_t)); + + return nrd_max_link_rate; +} + +static int get_non_reduced_max_lane_count(struct dc_link *link) +{ + uint8_t nrd_max_lane_count = 0; + + core_link_read_dpcd( + link, + DP_TUNNELING_MAX_LANE_COUNT, + &nrd_max_lane_count, + sizeof(uint8_t)); + + return nrd_max_lane_count; +} + /* * Read all New BW alloc configuration ex: estimated_bw, allocated_bw, * granuality, Driver_ID, CM_Group, & populate the BW allocation structs @@ -111,13 +138,20 @@ static int get_estimated_bw(struct dc_link *link) */ static void init_usb4_bw_struct(struct dc_link *link) { - // Init the known values + reset_bw_alloc_struct(link); + + /* init the known values */ link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link); link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link); + link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link); DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n", __func__, link->dpia_bw_alloc_config.bw_granularity, link->dpia_bw_alloc_config.estimated_bw); + DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n", + __func__, link->dpia_bw_alloc_config.nrd_max_link_rate, + link->dpia_bw_alloc_config.nrd_max_lane_count); } static uint8_t get_lowest_dpia_index(struct dc_link *link) @@ -142,39 +176,50 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link) } /* - * Get the Max Available BW or Max Estimated BW for each Host Router + * Get the maximum dp tunnel banwidth of host router * - * @link: pointer to the dc_link struct instance - * @type: ESTIMATD BW or MAX AVAILABLE BW + * @dc: pointer to the dc struct instance + * @hr_index: host router index * - * return: response_ready flag from dc_link struct + * return: host 
router maximum dp tunnel bandwidth */ -static int get_host_router_total_bw(struct dc_link *link, uint8_t type) +static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index) { - const struct dc *dc_struct = link->dc; - uint8_t lowest_dpia_index = get_lowest_dpia_index(link); - uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0; - struct dc_link *link_temp; + uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]); + uint8_t hr_index_temp = 0; + struct dc_link *link_dpia_primary, *link_dpia_secondary; int total_bw = 0; - int i; - - for (i = 0; i < MAX_PIPES * 2; ++i) { - if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) - continue; + for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) { - link_temp = dc_struct->links[i]; - if (!link_temp || !link_temp->hpd_status) + if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; - idx_temp = (link_temp->link_index - lowest_dpia_index) / 2; - - if (idx_temp == idx) { - - if (type == HOST_ROUTER_BW_ESTIMATED) - total_bw += link_temp->dpia_bw_alloc_config.estimated_bw; - else if (type == HOST_ROUTER_BW_ALLOCATED) - total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw; + hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2; + + if (hr_index_temp == hr_index) { + link_dpia_primary = dc->links[i]; + link_dpia_secondary = dc->links[i + 1]; + + /** + * If BW allocation enabled on both DPIAs, then + * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary) + * otherwise HR BW = Estimated(bw alloc enabled dpia) + */ + if ((link_dpia_primary->hpd_status && + link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) && + (link_dpia_secondary->hpd_status && + link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) { + total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + + link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw; + } else if (link_dpia_primary->hpd_status && + link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) { + total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw; + } else if (link_dpia_secondary->hpd_status && + link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) { + total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw; + } + break; } } @@ -194,7 +239,6 @@ static void dpia_bw_alloc_unplug(struct dc_link *link) if (link) { DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n", __func__, link->link_index); - link->dpia_bw_alloc_config.sink_allocated_bw = 0; reset_bw_alloc_struct(link); } } @@ -397,7 +441,7 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea if (!timeout) ret = 0;// ERROR TIMEOUT waiting for response for allocating bw else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) - ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED); + ret = link->dpia_bw_alloc_config.sink_allocated_bw; } //2. 
Cold Unplug else if (!link->hpd_status) @@ -439,29 +483,41 @@ out: bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias) { bool ret = true; - int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }; - uint8_t lowest_dpia_index = 0, dpia_index = 0; - uint8_t i; + int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0; + uint8_t lowest_dpia_index, i, hr_index; if (!num_dpias || num_dpias > MAX_DPIA_NUM) return ret; - //Get total Host Router BW & Validate against each Host Router max BW + lowest_dpia_index = get_lowest_dpia_index(link[0]); + + /* get total Host Router BW with granularity for the given modes */ for (i = 0; i < num_dpias; ++i) { + int granularity_Gbps = 0; + int bw_granularity = 0; if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled) continue; - lowest_dpia_index = get_lowest_dpia_index(link[i]); if (link[i]->link_index < lowest_dpia_index) continue; - dpia_index = (link[i]->link_index - lowest_dpia_index) / 2; - bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i]; - if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) { + granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity); + bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps + + ((bw_needed_per_dpia[i] % granularity_Gbps) ? granularity_Gbps : 0); - ret = false; - break; + hr_index = (link[i]->link_index - lowest_dpia_index) / 2; + bw_needed_per_hr[hr_index] += bw_granularity; + } + + /* validate against each Host Router max BW */ + for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) { + if (bw_needed_per_hr[hr_index]) { + host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index); + if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) { + ret = false; + break; + } } } -- cgit v1.2.3 From 6b2b782ad6a25734ae847d1659bea3f613dbb563 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 12 Dec 2023 12:17:31 -0500 Subject: drm/amd/display: For FPO and SubVP/DRR configs program vmin/max sel For FPO and SubVP/DRR cases we need to ensure to program OTG_V_TOTAL_MIN/MAX_SEL, otherwise stretching the vblank in FPO / SubVP / DRR cases will not have any effect and we could hit underflow / corruption. 
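A reduced sketch of the new programming step (taken from the
update_vmin_vmax_fams() hunk in dcn20_hwseq.c below, shown here without the
pipe iteration and the FPO/SubVP/DRR qualification for brevity):

	struct drr_params params = {0};
	struct timing_generator *tg = pipe->stream_res.tg;

	/* Seed OTG_V_TOTAL_MIN/MAX with the nominal v_total when the pipe
	 * is not already running variable or fixed DRR, so the MIN/MAX_SEL
	 * bits are armed before FAMS/SubVP/FPO stretches the vblank.
	 */
	params.vertical_total_min = pipe->stream->timing.v_total;
	params.vertical_total_max = pipe->stream->timing.v_total;
	tg->funcs->set_drr(tg, &params);
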
Tested-by: Daniel Wheeler Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 14 ++++++++ .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 14 -------- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 11 ++++--- .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 37 ++++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/inc/resource.h | 3 ++ .../amd/display/dc/resource/dcn32/dcn32_resource.c | 2 +- .../amd/display/dc/resource/dcn32/dcn32_resource.h | 3 -- .../display/dc/resource/dcn321/dcn321_resource.c | 2 +- 8 files changed, 62 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index f2abc1096ffb..57f0ddd15923 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -4986,6 +4986,20 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, return DC_OK; } +bool resource_subvp_in_use(struct dc *dc, + struct dc_state *context) +{ + uint32_t i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) + return true; + } + return false; +} + bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream) { if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index f698a7e63291..44ba1edb7f53 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -183,20 +183,6 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, return true; } -bool dcn32_subvp_in_use(struct dc *dc, - struct dc_state *context) -{ - uint32_t i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) - return true; - } - return false; -} - bool dcn32_mpo_in_use(struct dc_state *context) { uint32_t i; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9f37f717a1f8..aa68d010cbfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -33,6 +33,7 @@ #include "dcn30/dcn30_resource.h" #include "link.h" #include "dc_state_priv.h" +#include "resource.h" #define DC_LOGGER_INIT(logger) @@ -291,7 +292,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, /* for subvp + DRR case, if subvp pipes are still present we support pstate */ if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported && - dcn32_subvp_in_use(dc, context)) + resource_subvp_in_use(dc, context)) vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; if (vlevel < context->bw_ctx.dml.vba.soc.num_states && @@ -2272,7 +2273,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, unsigned int dummy_latency_index = 0; int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool subvp_in_use 
= dcn32_subvp_in_use(dc, context); + bool subvp_active = resource_subvp_in_use(dc, context); unsigned int min_dram_speed_mts_margin; bool need_fclk_lat_as_dummy = false; bool is_subvp_p_drr = false; @@ -2281,7 +2282,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc_assert_fp_enabled(); /* need to find dummy latency index for subvp */ - if (subvp_in_use) { + if (subvp_active) { /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */ if (!pstate_en) { context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; @@ -2467,7 +2468,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; } - if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) { + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) { /* find largest table entry that is lower than dram speed, * but lower than DPM0 still uses DPM0 */ @@ -3527,7 +3528,7 @@ void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb) void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context) { // WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue) - if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) && + if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) && dc->dml.soc.num_chans <= 8) { int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 0dfcb3cdcd20..bc71a9b058fe 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1882,6 +1882,42 @@ static void dcn20_program_pipe( } } +static void update_vmin_vmax_fams(struct dc *dc, + struct dc_state *context) +{ + uint32_t i; + struct drr_params params = {0}; + bool subvp_in_use = resource_subvp_in_use(dc, context); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (resource_is_pipe_type(pipe, OTG_MASTER) && + ((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM && + pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) { + if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) { + struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; + + /* DRR should be configured already if we're in active variable + * or active fixed, so only program if we're not in this state + */ + params.vertical_total_min = pipe->stream->timing.v_total; + params.vertical_total_max = pipe->stream->timing.v_total; + tg->funcs->set_drr(tg, ¶ms); + } + } else { + if (resource_is_pipe_type(pipe, OTG_MASTER) && + !pipe->stream->vrr_active_variable && + !pipe->stream->vrr_active_fixed) { + struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; + params.vertical_total_min = 0; + params.vertical_total_max = 0; + tg->funcs->set_drr(tg, ¶ms); + } + } + } +} + void dcn20_program_front_end_for_ctx( struct dc *dc, struct dc_state *context) @@ -1958,6 +1994,7 @@ void dcn20_program_front_end_for_ctx( && context->res_ctx.pipe_ctx[i].stream) 
hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); + update_vmin_vmax_fams(dc, context); /* Disconnect mpcc */ for (i = 0; i < dc->res_pool->pipe_count; i++) diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index c958ef37b78a..1d51fed12e20 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -609,6 +609,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( struct pipe_ctx *sec_pipe, bool odm); +bool resource_subvp_in_use(struct dc *dc, + struct dc_state *context); + /* A test harness interface that modifies dp encoder resources in the given dc * state and bypasses the need to revalidate. The interface assumes that the * test harness interface is called with pre-validated link config stored in the diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index c4d71e7f18af..ac04a9c9a3d8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1899,7 +1899,7 @@ int dcn32_populate_dml_pipes_from_context( static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = dcn32_subvp_in_use, + .get_subvp_en = resource_subvp_in_use, }; void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 0c87b0fabba7..62611acd4bcb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -131,9 +131,6 @@ void dcn32_merge_pipes_for_subvp(struct dc *dc, bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, struct dc_state *context); -bool dcn32_subvp_in_use(struct dc *dc, - struct dc_state *context); - bool dcn32_mpo_in_use(struct dc_state *context); bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 74412e5f03fe..e1ab207c46f1 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1574,7 +1574,7 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool) static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = dcn32_subvp_in_use, + .get_subvp_en = resource_subvp_in_use, }; static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -- cgit v1.2.3 From f6154d8babbb8a98f0d3ea325aafae2e33bfd8be Mon Sep 17 00:00:00 2001 From: Revalla Date: Wed, 13 Dec 2023 19:56:38 +0530 Subject: drm/amd/display: Refactor INIT into component folder [why] Move all init files to hwss folder. [how] moved the dcnxx_init.c and .h files into inside the hwss and cleared the linkage errors. 
Tested-by: Daniel Wheeler Reviewed-by: Martin Leung Acked-by: Rodrigo Siqueira Signed-off-by: Revalla Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/Makefile | 2 - drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c | 128 --------------- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c | 145 ----------------- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn201/Makefile | 3 +- .../gpu/drm/amd/display/dc/dcn201/dcn201_init.c | 136 ---------------- .../gpu/drm/amd/display/dc/dcn201/dcn201_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c | 151 ------------------ drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 4 +- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c | 154 ------------------ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn301/Makefile | 2 +- .../gpu/drm/amd/display/dc/dcn301/dcn301_init.c | 154 ------------------ .../gpu/drm/amd/display/dc/dcn301/dcn301_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn302/Makefile | 12 -- .../gpu/drm/amd/display/dc/dcn302/dcn302_init.c | 41 ----- .../gpu/drm/amd/display/dc/dcn302/dcn302_init.h | 33 ---- .../gpu/drm/amd/display/dc/dcn303/dcn303_init.c | 40 ----- .../gpu/drm/amd/display/dc/dcn303/dcn303_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 157 ------------------- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn314/Makefile | 3 +- .../gpu/drm/amd/display/dc/dcn314/dcn314_init.c | 163 ------------------- .../gpu/drm/amd/display/dc/dcn314/dcn314_init.h | 34 ---- drivers/gpu/drm/amd/display/dc/dcn32/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c | 169 -------------------- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h | 33 ---- drivers/gpu/drm/amd/display/dc/dcn35/Makefile | 2 +- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c | 172 --------------------- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h | 34 ---- drivers/gpu/drm/amd/display/dc/hwss/Makefile | 28 ++-- .../gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c | 128 +++++++++++++++ .../gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h | 33 ++++ .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c | 145 +++++++++++++++++ .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h | 33 ++++ .../drm/amd/display/dc/hwss/dcn201/dcn201_init.c | 136 ++++++++++++++++ .../drm/amd/display/dc/hwss/dcn201/dcn201_init.h | 33 ++++ .../gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c | 151 ++++++++++++++++++ .../gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h | 33 ++++ .../gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c | 154 ++++++++++++++++++ .../gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h | 33 ++++ .../drm/amd/display/dc/hwss/dcn301/dcn301_init.c | 154 ++++++++++++++++++ .../drm/amd/display/dc/hwss/dcn301/dcn301_init.h | 33 ++++ .../drm/amd/display/dc/hwss/dcn302/dcn302_init.c | 41 +++++ .../drm/amd/display/dc/hwss/dcn302/dcn302_init.h | 33 ++++ .../drm/amd/display/dc/hwss/dcn303/dcn303_init.c | 40 +++++ .../drm/amd/display/dc/hwss/dcn303/dcn303_init.h | 33 ++++ .../gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c | 157 +++++++++++++++++++ 
.../gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h | 33 ++++ .../drm/amd/display/dc/hwss/dcn314/dcn314_init.c | 163 +++++++++++++++++++ .../drm/amd/display/dc/hwss/dcn314/dcn314_init.h | 34 ++++ .../gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c | 169 ++++++++++++++++++++ .../gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h | 33 ++++ .../gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c | 172 +++++++++++++++++++++ .../gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h | 34 ++++ .../drm/amd/display/dc/hwss/dcn351/CMakeLists.txt | 4 + .../gpu/drm/amd/display/dc/hwss/dcn351/Makefile | 17 ++ .../drm/amd/display/dc/hwss/dcn351/dcn351_init.c | 171 ++++++++++++++++++++ .../drm/amd/display/dc/hwss/dcn351/dcn351_init.h | 33 ++++ 65 files changed, 2258 insertions(+), 2049 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn302/Makefile delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c create mode 100644 
drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c create mode 100644 drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index bae4fdddbf97..7991ae468f75 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -34,8 +34,6 @@ DC_LIBS += dcn21 DC_LIBS += dcn201 DC_LIBS += dcn30 DC_LIBS += dcn301 -DC_LIBS += dcn302 -DC_LIBS += dcn303 DC_LIBS += dcn31 DC_LIBS += dcn314 DC_LIBS += dcn32 diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 0dd62934a18c..ae6a131be71b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -22,7 +22,7 @@ # # Makefile for DCN. -DCN10 = dcn10_init.o dcn10_ipp.o \ +DCN10 = dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ dcn10_dpp.o dcn10_opp.o \ dcn10_hubp.o dcn10_mpc.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c deleted file mode 100644 index a5bdac79a744..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2016-2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "hw_sequencer_private.h" -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" - -static const struct hw_sequencer_funcs dcn10_funcs = { - .program_gamut_remap = dcn10_program_gamut_remap, - .init_hw = dcn10_init_hw, - .power_down_on_boot = dcn10_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .post_unlock_program_front_end = dcn10_post_unlock_program_front_end, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .update_plane_addr = dcn10_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn10_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dce110_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dce110_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn10_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn10_disable_plane, - .pipe_control_lock = dcn10_pipe_control_lock, - .cursor_lock = dcn10_cursor_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .prepare_bandwidth = dcn10_prepare_bandwidth, - .optimize_bandwidth = dcn10_optimize_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dce110_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, - .set_backlight_level = dce110_set_backlight_level, - .set_abm_immediate_disable = dce110_set_abm_immediate_disable, - .set_pipe = dce110_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dce110_disable_link_output, - .get_dcc_en_bits = dcn10_get_dcc_en_bits, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, -}; - -static const struct hwseq_private_funcs dcn10_private_funcs = { - .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn10_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .program_pipe = dcn10_program_pipe, - .update_mpcc = dcn10_update_mpcc, - .set_input_transfer_func = 
dcn10_set_input_transfer_func, - .set_output_transfer_func = dcn10_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn10_blank_pixel_data, - .reset_hw_ctx_wrap = dcn10_reset_hw_ctx_wrap, - .enable_stream_timing = dcn10_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = NULL, - .enable_stream_gating = NULL, - .setup_vupdate_interrupt = dcn10_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = NULL, - .disable_vga = dcn10_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn10_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn10_enable_power_gating_plane, - .dpp_pg_control = dcn10_dpp_pg_control, - .hubp_pg_control = dcn10_hubp_pg_control, - .dsc_pg_control = NULL, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, -}; - -void dcn10_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn10_funcs; - dc->hwseq->funcs = dcn10_private_funcs; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h deleted file mode 100644 index 8c6fd7b844a4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN10_INIT_H__ -#define __DC_DCN10_INIT_H__ - -struct dc; - -void dcn10_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN10_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index bd760442ff89..3dae3943b056 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -2,7 +2,7 @@ # # Makefile for DCN. 
-DCN20 = dcn20_init.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \ +DCN20 = dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \ dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_mmhubbub.o \ dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \ dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c deleted file mode 100644 index 884e3e323338..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" - -#include "dcn20_init.h" - -static const struct hw_sequencer_funcs dcn20_funcs = { - .program_gamut_remap = dcn10_program_gamut_remap, - .init_hw = dcn10_init_hw, - .power_down_on_boot = dcn10_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, - .update_plane_addr = dcn20_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_vblanks_synchronization = dcn10_enable_vblanks_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dce110_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dcn20_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn20_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn20_disable_plane, - .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn20_prepare_bandwidth, - .optimize_bandwidth = dcn20_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = 
dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dce110_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .enable_writeback = dcn20_enable_writeback, - .disable_writeback = dcn20_disable_writeback, - .dmdata_status_done = dcn20_dmdata_status_done, - .program_dmdata_engine = dcn20_program_dmdata_engine, - .set_dmdata_attributes = dcn20_set_dmdata_attributes, - .init_sys_ctx = dcn20_init_sys_ctx, - .init_vm_ctx = dcn20_init_vm_ctx, - .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dce110_set_backlight_level, - .set_abm_immediate_disable = dce110_set_abm_immediate_disable, - .set_pipe = dce110_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dce110_disable_link_output, - .set_disp_pattern_generator = dcn20_set_disp_pattern_generator, - .get_dcc_en_bits = dcn10_get_dcc_en_bits, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, -}; - -static const struct hwseq_private_funcs dcn20_private_funcs = { - .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .update_mpcc = dcn20_update_mpcc, - .set_input_transfer_func = dcn20_set_input_transfer_func, - .set_output_transfer_func = dcn20_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn20_init_blank, - .disable_vga = dcn20_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn20_enable_power_gating_plane, - .dpp_pg_control = dcn20_dpp_pg_control, - .hubp_pg_control = dcn20_hubp_pg_control, - .update_odm = dcn20_update_odm, - .dsc_pg_control = dcn20_dsc_pg_control, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_blend_lut = dcn20_set_blend_lut, - 
.set_shaper_3dlut = dcn20_set_shaper_3dlut, -}; - -void dcn20_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn20_funcs; - dc->hwseq->funcs = dcn20_private_funcs; - -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h deleted file mode 100644 index 12277797cd71..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN20_INIT_H__ -#define __DC_DCN20_INIT_H__ - -struct dc; - -void dcn20_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN20_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile index a101e6511555..2b0b4f32e13b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile @@ -1,8 +1,7 @@ # SPDX-License-Identifier: MIT # # Makefile for DCN. -DCN201 = dcn201_init.o \ - dcn201_hubbub.o\ +DCN201 = dcn201_hubbub.o\ dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_dpp.o \ dcn201_dccg.o dcn201_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c deleted file mode 100644 index a13bf6c9386e..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn201/dcn201_hwseq.h" -#include "dcn201_init.h" - -static const struct hw_sequencer_funcs dcn201_funcs = { - .program_gamut_remap = dcn10_program_gamut_remap, - .init_hw = dcn201_init_hw, - .power_down_on_boot = NULL, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn10_post_unlock_program_front_end, - .update_plane_addr = dcn201_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dce110_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dce110_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn201_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn10_disable_plane, - .pipe_control_lock = dcn201_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn20_prepare_bandwidth, - .optimize_bandwidth = dcn20_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dce110_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .dmdata_status_done = dcn20_dmdata_status_done, - .set_dmdata_attributes = dcn201_set_dmdata_attributes, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn201_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .set_backlight_level = dce110_set_backlight_level, - .set_abm_immediate_disable = dce110_set_abm_immediate_disable, - .set_pipe = dce110_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = 
dce110_disable_link_output, - .set_disp_pattern_generator = dcn20_set_disp_pattern_generator, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, -}; - -static const struct hwseq_private_funcs dcn201_private_funcs = { - .init_pipes = NULL, - .update_plane_addr = dcn201_update_plane_addr, - .plane_atomic_disconnect = dcn201_plane_atomic_disconnect, - .program_pipe = dcn10_program_pipe, - .update_mpcc = dcn201_update_mpcc, - .set_input_transfer_func = dcn20_set_input_transfer_func, - .set_output_transfer_func = dcn20_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn10_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = NULL, - .enable_stream_gating = NULL, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn201_init_blank, - .disable_vga = dcn10_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn10_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn10_enable_power_gating_plane, - .dpp_pg_control = dcn10_dpp_pg_control, - .hubp_pg_control = dcn10_hubp_pg_control, - .dsc_pg_control = NULL, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_blend_lut = dcn20_set_blend_lut, - .set_shaper_3dlut = dcn20_set_shaper_3dlut, -}; - -void dcn201_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn201_funcs; - dc->hwseq->funcs = dcn201_private_funcs; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h deleted file mode 100644 index 1168887b033d..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __DC_DCN201_INIT_H__ -#define __DC_DCN201_INIT_H__ - -struct dc; - -void dcn201_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN201_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index dd1eea7212f4..ca92f5c8e7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -2,7 +2,7 @@ # # Makefile for DCN21. -DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o \ +DCN21 = dcn21_hubp.o dcn21_hubbub.o \ dcn21_link_encoder.o dcn21_dccg.o AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c deleted file mode 100644 index 18249c6b6d81..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright 2016-2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn21/dcn21_hwseq.h" - -#include "dcn21_init.h" - -static const struct hw_sequencer_funcs dcn21_funcs = { - .program_gamut_remap = dcn10_program_gamut_remap, - .init_hw = dcn10_init_hw, - .power_down_on_boot = dcn10_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, - .update_plane_addr = dcn20_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dce110_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dcn20_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn20_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn20_disable_plane, - .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn20_prepare_bandwidth, - .optimize_bandwidth = dcn20_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dce110_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .enable_writeback = dcn20_enable_writeback, - .disable_writeback = dcn20_disable_writeback, - .dmdata_status_done = dcn20_dmdata_status_done, - .program_dmdata_engine = dcn20_program_dmdata_engine, - .set_dmdata_attributes = dcn20_set_dmdata_attributes, - .init_sys_ctx = dcn21_init_sys_ctx, - .init_vm_ctx = dcn20_init_vm_ctx, - .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .optimize_pwr_state = dcn21_optimize_pwr_state, - .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, - .set_backlight_level = dcn21_set_backlight_level, - .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, - .set_pipe = dcn21_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - 
.enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dce110_disable_link_output, - .is_abm_supported = dcn21_is_abm_supported, - .set_disp_pattern_generator = dcn20_set_disp_pattern_generator, - .get_dcc_en_bits = dcn10_get_dcc_en_bits, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, -}; - -static const struct hwseq_private_funcs dcn21_private_funcs = { - .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .update_mpcc = dcn20_update_mpcc, - .set_input_transfer_func = dcn20_set_input_transfer_func, - .set_output_transfer_func = dcn20_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn20_init_blank, - .disable_vga = dcn20_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn20_enable_power_gating_plane, - .dpp_pg_control = dcn20_dpp_pg_control, - .hubp_pg_control = dcn20_hubp_pg_control, - .update_odm = dcn20_update_odm, - .dsc_pg_control = dcn20_dsc_pg_control, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .s0i3_golden_init_wa = dcn21_s0i3_golden_init_wa, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_blend_lut = dcn20_set_blend_lut, - .set_shaper_3dlut = dcn20_set_shaper_3dlut, - .PLAT_58856_wa = dcn21_PLAT_58856_wa, -}; - -void dcn21_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn21_funcs; - dc->hwseq->funcs = dcn21_private_funcs; - -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h deleted file mode 100644 index 3ed24292648a..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN21_INIT_H__ -#define __DC_DCN21_INIT_H__ - -struct dc; - -void dcn21_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN20_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index cd95f322235e..b5b2aa3b3783 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -23,9 +23,7 @@ # # -DCN30 := \ - dcn30_init.o \ - dcn30_hubbub.o \ +DCN30 := dcn30_hubbub.o \ dcn30_hubp.o \ dcn30_dpp.o \ dcn30_dccg.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c deleted file mode 100644 index 9894caedffed..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright 2016-2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn21/dcn21_hwseq.h" -#include "dcn30/dcn30_hwseq.h" - -#include "dcn30_init.h" - -static const struct hw_sequencer_funcs dcn30_funcs = { - .program_gamut_remap = dcn30_program_gamut_remap, - .init_hw = dcn30_init_hw, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, - .update_plane_addr = dcn20_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dcn30_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dcn20_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn20_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn20_disable_plane, - .disable_pixel_data = dcn20_disable_pixel_data, - .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn30_prepare_bandwidth, - .optimize_bandwidth = dcn20_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn30_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dcn30_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .edp_wait_for_T12 = dce110_edp_wait_for_T12, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .enable_writeback = dcn30_enable_writeback, - .disable_writeback = dcn30_disable_writeback, - .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, - .dmdata_status_done = dcn20_dmdata_status_done, - .program_dmdata_engine = dcn30_program_dmdata_engine, - .set_dmdata_attributes = dcn20_set_dmdata_attributes, - .init_sys_ctx = dcn20_init_sys_ctx, - .init_vm_ctx = dcn20_init_vm_ctx, - .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, - .does_plane_fit_in_mall = dcn30_does_plane_fit_in_mall, - .set_backlight_level = dcn21_set_backlight_level, - 
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable, - .hardware_release = dcn30_hardware_release, - .set_pipe = dcn21_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dce110_disable_link_output, - .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, - .get_dcc_en_bits = dcn10_get_dcc_en_bits, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, - .is_abm_supported = dcn21_is_abm_supported -}; - -static const struct hwseq_private_funcs dcn30_private_funcs = { - .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .update_mpcc = dcn20_update_mpcc, - .set_input_transfer_func = dcn30_set_input_transfer_func, - .set_output_transfer_func = dcn30_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn20_init_blank, - .disable_vga = dcn20_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn20_enable_power_gating_plane, - .dpp_pg_control = dcn20_dpp_pg_control, - .hubp_pg_control = dcn20_hubp_pg_control, - .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, - .update_odm = dcn20_update_odm, - .dsc_pg_control = dcn20_dsc_pg_control, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_blend_lut = dcn30_set_blend_lut, - .set_shaper_3dlut = dcn20_set_shaper_3dlut, -}; - -void dcn30_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn30_funcs; - dc->hwseq->funcs = dcn30_private_funcs; - -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h deleted file mode 100644 index c280ff90bfa3..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN30_INIT_H__ -#define __DC_DCN30_INIT_H__ - -struct dc; - -void dcn30_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN30_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index 090011300dcd..d241f665e40a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -10,7 +10,7 @@ # # Makefile for dcn30. -DCN301 = dcn301_init.o dcn301_dccg.o \ +DCN301 = dcn301_dccg.o \ dcn301_dio_link_encoder.o dcn301_panel_cntl.o dcn301_hubbub.o AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c deleted file mode 100644 index 6477009ce065..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright 2016-2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn21/dcn21_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dcn301/dcn301_hwseq.h" - -#include "dcn301_init.h" - -static const struct hw_sequencer_funcs dcn301_funcs = { - .program_gamut_remap = dcn30_program_gamut_remap, - .init_hw = dcn10_init_hw, - .power_down_on_boot = dcn10_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, - .update_plane_addr = dcn20_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dcn30_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dcn20_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn20_unblank_stream, -#ifdef FREESYNC_POWER_OPTIMIZE - .are_streams_coarse_grain_aligned = dcn20_are_streams_coarse_grain_aligned, -#endif - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn20_disable_plane, - .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn20_prepare_bandwidth, - .optimize_bandwidth = dcn20_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn10_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dcn30_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .enable_writeback = dcn30_enable_writeback, - .disable_writeback = dcn30_disable_writeback, - .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, - .dmdata_status_done = dcn20_dmdata_status_done, - .program_dmdata_engine = dcn30_program_dmdata_engine, - .set_dmdata_attributes = dcn20_set_dmdata_attributes, - .init_sys_ctx = dcn20_init_sys_ctx, - .init_vm_ctx = dcn20_init_vm_ctx, - .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn21_set_backlight_level, - .set_abm_immediate_disable = 
dcn21_set_abm_immediate_disable, - .set_pipe = dcn21_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dce110_disable_link_output, - .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, - .get_dcc_en_bits = dcn10_get_dcc_en_bits, - .optimize_pwr_state = dcn21_optimize_pwr_state, - .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, -}; - -static const struct hwseq_private_funcs dcn301_private_funcs = { - .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .update_mpcc = dcn20_update_mpcc, - .set_input_transfer_func = dcn30_set_input_transfer_func, - .set_output_transfer_func = dcn30_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn20_init_blank, - .disable_vga = dcn20_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn20_enable_power_gating_plane, - .dpp_pg_control = dcn20_dpp_pg_control, - .hubp_pg_control = dcn20_hubp_pg_control, - .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, - .update_odm = dcn20_update_odm, - .dsc_pg_control = dcn20_dsc_pg_control, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_blend_lut = dcn30_set_blend_lut, - .set_shaper_3dlut = dcn20_set_shaper_3dlut, -}; - -void dcn301_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn301_funcs; - dc->hwseq->funcs = dcn301_private_funcs; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h deleted file mode 100644 index 0bca48ccbfa2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN30_INIT_H__ -#define __DC_DCN30_INIT_H__ - -struct dc; - -void dcn301_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN30_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile deleted file mode 100644 index 0fcd03569d74..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# -# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved -# -# Authors: AMD -# -# Makefile for dcn302. - -DCN3_02 = dcn302_init.o - -AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c deleted file mode 100644 index 637f9514d37b..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dcn302/dcn302_hwseq.h" - -#include "dcn30/dcn30_init.h" - -#include "dc.h" - -#include "dcn302_init.h" - -void dcn302_hw_sequencer_construct(struct dc *dc) -{ - dcn30_hw_sequencer_construct(dc); - - dc->hwseq->funcs.dpp_pg_control = dcn302_dpp_pg_control; - dc->hwseq->funcs.hubp_pg_control = dcn302_hubp_pg_control; - dc->hwseq->funcs.dsc_pg_control = dcn302_dsc_pg_control; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h deleted file mode 100644 index 899587b93aa1..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2020 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN302_INIT_H__ -#define __DC_DCN302_INIT_H__ - -struct dc; - -void dcn302_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN302_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c deleted file mode 100644 index edb4d68b8187..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c +++ /dev/null @@ -1,40 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright (C) 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - */ - -#include "dcn303/dcn303_hwseq.h" -#include "dcn30/dcn30_init.h" -#include "dc.h" - -#include "dcn303_init.h" - -void dcn303_hw_sequencer_construct(struct dc *dc) -{ - dcn30_hw_sequencer_construct(dc); - - dc->hwseq->funcs.dpp_pg_control = dcn303_dpp_pg_control; - dc->hwseq->funcs.hubp_pg_control = dcn303_hubp_pg_control; - dc->hwseq->funcs.dsc_pg_control = dcn303_dsc_pg_control; - dc->hwseq->funcs.enable_power_gating_plane = dcn303_enable_power_gating_plane; -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h deleted file mode 100644 index 4949981126d7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright (C) 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - */ - -#ifndef __DC_DCN303_INIT_H__ -#define __DC_DCN303_INIT_H__ - -struct dc; - -void dcn303_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN303_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index 11a2662e58ef..5d93ac16c03a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -10,7 +10,7 @@ # # Makefile for dcn31. -DCN31 = dcn31_hubbub.o dcn31_init.o dcn31_hubp.o \ +DCN31 = dcn31_hubbub.o dcn31_hubp.o \ dcn31_dccg.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \ dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ dcn31_afmt.o dcn31_vpg.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c deleted file mode 100644 index 669f524bd064..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn21/dcn21_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dcn301/dcn301_hwseq.h" -#include "dcn31/dcn31_hwseq.h" - -#include "dcn31_init.h" - -static const struct hw_sequencer_funcs dcn31_funcs = { - .program_gamut_remap = dcn30_program_gamut_remap, - .init_hw = dcn31_init_hw, - .power_down_on_boot = dcn10_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, - .update_plane_addr = dcn20_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dcn31_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dcn20_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn20_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn20_disable_plane, - .disable_pixel_data = dcn20_disable_pixel_data, - .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn20_prepare_bandwidth, - .optimize_bandwidth = dcn20_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn30_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dcn30_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_T12 = 
dce110_edp_wait_for_T12, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .enable_writeback = dcn30_enable_writeback, - .disable_writeback = dcn30_disable_writeback, - .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, - .dmdata_status_done = dcn20_dmdata_status_done, - .program_dmdata_engine = dcn30_program_dmdata_engine, - .set_dmdata_attributes = dcn20_set_dmdata_attributes, - .init_sys_ctx = dcn31_init_sys_ctx, - .init_vm_ctx = dcn20_init_vm_ctx, - .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, - .set_backlight_level = dcn21_set_backlight_level, - .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, - .set_pipe = dcn21_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dce110_disable_link_output, - .z10_restore = dcn31_z10_restore, - .z10_save_init = dcn31_z10_save_init, - .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, - .optimize_pwr_state = dcn21_optimize_pwr_state, - .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, -}; - -static const struct hwseq_private_funcs dcn31_private_funcs = { - .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .update_mpcc = dcn20_update_mpcc, - .set_input_transfer_func = dcn30_set_input_transfer_func, - .set_output_transfer_func = dcn30_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn20_init_blank, - .disable_vga = dcn20_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn31_enable_power_gating_plane, - .hubp_pg_control = dcn31_hubp_pg_control, - .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, - .update_odm = dcn20_update_odm, - .dsc_pg_control = dcn31_dsc_pg_control, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_blend_lut = dcn30_set_blend_lut, - .set_shaper_3dlut = dcn20_set_shaper_3dlut, - .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, -}; - -void 
dcn31_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn31_funcs; - dc->hwseq->funcs = dcn31_private_funcs; - -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h deleted file mode 100644 index a3db08c8bd35..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN31_INIT_H__ -#define __DC_DCN31_INIT_H__ - -struct dc; - -void dcn31_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN31_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile index d5c177346a3b..b134ab05aa71 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile @@ -10,8 +10,7 @@ # # Makefile for dcn314. -DCN314 = dcn314_init.o \ - dcn314_dio_stream_encoder.o dcn314_dccg.o +DCN314 = dcn314_dio_stream_encoder.o dcn314_dccg.o AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c deleted file mode 100644 index ccb7e317e86a..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ /dev/null @@ -1,163 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn21/dcn21_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dcn301/dcn301_hwseq.h" -#include "dcn31/dcn31_hwseq.h" -#include "dcn314/dcn314_hwseq.h" - -#include "dcn314_init.h" - -static const struct hw_sequencer_funcs dcn314_funcs = { - .program_gamut_remap = dcn30_program_gamut_remap, - .init_hw = dcn31_init_hw, - .power_down_on_boot = dcn10_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, - .update_plane_addr = dcn20_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dcn31_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dcn20_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn20_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn20_disable_plane, - .disable_pixel_data = dcn20_disable_pixel_data, - .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn20_prepare_bandwidth, - .optimize_bandwidth = dcn20_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn30_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dcn30_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_T12 = dce110_edp_wait_for_T12, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .enable_writeback = dcn30_enable_writeback, - .disable_writeback = dcn30_disable_writeback, - .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, - .dmdata_status_done = dcn20_dmdata_status_done, - .program_dmdata_engine = dcn30_program_dmdata_engine, - .set_dmdata_attributes = dcn20_set_dmdata_attributes, - .init_sys_ctx = 
dcn31_init_sys_ctx, - .init_vm_ctx = dcn20_init_vm_ctx, - .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, - .set_backlight_level = dcn21_set_backlight_level, - .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, - .set_pipe = dcn21_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dcn314_disable_link_output, - .z10_restore = dcn31_z10_restore, - .z10_save_init = dcn31_z10_save_init, - .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, - .optimize_pwr_state = dcn21_optimize_pwr_state, - .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, -}; - -static const struct hwseq_private_funcs dcn314_private_funcs = { - .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .update_mpcc = dcn20_update_mpcc, - .set_input_transfer_func = dcn30_set_input_transfer_func, - .set_output_transfer_func = dcn30_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn20_init_blank, - .disable_vga = dcn20_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn314_enable_power_gating_plane, - .dpp_root_clock_control = dcn314_dpp_root_clock_control, - .hubp_pg_control = dcn31_hubp_pg_control, - .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, - .update_odm = dcn314_update_odm, - .dsc_pg_control = dcn314_dsc_pg_control, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_blend_lut = dcn30_set_blend_lut, - .set_shaper_3dlut = dcn20_set_shaper_3dlut, - .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, - .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values, - .set_pixels_per_cycle = dcn314_set_pixels_per_cycle, - .resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio, -}; - -void dcn314_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn314_funcs; - dc->hwseq->funcs = dcn314_private_funcs; - -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h deleted file mode 100644 index 8f92e66577cf..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2022 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN314_INIT_H__ -#define __DC_DCN314_INIT_H__ - -struct dc; - -void dcn314_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN314_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile index 905b74b53092..5314770fff1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile @@ -10,7 +10,7 @@ # # Makefile for dcn32. -DCN32 = dcn32_hubbub.o dcn32_init.o dcn32_dccg.o \ +DCN32 = dcn32_hubbub.o dcn32_dccg.o \ dcn32_mmhubbub.o dcn32_dpp.o dcn32_hubp.o dcn32_mpc.o \ dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_resource_helpers.o \ dcn32_hpo_dp_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c deleted file mode 100644 index 427cfc8c24a4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn21/dcn21_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dcn31/dcn31_hwseq.h" -#include "dcn32/dcn32_hwseq.h" -#include "dcn32_init.h" - -static const struct hw_sequencer_funcs dcn32_funcs = { - .program_gamut_remap = dcn30_program_gamut_remap, - .init_hw = dcn32_init_hw, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, - .update_plane_addr = dcn20_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dcn31_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dcn20_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn32_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn20_disable_plane, - .disable_pixel_data = dcn20_disable_pixel_data, - .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn32_prepare_bandwidth, - .optimize_bandwidth = dcn20_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn10_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn30_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dcn30_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .edp_wait_for_T12 = dce110_edp_wait_for_T12, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .enable_writeback = dcn30_enable_writeback, - .disable_writeback = dcn30_disable_writeback, - .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, - .dmdata_status_done = dcn20_dmdata_status_done, - .program_dmdata_engine = dcn30_program_dmdata_engine, - .set_dmdata_attributes = dcn20_set_dmdata_attributes, - .init_sys_ctx = dcn20_init_sys_ctx, - .init_vm_ctx = dcn20_init_vm_ctx, - .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .apply_idle_power_optimizations = dcn32_apply_idle_power_optimizations, - .does_plane_fit_in_mall = NULL, - .set_backlight_level = 
dcn21_set_backlight_level, - .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, - .hardware_release = dcn30_hardware_release, - .set_pipe = dcn21_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dcn32_disable_link_output, - .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, - .get_dcc_en_bits = dcn10_get_dcc_en_bits, - .commit_subvp_config = dcn32_commit_subvp_config, - .enable_phantom_streams = dcn32_enable_phantom_streams, - .subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, - .subvp_pipe_control_lock_fast = dcn32_subvp_pipe_control_lock_fast, - .update_phantom_vp_position = dcn32_update_phantom_vp_position, - .update_dsc_pg = dcn32_update_dsc_pg, - .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, - .blank_phantom = dcn32_blank_phantom, - .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, -}; - -static const struct hwseq_private_funcs dcn32_private_funcs = { - .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .update_mpcc = dcn20_update_mpcc, - .set_input_transfer_func = dcn32_set_input_transfer_func, - .set_output_transfer_func = dcn32_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn32_init_blank, - .disable_vga = dcn20_disable_vga, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, - .enable_power_gating_plane = dcn32_enable_power_gating_plane, - .hubp_pg_control = dcn32_hubp_pg_control, - .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, - .update_odm = dcn32_update_odm, - .dsc_pg_control = dcn32_dsc_pg_control, - .dsc_pg_status = dcn32_dsc_pg_status, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_mcm_luts = dcn32_set_mcm_luts, - .program_mall_pipe_config = dcn32_program_mall_pipe_config, - .update_force_pstate = dcn32_update_force_pstate, - .update_mall_sel = dcn32_update_mall_sel, - .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, - .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, - .resync_fifo_dccg_dio = dcn32_resync_fifo_dccg_dio, - .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, -}; - -void dcn32_hw_sequencer_init_functions(struct dc *dc) -{ - dc->hwss = dcn32_funcs; - dc->hwseq->funcs = dcn32_private_funcs; - -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h deleted file mode 100644 index 89a591eb2c23..000000000000 --- 
a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN32_INIT_H__ -#define __DC_DCN32_INIT_H__ - -struct dc; - -void dcn32_hw_sequencer_init_functions(struct dc *dc); - -#endif /* __DC_DCN32_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile index fa7ec82ae5f5..0e317e0c36a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn35/Makefile @@ -10,7 +10,7 @@ # # Makefile for DCN35. -DCN35 = dcn35_init.o dcn35_dio_stream_encoder.o \ +DCN35 = dcn35_dio_stream_encoder.o \ dcn35_dio_link_encoder.o dcn35_dccg.o \ dcn35_hubp.o dcn35_hubbub.o \ dcn35_mmhubbub.o dcn35_opp.o dcn35_dpp.o dcn35_pg_cntl.o dcn35_dwb.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c deleted file mode 100644 index a630aa77dcec..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.c +++ /dev/null @@ -1,172 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#include "dce110/dce110_hwseq.h" -#include "dcn10/dcn10_hwseq.h" -#include "dcn20/dcn20_hwseq.h" -#include "dcn21/dcn21_hwseq.h" -#include "dcn30/dcn30_hwseq.h" -#include "dcn301/dcn301_hwseq.h" -#include "dcn31/dcn31_hwseq.h" -#include "dcn32/dcn32_hwseq.h" -#include "dcn35/dcn35_hwseq.h" - -#include "dcn35_init.h" - -static const struct hw_sequencer_funcs dcn35_funcs = { - .program_gamut_remap = dcn30_program_gamut_remap, - .init_hw = dcn35_init_hw, - .power_down_on_boot = dcn35_power_down_on_boot, - .apply_ctx_to_hw = dce110_apply_ctx_to_hw, - .apply_ctx_for_surface = NULL, - .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, - .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, - .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, - .update_plane_addr = dcn20_update_plane_addr, - .update_dchub = dcn10_update_dchub, - .update_pending_status = dcn10_update_pending_status, - .program_output_csc = dcn20_program_output_csc, - .enable_accelerated_mode = dce110_enable_accelerated_mode, - .enable_timing_synchronization = dcn10_enable_timing_synchronization, - .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, - .update_info_frame = dcn31_update_info_frame, - .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, - .enable_stream = dcn20_enable_stream, - .disable_stream = dce110_disable_stream, - .unblank_stream = dcn32_unblank_stream, - .blank_stream = dce110_blank_stream, - .enable_audio_stream = dce110_enable_audio_stream, - .disable_audio_stream = dce110_disable_audio_stream, - .disable_plane = dcn35_disable_plane, - .disable_pixel_data = dcn20_disable_pixel_data, - .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn10_lock_all_pipes, - .cursor_lock = dcn10_cursor_lock, - .prepare_bandwidth = dcn35_prepare_bandwidth, - .optimize_bandwidth = dcn35_optimize_bandwidth, - .update_bandwidth = dcn20_update_bandwidth, - .set_drr = dcn35_set_drr, - .get_position = dcn10_get_position, - .set_static_screen_control = dcn30_set_static_screen_control, - .setup_stereo = dcn10_setup_stereo, - .set_avmute = dcn30_set_avmute, - .log_hw_state = dcn10_log_hw_state, - .get_hw_state = dcn10_get_hw_state, - .clear_status_bits = dcn10_clear_status_bits, - .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .edp_backlight_control = dce110_edp_backlight_control, - .edp_power_control = dce110_edp_power_control, - .edp_wait_for_T12 = dce110_edp_wait_for_T12, - .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, - .set_cursor_position = dcn10_set_cursor_position, - .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, - .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, - .set_clock = dcn10_set_clock, - .get_clock = dcn10_get_clock, - .program_triplebuffer = dcn20_program_triple_buffer, - .enable_writeback = dcn30_enable_writeback, - .disable_writeback = dcn30_disable_writeback, - .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, - .dmdata_status_done = dcn20_dmdata_status_done, - .program_dmdata_engine = dcn30_program_dmdata_engine, - .set_dmdata_attributes = dcn20_set_dmdata_attributes, - .init_sys_ctx = dcn31_init_sys_ctx, - .init_vm_ctx = dcn20_init_vm_ctx, - .set_flip_control_gsl = dcn20_set_flip_control_gsl, - .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, - .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = 
dce110_power_down, - .set_backlight_level = dcn21_set_backlight_level, - .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, - .set_pipe = dcn21_set_pipe, - .enable_lvds_link_output = dce110_enable_lvds_link_output, - .enable_tmds_link_output = dce110_enable_tmds_link_output, - .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dcn32_disable_link_output, - .z10_restore = dcn35_z10_restore, - .z10_save_init = dcn31_z10_save_init, - .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, - .optimize_pwr_state = dcn21_optimize_pwr_state, - .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, - .update_visual_confirm_color = dcn10_update_visual_confirm_color, - .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, - .update_dsc_pg = dcn32_update_dsc_pg, - .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, - .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, - .hw_block_power_up = dcn35_hw_block_power_up, - .hw_block_power_down = dcn35_hw_block_power_down, - .root_clock_control = dcn35_root_clock_control, - .set_idle_state = dcn35_set_idle_state, - .get_idle_state = dcn35_get_idle_state -}; - -static const struct hwseq_private_funcs dcn35_private_funcs = { - .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, - .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, - .update_mpcc = dcn20_update_mpcc, - .set_input_transfer_func = dcn32_set_input_transfer_func, - .set_output_transfer_func = dcn32_set_output_transfer_func, - .power_down = dce110_power_down, - .enable_display_power_gating = dcn10_dummy_display_power_gating, - .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap, - .enable_stream_timing = dcn20_enable_stream_timing, - .edp_backlight_control = dce110_edp_backlight_control, - .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, - .did_underflow_occur = dcn10_did_underflow_occur, - .init_blank = dcn20_init_blank, - .disable_vga = NULL, - .bios_golden_init = dcn10_bios_golden_init, - .plane_atomic_disable = dcn35_plane_atomic_disable, - //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ - //.hubp_pg_control = dcn35_hubp_pg_control, - .enable_power_gating_plane = dcn35_enable_power_gating_plane, - .dpp_root_clock_control = dcn35_dpp_root_clock_control, - .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, - .update_odm = dcn35_update_odm, - .set_hdr_multiplier = dcn10_set_hdr_multiplier, - .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, - .wait_for_blank_complete = dcn20_wait_for_blank_complete, - .dccg_init = dcn20_dccg_init, - .set_mcm_luts = dcn32_set_mcm_luts, - .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, - .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, - .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, - .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, - .dsc_pg_control = dcn35_dsc_pg_control, - .dsc_pg_status = dcn32_dsc_pg_status, - .enable_plane = dcn35_enable_plane, -}; - -void dcn35_hw_sequencer_construct(struct dc *dc) -{ - dc->hwss = dcn35_funcs; - dc->hwseq->funcs = dcn35_private_funcs; - -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h deleted file mode 100644 index b67015032c35..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_init.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2023 
Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifndef __DC_DCN35_INIT_H__ -#define __DC_DCN35_INIT_H__ - -struct dc; - -void dcn35_hw_sequencer_construct(struct dc *dc); - -#endif /* __DC_DCN35_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/Makefile index bccd46bd1815..254136f8e3f9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/Makefile +++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile @@ -78,7 +78,7 @@ ifdef CONFIG_DRM_AMD_DC_FP # DCN ############################################################################### -HWSS_DCN10 = dcn10_hwseq.o +HWSS_DCN10 = dcn10_hwseq.o dcn10_init.o AMD_DAL_HWSS_DCN10 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn10/,$(HWSS_DCN10)) @@ -86,7 +86,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN10) ############################################################################### -HWSS_DCN20 = dcn20_hwseq.o +HWSS_DCN20 = dcn20_hwseq.o dcn20_init.o AMD_DAL_HWSS_DCN20 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn20/,$(HWSS_DCN20)) @@ -94,7 +94,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN20) ############################################################################### -HWSS_DCN201 = dcn201_hwseq.o +HWSS_DCN201 = dcn201_hwseq.o dcn201_init.o AMD_DAL_HWSS_DCN201 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn201/,$(HWSS_DCN201)) @@ -102,7 +102,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN201) ############################################################################### -HWSS_DCN21 = dcn21_hwseq.o +HWSS_DCN21 = dcn21_hwseq.o dcn21_init.o AMD_DAL_HWSS_DCN21 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn21/,$(HWSS_DCN21)) @@ -114,7 +114,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN21) ############################################################################### -HWSS_DCN30 = dcn30_hwseq.o +HWSS_DCN30 = dcn30_hwseq.o dcn30_init.o AMD_DAL_HWSS_DCN30 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn30/,$(HWSS_DCN30)) @@ -122,7 +122,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN30) ############################################################################### -HWSS_DCN301 = dcn301_hwseq.o +HWSS_DCN301 = dcn301_hwseq.o dcn301_init.o AMD_DAL_HWSS_DCN301 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn301/,$(HWSS_DCN301)) @@ -130,15 +130,17 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN301) ############################################################################### -HWSS_DCN302 = dcn302_hwseq.o +HWSS_DCN302 = dcn302_hwseq.o dcn302_init.o AMD_DAL_HWSS_DCN302 = $(addprefix 
$(AMDDALPATH)/dc/hwss/dcn302/,$(HWSS_DCN302)) AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN302) + + ############################################################################### -HWSS_DCN303 = dcn303_hwseq.o +HWSS_DCN303 = dcn303_hwseq.o dcn303_init.o AMD_DAL_HWSS_DCN303 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn303/,$(HWSS_DCN303)) @@ -146,7 +148,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN303) ############################################################################### -HWSS_DCN31 = dcn31_hwseq.o +HWSS_DCN31 = dcn31_hwseq.o dcn31_init.o AMD_DAL_HWSS_DCN31 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn31/,$(HWSS_DCN31)) @@ -154,7 +156,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN31) ############################################################################### -HWSS_DCN314 = dcn314_hwseq.o +HWSS_DCN314 = dcn314_hwseq.o dcn314_init.o AMD_DAL_HWSS_DCN314 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn314/,$(HWSS_DCN314)) @@ -162,7 +164,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN314) ############################################################################### -HWSS_DCN32 = dcn32_hwseq.o +HWSS_DCN32 = dcn32_hwseq.o dcn32_init.o AMD_DAL_HWSS_DCN32 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn32/,$(HWSS_DCN32)) @@ -170,7 +172,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN32) ############################################################################### -HWSS_DCN35 = dcn35_hwseq.o +HWSS_DCN35 = dcn35_hwseq.o dcn35_init.o AMD_DAL_HWSS_DCN35 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn35/,$(HWSS_DCN35)) @@ -180,4 +182,4 @@ AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN35) ############################################################################### -endif \ No newline at end of file +endif diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c new file mode 100644 index 000000000000..a5bdac79a744 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c @@ -0,0 +1,128 @@ +/* + * Copyright 2016-2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "hw_sequencer_private.h" +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" + +static const struct hw_sequencer_funcs dcn10_funcs = { + .program_gamut_remap = dcn10_program_gamut_remap, + .init_hw = dcn10_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .post_unlock_program_front_end = dcn10_post_unlock_program_front_end, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .update_plane_addr = dcn10_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn10_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dce110_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dce110_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn10_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn10_disable_plane, + .pipe_control_lock = dcn10_pipe_control_lock, + .cursor_lock = dcn10_cursor_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .prepare_bandwidth = dcn10_prepare_bandwidth, + .optimize_bandwidth = dcn10_optimize_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn10_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dce110_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, + .set_backlight_level = dce110_set_backlight_level, + .set_abm_immediate_disable = dce110_set_abm_immediate_disable, + .set_pipe = dce110_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dce110_disable_link_output, + .get_dcc_en_bits = dcn10_get_dcc_en_bits, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, +}; + +static const struct hwseq_private_funcs dcn10_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn10_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .program_pipe = dcn10_program_pipe, + .update_mpcc = dcn10_update_mpcc, + .set_input_transfer_func = 
dcn10_set_input_transfer_func, + .set_output_transfer_func = dcn10_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn10_blank_pixel_data, + .reset_hw_ctx_wrap = dcn10_reset_hw_ctx_wrap, + .enable_stream_timing = dcn10_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = NULL, + .enable_stream_gating = NULL, + .setup_vupdate_interrupt = dcn10_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = NULL, + .disable_vga = dcn10_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn10_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn10_enable_power_gating_plane, + .dpp_pg_control = dcn10_dpp_pg_control, + .hubp_pg_control = dcn10_hubp_pg_control, + .dsc_pg_control = NULL, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, +}; + +void dcn10_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn10_funcs; + dc->hwseq->funcs = dcn10_private_funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h new file mode 100644 index 000000000000..8c6fd7b844a4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN10_INIT_H__ +#define __DC_DCN10_INIT_H__ + +struct dc; + +void dcn10_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN10_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c new file mode 100644 index 000000000000..884e3e323338 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c @@ -0,0 +1,145 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" + +#include "dcn20_init.h" + +static const struct hw_sequencer_funcs dcn20_funcs = { + .program_gamut_remap = dcn10_program_gamut_remap, + .init_hw = dcn10_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_vblanks_synchronization = dcn10_enable_vblanks_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dce110_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn20_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn20_disable_plane, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn20_prepare_bandwidth, + .optimize_bandwidth = dcn20_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn10_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dce110_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = 
dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn20_enable_writeback, + .disable_writeback = dcn20_disable_writeback, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn20_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn20_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .set_backlight_level = dce110_set_backlight_level, + .set_abm_immediate_disable = dce110_set_abm_immediate_disable, + .set_pipe = dce110_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dce110_disable_link_output, + .set_disp_pattern_generator = dcn20_set_disp_pattern_generator, + .get_dcc_en_bits = dcn10_get_dcc_en_bits, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, +}; + +static const struct hwseq_private_funcs dcn20_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn20_set_input_transfer_func, + .set_output_transfer_func = dcn20_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = dcn20_disable_stream_gating, + .enable_stream_gating = dcn20_enable_stream_gating, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = dcn20_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn20_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn20_enable_power_gating_plane, + .dpp_pg_control = dcn20_dpp_pg_control, + .hubp_pg_control = dcn20_hubp_pg_control, + .update_odm = dcn20_update_odm, + .dsc_pg_control = dcn20_dsc_pg_control, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_blend_lut = dcn20_set_blend_lut, + .set_shaper_3dlut = dcn20_set_shaper_3dlut, +}; + +void dcn20_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn20_funcs; + dc->hwseq->funcs = dcn20_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h new file mode 100644 index 000000000000..12277797cd71 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN20_INIT_H__ +#define __DC_DCN20_INIT_H__ + +struct dc; + +void dcn20_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN20_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c new file mode 100644 index 000000000000..a13bf6c9386e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c @@ -0,0 +1,136 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn201/dcn201_hwseq.h" +#include "dcn201_init.h" + +static const struct hw_sequencer_funcs dcn201_funcs = { + .program_gamut_remap = dcn10_program_gamut_remap, + .init_hw = dcn201_init_hw, + .power_down_on_boot = NULL, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn10_post_unlock_program_front_end, + .update_plane_addr = dcn201_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dce110_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dce110_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn201_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn10_disable_plane, + .pipe_control_lock = dcn201_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn20_prepare_bandwidth, + .optimize_bandwidth = dcn20_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn10_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dce110_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .dmdata_status_done = dcn20_dmdata_status_done, + .set_dmdata_attributes = dcn201_set_dmdata_attributes, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn201_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .set_backlight_level = dce110_set_backlight_level, + .set_abm_immediate_disable = dce110_set_abm_immediate_disable, + .set_pipe = dce110_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dce110_disable_link_output, + .set_disp_pattern_generator = dcn20_set_disp_pattern_generator, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, +}; + +static const struct hwseq_private_funcs dcn201_private_funcs = { + .init_pipes = NULL, + .update_plane_addr = 
dcn201_update_plane_addr, + .plane_atomic_disconnect = dcn201_plane_atomic_disconnect, + .program_pipe = dcn10_program_pipe, + .update_mpcc = dcn201_update_mpcc, + .set_input_transfer_func = dcn20_set_input_transfer_func, + .set_output_transfer_func = dcn20_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn10_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = NULL, + .enable_stream_gating = NULL, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn201_init_blank, + .disable_vga = dcn10_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn10_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn10_enable_power_gating_plane, + .dpp_pg_control = dcn10_dpp_pg_control, + .hubp_pg_control = dcn10_hubp_pg_control, + .dsc_pg_control = NULL, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_blend_lut = dcn20_set_blend_lut, + .set_shaper_3dlut = dcn20_set_shaper_3dlut, +}; + +void dcn201_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn201_funcs; + dc->hwseq->funcs = dcn201_private_funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h new file mode 100644 index 000000000000..1168887b033d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DC_DCN201_INIT_H__ +#define __DC_DCN201_INIT_H__ + +struct dc; + +void dcn201_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN201_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c new file mode 100644 index 000000000000..18249c6b6d81 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c @@ -0,0 +1,151 @@ +/* + * Copyright 2016-2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" + +#include "dcn21_init.h" + +static const struct hw_sequencer_funcs dcn21_funcs = { + .program_gamut_remap = dcn10_program_gamut_remap, + .init_hw = dcn10_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dce110_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn20_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn20_disable_plane, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn20_prepare_bandwidth, + .optimize_bandwidth = dcn20_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn10_set_static_screen_control, + .setup_stereo = 
dcn10_setup_stereo, + .set_avmute = dce110_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn20_enable_writeback, + .disable_writeback = dcn20_disable_writeback, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn20_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn21_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, + .set_backlight_level = dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dce110_disable_link_output, + .is_abm_supported = dcn21_is_abm_supported, + .set_disp_pattern_generator = dcn20_set_disp_pattern_generator, + .get_dcc_en_bits = dcn10_get_dcc_en_bits, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, +}; + +static const struct hwseq_private_funcs dcn21_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn20_set_input_transfer_func, + .set_output_transfer_func = dcn20_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = dcn20_disable_stream_gating, + .enable_stream_gating = dcn20_enable_stream_gating, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = dcn20_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn20_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn20_enable_power_gating_plane, + .dpp_pg_control = dcn20_dpp_pg_control, + .hubp_pg_control = dcn20_hubp_pg_control, + .update_odm = dcn20_update_odm, + .dsc_pg_control = dcn20_dsc_pg_control, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .s0i3_golden_init_wa = dcn21_s0i3_golden_init_wa, 
+ .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_blend_lut = dcn20_set_blend_lut, + .set_shaper_3dlut = dcn20_set_shaper_3dlut, + .PLAT_58856_wa = dcn21_PLAT_58856_wa, +}; + +void dcn21_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn21_funcs; + dc->hwseq->funcs = dcn21_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h new file mode 100644 index 000000000000..3ed24292648a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN21_INIT_H__ +#define __DC_DCN21_INIT_H__ + +struct dc; + +void dcn21_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN20_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c new file mode 100644 index 000000000000..9894caedffed --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -0,0 +1,154 @@ +/* + * Copyright 2016-2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" + +#include "dcn30_init.h" + +static const struct hw_sequencer_funcs dcn30_funcs = { + .program_gamut_remap = dcn30_program_gamut_remap, + .init_hw = dcn30_init_hw, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn30_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn20_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn20_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn30_prepare_bandwidth, + .optimize_bandwidth = dcn20_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn30_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .edp_wait_for_T12 = dce110_edp_wait_for_T12, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn20_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, + .does_plane_fit_in_mall = dcn30_does_plane_fit_in_mall, + .set_backlight_level = dcn21_set_backlight_level, + 
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .hardware_release = dcn30_hardware_release, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dce110_disable_link_output, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .get_dcc_en_bits = dcn10_get_dcc_en_bits, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .is_abm_supported = dcn21_is_abm_supported +}; + +static const struct hwseq_private_funcs dcn30_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn30_set_input_transfer_func, + .set_output_transfer_func = dcn30_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = dcn20_disable_stream_gating, + .enable_stream_gating = dcn20_enable_stream_gating, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = dcn20_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn20_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn20_enable_power_gating_plane, + .dpp_pg_control = dcn20_dpp_pg_control, + .hubp_pg_control = dcn20_hubp_pg_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn20_update_odm, + .dsc_pg_control = dcn20_dsc_pg_control, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_blend_lut = dcn30_set_blend_lut, + .set_shaper_3dlut = dcn20_set_shaper_3dlut, +}; + +void dcn30_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn30_funcs; + dc->hwseq->funcs = dcn30_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h new file mode 100644 index 000000000000..c280ff90bfa3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN30_INIT_H__ +#define __DC_DCN30_INIT_H__ + +struct dc; + +void dcn30_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN30_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c new file mode 100644 index 000000000000..6477009ce065 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c @@ -0,0 +1,154 @@ +/* + * Copyright 2016-2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dcn301/dcn301_hwseq.h" + +#include "dcn301_init.h" + +static const struct hw_sequencer_funcs dcn301_funcs = { + .program_gamut_remap = dcn30_program_gamut_remap, + .init_hw = dcn10_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn30_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn20_unblank_stream, +#ifdef FREESYNC_POWER_OPTIMIZE + .are_streams_coarse_grain_aligned = dcn20_are_streams_coarse_grain_aligned, +#endif + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn20_disable_plane, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn20_prepare_bandwidth, + .optimize_bandwidth = dcn20_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn10_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn20_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .set_backlight_level = dcn21_set_backlight_level, + .set_abm_immediate_disable = 
dcn21_set_abm_immediate_disable, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dce110_disable_link_output, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .get_dcc_en_bits = dcn10_get_dcc_en_bits, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, +}; + +static const struct hwseq_private_funcs dcn301_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn30_set_input_transfer_func, + .set_output_transfer_func = dcn30_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = dcn20_disable_stream_gating, + .enable_stream_gating = dcn20_enable_stream_gating, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = dcn20_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn20_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn20_enable_power_gating_plane, + .dpp_pg_control = dcn20_dpp_pg_control, + .hubp_pg_control = dcn20_hubp_pg_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn20_update_odm, + .dsc_pg_control = dcn20_dsc_pg_control, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_blend_lut = dcn30_set_blend_lut, + .set_shaper_3dlut = dcn20_set_shaper_3dlut, +}; + +void dcn301_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn301_funcs; + dc->hwseq->funcs = dcn301_private_funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h new file mode 100644 index 000000000000..0bca48ccbfa2 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN30_INIT_H__ +#define __DC_DCN30_INIT_H__ + +struct dc; + +void dcn301_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN30_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c new file mode 100644 index 000000000000..637f9514d37b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c @@ -0,0 +1,41 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dcn302/dcn302_hwseq.h" + +#include "dcn30/dcn30_init.h" + +#include "dc.h" + +#include "dcn302_init.h" + +void dcn302_hw_sequencer_construct(struct dc *dc) +{ + dcn30_hw_sequencer_construct(dc); + + dc->hwseq->funcs.dpp_pg_control = dcn302_dpp_pg_control; + dc->hwseq->funcs.hubp_pg_control = dcn302_hubp_pg_control; + dc->hwseq->funcs.dsc_pg_control = dcn302_dsc_pg_control; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h new file mode 100644 index 000000000000..899587b93aa1 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN302_INIT_H__ +#define __DC_DCN302_INIT_H__ + +struct dc; + +void dcn302_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN302_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c new file mode 100644 index 000000000000..edb4d68b8187 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + */ + +#include "dcn303/dcn303_hwseq.h" +#include "dcn30/dcn30_init.h" +#include "dc.h" + +#include "dcn303_init.h" + +void dcn303_hw_sequencer_construct(struct dc *dc) +{ + dcn30_hw_sequencer_construct(dc); + + dc->hwseq->funcs.dpp_pg_control = dcn303_dpp_pg_control; + dc->hwseq->funcs.hubp_pg_control = dcn303_hubp_pg_control; + dc->hwseq->funcs.dsc_pg_control = dcn303_dsc_pg_control; + dc->hwseq->funcs.enable_power_gating_plane = dcn303_enable_power_gating_plane; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h new file mode 100644 index 000000000000..4949981126d7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + */ + +#ifndef __DC_DCN303_INIT_H__ +#define __DC_DCN303_INIT_H__ + +struct dc; + +void dcn303_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN303_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c new file mode 100644 index 000000000000..669f524bd064 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -0,0 +1,157 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dcn301/dcn301_hwseq.h" +#include "dcn31/dcn31_hwseq.h" + +#include "dcn31_init.h" + +static const struct hw_sequencer_funcs dcn31_funcs = { + .program_gamut_remap = dcn30_program_gamut_remap, + .init_hw = dcn31_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn31_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn20_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn20_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn20_prepare_bandwidth, + .optimize_bandwidth = dcn20_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn30_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_T12 = dce110_edp_wait_for_T12, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn31_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, + .set_backlight_level = 
dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dce110_disable_link_output, + .z10_restore = dcn31_z10_restore, + .z10_save_init = dcn31_z10_save_init, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, +}; + +static const struct hwseq_private_funcs dcn31_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn30_set_input_transfer_func, + .set_output_transfer_func = dcn30_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = dcn20_disable_stream_gating, + .enable_stream_gating = dcn20_enable_stream_gating, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = dcn20_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn20_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn31_enable_power_gating_plane, + .hubp_pg_control = dcn31_hubp_pg_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn20_update_odm, + .dsc_pg_control = dcn31_dsc_pg_control, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_blend_lut = dcn30_set_blend_lut, + .set_shaper_3dlut = dcn20_set_shaper_3dlut, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, +}; + +void dcn31_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn31_funcs; + dc->hwseq->funcs = dcn31_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h new file mode 100644 index 000000000000..a3db08c8bd35 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN31_INIT_H__ +#define __DC_DCN31_INIT_H__ + +struct dc; + +void dcn31_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN31_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c new file mode 100644 index 000000000000..ccb7e317e86a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dcn301/dcn301_hwseq.h" +#include "dcn31/dcn31_hwseq.h" +#include "dcn314/dcn314_hwseq.h" + +#include "dcn314_init.h" + +static const struct hw_sequencer_funcs dcn314_funcs = { + .program_gamut_remap = dcn30_program_gamut_remap, + .init_hw = dcn31_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn31_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn20_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn20_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn20_prepare_bandwidth, + .optimize_bandwidth = dcn20_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn30_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_T12 = dce110_edp_wait_for_T12, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn31_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, + 
.set_backlight_level = dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dcn314_disable_link_output, + .z10_restore = dcn31_z10_restore, + .z10_save_init = dcn31_z10_save_init, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, +}; + +static const struct hwseq_private_funcs dcn314_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn30_set_input_transfer_func, + .set_output_transfer_func = dcn30_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = dcn20_disable_stream_gating, + .enable_stream_gating = dcn20_enable_stream_gating, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = dcn20_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn20_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn314_enable_power_gating_plane, + .dpp_root_clock_control = dcn314_dpp_root_clock_control, + .hubp_pg_control = dcn31_hubp_pg_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn314_update_odm, + .dsc_pg_control = dcn314_dsc_pg_control, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_blend_lut = dcn30_set_blend_lut, + .set_shaper_3dlut = dcn20_set_shaper_3dlut, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, + .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values, + .set_pixels_per_cycle = dcn314_set_pixels_per_cycle, + .resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio, +}; + +void dcn314_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn314_funcs; + dc->hwseq->funcs = dcn314_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h new file mode 100644 index 000000000000..8f92e66577cf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN314_INIT_H__ +#define __DC_DCN314_INIT_H__ + +struct dc; + +void dcn314_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN314_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c new file mode 100644 index 000000000000..427cfc8c24a4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -0,0 +1,169 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dcn31/dcn31_hwseq.h" +#include "dcn32/dcn32_hwseq.h" +#include "dcn32_init.h" + +static const struct hw_sequencer_funcs dcn32_funcs = { + .program_gamut_remap = dcn30_program_gamut_remap, + .init_hw = dcn32_init_hw, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn31_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn32_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn20_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn32_prepare_bandwidth, + .optimize_bandwidth = dcn20_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn30_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .edp_wait_for_T12 = dce110_edp_wait_for_T12, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn20_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .apply_idle_power_optimizations = dcn32_apply_idle_power_optimizations, + .does_plane_fit_in_mall = NULL, + .set_backlight_level = 
dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .hardware_release = dcn30_hardware_release, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dcn32_disable_link_output, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .get_dcc_en_bits = dcn10_get_dcc_en_bits, + .commit_subvp_config = dcn32_commit_subvp_config, + .enable_phantom_streams = dcn32_enable_phantom_streams, + .subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .subvp_pipe_control_lock_fast = dcn32_subvp_pipe_control_lock_fast, + .update_phantom_vp_position = dcn32_update_phantom_vp_position, + .update_dsc_pg = dcn32_update_dsc_pg, + .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, + .blank_phantom = dcn32_blank_phantom, + .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, +}; + +static const struct hwseq_private_funcs dcn32_private_funcs = { + .init_pipes = dcn10_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn32_set_input_transfer_func, + .set_output_transfer_func = dcn32_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .disable_stream_gating = dcn20_disable_stream_gating, + .enable_stream_gating = dcn20_enable_stream_gating, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn32_init_blank, + .disable_vga = dcn20_disable_vga, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn20_plane_atomic_disable, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .enable_power_gating_plane = dcn32_enable_power_gating_plane, + .hubp_pg_control = dcn32_hubp_pg_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn32_update_odm, + .dsc_pg_control = dcn32_dsc_pg_control, + .dsc_pg_status = dcn32_dsc_pg_status, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_mcm_luts = dcn32_set_mcm_luts, + .program_mall_pipe_config = dcn32_program_mall_pipe_config, + .update_force_pstate = dcn32_update_force_pstate, + .update_mall_sel = dcn32_update_mall_sel, + .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, + .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, + .resync_fifo_dccg_dio = dcn32_resync_fifo_dccg_dio, + .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, +}; + +void dcn32_hw_sequencer_init_functions(struct dc *dc) +{ + dc->hwss = dcn32_funcs; + dc->hwseq->funcs = dcn32_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h new file mode 100644 index 000000000000..89a591eb2c23 --- 
/dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN32_INIT_H__ +#define __DC_DCN32_INIT_H__ + +struct dc; + +void dcn32_hw_sequencer_init_functions(struct dc *dc); + +#endif /* __DC_DCN32_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c new file mode 100644 index 000000000000..a630aa77dcec --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dcn301/dcn301_hwseq.h" +#include "dcn31/dcn31_hwseq.h" +#include "dcn32/dcn32_hwseq.h" +#include "dcn35/dcn35_hwseq.h" + +#include "dcn35_init.h" + +static const struct hw_sequencer_funcs dcn35_funcs = { + .program_gamut_remap = dcn30_program_gamut_remap, + .init_hw = dcn35_init_hw, + .power_down_on_boot = dcn35_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn31_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn32_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn35_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn35_prepare_bandwidth, + .optimize_bandwidth = dcn35_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn35_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn30_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_T12 = dce110_edp_wait_for_T12, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn31_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = 
dce110_power_down, + .set_backlight_level = dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dcn32_disable_link_output, + .z10_restore = dcn35_z10_restore, + .z10_save_init = dcn31_z10_save_init, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, + .update_dsc_pg = dcn32_update_dsc_pg, + .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, + .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, + .hw_block_power_up = dcn35_hw_block_power_up, + .hw_block_power_down = dcn35_hw_block_power_down, + .root_clock_control = dcn35_root_clock_control, + .set_idle_state = dcn35_set_idle_state, + .get_idle_state = dcn35_get_idle_state +}; + +static const struct hwseq_private_funcs dcn35_private_funcs = { + .init_pipes = dcn35_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn32_set_input_transfer_func, + .set_output_transfer_func = dcn32_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = NULL, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn35_plane_atomic_disable, + //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ + //.hubp_pg_control = dcn35_hubp_pg_control, + .enable_power_gating_plane = dcn35_enable_power_gating_plane, + .dpp_root_clock_control = dcn35_dpp_root_clock_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn35_update_odm, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_mcm_luts = dcn32_set_mcm_luts, + .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, + .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, + .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, + .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .dsc_pg_control = dcn35_dsc_pg_control, + .dsc_pg_status = dcn32_dsc_pg_status, + .enable_plane = dcn35_enable_plane, +}; + +void dcn35_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn35_funcs; + dc->hwseq->funcs = dcn35_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h new file mode 100644 index 000000000000..b67015032c35 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 
2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN35_INIT_H__ +#define __DC_DCN35_INIT_H__ + +struct dc; + +void dcn35_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN35_INIT_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt new file mode 100644 index 000000000000..951ca2da4486 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/CMakeLists.txt @@ -0,0 +1,4 @@ +dal3_subdirectory_sources( + dcn351_init.c + dcn351_init.h +) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile new file mode 100644 index 000000000000..b24ad27fe6ef --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile @@ -0,0 +1,17 @@ +# +# (c) Copyright 2022 Advanced Micro Devices, Inc. All the rights reserved +# +# All rights reserved. This notice is intended as a precaution against +# inadvertent publication and does not imply publication or any waiver +# of confidentiality. The year included in the foregoing notice is the +# year of creation of the work. +# +# Authors: AMD +# +# Makefile for DCN351. + +DCN351 = dcn351_init.o + +AMD_DAL_DCN351 = $(addprefix $(AMDDALPATH)/dc/dcn351/,$(DCN351)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DCN351) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c new file mode 100644 index 000000000000..143d3fc0221c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -0,0 +1,171 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dce110/dce110_hwseq.h" +#include "dcn10/dcn10_hwseq.h" +#include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" +#include "dcn30/dcn30_hwseq.h" +#include "dcn301/dcn301_hwseq.h" +#include "dcn31/dcn31_hwseq.h" +#include "dcn32/dcn32_hwseq.h" +#include "dcn35/dcn35_hwseq.h" + +#include "dcn351_init.h" + +static const struct hw_sequencer_funcs dcn351_funcs = { + .program_gamut_remap = dcn30_program_gamut_remap, + .init_hw = dcn35_init_hw, + .power_down_on_boot = dcn35_power_down_on_boot, + .apply_ctx_to_hw = dce110_apply_ctx_to_hw, + .apply_ctx_for_surface = NULL, + .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, + .wait_for_pending_cleared = dcn10_wait_for_pending_cleared, + .post_unlock_program_front_end = dcn20_post_unlock_program_front_end, + .update_plane_addr = dcn20_update_plane_addr, + .update_dchub = dcn10_update_dchub, + .update_pending_status = dcn10_update_pending_status, + .program_output_csc = dcn20_program_output_csc, + .enable_accelerated_mode = dce110_enable_accelerated_mode, + .enable_timing_synchronization = dcn10_enable_timing_synchronization, + .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, + .update_info_frame = dcn31_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, + .enable_stream = dcn20_enable_stream, + .disable_stream = dce110_disable_stream, + .unblank_stream = dcn32_unblank_stream, + .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, + .disable_plane = dcn35_disable_plane, + .disable_pixel_data = dcn20_disable_pixel_data, + .pipe_control_lock = dcn20_pipe_control_lock, + .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, + .prepare_bandwidth = dcn35_prepare_bandwidth, + .optimize_bandwidth = dcn35_optimize_bandwidth, + .update_bandwidth = dcn20_update_bandwidth, + .set_drr = dcn10_set_drr, + .get_position = dcn10_get_position, + .set_static_screen_control = dcn30_set_static_screen_control, + .setup_stereo = dcn10_setup_stereo, + .set_avmute = dcn30_set_avmute, + .log_hw_state = dcn10_log_hw_state, + .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, + .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, + .edp_power_control = dce110_edp_power_control, + .edp_wait_for_T12 = dce110_edp_wait_for_T12, + .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, + .set_cursor_position = dcn10_set_cursor_position, + .set_cursor_attribute = dcn10_set_cursor_attribute, + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .set_clock = dcn10_set_clock, + .get_clock = dcn10_get_clock, + .program_triplebuffer = dcn20_program_triple_buffer, + .enable_writeback = dcn30_enable_writeback, + .disable_writeback = dcn30_disable_writeback, + .update_writeback = 
dcn30_update_writeback, + .mmhubbub_warmup = dcn30_mmhubbub_warmup, + .dmdata_status_done = dcn20_dmdata_status_done, + .program_dmdata_engine = dcn30_program_dmdata_engine, + .set_dmdata_attributes = dcn20_set_dmdata_attributes, + .init_sys_ctx = dcn31_init_sys_ctx, + .init_vm_ctx = dcn20_init_vm_ctx, + .set_flip_control_gsl = dcn20_set_flip_control_gsl, + .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, + .set_backlight_level = dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, + .set_pipe = dcn21_set_pipe, + .enable_lvds_link_output = dce110_enable_lvds_link_output, + .enable_tmds_link_output = dce110_enable_tmds_link_output, + .enable_dp_link_output = dce110_enable_dp_link_output, + .disable_link_output = dcn32_disable_link_output, + .z10_restore = dcn35_z10_restore, + .z10_save_init = dcn31_z10_save_init, + .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, + .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, + .update_dsc_pg = dcn32_update_dsc_pg, + .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, + .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, + .hw_block_power_up = dcn35_hw_block_power_up, + .hw_block_power_down = dcn35_hw_block_power_down, + .root_clock_control = dcn35_root_clock_control, + .set_idle_state = dcn35_set_idle_state, + .get_idle_state = dcn35_get_idle_state +}; + +static const struct hwseq_private_funcs dcn351_private_funcs = { + .init_pipes = dcn35_init_pipes, + .update_plane_addr = dcn20_update_plane_addr, + .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, + .update_mpcc = dcn20_update_mpcc, + .set_input_transfer_func = dcn32_set_input_transfer_func, + .set_output_transfer_func = dcn32_set_output_transfer_func, + .power_down = dce110_power_down, + .enable_display_power_gating = dcn10_dummy_display_power_gating, + .blank_pixel_data = dcn20_blank_pixel_data, + .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap, + .enable_stream_timing = dcn20_enable_stream_timing, + .edp_backlight_control = dce110_edp_backlight_control, + .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, + .did_underflow_occur = dcn10_did_underflow_occur, + .init_blank = dcn20_init_blank, + .disable_vga = NULL, + .bios_golden_init = dcn10_bios_golden_init, + .plane_atomic_disable = dcn35_plane_atomic_disable, + //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ + //.hubp_pg_control = dcn35_hubp_pg_control, + .enable_power_gating_plane = dcn35_enable_power_gating_plane, + .dpp_root_clock_control = dcn35_dpp_root_clock_control, + .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, + .update_odm = dcn35_update_odm, + .set_hdr_multiplier = dcn10_set_hdr_multiplier, + .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high, + .wait_for_blank_complete = dcn20_wait_for_blank_complete, + .dccg_init = dcn20_dccg_init, + .set_mcm_luts = dcn32_set_mcm_luts, + .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, + .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, + .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, + .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .dsc_pg_control = dcn35_dsc_pg_control, + .dsc_pg_status = 
dcn32_dsc_pg_status, + .enable_plane = dcn35_enable_plane, +}; + +void dcn351_hw_sequencer_construct(struct dc *dc) +{ + dc->hwss = dcn351_funcs; + dc->hwseq->funcs = dcn351_private_funcs; + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h new file mode 100644 index 000000000000..970b01008b23 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h @@ -0,0 +1,33 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_DCN351_INIT_H__ +#define __DC_DCN351_INIT_H__ + +struct dc; + +void dcn351_hw_sequencer_construct(struct dc *dc); + +#endif /* __DC_DCN351_INIT_H__ */ -- cgit v1.2.3 From 9ade4870b87b09e1f132ba92c1ab13a6769d1b0f Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Thu, 14 Dec 2023 20:56:58 -0500 Subject: drm/amd/display: Fix Mismatch between pipe and stream [Why] Failing mode validation during dc_commit, leading to blackscreen with an 8k DP2 display during mode change. [What] Fix mixmatch between pipe and stream, which prevented us from recognizing the link as DP2. 
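
For illustration only (this is not part of the patch): the underlying point is that a stream index is not a pipe index, so link/encoder state such as DP2 detection must be read from a pipe that is actually assigned to the stream. Assuming the existing dc_state/res_ctx layout, the lookup the fix performs inline can be pictured as a small helper; find_pipe_for_stream() is a hypothetical name used only here.

static struct pipe_ctx *find_pipe_for_stream(struct dc_state *context,
					     struct dc_stream_state *stream)
{
	int k;

	/* Scan all pipes for one bound to this stream; pipe_ctx[i] for
	 * stream index i is not guaranteed to be that pipe. */
	for (k = 0; k < MAX_PIPES; k++) {
		if (context->res_ctx.pipe_ctx[k].stream == stream)
			return &context->res_ctx.pipe_ctx[k];
	}

	/* No pipe allocated to the stream yet; callers such as
	 * is_dp2p0_output_encoder() must treat this as "not a DP2 output". */
	return NULL;
}
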
Tested-by: Daniel Wheeler Reviewed-by: Dmytro Laktyushkin Acked-by: Rodrigo Siqueira Signed-off-by: Gabe Teeger Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml2_translation_helper.c | 16 +++++++++++++--- drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c | 6 +++--- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index bc2959fda5be..fa6a93dd9629 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -1049,9 +1049,10 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) { - int i = 0, j = 0; + int i = 0, j = 0, k = 0; int disp_cfg_stream_location, disp_cfg_plane_location; enum mall_stream_type stream_mall_type; + struct pipe_ctx *current_pipe_context; for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false; @@ -1071,6 +1072,15 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat dml2_populate_pipe_to_plane_index_mapping(dml2, context); for (i = 0; i < context->stream_count; i++) { + current_pipe_context = NULL; + for (k = 0; k < MAX_PIPES; k++) { + /* find one pipe allocated to this stream for the purpose of getting + info about the link later */ + if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) { + current_pipe_context = &context->res_ctx.pipe_ctx[k]; + break; + } + } disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg); stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]); @@ -1080,7 +1090,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], &context->res_ctx.pipe_ctx[i]); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1; @@ -1137,7 +1147,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (j >= 1) { populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], &context->res_ctx.pipe_ctx[i]); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 8c0794af5ade..1068b962d1c1 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -155,12 +155,12 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) { + if (pipe_ctx == NULL || pipe_ctx->stream == NULL) + return false; + /* If this assert is hit then we have a link encoder dynamic management issue */ ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); - if (pipe_ctx->stream == NULL) - return false; - /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { return (pipe_ctx->stream_res.hpo_dp_stream_enc && -- cgit v1.2.3 From b8a204fb1a97b39a7fcaefbf2c6c4d01aa4f3c57 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 14 Dec 2023 16:46:35 -0500 Subject: drm/amd/display: Verify disallow bits were cleared for idle [Why] A hang was observed where a read-modify-write access occurred due to the register for idle state being shared between DMCUB and driver. dmcub read - idle allow / no commit driver read - idle allow / no commit driver write - idle disallow / no commit dmcub write - idle allow / commit Resulting in DMCUB re-entering IPS after a disable and keeping the allow high. [How] Long term we need to split commit/allow into two registers or use shared DRAM state, but short term we can reduce the repro rate by ensuring that the disallow went through by bounding the expected worst case scenario. Tested-by: Daniel Wheeler Reviewed-by: Hansen Dsouza Reviewed-by: Ovidiu Bunea Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index dadeaa9c92dd..a65c485b0b49 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1233,8 +1233,16 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) { // Wait for evaluation time - udelay(dc->debug.ips2_eval_delay_us); - commit_state = dc->hwss.get_idle_state(dc); + for (;;) { + udelay(dc->debug.ips2_eval_delay_us); + commit_state = dc->hwss.get_idle_state(dc); + if (commit_state & DMUB_IPS2_ALLOW_MASK) + break; + + /* allow was still set, retry eval delay */ + dc->hwss.set_idle_state(dc, false); + } + if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) { // Tell PMFW to exit low power state dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); -- cgit v1.2.3 From 292c2116b2ae84c7e799ae340981e60551b18f5e Mon Sep 17 00:00:00 2001 From: Relja Vojvodic Date: Fri, 15 Dec 2023 18:15:16 -0500 Subject: drm/amd/display: Fixing stream allocation regression For certain dual display configs that had one display using a 1080p mode, the DPM level used to drive the configs regressed from DPM 0 to DPM 3. This was caused by a missing check that should have only limited the pipe segments on non-phantom pipes. 
This caused issues with detile buffer allocation, which dissallow subvp from being used Tested-by: Daniel Wheeler Reviewed-by: Dillon Varone Reviewed-by: Martin Leung Acked-by: Rodrigo Siqueira Signed-off-by: Relja Vojvodic Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 44ba1edb7f53..e4a328b45c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -276,7 +276,7 @@ static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx)) { + if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) { if (pipe_segments[i] > 4) pipe_segments[i] = 4; -- cgit v1.2.3 From d6398866a6b47e92319ef6efdb0126a4fbb7796a Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Mon, 2 Oct 2023 13:47:54 -0400 Subject: Re-revert "drm/amd/display: Enable Replay for static screen use cases" This reverts commit 44e60b14d5a72f91fd0bdeae8da59ae37a3ca8e5. Since, it causes a regression in which eDP displays with PSR support, but no Replay support (Sink support <= 0x03), fail to enable PSR and consequently all IGT amd_psr tests fail. So, revert this until a more suitable fix can be found. This got brought back accidently with the backmerge. 
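
For context only (not part of this revert): the regression came from unconditionally preferring Replay over PSR once the Replay path was wired up, even on panels that only advertise PSR. Any future re-enable would need a sink-capability gate along these lines; sink_reports_replay_support() is a hypothetical check standing in for the DPCD query the message refers to as "Sink support".

static bool prefer_replay_over_psr(struct dc_link *link)
{
	/* Panels with PSR but no Replay support (the "Sink support <= 0x03"
	 * case described above) must keep using PSR, so only prefer Replay
	 * when the sink actually reports it. */
	if (!sink_reports_replay_support(link))
		return false;

	return link->replay_settings.replay_feature_enabled;
}
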
Acked-by: Leo Li Signed-off-by: Ivan Lipski Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 ---------------------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 9 +-------- drivers/gpu/drm/amd/include/amd_shared.h | 2 -- 3 files changed, 1 insertion(+), 32 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a144024df97c..d869d0d7bf4c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -67,7 +67,6 @@ #include "amdgpu_dm_debugfs.h" #endif #include "amdgpu_dm_psr.h" -#include "amdgpu_dm_replay.h" #include "ivsrcid/ivsrcid_vislands30.h" @@ -4389,7 +4388,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) enum dc_connection_type new_connection_type = dc_connection_none; const struct dc_plane_cap *plane; bool psr_feature_enabled = false; - bool replay_feature_enabled = false; int max_overlay = dm->dc->caps.max_slave_planes; dm->display_indexes_num = dm->dc->caps.max_streams; @@ -4501,20 +4499,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } } - if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) { - switch (adev->ip_versions[DCE_HWIP][0]) { - case IP_VERSION(3, 1, 4): - case IP_VERSION(3, 1, 5): - case IP_VERSION(3, 1, 6): - case IP_VERSION(3, 2, 0): - case IP_VERSION(3, 2, 1): - replay_feature_enabled = true; - break; - default: - replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK; - break; - } - } /* loops over all connectors on the board */ for (i = 0; i < link_cnt; i++) { struct dc_link *link = NULL; @@ -4583,12 +4567,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) amdgpu_dm_update_connector_after_detect(aconnector); setup_backlight_device(dm, aconnector); - /* - * Disable psr if replay can be enabled - */ - if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector)) - psr_feature_enabled = false; - if (psr_feature_enabled) amdgpu_dm_set_psr_caps(link); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 7545a184e43a..4439e5a27362 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -29,7 +29,6 @@ #include "dc.h" #include "amdgpu.h" #include "amdgpu_dm_psr.h" -#include "amdgpu_dm_replay.h" #include "amdgpu_dm_crtc.h" #include "amdgpu_dm_plane.h" #include "amdgpu_dm_trace.h" @@ -124,12 +123,7 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) * fill_dc_dirty_rects(). 
*/ if (vblank_work->stream && vblank_work->stream->link) { - /* - * Prioritize replay, instead of psr - */ - if (vblank_work->stream->link->replay_settings.replay_feature_enabled) - amdgpu_dm_replay_enable(vblank_work->stream, false); - else if (vblank_work->enable) { + if (vblank_work->enable) { if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && vblank_work->stream->link->psr_settings.psr_allow_active) amdgpu_dm_psr_disable(vblank_work->stream); @@ -138,7 +132,6 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) && #endif - vblank_work->stream->link->panel_config.psr.disallow_replay && vblank_work->acrtc->dm_irq_params.allow_psr_entry) { amdgpu_dm_psr_enable(vblank_work->stream); } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index c739f12fb937..5cad456f2e10 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -244,7 +244,6 @@ enum DC_FEATURE_MASK { DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default - DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4 }; enum DC_DEBUG_MASK { @@ -255,7 +254,6 @@ enum DC_DEBUG_MASK { DC_DISABLE_PSR = 0x10, DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, DC_DISABLE_MPO = 0x40, - DC_DISABLE_REPLAY = 0x50, DC_ENABLE_DPIA_TRACE = 0x80, DC_ENABLE_DML2 = 0x100, DC_DISABLE_PSR_SU = 0x200, -- cgit v1.2.3 From e379787cbc2aa73c63a795ec55140f9b21c27d8c Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 1 Dec 2023 14:15:45 +0800 Subject: drm/amd/display: Add some functions for Panel Replay [WHY] Prepare for enabling the Panel Replay feature [HOW] - Add some Panel Replay setting functions in DC - Add the Panel Replay resource in dcn35_resource.c - Add debug masks for Panel Replay Tested-by: Daniel Wheeler Acked-by: Rodrigo Siqueira Reviewed-by: Leo Li Signed-off-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 34 ++++++++ .../gpu/drm/amd/display/dc/core/dc_link_exports.c | 7 ++ drivers/gpu/drm/amd/display/dc/dc.h | 23 ++++++ drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c | 96 ++++++++++++++++++++-- drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h | 2 + drivers/gpu/drm/amd/display/dc/inc/link.h | 2 + drivers/gpu/drm/amd/display/dc/link/link_factory.c | 1 + .../dc/link/protocols/link_edp_panel_control.c | 27 ++++++ .../dc/link/protocols/link_edp_panel_control.h | 2 + .../amd/display/dc/resource/dcn35/dcn35_resource.c | 13 +++ .../drm/amd/display/modules/power/power_helpers.c | 28 +++++++ .../drm/amd/display/modules/power/power_helpers.h | 5 ++ drivers/gpu/drm/amd/include/amd_shared.h | 1 + 13 files changed, 235 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b7c2eaebf8bf..2d7205058c64 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4770,6 +4770,38 @@ bool dc_set_psr_allow_active(struct dc *dc, bool enable) return true; } +/* enable/disable eDP Replay without specify stream for eDP */ +bool dc_set_replay_allow_active(struct dc *dc, bool active) +{ + int i; + bool allow_active; + + for (i = 0; i < dc->current_state->stream_count; i++) { + struct dc_link *link; + 
struct dc_stream_state *stream = dc->current_state->streams[i]; + + link = stream->link; + if (!link) + continue; + + if (link->replay_settings.replay_feature_enabled) { + if (active && !link->replay_settings.replay_allow_active) { + allow_active = true; + if (!dc_link_set_replay_allow_active(link, &allow_active, + false, false, NULL)) + return false; + } else if (!active && link->replay_settings.replay_allow_active) { + allow_active = false; + if (!dc_link_set_replay_allow_active(link, &allow_active, + true, false, NULL)) + return false; + } + } + } + + return true; +} + void dc_allow_idle_optimizations(struct dc *dc, bool allow) { if (dc->debug.disable_idle_power_optimizations) @@ -5315,6 +5347,8 @@ bool dc_abm_save_restore( struct dc_link *link = stream->sink->link; struct dc_link *edp_links[MAX_NUM_EDP]; + if (link->replay_settings.replay_feature_enabled) + return false; /*find primary pipe associated with stream*/ for (i = 0; i < MAX_PIPES; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index f365773d5714..c6c35037bdb8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -467,6 +467,13 @@ bool dc_link_setup_psr(struct dc_link *link, return link->dc->link_srv->edp_setup_psr(link, stream, psr_config, psr_context); } +bool dc_link_set_replay_allow_active(struct dc_link *link, const bool *allow_active, + bool wait, bool force_static, const unsigned int *power_opts) +{ + return link->dc->link_srv->edp_set_replay_allow_active(link, allow_active, wait, + force_static, power_opts); +} + bool dc_link_get_replay_state(const struct dc_link *link, uint64_t *state) { return link->dc->link_srv->edp_get_replay_state(link, state); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 565de7428b6c..f30a341bc090 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -463,6 +463,12 @@ enum dml_hostvm_override_opts { DML_HOSTVM_OVERRIDE_TRUE = 0x2, }; +enum dc_replay_power_opts { + replay_power_opt_invalid = 0x0, + replay_power_opt_smu_opt_static_screen = 0x1, + replay_power_opt_z10_static_screen = 0x10, +}; + enum dcc_option { DCC_ENABLE = 0, DCC_DISABLE = 1, @@ -2077,6 +2083,20 @@ bool dc_link_setup_psr(struct dc_link *dc_link, const struct dc_stream_state *stream, struct psr_config *psr_config, struct psr_context *psr_context); +/* + * Communicate with DMUB to allow or disallow Panel Replay on the specified link: + * + * @link: pointer to the dc_link struct instance + * @enable: enable(active) or disable(inactive) replay + * @wait: state transition need to wait the active set completed. + * @force_static: force disable(inactive) the replay + * @power_opts: set power optimazation parameters to DMUB. + * + * return: allow Replay active will return true, else will return false. + */ +bool dc_link_set_replay_allow_active(struct dc_link *dc_link, const bool *enable, + bool wait, bool force_static, const unsigned int *power_opts); + bool dc_link_get_replay_state(const struct dc_link *dc_link, uint64_t *state); /* On eDP links this function call will stall until T12 has elapsed. 
@@ -2321,6 +2341,9 @@ void dc_hardware_release(struct dc *dc); void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc); bool dc_set_psr_allow_active(struct dc *dc, bool enable); + +bool dc_set_replay_allow_active(struct dc *dc, bool active); + void dc_z10_restore(const struct dc *dc); void dc_z10_save_init(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 28149e53c2a6..38e4797e9476 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -258,13 +258,97 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst, *residency = 0; } +/** + * Set REPLAY power optimization flags and coasting vtotal. + */ +static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dmub, + unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal) +{ + union dmub_rb_cmd cmd; + struct dc_context *dc = dmub->ctx; + + memset(&cmd, 0, sizeof(cmd)); + cmd.replay_set_power_opt_and_coasting_vtotal.header.type = DMUB_CMD__REPLAY; + cmd.replay_set_power_opt_and_coasting_vtotal.header.sub_type = + DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL; + cmd.replay_set_power_opt_and_coasting_vtotal.header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal); + cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.power_opt = power_opt; + cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_power_opt_data.panel_inst = panel_inst; + cmd.replay_set_power_opt_and_coasting_vtotal.replay_set_coasting_vtotal_data.coasting_vtotal = coasting_vtotal; + + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + +/** + * send Replay general cmd to DMUB. 
+ */ +static void dmub_replay_send_cmd(struct dmub_replay *dmub, + enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element) +{ + union dmub_rb_cmd cmd; + struct dc_context *ctx = NULL; + + if (dmub == NULL || cmd_element == NULL) + return; + + ctx = dmub->ctx; + if (ctx != NULL) { + + if (msg != Replay_Msg_Not_Support) { + memset(&cmd, 0, sizeof(cmd)); + //Header + cmd.replay_set_timing_sync.header.type = DMUB_CMD__REPLAY; + } else + return; + } else + return; + + switch (msg) { + case Replay_Set_Timing_Sync_Supported: + //Header + cmd.replay_set_timing_sync.header.sub_type = + DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED; + cmd.replay_set_timing_sync.header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_timing_sync); + //Cmd Body + cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst = + cmd_element->sync_data.panel_inst; + cmd.replay_set_timing_sync.replay_set_timing_sync_data.timing_sync_supported = + cmd_element->sync_data.timing_sync_supported; + break; + case Replay_Set_Residency_Frameupdate_Timer: + //Header + cmd.replay_set_frameupdate_timer.header.sub_type = + DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER; + cmd.replay_set_frameupdate_timer.header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer); + //Cmd Body + cmd.replay_set_frameupdate_timer.data.panel_inst = + cmd_element->panel_inst; + cmd.replay_set_frameupdate_timer.data.enable = + cmd_element->timer_data.enable; + cmd.replay_set_frameupdate_timer.data.frameupdate_count = + cmd_element->timer_data.frameupdate_count; + break; + case Replay_Msg_Not_Support: + default: + return; + break; + } + + dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + static const struct dmub_replay_funcs replay_funcs = { - .replay_copy_settings = dmub_replay_copy_settings, - .replay_enable = dmub_replay_enable, - .replay_get_state = dmub_replay_get_state, - .replay_set_power_opt = dmub_replay_set_power_opt, - .replay_set_coasting_vtotal = dmub_replay_set_coasting_vtotal, - .replay_residency = dmub_replay_residency, + .replay_copy_settings = dmub_replay_copy_settings, + .replay_enable = dmub_replay_enable, + .replay_get_state = dmub_replay_get_state, + .replay_set_power_opt = dmub_replay_set_power_opt, + .replay_set_coasting_vtotal = dmub_replay_set_coasting_vtotal, + .replay_residency = dmub_replay_residency, + .replay_set_power_opt_and_coasting_vtotal = dmub_replay_set_power_opt_and_coasting_vtotal, + .replay_send_cmd = dmub_replay_send_cmd, }; /* diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index b3ee90a0b8b3..3613aff994d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -51,6 +51,8 @@ struct dmub_replay_funcs { uint8_t panel_inst); void (*replay_residency)(struct dmub_replay *dmub, uint8_t panel_inst, uint32_t *residency, const bool is_start, const bool is_alpm); + void (*replay_set_power_opt_and_coasting_vtotal)(struct dmub_replay *dmub, + unsigned int power_opt, uint8_t panel_inst, uint16_t coasting_vtotal); }; struct dmub_replay *dmub_replay_create(struct dc_context *ctx); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index 7439865d1b50..26fe81f213da 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -289,6 +289,8 @@ struct link_service { bool (*edp_replay_residency)(const struct dc_link *link, unsigned int *residency, 
const bool is_start, const bool is_alpm); + bool (*edp_set_replay_power_opt_and_coasting_vtotal)(struct dc_link *link, + const unsigned int *power_opts, uint16_t coasting_vtotal); bool (*edp_wait_for_t12)(struct dc_link *link); bool (*edp_is_ilr_optimization_required)(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 5464d8d26bd3..37d3027c32dc 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -216,6 +216,7 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s link_srv->edp_send_replay_cmd = edp_send_replay_cmd; link_srv->edp_set_coasting_vtotal = edp_set_coasting_vtotal; link_srv->edp_replay_residency = edp_replay_residency; + link_srv->edp_set_replay_power_opt_and_coasting_vtotal = edp_set_replay_power_opt_and_coasting_vtotal; link_srv->edp_wait_for_t12 = edp_wait_for_t12; link_srv->edp_is_ilr_optimization_required = diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e8de68e62403..7f1196528218 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -1068,6 +1068,33 @@ bool edp_replay_residency(const struct dc_link *link, return true; } +bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link, + const unsigned int *power_opts, uint16_t coasting_vtotal) +{ + struct dc *dc = link->ctx->dc; + struct dmub_replay *replay = dc->res_pool->replay; + unsigned int panel_inst; + + if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) + return false; + + /* Only both power and coasting vtotal changed, this func could return true */ + if (power_opts && link->replay_settings.replay_power_opt_active != *power_opts && + coasting_vtotal && link->replay_settings.coasting_vtotal != coasting_vtotal) { + if (link->replay_settings.replay_feature_enabled && + replay->funcs->replay_set_power_opt_and_coasting_vtotal) { + replay->funcs->replay_set_power_opt_and_coasting_vtotal(replay, + *power_opts, panel_inst, coasting_vtotal); + link->replay_settings.replay_power_opt_active = *power_opts; + link->replay_settings.coasting_vtotal = coasting_vtotal; + } else + return false; + } else + return false; + + return true; +} + static struct abm *get_abm_from_stream_res(const struct dc_link *link) { int i; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index b7493ff4fcee..34e521af7bb4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -63,6 +63,8 @@ bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal); bool edp_replay_residency(const struct dc_link *link, unsigned int *residency, const bool is_start, const bool is_alpm); bool edp_get_replay_state(const struct dc_link *link, uint64_t *state); +bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link, + const unsigned int *power_opts, uint16_t coasting_vtotal); bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c 
b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 39594e8ffb5e..761ec9891875 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -96,6 +96,7 @@ #include "reg_helper.h" #include "dce/dmub_abm.h" #include "dce/dmub_psr.h" +#include "dce/dmub_replay.h" #include "dce/dce_aux.h" #include "dce/dce_i2c.h" #include "dml/dcn31/display_mode_vba_31.h" /*temp*/ @@ -788,6 +789,7 @@ static const struct dc_panel_config panel_config_defaults = { .psr = { .disable_psr = false, .disallow_psrsu = false, + .disallow_replay = false, }, .ilr = { .optimize_edp_link_rate = true, @@ -1546,6 +1548,9 @@ static void dcn35_resource_destruct(struct dcn35_resource_pool *pool) if (pool->base.psr != NULL) dmub_psr_destroy(&pool->base.psr); + if (pool->base.replay != NULL) + dmub_replay_destroy(&pool->base.replay); + if (pool->base.pg_cntl != NULL) dcn_pg_cntl_destroy(&pool->base.pg_cntl); @@ -2030,6 +2035,14 @@ static bool dcn35_resource_construct( goto create_fail; } + /* Replay */ + pool->base.replay = dmub_replay_create(ctx); + if (pool->base.replay == NULL) { + dm_error("DC: failed to create replay obj!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + /* ABM */ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { pool->base.multiple_abms[i] = dmub_abm_create(ctx, diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 1675314a3ff2..34b848c981e0 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -973,6 +973,34 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link, return true; } +void set_replay_coasting_vtotal(struct dc_link *link, + enum replay_coasting_vtotal_type type, + uint16_t vtotal) +{ + link->replay_settings.coasting_vtotal_table[type] = vtotal; +} + +void calculate_replay_link_off_frame_count(struct dc_link *link, + uint16_t vtotal, uint16_t htotal) +{ + uint8_t max_link_off_frame_count = 0; + uint16_t max_deviation_line = 0, pixel_deviation_per_line = 0; + + max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line; + pixel_deviation_per_line = link->dpcd_caps.pr_info.pixel_deviation_per_line; + + if (htotal != 0 && vtotal != 0) + max_link_off_frame_count = htotal * max_deviation_line / (pixel_deviation_per_line * vtotal); + else + ASSERT(0); + + link->replay_settings.link_off_frame_count_level = + max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_BEST ? PR_LINK_OFF_FRAME_COUNT_BEST : + max_link_off_frame_count >= PR_LINK_OFF_FRAME_COUNT_GOOD ? 
PR_LINK_OFF_FRAME_COUNT_GOOD : + PR_LINK_OFF_FRAME_COUNT_FAIL; + +} + bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps) { unsigned int data_points_size; diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index d9e0d67d67f7..c17bbc6fb38c 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -54,6 +54,11 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, unsigned int inst); void init_replay_config(struct dc_link *link, struct replay_config *pr_config); +void set_replay_coasting_vtotal(struct dc_link *link, + enum replay_coasting_vtotal_type type, + uint16_t vtotal); +void calculate_replay_link_off_frame_count(struct dc_link *link, + uint16_t vtotal, uint16_t htotal); bool is_psr_su_specific_panel(struct dc_link *link); void mod_power_calc_psr_configs(struct psr_config *psr_config, diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 5cad456f2e10..1dc5dd9b7bf7 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -257,6 +257,7 @@ enum DC_DEBUG_MASK { DC_ENABLE_DPIA_TRACE = 0x80, DC_ENABLE_DML2 = 0x100, DC_DISABLE_PSR_SU = 0x200, + DC_DISABLE_REPLAY = 0x400, }; enum amd_dpm_forced_level; -- cgit v1.2.3 From 29bc46c4da4ab61bb69b2c8099be6f5d7454133f Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 16:07:22 +0800 Subject: drm/amd/pm: Use separate metric table for APU Use separate metric table for APU and Non APU systems for smu_v_13_0_6 to get metric data Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 90 ++++++++++++++- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 124 +++++++++++---------- 2 files changed, 156 insertions(+), 58 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index fef2d290f3f2..8f166aa3043c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -219,7 +219,95 @@ typedef struct __attribute__((packed, aligned(4))) { uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKSentCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated -} MetricsTable_t; +} MetricsTableX_t; + +typedef struct __attribute__((packed, aligned(4))) { + uint32_t AccumulationCounter; + + //TEMPERATURE + uint32_t MaxSocketTemperature; + uint32_t MaxVrTemperature; + uint32_t MaxHbmTemperature; + uint64_t MaxSocketTemperatureAcc; + uint64_t MaxVrTemperatureAcc; + uint64_t MaxHbmTemperatureAcc; + + //POWER + uint32_t SocketPowerLimit; + uint32_t MaxSocketPowerLimit; + uint32_t SocketPower; + + //ENERGY + uint64_t Timestamp; + uint64_t SocketEnergyAcc; + uint64_t CcdEnergyAcc; + uint64_t XcdEnergyAcc; + uint64_t AidEnergyAcc; + uint64_t HbmEnergyAcc; + + //FREQUENCY + uint32_t CclkFrequencyLimit; + uint32_t GfxclkFrequencyLimit; + uint32_t FclkFrequency; + uint32_t UclkFrequency; + uint32_t SocclkFrequency[4]; + uint32_t VclkFrequency[4]; + uint32_t DclkFrequency[4]; + uint32_t LclkFrequency[4]; + uint64_t GfxclkFrequencyAcc[8]; + uint64_t 
CclkFrequencyAcc[96]; + + //FREQUENCY RANGE + uint32_t MaxCclkFrequency; + uint32_t MinCclkFrequency; + uint32_t MaxGfxclkFrequency; + uint32_t MinGfxclkFrequency; + uint32_t FclkFrequencyTable[4]; + uint32_t UclkFrequencyTable[4]; + uint32_t SocclkFrequencyTable[4]; + uint32_t VclkFrequencyTable[4]; + uint32_t DclkFrequencyTable[4]; + uint32_t LclkFrequencyTable[4]; + uint32_t MaxLclkDpmRange; + uint32_t MinLclkDpmRange; + + //XGMI + uint32_t XgmiWidth; + uint32_t XgmiBitrate; + uint64_t XgmiReadBandwidthAcc[8]; + uint64_t XgmiWriteBandwidthAcc[8]; + + //ACTIVITY + uint32_t SocketC0Residency; + uint32_t SocketGfxBusy; + uint32_t DramBandwidthUtilization; + uint64_t SocketC0ResidencyAcc; + uint64_t SocketGfxBusyAcc; + uint64_t DramBandwidthAcc; + uint32_t MaxDramBandwidth; + uint64_t DramBandwidthUtilizationAcc; + uint64_t PcieBandwidthAcc[4]; + + //THROTTLERS + uint32_t ProchotResidencyAcc; + uint32_t PptResidencyAcc; + uint32_t SocketThmResidencyAcc; + uint32_t VrThmResidencyAcc; + uint32_t HbmThmResidencyAcc; + uint32_t GfxLockXCDMak; + + // New Items at end to maintain driver compatibility + uint32_t GfxclkFrequency[8]; + + //PSNs + uint64_t PublicSerialNumber_AID[4]; + uint64_t PublicSerialNumber_XCD[8]; + uint64_t PublicSerialNumber_CCD[12]; + + //XGMI Data tranfser size + uint64_t XgmiReadDataSizeAcc[8];//in KByte + uint64_t XgmiWriteDataSizeAcc[8];//in KByte +} MetricsTableA_t; #define SMU_VF_METRICS_TABLE_VERSION 0x3 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 81b217bbdebb..96777a365133 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -248,6 +248,8 @@ struct PPTable_t { #define SMUQ10_TO_UINT(x) ((x) >> 10) #define SMUQ10_FRAC(x) ((x) & 0x3ff) #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200)) +#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\ + (metrics_a->field) : (metrics_x->field)) struct smu_v13_0_6_dpm_map { enum smu_clk_type clk_type; @@ -330,7 +332,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t), + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, + max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); @@ -338,7 +341,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); + smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t), + sizeof(MetricsTableA_t)), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; smu_table->metrics_time = 0; @@ -469,9 +473,11 @@ static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu, static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; + MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; + struct amdgpu_device *adev = smu->adev; int ret, i, retry = 100; uint32_t table_version; @@ -483,7 +489,7 @@ static int 
smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) return ret; /* Ensure that metrics have been updated */ - if (metrics->AccumulationCounter) + if (GET_METRIC_FIELD(AccumulationCounter)) break; usleep_range(1000, 1100); @@ -500,29 +506,29 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) table_version; pptable->MaxSocketPowerLimit = - SMUQ10_ROUND(metrics->MaxSocketPowerLimit); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit)); pptable->MaxGfxclkFrequency = - SMUQ10_ROUND(metrics->MaxGfxclkFrequency); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency)); pptable->MinGfxclkFrequency = - SMUQ10_ROUND(metrics->MinGfxclkFrequency); + SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency)); for (i = 0; i < 4; ++i) { pptable->FclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->FclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]); pptable->UclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->UclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]); pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND( - metrics->SocclkFrequencyTable[i]); + GET_METRIC_FIELD(SocclkFrequencyTable)[i]); pptable->VclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->VclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]); pptable->DclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->DclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]); pptable->LclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->LclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]); } /* use AID0 serial number by default */ - pptable->PublicSerialNumber_AID = metrics->PublicSerialNumber_AID[0]; + pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0]; pptable->Init = true; } @@ -824,7 +830,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, uint32_t *value) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; + MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct amdgpu_device *adev = smu->adev; int ret = 0; int xcc_id; @@ -839,50 +846,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_GFXCLK: if (smu->smc_fw_version >= 0x552F00) { xcc_id = GET_INST(GC, 0); - *value = SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); } else { *value = 0; } break; case METRICS_CURR_SOCCLK: case METRICS_AVERAGE_SOCCLK: - *value = SMUQ10_ROUND(metrics->SocclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]); break; case METRICS_CURR_UCLK: case METRICS_AVERAGE_UCLK: - *value = SMUQ10_ROUND(metrics->UclkFrequency); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); break; case METRICS_CURR_VCLK: - *value = SMUQ10_ROUND(metrics->VclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]); break; case METRICS_CURR_DCLK: - *value = SMUQ10_ROUND(metrics->DclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]); break; case METRICS_CURR_FCLK: - *value = SMUQ10_ROUND(metrics->FclkFrequency); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency)); break; case METRICS_AVERAGE_GFXACTIVITY: - *value = SMUQ10_ROUND(metrics->SocketGfxBusy); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); break; case METRICS_AVERAGE_MEMACTIVITY: - *value = 
SMUQ10_ROUND(metrics->DramBandwidthUtilization); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); break; case METRICS_CURR_SOCKETPOWER: - *value = SMUQ10_ROUND(metrics->SocketPower) << 8; + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = SMUQ10_ROUND(metrics->MaxSocketTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_MEM: - *value = SMUQ10_ROUND(metrics->MaxHbmTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; /* This is the max of all VRs and not just SOC VR. * No need to define another data type for the same. */ case METRICS_TEMPERATURE_VRSOC: - *value = SMUQ10_ROUND(metrics->MaxVrTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; default: @@ -2071,63 +2078,66 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table; struct amdgpu_device *adev = smu->adev; int ret = 0, xcc_id, inst, i; - MetricsTable_t *metrics; + MetricsTableX_t *metrics_x; + MetricsTableA_t *metrics_a; u16 link_width_level; - metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); - ret = smu_v13_0_6_get_metrics_table(smu, metrics, true); + metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); + ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); if (ret) { - kfree(metrics); + kfree(metrics_x); return ret; } + metrics_a = (MetricsTableA_t *)metrics_x; + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4); gpu_metrics->temperature_hotspot = - SMUQ10_ROUND(metrics->MaxSocketTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)); /* Individual HBM stack temperature is not reported */ gpu_metrics->temperature_mem = - SMUQ10_ROUND(metrics->MaxHbmTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)); /* Reports max temperature of all voltage rails */ gpu_metrics->temperature_vrsoc = - SMUQ10_ROUND(metrics->MaxVrTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)); gpu_metrics->average_gfx_activity = - SMUQ10_ROUND(metrics->SocketGfxBusy); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); gpu_metrics->average_umc_activity = - SMUQ10_ROUND(metrics->DramBandwidthUtilization); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); gpu_metrics->curr_socket_power = - SMUQ10_ROUND(metrics->SocketPower); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)); /* Energy counter reported in 15.259uJ (2^-16) units */ - gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc; + gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc); for (i = 0; i < MAX_GFX_CLKS; i++) { xcc_id = GET_INST(GC, i); if (xcc_id >= 0) gpu_metrics->current_gfxclk[i] = - SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]); + SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); if (i < MAX_CLKS) { gpu_metrics->current_socclk[i] = - SMUQ10_ROUND(metrics->SocclkFrequency[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]); inst = GET_INST(VCN, i); if (inst >= 0) { gpu_metrics->current_vclk0[i] = - SMUQ10_ROUND(metrics->VclkFrequency[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]); gpu_metrics->current_dclk0[i] = - SMUQ10_ROUND(metrics->DclkFrequency[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]); } } } - gpu_metrics->current_uclk = 
SMUQ10_ROUND(metrics->UclkFrequency); + gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); /* Throttle status is not reported through metrics now */ gpu_metrics->throttle_status = 0; /* Clock Lock Status. Each bit corresponds to each GFXCLK instance */ - gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> GET_INST(GC, 0); + gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0); if (!(adev->flags & AMD_IS_APU)) { link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); @@ -2139,38 +2149,38 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->pcie_link_speed = smu_v13_0_6_get_current_pcie_link_speed(smu); gpu_metrics->pcie_bandwidth_acc = - SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]); + SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]); gpu_metrics->pcie_bandwidth_inst = - SMUQ10_ROUND(metrics->PcieBandwidth[0]); + SMUQ10_ROUND(metrics_x->PcieBandwidth[0]); gpu_metrics->pcie_l0_to_recov_count_acc = - metrics->PCIeL0ToRecoveryCountAcc; + metrics_x->PCIeL0ToRecoveryCountAcc; gpu_metrics->pcie_replay_count_acc = - metrics->PCIenReplayAAcc; + metrics_x->PCIenReplayAAcc; gpu_metrics->pcie_replay_rover_count_acc = - metrics->PCIenReplayARolloverCountAcc; + metrics_x->PCIenReplayARolloverCountAcc; } gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); gpu_metrics->gfx_activity_acc = - SMUQ10_ROUND(metrics->SocketGfxBusyAcc); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc)); gpu_metrics->mem_activity_acc = - SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc)); for (i = 0; i < NUM_XGMI_LINKS; i++) { gpu_metrics->xgmi_read_data_acc[i] = - SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]); gpu_metrics->xgmi_write_data_acc[i] = - SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]); } - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth); - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate); + gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth)); + gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate)); - gpu_metrics->firmware_timestamp = metrics->Timestamp; + gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp); *table = (void *)gpu_metrics; - kfree(metrics); + kfree(metrics_x); return sizeof(*gpu_metrics); } -- cgit v1.2.3 From 9323b4bf6b85b4450b3e74c7a1b32182a60e030f Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 17:09:21 +0800 Subject: drm/amd/pm: Update metric table for jpeg/vcn data Update pmfw metric table to include vcn & jpeg activity for smu_v_13_0_6 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index 8f166aa3043c..7b812b9994d7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -123,7 +123,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0x9 +#define SMU_METRICS_TABLE_VERSION 0xB typedef struct __attribute__((packed, aligned(4))) { uint32_t 
AccumulationCounter; @@ -219,6 +219,10 @@ typedef struct __attribute__((packed, aligned(4))) { uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKSentCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated + + // VCN/JPEG ACTIVITY + uint32_t VcnBusy[4]; + uint32_t JpegBusy[32]; } MetricsTableX_t; typedef struct __attribute__((packed, aligned(4))) { @@ -307,6 +311,10 @@ typedef struct __attribute__((packed, aligned(4))) { //XGMI Data tranfser size uint64_t XgmiReadDataSizeAcc[8];//in KByte uint64_t XgmiWriteDataSizeAcc[8];//in KByte + + // VCN/JPEG ACTIVITY + uint32_t VcnBusy[4]; + uint32_t JpegBusy[32]; } MetricsTableA_t; #define SMU_VF_METRICS_TABLE_VERSION 0x3 -- cgit v1.2.3 From a62503ca854e8a19c95022fa5bec47eeecac570b Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 17:21:48 +0800 Subject: drm/amd/pm: Add gpu_metrics_v1_5 Add new gpu_metrics_v1_5 to acquire vcn/jpeg activity & pcie nak error counters Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 80 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 + 2 files changed, 83 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 36a5ad8c00c5..edcb85560ced 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -318,6 +318,7 @@ enum pp_xgmi_plpd_mode { #define MAX_GFX_CLKS 8 #define MAX_CLKS 4 #define NUM_VCN 4 +#define NUM_JPEG_ENG 32 struct seq_file; enum amd_pp_clock_type; @@ -775,6 +776,85 @@ struct gpu_metrics_v1_4 { uint16_t padding; }; +struct gpu_metrics_v1_5 { + struct metrics_table_header common_header; + + /* Temperature (Celsius) */ + uint16_t temperature_hotspot; + uint16_t temperature_mem; + uint16_t temperature_vrsoc; + + /* Power (Watts) */ + uint16_t curr_socket_power; + + /* Utilization (%) */ + uint16_t average_gfx_activity; + uint16_t average_umc_activity; // memory controller + uint16_t vcn_activity[NUM_VCN]; + uint16_t jpeg_activity[NUM_JPEG_ENG]; + + /* Energy (15.259uJ (2^-16) units) */ + uint64_t energy_accumulator; + + /* Driver attached timestamp (in ns) */ + uint64_t system_clock_counter; + + /* Throttle status */ + uint32_t throttle_status; + + /* Clock Lock Status. 
Each bit corresponds to clock instance */ + uint32_t gfxclk_lock_status; + + /* Link width (number of lanes) and speed (in 0.1 GT/s) */ + uint16_t pcie_link_width; + uint16_t pcie_link_speed; + + /* XGMI bus width and bitrate (in Gbps) */ + uint16_t xgmi_link_width; + uint16_t xgmi_link_speed; + + /* Utilization Accumulated (%) */ + uint32_t gfx_activity_acc; + uint32_t mem_activity_acc; + + /*PCIE accumulated bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_acc; + + /*PCIE instantaneous bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_inst; + + /* PCIE L0 to recovery state transition accumulated count */ + uint64_t pcie_l0_to_recov_count_acc; + + /* PCIE replay accumulated count */ + uint64_t pcie_replay_count_acc; + + /* PCIE replay rollover accumulated count */ + uint64_t pcie_replay_rover_count_acc; + + /* PCIE NAK sent accumulated count */ + uint32_t pcie_nak_sent_count_acc; + + /* PCIE NAK received accumulated count */ + uint32_t pcie_nak_rcvd_count_acc; + + /* XGMI accumulated data transfer size(KiloBytes) */ + uint64_t xgmi_read_data_acc[NUM_XGMI_LINKS]; + uint64_t xgmi_write_data_acc[NUM_XGMI_LINKS]; + + /* PMFW attached timestamp (10ns resolution) */ + uint64_t firmware_timestamp; + + /* Current clocks (Mhz) */ + uint16_t current_gfxclk[MAX_GFX_CLKS]; + uint16_t current_socclk[MAX_CLKS]; + uint16_t current_vclk0[MAX_CLKS]; + uint16_t current_dclk0[MAX_CLKS]; + uint16_t current_uclk; + + uint16_t padding; +}; + /* * gpu_metrics_v2_0 is not recommended as it's not naturally aligned. * Use gpu_metrics_v2_1 or later instead. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 001a5cf09657..00cd615bbcdc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -989,6 +989,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(1, 4): structure_size = sizeof(struct gpu_metrics_v1_4); break; + case METRICS_VERSION(1, 5): + structure_size = sizeof(struct gpu_metrics_v1_5); + break; case METRICS_VERSION(2, 0): structure_size = sizeof(struct gpu_metrics_v2_0); break; -- cgit v1.2.3 From 25272bcf8476cbe58b7a0318fcfad79d2cd8554d Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 17:32:48 +0800 Subject: drm/amd/pm: Use gpu_metrics_v1_5 for SMUv13.0.6 Use gpu_metrics_v1_5 for SMUv13.0.6 to fill gpu metric info Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 29 ++++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 96777a365133..4ebc6b421c2c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -347,7 +347,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -2074,10 +2074,10 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = 
&smu->smu_table; - struct gpu_metrics_v1_4 *gpu_metrics = - (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_5 *gpu_metrics = + (struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table; struct amdgpu_device *adev = smu->adev; - int ret = 0, xcc_id, inst, i; + int ret = 0, xcc_id, inst, i, j; MetricsTableX_t *metrics_x; MetricsTableA_t *metrics_a; u16 link_width_level; @@ -2091,7 +2091,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_a = (MetricsTableA_t *)metrics_x; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)); @@ -2158,6 +2158,10 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_x->PCIenReplayAAcc; gpu_metrics->pcie_replay_rover_count_acc = metrics_x->PCIenReplayARolloverCountAcc; + gpu_metrics->pcie_nak_sent_count_acc = + metrics_x->PCIeNAKSentCountAcc; + gpu_metrics->pcie_nak_rcvd_count_acc = + metrics_x->PCIeNAKReceivedCountAcc; } gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); @@ -2174,6 +2178,21 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]); } + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + inst = GET_INST(JPEG, i); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] = + SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy) + [(inst * adev->jpeg.num_jpeg_rings) + j]); + } + } + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + inst = GET_INST(VCN, i); + gpu_metrics->vcn_activity[i] = + SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy)[inst]); + } + gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth)); gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate)); -- cgit v1.2.3 From abaf0666a65b8bbf7311571cd2b32b076fb8e1f9 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 19 Dec 2023 18:54:18 +0530 Subject: drm/amdgpu: Cleanup indenting in amdgpu_connector_dvi_detect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c:1106 amdgpu_connector_dvi_detect() warn: inconsistent indenting Fixes: 8a1de314d189 ("drm/amdgpu: Refactor 'amdgpu_connector_dvi_detect' in amdgpu_connectors.c") Cc: Christian König Cc: Alex Deucher Cc: "Pan, Xinhui" Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 96f63fd39b9e..9caba10315a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1103,7 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) * DDC line. The latter is more complex because with DVI<->HDMI adapters * you don't really know what's connected to which port as both are digital. 
*/ - amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector); + amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector); } } -- cgit v1.2.3 From 091411be7ae899ce23072acf5a83b0b43e9024e1 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 19 Dec 2023 19:26:22 +0530 Subject: drm/amdgpu: Use kzalloc instead of kmalloc+__GFP_ZERO in amdgpu_ras.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the below smatch warnings: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:2543 amdgpu_ras_recovery_init() warn: Please consider using kzalloc instead of kmalloc drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:2830 amdgpu_ras_init() warn: Please consider using kzalloc instead of kmalloc Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index bad62141f708..e541e6925918 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2540,7 +2540,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) return 0; data = &con->eh_data; - *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); + *data = kzalloc(sizeof(**data), GFP_KERNEL); if (!*data) { ret = -ENOMEM; goto out; @@ -2827,10 +2827,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (con) return 0; - con = kmalloc(sizeof(struct amdgpu_ras) + + con = kzalloc(sizeof(*con) + sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT + sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT, - GFP_KERNEL|__GFP_ZERO); + GFP_KERNEL); if (!con) return -ENOMEM; -- cgit v1.2.3 From b57e3ca1fb192962f5b062c2e13e1bab1936292c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 20 Dec 2023 19:36:20 +0530 Subject: drm/amdgpu: Use kvcalloc instead of kvmalloc_array in amdgpu_cs_parser_bos() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvmalloc_array + __GFP_ZERO is the same with kvcalloc. 
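A quick illustration of the equivalence (not taken from this patch; "pages" and "num_pages" are placeholder names): kvcalloc() is effectively kvmalloc_array() with __GFP_ZERO ORed into the gfp flags, so the two calls below request the same overflow-checked, zeroed array, while the second reads more clearly.

	/* both allocate a zeroed array of page pointers */
	pages = kvmalloc_array(num_pages, sizeof(struct page *),
			       GFP_KERNEL | __GFP_ZERO);
	pages = kvcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
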
Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:873 amdgpu_cs_parser_bos() warn: Please consider using kvcalloc instead of kvmalloc_array Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e50be6500030..83c7fc09218c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -870,9 +870,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo = e->bo; int i; - e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); + e->user_pages = kvcalloc(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL); if (!e->user_pages) { DRM_ERROR("kvmalloc_array failure\n"); r = -ENOMEM; -- cgit v1.2.3 From 4d3ed0befdf4852cec2f203ceac440aa70a0e7f5 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 19 Dec 2023 12:20:36 +0530 Subject: drm/amd/display: Address function parameter 'context' not described in 'dc_state_rem_all_planes_for_stream' & 'populate_subvp_cmd_drr_info' Fixes the following gcc with W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Function parameter or member 'state' not described in 'dc_state_rem_all_planes_for_stream' drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_state.c:524: warning: Excess function parameter 'context' description in 'dc_state_rem_all_planes_for_stream' drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:540: warning: Function parameter or member 'context' not described in 'populate_subvp_cmd_drr_info' Suggested-by: Aurabindo Pillai Cc: Dillon Varone Cc: Jun Lei Cc: Hamza Mahfooz Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Alex Deucher Cc: Srinath Rao Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index dd52cab7ecdf..460a8010c79f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -511,7 +511,7 @@ bool dc_state_remove_plane( * * @dc: Current dc state. * @stream: Target stream, which we want to remove the attached plans. - * @context: New context. + * @state: context from which the planes are to be removed. 
* * Return: * Return true if DC was able to remove all planes from the target diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index a65c485b0b49..2b79a0e5638e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -519,10 +519,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi /** * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command * - * @dc: [in] current dc state + * @dc: [in] pointer to dc object * @subvp_pipe: [in] pipe_ctx for the SubVP pipe * @vblank_pipe: [in] pipe_ctx for the DRR pipe * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info + * @context: [in] DC state for access to phantom stream * * Populate the DMCUB SubVP command with DRR pipe info. All the information * required for calculating the SubVP + DRR microschedule is populated here. -- cgit v1.2.3 From d95ad8fa96e14b7ce1ab740c53f10d7aff9f6660 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 19 Dec 2023 12:54:46 +0530 Subject: drm/amd/display: Adjust kdoc for 'dcn35_hw_block_power_down' & 'dcn35_hw_block_power_up' Fixes the following gcc with W=1: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn35/dcn35_hwseq.c:1124: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Cc: Charlene Liu Cc: Muhammad Ahmed Cc: Hamza Mahfooz Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Alex Deucher Cc: Srinath Rao Signed-off-by: Srinivasan Shanmugam Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 68 +++++++++++++--------- 1 file changed, 41 insertions(+), 27 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index c40f4a06abdb..9c806385ecbd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1123,21 +1123,28 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, update_state->pg_res_update[PG_HPO] = true; } + /** - * power down sequence - * ONO Region 3, DCPG 25: hpo - SKIPPED - * ONO Region 4, DCPG 0: dchubp0, dpp0 - * ONO Region 6, DCPG 1: dchubp1, dpp1 - * ONO Region 8, DCPG 2: dchubp2, dpp2 - * ONO Region 10, DCPG 3: dchubp3, dpp3 - * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry - * ONO Region 5, DCPG 16: dsc0 - * ONO Region 7, DCPG 17: dsc1 - * ONO Region 9, DCPG 18: dsc2 - * ONO Region 11, DCPG 19: dsc3 - * ONO Region 2, DCPG 24: mpc opp optc dwb - * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after lono timer is armed -*/ + * dcn35_hw_block_power_down() - power down sequence + * + * The following sequence describes the ON-OFF (ONO) for power down: + * + * ONO Region 3, DCPG 25: hpo - SKIPPED + * ONO Region 4, DCPG 0: dchubp0, dpp0 + * ONO Region 6, DCPG 1: dchubp1, dpp1 + * ONO Region 8, DCPG 2: dchubp2, dpp2 + * ONO Region 10, DCPG 3: dchubp3, dpp3 + * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry + * ONO Region 5, DCPG 16: dsc0 + * ONO Region 7, DCPG 17: dsc1 + * ONO Region 9, DCPG 18: dsc2 + * ONO Region 11, DCPG 19: dsc3 + * ONO Region 2, DCPG 24: mpc opp optc dwb + * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. 
will be pwr dwn after lono timer is armed + * + * @dc: Current DC state + * @update_state: update PG sequence states for HW block + */ void dcn35_hw_block_power_down(struct dc *dc, struct pg_block_update *update_state) { @@ -1175,20 +1182,27 @@ void dcn35_hw_block_power_down(struct dc *dc, //domain22, 23, 25 currently always on. } + /** - * power up sequence - * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED - * ONO Region 2, DCPG 24: mpc opp optc dwb - * ONO Region 5, DCPG 16: dsc0 - * ONO Region 7, DCPG 17: dsc1 - * ONO Region 9, DCPG 18: dsc2 - * ONO Region 11, DCPG 19: dsc3 - * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit - * ONO Region 4, DCPG 0: dchubp0, dpp0 - * ONO Region 6, DCPG 1: dchubp1, dpp1 - * ONO Region 8, DCPG 2: dchubp2, dpp2 - * ONO Region 10, DCPG 3: dchubp3, dpp3 - * ONO Region 3, DCPG 25: hpo - SKIPPED + * dcn35_hw_block_power_up() - power up sequence + * + * The following sequence describes the ON-OFF (ONO) for power up: + * + * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED + * ONO Region 2, DCPG 24: mpc opp optc dwb + * ONO Region 5, DCPG 16: dsc0 + * ONO Region 7, DCPG 17: dsc1 + * ONO Region 9, DCPG 18: dsc2 + * ONO Region 11, DCPG 19: dsc3 + * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit + * ONO Region 4, DCPG 0: dchubp0, dpp0 + * ONO Region 6, DCPG 1: dchubp1, dpp1 + * ONO Region 8, DCPG 2: dchubp2, dpp2 + * ONO Region 10, DCPG 3: dchubp3, dpp3 + * ONO Region 3, DCPG 25: hpo - SKIPPED + * + * @dc: Current DC state + * @update_state: update PG sequence states for HW block */ void dcn35_hw_block_power_up(struct dc *dc, struct pg_block_update *update_state) -- cgit v1.2.3 From 5ce9a6ad8ec48445ff6c999d064f7931f892bf2b Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 20 Dec 2023 20:07:54 +0530 Subject: drm/amdgpu: Drop redundant unsigned >=0 comparision 'amdgpu_gfx_rlc_init_microcode()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit unsigned int "version_minor" is always >= 0 Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c:534 amdgpu_gfx_rlc_init_microcode() warn: always true condition '(version_minor >= 0) => (0-u16max >= 0)' Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 35e0ae9acadc..2c3675d91614 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, if (version_major == 2 && version_minor == 1) adev->gfx.rlc.is_rlc_v2_1 = true; - if (version_minor >= 0) { - err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); - if (err) { - dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); - return err; - } + err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); + if (err) { + dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); + return err; } + if (version_minor >= 1) amdgpu_gfx_rlc_init_microcode_v2_1(adev); if (version_minor >= 2) -- cgit v1.2.3 From b8d55a90fd55b767c25687747e2b24abd1ef8680 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 26 Dec 2023 15:32:19 +0530 Subject: drm/amdgpu: Fix possible NULL dereference in amdgpu_ras_query_error_status_helper() MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return invalid error code -EINVAL for invalid block id. Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:1183 amdgpu_ras_query_error_status_helper() error: we previously assumed 'info' could be null (see line 1176) Suggested-by: Hawking Zhang Cc: Tao Zhou Cc: Hawking Zhang Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e541e6925918..39399d0f2ce5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1176,6 +1176,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT; struct amdgpu_ras_block_object *block_obj = NULL; + if (blk == AMDGPU_RAS_BLOCK_COUNT) + return -EINVAL; + if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY) return -EINVAL; -- cgit v1.2.3 From 217e85f97031791fb48a2d374c7bdcf439365b21 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 21 Dec 2023 08:10:42 +0530 Subject: drm/amdkfd: Fix type of 'dbg_flags' in 'struct kfd_process' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dbg_flags looks to be defined with incorrect data type; to process multiple debug flag options, and hence defined dbg_flags as u32. Fixes the below: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_packet_manager_v9.c:117 pm_map_process_aldebaran() warn: maybe use && instead of & Fixes: 0de4ec9a0353 ("drm/amdgpu: prepare map process for multi-process debug devices") Suggested-by: Lijo Lazar Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 45366b4ca976..745024b31340 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -970,7 +970,7 @@ struct kfd_process { struct work_struct debug_event_workarea; /* Tracks debug per-vmid request for debug flags */ - bool dbg_flags; + u32 dbg_flags; atomic_t poison; /* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */ -- cgit v1.2.3 From 9cdef4f720376ef0fb0febce1ed2377c19e531f9 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Mon, 4 Dec 2023 10:09:33 +0800 Subject: drm/amd/display: pbn_div need be updated for hotplug event link_rate sometime will be changed when DP MST connector hotplug, so pbn_div also need be updated; otherwise, it will mismatch with link_rate, causes no output in external monitor. This is a backport to 6.7 and older. 
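A simplified sketch of the hazard (illustrative only, not the exact driver flow; "dc_link" stands in for aconnector->mst_root->dc_link): when the divider is computed only while it is still zero, a later hotplug that retrains the link at a different rate keeps using the stale value, so the change below recomputes it on every atomic check.

	/* old behaviour: computed once, goes stale after the link retrains */
	if (!mst_state->pbn_div)
		mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link);

	/* fixed behaviour: always derive it from the currently trained link */
	mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link);
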
Cc: stable@vger.kernel.org Tested-by: Daniel Wheeler Reviewed-by: Jerry Zuo Acked-by: Rodrigo Siqueira Signed-off-by: Wade Wang Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c8c00c2a5224..19fabaec9408 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6914,8 +6914,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div) - mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link); + mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link); if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; -- cgit v1.2.3 From 0cc9e952e6efa1f6f2597a305ea20d4b6ecc7573 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 16:07:22 +0800 Subject: drm/amd/pm: Use separate metric table for APU Use separate metric table for APU and Non APU systems for smu_v_13_0_6 to get metric data Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- .../amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 90 ++++++++++++++- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 124 +++++++++++---------- 2 files changed, 156 insertions(+), 58 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index fef2d290f3f2..8f166aa3043c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -219,7 +219,95 @@ typedef struct __attribute__((packed, aligned(4))) { uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKSentCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated -} MetricsTable_t; +} MetricsTableX_t; + +typedef struct __attribute__((packed, aligned(4))) { + uint32_t AccumulationCounter; + + //TEMPERATURE + uint32_t MaxSocketTemperature; + uint32_t MaxVrTemperature; + uint32_t MaxHbmTemperature; + uint64_t MaxSocketTemperatureAcc; + uint64_t MaxVrTemperatureAcc; + uint64_t MaxHbmTemperatureAcc; + + //POWER + uint32_t SocketPowerLimit; + uint32_t MaxSocketPowerLimit; + uint32_t SocketPower; + + //ENERGY + uint64_t Timestamp; + uint64_t SocketEnergyAcc; + uint64_t CcdEnergyAcc; + uint64_t XcdEnergyAcc; + uint64_t AidEnergyAcc; + uint64_t HbmEnergyAcc; + + //FREQUENCY + uint32_t CclkFrequencyLimit; + uint32_t GfxclkFrequencyLimit; + uint32_t FclkFrequency; + uint32_t UclkFrequency; + uint32_t SocclkFrequency[4]; + uint32_t VclkFrequency[4]; + uint32_t DclkFrequency[4]; + uint32_t LclkFrequency[4]; + uint64_t GfxclkFrequencyAcc[8]; + uint64_t CclkFrequencyAcc[96]; + + //FREQUENCY RANGE + uint32_t MaxCclkFrequency; + uint32_t MinCclkFrequency; + uint32_t MaxGfxclkFrequency; + uint32_t MinGfxclkFrequency; + uint32_t FclkFrequencyTable[4]; + uint32_t UclkFrequencyTable[4]; + uint32_t SocclkFrequencyTable[4]; + uint32_t VclkFrequencyTable[4]; + uint32_t DclkFrequencyTable[4]; + uint32_t LclkFrequencyTable[4]; + uint32_t MaxLclkDpmRange; + uint32_t MinLclkDpmRange; + + //XGMI + uint32_t XgmiWidth; 
+ uint32_t XgmiBitrate; + uint64_t XgmiReadBandwidthAcc[8]; + uint64_t XgmiWriteBandwidthAcc[8]; + + //ACTIVITY + uint32_t SocketC0Residency; + uint32_t SocketGfxBusy; + uint32_t DramBandwidthUtilization; + uint64_t SocketC0ResidencyAcc; + uint64_t SocketGfxBusyAcc; + uint64_t DramBandwidthAcc; + uint32_t MaxDramBandwidth; + uint64_t DramBandwidthUtilizationAcc; + uint64_t PcieBandwidthAcc[4]; + + //THROTTLERS + uint32_t ProchotResidencyAcc; + uint32_t PptResidencyAcc; + uint32_t SocketThmResidencyAcc; + uint32_t VrThmResidencyAcc; + uint32_t HbmThmResidencyAcc; + uint32_t GfxLockXCDMak; + + // New Items at end to maintain driver compatibility + uint32_t GfxclkFrequency[8]; + + //PSNs + uint64_t PublicSerialNumber_AID[4]; + uint64_t PublicSerialNumber_XCD[8]; + uint64_t PublicSerialNumber_CCD[12]; + + //XGMI Data tranfser size + uint64_t XgmiReadDataSizeAcc[8];//in KByte + uint64_t XgmiWriteDataSizeAcc[8];//in KByte +} MetricsTableA_t; #define SMU_VF_METRICS_TABLE_VERSION 0x3 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 900a2d9e6d85..17bc5ffbfd97 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -245,6 +245,8 @@ struct PPTable_t { #define SMUQ10_TO_UINT(x) ((x) >> 10) #define SMUQ10_FRAC(x) ((x) & 0x3ff) #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200)) +#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\ + (metrics_a->field) : (metrics_x->field)) struct smu_v13_0_6_dpm_map { enum smu_clk_type clk_type; @@ -327,7 +329,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); - SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t), + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, + max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); @@ -335,7 +338,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); + smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t), + sizeof(MetricsTableA_t)), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; smu_table->metrics_time = 0; @@ -431,9 +435,11 @@ static int smu_v13_0_6_get_metrics_table(struct smu_context *smu, static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; + MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; + struct amdgpu_device *adev = smu->adev; int ret, i, retry = 100; /* Store one-time values in driver PPTable */ @@ -444,7 +450,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) return ret; /* Ensure that metrics have been updated */ - if (metrics->AccumulationCounter) + if (GET_METRIC_FIELD(AccumulationCounter)) break; usleep_range(1000, 1100); @@ -454,29 +460,29 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) return -ETIME; pptable->MaxSocketPowerLimit = - SMUQ10_ROUND(metrics->MaxSocketPowerLimit); + 
SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit)); pptable->MaxGfxclkFrequency = - SMUQ10_ROUND(metrics->MaxGfxclkFrequency); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency)); pptable->MinGfxclkFrequency = - SMUQ10_ROUND(metrics->MinGfxclkFrequency); + SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency)); for (i = 0; i < 4; ++i) { pptable->FclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->FclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]); pptable->UclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->UclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]); pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND( - metrics->SocclkFrequencyTable[i]); + GET_METRIC_FIELD(SocclkFrequencyTable)[i]); pptable->VclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->VclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]); pptable->DclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->DclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]); pptable->LclkFrequencyTable[i] = - SMUQ10_ROUND(metrics->LclkFrequencyTable[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]); } /* use AID0 serial number by default */ - pptable->PublicSerialNumber_AID = metrics->PublicSerialNumber_AID[0]; + pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0]; pptable->Init = true; } @@ -778,7 +784,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, uint32_t *value) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; + MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct amdgpu_device *adev = smu->adev; int ret = 0; int xcc_id; @@ -793,50 +800,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_GFXCLK: if (smu->smc_fw_version >= 0x552F00) { xcc_id = GET_INST(GC, 0); - *value = SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); } else { *value = 0; } break; case METRICS_CURR_SOCCLK: case METRICS_AVERAGE_SOCCLK: - *value = SMUQ10_ROUND(metrics->SocclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]); break; case METRICS_CURR_UCLK: case METRICS_AVERAGE_UCLK: - *value = SMUQ10_ROUND(metrics->UclkFrequency); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); break; case METRICS_CURR_VCLK: - *value = SMUQ10_ROUND(metrics->VclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]); break; case METRICS_CURR_DCLK: - *value = SMUQ10_ROUND(metrics->DclkFrequency[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]); break; case METRICS_CURR_FCLK: - *value = SMUQ10_ROUND(metrics->FclkFrequency); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency)); break; case METRICS_AVERAGE_GFXACTIVITY: - *value = SMUQ10_ROUND(metrics->SocketGfxBusy); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); break; case METRICS_AVERAGE_MEMACTIVITY: - *value = SMUQ10_ROUND(metrics->DramBandwidthUtilization); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); break; case METRICS_CURR_SOCKETPOWER: - *value = SMUQ10_ROUND(metrics->SocketPower) << 8; + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = SMUQ10_ROUND(metrics->MaxSocketTemperature) * + *value = 
SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_MEM: - *value = SMUQ10_ROUND(metrics->MaxHbmTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; /* This is the max of all VRs and not just SOC VR. * No need to define another data type for the same. */ case METRICS_TEMPERATURE_VRSOC: - *value = SMUQ10_ROUND(metrics->MaxVrTemperature) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; default: @@ -2026,63 +2033,66 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table; struct amdgpu_device *adev = smu->adev; int ret = 0, xcc_id, inst, i; - MetricsTable_t *metrics; + MetricsTableX_t *metrics_x; + MetricsTableA_t *metrics_a; u16 link_width_level; - metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); - ret = smu_v13_0_6_get_metrics_table(smu, metrics, true); + metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); + ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); if (ret) { - kfree(metrics); + kfree(metrics_x); return ret; } + metrics_a = (MetricsTableA_t *)metrics_x; + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4); gpu_metrics->temperature_hotspot = - SMUQ10_ROUND(metrics->MaxSocketTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)); /* Individual HBM stack temperature is not reported */ gpu_metrics->temperature_mem = - SMUQ10_ROUND(metrics->MaxHbmTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)); /* Reports max temperature of all voltage rails */ gpu_metrics->temperature_vrsoc = - SMUQ10_ROUND(metrics->MaxVrTemperature); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)); gpu_metrics->average_gfx_activity = - SMUQ10_ROUND(metrics->SocketGfxBusy); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); gpu_metrics->average_umc_activity = - SMUQ10_ROUND(metrics->DramBandwidthUtilization); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); gpu_metrics->curr_socket_power = - SMUQ10_ROUND(metrics->SocketPower); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)); /* Energy counter reported in 15.259uJ (2^-16) units */ - gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc; + gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc); for (i = 0; i < MAX_GFX_CLKS; i++) { xcc_id = GET_INST(GC, i); if (xcc_id >= 0) gpu_metrics->current_gfxclk[i] = - SMUQ10_ROUND(metrics->GfxclkFrequency[xcc_id]); + SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); if (i < MAX_CLKS) { gpu_metrics->current_socclk[i] = - SMUQ10_ROUND(metrics->SocclkFrequency[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]); inst = GET_INST(VCN, i); if (inst >= 0) { gpu_metrics->current_vclk0[i] = - SMUQ10_ROUND(metrics->VclkFrequency[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]); gpu_metrics->current_dclk0[i] = - SMUQ10_ROUND(metrics->DclkFrequency[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]); } } } - gpu_metrics->current_uclk = SMUQ10_ROUND(metrics->UclkFrequency); + gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); /* Throttle status is not reported through metrics now */ gpu_metrics->throttle_status = 0; /* Clock Lock Status. 
Each bit corresponds to each GFXCLK instance */ - gpu_metrics->gfxclk_lock_status = metrics->GfxLockXCDMak >> GET_INST(GC, 0); + gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0); if (!(adev->flags & AMD_IS_APU)) { link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); @@ -2094,38 +2104,38 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->pcie_link_speed = smu_v13_0_6_get_current_pcie_link_speed(smu); gpu_metrics->pcie_bandwidth_acc = - SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]); + SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]); gpu_metrics->pcie_bandwidth_inst = - SMUQ10_ROUND(metrics->PcieBandwidth[0]); + SMUQ10_ROUND(metrics_x->PcieBandwidth[0]); gpu_metrics->pcie_l0_to_recov_count_acc = - metrics->PCIeL0ToRecoveryCountAcc; + metrics_x->PCIeL0ToRecoveryCountAcc; gpu_metrics->pcie_replay_count_acc = - metrics->PCIenReplayAAcc; + metrics_x->PCIenReplayAAcc; gpu_metrics->pcie_replay_rover_count_acc = - metrics->PCIenReplayARolloverCountAcc; + metrics_x->PCIenReplayARolloverCountAcc; } gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); gpu_metrics->gfx_activity_acc = - SMUQ10_ROUND(metrics->SocketGfxBusyAcc); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc)); gpu_metrics->mem_activity_acc = - SMUQ10_ROUND(metrics->DramBandwidthUtilizationAcc); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc)); for (i = 0; i < NUM_XGMI_LINKS; i++) { gpu_metrics->xgmi_read_data_acc[i] = - SMUQ10_ROUND(metrics->XgmiReadDataSizeAcc[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]); gpu_metrics->xgmi_write_data_acc[i] = - SMUQ10_ROUND(metrics->XgmiWriteDataSizeAcc[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]); } - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth); - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate); + gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth)); + gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate)); - gpu_metrics->firmware_timestamp = metrics->Timestamp; + gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp); *table = (void *)gpu_metrics; - kfree(metrics); + kfree(metrics_x); return sizeof(*gpu_metrics); } -- cgit v1.2.3 From ff5ab42b7926682080d87993a850a4d4b8d379e0 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 17:09:21 +0800 Subject: drm/amd/pm: Update metric table for jpeg/vcn data Update pmfw metric table to include vcn & jpeg activity for smu_v_13_0_6 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index 8f166aa3043c..7b812b9994d7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -123,7 +123,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0x9 +#define SMU_METRICS_TABLE_VERSION 0xB typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -219,6 +219,10 @@ typedef struct __attribute__((packed, aligned(4))) { uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated uint32_t PCIeNAKSentCountAcc; // The Pcie counter 
itself is accumulated uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated + + // VCN/JPEG ACTIVITY + uint32_t VcnBusy[4]; + uint32_t JpegBusy[32]; } MetricsTableX_t; typedef struct __attribute__((packed, aligned(4))) { @@ -307,6 +311,10 @@ typedef struct __attribute__((packed, aligned(4))) { //XGMI Data tranfser size uint64_t XgmiReadDataSizeAcc[8];//in KByte uint64_t XgmiWriteDataSizeAcc[8];//in KByte + + // VCN/JPEG ACTIVITY + uint32_t VcnBusy[4]; + uint32_t JpegBusy[32]; } MetricsTableA_t; #define SMU_VF_METRICS_TABLE_VERSION 0x3 -- cgit v1.2.3 From 7e725c20fea8914ef1829da777f517ce1a93d388 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Dec 2023 12:33:45 -0500 Subject: drm/amd/display: add nv12 bounding box This was included in gpu_info firmware, move it into the driver for consistency with other nv1x parts. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2318 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 110 ++++++++++++++++++++- 1 file changed, 109 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index ec77b2b41ba3..d2271e308fa0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -440,7 +440,115 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = { .use_urgent_burst_bw = 0 }; -struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 }; +struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 560.0, + .fabricclk_mhz = 560.0, + .dispclk_mhz = 513.0, + .dppclk_mhz = 513.0, + .phyclk_mhz = 540.0, + .socclk_mhz = 560.0, + .dscclk_mhz = 171.0, + .dram_speed_mts = 1069.0, + }, + { + .state = 1, + .dcfclk_mhz = 694.0, + .fabricclk_mhz = 694.0, + .dispclk_mhz = 642.0, + .dppclk_mhz = 642.0, + .phyclk_mhz = 600.0, + .socclk_mhz = 694.0, + .dscclk_mhz = 214.0, + .dram_speed_mts = 1324.0, + }, + { + .state = 2, + .dcfclk_mhz = 875.0, + .fabricclk_mhz = 875.0, + .dispclk_mhz = 734.0, + .dppclk_mhz = 734.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 875.0, + .dscclk_mhz = 245.0, + .dram_speed_mts = 1670.0, + }, + { + .state = 3, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 1000.0, + .dispclk_mhz = 1100.0, + .dppclk_mhz = 1100.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1000.0, + .dscclk_mhz = 367.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 4, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 5, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 2000.0, + }, + }, + + .num_states = 5, + .sr_exit_time_us = 1.9, + .sr_enter_plus_exit_time_us = 4.4, + .urgent_latency_us = 3.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 
40.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 40.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 40.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 16, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.5, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 131, + .urgent_out_of_order_return_per_channel_bytes = 4096, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 16, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 45.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3850, + .xfc_bus_transport_time_us = 20, + .xfc_xbuf_latency_tolerance_us = 50, + .use_urgent_burst_bw = 0, +}; struct _vcs_dpi_ip_params_st dcn2_1_ip = { .odm_capable = 1, -- cgit v1.2.3 From 21f6137c64c65d6808c4a81006956197ca203383 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Dec 2023 12:36:08 -0500 Subject: drm/amdgpu: skip gpu_info fw loading on navi12 It's no longer required. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2318 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8dee52ce26d0..93cf73d6fa11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2188,15 +2188,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; - if (adev->mman.discovery_bin) { - /* - * FIXME: The bounding box is still needed by Navi12, so - * temporarily read it from gpu_info firmware. Should be dropped - * when DAL no longer needs it. - */ - if (adev->asic_type != CHIP_NAVI12) - return 0; - } + if (adev->mman.discovery_bin) + return 0; switch (adev->asic_type) { default: -- cgit v1.2.3 From 202260f64519e591b5cd99626e441b6559f571a3 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Mon, 1 Jan 2024 18:28:22 +0000 Subject: drm/amd/display: Fix sending VSC (+ colorimetry) packets for DP/eDP displays without PSR The check for sending the vsc infopacket to the display was gated behind PSR (Panel Self Refresh) being enabled. The vsc infopacket also contains the colorimetry (specifically the container color gamut) information for the stream on modern DP. PSR is typically only supported on mobile phone eDP displays, thus this was not getting sent for typical desktop monitors or TV screens. This functionality is needed for proper HDR10 functionality on DP as it wants BT2020 RGB/YCbCr for the container color space. 
Cc: stable@vger.kernel.org Cc: Harry Wentland Cc: Xaver Hugl Cc: Melissa Wen Fixes: 15f9dfd545a1 ("drm/amd/display: Register Colorspace property for DP and HDMI") Tested-by: Simon Berz Tested-by: Xaver Hugl Signed-off-by: Joshua Ashton Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++--- .../gpu/drm/amd/display/modules/info_packet/info_packet.c | 13 ++++++++----- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 19fabaec9408..4e82ee4d74ac 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6170,8 +6170,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket); - - if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) { + else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || + stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + stream->signal == SIGNAL_TYPE_EDP) { // // should decide stream support vsc sdp colorimetry capability // before building vsc info packet @@ -6187,8 +6188,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) tf = TRANSFER_FUNC_GAMMA_22; mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); - aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; + if (stream->link->psr_settings.psr_feature_enabled) + aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } finish: dc_sink_release(sink); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 84f9b412a4f1..738ee763f24a 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -147,12 +147,15 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } /* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */ - if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) - vsc_packet_revision = vsc_packet_rev4; - else if (stream->link->replay_settings.config.replay_supported) + if (stream->link->psr_settings.psr_feature_enabled) { + if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) + vsc_packet_revision = vsc_packet_rev4; + else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) + vsc_packet_revision = vsc_packet_rev2; + } + + if (stream->link->replay_settings.config.replay_supported) vsc_packet_revision = vsc_packet_rev4; - else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) - vsc_packet_revision = vsc_packet_rev2; /* Update to revision 5 for extended colorimetry support */ if (stream->use_vsc_sdp_for_colorimetry) -- cgit v1.2.3 From 21ff3cc851565c01cbc25c64e506fe99c26b51aa Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Fri, 22 Dec 2023 18:24:20 +0800 Subject: drm/amd/pm: Add mem_busy_percent for GCv9.4.3 apu Expose sysfs entry mem_busy_percent for GC version 9.4.3 APU system Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git 
a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 49028dde0f87..20c53eefd680 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2128,7 +2128,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (amdgpu_dpm_is_overdrive_supported(adev)) *states = ATTR_STATE_SUPPORTED; } else if (DEVICE_ATTR_IS(mem_busy_percent)) { - if (adev->flags & AMD_IS_APU || gc_ver == IP_VERSION(9, 0, 1)) + if ((adev->flags & AMD_IS_APU && + gc_ver != IP_VERSION(9, 4, 3)) || + gc_ver == IP_VERSION(9, 0, 1)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pcie_bw)) { /* PCIe Perf counters won't work on APU nodes */ -- cgit v1.2.3 From 43d7e8b0127cc5f77916067431f31b424156cf74 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 17:21:48 +0800 Subject: drm/amd/pm: Add gpu_metrics_v1_5 Add new gpu_metrics_v1_5 to acquire vcn/jpeg activity & pcie nak error counters Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 80 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 + 2 files changed, 83 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 1c5049e894e3..c2ccf3724e37 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -318,6 +318,7 @@ enum pp_xgmi_plpd_mode { #define MAX_GFX_CLKS 8 #define MAX_CLKS 4 #define NUM_VCN 4 +#define NUM_JPEG_ENG 32 struct seq_file; enum amd_pp_clock_type; @@ -774,6 +775,85 @@ struct gpu_metrics_v1_4 { uint16_t padding; }; +struct gpu_metrics_v1_5 { + struct metrics_table_header common_header; + + /* Temperature (Celsius) */ + uint16_t temperature_hotspot; + uint16_t temperature_mem; + uint16_t temperature_vrsoc; + + /* Power (Watts) */ + uint16_t curr_socket_power; + + /* Utilization (%) */ + uint16_t average_gfx_activity; + uint16_t average_umc_activity; // memory controller + uint16_t vcn_activity[NUM_VCN]; + uint16_t jpeg_activity[NUM_JPEG_ENG]; + + /* Energy (15.259uJ (2^-16) units) */ + uint64_t energy_accumulator; + + /* Driver attached timestamp (in ns) */ + uint64_t system_clock_counter; + + /* Throttle status */ + uint32_t throttle_status; + + /* Clock Lock Status. 
Each bit corresponds to clock instance */ + uint32_t gfxclk_lock_status; + + /* Link width (number of lanes) and speed (in 0.1 GT/s) */ + uint16_t pcie_link_width; + uint16_t pcie_link_speed; + + /* XGMI bus width and bitrate (in Gbps) */ + uint16_t xgmi_link_width; + uint16_t xgmi_link_speed; + + /* Utilization Accumulated (%) */ + uint32_t gfx_activity_acc; + uint32_t mem_activity_acc; + + /*PCIE accumulated bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_acc; + + /*PCIE instantaneous bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_inst; + + /* PCIE L0 to recovery state transition accumulated count */ + uint64_t pcie_l0_to_recov_count_acc; + + /* PCIE replay accumulated count */ + uint64_t pcie_replay_count_acc; + + /* PCIE replay rollover accumulated count */ + uint64_t pcie_replay_rover_count_acc; + + /* PCIE NAK sent accumulated count */ + uint32_t pcie_nak_sent_count_acc; + + /* PCIE NAK received accumulated count */ + uint32_t pcie_nak_rcvd_count_acc; + + /* XGMI accumulated data transfer size(KiloBytes) */ + uint64_t xgmi_read_data_acc[NUM_XGMI_LINKS]; + uint64_t xgmi_write_data_acc[NUM_XGMI_LINKS]; + + /* PMFW attached timestamp (10ns resolution) */ + uint64_t firmware_timestamp; + + /* Current clocks (Mhz) */ + uint16_t current_gfxclk[MAX_GFX_CLKS]; + uint16_t current_socclk[MAX_CLKS]; + uint16_t current_vclk0[MAX_CLKS]; + uint16_t current_dclk0[MAX_CLKS]; + uint16_t current_uclk; + + uint16_t padding; +}; + /* * gpu_metrics_v2_0 is not recommended as it's not naturally aligned. * Use gpu_metrics_v2_1 or later instead. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 001a5cf09657..00cd615bbcdc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -989,6 +989,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(1, 4): structure_size = sizeof(struct gpu_metrics_v1_4); break; + case METRICS_VERSION(1, 5): + structure_size = sizeof(struct gpu_metrics_v1_5); + break; case METRICS_VERSION(2, 0): structure_size = sizeof(struct gpu_metrics_v2_0); break; -- cgit v1.2.3 From f71cee97a644a6dfa3bc0eb69e29a53eab49b98d Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 20 Dec 2023 17:32:48 +0800 Subject: drm/amd/pm: Use gpu_metrics_v1_5 for SMUv13.0.6 Use gpu_metrics_v1_5 for SMUv13.0.6 to fill gpu metric info Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 29 ++++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 17bc5ffbfd97..b64e07b75937 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -344,7 +344,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_4); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -2029,10 +2029,10 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = 
&smu->smu_table; - struct gpu_metrics_v1_4 *gpu_metrics = - (struct gpu_metrics_v1_4 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_5 *gpu_metrics = + (struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table; struct amdgpu_device *adev = smu->adev; - int ret = 0, xcc_id, inst, i; + int ret = 0, xcc_id, inst, i, j; MetricsTableX_t *metrics_x; MetricsTableA_t *metrics_a; u16 link_width_level; @@ -2046,7 +2046,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_a = (MetricsTableA_t *)metrics_x; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 4); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)); @@ -2113,6 +2113,10 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_x->PCIenReplayAAcc; gpu_metrics->pcie_replay_rover_count_acc = metrics_x->PCIenReplayARolloverCountAcc; + gpu_metrics->pcie_nak_sent_count_acc = + metrics_x->PCIeNAKSentCountAcc; + gpu_metrics->pcie_nak_rcvd_count_acc = + metrics_x->PCIeNAKReceivedCountAcc; } gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); @@ -2129,6 +2133,21 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]); } + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + inst = GET_INST(JPEG, i); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] = + SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy) + [(inst * adev->jpeg.num_jpeg_rings) + j]); + } + } + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + inst = GET_INST(VCN, i); + gpu_metrics->vcn_activity[i] = + SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy)[inst]); + } + gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth)); gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate)); -- cgit v1.2.3 From 38709af26c33e398c3292e96837ccfde41fd9e6b Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Thu, 4 Jan 2024 16:39:49 +0200 Subject: drm/rockchip: vop2: Drop superfluous include The rockchip_drm_fb.h header contains just a single function which is not directly used by the VOP2 driver. Drop the unnecessary include. Signed-off-by: Cristian Ciocaltea Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240104143951.85219-1-cristian.ciocaltea@collabora.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 574103fc79f9..a3a03e9cfe13 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -35,7 +35,6 @@ #include "rockchip_drm_drv.h" #include "rockchip_drm_gem.h" -#include "rockchip_drm_fb.h" #include "rockchip_drm_vop2.h" #include "rockchip_rgb.h" -- cgit v1.2.3 From 24ab185d9804e48041b370fa9354d6d941ce0a32 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:50 +1000 Subject: nouveau/gsp: add three notifier callbacks that we see in normal operation (v2) Add NULL callbacks for some things GSP calls that we don't handle, but know about so we avoid the logging. v2: Timur suggested allowing null fn. 
Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-2-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 44fb86841c05..7f831f41b598 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -298,7 +298,8 @@ retry: struct nvkm_gsp_msgq_ntfy *ntfy = &gsp->msgq.ntfy[i]; if (ntfy->fn == msg->function) { - ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length - sizeof(*msg)); + if (ntfy->func) + ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length - sizeof(*msg)); break; } } @@ -2186,7 +2187,9 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp) r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED, r535_gsp_msg_mmu_fault_queued, gsp); r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_OS_ERROR_LOG, r535_gsp_msg_os_error_log, gsp); - + r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE, NULL, NULL); + r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, NULL, NULL); + r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_GSP_SEND_USER_SHARED_DATA, NULL, NULL); ret = r535_gsp_rm_boot_ctor(gsp); if (ret) return ret; -- cgit v1.2.3 From 34ce62a51e65a8b1fce0eb64dea2719c9429b306 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:51 +1000 Subject: nouveau/gsp: drop some acpi related debug These were leftover debug, if we need to bring them back do so for debugging later. Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-3-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c | 7 ------- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 9 --------- 2 files changed, 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c index 298035070b3a..1c8c4cca0957 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c @@ -1465,8 +1465,6 @@ r535_disp_oneinit(struct nvkm_disp *disp) bool nvhg = acpi_check_dsm(handle, &NVHG_DSM_GUID, NVHG_DSM_REV, 1ULL << 0x00000014); - printk(KERN_ERR "bl: nbci:%d nvhg:%d\n", nbci, nvhg); - if (nbci || nvhg) { union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, @@ -1479,9 +1477,6 @@ r535_disp_oneinit(struct nvkm_disp *disp) if (!obj) { acpi_handle_info(handle, "failed to evaluate _DSM\n"); } else { - printk(KERN_ERR "bl: obj type %d\n", obj->type); - printk(KERN_ERR "bl: obj len %d\n", obj->package.count); - for (int i = 0; i < obj->package.count; i++) { union acpi_object *elt = &obj->package.elements[i]; u32 size; @@ -1491,12 +1486,10 @@ r535_disp_oneinit(struct nvkm_disp *disp) else size = 4; - printk(KERN_ERR "elt %03d: type %d size %d\n", i, elt->type, size); memcpy(&ctrl->backLightData[ctrl->backLightDataSize], &elt->integer.value, size); ctrl->backLightDataSize += size; } - printk(KERN_ERR "bl: data size %d\n", ctrl->backLightDataSize); ctrl->status = 0; ACPI_FREE(obj); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 7f831f41b598..365dda6c002a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1100,16 +1100,12 @@ r535_gsp_acpi_caps(acpi_handle handle, 
CAPS_METHOD_DATA *caps) if (!obj) return; - printk(KERN_ERR "nvop: obj type %d\n", obj->type); - printk(KERN_ERR "nvop: obj len %d\n", obj->buffer.length); - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) return; caps->status = 0; caps->optimusCaps = *(u32 *)obj->buffer.pointer; - printk(KERN_ERR "nvop: caps %08x\n", caps->optimusCaps); ACPI_FREE(obj); @@ -1136,9 +1132,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) if (!obj) return; - printk(KERN_ERR "jt: obj type %d\n", obj->type); - printk(KERN_ERR "jt: obj len %d\n", obj->buffer.length); - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) return; @@ -1147,7 +1140,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) jt->jtCaps = *(u32 *)obj->buffer.pointer; jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20; jt->bSBIOSCaps = 0; - printk(KERN_ERR "jt: caps %08x rev:%04x\n", jt->jtCaps, jt->jtRevId); ACPI_FREE(obj); @@ -1233,7 +1225,6 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod) dod->acpiIdListLen += sizeof(dod->acpiIdList[0]); } - printk(KERN_ERR "_DOD: ok! len:%d\n", dod->acpiIdListLen); dod->status = 0; } #endif -- cgit v1.2.3 From 7854ea0e408d7f2e8faaada1773f3ddf9cb538f5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:52 +1000 Subject: nouveau: fix disp disabling with GSP This func ptr here is normally static allocation, but gsp r535 uses a dynamic pointer, so we need to handle that better. This fixes a crash with GSP when you use config=disp=0 to avoid disp problems. Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-4-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c index 457ec5db794d..b24eb1e560bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c @@ -209,7 +209,7 @@ nvkm_disp_dtor(struct nvkm_engine *engine) nvkm_head_del(&head); } - if (disp->func->dtor) + if (disp->func && disp->func->dtor) disp->func->dtor(disp); return data; @@ -243,8 +243,10 @@ nvkm_disp_new_(const struct nvkm_disp_func *func, struct nvkm_device *device, spin_lock_init(&disp->client.lock); ret = nvkm_engine_ctor(&nvkm_disp, device, type, inst, true, &disp->engine); - if (ret) + if (ret) { + disp->func = NULL; return ret; + } if (func->super) { disp->super.wq = create_singlethread_workqueue("nvkm-disp"); -- cgit v1.2.3 From a9b9b42b54b2c9251144b3b9d4ebcd83e5f8c230 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:53 +1000 Subject: nouveau/gsp: free acpi object after use This fixes a memory leak for the acpi dod object. 
Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-5-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 365dda6c002a..1a6d7c89660d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1226,6 +1226,7 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod) } dod->status = 0; + kfree(output.pointer); } #endif -- cgit v1.2.3 From 3108cc03236b54c56e34de01227cdacf764c50a8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:54 +1000 Subject: nouveau/gsp: free userd allocation. This was being leaked. Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-6-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c index d088e636edc3..b903785056b5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c @@ -242,6 +242,7 @@ r535_chan_id_put(struct nvkm_chan *chan) nvkm_memory_unref(&userd->mem); nvkm_chid_put(runl->chid, userd->chid, &chan->cgrp->lock); list_del(&userd->head); + kfree(userd); } break; -- cgit v1.2.3 From cf22fc2846cee83061082fa373574d6cdeb4c006 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 22 Dec 2023 14:31:55 +1000 Subject: drm/nouveau/gsp: Fix ACPI MXDM/MXDS method invocations Currently we get an error from ACPI because both of these arguments expect a single argument, and we don't provide one. I'm not totally clear on what that argument does, but we're able to find the missing value from _acpiCacheMethodData() in src/kernel/platform/acpi_common.c in nvidia's driver. So, let's add that - which doesn't get eDP displays to power on quite yet, but gets rid of the argument warning at least. 
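For readers following along, the kernel ACPI helper takes the argument list explicitly; a minimal sketch of evaluating a method with a single integer argument looks like the code below. The handle, the method name and the argument value 0 are illustrative assumptions here, not the exact driver change (which follows in the diff).

#include <linux/acpi.h>
#include <linux/errno.h>

/* Sketch only: call an ACPI method that expects one integer argument. */
static int eval_mux_method_sketch(acpi_handle handle, unsigned long long *out)
{
	union acpi_object arg = {
		.integer.type  = ACPI_TYPE_INTEGER,
		.integer.value = 0,	/* assumed "acquire" value */
	};
	struct acpi_object_list input = { .count = 1, .pointer = &arg };
	acpi_status status;

	status = acpi_evaluate_integer(handle, "MXDS", &input, out);
	return ACPI_SUCCESS(status) ? 0 : -EIO;
}

The actual change below builds the argument list once and reuses it for both MXDM and MXDS.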
Signed-off-by: Lyude Paul Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-7-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 1a6d7c89660d..774ca47b019f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1150,6 +1150,8 @@ static void r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode, MUX_METHOD_DATA_ELEMENT *part) { + union acpi_object mux_arg = { ACPI_TYPE_INTEGER }; + struct acpi_object_list input = { 1, &mux_arg }; acpi_handle iter = NULL, handle_mux = NULL; acpi_status status; unsigned long long value; @@ -1172,14 +1174,18 @@ r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode, if (!handle_mux) return; - status = acpi_evaluate_integer(handle_mux, "MXDM", NULL, &value); + /* I -think- 0 means "acquire" according to nvidia's driver source */ + input.pointer->integer.type = ACPI_TYPE_INTEGER; + input.pointer->integer.value = 0; + + status = acpi_evaluate_integer(handle_mux, "MXDM", &input, &value); if (ACPI_SUCCESS(status)) { mode->acpiId = id; mode->mode = value; mode->status = 0; } - status = acpi_evaluate_integer(handle_mux, "MXDS", NULL, &value); + status = acpi_evaluate_integer(handle_mux, "MXDS", &input, &value); if (ACPI_SUCCESS(status)) { part->acpiId = id; part->mode = value; -- cgit v1.2.3 From 59f6a3d8db2573aba994b95074d8f9911c969bcb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:56 +1000 Subject: nouveau/gsp: convert gsp errors to generic errors This should let the upper layers retry as needed on EAGAIN. There may be other values we will care about in the future, but this covers our present needs. Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-8-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 774ca47b019f..263dbe45bb36 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -70,6 +70,20 @@ struct r535_gsp_msg { #define GSP_MSG_HDR_SIZE offsetof(struct r535_gsp_msg, data) +static int +r535_rpc_status_to_errno(uint32_t rpc_status) +{ + switch (rpc_status) { + case 0x55: /* NV_ERR_NOT_READY */ + case 0x66: /* NV_ERR_TIMEOUT_RETRY */ + return -EAGAIN; + case 0x51: /* NV_ERR_NO_MEMORY */ + return -ENOMEM; + default: + return -EINVAL; + } +} + static void * r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime) { @@ -584,8 +598,9 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *argv, u32 repc) return rpc; if (rpc->status) { - nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status); - ret = ERR_PTR(-EINVAL); + ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status)); + if (ret != -EAGAIN) + nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status); } else { ret = repc ? 
rpc->params : NULL; } @@ -639,9 +654,10 @@ r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc) return rpc; if (rpc->status) { - nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n", - object->client->object.handle, object->handle, rpc->cmd, rpc->status); - ret = ERR_PTR(-EINVAL); + ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status)); + if (ret != -EAGAIN) + nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n", + object->client->object.handle, object->handle, rpc->cmd, rpc->status); } else { ret = repc ? rpc->params : NULL; } -- cgit v1.2.3 From 4ae3a20102b20d1927737e1a78615386d58e194d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:57 +1000 Subject: nouveau/gsp: don't free ctrl messages on errors It looks like for some messages the upper layers need to get access to the results of the message so we can interpret it. Rework the ctrl push interface to not free things and cleanup properly whereever it errors out. Requested-by: Lyude Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-9-airlied@gmail.com --- drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 17 ++-- drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c | 108 ++++++++++++++-------- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 36 +++++--- 3 files changed, 100 insertions(+), 61 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h index 2fa0445d8928..d1437c08645f 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -187,7 +187,7 @@ struct nvkm_gsp { void (*rpc_done)(struct nvkm_gsp *gsp, void *repv); void *(*rm_ctrl_get)(struct nvkm_gsp_object *, u32 cmd, u32 argc); - void *(*rm_ctrl_push)(struct nvkm_gsp_object *, void *argv, u32 repc); + int (*rm_ctrl_push)(struct nvkm_gsp_object *, void **argv, u32 repc); void (*rm_ctrl_done)(struct nvkm_gsp_object *, void *repv); void *(*rm_alloc_get)(struct nvkm_gsp_object *, u32 oclass, u32 argc); @@ -265,7 +265,7 @@ nvkm_gsp_rm_ctrl_get(struct nvkm_gsp_object *object, u32 cmd, u32 argc) return object->client->gsp->rm->rm_ctrl_get(object, cmd, argc); } -static inline void * +static inline int nvkm_gsp_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc) { return object->client->gsp->rm->rm_ctrl_push(object, argv, repc); @@ -275,21 +275,24 @@ static inline void * nvkm_gsp_rm_ctrl_rd(struct nvkm_gsp_object *object, u32 cmd, u32 repc) { void *argv = nvkm_gsp_rm_ctrl_get(object, cmd, repc); + int ret; if (IS_ERR(argv)) return argv; - return nvkm_gsp_rm_ctrl_push(object, argv, repc); + ret = nvkm_gsp_rm_ctrl_push(object, &argv, repc); + if (ret) + return ERR_PTR(ret); + return argv; } static inline int nvkm_gsp_rm_ctrl_wr(struct nvkm_gsp_object *object, void *argv) { - void *repv = nvkm_gsp_rm_ctrl_push(object, argv, 0); - - if (IS_ERR(repv)) - return PTR_ERR(repv); + int ret = nvkm_gsp_rm_ctrl_push(object, &argv, 0); + if (ret) + return ret; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c index 1c8c4cca0957..1b4f988df7ed 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c @@ -282,7 +282,7 @@ r535_sor_bl_get(struct nvkm_ior *sor) { struct nvkm_disp *disp = sor->disp; NV0073_CTRL_SPECIFIC_BACKLIGHT_BRIGHTNESS_PARAMS *ctrl; 
- int lvl; + int ret, lvl; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_SPECIFIC_GET_BACKLIGHT_BRIGHTNESS, @@ -292,9 +292,11 @@ r535_sor_bl_get(struct nvkm_ior *sor) ctrl->displayId = BIT(sor->asy.outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } lvl = ctrl->brightness; nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); @@ -649,9 +651,11 @@ r535_conn_new(struct nvkm_disp *disp, u32 id) ctrl->subDeviceInstance = 0; ctrl->displayId = BIT(id); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return (void *)ctrl; + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ERR_PTR(ret); + } list_for_each_entry(conn, &disp->conns, head) { if (conn->index == ctrl->data[0].index) { @@ -686,7 +690,7 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda) struct nvkm_disp *disp = outp->disp; struct nvkm_ior *ior; NV0073_CTRL_DFP_ASSIGN_SOR_PARAMS *ctrl; - int or; + int ret, or; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DFP_ASSIGN_SOR, sizeof(*ctrl)); @@ -699,9 +703,11 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda) if (hda) ctrl->flags |= NVDEF(NV0073_CTRL, DFP_ASSIGN_SOR_FLAGS, AUDIO, OPTIMAL); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } for (or = 0; or < ARRAY_SIZE(ctrl->sorAssignListWithTag); or++) { if (ctrl->sorAssignListWithTag[or].displayMask & BIT(outp->index)) { @@ -727,6 +733,7 @@ static int r535_disp_head_displayid(struct nvkm_disp *disp, int head, u32 *displayid) { NV0073_CTRL_SYSTEM_GET_ACTIVE_PARAMS *ctrl; + int ret; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_SYSTEM_GET_ACTIVE, sizeof(*ctrl)); @@ -736,9 +743,11 @@ r535_disp_head_displayid(struct nvkm_disp *disp, int head, u32 *displayid) ctrl->subDeviceInstance = 0; ctrl->head = head; - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } *displayid = ctrl->displayId; nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); @@ -772,9 +781,11 @@ r535_outp_inherit(struct nvkm_outp *outp) ctrl->subDeviceInstance = 0; ctrl->displayId = displayid; - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); return NULL; + } id = ctrl->index; proto = ctrl->protocol; @@ -825,6 +836,7 @@ r535_outp_dfp_get_info(struct nvkm_outp *outp) { NV0073_CTRL_DFP_GET_INFO_PARAMS *ctrl; struct nvkm_disp *disp = outp->disp; + int ret; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DFP_GET_INFO, sizeof(*ctrl)); if (IS_ERR(ctrl)) @@ -832,9 +844,11 @@ r535_outp_dfp_get_info(struct nvkm_outp *outp) ctrl->displayId = BIT(outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = 
nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } nvkm_debug(&disp->engine.subdev, "DFP %08x: flags:%08x flags2:%08x\n", ctrl->displayId, ctrl->flags, ctrl->flags2); @@ -858,9 +872,11 @@ r535_outp_detect(struct nvkm_outp *outp) ctrl->subDeviceInstance = 0; ctrl->displayMask = BIT(outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } if (ctrl->displayMask & BIT(outp->index)) { ret = r535_outp_dfp_get_info(outp); @@ -895,6 +911,7 @@ r535_dp_mst_id_get(struct nvkm_outp *outp, u32 *pid) { NV0073_CTRL_CMD_DP_TOPOLOGY_ALLOCATE_DISPLAYID_PARAMS *ctrl; struct nvkm_disp *disp = outp->disp; + int ret; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_TOPOLOGY_ALLOCATE_DISPLAYID, @@ -904,9 +921,11 @@ r535_dp_mst_id_get(struct nvkm_outp *outp, u32 *pid) ctrl->subDeviceInstance = 0; ctrl->displayId = BIT(outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } *pid = ctrl->displayIdAssigned; nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); @@ -964,9 +983,11 @@ r535_dp_train_target(struct nvkm_outp *outp, u8 target, bool mst, u8 link_nr, u8 !(outp->dp.dpcd[DPCD_RC03] & DPCD_RC03_TPS4_SUPPORTED)) ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } ret = ctrl->err ? -EIO : 0; nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); @@ -1036,9 +1057,11 @@ r535_dp_aux_xfer(struct nvkm_outp *outp, u8 type, u32 addr, u8 *data, u8 *psize) ctrl->size = !ctrl->bAddrOnly ? 
(size - 1) : 0; memcpy(ctrl->data, data, size); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); return PTR_ERR(ctrl); + } memcpy(data, ctrl->data, size); *psize = ctrl->size; @@ -1111,10 +1134,13 @@ r535_tmds_edid_get(struct nvkm_outp *outp, u8 *data, u16 *psize) ctrl->subDeviceInstance = 0; ctrl->displayId = BIT(outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } + ret = -E2BIG; if (ctrl->bufferSize <= *psize) { memcpy(data, ctrl->edidBuffer, ctrl->bufferSize); *psize = ctrl->bufferSize; @@ -1153,9 +1179,11 @@ r535_outp_new(struct nvkm_disp *disp, u32 id) ctrl->subDeviceInstance = 0; ctrl->displayId = BIT(id); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } switch (ctrl->type) { case NV0073_CTRL_SPECIFIC_OR_TYPE_NONE: @@ -1229,9 +1257,11 @@ r535_outp_new(struct nvkm_disp *disp, u32 id) ctrl->sorIndex = ~0; - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } switch (NVVAL_GET(ctrl->maxLinkRate, NV0073_CTRL_CMD, DP_GET_CAPS, MAX_LINK_RATE)) { case NV0073_CTRL_CMD_DP_GET_CAPS_MAX_LINK_RATE_1_62: diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 263dbe45bb36..642ce0dd0faf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -599,13 +599,13 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *argv, u32 repc) if (rpc->status) { ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status)); - if (ret != -EAGAIN) + if (PTR_ERR(ret) != -EAGAIN) nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status); } else { ret = repc ? 
rpc->params : NULL; } - if (IS_ERR_OR_NULL(ret)) + if (ret) nvkm_gsp_rpc_done(gsp, rpc); return ret; @@ -639,30 +639,34 @@ r535_gsp_rpc_rm_ctrl_done(struct nvkm_gsp_object *object, void *repv) { rpc_gsp_rm_control_v03_00 *rpc = container_of(repv, typeof(*rpc), params); + if (!repv) + return; nvkm_gsp_rpc_done(object->client->gsp, rpc); } -static void * -r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc) +static int +r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void **argv, u32 repc) { - rpc_gsp_rm_control_v03_00 *rpc = container_of(argv, typeof(*rpc), params); + rpc_gsp_rm_control_v03_00 *rpc = container_of((*argv), typeof(*rpc), params); struct nvkm_gsp *gsp = object->client->gsp; - void *ret; + int ret = 0; rpc = nvkm_gsp_rpc_push(gsp, rpc, true, repc); - if (IS_ERR_OR_NULL(rpc)) - return rpc; + if (IS_ERR_OR_NULL(rpc)) { + *argv = NULL; + return PTR_ERR(rpc); + } if (rpc->status) { - ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status)); + ret = r535_rpc_status_to_errno(rpc->status); if (ret != -EAGAIN) nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n", object->client->object.handle, object->handle, rpc->cmd, rpc->status); - } else { - ret = repc ? rpc->params : NULL; } - if (IS_ERR_OR_NULL(ret)) + if (repc) + *argv = rpc->params; + else nvkm_gsp_rpc_done(gsp, rpc); return ret; @@ -860,9 +864,11 @@ r535_gsp_intr_get_table(struct nvkm_gsp *gsp) if (IS_ERR(ctrl)) return PTR_ERR(ctrl); - ctrl = nvkm_gsp_rm_ctrl_push(&gsp->internal.device.subdevice, ctrl, sizeof(*ctrl)); - if (WARN_ON(IS_ERR(ctrl))) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&gsp->internal.device.subdevice, &ctrl, sizeof(*ctrl)); + if (WARN_ON(ret)) { + nvkm_gsp_rm_ctrl_done(&gsp->internal.device.subdevice, ctrl); + return ret; + } for (unsigned i = 0; i < ctrl->tableLen; i++) { enum nvkm_subdev_type type; -- cgit v1.2.3 From 9c9dd22ba5d747cceee3d1b1757448b5c48ac705 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:58 +1000 Subject: nouveau/gsp: always free the alloc messages on r535 Fixes a memory leak seen with kmemleak. Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-10-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 642ce0dd0faf..9ee58e2a0eb2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -605,8 +605,7 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *argv, u32 repc) ret = repc ? rpc->params : NULL; } - if (ret) - nvkm_gsp_rpc_done(gsp, rpc); + nvkm_gsp_rpc_done(gsp, rpc); return ret; } -- cgit v1.2.3 From eacabb5462717a52fccbbbba458365a4f5e61f35 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 22 Dec 2023 14:31:59 +1000 Subject: nouveau: push event block/allowing out of the fence context There is a deadlock between the irq and fctx locks, the irq handling takes irq then fctx lock the fence signalling takes fctx then irq lock This splits the fence signalling path so the code that hits the irq lock is done in a separate work queue. This seems to fix crashes/hangs when using nouveau gsp with i915 primary GPU. 
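The shape of the fix is the classic "defer the lock-heavy side to process context" pattern: track enable/disable requests with an atomic counter, and let a work item perform the call that needs the irq-side lock so it can never nest inside the fence lock. A condensed sketch of that pattern (type and helper names are illustrative, not the exact driver code) follows; the real change is in the diff below.

#include <linux/workqueue.h>
#include <linux/atomic.h>

struct fence_ctx_sketch {
	atomic_t notify_ref;			/* how many waiters want events */
	struct work_struct allow_block_work;	/* runs outside the fence lock */
};

/* Assumed helpers that internally take the irq-side lock. */
static void event_allow(struct fence_ctx_sketch *ctx);
static void event_block(struct fence_ctx_sketch *ctx);

static void allow_block_worker(struct work_struct *work)
{
	struct fence_ctx_sketch *ctx =
		container_of(work, struct fence_ctx_sketch, allow_block_work);

	/* Process context: taking the irq-side lock here cannot invert
	 * against a caller that already holds the fence lock. */
	if (atomic_read(&ctx->notify_ref) == 0)
		event_block(ctx);
	else
		event_allow(ctx);
}

static void fence_signal_path(struct fence_ctx_sketch *ctx)
{
	/* May run with the fence lock held: never touch the irq lock
	 * directly, only update the counter and schedule the work. */
	if (atomic_dec_and_test(&ctx->notify_ref))
		schedule_work(&ctx->allow_block_work);
}

This keeps the lock order consistent: the irq path still takes irq-then-fctx, while the fence-signalling path only ever schedules work.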
Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-11-airlied@gmail.com --- drivers/gpu/drm/nouveau/nouveau_fence.c | 28 +++++++++++++++++++++++----- drivers/gpu/drm/nouveau/nouveau_fence.h | 5 ++++- 2 files changed, 27 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ca762ea55413..5057d976fa57 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence) if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - if (!--fctx->notify_ref) + if (atomic_dec_and_test(&fctx->notify_ref)) drop = 1; } @@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) void nouveau_fence_context_del(struct nouveau_fence_chan *fctx) { + cancel_work_sync(&fctx->allow_block_work); nouveau_fence_context_kill(fctx, 0); nvif_event_dtor(&fctx->event); fctx->dead = 1; @@ -167,6 +168,18 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc return ret; } +static void +nouveau_fence_work_allow_block(struct work_struct *work) +{ + struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, + allow_block_work); + + if (atomic_read(&fctx->notify_ref) == 0) + nvif_event_block(&fctx->event); + else + nvif_event_allow(&fctx->event); +} + void nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) { @@ -178,6 +191,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha } args; int ret; + INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); @@ -521,15 +535,19 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f) struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); bool ret; + bool do_work; - if (!fctx->notify_ref++) - nvif_event_allow(&fctx->event); + if (atomic_inc_return(&fctx->notify_ref) == 0) + do_work = true; ret = nouveau_fence_no_signaling(f); if (ret) set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); - else if (!--fctx->notify_ref) - nvif_event_block(&fctx->event); + else if (atomic_dec_and_test(&fctx->notify_ref)) + do_work = true; + + if (do_work) + schedule_work(&fctx->allow_block_work); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 64d33ae7f356..28f5cf013b89 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -3,6 +3,7 @@ #define __NOUVEAU_FENCE_H__ #include +#include #include struct nouveau_drm; @@ -45,7 +46,9 @@ struct nouveau_fence_chan { char name[32]; struct nvif_event event; - int notify_ref, dead, killed; + struct work_struct allow_block_work; + atomic_t notify_ref; + int dead, killed; }; struct nouveau_fence_priv { -- cgit v1.2.3 From eb284f4b37817d2038fdfe1a9d51769730ab7b5f Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 22 Dec 2023 14:32:00 +1000 Subject: drm/nouveau/dp: Honor GSP link training retry timeouts Turns out that one of the ways that Nvidia's driver handles the pre-LT timeout for eDP panels is by providing a retry timeout in their link training callbacks that we're expected to wait for. 
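In other words, the training call becomes a small bounded loop that sleeps for the firmware-supplied delay between attempts. A hedged sketch of that control flow is below; gsp_link_train_once() is an assumed helper standing in for the NV0073 control call, and the real change follows in the diff.

#include <linux/delay.h>
#include <linux/errno.h>

/* Assumed helper: one training attempt, reporting a retry delay (ms). */
static int gsp_link_train_once(void *priv, unsigned int *retry_ms);

static int link_train_with_retries(void *priv)
{
	unsigned int retry_ms = 0;
	int ret, tries;

	for (tries = 0; tries < 3; tries++) {
		ret = gsp_link_train_once(priv, &retry_ms);
		if (ret != -EAGAIN || !retry_ms)
			break;		/* success, hard failure, or no retry hint */
		msleep(retry_ms);	/* wait as long as GSP asked before retrying */
	}

	return ret;
}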
Up until now we didn't pay any attention to this parameter. So, start honoring the timeout if link training fails - and retry up to 3 times. The "3 times" bit comes from OpenRM's link training code. [airlied: this fixes the panel on one of my laptops] Signed-off-by: Lyude Paul Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-12-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c | 64 ++++++++++++++++--------- 1 file changed, 42 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c index 1b4f988df7ed..6a0a4d3b8902 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c @@ -957,40 +957,60 @@ r535_dp_train_target(struct nvkm_outp *outp, u8 target, bool mst, u8 link_nr, u8 { struct nvkm_disp *disp = outp->disp; NV0073_CTRL_DP_CTRL_PARAMS *ctrl; - int ret; - - ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_CTRL, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + int ret, retries; + u32 cmd, data; - ctrl->subDeviceInstance = 0; - ctrl->displayId = BIT(outp->index); - ctrl->cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) | - NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) | - NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES); - ctrl->data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) | - NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) | - NVVAL(NV0073_CTRL, DP_DATA, TARGET, target); + cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) | + NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) | + NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES); + data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) | + NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) | + NVVAL(NV0073_CTRL, DP_DATA, TARGET, target); if (mst) - ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, MULTI_STREAM); + cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, MULTI_STREAM); if (outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_ENHANCED_FRAME_CAP) - ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE); + cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE); if (target == 0 && (outp->dp.dpcd[DPCD_RC02] & 0x20) && !(outp->dp.dpcd[DPCD_RC03] & DPCD_RC03_TPS4_SUPPORTED)) - ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES); + cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES); - ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); - if (ret) { - nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); - return ret; + /* We should retry up to 3 times, but only if GSP asks politely */ + for (retries = 0; retries < 3; ++retries) { + ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_CTRL, + sizeof(*ctrl)); + if (IS_ERR(ctrl)) + return PTR_ERR(ctrl); + + ctrl->subDeviceInstance = 0; + ctrl->displayId = BIT(outp->index); + ctrl->retryTimeMs = 0; + ctrl->cmd = cmd; + ctrl->data = data; + + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret == -EAGAIN && ctrl->retryTimeMs) { + /* + * Device (likely an eDP panel) isn't ready yet, wait for the time specified + * by GSP before retrying again + */ + nvkm_debug(&disp->engine.subdev, + "Waiting %dms for GSP LT panel delay before retrying\n", + ctrl->retryTimeMs); + msleep(ctrl->retryTimeMs); + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + } else { + /* GSP didn't say to retry, or we were successful */ + if 
(ctrl->err) + ret = -EIO; + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + break; + } } - ret = ctrl->err ? -EIO : 0; - nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); return ret; } -- cgit v1.2.3 From 196da3f3f76a46905f7daab29c56974f1aba9a7a Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 5 Jan 2024 19:40:06 +0200 Subject: drm/rockchip: vop2: Drop unused if_dclk_rate variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588") introduced a variable which ended up being unused: rockchip_drm_vop2.c:1688:23: warning: variable ‘if_dclk_rate’ set but not used [-Wunused-but-set-variable] This has been initially used as part of a formula to compute the clock dividers, but eventually it has been replaced by static values. Drop the variable declaration and move its assignment to the comment block, to serve as documentation of how the constants have been generated. Signed-off-by: Cristian Ciocaltea Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240105174007.98054-1-cristian.ciocaltea@collabora.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index a3a03e9cfe13..85b3b4871a1d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1680,7 +1680,6 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, unsigned long dclk_core_rate = v_pixclk >> 2; unsigned long dclk_rate = v_pixclk; unsigned long dclk_out_rate; - unsigned long if_dclk_rate; unsigned long if_pixclk_rate; int K = 1; @@ -1695,8 +1694,8 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, } if_pixclk_rate = (dclk_core_rate << 1) / K; - if_dclk_rate = dclk_core_rate / K; /* + * if_dclk_rate = dclk_core_rate / K; * *if_pixclk_div = dclk_rate / if_pixclk_rate; * *if_dclk_div = dclk_rate / if_dclk_rate; */ -- cgit v1.2.3 From 5df0f0b3b4d4f5eaac19f550a30be8922f2aca95 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Fri, 22 Dec 2023 18:24:20 +0800 Subject: drm/amd/pm: Add mem_busy_percent for GCv9.4.3 apu Expose sysfs entry mem_busy_percent for GC version 9.4.3 APU system Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 2cd995b0ceba..f3cb490fe79b 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2168,7 +2168,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (amdgpu_dpm_is_overdrive_supported(adev)) *states = ATTR_STATE_SUPPORTED; } else if (DEVICE_ATTR_IS(mem_busy_percent)) { - if (adev->flags & AMD_IS_APU || gc_ver == IP_VERSION(9, 0, 1)) + if ((adev->flags & AMD_IS_APU && + gc_ver != IP_VERSION(9, 4, 3)) || + gc_ver == IP_VERSION(9, 0, 1)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pcie_bw)) { /* PCIe Perf counters won't work on APU nodes */ -- cgit v1.2.3 From 6697dbf0afff73fcf2b53e99c4accdab58892e39 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 31 Dec 2023 17:42:47 +0800 Subject: Revert "drm/amdgpu: enable mca debug mode on APU by default" Not needed 
any more with firmware fixes Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 39399d0f2ce5..fc42fb6ee191 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3138,8 +3138,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; - /* enable MCA debug on APU device */ - amdgpu_ras_set_mca_debug_mode(adev, !!(adev->flags & AMD_IS_APU)); + amdgpu_ras_set_mca_debug_mode(adev, false); list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { if (!node->ras_obj) { -- cgit v1.2.3 From 30afdffb3f600d8fd1d5afa1b7187081e1ac85be Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Mon, 1 Jan 2024 18:28:22 +0000 Subject: drm/amd/display: Fix sending VSC (+ colorimetry) packets for DP/eDP displays without PSR The check for sending the vsc infopacket to the display was gated behind PSR (Panel Self Refresh) being enabled. The vsc infopacket also contains the colorimetry (specifically the container color gamut) information for the stream on modern DP. PSR is typically only supported on mobile phone eDP displays, thus this was not getting sent for typical desktop monitors or TV screens. This functionality is needed for proper HDR10 functionality on DP as it wants BT2020 RGB/YCbCr for the container color space. Cc: stable@vger.kernel.org Cc: Harry Wentland Cc: Xaver Hugl Cc: Melissa Wen Fixes: 15f9dfd545a1 ("drm/amd/display: Register Colorspace property for DP and HDMI") Tested-by: Simon Berz Tested-by: Xaver Hugl Signed-off-by: Joshua Ashton Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++--- .../gpu/drm/amd/display/modules/info_packet/info_packet.c | 13 ++++++++----- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d869d0d7bf4c..d9729cf5ceea 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6236,8 +6236,9 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket); - - if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) { + else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || + stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + stream->signal == SIGNAL_TYPE_EDP) { // // should decide stream support vsc sdp colorimetry capability // before building vsc info packet @@ -6253,8 +6254,9 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) tf = TRANSFER_FUNC_GAMMA_22; mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); - aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; + if (stream->link->psr_settings.psr_feature_enabled) + aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } finish: dc_sink_release(sink); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 84f9b412a4f1..738ee763f24a 100644 --- 
a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -147,12 +147,15 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } /* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */ - if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) - vsc_packet_revision = vsc_packet_rev4; - else if (stream->link->replay_settings.config.replay_supported) + if (stream->link->psr_settings.psr_feature_enabled) { + if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) + vsc_packet_revision = vsc_packet_rev4; + else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) + vsc_packet_revision = vsc_packet_rev2; + } + + if (stream->link->replay_settings.config.replay_supported) vsc_packet_revision = vsc_packet_rev4; - else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1) - vsc_packet_revision = vsc_packet_rev2; /* Update to revision 5 for extended colorimetry support */ if (stream->use_vsc_sdp_for_colorimetry) -- cgit v1.2.3 From 1b0b232ee4e005e402a9cd21e47cecb6d6f54a29 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Dec 2023 12:33:45 -0500 Subject: drm/amd/display: add nv12 bounding box This was included in gpu_info firmware, move it into the driver for consistency with other nv1x parts. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2318 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 110 ++++++++++++++++++++- 1 file changed, 109 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 1ec8ca1fdb4f..38ab9ad60ef8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -441,7 +441,115 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = { .use_urgent_burst_bw = 0 }; -struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 }; +struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 560.0, + .fabricclk_mhz = 560.0, + .dispclk_mhz = 513.0, + .dppclk_mhz = 513.0, + .phyclk_mhz = 540.0, + .socclk_mhz = 560.0, + .dscclk_mhz = 171.0, + .dram_speed_mts = 1069.0, + }, + { + .state = 1, + .dcfclk_mhz = 694.0, + .fabricclk_mhz = 694.0, + .dispclk_mhz = 642.0, + .dppclk_mhz = 642.0, + .phyclk_mhz = 600.0, + .socclk_mhz = 694.0, + .dscclk_mhz = 214.0, + .dram_speed_mts = 1324.0, + }, + { + .state = 2, + .dcfclk_mhz = 875.0, + .fabricclk_mhz = 875.0, + .dispclk_mhz = 734.0, + .dppclk_mhz = 734.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 875.0, + .dscclk_mhz = 245.0, + .dram_speed_mts = 1670.0, + }, + { + .state = 3, + .dcfclk_mhz = 1000.0, + .fabricclk_mhz = 1000.0, + .dispclk_mhz = 1100.0, + .dppclk_mhz = 1100.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1000.0, + .dscclk_mhz = 367.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 4, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 2000.0, + }, + { + .state = 5, + .dcfclk_mhz = 1200.0, + .fabricclk_mhz = 1200.0, + .dispclk_mhz = 1284.0, + .dppclk_mhz = 1284.0, + .phyclk_mhz = 810.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 428.0, + .dram_speed_mts = 2000.0, + }, + }, + + .num_states = 5, + .sr_exit_time_us = 1.9, + .sr_enter_plus_exit_time_us = 4.4, + 
.urgent_latency_us = 3.0, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, + .max_avg_sdp_bw_use_normal_percent = 40.0, + .max_avg_dram_bw_use_normal_percent = 40.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 40.0, + .max_request_size_bytes = 256, + .dram_channel_width_bytes = 16, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .dcn_downspread_percent = 0.5, + .downspread_percent = 0.5, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 131, + .urgent_out_of_order_return_per_channel_bytes = 4096, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 16, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 45.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, + .dispclk_dppclk_vco_speed_mhz = 3850, + .xfc_bus_transport_time_us = 20, + .xfc_xbuf_latency_tolerance_us = 50, + .use_urgent_burst_bw = 0, +}; struct _vcs_dpi_ip_params_st dcn2_1_ip = { .odm_capable = 1, -- cgit v1.2.3 From fb915c87edc2c99bbde148a62bfa97a2c6d991bb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Dec 2023 12:36:08 -0500 Subject: drm/amdgpu: skip gpu_info fw loading on navi12 It's no longer required. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2318 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 85ed0d66a029..5bb444bb36ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2251,15 +2251,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; - if (adev->mman.discovery_bin) { - /* - * FIXME: The bounding box is still needed by Navi12, so - * temporarily read it from gpu_info firmware. Should be dropped - * when DAL no longer needs it. - */ - if (adev->asic_type != CHIP_NAVI12) - return 0; - } + if (adev->mman.discovery_bin) + return 0; switch (adev->asic_type) { default: -- cgit v1.2.3 From 151374fb6e17ce966e1db8e1e2b35ea517202779 Mon Sep 17 00:00:00 2001 From: Marcelo Mendes Spessoto Junior Date: Thu, 28 Dec 2023 19:15:13 -0300 Subject: drm/amd/display: Removing duplicate copyright text mod_freesync header file has duplicated copyright boilerplate. Drop the duplicate. 
Signed-off-by: Marcelo Mendes Spessoto Junior Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/modules/inc/mod_freesync.h | 28 ---------------------- 1 file changed, 28 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index afe1f6cce528..cc3dc9b589f6 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -1,31 +1,3 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - - - - /* * Copyright 2016 Advanced Micro Devices, Inc. * -- cgit v1.2.3 From 8e317a811f3d63760d737c4371783f2e98291d40 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 28 Dec 2023 22:42:04 +0530 Subject: drm/amdgpu: Remove unreachable code in 'atom_skip_src_int()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the below: drivers/gpu/drm/amd/amdgpu/atom.c:398 atom_skip_src_int() warn: ignoring unreachable code. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/atom.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 2c221000782c..a33e890c70d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -395,7 +395,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr) (*ptr)++; return; } - return; } } -- cgit v1.2.3 From c572abffe9f50c8ba33060865449313b3f588c35 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Wed, 3 Jan 2024 15:27:38 +0800 Subject: drm/amdgpu: add param to specify fw bo location for front-door loading This param can help isolating data path issues on new systems in early phase. 
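Assuming the new knob behaves like any other amdgpu module parameter (this usage
note is not part of the patch), forcing the firmware BO into VRAM for front-door
loading would look something like:

    # at module load time
    modprobe amdgpu fw_bo_location=1

    # or on the kernel command line
    amdgpu.fw_bo_location=1

Per the MODULE_PARM_DESC added below, 0 selects system RAM, 1 selects VRAM and
-1 keeps the existing automatic behaviour.
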
Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++- 4 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 616b6c911767..9da14436a373 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -254,6 +254,8 @@ extern int amdgpu_agp; extern int amdgpu_wbrf; +extern int fw_bo_location; + #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 880137774b4e..852cec98ff26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -210,6 +210,7 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; +int fw_bo_location = -1; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -989,6 +990,10 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); +MODULE_PARM_DESC(fw_bo_location, + "location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram"); +module_param(fw_bo_location, int, 0644); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 1bf975b8d083..2addbdf88394 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -466,7 +466,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - amdgpu_sriov_vf(adev) ? + (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index b14127429f30..1f67914568f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? 
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); -- cgit v1.2.3 From 4f32504a2f85a7b40fe149436881381f48e9c0c0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 3 Jan 2024 22:05:16 +0530 Subject: drm/amdgpu: Fix variable 'mca_funcs' dereferenced before NULL check in 'amdgpu_mca_smu_get_mca_entry()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c:377 amdgpu_mca_smu_get_mca_entry() warn: variable dereferenced before check 'mca_funcs' (see line 368) 357 int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, 358 int idx, struct mca_bank_entry *entry) 359 { 360 const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; 361 int count; 362 363 switch (type) { 364 case AMDGPU_MCA_ERROR_TYPE_UE: 365 count = mca_funcs->max_ue_count; mca_funcs is dereferenced here. 366 break; 367 case AMDGPU_MCA_ERROR_TYPE_CE: 368 count = mca_funcs->max_ce_count; mca_funcs is dereferenced here. 369 break; 370 default: 371 return -EINVAL; 372 } 373 374 if (idx >= count) 375 return -EINVAL; 376 377 if (mca_funcs && mca_funcs->mca_get_mca_entry) ^^^^^^^^^ Checked too late! Cc: Yang Wang Cc: Hawking Zhang Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 8911310f98df..59fafb8392e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -360,6 +360,9 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; int count; + if (!mca_funcs || !mca_funcs->mca_get_mca_entry) + return -EOPNOTSUPP; + switch (type) { case AMDGPU_MCA_ERROR_TYPE_UE: count = mca_funcs->max_ue_count; @@ -374,10 +377,7 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err if (idx >= count) return -EINVAL; - if (mca_funcs && mca_funcs->mca_get_mca_entry) - return mca_funcs->mca_get_mca_entry(adev, type, idx, entry); - - return -EOPNOTSUPP; + return mca_funcs->mca_get_mca_entry(adev, type, idx, entry); } #if defined(CONFIG_DEBUG_FS) -- cgit v1.2.3 From 13a1851f923d9a7a78a477497295c2dfd16ad4a4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 3 Jan 2024 22:22:44 +0530 Subject: drm/amdgpu: Fix '*fw' from request_firmware() not released in 'amdgpu_ucode_request()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c:1404 amdgpu_ucode_request() warn: '*fw' from request_firmware() not released on lines: 1404. 
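The ownership rule behind the warning, sketched hypothetically (the demo_* names
are stand-ins, and the actual fix is the amdgpu_ucode.c hunk that follows): when
the helper returns an error its callers assume there is nothing to free, so a
blob obtained from request_firmware() must be released inside the helper on the
validation-failure path.

    #include <linux/errno.h>
    #include <linux/firmware.h>

    static int demo_validate(const struct firmware *fw)
    {
            /* stand-in for amdgpu_ucode_validate(): reject empty blobs */
            return fw->size ? 0 : -EINVAL;
    }

    static int demo_load_fw(struct device *dev, const struct firmware **fw,
                            const char *name)
    {
            int err = request_firmware(fw, name, dev);

            if (err)
                    return err;

            err = demo_validate(*fw);
            if (err) {
                    /*
                     * On validation failure the caller never sees a usable
                     * *fw, so it has to be released here or it leaks.
                     */
                    release_firmware(*fw);
                    *fw = NULL;
            }

            return err;
    }
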
Cc: Mario Limonciello Cc: Lijo Lazar Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 1f67914568f6..d334e42fe0eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1398,9 +1398,13 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, if (err) return -ENODEV; + err = amdgpu_ucode_validate(*fw); - if (err) + if (err) { dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name); + release_firmware(*fw); + *fw = NULL; + } return err; } -- cgit v1.2.3 From 499839eca34ad62d43025ec0b46b80e77065f6d8 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 21 Dec 2023 07:16:23 +0530 Subject: drm/amdkfd: Confirm list is non-empty before utilizing list_first_entry in kfd_topology.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before using list_first_entry, make sure to check that list is not empty, if list is empty return -ENODATA. Fixes the below: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1347 kfd_create_indirect_link_prop() warn: can 'gpu_link' even be NULL? drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1428 kfd_add_peer_prop() warn: can 'iolink1' even be NULL? drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1433 kfd_add_peer_prop() warn: can 'iolink2' even be NULL? Fixes: 0f28cca87e9a ("drm/amdkfd: Extend KFD device topology to surface peer-to-peer links") Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Felix Kuehling Suggested-by: Lijo Lazar Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 057284bf50bb..58d775a0668d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1342,10 +1342,11 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g num_cpu++; } + if (list_empty(&kdev->io_link_props)) + return -ENODATA; + gpu_link = list_first_entry(&kdev->io_link_props, - struct kfd_iolink_properties, list); - if (!gpu_link) - return -ENOMEM; + struct kfd_iolink_properties, list); for (i = 0; i < num_cpu; i++) { /* CPU <--> GPU */ @@ -1423,15 +1424,17 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, peer->gpu->adev)) return ret; + if (list_empty(&kdev->io_link_props)) + return -ENODATA; + iolink1 = list_first_entry(&kdev->io_link_props, - struct kfd_iolink_properties, list); - if (!iolink1) - return -ENOMEM; + struct kfd_iolink_properties, list); + + if (list_empty(&peer->io_link_props)) + return -ENODATA; iolink2 = list_first_entry(&peer->io_link_props, - struct kfd_iolink_properties, list); - if (!iolink2) - return -ENOMEM; + struct kfd_iolink_properties, list); props = kfd_alloc_struct(props); if (!props) -- cgit v1.2.3 From bf2ad4fb8adca89374b54b225d494e0b1956dbea Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 27 Dec 2023 12:54:44 +0530 Subject: drm/amdgpu: Drop 'fence' check in 'to_amdgpu_amdkfd_fence()' 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return value of container_of(...) can't be null, so null check is not required for 'fence'. Hence drop its NULL check. Fixes the below: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c:93 to_amdgpu_amdkfd_fence() warn: can 'fence' even be NULL? Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 469785d33791..1ef758ac5076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) return NULL; fence = container_of(f, struct amdgpu_amdkfd_fence, base); - if (fence && f->ops == &amdkfd_fence_ops) + if (f->ops == &amdkfd_fence_ops) return fence; return NULL; -- cgit v1.2.3 From b1a428b45dc7e47c7acc2ad0d08d8a6dda910c4c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 29 Dec 2023 15:07:09 +0530 Subject: drm/amdkfd: Fix iterator used outside loop in 'kfd_add_peer_prop()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following about iterator use: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_topology.c:1456 kfd_add_peer_prop() warn: iterator used outside loop: 'iolink3' Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 58d775a0668d..e5f7c92eebcb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1452,17 +1452,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, /* CPU->CPU link*/ cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); if (cpu_dev) { - list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) - if (iolink3->node_to == iolink2->node_to) - break; - - props->weight += iolink3->weight; - props->min_latency += iolink3->min_latency; - props->max_latency += iolink3->max_latency; - props->min_bandwidth = min(props->min_bandwidth, - iolink3->min_bandwidth); - props->max_bandwidth = min(props->max_bandwidth, - iolink3->max_bandwidth); + list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { + if (iolink3->node_to != iolink2->node_to) + continue; + + props->weight += iolink3->weight; + props->min_latency += iolink3->min_latency; + props->max_latency += iolink3->max_latency; + props->min_bandwidth = min(props->min_bandwidth, + iolink3->min_bandwidth); + props->max_bandwidth = min(props->max_bandwidth, + iolink3->max_bandwidth); + break; + } } else { WARN(1, "CPU node not found"); } -- cgit v1.2.3 From 2f3be3ca779b11c332441b10e00443a2510f4d7b Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Sun, 24 Dec 2023 16:22:47 +0800 Subject: drm/amd/pm/smu7: fix a memleak in smu7_hwmgr_backend_init The hwmgr->backend, (i.e. 
data) allocated by kzalloc is not freed in the error-handling paths of smu7_get_evv_voltages and smu7_update_edc_leakage_table. However, it did be freed in the error-handling of phm_initializa_dynamic_state_adjustment_rule_settings, by smu7_hwmgr_backend_fini. So the lack of free in smu7_get_evv_voltages and smu7_update_edc_leakage_table is considered a memleak in this patch. Fixes: 599a7e9fe1b6 ("drm/amd/powerplay: implement smu7 hwmgr to manager asics with smu ip version 7.") Fixes: 8f0804c6b7d0 ("drm/amd/pm: add edc leakage controller setting") Signed-off-by: Zhipeng Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 11372fcc59c8..b1a8799e2dee 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -2974,6 +2974,8 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr) result = smu7_get_evv_voltages(hwmgr); if (result) { pr_info("Get EVV Voltage Failed. Abort Driver loading!\n"); + kfree(hwmgr->backend); + hwmgr->backend = NULL; return -EINVAL; } } else { @@ -3019,8 +3021,10 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr) } result = smu7_update_edc_leakage_table(hwmgr); - if (result) + if (result) { + smu7_hwmgr_backend_fini(hwmgr); return result; + } return 0; } -- cgit v1.2.3 From c86e5ab2273705c0588ce23daf55e4c12f1f0998 Mon Sep 17 00:00:00 2001 From: Marcelo Mendes Spessoto Junior Date: Fri, 29 Dec 2023 14:41:50 -0300 Subject: drm/amd/display: Fix hdcp1_execution.c codestyle Remove braces from single statement if expression in hdcp1_execution.c file Signed-off-by: Marcelo Mendes Spessoto Junior Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 1ddb4f5eac8e..182e7532dda8 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -63,6 +63,7 @@ static inline enum mod_hdcp_status check_hdcp_capable_dp(struct mod_hdcp *hdcp) static inline enum mod_hdcp_status check_r0p_available_dp(struct mod_hdcp *hdcp) { enum mod_hdcp_status status; + if (is_dp_hdcp(hdcp)) { status = (hdcp->auth.msg.hdcp1.bstatus & DP_BSTATUS_R0_PRIME_READY) ? @@ -131,9 +132,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { /* Avoid device count == 0 to do authentication */ - if (0 == get_device_count(hdcp)) { + if (get_device_count(hdcp) == 0) return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE; - } /* Some MST display may choose to report the internal panel as an HDCP RX. 
* To update this condition with 1(because the immediate repeater's internal -- cgit v1.2.3 From f28390cd004cefa531dc4f5c190a2f11901a6f9a Mon Sep 17 00:00:00 2001 From: Marcelo Mendes Spessoto Junior Date: Fri, 29 Dec 2023 14:41:51 -0300 Subject: drm/amd/display: Fix hdcp_psp.c codestyle Fix identation for hdcp_psp.c file Signed-off-by: Marcelo Mendes Spessoto Junior Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index ee67a35c2a8e..8c137d7c032e 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -443,7 +443,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) { if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE) - continue; + continue; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); @@ -926,7 +926,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) { if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE) - continue; + continue; hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.display_handle = hdcp->displays[i].index; hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.session_handle = hdcp->auth.id; -- cgit v1.2.3 From 0c3c952d0512d0e27c191bdb3da85efbf2780ef6 Mon Sep 17 00:00:00 2001 From: Marcelo Mendes Spessoto Junior Date: Fri, 29 Dec 2023 14:41:52 -0300 Subject: drm/amd/display: Fix freesync.c codestyle Remove braces for single statement if expression for freesync.c file Signed-off-by: Marcelo Mendes Spessoto Junior Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 47296d155c3a..3955b7e4b2e2 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -81,6 +81,7 @@ fail_alloc_context: void mod_freesync_destroy(struct mod_freesync *mod_freesync) { struct core_freesync *core_freesync = NULL; + if (mod_freesync == NULL) return; core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync); @@ -278,9 +279,8 @@ static void apply_below_the_range(struct core_freesync *core_freesync, } } else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) { /* Enter Below the Range */ - if (!in_out_vrr->btr.btr_active) { + if (!in_out_vrr->btr.btr_active) in_out_vrr->btr.btr_active = true; - } } /* BTR set to "not active" so disengage */ -- cgit v1.2.3 From 30c822afdf9f4b7194384e83f05adefc9da15632 Mon Sep 17 00:00:00 2001 From: Marcelo Mendes Spessoto Junior Date: Fri, 29 Dec 2023 14:41:53 -0300 Subject: drm/amd/display: Fix hdcp_psp.h codestyle Fix identation inside enum and place expressions in define macros inside () for hdcp_psp.h file Signed-off-by: Marcelo Mendes Spessoto Junior Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git 
a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h index 5b71bc96b98c..7844ea91650b 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h @@ -98,9 +98,9 @@ enum ta_dtm_encoder_type { * This enum defines software value for dio_output_type */ typedef enum { - TA_DTM_DIO_OUTPUT_TYPE__INVALID, - TA_DTM_DIO_OUTPUT_TYPE__DIRECT, - TA_DTM_DIO_OUTPUT_TYPE__DPIA + TA_DTM_DIO_OUTPUT_TYPE__INVALID, + TA_DTM_DIO_OUTPUT_TYPE__DIRECT, + TA_DTM_DIO_OUTPUT_TYPE__DPIA } ta_dtm_dio_output_type; struct ta_dtm_topology_update_input_v3 { @@ -237,11 +237,11 @@ enum ta_hdcp2_hdcp2_msg_id_max_size { #define TA_HDCP__HDCP1_KSV_LIST_MAX_ENTRIES 127 #define TA_HDCP__HDCP1_V_PRIME_SIZE 20 #define TA_HDCP__HDCP2_TX_BUF_MAX_SIZE \ - TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6 + (TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6) // 64 bits boundaries #define TA_HDCP__HDCP2_RX_BUF_MAX_SIZE \ - TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4 + (TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4) enum ta_hdcp_status { TA_HDCP_STATUS__SUCCESS = 0x00, -- cgit v1.2.3 From 31906f4cf6b1ece08f7a16c6c53ef899f1fda009 Mon Sep 17 00:00:00 2001 From: Marcelo Mendes Spessoto Junior Date: Fri, 29 Dec 2023 14:41:54 -0300 Subject: drm/amd/display: Fix hdcp2_execution.c codestyle Remove braces for single statement if expressions and change comparison order for hdcp2_execution.c file Signed-off-by: Marcelo Mendes Spessoto Junior Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index 91c22b96ebde..733f22bed021 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -208,9 +208,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { /* Avoid device count == 0 to do authentication */ - if (0 == get_device_count(hdcp)) { + if (get_device_count(hdcp) == 0) return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE; - } /* Some MST display may choose to report the internal panel as an HDCP RX. 
*/ /* To update this condition with 1(because the immediate repeater's internal */ @@ -689,9 +688,8 @@ static enum mod_hdcp_status validate_stream_ready(struct mod_hdcp *hdcp, if (is_hdmi_dvi_sl_hdcp(hdcp)) { if (!process_rxstatus(hdcp, event_ctx, input, &status)) goto out; - if (event_ctx->rx_id_list_ready) { + if (event_ctx->rx_id_list_ready) goto out; - } } if (is_hdmi_dvi_sl_hdcp(hdcp)) if (!mod_hdcp_execute_and_set(check_stream_ready_available, -- cgit v1.2.3 From 0783f17e760d3cfa6b79aea94712dc7082d4ae2c Mon Sep 17 00:00:00 2001 From: Marcelo Mendes Spessoto Junior Date: Fri, 29 Dec 2023 14:41:55 -0300 Subject: drm/amd/display: Fix hdcp_log.h codestyle Place HDCP_EVENT_TRACE(hdcp, event) macro content inside do while loop to avoid if-else issues in hdcp_log.h file v2: fix up build (Alex) Signed-off-by: Marcelo Mendes Spessoto Junior Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h index c62df3bcc7cb..1d83c1b9da10 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h @@ -86,10 +86,12 @@ #define HDCP_CPIRQ_TRACE(hdcp) \ HDCP_LOG_FSM(hdcp, "[Link %d] --> CPIRQ", hdcp->config.index) #define HDCP_EVENT_TRACE(hdcp, event) \ - if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \ - HDCP_TIMEOUT_TRACE(hdcp); \ - else if (event == MOD_HDCP_EVENT_CPIRQ) \ - HDCP_CPIRQ_TRACE(hdcp) + do { \ + if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \ + HDCP_TIMEOUT_TRACE(hdcp); \ + else if (event == MOD_HDCP_EVENT_CPIRQ) \ + HDCP_CPIRQ_TRACE(hdcp); \ + } while (0) /* TODO: find some way to tell if logging is off to save time */ #define HDCP_DDC_READ_TRACE(hdcp, msg_name, msg, msg_size) do { \ mod_hdcp_dump_binary_message(msg, msg_size, hdcp->buf, \ -- cgit v1.2.3 From 1ac725b300769b179375c9100b81ea0a82b39896 Mon Sep 17 00:00:00 2001 From: Marcelo Mendes Spessoto Junior Date: Fri, 29 Dec 2023 14:41:56 -0300 Subject: drm/amd/display: Fix power_helpers.c codestyle Place define macro expression inside () in power_helpers.c file Signed-off-by: Marcelo Mendes Spessoto Junior Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 34b848c981e0..ad98e504c00d 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -31,7 +31,7 @@ #define DIV_ROUNDUP(a, b) (((a)+((b)/2))/(b)) #define bswap16_based_on_endian(big_endian, value) \ - (big_endian) ? cpu_to_be16(value) : cpu_to_le16(value) + ((big_endian) ? 
cpu_to_be16(value) : cpu_to_le16(value)) /* Possible Min Reduction config from least aggressive to most aggressive * 0 1 2 3 4 5 6 7 8 9 10 11 12 -- cgit v1.2.3 From c966dc0e9d96dc44423c404a2628236f1200c24e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 23:13:36 +0100 Subject: drm/amd/display: avoid stringop-overflow warnings for dp_decide_lane_settings() gcc prints a warning about a possible array overflow for a couple of callers of dp_decide_lane_settings() after commit 1b56c90018f0 ("Makefile: Enable -Wstringop-overflow globally"): drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c: In function 'dp_perform_fixed_vs_pe_training_sequence_legacy': drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c:426:25: error: 'dp_decide_lane_settings' accessing 4 bytes in a region of size 1 [-Werror=stringop-overflow=] 426 | dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 427 | lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c:426:25: note: referencing argument 4 of type 'union dpcd_training_lane[4]' I'm not entirely sure what caused this, but changing the prototype to expect a pointer instead of an array avoids the warnings. Fixes: 7727e7b60f82 ("drm/amd/display: Improve robustness of FIXED_VS link training at DP1 rates") Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c | 2 +- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 90339c2dfd84..5a0b04518956 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -807,7 +807,7 @@ void dp_decide_lane_settings( const struct link_training_settings *lt_settings, const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX], struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]) + union dpcd_training_lane *dpcd_lane_settings) { uint32_t lane; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h index 7d027bac8255..851bd17317a0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h @@ -111,7 +111,7 @@ void dp_decide_lane_settings( const struct link_training_settings *lt_settings, const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX], struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX], - union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]); + union dpcd_training_lane *dpcd_lane_settings); enum dc_dp_training_pattern decide_cr_training_pattern( const struct dc_link_settings *link_settings); -- cgit v1.2.3 From 0f35b0a7b8fa402adbffa2565047cdcc4c480153 Mon Sep 17 00:00:00 2001 From: Kaibo Ma Date: Wed, 3 Jan 2024 12:29:56 +0800 Subject: Revert "drm/amdkfd: Relocate TBA/TMA to opposite 
side of VM hole" That commit causes NULL pointer dereferences in dmesgs when running applications using ROCm, including clinfo, blender, and PyTorch, since v6.6.1. Revert it to fix blender again. This reverts commit 96c211f1f9ef82183493f4ceed4e347b52849149. Closes: https://github.com/ROCm/ROCm/issues/2596 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2991 Reviewed-by: Jay Cornwall Signed-off-by: Kaibo Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 62b205dac63a..6604a3f99c5e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -330,12 +330,6 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) pdd->gpuvm_limit = pdd->dev->kfd->shared_resources.gpuvm_size - 1; - /* dGPUs: the reserved space for kernel - * before SVM - */ - pdd->qpd.cwsr_base = SVM_CWSR_BASE; - pdd->qpd.ib_base = SVM_IB_BASE; - pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); } @@ -345,18 +339,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) pdd->lds_base = MAKE_LDS_APP_BASE_V9(); pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); - pdd->gpuvm_base = PAGE_SIZE; + /* Raven needs SVM to support graphic handle, etc. Leave the small + * reserved space before SVM on Raven as well, even though we don't + * have to. + * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they + * are used in Thunk to reserve SVM. + */ + pdd->gpuvm_base = SVM_USER_BASE; pdd->gpuvm_limit = pdd->dev->kfd->shared_resources.gpuvm_size - 1; pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); - - /* - * Place TBA/TMA on opposite side of VM hole to prevent - * stray faults from triggering SVM on these pages. - */ - pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size; } int kfd_init_apertures(struct kfd_process *process) @@ -413,6 +407,12 @@ int kfd_init_apertures(struct kfd_process *process) return -EINVAL; } } + + /* dGPUs: the reserved space for kernel + * before SVM + */ + pdd->qpd.cwsr_base = SVM_CWSR_BASE; + pdd->qpd.ib_base = SVM_IB_BASE; } dev_dbg(kfd_device, "node id %u\n", id); -- cgit v1.2.3 From af7cefc618f437556ccb48ddd0c9e8e0cf7fd11d Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Wed, 20 Dec 2023 15:18:05 -0500 Subject: drm/amd/display: Fix recent checkpatch errors in amdgpu_dm - Use tabs, not spaces. 
- Brace and parentheses placement Signed-off-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 4 ++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 5 ++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2d5af83d40b5..9c1871b866cc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -747,7 +747,7 @@ enum amdgpu_transfer_function { AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF, AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF, AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF, - AMDGPU_TRANSFER_FUNCTION_COUNT + AMDGPU_TRANSFER_FUNCTION_COUNT }; struct dm_plane_state { @@ -844,7 +844,7 @@ struct dm_crtc_state { int abm_level; - /** + /** * @regamma_tf: * * Pre-defined transfer function for converting internal FB -> wire diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index c6ed0d854b01..36bf65a8cd6e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -630,8 +630,7 @@ static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *f static enum dc_transfer_func_predefined amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf) { - switch (tf) - { + switch (tf) { default: case AMDGPU_TRANSFER_FUNCTION_DEFAULT: case AMDGPU_TRANSFER_FUNCTION_IDENTITY: @@ -1225,7 +1224,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, * plane and CRTC degamma at the same time. Explicitly reject atomic * updates when userspace sets both plane and CRTC degamma properties. */ - if (has_crtc_cm_degamma && ret != -EINVAL){ + if (has_crtc_cm_degamma && ret != -EINVAL) { drm_dbg_kms(crtc->base.crtc->dev, "doesn't support plane and CRTC degamma at the same time\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 4439e5a27362..6e715ef3a556 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -305,7 +305,7 @@ dm_crtc_additional_color_mgmt(struct drm_crtc *crtc) { struct amdgpu_device *adev = drm_to_adev(crtc->dev); - if(adev->dm.dc->caps.color.mpc.ogam_ram) + if (adev->dm.dc->caps.color.mpc.ogam_ram) drm_object_attach_property(&crtc->base, adev->mode_info.regamma_tf_property, AMDGPU_TRANSFER_FUNCTION_DEFAULT); -- cgit v1.2.3 From d65e0e91664184299d5e6aaa2f4323e43df9b2c7 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Wed, 20 Dec 2023 16:25:48 -0500 Subject: drm/amd/display: Move fixpt_from_s3132 to amdgpu_dm Other environments don't like the unary minus operator on an unsigned value. 
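An informal note on what the helper does, not part of the change itself: it
converts a value from a sign-magnitude S31.32 layout (sign in bit 63, magnitude
in the remaining bits) into the driver's two's-complement fixed31_32 type, and
the x = -(x & ~(1ULL << 63)) step is exactly where a unary minus lands on an
unsigned __u64. A quick worked example, assuming that layout:

    /* sign-magnitude S31.32 encoding of -1.0: sign bit set, magnitude 1 << 32 */
    __u64 x = (1ULL << 63) | (1ULL << 32);

    /* clear the sign bit, then negate to get the two's-complement pattern */
    x = -(x & ~(1ULL << 63));
    /* x is now 0xFFFFFFFF00000000, i.e. -0x100000000 when read as a signed
     * 64-bit value -- which is -1.0 in fixed31_32, where one == 0x100000000 */

Keeping the helper in amdgpu_dm rather than in the shared fixed31_32.h header
confines the warning-prone construct to the one component that needs it.
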
Signed-off-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 18 +++++++++++++++--- drivers/gpu/drm/amd/display/include/fixed31_32.h | 12 ------------ 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 36bf65a8cd6e..9b527bffe11a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -85,6 +85,18 @@ void amdgpu_dm_init_color_mod(void) setup_x_points_distribution(); } +static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x) +{ + struct fixed31_32 val; + + /* If negative, convert to 2's complement. */ + if (x & (1ULL << 63)) + x = -(x & ~(1ULL << 63)); + + val.value = x; + return val; +} + #ifdef AMD_PRIVATE_COLOR /* Pre-defined Transfer Functions (TF) * @@ -430,7 +442,7 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ - matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i - (i / 4)]); + matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i - (i / 4)]); } } @@ -452,7 +464,7 @@ static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm, */ for (i = 0; i < 12; i++) { /* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */ - matrix[i] = dc_fixpt_from_s3132(ctm->matrix[i]); + matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i]); } } @@ -1136,7 +1148,7 @@ amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state, uint32_t shaper_size, lut3d_size, blend_size; int ret; - dc_plane_state->hdr_mult = dc_fixpt_from_s3132(dm_plane_state->hdr_mult); + dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(dm_plane_state->hdr_mult); shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size); shaper_size = shaper_lut != NULL ? shaper_size : 0; diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index 84da1dd34efd..d4cf7ead1d87 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -69,18 +69,6 @@ static const struct fixed31_32 dc_fixpt_epsilon = { 1LL }; static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL }; static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL }; -static inline struct fixed31_32 dc_fixpt_from_s3132(__u64 x) -{ - struct fixed31_32 val; - - /* If negative, convert to 2's complement. */ - if (x & (1ULL << 63)) - x = -(x & ~(1ULL << 63)); - - val.value = x; - return val; -} - /* * @brief * Initialization routines -- cgit v1.2.3 From 16783d8ef08448815e149e40c82fc1e1fc41ddbf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Jan 2024 11:55:53 -0500 Subject: drm/amdgpu: apply the RV2 system aperture fix to RN/CZN as well These chips needs the same fix. This was previously not seen on then since the AGP aperture expanded the system aperture, but this showed up again when AGP was disabled. 
Reviewed-and-tested-by: Jiadong Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 4 +++- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 4 +++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 ++++++-- 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 53a2ba5fcf4b..22175da0e16a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -102,7 +102,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 55423ff1bb49..95d06da544e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -139,7 +139,9 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 843219a91736..e3ddd22aa172 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the vram which * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d9729cf5ceea..10b2a896c498 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1294,7 +1294,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ /* AGP aperture is disabled */ if (agp_bot > agp_top) { logical_addr_low = adev->gmc.fb_start >> 18; - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the vram which * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. 
So here is the @@ -1306,7 +1308,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ logical_addr_high = adev->gmc.fb_end >> 18; } else { logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18; - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the vram which * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the -- cgit v1.2.3 From 3a0fa3bc245ef92838a8296e0055569b8dff94c4 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Fri, 29 Dec 2023 15:25:00 -0100 Subject: drm/amd/display: fix bandwidth validation failure on DCN 2.1 IGT `amdgpu/amd_color/crtc-lut-accuracy` fails right at the beginning of the test execution, during atomic check, because DC rejects the bandwidth state for a fb sizing 64x64. The test was previously working with the deprecated dc_commit_state(). Now using dc_validate_with_context() approach, the atomic check needs to perform a full state validation. Therefore, set fast_validation to false in the dc_validate_global_state call for atomic check. Cc: stable@vger.kernel.org Fixes: b8272241ff9d ("drm/amd/display: Drop dc_commit_state in favor of dc_commit_streams") Signed-off-by: Melissa Wen Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 10b2a896c498..f6575d7dee97 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10753,7 +10753,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n"); goto fail; } - status = dc_validate_global_state(dc, dm_state->context, true); + status = dc_validate_global_state(dc, dm_state->context, false); if (status != DC_OK) { DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)", dc_status_to_str(status), status); -- cgit v1.2.3 From 754d349ed41186e3aba50c3128937be335f9460a Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 15 Dec 2023 19:41:57 -0500 Subject: drm/amd/display: Allow z8/z10 from driver Copy StutterPeriod from DML2 into DML1 StutterPeriod parameter. 
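[Editor's note: a hypothetical sketch of why the copy matters. Later z-state decisions read the legacy DML1 vba struct, so without this assignment vba.StutterPeriod would keep its default value and the deeper states would never be chosen. The helper and its threshold parameter are editorial assumptions, not the driver's actual policy code.]

	#include <stdbool.h>

	struct vba_vars {
		double StutterPeriod;   /* achievable stutter period, in microseconds */
	};

	/* Deeper Z8/Z10 states only pay off if the display can stutter long enough. */
	static bool allow_deep_zstate(const struct vba_vars *vba, double min_stutter_us)
	{
		return vba->StutterPeriod >= min_stutter_us;
	}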
Tested-by: Daniel Wheeler Reviewed-by: Muhammad Ahmed Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 638591ed057d..26307e599614 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -634,6 +634,8 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx); memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx); + //copy for deciding zstate use + context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod; } return result; -- cgit v1.2.3 From fd37721803c6e73619108f76ad2e12a9aa5fafaf Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 28 Dec 2023 17:47:03 +0300 Subject: mm, treewide: introduce NR_PAGE_ORDERS NR_PAGE_ORDERS defines the number of page orders supported by the page allocator, ranging from 0 to MAX_ORDER, MAX_ORDER + 1 in total. NR_PAGE_ORDERS assists in defining arrays of page orders and allows for more natural iteration over them. [kirill.shutemov@linux.intel.com: fixup for kerneldoc warning] Link: https://lkml.kernel.org/r/20240101111512.7empzyifq7kxtzk3@box Link: https://lkml.kernel.org/r/20231228144704.14033-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reviewed-by: Zi Yan Cc: Linus Torvalds Signed-off-by: Andrew Morton --- Documentation/admin-guide/kdump/vmcoreinfo.rst | 6 +++--- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 2 +- arch/sparc/kernel/traps_64.c | 2 +- drivers/gpu/drm/ttm/tests/ttm_device_test.c | 2 +- drivers/gpu/drm/ttm/ttm_pool.c | 20 ++++++++++---------- include/drm/ttm/ttm_pool.h | 2 +- include/linux/mmzone.h | 6 ++++-- kernel/crash_core.c | 2 +- lib/test_meminit.c | 2 +- mm/compaction.c | 2 +- mm/kmsan/init.c | 2 +- mm/page_alloc.c | 13 ++++++------- mm/page_reporting.c | 2 +- mm/show_mem.c | 8 ++++---- mm/vmstat.c | 12 ++++++------ 15 files changed, 42 insertions(+), 41 deletions(-) (limited to 'drivers/gpu') diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 78e4d2e7ba14..3f8769e46b07 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -172,7 +172,7 @@ variables. Offset of the free_list's member. This value is used to compute the number of free pages. -Each zone has a free_area structure array called free_area[MAX_ORDER + 1]. +Each zone has a free_area structure array called free_area[NR_PAGE_ORDERS]. The free_list represents a linked list of free page blocks. (list_head, next|prev) @@ -189,8 +189,8 @@ Offsets of the vmap_area's members. They carry vmalloc-specific information. Makedumpfile gets the start address of the vmalloc region from this. -(zone.free_area, MAX_ORDER + 1) -------------------------------- +(zone.free_area, NR_PAGE_ORDERS) +-------------------------------- Free areas descriptor. User-space tools use this value to iterate the free_area ranges. MAX_ORDER is used by the zone buddy allocator. 
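[Editor's note: the remaining hunks of this patch repeat the same mechanical substitution, summarized by this illustrative (non-kernel) sketch: size per-order arrays with NR_PAGE_ORDERS and iterate with a strict "< NR_PAGE_ORDERS" bound instead of "<= MAX_ORDER".]

	#define MAX_ORDER      10
	#define NR_PAGE_ORDERS (MAX_ORDER + 1)        /* orders 0..MAX_ORDER inclusive */

	static unsigned long nr_free[NR_PAGE_ORDERS]; /* was: nr_free[MAX_ORDER + 1] */

	static unsigned long total_free(void)
	{
		unsigned long sum = 0;
		unsigned int order;

		/* was: for (order = 0; order <= MAX_ORDER; order++) */
		for (order = 0; order < NR_PAGE_ORDERS; order++)
			sum += nr_free[order];

		return sum;
	}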
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index fe5472a184a3..97c527ef53c2 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -16,7 +16,7 @@ struct hyp_pool { * API at EL2. */ hyp_spinlock_t lock; - struct list_head free_area[MAX_ORDER + 1]; + struct list_head free_area[NR_PAGE_ORDERS]; phys_addr_t range_start; phys_addr_t range_end; unsigned short max_order; diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 08ffd17d5ec3..523a6e5ee925 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -897,7 +897,7 @@ void __init cheetah_ecache_flush_init(void) /* Now allocate error trap reporting scoreboard. */ sz = NR_CPUS * (2 * sizeof(struct cheetah_err_info)); - for (order = 0; order <= MAX_ORDER; order++) { + for (order = 0; order < NR_PAGE_ORDERS; order++) { if ((PAGE_SIZE << order) >= sz) break; } diff --git a/drivers/gpu/drm/ttm/tests/ttm_device_test.c b/drivers/gpu/drm/ttm/tests/ttm_device_test.c index b1b423b68cdf..19eaff22e6ae 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_device_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_device_test.c @@ -175,7 +175,7 @@ static void ttm_device_init_pools(struct kunit *test) if (params->pools_init_expected) { for (int i = 0; i < TTM_NUM_CACHING_TYPES; ++i) { - for (int j = 0; j <= MAX_ORDER; ++j) { + for (int j = 0; j < NR_PAGE_ORDERS; ++j) { pt = pool->caching[i].orders[j]; KUNIT_EXPECT_PTR_EQ(test, pt.pool, pool); KUNIT_EXPECT_EQ(test, pt.caching, i); diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index fe610a3cace0..d183bb97c526 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -65,11 +65,11 @@ module_param(page_pool_size, ulong, 0644); static atomic_long_t allocated_pages; -static struct ttm_pool_type global_write_combined[MAX_ORDER + 1]; -static struct ttm_pool_type global_uncached[MAX_ORDER + 1]; +static struct ttm_pool_type global_write_combined[NR_PAGE_ORDERS]; +static struct ttm_pool_type global_uncached[NR_PAGE_ORDERS]; -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER + 1]; -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1]; +static struct ttm_pool_type global_dma32_write_combined[NR_PAGE_ORDERS]; +static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; @@ -568,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, if (use_dma_alloc || nid != NUMA_NO_NODE) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j <= MAX_ORDER; ++j) + for (j = 0; j < NR_PAGE_ORDERS; ++j) ttm_pool_type_init(&pool->caching[i].orders[j], pool, i, j); } @@ -601,7 +601,7 @@ void ttm_pool_fini(struct ttm_pool *pool) if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) { for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j <= MAX_ORDER; ++j) + for (j = 0; j < NR_PAGE_ORDERS; ++j) ttm_pool_type_fini(&pool->caching[i].orders[j]); } @@ -656,7 +656,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m) unsigned int i; seq_puts(m, "\t "); - for (i = 0; i <= MAX_ORDER; ++i) + for (i = 0; i < NR_PAGE_ORDERS; ++i) seq_printf(m, " ---%2u---", i); seq_puts(m, "\n"); } @@ -667,7 +667,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, { unsigned int i; - for (i = 0; i <= MAX_ORDER; ++i) + for (i = 0; i < NR_PAGE_ORDERS; ++i) seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); seq_puts(m, "\n"); } @@ -776,7 
+776,7 @@ int ttm_pool_mgr_init(unsigned long num_pages) spin_lock_init(&shrinker_lock); INIT_LIST_HEAD(&shrinker_list); - for (i = 0; i <= MAX_ORDER; ++i) { + for (i = 0; i < NR_PAGE_ORDERS; ++i) { ttm_pool_type_init(&global_write_combined[i], NULL, ttm_write_combined, i); ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); @@ -816,7 +816,7 @@ void ttm_pool_mgr_fini(void) { unsigned int i; - for (i = 0; i <= MAX_ORDER; ++i) { + for (i = 0; i < NR_PAGE_ORDERS; ++i) { ttm_pool_type_fini(&global_write_combined[i]); ttm_pool_type_fini(&global_uncached[i]); diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h index 30a347e5aa11..4490d43c63e3 100644 --- a/include/drm/ttm/ttm_pool.h +++ b/include/drm/ttm/ttm_pool.h @@ -74,7 +74,7 @@ struct ttm_pool { bool use_dma32; struct { - struct ttm_pool_type orders[MAX_ORDER + 1]; + struct ttm_pool_type orders[NR_PAGE_ORDERS]; } caching[TTM_NUM_CACHING_TYPES]; }; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c18c53353b50..1ea7636dfb76 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -35,6 +35,8 @@ #define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES) +#define NR_PAGE_ORDERS (MAX_ORDER + 1) + /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should @@ -96,7 +98,7 @@ static inline bool migratetype_is_mergeable(int mt) } #define for_each_migratetype_order(order, type) \ - for (order = 0; order <= MAX_ORDER; order++) \ + for (order = 0; order < NR_PAGE_ORDERS; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) extern int page_group_by_mobility_disabled; @@ -933,7 +935,7 @@ struct zone { CACHELINE_PADDING(_pad1_); /* free areas of different sizes */ - struct free_area free_area[MAX_ORDER + 1]; + struct free_area free_area[NR_PAGE_ORDERS]; #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_ORDER */ diff --git a/kernel/crash_core.c b/kernel/crash_core.c index d4313b53837e..56cf4ad7abbb 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -802,7 +802,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_OFFSET(list_head, prev); VMCOREINFO_OFFSET(vmap_area, va_start); VMCOREINFO_OFFSET(vmap_area, list); - VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER + 1); + VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS); log_buf_vmcoreinfo_setup(); VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); VMCOREINFO_NUMBER(NR_FREE_PAGES); diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 0ae35223d773..0dc173849a54 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -93,7 +93,7 @@ static int __init test_pages(int *total_failures) int failures = 0, num_tests = 0; int i; - for (i = 0; i <= MAX_ORDER; i++) + for (i = 0; i < NR_PAGE_ORDERS; i++) num_tests += do_alloc_pages_order(i, &failures); REPORT_FAILURES_IN_FN(); diff --git a/mm/compaction.c b/mm/compaction.c index de15a2ef0af5..24f8eb4d6260 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2229,7 +2229,7 @@ static enum compact_result __compact_finished(struct compact_control *cc) /* Direct compactor: Is a suitable page free? 
*/ ret = COMPACT_NO_SUITABLE_PAGE; - for (order = cc->order; order <= MAX_ORDER; order++) { + for (order = cc->order; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &cc->zone->free_area[order]; bool can_steal; diff --git a/mm/kmsan/init.c b/mm/kmsan/init.c index ffedf4dbc49d..103e2e88ea03 100644 --- a/mm/kmsan/init.c +++ b/mm/kmsan/init.c @@ -96,7 +96,7 @@ void __init kmsan_init_shadow(void) struct metadata_page_pair { struct page *shadow, *origin; }; -static struct metadata_page_pair held_back[MAX_ORDER + 1] __initdata; +static struct metadata_page_pair held_back[NR_PAGE_ORDERS] __initdata; /* * Eager metadata allocation. When the memblock allocator is freeing pages to diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5526797b7f96..ccecf6158ae4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1560,7 +1560,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, struct page *page; /* Find a page of the appropriate size in the preferred list */ - for (current_order = order; current_order <= MAX_ORDER; ++current_order) { + for (current_order = order; current_order < NR_PAGE_ORDERS; ++current_order) { area = &(zone->free_area[current_order]); page = get_page_from_free_area(area, migratetype); if (!page) @@ -1934,7 +1934,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, continue; spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order <= MAX_ORDER; order++) { + for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &(zone->free_area[order]); page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); @@ -2044,8 +2044,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, return false; find_smallest: - for (current_order = order; current_order <= MAX_ORDER; - current_order++) { + for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, start_migratetype, false, &can_steal); @@ -3000,7 +2999,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, return true; /* For a high-order request, check at least one suitable page is free */ - for (o = order; o <= MAX_ORDER; o++) { + for (o = order; o < NR_PAGE_ORDERS; o++) { struct free_area *area = &z->free_area[o]; int mt; @@ -6628,7 +6627,7 @@ bool is_free_buddy_page(struct page *page) unsigned long pfn = page_to_pfn(page); unsigned int order; - for (order = 0; order <= MAX_ORDER; order++) { + for (order = 0; order < NR_PAGE_ORDERS; order++) { struct page *page_head = page - (pfn & ((1 << order) - 1)); if (PageBuddy(page_head) && @@ -6683,7 +6682,7 @@ bool take_page_off_buddy(struct page *page) bool ret = false; spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order <= MAX_ORDER; order++) { + for (order = 0; order < NR_PAGE_ORDERS; order++) { struct page *page_head = page - (pfn & ((1 << order) - 1)); int page_order = buddy_order(page_head); diff --git a/mm/page_reporting.c b/mm/page_reporting.c index b021f482a4cb..66369cc5279b 100644 --- a/mm/page_reporting.c +++ b/mm/page_reporting.c @@ -276,7 +276,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev, return err; /* Process each free list starting from lowest order/mt */ - for (order = page_reporting_order; order <= MAX_ORDER; order++) { + for (order = page_reporting_order; order < NR_PAGE_ORDERS; order++) { for (mt = 0; mt < MIGRATE_TYPES; mt++) { /* We do not pull pages from the isolate free list */ if 
(is_migrate_isolate(mt)) diff --git a/mm/show_mem.c b/mm/show_mem.c index ba0808d6917f..8dcfafbd283c 100644 --- a/mm/show_mem.c +++ b/mm/show_mem.c @@ -352,8 +352,8 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z for_each_populated_zone(zone) { unsigned int order; - unsigned long nr[MAX_ORDER + 1], flags, total = 0; - unsigned char types[MAX_ORDER + 1]; + unsigned long nr[NR_PAGE_ORDERS], flags, total = 0; + unsigned char types[NR_PAGE_ORDERS]; if (zone_idx(zone) > max_zone_idx) continue; @@ -363,7 +363,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z printk(KERN_CONT "%s: ", zone->name); spin_lock_irqsave(&zone->lock, flags); - for (order = 0; order <= MAX_ORDER; order++) { + for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &zone->free_area[order]; int type; @@ -377,7 +377,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z } } spin_unlock_irqrestore(&zone->lock, flags); - for (order = 0; order <= MAX_ORDER; order++) { + for (order = 0; order < NR_PAGE_ORDERS; order++) { printk(KERN_CONT "%lu*%lukB ", nr[order], K(1UL) << order); if (nr[order]) diff --git a/mm/vmstat.c b/mm/vmstat.c index 1437ca2f28c5..03ead31c46a0 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1059,7 +1059,7 @@ static void fill_contig_page_info(struct zone *zone, info->free_blocks_total = 0; info->free_blocks_suitable = 0; - for (order = 0; order <= MAX_ORDER; order++) { + for (order = 0; order < NR_PAGE_ORDERS; order++) { unsigned long blocks; /* @@ -1476,7 +1476,7 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, int order; seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); - for (order = 0; order <= MAX_ORDER; ++order) + for (order = 0; order < NR_PAGE_ORDERS; ++order) /* * Access to nr_free is lockless as nr_free is used only for * printing purposes. Use data_race to avoid KCSAN warning. @@ -1505,7 +1505,7 @@ static void pagetypeinfo_showfree_print(struct seq_file *m, pgdat->node_id, zone->name, migratetype_names[mtype]); - for (order = 0; order <= MAX_ORDER; ++order) { + for (order = 0; order < NR_PAGE_ORDERS; ++order) { unsigned long freecount = 0; struct free_area *area; struct list_head *curr; @@ -1545,7 +1545,7 @@ static void pagetypeinfo_showfree(struct seq_file *m, void *arg) /* Print header */ seq_printf(m, "%-43s ", "Free pages count per migrate type at order"); - for (order = 0; order <= MAX_ORDER; ++order) + for (order = 0; order < NR_PAGE_ORDERS; ++order) seq_printf(m, "%6d ", order); seq_putc(m, '\n'); @@ -2181,7 +2181,7 @@ static void unusable_show_print(struct seq_file *m, seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); - for (order = 0; order <= MAX_ORDER; ++order) { + for (order = 0; order < NR_PAGE_ORDERS; ++order) { fill_contig_page_info(zone, order, &info); index = unusable_free_index(order, &info); seq_printf(m, "%d.%03d ", index / 1000, index % 1000); @@ -2233,7 +2233,7 @@ static void extfrag_show_print(struct seq_file *m, seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); - for (order = 0; order <= MAX_ORDER; ++order) { + for (order = 0; order < NR_PAGE_ORDERS; ++order) { fill_contig_page_info(zone, order, &info); index = __fragmentation_index(order, &info); seq_printf(m, "%2d.%03d ", index / 1000, index % 1000); -- cgit v1.2.3 From 5e0a760b44417f7cadd79de2204d6247109558a0 Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Thu, 28 Dec 2023 17:47:04 +0300 Subject: mm, treewide: rename MAX_ORDER to MAX_PAGE_ORDER commit 23baf831a32c ("mm, treewide: redefine MAX_ORDER sanely") has changed the definition of MAX_ORDER to be inclusive. This has caused issues with code that was not yet upstream and depended on the previous definition. To draw attention to the altered meaning of the define, rename MAX_ORDER to MAX_PAGE_ORDER. Link: https://lkml.kernel.org/r/20231228144704.14033-2-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Linus Torvalds Signed-off-by: Andrew Morton --- Documentation/admin-guide/kdump/vmcoreinfo.rst | 2 +- Documentation/admin-guide/kernel-parameters.txt | 24 ++++++++++++------------ Documentation/networking/packet_mmap.rst | 14 +++++++------- arch/arm/Kconfig | 2 +- arch/arm64/Kconfig | 20 ++++++++++---------- arch/arm64/include/asm/sparsemem.h | 2 +- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 3 ++- arch/arm64/mm/hugetlbpage.c | 2 +- arch/m68k/Kconfig.cpu | 2 +- arch/nios2/Kconfig | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/mm/book3s64/iommu_api.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- arch/sh/mm/Kconfig | 2 +- arch/sparc/Kconfig | 2 +- arch/sparc/kernel/pci_sun4v.c | 2 +- arch/sparc/mm/tsb.c | 4 ++-- arch/um/kernel/um_arch.c | 4 ++-- arch/xtensa/Kconfig | 2 +- drivers/accel/qaic/qaic_data.c | 2 +- drivers/base/regmap/regmap-debugfs.c | 8 ++++---- drivers/block/floppy.c | 2 +- drivers/crypto/ccp/sev-dev.c | 2 +- drivers/crypto/hisilicon/sgl.c | 6 +++--- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 2 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- drivers/gpu/drm/ttm/tests/ttm_pool_test.c | 8 ++++---- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 +- drivers/iommu/dma-iommu.c | 2 +- drivers/irqchip/irq-gic-v3-its.c | 4 ++-- drivers/md/dm-bufio.c | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-flakey.c | 2 +- drivers/misc/genwqe/card_dev.c | 2 +- drivers/misc/genwqe/card_utils.c | 4 ++-- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- drivers/net/ethernet/ibm/ibmvnic.h | 4 ++-- drivers/video/fbdev/hyperv_fb.c | 6 +++--- drivers/video/fbdev/vermilion/vermilion.c | 2 +- drivers/virtio/virtio_balloon.c | 2 +- drivers/virtio/virtio_mem.c | 8 ++++---- fs/ramfs/file-nommu.c | 2 +- include/linux/hugetlb.h | 2 +- include/linux/mmzone.h | 14 +++++++------- include/linux/pageblock-flags.h | 4 ++-- include/linux/slab.h | 4 ++-- kernel/dma/pool.c | 6 +++--- kernel/dma/swiotlb.c | 4 ++-- kernel/events/ring_buffer.c | 10 +++++----- mm/Kconfig | 6 +++--- mm/compaction.c | 4 ++-- mm/debug_page_alloc.c | 2 +- mm/debug_vm_pgtable.c | 4 ++-- mm/huge_memory.c | 2 +- mm/hugetlb.c | 4 ++-- mm/internal.h | 2 +- mm/kmsan/init.c | 6 +++--- mm/memblock.c | 7 ++++--- mm/memory_hotplug.c | 9 +++++---- mm/mm_init.c | 22 +++++++++++----------- mm/page_alloc.c | 24 ++++++++++++------------ mm/page_isolation.c | 17 +++++++++-------- mm/page_owner.c | 6 +++--- mm/page_reporting.c | 4 ++-- mm/shuffle.h | 2 +- mm/slab.c | 2 +- mm/slub.c | 4 ++-- mm/vmscan.c | 2 +- mm/vmstat.c | 2 +- net/smc/smc_ib.c | 2 +- security/integrity/ima/ima_crypto.c | 2 +- tools/perf/Documentation/perf-intel-pt.txt | 2 +- tools/testing/memblock/linux/mmzone.h | 6 +++--- tools/testing/selftests/mm/thuge-gen.c | 3 ++- 76 files changed, 186 insertions(+), 181 deletions(-) (limited to 'drivers/gpu') diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst 
index 3f8769e46b07..bced9e4b6e08 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -193,7 +193,7 @@ from this. -------------------------------- Free areas descriptor. User-space tools use this value to iterate the -free_area ranges. MAX_ORDER is used by the zone buddy allocator. +free_area ranges. NR_PAGE_ORDERS is used by the zone buddy allocator. prb --- diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 65731b060e3f..8a01b8112f0b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -970,17 +970,17 @@ buddy allocator. Bigger value increase the probability of catching random memory corruption, but reduce the amount of memory for normal system use. The maximum - possible value is MAX_ORDER/2. Setting this parameter - to 1 or 2 should be enough to identify most random - memory corruption problems caused by bugs in kernel or - driver code when a CPU writes to (or reads from) a - random memory location. Note that there exists a class - of memory corruptions problems caused by buggy H/W or - F/W or by drivers badly programming DMA (basically when - memory is written at bus level and the CPU MMU is - bypassed) which are not detectable by - CONFIG_DEBUG_PAGEALLOC, hence this option will not help - tracking down these problems. + possible value is MAX_PAGE_ORDER/2. Setting this + parameter to 1 or 2 should be enough to identify most + random memory corruption problems caused by bugs in + kernel or driver code when a CPU writes to (or reads + from) a random memory location. Note that there exists + a class of memory corruptions problems caused by buggy + H/W or F/W or by drivers badly programming DMA + (basically when memory is written at bus level and the + CPU MMU is bypassed) which are not detectable by + CONFIG_DEBUG_PAGEALLOC, hence this option will not + help tracking down these problems. debug_pagealloc= [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this parameter @@ -4136,7 +4136,7 @@ [KNL] Minimal page reporting order Format: Adjust the minimal page reporting order. The page - reporting is disabled when it exceeds MAX_ORDER. + reporting is disabled when it exceeds MAX_PAGE_ORDER. panic= [KNL] Kernel behaviour on panic: delay timeout > 0: seconds before rebooting diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst index 30a3be3c48f3..dca15d15feaf 100644 --- a/Documentation/networking/packet_mmap.rst +++ b/Documentation/networking/packet_mmap.rst @@ -263,20 +263,20 @@ the name indicates, this function allocates pages of memory, and the second argument is "order" or a power of two number of pages, that is (for PAGE_SIZE == 4096) order=0 ==> 4096 bytes, order=1 ==> 8192 bytes, order=2 ==> 16384 bytes, etc. The maximum size of a -region allocated by __get_free_pages is determined by the MAX_ORDER macro. More -precisely the limit can be calculated as:: +region allocated by __get_free_pages is determined by the MAX_PAGE_ORDER macro. +More precisely the limit can be calculated as:: - PAGE_SIZE << MAX_ORDER + PAGE_SIZE << MAX_PAGE_ORDER In a i386 architecture PAGE_SIZE is 4096 bytes - In a 2.4/i386 kernel MAX_ORDER is 10 - In a 2.6/i386 kernel MAX_ORDER is 11 + In a 2.4/i386 kernel MAX_PAGE_ORDER is 10 + In a 2.6/i386 kernel MAX_PAGE_ORDER is 11 So get_free_pages can allocate as much as 4MB or 8MB in a 2.4/2.6 kernel respectively, with an i386 architecture. 
User space programs can include /usr/include/sys/user.h and -/usr/include/linux/mmzone.h to get PAGE_SIZE MAX_ORDER declarations. +/usr/include/linux/mmzone.h to get PAGE_SIZE MAX_PAGE_ORDER declarations. The pagesize can also be determined dynamically with the getpagesize (2) system call. @@ -324,7 +324,7 @@ Definitions: (see /proc/slabinfo) depends on the architecture -- ``sizeof(void *)`` depends on the architecture -- PAGE_SIZE or getpagesize (2) - is the value defined with MAX_ORDER + is the value defined with MAX_PAGE_ORDER it's an upper bound of frame's capture size (more on this later) ============== ================================================================ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f8567e95f98b..b2ab8db63c4b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1362,7 +1362,7 @@ config ARCH_FORCE_MAX_ORDER default "10" help The kernel page allocator limits the size of maximal physically - contiguous allocations. The limit is called MAX_ORDER and it + contiguous allocations. The limit is called MAX_PAGE_ORDER and it defines the maximal power of two of number of pages that can be allocated as a single contiguous block. This option allows overriding the default setting when ability to allocate very diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 12d611f3da5d..442539fd06fe 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1520,15 +1520,15 @@ config XEN # include/linux/mmzone.h requires the following to be true: # -# MAX_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS +# MAX_PAGE_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS # -# so the maximum value of MAX_ORDER is SECTION_SIZE_BITS - PAGE_SHIFT: +# so the maximum value of MAX_PAGE_ORDER is SECTION_SIZE_BITS - PAGE_SHIFT: # -# | SECTION_SIZE_BITS | PAGE_SHIFT | max MAX_ORDER | default MAX_ORDER | -# ----+-------------------+--------------+-----------------+--------------------+ -# 4K | 27 | 12 | 15 | 10 | -# 16K | 27 | 14 | 13 | 11 | -# 64K | 29 | 16 | 13 | 13 | +# | SECTION_SIZE_BITS | PAGE_SHIFT | max MAX_PAGE_ORDER | default MAX_PAGE_ORDER | +# ----+-------------------+--------------+----------------------+-------------------------+ +# 4K | 27 | 12 | 15 | 10 | +# 16K | 27 | 14 | 13 | 11 | +# 64K | 29 | 16 | 13 | 13 | config ARCH_FORCE_MAX_ORDER int default "13" if ARM64_64K_PAGES @@ -1536,16 +1536,16 @@ config ARCH_FORCE_MAX_ORDER default "10" help The kernel page allocator limits the size of maximal physically - contiguous allocations. The limit is called MAX_ORDER and it + contiguous allocations. The limit is called MAX_PAGE_ORDER and it defines the maximal power of two of number of pages that can be allocated as a single contiguous block. This option allows overriding the default setting when ability to allocate very large blocks of physically contiguous memory is required. The maximal size of allocation cannot exceed the size of the - section, so the value of MAX_ORDER should satisfy + section, so the value of MAX_PAGE_ORDER should satisfy - MAX_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS + MAX_PAGE_ORDER + PAGE_SHIFT <= SECTION_SIZE_BITS Don't change if unsure. diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 5f5437621029..8a8acc220371 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -10,7 +10,7 @@ /* * Section size must be at least 512MB for 64K base * page size config. Otherwise it will be less than - * MAX_ORDER and the build process will fail. + * MAX_PAGE_ORDER and the build process will fail. 
*/ #ifdef CONFIG_ARM64_64K_PAGES #define SECTION_SIZE_BITS 29 diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index b1e392186a0f..e691290d3765 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -228,7 +228,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, int i; hyp_spin_lock_init(&pool->lock); - pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT)); + pool->max_order = min(MAX_PAGE_ORDER, + get_order(nr_pages << PAGE_SHIFT)); for (i = 0; i <= pool->max_order; i++) INIT_LIST_HEAD(&pool->free_area[i]); pool->range_start = phys; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index f5aae342632c..8116ac599f80 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -51,7 +51,7 @@ void __init arm64_hugetlb_cma_reserve(void) * page allocator. Just warn if there is any change * breaking this assumption. */ - WARN_ON(order <= MAX_ORDER); + WARN_ON(order <= MAX_PAGE_ORDER); hugetlb_cma_reserve(order); } #endif /* CONFIG_CMA */ diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index ad69b466a08b..9dcf245c9cbf 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -402,7 +402,7 @@ config ARCH_FORCE_MAX_ORDER default "10" help The kernel page allocator limits the size of maximal physically - contiguous allocations. The limit is called MAX_ORDER and it + contiguous allocations. The limit is called MAX_PAGE_ORDER and it defines the maximal power of two of number of pages that can be allocated as a single contiguous block. This option allows overriding the default setting when ability to allocate very diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index d54464021a61..58d9565dc2c7 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -50,7 +50,7 @@ config ARCH_FORCE_MAX_ORDER default "10" help The kernel page allocator limits the size of maximal physically - contiguous allocations. The limit is called MAX_ORDER and it + contiguous allocations. The limit is called MAX_PAGE_ORDER and it defines the maximal power of two of number of pages that can be allocated as a single contiguous block. This option allows overriding the default setting when ability to allocate very diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1f11a62809f2..52d7e3fad553 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -915,7 +915,7 @@ config ARCH_FORCE_MAX_ORDER default "10" help The kernel page allocator limits the size of maximal physically - contiguous allocations. The limit is called MAX_ORDER and it + contiguous allocations. The limit is called MAX_PAGE_ORDER and it defines the maximal power of two of number of pages that can be allocated as a single contiguous block. 
This option allows overriding the default setting when ability to allocate very diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index d19fb1f3007d..c0e8d597e4cb 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -97,7 +97,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } mmap_read_lock(mm); - chunk = (1UL << (PAGE_SHIFT + MAX_ORDER)) / + chunk = (1UL << (PAGE_SHIFT + MAX_PAGE_ORDER)) / sizeof(struct vm_area_struct *); chunk = min(chunk, entries); for (entry = 0; entry < entries; entry += chunk) { diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f7c683b672c1..0a540b37aab6 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -615,7 +615,7 @@ void __init gigantic_hugetlb_cma_reserve(void) order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT; if (order) { - VM_WARN_ON(order <= MAX_ORDER); + VM_WARN_ON(order <= MAX_PAGE_ORDER); hugetlb_cma_reserve(order); } } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 28fac4770073..23f5b5093ec1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1389,7 +1389,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) * DMA window can be larger than available memory, which will * cause errors later. */ - const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_ORDER); + const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_PAGE_ORDER); /* * We create the default window as big as we can. The constraint is diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index 511c17aede4a..455311d9a5e9 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -26,7 +26,7 @@ config ARCH_FORCE_MAX_ORDER default "10" help The kernel page allocator limits the size of maximal physically - contiguous allocations. The limit is called MAX_ORDER and it + contiguous allocations. The limit is called MAX_PAGE:_ORDER and it defines the maximal power of two of number of pages that can be allocated as a single contiguous block. This option allows overriding the default setting when ability to allocate very diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 49849790e66d..204c43cb3d43 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -277,7 +277,7 @@ config ARCH_FORCE_MAX_ORDER default "12" help The kernel page allocator limits the size of maximal physically - contiguous allocations. The limit is called MAX_ORDER and it + contiguous allocations. The limit is called MAX_PAGE_ORDER and it defines the maximal power of two of number of pages that can be allocated as a single contiguous block. 
This option allows overriding the default setting when ability to allocate very diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index c80b0a21d709..083e5f05a7f0 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -194,7 +194,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, size = IO_PAGE_ALIGN(size); order = get_order(size); - if (unlikely(order > MAX_ORDER)) + if (unlikely(order > MAX_PAGE_ORDER)) return NULL; npages = size >> IO_PAGE_SHIFT; diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 5e2931a18409..6acd8a4c1e2a 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -402,8 +402,8 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss) unsigned long new_rss_limit; gfp_t gfp_flags; - if (max_tsb_size > PAGE_SIZE << MAX_ORDER) - max_tsb_size = PAGE_SIZE << MAX_ORDER; + if (max_tsb_size > PAGE_SIZE << MAX_PAGE_ORDER) + max_tsb_size = PAGE_SIZE << MAX_PAGE_ORDER; new_cache_index = 0; for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index b1bfed0c8528..7a9820797eae 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -373,10 +373,10 @@ int __init linux_main(int argc, char **argv) max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; /* - * Zones have to begin on a 1 << MAX_ORDER page boundary, + * Zones have to begin on a 1 << MAX_PAGE_ORDER page boundary, * so this makes sure that's true for highmem */ - max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); + max_physmem &= ~((1 << (PAGE_SHIFT + MAX_PAGE_ORDER)) - 1); if (physmem_size + iomem_size > max_physmem) { highmem = physmem_size + iomem_size - max_physmem; physmem_size -= highmem; diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 7d792077e5fd..e031eaf36c99 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -793,7 +793,7 @@ config ARCH_FORCE_MAX_ORDER default "10" help The kernel page allocator limits the size of maximal physically - contiguous allocations. The limit is called MAX_ORDER and it + contiguous allocations. The limit is called MAX_PAGE_ORDER and it defines the maximal power of two of number of pages that can be allocated as a single contiguous block. 
This option allows overriding the default setting when ability to allocate very diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 4a8e43a7a6a4..aaeb2c9c071a 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -451,7 +451,7 @@ static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 s * later */ buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE; - max_order = min(MAX_ORDER - 1, get_order(size)); + max_order = min(MAX_PAGE_ORDER - 1, get_order(size)); } else { /* allocate a single page for book keeping */ nr_pages = 1; diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index bdd80b73c3e6..fb84cda92a75 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -226,8 +226,8 @@ static ssize_t regmap_read_debugfs(struct regmap *map, unsigned int from, if (*ppos < 0 || !count) return -EINVAL; - if (count > (PAGE_SIZE << MAX_ORDER)) - count = PAGE_SIZE << MAX_ORDER; + if (count > (PAGE_SIZE << MAX_PAGE_ORDER)) + count = PAGE_SIZE << MAX_PAGE_ORDER; buf = kmalloc(count, GFP_KERNEL); if (!buf) @@ -373,8 +373,8 @@ static ssize_t regmap_reg_ranges_read_file(struct file *file, if (*ppos < 0 || !count) return -EINVAL; - if (count > (PAGE_SIZE << MAX_ORDER)) - count = PAGE_SIZE << MAX_ORDER; + if (count > (PAGE_SIZE << MAX_PAGE_ORDER)) + count = PAGE_SIZE << MAX_PAGE_ORDER; buf = kmalloc(count, GFP_KERNEL); if (!buf) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 11114a5d9e5c..d0e41d52d6a9 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3079,7 +3079,7 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr) } } -#define MAX_LEN (1UL << MAX_ORDER << PAGE_SHIFT) +#define MAX_LEN (1UL << MAX_PAGE_ORDER << PAGE_SHIFT) static int raw_cmd_copyin(int cmd, void __user *param, struct floppy_raw_cmd **rcmd) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index fcaccd0b5a65..e4d3f45242f6 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -906,7 +906,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) /* * The length of the ID shouldn't be assumed by software since * it may change in the future. The allocation size is limited - * to 1 << (PAGE_SHIFT + MAX_ORDER) by the page allocator. + * to 1 << (PAGE_SHIFT + MAX_PAGE_ORDER) by the page allocator. * If the allocation fails, simply return ENOMEM rather than * warning in the kernel log. */ diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 3df7a256e919..5c1012d7ffa9 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -70,11 +70,11 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, HISI_ACC_SGL_ALIGN_SIZE); /* - * the pool may allocate a block of memory of size PAGE_SIZE * 2^MAX_ORDER, + * the pool may allocate a block of memory of size PAGE_SIZE * 2^MAX_PAGE_ORDER, * block size may exceed 2^31 on ia64, so the max of block size is 2^31 */ - block_size = 1 << (PAGE_SHIFT + MAX_ORDER < 32 ? - PAGE_SHIFT + MAX_ORDER : 31); + block_size = 1 << (PAGE_SHIFT + MAX_PAGE_ORDER < 32 ? 
+ PAGE_SHIFT + MAX_PAGE_ORDER : 31); sgl_num_per_block = block_size / sgl_size; block_num = count / sgl_num_per_block; remain_sgl = count % sgl_num_per_block; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 6bc26b4b06b8..ea7561ae6e13 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -36,7 +36,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct sg_table *st; struct scatterlist *sg; unsigned int npages; /* restricted by sg_alloc_table */ - int max_order = MAX_ORDER; + int max_order = MAX_PAGE_ORDER; unsigned int max_segment; gfp_t gfp; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 6b9f6cf50bf6..84c50c4c4af7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -115,7 +115,7 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) do { struct page *page; - GEM_BUG_ON(order > MAX_ORDER); + GEM_BUG_ON(order > MAX_PAGE_ORDER); page = alloc_pages(GFP | __GFP_ZERO, order); if (!page) goto err; diff --git a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c index 2d9cae8cd984..cceaa18d4e46 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_pool_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_pool_test.c @@ -109,7 +109,7 @@ static const struct ttm_pool_test_case ttm_pool_basic_cases[] = { }, { .description = "Above the allocation limit", - .order = MAX_ORDER + 1, + .order = MAX_PAGE_ORDER + 1, }, { .description = "One page, with coherent DMA mappings enabled", @@ -118,7 +118,7 @@ static const struct ttm_pool_test_case ttm_pool_basic_cases[] = { }, { .description = "Above the allocation limit, with coherent DMA mappings enabled", - .order = MAX_ORDER + 1, + .order = MAX_PAGE_ORDER + 1, .use_dma_alloc = true, }, }; @@ -165,7 +165,7 @@ static void ttm_pool_alloc_basic(struct kunit *test) fst_page = tt->pages[0]; last_page = tt->pages[tt->num_pages - 1]; - if (params->order <= MAX_ORDER) { + if (params->order <= MAX_PAGE_ORDER) { if (params->use_dma_alloc) { KUNIT_ASSERT_NOT_NULL(test, (void *)fst_page->private); KUNIT_ASSERT_NOT_NULL(test, (void *)last_page->private); @@ -182,7 +182,7 @@ static void ttm_pool_alloc_basic(struct kunit *test) * order 0 blocks */ KUNIT_ASSERT_EQ(test, fst_page->private, - min_t(unsigned int, MAX_ORDER, + min_t(unsigned int, MAX_PAGE_ORDER, params->order)); KUNIT_ASSERT_EQ(test, last_page->private, 0); } diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index d183bb97c526..b62f420a9f96 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -447,7 +447,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, else gfp_flags |= GFP_HIGHUSER; - for (order = min_t(unsigned int, MAX_ORDER, __fls(num_pages)); + for (order = min_t(unsigned int, MAX_PAGE_ORDER, __fls(num_pages)); num_pages; order = min_t(unsigned int, order, __fls(num_pages))) { struct ttm_pool_type *pt; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 961205ba86d2..925ac6a47bce 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -188,7 +188,7 @@ #ifdef CONFIG_CMA_ALIGNMENT #define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT) #else -#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER) +#define Q_MAX_SZ_SHIFT 
(PAGE_SHIFT + MAX_PAGE_ORDER) #endif /* diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 85163a83df2f..e59f50e11ea8 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -884,7 +884,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, struct page **pages; unsigned int i = 0, nid = dev_to_node(dev); - order_mask &= GENMASK(MAX_ORDER, 0); + order_mask &= GENMASK(MAX_PAGE_ORDER, 0); if (!order_mask) return NULL; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9a7a74239eab..d097001c1e3e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2465,8 +2465,8 @@ static bool its_parse_indirect_baser(struct its_node *its, * feature is not supported by hardware. */ new_order = max_t(u32, get_order(esz << ids), new_order); - if (new_order > MAX_ORDER) { - new_order = MAX_ORDER; + if (new_order > MAX_PAGE_ORDER) { + new_order = MAX_PAGE_ORDER; ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz); pr_warn("ITS@%pa: %s Table too large, reduce ids %llu->%u\n", &its->phys_base, its_base_type_string[type], diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index f03d7dba270c..13c65b7e1ed6 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1170,7 +1170,7 @@ static void __cache_size_refresh(void) * If the allocation may fail we use __get_free_pages. Memory fragmentation * won't have a fatal effect here, but it just causes flushes of some other * buffers and more I/O will be performed. Don't use __get_free_pages if it - * always fails (i.e. order > MAX_ORDER). + * always fails (i.e. order > MAX_PAGE_ORDER). * * If the allocation shouldn't fail we use __vmalloc. This is only for the * initial reserve allocation, so there's no risk of wasting all vmalloc diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 2ae8560b6a14..855b482cbff1 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size) unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; unsigned int remaining_size; - unsigned int order = MAX_ORDER; + unsigned int order = MAX_PAGE_ORDER; retry: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index f57fb821528d..7916ed9f10e8 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -434,7 +434,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b remaining_size = size; - order = MAX_ORDER; + order = MAX_PAGE_ORDER; while (remaining_size) { struct page *pages; unsigned size_to_add, to_copy; diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 55fc5b80e649..4441aca2280a 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -443,7 +443,7 @@ static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) if (vsize == 0) return -EINVAL; - if (get_order(vsize) > MAX_ORDER) + if (get_order(vsize) > MAX_PAGE_ORDER) return -ENOMEM; dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 1c798d6b2dfb..a2c4a9b4f871 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -210,7 +210,7 @@ u32 genwqe_crc32(u8 *buff, size_t len, u32 init) void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, 
dma_addr_t *dma_handle) { - if (get_order(size) > MAX_ORDER) + if (get_order(size) > MAX_PAGE_ORDER) return NULL; return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle, @@ -308,7 +308,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, sgl->write = write; sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); - if (get_order(sgl->sgl_size) > MAX_ORDER) { + if (get_order(sgl->sgl_size) > MAX_PAGE_ORDER) { dev_err(&pci_dev->dev, "[%s] err: too much memory requested!\n", __func__); return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b618797a7e8d..f1695c889d3a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1041,7 +1041,7 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) return; order = get_order(alloc_size); - if (order > MAX_ORDER) { + if (order > MAX_PAGE_ORDER) { if (net_ratelimit()) dev_warn(ring_to_dev(ring), "failed to allocate tx spare buffer, exceed to max order\n"); return; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 4e18b4cefa97..94ac36b1408b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -48,7 +48,7 @@ * of 4096 jumbo frames (MTU=9000) we will need about 9K*4K = 36MB plus * some padding. * - * But the size of a single DMA region is limited by MAX_ORDER in the + * But the size of a single DMA region is limited by MAX_PAGE_ORDER in the * kernel (about 16MB currently). To support say 4K Jumbo frames, we * use a set of LTBs (struct ltb_set) per pool. * @@ -75,7 +75,7 @@ * pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160 * plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC. 
*/ -#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << MAX_ORDER) * PAGE_SIZE)) +#define IBMVNIC_ONE_LTB_MAX ((u32)((1 << MAX_PAGE_ORDER) * PAGE_SIZE)) #define IBMVNIC_ONE_LTB_SIZE min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX) #define IBMVNIC_LTB_SET_SIZE (38 << 20) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index a80939fe2ee6..6a29d2594b91 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -927,8 +927,8 @@ static phys_addr_t hvfb_get_phymem(struct hv_device *hdev, if (request_size == 0) return -1; - if (order <= MAX_ORDER) { - /* Call alloc_pages if the size is less than 2^MAX_ORDER */ + if (order <= MAX_PAGE_ORDER) { + /* Call alloc_pages if the size is less than 2^MAX_PAGE_ORDER */ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (!page) return -1; @@ -958,7 +958,7 @@ static void hvfb_release_phymem(struct hv_device *hdev, { unsigned int order = get_order(size); - if (order <= MAX_ORDER) + if (order <= MAX_PAGE_ORDER) __free_pages(pfn_to_page(paddr >> PAGE_SHIFT), order); else dma_free_coherent(&hdev->device, diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c index 840ead69654b..a32e5b2924c9 100644 --- a/drivers/video/fbdev/vermilion/vermilion.c +++ b/drivers/video/fbdev/vermilion/vermilion.c @@ -197,7 +197,7 @@ static int vmlfb_alloc_vram(struct vml_info *vinfo, va = &vinfo->vram[i]; order = 0; - while (requested > (PAGE_SIZE << order) && order <= MAX_ORDER) + while (requested > (PAGE_SIZE << order) && order <= MAX_PAGE_ORDER) order++; err = vmlfb_alloc_vram_area(va, order, 0); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 1fe93e93f5bc..59cdc0292dce 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -33,7 +33,7 @@ #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ __GFP_NOMEMALLOC) /* The order of free page blocks to report to host */ -#define VIRTIO_BALLOON_HINT_BLOCK_ORDER MAX_ORDER +#define VIRTIO_BALLOON_HINT_BLOCK_ORDER MAX_PAGE_ORDER /* The size of a free page block in bytes */ #define VIRTIO_BALLOON_HINT_BLOCK_BYTES \ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index fa5226c198cc..8e3223294442 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1154,13 +1154,13 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn, */ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) { - unsigned long order = MAX_ORDER; + unsigned long order = MAX_PAGE_ORDER; unsigned long i; /* * We might get called for ranges that don't cover properly aligned - * MAX_ORDER pages; however, we can only online properly aligned - * pages with an order of MAX_ORDER at maximum. + * MAX_PAGE_ORDER pages; however, we can only online properly aligned + * pages with an order of MAX_PAGE_ORDER at maximum. */ while (!IS_ALIGNED(pfn | nr_pages, 1 << order)) order--; @@ -1280,7 +1280,7 @@ static void virtio_mem_online_page(struct virtio_mem *vm, bool do_online; /* - * We can get called with any order up to MAX_ORDER. If our subblock + * We can get called with any order up to MAX_PAGE_ORDER. If our subblock * size is smaller than that and we have a mixture of plugged and * unplugged subblocks within such a page, we have to process in * smaller granularity. 
In that case we'll adjust the order exactly once diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index efb1b4c1a0a4..7a6d980e614d 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -70,7 +70,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) /* make various checks */ order = get_order(newsize); - if (unlikely(order > MAX_ORDER)) + if (unlikely(order > MAX_PAGE_ORDER)) return -EFBIG; ret = inode_newsize_ok(inode, newsize); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 236ec7b63c54..c1ee640d87b1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -829,7 +829,7 @@ static inline unsigned huge_page_shift(struct hstate *h) static inline bool hstate_is_gigantic(struct hstate *h) { - return huge_page_order(h) > MAX_ORDER; + return huge_page_order(h) > MAX_PAGE_ORDER; } static inline unsigned int pages_per_huge_page(const struct hstate *h) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1ea7636dfb76..4ed33b127821 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -27,15 +27,15 @@ /* Free memory management - zoned buddy allocator. */ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER -#define MAX_ORDER 10 +#define MAX_PAGE_ORDER 10 #else -#define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER +#define MAX_PAGE_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif -#define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) +#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) #define IS_MAX_ORDER_ALIGNED(pfn) IS_ALIGNED(pfn, MAX_ORDER_NR_PAGES) -#define NR_PAGE_ORDERS (MAX_ORDER + 1) +#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed @@ -938,7 +938,7 @@ struct zone { struct free_area free_area[NR_PAGE_ORDERS]; #ifdef CONFIG_UNACCEPTED_MEMORY - /* Pages to be accepted. All pages on the list are MAX_ORDER */ + /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; #endif @@ -1748,8 +1748,8 @@ static inline bool movable_only_nodes(nodemask_t *nodes) #define SECTION_BLOCKFLAGS_BITS \ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) -#if (MAX_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS -#error Allocator MAX_ORDER exceeds SECTION_SIZE +#if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS +#error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE #endif static inline unsigned long pfn_to_section_nr(unsigned long pfn) diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e83c4c095041..3f2409b968ec 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,14 +41,14 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER) +#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #else /* CONFIG_HUGETLB_PAGE */ /* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ -#define pageblock_order MAX_ORDER +#define pageblock_order MAX_PAGE_ORDER #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/slab.h b/include/linux/slab.h index d6d6ffeeb9a2..d63823e518c0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -308,7 +308,7 @@ static inline unsigned int arch_slab_minalign(void) * (PAGE_SIZE*2). Larger requests are passed to the page allocator. 
*/ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_MAX (MAX_PAGE_ORDER + PAGE_SHIFT) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif @@ -316,7 +316,7 @@ static inline unsigned int arch_slab_minalign(void) #ifdef CONFIG_SLUB #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) -#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT) +#define KMALLOC_SHIFT_MAX (MAX_PAGE_ORDER + PAGE_SHIFT) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 3 #endif diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index b481c48a31a6..d10613eb0f63 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -84,8 +84,8 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, void *addr; int ret = -ENOMEM; - /* Cannot allocate larger than MAX_ORDER */ - order = min(get_order(pool_size), MAX_ORDER); + /* Cannot allocate larger than MAX_PAGE_ORDER */ + order = min(get_order(pool_size), MAX_PAGE_ORDER); do { pool_size = 1 << (PAGE_SHIFT + order); @@ -190,7 +190,7 @@ static int __init dma_atomic_pool_init(void) /* * If coherent_pool was not used on the command line, default the pool - * sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER. + * sizes to 128KB per 1GB of memory, min 128KB, max MAX_PAGE_ORDER. */ if (!atomic_pool_size) { unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 33d942615be5..176078bf2215 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -686,8 +686,8 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, size_t pool_size; size_t tlb_size; - if (nslabs > SLABS_PER_PAGE << MAX_ORDER) { - nslabs = SLABS_PER_PAGE << MAX_ORDER; + if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) { + nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER; nareas = limit_nareas(nareas, nslabs); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index e8d82c2f07d0..60ed43d1c29e 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -610,8 +610,8 @@ static struct page *rb_alloc_aux_page(int node, int order) { struct page *page; - if (order > MAX_ORDER) - order = MAX_ORDER; + if (order > MAX_PAGE_ORDER) + order = MAX_PAGE_ORDER; do { page = alloc_pages_node(node, PERF_AUX_GFP, order); @@ -702,9 +702,9 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, /* * kcalloc_node() is unable to allocate buffer if the size is larger - * than: PAGE_SIZE << MAX_ORDER; directly bail out in this case. + * than: PAGE_SIZE << MAX_PAGE_ORDER; directly bail out in this case. */ - if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER) + if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_PAGE_ORDER) return -ENOMEM; rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL, node); @@ -821,7 +821,7 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) size = sizeof(struct perf_buffer); size += nr_pages * sizeof(void *); - if (order_base_2(size) > PAGE_SHIFT+MAX_ORDER) + if (order_base_2(size) > PAGE_SHIFT+MAX_PAGE_ORDER) goto fail; node = (cpu == -1) ? cpu : cpu_to_node(cpu); diff --git a/mm/Kconfig b/mm/Kconfig index 79d563d8f9e0..cb9d470f0bf7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -381,7 +381,7 @@ config SHUFFLE_PAGE_ALLOCATOR the presence of a memory-side-cache. 
There are also incidental security benefits as it reduces the predictability of page allocations to compliment SLAB_FREELIST_RANDOM, but the - default granularity of shuffling on the MAX_ORDER i.e, 10th + default granularity of shuffling on the MAX_PAGE_ORDER i.e, 10th order of pages is selected based on cache utilization benefits on x86. @@ -713,8 +713,8 @@ config HUGETLB_PAGE_SIZE_VARIABLE HUGETLB_PAGE_ORDER when there are multiple HugeTLB page sizes available on a platform. - Note that the pageblock_order cannot exceed MAX_ORDER and will be - clamped down to MAX_ORDER. + Note that the pageblock_order cannot exceed MAX_PAGE_ORDER and will be + clamped down to MAX_PAGE_ORDER. config CONTIG_ALLOC def_bool (MEMORY_ISOLATION && COMPACTION) || CMA diff --git a/mm/compaction.c b/mm/compaction.c index 24f8eb4d6260..27ada42924d5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -999,7 +999,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * a valid page order. Consider only values in the * valid order range to prevent low_pfn overflow. */ - if (freepage_order > 0 && freepage_order <= MAX_ORDER) { + if (freepage_order > 0 && freepage_order <= MAX_PAGE_ORDER) { low_pfn += (1UL << freepage_order) - 1; nr_scanned += (1UL << freepage_order) - 1; } @@ -1017,7 +1017,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (PageCompound(page) && !cc->alloc_contig) { const unsigned int order = compound_order(page); - if (likely(order <= MAX_ORDER)) { + if (likely(order <= MAX_PAGE_ORDER)) { low_pfn += (1UL << order) - 1; nr_scanned += (1UL << order) - 1; } diff --git a/mm/debug_page_alloc.c b/mm/debug_page_alloc.c index f9d145730fd1..6755f0c9d4a3 100644 --- a/mm/debug_page_alloc.c +++ b/mm/debug_page_alloc.c @@ -22,7 +22,7 @@ static int __init debug_guardpage_minorder_setup(char *buf) { unsigned long res; - if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) { + if (kstrtoul(buf, 10, &res) < 0 || res > MAX_PAGE_ORDER / 2) { pr_err("Bad debug_guardpage_minorder value\n"); return 0; } diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index e651500e597a..5662e29fe253 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -1091,7 +1091,7 @@ debug_vm_pgtable_alloc_huge_page(struct pgtable_debug_args *args, int order) struct page *page = NULL; #ifdef CONFIG_CONTIG_ALLOC - if (order > MAX_ORDER) { + if (order > MAX_PAGE_ORDER) { page = alloc_contig_pages((1 << order), GFP_KERNEL, first_online_node, NULL); if (page) { @@ -1101,7 +1101,7 @@ debug_vm_pgtable_alloc_huge_page(struct pgtable_debug_args *args, int order) } #endif - if (order <= MAX_ORDER) + if (order <= MAX_PAGE_ORDER) page = alloc_pages(GFP_KERNEL, order); return page; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1a588e29d287..b9a7a57691d7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -682,7 +682,7 @@ static int __init hugepage_init(void) /* * hugepages can't be allocated by the buddy allocator */ - MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER > MAX_ORDER); + MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER > MAX_PAGE_ORDER); /* * we use page->mapping and page->index in second tail page * as list_head: assuming THP order >= 2 diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 378e460a6ab4..0d262784ce60 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3410,7 +3410,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h, /* * Put bootmem huge pages into the standard lists after mem_map is up. - * Note: This only applies to gigantic (order > MAX_ORDER) pages. 
+ * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages. */ static void __init gather_bootmem_prealloc(void) { @@ -4790,7 +4790,7 @@ static int __init default_hugepagesz_setup(char *s) * The number of default huge pages (for this size) could have been * specified as the first hugetlb parameter: hugepages=X. If so, * then default_hstate_max_huge_pages is set. If the default huge - * page size is gigantic (> MAX_ORDER), then the pages must be + * page size is gigantic (> MAX_PAGE_ORDER), then the pages must be * allocated here from bootmem allocator. */ if (default_hstate_max_huge_pages) { diff --git a/mm/internal.h b/mm/internal.h index ac40c3d00336..f309a010d50f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -335,7 +335,7 @@ static inline bool page_is_buddy(struct page *page, struct page *buddy, * satisfies the following equation: * P = B & ~(1 << O) * - * Assumption: *_mem_map is contiguous at least up to MAX_ORDER + * Assumption: *_mem_map is contiguous at least up to MAX_PAGE_ORDER */ static inline unsigned long __find_buddy_pfn(unsigned long page_pfn, unsigned int order) diff --git a/mm/kmsan/init.c b/mm/kmsan/init.c index 103e2e88ea03..3ac3b8921d36 100644 --- a/mm/kmsan/init.c +++ b/mm/kmsan/init.c @@ -141,7 +141,7 @@ struct smallstack { static struct smallstack collect = { .index = 0, - .order = MAX_ORDER, + .order = MAX_PAGE_ORDER, }; static void smallstack_push(struct smallstack *stack, struct page *pages) @@ -211,8 +211,8 @@ static void kmsan_memblock_discard(void) * order=N-1, * - repeat. */ - collect.order = MAX_ORDER; - for (int i = MAX_ORDER; i >= 0; i--) { + collect.order = MAX_PAGE_ORDER; + for (int i = MAX_PAGE_ORDER; i >= 0; i--) { if (held_back[i].shadow) smallstack_push(&collect, held_back[i].shadow); if (held_back[i].origin) diff --git a/mm/memblock.c b/mm/memblock.c index 4a62f7774b65..8c194d8afeec 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2113,12 +2113,13 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end) * Free the pages in the largest chunks alignment allows. * * __ffs() behaviour is undefined for 0. start == 0 is - * MAX_ORDER-aligned, set order to MAX_ORDER for the case. + * MAX_PAGE_ORDER-aligned, set order to MAX_PAGE_ORDER for + * the case. */ if (start) - order = min_t(int, MAX_ORDER, __ffs(start)); + order = min_t(int, MAX_PAGE_ORDER, __ffs(start)); else - order = MAX_ORDER; + order = MAX_PAGE_ORDER; while (start + (1UL << order) > end) order--; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 926e1cfb10e9..b3c0ff52bb72 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -645,7 +645,7 @@ static void online_pages_range(unsigned long start_pfn, unsigned long nr_pages) unsigned long pfn; /* - * Online the pages in MAX_ORDER aligned chunks. The callback might + * Online the pages in MAX_PAGE_ORDER aligned chunks. The callback might * decide to not expose all pages to the buddy (e.g., expose them * later). We account all pages as being online and belonging to this * zone ("present"). @@ -660,12 +660,13 @@ static void online_pages_range(unsigned long start_pfn, unsigned long nr_pages) * Free to online pages in the largest chunks alignment allows. * * __ffs() behaviour is undefined for 0. start == 0 is - * MAX_ORDER-aligned, Set order to MAX_ORDER for the case. + * MAX_PAGE_ORDER-aligned, Set order to MAX_PAGE_ORDER for + * the case. 
*/ if (pfn) - order = min_t(int, MAX_ORDER, __ffs(pfn)); + order = min_t(int, MAX_PAGE_ORDER, __ffs(pfn)); else - order = MAX_ORDER; + order = MAX_PAGE_ORDER; (*online_page_callback)(pfn_to_page(pfn), order); pfn += (1UL << order); diff --git a/mm/mm_init.c b/mm/mm_init.c index 2830eef2b16c..89dc29f1e6c6 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1455,7 +1455,7 @@ static inline void setup_usemap(struct zone *zone) {} /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ void __init set_pageblock_order(void) { - unsigned int order = MAX_ORDER; + unsigned int order = MAX_PAGE_ORDER; /* Check that pageblock_nr_pages has not already been setup */ if (pageblock_order) @@ -1638,7 +1638,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); offset = pgdat->node_start_pfn - start; /* - * The zone's endpoints aren't required to be MAX_ORDER + * The zone's endpoints aren't required to be MAX_PAGE_ORDER * aligned but the node_mem_map endpoints must be in order * for the buddy allocator to function correctly. */ @@ -1964,11 +1964,11 @@ static void __init deferred_free_range(unsigned long pfn, if (nr_pages == MAX_ORDER_NR_PAGES && IS_MAX_ORDER_ALIGNED(pfn)) { for (i = 0; i < nr_pages; i += pageblock_nr_pages) set_pageblock_migratetype(page + i, MIGRATE_MOVABLE); - __free_pages_core(page, MAX_ORDER); + __free_pages_core(page, MAX_PAGE_ORDER); return; } - /* Accept chunks smaller than MAX_ORDER upfront */ + /* Accept chunks smaller than MAX_PAGE_ORDER upfront */ accept_memory(PFN_PHYS(pfn), PFN_PHYS(pfn + nr_pages)); for (i = 0; i < nr_pages; i++, page++, pfn++) { @@ -1991,8 +1991,8 @@ static inline void __init pgdat_init_report_one_done(void) /* * Returns true if page needs to be initialized or freed to buddy allocator. * - * We check if a current MAX_ORDER block is valid by only checking the validity - * of the head pfn. + * We check if a current MAX_PAGE_ORDER block is valid by only checking the + * validity of the head pfn. */ static inline bool __init deferred_pfn_valid(unsigned long pfn) { @@ -2149,8 +2149,8 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn); /* - * Initialize and free pages in MAX_ORDER sized increments so that we - * can avoid introducing any issues with the buddy allocator. + * Initialize and free pages in MAX_PAGE_ORDER sized increments so that + * we can avoid introducing any issues with the buddy allocator. */ while (spfn < end_pfn) { deferred_init_maxorder(&i, zone, &spfn, &epfn); @@ -2291,7 +2291,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order) } /* - * Initialize and free pages in MAX_ORDER sized increments so + * Initialize and free pages in MAX_PAGE_ORDER sized increments so * that we can avoid introducing any issues with the buddy * allocator. */ @@ -2509,7 +2509,7 @@ void *__init alloc_large_system_hash(const char *tablename, else table = memblock_alloc_raw(size, SMP_CACHE_BYTES); - } else if (get_order(size) > MAX_ORDER || hashdist) { + } else if (get_order(size) > MAX_PAGE_ORDER || hashdist) { table = vmalloc_huge(size, gfp_flags); virt = true; if (table) @@ -2756,7 +2756,7 @@ void __init mm_core_init(void) /* * page_ext requires contiguous pages, - * bigger than MAX_ORDER unless SPARSEMEM. + * bigger than MAX_PAGE_ORDER unless SPARSEMEM. 
*/ page_ext_init_flatmem(); mem_debugging_and_hardening_init(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ccecf6158ae4..a01baf0454f8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -727,7 +727,7 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn, unsigned long higher_page_pfn; struct page *higher_page; - if (order >= MAX_ORDER - 1) + if (order >= MAX_PAGE_ORDER - 1) return false; higher_page_pfn = buddy_pfn & pfn; @@ -782,7 +782,7 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); - while (order < MAX_ORDER) { + while (order < MAX_PAGE_ORDER) { if (compaction_capture(capc, page, order, migratetype)) { __mod_zone_freepage_state(zone, -(1 << order), migratetype); @@ -1297,7 +1297,7 @@ void __free_pages_core(struct page *page, unsigned int order) atomic_long_add(nr_pages, &page_zone(page)->managed_pages); if (page_contains_unaccepted(page, order)) { - if (order == MAX_ORDER && __free_unaccepted(page)) + if (order == MAX_PAGE_ORDER && __free_unaccepted(page)) return; accept_page(page, order); @@ -1327,7 +1327,7 @@ void __free_pages_core(struct page *page, unsigned int order) * * Note: the function may return non-NULL struct page even for a page block * which contains a memory hole (i.e. there is no physical memory for a subset - * of the pfn range). For example, if the pageblock order is MAX_ORDER, which + * of the pfn range). For example, if the pageblock order is MAX_PAGE_ORDER, which * will fall into 2 sub-sections, and the end pfn of the pageblock may be hole * even though the start pfn is online and valid. This should be safe most of * the time because struct pages are still initialized via init_unavailable_range() @@ -2018,7 +2018,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, * approximates finding the pageblock with the most free pages, which * would be too costly to do exactly. */ - for (current_order = MAX_ORDER; current_order >= min_order; + for (current_order = MAX_PAGE_ORDER; current_order >= min_order; --current_order) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, @@ -2056,7 +2056,7 @@ find_smallest: * This should not happen - we already found a suitable fallback * when looking for the largest page. */ - VM_BUG_ON(current_order > MAX_ORDER); + VM_BUG_ON(current_order > MAX_PAGE_ORDER); do_steal: page = get_page_from_free_area(area, fallback_mt); @@ -4533,7 +4533,7 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, * There are several places where we assume that the order value is sane * so bail out early if the request is out of bound. */ - if (WARN_ON_ONCE_GFP(order > MAX_ORDER, gfp)) + if (WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp)) return NULL; gfp &= gfp_allowed_mask; @@ -4815,7 +4815,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, * minimum number of pages to satisfy the request. alloc_pages() can only * allocate memory in power-of-two pages. * - * This function is also limited by MAX_ORDER. + * This function is also limited by MAX_PAGE_ORDER. * * Memory allocated by this function must be released by free_pages_exact(). 
* @@ -6373,7 +6373,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, order = 0; outer_start = start; while (!PageBuddy(pfn_to_page(outer_start))) { - if (++order > MAX_ORDER) { + if (++order > MAX_PAGE_ORDER) { outer_start = start; break; } @@ -6635,7 +6635,7 @@ bool is_free_buddy_page(struct page *page) break; } - return order <= MAX_ORDER; + return order <= MAX_PAGE_ORDER; } EXPORT_SYMBOL(is_free_buddy_page); @@ -6807,9 +6807,9 @@ static bool try_to_accept_memory_one(struct zone *zone) __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); spin_unlock_irqrestore(&zone->lock, flags); - accept_page(page, MAX_ORDER); + accept_page(page, MAX_PAGE_ORDER); - __free_pages_ok(page, MAX_ORDER, FPI_TO_TAIL); + __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL); if (last) static_branch_dec(&zones_with_unaccepted_pages); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index bcf99ba747a0..cd0ea3668253 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -226,7 +226,7 @@ static void unset_migratetype_isolate(struct page *page, int migratetype) */ if (PageBuddy(page)) { order = buddy_order(page); - if (order >= pageblock_order && order < MAX_ORDER) { + if (order >= pageblock_order && order < MAX_PAGE_ORDER) { buddy = find_buddy_page_pfn(page, page_to_pfn(page), order, NULL); if (buddy && !is_migrate_isolate_page(buddy)) { @@ -290,11 +290,12 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * isolate_single_pageblock() * @migratetype: migrate type to set in error recovery. * - * Free and in-use pages can be as big as MAX_ORDER and contain more than one + * Free and in-use pages can be as big as MAX_PAGE_ORDER and contain more than one * pageblock. When not all pageblocks within a page are isolated at the same * time, free page accounting can go wrong. For example, in the case of - * MAX_ORDER = pageblock_order + 1, a MAX_ORDER page has two pagelbocks. - * [ MAX_ORDER ] + * MAX_PAGE_ORDER = pageblock_order + 1, a MAX_PAGE_ORDER page has two + * pagelbocks. + * [ MAX_PAGE_ORDER ] * [ pageblock0 | pageblock1 ] * When either pageblock is isolated, if it is a free page, the page is not * split into separate migratetype lists, which is supposed to; if it is an @@ -451,7 +452,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, * the free page to the right migratetype list. * * head_pfn is not used here as a hugetlb page order - * can be bigger than MAX_ORDER, but after it is + * can be bigger than MAX_PAGE_ORDER, but after it is * freed, the free page order is not. Use pfn within * the range to find the head of the free page. */ @@ -459,7 +460,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, outer_pfn = pfn; while (!PageBuddy(pfn_to_page(outer_pfn))) { /* stop if we cannot find the free page */ - if (++order > MAX_ORDER) + if (++order > MAX_PAGE_ORDER) goto failed; outer_pfn &= ~0UL << order; } @@ -660,8 +661,8 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, int ret; /* - * Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages - * are not aligned to pageblock_nr_pages. + * Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free + * pages are not aligned to pageblock_nr_pages. * Then we just check migratetype first. 
*/ for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { diff --git a/mm/page_owner.c b/mm/page_owner.c index 040dbf26a986..5634e5d890f8 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -320,7 +320,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, unsigned long freepage_order; freepage_order = buddy_order_unsafe(page); - if (freepage_order <= MAX_ORDER) + if (freepage_order <= MAX_PAGE_ORDER) pfn += (1UL << freepage_order) - 1; continue; } @@ -555,7 +555,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) if (PageBuddy(page)) { unsigned long freepage_order = buddy_order_unsafe(page); - if (freepage_order <= MAX_ORDER) + if (freepage_order <= MAX_PAGE_ORDER) pfn += (1UL << freepage_order) - 1; continue; } @@ -663,7 +663,7 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) if (PageBuddy(page)) { unsigned long order = buddy_order_unsafe(page); - if (order > 0 && order <= MAX_ORDER) + if (order > 0 && order <= MAX_PAGE_ORDER) pfn += (1UL << order) - 1; continue; } diff --git a/mm/page_reporting.c b/mm/page_reporting.c index 66369cc5279b..e4c428e61d8c 100644 --- a/mm/page_reporting.c +++ b/mm/page_reporting.c @@ -20,7 +20,7 @@ static int page_order_update_notify(const char *val, const struct kernel_param * * If param is set beyond this limit, order is set to default * pageblock_order value */ - return param_set_uint_minmax(val, kp, 0, MAX_ORDER); + return param_set_uint_minmax(val, kp, 0, MAX_PAGE_ORDER); } static const struct kernel_param_ops page_reporting_param_ops = { @@ -370,7 +370,7 @@ int page_reporting_register(struct page_reporting_dev_info *prdev) */ if (page_reporting_order == -1) { - if (prdev->order > 0 && prdev->order <= MAX_ORDER) + if (prdev->order > 0 && prdev->order <= MAX_PAGE_ORDER) page_reporting_order = prdev->order; else page_reporting_order = pageblock_order; diff --git a/mm/shuffle.h b/mm/shuffle.h index a6bdf54f96f1..61bbcddeeee6 100644 --- a/mm/shuffle.h +++ b/mm/shuffle.h @@ -4,7 +4,7 @@ #define _MM_SHUFFLE_H #include -#define SHUFFLE_ORDER MAX_ORDER +#define SHUFFLE_ORDER MAX_PAGE_ORDER #ifdef CONFIG_SHUFFLE_PAGE_ALLOCATOR DECLARE_STATIC_KEY_FALSE(page_alloc_shuffle_key); diff --git a/mm/slab.c b/mm/slab.c index 773c79e153f3..073cae923d56 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -465,7 +465,7 @@ static int __init slab_max_order_setup(char *str) { get_option(&str, &slab_max_order); slab_max_order = slab_max_order < 0 ? 0 : - min(slab_max_order, MAX_ORDER); + min(slab_max_order, MAX_PAGE_ORDER); slab_max_order_set = true; return 1; diff --git a/mm/slub.c b/mm/slub.c index a5420be89c8c..ba162e661e2e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4194,7 +4194,7 @@ static inline int calculate_order(unsigned int size) * Doh this slab cannot be placed using slub_max_order. 
*/ order = get_order(size); - if (order <= MAX_ORDER) + if (order <= MAX_PAGE_ORDER) return order; return -ENOSYS; } @@ -4722,7 +4722,7 @@ __setup("slub_min_order=", setup_slub_min_order); static int __init setup_slub_max_order(char *str) { get_option(&str, (int *)&slub_max_order); - slub_max_order = min_t(unsigned int, slub_max_order, MAX_ORDER); + slub_max_order = min_t(unsigned int, slub_max_order, MAX_PAGE_ORDER); if (slub_min_order > slub_max_order) slub_min_order = slub_max_order; diff --git a/mm/vmscan.c b/mm/vmscan.c index 600ed3cbf7cb..68f0abbb8e59 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6415,7 +6415,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, * scan_control uses s8 fields for order, priority, and reclaim_idx. * Confirm they are large enough for max values. */ - BUILD_BUG_ON(MAX_ORDER >= S8_MAX); + BUILD_BUG_ON(MAX_PAGE_ORDER >= S8_MAX); BUILD_BUG_ON(DEF_PRIORITY > S8_MAX); BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX); diff --git a/mm/vmstat.c b/mm/vmstat.c index 03ead31c46a0..db79935e4a54 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1092,7 +1092,7 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in { unsigned long requested = 1UL << order; - if (WARN_ON_ONCE(order > MAX_ORDER)) + if (WARN_ON_ONCE(order > MAX_PAGE_ORDER)) return 0; if (!info->free_blocks_total) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 89981dbe46c9..97704a9e84c7 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -844,7 +844,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) goto out; /* the calculated number of cq entries fits to mlx5 cq allocation */ cqe_size_order = cache_line_size() == 128 ? 7 : 6; - smc_order = MAX_ORDER - cqe_size_order; + smc_order = MAX_PAGE_ORDER - cqe_size_order; if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 51ad29940f05..f3738b2c8bcd 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -38,7 +38,7 @@ static int param_set_bufsize(const char *val, const struct kernel_param *kp) size = memparse(val, NULL); order = get_order(size); - if (order > MAX_ORDER) + if (order > MAX_PAGE_ORDER) return -EINVAL; ima_maxorder = order; ima_bufsize = PAGE_SIZE << order; diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 4c90cc176f81..2109690b0d5f 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -683,7 +683,7 @@ Buffer handling ~~~~~~~~~~~~~~~ There may be buffer limitations (i.e. single ToPa entry) which means that actual -buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to +buffer sizes are limited to powers of 2 up to 4MiB (MAX_PAGE_ORDER). In order to provide other sizes, and in particular an arbitrarily large size, multiple buffers are logically concatenated. However an interrupt must be used to switch between buffers. 
That has two potential problems: diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h index 134f8eab0768..71546e15bdd3 100644 --- a/tools/testing/memblock/linux/mmzone.h +++ b/tools/testing/memblock/linux/mmzone.h @@ -17,10 +17,10 @@ enum zone_type { }; #define MAX_NR_ZONES __MAX_NR_ZONES -#define MAX_ORDER 10 -#define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) +#define MAX_PAGE_ORDER 10 +#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) -#define pageblock_order MAX_ORDER +#define pageblock_order MAX_PAGE_ORDER #define pageblock_nr_pages BIT(pageblock_order) #define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) #define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 16ed4dfa7359..622987f12c89 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -3,7 +3,8 @@ Before running this huge pages for each huge page size must have been reserved. - For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used. + For large pages beyond MAX_PAGE_ORDER (like 1GB on x86) boot options must + be used. Also shmmax must be increased. And you need to run as root to work around some weird permissions in shm. And nothing using huge pages should run in parallel. -- cgit v1.2.3 From 89fe46019a62bc1d0cb49c9615cb3520096c4bc1 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Tue, 9 Jan 2024 08:30:40 -0300 Subject: drm/v3d: Fix support for register debugging on the RPi 4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RPi 4 uses V3D 4.2, which is currently not supported by the register definition stated at `v3d_core_reg_defs`. We should be able to support V3D 4.2, therefore, change the maximum version of the register definition to 42, not 41. 
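For illustration only (not part of this patch; the helper below is a hypothetical sketch): each v3d_reg_def entry carries the IP-version window in which a register exists, and the debugfs dump is assumed to skip any entry whose window does not cover the running core, roughly like:

struct v3d_reg_def {
	u32 min_ver;	/* lowest IP version the register is valid for, e.g. 33 == 3.3 */
	u32 max_ver;	/* highest IP version the register is valid for */
	u32 reg;	/* register offset to read and print */
};

/* Hypothetical sketch of the filter implied by the REGDEF(min, max, reg) table. */
static bool v3d_reg_def_applies(const struct v3d_reg_def *def, u32 ver)
{
	return ver >= def->min_ver && ver <= def->max_ver;
}

Under that reading, raising max_ver from 41 to 42 on the GMP and CSD_CURRENT_CFG* entries is what allows them to be dumped on the RPi 4's V3D 4.2 core.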
Fixes: 0ad5bc1ce463 ("drm/v3d: fix up register addresses for V3D 7.x") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240109113126.929446-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_debugfs.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index f843a50d5dce..94eafcecc65b 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -62,9 +62,9 @@ static const struct v3d_reg_def v3d_core_reg_defs[] = { REGDEF(33, 71, V3D_PTB_BPCA), REGDEF(33, 71, V3D_PTB_BPCS), - REGDEF(33, 41, V3D_GMP_STATUS(33)), - REGDEF(33, 41, V3D_GMP_CFG(33)), - REGDEF(33, 41, V3D_GMP_VIO_ADDR(33)), + REGDEF(33, 42, V3D_GMP_STATUS(33)), + REGDEF(33, 42, V3D_GMP_CFG(33)), + REGDEF(33, 42, V3D_GMP_VIO_ADDR(33)), REGDEF(33, 71, V3D_ERR_FDBGO), REGDEF(33, 71, V3D_ERR_FDBGB), @@ -74,13 +74,13 @@ static const struct v3d_reg_def v3d_core_reg_defs[] = { static const struct v3d_reg_def v3d_csd_reg_defs[] = { REGDEF(41, 71, V3D_CSD_STATUS), - REGDEF(41, 41, V3D_CSD_CURRENT_CFG0(41)), - REGDEF(41, 41, V3D_CSD_CURRENT_CFG1(41)), - REGDEF(41, 41, V3D_CSD_CURRENT_CFG2(41)), - REGDEF(41, 41, V3D_CSD_CURRENT_CFG3(41)), - REGDEF(41, 41, V3D_CSD_CURRENT_CFG4(41)), - REGDEF(41, 41, V3D_CSD_CURRENT_CFG5(41)), - REGDEF(41, 41, V3D_CSD_CURRENT_CFG6(41)), + REGDEF(41, 42, V3D_CSD_CURRENT_CFG0(41)), + REGDEF(41, 42, V3D_CSD_CURRENT_CFG1(41)), + REGDEF(41, 42, V3D_CSD_CURRENT_CFG2(41)), + REGDEF(41, 42, V3D_CSD_CURRENT_CFG3(41)), + REGDEF(41, 42, V3D_CSD_CURRENT_CFG4(41)), + REGDEF(41, 42, V3D_CSD_CURRENT_CFG5(41)), + REGDEF(41, 42, V3D_CSD_CURRENT_CFG6(41)), REGDEF(71, 71, V3D_CSD_CURRENT_CFG0(71)), REGDEF(71, 71, V3D_CSD_CURRENT_CFG1(71)), REGDEF(71, 71, V3D_CSD_CURRENT_CFG2(71)), -- cgit v1.2.3 From 47bf0f83fc86df1bf42b385a91aadb910137c5c9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 2 Jan 2024 15:07:44 -0500 Subject: drm/amdkfd: Fix lock dependency warning ====================================================== WARNING: possible circular locking dependency detected 6.5.0-kfd-fkuehlin #276 Not tainted ------------------------------------------------------ kworker/8:2/2676 is trying to acquire lock: ffff9435aae95c88 ((work_completion)(&svm_bo->eviction_work)){+.+.}-{0:0}, at: __flush_work+0x52/0x550 but task is already holding lock: ffff9435cd8e1720 (&svms->lock){+.+.}-{3:3}, at: svm_range_deferred_list_work+0xe8/0x340 [amdgpu] which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #2 (&svms->lock){+.+.}-{3:3}: __mutex_lock+0x97/0xd30 kfd_ioctl_alloc_memory_of_gpu+0x6d/0x3c0 [amdgpu] kfd_ioctl+0x1b2/0x5d0 [amdgpu] __x64_sys_ioctl+0x86/0xc0 do_syscall_64+0x39/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd -> #1 (&mm->mmap_lock){++++}-{3:3}: down_read+0x42/0x160 svm_range_evict_svm_bo_worker+0x8b/0x340 [amdgpu] process_one_work+0x27a/0x540 worker_thread+0x53/0x3e0 kthread+0xeb/0x120 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x11/0x20 -> #0 ((work_completion)(&svm_bo->eviction_work)){+.+.}-{0:0}: __lock_acquire+0x1426/0x2200 lock_acquire+0xc1/0x2b0 __flush_work+0x80/0x550 __cancel_work_timer+0x109/0x190 svm_range_bo_release+0xdc/0x1c0 [amdgpu] svm_range_free+0x175/0x180 [amdgpu] svm_range_deferred_list_work+0x15d/0x340 [amdgpu] process_one_work+0x27a/0x540 worker_thread+0x53/0x3e0 kthread+0xeb/0x120 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x11/0x20 other info that might help us debug this: Chain exists of: (work_completion)(&svm_bo->eviction_work) --> &mm->mmap_lock --> &svms->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&svms->lock); lock(&mm->mmap_lock); lock(&svms->lock); lock((work_completion)(&svm_bo->eviction_work)); I believe this cannot really lead to a deadlock in practice, because svm_range_evict_svm_bo_worker only takes the mmap_read_lock if the BO refcount is non-0. That means it's impossible that svm_range_bo_release is running concurrently. However, there is no good way to annotate this. To avoid the problem, take a BO reference in svm_range_schedule_evict_svm_bo instead of in the worker. That way it's impossible for a BO to get freed while eviction work is pending and the cancel_work_sync call in svm_range_bo_release can be eliminated. v2: Use svm_bo_ref_unless_zero and explained why that's safe. Also removed redundant checks that are already done in amdkfd_fence_enable_signaling. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ac84c4a2ca07..d46c145835e0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -404,14 +404,9 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); - if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { - /* We're not in the eviction worker. - * Signal the fence and synchronize with any - * pending eviction work. - */ + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) + /* We're not in the eviction worker. Signal the fence. */ dma_fence_signal(&svm_bo->eviction_fence->base); - cancel_work_sync(&svm_bo->eviction_work); - } dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); @@ -3432,13 +3427,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) { - if (!fence) - return -EINVAL; - - if (dma_fence_is_signaled(&fence->base)) - return 0; - - if (fence->svm_bo) { + /* Dereferencing fence->svm_bo is safe here because the fence hasn't + * signaled yet and we're under the protection of the fence->lock. + * After the fence is signaled in svm_range_bo_release, we cannot get + * here any more. 
+ * + * Reference is dropped in svm_range_evict_svm_bo_worker. + */ + if (svm_bo_ref_unless_zero(fence->svm_bo)) { WRITE_ONCE(fence->svm_bo->evicting, 1); schedule_work(&fence->svm_bo->eviction_work); } @@ -3453,8 +3449,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) int r = 0; svm_bo = container_of(work, struct svm_range_bo, eviction_work); - if (!svm_bo_ref_unless_zero(svm_bo)) - return; /* svm_bo was freed while eviction was pending */ if (mmget_not_zero(svm_bo->eviction_fence->mm)) { mm = svm_bo->eviction_fence->mm; -- cgit v1.2.3 From 17e74e11ac2b46e7514705ae7abfb93ac0e20bd6 Mon Sep 17 00:00:00 2001 From: Martin Tsai Date: Mon, 18 Dec 2023 16:36:44 +0800 Subject: drm/amd/display: To adjust dprefclk by down spread percentage [Why] Panels show corruption with high refresh rate timings when ssc is enabled. [How] Read down-spread percentage from lut to adjust dprefclk. Issues come from S0i3 with this commit has been fixed by SMU. Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Martin Tsai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 71 +++++++++++++++++++++- .../amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h | 11 ++++ drivers/gpu/drm/amd/display/dc/dce/dce_audio.c | 2 +- .../gpu/drm/amd/display/dc/dce/dce_clock_source.c | 9 ++- .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 1 + drivers/gpu/drm/amd/display/include/audio_types.h | 2 +- 7 files changed, 93 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 757528256326..878c0e7b78ab 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -87,6 +87,20 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L +#define regCLK1_CLK2_BYPASS_CNTL 0x029c +#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 + +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L + +#define regCLK6_0_CLK6_spll_field_8 0x464b +#define regCLK6_0_CLK6_spll_field_8_BASE_IDX 0 + +#define CLK6_0_CLK6_spll_field_8__spll_ssc_en__SHIFT 0xd +#define CLK6_0_CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L + #define REG(reg_name) \ (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -160,6 +174,37 @@ static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state } } +bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t ssc_enable; + + REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable); + + return ssc_enable == 1; +} + +void dcn314_init_clocks(struct clk_mgr *clk_mgr) +{ + struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); + // Assumption is that boot state always supports pstate + clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk + 
clk_mgr->clks.p_state_change_support = true; + clk_mgr->clks.prev_p_state_change_support = true; + clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; + clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; + + // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk + if (dcn314_is_spll_ssc_enabled(clk_mgr)) + clk_mgr->dp_dto_source_clock_in_khz = + dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz); + else + clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; +} + void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower) @@ -436,6 +481,11 @@ static DpmClocks314_t dummy_clocks; static struct dcn314_watermarks dummy_wms = { 0 }; +static struct dcn314_ss_info_table ss_info_table = { + .ss_divider = 1000, + .ss_percentage = {0, 0, 375, 375, 375} +}; + static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table) { int i, num_valid_sets; @@ -708,13 +758,31 @@ static struct clk_mgr_funcs dcn314_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, .update_clocks = dcn314_update_clocks, - .init_clocks = dcn31_init_clocks, + .init_clocks = dcn314_init_clocks, .enable_pme_wa = dcn314_enable_pme_wa, .are_clock_states_equal = dcn314_are_clock_states_equal, .notify_wm_ranges = dcn314_notify_wm_ranges }; extern struct clk_mgr_funcs dcn3_fpga_funcs; +static void dcn314_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) +{ + uint32_t clock_source; + //uint32_t ssc_enable; + + REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); + //REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable); + + if (dcn314_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) { + clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; + + if (clk_mgr->dprefclk_ss_percentage != 0) { + clk_mgr->ss_on_dprefclk = true; + clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider; + } + } +} + void dcn314_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_dcn314 *clk_mgr, @@ -782,6 +850,7 @@ void dcn314_clk_mgr_construct( clk_mgr->base.base.dprefclk_khz = 600000; clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; dce_clock_read_ss_info(&clk_mgr->base); + dcn314_read_ss_info_from_lut(&clk_mgr->base); /*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/ clk_mgr->base.base.bw_params = &dcn314_bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h index 171f84340eb2..002c28e80720 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h @@ -28,6 +28,8 @@ #define __DCN314_CLK_MGR_H__ #include "clk_mgr_internal.h" +#define DCN314_NUM_CLOCK_SOURCES 5 + struct dcn314_watermarks; struct dcn314_smu_watermark_set { @@ -40,9 +42,18 @@ struct clk_mgr_dcn314 { struct dcn314_smu_watermark_set smu_wm_set; }; +struct dcn314_ss_info_table { + uint32_t ss_divider; + uint32_t ss_percentage[DCN314_NUM_CLOCK_SOURCES]; +}; + bool dcn314_are_clock_states_equal(struct dc_clocks *a, struct dc_clocks *b); +bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base); + +void dcn314_init_clocks(struct clk_mgr *clk_mgr); + void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower); diff --git 
a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 140598f18bbd..f0458b8f00af 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -782,7 +782,7 @@ static void get_azalia_clock_info_dp( /*audio_dto_module = dpDtoSourceClockInkhz * 10,000; * [khz] ->[100Hz] */ azalia_clock_info->audio_dto_module = - pll_info->dp_dto_source_clock_in_khz * 10; + pll_info->audio_dto_source_clock_in_khz * 10; } void dce_aud_wall_dto_setup( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 5d3f6fa1011e..970644b695cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -975,6 +975,9 @@ static bool dcn31_program_pix_clk( look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10); struct bp_pixel_clock_parameters bp_pc_params = {0}; enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; + + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0) + dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; // For these signal types Driver to program DP_DTO without calling VBIOS Command table if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { if (e) { @@ -1088,6 +1091,10 @@ static bool get_pixel_clk_frequency_100hz( struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); unsigned int clock_hz = 0; unsigned int modulo_hz = 0; + unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz; + + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0) + dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) { clock_hz = REG_READ(PHASE[inst]); @@ -1100,7 +1107,7 @@ static bool get_pixel_clk_frequency_100hz( modulo_hz = REG_READ(MODULO[inst]); if (modulo_hz) *pixel_clk_khz = div_u64((uint64_t)clock_hz* - clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, + dp_dto_ref_khz*10, modulo_hz); else *pixel_clk_khz = 0; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index fb328cd06cea..5660f15da291 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1354,7 +1354,7 @@ static void build_audio_output( if (state->clk_mgr && (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT || pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) { - audio_output->pll_info.dp_dto_source_clock_in_khz = + audio_output->pll_info.audio_dto_source_clock_in_khz = state->clk_mgr->funcs->get_dp_ref_clk_frequency( state->clk_mgr); } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index cbba39d251e5..17e014d3bdc8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -333,6 +333,7 @@ struct clk_mgr { bool force_smu_not_present; bool dc_mode_softmax_enabled; int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes + int dp_dto_source_clock_in_khz; // Used to program DP DTO with ss adjustment on DCN314 int dentist_vco_freq_khz; struct clk_state_registers_and_bypass boot_snapshot; struct clk_bw_params *bw_params; diff 
--git a/drivers/gpu/drm/amd/display/include/audio_types.h b/drivers/gpu/drm/amd/display/include/audio_types.h index 66a54da0641c..915a031a43cb 100644 --- a/drivers/gpu/drm/amd/display/include/audio_types.h +++ b/drivers/gpu/drm/amd/display/include/audio_types.h @@ -64,7 +64,7 @@ enum audio_dto_source { /* PLL information required for AZALIA DTO calculation */ struct audio_pll_info { - uint32_t dp_dto_source_clock_in_khz; + uint32_t audio_dto_source_clock_in_khz; uint32_t feed_back_divider; enum audio_dto_source dto_source; bool ss_enabled; -- cgit v1.2.3 From 7bdbfb4e36e34eb788e44f27666bf0a2b3b90803 Mon Sep 17 00:00:00 2001 From: George Shen Date: Sun, 17 Dec 2023 17:17:57 -0500 Subject: drm/amd/display: Disconnect phantom pipe OPP from OPTC being disabled [Why] If an OPP is used for a different OPTC without first being disconnected from the previous OPTC, unexpected behaviour can occur. This also applies to phantom pipes, which is what the current logic missed. [How] Disconnect OPPs from OPTC for phantom pipes before disabling OTG master. Also move the disconnection to before the OTG master disable, since the register is double buffered. Reviewed-by: Dillon Varone Acked-by: Rodrigo Siqueira Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c | 19 +++++++++++++------ .../gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c | 12 ++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c index 91ea0d4da06a..1788eb29474b 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -166,12 +166,6 @@ static bool optc32_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 0); - REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, 0xf, OPTC_SEG1_SRC_SEL, 0xf, @@ -179,6 +173,12 @@ static bool optc32_disable_crtc(struct timing_generator *optc) OPTC_SEG3_SRC_SEL, 0xf, OPTC_NUM_OF_INPUT_SEGMENT, 0); + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + REG_UPDATE(CONTROL, VTG0_ENABLE, 0); @@ -205,6 +205,13 @@ static void optc32_disable_phantom_otg(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); + REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, + OPTC_SEG0_SRC_SEL, 0xf, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf, + OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0); } diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index 08a59cf449ca..3d6c1b2c2b4d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -138,12 +138,6 @@ static bool optc35_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* disable otg request until end of the first line - * in the vertical blank region - */ - REG_UPDATE(OTG_CONTROL, - OTG_MASTER_EN, 0); - REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, 0xf, OPTC_SEG1_SRC_SEL, 0xf, @@ -151,6 +145,12 @@ static bool optc35_disable_crtc(struct 
timing_generator *optc) OPTC_SEG3_SRC_SEL, 0xf, OPTC_NUM_OF_INPUT_SEGMENT, 0); + /* disable otg request until end of the first line + * in the vertical blank region + */ + REG_UPDATE(OTG_CONTROL, + OTG_MASTER_EN, 0); + REG_UPDATE(CONTROL, VTG0_ENABLE, 0); -- cgit v1.2.3 From 51c7e6ac24101af3147ebc45627810da367c6b66 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Wed, 20 Dec 2023 16:54:05 -0500 Subject: drm/amd/display: revert "for FPO & SubVP/DRR config program vmin/max" This reverts commit 6b2b782ad6a25734ae847d1659bea3f613dbb563. The original commit causes issues with certain features when DRR is disabled, need to revisit this change later after resolving issues with new DRR policy. Reviewed-by: Rodrigo Siqueira Signed-off-by: Martin Leung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 14 -------- .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 14 ++++++++ .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 11 +++---- .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 37 ---------------------- drivers/gpu/drm/amd/display/dc/inc/resource.h | 3 -- .../amd/display/dc/resource/dcn32/dcn32_resource.c | 2 +- .../amd/display/dc/resource/dcn32/dcn32_resource.h | 3 ++ .../display/dc/resource/dcn321/dcn321_resource.c | 2 +- 8 files changed, 24 insertions(+), 62 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 57f0ddd15923..f2abc1096ffb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -4986,20 +4986,6 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, return DC_OK; } -bool resource_subvp_in_use(struct dc *dc, - struct dc_state *context) -{ - uint32_t i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) - return true; - } - return false; -} - bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream) { if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index e4a328b45c8a..87760600e154 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -183,6 +183,20 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, return true; } +bool dcn32_subvp_in_use(struct dc *dc, + struct dc_state *context) +{ + uint32_t i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) + return true; + } + return false; +} + bool dcn32_mpo_in_use(struct dc_state *context) { uint32_t i; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index aa68d010cbfd..9f37f717a1f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -33,7 +33,6 @@ #include "dcn30/dcn30_resource.h" #include "link.h" #include "dc_state_priv.h" -#include "resource.h" #define DC_LOGGER_INIT(logger) @@ -292,7 +291,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, /* 
for subvp + DRR case, if subvp pipes are still present we support pstate */ if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported && - resource_subvp_in_use(dc, context)) + dcn32_subvp_in_use(dc, context)) vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; if (vlevel < context->bw_ctx.dml.vba.soc.num_states && @@ -2273,7 +2272,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, unsigned int dummy_latency_index = 0; int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool subvp_active = resource_subvp_in_use(dc, context); + bool subvp_in_use = dcn32_subvp_in_use(dc, context); unsigned int min_dram_speed_mts_margin; bool need_fclk_lat_as_dummy = false; bool is_subvp_p_drr = false; @@ -2282,7 +2281,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc_assert_fp_enabled(); /* need to find dummy latency index for subvp */ - if (subvp_active) { + if (subvp_in_use) { /* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */ if (!pstate_en) { context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; @@ -2468,7 +2467,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; } - if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) { + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) { /* find largest table entry that is lower than dram speed, * but lower than DPM0 still uses DPM0 */ @@ -3528,7 +3527,7 @@ void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb) void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context) { // WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue) - if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) && + if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) && dc->dml.soc.num_chans <= 8) { int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index bc71a9b058fe..0dfcb3cdcd20 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1882,42 +1882,6 @@ static void dcn20_program_pipe( } } -static void update_vmin_vmax_fams(struct dc *dc, - struct dc_state *context) -{ - uint32_t i; - struct drr_params params = {0}; - bool subvp_in_use = resource_subvp_in_use(dc, context); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (resource_is_pipe_type(pipe, OTG_MASTER) && - ((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM && - pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) { - if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) { - struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; - - /* DRR should be configured already if we're in active variable - * or active fixed, so only program if we're not in this state - 
*/ - params.vertical_total_min = pipe->stream->timing.v_total; - params.vertical_total_max = pipe->stream->timing.v_total; - tg->funcs->set_drr(tg, ¶ms); - } - } else { - if (resource_is_pipe_type(pipe, OTG_MASTER) && - !pipe->stream->vrr_active_variable && - !pipe->stream->vrr_active_fixed) { - struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg; - params.vertical_total_min = 0; - params.vertical_total_max = 0; - tg->funcs->set_drr(tg, ¶ms); - } - } - } -} - void dcn20_program_front_end_for_ctx( struct dc *dc, struct dc_state *context) @@ -1994,7 +1958,6 @@ void dcn20_program_front_end_for_ctx( && context->res_ctx.pipe_ctx[i].stream) hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true); - update_vmin_vmax_fams(dc, context); /* Disconnect mpcc */ for (i = 0; i < dc->res_pool->pipe_count; i++) diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 1d51fed12e20..c958ef37b78a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -609,9 +609,6 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( struct pipe_ctx *sec_pipe, bool odm); -bool resource_subvp_in_use(struct dc *dc, - struct dc_state *context); - /* A test harness interface that modifies dp encoder resources in the given dc * state and bypasses the need to revalidate. The interface assumes that the * test harness interface is called with pre-validated link config stored in the diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index ac04a9c9a3d8..c4d71e7f18af 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1899,7 +1899,7 @@ int dcn32_populate_dml_pipes_from_context( static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = resource_subvp_in_use, + .get_subvp_en = dcn32_subvp_in_use, }; void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 62611acd4bcb..0c87b0fabba7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -131,6 +131,9 @@ void dcn32_merge_pipes_for_subvp(struct dc *dc, bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, struct dc_state *context); +bool dcn32_subvp_in_use(struct dc *dc, + struct dc_state *context); + bool dcn32_mpo_in_use(struct dc_state *context); bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index e1ab207c46f1..74412e5f03fe 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1574,7 +1574,7 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool) static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap, - .get_subvp_en = resource_subvp_in_use, + .get_subvp_en = dcn32_subvp_in_use, }; static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -- cgit v1.2.3 From 
a465536ebff88fcc42e131a1b09bbe3df829117b Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Wed, 20 Dec 2023 16:56:11 -0500 Subject: drm/amd/display: revert "Optimize VRR updates to only necessary ones" This reverts commit 6e4337f695c25162f0296934152506ad596fcebf. The original commit causes regression in corner case with HDMI at specific timings. Reverting from staging to get the full suite to retest. Reviewed-by: Rodrigo Siqueira Signed-off-by: Martin Leung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 14 +++++++++----- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dc_stream.h | 2 ++ drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 8 ++++---- 5 files changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 2d7205058c64..69e726630241 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -411,12 +411,9 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * avoid conflicting with firmware updates. */ if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required) + if (dc->optimized_required || dc->wm_optimized_required) return false; - if (!memcmp(&stream->adjust, adjust, sizeof(*adjust))) - return true; - stream->adjust.v_total_max = adjust->v_total_max; stream->adjust.v_total_mid = adjust->v_total_mid; stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num; @@ -2230,6 +2227,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) } dc->optimized_required = false; + dc->wm_optimized_required = false; } bool dc_set_generic_gpio_for_stereo(bool enable, @@ -2652,6 +2650,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream( } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { dc->optimized_required = true; } + + dc->optimized_required |= dc->wm_optimized_required; } return type; @@ -2859,6 +2859,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_active_fixed) stream->vrr_active_fixed = *update->vrr_active_fixed; + if (update->crtc_timing_adjust) + stream->adjust = *update->crtc_timing_adjust; + if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -4288,7 +4291,8 @@ static bool full_update_required(struct dc *dc, stream_update->mst_bw_update || stream_update->func_shaper || stream_update->lut3d_func || - stream_update->pending_test_pattern)) + stream_update->pending_test_pattern || + stream_update->crtc_timing_adjust)) return true; if (stream) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f30a341bc090..7222f63caf28 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1036,6 +1036,7 @@ struct dc { /* Require to optimize clocks and bandwidth for added/removed planes */ bool optimized_required; + bool wm_optimized_required; bool idle_optimizations_allowed; bool enable_c20_dtm_b0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index a23eebd9933b..ee10941caa59 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -139,6 +139,7 @@ union stream_update_flags { uint32_t wb_update:1; uint32_t dsc_changed : 1; uint32_t mst_bw : 1; + 
uint32_t crtc_timing_adjust : 1; uint32_t fams_changed : 1; } bits; @@ -325,6 +326,7 @@ struct dc_stream_update { struct dc_3dlut *lut3d_func; struct test_pattern *pending_test_pattern; + struct dc_crtc_timing_adjust *crtc_timing_adjust; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 51dd2ae09b2a..6dd479e8a348 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3076,7 +3076,7 @@ void dcn10_prepare_bandwidth( context, false); - dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 0dfcb3cdcd20..e931342fcf4c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2159,10 +2159,10 @@ void dcn20_prepare_bandwidth( } /* program dchubbub watermarks: - * For assigning optimized_required, use |= operator since we don't want + * For assigning wm_optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); @@ -2175,10 +2175,10 @@ void dcn20_prepare_bandwidth( if (hubbub->funcs->program_compbuf_size) { if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; - dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); -- cgit v1.2.3 From 5f3bce13266e6fe2f7a46f94d8bc94d5274e276b Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Thu, 14 Dec 2023 23:16:34 +0800 Subject: drm/amd/display: Request usb4 bw for mst streams [WHY] When usb4 bandwidth allocation mode is enabled, driver need to request bandwidth from connection manager. For mst link, the requested bandwidth should be big enough for all remote streams. [HOW] - If mst link, the requested bandwidth should be the sum of all mst streams bandwidth added with dp MTPH overhead. - Allocate/deallcate usb4 bandwidth when setting dpms on/off. - When doing display mode validation, driver also need to consider total bandwidth of all mst streams for mst link. 
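As a rough sketch of the [HOW] rule above (illustrative only; the struct and function names below are hypothetical and not part of this patch -- the real logic is in link_validate_dpia_bandwidth() in the diff further down), the per-link requirement is the sum of every mst stream routed through that dpia link plus one dp tunneling (MTPH) overhead term for the link:

    struct example_stream {
            int dpia_link_id;       /* which dpia link carries this stream */
            int stream_bw_kbps;     /* bandwidth required by the stream timing */
    };

    /* Sum the bandwidth of all mst streams sharing one dpia link and add the
     * dp tunneling (MTPH) overhead once for that link.
     */
    static int example_dpia_bw_needed(const struct example_stream *streams,
                                      int num_streams, int dpia_link_id,
                                      int dp_overhead_kbps)
    {
            int total_kbps = 0;
            int i;

            for (i = 0; i < num_streams; i++)
                    if (streams[i].dpia_link_id == dpia_link_id)
                            total_kbps += streams[i].stream_bw_kbps;

            return total_kbps + dp_overhead_kbps;
    }

For 8b/10b mst links the patch derives that overhead as roughly 1/64 of the raw link bandwidth, since one of the 64 time slots in an MTP is used for the MTPH (see link_dp_dpia_get_dp_overhead_in_dp_tunneling() below).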
Reviewed-by: Cruise Hung Acked-by: Rodrigo Siqueira Signed-off-by: Peichen Huang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 12 +++-- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 42 ++++++++++++--- .../gpu/drm/amd/display/dc/link/link_validation.c | 60 ++++++++++++++++++---- .../display/dc/link/protocols/link_dp_dpia_bw.c | 59 ++++++++++++++++----- .../display/dc/link/protocols/link_dp_dpia_bw.h | 9 ++++ 5 files changed, 144 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 4f276169e05a..b08ccb8c68bc 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1140,23 +1140,25 @@ struct dc_panel_config { } ilr; }; +#define MAX_SINKS_PER_LINK 4 + /* * USB4 DPIA BW ALLOCATION STRUCTS */ struct dc_dpia_bw_alloc { - int sink_verified_bw; // The Verified BW that sink can allocated and use that has been verified already - int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated - int sink_max_bw; // The Max BW that sink can require/support + int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks + int link_verified_bw; // The Verified BW that link can allocated and use that has been verified already + int link_max_bw; // The Max BW that link can require/support + int allocated_bw; // The Actual Allocated BW for this DPIA int estimated_bw; // The estimated available BW for this DPIA int bw_granularity; // BW Granularity + int dp_overhead; // DP overhead in dp tunneling bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM bool response_ready; // Response ready from the CM side uint8_t nrd_max_lane_count; // Non-reduced max lane count uint8_t nrd_max_link_rate; // Non-reduced max link rate }; -#define MAX_SINKS_PER_LINK 4 - enum dc_hpd_enable_select { HPD_EN_FOR_ALL_EDP = 0, HPD_EN_FOR_PRIMARY_EDP_ONLY, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 5fe8b4871c77..3de148004c06 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2005,17 +2005,11 @@ static enum dc_status enable_link_dp(struct dc_state *state, } } - /* - * If the link is DP-over-USB4 do the following: - * - Train with fallback when enabling DPIA link. Conventional links are + /* Train with fallback when enabling DPIA link. Conventional links are * trained with fallback during sink detection. - * - Allocate only what the stream needs for bw in Gbps. Inform the CM - * in case stream needs more or less bw from what has been allocated - * earlier at plug time. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) do_fallback = true; - } /* * Temporary w/a to get DP2.0 link rates to work with SST. 
@@ -2197,6 +2191,32 @@ static enum dc_status enable_link( return status; } +static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw) +{ + return true; +} + +static bool allocate_usb4_bandwidth(struct dc_stream_state *stream) +{ + bool ret; + + int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing, + dc_link_get_highest_encoding_format(stream->sink->link)); + + ret = allocate_usb4_bandwidth_for_stream(stream, bw); + + return ret; +} + +static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream) +{ + bool ret; + + ret = allocate_usb4_bandwidth_for_stream(stream, 0); + + return ret; +} + void link_set_dpms_off(struct pipe_ctx *pipe_ctx) { struct dc *dc = pipe_ctx->stream->ctx->dc; @@ -2232,6 +2252,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) update_psp_stream_config(pipe_ctx, true); dc->hwss.blank_stream(pipe_ctx); + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + deallocate_usb4_bandwidth(pipe_ctx->stream); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && @@ -2474,6 +2497,9 @@ void link_set_dpms_on( } } + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + allocate_usb4_bandwidth(pipe_ctx->stream); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index b45fda96eaf6..8fe66c367850 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -346,23 +346,61 @@ enum dc_status link_validate_mode_timing( return DC_OK; } +/* + * This function calculates the bandwidth required for the stream timing + * and aggregates the stream bandwidth for the respective dpia link + * + * @stream: pointer to the dc_stream_state struct instance + * @num_streams: number of streams to be validated + * + * return: true if validation is succeeded + */ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams) { - bool ret = true; - int bw_needed[MAX_DPIA_NUM]; - struct dc_link *link[MAX_DPIA_NUM]; - - if (!num_streams || num_streams > MAX_DPIA_NUM) - return ret; + int bw_needed[MAX_DPIA_NUM] = {0}; + struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; + int num_dpias = 0; for (uint8_t i = 0; i < num_streams; ++i) { + if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { + /* new dpia sst stream, check whether it exceeds max dpia */ + if (num_dpias >= MAX_DPIA_NUM) + return false; - link[i] = stream[i].link; - bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, - dc_link_get_highest_encoding_format(link[i])); + dpia_link[num_dpias] = stream[i].link; + bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, + dc_link_get_highest_encoding_format(dpia_link[num_dpias])); + num_dpias++; + } else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + uint8_t j = 0; + /* check whether its a known dpia link */ + for (; j < num_dpias; ++j) { + if (dpia_link[j] == stream[i].link) + break; + } + + if (j == num_dpias) { + /* new dpia mst stream, check whether it exceeds max dpia */ + if (num_dpias >= MAX_DPIA_NUM) + return false; + else { + dpia_link[j] = stream[i].link; + num_dpias++; + } + } + + bw_needed[j] += 
dc_bandwidth_in_kbps_from_timing(&stream[i].timing, + dc_link_get_highest_encoding_format(dpia_link[j])); + } } - ret = dpia_validate_usb4_bw(link, bw_needed, num_streams); + /* Include dp overheads */ + for (uint8_t i = 0; i < num_dpias; ++i) { + int dp_overhead = 0; + + dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]); + bw_needed[i] += dp_overhead; + } - return ret; + return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index a7aa8c9da868..4ef1a6a1d129 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -54,12 +54,18 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp) static void reset_bw_alloc_struct(struct dc_link *link) { link->dpia_bw_alloc_config.bw_alloc_enabled = false; - link->dpia_bw_alloc_config.sink_verified_bw = 0; - link->dpia_bw_alloc_config.sink_max_bw = 0; + link->dpia_bw_alloc_config.link_verified_bw = 0; + link->dpia_bw_alloc_config.link_max_bw = 0; + link->dpia_bw_alloc_config.allocated_bw = 0; link->dpia_bw_alloc_config.estimated_bw = 0; link->dpia_bw_alloc_config.bw_granularity = 0; + link->dpia_bw_alloc_config.dp_overhead = 0; link->dpia_bw_alloc_config.response_ready = false; - link->dpia_bw_alloc_config.sink_allocated_bw = 0; + link->dpia_bw_alloc_config.nrd_max_lane_count = 0; + link->dpia_bw_alloc_config.nrd_max_link_rate = 0; + for (int i = 0; i < MAX_SINKS_PER_LINK; i++) + link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0; + DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index); } #define BW_GRANULARITY_0 4 // 0.25 Gbps @@ -210,8 +216,8 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) && (link_dpia_secondary->hpd_status && link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) { - total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + - link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw; + total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + + link_dpia_secondary->dpia_bw_alloc_config.allocated_bw; } else if (link_dpia_primary->hpd_status && link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) { total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw; @@ -264,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw) /* Error check whether requested and allocated are equal */ req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) { + if (req_bw == link->dpia_bw_alloc_config.allocated_bw) { DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", __func__, link->link_index); } @@ -387,9 +393,9 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n", __func__, link->link_index); DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n", - __func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed); + __func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed); - link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed; + link->dpia_bw_alloc_config.allocated_bw = bw_needed; link->dpia_bw_alloc_config.response_ready = true; break; @@ -427,8 +433,8 @@ int 
dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea if (link->hpd_status && peak_bw > 0) { // If DP over USB4 then we need to check BW allocation - link->dpia_bw_alloc_config.sink_max_bw = peak_bw; - set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw); + link->dpia_bw_alloc_config.link_max_bw = peak_bw; + set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw); do { if (timeout > 0) @@ -440,8 +446,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea if (!timeout) ret = 0;// ERROR TIMEOUT waiting for response for allocating bw - else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) - ret = link->dpia_bw_alloc_config.sink_allocated_bw; + else if (link->dpia_bw_alloc_config.allocated_bw > 0) + ret = link->dpia_bw_alloc_config.allocated_bw; } //2. Cold Unplug else if (!link->hpd_status) @@ -450,7 +456,6 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea out: return ret; } - bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) { bool ret = false; @@ -458,7 +463,7 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", __func__, link->link_index, link->hpd_status, - link->dpia_bw_alloc_config.sink_allocated_bw, req_bw); + link->dpia_bw_alloc_config.allocated_bw, req_bw); if (!get_bw_alloc_proceed_flag(link)) goto out; @@ -523,3 +528,29 @@ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const return ret; } + +int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link) +{ + int dp_overhead = 0, link_mst_overhead = 0; + + if (!get_bw_alloc_proceed_flag((link))) + return dp_overhead; + + /* if its mst link, add MTPH overhead */ + if ((link->type == dc_connection_mst_branch) && + !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { + /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH + * MST overhead is 1/64 of link bandwidth (excluding any overhead) + */ + const struct dc_link_settings *link_cap = + dc_link_get_link_cap(link); + uint32_t link_bw_in_kbps = + link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8; + link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 
1 : 0); + } + + /* add all the overheads */ + dp_overhead = link_mst_overhead; + + return dp_overhead; +} diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 981bc4eb6120..3b6d8494f9d5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -99,4 +99,13 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res */ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias); +/* + * Obtain all the DP overheads in dp tunneling for the dpia link + * + * @link: pointer to the dc_link struct instance + * + * return: DP overheads in DP tunneling + */ +int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link); + #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */ -- cgit v1.2.3 From bf282eb92b84709d99186ad5940b9997eb3c1ff2 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Wed, 20 Dec 2023 10:34:32 -0500 Subject: Revert "drm/amd/display: Fix conversions between bytes and KB" This reverts commit d0f639c5869399bf6dde4d694d5f8c0ab8c0ec46. The previous commit causes failure to light up for 1080p eDP + 8k HDMI panel combo. Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml2/display_mode_core.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index b95bf27f2fe2..a6b938a12de1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6229,7 +6229,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - 
mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void 
dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], -- cgit v1.2.3 From 2476bf4328d1a55db709ce9ad2c274d26040311b Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 21 Dec 2023 09:42:28 -0500 Subject: drm/amd/display: Update z8 latency Adjust z8 latency for performance. Reviewed-by: Muhammad Ahmed Acked-by: Rodrigo Siqueira Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 3d12dabd39e4..475c4ec43c01 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -166,9 +166,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .num_states = 5, .sr_exit_time_us = 14.0, .sr_enter_plus_exit_time_us = 16.0, - .sr_exit_z8_time_us = 525.0, - .sr_enter_plus_exit_z8_time_us = 715.0, - .fclk_change_latency_us = 20.0, + .sr_exit_z8_time_us = 210.0, + .sr_enter_plus_exit_z8_time_us = 320.0, + .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, -- cgit v1.2.3 From ab76bd72ee12d9117c3a16d749ffce84f5b235bf Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Tue, 19 Dec 2023 15:51:24 -0500 Subject: drm/amd/display: Dpia hpd status not in sync after S4 [Why] Dpia hpd status not in sync causing driver not enabling BW Alloc after S4. [How] Update hpd_status of the link when querying hpd state from dmub in dpia_query_hpd_status(). 
Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Meenakshikumar Somasundaram Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/link/protocols/link_dp_dpia.c | 36 +++++++++++++--------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 982eda3c46f5..6af42ba9885c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -82,25 +82,33 @@ bool dpia_query_hpd_status(struct dc_link *link) { union dmub_rb_cmd cmd = {0}; struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv; - bool is_hpd_high = false; /* prepare QUERY_HPD command */ cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; - /* Return HPD status reported by DMUB if query successfully executed. */ - if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && - cmd.query_hpd.data.status == AUX_RET_SUCCESS) - is_hpd_high = cmd.query_hpd.data.result; - - DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n", - __func__, - link->link_index, - link->link_id.enum_id - ENUM_ID_1, - cmd.query_hpd.data.status, - cmd.query_hpd.data.result); - - return is_hpd_high; + /* Query dpia hpd status from dmub */ + if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && + cmd.query_hpd.data.status == AUX_RET_SUCCESS) { + DC_LOG_DEBUG("%s: for link(%d) dpia(%d) success, current_hpd_status(%d) new_hpd_status(%d)\n", + __func__, + link->link_index, + link->link_id.enum_id - ENUM_ID_1, + link->hpd_status, + cmd.query_hpd.data.result); + link->hpd_status = cmd.query_hpd.data.result; + } else { + DC_LOG_ERROR("%s: for link(%d) dpia(%d) failed with status(%d), current_hpd_status(%d) new_hpd_status(0)\n", + __func__, + link->link_index, + link->link_id.enum_id - ENUM_ID_1, + cmd.query_hpd.data.status, + link->hpd_status); + link->hpd_status = false; + } + + return link->hpd_status; } -- cgit v1.2.3 From d32156a07575d69916944ce0e2d4a71a4c95979d Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 17 Dec 2023 19:36:16 -0500 Subject: drm/amd/display: 3.2.266 This version brings along following fixes: - Improve z8/z10 support. - Revert some of the VRR optimization. - Improve usb4 when using MST. Acked-by: Rodrigo Siqueira Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7222f63caf28..5d7aa882416b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -51,7 +51,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.265" +#define DC_VER "3.2.266" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit v1.2.3 From 90bd01471d1c7f2d2db3c69259e247357991fe50 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 4 Jan 2024 09:34:25 +0800 Subject: drm/amdgpu: Drop unnecessary sentences about CE and deferred error. Remove "no user action is needed" for correctable and deferred error to avoid confusion. 
Signed-off-by: Candice Li Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 +++++--------- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 3 +-- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 3 +-- drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 2 +- 4 files changed, 8 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index fc42fb6ee191..9a64584318e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1067,8 +1067,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, mcm_info = &err_info->mcm_info; if (err_info->ce_count) { dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new correctable hardware errors detected in %s block, " - "no user action is needed\n", + "%lld new correctable hardware errors detected in %s block\n", mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, @@ -1080,8 +1079,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; dev_info(adev->dev, "socket: %d, die: %d, " - "%lld correctable hardware errors detected in total in %s block, " - "no user action is needed\n", + "%lld correctable hardware errors detected in total in %s block\n", mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name); } } @@ -1108,16 +1106,14 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " "%ld correctable hardware errors " - "detected in %s block, no user " - "action is needed.\n", + "detected in %s block\n", adev->smuio.funcs->get_socket_id(adev), adev->smuio.funcs->get_die_id(adev), ras_mgr->err_data.ce_count, blk_name); } else { dev_info(adev->dev, "%ld correctable hardware errors " - "detected in %s block, no user " - "action is needed.\n", + "detected in %s block\n", ras_mgr->err_data.ce_count, blk_name); } @@ -1920,7 +1916,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj struct amdgpu_iv_entry *entry) { dev_info(obj->adev->dev, - "Poison is created, no user action is needed.\n"); + "Poison is created\n"); } static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 6d24c84924cb..19986ff6a48d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -401,8 +401,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device if (err_data.ce_count) dev_info(adev->dev, "%ld correctable hardware " - "errors detected in %s block, " - "no user action is needed.\n", + "errors detected in %s block\n", obj->err_data.ce_count, get_ras_block_str(adev->nbio.ras_if)); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 25a3da83e0fb..e90f33780803 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -597,8 +597,7 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device if (err_data.ce_count) dev_info(adev->dev, "%ld correctable hardware " - "errors detected in %s block, " - "no user action is needed.\n", + "errors detected in %s block\n", obj->err_data.ce_count, get_ras_block_str(adev->nbio.ras_if)); diff --git 
a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 530549314ce4..a3ee3c4c650f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -64,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, uint64_t reg_value; if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + dev_info(adev->dev, "Deferred error\n"); if (mc_umc_status) dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); -- cgit v1.2.3 From f4a94dbb6dc0bed10a5fc63718d00f1de45b12c0 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 5 Jan 2024 17:33:34 +0800 Subject: drm/amdgpu: correct the cu count for gfx v11 Correct the algorithm of active CU to skip disabled sa for gfx v11. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2fbcd9765980..c7242877d5d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6383,6 +6383,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + bitmap = i * adev->gfx.config.max_sh_per_se + j; + if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) + continue; mask = 1; counter = 0; gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); -- cgit v1.2.3 From fb1e91719983c529f85602fdd08c0b7dbf384b1c Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 4 Jan 2024 10:27:10 +0800 Subject: drm/amdgpu: Support poison error injection via ras_ctrl debugfs Support poison error injection. Signed-off-by: Candice Li Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9a64584318e0..3f7f5c12961d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -305,11 +305,13 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return -EINVAL; data->head.block = block_id; - /* only ue and ce errors are supported */ + /* only ue, ce and poison errors are supported */ if (!memcmp("ue", err, 2)) data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; else if (!memcmp("ce", err, 2)) data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + else if (!memcmp("poison", err, 6)) + data->head.type = AMDGPU_RAS_ERROR__POISON; else return -EINVAL; @@ -431,9 +433,10 @@ static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev, * The block is one of: umc, sdma, gfx, etc. * see ras_block_string[] for details * - * The error type is one of: ue, ce, where, + * The error type is one of: ue, ce and poison where, * ue is multi-uncorrectable * ce is single-correctable + * poison is poison * * The sub-block is a the sub-block index, pass 0 if there is no sub-block. * The address and value are hexadecimal numbers, leading 0x is optional. 
-- cgit v1.2.3 From 73cb81dc548f154547d9205d5b9603ba10e2a402 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 29 Dec 2023 14:54:35 +0800 Subject: drm/amdgpu: Packed socket_id to ras feature mask Initialize RAS feature mask bit[31:29] with socket_id. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3f7f5c12961d..31823a30dea2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2919,6 +2919,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) amdgpu_ras_query_poison_mode(adev); + /* Packed socket_id to ras feature mask bits[31:29] */ + if (adev->smuio.funcs && + adev->smuio.funcs->get_socket_id) + con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29); + /* Get RAS schema for particular SOC */ con->schema = amdgpu_get_ras_schema(adev); -- cgit v1.2.3 From 2a9de42e8d3c82c6990d226198602be44f43f340 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 29 Dec 2023 15:19:25 -0500 Subject: drm/amdkfd: Fix lock dependency warning with srcu ====================================================== WARNING: possible circular locking dependency detected 6.5.0-kfd-yangp #2289 Not tainted ------------------------------------------------------ kworker/0:2/996 is trying to acquire lock: (srcu){.+.+}-{0:0}, at: __synchronize_srcu+0x5/0x1a0 but task is already holding lock: ((work_completion)(&svms->deferred_list_work)){+.+.}-{0:0}, at: process_one_work+0x211/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 ((work_completion)(&svms->deferred_list_work)){+.+.}-{0:0}: __flush_work+0x88/0x4f0 svm_range_list_lock_and_flush_work+0x3d/0x110 [amdgpu] svm_range_set_attr+0xd6/0x14c0 [amdgpu] kfd_ioctl+0x1d1/0x630 [amdgpu] __x64_sys_ioctl+0x88/0xc0 -> #2 (&info->lock#2){+.+.}-{3:3}: __mutex_lock+0x99/0xc70 amdgpu_amdkfd_gpuvm_restore_process_bos+0x54/0x740 [amdgpu] restore_process_helper+0x22/0x80 [amdgpu] restore_process_worker+0x2d/0xa0 [amdgpu] process_one_work+0x29b/0x560 worker_thread+0x3d/0x3d0 -> #1 ((work_completion)(&(&process->restore_work)->work)){+.+.}-{0:0}: __flush_work+0x88/0x4f0 __cancel_work_timer+0x12c/0x1c0 kfd_process_notifier_release_internal+0x37/0x1f0 [amdgpu] __mmu_notifier_release+0xad/0x240 exit_mmap+0x6a/0x3a0 mmput+0x6a/0x120 do_exit+0x322/0xb90 do_group_exit+0x37/0xa0 __x64_sys_exit_group+0x18/0x20 do_syscall_64+0x38/0x80 -> #0 (srcu){.+.+}-{0:0}: __lock_acquire+0x1521/0x2510 lock_sync+0x5f/0x90 __synchronize_srcu+0x4f/0x1a0 __mmu_notifier_release+0x128/0x240 exit_mmap+0x6a/0x3a0 mmput+0x6a/0x120 svm_range_deferred_list_work+0x19f/0x350 [amdgpu] process_one_work+0x29b/0x560 worker_thread+0x3d/0x3d0 other info that might help us debug this: Chain exists of: srcu --> &info->lock#2 --> (work_completion)(&svms->deferred_list_work) Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((work_completion)(&svms->deferred_list_work)); lock(&info->lock#2); lock((work_completion)(&svms->deferred_list_work)); sync(srcu); Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 
d46c145835e0..4e9f07c7a937 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2340,8 +2340,10 @@ retry: mutex_unlock(&svms->lock); mmap_write_unlock(mm); - /* Pairs with mmget in svm_range_add_list_work */ - mmput(mm); + /* Pairs with mmget in svm_range_add_list_work. If dropping the + * last mm refcount, schedule release work to avoid circular locking + */ + mmput_async(mm); spin_lock(&svms->deferred_list_lock); } -- cgit v1.2.3 From c147ddc68e741aed78bba796effe049344d87ab8 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 3 Jan 2024 18:13:04 -0500 Subject: drm/amdkfd: Fix sparse __rcu annotation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properly mark kfd_process->ef as __rcu and consistently use the right accessor functions. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312052245.yFpBSgNH-lkp@intel.com/ Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 +++++-- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cf6ed5fce291..f262b9d89541 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -311,7 +311,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, - struct dma_fence **ef); + struct dma_fence __rcu **ef); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d17b2452cb1f..f183d7faeeec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2802,7 +2802,7 @@ unlock_out: put_task_struct(usertask); } -static void replace_eviction_fence(struct dma_fence **ef, +static void replace_eviction_fence(struct dma_fence __rcu **ef, struct dma_fence *new_ef) { struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true @@ -2837,7 +2837,7 @@ static void replace_eviction_fence(struct dma_fence **ef, * 7. Add fence to all PD and PT BOs. * 8. 
Unreserve all BOs */ -int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef) { struct amdkfd_process_info *process_info = info; struct amdgpu_vm *peer_vm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 745024b31340..17fbedbf3651 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -917,7 +917,7 @@ struct kfd_process { * fence will be triggered during eviction and new one will be created * during restore */ - struct dma_fence *ef; + struct dma_fence __rcu *ef; /* Work items for evicting and restoring BOs */ struct delayed_work eviction_work; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 71df51fcc1b0..717a60d7a4ea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1110,6 +1110,7 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); + struct dma_fence *ef; kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); @@ -1118,7 +1119,9 @@ static void kfd_process_wq_release(struct work_struct *work) * destroyed. This allows any BOs to be freed without * triggering pointless evictions or waiting for fences. */ - dma_fence_signal(p->ef); + synchronize_rcu(); + ef = rcu_access_pointer(p->ef); + dma_fence_signal(ef); kfd_process_remove_sysfs(p); @@ -1127,7 +1130,7 @@ static void kfd_process_wq_release(struct work_struct *work) svm_range_list_fini(p); kfd_process_destroy_pdds(p); - dma_fence_put(p->ef); + dma_fence_put(ef); kfd_event_free_process(p); -- cgit v1.2.3 From c2ab9ce0ee7225fc05f58a6671c43b8a3684f530 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Fri, 5 Jan 2024 19:40:50 -0500 Subject: Revert "drm/amd/display: fix bandwidth validation failure on DCN 2.1" This commit causes dmesg-warn on several IGT tests on DCN 3.1.6: *ERROR* link_enc_cfg_validate: Invalid link encoder assignments - 0x1c Affected IGT tests include: - amdgpu/[amd_assr|amd_plane|amd_hotplug] - kms_atomic - kms_color - kms_flip - kms_properties - kms_universal_plane and some other tests This reverts commit 3a0fa3bc245ef92838a8296e0055569b8dff94c4. 
Cc: Melissa Wen Cc: Hamza Mahfooz Reviewed-by: Rodrigo Siqueira Signed-off-by: Ivan Lipski Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f6575d7dee97..10b2a896c498 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10753,7 +10753,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n"); goto fail; } - status = dc_validate_global_state(dc, dm_state->context, false); + status = dc_validate_global_state(dc, dm_state->context, true); if (status != DC_OK) { DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)", dc_status_to_str(status), status); -- cgit v1.2.3 From 50e60184bfe72400c49f7806af97edaf693ecd45 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 2 Jan 2024 15:53:01 -0500 Subject: drm/amdgpu: make a correction on comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a generic comment for AMDGPU_VM_RESERVED_VRAM size. Signed-off-by: James Zhu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index b6cd565562ad..4740dd65b99d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -116,7 +116,7 @@ struct amdgpu_mem_stats; #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 -/* Reserve 4MB VRAM for page tables */ +/* How much VRAM be reserved for page tables */ #define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) /* -- cgit v1.2.3 From 7075893d1d68b2b3517be250a02d86e76554ed22 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Fri, 5 Jan 2024 21:02:09 -0100 Subject: drm/amd/display: cleanup inconsistent indenting in amdgpu_dm_color smatch warnings: amdgpu_dm_update_plane_color_mgmt() warn: inconsistent indenting Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401051643.PPdbmG1U-lkp@intel.com/ Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 9b527bffe11a..c87b64e464ed 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1239,7 +1239,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, if (has_crtc_cm_degamma && ret != -EINVAL) { drm_dbg_kms(crtc->base.crtc->dev, "doesn't support plane and CRTC degamma at the same time\n"); - return -EINVAL; + return -EINVAL; } /* If we are here, it means we don't have plane degamma settings, check -- cgit v1.2.3 From a4a9779d7642111b4fb6e7415aae9da9783850bd Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Tue, 2 Jan 2024 13:57:39 +0200 Subject: drm/i915/display: Fix C20 pll selection for state verification Add pll selection check for C20 as well as clock state verification0. 
We have been relying on sw state to select A or B pll's. This is incorrect as the hw might see this selection differently. This patch fixes this shortcoming by reading pll selection for both sw and hw states and compares if these two selections match. Fixes: 59be90248b42 ("drm/i915/mtl: C20 state verification") v2: reword commit message and include fix to a original commit (Imre) Compare pll selection (Jani) Signed-off-by: Mika Kahola Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240102115741.118525-2-mika.kahola@intel.com (cherry picked from commit f4304beadd88d074333b23fdc7f35d00ee763e14) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 884a1da36089..6b25e195232f 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -3067,24 +3067,29 @@ static void intel_c20pll_state_verify(const struct intel_crtc_state *state, { struct drm_i915_private *i915 = to_i915(crtc->base.dev); const struct intel_c20pll_state *mpll_sw_state = &state->cx0pll_state.c20; - bool use_mplla; + bool sw_use_mpllb = mpll_sw_state->tx[0] & C20_PHY_USE_MPLLB; + bool hw_use_mpllb = mpll_hw_state->tx[0] & C20_PHY_USE_MPLLB; int i; - use_mplla = intel_c20_use_mplla(mpll_hw_state->clock); - if (use_mplla) { - for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mplla); i++) { - I915_STATE_WARN(i915, mpll_hw_state->mplla[i] != mpll_sw_state->mplla[i], - "[CRTC:%d:%s] mismatch in C20MPLLA: Register[%d] (expected 0x%04x, found 0x%04x)", - crtc->base.base.id, crtc->base.name, i, - mpll_sw_state->mplla[i], mpll_hw_state->mplla[i]); - } - } else { + I915_STATE_WARN(i915, sw_use_mpllb != hw_use_mpllb, + "[CRTC:%d:%s] mismatch in C20: Register MPLLB selection (expected %d, found %d)", + crtc->base.base.id, crtc->base.name, + sw_use_mpllb, hw_use_mpllb); + + if (hw_use_mpllb) { for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mpllb); i++) { I915_STATE_WARN(i915, mpll_hw_state->mpllb[i] != mpll_sw_state->mpllb[i], "[CRTC:%d:%s] mismatch in C20MPLLB: Register[%d] (expected 0x%04x, found 0x%04x)", crtc->base.base.id, crtc->base.name, i, mpll_sw_state->mpllb[i], mpll_hw_state->mpllb[i]); } + } else { + for (i = 0; i < ARRAY_SIZE(mpll_sw_state->mplla); i++) { + I915_STATE_WARN(i915, mpll_hw_state->mplla[i] != mpll_sw_state->mplla[i], + "[CRTC:%d:%s] mismatch in C20MPLLA: Register[%d] (expected 0x%04x, found 0x%04x)", + crtc->base.base.id, crtc->base.name, i, + mpll_sw_state->mplla[i], mpll_hw_state->mplla[i]); + } } for (i = 0; i < ARRAY_SIZE(mpll_sw_state->tx); i++) { -- cgit v1.2.3 From ae8986e681e9c26fb6c140ae1ed41e6d74d38fc4 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 3 Jan 2024 17:26:09 +0200 Subject: drm/i915/dp: Fix the PSR debugfs entries wrt. MST connectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MST connectors don't have a static attached encoder, as their encoder can change depending on the pipe they use; so the encoder for an MST connector can't be retrieved using intel_dp_attached_encoder() (which may return NULL for MST). Most of the PSR debugfs entries depend on a static connector -> encoder mapping which is only true for eDP and SST DP connectors and not for MST. 
These debugfs entries were enabled for MST connectors as well recently to provide PR information for them, but handling MST connectors needs more changes. Fix this by not adding for now the PSR entries on MST connectors. To make things more uniform add the entries for SST connectors on all platforms, not just on platforms supporting DP2.0. v2: - Keep adding the entries for SST connectors. (Jouni) - Add a TODO: comment for MST support. Fixes: ef75c25e8fed ("drm/i915/panelreplay: Debugfs support for panel replay") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9850 Cc: Animesh Manna Cc: Jouni Högander Reviewed-by: Jouni Högander Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240103152609.2434100-1-imre.deak@intel.com (cherry picked from commit 9b0b61c5bc08e1aa55a0c1e7cda28f952b2d02cc) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_psr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index b6e2e70e1290..8f702c3fc62d 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -3319,11 +3319,11 @@ void intel_psr_connector_debugfs_add(struct intel_connector *connector) struct drm_i915_private *i915 = to_i915(connector->base.dev); struct dentry *root = connector->base.debugfs_entry; - if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) { - if (!(HAS_DP20(i915) && - connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort)) - return; - } + /* TODO: Add support for MST connectors as well. */ + if ((connector->base.connector_type != DRM_MODE_CONNECTOR_eDP && + connector->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) || + connector->mst_port) + return; debugfs_create_file("i915_psr_sink_status", 0444, root, connector, &i915_psr_sink_status_fops); -- cgit v1.2.3 From 11809687954ab2a073ec5a4bafd8281a42ff407a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 4 Jan 2024 18:46:00 +0200 Subject: drm/i915: don't make assumptions about intel_wakeref_t type intel_wakeref_t is supposed to be a mostly opaque cookie to its users. It should only be checked for being non-zero and set to zero. Debug logging its actual value is meaningless. Switch to just debug logging whether the async_put_wakeref is non-zero. The issue dates back to much earlier than commit b49e894c3fd8 ("drm/i915: Replace custom intel runtime_pm tracker with ref_tracker library"), but this is the one that brought about a build failure due to the printf format. 
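As an aside, the intended usage pattern looks roughly like the sketch below (illustrative only, using the existing i915 runtime-pm helpers; do_work() is a hypothetical placeholder):

	intel_wakeref_t wakeref;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);	/* acquire */
	if (wakeref) {						/* only ever tested against zero */
		do_work(i915);					/* hypothetical work done under the wakeref */
		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
		wakeref = 0;					/* reset to zero, never printed raw */
	}

Debug output should report whether the cookie is set (as done here with str_yes_no()), not its numeric value.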
Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/r/20240102111222.2db11208@canb.auug.org.au Fixes: b49e894c3fd8 ("drm/i915: Replace custom intel runtime_pm tracker with ref_tracker library") Cc: Andrzej Hajda Cc: Imre Deak Signed-off-by: Jani Nikula Reviewed-by: Imre Deak Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240104164600.783371-1-jani.nikula@intel.com (cherry picked from commit de06b42edc5bf05aefbb7e2f59475d6022ed57e1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_display_power.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 5f091502719b..6fd4fa52253a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -405,8 +405,8 @@ print_async_put_domains_state(struct i915_power_domains *power_domains) struct drm_i915_private, display.power.domains); - drm_dbg(&i915->drm, "async_put_wakeref %lu\n", - power_domains->async_put_wakeref); + drm_dbg(&i915->drm, "async_put_wakeref: %s\n", + str_yes_no(power_domains->async_put_wakeref)); print_power_domains(power_domains, "async_put_domains[0]", &power_domains->async_put_domains[0]); -- cgit v1.2.3 From 584ebbefd12296c6bad009c8a0c9e610eb8283c8 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 13 Dec 2023 14:46:29 +0530 Subject: drm/i915/dp: Fix the max DSC bpc supported by source Use correct helper for getting max DSC bpc supported by the source. Fixes: 1c56e9a39833 ("drm/i915/dp: Get optimal link config to have best compressed bpp") Cc: Ankit Nautiyal Cc: Stanislav Lisovskiy Cc: Jani Nikula Signed-off-by: Ankit Nautiyal Reviewed-by: Swati Sharma Link: https://patchwork.freedesktop.org/patch/msgid/20231213091632.431557-3-ankit.k.nautiyal@intel.com (cherry picked from commit cd7b0b2dd3d9fecc6057c07b40e8087db2f9f71a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 3b2482bf683f..c3b906ebe542 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2101,7 +2101,7 @@ static int intel_dp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, } } - dsc_max_bpc = intel_dp_dsc_min_src_input_bpc(i915); + dsc_max_bpc = intel_dp_dsc_max_src_input_bpc(i915); if (!dsc_max_bpc) return -EINVAL; -- cgit v1.2.3 From 30e18a89fb1f84718a174bc02807bd9a590e2bd0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 26 Dec 2023 11:54:29 -0800 Subject: drm/i915/gem: reconcile Excess struct member kernel-doc warnings Document nested struct members with full names as described in Documentation/doc-guide/kernel-doc.rst. 
i915_gem_context_types.h:420: warning: Excess struct member 'lock' description in 'i915_gem_context' Signed-off-by: Randy Dunlap Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-gfx@lists.freedesktop.org Cc: Jonathan Corbet Cc: dri-devel@lists.freedesktop.org Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231226195432.10891-1-rdunlap@infradead.org (cherry picked from commit 7353c3d7c15017140a8b984e41f94be0bf535e73) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index c573c067779f..03bc7f9d191b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -412,9 +412,9 @@ struct i915_gem_context { /** @stale: tracks stale engines to be destroyed */ struct { - /** @lock: guards engines */ + /** @stale.lock: guards engines */ spinlock_t lock; - /** @engines: list of stale engines */ + /** @stale.engines: list of stale engines */ struct list_head engines; } stale; }; -- cgit v1.2.3 From 53cd65a9c95109eef402db0ed7822b7c9a8ad732 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 28 Dec 2023 15:49:46 -0800 Subject: drm/i915/gt: reconcile Excess struct member kernel-doc warnings Document nested struct members with full names as described in Documentation/doc-guide/kernel-doc.rst. intel_gsc.h:34: warning: Excess struct member 'gem_obj' description in 'intel_gsc' Also add missing field member descriptions. Signed-off-by: Randy Dunlap Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-gfx@lists.freedesktop.org Cc: Jonathan Corbet Cc: dri-devel@lists.freedesktop.org Reviewed-by: Rodrigo Vivi Cc: Andi Shyti Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231228234946.12405-1-rdunlap@infradead.org (cherry picked from commit cd1d91115ff1929ec346d85f512ef260ddf8131e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_gsc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h index 7ab3ca0f9f26..013c64251448 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.h +++ b/drivers/gpu/drm/i915/gt/intel_gsc.h @@ -21,8 +21,11 @@ struct mei_aux_device; /** * struct intel_gsc - graphics security controller * - * @gem_obj: scratch memory GSC operations - * @intf : gsc interface + * @intf: gsc interface + * @intf.adev: MEI aux. device for this @intf + * @intf.gem_obj: scratch memory GSC operations + * @intf.irq: IRQ for this device (%-1 for no IRQ) + * @intf.id: this interface's id number/index */ struct intel_gsc { struct intel_gsc_intf { -- cgit v1.2.3 From af3cfcad492f2ffbef5de36c8ee1e8f8a701938f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 26 Dec 2023 11:54:31 -0800 Subject: drm/i915/guc: reconcile Excess struct member kernel-doc warnings Document nested struct members with full names as described in Documentation/doc-guide/kernel-doc.rst. 
intel_guc.h:305: warning: Excess struct member 'lock' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'guc_ids' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'num_guc_ids' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'guc_ids_bitmap' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'guc_id_list' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'guc_ids_in_use' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'destroyed_contexts' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'destroyed_worker' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'reset_fail_worker' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'reset_fail_mask' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'sched_disable_delay_ms' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'sched_disable_gucid_threshold' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'lock' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'gt_stamp' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'ping_delay' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'work' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'shift' description in 'intel_guc' intel_guc.h:305: warning: Excess struct member 'last_stat_jiffies' description in 'intel_guc' 18 warnings as Errors Signed-off-by: Randy Dunlap Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-gfx@lists.freedesktop.org Cc: Jonathan Corbet Cc: dri-devel@lists.freedesktop.org Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231226195432.10891-3-rdunlap@infradead.org (cherry picked from commit e4cf1a70fad3e2107503e99cfe9cc0c9cba19dad) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 75 +++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 33 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index e22c12ce245a..813cc888e6fa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -105,61 +105,67 @@ struct intel_guc { */ struct { /** - * @lock: protects everything in submission_state, - * ce->guc_id.id, and ce->guc_id.ref when transitioning in and - * out of zero + * @submission_state.lock: protects everything in + * submission_state, ce->guc_id.id, and ce->guc_id.ref + * when transitioning in and out of zero */ spinlock_t lock; /** - * @guc_ids: used to allocate new guc_ids, single-lrc + * @submission_state.guc_ids: used to allocate new + * guc_ids, single-lrc */ struct ida guc_ids; /** - * @num_guc_ids: Number of guc_ids, selftest feature to be able - * to reduce this number while testing. + * @submission_state.num_guc_ids: Number of guc_ids, selftest + * feature to be able to reduce this number while testing. 
*/ int num_guc_ids; /** - * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc + * @submission_state.guc_ids_bitmap: used to allocate + * new guc_ids, multi-lrc */ unsigned long *guc_ids_bitmap; /** - * @guc_id_list: list of intel_context with valid guc_ids but no - * refs + * @submission_state.guc_id_list: list of intel_context + * with valid guc_ids but no refs */ struct list_head guc_id_list; /** - * @guc_ids_in_use: Number single-lrc guc_ids in use + * @submission_state.guc_ids_in_use: Number single-lrc + * guc_ids in use */ unsigned int guc_ids_in_use; /** - * @destroyed_contexts: list of contexts waiting to be destroyed - * (deregistered with the GuC) + * @submission_state.destroyed_contexts: list of contexts + * waiting to be destroyed (deregistered with the GuC) */ struct list_head destroyed_contexts; /** - * @destroyed_worker: worker to deregister contexts, need as we - * need to take a GT PM reference and can't from destroy - * function as it might be in an atomic context (no sleeping) + * @submission_state.destroyed_worker: worker to deregister + * contexts, need as we need to take a GT PM reference and + * can't from destroy function as it might be in an atomic + * context (no sleeping) */ struct work_struct destroyed_worker; /** - * @reset_fail_worker: worker to trigger a GT reset after an - * engine reset fails + * @submission_state.reset_fail_worker: worker to trigger + * a GT reset after an engine reset fails */ struct work_struct reset_fail_worker; /** - * @reset_fail_mask: mask of engines that failed to reset + * @submission_state.reset_fail_mask: mask of engines that + * failed to reset */ intel_engine_mask_t reset_fail_mask; /** - * @sched_disable_delay_ms: schedule disable delay, in ms, for - * contexts + * @submission_state.sched_disable_delay_ms: schedule + * disable delay, in ms, for contexts */ unsigned int sched_disable_delay_ms; /** - * @sched_disable_gucid_threshold: threshold of min remaining available - * guc_ids before we start bypassing the schedule disable delay + * @submission_state.sched_disable_gucid_threshold: + * threshold of min remaining available guc_ids before + * we start bypassing the schedule disable delay */ unsigned int sched_disable_gucid_threshold; } submission_state; @@ -243,37 +249,40 @@ struct intel_guc { */ struct { /** - * @lock: Lock protecting the below fields and the engine stats. + * @timestamp.lock: Lock protecting the below fields and + * the engine stats. */ spinlock_t lock; /** - * @gt_stamp: 64 bit extended value of the GT timestamp. + * @timestamp.gt_stamp: 64-bit extended value of the GT + * timestamp. */ u64 gt_stamp; /** - * @ping_delay: Period for polling the GT timestamp for - * overflow. + * @timestamp.ping_delay: Period for polling the GT + * timestamp for overflow. */ unsigned long ping_delay; /** - * @work: Periodic work to adjust GT timestamp, engine and - * context usage for overflows. + * @timestamp.work: Periodic work to adjust GT timestamp, + * engine and context usage for overflows. */ struct delayed_work work; /** - * @shift: Right shift value for the gpm timestamp + * @timestamp.shift: Right shift value for the gpm timestamp */ u32 shift; /** - * @last_stat_jiffies: jiffies at last actual stats collection time - * We use this timestamp to ensure we don't oversample the - * stats because runtime power management events can trigger - * stats collection at much higher rates than required. + * @timestamp.last_stat_jiffies: jiffies at last actual + * stats collection time. 
We use this timestamp to ensure + * we don't oversample the stats because runtime power + * management events can trigger stats collection at much + * higher rates than required. */ unsigned long last_stat_jiffies; } timestamp; -- cgit v1.2.3 From d505a16e00c35919fd9fe5735894645e0f70a415 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 26 Dec 2023 11:54:32 -0800 Subject: drm/i915/perf: reconcile Excess struct member kernel-doc warnings Document nested struct members with full names as described in Documentation/doc-guide/kernel-doc.rst. i915_perf_types.h:341: warning: Excess struct member 'ptr_lock' description in 'i915_perf_stream' i915_perf_types.h:341: warning: Excess struct member 'head' description in 'i915_perf_stream' i915_perf_types.h:341: warning: Excess struct member 'tail' description in 'i915_perf_stream' 3 warnings as Errors Signed-off-by: Randy Dunlap Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-gfx@lists.freedesktop.org Cc: Jonathan Corbet Cc: dri-devel@lists.freedesktop.org Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231226195432.10891-4-rdunlap@infradead.org (cherry picked from commit aa253baca534357e033bd29b074ce1eade2a9362) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_perf_types.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 13b1ae9b96c7..46445248d193 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -291,7 +291,8 @@ struct i915_perf_stream { int size_exponent; /** - * @ptr_lock: Locks reads and writes to all head/tail state + * @oa_buffer.ptr_lock: Locks reads and writes to all + * head/tail state * * Consider: the head and tail pointer state needs to be read * consistently from a hrtimer callback (atomic context) and @@ -313,7 +314,8 @@ struct i915_perf_stream { spinlock_t ptr_lock; /** - * @head: Although we can always read back the head pointer register, + * @oa_buffer.head: Although we can always read back + * the head pointer register, * we prefer to avoid trusting the HW state, just to avoid any * risk that some hardware condition could * somehow bump the * head pointer unpredictably and cause us to forward the wrong @@ -322,7 +324,8 @@ struct i915_perf_stream { u32 head; /** - * @tail: The last verified tail that can be read by userspace. + * @oa_buffer.tail: The last verified tail that can be + * read by userspace. */ u32 tail; } oa_buffer; -- cgit v1.2.3 From 778e73d2411abc8f3a2d60dbf038acaec218792e Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 3 Jan 2024 11:15:09 +0100 Subject: drm/hyperv: Remove firmware framebuffers with aperture helper Replace use of screen_info state with the correct interface from the aperture helpers. The state is only for architecture and firmware code. It is not guaranteed to contain valid data. Drivers are thus not allowed to use it. For removing conflicting firmware framebuffers, there are aperture helpers. Hence replace screen_info with the correct function that will remove conflicting framebuffers for the hyperv-drm driver. Also move the call to the correct place within the driver. 
Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Helge Deller --- drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index d511d17c5bdf..cff85086f2d6 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -73,11 +72,6 @@ static int hyperv_setup_vram(struct hyperv_drm_device *hv, struct drm_device *dev = &hv->dev; int ret; - if (IS_ENABLED(CONFIG_SYSFB)) - drm_aperture_remove_conflicting_framebuffers(screen_info.lfb_base, - screen_info.lfb_size, - &hyperv_driver); - hv->fb_size = (unsigned long)hv->mmio_megabytes * 1024 * 1024; ret = vmbus_allocate_mmio(&hv->mem, hdev, 0, -1, hv->fb_size, 0x100000, @@ -130,6 +124,8 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, goto err_hv_set_drv_data; } + drm_aperture_remove_framebuffers(&hyperv_driver); + ret = hyperv_setup_vram(hv, hdev); if (ret) goto err_vmbus_close; -- cgit v1.2.3 From 205e18c13545ab43cc4fe4930732b4feef551198 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 10 Jan 2024 11:14:05 +1000 Subject: nouveau/gsp: handle engines in runl without nonstall interrupts. It appears on TU106 GPUs (2070), that some of the nvdec engines are in the runlist but have no valid nonstall interrupt, nouveau didn't handle that too well. This should let nouveau/gsp work on those. Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Dave Airlie Link: https://lore.kernel.org/all/20240110011826.3996289-1-airlied@gmail.com/ --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c | 4 ++++ drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c | 8 ++------ 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c index c8ce7ff18713..e74493a4569e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c @@ -550,6 +550,10 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo) struct nvkm_engn *engn = list_first_entry(&runl->engns, typeof(*engn), head); runl->nonstall.vector = engn->func->nonstall(engn); + + /* if no nonstall vector just keep going */ + if (runl->nonstall.vector == -1) + continue; if (runl->nonstall.vector < 0) { RUNL_ERROR(runl, "nonstall %d", runl->nonstall.vector); return runl->nonstall.vector; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c index d088e636edc3..de2ebe8f2134 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c @@ -350,7 +350,7 @@ r535_engn_nonstall(struct nvkm_engn *engn) int ret; ret = nvkm_gsp_intr_nonstall(subdev->device->gsp, subdev->type, subdev->inst); - WARN_ON(ret < 0); + WARN_ON(ret == -ENOENT); return ret; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c index 04bceaa28a19..da1bebb896f7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c @@ -25,12 +25,8 @@ int nvkm_gsp_intr_nonstall(struct nvkm_gsp *gsp, enum nvkm_subdev_type type, int inst) { for (int i = 0; i < gsp->intr_nr; i++) { - if (gsp->intr[i].type == type && 
gsp->intr[i].inst == inst) { - if (gsp->intr[i].nonstall != ~0) - return gsp->intr[i].nonstall; - - return -EINVAL; - } + if (gsp->intr[i].type == type && gsp->intr[i].inst == inst) + return gsp->intr[i].nonstall; } return -ENOENT; -- cgit v1.2.3 From 7b1a8a5fcee4a85be1f540ac0e09761d421e562d Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 4 Jan 2024 08:18:32 -0800 Subject: drm/xe: Fix definition of intel_wakeref_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915 defines it as unsigned long so Xe should do the same to avoid compilation warnings: CC [M] drivers/gpu/drm/i915/i915_gem.o CC [M] drivers/gpu/drm/xe/i915-display/intel_display_power_well.o In file included from ./include/drm/drm_mm.h:51, from drivers/gpu/drm/xe/xe_bo_types.h:11, from drivers/gpu/drm/xe/xe_bo.h:11, from ./drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h:11, from ./drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h:15, from drivers/gpu/drm/i915/display/intel_display_power.c:8: drivers/gpu/drm/i915/display/intel_display_power.c: In function ‘print_async_put_domains_state’: drivers/gpu/drm/i915/display/intel_display_power.c:408:29: warning: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘int’ [-Wformat=] 408 | drm_dbg(&i915->drm, "async_put_wakeref %lu\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~ 409 | power_domains->async_put_wakeref); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | int ./include/drm/drm_print.h:410:39: note: in definition of macro ‘drm_dev_dbg’ 410 | __drm_dev_dbg(NULL, dev, cat, fmt, ##__VA_ARGS__) | ^~~ ./include/drm/drm_print.h:510:33: note: in expansion of macro ‘drm_dbg_driver’ 510 | #define drm_dbg(drm, fmt, ...) drm_dbg_driver(drm, fmt, ##__VA_ARGS__) | ^~~~~~~~~~~~~~ drivers/gpu/drm/i915/display/intel_display_power.c:408:9: note: in expansion of macro ‘drm_dbg’ 408 | drm_dbg(&i915->drm, "async_put_wakeref %lu\n", | ^~~~~~~ drivers/gpu/drm/i915/display/intel_display_power.c:408:50: note: format string is defined here 408 | drm_dbg(&i915->drm, "async_put_wakeref %lu\n", | ~~^ | | | long unsigned int | %u CC [M] drivers/gpu/drm/i915/i915_gem_evict.o CC [M] drivers/gpu/drm/i915/i915_gem_gtt.o CC [M] drivers/gpu/drm/xe/i915-display/intel_display_trace.o CC [M] drivers/gpu/drm/xe/i915-display/intel_display_wa.o CC [M] drivers/gpu/drm/i915/i915_query.o Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Cc: Maarten Lankhorst Reviewed-by: Jani Nikula Signed-off-by: José Roberto de Souza (cherry picked from commit fdbadf504375886a0320ac6f84c850322a6b32e1) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h index 1c5e30cf10ca..ecb1c0707706 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h @@ -5,4 +5,4 @@ #include -typedef bool intel_wakeref_t; +typedef unsigned long intel_wakeref_t; -- cgit v1.2.3 From 56c253daabc8bd9dfbae52c3d9e0dd34977347a6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 4 Jan 2024 00:00:39 -0800 Subject: drm/xe: Fix exec IOCTL long running exec queue ring full condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intent is to return -EWOULDBLOCK to the user if a long running exec queue is full during the 
exec IOCTL. -EWOULDBLOCK aliases to -EAGAIN which results in the exec IOCTL doing a retry loop. Fix this by ensuring the retry loop is broken when returning -EWOULDBLOCK. Fixes: 8ae8a2e8dd21 ("drm/xe: Long running job update") Reported-by: Sai Gowtham Ch Signed-off-by: Matthew Brost Reviewed-by: Brian Welty (cherry picked from commit 97d0047cbb17318431eaf37dfe1a6855539340f9) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index d30c0d0689bc..b853feed9ccc 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -115,7 +115,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_sched_job *job; struct dma_fence *rebind_fence; struct xe_vm *vm; - bool write_locked; + bool write_locked, skip_retry = false; ktime_t end = 0; int err = 0; @@ -227,7 +227,8 @@ retry: } if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) { - err = -EWOULDBLOCK; + err = -EWOULDBLOCK; /* Aliased to -EAGAIN */ + skip_retry = true; goto err_exec; } @@ -337,7 +338,7 @@ err_unlock_list: up_write(&vm->lock); else up_read(&vm->lock); - if (err == -EAGAIN) + if (err == -EAGAIN && !skip_retry) goto retry; err_syncs: for (i = 0; i < num_syncs; i++) -- cgit v1.2.3 From 457f4439833487acb18abdd55e95fbb17d43fdca Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 22 Dec 2023 18:59:04 +0100 Subject: drm/xe/vm: Fix an error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If using the VM_BIND_OP_UNMAP_ALL without any bound vmas for the vm, we will end up dereferencing an uninitialized variable and leak a bo lock. Fix this. v2: - Updated commit message (Lucas De Marchi) Reported-by: Dafna Hirschfeld Closes: https://lore.kernel.org/intel-xe/jrwua7ckbiozfcaodx4gg2h4taiuxs53j5zlpf3qzvyhyiyl2d@pbs3plurokrj/ Suggested-by: Dafna Hirschfeld Fixes: b06d47be7c83 ("drm/xe: Port Xe to GPUVA") Signed-off-by: Thomas Hellström Acked-by: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20231222175904.16732-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 9d0c1c5618be02c5acda7e6bbb728007b0632984) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 0cfe7289b97e..b0e3cab6a584 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2063,9 +2063,11 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, if (err) return ERR_PTR(err); - vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj); - if (!vm_bo) - break; + vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); + if (IS_ERR(vm_bo)) { + xe_bo_unlock(bo); + return ERR_CAST(vm_bo); + } ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); drm_gpuvm_bo_put(vm_bo); -- cgit v1.2.3 From 3ec276d06698189506f508f87c0f4f17c11e0251 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 9 Jan 2024 12:24:02 +0100 Subject: drm/xe: Use __iomem for the regs pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The regs pointer points to IO memory. Annotate it properly and fix the corresponding sparse warning. 
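For context, a self-contained sketch of why the annotation matters (illustrative, not code from this series): sparse tracks pointer address spaces, so an ioremap()ed mapping should be typed void __iomem * and only touched through the MMIO accessors.

#include <linux/io.h>

struct example_mmio {
	void __iomem *regs;	/* MMIO mapping from ioremap(), not ordinary memory */
};

static u32 example_read(struct example_mmio *m, u32 offset)
{
	/* readl() expects a void __iomem *; a plain void * here makes sparse warn */
	return readl(m->regs + offset);
}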
Fixes: a4e2f3a299ea ("drm/xe: refactor xe_mmio_probe_tiles to support MMIO extension") Cc: Koby Elbaz Cc: Ofir Bitton Cc: Moti Haimovski Cc: Rodrigo Vivi Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240109112405.108136-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 9d03bf30e78673d827484bbc17a6fd8f5e43a039) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f660cfb79f50..c8c5d74b6e90 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -303,7 +303,7 @@ void xe_mmio_probe_tiles(struct xe_device *xe) u8 id, tile_count = xe->info.tile_count; struct xe_gt *gt = xe_root_mmio_gt(xe); struct xe_tile *tile; - void *regs; + void __iomem *regs; u32 mtcfg; if (tile_count == 1) -- cgit v1.2.3 From 77232e6a28447c2942558d05f1c3115bdf95a9e7 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 9 Jan 2024 12:24:03 +0100 Subject: drm/xe: Annotate xe_mem_region::mapping with __iomem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pointer points to IO memory, but the __iomem annotation was incorrectly placed. Annotate it correctly, update its usage accordingly and fix the corresponding sparse error. Fixes: 0887a2e7ab62 ("drm/xe: Make xe_mem_region struct") Cc: Oak Zeng Cc: Michael J. Ruhl Cc: Matthew Brost Cc: Rodrigo Vivi Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240109112405.108136-3-thomas.hellstrom@linux.intel.com (cherry picked from commit 20855b62a30538361e587cfc7c5245f07d4f826a) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.c | 4 ++-- drivers/gpu/drm/xe/xe_device_types.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 8e4a3b1f6b93..3cd29bd015a0 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -442,7 +442,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, if (vram->mapping && mem->placement & TTM_PL_FLAG_CONTIGUOUS) - mem->bus.addr = (u8 *)vram->mapping + + mem->bus.addr = (u8 __force *)vram->mapping + mem->bus.offset; mem->bus.offset += vram->io_start; @@ -734,7 +734,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Create a new VMAP once kernel BO back in VRAM */ if (!ret && resource_is_vram(new_mem)) { struct xe_mem_region *vram = res_to_mem_region(new_mem); - void *new_addr = vram->mapping + + void __iomem *new_addr = vram->mapping + (new_mem->start << PAGE_SHIFT); if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index c45ef17b3473..4b38c6bc6c76 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -97,7 +97,7 @@ struct xe_mem_region { */ resource_size_t actual_physical_size; /** @mapping: pointer to VRAM mappable space */ - void *__iomem mapping; + void __iomem *mapping; }; /** -- cgit v1.2.3 From 5c63e7574739c034e072dea0e0a6fcbe8d538666 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 9 Jan 2024 12:24:04 +0100 Subject: drm/xe: Annotate multiple mmio pointers with __iomem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
There are a couple of pointers pointing to MMIO space. Annotate them with __iomem and fix the corresponding sparse warnings. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Fixes: 3b0d4a557996 ("drm/xe: Move register MMIO into xe_tile") Fixes: 399a13323f0d ("drm/xe: add 28-bit address support in struct xe_reg") Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Lucas De Marchi Cc: Matt Roper Cc: Koby Elbaz Cc: Ofir Bitton Cc: Moti Haimovski Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240109112405.108136-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 9d612ee52c6096bc70d43f54921ba2831ffbf1ad) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_device_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 4b38c6bc6c76..5dc9127a2029 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -146,7 +146,7 @@ struct xe_tile { size_t size; /** @regs: pointer to tile's MMIO space (starting with registers) */ - void *regs; + void __iomem *regs; } mmio; /** @@ -159,7 +159,7 @@ struct xe_tile { size_t size; /** @regs: pointer to tile's additional MMIO-extension space */ - void *regs; + void __iomem *regs; } mmio_ext; /** @mem: memory management info for tile */ @@ -301,7 +301,7 @@ struct xe_device { /** @size: size of MMIO space for device */ size_t size; /** @regs: pointer to MMIO space for device */ - void *regs; + void __iomem *regs; } mmio; /** @mem: memory info for device */ -- cgit v1.2.3 From 98949068eb559a31f162ab37f56a89bf6c3698ad Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 9 Jan 2024 12:24:05 +0100 Subject: drm/xe: Annotate xe_ttm_stolen_mgr::mapping with __iomem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pointer points to IO memory, but the __iomem annotation was incorrectly placed. Annotate it correctly, update its usage accordingly and fix the corresponding sparse error. 
Fixes: d8b52a02cb40 ("drm/xe: Implement stolen memory.") Cc: Maarten Lankhorst Cc: Matthew Brost Cc: Rodrigo Vivi Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240109112405.108136-5-thomas.hellstrom@linux.intel.com (cherry picked from commit dcddb6f0b06d454c9a3b2b240a43f0e7310c7f7c) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d2b00d0bf1e2..e5d7d5e2bec1 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -31,7 +31,7 @@ struct xe_ttm_stolen_mgr { /* GPU base offset */ resource_size_t stolen_base; - void *__iomem mapping; + void __iomem *mapping; }; static inline struct xe_ttm_stolen_mgr * @@ -275,7 +275,7 @@ static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe, drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)); if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping) - mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset; + mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset; mem->bus.offset += mgr->io_base; mem->bus.is_iomem = true; -- cgit v1.2.3 From fef257eb6dcb9f39baee9ac44f064cd796ecfd0b Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Fri, 5 Jan 2024 11:04:39 -0800 Subject: drm/xe: Fix guc_exec_queue_set_priority MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to set q->priority prior to calling guc_exec_queue_add_msg() as that will call init_policies() and sets the scheduling properties to those stored in the exec_queue. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Brian Welty Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost (cherry picked from commit b16483f9f8120b530327879fa3ea576e897946da) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 21ac68e3246f..5de3ac47c462 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1308,8 +1308,8 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q, if (!msg) return -ENOMEM; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); q->priority = priority; + guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); return 0; } -- cgit v1.2.3 From 19c02225242498eea9267d444ee1276016368d49 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Fri, 5 Jan 2024 11:04:40 -0800 Subject: drm/xe: Fix modifying exec_queue priority in xe_migrate_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After exec_queue has been created, we cannot simply modify q->priority. This needs to be done by the backend via q->ops. However in this case, it would be more efficient to simply pass a flag when creating the exec_queue and set the desired priority upfront during queue creation. To that end: new flag EXEC_QUEUE_FLAG_HIGH_PRIORITY is introduced. The priority field is moved to be with other scheduling properties and is now exec_queue.sched_props.priority. This is no longer set to initial value by the backend, but is now set within __xe_exec_queue_create(). 
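In other words (an illustrative comparison, simplified): changing the priority after creation has to go through the backend op, roughly

	q->ops->set_priority(q, XE_EXEC_QUEUE_PRIORITY_KERNEL);

whereas passing EXEC_QUEUE_FLAG_HIGH_PRIORITY at creation lets __xe_exec_queue_create() fill in q->sched_props.priority before the backend ever sees the queue, so no extra scheduling-properties update is needed.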
Fixes: b4eecedc75c1 ("drm/xe: Fix potential deadlock handling page faults") Signed-off-by: Brian Welty Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost (cherry picked from commit a8004af338f6b3319476ecbed63ea49bf393fc1f) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec_queue.c | 5 +++++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 6 ++++-- drivers/gpu/drm/xe/xe_guc_submit.c | 7 +++---- drivers/gpu/drm/xe/xe_migrate.c | 5 ++--- 4 files changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 44fe8097b7cd..bcfc4127c7c5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -67,6 +67,11 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = hwe->eclass->sched_props.preempt_timeout_us; + if (q->flags & EXEC_QUEUE_FLAG_KERNEL && + q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY) + q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; + else + q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; if (xe_exec_queue_is_parallel(q)) { q->parallel.composite_fence_ctx = dma_fence_context_alloc(1); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 3d7e704ec3d9..8d4b7feb8c30 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -52,8 +52,6 @@ struct xe_exec_queue { struct xe_vm *vm; /** @class: class of this exec queue */ enum xe_engine_class class; - /** @priority: priority of this exec queue */ - enum xe_exec_queue_priority priority; /** * @logical_mask: logical mask of where job submitted to exec queue can run */ @@ -84,6 +82,8 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_VM BIT(4) /* child of VM queue for multi-tile VM jobs */ #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) +/* kernel exec_queue only, set priority to highest level */ +#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(6) /** * @flags: flags for this exec queue, should statically setup aside from ban @@ -142,6 +142,8 @@ struct xe_exec_queue { u32 timeslice_us; /** @preempt_timeout_us: preemption timeout in micro-seconds */ u32 preempt_timeout_us; + /** @priority: priority of this exec queue */ + enum xe_exec_queue_priority priority; } sched_props; /** @compute: compute exec queue state */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 5de3ac47c462..54ffcfcdd41f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -421,7 +421,7 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) { struct exec_queue_policy policy; struct xe_device *xe = guc_to_xe(guc); - enum xe_exec_queue_priority prio = q->priority; + enum xe_exec_queue_priority prio = q->sched_props.priority; u32 timeslice_us = q->sched_props.timeslice_us; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; @@ -1231,7 +1231,6 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) err = xe_sched_entity_init(&ge->entity, sched); if (err) goto err_sched; - q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; if (xe_exec_queue_is_lr(q)) INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); @@ -1301,14 +1300,14 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q, { struct xe_sched_msg *msg; - if (q->priority == priority || exec_queue_killed_or_banned(q)) + if (q->sched_props.priority 
== priority || exec_queue_killed_or_banned(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; - q->priority = priority; + q->sched_props.priority = priority; guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); return 0; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index adf1dab5eba2..02fca8f9adc2 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -344,7 +344,8 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT | + EXEC_QUEUE_FLAG_HIGH_PRIORITY); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, @@ -355,8 +356,6 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) xe_vm_close_and_put(vm); return ERR_CAST(m->q); } - if (xe->info.has_usm) - m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; mutex_init(&m->job_mutex); -- cgit v1.2.3 From 23ca3d2fe367794d2816530fa6b141339fddc1c6 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 8 Jan 2024 14:58:42 -0800 Subject: drm/xe: Check skip_guc_pc before setting SLPC flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't set SLPC GuC feature ctl flag if skip_guc_pc is true. v2: Skip the freq related sysfs creation as well (Badal) v3: Remove unnecessary parenthesis (Lucas) Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set") Fixes: bef52b5c7a19 ("drm/xe: Create a xe_gt_freq component for raw management and sysfs") Reviewed-by: Lucas De Marchi Signed-off-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20240108225842.966066-1-vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 69cac0a8f3ef8db4d62441c4a2686ec676c9facd) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_freq.c | 3 +++ drivers/gpu/drm/xe/xe_guc.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 3adfa6686e7c..e5b0f4ecdbe8 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -196,6 +196,9 @@ void xe_gt_freq_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int err; + if (xe->info.skip_guc_pc) + return; + gt->freq = kobject_create_and_add("freq0", gt->sysfs); if (!gt->freq) { drm_warn(&xe->drm, "failed to add freq0 directory to %s\n", diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 482cb0df9f15..0a61390c64a7 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -60,7 +60,12 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc) static u32 guc_ctl_feature_flags(struct xe_guc *guc) { - return GUC_CTL_ENABLE_SLPC; + u32 flags = 0; + + if (!guc_to_xe(guc)->info.skip_guc_pc) + flags |= GUC_CTL_ENABLE_SLPC; + + return flags; } static u32 guc_ctl_log_params_flags(struct xe_guc *guc) -- cgit v1.2.3 From 190db3b1da8f40131d6153de7469abce16766302 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 10 Jan 2024 06:48:29 -0800 Subject: drm/xe: Fix build bug for GCC 11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building drivers/gpu/drm/xe/xe_gt_pagefault.c with GCC 11 results in the following build errors: ./include/linux/fortify-string.h:57:33: error: writing 16 bytes into a region of size 0 [-Werror=stringop-overflow=] 57 | #define __underlying_memcpy __builtin_memcpy | ^ ./include/linux/fortify-string.h:644:9: note: in expansion of macro ‘__underlying_memcpy’ 644 | __underlying_##op(p, q, __fortify_size); \ | ^~~~~~~~~~~~~ ./include/linux/fortify-string.h:689:26: note: in expansion of macro ‘__fortify_memcpy_chk’ 689 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/xe/xe_gt_pagefault.c:340:17: note: in expansion of macro ‘memcpy’ 340 | memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32)); | ^~~~~~ In file included from drivers/gpu/drm/xe/xe_device_types.h:17, from drivers/gpu/drm/xe/xe_vm_types.h:16, from drivers/gpu/drm/xe/xe_bo.h:13, from drivers/gpu/drm/xe/xe_gt_pagefault.c:16: drivers/gpu/drm/xe/xe_gt_types.h:102:25: note: at offset [1144, 265324] into destination object ‘tile’ of size 8 102 | struct xe_tile *tile; | ^~~~ Fix these by removing -Wstringop-overflow from drm/xe builds. Closes: https://lore.kernel.org/all/45ad1d0f-a10f-483e-848a-76a30252edbe@paulmck-laptop/ Fixes: 7a8bc11782d3 ("drm/xe: Enable W=1 warnings by default") Suggested-by: Stephen Rothwell Signed-off-by: Paul E. McKenney [ This particular warning is broken on GCC11. In future changes it will be moved to the normal C flags in the top level Makefile (out of Makefile.extrawarn), but accounting for the compiler support. Just remove it out of xe's forced extra warnings for now ] Signed-off-by: Lucas De Marchi (cherry picked from commit a109d19992294736abd4f4232ea639e03eb1f9e7) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 53bd2a8ba1ae..efcf0ab7a1a6 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) subdir-ccflags-y += $(call cc-option, -Wformat-overflow) subdir-ccflags-y += $(call cc-option, -Wformat-truncation) -subdir-ccflags-y += $(call cc-option, -Wstringop-overflow) subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) -- cgit v1.2.3 From ffd915e41a4a2277fd8041dc77603df59acf3e01 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Jan 2024 15:22:23 +0300 Subject: drm/xe/device: clean up on error in probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This error path should clean up before returning. Smatch detected this bug: drivers/gpu/drm/xe/xe_device.c:487 xe_device_probe() warn: missing unwind goto? 
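The unwind pattern smatch is asking for, as a generic sketch (hypothetical helpers, not the actual xe probe code): once an earlier step has side effects, later failures must jump to a label that undoes them instead of returning directly.

static int example_probe(struct example_device *dev)
{
	int err;

	err = example_irq_install(dev);		/* hypothetical setup with side effects */
	if (err)
		return err;			/* nothing to undo yet */

	err = example_detect_features(dev);	/* hypothetical later step */
	if (err)
		goto err_irq_shutdown;		/* unwind, do not just return */

	return 0;

err_irq_shutdown:
	example_irq_shutdown(dev);
	return err;
}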
Fixes: 4cb12b71923b ("drm/xe/xe2: Determine bios enablement for flat ccs on igfx") Signed-off-by: Dan Carpenter Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost (cherry picked from commit c10da95afa68060e13c5f920d96671943a7e54d9) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index d9ae77fe7382..b8d8da546670 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -484,7 +484,7 @@ int xe_device_probe(struct xe_device *xe) err = xe_device_set_has_flat_ccs(xe); if (err) - return err; + goto err_irq_shutdown; err = xe_mmio_probe_vram(xe); if (err) -- cgit v1.2.3 From 616576df35193bbadac31dc42a32d5943e183f45 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Jan 2024 15:20:35 +0300 Subject: drm/xe/selftests: Fix an error pointer dereference bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check if "bo" is an error pointer before calling xe_bo_lock() on it. Fixes: d6abc18d6693 ("drm/xe/xe2: Modify xe_bo_test for system memory") Signed-off-by: Dan Carpenter Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost (cherry picked from commit 88ec23528b32ddb9ce2e8492f2629b0056353697) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/tests/xe_bo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 412b2e7ce40c..3436fd9cf2b2 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -125,14 +125,13 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, bo_flags); - - xe_bo_lock(bo, false); - if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; } + xe_bo_lock(bo, false); + kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, test); -- cgit v1.2.3 From ec32f4f1bed87f0b87b9b0091231c8685db1138c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Jan 2024 15:20:22 +0300 Subject: drm/xe: unlock on error path in xe_vm_add_compute_exec_queue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the "&vm->lock" before returning. 
Fixes: 24f947d58fe5 ("drm/xe: Use DRM GPUVM helpers for external- and evicted objects") Signed-off-by: Dan Carpenter Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost (cherry picked from commit cf46019e8550a810cc023af7aa020ba43103b44d) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b0e3cab6a584..10b6995fbf29 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -335,13 +335,13 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) down_write(&vm->lock); err = drm_gpuvm_exec_lock(&vm_exec); if (err) - return err; + goto out_up_write; pfence = xe_preempt_fence_create(q, q->compute.context, ++q->compute.seqno); if (!pfence) { err = -ENOMEM; - goto out_unlock; + goto out_fini; } list_add(&q->compute.link, &vm->preempt.exec_queues); @@ -364,8 +364,9 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) up_read(&vm->userptr.notifier_lock); -out_unlock: +out_fini: drm_exec_fini(exec); +out_up_write: up_write(&vm->lock); return err; -- cgit v1.2.3 From 7425c43c268f859426d02ccb3f043bdbae31cca9 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 10 Jan 2024 17:34:15 +0100 Subject: drm/xe/migrate: Fix CCS copy for small VRAM copy chunks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the migrate code is using the identity map for addressing VRAM, copy chunks may become as small as 64K if the VRAM resource is fragmented. However, a chunk size smaller that 1MiB may lead to the *next* chunk's offset into the CCS metadata backup memory may not be page-aligned, and the XY_CTRL_SURF_COPY_BLT command can't handle that, and even if it could, the current code doesn't handle the offset calculaton correctly. To fix this, make sure we align the size of VRAM copy chunks to 1MiB. If the remaining data to copy is smaller than that, that's not a problem, so use the remaining size. If the VRAM copy cunk becomes fragmented due to the size alignment restriction, don't use the identity map, but instead emit PTEs into the page-table like we do for system memory. v2: - Rebase v3: - Future proof somewhat by taking into account the real data size to flat CCS metadata size ratio. (Matt Roper) - Invert a couple of if-statements for better readability. - Fix support for 4K-granularity VRAM sizes. (Tested on DG1). v4: - Fix up code comments - Fix debug printout format typo. v5: - Add a Fixes: tag. 
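To make the size reasoning concrete, a worked example (assuming the 1:256 data-to-CCS ratio implied by "4K of CCS Metadata" per 1 MiB chunk; the driver derives the real ratio from xe_device_ccs_bytes() at runtime):

	/* 64K of data needs 64K / 256 = 256 bytes of CCS metadata, so: */
	m->min_chunk_size = SZ_4K * SZ_64K / xe_device_ccs_bytes(xe, SZ_64K);	/* 4K * 256 = 1 MiB */
	m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size);

Copies are then rounded down to this boundary, so each chunk's offset into the CCS backup memory stays page-aligned, which is what XY_CTRL_SURF_COPY_BLT requires.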
Cc: Matt Roper Cc: Matthew Auld Cc: Matthew Brost Fixes: e89b384cde62 ("drm/xe/migrate: Update emit_pte to cope with a size level than 4k") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240110163415.524165-1-thomas.hellstrom@linux.intel.com (cherry picked from commit ef51d7542d143f3fd9a48d4e2c307563661668aa) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_migrate.c | 128 +++++++++++++++++++++------------- 2 files changed, 80 insertions(+), 50 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 7a32faa2f688..a6523df0f1d3 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -331,7 +331,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, - &src_it, XE_PAGE_SIZE, pt); + &src_it, XE_PAGE_SIZE, pt->ttm.resource); run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 02fca8f9adc2..e05e9e7282b6 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -62,6 +62,8 @@ struct xe_migrate { * out of the pt_bo. */ struct drm_suballoc_manager vm_update_sa; + /** @min_chunk_size: For dgfx, Minimum chunk size */ + u64 min_chunk_size; }; #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */ @@ -363,6 +365,19 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) if (err) return ERR_PTR(err); + if (IS_DGFX(xe)) { + if (xe_device_has_flat_ccs(xe)) + /* min chunk size corresponds to 4K of CCS Metadata */ + m->min_chunk_size = SZ_4K * SZ_64K / + xe_device_ccs_bytes(xe, SZ_64K); + else + /* Somewhat arbitrary to avoid a huge amount of blits */ + m->min_chunk_size = SZ_64K; + m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size); + drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n", + (unsigned long long)m->min_chunk_size); + } + return m; } @@ -374,16 +389,35 @@ static u64 max_mem_transfer_per_pass(struct xe_device *xe) return MAX_PREEMPTDISABLE_TRANSFER; } -static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur) +static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur) { - /* - * For VRAM we use identity mapped pages so we are limited to current - * cursor size. For system we program the pages ourselves so we have no - * such limitation. - */ - return min_t(u64, max_mem_transfer_per_pass(xe), - mem_type_is_vram(cur->mem_type) ? cur->size : - cur->remaining); + struct xe_device *xe = tile_to_xe(m->tile); + u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining); + + if (mem_type_is_vram(cur->mem_type)) { + /* + * VRAM we want to blit in chunks with sizes aligned to + * min_chunk_size in order for the offset to CCS metadata to be + * page-aligned. If it's the last chunk it may be smaller. + * + * Another constraint is that we need to limit the blit to + * the VRAM block size, unless size is smaller than + * min_chunk_size. 
+ */ + u64 chunk = max_t(u64, cur->size, m->min_chunk_size); + + size = min_t(u64, size, chunk); + if (size > m->min_chunk_size) + size = round_down(size, m->min_chunk_size); + } + + return size; +} + +static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur) +{ + /* If the chunk is not fragmented, allow identity map. */ + return cur->size >= size; } static u32 pte_update_size(struct xe_migrate *m, @@ -396,7 +430,12 @@ static u32 pte_update_size(struct xe_migrate *m, u32 cmds = 0; *L0_pt = pt_ofs; - if (!is_vram) { + if (is_vram && xe_migrate_allow_identity(*L0, cur)) { + /* Offset into identity map. */ + *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), + cur->start + vram_region_gpu_offset(res)); + cmds += cmd_size; + } else { /* Clip L0 to available size */ u64 size = min(*L0, (u64)avail_pts * SZ_2M); u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); @@ -412,11 +451,6 @@ static u32 pte_update_size(struct xe_migrate *m, /* Each chunk has a single blit command */ cmds += cmd_size; - } else { - /* Offset into identity map. */ - *L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile), - cur->start + vram_region_gpu_offset(res)); - cmds += cmd_size; } return cmds; @@ -426,10 +460,10 @@ static void emit_pte(struct xe_migrate *m, struct xe_bb *bb, u32 at_pt, bool is_vram, bool is_comp_pte, struct xe_res_cursor *cur, - u32 size, struct xe_bo *bo) + u32 size, struct ttm_resource *res) { struct xe_device *xe = tile_to_xe(m->tile); - + struct xe_vm *vm = m->q->vm; u16 pat_index; u32 ptes; u64 ofs = at_pt * XE_PAGE_SIZE; @@ -442,13 +476,6 @@ static void emit_pte(struct xe_migrate *m, else pat_index = xe->pat.idx[XE_CACHE_WB]; - /* - * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently - * we're only emitting VRAM PTEs during sanity tests, so when - * that's moved to a Kunit test, we should condition VRAM PTEs - * on running tests. - */ - ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { @@ -468,20 +495,22 @@ static void emit_pte(struct xe_migrate *m, addr = xe_res_dma(cur) & PAGE_MASK; if (is_vram) { - /* Is this a 64K PTE entry? 
*/ - if ((m->q->vm->flags & XE_VM_FLAG_64K) && - !(cur_ofs & (16 * 8 - 1))) { - xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K)); + if (vm->flags & XE_VM_FLAG_64K) { + u64 va = cur_ofs * XE_PAGE_SIZE / 8; + + xe_assert(xe, (va & (SZ_64K - 1)) == + (addr & (SZ_64K - 1))); + flags |= XE_PTE_PS64; } - addr += vram_region_gpu_offset(bo->ttm.resource); + addr += vram_region_gpu_offset(res); devmem = true; } - addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, - addr, pat_index, - 0, devmem, flags); + addr = vm->pt_ops->pte_encode_addr(m->tile->xe, + addr, pat_index, + 0, devmem, flags); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); @@ -693,8 +722,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, bool usm = xe->info.has_usm; u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; - src_L0 = xe_migrate_res_sizes(xe, &src_it); - dst_L0 = xe_migrate_res_sizes(xe, &dst_it); + src_L0 = xe_migrate_res_sizes(m, &src_it); + dst_L0 = xe_migrate_res_sizes(m, &dst_it); drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", pass++, src_L0, dst_L0); @@ -715,6 +744,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, &ccs_ofs, &ccs_pt, 0, 2 * avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); } /* Add copy commands size here */ @@ -727,20 +757,20 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, goto err_sync; } - if (!src_is_vram) - emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, - src_bo); - else + if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); - - if (!dst_is_vram) - emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, - dst_bo); else + emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, + src); + + if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); + else + emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, + dst); if (copy_system_ccs) - emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo); + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -949,7 +979,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, bool usm = xe->info.has_usm; u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; - clear_L0 = xe_migrate_res_sizes(xe, &src_it); + clear_L0 = xe_migrate_res_sizes(m, &src_it); drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); @@ -976,12 +1006,12 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, size -= clear_L0; /* Preemption is enabled again by the ring ops. */ - if (!clear_vram) { - emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, - bo); - } else { + if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); - } + else + emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, + dst); + bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; -- cgit v1.2.3 From 8049e3954aeaaeb488cd4e371526721c7fca297e Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Wed, 10 Jan 2024 16:21:11 -0800 Subject: drm/xe: Fix bounds checking in __xe_bo_placement_for_flags() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Requesting all memory regions on PVC will fill bo->placements up to XE_BO_MAX_PLACEMENTS. 
The subsequent call to try_add_stolen() will trip over the bounds checking even though XE_PL_STOLEN is not expected to be used in this case. This is hit with igt@xe_exec_fault_mode@once-basic-prefetch: xe 0000:8c:00.0: [drm] Assertion `*c < (sizeof(bo->placements) / sizeof((bo->placements)[0]) + ((int)(sizeof(struct { int:(-!!(__builtin_types_compatible_p(typeof((bo->placements)), typeof(&(bo->placements)[0])))); }))))` failed! WARNING: CPU: 30 PID: 6161 at drivers/gpu/drm/xe/xe_bo.c:203 __xe_bo_placement_for_flags+0x218/0x240 [xe] Is fixed here by moving the bounds checks closer to where we actually write into the bo->placement array. Fixes: 8c54ee8a8606 ("drm/xe: Ensure that we don't access the placements array out-of-bounds") Link: https://patchwork.freedesktop.org/patch/msgid/20240111002111.10190-1-brian.welty@intel.com Signed-off-by: Matthew Brost Signed-off-by: Brian Welty Reviewed-by: Matthew Brost (cherry picked from commit 52e3fa3e3ea3ee05e32c1a8d72bb3ae306a4da64) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3cd29bd015a0..0b0e262e2166 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -125,9 +125,9 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res) static void try_add_system(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); - if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_TT, }; @@ -145,6 +145,8 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct xe_mem_region *vram; u64 io_size; + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram; xe_assert(xe, vram && vram->usable_size); io_size = vram->io_size; @@ -175,8 +177,6 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); - if (bo->props.preferred_gt == XE_GT1) { if (bo_flags & XE_BO_CREATE_VRAM1_BIT) add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); @@ -193,9 +193,9 @@ static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); - if (bo_flags & XE_BO_CREATE_STOLEN_BIT) { + xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); + bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_STOLEN, .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | -- cgit v1.2.3 From bf3ff145df184698a8a80b33265064638572366f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 11 Jan 2024 12:47:16 +0200 Subject: drm/xe: display support should not depend on EXPERT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the DRM_XE_DISPLAY config dependency on EXPERT. I can only presume the idea was only experts should be able to disable it, but the effect is the opposite. 
Reported-by: Eero Tamminen Reviewed-by: Francois Dugast Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240111104716.3548744-1-jani.nikula@intel.com (cherry picked from commit 1c7531f50eaa425eca8ff726287b8df3a4a51e55) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 1cced50d8d8c..e36ae1f0d885 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -47,7 +47,7 @@ config DRM_XE config DRM_XE_DISPLAY bool "Enable display support" - depends on DRM_XE && EXPERT && DRM_XE=m + depends on DRM_XE && DRM_XE=m select FB_IOMEM_HELPERS select I2C select I2C_ALGOBIT -- cgit v1.2.3 From 02eed83abc1395a1207591aafad9bcfc5cb1abcb Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 7 Jan 2024 15:07:00 +0200 Subject: drm/amdkfd: fixes for HMM mem allocation Fix err return value and reset pgmap->type after checking it. Fixes: c83dee9b6394 ("drm/amdkfd: add SPM support for SVM") Reviewed-by: Felix Kuehling Signed-off-by: Dafna Hirschfeld Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d630100b9e91..f856901055d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -1026,7 +1026,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) } else { res = devm_request_free_mem_region(adev->dev, &iomem_resource, size); if (IS_ERR(res)) - return -ENOMEM; + return PTR_ERR(res); pgmap->range.start = res->start; pgmap->range.end = res->end; pgmap->type = MEMORY_DEVICE_PRIVATE; @@ -1042,10 +1042,10 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { pr_err("failed to register HMM device memory\n"); - /* Disable SVM support capability */ - pgmap->type = 0; if (pgmap->type == MEMORY_DEVICE_PRIVATE) devm_release_mem_region(adev->dev, res->start, resource_size(res)); + /* Disable SVM support capability */ + pgmap->type = 0; return PTR_ERR(r); } -- cgit v1.2.3 From 25852d4b97572ff62ffee574cb8bb4bc551af23a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 Oct 2023 14:27:13 -0400 Subject: drm/amdgpu: fix avg vs input power reporting on smu7 Hawaii, Bonaire, Fiji, and Tonga support average power, the others support current power. 
Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index b1a8799e2dee..aa91730e4eaf 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -3999,6 +3999,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, uint32_t sclk, mclk, activity_percent; uint32_t offset, val_vid; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; /* size must be at least 4 bytes for all sensors */ if (*size < 4) @@ -4042,7 +4043,21 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; return 0; case AMDGPU_PP_SENSOR_GPU_INPUT_POWER: - return smu7_get_gpu_power(hwmgr, (uint32_t *)value); + if ((adev->asic_type != CHIP_HAWAII) && + (adev->asic_type != CHIP_BONAIRE) && + (adev->asic_type != CHIP_FIJI) && + (adev->asic_type != CHIP_TONGA)) + return smu7_get_gpu_power(hwmgr, (uint32_t *)value); + else + return -EOPNOTSUPP; + case AMDGPU_PP_SENSOR_GPU_AVG_POWER: + if ((adev->asic_type != CHIP_HAWAII) && + (adev->asic_type != CHIP_BONAIRE) && + (adev->asic_type != CHIP_FIJI) && + (adev->asic_type != CHIP_TONGA)) + return -EOPNOTSUPP; + else + return smu7_get_gpu_power(hwmgr, (uint32_t *)value); case AMDGPU_PP_SENSOR_VDDGFX: if ((data->vr_config & VRCONF_VDDGFX_MASK) == (VR_SVI2_PLANE_2 << VRCONF_VDDGFX_SHIFT)) -- cgit v1.2.3 From d02069850fc102b07ae923535d5e212f2c8a34e9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 Oct 2023 14:43:06 -0400 Subject: drm/amdgpu: fall back to INPUT power for AVG power via INFO IOCTL For backwards compatibility with userspace. Fixes: 47f1724db4fe ("drm/amd: Introduce `AMDGPU_PP_SENSOR_GPU_INPUT_POWER`") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2897 Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b5ebafd4a3ad..bf4f48fe438d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1105,7 +1105,12 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&ui32, &ui32_size)) { - return -EINVAL; + /* fall back to input power for backwards compat */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_INPUT_POWER, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } } ui32 >>= 8; break; -- cgit v1.2.3 From 6127d7df4a5b66783da5a55ff60b3920a9c315a2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 7 Dec 2023 10:36:56 -0500 Subject: drm/amdgpu/pm: clarify debugfs pm output On APUs, power is SoC power, not just GPU. Clarify that for UVD/VCE/VCN the IP is powered down, not disabled, which can be confusing and lead to concerns that the IP is actually not available.
Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index f3cb490fe79b..087d57850304 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4349,11 +4349,19 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDNB)\n", value); size = sizeof(uint32_t); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) - seq_printf(m, "\t%u.%02u W (average GPU)\n", query >> 8, query & 0xff); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) { + if (adev->flags & AMD_IS_APU) + seq_printf(m, "\t%u.%02u W (average SoC including CPU)\n", query >> 8, query & 0xff); + else + seq_printf(m, "\t%u.%02u W (average SoC)\n", query >> 8, query & 0xff); + } size = sizeof(uint32_t); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) - seq_printf(m, "\t%u.%02u W (current GPU)\n", query >> 8, query & 0xff); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) { + if (adev->flags & AMD_IS_APU) + seq_printf(m, "\t%u.%02u W (current SoC including CPU)\n", query >> 8, query & 0xff); + else + seq_printf(m, "\t%u.%02u W (current SoC)\n", query >> 8, query & 0xff); + } size = sizeof(value); seq_printf(m, "\n"); @@ -4379,9 +4387,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* VCN clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) { if (!value) { - seq_printf(m, "VCN: Disabled\n"); + seq_printf(m, "VCN: Powered down\n"); } else { - seq_printf(m, "VCN: Enabled\n"); + seq_printf(m, "VCN: Powered up\n"); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (DCLK)\n", value/100); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) @@ -4393,9 +4401,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* UVD clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { if (!value) { - seq_printf(m, "UVD: Disabled\n"); + seq_printf(m, "UVD: Powered down\n"); } else { - seq_printf(m, "UVD: Enabled\n"); + seq_printf(m, "UVD: Powered up\n"); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (DCLK)\n", value/100); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) @@ -4407,9 +4415,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* VCE clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { if (!value) { - seq_printf(m, "VCE: Disabled\n"); + seq_printf(m, "VCE: Powered down\n"); } else { - seq_printf(m, "VCE: Enabled\n"); + seq_printf(m, "VCE: Powered up\n"); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); } -- cgit v1.2.3 From 8f8cb7124e86c68ab09aa446664192d3829a40be Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Mon, 8 Jan 2024 15:46:12 +0800 Subject: drm/amdgpu: update 
headers for nbio v7.11 This patch is to update headers for nbio v7.11. Signed-off-by: Yifan Zhang Acked-by: Alex Deucher Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h index 7ee3d291120d..6f80bfa7e41a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h @@ -8707,10 +8707,10 @@ #define regBIF_BX1_MM_CFGREGS_CNTL_BASE_IDX 2 #define regBIF_BX1_BX_RESET_CNTL 0x00f0 #define regBIF_BX1_BX_RESET_CNTL_BASE_IDX 2 -#define regBIF_BX1_INTERRUPT_CNTL 0x8e11 -#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX 5 -#define regBIF_BX1_INTERRUPT_CNTL2 0x8e12 -#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX 5 +#define regBIF_BX1_INTERRUPT_CNTL 0x00f1 +#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX 2 +#define regBIF_BX1_INTERRUPT_CNTL2 0x00f2 +#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX 2 #define regBIF_BX1_CLKREQB_PAD_CNTL 0x00f8 #define regBIF_BX1_CLKREQB_PAD_CNTL_BASE_IDX 2 #define regBIF_BX1_BIF_FEATURES_CONTROL_MISC 0x00fb -- cgit v1.2.3 From c9edcc1864f8529fd24441da40a1275232b5efc4 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Mon, 8 Jan 2024 16:05:27 +0800 Subject: drm/amdgpu: update ATHUB_MISC_CNTL offset for athub v3.3 This patch to update ATHUB_MISC_CNTL offset for athub v3.3 v2: correct a typo (Tim) v3: correct patch title (Lang) Signed-off-by: Yifan Zhang Acked-by: Alex Deucher Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/athub_v3_0.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index f0737fb3a999..d1bba9c64e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -30,6 +30,8 @@ #define regATHUB_MISC_CNTL_V3_0_1 0x00d7 #define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 +#define regATHUB_MISC_CNTL_V3_3_0 0x00d8 +#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0 static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) @@ -40,6 +42,9 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); break; + case IP_VERSION(3, 3, 0): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0); + break; default: data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); break; @@ -53,6 +58,9 @@ static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) case IP_VERSION(3, 0, 1): WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); break; + case IP_VERSION(3, 3, 0): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data); + break; default: WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); break; -- cgit v1.2.3 From 6616b5e1999146b1304abe78232af810080c67e3 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 5 Jan 2024 12:05:09 +0530 Subject: drm/amd/powerplay: Fix kzalloc parameter 'ATOM_Tonga_PPM_Table' in 'get_platform_power_management_table()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 'struct phm_ppm_table *ptr' allocation using kzalloc, an incorrect structure type is passed to sizeof() in kzalloc, larger structure types were used, thus using correct type 'struct 
phm_ppm_table' fixes the below: drivers/gpu/drm/amd/amdgpu/../pm/powerplay/hwmgr/process_pptables_v1_0.c:203 get_platform_power_management_table() warn: struct type mismatch 'phm_ppm_table vs _ATOM_Tonga_PPM_Table' Cc: Eric Huang Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c index f2a55c1413f5..17882f8dfdd3 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c @@ -200,7 +200,7 @@ static int get_platform_power_management_table( struct pp_hwmgr *hwmgr, ATOM_Tonga_PPM_Table *atom_ppm_table) { - struct phm_ppm_table *ptr = kzalloc(sizeof(ATOM_Tonga_PPM_Table), GFP_KERNEL); + struct phm_ppm_table *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); struct phm_ppt_v1_information *pp_table_information = (struct phm_ppt_v1_information *)(hwmgr->pptable); -- cgit v1.2.3 From 30d8dffab7d00da7fd13ecdb7d41a1f25ed6a4af Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Tue, 19 Dec 2023 11:55:09 -0500 Subject: drm/amdgpu: Do not program VM_L2_CNTL under SRIOV VM_L2_CNTL* should not be programmed on driver unload under SRIOV. These regs are skipped during SRIOV driver init. Signed-off-by: Victor Lu Reviewed-by: Vignesh Chander Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 95d06da544e2..49aecdcee006 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -456,10 +456,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ - tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); - WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); + if (!amdgpu_sriov_vf(adev)) { + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); + } } } -- cgit v1.2.3 From fac4ebd79fed60e79cccafdad45a2bb8d3795044 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 4 Jan 2024 15:26:42 +0530 Subject: drm/amdgpu: Fix with right return code '-EIO' in 'amdgpu_gmc_vram_checking()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The amdgpu_gmc_vram_checking() function in emulation checks whether all of the memory range of shared system memory could be accessed by GPU, from this aspect, -EIO is returned for error scenarios. Fixes the below: drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c:919 gmc_v6_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c:1103 gmc_v7_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c:1223 gmc_v8_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c:2344 gmc_v9_0_hw_init() warn: missing error code? 
'r' Cc: Xiaojian Du Cc: Lijo Lazar Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d2f273d77e59..55784a9f26c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -1045,21 +1045,28 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) * seconds, so here, we just pick up three parts for emulation. */ ret = memcmp(vram_ptr, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + (size / 2), cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + size - 10, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } +release_buffer: amdgpu_bo_free_kernel(&vram_bo, &vram_gpu, &vram_ptr); - return 0; + return ret; } static ssize_t current_memory_partition_show( -- cgit v1.2.3 From 8e8272f0dc22e11b2791dc778b07bd66c208d5a8 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 5 Jan 2024 10:08:58 +0530 Subject: drm/amdgpu: Fix unsigned comparison with less than zero in vpe_u1_8_from_fraction() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variables 'numerator' and 'denominator', are unsigned 16-bit integer types, that can never be less than 0. Thus fixing the below: drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c:62 vpe_u1_8_from_fraction() warn: unsigned 'numerator' is never less than zero. drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c:63 vpe_u1_8_from_fraction() warn: unsigned 'denominator' is never less than zero. Cc: Peyton Lee Cc: Lang Yu Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Peyton Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 6f149b54d4d3..b9a15d51eb5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -59,11 +59,8 @@ static inline uint16_t complete_integer_division_u16( static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator) { - bool arg1_negative = numerator < 0; - bool arg2_negative = denominator < 0; - - uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator); - uint16_t arg2_value = (uint16_t)(arg2_negative ? 
-denominator : denominator); + u16 arg1_value = numerator; + u16 arg2_value = denominator; uint16_t remainder; @@ -100,9 +97,6 @@ static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator) res_value += summand; } - if (arg1_negative ^ arg2_negative) - res_value = -res_value; - return res_value; } -- cgit v1.2.3 From 8a44fdd3cf91debbd09b43bd2519ad2b2486ccf4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 21 Dec 2023 18:13:11 +0530 Subject: drm/amdgpu: Release 'adev->pm.fw' before return in 'amdgpu_device_need_post()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In function 'amdgpu_device_need_post(struct amdgpu_device *adev)' - 'adev->pm.fw' may not be released before return. Using the function release_firmware() to release adev->pm.fw. Thus fixing the below: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1571 amdgpu_device_need_post() warn: 'adev->pm.fw' from request_firmware() not released on lines: 1554. Cc: Monk Liu Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5bb444bb36ce..ecbc58269951 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1544,6 +1544,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; fw_ver = *((uint32_t *)adev->pm.fw->data + 69); + release_firmware(adev->pm.fw); if (fw_ver < 0x00160e00) return true; } -- cgit v1.2.3 From 2b9a073b7304f4a9e130d04794c91a0c4f9a5c12 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 9 Jan 2024 09:19:22 +0800 Subject: drm/amdgpu: update regGL2C_CTRL4 value in golden setting This patch to update regGL2C_CTRL4 in golden setting. Signed-off-by: Yifan Zhang Reviewed-by: Tim Huang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c7242877d5d3..0ea0866c261f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -115,7 +115,7 @@ static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f), SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007), SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007), SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000), SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), -- cgit v1.2.3 From 7073934f5d73f8b53308963cee36f0d389ea857c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 8 Jan 2024 21:20:28 +0530 Subject: drm/amd/display: Fix variable deferencing before NULL check in edp_setup_replay() In edp_setup_replay(), 'struct dc *dc' & 'struct dmub_replay *replay' was dereferenced before the pointer 'link' & 'replay' NULL check. 
Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_edp_panel_control.c:947 edp_setup_replay() warn: variable dereferenced before check 'link' (see line 933) Cc: stable@vger.kernel.org Cc: Bhawanpreet Lakha Cc: Harry Wentland Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/dc/link/protocols/link_edp_panel_control.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 7f1196528218..046d3e205415 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -930,8 +930,8 @@ bool edp_get_replay_state(const struct dc_link *link, uint64_t *state) bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream) { /* To-do: Setup Replay */ - struct dc *dc = link->ctx->dc; - struct dmub_replay *replay = dc->res_pool->replay; + struct dc *dc; + struct dmub_replay *replay; int i; unsigned int panel_inst; struct replay_context replay_context = { 0 }; @@ -947,6 +947,10 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream if (!link) return false; + dc = link->ctx->dc; + + replay = dc->res_pool->replay; + if (!replay) return false; @@ -975,8 +979,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream replay_context.line_time_in_ns = lineTimeInNs; - if (replay) - link->replay_settings.replay_feature_enabled = + link->replay_settings.replay_feature_enabled = replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst); if (link->replay_settings.replay_feature_enabled) { -- cgit v1.2.3 From 6c5683bd9ecaa7f199c3122c1010ece5d59b1aef Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 9 Jan 2024 12:06:25 +0800 Subject: Revert "drm/amdgpu: add param to specify fw bo location for front-door loading" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c572abffe9f50c8ba33060865449313b3f588c35. Will use debug module param instead of independent module param. 
Signed-off-by: Le Ma Reviewed-by: Lijo Lazar Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +-- 4 files changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9da14436a373..616b6c911767 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -254,8 +254,6 @@ extern int amdgpu_agp; extern int amdgpu_wbrf; -extern int fw_bo_location; - #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 852cec98ff26..880137774b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -210,7 +210,6 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; -int fw_bo_location = -1; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -990,10 +989,6 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); -MODULE_PARM_DESC(fw_bo_location, - "location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram"); -module_param(fw_bo_location, int, 0644); - /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2addbdf88394..1bf975b8d083 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -466,7 +466,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? + amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index d334e42fe0eb..0efb2568cb65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,8 +1062,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ? - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); -- cgit v1.2.3 From d20e1aec8862e48a352ca86969cee6f530dd41d5 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 9 Jan 2024 17:44:39 +0800 Subject: drm/amdgpu: add debug flag to place fw bo on vram for frontdoor loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use debug_mask=0x8 param to help isolating data path issues on new systems in early phase. 
v2: rename the flag for explicitness (lijo) Signed-off-by: Le Ma Reviewed-by: Lijo Lazar Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 ++- 4 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 616b6c911767..3d8a48f46b01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1144,6 +1144,7 @@ struct amdgpu_device { bool debug_vm; bool debug_largebar; bool debug_disable_soft_recovery; + bool debug_use_vram_fw_buf; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 880137774b4e..0776b0c5e4e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_VM = BIT(0), AMDGPU_DEBUG_LARGEBAR = BIT(1), AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), + AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2117,6 +2118,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: soft reset for GPU recovery disabled\n"); adev->debug_disable_soft_recovery = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) { + pr_info("debug: place fw in vram for frontdoor loading\n"); + adev->debug_use_vram_fw_buf = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 1bf975b8d083..0328616473f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -466,7 +466,7 @@ static int psp_sw_init(void *handle) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - amdgpu_sriov_vf(adev) ? + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 0efb2568cb65..3e12763e477a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1062,7 +1062,8 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? 
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); -- cgit v1.2.3 From 51258acdc4758d43f03ec9cab6f3fa72a2838f0e Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 9 Jan 2024 18:06:35 +0800 Subject: drm/amdgpu: move debug options init prior to amdgpu device init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To bring debug options into effect in early initialization phase Signed-off-by: Le Ma Reviewed-by: Lijo Lazar Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0776b0c5e4e4..5c9caf5fa075 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2234,6 +2234,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, ddev); + amdgpu_init_debug_options(adev); + ret = amdgpu_driver_load_kms(adev, flags); if (ret) goto err_pci; @@ -2314,8 +2316,6 @@ retry_init: amdgpu_get_secondary_funcs(adev); } - amdgpu_init_debug_options(adev); - return 0; err_pci: -- cgit v1.2.3 From c3d5e297dcae88274dc6924db337a2159279eced Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Jan 2024 10:45:42 -0500 Subject: drm/amdgpu: drop exp hw support check for GC 9.4.3 No longer needed. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 0431eafa86b5..c7d60dd0fb97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1963,8 +1963,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); break; case IP_VERSION(9, 4, 3): - if (!amdgpu_exp_hw_support) - return -EINVAL; amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); break; case IP_VERSION(10, 1, 10): -- cgit v1.2.3 From d7a254fad873775ce6c32b77796c81e81e6b7f2e Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 9 Jan 2024 16:57:26 +0530 Subject: drm/amdkfd: Fix 'node' NULL check in 'svm_range_get_range_boundaries()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Range interval [start, last] is ordered by rb_tree, rb_prev, rb_next return value still needs NULL check, thus modified from "node" to "rb_node". Fixes the below: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:2691 svm_range_get_range_boundaries() warn: can 'node' even be NULL? 
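For context on why the check moves to the raw rb_node: container_of() on a NULL pointer only happens to return NULL when the embedded member sits at offset zero; in general it yields a bogus non-NULL pointer, so NULL-checking the converted containing structure cannot reliably detect a missing predecessor or successor. A minimal sketch of the robust pattern, with invented type names (not the driver code):

#include <linux/kernel.h>
#include <linux/rbtree.h>

struct example_node {
	int payload;		/* puts 'rb' at a non-zero offset */
	struct rb_node rb;
};

static struct example_node *example_prev(struct example_node *node)
{
	struct rb_node *rb_node = rb_prev(&node->rb);

	/*
	 * Check the rb_node first: converting a NULL rb_node with
	 * container_of() would produce a non-NULL garbage pointer here,
	 * which a later NULL check on the result could not catch.
	 */
	if (!rb_node)
		return NULL;

	return container_of(rb_node, struct example_node, rb);
}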
Suggested-by: Philip Yang Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4e9f07c7a937..c50a0dc9c9c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2654,6 +2654,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, { struct vm_area_struct *vma; struct interval_tree_node *node; + struct rb_node *rb_node; unsigned long start_limit, end_limit; vma = vma_lookup(p->mm, addr << PAGE_SHIFT); @@ -2673,16 +2674,15 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, if (node) { end_limit = min(end_limit, node->start); /* Last range that ends before the fault address */ - node = container_of(rb_prev(&node->rb), - struct interval_tree_node, rb); + rb_node = rb_prev(&node->rb); } else { /* Last range must end before addr because * there was no range after addr */ - node = container_of(rb_last(&p->svms.objects.rb_root), - struct interval_tree_node, rb); + rb_node = rb_last(&p->svms.objects.rb_root); } - if (node) { + if (rb_node) { + node = container_of(rb_node, struct interval_tree_node, rb); if (node->last >= addr) { WARN(1, "Overlap with prev node and page fault addr\n"); return -EFAULT; -- cgit v1.2.3 From 91739a897c12dcec699e53f390be1b4abdeef3a0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 11 Jan 2024 09:47:33 +0530 Subject: drm/amd/pm: Add error log for smu v13.0.6 reset For all mode-2 reset fail cases, add error log. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 4ebc6b421c2c..7513d1cfeebd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2235,17 +2235,18 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu) continue; } - if (ret) { - dev_err(adev->dev, - "failed to send mode2 message \tparam: 0x%08x error code %d\n", - SMU_RESET_MODE_2, ret); + if (ret) goto out; - } + } while (ret == -ETIME && timeout); out: mutex_unlock(&smu->message_lock); + if (ret) + dev_err(adev->dev, "failed to send mode2 reset, error code %d", + ret); + return ret; } -- cgit v1.2.3 From a992c90d8ed3929b70ae815ce21ca5651cc0a692 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 11 Jan 2024 15:28:53 +0530 Subject: drm/amd/pm: Fix smuv13.0.6 current clock reporting When current clock is equal to max dpm level clock, the level is not indicated correctly with *. Fix by comparing current clock against dpm level value. 
Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 7513d1cfeebd..a28649f21093 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -970,7 +970,9 @@ static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size, if (i < (clocks.num_levels - 1)) clk2 = clocks.data[i + 1].clocks_in_khz / 1000; - if (curr_clk >= clk1 && curr_clk < clk2) { + if (curr_clk == clk1) { + level = i; + } else if (curr_clk >= clk1 && curr_clk < clk2) { level = (curr_clk - clk1) <= (clk2 - curr_clk) ? i : i + 1; -- cgit v1.2.3 From d7643fe6fb76edb1f2f1497bf5e8b8f4774b5129 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 10 Jan 2024 13:46:47 -0700 Subject: drm/amd/display: Avoid enum conversion warning Clang warns (or errors with CONFIG_WERROR=y) when performing arithmetic with different enumerated types, which is usually a bug: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_dpia_bw.c:548:24: error: arithmetic between different enumeration types ('const enum dc_link_rate' and 'const enum dc_lane_count') [-Werror,-Wenum-enum-conversion] 548 | link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8; | ~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~ 1 error generated. In this case, there is not a problem because the enumerated types are basically treated as '#define' values. Add an explicit cast to an integral type to silence the warning. Closes: https://github.com/ClangBuiltLinux/linux/issues/1976 Fixes: 5f3bce13266e ("drm/amd/display: Request usb4 bw for mst streams") Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index 4ef1a6a1d129..dd0d2b206462 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -544,8 +544,9 @@ int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link) */ const struct dc_link_settings *link_cap = dc_link_get_link_cap(link); - uint32_t link_bw_in_kbps = - link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8; + uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate * + (uint32_t)link_cap->lane_count * + LINK_RATE_REF_FREQ_IN_KHZ * 8; link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0); } -- cgit v1.2.3 From e965a707276760cc010eb77fba64b08ee9e8781f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 23 Nov 2023 10:40:21 +0100 Subject: drm: remove I2C_CLASS_DDC support After removal of the legacy EEPROM driver and I2C_CLASS_DDC support in olpc_dcon there's no i2c client driver left supporting I2C_CLASS_DDC. Class-based device auto-detection is a legacy mechanism and shouldn't be used in new code. So we can remove this class completely now. 
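For readers who have not run into the mechanism being retired here: class-based auto-detection let an i2c client driver declare a class, an address list and a detect() callback, and the core would then probe matching addresses on every adapter advertising the same class bits. A rough sketch of what such an opt-in looked like (all names are invented for illustration; no in-tree driver is implied):

#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/string.h>

static const unsigned short example_addr_list[] = { 0x50, I2C_CLIENT_END };

static int example_detect(struct i2c_client *client, struct i2c_board_info *info)
{
	/* A real driver would probe the chip at client->addr before claiming it. */
	strscpy(info->type, "example-ddc-chip", I2C_NAME_SIZE);
	return 0;
}

static int example_probe(struct i2c_client *client)
{
	return 0;
}

static struct i2c_driver example_driver = {
	.driver		= { .name = "example-ddc-chip" },
	.class		= I2C_CLASS_DDC,	/* the opt-in this series removes */
	.detect		= example_detect,
	.address_list	= example_addr_list,
	.probe		= example_probe,
};
module_i2c_driver(example_driver);

Since no client driver declares I2C_CLASS_DDC any more, the DDC adapters touched below lose nothing by dropping the class: EDID reads go through the adapter directly via i2c_transfer(), not through an auto-detected client.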
Acked-by: Alex Deucher Acked-by: Dmitry Baryshkov Acked-by: Harry Wentland Acked-by: Heiko Stuebner Acked-by: Jani Nikula Acked-by: Jernej Skrabec Reviewed-by: Thomas Zimmermann Reviewed-by: Andi Shyti Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Neil Armstrong Signed-off-by: Heiner Kallweit Signed-off-by: Wolfram Sang --- drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 1 - drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - drivers/gpu/drm/ast/ast_i2c.c | 1 - drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 1 - drivers/gpu/drm/display/drm_dp_helper.c | 1 - drivers/gpu/drm/display/drm_dp_mst_topology.c | 1 - drivers/gpu/drm/gma500/cdv_intel_dp.c | 1 - drivers/gpu/drm/gma500/intel_gmbus.c | 1 - drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c | 1 - drivers/gpu/drm/gma500/psb_intel_sdvo.c | 1 - drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c | 1 - drivers/gpu/drm/i915/display/intel_gmbus.c | 1 - drivers/gpu/drm/i915/display/intel_sdvo.c | 1 - drivers/gpu/drm/loongson/lsdc_i2c.c | 1 - drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c | 1 - drivers/gpu/drm/mgag200/mgag200_i2c.c | 1 - drivers/gpu/drm/msm/hdmi/hdmi_i2c.c | 1 - drivers/gpu/drm/radeon/radeon_i2c.c | 1 - drivers/gpu/drm/rockchip/inno_hdmi.c | 1 - drivers/gpu/drm/rockchip/rk3066_hdmi.c | 1 - drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c | 1 - 21 files changed, 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 82608df43396..d79cb13e1aa8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -175,7 +175,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, i2c->rec = *rec; i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ee97814ebd99..bbd511567a73 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7533,7 +7533,6 @@ create_i2c(struct ddc_service *ddc_service, if (!i2c) return NULL; i2c->base.owner = THIS_MODULE; - i2c->base.class = I2C_CLASS_DDC; i2c->base.dev.parent = &adev->pdev->dev; i2c->base.algo = &amdgpu_dm_i2c_algo; snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index); diff --git a/drivers/gpu/drm/ast/ast_i2c.c b/drivers/gpu/drm/ast/ast_i2c.c index 0e845e7acd9b..e5d3f7121de4 100644 --- a/drivers/gpu/drm/ast/ast_i2c.c +++ b/drivers/gpu/drm/ast/ast_i2c.c @@ -120,7 +120,6 @@ struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev) return NULL; i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 52d91a0df85e..aca5bb0866f8 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -515,7 +515,6 @@ static struct i2c_adapter *dw_hdmi_i2c_adapter(struct dw_hdmi *hdmi) init_completion(&i2c->cmp); adap = &i2c->adap; - adap->class = I2C_CLASS_DDC; adap->owner = THIS_MODULE; adap->dev.parent = hdmi->dev; adap->algo = &dw_hdmi_algorithm; diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index f3680f4e6970..ac901f4b48f2 100644 --- 
a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2102,7 +2102,6 @@ int drm_dp_aux_register(struct drm_dp_aux *aux) if (!aux->ddc.algo) drm_dp_aux_init(aux); - aux->ddc.class = I2C_CLASS_DDC; aux->ddc.owner = THIS_MODULE; aux->ddc.dev.parent = aux->dev; diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 0e0d0e76de06..4376e2c1f2d4 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5803,7 +5803,6 @@ static int drm_dp_mst_register_i2c_bus(struct drm_dp_mst_port *port) aux->ddc.algo_data = aux; aux->ddc.retries = 3; - aux->ddc.class = I2C_CLASS_DDC; aux->ddc.owner = THIS_MODULE; /* FIXME: set the kdev of the port's connector as parent */ aux->ddc.dev.parent = parent_dev; diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c index 8992a95076f2..dd1eb7e9877d 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_dp.c +++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c @@ -855,7 +855,6 @@ cdv_intel_dp_i2c_init(struct gma_connector *connector, memset(&intel_dp->adapter, '\0', sizeof (intel_dp->adapter)); intel_dp->adapter.owner = THIS_MODULE; - intel_dp->adapter.class = I2C_CLASS_DDC; strncpy (intel_dp->adapter.name, name, sizeof(intel_dp->adapter.name) - 1); intel_dp->adapter.name[sizeof(intel_dp->adapter.name) - 1] = '\0'; intel_dp->adapter.algo_data = &intel_dp->algo; diff --git a/drivers/gpu/drm/gma500/intel_gmbus.c b/drivers/gpu/drm/gma500/intel_gmbus.c index 09cedabf4776..aa45509859f2 100644 --- a/drivers/gpu/drm/gma500/intel_gmbus.c +++ b/drivers/gpu/drm/gma500/intel_gmbus.c @@ -411,7 +411,6 @@ int gma_intel_setup_gmbus(struct drm_device *dev) struct intel_gmbus *bus = &dev_priv->gmbus[i]; bus->adapter.owner = THIS_MODULE; - bus->adapter.class = I2C_CLASS_DDC; snprintf(bus->adapter.name, sizeof(bus->adapter.name), "gma500 gmbus %s", diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c index fc9a34ed58bd..6daa6669ed23 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c @@ -168,7 +168,6 @@ static struct i2c_adapter oaktrail_hdmi_i2c_adapter = { .name = "oaktrail_hdmi_i2c", .nr = 3, .owner = THIS_MODULE, - .class = I2C_CLASS_DDC, .algo = &oaktrail_hdmi_i2c_algorithm, }; diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c index d6fd5d726216..e4f914deceba 100644 --- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c +++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c @@ -2426,7 +2426,6 @@ psb_intel_sdvo_init_ddc_proxy(struct psb_intel_sdvo *sdvo, struct drm_device *dev) { sdvo->ddc.owner = THIS_MODULE; - sdvo->ddc.class = I2C_CLASS_DDC; snprintf(sdvo->ddc.name, I2C_NAME_SIZE, "SDVO DDC proxy"); sdvo->ddc.dev.parent = dev->dev; sdvo->ddc.algo_data = sdvo; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index 410bd019bb35..e6e48651c15c 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -81,7 +81,6 @@ int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_connector *connector) { connector->adapter.owner = THIS_MODULE; - connector->adapter.class = I2C_CLASS_DDC; snprintf(connector->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus"); connector->adapter.dev.parent = drm_dev->dev; i2c_set_adapdata(&connector->adapter, connector); diff --git 
a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 40d7b6f3f489..e9e4dcf345f9 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -899,7 +899,6 @@ int intel_gmbus_setup(struct drm_i915_private *i915) } bus->adapter.owner = THIS_MODULE; - bus->adapter.class = I2C_CLASS_DDC; snprintf(bus->adapter.name, sizeof(bus->adapter.name), "i915 gmbus %s", gmbus_pin->name); diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index a636f42ceae5..5e64d1baffe8 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -3311,7 +3311,6 @@ intel_sdvo_init_ddc_proxy(struct intel_sdvo_ddc *ddc, ddc->ddc_bus = ddc_bus; ddc->ddc.owner = THIS_MODULE; - ddc->ddc.class = I2C_CLASS_DDC; snprintf(ddc->ddc.name, I2C_NAME_SIZE, "SDVO %c DDC%d", port_name(sdvo->base.port), ddc_bus); ddc->ddc.dev.parent = &pdev->dev; diff --git a/drivers/gpu/drm/loongson/lsdc_i2c.c b/drivers/gpu/drm/loongson/lsdc_i2c.c index 9625d0b1d0b4..ce90c25536d2 100644 --- a/drivers/gpu/drm/loongson/lsdc_i2c.c +++ b/drivers/gpu/drm/loongson/lsdc_i2c.c @@ -154,7 +154,6 @@ int lsdc_create_i2c_chan(struct drm_device *ddev, adapter = &li2c->adapter; adapter->algo_data = &li2c->bit; adapter->owner = THIS_MODULE; - adapter->class = I2C_CLASS_DDC; adapter->dev.parent = ddev->dev; adapter->nr = -1; diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c index d675c954befe..54e46e440e0f 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c @@ -297,7 +297,6 @@ static int mtk_hdmi_ddc_probe(struct platform_device *pdev) strscpy(ddc->adap.name, "mediatek-hdmi-ddc", sizeof(ddc->adap.name)); ddc->adap.owner = THIS_MODULE; - ddc->adap.class = I2C_CLASS_DDC; ddc->adap.algo = &mtk_hdmi_ddc_algorithm; ddc->adap.retries = 3; ddc->adap.dev.of_node = dev->of_node; diff --git a/drivers/gpu/drm/mgag200/mgag200_i2c.c b/drivers/gpu/drm/mgag200/mgag200_i2c.c index 0c48bdf3e7f8..423eb302be7e 100644 --- a/drivers/gpu/drm/mgag200/mgag200_i2c.c +++ b/drivers/gpu/drm/mgag200/mgag200_i2c.c @@ -106,7 +106,6 @@ int mgag200_i2c_init(struct mga_device *mdev, struct mga_i2c_chan *i2c) i2c->data = BIT(info->i2c.data_bit); i2c->clock = BIT(info->i2c.clock_bit); i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c index de182c004843..7aa500d24240 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c @@ -249,7 +249,6 @@ struct i2c_adapter *msm_hdmi_i2c_init(struct hdmi *hdmi) i2c->owner = THIS_MODULE; - i2c->class = I2C_CLASS_DDC; snprintf(i2c->name, sizeof(i2c->name), "msm hdmi i2c"); i2c->dev.parent = &hdmi->pdev->dev; i2c->algo = &msm_hdmi_i2c_algorithm; diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 314d066e68e9..3d174390a8af 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -918,7 +918,6 @@ struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev, i2c->rec = *rec; i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c 
b/drivers/gpu/drm/rockchip/inno_hdmi.c index 6e5b922a121e..a7739b27c42b 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -793,7 +793,6 @@ static struct i2c_adapter *inno_hdmi_i2c_adapter(struct inno_hdmi *hdmi) init_completion(&i2c->cmp); adap = &i2c->adap; - adap->class = I2C_CLASS_DDC; adap->owner = THIS_MODULE; adap->dev.parent = hdmi->dev; adap->dev.of_node = hdmi->dev->of_node; diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c index fa6e592e0276..7a3f71aa2ca9 100644 --- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c +++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c @@ -725,7 +725,6 @@ static struct i2c_adapter *rk3066_hdmi_i2c_adapter(struct rk3066_hdmi *hdmi) init_completion(&i2c->cmpltn); adap = &i2c->adap; - adap->class = I2C_CLASS_DDC; adap->owner = THIS_MODULE; adap->dev.parent = hdmi->dev; adap->dev.of_node = hdmi->dev->of_node; diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c index d1a65a921f5a..f5f62eb0eeca 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c @@ -302,7 +302,6 @@ int sun4i_hdmi_i2c_create(struct device *dev, struct sun4i_hdmi *hdmi) return -ENOMEM; adap->owner = THIS_MODULE; - adap->class = I2C_CLASS_DDC; adap->algo = &sun4i_hdmi_i2c_algorithm; strscpy(adap->name, "sun4i_hdmi_i2c adapter", sizeof(adap->name)); i2c_set_adapdata(adap, hdmi); -- cgit v1.2.3 From f21682b362b67833e4f4f481c30abcb432861b0c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 13 Nov 2023 12:37:15 +0100 Subject: drm/amd/pm: Remove I2C_CLASS_SPD support I2C_CLASS_SPD was used to expose the EEPROM content to user space, via the legacy eeprom driver. Now that this driver has been removed, we can remove I2C_CLASS_SPD support. at24 driver with explicit instantiation should be used instead. 
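For reference, explicit instantiation through the at24 driver is done from board or platform code rather than by class-based probing. A minimal sketch is below; the adapter pointer, the "24c02" type string and the 0x50 address are illustrative assumptions, not taken from this patch.

#include <linux/i2c.h>

/*
 * Illustration only: create the EEPROM client explicitly so the at24
 * driver binds to it.  No I2C_CLASS_SPD probing is involved; the device
 * type and address are board specific.
 */
static struct i2c_client *example_instantiate_eeprom(struct i2c_adapter *adap)
{
	struct i2c_board_info info = {
		I2C_BOARD_INFO("24c02", 0x50),
	};

	return i2c_new_client_device(adap, &info);
}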
Signed-off-by: Heiner Kallweit Acked-by: Alex Deucher Signed-off-by: Wolfram Sang --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 1 - drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 - drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 1 - 3 files changed, 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 1a6675d70a4b..bc56a29e63fa 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1527,7 +1527,6 @@ static int aldebaran_i2c_control_init(struct smu_context *smu) smu_i2c->port = 0; mutex_init(&smu_i2c->mutex); control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; control->algo = &aldebaran_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU 0"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 82c4e1f1c6f0..c0e62bab97a1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2720,7 +2720,6 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu) smu_i2c->port = i; mutex_init(&smu_i2c->mutex); control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; control->algo = &smu_v13_0_0_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 0e5a77c3c2e2..8bedd8d2ab2f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1884,7 +1884,6 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) smu_i2c->port = i; mutex_init(&smu_i2c->mutex); control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; control->algo = &smu_v13_0_6_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); -- cgit v1.2.3 From 3bb9b1f958c3d986ed90a3ff009f1e77e9553207 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 10 Jan 2024 20:58:35 +0530 Subject: drm/amd/display: Fix late derefrence 'dsc' check in 'link_set_dsc_pps_packet()' In link_set_dsc_pps_packet(), 'struct display_stream_compressor *dsc' was dereferenced in a DC_LOGGER_INIT(dsc->ctx->logger); before the 'dsc' NULL pointer check. 
Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_dpms.c:905 link_set_dsc_pps_packet() warn: variable dereferenced before check 'dsc' (see line 903) Cc: stable@vger.kernel.org Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Hamza Mahfooz Cc: Wenjing Liu Cc: Qingqing Zhuo Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 3de148004c06..3cbfbf8d107e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -900,11 +900,15 @@ bool link_set_dsc_pps_packet(struct pipe_ctx *pipe_ctx, bool enable, bool immedi { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; struct dc_stream_state *stream = pipe_ctx->stream; - DC_LOGGER_INIT(dsc->ctx->logger); - if (!pipe_ctx->stream->timing.flags.DSC || !dsc) + if (!pipe_ctx->stream->timing.flags.DSC) return false; + if (!dsc) + return false; + + DC_LOGGER_INIT(dsc->ctx->logger); + if (enable) { struct dsc_config dsc_cfg; uint8_t dsc_packed_pps[128]; -- cgit v1.2.3 From aa36d8971fccb55ef3241cbfff9d1799e31d8628 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 28 Dec 2023 21:36:39 -0500 Subject: drm/amd/display: Init link enc resources in dc_state only if res_pool presents [Why & How] res_pool is not initialized in all situations such as virtual environments, and therefore link encoder resources should not be initialized if res_pool is NULL. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Martin Leung Acked-by: Alex Hung Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 460a8010c79f..56feee0ff01b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -267,7 +267,8 @@ void dc_state_construct(struct dc *dc, struct dc_state *state) state->clk_mgr = dc->clk_mgr; /* Initialise DIG link encoder resource tracking variables. */ - link_enc_cfg_init(dc, state); + if (dc->res_pool) + link_enc_cfg_init(dc, state); } void dc_state_destruct(struct dc_state *state) -- cgit v1.2.3 From 8a51cc097dd590a86e8eec5398934ef389ff9a7b Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 28 Dec 2023 13:19:33 -0500 Subject: drm/amd/display: Add logging resource checks [Why] When mapping resources, resources could be unavailable. 
Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Sung joon Kim Acked-by: Alex Hung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 +++- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++++ drivers/gpu/drm/amd/display/dc/core/dc_state.c | 5 +++-- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 69e726630241..aa7c02ba948e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3522,7 +3522,7 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program = resource_get_otg_master_for_stream( &context->res_ctx, stream); - + ASSERT(top_pipe_to_program != NULL); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -4345,6 +4345,8 @@ static bool should_commit_minimal_transition_for_windowed_mpo_odm(struct dc *dc, cur_pipe = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, stream); new_pipe = resource_get_otg_master_for_stream(&context->res_ctx, stream); + if (!cur_pipe || !new_pipe) + return false; cur_is_odm_in_use = resource_get_odm_slice_count(cur_pipe) > 1; new_is_odm_in_use = resource_get_odm_slice_count(new_pipe) > 1; if (cur_is_odm_in_use == new_is_odm_in_use) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index f2abc1096ffb..9fbdb09697fd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2194,6 +2194,10 @@ void resource_log_pipe_topology_update(struct dc *dc, struct dc_state *state) for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) { otg_master = resource_get_otg_master_for_stream( &state->res_ctx, state->streams[stream_idx]); + if (!otg_master || otg_master->stream_res.tg == NULL) { + DC_LOG_DC("topology update: otg_master NULL stream_idx %d!\n", stream_idx); + return; + } slice_count = resource_get_opp_heads_for_otg_master(otg_master, &state->res_ctx, opp_heads); for (slice_idx = 0; slice_idx < slice_count; slice_idx++) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 56feee0ff01b..88c6436b28b6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -434,8 +434,9 @@ bool dc_state_add_plane( otg_master_pipe = resource_get_otg_master_for_stream( &state->res_ctx, stream); - added = resource_append_dpp_pipes_for_plane_composition(state, - dc->current_state, pool, otg_master_pipe, plane_state); + if (otg_master_pipe) + added = resource_append_dpp_pipes_for_plane_composition(state, + dc->current_state, pool, otg_master_pipe, plane_state); if (added) { stream_status->plane_states[stream_status->plane_count] = -- cgit v1.2.3 From 4b56f7d47be87cde5f368b67bc7fac53a2c3e8d2 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 15 Dec 2023 11:01:42 -0500 Subject: drm/amd/display: Port DENTIST hang and TDR fixes to OTG disable W/A [Why] We can experience DENTIST hangs during optimize_bandwidth or TDRs if FIFO is toggled and hangs. [How] Port the DCN35 fixes to DCN314. 
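The essence of the ported fix, visible in the diff below, is that when clocks are being lowered the workaround must walk the pipes of the context being applied rather than the currently committed state. A simplified sketch of that selection follows; struct dc, struct dc_state and struct pipe_ctx are the driver's own types, and the per-pipe handling is elided.

/*
 * Sketch only: choose which state to inspect based on safe_to_lower.
 */
static void example_disable_otg_wa(struct dc *dc, struct dc_state *context,
				   bool safe_to_lower, bool disable)
{
	int i;

	for (i = 0; i < dc->res_pool->pipe_count; ++i) {
		/* When lowering clocks, the incoming context describes the
		 * pipes that stay active; otherwise use the current state. */
		struct pipe_ctx *pipe = safe_to_lower
			? &context->res_ctx.pipe_ctx[i]
			: &dc->current_state->res_ctx.pipe_ctx[i];

		if (!pipe->stream)
			continue;

		/* ... immediate_disable_crtc()/enable_crtc() as in the diff ... */
	}
}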
Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 878c0e7b78ab..a84f1e376dee 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -145,30 +145,27 @@ static int dcn314_get_active_display_cnt_wa( return display_count; } -static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { - struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; - if (disable) { - if (stream_enc && stream_enc->funcs->disable_fifo) - pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); - pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg); - - if (stream_enc && stream_enc->funcs->enable_fifo) - pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc); } } } @@ -297,11 +294,11 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn314_disable_otg_wa(clk_mgr_base, context, true); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn314_disable_otg_wa(clk_mgr_base, context, false); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } -- cgit v1.2.3 From 3ba2a0bfd8cf94eb225e1c60dff16e5c35bde1da Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Wed, 3 Jan 2024 09:42:04 -0500 Subject: drm/amd/display: Clear OPTC mem select on disable [Why] Not clearing the memory select bits prior to OPTC disable can cause DSC corruption issues when attempting to reuse a memory instance for another OPTC that enables ODM. [How] Clear the memory select bits prior to disabling an OPTC. 
Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Ilya Bakoulin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c | 3 +++ drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c index 1788eb29474b..823493543325 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -173,6 +173,9 @@ static bool optc32_disable_crtc(struct timing_generator *optc) OPTC_SEG3_SRC_SEL, 0xf, OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(OPTC_MEMORY_CONFIG, + OPTC_MEM_SEL, 0); + /* disable otg request until end of the first line * in the vertical blank region */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index 3d6c1b2c2b4d..5b1547508850 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -145,6 +145,9 @@ static bool optc35_disable_crtc(struct timing_generator *optc) OPTC_SEG3_SRC_SEL, 0xf, OPTC_NUM_OF_INPUT_SEGMENT, 0); + REG_UPDATE(OPTC_MEMORY_CONFIG, + OPTC_MEM_SEL, 0); + /* disable otg request until end of the first line * in the vertical blank region */ -- cgit v1.2.3 From d3579f5df0536c2f0fabaa3ea80bb2d179884195 Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Mon, 18 Dec 2023 21:40:45 -0500 Subject: drm/amd/display: Fix DML2 watermark calculation [Why] core_mode_programming in DML2 should output watermark calculations to locals, but it incorrectly uses mode_lib [How] update code to match HW DML2 Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Ovidiu Bunea Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index a6b938a12de1..9be5ebf3a8c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -9446,13 +9446,13 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte; // Output - CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0]; - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] - CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0]; - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = 
&mode_lib->ms.support.USRRetrainingSupport[0]; + CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( &mode_lib->scratch, -- cgit v1.2.3 From bfe79f5fff1300d96203383582b078c7b0aec80a Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 2 Jan 2024 14:20:37 +0800 Subject: drm/amd/display: Align the returned error code with legacy DP [Why] For usb4 connector, AUX transaction is handled by dmub utilizing a differnt code path comparing to legacy DP connector. If the usb4 DP connector is disconnected, AUX access will report EBUSY and cause igt@kms_dp_aux_dev fail. [How] Align the error code with the one reported by legacy DP as EIO. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Alex Hung Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index eaf8d9f48244..85b7f58a7f35 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -979,6 +979,11 @@ int dm_helper_dmub_aux_transfer_sync( struct aux_payload *payload, enum aux_return_code_type *operation_result) { + if (!link->hpd_status) { + *operation_result = AUX_RET_ERROR_HPD_DISCON; + return -1; + } + return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, operation_result); } -- cgit v1.2.3 From bc03c02cc1991a066b23e69bbcc0f66e8f1f7453 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 12 Jan 2024 13:33:24 +0800 Subject: drm/amdgpu: Fix the null pointer when load rlc firmware If the RLC firmware is invalid because of wrong header size, the pointer to the rlc firmware is released in function amdgpu_ucode_request. There will be a null pointer error in subsequent use. So skip validation to fix it. 
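A minimal sketch of the resulting load path is below, with a placeholder device pointer and firmware name and the header parsing left out; the caller keeps ownership of the blob and drops it with release_firmware() when finished.

#include <linux/firmware.h>
#include <linux/device.h>

/*
 * Sketch only: plain request_firmware() fetches the blob and performs no
 * header-size validation, so a firmware with an odd header is not rejected
 * (and released) before the driver gets a chance to parse it.  The
 * firmware name here is a placeholder.
 */
static int example_load_rlc(struct device *dev, const struct firmware **fw)
{
	int err;

	err = request_firmware(fw, "amdgpu/example_rlc.bin", dev);
	if (err)
		return err;

	/* ... parse the RLC headers here; sizes in the wild may be wrong ... */

	return 0;
}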
Fixes: 3da9b71563cb ("drm/amd: Use `amdgpu_ucode_*` helpers for GFX10") Signed-off-by: Ma Jun Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 73f6d7e72c73..d63cab294883 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3996,16 +3996,13 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); - /* don't check this. There are apparently firmwares in the wild with - * incorrect size in the header - */ - if (err == -ENODEV) - goto out; + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) - dev_dbg(adev->dev, - "gfx10: amdgpu_ucode_request() failed \"%s\"\n", - fw_name); + goto out; + + /* don't validate this firmware. There are apparently firmwares + * in the wild with incorrect size in the header + */ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); -- cgit v1.2.3 From 05638ff6dd6f0f38734b6b3ee2c7cf15520f5c00 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 13 Jan 2024 15:58:21 +0100 Subject: drm/amd/display: Fix a switch statement in populate_dml_output_cfg_from_stream_state() It is likely that the statement related to 'dml_edp' is misplaced. So move it in the correct "case SIGNAL_TYPE_EDP". Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2") Signed-off-by: Christophe JAILLET Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index fa6a93dd9629..64d01a9cd68c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -626,8 +626,8 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * if (is_dp2p0_output_encoder(pipe)) out->OutputEncoder[location] = dml_dp2p0; break; - out->OutputEncoder[location] = dml_edp; case SIGNAL_TYPE_EDP: + out->OutputEncoder[location] = dml_edp; break; case SIGNAL_TYPE_HDMI_TYPE_A: case SIGNAL_TYPE_DVI_SINGLE_LINK: -- cgit v1.2.3 From 3c4e4eb5d872118fef1708abe933a410c5e07e3a Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Wed, 10 Jan 2024 19:23:56 +0800 Subject: drm/amdkfd: init drm_client with funcs hook otherwise drm_client_dev_unregister() would try to kfree(&adev->kfd.client). 
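A minimal sketch of wiring an in-kernel DRM client with an unregister hook is shown below; the names are placeholders and error handling is simplified.

#include <drm/drm_client.h>

/*
 * Sketch only.  Supplying an .unregister hook tells the DRM core that the
 * client manages its own lifetime, so drm_client_dev_unregister() calls
 * the hook instead of kfree()ing a client structure that may be embedded
 * in a larger object.
 */
static const struct drm_client_funcs example_client_funcs = {
	.unregister = drm_client_release,
};

static int example_register_client(struct drm_device *ddev,
				   struct drm_client_dev *client)
{
	int ret;

	ret = drm_client_init(ddev, client, "example", &example_client_funcs);
	if (ret)
		return ret;

	drm_client_register(client);
	return 0;
}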
Fixes: 1819200166ce ("drm/amdkfd: Export DMABufs from KFD using GEM handles") Signed-off-by: Flora Cui Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 067690ba7bff..81af6bf2f052 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -138,6 +138,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) amdgpu_device_gpu_recover(adev, NULL, &reset_context); } +static const struct drm_client_funcs kfd_client_funcs = { + .unregister = drm_client_release, +}; void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; @@ -161,7 +164,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL); + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); if (ret) { dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); return; -- cgit v1.2.3 From fb1c93c2e9604a884467a773790016199f78ca08 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 10 Jan 2024 15:19:29 +0100 Subject: drm/amdgpu: revert "Adjust removal control flow for smu v13_0_2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling amdgpu_device_ip_resume_phase1() during shutdown leaves the HW in an active state and is an unbalanced use of the IP callbacks. Using the IP callbacks like this can lead to memory leaks, double free and imbalanced reference counters. Leaving the HW in an active state can lead to DMA accesses to memory now freed by the driver. Both is a complete no-go for driver unload so completely revert the workaround for now. This reverts commit f5c7e7797060255dbc8160734ccc5ad6183c5e04. 
Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 32 +----------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 32 ------------------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1 - 4 files changed, 1 insertion(+), 65 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ecbc58269951..b158d27d0a71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5246,7 +5246,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; - bool gpu_reset_for_dev_remove = 0; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, @@ -5266,10 +5265,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); - gpu_reset_for_dev_remove = - test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); - /* * ASIC reset has to be done on all XGMI hive nodes ASAP * to allow proper links negotiation in FW (within 1 sec) @@ -5312,18 +5307,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, amdgpu_ras_intr_cleared(); } - /* Since the mode1 reset affects base ip blocks, the - * phase1 ip blocks need to be resumed. Otherwise there - * will be a BIOS signature error and the psp bootloader - * can't load kdb on the next amdgpu install. - */ - if (gpu_reset_for_dev_remove) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) - amdgpu_device_ip_resume_phase1(tmp_adev); - - goto end; - } - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { if (need_full_reset) { /* post card */ @@ -5560,11 +5543,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int i, r = 0; bool need_emergency_restart = false; bool audio_suspended = false; - bool gpu_reset_for_dev_remove = false; - - gpu_reset_for_dev_remove = - test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); /* * Special case: RAS triggered and full reset isn't supported @@ -5602,7 +5580,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { list_add_tail(&tmp_adev->reset_list, &device_list); - if (gpu_reset_for_dev_remove && adev->shutdown) + if (adev->shutdown) tmp_adev->shutdown = true; } if (!list_is_first(&adev->reset_list, &device_list)) @@ -5687,10 +5665,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (gpu_reset_for_dev_remove) { - /* Workaroud for ASICs need to disable SMC first */ - amdgpu_device_smu_fini_early(tmp_adev); - } r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); /*TODO Should we stop ?*/ if (r) { @@ -5722,9 +5696,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. 
*/ r = amdgpu_do_asic_reset(device_list_handle, reset_context); if (r && r == -EAGAIN) goto retry; - - if (!r && gpu_reset_for_dev_remove) - goto recover_end; } skip_hw_reset: @@ -5780,7 +5751,6 @@ skip_sched_resume: amdgpu_ras_set_error_query_ready(tmp_adev, true); } -recover_end: tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5c9caf5fa075..cc69005f5b46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2337,38 +2337,6 @@ amdgpu_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(dev->dev); } - if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) && - !amdgpu_sriov_vf(adev)) { - bool need_to_reset_gpu = false; - - if (adev->gmc.xgmi.num_physical_nodes > 1) { - struct amdgpu_hive_info *hive; - - hive = amdgpu_get_xgmi_hive(adev); - if (hive->device_remove_count == 0) - need_to_reset_gpu = true; - hive->device_remove_count++; - amdgpu_put_xgmi_hive(hive); - } else { - need_to_reset_gpu = true; - } - - /* Workaround for ASICs need to reset SMU. - * Called only when the first device is removed. - */ - if (need_to_reset_gpu) { - struct amdgpu_reset_context reset_context; - - adev->shutdown = true; - memset(&reset_context, 0, sizeof(reset_context)); - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags); - amdgpu_device_gpu_recover(adev, NULL, &reset_context); - } - } - amdgpu_driver_unload_kms(dev); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index b0335a1c5e90..19899f6b9b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -32,7 +32,6 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, - AMDGPU_RESET_FOR_DEVICE_REMOVE = 2, }; struct amdgpu_reset_context { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 6cab882e8061..1592c63b3099 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -43,7 +43,6 @@ struct amdgpu_hive_info { } pstate; struct amdgpu_reset_domain *reset_domain; - uint32_t device_remove_count; atomic_t ras_recovery; }; -- cgit v1.2.3 From b2139c96dc954b58b81bc670fc4ea5f034ed062c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 13 Jan 2024 14:32:27 +0530 Subject: drm/amd/display: Drop 'acrtc' and add 'new_crtc_state' NULL check for writeback requests. Return value of 'to_amdgpu_crtc' which is container_of(...) can't be null, so it's null check 'acrtc' is dropped. Fixing the below: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:9302 amdgpu_dm_atomic_commit_tail() error: we previously assumed 'acrtc' could be null (see line 9299) Added 'new_crtc_state' NULL check for function 'drm_atomic_get_new_crtc_state' that retrieves the new state for a CRTC, while enabling writeback requests. 
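The claim that the container_of() result cannot be NULL is plain pointer arithmetic on a valid member pointer; a small stand-alone illustration (ordinary C with made-up struct names, not driver code) is:

#include <stddef.h>
#include <stdio.h>

struct crtc { int id; };

struct amdgpu_crtc_like {
	int wb_enabled;
	struct crtc base;
};

/* Same idea as the kernel's container_of(): subtract the member offset. */
#define container_of_example(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct amdgpu_crtc_like acrtc = { .wb_enabled = 0 };
	struct crtc *base = &acrtc.base;	/* a valid, non-NULL member pointer */
	struct amdgpu_crtc_like *back =
		container_of_example(base, struct amdgpu_crtc_like, base);

	/* back is necessarily &acrtc, so a NULL check on it is dead code. */
	printf("%d\n", back == &acrtc);
	return 0;
}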
Cc: stable@vger.kernel.org Cc: Alex Hung Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 10b2a896c498..d55eeb30ccb2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9293,10 +9293,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (!new_con_state->writeback_job) continue; - new_crtc_state = NULL; + new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); - if (acrtc) - new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); + if (!new_crtc_state) + continue; if (acrtc->wb_enabled) continue; -- cgit v1.2.3 From aa0901a9008eeb2710292aff94e615adf7884d5f Mon Sep 17 00:00:00 2001 From: Ori Messinger Date: Wed, 22 Nov 2023 00:12:13 -0500 Subject: drm/amdgpu: Enable GFXOFF for Compute on GFX11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On GFX version 11, GFXOFF was disabled due to a MES KIQ firmware issue, which has since been fixed after version 64. This patch only re-enables GFXOFF for GFX version 11 if the GPU's MES KIQ firmware version is newer than version 64. V2: Keep GFXOFF disabled on GFX11 if MES KIQ is below version 64. V3: Add parentheses to avoid GCC warning for parentheses: "suggest parentheses around comparison in operand of ‘&’" V4: Remove "V3" from commit title V5: Change commit description and insert 'Acked-by' Signed-off-by: Ori Messinger Acked-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 81af6bf2f052..77e263660288 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -698,10 +698,8 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; - /* Temporary workaround to fix issues observed in some - * compute applications when GFXOFF is enabled on GFX11. - */ - if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) { + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && + ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { pr_debug("GFXOFF is %s\n", idle ? 
"enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && -- cgit v1.2.3 From cacea81390fd8c8c85404e5eb2adeb83d87a912e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 18 Jan 2024 06:19:57 +1000 Subject: nouveau/vmm: don't set addr on the fail path to avoid warning nvif_vmm_put gets called if addr is set, but if the allocation fails we don't need to call put, otherwise we get a warning like [523232.435671] ------------[ cut here ]------------ [523232.435674] WARNING: CPU: 8 PID: 1505697 at drivers/gpu/drm/nouveau/nvif/vmm.c:68 nvif_vmm_put+0x72/0x80 [nouveau] [523232.435795] Modules linked in: uinput rfcomm snd_seq_dummy snd_hrtimer nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink qrtr bnep sunrpc binfmt_misc intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common iwlmvm nfit libnvdimm vfat fat x86_pkg_temp_thermal intel_powerclamp mac80211 snd_soc_avs snd_soc_hda_codec coretemp snd_hda_ext_core snd_soc_core snd_hda_codec_realtek kvm_intel snd_hda_codec_hdmi snd_compress snd_hda_codec_generic ac97_bus snd_pcm_dmaengine snd_hda_intel libarc4 snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec kvm iwlwifi snd_hda_core btusb snd_hwdep btrtl snd_seq btintel irqbypass btbcm rapl snd_seq_device eeepc_wmi btmtk intel_cstate iTCO_wdt cfg80211 snd_pcm asus_wmi bluetooth intel_pmc_bxt iTCO_vendor_support snd_timer ledtrig_audio pktcdvd snd mei_me [523232.435828] sparse_keymap intel_uncore i2c_i801 platform_profile wmi_bmof mei pcspkr ioatdma soundcore i2c_smbus rfkill idma64 dca joydev acpi_tad loop zram nouveau drm_ttm_helper ttm video drm_exec drm_gpuvm gpu_sched crct10dif_pclmul i2c_algo_bit nvme crc32_pclmul crc32c_intel drm_display_helper polyval_clmulni nvme_core polyval_generic e1000e mxm_wmi cec ghash_clmulni_intel r8169 sha512_ssse3 nvme_common wmi pinctrl_sunrisepoint uas usb_storage ip6_tables ip_tables fuse [523232.435849] CPU: 8 PID: 1505697 Comm: gnome-shell Tainted: G W 6.6.0-rc7-nvk-uapi+ #12 [523232.435851] Hardware name: System manufacturer System Product Name/ROG STRIX X299-E GAMING II, BIOS 1301 09/24/2021 [523232.435852] RIP: 0010:nvif_vmm_put+0x72/0x80 [nouveau] [523232.435934] Code: 00 00 48 89 e2 be 02 00 00 00 48 c7 04 24 00 00 00 00 48 89 44 24 08 e8 fc bf ff ff 85 c0 75 0a 48 c7 43 08 00 00 00 00 eb b3 <0f> 0b eb f2 e8 f5 c9 b2 e6 0f 1f 44 00 00 90 90 90 90 90 90 90 90 [523232.435936] RSP: 0018:ffffc900077ffbd8 EFLAGS: 00010282 [523232.435937] RAX: 00000000fffffffe RBX: ffffc900077ffc00 RCX: 0000000000000010 [523232.435938] RDX: 0000000000000010 RSI: ffffc900077ffb38 RDI: ffffc900077ffbd8 [523232.435940] RBP: ffff888e1c4f2140 R08: 0000000000000000 R09: 0000000000000000 [523232.435940] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888503811800 [523232.435941] R13: ffffc900077ffca0 R14: ffff888e1c4f2140 R15: ffff88810317e1e0 [523232.435942] FS: 00007f933a769640(0000) GS:ffff88905fa00000(0000) knlGS:0000000000000000 [523232.435943] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [523232.435944] CR2: 00007f930bef7000 CR3: 00000005d0322001 CR4: 00000000003706e0 [523232.435945] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [523232.435946] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [523232.435964] Call Trace: 
[523232.435965] [523232.435966] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436051] ? __warn+0x81/0x130 [523232.436055] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436138] ? report_bug+0x171/0x1a0 [523232.436142] ? handle_bug+0x3c/0x80 [523232.436144] ? exc_invalid_op+0x17/0x70 [523232.436145] ? asm_exc_invalid_op+0x1a/0x20 [523232.436149] ? nvif_vmm_put+0x72/0x80 [nouveau] [523232.436230] ? nvif_vmm_put+0x64/0x80 [nouveau] [523232.436342] nouveau_vma_del+0x80/0xd0 [nouveau] [523232.436506] nouveau_vma_new+0x1a0/0x210 [nouveau] [523232.436671] nouveau_gem_object_open+0x1d0/0x1f0 [nouveau] [523232.436835] drm_gem_handle_create_tail+0xd1/0x180 [523232.436840] drm_prime_fd_to_handle_ioctl+0x12e/0x200 [523232.436844] ? __pfx_drm_prime_fd_to_handle_ioctl+0x10/0x10 [523232.436847] drm_ioctl_kernel+0xd3/0x180 [523232.436849] drm_ioctl+0x26d/0x4b0 [523232.436851] ? __pfx_drm_prime_fd_to_handle_ioctl+0x10/0x10 [523232.436855] nouveau_drm_ioctl+0x5a/0xb0 [nouveau] [523232.437032] __x64_sys_ioctl+0x94/0xd0 [523232.437036] do_syscall_64+0x5d/0x90 [523232.437040] ? syscall_exit_to_user_mode+0x2b/0x40 [523232.437044] ? do_syscall_64+0x6c/0x90 [523232.437046] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Reported-by: Faith Ekstrand Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240117213852.295565-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nouveau_vmm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index a6602c012671..3dda885df5b2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -108,6 +108,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, } else { ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0, mem->mem.size, &tmp); + if (ret) + goto done; + vma->addr = tmp.addr; } -- cgit v1.2.3
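The pattern behind the nouveau fix above generalizes: record a resource handle only after the allocation succeeds, so that a cleanup path keyed on that handle stays balanced. A small stand-alone sketch (plain C; backend_get()/backend_put() are made-up stand-ins for nvif_vmm_get()/nvif_vmm_put()) is:

#include <stdio.h>

struct handle { unsigned long addr; };

/* Stand-ins for the real allocator and release calls. */
static int backend_get(struct handle *tmp, int fail)
{
	if (fail)
		return -1;
	tmp->addr = 0x1000;
	return 0;
}

static void backend_put(unsigned long addr)
{
	printf("put %#lx\n", addr);
}

static int example_new(struct handle *out, int fail)
{
	struct handle tmp = { 0 };
	int ret = backend_get(&tmp, fail);

	if (ret)
		return ret;	/* out->addr stays 0: nothing for cleanup to put */

	out->addr = tmp.addr;	/* publish the address only on success */
	return 0;
}

static void example_del(struct handle *out)
{
	if (out->addr) {	/* put only when an address was actually obtained */
		backend_put(out->addr);
		out->addr = 0;
	}
}

int main(void)
{
	struct handle h = { 0 };

	if (example_new(&h, 1))
		example_del(&h);	/* harmless no-op after the failed get */
	return 0;
}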